[med-svn] [pbseqlib] 02/04: Imported Upstream version 0~20151014+gitbe5d1bf

Afif Elghraoui afif-guest at moszumanska.debian.org
Sat Oct 17 08:32:07 UTC 2015


This is an automated email from the git hooks/post-receive script.

afif-guest pushed a commit to branch master
in repository pbseqlib.

commit 4bf697372bc65dd482e535d429f3bed9c2bd63d9
Author: Afif Elghraoui <afif at ghraoui.name>
Date:   Thu Oct 15 23:27:53 2015 -0700

    Imported Upstream version 0~20151014+gitbe5d1bf
---
 .gitignore                                         |   9 +-
 .travis.yml                                        |   6 +-
 Makefile                                           |  36 ---
 README.md                                          |  24 +-
 alignment/Makefile                                 | 113 -------
 .../algorithms/anchoring/MapBySuffixArrayImpl.hpp  |  20 +-
 alignment/algorithms/sorting/DifferenceCovers.cpp  |   3 +-
 .../algorithms/sorting/LightweightSuffixArray.cpp  |  11 +-
 alignment/algorithms/sorting/MultikeyQuicksort.cpp |   2 +-
 alignment/algorithms/sorting/qsufsort.hpp          |   5 +-
 alignment/build.mk                                 |   1 +
 alignment/bwt/Occ.hpp                              |   2 +-
 alignment/bwt/PackedHash.hpp                       |  14 +-
 .../datastructures/alignment/FilterCriteria.cpp    |   2 +
 alignment/files/FragmentCCSIterator.cpp            |   3 +-
 alignment/files/ReaderAgglomerate.cpp              |  52 +++-
 alignment/files/ReaderAgglomerate.hpp              |   4 +
 alignment/format/BAMPrinter.hpp                    |  18 +-
 alignment/format/BAMPrinterImpl.hpp                |  67 +++-
 alignment/format/SAMHeaderPrinter.cpp              |  25 +-
 alignment/format/SAMPrinter.cpp                    |   7 +-
 alignment/format/SAMPrinter.hpp                    |   1 +
 alignment/format/SAMPrinterImpl.hpp                |  14 +-
 alignment/makefile                                 |  47 +++
 alignment/simple.mk                                |  82 -----
 alignment/simulator/ContextOutputList.hpp          |   2 +-
 alignment/simulator/ContextSet.cpp                 |   4 +-
 alignment/simulator/OutputSampleListSet.hpp        |   3 +-
 alignment/suffixarray/LCPTable.hpp                 |  21 +-
 alignment/suffixarray/SuffixArray.hpp              |   1 -
 alignment/suffixarray/ssort.hpp                    |   6 +-
 alignment/tuples/TupleCountTableImpl.hpp           |   5 +-
 alignment/tuples/TupleListImpl.hpp                 |   1 -
 alignment/utils/RegionUtils.cpp                    | 111 +------
 alignment/utils/RegionUtils.hpp                    |  48 ---
 alignment/utils/RegionUtilsImpl.hpp                | 231 +++-----------
 common.mk                                          |  84 -----
 configure.py                                       | 339 +++++++++++++++++++++
 hdf/BufferedHDF2DArrayImpl.hpp                     |  15 +-
 hdf/BufferedHDFArrayImpl.hpp                       |   3 +-
 hdf/HDFAtom.cpp                                    |   9 +-
 hdf/HDFAtom.hpp                                    |  37 ++-
 hdf/HDFAttributable.cpp                            |   6 +-
 hdf/HDFAttributable.hpp                            |   4 +-
 hdf/HDFBasReader.hpp                               | 240 ++++++++-------
 hdf/HDFBaseCallsWriter.cpp                         | 326 ++++++++++++++++++++
 hdf/HDFBaseCallsWriter.hpp                         | 233 ++++++++++++++
 hdf/HDFBaxWriter.cpp                               | 141 +++++++++
 hdf/HDFBaxWriter.hpp                               | 172 +++++++++++
 hdf/HDFCmpFile.hpp                                 |  14 +-
 hdf/HDFCmpReader.hpp                               |   1 +
 hdf/HDFCmpRefAlignmentGroup.hpp                    |   3 +-
 hdf/HDFData.cpp                                    |  10 +-
 hdf/HDFData.hpp                                    |  10 +-
 hdf/HDFPlsReader.hpp                               |  16 +-
 hdf/HDFRegionTableReader.cpp                       | 115 ++++---
 hdf/HDFRegionTableReader.hpp                       |  30 +-
 hdf/HDFRegionsWriter.cpp                           |  99 ++++++
 hdf/HDFRegionsWriter.hpp                           | 101 ++++++
 hdf/HDFScanDataReader.cpp                          |  29 +-
 hdf/HDFScanDataReader.hpp                          |   7 +-
 hdf/HDFScanDataWriter.cpp                          | 127 +++++---
 hdf/HDFScanDataWriter.hpp                          |  55 +++-
 hdf/HDFWriteBuffer.hpp                             |   3 +-
 hdf/HDFWriterBase.cpp                              |  98 ++++++
 hdf/HDFWriterBase.hpp                              |  88 ++++++
 hdf/HDFZMWMetricsWriter.cpp                        | 142 +++++++++
 hdf/HDFZMWMetricsWriter.hpp                        | 117 +++++++
 hdf/HDFZMWWriter.cpp                               | 144 +++++++++
 hdf/HDFZMWWriter.hpp                               | 120 ++++++++
 hdf/Makefile                                       |  94 ------
 hdf/build.mk                                       |   1 +
 hdf/makefile                                       |  33 ++
 makefile                                           |  31 ++
 pbdata/.gitignore                                  |   2 +
 pbdata/CCSSequence.cpp                             |  21 +-
 pbdata/CCSSequence.hpp                             |  22 +-
 pbdata/CompressedDNASequence.hpp                   |   6 +-
 pbdata/CompressedSequenceImpl.hpp                  |  13 +-
 pbdata/DNASequence.cpp                             |  30 +-
 pbdata/DNASequence.hpp                             |  15 +-
 pbdata/Enumerations.h                              |  47 ++-
 pbdata/FASTAReader.cpp                             |  11 +-
 pbdata/FASTASequence.cpp                           |  38 +--
 pbdata/FASTASequence.hpp                           |  16 +-
 pbdata/FASTQReader.cpp                             |   2 +-
 pbdata/FASTQSequence.cpp                           |  50 ++-
 pbdata/FASTQSequence.hpp                           |  41 ++-
 pbdata/MD5Utils.cpp                                |   5 +-
 pbdata/Makefile                                    |  85 ------
 pbdata/PacBioDefs.h                                | 180 +++++++++++
 pbdata/PackedDNASequence.cpp                       |   6 +-
 pbdata/ReverseCompressIndex.cpp                    |   3 +-
 pbdata/SMRTSequence.cpp                            | 289 +++++++++++++-----
 pbdata/SMRTSequence.hpp                            | 161 +++++++---
 pbdata/StringUtils.cpp                             |  52 +---
 pbdata/StringUtils.hpp                             |   2 +-
 pbdata/build.mk                                    |   1 +
 pbdata/makefile                                    |  39 +++
 pbdata/matrix/FlatMatrixImpl.hpp                   |   9 +-
 pbdata/matrix/MatrixImpl.hpp                       |   7 +-
 pbdata/metagenome/SequenceIndexDatabaseImpl.hpp    |  16 +-
 pbdata/metagenome/TitleTable.cpp                   |  10 +-
 pbdata/reads/BaseFile.cpp                          |   6 +-
 pbdata/reads/PulseBaseCommon.cpp                   |  39 ++-
 pbdata/reads/PulseBaseCommon.hpp                   |  39 ++-
 pbdata/reads/PulseFile.cpp                         |  40 ++-
 pbdata/reads/PulseFileImpl.hpp                     |   4 +-
 pbdata/reads/ReadInterval.hpp                      |  19 +-
 pbdata/reads/RegionAnnotation.cpp                  |  49 +++
 pbdata/reads/RegionAnnotation.hpp                  | 241 +++++++++++++++
 pbdata/reads/RegionAnnotations.cpp                 | 179 +++++++++++
 pbdata/reads/RegionAnnotations.hpp                 | 122 ++++++++
 pbdata/reads/RegionTable.cpp                       | 239 +++++++--------
 pbdata/reads/RegionTable.hpp                       | 166 ++++++----
 pbdata/reads/RegionTypeMap.cpp                     |  89 ++++++
 pbdata/reads/RegionTypeMap.hpp                     |  85 ++++++
 pbdata/reads/ScanData.cpp                          | 155 +++++++++-
 pbdata/reads/ScanData.hpp                          |  86 +++++-
 pbdata/sam/SAMReaderImpl.hpp                       |   2 +-
 pbdata/utils.hpp                                   |   5 +-
 pbdata/utils/SMRTReadUtils.cpp                     |   4 +-
 pbdata/utils/SMRTTitle.hpp                         |  14 +
 pbdata/utilsImpl.hpp                               |  32 +-
 rules.mk                                           |  28 ++
 simple.mk                                          |  16 -
 travis.sh                                          |  14 +
 unittest/.gitignore                                |   1 +
 unittest/Makefile                                  |  41 ---
 unittest/alignment/Makefile                        |   9 +-
 unittest/alignment/files/CCSIterator_gtest.cpp     |   2 -
 .../alignment/files/FragmentCCSIterator_gtest.cpp  |   5 +-
 .../files/FragmentCCSIterator_other_gtest.cpp      | 100 ++++++
 .../alignment/files/ReaderAgglomerate_gtest.cpp    |  16 +
 unittest/alignment/utils/RegionUtils_gtest.cpp     |   1 -
 unittest/build.mk                                  |  27 ++
 unittest/common.mk                                 |  64 ----
 unittest/hdf/HDFPlsReader_gtest.cpp                |   3 +-
 unittest/hdf/HDFScanDataWriter_gtest.cpp           |   2 +
 unittest/hdf/Makefile                              |   9 +-
 unittest/makefile                                  | 120 ++++++++
 unittest/pbdata/CCSSequence_gtest.cpp              |   8 +-
 unittest/pbdata/DNASequence_gtest.cpp              |   2 +-
 unittest/pbdata/Makefile                           |   9 +-
 unittest/pbdata/SMRTSequence_gtest.cpp             |   6 +-
 unittest/pbdata/StringUtils_gtest.cpp              |  44 ++-
 unittest/pbdata/reads/RegionAnnotations_gtest.cpp  | 203 ++++++++++++
 unittest/pbdata/reads/RegionTypeMap_gtest.cpp      |  61 ++++
 148 files changed, 5744 insertions(+), 1939 deletions(-)

diff --git a/.gitignore b/.gitignore
index bebcdca..93add20 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,10 @@
 *.o
-*.a
 *.d
+*.a
+*.so
+*.dylib
+defines.mk
+all.xml
+*.h5
+libconfig.h
+/hdf/hdf5-1.8.12-headers/
diff --git a/.travis.yml b/.travis.yml
index 699f0ab..b4fc2f8 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,6 +1,6 @@
 language: cpp
 script: 
-  - make nopbbam=1 COMMON_NO_THIRD_PARTY_REQD=true
+  - ./travis.sh
 compiler:
   - gcc
 # - clang
@@ -13,8 +13,8 @@ addons:
     packages:
     - gcc-4.8
     - g++-4.8
-    - clang
-#   - libxqilla-dev # missing, but not needed?
+#   - clang
+#   - libhdf5-serial-1.8.4
 notifications:
   email: false
 sudo: false
diff --git a/Makefile b/Makefile
deleted file mode 100644
index 6252198..0000000
--- a/Makefile
+++ /dev/null
@@ -1,36 +0,0 @@
-SHELL=/bin/bash
-
-.PHONY: all debug profile gtest clean cleanall
-
-# $Change: 140182 $ 
-
-all:
-	make -C pbdata all
-	make -C hdf all
-	make -C alignment all
-
-debug:
-	make -C pbdata debug
-	make -C hdf debug
-	make -C alignment debug
-
-profile:
-	make -C pbdata profile
-	make -C hdf profile
-	make -C alignment profile
-
-g:
-	make -C pbdata g
-	make -C hdf g
-	make -C alignment g
-
-gtest:
-	make -C unittest gtest
-
-clean:
-	@make -C pbdata clean
-	@make -C hdf clean
-	@make -C alignment clean
-	@make -C unittest clean
-
-cleanall: clean
diff --git a/README.md b/README.md
index 7c0c052..7d95e8d 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,8 @@
 [![Build Status](https://travis-ci.org/PacificBiosciences/blasr_libcpp.svg?branch=master)](https://travis-ci.org/PacificBiosciences/blasr_libcpp)
+
 #What is blasr_libcpp#
 
-Blasr_libcpp is a **library** used by blasr and other executables such as samtoh5, loadPulses for analyzing PacBio sequences. This library contains three sub-directories, including pbdata, hdf and alignment.
+**Blasr_libcpp** is a *library* used by **blasr** and other executables such as samtoh5, loadPulses for analyzing PacBio sequences. This library contains three sub-libraries, including pbdata, hdf and alignment:
 + pbdata  
    - contains source code for handling Pacbio sequences and can build lib ```libpbdata```.
 + hdf 
@@ -9,16 +10,13 @@ Blasr_libcpp is a **library** used by blasr and other executables such as samtoh
 + alignment 
    - contains source code for aligning Pacbio reads to target sequences used in blasr and builds ```libblasr```.
 
+For more information, see
+* https://github.com/PacificBiosciences/blasr_libcpp/wiki
 
-##Appendix: Dependencies##
-+ libpbdata
-   -  does **not** depend on ```libhdf5```
-   -  should build without the ```pbbam``` library *for now*
-
-- libpbhdf
-   -  depends on the ```libpbdata``` and ```libhdf5``` libraries to build
-   
-- alignment
-   -  depends on the ```libpbdata``` library to build
-   -  can build either with or without the ```libpbhdf``` library
-   -  can build either with or without the ```pbbam``` library
+## Building
+The simplest way is:
+```
+NOPBBAM=1 ./configure.py
+make -j all
+```
+That will skip pbbam, and it will download HDF5 headers.
diff --git a/alignment/Makefile b/alignment/Makefile
deleted file mode 100644
index 9c27250..0000000
--- a/alignment/Makefile
+++ /dev/null
@@ -1,113 +0,0 @@
-
-include ../common.mk
-
-# To enable building a shared library, invoke as "make SHARED_LIB=true ..."
-ifneq ($(SHARED_LIB),)
-    # Generating shared library
-    CXX_SHAREDFLAGS := -fPIC
-    LD_SHAREDFLAGS  := -shared -fPIC
-    TARGET_LIB      := libblasr.so
-    # Developers should set these to appropriate defaults (other systems 
-    # will override these on the command line):
-    HDF5_LIB        := ../../../../prebuilt.out/prebuilt.out/hdf5/hdf5-1.8.12/centos-5/lib/libhdf5.so
-    ZLIB_LIB        := ../../../../prebuilt.tmpsrc/zlib/zlib_1.2.8/_output/install/lib/libz.so
-    HTSLIB_LIB      := ../../../staging/PostPrimary/pbbam/_output/install-build/lib/libpbbam.so
-    PBBAM_LIB       := ../../../staging/PostPrimary/pbbam/third-party/htslib/_output/install-build/lib/libhts.so
-    LIBPBDATA_LIB   := ../../../staging/PostPrimary/pbbam/third-party/htslib/_output/install-build/lib/libhts.so
-else
-    # Generating shared library
-    CXX_SHAREDFLAGS :=
-    LD_SHAREDFLAGS  :=
-    TARGET_LIB      := libblasr.a
-    HDF5_LIB        :=
-    ZLIB_LIB        :=
-    HTSLIB_LIB      :=
-    PBBAM_LIB       :=
-    LIBPBDATA_LIB   :=
-endif
-
-DEP_LIBS := $(HDF5_LIB) $(ZLIB_LIB) $(HTSLIB_LIB) $(PBBAM_LIB) $(PBDATA_LIB)
-
-LIBPBDATA_INCLUDE := ../pbdata
-LIBPBIHDF_INCLUDE := ../hdf
-PBBAM_INCLUDE := $(PBBAM)/include
-HTSLIB_INCLUDE ?= $(PBBAM)/third-party/htslib
-
-INCLUDES = -I$(LIBPBDATA_INCLUDE) \
-           -I$(LIBPBIHDF_INCLUDE) \
-	   -I.
-ifneq ($(HDF5_INC),)
-INCLUDES += -I$(HDF5_INC)
-else
-HDF_HEADERS := hdf5-1.8.12-headers
-INCLUDES += -I../hdf/$(HDF_HEADERS)/src -I../hdf/$(HDF_HEADERS)/c++/src
-endif
-
-ifneq ($(ZLIB_ROOT), notfound)
-	INCLUDES += -I$(ZLIB_ROOT)/include
-endif
-
-ifeq ($(origin nopbbam), undefined)
-    INCLUDES += -I$(PBBAM_INCLUDE) -I$(HTSLIB_INCLUDE) -I$(BOOST_INCLUDE)
-endif
-
-CXXOPTS := -std=c++11 -pedantic -Wno-long-long -MMD -MP
-
-sources := $(wildcard algorithms/alignment/*.cpp) \
-		   $(wildcard algorithms/alignment/sdp/*.cpp) \
-		   $(wildcard algorithms/anchoring/*.cpp) \
-		   $(wildcard algorithms/compare/*.cpp) \
-		   $(wildcard algorithms/sorting/*.cpp) \
-		   $(wildcard datastructures/alignment/*.cpp) \
-		   $(wildcard datastructures/alignmentset/*.cpp) \
-		   $(wildcard datastructures/anchoring/*.cpp) \
-		   $(wildcard datastructures/tuplelists/*.cpp) \
-		   $(wildcard suffixarray/*.cpp) \
-		   $(wildcard qvs/*.cpp) \
-		   $(wildcard statistics/*.cpp) \
-		   $(wildcard tuples/*.cpp) \
-		   $(wildcard utils/*.cpp) \
-		   $(wildcard files/*.cpp) \
-		   $(wildcard format/*.cpp) \
-		   $(wildcard simulator/*.cpp) \
-		   $(wildcard *.cpp) 
-
-ifdef nohdf
-sources := $(filter-out files/% utils/FileOfFileNames.cpp, $(sources))
-endif
-
-objects := $(sources:.cpp=.o)
-shared_objects := $(sources:.cpp=.shared.o)
-dependencies := $(objects:.o=.d) $(shared_objects:.o=.d)
-
-all : CXXFLAGS ?= -O3
-
-debug : CXXFLAGS ?= -g -ggdb -fno-inline
-
-profile : CXXFLAGS ?= -Os -pg
-
-g: CXXFLAGS = -g -ggdb -fno-inline -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free -fno-omit-frame-pointer 
-
-all debug profile g: $(TARGET_LIB)
-
-libblasr.a: $(objects)
-	$(AR_pp) $(ARFLAGS) $@ $^
-
-libblasr.so: $(shared_objects) $(DEP_LIBS)
-	$(CXX_pp) $(LD_SHAREDFLAGS) -o $@ $^
-
-%.o: %.cpp
-	$(CXX) $(CXXOPTS) $(CXXFLAGS) $(LEGACY) $(INCLUDES) -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.o) $(@:%.o=%.d)" -c $< -o $@
-
-%.shared.o: %.cpp
-	$(CXX_pp) $(CXX_SHAREDFLAGS) $(CXXOPTS) $(CXXFLAGS) $(LEGACY) $(INCLUDES) -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.o) $(@:%.o=%.d)" -c $< -o $@
-
-# .INTERMEDIATE: $(objects)
-
-clean: 
-	@rm -f libblasr.a libblasr.so
-	@rm -f $(objects) $(shared_objects) $(dependencies)
-	@find . -type f -name \*.o -delete
-	@find . -type f -name \*.d -delete
-
--include $(dependencies)
diff --git a/alignment/algorithms/anchoring/MapBySuffixArrayImpl.hpp b/alignment/algorithms/anchoring/MapBySuffixArrayImpl.hpp
index 6cbac02..2405f45 100644
--- a/alignment/algorithms/anchoring/MapBySuffixArrayImpl.hpp
+++ b/alignment/algorithms/anchoring/MapBySuffixArrayImpl.hpp
@@ -31,14 +31,14 @@ int LocateAnchorBoundsInSuffixArray(T_RefSequence &reference,
     // anything shorter than that.
     //
     if (minPrefixMatchLength > 0 and 
-        read.subreadEnd - read.subreadStart < minPrefixMatchLength) {
+        read.SubreadLength() < minPrefixMatchLength) {
         return 0;
     }
 
     DNALength p, m;
     DNALength alignEnd;
-    DNALength matchEnd = read.subreadEnd - minPrefixMatchLength + 1;
-    DNALength numSearchedPositions = matchEnd - read.subreadStart;
+    DNALength matchEnd = read.SubreadEnd() - minPrefixMatchLength + 1;
+    DNALength numSearchedPositions = matchEnd - read.SubreadStart();
 
     matchLength.resize(numSearchedPositions);
     matchLow.resize(numSearchedPositions);
@@ -49,7 +49,7 @@ int LocateAnchorBoundsInSuffixArray(T_RefSequence &reference,
     std::fill(matchHigh.begin(), matchHigh.end(), 0);
     vector<SAIndex> lowMatchBound, highMatchBound;	
 
-    for (m = 0, p = read.subreadStart; p < matchEnd; p++, m++) {
+    for (m = 0, p = read.SubreadStart(); p < matchEnd; p++, m++) {
         DNALength lcpLow, lcpHigh, lcpLength;
         lowMatchBound.clear(); highMatchBound.clear();
         lcpLow = 0;
@@ -212,7 +212,7 @@ int LocateAnchorBoundsInSuffixArray(T_RefSequence &reference,
 template<typename T_SuffixArray, 
          typename T_RefSequence, 
          typename T_Sequence, 
-        typename T_MatchPos>
+         typename T_MatchPos>
 int MapReadToGenome(T_RefSequence &reference,
     T_SuffixArray &sa, T_Sequence &read, 
     unsigned int minPrefixMatchLength,
@@ -222,7 +222,7 @@ int MapReadToGenome(T_RefSequence &reference,
     vector<DNALength> matchLow, matchHigh, matchLength;
 
     int minMatchLen = anchorParameters.minMatchLength;
-    if (read.subreadEnd - read.subreadStart < minMatchLen) {
+    if (read.SubreadLength() < minMatchLen) {
         matchPosList.clear();
         return 0;
     }
@@ -269,15 +269,15 @@ int MapReadToGenome(T_RefSequence &reference,
     // 
     DNALength endOfMapping;
     DNALength trim = MAX(minMatchLen + 1, sa.lookupPrefixLength + 1);
-    if (read.subreadEnd < trim) {
+    if (read.SubreadEnd() < trim) {
         endOfMapping = 0;
     }
     else {
-        endOfMapping = read.subreadEnd - trim;
+        endOfMapping = read.SubreadEnd() - trim;
     }
 
-    for (pos = read.subreadStart; pos < endOfMapping; pos++) {	
-        int matchIndex = pos - read.subreadStart;
+    for (pos = read.SubreadStart(); pos < endOfMapping; pos++) {
+        int matchIndex = pos - read.SubreadStart();
         assert(matchIndex < matchHigh.size());
         if (matchHigh[matchIndex] - matchLow[matchIndex] <= 
             anchorParameters.maxAnchorsPerPosition) {
diff --git a/alignment/algorithms/sorting/DifferenceCovers.cpp b/alignment/algorithms/sorting/DifferenceCovers.cpp
index 09a4be7..33d24a3 100644
--- a/alignment/algorithms/sorting/DifferenceCovers.cpp
+++ b/alignment/algorithms/sorting/DifferenceCovers.cpp
@@ -1,4 +1,5 @@
 #include <cstring>
+#include "utils.hpp"
 #include "DifferenceCovers.hpp"
 
 int InitializeDifferenceCover(int diffCoverSize, UInt &diffCoverLength, UInt *&diffCover) {
@@ -6,7 +7,7 @@ int InitializeDifferenceCover(int diffCoverSize, UInt &diffCoverLength, UInt *&d
 	for (index = 0; index < N_COVERS; index++) {
 		if (diffCovers[index][0] == diffCoverSize) {
 			diffCoverLength = diffCovers[index][1];
-			diffCover = new UInt[diffCoverLength];
+			diffCover = ProtectedNew<UInt>(diffCoverLength);
 			memcpy(diffCover, &diffCovers[index][2], sizeof(UInt)*diffCoverLength);
 			return 1;
 		}
diff --git a/alignment/algorithms/sorting/LightweightSuffixArray.cpp b/alignment/algorithms/sorting/LightweightSuffixArray.cpp
index eb0e607..626d0c1 100644
--- a/alignment/algorithms/sorting/LightweightSuffixArray.cpp
+++ b/alignment/algorithms/sorting/LightweightSuffixArray.cpp
@@ -1,3 +1,4 @@
+#include "utils.hpp"
 #include "LightweightSuffixArray.hpp"
 
 UInt DiffMod(UInt a, UInt b, UInt d) {
@@ -60,7 +61,7 @@ DiffCoverMu::~DiffCoverMu() {
 }
 
 void DiffCoverMu::Initialize(UInt diffCoverP[], UInt diffCoverLengthP, UInt diffCoverSizeP, UInt textSizeP) {
-    diffCoverReverseLookup = new UInt[diffCoverSizeP];
+    diffCoverReverseLookup = ProtectedNew<UInt>(diffCoverSizeP);
     diffCoverLength = diffCoverLengthP;
     textSize        = textSizeP;
     diffCoverSize   = diffCoverSizeP;
@@ -100,7 +101,7 @@ void BuildDiffCoverLookup(UInt diffCover[], UInt diffCoverLength, UInt v, UInt d
 }
 
 void DiffCoverDelta::Initialize(UInt diffCoverP[], UInt diffCoverLengthP, UInt diffCoverSizeP) {
-    diffCoverLookup = new UInt[diffCoverSizeP];
+    diffCoverLookup = ProtectedNew<UInt>(diffCoverSizeP);
     diffCoverSize   = diffCoverSizeP;
     BuildDiffCoverLookup(diffCoverP, diffCoverLengthP, diffCoverSizeP, diffCoverLookup);
 }
@@ -299,11 +300,7 @@ bool LightweightSuffixSort(unsigned char text[], UInt textLength, UInt *index, i
     // by setting s^\prime[\mu(i)] = l^v(i)
     //
     UInt *lexVNaming;
-    lexVNaming = new UInt[dSetSize+1];
-    if (lexVNaming == NULL) {
-        std::cout << "Could not initialize welterweight order structure." << std::endl;
-        exit(1);
-    }
+    lexVNaming = ProtectedNew<UInt>(dSetSize+1);
     DiffCoverMu mu;
     mu.Initialize(diffCover, diffCoverLength, diffCoverSize, textLength);
     UInt largestLexName;
diff --git a/alignment/algorithms/sorting/MultikeyQuicksort.cpp b/alignment/algorithms/sorting/MultikeyQuicksort.cpp
index 40936ee..1dd776b 100644
--- a/alignment/algorithms/sorting/MultikeyQuicksort.cpp
+++ b/alignment/algorithms/sorting/MultikeyQuicksort.cpp
@@ -117,7 +117,7 @@ void MediankeyBoundedQuicksort(unsigned char text[], UInt index[], UInt length,
                 maxChar = c;
             }
         }
-        freq = new UInt[maxChar+1];
+        freq = ProtectedNew<UInt>(maxChar+1);
         deleteFreq = true;
     }
 
diff --git a/alignment/algorithms/sorting/qsufsort.hpp b/alignment/algorithms/sorting/qsufsort.hpp
index 5525fe4..39ff283 100644
--- a/alignment/algorithms/sorting/qsufsort.hpp
+++ b/alignment/algorithms/sorting/qsufsort.hpp
@@ -1,5 +1,6 @@
 #ifndef _BLASR_QSUFSORT_HPP_
 #define _BLASR_QSUFSORT_HPP_
+#include "utils.hpp"
 #include <assert.h>
 
 void suffixsort(int *x, int *p, int n, int k, int l);
@@ -204,8 +205,8 @@ suffix sorting algorithm.
             assert(pi - p == pi - I);
             //			boundaries[pi-p] = 0;
         }
-        int *buckets = new int[k];
-        T_Index *starts  = new T_Index[k];
+        int *buckets = ProtectedNew<int>(k);
+        T_Index *starts  = ProtectedNew<T_Index>(k);
         /*MC+1*/
         for (i = 0; i < k; i++ ){
             buckets[i] = -1;
diff --git a/alignment/build.mk b/alignment/build.mk
new file mode 120000
index 0000000..2247f36
--- /dev/null
+++ b/alignment/build.mk
@@ -0,0 +1 @@
+makefile
\ No newline at end of file
diff --git a/alignment/bwt/Occ.hpp b/alignment/bwt/Occ.hpp
index 0e2640b..5f02d18 100644
--- a/alignment/bwt/Occ.hpp
+++ b/alignment/bwt/Occ.hpp
@@ -203,7 +203,7 @@ public:
             DNALength bwtSeqLength;
             in.read((char*)&bwtSeqLength, sizeof(bwtSeqLength));
             if (full.matrix) {delete [] full.matrix;}
-            full.matrix = new DNALength[bwtSeqLength *AlphabetSize];
+            full.matrix = ProtectedNew<DNALength>(bwtSeqLength *AlphabetSize);
             full.nRows = bwtSeqLength;
             full.nCols = AlphabetSize;
             in.read((char*)&full.matrix[0], sizeof(DNALength)* bwtSeqLength * AlphabetSize);
diff --git a/alignment/bwt/PackedHash.hpp b/alignment/bwt/PackedHash.hpp
index f1fa540..5b45aef 100644
--- a/alignment/bwt/PackedHash.hpp
+++ b/alignment/bwt/PackedHash.hpp
@@ -71,8 +71,8 @@ public:
         Free();
 
         tableLength = CeilOfFraction(sequenceLength, (DNALength) BinSize);
-        table  = new uint32_t[tableLength];
-        values = new uint64_t[tableLength];
+        table  = ProtectedNew<uint32_t>(tableLength);
+        values = ProtectedNew<uint64_t>(tableLength);
         std::fill(&table[0], &table[tableLength], 0);
         std::fill(&values[0], &values[tableLength], 0);
         hashLengths.resize(tableLength);
@@ -122,7 +122,7 @@ public:
         DNALength v0, v1;
         v0 = ((DNALength)storage);
         v1 = ((DNALength)(storage >> 32));
-        DNALength *storagePtr = new DNALength[3];
+        DNALength *storagePtr = ProtectedNew<DNALength>(3);
         storage = (uint64_t) storagePtr;
 
         //
@@ -149,7 +149,7 @@ public:
          * and inserts the new value into its position that maintains
          * sorted order in the list.
          */
-        DNALength *newListPtr = new DNALength[curStorageLength + 1];
+        DNALength *newListPtr = ProtectedNew<DNALength>(curStorageLength + 1);
         //
         // Copy the values from the old list making space for the new
         // value.
@@ -331,15 +331,15 @@ public:
         Free();
         in.read((char*)&tableLength, sizeof(tableLength));
         if (tableLength > 0) {
-            table  = new uint32_t[tableLength];
-            values = new uint64_t[tableLength];
+            table  = ProtectedNew<uint32_t>(tableLength);
+            values = ProtectedNew<uint64_t>(tableLength);
             in.read((char*)table, sizeof(uint32_t)*tableLength);
             in.read((char*)values, sizeof(uint64_t)*tableLength);
             DNALength tablePos;
             for (tablePos = 0; tablePos < tableLength; tablePos++) {
                 int nSetBits = CountBits(table[tablePos]);
                 if (nSetBits > 2) {
-                    values[tablePos] = (uint64_t) new uint32_t[nSetBits];
+                    values[tablePos] = (uint64_t)(ProtectedNew<uint32_t>(nSetBits));
                     in.read((char*)values[tablePos], nSetBits * sizeof(uint32_t));
                 }
             }
diff --git a/alignment/datastructures/alignment/FilterCriteria.cpp b/alignment/datastructures/alignment/FilterCriteria.cpp
index 16f4bff..8fbb587 100644
--- a/alignment/datastructures/alignment/FilterCriteria.cpp
+++ b/alignment/datastructures/alignment/FilterCriteria.cpp
@@ -18,6 +18,8 @@
 
 #include "FilterCriteria.hpp"
 
+constexpr float Score::errorunit;
+
 Score::Score(const float & value, const ScoreSign & sign)
     : _value(value)
     , _sign(sign)
diff --git a/alignment/files/FragmentCCSIterator.cpp b/alignment/files/FragmentCCSIterator.cpp
index d02876c..ac0b64a 100644
--- a/alignment/files/FragmentCCSIterator.cpp
+++ b/alignment/files/FragmentCCSIterator.cpp
@@ -23,8 +23,7 @@ Initialize(CCSSequence *_seqPtr, RegionTable *_regionTablePtr) {
     //
     // Since this iterator covers all passes, and not just those
     // included in the ccs, the the regions need to be loaded.
-    //
-    CollectSubreadIntervals(*seqPtr, regionTablePtr, subreadIntervals);
+    subreadIntervals = (*regionTablePtr)[seqPtr->HoleNumber()].SubreadIntervals(seqPtr->unrolledRead.length, true);
     if (subreadIntervals.size() == 0) { return;}
 
     readIntervalDirection.resize(subreadIntervals.size());
diff --git a/alignment/files/ReaderAgglomerate.cpp b/alignment/files/ReaderAgglomerate.cpp
index ff04a08..219da10 100644
--- a/alignment/files/ReaderAgglomerate.cpp
+++ b/alignment/files/ReaderAgglomerate.cpp
@@ -15,6 +15,7 @@ void ReaderAgglomerate::InitializeParameters() {
 #ifdef USE_PBBAM
     bamFilePtr = NULL;
     entireFileQueryPtr = NULL;
+    zmwGroupQueryPtr = NULL;
 #endif
 }
 
@@ -156,6 +157,7 @@ bool ReaderAgglomerate::HasRegionTable() {
 
 #define RESET_PBBAM_PTRS() \
     if (bamFilePtr != NULL) {delete bamFilePtr; bamFilePtr = NULL;} \
+    if (zmwGroupQueryPtr != NULL) {delete zmwGroupQueryPtr; zmwGroupQueryPtr = NULL;} \
     if (entireFileQueryPtr != NULL) {delete entireFileQueryPtr; entireFileQueryPtr = NULL;}
 
 #endif
@@ -202,13 +204,18 @@ int ReaderAgglomerate::Initialize() {
             RESET_PBBAM_PTRS();
             try {
                 bamFilePtr = new PacBio::BAM::BamFile(fileName);
+                assert(bamFilePtr != nullptr);
             } catch (std::exception e) {
                 cout << "ERROR! Failed to open " << fileName 
                      << ": " << e.what() << endl;
                 return 0;
             }
             entireFileQueryPtr = new PacBio::BAM::EntireFileQuery(*bamFilePtr);
+            assert(entireFileQueryPtr != nullptr);
             bamIterator = entireFileQueryPtr->begin();
+            zmwGroupQueryPtr = new PacBio::BAM::QNameQuery(*bamFilePtr);
+            assert(zmwGroupQueryPtr != nullptr);
+            zmwGroupIterator = zmwGroupQueryPtr->begin();
             break;
 #endif
         case HDFCCS:
@@ -315,6 +322,32 @@ int ReaderAgglomerate::GetNext(FASTQSequence &seq) {
     return numRecords;
 }
 
+int ReaderAgglomerate::GetNext(vector<SMRTSequence> & reads) {
+    int numRecords = 0;
+    reads.clear();
+
+    if (Subsample(subsample) == 0) {
+        return 0;
+    }
+    if (fileType == PBBAM) {
+#ifdef USE_PBBAM
+        if (zmwGroupIterator != zmwGroupQueryPtr->end()) {
+            const vector<PacBio::BAM::BamRecord> & records = *zmwGroupIterator;
+            numRecords = records.size();
+            reads.resize(numRecords);
+            for (size_t i=0; i < records.size(); i++) {
+                reads[i].Copy(records[i]);
+            }
+            zmwGroupIterator++;
+        }
+#endif
+    } else {
+        UNREACHABLE();
+    }
+    if (numRecords >= 1) readGroupId = reads[0].ReadGroupId();
+    return numRecords;
+}
+
 int ReaderAgglomerate::GetNext(SMRTSequence &seq) {
     int numRecords = 0;
 
@@ -355,14 +388,15 @@ int ReaderAgglomerate::GetNext(SMRTSequence &seq) {
     // and should be empty, use this->readGroupId instead. Otherwise, 
     // read group id should be loaded from BamRecord to SMRTSequence, 
     // update this->readGroupId accordingly.
-    if (fileType != PBBAM) seq.SetReadGroupId(readGroupId);
-    else readGroupId = seq.GetReadGroupId();
+    if (fileType != PBBAM) seq.ReadGroupId(readGroupId);
+    else readGroupId = seq.ReadGroupId();
 
     if (stride > 1)
         Advance(stride-1);
     return numRecords;
 }
 
+
 int ReaderAgglomerate::GetNextBases(SMRTSequence &seq, bool readQVs) {
     int numRecords = 0;
 
@@ -400,8 +434,8 @@ int ReaderAgglomerate::GetNextBases(SMRTSequence &seq, bool readQVs) {
             break;
     }
 
-    if (fileType != PBBAM) seq.SetReadGroupId(readGroupId);
-    else readGroupId = seq.GetReadGroupId();
+    if (fileType != PBBAM) seq.ReadGroupId(readGroupId);
+    else readGroupId = seq.ReadGroupId();
 
     if (stride > 1)
         Advance(stride-1);
@@ -418,13 +452,11 @@ int ReaderAgglomerate::GetNext(CCSSequence &seq) {
         case Fasta:
             // This just reads in the fasta sequence as if it were a ccs sequence
             numRecords = fastaReader.GetNext(seq);
-            seq.subreadStart = 0;
-            seq.subreadEnd   = 0;
+            seq.SubreadStart(0).SubreadEnd(0);
             break;
         case Fastq:
             numRecords = fastqReader.GetNext(seq);
-            seq.subreadStart = 0;
-            seq.subreadEnd   = 0;
+            seq.SubreadStart(0).SubreadEnd(0);
             break;
         case HDFPulse:
         case HDFBase:
@@ -444,8 +476,8 @@ int ReaderAgglomerate::GetNext(CCSSequence &seq) {
             break;
     }
 
-    if (fileType != PBBAM) seq.SetReadGroupId(readGroupId);
-    else readGroupId = seq.GetReadGroupId();
+    if (fileType != PBBAM) seq.ReadGroupId(readGroupId);
+    else readGroupId = seq.ReadGroupId();
 
     if (stride > 1)
         Advance(stride-1);
diff --git a/alignment/files/ReaderAgglomerate.hpp b/alignment/files/ReaderAgglomerate.hpp
index 0955498..905d432 100644
--- a/alignment/files/ReaderAgglomerate.hpp
+++ b/alignment/files/ReaderAgglomerate.hpp
@@ -19,6 +19,7 @@
 #ifdef USE_PBBAM
 #include "pbbam/BamFile.h"
 #include "pbbam/EntireFileQuery.h"
+#include "pbbam/QNameQuery.h"
 #include "pbbam/BamRecord.h"
 #endif
 
@@ -105,6 +106,7 @@ public:
   int GetNext(FASTQSequence &seq);
   int GetNext(SMRTSequence &seq);
   int GetNext(CCSSequence &seq);
+  int GetNext(vector<SMRTSequence> & reads);
 
   template<typename T_Sequence>
       int GetNext(T_Sequence & seq, int & randNum);
@@ -121,6 +123,8 @@ public:
   PacBio::BAM::BamFile * bamFilePtr;
   PacBio::BAM::EntireFileQuery * entireFileQueryPtr;
   PacBio::BAM::EntireFileQuery::iterator bamIterator;
+  PacBio::BAM::QNameQuery * zmwGroupQueryPtr;
+  PacBio::BAM::QNameQuery::iterator zmwGroupIterator;
 #endif
 };
 
diff --git a/alignment/format/BAMPrinter.hpp b/alignment/format/BAMPrinter.hpp
index 50f6b79..b1a53f0 100644
--- a/alignment/format/BAMPrinter.hpp
+++ b/alignment/format/BAMPrinter.hpp
@@ -8,20 +8,32 @@
 #include "pbbam/BamHeader.h"
 #include "pbbam/BamWriter.h"
 
+namespace BAMOutput {
+
+template<typename T_Sequence>
+void SetAlignedSequence(T_AlignmentCandidate &alignment, T_Sequence &read,
+        T_Sequence &alignedSeq);
+
+template<typename T_Sequence>
+void CreateCIGARString(T_AlignmentCandidate &alignment,
+        T_Sequence &read, std::string &cigarString, const bool cigarUseSeqMatch);
+
 template<typename T_Sequence>
 void AlignmentToBamRecord(T_AlignmentCandidate & alignment, 
         T_Sequence & read, PacBio::BAM::BamRecord & bamRecord, 
         AlignmentContext & context, SupplementalQVList & qvList,
-        Clipping clipping);
+        Clipping clipping, bool cigarUseSeqMatch);
+
 
-namespace BAMOutput {
 
 template<typename T_Sequence>
 void PrintAlignment(T_AlignmentCandidate &alignment, T_Sequence &read,
         PacBio::BAM::BamWriter &bamWriter, AlignmentContext &context, 
-        SupplementalQVList & qvList, Clipping clipping);
+        SupplementalQVList & qvList, Clipping clipping, 
+        bool cigarUseSeqMatch=false);
 }
 
+
 #include "BAMPrinterImpl.hpp"
 
 #endif
diff --git a/alignment/format/BAMPrinterImpl.hpp b/alignment/format/BAMPrinterImpl.hpp
index e023a4d..2377911 100644
--- a/alignment/format/BAMPrinterImpl.hpp
+++ b/alignment/format/BAMPrinterImpl.hpp
@@ -11,10 +11,53 @@ using namespace std;
 #include "pbbam/BamFile.h"
 
 template<typename T_Sequence>
-void AlignmentToBamRecord(T_AlignmentCandidate & alignment, 
+void BAMOutput::CreateCIGARString(T_AlignmentCandidate &alignment,
+        T_Sequence &read, std::string &cigarString, const bool cigarUseSeqMatch)
+{
+    cigarString = "";
+    // All cigarString use the no clipping core
+    std::vector<int> opSize;
+    std::vector<char> opChar;
+
+    SAMOutput::CreateNoClippingCigarOps(alignment, opSize, opChar, cigarUseSeqMatch);
+
+    // Clipping needs to be added
+    DNALength prefixSoftClip = alignment.QAlignStart() - read.SubreadStart();
+    DNALength suffixSoftClip = read.SubreadEnd() - alignment.QAlignEnd();
+
+    if (alignment.tStrand == 1) {
+        std::swap(prefixSoftClip, suffixSoftClip);
+    }
+    if (prefixSoftClip > 0) {
+        opSize.insert(opSize.begin(), prefixSoftClip);
+        opChar.insert(opChar.begin(), 'S');
+    }
+    if (suffixSoftClip > 0) {
+        opSize.push_back(suffixSoftClip);
+        opChar.push_back('S');
+    }
+    SAMOutput::CigarOpsToString(opSize, opChar, cigarString);
+}
+
+template<typename T_Sequence>
+void BAMOutput::SetAlignedSequence(T_AlignmentCandidate &alignment, T_Sequence &read,
+        T_Sequence &alignedSeq) {
+    if (alignment.tStrand == 0) {
+        alignedSeq.ReferenceSubstring(read);
+    }
+    else {
+        T_Sequence subSeq;
+        subSeq.ReferenceSubstring(read);
+        subSeq.MakeRC(alignedSeq);
+    }
+}
+
+template<typename T_Sequence>
+void BAMOutput::AlignmentToBamRecord(T_AlignmentCandidate & alignment,
         T_Sequence & read, PacBio::BAM::BamRecord & bamRecord,
         AlignmentContext & context, SupplementalQVList & qvList,
-        Clipping clipping) {
+        Clipping clipping, bool cigarUseSeqMatch) {
+    // soft clipping and subread clipping are identical for BAM
     assert(clipping == SAMOutput::soft or clipping == SAMOutput::subread);
 
     // Build from scratch if input reads are not from pbbam files.
@@ -32,14 +75,11 @@ void AlignmentToBamRecord(T_AlignmentCandidate & alignment,
 
     // build cigar string.
     string cigarString;
-    T_Sequence alignedSequence;
-    DNALength prefixSoftClip = 0, suffixSoftClip = 0;
-    DNALength prefixHardClip = 0, suffixHardClip = 0;
-    CreateCIGARString(alignment, read, cigarString, clipping,
-                      prefixSoftClip, suffixSoftClip, 
-                      prefixHardClip, suffixHardClip);
-    SetAlignedSequence(alignment, read, alignedSequence, clipping);
+    BAMOutput::CreateCIGARString(alignment, read, cigarString, cigarUseSeqMatch);
     PacBio::BAM::Cigar cigar = PacBio::BAM::Cigar::FromStdString(cigarString);
+
+    T_Sequence alignedSequence;
+    BAMOutput::SetAlignedSequence(alignment, read, alignedSequence);
  
     // build flag
     uint16_t flag;
@@ -86,8 +126,8 @@ void AlignmentToBamRecord(T_AlignmentCandidate & alignment,
         PacBio::BAM::TagCollection tags;
         tags["RG"] = context.readGroupId;
         if (dynamic_cast<CCSSequence*>(&read) == NULL) { // subread
-            tags["qs"] = read.subreadStart;
-            tags["qe"] = read.subreadEnd;
+            tags["qs"] = read.SubreadStart();
+            tags["qe"] = read.SubreadEnd();
             /// Number of passes for a subread should always be 1.
             tags["np"] = 1;
         } else { // ccs read
@@ -150,16 +190,15 @@ void AlignmentToBamRecord(T_AlignmentCandidate & alignment,
 
     // Set Flag 
     bamRecord.Impl().Flag(static_cast<uint32_t>(flag));
-
 }
 
 template<typename T_Sequence>
 void BAMOutput::PrintAlignment(T_AlignmentCandidate &alignment, T_Sequence &read,
         PacBio::BAM::BamWriter &bamWriter, AlignmentContext &context, 
-        SupplementalQVList & qvList, Clipping clipping) {
+        SupplementalQVList & qvList, Clipping clipping, bool cigarUseSeqMatch) {
 
     PacBio::BAM::BamRecord bamRecord;
-    AlignmentToBamRecord(alignment, read, bamRecord, context, qvList, clipping);
+    BAMOutput::AlignmentToBamRecord(alignment, read, bamRecord, context, qvList, clipping, cigarUseSeqMatch);
     bamWriter.Write(bamRecord);
 }
 #endif
diff --git a/alignment/format/SAMHeaderPrinter.cpp b/alignment/format/SAMHeaderPrinter.cpp
index 6bad4a5..c8fc103 100644
--- a/alignment/format/SAMHeaderPrinter.cpp
+++ b/alignment/format/SAMHeaderPrinter.cpp
@@ -1,14 +1,15 @@
+#include <assert.h>
 #include "format/SAMHeaderPrinter.hpp"
 
 const std::string SAMVERSION("1.5");
-const std::string PBBAMVERSION("3.0b5");
+const std::string PBBAMVERSION("3.0.1");
 const std::string PACBIOPL("PACBIO");
 
 std::vector<SAMHeaderItem> MakeSAMHeaderItems(const std::string & fromString){
     std::vector<SAMHeaderItem> items;
 
     std::vector<std::string> vs;
-    Tokenize(fromString, ";", vs);
+    Splice(fromString, ";", vs);
     std::vector<std::string>::iterator it;
     for (it = vs.begin(); it != vs.end(); it++) {
         items.push_back(SAMHeaderItem(*it));
@@ -33,15 +34,18 @@ std::string SAMHeaderItem::ToString() {
 
 // SAMHeaderTag
 SAMHeaderTag::SAMHeaderTag(const std::string & fromString) {
-    std::vector<std::string> vs;
-    Tokenize(fromString, ":", vs);
-    if (vs.size() == 2) {
-        _tagName = vs[0];
-        if (vs[1].find("=") != std::string::npos) {
-            AddItems(vs[1]);
+    size_t pos = fromString.find(":");
+    if (pos != string::npos) {
+        _tagName = fromString.substr(0, pos);
+        string tagValue = fromString.substr(pos + 1);
+        if (tagValue.find("=") != std::string::npos) {
+            AddItems(tagValue);
         } else {
-            _tagValue = vs[1];
+            _tagValue = tagValue;
         }
+    } else {
+        cout << "Unable to parse SAM/BAM header" << fromString << endl;
+        exit(1);
     }
 }
 
@@ -86,7 +90,7 @@ void SAMHeaderTag::AddItems(const std::string & fromString) {
 SAMHeaderGroup::SAMHeaderGroup(const std::string & fromString) {
     if (fromString == "" || fromString[0] != '@') return;
     std::vector<std::string> vs;
-    Tokenize(fromString.substr(1), "\t", vs);
+    Splice(fromString.substr(1), "\t", vs);
     if (vs.size() >= 1) {
         std::vector<std::string>::iterator it = vs.begin();
         _groupName = (*it);
@@ -306,6 +310,7 @@ SAMHeaderRGs SAMHeaderPrinter::MakeRGs(const std::vector<std::string> & readsFil
 
     if (fileType != PBBAM) {
         ReaderAgglomerate * reader = new ReaderAgglomerate();
+        assert(reader != nullptr);
         std::vector<std::string>::const_iterator rfit;
         for(rfit = readsFiles.begin(); rfit != readsFiles.end(); rfit++) {
             std::string rf(*rfit);
diff --git a/alignment/format/SAMPrinter.cpp b/alignment/format/SAMPrinter.cpp
index 9ff7520..c44c40a 100644
--- a/alignment/format/SAMPrinter.cpp
+++ b/alignment/format/SAMPrinter.cpp
@@ -77,9 +77,10 @@ void SAMOutput::AddGaps(T_AlignmentCandidate &alignment, int gapIndex,
     }
 }
 
-void SAMOutput::AddMatchBlockCigarOps(DNASequence & qSeq, DNASequence & tSeq, blasr::Block & b,
+void SAMOutput::AddMatchBlockCigarOps(DNASequence & qSeq, DNASequence & tSeq, 
+        blasr::Block & b, DNALength & qSeqPos, DNALength & tSeqPos,
         std::vector<int> & opSize, std::vector<char> & opChar) {
-    DNALength qPos = b.qPos, tPos = b.tPos, n = 0;
+    DNALength qPos = qSeqPos + b.qPos, tPos = tSeqPos + b.tPos, n = 0;
     bool started = false, prevSeqMatch = false;
     for(DNALength i = 0; i < b.length; i++) {
         bool curSeqMatch = (qSeq[qPos + i] == tSeq[tPos + i]);
@@ -137,6 +138,7 @@ void SAMOutput::CreateNoClippingCigarOps(T_AlignmentCandidate &alignment,
                     AddMatchBlockCigarOps(alignment.qAlignedSeq, 
                                           alignment.tAlignedSeq, 
                                           alignment.blocks[b], 
+                                          alignment.qPos, alignment.tPos,
                                           opSize, opChar);
                 } else {
                     opSize.push_back(matchLength);
@@ -158,6 +160,7 @@ void SAMOutput::CreateNoClippingCigarOps(T_AlignmentCandidate &alignment,
                 AddMatchBlockCigarOps(alignment.qAlignedSeq, 
                                       alignment.tAlignedSeq, 
                                       alignment.blocks[b], 
+                                      alignment.qPos, alignment.tPos,
                                       opSize, opChar);
             } else {
                 opSize.push_back(matchLength);
diff --git a/alignment/format/SAMPrinter.hpp b/alignment/format/SAMPrinter.hpp
index 2628690..c47f8ec 100644
--- a/alignment/format/SAMPrinter.hpp
+++ b/alignment/format/SAMPrinter.hpp
@@ -41,6 +41,7 @@ void AddGaps(T_AlignmentCandidate &alignment, int gapIndex,
 
 // Add sequence match/mismatch CIGAR string Ops for block b.
 void AddMatchBlockCigarOps(DNASequence & qSeq, DNASequence & tSeq, blasr::Block & b,
+        DNALength & qSeqPos, DNALength & tSeqPos,
         std::vector<int> & opSize, std::vector<char> & opChar);
 
 // If cigarUseSeqMatch is true, cigar string uses '=' and 'X' 
diff --git a/alignment/format/SAMPrinterImpl.hpp b/alignment/format/SAMPrinterImpl.hpp
index f83d1a9..ec20b49 100644
--- a/alignment/format/SAMPrinterImpl.hpp
+++ b/alignment/format/SAMPrinterImpl.hpp
@@ -27,8 +27,8 @@ void SAMOutput::SetAlignedSequence(T_AlignmentCandidate &alignment, T_Sequence &
         clippedStartPos = read.lowQualityPrefix;
     }
     else if (clipping == subread) {
-        clippedReadLength = read.subreadEnd - read.subreadStart;
-        clippedStartPos = read.subreadStart;
+        clippedReadLength = read.SubreadLength();
+        clippedStartPos = read.SubreadStart();
     }
     else {
         std::cout <<" ERROR! The clipping must be none, hard, subread, or soft when setting the aligned sequence." << std::endl;
@@ -130,8 +130,8 @@ void SAMOutput::CreateCIGARString(T_AlignmentCandidate &alignment,
           suffixHardClip = read.lowQualitySuffix;
       }
       else if (clipping == subread) {
-          prefixHardClip = std::max((DNALength) read.subreadStart, read.lowQualityPrefix);
-          suffixHardClip = std::max((DNALength)(read.length - read.subreadEnd), read.lowQualitySuffix);
+          prefixHardClip = std::max((DNALength) read.SubreadStart(), read.lowQualityPrefix);
+          suffixHardClip = std::max((DNALength)(read.length - read.SubreadEnd()), read.lowQualitySuffix);
       }
 
       SetSoftClip(alignment, read, prefixHardClip, suffixHardClip, prefixSoftClip, suffixSoftClip);
@@ -280,9 +280,9 @@ void SAMOutput::PrintAlignment(T_AlignmentCandidate &alignment,
         assert(read.length - suffixHardClip == prefixHardClip + alignedSequence.length);
         samFile << "XE:i:" << xe + 1 << "\t";
     }
-    samFile << "YS:i:" << read.subreadStart << "\t";
-    samFile << "YE:i:" << read.subreadEnd << "\t";
-    samFile << "ZM:i:" << read.zmwData.holeNumber << "\t";
+    samFile << "YS:i:" << read.SubreadStart() << "\t";
+    samFile << "YE:i:" << read.SubreadEnd() << "\t";
+    samFile << "ZM:i:" << read.HoleNumber() << "\t";
     samFile << "XL:i:" << alignment.qAlignedSeq.length << "\t";
     samFile << "XT:i:1\t"; // reads are allways continuous reads, not
                         // referenced based circular consensus when
diff --git a/alignment/makefile b/alignment/makefile
new file mode 100644
index 0000000..61295eb
--- /dev/null
+++ b/alignment/makefile
@@ -0,0 +1,47 @@
+all:
+
+THISDIR:=$(dir $(realpath $(lastword $(MAKEFILE_LIST))))
+-include ${CURDIR}/defines.mk
+include ${THISDIR}/../rules.mk
+
+CXXOPTS  := -std=c++11 -pedantic -Wno-long-long
+INCLUDES += ${THISDIR} ${LIBPBDATA_INC} ${LIBPBIHDF_INC} ${HDF5_INC} ${PBBAM_INC} ${HTSLIB_INC} ${BOOST_INC}
+LIBS     += ${LIBPBDATA_LIB} ${LIBPBIHDF_LIB} ${HDF5_LIB} ${PBBAM_LIB} ${HTSLIB_LIB} ${ZLIB_LIB}
+LDFLAGS  += $(patsubst %,-L%,${LIBS})
+LDLIBS += -lpbdata
+
+ifeq (${nohdf},)
+  LDLIBS+= -lpbihdf
+  #LDFLAGS+= -flat_namespace # so we do not need LDLIBS+= -lhdf5 -lhdf5_cpp
+endif
+# We might also need some -l* for pbbam, etc.
+
+all: libblasr.a libblasr${SH_LIB_EXT}
+
+paths := . simulator format files utils tuples statistics qvs suffixarray \
+	datastructures/alignment datastructures/alignmentset datastructures/anchoring datastructures/tuplelists \
+	algorithms/alignment algorithms/alignment/sdp algorithms/anchoring algorithms/compare algorithms/sorting
+paths := $(patsubst %,${THISDIR}%,${paths})
+sources := $(shell find ${THISDIR} -name '*.cpp')
+
+ifdef nohdf
+sources := $(filter-out ${THISDIR}files/% ${THISDIR}utils/FileOfFileNames.cpp ${THISDIR}format/SAMHeaderPrinter.cpp, $(sources))
+endif
+
+sources := $(notdir ${sources})
+objects := $(sources:.cpp=.o)
+shared_objects := $(sources:.cpp=.shared.o)
+dependencies := $(objects:.o=.d) $(shared_objects:.o=.d)
+
+vpath %.cpp ${paths}
+
+libblasr.a: $(objects)
+	$(AR) $(ARFLAGS) $@ $^
+
+libblasr${SH_LIB_EXT}: $(shared_objects)
+
+clean: 
+	rm -f libblasr.a libblasr.so *.o *.d
+
+-include $(dependencies)
+depend: $(dependencies:.d=.depend)
diff --git a/alignment/simple.mk b/alignment/simple.mk
deleted file mode 100644
index 253d7a2..0000000
--- a/alignment/simple.mk
+++ /dev/null
@@ -1,82 +0,0 @@
-# Requirements:
-#   pbbam
-#   htslib
-#   hdf5
-#   boost
-# Plus relative packages:
-#   pbdata
-#   hdf
-PREFIX?=/usr
-include ../simple.mk
-
-LIBPBDATA_INCLUDE := ../pbdata
-LIBPBIHDF_INCLUDE := ../hdf
-#PBBAM_INCLUDE := $(PBBAM)/include
-#HTSLIB_INCLUDE := $(PBBAM)/third-party/htslib
-
-INCLUDES = -I${PREFIX}/include \
-           -I$(LIBPBDATA_INCLUDE) \
-           -I$(LIBPBIHDF_INCLUDE) \
-	   -I.
-
-#ifneq ($(ZLIB_ROOT), notfound)
-#	INCLUDES += -I$(ZLIB_ROOT)/include
-#endif
-
-#ifeq ($(origin nopbbam), undefined)
-#    INCLUDES += -I$(PBBAM_INCLUDE) -I$(HTSLIB_INCLUDE) -I$(BOOST_INCLUDE)
-#endif
-
-CXXOPTS := -std=c++11 -pedantic -Wno-long-long -MMD -MP
-
-sources := $(wildcard algorithms/alignment/*.cpp) \
-		   $(wildcard algorithms/alignment/sdp/*.cpp) \
-		   $(wildcard algorithms/anchoring/*.cpp) \
-		   $(wildcard algorithms/compare/*.cpp) \
-		   $(wildcard algorithms/sorting/*.cpp) \
-		   $(wildcard datastructures/alignment/*.cpp) \
-		   $(wildcard datastructures/alignmentset/*.cpp) \
-		   $(wildcard datastructures/anchoring/*.cpp) \
-		   $(wildcard datastructures/tuplelists/*.cpp) \
-		   $(wildcard suffixarray/*.cpp) \
-		   $(wildcard qvs/*.cpp) \
-		   $(wildcard statistics/*.cpp) \
-		   $(wildcard tuples/*.cpp) \
-		   $(wildcard utils/*.cpp) \
-		   $(wildcard files/*.cpp) \
-		   $(wildcard format/*.cpp) \
-		   $(wildcard simulator/*.cpp) \
-		   $(wildcard *.cpp)
-
-ifdef nohdf
-sources := $(filter-out files/% utils/FileOfFileNames.cpp format/SAMHeaderPrinter.cpp, $(sources))
-endif
-
-objects := $(sources:.cpp=.o)
-dependencies := $(sources:.cpp=.d)
-
-all : CXXFLAGS ?= -O3
-
-debug : CXXFLAGS ?= -g -ggdb -fno-inline
-
-profile : CXXFLAGS ?= -Os -pg
-
-g: CXXFLAGS = -g -ggdb -fno-inline -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free -fno-omit-frame-pointer
-
-all debug profile g: libblasr.a
-
-libblasr.a: $(objects)
-	$(AR_pp) $(ARFLAGS) $@ $^
-
-%.o: %.cpp
-	$(CXX_pp) $(CXXOPTS) $(CXXFLAGS) $(LEGACY) $(INCLUDES) -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.o) $(@:%.o=%.d)" -c $< -o $@
-
-# .INTERMEDIATE: $(objects)
-
-clean:
-	@rm -f libblasr.a
-	@find . -type f -name \*.o -delete
-	@find . -type f -name \*.d -delete
-
-
--include $(dependencies)
diff --git a/alignment/simulator/ContextOutputList.hpp b/alignment/simulator/ContextOutputList.hpp
index 5023463..870ac64 100644
--- a/alignment/simulator/ContextOutputList.hpp
+++ b/alignment/simulator/ContextOutputList.hpp
@@ -68,7 +68,7 @@ public:
             int e;
             i = 0;
             if (outputMap.find(context) == outputMap.end()) {
-                outputMap[context] = new OutputList;
+                outputMap[context] = ProtectedNew<OutputList>();
             }
 
             while(i < outputsLine.size()) {
diff --git a/alignment/simulator/ContextSet.cpp b/alignment/simulator/ContextSet.cpp
index 1d84d72..7f08785 100644
--- a/alignment/simulator/ContextSet.cpp
+++ b/alignment/simulator/ContextSet.cpp
@@ -21,13 +21,13 @@ void ContextSampleMap::Read(std::ifstream &in) {
     int numContext;
     in.read((char*)&numContext, sizeof(numContext));
     int i;
-    char *context = new char[contextLength+1];
+    char *context = ProtectedNew<char>(contextLength+1);
     context[contextLength] = '\0';
     for (i = 0; i < numContext; i++) {
         in.read(context, contextLength);
         std::string contextString = context;
         // Allocate the context
-        (*this)[contextString] = new ContextSample;
+        (*this)[contextString] = ProtectedNew<ContextSample>();
         (*this)[contextString]->Read(in);
     }
     delete[] context;
diff --git a/alignment/simulator/OutputSampleListSet.hpp b/alignment/simulator/OutputSampleListSet.hpp
index dbfe8d0..0a36ae2 100644
--- a/alignment/simulator/OutputSampleListSet.hpp
+++ b/alignment/simulator/OutputSampleListSet.hpp
@@ -4,6 +4,7 @@
 #include <map>
 #include <string>
 #include <iostream>
+#include "utils.hpp"
 #include "OutputSampleList.hpp"
 
 
@@ -67,7 +68,7 @@ class OutputSampleListSet {
     in.read((char*) &keyLength, sizeof(int));
 
     if (keyLength == 0 or setSize == 0) { return; }
-    char *key = new char[keyLength+1];
+    char *key = ProtectedNew<char>(keyLength+1);
     key[keyLength] = '\0';
     int i;
     for (i = 0; i < setSize; i++) {
diff --git a/alignment/suffixarray/LCPTable.hpp b/alignment/suffixarray/LCPTable.hpp
index 85037f2..99f594e 100644
--- a/alignment/suffixarray/LCPTable.hpp
+++ b/alignment/suffixarray/LCPTable.hpp
@@ -3,6 +3,7 @@
 
 #include <map>
 #include <fstream>
+#include "utils.hpp"
 
 template <typename T>
 class LCPTable {
@@ -38,8 +39,8 @@ class LCPTable {
     void Init(T* data, unsigned int pTableLength, unsigned int *index) {
         tableLength = pTableLength;
         maxPrefixLength = (PrefixLength) (SignedPrefixLength(-1));
-        llcp = new PrefixLength[tableLength];
-        rlcp = new PrefixLength[tableLength];
+        llcp = ProtectedNew<PrefixLength>(tableLength);
+        rlcp = ProtectedNew<PrefixLength>(tableLength);
         std::fill(llcp, llcp + tableLength, 0);
         std::fill(rlcp, rlcp + tableLength, 0);
         FillTable(data, index);
@@ -94,14 +95,14 @@ class LCPTable {
 
 
     ~LCPTable() {
-        /*
-           if (llcp != NULL) 
-           delete[] llcp;
-           llcp = NULL;
-           if (rlcp != NULL)
-           delete[] rlcp;
-           rlcp = NULL;
-           */
+        if (llcp != NULL) {
+            delete[] llcp;
+            llcp = NULL;
+        }
+        if (rlcp != NULL) {
+            delete[] rlcp;
+            rlcp = NULL;
+        }
         // the two maps automatically go away.
     }
 
diff --git a/alignment/suffixarray/SuffixArray.hpp b/alignment/suffixarray/SuffixArray.hpp
index 907bf1b..b816d5e 100644
--- a/alignment/suffixarray/SuffixArray.hpp
+++ b/alignment/suffixarray/SuffixArray.hpp
@@ -309,7 +309,6 @@ public:
         std::fill(count.begin(), count.end(), 0);
         assert(index == NULL or not deleteStructures);
         index = ProtectedNew<SAIndex>(targetLength);
-        //index = new SAIndex[targetLength];
         deleteStructures = true;
         for (a = 0; a < alphabet.size(); a++ ) {
             bucket[a] = -1;
diff --git a/alignment/suffixarray/ssort.hpp b/alignment/suffixarray/ssort.hpp
index 0a6f9b8..7b451bc 100644
--- a/alignment/suffixarray/ssort.hpp
+++ b/alignment/suffixarray/ssort.hpp
@@ -234,6 +234,7 @@ Bad input
 */
 
 #include <stdlib.h>
+#include "utils.hpp"
 
 enum {
 	ORIG = ~(~0u>>1),			/* sign bit */
@@ -262,8 +263,7 @@ ssort(SAIndex a[], SAIndex s[])
             j = a[n];		/* and max element */
     if(a[n++]<0 || j>=n)
         finish(2);
-    //	p = malloc(n*sizeof(int));
-    p = new SAIndex[n];
+    p = ProtectedNew<SAIndex>(n);
     if(p == 0)
         finish(1);
 
@@ -272,7 +272,7 @@ ssort(SAIndex a[], SAIndex s[])
 
     if(s) {					/* shared lengths */
         //		q = malloc(n*sizeof(int));
-        q = new SAIndex[n];
+        q = ProtectedNew<SAIndex>(n);
         if(q == 0)
             finish(1);
     }
diff --git a/alignment/tuples/TupleCountTableImpl.hpp b/alignment/tuples/TupleCountTableImpl.hpp
index 805dc66..a2d9e0e 100644
--- a/alignment/tuples/TupleCountTableImpl.hpp
+++ b/alignment/tuples/TupleCountTableImpl.hpp
@@ -1,5 +1,6 @@
 #ifndef _BLASR_TUPLE_COUNT_TABLE_IMPL_HPP_
 #define _BLASR_TUPLE_COUNT_TABLE_IMPL_HPP_
+#include "utils.hpp"
 
 using namespace std;
 
@@ -15,7 +16,7 @@ void TupleCountTable<TSequence, TTuple>::InitCountTable(TupleMetrics &ptm) {
     countTableLength = countTableLength << ((tm.tupleSize - 1)*2);
 
     assert(countTableLength > 0);
-    countTable = new int[countTableLength];
+    countTable = ProtectedNew<int>(countTableLength);
     deleteStructures = true;
     fill(&countTable[0], &countTable[countTableLength], 0);
     nTuples = 0;
@@ -93,7 +94,7 @@ void TupleCountTable<TSequence, TTuple>::Read(ifstream &in) {
     in.read((char*) &nTuples, sizeof(int));
     in.read((char*) &tm.tupleSize, sizeof(int));
     tm.InitializeMask();
-    countTable = new int[countTableLength];
+    countTable = ProtectedNew<int>(countTableLength);
     deleteStructures = true;
     in.read((char*) countTable, sizeof(int) * countTableLength);
 }
diff --git a/alignment/tuples/TupleListImpl.hpp b/alignment/tuples/TupleListImpl.hpp
index fdbf91f..54bfde7 100644
--- a/alignment/tuples/TupleListImpl.hpp
+++ b/alignment/tuples/TupleListImpl.hpp
@@ -43,7 +43,6 @@ int TupleList<T>::InitFromFile(std::string &fileName) {
     listIn.read((char*) &listLength, sizeof(int));
     listIn.read((char*) &tm.tupleSize, sizeof(int));
     tm.InitializeMask();
-    //list = new T[listLength];
     tupleList.resize(listLength);
     listIn.read((char*) &tupleList[0], sizeof(T) * listLength);
     return 1;
diff --git a/alignment/utils/RegionUtils.cpp b/alignment/utils/RegionUtils.cpp
index 0448f2e..305c362 100644
--- a/alignment/utils/RegionUtils.cpp
+++ b/alignment/utils/RegionUtils.cpp
@@ -4,109 +4,18 @@
 bool LookupHQRegion(int holeNumber, RegionTable &regionTable, 
     int &start, int &end, int &score) {
 
-	int regionLowIndex, regionHighIndex;
-	regionLowIndex = regionHighIndex = 0;
-
-	regionTable.LookupRegionsByHoleNumber(holeNumber,
-        regionLowIndex, regionHighIndex);
-
-	bool readHasGoodRegion = true;
-	int  regionIndex = regionLowIndex;
-	while (regionIndex < regionHighIndex and 
-		   regionTable.GetType(regionIndex) != HQRegion) {
-		regionIndex++;
-	}
-	
-	if (regionIndex == regionHighIndex) {
-    start = end = score = 0;
-		return false;
-	}
-	else {
-		start = regionTable.GetStart(regionIndex);
-		end   = regionTable.GetEnd(regionIndex);
-        score = regionTable.GetScore(regionIndex);
-		return true;
-	}
-}
-
-// Functions of class CompareRegionIndicesByStart.
-int CompareRegionIndicesByStart::
-operator()(const int a, const int b) const {
-    if (regionTablePtr->GetStart(a) == regionTablePtr->GetStart(b)) {
-        return (regionTablePtr->GetEnd(a) < regionTablePtr->GetEnd(b));
-    }
-    else {
-        return (regionTablePtr->GetStart(a) < regionTablePtr->GetStart(b));
-    }
-}
-
-// General functions.
-int SortRegionIndicesByStart(RegionTable &regionTable, 
-    std::vector<int> &indices) {
-
-    CompareRegionIndicesByStart cmpFctr;
-    cmpFctr.regionTablePtr = &regionTable;
-    std::sort(indices.begin(), indices.end(), cmpFctr);
-    return indices.size();
-}
-
-
-// Functions of OrderRegionsByReadStart:
-int OrderRegionsByReadStart::
-operator()(const ReadInterval &lhs, const ReadInterval &rhs) const {
-    return lhs.start < rhs.start;
-}
-
-
-// General functions.
-int FindRegionIndices(unsigned int holeNumber, RegionTable *regionTablePtr,
-    int &regionLowIndex, int &regionHighIndex) {
-
-    int regionIndex;						 
-    regionLowIndex = regionHighIndex = 0;
-
-    regionTablePtr->LookupRegionsByHoleNumber(holeNumber, 
-        regionLowIndex, regionHighIndex);  
-
-    return regionHighIndex - regionLowIndex;
-}
-
-
-int FindRegionIndices(SMRTSequence &read, RegionTable *regionTablePtr, 
-    int &regionLowIndex, int &regionHighIndex) {
-    return FindRegionIndices(read.zmwData.holeNumber, 
-        regionTablePtr, regionLowIndex, regionHighIndex);
-}
-
-
-//
-// Collect region indices for either all region types, or just a few specific region types.
-//
-//
-int CollectRegionIndices(SMRTSequence &read, RegionTable &regionTable, 
-    std::vector<int> &regionIndices, RegionType *regionTypes,
-    int numRegionTypes) {
-
-    int regionLow, regionHigh;
-    int prevNumRegionIndices = regionIndices.size();
-    if (FindRegionIndices(read, &regionTable, regionLow, regionHigh)) {
-        int i;
-        for (i = regionLow; i < regionHigh; i++) {
-            if (regionTypes == NULL) {
-                regionIndices.push_back(i);
-            }
-            else {
-                int t;
-                for (t = 0; t < numRegionTypes; t++) {
-                    if (regionTable.GetType(i) == regionTypes[t]) {
-                        regionIndices.push_back(i);
-                        break;
-                    }
-                }
-            }
+    if (regionTable.HasHoleNumber(holeNumber)) {
+        RegionAnnotations zmwRegions = regionTable[holeNumber];
+        if (zmwRegions.HasHQRegion()) {
+            start = zmwRegions.HQStart();
+            end   = zmwRegions.HQEnd();
+            score = zmwRegions.HQScore();
+            return true;
         }
     }
-    return regionIndices.size() - prevNumRegionIndices;
+
+    start = end = score = 0;
+    return false;
 }
 
 
diff --git a/alignment/utils/RegionUtils.hpp b/alignment/utils/RegionUtils.hpp
index f5fb870..e5a56d5 100644
--- a/alignment/utils/RegionUtils.hpp
+++ b/alignment/utils/RegionUtils.hpp
@@ -21,54 +21,6 @@ bool GetReadTrimCoordinates(T_Sequence &fastaRead,
 	ZMWGroupEntry &zmwData,	RegionTable &regionTable,
 	DNALength &readStart ,DNALength &readEnd, int &score);
 
-template<typename T_Sequence>
-bool TrimRead(T_Sequence &fastaRead, ZMWGroupEntry &zmwData,
-    RegionTable &regionTable, T_Sequence &trimmedRead);
-
-
-class CompareRegionIndicesByStart {
-public:
-	RegionTable *regionTablePtr;
-	int operator()(const int a, const int b) const;
-};
-
-		
-int SortRegionIndicesByStart(RegionTable &regionTable, 
-    std::vector<int> &indices);
-
-class OrderRegionsByReadStart {
-public:
-    int operator()(const ReadInterval &lhs, const ReadInterval &rhs) const;
-};
-
-int FindRegionIndices(unsigned int holeNumber, RegionTable *regionTablePtr,
-    int &regionLowIndex, int &regionHighIndex);
-
-int FindRegionIndices(SMRTSequence &read, RegionTable *regionTablePtr, 
-    int &regionLowIndex, int &regionHighIndex);
-
-//
-// Collect region indices for either all region types, or just a few 
-// specific region types.
-//
-int CollectRegionIndices(SMRTSequence &read, RegionTable &regionTable, 
-    std::vector<int> &regionIndices, RegionType *regionTypes=NULL,
-    int numRegionTypes = 0);
-
-
-template<typename T_Sequence>
-void CollectSubreadIntervals(T_Sequence &read, RegionTable *regionTablePtr, 
-    std::vector<ReadInterval> &subreadIntervals, bool byAdapter=false);
-
-// Get all adapter intervals of a ZMW.
-// Input:
-//   read - read.zmwData.holeNumber specifies the zmw.
-//   regionTablePtr - a pointer to a region table.
-// Output:
-//   adapterIntervals - where to assign all adapter intervals of the zmw
-template<typename T_Sequence>
-void CollectAdapterIntervals(T_Sequence &read, RegionTable *regionTablePtr,
-    std::vector<ReadInterval> &adapterIntervals);
 
 // Given a vecotr of ReadInterval objects and their corresponding 
 // directions, intersect each object with an interval 
diff --git a/alignment/utils/RegionUtilsImpl.hpp b/alignment/utils/RegionUtilsImpl.hpp
index fc4b859..3ccfbb2 100644
--- a/alignment/utils/RegionUtilsImpl.hpp
+++ b/alignment/utils/RegionUtilsImpl.hpp
@@ -2,214 +2,51 @@
 #define _BLASR_REGION_UTILS_IMPL_HPP 
 
 
+//FIXME: move all functions to class SMRTSequence
 template<typename T_Sequence>
-bool MaskRead(T_Sequence &fastaRead, ZMWGroupEntry &zmwData,
-    RegionTable &regionTable) {
-    int regionIndex;						 
-    int regionLowIndex, regionHighIndex;
-    regionLowIndex = regionHighIndex = 0;
-
-    regionTable.LookupRegionsByHoleNumber(zmwData.holeNumber, 
-        regionLowIndex, regionHighIndex);
-
-    bool readHasGoodRegion = true;
-
-    DNALength readPos;
-
-    regionIndex = regionLowIndex;
-    int lastHQRegionIndex;
-
-    int hqRegionStart=0, hqRegionEnd=0, hqRegionScore = 0;
-    readHasGoodRegion = LookupHQRegion(zmwData.holeNumber, 
-        regionTable, hqRegionStart, hqRegionEnd, hqRegionScore);
-
-    //
-    // Mask off the low quality portion of this read.
-    //
-    for (readPos = 0; (readPos < hqRegionStart and
-         readPos < fastaRead.length); readPos++) {
-        fastaRead.seq[readPos] = 'N';
-    }
-
-    for (readPos = hqRegionEnd; readPos < fastaRead.length; readPos++) {
-        fastaRead.seq[readPos] = 'N';
-    }
-
-    //
-    // Look to see if there is region information provided, but the
-    // entire read is bad.
-    //
-    if (hqRegionEnd == hqRegionStart) {
-        //
-        // This read is entirely bad, flag that.
-        //
-        readHasGoodRegion = false;
+bool MaskRead(T_Sequence &fastaRead,
+              ZMWGroupEntry &zmwData,
+              RegionTable &regionTable)
+{
+    if (not regionTable.HasHoleNumber(zmwData.holeNumber)) {
+        return false;
+    } else {
+        RegionAnnotations regions = regionTable[zmwData.holeNumber];
+
+        // Mask off the low quality portion of this read.
+        DNALength readPos;
+        for (readPos = 0; readPos < std::min(regions.HQStart(), fastaRead.length); readPos++) {
+            fastaRead.seq[readPos] = 'N';
+        }
+        for (readPos = regions.HQEnd(); readPos < fastaRead.length; readPos++) {
+            fastaRead.seq[readPos] = 'N';
+        }
+        return regions.HasHQRegion();
     }
-
-    return readHasGoodRegion;
 }
 
-
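+/// \param[in]  fastaRead,zmwData,regionTable
+/// \param[out] readStart - HQ region start if one is found, otherwise 0
+/// \param[out] readEnd   - HQ region end if one is found, otherwise fastaRead.length
+/// \param[out] score     - NOTE(review): never assigned by this function; confirm callers do not read it
+/// \returns Whether or not the read coordinates were trimmed according to the HQ region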
 template<typename T_Sequence>
 bool GetReadTrimCoordinates(T_Sequence &fastaRead,
 	ZMWGroupEntry &zmwData,	RegionTable &regionTable,
 	DNALength &readStart ,DNALength &readEnd, int &score) {
 
-	int regionIndex;						 
-	int regionLowIndex, regionHighIndex;
-	regionLowIndex = regionHighIndex = 0;
-
-	regionTable.LookupRegionsByHoleNumber(zmwData.holeNumber,
-        regionLowIndex, regionHighIndex);
-
-	bool readHasGoodRegion = true;
-
-	DNALength readPos;
-
-	regionIndex = regionLowIndex;
-	int lastHQRegionIndex;
-	
-	while (regionIndex < regionHighIndex and 
-		   regionTable.GetType(regionIndex) != HQRegion) {
-	    regionIndex++;
-	}
-	
-	if (regionIndex < regionHighIndex ) {
-		readStart = regionTable.GetStart(regionIndex);
-		readEnd   = regionTable.GetEnd(regionIndex);
-        score     = regionTable.GetScore(regionIndex);
-		return true;
-	}
-	else {
-		readStart = 0;
-		readEnd   = fastaRead.length;
-		return false;
-	}
-}
-
-
-template<typename T_Sequence>
-bool TrimRead(T_Sequence &fastaRead, ZMWGroupEntry &zmwData,
-    RegionTable &regionTable, T_Sequence &trimmedRead) {
-
-	DNALength readStart, readEnd;
-
-	GetReadTrimCoordinates(fastaRead, zmwData, regionTable,
-        readStart, readEnd);
-
-	if (readEnd - readStart > 0) {
-		trimmedRead.CopySubsequence((FASTQSequence&)fastaRead, 
-			readStart, readEnd);
-		// signal that the read has a good region.
-		return true;
-	}
-	else {
-		//
-		// There is no information for this read. Make it skipped.
-		//
-		trimmedRead.seq = NULL;
-		trimmedRead.CopyTitle(fastaRead.title);
-		// signal this read has no good region.
-		return false;
-	}
-}
-
-template<typename T_Sequence>
-void CollectSubreadIntervals(T_Sequence &read, RegionTable *regionTablePtr, 
-    std::vector<ReadInterval> &subreadIntervals, bool byAdapter) {
-
-	int regionIndex;						 
-	int regionLowIndex, regionHighIndex;
-	regionLowIndex = regionHighIndex = 0;
-
-	regionTablePtr->LookupRegionsByHoleNumber(read.zmwData.holeNumber,
-        regionLowIndex, regionHighIndex);
-
-	if (byAdapter == false) { 
-        // read subreads (insert) directly from region table. 
-		for (regionIndex = regionLowIndex; 
-             regionIndex < regionHighIndex; regionIndex++) {
-			if (regionTablePtr->GetType(regionIndex) ==  Insert) {
-
-                RegionAnnotation & ra = regionTablePtr->table[regionIndex];
-				subreadIntervals.push_back(ReadInterval(
-                    ra.row[RegionAnnotation::RegionStart],
-				    ra.row[RegionAnnotation::RegionEnd],
-                    ra.row[RegionAnnotation::RegionScore]));
-			}
-		}
-	}
-	else { // Determine subreads according to adapters only.
-        std::vector<int> adapterIntervalIndices;
-		for (regionIndex = regionLowIndex; 
-             regionIndex < regionHighIndex; regionIndex++) {
-			if (regionTablePtr->GetType(regionIndex) == Adapter) {
-				adapterIntervalIndices.push_back(regionIndex);
-			}
-		}
-		// Sort indices so that the intervals appear in order on the read.
-		SortRegionIndicesByStart(*regionTablePtr, adapterIntervalIndices);
-		int curIntervalStart = 0;
-		int i;
-		if (adapterIntervalIndices.size() == 0) {
-            // no adapter, this zmw has only one subread (pass)
-			subreadIntervals.push_back(ReadInterval(0, read.length));
-		}
-		else {
-            // The first subread covers [0, RegionStart of first adapter)
-			subreadIntervals.push_back(ReadInterval(0, 
-                regionTablePtr->table[adapterIntervalIndices[0]].
-                    row[RegionAnnotation::RegionStart]));
-
-            // The subread[i] covers (RegionEnd of i-1-th adapter, RegionStart of i-th adapter)
-			for (i = 0; i + 1 < adapterIntervalIndices.size(); i++) {
-				subreadIntervals.push_back(ReadInterval(
-                    regionTablePtr->table[adapterIntervalIndices[i]].
-                        row[RegionAnnotation::RegionEnd],
-				    regionTablePtr->table[adapterIntervalIndices[i+1]].
-                        row[RegionAnnotation::RegionStart]));
-			}
-            // The last subread covers (RegionEnd of last adapter, end of read)
-			subreadIntervals.push_back(
-                ReadInterval(regionTablePtr->table[
-                    adapterIntervalIndices[adapterIntervalIndices.size()-1]].
-                        row[RegionAnnotation::RegionEnd],
-				read.length));
-		}
-	}
-	sort(subreadIntervals.begin(), subreadIntervals.end(), 
-        OrderRegionsByReadStart());
-}
-
-
-// Get all adapter intervals of a ZMW.
-// Input:
-//   read - read.zmwData.holeNumber specifies the zmw.
-//   regionTablePtr - a pointer to a region table.
-// Output:
-//   adapterIntervals - where to assign all adapter intervals of the zmw
-template<typename T_Sequence>
-void CollectAdapterIntervals(T_Sequence &read, 
-    RegionTable *regionTablePtr, 
-    std::vector<ReadInterval> &adapterIntervals) {
-
-    assert(regionTablePtr != NULL);
-    int regionLowIndex = 0, regionHighIndex = 0;
-
-    regionTablePtr->LookupRegionsByHoleNumber(read.zmwData.holeNumber,
-        regionLowIndex, regionHighIndex);
-
-    for (int regionIndex = regionLowIndex; 
-         regionIndex < regionHighIndex; regionIndex++) {
-
-        if (regionTablePtr->GetType(regionIndex) ==  Adapter) {
-            RegionAnnotation & ra = regionTablePtr->table[regionIndex];
-            adapterIntervals.push_back(ReadInterval(
-                ra.row[RegionAnnotation::RegionStart],
-                ra.row[RegionAnnotation::RegionEnd],
-                ra.row[RegionAnnotation::RegionScore]));
+    if (regionTable.HasHoleNumber(zmwData.holeNumber)) {
+        RegionAnnotations regions = regionTable[zmwData.holeNumber];
+        if (regions.HasHQRegion()) {
+            readStart = regions.HQStart();
+            readEnd   = regions.HQEnd();
+            return true;
         }
     }
+
+    readStart = 0;
+    readEnd = fastaRead.length;
+    return false;
 }
 
 #endif
diff --git a/common.mk b/common.mk
deleted file mode 100644
index 8108011..0000000
--- a/common.mk
+++ /dev/null
@@ -1,84 +0,0 @@
-SHELL          = bash
-G_BUILDOS_CMD := bash -c 'set -e; set -o pipefail; id=$$(lsb_release -si | tr "[:upper:]" "[:lower:]"); rel=$$(lsb_release -sr); case $$id in ubuntu) printf "$$id-%04d\n" $${rel/./};; centos) echo "$$id-$${rel%%.*}";; *) echo "$$id-$$rel";; esac' 2>/dev/null
-OS_STRING     ?= $(shell $(G_BUILDOS_CMD))
-
-ifeq ($(origin PREBUILT), undefined)
-PREBUILT := $(shell cd ../../../../prebuilt.out 2>/dev/null && pwd || echo -n notfound)
-endif
-
-THIRD_PARTY_PREFIX ?= ../..
-
-ifneq ($(COMMON_NO_THIRD_PARTY_REQD),true)
-    #
-    # Definitions common to all make files for library code.
-    # All paths are relative from inside the subdirectories, not this file
-    #
-
-    # git layout vs p4 layout automagic
-    THIRD_PARTY ?= $(shell cd $(abspath $(THIRD_PARTY_PREFIX)/third-party) 2>/dev/null && pwd || echo -n notfound)
-    ifeq ($(THIRD_PARTY), notfound)
-	THIRD_PARTY := $(shell cd $(abspath $(THIRD_PARTY_PREFIX)/../third-party/cpp) 2>/dev/null && pwd || echo -n notfound)
-    endif
-
-    # handle HDF5_INC HDF5_LIB
-    ifeq ($(origin HDF5_INC), undefined)
-	HDF5_INC := $(shell cd $(PREBUILT)/hdf5/hdf5-1.8.12/$(OS_STRING)/include 2>/dev/null && pwd || echo -n notfound)
-    else
-	HDF5_INC := $(shell cd $(HDF5_INC) 2>/dev/null && pwd || echo -n notfound)
-    endif
-    ifeq ($(HDF5_INC), notfound)
-	ifeq ($(THIRD_PARTY), notfound)
-                $(error cannot find third-party libraries!)
-	endif
-	HDF5_INC := $(shell cd $(THIRD_PARTY)/hdf5/include 2>/dev/null && pwd || echo -n notfound)
-    endif
-    ifeq ($(origin HDF5_LIB), undefined)
-	HDF5_LIB := $(shell cd $(PREBUILT)/hdf5/hdf5-1.8.12/$(OS_STRING)/lib 2>/dev/null && pwd || echo -n notfound)
-    else
-	HDF5_LIB := $(shell cd $(HDF5_LIB) 2>/dev/null && pwd || echo -n notfound)
-    endif
-    ifeq ($(HDF5_LIB), notfound)
-	ifeq ($(THIRD_PARTY), notfound)
-                $(error cannot find third-party libraries!)
-	endif
-	HDF5_LIB := $(shell cd $(THIRD_PARTY)/hdf5/lib 2>/dev/null && pwd || echo -n notfound)
-    endif
-
-    # handle ZLIB_ROOT
-    ifeq ($(origin ZLIB_ROOT), undefined)
-	ZLIB_ROOT := $(shell cd $(PREBUILT)/zlib/zlib-1.2.5/$(OS_STRING) 2>/dev/null && pwd || echo -n notfound)
-    else
-	ZLIB_ROOT := $(shell cd $(ZLIB_ROOT) 2>/dev/null && pwd || echo -n notfound)
-    endif
-
-    ifeq ($(ZLIB_ROOT), notfound)
-	# we don't need a backup ZLIB_ROOT here, because almost everybody has one in their paths
-    endif
-endif
-
-# handle BOOST
-ifeq ($(origin BOOST_INCLUDE), undefined)
-ifeq ($(origin BOOST_ROOT), undefined)
-BOOST_INCLUDE := $(PREBUILT)/boost/boost_1_55_0
-else
-BOOST_INCLUDE := $(BOOST_ROOT)
-endif
-endif
-
-# handle PBBAM
-ifeq ($(origin PBBAM), undefined)
-PBBAM := $(shell cd $(THIRD_PARTY_PREFIX)/../staging/PostPrimary/pbbam 2>/dev/null && pwd || echo -n notfound)
-endif
-
-# magic for non-verbose builds
-V ?= 0
-
-CXX_0 = @echo "  CXX	$@"; $(CXX)
-CXX_1 = $(CXX)
-CXX_pp = $(CXX_$(V))
-
-AR_0 = @echo "  AR	$@"; $(AR)
-AR_1 = $(AR)
-AR_pp = $(AR_$(V))
-
-ARFLAGS := rc
diff --git a/configure.py b/configure.py
new file mode 100755
index 0000000..137160d
--- /dev/null
+++ b/configure.py
@@ -0,0 +1,339 @@
+#!/usr/bin/env python
+"""Configure the build.
+
+- Fetch HDF5 headers.
+- Create libconfig.h
+- Create defines.mk
+
+This is not used by './unittest/'.
+"""
+import commands
+import contextlib
+import os
+import sys
+
+thisdir = os.path.dirname(os.path.abspath(__file__))
+
+def log(msg):  # write msg, then a newline, to stderr
+    for piece in (msg, '\n'):
+        sys.stderr.write(piece)
+
+def shell(cmd):  # log cmd, run it, return its output; raise on non-zero status
+    log(cmd)
+    rc, captured = commands.getstatusoutput(cmd)
+    if not rc:
+        return captured
+    raise Exception('%d <- %r' %(rc, cmd))
+
+def update_content(fn, content):  # rewrite fn only if its text differs from content
+    parent = os.path.abspath(os.path.dirname(fn))
+    if not os.path.isdir(parent):
+        shell('mkdir -p %s' %parent)
+    on_disk = None if not os.path.exists(fn) else open(fn).read()
+    if on_disk != content:
+        log('writing to %r' %fn)
+        log('"""\n' + content + '"""')
+        open(fn, 'w').write(content)
+
+def compose_libconfig(pbbam=False):
+    """Return the text of libconfig.h; USE_PBBAM is defined only when pbbam is true."""
+    if not pbbam:
+        return """
+"""
+    else:
+        return """
+#define USE_PBBAM
+"""
+
+def compose_defines_with_hdf(HDF5_INC, HDF5_LIB):
+    """Compose defines.mk text that pins HDF5_INC and HDF5_LIB.
+    We have to use := for HDF5_LIB b/c blasr is using it to mean
+    the directory, not the file, and it's in the environment.
+    """
+    template = """
+HDF5_INC:=%(HDF5_INC)s
+HDF5_LIB:=%(HDF5_LIB)s
+#CPPFLAGS+= -I../pbdata -I../hdf -I../alignment
+LIBPBDATA_INC     ?=../pbdata
+LIBPBIHDF_INC     ?=../hdf
+LIBBLASR_INC      ?=../alignment
+LIBPBDATA_LIB     ?=../pbdata
+LIBPBIHDF_LIB     ?=../hdf
+LIBBLASR_LIB      ?=../alignment
+"""
+    # thisdir is supplied as a key, but this template has no %(thisdir)s placeholder.
+    values = {'thisdir': thisdir, 'HDF5_INC': HDF5_INC, 'HDF5_LIB': HDF5_LIB}
+    return template % values
+
+
+def compose_defines_with_hdf_headers(HDF_HEADERS):  # defines.mk text pointing CPPFLAGS at HDF_HEADERS
+    return """
+HDF_HEADERS:=%(HDF_HEADERS)s
+#HDF5_INC  ?=${HDF_HEADERS}/src
+CPPFLAGS+= -I${HDF_HEADERS}/src -I${HDF_HEADERS}/c++/src
+CPPFLAGS+= -I../pbdata -I../hdf -I../alignment
+LIBPBDATA_LIB     ?=../pbdata/
+LIBPBIHDF_LIB     ?=../hdf/
+LIBBLASR_LIB      ?=../alignment/
+""" % {'thisdir': thisdir, 'HDF_HEADERS': HDF_HEADERS}
+
+def compose_defines():
+    """Compose defines.mk text with *_LIB paths under thisdir and nohdf ?=1.
+    Note that our local 'hdf' subdir will not even build in this case.
+    """
+    template = """
+LIBPBDATA_INC ?=../pbdata
+LIBPBIHDF_INC ?=../hdf
+LIBBLASR_INC  ?=../alignment
+LIBPBDATA_LIB ?=%(thisdir)s/pbdata/
+LIBPBIHDF_LIB ?=%(thisdir)s/hdf/
+LIBBLASR_LIB  ?=%(thisdir)s/alignment/
+nohdf         ?=1
+"""
+    return template % {'thisdir': thisdir}
+
+def get_OS_STRING():  # run the lsb_release-based one-liner through shell() and return its output
+    lsb_cmd = """bash -c 'set -e; set -o pipefail; id=$(lsb_release -si | tr "[:upper:]" "[:lower:]"); rel=$(lsb_release -sr); case $id in ubuntu) printf "$id-%04d\n" ${rel/./};; centos) echo "$id-${rel%%.*}";; *) echo "$id-$rel";; esac' 2>/dev/null"""
+    return shell(lsb_cmd)
+
+def get_PBBAM(env, prefix):  # NOTE: the whole body is a docstring holding disabled code, so this returns None
+    """
+    key = 'PBBAM'
+    if key in env:
+        return env[key]
+    cmd = 'cd $(THIRD_PARTY_PREFIX)/../staging/PostPrimary/pbbam 2>/dev/null && pwd || echo -n notfound' %(
+            THIRD_PARTY_PREFIX=prefix)
+    return shell(cmd)
+    """
+def get_HTSLIB(env, prefix):  # NOTE: the whole body is a docstring holding disabled code, so this returns None
+    """
+    key = 'HTSLIB'
+    if key in env:
+        return env[key]
+    cmd = 'cd $(THIRD_PARTY_PREFIX)/../staging/PostPrimary/htslib 2>/dev/null && pwd || echo -n notfound' %(
+            THIRD_PARTY_PREFIX=prefix)
+    return shell(cmd)
+    """
+def ifenvf(env, key, func):
+    # Prefer env[key]; func() is only called when key is absent.
+    if key not in env:
+        return func()
+    return env[key]
+def setifenvf(envout, envin, key, func):  # envout[key] <- envin[key] if present, else func()
+    envout[key] = envin[key] if key in envin else func()
+def setifenv(envout, envin, key, val):  # envout[key] <- envin[key] if present, else val
+    envout[key] = envin[key] if key in envin else val
+def setenv(envout, key, val):  # unconditional assignment of one key
+    envout.update({key: val})
+def update_env_if(envout, envin, keys):
+    # Copy over only those of `keys` that envin actually defines.
+    present = [(k, envin[k]) for k in keys if k in envin]
+    envout.update(present)
+def compose_defs_env(env):
+    # Keys with a built-in GNU make default (CXX/CC/AR) are emitted with ':='; all others with '?='.
+    pairs, pinned = sorted(env.items()), ('CXX', 'CC', 'AR')  # 'SHELL'?
+    soft = ['%-20s ?= %s' %kv for kv in pairs if kv[0] not in pinned]
+    hard = ['%-20s := %s' %kv for kv in pairs if kv[0] in pinned]
+    return '\n'.join(soft + hard + [''])
+def append_common(envin, content):
+    """Wrap content with the settings shared by every configure_* path.
+    """
+    # The LIBCONFIG_H line records the absolute path of libconfig.h in the
+    # current directory. In case somebody (like pbdagcon) builds libpbdata
+    # in-place, a copy of libconfig.h has to be dropped wherever pbdata is
+    # actually built, and that place is not known until later. This can
+    # be cleared up once it is clearer where things are built.
+    suffix = """
+LIBCONFIG_H:=%s
+# Use PREFIX dir, if available.
+INCLUDES      += ${PREFIX_INC}
+LIBS          += ${PREFIX_LIB}
+"""%os.path.abspath(os.path.join(os.getcwd(), 'libconfig.h'))
+    env = dict(envin)
+    if 'PREFIX' in envin:
+        # Some extra defs: include/ and lib/ under PREFIX.
+        env['PREFIX_INC'] = os.path.join(envin['PREFIX'], 'include')
+        env['PREFIX_LIB'] = os.path.join(envin['PREFIX'], 'lib')
+    # Only these keys are emitted (each with ':='), ahead of content;
+    # everything else in env is ignored here.
+    exported = (
+        'CXXFLAGS',
+        'SH_LIB_EXT',
+        'EXTRA_LDFLAGS',
+        'PREFIX_LIB', 'PREFIX_INC',
+    )
+    header = ['%-20s := %s' %kv for kv in sorted(env.items()) if kv[0] in exported]
+    return '\n'.join([''] + header + ['']) + content + suffix
+def compose_defines_pacbio(envin):
+    """Compose the defines.mk text used by configure_pacbio.
+    This is used by mobs via buildcntl.sh.
+    """
+    env = dict()
+    setenv(env, 'SHELL', 'bash')
+    setifenvf(env, envin, 'OS_STRING', get_OS_STRING)
+    # Library locations: taken from envin when given, else these defaults.
+    for name, fallback in (
+            ('LIBPBDATA_INC', '../pbdata'),
+            ('LIBPBIHDF_INC', '../hdf'),
+            ('LIBBLASR_INC', '../alignment'),
+            ('LIBPBDATA_LIB', '../pbdata/'),
+            ('LIBPBIHDF_LIB', '../hdf/'),
+            ('LIBBLASR_LIB', '../alignment/'),
+    ):
+        setifenv(env, envin, name, fallback)
+    if 'nohdf' in envin:
+        # Otherwise, do not define it at all. TODO(CD): Remove nohdf, as it is not used.
+        env['nohdf'] = envin['nohdf']
+    # These have no default here: forwarded only when envin provides them.
+    passthrough = set([
+        'CXX', 'AR', 'HDF5_INC', 'HDF5_LIB', 'PBBAM_INC', 'PBBAM_LIB',
+        'HTSLIB_INC', 'HTSLIB_LIB', 'BOOST_INC', 'ZLIB_LIB', 'GCC_LIB',
+        'GTEST_INC', 'GTEST_SRCDIR',
+    ])
+    update_env_if(env, envin, passthrough)
+    return compose_defs_env(env)
+
+@contextlib.contextmanager
+def cd(nwd):  # context manager: chdir to nwd for the 'with' body, then back to the old cwd
+    cwd = os.getcwd()
+    log('cd %r -> %r' %(cwd, nwd))
+    os.chdir(nwd)
+    try: yield  # the body of the 'with' block runs here
+    finally: os.chdir(cwd)  # restore the old cwd even if the body raised
+    log('cd %r <- %r' %(cwd, nwd))
+
+def fetch_hdf5_headers():
+    """Ensure <thisdir>/hdf/hdf5-1.8.12-headers exists, downloading it if absent.
+    This should not be used when an external build-dir is needed.
+    Returns the headers directory, joined onto thisdir.
+    """
+    headers_dn = os.path.join(thisdir, 'hdf', 'hdf5-1.8.12-headers')
+    if os.path.isdir(headers_dn):
+        return headers_dn
+    with cd(os.path.dirname(headers_dn)):
+        # NOTE(review): curl -k disables TLS certificate verification for this download.
+        shell('curl -k -L https://www.dropbox.com/s/8971bcyy5o42rxb/hdf5-1.8.12-headers.tar.bz2\?dl\=0 | tar xjf -')
+    return headers_dn
+
+def update(content_defines_mk, content_libconfig_h):
+    """Write libconfig.h and defines.mk into the current directory.
+
+    Each file is only rewritten when its content changed (see
+    update_content).
+    """
+    update_content(os.path.join('.', 'libconfig.h'), content_libconfig_h)
+    update_content('defines.mk', content_defines_mk)
+    if thisdir != os.path.abspath('.'):
+        return
+    # This was run in the root directory, so symlink defines.mk
+    # in sub-dirs, which now include defines.mk from CURDIR
+    # in order to facilitate building in external output directories.
+    # Existing entries (files or links) are left alone.
+    target = os.path.join('..', 'defines.mk')
+    for sub in ('pbdata', 'hdf', 'alignment', 'unittest'):
+        link = os.path.join(sub, 'defines.mk')
+        if not os.path.lexists(link):
+            os.symlink(target, link)
+
+def configure_nopbbam(envin):
+    """Configure a build without pbbam, taking HDF5 paths from envin.
+    HDF5_INC falls back to HDF5_INCLUDE when unset or empty; HDF5_LIB is required.
+    This is the path used by blasr in a GitHub build, for now.
+    """
+    inc_dir = envin.get('HDF5_INC') or envin['HDF5_INCLUDE']
+    lib_dir = envin['HDF5_LIB']
+    defines_mk = append_common(
+        envin,
+        compose_defines_with_hdf(inc_dir, lib_dir))
+    libconfig_h = compose_libconfig(pbbam=False)
+    update(defines_mk, libconfig_h)
+
+def configure_nopbbam_skip_hdf(envin):
+    """Configure without pbbam, using fetched HDF5 headers only.
+    We lack HDF5 libs, so we cannot build our hdf/ subdir.
+    But the others are fine.
+    """
+    headers_dir = fetch_hdf5_headers()
+    defines_mk = append_common(
+        envin, compose_defines_with_hdf_headers(headers_dir))
+    libconfig_h = compose_libconfig(pbbam=False)
+    update(defines_mk, libconfig_h)
+
+def configure_nopbbam_nohdf5(envin):
+    # No pbbam and no HDF5: compose_defines() text plus a libconfig.h without USE_PBBAM.
+    defines_mk = append_common(envin, compose_defines())
+    libconfig_h = compose_libconfig(pbbam=False)
+    update(defines_mk, libconfig_h)
+
+def configure_pacbio(envin):
+    # The only path that enables pbbam: libconfig.h gets '#define USE_PBBAM'.
+    defines_mk = append_common(envin, compose_defines_pacbio(envin))
+    libconfig_h = compose_libconfig(pbbam=True)
+    update(defines_mk, libconfig_h)
+
+def get_make_style_env(envin, args):  # merge KEY=VAL args with envin into a new dict
+    envout = dict()  # args go in first; envin is layered on top below
+    for arg in args:
+        if '=' in arg:  # args without '=' are ignored
+            k, v = arg.split('=', 1)  # split on the first '=' only, so VAL may itself contain '='
+            envout[k] = v
+    envout.update(envin)  # entries in envin override same-named KEY=VAL args
+    return envout
+
+class OsType:  # small integer constants naming the host OS kinds
+    Unknown, Linux, Darwin = 0, 1, 2
+
+def getOsType():
+    # Classify the host from the output of `uname -s`.
+    # 'Darwin' is tested before 'Linux'; anything else is Unknown.
+    uname = shell('uname -s')
+    log('uname=%r' %uname)
+    for marker, kind in (('Darwin', OsType.Darwin), ('Linux', OsType.Linux)):
+        if marker in uname:
+            return kind
+    return OsType.Unknown
+
+def update_env_for_linux(env):
+    # Linux: SET_LIB_NAME is '-soname' and shared libraries end in '.so'.
+    env.update(SET_LIB_NAME='-soname', SH_LIB_EXT='.so')
+def update_env_for_darwin(env):
+    # -flat_namespace makes BSD ld act like Linux ld, finding
+    # shared libs recursively.
+    env.update(
+        SET_LIB_NAME='-install_name', SH_LIB_EXT='.dylib',
+        EXTRA_LDFLAGS='-flat_namespace')
+def update_env_for_unknown(env):
+    # Unrecognised OS: same values as the Linux case.
+    env.update(SET_LIB_NAME='-soname', SH_LIB_EXT='.so')
+update_env_for_os = {  # dispatch table: OsType value -> function that sets the OS-specific env keys
+    OsType.Linux: update_env_for_linux,
+    OsType.Darwin: update_env_for_darwin,
+    OsType.Unknown: update_env_for_unknown,
+}
+
+def main(prog, *args):
+    """Include shell environ plus KEY=VAL pairs in args,
+    then dispatch to one configure_* routine based on NOPBBAM / NOHDF / HDF5_LIB.
+    """
+    ost = getOsType()
+    envin = get_make_style_env(os.environ, args)
+    update_env_for_os[ost](envin)
+    # With NOPBBAM set, the choice depends on how much of HDF5 is available.
+    if 'NOPBBAM' not in envin:
+        configure_pacbio(envin)
+    elif 'NOHDF' in envin:
+        configure_nopbbam_nohdf5(envin)
+    elif 'HDF5_LIB' not in envin:
+        configure_nopbbam_skip_hdf(envin)
+    else:
+        # HDF5_INCLUDE stands in for HDF5_INC when only the former is set.
+        if 'HDF5_INCLUDE' in envin and 'HDF5_INC' in envin:
+            print("WARNING: Found both HDF5_INC and HDF5_INCLUDE in environ!")
+        elif 'HDF5_INCLUDE' in envin:
+            envin['HDF5_INC'] = envin['HDF5_INCLUDE']
+        assert 'HDF5_INC' in envin, 'Hey! You have HDF5_LIB but not HDF5_INC!'
+        configure_nopbbam(envin)
+
+
+if __name__=="__main__":
+    main(*sys.argv)
diff --git a/hdf/BufferedHDF2DArrayImpl.hpp b/hdf/BufferedHDF2DArrayImpl.hpp
index 49336ef..bddd762 100644
--- a/hdf/BufferedHDF2DArrayImpl.hpp
+++ b/hdf/BufferedHDF2DArrayImpl.hpp
@@ -3,6 +3,7 @@
 
 #include <cstring>
 #include <cassert>
+#include "utils.hpp"
 
 template<typename T>
 BufferedHDF2DArray<T>::BufferedHDF2DArray(H5::CommonFG *_container, 
@@ -104,7 +105,7 @@ int BufferedHDF2DArray<T>::Initialize(HDFGroup &group, std::string datasetName,
             if (dimSize) {
                 delete [] dimSize;
             }
-            dimSize = new hsize_t[nDims];
+            dimSize = ProtectedNew<hsize_t>(nDims);
             dataspace.getSimpleExtentDims(dimSize);
             rowLength = dimSize[0];
             colLength = dimSize[1];
@@ -125,8 +126,6 @@ int BufferedHDF2DArray<T>::Initialize(HDFGroup &group, std::string datasetName,
 
 template<typename T>
 int BufferedHDF2DArray<T>::size() {
-    // Why assert nDims == 1 for 2D Array?
-    assert(nDims == 1);
     dataspace.getSimpleExtentDims(dimSize);
     return dimSize[0];
 }
@@ -185,7 +184,7 @@ void BufferedHDF2DArray<T>::Create(H5::CommonFG *_container, string _datasetName
             assert(this->writeBuffer != NULL);
             delete[] this->writeBuffer;
         }
-        this->writeBuffer = new T[rowLength];
+        this->writeBuffer = ProtectedNew<T>(rowLength);
         this->bufferSize = rowLength;
     }
 
@@ -282,7 +281,6 @@ void BufferedHDF2DArray<T>::Flush(int destRow) {
     //
     // A default writeRow of -1 implies append
     //
-    int numRowsToCreate; // FIXME(yli): why is numRowsToCreate assigned but not used?
     int numDataRows;
     //
     // this->bufferIndex points after the end of the last data in the
@@ -291,12 +289,6 @@ void BufferedHDF2DArray<T>::Flush(int destRow) {
     //
     numDataRows = this->bufferIndex / rowLength;
 
-    if (destRow < 0) {
-        numRowsToCreate = this->bufferIndex / rowLength;  
-    }
-    else {
-        numRowsToCreate = this->bufferIndex / rowLength + destRow;
-    }
     if (numDataRows > 0) {
         assert(fileDataSpaceInitialized);
 
@@ -340,7 +332,6 @@ void BufferedHDF2DArray<T>::Flush(int destRow) {
         // Store the newly dimensioned dataspaces.
         //
         fileSpace.getSimpleExtentDims(fileArraySize, fileArrayMaxSize);			
-        //int extendedSize = extendedSpace.getSimpleExtentNpoints(); // FIXME(yli): should this be used??
         //
         // Configure the proper addressing to append to the array.
         //
diff --git a/hdf/BufferedHDFArrayImpl.hpp b/hdf/BufferedHDFArrayImpl.hpp
index 9f890ca..41b4486 100644
--- a/hdf/BufferedHDFArrayImpl.hpp
+++ b/hdf/BufferedHDFArrayImpl.hpp
@@ -4,6 +4,7 @@
 #include <cstdlib>
 #include <iostream>
 #include <cstring>
+#include "utils.hpp"
 #include "BufferedHDFArray.hpp"
 
 template<typename T>
@@ -311,7 +312,7 @@ int BufferedHDFArray<T>::UpdateH5Dataspace() {
             delete [] dimSize;
             dimSize = NULL;
         }
-        dimSize = new hsize_t[nDims];
+        dimSize = ProtectedNew<hsize_t>(nDims);
 
         dataspace.getSimpleExtentDims(dimSize);
         arrayLength = dimSize[0];
diff --git a/hdf/HDFAtom.cpp b/hdf/HDFAtom.cpp
index ef23749..a70804b 100644
--- a/hdf/HDFAtom.cpp
+++ b/hdf/HDFAtom.cpp
@@ -1,17 +1,16 @@
 #include "HDFAtom.hpp"
 
 template<>
-void HDFAtom<std::string>::Create(H5::H5Location &object, std::string atomName) {
+void HDFAtom<std::string>::Create(H5::H5Location &object, const std::string & atomName) {
     H5::StrType strType(0, H5T_VARIABLE);
     hsize_t defaultDims[] = {1};
     H5::DataSpace defaultDataSpace(1, defaultDims);
     attribute = object.createAttribute(atomName.c_str(), strType, H5::DataSpace(H5S_SCALAR));
-    initialized= true;
 }
 
 
 #define DEFINE_TYPED_CREATE_ATOM(T, predtype) template<> \
-	void HDFAtom<T>::TypedCreate(H5::H5Location &object, std::string &atomName, H5::DataSpace &defaultDataSpace) {				\
+	void HDFAtom<T>::TypedCreate(H5::H5Location &object, const std::string &atomName, H5::DataSpace &defaultDataSpace) {				\
   attribute = object.createAttribute(atomName.c_str(), (predtype), defaultDataSpace );	\
 }
 
@@ -84,7 +83,6 @@ void HDFAtom<std::string>::Read(std::string &value) {
 	else {
 		hsize_t stsize = attribute.getStorageSize();
 		value.resize(stsize);
-		//		char *valueStr = new char[stsize+1];
 		attribute.read(stringType, &value[0]);
 		if (stsize > 0 and value[stsize-1] == '\0') {
 			value.resize(stsize-1);
@@ -131,7 +129,6 @@ void HDFAtom<std::vector<std::string> >::Read(std::vector<std::string> &values)
 	hsize_t nPoints;
 	nPoints = attributeSpace.getSelectNpoints();
 	H5::DataType attrType = attribute.getDataType(); // necessary for attr.read()
-
 	// Declare and initialize std::vector of pointers to std::string attribute list.
 	std::vector<char*> ptrsToHDFControlledMemory;
 	ptrsToHDFControlledMemory.resize(nPoints);
@@ -141,7 +138,7 @@ void HDFAtom<std::vector<std::string> >::Read(std::vector<std::string> &values)
 	unsigned int i;
 	for (i = 0; i < ptrsToHDFControlledMemory.size(); i++ ){
 		values.push_back(ptrsToHDFControlledMemory[i]);
-		free(ptrsToHDFControlledMemory[i]);
+        free(ptrsToHDFControlledMemory[i]);
 	}
 }
 
diff --git a/hdf/HDFAtom.hpp b/hdf/HDFAtom.hpp
index f8934df..5b921a9 100644
--- a/hdf/HDFAtom.hpp
+++ b/hdf/HDFAtom.hpp
@@ -16,12 +16,11 @@ class HDFAtom : public HDFData {
 public:
     H5::Attribute attribute;
 
-    bool initialized;
     HDFAtom() {
-        initialized = false;
+        isInitialized = false;
     }
     ~HDFAtom() {
-        if (initialized) {
+        if (IsInitialized()) {
             attribute.close();
         }
     }
@@ -30,21 +29,21 @@ public:
         return NULL;
     }
 
-    int Initialize(H5::H5Location &object, std::string attributeName, bool createIfMissing=false) {
+    int Initialize(H5::H5Location &object, const std::string & attributeName, bool createIfMissing=false) {
         attribute = object.openAttribute(attributeName.c_str());
-        initialized = true;
+        isInitialized = true;
         return 1;
     }
 
-    int Initialize(HDFGroup &group, std::string attributeName, bool createIfMissing=false) {
+    int Initialize(HDFGroup &group, const std::string & attributeName, bool createIfMissing=false) {
         return Initialize(group.group, attributeName);
     }
 
-    int Initialize(HDFData &data, std::string attributeName, bool createIfMissing=false) {
+    int Initialize(HDFData &data, const std::string & attributeName, bool createIfMissing=false) {
         return Initialize(data.dataset, attributeName);
     }
 
-    int Initialize(H5::Group &object, std::string attributeName, bool createIfMissing=false) {
+    int Initialize(H5::Group &object, const std::string & attributeName, bool createIfMissing=false) {
         try {
             attribute = object.openAttribute(attributeName.c_str());
         }
@@ -52,15 +51,15 @@ public:
             cout << "ERROR. Could not open attribute " << attributeName << endl;
             exit(1);
         }
-        initialized  = true;
+        isInitialized  = true;
         return 1;
     }
 
-    int Initialize(H5::H5File &hdfFile, std::string groupName, std::string attributeName) {
+    int Initialize(H5::H5File &hdfFile, const std::string & groupName, const std::string & attributeName) {
         HDFGroup group;
         group.Initialize(hdfFile, groupName);
         attribute = group.group.openAttribute(attributeName.c_str());
-        initialized = true;
+        isInitialized = true;
         return 1;
     }
 
@@ -68,28 +67,28 @@ public:
     // This handles creation of all non-std::string types.  A specialization
     // for std::strings is provided below.
     //
-    void Create(H5::H5Location &object, std::string atomName) {
+    void Create(H5::H5Location &object, const std::string & atomName) {
         hsize_t defaultDims[] = {1};
         H5::DataSpace defaultDataSpace(1, defaultDims);
         TypedCreate(object, atomName, defaultDataSpace);
     }
 
 
-    void Create(H5::H5Location &object, std::string name, std::string value) {
+    void Create(H5::H5Location &object, const std::string & name, const std::string & value) {
         H5::StrType strType(0, value.size());
         attribute = object.createAttribute(name.c_str(), strType, H5::DataSpace(0,NULL));
-        initialized = true;
+        isInitialized = true;
         attribute.write(strType, value.c_str());
     }
 
-    void Create(H5::H5Location &object, std::string name, std::vector<int> &vect) {
+    void Create(H5::H5Location &object, const std::string & name, std::vector<int> &vect) {
         hsize_t length = vect.size();
         H5::ArrayType arrayDataType(H5::PredType::NATIVE_INT, 1, &length);
         attribute = object.createAttribute(name.c_str(), H5::PredType::NATIVE_INT, H5::DataSpace(1, &length));
         attribute.write(H5::PredType::NATIVE_INT, &((vect)[0]));    
     }
 
-    void Create(H5::H5Location &object, std::string name, std::vector<std::string> &vect) {
+    void Create(H5::H5Location &object, const std::string & name, const std::vector<std::string> &vect) {
         hsize_t length = vect.size();
         H5::StrType strType(0,H5T_VARIABLE);
         H5::ArrayType arrayDataType(strType, 1, &length);
@@ -97,7 +96,7 @@ public:
         attribute.write(strType, &((vect)[0]));    
     }
 
-    void TypedCreate(H5::H5Location &object, std::string &atomName, H5::DataSpace &dataSpace) {
+    void TypedCreate(H5::H5Location &object, const std::string &atomName, H5::DataSpace &dataSpace) {
         assert("Calling HDFAtom<T>::typedCreate on an unsupported type" == 0);
     }
 
@@ -118,10 +117,10 @@ public:
 //
 
 template<>
-void HDFAtom<std::string>::Create(H5::H5Location &object, std::string atomName);
+void HDFAtom<std::string>::Create(H5::H5Location &object, const std::string & atomName);
 
 #define DECLARE_TYPED_CREATE_ATOM(T, predtype) template<> \
-	void HDFAtom<T>::TypedCreate(H5::H5Location &object, std::string &atomName, H5::DataSpace &defaultDataSpace);
+	void HDFAtom<T>::TypedCreate(H5::H5Location &object, const std::string & atomName, H5::DataSpace &defaultDataSpace);
 
 DECLARE_TYPED_CREATE_ATOM(int, H5::PredType::NATIVE_INT)
 DECLARE_TYPED_CREATE_ATOM(unsigned int, H5::PredType::NATIVE_UINT)
diff --git a/hdf/HDFAttributable.cpp b/hdf/HDFAttributable.cpp
index da57d08..b092cd8 100644
--- a/hdf/HDFAttributable.cpp
+++ b/hdf/HDFAttributable.cpp
@@ -5,11 +5,11 @@ using namespace std;
 using namespace H5;
 
 void CallStoreAttributeName(H5Location &obj, string attrName, void *attrList){ 
-    ((vector<string>*)attrList)->push_back(attrName);
+    ((vector<string>*)attrList)->push_back(string(attrName));
 }
 
 void HDFAttributable::StoreAttributeNames(H5Location &thisobject, 
-    std::vector<std::string> &attributeNames) {
+    const std::vector<std::string> &attributeNames) {
     int nAttr = thisobject.getNumAttrs();
     unsigned int bounds[2];
     bounds[0] = 0;
@@ -23,7 +23,7 @@ H5Location* HDFAttributable::GetObject() {
     return NULL;
 }
 
-int HDFAttributable::ContainsAttribute(string attributeName) {
+int HDFAttributable::ContainsAttribute(const string & attributeName) {
     size_t i;
     std::vector<std::string> tmpAttributeNames;
     try{
diff --git a/hdf/HDFAttributable.hpp b/hdf/HDFAttributable.hpp
index 4b223c7..ba97acc 100644
--- a/hdf/HDFAttributable.hpp
+++ b/hdf/HDFAttributable.hpp
@@ -12,11 +12,11 @@ public:
     std::vector<std::string> attributeNameList;
 
     void StoreAttributeNames(H5::H5Location &thisobject, 
-        std::vector<std::string> &attributeNames); 
+        const std::vector<std::string> &attributeNames); 
 
     virtual H5::H5Location* GetObject(); 
 
-    int ContainsAttribute(std::string attributeName); 
+    int ContainsAttribute(const std::string & attributeName); 
 
 };
 
diff --git a/hdf/HDFBasReader.hpp b/hdf/HDFBasReader.hpp
index 23fe235..ef596bd 100644
--- a/hdf/HDFBasReader.hpp
+++ b/hdf/HDFBasReader.hpp
@@ -242,7 +242,7 @@ public:
     }
 
     void GetChangeListID(std::string &changeListID) {
-        if (changeListIDAtom.initialized) {
+        if (changeListIDAtom.IsInitialized()) {
             changeListIDAtom.Read(changeListID);
         }
         else {
@@ -388,65 +388,109 @@ public:
             }
         }
 
+
     int InitializeSequenceFields(HDFGroup &baseCallsGroup) {
-        //
         // The only field that is absoultely required is Basecall
         if (baseArray.InitializeForReading(baseCallsGroup, "Basecall")        == false) return 0;
-        if (includedFields["QualityValue"] and qualArray.InitializeForReading(baseCallsGroup, "QualityValue")    == false) return 0;
-        if (includedFields["InsertionQV"] and insertionQVArray.InitializeForReading(baseCallsGroup, "InsertionQV")     == false) return 0;
-        if (includedFields["DeletionQV"] and deletionQVArray.InitializeForReading(baseCallsGroup, "DeletionQV")      == false) return 0;
-        if (includedFields["DeletionTag"] and deletionTagArray.InitializeForReading(baseCallsGroup, "DeletionTag")     == false) return 0;
-        if (includedFields["SubstitutionQV"] and substitutionQVArray.InitializeForReading(baseCallsGroup, "SubstitutionQV")  == false) return 0;
-        if (includedFields["SubstitutionTag"] and substitutionTagArray.InitializeForReading(baseCallsGroup, "SubstitutionTag") == false) return 0;
-        // if (includedFields["PreBaseFrames"] and preBaseFramesArray.InitializeForReading(baseCallsGroup, "PreBaseFrames")   == false) return 0;
-
-        if (baseCallsGroup.ContainsObject("PreBaseFrames")) {
-            if (preBaseFramesArray.InitializeForReading(baseCallsGroup, "PreBaseFrames") == false) return 0;
-        } else {
-            includedFields["PreBaseFrames"] = false;
-        }
 
         //
         // These fields are not always present in bas.h5 files.
         //
-        if (baseCallsGroup.ContainsObject("PulseIndex")) {
-            if (pulseIndexArray.InitializeForReading(baseCallsGroup,        "PulseIndex")      == false) return 0;
-        }
-        else {
-            includedFields["PulseIndex"] = false;
-        }
+        //
+        std::string fieldName = "QualityValue";
+        if (baseCallsGroup.ContainsObject(fieldName)) {
+            if (includedFields[fieldName] and
+                not qualArray.InitializeForReading(baseCallsGroup, fieldName))
+                return 0;
+        } else includedFields[fieldName] = false;
 
-        if (baseCallsGroup.ContainsObject("WidthInFrames")) {    
-            if (basWidthInFramesArray.InitializeForReading(baseCallsGroup,  "WidthInFrames")   == false) return 0;
-        }
-        else {
-            includedFields["WidthInFrames"] = false;
-        }
+        fieldName = "InsertionQV";
+        if (baseCallsGroup.ContainsObject(fieldName)) {
+            if (includedFields[fieldName] and
+                not insertionQVArray.InitializeForReading(baseCallsGroup, fieldName))
+                return 0;
+        } else includedFields[fieldName] = false;
 
-        if (baseCallsGroup.ContainsObject("MergeQV")) {
-            if (includedFields["MergeQV"] and mergeQVArray.InitializeForReading(baseCallsGroup, "MergeQV") == false) return false;
-        }
-        else {
-            includedFields["MergeQV"] = false;
-        }
+        fieldName = "DeletionQV";
+        if (baseCallsGroup.ContainsObject(fieldName)) {
+            if (includedFields[fieldName] and
+                not deletionQVArray.InitializeForReading(baseCallsGroup, fieldName))
+                return 0;
+        } else includedFields[fieldName] = false;
 
-        if ((includedFields["HQRegionSNR"] or includedFields["ReadScore"]) and
-                (baseCallsGroup.ContainsObject(zmwMetricsGroupName) == 0 or
-                zmwMetricsGroup.Initialize(baseCallsGroup.group, zmwMetricsGroupName) == 0)) {
-            includedFields["HQRegionSNR"] = false;
-            includedFields["ReadScore"] = false;
-        }
-        if (includedFields["HQRegionSNR"] and (zmwMetricsGroup.ContainsObject("HQRegionSNR") == 0 or
-                GetDatasetNDim(zmwMetricsGroup.group, "HQRegionSNR") != 2 or
-                hqRegionSNRMatrix.InitializeForReading(zmwMetricsGroup, "HQRegionSNR") == false or
-                hqRegionSNRMatrix.GetNCols() != 4)) {
+        fieldName = "DeletionTag";
+        if (baseCallsGroup.ContainsObject(fieldName)) {
+            if (includedFields[fieldName] and
+                not deletionTagArray.InitializeForReading(baseCallsGroup, fieldName))
+                return 0;
+        } else includedFields[fieldName] = false;
+
+        fieldName = "SubstitutionQV";
+        if (baseCallsGroup.ContainsObject(fieldName)) {
+            if (includedFields[fieldName] and
+                not substitutionQVArray.InitializeForReading(baseCallsGroup, fieldName))
+                return 0;
+        } else includedFields[fieldName] = false;
+
+        fieldName = "SubstitutionTag";
+        if (baseCallsGroup.ContainsObject(fieldName)) {
+            if (includedFields[fieldName] and
+                not substitutionTagArray.InitializeForReading(baseCallsGroup, fieldName))
+                return 0;
+        } else includedFields[fieldName] = false;
+
+        fieldName = "PreBaseFrames";
+        if (baseCallsGroup.ContainsObject(fieldName)) {
+            if (includedFields[fieldName] and
+                not preBaseFramesArray.InitializeForReading(baseCallsGroup, fieldName))
+                return 0;
+        } else includedFields[fieldName] = false;
+
+        fieldName = "PulseIndex";
+        if (baseCallsGroup.ContainsObject(fieldName)) {
+            if (includedFields[fieldName] and
+                not pulseIndexArray.InitializeForReading(baseCallsGroup, fieldName))
+                return 0;
+        } else includedFields[fieldName] = false;
+
+        fieldName = "WidthInFrames";
+        if (baseCallsGroup.ContainsObject(fieldName)) {
+            if (includedFields[fieldName] and
+                not basWidthInFramesArray.InitializeForReading(baseCallsGroup, fieldName))
+                return 0;
+        } else includedFields[fieldName] = false;
+
+        fieldName = "MergeQV";
+        if (baseCallsGroup.ContainsObject(fieldName)) {
+            if (includedFields[fieldName] and
+                not mergeQVArray.InitializeForReading(baseCallsGroup, fieldName))
+                return 0;
+        } else includedFields[fieldName] = false;
+
+        if (not baseCallsGroup.ContainsObject(zmwMetricsGroupName) or 
+            not zmwMetricsGroup.Initialize(baseCallsGroup.group, zmwMetricsGroupName)) {
             includedFields["HQRegionSNR"] = false;
-        }
-        if (includedFields["ReadScore"] and (zmwMetricsGroup.ContainsObject("ReadScore") == 0 or
-                readScoreArray.InitializeForReading(zmwMetricsGroup, "ReadScore")) == false) {
             includedFields["ReadScore"] = false;
-        }
+        } else {
+            if (includedFields["HQRegionSNR"]) {
+                if (not zmwMetricsGroup.ContainsObject("HQRegionSNR") or
+                    not hqRegionSNRMatrix.InitializeForReading(zmwMetricsGroup, "HQRegionSNR") or
+                    GetDatasetNDim(zmwMetricsGroup.group, "HQRegionSNR") != 2 or 
+                    hqRegionSNRMatrix.GetNCols() != 4) {
+                    includedFields["HQRegionSNR"] = false;
+                } else if (not useScanData) {
+                    includedFields["HQRegionSNR"] = false;
+                    std::cerr << "WARNING: could not read HQRegionSNR because ScanData is absent!" << std::endl;
+                }
+            } 
+
+            if (includedFields["ReadScore"] and
+                (not zmwMetricsGroup.ContainsObject("ReadScore") or
+                 not readScoreArray.InitializeForReading(zmwMetricsGroup, "ReadScore"))) {
+                includedFields["ReadScore"] = false;
+            }
 
+        }
         return 1;
     }
 
@@ -488,8 +532,6 @@ public:
         //
         nReads = zmwReader.numEventArray.arrayLength;
 
-
-
         if (scanDataReader.platformId == Astro) {
             if (InitializeAstro() == 0) {
                 return 0;
@@ -563,7 +605,6 @@ public:
             exit(1);
         }
         curBasePos += seqLength;
-        seq.StorePlatformId(scanDataReader.platformId);
         return 1;
     }
 
@@ -581,25 +622,24 @@ public:
                     seq.AllocateQualitySpace(seqLength);
                     qualArray.Read((int)curBasePos, (int) curBasePos + seqLength, (unsigned char*) seq.qual.data);
                 }
-            }
-
-            if (includedFields["DeletionQV"]) {
-                GetNextDeletionQV(seq);
-            }
-            if (includedFields["DeletionTag"]) {
-                GetNextDeletionTag(seq);
-            }
-            if (includedFields["InsertionQV"]) {
-                GetNextInsertionQV(seq);
-            }
-            if (includedFields["SubstitutionQV"]) {
-                GetNextSubstitutionQV(seq);
-            }
-            if (includedFields["SubstitutionTag"]) {
-                GetNextSubstitutionTag(seq);
-            }
-            if (includedFields["MergeQV"]) {
-                GetNextMergeQV(seq);
+                if (includedFields["DeletionQV"]) {
+                    GetNextDeletionQV(seq);
+                }
+                if (includedFields["DeletionTag"]) {
+                    GetNextDeletionTag(seq);
+                }
+                if (includedFields["InsertionQV"]) {
+                    GetNextInsertionQV(seq);
+                }
+                if (includedFields["SubstitutionQV"]) {
+                    GetNextSubstitutionQV(seq);
+                }
+                if (includedFields["SubstitutionTag"]) {
+                    GetNextSubstitutionTag(seq);
+                }
+                if (includedFields["MergeQV"]) {
+                    GetNextMergeQV(seq);
+                }
             }
             seq.SetQVScale(qvScale);
             curBasePos += seqLength;
@@ -629,7 +669,6 @@ public:
             if (includedFields["ReadScore"]) {
                 GetNextReadScore(seq);
             }
-
             int seqLength = GetNextWithoutPosAdvance(seq);
             seq.length = seqLength;
             if(readQVs) {
@@ -646,11 +685,8 @@ public:
             seq.SetQVScale(qvScale);
             curBasePos += seqLength;
 
-            seq.subreadStart = 0;
-            seq.subreadEnd   = seq.length;
+            seq.SubreadStart(0).SubreadEnd(seq.length);
             zmwReader.GetNext(seq.zmwData);
-            seq.xy[0] = seq.zmwData.x;
-            seq.xy[1] = seq.zmwData.y;
         } catch (H5::DataSetIException e) {
             cout << "ERROR, could not read bases or QVs for SMRTSequence "
                 << seq.GetName() << endl;
@@ -671,6 +707,7 @@ public:
         int retVal;
 
         DNALength  curBasPosCopy = curBasePos;
+
         //
         // Getting next advances the curBasPos to the end of 
         // the current sequence. 
@@ -681,21 +718,21 @@ public:
                 return 0;
             }
 
-            // get ZMWMetrics fields, must be done before GetNext
-            // (which calls GetNextWithoutAdvancePos, which increments curRead)
+            //
+            // Bail now if the file is already done
+            //
+            if ((retVal = this->GetNext((FASTQSequence&)seq)) == 0) {
+                return 0;
+            }
+            // GetNext calls GetNextWithoutPosAdvance, which increments curRead
+            curRead--;
             if (includedFields["HQRegionSNR"]) {
                 GetNextHQRegionSNR(seq);
             }
             if (includedFields["ReadScore"]) {
                 GetNextReadScore(seq);
             }
-
-        //
-        // Bail now if the file is already done
-        //
-        if ((retVal = this->GetNext((FASTQSequence&)seq)) == 0) {
-            return 0;
-        }
+            curRead++;
 
             DNALength nextBasePos = curBasePos;
             curBasePos = curBasPosCopy;
@@ -716,11 +753,8 @@ public:
             // By default, the subread of a read without subread information is
             // the whole read.
             //
-            seq.subreadStart = 0;
-            seq.subreadEnd   = seq.length;
+            seq.SubreadStart(0).SubreadEnd(seq.length);
             zmwReader.GetNext(seq.zmwData);
-            seq.xy[0] = seq.zmwData.x;
-            seq.xy[1] = seq.zmwData.y;
         } catch(H5::DataSetIException e) {
             cout << "ERROR, could not read pulse metrics for SMRTSequence " 
                 << seq.GetName() << endl;
@@ -728,16 +762,7 @@ public:
         }
         return retVal;
     }
-    /*
-       int16_t xy[2];
-       if (zmwReader.readHoleXY) {
-       zmwReader.xyArray.Read(curRead, curRead+1, 0, 2, xy);
-       }
-       else {
-       xy[0] = xy[1] = 0;
-       }
-       seq.StoreXY(xy);
-       */
+
     void GetAllPulseIndex(std::vector<int> &pulseIndex) {
         CheckMemoryAllocation(pulseIndexArray.arrayLength, maxAllocNElements, "PulseIndex");
         pulseIndex.resize(pulseIndexArray.arrayLength);
@@ -799,12 +824,10 @@ public:
 
         std::string readTitle;
         unsigned int holeNumber;
-        unsigned char holeStatus;
         zmwReader.holeNumberArray.Read(curRead, curRead+1, &holeNumber);
-        seq.StoreHoleNumber(holeNumber);
 
+        unsigned char holeStatus;
         zmwReader.holeStatusArray.Read(curRead, curRead+1, &holeStatus);
-        seq.StoreHoleStatus(holeStatus);
 
         DNALength simIndex=0, simCoordinate=0;
 
@@ -856,7 +879,7 @@ public:
             delete [] seq.widthInFrames;
             seq.widthInFrames = NULL;
         }
-        seq.widthInFrames = new HalfWord[seq.length];
+        seq.widthInFrames = ProtectedNew<HalfWord>(seq.length);
         basWidthInFramesArray.Read((int)curBasePos, (int) curBasePos + seq.length, (HalfWord*) seq.widthInFrames);
         return seq.length;
     }
@@ -867,7 +890,7 @@ public:
             delete [] seq.preBaseFrames;
             seq.preBaseFrames = NULL;
         }
-        seq.preBaseFrames = new HalfWord[seq.length];
+        seq.preBaseFrames = ProtectedNew<HalfWord>(seq.length);
         preBaseFramesArray.Read((int)curBasePos, (int) curBasePos + seq.length, (HalfWord*) seq.preBaseFrames);
         return seq.length;
     }
@@ -877,12 +900,21 @@ public:
             delete [] seq.pulseIndex;
             seq.pulseIndex = NULL;
         }
-        seq.pulseIndex = new int[seq.length];
+        seq.pulseIndex = ProtectedNew<int>(seq.length);
         pulseIndexArray.Read((int)curBasePos, (int) curBasePos + seq.length, (int*) seq.pulseIndex);
         return seq.length;
     }
     int GetNextHQRegionSNR(SMRTSequence &seq) {
-        hqRegionSNRMatrix.Read(curRead, curRead + 1, seq.hqRegionSnr);
+        float snrs[4];
+        hqRegionSNRMatrix.Read(curRead, curRead + 1, snrs);
+
+        // Get BaseMap from ScanData.
+        std::map<char, size_t> baseMap = scanDataReader.BaseMap();
+        assert(ScanData::IsValidBaseMap(baseMap));
+        seq.HQRegionSnr('A', snrs[baseMap['A']])
+           .HQRegionSnr('C', snrs[baseMap['C']])
+           .HQRegionSnr('G', snrs[baseMap['G']])
+           .HQRegionSnr('T', snrs[baseMap['T']]);
         return 4;
     }
     int GetNextReadScore(SMRTSequence &seq) {
diff --git a/hdf/HDFBaseCallsWriter.cpp b/hdf/HDFBaseCallsWriter.cpp
new file mode 100644
index 0000000..5fe5938
--- /dev/null
+++ b/hdf/HDFBaseCallsWriter.cpp
@@ -0,0 +1,326 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Yuan Li
+
+#include "HDFBaseCallsWriter.hpp"
+
+HDFBaseCallsWriter::HDFBaseCallsWriter(const std::string & filename,
+                                       HDFGroup & parentGroup,
+                                       const std::map<char, size_t> & baseMap,
+                                       const std::vector<std::string> & qvsToWrite,
+                                       const bool fakeQualityValue)
+    : HDFWriterBase(filename)
+    , parentGroup_(parentGroup)
+    , baseMap_(baseMap)
+    , qvsToWrite_({}) // Starts empty; SanityCheckQVs() below fills in the accepted names.
+    , zmwWriter_(nullptr)
+    , zmwMetricsWriter_(nullptr)
+    , fakeQualityValue_(fakeQualityValue)
+{
+    // Add BaseCalls as a child group to the parent group.
+    AddChildGroup(parentGroup_, basecallsGroup_, PacBio::GroupNames::basecalls);
+
+    // Initialize the 'basecall' array under the BaseCalls group.
+    basecallArray_.Initialize(basecallsGroup_, PacBio::GroupNames::basecall);
+
+    // Validate the requested QV names; the QV arrays are initialized only when
+    // every requested name is accepted by SanityCheckQVs().
+    if (SanityCheckQVs(qvsToWrite)) {
+        if (not InitializeQVGroups()) {
+            AddErrorMessage("Failed to initialize QV Groups.");
+        }
+    }
+
+    // Create the ZMW writer, handing it the BaseCalls group.
+    zmwWriter_.reset(new HDFZMWWriter(Filename(), basecallsGroup_, true));
+
+    // Create the ZMW metrics writer, handing it the BaseCalls group and the base map.
+    zmwMetricsWriter_.reset(new HDFZMWMetricsWriter(Filename(), basecallsGroup_, baseMap_));
+}
+
+// Gather this writer's own errors, then those of the ZMW and ZMWMetrics writers.
+std::vector<std::string> HDFBaseCallsWriter::Errors(void) const {
+    std::vector<std::string> collected(this->errors_);
+    const std::vector<std::string> fromZmw(zmwWriter_->Errors());
+    collected.insert(collected.end(), fromZmw.begin(), fromZmw.end());
+    const std::vector<std::string> fromZmwMetrics(zmwMetricsWriter_->Errors());
+    collected.insert(collected.end(), fromZmwMetrics.begin(), fromZmwMetrics.end());
+    return collected;
+}
+
+// Destroying the writer closes it; Close() itself flushes before closing the arrays.
+// NOTE(review): Close() therefore runs again if the caller already closed the writer.
+HDFBaseCallsWriter::~HDFBaseCallsWriter(void) { Close(); }
+
+// Names of the QVs accepted for writing, i.e. the contents of qvsToWrite_
+// as last rebuilt by SanityCheckQVs().
+const std::vector<std::string> & HDFBaseCallsWriter::QVNamesToWrite(void) const { return qvsToWrite_; }
+
+// Names of all QVs this writer can be asked to write: the list
+// PacBio::GroupNames::BaxQVNames.
+const std::vector<std::string> & HDFBaseCallsWriter::ValidQVNames(void) const { return PacBio::GroupNames::BaxQVNames; }
+
+// Initialize the array of every enabled QV under the BaseCalls group. Each enabled array
+// is attempted even after an earlier failure; returns false if any Initialize() gave 0.
+bool HDFBaseCallsWriter::InitializeQVGroups(void) {
+    bool allInitialized = true;
+    // QualityValue is keyed on the fake-QV switch rather than on the requested QV list.
+    if (FakeQualityValue())
+        allInitialized = qualityValueArray_.Initialize(basecallsGroup_, PacBio::GroupNames::qualityvalue) and allInitialized;
+    if (_HasQV(PacBio::GroupNames::deletionqv))
+        allInitialized = deletionQVArray_.Initialize(basecallsGroup_, PacBio::GroupNames::deletionqv) and allInitialized;
+    if (_HasQV(PacBio::GroupNames::deletiontag))
+        allInitialized = deletionTagArray_.Initialize(basecallsGroup_, PacBio::GroupNames::deletiontag) and allInitialized;
+    if (_HasQV(PacBio::GroupNames::insertionqv))
+        allInitialized = insertionQVArray_.Initialize(basecallsGroup_, PacBio::GroupNames::insertionqv) and allInitialized;
+    if (_HasQV(PacBio::GroupNames::mergeqv))
+        allInitialized = mergeQVArray_.Initialize(basecallsGroup_, PacBio::GroupNames::mergeqv) and allInitialized;
+    if (_HasQV(PacBio::GroupNames::substitutionqv))
+        allInitialized = substitutionQVArray_.Initialize(basecallsGroup_, PacBio::GroupNames::substitutionqv) and allInitialized;
+    if (_HasQV(PacBio::GroupNames::substitutiontag))
+        allInitialized = substitutionTagArray_.Initialize(basecallsGroup_, PacBio::GroupNames::substitutiontag) and allInitialized;
+    if (_HasQV(PacBio::GroupNames::prebaseframes))
+        allInitialized = preBaseFramesArray_.Initialize(basecallsGroup_, PacBio::GroupNames::prebaseframes) and allInitialized;
+    if (_HasQV(PacBio::GroupNames::widthinframes))
+        allInitialized = widthInFramesArray_.Initialize(basecallsGroup_, PacBio::GroupNames::widthinframes) and allInitialized;
+    return allInitialized;
+}
+
+// Rebuild qvsToWrite_ from the requested names: names found in ValidQVNames() are kept
+// once each; every other name is reported through AddErrorMessage().
+// Returns true only when all requested names are valid.
+bool HDFBaseCallsWriter::SanityCheckQVs(const std::vector<std::string> & qvsToWrite) {
+    bool allQVsToAddInSpec = true;
+    qvsToWrite_.clear();
+    const std::vector<std::string> & qvsInSpec = ValidQVNames();
+    for (const auto & qv : qvsToWrite) { // bind by reference: no per-name string copy
+        if (std::find(qvsInSpec.begin(), qvsInSpec.end(), qv) == qvsInSpec.end()) {
+            allQVsToAddInSpec = false;
+            AddErrorMessage(std::string("Unsupported quality value ") + qv);
+        } else if (std::find(qvsToWrite_.begin(), qvsToWrite_.end(), qv) == qvsToWrite_.end()) {
+            qvsToWrite_.push_back(qv); // a repeated valid name is kept only once
+        }
+    }
+    return allQVsToAddInSpec;
+}
+
+// Create the changelist-id attribute on the BaseCalls group with the basecaller version
+// as its value. Always returns true; no failure is detected in this function.
+bool HDFBaseCallsWriter::WriteBaseCallerVersion(const std::string & basecallerVersion) {
+    changeListIDAtom_.Create(basecallsGroup_.group, PacBio::AttributeNames::Common::changelistid, basecallerVersion);
+    return true;
+}
+
+// Write one read: ZMW data, ZMW metrics, basecalls, then each per-base dataset.
+// The steps run in order and stop at the first one that fails; the result is
+// true only if every step succeeded.
+bool HDFBaseCallsWriter::WriteOneZmw(const SMRTSequence & read) {
+    return zmwWriter_->WriteOneZmw(read)
+       and zmwMetricsWriter_->WriteOneZmw(read)
+       and _WriteBasecall(read)
+       and (not FakeQualityValue() or _WriteQualityValue(read))
+       and _WriteDeletionQV(read)
+       and _WriteDeletionTag(read)
+       and _WriteInsertionQV(read)
+       and _WriteMergeQV(read)
+       and _WriteSubstitutionTag(read)
+       and _WriteSubstitutionQV(read)
+       and _WritePreBaseFrames(read)
+       and _WriteWidthInFrames(read);
+}
+
+bool HDFBaseCallsWriter::_WriteBasecall(const SMRTSequence & read) { // Write the read's bases.
+	basecallArray_.Write((const unsigned char*) read.seq, read.length); // read.length bases from read.seq
+    return true; // unconditional: no failure is detected in this function
+}
+
+// With FakeQualityValue() on, write a stand-in QualityValue track: the read's DeletionQV if
+// present, else MAX_QUALITY_VALUE per base. An empty read is an error (false); else true.
+bool HDFBaseCallsWriter::_WriteQualityValue(const SMRTSequence & read) {
+    if (not FakeQualityValue()) return true;
+    if (read.length <= 0) {
+        AddErrorMessage(read.GetTitle() + std::string(" is empty."));
+        return false;
+    }
+    if (read.deletionQV.Empty()) {
+        QualityValueVector<QualityValue> filler; // temporary buffer, released below
+        filler.Allocate(read.length);
+        memset(filler.data, MAX_QUALITY_VALUE, read.length * sizeof(QualityValue));
+        qualityValueArray_.Write(filler.data, read.length);
+        filler.Free();
+    } else {
+        qualityValueArray_.Write(read.deletionQV.data, read.length);
+    }
+    return true;
+}
+
+// Write the read's DeletionQV values when DeletionQV output is enabled.
+// Returns true if the dataset is disabled or the values were handed to the array;
+// records an error and returns false if the read carries no DeletionQV.
+bool HDFBaseCallsWriter::_WriteDeletionQV(const SMRTSequence & read) {
+    if (not HasDeletionQV()) return true;
+    if (read.deletionQV.Empty()) {
+        AddErrorMessage(std::string(PacBio::GroupNames::deletionqv) + " absent in read " + read.GetTitle());
+        return false;
+    }
+    deletionQVArray_.Write(read.deletionQV.data, read.length);
+    return true;
+}
+
+// Write the read's DeletionTag values when DeletionTag output is enabled.
+// Returns true if the dataset is disabled or the values were handed to the array;
+// records an error and returns false if read.deletionTag is null.
+bool HDFBaseCallsWriter::_WriteDeletionTag(const SMRTSequence & read) {
+    if (not HasDeletionTag()) return true;
+    if (read.deletionTag == nullptr) {
+        AddErrorMessage(std::string(PacBio::GroupNames::deletiontag) + " absent in read " + read.GetTitle());
+        return false;
+    }
+    deletionTagArray_.Write(read.deletionTag, read.length);
+    return true;
+}
+
+// Write the read's InsertionQV values when InsertionQV output is enabled.
+// Returns true if the dataset is disabled or the values were handed to the array;
+// records an error and returns false if the read carries no InsertionQV.
+bool HDFBaseCallsWriter::_WriteInsertionQV(const SMRTSequence & read) {
+    if (not HasInsertionQV()) return true;
+    if (read.insertionQV.Empty()) {
+        AddErrorMessage(std::string(PacBio::GroupNames::insertionqv) + " absent in read " + read.GetTitle());
+        return false;
+    }
+    insertionQVArray_.Write(read.insertionQV.data, read.length);
+    return true;
+}
+
+// Write the read's SubstitutionTag values when SubstitutionTag output is enabled.
+// Returns true if the dataset is disabled or the values were handed to the array;
+// records an error and returns false if read.substitutionTag is null.
+bool HDFBaseCallsWriter::_WriteSubstitutionTag(const SMRTSequence & read) {
+    if (not HasSubstitutionTag()) return true;
+    if (read.substitutionTag == nullptr) {
+        AddErrorMessage(std::string(PacBio::GroupNames::substitutiontag) + " absent in read " + read.GetTitle());
+        return false;
+    }
+    substitutionTagArray_.Write(read.substitutionTag, read.length);
+    return true;
+}
+
+// Write the read's SubstitutionQV values when SubstitutionQV output is enabled.
+// Returns true if the dataset is disabled or the values were handed to the array;
+// records an error and returns false if the read carries no SubstitutionQV.
+bool HDFBaseCallsWriter::_WriteSubstitutionQV(const SMRTSequence & read) {
+    if (not HasSubstitutionQV()) return true;
+    if (read.substitutionQV.Empty()) {
+        AddErrorMessage(std::string(PacBio::GroupNames::substitutionqv) + " absent in read " + read.GetTitle());
+        return false;
+    }
+    substitutionQVArray_.Write(read.substitutionQV.data, read.length);
+    return true;
+}
+
+// Write the read's MergeQV values when MergeQV output is enabled.
+// Returns true if the dataset is disabled or the values were handed to the array;
+// records an error and returns false if the read carries no MergeQV.
+bool HDFBaseCallsWriter::_WriteMergeQV(const SMRTSequence & read) {
+    if (not HasMergeQV()) return true;
+    if (read.mergeQV.Empty()) {
+        AddErrorMessage(std::string(PacBio::GroupNames::mergeqv) + " absent in read " + read.GetTitle());
+        return false;
+    }
+    mergeQVArray_.Write(read.mergeQV.data, read.length);
+    return true;
+}
+
+// Write the read's PreBaseFrames values when PreBaseFrames output is enabled.
+// Returns true if the dataset is disabled or the values were handed to the array;
+// records an error and returns false if read.preBaseFrames is null.
+bool HDFBaseCallsWriter::_WritePreBaseFrames(const SMRTSequence & read) {
+    if (not HasPreBaseFrames()) return true;
+    if (read.preBaseFrames == nullptr) {
+        AddErrorMessage(std::string(PacBio::GroupNames::prebaseframes) + " absent in read " + read.GetTitle());
+        return false;
+    }
+    preBaseFramesArray_.Write(read.preBaseFrames, read.length);
+    return true;
+}
+
+// Write the read's WidthInFrames values when WidthInFrames output is enabled.
+// Returns true if the dataset is disabled or the values were handed to the array;
+// records an error and returns false if read.widthInFrames is null.
+bool HDFBaseCallsWriter::_WriteWidthInFrames(const SMRTSequence & read) {
+    if (not HasWidthInFrames()) return true;
+    if (read.widthInFrames == nullptr) {
+        AddErrorMessage(std::string(PacBio::GroupNames::widthinframes) + " absent in read " + read.GetTitle());
+        return false;
+    }
+    widthInFramesArray_.Write(read.widthInFrames, read.length);
+    return true;
+}
+ 
+
+void HDFBaseCallsWriter::Flush(void) { // Flush the basecall array, enabled QV arrays, and both ZMW writers.
+    basecallArray_.Flush();
+    // Optional arrays: each is flushed only when its Has*() accessor reports true.
+    if (HasQualityValue())    qualityValueArray_.Flush();
+    if (HasDeletionQV())      deletionQVArray_.Flush();
+    if (HasDeletionTag())     deletionTagArray_.Flush();
+    if (HasInsertionQV())     insertionQVArray_.Flush();
+    if (HasMergeQV())         mergeQVArray_.Flush();
+    if (HasSubstitutionQV())  substitutionQVArray_.Flush();
+    if (HasSubstitutionTag()) substitutionTagArray_.Flush();
+    if (HasPreBaseFrames())   preBaseFramesArray_.Flush();
+    if (HasWidthInFrames())   widthInFramesArray_.Flush();
+    // Forward the flush to the ZMW and ZMWMetrics writers as well.
+    zmwWriter_->Flush();
+    zmwMetricsWriter_->Flush();
+}
+
+void HDFBaseCallsWriter::Close(void) { // Flush pending data, then close the arrays.
+    this->Flush();
+    // NOTE(review): zmwWriter_/zmwMetricsWriter_ are flushed above but not closed here; confirm intended.
+    basecallArray_.Close();
+    // Optional arrays are closed only when the matching Has*() accessor reports true.
+    if (HasQualityValue())    qualityValueArray_.Close();
+    if (HasDeletionQV())      deletionQVArray_.Close();
+    if (HasDeletionTag())     deletionTagArray_.Close();
+    if (HasInsertionQV())     insertionQVArray_.Close();
+    if (HasMergeQV())         mergeQVArray_.Close();
+    if (HasSubstitutionQV())  substitutionQVArray_.Close();
+    if (HasSubstitutionTag()) substitutionTagArray_.Close();
+    if (HasPreBaseFrames())   preBaseFramesArray_.Close();
+    if (HasWidthInFrames())   widthInFramesArray_.Close();
+}
diff --git a/hdf/HDFBaseCallsWriter.hpp b/hdf/HDFBaseCallsWriter.hpp
new file mode 100644
index 0000000..a65f5b0
--- /dev/null
+++ b/hdf/HDFBaseCallsWriter.hpp
@@ -0,0 +1,233 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Yuan Li
+
+#ifndef _BLASR_HDF_BASECALLS_WRITER_HPP_
+#define _BLASR_HDF_BASECALLS_WRITER_HPP_
+
+#include <memory>
+#include <algorithm>
+#include "HDFAtom.hpp"
+#include "HDFWriterBase.hpp"
+#include "HDFZMWWriter.hpp"
+#include "HDFZMWMetricsWriter.hpp"
+
+class HDFBaseCallsWriter: public HDFWriterBase {  // Writes the BaseCalls datasets: Basecall plus the optional QV arrays.
+    /// \name \{
+public:
+    HDFBaseCallsWriter(const std::string & filename,
+                       HDFGroup & parentGroup,
+                       const std::map<char, size_t> & baseMap,
+                       const std::vector<std::string> & qvsToWrite = {},
+                       const bool fakeQualityValue = true);
+
+    ~HDFBaseCallsWriter(void);
+
+    /// \brief Write base caller version (changeListId)
+    bool WriteBaseCallerVersion(const std::string & basecallerVersion);
+
+    /// \brief Write a zmw read.
+    bool WriteOneZmw(const SMRTSequence & read);
+
+    /// \brief return a vector of QV name strings specified in file format specification.
+    const std::vector<std::string> & ValidQVNames(void) const;
+
+    /// \brief return a vector of QV name strings to write.
+    const std::vector<std::string> & QVNamesToWrite(void) const;
+    /// \brief Flush the Basecall dataset, every enabled QV dataset and the ZMW writers.
+    void Flush(void);
+    /// \brief Call Flush(), then close the Basecall dataset and every enabled QV dataset.
+    void Close(void);
+
+public:
+    /// \brief Sanity check QVs to add. Remove QVs which are
+    ///        not included in file format specification, and
+    ///        remove redundant QVs.
+    /// \returns Whether or not a QV is not included in specification.
+    bool SanityCheckQVs(const std::vector<std::string> & qvsToWrite);
+
+    /// \returns true if FakeQualityValue() and qualityValueArray_
+    ///          has been initialized
+    inline bool HasQualityValue(void) const;
+
+    /// \returns true if has DeletionQV dataset and deletionQVArray_
+    ///          has been initialized.
+    inline bool HasDeletionQV(void) const;
+    inline bool HasDeletionTag(void) const;
+    inline bool HasInsertionQV(void) const;
+    inline bool HasSubstitutionTag(void) const;
+    inline bool HasSubstitutionQV(void) const;
+    inline bool HasMergeQV(void) const;
+    inline bool HasPreBaseFrames(void) const;
+    inline bool HasIPD(void) const;            ///< same as HasPreBaseFrames()
+    inline bool HasWidthInFrames(void) const;
+    inline bool HasPulseWidth(void) const;     ///< same as HasWidthInFrames()
+
+    std::vector<std::string> Errors(void) const;
+
+public:
+    /// \returns whether or not to fake QualityValue.
+    bool FakeQualityValue() const;
+
+private:
+    bool fakeQualityValue_;
+
+
+private:
+    inline bool _HasQV(const std::string & qvToQuery) const;
+
+    bool _WriteBasecall(const SMRTSequence & read);
+
+    /// Write fake values to the 'QualityValue' dataset.
+    bool _WriteQualityValue(const SMRTSequence & read);
+
+    /// Write real data in the following.
+    bool _WriteDeletionQV(const SMRTSequence & read);
+    bool _WriteDeletionTag(const SMRTSequence & read);
+    bool _WriteInsertionQV(const SMRTSequence & read);
+    bool _WriteSubstitutionTag(const SMRTSequence & read);
+    bool _WriteSubstitutionQV(const SMRTSequence & read);
+    bool _WriteMergeQV(const SMRTSequence & read);
+    bool _WritePreBaseFrames(const SMRTSequence & read);
+    bool _WriteWidthInFrames(const SMRTSequence & read);
+
+private:
+    /// \brief Create and initialize QV groups.
+    /// \returns Whether or not QV groups initialized successfully.
+    bool InitializeQVGroups(void);
+
+private:
+    HDFGroup & parentGroup_;  ///< reference member: the referenced group must outlive this writer
+    std::map<char, size_t> baseMap_;
+    std::vector<std::string> qvsToWrite_;  ///< QV names searched by _HasQV()
+    std::unique_ptr<HDFZMWWriter> zmwWriter_;
+    std::unique_ptr<HDFZMWMetricsWriter> zmwMetricsWriter_;
+    HDFGroup basecallsGroup_;
+
+private:
+    HDFAtom<std::string> changeListIDAtom_;
+
+    /// BaseCalls/Basecall group
+    BufferedHDFArray<unsigned char> basecallArray_;
+
+    /// This is a mandatory dataset for 2.3, whose existence is
+    /// to ensure bam2bax to generate 2.3 compatible bax.h5 files.
+    BufferedHDFArray<unsigned char> qualityValueArray_;
+
+    /// \brief Define arrays for rich quality values.
+    ///        DeletionQV         dq --> BaseCalls/DeletionQV
+    ///        DeletionTag        dt --> BaseCalls/DeletionTag
+    ///        InsertionQV        iq --> BaseCalls/InsertionQV
+    ///        MergeQV            mq --> BaseCalls/MergeQV
+    ///        SubstitutionQV     sq --> BaseCalls/SubstitutionQV
+    ///        SubstitutionTag    st --> BaseCalls/SubstitutionTag
+    ///        Ipd:Frames         ip --> BaseCalls/PreBaseFrames
+    ///        PulseWidth:Frames  pw --> BaseCalls/WidthInFrames
+    BufferedHDFArray<unsigned char> deletionQVArray_;
+    BufferedHDFArray<unsigned char> deletionTagArray_;
+    BufferedHDFArray<unsigned char> insertionQVArray_;
+    BufferedHDFArray<unsigned char> mergeQVArray_;
+    BufferedHDFArray<unsigned char> substitutionQVArray_;
+    BufferedHDFArray<unsigned char> substitutionTagArray_;
+    BufferedHDFArray<HalfWord> preBaseFramesArray_;
+    BufferedHDFArray<HalfWord> widthInFramesArray_;
+
+    /// \}
+};
+
+inline bool HDFBaseCallsWriter::_HasQV(const std::string & qvToQuery) const {
+    const auto last = qvsToWrite_.end();  // linear search over the QV names to write
+    return std::find(qvsToWrite_.begin(), last, qvToQuery) != last;
+}
+
+/// \returns true if FakeQualityValue() holds and qualityValueArray_ is initialized.
+inline bool HDFBaseCallsWriter::HasQualityValue(void) const {
+    return FakeQualityValue() and qualityValueArray_.IsInitialized();
+}
+
+/// \returns true if the deletionqv name is in qvsToWrite_ and deletionQVArray_ is initialized.
+inline bool HDFBaseCallsWriter::HasDeletionQV(void) const {
+    return _HasQV(PacBio::GroupNames::deletionqv) and deletionQVArray_.IsInitialized();
+}
+
+/// \returns true if the deletiontag name is in qvsToWrite_ and deletionTagArray_ is initialized.
+inline bool HDFBaseCallsWriter::HasDeletionTag(void) const {
+    return _HasQV(PacBio::GroupNames::deletiontag) and deletionTagArray_.IsInitialized();
+}
+
+/// \returns true if the insertionqv name is in qvsToWrite_ and insertionQVArray_ is initialized.
+inline bool HDFBaseCallsWriter::HasInsertionQV(void) const {
+    return _HasQV(PacBio::GroupNames::insertionqv) and insertionQVArray_.IsInitialized();
+}
+
+/// \returns true if the substitutiontag name is in qvsToWrite_ and substitutionTagArray_ is initialized.
+inline bool HDFBaseCallsWriter::HasSubstitutionTag(void) const {
+    return _HasQV(PacBio::GroupNames::substitutiontag) and substitutionTagArray_.IsInitialized();
+}
+
+/// \returns true if the substitutionqv name is in qvsToWrite_ and substitutionQVArray_ is initialized.
+inline bool HDFBaseCallsWriter::HasSubstitutionQV(void) const {
+    return _HasQV(PacBio::GroupNames::substitutionqv) and substitutionQVArray_.IsInitialized();
+}
+
+/// \returns true if the mergeqv name is in qvsToWrite_ and mergeQVArray_ is initialized.
+inline bool HDFBaseCallsWriter::HasMergeQV(void) const {
+    return _HasQV(PacBio::GroupNames::mergeqv) and mergeQVArray_.IsInitialized();
+}
+
+/// \returns true if the prebaseframes name is in qvsToWrite_ and preBaseFramesArray_ is initialized.
+inline bool HDFBaseCallsWriter::HasPreBaseFrames(void) const {
+    return _HasQV(PacBio::GroupNames::prebaseframes) and preBaseFramesArray_.IsInitialized();
+}
+
+/// Same as HasPreBaseFrames().
+inline bool HDFBaseCallsWriter::HasIPD(void) const
+{ return HasPreBaseFrames(); }
+
+/// \returns true if the widthinframes name is in qvsToWrite_ and widthInFramesArray_ is initialized.
+inline bool HDFBaseCallsWriter::HasWidthInFrames(void) const {
+    return _HasQV(PacBio::GroupNames::widthinframes) and widthInFramesArray_.IsInitialized();
+}
+
+/// Same as HasWidthInFrames().
+inline bool HDFBaseCallsWriter::HasPulseWidth(void) const
+{ return HasWidthInFrames(); }
+
+/// \returns the fakeQualityValue_ flag.
+inline bool HDFBaseCallsWriter::FakeQualityValue(void) const
+{ return fakeQualityValue_; }
+
+#endif
diff --git a/hdf/HDFBaxWriter.cpp b/hdf/HDFBaxWriter.cpp
new file mode 100644
index 0000000..745c2c5
--- /dev/null
+++ b/hdf/HDFBaxWriter.cpp
@@ -0,0 +1,141 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Yuan Li
+
+#include "HDFBaxWriter.hpp"
+
+HDFBaxWriter::HDFBaxWriter(const std::string & filename,
+                           const ScanData & sd,
+                           const std::string & basecallerVersion,
+                           const std::vector<std::string> & qvsToWrite,
+                           const std::vector<std::string> & regionTypes,
+                           const H5::FileAccPropList & fileAccPropList)
+    : HDFWriterBase(filename)
+    , fileaccproplist_(fileAccPropList)
+    , scandataWriter_(nullptr)
+    , basecallsWriter_(nullptr) 
+    , regionsWriter_(nullptr)
+{   // Checks the metadata, opens the file, then creates the three sub-writers in order.
+    // Sanity check chemistry meta data; the bool result is not checked, failures are only recorded as errors.
+    SanityCheckChemistry(sd.BindingKit(),
+                         sd.SequencingKit(), 
+                         basecallerVersion);
+
+    // Open the output file with the H5F_ACC_TRUNC flag and the given access property list.
+    outfile_.Open(filename_, H5F_ACC_TRUNC, fileaccproplist_);
+
+    // Add PulseData group to the root group '/'
+    AddChildGroup(outfile_.rootGroup, pulseDataGroup_, PacBio::GroupNames::pulsedata);
+
+    // Create a ScanData writer on the root group and write the scan data.
+    scandataWriter_.reset(new HDFScanDataWriter(outfile_.rootGroup)); 
+    scandataWriter_->Write(sd);
+
+    // Create a BaseCaller writer under PulseData; the bool result of WriteBaseCallerVersion is not checked.
+    basecallsWriter_.reset(new HDFBaseCallsWriter(filename_, pulseDataGroup_, sd.BaseMap(), qvsToWrite));
+    basecallsWriter_->WriteBaseCallerVersion(basecallerVersion);
+
+    // Create a Regions writer under PulseData.
+    regionsWriter_.reset(new HDFRegionsWriter(filename_, pulseDataGroup_, regionTypes));
+}
+
+HDFBaxWriter::~HDFBaxWriter(void) {
+    this->Close();  // closes the three sub-writers, then the output file
+}
+
+void HDFBaxWriter::Flush(void) {  // Flushes the base calls and regions writers; scandataWriter_ is not touched here.
+    basecallsWriter_->Flush();
+    regionsWriter_->Flush();
+}
+
+/// \returns a copy of errors_ followed by the errors reported by the
+///          base calls writer and then by the regions writer.
+std::vector<std::string> HDFBaxWriter::Errors(void) {
+    std::vector<std::string> errors = errors_;
+    // Collection from the scan data writer is currently disabled:
+    //for (auto error: scandataWriter_->Errors())
+    //    errors.emplace_back(error);
+    // Bind by const reference so each message is copied once, not twice.
+    for (const auto & error: basecallsWriter_->Errors())
+        errors.push_back(error);
+    for (const auto & error: regionsWriter_->Errors())
+        errors.push_back(error);
+    return errors;
+}
+
+void HDFBaxWriter::Close(void) {   // Called from the destructor.
+    basecallsWriter_->Close();     // HDFBaseCallsWriter::Close() flushes before closing its datasets
+    scandataWriter_->Close();
+    regionsWriter_->Close();
+    outfile_.Close();              // the file itself is closed last, after all sub-writers
+}
+
+bool HDFBaxWriter::SanityCheckChemistry(const std::string & bindingKit,
+                                        const std::string & sequencingKit,
+                                        const std::string & basecallerVersion)
+{
+    bool OK = true;
+    if (bindingKit.empty()) {
+        OK = false;
+        AddErrorMessage("Binding kit must be specified.");
+    }
+    if (sequencingKit.empty()) {
+        OK = false;
+        AddErrorMessage("Sequencing kit must be specified.");
+    }
+    if (basecallerVersion.empty()) {
+        OK = false;
+        AddErrorMessage("Base caller version must be specified.");
+    }
+    return OK;
+}
+
+bool HDFBaxWriter::WriteOneZmw(const SMRTSequence & seq) {  // Writes base calls only; no region is written by this overload.
+    return basecallsWriter_->WriteOneZmw(seq);
+}
+
+/// Writes the read, then its regions. When regions is empty, a single
+/// HQRegion annotation built with (HoleNumber, HQRegion, 0, 0, 0) is written instead.
+bool HDFBaxWriter::WriteOneZmw(const SMRTSequence & seq,
+                               const std::vector<RegionAnnotation> & regions) {
+    if (not WriteOneZmw(seq)) return false;
+
+    if (not regions.empty()) return regionsWriter_->Write(regions);
+
+    const RegionAnnotation placeholder(seq.HoleNumber(), HQRegion, 0, 0, 0);
+    std::vector<RegionAnnotation> fake(1, placeholder);
+    return regionsWriter_->Write(fake);
+}
diff --git a/hdf/HDFBaxWriter.hpp b/hdf/HDFBaxWriter.hpp
new file mode 100644
index 0000000..c390d8f
--- /dev/null
+++ b/hdf/HDFBaxWriter.hpp
@@ -0,0 +1,172 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Yuan Li
+
+
+#ifndef _BLASR_HDF_BAX_WRITER_HPP_
+#define _BLASR_HDF_BAX_WRITER_HPP_
+
+#include <sstream>
+#include <memory>
+#include "Enumerations.h"
+#include "SMRTSequence.hpp"
+#include "HDFFile.hpp"
+#include "HDFWriterBase.hpp"
+#include "HDFScanDataWriter.hpp"
+#include "HDFBaseCallsWriter.hpp"
+#include "HDFRegionsWriter.hpp"
+
+using namespace H5;   // NOTE(review): header-scope using-directive; it applies to every file that includes this header.
+using namespace std;  // NOTE(review): same concern; the declarations in this header already spell out std:: and H5::.
+
+class HDFBaxWriter : public HDFWriterBase {
+public:
+    /// \name Constructor & Related Methods
+    /// \{
+    /// \brief Sets output h5 file name, scan data, base caller version,
+    ///        QVs to write, region types and h5 file access property list.
+    /// \param[in] filename output h5 file name.
+    /// \param[in] sd scan data; an error is recorded if its binding kit or sequencing kit is empty.
+    /// \param[in] basecallerVersion meta data string; an error is recorded if it is empty.
+    /// \param[in] qvsToWrite Quality values to include in output h5 file.
+    /// \param[in] regionTypes regionTypes as /Regions/RegionTypes
+    /// \param[in] fileAccPropList H5 file access property list
+    HDFBaxWriter(const std::string & filename,
+                 const ScanData & sd,
+                 const std::string & basecallerVersion,
+                 const std::vector<std::string> & qvsToWrite,
+                 const std::vector<std::string> & regionTypes = PacBio::AttributeValues::Regions::regiontypes,
+                 const H5::FileAccPropList & fileAccPropList = H5::FileAccPropList::DEFAULT);
+    /// \brief Calls Close().
+	~HDFBaxWriter(void);
+
+    /// \brief Write one zmw sequence to output h5 file.
+    /// \param[in] seq the SMRTSequence to write
+    bool WriteOneZmw(const SMRTSequence & seq);
+
+    /// \brief Write one zmw sequence and its region table to output h5 file.
+    /// \param[in] seq the SMRTSequence to write
+    /// \param[in] regions region annotations of this zmw.
+    bool WriteOneZmw(const SMRTSequence & seq, 
+                     const std::vector<RegionAnnotation> & regions);
+
+    /// \brief Flushes the base calls writer and the regions writer.
+    void Flush(void);
+
+    /// \returns errors of this writer, the base calls writer and the regions writer.
+    std::vector<std::string> Errors(void);
+
+    /// \}
+
+private:
+    /// \name Private Variables
+    /// \{
+	HDFFile outfile_;  ///< HDFFile file handler
+
+    H5::FileAccPropList fileaccproplist_; ///< H5 file access property list
+
+	HDFGroup pulseDataGroup_; ///< /PulseData group
+
+private:
+    /// Points to scan data writer.
+    std::unique_ptr<HDFScanDataWriter>  scandataWriter_;
+    /// Points to base caller writer.
+    std::unique_ptr<HDFBaseCallsWriter> basecallsWriter_;
+    /// Points to region table writer.
+    std::unique_ptr<HDFRegionsWriter>   regionsWriter_;
+    /// \}
+
+public:
+    /// \name Which QV will be written.
+    /// \{
+    inline bool HasDeletionQV(void) const;
+    inline bool HasDeletionTag(void) const;
+    inline bool HasInsertionQV(void) const;
+    inline bool HasSubstitutionTag(void) const;
+    inline bool HasSubstitutionQV(void) const;
+    inline bool HasMergeQV(void) const;
+    inline bool HasPreBaseFrames(void) const;
+    inline bool HasIPD(void) const;            ///< same as HasPreBaseFrames()
+    inline bool HasWidthInFrames(void) const;
+    inline bool HasPulseWidth(void) const;     ///< same as HasWidthInFrames()
+    /// \}
+ 
+private:
+    /// \name Private Methods.
+    /// \{
+    /// \brief Checks whether chemistry triple, including binding kit,
+    ///        sequencing kit and base caller version, are set.
+    ///        Adds an error message for each one that is empty.
+    /// \returns true if none of them is empty.
+    bool SanityCheckChemistry(const std::string & bindingKit,
+                              const std::string & sequencingKit,
+                              const std::string & basecallerVersion);
+
+    /// \brief Closes the sub-writers, then the output file.
+    void Close(void);
+    /// \}
+};
+
+inline bool HDFBaxWriter::HasDeletionQV(void) const   // each Has*() query in this group forwards to basecallsWriter_
+{return basecallsWriter_->HasDeletionQV();}
+
+inline bool HDFBaxWriter::HasDeletionTag(void) const 
+{return basecallsWriter_->HasDeletionTag();}
+
+inline bool HDFBaxWriter::HasInsertionQV(void) const
+{return basecallsWriter_->HasInsertionQV();}
+
+inline bool HDFBaxWriter::HasSubstitutionTag(void) const
+{return basecallsWriter_->HasSubstitutionTag();}
+
+inline bool HDFBaxWriter::HasSubstitutionQV(void) const
+{return basecallsWriter_->HasSubstitutionQV();}
+
+inline bool HDFBaxWriter::HasMergeQV(void) const
+{return basecallsWriter_->HasMergeQV();}
+
+inline bool HDFBaxWriter::HasPreBaseFrames(void) const
+{return basecallsWriter_->HasPreBaseFrames();}
+
+inline bool HDFBaxWriter::HasIPD(void) const   // same as HasPreBaseFrames()
+{return this->HasPreBaseFrames();}
+
+inline bool HDFBaxWriter::HasWidthInFrames(void) const
+{return basecallsWriter_->HasWidthInFrames();}
+
+inline bool HDFBaxWriter::HasPulseWidth(void) const   // same as HasWidthInFrames()
+{return this->HasWidthInFrames();}
+#endif
diff --git a/hdf/HDFCmpFile.hpp b/hdf/HDFCmpFile.hpp
index d4d6984..ceefb94 100644
--- a/hdf/HDFCmpFile.hpp
+++ b/hdf/HDFCmpFile.hpp
@@ -182,6 +182,10 @@ public:
 
         // 2.
         HDFCmpRefAlignmentGroup *newGroup = new HDFCmpRefAlignmentGroup;
+        if (newGroup == nullptr) { 
+            cout << "ERROR, unable to allocate memory for cmp.h5 file." << endl; 
+            exit(1);
+        }
         newGroup->Create(rootGroup.rootGroup, refGroupName);
         refAlignGroups.push_back(newGroup);
         unsigned int id = refAlignGroups.size();
@@ -362,6 +366,10 @@ public:
         for (refSeqIndex = 0; refSeqIndex < cmpFile.refGroup.path.size(); refSeqIndex++) {
             HDFCmpRefAlignmentGroup* refAlignGroup;
             refAlignGroup = new HDFCmpRefAlignmentGroup;
+            if (refAlignGroup == nullptr) { 
+                cout << "ERROR, unable to allocate memory for cmp.h5 file." << endl; 
+                exit(1);
+            }
             refAlignGroup->Initialize(rootGroup.rootGroup.group, cmpFile.refGroup.path[refSeqIndex]);
             int refAlignGroupIndex = refAlignGroups.size();
             refAlignGroups.push_back(refAlignGroup);
@@ -630,7 +638,7 @@ public:
         //
         int queryStart = cmpAlignment.GetQueryStart();
         int queryEnd   = cmpAlignment.GetQueryEnd();
-        read.holeNumber = cmpAlignment.GetHoleNumber();
+        read.HoleNumber(cmpAlignment.GetHoleNumber());
         int refGroupId = cmpAlignment.GetRefGroupId();
         int alnGroupId = cmpAlignment.GetAlnGroupId();
         int refGroupIndex  = refGroupIdToArrayIndex[refGroupId];
@@ -814,6 +822,10 @@ public:
         
         // 2.
         HDFCmpRefAlignmentGroup *newGroup = new HDFCmpRefAlignmentGroup;
+        if (newGroup == nullptr) { 
+            cout << "ERROR, unable to allocate memory for cmp.h5 file." << endl; 
+            exit(1);
+        }
         newGroup->Create(rootGroup.rootGroup, refGroupName);
         refAlignGroups.push_back(newGroup);
 
diff --git a/hdf/HDFCmpReader.hpp b/hdf/HDFCmpReader.hpp
index f12d2e6..38a2839 100644
--- a/hdf/HDFCmpReader.hpp
+++ b/hdf/HDFCmpReader.hpp
@@ -233,6 +233,7 @@ public:
         for (refSeqIndex = 0; refSeqIndex < cmpFile.refGroup.path.size(); refSeqIndex++) {
             HDFCmpRefAlignmentGroup* refAlignGroup;
             refAlignGroup = new HDFCmpRefAlignmentGroup;
+            if (refAlignGroup == nullptr) {cout << "ERROR, unable to allocate memory for HDFCmpReader." << endl; exit(1);}
             refAlignGroup->Initialize(rootGroup.rootGroup.group, cmpFile.refGroup.path[refSeqIndex]);
             int refAlignGroupIndex = refAlignGroups.size();
             refAlignGroups.push_back(refAlignGroup);
diff --git a/hdf/HDFCmpRefAlignmentGroup.hpp b/hdf/HDFCmpRefAlignmentGroup.hpp
index 69650ec..165dc17 100644
--- a/hdf/HDFCmpRefAlignmentGroup.hpp
+++ b/hdf/HDFCmpRefAlignmentGroup.hpp
@@ -56,6 +56,7 @@ class HDFCmpRefAlignmentGroup {
     //
     int newReadGroupIndex = readGroups.size();
     HDFCmpExperimentGroup* readGroupPtr = new HDFCmpExperimentGroup;
+    if (readGroupPtr == nullptr) {cout << "ERROR, failed to allocate memory for HDFCmpExperimentGroup!" << endl; exit(1);}
     readGroups.push_back(readGroupPtr);
     experimentNameToIndex[readGroupName] = newReadGroupIndex;
 
@@ -77,7 +78,7 @@ class HDFCmpRefAlignmentGroup {
 	HDFCmpExperimentGroup* InitializeExperimentGroup(string experimentGroupName, set<string> &includedFields) {
 		if (refGroup.ContainsObject(experimentGroupName)) {
 			HDFCmpExperimentGroup* newGroup = new HDFCmpExperimentGroup;
-
+            if (newGroup == nullptr) {cout << "ERROR, failed to allocate memory for HDFCmpExperimentGroup!" << endl; exit(1);}
 			if (newGroup->Initialize(refGroup, experimentGroupName, includedFields) == 0) {
 				cout << "ERROR, could not initialize the exp group." << endl;
 				exit(1);
diff --git a/hdf/HDFData.cpp b/hdf/HDFData.cpp
index 0283992..1d1d090 100644
--- a/hdf/HDFData.cpp
+++ b/hdf/HDFData.cpp
@@ -7,7 +7,7 @@ H5Location* HDFData::GetObject() {
     return &dataset;
 }
 
-HDFData::HDFData(CommonFG* _container, string _datasetName) {
+HDFData::HDFData(CommonFG* _container, const string & _datasetName) {
     container   = _container;
     datasetName = _datasetName;
     fileDataSpaceInitialized = false;
@@ -20,7 +20,7 @@ HDFData::HDFData() {
     isInitialized = false;
 }
 
-bool HDFData::IsInitialized() {
+bool HDFData::IsInitialized() const {
     return isInitialized;
 }
 
@@ -32,18 +32,18 @@ int HDFData::Initialize(HDFGroup &parentGroup, const string &datasetName) {
     exit(1);
 }
 
-int HDFData::BaseInitializeDataset(CommonFG &hdfFile, string _datasetName) {
+int HDFData::BaseInitializeDataset(CommonFG &hdfFile, const string & _datasetName) {
     dataset   = hdfFile.openDataSet(_datasetName.c_str());
     isInitialized = true;
     fileDataSpaceInitialized = true;
     return 1;
 }
 
-int HDFData::InitializeDataset(HDFGroup &group, string _datasetName) {
+int HDFData::InitializeDataset(HDFGroup &group, const string & _datasetName) {
     return InitializeDataset(group.group, _datasetName);
 }
 
-int HDFData::InitializeDataset(CommonFG &hdfFile, string _datasetName) {
+int HDFData::InitializeDataset(CommonFG &hdfFile, const string & _datasetName) {
     try {
         datasetName = _datasetName;
         dataset   = hdfFile.openDataSet(_datasetName.c_str());
diff --git a/hdf/HDFData.hpp b/hdf/HDFData.hpp
index 5ca1455..f31a313 100644
--- a/hdf/HDFData.hpp
+++ b/hdf/HDFData.hpp
@@ -20,11 +20,11 @@ public:
 
     H5::H5Location* GetObject(); 
 
-    HDFData(H5::CommonFG* _container, std::string _datasetName); 
+    HDFData(H5::CommonFG* _container, const std::string & _datasetName); 
 
     HDFData(); 
 
-    bool IsInitialized(); 
+    bool IsInitialized() const; 
 
     //
     // Allow derived classes to be initialized generically.
@@ -40,11 +40,11 @@ public:
     //
     virtual int Initialize(HDFGroup &parentGroup, const std::string &datasetName);
 
-    int BaseInitializeDataset(H5::CommonFG &hdfFile, std::string _datasetName); 
+    int BaseInitializeDataset(H5::CommonFG &hdfFile, const std::string & _datasetName); 
 
-    int InitializeDataset(HDFGroup &group, std::string _datasetName); 
+    int InitializeDataset(HDFGroup &group, const std::string & _datasetName); 
 
-    int InitializeDataset(H5::CommonFG &hdfFile, std::string _datasetName); 
+    int InitializeDataset(H5::CommonFG &hdfFile, const std::string & _datasetName); 
 
     void Close(); 
 };
diff --git a/hdf/HDFPlsReader.hpp b/hdf/HDFPlsReader.hpp
index 750be08..e440674 100644
--- a/hdf/HDFPlsReader.hpp
+++ b/hdf/HDFPlsReader.hpp
@@ -311,7 +311,7 @@ class HDFPlsReader : public DatasetCollection, public HDFPulseDataFile  {
 
         Nucleotide * destSeqCopy = NULL;
         if (destSequence != "") {
-            destSeqCopy = new Nucleotide[destSequence.size()];
+            destSeqCopy = ProtectedNew<Nucleotide>(destSequence.size());
             for(int i = 0 ; i < destSequence.size(); i++) {
                 destSeqCopy[i] = (Nucleotide)destSequence[i];
             }
@@ -446,7 +446,7 @@ class HDFPlsReader : public DatasetCollection, public HDFPulseDataFile  {
         signalMatrix.Read(curPos, curPos + plsSeqLength, &signal[0]); // read off one row.
         int i;
         for (i = 0; i < basSeqLength; i++) {
-          dest[i] = signal[basToPlsIndex[i]*4 + scanDataReader.baseMap[basSeq[i]]];
+          dest[i] = signal[basToPlsIndex[i]*4 + scanDataReader.BaseMap()[basSeq[i]]];
         }
       }
       else {
@@ -479,7 +479,7 @@ class HDFPlsReader : public DatasetCollection, public HDFPulseDataFile  {
       pulseStartFrame.resize(seqLength);
       startFrameArray.Read(curPos, curPos + seqLength, &pulseStartFrame[0]);
       if (read.startFrame) {delete [] read.startFrame; read.startFrame = NULL;}
-      read.startFrame = new unsigned int[read.length];
+      read.startFrame = ProtectedNew<unsigned int>(read.length);
       StoreField(pulseStartFrame, basToPlsIndex, read.startFrame, read.length);
     }
 
@@ -488,27 +488,27 @@ class HDFPlsReader : public DatasetCollection, public HDFPulseDataFile  {
       pulseWidthInFrames.resize(seqLength);
       plsWidthInFramesArray.Read(curPos, curPos + seqLength, &pulseWidthInFrames[0]);
       if (read.widthInFrames) {delete [] read.widthInFrames; read.widthInFrames = NULL;}
-      read.widthInFrames = new HalfWord[read.length];
+      read.widthInFrames = ProtectedNew<HalfWord>(read.length);
       StoreField(pulseWidthInFrames, basToPlsIndex, read.widthInFrames, read.length);
     }
     
 		if (includedFields["MidSignal"]) {
             if (read.midSignal) {delete [] read.midSignal; read.midSignal = NULL;}
-      read.midSignal = new HalfWord[read.length];
+      read.midSignal = ProtectedNew<HalfWord>(read.length);
       ReadSignal("MidSignal", midSignalArray, midSignalMatrix, seqLength, midSignalNDims, 
                  read.seq, read.length, basToPlsIndex, read.midSignal);
     }
     
 		if (includedFields["MaxSignal"]) {
         if (read.maxSignal) {delete [] read.maxSignal; read.maxSignal = NULL;}
-      read.maxSignal = new HalfWord[read.length];
+      read.maxSignal = ProtectedNew<HalfWord>(read.length);
       ReadSignal("MaxSignal", maxSignalArray, maxSignalMatrix, seqLength, maxSignalNDims, 
                  read.seq, read.length, basToPlsIndex, read.maxSignal);
 		}
 
 		if (includedFields["MeanSignal"]) {
             if (read.meanSignal) {delete [] read.meanSignal; read.meanSignal = NULL;}
-      read.meanSignal = new HalfWord[read.length];
+      read.meanSignal = ProtectedNew<HalfWord>(read.length);
       ReadSignal("MeanSignal", meanSignalArray, meanSignalMatrix, seqLength, meanSignalNDims, 
                  read.seq, read.length, basToPlsIndex, read.meanSignal);
 		}
@@ -518,7 +518,7 @@ class HDFPlsReader : public DatasetCollection, public HDFPulseDataFile  {
       pulseClassifierQV.resize(seqLength);
       classifierQVArray.Read(curPos, curPos + seqLength, &pulseClassifierQV[0]);
       if (read.classifierQV) {delete [] read.classifierQV; read.classifierQV = NULL;}
-      read.classifierQV = new float[read.length];
+      read.classifierQV = ProtectedNew<float>(read.length);
       StoreField(pulseClassifierQV, basToPlsIndex, read.classifierQV, read.length);
 		}
     
diff --git a/hdf/HDFRegionTableReader.cpp b/hdf/HDFRegionTableReader.cpp
index 80bc2db..4df91ea 100644
--- a/hdf/HDFRegionTableReader.cpp
+++ b/hdf/HDFRegionTableReader.cpp
@@ -1,3 +1,4 @@
+#include <cassert>
 #include "HDFRegionTableReader.hpp"
 
 using namespace std;
@@ -29,30 +30,39 @@ int HDFRegionTableReader::Initialize(string &regionTableFileName,
         return 0;
     }
 
-    nRows = regions.GetNRows();
-
-    if (columnNames.Initialize(regions.dataset, "ColumnNames") == 0) {
+    if (columnNames.Initialize(regions, "ColumnNames") == 0) {
         return 0;
     }
-    if (regionTypes.Initialize(regions.dataset, "RegionTypes") == 0) {
+    if (regionTypes.Initialize(regions, "RegionTypes") == 0) {
         return 0;
     }
-    if (regionDescriptions.Initialize(regions.dataset, "RegionDescriptions") == 0) {
+    if (regionDescriptions.Initialize(regions, "RegionDescriptions") == 0) {
         return 0;
     }
-    if (regionSources.Initialize(regions.dataset,  "RegionSources") == 0) {
+    if (regionSources.Initialize(regions,  "RegionSources") == 0) {
         return 0;
     }
 
+    nRows = regions.GetNRows();
+    isInitialized_ = true;
     curRow = 0;
     return 1;
 }
 
+bool HDFRegionTableReader::IsInitialized(void) const {
+    return isInitialized_;
+}
+
+bool HDFRegionTableReader::HasRegionTable(void) const {
+    assert(IsInitialized() && "HDFRegionTable is not initialize!");
+    return fileContainsRegionTable;
+}
+
 int HDFRegionTableReader::GetNext(RegionAnnotation &annotation) {
+    assert(IsInitialized() && "HDFRegionTable is not initialize!");
     //
     // Bail with no-op if this is the last row.
     //
-
     if (fileContainsRegionTable == false) {
         return 0;
     }
@@ -66,69 +76,58 @@ int HDFRegionTableReader::GetNext(RegionAnnotation &annotation) {
     return 1;
 }	
 
-void HDFRegionTableReader::RegionTypesToMap(RegionTable &table) {
-    size_t i;
-    table.regionTypeEnums.resize(table.regionTypes.size());
-    for (i = 0;i < table.regionTypes.size(); i++) {
-        if (table.regionTypes[i] == "GlobalAccuracy") {
-            table.regionTypeEnums[i] = GlobalAccuracy;
-        }
-        else if (table.regionTypes[i] == "HQRegion") {
-            table.regionTypeEnums[i] = HQRegion;
-        }
-        else if (table.regionTypes[i] == "Adapter") {
-            table.regionTypeEnums[i] = Adapter;
-        }
-        else if (table.regionTypes[i] == "Insert") {
-            table.regionTypeEnums[i] = Insert;
-        }
-        else if (table.regionTypes[i] == "Accuracy") {
-            table.regionTypeEnums[i] = Insert;
-        }
-        else if (table.regionTypes[i] == "ArtifactRegion") {
-            table.regionTypeEnums[i] = ArtifactRegion;
-        }
-        else {
-            cout << "ERROR! Region Type " << table.regionTypes[i] << " is not supported.  Check Enumerations.h" << endl;
-            assert(0);
-        }
-    }
-}
-
-int HDFRegionTableReader::ReadTableAttributes(RegionTable &table) {
-    if (fileContainsRegionTable == false) {
-        return 0;
-    }
-    columnNames.Read(table.columnNames);
-    regionTypes.Read(table.regionTypes);
-    RegionTypesToMap(table);
-    regionDescriptions.Read(table.regionDescriptions);
-    regionSources.Read(table.regionSources);
-    // All ok.
-    return 1;
-}
 
 void HDFRegionTableReader::Close() {
+    isInitialized_ = false;
+    fileContainsRegionTable = false;
+    columnNames.Close();
+    regionTypes.Close();
+    regionDescriptions.Close();
+    regionSources.Close();
     pulseDataGroup.Close();
     regions.Close();
     regionTableFile.Close();
 }
 
-void HDFRegionTableReader::ReadTable(RegionTable &table) {
-    if (fileContainsRegionTable == false) {
-        return;
-    }
-    ReadTableAttributes(table);
-    table.table.resize(nRows);
-    int i = 0;
-    while(GetNext(table.table[curRow])) {
-        i++;
+// Note that (1) there is NO GUARANTEE that region annotations in hdf5
+// `Regions` dataset be sorted in any order, so we cannot iterate over
+// `Regions` in order to traverse zmws in order.
+// (2) region table of a million zmws is approximately 5M.
+void HDFRegionTableReader::ReadTable(RegionTable & table) {
+    assert(IsInitialized() && "HDFRegionTable is not initialize!");
+    table.Reset();
+
+    if (fileContainsRegionTable) {
+        // Read attributes.
+        std::vector<std::string> names, types, descs, sources;
+        if (columnNames.IsInitialized()) columnNames.Read(names);
+        if (regionTypes.IsInitialized()) regionTypes.Read(types);
+        else {
+            cout << "ERROR MUST HAVE REGIONTYPES" << endl;
+            exit(1);
+        }
+        if (regionDescriptions.IsInitialized()) regionDescriptions.Read(descs);
+        if (regionSources.IsInitialized()) regionSources.Read(sources);
+
+        // Read region annotations
+        std::vector<RegionAnnotation> ras;
+        ras.resize(nRows);
+        assert(curRow == 0);
+        for (; curRow < nRows; curRow++) {
+            regions.Read(curRow, curRow+1, ras[curRow].row);
+        }
+
+        // Reconstruct table
+        table.ConstructTable(ras, types);
+        table.ColumnNames(names);
+        table.RegionDescriptions(descs);
+        table.RegionSources(sources);
     }
 }
 
-
 void HDFRegionTableReader::GetMinMaxHoleNumber(UInt &minHole,
                                                UInt &maxHole) {
+    assert(IsInitialized() && "HDFRegionTable is not initialize!");
     // Hole numbers may not be sorted ascendingly, so do not
     // return the first and last hole numbers as the min and max.
     UInt saveCurRow = curRow;
diff --git a/hdf/HDFRegionTableReader.hpp b/hdf/HDFRegionTableReader.hpp
index 470cddf..baabfc7 100644
--- a/hdf/HDFRegionTableReader.hpp
+++ b/hdf/HDFRegionTableReader.hpp
@@ -14,7 +14,7 @@
 
 
 class HDFRegionTableReader {
-public:
+private:
     HDFFile regionTableFile;
     HDFGroup pulseDataGroup;
     HDF2DArray<int> regions;
@@ -23,24 +23,36 @@ public:
     HDFAtom<std::vector<std::string> > regionDescriptions;
     HDFAtom<std::vector<std::string> > regionSources;
     HDFAtom<std::vector<std::string> > columnNames;
+
     int curRow;
+
+    bool isInitialized_; // whether or not this reader is initialized.
+
     int nRows;
+
     bool fileContainsRegionTable;
 
-    int Initialize(std::string &regionTableFileName, 
-            const H5::FileAccPropList & fileAccPropList = H5::FileAccPropList::DEFAULT); 
+public:
 
-    int GetNext(RegionAnnotation &annotation); 
+    HDFRegionTableReader(void)
+    : curRow(0), isInitialized_(false), nRows(0) 
+    , fileContainsRegionTable(false) {}
 
-    void RegionTypesToMap(RegionTable &table); 
+    int Initialize(std::string &regionTableFileName, 
+                   const H5::FileAccPropList & fileAccPropList = H5::FileAccPropList::DEFAULT);
 
-    int ReadTableAttributes(RegionTable &table); 
+    bool IsInitialized(void) const;
 
-    void Close(); 
+    bool HasRegionTable(void) const;
 
-    void ReadTable(RegionTable &table); 
-    
     void GetMinMaxHoleNumber(UInt &minHole, UInt &maxHole);
+
+    void ReadTable(RegionTable &table);
+
+    void Close();
+
+private:
+    int GetNext(RegionAnnotation &annotation);
 };
 
 
diff --git a/hdf/HDFRegionsWriter.cpp b/hdf/HDFRegionsWriter.cpp
new file mode 100644
index 0000000..05427c6
--- /dev/null
+++ b/hdf/HDFRegionsWriter.cpp
@@ -0,0 +1,99 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Yuan Li
+
+#include "HDFRegionsWriter.hpp"
+
+HDFRegionsWriter::HDFRegionsWriter(const std::string & filename,
+                                   HDFGroup & parentGroup,
+                                   const std::vector<std::string> & regionTypes) 
+    : HDFWriterBase(filename)
+    , parentGroup_(parentGroup)
+    , regionTypes_(regionTypes)
+    , curRow_(0)
+{
+    // Initialize the 'regions' group.
+    regionsArray_.Initialize(parentGroup_, PacBio::GroupNames::regions, RegionAnnotation::NCOLS);
+}
+
+HDFRegionsWriter::~HDFRegionsWriter(void)
+{
+    WriteAttributes();
+    Close();
+}
+
+bool HDFRegionsWriter::WriteAttributes(void) 
+{
+    if (curRow_ > 0) {
+        AddAttribute(regionsArray_, PacBio::AttributeNames::Regions::columnnames, PacBio::AttributeValues::Regions::columnnames);
+        AddAttribute(regionsArray_, PacBio::AttributeNames::Regions::regiontypes, regionTypes_);
+        AddAttribute(regionsArray_, PacBio::AttributeNames::Regions::regiondescriptions, PacBio::AttributeValues::Regions::regiondescriptions);
+        AddAttribute(regionsArray_, PacBio::AttributeNames::Regions::regionsources, PacBio::AttributeValues::Regions::regionsources);
+        return true;
+    } else {
+        AddErrorMessage("Could not write attributes when Regions group is empty.");
+        return false;
+    }
+}
+
+bool HDFRegionsWriter::Write(const std::vector<RegionAnnotation> &annotations) {
+    // Bind by const reference (no per-row copy); stop at the first failed write.
+    for (const auto &annotation : annotations)
+        if (not Write(annotation)) return false;
+    return true;
+}
+
+bool HDFRegionsWriter::Write(const RegionAnnotation &annotation) {
+    try {
+        regionsArray_.WriteRow(annotation.row, HDFRegionsWriter::NCOLS);
+    } catch (H5::Exception &e) {
+        // std::to_string is required: "literal" + integer is pointer arithmetic.
+        const std::string hole = std::to_string(annotation.GetHoleNumber());
+        AddErrorMessage("Failed to write region annotation " + hole);
+        return false;
+    }
+    ++curRow_;
+    return true;
+}
+
+void HDFRegionsWriter::Flush(void) {
+    regionsArray_.Flush();
+}
+
+void HDFRegionsWriter::Close(void) {
+    Flush();
+    regionsArray_.Close();
+}
diff --git a/hdf/HDFRegionsWriter.hpp b/hdf/HDFRegionsWriter.hpp
new file mode 100644
index 0000000..13e2445
--- /dev/null
+++ b/hdf/HDFRegionsWriter.hpp
@@ -0,0 +1,101 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Yuan Li
+
+#ifndef _HDF_REGIONS_WRITER_HPP_
+#define _HDF_REGIONS_WRITER_HPP_
+
+#include <string>
+#include "Enumerations.h"
+#include "reads/RegionTable.hpp"
+#include "HDFFile.hpp"
+#include "HDFArray.hpp"
+#include "HDF2DArray.hpp"
+#include "HDFAtom.hpp"
+#include "HDFWriterBase.hpp"
+
+using namespace H5;
+using namespace std;
+
+class HDFRegionsWriter: public HDFWriterBase {
+public:
+    /// \name Constructor and destructor
+    /// \{
+    /// \param[in] filename, hdf file name
+    /// \param[in] parentGroup, parent hdf group in hierarchy
+    HDFRegionsWriter(const std::string & filename, 
+                     HDFGroup & parentGroup,
+                     const std::vector<std::string> & regionTypes = PacBio::AttributeValues::Regions::regiontypes);
+    ~HDFRegionsWriter(void);
+    /// \}
+
+private:
+    /// \name Private variables for hdf IO. 
+    /// \{
+    HDFGroup & parentGroup_; //< parent hdf group
+
+    /// A vector of strings of region types for RegionTypeIndex to look up. Order matters!
+    std::vector<std::string> regionTypes_; 
+
+	HDF2DArray<int> regionsArray_; //< HDF2DArray for writing regions to hdf
+
+	int curRow_; //< which row to write
+
+    static const int NCOLS = 5; //< number of columns in Regions table.
+
+    /// \brief Write attributes of the 'regions' group
+    bool WriteAttributes(void);
+    /// \}
+       
+public:
+    /// \name Method to write region annotations. 
+    /// \{
+    /// \brief Append a vector of region annotations to 'regions'
+    /// \param[in] annotations - region annotations to append. 
+    /// \returns true if succeeded.
+    bool Write(const std::vector<RegionAnnotation> &annotations);
+
+    /// \brief Append a region annotation to 'regions' 
+    /// \param[in] annotation - region annotation to append
+    /// \returns true if succeeded.
+    bool Write(const RegionAnnotation &annotation);
+
+    void Flush(void);
+
+    void Close(void);
+};
+
+#endif
diff --git a/hdf/HDFScanDataReader.cpp b/hdf/HDFScanDataReader.cpp
index daed4b2..3b2443d 100644
--- a/hdf/HDFScanDataReader.cpp
+++ b/hdf/HDFScanDataReader.cpp
@@ -89,7 +89,7 @@ int HDFScanDataReader::Initialize(HDFGroup *pulseDataGroup) {
     // Load baseMap which maps bases (ATGC) to channel orders.
     // This should always be present.
     //
-    if (LoadBaseMap(baseMap) == 0)
+    if (LoadBaseMap(baseMap_) == 0)
         return 0;
 
     //
@@ -126,6 +126,10 @@ int HDFScanDataReader::Read(ScanData &scanData) {
         whenStartedAtom.Read(scanData.whenStarted);
     }
 
+    ReadSequencingKit(scanData.sequencingKit_);
+
+    ReadBindingKit(scanData.bindingKit_);
+
     return 1;
 }
 
@@ -190,7 +194,7 @@ int HDFScanDataReader::LoadMovieName(string &movieNameP) {
     }
 }
 
-int HDFScanDataReader::LoadBaseMap(map<char, int> & baseMap) {
+int HDFScanDataReader::LoadBaseMap(map<char, size_t> & baseMap) {
     // Map bases to channel order in hdf pls file.
     if (dyeSetGroup.ContainsAttribute("BaseMap") and
             baseMapAtom.Initialize(dyeSetGroup, "BaseMap")) {
@@ -204,8 +208,8 @@ int HDFScanDataReader::LoadBaseMap(map<char, int> & baseMap) {
         baseMap.clear();
         for(size_t i = 0; i < baseMapStr.size(); i++) {
             baseMap[toupper(baseMapStr[i])] = i;
-            baseMap[tolower(baseMapStr[i])] = i;
         }
+        this->baseMap_ = baseMap;
         return 1;
     }
     return 0;
@@ -213,20 +217,21 @@ int HDFScanDataReader::LoadBaseMap(map<char, int> & baseMap) {
 
 void HDFScanDataReader::Close() {
     if (useMovieName) {
-        movieNameAtom.dataspace.close();
+        movieNameAtom.Close();
     }
     if (useRunCode) {
-        runCodeAtom.dataspace.close();
+        runCodeAtom.Close();
     }
     if (useWhenStarted) {
-        whenStartedAtom.dataspace.close();
+        whenStartedAtom.Close();
     }
-    baseMapAtom.dataspace.close();
-    platformIdAtom.dataspace.close();
-    frameRateAtom.dataspace.close();
-    numFramesAtom.dataspace.close();
-    sequencingKitAtom.dataspace.close();
-    bindingKitAtom.dataspace.close();
+
+    baseMapAtom.Close();
+    platformIdAtom.Close();
+    frameRateAtom.Close();
+    numFramesAtom.Close();
+    sequencingKitAtom.Close();
+    bindingKitAtom.Close();
 
     scanDataGroup.Close();
     dyeSetGroup.Close();
diff --git a/hdf/HDFScanDataReader.hpp b/hdf/HDFScanDataReader.hpp
index 7da6649..b9ec164 100644
--- a/hdf/HDFScanDataReader.hpp
+++ b/hdf/HDFScanDataReader.hpp
@@ -38,7 +38,6 @@ public:
     //
     bool   useMovieName;
     std::string movieName, runCode;
-    std::map<char, int> baseMap;
     PlatformId platformId;
 
     HDFScanDataReader(); 
@@ -81,11 +80,15 @@ public:
 
     int LoadMovieName(std::string &movieName); 
 
-    int LoadBaseMap(map<char, int> & baseMap); 
+    int LoadBaseMap(map<char, size_t> & baseMap); 
+
+    std::map<char, size_t> BaseMap(void) const {return baseMap_;} 
 
     void Close(); 
 
 private:
+    std::map<char, size_t> baseMap_;
+
     /// Reads value of a string attribute within a HDFGroup.
     /// \returns 1 if succesfully read value of the string attribute, 0 otherwise.
     /// \param[out] attributeValue, value of a string attribute.
diff --git a/hdf/HDFScanDataWriter.cpp b/hdf/HDFScanDataWriter.cpp
index 874aba0..34e75c7 100644
--- a/hdf/HDFScanDataWriter.cpp
+++ b/hdf/HDFScanDataWriter.cpp
@@ -1,3 +1,40 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Yuan Li
+
 #include "HDFScanDataWriter.hpp"
 
 void HDFScanDataWriter::CreateAcqParamsGroup() {
@@ -15,7 +52,7 @@ void HDFScanDataWriter::CreateDyeSetGroup(){
         std::cout << "ERROR could not create /ScanData/DyeSet." << std::endl;
         exit(1);
     }
-    baseMapAtom.Create(dyeSetGroup.group, "BaseMap");
+    baseMapAtom.Create(dyeSetGroup.group, PacBio::AttributeNames::ScanData::DyeSet::basemap);
     numAnalogAtom.Create(dyeSetGroup.group, "NumAnalog");
 }
 
@@ -28,6 +65,8 @@ void HDFScanDataWriter::CreateRunInfoGroup(){
     platformIdAtom.Create(runInfoGroup.group, "PlatformId");
     platformNameAtom.Create(runInfoGroup.group, "PlatformName");
     runCodeAtom.Create(runInfoGroup.group, "RunCode");
+    bindingKitAtom.Create(runInfoGroup.group, "BindingKit");
+    sequencingKitAtom.Create(runInfoGroup.group, "SequencingKit");
 }
 
 HDFScanDataWriter::HDFScanDataWriter(HDFFile & _outFile) {
@@ -39,8 +78,7 @@ HDFScanDataWriter::HDFScanDataWriter(HDFGroup & _rootGroup) {
 }
 
 HDFScanDataWriter::~HDFScanDataWriter() { 
-    // Assume that closing the hdf file must be done
-    // manually and not in a destructor.
+    this->Close();
 }
 
 int HDFScanDataWriter::Initialize(HDFGroup & _rootGroup) {
@@ -61,31 +99,45 @@ int HDFScanDataWriter::Initialize(HDFGroup & _rootGroup) {
     return 1;
 }
 
-void HDFScanDataWriter::Write(ScanData & scanData) {
+void HDFScanDataWriter::Write(const ScanData & scanData) {
+    const float DEFAULT_FRAMERATE        = 75.0;
+    const unsigned int DEFAULT_NUMFRAMES = 1000000;
+    const std::string DEFAULT_DATE       = "2013-01-01T01:01:01";
+    const int DEFAULT_NUMANALOG          = 4;
+    const std::string DEFAULT_MOVIENAME  = "simulated_movie";
+    const std::string DEFAULT_RUNCODE    = "simulated_runcode";
+
     WriteFrameRate((scanData.frameRate==0)?
-            (75):(scanData.frameRate));
+                   (DEFAULT_FRAMERATE):(scanData.frameRate));
     WriteNumFrames((scanData.numFrames==0)?
-            (1000000):(scanData.numFrames));
+                   (DEFAULT_NUMFRAMES):(scanData.numFrames));
     WriteWhenStarted((scanData.whenStarted.empty())?
-            ("2013-01-01T01:01:01"):(scanData.whenStarted));
-    std::string baseMapStr = BaseMapToStr(scanData.baseMap);
-    WriteBaseMap((baseMapStr == "")?("TGAC"):baseMapStr);
-    WriteNumAnalog(4);
+                     (DEFAULT_DATE):(scanData.whenStarted));
+
+    // Base map is VITAL, must be specified
+    if (scanData.BaseMapStr().empty()) {
+        assert("ScanData/DyeSet attribute BaseMap MUST be specified." == 0);
+    }
+    WriteBaseMap(scanData.BaseMapStr()); 
+    WriteNumAnalog(DEFAULT_NUMANALOG);
 
     WriteMovieName((scanData.movieName.empty()?
-                ("simulated_movie"):scanData.movieName));
+                   (DEFAULT_MOVIENAME):scanData.movieName));
     WriteRunCode((scanData.runCode.empty())?
-            "simulated_runcode":(scanData.runCode));
+                 (DEFAULT_RUNCODE):(scanData.runCode));
     WritePlatformId((scanData.platformId==NoPlatform)?
             (Springfield):(scanData.platformId));
+
+    WriteBindingKit(scanData.BindingKit());
+    WriteSequencingKit(scanData.SequencingKit());
 }
 
-void HDFScanDataWriter::WriteFrameRate(float frameRate) {
+void HDFScanDataWriter::WriteFrameRate(const float frameRate) {
     // Write /ScanData/AcqParams/FrameRate attribute.
     frameRateAtom.Write(frameRate);
 }
 
-void HDFScanDataWriter::WriteNumFrames(unsigned int numFrames) {
+void HDFScanDataWriter::WriteNumFrames(const unsigned int numFrames) {
     // Write /ScanData/AcqParams/NumFrames attribute.
     numFramesAtom.Write(numFrames);
 }
@@ -95,23 +147,6 @@ void HDFScanDataWriter::WriteWhenStarted(const std::string whenStarted) {
     whenStartedAtom.Write(whenStarted);
 }
 
-std::string HDFScanDataWriter::BaseMapToStr(std::map<char, int> & baseMap) {
-    std::string baseMapStr = ""; //4 dye channels.
-    if (not baseMap.empty()) {
-        baseMapStr = "    ";
-        map<char, int>::iterator it;
-        for (it = baseMap.begin(); it != baseMap.end(); ++it){
-            if (it->second > 4 or it->second < 0) {
-                std::cout << "ERROR, there are more than four dye channels."
-                          << std::endl;
-                exit(1);
-            }
-            baseMapStr[it->second]= it->first;
-        }
-    }
-    return baseMapStr;
-}
-
 void HDFScanDataWriter::WriteBaseMap(const std::string baseMapStr) {
     //Write /ScanData/DyeSet/BaseMap attribute.
     baseMapAtom.Write(baseMapStr);
@@ -123,9 +158,9 @@ void HDFScanDataWriter::WriteNumAnalog(const unsigned int numAnalog) {
 }
 
 void HDFScanDataWriter::WritePlatformId(const PlatformId id) {
-    //Write /ScanData/RunInfo/Flatform attribute.
-    platformIdAtom.Write(id);
+    //Write /ScanData/RunInfo/Platform attribute.
     std::string name = (id == Springfield)?"Springfield":"Astro";
+    platformIdAtom.Write(id);
     platformNameAtom.Write(name);
 }
 
@@ -139,20 +174,30 @@ void HDFScanDataWriter::WriteRunCode(const std::string runCode) {
     runCodeAtom.Write(runCode);
 }
 
+void HDFScanDataWriter::WriteBindingKit(const std::string & bindingKit) {
+    bindingKitAtom.Write(bindingKit);
+}
+
+void HDFScanDataWriter::WriteSequencingKit(const std::string & sequencingKit) {
+    sequencingKitAtom.Write(sequencingKit);
+}
+
 void HDFScanDataWriter::Close() {
     // Close /ScanData/AcqParams attributes.
-    whenStartedAtom.dataspace.close();
-    frameRateAtom.dataspace.close();
-    numFramesAtom.dataspace.close();
+    whenStartedAtom.Close();
+    frameRateAtom.Close();
+    numFramesAtom.Close();
 
     // Close /ScanData/DyeSet attributes.
-    baseMapAtom.dataspace.close();
+    baseMapAtom.Close();
 
     // Close /ScanData/RunInfo attributes.
-    movieNameAtom.dataspace.close();
-    runCodeAtom.dataspace.close();
-    platformIdAtom.dataspace.close();
-    platformNameAtom.dataspace.close();
+    movieNameAtom.Close();
+    runCodeAtom.Close();
+    platformIdAtom.Close();
+    platformNameAtom.Close();
+    bindingKitAtom.Close();
+    sequencingKitAtom.Close();
 
     // Close /ScanData/AcqParams|DyeSet|RunInfo.
     acqParamsGroup.Close();
diff --git a/hdf/HDFScanDataWriter.hpp b/hdf/HDFScanDataWriter.hpp
index f92d8e6..136e416 100644
--- a/hdf/HDFScanDataWriter.hpp
+++ b/hdf/HDFScanDataWriter.hpp
@@ -1,3 +1,40 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Yuan Li
+
 #ifndef DATA_HDF_HDF_SCAN_DATA_WRITER_H_
 #define DATA_HDF_HDF_SCAN_DATA_WRITER_H_
 
@@ -27,6 +64,9 @@ private:
 	HDFAtom<std::string> movieNameAtom;
 	HDFAtom<std::string> runCodeAtom;
 
+	HDFAtom<std::string> bindingKitAtom;
+	HDFAtom<std::string> sequencingKitAtom;
+
 	HDFAtom<unsigned int> platformIdAtom;
 	HDFAtom<std::string> platformNameAtom;
 
@@ -45,16 +85,17 @@ public:
     
     int Initialize(HDFGroup & _rootGroup);
       
-    void Write(ScanData & scanData);
+    void Write(const ScanData & scanData);
    
-	void WriteFrameRate(float frameRate);
+	void WriteFrameRate(const float frameRate);
 
-    void WriteNumFrames(unsigned int numFrames);
+    void WriteNumFrames(const unsigned int numFrames);
 
     void WriteWhenStarted(const std::string whenStarted);
 
-    std::string BaseMapToStr(std::map<char, int> & baseMap);
-   
+	void Close();
+  
+private:
     void WriteBaseMap(const std::string baseMapStr);
    
     void WriteNumAnalog(const unsigned int numAnalog);
@@ -65,7 +106,9 @@ public:
 
     void WriteRunCode(const std::string runCode);
 
-	void Close();
+    void WriteBindingKit(const std::string & bindingKit);
+
+    void WriteSequencingKit(const std::string & sequencingKit);
 };
 
 #endif
diff --git a/hdf/HDFWriteBuffer.hpp b/hdf/HDFWriteBuffer.hpp
index 3600c5a..db812df 100644
--- a/hdf/HDFWriteBuffer.hpp
+++ b/hdf/HDFWriteBuffer.hpp
@@ -2,6 +2,7 @@
 #define _BLASR_HDF_WRITE_BUFFER_HPP_
 
 #include <cstddef>
+#include "utils.hpp"
 
 template<typename T>
 class HDFWriteBuffer {
@@ -20,7 +21,7 @@ public:
         Free(); // Free before reusing the buffer.
         bufferSize = pBufferSize;
         if (bufferSize > 0) {
-            writeBuffer = new T[bufferSize];
+            writeBuffer = ProtectedNew<T>(bufferSize);
         }
         else {
             writeBuffer = NULL;
diff --git a/hdf/HDFWriterBase.cpp b/hdf/HDFWriterBase.cpp
new file mode 100644
index 0000000..856dfb1
--- /dev/null
+++ b/hdf/HDFWriterBase.cpp
@@ -0,0 +1,98 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.  //
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Yuan Li
+
+#include "HDFWriterBase.hpp"
+
+std::vector<std::string> HDFWriterBase::Errors(void) const { // Returns a copy of the error messages recorded so far.
+    return errors_;
+}
+
+// Calls AddGroup(childGroupName) on parentGroup, then initializes childGroup
+// from it; records an error and returns false when Initialize reports 0.
+bool HDFWriterBase::AddChildGroup(HDFGroup & parentGroup, HDFGroup & childGroup,
+                                  const std::string & childGroupName) {
+    parentGroup.AddGroup(childGroupName);
+    const bool initialized =
+        childGroup.Initialize(parentGroup, childGroupName) != 0;
+    if (not initialized) FAILED_TO_CREATE_GROUP_ERROR(childGroupName);
+    return initialized;
+}
+
+// Single-value overload: wraps the value in a one-element vector and forwards.
+bool HDFWriterBase::AddAttribute(HDFData & group, const std::string & attributeName,
+                                 const std::string & attributeValue) {
+    const std::vector<std::string> singleValue(1, attributeValue);
+    return AddAttribute(group, attributeName, singleValue);
+}
+
+// Creates a string-vector attribute named attributeName on group.dataset.
+// Returns false (and records an error) if an H5::Exception is thrown.
+bool HDFWriterBase::AddAttribute(HDFData & group, const std::string & attributeName,
+                                 const std::vector<std::string> & attributeValues) {
+    bool created = true;
+    try {
+        HDFAtom<std::vector<std::string> > atom;
+        atom.Create(group.dataset, std::string(attributeName), attributeValues);
+        atom.Close();
+    } catch (H5::Exception &) {
+        FAILED_TO_CREATE_ATTRIBUTE_ERROR(attributeName);
+        created = false;
+    }
+    return created;
+}
+
+void HDFWriterBase::AddErrorMessage(const std::string & errmsg) { // Appends errmsg to errors_.
+    errors_.push_back(errmsg);
+}
+
+// Records "Failed to create group <groupName> in <filename_>".
+void HDFWriterBase::FAILED_TO_CREATE_GROUP_ERROR(const std::string & groupName) {
+    const std::string message = "Failed to create group " + groupName + " in " + filename_;
+    AddErrorMessage(message);
+}
+
+// Records "Failed to create attribute <attributeName> in <filename_>".
+void HDFWriterBase::FAILED_TO_CREATE_ATTRIBUTE_ERROR(const std::string & attributeName) {
+    const std::string message = "Failed to create attribute " + attributeName + " in " + filename_;
+    AddErrorMessage(message);
+}
+
+// Records that the parent HDF group of groupName in filename_ is not initialized.
+void HDFWriterBase::PARENT_GROUP_NOT_INITIALIZED_ERROR(const std::string & groupName) {
+    const std::string message = "Parent hdf group of " + groupName + " in file " + filename_
+                                + " is not initialized.";
+    AddErrorMessage(message);
+}
diff --git a/hdf/HDFWriterBase.hpp b/hdf/HDFWriterBase.hpp
new file mode 100644
index 0000000..39b33c2
--- /dev/null
+++ b/hdf/HDFWriterBase.hpp
@@ -0,0 +1,88 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Yuan Li
+
+#ifndef _BLASR_HDFWRITERBASE_HPP_
+#define _BLASR_HDFWRITERBASE_HPP_
+#include <string>
+#include <iostream>
+#include <sstream>
+#include <vector>
+#include "HDFGroup.hpp"
+#include "HDFAtom.hpp"
+
+class HDFWriterBase {
+public:
+    HDFWriterBase(const std::string & filename)
+    : filename_(filename) {}
+
+    /// Virtual because Close() is virtual: deleting through a base pointer must run the derived destructor.
+    virtual ~HDFWriterBase() {}
+
+    /// \returns Target H5 filename.
+    std::string Filename(void) const {return filename_;}
+
+    /// \returns A copy of the error messages recorded so far.
+    std::vector<std::string> Errors(void) const;
+
+protected:
+    std::string filename_;
+    std::vector<std::string> errors_;
+
+    /// Adds childGroupName under parentGroup and initializes childGroup from it.
+    bool AddChildGroup(HDFGroup & parentGroup,
+                       HDFGroup & childGroup,
+                       const std::string & childGroupName);
+
+    /// Adds a single-string attribute to group; false (plus an error) on failure.
+    bool AddAttribute(HDFData & group,
+                      const std::string & attributeName,
+                      const std::string & attributeValue);
+
+    /// Adds a string-vector attribute to group; false (plus an error) on failure.
+    bool AddAttribute(HDFData & group,
+                      const std::string & attributeName,
+                      const std::vector<std::string> & attributeValues);
+
+    void AddErrorMessage(const std::string & errmsg);
+    void FAILED_TO_CREATE_GROUP_ERROR(const std::string & groupName);
+    void FAILED_TO_CREATE_ATTRIBUTE_ERROR(const std::string & attributeName);
+    void PARENT_GROUP_NOT_INITIALIZED_ERROR(const std::string & groupName);
+
+    virtual void Close(void) = 0;
+};
+
+#endif
diff --git a/hdf/HDFZMWMetricsWriter.cpp b/hdf/HDFZMWMetricsWriter.cpp
new file mode 100644
index 0000000..f05a0b6
--- /dev/null
+++ b/hdf/HDFZMWMetricsWriter.cpp
@@ -0,0 +1,142 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Yuan Li
+
+#include "HDFZMWMetricsWriter.hpp"
+#include "reads/ScanData.hpp"
+
+// Adds the ZMWMetrics group under parentGroup (when it is initialized) and sets
+// up the child datasets; failures are recorded through the error helpers.
+HDFZMWMetricsWriter::HDFZMWMetricsWriter(const std::string & filename,
+        HDFGroup & parentGroup, const std::map<char, size_t> & baseMap)
+    : HDFWriterBase(filename)
+    , parentGroup_(parentGroup)
+    , baseMap_(baseMap)
+    , curRow_(0)
+{
+    if (parentGroup.groupIsInitialized) {
+        parentGroup_.AddGroup(PacBio::GroupNames::zmwmetrics);
+        if (zmwMetricsGroup_.Initialize(parentGroup_, PacBio::GroupNames::zmwmetrics) == 0) {
+            FAILED_TO_CREATE_GROUP_ERROR(PacBio::GroupNames::zmwmetrics);
+        }
+        InitializeChildHDFGroups();
+    } else {
+        PARENT_GROUP_NOT_INITIALIZED_ERROR(PacBio::GroupNames::zmwmetrics);
+    }
+
+    assert(ScanData::IsValidBaseMap(baseMap)); // Sanity-check the base map (debug builds only).
+}
+
+HDFZMWMetricsWriter::~HDFZMWMetricsWriter() { // Flush rows, write attributes, close handles.
+    Flush(); // Must flush in case group is empty.
+    if (not WriteAttributes()) assert(false); // Call sits outside assert() so it also runs under NDEBUG.
+    Close();
+}
+
+// Appends one row per dataset for `read`: the four HQ-region SNR values
+// (placed by baseMap_ index), the read score, and the hole status.
+bool HDFZMWMetricsWriter::WriteOneZmw(const SMRTSequence & read) {
+    bool written = true;
+    try {
+        float snrs[4];
+        for (const char base: {'A', 'C', 'G', 'T'})
+            snrs[baseMap_[base]] = read.HQRegionSnr(base);
+        hqRegionSNRArray_.WriteRow(snrs, SNRNCOLS);
+        readScoreArray_.Write(&read.readScore, 1);
+        productivityArray_.Write(&read.zmwData.holeStatus, 1);
+        ++curRow_; // Only reached when every write above succeeded.
+    } catch (H5::Exception &) {
+        AddErrorMessage("Failed to write HQRegionSNR or ReadScore or Productivity.");
+        written = false;
+    }
+    return written;
+}
+
+void HDFZMWMetricsWriter::Flush(void) { // Calls Flush() on each of the three ZMWMetrics datasets.
+    hqRegionSNRArray_.Flush();
+    readScoreArray_.Flush();
+    productivityArray_.Flush();
+}
+
+void HDFZMWMetricsWriter::Close(void) { // Closes the three datasets, then the ZMWMetrics group.
+    hqRegionSNRArray_.Close();
+    readScoreArray_.Close();
+    productivityArray_.Close();
+    // The containing group is closed after its datasets.
+    zmwMetricsGroup_.Close();
+}
+
+// Initializes HQRegionSNR, ReadScore and Productivity under the ZMWMetrics group.
+// Every dataset is attempted; each failure records an error. True only if all succeed.
+bool HDFZMWMetricsWriter::InitializeChildHDFGroups(void) {
+    const bool snrReady =
+        hqRegionSNRArray_.Initialize(zmwMetricsGroup_, PacBio::GroupNames::hqregionsnr, SNRNCOLS) != 0;
+    if (not snrReady)
+        FAILED_TO_CREATE_GROUP_ERROR(PacBio::GroupNames::hqregionsnr);
+
+    const bool scoreReady =
+        readScoreArray_.Initialize(zmwMetricsGroup_, PacBio::GroupNames::readscore) != 0;
+    if (not scoreReady)
+        FAILED_TO_CREATE_GROUP_ERROR(PacBio::GroupNames::readscore);
+
+    const bool productivityReady =
+        productivityArray_.Initialize(zmwMetricsGroup_, PacBio::GroupNames::productivity) != 0;
+    if (not productivityReady)
+        FAILED_TO_CREATE_GROUP_ERROR(PacBio::GroupNames::productivity);
+
+    return snrReady and scoreReady and productivityReady;
+}
+
+// Attaches the description attribute to each ZMWMetrics dataset. Refuses (and
+// records an error) when no row has been written. Stops at the first failure.
+bool HDFZMWMetricsWriter::WriteAttributes(void) {
+    if (curRow_ <= 0) {
+        AddErrorMessage("Could not write attributes when ZMWMetrics group is empty.");
+        return false;
+    }
+
+    if (not AddAttribute(hqRegionSNRArray_,
+                         PacBio::AttributeNames::Common::description,
+                         PacBio::AttributeValues::ZMWMetrics::HQRegionSNR::description))
+        return false;
+    if (not AddAttribute(readScoreArray_,
+                         PacBio::AttributeNames::Common::description,
+                         PacBio::AttributeValues::ZMWMetrics::ReadScore::description))
+        return false;
+    return AddAttribute(productivityArray_,
+                        PacBio::AttributeNames::Common::description,
+                        PacBio::AttributeValues::ZMWMetrics::Productivity::description);
+}
diff --git a/hdf/HDFZMWMetricsWriter.hpp b/hdf/HDFZMWMetricsWriter.hpp
new file mode 100644
index 0000000..61234e2
--- /dev/null
+++ b/hdf/HDFZMWMetricsWriter.hpp
@@ -0,0 +1,117 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Yuan Li
+
+
+#ifndef _BLASR_HDF_HDFZMWMETRICSWriter_HPP_
+#define _BLASR_HDF_HDFZMWMETRICSWriter_HPP_
+
+#include "SMRTSequence.hpp"
+#include "HDFWriterBase.hpp"
+#include "BufferedHDFArray.hpp"
+#include "BufferedHDF2DArray.hpp"
+
+
+class HDFBaseCallerWriter;
+
+class HDFZMWMetricsWriter: public HDFWriterBase {
+
+friend class HDFBaseCallerWriter;
+private:
+    /// \name Private variable
+    /// \{
+    HDFGroup & parentGroup_;
+    /// The ZMWMetrics group added under parentGroup_ by the constructor.
+	HDFGroup zmwMetricsGroup_;
+
+    /// HDF2DArray for writing average SNR within HQRegion.
+   	BufferedHDF2DArray<float> hqRegionSNRArray_;
+
+    /// HDFArray for writing read raw accuracy prediction.
+   	BufferedHDFArray<float> readScoreArray_;
+
+    /// HDFArray for writing Productivity (fed from the read's hole status).
+    BufferedHDFArray<unsigned char> productivityArray_;
+
+    /// Map bases (e.g., ACGT) to indices
+    std::map<char, size_t> baseMap_;
+    /// Number of ZMWs written so far; incremented by WriteOneZmw.
+    int curRow_;
+    /// Number of columns in each HQRegionSNR row.
+    static const int SNRNCOLS = 4;
+    /// \}
+
+public:
+    /// \name Constructors and Destructors
+    /// \{
+    HDFZMWMetricsWriter(const std::string & filename, 
+                        HDFGroup & parentGroup,
+                        const std::map<char, size_t> & baseMap);
+
+    ~HDFZMWMetricsWriter(void) ;
+    /// \}
+
+    /// \name Public Methods
+    /// \{
+    
+    /// \note Write info of a SMRTSequence to ZMWMetrics,
+    ///       (1) add average signal to noise ratio in HQRegion to HQRegionSNR 
+    ///       (2) add read raw accuracy prediction to ReadScore 
+    ///       (3) add hole status to Productivity
+    bool WriteOneZmw(const SMRTSequence & read);
+
+    /// \note Flushes all data from cache to disc.
+    void Flush(void);
+
+    /// \note Closes this zmw group as well as child hdf groups.
+    void Close(void);
+    /// \}
+
+private:
+    /// \name Private Methods
+    /// \{
+    
+    /// \note Initialize child hdf groups under ZMWMetrics, including
+    ///       HQRegionSNR, ReadScore and Productivity
+    /// \returns bool, whether or not child hdf groups successfully initialized.
+    bool InitializeChildHDFGroups(void);
+
+    /// \note Write description attributes; returns false if no ZMW was written.
+    bool WriteAttributes(void);
+    /// \}
+};
+
+#endif
diff --git a/hdf/HDFZMWWriter.cpp b/hdf/HDFZMWWriter.cpp
new file mode 100644
index 0000000..447d832
--- /dev/null
+++ b/hdf/HDFZMWWriter.cpp
@@ -0,0 +1,144 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Yuan Li
+
+#include "HDFZMWWriter.hpp"
+
+// Adds the ZMW group under parentGroup (when it is initialized) and sets up its
+// child datasets; failures are recorded through the error helpers.
+HDFZMWWriter::HDFZMWWriter(const std::string & filename,
+        HDFGroup & parentGroup, bool hasHoleXY)
+    : HDFWriterBase(filename)
+    , parentGroup_(parentGroup)
+    , hasHoleXY_(hasHoleXY)
+{
+    if (parentGroup.groupIsInitialized) {
+        parentGroup_.AddGroup(PacBio::GroupNames::zmw);
+        if (zmwGroup_.Initialize(parentGroup_, PacBio::GroupNames::zmw) == 0) {
+            FAILED_TO_CREATE_GROUP_ERROR(PacBio::GroupNames::zmw);
+        }
+        InitializeChildHDFGroups();
+    } else {
+        PARENT_GROUP_NOT_INITIALIZED_ERROR(PacBio::GroupNames::zmw);
+    }
+}
+
+HDFZMWWriter::~HDFZMWWriter() { // Writes dataset attributes, then flushes and closes via Close().
+    this->_WriteAttributes();
+    this->Close();
+}
+
+// Appends read's length, hole number, hole status and (if enabled) hole X/Y; always returns true.
+bool HDFZMWWriter::WriteOneZmw(const SMRTSequence & read) {
+    int numEvents = static_cast<int>(read.length);
+    numEventArray_.Write(&numEvents, 1);
+
+    UInt holeNumber = read.HoleNumber();
+    holeNumberArray_.Write(&holeNumber, 1);
+
+    unsigned char holeStatus = read.HoleStatus();
+    holeStatusArray_.Write(&holeStatus, 1);
+    if (hasHoleXY_) {
+        int16_t holeXY[2] = {static_cast<int16_t>(read.HoleX()),
+                             static_cast<int16_t>(read.HoleY())};
+        holeXYArray_.WriteRow(holeXY, 2);
+    }
+    return true;
+}
+
+void HDFZMWWriter::Flush(void) { // Calls Flush() on every ZMW dataset this writer maintains.
+    numEventArray_.Flush();
+    holeNumberArray_.Flush();
+    holeStatusArray_.Flush();
+    if (HasHoleXY()) // HoleXY is only touched when the writer was constructed with it.
+        holeXYArray_.Flush();
+}
+
+void HDFZMWWriter::Close(void) { // Flushes, then closes the datasets and finally the ZMW group.
+    this->Flush();
+    // Datasets first; the containing group is closed last.
+    numEventArray_.Close();
+    holeNumberArray_.Close();
+    holeStatusArray_.Close();
+    if (HasHoleXY())
+        holeXYArray_.Close();
+    zmwGroup_.Close();
+}
+
+// Initializes NumEvent, HoleNumber, HoleStatus and (when enabled) HoleXY under
+// the ZMW group. Every dataset is attempted; each failure records an error.
+bool HDFZMWWriter::InitializeChildHDFGroups(void) {
+    const bool numEventReady =
+        numEventArray_.Initialize(zmwGroup_, PacBio::GroupNames::numevent) != 0;
+    if (not numEventReady)
+        FAILED_TO_CREATE_GROUP_ERROR(PacBio::GroupNames::numevent);
+
+    const bool holeNumberReady =
+        holeNumberArray_.Initialize(zmwGroup_, PacBio::GroupNames::holenumber) != 0;
+    if (not holeNumberReady)
+        FAILED_TO_CREATE_GROUP_ERROR(PacBio::GroupNames::holenumber);
+
+    const bool holeStatusReady =
+        holeStatusArray_.Initialize(zmwGroup_, PacBio::GroupNames::holestatus) != 0;
+    if (not holeStatusReady)
+        FAILED_TO_CREATE_GROUP_ERROR(PacBio::GroupNames::holestatus);
+
+    bool holeXYReady = true; // Counts as ready when HoleXY is disabled.
+    if (HasHoleXY()) {
+        holeXYReady = holeXYArray_.Initialize(zmwGroup_, PacBio::GroupNames::holexy, 2) != 0;
+        if (not holeXYReady)
+            FAILED_TO_CREATE_GROUP_ERROR(PacBio::GroupNames::holexy);
+    }
+
+    return numEventReady and holeNumberReady and holeStatusReady and holeXYReady;
+}
+
+// Attaches description attributes (plus the HoleStatus lookup table) to the
+// initialized datasets; HoleNumber and HoleStatus must also be non-empty.
+void HDFZMWWriter::_WriteAttributes(void)
+{
+    const bool hasHoleNumbers = holeNumberArray_.IsInitialized() and holeNumberArray_.size() > 0;
+    if (hasHoleNumbers)
+        AddAttribute(holeNumberArray_, PacBio::AttributeNames::Common::description, PacBio::AttributeValues::ZMW::HoleNumber::description);
+    const bool hasHoleStatuses = holeStatusArray_.IsInitialized() and holeStatusArray_.size() > 0;
+    if (hasHoleStatuses) {
+        AddAttribute(holeStatusArray_, PacBio::AttributeNames::Common::description, PacBio::AttributeValues::ZMW::HoleStatus::description);
+        AddAttribute(holeStatusArray_, PacBio::AttributeNames::ZMW::HoleStatus::lookuptable, PacBio::AttributeValues::ZMW::HoleStatus::lookuptable);
+    }
+    if (holeXYArray_.IsInitialized())
+        AddAttribute(holeXYArray_, PacBio::AttributeNames::Common::description, PacBio::AttributeValues::ZMW::HoleXY::description);
+}
+
diff --git a/hdf/HDFZMWWriter.hpp b/hdf/HDFZMWWriter.hpp
new file mode 100644
index 0000000..ea2d4f1
--- /dev/null
+++ b/hdf/HDFZMWWriter.hpp
@@ -0,0 +1,120 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Yuan Li
+
+
+#ifndef _BLASR_HDF_HDFZMWWriter_HPP_
+#define _BLASR_HDF_HDFZMWWriter_HPP_
+
+#include "HDFWriterBase.hpp"
+#include "BufferedHDFArray.hpp"
+#include "BufferedHDF2DArray.hpp"
+#include "SMRTSequence.hpp"
+
+class HDFBaseCallerWriter;
+
+class HDFZMWWriter: public HDFWriterBase {
+
+friend class HDFBaseCallerWriter;
+
+private:
+    /// ZMW/NumEvent
+   	BufferedHDFArray<int> numEventArray_;
+
+    /// ZMW/HoleNumber
+	BufferedHDFArray<unsigned int> holeNumberArray_; 
+
+    /// ZMW/HoleStatus
+	BufferedHDFArray<unsigned char> holeStatusArray_;
+
+    /// ZMW/HoleXY (only used when hasHoleXY_ is true)
+	BufferedHDF2DArray<int16_t> holeXYArray_;
+
+private:
+	HDFGroup zmwGroup_; ///< The ZMW group added under parentGroup_.
+    HDFGroup & parentGroup_;
+    bool hasHoleXY_; ///< Whether the HoleXY dataset is written.
+
+public:
+    /// \name Constructors and Destructors
+    /// \{
+    HDFZMWWriter(const std::string & filename, 
+                 HDFGroup & parentGroup, 
+                 bool hasHoleXY = true);
+
+    ~HDFZMWWriter() ;
+    /// \}
+
+    /// \name Public Methods
+    /// \{
+    
+    /// \note Write info of a SMRTSequence to ZMW,
+    ///       (1) add length of the sequence, cast to int, to NumEvent,
+    ///       (2) add zmw hole number (UInt) of the sequence as a UInt to HoleNumber,
+    ///       (3) add hole status (unsigned char) to HoleStatus,
+    ///       (4) if HasHoleXY(), add hole coordinate xy as (int16_t, int16_t) to HoleXY
+    bool WriteOneZmw(const SMRTSequence & read);
+
+    /// \returns Whether or not ZMW contains the HoleXY dataset.
+    inline bool HasHoleXY(void) const;
+
+    /// \note Flushes all data from cache to disc.
+    void Flush(void);
+
+    /// \note Closes this zmw group as well as child hdf groups.
+    void Close(void);
+ 
+    /// \}
+
+private:
+    /// \name Private Methods
+    /// \{
+    
+    /// \note Initialize child hdf groups under ZMW, including
+    ///       NumEvent, HoleNumber, HoleStatus, HoleXY
+    /// \returns bool, whether or not child hdf groups successfully initialized.
+    bool InitializeChildHDFGroups(void);
+
+    /// \note Add attributes to HoleNumber, HoleXY, HoleStatus.
+    void _WriteAttributes(void);
+
+    /// \}
+};
+
+inline bool HDFZMWWriter::HasHoleXY(void) const 
+{return hasHoleXY_;} // Value is set once from the constructor's hasHoleXY argument.
+
+#endif
diff --git a/hdf/Makefile b/hdf/Makefile
deleted file mode 100644
index 25247e3..0000000
--- a/hdf/Makefile
+++ /dev/null
@@ -1,94 +0,0 @@
-
-include ../common.mk
-
-# To enable building a shared library, invoke as "make SHARED_LIB=true ..."
-ifneq ($(SHARED_LIB),)
-    # Generating shared library
-    CXX_SHAREDFLAGS := -fPIC
-    LD_SHAREDFLAGS  := -shared -fPIC
-    TARGET_LIB      := libpbihdf.so
-    # Developers should set these to appropriate defaults (other systems 
-    # will override these on the command line):
-    HDF5_LIB        := ../../../../prebuilt.out/prebuilt.out/hdf5/hdf5-1.8.12/centos-5/lib/libhdf5.so
-    ZLIB_LIB        := ../../../../prebuilt.tmpsrc/zlib/zlib_1.2.8/_output/install/lib/libz.so
-    HTSLIB_LIB      := ../../../staging/PostPrimary/pbbam/_output/install-build/lib/libpbbam.so
-    PBBAM_LIB       := ../../../staging/PostPrimary/pbbam/third-party/htslib/_output/install-build/lib/libhts.so
-    LIBPBDATA_LIB   := ../../../staging/PostPrimary/pbbam/third-party/htslib/_output/install-build/lib/libhts.so
-else
-    # Generating shared library
-    CXX_SHAREDFLAGS :=
-    LD_SHAREDFLAGS  :=
-    TARGET_LIB      := libpbihdf.a
-    HDF5_LIB        :=
-    ZLIB_LIB        :=
-    HTSLIB_LIB      :=
-    PBBAM_LIB       :=
-    LIBPBDATA_LIB   :=
-endif
-
-DEP_LIBS := $(HDF5_LIB) $(ZLIB_LIB) $(HTSLIB_LIB) $(PBBAM_LIB) $(PBDATA_LIB)
-
-# FIXME: remove PBDATA_INCLUDE and assign directly to LIBPBDATA_INCLUDE.
-#        Use only LIBPBDATA_INCLUDE to be consistent with the libblasr Makefile
-PBDATA_INCLUDE := ../pbdata
-LIBPBDATA_INCLUDE := $(PBDATA_INCLUDE)
-PBBAM_INCLUDE := $(PBBAM)/include
-HTSLIB_INCLUDE ?= $(PBBAM)/third-party/htslib
-
-INCLUDES = -I$(LIBPBDATA_INCLUDE)
-
-
-ifeq ($(origin nopbbam), undefined)
-    INCLUDES += -I$(PBBAM_INCLUDE) -I$(HTSLIB_INCLUDE) -I$(BOOST_INCLUDE)
-endif
-
-ifneq ($(ZLIB_ROOT), notfound)
-	INCLUDES += -I$(ZLIB_ROOT)/include
-endif
-
-CXXOPTS := -std=c++11 -pedantic -MMD -MP
-sources := $(wildcard *.cpp)
-objects := $(sources:.cpp=.o)
-shared_objects := $(sources:.cpp=.shared.o)
-dependencies := $(objects:.o=.d) $(shared_objects:.o=.d)
-
-ifneq ($(HDF5_INC),)
-HDF_HEADERS :=
-INCLUDES += -I$(HDF5_INC)
-else
-HDF_HEADERS := hdf5-1.8.12-headers
-INCLUDES += -I./$(HDF_HEADERS)/src -I./$(HDF_HEADERS)/c++/src
-endif
-
-all : CXXFLAGS ?= -O3
-
-debug : CXXFLAGS ?= -g -ggdb -fno-inline
-
-profile : CXXFLAGS ?= -Os -pg
-
-g : CXXFLAGS ?= -g -ggdb -fno-inline -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free -fno-omit-frame-pointer 
-
-all debug profile g: $(HDF_HEADERS) $(TARGET_LIB)
-
-libpbihdf.a: $(objects)
-	$(AR_pp) $(ARFLAGS) $@ $^
-
-libpbihdf.so: $(shared_objects) $(DEP_LIBS)
-	$(CXX) $(LD_SHAREDFLAGS) -o $@ $^
-
-%.o: %.cpp
-	$(CXX) $(CXXOPTS) $(CXXFLAGS) $(INCLUDES) -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.o) $(@:%.o=%.d)" -c $< -o $@
-
-%.shared.o: %.cpp
-	$(CXX) $(CXX_SHAREDFLAGS) $(CXXOPTS) $(CXXFLAGS) $(INCLUDES) -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.o) $(@:%.o=%.d)" -c $< -o $@
-
-$(HDF_HEADERS): 
-	curl -k -L https://www.dropbox.com/s/8971bcyy5o42rxb/hdf5-1.8.12-headers.tar.bz2\?dl\=0 | tar xjf - 
-
-# .INTERMEDIATE: $(objects)
-
-clean: 
-	@rm -f libpbihdf.a libpbihdf.so
-	@rm -f $(objects) $(shared_objects) $(dependencies)
-
--include $(dependencies)
diff --git a/hdf/build.mk b/hdf/build.mk
new file mode 120000
index 0000000..2247f36
--- /dev/null
+++ b/hdf/build.mk
@@ -0,0 +1 @@
+makefile
\ No newline at end of file
diff --git a/hdf/makefile b/hdf/makefile
new file mode 100644
index 0000000..8bf9ad5
--- /dev/null
+++ b/hdf/makefile
@@ -0,0 +1,33 @@
+all:
+# 'all' is declared before the includes, presumably so it stays the default goal.
+THISDIR:=$(dir $(realpath $(lastword $(MAKEFILE_LIST))))
+-include ${CURDIR}/defines.mk
+include ${THISDIR}/../rules.mk
+
+CXXOPTS  += -std=c++11 -pedantic
+INCLUDES += ${LIBPBDATA_INC} ${HDF5_INC} ${PBBAM_INC} ${HTSLIB_INC} ${BOOST_INC}
+LIBS     += ${LIBPBDATA_LIB} ${HDF5_LIB} ${PBBAM_LIB} ${HTSLIB_LIB} ${ZLIB_LIB}
+LDFLAGS  += $(patsubst %,-L%,${LIBS})
+LDLIBS += -lpbdata -lhdf5 -lhdf5_cpp
+
+all: libpbihdf.a libpbihdf${SH_LIB_EXT}
+
+paths := ${THISDIR}
+sources := $(wildcard ${THISDIR}*.cpp)
+sources := $(notdir ${sources})
+objects := $(sources:.cpp=.o)
+shared_objects := $(sources:.cpp=.shared.o)
+dependencies := $(objects:.o=.d) $(shared_objects:.o=.d)
+
+vpath %.cpp ${paths}
+
+libpbihdf.a: $(objects)
+	$(AR) $(ARFLAGS) $@ $^
+# No recipe below; presumably the link rule comes from rules.mk -- TODO confirm.
+libpbihdf${SH_LIB_EXT}: $(shared_objects)
+# Remove the shared library under its configured extension, not a hard-coded .so.
+clean: 
+	rm -f libpbihdf.a libpbihdf${SH_LIB_EXT} *.o *.d
+
+-include $(dependencies)
+depend: $(dependencies:.d=.depend)
diff --git a/makefile b/makefile
new file mode 100644
index 0000000..c9d37af
--- /dev/null
+++ b/makefile
@@ -0,0 +1,31 @@
+SHELL=/bin/bash
+
+THISDIR:=$(dir $(realpath $(lastword $(MAKEFILE_LIST))))
+# Every target below is a command, not a file, so each one is declared phony.
+.PHONY: all all-debug all-opt all-depend libpbdata libpbihdf libblasr gtest clean cleanall
+
+all:
+	${MAKE} libpbdata
+	${MAKE} libpbihdf
+	${MAKE} libblasr
+all-debug:
+	${MAKE} CXXFLAGS=-g all
+all-opt:
+	${MAKE} CXXFLAGS=-O3 all
+all-depend:
+	${MAKE} -C ${THISDIR}/pbdata depend
+libpbdata:
+	${MAKE} -C ${THISDIR}/pbdata libconfig.h
+	${MAKE} -C ${THISDIR}/pbdata all
+libpbihdf:
+	${MAKE} -C ${THISDIR}/hdf all
+libblasr:
+	${MAKE} -C ${THISDIR}/alignment all
+gtest:
+	${MAKE} -C ${THISDIR}/unittest gtest
+clean:
+	${MAKE} -C ${THISDIR}/pbdata clean
+	${MAKE} -C ${THISDIR}/hdf clean
+	${MAKE} -C ${THISDIR}/alignment clean
+	${MAKE} -C ${THISDIR}/unittest clean
+cleanall: clean
diff --git a/pbdata/.gitignore b/pbdata/.gitignore
new file mode 100644
index 0000000..4c3cf0b
--- /dev/null
+++ b/pbdata/.gitignore
@@ -0,0 +1,2 @@
+/libconfig.h
+/defines.mk
diff --git a/pbdata/CCSSequence.cpp b/pbdata/CCSSequence.cpp
index 1f8e5b9..85239db 100644
--- a/pbdata/CCSSequence.cpp
+++ b/pbdata/CCSSequence.cpp
@@ -5,22 +5,22 @@ void CCSSequence::Free() {
     numConsensusBases = 0;
     SMRTSequence::Free();
     unrolledRead.Free();
-    /*
-    ClearMemory(passStartPulse);
-    ClearMemory(passNumPulses);
-    ClearMemory(passStartBase);
-    ClearMemory(passNumBases);
-    ClearMemory(passDirection);
-    ClearMemory(adapterHitBefore);
-    ClearMemory(adapterHitAfter);
-    ClearMemory(adapterHitConfidence);
-    */
 }
 
 int CCSSequence::GetStorageSize() {
     return SMRTSequence::GetStorageSize() + unrolledRead.GetStorageSize();
 }
 
+UInt CCSSequence::HoleNumber(void) const { // getter: returns SMRTSequence::HoleNumber() unchanged
+    return SMRTSequence::HoleNumber();
+}
+
+CCSSequence & CCSSequence::HoleNumber(const UInt holeNumber) { // setter; returns *this
+    SMRTSequence::HoleNumber(holeNumber);
+    unrolledRead.HoleNumber(holeNumber); // the unrolled read receives the same hole number
+    return *this;
+}
+
 //
 // In the first iteration, Explode simply pulls the subreads out
 // that are used in the ccs.   Eventually, it will pull out all
@@ -31,5 +31,6 @@ void CCSSequence::Explode(std::vector<SMRTSequence> &subreads) {
     int subreadIndex;
     for (subreadIndex = 0; subreadIndex < numPasses; subreadIndex++) {
         subreads[subreadIndex].ReferenceSubstring(this->unrolledRead, passStartBase[subreadIndex], passNumBases[subreadIndex]);
+        subreads[subreadIndex].zmwData = unrolledRead.zmwData;
     }
 }
diff --git a/pbdata/CCSSequence.hpp b/pbdata/CCSSequence.hpp
index 15e89d6..a415aca 100644
--- a/pbdata/CCSSequence.hpp
+++ b/pbdata/CCSSequence.hpp
@@ -5,11 +5,12 @@
 #include "SMRTSequence.hpp"
 #include "VectorUtils.hpp"
 
+
 //
 // A CCS Sequence is both a SMRTSequence itself, and contains a list of SMRTSequences.
 //
 class CCSSequence : public SMRTSequence {
- public:
+public:
 	UInt numPasses;
 	UInt numConsensusBases;
 	std::vector<DNALength> passStartPulse, passNumPulses, passStartBase, passNumBases;
@@ -23,17 +24,24 @@ class CCSSequence : public SMRTSequence {
 	//
 	SMRTSequence      unrolledRead;
 
+public:
     inline ~CCSSequence();
 	void Free(); 
 
+    UInt HoleNumber(void) const;
+
+    CCSSequence & HoleNumber(const UInt holeNumber);
+
 	int GetStorageSize(); 
-	//
-	//
-	// In the first iteration, Explode simply pulls the subreads out
-	// that are used in the ccs.   Eventually, it will pull out all
-	// high-quality subreads.
-	// 
+
+    /// \name
+    /// \{
+        /// In the first iteration, Explode simply pulls the subreads out
+        /// that are used in the ccs.   Eventually, it will pull out all
+        /// high-quality subreads.
+        ///
 	void Explode(std::vector<SMRTSequence> &subreads); 
+    /// \}
 };
 
 inline CCSSequence::~CCSSequence() {
diff --git a/pbdata/CompressedDNASequence.hpp b/pbdata/CompressedDNASequence.hpp
index a3fa6d1..2613f07 100644
--- a/pbdata/CompressedDNASequence.hpp
+++ b/pbdata/CompressedDNASequence.hpp
@@ -29,7 +29,7 @@ class CompressedDNASequence: public DNASequence {
 	CompressedDNASequence() {
 		const char t[] = "Compressed sequence\0";
 		titleLength = strlen(t);
-		title = new char[titleLength+1];
+		title = ProtectedNew<char>(titleLength+1);
 		strcpy(title, t);
 		title[titleLength] = '\0';
 	}
@@ -62,13 +62,13 @@ class CompressedDNASequence: public DNASequence {
 	}
 	
 	void Copy(FASTASequence &rhs) {
-		seq = new CompressedNucleotide[rhs.length];
+		seq = ProtectedNew<CompressedNucleotide>(rhs.length);
 		memcpy(seq, rhs.seq, rhs.length);
 		length = rhs.length;
 		if (title != NULL) {
 			delete[] title;
 		}
-		title = new char[rhs.titleLength+1];
+		title = ProtectedNew<char>(rhs.titleLength+1);
 		memcpy(title, rhs.title, rhs.titleLength);
 		titleLength = rhs.titleLength;
 		title[titleLength] = '\0';
diff --git a/pbdata/CompressedSequenceImpl.hpp b/pbdata/CompressedSequenceImpl.hpp
index 7759541..2144bcd 100644
--- a/pbdata/CompressedSequenceImpl.hpp
+++ b/pbdata/CompressedSequenceImpl.hpp
@@ -1,5 +1,6 @@
 #ifndef _BLASR_COMPRESSED_SEQUENCES_IMPL_HPP_
 #define _BLASR_COMPRESSED_SEQUENCES_IMPL_HPP_
+#include "utils.hpp"
 
 template<typename T_Sequence>
 void CompressedSequence<T_Sequence>::CopyConfiguration(CompressedSequence<T_Sequence> &rhs) {
@@ -54,13 +55,13 @@ char* CompressedSequence<T_Sequence>::GetName() {
 
 template<typename T_Sequence>
 void CompressedSequence<T_Sequence>::Copy(FASTASequence &rhs) {
-    seq = new CompressedNucleotide[rhs.length];
+    seq = ProtectedNew<CompressedNucleotide>(rhs.length);
     memcpy(seq, rhs.seq, rhs.length);
     length = rhs.length;
     if (title != NULL) {
         delete[] title;
     }
-    title = new char[rhs.titleLength+1];
+    title = ProtectedNew<char>(rhs.titleLength+1);
     memcpy(title, rhs.title, rhs.titleLength);
     titleLength = rhs.titleLength;
     title[titleLength] = '\0';
@@ -140,14 +141,14 @@ void CompressedSequence<T_Sequence>::Read(std::string inFileName) {
     if (hasTitle) {
         int inTitleLength;
         in.read((char*) &inTitleLength, sizeof(int));
-        char * inTitle = new char[inTitleLength+1];
+        char * inTitle = ProtectedNew<char>(inTitleLength+1);
         in.read((char*) inTitle, inTitleLength);
-        inTitle[titleLength] = '\0';
+        inTitle[inTitleLength] = '\0'; // terminate at the length just read, not at the member titleLength
         CopyTitle(inTitle, inTitleLength);
         delete [] inTitle;
     }
     in.read((char*) &length, sizeof(DNALength));
-    seq = new Nucleotide[length];
+    seq = ProtectedNew<Nucleotide>(length);
     in.read((char*) seq, length * sizeof(Nucleotide));
     if (hasIndex) {
         index.Read(in);
@@ -190,7 +191,7 @@ int CompressedSequence<T_Sequence>::BuildReverseIndex(int maxRun, int binSize) {
     //
     index.Free();
     index.indexLength = hpi/index.binSize + 1;
-    index.index = new int[index.indexLength];
+    index.index = ProtectedNew<int>(index.indexLength);
     hpi = 0;
     int ii = 0;
     for (i = 0; i < length; i++) { 
@@ -307,7 +308,7 @@ DNALength CompressedSequence<T_Sequence>::FourBitDecompressHomopolymers(int star
         count >>= 4;
         decompSeq.length += count;
     }
-    decompSeq.seq = new Nucleotide[decompSeq.length];
+    decompSeq.seq = ProtectedNew<Nucleotide>(decompSeq.length);
 
     //
     // Now store the actual decompressed seq.
diff --git a/pbdata/DNASequence.cpp b/pbdata/DNASequence.cpp
index cec3fab..eef5f0a 100644
--- a/pbdata/DNASequence.cpp
+++ b/pbdata/DNASequence.cpp
@@ -33,7 +33,7 @@ void DNASequence::Append(const DNASequence &rhs, DNALength appendPos) {
     //
     if (appendPos == 0) {
         DNALength  newSeqLength = length + rhs.length;
-        newSeq = new Nucleotide[newSeqLength];
+        newSeq = ProtectedNew<Nucleotide>(newSeqLength);
         memcpy(newSeq, seq, length);
         memcpy(&newSeq[length], rhs.seq, rhs.length);
 
@@ -53,7 +53,7 @@ void DNASequence::Append(const DNASequence &rhs, DNALength appendPos) {
             length = appendPos;
             DNALength newSeqLength;
             newSeqLength = length + rhs.length;
-            newSeq = new Nucleotide[newSeqLength];
+            newSeq = ProtectedNew<Nucleotide>(newSeqLength);
             memcpy(newSeq, seq, length);
             memcpy(&newSeq[length], rhs.seq, rhs.length);
             if (deleteOnExit and lengthCopy != 0) {
@@ -114,7 +114,7 @@ DNASequence& DNASequence::Copy(const DNASequence &rhs, DNALength rhsPos, DNALeng
         seq = NULL;
     }
     else {
-        seq = new Nucleotide [rhsLength];
+        seq = ProtectedNew<Nucleotide>(rhsLength);
         memcpy(seq, &rhs.seq[rhsPos], rhsLength);
     }
     length = rhsLength;
@@ -139,7 +139,7 @@ void DNASequence::ShallowCopy(const DNASequence &rhs) {
     deleteOnExit = false;
 }
 
-int DNASequence::GetStorageSize() {
+int DNASequence::GetStorageSize() const {
     return (length * sizeof(Nucleotide));
 }
 
@@ -151,11 +151,11 @@ DNASequence &DNASequence::operator=(const DNASequence &rhs){
 //
 // synonym for printseq
 //
-void DNASequence::Print(std::ostream &out, int lineLength) {
+void DNASequence::Print(std::ostream &out, int lineLength) const {
     PrintSeq(out, lineLength);
 }
 
-void DNASequence::PrintSeq(std::ostream &out, int lineLength) {
+void DNASequence::PrintSeq(std::ostream &out, int lineLength) const {
     if (lineLength == 0) {
         std::string line;
         line.assign((char*)seq, length);
@@ -181,8 +181,7 @@ void DNASequence::PrintSeq(std::ostream &out, int lineLength) {
 
 void DNASequence::Allocate(DNALength plength) {
     DNASequence::Free();
-
-    seq = new Nucleotide [plength];
+    seq = ProtectedNew<Nucleotide> (plength);
     length = plength;
     deleteOnExit = true;
 }
@@ -266,12 +265,12 @@ void DNASequence::Assign(DNASequence &ref, DNALength start, DNALength plength) {
     
     if (plength) {
         length = plength;
-        seq = new Nucleotide[length];
+        seq = ProtectedNew<Nucleotide> (length);
         memcpy(seq, &ref.seq[start], length);
     }
     else if (start) {
         length = ref.length - start;
-        seq = new Nucleotide[length];
+        seq = ProtectedNew<Nucleotide> (length);
         memcpy(seq, &ref.seq[start], length);
     }
     else {
@@ -298,7 +297,7 @@ void DNASequence::Concatenate(const Nucleotide *moreSeq, DNALength moreSeqLength
     DNALength prevLength = length;
     length += moreSeqLength;
     Nucleotide *prev = seq;
-    seq = new Nucleotide[length];
+    seq = ProtectedNew<Nucleotide> (length);
     if (prev != NULL) {
         memcpy(seq, prev, prevLength);
         delete[] prev;
@@ -351,11 +350,11 @@ void DNASequence::CleanupASCII() {
     }
 }
 
-Nucleotide DNASequence::GetNuc(DNALength i) {
+Nucleotide DNASequence::GetNuc(DNALength i) const {
     return seq[i];
 }
 
-DNALength DNASequence::GetRepeatContent() {
+DNALength DNASequence::GetRepeatContent() const {
     DNALength i;
     DNALength nRepeat = 0;
     for (i =0 ; i < length;i++) {
@@ -385,12 +384,12 @@ void DNASequence::Free() {
 
 void DNASequence::Resize(DNALength newLength) {
     DNASequence::Free();
-    seq = new Nucleotide[newLength];
+    seq = ProtectedNew<Nucleotide>(newLength);
     length = newLength;
     deleteOnExit = true;
 }
 
-DNALength DNASequence::GetSeqStorage() {
+DNALength DNASequence::GetSeqStorage() const{
     return length;
 }
 
@@ -400,3 +399,4 @@ DNASequence & DNASequence::Copy(const PacBio::BAM::BamRecord & record) {
 }
 #endif
 
+
diff --git a/pbdata/DNASequence.hpp b/pbdata/DNASequence.hpp
index 286e7d8..5aed371 100644
--- a/pbdata/DNASequence.hpp
+++ b/pbdata/DNASequence.hpp
@@ -8,6 +8,7 @@
 #include <cassert>
 #include "Types.h"
 #include "NucConversion.hpp"
+#include "utils.hpp"
 #include "libconfig.h"
 
 #ifdef USE_PBBAM
@@ -41,15 +42,15 @@ public:
 
     DNASequence & Copy(const std::string & rhs);
 
-    int GetStorageSize();
+    int GetStorageSize() const;
 
     DNASequence &operator=(const DNASequence &rhs);
 
     DNASequence &operator=(const std::string &rhs);
 
-    void Print(std::ostream &out, int lineLength = 50);
+    void Print(std::ostream &out, int lineLength = 50) const;
 
-    void PrintSeq(std::ostream &out, int lineLength = 50);
+    void PrintSeq(std::ostream &out, int lineLength = 50) const;
 
     void Allocate(DNALength plength);
 
@@ -99,9 +100,9 @@ public:
         return seq[i];
     }
 
-    Nucleotide GetNuc(DNALength i); 
+    Nucleotide GetNuc(DNALength i) const; 
 
-    DNALength GetRepeatContent(); 
+    DNALength GetRepeatContent() const; 
 
     void CleanupOnFree();
 
@@ -109,7 +110,7 @@ public:
 
     void Resize(DNALength newLength);
 
-    DNALength GetSeqStorage();
+    DNALength GetSeqStorage() const;
 
 #ifdef USE_PBBAM
     /// Copies a BamRecord as a DNASequence.
@@ -174,7 +175,7 @@ template<typename T>
 DNALength ResizeSequence(T &dnaseq, DNALength newLength) {
     assert(newLength > 0);
     ((T&)dnaseq).Free();
-    dnaseq.seq = new Nucleotide[newLength];
+    dnaseq.seq = ProtectedNew<Nucleotide>(newLength);
     dnaseq.length = newLength;
     dnaseq.deleteOnExit = true;
     return newLength;
diff --git a/pbdata/Enumerations.h b/pbdata/Enumerations.h
index b51575b..0fbd6fe 100644
--- a/pbdata/Enumerations.h
+++ b/pbdata/Enumerations.h
@@ -1,3 +1,40 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Mark Chaisson
+
 #ifndef _BLASR_ENUMERATIONS_HPP_
 #define _BLASR_ENUMERATIONS_HPP_
 
@@ -15,17 +52,17 @@ typedef enum T_FileType {Fasta,
 typedef enum T_Strand {Forward, Reverse} Strand;
 
 typedef enum T_PlatformType {
-    Springfield,
-    Astro,
+    Astro=1,       // explicit value; Astro was already 1 under the old implicit numbering
+    Springfield=2, // was 0 under the old implicit numbering; NoPlatform, which follows, becomes 3 (was 2)
     NoPlatform
 } PlatformId;
 
 typedef enum T_RegionType {
-    GlobalAccuracy,
-    HQRegion,
     Adapter,
     Insert, 
-    ArtifactRegion
+    HQRegion,          // implicit value 2 now that Adapter (0) and Insert (1) come first; was 1
+    BarCode,           // new enumerator, implicit value 3
+    UnknownRegionType  // implicit value 4, the value ArtifactRegion used to have
 } RegionType;
 
 typedef enum T_PulseMetricType {
diff --git a/pbdata/FASTAReader.cpp b/pbdata/FASTAReader.cpp
index 6477fc0..ba10d21 100644
--- a/pbdata/FASTAReader.cpp
+++ b/pbdata/FASTAReader.cpp
@@ -81,7 +81,7 @@ int FASTAReader::Init(string &seqInName, int passive) {
     }
     SetFileSize();
     filePtr = (char*) mmap(0, fileSize, PROT_READ, MAP_PRIVATE, fileDes, 0);
-    if (filePtr == NULL) {
+    if (filePtr == MAP_FAILED) {
         cout << "ERROR, Fail to load FASTA file " << seqInName 
              << " to virtual memory." << endl;
         exit(1);
@@ -217,7 +217,7 @@ void FASTAReader::ReadTitle(long &p, FASTASequence & seq) {
     int seqTitleLen; 
     ReadTitle(p, seqTitle, seqTitleLen);
     seq.CopyTitle(seqTitle, seqTitleLen);
-    if (seqTitle) {delete seqTitle;}
+    if (seqTitle) {delete [] seqTitle;}
 }
 
 void FASTAReader::ReadTitle(long &p, char *&title, int &titleLength) {
@@ -233,7 +233,10 @@ void FASTAReader::ReadTitle(long &p, char *&title, int &titleLength) {
     titleLength = p - curPos;
     if (titleLength > 0) {
         if (title) {delete [] title; title = NULL;}
-        title = new char[titleLength+1];
+        title = ProtectedNew<char> (titleLength + 1);
+        if (title == nullptr) {
+            cout << "ERROR, unable to read FASTA file to memory. " << endl; exit(1);
+        }
         int t = 0;
         for (p = curPos; p < curPos + titleLength; p++, t++) {
             title[t] = filePtr[p];
@@ -294,7 +297,7 @@ int FASTAReader::GetNext(FASTASequence &seq) {
     seq.length = 0;
     if (seqLength > 0) {
         seq.length = seqLength;
-        seq.seq = new Nucleotide[seqLength+padding+1];
+        seq.seq = ProtectedNew <Nucleotide>(seqLength+padding+1);
         p = curPos;
         seq.deleteOnExit = true;
         long s = 0;
diff --git a/pbdata/FASTASequence.cpp b/pbdata/FASTASequence.cpp
index 4b579cc..67cf81b 100644
--- a/pbdata/FASTASequence.cpp
+++ b/pbdata/FASTASequence.cpp
@@ -12,14 +12,14 @@ FASTASequence::FASTASequence() : DNASequence() {
     // regardless of deleteOnExit.
 }
 
-void FASTASequence::PrintSeq(ostream &out, int lineLength, char delim) {
+void FASTASequence::PrintSeq(ostream &out, int lineLength, char delim) const {
     out << delim;
     if (title) out << title;
     out << endl;
-    static_cast<DNASequence*>(this)->PrintSeq(out, lineLength); 
+    static_cast<const DNASequence*>(this)->PrintSeq(out, lineLength); 
 }
 
-int FASTASequence::GetStorageSize() {
+int FASTASequence::GetStorageSize() const {
     if (!title) 
         return DNASequence::GetStorageSize();
     return strlen(title) + DNASequence::GetStorageSize();
@@ -42,36 +42,6 @@ string FASTASequence::GetName() const {
     return name;
 }
 
-//
-// Define  some no-ops to satisfy instantiating templates that
-// expect these to exist.
-//
-bool FASTASequence::StoreHoleNumber(int holeNumber) {return false;}
-bool FASTASequence::StoreHoleStatus(unsigned char holeStatus) {return false;}
-bool FASTASequence::StorePlatformId(PlatformId platformId) { return false;}
-bool FASTASequence::StoreZMWData(ZMWGroupEntry &data) { return false;}
-bool GetHoleNumber (int &holeNumberP) {
-    //
-    // There is no notion of a hole number for a fasta sequence.
-    //
-    return false;
-}
-
-bool FASTASequence::StoreXY(int16_t xy[]) {return false;}
-
-bool FASTASequence::GetXY(int xyP[]) {
-    //
-    // Although the xyP is stored in the fasta title for astro reads
-    // this class is more general than an astro read, so do not assume 
-    // that it may be found in the title.
-    //
-    // So, this function is effectively a noop.
-    //
-    xyP[0] = xyP[1] = 0;
-    return false;
-}
-
-
 void FASTASequence::ShallowCopy(const FASTASequence &rhs) {
     CheckBeforeCopyOrReference(rhs, "FASTASequence");
     FASTASequence::Free();
@@ -124,7 +94,7 @@ void FASTASequence::CopyTitle(string str) {
     FASTASequence::CopyTitle(str.c_str(), str.size());
 }
 
-void FASTASequence::GetFASTATitle(string& fastaTitle) {
+void FASTASequence::GetFASTATitle(string& fastaTitle) const {
     // look for the first space, and return the string until there.
     int i;
     for (i = 0; i < titleLength; i++ ){
diff --git a/pbdata/FASTASequence.hpp b/pbdata/FASTASequence.hpp
index bfadb58..24b1633 100644
--- a/pbdata/FASTASequence.hpp
+++ b/pbdata/FASTASequence.hpp
@@ -29,22 +29,12 @@ public:
     FASTASequence();
     inline ~FASTASequence();
 
-    void PrintSeq(std::ostream &out, int lineLength = 50, char delim='>');
+    void PrintSeq(std::ostream &out, int lineLength = 50, char delim='>') const;
 
-    int GetStorageSize();
+    int GetStorageSize() const;
 
     std::string GetName() const;
 
-    virtual bool StoreHoleNumber(int holeNumber);
-    virtual bool StoreHoleStatus(unsigned char holeStatus);
-    virtual bool StorePlatformId(PlatformId platformId);
-    virtual bool StoreZMWData(ZMWGroupEntry &data);
-    virtual bool StoreXY(int16_t xy[]);
-
-    bool GetHoleNumber (int &holeNumberP); 
-
-    bool GetXY(int xyP[]); 
-
     void ShallowCopy(const FASTASequence &rhs); 
 
     std::string GetTitle() const; 
@@ -55,7 +45,7 @@ public:
 
     void CopyTitle(std::string str);
 
-    void GetFASTATitle(std::string& fastaTitle); 
+    void GetFASTATitle(std::string& fastaTitle) const; 
 
     void CopySubsequence(FASTASequence &rhs, int readStart, int readEnd=-1); 
 
diff --git a/pbdata/FASTQReader.cpp b/pbdata/FASTQReader.cpp
index 324d57a..73128e5 100644
--- a/pbdata/FASTQReader.cpp
+++ b/pbdata/FASTQReader.cpp
@@ -45,7 +45,7 @@ int FASTQReader::GetNext(FASTQSequence &seq) {
     seq.length = p2 - p;
     long seqPos;
     if (seq.length > 0) {
-        seq.seq = new Nucleotide[seq.length];
+        seq.seq = ProtectedNew<Nucleotide>(seq.length);
         p2 = p;
         seqPos = 0;
         while(p2 < fileSize and filePtr[p2] != '\n') { seq.seq[seqPos] = filePtr[p2]; p2++; seqPos++;}
diff --git a/pbdata/FASTQSequence.cpp b/pbdata/FASTQSequence.cpp
index 26789ac..aa9c59a 100644
--- a/pbdata/FASTQSequence.cpp
+++ b/pbdata/FASTQSequence.cpp
@@ -18,7 +18,7 @@ using namespace std;
 //
 int FASTQSequence::charToQuality = FASTQ_CHAR_TO_QUALITY;
 
-QVScale FASTQSequence::GetQVScale() {
+QVScale FASTQSequence::GetQVScale() const {
     return qvScale;
 }
 
@@ -42,7 +42,7 @@ FASTQSequence::GetQVPointerByIndex(int index) {
     return NULL;
 }
 
-int FASTQSequence::GetStorageSize() {
+int FASTQSequence::GetStorageSize() const {
     int total = 0;
     int nQV = 0;
     int nTag =0;
@@ -88,12 +88,10 @@ FASTQSequence::FASTQSequence() : FASTASequence() {
     insertionQVPrior = 0;
     substitutionQVPrior = 0;
     preBaseDeletionQVPrior = 0;
-
-    subreadStart = subreadEnd = 0;
     qvScale = PHRED;
 }
 
-QualityValue FASTQSequence::GetDeletionQV(DNALength pos) {
+QualityValue FASTQSequence::GetDeletionQV(DNALength pos) const {
     assert(pos < ((unsigned int)-1));
     assert(pos < length);
     if (deletionQV.Empty()) {
@@ -104,7 +102,7 @@ QualityValue FASTQSequence::GetDeletionQV(DNALength pos) {
     }
 }
 
-QualityValue FASTQSequence::GetMergeQV(DNALength pos) {
+QualityValue FASTQSequence::GetMergeQV(DNALength pos) const {
     assert(pos < ((unsigned int)-1));
     assert(pos < length);
     if (mergeQV.Empty()) {
@@ -115,7 +113,7 @@ QualityValue FASTQSequence::GetMergeQV(DNALength pos) {
     }
 }
 
-Nucleotide FASTQSequence::GetSubstitutionTag(DNALength pos) {
+Nucleotide FASTQSequence::GetSubstitutionTag(DNALength pos) const {
     if (substitutionTag == NULL) {
         return 'N';
     }
@@ -124,7 +122,7 @@ Nucleotide FASTQSequence::GetSubstitutionTag(DNALength pos) {
     return substitutionTag[pos];
 }
 
-Nucleotide FASTQSequence::GetDeletionTag(DNALength pos) {
+Nucleotide FASTQSequence::GetDeletionTag(DNALength pos) const {
     if (deletionTag == NULL) {
         return 'N';
     }
@@ -133,7 +131,7 @@ Nucleotide FASTQSequence::GetDeletionTag(DNALength pos) {
     return deletionTag[pos];
 }
 
-QualityValue FASTQSequence::GetInsertionQV(DNALength pos) {
+QualityValue FASTQSequence::GetInsertionQV(DNALength pos) const {
     if (insertionQV.Empty()) {
         return insertionQVPrior;
     }
@@ -142,7 +140,7 @@ QualityValue FASTQSequence::GetInsertionQV(DNALength pos) {
     return insertionQV[pos];
 }
 
-QualityValue FASTQSequence::GetSubstitutionQV(DNALength pos) {
+QualityValue FASTQSequence::GetSubstitutionQV(DNALength pos) const {
     if (substitutionQV.Empty()) {
         return substitutionQVPrior;
     }
@@ -151,7 +149,7 @@ QualityValue FASTQSequence::GetSubstitutionQV(DNALength pos) {
     return substitutionQV[pos];
 }
 
-QualityValue FASTQSequence::GetPreBaseDeletionQV(DNALength pos, Nucleotide nuc) {
+QualityValue FASTQSequence::GetPreBaseDeletionQV(DNALength pos, Nucleotide nuc) const {
     if (preBaseDeletionQV.Empty()) {
         return preBaseDeletionQVPrior;
     }
@@ -270,7 +268,7 @@ void FASTQSequence::AllocateMergeQVSpace(DNALength len) {
 
 void FASTQSequence::AllocateDeletionTagSpace(DNALength qualLength) {
     if (deletionTag != NULL) delete[] deletionTag;
-    deletionTag = new Nucleotide[qualLength];
+    deletionTag = ProtectedNew<Nucleotide>(qualLength);
 }
 
 void FASTQSequence::AllocatePreBaseDeletionQVSpace(DNALength qualLength) {
@@ -287,7 +285,7 @@ void FASTQSequence::AllocateSubstitutionQVSpace(DNALength qualLength ){
 
 void FASTQSequence::AllocateSubstitutionTagSpace(DNALength qualLength ){ 
     if (substitutionTag != NULL) delete[] substitutionTag;
-    substitutionTag = new Nucleotide[qualLength];
+    substitutionTag = ProtectedNew<Nucleotide>(qualLength);
 }
 
 void FASTQSequence::AllocateRichQualityValues(DNALength qualLength) {
@@ -335,7 +333,7 @@ void FASTQSequence::Assign(FASTQSequence &rhs) {
     FASTQSequence::CopyQualityValues(rhs);
 }
 
-void FASTQSequence::PrintFastq(ostream &out, int lineLength) {
+void FASTQSequence::PrintFastq(ostream &out, int lineLength) const {
     PrintSeq(out, lineLength, '@');
     if (lineLength == 0) { 
         out << endl;
@@ -346,12 +344,12 @@ void FASTQSequence::PrintFastq(ostream &out, int lineLength) {
     }
 }
 
-void FASTQSequence::PrintFastqQuality(ostream &out, int lineLength) {
+void FASTQSequence::PrintFastqQuality(ostream &out, int lineLength) const {
     out << "+" << endl;
     PrintAsciiQual(out, lineLength);
 }
 
-bool FASTQSequence::GetQVs(const QVIndex & qvIndex, std::vector<uint8_t> & qvs, bool reverse) {
+bool FASTQSequence::GetQVs(const QVIndex & qvIndex, std::vector<uint8_t> & qvs, bool reverse) const {
     qvs.clear();
     uint8_t *  qualPtr;
     int charOffset = charToQuality;
@@ -391,7 +389,7 @@ bool FASTQSequence::GetQVs(const QVIndex & qvIndex, std::vector<uint8_t> & qvs,
     return true;
 }
 
-QVIndex FASTQSequence::GetQVIndex(const std::string & qvName) {
+QVIndex FASTQSequence::GetQVIndex(const std::string & qvName) const {
     if (qvName == "QualityValue") {
         return I_QualityValue;
     } else if (qvName == "InsertionQV") {
@@ -412,11 +410,11 @@ QVIndex FASTQSequence::GetQVIndex(const std::string & qvName) {
     }
 }
 
-bool FASTQSequence::GetQVs(const std::string & qvName, std::vector<uint8_t> & qvs, bool reverse){
+bool FASTQSequence::GetQVs(const std::string & qvName, std::vector<uint8_t> & qvs, bool reverse) const {
     return GetQVs(GetQVIndex(qvName), qvs, reverse);
 }
 
-bool FASTQSequence::GetQVs(const std::string & qvName, std::string & qvsStr, bool reverse) {
+bool FASTQSequence::GetQVs(const std::string & qvName, std::string & qvsStr, bool reverse) const {
     std::vector<uint8_t> qvs;
     bool OK = GetQVs(qvName, qvs, reverse);
     qvsStr = string(qvs.begin(), qvs.end());
@@ -424,7 +422,7 @@ bool FASTQSequence::GetQVs(const std::string & qvName, std::string & qvsStr, boo
 }
 
 void FASTQSequence::PrintAsciiRichQuality(ostream &out, 
-        int whichQuality, int lineLength) {
+        int whichQuality, int lineLength) const {
     vector<uint8_t> qvs;
     bool OK = GetQVs(static_cast<QVIndex>(whichQuality), qvs);
     
@@ -460,11 +458,11 @@ void FASTQSequence::PrintAsciiRichQuality(ostream &out,
     }
 }
 
-void FASTQSequence::PrintAsciiQual(ostream &out, int lineLength) {
+void FASTQSequence::PrintAsciiQual(ostream &out, int lineLength) const {
     PrintAsciiRichQuality(out, 0, lineLength);
 }
 
-void FASTQSequence::PrintQual(ostream &out, int lineLength) {
+void FASTQSequence::PrintQual(ostream &out, int lineLength) const {
     out << ">" << this->title << endl;
     DNALength i;
     for (i = 0; i < length; i++ ){
@@ -479,7 +477,7 @@ void FASTQSequence::PrintQual(ostream &out, int lineLength) {
     }
 }
 
-void FASTQSequence::PrintQualSeq(ostream &out, int lineLength) {
+void FASTQSequence::PrintQualSeq(ostream &out, int lineLength) const {
     FASTASequence::PrintSeq(out, lineLength);
     lineLength /= 4;
     PrintQual(out, lineLength);
@@ -593,7 +591,7 @@ void FASTQSequence::LowerCaseMask(int qThreshold) {
     }
 }
 
-float FASTQSequence::GetAverageQuality() {
+float FASTQSequence::GetAverageQuality() const {
     DNALength p;
     float totalQ;
     if (qual.Empty() == true) { return 0.0; }
@@ -643,9 +641,5 @@ void FASTQSequence::Copy(const PacBio::BAM::BamRecord & record) {
         AllocateDeletionTagSpace(static_cast<DNALength>(qvs.size()));
         std::memcpy(deletionTag, qvs.c_str(), qvs.size() * sizeof(char));
     }
-    // preBaseQVs are not included in BamRecord, and will not be copied.
-    
-    subreadStart = static_cast<int>(record.QueryStart());
-    subreadEnd = static_cast<int>(record.QueryEnd());
 }
 #endif
diff --git a/pbdata/FASTQSequence.hpp b/pbdata/FASTQSequence.hpp
index f94b2b0..334d5e6 100644
--- a/pbdata/FASTQSequence.hpp
+++ b/pbdata/FASTQSequence.hpp
@@ -22,35 +22,34 @@ public:
     QualityValueVector<QualityValue> mergeQV;
     Nucleotide *deletionTag;
     Nucleotide *substitutionTag;
-    int subreadStart, subreadEnd;
     QualityValue deletionQVPrior, insertionQVPrior, substitutionQVPrior, preBaseDeletionQVPrior;
 
     QVScale qvScale;
 
-    QVScale GetQVScale(); 
+    QVScale GetQVScale() const; 
 
     void SetQVScale(QVScale qvScaleP); 
 
     QualityValueVector<QualityValue>* GetQVPointerByIndex(int index);
 
-    int GetStorageSize(); 
+    int GetStorageSize() const; 
 
     FASTQSequence();
     inline ~FASTQSequence();
 
-    QualityValue GetDeletionQV(DNALength pos); 
+    QualityValue GetDeletionQV(DNALength pos) const; 
 
-    QualityValue GetMergeQV(DNALength pos); 
+    QualityValue GetMergeQV(DNALength pos) const; 
 
-    Nucleotide GetSubstitutionTag(DNALength pos); 
+    Nucleotide GetSubstitutionTag(DNALength pos) const; 
 
-    Nucleotide GetDeletionTag(DNALength pos); 
+    Nucleotide GetDeletionTag(DNALength pos) const; 
 
-    QualityValue GetInsertionQV(DNALength pos); 
+    QualityValue GetInsertionQV(DNALength pos) const; 
 
-    QualityValue GetSubstitutionQV(DNALength pos); 
+    QualityValue GetSubstitutionQV(DNALength pos) const; 
 
-    QualityValue GetPreBaseDeletionQV(DNALength pos, Nucleotide nuc); 
+    QualityValue GetPreBaseDeletionQV(DNALength pos, Nucleotide nuc) const; 
 
     void ShallowCopy(const FASTQSequence &rhs); 
 
@@ -90,40 +89,40 @@ public:
 
     void Assign(FASTQSequence &rhs); 
 
-    void PrintFastq(std::ostream &out, int lineLength=50); 
+    void PrintFastq(std::ostream &out, int lineLength=50) const; 
 
-    void PrintFastqQuality(std::ostream &out, int lineLength=50); 
+    void PrintFastqQuality(std::ostream &out, int lineLength=50) const; 
 
-    QVIndex GetQVIndex(const std::string & qvName);
+    QVIndex GetQVIndex(const std::string & qvName) const;
 
     /// Get QVs in vector<uint8_t> associated with the given QVIndex.
     /// \returns true if qvs are available, false otherwise
     /// \param [in] qvIndex - enum QVIndex
     /// \param [out] qvs - obtained QVs.
     /// \param [in] reverse - reverse orders of QVs or not
-    bool GetQVs(const QVIndex & qvIndex, std::vector<uint8_t> & qvs, bool reverse=false);
+    bool GetQVs(const QVIndex & qvIndex, std::vector<uint8_t> & qvs, bool reverse=false) const;
 
     /// Get QVs in vector<uint8_t>, given with QV Name.
     /// \returns true if qvs are available, false, otherwise
     /// \param [in] qvName - InsertionQV, DeletionQV, SubstitionQV, MergeQV, SubstitutionTag, DeletionTag
     /// \param [out] qvs - obtians QVs.
     /// \param [in] reverse - reverse orders of QVs or not.
-    bool GetQVs(const std::string & qvName, std::vector<uint8_t> & qvs, bool reverse=false);
+    bool GetQVs(const std::string & qvName, std::vector<uint8_t> & qvs, bool reverse=false) const;
 
     /// Get QVs in string, given with QV Name.
     /// \returns true if qvs are available, false, otherwise
     /// \param [in] qvName - InsertionQV, DeletionQV, SubstitionQV, MergeQV, SubstitutionTag, DeletionTag
     /// \param [out] qvs - obtians QVs.
     /// \param [in] reverse - reverse order of QVs or not
-    bool GetQVs(const std::string & qvName, std::string & qvs, bool reverse=false);
+    bool GetQVs(const std::string & qvName, std::string & qvs, bool reverse=false) const;
 
-    void PrintAsciiRichQuality(std::ostream &out, int whichQuality, int lineLength=50);
+    void PrintAsciiRichQuality(std::ostream &out, int whichQuality, int lineLength=50) const;
 
-    void PrintAsciiQual(std::ostream &out, int lineLength=50) ;
+    void PrintAsciiQual(std::ostream &out, int lineLength=50) const;
 
-    void PrintQual(std::ostream &out, int lineLength = 50); 
+    void PrintQual(std::ostream &out, int lineLength = 50) const;
 
-    void PrintQualSeq(std::ostream &out, int lineLength = 50);
+    void PrintQualSeq(std::ostream &out, int lineLength = 50) const;
 
     void MakeRC(FASTQSequence &rc); 
 
@@ -131,7 +130,7 @@ public:
 
     void LowerCaseMask(int qThreshold); 
 
-    float GetAverageQuality(); 
+    float GetAverageQuality() const; 
 
 #ifdef USE_PBBAM
     /// Copy name, sequence, and QVs from BamRecord.
diff --git a/pbdata/MD5Utils.cpp b/pbdata/MD5Utils.cpp
index dd1f45a..02f2e67 100644
--- a/pbdata/MD5Utils.cpp
+++ b/pbdata/MD5Utils.cpp
@@ -44,6 +44,7 @@ documentation and/or software.
 #include <assert.h>
 #include <strings.h>
 #include <iostream>
+#include "utils.hpp"
 #include "MD5Utils.hpp"
 
 using namespace std;
@@ -234,7 +235,7 @@ MD5::MD5(ifstream& stream){
 
 unsigned char *MD5::raw_digest(){
 
-  uint1 *s = new uint1[16];
+  uint1 *s = ProtectedNew<uint1>(16);
 
   if (!finalized){
     cerr << "MD5::raw_digest:  Can't get digest if you haven't "<<
@@ -253,7 +254,7 @@ unsigned char *MD5::raw_digest(){
 char *MD5::hex_digest(){
 
   int i;
-  char *s= new char[33];
+  char *s= ProtectedNew<char>(33);
 
   if (!finalized){
     cerr << "MD5::hex_digest:  Can't get digest if you haven't "<<
diff --git a/pbdata/Makefile b/pbdata/Makefile
deleted file mode 100644
index 4d3a688..0000000
--- a/pbdata/Makefile
+++ /dev/null
@@ -1,85 +0,0 @@
-# Let common.mk know that that the third party checks should be ignored
-COMMON_NO_THIRD_PARTY_REQD := true
-
-include ../common.mk
-
-CXXOPTS := -std=c++11 -pedantic -MMD -MP
-INCLUDES := -I. -Imatrix -Ireads -Iqvs -Imetagenome -Isaf -Iutils -Ialignment
-
-# To enable building a shared library, invoke as "make SHARED_LIB=true ..."
-ifneq ($(SHARED_LIB),)
-    # Generating shared library
-    CXX_SHAREDFLAGS := -fPIC
-    LD_SHAREDFLAGS  := -shared -fPIC
-    TARGET_LIB      := libpbdata.so
-    # Developers should set these to appropriate defaults (other systems 
-    # will override these on the command line):
-    PBBAM_LIB      := ../../../staging/PostPrimary/pbbam/_output/install-build/lib/libpbbam.so
-    HTSLIB_LIB       := ../../../staging/PostPrimary/htslib/_output/install-build/lib/libhts.so
-else
-    # Generating shared library
-    CXX_SHAREDFLAGS :=
-    TARGET_LIB      := libpbdata.a
-    LD_SHAREDFLAGS  :=
-    HTSLIB_LIB      :=
-    PBBAM_LIB       := 
-endif
-
-DEP_LIBS := $(HTSLIB_LIB) $(PBBAM_LIB)
-
-PBBAM_INCLUDE := $(PBBAM)/include
-HTSLIB_INCLUDE ?= $(PBBAM)/third-party/htslib
-
-sources := $(wildcard *.cpp) \
-	       $(wildcard matrix/*.cpp) \
-	       $(wildcard reads/*.cpp) \
-	       $(wildcard metagenome/*.cpp) \
-	       $(wildcard qvs/*.cpp) \
-	       $(wildcard saf/*.cpp) \
-	       $(wildcard utils/*.cpp) \
-	       $(wildcard loadpulses/*.cpp) \
-	       $(wildcard alignment/*.cpp) \
-	       $(wildcard amos/*.cpp) \
-	       $(wildcard sam/*.cpp) 
-
-objects := $(sources:.cpp=.o)
-shared_objects := $(sources:.cpp=.shared.o)
-dependencies := $(objects:.o=.d) $(shared_objects:.o=.d)
-
-all : CXXFLAGS ?= -O3
-
-debug : CXXFLAGS ?= -g -ggdb -fno-inline
-
-profile : CXXFLAGS ?= -Os -pg
-
-g: CXXFLAGS = -g -ggdb -fno-inline -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free -fno-omit-frame-pointer 
-
-all debug profile g: mklibconfig $(TARGET_LIB)
-
-mklibconfig:
-ifeq ($(origin nopbbam), undefined)
-	@grep "USE_PBBAM" libconfig.h 2>/dev/null 1>/dev/null || echo "#define USE_PBBAM" > libconfig.h
-    INCLUDES += -I$(PBBAM_INCLUDE) -I$(HTSLIB_INCLUDE) -I$(BOOST_INCLUDE)
-else
-	@rm -f libconfig.h && echo "" > libconfig.h && echo "no use libpbbam"
-endif
-
-libpbdata.a: $(objects)
-	$(AR_pp) $(ARFLAGS) $@ $^
-
-libpbdata.so: $(shared_objects) $(DEP_LIBS)
-	$(CXX_pp) $(LD_SHAREDFLAGS) -o $@ $^
-
-%.o: %.cpp
-	$(CXX_pp) $(CXXOPTS) $(CXXFLAGS) $(INCLUDES) -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.o) $(@:%.o=%.d)" -c $< -o $@
-
-%.shared.o: %.cpp
-	$(CXX_pp) $(CXX_SHAREDFLAGS) $(CXXOPTS) $(CXXFLAGS) $(INCLUDES) -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.o) $(@:%.o=%.d)" -c $< -o $@
-
-# .INTERMEDIATE: $(objects)
-
-clean: 
-	@rm -f libpbdata.a  libpbdata.so
-	@rm -f $(objects) $(shared_objects) $(dependencies)
-
--include $(dependencies)
diff --git a/pbdata/PacBioDefs.h b/pbdata/PacBioDefs.h
new file mode 100644
index 0000000..2f80381
--- /dev/null
+++ b/pbdata/PacBioDefs.h
@@ -0,0 +1,180 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Yuan Li
+
+#ifndef _PBDATA_PACBIO_DEFS_H_
+#define _PBDATA_PACBIO_DEFS_H_
+#include <string>
+#include <vector>
+
+namespace PacBio {
+namespace GroupNames {
+    ///PulseData
+    static const std::string pulsedata        = "PulseData";
+
+    ///PulseData/BaseCalls
+    static const std::string basecalls        = "BaseCalls";
+    ///PulseData/Regions
+    static const std::string regions          = "Regions";
+
+    ///PulseData/BaseCalls/BaseCall
+    static const std::string basecall         = "Basecall";
+
+    static const std::string qualityvalue     = "QualityValue";
+    static const std::string deletionqv       = "DeletionQV";
+    static const std::string deletiontag      = "DeletionTag";
+    static const std::string insertionqv      = "InsertionQV";
+    static const std::string mergeqv          = "MergeQV";
+    static const std::string substitutionqv   = "SubstitutionQV";
+    static const std::string substitutiontag  = "SubstitutionTag";
+    static const std::string prebaseframes    = "PreBaseFrames";
+    static const std::string widthinframes    = "WidthInFrames";
+
+    ///PulseData/BaseCalls/ZMW
+    static const std::string zmw              = "ZMW";
+    static const std::string zmwmetrics       = "ZMWMetrics";
+
+    ///PulseData/BaseCalls/ZMW/HoleNumber
+    static const std::string holenumber       = "HoleNumber";
+    static const std::string holestatus       = "HoleStatus";
+    static const std::string holexy           = "HoleXY";
+    static const std::string numevent         = "NumEvent";
+
+    ///PulseData/BaseCalls/ZMWMetrics/HQRegionSNR
+    static const std::string hqregionsnr      = "HQRegionSNR";
+    static const std::string readscore        = "ReadScore";
+    static const std::string productivity     = "Productivity";
+
+    static const std::vector<std::string> BaxQVNames ({
+            deletionqv, 
+            deletiontag, 
+            insertionqv,
+            mergeqv,
+            substitutionqv,
+            substitutiontag,
+            prebaseframes,
+            widthinframes,
+            hqregionsnr,
+            readscore});
+} // namespace GroupNames
+
+namespace AttributeNames {
+    namespace Common {
+        static const std::string changelistid       = "ChangeListID";
+        static const std::string description        = "Description";
+    } // Common
+
+    namespace ZMW {
+        namespace HoleStatus {
+            static const std::string lookuptable    = "LookupTable";
+        }
+    }
+
+    namespace Regions {
+        static const std::string columnnames        = "ColumnNames";
+        static const std::string regiontypes        = "RegionTypes";
+        static const std::string regiondescriptions = "RegionDescriptions";
+        static const std::string regionsources      = "RegionSources";
+    } // Regions
+
+    namespace ScanData {
+        namespace DyeSet {
+            static const std::string basemap        = "BaseMap";
+        }
+    } // ScanData
+
+
+} // namespace AttributeNames 
+
+namespace AttributeValues {
+
+    namespace ZMW {
+        namespace HoleNumber {
+            static const std::string description     = "Hole number on chip array";
+        } //namespace HoleNumber
+
+        namespace HoleStatus {
+            static const std::string description     = "Type of data coming from ZMW";
+            static const std::vector<std::string> lookuptable    =
+            {"SEQUENCING", "ANTIHOLE", "FIDUCIAL", "SUSPECT", 
+             "ANTIMIRROR", "FDZMW",    "FBZMW",    "ANTIBEAMLET", "OUTSIDEFOV"};
+
+            static const unsigned char sequencingzmw = 0; // not '0'
+            static const unsigned char outsidefov    = 8; // not '8'
+        } // namespace HoleStatus
+
+        namespace HoleXY {
+            static const std::string description     = "Coordinates of ZMW on Chip";
+        } // namespace HoleXY
+
+    } // namespace ZMW
+
+    namespace Regions {
+        static const std::vector<std::string> columnnames        =
+        {"HoleNumber", "Region type index", "Region start in bases",
+         "Region end in bases", "Region score"};
+        static const std::vector<std::string> regiontypes        =
+        {"Adapter", "Insert", "HQRegion"};
+        static const std::vector<std::string> regiondescriptions =
+        {"Adapter Hit", "Insert Region",
+         "High Quality bases region. Score is 1000 * predicted accuracy, where predicted accuary is 0 to 1.0"};
+        static const std::vector<std::string> regionsources      =
+        {"AdapterFinding", "AdapterFinding", "PulseToBase Region classifer"};
+    }
+
+    namespace ZMWMetrics {
+        namespace HQRegionSNR {
+            static const std::string description = "HQRegion average signal to noise ratio";
+        }
+        namespace ReadScore {
+            static const std::string description = "Read raw accuracy prediction";
+        }
+        namespace Productivity {
+            static const std::string description = "ZMW productivity classification";
+        }
+    } // ZMWMetrics
+
+    namespace ScanData {
+        namespace DyeSet {
+            static const std::string basemap = "ACGT"; // default, order matters!
+        }
+    } // ScanData
+
+} // namespace AttributeValues 
+
+} // namespace PacBio 
+
+#endif
diff --git a/pbdata/PackedDNASequence.cpp b/pbdata/PackedDNASequence.cpp
index 957839b..33a556f 100644
--- a/pbdata/PackedDNASequence.cpp
+++ b/pbdata/PackedDNASequence.cpp
@@ -69,7 +69,7 @@ void PackedDNASequence::Allocate(DNALength numberOfNucleotides) {
     length = numberOfNucleotides;
     if (seq) {delete [] seq; seq = NULL;}
     if (arrayLength > 0) {
-        seq = new PackedDNAWord[arrayLength];
+        seq = ProtectedNew<PackedDNAWord>(arrayLength);
         std::fill(seq, seq + arrayLength, 0);
     }
 }
@@ -79,7 +79,7 @@ void PackedDNASequence::CreateFromDNASequence(DNASequence &dnaSeq) {
     length = dnaSeq.length;
     if (seq) {delete [] seq; seq = NULL;}
     if (arrayLength > 0) {
-        seq = new PackedDNAWord[arrayLength];
+        seq = ProtectedNew<PackedDNAWord>(arrayLength);
         DNALength pos;
         for (pos = 0; pos < dnaSeq.length; pos++) {
             Set(pos, ThreeBit[dnaSeq[pos]]);
@@ -214,7 +214,7 @@ void PackedDNASequence::Read(std::istream &in) {
     in.read((char*)&length, sizeof(length));
     if (seq) {delete [] seq; seq = NULL;}
     if (arrayLength > 0) {
-        seq = new PackedDNAWord[arrayLength];
+        seq = ProtectedNew<PackedDNAWord>(arrayLength);
         in.read((char*)seq, sizeof(PackedDNAWord)*arrayLength);
     }
 }
diff --git a/pbdata/ReverseCompressIndex.cpp b/pbdata/ReverseCompressIndex.cpp
index 5179735..d96a20f 100644
--- a/pbdata/ReverseCompressIndex.cpp
+++ b/pbdata/ReverseCompressIndex.cpp
@@ -1,5 +1,6 @@
 #include <iostream>
 #include <fstream>
+#include "utils.hpp"
 #include "ReverseCompressIndex.hpp"
 
 ReverseCompressIndex::ReverseCompressIndex() {
@@ -27,7 +28,7 @@ void ReverseCompressIndex::Read(std::ifstream &in) {
     in.read((char*) &indexLength, sizeof(int));
     in.read((char*) &binSize, sizeof(int));
     in.read((char*) &maxRun, sizeof(int));
-    index = new int[indexLength];
+    index = ProtectedNew<int>(indexLength);
     in.read((char*) index, sizeof(int) *indexLength);
 } 
 
diff --git a/pbdata/SMRTSequence.cpp b/pbdata/SMRTSequence.cpp
index baabf57..6fc8f7b 100644
--- a/pbdata/SMRTSequence.cpp
+++ b/pbdata/SMRTSequence.cpp
@@ -1,36 +1,72 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Mark Chaisson
+
 #include <stdlib.h> 
+#include "utils/SMRTTitle.hpp"
 #include "SMRTSequence.hpp"
 
 using namespace std;
 
-void SMRTSequence::SetNull() {
-    pulseIndex    = NULL;
-    preBaseFrames = NULL;
-    widthInFrames = NULL;
-    xy[0] = 0; xy[1] = 0;
-    // These are not allocted by default.
-    meanSignal = maxSignal = midSignal = NULL;
-    classifierQV = NULL;
-    startFrame   = NULL;
-    platform     = NoPlatform;
-    // By default, allow the entire read.
-    lowQualityPrefix = lowQualitySuffix = 0;
-    highQualityRegionScore = 0;
+SMRTSequence::SMRTSequence()
+    : FASTQSequence()
+    , subreadStart_(0)      // subread start
+    , subreadEnd_(0)        // subread end
+    , preBaseFrames(nullptr)
+    , widthInFrames(nullptr)
+    , pulseIndex(nullptr)
+    , startFrame(nullptr)   // not allocated by default
+    , meanSignal(nullptr)   // not allocated by default
+    , maxSignal(nullptr)    // not allocated by default
+    , midSignal(nullptr)    // not allocated by default
+    , classifierQV(nullptr) // not allocated by default
+    , lowQualityPrefix(0)   // By default, allow the entire read.
+    , lowQualitySuffix(0)   // By default, allow the entire read.
+    , highQualityRegionScore(0) // HQ read score
+    , readScore(0)          // read score
+    , readGroupId_("")      // read group id
+    , copiedFromBam(false)
+#ifdef USE_PBBAM
+    , bamRecord(PacBio::BAM::BamRecord())
+#endif
+{
     // ZMWMetrics
     for (size_t i = 0; i < 4; i++) {
-        hqRegionSnr[i] = -1;
+        hqRegionSnr_[i] = -1;
     }
-    readScore = -1;
-    holeNumber = static_cast<UInt>(-1);
-    readGroupId = "";
-    copiedFromBam = false;
-#ifdef USE_PBBAM
-    bamRecord = PacBio::BAM::BamRecord();
-#endif
-}
-
-SMRTSequence::SMRTSequence() : FASTQSequence() {
-    SetNull();
 }
 
 void SMRTSequence::Allocate(DNALength length) {
@@ -42,12 +78,13 @@ void SMRTSequence::Allocate(DNALength length) {
     }
 
     FASTQSequence::AllocateRichQualityValues(length);
-    seq           = new Nucleotide[length];
+    seq           = ProtectedNew<Nucleotide>(length);
+    this->length  = length;
     qual.Allocate(length);
-    preBaseFrames = new HalfWord[length];
-    widthInFrames = new HalfWord[length];
-    pulseIndex    = new int[length];
-    subreadEnd    = length;
+    preBaseFrames = ProtectedNew<HalfWord>(length);
+    widthInFrames = ProtectedNew<HalfWord>(length);
+    pulseIndex    = ProtectedNew<int>(length);
+    subreadEnd_   = length;
     deleteOnExit  = true;
 }
 
@@ -62,8 +99,8 @@ void SMRTSequence::SetSubreadBoundaries(SMRTSequence &subread, DNALength subread
         subreadEnd = length;
     }
     assert(subreadEnd - subreadStart <= length);
-    subread.subreadStart= subreadStart;
-    subread.subreadEnd  = subreadEnd;
+    subread.subreadStart_ = subreadStart;
+    subread.subreadEnd_  = subreadEnd;
     SetSubreadTitle(subread, subreadStart, subreadEnd);
 }
 
@@ -128,22 +165,22 @@ void SMRTSequence::Copy(const SMRTSequence &rhs, int rhsPos, int rhsLength) {
 
         // Copy SMRT QVs
         if (rhs.preBaseFrames != NULL) {
-            preBaseFrames = new HalfWord[length];
+            preBaseFrames = ProtectedNew<HalfWord>(length);
             memcpy(preBaseFrames, rhs.preBaseFrames, length*sizeof(HalfWord));
         }
         if (rhs.widthInFrames != NULL) {
-            widthInFrames = new HalfWord[length];
+            widthInFrames = ProtectedNew<HalfWord>(length);
             memcpy(widthInFrames, rhs.widthInFrames, length*sizeof(HalfWord));
         }
         if (rhs.pulseIndex != NULL) {
-            pulseIndex = new int[length];
+            pulseIndex = ProtectedNew <int>(length);
             memcpy(pulseIndex, rhs.pulseIndex, sizeof(int) * length);
         }
     }
 
     // Copy other member variables from rhs
-    subreadStart = rhs.subreadStart;
-    subreadEnd   = rhs.subreadEnd;
+    subreadStart_ = rhs.subreadStart_;
+    subreadEnd_   = rhs.subreadEnd_;
     lowQualityPrefix = rhs.lowQualityPrefix;
     lowQualitySuffix = rhs.lowQualitySuffix;
     highQualityRegionScore = rhs.highQualityRegionScore;
@@ -158,9 +195,9 @@ void SMRTSequence::Copy(const SMRTSequence &rhs, int rhsPos, int rhsLength) {
 #endif
 }
 
-void SMRTSequence::Print(ostream &out) {
-    out << "SMRTSequence for zmw " << zmwData.holeNumber
-        << ", [" << subreadStart << ", " << subreadEnd << ")" << endl;
+void SMRTSequence::Print(ostream &out) const {
+    out << "SMRTSequence for zmw " << HoleNumber()
+        << ", [" << SubreadStart() << ", " << SubreadEnd() << ")" << endl;
     DNASequence::Print(out);
 }
 
@@ -183,6 +220,9 @@ void SMRTSequence::Free() {
         if (startFrame) {
             delete[] startFrame;
         }
+        // FIXME: memory of QVs should be handled within class
+        //        in a consistent way.
+        // Comments from Mark Chaisson:
         // meanSignal, maxSignal, midSignal and classifierQV
         // need to be handled separatedly.
     }
@@ -194,68 +234,113 @@ void SMRTSequence::Free() {
     startFrame = NULL;
 
     // Reset member variables
-    xy[0] = 0; xy[1] = 0;
+    subreadStart_ = subreadEnd_ = 0;
     lowQualityPrefix = lowQualitySuffix = 0;
+    readScore = 0;
     highQualityRegionScore = 0;
-    holeNumber = static_cast<UInt>(-1);
-    readGroupId = "";
+    readGroupId_ = "";
     copiedFromBam = false;
 #ifdef USE_PBBAM
     bamRecord = PacBio::BAM::BamRecord();
 #endif 
 
+    // ZMWMetrics
+    for (size_t i = 0; i < 4; i++) {
+        hqRegionSnr_[i] = -1;
+    }
+
     // Free seq, title and FASTQ QVs, also reset deleteOnExit.
     // Don't call FASTQSequence::Free() before freeing SMRT QVs.
     FASTQSequence::Free();
 }
 
-bool SMRTSequence::StoreXY(int16_t xyP[]) {
-    xy[0] = xyP[0];
-    xy[1] = xyP[1];
-    return true;
+SMRTSequence & SMRTSequence::HoleNumber(UInt holeNumber) {
+    zmwData.holeNumber = holeNumber;
+    return *this;
+}
+
+UInt SMRTSequence::HoleNumber(void) const {
+    return zmwData.holeNumber;
+}
+
+SMRTSequence & SMRTSequence::HoleXY(const int x, const int y) {
+    zmwData.x = x;
+    zmwData.y = y;
+    return *this;
+}
+
+UInt SMRTSequence::HoleX(void) const {
+    return zmwData.x;
 }
 
-bool SMRTSequence::StorePlatformId(PlatformId pid) {
-    platform = pid;
-    return true;
+UInt SMRTSequence::HoleY(void) const {
+    return zmwData.y;
 }
 
-bool SMRTSequence::StoreHoleNumber(UInt holeNumberP){ 
-    zmwData.holeNumber = holeNumber = holeNumberP;
-    return true;
+SMRTSequence & SMRTSequence::HoleStatus(const unsigned char holeStatus) {
+    zmwData.holeStatus = holeStatus;
+    return *this;
+}
+
+unsigned char SMRTSequence::HoleStatus(void) const {
+    return zmwData.holeStatus;
 }
 
-bool SMRTSequence::StoreHoleStatus(unsigned char s) {
-    zmwData.holeStatus = s;
-    return true;
+std::string SMRTSequence::MovieName(void) const {
+    return SMRTTitle(GetTitle()).MovieName();
 }
 
-bool SMRTSequence::StoreZMWData(ZMWGroupEntry &data) {
-    zmwData = data;
-    return true;
+DNALength SMRTSequence::SubreadStart(void) const {
+    return subreadStart_;
 }
 
-bool SMRTSequence::GetXY(int xyP[]) {
-    xyP[0] = xy[0];
-    xyP[1] = xy[1];
-    return true;
+SMRTSequence & SMRTSequence::SubreadStart(const DNALength start) {
+    subreadStart_ = start;
+    return *this;
 }
 
-bool SMRTSequence::GetHoleNumber(UInt & holeNumberP) {
-    holeNumberP = holeNumber;
-    return true;
+DNALength SMRTSequence::SubreadEnd(void) const {
+    return subreadEnd_;
+}
+
+SMRTSequence & SMRTSequence::SubreadEnd(const DNALength end) {
+    subreadEnd_ = end;
+    return *this;
 }
 
-std::string SMRTSequence::GetReadGroupId() {
-    return readGroupId;
+DNALength SMRTSequence::SubreadLength(void) const {
+    return subreadEnd_ - subreadStart_;
 }
 
-void SMRTSequence::SetReadGroupId(const std::string & rid) {
-    readGroupId = rid;
+std::string SMRTSequence::ReadGroupId() const {
+    return readGroupId_;
+}
+
+SMRTSequence & SMRTSequence::ReadGroupId(const std::string & rid) {
+    readGroupId_ = rid;
+    return *this;
+}
+
+float SMRTSequence::HQRegionSnr(const char base) const {
+    if (::toupper(base) == 'A')      return hqRegionSnr_[SMRTSequence::SnrIndex4Base::A];
+    else if (::toupper(base) == 'C') return hqRegionSnr_[SMRTSequence::SnrIndex4Base::C];
+    else if (::toupper(base) == 'G') return hqRegionSnr_[SMRTSequence::SnrIndex4Base::G];
+    else if (::toupper(base) == 'T') return hqRegionSnr_[SMRTSequence::SnrIndex4Base::T];
+    else assert("Base must be in A, C, G, T" == 0);
+}
+
+SMRTSequence & SMRTSequence::HQRegionSnr(const char base, float v) {
+    if (::toupper(base) == 'A')      hqRegionSnr_[SMRTSequence::SnrIndex4Base::A] = v;
+    else if (::toupper(base) == 'C') hqRegionSnr_[SMRTSequence::SnrIndex4Base::C] = v;
+    else if (::toupper(base) == 'G') hqRegionSnr_[SMRTSequence::SnrIndex4Base::G] = v;
+    else if (::toupper(base) == 'T') hqRegionSnr_[SMRTSequence::SnrIndex4Base::T] = v;
+    else assert("Base must be in A, C, G, T" == 0);
+    return *this;
 }
 
 #ifdef USE_PBBAM
-void SMRTSequence::Copy(const PacBio::BAM::BamRecord & record) {
+void SMRTSequence::Copy(const PacBio::BAM::BamRecord & record,
+                        bool copyAllQVs) {
     Free();
 
     copiedFromBam = true;
@@ -267,17 +352,71 @@ void SMRTSequence::Copy(const PacBio::BAM::BamRecord & record) {
     // Do NOT copy other SMRTQVs such as startFrame, meanSignal...
     (static_cast<FASTQSequence*>(this))->Copy(record);
 
+    // Set subread start, subread end in coordinate of zmw.
+    if (record.Type() != PacBio::BAM::RecordType::CCS) { 
+        subreadStart_ = static_cast<int>(record.QueryStart());
+        subreadEnd_ = static_cast<int>(record.QueryEnd());
+    } else {
+        subreadStart_ = 0;
+        subreadEnd_ =  static_cast<int>(record.Sequence().length());;
+    }
+
+    // Shall we copy all pulse QVs including ipd and pw?
+    if (copyAllQVs) {
+        if (record.HasPreBaseFrames()) {
+            std::vector<uint16_t> qvs = record.PreBaseFrames().DataRaw();
+            assert(preBaseFrames == nullptr);
+            preBaseFrames = ProtectedNew<HalfWord>(qvs.size());
+            std::memcpy(preBaseFrames, &qvs[0], qvs.size() * sizeof(HalfWord));
+        }
+        if (record.HasIPD()) {
+            std::vector<uint16_t> qvs = record.IPD().DataRaw();
+            assert(widthInFrames == nullptr);
+            widthInFrames = ProtectedNew<HalfWord>(qvs.size());
+            std::memcpy(widthInFrames, &qvs[0], qvs.size() * sizeof(HalfWord));
+        }
+    }
+
+    // preBaseQVs are not included in BamRecord, and will not be copied.
     // Copy read group id from BamRecord.
-    SetReadGroupId(record.ReadGroupId());
+    ReadGroupId(record.ReadGroupId());
 
     // PacBio bam for secondary analysis does NOT carry zmw
     // info other than holeNumber, including holeStatus, holeX,
     // holeY, numEvents. 
-    zmwData.holeNumber = static_cast<UInt> (record.HoleNumber()); 
+    UInt hn = static_cast<UInt> (record.HoleNumber());
+    this->HoleNumber(hn).
+    // Assumption: holeStatus of a bam record must be 'SEQUENCING'
+          HoleStatus(static_cast<unsigned char> (PacBio::AttributeValues::ZMW::HoleStatus::sequencingzmw)).
+    // x = lower 16 bit, y = upper 16 bit
+          HoleXY(hn & 0x0000FFFF, hn >> 16);
 
     // Set hq region read score
-    if (record.Impl().HasTag("rq"))
-        highQualityRegionScore = record.Impl().TagValue("rq").ToInt32();
+    if (record.HasReadAccuracy()) {
+        // In pre 3.0.1 BAM, ReadAccuracy is in [0, 1000],
+        // in post 3.0.1 BAM, ReadAccuracy is a float in [0, 1]
+        // In blasr_libcpp, which supports both HDF5 and BAM,
+        // readScore should always be a float in [0, 1],
+        // and highQualityRegionScore should always be an int in [0, 1000]
+        readScore = float(record.ReadAccuracy());
+        if (readScore <= 1.0) { 
+            highQualityRegionScore = int(readScore * 1000);
+        } else { 
+            highQualityRegionScore = int(readScore);
+            readScore /= 1000.0;
+        }
+    }
 
+    // Set HQRegionSNR if record has the 'sn' tag 
+    if (record.HasSignalToNoise()) {
+        // Signal to noise ratio of ACGT (in that particular ORDER) over 
+        // HQRegion from BAM: record.SignalToNoise() 
+        std::vector<float> snrs = record.SignalToNoise();
+        this->HQRegionSnr('A', snrs[0]) 
+             .HQRegionSnr('C', snrs[1])
+             .HQRegionSnr('G', snrs[2]) 
+             .HQRegionSnr('T', snrs[3]);
+    }
 }
+
 #endif
diff --git a/pbdata/SMRTSequence.hpp b/pbdata/SMRTSequence.hpp
index f7cff1c..151e809 100644
--- a/pbdata/SMRTSequence.hpp
+++ b/pbdata/SMRTSequence.hpp
@@ -1,3 +1,40 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Mark Chaisson
+
 #ifndef _BLASR_SMRT_SEQUENCE_HPP_
 #define _BLASR_SMRT_SEQUENCE_HPP_
 
@@ -12,16 +49,28 @@
 #include "reads/RegionTable.hpp"
 #include "reads/ZMWGroupEntry.hpp"
 
+
 class SMRTSequence : public FASTQSequence {
+friend class HDFZMWReader;
+friend class HDFZMWWriter;
+friend class HDFZMWMetricsWriter;
+
+private:
+    enum SnrIndex4Base {A=0, C=1, G=2, T=3};
+    float hqRegionSnr_[4]; // Always saved as 'ACGT'
+
+    DNALength subreadStart_;
+    DNALength subreadEnd_;
+
+    // read group id associated with each SMRTSequence
+    std::string readGroupId_;
+
 public:
-    int16_t xy[2];
-    UInt holeNumber;
-    float hqRegionSnr[4];
-    float readScore;
     ZMWGroupEntry zmwData;
-    PlatformId platform;
+
     HalfWord *preBaseFrames;
     HalfWord *widthInFrames;
+
     //
     // The following are fields that are read in from the pulse file.
     // Because they are not standard in bas.h5 files, these fields
@@ -34,22 +83,83 @@ public:
     float *classifierQV;
     unsigned int *startFrame;
     int *pulseIndex;
+
     DNALength lowQualityPrefix, lowQualitySuffix;
     int highQualityRegionScore; // High quality region score in region table.
+    float readScore;
 
-protected:
-    // read group id associated with each SMRTSequence
-    std::string readGroupId; 
-
-public:
     // Whether or not this is originally copied from a BamRecord.
     bool copiedFromBam;
 
-    void SetNull(); 
-
+public:
     SMRTSequence();
+
     inline ~SMRTSequence();
 
+    /// \name Sets and gets attributes.
+    /// \{
+    /// Set HoleNumber.
+    /// \returns this SMRTSequence
+    SMRTSequence & HoleNumber(UInt holeNumber);
+
+    /// \returns HoleNumber
+    UInt HoleNumber(void) const;
+
+    /// Set HoleXY
+    SMRTSequence & HoleXY(const int x, const int y);
+
+    /// \returns HoleX
+    UInt HoleX(void) const;
+
+    /// \returns HoleY
+    UInt HoleY(void) const;
+
+    /// Set HoleStatus
+    SMRTSequence & HoleStatus(const unsigned char);
+
+    /// \returns HoleStatus
+    unsigned char HoleStatus(void) const;
+
+    /// \returns movie name parsed from sequence title
+    std::string MovieName(void) const;
+
+    /// \returns start pos of this sequence in coordinate of zmw polymerase sequence
+    DNALength SubreadStart(void) const;
+
+    /// Sets subreadStart.
+    SMRTSequence & SubreadStart(const DNALength start);
+
+    /// \returns subread end pos of this sequence in coordinate of zmw polymerase sequence
+    DNALength SubreadEnd(void) const;
+
+    /// Set subread end pos in coordinate of polymerase sequence.
+    SMRTSequence & SubreadEnd(const DNALength end);
+
+    /// A SMRTSequence's this->seq may point to sequence of a whole
+    /// polymerase read, but only represents a subread [subreadStart_,  subreadEnd_).
+    /// \returns subread length (SubreadEnd() - SubreadStart())
+    DNALength SubreadLength(void) const;
+
+    /// \returns read group id for this sequence.
+    std::string ReadGroupId(void) const;
+
+    /// Set readGroup Id for this sequence.
+    SMRTSequence & ReadGroupId(const std::string & rid);
+
+    /// Access to HQRegion SNRs must be done via public API.
+    float HQRegionSnr(const char base) const;
+
+    /// Set HQRegion SNR of base as v.
+    SMRTSequence & HQRegionSnr(const char base, float v);
+
+    /// \}
+
+public:
+    /// \name Clip subread
+    /// \{
+    SMRTSequence & Clip(const DNALength subreadStart, const DNALength subreadEnd);
+    /// \}
+
     void Allocate(DNALength length); 
 
     void SetSubreadTitle(SMRTSequence &subread, DNALength subreadStart, 
@@ -68,37 +178,19 @@ public:
 
     void Copy(const SMRTSequence &rhs, int rhsPos, int rhsLength); 
 
-    void Print(std::ostream &out); 
+    void Print(std::ostream &out) const;
 
     SMRTSequence& operator=(const SMRTSequence &rhs); 
 
     void Free(); 
-
-    bool StoreXY(int16_t xyP[]); 
-
-    bool StorePlatformId(PlatformId pid); 
-
-    bool StoreHoleNumber(UInt holeNumberP);
-
-    bool StoreHoleStatus(unsigned char s); 
-
-    bool StoreZMWData(ZMWGroupEntry &data); 
-
-    bool GetXY(int xyP[]); 
-
-    bool GetHoleNumber(UInt & holeNumberP);   
-
-    // Get read group id for this sequence.
-    std::string GetReadGroupId();
-
-    // Set readGroup Id for this sequence.
-    void SetReadGroupId(const std::string & rid);
     
 #ifdef USE_PBBAM
 public:
     // Copy read sequence, title, holeNumber, readGroupId, and QVs
     // (iq, dq, sq, mq, st, dt) from BamRecord to this SMRTSequence.
-    void Copy(const PacBio::BAM::BamRecord & record);
+    // If copyAllQVs is true, also copy all QVs.
+    void Copy(const PacBio::BAM::BamRecord & record, 
+              bool copyAllQVs = false);
 
     // Keep track of BamRecord from which this SMRTSequence is 
     // originally copied. However, one should NOT assume
@@ -112,4 +204,5 @@ public:
 inline SMRTSequence::~SMRTSequence(){
     SMRTSequence::Free();
 }
+
 #endif  // _BLASR_SMRT_SEQUENCE_HPP_
diff --git a/pbdata/StringUtils.cpp b/pbdata/StringUtils.cpp
index e21a675..2b008d4 100644
--- a/pbdata/StringUtils.cpp
+++ b/pbdata/StringUtils.cpp
@@ -62,49 +62,23 @@ int ToWords(string &orig, vector<string> &words) {
     return words.size();
 }
 
-int Tokenize(string orig, string pattern, vector<string> &tokens) {
-    VectorIndex tokenStart, tokenEnd;
-    int patternLength = pattern.size();
-    int origLength    = orig.size();
-    if (origLength == 0) {
-        return 0;
-    }
-    bool prevWasToken = false;
-    tokenEnd = 0;
-    tokenStart = 0;
-    //for (tokenEnd = 0; tokenEnd < origLength-patternLength; tokenEnd) {
-    while(tokenEnd < origLength - patternLength) {
-        while (tokenStart < origLength - patternLength and 
-                orig.compare(tokenStart, patternLength, pattern, 0, patternLength) == 0) {
-            tokenStart++;
-        }
-        tokenEnd = tokenStart + 1;
-
-        prevWasToken = false;
-        while (tokenEnd < origLength - patternLength) {
-            if (orig.compare(tokenEnd, patternLength, pattern, 0, patternLength) == 0) {
-                // add this token to the vector of tokens
-                if (tokenEnd - tokenStart >= 1) {
-                    prevWasToken = true;
-                    tokens.push_back(orig.substr(tokenStart, tokenEnd - tokenStart));
-                }
-                tokenEnd+=patternLength;
-                tokenStart = tokenEnd;
-                break;
-            }
-            else {
-                prevWasToken = false;
-                ++tokenEnd;
-            }
-        }
-    }
-    if (tokenEnd - tokenStart > 1) {
-        tokens.push_back(orig.substr(tokenStart, tokenEnd - tokenStart+1));
+// Splice a string by pattern and save to a vector of token strings.
+int Splice(const string & orig, const string & pattern, vector<string> & tokens) {
+    assert(pattern.size() > 0);
+
+    tokens.clear();
+    size_t search_start = 0;
+    size_t find_pos = orig.find(pattern, search_start);
+    while(find_pos != string::npos) {
+        string x = orig.substr(search_start, find_pos - search_start);
+        tokens.push_back(x);
+        search_start = find_pos + pattern.size();
+        find_pos = orig.find(pattern, search_start);
     }
+    tokens.push_back(orig.substr(search_start));
     return tokens.size();
 }
 
-
 void ParseSeparatedList(const string &csl, vector<string> &values, char delim) {
     stringstream cslStrm(csl);
     string valString;
diff --git a/pbdata/StringUtils.hpp b/pbdata/StringUtils.hpp
index 578f0c2..ea895a2 100644
--- a/pbdata/StringUtils.hpp
+++ b/pbdata/StringUtils.hpp
@@ -23,7 +23,7 @@ int IsSpace(char c);
 
 int ToWords(string &orig, vector<string> &words); 
 
-int Tokenize(string orig, string pattern, vector<string> &tokens); 
+int Splice(const string & orig, const string & pattern, vector<string> &tokens); 
 
 void ParseSeparatedList(const string &csl, vector<string> &values, char delim=','); 
 
diff --git a/pbdata/build.mk b/pbdata/build.mk
new file mode 120000
index 0000000..2247f36
--- /dev/null
+++ b/pbdata/build.mk
@@ -0,0 +1 @@
+makefile
\ No newline at end of file
diff --git a/pbdata/makefile b/pbdata/makefile
new file mode 100644
index 0000000..034ee60
--- /dev/null
+++ b/pbdata/makefile
@@ -0,0 +1,39 @@
+all:
+
+THISDIR:=$(dir $(realpath $(lastword $(MAKEFILE_LIST))))
+-include ${CURDIR}/defines.mk
+include ${THISDIR}/../rules.mk
+
+CXXOPTS  += -std=c++11 -pedantic
+# CURDIR should have libconfig.h
+INCLUDES += ${CURDIR}
+INCLUDES += ${THISDIR} matrix reads qvs metagenome saf utils
+INCLUDES += ${LIBBLASR_INC} ${PBBAM_INC} ${HTSLIB_INC} ${BOOST_INC}
+LIBS     += ${PBBAM_LIB} ${HTSLIB_LIB}
+LDFLAGS  += $(patsubst %,-L%,${LIBS})
+
+all: libpbdata.a libpbdata${SH_LIB_EXT}
+
+paths := . matrix reads metagenome qvs saf utils loadpulses alignment amos sam
+paths := $(patsubst %,${THISDIR}%,${paths})
+sources := $(shell find ${THISDIR} -name '*.cpp')
+sources := $(notdir ${sources})
+objects := $(sources:.cpp=.o)
+shared_objects := $(sources:.cpp=.shared.o)
+dependencies := $(objects:.o=.d) $(shared_objects:.o=.d)
+
+vpath %.cpp ${paths}
+
+libpbdata.a: $(objects)
+	$(AR) $(ARFLAGS) $@ $^
+
+libpbdata${SH_LIB_EXT}: $(shared_objects)
+
+libconfig.h:
+	cp -af ${LIBCONFIG_H} $@
+
+clean: 
+	rm -f libpbdata.a  libpbdata.so *.o *.d
+
+-include $(dependencies)
+depend: $(dependencies:.d=.depend)
diff --git a/pbdata/matrix/FlatMatrixImpl.hpp b/pbdata/matrix/FlatMatrixImpl.hpp
index 5536cb7..7fe77c7 100644
--- a/pbdata/matrix/FlatMatrixImpl.hpp
+++ b/pbdata/matrix/FlatMatrixImpl.hpp
@@ -3,6 +3,7 @@
 #include <iostream>
 #include <assert.h>
 #include "Types.h"
+#include "utils.hpp"
 #include "FlatMatrix.hpp"
 
 template<typename T>
@@ -61,7 +62,7 @@ void FlatMatrix2D<T>::Resize(unsigned int _totalSize) {
     if (matrix != NULL) {
         delete[] matrix;
     }
-    matrix = new T[_totalSize];
+    matrix = ProtectedNew<T>(_totalSize);
     totalSize = _totalSize;
 }
 
@@ -87,7 +88,7 @@ void FlatMatrix2D<T>::Grow(int _nRows, int _nCols) {
         if (totalSize != 0 && matrix)
             delete[] matrix;
         totalSize = nRows * nCols;
-        matrix = new T[totalSize];
+        matrix = ProtectedNew<T>(totalSize);
     }
 }
 
@@ -115,7 +116,7 @@ template<typename T>
 void FlatMatrix2D<T>::Allocate(UInt _nRows, UInt _nCols) {
     nRows = _nRows;
     nCols = _nCols;
-    matrix = new T[nRows * nCols];
+    matrix = ProtectedNew<T>(nRows * nCols);
 }
 
 template<typename T>
@@ -163,7 +164,7 @@ void FlatMatrix3D<T>::Grow(int _nx, int _ny, int _nz) {
             delete[] matrix;
         }
         totalSize = nx*ny*nz;
-        matrix = new T[totalSize];
+        matrix = ProtectedNew<T>(totalSize);
     }
     xy = nx*ny;
 }
diff --git a/pbdata/matrix/MatrixImpl.hpp b/pbdata/matrix/MatrixImpl.hpp
index c66aaf4..d08974c 100644
--- a/pbdata/matrix/MatrixImpl.hpp
+++ b/pbdata/matrix/MatrixImpl.hpp
@@ -5,13 +5,14 @@
 #include <iostream>
 #include <fstream>
 #include <stdint.h>
+#include "utils.hpp"
 #include "Types.h"
 
 template<typename T>
 void CreateMatrix(int rows, int cols, std::vector<T*> matrix) {
 	matrix.resize(rows);
     if (matrix[0]) {delete [] matrix[0]; matrix[0] = NULL;}
-	matrix[0] = new T[rows*cols];
+	matrix[0] = ProtectedNew<T>(rows*cols);
 	VectorIndex r = 1;
 	for (r = 1; r < rows; r++) {
 		matrix[r] = &matrix[cols * r];
@@ -56,14 +57,14 @@ void Matrix<T>::Resize(VectorIndex nRowsP, VectorIndex nColsP) {
             }
         }
         if (matrix == NULL) {
-            matrix = new T*[nRows];
+            matrix = ProtectedNew<T*>(nRows);
         }
         else {
             if (matrix[0] != NULL) {
                 delete[] matrix[0]; matrix[0] = NULL;
             }
         }
-        matrix[0] = new T[matrixBufferSize];
+        matrix[0] = ProtectedNew<T>(matrixBufferSize);
         VectorIndex rowIndex;
         for (rowIndex = 1; rowIndex < nRows; rowIndex++ ){
             matrix[rowIndex] = &matrix[0][nCols * rowIndex];
diff --git a/pbdata/metagenome/SequenceIndexDatabaseImpl.hpp b/pbdata/metagenome/SequenceIndexDatabaseImpl.hpp
index e26f61e..6ad9456 100644
--- a/pbdata/metagenome/SequenceIndexDatabaseImpl.hpp
+++ b/pbdata/metagenome/SequenceIndexDatabaseImpl.hpp
@@ -191,25 +191,25 @@ ReadDatabase(std::ifstream &in) {
 
     in.read((char*) &nSeqPos, sizeof(int));
     assert(seqStartPos == NULL);
-    seqStartPos = new DNALength[nSeqPos];
+    seqStartPos = ProtectedNew<DNALength>(nSeqPos);
     deleteSeqStartPos = true;
     in.read((char*) seqStartPos, sizeof(DNALength) * nSeqPos);
     int nSeq = nSeqPos - 1;
 
     // Get the lengths of the strings to read.
     assert(nameLengths == NULL);
-    nameLengths = new int[nSeq];
+    nameLengths = ProtectedNew<int>(nSeq);
     deleteNameLengths = true;
     in.read((char*)nameLengths, sizeof(int) * nSeq);
 
     // Get the titles of the sequences.
     assert(names == NULL); // Otherwise need to delete names; 
-    names = new char*[nSeq];
+    names = ProtectedNew<char*>(nSeq);
     deleteNames = true;
     char *namePtr;
     int i;
     for (i = 0; i < nSeq; i++) { 
-        namePtr = new char[nameLengths[i]];
+        namePtr = ProtectedNew<char>(nameLengths[i]);
         if (nameLengths[i] > 0) {
             in.read(namePtr, nameLengths[i]);
         }
@@ -227,7 +227,7 @@ SequenceTitleLinesToNames() {
         std::string tmpName;
         AssignUntilFirstSpace(names[seqIndex], nameLengths[seqIndex], tmpName);
         if (names[seqIndex]) {delete[] names[seqIndex];}
-        names[seqIndex] = new char[tmpName.size()+1];
+        names[seqIndex] = ProtectedNew<char>(tmpName.size()+1);
         strcpy(names[seqIndex], tmpName.c_str());
         names[seqIndex][tmpName.size()] = '\0';
         nameLengths[seqIndex] = tmpName.size();
@@ -267,14 +267,14 @@ Finalize() {
     int nSeq = nSeqPos - 1;
 
     assert(names==NULL);
-    names = new char*[nSeq];
+    names = ProtectedNew<char*>(nSeq);
     deleteNames = true;
     unsigned int i;
     if (nameLengths) {delete [] nameLengths; nameLengths = NULL;}
-    nameLengths = new int[nSeq];
+    nameLengths = ProtectedNew<int>(nSeq);
     deleteNameLengths = true;
     for (i = 0; i < nSeq; i++) {
-        names[i] = new char[growableName[i].size() + 1];
+        names[i] = ProtectedNew<char>(growableName[i].size() + 1);
 
         memcpy((char*) names[i], (char*) growableName[i].c_str(), 
             growableName[i].size());
diff --git a/pbdata/metagenome/TitleTable.cpp b/pbdata/metagenome/TitleTable.cpp
index 078da23..7f207b2 100644
--- a/pbdata/metagenome/TitleTable.cpp
+++ b/pbdata/metagenome/TitleTable.cpp
@@ -11,12 +11,12 @@ TitleTable::~TitleTable() {
 
 void TitleTable::Copy(char **src, int nSrc) {
     Free(); //Free before copy
-    table = new char*[nSrc];
+    table = ProtectedNew<char*>(nSrc);
     tableLength = nSrc;
     int i;
     for (i = 0; i < nSrc; i++ ){
         int lenStrI = strlen(src[i]);
-        table[i] = new char[lenStrI+1];
+        table[i] = ProtectedNew<char>(lenStrI+1);
         memcpy(table[i], src[i], lenStrI);
         table[i][lenStrI] = '\0';
     }
@@ -44,10 +44,10 @@ void TitleTable::Read(std::string &inFileName) {
 void TitleTable::CopyFromVector(std::vector<std::string> &titles) {
     Free(); //Free before copy.
     tableLength = titles.size();
-    table = new char*[tableLength];
+    table = ProtectedNew<char*>(tableLength);
     int i;
     for (i = 0; i < tableLength; i++) {
-        table[i] = new char[titles[i].size() + 1];
+        table[i] = ProtectedNew<char>(titles[i].size() + 1);
         memcpy(table[i], titles[i].c_str(), titles[i].size());
         table[i][titles[i].size()] = '\0';
     }
@@ -101,7 +101,7 @@ void TitleTable::ResetTableToIntegers(char **table,
         namestrm << i;
         std::string name;
         name = namestrm.str();
-        table[i] = new char[name.size()+1];
+        table[i] = ProtectedNew<char>(name.size()+1);
         memcpy( table[i], name.c_str(), name.size());
         table[i][name.size()] = '\0';
         tableLengths[i] = (int) name.size() + 1;
diff --git a/pbdata/reads/BaseFile.cpp b/pbdata/reads/BaseFile.cpp
index bf4c56a..56bac24 100644
--- a/pbdata/reads/BaseFile.cpp
+++ b/pbdata/reads/BaseFile.cpp
@@ -27,11 +27,11 @@ bool BaseFile::LookupReadIndexByXY(uint16_t x, uint16_t y, int &index) {
 
 void BaseFile::CopyReadAt(uint32_t readIndex, SMRTSequence &read) {
     assert(holeNumbers.size() > readIndex);
-    read.zmwData.holeNumber = holeNumbers[readIndex];
+    read.HoleNumber(holeNumbers[readIndex]);
     if (holeXY.size() > 0) {
         assert(holeXY.size() > readIndex);
-        read.zmwData.x = holeXY[readIndex].xy[0];
-        read.zmwData.y = holeXY[readIndex].xy[1];
+        read.HoleXY(holeXY[readIndex].xy[0],
+                    holeXY[readIndex].xy[1]);
     }
 
     int startPos = readStartPositions[readIndex];
diff --git a/pbdata/reads/PulseBaseCommon.cpp b/pbdata/reads/PulseBaseCommon.cpp
index 437a0fb..16bc95c 100644
--- a/pbdata/reads/PulseBaseCommon.cpp
+++ b/pbdata/reads/PulseBaseCommon.cpp
@@ -1,3 +1,40 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Mark Chaisson
+
 #include <vector>
 #include <algorithm>
 #include "PulseBaseCommon.hpp"
@@ -14,7 +51,7 @@ std::string PulseBaseCommon::GetMovieName() {
     return scanData.movieName;
 }
 
-std::map<char, int> PulseBaseCommon::GetBaseMap() {
+std::map<char, size_t> PulseBaseCommon::GetBaseMap() {
     return scanData.baseMap;
 }
 
diff --git a/pbdata/reads/PulseBaseCommon.hpp b/pbdata/reads/PulseBaseCommon.hpp
index e8b86fc..5143682 100644
--- a/pbdata/reads/PulseBaseCommon.hpp
+++ b/pbdata/reads/PulseBaseCommon.hpp
@@ -1,3 +1,40 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Mark Chaisson
+
 #ifndef DATASTRUCTURES_READS_PULSE_BASE_COMMON_H_
 #define DATASTRUCTURES_READS_PULSE_BASE_COMMON_H_
 
@@ -18,7 +55,7 @@ public:
 
     std::string GetMovieName(); 
 
-    std::map<char, int> GetBaseMap(); 
+    std::map<char, size_t> GetBaseMap(); 
 
     bool LookupReadIndexByHoleNumber(uint32_t holeNumber, int &readIndex); 
 };
diff --git a/pbdata/reads/PulseFile.cpp b/pbdata/reads/PulseFile.cpp
index 7d2db0b..3cdb4e3 100644
--- a/pbdata/reads/PulseFile.cpp
+++ b/pbdata/reads/PulseFile.cpp
@@ -1,5 +1,41 @@
-#include "PulseFile.hpp"
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Mark Chaisson
 
+#include "PulseFile.hpp"
 
 void PulseFile::CopySignal(HalfWord *signalData, // either a vector or matrix
                             int signalNDims,
@@ -12,7 +48,7 @@ void PulseFile::CopySignal(HalfWord *signalData, // either a vector or matrix
     // pulseStartPos must be 0; 
     // otherwise, pulseStartPos is pulseStartPositions[holeIndex]
 
-    std::map<char, int> baseMap = GetBaseMap();
+    std::map<char, size_t> baseMap = GetBaseMap();
     int i;
     if (signalNDims == 1) {
         for (i = 0; i < readLength; i++) {
diff --git a/pbdata/reads/PulseFileImpl.hpp b/pbdata/reads/PulseFileImpl.hpp
index 7fc1f8d..394ad35 100644
--- a/pbdata/reads/PulseFileImpl.hpp
+++ b/pbdata/reads/PulseFileImpl.hpp
@@ -1,6 +1,8 @@
 #ifndef _BLASR_PULSE_FILE_IMPL_HPP_
 #define _BLASR_PULSE_FILE_IMPL_HPP_
 
+#include "utils.hpp"
+
 template<typename T_FieldType>
     void PulseFile::StoreField(std::vector<T_FieldType> &source, int *basToPlsIndex, T_FieldType *dest, int destLength) {
     int i;
@@ -14,7 +16,7 @@ template <typename T>
     if (ptr != NULL) {
     delete[] ptr;
     }
-    ptr = new T[length];
+    ptr = ProtectedNew<T>(length);
     return ptr != NULL;
 }
 
diff --git a/pbdata/reads/ReadInterval.hpp b/pbdata/reads/ReadInterval.hpp
index c21838f..8db6a22 100644
--- a/pbdata/reads/ReadInterval.hpp
+++ b/pbdata/reads/ReadInterval.hpp
@@ -1,18 +1,35 @@
 #ifndef _BLASR_READ_INTERVAL_HPP_
 #define _BLASR_READ_INTERVAL_HPP_
 
+#include "RegionAnnotation.hpp"
+
+class RegionAnnotation;
+
 class ReadInterval {
 public:
     int start;
     int end;
     int score;
-    ReadInterval(int s, int e, int sc=0) : start(s), end(e), score(sc) {};
+
+    ReadInterval(int s=0, int e=0, int sc=0) : start(s), end(e), score(sc) {};
+
+    ReadInterval(const RegionAnnotation & ra)
+    : start(ra.GetStart())
+    , end(ra.GetEnd())
+    , score(ra.GetScore()) {}
+
     ReadInterval& operator=(const ReadInterval &rhs) {
         start = rhs.start;
         end   = rhs.end;
         score = rhs.score;
         return *this;
     }
+
+    bool operator==(const ReadInterval &rhs) const {
+        return (start == rhs.start and
+                end   == rhs.end   and
+                score == rhs.score);
+    }
 };
 
 #endif
diff --git a/pbdata/reads/RegionAnnotation.cpp b/pbdata/reads/RegionAnnotation.cpp
new file mode 100644
index 0000000..bee4a9b
--- /dev/null
+++ b/pbdata/reads/RegionAnnotation.cpp
@@ -0,0 +1,49 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Mark Chaisson
+
+
+#include <ostream>
+#include "RegionAnnotation.hpp"
+
+std::ostream & operator << (std::ostream & os, const RegionAnnotation& ra) {
+    os << "ZMW " << ra.GetHoleNumber()
+       << ", region type index " << ra.GetTypeIndex()
+       << " [" << ra.GetStart()
+       << ", " << ra.GetEnd()
+       << "), " << ra.GetScore();
+    return os;
+}
diff --git a/pbdata/reads/RegionAnnotation.hpp b/pbdata/reads/RegionAnnotation.hpp
new file mode 100644
index 0000000..38732b4
--- /dev/null
+++ b/pbdata/reads/RegionAnnotation.hpp
@@ -0,0 +1,241 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.  //
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Mark Chaisson
+
+#ifndef _BLASR_REGION_ANNOTATION_HPP_
+#define _BLASR_REGION_ANNOTATION_HPP_
+
+#include <cassert>
+#include <cstring>
+#include <string>
+#include <iostream>
+#include <vector>
+#include <map>
+#include <ostream>
+#include "Types.h"
+#include "Enumerations.h"
+#include "PacBioDefs.h"
+#include "RegionTypeMap.hpp"
+
+
+class HDFRegionTableReader;
+class HDFRegionTableWriter;
+class HDFRegionsWriter;
+
+
+class RegionAnnotation {
+friend class HDFRegionTableReader;
+friend class HDFRegionTableWriter;
+friend class HDFRegionsWriter;
+
+public:
+    static const int HOLENUMBERCOL = 0;
+    static const int REGIONTYPEINDEXCOL = 1;
+    static const int REGIONSTARTCOL = 2;
+    static const int REGIONENDCOL = 3;
+    static const int REGIONSCORECOL = 4;
+    static const int NCOLS=5;
+
+    int row[NCOLS];
+
+public:
+    // FIXME: use regionType as a member variable instead of regionTypeIndex
+    inline RegionAnnotation(UInt holeNumber = 0,
+                            int typeIndex = 0,
+                            int start = 0, int end = 0,
+                            int score = -1);
+
+    inline bool operator<(const RegionAnnotation &rhs) const;
+
+    inline bool operator<(int holeNumber) const;
+
+    inline RegionAnnotation& operator=(const RegionAnnotation &rhs);
+
+    inline bool operator==(const RegionAnnotation &rhs) const;
+
+    inline int GetHoleNumber(void) const;
+
+    inline RegionAnnotation & SetHoleNumber(int holeNumber);
+
+    inline int GetTypeIndex(void) const;
+
+    inline std::string GetTypeString(const std::vector<RegionType> & types) const;
+
+    inline RegionAnnotation & SetTypeIndex(int typeIndex);
+
+    inline int GetStart(void) const;
+
+    inline RegionAnnotation & SetStart(int start);
+
+    inline int GetEnd(void) const;
+
+    inline RegionAnnotation & SetEnd(int end);
+
+    inline int GetScore(void) const;
+
+    inline RegionAnnotation & SetScore(int score);
+
+public:
+    friend std::ostream & operator << (std::ostream & os, const RegionAnnotation& ra);
+};
+
+inline
+bool compare_region_annotation_by_type(const RegionAnnotation & lhs,
+                                       const RegionAnnotation & rhs);
+
+inline
+RegionAnnotation::RegionAnnotation(UInt holeNumber,
+        int typeIndex, int start, int end, int score) {
+    SetHoleNumber(static_cast<int>(holeNumber));
+    SetTypeIndex(typeIndex);
+    SetStart(start);
+    SetEnd(end);
+    SetScore(score);
+}
+
+inline
+bool RegionAnnotation::operator<(const RegionAnnotation &rhs) const
+{
+    if (GetHoleNumber() == rhs.GetHoleNumber())
+        if (GetStart() == rhs.GetStart()) {
+            if (GetEnd() == rhs.GetEnd())
+                return GetScore() < rhs.GetScore();
+            else
+                return GetEnd() > rhs.GetEnd();
+        } else {
+            return GetStart() < rhs.GetStart();
+        }
+    else
+        return GetHoleNumber() < rhs.GetHoleNumber();
+}
+
+inline
+bool RegionAnnotation::operator<(int holeNumber) const
+{ return GetHoleNumber() < holeNumber; }
+
+
+inline
+RegionAnnotation& RegionAnnotation::operator=(const RegionAnnotation &rhs) {
+    memcpy(row, rhs.row, sizeof(int)*NCOLS);
+    return *this;
+}
+
+inline
+bool RegionAnnotation::operator==(const RegionAnnotation &rhs) const {
+    return (GetHoleNumber() == rhs.GetHoleNumber() and
+            GetTypeIndex() == rhs.GetTypeIndex() and
+            GetStart() == rhs.GetStart() and
+            GetEnd()   == rhs.GetEnd()   and
+            GetScore() == rhs.GetScore());
+}
+
+inline
+int RegionAnnotation::GetHoleNumber(void) const {
+    return row[HOLENUMBERCOL];
+}
+
+inline
+RegionAnnotation & RegionAnnotation::SetHoleNumber(int holeNumber) {
+    row[HOLENUMBERCOL] = holeNumber;
+    return *this;
+}
+
+inline
+int RegionAnnotation::GetTypeIndex(void) const {
+    return row[REGIONTYPEINDEXCOL];
+}
+
+inline std::string RegionAnnotation::GetTypeString(const std::vector<RegionType> & typesTable) const {
+    assert(GetTypeIndex() >= 0 and GetTypeIndex() < static_cast<int>(typesTable.size()));
+    return RegionTypeMap::ToString(typesTable[GetTypeIndex()]);
+}
+
+inline
+RegionAnnotation & RegionAnnotation::SetTypeIndex(int regionTypeIndex) {
+    row[REGIONTYPEINDEXCOL] = regionTypeIndex;
+    return *this;
+}
+
+inline
+int RegionAnnotation::GetStart(void) const {
+    return row[REGIONSTARTCOL];
+}
+
+inline
+RegionAnnotation & RegionAnnotation::SetStart(int start) {
+    row[REGIONSTARTCOL] = start;
+    return *this;
+}
+
+inline
+int RegionAnnotation::GetEnd(void) const {
+    return row[REGIONENDCOL];
+}
+
+inline
+RegionAnnotation & RegionAnnotation::SetEnd(int end) {
+    row[REGIONENDCOL] = end;
+    return *this;
+}
+
+inline
+int RegionAnnotation::GetScore(void) const {
+    return row[REGIONSCORECOL];
+}
+
+inline
+RegionAnnotation & RegionAnnotation::SetScore(int score) {
+    row[REGIONSCORECOL] = score;
+    return *this;
+}
+
+inline
+bool compare_region_annotation_by_type(const RegionAnnotation & lhs,
+                                       const RegionAnnotation & rhs)
+{
+    if (lhs.GetHoleNumber() == rhs.GetHoleNumber()) {
+        if (lhs.GetTypeIndex() == rhs.GetTypeIndex()) {
+            if (lhs.GetStart() == rhs.GetStart()) {
+                 if (lhs.GetEnd() == rhs.GetEnd())
+                     return lhs.GetScore() < rhs.GetScore();
+                 else return lhs.GetEnd() > rhs.GetEnd();
+            } else return lhs.GetStart() < rhs.GetStart();
+        } else return lhs.GetTypeIndex() < rhs.GetTypeIndex();
+    } else {
+        return lhs.GetHoleNumber() < rhs.GetHoleNumber();
+    }
+}
+
+#endif // _BLASR_REGION_ANNOTATION_HPP_
diff --git a/pbdata/reads/RegionAnnotations.cpp b/pbdata/reads/RegionAnnotations.cpp
new file mode 100644
index 0000000..22a5e3f
--- /dev/null
+++ b/pbdata/reads/RegionAnnotations.cpp
@@ -0,0 +1,179 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Yuan Li
+
+
+#include "RegionAnnotations.hpp"
+#include <algorithm>
+#include <cassert>
+
+
+RegionAnnotations::RegionAnnotations(const UInt holeNumber,
+                                     const std::vector<RegionAnnotation> & annotations,
+                                     const std::vector<RegionType> & types)
+    : table_(annotations)       // initializers listed in member declaration order
+    , holeNumber_(holeNumber)
+    , types_(types)
+{
+    // Only allow RegionAnnotations of a single ZMW (checked in debug builds only).
+    for (const auto & annotation: annotations) {
+        (void)annotation; // keeps NDEBUG builds free of unused-variable warnings
+        assert(holeNumber_ == static_cast<UInt>(annotation.GetHoleNumber())
+               && "RegionAnnotations must contain regions from a single ZMW");
+    }
+    // Keep rows ordered by type index, start, end (descending) and score.
+    std::sort(table_.begin(), table_.end(), compare_region_annotation_by_type);
+}
+
+RegionAnnotations::RegionAnnotations(const RegionAnnotations & rhs)
+    : table_(rhs.table_)          // member-wise copy, in declaration order
+    , holeNumber_(rhs.holeNumber_)
+    , types_(rhs.types_)
+{ }
+
+UInt RegionAnnotations::HoleNumber(void) const {
+    return holeNumber_; // the hole number this object was constructed with
+}
+
+std::vector<RegionAnnotation>
+RegionAnnotations::RegionAnnotationsOfType(RegionType type) const {
+    // Collect the rows whose type index matches `type`, then sort them with operator<.
+    std::vector<RegionAnnotation> ret;
+    const int typeIndex = RegionTypeMap::ToIndex(type, types_);
+    if (typeIndex >= 0) {
+        for (const auto & ra: table_) // by reference: avoid copying every row
+            if (ra.GetTypeIndex() == typeIndex) ret.push_back(ra);
+        std::sort(ret.begin(), ret.end()); // qualified: do not depend on ADL
+    }
+    return ret;
+}
+
+std::vector<RegionAnnotation> RegionAnnotations::Adapters() const {
+    return RegionAnnotationsOfType(Adapter); // filtered and sorted by the helper
+}
+
+bool RegionAnnotations::HasHQRegion() const {
+    // Compare directly: an unsigned `end - start` would wrap when end < start.
+    return (HQRegions().size() >= 1 and HQEnd() > HQStart());
+}
+
+std::vector<RegionAnnotation>
+RegionAnnotations::HQRegions() const {
+    return RegionAnnotationsOfType(HQRegion); // every HQRegion row; TheHQRegion() expects at most one
+}
+
+RegionAnnotation
+RegionAnnotations::TheHQRegion() const {
+    const std::vector<RegionAnnotation> hqs = HQRegions();
+    if (hqs.empty()) // no HQ region in table: report a zero-length placeholder
+        return RegionAnnotation(holeNumber_, RegionTypeMap::ToIndex(HQRegion, types_), 0, 0, 0);
+    assert(hqs.size() == 1 && "Zmw has more than one HQRegion.");
+    // Always return: under NDEBUG the assert is gone and falling off the end is UB.
+    return hqs[0];
+}
+
+DNALength RegionAnnotations::HQStart() const {
+    return TheHQRegion().GetStart(); // 0 when the table has no HQ region (placeholder row)
+}
+
+DNALength RegionAnnotations::HQEnd() const {
+    return TheHQRegion().GetEnd(); // 0 when the table has no HQ region (placeholder row)
+}
+
+int RegionAnnotations::HQScore() const {
+    return TheHQRegion().GetScore(); // 0 when the table has no HQ region (placeholder row)
+}
+
+std::vector<RegionAnnotation>
+RegionAnnotations::Inserts() const {
+    return RegionAnnotationsOfType(Insert); // filtered and sorted by the helper
+}
+
+std::vector<ReadInterval>
+RegionAnnotations::AdapterIntervals() const {
+    // Convert every adapter annotation into a ReadInterval, preserving order.
+    std::vector<RegionAnnotation> adapters = Adapters();
+    std::vector<ReadInterval> intervals;
+    for (size_t i = 0; i < adapters.size(); i++) intervals.push_back(ReadInterval(adapters[i]));
+    return intervals;
+}
+
+std::vector<ReadInterval>
+RegionAnnotations::SubreadIntervals(const DNALength wholeLength,
+                                    const bool byAdapter,
+                                    const bool byHQRegion) const {
+    // Candidate inserts: taken from the table, or inferred as the gaps between adapters.
+    std::vector<RegionAnnotation> inserts;
+    if (not byAdapter) {
+        inserts = Inserts();
+    } else {
+        // Query adapters once; each Adapters() call filters and sorts the table again.
+        const std::vector<RegionAnnotation> adapters = Adapters();
+        if (not adapters.empty()) {
+            // Must have at least one adapter in order to find inserts by adapter.
+            std::vector<DNALength> starts, ends;
+            starts.push_back(0);
+            for (const auto & adapter: adapters) {
+                assert(wholeLength >= adapter.GetStart() and
+                       wholeLength >= adapter.GetEnd()); // bug if fail assert
+                starts.push_back(adapter.GetEnd());
+                ends.push_back(adapter.GetStart());
+            }
+            ends.push_back(wholeLength);
+            for (size_t i = 0; i < starts.size(); i++) {
+                // Use adapter to infer subreads, read score considered unknown.
+                if (ends[i] > starts[i])
+                    inserts.push_back(RegionAnnotation(holeNumber_, Insert, starts[i], ends[i], 0));
+            }
+        } // else no inserts can be found
+    }
+
+    std::vector<ReadInterval> ret;
+    if (not byHQRegion) {
+        for (auto insert: inserts) ret.push_back(ReadInterval(insert));
+    } else if (not inserts.empty() and HasHQRegion()) {
+        // The HQ region is loop-invariant, so look it up once instead of per insert.
+        const RegionAnnotation hq = TheHQRegion();
+        const DNALength hqStart = hq.GetStart(), hqEnd = hq.GetEnd();
+        for (const auto & insert: inserts) {
+            // Clip each insert to the HQ region; subreads' read score = HQRegion score.
+            const DNALength s = std::max(static_cast<UInt>(insert.GetStart()), hqStart);
+            const DNALength e = std::min(static_cast<UInt>(insert.GetEnd()), hqEnd);
+            if (s < e) ret.push_back(ReadInterval(s, e, hq.GetScore()));
+        }
+    } // else ret = {}
+    return ret;
+}
diff --git a/pbdata/reads/RegionAnnotations.hpp b/pbdata/reads/RegionAnnotations.hpp
new file mode 100644
index 0000000..8a16490
--- /dev/null
+++ b/pbdata/reads/RegionAnnotations.hpp
@@ -0,0 +1,122 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Yuan Li
+
+#ifndef _PBDATA_READS_REGION_ANNOTATIONS_HPP_
+#define _PBDATA_READS_REGION_ANNOTATIONS_HPP_
+
+#include <algorithm>
+#include <string>
+#include <vector>
+#include "ReadInterval.hpp"
+#include "RegionAnnotation.hpp"
+
+class RegionAnnotations {
+
+    /// \name Region Annotations of a single ZMW
+    /// \{
+private:
+    /// Region table of a zmw; the constructor sorts it with compare_region_annotation_by_type.
+    std::vector<RegionAnnotation> table_;
+
+    /// Hole number of the zmw that every row in table_ is expected to carry.
+    UInt holeNumber_;
+
+    /// Region types in order; a row's type index is a position in this list.
+    std::vector<RegionType> types_;
+
+public:
+    RegionAnnotations(const UInt holeNumber,
+                      const std::vector<RegionAnnotation> & annotations,
+                      const std::vector<RegionType> & types);
+
+    RegionAnnotations(const RegionAnnotations & rhs);
+
+    ~RegionAnnotations() {}
+
+    /// \returns zmw holeNumber.
+    UInt HoleNumber(void) const;
+
+    /// \returns sorted adapter regions of this zmw.
+    std::vector<RegionAnnotation> Adapters() const;
+
+    /// \returns whether the table specifies an HQ region of nonzero length.
+    bool HasHQRegion() const;
+
+    /// \returns exactly one HQ region of this zmw.
+    /// \note If no HQ region exists, return a RegionAnnotation of length 0.
+    ///       If more than one HQ region is found for this zmw, raise an assertion error.
+    RegionAnnotation TheHQRegion() const;
+
+    /// \returns HQ start position of this zmw (0 if there is no HQ region).
+    DNALength HQStart() const;
+
+    /// \returns HQ end position of this zmw (0 if there is no HQ region).
+    DNALength HQEnd() const;
+
+    /// \returns HQ score of this zmw (0 if there is no HQ region).
+    int HQScore() const;
+
+    /// \returns sorted insert regions of this zmw.
+    std::vector<RegionAnnotation> Inserts() const;
+
+    /// \returns read intervals of all adapters of this zmw.
+    std::vector<ReadInterval> AdapterIntervals() const;
+
+    /// \returns read intervals of all subreads of this zmw.
+    /// \param[in] wholeLength     Length of unrolled sequence of this zmw. Note that
+    ///                            this piece of info does not exist in region table.
+    /// \param[in] byAdapter       false: return inserts in region table directly.
+    ///                            true : infer inserts from the gaps between adapters.
+    /// \param[in] byHQRegion      false: inserts may contain both HQ and LQ regions
+    ///                            true : inserts clipped to the HQ region only.
+    std::vector<ReadInterval>
+    SubreadIntervals(const DNALength wholeLength,
+                     const bool byAdapter = true,
+                     const bool byHQRegion = true) const;
+
+private:
+    /// \returns sorted vector of region annotations of a RegionType.
+    std::vector<RegionAnnotation>
+    RegionAnnotationsOfType(RegionType type) const;
+
+    /// \returns all HQ regions of this zmw (normally zero or one).
+    std::vector<RegionAnnotation> HQRegions() const;
+
+    /// \}
+};
+
+#endif
diff --git a/pbdata/reads/RegionTable.cpp b/pbdata/reads/RegionTable.cpp
index 54582a7..e50c699 100644
--- a/pbdata/reads/RegionTable.cpp
+++ b/pbdata/reads/RegionTable.cpp
@@ -1,159 +1,136 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Mark Chaisson
+// Modified by: Yuan Li
+
+
 #include <algorithm>
+#include <iostream>
+#include <ostream>
 #include "RegionTable.hpp"
 
 using namespace std;
 
-RegionAnnotation& RegionAnnotation::operator=(const RegionAnnotation &rhs) {
-    memcpy(row, rhs.row, sizeof(int)*NCOLS);
-    return *this;
-}
-int RegionAnnotation::GetHoleNumber() {
-    return row[HoleNumber];
-}
-
-void RegionAnnotation::SetHoleNumber(int holeNumber) {
-    row[HoleNumber] = holeNumber;
-}
 
-int RegionAnnotation::GetType() const {
-    return row[RegionType];
-}
-
-void RegionAnnotation::SetType(int regionType) {
-    row[RegionType] = regionType;
-}
-
-int RegionAnnotation::GetStart() {
-    return row[RegionStart];
-}
-
-void RegionAnnotation::SetStart(int start) {
-    row[RegionStart] = start;
-}
-int RegionAnnotation::GetEnd() {
-    return row[RegionEnd];
-}
-
-void RegionAnnotation::SetEnd(int end) {
-    row[RegionEnd] = end;
-}
-
-int RegionAnnotation::GetScore() {
-    return row[RegionScore];
+RegionTable & RegionTable::Reset() {
+    map_.clear();               // drop all per-zmw region annotations
+    columnNames.clear();
+    regionTypes.clear();
+    regionDescriptions.clear();
+    regionSources.clear();
+    regionTypeEnums.clear();
+    return *this;               // allow call chaining
 }
 
-void RegionAnnotation::SetScore(int score) {
-    row[RegionScore] = score;
-}
+std::vector<RegionType> RegionTable::RegionTypeEnums(void) const
+{ return regionTypeEnums; } // returned by value: the caller gets a copy
 
-int RegionTable::LookupRegionsByHoleNumber(int holeNumber, int &low, int &high) const {
-    std::vector<RegionAnnotation>::const_iterator lowIt, highIt;
-    lowIt  = std::lower_bound(table.begin(), table.end(), holeNumber);
-    highIt = std::lower_bound(table.begin(), table.end(), holeNumber+1);
-    low =  lowIt - table.begin();
-    high = highIt - table.begin();
-    return high-low;
-}
+std::vector<std::string> RegionTable::RegionTypes(void) const
+{ return regionTypes; } // returned by value: the caller gets a copy
 
-//
-// Define a bunch of accessor functions.
-//
+std::vector<std::string> RegionTable::ColumnNames(void) const
+{ return columnNames; } // returned by value: the caller gets a copy
 
-//
-// Different region tables have different ways of encoding regions.
-// This maps from the way they are encoded in the rgn table to a
-// standard encoding.
-//
+std::vector<std::string> RegionTable::RegionDescriptions(void) const
+{ return regionDescriptions; } // returned by value: the caller gets a copy
 
-RegionType RegionTable::GetType(int regionIndex) const {
-    assert(regionIndex < table.size());
-    assert(regionIndex >= 0);
-    return (RegionType) regionTypeEnums[table[regionIndex].GetType()];
-}
+std::vector<std::string> RegionTable::RegionSources(void) const
+{ return regionSources;} // returned by value: the caller gets a copy
 
-int RegionTable::GetStart(int regionIndex) {
-    assert(regionIndex < table.size());
-    assert(regionIndex >= 0);
-    return table[regionIndex].GetStart();
-}
+RegionTable & RegionTable::ConstructTable(std::vector<RegionAnnotation> & table,
+                                          const std::vector<std::string> & regionTypeStrs) {
+    RegionTypes(regionTypeStrs); //< Set both regionTypes and regionTypeEnums.
 
-void RegionTable::SetStart(int regionIndex, int start) {
-    assert(regionIndex < table.size());
-    assert(regionIndex >= 0);
-    table[regionIndex].SetStart(start);
-}
+    // Must sort region annotations by HoleNumber, RegionTypeIndex, Start, End, and Score
+    std::sort(table.begin(), table.end(), compare_region_annotation_by_type);
 
-int RegionTable::GetEnd(int regionIndex) {
-    assert(regionIndex < table.size());
-    assert(regionIndex >= 0);
-    return table[regionIndex].GetEnd();
-}
+    // Construct map_<holeNumber, RegionAnnotations>: one entry per run of equal hole numbers.
+    if (table.size() > 0) {
+        UInt pre_hn = table[0].GetHoleNumber();
+        auto itBegin = table.begin();
+        for (auto it = table.begin(); it != table.end(); it++) {
+            if (it->GetHoleNumber() > pre_hn) {
+                map_.insert(std::pair<UInt, RegionAnnotations>(pre_hn,
+                            RegionAnnotations(pre_hn,
+                                              std::vector<RegionAnnotation>(itBegin, it),
+                                              regionTypeEnums)));
+                pre_hn = it->GetHoleNumber();
+                itBegin = it;
+            }
+        }
 
-void RegionTable::SetEnd(int regionIndex, int end) {
-    assert(regionIndex < table.size());
-    assert(regionIndex >= 0);
-    table[regionIndex].SetEnd(end);
+        map_.insert(std::pair<UInt, RegionAnnotations>(pre_hn, RegionAnnotations(pre_hn,
+                    std::vector<RegionAnnotation>(itBegin, table.end()), regionTypeEnums)));
+    }
+    // The signature promises RegionTable&; flowing off the end here was undefined behavior.
+    return *this;
 }
 
-int RegionTable::GetHoleNumber(int regionIndex) {
-    assert(regionIndex < table.size());
-    assert(regionIndex >= 0);
-    return table[regionIndex].GetHoleNumber();
+std::vector<RegionType> RegionTable::DefaultRegionTypes(void) {
+    // Translate each default PacBio region type name into its RegionType enum, in order.
+    std::vector<RegionType> defaults;
+    for (const std::string & name: PacBio::AttributeValues::Regions::regiontypes)
+        defaults.push_back(RegionTypeMap::ToRegionType(name));
+    return defaults;
 }
 
-void RegionTable::SetHoleNumber(int regionIndex, int holeNumber) {
-    assert(regionIndex < table.size());
-    assert(regionIndex >= 0);
-    table[regionIndex].SetHoleNumber(holeNumber);
+RegionTable & RegionTable::RegionTypes(const std::vector<std::string> & regionTypeStrs) {
+    regionTypes = regionTypeStrs;
+    regionTypeEnums.clear(); // rebuild so repeated calls stay in step with regionTypes
+    for (const std::string & regionTypeString: regionTypeStrs)
+        regionTypeEnums.push_back(RegionTypeMap::ToRegionType(regionTypeString));
+    return *this;
 }
 
-int RegionTable::GetScore(int regionIndex) {
-    assert(regionIndex < table.size());
-    assert(regionIndex >= 0);
-    return table[regionIndex].row[RegionAnnotation::RegionScore];
-}
+RegionTable & RegionTable::ColumnNames(const std::vector<std::string> & in)
+{ columnNames = in; return *this; } // replaces the stored list; returns *this for chaining
 
-void RegionTable::SetScore(int regionIndex, int score) {
-    assert(regionIndex < table.size());
-    assert(regionIndex >= 0);
-    table[regionIndex].row[RegionAnnotation::RegionScore] = score;
-}
+RegionTable & RegionTable::RegionDescriptions(const std::vector<std::string> & in)
+{ regionDescriptions = in; return *this; } // replaces the stored list; returns *this for chaining
 
-void RegionTable::SortTableByHoleNumber() {
-    std::stable_sort(table.begin(), table.end());
-}
+RegionTable & RegionTable::RegionSources(const std::vector<std::string> & in)
+{ regionSources = in; return *this; } // replaces the stored list; returns *this for chaining
 
-void RegionTable::Reset() {
-    table.clear();
-    columnNames.clear();
-    regionTypes.clear();
-    regionDescriptions.clear();
-    regionSources.clear();
-    regionTypeEnums.clear();
+bool RegionTable::HasHoleNumber(const UInt holeNumber) const {
+    return (map_.find(holeNumber) != map_.end()); // true when map_ holds an entry for this zmw
 }
 
-void RegionTable::CreateDefaultAttributes() {
-    columnNames.clear();
-    columnNames.push_back("HoleNumber");
-    columnNames.push_back("Region type index");
-    columnNames.push_back("Region start in bases");
-    columnNames.push_back("Region end in bases");
-    columnNames.push_back("Region score");
-
-    regionTypes.push_back("Adapter");
-    regionTypes.push_back("Insert");
-    regionTypes.push_back("HQRegion");
-
-    regionDescriptions.push_back("Adapter Hit");
-    regionDescriptions.push_back("Insert Region");
-    regionDescriptions.push_back("High Quality bases region. Score is 1000 * "
-            "predicted accuracy, where predicted accuary is 0 to 1.0"); 
-
-    regionSources.push_back("AdapterFinding");
-    regionSources.push_back("AdapterFinding");
-    regionSources.push_back("PulseToBase Region classifer");
-
-    regionTypeEnums.push_back(Adapter);
-    regionTypeEnums.push_back(Insert);
-    regionTypeEnums.push_back(HQRegion);
+RegionAnnotations RegionTable::operator [] (const UInt holeNumber) const {
+    // Precondition: HasHoleNumber(holeNumber). Otherwise find() below yields end().
+    assert (HasHoleNumber(holeNumber)
+            && "Could not find zmw in region table.");
+    return map_.find(holeNumber)->second; // returns a copy of the stored annotations
 }
diff --git a/pbdata/reads/RegionTable.hpp b/pbdata/reads/RegionTable.hpp
index 497e625..f5b750a 100644
--- a/pbdata/reads/RegionTable.hpp
+++ b/pbdata/reads/RegionTable.hpp
@@ -1,95 +1,145 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Mark Chaisson
+
 #ifndef _BLASR_REGION_TABLE_HPP_
 #define _BLASR_REGION_TABLE_HPP_
 
 #include <cassert>
 #include <cstring>
 #include <string>
+#include <iostream>
 #include <vector>
+#include <map>
+#include <ostream>
+#include "Types.h"
 #include "Enumerations.h"
+#include "PacBioDefs.h"
+#include "RegionAnnotation.hpp"
+#include "RegionAnnotations.hpp"
 
-class RegionAnnotation {
-public:
-    typedef enum T_AnnotationRow {HoleNumber, RegionType, RegionStart, 
-        RegionEnd, RegionScore} AnnotationRow;
-    static const int NCOLS=5;
-    int row[NCOLS];
-
-    inline
-    bool operator<(const RegionAnnotation &rhs) const
-    { return row[HoleNumber] < rhs.row[HoleNumber]; }
-
-    inline
-    bool operator<(int holeNumber) const
-    { return row[HoleNumber] < holeNumber; }
-
-    RegionAnnotation& operator=(const RegionAnnotation &rhs); 
-
-    int GetHoleNumber(); 
-
-    void SetHoleNumber(int holeNumber); 
-
-    int GetType() const; 
-
-    void SetType(int regionType); 
-
-    int GetStart(); 
-
-    void SetStart(int start); 
-
-    int GetEnd(); 
-
-    void SetEnd(int end); 
-
-    int GetScore(); 
-
-    void SetScore(int score); 
-};
 
 class RegionTable {
-public:
-    std::vector<RegionAnnotation> table;
+private:
+    /// RegionTable reading from h5 file 'Regions' dataset.
+    /// \name member variables
+    /// \{
+    /// Map zmw hole number to zmw RegionAnnotations.
+    std::map<UInt, RegionAnnotations> map_;
+    /// \}
+
+    /// \name Region table attributes.
     std::vector<std::string> columnNames;
     std::vector<std::string> regionTypes;
     std::vector<std::string> regionDescriptions;
     std::vector<std::string> regionSources;
-    std::vector<RegionType> regionTypeEnums;
+    std::vector<RegionType>  regionTypeEnums;
+    /// \}
+
+public:
+    /// \name Constructor & destructor & reset
+    /// \{
+    RegionTable() {}
 
-    int LookupRegionsByHoleNumber(int holeNumber, int &low, int &high) const; 
+    ~RegionTable() {}
 
-    //
-    // Define a bunch of accessor functions.
-    //
+    /// Clears member variables in region table.
+    /// \returns *this
+    RegionTable& Reset();
+    /// \}
 
-    //
     // Different region tables have different ways of encoding regions.
     // This maps from the way they are encoded in the rgn table to a
     // standard encoding.
     //
+    /// \name Accessor functions to region table attributes.
+    /// \{
 
-    RegionType GetType(int regionIndex) const; 
+    /// \returns *default PacBio* region types (order matters).
+    static std::vector<RegionType> DefaultRegionTypes(void);
 
-    int GetStart(int regionIndex); 
+    /// \returns RegionType enums (order matters).
+    std::vector<RegionType> RegionTypeEnums(void) const;
 
-    void SetStart(int regionIndex, int start); 
+    /// \returns RegionType strings in order
+    std::vector<std::string> RegionTypes(void) const;
 
-    int GetEnd(int regionIndex); 
+    /// \returns column names.
+    std::vector<std::string> ColumnNames(void) const;
 
-    void SetEnd(int regionIndex, int end); 
+    /// \returns region descriptions.
+    std::vector<std::string> RegionDescriptions(void) const;
 
-    int GetHoleNumber(int regionIndex); 
+    /// \returns region sources.
+    std::vector<std::string> RegionSources(void) const;
 
-    void SetHoleNumber(int regionIndex, int holeNumber); 
+    /// Construct map_ (holeNumber --> RegionAnnotations) from table.
+    /// \param[in] table region table containing region annotations of all zmws
+    /// \param[in] regionTypeStrs ordered region type strings, which map region types
+    ///             to region type indices.
+    RegionTable & ConstructTable(std::vector<RegionAnnotation> & table,
+                                 const std::vector<std::string> & regionTypeStrs);
 
-    int GetScore(int regionIndex); 
+    /// Note that the ORDER of region types does matter.
+    /// Set region types (order matters).
+    RegionTable & RegionTypes(const std::vector<std::string> & in);
 
-    void SetScore(int regionIndex, int score); 
+    /// Set column names, e.g.,
+    /// {"HoleNumber", "TypeIndex", "Start", "End", "Score"}
+    RegionTable & ColumnNames(const std::vector<std::string> & in);
 
-    void SortTableByHoleNumber(); 
+    /// Set region descriptions, one per region type, e.g.,
+    /// {"Adapter Hit", "Insert Region", "High Quality bases region. ..."}
+    RegionTable & RegionDescriptions(const std::vector<std::string> & in);
 
-    void Reset(); 
+    /// Set region sources, one per region type, e.g.,
+    /// {"AdapterFinding", "AdapterFinding", "PulseToBase Region classifer"}
+    RegionTable & RegionSources(const std::vector<std::string> & in);
+    /// \}
 
-    void CreateDefaultAttributes(); 
-};
+    /// \name Accessor functions to zmw region annotations.
+    /// \{
+    /// \returns Whether or not this region table has regions of a zmw.
+    bool HasHoleNumber(const UInt holeNumber) const;
 
+    /// Get zmw region annotations given its hole number.
+    /// Note that HasHoleNumber must be called first.
+    /// \returns RegionAnnotations of a zmw.
+    RegionAnnotations operator [] (const UInt holeNumber) const;
+    /// \}
+};
 
 #endif // _BLASR_REGION_TABLE_HPP_
diff --git a/pbdata/reads/RegionTypeMap.cpp b/pbdata/reads/RegionTypeMap.cpp
new file mode 100644
index 0000000..e6e4f4a
--- /dev/null
+++ b/pbdata/reads/RegionTypeMap.cpp
@@ -0,0 +1,89 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Yuan Li
+
+#include "RegionTypeMap.hpp"
+
+std::string RegionTypeMap::ToString(RegionType rt) {
+    const auto entry = RegionTypeToString.find(rt); // single lookup, reused below
+    assert(entry != RegionTypeToString.end());
+    return entry->second;
+}
+
+RegionType RegionTypeMap::ToRegionType(const std::string & str) {
+    const auto entry = StringToRegionType.find(str);
+    if (entry == StringToRegionType.end()) {
+        std::cout << "Unsupported RegionType " << str << std::endl;
+        assert(false); // NOTE(review): with NDEBUG this falls through to the dereference below
+    }
+    return entry->second;
+}
+
+int RegionTypeMap::ToIndex(const std::string & typeStr, const std::vector<std::string> & typeStrs) {
+    auto it = std::find(typeStrs.begin(), typeStrs.end(), typeStr);
+    if (it == typeStrs.end()) {
+        std::cout << "Could not find RegionType " << typeStr << std::endl;
+        assert(false);
+        return -1; // reached only under NDEBUG; was a fall-off-the-end (undefined behavior)
+    }
+    return static_cast<int>(std::distance(typeStrs.begin(), it)); // position of typeStr in typeStrs
+}
+
+int RegionTypeMap::ToIndex(RegionType rt, const std::vector<std::string> & typeStrs) {
+    return RegionTypeMap::ToIndex(RegionTypeMap::ToString(rt), typeStrs); // enum -> name -> position of name
+}
+
+int RegionTypeMap::ToIndex(RegionType rt, const std::vector<RegionType> & regionTypes) {
+    auto it = std::find(regionTypes.begin(), regionTypes.end(), rt);
+    if (it == regionTypes.end()) {
+        std::cout << "Could not find RegionType " << RegionTypeMap::ToString(rt) << std::endl;
+        assert(false);
+        return -1; // reached only under NDEBUG; was a fall-off-the-end (undefined behavior)
+    }
+    return static_cast<int>(std::distance(regionTypes.begin(), it)); // position of rt in regionTypes
+}
+
+const std::map<RegionType, std::string> RegionTypeMap::RegionTypeToString = { // enum -> name
+    {Adapter,  "Adapter"},
+    {Insert,   "Insert"},
+    {HQRegion, "HQRegion"},
+    {BarCode,  "Barcode"}   // the string form is spelled with a lowercase 'c'
+};
+
+const std::map<std::string, RegionType> RegionTypeMap::StringToRegionType = { // name -> enum
+    {"Adapter",  Adapter},
+    {"Insert",   Insert},
+    {"HQRegion", HQRegion},
+    {"Barcode",  BarCode},  // the string form is spelled with a lowercase 'c'
+};
diff --git a/pbdata/reads/RegionTypeMap.hpp b/pbdata/reads/RegionTypeMap.hpp
new file mode 100644
index 0000000..0b2903c
--- /dev/null
+++ b/pbdata/reads/RegionTypeMap.hpp
@@ -0,0 +1,85 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Yuan Li
+
+#ifndef _BLASR_REGION_TYPE_MAP_HPP_
+#define _BLASR_REGION_TYPE_MAP_HPP_
+
+#include <cassert>
+#include <string>
+#include <iostream>
+#include <vector>
+#include <map>
+#include <algorithm>
+#include "Types.h"
+#include "Enumerations.h"
+
+
+class RegionTypeMap {
+public:
+    /// \name Map region type to/from string and index
+    /// \{
+    static std::string ToString(RegionType rt); ///< \returns the string name registered for rt
+
+    static RegionType ToRegionType(const std::string & str); ///< \returns the region type registered for str
+
+    /// \param[in] typeStr - query region type as a string
+    /// \param[in] typeStrs - a vector of region type strings in order
+    /// \returns index of a region type as string in a vector of region type strings
+    static int ToIndex(const std::string & typeStr,
+                       const std::vector<std::string> & typeStrs);
+
+    /// \param[in] rt - query region type
+    /// \param[in] typeStrs - a vector of region type strings in order
+    /// \returns index of the query region type in a vector of region type strings
+    static int ToIndex(RegionType rt,
+                       const std::vector<std::string> & typeStrs);
+
+    /// \param[in] rt - query region type
+    /// \param[in] regionTypes - a vector of region type enums in order
+    /// \returns index of the query region type in a vector of region type enums
+    static int ToIndex(RegionType rt,
+                       const std::vector<RegionType> & regionTypes);
+private:
+    // Map region type to string
+    static const std::map<RegionType, std::string> RegionTypeToString;
+
+    // Map string to region type
+    static const std::map<std::string, RegionType> StringToRegionType;
+    /// \}
+};
+
+#endif // _BLASR_REGION_TYPE_MAP_HPP_
diff --git a/pbdata/reads/ScanData.cpp b/pbdata/reads/ScanData.cpp
index 3114376..37699f8 100644
--- a/pbdata/reads/ScanData.cpp
+++ b/pbdata/reads/ScanData.cpp
@@ -1,8 +1,87 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Mark Chaisson
+
 #include "ScanData.hpp"
+#include <iostream>
+
+// Render baseMap as a four-character string: character i is the base assigned to
+// dye channel i, and channels without a base stay ' '. An empty map yields "".
+std::string ScanData::BaseMapToStr(const std::map<char, size_t> & baseMap) {
+    if (baseMap.empty()) return "";
+    std::string baseMapStr(4, ' '); //4 dye channels.
+    for (const auto & entry : baseMap) {
+        // Valid channels are 0..3; index 4 would address one past the last character.
+        if (entry.second >= baseMapStr.size()) {
+            std::cout << "ERROR, there are more than four dye channels." << std::endl;
+            exit(1);
+        }
+        baseMapStr[entry.second] = entry.first;
+    }
+    return baseMapStr;
+}
+
+// Map every character of baseMapStr to its position in the string; a character that
+// occurs more than once keeps its last position.
+std::map<char, size_t> ScanData::StrToBaseMap(const std::string & baseMapStr) {
+    std::map<char, size_t> ret;
+    for (size_t i = 0; i < baseMapStr.size(); ++i) ret[baseMapStr[i]] = i; // size_t index matches size() and the mapped type
+    return ret;
+}
+
+// A base map is valid when A, T, G and C are all present and together occupy the
+// four channel indices 0..3 (each index claimed by at least one of them).
+bool ScanData::IsValidBaseMap(const std::map<char, size_t> & baseMap) {
+    const char X = 'x';
+    std::string v(4, X);
+
+    for(const char base : {'A', 'T', 'G', 'C'}) {
+        // Check presence before reading the index: dereferencing end() is undefined.
+        const auto it = baseMap.find(base);
+        if (it == baseMap.end() or it->second > 3)
+            return false;
+        v[it->second] = 'o';
+    }
+    return v.find(X) == std::string::npos; // any channel still marked X was never claimed
+}
 
 ScanData::ScanData() {
     platformId = NoPlatform;
-    frameRate = numFrames = 0;
+    frameRate = 0.0;
+    numFrames = 0;
     movieName = runCode = whenStarted = "";
     baseMap.clear();
 }
@@ -10,3 +89,77 @@ ScanData::ScanData() {
 std::string ScanData::GetMovieName() {
     return movieName;
 }
+
+ScanData & ScanData::PlatformID(const PlatformId & id) { // setter; returns *this so calls can be chained
+    platformId = id;
+    return *this;
+}
+ScanData & ScanData::FrameRate(const float & rate) { // setter; returns *this
+    frameRate = rate;
+    return *this;
+}
+ScanData & ScanData::NumFrames(const unsigned int & num) { // setter; returns *this
+    numFrames = num;
+    return *this;
+}
+ScanData & ScanData::MovieName(const std::string & name) { // setter; returns *this
+    movieName = name;
+    return *this;
+}
+ScanData & ScanData::RunCode(const std::string & code) { // setter; returns *this
+    runCode = code;
+    return *this;
+}
+ScanData & ScanData::WhenStarted(const std::string & when) { // setter; stores the string as given
+    whenStarted = when;
+    return *this;
+}
+ScanData & ScanData::BaseMap(const std::map<char, size_t> & bmp) {
+    // Plain assignment replaces the old content and stays correct when bmp aliases baseMap.
+    baseMap = bmp;
+    return *this;
+}
+ScanData & ScanData::BaseMap(const std::string & baseMapStr) { // converts via StrToBaseMap, then calls the map overload
+   return this->BaseMap(ScanData::StrToBaseMap(baseMapStr));
+}
+ScanData & ScanData::SequencingKit(const std::string sequencingKit) { // setter; argument is taken by value
+    sequencingKit_ = sequencingKit;
+    return *this;
+}
+ScanData & ScanData::BindingKit(const std::string bindingKit) { // setter; argument is taken by value
+    bindingKit_ = bindingKit;
+    return *this;
+}
+
+PlatformId ScanData::PlatformID(void) const { // getter counterpart of PlatformID(id)
+    return platformId;
+}
+float ScanData::FrameRate(void) const {
+    return frameRate;
+}
+unsigned int ScanData::NumFrames(void) const {
+    return numFrames;
+}
+std::string ScanData::MovieName(void) const { // returns a copy of movieName
+    return movieName;
+}
+std::string ScanData::RunCode(void) const { // returns a copy of runCode
+    return runCode;
+}
+std::string ScanData::WhenStarted(void) const { // returns a copy of whenStarted
+    return whenStarted;
+}
+std::map<char, size_t> ScanData::BaseMap(void) const { // returns a copy of the whole map
+    return baseMap;
+}
+
+std::string ScanData::BaseMapStr(void) const { // string form of baseMap, produced by BaseMapToStr
+    return ScanData::BaseMapToStr(baseMap);
+}
+
+std::string ScanData::SequencingKit(void) const { // returns a copy of sequencingKit_
+    return sequencingKit_;
+}
+std::string ScanData::BindingKit(void) const { // returns a copy of bindingKit_
+    return bindingKit_;
+}
diff --git a/pbdata/reads/ScanData.hpp b/pbdata/reads/ScanData.hpp
index 8cf07ea..a68bb37 100644
--- a/pbdata/reads/ScanData.hpp
+++ b/pbdata/reads/ScanData.hpp
@@ -1,20 +1,104 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Mark Chaisson
+
 #ifndef DATASTRUCTURES_READS_SCAN_DATA_H_
 #define DATASTRUCTURES_READS_SCAN_DATA_H_
 
 #include <string>
 #include <map>
 #include "Enumerations.h"
+#include "PacBioDefs.h"
+
+class HDFScanDataReader;
+class HDFScanDataWriter;
 
 class ScanData {
+friend class HDFScanDataReader;
+friend class HDFScanDataWriter;
+public:
+    // Convert base map from a map<char, size_t> to a string. 
+    // e.g., {{'A', 2}, {'C', 1}, {'T', 0}, {'G', 3}} --> TCAG
+    static std::string BaseMapToStr(const std::map<char, size_t> & baseMap);
+
+    // Convert base map from a string to a map<char, size_t>.
+    // e.g., TCAG --> {{'A', 2}, {'C', 1}, {'T', 0}, {'G', 3}} 
+    static std::map<char, size_t> StrToBaseMap(const std::string & baseMapStr);
+
+    // A valid baseMap maps each of the bases A, C, G and T to an index in 0..3, covering all four indices.
+    static bool IsValidBaseMap(const std::map<char, size_t> & baseMap);
+    
 public:
     PlatformId platformId;
     float frameRate;
     unsigned int numFrames;
     std::string movieName, runCode;
     std::string whenStarted;
-    std::map<char, int> baseMap;
+    std::map<char, size_t> baseMap;
+
     ScanData();
     std::string GetMovieName(); 
+
+    ScanData & PlatformID(const PlatformId & id);
+    ScanData & FrameRate(const float & rate);
+    ScanData & NumFrames(const unsigned int & num);
+    ScanData & MovieName(const std::string & name);
+    ScanData & RunCode(const std::string & code);
+    ScanData & WhenStarted(const std::string & when);
+    ScanData & BaseMap(const std::map<char, size_t> & bmp);
+    ScanData & BaseMap(const std::string & baseMapStr); 
+    ScanData & SequencingKit(const std::string sequencingKit);
+    ScanData & BindingKit(const std::string bindingKit);
+
+    PlatformId PlatformID(void) const;
+    float FrameRate(void) const;
+    unsigned int NumFrames(void) const;
+    std::string MovieName(void) const;
+    std::string RunCode(void) const;
+    std::string WhenStarted(void) const;
+    std::map<char, size_t> BaseMap(void) const;
+    std::string BaseMapStr(void) const;
+
+    std::string SequencingKit(void) const;
+    std::string BindingKit(void) const;
+ 
+
+private:
+    std::string sequencingKit_;
+    std::string bindingKit_;
 };
 
 #endif
diff --git a/pbdata/sam/SAMReaderImpl.hpp b/pbdata/sam/SAMReaderImpl.hpp
index fd83c76..b92ffc1 100644
--- a/pbdata/sam/SAMReaderImpl.hpp
+++ b/pbdata/sam/SAMReaderImpl.hpp
@@ -73,7 +73,7 @@ void SAMReader<T_ReferenceSequence, T_ReadGroup, T_SAMAlignment>::StoreKVPairs(s
   // Split on tab delineated line.
   //
   std::vector<std::string> kvPairStrings;
-  Tokenize(line, "\t", kvPairStrings);
+  Splice(line, "\t", kvPairStrings);
   KeywordValueStringsToPairs(kvPairStrings, kvPairs);
 }
 
diff --git a/pbdata/utils.hpp b/pbdata/utils.hpp
index 9686cb5..ed80f30 100644
--- a/pbdata/utils.hpp
+++ b/pbdata/utils.hpp
@@ -13,7 +13,10 @@ template<typename T_Int>
 T_Int CeilOfFraction(T_Int num, T_Int denom);
 
 template<typename T>
-T* ProtectedNew(unsigned long size); 
+inline T* ProtectedNew(unsigned long size); 
+
+template<typename T>
+inline T* ProtectedNew(void); 
 
 #include "utilsImpl.hpp"
 
diff --git a/pbdata/utils/SMRTReadUtils.cpp b/pbdata/utils/SMRTReadUtils.cpp
index 4bc2f6c..b2430b0 100644
--- a/pbdata/utils/SMRTReadUtils.cpp
+++ b/pbdata/utils/SMRTReadUtils.cpp
@@ -4,7 +4,7 @@
 void GetSMRTReadCoordinates(FASTQSequence &seq, int &x, int &y) {
 	std::string str(seq.title, seq.titleLength);
 	std::vector<std::string> titleTokens;
-	Tokenize(str, "_", titleTokens);
+	Splice(str, "_", titleTokens);
 	int i;
 	x = y = -1;
 	int cmp;
@@ -22,7 +22,7 @@ void GetSMRTReadCoordinates(FASTQSequence &seq, int &x, int &y) {
 
 void GetSpringfieldHoleNumberFromTitle(FASTQSequence &seq, unsigned int &holeNumber) {
 	std::vector<std::string> titleTokens;
-	Tokenize(seq.title, "/", titleTokens);
+	Splice(seq.title, "/", titleTokens);
 	if (titleTokens.size() < 2) {
 		return;
 	}
diff --git a/pbdata/utils/SMRTTitle.hpp b/pbdata/utils/SMRTTitle.hpp
index 33b108b..6b0da3e 100644
--- a/pbdata/utils/SMRTTitle.hpp
+++ b/pbdata/utils/SMRTTitle.hpp
@@ -20,5 +20,19 @@ public:
     /// \returns smrt title movie/zmw/s_e, if input read is a smrt title;
     /// otherwise, return an empty string.
     std::string ToString();
+
+public:
+    inline std::string MovieName(void) const;
+    inline UInt HoleNumber(void) const;
+    inline DNALength Start(void) const;
+    inline DNALength End(void) const;
+    inline operator bool(void) const;
 };
+
+inline std::string SMRTTitle::MovieName(void) const {return movieName;} // copy of movieName
+inline UInt SMRTTitle::HoleNumber(void) const {return holeNumber;}
+inline DNALength SMRTTitle::Start(void) const {return start;}
+inline DNALength SMRTTitle::End(void) const {return end;}
+inline SMRTTitle::operator bool(void) const {return isSMRTTitle;} // NOTE(review): not explicit, so it also converts implicitly to integers
+
 #endif
diff --git a/pbdata/utilsImpl.hpp b/pbdata/utilsImpl.hpp
index ee899cd..f58aa10 100644
--- a/pbdata/utilsImpl.hpp
+++ b/pbdata/utilsImpl.hpp
@@ -1,5 +1,10 @@
 #ifndef _BLASR_UTIL_IMPL_HPP_
 #define _BLASR_UTIL_IMPL_HPP_
+#include <stdlib.h>
+#include <cstdlib>   // abort()
+#include <new>       // bad_alloc
+#include <iostream>  // cout/cerr
+
 
 template<typename t_file>
 void CrucialOpen(std::string &fileName, t_file &file, std::ios_base::openmode mode) {
@@ -19,12 +24,27 @@ T_Int CeilOfFraction(T_Int num, T_Int denom) {
 }
 
 template<typename T>
-T* ProtectedNew(unsigned long size) {
-    T* ptr;
-    ptr = new T[size];
-    if (ptr == NULL) {
-        std::cout << "ERROR, allocating " << size * sizeof(T) << " bytes.";
-        exit(1);
+// Allocate an array of `size` objects of type T; on std::bad_alloc print the request size and abort().
+inline T* ProtectedNew(unsigned long size) {
+    try {
+        return new T[size];
+    } catch (const std::bad_alloc & err) {
+        std::cout << "ERROR, allocating " << size * sizeof(T) << " bytes."
+                  << err.what() << std::endl;
+        abort();
+    }
+    return nullptr; // not reached: abort() does not return
+}
+
+template<typename T>
+inline T* ProtectedNew(void) { // single-object counterpart of ProtectedNew(size)
+    T * ptr = nullptr;
+    try {
+       ptr = new T;
+    } catch (std::bad_alloc & ba) { // allocation failure: report the object size, then abort
+        std::cout << "ERROR, allocating " << sizeof(T) << " bytes."
+                  << ba.what() << std::endl;
+        abort();
     }
     return ptr;
 }
diff --git a/rules.mk b/rules.mk
new file mode 100644
index 0000000..6b1739a
--- /dev/null
+++ b/rules.mk
@@ -0,0 +1,28 @@
+ARFLAGS         := rc
+CXX_SHAREDFLAGS := -fPIC
+#LD_SHAREDFLAGS  := -dynamiclib -fPIC
+CPPFLAGS        += $(patsubst %,-I%,${INCLUDES})
+CFLAGS          += -fno-common
+LDFLAGS         += ${EXTRA_LDFLAGS}
+
+
+%.a: # archive all prerequisites ($^); none are listed here, so they must be attached by other rules
+	${AR} ${ARFLAGS} $@ $^
+
+%.so: # shared library whose soname is the target name
+	${CXX} -shared ${LDFLAGS} -o $@ -Wl,-soname,$@ $^ ${LDLIBS}
+
+%.dylib: # dynamic library whose install_name is the target name
+	${CXX} -dynamiclib ${LDFLAGS} -o $@ -Wl,-install_name,$@ $^ ${LDLIBS}
+
+%.o: %.cpp # plain object file
+	${CXX} ${CXXOPTS} ${CXXFLAGS} ${CPPFLAGS} -c $< -o $@
+
+%.shared.o: %.cpp # object file compiled with ${CXX_SHAREDFLAGS}
+	${CXX} ${CXXOPTS} ${CXXFLAGS} ${CPPFLAGS} ${CXX_SHAREDFLAGS} -c $< -o $@
+
+%.depend: %.cpp # writes dependency file foo.d whose make target is foo.o
+	${CXX} ${CXXOPTS} ${CXXFLAGS} ${CPPFLAGS} -MM -MP -MG -MT $(@:.depend=.o) -MF $(@:.depend=.d) $<
+
+%.shared.depend: %.cpp # same command; the substitutions yield foo.shared.o and foo.shared.d
+	${CXX} ${CXXOPTS} ${CXXFLAGS} ${CPPFLAGS} -MM -MP -MG -MT $(@:.depend=.o) -MF $(@:.depend=.d) $<
diff --git a/simple.mk b/simple.mk
deleted file mode 100644
index e725a6e..0000000
--- a/simple.mk
+++ /dev/null
@@ -1,16 +0,0 @@
-SHELL          = bash
-G_BUILDOS_CMD := bash -c 'set -e; set -o pipefail; id=$$(lsb_release -si | tr "[:upper:]" "[:lower:]"); rel=$$(lsb_release -sr); case $$id in ubuntu) printf "$$id-%04d\n" $${rel/./};; centos) echo "$$id-$${rel%%.*}";; *) echo "$$id-$$rel";; esac' 2>/dev/null
-OS_STRING     ?= $(shell $(G_BUILDOS_CMD))
-
-# magic for non-verbose builds
-V ?= 0
-
-CXX_0 = @echo "  CXX	$@"; $(CXX)
-CXX_1 = $(CXX)
-CXX_pp = $(CXX_$(V))
-
-AR_0 = @echo "  AR	$@"; $(AR)
-AR_1 = $(AR)
-AR_pp = $(AR_$(V))
-
-ARFLAGS := rc
diff --git a/travis.sh b/travis.sh
new file mode 100755
index 0000000..a0edb8b
--- /dev/null
+++ b/travis.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+ls /usr/include/hdf*  # listing only; runs before `set -e`, so a missing path does not stop the script
+ls /usr/lib/libhdf*   # listing only, same as above
+set -ex               # from here on: echo each command and stop at the first failure
+NOHDF=1 NOPBBAM=1 ./configure.py  # presumably configures a build without HDF5 and pbbam -- confirm in configure.py
+make -j4 libpbdata
+make -j4 libblasr
+
+# Test compilation of ./hdf using our own HDF5 headers, for now.
+# (This fails on Darwin b/c our HDF5_HEADERS were configured for Linux.)
+NOPBBAM=1 ./configure.py  # reconfigure with only NOPBBAM set
+make -j4 -C ./hdf libpbihdf.a
+
+# make -j4 gtest
diff --git a/unittest/.gitignore b/unittest/.gitignore
new file mode 100644
index 0000000..26ca380
--- /dev/null
+++ b/unittest/.gitignore
@@ -0,0 +1 @@
+/test-runner
diff --git a/unittest/Makefile b/unittest/Makefile
deleted file mode 100644
index 6df2e5f..0000000
--- a/unittest/Makefile
+++ /dev/null
@@ -1,41 +0,0 @@
-SHELL=bash
-
-PBINCROOT   := $(realpath ..)
-PREBUILT           ?= $(realpath ../../../../prebuilt.out)
-THIRD_PARTY_PREFIX ?= $(realpath ../..)
-
-include ./common.mk
-
-OS := $(shell uname)
-
-ifeq ($(OS), Darwin)
-	LD_WHOLE_ARCHIVE := -all_load
-	LD_NO_WHOLE_ARCHIVE := -noall_load
-else
-	LD_WHOLE_ARCHIVE := --whole-archive
-	LD_NO_WHOLE_ARCHIVE := --no-whole-archive
-endif
-
-EXE := test-runner
-
-all: $(EXE)
-	
-gtest: $(EXE)
-	./$< --gtest_output=xml:./xml/all.xml
-
-LIBS := alignment/libblasr_gtest.a \
-		hdf/libpbihdf_gtest.a \
-		pbdata/libpbdata_gtest.a
-
-$(EXE): $(LIBS)
-	$(CXX_pp) $(CXXOPTS) $(CXXFLAGS) $(GTEST_SRC) -Wl,$(LD_WHOLE_ARCHIVE) $^ -Wl,$(LD_NO_WHOLE_ARCHIVE) -o $@ -I$(GTEST_ROOT) $(LIBDIRS) $(LDFLAGS)
-
-$(LIBS):
-	make -C $(dir $@) $(notdir $@)
-
-clean:
-	@make -C alignment clean
-	@make -C hdf clean
-	@make -C pbdata clean
-	@rm -fr $(EXE) xml
-
diff --git a/unittest/alignment/Makefile b/unittest/alignment/Makefile
index f0bc981..154ab5e 100644
--- a/unittest/alignment/Makefile
+++ b/unittest/alignment/Makefile
@@ -1,4 +1,5 @@
-include ../common.mk
+include ../../rules.mk
+include ../defines.mk
 
 SOURCES    = $(wildcard *.cpp) \
 		     $(wildcard utils/*.cpp) \
@@ -17,13 +18,13 @@ EXE := test-runner
 all debug profile: $(EXE)
 
 libblasr_gtest.a: $(OBJECTS)
-	$(AR_pp) $(ARFLAGS)c $@ $^
+	$(AR) $(ARFLAGS)c $@ $^
 
 $(EXE): $(OBJECTS)
-	$(CXX_pp) $(CXXOPTS) $(CXXFLAGS) $^ $(GTEST_SRC) -o $@ -I$(GTEST_ROOT) $(LIBDIRS) $(LDFLAGS)
+	$(CXX) $(CXXOPTS) $(CXXFLAGS) $^ $(GTEST_SRC) -o $@ -I$(GTEST_ROOT) $(LIBDIRS) $(LDFLAGS)
 
 $(OBJECTS): %.o: %.cpp
-	$(CXX_pp) $(CXXOPTS) $(CXXFLAGS) -c $< -o $@ $(INCDIRS)
+	$(CXX) $(CXXOPTS) $(CXXFLAGS) -c $< -o $@ $(INCDIRS)
 
 gtest: $(EXE)
 	./$< --gtest_output=xml:../xml/alignment.xml
diff --git a/unittest/alignment/files/CCSIterator_gtest.cpp b/unittest/alignment/files/CCSIterator_gtest.cpp
index eba2a24..91da996 100644
--- a/unittest/alignment/files/CCSIterator_gtest.cpp
+++ b/unittest/alignment/files/CCSIterator_gtest.cpp
@@ -40,8 +40,6 @@ public:
         EXPECT_TRUE(rev);
         reader->ReadTable(*rgn);
         reader->Close();
-
-        rgn->SortTableByHoleNumber();
     }
 
     void TearDown() {
diff --git a/unittest/alignment/files/FragmentCCSIterator_gtest.cpp b/unittest/alignment/files/FragmentCCSIterator_gtest.cpp
index e1b2d0c..d97ce22 100644
--- a/unittest/alignment/files/FragmentCCSIterator_gtest.cpp
+++ b/unittest/alignment/files/FragmentCCSIterator_gtest.cpp
@@ -41,8 +41,6 @@ public:
         EXPECT_TRUE(rev);
         reader->ReadTable(*rgn);
         reader->Close();
-
-        rgn->SortTableByHoleNumber();
     }
 
     void TearDown() {
@@ -64,7 +62,8 @@ public:
 
 TEST_F(FragmentCCSIteratorTestFixture, Initialize) {
     // void Initialize(CCSSequence *_seqPtr, RegionTable *_regionTablePtr) {
-    ccs->zmwData.holeNumber = 10;
+    ccs->HoleNumber(10);
+    ccs->unrolledRead.Allocate(7000);
     it.Initialize(ccs, rgn);
 
     int numPasses = it.GetNumPasses();
diff --git a/unittest/alignment/files/FragmentCCSIterator_other_gtest.cpp b/unittest/alignment/files/FragmentCCSIterator_other_gtest.cpp
new file mode 100644
index 0000000..cd13cb2
--- /dev/null
+++ b/unittest/alignment/files/FragmentCCSIterator_other_gtest.cpp
@@ -0,0 +1,100 @@
+/*
+ * =====================================================================================
+ *
+ *       Filename:  FragmentCCSIterator_other_gtest.cpp
+ *
+ *    Description:  Test alignment/files/FragmentCCSIterator.hpp
+ *
+ *        Version:  1.0
+ *        Created:  11/29/2012 04:51:02 PM
+ *       Revision:  08/20/2014
+ *       Compiler:  gcc
+ *
+ *         Author:  Yuan Li (yli), yli at pacificbiosciences.com
+ *        Company:  Pacific Biosciences
+ *
+ * =====================================================================================
+ */
+
+#include "gtest/gtest.h"
+#define private public
+#include "files/CCSIterator.hpp"
+#include "files/FragmentCCSIterator.hpp"
+#include "reads/RegionTable.hpp"
+#include "HDFRegionTableReader.hpp"
+#include "pbdata/testdata.h"
+
+using namespace std;
+
+static const UInt HOLENUMBER = 76772;
+
+// Adapter - 0, Insert - 1, HQRegion - 2
+static const std::vector<RegionType> TYPES = {Adapter, Insert, HQRegion};
+static const std::vector<std::string> TYPESTRS = {"Adapter", "Insert", "HQRegion"};
+
+static const std::vector<RegionAnnotation> INSERTS = {
+    RegionAnnotation(HOLENUMBER, 1, 0,   253,  -1),
+    RegionAnnotation(HOLENUMBER, 1, 301, 678,  -1),
+    RegionAnnotation(HOLENUMBER, 1, 724, 1101, -1),
+    RegionAnnotation(HOLENUMBER, 1, 1150, 1534, -1),
+    RegionAnnotation(HOLENUMBER, 1, 1575, 1956, -1),
+    RegionAnnotation(HOLENUMBER, 1, 1999, 2379, -1),
+    RegionAnnotation(HOLENUMBER, 1, 2417, 2803, -1),
+    RegionAnnotation(HOLENUMBER, 1, 2852, 3245, -1),
+    RegionAnnotation(HOLENUMBER, 1, 3287, 3727, -1),
+    RegionAnnotation(HOLENUMBER, 1, 3778, 4176, -1),
+    RegionAnnotation(HOLENUMBER, 1, 4221, 4618, -1),
+    RegionAnnotation(HOLENUMBER, 1, 4661, 4862, -1)
+};
+
+static const std::vector<RegionAnnotation> ADAPTERS = {
+    RegionAnnotation(HOLENUMBER, 0, 253, 301, 854),
+    RegionAnnotation(HOLENUMBER, 0, 678, 724, 978),
+    RegionAnnotation(HOLENUMBER, 0, 1101, 1150, 897),
+    RegionAnnotation(HOLENUMBER, 0, 1534, 1575, 804),
+    RegionAnnotation(HOLENUMBER, 0, 1956, 1999, 930),
+    RegionAnnotation(HOLENUMBER, 0, 2379, 2417, 736),
+    RegionAnnotation(HOLENUMBER, 0, 2803, 2852, 918),
+    RegionAnnotation(HOLENUMBER, 0, 3245, 3287, 928),
+    RegionAnnotation(HOLENUMBER, 0, 3727, 3778, 784),
+    RegionAnnotation(HOLENUMBER, 0, 4176, 4221, 911),
+    RegionAnnotation(HOLENUMBER, 0, 4618, 4661, 767)
+};
+
+static const std::vector<RegionAnnotation> HQREGION = {
+    RegionAnnotation(HOLENUMBER, 2, 0, 4861, 865)
+};
+
+static const DNALength EXPECTED_HQSTART = 0;
+
+static const DNALength EXPECTED_HQEND   = 4861;
+
+static const DNALength EXPECTED_SCORE   = 865;
+
+static const DNALength WHOLE_LENGTH = 5000;
+
+static const int EXPECTED_NUM_SUBREADS = 12;
+
+
+TEST(CCSFragmentIterator, Constructor) { // builds a region table from the constants above and checks the subread intervals
+    std::vector<RegionAnnotation> regions = INSERTS;
+    regions.insert(regions.end(), HQREGION.begin(), HQREGION.end());
+    regions.insert(regions.end(), ADAPTERS.begin(), ADAPTERS.end());
+
+    CCSSequence ccs;
+    ccs.HoleNumber(HOLENUMBER);
+    ccs.Allocate(WHOLE_LENGTH);
+    ccs.unrolledRead.Allocate(WHOLE_LENGTH);
+
+    RegionTable table;
+    table.ConstructTable(regions, TYPESTRS);
+
+    FragmentCCSIterator it;
+    it.Initialize(&ccs, &table);
+
+    EXPECT_EQ(it.subreadIntervals.size(), EXPECTED_NUM_SUBREADS); // one interval per entry of INSERTS
+
+    EXPECT_EQ(it.subreadIntervals[0], ReadInterval(0, 253, 865)); // first insert; 865 equals the HQREGION score
+
+    EXPECT_EQ(it.subreadIntervals[EXPECTED_NUM_SUBREADS-1], ReadInterval(4661, 4861, 865)); // last insert ends at 4862; expected end equals the HQREGION end 4861
+}
diff --git a/unittest/alignment/files/ReaderAgglomerate_gtest.cpp b/unittest/alignment/files/ReaderAgglomerate_gtest.cpp
index f27eb16..f39a529 100644
--- a/unittest/alignment/files/ReaderAgglomerate_gtest.cpp
+++ b/unittest/alignment/files/ReaderAgglomerate_gtest.cpp
@@ -127,3 +127,19 @@ TEST_F(ReaderAgglomerateTest, ReadFromBam) {
 
     reader->Close();
 }
+
+// Read the BAM file batch by batch and check the total number of sequences returned.
+TEST_F(ReaderAgglomerateTest, ReadsFromBam) {
+    string fn (bamFile1);
+    reader->SetReadFileName(fn);
+    EXPECT_EQ(reader->Initialize(), 1);
+
+    vector<SMRTSequence> seqs;
+    int count = 0;
+    // Compare the call result directly: `ret = GetNext(seqs) and ret != 0` assigned a bool built from an unset ret.
+    while (reader->GetNext(seqs) != 0) {
+        count += static_cast<int>(seqs.size());
+    }
+    EXPECT_EQ(count, 116);
+    reader->Close();
+}
diff --git a/unittest/alignment/utils/RegionUtils_gtest.cpp b/unittest/alignment/utils/RegionUtils_gtest.cpp
index 2daaa86..52b0b44 100644
--- a/unittest/alignment/utils/RegionUtils_gtest.cpp
+++ b/unittest/alignment/utils/RegionUtils_gtest.cpp
@@ -233,4 +233,3 @@ TEST_F(RegionUtilTestFixture, GetTypicalFullSubreadIndex) {
     EXPECT_EQ(idx, 22); 
     // Typical = the second longest full pass subread (6647, 7145)
 }
-
diff --git a/unittest/build.mk b/unittest/build.mk
new file mode 100644
index 0000000..a8dde8f
--- /dev/null
+++ b/unittest/build.mk
@@ -0,0 +1,27 @@
+all:
+# The bare "all:" above keeps "all" the default goal whatever the included files define.
+include ../rules.mk
+include defines.mk
+
+EXE := test-runner
+
+all: $(EXE)
+.PHONY: all gtest clean
+gtest: $(EXE)
+	./$< --gtest_output=xml:./xml/all.xml
+
+LIBS := alignment/libblasr_gtest.a \
+		hdf/libpbihdf_gtest.a \
+		pbdata/libpbdata_gtest.a
+
+$(EXE): $(LIBS)
+	$(CXX) $(CXXOPTS) $(CXXFLAGS) $(GTEST_SRC) -Wl,$(LD_WHOLE_ARCHIVE) $^ -Wl,$(LD_NO_WHOLE_ARCHIVE) -o $@ -I$(GTEST_ROOT) $(LIBDIRS) $(LDFLAGS)
+
+$(LIBS):
+	${MAKE} -C $(dir $@) PBINCROOT=${PBINCROOT}/.. $(notdir $@)
+
+clean:
+	@${MAKE} -C alignment clean
+	@${MAKE} -C hdf clean
+	@${MAKE} -C pbdata clean
+	@${RM} -fr $(EXE) xml
diff --git a/unittest/common.mk b/unittest/common.mk
deleted file mode 100644
index 5337ea9..0000000
--- a/unittest/common.mk
+++ /dev/null
@@ -1,64 +0,0 @@
-SHELL=bash
-
-.PHONY: all debug profile clean
-
-#
-# Definitions common to all make files for unit tests.
-# All paths are relative from inside the subdirectories, not this file
-#
-
-PBINCROOT 	   ?= $(realpath ../..)
-PREBUILT    	   ?= $(realpath ../../../../../prebuilt.out)
-THIRD_PARTY_PREFIX ?= ../../..
-
-include $(PBINCROOT)/common.mk
-
-# All Google Test headers.  Usually you shouldn't change this.
-GTEST_ROOT := $(THIRD_PARTY)/gtest/fused-src
-GTEST_SRC  := $(GTEST_ROOT)/gtest/gtest-all.cc \
-              $(GTEST_ROOT)/gtest/gtest_main.cc
-
-# INCLUDE DIRS
-INCDIRS    = -I$(PBINCROOT)/alignment \
-             -I$(PBINCROOT)/hdf \
-             -I$(PBINCROOT)/pbdata \
-			 -I$(PBINCROOT)/unittest \
-             -I$(GTEST_ROOT) \
-             -I$(HDF5_INC)
-
-# LIB DIRS
-LIBDIRS    = -L$(PBINCROOT)/alignment \
-             -L$(PBINCROOT)/hdf \
-             -L$(PBINCROOT)/pbdata \
-             -L$(HDF5_LIB) 
-
-LDFLAGS1 := -lblasr -lpbihdf -lpbdata 
-# The order of -l{lib} matters
-
-ifeq ($(origin nopbbam), undefined)
-	INCDIRS += -I$(PBBAM)/include -I$(PBBAM)/third-party/htslib
-	LIBDIRS += -L$(PBBAM)/lib -L$(PBBAM)/third-party/htslib
-	LDFLAGS1 += -lpbbam
-# Use libhts.a built with pbbam
-ifneq ($(wildcard "$(PBBAM)/third-party/htslib/libhts.a"), "")
-	LDFLAGS1 += $(PBBAM)/third-party/htslib/libhts.a 
-else
-    LDFLAGS1 += lhts
-endif
-endif
-
-ifneq ($(ZLIB_ROOT), notfound)
-	INCDIRS += -I$(ZLIB_ROOT)/include
-	LIBDIRS += -L$(ZLIB_ROOT)/lib
-endif
-
-ifneq ($(wildcard "$(HDF5_LIB)/libhdf5_cpp.a"),"")
-    LDFLAGS   := $(LDFLAGS1) $(HDF5_LIB)/libhdf5_cpp.a $(HDF5_LIB)/libhdf5.a -lpthread -lz -ldl
-else
-    LDFLAGS   := $(LDFLAGS1) -lhdf5_cpp -lhdf5 -lpthread -lz -ldl
-endif
-
-
-CXX := g++
-CXXOPTS := -std=c++11 -Wno-div-by-zero 
-CXXFLAGS := -O3
diff --git a/unittest/hdf/HDFPlsReader_gtest.cpp b/unittest/hdf/HDFPlsReader_gtest.cpp
index a4f63a2..8bcf35d 100644
--- a/unittest/hdf/HDFPlsReader_gtest.cpp
+++ b/unittest/hdf/HDFPlsReader_gtest.cpp
@@ -42,7 +42,8 @@ TEST_F(HDFPlsReaderTEST, ReadToPulseFile) {
     reader.IncludeField("StartFrame");
     reader.ReadPulseFileInit(pulseFile);
     reader.ReadPulseFile(pulseFile);
-    ASSERT_EQ(pulseFile.platformId, 0);
+    //Astro = 1, Springfield = 2
+    ASSERT_EQ(pulseFile.platformId, 2);
     ASSERT_EQ(pulseFile.startFrame.size(), 197626964);
 }
 
diff --git a/unittest/hdf/HDFScanDataWriter_gtest.cpp b/unittest/hdf/HDFScanDataWriter_gtest.cpp
index 15831ca..40e07a4 100644
--- a/unittest/hdf/HDFScanDataWriter_gtest.cpp
+++ b/unittest/hdf/HDFScanDataWriter_gtest.cpp
@@ -17,6 +17,7 @@
  */
 
 #include "gtest/gtest.h"
+#define private public
 #include "HDFScanDataWriter.hpp"
 #include "HDFFile.hpp"
 #include "reads/ScanData.hpp"
@@ -24,6 +25,7 @@
 TEST(HDFScanDataWriter, Write) {
     ScanData sd;
     sd.frameRate = 100;
+    sd.BaseMap("ATGC");
 
     HDFFile outFile;
     outFile.Open("scandata.h5", H5F_ACC_TRUNC);
diff --git a/unittest/hdf/Makefile b/unittest/hdf/Makefile
index 77fe15e..d942d8f 100644
--- a/unittest/hdf/Makefile
+++ b/unittest/hdf/Makefile
@@ -1,4 +1,5 @@
-include ../common.mk
+include ../../rules.mk
+include ../defines.mk
 
 SOURCES    = $(wildcard *.cpp) 
 OBJECTS    = $(SOURCES:.cpp=.o)
@@ -8,13 +9,13 @@ EXE := test-runner
 all debug profile: $(EXE)
 
 libpbihdf_gtest.a: $(OBJECTS)
-	$(AR_pp) $(ARFLAGS)c $@ $^
+	$(AR) $(ARFLAGS)c $@ $^
 
 $(EXE): $(OBJECTS)
-	$(CXX_pp) $(CXXOPTS) $(CXXFLAGS) $^ $(GTEST_SRC) -o $@ -I$(GTEST_ROOT) $(LIBDIRS) $(LDFLAGS)
+	$(CXX) $(CXXOPTS) $(CXXFLAGS) $^ $(GTEST_SRC) -o $@ -I$(GTEST_ROOT) $(LIBDIRS) $(LDFLAGS)
 
 $(OBJECTS): %.o: %.cpp
-	$(CXX_pp) $(CXXOPTS) $(CXXFLAGS) -c $< -o $@ $(INCDIRS)
+	$(CXX) $(CXXOPTS) $(CXXFLAGS) -c $< -o $@ $(INCDIRS)
 
 gtest: $(EXE)
 	./$< --gtest_output=xml:../xml/hdf.xml
diff --git a/unittest/makefile b/unittest/makefile
new file mode 100644
index 0000000..e2a56c7
--- /dev/null
+++ b/unittest/makefile
@@ -0,0 +1,120 @@
+SHELL=bash
+THISDIR:=$(dir $(realpath $(lastword $(MAKEFILE_LIST))))
+SRCDIR:=${THISDIR}
+-include ${CURDIR}/defines.mk
+include ${THISDIR}/../rules.mk
+
+MKDIR := mkdir
+
+null :=
+space := $(null) $(null)
+
+broken_test_sources := \
+	${SRCDIR}/alignment/files/ReaderAgglomerate_gtest.cpp \
+	${SRCDIR}/alignment/format/SAMHeaderPrinter_gtest.cpp \
+	${SRCDIR}/alignment/format/SAMPrinter_gtest.cpp \
+	${SRCDIR}/alignment/utils/FileUtils_gtest.cpp \
+	$(null)
+
+
+gtest_sources  := $(GTEST_SRCDIR)/gtest/gtest-all.cc \
+                  $(GTEST_SRCDIR)/gtest/gtest_main.cc \
+		  $(null)
+
+test_sources   := $(wildcard ${SRCDIR}/pbdata/*.cpp) \
+                  $(wildcard ${SRCDIR}/pbdata/utils/*.cpp) \
+                  $(wildcard ${SRCDIR}/pbdata/metagenome/*.cpp) \
+                  $(wildcard ${SRCDIR}/pbdata/saf/*.cpp) \
+                  $(wildcard ${SRCDIR}/pbdata/reads/*.cpp) \
+                  $(wildcard ${SRCDIR}/pbdata/qvs/*.cpp)  \
+                  \
+                  $(wildcard ${SRCDIR}/hdf/*.cpp) \
+                  \
+                  $(wildcard ${SRCDIR}/alignment/*.cpp) \
+                  $(wildcard ${SRCDIR}/alignment/utils/*.cpp) \
+                  $(wildcard ${SRCDIR}/alignment/datastructures/alignment/*.cpp) \
+                  $(wildcard ${SRCDIR}/alignment/files/*.cpp) \
+                  $(wildcard ${SRCDIR}/alignment/format/*.cpp) \
+                  $(null)
+
+# Remove broken tests from the test_sources list
+test_sources   := $(filter-out $(broken_test_sources),$(test_sources))
+
+paths := alignment alignment/files alignment/datastructures/alignment alignment/utils alignment/format \
+	pbdata pbdata/utils pbdata/metagenome pbdata/saf pbdata/reads pbdata/qvs \
+	hdf
+paths := $(patsubst %,${SRCDIR}%,${paths}) ${GTEST_SRCDIR}/gtest
+sources   := $(gtest_sources) $(test_sources)
+sources   := $(notdir ${sources})
+objects   := $(patsubst %.cc,%.o,$(filter %.cc,$(sources))) \
+             $(patsubst %.cpp,%.o,$(filter %.cpp,$(sources))) \
+             $(null)
+dependencies:=$(objects:%.o=%.d)
+
+
+INCLUDES+= \
+    ${SRCDIR} \
+    $(GTEST_INC) \
+    $(LIBBLASR_INC) \
+    $(LIBPBIHDF_INC) \
+    $(LIBPBDATA_INC) \
+    $(PBBAM_INC) \
+    $(HTSLIB_INC) \
+    $(HDF5_INC) \
+    $(BOOST_INC) \
+    $(null)
+
+LIBS+= \
+    $(LIBBLASR_LIB) \
+    $(LIBPBIHDF_LIB) \
+    $(LIBPBDATA_LIB) \
+    $(PBBAM_LIB) \
+    $(HTSLIB_LIB) \
+    $(HDF5_LIB) \
+    $(HDF5_CPP_LIB) \
+    $(ZLIB_LIB) \
+    $(GCC_LIB) \
+    $(null)
+
+ldlibs     := -lblasr -lpbihdf -lpbdata -lpbbam -lhts -lhdf5_cpp -lhdf5 -lz
+sys_ldlibs := -lpthread -ldl -lrt
+
+cxxopts  := -std=c++11 -Wno-div-by-zero 
+cxxflags := -O3
+cppflags := $(patsubst %,-I%,${includes})
+ldflags  := $(patsubst %,-L%,${LIBS}) $(sys_ldflags)
+
+
+
+override CPPFLAGS := $(cppflags) $(CPPFLAGS)
+override CXXFLAGS := $(cxxflags) $(cxxopts) $(CXXFLAGS)
+override LDLIBS   := $(ldlibs) $(sys_ldlibs) $(LDLIBS)
+override LDFLAGS  := $(ldflags) $(LDFLAGS)
+
+COMPILE.cpp   = $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c
+COMPILE.cc    = $(COMPILE.cpp)
+LINK.o        = $(CXX) $(LDFLAGS) $(TARGET_ARCH)
+
+vpath %.cpp ${paths}
+vpath %.cc ${paths}
+
+all: test-runner
+
+test-runner: $(objects)
+	$(LINK.o) $^ $(LDLIBS) -o $@
+
+gtest: test-runner
+	LD_LIBRARY_PATH=$(subst $(space),:,$(strip $(LIBS))) ./$< --gtest_output=xml:./xml/all.xml
+
+# Build objects
+%.o: %.cpp
+	$(COMPILE.cpp) -o $@ $<
+
+%.o: %.cc
+	$(COMPILE.cc) -o $@ $<
+
+clean:
+	$(RM) -r $(OUTDIR) *.o test-runner
+
+-include ${dependencies}
+depend: $(dependencies:.d=.depend)
diff --git a/unittest/pbdata/CCSSequence_gtest.cpp b/unittest/pbdata/CCSSequence_gtest.cpp
index 02e72cd..f5e13d1 100644
--- a/unittest/pbdata/CCSSequence_gtest.cpp
+++ b/unittest/pbdata/CCSSequence_gtest.cpp
@@ -44,9 +44,9 @@ public:
         smrt.length = size;
         smrt.deleteOnExit = false;
 
-        smrt.zmwData.holeNumber = holeNumber;
-        smrt.subreadStart = start;
-        smrt.subreadEnd = end;
+        smrt.HoleNumber  (holeNumber);
+        smrt.SubreadStart(start);
+        smrt.SubreadEnd  (end);
 
         stringstream ss;
     }
@@ -74,7 +74,7 @@ public:
         ccs.passDirection.resize(numSubreads);
         s = 0;
         for(int i=0; i < ccs.numPasses; i++) {
-            ccs.passStartBase[i] = subreads[i].subreadStart;
+            ccs.passStartBase[i] = subreads[i].SubreadStart();
             ccs.passDirection[i] = (i%2==0)?(0):(1);
             ccs.passNumBases[i] = subreads[i].length;
         }
diff --git a/unittest/pbdata/DNASequence_gtest.cpp b/unittest/pbdata/DNASequence_gtest.cpp
index 6d16f01..96e516d 100644
--- a/unittest/pbdata/DNASequence_gtest.cpp
+++ b/unittest/pbdata/DNASequence_gtest.cpp
@@ -257,7 +257,7 @@ TEST_F(DNASequenceTest, ReferenceSubstring) {
     EXPECT_FALSE(dnaTwo.deleteOnExit);
 
 //    EXPECT_DEATH_IF_SUPPORTED(dnaTwo.ReferenceSubstring(dnaOne, 100), "");
-    delete dnaOne.seq;
+    delete [] dnaOne.seq;
 }
 /*
 TEST_F(DNASequenceTest, CopyFromString) {
diff --git a/unittest/pbdata/Makefile b/unittest/pbdata/Makefile
index 61df3a9..a6474af 100644
--- a/unittest/pbdata/Makefile
+++ b/unittest/pbdata/Makefile
@@ -1,4 +1,5 @@
-include ../common.mk
+include ../../rules.mk
+include ../defines.mk
 
 SOURCES    = $(wildcard *.cpp) \
 		     $(wildcard utils/*.cpp) \
@@ -13,13 +14,13 @@ EXE := test-runner
 all debug profile: $(EXE)
 
 libpbdata_gtest.a: $(OBJECTS)
-	$(AR_pp) $(ARFLAGS)c $@ $^
+	$(AR) $(ARFLAGS)c $@ $^
 
 $(EXE): $(OBJECTS)
-	$(CXX_pp) $(CXXOPTS) $(CXXFLAGS) $^ $(GTEST_SRC) -o $@ -I$(GTEST_ROOT) $(LIBDIRS) $(LDFLAGS)
+	$(CXX) $(CXXOPTS) $(CXXFLAGS) $^ $(GTEST_SRC) -o $@ -I$(GTEST_ROOT) $(LIBDIRS) $(LDFLAGS)
 
 $(OBJECTS): %.o: %.cpp
-	$(CXX_pp) $(CXXOPTS) $(CXXFLAGS) -c $< -o $@ $(INCDIRS)
+	$(CXX) $(CXXOPTS) $(CXXFLAGS) -c $< -o $@ $(INCDIRS)
 
 gtest: $(EXE)
 	./$< --gtest_output=xml:../xml/pbdata.xml
diff --git a/unittest/pbdata/SMRTSequence_gtest.cpp b/unittest/pbdata/SMRTSequence_gtest.cpp
index 43f40d9..a3cfaea 100644
--- a/unittest/pbdata/SMRTSequence_gtest.cpp
+++ b/unittest/pbdata/SMRTSequence_gtest.cpp
@@ -30,9 +30,9 @@ public:
         smrt.seq = seqnt;
         int len = sizeof(seqnt) / sizeof(Nucleotide) - 1;
         smrt.length = len; 
-        smrt.zmwData.holeNumber = 1;
-        smrt.subreadStart = 0;
-        smrt.subreadEnd = 19;
+        smrt.HoleNumber(1);
+        smrt.SubreadStart(0);
+        smrt.SubreadEnd  (19);
         smrt.AllocateDeletionQVSpace(len);
         for(int i=0; i < 19; i ++) {
             smrt.deletionQV[i] = i;
diff --git a/unittest/pbdata/StringUtils_gtest.cpp b/unittest/pbdata/StringUtils_gtest.cpp
index 3b2b4fb..abd6622 100644
--- a/unittest/pbdata/StringUtils_gtest.cpp
+++ b/unittest/pbdata/StringUtils_gtest.cpp
@@ -32,7 +32,49 @@ TEST(StringUtilTest, MakeReadGroupId) {
     readType = ReadType::CCS;
     expectedReadGroupId = "f5b4ffb6";
     EXPECT_EQ(MakeReadGroupId(movieName, readType), expectedReadGroupId);
-
 }
 
+TEST(StringUtilTest, Splice) {
+    vector<string> tokens;
+
+    Splice("movie/zmw/0_1", "/", tokens);
+    vector<string> exp = {"movie", "zmw", "0_1"};
+    EXPECT_EQ(tokens, exp); 
+
+    string test = "abc,ef,12,4";
+    Splice(test, ",", tokens);
+    exp = vector<string>{"abc", "ef", "12", "4"};
+    EXPECT_EQ(tokens, exp); 
+    
+    Splice(test, "ef,", tokens);
+    exp = vector<string>{"abc,", "12,4"};
+    EXPECT_EQ(tokens, exp);
+
+    Splice("", ",", tokens);
+    exp = vector<string>{""};
+    EXPECT_EQ(tokens, exp); 
+
+    Splice(",", ",", tokens);
+    exp = vector<string>{"", ""};
+    EXPECT_EQ(tokens, exp); 
+    
+    Splice(",abc,", ",", tokens);
+    exp = vector<string>{"", "abc", ""};
+    EXPECT_EQ(tokens, exp); 
 
+    Splice("abc,", ",", tokens);
+    exp = vector<string>{"abc", ""};
+    EXPECT_EQ(tokens, exp); 
+
+    Splice(",abc", ",",  tokens);
+    exp = vector<string>{"", "abc"};
+    EXPECT_EQ(tokens, exp); 
+
+    Splice("abc", "abc",  tokens);
+    exp = vector<string>{"", ""};
+    EXPECT_EQ(tokens, exp); 
+
+    Splice("a\tb\tc", "\t",  tokens);
+    exp = vector<string>{"a", "b", "c"};
+    EXPECT_EQ(tokens, exp); 
+}
diff --git a/unittest/pbdata/reads/RegionAnnotations_gtest.cpp b/unittest/pbdata/reads/RegionAnnotations_gtest.cpp
new file mode 100644
index 0000000..03014e7
--- /dev/null
+++ b/unittest/pbdata/reads/RegionAnnotations_gtest.cpp
@@ -0,0 +1,203 @@
+/*
+ * ==================================================================
+ *
+ *       Filename:  RegionAnnotations_gtest.cpp
+ *
+ *    Description:  Test pbdata/reads/RegionAnnotations.hpp
+ *
+ *        Version:  1.0
+ *        Created:  09/27/2015 03:54:55 PM
+ *       Compiler:  gcc
+ *
+ *         Author:  Yuan Li (yli), yli at pacificbiosciences.com
+ *        Company:  Pacific Biosciences
+ *
+ * ==================================================================
+ */
+#include "gtest/gtest.h"
+#define private public
+#include "reads/ReadInterval.hpp"
+#include "reads/RegionAnnotations.hpp"
+#include <algorithm>
+#include <iostream>
+
+using namespace std;
+
+static const UInt HOLENUMBER = 1720;
+
+// Adapter - 0, Insert - 1, HQRegion - 2
+static const std::vector<RegionType> TYPES = {Adapter, Insert, HQRegion};
+static const std::vector<std::string> TYPESTRS = {"Adapter", "Insert", "HQRegion"};
+
+static const std::vector<RegionAnnotation> REGIONS = {
+    RegionAnnotation(HOLENUMBER, 2, 50,  900, 900),// hqregion
+    RegionAnnotation(HOLENUMBER, 1, 700, 999, -1), // insert
+    RegionAnnotation(HOLENUMBER, 0, 649, 700, 700),// adapter
+    RegionAnnotation(HOLENUMBER, 1, 300, 650, -1), // insert
+    RegionAnnotation(HOLENUMBER, 0, 249, 329, 800),// adapter
+    RegionAnnotation(HOLENUMBER, 1, 0,   250, -1)  // insert
+};
+
+static const std::vector<RegionAnnotation> EXPECTED_HQREGIONS = {
+    RegionAnnotation(HOLENUMBER, 2, 50,  900, 900) // hqregion
+};
+
+static const DNALength EXPECTED_HQSTART = 50;
+
+static const DNALength EXPECTED_HQEND   = 900;
+
+static const DNALength WHOLE_LENGTH = 1200;
+
+static const std::vector<RegionAnnotation> EXPECTED_ADAPTERS = {
+    RegionAnnotation(HOLENUMBER, 0, 249, 329, 800),// adapter
+    RegionAnnotation(HOLENUMBER, 0, 649, 700, 700) // adapter
+};
+
+static const std::vector<RegionAnnotation> EXPECTED_INSERTS = {
+    RegionAnnotation(HOLENUMBER, 1, 0,   250, -1),// insert
+    RegionAnnotation(HOLENUMBER, 1, 300, 650, -1),// insert
+    RegionAnnotation(HOLENUMBER, 1, 700, 999, -1) // insert
+};
+
+static const std::vector<ReadInterval> EXPECTED_SUBREAD_INTERVALS_BYADAPTER_NOHQ = {
+    ReadInterval(0,   249, 0),   // by Adapter, subread score unknown.
+    ReadInterval(329, 649, 0),
+    ReadInterval(700, 1200, 0)
+};
+
+static const std::vector<ReadInterval> EXPECTED_SUBREAD_INTERVALS_BYADAPTER_HQ = {
+    ReadInterval(50,  249, 900), // by HQ, subread score = HQRegion score
+    ReadInterval(329, 649, 900),
+    ReadInterval(700, 900, 900)
+};
+
+static const std::vector<ReadInterval> EXPECTED_SUBREAD_INTERVALS_NOHQ = {
+    ReadInterval(0,   250, -1),  // not by adapter, not by HQ, use the original score.
+    ReadInterval(300, 650, -1),
+    ReadInterval(700, 999, -1)
+};
+
+static const std::vector<ReadInterval> EXPECTED_SUBREAD_INTERVALS_HQ = {
+    ReadInterval(50,  250, 900),  // by HQ, subread score = HQRegion score
+    ReadInterval(300, 650, 900),
+    ReadInterval(700, 900, 900)
+};
+
+static const std::vector<ReadInterval> EXPECTED_ADAPTER_INTERVALS = {
+    ReadInterval(249,  329, 800),
+    ReadInterval(649, 700, 700)
+};
+
+static const std::vector<RegionAnnotation> REGIONS_SORTED_BY_POS = {
+    RegionAnnotation(HOLENUMBER, 1, 0,   250, -1), // insert
+    RegionAnnotation(HOLENUMBER, 2, 50,  900, 900),// hqregion
+    RegionAnnotation(HOLENUMBER, 0, 249, 329, 800),// adapter
+    RegionAnnotation(HOLENUMBER, 1, 300, 650, -1), // insert
+    RegionAnnotation(HOLENUMBER, 0, 649, 700, 700),// adapter
+    RegionAnnotation(HOLENUMBER, 1, 700, 999, -1)  // insert
+};
+
+static const std::vector<RegionAnnotation> REGIONS_SORTED_BY_TYPE = {
+    RegionAnnotation(HOLENUMBER, 0, 249, 329, 800),// adapter
+    RegionAnnotation(HOLENUMBER, 0, 649, 700, 700),// adapter
+    RegionAnnotation(HOLENUMBER, 1, 0,   250, -1), // insert
+    RegionAnnotation(HOLENUMBER, 1, 300, 650, -1), // insert
+    RegionAnnotation(HOLENUMBER, 1, 700, 999, -1), // insert
+    RegionAnnotation(HOLENUMBER, 2, 50,  900, 900) // hqregion
+};
+
+TEST(RegionAnnotationTest, Sort_By_Pos) {
+    std::vector<RegionAnnotation> ras = REGIONS;
+    std::sort(ras.begin(), ras.end());
+    EXPECT_EQ(ras, REGIONS_SORTED_BY_POS);
+}
+
+TEST(RegionAnnotationTest, Sort_By_Type) {
+    std::vector<RegionAnnotation> ras = REGIONS;
+    std::sort(ras.begin(), ras.end(), compare_region_annotation_by_type);
+    EXPECT_EQ(ras, REGIONS_SORTED_BY_TYPE);
+}
+
+TEST(RegionAnnotationsTest, Constructor) {
+    RegionAnnotations ras(HOLENUMBER, REGIONS, TYPES);
+    EXPECT_EQ(ras.table_, REGIONS_SORTED_BY_TYPE);
+    EXPECT_EQ(ras.HoleNumber(), HOLENUMBER);
+}
+
+TEST(RegionAnnotationsTest, RegionAnnotationsOfType) {
+    RegionAnnotations ras(HOLENUMBER, REGIONS, TYPES);
+    EXPECT_EQ(ras.Adapters(), EXPECTED_ADAPTERS);
+    EXPECT_EQ(ras.HQRegions(), EXPECTED_HQREGIONS);
+    EXPECT_EQ(ras.Inserts(), EXPECTED_INSERTS);
+    EXPECT_EQ(ras.HQStart(), EXPECTED_HQSTART);
+    EXPECT_EQ(ras.HQEnd(), EXPECTED_HQEND);
+}
+
+TEST(RegionAnnotationsTest, SubreadIntervals) {
+    RegionAnnotations ras(HOLENUMBER, REGIONS, TYPES);
+    vector<ReadInterval> ris = ras.SubreadIntervals(WHOLE_LENGTH, true, false);
+    EXPECT_EQ(ris, EXPECTED_SUBREAD_INTERVALS_BYADAPTER_NOHQ);
+
+    ris = ras.SubreadIntervals(WHOLE_LENGTH, true, true);
+    EXPECT_EQ(ris, EXPECTED_SUBREAD_INTERVALS_BYADAPTER_HQ);
+
+    ris = ras.SubreadIntervals(WHOLE_LENGTH, false, false);
+    EXPECT_EQ(ris, EXPECTED_SUBREAD_INTERVALS_NOHQ);
+
+    ris = ras.SubreadIntervals(WHOLE_LENGTH, false, true);
+    EXPECT_EQ(ris, EXPECTED_SUBREAD_INTERVALS_HQ);
+}
+
+TEST(RegionAnnotationsTest, AdapterIntervals) {
+    RegionAnnotations ras(HOLENUMBER, REGIONS, TYPES);
+    EXPECT_EQ(ras.AdapterIntervals(), EXPECTED_ADAPTER_INTERVALS);
+}
+
+TEST(RegionAnnotationsTest, SubreadIntervals_2) {
+    std::vector<RegionAnnotation> regions({
+            RegionAnnotation(HOLENUMBER, 0, 0, 112, -1)// adapter, no insert, no hq
+            });
+    RegionAnnotations ras(HOLENUMBER, regions, TYPES);
+
+    vector<ReadInterval> ris = ras.SubreadIntervals(WHOLE_LENGTH, true, false);
+    EXPECT_EQ(ris.size(), 1); // (112, WHOLE_LENGTH, -1)
+    EXPECT_EQ(ris[0].start, 112);
+    EXPECT_EQ(ris[0].end, WHOLE_LENGTH);
+
+    // no insert, no hq && require adapter, require hq
+    ris = ras.SubreadIntervals(WHOLE_LENGTH, true, true);
+    EXPECT_EQ(ris.size(), 0);
+
+    // no require adapter, no require hq
+    ris = ras.SubreadIntervals(WHOLE_LENGTH, false, false);// no insert
+    EXPECT_EQ(ris.size(), 0);
+
+    // no require adapter, require hq
+    ris = ras.SubreadIntervals(WHOLE_LENGTH, false, true); // no hq
+    EXPECT_EQ(ris.size(), 0);
+}
+
+TEST(RegionAnnotationsTest, SubreadIntervals_3) {
+    std::vector<RegionAnnotation> regions({
+            RegionAnnotation(HOLENUMBER, 1, 0, 170, -1),// insert
+            RegionAnnotation(HOLENUMBER, 2, 0, 0, 0)   //  hq length = 0
+            });
+    RegionAnnotations ras(HOLENUMBER, regions, TYPES);
+
+    // require adapter, no require hq
+    vector<ReadInterval> ris = ras.SubreadIntervals(WHOLE_LENGTH, true, false);
+    EXPECT_EQ(ris.size(), 0);
+
+    // require adapter, require hq
+    ris = ras.SubreadIntervals(WHOLE_LENGTH, true, true);
+    EXPECT_EQ(ris.size(), 0);
+
+    // no require adapter, no require hq
+    ris = ras.SubreadIntervals(WHOLE_LENGTH, false, false);
+    EXPECT_EQ(ris.size(), 1);
+    EXPECT_EQ(ris[0], ReadInterval(0, 170, -1));
+
+    // no require adapter, require hq
+    ris = ras.SubreadIntervals(WHOLE_LENGTH, false, true);
+    EXPECT_EQ(ris.size(), 0);
+}
diff --git a/unittest/pbdata/reads/RegionTypeMap_gtest.cpp b/unittest/pbdata/reads/RegionTypeMap_gtest.cpp
new file mode 100644
index 0000000..12e108d
--- /dev/null
+++ b/unittest/pbdata/reads/RegionTypeMap_gtest.cpp
@@ -0,0 +1,61 @@
+/*
+ * ==================================================================
+ *
+ *       Filename:  RegionTypeMap_gtest.cpp
+ *
+ *    Description:  Test pbdata/reads/RegionTypeMap.hpp
+ *
+ *        Version:  1.0
+ *        Created:  09/27/2015 03:54:55 PM
+ *       Compiler:  gcc
+ *
+ *         Author:  Yuan Li (yli), yli at pacificbiosciences.com
+ *        Company:  Pacific Biosciences
+ *
+ * ==================================================================
+ */
+#include "gtest/gtest.h"
+#define private public
+#include "reads/RegionTypeMap.hpp"
+
+using namespace std;
+
+
+// Adapter - 0, Insert - 1, HQRegion - 2
+const vector<RegionType> TYPES = {Adapter, Insert, HQRegion};
+
+TEST(RegionTypeMapTest, ToString) {
+    EXPECT_EQ(RegionTypeMap::ToString(Adapter),  "Adapter");
+    EXPECT_EQ(RegionTypeMap::ToString(HQRegion), "HQRegion");
+    EXPECT_EQ(RegionTypeMap::ToString(Insert),   "Insert");
+}
+
+TEST(RegionTypeMapTest, ToRegionType) {
+    EXPECT_EQ(RegionTypeMap::ToRegionType("Adapter"),  Adapter);
+    EXPECT_EQ(RegionTypeMap::ToRegionType("HQRegion"), HQRegion);
+    EXPECT_EQ(RegionTypeMap::ToRegionType("Insert"),   Insert);
+}
+
+TEST(RegionTypeMapTest, ToIndex) {
+    // In most bas.h5 files, order of region types:
+    std::vector<std::string> typeStrs = {"Insert", "Adapter", "HQRegion"};
+
+    EXPECT_EQ(RegionTypeMap::ToIndex(Insert,   typeStrs), 0);
+    EXPECT_EQ(RegionTypeMap::ToIndex(Adapter,  typeStrs), 1);
+    EXPECT_EQ(RegionTypeMap::ToIndex(HQRegion, typeStrs), 2);
+
+    EXPECT_EQ(RegionTypeMap::ToIndex("Insert",   typeStrs), 0);
+    EXPECT_EQ(RegionTypeMap::ToIndex("Adapter",  typeStrs), 1);
+    EXPECT_EQ(RegionTypeMap::ToIndex("HQRegion", typeStrs), 2);
+
+    // Test given a different region type order.
+    typeStrs = {"Insert", "HQRegion", "Adapter", "BarCode"};
+
+    EXPECT_EQ(RegionTypeMap::ToIndex(Insert,   typeStrs), 0);
+    EXPECT_EQ(RegionTypeMap::ToIndex(HQRegion, typeStrs), 1);
+    EXPECT_EQ(RegionTypeMap::ToIndex(Adapter,  typeStrs), 2);
+
+    EXPECT_EQ(RegionTypeMap::ToIndex("Insert",   typeStrs), 0);
+    EXPECT_EQ(RegionTypeMap::ToIndex("HQRegion", typeStrs), 1);
+    EXPECT_EQ(RegionTypeMap::ToIndex("Adapter",  typeStrs), 2);
+}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/pbseqlib.git



More information about the debian-med-commit mailing list