[med-svn] [pbbam] 01/12: Imported Upstream version 0.5.0

Tue Jul 5 03:24:42 UTC 2016

This is an automated email from the git hooks/post-receive script.

afif pushed a commit to branch master
in repository pbbam.

commit ce586756f62774b0f76bea862cf29431444e01aa
Author: Afif Elghraoui <afif at ghraoui.name>
Date:   Mon Jul 4 15:18:40 2016 -0700

    Imported Upstream version 0.5.0
---
 .travis.yml                                        |   64 +
 CHANGELOG.md                                       |  248 +-
 CMakeLists.txt                                     |   80 +-
 INSTALL.md                                         |  115 +-
 LICENSE.txt                                        |   34 +
 README.md                                          |   29 +
 cmake/FindCSharp.cmake                             |   72 +
 cmake/FindDotNetFrameworkSdk.cmake                 |   29 +
 cmake/FindMono.cmake                               |  167 +
 cmake/FindR.cmake                                  |   48 +
 cmake/PbbamTool.cmake                              |   23 +
 cmake/UseCSharp.cmake                              |  111 +
 cmake/UseDotNetFrameworkSdk.cmake                  |   16 +
 cmake/UseMono.cmake                                |   16 +
 docs/Doxyfile.in                                   |   24 +-
 docs/Makefile                                      |  168 +
 docs/examples/code/BarcodeQuery.txt                |   17 +
 docs/examples/code/Compare.txt                     |    3 +
 docs/examples/code/Compare_AlignedEnd.txt          |    2 +
 docs/examples/code/Compare_AlignedStart.txt        |    2 +
 docs/examples/code/Compare_AlignedStrand.txt       |    2 +
 docs/examples/code/Compare_BarcodeForward.txt      |    2 +
 docs/examples/code/Compare_BarcodeQuality.txt      |    2 +
 docs/examples/code/Compare_BarcodeReverse.txt      |    2 +
 docs/examples/code/Compare_FullName.txt            |    2 +
 docs/examples/code/Compare_LocalContextFlag.txt    |    2 +
 docs/examples/code/Compare_MapQuality.txt          |    2 +
 docs/examples/code/Compare_MovieName.txt           |    2 +
 docs/examples/code/Compare_NumDeletedBases.txt     |    2 +
 docs/examples/code/Compare_NumInsertedBases.txt    |    2 +
 docs/examples/code/Compare_NumMatches.txt          |    2 +
 docs/examples/code/Compare_NumMismatches.txt       |    2 +
 docs/examples/code/Compare_QueryEnd.txt            |    2 +
 docs/examples/code/Compare_QueryStart.txt          |    2 +
 docs/examples/code/Compare_ReadAccuracy.txt        |    2 +
 docs/examples/code/Compare_ReadGroupId.txt         |    2 +
 docs/examples/code/Compare_ReadGroupNumericId.txt  |    2 +
 docs/examples/code/Compare_ReferenceEnd.txt        |    2 +
 docs/examples/code/Compare_ReferenceId.txt         |    2 +
 docs/examples/code/Compare_ReferenceName.txt       |    2 +
 docs/examples/code/Compare_ReferenceStart.txt      |    2 +
 docs/examples/code/Compare_TypeFromOperator.txt    |    2 +
 docs/examples/code/Compare_TypeToName.txt          |    2 +
 docs/examples/code/Compare_Zmw.txt                 |    2 +
 docs/examples/code/EntireFileQuery.txt             |   15 +
 docs/examples/code/EntireFileQuery_BamFilename.txt |    4 +
 docs/examples/code/EntireFileQuery_NonConst.txt    |    4 +
 docs/examples/code/GenomicIntervalQuery.txt        |   16 +
 docs/examples/code/GenomicIntervalQuery_Reuse.txt  |    8 +
 docs/examples/code/PbiAlignedEndFilter.txt         |    4 +
 docs/examples/code/PbiAlignedLengthFilter.txt      |    4 +
 docs/examples/code/PbiAlignedStartFilter.txt       |    4 +
 docs/examples/code/PbiAlignedStrandFilter.txt      |    5 +
 docs/examples/code/PbiBarcodeFilter.txt            |   17 +
 docs/examples/code/PbiBarcodeForwardFilter.txt     |   15 +
 docs/examples/code/PbiBarcodeQualityFilter.txt     |    5 +
 docs/examples/code/PbiBarcodeReverseFilter.txt     |   15 +
 docs/examples/code/PbiBarcodesFilter.txt           |    6 +
 docs/examples/code/PbiBuilder_WithReader.txt       |   30 +
 docs/examples/code/PbiBuilder_WithWriter.txt       |   12 +
 docs/examples/code/PbiFilterQuery.txt              |   22 +
 docs/examples/code/PbiFilter_Composition.txt       |    8 +
 docs/examples/code/PbiFilter_CustomFilter.txt      |   21 +
 docs/examples/code/PbiFilter_Interface.txt         |    1 +
 docs/examples/code/PbiFilter_Intersection_Copy.txt |    3 +
 docs/examples/code/PbiFilter_Intersection_Move.txt |    3 +
 docs/examples/code/PbiFilter_Union_Copy.txt        |    3 +
 docs/examples/code/PbiFilter_Union_Move.txt        |    3 +
 docs/examples/code/PbiIdentityFilter.txt           |    6 +
 docs/examples/code/PbiLocalContextFilter.txt       |   22 +
 docs/examples/code/PbiMapQualityFilter.txt         |    5 +
 docs/examples/code/PbiMovieNameFilter.txt          |   14 +
 docs/examples/code/PbiNumDeletedBasesFilter.txt    |    6 +
 docs/examples/code/PbiNumInsertedBasesFilter.txt   |    6 +
 docs/examples/code/PbiNumMatchesFilter.txt         |    6 +
 docs/examples/code/PbiNumMismatchesFilter.txt      |    6 +
 docs/examples/code/PbiQueryEndFilter.txt           |    5 +
 docs/examples/code/PbiQueryLengthFilter.txt        |    5 +
 docs/examples/code/PbiQueryNameFilter.txt          |   15 +
 docs/examples/code/PbiQueryStartFilter.txt         |    5 +
 docs/examples/code/PbiReadAccuracyFilter.txt       |    5 +
 docs/examples/code/PbiReadGroupFilter.txt          |   64 +
 docs/examples/code/PbiReferenceEndFilter.txt       |    5 +
 docs/examples/code/PbiReferenceIdFilter.txt        |   16 +
 docs/examples/code/PbiReferenceNameFilter.txt      |   15 +
 docs/examples/code/PbiReferenceStartFilter.txt     |    5 +
 docs/examples/code/PbiZmwFilter.txt                |   16 +
 docs/examples/code/ReadAccuracyQuery.txt           |   15 +
 docs/examples/code/SubreadLengthQuery.txt          |   15 +
 docs/examples/code/Tag_AsciiCtor.txt               |   10 +
 docs/examples/code/ZmwGroupQuery.txt               |   23 +
 docs/examples/code/ZmwQuery.txt                    |    6 +
 docs/examples/code/ZmwWhitelistVirtualReader.txt   |    6 +
 docs/examples/plaintext/AlignmentPrinterOutput.txt |   13 +
 .../plaintext/PbiFilter_DataSetXmlFilters.txt      |   14 +
 docs/source/api/Accuracy.rst                       |   11 +
 docs/source/api/AlignmentPrinter.rst               |   11 +
 docs/source/api/AlignmentSet.rst                   |   11 +
 docs/source/api/BaiIndexedBamReader.rst            |   11 +
 docs/source/api/BamFile.rst                        |   11 +
 docs/source/api/BamHeader.rst                      |   11 +
 docs/source/api/BamReader.rst                      |   11 +
 docs/source/api/BamRecord.rst                      |   17 +
 docs/source/api/BamRecordBuilder.rst               |   11 +
 docs/source/api/BamRecordImpl.rst                  |   11 +
 docs/source/api/BamRecordView.rst                  |   11 +
 docs/source/api/BamTagCodec.rst                    |   11 +
 docs/source/api/BamWriter.rst                      |   11 +
 docs/source/api/BarcodeLookupData.rst              |   11 +
 docs/source/api/BarcodeQuery.rst                   |   11 +
 docs/source/api/BarcodeSet.rst                     |   11 +
 docs/source/api/BasicLookupData.rst                |   11 +
 docs/source/api/Cigar.rst                          |   11 +
 docs/source/api/CigarOperation.rst                 |   13 +
 docs/source/api/Compare.rst                        |    8 +
 docs/source/api/Config.rst                         |    8 +
 docs/source/api/ConsensusAlignmentSet.rst          |   11 +
 docs/source/api/ConsensusReadSet.rst               |   11 +
 docs/source/api/ContigSet.rst                      |   11 +
 docs/source/api/DataSet.rst                        |   11 +
 docs/source/api/DataSetBase.rst                    |   11 +
 docs/source/api/DataSetMetadata.rst                |   11 +
 docs/source/api/EntireFileQuery.rst                |   11 +
 docs/source/api/ExtensionElement.rst               |   11 +
 docs/source/api/Extensions.rst                     |   11 +
 docs/source/api/ExternalResource.rst               |   11 +
 docs/source/api/ExternalResources.rst              |   11 +
 docs/source/api/FileIndex.rst                      |   11 +
 docs/source/api/FileIndices.rst                    |   11 +
 docs/source/api/Filter.rst                         |   11 +
 docs/source/api/Filters.rst                        |   11 +
 docs/source/api/Frames.rst                         |   11 +
 docs/source/api/GenomicInterval.rst                |   11 +
 .../api/GenomicIntervalCompositeBamReader.rst      |   11 +
 docs/source/api/GenomicIntervalQuery.rst           |   11 +
 docs/source/api/HdfSubreadSet.rst                  |   11 +
 docs/source/api/IndexResultBlock.rst               |   17 +
 docs/source/api/IndexedFastaReader.rst             |   11 +
 docs/source/api/Interval.rst                       |   11 +
 .../api/InvalidSequencingChemistryException.rst    |   11 +
 docs/source/api/LocalContextFlags.rst              |    8 +
 docs/source/api/MappedLookupData.rst               |   11 +
 docs/source/api/NamespaceInfo.rst                  |   11 +
 docs/source/api/NamespaceRegistry.rst              |   11 +
 docs/source/api/OrderedLookup.rst                  |   11 +
 docs/source/api/Orientation.rst                    |    8 +
 docs/source/api/ParentTool.rst                     |   11 +
 docs/source/api/PbiBuilder.rst                     |   11 +
 docs/source/api/PbiFile.rst                        |   14 +
 docs/source/api/PbiFilter.rst                      |   11 +
 docs/source/api/PbiFilterCompositeBamReader.rst    |   11 +
 docs/source/api/PbiFilterQuery.rst                 |   11 +
 docs/source/api/PbiFilterTypes.rst                 |    8 +
 docs/source/api/PbiIndex.rst                       |   11 +
 docs/source/api/PbiIndexedBamReader.rst            |   11 +
 docs/source/api/PbiRawBarcodeData.rst              |   11 +
 docs/source/api/PbiRawBasicData.rst                |   11 +
 docs/source/api/PbiRawData.rst                     |   11 +
 docs/source/api/PbiRawMappedData.rst               |   11 +
 docs/source/api/PbiRawReferenceData.rst            |   11 +
 docs/source/api/PbiReferenceEntry.rst              |   11 +
 docs/source/api/Position.rst                       |   10 +
 docs/source/api/ProgramInfo.rst                    |   11 +
 docs/source/api/QNameQuery.rst                     |   11 +
 docs/source/api/QualityValue.rst                   |   11 +
 docs/source/api/QualityValues.rst                  |   11 +
 docs/source/api/ReadAccuracyQuery.rst              |   11 +
 docs/source/api/ReadGroupInfo.rst                  |   21 +
 docs/source/api/ReferenceLookupData.rst            |   11 +
 docs/source/api/ReferenceSet.rst                   |   11 +
 docs/source/api/SamTagCodec.rst                    |   11 +
 docs/source/api/SequenceInfo.rst                   |   11 +
 docs/source/api/SequentialCompositeBamReader.rst   |   11 +
 docs/source/api/Strand.rst                         |    8 +
 docs/source/api/SubDataSets.rst                    |   11 +
 docs/source/api/SubreadLengthQuery.rst             |   11 +
 docs/source/api/SubreadSet.rst                     |   11 +
 docs/source/api/Tag.rst                            |   15 +
 docs/source/api/TagCollection.rst                  |   11 +
 docs/source/api/UnorderedLookup.rst                |   11 +
 docs/source/api/VirtualPolymeraseBamRecord.rst     |   11 +
 .../api/VirtualPolymeraseCompositeReader.rst       |   11 +
 docs/source/api/VirtualPolymeraseReader.rst        |   11 +
 docs/source/api/VirtualRegion.rst                  |   11 +
 docs/source/api/VirtualRegionType.rst              |    8 +
 docs/source/api/VirtualRegionTypeMap.rst           |   11 +
 docs/source/api/ZmwGroupQuery.rst                  |   11 +
 docs/source/api/ZmwQuery.rst                       |   11 +
 docs/source/api/ZmwWhitelistVirtualReader.rst      |   11 +
 docs/source/api_reference.rst                      |   12 +
 docs/source/commandline_utilities.rst              |   15 +
 docs/source/conf.py                                |  332 +
 docs/source/getting_started.rst                    |  144 +
 docs/source/index.rst                              |   33 +
 docs/source/pacbio-theme/static/headerGradient.jpg |  Bin 0 -> 7099 bytes
 docs/source/pacbio-theme/static/pacbio.css         |  238 +
 docs/source/pacbio-theme/static/pacbioLogo.png     |  Bin 0 -> 3128 bytes
 docs/source/pacbio-theme/static/pygments.css       |   55 +
 docs/source/pacbio-theme/theme.conf                |    4 +
 docs/source/requirements.txt                       |    1 +
 docs/source/swig_bindings.rst                      |  257 +
 docs/source/tools/bam2sam.rst                      |   21 +
 docs/source/tools/pbindex.rst                      |   18 +
 docs/source/tools/pbindexdump.rst                  |  233 +
 docs/source/tools/pbmerge.rst                      |   30 +
 docs/specs/pbbam.rst                               |  631 ++
 include/pbbam/Accuracy.h                           |   51 +-
 include/pbbam/AlignmentPrinter.h                   |   55 +-
 include/pbbam/BaiIndexedBamReader.h                |  130 +
 include/pbbam/BamFile.h                            |  105 +-
 include/pbbam/BamHeader.h                          |  240 +-
 include/pbbam/BamReader.h                          |  157 +-
 include/pbbam/BamRecord.h                          | 1312 ++--
 include/pbbam/BamRecordBuilder.h                   |  198 +-
 include/pbbam/BamRecordImpl.h                      |  468 +-
 include/pbbam/BamTagCodec.h                        |   73 +-
 include/pbbam/BamWriter.h                          |   95 +-
 include/pbbam/{ZmwQuery.h => BarcodeQuery.h}       |   55 +-
 include/pbbam/Cigar.h                              |   66 +-
 include/pbbam/CigarOperation.h                     |   90 +-
 include/pbbam/Compare.h                            |  430 ++
 include/pbbam/CompositeBamReader.h                 |  269 +
 include/pbbam/Config.h                             |  111 +-
 include/pbbam/DataSet.h                            |  620 +-
 include/pbbam/DataSetTypes.h                       |  601 +-
 include/pbbam/DataSetXsd.h                         |   54 +-
 include/pbbam/EntireFileQuery.h                    |   49 +-
 include/pbbam/Frames.h                             |   86 +-
 include/pbbam/GenomicInterval.h                    |  144 +-
 include/pbbam/GenomicIntervalQuery.h               |   62 +-
 include/pbbam/GroupQuery.h                         |   88 -
 include/pbbam/GroupQueryBase.h                     |  214 -
 include/pbbam/IndexedFastaReader.h                 |  120 +-
 include/pbbam/Interval.h                           |  149 +-
 include/pbbam/LocalContextFlags.h                  |   25 +-
 include/pbbam/Orientation.h                        |   20 +-
 include/pbbam/PbiBasicTypes.h                      |  108 +
 include/pbbam/PbiBuilder.h                         |  143 +-
 include/pbbam/PbiFile.h                            |   36 +-
 include/pbbam/PbiFilter.h                          |  343 +
 .../pbbam/{ZmwGroupQuery.h => PbiFilterQuery.h}    |   55 +-
 include/pbbam/PbiFilterTypes.h                     | 1028 +++
 include/pbbam/PbiIndex.h                           |  277 +-
 include/pbbam/PbiIndexedBamReader.h                |  174 +
 include/pbbam/PbiLookupData.h                      |  718 ++
 include/pbbam/PbiRawData.h                         |  398 +-
 include/pbbam/Position.h                           |   13 +-
 include/pbbam/ProgramInfo.h                        |  166 +-
 include/pbbam/{BamReader.h => QNameQuery.h}        |   96 +-
 include/pbbam/QualityValue.h                       |   63 +-
 include/pbbam/QualityValues.h                      |  203 +-
 include/pbbam/QueryBase.h                          |  241 -
 include/pbbam/{ZmwQuery.h => ReadAccuracyQuery.h}  |   62 +-
 include/pbbam/ReadGroupInfo.h                      |  571 +-
 include/pbbam/SamTagCodec.h                        |   26 +-
 include/pbbam/SequenceInfo.h                       |  174 +-
 include/pbbam/Strand.h                             |   13 +-
 .../{ZmwGroupQuery.h => SubreadLengthQuery.h}      |   58 +-
 include/pbbam/Tag.h                                |  175 +-
 include/pbbam/TagCollection.h                      |   12 +-
 include/pbbam/ZmwGroupQuery.h                      |   43 +-
 include/pbbam/ZmwQuery.h                           |   42 +-
 .../{virtual/VirtualRegionType.h => ZmwType.h}     |   29 +-
 include/pbbam/{TagCollection.h => ZmwTypeMap.h}    |   25 +-
 .../InvalidSequencingChemistryException.h}         |   96 +-
 .../pbbam/{Position.h => internal/Accuracy.inl}    |   35 +-
 include/pbbam/internal/BamHeader.inl               |  154 +
 include/pbbam/internal/BamRecord.inl               |  166 +
 .../BamRecordBuilder.inl}                          |   93 +-
 include/pbbam/internal/BamRecordImpl.inl           |  216 +
 include/pbbam/internal/BamRecordSort.h             |  138 -
 .../pbbam/internal/Cigar.inl                       |   57 +-
 .../CigarOperation.inl}                            |  127 +-
 .../{SequentialMergeStrategy.h => Compare.inl}     |   55 +-
 include/pbbam/internal/CompositeBamReader.inl      |  397 ++
 include/pbbam/internal/DataSet.inl                 |    6 +-
 include/pbbam/internal/DataSetBaseTypes.h          |   11 +-
 include/pbbam/internal/DataSetElement.h            |    8 +-
 include/pbbam/internal/DataSetElement.inl          |   40 +-
 include/pbbam/internal/DataSetTypes.inl            |    6 +-
 include/pbbam/{Accuracy.h => internal/Frames.inl}  |   85 +-
 .../GenomicInterval.inl}                           |   67 +-
 include/pbbam/internal/IMergeStrategy.h            |   67 -
 .../pbbam/{Interval.h => internal/Interval.inl}    |  116 +-
 include/pbbam/internal/MergeStrategy.h             |  239 -
 .../pbbam/internal/PbiBasicTypes.inl               |   56 +-
 include/pbbam/internal/PbiFilter.inl               |  312 +
 include/pbbam/internal/PbiFilterTypes.inl          |  553 ++
 include/pbbam/internal/PbiIndex.inl                |  165 +
 include/pbbam/internal/PbiIndex_p.h                |  931 ---
 include/pbbam/internal/PbiIndex_p.inl              |  927 ---
 include/pbbam/internal/PbiLookupData.inl           |  531 ++
 include/pbbam/internal/PbiRawData.inl              |  113 +
 .../{ProgramInfo.h => internal/ProgramInfo.inl}    |  105 +-
 .../internal/{MergeItem.h => QualityValue.inl}     |   52 +-
 .../QualityValues.inl}                             |   99 +-
 include/pbbam/internal/QueryBase.h                 |  152 +-
 include/pbbam/internal/QueryBase.inl               |  177 +
 .../ReadGroupInfo.inl}                             |  286 +-
 .../{SequenceInfo.h => internal/SequenceInfo.inl}  |  111 +-
 include/pbbam/internal/Tag.inl                     |   19 +-
 include/pbbam/virtual/VirtualPolymeraseBamRecord.h |  109 +-
 .../virtual/VirtualPolymeraseCompositeReader.h     |  111 +
 include/pbbam/virtual/VirtualPolymeraseReader.h    |   96 +-
 include/pbbam/virtual/VirtualRegion.h              |   89 +-
 include/pbbam/virtual/VirtualRegionType.h          |   23 +-
 include/pbbam/virtual/VirtualRegionTypeMap.h       |   11 +-
 include/pbbam/virtual/ZmwWhitelistVirtualReader.h  |  151 +
 src/Accuracy.cpp                                   |   10 +-
 src/AlignmentPrinter.cpp                           |   15 +-
 src/BaiIndexedBamReader.cpp                        |  141 +
 src/BamFile.cpp                                    |  110 +-
 src/BamHeader.cpp                                  |  289 +-
 src/BamReader.cpp                                  |  189 +
 src/BamRecord.cpp                                  |  408 +-
 src/BamRecordImpl.cpp                              |  240 +-
 src/BamTagCodec.cpp                                |   76 +-
 src/BamWriter.cpp                                  |   80 +-
 .../src/test_TimeUtils.cpp => src/BarcodeQuery.cpp |   41 +-
 .../IBamFileIterator.h => src/ChemistryTable.cpp   |   50 +-
 .../pbbam/Orientation.h => src/ChemistryTable.h    |   20 +-
 src/Cigar.cpp                                      |    8 +-
 src/CigarOperation.cpp                             |    6 +-
 src/Compare.cpp                                    |  141 +
 src/Config.cpp                                     |   16 +-
 src/DataSet.cpp                                    |  143 +-
 src/DataSetBaseTypes.cpp                           |   48 +-
 src/DataSetElement.cpp                             |    4 +-
 src/DataSetIO.cpp                                  |    7 +-
 src/DataSetTypes.cpp                               |  110 +-
 src/DataSetUtils.h                                 |   12 +
 src/DataSetXsd.cpp                                 |  177 +-
 src/EntireFileQuery.cpp                            |   73 +-
 src/FileUtils.cpp                                  |  246 +
 src/FileUtils.h                                    |  145 +-
 src/Frames.cpp                                     |   10 +-
 src/GenomicInterval.cpp                            |    7 +-
 src/GenomicIntervalQuery.cpp                       |  113 +-
 src/GroupQuery.cpp                                 |   91 -
 src/IndexedFastaReader.cpp                         |   37 +-
 src/MemoryUtils.h                                  |    9 +
 src/PbiBuilder.cpp                                 |  225 +-
 src/PbiFile.cpp                                    |   49 +-
 src/PbiFilter.cpp                                  |  249 +
 src/{FilterEngine.cpp => PbiFilterQuery.cpp}       |   56 +-
 src/PbiFilterTypes.cpp                             |  313 +
 src/PbiIndex.cpp                                   |  143 +-
 src/PbiIndexIO.cpp                                 |  111 +-
 src/PbiIndexIO.h                                   |    8 +-
 src/PbiIndexedBamReader.cpp                        |  187 +
 src/PbiRawData.cpp                                 |  215 +-
 src/ProgramInfo.cpp                                |    6 +-
 src/{FilterEngine.cpp => QNameQuery.cpp}           |   88 +-
 src/QualityValue.cpp                               |    6 +-
 .../ReadAccuracyQuery.cpp                          |   44 +-
 src/ReadGroupInfo.cpp                              |  503 +-
 src/SamTagCodec.cpp                                |   26 +-
 src/SequenceInfo.cpp                               |    6 +-
 .../SubreadLengthQuery.cpp                         |   44 +-
 src/Tag.cpp                                        |   25 +-
 src/TagCollection.cpp                              |    6 +-
 src/TimeUtils.h                                    |   19 +
 src/VirtualPolymeraseBamRecord.cpp                 |  149 +-
 src/VirtualPolymeraseCompositeReader.cpp           |  146 +
 src/VirtualPolymeraseReader.cpp                    |  286 +-
 src/VirtualRegionTypeMap.cpp                       |   17 +-
 src/XmlReader.cpp                                  |    2 +-
 src/XmlWriter.cpp                                  |   83 +-
 src/ZmwGroupQuery.cpp                              |  133 +-
 src/ZmwQuery.cpp                                   |  111 +-
 src/{VirtualRegionTypeMap.cpp => ZmwTypeMap.cpp}   |   20 +-
 ...aseReader.cpp => ZmwWhitelistVirtualReader.cpp} |  102 +-
 src/files.cmake                                    |   89 +-
 src/swig/Accuracy.i                                |    4 +-
 src/swig/BamFile.i                                 |   13 +-
 src/swig/BamHeader.i                               |    7 -
 src/swig/BamRecord.i                               |    8 +-
 src/swig/BamRecordBuilder.i                        |    6 +
 src/swig/BamRecordImpl.i                           |    4 +-
 src/swig/BamWriter.i                               |    5 +-
 src/swig/CigarOperation.i                          |    2 -
 src/swig/DataSet.i                                 |   34 +-
 src/swig/DataSetTypes.i                            |   59 +
 src/swig/EntireFileQuery.i                         |    4 +-
 src/swig/GenomicInterval.i                         |    4 +-
 src/swig/GenomicIntervalQuery.i                    |    6 +-
 src/swig/LocalContextFlags.i                       |    4 +
 src/swig/PacBioBam.i                               |   43 +-
 src/swig/PbiRawData.i                              |   12 +-
 src/swig/Tag.i                                     |  174 +-
 src/swig/VirtualPolymeraseBamRecord.i              |   24 +
 src/swig/VirtualPolymeraseReader.i                 |   11 +
 src/swig/VirtualRegion.i                           |   18 +
 src/swig/ZmwWhitelistVirtualReader.i               |   11 +
 tests/CMakeLists.txt                               |   15 +-
 tests/data/chunking/chunking.subreadset.xml        |   65 +
 .../chunking/chunking_emptyfilters.subreadset.xml  |   59 +
 .../chunking_missingfilters.subreadset.xml         |   58 +
 ...20800000001823174110291514_s1_p0.1.subreads.bam |  Bin 0 -> 1090276 bytes
 ...0000001823174110291514_s1_p0.1.subreads.bam.pbi |  Bin 0 -> 4163 bytes
 ...20800000001823174110291514_s1_p0.2.subreads.bam |  Bin 0 -> 980379 bytes
 ...0000001823174110291514_s1_p0.2.subreads.bam.pbi |  Bin 0 -> 3933 bytes
 ...20800000001823174110291514_s1_p0.3.subreads.bam |  Bin 0 -> 973029 bytes
 ...0000001823174110291514_s1_p0.3.subreads.bam.pbi |  Bin 0 -> 3698 bytes
 tests/data/dataset/ali1.xml                        |    2 +-
 tests/data/dataset/ali2.xml                        |    2 +-
 tests/data/dataset/ali3.xml                        |    2 +-
 tests/data/dataset/ali4.xml                        |    2 +-
 tests/data/dataset/bam_mapping.bam                 |  Bin 169668 -> 169668 bytes
 tests/data/dataset/bam_mapping.bam.pbi             |  Bin 2452 -> 2469 bytes
 tests/data/dataset/bam_mapping_1.bam               |  Bin 167530 -> 167530 bytes
 tests/data/dataset/bam_mapping_1.bam.pbi           |  Bin 2437 -> 2448 bytes
 tests/data/dataset/bam_mapping_2.bam               |  Bin 165778 -> 165778 bytes
 tests/data/dataset/bam_mapping_2.bam.pbi           |  Bin 2422 -> 2435 bytes
 tests/data/dataset/bam_mapping_new.bam             |  Bin 0 -> 22428 bytes
 tests/data/dataset/bam_mapping_new.bam.pbi         |  Bin 0 -> 362 bytes
 tests/data/dataset/bam_mapping_staggered.xml       |    2 +-
 tests/data/dataset/barcode.dataset.xml             |    4 +-
 tests/data/dataset/ccsread.dataset.xml             |    2 +-
 tests/data/dataset/contig.dataset.xml              |   30 +-
 tests/data/dataset/hdfsubread_dataset.xml          |    2 +-
 tests/data/dataset/lambda_contigs.xml              |    6 +-
 tests/data/dataset/malformed.xml                   |   84 +
 tests/data/dataset/merge.fofn                      |    2 +
 .../pbalchemy10kbp.pbalign.sorted.pbver1.bam       |  Bin 35251 -> 35235 bytes
 .../pbalchemy10kbp.pbalign.sorted.pbver1.bam.bai   |  Bin 632 -> 632 bytes
 tests/data/dataset/pbalchemy10kbp.xml              |    2 +-
 tests/data/dataset/reference.dataset.xml           |    2 +-
 tests/data/dataset/subread_dataset1.xml            |    2 +-
 tests/data/dataset/subread_dataset2.xml            |    2 +-
 tests/data/dataset/subread_dataset3.xml            |    2 +-
 .../dataset/transformed_rs_subread_dataset.xml     |   25 +-
 tests/data/ex2.bam                                 |  Bin 125999 -> 126008 bytes
 tests/data/ex2.bam.bai                             |  Bin 176 -> 176 bytes
 tests/data/ex2.sam                                 |    2 +-
 tests/data/{ex2.bam => ex2_copy.bam}               |  Bin
 tests/data/{ex2.bam.bai => ex2_copy.bam.bai}       |  Bin
 tests/data/phi29.bam                               |  Bin 0 -> 71653 bytes
 .../polymerase/consolidate.subread.dataset.xml     |   38 +
 .../filtered_resources.subread.dataset.xml         |   67 +
 tests/data/polymerase/internal.hqregions.bam       |  Bin 0 -> 84164 bytes
 tests/data/polymerase/internal.lqregions.bam       |  Bin 0 -> 53088 bytes
 tests/data/polymerase/internal.polymerase.bam      |  Bin 134303 -> 133770 bytes
 tests/data/polymerase/internal.scraps.bam          |  Bin 68735 -> 67986 bytes
 tests/data/polymerase/internal.scraps.bam.pbi      |  Bin 0 -> 275 bytes
 tests/data/polymerase/internal.subreads.bam        |  Bin 73590 -> 73170 bytes
 tests/data/polymerase/internal.subreads.bam.pbi    |  Bin 0 -> 185 bytes
 tests/data/polymerase/internal_hq.hqregion.bam     |  Bin 82035 -> 0 bytes
 tests/data/polymerase/internal_hq.scraps.bam       |  Bin 51450 -> 0 bytes
 tests/data/polymerase/internal_polymerase.fasta    |    2 -
 .../multiple_resources.subread.dataset.xml         |   46 +
 tests/data/polymerase/production.polymerase.bam    |  Bin 25072 -> 25082 bytes
 tests/data/polymerase/production.scraps.bam        |  Bin 13530 -> 13535 bytes
 tests/data/polymerase/production.scraps.bam.pbi    |  Bin 0 -> 279 bytes
 tests/data/polymerase/production.subreads.bam      |  Bin 14655 -> 14659 bytes
 tests/data/polymerase/production.subreads.bam.pbi  |  Bin 0 -> 186 bytes
 tests/data/polymerase/production_hq.hqregion.bam   |  Bin 15796 -> 15803 bytes
 .../data/polymerase/production_hq.hqregion.bam.pbi |  Bin 0 -> 90 bytes
 tests/data/polymerase/production_hq.scraps.bam     |  Bin 10061 -> 10070 bytes
 tests/data/polymerase/production_hq.scraps.bam.pbi |  Bin 0 -> 104 bytes
 .../polymerase/whitelist/internal.polymerase.bam   |  Bin 0 -> 400494 bytes
 .../whitelist/internal.polymerase.bam.pbi          |  Bin 0 -> 105 bytes
 .../data/polymerase/whitelist/internal.scraps.bam  |  Bin 0 -> 203149 bytes
 .../polymerase/whitelist/internal.scraps.bam.pbi   |  Bin 0 -> 420 bytes
 .../polymerase/whitelist/internal.subreads.bam     |  Bin 0 -> 218703 bytes
 .../polymerase/whitelist/internal.subreads.bam.pbi |  Bin 0 -> 264 bytes
 .../data/polymerase/whitelist/scrapless.scraps.bam |  Bin 0 -> 436 bytes
 .../polymerase/whitelist/scrapless.scraps.bam.pbi  |  Bin 0 -> 65 bytes
 .../polymerase/whitelist/scrapless.subreads.bam    |  Bin 0 -> 33466 bytes
 .../whitelist/scrapless.subreads.bam.pbi           |  Bin 0 -> 113 bytes
 tests/data/relative/a/test.bam                     |  Bin 0 -> 351 bytes
 tests/data/relative/b/test1.bam                    |  Bin 0 -> 351 bytes
 tests/data/relative/b/test2.bam                    |  Bin 0 -> 351 bytes
 tests/data/relative/relative.fofn                  |    3 +
 tests/data/relative/relative.xml                   |    8 +
 tests/data/relative/relative2.fofn                 |    4 +
 tests/data/test_group_query/test1.bam              |  Bin 2165 -> 2168 bytes
 tests/data/test_group_query/test2.bam              |  Bin 13004 -> 13008 bytes
 tests/data/test_group_query/test2.bam.pbi          |  Bin 194 -> 197 bytes
 tests/data/test_group_query/test3.bam              |  Bin 19277 -> 19294 bytes
 tests/data/truncated.bam                           |  Bin 0 -> 200 bytes
 tests/files.cmake                                  |   12 +-
 tests/scripts/cram.py                              |  516 ++
 tests/src/R/test_pbbam.sh.in                       |   11 +-
 tests/src/R/tests/test_Accuracy.R                  |   33 +-
 tests/src/R/tests/test_BamFile.R                   |    3 +-
 tests/src/R/tests/test_BamHeader.R                 |    8 +-
 tests/src/R/tests/test_EndToEnd.R                  |   41 +-
 tests/src/R/tests/test_Intervals.R                 |   52 +-
 tests/src/R/tests/test_PolymeraseStitching.R       |  427 ++
 tests/src/TestData.h.in                            |    8 +-
 tests/src/cram/bam2sam.t                           |   63 +
 tests/src/cram/pbindexdump_cpp.t                   |   39 +
 tests/src/cram/pbindexdump_json.t                  |   83 +
 tests/src/cram/pbmerge_aligned_ordering.t          |  197 +
 tests/src/cram/pbmerge_dataset.t                   |  144 +
 tests/src/cram/pbmerge_fofn.t                      |  134 +
 tests/src/cram/pbmerge_mixed_ordering.t            |   57 +
 tests/src/cram/pbmerge_pacbio_ordering.t           |  227 +
 tests/src/python/test/test_Accuracy.py             |   24 +-
 tests/src/python/test/test_BamFile.py              |    1 -
 tests/src/python/test/test_BamHeader.py            |    8 +-
 tests/src/python/test/test_PolymeraseStitching.py  |  383 +
 tests/src/test_Accuracy.cpp                        |   24 +-
 tests/src/test_AlignmentPrinter.cpp                |  102 +-
 tests/src/test_BamFile.cpp                         |  108 +
 tests/src/test_BamHeader.cpp                       |  216 +-
 tests/src/test_BamRecord.cpp                       |    4 +
 tests/src/test_BamRecordClipping.cpp               |   24 +
 tests/src/test_BamWriter.cpp                       |   27 +
 .../src/test_BarcodeQuery.cpp                      |   19 +-
 tests/src/test_Compare.cpp                         |  739 ++
 tests/src/test_DataSetCore.cpp                     |   74 +-
 tests/src/test_DataSetIO.cpp                       |  436 +-
 tests/src/test_DataSetQuery.cpp                    |  126 +-
 tests/src/test_DataSetXsd.cpp                      |   80 +-
 tests/src/test_EndToEnd.cpp                        |  160 +-
 tests/src/test_EntireFileQuery.cpp                 |    3 +-
 tests/src/test_FileUtils.cpp                       |  325 +
 tests/src/test_GenomicIntervalQuery.cpp            |  348 +-
 tests/src/test_PacBioIndex.cpp                     |  514 +-
 tests/src/test_PbiFilter.cpp                       | 1300 ++++
 tests/src/test_PbiFilterQuery.cpp                  |  245 +
 tests/src/test_PolymeraseStitching.cpp             |  191 +-
 .../{test_GroupQuery.cpp => test_QNameQuery.cpp}   |   81 +-
 ...test_BamFile.cpp => test_ReadAccuracyQuery.cpp} |   39 +-
 tests/src/test_ReadGroupInfo.cpp                   |   64 +-
 tests/src/test_SequenceUtils.cpp                   |   24 -
 .../{test_TimeUtils.cpp => test_StringUtils.cpp}   |   29 +-
 ...est_BamFile.cpp => test_SubreadLengthQuery.cpp} |   48 +-
 tests/src/test_Tags.cpp                            |  246 +-
 tests/src/test_TimeUtils.cpp                       |   16 +-
 .../src/test_VirtualPolymeraseCompositeReader.cpp  |  132 +
 tools/CMakeLists.txt                               |   18 +-
 tools/bam2sam/CMakeLists.txt                       |   32 +
 tools/bam2sam/src/Bam2Sam.cpp                      |  121 +
 .../pbbam/Strand.h => tools/bam2sam/src/Bam2Sam.h  |   24 +-
 .../bam2sam/src/Bam2SamVersion.h.in                |   19 +-
 .../bam2sam/src/Settings.h                         |   33 +-
 tools/{pbindex => bam2sam}/src/main.cpp            |   87 +-
 .../common/BamFileMerger.h                         |   65 +-
 tools/common/BamFileMerger.inl                     |  262 +
 tools/{pbindex/src => common}/OptionParser.cpp     |    0
 tools/{pbindex/src => common}/OptionParser.h       |    0
 tools/pbindex/CMakeLists.txt                       |   27 +-
 tools/pbindex/src/PbIndex.cpp                      |    2 +-
 tools/pbindex/src/main.cpp                         |    2 +-
 tools/pbindexdump/CMakeLists.txt                   |   35 +
 tools/pbindexdump/src/CppFormatter.cpp             |  177 +
 .../pbindexdump/src/CppFormatter.h                 |   25 +-
 .../pbindexdump/src/IFormatter.h                   |   36 +-
 tools/pbindexdump/src/JsonFormatter.cpp            |  195 +
 .../pbindexdump/src/JsonFormatter.h                |   48 +-
 .../src/PbIndexDump.cpp}                           |   49 +-
 .../pbindexdump/src/PbIndexDump.h                  |   24 +-
 .../pbindexdump/src/PbIndexDumpVersion.h.in        |   22 +-
 .../pbindexdump/src/Settings.h                     |   35 +-
 tools/pbindexdump/src/json.hpp                     | 7295 ++++++++++++++++++++
 tools/{pbindex => pbindexdump}/src/main.cpp        |   86 +-
 tools/pbmerge/CMakeLists.txt                       |   36 +
 .../pbmerge/src/PbMergeVersion.h.in                |   19 +-
 tools/pbmerge/src/main.cpp                         |  174 +
 562 files changed, 38529 insertions(+), 9895 deletions(-)

diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..b1990e9
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,64 @@
+language: cpp
+compiler:
+  - gcc
+
+before_install:
+
+  # Travis's default installs of gcc, boost, & cmake currently lag behind the minimums we need.
+  # So we need to manually set them up.
+  #
+  #  - gcc 4.8 (current default on Travis is 4.7, which is no good for C++11 work)
+  #  - boost 1.55
+  #  - cmake 3.x
+  
+  # add external repos
+  - sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test           # gcc
+  - sudo add-apt-repository -y ppa:boost-latest/ppa                  # boost
+  - sudo add-apt-repository -y ppa:george-edison55/precise-backports # cmake
+  
+  # remove existing cmake install
+  - sudo apt-get remove -qq cmake cmake-data
+  - sudo apt-get autoremove -qq
+  
+  # update apt 
+  - sudo apt-get update -y -qq
+
+  # install
+  - sudo apt-get install -y -qq g++-4.8 boost1.55 cmake-data cmake 
+  
+  # make sure we're using new gcc tools
+  - sudo update-alternatives --install /usr/bin/g++  g++  /usr/bin/g++-4.8  90
+  - sudo update-alternatives --install /usr/bin/gcc  gcc  /usr/bin/gcc-4.8  90 
+  - sudo update-alternatives --install /usr/bin/gcov gcov /usr/bin/gcov-4.8 90
+
+  # prep zlib
+  - sudo apt-get install -y -qq zlib1g-dev
+
+  # prep htslib
+  - "cd .. && git clone https://github.com/PacificBiosciences/htslib.git && cd htslib && make && sudo make install; cd $TRAVIS_BUILD_DIR"
+
+  # prep GoogleTest 
+  - sudo apt-get install -y -qq libgtest-dev
+
+before_script:
+  # run cmake
+  - mkdir build 
+  - cd build
+  - cmake .. -DGTEST_SRC_DIR=/usr/src/gtest -DCMAKE_BUILD_TYPE=Debug
+    
+script:
+  # build & test
+  - make -j 3
+  - make test
+
+branches:
+  only:
+    - master
+    
+notifications:
+  recipients:
+    - dbarnett at pacb.com
+  email:
+    on_success: change
+    on_failure: always 
+   
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 68703c1..bd2c228 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,36 +3,199 @@
 All notable changes to this project will be documented in this file.
 This project adheres to [Semantic Versioning](http://semver.org/). 
 
-**NOTE:** The current series (0.y.z) is under initial development. Anything may change at any time. 
-The public API should not be considered stable yet. Once we lock down a version 1.0.0, this will 
-define a reference point & compatibility guarantees will be maintained within each major version 
-series.
-
+**NOTE:** The current series (0.y.z) is under initial development. Anything may
+change at any time. The public API should not be considered stable yet. Once we
+lock down a version 1.0.0, this will define a reference point & compatibility
+guarantees will be maintained within each major version series.
 
 ## Active
 
+## [0.5.0] - 2016-02-22
+
+### Added
+- Platform model tag added to read group as RG::PM
+- New scrap zmw type sz
+
+### Added
+- pbmerge accepts DataSetXML as input - using top-level resource BAMs as input,
+applying filters, and generating a merged BAM. Also added FOFN support, instead
+of listing out BAMs as command line args.
+- PbiLocalContextFilter to allow filtering on subread local context.
+- PbiBuilder: multithreading & zlib compression-level tuning for PBI output
+
+### Fixed
+- Fixed mishandling of relative BAM filenames in the filename constructor for
+DataSet (e.g. DataSet ds("../data.bam")).
+
+## [0.4.5] - 2016-01-14
+
+### Changed
+- PbiFilterQuery (and any other PBI-backed query, e.g. ZmwQuery ) now throws if
+PBI file(s) missing instead of returning empty result.
+- GenomicIntervalQuery now throws if BAI file(s) missing instead of returning
+empty result.
+- BamFile will throw if file is truncated (e.g. missing the EOF block). Disable
+by defining PBBAM_NO_CHECK_EOF .
+
+## [0.4.4] - 2016-01-07
+
+### Added
+- bam2sam command line utility. The primary benefit is removing the dependency
+on samtools during tests, but also provides users a functioning BAM -> SAM
+converter in the absence of samtools.
+- pbmerge command line utility. Allows merging N BAM files into one, optionally
+creating the PBI file alongside.
+- Added BamRecord::Pkmean2 & Pkmid2, 2D equivalent of Pkmean/Pkmid, for internal
+BAMs.
+
+### Removed 
+- samtools dependency
+
+## [0.4.3] - 2015-12-22
+
+### Added
+- Compile using ccache by default, if available. Can be manually disabled using
+-DPacBioBAM_use_ccache=OFF with cmake.
+- pbindexdump: command-line utility that converts PBI file data into human-
+readable formats. (JSON by default).
+
+### Changed
+- CMake option PacBioBAM_build_pbindex is being deprecated. Use
+PacBioBAM_build_tools instead.
+
+## [0.4.2] - 2015-12-22
+
+### Changed
+- BamFile::PacBioIndexExists & StandardIndexExists no longer check timestamps.
+Copying/moving files around can yield timestamps that are not helpful (no longer
+guaranteed that the .pbi will be "newer" than the .bam, even though no content
+changed). Added methods (e.g. bool BamFile::PacBioIndexIsNewer()) to do that
+lookup if needed, but it is no longer done automatically.
+
+## [0.4.1] - 2015-12-18
+
+### Added
+- BamRecord::HasNumPasses
+
+### Changed
+- VirtualPolymeraseBamRecord::VirtualRegionsTable(type) returns an empty vector
+of regions if none are associated with the requested type, instead of throwing.
+
+## [0.4.0] - 2015-12-15
+
+### Changed
+- Redesigned PbiFilter interface and backend. Previous implementation did not
+scale well as intermediate results were far too unwieldy. This redesign provides
+speedups of orders of magnitude in many cases.
+
+## [0.3.2] - 2015-12-10
+
+### Added 
+- Support for ReadGroupInfo sequencing chemistry data.
+InvalidSequencingChemistryException thrown if an unsupported combination is
+encountered.
+- VirtualPolymeraseCompositeReader - for re-stitching records, across multiple
+resources (e.g. from DataSetXML). Reader respects DataSet filter criteria.
+
+## [0.3.1] - 2015-10-30
+
+### Added
+- ZmwWhitelistVirtualReader: similar to VirtualPolymeraseReader but restricts
+iteration to a whitelist of ZMW hole numbers, leveraging PBI index data for
+random-access.
+
+### Fixed
+- Fixed error in PBI construction, in which entire file sections (e.g.
+BarcodeData or MappedData) were being dropped when any one record lacked data.
+Correct behavior is to allow file section omission if all records lack that
+data type.
+
+## [0.3.0] - 2015-10-29
+
+### Fixed
+- Improper reporting of current offset from multi-threaded BamWriter. This had
+the effect of creating broken PBIs that were written alongside the BAM. Added a
+flush step, which incurs a performance hit, but restores correctness.
+
+## [0.2.4] - 2015-10-26
+
+### Fixed
+- Empty PbiFilter now returns all records, instead of filtering away all records.
+
+## [0.2.3] - 2015-10-26
+
+### Added/Fixed
+- Syncing DataSetXML across APIs. Primary changes include output of Version
+attribute ("3.0.1") on appropriate elements, as well as resolution of namespace
+issues.
+
+## [0.2.2] - 2015-10-22
+
+### Added
+- Added BAI bin calculation to BamWriter::Write, to ensure maximal compatibility
+with downstream tools (e.g. 'samtools index'). A new BinCalculationMode enum
+flag in BamWriter constructor controls whether this behavior is enabled [default]
+or not.
+
+## [0.2.1] - 2015-10-19
+
+### Added
+- Exposed the following classes to public API:
+  - BamReader
+  - BaiIndexedBamReader
+  - PbiIndexedBamReader
+  - GenomicIntervalCompositeBamReader
+  - PbiFilterCompositeBamReader
+
+## [0.2.0] - 2015-10-09
+
+### Changed
+- BAM spec v3.0.1 compliance. Previous (beta) versions of the BAM spec are not
+supported and will cause an exception to be thrown if encountered.
+- PBI lookup interface & backend, see PbiIndex.h & PbiLookupData.h for details.
+
 ### Added 
-- BamFile::PacBioIndexExists() & BamFile::StandardIndexExists() - query the existence of index files 
-without auto-building them if they are missing, as in BamFile::Ensure*IndexExists().
-- GenomicInterval now accepts an htslib/samtools-style REGION string in the constructor: 
-GenomicInterval("chr1:1000-2000"). Please note though, that pbbam uses 0-based coordinates throughout, 
-whereas samtools expects 1-based. The above string is equivalent to "chr1:1001-2000" in samtools.
+- BamFile::PacBioIndexExists() & BamFile::StandardIndexExists() - query the
+existence of index files without auto-building them if they are missing, as in
+BamFile::Ensure*IndexExists().
+- GenomicInterval now accepts an htslib/samtools-style REGION string in the
+constructor: GenomicInterval("chr1:1000-2000"). Please note though, that pbbam
+uses 0-based coordinates throughout, whereas samtools expects 1-based. The above
+string is equivalent to "chr1:1001-2000" in samtools.
+- Built-in PBI filters. See PbiFilter.h & PbiFilterTypes.h for built-in filters
+and constructing composite filters. These can be used in conjunction with the
+new PbiFilterQuery, which takes a generic PbiFilter and applies that to a
+DataSet for iteration.
+- New built-in queries: BarcodeQuery, ReadAccuracyQuery, SubreadLengthQuery.
+These leverage the new filter API to construct a PbiFilter and apply to a
+DataSet.
+- Built-in BamRecord comparators that are STL-compatible. See Compare.h for full
+list. This allows for statements like the following, which sorts records by ZMW
+number:
+``` c++
+    vector<BamRecord> data;
+    std::sort(data.begin(), data.end(), Compare::Zmw());
+```
+- "exciseSoftClips" option to BamRecord::CigarData()
 
 ## [0.1.0] - 2015-07-17
 
 ### Changed
 - BAM spec v3.0b7 compliance
- - Removal of 'M' as allowed CIGAR operation. Attempt to use such a CIGAR op will throw an exception.
+ - Removal of 'M' as allowed CIGAR operation. Attempt to use such a CIGAR op
+ will throw an exception.
  - Addition of IPD/PulseWidth codec version info in header
   
 ### Added
 - Auto-generation of UTC timestamp for DataSet objects
-- PbiBuilder - allows generation of PBI index data alongside generation/modification of BAM record
-data. This obviates the need to wait for a completed BAM, then go through the zlib decompression, etc.
-- Added DataSet::FromXml(string xml) to create DataSets from "raw" XML string, rather than building up 
-using DataSet API or loading from existing file.
-- "pbindex" command line tool to generate ".pbi" files from BAM data. The executable is built by default, 
-but can be disabled using the cmake option "-DPacBioBAM_build_pbindex=OFF".
+- PbiBuilder - allows generation of PBI index data alongside generation or
+modification of BAM record data. This obviates the need to wait for a completed
+BAM, then go through the zlib decompression, etc.
+- Added DataSet::FromXml(string xml) to create DataSets from "raw" XML string,
+rather than building up using DataSet API or loading from existing file.
+- "pbindex" command line tool to generate ".pbi" files from BAM data. The
+executable is built by default, but can be disabled using the cmake option
+"-DPacBioBAM_build_pbindex=OFF".
   
 ### Fixed
 - PBI construction failing on CCS reads
@@ -45,23 +208,28 @@ but can be disabled using the cmake option "-DPacBioBAM_build_pbindex=OFF".
 ## [0.0.7] - 2015-07-02
 
 ### Added
-- PBI index lookup API. Not so much intended for client use directly, but will enable construction of
-  higher-level semantic queries: grouping by, filtering, etc.
-- DataSet & PBI-aware queries (e.g. ZmwGroupQuery). More PBI-enabled queries to follow.
-- More flexibility in tag access. Samtools has a habit of performing a "shrink-to-fit" when it handles
-  integer-valued tag data. Thus we cannot **guarantee** the binary type that our API will have to process.
-  Safe conversions are allowed on integer-like data only. Under- or overflows in casting will trigger an 
-  exception. All other tag data types must be asked for explicitly, or else an exception will be raised, 
-  as before.
-- BamHeader::DeepCopy - allows creation of editable header data, without overwriting all shared instances
+- PBI index lookup API. Not so much intended for client use directly, but will
+enable construction of higher-level semantic queries: grouping by, filtering,
+etc.
+- DataSet & PBI-aware queries (e.g. ZmwGroupQuery). More PBI-enabled queries to
+follow.
+- More flexibility in tag access. Samtools has a habit of performing a
+"shrink-to-fit" when it handles integer-valued tag data. Thus we cannot
+**guarantee** the binary type that our API will have to process. Safe
+conversions are allowed on integer-like data only. Under- or overflows in
+casting will trigger an exception. All other tag data types must be asked for
+explicitly, or else an exception will be raised, as before.
+- BamHeader::DeepCopy - allows creation of editable header data, without
+overwriting all shared instances
 
 ### Fixed
 - XSD compliance for DataSet APIs.
 
 ### Changed
-- The functionality provided by ZmwQuery (group by hole number), is now available using the ZmwGroupQuery
-  object. The new ZmwQuery returns a single-record iterator (a la EntireFileQuery), but limited to a whitelist 
-  of requested hole numbers.
+- The functionality provided by ZmwQuery (group by hole number), is now
+available using the ZmwGroupQuery object. The new ZmwQuery returns a single-
+record iterator (a la EntireFileQuery), but limited to a whitelist of requested
+hole numbers.
 
 ### Removed
 - XSD non-compliant classes (e.g. ExternalDataReference)
@@ -98,19 +266,22 @@ but can be disabled using the cmake option "-DPacBioBAM_build_pbindex=OFF".
 
 ### Added
 
-- DataSet support. This includes XML I/O, basic dataset query/manipulation, and multi-BAM-file 
-  queries. New classes are located in <pbbam/dataset/>. DataSet-capable queries currently reside in the 
-  PacBio::BAM::staging namespace. These will be ported over to the main namespace once the support is 
-  stabilized and works seamlessly with either a single BamFile or DataSet object as input. (bug 25941)
-- PBI support. This includes read/write raw data & building from a BamFile. The lookup API for 
-  random-access queries is under development, but the raw data is available - for creating PBI files & 
-  generating summary statistics. (bug 26025)
+- DataSet support. This includes XML I/O, basic dataset query/manipulation, and
+multi-BAM-file queries. New classes are located in <pbbam/dataset/>. DataSet-
+capable queries currently reside in the PacBio::BAM::staging namespace. These
+will be ported over to the main namespace once the support is stabilized and
+works seamlessly with either a single BamFile or DataSet object as input. (bug
+25941)
+- PBI support. This includes read/write raw data & building from a BamFile. The
+lookup API for random-access queries is under development, but the raw data is
+available - for creating PBI files & generating summary statistics. (bug 26025)
 - C# SWIG bindings, alongside existing Python and R wrappers.
 - LocalContextFlags support in BamRecord (bug 26623)
 
 ### Fixed
 
-- BamRecord[Impl] map quality now  initialized with 255 (missing) value, instead of 0. (bug 26228)
+- BamRecord[Impl] map quality now  initialized with 255 (missing) value, instead
+of 0. (bug 26228)
 - ReadGroupId calculation. (bug 25940)
   
 ## [0.0.4] - 2015-04-22
@@ -124,7 +295,8 @@ but can be disabled using the cmake option "-DPacBioBAM_build_pbindex=OFF".
 ### Changed
 
 - Now using exceptions instead of return codes, output parameters, etc.
-- Removed "messy" shared_ptrs across interface (see especially BamHeader). These are now taken care of within the API, not exposed to client code.
+- Removed "messy" shared_ptrs across interface (see especially BamHeader). These
+are now taken care of within the API, not exposed to client code.
 
 ### Removed
 
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f7a646d..4908a52 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,25 +3,58 @@
 ########################################################################
 
 cmake_policy(SET CMP0048 NEW)  # lets us set version in project()
-project(PacBioBAM VERSION 0.1.0 LANGUAGES CXX C)
+project(PacBioBAM VERSION 0.5.0 LANGUAGES CXX C)
 cmake_minimum_required(VERSION 3.0)
 
-# project version
+# project name & version
+set(PacBioBAM_NAME pbbam)
 set(PacBioBAM_VERSION
   "${PacBioBAM_VERSION_MAJOR}.${PacBioBAM_VERSION_MINOR}.${PacBioBAM_VERSION_PATCH}"
 )
 
 # list build-time options
-option(PacBioBAM_build_docs    "Build PacBioBAM's API documentation."           ON)
-option(PacBioBAM_build_tests   "Build PacBioBAM's unit tests."                  ON)
-option(PacBioBAM_build_shared  "Build PacBioBAM as shared library as well."     OFF)
-option(PacBioBAM_build_pbindex "Build pbindex tool."                            ON)
-option(PacBioBAM_wrap_csharp   "Build PacBioBAM with SWIG bindings for C#."     OFF)
-option(PacBioBAM_wrap_python   "Build PacBioBAM with SWIG bindings for Python." OFF)
-option(PacBioBAM_wrap_r        "Build PacBioBAM with SWIG bindings for R."      OFF)
-option(PacBioBAM_use_modbuild  "Build PacBioBAM using Modular Build System."    OFF)
+option(PacBioBAM_build_docs    "Build PacBioBAM's API documentation."                   ON)
+option(PacBioBAM_build_tests   "Build PacBioBAM's unit tests."                          ON)
+option(PacBioBAM_build_shared  "Build PacBioBAM as shared library as well."             OFF)
+option(PacBioBAM_build_tools   "Build PacBioBAM command line utilities (e.g. pbindex)"  ON)
+option(PacBioBAM_wrap_csharp   "Build PacBioBAM with SWIG bindings for C#."             OFF)
+option(PacBioBAM_wrap_python   "Build PacBioBAM with SWIG bindings for Python."         OFF)
+option(PacBioBAM_wrap_r        "Build PacBioBAM with SWIG bindings for R."              OFF)
+option(PacBioBAM_use_modbuild  "Build PacBioBAM using Modular Build System."            OFF)
+option(PacBioBAM_use_ccache    "Build PacBioBAM using ccache, if available."            ON)
+
+# enable ccache, if available 
+if(PacBioBAM_use_ccache)
+    find_program(CCACHE_FOUND ccache)
+    if(CCACHE_FOUND)
+        set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
+        set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK    ccache)
+    endif()
+endif()
+
+# Deprecating the "PacBioBAM_build_pbindex" command line option in favor of more
+# general "PacBioBAM_build_tools", as we're starting to add new utilities.
+#
+# That said, I don't want to break current auto tests/builds, so I'm providing a
+# warning message so devs are aware.
+#
+if(DEFINED PacBioBAM_build_pbindex)
+
+    # construct warning message
+    set(pbindex_warning "\nDeprecated:\n-DPacBioBAM_build_pbindex\n")
+    if (PacBioBAM_build_pbindex)
+        set(pbindex_warning "${pbindex_warning} Building as requested,")
+    else()
+        set(pbindex_warning "${pbindex_warning} Skipping as requested,")
+    endif()
+    set(pbindex_warning "${pbindex_warning} but support for this option will be removed at some point in the future.\n")
+    message(AUTHOR_WARNING "${pbindex_warning} ** Use -DPacBioBAM_build_tools instead. **\n")
+
+    # force PacBioBAM_build_tools option
+    set(PacBioBAM_build_tools ${PacBioBAM_build_pbindex} CACHE BOOL
+        "Build PacBioBAM with add'l utilities (e.g. pbindex, pbindexdump)." FORCE)
+endif()
 
-# --check build-time options --
 
 # enable testing if requested
 if(PacBioBAM_build_tests)
@@ -54,6 +87,7 @@ set(PacBioBAM_IncludeDir    ${PacBioBAM_RootDir}/include)
 set(PacBioBAM_SourceDir     ${PacBioBAM_RootDir}/src)
 set(PacBioBAM_SwigSourceDir ${PacBioBAM_RootDir}/src/swig)
 set(PacBioBAM_TestsDir      ${PacBioBAM_RootDir}/tests)
+set(PacBioBAM_ToolsDir      ${PacBioBAM_RootDir}/tools)
 
 if(NOT PacBioBAM_OutputDir)
     set(PacBioBAM_OutputDir ${PacBioBAM_RootDir})
@@ -117,16 +151,36 @@ if(APPLE)
     set(CMAKE_MACOSX_RPATH OFF)
 endif()
 
+# Turn on windows-style filepath resolution.
+# We need to add this #define early (not just in the C# SWIG wrapper)
+if(WIN32 AND PacBioBAM_wrap_csharp)
+    add_definitions(-DPBBAM_WIN_FILEPATHS)
+endif()
+
 # keep this order (src first, at least)
 add_subdirectory(src)
-add_subdirectory(tools)
+
+if(PacBioBAM_build_tools)
+    add_subdirectory(tools)
+endif()
+
 if(PacBioBAM_build_docs)
     add_subdirectory(docs)
 endif()
+
 if(PacBioBAM_build_tests)
+
     if (NOT GTEST_SRC_DIR)
-        set(GTEST_SRC_DIR ../gtest)
+        set(PREBUILT_GTEST_SRC ${PacBioBAM_RootDir}/../../../../prebuilt.tmpout/gtest/gtest_1.7.0/)
+        if(EXISTS ${PREBUILT_GTEST_SRC})
+            set(GTEST_SRC_DIR ${PREBUILT_GTEST_SRC})
+        else()
+            set(GTEST_SRC_DIR ../gtest) # keep old fallback behavior for external builds, for now at least
+        endif()
     endif()
+
     add_subdirectory(${GTEST_SRC_DIR} external/gtest/build)
     add_subdirectory(tests)
+
 endif()
+
diff --git a/INSTALL.md b/INSTALL.md
index 1d72d77..86dddda 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -1,114 +1,3 @@
-# PacBio::BAM - building & integration
+# PacBio::BAM - building & integrating
 
-- [Dependencies](#dependencies)
-- [Build](#build)
-- [Test](#test)
-- [Integration](#integration)
-    - [CMake](#cmake)
-	- [Other](#other)
-- [SWIG](#swig)
-    - [Python](#python)
-	- [R](#r)
-	- [CSharp](#csharp)
-
-## Dependencies
-  - CMake v2.8+
-  - Boost 1.54+
-  - zlib
-  - samtools exe (*)
-
-(*) NOTE: ppbam uses samtools for some of its tests, for now at least. The current
-build system points uses a relative path to one of the "prebuilt" samtools binaries.
-If you have checked out pbbam to any path that is NOT:
-
-    ///depot/software/smrtanalysis/bioinformatics/staging/PostPrimary/pbbam
-
-then please edit the Samtools_Dir variable in pbbam/tests/CMakeLists.txt to a place
-that works for your setup. That could just be as simple as "" if you already have
-samtools somewhere in your PATH.
-
-## Build
-
-To perform a simple build of the library (and its tests):
-
-    $ cd <pbbam_root>
-    $ mkdir build
-    $ cd build
-    $ cmake ..
-    $ make
-
-## Test
-    
-There are 2 options for testing the library. 
-
-1) Run the test executable directly:
-
-    $ <pbbam_root>/tests/bin/test_pbbam 
-
-which displays the GoogleTest-formatted results for the 250+ individual tests. This
-provides fine-grained info on any failed test.
-
-2) The other option is to use CMake/CTest-generated 'make' command:
-
-    $ cd <pbbam_root>/build
-    $ make test
-
-which collapses all of the test output into a single, CTest-formatted pass/fail display.
-
-## Integration
-
-### CMake
-
-If you are using CMake for your library or application, you can use the following steps
-to automate both the building of pbbam and its dependencies (if necessary) and importing
-the proper include paths, library paths, etc. If the pbbam library already exists, then
-the header/lib variables are simply imported.
-
-    # just for convenience
-    set(PacBioBAM_RootDir </anywhere/on/disk/path/to/pbbam>)  
-
-    # add_subdirectory() sounds a bit misleading, the path can be *anywhere* on disk. 
-    # the 2nd arg tells CMake where it should build pbbam if necessary
-    add_subdirectory(${PacBioBAM_RootDir} ${PacBioBAM_RootDir}/build) 
-  
-    # setup your client 
-    add_executable(foo ....)
-
-    # PacBioBAM_INCLUDE_DIRS provides all pbbam headers, as well as dependencies
-    include_directories( .... ${PacBioBAM_INCLUDE_DIRS} )
-
-    # PacBioBAM_LIBRARIES provides libpbbam.a, as well as dependencies
-    target_link_libraries( foo ..... ${PacBioBAM_LIBRARIES} )
-
-### Other
-
-The following instructions apply to all non-CMake-based builds. In addition to Boost headers & zlib, the relevant include paths for pbbam are:
-
-    <pbbam_root>/include
-    <pbbam_root>/third-party/htslib
-
-which allows these statements:
-
-    #include <pbbam/BamRecord.h>
-    #include <htslib/sam.h>       
-
-and so on in your code. And the relevant libraries to link to are:
-
-    <pbbam_root>/lib/libpbbam.a
-    <pbbam_root>/third-party/htslib/libhts.a
-
-## SWIG
-
-TODO: fill this in
-
-### Python
-
-TODO: fill this in
-
-### R
-
-TODO: fill this in
-
-### CSharp
-
-TODO: fill this in
+Detailed build instructions can be found [here](http://pbbam.readthedocs.org/en/latest/getting_started.html).
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..77e9557
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,34 @@
+Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted (subject to the limitations in the
+disclaimer below) provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following
+   disclaimer in the documentation and/or other materials provided
+   with the distribution.
+
+ * Neither the name of Pacific Biosciences nor the names of its
+   contributors may be used to endorse or promote products derived
+   from this software without specific prior written permission.
+
+NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..046296e
--- /dev/null
+++ b/README.md
@@ -0,0 +1,29 @@
+# pbbam
+
+[![Build Status](https://travis-ci.org/PacificBiosciences/pbbam.svg?branch=master)](https://travis-ci.org/PacificBiosciences/pbbam) [![Documentation Status](https://readthedocs.org/projects/pbbam/badge/?version=latest)](http://pbbam.readthedocs.org/en/latest/?badge=latest)
+ 
+As of the 3.0 release of SMRTanalysis, PacBio is embracing the industry standard BAM
+format for (both aligned and unaligned) basecall data files. We have also formulated
+a BAM companion file format (bam.pbi) enabling fast access to a richer set of per-read
+information as well as compatibility for software built around the legacy cmp.h5 format.
+ 
+The **pbbam** software package provides components to create, query, & edit PacBio BAM
+files and associated indices. These components include a core C++ library, bindings for
+additional languages, and command-line utilities.
+
+### Note:
+
+This library is **not** intended to be used as a general-purpose BAM utility - all input & output BAMs must adhere to the [PacBio BAM format specification](https://github.com/PacificBiosciences/PacBioFileFormats/blob/3.0/BAM.rst). Non-PacBio BAMs will cause exceptions to be thrown.
+ 
+##  Documentation
+
+  - [Documentation Home](http://pbbam.readthedocs.org/en/latest/index.html)
+    - [Getting Started](http://pbbam.readthedocs.org/en/latest/getting_started.html)
+    - [C++ API Reference](http://pbbam.readthedocs.org/en/latest/api_reference.html)
+
+  - [Changelog](https://github.com/PacificBiosciences/pbbam/blob/master/CHANGELOG.md)
+
+## License
+
+ - [PacBio open source license](https://github.com/PacificBiosciences/pbbam/blob/master/LICENSE.txt)
+
diff --git a/cmake/FindCSharp.cmake b/cmake/FindCSharp.cmake
new file mode 100644
index 0000000..08d09a7
--- /dev/null
+++ b/cmake/FindCSharp.cmake
@@ -0,0 +1,72 @@
+#
+# A CMake Module for finding and using C# (.NET and Mono).
+#
+# The following variables are set:
+#   CSHARP_FOUND - set to ON if C# is found
+#   CSHARP_USE_FILE - the path to the C# use file
+#   CSHARP_TYPE - the type of the C# compiler (eg. ".NET" or "Mono")
+#   CSHARP_VERSION - the version of the C# compiler (eg. "v4.0" or "2.10.2")
+#   CSHARP_COMPILER - the path to the C# compiler executable (eg. "C:/Windows/Microsoft.NET/Framework/v4.0.30319/csc.exe" or "/usr/bin/gmcs")
+#   CSHARP_INTERPRETER - the path to interpreter needed to run CSharp executables
+#   CSHARP_PLATFORM - the C# target platform
+#   CSHARP_SDK - the SDK commandline switch (empty for .NET, for Mono eg. "/sdk:2" or "/sdk:4")
+#
+# This file is based on the work of GDCM:
+#   http://gdcm.svn.sf.net/viewvc/gdcm/trunk/CMake/FindCSharp.cmake
+# Copyright (c) 2006-2010 Mathieu Malaterre <mathieu.malaterre at gmail.com>
+#
+
+# TODO: ADD ABILITY TO SELECT WHICH C# COMPILER eg. .NET or Mono (if both exist). For the moment, .NET is selected above Mono.
+
+# Make sure find package macros are included
+include( FindPackageHandleStandardArgs )
+
+unset( CSHARP_COMPILER CACHE )
+unset( CSHARP_INTERPRETER CACHE )
+unset( CSHARP_TYPE CACHE )
+unset( CSHARP_VERSION CACHE )
+unset( CSHARP_FOUND CACHE )
+
+# By default use anycpu platform, allow the user to override
+set( CSHARP_PLATFORM "anycpu" CACHE STRING "C# target platform: x86, x64, anycpu, or itanium" )
+if( NOT ${CSHARP_PLATFORM} MATCHES "x86|x64|anycpu|itanium" )
+  message( FATAL_ERROR "The C# target platform '${CSHARP_PLATFORM}' is not valid. Please enter one of the following: x86, x64, anycpu, or itanium" )
+endif( )
+
+if( WIN32 )
+  find_package( DotNetFrameworkSdk )
+  if( NOT CSHARP_DOTNET_FOUND )
+    find_package( Mono )
+  endif( )
+else( UNIX )
+  find_package( Mono )
+endif( )
+
+if( CSHARP_DOTNET_FOUND )
+  set( CSHARP_TYPE ".NET" CACHE STRING "Using the .NET compiler" )
+  set( CSHARP_VERSION ${CSHARP_DOTNET_VERSION} CACHE STRING "C# .NET compiler version" FORCE )
+  set( CSHARP_COMPILER ${CSHARP_DOTNET_COMPILER_${CSHARP_DOTNET_VERSION}} CACHE STRING "Full path to .NET compiler" FORCE )
+  set( CSHARP_INTERPRETER "" CACHE INTERNAL "Interpretor not required for .NET" FORCE )
+elseif( CSHARP_MONO_FOUND )
+  set( CSHARP_TYPE "Mono" CACHE STRING "Using the Mono compiler" )
+  set( CSHARP_VERSION ${CSHARP_MONO_VERSION} CACHE STRING "C# Mono compiler version" FORCE )
+  set( CSHARP_COMPILER ${CSHARP_MONO_COMPILER_${CSHARP_MONO_VERSION}} CACHE STRING "Full path to Mono compiler" FORCE )
+  set( CSHARP_INTERPRETER ${CSHARP_MONO_INTERPRETER_${CSHARP_MONO_VERSION}} CACHE STRING "Full path to Mono interpretor" FORCE )
+  set( CSHARP_SDK "/sdk:4.5" CACHE STRING "C# Mono SDK commandline switch (e.g. /sdk:2, /sdk:4, /sdk:5)" )
+endif( )
+
+# Handle WIN32 specific issues
+if ( WIN32 )
+  if ( CSHARP_COMPILER MATCHES "bat" )
+    set( CSHARP_COMPILER "call ${CSHARP_COMPILER}" )
+  endif ( )
+endif( )
+
+FIND_PACKAGE_HANDLE_STANDARD_ARGS(CSharp DEFAULT_MSG CSHARP_TYPE CSHARP_VERSION CSHARP_COMPILER)
+
+mark_as_advanced( CSHARP_TYPE CSHARP_VERSION CSHARP_COMPILER CSHARP_INTERPRETER CSHARP_PLATFORM CSHARP_SDK )
+
+# Set the USE_FILE path
+# http://public.kitware.com/Bug/view.php?id=7757
+get_filename_component( current_list_path ${CMAKE_CURRENT_LIST_FILE} PATH )
+set( CSHARP_USE_FILE ${current_list_path}/UseCSharp.cmake )
diff --git a/cmake/FindDotNetFrameworkSdk.cmake b/cmake/FindDotNetFrameworkSdk.cmake
new file mode 100644
index 0000000..8e12c70
--- /dev/null
+++ b/cmake/FindDotNetFrameworkSdk.cmake
@@ -0,0 +1,29 @@
+# Set paths and vars for .NET compilers 
+# This is hand-rolled because I had problems with the one from SimpleITK
+
+#
+# The following variables are set:
+#   CSHARP_DOTNET_FOUND
+#   CSHARP_DOTNET_COMPILER_${version} eg. "CSHARP_DOTNET_COMPILER_v4.0.30319"
+#   CSHARP_DOTNET_VERSION eg. "v4.0.30319"
+#   CSHARP_DOTNET_VERSIONS eg. "v2.0.50727, v3.5, v4.0.30319"
+#   DotNetFrameworkSdk_USE_FILE
+#
+#   CSHARP_PROJECT_BUILDER (xbuild/msbuild)
+
+set(framework_dir "C:/Windows/Microsoft.NET/Framework")
+
+set(CSHARP_DOTNET_VERSION "v4.0.30319")
+set(CSHARP_DOTNET_VERSIONS "")
+set(CSHARP_DOTNET_COMPILER_${CSHARP_DOTNET_VERSION} "${framework_dir}/${CSHARP_DOTNET_VERSION}/csc.exe")
+set(CSHARP_PROJECT_BUILDER "${framework_dir}/${CSHARP_DOTNET_VERSION}/MSBuild.exe")
+
+if(EXISTS ${CSHARP_DOTNET_COMPILER_${CSHARP_DOTNET_VERSION}})
+	set(CSHARP_DOTNET_FOUND 1)
+else()
+	set(CSHARP_DOTNET_FOUND 0)
+endif()
+
+# Set USE_FILE
+get_filename_component( current_list_path ${CMAKE_CURRENT_LIST_FILE} PATH )
+set( DotNetFrameworkSdk_USE_FILE ${current_list_path}/UseDotNetFrameworkSdk.cmake )
\ No newline at end of file
diff --git a/cmake/FindMono.cmake b/cmake/FindMono.cmake
new file mode 100644
index 0000000..0fab116
--- /dev/null
+++ b/cmake/FindMono.cmake
@@ -0,0 +1,167 @@
+#
+# A CMake Module for finding Mono.
+#
+# The following variables are set:
+#   CSHARP_MONO_FOUND
+#   CSHARP_MONO_COMPILER_${version} eg. "CSHARP_MONO_COMPILER_2.10.2"
+#   CSHARP_MONO_INTERPRETER_${version} eg. "CSHARP_MONO_INTERPRETER_2.10.2"
+#   CSHARP_MONO_VERSION eg. "2.10.2"
+#   CSHARP_MONO_VERSIONS eg. "2.10.2, 2.6.7"
+#
+# Additional references can be found here:
+#   http://www.mono-project.com/Main_Page
+#   http://www.mono-project.com/CSharp_Compiler
+#   http://mono-project.com/FAQ:_Technical (How can I tell where the Mono runtime is installed)
+#
+# This file is based on the work of GDCM:
+#   http://gdcm.svn.sf.net/viewvc/gdcm/trunk/CMake/FindMono.cmake
+# Copyright (c) 2006-2010 Mathieu Malaterre <mathieu.malaterre at gmail.com>
+#
+
+set( csharp_mono_valid 1 )
+if( DEFINED CSHARP_MONO_FOUND )
+  # The Mono compiler has already been found
+  # It may have been reset by the user, verify it is correct
+  if( NOT DEFINED CSHARP_MONO_COMPILER_${CSHARP_MONO_VERSION} )
+    set( csharp_mono_version_user ${CSHARP_MONO_VERSION} )
+    set( csharp_mono_valid 0 )
+    set( CSHARP_MONO_FOUND 0 )
+    set( CSHARP_MONO_VERSION "CSHARP_MONO_VERSION-NOTVALID" CACHE STRING "C# Mono compiler version, choices: ${CSHARP_MONO_VERSIONS}" FORCE )
+    message( FATAL_ERROR "The C# Mono version '${csharp_mono_version_user}' is not valid. Please enter one of the following: ${CSHARP_MONO_VERSIONS}" )
+  endif( NOT DEFINED CSHARP_MONO_COMPILER_${CSHARP_MONO_VERSION} )
+endif( DEFINED CSHARP_MONO_FOUND )
+
+unset( CSHARP_MONO_VERSIONS CACHE ) # Clear versions
+if( WIN32 )
+  # Search for Mono on Win32 systems
+  # See http://mono-project.com/OldReleases and http://www.go-mono.com/mono-downloads/download.html
+  set( csharp_mono_bin_dirs )
+  set( csharp_mono_search_hints
+    "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.11.2;SdkInstallRoot]/bin"
+    "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.10.9;SdkInstallRoot]/bin"
+    "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.10.8;SdkInstallRoot]/bin"
+    "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.10.7;SdkInstallRoot]/bin"
+    "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.10.6;SdkInstallRoot]/bin"
+    "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.10.5;SdkInstallRoot]/bin"
+    "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.10.4;SdkInstallRoot]/bin"
+    "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.10.3;SdkInstallRoot]/bin"
+    "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.10.2;SdkInstallRoot]/bin"
+    "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.10.1;SdkInstallRoot]/bin"
+    "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.10;SdkInstallRoot]/bin"
+    "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.8;SdkInstallRoot]/bin"
+    "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.6.7;SdkInstallRoot]/bin"
+    "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.6.4;SdkInstallRoot]/bin"
+    "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.6.3;SdkInstallRoot]/bin"
+    "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.6.1;SdkInstallRoot]/bin"
+    "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.6;SdkInstallRoot]/bin"
+  )
+  foreach( csharp_mono_search_hint ${csharp_mono_search_hints} )
+    get_filename_component( csharp_mono_bin_dir "${csharp_mono_search_hint}" ABSOLUTE )
+    if ( EXISTS "${csharp_mono_bin_dir}" )
+      set( csharp_mono_bin_dirs ${csharp_mono_bin_dirs} ${csharp_mono_bin_dir} )
+    endif ( EXISTS "${csharp_mono_bin_dir}" )
+  endforeach( csharp_mono_search_hint )
+  # TODO: Use HKEY_LOCAL_MACHINE\Software\Novell\Mono\DefaultCLR to specify default version
+  # get_filename_component( test "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono;DefaultCLR]" NAME )
+
+  foreach ( csharp_mono_bin_dir ${csharp_mono_bin_dirs} )
+    string( REPLACE "\\" "/" csharp_mono_bin_dir ${csharp_mono_bin_dir} )
+    if (EXISTS "${csharp_mono_bin_dir}/dmcs.bat")
+      set( csharp_mono_executable "${csharp_mono_bin_dir}/dmcs.bat")
+    elseif (EXISTS "${csharp_mono_bin_dir}/gmcs.bat")
+      set( csharp_mono_executable "${csharp_mono_bin_dir}/gmcs.bat")
+    elseif (EXISTS "${csharp_mono_bin_dir}/mcs.bat")
+      set( csharp_mono_executable "${csharp_mono_bin_dir}/mcs.bat")
+    endif (EXISTS "${csharp_mono_bin_dir}/dmcs.bat")
+
+    if( csharp_mono_valid )
+      # Extract version number (eg. 2.10.2)
+      string(REGEX MATCH "([0-9]*)([.])([0-9]*)([.]*)([0-9]*)" csharp_mono_version_temp ${csharp_mono_bin_dir})
+      set( CSHARP_MONO_VERSION ${csharp_mono_version_temp} CACHE STRING "C# Mono compiler version" )
+      mark_as_advanced( CSHARP_MONO_VERSION )
+
+      # Add variable holding executable
+      set( CSHARP_MONO_COMPILER_${csharp_mono_version_temp} ${csharp_mono_executable} CACHE STRING "C# Mono compiler ${csharp_mono_version_temp}" FORCE )
+      mark_as_advanced( CSHARP_MONO_COMPILER_${csharp_mono_version_temp} )
+
+      # Set interpreter
+      if (EXISTS "${csharp_mono_bin_dir}/mono.exe")
+        set( CSHARP_MONO_INTERPRETER_${csharp_mono_version_temp} "${csharp_mono_bin_dir}/mono.exe" CACHE STRING "C# Mono interpreter ${csharp_mono_version_temp}" FORCE )
+        mark_as_advanced( CSHARP_MONO_INTERPRETER_${csharp_mono_version_temp} )
+      endif (EXISTS "${csharp_mono_bin_dir}/mono.exe")
+    endif( csharp_mono_valid )
+
+    # Create a list of supported compiler versions
+    if( NOT DEFINED CSHARP_MONO_VERSIONS )
+      set( CSHARP_MONO_VERSIONS "${csharp_mono_version_temp}" CACHE STRING "Available C# Mono compiler versions" FORCE )
+    else( NOT DEFINED CSHARP_MONO_VERSIONS )
+      set( CSHARP_MONO_VERSIONS "${CSHARP_MONO_VERSIONS}, ${csharp_mono_version_temp}"  CACHE STRING "Available C# Mono versions" FORCE )
+    endif( NOT DEFINED CSHARP_MONO_VERSIONS )
+    mark_as_advanced( CSHARP_MONO_VERSIONS )
+
+    # We found at least one Mono compiler version
+    set( CSHARP_MONO_FOUND 1 CACHE INTERNAL "Boolean indicating if C# Mono was found" )
+  endforeach( csharp_mono_bin_dir )
+
+else( UNIX )
+  # Search for Mono on non-Win32 systems
+  set( chsarp_mono_names "mcs" "mcs.exe" "dmcs" "dmcs.exe" "smcs" "smcs.exe" "gmcs" "gmcs.exe" )
+  set(
+    csharp_mono_paths
+    "/usr/bin/"
+    "/usr/local/bin/"
+    "/usr/lib/mono/2.0"
+    "/opt/novell/mono/bin"
+  )
+  find_program(
+    csharp_mono_compiler # variable is added to the cache, we removed it below
+    NAMES ${chsarp_mono_names}
+    PATHS ${csharp_mono_paths}
+  )
+
+  if( EXISTS ${csharp_mono_compiler} )
+    # Determine version (the match input is quoted so that empty 'mono -V' output cannot break string())
+    find_program(
+      csharp_mono_interpreter # variable is added to the cache, we remove it below
+      NAMES mono
+      PATHS ${csharp_mono_paths}
+    )
+    if ( EXISTS ${csharp_mono_interpreter} )
+      execute_process(
+        COMMAND ${csharp_mono_interpreter} -V
+        OUTPUT_VARIABLE csharp_mono_version_string
+      )
+      string( REGEX MATCH "([0-9]*)([.])([0-9]*)([.]*)([0-9]*)" csharp_mono_version_temp "${csharp_mono_version_string}" )
+    endif ( EXISTS ${csharp_mono_interpreter} )
+
+    # We found Mono compiler; the version is set first because the interpreter variable below is keyed on it
+    set( CSHARP_MONO_VERSION ${csharp_mono_version_temp} CACHE STRING "C# Mono compiler version" )
+    mark_as_advanced( CSHARP_MONO_VERSION )
+    if ( EXISTS ${csharp_mono_interpreter} )
+      set( CSHARP_MONO_INTERPRETER_${CSHARP_MONO_VERSION} ${csharp_mono_interpreter} CACHE STRING "C# Mono interpreter ${csharp_mono_version_temp}" FORCE )
+      mark_as_advanced( CSHARP_MONO_INTERPRETER_${CSHARP_MONO_VERSION} )
+    endif ( EXISTS ${csharp_mono_interpreter} )
+    unset( csharp_mono_interpreter CACHE )
+    set( CSHARP_MONO_COMPILER_${CSHARP_MONO_VERSION} ${csharp_mono_compiler} CACHE STRING "C# Mono compiler ${CSHARP_MONO_VERSION}" FORCE )
+    mark_as_advanced( CSHARP_MONO_COMPILER_${CSHARP_MONO_VERSION} )
+    set( CSHARP_MONO_VERSIONS ${CSHARP_MONO_VERSION} CACHE STRING "Available C# Mono compiler versions" FORCE )
+    mark_as_advanced( CSHARP_MONO_VERSIONS )
+    set( CSHARP_MONO_FOUND 1 CACHE INTERNAL "Boolean indicating if C# Mono was found" )
+
+    # Assume xbuild is just xbuild.
+    set(CSHARP_PROJECT_BUILDER "xbuild")
+  endif( EXISTS ${csharp_mono_compiler} )
+
+  # Remove temp variable from cache
+  unset( csharp_mono_compiler CACHE )
+
+endif( WIN32 )
+
+if( CSHARP_MONO_FOUND )
+  # Report the found versions
+  message( STATUS "Found the following C# Mono versions: ${CSHARP_MONO_VERSIONS}" )
+endif( CSHARP_MONO_FOUND )
+
+# Set USE_FILE
+get_filename_component( current_list_path ${CMAKE_CURRENT_LIST_FILE} PATH )
+set( Mono_USE_FILE ${current_list_path}/UseMono.cmake )
diff --git a/cmake/FindR.cmake b/cmake/FindR.cmake
new file mode 100644
index 0000000..6ae4354
--- /dev/null
+++ b/cmake/FindR.cmake
@@ -0,0 +1,48 @@
+
+#
+# - This module locates an installed R distribution.
+#
+# Defines the following:
+#
+# R_INCLUDE_DIR      - Path to R include directory
+# R_LIBRARIES        - Path to R library
+# R_LIBRARY_BASE     -
+# R_COMMAND          - Path to R command
+# RSCRIPT_EXECUTABLE - Path to Rscript command
+#
+
+
+# Make sure find package macros are included
+include( FindPackageHandleStandardArgs )
+
+set(TEMP_CMAKE_FIND_APPBUNDLE ${CMAKE_FIND_APPBUNDLE})
+set(CMAKE_FIND_APPBUNDLE "NEVER")
+find_program(R_COMMAND R DOC "R executable.")
+if(R_COMMAND)
+  execute_process(WORKING_DIRECTORY . COMMAND ${R_COMMAND} RHOME OUTPUT_VARIABLE R_BASE_DIR OUTPUT_STRIP_TRAILING_WHITESPACE)
+  set(R_HOME ${R_BASE_DIR} CACHE PATH "R home directory obtained from R RHOME")
+  mark_as_advanced(R_HOME)
+endif(R_COMMAND)
+
+find_program(RSCRIPT_EXECUTABLE Rscript DOC "Rscript executable.")
+
+set(CMAKE_FIND_APPBUNDLE ${TEMP_CMAKE_FIND_APPBUNDLE})
+
+# R.h gets installed in all sorts of places -
+# ubuntu: /usr/share/R/include, RHEL/Fedora: /usr/include/R/R.h
+find_path(R_INCLUDE_DIR R.h PATHS ${R_INCLUDE_DIR_HINT} /usr/local/lib /usr/local/lib64 /usr/share /usr/include ${R_BASE_DIR} PATH_SUFFIXES include R R/include DOC "Path to file R.h")
+find_library(R_LIBRARY_BASE R PATHS ${R_BASE_DIR} PATH_SUFFIXES /lib DOC "R library (example libR.a, libR.dylib, etc.).")
+
+set(R_LIBRARIES ${R_LIBRARY_BASE})
+mark_as_advanced(RSCRIPT_EXECUTABLE R_LIBRARIES R_INCLUDE_DIR R_COMMAND R_LIBRARY_BASE)
+
+
+set( _REQUIRED_R_VARIABLES R_INCLUDE_DIR R_COMMAND )
+
+if( APPLE )
+  # On linux platform some times the libR.so is not available, however
+  # on apple a link error results if the library is linked.
+  list(  APPEND _REQUIRED_R_VARIABLES R_LIBRARIES R_LIBRARY_BASE )
+endif()
+
+find_package_handle_standard_args(R DEFAULT_MSG ${_REQUIRED_R_VARIABLES} )
diff --git a/cmake/PbbamTool.cmake b/cmake/PbbamTool.cmake
new file mode 100644
index 0000000..a1411a7
--- /dev/null
+++ b/cmake/PbbamTool.cmake
@@ -0,0 +1,23 @@
+include(CMakeParseArguments)
+
+function(create_pbbam_tool)
+
+    # parse args: TARGET <name> (single value) and SOURCES <file>... (multi value), stored as create_pbbam_tool_<KEYWORD>
+    set(oneValueArgs TARGET)
+    set(multiValueArgs SOURCES)
+    cmake_parse_arguments(create_pbbam_tool "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+    # create executable from SOURCES, written to ${PacBioBAM_BinDir} and linked against pbbam
+    include_directories(
+        ${ToolsCommonDir}              # shared tool code
+        ${CMAKE_CURRENT_BINARY_DIR}    # generated version headers
+        ${PacBioBAM_INCLUDE_DIRS}      # pbbam/htslib includes
+    )
+    add_executable(${create_pbbam_tool_TARGET} ${create_pbbam_tool_SOURCES})
+    set_target_properties(
+        ${create_pbbam_tool_TARGET} PROPERTIES
+        RUNTIME_OUTPUT_DIRECTORY ${PacBioBAM_BinDir}
+    )
+    target_link_libraries(${create_pbbam_tool_TARGET} pbbam)
+
+endfunction(create_pbbam_tool)
diff --git a/cmake/UseCSharp.cmake b/cmake/UseCSharp.cmake
new file mode 100644
index 0000000..dac4537
--- /dev/null
+++ b/cmake/UseCSharp.cmake
@@ -0,0 +1,111 @@
+# CMake Module for finding and using C# (.NET and Mono).
+#
+# The following global variables are assumed to exist:
+#   CSHARP_SOURCE_DIRECTORY - path to C# sources
+#   CSHARP_BINARY_DIRECTORY - path to place resultant C# binary files
+#
+# The following variables are set:
+#   CSHARP_TYPE - the type of the C# compiler (eg. ".NET" or "Mono")
+#   CSHARP_COMPILER - the path to the C# compiler executable (eg. "C:/Windows/Microsoft.NET/Framework/v4.0.30319/csc.exe")
+#   CSHARP_VERSION - the version number of the C# compiler (eg. "v4.0.30319")
+#
+# The following macros are defined:
+#   CSHARP_ADD_EXECUTABLE( name references [files] [output_dir] ) - Define C# executable with the given name
+#   CSHARP_ADD_LIBRARY( name references [files] [output_dir] ) - Define C# library with the given name
+#
+# Examples:
+#   CSHARP_ADD_EXECUTABLE( MyExecutable "" "Program.cs" )
+#   CSHARP_ADD_EXECUTABLE( MyExecutable "ref1.dll ref2.dll" "Program.cs File1.cs" )
+#   CSHARP_ADD_EXECUTABLE( MyExecutable "ref1.dll;ref2.dll" "Program.cs;File1.cs" )
+#
+# This file is based on the work of GDCM:
+#   http://gdcm.svn.sf.net/viewvc/gdcm/trunk/CMake/UseCSharp.cmake
+# Copyright (c) 2006-2010 Mathieu Malaterre <mathieu.malaterre at gmail.com>
+#
+
+# TODO: ADD SUPPORT FOR LINK LIBRARIES
+
+# Check something was found
+if( NOT CSHARP_COMPILER )
+  message( WARNING "A C# compiler executable was not found on your system" )
+endif( NOT CSHARP_COMPILER )
+
+# Include type-based USE_FILE
+if( CSHARP_TYPE MATCHES ".NET" )
+  include( ${DotNetFrameworkSdk_USE_FILE} )
+elseif ( CSHARP_TYPE MATCHES "Mono" )
+  include( ${Mono_USE_FILE} )
+endif ( CSHARP_TYPE MATCHES ".NET" )
+
+macro( CSHARP_ADD_LIBRARY name )
+  CSHARP_ADD_PROJECT( "library" ${name} ${ARGN} )
+endmacro( CSHARP_ADD_LIBRARY )
+
+macro( CSHARP_ADD_EXECUTABLE name )
+  CSHARP_ADD_PROJECT( "exe" ${name} ${ARGN} )
+endmacro( CSHARP_ADD_EXECUTABLE )
+
+# Private macro: shared implementation behind CSHARP_ADD_LIBRARY ("library") and CSHARP_ADD_EXECUTABLE ("exe")
+macro( CSHARP_ADD_PROJECT type name )
+  set( refs "/reference:System.dll" )
+  set( sources )
+  set( sources_dep )
+
+  if( ${type} MATCHES "library" )
+    set( output "dll" )
+  elseif( ${type} MATCHES "exe" )
+    set( output "exe" )
+  endif( ${type} MATCHES "library" )
+
+  # Step through each remaining argument, classifying it as an assembly reference or a source
+  foreach( it ${ARGN} )
+    if( ${it} MATCHES "\\.dll$" )
+       # Argument ends in .dll (merely containing "dll" is not enough), add reference
+       list( APPEND refs /reference:${it} )
+    else( )
+      # Argument is a source file: as given, relative to CSHARP_SOURCE_DIRECTORY, or a wildcard
+      if( EXISTS ${it} )
+        list( APPEND sources ${it} )
+        list( APPEND sources_dep ${it} )
+      elseif( EXISTS ${CSHARP_SOURCE_DIRECTORY}/${it} )
+        list( APPEND sources ${CSHARP_SOURCE_DIRECTORY}/${it} )
+        list( APPEND sources_dep ${CSHARP_SOURCE_DIRECTORY}/${it} )
+      elseif( ${it} MATCHES "[*]" )
+        # For dependencies, we need to expand wildcards (the compiler receives the pattern itself)
+        FILE( GLOB it_glob ${it} )
+        list( APPEND sources ${it} )
+        list( APPEND sources_dep ${it_glob} )
+      endif( )
+    endif ( )
+  endforeach( )
+
+  # Check we have at least one source
+  list( LENGTH sources_dep sources_length )
+  if ( ${sources_length} LESS 1 )
+    MESSAGE( SEND_ERROR "No C# sources were specified for ${type} ${name}" )
+  endif ()
+  list( SORT sources_dep )
+
+  # Normalise path separators; the list is quoted because string( REPLACE ) concatenates multiple inputs
+  if (WIN32)
+    string( REPLACE "/" "\\" sources "${sources}" )
+  else (UNIX)
+    string( REPLACE "\\" "/" sources "${sources}" )
+  endif (WIN32)
+
+  # Add custom command producing the assembly, and an ALL target that depends on it
+  MESSAGE( STATUS "Adding C# ${type} ${name}: '${CSHARP_COMPILER} /t:${type} /out:${name}.${output} /platform:${CSHARP_PLATFORM} ${CSHARP_SDK} ${refs} ${sources}'" )
+  add_custom_command(
+    COMMENT "Compiling C# ${type} ${name}: '${CSHARP_COMPILER} /t:${type} /out:${name}.${output} /platform:${CSHARP_PLATFORM} ${CSHARP_SDK} ${refs} ${sources}'"
+    OUTPUT ${CSHARP_BINARY_DIRECTORY}/${name}.${output}
+    COMMAND ${CSHARP_COMPILER}
+    ARGS /t:${type} /out:${name}.${output} /platform:${CSHARP_PLATFORM} ${CSHARP_SDK} ${refs} ${sources}
+    WORKING_DIRECTORY ${CSHARP_BINARY_DIRECTORY}
+    DEPENDS ${sources_dep}
+  )
+  add_custom_target(
+    ${name} ALL
+    DEPENDS ${CSHARP_BINARY_DIRECTORY}/${name}.${output}
+    SOURCES ${sources_dep}
+  )
+endmacro( CSHARP_ADD_PROJECT )
diff --git a/cmake/UseDotNetFrameworkSdk.cmake b/cmake/UseDotNetFrameworkSdk.cmake
new file mode 100644
index 0000000..6be4027
--- /dev/null
+++ b/cmake/UseDotNetFrameworkSdk.cmake
@@ -0,0 +1,16 @@
+#
+# A CMake Module for using the .NET Framework SDK.
+#
+# The following variables are set:
+#   (none)
+#
+# Additional references can be found here:
+#   http://www.mono-project.com/Main_Page
+#   http://www.mono-project.com/CSharp_Compiler
+#
+# This file is based on the work of GDCM:
+#   http://gdcm.svn.sf.net/viewvc/gdcm/trunk/CMake/FindMono.cmake
+# Copyright (c) 2006-2010 Mathieu Malaterre <mathieu.malaterre at gmail.com>
+#
+
+message( STATUS "Using .NET compiler version ${CSHARP_DOTNET_VERSION}" )
\ No newline at end of file
diff --git a/cmake/UseMono.cmake b/cmake/UseMono.cmake
new file mode 100644
index 0000000..16a80ae
--- /dev/null
+++ b/cmake/UseMono.cmake
@@ -0,0 +1,16 @@
+#
+# A CMake Module for using Mono.
+#
+# The following variables are set:
+#   (none)
+#
+# Additional references can be found here:
+#   http://www.mono-project.com/Main_Page
+#   http://www.mono-project.com/CSharp_Compiler
+#
+# This file is based on the work of GDCM:
+#   http://gdcm.svn.sf.net/viewvc/gdcm/trunk/CMake/FindMono.cmake
+# Copyright (c) 2006-2010 Mathieu Malaterre <mathieu.malaterre at gmail.com>
+#
+
+message( STATUS "Using Mono compiler version ${CSHARP_MONO_VERSION}" )
diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in
index 66c4de1..90f6f63 100644
--- a/docs/Doxyfile.in
+++ b/docs/Doxyfile.in
@@ -25,7 +25,7 @@ DOXYFILE_ENCODING      = UTF-8
 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded 
 # by quotes) that should identify the project.
 
-PROJECT_NAME           = @CMAKE_PROJECT_NAME@
+PROJECT_NAME           = @PacBioBAM_NAME@
 
 # The PROJECT_NUMBER tag can be used to enter a project or revision number. 
 # This could be handy for archiving the generated documentation or 
@@ -115,7 +115,7 @@ INLINE_INHERITED_MEMB  = NO
 # path before files name in the file list and in the header files. If set 
 # to NO the shortest path that makes the file name unique will be used.
 
-FULL_PATH_NAMES        = NO
+FULL_PATH_NAMES        = YES
 
 # If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag 
 # can be used to strip a user-defined part of the path. Stripping is 
@@ -133,7 +133,7 @@ STRIP_FROM_PATH        =
 # definition is used. Otherwise one should specify the include paths that 
 # are normally passed to the compiler using the -I flag.
 
-STRIP_FROM_INC_PATH    = 
+STRIP_FROM_INC_PATH    = @PacBioBAM_IncludeDir@
 
 # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter 
 # (but less readable) file names. This can be useful is your file systems 
@@ -238,7 +238,7 @@ EXTENSION_MAPPING      =
 # func(std::string) {}). This also make the inheritance and collaboration 
 # diagrams that involve STL classes more complete and accurate.
 
-BUILTIN_STL_SUPPORT    = NO
+BUILTIN_STL_SUPPORT    = YES
 
 # If you use Microsoft's C++/CLI language, you should set this option to YES to 
 # enable parsing support.
@@ -310,7 +310,7 @@ SYMBOL_CACHE_SIZE      = 0
 # Private class members and static file members will be hidden unless 
 # the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
 
-EXTRACT_ALL            = YES
+EXTRACT_ALL            = NO
 
 # If the EXTRACT_PRIVATE tag is set to YES all private members of a class 
 # will be included in the documentation.
@@ -320,13 +320,13 @@ EXTRACT_PRIVATE        = NO
 # If the EXTRACT_STATIC tag is set to YES all static members of a file 
 # will be included in the documentation.
 
-EXTRACT_STATIC         = NO
+EXTRACT_STATIC         = YES
 
 # If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) 
 # defined locally in source files will be included in the documentation. 
 # If set to NO only classes defined in header files are included.
 
-EXTRACT_LOCAL_CLASSES  = YES
+EXTRACT_LOCAL_CLASSES  = NO
 
 # This flag is only useful for Objective-C code. When set to YES local 
 # methods, which are defined in the implementation section but not in 
@@ -592,7 +592,7 @@ WARN_LOGFILE           =
 # directories like "/usr/src/myproject". Separate the files or directories 
 # with spaces.
 
-INPUT                  = @PacBioBAM_IncludeDir@/pbbam @PacBioBAM_SourceDir@
+INPUT                  = @PacBioBAM_IncludeDir@
 
 # This tag can be used to specify the character encoding of the source files 
 # that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is 
@@ -651,7 +651,7 @@ RECURSIVE              = YES
 # excluded from the INPUT source files. This way you can easily exclude a 
 # subdirectory from a directory tree whose root is specified with the INPUT tag.
 
-EXCLUDE                =
+EXCLUDE                = @PacBioBAM_IncludeDir@/pbbam/internal 
 
 # The EXCLUDE_SYMLINKS tag can be used select whether or not files or 
 # directories that are symbolic links (a Unix filesystem feature) are excluded 
@@ -673,13 +673,13 @@ EXCLUDE_PATTERNS       =
 # wildcard * is used, a substring. Examples: ANamespace, AClass, 
 # AClass::ANamespace, ANamespace::*Test
 
-EXCLUDE_SYMBOLS        =
+EXCLUDE_SYMBOLS        = pugi, PacBio::BAM::internal
 
 # The EXAMPLE_PATH tag can be used to specify one or more files or 
 # directories that contain example code fragments that are included (see 
 # the \include command).
 
-EXAMPLE_PATH           = 
+EXAMPLE_PATH           = examples 
 
 # If the value of the EXAMPLE_PATH tag contains directories, you can use the 
 # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp 
@@ -1226,7 +1226,7 @@ MAN_LINKS              = NO
 # generate an XML file that captures the structure of 
 # the code including all documentation.
 
-GENERATE_XML           = NO
+GENERATE_XML           = YES
 
 # The XML_OUTPUT tag is used to specify where the XML pages will be put. 
 # If a relative path is entered the value of OUTPUT_DIRECTORY will be 
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..14e0fb1
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,168 @@
+# Makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS    =
+SPHINXBUILD   = sphinx-build
+PAPER         =
+BUILDDIR      = build
+SOURCEDIR	  = source
+
+# Internal variables.
+PAPEROPT_a4     = -D latex_paper_size=a4
+PAPEROPT_letter = -D latex_paper_size=letter
+ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) $(SOURCEDIR) 
+# the i18n builder cannot share the environment and doctrees with the others
+I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) $(SOURCEDIR)
+
+.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext fig
+
+help:
+	@echo "Please use \`make <target>' where <target> is one of"
+	@echo "  html       to make standalone HTML files"
+	@echo "  dirhtml    to make HTML files named index.html in directories"
+	@echo "  singlehtml to make a single large HTML file"
+	@echo "  pickle     to make pickle files"
+	@echo "  json       to make JSON files"
+	@echo "  htmlhelp   to make HTML files and a HTML help project"
+	@echo "  qthelp     to make HTML files and a qthelp project"
+	@echo "  devhelp    to make HTML files and a Devhelp project"
+	@echo "  epub       to make an epub"
+	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
+	@echo "  text       to make text files"
+	@echo "  man        to make manual pages"
+	@echo "  texinfo    to make Texinfo files"
+	@echo "  info       to make Texinfo files and run them through makeinfo"
+	@echo "  gettext    to make PO message catalogs"
+	@echo "  changes    to make an overview of all changed/added/deprecated items"
+	@echo "  linkcheck  to check all external links for integrity"
+	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
+
+clean:
+	-rm -rf $(BUILDDIR)/*
+
+html: basefig MANY_CLUSTER.png
+	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
+	@echo
+	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
+
+dirhtml:
+	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
+	@echo
+	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
+
+singlehtml:
+	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
+	@echo
+	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
+
+pickle:
+	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
+	@echo
+	@echo "Build finished; now you can process the pickle files."
+
+json:
+	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
+	@echo
+	@echo "Build finished; now you can process the JSON files."
+
+htmlhelp:
+	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
+	@echo
+	@echo "Build finished; now you can run HTML Help Workshop with the" \
+	      ".hhp project file in $(BUILDDIR)/htmlhelp."
+
+qthelp:
+	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
+	@echo
+	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
+	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
+	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pbtoolkits.qhcp"
+	@echo "To view the help file:"
+	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pbtoolkits.qhc"
+
+devhelp:
+	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
+	@echo
+	@echo "Build finished."
+	@echo "To view the help file:"
+	@echo "# mkdir -p $$HOME/.local/share/devhelp/pbtoolkits"
+	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pbtoolkits"
+	@echo "# devhelp"
+
+epub:
+	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
+	@echo
+	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
+
+latex:
+	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+	@echo
+	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
+	@echo "Run \`make' in that directory to run these through (pdf)latex" \
+	      "(use \`make latexpdf' here to do that automatically)."
+
+latexpdf:
+	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+	@echo "Running LaTeX files through pdflatex..."
+	$(MAKE) -C $(BUILDDIR)/latex all-pdf
+	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+text:
+	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
+	@echo
+	@echo "Build finished. The text files are in $(BUILDDIR)/text."
+
+man:
+	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
+	@echo
+	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
+
+texinfo:
+	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+	@echo
+	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
+	@echo "Run \`make' in that directory to run these through makeinfo" \
+	      "(use \`make info' here to do that automatically)."
+
+info:
+	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+	@echo "Running Texinfo files through makeinfo..."
+	$(MAKE) -C $(BUILDDIR)/texinfo info
+	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
+
+gettext:
+	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
+	@echo
+	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
+
+changes:
+	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
+	@echo
+	@echo "The overview file is in $(BUILDDIR)/changes."
+
+linkcheck:
+	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
+	@echo
+	@echo "Link check complete; look for any errors in the above output " \
+	      "or in $(BUILDDIR)/linkcheck/output.txt."
+
+doctest:
+	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
+	@echo "Testing of doctests in the sources finished, look at the " \
+	      "results in $(BUILDDIR)/doctest/output.txt."
+
+basefig:
+	dot -Tpng $(SOURCEDIR)/dependencies.dot > $(SOURCEDIR)/$@
+	grep -v "\"pbsmrtpipe\" ->" $(SOURCEDIR)/dependencies.dot  \
+		| grep -v "> \"pbcore\"" \
+		| sed 's/All/Sparse/' > $(SOURCEDIR)/sparse_dependencies.dot  
+	dot -Tpng $(SOURCEDIR)/sparse_dependencies.dot \
+		> $(SOURCEDIR)/sparse_dependencies.png
+
+%.png: basefig
+	grep -v $* $(SOURCEDIR)/sparse_dependencies.dot | \
+	grep -v \? | sed 's/Sparse dependencies/Module bundles/' | \
+	dot -Tpng > $(SOURCEDIR)/$@
+
diff --git a/docs/examples/code/BarcodeQuery.txt b/docs/examples/code/BarcodeQuery.txt
new file mode 100644
index 0000000..3fe8fce
--- /dev/null
+++ b/docs/examples/code/BarcodeQuery.txt
@@ -0,0 +1,17 @@
+// using C++11 range-based for loop
+BarcodeQuery query(42, dataset);
+for (const BamRecord& r : query) {
+    assert(r.HasBarcodes());
+    assert(r.BarcodeForward() == 42 || r.BarcodeReverse() == 42);
+}
+
+// OR
+
+// using iterators directly
+BarcodeQuery query(42, dataset);
+auto iter = query.cbegin();
+auto end  = query.cend();
+for (; iter != end; ++iter) {
+    assert(iter->HasBarcodes());
+    assert(iter->BarcodeForward() == 42 || iter->BarcodeReverse() == 42);
+} 
diff --git a/docs/examples/code/Compare.txt b/docs/examples/code/Compare.txt
new file mode 100644
index 0000000..deecd8d
--- /dev/null
+++ b/docs/examples/code/Compare.txt
@@ -0,0 +1,3 @@
+// sort on increasing ZMW hole number
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::Zmw());
diff --git a/docs/examples/code/Compare_AlignedEnd.txt b/docs/examples/code/Compare_AlignedEnd.txt
new file mode 100644
index 0000000..d34ed67
--- /dev/null
+++ b/docs/examples/code/Compare_AlignedEnd.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::AlignedEnd());
diff --git a/docs/examples/code/Compare_AlignedStart.txt b/docs/examples/code/Compare_AlignedStart.txt
new file mode 100644
index 0000000..68de3e2
--- /dev/null
+++ b/docs/examples/code/Compare_AlignedStart.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::AlignedStart());
diff --git a/docs/examples/code/Compare_AlignedStrand.txt b/docs/examples/code/Compare_AlignedStrand.txt
new file mode 100644
index 0000000..6c22cdc
--- /dev/null
+++ b/docs/examples/code/Compare_AlignedStrand.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::AlignedStrand());
diff --git a/docs/examples/code/Compare_BarcodeForward.txt b/docs/examples/code/Compare_BarcodeForward.txt
new file mode 100644
index 0000000..1967341
--- /dev/null
+++ b/docs/examples/code/Compare_BarcodeForward.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::BarcodeForward());
diff --git a/docs/examples/code/Compare_BarcodeQuality.txt b/docs/examples/code/Compare_BarcodeQuality.txt
new file mode 100644
index 0000000..144f483
--- /dev/null
+++ b/docs/examples/code/Compare_BarcodeQuality.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::BarcodeQuality());
diff --git a/docs/examples/code/Compare_BarcodeReverse.txt b/docs/examples/code/Compare_BarcodeReverse.txt
new file mode 100644
index 0000000..9d3b245
--- /dev/null
+++ b/docs/examples/code/Compare_BarcodeReverse.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::BarcodeReverse());
diff --git a/docs/examples/code/Compare_FullName.txt b/docs/examples/code/Compare_FullName.txt
new file mode 100644
index 0000000..4b392b9
--- /dev/null
+++ b/docs/examples/code/Compare_FullName.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::FullName());
diff --git a/docs/examples/code/Compare_LocalContextFlag.txt b/docs/examples/code/Compare_LocalContextFlag.txt
new file mode 100644
index 0000000..aeab944
--- /dev/null
+++ b/docs/examples/code/Compare_LocalContextFlag.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::LocalContextFlag());
diff --git a/docs/examples/code/Compare_MapQuality.txt b/docs/examples/code/Compare_MapQuality.txt
new file mode 100644
index 0000000..fe22821
--- /dev/null
+++ b/docs/examples/code/Compare_MapQuality.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::MapQuality());
diff --git a/docs/examples/code/Compare_MovieName.txt b/docs/examples/code/Compare_MovieName.txt
new file mode 100644
index 0000000..cddcb64
--- /dev/null
+++ b/docs/examples/code/Compare_MovieName.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::MovieName());
diff --git a/docs/examples/code/Compare_NumDeletedBases.txt b/docs/examples/code/Compare_NumDeletedBases.txt
new file mode 100644
index 0000000..aa6dd4b
--- /dev/null
+++ b/docs/examples/code/Compare_NumDeletedBases.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::NumDeletedBases());
diff --git a/docs/examples/code/Compare_NumInsertedBases.txt b/docs/examples/code/Compare_NumInsertedBases.txt
new file mode 100644
index 0000000..917d87f
--- /dev/null
+++ b/docs/examples/code/Compare_NumInsertedBases.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::NumInsertedBases());
diff --git a/docs/examples/code/Compare_NumMatches.txt b/docs/examples/code/Compare_NumMatches.txt
new file mode 100644
index 0000000..47e3081
--- /dev/null
+++ b/docs/examples/code/Compare_NumMatches.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::NumMatches());
diff --git a/docs/examples/code/Compare_NumMismatches.txt b/docs/examples/code/Compare_NumMismatches.txt
new file mode 100644
index 0000000..12affb1
--- /dev/null
+++ b/docs/examples/code/Compare_NumMismatches.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::NumMismatches());
diff --git a/docs/examples/code/Compare_QueryEnd.txt b/docs/examples/code/Compare_QueryEnd.txt
new file mode 100644
index 0000000..d664d28
--- /dev/null
+++ b/docs/examples/code/Compare_QueryEnd.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::QueryEnd());
diff --git a/docs/examples/code/Compare_QueryStart.txt b/docs/examples/code/Compare_QueryStart.txt
new file mode 100644
index 0000000..12f6244
--- /dev/null
+++ b/docs/examples/code/Compare_QueryStart.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::QueryStart());
diff --git a/docs/examples/code/Compare_ReadAccuracy.txt b/docs/examples/code/Compare_ReadAccuracy.txt
new file mode 100644
index 0000000..9454309
--- /dev/null
+++ b/docs/examples/code/Compare_ReadAccuracy.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::ReadAccuracy());
diff --git a/docs/examples/code/Compare_ReadGroupId.txt b/docs/examples/code/Compare_ReadGroupId.txt
new file mode 100644
index 0000000..dab3497
--- /dev/null
+++ b/docs/examples/code/Compare_ReadGroupId.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::ReadGroupId());
diff --git a/docs/examples/code/Compare_ReadGroupNumericId.txt b/docs/examples/code/Compare_ReadGroupNumericId.txt
new file mode 100644
index 0000000..5ad8f9d
--- /dev/null
+++ b/docs/examples/code/Compare_ReadGroupNumericId.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::ReadGroupNumericId());
diff --git a/docs/examples/code/Compare_ReferenceEnd.txt b/docs/examples/code/Compare_ReferenceEnd.txt
new file mode 100644
index 0000000..ed42d05
--- /dev/null
+++ b/docs/examples/code/Compare_ReferenceEnd.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::ReferenceEnd());
diff --git a/docs/examples/code/Compare_ReferenceId.txt b/docs/examples/code/Compare_ReferenceId.txt
new file mode 100644
index 0000000..5628427
--- /dev/null
+++ b/docs/examples/code/Compare_ReferenceId.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::ReferenceId());
diff --git a/docs/examples/code/Compare_ReferenceName.txt b/docs/examples/code/Compare_ReferenceName.txt
new file mode 100644
index 0000000..1f76e7e
--- /dev/null
+++ b/docs/examples/code/Compare_ReferenceName.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::ReferenceName());
diff --git a/docs/examples/code/Compare_ReferenceStart.txt b/docs/examples/code/Compare_ReferenceStart.txt
new file mode 100644
index 0000000..0ccaf36
--- /dev/null
+++ b/docs/examples/code/Compare_ReferenceStart.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::ReferenceStart());
diff --git a/docs/examples/code/Compare_TypeFromOperator.txt b/docs/examples/code/Compare_TypeFromOperator.txt
new file mode 100644
index 0000000..afb0848
--- /dev/null
+++ b/docs/examples/code/Compare_TypeFromOperator.txt
@@ -0,0 +1,2 @@
+Compare::Type type = Compare::TypeFromOperator("!=");
+assert(type == Compare::NOT_EQUAL);
diff --git a/docs/examples/code/Compare_TypeToName.txt b/docs/examples/code/Compare_TypeToName.txt
new file mode 100644
index 0000000..c44e1cb
--- /dev/null
+++ b/docs/examples/code/Compare_TypeToName.txt
@@ -0,0 +1,2 @@
+string name = Compare::TypeToName(Compare::LESS_THAN);
+assert(name == "Compare::LESS_THAN");
diff --git a/docs/examples/code/Compare_Zmw.txt b/docs/examples/code/Compare_Zmw.txt
new file mode 100644
index 0000000..b02c426
--- /dev/null
+++ b/docs/examples/code/Compare_Zmw.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::Zmw());
diff --git a/docs/examples/code/EntireFileQuery.txt b/docs/examples/code/EntireFileQuery.txt
new file mode 100644
index 0000000..d3fcc2c
--- /dev/null
+++ b/docs/examples/code/EntireFileQuery.txt
@@ -0,0 +1,15 @@
+// using C++11 range-based for loop
+EntireFileQuery query(dataset);
+for (const BamRecord& record : query) {
+    // ... do stuff ...
+}
+
+// OR
+
+// using iterators
+EntireFileQuery query(dataset);
+auto iter = query.cbegin();
+auto end  = query.cend();
+for (; iter != end; ++iter) {
+    // ... do stuff ...
+}  
diff --git a/docs/examples/code/EntireFileQuery_BamFilename.txt b/docs/examples/code/EntireFileQuery_BamFilename.txt
new file mode 100644
index 0000000..484db61
--- /dev/null
+++ b/docs/examples/code/EntireFileQuery_BamFilename.txt
@@ -0,0 +1,4 @@
+EntireFileQuery query("foo.bam");
+for (const BamRecord& record : query) {
+    // do stuff
+}
diff --git a/docs/examples/code/EntireFileQuery_NonConst.txt b/docs/examples/code/EntireFileQuery_NonConst.txt
new file mode 100644
index 0000000..a0a092e
--- /dev/null
+++ b/docs/examples/code/EntireFileQuery_NonConst.txt
@@ -0,0 +1,4 @@
+EntireFileQuery query("foo.bam");
+for (BamRecord& record : query) {
+    // ok to modify 'record' here
+} 
diff --git a/docs/examples/code/GenomicIntervalQuery.txt b/docs/examples/code/GenomicIntervalQuery.txt
new file mode 100644
index 0000000..651f254
--- /dev/null
+++ b/docs/examples/code/GenomicIntervalQuery.txt
@@ -0,0 +1,16 @@
+// using C++11 range-based for loop
+GenomicIntervalQuery query(GenomicInterval("chr1:1000-2000"), dataset);
+for (const BamRecord& record : query) {
+    // ... do stuff ...
+}
+
+// OR
+
+// using iterators directly
+GenomicIntervalQuery query(GenomicInterval("chr1:1000-2000"), dataset);
+auto iter = query.cbegin();
+auto end  = query.cend();
+for (; iter != end; ++iter) {
+    // ... do stuff ...
+}
+
diff --git a/docs/examples/code/GenomicIntervalQuery_Reuse.txt b/docs/examples/code/GenomicIntervalQuery_Reuse.txt
new file mode 100644
index 0000000..339ae95
--- /dev/null
+++ b/docs/examples/code/GenomicIntervalQuery_Reuse.txt
@@ -0,0 +1,8 @@
+DataSet ds("data.xml");
+GenomicIntervalQuery query(GenomicInterval(), ds);
+for (const GenomicInterval& interval : intervals) {
+    query.Interval(interval);
+    for (const BamRecord& record : query) {
+        // do stuff
+    }
+}
\ No newline at end of file
diff --git a/docs/examples/code/PbiAlignedEndFilter.txt b/docs/examples/code/PbiAlignedEndFilter.txt
new file mode 100644
index 0000000..bac1a46
--- /dev/null
+++ b/docs/examples/code/PbiAlignedEndFilter.txt
@@ -0,0 +1,4 @@
+PbiFilterQuery query(PbiAlignedEndFilter{3000, Compare::GREATER_THAN});
+for (const BamRecord& record : query) {
+    assert(record.AlignedEnd() > 3000);
+}
diff --git a/docs/examples/code/PbiAlignedLengthFilter.txt b/docs/examples/code/PbiAlignedLengthFilter.txt
new file mode 100644
index 0000000..38dc3ff
--- /dev/null
+++ b/docs/examples/code/PbiAlignedLengthFilter.txt
@@ -0,0 +1,4 @@
+PbiFilterQuery query(PbiAlignedLengthFilter{1000, Compare::GREATER_THAN});
+for (const BamRecord& record : query) {
+    assert((record.AlignedEnd() - record.AlignedStart()) > 1000);
+}
diff --git a/docs/examples/code/PbiAlignedStartFilter.txt b/docs/examples/code/PbiAlignedStartFilter.txt
new file mode 100644
index 0000000..b78bb2c
--- /dev/null
+++ b/docs/examples/code/PbiAlignedStartFilter.txt
@@ -0,0 +1,4 @@
+PbiFilterQuery query(PbiAlignedStartFilter{3000, Compare::GREATER_THAN});
+for (const BamRecord& record : query) {
+    assert(record.AlignedStart() > 3000);
+}
diff --git a/docs/examples/code/PbiAlignedStrandFilter.txt b/docs/examples/code/PbiAlignedStrandFilter.txt
new file mode 100644
index 0000000..9f9a885
--- /dev/null
+++ b/docs/examples/code/PbiAlignedStrandFilter.txt
@@ -0,0 +1,5 @@
+PbiFilterQuery query(PbiAlignedStrandFilter{Strand::FORWARD});
+for (const BamRecord& record : query) {
+    assert(record.AlignedStrand() == Strand::FORWARD);
+}
+
diff --git a/docs/examples/code/PbiBarcodeFilter.txt b/docs/examples/code/PbiBarcodeFilter.txt
new file mode 100644
index 0000000..3913479
--- /dev/null
+++ b/docs/examples/code/PbiBarcodeFilter.txt
@@ -0,0 +1,17 @@
+// single value
+PbiFilter filter{ PbiBarcodeFilter{17} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    const auto barcodes = record.Barcodes();
+    assert(barcodes.first == 17 || barcodes.second == 17);
+}
+
+// whitelist
+vector<uint16_t> whitelist = { 50, 100 };
+PbiFilter filter{ PbiBarcodeFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    const auto barcodes = record.Barcodes();
+    assert(barcodes.first == 50  || barcodes.second == 50 ||
+           barcodes.first == 100 || barcodes.second == 100);
+}
diff --git a/docs/examples/code/PbiBarcodeForwardFilter.txt b/docs/examples/code/PbiBarcodeForwardFilter.txt
new file mode 100644
index 0000000..af88be6
--- /dev/null
+++ b/docs/examples/code/PbiBarcodeForwardFilter.txt
@@ -0,0 +1,15 @@
+// single value
+PbiFilter filter{ PbiBarcodeForwardFilter{50} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.BarcodeForward() == 50);
+}
+
+// whitelist
+vector<uint16_t> whitelist = { 50, 100 };
+PbiFilter filter{ PbiBarcodeForwardFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.BarcodeForward() == 50 || record.BarcodeForward() == 100);
+}
+
diff --git a/docs/examples/code/PbiBarcodeQualityFilter.txt b/docs/examples/code/PbiBarcodeQualityFilter.txt
new file mode 100644
index 0000000..34311d0
--- /dev/null
+++ b/docs/examples/code/PbiBarcodeQualityFilter.txt
@@ -0,0 +1,5 @@
+PbiFilter filter{ PbiBarcodeQualityFilter{42, Compare::GREATER_THAN_EQUAL} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.BarcodeQuality() >= 42);
+}
diff --git a/docs/examples/code/PbiBarcodeReverseFilter.txt b/docs/examples/code/PbiBarcodeReverseFilter.txt
new file mode 100644
index 0000000..27e3e3d
--- /dev/null
+++ b/docs/examples/code/PbiBarcodeReverseFilter.txt
@@ -0,0 +1,15 @@
+// single value
+PbiFilter filter{ PbiBarcodeReverseFilter{50} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.BarcodeReverse() == 50);
+}
+
+// whitelist
+vector<uint16_t> whitelist = { 50, 100 };
+PbiFilter filter{ PbiBarcodeReverseFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.BarcodeReverse() == 50 || record.BarcodeReverse() == 100);
+}
+
diff --git a/docs/examples/code/PbiBarcodesFilter.txt b/docs/examples/code/PbiBarcodesFilter.txt
new file mode 100644
index 0000000..a655c57
--- /dev/null
+++ b/docs/examples/code/PbiBarcodesFilter.txt
@@ -0,0 +1,6 @@
+PbiFilter filter{ PbiBarcodesFilter{17, 18} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.BarcodeForward() == 17 && 
+           record.BarcodeReverse() == 18);
+}
diff --git a/docs/examples/code/PbiBuilder_WithReader.txt b/docs/examples/code/PbiBuilder_WithReader.txt
new file mode 100644
index 0000000..e2748c2
--- /dev/null
+++ b/docs/examples/code/PbiBuilder_WithReader.txt
@@ -0,0 +1,30 @@
+// To simply create a PBI file from BAM, the following is the easiest method:
+//
+#include <pbbam/BamFile.h>
+#include <pbbam/PbiFile.h>
+
+BamFile bamFile("data.bam");
+PbiFile::CreateFrom(bamFile);
+
+
+// However if you need to perform additional operations while reading the BAM file, 
+// you can do something like the following:
+//
+{
+    BamFile bamFile("data.bam");
+    PbiBuilder builder(bamFile.PacBioIndexFilename(), 
+                       bamFile.Header().Sequences().size());
+    BamReader reader(bamFile);
+    BamRecord b;
+    int64_t offset = reader.VirtualTell(); // first record's vOffset
+    while (reader.GetNext(b)) {
+
+        // store PBI record entry & get next record's vOffset
+        builder.AddRecord(b, offset);
+        offset = reader.VirtualTell();
+   
+        // ... additional stuff as needed ...
+    }
+
+} // <-- PBI data will only be written here, as PbiBuilder goes out of scope
+
diff --git a/docs/examples/code/PbiBuilder_WithWriter.txt b/docs/examples/code/PbiBuilder_WithWriter.txt
new file mode 100644
index 0000000..0c7d6d1
--- /dev/null
+++ b/docs/examples/code/PbiBuilder_WithWriter.txt
@@ -0,0 +1,12 @@
+BamWriter writer(...);
+PbiBuilder pbiBuilder(...);
+int64_t vOffset;
+BamRecord record;
+while (...) {
+
+    // ... populate record data ...
+
+    // write record to BAM and add PBI entry
+    writer.Write(record, &vOffset);
+    pbiBuilder.AddRecord(record, vOffset);
+}
diff --git a/docs/examples/code/PbiFilterQuery.txt b/docs/examples/code/PbiFilterQuery.txt
new file mode 100644
index 0000000..4914eab
--- /dev/null
+++ b/docs/examples/code/PbiFilterQuery.txt
@@ -0,0 +1,22 @@
+// setup filter
+PbiFilter filter;
+filter.Add(PbiZmwFilter(42));
+filter.Add(PbiReadAccuracyFilter(0.9, Compare::GREATER_THAN_EQUAL));
+
+// using C++11 range-based for loop
+PbiFilterQuery query(filter, dataset);
+for (const BamRecord& r : query) {
+    assert(r.HoleNumber() == 42);
+    assert(r.ReadAccuracy() >= 0.9);
+}
+
+// OR
+
+// using iterators directly
+PbiFilterQuery query(filter, dataset);
+auto iter = query.cbegin();
+auto end  = query.cend();
+for (; iter != end; ++iter) {
+    assert(iter->HoleNumber() == 42);
+    assert(iter->ReadAccuracy() >= 0.9);
+} 
diff --git a/docs/examples/code/PbiFilter_Composition.txt b/docs/examples/code/PbiFilter_Composition.txt
new file mode 100644
index 0000000..22cc6ff
--- /dev/null
+++ b/docs/examples/code/PbiFilter_Composition.txt
@@ -0,0 +1,8 @@
+// (f1 && f2) || f3
+
+PbiFilter f1;
+PbiFilter f2;
+PbiFilter intersect_f1_f2 = PbiFilter::Intersection(f1, f2);
+
+PbiFilter f3;
+PbiFilter final = PbiFilter::Union(intersect_f1_f2, f3);
diff --git a/docs/examples/code/PbiFilter_CustomFilter.txt b/docs/examples/code/PbiFilter_CustomFilter.txt
new file mode 100644
index 0000000..f9cdd21
--- /dev/null
+++ b/docs/examples/code/PbiFilter_CustomFilter.txt
@@ -0,0 +1,21 @@
+struct MyCustomFilter
+{
+    bool Accepts(const PbiRawData& index, const size_t row) const
+    {
+        // Look up data for record at the provided row. Do any calculations
+        // necessary, then return whether that record passes your 
+        // filter criteria. 
+        
+        return true;
+    }
+};
+
+// use in composite filters
+PbiFilter f;
+f.Add(PbiMovieNameFilter("foo"));
+f.Add(MyCustomFilter());
+
+// pass directly to PbiFilterQuery
+PbiFilterQuery query(MyCustomFilter(), "foo.bam");
+for (const BamRecord& record : query)
+    // ... do stuff ...
diff --git a/docs/examples/code/PbiFilter_Interface.txt b/docs/examples/code/PbiFilter_Interface.txt
new file mode 100644
index 0000000..0fea900
--- /dev/null
+++ b/docs/examples/code/PbiFilter_Interface.txt
@@ -0,0 +1 @@
+bool Accepts(const PbiRawData& index, const size_t row) const;
diff --git a/docs/examples/code/PbiFilter_Intersection_Copy.txt b/docs/examples/code/PbiFilter_Intersection_Copy.txt
new file mode 100644
index 0000000..ec0a7ac
--- /dev/null
+++ b/docs/examples/code/PbiFilter_Intersection_Copy.txt
@@ -0,0 +1,3 @@
+PbiFilter result{ PbiFilter::INTERSECT };
+result.Add(filters);
+return result;
diff --git a/docs/examples/code/PbiFilter_Intersection_Move.txt b/docs/examples/code/PbiFilter_Intersection_Move.txt
new file mode 100644
index 0000000..2b06106
--- /dev/null
+++ b/docs/examples/code/PbiFilter_Intersection_Move.txt
@@ -0,0 +1,3 @@
+PbiFilter result{ PbiFilter::INTERSECT };
+result.Add(std::move(filters));
+return result;
diff --git a/docs/examples/code/PbiFilter_Union_Copy.txt b/docs/examples/code/PbiFilter_Union_Copy.txt
new file mode 100644
index 0000000..7e2a192
--- /dev/null
+++ b/docs/examples/code/PbiFilter_Union_Copy.txt
@@ -0,0 +1,3 @@
+PbiFilter result{ PbiFilter::UNION };
+result.Add(filters);
+return result;
diff --git a/docs/examples/code/PbiFilter_Union_Move.txt b/docs/examples/code/PbiFilter_Union_Move.txt
new file mode 100644
index 0000000..2e98d91
--- /dev/null
+++ b/docs/examples/code/PbiFilter_Union_Move.txt
@@ -0,0 +1,3 @@
+PbiFilter result{ PbiFilter::UNION };
+result.Add(std::move(filters));
+return result;
diff --git a/docs/examples/code/PbiIdentityFilter.txt b/docs/examples/code/PbiIdentityFilter.txt
new file mode 100644
index 0000000..6fcb8d0
--- /dev/null
+++ b/docs/examples/code/PbiIdentityFilter.txt
@@ -0,0 +1,6 @@
+// single value
+PbiFilter filter{ PbiIdentityFilter{ 0.5, Compare::GREATER_THAN_EQUAL } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    // ... at least 50% of record was aligned ...
+}
diff --git a/docs/examples/code/PbiLocalContextFilter.txt b/docs/examples/code/PbiLocalContextFilter.txt
new file mode 100644
index 0000000..0aaa3eb
--- /dev/null
+++ b/docs/examples/code/PbiLocalContextFilter.txt
@@ -0,0 +1,22 @@
+
+// --------------------
+// has adapter_before
+// --------------------
+
+PbiFilter filter{ PbiLocalContextFilter{LocalContextFlags::ADAPTER_BEFORE, Compare::CONTAINS} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    const bool hasAdapterBefore = (record.LocalContextFlags() & LocalContextFlags::ADAPTER_BEFORE) != 0;
+    assert(hasAdapterBefore);
+}
+
+// ----------------------------------
+// has any adapters, barcodes, etc.
+// ----------------------------------
+
+PbiFilter filter{ PbiLocalContextFilter{LocalContextFlags::NO_LOCAL_CONTEXT, Compare::NOT_EQUAL} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    const bool hasContext = (record.LocalContextFlags() != LocalContextFlags::NO_LOCAL_CONTEXT);
+    assert(hasContext);
+}
diff --git a/docs/examples/code/PbiMapQualityFilter.txt b/docs/examples/code/PbiMapQualityFilter.txt
new file mode 100644
index 0000000..67fb5dc
--- /dev/null
+++ b/docs/examples/code/PbiMapQualityFilter.txt
@@ -0,0 +1,5 @@
+PbiFilter filter{ PbiMapQualityFilter{75, Compare::GREATER_THAN_EQUAL} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.MapQuality() >= 75);
+} 
diff --git a/docs/examples/code/PbiMovieNameFilter.txt b/docs/examples/code/PbiMovieNameFilter.txt
new file mode 100644
index 0000000..dd124e2
--- /dev/null
+++ b/docs/examples/code/PbiMovieNameFilter.txt
@@ -0,0 +1,14 @@
+// single value
+PbiFilter filter{ PbiMovieNameFilter{ "foo" } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.MovieName() == "foo");
+}
+
+// whitelist
+vector<string> whitelist = { "foo", "bar" };
+PbiFilter filter{ PbiMovieNameFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.MovieName() == "foo" || record.MovieName() == "bar");
+}
diff --git a/docs/examples/code/PbiNumDeletedBasesFilter.txt b/docs/examples/code/PbiNumDeletedBasesFilter.txt
new file mode 100644
index 0000000..e1e3d1f
--- /dev/null
+++ b/docs/examples/code/PbiNumDeletedBasesFilter.txt
@@ -0,0 +1,6 @@
+PbiFilter filter{ PbiNumDeletedBasesFilter{50, Compare::LESS_THAN} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.NumDeletedBases() < 50);
+}
+
diff --git a/docs/examples/code/PbiNumInsertedBasesFilter.txt b/docs/examples/code/PbiNumInsertedBasesFilter.txt
new file mode 100644
index 0000000..ab385e4
--- /dev/null
+++ b/docs/examples/code/PbiNumInsertedBasesFilter.txt
@@ -0,0 +1,6 @@
+PbiFilter filter{ PbiNumInsertedBasesFilter{50, Compare::LESS_THAN} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.NumInsertedBases() < 50);
+}
+
diff --git a/docs/examples/code/PbiNumMatchesFilter.txt b/docs/examples/code/PbiNumMatchesFilter.txt
new file mode 100644
index 0000000..4e1b97d
--- /dev/null
+++ b/docs/examples/code/PbiNumMatchesFilter.txt
@@ -0,0 +1,6 @@
+PbiFilter filter{ PbiNumMatchesFilter{2000, Compare::GREATER_THAN_EQUAL} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.NumMatches() >= 2000);
+}
+
diff --git a/docs/examples/code/PbiNumMismatchesFilter.txt b/docs/examples/code/PbiNumMismatchesFilter.txt
new file mode 100644
index 0000000..690e4a1
--- /dev/null
+++ b/docs/examples/code/PbiNumMismatchesFilter.txt
@@ -0,0 +1,6 @@
+PbiFilter filter{ PbiNumMismatchesFilter{500, Compare::LESS_THAN} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.NumMismatches() < 500);
+}
+
diff --git a/docs/examples/code/PbiQueryEndFilter.txt b/docs/examples/code/PbiQueryEndFilter.txt
new file mode 100644
index 0000000..f85166b
--- /dev/null
+++ b/docs/examples/code/PbiQueryEndFilter.txt
@@ -0,0 +1,5 @@
+PbiFilter filter{ PbiQueryEndFilter{3000, Compare::GREATER_THAN} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.QueryEnd() > 3000);
+} 
diff --git a/docs/examples/code/PbiQueryLengthFilter.txt b/docs/examples/code/PbiQueryLengthFilter.txt
new file mode 100644
index 0000000..123412a
--- /dev/null
+++ b/docs/examples/code/PbiQueryLengthFilter.txt
@@ -0,0 +1,5 @@
+PbiFilter filter{ PbiQueryLengthFilter{2000, Compare::GREATER_THAN} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert( (record.QueryEnd() - record.QueryStart()) > 2000 );
+}
diff --git a/docs/examples/code/PbiQueryNameFilter.txt b/docs/examples/code/PbiQueryNameFilter.txt
new file mode 100644
index 0000000..f1e51c7
--- /dev/null
+++ b/docs/examples/code/PbiQueryNameFilter.txt
@@ -0,0 +1,15 @@
+// single value
+PbiFilter filter{ PbiQueryNameFilter{ "movie_1/42/100_200" } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.FullName() == "movie_1/42/100_200");
+}
+
+// whitelist
+vector<string> whitelist = { "movie_1/42/100_200", "movie_3/24/300_500" };
+PbiFilter filter{ PbiQueryNameFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.FullName() == "movie_1/42/100_200" || 
+           record.FullName() == "movie_3/24/300_500");
+}
diff --git a/docs/examples/code/PbiQueryStartFilter.txt b/docs/examples/code/PbiQueryStartFilter.txt
new file mode 100644
index 0000000..56353df
--- /dev/null
+++ b/docs/examples/code/PbiQueryStartFilter.txt
@@ -0,0 +1,5 @@
+PbiFilter filter{ PbiQueryStartFilter{3000, Compare::GREATER_THAN} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.QueryStart() > 3000);
+} 
diff --git a/docs/examples/code/PbiReadAccuracyFilter.txt b/docs/examples/code/PbiReadAccuracyFilter.txt
new file mode 100644
index 0000000..dd2df32
--- /dev/null
+++ b/docs/examples/code/PbiReadAccuracyFilter.txt
@@ -0,0 +1,5 @@
+PbiFilter filter{ PbiReadAccuracyFilter{0.8, Compare::GREATER_THAN_EQUAL} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.ReadAccuracy() >= 0.8);
+}
diff --git a/docs/examples/code/PbiReadGroupFilter.txt b/docs/examples/code/PbiReadGroupFilter.txt
new file mode 100644
index 0000000..9af096d
--- /dev/null
+++ b/docs/examples/code/PbiReadGroupFilter.txt
@@ -0,0 +1,64 @@
+// -------------------------
+// numeric ID
+// -------------------------
+
+// single value
+PbiFilter filter{ PbiReadGroupFilter{ 2458765 } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.ReadGroupNumericId() == 2458765);
+}
+
+// whitelist
+vector<int32_t> whitelist = { 2458765, -32143 };
+PbiFilter filter{ PbiReadGroupFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.ReadGroupNumericId() == 2458765 ||
+           record.ReadGroupNumericId() == -32143);
+}
+
+// -------------------------
+// printable ID
+// -------------------------
+
+// single value 
+PbiFilter filter{ PbiReadGroupFilter{ "12B33F00" } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.ReadGroupId() == "12B33F00");
+}
+
+// whitelist
+vector<string> whitelist = { "12B33F00", "123ABC77" };
+PbiFilter filter{ PbiReadGroupFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.ReadGroupId() == "12B33F00" ||
+           record.ReadGroupId() == "123ABC77");
+}
+
+
+// -------------------------
+// read group 
+// -------------------------
+
+BamFile file("foo.bam");
+BamHeader header = file.Header();
+assert(header.ReadGroups().size() > 1);
+
+// single value 
+PbiFilter filter{ PbiReadGroupFilter{ header.ReadGroups()[0] } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.ReadGroup() == header.ReadGroups()[0]);
+}
+
+// whitelist
+vector<ReadGroupInfo> whitelist = { header.ReadGroups()[0], header.ReadGroups()[1] };
+PbiFilter filter{ PbiReadGroupFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.ReadGroup() == header.ReadGroups()[0] ||
+           record.ReadGroup() == header.ReadGroups()[1]);
+}
diff --git a/docs/examples/code/PbiReferenceEndFilter.txt b/docs/examples/code/PbiReferenceEndFilter.txt
new file mode 100644
index 0000000..ce005c6
--- /dev/null
+++ b/docs/examples/code/PbiReferenceEndFilter.txt
@@ -0,0 +1,5 @@
+PbiFilter filter{ PbiReferenceEndFilter{ 2000 } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.ReferenceEnd() == 2000);
+}
diff --git a/docs/examples/code/PbiReferenceIdFilter.txt b/docs/examples/code/PbiReferenceIdFilter.txt
new file mode 100644
index 0000000..d963d28
--- /dev/null
+++ b/docs/examples/code/PbiReferenceIdFilter.txt
@@ -0,0 +1,16 @@
+// single value
+PbiFilter filter{ PbiReferenceIdFilter{ 4 } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.ReferenceId() == 4);
+}
+
+// whitelist
+vector<int32_t> whitelist = { 0, 1 };
+PbiFilter filter{ PbiReferenceIdFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.ReferenceId() == 0 || 
+           record.ReferenceId() == 1);
+}
+
diff --git a/docs/examples/code/PbiReferenceNameFilter.txt b/docs/examples/code/PbiReferenceNameFilter.txt
new file mode 100644
index 0000000..c86b14a
--- /dev/null
+++ b/docs/examples/code/PbiReferenceNameFilter.txt
@@ -0,0 +1,15 @@
+// single value
+PbiFilter filter{ PbiReferenceNameFilter{ "chr1" } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.ReferenceName() == "chr1");
+}
+
+// whitelist
+vector<string> whitelist = { "chr1", "chr5" };
+PbiFilter filter{ PbiReferenceNameFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.ReferenceName() == "chr1" ||
+           record.ReferenceName() == "chr5");
+}
diff --git a/docs/examples/code/PbiReferenceStartFilter.txt b/docs/examples/code/PbiReferenceStartFilter.txt
new file mode 100644
index 0000000..d3ffdbb
--- /dev/null
+++ b/docs/examples/code/PbiReferenceStartFilter.txt
@@ -0,0 +1,5 @@
+PbiFilter filter{ PbiReferenceStartFilter{ 2000 } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.ReferenceStart() == 2000);
+}
diff --git a/docs/examples/code/PbiZmwFilter.txt b/docs/examples/code/PbiZmwFilter.txt
new file mode 100644
index 0000000..c63a804
--- /dev/null
+++ b/docs/examples/code/PbiZmwFilter.txt
@@ -0,0 +1,16 @@
+// single value
+PbiFilter filter{ PbiZmwFilter{ 4000 } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.HoleNumber() == 4000);
+}
+
+// whitelist
+vector<int32_t> whitelist = { 4000, 8000 };
+PbiFilter filter{ PbiZmwFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.HoleNumber() == 4000 || 
+           record.HoleNumber() == 8000);
+}
+
diff --git a/docs/examples/code/ReadAccuracyQuery.txt b/docs/examples/code/ReadAccuracyQuery.txt
new file mode 100644
index 0000000..5b0404f
--- /dev/null
+++ b/docs/examples/code/ReadAccuracyQuery.txt
@@ -0,0 +1,15 @@
+// using C++11 range-based for loop
+ReadAccuracyQuery query(0.9, Compare::GREATER_THAN_EQUAL, dataset);
+for (const BamRecord& r : query) {
+    assert(r.ReadAccuracy() >= 0.9);
+}
+
+// OR
+
+// using iterators directly
+ReadAccuracyQuery query(0.9, Compare::GREATER_THAN_EQUAL, dataset);
+auto iter = query.cbegin();
+auto end  = query.cend();
+for (; iter != end; ++iter) {
+    assert(iter->ReadAccuracy() >= 0.9);
+} 
diff --git a/docs/examples/code/SubreadLengthQuery.txt b/docs/examples/code/SubreadLengthQuery.txt
new file mode 100644
index 0000000..466a1d9
--- /dev/null
+++ b/docs/examples/code/SubreadLengthQuery.txt
@@ -0,0 +1,15 @@
+// using C++11 range-based for loop
+SubreadLengthQuery query(500, Compare::GREATER_THAN_EQUAL, dataset);
+for (const BamRecord& r : query) {
+    assert((r.QueryEnd() - r.QueryStart()) >= 500);  
+}
+
+// OR
+
+// using iterators directly
+SubreadLengthQuery query(500, Compare::GREATER_THAN_EQUAL, dataset);
+auto iter = query.cbegin();
+auto end  = query.cend();
+for (; iter != end; ++iter) {
+    assert((iter->QueryEnd() - iter->QueryStart()) >= 500);
+} 
diff --git a/docs/examples/code/Tag_AsciiCtor.txt b/docs/examples/code/Tag_AsciiCtor.txt
new file mode 100644
index 0000000..057d22f
--- /dev/null
+++ b/docs/examples/code/Tag_AsciiCtor.txt
@@ -0,0 +1,10 @@
+// One-step construction
+// 
+// This is useful in situations that require a const Tag.
+//
+const auto t = Tag('A', TagModifier::ASCII_CHAR);
+
+// or two-step construction
+auto t = Tag('A');
+t.Modifier(TagModifier::ASCII_CHAR);
+
diff --git a/docs/examples/code/ZmwGroupQuery.txt b/docs/examples/code/ZmwGroupQuery.txt
new file mode 100644
index 0000000..1d728ac
--- /dev/null
+++ b/docs/examples/code/ZmwGroupQuery.txt
@@ -0,0 +1,23 @@
+bool allHoleNumbersEqual(const vector<BamRecord>& group) 
+{
+    if (group.empty()) 
+        return true;
+    const auto firstHoleNumber = group[0].HoleNumber();
+    for (size_t i = 1; i < group.size(); ++i) {
+       if (group[i].HoleNumber() != firstHoleNumber)
+           return false;
+    }
+    return true;
+}
+
+vector<int32_t> whitelist = { 50, 100 };
+ZmwGroupQuery query(whitelist, dataset);
+for(const vector<BamRecord>& group : query) {
+
+    assert(allHoleNumbersEqual(group));
+
+    for (const BamRecord& record : group) {
+        assert(record.HoleNumber() == 50 ||
+               record.HoleNumber() == 100);
+    }
+}
diff --git a/docs/examples/code/ZmwQuery.txt b/docs/examples/code/ZmwQuery.txt
new file mode 100644
index 0000000..59c22c4
--- /dev/null
+++ b/docs/examples/code/ZmwQuery.txt
@@ -0,0 +1,6 @@
+vector<int32_t> whitelist = { 50, 100 };
+ZmwQuery query(whitelist, dataset);
+for (const BamRecord& record : query) {
+    assert(record.HoleNumber() == 50 ||
+           record.HoleNumber() == 100);
+}
diff --git a/docs/examples/code/ZmwWhitelistVirtualReader.txt b/docs/examples/code/ZmwWhitelistVirtualReader.txt
new file mode 100644
index 0000000..ae1facf
--- /dev/null
+++ b/docs/examples/code/ZmwWhitelistVirtualReader.txt
@@ -0,0 +1,6 @@
+vector<int32_t> zmws = { ... };
+ZmwWhitelistVirtualReader reader(zmws, "primary.bam", "scraps.bam");
+while(reader.HasNext()) {
+    auto virtualRecord = reader.Next();
+    // ... do stuff ...
+}
diff --git a/docs/examples/plaintext/AlignmentPrinterOutput.txt b/docs/examples/plaintext/AlignmentPrinterOutput.txt
new file mode 100644
index 0000000..21d948b
--- /dev/null
+++ b/docs/examples/plaintext/AlignmentPrinterOutput.txt
@@ -0,0 +1,13 @@
+Read        : singleInsertion2
+Reference   : lambda_NEB3011
+
+Read-length : 49
+Concordance : 0.96
+
+5210 : GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGG : 5249
+       |||||||| ||||||||||||||||||| |||||||||||
+   0 : GGCTGCAG-GTACAGCGGTCAGGAGGCCAATTGATGCCGG :   39
+
+5249 : ACTGGCTGAT : 5259
+       ||||||||||
+  39 : ACTGGCTGAT :   49
diff --git a/docs/examples/plaintext/PbiFilter_DataSetXmlFilters.txt b/docs/examples/plaintext/PbiFilter_DataSetXmlFilters.txt
new file mode 100644
index 0000000..5b5e8c2
--- /dev/null
+++ b/docs/examples/plaintext/PbiFilter_DataSetXmlFilters.txt
@@ -0,0 +1,14 @@
+<Filters>
+  <Filter>
+    <Properties>
+      <Property />  # A
+      <Property />  # B
+    </Properties>
+  </Filter>
+  <Filter>
+    <Properties>
+      <Property />  # C
+      <Property />  # D
+    </Properties> 
+  </Filter>
+</Filters>
diff --git a/docs/source/api/Accuracy.rst b/docs/source/api/Accuracy.rst
new file mode 100644
index 0000000..f88b722
--- /dev/null
+++ b/docs/source/api/Accuracy.rst
@@ -0,0 +1,11 @@
+Accuracy
+========
+
+.. code-block:: cpp
+
+   #include <pbbam/Accuracy.h>
+
+.. doxygenclass:: PacBio::BAM::Accuracy
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/AlignmentPrinter.rst b/docs/source/api/AlignmentPrinter.rst
new file mode 100644
index 0000000..ef0b191
--- /dev/null
+++ b/docs/source/api/AlignmentPrinter.rst
@@ -0,0 +1,11 @@
+AlignmentPrinter
+================
+
+.. code-block:: cpp
+
+   #include <pbbam/AlignmentPrinter.h>
+
+.. doxygenclass:: PacBio::BAM::AlignmentPrinter 
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/AlignmentSet.rst b/docs/source/api/AlignmentSet.rst
new file mode 100644
index 0000000..1817962
--- /dev/null
+++ b/docs/source/api/AlignmentSet.rst
@@ -0,0 +1,11 @@
+AlignmentSet
+============
+
+.. code-block:: cpp
+
+   #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::AlignmentSet 
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/BaiIndexedBamReader.rst b/docs/source/api/BaiIndexedBamReader.rst
new file mode 100644
index 0000000..aab136f
--- /dev/null
+++ b/docs/source/api/BaiIndexedBamReader.rst
@@ -0,0 +1,11 @@
+BaiIndexedBamReader
+===================
+
+.. code-block:: cpp
+
+   #include <pbbam/BaiIndexedBamReader.h>
+
+.. doxygenclass:: PacBio::BAM::BaiIndexedBamReader
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/BamFile.rst b/docs/source/api/BamFile.rst
new file mode 100644
index 0000000..c7e48fb
--- /dev/null
+++ b/docs/source/api/BamFile.rst
@@ -0,0 +1,11 @@
+BamFile
+=======
+
+.. code-block:: cpp
+
+   #include <pbbam/BamFile.h>
+
+.. doxygenclass:: PacBio::BAM::BamFile
+   :members:
+   :protected-members:
+   :undoc-members:
diff --git a/docs/source/api/BamHeader.rst b/docs/source/api/BamHeader.rst
new file mode 100644
index 0000000..6cf06af
--- /dev/null
+++ b/docs/source/api/BamHeader.rst
@@ -0,0 +1,11 @@
+BamHeader
+=========
+
+.. code-block:: cpp
+
+   #include <pbbam/BamHeader.h>
+
+.. doxygenclass:: PacBio::BAM::BamHeader
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/BamReader.rst b/docs/source/api/BamReader.rst
new file mode 100644
index 0000000..e0b6f3c
--- /dev/null
+++ b/docs/source/api/BamReader.rst
@@ -0,0 +1,11 @@
+BamReader
+=========
+
+.. code-block:: cpp
+
+   #include <pbbam/BamReader.h>
+
+.. doxygenclass:: PacBio::BAM::BamReader
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/BamRecord.rst b/docs/source/api/BamRecord.rst
new file mode 100644
index 0000000..a749775
--- /dev/null
+++ b/docs/source/api/BamRecord.rst
@@ -0,0 +1,17 @@
+BamRecord
+=========
+
+.. code-block:: cpp
+
+   #include <pbbam/BamRecord.h>
+
+.. doxygenenum:: PacBio::BAM::ClipType
+
+.. doxygenenum:: PacBio::BAM::RecordType
+
+.. doxygenenum:: PacBio::BAM::FrameEncodingType
+
+.. doxygenclass:: PacBio::BAM::BamRecord
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/BamRecordBuilder.rst b/docs/source/api/BamRecordBuilder.rst
new file mode 100644
index 0000000..ce477b4
--- /dev/null
+++ b/docs/source/api/BamRecordBuilder.rst
@@ -0,0 +1,11 @@
+BamRecordBuilder
+================
+
+.. code-block:: cpp
+
+   #include <pbbam/BamRecordBuilder.h>
+
+.. doxygenclass:: PacBio::BAM::BamRecordBuilder
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/BamRecordImpl.rst b/docs/source/api/BamRecordImpl.rst
new file mode 100644
index 0000000..92b6759
--- /dev/null
+++ b/docs/source/api/BamRecordImpl.rst
@@ -0,0 +1,11 @@
+BamRecordImpl
+=============
+
+.. code-block:: cpp
+
+   #include <pbbam/BamRecordImpl.h>
+
+.. doxygenclass:: PacBio::BAM::BamRecordImpl
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/BamRecordView.rst b/docs/source/api/BamRecordView.rst
new file mode 100644
index 0000000..2bc8fc4
--- /dev/null
+++ b/docs/source/api/BamRecordView.rst
@@ -0,0 +1,11 @@
+BamRecordView
+=============
+
+.. code-block:: cpp
+
+   #include <pbbam/BamRecord.h>
+
+.. doxygenclass:: PacBio::BAM::BamRecordView
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/BamTagCodec.rst b/docs/source/api/BamTagCodec.rst
new file mode 100644
index 0000000..9307421
--- /dev/null
+++ b/docs/source/api/BamTagCodec.rst
@@ -0,0 +1,11 @@
+BamTagCodec
+===========
+
+.. code-block:: cpp
+
+   #include <pbbam/BamTagCodec.h>
+
+.. doxygenclass:: PacBio::BAM::BamTagCodec
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/BamWriter.rst b/docs/source/api/BamWriter.rst
new file mode 100644
index 0000000..2e2951b
--- /dev/null
+++ b/docs/source/api/BamWriter.rst
@@ -0,0 +1,11 @@
+BamWriter
+=========
+
+.. code-block:: cpp
+
+   #include <pbbam/BamWriter.h>
+
+.. doxygenclass:: PacBio::BAM::BamWriter
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/BarcodeLookupData.rst b/docs/source/api/BarcodeLookupData.rst
new file mode 100644
index 0000000..2dac47d
--- /dev/null
+++ b/docs/source/api/BarcodeLookupData.rst
@@ -0,0 +1,11 @@
+BarcodeLookupData
+=================
+
+.. code-block:: cpp
+
+   #include <pbbam/PbiLookupData.h>
+
+.. doxygenclass:: PacBio::BAM::BarcodeLookupData 
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/BarcodeQuery.rst b/docs/source/api/BarcodeQuery.rst
new file mode 100644
index 0000000..5836059
--- /dev/null
+++ b/docs/source/api/BarcodeQuery.rst
@@ -0,0 +1,11 @@
+BarcodeQuery
+============
+
+.. code-block:: cpp
+
+   #include <pbbam/BarcodeQuery.h>
+
+.. doxygenclass:: PacBio::BAM::BarcodeQuery
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/BarcodeSet.rst b/docs/source/api/BarcodeSet.rst
new file mode 100644
index 0000000..a7ee056
--- /dev/null
+++ b/docs/source/api/BarcodeSet.rst
@@ -0,0 +1,11 @@
+BarcodeSet
+==========
+
+.. code-block:: cpp
+
+   #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::BarcodeSet 
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/BasicLookupData.rst b/docs/source/api/BasicLookupData.rst
new file mode 100644
index 0000000..b991fdf
--- /dev/null
+++ b/docs/source/api/BasicLookupData.rst
@@ -0,0 +1,11 @@
+BasicLookupData
+===============
+
+.. code-block:: cpp
+
+   #include <pbbam/PbiLookupData.h>
+
+.. doxygenclass:: PacBio::BAM::BasicLookupData
+   :members:
+   :protected-members:
+   :undoc-members:
diff --git a/docs/source/api/Cigar.rst b/docs/source/api/Cigar.rst
new file mode 100644
index 0000000..cea30d5
--- /dev/null
+++ b/docs/source/api/Cigar.rst
@@ -0,0 +1,11 @@
+Cigar
+=====
+
+.. code-block:: cpp
+
+   #include <pbbam/Cigar.h>
+
+.. doxygenclass:: PacBio::BAM::Cigar
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/CigarOperation.rst b/docs/source/api/CigarOperation.rst
new file mode 100644
index 0000000..856400a
--- /dev/null
+++ b/docs/source/api/CigarOperation.rst
@@ -0,0 +1,13 @@
+CigarOperation
+==============
+
+.. code-block:: cpp
+
+   #include <pbbam/CigarOperation.h>
+   
+.. doxygenenum:: PacBio::BAM::CigarOperationType   
+
+.. doxygenclass:: PacBio::BAM::CigarOperation
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/Compare.rst b/docs/source/api/Compare.rst
new file mode 100644
index 0000000..bb28a7e
--- /dev/null
+++ b/docs/source/api/Compare.rst
@@ -0,0 +1,8 @@
+Compare
+=======
+
+.. code-block:: cpp
+
+   #include <pbbam/Compare.h>
+
+.. doxygenfile:: Compare.h
\ No newline at end of file
diff --git a/docs/source/api/Config.rst b/docs/source/api/Config.rst
new file mode 100644
index 0000000..c4be9e4
--- /dev/null
+++ b/docs/source/api/Config.rst
@@ -0,0 +1,8 @@
+Config
+=======
+
+.. code-block:: cpp
+
+   #include <pbbam/Config.h>
+
+.. doxygenfile:: Config.h
\ No newline at end of file
diff --git a/docs/source/api/ConsensusAlignmentSet.rst b/docs/source/api/ConsensusAlignmentSet.rst
new file mode 100644
index 0000000..bc5a7e5
--- /dev/null
+++ b/docs/source/api/ConsensusAlignmentSet.rst
@@ -0,0 +1,11 @@
+ConsensusAlignmentSet
+=====================
+
+.. code-block:: cpp
+
+   #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::ConsensusAlignmentSet 
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/ConsensusReadSet.rst b/docs/source/api/ConsensusReadSet.rst
new file mode 100644
index 0000000..846698d
--- /dev/null
+++ b/docs/source/api/ConsensusReadSet.rst
@@ -0,0 +1,11 @@
+ConsensusReadSet
+================
+
+.. code-block:: cpp
+
+   #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::ConsensusReadSet 
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/ContigSet.rst b/docs/source/api/ContigSet.rst
new file mode 100644
index 0000000..96bb20b
--- /dev/null
+++ b/docs/source/api/ContigSet.rst
@@ -0,0 +1,11 @@
+ContigSet
+=========
+
+.. code-block:: cpp
+
+   #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::ContigSet 
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/DataSet.rst b/docs/source/api/DataSet.rst
new file mode 100644
index 0000000..8b3f0db
--- /dev/null
+++ b/docs/source/api/DataSet.rst
@@ -0,0 +1,11 @@
+DataSet
+=======
+
+.. code-block:: cpp
+
+   #include <pbbam/DataSet.h>
+
+.. doxygenclass:: PacBio::BAM::DataSet 
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/DataSetBase.rst b/docs/source/api/DataSetBase.rst
new file mode 100644
index 0000000..f23fbb5
--- /dev/null
+++ b/docs/source/api/DataSetBase.rst
@@ -0,0 +1,11 @@
+DataSetBase
+===========
+
+.. code-block:: cpp
+
+   #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::DataSetBase
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/DataSetMetadata.rst b/docs/source/api/DataSetMetadata.rst
new file mode 100644
index 0000000..eea260d
--- /dev/null
+++ b/docs/source/api/DataSetMetadata.rst
@@ -0,0 +1,11 @@
+DataSetMetadata
+===============
+
+.. code-block:: cpp
+
+   #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::DataSetMetadata
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/EntireFileQuery.rst b/docs/source/api/EntireFileQuery.rst
new file mode 100644
index 0000000..4e7b86b
--- /dev/null
+++ b/docs/source/api/EntireFileQuery.rst
@@ -0,0 +1,11 @@
+EntireFileQuery
+===============
+
+.. code-block:: cpp
+
+   #include <pbbam/EntireFileQuery.h>
+
+.. doxygenclass:: PacBio::BAM::EntireFileQuery
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/ExtensionElement.rst b/docs/source/api/ExtensionElement.rst
new file mode 100644
index 0000000..980303e
--- /dev/null
+++ b/docs/source/api/ExtensionElement.rst
@@ -0,0 +1,11 @@
+ExtensionElement
+================
+
+.. code-block:: cpp
+
+   #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::ExtensionElement
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/Extensions.rst b/docs/source/api/Extensions.rst
new file mode 100644
index 0000000..6704807
--- /dev/null
+++ b/docs/source/api/Extensions.rst
@@ -0,0 +1,11 @@
+Extensions
+================
+
+.. code-block:: cpp
+
+   #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::Extensions
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/ExternalResource.rst b/docs/source/api/ExternalResource.rst
new file mode 100644
index 0000000..03ab0d3
--- /dev/null
+++ b/docs/source/api/ExternalResource.rst
@@ -0,0 +1,11 @@
+ExternalResource
+================
+
+.. code-block:: cpp
+
+   #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::ExternalResource
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/ExternalResources.rst b/docs/source/api/ExternalResources.rst
new file mode 100644
index 0000000..bd72ea4
--- /dev/null
+++ b/docs/source/api/ExternalResources.rst
@@ -0,0 +1,11 @@
+ExternalResources
+=================
+
+.. code-block:: cpp
+
+   #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::ExternalResources
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/FileIndex.rst b/docs/source/api/FileIndex.rst
new file mode 100644
index 0000000..c117214
--- /dev/null
+++ b/docs/source/api/FileIndex.rst
@@ -0,0 +1,11 @@
+FileIndex
+=========
+
+.. code-block:: cpp
+
+   #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::FileIndex
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/FileIndices.rst b/docs/source/api/FileIndices.rst
new file mode 100644
index 0000000..b25720c
--- /dev/null
+++ b/docs/source/api/FileIndices.rst
@@ -0,0 +1,11 @@
+FileIndices
+===========
+
+.. code-block:: cpp
+
+   #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::FileIndices
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/Filter.rst b/docs/source/api/Filter.rst
new file mode 100644
index 0000000..6faa8aa
--- /dev/null
+++ b/docs/source/api/Filter.rst
@@ -0,0 +1,11 @@
+Filter
+======
+
+.. code-block:: cpp
+
+   #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::Filter
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/Filters.rst b/docs/source/api/Filters.rst
new file mode 100644
index 0000000..7ea1620
--- /dev/null
+++ b/docs/source/api/Filters.rst
@@ -0,0 +1,11 @@
+Filters
+=======
+
+.. code-block:: cpp
+
+   #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::Filters
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/Frames.rst b/docs/source/api/Frames.rst
new file mode 100644
index 0000000..cf260f2
--- /dev/null
+++ b/docs/source/api/Frames.rst
@@ -0,0 +1,11 @@
+Frames
+======
+
+.. code-block:: cpp
+
+   #include <pbbam/Frames.h>
+
+.. doxygenclass:: PacBio::BAM::Frames
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/GenomicInterval.rst b/docs/source/api/GenomicInterval.rst
new file mode 100644
index 0000000..811b83a
--- /dev/null
+++ b/docs/source/api/GenomicInterval.rst
@@ -0,0 +1,11 @@
+GenomicInterval
+===============
+
+.. code-block:: cpp
+
+   #include <pbbam/GenomicInterval.h>
+
+.. doxygenclass:: PacBio::BAM::GenomicInterval
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/GenomicIntervalCompositeBamReader.rst b/docs/source/api/GenomicIntervalCompositeBamReader.rst
new file mode 100644
index 0000000..f658621
--- /dev/null
+++ b/docs/source/api/GenomicIntervalCompositeBamReader.rst
@@ -0,0 +1,11 @@
+GenomicIntervalCompositeBamReader
+=================================
+
+.. code-block:: cpp
+
+   #include <pbbam/CompositeBamReader.h>
+
+.. doxygenclass:: PacBio::BAM::GenomicIntervalCompositeBamReader
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/GenomicIntervalQuery.rst b/docs/source/api/GenomicIntervalQuery.rst
new file mode 100644
index 0000000..7bae558
--- /dev/null
+++ b/docs/source/api/GenomicIntervalQuery.rst
@@ -0,0 +1,11 @@
+GenomicIntervalQuery
+====================
+
+.. code-block:: cpp
+
+   #include <pbbam/GenomicIntervalQuery.h>
+
+.. doxygenclass:: PacBio::BAM::GenomicIntervalQuery
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/HdfSubreadSet.rst b/docs/source/api/HdfSubreadSet.rst
new file mode 100644
index 0000000..88bf008
--- /dev/null
+++ b/docs/source/api/HdfSubreadSet.rst
@@ -0,0 +1,11 @@
+HdfSubreadSet
+=============
+
+.. code-block:: cpp
+
+   #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::HdfSubreadSet 
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/IndexResultBlock.rst b/docs/source/api/IndexResultBlock.rst
new file mode 100644
index 0000000..fac804a
--- /dev/null
+++ b/docs/source/api/IndexResultBlock.rst
@@ -0,0 +1,17 @@
+IndexResultBlock
+================
+
+.. code-block:: cpp
+
+   #include <pbbam/PbiBasicTypes.h>
+
+.. doxygenstruct:: PacBio::BAM::IndexResultBlock
+   :members:
+   :protected-members:
+   :undoc-members:
+   
+.. doxygentypedef:: PacBio::BAM::IndexResultBlocks
+
+.. doxygentypedef:: PacBio::BAM::IndexList
+   
+.. doxygentypedef:: PacBio::BAM::IndexRange
\ No newline at end of file
diff --git a/docs/source/api/IndexedFastaReader.rst b/docs/source/api/IndexedFastaReader.rst
new file mode 100644
index 0000000..7c46064
--- /dev/null
+++ b/docs/source/api/IndexedFastaReader.rst
@@ -0,0 +1,11 @@
+IndexedFastaReader
+==================
+
+.. code-block:: cpp
+
+   #include <pbbam/IndexedFastaReader.h>
+
+.. doxygenclass:: PacBio::BAM::IndexedFastaReader
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/Interval.rst b/docs/source/api/Interval.rst
new file mode 100644
index 0000000..f506a19
--- /dev/null
+++ b/docs/source/api/Interval.rst
@@ -0,0 +1,11 @@
+Interval
+========
+
+.. code-block:: cpp
+
+   #include <pbbam/Interval.h>
+
+.. doxygenclass:: PacBio::BAM::Interval
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/InvalidSequencingChemistryException.rst b/docs/source/api/InvalidSequencingChemistryException.rst
new file mode 100644
index 0000000..d521ecc
--- /dev/null
+++ b/docs/source/api/InvalidSequencingChemistryException.rst
@@ -0,0 +1,11 @@
+InvalidSequencingChemistryException
+===================================
+
+.. code-block:: cpp
+
+   #include <pbbam/exception/InvalidSequencingChemistryException.h>
+
+.. doxygenclass:: PacBio::BAM::InvalidSequencingChemistryException
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/LocalContextFlags.rst b/docs/source/api/LocalContextFlags.rst
new file mode 100644
index 0000000..8cd63be
--- /dev/null
+++ b/docs/source/api/LocalContextFlags.rst
@@ -0,0 +1,8 @@
+LocalContextFlags
+=================
+
+.. code-block:: cpp
+
+   #include <pbbam/LocalContextFlags.h>
+
+.. doxygenenum:: PacBio::BAM::LocalContextFlags
diff --git a/docs/source/api/MappedLookupData.rst b/docs/source/api/MappedLookupData.rst
new file mode 100644
index 0000000..7cf3c8b
--- /dev/null
+++ b/docs/source/api/MappedLookupData.rst
@@ -0,0 +1,11 @@
+MappedLookupData
+================
+
+.. code-block:: cpp
+
+   #include <pbbam/PbiLookupData.h>
+
+.. doxygenclass:: PacBio::BAM::MappedLookupData 
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/NamespaceInfo.rst b/docs/source/api/NamespaceInfo.rst
new file mode 100644
index 0000000..c7613ec
--- /dev/null
+++ b/docs/source/api/NamespaceInfo.rst
@@ -0,0 +1,11 @@
+NamespaceInfo
+=============
+
+.. code-block:: cpp
+
+   #include <pbbam/DataSetXsd.h>
+
+.. doxygenclass:: PacBio::BAM::NamespaceInfo
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/NamespaceRegistry.rst b/docs/source/api/NamespaceRegistry.rst
new file mode 100644
index 0000000..2f8f9a7
--- /dev/null
+++ b/docs/source/api/NamespaceRegistry.rst
@@ -0,0 +1,11 @@
+NamespaceRegistry
+=================
+
+.. code-block:: cpp
+
+   #include <pbbam/DataSetXsd.h>
+
+.. doxygenclass:: PacBio::BAM::NamespaceRegistry
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/OrderedLookup.rst b/docs/source/api/OrderedLookup.rst
new file mode 100644
index 0000000..d5b81b6
--- /dev/null
+++ b/docs/source/api/OrderedLookup.rst
@@ -0,0 +1,11 @@
+OrderedLookup
+=============
+
+.. code-block:: cpp
+
+   #include <pbbam/PbiLookupData.h>
+
+.. doxygenclass:: PacBio::BAM::OrderedLookup
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/Orientation.rst b/docs/source/api/Orientation.rst
new file mode 100644
index 0000000..e9bbc42
--- /dev/null
+++ b/docs/source/api/Orientation.rst
@@ -0,0 +1,8 @@
+Orientation
+===========
+
+.. code-block:: cpp
+
+   #include <pbbam/Orientation.h>
+
+.. doxygenenum:: PacBio::BAM::Orientation
diff --git a/docs/source/api/ParentTool.rst b/docs/source/api/ParentTool.rst
new file mode 100644
index 0000000..e2ffa1b
--- /dev/null
+++ b/docs/source/api/ParentTool.rst
@@ -0,0 +1,11 @@
+ParentTool
+==========
+
+.. code-block:: cpp
+
+   #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::ParentTool
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/PbiBuilder.rst b/docs/source/api/PbiBuilder.rst
new file mode 100644
index 0000000..d795d0f
--- /dev/null
+++ b/docs/source/api/PbiBuilder.rst
@@ -0,0 +1,11 @@
+PbiBuilder
+==========
+
+.. code-block:: cpp
+
+   #include <pbbam/PbiBuilder.h>
+
+.. doxygenclass:: PacBio::BAM::PbiBuilder
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/PbiFile.rst b/docs/source/api/PbiFile.rst
new file mode 100644
index 0000000..5a8b85a
--- /dev/null
+++ b/docs/source/api/PbiFile.rst
@@ -0,0 +1,14 @@
+PbiFile
+=======
+
+.. code-block:: cpp
+
+   #include <pbbam/PbiFile.h>
+
+.. doxygenenum:: PacBio::BAM::PbiFile::Section
+
+.. doxygentypedef:: PacBio::BAM::PbiFile::Sections
+
+.. doxygenenum:: PacBio::BAM::PbiFile::VersionEnum
+
+.. doxygenfunction:: PacBio::BAM::PbiFile::CreateFrom
diff --git a/docs/source/api/PbiFilter.rst b/docs/source/api/PbiFilter.rst
new file mode 100644
index 0000000..261498b
--- /dev/null
+++ b/docs/source/api/PbiFilter.rst
@@ -0,0 +1,11 @@
+PbiFilter
+=========
+
+.. code-block:: cpp
+
+   #include <pbbam/PbiFilter.h>
+
+.. doxygenclass:: PacBio::BAM::PbiFilter
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/PbiFilterCompositeBamReader.rst b/docs/source/api/PbiFilterCompositeBamReader.rst
new file mode 100644
index 0000000..7a69df3
--- /dev/null
+++ b/docs/source/api/PbiFilterCompositeBamReader.rst
@@ -0,0 +1,11 @@
+PbiFilterCompositeBamReader
+===========================
+
+.. code-block:: cpp
+
+   #include <pbbam/CompositeBamReader.h>
+
+.. doxygenclass:: PacBio::BAM::PbiFilterCompositeBamReader
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/PbiFilterQuery.rst b/docs/source/api/PbiFilterQuery.rst
new file mode 100644
index 0000000..75bbc12
--- /dev/null
+++ b/docs/source/api/PbiFilterQuery.rst
@@ -0,0 +1,11 @@
+PbiFilterQuery
+==============
+
+.. code-block:: cpp
+
+   #include <pbbam/PbiFilterQuery.h>
+
+.. doxygenclass:: PacBio::BAM::PbiFilterQuery
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/PbiFilterTypes.rst b/docs/source/api/PbiFilterTypes.rst
new file mode 100644
index 0000000..052389b
--- /dev/null
+++ b/docs/source/api/PbiFilterTypes.rst
@@ -0,0 +1,8 @@
+PbiFilterTypes
+==============
+
+.. code-block:: cpp
+
+   #include <pbbam/PbiFilterTypes.h>
+
+.. doxygenfile:: PbiFilterTypes.h
\ No newline at end of file
diff --git a/docs/source/api/PbiIndex.rst b/docs/source/api/PbiIndex.rst
new file mode 100644
index 0000000..811bc68
--- /dev/null
+++ b/docs/source/api/PbiIndex.rst
@@ -0,0 +1,11 @@
+PbiIndex
+========
+
+.. code-block:: cpp
+
+   #include <pbbam/PbiIndex.h>
+
+.. doxygenclass:: PacBio::BAM::PbiIndex
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/PbiIndexedBamReader.rst b/docs/source/api/PbiIndexedBamReader.rst
new file mode 100644
index 0000000..5450c8a
--- /dev/null
+++ b/docs/source/api/PbiIndexedBamReader.rst
@@ -0,0 +1,11 @@
+PbiIndexedBamReader
+===================
+
+.. code-block:: cpp
+
+   #include <pbbam/PbiIndexedBamReader.h>
+
+.. doxygenclass:: PacBio::BAM::PbiIndexedBamReader
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/PbiRawBarcodeData.rst b/docs/source/api/PbiRawBarcodeData.rst
new file mode 100644
index 0000000..c72ebfb
--- /dev/null
+++ b/docs/source/api/PbiRawBarcodeData.rst
@@ -0,0 +1,11 @@
+PbiRawBarcodeData
+=================
+
+.. code-block:: cpp
+
+   #include <pbbam/PbiRawData.h>
+
+.. doxygenclass:: PacBio::BAM::PbiRawBarcodeData
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/PbiRawBasicData.rst b/docs/source/api/PbiRawBasicData.rst
new file mode 100644
index 0000000..2282387
--- /dev/null
+++ b/docs/source/api/PbiRawBasicData.rst
@@ -0,0 +1,11 @@
+PbiRawBasicData
+===============
+
+.. code-block:: cpp
+
+   #include <pbbam/PbiRawData.h>
+
+.. doxygenclass:: PacBio::BAM::PbiRawBasicData
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/PbiRawData.rst b/docs/source/api/PbiRawData.rst
new file mode 100644
index 0000000..1a974e8
--- /dev/null
+++ b/docs/source/api/PbiRawData.rst
@@ -0,0 +1,11 @@
+PbiRawData
+==========
+
+.. code-block:: cpp
+
+   #include <pbbam/PbiRawData.h>
+
+.. doxygenclass:: PacBio::BAM::PbiRawData
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/PbiRawMappedData.rst b/docs/source/api/PbiRawMappedData.rst
new file mode 100644
index 0000000..42e1de1
--- /dev/null
+++ b/docs/source/api/PbiRawMappedData.rst
@@ -0,0 +1,11 @@
+PbiRawMappedData
+================
+
+.. code-block:: cpp
+
+   #include <pbbam/PbiRawData.h>
+
+.. doxygenclass:: PacBio::BAM::PbiRawMappedData
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/PbiRawReferenceData.rst b/docs/source/api/PbiRawReferenceData.rst
new file mode 100644
index 0000000..460cde4
--- /dev/null
+++ b/docs/source/api/PbiRawReferenceData.rst
@@ -0,0 +1,11 @@
+PbiRawReferenceData
+===================
+
+.. code-block:: cpp
+
+   #include <pbbam/PbiRawData.h>
+
+.. doxygenclass:: PacBio::BAM::PbiRawReferenceData
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/PbiReferenceEntry.rst b/docs/source/api/PbiReferenceEntry.rst
new file mode 100644
index 0000000..472e586
--- /dev/null
+++ b/docs/source/api/PbiReferenceEntry.rst
@@ -0,0 +1,11 @@
+PbiReferenceEntry
+=================
+
+.. code-block:: cpp
+
+   #include <pbbam/PbiRawData.h>
+
+.. doxygenclass:: PacBio::BAM::PbiReferenceEntry
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/Position.rst b/docs/source/api/Position.rst
new file mode 100644
index 0000000..3c945f2
--- /dev/null
+++ b/docs/source/api/Position.rst
@@ -0,0 +1,10 @@
+Position
+========
+
+.. code-block:: cpp
+
+   #include <pbbam/Position.h>
+
+.. doxygentypedef:: PacBio::BAM::Position
+
+.. doxygenvariable:: PacBio::BAM::UnmappedPosition
\ No newline at end of file
diff --git a/docs/source/api/ProgramInfo.rst b/docs/source/api/ProgramInfo.rst
new file mode 100644
index 0000000..b58c93a
--- /dev/null
+++ b/docs/source/api/ProgramInfo.rst
@@ -0,0 +1,11 @@
+ProgramInfo
+===========
+
+.. code-block:: cpp
+
+   #include <pbbam/ProgramInfo.h>
+
+.. doxygenclass:: PacBio::BAM::ProgramInfo
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/QNameQuery.rst b/docs/source/api/QNameQuery.rst
new file mode 100644
index 0000000..b549436
--- /dev/null
+++ b/docs/source/api/QNameQuery.rst
@@ -0,0 +1,11 @@
+QNameQuery
+==========
+
+.. code-block:: cpp
+
+   #include <pbbam/QNameQuery.h>
+
+.. doxygenclass:: PacBio::BAM::QNameQuery
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/QualityValue.rst b/docs/source/api/QualityValue.rst
new file mode 100644
index 0000000..3520c5a
--- /dev/null
+++ b/docs/source/api/QualityValue.rst
@@ -0,0 +1,11 @@
+QualityValue
+============
+
+.. code-block:: cpp
+
+   #include <pbbam/QualityValue.h>
+
+.. doxygenclass:: PacBio::BAM::QualityValue
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/QualityValues.rst b/docs/source/api/QualityValues.rst
new file mode 100644
index 0000000..8f6dfa5
--- /dev/null
+++ b/docs/source/api/QualityValues.rst
@@ -0,0 +1,11 @@
+QualityValues
+=============
+
+.. code-block:: cpp
+
+   #include <pbbam/QualityValues.h>
+
+.. doxygenclass:: PacBio::BAM::QualityValues
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/ReadAccuracyQuery.rst b/docs/source/api/ReadAccuracyQuery.rst
new file mode 100644
index 0000000..abfd1e6
--- /dev/null
+++ b/docs/source/api/ReadAccuracyQuery.rst
@@ -0,0 +1,11 @@
+ReadAccuracyQuery
+=================
+
+.. code-block:: cpp
+
+   #include <pbbam/ReadAccuracyQuery.h>
+
+.. doxygenclass:: PacBio::BAM::ReadAccuracyQuery
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/ReadGroupInfo.rst b/docs/source/api/ReadGroupInfo.rst
new file mode 100644
index 0000000..7fb4f69
--- /dev/null
+++ b/docs/source/api/ReadGroupInfo.rst
@@ -0,0 +1,21 @@
+ReadGroupInfo
+=============
+
+.. code-block:: cpp
+
+   #include <pbbam/ReadGroupInfo.h>
+
+.. doxygenenum:: PacBio::BAM::BaseFeature
+
+.. doxygenenum:: PacBio::BAM::FrameCodec
+
+.. doxygenenum:: PacBio::BAM::BarcodeModeType
+
+.. doxygenenum:: PacBio::BAM::BarcodeQualityType
+
+.. doxygenclass:: PacBio::BAM::ReadGroupInfo
+   :members:
+   :protected-members:
+   :undoc-members:
+   
+.. doxygenfunction:: PacBio::BAM::MakeReadGroupId
\ No newline at end of file
diff --git a/docs/source/api/ReferenceLookupData.rst b/docs/source/api/ReferenceLookupData.rst
new file mode 100644
index 0000000..20316fc
--- /dev/null
+++ b/docs/source/api/ReferenceLookupData.rst
@@ -0,0 +1,11 @@
+ReferenceLookupData
+===================
+
+.. code-block:: cpp
+
+   #include <pbbam/PbiLookupData.h>
+
+.. doxygenclass:: PacBio::BAM::ReferenceLookupData
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/ReferenceSet.rst b/docs/source/api/ReferenceSet.rst
new file mode 100644
index 0000000..22e4703
--- /dev/null
+++ b/docs/source/api/ReferenceSet.rst
@@ -0,0 +1,11 @@
+ReferenceSet
+============
+
+.. code-block:: cpp
+
+   #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::ReferenceSet 
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/SamTagCodec.rst b/docs/source/api/SamTagCodec.rst
new file mode 100644
index 0000000..4f8d65d
--- /dev/null
+++ b/docs/source/api/SamTagCodec.rst
@@ -0,0 +1,11 @@
+SamTagCodec
+===========
+
+.. code-block:: cpp
+
+   #include <pbbam/SamTagCodec.h>
+
+.. doxygenclass:: PacBio::BAM::SamTagCodec
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/SequenceInfo.rst b/docs/source/api/SequenceInfo.rst
new file mode 100644
index 0000000..393d5bb
--- /dev/null
+++ b/docs/source/api/SequenceInfo.rst
@@ -0,0 +1,11 @@
+SequenceInfo
+============
+
+.. code-block:: cpp
+
+   #include <pbbam/SequenceInfo.h>
+
+.. doxygenclass:: PacBio::BAM::SequenceInfo
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/SequentialCompositeBamReader.rst b/docs/source/api/SequentialCompositeBamReader.rst
new file mode 100644
index 0000000..31ed3b1
--- /dev/null
+++ b/docs/source/api/SequentialCompositeBamReader.rst
@@ -0,0 +1,11 @@
+SequentialCompositeBamReader
+============================
+
+.. code-block:: cpp
+
+   #include <pbbam/CompositeBamReader.h>
+
+.. doxygenclass:: PacBio::BAM::SequentialCompositeBamReader
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/Strand.rst b/docs/source/api/Strand.rst
new file mode 100644
index 0000000..4978f72
--- /dev/null
+++ b/docs/source/api/Strand.rst
@@ -0,0 +1,8 @@
+Strand
+======
+
+.. code-block:: cpp
+
+   #include <pbbam/Strand.h>
+
+.. doxygenenum:: PacBio::BAM::Strand 
diff --git a/docs/source/api/SubDataSets.rst b/docs/source/api/SubDataSets.rst
new file mode 100644
index 0000000..d179065
--- /dev/null
+++ b/docs/source/api/SubDataSets.rst
@@ -0,0 +1,11 @@
+SubDataSets
+===========
+
+.. code-block:: cpp
+
+   #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::SubDataSets
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/SubreadLengthQuery.rst b/docs/source/api/SubreadLengthQuery.rst
new file mode 100644
index 0000000..23000b3
--- /dev/null
+++ b/docs/source/api/SubreadLengthQuery.rst
@@ -0,0 +1,11 @@
+SubreadLengthQuery
+==================
+
+.. code-block:: cpp
+
+   #include <pbbam/SubreadLengthQuery.h>
+
+.. doxygenclass:: PacBio::BAM::SubreadLengthQuery
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/SubreadSet.rst b/docs/source/api/SubreadSet.rst
new file mode 100644
index 0000000..bfc3c13
--- /dev/null
+++ b/docs/source/api/SubreadSet.rst
@@ -0,0 +1,11 @@
+SubreadSet
+==========
+
+.. code-block:: cpp
+
+   #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::SubreadSet 
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/Tag.rst b/docs/source/api/Tag.rst
new file mode 100644
index 0000000..50b85c7
--- /dev/null
+++ b/docs/source/api/Tag.rst
@@ -0,0 +1,15 @@
+Tag
+===
+
+.. code-block:: cpp
+
+   #include <pbbam/Tag.h>
+
+.. doxygenenum:: PacBio::BAM::TagDataType
+
+.. doxygenenum:: PacBio::BAM::TagModifier
+
+.. doxygenclass:: PacBio::BAM::Tag
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/TagCollection.rst b/docs/source/api/TagCollection.rst
new file mode 100644
index 0000000..1314b13
--- /dev/null
+++ b/docs/source/api/TagCollection.rst
@@ -0,0 +1,11 @@
+TagCollection
+=============
+
+.. code-block:: cpp
+
+   #include <pbbam/TagCollection.h>
+
+.. doxygenclass:: PacBio::BAM::TagCollection
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/UnorderedLookup.rst b/docs/source/api/UnorderedLookup.rst
new file mode 100644
index 0000000..718e4e7
--- /dev/null
+++ b/docs/source/api/UnorderedLookup.rst
@@ -0,0 +1,11 @@
+UnorderedLookup
+===============
+
+.. code-block:: cpp
+
+   #include <pbbam/PbiLookupData.h>
+
+.. doxygenclass:: PacBio::BAM::UnorderedLookup
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/VirtualPolymeraseBamRecord.rst b/docs/source/api/VirtualPolymeraseBamRecord.rst
new file mode 100644
index 0000000..06d5531
--- /dev/null
+++ b/docs/source/api/VirtualPolymeraseBamRecord.rst
@@ -0,0 +1,11 @@
+VirtualPolymeraseBamRecord
+==========================
+
+.. code-block:: cpp
+
+   #include <pbbam/virtual/VirtualPolymeraseBamRecord.h>
+
+.. doxygenclass:: PacBio::BAM::VirtualPolymeraseBamRecord
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/VirtualPolymeraseCompositeReader.rst b/docs/source/api/VirtualPolymeraseCompositeReader.rst
new file mode 100644
index 0000000..e6cab4e
--- /dev/null
+++ b/docs/source/api/VirtualPolymeraseCompositeReader.rst
@@ -0,0 +1,11 @@
+VirtualPolymeraseCompositeReader
+================================
+
+.. code-block:: cpp
+
+   #include <pbbam/virtual/VirtualPolymeraseCompositeReader.h>
+
+.. doxygenclass:: PacBio::BAM::VirtualPolymeraseCompositeReader
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/VirtualPolymeraseReader.rst b/docs/source/api/VirtualPolymeraseReader.rst
new file mode 100644
index 0000000..14a46e8
--- /dev/null
+++ b/docs/source/api/VirtualPolymeraseReader.rst
@@ -0,0 +1,11 @@
+VirtualPolymeraseReader
+=======================
+
+.. code-block:: cpp
+
+   #include <pbbam/virtual/VirtualPolymeraseReader.h>
+
+.. doxygenclass:: PacBio::BAM::VirtualPolymeraseReader
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/VirtualRegion.rst b/docs/source/api/VirtualRegion.rst
new file mode 100644
index 0000000..7a09846
--- /dev/null
+++ b/docs/source/api/VirtualRegion.rst
@@ -0,0 +1,11 @@
+VirtualRegion
+=============
+
+.. code-block:: cpp
+
+   #include <pbbam/virtual/VirtualRegion.h>
+
+.. doxygenclass:: PacBio::BAM::VirtualRegion
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/VirtualRegionType.rst b/docs/source/api/VirtualRegionType.rst
new file mode 100644
index 0000000..4279200
--- /dev/null
+++ b/docs/source/api/VirtualRegionType.rst
@@ -0,0 +1,8 @@
+VirtualRegionType
+=================
+
+.. code-block:: cpp
+
+   #include <pbbam/virtual/VirtualRegionType.h>
+
+.. doxygenenum:: PacBio::BAM::VirtualRegionType
diff --git a/docs/source/api/VirtualRegionTypeMap.rst b/docs/source/api/VirtualRegionTypeMap.rst
new file mode 100644
index 0000000..eebe637
--- /dev/null
+++ b/docs/source/api/VirtualRegionTypeMap.rst
@@ -0,0 +1,11 @@
+VirtualRegionTypeMap
+====================
+
+.. code-block:: cpp
+
+   #include <pbbam/virtual/VirtualRegionTypeMap.h>
+
+.. doxygenclass:: PacBio::BAM::VirtualRegionTypeMap
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/ZmwGroupQuery.rst b/docs/source/api/ZmwGroupQuery.rst
new file mode 100644
index 0000000..01fc18a
--- /dev/null
+++ b/docs/source/api/ZmwGroupQuery.rst
@@ -0,0 +1,11 @@
+ZmwGroupQuery
+=============
+
+.. code-block:: cpp
+
+   #include <pbbam/ZmwGroupQuery.h>
+
+.. doxygenclass:: PacBio::BAM::ZmwGroupQuery
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/ZmwQuery.rst b/docs/source/api/ZmwQuery.rst
new file mode 100644
index 0000000..375fcb0
--- /dev/null
+++ b/docs/source/api/ZmwQuery.rst
@@ -0,0 +1,11 @@
+ZmwQuery
+========
+
+.. code-block:: cpp
+
+   #include <pbbam/ZmwQuery.h>
+
+.. doxygenclass:: PacBio::BAM::ZmwQuery
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/ZmwWhitelistVirtualReader.rst b/docs/source/api/ZmwWhitelistVirtualReader.rst
new file mode 100644
index 0000000..95d2d1a
--- /dev/null
+++ b/docs/source/api/ZmwWhitelistVirtualReader.rst
@@ -0,0 +1,11 @@
+ZmwWhitelistVirtualReader
+=========================
+
+.. code-block:: cpp
+
+   #include <pbbam/virtual/ZmwWhitelistVirtualReader.h>
+
+.. doxygenclass:: PacBio::BAM::ZmwWhitelistVirtualReader
+   :members:
+   :protected-members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api_reference.rst b/docs/source/api_reference.rst
new file mode 100644
index 0000000..354c0de
--- /dev/null
+++ b/docs/source/api_reference.rst
@@ -0,0 +1,12 @@
+.. _api_reference:
+
+C++ API Reference
+=================
+
+Watch this space for more recipes & how-tos. 
+
+.. toctree::
+   :maxdepth: 1
+   :glob:
+
+   api/*
diff --git a/docs/source/commandline_utilities.rst b/docs/source/commandline_utilities.rst
new file mode 100644
index 0000000..7f1bdaf
--- /dev/null
+++ b/docs/source/commandline_utilities.rst
@@ -0,0 +1,15 @@
+.. _command_line:
+
+Command Line Utilities
+======================
+
+In addition to the main library and wrappers, pbbam also provides a few basic
+utilities for working with PacBio BAM files and their indices (".pbi" files).
+
+.. toctree::
+   :maxdepth: 1
+
+   tools/bam2sam
+   tools/pbindex
+   tools/pbindexdump
+   tools/pbmerge
diff --git a/docs/source/conf.py b/docs/source/conf.py
new file mode 100755
index 0000000..13a512d
--- /dev/null
+++ b/docs/source/conf.py
@@ -0,0 +1,332 @@
+# -*- coding: utf-8 -*-
+#
+# pbbam documentation build configuration file, created by
+# sphinx-quickstart on Fri Dec  4 10:08:52 2015.
+#
+# This file is execfile()d with the current directory set to its
+# containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+import sys
+import os
+import shlex
+import re
+import subprocess
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#sys.path.insert(0, os.path.abspath('.'))
+
+# get RTD to run doxygen first, per http://breathe.readthedocs.org/en/latest/readthedocs.html
+# but... we generate our actual Doxyfile via CMake in a normal build,
+# so we need to create one here, subbing actual values
+read_the_docs_build = os.environ.get('READTHEDOCS', None) == 'True'
+if read_the_docs_build:
+
+    # fetch directory info
+    this_dir = os.path.abspath(os.getcwd())
+    docs_dir = os.path.abspath(os.path.join(this_dir, '..'))
+    root_dir = os.path.abspath(os.path.join(docs_dir, '..'))
+    include_dir = os.path.abspath(os.path.join(root_dir, 'include'))
+
+    # get project version
+    version = ''
+    with open(os.path.abspath(os.path.join(root_dir, 'CMakeLists.txt')), 'r') as cmakeFile:
+        for line in cmakeFile:
+            if line.startswith('project'):
+                version = re.search(r'VERSION\s*([\d.]+)', line).group(1)
+                break
+
+    # read Doxyfile.in, replace markers with real values, and write Doxyfile
+    inDoxyfile = open(os.path.abspath(os.path.join(docs_dir, 'Doxyfile.in')), 'r')
+    configIn   = inDoxyfile.read()
+    configOut  = re.sub('@PacBioBAM_NAME@',       'pbbam', \
+                 re.sub('@PacBioBAM_VERSION@',    version, \
+                 re.sub('@PacBioBAM_DocsDir@',    docs_dir, \
+                 re.sub('@PacBioBAM_IncludeDir@', include_dir, configIn)))) 
+    outDoxyfile = open(os.path.abspath(os.path.join(docs_dir, 'Doxyfile')), 'w')
+    #print(configOut, outDoxyfile)
+    print >>outDoxyfile, configOut
+    outDoxyfile.close()
+
+    # now run Doxygen
+    subprocess.call('cd ..; doxygen', shell=True)
+
+# -- General configuration ------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = ['breathe']
+#extensions = [
+#    'sphinx.ext.autodoc',
+ #   'sphinx.ext.coverage',
+ #   'breathe',
+#]
+
+# Set up Breathe extension variables
+breathe_projects = { 'pbbam' : os.path.join(os.getcwd(), '..', 'xml') + os.path.sep }
+breathe_default_project = 'pbbam'
+breathe_default_members = ('members', 'undoc-members')
+breathe_implementation_filename_extensions = [ '.cpp', '.inl' ]
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffix as a list of string:
+# source_suffix = ['.rst', '.md']
+source_suffix = '.rst'
+
+# The encoding of source files.
+#source_encoding = 'utf-8-sig'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'pbbam'
+copyright = u'2015, Derek Barnett'
+author = u'Derek Barnett'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = '0.4.5'
+# The full version, including alpha/beta/rc tags.
+release = '0.4.5'
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#
+# This is also used if you do content translation via gettext catalogs.
+# Usually you set "language" from the command line for these cases.
+language = None
+
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+#today = ''
+# Else, today_fmt is used as the format for a strftime call.
+#today_fmt = '%B %d, %Y'
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+exclude_patterns = []
+
+# The reST default role (used for this markup: `text`) to use for all
+# documents.
+#default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+#add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+#add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+#show_authors = False
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# A list of ignored prefixes for module index sorting.
+#modindex_common_prefix = []
+
+# If true, keep warnings as "system message" paragraphs in the built documents.
+#keep_warnings = False
+
+# If true, `todo` and `todoList` produce output, else they produce nothing.
+todo_include_todos = False
+
+
+# -- Options for HTML output ----------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+html_theme = 'pacbio-theme'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further.  For a list of options available for each theme, see the
+# documentation.
+#html_theme_options = {}
+
+# Add any paths that contain custom themes here, relative to this directory.
+html_theme_path = ['.']
+
+# The name for this set of Sphinx documents.  If None, it defaults to
+# "<project> v<release> documentation".
+#html_title = None
+
+# A shorter title for the navigation bar.  Default is the same as html_title.
+#html_short_title = None
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+#html_logo = None
+
+# The name of an image file (within the static path) to use as favicon of the
+# docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+#html_favicon = None
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+# Add any extra paths that contain custom files (such as robots.txt or
+# .htaccess) here, relative to this directory. These files are copied
+# directly to the root of the documentation.
+#html_extra_path = []
+
+# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
+# using the given strftime format.
+#html_last_updated_fmt = '%b %d, %Y'
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+#html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+#html_sidebars = {}
+
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+#html_additional_pages = {}
+
+# If false, no module index is generated.
+#html_domain_indices = True
+
+# If false, no index is generated.
+#html_use_index = True
+
+# If true, the index is split into individual pages for each letter.
+#html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+#html_show_sourcelink = True
+
+# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
+#html_show_sphinx = True
+
+# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
+#html_show_copyright = True
+
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it.  The value of this option must be the
+# base URL from which the finished HTML is served.
+#html_use_opensearch = ''
+
+# This is the file name suffix for HTML files (e.g. ".xhtml").
+#html_file_suffix = None
+
+# Language to be used for generating the HTML full-text search index.
+# Sphinx supports the following languages:
+#   'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
+#   'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr'
+#html_search_language = 'en'
+
+# A dictionary with options for the search language support, empty by default.
+# Now only 'ja' uses this config value
+#html_search_options = {'type': 'default'}
+
+# The name of a javascript file (relative to the configuration directory) that
+# implements a search results scorer. If empty, the default will be used.
+#html_search_scorer = 'scorer.js'
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'pbbamdoc'
+
+# -- Options for LaTeX output ---------------------------------------------
+
+latex_elements = {
+# The paper size ('letterpaper' or 'a4paper').
+#'papersize': 'letterpaper',
+
+# The font size ('10pt', '11pt' or '12pt').
+#'pointsize': '10pt',
+
+# Additional stuff for the LaTeX preamble.
+#'preamble': '',
+
+# Latex figure (float) alignment
+#'figure_align': 'htbp',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+#  author, documentclass [howto, manual, or own class]).
+latex_documents = [
+  (master_doc, 'pbbam.tex', u'pbbam Documentation',
+   u'Derek Barnett', 'manual'),
+]
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+#latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+#latex_use_parts = False
+
+# If true, show page references after internal links.
+#latex_show_pagerefs = False
+
+# If true, show URL addresses after external links.
+#latex_show_urls = False
+
+# Documents to append as an appendix to all manuals.
+#latex_appendices = []
+
+# If false, no module index is generated.
+#latex_domain_indices = True
+
+
+# -- Options for manual page output ---------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+    (master_doc, 'pbbam', u'pbbam Documentation',
+     [author], 1)
+]
+
+# If true, show URL addresses after external links.
+#man_show_urls = False
+
+
+# -- Options for Texinfo output -------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+#  dir menu entry, description, category)
+texinfo_documents = [
+  (master_doc, 'pbbam', u'pbbam Documentation',
+   author, 'pbbam', 'One line description of project.',
+   'Miscellaneous'),
+]
+
+# Documents to append as an appendix to all manuals.
+#texinfo_appendices = []
+
+# If false, no module index is generated.
+#texinfo_domain_indices = True
+
+# How to display URL addresses: 'footnote', 'no', or 'inline'.
+#texinfo_show_urls = 'footnote'
+
+# If true, do not generate a @detailmenu in the "Top" node's menu.
+#texinfo_no_detailmenu = False
diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst
new file mode 100644
index 0000000..6860f9f
--- /dev/null
+++ b/docs/source/getting_started.rst
@@ -0,0 +1,144 @@
+
+.. _getting_started:
+
+Getting Started
+===============
+
+.. _getting_started-requirements:
+
+Requirements
+------------
+
+These components will almost certainly already be on your system. 
+ 
+* `gcc`_ (4.8+) OR `clang`_ (v3.1+)
+* pthreads
+* zlib
+
+Double-check your compiler version, to be sure it is compatible.
+
+.. code-block:: console
+
+   $ g++ -v    
+   $ clang -v  
+
+Additional requirements:
+
+* `Boost`_ (1.55+)
+* `CMake`_ (3.0+)
+* `Google Test`_
+* `htslib`_ (PacBio fork)
+
+For additional languages:
+
+* `SWIG`_ (3.0.5+)
+
+For building API documentation locally:
+
+* `Doxygen`_
+
+For maximal convenience, install htslib and Google Test in the same parent directory where you plan to install pbbam.
+
+.. _Boost: http://www.boost.org/
+.. _clang: http://clang.llvm.org/
+.. _CMake: https://cmake.org/
+.. _Doxygen: http://www.stack.nl/~dimitri/doxygen/
+.. _gcc: https://gcc.gnu.org/
+.. _Google Test: https://github.com/google/googletest
+.. _htslib: https://github.com/PacificBiosciences/htslib.git 
+.. _SWIG: http://www.swig.org/
+
+.. _getting_started-build:
+
+Clone & Build
+-------------
+
+.. note::
+
+   The following steps are for building the C++ library and command-line utilities. 
+   If you are integrating pbbam into a C#, Python, or R project, take a look at the 
+   instructions for :ref:`additional languages <swig_bindings>`.
+
+The basic steps for obtaining pbbam and building it from source are as follows:
+
+.. code-block:: console
+
+   $ git clone https://github.com/PacificBiosciences/pbbam.git
+   $ cd pbbam
+   $ mkdir build
+   $ cd build
+   $ cmake ..
+   $ make -j 4    # compiles using 4 threads
+
+Output:
+
+  * Library   : <pbbam_root>/lib
+  * Headers   : <pbbam_root>/include
+  * Utilities : <pbbam_root>/bin
+ 
+You may need to set a few options on the cmake command, to point to dependencies' install locations. 
+Common installation-related options include:
+
+  * HTSLIB_ROOTDIR
+  * GTEST_SRC_DIR
+  
+Add these using the '-D' argument, like this:
+
+.. code-block:: console
+
+   $ cmake .. -DHTSLIB_ROOTDIR="path/to/htslib"
+ 
+To run the test suite, run:
+
+.. code-block:: console
+
+   $ make test
+
+To build a local copy of the (Doxygen-style) API documentation, run:
+
+.. code-block:: console
+
+   $ make doc
+   
+And then open <pbbam_root>/docs/html/index.html in your favorite browser.
+
+.. _getting_started-integrate:
+
+Integrate
+---------
+
+CMake-based projects
+````````````````````
+
+For CMake-based projects that will "ship with" or otherwise live alongside pbbam, you can 
+use the approach described here.
+
+Before defining your library or executable, add the following:
+
+.. code-block:: cmake
+
+   add_subdirectory(<path/to/pbbam> external/build/pbbam)
+
+When it's time to run "make" this will ensure that pbbam will be built, inside your own project's 
+build directory. After this point in the CMakeLists.txt file(s), a few variables will be available 
+that can be used to set up your include paths and library linking targets:
+
+.. code-block:: cmake
+
+   include_directories( 
+       ${PacBioBAM_INCLUDE_DIRS} 
+       # other includes that your project needs
+   )
+
+   add_executable(foo)
+   
+   target_link_libraries(foo 
+       ${PacBioBAM_LIBRARIES}
+       # other libs that your project needs
+   )
+
+Non-CMake projects
+``````````````````
+
+If you're using something other than CMake for your project's build system, then you need to point 
+it to pbbam's include directory & library, as well as those of its dependencies (primarily htslib). 
diff --git a/docs/source/index.rst b/docs/source/index.rst
new file mode 100644
index 0000000..426c3c5
--- /dev/null
+++ b/docs/source/index.rst
@@ -0,0 +1,33 @@
+.. pbbam documentation master file, created by
+   sphinx-quickstart on Fri Dec  4 10:08:52 2015.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+.. _home:
+
+pbbam documentation
+===================
+
+As of the 3.0 release of SMRTanalysis, PacBio is embracing the industry standard BAM 
+format for (both aligned and unaligned) basecall data files. We have also formulated 
+a BAM companion file format (bam.pbi) enabling fast access to a richer set of per-read 
+information as well as compatibility for software built around the legacy cmp.h5 format.
+
+The **pbbam** software package provides components to create, query, & edit PacBio BAM
+files and associated indices. These components include a core C++ library, bindings for 
+additional languages, and command-line utilities.
+
+.. toctree::
+   :maxdepth: 1
+
+   getting_started
+   api_reference
+   swig_bindings
+   commandline_utilities
+
+
+Search:
+
+* :ref:`genindex`
+* :ref:`search`
+
diff --git a/docs/source/pacbio-theme/static/headerGradient.jpg b/docs/source/pacbio-theme/static/headerGradient.jpg
new file mode 100644
index 0000000..883f147
Binary files /dev/null and b/docs/source/pacbio-theme/static/headerGradient.jpg differ
diff --git a/docs/source/pacbio-theme/static/pacbio.css b/docs/source/pacbio-theme/static/pacbio.css
new file mode 100644
index 0000000..b4ab87f
--- /dev/null
+++ b/docs/source/pacbio-theme/static/pacbio.css
@@ -0,0 +1,238 @@
+/**
+ * Sphinx stylesheet -- default theme
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+ 
+@import url("basic.css");
+ 
+/* -- page layout ----------------------------------------------------------- */
+ 
+body {
+    font-family: Arial, sans-serif;
+    font-size: 100%;
+    background-color: #555;
+    color: #555;
+    margin: 0;
+    padding: 0;
+    min-width: 500px;
+    max-width: 956px;
+    margin: 0 auto;
+}
+
+div.documentwrapper {
+    float: left;
+    width: 100%;
+}
+
+div.bodywrapper {
+    margin: 0 0 0 230px;
+}
+
+hr{
+    border: 1px solid #B1B4B6;
+    
+}
+ 
+div.document {
+    background-color: #eee;
+}
+ 
+div.body {
+    background-color: #ffffff;
+    color: #3E4349;
+    padding: 30px 30px 30px 30px;
+    font-size: 0.8em;
+}
+ 
+div.footer {
+    color: #555;
+	background-color: #fff;
+    padding: 13px 0;
+    text-align: center;
+    font-size: 75%;
+
+}
+div.footer a {
+    color: #444;
+    text-decoration: underline;
+}
+ 
+div.related {
+    background: #fff url(headerGradient.jpg);
+    line-height: 80px;
+    color: #fff;
+    font-size: 0.80em;
+    height: 79px;
+    z-index: -1;
+}
+
+div.related ul {
+    background: url(pacbioLogo.png) 10px no-repeat;
+    padding: 0 0 0 200px;
+}
+ 
+div.related a {
+    color: #E2F3CC;
+}
+ 
+div.sphinxsidebar {
+    font-size: 0.75em;
+    line-height: 1.5em;
+}
+
+div.sphinxsidebarwrapper{
+    padding: 20px 0;
+}
+ 
+div.sphinxsidebar h3,
+div.sphinxsidebar h4 {
+    font-family: Arial, sans-serif;
+    color: #222;
+    font-size: 1.2em;
+    font-weight: bold;
+    margin: 0;
+    padding: 5px 10px 0 10px;
+}
+
+div.sphinxsidebar h4{
+    font-size: 1.1em;
+}
+ 
+div.sphinxsidebar h3 a {
+    color: #444;
+}
+ 
+ 
+div.sphinxsidebar p {
+    color: #888;
+    padding: 0px 20px;
+	margin-top: 5px;
+}
+ 
+div.sphinxsidebar p.topless {
+}
+ 
+div.sphinxsidebar ul {
+    margin: 5px 20px 10px 20px;
+    padding: 0;
+    color: #000;
+}
+ 
+div.sphinxsidebar a {
+    color: #444;
+}
+ 
+div.sphinxsidebar input {
+    border: 1px solid #ccc;
+    font-family: sans-serif;
+    font-size: 1em;
+}
+
+div.sphinxsidebar input[type=text]{
+    margin-left: 20px;
+}
+ 
+/* -- body styles ----------------------------------------------------------- */
+ 
+a {
+    color: #005B81;
+    text-decoration: none;
+}
+ 
+a:hover {
+    color: #E32E00;
+    text-decoration: underline;
+}
+ 
+div.body h1,
+div.body h2,
+div.body h3,
+div.body h4,
+div.body h5,
+div.body h6 {
+    font-family: Arial, sans-serif;
+    font-weight: bold;
+    color: #264868;
+    margin: 30px 0px 10px 0px;
+    padding: 5px 0 5px 0px;
+}
+ 
+div.body h1 { border-top: 20px solid white; margin-top: 0; font-size: 180%; font-weight: normal; }
+div.body h2 { font-size: 125%; }
+div.body h3 { font-size: 110%; }
+div.body h4 { font-size: 100%; }
+div.body h5 { font-size: 100%; }
+div.body h6 { font-size: 100%; }
+ 
+a.headerlink {
+    color: #c60f0f;
+    font-size: 0.8em;
+    padding: 0 4px 0 4px;
+    text-decoration: none;
+}
+ 
+a.headerlink:hover {
+    background-color: #c60f0f;
+    color: white;
+}
+ 
+div.body p, div.body dd, div.body li {
+    line-height: 1.5em;
+    font-size: 1em;
+}
+ 
+div.admonition p.admonition-title + p {
+    display: inline;
+}
+
+div.highlight{
+    background-color: white;
+}
+
+div.note {
+    background-color: #eee;
+    border: 1px solid #ccc;
+}
+ 
+div.seealso {
+    background-color: #ffc;
+    border: 1px solid #ff6;
+}
+ 
+div.topic {
+    background-color: #eee;
+}
+ 
+div.warning {
+    background-color: #ffe4e4;
+    border: 1px solid #f66;
+}
+ 
+p.admonition-title {
+    display: inline;
+}
+ 
+p.admonition-title:after {
+    content: ":";
+}
+ 
+pre {
+    padding: 10px;
+    background-color: White;
+    color: #222;
+    line-height: 1.2em;
+    border: 1px solid #C6C9CB;
+    font-size: 1.2em;
+    margin: 1.5em 0 1.5em 0;
+    -webkit-box-shadow: 1px 1px 1px #d8d8d8;
+    -moz-box-shadow: 1px 1px 1px #d8d8d8;
+}
+ 
+tt {
+    background-color: #ecf0f3;
+    color: #222;
+    padding: 1px 2px;
+    font-size: 1.2em;
+    font-family: monospace;
+}
+
diff --git a/docs/source/pacbio-theme/static/pacbioLogo.png b/docs/source/pacbio-theme/static/pacbioLogo.png
new file mode 100644
index 0000000..b2e4887
Binary files /dev/null and b/docs/source/pacbio-theme/static/pacbioLogo.png differ
diff --git a/docs/source/pacbio-theme/static/pygments.css b/docs/source/pacbio-theme/static/pygments.css
new file mode 100644
index 0000000..4588cde
--- /dev/null
+++ b/docs/source/pacbio-theme/static/pygments.css
@@ -0,0 +1,55 @@
+.c { color: #999988; font-style: italic } /* Comment */
+.k { font-weight: bold } /* Keyword */
+.o { font-weight: bold } /* Operator */
+.cm { color: #999988; font-style: italic } /* Comment.Multiline */
+.cp { color: #999999; font-weight: bold } /* Comment.preproc */
+.c1 { color: #999988; font-style: italic } /* Comment.Single */
+.gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
+.ge { font-style: italic } /* Generic.Emph */
+.gr { color: #aa0000 } /* Generic.Error */
+.gh { color: #999999 } /* Generic.Heading */
+.gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
+.go { color: #111 } /* Generic.Output */
+.gp { color: #555555 } /* Generic.Prompt */
+.gs { font-weight: bold } /* Generic.Strong */
+.gu { color: #aaaaaa } /* Generic.Subheading */
+.gt { color: #aa0000 } /* Generic.Traceback */
+.kc { font-weight: bold } /* Keyword.Constant */
+.kd { font-weight: bold } /* Keyword.Declaration */
+.kp { font-weight: bold } /* Keyword.Pseudo */
+.kr { font-weight: bold } /* Keyword.Reserved */
+.kt { color: #445588; font-weight: bold } /* Keyword.Type */
+.m { color: #009999 } /* Literal.Number */
+.s { color: #bb8844 } /* Literal.String */
+.na { color: #008080 } /* Name.Attribute */
+.nb { color: #999999 } /* Name.Builtin */
+.nc { color: #445588; font-weight: bold } /* Name.Class */
+.no { color: #ff99ff } /* Name.Constant */
+.ni { color: #800080 } /* Name.Entity */
+.ne { color: #990000; font-weight: bold } /* Name.Exception */
+.nf { color: #990000; font-weight: bold } /* Name.Function */
+.nn { color: #555555 } /* Name.Namespace */
+.nt { color: #000080 } /* Name.Tag */
+.nv { color: purple } /* Name.Variable */
+.ow { font-weight: bold } /* Operator.Word */
+.mf { color: #009999 } /* Literal.Number.Float */
+.mh { color: #009999 } /* Literal.Number.Hex */
+.mi { color: #009999 } /* Literal.Number.Integer */
+.mo { color: #009999 } /* Literal.Number.Oct */
+.sb { color: #bb8844 } /* Literal.String.Backtick */
+.sc { color: #bb8844 } /* Literal.String.Char */
+.sd { color: #bb8844 } /* Literal.String.Doc */
+.s2 { color: #bb8844 } /* Literal.String.Double */
+.se { color: #bb8844 } /* Literal.String.Escape */
+.sh { color: #bb8844 } /* Literal.String.Heredoc */
+.si { color: #bb8844 } /* Literal.String.Interpol */
+.sx { color: #bb8844 } /* Literal.String.Other */
+.sr { color: #808000 } /* Literal.String.Regex */
+.s1 { color: #bb8844 } /* Literal.String.Single */
+.ss { color: #bb8844 } /* Literal.String.Symbol */
+.bp { color: #999999 } /* Name.Builtin.Pseudo */
+.vc { color: #ff99ff } /* Name.Variable.Class */
+.vg { color: #ff99ff } /* Name.Variable.Global */
+.vi { color: #ff99ff } /* Name.Variable.Instance */
+.il { color: #009999 } /* Literal.Number.Integer.Long */
+
diff --git a/docs/source/pacbio-theme/theme.conf b/docs/source/pacbio-theme/theme.conf
new file mode 100644
index 0000000..dd24a1a
--- /dev/null
+++ b/docs/source/pacbio-theme/theme.conf
@@ -0,0 +1,4 @@
+[theme]
+inherit = default 
+stylesheet = pacbio.css
+pygments_style = tango
diff --git a/docs/source/requirements.txt b/docs/source/requirements.txt
new file mode 100644
index 0000000..cd6467e
--- /dev/null
+++ b/docs/source/requirements.txt
@@ -0,0 +1 @@
+breathe
diff --git a/docs/source/swig_bindings.rst b/docs/source/swig_bindings.rst
new file mode 100644
index 0000000..e9dc33a
--- /dev/null
+++ b/docs/source/swig_bindings.rst
@@ -0,0 +1,257 @@
+.. _swig_bindings:
+
+Additional Languages
+====================
+
+pbbam uses SWIG to generate bindings for other languages. Currently this includes support for C#, Python, and R.
+
+These bindings are disabled by default. See the entry below for your target language to configure pbbam & integrate
+the bindings into your project.
+
+.. _swig_bindings-csharp:
+
+C#
+------
+
+Building
+````````
+
+To build the support for C#, you need to tell CMake to enable it before building:
+
+.. code-block:: console
+
+   $ cmake .. -DPacBioBAM_wrap_csharp=ON
+   $ make
+
+The 'make' step will build relevant libraries/wrappers, and then run a simple program using them, 
+as a quick sanity-check. 
+
+After building, the libraries and wrappers can be found under the pbbam/lib/csharp directory. 
+
+API Example
+```````````
+
+.. code-block:: c#
+
+   using PacBio.BAM;
+
+   namespace TestStuff
+   {
+       public class TestPbbam
+       {
+           public static void TestZmwQuery()
+           {
+               var d = new DataSet("foo.bam");
+               var q = new ZmwQuery(new IntList {1, 2, 3}, d);
+               var q2 = new ZmwQuery(new IntList { 14743 }, d);
+               if (0 != q.Count() || 4 != q2.Count())
+               {
+                   throw new Exception("ZmwQuery not working");
+               }
+               Console.WriteLine("TestZmwQuery - OK!");
+           }
+       }
+   }
+
+.. _swig_bindings-python:
+
+Python
+------
+
+Building
+````````
+
+To build the support for Python, you need to tell CMake to enable it:
+
+.. code-block:: console
+
+   $ cmake .. -DPacBioBAM_wrap_python=ON
+   $ make
+
+The 'make' step will build relevant libraries/wrappers, and then run a simple program using them, 
+as a quick sanity-check. 
+
+After building, the libraries and wrappers can be found in the pbbam/lib/python directory. 
+'make test' will also include some Python-side unit tests.
+
+To use the PacBioBam module, you can set your PYTHONPATH before invoking your script:
+
+.. code-block:: console
+
+   $ PYTHONPATH="path/to/pbbam/lib/python" python myScript.py
+
+Or otherwise configure your environment to find the PacBioBam module. 
+
+API Example
+```````````
+
+.. code-block:: python
+
+   import PacBioBam
+   
+   try:
+       file = PacBioBam.BamFile('foo.bam')
+       writer = PacBioBam.BamWriter('new.bam', file.Header())
+       dataset = PacBioBam.DataSet(file)
+       entireFile = PacBioBam.EntireFileQuery(dataset)
+       for record in PacBioBam.Iterate(entireFile):
+           writer.Write(record)
+   except RuntimeError:
+       # found error
+       pass
+   
+Python-Specific Notes
+`````````````````````
+   
+Iteration
+.........
+
+Iteration over dataset queries in Python will likely need to use the PacBioBam.Iterate() method. Thus
+file iteration loops will look something like the following:
+
+.. code-block:: python
+       
+   entireFile = PacBioBam.EntireFileQuery("input.bam")
+   for record in PacBioBam.Iterate(entireFile):
+       foo.bar(record)
+
+Exception Handling
+..................
+   
+Exceptions are used widely by the C++ library. To handle them from Python, you can use try blocks, looking for
+any RuntimeError:
+
+.. code-block:: python
+
+   try:
+       file = PacBioBam.BamFile("does_not_exist.bam")
+   except RuntimeError: 
+       print("caught expected error")
+   
+.. _swig_bindings-r:
+
+R
+------
+
+Building
+````````
+
+To build the support for R, you need to tell CMake to enable it:
+
+.. code-block:: console
+
+   $ cmake .. -DPacBioBAM_wrap_r=ON
+   $ make
+   
+The 'make' step will build relevant libraries/wrappers, and then run a simple program using them, 
+as a quick sanity-check. 
+
+After building, the libraries and wrappers can be found in the pbbam/lib/R directory. 
+'make test' will also include some R-side unit tests.
+
+To use the PacBioBam module in your script, nothing should be needed up front - simply invoke 'R' as normal. 
+You'll do the dynamic load of the R module near the beginning of your script:
+
+.. code-block:: r
+
+   # load pbbam R library
+   lib_path <- "path/to/pbbam/lib/R"
+   pbbam_libname <- paste(lib_path, "PacBioBam",   sep="/")
+   pbbam_wrapper <- paste(lib_path, "PacBioBam.R", sep="/")
+   dyn.load(paste(pbbam_libname, .Platform$dynlib.ext, sep=""))
+   source(pbbam_wrapper)
+   cacheMetaData(1) 
+
+
+API Example
+```````````
+
+.. code-block:: r
+
+   # load pbbam R library
+   lib_path <- "path/to/pbbam/lib/R"
+   pbbam_libname <- paste(lib_path, "PacBioBam",   sep="/")
+   pbbam_wrapper <- paste(lib_path, "PacBioBam.R", sep="/")
+   dyn.load(paste(pbbam_libname, .Platform$dynlib.ext, sep=""))
+   source(pbbam_wrapper)
+   cacheMetaData(1)    
+  
+   # sample method
+   copyFileAndFetchRecordNames <-function(inputFn, outputFn) {
+	
+       result <- tryCatch(
+       {
+           names_in <- list()
+           file   <- BamFile(inputFn)
+           writer <- BamWriter(outputFn, file$Header())
+           ds     <- DataSet(file)
+            
+           entireFile <- EntireFileQuery(ds)
+           iter <- entireFile$begin()
+           end  <- entireFile$end()
+   			
+           while ( iter$'__ne__'(end) ) {
+               record <- iter$value()
+                
+               names_in <- c(names_in, record$FullName())
+               writer$Write(record)
+               iter$incr()
+            }
+            writer$TryFlush()
+            return(names_in)
+        },
+        error = function(e) {
+            # handle error 
+            return(list())
+        })
+        return(result)
+   }
+
+R-Specific Notes
+````````````````
+
+Iteration
+.........
+
+To compare iterators, you'll need to explicitly use the '__eq__' or '__ne__' methods. Thus, iterating over
+a data query will look something like this:
+
+.. code-block:: r
+
+   iter <- query$begin()
+   end  <- query$end()
+   while ( iter$'__ne__'(end) ) {
+       record <- iter$value() 
+       
+       # do stuff with record
+       iter$incr()  # advance to the next record
+   }
+   
+operator[]
+..........  
+   
+In C++, operator[] can be used in some classes to directly access elements in a sequence, e.g. a Cigar string:
+
+.. code-block:: cpp
+
+   CigarOperation op = cigar[0]; 
+   
+For the R wrapper, if you want to do the same sort of thing, you'll need to use the '__getitem__' method. 
+Please note that these are **0-based** indices, not 1-based as in much of R. 
+
+.. code-block:: r
+
+   op <- cigar$'__getitem__'(0) 
+   
+Exception Handling
+..................
+
+Exceptions are used widely by the C++ library. To handle them from R, you can use the 'tryCatch' block, listening for 
+'error' type exceptions.
+
+ .. code-block:: r
+ 
+    result <- tryCatch(
+    {
+        f <- BamFile("does_not_exist.bam") # this statement will throw
+    },
+    error = function(e) {
+        print(paste("caught expected error: ",e))
+    })
diff --git a/docs/source/tools/bam2sam.rst b/docs/source/tools/bam2sam.rst
new file mode 100644
index 0000000..4577686
--- /dev/null
+++ b/docs/source/tools/bam2sam.rst
@@ -0,0 +1,21 @@
+.. _bam2sam:
+
+bam2sam
+=======
+
+::
+
+  Usage: bam2sam [options] [input]
+
+  bam2sam converts a BAM file to SAM. It is essentially a stripped-down 'samtools
+  view', mostly useful for testing/debugging without requiring samtools. Input BAM
+  file is read from a file or stdin, and SAM output is written to stdout.
+
+  Options:
+    -h, --help            show this help message and exit
+    --version             show program's version number and exit
+
+  Options:
+    input               Input BAM file. If not provided, stdin will be used as input.
+    --no-header         Omit header from output.
+    --header-only       Print only the header (no records).
diff --git a/docs/source/tools/pbindex.rst b/docs/source/tools/pbindex.rst
new file mode 100644
index 0000000..e7c491f
--- /dev/null
+++ b/docs/source/tools/pbindex.rst
@@ -0,0 +1,18 @@
+.. _pbindex:
+
+pbindex
+=======
+
+::
+
+  Usage: pbindex <input>
+
+  pbindex creates an index file that enables random-access to PacBio-specific data
+  in BAM files. Generated index filename will be the same as input BAM plus .pbi suffix.
+
+  Options:
+    -h, --help            show this help message and exit
+    --version             show program's version number and exit
+
+  Input/Output:
+    input                 Input BAM file
diff --git a/docs/source/tools/pbindexdump.rst b/docs/source/tools/pbindexdump.rst
new file mode 100644
index 0000000..6829064
--- /dev/null
+++ b/docs/source/tools/pbindexdump.rst
@@ -0,0 +1,233 @@
+.. _pbindexdump:
+
+pbindexdump
+===========
+
+::
+
+  Usage: pbindexdump [options] [input]
+
+  pbindexdump prints a human-readable view of PBI data to stdout.
+
+  Options:
+    -h, --help            show this help message and exit
+    --version             show program's version number and exit
+
+  Input/Output:
+    input               Input PBI file. If not provided, stdin will be used as input.
+    --format=STRING     Output format, one of:
+                            json, cpp
+
+                        json: pretty-printed JSON [default]
+
+                        cpp: copy/paste-able C++ code that can be used to
+                        construct the equivalent PacBio::BAM::PbiRawData object
+
+  JSON Formatting:
+    --json-indent-level=INT
+                        JSON indent level [4]
+    --json-raw          Prints fields in a manner that more closely reflects the
+                        PBI file format - presenting data as per-field columns,
+                        not per-record objects.
+
+JSON Output Schemas
+-------------------
+
+Normal JSON:
+
+.. code-block:: JSON
+
+    {
+      "type": "object",
+      "properties": {
+        "fileSections": {
+          "type": "array",
+          "items": { "type": "string" }
+        },
+        "numReads": { "type": "integer" },
+        "reads": {
+          "type": "array",
+          "items": {
+            "type": "object",
+            "properties": {
+              "aEnd": { "type": "integer" },
+              "aStart": { "type": "integer" },
+              "bcForward": { "type": "integer" },
+              "bcQuality": { "type": "integer" },
+              "bcReverse": { "type": "integer" },
+              "contextFlag": { "type": "integer" },
+              "fileOffset": { "type": "integer" },
+              "holeNumber": { "type": "integer" },
+              "mapQuality": { "type": "integer" },
+              "nM": { "type": "integer" },
+              "nMM": { "type": "integer" },
+              "qEnd": { "type": "integer" },
+              "qStart": { "type": "integer" },
+              "readQuality": { "type": "number" },
+              "reverseStrand": { "type": "integer" },
+              "rgId": { "type": "integer" },
+              "tEnd": { "type": "integer" },
+              "tId": { "type": "integer" },
+              "tStart": { "type": "integer" }
+            },
+            "required": [
+              "contextFlag",
+              "fileOffset",
+              "holeNumber",
+              "qEnd",
+              "qStart",
+              "readQuality",
+              "rgId"
+            ]
+          }
+        },
+        "references": {
+          "type": "array",
+          "items": {
+            "type": "object",
+            "properties": {
+              "beginRow": { "type": "integer" },
+              "endRow": { "type": "integer" },
+              "tId": { "type": "integer" }
+            },
+            "required" : [ "beginRow", "endRow","tId" ]
+          }
+        },
+        "version": { "type": "string" }
+      },
+      "required": [
+        "fileSections",
+        "numReads",
+        "reads",
+        "version"
+      ]
+    }
+
+"Raw" JSON:
+
+.. code-block:: JSON
+
+    {
+      "type": "object",
+      "properties": {
+        "barcodeData" : {
+          "type" : "object",
+          "properties": {
+            "bcForward" : {
+              "type": "array",
+              "items" : { "type": "integer" }
+            },
+            "bcQuality" : {
+              "type": "array",
+              "items" : { "type": "integer" }
+            },
+            "bcReverse" : {
+              "type": "array",
+              "items" : { "type": "integer" }
+            }
+          }
+        },
+        "basicData" : {
+          "type" : "object",
+          "properties": {
+            "contextFlag" : {
+              "type": "array",
+              "items" : { "type": "integer" }
+            },
+            "fileOffset" : {
+              "type": "array",
+              "items" : { "type": "integer" }
+            },
+            "holeNumber" : {
+              "type": "array",
+              "items" : { "type": "integer" }
+            },
+            "qEnd" : {
+              "type": "array",
+              "items" : { "type": "integer" }
+            },
+            "qStart" : {
+              "type": "array",
+              "items" : { "type": "integer" }
+            },
+            "readQuality" : {
+              "type": "array",
+              "items" : { "type": "number" }
+            },
+            "rgId" : {
+              "type": "array",
+              "items" : { "type": "integer" }
+            }
+          }
+        },
+        "fileSections": {
+          "type": "array",
+          "items": { "type": "string" }
+        },
+        "mappedData" : {
+          "type" : "object",
+          "properties": {
+            "aEnd" : {
+              "type": "array",
+              "items" : { "type": "integer" }
+            },
+            "aStart" : {
+              "type": "array",
+              "items" : { "type": "integer" }
+            },
+            "mapQuality" : {
+              "type": "array",
+              "items" : { "type": "integer" }
+            },
+            "nM" : {
+              "type": "array",
+              "items" : { "type": "integer" }
+            },
+            "nMM" : {
+              "type": "array",
+              "items" : { "type": "integer" }
+            },
+            "readQuality" : {
+              "type": "array",
+              "items" : { "type": "number" }
+            },
+            "reverseStrand" : {
+              "type": "array",
+              "items" : { "type": "integer" }
+            },
+            "tEnd" : {
+              "type": "array",
+              "items" : { "type": "integer" }
+            },
+            "tId" : {
+              "type": "array",
+              "items" : { "type": "integer" }
+            },
+            "tStart" : {
+              "type": "array",
+              "items" : { "type": "integer" }
+            }
+          }
+        },
+        "numReads": { "type": "integer" },
+        "references": {
+          "type": "array",
+          "items": {
+            "type": "object",
+            "properties": {
+              "beginRow": { "type": "integer" },
+              "endRow": { "type": "integer" },
+              "tId": { "type": "integer" }
+            },
+            "required" : [ "beginRow", "endRow","tId" ]
+          }
+        },
+        "version" : { "type": "string" }
+      },
+      "required": [
+        "fileSections",
+        "numReads",
+        "basicData",
+        "version"
+      ]
+    }
diff --git a/docs/source/tools/pbmerge.rst b/docs/source/tools/pbmerge.rst
new file mode 100644
index 0000000..937ec56
--- /dev/null
+++ b/docs/source/tools/pbmerge.rst
@@ -0,0 +1,30 @@
+.. _pbmerge:
+
+pbmerge
+=======
+
+::
+
+  Usage: pbmerge [options] [-o <out.bam>] <INPUT>
+
+  pbmerge merges PacBio BAM files. If the input is DataSetXML, any filters will be
+  applied. If no output filename is specified, new BAM will be written to stdout.
+
+  Options:
+  -h, --help            show this help message and exit
+  --version             show program's version number and exit
+
+  Input/Output:
+    -o output           Output BAM filename.
+    --no-pbi            Set this option to skip PBI index file creation. PBI
+                        creation is automatically skipped if no output filename
+                        is provided.
+    INPUT               Input may be one of:
+                            DataSetXML, list of BAM files, or FOFN
+
+                            fofn: pbmerge -o merged.bam bams.fofn
+
+                            bams: pbmerge -o merged.bam 1.bam 2.bam 3.bam
+
+                            xml:  pbmerge -o merged.bam foo.subreadset.xml
+
diff --git a/docs/specs/pbbam.rst b/docs/specs/pbbam.rst
new file mode 100644
index 0000000..6842371
--- /dev/null
+++ b/docs/specs/pbbam.rst
@@ -0,0 +1,631 @@
+=================================================================
+**pbbam Software Design & Functional Specification**
+=================================================================
+| *Version 0.1*
+| *Pacific Biosciences Engineering Group*
+| *Jan 29, 2016*
+
+1. Revision History
+===================
+
++-------------+---------------+--------------------+---------------------------+
+| **Date**    | **Revision**  | **Author(s)**      | **Comments**              |
++=============+===============+====================+===========================+
+| 01-29-2016  | 0.1           | Derek Barnett      | Initial draft created     |
+|             |               |                    |                           |
++-------------+---------------+--------------------+---------------------------+
+
+2. Introduction
+===============
+
+2.1. Document Specification Identifier
+--------------------------------------
+
++-----------------------------------+------------------------------------------+
+| **Document Specification Prefix** | **Description**                          |
++===================================+==========================================+
+| FS\_SA\_PBBAM\_                   | Functional spec for pbbam                |
++-----------------------------------+------------------------------------------+
+
+2.2. Purpose
+------------
+
+This document is intended to describe the requirements and interface of the pbbam
+library, which provides functionality for creating, querying, and editing PacBio
+BAM files and associated file formats.
+
+2.3. Scope of Document
+----------------------
+
+This document covers the expected usage of the pbbam library, as well as any
+desired or required performance characteristics with respect to quality or speed.
+
+This document does not provide installation instructions or API documentation.
+
+2.4. Glossary of Terms
+----------------------
+
+The table below specifies only terms specific to this document, and skips
+acronyms/terms that are specified in `Pacific Biosciences Software Glossary`_.
+
+.. _Pacific Biosciences Software Glossary: http://smrtanalysis-docs/pb_sw_glossary.html
+
++------------------+-----------------------------------------------------------+
+| **Acronym/Term** | **Description**                                           |
++==================+===========================================================+
+| API              | Application Programming Interface - a set of routines,    |
+|                  | protocols, and tools for building software applications.  |
+|                  | In this document, this will consist of one or more        |
+|                  | cooperating libraries that specify data structures,       |
+|                  | methods, etc. for use within a target programming         |
+|                  | language.                                                 |
++------------------+-----------------------------------------------------------+
+| Client           | An application that uses the library.                     |
++------------------+-----------------------------------------------------------+
+| I/O              | Input/output of data.                                     |
++------------------+-----------------------------------------------------------+
+
+2.5. References
+---------------
+
++-------------+------------------------------+--------------------------------------+
+| **Ref No.** | **Document Name, Link**      | **Description**                      |
++=============+==============================+======================================+
+| (1)         | `BAM format`_                | General SAM/BAM specification        |
++-------------+------------------------------+--------------------------------------+
+| (2)         | `PacBio BAM`_                | PacBio BAM specification             |
++-------------+------------------------------+--------------------------------------+
+| (3)         | `PacBio BAM index`_          | PacBio BAM index specification       |
++-------------+------------------------------+--------------------------------------+
+| (4)         | `DataSet XML`_               | PacBio DataSet XML specification     |
++-------------+------------------------------+--------------------------------------+
+| (5)         | `Software Style Guide`_      | PacBio coding standards              |
++-------------+------------------------------+--------------------------------------+
+| (6)         | `SMRT Analysis`_             | General SMRT Analysis infrastructure |
++-------------+------------------------------+--------------------------------------+
+
+.. _BAM format: https://samtools.github.io/hts-specs/SAMv1.pdf
+.. _PacBio BAM: http://pacbiofileformats.readthedocs.org/en/3.0/BAM.html
+.. _PacBio BAM index: http://pacbiofileformats.readthedocs.org/en/3.0/PacBioBamIndex.html
+.. _DataSet XML: https://github.com/PacificBiosciences/PacBioFileFormats/blob/3.0/DataSet.rst
+.. _Software Style Guide: http://smrtanalysis-docs/_downloads/PBISoftwareStyleGuide.doc
+.. _SMRT Analysis: http://smrtanalysis-docs/smrt_docs.html
+
+3. Software Overview
+====================
+
+3.1. Product Description
+------------------------
+
+As of the 3.0 release of SMRTanalysis, PacBio is embracing the industry standard
+`BAM format`_ (1) for (both aligned and unaligned) basecall data files. We have
+also formulated a BAM companion file format (.bam.pbi) enabling fast access to a
+richer set of per-read information as well as compatibility for software built
+around the legacy cmp.h5 format.
+
+The pbbam library provides components to create, query, & transform PacBio BAM
+data: sequence files and their associated indices. This includes a core C++
+library as well as bindings for additional programming languages.
+
+3.2. Product Functional Capabilities
+------------------------------------
+
+The library must be able to read and write BAM files that conform to the
+`PacBio BAM`_ specification (2). BAM records must be editable e.g. adding
+alignment information. Random access must be supported, whether by genomic
+region or by filtering record features. To this end, the library will be able to
+read, write, and create associated index files - both the standard BAM index
+(.bai) and the `PacBio BAM index`_ (.pbi) (3). In addition to working with
+individual files, datasets of related BAM files will be supported. These are
+described in a `DataSet XML`_ document. (4)
+
+3.3. User Characteristics
+-------------------------
+
++---------------------+--------------------------------------------------------+
+| **User Class/Role** | **User Knowledge and Skill Levels**                    |
++=====================+========================================================+
+| Developer           | Competence in one or more programming languages        |
+|                     | supported (C++, R, Python, C#). No knowledge of        |
+|                     | molecular biology wet lab techniques required.         |
++---------------------+--------------------------------------------------------+
+
+3.4. User Operations and Practices
+----------------------------------
+
+Developer users will interact with the software by incorporating the library
+into a client application.
+
+3.5. Operating Environment
+--------------------------
+
+The software is intended to be run in a Linux or OSX environment, with ideally 4
+or more cores.
+
+3.6. Design and Implementation Constraints
+------------------------------------------
+
+Currently there are no constraints outside the operating environment and speed
+requirements. In particular, as the library will be used for writing the BAM
+files coming off a Sequel instrument, it should be able to keep pace.
+
+3.7. Assumptions and Dependencies
+---------------------------------
+
+Input routines for the library will expect to receive files that conform to the
+`PacBio BAM`_ (2) or `DataSet XML`_ (4) specifications.
+
+The pbbam library depends on Boost, zlib, and htslib libraries.
+
+3.8. Other Software
+-------------------
+
+Output PacBio BAMs will be compatible with the `PacBio BAM`_ specification (2)
+and thus compatible with the general `BAM format`_ specification (1). This
+ensures that a wide variety of downstream tools can interact with data files.
+
+The software uses `CMake`_ as its build system.
+
+The core C++ API relies on the following 3rd party components:
+
+* `zlib`_
+* `htslib`_
+* `Boost`_ (header-only modules)
+
+Wrapper APIs for additional languages (Python, R, C#) are generated by `SWIG`_.
+
+API documentation is generated via `Doxygen`_.
+
+.. _CMake: https://cmake.org/
+.. _zlib: http://www.zlib.net/
+.. _htslib: https://github.com/samtools/htslib
+.. _Boost: http://www.boost.org/
+.. _SWIG: http://www.swig.org/
+.. _Doxygen: http://www.stack.nl/~dimitri/doxygen/
+
+4. External Interfaces
+======================
+
+4.1. User Interfaces
+--------------------
+
+N/A
+
+4.2. Software Interfaces
+------------------------
+
+pbbam will require the following software:
+
+* `htslib`_ & `zlib`_ - provides low-level handling of compressed BAM data
+* `Boost`_ - provides utility classes
+
+Incoming data from upstream components will be compliant with
+PacBio BAM format - see `PacBio BAM`_ specification (2) for more detail.
+
+4.3. Hardware Interfaces
+------------------------
+
+N/A
+
+4.4. Communications Interfaces
+------------------------------
+
+N/A
+
+5. Functional Requirements
+==========================
+
+5.1. Query BAM data by genomic region
+-----------------------------------------
+
+5.1.1. Description
+~~~~~~~~~~~~~~~~~~
+
+pbbam shall allow client applications to query data, limited to some genomic
+region of interest.
+
+5.1.2. Inputs
+~~~~~~~~~~~~~
+
+* BAM file(s) or DataSet XML
+* a standard index (.bai) for each source BAM file
+* genomic interval (e.g. "chr1:1000-2000")
+
+5.1.3. Processing
+~~~~~~~~~~~~~~~~~
+
+Obtain an `htslib`_ "iterator" object for a given file and region. This will be
+wrapped by pbbam to hide the low-level nature of this type, as well as handling
+memory lifetime.
+
+5.1.4. Outputs
+~~~~~~~~~~~~~~
+
+Iterator providing access to individual BAM records from the input data sources,
+which are aligned to the requested genomic interval.
+
+For example:
+
+.. code:: c++
+
+    GenomicIntervalQuery query(interval, dataset);
+    for (const BamRecord& record : query) {
+        // ... do stuff ...
+    }
+
+
+5.1.5. Regulatory Compliance
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+N/A
+
+5.2. Query BAM data by filter criteria
+-----------------------------------------
+
+5.2.1. Description
+~~~~~~~~~~~~~~~~~~
+
+pbbam shall allow client applications to query data, limited to some filter
+criteria (e.g. only reads from ZMW hole number 200 with a read quality of >0.5).
+
+5.2.2. Inputs
+~~~~~~~~~~~~~
+
+* BAM file(s) or DataSet XML
+* a `PacBio BAM index`_ (.pbi) for each source BAM file
+* filters supported by data contained in the PBI
+
+5.2.3. Processing
+~~~~~~~~~~~~~~~~~
+
+Query PBI file(s) for records that match the provided filter criteria. Merge
+contiguous runs of records into record blocks, to minimize seeks. Advancing the
+iterator either reads the next read from the current block or seeks to the next
+block and fetches the next record.
+
+5.2.4. Outputs
+~~~~~~~~~~~~~~
+
+Iterator providing access to individual BAM records from the input data sources,
+which satisfy the requested filter criteria.
+
+For example:
+
+.. code:: c++
+
+    PbiFilterQuery query(filter, dataset);
+    for (const BamRecord& record : query) {
+        // ... do stuff ...
+    }
+
+5.2.5. Regulatory Compliance
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+N/A
+
+5.3. Write PacBio BAM data
+------------------------------------------
+
+5.3.1. Description
+~~~~~~~~~~~~~~~~~~
+
+pbbam shall be able to write `PacBio BAM`_ files conforming to the specification.
+
+5.3.2. Inputs
+~~~~~~~~~~~~~
+
+* filename
+* header information
+* BAM records
+
+5.3.3. Processing
+~~~~~~~~~~~~~~~~~
+
+Create file handle for the provided filename, output initial header information.
+As records are passed in, write to file. Upon completion, flush any buffers and
+close file handle.
+
+Multithreading, provided by `htslib`_, will be utilized where possible to speed
+up the compression process - often the main bottleneck of BAM throughput.
+
+5.3.4. Outputs
+~~~~~~~~~~~~~~
+
+BAM file conforming to the `PacBio BAM`_ specification.
+
+5.3.5. Regulatory Compliance
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+N/A
+
+5.4. Create PacBio BAM index file
+------------------------------------------
+
+5.4.1. Description
+~~~~~~~~~~~~~~~~~~
+
+Much of PacBio BAM data processing relies on the presence of a `PacBio BAM index`_
+file. pbbam shall be able to generate this file type for a `PacBio BAM`_ file.
+
+5.4.2. Inputs
+~~~~~~~~~~~~~
+
+`PacBio BAM`_ file
+
+5.4.3. Processing
+~~~~~~~~~~~~~~~~~
+
+Read through the input BAM records, storing the values relevant to a PBI index.
+At end of file, write the index contents to a file and close.
+
+5.4.4. Outputs
+~~~~~~~~~~~~~~
+
+`PacBio BAM index`_ file
+
+5.4.5. Regulatory Compliance
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+N/A
+
+6. Non-Functional Requirements
+==============================
+
+6.1. Performance Requirements
+-----------------------------
+
+Since pbbam will be used to write all BAM files coming off a Sequel device, the
+library must keep pace with data generation requirements.
+
+**come back to this, hard numbers ??**
+
+6.2. Safety Requirements
+------------------------
+
+N/A
+
+6.3. Security Requirements
+--------------------------
+
+N/A
+
+6.4. Quality Attributes
+-----------------------
+
+6.4.1. Availability
+~~~~~~~~~~~~~~~~~~~
+
+N/A
+
+6.4.2. Integrity
+~~~~~~~~~~~~~~~~
+
+Inputs and outputs shall adhere to the PacBio BAM or DataSet XML specifications.
+Files that do not meet this requirement will raise exceptions and will not be
+accepted.
+
+6.4.3. Interoperability
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Inputs and outputs shall adhere to the PacBio BAM or DataSet XML specifications.
+
+6.4.4. Reliability
+~~~~~~~~~~~~~~~~~~
+
+The developed software shall meet the overall product reliability requirements.
+
+6.4.5. Robustness
+~~~~~~~~~~~~~~~~~
+
+pbbam will raise exceptions upon encountering failure cases, allowing client
+applications to recover or report the error to a UI.
+
+6.4.6. Usability
+~~~~~~~~~~~~~~~~
+
+pbbam shall have comprehensive API documentation, available both on- and offline.
+Further documentation will be provided for installation, API usage tips, etc.
+
+Raised exceptions shall carry as much information as possible so that client
+applications can respond with appropriate actions or display useful messages.
+
+6.4.7. Maintainability
+~~~~~~~~~~~~~~~~~~~~~~
+
+The source code of the software covered in this functional specification shall
+adhere to the PacBio `Software Style Guide`_ (5) work instruction, to guarantee
+high quality of code that facilitates maintainability.
+
+6.4.8. Customizability
+~~~~~~~~~~~~~~~~~~~~~~
+
+N/A
+
+6.5. Business Rules
+-------------------
+
+N/A
+
+6.6. Installation and Upgrade
+-----------------------------
+
+Installation and Upgrade of this software will be handled as part of the SMRT
+Analysis subsystem. See `SMRT Analysis`_ (6) specifications for more detail.
+
+Additionally, the library may be built independently, either from internal
+version control (Perforce) or from the public-facing GitHub repository. In
+either case, `CMake`_ is used to drive the build process.
+
+6.7. Administration
+-------------------
+
+N/A
+
+6.8. User Documentation
+-----------------------
+
+pbbam shall have comprehensive API documentation, available both on- and offline.
+Further documentation will be provided for installation, API usage tips, etc.
+
+The "offline" API documentation may be built directly from the source code, using
+`Doxygen`_. Online documentation will be generated via a continuous integration
+server, thus ensuring it is always pointing to the current codebase.
+
+7. High Level Design
+====================
+
+7.1. Top Level Context
+----------------------
+
+The pbbam library is intended to be linked in with client applications,
+providing programmatic access to data files.
+
+7.2. Use Cases
+--------------
+
+Primary use cases for pbbam include:
+
+* BAM file creation
+* BAM file query - iterable access to various subsets of data
+
+8. Detailed Design
+==================
+
+8.1. Structural Representation
+------------------------------
+
+ *image(s) here*
+
+8.2. Behavioral Representation
+------------------------------
+
+This section provides behavioral (dynamic) representation of how the
+elements of the system realize the required use cases.
+
+Describe how the significant subsystems and classes interact with each
+other to realize the architecturally significant use cases.
+
+Provide a link to a file containing Sequence Diagram or Activity Diagram, when applicable.
+The link may be provided with use of 'image' directive.
+
+Sequence Diagram shows one use case scenario, executed by class model,
+with sequence of operations over period of time (time increased from top
+to bottom). It shows interactions between objects, but does not show
+relationships between them.
+
+Activity Diagram is a virtual representation of the sequential flow and
+control logic of a set of related activities or actions. It is a type of
+flowchart, frequently called Swim Lane Diagram, because activities of
+each entity are presented within its swim lane.
+
+Note: You may use http://wsd tool to auto-generate a sequence diagram from
+a descriptive text file, save the diagram to the wsd site, get link to the image,
+and add this link to the document with use of 'image' directive.
+
+8.3. Information Storage
+------------------------
+
+pbbam software requires no persistent storage outside of availability of input
+and output during analysis.
+
+8.4. Technology Overview
+------------------------
+
+pbbam is implemented in C++11 and should perform as designed on any UNIX-like
+operating system (Linux distributions, Apple OSX, etc.).
+
+8.5. SOUP Components
+--------------------
+
+pbbam utilizes CMake for its build system. The C++ library uses the following
+3rd-party software components: Boost, htslib, & zlib. Wrappers for additional
+languages are generated using SWIG.
+
+8.6. Deployment and Configuration
+---------------------------------
+
+Please refer to `SMRT Analysis`_ (6) documentation
+
+9. Automated Tests
+==================
+
+9.1. Unit Testing
+-----------------
+
+The library shall have unit tests for all classes & components.
+
+9.2. Performance Testing
+------------------------
+
+Unit tests may evaluate performance requirements as desired.
+
+9.3. Regression Testing
+-----------------------
+
+As its role is primarily in data I/O, pbbam has no "scientific quality/validity"
+metrics that would indicate a regression. Instead, passing its unit tests and
+end-to-end tests will indicate that a regression has not been introduced.
+
+These tests will be run after each check-in and nightly.
+
+10. Requirements Traceability Matrices
+======================================
+
+This section provides traces from requirements specified in PRD/DIR documents to the
+requirements covered in this functional specification, and from these
+functional requirements to corresponding Test Cases/Procedures.
+
+10.1. HPQC Functional Specifications
+------------------------------------
+
++-------------+---------------------------+---------------------------------------------------+-------------+------------+-----------+-------------------------------------------+
+| **PBI_ID**  | **Name**                  | **Description**                                   | **Comment** | **Metric** | **Owner** | **PRD/DIR Path**                          |
++=============+===========================+===================================================+=============+============+===========+===========================================+
+| 5.1         | Query BAM data by         | pbbam shall allow client applications to query    |             |            | dbarnett  |                                           |
+|             | genomic region            | data, limited to some genomic region of interest. |             |            |           |                                           |
+|             |                           |                                                   |             |            |           |                                           |
++-------------+---------------------------+---------------------------------------------------+-------------+------------+-----------+-------------------------------------------+
+| 5.2         | Query BAM data by         | pbbam shall allow client applications to query    |             |            | dbarnett  |                                           |
+|             | filter criteria           | data, limited to some filter criteria (e.g. only  |             |            |           |                                           |
+|             |                           | reads from ZMW hole number 200 with a read        |             |            |           |                                           |
+|             |                           | quality of >0.5).                                 |             |            |           |                                           |
+|             |                           |                                                   |             |            |           |                                           |
++-------------+---------------------------+---------------------------------------------------+-------------+------------+-----------+-------------------------------------------+
+| 5.3         | Write PacBio BAM data     | pbbam shall be able to write files conforming to  |             |            | dbarnett  |                                           |
+|             |                           | the `PacBio BAM`_ specification.                  |             |            |           |                                           |
+|             |                           |                                                   |             |            |           |                                           |
++-------------+---------------------------+---------------------------------------------------+-------------+------------+-----------+-------------------------------------------+
+| 5.4         | Create PacBio BAM index   | Much of PacBio BAM data processing relies on the  |             |            | dbarnett  |                                           |
+|             | file                      | presence of a `PacBio BAM index`_ file. pbbam     |             |            |           |                                           |
+|             |                           | shall be able to generate this file type for a    |             |            |           |                                           |
+|             |                           | `PacBio BAM`_ file.                               |             |            |           |                                           |
+|             |                           |                                                   |             |            |           |                                           |
++-------------+---------------------------+---------------------------------------------------+-------------+------------+-----------+-------------------------------------------+
+
+10.2. Automated Tests Coverage
+------------------------------
+
++-------------+---------------------------+----------------------------------------------------+------------------------------------------------------------------+
+| **FS Item** | **FS Item Title**         | **Use Case Description**                           | **Test Case Name/ID**                                            |
++=============+===========================+====================================================+==================================================================+
+| 5.1         | Query BAM data by         | pbbam shall allow client applications to query     | TODO                                                             |
+|             | genomic region            | data, limited to some genomic region of interest.  |                                                                  |
+|             |                           |                                                    |                                                                  |
++-------------+---------------------------+----------------------------------------------------+------------------------------------------------------------------+
+| 5.2         | Query BAM data by         | pbbam shall allow client applications to query     | TODO                                                             |
+|             | filter criteria           | data, limited to some filter criteria (e.g. only   |                                                                  |
+|             |                           | reads from ZMW hole number 200 with a read         |                                                                  |
+|             |                           | quality of >0.5).                                  |                                                                  |
+|             |                           |                                                    |                                                                  |
++-------------+---------------------------+----------------------------------------------------+------------------------------------------------------------------+
+| 5.3         | Write PacBio BAM data     | pbbam shall be able to write files conforming to   | TODO                                                             |
+|             |                           | the `PacBio BAM`_ specification.                   |                                                                  |
+|             |                           |                                                    |                                                                  |
++-------------+---------------------------+----------------------------------------------------+------------------------------------------------------------------+
+| 5.4         | Create PacBio BAM index   | Much of PacBio BAM data processing relies on the   | TODO                                                             |
+|             | file                      | presence of a `PacBio BAM index`_ file. pbbam      |                                                                  |
+|             |                           | shall be able to generate this file type for a     |                                                                  |
+|             |                           | `PacBio BAM`_ file.                                |                                                                  |
+|             |                           |                                                    |                                                                  |
++-------------+---------------------------+----------------------------------------------------+------------------------------------------------------------------+
+
diff --git a/include/pbbam/Accuracy.h b/include/pbbam/Accuracy.h
index 03c233e..f1db014 100644
--- a/include/pbbam/Accuracy.h
+++ b/include/pbbam/Accuracy.h
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file Accuracy.h
+/// \brief Defines the Accuracy class.
+//
 // Author: Derek Barnett
 
 #ifndef ACCURACY_H
@@ -44,55 +48,42 @@ namespace PacBio {
 namespace BAM {
 
 /// \brief The Accuracy class represents the expected accuracy of a BamRecord.
-/// Values are clamped to [0,1000].
+///
+/// Values are clamped to fall within [0,1].
 ///
 class PBBAM_EXPORT Accuracy
 {
 public:
-    static const int MIN;
-    static const int MAX;
+    static const float MIN; ///< Minimum valid accuracy value [0.0]
+    static const float MAX; ///< Maximum valid accuracy value [1.0]
 
 public:
     /// \name Constructors & Related Methods
     /// \{
 
-    /// \note This is not an 'explicit' ctor, to make it as easy to use in
-    ///       numeric operations as possible. We really just want to make
-    ///       sure that the acceptable range is respected.
-    Accuracy(int accuracy);
+    /// Constructs an Accuracy object from a floating-point number.
+    ///
+    /// \note This is not an \b explicit ctor, to make it as easy as
+    ///       possible to use in numeric operations. We really just want
+    ///       to make sure that the acceptable range is respected.
+    ///
+    Accuracy(float accuracy);
     Accuracy(const Accuracy& other);
     ~Accuracy(void);
 
     /// \}
 
 public:
-
-    /// \returns Accuracy as integer
-    operator int(void) const;
+    /// \returns Accuracy as float primitive
+    operator float(void) const;
 
 private:
-    int accuracy_;
+    float accuracy_;
 };
 
-inline Accuracy::Accuracy(int accuracy)
-{
-    if (accuracy < Accuracy::MIN)
-        accuracy = Accuracy::MIN;
-    else if (accuracy > Accuracy::MAX)
-        accuracy = Accuracy::MAX;
-    accuracy_ = accuracy;
-}
-
-inline Accuracy::Accuracy(const Accuracy &other)
-    : accuracy_(other.accuracy_)
-{ }
-
-inline Accuracy::~Accuracy(void) { }
-
-inline Accuracy::operator int(void) const
-{ return accuracy_; }
-
 } // namespace BAM
 } // namespace PacBio
 
+#include "pbbam/internal/Accuracy.inl"
+
 #endif // ACCURACY_H
diff --git a/include/pbbam/AlignmentPrinter.h b/include/pbbam/AlignmentPrinter.h
index 6424c5f..4dda6cd 100644
--- a/include/pbbam/AlignmentPrinter.h
+++ b/include/pbbam/AlignmentPrinter.h
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file AlignmentPrinter.h
+/// \brief Defines the AlignmentPrinter class.
+//
 // Author: Armin Töpfer
 
 #ifndef ALIGNMENTPRINTER_H
@@ -40,7 +44,6 @@
 
 #include <memory>
 #include <string>
-
 #include "pbbam/BamRecord.h"
 #include "pbbam/IndexedFastaReader.h"
 #include "pbbam/Orientation.h"
@@ -50,29 +53,53 @@ namespace BAM {
 
 class BamRecord;
 
+/// \brief The AlignmentPrinter class "pretty-prints" an alignment with respect
+///        to its associated reference sequence.
+///
+/// Example output:
+/// \verbinclude plaintext/AlignmentPrinterOutput.txt
+///
 class AlignmentPrinter
 {
 public:
-    AlignmentPrinter(const IndexedFastaReader& ifr)
-        : ifr_(std::unique_ptr<IndexedFastaReader>(new IndexedFastaReader(ifr)))
-    { }
+    /// \name Constructors & Related Methods
+    /// \{
 
-    AlignmentPrinter() = delete;
-    // Move constructor
-    AlignmentPrinter(AlignmentPrinter&&) = default;
-    // Copy constructor
+    /// Constructs the alignment printer with an associated FASTA file reader.
+    ///
+    /// \param[in] ifr FASTA reader
+    ///
+    /// \throws std::runtime_error if FASTA file cannot be opened for reading.
+    ///
+    AlignmentPrinter(const IndexedFastaReader& ifr);
+
+    AlignmentPrinter(void) = delete;
     AlignmentPrinter(const AlignmentPrinter&) = delete;
-    // Move assignment operator
-    AlignmentPrinter& operator=(AlignmentPrinter&&) = default;
-    // Copy assignment operator
+    AlignmentPrinter(AlignmentPrinter&&) = default;
     AlignmentPrinter& operator=(const AlignmentPrinter&) = delete;
-    // Destructor
-    ~AlignmentPrinter() = default;
+    AlignmentPrinter& operator=(AlignmentPrinter&&) = default;
+    ~AlignmentPrinter(void) = default;
+
+    /// \}
 
 public:
+    /// \name Printing
+    /// \{
+
+    /// Pretty-prints an aligned BamRecord to std::string.
+    ///
+    /// \note The current implementation includes ANSI escape sequences for
+    ///       coloring terminal output. Future versions of this method will
+    ///       likely make this optional.
+    ///
+    /// \returns formatted string containing the alignment and summary
+    ///          information
+    ///
     std::string Print(const BamRecord& record,
                       const Orientation orientation = Orientation::GENOMIC);
 
+    /// \}
+
 private:
 	const std::unique_ptr<IndexedFastaReader> ifr_;
 };
diff --git a/include/pbbam/BaiIndexedBamReader.h b/include/pbbam/BaiIndexedBamReader.h
new file mode 100644
index 0000000..7441c69
--- /dev/null
+++ b/include/pbbam/BaiIndexedBamReader.h
@@ -0,0 +1,130 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BaiIndexedBamReader.h
+/// \brief Defines the BaiIndexedBamReader class.
+//
+// Author: Derek Barnett
+
+#ifndef BAIINDEXEDBAMREADER_H
+#define BAIINDEXEDBAMREADER_H
+
+#include "pbbam/BamReader.h"
+#include "pbbam/BamFile.h"
+#include "pbbam/GenomicInterval.h"
+
+namespace PacBio {
+namespace BAM {
+
+namespace internal { struct BaiIndexedBamReaderPrivate; }
+
+/// \brief The BaiIndexedBamReader class provides read-only iteration over %BAM
+///        records, bounded by a particular genomic interval.
+///
+/// The SAM/BAM standard index (*.bai) is used to allow random-access operations.
+///
+class PBBAM_EXPORT BaiIndexedBamReader : public BamReader
+{
+public:
+    /// \name Constructors & Related Methods
+    /// \{
+
+    /// \brief Constructs %BAM reader, bounded by a genomic interval.
+    ///
+    /// All reads that overlap the interval will be available.
+    ///
+    /// \param[in] interval iteration will be bounded by this GenomicInterval.
+    /// \param[in] filename input %BAM filename
+    ///
+    /// \throws std::runtime_error if either file (*.bam or *.bai) fails to open
+    ///         for reading, or if the interval is invalid
+    ///
+    BaiIndexedBamReader(const GenomicInterval& interval,
+                        const std::string& filename);
+
+    /// \brief Constructs BAM reader, bounded by a genomic interval.
+    ///
+    /// All reads that overlap the interval will be available.
+    ///
+    /// \param[in] interval iteration will be bounded by this GenomicInterval.
+    /// \param[in] bamFile input BamFile object
+    ///
+    /// \throws std::runtime_error if either file (*.bam or *.bai) fails to open
+    ///         for reading, or if the interval is invalid
+    ///
+    BaiIndexedBamReader(const GenomicInterval& interval, const BamFile& bamFile);
+
+    /// \brief Constructs %BAM reader, bounded by a genomic interval.
+    ///
+    /// All reads that overlap the interval will be available.
+    ///
+    /// \param[in] interval iteration will be bounded by this GenomicInterval.
+    /// \param[in] bamFile input BamFile object
+    ///
+    /// \throws std::runtime_error if either file (*.bam or *.bai) fails to open
+    ///         for reading, or if the interval is invalid
+    ///
+    BaiIndexedBamReader(const GenomicInterval& interval, BamFile&& bamFile);
+
+    /// \}
+
+public:
+    /// \name Random-Access
+    /// \{
+
+    /// \returns the current GenomicInterval in use by this reader
+    const GenomicInterval& Interval(void) const;
+
+    /// \brief Sets a new genomic interval on the reader.
+    ///
+    /// \param[in] interval
+    /// \returns reference to this reader
+    ///
+    BaiIndexedBamReader& Interval(const GenomicInterval& interval);
+
+    /// \}
+
+protected:
+    int ReadRawData(BGZF* bgzf, bam1_t* b);
+
+private:
+    std::unique_ptr<internal::BaiIndexedBamReaderPrivate> d_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // BAIINDEXEDBAMREADER_H
diff --git a/include/pbbam/BamFile.h b/include/pbbam/BamFile.h
index 62da044..8a20299 100644
--- a/include/pbbam/BamFile.h
+++ b/include/pbbam/BamFile.h
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file BamFile.h
+/// \brief Defines the BamFile class.
+//
 // Author: Derek Barnett
 
 #ifndef BAMFILE_H
@@ -47,6 +51,11 @@ namespace BAM {
 
 namespace internal { class BamFilePrivate; }
 
+/// \brief The BamFile class represents a %BAM file.
+///
+/// It provides access to header metadata and methods for finding/creating
+/// associated index files.
+///
 class PBBAM_EXPORT BamFile
 {
 public:
@@ -54,10 +63,12 @@ public:
     /// \name Constructors & Related Methods
     /// \{
 
-    /// \brief Creates a BamFile object on the provided \p filename & loads header information.
+    /// \brief Creates a BamFile object on the provided \p filename &
+    ///        loads header information.
+    ///
+    /// \param[in] filename %BAM filename
+    /// \throws std::exception on failure to open %BAM file for reading
     ///
-    /// \param[in] filename BAM filename
-    /// \throws std::exception on failure
     BamFile(const std::string& filename);
 
     BamFile(const BamFile& other);
@@ -73,45 +84,87 @@ public:
     /// \name Index & Filename Methods
     /// \{
 
-    /// Check that ".pbi" exists and is newer than this BAM file.
-    /// If not, one will be created.
+    /// \brief Creates a ".pbi" file for this %BAM file.
+    ///
+    /// \note Existing index file will be overwritten. Use
+    ///       EnsurePacBioIndexExists() if this is not desired.
     ///
     /// \throws if PBI file could not be properly created and/or
-    /// written to disk
+    ///         written to disk
+    ///
+    void CreatePacBioIndex(void) const;
+
+    /// \brief Creates a ".bai" file for this %BAM file.
+    ///
+    /// \note Existing index file will be overwritten. Use
+    ///       EnsureStandardIndexExists() if this is not desired.
+    ///
+    /// \throws if BAI file could not be properly created (e.g. this
+    ///         %BAM is not coordinate-sorted) or could not be written to disk
+    ///
+    void CreateStandardIndex(void) const;
+
+    /// \brief Creates a ".pbi" file if one does not exist or is older than its
+    ///        %BAM file.
+    ///
+    /// Equivalent to:
+    /// \code{.cpp}
+    ///    if (!file.PacBioIndexExists())
+    ///        file.CreatePacBioIndex();
+    /// \endcode
+    ///
+    /// \note As of v0.4.2, no timestamp check is performed. Earlier versions
+    ///       also required the index to be newer than its %BAM file.
+    ///
+    /// \throws if PBI file could not be properly created and/or
+    ///         written to disk
     ///
     void EnsurePacBioIndexExists(void) const;
 
-    /// Check that ".bai" exists and is newer than this BAM file.
-    /// If not, one will be created.
+    /// \brief Creates a ".bai" file if one does not exist or is older than its
+    ///        %BAM file.
+    ///
+    /// Equivalent to:
+    /// \code{.cpp}
+    ///    if (!file.StandardIndexExists())
+    ///        file.CreateStandardIndex();
+    /// \endcode
+    ///
+    /// \note As of v0.4.2, no timestamp check is performed.
     ///
     /// \throws if BAI file could not be properly created (e.g. this
-    /// BAM is not coordinate-sorted) or could not be written to disk
+    ///         %BAM is not coordinate-sorted) or could not be written to disk
     ///
     void EnsureStandardIndexExists(void) const;
 
-    /// \returns BAM filename
+    /// \returns %BAM filename
     std::string Filename(void) const;
 
-    /// \returns true if ".pbi" exists and is newer than this BAM file.
+    /// \returns true if ".pbi" exists and is newer than this %BAM file.
     bool PacBioIndexExists(void) const;
 
-    /// \returns filename of PacBio index file (".pbi")
+    /// \returns filename of %PacBio index file (".pbi")
     /// \note No guarantee is made on the existence of this file.
     ///       This method simply returns the expected filename.
     std::string PacBioIndexFilename(void) const;
 
-    /// \returns true if ".bai" exists and is newer than this BAM file.
+    /// \returns true if ".pbi" has a more recent timestamp than this file
+    bool PacBioIndexIsNewer(void) const;
+
+    /// \returns true if ".bai" exists
     bool StandardIndexExists(void) const;
 
-    /// \returns filename of standard index file (".bai")
     /// \note No guarantee is made on the existence of this file.
     ///       This method simply returns the expected filename.
     std::string StandardIndexFilename(void) const;
 
+    /// \returns true if ".bai" has a more recent timestamp than this file
+    bool StandardIndexIsNewer(void) const;
 
     /// \}
 
-    /// \name Header Metadata Methods
+public:
+    /// \name File Header Data
     /// \{
 
     /// \returns true if header metadata has this reference name
@@ -120,10 +173,12 @@ public:
     /// \returns const reference to BamHeader containing the file's metadata
     const BamHeader& Header(void) const;
 
-    /// \returns true if BAM file is a PacBio BAM file (i.e. has non-empty version associated with header "pb" tag)
+    /// \returns true if file is a %PacBio %BAM file (i.e. has non-empty version
+    ///          associated with header "pb" tag)
     bool IsPacBioBAM(void) const;
 
-    /// \returns ID for reference \p name (can be used for e.g. GenomicIntervalQuery), -1 if not found
+    /// \returns ID for reference \p name (can be used for e.g.
+    ///          GenomicIntervalQuery), or -1 if not found
     int ReferenceId(const std::string& name) const;
 
     /// \return name of reference matching \p id, empty string if not found
@@ -137,8 +192,20 @@ public:
 
     /// \}
 
+public:
+    /// \name Additional Attributes
+    /// \{
+
+    /// \returns virtual offset of first alignment. Intended mostly for internal
+    ///          use. Note that this is a BGZF \b virtual offset, not a
+    ///          'normal' file position.
+    ///
+    int64_t FirstAlignmentOffset(void) const;
+
+    /// \}
+
 private:
-    PBBAM_SHARED_PTR<internal::BamFilePrivate> d_;
+    std::unique_ptr<internal::BamFilePrivate> d_;
 };
 
 } // namespace BAM
diff --git a/include/pbbam/BamHeader.h b/include/pbbam/BamHeader.h
index 9dea3cc..eada466 100644
--- a/include/pbbam/BamHeader.h
+++ b/include/pbbam/BamHeader.h
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file BamHeader.h
+/// \brief Defines the BamHeader class.
+//
 // Author: Derek Barnett
 
 #ifndef BAMHEADER_H
@@ -51,6 +55,24 @@ namespace BAM {
 
 namespace internal { class BamHeaderPrivate; }
 
+/// \brief The BamHeader class represents the header section of the %BAM file.
+///
+/// It provides metadata about the file including file version, reference
+/// sequences, read groups, comments, etc.
+///
+/// A BamHeader may be fetched from a BamFile to view an existing file's
+/// metadata. Or one may be created/edited for use with writing to a new file
+/// (via BamWriter).
+///
+/// \note A particular BamHeader is likely to be re-used in lots of places
+///       throughout the library, for read-only purposes. For this reason, even
+///       though a BamHeader may be returned by value, it is essentially a thin
+///       wrapper for a shared-pointer to the actual data. This means, though,
+///       that if you need to edit an existing BamHeader for use with a
+///       BamWriter, please consider using BamHeader::DeepCopy. Otherwise any
+///       modifications will affect all BamHeaders that are sharing its
+///       underlying data.
+///
 class PBBAM_EXPORT BamHeader
 {
 public:
@@ -65,111 +87,321 @@ public:
     BamHeader& operator=(BamHeader&& other);
     ~BamHeader(void);
 
+    /// \brief Detaches underlying data from the shared-pointer, returning an
+    ///        independent copy of the header contents.
+    ///
+    /// This ensures that any modifications to the newly returned BamHeader do
+    /// not affect other BamHeader objects that were sharing its underlying data.
+    ///
     BamHeader DeepCopy(void) const;
 
     /// \}
 
 public:
-    /// \name General
+    /// \name Operators
     /// \{
 
+    /// \brief Merges another header with this one.
+    ///
+    /// Headers must be compatible for merging. This means that their Version,
+    /// SortOrder, PacBioBamVersion (and in the case of aligned BAM data,
+    /// Sequences) must all match. If not, an exception will be thrown.
+    ///
+    /// \param[in] other  header to merge with this one
+    /// \returns reference to this header
+    ///
+    /// \throws std::runtime_error if the headers are not compatible
+    ///
+    BamHeader& operator+=(const BamHeader& other);
+
+    /// \brief Creates a new, merged header.
+    ///
+    /// Headers must be compatible for merging. This means that their Version,
+    /// SortOrder, PacBioBamVersion (and in the case of aligned BAM data,
+    /// Sequences) must all match. If not, an exception will be thrown.
+    ///
+    /// Both original headers (this header and \p other) will not be modified.
+    ///
+    /// \param[in] other  header to merge with this one
+    /// \returns merged header
+    ///
+    /// \throws std::runtime_error if the headers are not compatible
+    ///
+    BamHeader operator+(const BamHeader& other) const;
+
+    /// \}
+
+public:
+    /// \name General Attributes
+    /// \{
+
+    /// \returns the %PacBio %BAM version number (\@HD:pb)
+    ///
+    /// \note This is different from the SAM/BAM version number
+    /// \sa BamHeader::Version.
+    ///
     std::string PacBioBamVersion(void) const;
+
+    /// \returns the sort order used
+    ///
+    /// Valid values: "unknown", "unsorted", "queryname", or "coordinate"
+    ///
     std::string SortOrder(void) const;
+
+    /// \returns the SAM/BAM version number (\@HD:VN)
+    ///
+    /// \note This is different from the %PacBio %BAM version number
+    /// \sa BamHeader::PacBioBamVersion
+    ///
     std::string Version(void) const;
 
     /// \}
 
+public:
     /// \name Read Groups
     /// \{
 
+    /// \returns true if the header contains a read group with \p id (\@RG:ID)
     bool HasReadGroup(const std::string& id) const;
+
+    /// \returns a ReadGroupInfo object representing the read group matching
+    ///          \p id (\@RG:ID)
+    /// \throws std::runtime_error if \p id is unknown
+    ///
     ReadGroupInfo ReadGroup(const std::string& id) const;
+
+    /// \returns vector of read group IDs listed in this header
     std::vector<std::string> ReadGroupIds(void) const;
+
+    /// \returns vector of ReadGroupInfo objects, representing all read groups
+    ///          listed in this header
+    ///
     std::vector<ReadGroupInfo> ReadGroups(void) const;
 
     /// \}
 
+public:
     /// \name Sequences
     /// \{
 
+    /// \returns true if header contains a sequence with \p name (\@SQ:SN)
     bool HasSequence(const std::string& name) const;
+
+    /// \returns number of sequences (\@SQ entries) stored in this header
+    size_t NumSequences(void) const;
+
+    /// \returns numeric ID for sequence matching \p name (\@SQ:SN)
+    ///
+    /// This is the numeric ID used elsewhere throughout the API.
+    ///
+    /// \throws std::runtime_error if \p name is unknown
+    /// \sa BamReader::ReferenceId, PbiReferenceIdFilter,
+    ///     PbiRawMappedData::tId_
+    ///
     int32_t SequenceId(const std::string& name) const;
+
+    /// \returns the length of the sequence (\@SQ:LN, e.g. chromosome length) at
+    ///          index \p id
+    ///
+    /// \sa SequenceInfo::Length, BamHeader::SequenceId
+    ///
     std::string SequenceLength(const int32_t id) const;
+
+    /// \returns the name of the sequence (\@SQ:SN) at index \p id
+    ///
+    /// \sa SequenceInfo::Name, BamHeader::SequenceId
+    ///
     std::string SequenceName(const int32_t id) const;
+
+    /// \returns vector of sequence names (\@SQ:SN) stored in this header
+    ///
+    /// Position in the vector is equivalent to SequenceId.
+    ///
     std::vector<std::string> SequenceNames(void) const;
+
+    /// \returns SequenceInfo object at index \p id
+    ///
+    /// \throws std::out_of_range if \p id is an invalid or unknown index
+    /// \sa BamHeader::SequenceId
+    ///
     SequenceInfo Sequence(const int32_t id) const;
+
+    /// \returns SequenceInfo for the sequence matching \p name
     SequenceInfo Sequence(const std::string& name) const;
+
+    /// \returns vector of SequenceInfo objects representing the sequences
+    ///          (\@SQ entries) stored in this header
+    ///
     std::vector<SequenceInfo> Sequences(void) const;
 
     /// \}
 
+public:
     /// \name Programs
     /// \{
 
+    /// \returns true if this header contains a program entry with ID (\@PG:ID)
+    ///          matching \p id
+    ///
     bool HasProgram(const std::string& id) const;
+
+    /// \returns ProgramInfo object for the program entry matching \p id
+    /// \throws std::runtime_error if \p id is unknown
+    ///
     ProgramInfo Program(const std::string& id) const;
+
+    /// \returns vector of program IDs (\@PG:ID)
     std::vector<std::string> ProgramIds(void) const;
+
+    /// \returns vector of ProgramInfo objects representing program entries
+    ///          (\@PG) stored in this header
+    ///
     std::vector<ProgramInfo> Programs(void) const;
 
     /// \}
 
+public:
     /// \name Comments
     /// \{
 
+    /// \returns vector of comment (\@CO) strings
     std::vector<std::string> Comments(void) const;
 
     /// \}
 
+public:
     /// \name Conversion Methods
     /// \{
 
+    /// \returns SAM-header-formatted string representing this header's data
     std::string ToSam(void) const;
 
     /// \}
 
 public:
 
-    /// \name General
+    /// \name General Attributes
     /// \{
 
+    /// \brief Sets this header's PacBioBAM version number (\@HD:pb).
+    ///
+    /// \returns reference to this object
+    /// \throws std::runtime_error if version number cannot be parsed or
+    ///         is less than the minimum version allowed.
+    ///
     BamHeader& PacBioBamVersion(const std::string& version);
+
+    /// \brief Sets this header's sort order label (\@HD:SO).
+    ///
+    /// Valid values: "unknown", "unsorted", "queryname", or "coordinate"
+    ///
+    /// \returns reference to this object
+    ///
     BamHeader& SortOrder(const std::string& order);
+
+    /// \brief Sets this header's SAM/BAM version number (\@HD:VN).
+    ///
+    /// \returns reference to this object
+    ///
     BamHeader& Version(const std::string& version);
 
     /// \}
 
+public:
     /// \name Read Groups
     /// \{
 
+    /// \brief Appends a read group entry (\@RG) to this header.
+    ///
+    /// \returns reference to this object
+    ///
     BamHeader& AddReadGroup(const ReadGroupInfo& readGroup);
+
+    /// \brief Removes all read group entries from this header.
+    ///
+    /// \returns reference to this object
+    ///
     BamHeader& ClearReadGroups(void);
+
+    /// \brief Replaces this header's list of read group entries with those in
+    ///        \p readGroups.
+    ///
+    /// \returns reference to this object
+    ///
     BamHeader& ReadGroups(const std::vector<ReadGroupInfo>& readGroups);
 
     /// \}
 
+public:
     /// \name Sequences
     /// \{
 
+    /// \brief Appends a sequence entry (\@SQ) to this header.
+    ///
+    /// \returns reference to this object
+    ///
     BamHeader& AddSequence(const SequenceInfo& sequence);
+
+    /// \brief Removes all sequence entries from this header.
+    ///
+    /// \returns reference to this object
+    ///
     BamHeader& ClearSequences(void);
+
+    /// \brief Replaces this header's list of sequence entries with those in
+    ///       \p sequences.
+    ///
+    /// \returns reference to this object
+    ///
     BamHeader& Sequences(const std::vector<SequenceInfo>& sequences);
 
     /// \}
 
+public:
     /// \name Programs
     /// \{
 
+    /// \brief Appends a program entry (\@PG) to this header.
+    ///
+    /// \returns reference to this object
+    ///
     BamHeader& AddProgram(const ProgramInfo& pg);
+
+    /// \brief Removes all program entries from this header.
+    ///
+    /// \returns reference to this object
+    ///
     BamHeader& ClearPrograms(void);
+
+    /// \brief Replaces this header's list of program entries with those in
+    ///        \p programs.
+    ///
+    /// \returns reference to this object
+    ///
     BamHeader& Programs(const std::vector<ProgramInfo>& programs);
 
     /// \}
 
+public:
     /// \name Comments
     /// \{
 
+    /// \brief Appends a comment (\@CO) to this header.
+    ///
+    /// \returns reference to this object
+    ///
     BamHeader& AddComment(const std::string& comment);
+
+    /// \brief Removes all comments from this header.
+    ///
+    /// \returns reference to this object
+    ///
     BamHeader& ClearComments(void);
+
+    /// \brief Replaces this header's list of comments with those in \p comments.
+    ///
+    /// \returns reference to this object
+    ///
     BamHeader& Comments(const std::vector<std::string>& comments);
 
     /// \}
@@ -181,4 +413,6 @@ private:
 } // namespace BAM
 } // namespace PacBio
 
+#include "pbbam/internal/BamHeader.inl"
+
 #endif // BAMHEADER_H
diff --git a/include/pbbam/BamReader.h b/include/pbbam/BamReader.h
index bd0ced6..774a2ec 100644
--- a/include/pbbam/BamReader.h
+++ b/include/pbbam/BamReader.h
@@ -32,80 +32,157 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file BamReader.h
+/// \brief Defines the BamReader class.
+//
 // Author: Derek Barnett
 
 #ifndef BAMREADER_H
 #define BAMREADER_H
 
+#include "pbbam/BamFile.h"
 #include "pbbam/BamHeader.h"
 #include "pbbam/BamRecord.h"
 #include "pbbam/Config.h"
+#include "pbbam/GenomicInterval.h"
+
+#include <htslib/sam.h>
+#include <memory>
 #include <string>
 
 namespace PacBio {
 namespace BAM {
 
+namespace internal { struct BamReaderPrivate; }
+
+/// \brief The BamReader class provides basic read-access to a %BAM file.
+///
+/// The base-class implementation provides a sequential read-through of BAM
+/// records. Derived classes may implement other access schemes (e.g. genomic
+/// region, PBI-enabled record filtering).
+///
 class PBBAM_EXPORT BamReader
 {
-
 public:
-    enum ReadError
-    {
-        NoError = 0
-      , OpenFileError
-      , ReadHeaderError
-      , ReadRecordError
-    };
+    /// \name Constructors & Related Methods
+    /// \{
+
+    /// \brief Opens BAM file for reading.
+    ///
+    /// \param[in] fn %BAM filename
+    /// \throws std::runtime_error if failed to open
+    ///
+    explicit BamReader(const std::string& fn);
+
+    /// \brief Opens BAM file for reading.
+    ///
+    /// \param[in] bamFile BamFile object
+    /// \throws std::runtime_error if failed to open
+    ///
+    explicit BamReader(const BamFile& bamFile);
+
+    /// \brief Opens BAM file for reading.
+    ///
+    /// \param[in] bamFile BamFile object
+    /// \throws std::runtime_error if failed to open
+    ///
+    explicit BamReader(BamFile&& bamFile);
 
-public:
-    BamReader(void);
     virtual ~BamReader(void);
 
+    /// \}
+
 public:
+    /// \name BAM File Attributes
+    /// \{
+
+    /// \returns the underlying BamFile
+    const BamFile& File(void) const;
+
+    /// \returns %BAM filename
+    std::string Filename(void) const;
+
+    /// \returns BamHeader object from %BAM header contents
+    const BamHeader& Header(void) const;
 
-    /// Closes the BAM file reader.
-    void Close(void);
+    /// \}
 
-    /// Opens a BAM file for reading.
+public:
+    /// \name BAM File I/O
+    /// \{
+
+    /// \brief Fetches the "next" %BAM record.
+    ///
+    /// Default implementation will read records until EOF. Derived readers may
+    /// use additional criteria to decide which record is "next" and when
+    /// reading is done.
     ///
-    /// Prefix \p filename with "http://" or "ftp://" for remote files,
-    /// or set to "-" for stdin.
+    /// \param[out] record  next BamRecord object. Should not be used if method
+    ///                     returns false.
     ///
-    /// \param[in] filename path to input BAM file
+    /// \returns true if record was read successfully. Returns false if EOF (or
+    ///          end of iterator in derived readers). False is not an error,
+    ///          it indicates "end of data".
     ///
-    /// \returns success/failure
-    bool Open(const std::string& filename);
+    /// \throws std::runtime_error if failed to read from file (e.g. possible
+    ///         truncated or corrupted file).
+    ///
+    bool GetNext(BamRecord& record);
 
-    /// \returns header as BamHeader object
-    BamHeader::SharedPtr Header(void) const;
+    /// \brief Seeks to virtual offset in %BAM.
+    ///
+    /// \note This is \b NOT a normal file offset, but the virtual offset used
+    ///       in %BAM indexing.
+    ///
+    /// \throws std::runtime_error if failed to seek
+    ///
+    void VirtualSeek(int64_t virtualOffset);
 
-    /// \returns error status code
-    BamReader::ReadError Error(void) const;
+    /// \returns current (virtual) file position.
+    ///
+    /// \note This is \b NOT a normal file offset, but the virtual offset used
+    ///       in %BAM indexing.
+    ///
+    int64_t VirtualTell(void) const;
 
-    /// \returns true if error encountered
-    bool HasError(void) const;
+    /// \}
 
-    /// Fetches the next record in a BAM file.
+protected:
+    /// \name BAM File I/O
+    /// \{
+
+    /// \brief Helper method for access to underlying BGZF stream pointer.
+    ///
+    /// Useful for derived readers' contact points with htslib methods.
     ///
-    /// \param[out] record pointer to BamRecord object
+    /// \returns BGZF stream pointer
     ///
-    /// \returns succcess/failure
-    bool GetNext(PBBAM_SHARED_PTR<BamRecord> record);
+    BGZF* Bgzf(void) const;
 
-public:
-    std::string PacBioBamVersion(void) const;
+    /// \brief Performs the actual raw read of the next record from the BAM
+    ///        file.
+    ///
+    /// Default implementation will read records, sequentially, until EOF.
+    /// Derived readers may use additional criteria to decide which record is
+    ///  "next" and when reading is done.
+    ///
+    /// Return value should be equivalent to htslib's bam_read1():
+    ///     >= 0 : normal
+    ///       -1 : EOF (not an error)
+    ///     < -1 : error
+    ///
+    /// \param[in]  bgzf BGZF stream pointer
+    /// \param[out] b    %BAM record pointer
+    /// \returns integer status code, see description
+    ///
+    virtual int ReadRawData(BGZF* bgzf, bam1_t* b);
 
-protected:
-    bool GetNext(PBBAM_SHARED_PTR<bam1_t> rawRecord);
-    void InitialOpen(void);
-    PBBAM_SHARED_PTR<bam_hdr_t> RawHeader(void) const;
+    /// \}
 
-protected:
-    PBBAM_SHARED_PTR<samFile>   file_;
-    PBBAM_SHARED_PTR<bam_hdr_t> header_;
-    std::string filename_;
-    BamReader::ReadError error_;
+private:
+    std::unique_ptr<internal::BamReaderPrivate> d_;
 };
 
 } // namespace BAM
diff --git a/include/pbbam/BamRecord.h b/include/pbbam/BamRecord.h
index 8630e4b..9184121 100644
--- a/include/pbbam/BamRecord.h
+++ b/include/pbbam/BamRecord.h
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file BamRecord.h
+/// \brief Defines the BamRecord & BamRecordView classes.
+//
 // Author: Derek Barnett
 
 #ifndef BAMRECORD_H
@@ -48,8 +52,10 @@
 #include "pbbam/Strand.h"
 #include "pbbam/QualityValues.h"
 #include "pbbam/virtual/VirtualRegionType.h"
+#include "pbbam/ZmwType.h"
 #include <memory>
 #include <string>
+#include <utility>
 #include <vector>
 
 namespace PacBio {
@@ -57,34 +63,66 @@ namespace BAM {
 
 namespace internal { class BamRecordMemory; }
 
+/// \brief This enum defines the modes supported by BamRecord clipping
+///        operations.
+///
+/// Methods like BamRecord::Clip accept Position parameters - which may be in
+/// either polymerase or reference coordinates. Using this enum as a flag
+/// indicates how the positions should be interpreted.
+///
 enum class ClipType
 {
-    CLIP_NONE
-  , CLIP_TO_QUERY
-  , CLIP_TO_REFERENCE
+    CLIP_NONE           ///< No clipping will be performed.
+  , CLIP_TO_QUERY       ///< Clipping positions are in polymerase coordinates.
+  , CLIP_TO_REFERENCE   ///< Clipping positions are in genomic coordinates.
 };
 
+/// \brief This enum defines the possible PacBio BAM record types.
+///
+/// \sa ReadGroupInfo::ReadType
+///
 enum class RecordType
 {
-    POLYMERASE
-  , HQREGION
-  , SUBREAD
-  , CCS
-  , SCRAP
-  , UNKNOWN
+    POLYMERASE  ///< Polymerase read
+  , HQREGION    ///< High-quality region
+  , SUBREAD     ///< Subread
+  , CCS         ///< Circular consensus sequence
+  , SCRAP       ///< Additional sequence (barcodes, adapters, etc.)
+  , UNKNOWN     ///< Unknown read type
 };
 
+/// \brief This enum defines the possible encoding modes used in Frames data
+/// (e.g. BamRecord::IPD or BamRecord::PulseWidth).
+///
+/// The LOSSY mode is the default in production output; LOSSLESS mode
+/// being used primarily for internal applications.
+///
+/// \sa https://github.com/PacificBiosciences/PacBioFileFormats/blob/3.0/BAM.rst
+///     for more information on pulse frame encoding schemes.
+///
 enum class FrameEncodingType
 {
-    LOSSY
-  , LOSSLESS
+    LOSSY       ///< 8-bit compression (using CodecV1) of frame data
+  , LOSSLESS    ///< 16-bit native frame data
 };
 
+/// \brief The BamRecord class represents a %PacBio %BAM record.
+///
+/// %PacBio %BAM records are extensions of normal SAM/BAM records. Thus in
+/// addition to normal fields like bases, qualities, mapping coordinates, etc.,
+/// tags are used extensively to annotate records with additional
+/// PacBio-specific data.
+///
+/// Mapping and clipping APIs are provided as well to ensure that such
+/// operations "trickle down" to all data fields properly.
+///
+/// \sa https://samtools.github.io/hts-specs/SAMv1.pdf
+///     for more information on standard %BAM data, and
+///     https://github.com/PacificBiosciences/PacBioFileFormats/blob/3.0/BAM.rst
+///     for more information on %PacBio %BAM fields.
+///
 class PBBAM_EXPORT BamRecord
 {
-public: // static data
-    static const float photonFactor;
-
 public:
     /// \name Constructors & Related Methods
     /// \{
@@ -102,34 +140,208 @@ public:
     /// \}
 
 public:
-    /// \name Per-Record Data
+    /// \name General Data
     /// \{
 
-    /// \note AlignedStart is in polymerase read coordinates, NOT genomic coordinates.
+    /// \returns this record's full name
+    /// \sa BamRecordImpl::Name
     ///
-    /// \returns the record's aligned start position
-    Position AlignedStart(void) const;
+    std::string FullName(void) const;
 
-    /// \note AlignedEnd is in polymerase read coordinates, NOT genomic coordinates.
+    /// \returns shared pointer to this record's associated BamHeader
+    BamHeader Header(void) const;
+
+    /// \returns ZMW hole number
+    /// \throws if missing zm tag & record name does not contain hole number
     ///
+    int32_t HoleNumber(void) const;
+
+    /// \returns this record's LocalContextFlags
+    PacBio::BAM::LocalContextFlags LocalContextFlags(void) const;
+
+    /// \returns this record's movie name
+    std::string MovieName(void) const;
+
+    /// \returns "number of complete passes of the insert"
+    int32_t NumPasses(void) const;
+
+    /// \returns the record's query end position, or Sequence().length() if not
+    ///          stored
+    /// \note QueryEnd is in polymerase read coordinates, NOT genomic
+    ///       coordinates.
+    ///
+    Position QueryEnd(void) const;
+
+    /// \returns the record's query start position, or 0 if not stored
+    ///
+    /// \note QueryStart is in polymerase read coordinates, NOT genomic
+    ///       coordinates.
+    ///
+    Position QueryStart(void) const;
+
+    /// \returns this record's expected read accuracy [0, 1000]
+    Accuracy ReadAccuracy(void) const;
+
+    /// \returns ReadGroupInfo object for this record
+    ReadGroupInfo ReadGroup(void) const;
+
+    /// \returns string ID of this record's read group
+    /// \sa ReadGroupInfo::Id
+    ///
+    std::string ReadGroupId(void) const;
+
+    /// \returns integer value for this record's read group ID
+    int32_t ReadGroupNumericId(void) const;
+
+    /// \returns this scrap record's scrap region type
+    VirtualRegionType ScrapRegionType(void) const;
+
+    /// \returns this scrap record's scrap ZMW type
+    ZmwType ScrapZmwType(void) const;
+
+    /// \returns this record's average signal-to-noise for each of A, C, G,
+    ///          and T
+    ///
+    std::vector<float> SignalToNoise(void) const;
+
+    /// \returns this record's type
+    /// \sa RecordType
+    RecordType Type(void) const;
+
+    /// \}
+
+public:
+    /// \name Mapping Data
+    /// \{
+
     /// \returns the record's aligned end position
+    ///
+    /// \note AlignedEnd is in polymerase read coordinates, NOT genomic
+    ///       coordinates.
+    ///
     Position AlignedEnd(void) const;
 
+    /// \returns the record's aligned start position
+    ///
+    /// \note AlignedStart is in polymerase read coordinates, NOT genomic
+    ///       coordinates.
+    ///
+    Position AlignedStart(void) const;
+
     /// \returns the record's strand as a Strand enum value
     Strand AlignedStrand(void) const;
 
     /// \returns the record's CIGAR data as a Cigar object
-    Cigar CigarData(void) const;
+    ///
+    /// \param[in] exciseAllClips   if true, remove all clipping operations
+    ///                             (hard & soft) [default:false]
+    ///
+    Cigar CigarData(bool exciseAllClips = false) const;
 
-    /// \returns this record's full name
-    /// \sa BamRecordImpl::Name
-    std::string FullName(void) const;
+    /// \returns true if this record was mapped by aligner
+    bool IsMapped(void) const;
+
+    /// \returns this record's mapping quality. A value of 255 indicates
+    ///          "unknown"
+    ///
+    uint8_t MapQuality(void) const;
+
+    /// \returns the number of deleted bases (relative to reference)
+    size_t NumDeletedBases(void) const;
+
+    /// \returns the number of inserted bases (relative to reference)
+    size_t NumInsertedBases(void) const;
+
+    /// \returns the number of matching bases (sum of '=' CIGAR op lengths)
+    size_t NumMatches(void) const;
+
+    /// \returns a pair containing NumMatches (first) and NumMismatches
+    ///         (second)
+    ///
+    std::pair<size_t, size_t> NumMatchesAndMismatches(void) const;
+
+    /// \returns the number of mismatching bases (sum of 'X' CIGAR op lengths)
+    size_t NumMismatches(void) const;
+
+    /// \returns this record's reference ID, or -1 if unmapped.
+    ///
+    /// \note This is only a valid identifier within this %BAM file
+    ///
+    int32_t ReferenceId(void) const;
+
+    /// \returns this record's reference name.
+    ///
+    /// \throws an exception if unmapped record.
+    ///
+    std::string ReferenceName(void) const;
+
+    /// \returns the record's reference end position, or UnmappedPosition if
+    ///          unmapped
+    ///
+    /// \note ReferenceEnd is in reference coordinates, NOT polymerase read
+    ///       coordinates.
+    ///
+    Position ReferenceEnd(void) const;
+
+    /// \returns the record's reference start position, or UnmappedPosition if
+    ///          unmapped
+    ///
+    /// \note ReferenceStart is in reference coordinates, NOT polymerase read
+    ///       coordinates.
+    ///
+    Position ReferenceStart(void) const;
+
+    /// \}
+
+public:
+    /// \name Barcode Data
+    /// \{
+
+    /// \returns forward barcode id
+    ///
+    /// \throws std::runtime_error if barcode data is absent or malformed.
+    /// \sa HasBarcodes
+    ///
+    uint16_t BarcodeForward(void) const;
+
+    /// \returns barcode call confidence (Phred-scaled posterior probability
+    ///          of correct barcode call)
+    ///
+    /// \sa HasBarcodeQuality
+    ///
+    uint8_t BarcodeQuality(void) const;
+
+    /// \returns reverse barcode id
+    ///
+    /// \throws std::runtime_error if barcode data is absent or malformed.
+    /// \sa HasBarcodes
+    ///
+    uint16_t BarcodeReverse(void) const;
+
+    /// \returns the forward and reverse barcode ids
+    ///
+    /// \throws std::runtime_error if barcode data is absent or malformed.
+    /// \sa HasBarcodes
+    ///
+    std::pair<uint16_t,uint16_t> Barcodes(void) const;
+
+    /// \}
+
+public:
+    /// \name Auxiliary Data Queries
+    /// \{
 
     /// \returns true if this record has AltLabelQV data
     bool HasAltLabelQV(void) const;
 
-    /// \returns true if this record has LabelQV data
-    bool HasLabelQV(void) const;
+    /// \returns true if this record has AltLabelTag data
+    bool HasAltLabelTag(void) const;
+
+    /// \returns true if this record has Barcode data
+    bool HasBarcodes(void) const;
+
+    /// \returns true if this record has BarcodeQuality data
+    bool HasBarcodeQuality(void) const;
 
     /// \returns true if this record has DeletionQV data
     bool HasDeletionQV(void) const;
@@ -137,20 +349,38 @@ public:
     /// \returns true if this record has DeletionTag data
     bool HasDeletionTag(void) const;
 
-    /// \returns true if this record has LocalContextFlags (absent in CCS)
-    bool HasLocalContextFlags(void) const;
+    /// \returns true if this record has a HoleNumber
+    bool HasHoleNumber(void) const;
 
     /// \returns true if this record has InsertionQV data
     bool HasInsertionQV(void) const;
 
+    /// \returns true if this record has IPD data
+    bool HasIPD(void) const;
+
+    /// \returns true if this record has LabelQV data
+    bool HasLabelQV(void) const;
+
+    /// \returns true if this record has LocalContextFlags (absent in CCS)
+    bool HasLocalContextFlags(void) const;
+
+    /// \returns true if this record has MergeQV data
+    bool HasMergeQV(void) const;
+
+    /// \returns true if this record has NumPasses data
+    bool HasNumPasses(void) const;
+
     /// \returns true if this record has Pkmean data
     bool HasPkmean(void) const;
 
     /// \returns true if this record has Pkmid data
     bool HasPkmid(void) const;
 
-    /// \returns true if this record has IPD data
-    bool HasIPD(void) const;
+    /// \returns true if this record has Pkmean2 data
+    bool HasPkmean2(void) const;
+
+    /// \returns true if this record has Pkmid2 data
+    bool HasPkmid2(void) const;
 
     /// \returns true if this record has PreBaseFrames aka IPD data
     bool HasPreBaseFrames(void) const;
@@ -158,135 +388,125 @@ public:
     /// \returns true if this record has PrePulseFrames data
     bool HasPrePulseFrames(void) const;
 
+    /// \returns true if this record has PulseCall data
+    bool HasPulseCall(void) const;
+
     /// \returns true if this record has PulseCallWidth data
     bool HasPulseCallWidth(void) const;
 
-    /// \returns true if this record has MergeQV data
-    bool HasMergeQV(void) const;
-
     /// \returns true if this record has PulseMergeQV data
     bool HasPulseMergeQV(void) const;
 
     /// \returns true if this record has PulseWidth data
     bool HasPulseWidth(void) const;
 
-    /// \returns true if this record has signal-to-noise data (absent in POLYMERASE)
-    bool HasSignalToNoise(void) const;
-
-    /// \returns true if this record has ScrapType data (only in SCRAP)
-    bool HasScrapType(void) const;
-
-    /// \returns true if this record has SubstitutionQV data
-    bool HasSubstitutionQV(void) const;
-
-    /// \returns true if this record has SubstitutionTag data
-    bool HasSubstitutionTag(void) const;
-
-    /// \returns true if this record has AltLabelTag data
-    bool HasAltLabelTag(void) const;
-
-    /// \returns true if this record has PulseCall data
-    bool HasPulseCall(void) const;
-
     /// \returns true if this record has ReadAccuracyTag data
     bool HasReadAccuracy(void) const;
 
-    /// \returns true if this record has a HoleNumber
-    bool HasHoleNumber(void) const;
+    /// \returns true if this record has QueryEnd data
+    bool HasQueryEnd(void) const;
 
     /// \returns true if this record has QueryStart data
     bool HasQueryStart(void) const;
 
-    /// \returns true if this record has QueryEnd data
-    bool HasQueryEnd(void) const;
+    /// \returns true if this record has ScrapRegionType data (only in SCRAP)
+    bool HasScrapRegionType(void) const;
 
-    /// \returns true if this record has Barcode data
-    bool HasBarcodes(void) const;
-
-    /// \returns shared pointer to this record's associated BamHeader
-    BamHeader Header(void) const;
+    /// \returns true if this record has scrap ZMW type data (only in SCRAP)
+    bool HasScrapZmwType(void) const;
 
-    /// \returns ZMW hole number
-    /// \throws if missing zm tag & record name does not contain hole number
-    int32_t HoleNumber(void) const;
+    /// \returns true if this record has signal-to-noise data (absent in
+    ///          POLYMERASE)
+    ///
+    bool HasSignalToNoise(void) const;
 
-    /// \returns true if this record was mapped by aligner
-    /// \sa BamRecordImpl::IsMapped
-    bool IsMapped(void) const;
+    /// \returns true if this record has StartFrame data
+    bool HasStartFrame(void) const;
 
-    /// \returns this record's LocalContextFlags
-    PacBio::BAM::LocalContextFlags LocalContextFlags(void) const;
+    /// \returns true if this record has SubstitutionQV data
+    bool HasSubstitutionQV(void) const;
 
-    /// \returns this record's mapping quality. A value of 255 indicates "unknown"
-    uint8_t MapQuality(void) const;
+    /// \returns true if this record has SubstitutionTag data
+    bool HasSubstitutionTag(void) const;
 
-    /// \returns this record's movie name
-    std::string MovieName(void) const;
+    /// \}
 
-    /// \returns "number of complete passes of the insert"
-    int32_t NumPasses(void) const;
+public:
+    /// \name Sequence & Tag Data
+    /// \{
 
-    /// \note QueryStart is in polymerase read coordinates, NOT genomic coordinates.
+    /// \brief Fetches this record's AltLabelTag values ("pt" tag).
     ///
-    /// \returns the record's query start position, or 0 if not stored
-    Position QueryStart(void) const;
-
-    /// \note QueryEnd is in polymerase read coordinates, NOT genomic coordinates.
+    /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+    ///       new gap chars will be '-' and padding chars will be '*'.
     ///
-    /// \returns the record's query end position, or Sequence().length() if not stored
-    Position QueryEnd(void) const;
-
-    /// \returns the left and right barcode ids
-    std::pair<int,int> Barcodes(void) const;
-
-    /// \returns this record's expected read accuracy [0, 1000]
-    Accuracy ReadAccuracy(void) const;
-
-    /// \returns ReadGroupInfo object for this record
-    ReadGroupInfo ReadGroup(void) const;
-
-    /// \returns ID of this record's read group
-    /// \sa ReadGroupInfo::Id
-    std::string ReadGroupId(void) const;
-
-    /// \returns this record's reference ID, or -1 if unmapped.
-    /// \note This is only a valid identifier within this BAM file
-    int32_t ReferenceId(void) const;
-
-    /// \returns this record's reference name.
-    /// \throws an exception if unmapped record.
-    std::string ReferenceName(void) const;
-
-    /// \note ReferenceStart is in reference coordinates, NOT polymerase read coordinates.
+    /// \param[in] orientation      Orientation of output.
     ///
-    /// \returns the record's reference start position, or UnmappedPosition if unmapped
-    Position ReferenceStart(void) const;
-
-    /// \note ReferenceEnd is in reference coordinates, NOT polymerase read coordinates.
+    /// \returns AltLabelTags string
     ///
-    /// \returns the record's reference end position, or UnmappedPosition if unmapped
-    Position ReferenceEnd(void) const;
+    std::string AltLabelTag(Orientation orientation = Orientation::NATIVE) const;
 
-    /// \returns this scrap record's ScrapType
-    VirtualRegionType ScrapType(void) const;
+    /// \brief Fetches this record's DeletionTag values ("dt" tag).
+    ///
+    /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+    ///       new gap chars will be '-' and padding chars will be '*'.
+    ///
+    /// \param[in] orientation      Orientation of output.
+    /// \param[in] aligned          if true, gaps/padding will be inserted, per
+    ///                             Cigar info.
+    /// \param[in] exciseSoftClips  if true, any soft-clipped positions will be
+    ///                             removed from query ends
+    ///
+    /// \returns DeletionTag string
+    ///
+    std::string DeletionTag(Orientation orientation = Orientation::NATIVE,
+                            bool aligned = false,
+                            bool exciseSoftClips = false) const;
 
-    /// \returns this record's average signal-to-noise for each of A, C, G, and T
-    std::vector<float> SignalToNoise(void) const;
+    /// \brief Fetches this record's DNA sequence (SEQ field).
+    ///
+    /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+    ///       new gap chars will be '-' and padding chars will be '*'.
+    ///
+    /// \param[in] orientation      Orientation of output.
+    /// \param[in] aligned          if true, gaps/padding will be inserted, per
+    ///                             Cigar info.
+    /// \param[in] exciseSoftClips  if true, any soft-clipped positions will be
+    ///                             removed from query ends
+    ///
+    /// \returns sequence string
+    ///
+    std::string Sequence(const Orientation orientation = Orientation::NATIVE,
+                         bool aligned = false,
+                         bool exciseSoftClips = false) const;
 
-    /// \returns this record's type
-    /// \sa RecordType
-    RecordType Type(void) const;
+    /// \brief Fetches this record's SubstitutionTag values ("st" tag).
+    ///
+    /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+    ///       new gap chars will be '-' and padding chars will be '*'.
+    ///
+    /// \param[in] orientation      Orientation of output.
+    /// \param[in] aligned          if true, gaps/padding will be inserted, per
+    ///                             Cigar info.
+    /// \param[in] exciseSoftClips  if true, any soft-clipped positions will be
+    ///                             removed from query ends
+    ///
+    /// \returns SubstitutionTags string
+    ///
+    std::string SubstitutionTag(Orientation orientation = Orientation::NATIVE,
+                                bool aligned = false,
+                                bool exciseSoftClips = false) const;
 
     /// \}
 
 public:
-    /// \name Per-Base Data
+    /// \name Quality Data
     /// \{
 
-    /// \brief Fetch this record's AltLabelQV values ("pv" tag).
+    /// \brief Fetches this record's AltLabelQV values ("pv" tag).
     ///
-    /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
-    ///       QVs will have a value of 0.
+    /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+    ///       new QVs will have a value of 0.
     ///
     /// \param[in] orientation     Orientation of output.
     ///
@@ -294,70 +514,118 @@ public:
     ///
     QualityValues AltLabelQV(Orientation orientation = Orientation::NATIVE) const;
 
-    /// \brief Fetch this record's AltLabelTag values ("pt" tag).
+    /// \brief Fetches this record's DeletionQV values ("dq" tag).
     ///
-    /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
-    ///       gap chars will be '-' and padding chars will be '*'.
+    /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+    ///       new QVs will have a value of 0.
     ///
-    /// \param[in] orientation     Orientation of output.
+    /// \param[in] orientation      Orientation of output.
+    /// \param[in] aligned          if true, gaps/padding will be inserted, per
+    ///                             Cigar info.
+    /// \param[in] exciseSoftClips  if true, any soft-clipped positions will be
+    ///                             removed from query ends
     ///
-    /// \returns AltLabelTags string
+    /// \returns DeletionQV as QualityValues object
     ///
-    std::string AltLabelTag(Orientation orientation = Orientation::NATIVE) const;
+    QualityValues DeletionQV(Orientation orientation = Orientation::NATIVE,
+                             bool aligned = false,
+                             bool exciseSoftClips = false) const;
 
-    /// \brief Fetch this record's DeletionQV values ("dq" tag).
+    /// \brief Fetches this record's InsertionQV values ("iq" tag).
     ///
-    /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
-    ///       QVs will have a value of 0.
+    /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+    ///       new QVs will have a value of 0.
     ///
-    /// \param[in] orientation     Orientation of output.
-    /// \param[in] aligned         if true, gaps/padding will be inserted, per Cigar info.
-    /// \param[in] exciseSoftClips if true, any soft-clipped positions will be removed from query ends
+    /// \param[in] orientation      Orientation of output.
+    /// \param[in] aligned          if true, gaps/padding will be inserted, per
+    ///                             Cigar info.
+    /// \param[in] exciseSoftClips  if true, any soft-clipped positions will be
+    ///                             removed from query ends
     ///
-    /// \returns DeletionQV as QualityValues object
+    /// \returns InsertionQVs as QualityValues object
     ///
-    QualityValues DeletionQV(Orientation orientation = Orientation::NATIVE,
-                             bool aligned = false,
-                             bool exciseSoftClips = false) const;
+    QualityValues InsertionQV(Orientation orientation = Orientation::NATIVE,
+                              bool aligned = false,
+                              bool exciseSoftClips = false) const;
 
-    /// \brief Fetch this record's DeletionTag values ("dt" tag).
+    /// \brief Fetches this record's LabelQV values ("pq" tag).
     ///
-    /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
-    ///       gap chars will be '-' and padding chars will be '*'.
+    /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+    ///       new QVs will have a value of 0.
     ///
     /// \param[in] orientation     Orientation of output.
-    /// \param[in] aligned         if true, gaps/padding will be inserted, per Cigar info.
-    /// \param[in] exciseSoftClips if true, any soft-clipped positions will be removed from query ends
     ///
-    /// \returns DeletionTag string
+    /// \returns LabelQV as QualityValues object
     ///
-    std::string DeletionTag(Orientation orientation = Orientation::NATIVE,
+    QualityValues LabelQV(Orientation orientation = Orientation::NATIVE) const;
+
+    /// \brief Fetches this record's MergeQV values ("mq" tag).
+    ///
+    /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+    ///       new QVs will have a value of 0.
+    ///
+    /// \param[in] orientation      Orientation of output.
+    /// \param[in] aligned          if true, gaps/padding will be inserted, per
+    ///                             Cigar info.
+    /// \param[in] exciseSoftClips  if true, any soft-clipped positions will be
+    ///                             removed from query ends
+    ///
+    /// \returns MergeQV as QualityValues object
+    ///
+    QualityValues MergeQV(Orientation orientation = Orientation::NATIVE,
+                          bool aligned = false,
+                          bool exciseSoftClips = false) const;
+
+    /// \brief Fetches this record's %BAM quality values (QUAL field).
+    ///
+    /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+    ///       new QVs will have a value of 0.
+    ///
+    /// \param[in] orientation      Orientation of output.
+    /// \param[in] aligned          if true, gaps/padding will be inserted, per
+    ///                             Cigar info.
+    /// \param[in] exciseSoftClips  if true, any soft-clipped positions will be
+    ///                             removed from query ends
+    ///
+    /// \returns %BAM qualities as QualityValues object
+    ///
+    QualityValues Qualities(Orientation orientation = Orientation::NATIVE,
                             bool aligned = false,
                             bool exciseSoftClips = false) const;
 
-    /// \brief Fetch this record's InsertionQV values ("iq" tag).
+    /// \brief Fetches this record's SubstitutionQV values ("sq" tag).
     ///
-    /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
-    ///       QVs will have a value of 0.
+    /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+    ///       new QVs will have a value of 0.
     ///
-    /// \param[in] orientation     Orientation of output.
-    /// \param[in] aligned         if true, gaps/padding will be inserted, per Cigar info.
-    /// \param[in] exciseSoftClips if true, any soft-clipped positions will be removed from query ends
+    /// \param[in] orientation      Orientation of output.
+    /// \param[in] aligned          if true, gaps/padding will be inserted, per
+    ///                             Cigar info.
+    /// \param[in] exciseSoftClips  if true, any soft-clipped positions will be
+    ///                             removed from query ends
     ///
-    /// \returns InsertionQVs as QualityValues object
+    /// \returns SubstitutionQV as QualityValues object
     ///
-    QualityValues InsertionQV(Orientation orientation = Orientation::NATIVE,
-                              bool aligned = false,
-                              bool exciseSoftClips = false) const;
+    QualityValues SubstitutionQV(Orientation orientation = Orientation::NATIVE,
+                                 bool aligned = false,
+                                 bool exciseSoftClips = false) const;
 
-    /// \brief Fetch this record's IPD values ("ip" tag).
+    /// \}
+
+public:
+    /// \name Pulse Data
+    /// \{
+
+    /// \brief Fetches this record's IPD values ("ip" tag).
     ///
-    /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
-    ///       frames will have a value of 0;
+    /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+    ///       new frames will have a value of 0.
     ///
-    /// \param[in] orientation     Orientation of output.
-    /// \param[in] aligned         if true, gaps/padding will be inserted, per Cigar info.
-    /// \param[in] exciseSoftClips if true, any soft-clipped positions will be removed from query ends
+    /// \param[in] orientation      Orientation of output.
+    /// \param[in] aligned          if true, gaps/padding will be inserted, per
+    ///                             Cigar info.
+    /// \param[in] exciseSoftClips  if true, any soft-clipped positions will be
+    ///                             removed from query ends
     ///
     /// \returns IPD as Frames object
     ///
@@ -365,432 +633,514 @@ public:
                bool aligned = false,
                bool exciseSoftClips = false) const;
 
-    /// \brief Fetch this record's PreBaseFrames aka IPD values ("ip" tag).
-    ///
-    /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
-    ///       frames will have a value of 0;
+    /// \brief Fetches this record's IPD values ("ip" tag), but does not upscale.
     ///
     /// \param[in] orientation     Orientation of output.
-    /// \param[in] aligned         if true, gaps/padding will be inserted, per Cigar info.
-    /// \param[in] exciseSoftClips if true, any soft-clipped positions will be removed from query ends
-    ///
     /// \returns IPD as Frames object
     ///
-    Frames PreBaseFrames(Orientation orientation = Orientation::NATIVE,
-               bool aligned = false,
-               bool exciseSoftClips = false) const;
+    Frames IPDRaw(Orientation orientation = Orientation::NATIVE) const;
 
-    /// \brief Fetch this record's IPD values ("ip" tag), but does not upscale.
+    /// \brief Fetches this record's Pkmean values ("pa" tag).
     ///
     /// \param[in] orientation     Orientation of output.
+    /// \returns Pkmean as vector<float> object
     ///
-    /// \returns IPD as Frames object
+    std::vector<float> Pkmean(Orientation orientation = Orientation::NATIVE) const;
+
+    /// \brief Fetches this record's Pkmid values ("pm" tag).
     ///
-    Frames IPDRaw(Orientation orientation = Orientation::NATIVE) const;
+    /// \param[in] orientation     Orientation of output.
+    /// \returns Pkmid as vector<float> object
+    ///
+    std::vector<float> Pkmid(Orientation orientation = Orientation::NATIVE) const;
 
-    /// \brief Fetch this record's LabelQV values ("pq" tag).
+    /// \brief Fetches this record's Pkmean2 values ("ps" tag).
+    ///
+    /// \param[in] orientation     Orientation of output.
+    /// \returns Pkmean2 as vector<float> object
     ///
-    /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
-    ///       QVs will have a value of 0.
+    std::vector<float> Pkmean2(Orientation orientation = Orientation::NATIVE) const;
+
+    /// \brief Fetches this record's Pkmid2 values ("pi" tag).
     ///
     /// \param[in] orientation     Orientation of output.
+    /// \returns Pkmid2 as vector<float> object
     ///
-    /// \returns LabelQV as QualityValues object
+    std::vector<float> Pkmid2(Orientation orientation = Orientation::NATIVE) const;
+
+    /// \brief Fetches this record's PreBaseFrames aka IPD values ("ip" tag).
     ///
-    QualityValues LabelQV(Orientation orientation = Orientation::NATIVE) const;
+    /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+    ///       new frames will have a value of 0.
+    ///
+    /// \param[in] orientation      Orientation of output.
+    /// \param[in] aligned          if true, gaps/padding will be inserted, per
+    ///                             Cigar info.
+    /// \param[in] exciseSoftClips  if true, any soft-clipped positions will be
+    ///                             removed from query ends
+    ///
+    /// \returns IPD as Frames object
+    ///
+    Frames PreBaseFrames(Orientation orientation = Orientation::NATIVE,
+                         bool aligned = false,
+                         bool exciseSoftClips = false) const;
 
-    /// \brief Fetch this record's MergeQV values ("mq" tag).
+    /// \brief Fetches this record's PrePulseFrames values ("pd" tag).
     ///
-    /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
-    ///       QVs will have a value of 0.
+    /// \param[in] orientation     Orientation of output.
+    /// \returns PrePulseFrames as Frames object
+    ///
+    Frames PrePulseFrames(Orientation orientation = Orientation::NATIVE) const;
+
+    /// \brief Fetches this record's PulseCall values ("pc" tag).
     ///
     /// \param[in] orientation     Orientation of output.
-    /// \param[in] aligned         if true, gaps/padding will be inserted, per Cigar info.
-    /// \param[in] exciseSoftClips if true, any soft-clipped positions will be removed from query ends
+    /// \returns PulseCalls string
     ///
-    /// \returns MergeQV as QualityValues object
+    std::string PulseCall(Orientation orientation = Orientation::NATIVE) const;
+
+    /// \brief Fetches this record's PulseCallWidth values ("px" tag).
     ///
-    QualityValues MergeQV(Orientation orientation = Orientation::NATIVE,
-                          bool aligned = false,
-                          bool exciseSoftClips = false) const;
+    /// \param[in] orientation     Orientation of output.
+    /// \returns PulseCallWidth as Frames object
+    ///
+    Frames PulseCallWidth(Orientation orientation = Orientation::NATIVE) const;
 
     /// \brief Fetch this record's PulseMergeQV values ("pg" tag).
     ///
-    /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
-    ///       QVs will have a value of 0.
-    ///
     /// \param[in] orientation     Orientation of output.
-    /// \param[in] aligned         if true, gaps/padding will be inserted, per Cigar info.
-    /// \param[in] exciseSoftClips if true, any soft-clipped positions will be removed from query ends
-    ///
     /// \returns PulseMergeQV as QualityValues object
     ///
     QualityValues PulseMergeQV(Orientation orientation = Orientation::NATIVE) const;
 
-    /// \brief Fetch this record's Pkmean values ("pa" tag).
+    /// \brief Fetches this record's PulseWidth values ("pw" tag).
     ///
-    /// \param[in] orientation     Orientation of output.
+    /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+    ///       new frames will have a value of 0.
     ///
-    /// \returns Pkmean as vector<float> object
+    /// \param[in] orientation      Orientation of output.
+    /// \param[in] aligned          if true, gaps/padding will be inserted, per
+    ///                             Cigar info.
+    /// \param[in] exciseSoftClips  if true, any soft-clipped positions will be
+    ///                             removed from query ends
     ///
-    std::vector<float> Pkmean(Orientation orientation = Orientation::NATIVE) const;
+    /// \returns PulseWidths as Frames object
+    ///
+    Frames PulseWidth(Orientation orientation = Orientation::NATIVE,
+                      bool aligned = false,
+                      bool exciseSoftClips = false) const;
 
-    /// \brief Fetch this record's Pkmid values ("pm" tag).
+    /// \brief Fetches this record's PulseWidth values ("pw" tag), but does not
+    ///        upscale.
     ///
     /// \param[in] orientation     Orientation of output.
+    /// \returns PulseWidth as Frames object
     ///
-    /// \returns Pkmid as vector<float> object
-    ///
-    std::vector<float> Pkmid(Orientation orientation = Orientation::NATIVE) const;
+    Frames PulseWidthRaw(Orientation orientation = Orientation::NATIVE) const;
 
-    /// \brief Fetch this record's PrePulseFrames values ("pd" tag).
-    ///
-    /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
-    ///       frames will have a value of 0;
+    /// \brief Fetches this record's StartFrame values ("sf" tag).
     ///
-    /// \param[in] orientation     Orientation of output.
+    /// \param[in] orientation     Orientation of output
     ///
-    /// \returns PrePulseFrames as Frames object
+    /// \returns StartFrame as uint32_t vector
     ///
-    Frames PrePulseFrames(Orientation orientation = Orientation::NATIVE) const;
+    std::vector<uint32_t> StartFrame(Orientation orientation = Orientation::NATIVE) const;
+
+    /// \}
+
+public:
+    /// \name Low-Level Access & Operations
+    /// \{
 
-    /// \brief Fetch this record's PulseCall values ("pc" tag).
+    /// \warning This method should be considered temporary and avoided as much
+    ///          as possible. Direct access to the internal object is likely to
+    ///          disappear as BamRecord interface matures.
     ///
-    /// \param[in] orientation     Orientation of output.
+    /// \returns const reference to underlying BamRecordImpl object
     ///
-    /// \returns PulseCalls string
+    const BamRecordImpl& Impl(void) const;
+
+    /// \warning This method should be considered temporary and avoided as much
+    ///          as possible. Direct access to the internal object is likely to
+    ///          disappear as BamRecord interface matures.
     ///
-    std::string PulseCall(Orientation orientation = Orientation::NATIVE) const;
+    /// \returns reference to underlying BamRecordImpl object
+    ///
+    BamRecordImpl& Impl(void);
+
+    /// \}
+
+public:
+    /// \name General Data
+    /// \{
 
-    /// \brief Fetch this record's PulseCallWidth values ("px" tag).
+    /// \brief Sets this record's ZMW hole number.
     ///
-    /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
-    ///       frames will have a value of 0;
+    /// \param[in] holeNumber
+    /// \returns reference to this record
     ///
-    /// \param[in] orientation     Orientation of output.
+    BamRecord& HoleNumber(const int32_t holeNumber);
+
+    /// \brief Sets this record's local context flags
     ///
-    /// \returns PulseCallWidth as Frames object
+    /// \param[in] flags
+    /// \returns reference to this record
     ///
-    Frames PulseCallWidth(Orientation orientation = Orientation::NATIVE) const;
+    BamRecord& LocalContextFlags(const PacBio::BAM::LocalContextFlags flags);
 
-    /// \brief Fetch this record's PulseWidth values ("pw" tag).
+    /// \brief Sets this record's "number of complete passes of the insert".
+    ///
+    /// \param[in] numPasses
+    /// \returns reference to this record
     ///
-    /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
-    ///       frames will have a value of 0.
+    BamRecord& NumPasses(const int32_t numPasses);
+
+    /// \brief Sets this record's query end position.
     ///
-    /// \param[in] orientation     Orientation of output.
-    /// \param[in] aligned         if true, gaps/padding will be inserted, per Cigar info.
-    /// \param[in] exciseSoftClips if true, any soft-clipped positions will be removed from query ends
+    /// \note Changing this will modify the name of non-CCS records.
     ///
-    /// \returns PulseWidths as Frames object
+    /// \param[in] pos
+    /// \returns reference to this record
     ///
-    Frames PulseWidth(Orientation orientation = Orientation::NATIVE,
-                      bool aligned = false,
-                      bool exciseSoftClips = false) const;
+    BamRecord& QueryEnd(const PacBio::BAM::Position pos);
 
-    /// \brief Fetch this record's PulseWidth values ("pw" tag), but does not upscale.
+    /// \brief Sets this record's query start position.
     ///
-    /// \param[in] orientation     Orientation of output.
+    /// \note Changing this will modify the name of non-CCS records.
     ///
-    /// \returns PulseWidth as Frames object
+    /// \param[in] pos
+    /// \returns reference to this record
     ///
-    Frames PulseWidthRaw(Orientation orientation = Orientation::NATIVE) const;
+    BamRecord& QueryStart(const PacBio::BAM::Position pos);
 
-    /// \brief Fetch this record's BAM quality values (QUAL field).
+    /// \brief Sets this record's expected read accuracy [0, 1000]
     ///
-    /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
-    ///       QVs will have a value of 0.
+    /// \param[in] accuracy
+    /// \returns reference to this record
     ///
-    /// \param[in] orientation     Orientation of output.
-    /// \param[in] aligned         if true, gaps/padding will be inserted, per Cigar info.
-    /// \param[in] exciseSoftClips if true, any soft-clipped positions will be removed from query ends
+    BamRecord& ReadAccuracy(const Accuracy& accuracy);
+
+    /// \brief Attaches this record to the provided read group, changing the
+    ///        record name & 'RG' tag.
     ///
-    /// \returns BAM qualities as QualityValues object
+    /// \param[in] rg
+    /// \returns reference to this record
     ///
-    QualityValues Qualities(Orientation orientation = Orientation::NATIVE,
-                            bool aligned = false,
-                            bool exciseSoftClips = false) const;
+    BamRecord& ReadGroup(const ReadGroupInfo& rg);
 
-    /// \brief Fetch this record's DNA sequence (SEQ field).
+    /// \brief Attaches this record to the provided read group, changing the
+    ///        record name & 'RG' tag.
     ///
-    /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
-    ///       gap chars will be '-' and padding chars will be '*'.
+    /// \param[in] id
+    /// \returns reference to this record
     ///
-    /// \param[in] orientation     Orientation of output.
-    /// \param[in] aligned         if true, gaps/padding will be inserted, per Cigar info.
-    /// \param[in] exciseSoftClips if true, any soft-clipped positions will be removed from query ends
+    BamRecord& ReadGroupId(const std::string& id);
+
+    /// \brief Sets this scrap record's ScrapRegionType
     ///
-    /// \returns sequence string
+    /// \param[in] type
+    /// \returns reference to this record
     ///
-    std::string Sequence(const Orientation orientation = Orientation::NATIVE,
-                         bool aligned = false,
-                         bool exciseSoftClips = false) const;
+    BamRecord& ScrapRegionType(const VirtualRegionType type);
 
-    /// \brief Fetch this record's SubstitutionQV values ("sq" tag).
+    /// \brief Sets this scrap record's ScrapRegionType
     ///
-    /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
-    ///       QVs will have a value of 0.
+    /// \param[in] type character equivalent of VirtualRegionType
+    /// \returns reference to this record
     ///
-    /// \param[in] orientation     Orientation of output.
-    /// \param[in] aligned         if true, gaps/padding will be inserted, per Cigar info.
-    /// \param[in] exciseSoftClips if true, any soft-clipped positions will be removed from query ends
+    BamRecord& ScrapRegionType(const char type);
+
+    /// \brief Sets this scrap record's ScrapZmwType
     ///
-    /// \returns SubstitutionQV as QualityValues object
+    /// \param[in] type
+    /// \returns reference to this record
     ///
-    QualityValues SubstitutionQV(Orientation orientation = Orientation::NATIVE,
-                                 bool aligned = false,
-                                 bool exciseSoftClips = false) const;
+    BamRecord& ScrapZmwType(const ZmwType type);
 
-    /// \brief Fetch this record's SubstitutionTag values ("st" tag).
+    /// \brief Sets this scrap record's ScrapZmwType
     ///
-    /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
-    ///       gap chars will be '-' and padding chars will be '*'.
+    /// \param[in] type character equivalent of ZmwType
+    /// \returns reference to this record
     ///
-    /// \param[in] orientation     Orientation of output.
-    /// \param[in] aligned         if true, gaps/padding will be inserted, per Cigar info.
-    /// \param[in] exciseSoftClips if true, any soft-clipped positions will be removed from query ends
+    BamRecord& ScrapZmwType(const char type);
+
+    /// \brief Sets this record's average signal-to-noise in each of A, C, G,
+    ///        and T
     ///
-    /// \returns SubstitutionTags string
+    /// \param[in] snr average signal-to-noise of A, C, G, and T (in this order)
+    /// \returns reference to this record
     ///
-    std::string SubstitutionTag(Orientation orientation = Orientation::NATIVE,
-                                bool aligned = false,
-                                bool exciseSoftClips = false) const;
+    BamRecord& SignalToNoise(const std::vector<float>& snr);
 
     /// \}
 
 public:
-    /// \name Low-Level
+    /// \name Barcode Data
     /// \{
 
-    /// \warning This method should be considered temporary and avoided as much as possible.
-    ///          Direct access to the internal object is likely to disappear as BamRecord interface matures.
+    /// \brief Sets this record's barcode IDs ('bc' tag)
     ///
-    /// \returns const reference to underlying BamRecordImpl object
-    const BamRecordImpl& Impl(void) const;
+    /// \param[in] barcodeIds
+    /// \returns reference to this record
+    ///
+    BamRecord& Barcodes(const std::pair<uint16_t,uint16_t>& barcodeIds);
 
-    /// \warning This method should be considered temporary and avoided as much as possible.
-    ///          Direct access to the internal object is likely to disappear as BamRecord interface matures.
+    /// \brief Sets this record's barcode quality ('bq' tag)
     ///
-    /// \returns reference to underlying BamRecordImpl object
-    BamRecordImpl& Impl(void);
+    /// \param[in] quality Phred-scaled confidence call
+    /// \returns reference to this record
+    ///
+    BamRecord& BarcodeQuality(const uint8_t quality);
 
     /// \}
 
 public:
-    /// \name Per-Record Data
+    /// \name Sequence & Tag Data
     /// \{
+
+    /// \brief Sets this record's AltLabelTag values ("at" tag).
+    ///
+    /// \param[in] tags
+    /// \returns reference to this record
     ///
+    BamRecord& AltLabelTag(const std::string& tags);
 
-    /// Sets this record's ZMW hole number.
+    /// \brief Sets this record's DeletionTag values ("dt" tag).
     ///
-    /// \param[in] numPasses
+    /// \param[in] tags
     /// \returns reference to this record
-    BamRecord& HoleNumber(const int32_t holeNumber);
+    ///
+    BamRecord& DeletionTag(const std::string& tags);
 
-    /// Sets this record's local context flags
+    /// \brief Sets this record's SubstitutionTag values ("st" tag).
     ///
-    /// \param[in] flags
+    /// \param[in] tags
     /// \returns reference to this record
-    BamRecord& LocalContextFlags(const PacBio::BAM::LocalContextFlags flags);
+    ///
+    BamRecord& SubstitutionTag(const std::string& tags);
+
+    /// \}
+
+public:
+    /// \name Quality Data
+    /// \{
 
-    /// Sets this record's "number of complete passes of the insert".
+    /// \brief Sets this record's AltLabelQV values ("pv" tag).
     ///
-    /// \param[in] numPasses
+    /// \param[in] altLabelQVs
     /// \returns reference to this record
-    BamRecord& NumPasses(const int32_t numPasses);
+    ///
+    BamRecord& AltLabelQV(const QualityValues& altLabelQVs);
 
-    /// Sets this record's expected read accuracy [0, 1000]
+    /// \brief Sets this record's DeletionQV values ("dq" tag).
     ///
-    /// \param[in] accuracy
+    /// \param[in] deletionQVs
     /// \returns reference to this record
-    BamRecord& ReadAccuracy(const Accuracy& accuracy);
+    ///
+    BamRecord& DeletionQV(const QualityValues& deletionQVs);
 
-    /// Sets this record's average signal-to-noise in each of A, C, G, and T
+    /// \brief Sets this record's InsertionQV values ("iq" tag).
     ///
-    /// \param[in] average signal-to-noise of A, C, G, and T (in this order)
+    /// \param[in] insertionQVs
     /// \returns reference to this record
-    BamRecord& SignalToNoise(const std::vector<float>& snr);
+    ///
+    BamRecord& InsertionQV(const QualityValues& insertionQVs);
+
+    /// \brief Sets this record's LabelQV values ("pq" tag).
+    ///
+    /// \param[in] labelQVs
+    /// \returns reference to this record
+    ///
+    BamRecord& LabelQV(const QualityValues& labelQVs);
 
-    /// Sets this scrap record's ScrapType
+    /// \brief Sets this record's MergeQV values ("mq" tag).
     ///
-    /// \param[in] ScrapType of type VirtualRegionType
+    /// \param[in] mergeQVs
     /// \returns reference to this record
-    BamRecord& ScrapType(const VirtualRegionType type);
+    ///
+    BamRecord& MergeQV(const QualityValues& mergeQVs);
 
-    /// Sets this scrap record's ScrapType
+    /// \brief Sets this record's SubstitutionQV values ("sq" tag).
     ///
-    /// \param[in] ScrapType as char
+    /// \param[in] substitutionQVs
     /// \returns reference to this record
-    BamRecord& ScrapType(const char type);
+    ///
+    BamRecord& SubstitutionQV(const QualityValues& substitutionQVs);
 
     /// \}
 
 public:
-    /// \name Per-Base Data
+    /// \name Pulse Data
     /// \{
 
-    /// Sets this record's AltLabelQV values ("pv" tag).
+    /// \brief Sets this record's IPD values ("ip" tag).
     ///
-    /// \param[in] altLabelQVs
+    /// \param[in] frames
+    /// \param[in] encoding specify how to encode the data (8-bit lossy, or
+    ///                     16-bit lossless)
     /// \returns reference to this record
-    BamRecord& AltLabelQV(const QualityValues& altLabelQVs);
-
-    /// Sets this record's LabelQV values ("pq" tag).
     ///
-    /// \param[in] labelQVs
-    /// \returns reference to this record
-    BamRecord& LabelQV(const QualityValues& labelQVs);
+    BamRecord& IPD(const Frames& frames,
+                   const FrameEncodingType encoding);
 
-    /// Sets this record's DeletionQV values ("dq" tag).
+    /// \brief Sets this record's Pkmean values ("pa" tag).
     ///
-    /// \param[in] deletionQVs
+    /// \param[in] photons
     /// \returns reference to this record
-    BamRecord& DeletionQV(const QualityValues& deletionQVs);
-
-    /// Sets this record's DeletionTag values ("dt" tag).
     ///
-    /// \param[in] tags
-    /// \returns reference to this record
-    BamRecord& DeletionTag(const std::string& tags);
+    BamRecord& Pkmean(const std::vector<float>& photons);
 
-    /// Sets this record's InsertionQV values ("iq" tag).
+    /// \brief Sets this record's Pkmean values ("pa" tag).
     ///
-    /// \param[in] insertionQVs
+    /// \param[in] encodedPhotons
     /// \returns reference to this record
-    BamRecord& InsertionQV(const QualityValues& insertionQVs);
+    ///
+    BamRecord& Pkmean(const std::vector<uint16_t>& encodedPhotons);
 
-    /// Sets this record's Pkmid values ("pa" tag).
+    /// \brief Sets this record's Pkmid values ("pm" tag).
     ///
     /// \param[in] photons
     /// \returns reference to this record
+    ///
     BamRecord& Pkmid(const std::vector<float>& photons);
 
-    /// Sets this record's Pkmid values ("pa" tag).
+    /// \brief Sets this record's Pkmid values ("pm" tag).
     ///
-    /// \param[in] encoded photons
+    /// \param[in] encodedPhotons
     /// \returns reference to this record
+    ///
     BamRecord& Pkmid(const std::vector<uint16_t>& encodedPhotons);
 
-    /// Sets this record's Pkmean values ("pm" tag).
+    /// \brief Sets this record's Pkmean2 values ("ps" tag).
     ///
     /// \param[in] photons
     /// \returns reference to this record
-    BamRecord& Pkmean(const std::vector<float>& photons);
+    ///
+    BamRecord& Pkmean2(const std::vector<float>& photons);
 
-    /// Sets this record's Pkmean values ("pm" tag).
+    /// \brief Sets this record's Pkmean2 values ("ps" tag).
     ///
-    /// \param[in] encoded photons
+    /// \param[in] encodedPhotons
     /// \returns reference to this record
-    BamRecord& Pkmean(const std::vector<uint16_t>& encodedPhotons);
+    ///
+    BamRecord& Pkmean2(const std::vector<uint16_t>& encodedPhotons);
 
-    /// Sets this record's IPD values ("ip" tag).
+    /// \brief Sets this record's Pkmid2 values ("pi" tag).
     ///
-    /// \param[in] frames
-    /// \param[in] encoding specify how to encode the data (8-bit lossy, or 16-bit lossless)
+    /// \param[in] photons
     /// \returns reference to this record
-    BamRecord& IPD(const Frames& frames,
-                   const FrameEncodingType encoding);
+    ///
+    BamRecord& Pkmid2(const std::vector<float>& photons);
+
+    /// \brief Sets this record's Pkmid2 values ("pi" tag).
+    ///
+    /// \param[in] encodedPhotons
+    /// \returns reference to this record
+    ///
+    BamRecord& Pkmid2(const std::vector<uint16_t>& encodedPhotons);
 
-    /// Sets this record's PreBaseFrames aka IPD values ("ip" tag).
+    /// \brief Sets this record's PreBaseFrames aka IPD values ("ip" tag).
     ///
     /// \param[in] frames
-    /// \param[in] encoding specify how to encode the data (8-bit lossy, or 16-bit lossless)
+    /// \param[in] encoding specify how to encode the data (8-bit lossy, or
+    ///                     16-bit lossless)
     /// \returns reference to this record
+    ///
     BamRecord& PreBaseFrames(const Frames& frames,
                              const FrameEncodingType encoding);
 
-    /// Sets this record's PrePulseFrames values ("pd" tag).
+    /// \brief Sets this record's PrePulseFrames values ("pd" tag).
     ///
     /// \param[in] frames
-    /// \param[in] encoding specify how to encode the data (8-bit lossy, or 16-bit lossless)
+    /// \param[in] encoding specify how to encode the data (8-bit lossy, or
+    ///                     16-bit lossless)
     /// \returns reference to this record
+    ///
     BamRecord& PrePulseFrames(const Frames& frames,
                               const FrameEncodingType encoding);
 
-    /// Sets this record's PulseCallWidth values ("px" tag).
+    /// \brief Sets this record's PulseCall values ("pc" tag).
     ///
-    /// \param[in] frames
-    /// \param[in] encoding specify how to encode the data (8-bit lossy, or 16-bit lossless)
+    /// \param[in] tags
     /// \returns reference to this record
-    BamRecord& PulseCallWidth(const Frames& frames,
-                              const FrameEncodingType encoding);
+    ///
+    BamRecord& PulseCall(const std::string& tags);
 
-    /// Sets this record's MergeQV values ("mq" tag).
+    /// \brief Sets this record's PulseCallWidth values ("px" tag).
     ///
-    /// \param[in] mergeQVs
+    /// \param[in] frames
+    /// \param[in] encoding specify how to encode the data (8-bit lossy, or
+    ///                     16-bit lossless)
     /// \returns reference to this record
-    BamRecord& MergeQV(const QualityValues& mergeQVs);
+    ///
+    BamRecord& PulseCallWidth(const Frames& frames,
+                              const FrameEncodingType encoding);
 
-    /// Sets this record's PulseMergeQV values ("pg" tag).
+    /// \brief Sets this record's PulseMergeQV values ("pg" tag).
     ///
     /// \param[in] pulseMergeQVs
     /// \returns reference to this record
+    ///
     BamRecord& PulseMergeQV(const QualityValues& pulseMergeQVs);
 
-    /// Sets this record's PulseWidth values ("pw" tag).
+    /// \brief Sets this record's PulseWidth values ("pw" tag).
     ///
     /// \param[in] frames
-    /// \param[in] encoding specify how to encode the data (8-bit lossy, or 16-bit lossless)
+    /// \param[in] encoding specify how to encode the data (8-bit lossy, or
+    ///                     16-bit lossless)
     /// \returns reference to this record
+    ///
     BamRecord& PulseWidth(const Frames& frames,
                           const FrameEncodingType encoding);
 
-    /// Sets this record's SubstitutionQV values ("sq" tag).
+    /// \brief Sets this record's StartFrame values ("sf" tag).
     ///
-    /// \param[in] substitutionQVs
+    /// \param[in] startFrame
     /// \returns reference to this record
-    BamRecord& SubstitutionQV(const QualityValues& substitutionQVs);
+    ///
+    BamRecord& StartFrame(const std::vector<uint32_t>& startFrame);
+
+    /// \}
+
+public:
+    /// \name Low-Level Access & Operations
+    /// \{
 
-    /// Sets this record's SubstitutionTag values ("st" tag).
+    /// \brief Resets cached aligned start/end.
     ///
-    /// \param[in] tags
-    /// \returns reference to this record
-    BamRecord& SubstitutionTag(const std::string& tags);
+    /// \note This method should not be needed in most client code. It exists
+    ///       primarily as a hook for internal reading loops (queries, index
+    ///       build, etc.) It's essentially a workaround and will likely be
+    ///       removed from the API.
+    ///
+    void ResetCachedPositions(void) const;
 
-    /// Sets this record's AltLabelTag values ("at" tag).
+    /// \brief Resets cached aligned start/end.
     ///
-    /// \param[in] tags
-    /// \returns reference to this record
-    BamRecord& AltLabelTag(const std::string& tags);
+    /// \note This method should not be needed in most client code. It exists
+    ///       primarily as a hook for internal reading loops (queries, index
+    ///       build, etc.) It's essentially a workaround and will likely be
+    ///       removed from the API.
+    ///
+    void ResetCachedPositions(void);
 
-    /// Sets this record's PulseCall values ("pc" tag).
+    /// \brief Updates the record's name (BamRecord::FullName) to reflect
+    ///        modifications to name components (movie name, ZMW hole number,
+    ///        etc.)
     ///
-    /// \param[in] tags
-    /// \returns reference to this record
-    BamRecord& PulseCall(const std::string& tags);
+    void UpdateName(void);
 
     /// \}
 
 public:
-   BamRecord& QueryEnd(const PacBio::BAM::Position pos);
-   BamRecord& QueryStart(const PacBio::BAM::Position pos);
-
-   /// Resets cached aligned start/end.
-   ///
-   /// \note This method should not be needed in most client code. It exists
-   /// primarily as a hook for internal reading loops (queries, index build, etc.)
-   /// It's essentially a workaround and will likely be removed from the API as
-   /// soon as possible.
-   ///
-   void ResetCachedPositions(void) const;
-
-   /// Resets cached aligned start/end.
-   ///
-   /// \note This method should not be needed in most client code. It exists
-   /// primarily as a hook for internal reading loops (queries, index build, etc.)
-   /// It's essentially a workaround and will likely be removed from the API as
-   /// soon as possible.
-   ///
-   void ResetCachedPositions(void);
-
-   void UpdateName(void);
+    /// \name Pulse Data
+    /// \{
 
-   static std::vector<uint16_t> EncodePhotons(const std::vector<float>& data);
+    static const float photonFactor;
 
-   BamRecord& ReadGroup(const ReadGroupInfo& rg);
-   BamRecord& ReadGroupId(const std::string& id);
+    static std::vector<uint16_t> EncodePhotons(const std::vector<float>& data);
 
-//    BamRecord& ReferenceStart(const PacBio::BAM::Position pos);
+    /// \}
 
 public:
     /// \name Clipping & Mapping
@@ -833,22 +1183,24 @@ public:
                      const Strand strand,
                      const Cigar& cigar,
                      const uint8_t mappingQuality) const;
-
     /// \}
+
 private:
     BamRecordImpl impl_;
 
 public:
-    // public & mutable so that queries can directly set the header info,
-    // even on a record that is const from client code's perspective
+    /// public & mutable so that queries can directly set the header info,
+    /// even on a record that is const from client code's perspective
     mutable BamHeader header_;
-    
+
 private:
-    // cached positions (mutable to allow lazy-calc in const methods)
+    /// \internal
+    /// cached positions (mutable to allow lazy-calc in const methods)
     mutable Position alignedStart_;
     mutable Position alignedEnd_;
 
 private:
+    /// \internal
     std::vector<float> FetchPhotons(const std::string& tagName,
                                     const Orientation orientation) const;
     std::string FetchBasesRaw(const std::string& tagName) const;
@@ -889,121 +1241,109 @@ private:
     friend class internal::BamRecordMemory;
 };
 
-inline
-BamRecord BamRecord::Clipped(const BamRecord& input,
-                             const ClipType clipType,
-                             const PacBio::BAM::Position start,
-                             const PacBio::BAM::Position end)
-{
-    return input.Clipped(clipType, start, end);
-}
-
-inline
-BamRecord BamRecord::Clipped(const ClipType clipType,
-                             const PacBio::BAM::Position start,
-                             const PacBio::BAM::Position end) const
-{
-    BamRecord result(*this);
-    result.Clip(clipType, start, end);
-    return result;
-}
-
-inline
-BamRecord BamRecord::Mapped(const BamRecord& input,
-                            const int32_t referenceId,
-                            const Position refStart,
-                            const Strand strand,
-                            const Cigar& cigar,
-                            const uint8_t mappingQuality)
-{
-    return input.Mapped(referenceId, refStart, strand, cigar, mappingQuality);
-}
-
-inline
-BamRecord BamRecord::Mapped(const int32_t referenceId,
-                            const Position refStart,
-                            const Strand strand,
-                            const Cigar& cigar,
-                            const uint8_t mappingQuality) const
-{
-    BamRecord result(*this);
-    result.Map(referenceId, refStart, strand, cigar, mappingQuality);
-    return result;
-}
-
+/// \brief Provides a re-usable "view" onto a BamRecord
+///
+/// This class acts as a convenience wrapper for working with per-base BamRecord
+/// data. Most of these BamRecord methods take a list of parameters, to adjust
+/// how the underlying data are presented to client code. Often these parameters
+/// will be re-used for each BamRecord method call. Thus, to simplify such
+/// client code, a BamRecordView can be used to state those parameters once, and
+/// then simply request the desired fields.
+///
+/// \internal
+/// \todo Sync up method names with BamRecord
+/// \endinternal
+///
 class PBBAM_EXPORT BamRecordView
 {
 public:
+    /// \brief Constructs a view onto \p record using the supplied parameters.
+    ///
+    /// For frame or QV data, if \p aligned is true, a value of 0 (Accuracy or
+    /// QualityValue) will be used at each inserted or padded base location.
+    ///
+    /// \param[in] record           BamRecord data source.
+    /// \param[in] orientation      Orientation of output.
+    /// \param[in] aligned          if true, gaps/padding will be inserted, per
+    ///                             Cigar info.
+    /// \param[in] exciseSoftClips  if true, any soft-clipped positions will be
+    ///                             removed from query ends
+    ///
     BamRecordView(const BamRecord& record,
                   const Orientation orientation,
                   const bool aligned,
-                  const bool exciseSoftClips)
-        : record_(record)
-        , orientation_(orientation)
-        , aligned_(aligned)
-        , exciseSoftClips_(exciseSoftClips)
-    { }
+                  const bool exciseSoftClips);
 
 public:
-    QualityValues AltLabelQVs(void) const
-    { return record_.AltLabelQV(orientation_); }
 
-    std::string AltLabelTags(void) const
-    { return record_.AltLabelTag(orientation_); }
+    /// \returns BamRecord::AltLabelQV with this view's parameters applied
+    QualityValues AltLabelQVs(void) const;
+
+    /// \returns BamRecord::AltLabelTag with this view's parameters applied
+    std::string AltLabelTags(void) const;
 
-    QualityValues DeletionQVs(void) const
-    { return record_.DeletionQV(orientation_, aligned_, exciseSoftClips_); }
+    /// \returns BamRecord::DeletionQV with this view's parameters applied
+    QualityValues DeletionQVs(void) const;
 
-    std::string DeletionTags(void) const
-    { return record_.DeletionTag(orientation_, aligned_, exciseSoftClips_); }
+    /// \returns BamRecord::DeletionTag with this view's parameters applied
+    std::string DeletionTags(void) const;
 
-    QualityValues InsertionQVs(void) const
-    { return record_.InsertionQV(orientation_, aligned_, exciseSoftClips_); }
+    /// \returns BamRecord::InsertionQV with this view's parameters applied
+    QualityValues InsertionQVs(void) const;
 
-    Frames IPD(void) const
-    { return record_.IPD(orientation_, aligned_, exciseSoftClips_); }
+    /// \returns BamRecord::IPD with this view's parameters applied
+    Frames IPD(void) const;
 
-    Frames PrebaseFrames(void) const
-    { return record_.IPD(orientation_, aligned_, exciseSoftClips_); }
+    /// \returns BamRecord::LabelQV with this view's parameters applied
+    QualityValues LabelQVs(void) const;
 
-    QualityValues LabelQVs(void) const
-    { return record_.LabelQV(orientation_); }
+    /// \returns BamRecord::MergeQV with this view's parameters applied
+    QualityValues MergeQVs(void) const;
 
-    QualityValues MergeQVs(void) const
-    { return record_.MergeQV(orientation_, aligned_, exciseSoftClips_); }
+    /// \returns BamRecord::PulseMergeQV with this view's parameters applied
+    QualityValues PulseMergeQVs(void) const;
 
-    QualityValues PulseMergeQVs(void) const
-    { return record_.PulseMergeQV(orientation_); }
+    /// \returns BamRecord::Pkmean with this view's parameters applied
+    std::vector<float> Pkmean(void) const;
 
-    std::vector<float> Pkmean(void) const
-    { return record_.Pkmean(orientation_); }
+    /// \returns BamRecord::Pkmid with this view's parameters applied
+    std::vector<float> Pkmid(void) const;
 
-    std::vector<float> Pkmid(void) const
-    { return record_.Pkmid(orientation_); }
+    /// \returns BamRecord::Pkmean2 with this view's parameters applied
+    std::vector<float> Pkmean2(void) const;
 
-    Frames PrePulseFrames(void) const
-    { return record_.PrePulseFrames(orientation_); }
+    /// \returns BamRecord::Pkmid2 with this view's parameters applied
+    std::vector<float> Pkmid2(void) const;
 
-    std::string PulseCalls(void) const
-    { return record_.PulseCall(orientation_); }
+    /// \returns BamRecord::PreBaseFrames with this view's parameters applied
+    Frames PrebaseFrames(void) const;
 
-    Frames PulseCallWidth(void) const
-    { return record_.PulseCallWidth(orientation_); }
+    /// \returns BamRecord::PrePulseFrames with this view's parameters applied
+    Frames PrePulseFrames(void) const;
 
-    Frames PulseWidths(void) const
-    { return record_.PulseWidth(orientation_, aligned_, exciseSoftClips_); }
+    /// \returns BamRecord::PulseCall with this view's parameters applied
+    std::string PulseCalls(void) const;
 
-    QualityValues Qualities(void) const
-    { return record_.Qualities(orientation_, aligned_, exciseSoftClips_); }
+    /// \returns BamRecord::PulseCallWidth with this view's parameters applied
+    Frames PulseCallWidth(void) const;
 
-    std::string Sequence(void) const
-    { return record_.Sequence(orientation_, aligned_, exciseSoftClips_); }
+    /// \returns BamRecord::PulseWidth with this view's parameters applied
+    Frames PulseWidths(void) const;
 
-    QualityValues SubstitutionQVs(void) const
-    { return record_.SubstitutionQV(orientation_, aligned_, exciseSoftClips_); }
+    /// \returns BamRecord::Qualities with this view's parameters applied
+    QualityValues Qualities(void) const;
 
-    std::string SubstitutionTags(void) const
-    { return record_.SubstitutionTag(orientation_, aligned_, exciseSoftClips_); }
+    /// \returns BamRecord::Sequence with this view's parameters applied
+    std::string Sequence(void) const;
+
+    /// \returns BamRecord::StartFrame with this view's parameters applied
+    std::vector<uint32_t> StartFrames(void) const;
+
+    /// \returns BamRecord::SubstitutionQV with this view's parameters applied
+    QualityValues SubstitutionQVs(void) const;
+
+    /// \returns BamRecord::SubstitutionTag with this view's parameters applied
+    std::string SubstitutionTags(void) const;
 
 private:
     const BamRecord& record_;
@@ -1015,4 +1355,6 @@ private:
 } // namespace BAM
 } // namespace PacBio
 
+#include "pbbam/internal/BamRecord.inl"
+
 #endif // BAMRECORD_H
diff --git a/include/pbbam/BamRecordBuilder.h b/include/pbbam/BamRecordBuilder.h
index 81002da..c6ff877 100644
--- a/include/pbbam/BamRecordBuilder.h
+++ b/include/pbbam/BamRecordBuilder.h
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file BamRecordBuilder.h
+/// \brief Defines the BamRecordBuilder class.
+//
 // Author: Derek Barnett
 
 #ifndef BAMRECORDBUILDER_H
@@ -46,21 +50,37 @@
 namespace PacBio {
 namespace BAM {
 
-class PBBAM_EXPORT BamImplBuilder
-{
-
-};
-
-
+/// \brief The BamRecordBuilder class provides a helper utility for building
+///        BamRecords.
+///
+/// This class provides a mechanism for building up %BAM data and
+/// lazy-encoding/constructing the actual BamRecord. Currently, the methods here
+/// really only support filling in the low-level SAM/BAM-style fields, not so
+/// much the PacBio-specific fields.
+///
 class PBBAM_EXPORT BamRecordBuilder
 {
 public:
     /// \name Constructors & Related Methods
     /// \{
 
+    /// \brief Creates an empty %BAM record builder.
     BamRecordBuilder(void);
+
+    /// \brief Creates an empty %BAM record builder, with header info to apply
+    ///        to built records.
+    ///
+    /// \param[in] header   BamHeader object
+    ///
     explicit BamRecordBuilder(const BamHeader& header);
+
+    /// \brief Creates record builder with initial record data.
+    ///
+    /// \param[in] prototype    data from this record will be used to seed the
+    ///                         builder
+    ///
     BamRecordBuilder(const BamRecord& prototype);
+
     BamRecordBuilder(const BamRecordBuilder& other);
     BamRecordBuilder(BamRecordBuilder&& other);
     BamRecordBuilder& operator=(const BamRecordBuilder& other);
@@ -73,28 +93,34 @@ public:
     /// \name Record-Building
     /// \{
 
-    /// Builds a BamRecord from current builder attributes
+    /// \brief Builds a BamRecord from current builder attributes.
+    ///
+    /// \returns newly-built BamRecord object
     ///
-    /// \returns BamRecord object
     BamRecord Build(void) const;
 
-    /// Replaces an existing BamRecord's data with current builder attributes
+    /// \brief Replaces an existing BamRecord's data with current builder
+    ///        attributes.
     ///
     /// \param[out] record resulting record
     /// \returns true if successful
+    ///
     bool BuildInPlace(BamRecord& record) const;
 
-    /// Resets builder attributes to default values
+    /// \brief Resets builder attributes to default values.
+    ///
     void Reset(void);
 
-    /// Resets builder attributes with existing BamRecord data
+    /// \brief Resets builder attributes with \p prototype's data.
     ///
     /// \param[in] prototype
+    ///
     void Reset(const BamRecord& prototype);
 
-    /// Resets builder attributes with existing BamRecord data
+    /// \brief Resets builder attributes with \p prototype's data.
     ///
     /// \param[in] prototype
+    ///
     void Reset(BamRecord&& prototype);
 
     /// \}
@@ -104,52 +130,60 @@ public:
     /// \name Core Attribute Setup
     /// \{
 
-    /// Sets the record's (BAI) index bin ID.
+    /// \brief Sets the record's (BAI) index bin ID.
     ///
     /// \param[in] bin BAI index bin ID.
     /// \returns reference to this builder
+    ///
     BamRecordBuilder& Bin(const uint32_t bin);
 
-    /// Sets this record's alignment flag, using a raw integer.
+    /// \brief Sets this record's alignment flag, using a raw integer.
     ///
     /// \param[in] flag raw alignment flag
     /// \returns reference to this record
+    ///
     BamRecordBuilder& Flag(const uint32_t flag);
 
-    /// Sets this record's insert size.
+    /// \brief Sets this record's insert size.
     ///
     /// \param[in] iSize insert size
     /// \returns reference to this record
+    ///
     BamRecordBuilder& InsertSize(const int32_t iSize);
 
-    /// Sets this record's map quality.
+    /// \brief Sets this record's map quality.
     ///
     /// \param[in] mapQual mapping quality - value of 255 indicates "unknown"
     /// \returns reference to this record
+    ///
     BamRecordBuilder& MapQuality(const uint8_t mapQual);
 
-    /// Sets this record's mate's mapped position.
+    /// \brief Sets this record's mate's mapped position.
     ///
     /// \param[in] pos mapped position. A value of -1 indicates unmapped.
     /// \returns reference to this record
+    ///
     BamRecordBuilder& MatePosition(const int32_t pos);
 
-    /// Sets this record's mate's mapped reference ID
+    /// \brief Sets this record's mate's mapped reference ID
     ///
     /// \param[in] id reference ID. A value of -1 indicates unmapped.
     /// \returns reference to this record
+    ///
     BamRecordBuilder& MateReferenceId(const int32_t id);
 
-    /// Sets this record's mapped position.
+    /// \brief Sets this record's mapped position.
     ///
     /// \param[in] pos mapped position. A value of -1 indicates unmapped.
     /// \returns reference to this record
+    ///
     BamRecordBuilder& Position(const int32_t pos);
 
-    /// Sets this record's mapped reference ID
+    /// \brief Sets this record's mapped reference ID
     ///
     /// \param[in] id reference ID. A value of -1 indicates unmapped.
     /// \returns reference to this record
+    ///
     BamRecordBuilder& ReferenceId(const int32_t id);
 
     /// \}
@@ -158,40 +192,42 @@ public:
     /// \name Alignment Flag Setup
     /// \{
 
-    /// Sets whether this record is a PCR/optical duplicate
+    /// \brief Sets whether this record is a PCR/optical duplicate
     BamRecordBuilder& SetDuplicate(bool ok);
 
-    /// Sets whether this record failed quality controls
+    /// \brief Sets whether this record failed quality controls
     BamRecordBuilder& SetFailedQC(bool ok);
 
-    /// Sets whether this record is the first mate of a pair.
+    /// \brief Sets whether this record is the first mate of a pair.
     BamRecordBuilder& SetFirstMate(bool ok);
 
-    /// Sets whether this record was aligned.
+    /// \brief Sets whether this record was aligned.
     BamRecordBuilder& SetMapped(bool ok);
 
-    /// Sets whether this record's mate was aligned.
+    /// \brief Sets whether this record's mate was aligned.
     BamRecordBuilder& SetMateMapped(bool ok);
 
-    /// Sets whether this record's mate mapped to reverse strand.
+    /// \brief Sets whether this record's mate mapped to reverse strand.
     BamRecordBuilder& SetMateReverseStrand(bool ok);
 
-    /// Sets whether this record came from paired-end sequencing.
+    /// \brief Sets whether this record came from paired-end sequencing.
     BamRecordBuilder& SetPaired(bool ok);
 
-    /// Sets whether this record is a read's primary alignment.
+    /// \brief Sets whether this record is a read's primary alignment.
     BamRecordBuilder& SetPrimaryAlignment(bool ok);
 
-    /// Sets whether this record & its mate were properly mapped, per the aligner.
+    /// \brief Sets whether this record & its mate were properly mapped, per the
+    ///        aligner.
+    ///
     BamRecordBuilder& SetProperPair(bool ok);
 
-    /// Sets whether this record mapped to reverse strand.
+    /// \brief Sets whether this record mapped to reverse strand.
     BamRecordBuilder& SetReverseStrand(bool ok);
 
-    /// Sets whether this record is the second mate of a pair.
+    /// \brief Sets whether this record is the second mate of a pair.
     BamRecordBuilder& SetSecondMate(bool ok);
 
-    /// Sets whether this record is a supplementary alignment.
+    /// \brief Sets whether this record is a supplementary alignment.
     BamRecordBuilder& SetSupplementaryAlignment(bool ok);
 
     /// \}
@@ -200,24 +236,70 @@ public:
     /// \name Variable-Length Data Setup
     /// \{
 
+    /// \brief Sets the record's CIGAR data.
+    ///
+    /// \returns reference to this builder
+    ///
+    BamRecordBuilder& Cigar(const PacBio::BAM::Cigar& cigar);
+
+    /// \brief Sets the record's CIGAR data.
+    ///
+    /// \returns reference to this builder
+    ///
+    BamRecordBuilder& Cigar(PacBio::BAM::Cigar&& cigar);
+
+    /// \brief Sets the record's name.
+    ///
+    /// \returns reference to this builder
+    ///
     BamRecordBuilder& Name(const std::string& name);
-    BamRecordBuilder& Name(std::string&& name);
 
-    BamRecordBuilder& Sequence(const std::string& sequence);
-    BamRecordBuilder& Sequence(std::string&& sequence);
+    /// \brief Sets the record's name.
+    ///
+    /// \returns reference to this builder
+    ///
+    BamRecordBuilder& Name(std::string&& name);
 
+    /// \brief Sets the record's qualities.
+    ///
+    /// \returns reference to this builder
+    ///
     BamRecordBuilder& Qualities(const std::string& qualities);
+
+    /// \brief Sets the record's qualities.
+    ///
+    /// \returns reference to this builder
+    ///
     BamRecordBuilder& Qualities(std::string&& qualities);
 
-    BamRecordBuilder& Cigar(const PacBio::BAM::Cigar& cigar);
-    BamRecordBuilder& Cigar(PacBio::BAM::Cigar&& cigar);
+    /// \brief Sets the record's sequence.
+    ///
+    /// \returns reference to this builder
+    ///
+    BamRecordBuilder& Sequence(const std::string& sequence);
 
+    /// \brief Sets the record's sequence.
+    ///
+    /// \returns reference to this builder
+    ///
+    BamRecordBuilder& Sequence(std::string&& sequence);
+
+    /// \brief Sets the record's tags.
+    ///
+    /// \returns reference to this builder
+    ///
     BamRecordBuilder& Tags(const TagCollection& tags);
+
+    /// \brief Sets the record's tags.
+    ///
+    /// \returns reference to this builder
+    ///
     BamRecordBuilder& Tags(TagCollection&& tags);
 
+    /// \}
+
 private:
     BamHeader header_;
-
     bam1_core_t core_;
     std::string name_;
     std::string sequence_;
@@ -226,43 +308,9 @@ private:
     TagCollection tags_;
 };
 
-inline BamRecordBuilder& BamRecordBuilder::Bin(const uint32_t bin)
-{ core_.bin = bin; return *this; }
-
-inline BamRecordBuilder& BamRecordBuilder::Flag(const uint32_t flag)
-{ core_.flag = flag; return *this; }
-
-inline BamRecordBuilder& BamRecordBuilder::InsertSize(const int32_t iSize)
-{ core_.isize = iSize; return *this; }
-
-inline BamRecordBuilder& BamRecordBuilder::MapQuality(const uint8_t mapQual)
-{ core_.qual = mapQual; return *this; }
-
-inline BamRecordBuilder& BamRecordBuilder::MatePosition(const int32_t pos)
-{ core_.mpos = pos; return *this; }
-
-inline BamRecordBuilder& BamRecordBuilder::MateReferenceId(const int32_t id)
-{ core_.mtid = id; return *this; }
-
-inline BamRecordBuilder& BamRecordBuilder::Position(const int32_t pos)
-{ core_.pos = pos; return *this; }
-
-inline BamRecordBuilder& BamRecordBuilder::Qualities(const std::string& qualities)
-{ qualities_ = qualities; return *this; }
-
-inline BamRecordBuilder& BamRecordBuilder::Qualities(std::string&& qualities)
-{ qualities_ = std::move(qualities); return *this; }
-
-inline BamRecordBuilder& BamRecordBuilder::ReferenceId(const int32_t id)
-{ core_.tid = id; return *this; }
-
-inline BamRecordBuilder& BamRecordBuilder::Tags(const TagCollection& tags)
-{ tags_ = tags; return *this; }
-
-inline BamRecordBuilder& BamRecordBuilder::Tags(TagCollection&& tags)
-{ tags_ = std::move(tags); return *this; }
-
 } // namespace BAM
 } // namespace PacBio
 
+#include "pbbam/internal/BamRecordBuilder.inl"
+
 #endif // BAMRECORDBUILDER_H
diff --git a/include/pbbam/BamRecordImpl.h b/include/pbbam/BamRecordImpl.h
index c42ef0a..a4f23b1 100644
--- a/include/pbbam/BamRecordImpl.h
+++ b/include/pbbam/BamRecordImpl.h
@@ -32,18 +32,23 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file BamRecordImpl.h
+/// \brief Defines the BamRecordImpl class.
+//
 // Author: Derek Barnett
 
 #ifndef BAMRECORDIMPL_H
 #define BAMRECORDIMPL_H
 
-#include "htslib/sam.h"
 #include "pbbam/Cigar.h"
 #include "pbbam/Config.h"
 #include "pbbam/Position.h"
 #include "pbbam/QualityValues.h"
 #include "pbbam/TagCollection.h"
+#include <htslib/sam.h>
+#include <map>
 #include <string>
 
 namespace PacBio {
@@ -52,10 +57,14 @@ namespace BAM {
 namespace internal { class BamRecordMemory; }
 
 /// \brief The BamRecordImpl class holds all data necessary for creating,
-/// querying or editing a valid, generic BAM record.
+///        querying or editing a generic %BAM record.
 ///
 /// For PacBio-specific extensions and convenience methods, see BamRecord.
 ///
+/// \note This class is mostly an internal implementation detail and will
+///       likely be removed from the public API in the future. Please use
+///       BamRecord as much as possible.
+///
 class PBBAM_EXPORT BamRecordImpl
 {
 public:
@@ -91,168 +100,172 @@ public:
     /// \}
 
 public:
-
-    /** @name Core Data
-     */
-    ///@{
+    /// \name Core Data
+    /// \{
 
     /// \returns this record's assigned (BAI) index bin ID.
-    inline uint32_t Bin(void) const;
+    uint32_t Bin(void) const;
 
     /// \returns this record's alignment flag, in raw integer form.
-    inline uint32_t Flag(void) const;
+    uint32_t Flag(void) const;
 
     /// \returns this record's insert size
-    inline int32_t InsertSize(void) const;
+    int32_t InsertSize(void) const;
 
     /// \returns this record's mapping quality. A value of 255 indicates "unknown"
-    inline uint8_t MapQuality(void) const;
+    uint8_t MapQuality(void) const;
 
     /// \returns this record's mate's mapped position, or -1 if unmapped
-    inline PacBio::BAM::Position MatePosition(void) const;
+    PacBio::BAM::Position MatePosition(void) const;
 
     /// \returns this record's mate's mapped reference ID, or -1 if unmapped
-    inline int32_t MateReferenceId(void) const;
+    int32_t MateReferenceId(void) const;
 
     /// \returns this record's mapped position, or -1 if unmapped
-    inline PacBio::BAM::Position Position(void) const;
+    PacBio::BAM::Position Position(void) const;
 
     /// \returns this record's mate's mapped reference ID, or -1 if unmapped
-    inline int32_t ReferenceId(void) const;
+    int32_t ReferenceId(void) const;
 
     /// Sets the record's (BAI) index bin ID.
     ///
     /// \param[in] bin BAI index bin ID.
     /// \returns reference to this record
-    inline BamRecordImpl& Bin(uint32_t bin);
+    ///
+    BamRecordImpl& Bin(uint32_t bin);
 
     /// Sets this record's alignment flag, using a raw integer.
     ///
     /// \param[in] flag raw alignment flag
     /// \returns reference to this record
-    inline BamRecordImpl& Flag(uint32_t flag);
+    ///
+    BamRecordImpl& Flag(uint32_t flag);
 
     /// Sets this record's insert size.
     ///
     /// \param[in] iSize insert size
     /// \returns reference to this record
-    inline BamRecordImpl& InsertSize(int32_t iSize);
+    ///
+    BamRecordImpl& InsertSize(int32_t iSize);
 
     /// Sets this record's map quality.
     ///
     /// \param[in] mapQual mapping quality - value of 255 indicates "unknown"
     /// \returns reference to this record
-    inline BamRecordImpl& MapQuality(uint8_t mapQual);
+    ///
+    BamRecordImpl& MapQuality(uint8_t mapQual);
 
     /// Sets this record's mate's mapped position.
     ///
     /// \param[in] pos mapped position. A value of -1 indicates unmapped.
     /// \returns reference to this record
-    inline BamRecordImpl& MatePosition(PacBio::BAM::Position pos);
+    ///
+    BamRecordImpl& MatePosition(PacBio::BAM::Position pos);
 
     /// Sets this record's mate's mapped reference ID
     ///
     /// \param[in] id reference ID. A value of -1 indicates unmapped.
     /// \returns reference to this record
-    inline BamRecordImpl& MateReferenceId(int32_t id);
+    ///
+    BamRecordImpl& MateReferenceId(int32_t id);
 
     /// Sets this record's mapped position.
     ///
     /// \param[in] pos mapped position. A value of -1 indicates unmapped.
     /// \returns reference to this record
-    inline BamRecordImpl& Position(PacBio::BAM::Position pos);
+    ///
+    BamRecordImpl& Position(PacBio::BAM::Position pos);
 
     /// Sets this record's mapped reference ID
     ///
     /// \param[in] id reference ID. A value of -1 indicates unmapped.
     /// \returns reference to this record
-    inline BamRecordImpl& ReferenceId(int32_t id);
+    ///
+    BamRecordImpl& ReferenceId(int32_t id);
 
-    ///@}
+    /// \}
 
 public:
-    /** @name Alignment Flags
-     */
-    ///@{
+    /// \name Alignment Flags
+    /// \{
 
     /// \returns true if this record is a PCR/optical duplicate
-    inline bool IsDuplicate(void) const;
+    bool IsDuplicate(void) const;
 
     /// \returns true if this record failed quality controls
-    inline bool IsFailedQC(void) const;
+    bool IsFailedQC(void) const;
 
     /// \returns true if this record is the first mate of a pair
-    inline bool IsFirstMate(void) const;
+    bool IsFirstMate(void) const;
 
     /// \returns true if this record was mapped by aligner
-    inline bool IsMapped(void) const;
+    bool IsMapped(void) const;
 
     /// \returns true if this record's mate was mapped by aligner
-    inline bool IsMateMapped(void) const;
+    bool IsMateMapped(void) const;
 
     /// \returns true if this record's mate was mapped to the reverse strand
-    inline bool IsMateReverseStrand(void) const;
+    bool IsMateReverseStrand(void) const;
 
     /// \returns true if this record comes from paired-end sequencing
-    inline bool IsPaired(void) const;
+    bool IsPaired(void) const;
 
     /// \returns true if this record is a read's primary alignment
-    inline bool IsPrimaryAlignment(void) const;
+    bool IsPrimaryAlignment(void) const;
 
     /// \returns true if this record & its mate were properly aligned
-    inline bool IsProperPair(void) const;
+    bool IsProperPair(void) const;
 
     /// \returns true if this record was mapped to the reverse strand
-    inline bool IsReverseStrand(void) const;
+    bool IsReverseStrand(void) const;
 
     /// \returns true if this record is the second mate of a pair
-    inline bool IsSecondMate(void) const;
+    bool IsSecondMate(void) const;
 
     /// \returns true if this record is a supplementary alignment
-    inline bool IsSupplementaryAlignment(void) const;
+    bool IsSupplementaryAlignment(void) const;
 
     /// Sets whether this record is a PCR/optical duplicate
-    inline BamRecordImpl& SetDuplicate(bool ok);
+    BamRecordImpl& SetDuplicate(bool ok);
 
     /// Sets whether this record failed quality controls
-    inline BamRecordImpl& SetFailedQC(bool ok);
+    BamRecordImpl& SetFailedQC(bool ok);
 
     /// Sets whether this record is the first mate of a pair.
-    inline BamRecordImpl& SetFirstMate(bool ok);
+    BamRecordImpl& SetFirstMate(bool ok);
 
     /// Sets whether this record was aligned.
-    inline BamRecordImpl& SetMapped(bool ok);
+    BamRecordImpl& SetMapped(bool ok);
 
     /// Sets whether this record's mate was aligned.
-    inline BamRecordImpl& SetMateMapped(bool ok);
+    BamRecordImpl& SetMateMapped(bool ok);
 
     /// Sets whether this record's mate mapped to reverse strand.
-    inline BamRecordImpl& SetMateReverseStrand(bool ok);
+    BamRecordImpl& SetMateReverseStrand(bool ok);
 
     /// Sets whether this record came from paired-end sequencing.
-    inline BamRecordImpl& SetPaired(bool ok);
+    BamRecordImpl& SetPaired(bool ok);
 
     /// Sets whether this record is a read's primary alignment.
-    inline BamRecordImpl& SetPrimaryAlignment(bool ok);
+    BamRecordImpl& SetPrimaryAlignment(bool ok);
 
     /// Sets whether this record & its mate were properly mapped, per the aligner.
-    inline BamRecordImpl& SetProperPair(bool ok);
+    BamRecordImpl& SetProperPair(bool ok);
 
     /// Sets whether this record mapped to reverse strand.
-    inline BamRecordImpl& SetReverseStrand(bool ok);
+    BamRecordImpl& SetReverseStrand(bool ok);
 
     /// Sets whether this record is the second mate of a pair.
-    inline BamRecordImpl& SetSecondMate(bool ok);
+    BamRecordImpl& SetSecondMate(bool ok);
 
     /// Sets whether this record is a supplementary alignment.
-    inline BamRecordImpl& SetSupplementaryAlignment(bool ok);
+    BamRecordImpl& SetSupplementaryAlignment(bool ok);
 
-    ///@}
+    /// \}
 
 public:
-    /** @name Variable-length Data (sequence, qualities, etc.)
-     */
-    ///@{
+    /// \name Variable-length Data (sequence, qualities, etc.)
+    /// \{
 
     /// \returns the record's CIGAR data as a Cigar object
     Cigar CigarData(void) const;
@@ -260,15 +273,15 @@ public:
     /// Sets the record's CIGAR data using a Cigar object
     ///
     /// \param[in] cigar PacBio::BAM::Cigar object
-    ///
     /// \returns reference to this record
+    ///
     BamRecordImpl& CigarData(const Cigar& cigar);
 
     /// Sets the record's CIGAR data using a CIGAR-formatted string.
     ///
     /// \param[in] cigarString CIGAR-formatted string
-    ///
     /// \returns reference to this record
+    ///
     BamRecordImpl& CigarData(const std::string& cigarString);
 
     // TODO: CIGAR iterator - Cigar only or here as well ??
@@ -279,8 +292,8 @@ public:
     /// Sets the record's "query name".
     ///
     /// \param name new name
-    ///
     /// \returns reference to this record
+    ///
     BamRecordImpl& Name(const std::string& name);
 
     /// \returns the record's quality values (phred-style ASCII)
@@ -288,27 +301,31 @@ public:
     /// \note Usually Qualities().size() == Sequence.size(). However, in
     ///       some data sets, the quality values are not provided. In that
     ///       case, this method will return an empty container.
+    ///
     QualityValues Qualities(void) const;
 
     /// \returns the record's DNA sequence.
     std::string Sequence(void) const;
 
+    size_t SequenceLength(void) const;
+
     /// \brief Sets the record's DNA sequence and quality values
     ///
-    /// This is an overloaded function. Sets the DNA sequence and quality values,
-    /// using the length of \p sequence.
+    /// This is an overloaded function. Sets the DNA sequence and quality
+    /// values, using the length of \p sequence.
     ///
-    /// \note When using this overload (and \p qualities is non-empty), the lengths
-    ///       of \p sequence and \p qualities \b must be equal.
+    /// \note When using this overload (and \p qualities is non-empty), the
+    ///       lengths of \p sequence and \p qualities \b must be equal.
     ///
-    /// \todo How to handle mismatched lenths?
+    /// \todo How to handle mismatched lengths?
     ///
     /// \param[in] sequence  std::string containing DNA sequence
     /// \param[in] qualities std::string containing ASCII quality values
     ///
     /// \returns reference to this record.
     ///
-    /// \sa SetSequenceAndQualities(const char* sequence, const size_t sequenceLength, const char* qualities)
+    /// \sa SetSequenceAndQualities(const char* sequence,
+    ///     const size_t sequenceLength, const char* qualities)
     ///
     BamRecordImpl& SetSequenceAndQualities(const std::string& sequence,
                                            const std::string& qualities = std::string());
@@ -316,16 +333,17 @@ public:
     /// \brief Sets the record's DNA sequence and quality values.
     ///
     /// The \p sequence must consist of IUPAC nucleotide codes {=ACMGRSVTWYHKDBN}.
-    /// The \p qualities, if not empty, must consist of 'phred'-style ASCII quality
-    /// values. \p qualities may be an empty string or NULL pointer in cases where
-    /// there are no such data available.
+    /// The \p qualities, if not empty, must consist of 'phred'-style ASCII
+    /// quality values. \p qualities may be an empty string or NULL pointer in
+    /// cases where there are no such data available.
     ///
-    /// \param[in] sequence       C-string containing DNA sequence
-    /// \param[in] sequenceLength length of DNA sequence
-    /// \param[in] qualities      C-string containing 'phred-style' ASCII quality values
+    /// \param[in] sequence         C-string containing DNA sequence
+    /// \param[in] sequenceLength   length of DNA sequence
+    /// \param[in] qualities        C-string containing 'phred-style' ASCII
+    ///                             quality values
     ///
-    /// \note \p sequence does \b NOT have to be NULL-terminated. Length is explicitly
-    ///        determined by the value of \p sequenceLength provided.
+    /// \note \p sequence does \b NOT have to be NULL-terminated. Length is
+    ///       explicitly determined by the value of \p sequenceLength provided.
     ///
     /// \returns reference to this record.
     ///
@@ -335,41 +353,49 @@ public:
 
     /// \brief Sets the record's DNA sequence and quality values.
     ///
-    /// The \p encodedSequence should be preencoded/packed into the BAM binary format.
-    /// The \p qualities, if not empty, must consist of 'phred'-style ASCII quality values.
-    /// \p qualities may be an empty string or NULL pointer in cases where there are no
-    /// such data available.
+    /// The \p encodedSequence should be preencoded/packed into the BAM binary
+    /// format. The \p qualities, if not empty, must consist of 'phred'-style
+    /// ASCII quality values. \p qualities may be an empty string or NULL
+    /// pointer in cases where there are no such data available.
     ///
-    /// \param[in] encodedSequence   C-string containing BAM-format-encoded DNA sequence
-    /// \param[in] rawSequenceLength length of DNA sequence (not the encoded length)
-    /// \param[in] qualities         C-string containing 'phred-style' ASCII quality values
+    /// \param[in] encodedSequence      C-string containing BAM-format-encoded
+    ///                                 DNA sequence
+    /// \param[in] rawSequenceLength    length of DNA sequence (not the encoded
+    ///                                 length)
+    /// \param[in] qualities            C-string containing 'phred-style' ASCII
+    ///                                 quality values
     ///
-    /// \note \p encodedSequence does \b NOT have to be NULL-terminated. Length is explicitly
-    ///        determined by the value of \p sequenceLength provided.
+    /// \note \p encodedSequence does \b NOT have to be NULL-terminated. Length
+    ///       is explicitly determined by the value of \p rawSequenceLength
+    ///       provided.
     ///
     /// \returns reference to this record.
     ///
-    /// \sa SetSequenceAndQualities(const char* sequence, const size_t sequenceLength, const char* qualities)
+    /// \sa SetSequenceAndQualities(const char* sequence,
+    ///     const size_t sequenceLength, const char* qualities)
     ///
     BamRecordImpl& SetPreencodedSequenceAndQualities(const char* encodedSequence,
                                                      const size_t rawSequenceLength,
                                                      const char* qualities = 0);
 
+    /// \}
+
 public:
-    /** @name Tag Data
-     */
-    ///@{
+    /// \name Tag Data
+    /// \{
 
     /// \returns record's full tag data as a TagCollection object
     TagCollection Tags(void) const;
 
-    /// Sets the record's full tag data via a TagCollection object
+    /// \brief Sets the record's full tag data via a TagCollection object
+    ///
     BamRecordImpl& Tags(const TagCollection& tags);
 
-    /// Adds a new tag to this record.
+    /// \brief Adds a new tag to this record.
     ///
-    /// \param[in] tagName 2-character tag name.
-    /// \param[in] value Tag object that describes the type & value of data to be added
+    /// \param[in] tagName  2-character tag name.
+    /// \param[in] value    Tag object that describes the type & value of data
+    ///                     to be added
     ///
     /// \note Any value that can be used to implicitly construct a Tag is valid.
     /// \code
@@ -380,41 +406,95 @@ public:
     /// \endcode
     ///
     /// \returns true if tag was successfully added.
-    bool AddTag(const std::string& tagName, const Tag& value);
+    ///
+    bool AddTag(const std::string& tagName,
+                const Tag& value);
+
+    /// \brief Adds a new tag to this record, with an optional modifier.
+    ///
+    /// \param[in] tagName              2-character tag name.
+    /// \param[in] value                Tag object that describes the type &
+    ///                                 value of data to be added
+    /// \param[in] additionalModifier   optional extra modifier (for explicit
+    ///                                 modification of an otherwise const Tag)
+    ///
+    /// \note Any value that can be used to implicitly construct a Tag is valid.
+    /// \code
+    ///     char c;
+    ///     string h;
+    ///     record.AddTag("XX", c, TagModifier::ASCII_CHAR); // will add a char-type tag
+    ///     record.AddTag("YY", h, TagModifier::HEX_STRING); // will add a hex string-type tag
+    /// \endcode
+    ///
+    /// \returns true if tag was successfully added.
+    ///
+    bool AddTag(const std::string& tagName,
+                const Tag& value,
+                const TagModifier additionalModifier);
 
-    /// Edits an existing tag on this record.
+    /// \brief Edits an existing tag on this record.
     ///
-    /// \param[in] tagName 2-character tag name. Name must be present (see HasTag)
-    /// \param[in] newValue Tag object that describes the type & value of new data to be added
+    /// \param[in] tagName      2-character tag name. Name must be present
+    ///                         (see HasTag)
+    /// \param[in] newValue     Tag object that describes the type & value of
+    ///                         new data to be added
     ///
     /// \note Any value that can be used to implicitly construct a Tag is valid.
     /// \code
     ///     string s;
     ///     vector<uint32_t> v;
-    ///     record.EditTag("XX", s); // will overwrite tag XX with a string-type Tag
-    ///     record.EditTag("YY", v); // will overwrite tag YY with a uint32-array-type Tag
+    ///     record.EditTag("XX", s); // will overwrite tag XX with a string-type tag
+    ///     record.EditTag("YY", v); // will overwrite tag YY with a uint32-array-type tag
     /// \endcode
     ///
     /// \returns true if tag was successfully edited.
-    bool EditTag(const std::string& tagName, const Tag& newValue);
+    ///
+    bool EditTag(const std::string& tagName,
+                 const Tag& newValue);
+
+    /// \brief Edits an existing tag on this record.
+    ///
+    /// \param[in] tagName              2-character tag name. Name must be
+    ///                                 present (see HasTag)
+    /// \param[in] value                Tag object that describes the type &
+    ///                                 value of new data to be added
+    /// \param[in] additionalModifier   optional extra modifier (for explicit
+    ///                                 modification of an otherwise const Tag)
+    ///
+    /// \note Any value that can be used to implicitly construct a Tag is valid.
+    /// \code
+    ///     char c;
+    ///     string h;
+    ///     record.EditTag("XX", c, TagModifier::ASCII_CHAR); // will overwrite tag XX with a char-type tag
+    ///     record.EditTag("YY", h, TagModifier::HEX_STRING); // will overwrite tag YY with a hex string-type tag
+    /// \endcode
+    ///
+    /// \returns true if tag was successfully edited.
+    ///
+    bool EditTag(const std::string& tagName,
+                 const Tag& value,
+                 const TagModifier additionalModifier);
 
     /// \returns true if a tag with this name is present in this record.
     bool HasTag(const std::string& tagName) const;
 
-    /// Removes an existing tag from this record.
+    /// \brief Removes an existing tag from this record.
     ///
-    /// \param[in] tagName 2-character tag name.
+    /// \param[in] tagName  2-character tag name.
     ///
-    /// \returns true if tag was actaully removed (i.e. false if tagName previously unknown)
+    /// \returns true if tag was actually removed (i.e. false if tagName
+    ///          previously unknown)
     /// \sa HasTag
+    ///
     bool RemoveTag(const std::string& tagName);
 
-    /// Fetches a tag from this record.
+    /// \brief Fetches a tag from this record.
     ///
-    /// \param[in] tagName 2-character tag name.
+    /// \param[in] tagName  2-character tag name.
+    ///
+    /// \returns Tag object for the requested name. If name is unknown, a
+    ///          default constructed Tag is returned (Tag::IsNull() is true).
     ///
-    /// \returns Tag object for the requested name. If name is unknown, a default constructed
-    ///          Tag is returned (Tag::IsNull() is true).
     Tag TagValue(const std::string& tagName) const;
 
     // change above to Tag();
@@ -423,7 +503,7 @@ public:
 //    T TagValue(const std::string& tagName) const;
 
 
-    ///@}
+    /// \}
 
 private:
     // returns a BamRecordImpl object, with a deep copy of @rawData contents
@@ -432,6 +512,15 @@ private:
     // internal memory setup/expand methods
     void InitializeData(void);
     void MaybeReallocData(void);
+    void UpdateTagMap(void) const; // allowed to be called from const methods
+                                   // (lazy update on request)
+
+    // internal tag helper methods
+    bool AddTagImpl(const std::string& tagName,
+                    const Tag& value,
+                    const TagModifier additionalModifier);
+    bool RemoveTagImpl(const std::string& tagName);
+    int TagOffset(const std::string& tagName) const;
 
     // core seq/qual logic shared by the public API
     BamRecordImpl& SetSequenceAndQualitiesInternal(const char* sequence,
@@ -443,180 +532,15 @@ private:
 
     // data members
     PBBAM_SHARED_PTR<bam1_t> d_;
+    mutable std::map<uint16_t, int> tagOffsets_;
 
     // friends
     friend class internal::BamRecordMemory;
 };
 
-inline uint32_t BamRecordImpl::Bin(void) const
-{ return d_->core.bin; }
-
-inline BamRecordImpl& BamRecordImpl::Bin(uint32_t bin)
-{ d_->core.bin = bin; return *this; }
-
-inline uint32_t BamRecordImpl::Flag(void) const
-{ return d_->core.flag; }
-
-inline BamRecordImpl& BamRecordImpl::Flag(uint32_t flag)
-{ d_->core.flag = flag; return *this; }
-
-inline int32_t BamRecordImpl::InsertSize(void) const
-{ return d_->core.isize; }
-
-inline BamRecordImpl& BamRecordImpl::InsertSize(int32_t iSize)
-{ d_->core.isize = iSize; return *this; }
-
-inline uint8_t BamRecordImpl::MapQuality(void) const
-{ return d_->core.qual; }
-
-inline BamRecordImpl& BamRecordImpl::MapQuality(uint8_t mapQual)
-{ d_->core.qual = mapQual; return *this; }
-
-inline PacBio::BAM::Position BamRecordImpl::MatePosition(void) const
-{ return d_->core.mpos; }
-
-inline BamRecordImpl& BamRecordImpl::MatePosition(PacBio::BAM::Position pos)
-{ d_->core.mpos = pos; return *this; }
-
-inline int32_t BamRecordImpl::MateReferenceId(void) const
-{ return d_->core.mtid; }
-
-inline BamRecordImpl& BamRecordImpl::MateReferenceId(int32_t id)
-{ d_->core.mtid = id; return *this; }
-
-inline PacBio::BAM::Position BamRecordImpl::Position(void) const
-{ return d_->core.pos; }
-
-inline BamRecordImpl& BamRecordImpl::Position(PacBio::BAM::Position pos)
-{ d_->core.pos = pos; return *this; }
-
-inline int32_t BamRecordImpl::ReferenceId(void) const
-{ return d_->core.tid; }
-
-inline BamRecordImpl& BamRecordImpl::ReferenceId(int32_t id)
-{ d_->core.tid = id; return *this; }
-
-inline bool BamRecordImpl::IsDuplicate(void) const
-{ return (d_->core.flag & BamRecordImpl::DUPLICATE) != 0; }
-
-inline BamRecordImpl& BamRecordImpl::SetDuplicate(bool ok)
-{
-    if (ok) d_->core.flag |=  BamRecordImpl::DUPLICATE;
-    else    d_->core.flag &= ~BamRecordImpl::DUPLICATE;
-    return *this;
-}
-
-inline bool BamRecordImpl::IsFailedQC(void) const
-{ return (d_->core.flag & BamRecordImpl::FAILED_QC) != 0; }
-
-inline BamRecordImpl& BamRecordImpl::SetFailedQC(bool ok)
-{
-    if (ok) d_->core.flag |=  BamRecordImpl::FAILED_QC;
-    else    d_->core.flag &= ~BamRecordImpl::FAILED_QC;
-    return *this;
-}
-
-inline bool BamRecordImpl::IsFirstMate(void) const
-{ return (d_->core.flag & BamRecordImpl::MATE_1) != 0; }
-
-inline BamRecordImpl& BamRecordImpl::SetFirstMate(bool ok)
-{
-    if (ok) d_->core.flag |=  BamRecordImpl::MATE_1;
-    else    d_->core.flag &= ~BamRecordImpl::MATE_1;
-    return *this;
-}
-
-inline bool BamRecordImpl::IsMapped(void) const
-{ return (d_->core.flag & BamRecordImpl::UNMAPPED) == 0; }
-
-inline BamRecordImpl& BamRecordImpl::SetMapped(bool ok)
-{
-    if (ok) d_->core.flag &= ~BamRecordImpl::UNMAPPED;
-    else    d_->core.flag |=  BamRecordImpl::UNMAPPED;
-    return *this;
-}
-
-inline bool BamRecordImpl::IsMateMapped(void) const
-{ return (d_->core.flag & BamRecordImpl::MATE_UNMAPPED) == 0; }
-
-inline BamRecordImpl& BamRecordImpl::SetMateMapped(bool ok)
-{
-    if (ok) d_->core.flag &= ~BamRecordImpl::MATE_UNMAPPED;
-    else    d_->core.flag |=  BamRecordImpl::MATE_UNMAPPED;
-    return *this;
-}
-
-inline bool BamRecordImpl::IsMateReverseStrand(void) const
-{ return (d_->core.flag & BamRecordImpl::MATE_REVERSE_STRAND) != 0; }
-
-inline BamRecordImpl& BamRecordImpl::SetMateReverseStrand(bool ok)
-{
-    if (ok) d_->core.flag |=  BamRecordImpl::MATE_REVERSE_STRAND;
-    else    d_->core.flag &= ~BamRecordImpl::MATE_REVERSE_STRAND;
-    return *this;
-}
-
-inline bool BamRecordImpl::IsPaired(void) const
-{ return (d_->core.flag & BamRecordImpl::PAIRED) != 0; }
-
-inline BamRecordImpl& BamRecordImpl::SetPaired(bool ok)
-{
-    if (ok) d_->core.flag |=  BamRecordImpl::PAIRED;
-    else    d_->core.flag &= ~BamRecordImpl::PAIRED;
-    return *this;
-}
-
-inline bool BamRecordImpl::IsPrimaryAlignment(void) const
-{ return (d_->core.flag & BamRecordImpl::SECONDARY) == 0; }
-
-inline BamRecordImpl& BamRecordImpl::SetPrimaryAlignment(bool ok)
-{
-    if (ok) d_->core.flag &= ~BamRecordImpl::SECONDARY;
-    else    d_->core.flag |=  BamRecordImpl::SECONDARY;
-    return *this;
-}
-
-inline bool BamRecordImpl::IsProperPair(void) const
-{ return (d_->core.flag & BamRecordImpl::PROPER_PAIR) != 0; }
-
-inline BamRecordImpl& BamRecordImpl::SetProperPair(bool ok)
-{
-    if (ok) d_->core.flag |=  BamRecordImpl::PROPER_PAIR;
-    else    d_->core.flag &= ~BamRecordImpl::PROPER_PAIR;
-    return *this;
-}
-
-inline bool BamRecordImpl::IsReverseStrand(void) const
-{ return (d_->core.flag & BamRecordImpl::REVERSE_STRAND) != 0; }
-
-inline BamRecordImpl& BamRecordImpl::SetReverseStrand(bool ok)
-{
-    if (ok) d_->core.flag |=  BamRecordImpl::REVERSE_STRAND;
-    else    d_->core.flag &= ~BamRecordImpl::REVERSE_STRAND;
-    return *this;
-}
-
-inline bool BamRecordImpl::IsSecondMate(void) const
-{ return (d_->core.flag & BamRecordImpl::MATE_2) != 0; }
-
-inline BamRecordImpl& BamRecordImpl::SetSecondMate(bool ok)
-{
-    if (ok) d_->core.flag |=  BamRecordImpl::MATE_2;
-    else    d_->core.flag &= ~BamRecordImpl::MATE_2;
-    return *this;
-}
-
-inline bool BamRecordImpl::IsSupplementaryAlignment(void) const
-{ return (d_->core.flag & BamRecordImpl::SUPPLEMENTARY) != 0; }
-
-inline BamRecordImpl& BamRecordImpl::SetSupplementaryAlignment(bool ok)
-{
-    if (ok) d_->core.flag |=  BamRecordImpl::SUPPLEMENTARY;
-    else    d_->core.flag &= ~BamRecordImpl::SUPPLEMENTARY;
-    return *this;
-}
-
 } // namespace BAM
 } // namespace PacBio
 
+#include "pbbam/internal/BamRecordImpl.inl"
+
 #endif // BAMRECORDIMPL_H
diff --git a/include/pbbam/BamTagCodec.h b/include/pbbam/BamTagCodec.h
index 5aad239..9126900 100644
--- a/include/pbbam/BamTagCodec.h
+++ b/include/pbbam/BamTagCodec.h
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file BamTagCodec.h
+/// \brief Defines the BamTagCodec class.
+//
 // Author: Derek Barnett
 
 #ifndef BAMTAGCODEC_H
@@ -45,28 +49,73 @@
 namespace PacBio {
 namespace BAM {
 
+/// \brief The BamTagCodec class provides binary encoding/decoding of %BAM tag
+///        data.
+///
+/// \note BamTagCodec is mostly an implementation and/or testing detail, and may
+///       be removed from the public API.
+///
 class PBBAM_EXPORT BamTagCodec
 {
-
-// high-level, operate on a full collection
 public:
+    /// \name Tag Collection Methods
+    /// \{
+
+    /// \brief Creates a TagCollection from raw BAM data.
+    ///
+    /// \param[in] data     BAM-formatted (binary) tag data
+    /// \returns TagCollection containing tag data
+    ///
     static TagCollection Decode(const std::vector<uint8_t>& data);
+
+    /// \brief Creates binary BAM data from a TagCollection.
+    ///
+    /// \param[in] tags     TagCollection containing tag data
+    /// \returns vector of bytes (encoded BAM data)
+    ///
     static std::vector<uint8_t> Encode(const PacBio::BAM::TagCollection& tags);
 
-// per-tag methods
+    /// \}
+
 public:
+    /// \name Per-Tag Methods
+    /// \{
 
-    // returns the SAM/BAM single char code for tag type
-    static uint8_t TagTypeCode(const PacBio::BAM::Tag& tag);
+    /// \brief Determines the SAM/BAM tag code for a Tag.
+    ///
+    /// \param[in] tag                  Tag object to check
+    /// \param[in] additionalModifier   optional extra modifier (allows explicit
+    ///                                 modification of an otherwise const Tag)
+    ///
+    /// \returns the SAM/BAM single char code for tag type
+    ///
+    static uint8_t TagTypeCode(const PacBio::BAM::Tag& tag,
+                               const TagModifier& additionalModifier = TagModifier::NONE);
 
-    // returns the tag value's raw data in bytes
-    // NOTE: does *NOT* encode name & tag type. It does however,
-    // include the element type of an array tag
-    static std::vector<uint8_t> ToRawData(const PacBio::BAM::Tag& tag);
+    /// \brief Encodes a single Tag's contents in %BAM binary
+    ///
+    /// \note This method does \b NOT encode the tag name & tag type. It does
+    ///       include the element type for array-type tags.
+    ///
+    /// \param[in] tag                  Tag object containing data to encode
+    /// \param[in] additionalModifier   optional extra modifier (allows explicit
+    ///                                 modification of an otherwise const Tag)
+    ///
+    /// \returns vector of bytes (encoded BAM data)
+    ///
+    static std::vector<uint8_t> ToRawData(const PacBio::BAM::Tag& tag,
+                                          const TagModifier& additionalModifier = TagModifier::NONE);
 
-    // TODO: make this hidden a bit more, maybe this whole class in fact
-    // rawData should be the result of sam.h:bam_aux_get(...)
+    /// \brief Creates a Tag object from binary BAM data.
+    ///
+    /// \param[in] rawData      raw BAM bytes (assumed to be the result of
+    ///                         htslib's bam_aux_get())
+    ///
+    /// \returns resulting Tag object
+    ///
     static PacBio::BAM::Tag FromRawData(uint8_t* rawData);
+
+    /// \}
 };
 
 } // namespace BAM
diff --git a/include/pbbam/BamWriter.h b/include/pbbam/BamWriter.h
index e66df0a..3bbe2a5 100644
--- a/include/pbbam/BamWriter.h
+++ b/include/pbbam/BamWriter.h
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file BamWriter.h
+/// \brief Defines the BamWriter class.
+//
 // Author: Derek Barnett
 
 #ifndef BAMWRITER_H
@@ -51,13 +55,34 @@ class BamFile;
 
 namespace internal { class BamWriterPrivate; }
 
+/// \brief The BamWriter class provides a writing interface for creating
+///        new %BAM files.
+///
+/// \note The underlying buffered data may not be flushed to the file until the
+///       destructor is called. Trying to access the file (reading, stat-ing,
+///       indexing, etc.) before the BamWriter is destroyed yields undefined
+///       behavior. Enclose the BamWriter in some form of local scope (curly
+///       braces, a separate function, etc.) to ensure that its destructor is
+///       called before proceeding to read-based operations.
+///
+/// \code{.cpp}
+///  {
+///     BamWriter w(...);
+///     // write data
+///  }
+///  // now safe to access the new file
+/// \endcode
+///
+///
 class PBBAM_EXPORT BamWriter
 {
 public:
-    /// This enum allows you to control the compression level of the output BAM file.
+    /// \brief This enum allows you to control the compression level of the
+    ///        output %BAM file.
+    ///
+    /// Values are equivalent to zlib compression levels. See its documentation
+    /// for more details: http://www.zlib.net/manual.html
     ///
-    /// Values are equivalent to zlib compression levels. See its documentation for more details:
-    /// http://www.zlib.net/manual.html
     enum CompressionLevel
     {
         CompressionLevel_0 = 0
@@ -77,28 +102,54 @@ public:
       , BestCompression    = CompressionLevel_9
     };
 
+    /// \brief This enum allows you to control whether BAI bin numbers are
+    ///        calculated for output records.
+    /// 
+    /// For most cases, the default behavior (ON) should be retained for maximum
+    /// compatibility with downstream tools (e.g. samtools index). Disabling bin
+    /// calculation should only be used if all records are known to never be
+    /// mapped, and even then only if profiling reveals the calculation to
+    /// affect extremely performance-sensitive, "critical paths".
+    ///
+    enum BinCalculationMode
+    {
+        BinCalculation_ON = 0
+      , BinCalculation_OFF
+    };
+
 public:
 
     /// \name Constructors & Related Methods
     /// \{
 
-    /// Opens a BAM file for writing & writes the header information.
+    /// \brief Opens a %BAM file for writing & writes the header information.
     ///
     /// The error status will be set if either operation fails.
     ///
     /// \note Set \p filename to "-" for stdout.
     ///
-    /// \param[in] filename         path to output BAM file
+    /// \param[in] filename         path to output %BAM file
     /// \param[in] header           BamHeader object
     /// \param[in] compressionLevel zlib compression level
-    /// \param[in] numThreads       number of threads for compression.
-    ///            If set to 0, BamWriter will attempt to determine a reasonable estimate.
-    ///            If set to 1, this will force single-threaded execution.
-    ///            No checks are made against an upper limit.
+    /// \param[in] numThreads       number of threads for compression. If set to
+    ///                             0, BamWriter will attempt to determine a
+    ///                             reasonable estimate. If set to 1, this will
+    ///                             force single-threaded execution. No checks
+    ///                             are made against an upper limit.
+    ///
+    /// \param[in] binCalculationMode BAI bin calculation mode. The default
+    ///            behavior will ensure proper bin numbers are provided for all
+    ///            records written. This extra step may be turned off when bin
+    ///            numbers are not needed. Though if in doubt, keep the default.
+    ///
+    /// \throws std::runtime_error if there was a problem opening the file for
+    ///         writing or if an error occurred while writing the header
+    ///
     BamWriter(const std::string& filename,
               const BamHeader& header,
               const BamWriter::CompressionLevel compressionLevel = BamWriter::DefaultCompression,
-              const size_t numThreads = 4);
+              const size_t numThreads = 4,
+              const BinCalculationMode binCalculationMode = BamWriter::BinCalculation_ON);
 
     /// Fully flushes all buffered data & closes file.
     ~BamWriter(void);
@@ -108,33 +159,41 @@ public:
 public:
 
     /// \name Data Writing & Resource Management
+    /// \{
 
-    /// Try to flush any buffered data to file.
+    /// \brief Try to flush any buffered data to file.
     ///
-    /// \note The underlying implementation doesn't necessarily flush buffered data
-    ///       immediately, especially in a multithreaded writer situation.
+    /// \note The underlying implementation doesn't necessarily flush buffered
+    ///       data immediately, especially in a multithreaded writer situation.
     ///       Let the BamWriter go out of scope to fully ensure flushing.
     ///
-    /// \throws
+    /// \throws std::runtime_error if flush fails
+    ///
     void TryFlush(void);
 
-    /// Write a record to the output BAM file.
+    /// \brief Write a record to the output %BAM file.
     ///
     /// \param[in] record BamRecord object
+    ///
     /// \throws std::runtime_error on failure to write
+    ///
     void Write(const BamRecord& record);
 
-    /// Write a record to the output BAM file.
+    /// \brief Write a record to the output %BAM file.
     ///
     /// \param[in] record BamRecord object
     /// \param[out] vOffset BGZF virtual offset to start of \p record
+    ///
     /// \throws std::runtime_error on failure to write
+    ///
     void Write(const BamRecord& record, int64_t* vOffset);
 
-    /// Write a record to the output BAM file.
+    /// \brief Write a record to the output %BAM file.
     ///
     /// \param[in] recordImpl BamRecordImpl object
+    ///
     /// \throws std::runtime_error on failure to write
+    ///
     void Write(const BamRecordImpl& recordImpl);
 
     /// \}
diff --git a/include/pbbam/ZmwQuery.h b/include/pbbam/BarcodeQuery.h
similarity index 58%
copy from include/pbbam/ZmwQuery.h
copy to include/pbbam/BarcodeQuery.h
index fdd1d1d..3072ddf 100644
--- a/include/pbbam/ZmwQuery.h
+++ b/include/pbbam/BarcodeQuery.h
@@ -32,11 +32,15 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file BarcodeQuery.h
+/// \brief Defines the BarcodeQuery class.
+//
 // Author: Derek Barnett
 
-#ifndef ZMWQUERY_H
-#define ZMWQUERY_H
+#ifndef BARCODEQUERY_H
+#define BARCODEQUERY_H
 
 #include "pbbam/Config.h"
 #include "pbbam/internal/QueryBase.h"
@@ -44,23 +48,50 @@
 
 namespace PacBio {
 namespace BAM {
-//namespace staging {
 
-class PBBAM_EXPORT ZmwQuery : public internal::IQuery
+/// \brief The BarcodeQuery class provides iterable access to a DataSet's %BAM
+///        records, limiting results to those matching a particular barcode.
+///
+/// Example:
+/// \include code/BarcodeQuery.txt
+///
+/// \note Currently, all %BAM files must have a corresponding ".pbi" index file.
+///       Use BamFile::EnsurePacBioIndexExists before creating the query if one
+///       may not be present.
+///
+class PBBAM_EXPORT BarcodeQuery : public internal::IQuery
 {
 public:
-    ZmwQuery(const std::vector<int32_t>& zmwWhitelist,
-             const DataSet& dataset);
+    /// \brief Creates a new BarcodeQuery, limiting record results to only those
+    ///        annotated with a particular barcode ID.
+    ///
+    /// \param[in] barcode  filtering criteria
+    /// \param[in] dataset  input data source(s)
+    ///
+    /// \sa BamRecord::Barcodes
+    ///
+    /// \throws std::runtime_error on failure to open/read underlying %BAM or PBI
+    ///         files.
+    ///
+    BarcodeQuery(const uint16_t barcode, const DataSet& dataset);
+
+    ~BarcodeQuery(void);
+
+public:
 
-protected:
-    FileIterPtr CreateIterator(const BamFile& bamFile);
+    /// \brief Main iteration point for record access.
+    ///
+    /// Most client code should not need to use this method directly. Use
+    /// iterators instead.
+    ///
+    bool GetNext(BamRecord& r);
 
 private:
-    std::vector<int> whitelist_;
+    struct BarcodeQueryPrivate;
+    std::unique_ptr<BarcodeQueryPrivate> d_;
 };
 
-//} // namespace staging
 } // namespace BAM
 } // namespace PacBio
 
-#endif // ZMWQUERY_H
+#endif // BARCODEQUERY_H
diff --git a/include/pbbam/Cigar.h b/include/pbbam/Cigar.h
index 1e0bc46..c391057 100644
--- a/include/pbbam/Cigar.h
+++ b/include/pbbam/Cigar.h
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file Cigar.h
+/// \brief Defines the Cigar class.
+//
 // Author: Derek Barnett
 
 #ifndef CIGAR_H
@@ -46,27 +50,39 @@
 namespace PacBio {
 namespace BAM {
 
+/// \brief The Cigar class represents the CIGAR string used to report alignment
+///        characteristics in SAM/BAM.
+///
+/// \note Use of the 'M' operator is forbidden in PacBio BAMs. See
+///       CigarOperationType description for more information.
+///
+/// \sa https://samtools.github.io/hts-specs/SAMv1.pdf for more information on CIGAR in general.
+///
 class PBBAM_EXPORT Cigar : public std::vector<CigarOperation>
 {
-
 public:
-    /// \name Static Constructor
+    /// \name Constructors & Related Methods
     /// \{
 
-    /// Creates a Cigar object from SAM/BAM string input
+    /// \brief Creates a Cigar object from SAM/BAM string input
+    ///
+    /// \param [in] stdString   SAM/BAM formatted CIGAR data
+    /// \returns a Cigar object representing the input data
+    ///
+    /// \note This method may be removed from the public API in the future,
+    ///       as the constructor taking a std::string accomplishes the same end.
     ///
-    /// \param [in] stdString SAM/BAM formatted CIGAR data
-    /// \returns Cigar object representing the input data
     static Cigar FromStdString(const std::string& stdString);
 
-    /// \}
-
-public:
-    /// \name Constructors & Related Methods
-    /// \{
-
+    /// \brief Creates an empty Cigar.
     Cigar(void);
+
+    /// \brief Creates a Cigar object from SAM/BAM string input
+    ///
+    /// \param [in] cigarString   SAM/BAM formatted CIGAR data
+    ///
     Cigar(const std::string& cigarString);
+
     Cigar(const Cigar& other);
     Cigar(Cigar&& other);
     Cigar& operator=(const Cigar& other);
@@ -82,35 +98,15 @@ public:
     /// Converts Cigar object data to SAM/BAM formatted string
     ///
     /// \returns SAM/BAM formatted std::string
+    ///
     std::string ToStdString(void) const;
 
     /// \}
 };
 
-inline Cigar::Cigar(void)
-    : std::vector<CigarOperation>()
-{ }
-
-inline Cigar::Cigar(const Cigar& other)
-    : std::vector<CigarOperation>(other)
-{ }
-
-inline Cigar::Cigar(Cigar&& other)
-    : std::vector<CigarOperation>(std::move(other))
-{ }
-
-inline Cigar& Cigar::operator=(const Cigar& other)
-{ std::vector<CigarOperation>::operator= (other); return *this; }
-
-inline Cigar& Cigar::operator=(Cigar&& other)
-{ std::vector<CigarOperation>::operator= (std::move(other)); return *this; }
-
-inline Cigar::~Cigar(void) { }
-
-inline Cigar Cigar::FromStdString(const std::string& stdString)
-{ return Cigar(stdString); }
-
 } // namespace BAM
 } // namespace PacBio
 
+#include "pbbam/internal/Cigar.inl"
+
 #endif // CIGAR_H
diff --git a/include/pbbam/CigarOperation.h b/include/pbbam/CigarOperation.h
index 951128d..9b936ef 100644
--- a/include/pbbam/CigarOperation.h
+++ b/include/pbbam/CigarOperation.h
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file CigarOperation.h
+/// \brief Defines the CigarOperationType enum & CigarOperation class.
+//
 // Author: Derek Barnett
 
 #ifndef CIGAROPERATION_H
@@ -44,11 +48,16 @@
 namespace PacBio {
 namespace BAM {
 
-/// Describes a CIGAR operation. Bracketed character is the corresponding SAM/BAM character code.
+/// \brief Describes a CIGAR operation.
+///
+/// Bracketed character is the corresponding SAM/BAM character code.
 ///
-/// \warning ALIGNMENT_MATCH ('M') is included in this enum to maintain consistency with htslib.
-/// However, as of PacBio BAM spec version 3.0b7, this CIGAR operation \b forbidden. Attempt to
-/// read or write a record containing this operation will trigger a std::runtime_error.
+/// \warning ALIGNMENT_MATCH ('M') is included in this enum to maintain
+///          consistency with htslib. However, as of PacBio BAM spec version
+///          3.0b7, this CIGAR operation is \b forbidden. Any attempt to read or
+///          write a record containing this operation will trigger a
+///          std::runtime_error. SEQUENCE_MATCH('=') or SEQUENCE_MISMATCH('X')
+///          should be used instead.
 ///
 enum class CigarOperationType
 {
@@ -62,11 +71,11 @@ enum class CigarOperationType
   , PADDING                ///< padding (silent deletion from padded reference) [P]
   , SEQUENCE_MATCH         ///< sequence match [=]
   , SEQUENCE_MISMATCH      ///< sequence mismatch [X]
-
-    // TODO: looks like there is a new 'B' type in htslib source, referring to some 'back' operation...
-    //       no reference in htslib docs though yet as to what that applies to
 };
 
+/// \brief The CigarOperation class represents a single CIGAR operation
+///        (consisting of a type & length).
+///
 class PBBAM_EXPORT CigarOperation
 {
 public:
@@ -157,70 +166,9 @@ private:
     uint32_t length_;
 };
 
-inline CigarOperation::CigarOperation(void)
-    : type_(CigarOperationType::UNKNOWN_OP)
-    , length_(0)
-{ }
-
-inline CigarOperation::CigarOperation(char c, uint32_t length)
-    : type_(CigarOperation::CharToType(c))
-    , length_(length)
-{
-    if (type_ == CigarOperationType::ALIGNMENT_MATCH)
-        throw std::runtime_error("CIGAR operation 'M' is not allowed in PacBio BAM files. Use 'X/=' instead.");
-}
-
-inline CigarOperation::CigarOperation(CigarOperationType op, uint32_t length)
-    : type_(op)
-    , length_(length)
-{
-    if (type_ == CigarOperationType::ALIGNMENT_MATCH)
-        throw std::runtime_error("CIGAR operation 'M' is not allowed in PacBio BAM files. Use 'X/=' instead.");
-}
-
-inline CigarOperation::CigarOperation(const CigarOperation& other)
-    : type_(other.type_)
-    , length_(other.length_)
-{ }
-
-inline CigarOperation::CigarOperation(CigarOperation&& other)
-    : type_(std::move(other.type_))
-    , length_(std::move(other.length_))
-{ }
-
-inline CigarOperation::~CigarOperation(void) { }
-
-inline uint32_t CigarOperation::Length(void) const
-{ return length_; }
-
-inline CigarOperation& CigarOperation::Length(const uint32_t length)
-{ length_ = length; return *this; }
-
-inline CigarOperationType CigarOperation::Type(void) const
-{ return type_; }
-
-inline CigarOperation &CigarOperation::Type(const CigarOperationType opType)
-{ type_ = opType; return *this; }
-
-inline char CigarOperation::Char(void) const
-{ return CigarOperation::TypeToChar(type_); }
-
-inline CigarOperation &CigarOperation::Char(const char opChar)
-{ type_ = CigarOperation::CharToType(opChar);return *this; }
-
-inline CigarOperation& CigarOperation::operator=(const CigarOperation& other)
-{ type_ = other.type_; length_ = other.length_; return *this; }
-
-inline CigarOperation& CigarOperation::operator=(CigarOperation&& other)
-{ type_ = std::move(other.type_); length_ = std::move(other.length_); return *this; }
-
-inline bool CigarOperation::operator==(const CigarOperation& other) const
-{ return type_ == other.type_ && length_ == other.length_; }
-
-inline bool CigarOperation::operator!=(const CigarOperation& other) const
-{ return !(*this == other); }
-
 } // namespace BAM
 } // namespace PacBio
 
+#include "pbbam/internal/CigarOperation.inl"
+
 #endif // CIGAROPERATION_H
diff --git a/include/pbbam/Compare.h b/include/pbbam/Compare.h
new file mode 100644
index 0000000..da44b48
--- /dev/null
+++ b/include/pbbam/Compare.h
@@ -0,0 +1,430 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file Compare.h
+/// \brief Defines the Compare class & a number of function objects for
+///       comparing BamRecords.
+//
+// Author: Derek Barnett
+
+#ifndef COMPARE_H
+#define COMPARE_H
+
+#include "pbbam/BamRecord.h"
+#include <functional>
+#include <string>
+#include <utility>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The Compare class provides utilities for sorting collections of
+///        BamRecords.
+///
+/// \note The functors provided here currently only support std::less<T>
+///       comparisons (i.e. sorting by ascending value).
+///
+/// \include code/Compare.txt
+///
+struct PBBAM_EXPORT Compare
+{
+public:
+
+    /// \name Comparison Type
+    /// \{
+
+    /// \brief This enum defines the supported comparison types
+    ///        { ==, !=, <, <=, >, >=, & (contains), ~ (not contains) }.
+    ///
+    enum Type {
+        EQUAL = 0
+      , NOT_EQUAL
+      , LESS_THAN
+      , LESS_THAN_EQUAL
+      , GREATER_THAN
+      , GREATER_THAN_EQUAL
+      , CONTAINS
+      , NOT_CONTAINS
+    };
+
+    /// \brief Convert operator string to Compare::Type.
+    ///
+    /// \include code/Compare_TypeFromOperator.txt
+    ///
+    /// \param[in] opString operator string. Can be C++-style operators
+    ///                     ("==", "!=", "<=", etc) or alpha equivalents
+    ///                     ("eq", "ne", "lte", etc).
+    ///
+    /// \returns comparison type from an operator string
+    /// \throws std::runtime_error if cannot convert opString to Compare::Type
+    /// \sa Compare::TypeToOperator
+    ///
+    static Compare::Type TypeFromOperator(const std::string& opString);
+
+    /// \brief Convert a Compare::Type to printable enum name.
+    ///
+    /// \include code/Compare_TypeToName.txt
+    ///
+    /// \param[in] type Compare::Type to convert
+    /// \returns the printable name for a Compare::Type enum value
+    /// \throws std::runtime_error on unknown Compare::Type
+    ///
+    static std::string TypeToName(const Compare::Type& type);
+
+    /// \brief Convert a Compare::Type to printable operator.
+    ///
+    /// \param[in] type     Compare::Type to convert
+    /// \param[in] asAlpha  (optional) flag to print using alpha equivalents
+    ///                     e.g. "lte" rather than "<="
+    /// \returns the printable operator string
+    /// \throws std::runtime_error on unknown Compare::Type
+    ///
+    static std::string TypeToOperator(const Compare::Type& type,
+                                      bool asAlpha = false);
+
+    /// \}
+
+public:
+
+    /// \name Comparison Function Objects
+    /// \{
+
+    /// %Base class for all BamRecord compare functors.
+    ///
+    /// Mostly used for method signatures that can accept any comparator.
+    ///
+    /// Custom comparators may be used by inheriting from this class.
+    ///
+    struct Base : public std::function<bool(const BamRecord&, const BamRecord&)> { };
+
+private:
+    /// \internal
+    ///
+    /// Exists to provide the typedef we'll use in the actual
+    /// MemberFunctionBase, since we need to use it in the template signature.
+    /// This keeps that a lot easier to read.
+    ///
+    template<typename ValueType>
+    struct MemberFunctionBaseHelper : public Compare::Base
+    {
+        typedef ValueType (BamRecord::*MemberFnType)(void) const;
+    };
+
+public:
+    /// \brief %Base class for all BamRecord compare functors that take a
+    ///        BamRecord function pointer and compare on its return type.
+    ///
+    /// Derived comparators usually need only declare the return value &
+    /// function pointer in the template signature. This class implements the
+    /// basic method-calling machinery.
+    ///
+    /// Custom comparators will work for any BamRecord member function that does
+    /// not take any input parameters.
+    ///
+    template<typename ValueType,
+             typename MemberFunctionBaseHelper<ValueType>::MemberFnType fn,
+             typename CompareType = std::less<ValueType> >
+    struct MemberFunctionBase : public Compare::MemberFunctionBaseHelper<ValueType>
+    {
+        bool operator()(const BamRecord& lhs, const BamRecord& rhs) const;
+    };
+
+public:
+
+    /// \brief Compares on BamRecord::AlignedEnd.
+    ///
+    /// Example:
+    /// \include code/Compare_AlignedEnd.txt
+    ///
+    /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+    ///       ascending value).
+    ///
+    struct AlignedEnd : public MemberFunctionBase<Position, &BamRecord::AlignedEnd> { };
+
+    /// \brief Compares on BamRecord::AlignedStart.
+    ///
+    /// Example:
+    /// \include code/Compare_AlignedStart.txt
+    ///
+    /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+    ///       ascending value).
+    ///
+    struct AlignedStart : public MemberFunctionBase<Position, &BamRecord::AlignedStart> { };
+
+    /// \brief Compares on BamRecord::AlignedStrand
+    ///
+    /// Example:
+    /// \include code/Compare_AlignedStrand.txt
+    ///
+    /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+    ///       ascending value).
+    ///
+    struct AlignedStrand : public MemberFunctionBase<Strand, &BamRecord::AlignedStrand> { };
+
+    /// \brief Compares on BamRecord::BarcodeForward.
+    ///
+    /// Example:
+    /// \include code/Compare_BarcodeForward.txt
+    ///
+    /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+    ///       ascending value).
+    ///
+    struct BarcodeForward : public MemberFunctionBase<uint16_t, &BamRecord::BarcodeForward> { };
+
+    /// \brief Compares on BamRecord::BarcodeQuality.
+    ///
+    /// Example:
+    /// \include code/Compare_BarcodeQuality.txt
+    ///
+    /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+    ///       ascending value).
+    ///
+    struct BarcodeQuality : public MemberFunctionBase<uint8_t, &BamRecord::BarcodeQuality> { };
+
+    /// \brief Compares on BamRecord::BarcodeReverse.
+    ///
+    /// Example:
+    /// \include code/Compare_BarcodeReverse.txt
+    ///
+    /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+    ///       ascending value).
+    ///
+    struct BarcodeReverse: public MemberFunctionBase<uint16_t, &BamRecord::BarcodeReverse> { };
+
+    /// \brief Compares on BamRecord::FullName.
+    ///
+    /// Example:
+    /// \include code/Compare_FullName.txt
+    ///
+    /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+    ///       ascending value).
+    ///
+    struct FullName : public MemberFunctionBase<std::string, &BamRecord::FullName> { };
+
+    /// \brief Compares on BamRecord::LocalContextFlags.
+    ///
+    /// Example:
+    /// \include code/Compare_LocalContextFlag.txt
+    ///
+    /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+    ///       ascending value).
+    ///
+    struct LocalContextFlag : public MemberFunctionBase<LocalContextFlags, &BamRecord::LocalContextFlags> { };
+
+    /// \brief Compares on BamRecord::MapQuality.
+    ///
+    /// Example:
+    /// \include code/Compare_MapQuality.txt
+    ///
+    /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+    ///       ascending value).
+    ///
+    struct MapQuality : public MemberFunctionBase<uint8_t, &BamRecord::MapQuality> { };
+
+    /// \brief Compares on BamRecord::MovieName.
+    ///
+    /// Example:
+    /// \include code/Compare_MovieName.txt
+    ///
+    /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+    ///       ascending value).
+    ///
+    struct MovieName : public MemberFunctionBase<std::string, &BamRecord::MovieName> { };
+
+    /// \brief Provides an operator() that is essentially a no-op for
+    ///        comparing/sorting.
+    ///
+    /// If used in a sorting operation, then no change will occur.
+    ///
+    struct None : public Compare::Base
+    {
+        bool operator()(const BamRecord&, const BamRecord&) const;
+    };
+
+    /// \brief Compares on BamRecord::NumDeletedBases.
+    ///
+    /// Example:
+    /// \include code/Compare_NumDeletedBases.txt
+    ///
+    /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+    ///       ascending value).
+    ///
+    struct NumDeletedBases : public MemberFunctionBase<size_t, &BamRecord::NumDeletedBases> { };
+
+    /// \brief Compares on BamRecord::NumInsertedBases.
+    ///
+    /// Example:
+    /// \include code/Compare_NumInsertedBases.txt
+    ///
+    /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+    ///       ascending value).
+    ///
+    struct NumInsertedBases : public MemberFunctionBase<size_t, &BamRecord::NumInsertedBases> { };
+
+    /// \brief Compares on BamRecord::NumMatches.
+    ///
+    /// Example:
+    /// \include code/Compare_NumMatches.txt
+    ///
+    /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+    ///       ascending value).
+    ///
+    struct NumMatches : public MemberFunctionBase<size_t, &BamRecord::NumMatches> { };
+
+    /// \brief Compares on BamRecord::NumMismatches.
+    ///
+    /// Example:
+    /// \include code/Compare_NumMismatches.txt
+    ///
+    /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+    ///       ascending value).
+    ///
+    struct NumMismatches : public MemberFunctionBase<size_t, &BamRecord::NumMismatches> { };
+
+    /// \brief Compares on BamRecord::QueryEnd.
+    ///
+    /// Example:
+    /// \include code/Compare_QueryEnd.txt
+    ///
+    /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+    ///       ascending value).
+    ///
+    struct QueryEnd : public MemberFunctionBase<Position, &BamRecord::QueryEnd> { };
+
+    /// \brief Compares on BamRecord::QueryStart.
+    ///
+    /// Example:
+    /// \include code/Compare_QueryStart.txt
+    ///
+    /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+    ///       ascending value).
+    ///
+    struct QueryStart : public MemberFunctionBase<Position, &BamRecord::QueryStart> { };
+
+    /// \brief Compares on BamRecord::ReadAccuracy.
+    ///
+    /// Example:
+    /// \include code/Compare_ReadAccuracy.txt
+    ///
+    /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+    ///       ascending value).
+    ///
+    struct ReadAccuracy : public MemberFunctionBase<Accuracy, &BamRecord::ReadAccuracy> { };
+
+    /// \brief Compares on BamRecord::ReadGroupId.
+    ///
+    /// \note Even though the ReadGroupId string contains hex values, it is
+    ///       still just a std::string. Comparisons will use lexical, not
+    ///       numeric ordering. If numeric ordering is desired, use
+    ///       Compare::ReadGroupNumericId instead.
+    ///
+    /// Example:
+    /// \include code/Compare_ReadGroupId.txt
+    ///
+    /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+    ///       ascending value).
+    ///
+    struct ReadGroupId : public MemberFunctionBase<std::string, &BamRecord::ReadGroupId> { };
+
+    /// \brief Compares on BamRecord::ReadGroupNumericId.
+    ///
+    /// Example:
+    /// \include code/Compare_ReadGroupNumericId.txt
+    ///
+    /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+    ///       ascending value).
+    ///
+    struct ReadGroupNumericId : public MemberFunctionBase<int32_t, &BamRecord::ReadGroupNumericId> { };
+
+    /// \brief Compares on BamRecord::ReferenceEnd.
+    ///
+    /// Example:
+    /// \include code/Compare_ReferenceEnd.txt
+    ///
+    /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+    ///       ascending value).
+    ///
+    struct ReferenceEnd : public MemberFunctionBase<Position, &BamRecord::ReferenceEnd> { };
+
+    /// \brief Compares on BamRecord::ReferenceId.
+    ///
+    /// Example:
+    /// \include code/Compare_ReferenceId.txt
+    ///
+    /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+    ///       ascending value).
+    ///
+    struct ReferenceId : public MemberFunctionBase<int32_t, &BamRecord::ReferenceId> { };
+
+    /// \brief Compares on BamRecord::ReferenceName.
+    ///
+    /// Example:
+    /// \include code/Compare_ReferenceName.txt
+    ///
+    /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+    ///       ascending value).
+    ///
+    struct ReferenceName : public MemberFunctionBase<std::string, &BamRecord::ReferenceName> { };
+
+    /// \brief Compares on BamRecord::ReferenceStart.
+    ///
+    /// Example:
+    /// \include code/Compare_ReferenceStart.txt
+    ///
+    /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+    ///       ascending value).
+    ///
+    struct ReferenceStart : public MemberFunctionBase<Position, &BamRecord::ReferenceStart> { };
+
+    /// \brief Compares on BamRecord::HoleNumber.
+    ///
+    /// Example:
+    /// \include code/Compare_Zmw.txt
+    ///
+    /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+    ///       ascending value).
+    ///
+    struct Zmw : public MemberFunctionBase<int32_t, &BamRecord::HoleNumber> { };
+
+    /// \}
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/Compare.inl"
+
+#endif // COMPARE_H
diff --git a/include/pbbam/CompositeBamReader.h b/include/pbbam/CompositeBamReader.h
new file mode 100644
index 0000000..f0de942
--- /dev/null
+++ b/include/pbbam/CompositeBamReader.h
@@ -0,0 +1,269 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file CompositeBamReader.h
+/// \brief Defines the composite BAM readers, for working with multiple input
+///       files.
+//
+// Author: Derek Barnett
+
+#ifndef COMPOSITEBAMREADER_H
+#define COMPOSITEBAMREADER_H
+
+#include "pbbam/BaiIndexedBamReader.h"
+#include "pbbam/BamFile.h"
+#include "pbbam/BamHeader.h"
+#include "pbbam/BamReader.h"
+#include "pbbam/BamRecord.h"
+#include "pbbam/Config.h"
+#include "pbbam/DataSet.h"
+#include "pbbam/GenomicInterval.h"
+#include "pbbam/PbiIndexedBamReader.h"
+#include <deque>
+#include <functional>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+
+namespace internal {
+
+/// \internal
+/// \brief The CompositeMergeItem class provides a helper struct for composite
+///        readers, containing a single-file reader and its "next" record.
+///
+struct CompositeMergeItem
+{
+public:
+    std::unique_ptr<BamReader> reader;
+    BamRecord record;
+
+public:
+    CompositeMergeItem(std::unique_ptr<BamReader>&& rdr);
+    CompositeMergeItem(std::unique_ptr<BamReader>&& rdr, BamRecord&& rec);
+    CompositeMergeItem(CompositeMergeItem&& other);
+    CompositeMergeItem& operator=(CompositeMergeItem&& other);
+    ~CompositeMergeItem(void);
+};
+
+/// \internal
+/// \brief The CompositeMergeItemSorter class provides a helper function object
+///        for ordering composite reader results.
+///
+/// Essentially just extracts a BamRecord from its parent CompositeMergeItem for
+/// further checks.
+///
+template<typename CompareType>
+struct CompositeMergeItemSorter : public std::function<bool(const CompositeMergeItem&,
+                                                            const CompositeMergeItem&)>
+{
+    bool operator()(const CompositeMergeItem& lhs,
+                    const CompositeMergeItem& rhs);
+};
+
+} // namespace internal
+
+/// \brief The GenomicIntervalCompositeBamReader class provides read access to
+///        multiple %BAM files, limiting results to a genomic region.
+///
+/// Requires a ".bai" file for each input %BAM file.
+///
+/// Results will be returned in order of genomic coordinate (first by reference
+/// ID, then by position).
+///
+class PBBAM_EXPORT GenomicIntervalCompositeBamReader
+{
+public:
+    /// \name Constructors & Related Methods
+    /// \{
+
+    GenomicIntervalCompositeBamReader(const GenomicInterval& interval,
+                                      const std::vector<BamFile>& bamFiles);
+    GenomicIntervalCompositeBamReader(const GenomicInterval& interval,
+                                      std::vector<BamFile>&& bamFiles);
+    GenomicIntervalCompositeBamReader(const GenomicInterval& interval,
+                                      const DataSet& dataset);
+
+    /// \}
+
+public:
+    /// \name Data Access
+    /// \{
+
+    /// Fetches next BAM record in the interval specified, storing in \p record
+    ///
+    /// \param[out] record
+    /// \returns true on success, false if no more data available.
+    ///
+    bool GetNext(BamRecord& record);
+
+    /// Sets a new genomic interval of interest.
+    ///
+    /// \returns reference to this reader
+    ///
+    GenomicIntervalCompositeBamReader& Interval(const GenomicInterval& interval);
+
+    /// \returns the current specified interval
+    ///
+    const GenomicInterval& Interval(void) const;
+
+    /// \}
+
+private:
+    void UpdateSort(void);
+
+private:
+    GenomicInterval interval_;
+    std::deque<internal::CompositeMergeItem> mergeItems_;
+    std::vector<std::string> filenames_;
+};
+
+/// \brief Provides read access to multiple %BAM files, limiting results to those
+///        passing a PbiFilter.
+///
+/// Requires a ".pbi" file for each input %BAM file.
+///
+/// \note The template parameter OrderByType is not fully implemented at this
+///       time. Use of comparison functor (e.g. Compare::Zmw) for this will
+///       currently result in the proper "next" value <b> at each iteration
+///       step, independently, but not over the full data set. </b> If all
+///       files' "order-by" data values are accessible in increasing order
+///       within each file, then the expected ordering will be observed.
+///       However, if these data are not sorted within a file, the final results
+///       will appear unordered. \n
+///       \n
+///           Example:\n
+///           file 1: { 1, 5, 2, 6 } \n
+///           file 2: { 3, 8, 4, 7 } \n
+///           results: { 1, 3, 5, 2, 6, 8, 4, 7 } \n
+///       \n
+///       This is a known issue and will be addressed in a future update. But in
+///       the meantime, use of Compare::None as the OrderByType is recommended,
+///       to explicitly indicate that no particular ordering is expected.
+///
+template<typename OrderByType>
+class PBBAM_EXPORT PbiFilterCompositeBamReader
+{
+public:
+    typedef internal::CompositeMergeItem                      value_type;
+    typedef internal::CompositeMergeItemSorter<OrderByType>   merge_sorter_type;
+    typedef std::deque<value_type>                            container_type;
+    typedef typename container_type::iterator                 iterator;
+    typedef typename container_type::const_iterator           const_iterator;
+
+public:
+    /// \name Constructors & Related Methods
+    /// \{
+
+    PbiFilterCompositeBamReader(const PbiFilter& filter,
+                                const std::vector<BamFile>& bamFiles);
+    PbiFilterCompositeBamReader(const PbiFilter& filter,
+                                std::vector<BamFile>&& bamFiles);
+    PbiFilterCompositeBamReader(const PbiFilter& filter,
+                                const DataSet& dataset);
+
+    /// \}
+
+public:
+    /// \name Data Access
+    /// \{
+
+    /// Fetches next BAM record passing the current PBI filter, storing in \p record
+    ///
+    /// \returns true on success, false if no more data available.
+    ///
+    bool GetNext(BamRecord& record);
+
+    /// Sets a new PBI filter
+    ///
+    /// \returns reference to this reader
+    ///
+    PbiFilterCompositeBamReader& Filter(const PbiFilter& filter);
+
+    /// \}
+
+private:
+    void UpdateSort(void);
+
+private:
+    container_type mergeQueue_;
+    std::vector<std::string> filenames_;
+};
+
+/// \brief The SequentialCompositeBamReader class provides read access to
+///        multiple %BAM files, reading through the entire contents of each
+///        file.
+///
+/// Input files will be accessed in the order provided to the constructor. Each
+/// file's contents will be exhausted before moving on to the next one (as
+/// opposed to a "round-robin" scheme).
+///
+class PBBAM_EXPORT SequentialCompositeBamReader
+{
+public:
+    /// \name Constructors & Related Methods
+    /// \{
+
+    SequentialCompositeBamReader(const std::vector<BamFile>& bamFiles);
+    SequentialCompositeBamReader(std::vector<BamFile>&& bamFiles);
+    SequentialCompositeBamReader(const DataSet& dataset);
+
+    /// \}
+
+public:
+    /// \name Data Access
+    /// \{
+
+    /// Fetches next BAM record from the input files, storing in \p record
+    ///
+    /// \returns true on success, false if no more data available.
+    ///
+    bool GetNext(BamRecord& record);
+
+    /// \}
+
+private:
+    std::deque<std::unique_ptr<BamReader> > readers_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/CompositeBamReader.inl"
+
+#endif // COMPOSITEBAMREADER_H
diff --git a/include/pbbam/Config.h b/include/pbbam/Config.h
index 3d2b5d7..4fbc417 100644
--- a/include/pbbam/Config.h
+++ b/include/pbbam/Config.h
@@ -32,21 +32,20 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file Config.h
+/// \brief Defines library-wide macros & global variables.
+//
 // Author: Derek Barnett
 
 #ifndef PBBAM_CONFIG_H
 #define PBBAM_CONFIG_H
 
-// --------------------------------
-// standard types
-// --------------------------------
-
 #include <cstdint>
 
-// -------------------------------------
-// library symbol import/export macros
-// -------------------------------------
+/// \name Library Import/Export
+/// \{
 
 #ifndef PBBAM_LIBRARY_EXPORT
 #  if defined(WIN32)
@@ -72,12 +71,14 @@
 #  endif
 #endif
 
-// ----------------------------------------------------
-// setup the shared_ptr implementation we'll be using
-// ----------------------------------------------------
+/// \}
+
+/// \name Shared Pointer Settings
+/// \{
 
 // uncomment this define, or pass via command-line (-DPBBAM_USE_BOOST_SHARED_PTR),
 // to use boost::shared_ptr<T> instead of std::shared_ptr<T>
+//
 //#define PBBAM_USE_BOOST_SHARED_PTR
 
 #ifdef PBBAM_USE_BOOST_SHARED_PTR
@@ -88,38 +89,88 @@
 #  define PBBAM_SHARED_PTR std::shared_ptr
 #endif
 
-// ----------------------------------------------------
-// htslib verbosity level
-// ----------------------------------------------------
+/// \}
 
-namespace PacBio {
-namespace BAM {
+/// \name Class Definition Helpers
+/// \{
 
-/// \brief Sets the desired verbosity level of htslib warnings.
+/// \brief Disables the use of copy constructors and assignment operators for a
+///        class.
 ///
-/// Change this value to allow debug/warning statements from htslib.
-/// The valid range seems to be [0-3], where 0->OFF, and 3->most verbose.
+/// To use, place the macro in a class's private section:
+/// \code{.cpp}
+/// struct Foo {
+/// private:
+///     DISABLE_COPY(Foo);
+/// };
+/// \endcode
 ///
-extern int HtslibVerbosity;
-
-} // namespace BAM
-} // namespace PacBio
-
-// ----------------------------------------------------
-// additional helper macros
-// ----------------------------------------------------
-
 #ifndef DISABLE_COPY
 #define DISABLE_COPY(Class) \
     Class(const Class&); \
     Class& operator=(const Class&)
 #endif
 
+/// \brief Disables the use of move constructors and assignment operators for a
+///        class.
+///
+/// To use, place the macro in a class's private section:
+/// \code{.cpp}
+/// struct Foo {
+/// private:
+///     DISABLE_MOVE(Foo);
+/// };
+/// \endcode
+///
+#ifndef DISABLE_MOVE
+#define DISABLE_MOVE(Class) \
+    Class(Class&&); \
+    Class& operator=(Class&&);
+#endif
+
+/// \brief Disables the use of copy & move constructors and assignment operators
+///        for a class.
+///
+/// To use, place the macro in a class's private section:
+/// \code{.cpp}
+/// struct Foo {
+/// private:
+///     DISABLE_MOVE_AND_COPY(Foo);
+/// };
+/// \endcode
+///
 #ifndef DISABLE_MOVE_AND_COPY
 #define DISABLE_MOVE_AND_COPY(Class) \
-    Class(Class&&); \
-    Class& operator=(Class&&); \
+    DISABLE_MOVE(Class) \
     DISABLE_COPY(Class)
 #endif
 
+/// \}
+
+namespace PacBio {
+namespace BAM {
+
+/// \name Verbosity Settings
+/// \{
+
+/// \brief Sets the desired verbosity level of htslib warnings.
+///
+/// Change this value to allow debug/warning statements from htslib itself.
+/// The valid range seems to be [0-3], where 0 indicates OFF, and 3 is the
+/// most verbose.
+///
+/// By default, pbbam disables htslib statements to keep output channels clean.
+/// We rely on exceptions & their associated messages instead.
+///
+/// This global variable is obviously not thread-safe by any means. But as a
+/// debug flag, it is unlikely to cause any real issues. The worst case would be
+/// unexpected presence/absence of output statements.
+///
+extern int HtslibVerbosity;
+
+/// \}
+
+} // namespace BAM
+} // namespace PacBio
+
 #endif // PBBAM_CONFIG_H
diff --git a/include/pbbam/DataSet.h b/include/pbbam/DataSet.h
index 21a6aa2..af1b14f 100644
--- a/include/pbbam/DataSet.h
+++ b/include/pbbam/DataSet.h
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file DataSet.h
+/// \brief Defines the DataSet class.
+//
 // Author: Derek Barnett
 
 #ifndef DATASET_H
@@ -41,20 +45,31 @@
 #include "pbbam/BamFile.h"
 #include "pbbam/Config.h"
 #include "pbbam/DataSetTypes.h"
+#include <chrono>
 #include <memory>
+#include <set>
 #include <string>
 #include <vector>
 
 namespace PacBio {
 namespace BAM {
 
+/// \brief The DataSet class represents a %PacBio analysis dataset (e.g. from
+///        XML).
+///
+/// \nosubgrouping
+///
+/// It provides resource paths, filters, and metadata associated with a dataset
+/// under analysis.
+///
 class PBBAM_EXPORT DataSet
 {
 public:
-
-    /// \name DataSet Types
+    /// \name DataSet Type
     /// \{
 
+    /// \brief This enum defines the currently-supported DataSet types.
+    ///
     enum TypeEnum {
         GENERIC = 0
       , ALIGNMENT
@@ -67,9 +82,22 @@ public:
       , SUBREAD
     };
 
+    /// \brief Converts printable dataset type to type enum.
+    ///
+    /// \param[in] typeName printable dataset type
+    /// \returns dataset type enum
+    /// \throws std::runtime_error if \p typeName is unknown
+    ///
     static DataSet::TypeEnum NameToType(const std::string& typeName);
 
+    /// \brief Converts dataset type enum to printable name.
+    ///
+    /// \param[in] type dataset type enum
+    /// \returns printable dataset type
+    /// \throws std::runtime_error if \p type is unknown
+    ///
     static std::string TypeToName(const DataSet::TypeEnum& type);
+
     /// \}
 
 public:
@@ -77,17 +105,57 @@ public:
     /// \name Constructors & Related Methods
     /// \{
 
+    /// \brief Constructs an empty, generic DataSet.
+    ///
     DataSet(void);
+
+    /// \brief Constructs an empty DataSet of the type specified.
+    ///
+    /// \param[in] type dataset type
+    /// \throws std::runtime_error if \p type is unknown
+    ///
     DataSet(const DataSet::TypeEnum type);
+
+    /// \brief Constructs a DataSet from a %BAM file.
+    ///
+    /// This currently defaults to a SubreadSet, with an ExternalResource
+    /// pointing to BamFile::Filename.
+    ///
+    /// \param[in] bamFile  BamFile object
+    ///
     DataSet(const BamFile& bamFile);
+
+    /// \brief Loads a DataSet from a file.
+    ///
+    /// \p filename may be one of three types, indicated by its extension:\n
+    ///  - %BAM ("*.bam") \n
+    ///  - FOFN ("*.fofn") \n
+    ///  - DataSetXML ("*.xml") \n
+    ///
+    /// \param[in] filename  input filename
+    /// \throws std::runtime_error if \p filename has an unsupported extension,
+    ///         or if a valid DataSet could not be created from its contents
+    ///
     DataSet(const std::string& filename);
+
+    /// \brief Constructs a DataSet from a list of files.
+    ///
+    /// \param[in] filenames  input filenames
+    /// \throws std::runtime_error if DataSet could not be created from
+    ///         \p filenames
+    ///
+    DataSet(const std::vector<std::string>& filenames);
+
     DataSet(const DataSet& other);
     DataSet(DataSet&& other);
     DataSet& operator=(const DataSet& other);
     DataSet& operator=(DataSet&& other);
     ~DataSet(void);
 
-    /// Creates a DataSet from "raw" XML data.
+    /// \brief Creates a DataSet from "raw" XML data.
+    ///
+    /// \param[in] xml DataSetXML text
+    ///
     static DataSet FromXml(const std::string& xml);
 
     /// \}
@@ -96,37 +164,149 @@ public:
     /// \name Operators
     /// \{
 
+    /// \brief Merges DataSet contents.
+    ///
+    /// Adds contents of \p other to this dataset object
+    ///
+    /// \param[in] other  some other dataset to add to this one
+    /// \returns reference to this dataset object
+    ///
     DataSet& operator+=(const DataSet& other);
 
     /// \}
 
 public:
+    /// \name Serialization
+    /// \{
+
+    /// \brief Saves dataset XML to file.
+    ///
+    /// \param[in] outputFilename destination for XML contents
+    ///
+    /// \throws std::runtime_error if file could not be opened or if DataSet
+    ///         elements could not be converted to XML
+    ///
     void Save(const std::string& outputFilename);
+
+    /// \brief Saves dataset XML to output stream, e.g. std::cout,
+    ///        std::stringstream.
+    ///
+    /// \param[out] out destination for XML contents
+    ///
+    /// \throws std::runtime_error if DataSet elements could not be converted to
+    ///         XML
+    ///
     void SaveToStream(std::ostream& out);
 
+    /// \}
+
 public:
 
     /// \name Attributes
     /// \{
     ///
 
+    /// \brief Fetches the value of a DataSet root element's attribute.
+    ///
+    /// These are the attributes attached to the root dataset element: \n
+    /// \verbatim <SubreadSet foo="x" bar="y" /> \endverbatim
+    ///
+    /// Built-in accessors exist for the standard attributes (e.g. CreatedAt)
+    /// but additional attributes can be used as well via these generic
+    /// Attribute methods.
+    ///
+    /// \param[in] name root element's attribute name
+    /// \returns const reference to attribute's value (empty string if not
+    ///          present)
+    ///
     const std::string& Attribute(const std::string& name) const;
-    std::string& Attribute(const std::string& name);
-    DataSet& Attribute(const std::string& name, const std::string& value);
 
+    /// \brief Fetches the value of dataset's CreatedAt attribute.
+    ///
+    /// \returns const reference to attribute's value (empty string if not
+    ///          present)
+    ///
     const std::string& CreatedAt(void) const;
-    const PacBio::BAM::Extensions& Extensions(void) const;
+
+    /// \brief Fetches the value of dataset's Format attribute.
+    ///
+    /// \returns const reference to attribute's value (empty string if not
+    ///          present)
+    ///
     const std::string& Format(void) const;
+
+    /// \brief Fetches the value of dataset's MetaType attribute.
+    ///
+    /// \returns const reference to attribute's value (empty string if not
+    ///          present)
+    ///
     const std::string& MetaType(void) const;
+
+    /// \brief Fetches the value of dataset's ModifiedAt attribute.
+    ///
+    /// \returns const reference to attribute's value (empty string if not
+    ///          present)
+    ///
     const std::string& ModifiedAt(void) const;
+
+    /// \brief Fetches the value of dataset's Name attribute.
+    ///
+    /// \returns const reference to attribute's value (empty string if not
+    ///          present)
+    ///
     const std::string& Name(void) const;
+
+    /// \brief Fetches the value of dataset's ResourceId attribute.
+    ///
+    /// \returns const reference to attribute's value (empty string if not
+    ///          present)
+    ///
     const std::string& ResourceId(void) const;
+
+    /// \brief Fetches the value of dataset's Tags attribute.
+    ///
+    /// \returns const reference to attribute's value (empty string if not
+    ///          present)
+    ///
     const std::string& Tags(void) const;
+
+    /// \brief Fetches the value of dataset's TimeStampedName attribute.
+    ///
+    /// \returns const reference to attribute's value (empty string if not
+    ///          present)
+    ///
     const std::string& TimeStampedName(void) const;
+
+    /// \brief Fetches the value of dataset's UniqueId attribute.
+    ///
+    /// \returns const reference to attribute's value (empty string if not
+    ///          present)
+    ///
     const std::string& UniqueId(void) const;
+
+    /// \brief Fetches the value of dataset's Version attribute.
+    ///
+    /// \returns const reference to attribute's value (empty string if not
+    ///          present)
+    ///
     const std::string& Version(void) const;
 
+    /// \}
+
+public:
+    /// \name DataSet Type
+    /// \{
+
+    /// \brief Fetches the dataset's type.
+    ///
+    /// \returns dataset type enum
+    ///
     PacBio::BAM::DataSet::TypeEnum Type(void) const;
+
+    /// \brief Fetches the dataset's type, as a printable name.
+    ///
+    /// \returns printable dataset type
+    ///
     std::string TypeName(void) const;
 
     /// \}
@@ -135,17 +315,101 @@ public:
     /// \name Child Elements
     /// \{
 
+    /// \brief Fetches the dataset's Extensions element.
+    ///
+    /// \returns const reference to child element
+    /// \throws std::runtime_error if element does not exist
+    ///
+    const PacBio::BAM::Extensions& Extensions(void) const;
+
+    /// \brief Fetches the dataset's ExternalResources element.
+    ///
+    /// \returns const reference to child element
+    /// \throws std::runtime_error if element does not exist
+    ///
     const PacBio::BAM::ExternalResources& ExternalResources(void) const;
+
+    /// \brief Fetches the dataset's Filters element.
+    ///
+    /// \returns const reference to child element
+    ///
     const PacBio::BAM::Filters& Filters(void) const;
+
+    /// \brief Fetches the dataset's DataSetMetadata element.
+    ///
+    /// \returns const reference to child element
+    ///
     const PacBio::BAM::DataSetMetadata& Metadata(void) const;
+
+    /// \brief Fetches the dataset's DataSets element.
+    ///
+    /// \returns const reference to child element
+    ///
     const PacBio::BAM::SubDataSets& SubDataSets(void) const;
 
     /// \}
 
 public:
+    /// \name Resource Handling
+    /// \{
+
+    /// \brief Returns this dataset's primary %BAM resources, with relative
+    ///        filepaths already resolved.
+    ///
+    /// Primary resources are those listed as top-level %ExternalResources, not
+    /// associated files (indices, references, scraps %BAMs, etc.).
+    ///
+    /// \returns vector of BamFiles
+    ///
+    /// \sa DataSet::ResolvedResourceIds
+    ///
+    std::vector<BamFile> BamFiles(void) const;
+
+    /// \brief Returns all primary external resource filepaths, with relative
+    ///        paths resolved.
+    ///
+    /// Primary resources are those listed as top-level %ExternalResources, not
+    /// associated files (indices, references, scraps %BAMs, etc.).
+    ///
+    /// \sa ResolvePath
+    ///
+    /// \returns resourceIds
+    ///
+    std::vector<std::string> ResolvedResourceIds(void) const;
+
+    /// \brief Resolves a filepath (that may be relative to the dataset).
+    ///
+    /// A DataSet's resources may be described using absolute filepaths or with
+    /// relative paths. For absolute paths, nothing is changed from the input.
+    /// For relative paths, these are resolved using the DataSet's own path
+    /// as a starting point. A DataSet's own path will be one of:\n
+    ///  1 - the location of its XML or %BAM input file, e.g. created using
+    ///      DataSet("foo.xml") or DataSet("foo.bam")\n
+    ///  2 - application's current working directory for all other DataSet
+    ///      construction methods { DataSet(), DataSet(type),
+    ///      DataSet("foo.fofn") }\n
+    ///
+    /// \param[in] originalPath     input file path (absolute or relative)
+    /// \returns resolved path
+    ///
+    std::string ResolvePath(const std::string& originalPath) const;
+
+    /// \returns sequencing chemistry info for all read groups in this dataset
+    ///
+    /// \sa ReadGroupInfo::SequencingChemistry
+    ///
+    std::set<std::string> SequencingChemistries(void) const;
+
+    /// \}
+
+public:
     /// \name XML Namespace Handling
     /// \{
 
+    /// \brief Access this dataset's namespace info.
+    ///
+    /// \returns const reference to dataset's NamespaceRegistry
+    ///
     const NamespaceRegistry& Namespaces(void) const;
 
     /// \}
@@ -154,30 +418,235 @@ public:
     /// \name Attributes
     /// \{
 
+    /// \brief Fetches the value of a DataSet root element's attribute.
+    ///
+    /// These are the attributes attached to the root dataset element: \n
+    /// \verbatim <SubreadSet foo="x" bar="y" /> \endverbatim
+    ///
+    /// Built-in accessors exist for the standard attributes (e.g. CreatedAt)
+    /// but additional attributes can be used as well via these generic methods.
+    ///
+    /// A new attribute will be created if it does not yet exist.
+    ///
+    /// \param[in] name root element's attribute name
+    /// \returns non-const reference to attribute's value (empty string if this
+    ///          is a new attribute)
+    ///
+    std::string& Attribute(const std::string& name);
+
+    /// \brief Fetches the value of dataset's CreatedAt attribute.
+    ///
+    /// This attribute will be created if it does not yet exist.
+    ///
+    /// \returns non-const reference to attribute's value (empty string if this
+    ///          is a new attribute)
+    ///
     std::string& CreatedAt(void);
-    PacBio::BAM::Extensions& Extensions(void);
+
+    /// \brief Fetches the value of dataset's Format attribute.
+    ///
+    /// This attribute will be created if it does not yet exist.
+    ///
+    /// \returns non-const reference to attribute's value (empty string if this
+    ///          is a new attribute)
+    ///
     std::string& Format(void);
+
+    /// \brief Fetches the value of dataset's MetaType attribute.
+    ///
+    /// This attribute will be created if it does not yet exist.
+    ///
+    /// \returns non-const reference to attribute's value (empty string if this
+    ///          is a new attribute)
+    ///
     std::string& MetaType(void);
+
+    /// \brief Fetches the value of dataset's ModifiedAt attribute.
+    ///
+    /// This attribute will be created if it does not yet exist.
+    ///
+    /// \returns non-const reference to attribute's value (empty string if this
+    ///          is a new attribute)
+    ///
     std::string& ModifiedAt(void);
+
+    /// \brief Fetches the value of dataset's Name attribute.
+    ///
+    /// This attribute will be created if it does not yet exist.
+    ///
+    /// \returns non-const reference to attribute's value (empty string if this
+    ///          is a new attribute)
+    ///
     std::string& Name(void);
+
+    /// \brief Fetches the value of dataset's ResourceId attribute.
+    ///
+    /// This attribute will be created if it does not yet exist.
+    ///
+    /// \returns non-const reference to attribute's value (empty string if this
+    ///          is a new attribute)
+    ///
     std::string& ResourceId(void);
+
+    /// \brief Fetches the value of dataset's Tags attribute.
+    ///
+    /// This attribute will be created if it does not yet exist.
+    ///
+    /// \returns non-const reference to attribute's value (empty string if this
+    ///          is a new attribute)
+    ///
     std::string& Tags(void);
+
+    /// \brief Fetches the value of dataset's TimeStampedName attribute.
+    ///
+    /// This attribute will be created if it does not yet exist.
+    ///
+    /// \returns non-const reference to attribute's value (empty string if this
+    ///          is a new attribute)
+    ///
     std::string& TimeStampedName(void);
+
+    /// \brief Fetches the value of dataset's UniqueId attribute.
+    ///
+    /// This attribute will be created if it does not yet exist.
+    ///
+    /// \returns non-const reference to attribute's value (empty string if this
+    ///          is a new attribute)
+    ///
     std::string& UniqueId(void);
+
+    /// \brief Fetches the value of dataset's Version attribute.
+    ///
+    /// This attribute will be created if it does not yet exist.
+    ///
+    /// \returns non-const reference to attribute's value (empty string if this
+    ///          is a new attribute)
+    ///
     std::string& Version(void);
     
+    /// \}
+
+public:
+    /// \name Attributes
+    /// \{
+
+    /// \brief Sets this dataset's XML attribute \p name, with \p value
+    ///
+    /// These are the attributes attached to the root dataset element: \n
+    /// \verbatim <SubreadSet foo="x" bar="y" /> \endverbatim
+    ///
+    /// Built-in accessors exist for the standard attributes (e.g. CreatedAt)
+    /// but additional attributes can be used as well via these generic methods.
+    ///
+    /// The attribute will be created if it does not yet exist.
+    ///
+    /// \param[in] name   root element's attribute name
+    /// \param[in] value  new value for the attribute
+    /// \returns reference to this dataset object
+    ///
+    DataSet& Attribute(const std::string& name, const std::string& value);
+
+    /// \brief Sets this dataset's CreatedAt attribute.
+    ///
+    /// This attribute will be created if it does not yet exist.
+    ///
+    /// \param[in] createdAt  new value for the attribute
+    /// \returns reference to this dataset object
+    ///
     DataSet& CreatedAt(const std::string& createdAt);
-    DataSet& Extensions(const PacBio::BAM::Extensions& extensions);
+
+    /// \brief Sets this dataset's Format attribute.
+    ///
+    /// This attribute will be created if it does not yet exist.
+    ///
+    /// \param[in] format  new value for the attribute
+    /// \returns reference to this dataset object
+    ///
     DataSet& Format(const std::string& format);
+
+    /// \brief Sets this dataset's MetaType attribute.
+    ///
+    /// This attribute will be created if it does not yet exist.
+    ///
+    /// \param[in] metatype  new value for the attribute
+    /// \returns reference to this dataset object
+    ///
     DataSet& MetaType(const std::string& metatype);
+
+    /// \brief Sets this dataset's ModifiedAt attribute.
+    ///
+    /// This attribute will be created if it does not yet exist.
+    ///
+    /// \param[in] modifiedAt  new value for the attribute
+    /// \returns reference to this dataset object
+    ///
     DataSet& ModifiedAt(const std::string& modifiedAt);
+
+    /// \brief Sets this dataset's Name attribute.
+    ///
+    /// This attribute will be created if it does not yet exist.
+    ///
+    /// \param[in] name  new value for the attribute
+    /// \returns reference to this dataset object
+    ///
     DataSet& Name(const std::string& name);
+
+    /// \brief Sets this dataset's ResourceId attribute.
+    ///
+    /// This attribute will be created if it does not yet exist.
+    ///
+    /// \param[in] resourceId  new value for the attribute
+    /// \returns reference to this dataset object
+    ///
     DataSet& ResourceId(const std::string& resourceId);
+
+    /// \brief Sets this dataset's Tags attribute.
+    ///
+    /// This attribute will be created if it does not yet exist.
+    ///
+    /// \param[in] tags  new value for the attribute
+    /// \returns reference to this dataset object
+    ///
     DataSet& Tags(const std::string& tags);
+
+    /// \brief Sets this dataset's TimeStampedName attribute.
+    ///
+    /// This attribute will be created if it does not yet exist.
+    ///
+    /// \param[in] timeStampedName  new value for the attribute
+    /// \returns reference to this dataset object
+    ///
     DataSet& TimeStampedName(const std::string& timeStampedName);
+
+    /// \brief Sets this dataset's UniqueId attribute.
+    ///
+    /// This attribute will be created if it does not yet exist.
+    ///
+    /// \param[in] uuid  new value for the attribute
+    /// \returns reference to this dataset object
+    ///
     DataSet& UniqueId(const std::string& uuid);
+
+    /// \brief Sets this dataset's Version attribute.
+    ///
+    /// This attribute will be created if it does not yet exist.
+    ///
+    /// \param[in] version  new value for the attribute
+    /// \returns reference to this dataset object
+    ///
     DataSet& Version(const std::string& version);
 
+    /// \}
+
+public:
+    /// \name DataSet Type
+    /// \{
+
+    /// \brief Edits dataset type.
+    ///
+    /// \param[in] type  new dataset type
+    /// \returns reference to this dataset object
+    ///
     DataSet& Type(const PacBio::BAM::DataSet::TypeEnum type);
 
     /// \}
@@ -186,14 +655,95 @@ public:
     /// \name Child Elements
     /// \{
 
+    /// \brief Fetches the dataset's Extensions element.
+    ///
+    /// This element will be created if it does not yet exist.
+    ///
+    /// \returns non-const reference to child element
+    ///
+    PacBio::BAM::Extensions& Extensions(void);
+
+    /// \brief Fetches the dataset's ExternalResources element.
+    ///
+    /// This element will be created if it does not yet exist.
+    ///
+    /// \returns non-const reference to child element
+    ///
     PacBio::BAM::ExternalResources& ExternalResources(void);
+
+    /// \brief Fetches the dataset's Filters element.
+    ///
+    /// This element will be created if it does not yet exist.
+    ///
+    /// \returns non-const reference to child element
+    ///
     PacBio::BAM::Filters& Filters(void);
+
+    /// \brief Fetches the dataset's DataSetMetadata element.
+    ///
+    /// This element will be created if it does not yet exist.
+    ///
+    /// \returns non-const reference to child element
+    ///
     PacBio::BAM::DataSetMetadata& Metadata(void);
+
+    /// \brief Fetches the dataset's DataSets element.
+    ///
+    /// This element will be created if it does not yet exist.
+    ///
+    /// \returns non-const reference to child element
+    ///
     PacBio::BAM::SubDataSets& SubDataSets(void);
 
+    /// \}
+
+public:
+    /// \name Child Elements
+    /// \{
+
+    /// \brief Sets this dataset's Extensions element.
+    ///
+    /// This element will be created if it does not yet exist.
+    ///
+    /// \param[in] extensions  new value for the element
+    /// \returns reference to this dataset object
+    ///
+    DataSet& Extensions(const PacBio::BAM::Extensions& extensions);
+
+    /// \brief Sets this dataset's ExternalResources element.
+    ///
+    /// This element will be created if it does not yet exist.
+    ///
+    /// \param[in] resources  new value for the element
+    /// \returns reference to this dataset object
+    ///
     DataSet& ExternalResources(const PacBio::BAM::ExternalResources& resources);
+
+    /// \brief Sets this dataset's Filters element.
+    ///
+    /// This element will be created if it does not yet exist.
+    ///
+    /// \param[in] filters  new value for the element
+    /// \returns reference to this dataset object
+    ///
     DataSet& Filters(const PacBio::BAM::Filters& filters);
+
+    /// \brief Sets this dataset's DataSetMetadata element.
+    ///
+    /// This element will be created if it does not yet exist.
+    ///
+    /// \param[in] metadata  new value for the element
+    /// \returns reference to this dataset object
+    ///
     DataSet& Metadata(const PacBio::BAM::DataSetMetadata& metadata);
+
+    /// \brief Sets this dataset's DataSets element.
+    ///
+    /// This element will be created if it does not yet exist.
+    ///
+    /// \param[in] subdatasets  new value for the element
+    /// \returns reference to this dataset object
+    ///
     DataSet& SubDataSets(const PacBio::BAM::SubDataSets& subdatasets);
     
     /// \}
@@ -202,14 +752,66 @@ public:
     /// \name XML Namespace Handling
     /// \{
 
+    /// \brief Access this dataset's namespace info.
+    ///
+    /// \returns non-const reference to dataset's NamespaceRegistry
+    ///
     NamespaceRegistry& Namespaces(void);
 
     /// \}
 
 private:
     std::unique_ptr<DataSetBase> d_;
+    std::string path_;
 };
 
+/// \name DataSet Timestamp Utilities
+/// \{
+
+/// \brief Fetches current time, in "DataSetXML format".
+///
+/// \returns DataSetXML formatted timestamp
+///
+/// \sa ToDataSetFormat
+///
+PBBAM_EXPORT std::string CurrentTimestamp(void);
+
+/// \brief Converts a time_point to "DataSetXML-formatted" timestamp.
+///
+/// This is the format used as a component of the DataSet::TimeStampedName
+/// (yymmdd_HHmmssttt).
+///
+/// \returns "DataSetXML-formatted" timestamp
+///
+PBBAM_EXPORT std::string ToDataSetFormat(const std::chrono::system_clock::time_point& tp);
+
+/// \brief Converts a time_t to "DataSetXML-formatted" timestamp.
+///
+/// This is the format used as a component of the DataSet::TimeStampedName
+/// (yymmdd_HHmmssttt).
+///
+/// \returns "DataSetXML-formatted" timestamp
+///
+PBBAM_EXPORT std::string ToDataSetFormat(const time_t& tp);
+
+/// \brief Converts a time_point to ISO-8601 formatted timestamp.
+///
+/// This is the format used in DataSet::CreatedAt and DataSet::ModifiedAt.
+///
+/// \returns ISO-8601 formatted timestamp
+///
+PBBAM_EXPORT std::string ToIso8601(const std::chrono::system_clock::time_point& tp);
+
+/// \brief Converts a time_t to ISO-8601 formatted timestamp.
+///
+/// This is the format used in DataSet::CreatedAt and DataSet::ModifiedAt.
+///
+/// \returns ISO-8601 formatted timestamp
+///
+PBBAM_EXPORT std::string ToIso8601(const time_t& t);
+
+/// \}
+
 } // namespace BAM
 } // namespace PacBio
 
diff --git a/include/pbbam/DataSetTypes.h b/include/pbbam/DataSetTypes.h
index dd4c496..23df643 100644
--- a/include/pbbam/DataSetTypes.h
+++ b/include/pbbam/DataSetTypes.h
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file DataSetTypes.h
+/// \brief Defines the public DataSet component classes.
+//
 // Author: Derek Barnett
 
 #ifndef DATASETTYPES_H
@@ -47,263 +51,848 @@
 namespace PacBio {
 namespace BAM {
 
+/// \brief The DataSetMetadata class represents the %DataSetMetadata child
+///        element in DataSetXML.
+///
+/// A few top-level elements are built-in, but as pbbam is not primarily a
+/// DataSetXML API, most of the metadata hierarchy needs to be manually managed.
+///
 class PBBAM_EXPORT DataSetMetadata : public internal::DataSetElement
 {
 public:
+    /// \name Constructors & Related Methods
+    /// \{
+
+    /// \brief Constructs a DataSetMetadata with required fields.
     DataSetMetadata(const std::string& numRecords,
                     const std::string& totalLength);
 
+    /// \}
+
 public:
+    /// \name Operators
+    /// \{
+
+    /// \brief Merges DataSetMetadata contents.
+    ///
+    /// Adds contents of \p other to this metadata object
+    ///
+    /// \param[in] other  some other metadata to add to this one
+    /// \returns reference to this object
+    ///
     DataSetMetadata& operator+=(const DataSetMetadata& other);
 
+    /// \}
+
 public:
+    /// \name Child Elements
+    /// \{
+
+    /// \brief Fetches the text of the NumRecords element.
+    ///
+    /// \returns const reference to element text (empty string if not present)
+    ///
     const std::string& NumRecords(void) const;
+
+    /// \brief Fetches the text of the TotalLength element.
+    ///
+    /// \returns const reference to element text (empty string if not present)
+    ///
     const std::string& TotalLength(void) const;
+
+    /// \brief Fetches the Provenance element.
+    ///
+    /// \returns const reference to child element
+    /// \throws std::runtime_error if element does not exist
+    ///
     const PacBio::BAM::Provenance& Provenance(void) const;
 
+    /// \}
+
+public:
+    /// \name Child Elements
+    /// \{
+
+    /// \brief Fetches the text of the NumRecords element.
+    ///
+    /// This element will be created if it does not yet exist.
+    ///
+    /// \returns non-const reference to element text
+    ///
     std::string& NumRecords(void);
+
+    /// \brief Fetches the text of the TotalLength element.
+    ///
+    /// This element will be created if it does not yet exist.
+    ///
+    /// \returns non-const reference to element text
+    ///
     std::string& TotalLength(void);
+
+    /// \brief Fetches the Provenance element.
+    ///
+    /// This element will be created if it does not yet exist.
+    ///
+    /// \returns non-const reference to child element
+    ///
     PacBio::BAM::Provenance& Provenance(void);
 
+    /// \}
+
+public:
+    /// \name Child Elements
+    /// \{
+
+    /// \brief Sets the text of the NumRecords element.
+    ///
+    /// This element will be created if it does not yet exist.
+    ///
+    /// \returns reference to this metadata object
+    ///
     DataSetMetadata& NumRecords(const std::string& numRecords);
+
+    /// \brief Sets the text of the TotalLength element.
+    ///
+    /// This element will be created if it does not yet exist.
+    ///
+    /// \returns reference to this metadata object
+    ///
     DataSetMetadata& TotalLength(const std::string& totalLength);
+
+    /// \brief Sets the Provenance child element.
+    ///
+    /// This element will be created if it does not yet exist.
+    ///
+    /// \returns reference to this metadata object
+    ///
     DataSetMetadata& Provenance(const PacBio::BAM::Provenance& provenance);
+
+    /// \}
 };
 
+/// \brief The ExtensionElement class represents an %ExtensionElement element in
+///        DataSetXML.
+///
 class PBBAM_EXPORT ExtensionElement : public internal::DataSetElement  {
 public:
     ExtensionElement(void);
 };
 
+/// \brief The Extensions class represents an %Extensions element in DataSetXML.
+///
+/// The Extensions element is essentially just a list of ExtensionElement
+/// objects.
+///
 class PBBAM_EXPORT Extensions : public internal::DataSetListElement<ExtensionElement>
 {
 public:
+    /// \brief Creates an empty extensions list.
     Extensions(void);
 };
 
+class ExternalResources;
+
+/// \brief The ExternalResource class represents an %ExternalResource element in
+///        DataSetXML.
+///
+/// An ExternalResource can itself have a child element, ExternalResources, that
+/// lists related files (e.g. index files).
+///
 class PBBAM_EXPORT ExternalResource : public internal::IndexedDataType
 {
 public:
-    ExternalResource(void);
+    /// \brief Creates an ExternalResource from a BamFile object.
+    ///
+    /// The metatype & resourceId are automatically set.
+    ///
     ExternalResource(const BamFile& bamFile);
+
+    /// \brief Creates an ExternalResource with provided \p metatype and
+    ///        \p filename as resource ID.
+    ///
     ExternalResource(const std::string& metatype,
                      const std::string& filename);
 
 public:
+    /// \brief Fetches the resource's ExternalResources child element.
+    ///
+    /// \returns const reference to child element
+    /// \throws std::runtime_error if element does not exist
+    ///
+    const PacBio::BAM::ExternalResources& ExternalResources(void) const;
+
+public:
+    /// \brief Fetches the resource's ExternalResources child element.
+    ///
+    /// This element will be created if it does not yet exist.
+    ///
+    /// \returns non-const reference to child element
+    ///
+    PacBio::BAM::ExternalResources& ExternalResources(void);
+
+    /// \brief Sets this resource's ExternalResources child element.
+    ///
+    /// This element will be created if it does not yet exist.
+    ///
+    /// \param[in] resources  new value for the element
+    /// \returns reference to this resource object
+    ///
+    ExternalResource& ExternalResources(const PacBio::BAM::ExternalResources& resources);
+
+public:
+    /// \brief Converts an ExternalResource to a BamFile object
+    ///
+    /// \returns corresponding BamFile object for this ExternalResource
+    /// \throws std::runtime_error if fails to open %BAM file (e.g. does not
+    ///         exist, not a %BAM file, etc.)
+    ///
+    /// \deprecated Use the results from DataSet::BamFiles instead. This method
+    ///             cannot resolve relative filepaths and will be removed in the
+    ///             near future.
+    ///
     BamFile ToBamFile(void) const;
 };
 
+/// \brief The ExternalResources class represents an %ExternalResources element
+///        in DataSetXML.
+///
+/// The ExternalResources element is essentially just a list of ExternalResource
+/// elements.
+///
 class PBBAM_EXPORT ExternalResources : public internal::DataSetListElement<ExternalResource>
 {
 public:
+    /// \brief Creates an empty resource list.
     ExternalResources(void);
 
+    /// \brief Merges \p other resource list with this one.
     ExternalResources& operator+=(const ExternalResources& other);
 
 public:
+    /// \brief Adds an ExternalResource to this list.
     void Add(const ExternalResource& ext);
+
+    /// \brief Removes an ExternalResource from this list.
     void Remove(const ExternalResource& ext);
 
 public:
+    /// \brief Converts resource list to BamFile objects.
+    ///
+    /// \deprecated Use DataSet::BamFiles instead. This method cannot resolve
+    ///             relative filepaths and will be removed in the near future.
+    ///
     std::vector<BamFile> BamFiles(void) const;
 };
 
+/// \brief The FileIndex class represents a %FileIndex element in DataSetXML.
+///
+/// A FileIndex is used as an auxiliary to an ExternalResource, providing
+/// information about a data file's index file (e.g. for %BAM files, *.bai or
+/// *.pbi).
+///
 class PBBAM_EXPORT FileIndex : public internal::InputOutputDataType
 {
 public:
-    FileIndex(void);
+    /// \brief Creates a FileIndex with provided \p metatype and \p filename as
+    ///        resource ID.
+    ///
+    FileIndex(const std::string& metatype, 
+              const std::string& filename);
 };
 
+/// \brief The FileIndices class represents a %FileIndices element in DataSetXML.
+///
+/// The FileIndices element is essentially just a list of FileIndex elements,
+/// providing information about a data file's index files (e.g. for %BAM files
+/// this will usually be *.bai and/or *.pbi).
+///
 class PBBAM_EXPORT FileIndices : public internal::DataSetListElement<FileIndex>
 {
 public:
+    /// \brief Creates an empty index list.
     FileIndices(void);
 
+public:
+    /// \brief Adds a FileIndex to this list.
     void Add(const FileIndex& index);
+
+    /// \brief Removes a FileIndex from this list.
     void Remove(const FileIndex& index);
 };
 
+/// \brief The Filter class represents a %Filter element in DataSetXML.
+///
+/// The Filter element allows analysis pipelines to describe filters on data
+/// that should be respected downstream, without needing to create filtered
+/// intermediate files.
+///
+/// A filter consists of a list of Property elements, all of which must be
+/// satisfied (logical AND) to pass the filter, e.g. property1 && property2 &&
+/// property3.
+///
 class PBBAM_EXPORT Filter : public internal::DataSetElement
 {
 public:
+    /// \brief Creates an empty filter.
     Filter(void);
 
 public:
+    /// \brief Fetches the filter's property list element.
+    ///
+    /// \returns const reference to child element
+    /// \throws std::runtime_error if element does not exist
+    ///
     const PacBio::BAM::Properties& Properties(void) const;
+
+public:
+    /// \brief Fetches the filter's property list child element.
+    ///
+    /// This element will be created if it does not yet exist.
+    ///
+    /// \returns non-const reference to child element
+    ///
     PacBio::BAM::Properties& Properties(void);
+
+    /// \brief Sets this filter's Properties child element.
+    ///
+    /// This element will be created if it does not yet exist.
+    ///
+    /// \param[in] properties new value for the element
+    /// \returns reference to this filter object
+    ///
     Filter& Properties(const PacBio::BAM::Properties& properties);
 };
 
+/// \brief The Filters class represents a %Filters list element in DataSetXML.
+///
+/// The Filters element is essentially a list of Filter elements. For analysis
+/// purposes, the filters are combined with logical OR to determine which data
+/// passes, e.g. filter1 || filter2 || filter3.
+///
 class PBBAM_EXPORT Filters : public internal::DataSetListElement<Filter>
 {
 public:
+    /// \brief Creates an empty filter list.
     Filters(void);
 
+    /// \brief Merges \p other filter list with this one.
     Filters& operator+=(const Filters& other);
 
+public:
+    /// \brief Adds a filter to this list.
     void Add(const Filter& filter);
+
+    /// \brief Removes a filter from this list.
     void Remove(const Filter& filter);
 };
 
+/// \brief The ParentTool class represents a %ParentTool element in DataSetXML.
+///
 class PBBAM_EXPORT ParentTool : public internal::BaseEntityType {
 public:
+    /// \brief Creates an empty %ParentTool element.
     ParentTool(void);
 };
 
+/// \brief The Property class represents a %Property element in DataSetXML.
+///
+/// A Property is the primary building block of %DataSetXML filtering. The
+/// %Property element describes a data record's property (or field), some value,
+/// and a comparison operator.
+///
+/// For example, one could filter all %BAM records with a read accuracy at or
+/// above 0.9. In C++ this could be constructed like:
+/// \code{.cpp}
+/// Property p("accuracy", "0.9", ">=");
+/// \endcode
+///
 class PBBAM_EXPORT Property : public internal::DataSetElement
 {
 public:
+    /// \brief Constructs a filter property.
     Property(const std::string& name,
              const std::string& value,
              const std::string& op);
 
 public:
+
+    /// \brief Fetches the value of property's Name attribute.
+    ///
+    /// \returns const reference to attribute value
+    ///
     const std::string& Name(void) const;
+
+    /// \brief Fetches the value of property's Operator attribute.
+    ///
+    /// \returns const reference to attribute value
+    ///
     const std::string& Operator(void) const;
+
+    /// \brief Fetches the value of property's Value attribute.
+    ///
+    /// \returns const reference to attribute value
+    ///
     const std::string& Value(void) const;
 
+public:
+
+    /// \brief Fetches the value of property's Name attribute.
+    ///
+    /// \returns non-const reference to attribute value
+    ///
     std::string& Name(void);
+
+    /// \brief Fetches the value of property's Operator attribute.
+    ///
+    /// \returns non-const reference to attribute value
+    ///
     std::string& Operator(void);
+
+    /// \brief Fetches the value of property's Value attribute.
+    ///
+    /// \returns non-const reference to attribute value
+    ///
     std::string& Value(void);
 
+public:
+    /// \brief Sets this property's Name attribute.
+    ///
+    /// \param[in] name  new value for the attribute
+    /// \returns reference to this property object
+    ///
     Property& Name(const std::string& name);
+
+    /// \brief Sets this property's Operator attribute.
+    ///
+    /// \param[in] op  new value for the attribute
+    /// \returns reference to this property object
+    ///
     Property& Operator(const std::string& op);
+
+    /// \brief Sets this property's Value attribute.
+    ///
+    /// \param[in] value  new value for the attribute
+    /// \returns reference to this property object
+    ///
     Property& Value(const std::string& value);
 };
 
+/// \brief The Properties class represents a %Properties list element in
+///        DataSetXML.
+///
+/// The Properties element is essentially a list of Property elements.
+///
 class PBBAM_EXPORT Properties : public internal::DataSetListElement<Property>
 {
 public:
+    /// \brief Creates an empty property list.
     Properties(void);
 
+public:
+    /// \brief Adds a property to this list.
     void Add(const Property& property);
+
+    /// \brief Removes a property from this list.
     void Remove(const Property& property);
 };
 
+/// \brief The Provenance class represents a %Provenance element in DataSetXML.
+///
 class PBBAM_EXPORT Provenance : public internal::DataSetElement
 {
 public:
+    /// \brief Creates an empty provenance element.
     Provenance(void);
 
 public:
+    /// \brief Fetches the value of CreatedBy attribute.
+    ///
+    /// \returns const reference to attribute value (empty string if not
+    ///          present)
+    ///
     const std::string& CreatedBy(void) const;
+
+    /// \brief Fetches the value of CommonServicesInstanceId attribute.
+    ///
+    /// \returns const reference to attribute value (empty string if not
+    ///          present)
+    ///
     const std::string& CommonServicesInstanceId(void) const;
+
+    /// \brief Fetches the value of CreatorUserId attribute.
+    ///
+    /// \returns const reference to attribute value (empty string if not
+    ///          present)
+    ///
     const std::string& CreatorUserId(void) const;
+
+    /// \brief Fetches the value of ParentJobId attribute.
+    ///
+    /// \returns const reference to attribute value (empty string if not
+    ///          present)
+    ///
     const std::string& ParentJobId(void) const;
+
+    /// \brief Fetches the ParentTool child element.
+    ///
+    /// \returns const reference to child element
+    /// \throws std::runtime_error if element does not exist
+    ///
     const PacBio::BAM::ParentTool& ParentTool(void) const;
 
+public:
+
+    /// \brief Fetches the value of CreatedBy attribute.
+    ///
+    /// This attribute will be created if it does not yet exist.
+    ///
+    /// \returns non-const reference to attribute value (empty string if this is
+    ///          a new attribute)
+    ///
     std::string& CreatedBy(void);
+
+    /// \brief Fetches the value of CommonServicesInstanceId attribute.
+    ///
+    /// This attribute will be created if it does not yet exist.
+    ///
+    /// \returns non-const reference to attribute value (empty string if this is
+    ///          a new attribute)
+    ///
     std::string& CommonServicesInstanceId(void);
+
+    /// \brief Fetches the value of CreatorUserId attribute.
+    ///
+    /// This attribute will be created if it does not yet exist.
+    ///
+    /// \returns non-const reference to attribute value (empty string if this is
+    ///          a new attribute)
+    ///
     std::string& CreatorUserId(void);
+
+    /// \brief Fetches the value of ParentJobId attribute.
+    ///
+    /// This attribute will be created if it does not yet exist.
+    ///
+    /// \returns non-const reference to attribute value (empty string if this is
+    ///          a new attribute)
+    ///
     std::string& ParentJobId(void);
+
+    /// \brief Fetches the ParentTool child element.
+    ///
+    /// This element will be created if it does not yet exist.
+    ///
+    /// \returns non-const reference to child element
+    ///
     PacBio::BAM::ParentTool& ParentTool(void);
 
+public:
+
+    /// \brief Sets the CreatedBy attribute.
+    ///
+    /// This attribute will be created if it does not yet exist.
+    ///
+    /// \param[in] createdBy  new value for the attribute
+    /// \returns reference to this object
+    ///
     Provenance& CreatedBy(const std::string& createdBy);
+
+    /// \brief Sets the CommonServicesInstanceId attribute.
+    ///
+    /// This attribute will be created if it does not yet exist.
+    ///
+    /// \param[in] id  new value for the attribute
+    /// \returns reference to this object
+    ///
     Provenance& CommonServicesInstanceId(const std::string& id);
+
+    /// \brief Sets the CreatorUserId attribute.
+    ///
+    /// This attribute will be created if it does not yet exist.
+    ///
+    /// \param[in] id  new value for the attribute
+    /// \returns reference to this object
+    ///
     Provenance& CreatorUserId(const std::string& id);
+
+    /// \brief Sets the ParentJobId attribute.
+    ///
+    /// This attribute will be created if it does not yet exist.
+    ///
+    /// \param[in] id  new value for the attribute
+    /// \returns reference to this object
+    ///
     Provenance& ParentJobId(const std::string& id);
+
+    /// \brief Sets the ParentTool child element.
+    ///
+    /// This element will be created if it does not yet exist.
+    ///
+    /// \param[in] tool  new value for the element
+    /// \returns reference to this object
+    ///
     Provenance& ParentTool(const PacBio::BAM::ParentTool& tool);
 };
 
 class SubDataSets;
 
+/// \brief The DataSetBase class provides the attributes & child elements shared
+///        by all dataset types.
+///
+/// Client code should not need to use this class directly. It should be
+/// considered as more of an implementation detail and may in fact be removed
+/// from public API in the future. The top-level DataSet is the recommended
+/// entry point.
+///
 class PBBAM_EXPORT DataSetBase : public internal::StrictEntityType
 {
 public:
+
+    /// \brief Creates a DataSetBase object, or one of its subclasses, from an
+    ///        XML element name (e.g. SubreadSet)
+    ///
     static std::shared_ptr<DataSetBase> Create(const std::string& typeName);
 
 public:
+    /// \brief Creates an empty, generic DataSetBase.
     DataSetBase(void);
 
 protected:
-    DataSetBase(const std::string& label, const XsdType& xsd);
+    /// \brief Creates a DataSetBase with key values initialized.
+    DataSetBase(const std::string& metatype, 
+                const std::string& label, 
+                const XsdType& xsd);
+
+    /// \brief Returns a new DataSetBase containing a deep copy of contents
     DataSetBase* DeepCopy(void) const;
 
 public:
+    /// \brief Merges dataset contents.
+    ///
+    /// Adds contents of \p other to this dataset object
+    ///
+    /// \param[in] other  some other dataset to add to this one
+    /// \returns reference to this dataset object
+    ///
     DataSetBase& operator+=(const DataSetBase& other);
 
 public:
+    /// \brief Fetches the dataset's ExternalResources element.
+    ///
+    /// \returns const reference to child element
+    /// \throws std::runtime_error if element does not exist
+    ///
     const PacBio::BAM::ExternalResources& ExternalResources(void) const;
+
+    /// \brief Fetches the dataset's Filters element.
+    ///
+    /// \returns const reference to child element
+    ///
     const PacBio::BAM::Filters& Filters(void) const;
+
+    /// \brief Fetches the dataset's DataSetMetadata element.
+    ///
+    /// \returns const reference to child element
+    ///
     const PacBio::BAM::DataSetMetadata& Metadata(void) const;
+
+    /// \brief Fetches the dataset's DataSets element.
+    ///
+    /// \returns const reference to child element
+    ///
     const PacBio::BAM::SubDataSets& SubDataSets(void) const;
 
+public:
+    /// \brief Access this dataset's namespace info.
+    ///
+    /// \returns const reference to dataset's NamespaceRegistry
+    ///
+    const NamespaceRegistry& Namespaces(void) const;
+
+public:
+    /// \brief Fetches the dataset's ExternalResources element.
+    ///
+    /// This element will be created if it does not yet exist.
+    ///
+    /// \returns non-const reference to child element
+    ///
     PacBio::BAM::ExternalResources& ExternalResources(void);
+
+    /// \brief Fetches the dataset's Filters element.
+    ///
+    /// This element will be created if it does not yet exist.
+    ///
+    /// \returns non-const reference to child element
+    ///
     PacBio::BAM::Filters& Filters(void);
+
+    /// \brief Fetches the dataset's DataSetMetadata element.
+    ///
+    /// This element will be created if it does not yet exist.
+    ///
+    /// \returns non-const reference to child element
+    ///
     PacBio::BAM::DataSetMetadata& Metadata(void);
+
+    /// \brief Fetches the dataset's DataSets element.
+    ///
+    /// This element will be created if it does not yet exist.
+    ///
+    /// \returns non-const reference to child element
+    ///
     PacBio::BAM::SubDataSets& SubDataSets(void);
 
+public:
+    /// \brief Sets this dataset's ExternalResources element.
+    ///
+    /// This element will be created if it does not yet exist.
+    ///
+    /// \param[in] resources  new value for the element
+    /// \returns reference to this dataset object
+    ///
     DataSetBase& ExternalResources(const PacBio::BAM::ExternalResources& resources);
+
+    /// \brief Sets this dataset's Filters element.
+    ///
+    /// This element will be created if it does not yet exist.
+    ///
+    /// \param[in] filters  new value for the element
+    /// \returns reference to this dataset object
+    ///
     DataSetBase& Filters(const PacBio::BAM::Filters& filters);
+
+    /// \brief Sets this dataset's DataSetMetadata element.
+    ///
+    /// This element will be created if it does not yet exist.
+    ///
+    /// \param[in] metadata  new value for the element
+    /// \returns reference to this dataset object
+    ///
     DataSetBase& Metadata(const PacBio::BAM::DataSetMetadata& metadata);
-    DataSetBase& SubDataSets(const PacBio::BAM::SubDataSets& subdatasets);
 
+    /// \brief Sets this dataset's DataSets element.
+    ///
+    /// This element will be created if it does not yet exist.
+    ///
+    /// \param[in] subdatasets  new value for the element
+    /// \returns reference to this dataset object
+    ///
+    DataSetBase& SubDataSets(const PacBio::BAM::SubDataSets& subdatasets);
 
 public:
-    const NamespaceRegistry& Namespaces(void) const;
+    /// \brief Access this dataset's namespace info.
+    ///
+    /// \returns non-const reference to dataset's NamespaceRegistry
+    ///
     NamespaceRegistry& Namespaces(void);
 
 private:
     NamespaceRegistry registry_;
 };
 
+/// \brief The AlignmentSet class represents an %AlignmentSet root element in
+///        DataSetXML.
+///
 class PBBAM_EXPORT AlignmentSet : public DataSetBase
 {
 public:
+    /// \brief Creates an empty AlignmentSet dataset.
     AlignmentSet(void);
 };
 
+/// \brief The BarcodeSet class represents a %BarcodeSet root element in
+///        DataSetXML.
+///
 class PBBAM_EXPORT BarcodeSet : public DataSetBase
 {
 public:
+    /// \brief Creates an empty BarcodeSet dataset.
     BarcodeSet(void);
 };
 
+/// \brief The ConsensusAlignmentSet class represents a %ConsensusAlignmentSet
+///        root element in DataSetXML.
+///
 class PBBAM_EXPORT ConsensusAlignmentSet : public DataSetBase
 {
 public:
+    /// \brief Creates an empty ConsensusAlignmentSet dataset.
     ConsensusAlignmentSet(void);
 };
 
+/// \brief The ConsensusReadSet class represents a %ConsensusReadSet root
+///        element in DataSetXML.
+///
 class PBBAM_EXPORT ConsensusReadSet : public DataSetBase
 {
 public:
+    /// \brief Creates an empty ConsensusReadSet dataset.
     ConsensusReadSet(void);
 };
 
+/// \brief The ContigSet class represents a %ContigSet root element in
+///        DataSetXML.
+///
 class PBBAM_EXPORT ContigSet : public DataSetBase
 {
 public:
+    /// \brief Creates an empty ContigSet dataset.
     ContigSet(void);
 };
 
+/// \brief The HdfSubreadSet class represents a %HdfSubreadSet root element in
+///        DataSetXML.
+///
 class PBBAM_EXPORT HdfSubreadSet : public DataSetBase
 {
 public:
+    /// \brief Creates an empty HdfSubreadSet dataset.
     HdfSubreadSet(void);
 };
 
+/// \brief The ReferenceSet class represents a %ReferenceSet root element in
+///        DataSetXML.
+///
 class PBBAM_EXPORT ReferenceSet : public DataSetBase
 {
 public:
+    /// \brief Creates an empty ReferenceSet dataset.
     ReferenceSet(void);
 };
 
+/// \brief The SubDataSets class represents a %DataSets list element in
+///        DataSetXML.
+///
+/// The SubDataSets element is essentially a list of DataSets.
+///
 class PBBAM_EXPORT SubDataSets : public internal::DataSetListElement<DataSetBase>
 {
 public:
+    /// \brief Creates an empty list of sub-datasets.
     SubDataSets(void);
 
+public:
+    /// \brief Adds \p other sub-dataset to this list.
     SubDataSets& operator+=(const DataSetBase& other); // single
+
+    /// \brief Adds \p other sub-dataset list to this list.
     SubDataSets& operator+=(const SubDataSets& other); // list
 
+public:
+    /// \brief Adds a sub-dataset to this list.
     void Add(const DataSetBase& subdataset);
+
+    /// \brief Removes a sub-dataset from this list.
     void Remove(const DataSetBase& subdataset);
 };
 
+/// \brief The SubreadSet class represents a %SubreadSet root element in
+///        DataSetXML.
+///
 class PBBAM_EXPORT SubreadSet : public DataSetBase
 {
 public:
+    /// \brief Creates an empty SubreadSet dataset.
     SubreadSet(void);
 };
 
diff --git a/include/pbbam/DataSetXsd.h b/include/pbbam/DataSetXsd.h
index 29df5e1..8d0ec38 100644
--- a/include/pbbam/DataSetXsd.h
+++ b/include/pbbam/DataSetXsd.h
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file DataSetXsd.h
+/// \brief Defines the XSD- and namespace-related classes for DataSetXML.
+//
 // Author: Derek Barnett
 
 #ifndef DATASETXSD_H
@@ -45,6 +49,8 @@
 namespace PacBio {
 namespace BAM {
 
+/// \brief The XsdType enum defines the supported XSD namespaces.
+///
 enum class XsdType
 {
     NONE
@@ -65,15 +71,26 @@ enum class XsdType
   , SEEDING_DATA
 };
 
+/// \brief The NamespaceInfo class provides XML namespace info (prefix & URI).
+///
 class PBBAM_EXPORT NamespaceInfo
 {
 public:
+    /// \brief Creates an empty entry.
+    ///
+    /// This constructor only exists for STL container compatibility.
+    ///
     NamespaceInfo(void);
+
+    /// \brief Creates a valid info entry.
     NamespaceInfo(const std::string& name,
                   const std::string& uri);
 
 public:
+    /// \brief Fetches namespace name (i.e. prefix)
     const std::string& Name(void) const { return name_; }
+
+    /// \brief Fetches namespace URI.
     const std::string& Uri(void) const { return uri_; }
 
 private:
@@ -81,25 +98,54 @@ private:
     std::string uri_;
 };
 
+/// \brief The NamespaceRegistry class provides a per-dataset registry of XML
+///        namespace information.
+///
+/// This is used to format XML output - properly prefixing element labels with
+/// namespace as appropriate.
+///
 class PBBAM_EXPORT NamespaceRegistry
 {
 public:
+    /// \name Constructors & Related Methods
+    /// \{
+
     NamespaceRegistry(void);
     NamespaceRegistry(const NamespaceRegistry& other);
+    NamespaceRegistry(NamespaceRegistry&& other);
     NamespaceRegistry& operator=(const NamespaceRegistry& other);
+    NamespaceRegistry& operator=(NamespaceRegistry&& other);
     ~NamespaceRegistry(void);
 
+    /// \}
+
 public:
+    /// \name Registry Access
+    /// \{
+
+    /// \brief Fetches namespace info for the dataset's default XSD type.
     const NamespaceInfo& DefaultNamespace(void) const;
+
+    /// \brief Fetches dataset's default XSD type.
     XsdType DefaultXsd(void) const;
-    const NamespaceInfo& Namespace(const XsdType& xsd) const;
 
-    XsdType XsdForUri(const std::string& uri) const;
+    /// \brief Fetches namespace info for the requested XSD type.
+    const NamespaceInfo& Namespace(const XsdType& xsd) const;
 
-public:
+    /// \brief Registers namespace info for a particular XSD type.
     void Register(const XsdType& xsd, const NamespaceInfo& namespaceInfo);
+
+    /// \brief Updates dataset's default XSD type.
     void SetDefaultXsd(const XsdType& xsd);
 
+    /// \brief Fetches the XSD type for \p elementLabel.
+    XsdType XsdForElement(const std::string& elementLabel) const;
+
+    /// \brief Fetches the XSD type for a particular URI.
+    XsdType XsdForUri(const std::string& uri) const;
+
+    /// \}
+
 private:
     std::map<XsdType, NamespaceInfo> data_;
     XsdType defaultXsdType_;
diff --git a/include/pbbam/EntireFileQuery.h b/include/pbbam/EntireFileQuery.h
index cd5809e..10c06ff 100644
--- a/include/pbbam/EntireFileQuery.h
+++ b/include/pbbam/EntireFileQuery.h
@@ -32,26 +32,63 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file EntireFileQuery.h
+/// \brief Defines the EntireFileQuery class.
+//
 // Author: Derek Barnett
 
 #ifndef ENTIREFILEQUERY_H
 #define ENTIREFILEQUERY_H
 
 #include "pbbam/internal/QueryBase.h"
-#include <htslib/sam.h>
+#include <memory>
 
 namespace PacBio {
 namespace BAM {
 
-class BamFile;
-
+/// \brief The EntireFileQuery class provides iterable access to a DataSet's
+///        %BAM records, reading through the entire contents of each file.
+///
+/// Input files will be accessed in the order listed in the DataSet.
+///
+/// \include code/EntireFileQuery.txt
+///
+/// Iteration is not limited to only 'const' records. The files themselves will
+/// not be affected, but individual records may be modified if needed.
+///
+/// \include code/EntireFileQuery_NonConst.txt
+///
+/// \note DataSets can be implicitly constructed from %BAM filenames as well.
+///       Thus a single %BAM file can be read through using the following:
+///
+/// \include code/EntireFileQuery_BamFilename.txt
+///
 class PBBAM_EXPORT EntireFileQuery : public internal::IQuery
 {
 public:
+    /// \brief Creates a new EntireFileQuery, reading through the entire
+    ///        contents of a dataset.
+    ///
+    /// \param[in] dataset  input data source(s)
+    /// \throws std::runtime_error on failure to open/read underlying %BAM
+    ///         files.
+    ///
     EntireFileQuery(const PacBio::BAM::DataSet& dataset);
-protected:
-    FileIterPtr CreateIterator(const BamFile& bamFile);
+    ~EntireFileQuery(void);
+
+public:
+    /// \brief Main iteration point for record access.
+    ///
+    /// Most client code should not need to use this method directly. Use
+    /// iterators instead.
+    ///
+    bool GetNext(BamRecord& r);
+
+private:
+    struct EntireFileQueryPrivate;
+    std::unique_ptr<EntireFileQueryPrivate> d_;
 };
 
 } // namespace BAM
diff --git a/include/pbbam/Frames.h b/include/pbbam/Frames.h
index f11598c..326701b 100644
--- a/include/pbbam/Frames.h
+++ b/include/pbbam/Frames.h
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file Frames.h
+/// \brief Defines the Frames class.
+//
 // Author: Derek Barnett
 
 #ifndef FRAMES_H
@@ -44,26 +48,36 @@
 namespace PacBio {
 namespace BAM {
 
+/// \brief The Frames class represents pulse frame data.
+///
+/// Frame data may be stored in either their raw, 16-bit values or
+/// using a lossy, 8-bit compression scheme.
+///
+/// This class is used to store the data and convert between the 2 storage types.
+///
 class PBBAM_EXPORT Frames
 {
 public:
     /// \name Conversion Methods
     /// \{
 
-    /// Constructs a Frames object from encoded (lossy, 8-bit data).
+    /// \brief Constructs a Frames object from encoded (lossy, 8-bit) data.
     ///
-    /// \note This method should probably not be needed often by client code working with frame data.
-    /// It exists primarily for (internal) parsing & interpretation of the BAM file contents. The
-    /// method is available, though, should the conversion operation be needed.
+    /// \note This method should probably not be needed often by client code
+    ///       working with frame data. It exists primarily for (internal)
+    ///       parsing & interpretation of the %BAM file contents. The method is
+    ///       available, though, should the conversion operation be needed.
     ///
-    /// \param[in] codedData encoded data
+    /// \param[in] codedData    encoded data
     /// \returns Frames object
+    ///
     static Frames Decode(const std::vector<uint8_t>& codedData);
 
-    /// Encodes a container of (raw) frames values in our 8-bit encoding.
+    /// \brief Creates encoded, compressed frame data from raw input data.
+    ///
+    /// \param[in] frames   raw frame data
+    /// \returns lossy, 8-bit encoded frame data
     ///
-    /// \param[in] frames expanded frame data
-    /// \returns lossy, 8-bit encoded frame codes
     static std::vector<uint8_t> Encode(const std::vector<uint16_t>& frames);
 
     /// \}
@@ -112,7 +126,7 @@ public:
     /// \}
 
 public:
-    /// \name Iterators
+    /// \name STL Compatibility
     /// \{
 
     /// \returns A const_iterator to the beginning of the sequence.
@@ -133,6 +147,12 @@ public:
     /// \returns An iterator to the element past the end of the sequence.
     std::vector<uint16_t>::iterator end(void);
 
+    /// \returns The number of frame data points.
+    size_t size(void) const;
+
+    /// \returns True if the container is empty, false otherwise.
+    bool empty(void) const;
+
     /// \} 
 
 public:
@@ -143,15 +163,14 @@ public:
     ///
     /// \param[in] frames data in expanded (not encoded) form
     /// \returns reference to this object
+    ///
     Frames& Data(const std::vector<uint16_t>& frames);
 
     /// Sets this record's data.
     ///
-    /// This is an overloaded function, allowing move semantics
-    /// (instead of copying the data).
-    ///
     /// \param[in] frames data in expanded (not encoded) form
     /// \returns reference to this object
+    ///
     Frames& Data(std::vector<uint16_t>&& frames);
 
     /// \}
@@ -160,46 +179,9 @@ private:
     std::vector<uint16_t> data_;
 };
 
-inline const std::vector<uint16_t>& Frames::Data(void) const
-{ return data_; }
-
-inline std::vector<uint16_t>& Frames::DataRaw(void)
-{ return data_; }
-
-inline std::vector<uint8_t> Frames::Encode(void) const
-{ return Frames::Encode(data_); }
-
-inline Frames& Frames::Data(const std::vector<uint16_t>& frames)
-{ data_ = frames; return *this; }
-
-inline Frames& Frames::Data(std::vector<uint16_t>&& frames)
-{ data_ = std::move(frames); return *this; }
-
-inline std::vector<uint16_t>::const_iterator Frames::cbegin(void) const
-{ return data_.cbegin(); }
-
-inline std::vector<uint16_t>::const_iterator Frames::cend(void) const
-{ return data_.cend(); }
-
-inline std::vector<uint16_t>::const_iterator Frames::begin(void) const
-{ return data_.begin(); }
-
-inline std::vector<uint16_t>::const_iterator Frames::end(void) const
-{ return data_.end(); }
-
-inline std::vector<uint16_t>::iterator Frames::begin(void)
-{ return data_.begin(); }
-
-inline std::vector<uint16_t>::iterator Frames::end(void)
-{ return data_.end(); }
-
-inline bool Frames::operator==(const Frames& other) const
-{ return data_ == other.data_; }
-
-inline bool Frames::operator!=(const Frames& other) const
-{ return !(*this == other); }
-
 } // namespace BAM
 } // namespace PacBio
 
+#include "pbbam/internal/Frames.inl"
+
 #endif // FRAMES_H
diff --git a/include/pbbam/GenomicInterval.h b/include/pbbam/GenomicInterval.h
index 12ebb9a..a7d4986 100644
--- a/include/pbbam/GenomicInterval.h
+++ b/include/pbbam/GenomicInterval.h
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file GenomicInterval.h
+/// \brief Defines the GenomicInterval class.
+//
 // Author: Derek Barnett
 
 #ifndef GENOMICINTERVAL_H
@@ -46,29 +50,33 @@
 namespace PacBio {
 namespace BAM {
 
-/// This class represents a genomic interval (reference name, and 0-based coordinates)
+/// \brief The GenomicInterval class represents a genomic interval (reference
+///        name and 0-based coordinates).
+///
 class PBBAM_EXPORT GenomicInterval
 {
 public:
     /// \name Constructors & Related Methods
     ///  \{
 
-    /// Default constructor; yields an empty genomic interval
+    /// \brief Creates an empty genomic interval
     GenomicInterval(void);
 
-    /// Constructor for interval on sequence with \p name, using range: [\p start, \p stop)
+    /// \brief Creates a genomic interval on sequence with \p name, using range:
+    ///       [\p start, \p stop)
     GenomicInterval(const std::string& name,
                     const Position& start,
                     const Position& stop);
 
-    /// Constructor for interval, using REGION string
+    /// \brief Creates a genomic interval, using REGION string
     ///
     /// "<ref>:<start>-<stop>" ("chr8:200-600")
     ///
-    /// \note The htslib/samtools REGION string expects start positions to be 1-based.
-    ///       However, throughout pbbam (including the rest of this class), we stick
-    ///       to 0-based start coordinates. Thus, while the syntax matches that of samtools,
-    ///       we are using a 0-based start coordinate here.
+    /// \note The htslib/samtools REGION string expects start positions to be
+    ///       1-based. However, throughout pbbam (including the rest of this
+    ///       class), we stick to 0-based start coordinates. Thus, while the
+    ///       syntax matches that of samtools, we are using a 0-based start
+    ///       coordinate here.
     ///
     GenomicInterval(const std::string& zeroBasedRegionString);
 
@@ -80,6 +88,42 @@ public:
     /// \}
 
 public:
+    /// \name Comparison Operators
+    /// \{
+
+    /// \returns true if same id & underlying interval
+    bool operator==(const GenomicInterval& other) const;
+
+    /// \returns true if either ids or underlying intervals differ
+    bool operator!=(const GenomicInterval& other) const;
+
+    /// \}
+
+public:
+    /// \name Interval Operations
+    /// \{
+
+    /// \returns true if same id and underlying Interval::CoveredBy() other.
+    bool CoveredBy(const GenomicInterval& other) const;
+
+    /// \returns true if same id and underlying Interval::Covers() other.
+    bool Covers(const GenomicInterval& other) const;
+
+    /// \returns true if same id and underlying Interval::Intersects() other.
+    bool Intersects(const GenomicInterval& other) const;
+
+    /// \returns true if underlying Interval::IsValid(), and id/endpoints are
+    ///          non-negative.
+    ///
+    bool IsValid(void) const;
+
+    /// \returns length of underlying interval
+    size_t Length(void) const;
+
+    /// \}
+
+
+public:
     /// \name Attributes
     /// \{
 
@@ -105,110 +149,40 @@ public:
     ///
     /// \param[in] name
     /// \returns reference to this interval
+    ///
     GenomicInterval& Name(const std::string& name);
 
     /// Sets this underlying Interval
     ///
     /// \param[in] interval
     /// \returns reference to this interval
+    ///
     GenomicInterval& Interval(const PacBio::BAM::Interval<Position>& interval);
 
     /// Sets this interval's start coordinate.
     ///
     /// \param[in] start
     /// \returns reference to this interval
+    ///
     GenomicInterval& Start(const Position start);
 
     /// Sets this interval's stop coordinate.
     ///
     /// \param[in] stop
     /// \returns reference to this interval
+    ///
     GenomicInterval& Stop(const Position stop);
 
     /// \}
 
-public:
-    /// \name Interval Operations
-    /// \{
-
-    /// \returns true if same id and underlying Interval::CoveredBy() other.
-    bool CoveredBy(const GenomicInterval& other) const;
-
-    /// \returns true if same id and underlying Interval::Covers() other.
-    bool Covers(const GenomicInterval& other) const;
-
-    /// \returns true if same id and underlying Interval::Intersects() other.
-    bool Intersects(const GenomicInterval& other) const;
-
-    /// \returns true if underlying Interval::IsValid(), and id/endpoints are non-negative.
-    bool IsValid(void) const;
-
-    /// \returns length of underlying
-    size_t Length(void) const;
-
-    /// \}
-
-public:
-    /// \name Comparison Operators
-    /// \{
-
-    /// \returns true if same id & underlying interval
-    bool operator==(const GenomicInterval& other) const;
-
-    /// \returns true if either ids or underlying intervals differ
-    bool operator!=(const GenomicInterval& other) const;
-
-    /// \}
-
 private:
     std::string name_;
     PacBio::BAM::Interval<Position> interval_;
 };
 
-inline GenomicInterval::~GenomicInterval(void) { }
-
-inline std::string GenomicInterval::Name(void) const
-{ return name_; }
-
-inline GenomicInterval& GenomicInterval::Name(const std::string& name)
-{ name_ = name; return *this; }
-
-inline PacBio::BAM::Interval<Position> GenomicInterval::Interval(void) const
-{ return interval_; }
-
-inline GenomicInterval& GenomicInterval::Interval(const PacBio::BAM::Interval<Position>& interval)
-{ interval_ = interval; return *this; }
-
-inline bool GenomicInterval::IsValid(void) const
-{
-    return !name_.empty() &&
-           interval_.Start() >= 0 &&
-           interval_.Stop()  >= 0 &&
-           interval_.IsValid();
-}
-
-inline size_t GenomicInterval::Length(void) const
-{ return interval_.Length(); }
-
-inline Position GenomicInterval::Start(void) const
-{ return interval_.Start(); }
-
-inline GenomicInterval& GenomicInterval::Start(const Position start)
-{ interval_.Start(start); return *this; }
-
-inline Position GenomicInterval::Stop(void) const
-{ return interval_.Stop(); }
-
-inline GenomicInterval& GenomicInterval::Stop(const Position stop)
-{ interval_.Stop(stop); return *this; }
-
-inline bool GenomicInterval::operator==(const GenomicInterval& other) const
-{ return name_ == other.name_ && interval_ == other.interval_; }
-
-inline bool GenomicInterval::operator!=(const GenomicInterval& other) const
-{ return !(*this == other); }
-
 } // namespace BAM
 } // namspace PacBio
 
+#include "pbbam/internal/GenomicInterval.inl"
+
 #endif // GENOMICINTERVAL_H
diff --git a/include/pbbam/GenomicIntervalQuery.h b/include/pbbam/GenomicIntervalQuery.h
index c1e10f9..7df7721 100644
--- a/include/pbbam/GenomicIntervalQuery.h
+++ b/include/pbbam/GenomicIntervalQuery.h
@@ -32,40 +32,80 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file GenomicIntervalQuery.h
+/// \brief Defines the GenomicIntervalQuery class.
+//
 // Author: Derek Barnett
 
 #ifndef GENOMICINTERVALQUERY_H
 #define GENOMICINTERVALQUERY_H
 
 #include "pbbam/GenomicInterval.h"
-#include "pbbam/QueryBase.h"
 #include "pbbam/internal/QueryBase.h"
-#include <string>
+#include <memory>
 
 namespace PacBio {
 namespace BAM {
 
-class BamFile;
-
+/// \brief The GenomicIntervalQuery class provides iterable access to a
+///        DataSet's %BAM records, limiting results to those overlapping a
+///        GenomicInterval.
+///
+/// Example:
+/// \include code/GenomicIntervalQuery.txt
+///
+/// \note Currently, all %BAM files must have a corresponding ".bai" index file.
+///       Use BamFile::EnsureStandardIndexExists before creating the query if
+///       one may not be present.
+///
 class PBBAM_EXPORT GenomicIntervalQuery : public internal::IQuery
 {
 public:
+
+    /// \brief Constructs a new GenomicIntervalQuery, limiting record results to
+    ///        only those overlapping a GenomicInterval.
+    ///
+    /// \param[in] interval genomic interval of interest
+    /// \param[in] dataset  input data source(s)
+    ///
+    /// \throws std::runtime_error on failure to open/read underlying %BAM or
+    ///         BAI files.
+    ///
     GenomicIntervalQuery(const GenomicInterval& interval,
-                         const DataSet& dataset);
+                         const PacBio::BAM::DataSet& dataset);
+    ~GenomicIntervalQuery(void);
+
+public:
+    /// \brief Main iteration point for record access.
+    ///
+    /// Most client code should not need to use this method directly. Use
+    /// iterators instead.
+    ///
+    bool GetNext(BamRecord& r);
 
 public:
+    /// \brief Sets a new genomic interval.
+    ///
+    /// This allows the same dataset/query to be re-used over multiple regions of
+    /// interest:
+    ///
+    /// \include code/GenomicIntervalQuery_Reuse.txt
+    ///
+    /// \param[in] interval new genomic interval
+    /// \returns reference to this query
+    ///
     GenomicIntervalQuery& Interval(const GenomicInterval& interval);
-    GenomicInterval Interval(void) const;
 
-protected:
-    FileIterPtr CreateIterator(const BamFile& bamFile);
+    /// \returns Current genomic interval active on this query.
+    const GenomicInterval& Interval(void) const;
 
 private:
-    GenomicInterval interval_;
+    struct GenomicIntervalQueryPrivate;
+    std::unique_ptr<GenomicIntervalQueryPrivate> d_;
 };
 
-//} // namespace staging
 } // namespace BAM
 } // namspace PacBio
 
diff --git a/include/pbbam/GroupQuery.h b/include/pbbam/GroupQuery.h
deleted file mode 100644
index abc1f5c..0000000
--- a/include/pbbam/GroupQuery.h
+++ /dev/null
@@ -1,88 +0,0 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//
-//  * Redistributions in binary form must reproduce the above
-//    copyright notice, this list of conditions and the following
-//    disclaimer in the documentation and/or other materials provided
-//    with the distribution.
-//
-//  * Neither the name of Pacific Biosciences nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-
-// Author: Yuan Li
-
-#ifndef _GROUP_QUERY_H_
-#define _GROUP_QUERY_H_
-#include "GroupQueryBase.h"
-#include <htslib/sam.h>
-#include <vector>
-
-namespace PacBio {
-namespace BAM {
-
-class PBBAM_EXPORT SequentialGroupQueryBase: public GroupQueryBase
-{
-public:
-    SequentialGroupQueryBase(const BamFile & bamFile);
-
-protected:
-    virtual bool InSameGroup(const BamRecord & record, const BamRecord & another) = 0;
-    bool GetNext(std::vector<BamRecord> & records);
-    PBBAM_SHARED_PTR<samFile>   htsFile_;
-    PBBAM_SHARED_PTR<bam_hdr_t> htsHeader_;
-    BamRecord nextRecord_;
-};
-
-//class PBBAM_EXPORT ZmwQuery: public SequentialGroupQueryBase
-//{
-//public:
-//    ZmwQuery(const BamFile & bamFile)
-//    : SequentialGroupQueryBase(bamFile) { }
-
-//private:
-//    bool InSameGroup(const BamRecord & record, const BamRecord & another) {
-//        return (record.MovieName() == another.MovieName() &&
-//                record.HoleNumber() == another.HoleNumber());
-//    }
-//};
-
-class PBBAM_EXPORT QNameQuery: public SequentialGroupQueryBase
-{
-public:
-    QNameQuery(const BamFile & bamFile) 
-    : SequentialGroupQueryBase(bamFile) { }
-
-private:
-    bool InSameGroup(const BamRecord & record, const BamRecord & another) {
-        return (record.Impl().Name() == another.Impl().Name());
-    }
-};
-
-} // namespace BAM
-} // namespace PacBio
-
-#endif // _SEQUENTIAL_GROUP_QUERY_BASE_H_
diff --git a/include/pbbam/GroupQueryBase.h b/include/pbbam/GroupQueryBase.h
deleted file mode 100644
index 624bdb1..0000000
--- a/include/pbbam/GroupQueryBase.h
+++ /dev/null
@@ -1,214 +0,0 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//
-//  * Redistributions in binary form must reproduce the above
-//    copyright notice, this list of conditions and the following
-//    disclaimer in the documentation and/or other materials provided
-//    with the distribution.
-//
-//  * Neither the name of Pacific Biosciences nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-
-// Author: Yuan Li
-// TODO: Up to Derek's decision. This class mostly references 
-// QueryBase.h. We may make QueryBase a template class and make
-// GroupQueryBase a specialization of the template.
-
-#ifndef _GROUP_QUERY_BASE_H_
-#define _GROUP_QUERY_BASE_H_
-
-#include "pbbam/QueryBase.h"
-#include "pbbam/BamRecord.h"
-#include <memory>
-#include <vector>
-
-namespace PacBio {
-namespace BAM {
-
-class GroupQueryBase;
-
-class GroupQueryIterator
-{
-public:
-    std::vector<BamRecord> & operator* (void);
-    std::vector<BamRecord> * operator-> (void);
-    GroupQueryIterator& operator++ (void);
-    GroupQueryIterator  operator++ (int);
-
-    bool operator== (const GroupQueryIterator & other) const;
-    bool operator!= (const GroupQueryIterator & other) const;
-
-    GroupQueryIterator(void);
-    GroupQueryIterator(GroupQueryBase & parent);
-
-private:
-    GroupQueryBase * query_;
-    std::vector<BamRecord> records_;
-    friend class GroupQueryBase;
-};
-
-class GroupQueryConstIterator
-{
-public:
-    const std::vector<BamRecord>& operator*(void) const;
-    const std::vector<BamRecord>* operator->(void) const;
-    GroupQueryConstIterator& operator++(void);
-    GroupQueryConstIterator operator++(int);
-    bool operator==(const GroupQueryConstIterator& other) const;
-    bool operator!=(const GroupQueryConstIterator& other) const;
-
-    GroupQueryConstIterator(void);
-    GroupQueryConstIterator(const GroupQueryBase& parent);
-
-private:
-    GroupQueryBase* query_;
-    std::vector<BamRecord> records_;
-    friend class GroupQueryBase;
-};
-
-
-class PBBAM_EXPORT GroupQueryBase
-{
-public:
-    typedef GroupQueryIterator iterator;
-
-protected:
-    BamFile file_;
-
-public:
-    virtual ~GroupQueryBase(void);
-
-public:
-    GroupQueryBase::iterator begin(void);
-    GroupQueryBase::iterator end(void);
-
-protected:
-    GroupQueryBase(const BamFile & file);
-    virtual bool GetNext(std::vector<BamRecord>& records) = 0;
-
-    friend class GroupQueryIterator;
-    friend class GroupQueryConstIterator;
-};
-
-inline GroupQueryBase::iterator GroupQueryBase::begin(void)
-{ return GroupQueryBase::iterator(*this); }
-
-inline GroupQueryBase::iterator GroupQueryBase::end(void)
-{ return GroupQueryBase::iterator(); }
-
-
-inline GroupQueryBase::GroupQueryBase(const BamFile & file)
-    : file_(file)
-{ }
-
-inline GroupQueryBase::~GroupQueryBase(void) { }
-
-// -------------------
-// GroupQueryIterator
-// -------------------
-
-inline GroupQueryIterator::GroupQueryIterator(void): query_(0) {}
-
-inline GroupQueryIterator::GroupQueryIterator(GroupQueryBase & parent)
-    : query_(& parent)
-    , records_()
-{
-    if (!(query_->GetNext(records_)))
-        query_ = 0;
-}
-
-inline std::vector<BamRecord>& GroupQueryIterator::operator* (void)
-{ return records_; }
-
-inline std::vector<BamRecord>* GroupQueryIterator::operator-> (void)
-{ return &(operator*()); }
-
-inline GroupQueryIterator& GroupQueryIterator::operator++ (void)
-{
-    if (!(query_->GetNext(records_)))
-        query_ = 0;
-    return *this;
-}
-
-inline GroupQueryIterator GroupQueryIterator::operator++ (int)
-{
-    GroupQueryIterator result(*this);
-    ++(*this);
-    return result;
-}
-
-inline bool GroupQueryIterator::operator==(const GroupQueryIterator& other) const
-{ return query_ == other.query_; }
-
-inline bool GroupQueryIterator::operator!=(const GroupQueryIterator& other) const
-{ return !(*this == other); }
-
-
-// -------------------
-// GroupQueryConstIterator
-// -------------------
-
-inline const std::vector<BamRecord>& GroupQueryConstIterator::operator*(void) const
-{ return records_; }
-
-inline const std::vector<BamRecord>* GroupQueryConstIterator::operator->(void) const
-{ return &(operator*()); }
-
-inline GroupQueryConstIterator& GroupQueryConstIterator::operator++(void)
-{
-    if (!(query_->GetNext(records_)))
-        query_ = 0;
-    return *this;
-}
-
-inline GroupQueryConstIterator GroupQueryConstIterator::operator++(int)
-{
-    GroupQueryConstIterator result(*this);
-    ++(*this);
-    return result;
-}
-
-inline bool GroupQueryConstIterator::operator==(const GroupQueryConstIterator& other) const
-{ return query_ == other.query_; }
-
-inline bool GroupQueryConstIterator::operator!=(const GroupQueryConstIterator& other) const
-{ return !(*this == other); }
-
-inline GroupQueryConstIterator::GroupQueryConstIterator(void): query_(0) { }
-
-inline GroupQueryConstIterator::GroupQueryConstIterator(const GroupQueryBase& parent)
-    : query_(const_cast<GroupQueryBase*>(&parent))
-    , records_()
-{
-    if (!(query_->GetNext(records_)))
-        query_ = 0;
-}
-
-} // namespace BAM
-} // namespace PacBio
-
-#endif // _GROUP_QUERY_BASE_H_
diff --git a/include/pbbam/IndexedFastaReader.h b/include/pbbam/IndexedFastaReader.h
index aa485db..b382d96 100644
--- a/include/pbbam/IndexedFastaReader.h
+++ b/include/pbbam/IndexedFastaReader.h
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file IndexedFastaReader.h
+/// \brief Defines the IndexedFastaReader class.
+//
 // Author: David Alexander
 
 #ifndef INDEXEDFASTAREADER_H
@@ -40,8 +44,7 @@
 
 #include "pbbam/Orientation.h"
 #include "pbbam/Position.h"
-#include "htslib/faidx.h"
-
+#include <htslib/faidx.h>
 #include <string>
 #include <iostream>
 #include <stdexcept>
@@ -52,50 +55,106 @@ namespace BAM {
 class GenomicInterval;
 class BamRecord;
 
+/// \brief The IndexedFastaReader class provides random access to FASTA file
+///        data.
+///
 class IndexedFastaReader {
 
 public:
-    IndexedFastaReader() = delete;
+    /// \name Constructors & Related Methods
+    /// \{
+
+    IndexedFastaReader(void) = delete;
     IndexedFastaReader(const std::string& filename);
-    ~IndexedFastaReader();
+    IndexedFastaReader(const IndexedFastaReader& src);
+    IndexedFastaReader& operator=(const IndexedFastaReader& rhs);
+    ~IndexedFastaReader(void);
 
-public:
-    // Copy constructor
-    IndexedFastaReader(const IndexedFastaReader& src)
-    {
-        if (!Open(src.filename_)) 
-            throw std::runtime_error("Cannot open file " + src.filename_);
-    }
-
-    // Copy assignment operator
-    IndexedFastaReader& operator=(const IndexedFastaReader& rhs)
-    {
-        if(&rhs == this) return *this;
-
-        Open(rhs.filename_);
-        return *this;
-    }
+    /// \}
 
 public:
-    std::string Subsequence(const std::string& id, Position begin, Position end) const;
+    /// \name Sequence Access
+    /// \{
+
+    /// \brief Fetches FASTA sequence for desired interval.
+    ///
+    /// \param[in] id       reference sequence name
+    /// \param[in] begin    start position
+    /// \param[in] end      end position
+    ///
+    /// \returns sequence string at desired interval
+    ///
+    /// \throws std::runtime_error on failure to fetch sequence
+    ///
+    std::string Subsequence(const std::string& id,
+                            Position begin,
+                            Position end) const;
+
+    /// \brief Fetches FASTA sequence for desired interval.
+    ///
+    /// \param[in] interval desired interval
+    ///
+    /// \returns sequence string at desired interval
+    ///
+    /// \throws std::runtime_error on failure to fetch sequence
+    ///
     std::string Subsequence(const GenomicInterval& interval) const;
+
+    /// \brief Fetches FASTA sequence for desired interval.
+    ///
+    /// \param[in] htslibRegion htslib/samtools-formatted REGION string
+    ///                         representing the desired interval
+    ///
+    /// \returns sequence string at desired interval
+    ///
+    /// \throws std::runtime_error on failure to fetch sequence
+    ///
     std::string Subsequence(const char* htslibRegion) const;
 
-public:
-    // \returns subsequence of the reference corresponding to the bamRecord,
-    // oriented and gapped as requested.  For example, "native" orientation
-    // and "gapped" will return the reference sequence with gaps inserted, as
-    // would align against the read in "native" orientation
+    /// \brief Fetches FASTA sequence corresponding to a BamRecord, oriented and
+    ///        gapped as requested.
+    ///
+    /// For example, "native" orientation and "gapped" will return the reference
+    /// sequence with gaps inserted, as would align against the read in "native"
+    /// orientation.
+    ///
+    /// \param[in] bamRecord        input BamRecord to derive interval/CIGAR
+    ///                             data
+    /// \param[in] orientation      orientation of output
+    /// \param[in] gapped           if true, gaps/padding will be inserted, per
+    ///                             record's CIGAR info.
+    /// \param[in] exciseSoftClips  if true, any soft-clipped positions will be
+    ///                             removed from query ends
+    ///
+    /// \returns sequence string over the record's interval
+    ///
+    /// \throws std::runtime_error on failure to fetch sequence
+    ///
     std::string ReferenceSubsequence(const BamRecord& bamRecord,
                                      const Orientation orientation=Orientation::GENOMIC,
                                      const bool gapped=false,
                                      const bool exciseSoftClips=false) const;
 
+    /// \}
+
 public:
-    int NumSequences() const;
+    /// \name File Attributes
+    /// \{
+
+    /// \returns true if FASTA file contains a sequence matching \p name
     bool HasSequence(const std::string& name) const;
+
+    /// \returns number of sequences stored in FASTA file
+    int NumSequences(void) const;
+
+    /// \returns length of FASTA sequence
+    ///
+    /// \throws std::runtime_error if length could not be determined
+    ///
     int SequenceLength(const std::string& name) const;
 
+    /// \}
+
 private:
     std::string filename_;
     faidx_t* handle_;
@@ -105,8 +164,7 @@ private:
     bool Open(const std::string& filename);
 };
 
+}  // namespace BAM
+}  // namespace PacBio
 
-
-}  // PacBio
-}  // BAM
 #endif  // INDEXEDFASTAREADER_H
diff --git a/include/pbbam/Interval.h b/include/pbbam/Interval.h
index 6c2e91a..3f5a40e 100644
--- a/include/pbbam/Interval.h
+++ b/include/pbbam/Interval.h
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file Interval.h
+/// \brief Defines the Interval class.
+//
 // Author: Derek Barnett
 
 #ifndef INTERVAL_H
@@ -48,10 +52,11 @@
 namespace PacBio {
 namespace BAM {
 
-/// \brief Utility class for working with half-open (right-open) intervals. [start, stop)
+/// \brief Represents a half-open (right-open) interval [start, stop)
 ///
 /// \note This class is agnostic whether the values are 0-based or 1-based.
-/// \todo Should it be? Should that go here or "higher up"?
+///       Client code should primarily work with GenomicInterval, which does
+///       enforce this distinction.
 ///
 template<typename T>
 class Interval
@@ -60,21 +65,31 @@ public:
     typedef boost::icl::discrete_interval<T> interval_type;
 
 public:
-
-    /// \name Constructors
+    /// \name Constructors & Related Methods
     /// \{
 
-    /** Default constructor; yields an empty interval [0,0) */
-    inline Interval(void);
+    /// \brief Creates an empty interval [0,0)
+    Interval(void);
 
-    /** Constructor for a singleton interval [val,val+1) */
-    inline Interval(const T val);
+    /// \brief Creates a 'singleton' interval [val,val+1)
+    Interval(const T val);
 
-    /** Constructor for interval from [start, stop) */
-    inline Interval(const T start, const T stop);
+    /// \brief Creates an interval from [start, stop)
+    Interval(const T start, const T stop);
 
-    /** Copy constructor */
-    inline Interval(const Interval<T>& other);
+    Interval(const Interval<T>& other);
+
+    /// \}
+
+public:
+    /// \name Comparison Operators
+    /// \{
+
+    /// \returns true if both intervals share the same endpoints
+    bool operator==(const Interval<T>& other) const;
+
+    /// \returns true if either interval's endpoints differ
+    bool operator!=(const Interval<T>& other) const;
 
     /// \}
 
@@ -82,53 +97,45 @@ public:
     /// \name Attributes
     /// \{
 
-    /// \returns interval start coordinate
-    inline T Start(void) const;
+    /// \returns interval's start coordinate
+    T Start(void) const;
 
     /// Sets this interval's start coordinate.
     ///
     /// \param[in] start
     /// \returns reference to this interval
-    inline Interval<T>& Start(const T& start);
+    ///
+    Interval<T>& Start(const T& start);
 
-    /// \returns interval stop coordinate
-    inline T Stop(void) const;
+    /// \returns interval's stop coordinate
+    T Stop(void) const;
 
     /// Sets this interval's stop coordinate.
     ///
     /// \param[in] stop
     /// \returns reference to this interval
-    inline Interval<T>& Stop(const T& stop);
+    ///
+    Interval<T>& Stop(const T& stop);
 
     /// \}
 
+public:
     /// \name Interval Operations
 
     /// \returns true if this interval is fully covered by (or contained in) \p other
-    inline bool CoveredBy(const Interval<T>& other) const;
+    bool CoveredBy(const Interval<T>& other) const;
 
     //// \returns true if this interval covers (or contains) \p other
-    inline bool Covers(const Interval<T>& other) const;
+    bool Covers(const Interval<T>& other) const;
 
     /// \returns true if intervals interset
-    inline bool Intersects(const Interval<T>& other) const;
+    bool Intersects(const Interval<T>& other) const;
 
     /// \returns true if interval is valid (e.g. start < stop)
-    inline bool IsValid(void) const;
+    bool IsValid(void) const;
 
     /// \returns interval length
-    inline size_t Length(void) const;
-
-    /// \}
-
-    /// \name Comparison Operators
-    /// \{
-
-    /// \returns true if both intervals share the same endpoints
-    inline bool operator==(const Interval<T>& other) const;
-
-    /// \returns true if either interval's endpoints differ
-    inline bool operator!=(const Interval<T>& other) const;
+    size_t Length(void) const;
 
     /// \}
 
@@ -136,77 +143,9 @@ private:
     interval_type data_;
 };
 
-template<typename T>
-Interval<T>::Interval(void)
-    : data_(boost::icl::discrete_interval<T>::right_open(0,0))
-{ }
-
-template<typename T>
-Interval<T>::Interval(const T val)
-    : data_(boost::icl::discrete_interval<T>::right_open(val,val+1))
-{ }
-
-template<typename T>
-Interval<T>::Interval(const T start, const T stop)
-    : data_(boost::icl::discrete_interval<T>::right_open(start,stop))
-{ }
-
-template<typename T>
-Interval<T>::Interval(const Interval<T>& other)
-    : data_(boost::icl::discrete_interval<T>::right_open(other.Start(), other.Stop()))
-{ }
-
-template<typename T>
-inline bool Interval<T>::operator==(const Interval<T>& other) const
-{ return data_ == other.data_; }
-
-template<typename T>
-inline bool Interval<T>::operator!=(const Interval<T>& other) const
-{ return !(data_ == other.data_); }
-
-template<typename T>
-inline bool Interval<T>::CoveredBy(const Interval<T>& other) const
-{ return boost::icl::within(data_, other.data_); }
-
-template<typename T>
-inline bool Interval<T>::Covers(const Interval<T>& other) const
-{ return boost::icl::contains(data_, other.data_); }
-
-template<typename T>
-inline bool Interval<T>::Intersects(const Interval<T>& other) const
-{ return boost::icl::intersects(data_, other.data_); }
-
-template<typename T>
-inline bool Interval<T>::IsValid(void) const
-{ return !boost::icl::is_empty(data_); }
-
-template<typename T>
-inline size_t Interval<T>::Length(void) const
-{ return boost::icl::length(data_); }
-
-template<typename T>
-inline T Interval<T>::Start(void) const
-{ return data_.lower(); }
-
-template<typename T>
-inline Interval<T>& Interval<T>::Start(const T& start)
-{
-    data_ = boost::icl::discrete_interval<T>::right_open(start, data_.upper());
-    return *this;
-}
-
-template<typename T>
-inline T Interval<T>::Stop(void) const
-{ return data_.upper(); }
-
-template<typename T>
-inline Interval<T>& Interval<T>::Stop(const T& stop)
-{
-    data_ = boost::icl::discrete_interval<T>::right_open(data_.lower(), stop);
-    return *this;
-}
-
 } // namespace BAM
 } // namspace PacBio
 
+#include "pbbam/internal/Interval.inl"
+
 #endif // GENOMICINTERVAL_H
diff --git a/include/pbbam/LocalContextFlags.h b/include/pbbam/LocalContextFlags.h
index 53e8c9e..0c59707 100644
--- a/include/pbbam/LocalContextFlags.h
+++ b/include/pbbam/LocalContextFlags.h
@@ -33,6 +33,10 @@
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
 //
+// File Description
+/// \file LocalContextFlags.h
+/// \brief Defines the LocalContextFlags enum & helper method(s).
+//
 // Author: Lance Hepler
 
 #ifndef LOCALCONTEXTFLAGS_H
@@ -43,17 +47,24 @@
 namespace PacBio {
 namespace BAM {
 
+/// \brief The LocalContextFlags enum defines the flags that can be used
+///        to describe a subread's "local context", i.e. whether it is
+///        flanked by barcodes/adapters or its pass orientation.
+///
 enum LocalContextFlags : uint8_t
 {
-    NO_LOCAL_CONTEXT = 0,
-    ADAPTER_BEFORE   = 1,
-    ADAPTER_AFTER    = 2,
-    BARCODE_BEFORE   = 4,
-    BARCODE_AFTER    = 8,
-    FORWARD_PASS     = 16,
-    REVERSE_PASS     = 32
+    NO_LOCAL_CONTEXT = 0,   ///< No context information available
+    ADAPTER_BEFORE   = 1,   ///< Adapter precedes subread
+    ADAPTER_AFTER    = 2,   ///< Adapter follows subread
+    BARCODE_BEFORE   = 4,   ///< Barcode precedes subread
+    BARCODE_AFTER    = 8,   ///< Barcode follows subread
+    FORWARD_PASS     = 16,  ///< Subread's orientation is 'forward pass'
+    REVERSE_PASS     = 32   ///< Subread's orientation is 'reverse pass'
 };
 
+
+/// \returns a LocalContextFlags value containing the result of the bitwise-OR
+///          operation of \p lhs and \p rhs.
 // constexpr is implicitly inline
 constexpr LocalContextFlags operator|(const LocalContextFlags lhs, const LocalContextFlags rhs)
 {
diff --git a/include/pbbam/Orientation.h b/include/pbbam/Orientation.h
index 7582199..c354822 100644
--- a/include/pbbam/Orientation.h
+++ b/include/pbbam/Orientation.h
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file Orientation.h
+/// \brief Defines the Orientation enum.
+//
 // Author: Derek Barnett
 
 #ifndef ORIENTATION_H
@@ -43,10 +47,20 @@
 namespace PacBio {
 namespace BAM {
 
+/// \brief This enum defines the orientations recognized by BamRecord, for
+///        presenting "per-base" data.
+///
+/// Orientation::NATIVE indicates that data should be presented in the subread's
+/// original form.
+///
+/// Orientation::GENOMIC indicates that data should be presented relative to
+/// genomic forward strand. This means that data will be reversed (or
+/// reverse-complemented) if the subread was aligned to the reverse strand.
+///
 enum class Orientation
 {
-    NATIVE
-  , GENOMIC
+    NATIVE      ///< Present data in 'raw' original orientation, regardless of aligned Strand
+  , GENOMIC     ///< Present data in aligned orientation, always relative to Strand::FORWARD.
 };
 
 } // namespace BAM
diff --git a/include/pbbam/PbiBasicTypes.h b/include/pbbam/PbiBasicTypes.h
new file mode 100644
index 0000000..4006ed4
--- /dev/null
+++ b/include/pbbam/PbiBasicTypes.h
@@ -0,0 +1,108 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiBasicTypes.h
+/// \brief Defines the basic data structures used in PBI lookups.
+//
+// Author: Derek Barnett
+
+#ifndef PBIBASICTYPES_H
+#define PBIBASICTYPES_H
+
+#include "pbbam/Compare.h"
+#include "pbbam/Config.h"
+#include <deque>
+#include <utility>
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The IndexResultBlock class represents a contiguous group of records
+///        returned from a PBI lookup.
+///
+/// Contiguous reads that satisfy a PBI lookup query will be merged down into a
+/// single block. This helps to minimize the number of seeks in subsequent read
+/// operations.
+///
+/// A PBI-enabled reader or query can iterate over a list of IndexResultBlocks;
+/// for each block, seeking to the first record and then sequentially reading
+/// 'numReads' consecutive records before needing to seek again.
+///
+struct PBBAM_EXPORT IndexResultBlock
+{
+public:
+    IndexResultBlock(void);
+    IndexResultBlock(size_t idx, size_t numReads);
+
+public:
+    bool operator==(const IndexResultBlock& other) const;
+    bool operator!=(const IndexResultBlock& other) const;
+
+public:
+    size_t  firstIndex_;     ///< index of block's first record in BAM/PBI files (e.g. i-th record)
+    size_t  numReads_;       ///< number of reads in this block
+    int64_t virtualOffset_;  ///< virtual offset of first record in this block
+};
+
+/// \brief container of PBI result blocks
+///
+typedef std::deque<IndexResultBlock> IndexResultBlocks;
+
+/// \brief container of raw PBI indices
+///
+/// This is the primary result of PbiFilter -associated classes. This raw list
+/// can participate in set operations (union, intersect) for compound filters,
+/// and then be merged down into IndexResultBlocks for actual data file
+/// random-access.
+///
+typedef std::vector<size_t> IndexList;
+
+/// \brief pair representing a range of PBI indices: where interval
+///        is [first, second)
+///
+/// Used primarily by the PBI's CoordinateSortedData components.
+///
+/// \sa PbiReferenceEntry, PbiRawReferenceData, & ReferenceLookupData
+///
+typedef std::pair<size_t, size_t> IndexRange;
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/PbiBasicTypes.inl"
+
+#endif // PBIBASICTYPES_H
diff --git a/include/pbbam/PbiBuilder.h b/include/pbbam/PbiBuilder.h
index 6e99302..d1d83bc 100644
--- a/include/pbbam/PbiBuilder.h
+++ b/include/pbbam/PbiBuilder.h
@@ -33,6 +33,10 @@
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
 //
+// File Description
+/// \file PbiBuilder.h
+/// \brief Defines the PbiBuilder class.
+//
 // Author: Derek Barnett
 
 #ifndef PBIBUILDER_H
@@ -50,31 +54,117 @@ class PbiRawData;
 
 namespace internal { class PbiBuilderPrivate; }
 
-/// This class may be used to construct PBI index data while a BAM file is being
-/// written, rather than waiting to process it at the end.
+/// \brief The PbiBuilder class constructs PBI index data from %BAM record data.
+///
+/// Records are added one-by-one. This allows for either whole-file indexing of
+/// existing %BAM files or for indexing "on-the-fly" alongside a %BAM file as it
+/// is generated.
+///
+/// For simple PBI creation from existing %BAM files, see PbiFile::CreateFrom.
+/// This is the recommended approach, unless finer control or additional
+/// processing is needed.
 ///
 class PBBAM_EXPORT PbiBuilder
 {
 public:
+    /// \brief This enum allows you to control the compression level of the
+    ///        output PBI file.
+    ///
+    /// Values are equivalent to zlib compression levels. See its documentation
+    /// for more details: http://www.zlib.net/manual.html
+    ///
+    enum CompressionLevel
+    {
+        CompressionLevel_0 = 0
+      , CompressionLevel_1 = 1
+      , CompressionLevel_2 = 2
+      , CompressionLevel_3 = 3
+      , CompressionLevel_4 = 4
+      , CompressionLevel_5 = 5
+      , CompressionLevel_6 = 6
+      , CompressionLevel_7 = 7
+      , CompressionLevel_8 = 8
+      , CompressionLevel_9 = 9
+
+      , DefaultCompression = -1
+      , NoCompression      = CompressionLevel_0
+      , FastCompression    = CompressionLevel_1
+      , BestCompression    = CompressionLevel_9
+    };
+
+public:
     /// \name Constructors & Related Methods
     /// \{
 
-    /// Initialize builder to write data to \p pbiFilename.
+    /// \brief Initializes builder to write data to \p pbiFilename.
+    ///
+    /// \param[in] pbiFilename      output filename
+    /// \param[in] compressionLevel zlib compression level
+    /// \param[in] numThreads       number of threads for compression. If set to
+    ///                             0, PbiBuilder will attempt to determine a
+    ///                             reasonable estimate. If set to 1, this will
+    ///                             force single-threaded execution. No checks
+    ///                             are made against an upper limit.
+    ///
+    /// \throws std::runtime_error if PBI file cannot be opened for writing
+    ///
+    PbiBuilder(const std::string& pbiFilename,
+               const PbiBuilder::CompressionLevel compressionLevel = PbiBuilder::DefaultCompression,
+               const size_t numThreads = 4);
+
+    /// \brief Initializes builder to write data to \p pbiFilename.
+    ///
+    /// Reference data-tracking structures will be initialized to expect
+    /// \p numReferenceSequences. (This is useful so that we can mark any
+    /// references that lack observed data appropriately).
+    ///
+    /// \param[in] pbiFilename              output filename
+    /// \param[in] numReferenceSequences    number of possible reference
+    ///                                     sequences, e.g. BamHeader::NumSequences
+    /// \param[in] compressionLevel zlib compression level
+    /// \param[in] numThreads       number of threads for compression. If set to
+    ///                             0, PbiBuilder will attempt to determine a
+    ///                             reasonable estimate. If set to 1, this will
+    ///                             force single-threaded execution. No checks
+    ///                             are made against an upper limit.
     ///
     /// \throws std::runtime_error if PBI file cannot be opened for writing
     ///
-    PbiBuilder(const std::string& pbiFilename);
+    PbiBuilder(const std::string& pbiFilename,
+               const size_t numReferenceSequences,
+               const PbiBuilder::CompressionLevel compressionLevel = PbiBuilder::DefaultCompression,
+               const size_t numThreads = 4);
 
-    /// Initialize builder to write data to \p pbiFilename. Reference data-tracking
-    /// structures will be initialized to expect \p numReferenceSequences. (This is
-    /// useful so that we can mark any references that lack observed data appropriately).
+    /// \brief Initializes builder to write data to \p pbiFilename.
+    ///
+    /// Reference data-tracking structures will be initialized to expect
+    /// \p numReferenceSequences, but only if \p isCoordinateSorted is true.
+    ///
+    /// \param[in] pbiFilename              output filename
+    /// \param[in] numReferenceSequences    number of possible reference
+    ///                                     sequences, e.g. BamHeader::NumSequences
+    /// \param[in] isCoordinateSorted       if false, disables reference
+    ///                                     sequence tracking
+    ///                                     (BamHeader::SortOrder != "coordinate")
+    /// \param[in] compressionLevel zlib compression level
+    /// \param[in] numThreads       number of threads for compression. If set to
+    ///                             0, PbiBuilder will attempt to determine a
+    ///                             reasonable estimate. If set to 1, this will
+    ///                             force single-threaded execution. No checks
+    ///                             are made against an upper limit.
     ///
     /// \throws std::runtime_error if PBI file cannot be opened for writing
     ///
-    PbiBuilder(const std::string& pbiFilename, const size_t numReferenceSequences);
+    PbiBuilder(const std::string& pbiFilename,
+               const size_t numReferenceSequences,
+               const bool isCoordinateSorted,
+               const PbiBuilder::CompressionLevel compressionLevel = PbiBuilder::DefaultCompression,
+               const size_t numThreads = 4);
 
-    /// On destruction, data summaries are calculated, raw data is written to file, and
-    /// file handle closed.
+    /// \brief Destroys builder, writing its data out to PBI file.
+    ///
+    /// On destruction, data summaries are calculated, raw data is written to
+    /// file, and file handle closed.
     ///
     ~PbiBuilder(void);
 
@@ -84,28 +174,27 @@ public:
     /// \name Index Building
     /// \{
 
-    /// Adds \p record's data to underlying raw data structure. \p vOffset is the BGZF
-    /// virtual offset into the BAM file where the record begins.
+    /// \brief Adds \p record's data to underlying raw data structure.
+    ///
+    /// \note \p vOffset is a BGZF \b virtual offset into the %BAM file. To get
+    ///          this value, you should use one of the following: \n
+    ///        - while reading existing %BAM: BamReader::VirtualTell \n
+    ///        - while writing new %BAM:      BamWriter::Write(const BamRecord& record, int64_t* vOffset) \n
+    ///
+    ///
+    /// To build a PBI index while generating a %BAM file:
+    /// \include code/PbiBuilder_WithWriter.txt
     ///
-    /// \sa BamWriter::Write(const BamRecord& record, int64_t* vOffset) for the easiest
-    ///     way to retrieve this information while generating a BAM file. See example below:
+    /// To build a PBI index from an existing %BAM file:
+    /// \include code/PbiBuilder_WithReader.txt
     ///
-    /// \code{.cpp}
-    ///  BamWriter writer(...);
-    ///  PbiBuilder pbiBuilder(...);
-    ///  int64_t vOffset;
-    ///  while (...) {
-    ///     BamRecord record;
-    ///     // ... generate record data ...
-    ///     writer.Write(record, &vOffset);
-    ///     pbiBuilder.AddRecord(record, &vOffset);
-    ///  }
-    /// \endcode
+    /// \param[in] record   input BamRecord to pull index data from
+    /// \param[in] vOffset  \b virtual offset into %BAM file where record begins
     ///
     void AddRecord(const BamRecord& record, const int64_t vOffset);
 
-    /// \returns const reference to current raw index data. Mostly only used for testing;
-    ///          shouldn't be needed by most client code.
+    /// \returns const reference to current raw index data. Mostly only used for
+    ///          testing; shouldn't be needed by most client code.
     ///
     const PbiRawData& Index(void) const;
 
diff --git a/include/pbbam/PbiFile.h b/include/pbbam/PbiFile.h
index 81c3de3..89bffa3 100644
--- a/include/pbbam/PbiFile.h
+++ b/include/pbbam/PbiFile.h
@@ -33,12 +33,17 @@
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
 //
+// File Description
+/// \file PbiFile.h
+/// \brief Defines the PbiFile enums, typedefs, and methods.
+//
 // Author: Derek Barnett
 
 #ifndef PBIFILE_H
 #define PBIFILE_H
 
 #include "pbbam/Config.h"
+#include "pbbam/PbiBuilder.h"
 #include <string>
 
 namespace PacBio {
@@ -46,40 +51,43 @@ namespace BAM {
 
 class BamFile;
 
-// class-like namespace
 namespace PbiFile
 {
-
-    /// PBI File Sections
-    ///
-    /// See (spec/doc links) for more details.
+    /// \brief This enum describes the PBI file sections
     ///
     enum Section
     {
-         SUBREAD   = 0x0000  ///< SubreadData   (required)
+         BASIC     = 0x0000  ///< BasicData     (required)
        , MAPPED    = 0x0001  ///< MappedData    (always optional)
        , REFERENCE = 0x0002  ///< ReferenceData (always optional)
        , BARCODE   = 0x0004  ///< BarcodeData   (always optional)
 
-       , ALL  = SUBREAD | MAPPED | REFERENCE | BARCODE  ///< synonym for building
+       , ALL  = BASIC | MAPPED | REFERENCE | BARCODE    ///< Synonym for 'all sections'
     };
+
+    /// \brief Helper typedef for storing multiple Section flags.
+    ///
     typedef uint16_t Sections;
 
-    /// PBI File Version
+    /// \brief This enum describes the PBI file version.
     enum VersionEnum
     {
-        Version_3_0_0 = 0x030000
+        Version_3_0_0 = 0x030000        ///< v3.0.0
+      , Version_3_0_1 = 0x030001        ///< v3.0.1
 
-      , CurrentVersion = Version_3_0_0
+      , CurrentVersion = Version_3_0_1  ///< Synonym for the current PBI version.
     };
 
-    /// Builds PBI index data from the supplied ".bam" file and writes a ".pbi" file.
+    /// \brief Builds PBI index data from the supplied %BAM file and writes a
+    ///        ".pbi" file.
     ///
-    /// \param[in] bamFile The source BamFile.
+    /// \param[in] bamFile source %BAM file
     ///
-    /// \throws std::exception if index file could not be created
+    /// \throws std::runtime_error if index file could not be created
     ///
-    PBBAM_EXPORT void CreateFrom(const BamFile& bamFile);
+    PBBAM_EXPORT void CreateFrom(const BamFile& bamFile,
+                                 const PbiBuilder::CompressionLevel compressionLevel = PbiBuilder::DefaultCompression,
+                                 const size_t numThreads = 4);
 
 } // namespace PbiFile
 } // namespace BAM
diff --git a/include/pbbam/PbiFilter.h b/include/pbbam/PbiFilter.h
new file mode 100644
index 0000000..65ef7ef
--- /dev/null
+++ b/include/pbbam/PbiFilter.h
@@ -0,0 +1,343 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiFilter.h
+/// \brief Defines the PbiFilter class & helper 'concept'.
+//
+// Author: Derek Barnett
+
+#ifndef PBIFILTER_H
+#define PBIFILTER_H
+
+#include "pbbam/DataSet.h"
+#include "pbbam/PbiBasicTypes.h"
+#include "pbbam/PbiIndex.h"
+#include <boost/concept_check.hpp>
+#include <memory>
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+namespace internal { struct PbiFilterPrivate; }
+
+/// \brief The PbiFilterConcept class provides compile-time enforcement of the
+///        required interface for PbiFilter's child filters.
+///
+template<typename T>
+struct PbiFilterConcept
+{
+    BOOST_CONCEPT_USAGE(PbiFilterConcept)
+    {
+        // All PBI filters (built-in or client-defined) need only provide this
+        // interface:
+        //
+        //    bool Accepts(const PbiRawData& index, const size_t row) const;
+        //
+        const PbiRawData index;
+        bool result = filter.Accepts(index, 0);
+        (void)result;
+    }
+
+private:
+    T filter;
+//    PbiRawData index;
+};
+
+/// \brief The PbiFilter class provides a mechanism for performing PBI-enabled
+///        lookups.
+///
+/// The PbiFilter API is designed to be flexible, both built-in and for
+/// client-side customization. Built-in filters are provided for common queries,
+/// and client code can define and use custom filters as well. More complex
+/// filtering rules can be constructed via composition of simpler child filters.
+///
+/// Filter objects used as children of PbiFilter need only provide a method that
+/// matches this signature:
+///
+/// \include code/PbiFilter_Interface.txt
+///
+/// This requirement is enforced internally, using the PbiFilterConcept to
+/// require a compatible interface without requiring inheritance. This approach
+/// allows composition of heterogeneous filter types without worrying about a
+/// class hierarchy, pointer ownership across library/client boundaries, etc.
+///
+/// Thus a client application can define a custom filter if the built-in filters
+/// do not quite meet requirements. This filter may then be used in further
+/// PbiFilter composition, or passed directly to PbiFilterQuery.
+///
+/// \include code/PbiFilter_CustomFilter.txt
+///
+/// As mentioned above, complex filters can be built up using multiple "child"
+/// filters. These complex filters are constructed by using either
+/// PbiFilter::Union (logical-OR over all direct children) or
+/// PbiFilter::Intersection (logical-AND over direct children).
+///
+/// \include code/PbiFilter_Composition.txt
+///
+class PBBAM_EXPORT PbiFilter
+{
+public:
+    enum CompositionType
+    {
+        INTERSECT
+      , UNION
+    };
+
+public:
+    /// \name Set Operations
+    /// \{
+
+    /// \brief Creates a PbiFilter that acts as an intersection of the input
+    ///        filters.
+    ///
+    /// A record must satisfy \b all of this filter's direct "child" filters.
+    ///
+    /// Equivalent to:
+    /// \include code/PbiFilter_Intersection_Copy.txt
+    ///
+    /// \param[in] filters  vector of child filters
+    /// \returns composite filter
+    ///
+    static PbiFilter Intersection(const std::vector<PbiFilter>& filters);
+
+    /// \brief Creates a PbiFilter that acts as an intersection of the input
+    ///        filters.
+    ///
+    /// A record must satisfy \b all of this filter's direct "child" filters.
+    ///
+    /// Equivalent to:
+    /// \include code/PbiFilter_Intersection_Move.txt
+    ///
+    /// \param[in] filters  vector of child filters
+    /// \returns composite filter
+    ///
+    static PbiFilter Intersection(std::vector<PbiFilter>&& filters);
+
+    /// \brief Creates a PbiFilter that acts as a union of the input filters.
+    ///
+    /// A record must satisfy \b any of this filter's direct "child" filters.
+    ///
+    /// Equivalent to:
+    /// \include code/PbiFilter_Union_Copy.txt
+    ///
+    /// \param[in] filters  vector of child filters
+    /// \returns composite filter
+    ///
+    static PbiFilter Union(const std::vector<PbiFilter>& filters);
+
+    /// \brief Creates a PbiFilter that acts as a union of the input filters.
+    ///
+    /// A record must satisfy \b any of this filter's direct "child" filters.
+    ///
+    /// Equivalent to:
+    /// \include code/PbiFilter_Union_Move.txt
+    ///
+    /// \param[in] filters  vector of child filters
+    /// \returns composite filter
+    ///
+    static PbiFilter Union(std::vector<PbiFilter>&& filters);
+
+    /// \}
+
+public:
+    /// \name Constructors & Related Methods
+    /// \{
+
+    /// \brief Creates a PbiFilter from a %DataSet's described filters.
+    ///
+    /// A DataSet may contain a Filters element, itself a list of Filter
+    /// elements. Each Filter element will contain a Properties element, itself
+    /// a list of Property elements.
+    ///
+    /// The Filters hierarchy looks like this (in its XML output):
+    /// \verbinclude examples/plaintext/PbiFilter_DataSetXmlFilters.txt
+    ///
+    /// The resulting PbiFilter represents a union over all Filter elements,
+    /// with each Filter element requiring an intersection of all of its
+    /// Property criteria. These Property elements are mapped to built-in PBI
+    /// filter types. To use the labels in the example XML above, the filter
+    /// created here is equivalent to:
+    ///
+    /// (A && B) || (C && D)
+    ///
+    /// If a DataSet lacks any Filters, then an empty PbiFilter will be created
+    /// - corresponding to the dataset's entire contents.
+    ///
+    /// \param[in] dataset  dataset that may contain filters
+    /// \returns composite filter
+    ///
+    static PbiFilter FromDataSet(const DataSet& dataset);
+
+public:
+
+    /// \brief Creates an empty filter.
+    ///
+    /// \note An empty filter will result in all records being returned, e.g.
+    ///       for query iteration.
+    ///
+    /// \param[in] type composition type. Any additional child filters added to
+    ///                 this composite will be treated according to this type.
+    ///                 If INTERSECT, a record must match all child filters. If
+    ///                 UNION, a record must match any child filter.
+    ///
+    PbiFilter(const CompositionType type = INTERSECT);
+
+    /// \brief Creates a composite filter (of INTERSECT type) with an initial
+    ///        child filter.
+    ///
+    /// \note T must satisfy PbiFilterConcept
+    ///
+    /// \param[in] filter initial child filter
+    ///
+    template<typename T>
+    PbiFilter(const T& filter);
+
+    /// \brief Creates a composite filter (of INTERSECT type) with an initial
+    ///        child filter.
+    ///
+    /// \note T must satisfy PbiFilterConcept
+    ///
+    /// \param[in] filter initial child filter
+    ///
+    template<typename T>
+    PbiFilter(T&& filter);
+
+    /// \brief Creates a composite filter (of INTERSECT type) with a list of
+    ///        initial child filters.
+    ///
+    /// \param[in] filters initial child filters
+    ///
+    PbiFilter(const std::vector<PbiFilter>& filters);
+
+    /// \brief Creates a composite filter (of INTERSECT type) with a list of
+    ///        initial child filters.
+    ///
+    /// \param[in] filters initial child filters
+    ///
+    PbiFilter(std::vector<PbiFilter>&& filters);
+
+    PbiFilter(const PbiFilter& other);
+    PbiFilter(PbiFilter&& other) noexcept;
+    PbiFilter& operator=(const PbiFilter& other);
+    PbiFilter& operator=(PbiFilter&& other) noexcept;
+    ~PbiFilter(void);
+
+    /// \}
+
+public:
+    /// \name Composition
+    /// \{
+
+    /// \brief Adds a new child filter of type T.
+    ///
+    /// \param[in] filter   additional child filter. Type T must satisfy
+    ///                     PbiFilterConcept.
+    /// \returns reference to this filter
+    ///
+    template<typename T>
+    PbiFilter& Add(const T& filter);
+
+    /// \brief Adds a new child filter of type T.
+    ///
+    /// \param[in] filter   additional child filter. Type T must satisfy
+    ///                     PbiFilterConcept.
+    /// \returns reference to this filter
+    ///
+    template<typename T>
+    PbiFilter& Add(T&& filter);
+
+    /// \brief Adds a new child filter.
+    ///
+    /// \param[in] filter   additional child filter
+    /// \returns reference to this filter
+    ///
+    PbiFilter& Add(const PbiFilter& filter);
+
+    /// \brief Adds a new child filter.
+    ///
+    /// \param[in] filter   additional child filter
+    /// \returns reference to this filter
+    ///
+    PbiFilter& Add(PbiFilter&& filter);
+
+    /// \brief Adds child filters.
+    ///
+    /// \param[in] filters  additional child filters
+    /// \returns reference to this filter
+    ///
+    PbiFilter& Add(const std::vector<PbiFilter>& filters);
+
+    /// \brief Adds child filters.
+    ///
+    /// \param[in] filters  additional child filters
+    /// \returns reference to this filter
+    ///
+    PbiFilter& Add(std::vector<PbiFilter>&& filters);
+
+    /// \returns true if this filter has no child filters.
+    bool IsEmpty(void) const;
+
+    /// \}
+
+public:
+    /// \name Lookup
+    /// \{
+
+    /// \brief Performs the PBI index lookup, combining child results for a
+    ///        composite filter.
+    ///
+    /// \param[in] idx  PBI (raw) index object
+    /// \param[in] row  record number in %BAM/PBI files
+    ///
+    /// \returns true if record at \p row passes this filter's criteria,
+    ///          including children (if any)
+    ///
+    bool Accepts(const BAM::PbiRawData& idx, const size_t row) const;
+
+    /// \}
+
+private:
+    std::unique_ptr<internal::PbiFilterPrivate> d_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/PbiFilter.inl"
+#include "pbbam/PbiFilterTypes.h"
+
+#endif // PBIFILTER_H
diff --git a/include/pbbam/ZmwGroupQuery.h b/include/pbbam/PbiFilterQuery.h
similarity index 58%
copy from include/pbbam/ZmwGroupQuery.h
copy to include/pbbam/PbiFilterQuery.h
index 8b88113..120a30d 100644
--- a/include/pbbam/ZmwGroupQuery.h
+++ b/include/pbbam/PbiFilterQuery.h
@@ -32,36 +32,65 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file PbiFilterQuery.h
+/// \brief Defines the PbiFilterQuery class.
+//
 // Author: Derek Barnett
 
-#ifndef ZMWGROUPQUERY_H
-#define ZMWGROUPQUERY_H
+#ifndef PBIFILTERQUERY_H
+#define PBIFILTERQUERY_H
 
 #include "pbbam/Config.h"
+#include "pbbam/PbiFilter.h"
 #include "pbbam/internal/QueryBase.h"
 #include <vector>
 
 namespace PacBio {
 namespace BAM {
-//namespace staging {
 
-class PBBAM_EXPORT ZmwGroupQuery : public internal::IGroupQuery
+/// \brief The PbiFilterQuery class provides iterable access to a DataSet's
+///        %BAM records, limiting results to those matching filter criteria.
+///
+/// Example:
+/// \include code/PbiFilterQuery.txt
+///
+/// \note Currently, all %BAM files must have a corresponding ".pbi" index file.
+///       Use BamFile::EnsurePacBioIndexExists before creating the query if one
+///       may not be present.
+///
+class PBBAM_EXPORT PbiFilterQuery : public internal::IQuery
 {
 public:
-    ZmwGroupQuery(const DataSet& dataset);
-    ZmwGroupQuery(const std::vector<int32_t>& zmwWhitelist,
-                  const DataSet& dataset);
+    /// \brief Creates a new PbiFilterQuery, limiting record results to only
+    ///        those matching filter criteria
+    ///
+    /// \param[in] filter   filtering criteria
+    /// \param[in] dataset  input data source(s)
+    ///
+    /// \throws std::runtime_error on failure to open/read underlying %BAM or
+    ///         PBI files.
+    ///
+    PbiFilterQuery(const PbiFilter& filter, const DataSet& dataset);
+
+    ~PbiFilterQuery(void);
+
+public:
 
-protected:
-    FileIterPtr CreateIterator(const BamFile& file);
+    /// \brief Main iteration point for record access.
+    ///
+    /// Most client code should not need to use this method directly. Use
+    /// iterators instead.
+    ///
+    bool GetNext(BamRecord& r);
 
 private:
-    std::vector<int> whitelist_;
+    struct PbiFilterQueryPrivate;
+    std::unique_ptr<PbiFilterQueryPrivate> d_;
 };
 
-//} // namespace staging
 } // namespace BAM
 } // namespace PacBio
 
-#endif // ZMWGROUPQUERY_H
+#endif // PBIFILTERQUERY_H
diff --git a/include/pbbam/PbiFilterTypes.h b/include/pbbam/PbiFilterTypes.h
new file mode 100644
index 0000000..b6ae4ad
--- /dev/null
+++ b/include/pbbam/PbiFilterTypes.h
@@ -0,0 +1,1028 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiFilterTypes.h
+/// \brief Defines the built-in PBI filters.
+//
+// Author: Derek Barnett
+
+#ifndef PBIFILTERTYPES_H
+#define PBIFILTERTYPES_H
+
+#include "pbbam/Compare.h"
+#include "pbbam/PbiFilter.h"
+#include "pbbam/PbiIndex.h"
+#include <boost/optional.hpp>
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+namespace internal {
+
+/// \internal
+///
+/// Provides basic container for value/compare-type pair
+///
+template<typename T>
+struct FilterBase
+{
+public:
+    T value_;
+    boost::optional<std::vector<T> > multiValue_;
+    Compare::Type cmp_;
+protected:
+    FilterBase(const T& value, const Compare::Type cmp);
+    FilterBase(T&& value, const Compare::Type cmp);
+    FilterBase(const std::vector<T>& values);
+    FilterBase(std::vector<T>&& values);
+protected:
+    bool CompareHelper(const T& lhs) const;
+private:
+    bool CompareSingleHelper(const T& lhs) const;
+    bool CompareMultiHelper(const T& lhs) const;
+};
+
+/// \internal
+///
+/// Dispatches the lookup to BarcodeLookupData
+///
+template<typename T, BarcodeLookupData::Field field>
+struct BarcodeDataFilterBase : public FilterBase<T>
+{
+protected:
+    BarcodeDataFilterBase(const T& value, const Compare::Type cmp);
+    BarcodeDataFilterBase(T&& value, const Compare::Type cmp);
+    BarcodeDataFilterBase(const std::vector<T>& values);
+    BarcodeDataFilterBase(std::vector<T>&& values);
+public:
+    bool Accepts(const PbiRawData& idx, const size_t row) const;
+};
+
+/// \internal
+///
+/// Dispatches the lookup to BasicLookupData
+///
+template<typename T, BasicLookupData::Field field>
+struct BasicDataFilterBase : public FilterBase<T>
+{
+protected:
+    BasicDataFilterBase(const T& value, const Compare::Type cmp);
+    BasicDataFilterBase(T&& value, const Compare::Type cmp);
+    BasicDataFilterBase(const std::vector<T>& values);
+    BasicDataFilterBase(std::vector<T>&& values);
+public:
+    bool Accepts(const PbiRawData& idx, const size_t row) const;
+};
+
+/// \internal
+///
+/// Dispatches the lookup to MappedLookupData
+///
+template<typename T, MappedLookupData::Field field>
+struct MappedDataFilterBase : public FilterBase<T>
+{
+protected:
+    MappedDataFilterBase(const T& value, const Compare::Type cmp);
+    MappedDataFilterBase(T&& value, const Compare::Type cmp);
+    MappedDataFilterBase(const std::vector<T>& values);
+    MappedDataFilterBase(std::vector<T>&& values);
+public:
+    bool Accepts(const PbiRawData& idx, const size_t row) const;
+};
+
+} // namespace internal
+
+/// \brief The PbiAlignedEndFilter class provides a PbiFilter-compatible filter
+///        on aligned end.
+///
+/// Example: \include code/PbiAlignedEndFilter.txt
+///
+/// \sa BamRecord::AlignedEnd
+///
+struct PbiAlignedEndFilter
+    : public internal::MappedDataFilterBase<uint32_t, MappedLookupData::A_END>
+{
+public:
+    /// \brief Creates a filter on aligned end.
+    ///
+    /// \param[in] position value to compare on
+    /// \param[in] cmp      compare type
+    ///
+    PbiAlignedEndFilter(const uint32_t position,
+                        const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiAlignedLengthFilter class provides a PbiFilter-compatible
+///        filter on aligned length.
+///
+/// Example: \include code/PbiAlignedLengthFilter.txt
+///
+/// \sa BamRecord::AlignedEnd, BamRecord::AlignedStart
+///
+struct PbiAlignedLengthFilter : public internal::FilterBase<uint32_t>
+{
+public:
+    /// \brief Creates a filter on aligned length.
+    ///
+    /// \param[in] length value to compare on
+    /// \param[in] cmp      compare type
+    ///
+    PbiAlignedLengthFilter(const uint32_t length,
+                           const Compare::Type cmp = Compare::EQUAL);
+
+public:
+    /// \brief Performs the actual index lookup.
+    ///
+    /// Most client code should not need to use this method directly.
+    ///
+    bool Accepts(const PbiRawData& idx, const size_t row) const;
+};
+
+/// \brief The PbiAlignedStartFilter class provides a PbiFilter-compatible
+///        filter on aligned start.
+///
+/// Example: \include code/PbiAlignedStartFilter.txt
+///
+/// \sa BamRecord::AlignedStart
+///
+struct PbiAlignedStartFilter
+    : public internal::MappedDataFilterBase<uint32_t, MappedLookupData::A_START>
+{
+public:
+    /// \brief Creates a filter on aligned start.
+    ///
+    /// \param[in] position value to compare on
+    /// \param[in] cmp      compare type
+    ///
+    PbiAlignedStartFilter(const uint32_t position,
+                          const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiAlignedStrandFilter class provides a PbiFilter-compatible
+///        filter on aligned strand.
+///
+/// Example: \include code/PbiAlignedStrandFilter.txt
+///
+/// \sa BamRecord::AlignedStrand
+///
+struct PbiAlignedStrandFilter
+    : public internal::MappedDataFilterBase<Strand, MappedLookupData::STRAND>
+{
+public:
+    /// \brief Creates a strand filter.
+    ///
+    /// \param[in] strand  strand value to compare on
+    /// \param[in] cmp     compare type
+    ///
+    PbiAlignedStrandFilter(const Strand strand,
+                           const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiBarcodeFilter class provides a PbiFilter-compatible filter on
+///        barcode ID.
+///
+/// Any record with this barcode ID (forward or reverse) will pass this filter.
+///
+/// Example: \include code/PbiBarcodeFilter.txt
+///
+/// \sa BamRecord::BarcodeForward, BamRecord::BarcodeReverse
+///
+struct PbiBarcodeFilter
+{
+public:
+    /// \brief Creates a single-value barcode filter.
+    ///
+    /// \param[in] barcode  barcode ID to compare on
+    /// \param[in] cmp      compare type
+    ///
+    PbiBarcodeFilter(const uint16_t barcode,
+                     const Compare::Type cmp = Compare::EQUAL);
+
+    /// \brief Creates a 'whitelisted' barcode filter.
+    ///
+    /// \note There is no compare type parameter here, it is always
+    ///       Compare::EQUAL. Records will match at least one value from the
+    ///       whitelist, exactly, in either bc_forward or bc_reverse.
+    ///
+    /// \param[in] whitelist  barcode IDs to compare on
+    ///
+    PbiBarcodeFilter(const std::vector<uint16_t>& whitelist);
+
+    /// \brief Creates a 'whitelisted' barcode filter.
+    ///
+    /// \note There is no compare type parameter here, it is always
+    ///       Compare::EQUAL. Records will match at least one value from the
+    ///       whitelist, exactly, in either bc_forward or bc_reverse.
+    ///
+    /// \param[in] whitelist  barcode IDs to compare on
+    ///
+    PbiBarcodeFilter(std::vector<uint16_t>&& whitelist);
+
+public:
+    /// \brief Performs the actual index lookup.
+    ///
+    /// Most client code should not need to use this method directly.
+    ///
+    bool Accepts(const PbiRawData& idx, const size_t row) const;
+
+private:
+    PbiFilter compositeFilter_;
+};
+
+/// \brief The PbiBarcodeForwardFilter class provides a PbiFilter-compatible
+///        filter on forward barcode ID.
+///
+/// Example: \include code/PbiBarcodeForwardFilter.txt
+///
+/// \sa BamRecord::BarcodeForward
+///
+struct PbiBarcodeForwardFilter
+    : public internal::BarcodeDataFilterBase<uint16_t, BarcodeLookupData::BC_FORWARD>
+{
+public:
+    /// \brief Creates a single-value forward barcode filter.
+    ///
+    /// \param[in] bcFwdId  (forward) barcode ID to compare on
+    /// \param[in] cmp      compare type
+    ///
+    PbiBarcodeForwardFilter(const uint16_t bcFwdId,
+                            const Compare::Type cmp = Compare::EQUAL);
+
+    /// \brief Creates a 'whitelisted' forward barcode filter.
+    ///
+    /// \note There is no compare type parameter here, it is always
+    ///       Compare::EQUAL. Records will match at least one value from the
+    ///       whitelist, exactly, in bc_forward.
+    ///
+    /// \param[in] whitelist  barcode IDs to compare on
+    ///
+    PbiBarcodeForwardFilter(const std::vector<uint16_t>& whitelist);
+
+    /// \brief Creates a 'whitelisted' forward barcode filter.
+    ///
+    /// \note There is no compare type parameter here, it is always
+    ///       Compare::EQUAL. Records will match at least one value from the
+    ///       whitelist, exactly, in bc_forward.
+    ///
+    /// \param[in] whitelist  barcode IDs to compare on
+    ///
+    PbiBarcodeForwardFilter(std::vector<uint16_t>&& whitelist);
+};
+
+/// \brief The PbiBarcodeQualityFilter class provides a PbiFilter-compatible
+///        filter on barcode quality.
+///
+/// Example: \include code/PbiBarcodeQualityFilter.txt
+///
+/// \sa BamRecord::BarcodeQuality
+///
+struct PbiBarcodeQualityFilter
+    : public internal::BarcodeDataFilterBase<uint8_t, BarcodeLookupData::BC_QUALITY>
+{
+public:
+    /// \brief Creates a single-value barcode quality filter.
+    ///
+    /// \param[in] bcQuality    barcode quality to compare on
+    /// \param[in] cmp          compare type
+    ///
+    PbiBarcodeQualityFilter(const uint8_t bcQuality,
+                            const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiBarcodeReverseFilter class provides a PbiFilter-compatible
+///        filter on reverse barcode ID.
+///
+/// Example: \include code/PbiBarcodeReverseFilter.txt
+///
+/// \sa BamRecord::BarcodeReverse
+///
+struct PbiBarcodeReverseFilter
+    : public internal::BarcodeDataFilterBase<uint16_t, BarcodeLookupData::BC_REVERSE>
+{
+public:
+    /// \brief Creates a single-value reverse barcode filter.
+    ///
+    /// \param[in] bcRevId  (reverse) barcode ID to compare on
+    /// \param[in] cmp      compare type
+    ///
+    PbiBarcodeReverseFilter(const uint16_t bcRevId,
+                            const Compare::Type cmp = Compare::EQUAL);
+
+    /// \brief Creates a 'whitelisted' reverse barcode filter.
+    ///
+    /// \note There is no compare type parameter here, it is always
+    ///       Compare::EQUAL. Records will match at least one value from the
+    ///       whitelist, exactly, in bc_reverse.
+    ///
+    /// \param[in] whitelist  barcode IDs to compare on
+    ///
+    PbiBarcodeReverseFilter(const std::vector<uint16_t>& whitelist);
+
+    /// \brief Creates a 'whitelisted' reverse barcode filter.
+    ///
+    /// \note There is no compare type parameter here, it is always
+    ///       Compare::EQUAL. Records will match at least one value from the
+    ///       whitelist, exactly, in bc_reverse.
+    ///
+    /// \param[in] whitelist  barcode IDs to compare on
+    ///
+    PbiBarcodeReverseFilter(std::vector<uint16_t>&& whitelist);
+};
+
+/// \brief The PbiBarcodesFilter class provides a PbiFilter-compatible filter on
+///        both forward & reverse barcode IDs.
+///
+/// A record must match both IDs to pass the filter.
+///
+/// Example: \include code/PbiBarcodesFilter.txt
+///
+/// \sa BamRecord::Barcodes
+///
+struct PbiBarcodesFilter
+{
+public:
+    /// \brief Creates a barcodes filter from a std::pair of IDs.
+    ///
+    /// pair.first -> BarcodeForward\n
+    /// pair.second -> BarcodeReverse
+    ///
+    /// \param[in] barcodes barcode IDs to compare on
+    /// \param[in] cmp      compare type
+    ///
+    PbiBarcodesFilter(const std::pair<uint16_t, uint16_t> barcodes,
+                      const Compare::Type cmp = Compare::EQUAL);
+
+    /// \brief Creates a barcodes filter from forward & reverse IDs.
+    ///
+    /// \param[in] bcForward    forward barcode ID to compare on
+    /// \param[in] bcReverse    reverse barcode ID to compare on
+    /// \param[in] cmp          compare type
+    ///
+    PbiBarcodesFilter(const uint16_t bcForward,
+                      const uint16_t bcReverse,
+                      const Compare::Type cmp = Compare::EQUAL);
+public:
+    /// \brief Performs the actual index lookup.
+    ///
+    /// Most client code should not need to use this method directly.
+    ///
+    bool Accepts(const PbiRawData& idx, const size_t row) const;
+
+private:
+    PbiFilter compositeFilter_;
+};
+
+/// \brief The PbiIdentityFilter class provides a PbiFilter-compatible filter on
+///        read identity (% aligned match).
+///
+/// Read identity is equivalent to: 1.0 - (nMM + nDel + nIns)/readLength.
+///
+/// Example: \include code/PbiIdentityFilter.txt
+///
+struct PbiIdentityFilter : public internal::FilterBase<float>
+{
+public:
+    /// \brief Creates a read identity filter.
+    ///
+    /// \param[in] identity value to compare on
+    /// \param[in] cmp      compare type
+    ///
+    PbiIdentityFilter(const float identity,
+                      const Compare::Type cmp = Compare::EQUAL);
+
+public:
+    /// \brief Performs the actual index lookup.
+    ///
+    /// Most client code should not need to use this method directly.
+    ///
+    bool Accepts(const PbiRawData& idx, const size_t row) const;
+};
+
+/// \brief The PbiLocalContextFilter class provides a PbiFilter-compatible
+///        filter on local context (adapter, barcode, etc.).
+///
+/// The primary Compare::Type operators intended for this filter are:
+/// Compare::EQUAL, Compare::NOT_EQUAL, Compare::CONTAINS, and
+/// Compare::NOT_CONTAINS.
+///
+/// Example: \include code/PbiLocalContextFilter.txt
+///
+struct PbiLocalContextFilter
+    : public internal::BasicDataFilterBase<LocalContextFlags,
+                                           BasicLookupData::CONTEXT_FLAG >
+{
+public:
+    PbiLocalContextFilter(const LocalContextFlags& flags,
+                          const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiMapQualityFilter class provides a PbiFilter-compatible filter on
+///        mapping quality.
+///
+/// Example: \include code/PbiMapQualityFilter.txt
+///
+/// \sa BamRecord::MapQuality
+///
+struct PbiMapQualityFilter
+    : public internal::MappedDataFilterBase<uint8_t, MappedLookupData::MAP_QUALITY>
+{
+public:
+    /// \brief Creates a map quality filter.
+    ///
+    /// \param[in] mapQual  value to compare on
+    /// \param[in] cmp      compare type
+    ///
+    PbiMapQualityFilter(const uint8_t mapQual,
+                        const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiMovieNameFilter class provides a PbiFilter-compatible filter
+///        on movie name.
+///
+/// Example: \include code/PbiMovieNameFilter.txt
+///
+/// \sa BamRecord::MovieName
+///
+struct PbiMovieNameFilter
+{
+public:
+    /// \brief Creates a single-value movie name filter.
+    ///
+    /// \param[in] movieName    movie name to compare on
+    ///
+    /// \note There is no compare type parameter here, it is always
+    ///       Compare::EQUAL. Records will match movie name, exactly.
+    ///
+    PbiMovieNameFilter(const std::string& movieName);
+
+    /// \brief Creates a 'whitelisted' movie name filter.
+    ///
+    /// \note There is no compare type parameter here, it is always
+    ///       Compare::EQUAL. Records will match at least one value from the
+    ///       whitelist, exactly.
+    ///
+    /// \param[in] whitelist    movie names to compare on
+    ///
+    PbiMovieNameFilter(const std::vector<std::string>& whitelist);
+
+    /// \brief Creates a 'whitelisted' movie name filter.
+    ///
+    /// \note There is no compare type parameter here, it is always
+    ///       Compare::EQUAL. Records will match at least one value from the
+    ///       whitelist, exactly.
+    ///
+    /// \param[in] whitelist    movie names to compare on
+    ///
+    PbiMovieNameFilter(std::vector<std::string>&& whitelist);
+
+public:
+    /// \brief Performs the actual index lookup.
+    ///
+    /// Most client code should not need to use this method directly.
+    ///
+    bool Accepts(const PbiRawData& idx, const size_t row) const;
+
+private:
+   PbiFilter compositeFilter_;
+};
+
+/// \brief The PbiNumDeletedBasesFilter class provides a PbiFilter-compatible
+///        filter on the number of deleted bases.
+///
+/// Example: \include code/PbiNumDeletedBasesFilter.txt
+///
+/// \sa BamRecord::NumDeletedBases
+///
+struct PbiNumDeletedBasesFilter
+    : public internal::MappedDataFilterBase<size_t, MappedLookupData::N_DEL>
+{
+public:
+    /// \brief Creates a filter on the number of deleted bases.
+    ///
+    /// \param[in] numDeletions value to compare on
+    /// \param[in] cmp          compare type
+    ///
+    PbiNumDeletedBasesFilter(const size_t numDeletions,
+                             const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiNumInsertedBasesFilter class provides a PbiFilter-compatible
+///        filter on the number of inserted bases.
+///
+/// Example: \include code/PbiNumInsertedBasesFilter.txt
+///
+/// \sa BamRecord::NumInsertedBases
+///
+struct PbiNumInsertedBasesFilter
+    : public internal::MappedDataFilterBase<size_t, MappedLookupData::N_INS>
+{
+public:
+    /// \brief Creates a filter on the number of inserted bases.
+    ///
+    /// \param[in] numInsertions    value to compare on
+    /// \param[in] cmp              compare type
+    ///
+    PbiNumInsertedBasesFilter(const size_t numInsertions,
+                              const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiNumMatchesFilter class provides a PbiFilter-compatible filter
+///        on the number of matched bases.
+///
+/// Example: \include code/PbiNumMatchesFilter.txt
+///
+/// \sa BamRecord::NumMatches
+///
+struct PbiNumMatchesFilter
+    : public internal::MappedDataFilterBase<size_t, MappedLookupData::N_M>
+{
+public:
+    /// \brief Creates a filter on the number of matched bases.
+    ///
+    /// \param[in] numMatchedBases  value to compare on
+    /// \param[in] cmp              compare type
+    ///
+    PbiNumMatchesFilter(const size_t numMatchedBases,
+                        const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiNumMismatchesFilter class provides a PbiFilter-compatible
+///        filter on the number of mismatched bases.
+///
+/// Example: \include code/PbiNumMismatchesFilter.txt
+///
+/// \sa BamRecord::NumMismatches
+///
+struct PbiNumMismatchesFilter
+    : public internal::MappedDataFilterBase<size_t, MappedLookupData::N_MM>
+{
+public:
+    /// \brief Creates a filter on the number of mismatched bases.
+    ///
+    /// \param[in] numMismatchedBases   value to compare on
+    /// \param[in] cmp                  compare type
+    ///
+    PbiNumMismatchesFilter(const size_t numMismatchedBases,
+                           const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiQueryEndFilter class provides a PbiFilter-compatible filter
+///        on query end.
+///
+/// Example: \include code/PbiQueryEndFilter.txt
+///
+/// \sa BamRecord::QueryEnd
+///
+struct PbiQueryEndFilter
+    : public internal::BasicDataFilterBase<int32_t, BasicLookupData::Q_END>
+{
+public:
+    /// \brief Creates a filter on query end position.
+    ///
+    /// \param[in] position value to compare on
+    /// \param[in] cmp      compare type
+    ///
+    PbiQueryEndFilter(const int32_t position,
+                      const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiQueryLengthFilter class provides a PbiFilter-compatible filter
+///        on query length.
+///
+/// queryLength = (queryEnd - queryStart)
+///
+/// Example: \include code/PbiQueryLengthFilter.txt
+///
+/// \sa BamRecord::QueryEnd, BamRecord::QueryStart
+///
+struct PbiQueryLengthFilter : public internal::FilterBase<int32_t>
+{
+public:
+    /// \brief Creates a filter on query length
+    ///
+    /// \param[in] length   value to compare on
+    /// \param[in] cmp      compare type
+    ///
+    PbiQueryLengthFilter(const int32_t length,
+                         const Compare::Type cmp = Compare::EQUAL);
+
+public:
+    /// \brief Performs the actual index lookup.
+    ///
+    /// Most client code should not need to use this method directly.
+    ///
+    bool Accepts(const PbiRawData& idx, const size_t row) const;
+};
+
+/// \brief The PbiQueryNameFilter class provides a PbiFilter-compatible filter
+///        on query name.
+///
+/// Example: \include code/PbiQueryNameFilter.txt
+///
+/// \sa BamRecord::FullName
+///
+struct PbiQueryNameFilter
+{
+public:
+    /// \brief Creates a single-value query name filter.
+    ///
+    /// \param[in] qname    query name to compare on
+    ///
+    /// \note There is no compare type parameter here, it is always
+    ///       Compare::EQUAL. Records will match query name, exactly.
+    ///
+    PbiQueryNameFilter(const std::string& qname);
+
+    /// \brief Creates a 'whitelisted' query name filter.
+    ///
+    /// \note There is no compare type parameter here, it is always
+    ///       Compare::EQUAL. Records will match at least one value from the
+    ///       whitelist, exactly.
+    ///
+    /// \param[in] whitelist    query names to compare on
+    ///
+    PbiQueryNameFilter(const std::vector<std::string>& whitelist);
+
+    /// \brief Creates a 'whitelisted' query name filter.
+    ///
+    /// \note There is no compare type parameter here, it is always
+    ///       Compare::EQUAL. Records will match at least one value from the
+    ///       whitelist, exactly.
+    ///
+    /// \param[in] whitelist    query names to compare on
+    ///
+    PbiQueryNameFilter(std::vector<std::string>&& whitelist);
+
+public:
+    /// \brief Performs the actual index lookup.
+    ///
+    /// Most client code should not need to use this method directly.
+    ///
+    bool Accepts(const PbiRawData& idx, const size_t row) const;
+
+private:
+    PbiFilter compositeFilter_;
+};
+
+/// \brief The PbiQueryStartFilter class provides a PbiFilter-compatible filter
+///        on query start.
+///
+/// Example: \include code/PbiQueryStartFilter.txt
+///
+/// \sa BamRecord::QueryStart
+///
+struct PbiQueryStartFilter
+    : public internal::BasicDataFilterBase<int32_t, BasicLookupData::Q_START>
+{
+public:
+    /// \brief Creates a filter on query start position.
+    ///
+    /// \param[in] position value to compare on
+    /// \param[in] cmp      compare type
+    ///
+    PbiQueryStartFilter(const int32_t position,
+                        const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiReadAccuracyFilter class provides a PbiFilter-compatible filter
+///        on read accuracy.
+///
+/// Example: \include code/PbiReadAccuracyFilter.txt
+///
+/// \sa BamRecord::ReadAccuracy
+///
+struct PbiReadAccuracyFilter
+    : public internal::BasicDataFilterBase<Accuracy, BasicLookupData::READ_QUALITY>
+{
+public:
+    /// \brief Creates a filter on read accuracy.
+    ///
+    /// \param[in] accuracy value to compare on
+    /// \param[in] cmp      compare type
+    ///
+    PbiReadAccuracyFilter(const Accuracy accuracy,
+                          const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiReadGroupFilter class provides a PbiFilter-compatible filter
+///        on read group.
+///
+/// Example: \include code/PbiReadGroupFilter.txt
+///
+/// \sa BamRecord::ReadGroup,
+///     BamRecord::ReadGroupId,
+///     BamRecord::ReadGroupNumericId
+///
+struct PbiReadGroupFilter
+    : public internal::BasicDataFilterBase<int32_t, BasicLookupData::RG_ID>
+{
+public:
+    /// \brief Creates a filter on read group (numeric) ID value
+    ///
+    /// \param[in] rgId     numeric read group ID
+    /// \param[in] cmp      compare type
+    ///
+    /// \sa BamRecord::ReadGroupNumericId
+    ///
+    PbiReadGroupFilter(const int32_t rgId,
+                       const Compare::Type cmp = Compare::EQUAL);
+
+    /// \brief Creates a filter on printable read group ID value
+    ///
+    /// \param[in] rgId     read group ID string
+    /// \param[in] cmp      compare type
+    ///
+    /// \sa BamRecord::ReadGroupId
+    ///
+    PbiReadGroupFilter(const std::string rgId,
+                       const Compare::Type cmp = Compare::EQUAL);
+
+    /// \brief Creates a filter on read group (object).
+    ///
+    /// \param[in] rg   read group object
+    /// \param[in] cmp  compare type
+    ///
+    /// \sa BamRecord::ReadGroup
+    ///
+    PbiReadGroupFilter(const ReadGroupInfo& rg,
+                       const Compare::Type cmp = Compare::EQUAL);
+
+    /// \brief Creates a 'whitelisted' filter on read group numeric IDs.
+    ///
+    /// \note There is no compare type parameter here, it is always
+    ///       Compare::EQUAL. Records will match at least one value from the
+    ///       whitelist, exactly.
+    ///
+    /// \param[in] whitelist    read group IDs to compare on
+    ///
+    PbiReadGroupFilter(const std::vector<int32_t>& whitelist);
+
+    /// \brief Creates a 'whitelisted' filter on read group numeric IDs.
+    ///
+    /// \note There is no compare type parameter here, it is always
+    ///       Compare::EQUAL. Records will match at least one value from the
+    ///       whitelist, exactly.
+    ///
+    /// \param[in] whitelist    read group IDs to compare on
+    ///
+    PbiReadGroupFilter(std::vector<int32_t>&& whitelist);
+
+    /// \brief Creates a 'whitelisted' filter on read group printable IDs.
+    ///
+    /// \note There is no compare type parameter here, it is always
+    ///       Compare::EQUAL. Records will match at least one value from the
+    ///       whitelist, exactly.
+    ///
+    /// \param[in] whitelist    read group ID strings to compare on
+    ///
+    PbiReadGroupFilter(const std::vector<std::string>& whitelist);
+
+    /// \brief Creates a 'whitelisted' filter on read group printable IDs.
+    ///
+    /// \note There is no compare type parameter here, it is always
+    ///       Compare::EQUAL. Records will match at least one value from the
+    ///       whitelist, exactly.
+    ///
+    /// \param[in] whitelist    read group ID strings to compare on
+    ///
+    PbiReadGroupFilter(std::vector<std::string>&& whitelist);
+
+    /// \brief Creates a 'whitelisted' filter using read group objects.
+    ///
+    /// \note There is no compare type parameter here, it is always
+    ///       Compare::EQUAL. Records will match at least one value from the
+    ///       whitelist, exactly.
+    ///
+    /// \param[in] whitelist    read group objects to compare on
+    ///
+    PbiReadGroupFilter(const std::vector<ReadGroupInfo>& whitelist);
+
+    /// \brief Creates a 'whitelisted' filter using read group objects.
+    ///
+    /// \note There is no compare type parameter here, it is always
+    ///       Compare::EQUAL. Records will match at least one value from the
+    ///       whitelist, exactly.
+    ///
+    /// \param[in] whitelist    read group objects to compare on
+    ///
+    PbiReadGroupFilter(std::vector<ReadGroupInfo>&& whitelist);
+};
+
+/// \brief The PbiReferenceEndFilter class provides a PbiFilter-compatible
+///        filter on reference end.
+///
+/// Example: \include code/PbiReferenceEndFilter.txt
+///
+/// \sa BamRecord::ReferenceEnd
+///
+struct PbiReferenceEndFilter
+    : public internal::MappedDataFilterBase<uint32_t, MappedLookupData::T_END>
+{
+public:
+    /// \brief Creates a filter on reference end.
+    ///
+    /// \param[in] tEnd     value to compare on
+    /// \param[in] cmp      compare type
+    ///
+    PbiReferenceEndFilter(const uint32_t tEnd,
+                          const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiReferenceIdFilter class provides a PbiFilter-compatible
+///        filter on reference ID.
+///
+/// Example: \include code/PbiReferenceIdFilter.txt
+///
+/// \sa BamRecord::ReferenceId
+///
+struct PbiReferenceIdFilter
+    : public internal::MappedDataFilterBase<int32_t, MappedLookupData::T_ID>
+{
+public:
+    /// \brief Creates a single-value reference ID filter.
+    ///
+    /// \param[in] tId  reference ID to compare on
+    /// \param[in] cmp  compare type
+    ///
+    PbiReferenceIdFilter(const int32_t tId,
+                         const Compare::Type cmp = Compare::EQUAL);
+
+    /// \brief Creates a 'whitelisted' reference ID filter.
+    ///
+    /// \note There is no compare type parameter here, it is always
+    ///       Compare::EQUAL. Records will match at least one value from the
+    ///       whitelist, exactly.
+    ///
+    /// \param[in] whitelist    reference IDs to compare on
+    ///
+    PbiReferenceIdFilter(const std::vector<int32_t>& whitelist);
+
+    /// \brief Creates a 'whitelisted' reference ID filter.
+    ///
+    /// \note There is no compare type parameter here, it is always
+    ///       Compare::EQUAL. Records will match at least one value from the
+    ///       whitelist, exactly.
+    ///
+    /// \param[in] whitelist    reference IDs to compare on
+    ///
+    PbiReferenceIdFilter(std::vector<int32_t>&& whitelist);
+};
+
+/// \brief The PbiReferenceNameFilter class provides a PbiFilter-compatible
+///        filter on reference name.
+///
+/// Example: \include code/PbiReferenceNameFilter.txt
+///
+/// \sa BamRecord::ReferenceName
+///
+struct PbiReferenceNameFilter
+{
+public:
+    /// \brief Creates a single-value reference name filter.
+    ///
+    /// \param[in] rname    reference name to compare on
+    /// \param[in] cmp      compare type
+    ///
+    PbiReferenceNameFilter(const std::string& rname,
+                           const Compare::Type cmp = Compare::EQUAL);
+
+    /// \brief Creates a 'whitelisted' reference name filter.
+    ///
+    /// \note There is no compare type parameter here, it is always
+    ///       Compare::EQUAL. Records will match at least one value from the
+    ///       whitelist, exactly.
+    ///
+    /// \param[in] whitelist    reference names to compare on
+    ///
+    PbiReferenceNameFilter(const std::vector<std::string>& whitelist);
+
+    /// \brief Creates a 'whitelisted' reference name filter.
+    ///
+    /// \note There is no compare type parameter here, it is always
+    ///       Compare::EQUAL. Records will match at least one value from the
+    ///       whitelist, exactly.
+    ///
+    /// \param[in] whitelist    reference names to compare on
+    ///
+    PbiReferenceNameFilter(std::vector<std::string>&& whitelist);
+
+public:
+    /// \brief Performs the actual index lookup.
+    ///
+    /// Most client code should not need to use this method directly.
+    ///
+    bool Accepts(const PbiRawData& idx, const size_t row) const;
+
+private:
+    mutable bool initialized_;
+    mutable PbiFilter subFilter_;
+    std::string rname_;
+    boost::optional<std::vector<std::string> > rnameWhitelist_;
+    Compare::Type cmp_;
+
+private:
+    // marked const so we can delay setup of filter in Accepts(), once we have
+    // access to PBI/BAM input. modified values marked mutable accordingly
+    void Initialize(const PbiRawData& idx) const;
+};
+
+/// \brief The PbiReferenceStartFilter class provides a PbiFilter-compatible
+///        filter on reference start.
+///
+/// Example: \include code/PbiReferenceStartFilter.txt
+///
+/// \sa BamRecord::ReferenceStart
+///
+struct PbiReferenceStartFilter
+    : public internal::MappedDataFilterBase<uint32_t, MappedLookupData::T_START>
+{
+public:
+    /// \brief Creates a filter on reference start.
+    ///
+    /// \param[in] tStart   value to compare on
+    /// \param[in] cmp      compare type
+    ///
+    PbiReferenceStartFilter(const uint32_t tStart,
+                            const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiZmwFilter class provides a PbiFilter-compatible filter on
+///        ZMW hole number.
+///
+/// Example: \include code/PbiZmwFilter.txt
+///
+/// \sa BamRecord::HoleNumber
+///
+struct PbiZmwFilter : public internal::BasicDataFilterBase<int32_t,
+                                                           BasicLookupData::ZMW>
+{
+public:
+    /// \brief Creates a single-value ZMW hole number filter.
+    ///
+    /// \param[in] zmw  value to compare on
+    /// \param[in] cmp  compare type
+    ///
+    PbiZmwFilter(const int32_t zmw,
+                 const Compare::Type cmp = Compare::EQUAL);
+
+    /// \brief Creates a 'whitelisted' ZMW hole number filter.
+    ///
+    /// \note There is no compare type parameter here, it is always
+    ///       Compare::EQUAL. Records will match at least one value from the
+    ///       whitelist, exactly.
+    ///
+    /// \param[in] whitelist    ZMW hole numbers to compare on
+    ///
+    PbiZmwFilter(const std::vector<int32_t>& whitelist);
+
+    /// \brief Creates a 'whitelisted' ZMW hole number filter.
+    ///
+    /// \note There is no compare type parameter here, it is always
+    ///       Compare::EQUAL. Records will match at least one value from the
+    ///       whitelist, exactly.
+    ///
+    /// \param[in] whitelist    ZMW hole numbers to compare on
+    ///
+    PbiZmwFilter(std::vector<int32_t>&& whitelist);
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/PbiFilterTypes.inl"
+
+#endif // PBIFILTERTYPES_H
diff --git a/include/pbbam/PbiIndex.h b/include/pbbam/PbiIndex.h
index 79aa880..09b61b8 100644
--- a/include/pbbam/PbiIndex.h
+++ b/include/pbbam/PbiIndex.h
@@ -33,131 +33,51 @@
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
 //
+// File Description
+/// \file PbiIndex.h
+/// \brief Defines the PbiIndex class.
+//
 // Author: Derek Barnett
 
 #ifndef PBIINDEX_H
 #define PBIINDEX_H
 
 #include "pbbam/Config.h"
-#include "pbbam/LocalContextFlags.h"
 #include "pbbam/PbiFile.h"
-#include "pbbam/Strand.h"
-#include <deque>
+#include "pbbam/PbiLookupData.h"
 #include <memory>
 #include <string>
-#include <vector>
 
 namespace PacBio {
 namespace BAM {
 
 namespace internal { class PbiIndexPrivate; }
 
-enum class SubreadField
-{
-    RG_ID
-  , Q_START
-  , Q_END
-  , ZMW
-  , READ_QUALITY
-  , VIRTUAL_OFFSET
-};
-
-enum class MappedField
-{
-    T_ID
-  , T_START
-  , T_END
-  , A_START
-  , A_END
-  , N_M
-  , N_MM
-  , N_INS
-  , N_DEL
-  , MAP_QUALITY
-  , STRAND
-};
-
-enum class BarcodeField
-{
-    BC_LEFT
-  , BC_RIGHT
-  , BC_QUALITY
-  , CONTEXT_FLAG
-};
-
-enum class CompareType
-{
-    EQUAL
-  , LESS_THAN
-  , LESS_THAN_EQUAL
-  , GREATER_THAN
-  , GREATER_THAN_EQUAL
-  , NOT_EQUAL
-};
-
-//
-// Contiguous reads that satisfy a query will be returned as a block.
-// This is to help minimize number of seeks (or even unneccesary checks).
-//
-// An index query can iterate over the lookup result 'IndexResultBlocks' list to
-// perform a seek and fetch 'numReads' consecutive records before needing to
-// seek again.
-//
-struct PBBAM_EXPORT IndexResultBlock
-{
-public:
-    IndexResultBlock(void);
-    IndexResultBlock(size_t idx, size_t numReads);
-
-public:
-    bool operator==(const IndexResultBlock& other) const;
-    bool operator!=(const IndexResultBlock& other) const;
-
-public:
-    size_t  firstIndex_;
-    size_t  numReads_;
-    int64_t virtualOffset_;
-};
-
-typedef std::deque<IndexResultBlock> IndexResultBlocks;
-
-typedef std::vector<size_t>       IndexList;
-typedef std::pair<size_t, size_t> IndexRange;
-
-template<typename FieldType, typename ValueType>
-struct IndexRequestBase
-{
-public:
-    FieldType field_;
-    ValueType value_;
-    CompareType compareType_;
-
-protected:
-    IndexRequestBase(const FieldType field,
-                     const ValueType& value,
-                     const CompareType compareType = CompareType::EQUAL);
-};
-
-// all multi-requests use CompareType::EQUAL
-template<typename FieldType, typename ValueType>
-struct IndexMultiRequestBase
-{
-public:
-    FieldType field_;
-    std::vector<ValueType> values_;
-
-protected:
-    IndexMultiRequestBase(const FieldType field,
-                          const std::vector<ValueType>& values);
-};
-
+/// \brief The PbiIndex class provides a representation of PBI index data that
+///        is rearranged for quick lookups.
+///
+/// The PbiIndex class itself provides access to a few high-level attributes
+/// (e.g. version, number of records, etc.). The actual lookup data is stored
+/// in its member components:
+///     BasicLookupData,
+///     MappedLookupData,
+///     ReferenceLookupData, &
+///     BarcodeLookupData .
+///
 class PBBAM_EXPORT PbiIndex
 {
 public:
     /// \name Constructors & Related Methods
     /// \{
 
+    /// \brief Creates a PbiIndex lookup structure from a PBI file.
+    ///
+    /// \param[in] pbiFilename  filename
+    ///
+    /// \throws std::runtime_error if failed to load data from file
+    ///
     PbiIndex(const std::string& pbiFilename);
+
     PbiIndex(const PbiIndex& other);
     PbiIndex(PbiIndex&& other);
     PbiIndex& operator=(const PbiIndex& other);
@@ -167,133 +87,76 @@ public:
     /// \}
 
 public:
-    // PBI attributes
+    /// \name PBI General Attributes
+    /// \{
+
+    /// \returns true if index has BarcodeData section
     bool HasBarcodeData(void) const;
+
+    /// \returns true if index has MappedData section
     bool HasMappedData(void) const;
+
+    /// \returns true if index has ReferenceData section
     bool HasReferenceData(void) const;
+
+    /// \returns true if index has \b section
+    /// \param[in] section PbiFile::Section identifier
+    ///
     bool HasSection(const PbiFile::Section section) const;
 
+    /// \returns index filename ("*.pbi")
+    ///
+    /// \note Returns an empty string if the underlying data was generated, not
+    ///       loaded from file.
+    ///
+    std::string Filename(void) const;
+
+    /// \returns enum flags representing the file sections present
     PbiFile::Sections FileSections(void) const;
+
+    /// \returns the number of records in the PBI (& associated %BAM)
     uint32_t NumReads(void) const;
+
+    /// \returns the PBI file's version
     PbiFile::VersionEnum Version(void) const;
 
-public:
+    /// \}
 
-    template<typename FieldType, typename ValueType>
-    IndexList RawIndices(const IndexRequestBase<FieldType, ValueType>& request) const;
+public:
+    /// \name Lookup Data Components
+    /// \{
 
-    template<typename FieldType, typename ValueType>
-    IndexList RawIndices(const IndexMultiRequestBase<FieldType, ValueType>& request) const;
+    /// \returns const reference to BarcodeData lookup structure
+    ///
+    /// May be empty, check result of HasBarcodeData.
+    ///
+    const BarcodeLookupData& BarcodeData(void) const;
 
-    template<typename FieldType, typename ValueType>
-    IndexResultBlocks Lookup(const IndexRequestBase<FieldType, ValueType>& request) const;
+    /// \returns const reference to BasicData lookup structure
+    const BasicLookupData& BasicData(void) const;
 
-    template<typename FieldType, typename ValueType>
-    IndexResultBlocks Lookup(const IndexMultiRequestBase<FieldType, ValueType>& request) const;
+    /// \returns const reference to MappedData lookup structure
+    ///
+    /// May be empty, check result of HasMappedData.
+    ///
+    const MappedLookupData& MappedData(void) const;
 
-    IndexResultBlocks LookupReference(const int32_t tId) const;
+    /// \returns const reference to reference data lookup structure
+    ///
+    /// May be empty, check result of HasReferenceData.
+    ///
+    const ReferenceLookupData& ReferenceData(void) const;
 
-    const std::vector<int64_t>& VirtualFileOffsets(void) const;
+    /// \}
 
 private:
     PbiIndex(void);
     std::unique_ptr<internal::PbiIndexPrivate> d_;
 };
 
-template<SubreadField field, typename ValueType>
-class SubreadIndexRequest : public IndexRequestBase<SubreadField, ValueType>
-{
-public:
-    SubreadIndexRequest(const ValueType& value,
-                        const CompareType& compareType = CompareType::EQUAL);
-};
-
-template<SubreadField field, typename ValueType>
-class SubreadIndexMultiRequest : public IndexMultiRequestBase<SubreadField, ValueType>
-{
-public:
-    SubreadIndexMultiRequest(const std::vector<ValueType>& values);
-};
-
-typedef SubreadIndexRequest<SubreadField::RG_ID,        int32_t>  ReadGroupIndexRequest;
-typedef SubreadIndexRequest<SubreadField::Q_START,      int32_t>  QueryStartIndexRequest;
-typedef SubreadIndexRequest<SubreadField::Q_END,        int32_t>  QueryEndIndexRequest;
-typedef SubreadIndexRequest<SubreadField::ZMW,          int32_t>  ZmwIndexRequest;
-typedef SubreadIndexRequest<SubreadField::READ_QUALITY, uint16_t> ReadQualityIndexRequest;
-
-typedef SubreadIndexMultiRequest<SubreadField::RG_ID,        int32_t>  ReadGroupIndexMultiRequest;
-typedef SubreadIndexMultiRequest<SubreadField::Q_START,      int32_t>  QueryStartIndexMultiRequest;
-typedef SubreadIndexMultiRequest<SubreadField::Q_END,        int32_t>  QueryEndIndexMultiRequest;
-typedef SubreadIndexMultiRequest<SubreadField::ZMW,          int32_t>  ZmwIndexMultiRequest;
-typedef SubreadIndexMultiRequest<SubreadField::READ_QUALITY, uint16_t> ReadQualityIndexMultiRequest;
-
-template<MappedField field, typename ValueType>
-class MappedIndexRequest : public IndexRequestBase<MappedField, ValueType>
-{
-public:
-    MappedIndexRequest(const ValueType& value, const
-                       CompareType& compareType = CompareType::EQUAL);
-};
-
-template<MappedField field, typename ValueType>
-class MappedIndexMultiRequest : public IndexMultiRequestBase<SubreadField, ValueType>
-{
-public:
-    MappedIndexMultiRequest(const std::vector<ValueType>& values);
-};
-
-typedef MappedIndexRequest<MappedField::T_ID,         int32_t> ReferenceIdIndexRequest;
-typedef MappedIndexRequest<MappedField::T_START,      int32_t> ReferenceStartIndexRequest;
-typedef MappedIndexRequest<MappedField::T_END,        int32_t> ReferenceEndIndexRequest;
-typedef MappedIndexRequest<MappedField::A_START,      int32_t> AlignedStartIndexRequest;
-typedef MappedIndexRequest<MappedField::A_END,        int32_t> AlignedEndIndexRequest;
-typedef MappedIndexRequest<MappedField::N_M,          int32_t> NumMatchesIndexRequest;
-typedef MappedIndexRequest<MappedField::N_MM,         int32_t> NumMismatchesIndexRequest;
-typedef MappedIndexRequest<MappedField::N_INS,        int32_t> NumInsertionsIndexRequest;
-typedef MappedIndexRequest<MappedField::N_DEL,        int32_t> NumDeletionsIndexRequest;
-typedef MappedIndexRequest<MappedField::MAP_QUALITY,  uint8_t> MapQualityIndexRequest;
-typedef MappedIndexRequest<MappedField::STRAND,       Strand>  StrandIndexRequest;
-
-typedef MappedIndexMultiRequest<MappedField::T_ID,         int32_t> ReferenceIdIndexMultiRequest;
-typedef MappedIndexMultiRequest<MappedField::T_START,      int32_t> ReferenceStartIndexMultiRequest;
-typedef MappedIndexMultiRequest<MappedField::T_END,        int32_t> ReferenceEndIndexMultiRequest;
-typedef MappedIndexMultiRequest<MappedField::A_START,      int32_t> AlignedStartIndexMultiRequest;
-typedef MappedIndexMultiRequest<MappedField::A_END,        int32_t> AlignedEndIndexMultiRequest;
-typedef MappedIndexMultiRequest<MappedField::N_M,          int32_t> NumMatchesIndexMultiRequest;
-typedef MappedIndexMultiRequest<MappedField::N_MM,         int32_t> NumMismatchesIndexMultiRequest;
-typedef MappedIndexMultiRequest<MappedField::N_INS,        int32_t> NumInsertionsIndexMultiRequest;
-typedef MappedIndexMultiRequest<MappedField::N_DEL,        int32_t> NumDeletionsIndexMultiRequest;
-typedef MappedIndexMultiRequest<MappedField::MAP_QUALITY,  uint8_t> MapQualityIndexMultiRequest;
-typedef MappedIndexMultiRequest<MappedField::STRAND,       Strand>  StrandIndexMultiRequest;
-
-template<BarcodeField field, typename ValueType>
-class BarcodeIndexRequest : public IndexRequestBase<BarcodeField, ValueType>
-{
-public:
-    BarcodeIndexRequest(const ValueType& value,
-                        const CompareType& compareType = CompareType::EQUAL);
-};
-
-template<BarcodeField field, typename ValueType>
-class BarcodeIndexMultiRequest : public IndexMultiRequestBase<BarcodeField, ValueType>
-{
-public:
-    BarcodeIndexMultiRequest(const std::vector<ValueType>& values);
-};
-
-typedef BarcodeIndexRequest<BarcodeField::BC_LEFT,      uint16_t> BarcodeLeftIndexRequest;
-typedef BarcodeIndexRequest<BarcodeField::BC_RIGHT,     uint16_t> BarcodeRightIndexRequest;
-typedef BarcodeIndexRequest<BarcodeField::BC_QUALITY,   uint8_t>  BarcodeQualityIndexRequest;
-typedef BarcodeIndexRequest<BarcodeField::CONTEXT_FLAG, LocalContextFlags> ContextFlagIndexRequest;
-
-typedef BarcodeIndexMultiRequest<BarcodeField::BC_LEFT,      uint16_t> BarcodeLeftIndexMultiRequest;
-typedef BarcodeIndexMultiRequest<BarcodeField::BC_RIGHT,     uint16_t> BarcodeRightIndexMultiRequest;
-typedef BarcodeIndexMultiRequest<BarcodeField::BC_QUALITY,   uint8_t>  BarcodeQualityIndexMultiRequest;
-typedef BarcodeIndexMultiRequest<BarcodeField::CONTEXT_FLAG, LocalContextFlags> ContextFlagIndexMultiRequest;
-
 } // namespace BAM
 } // namespace PacBio
 
-#include "internal/PbiIndex_p.inl"
+#include "internal/PbiIndex.inl"
 
 #endif // PBIINDEX_H
diff --git a/include/pbbam/PbiIndexedBamReader.h b/include/pbbam/PbiIndexedBamReader.h
new file mode 100644
index 0000000..17c46b5
--- /dev/null
+++ b/include/pbbam/PbiIndexedBamReader.h
@@ -0,0 +1,174 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiIndexedBamReader.h
+/// \brief Defines the PbiIndexedBamReader class.
+//
+// Author: Derek Barnett
+
+#ifndef PBIINDEXEDBAMREADER_H
+#define PBIINDEXEDBAMREADER_H
+
+#include "pbbam/BamFile.h"
+#include "pbbam/BamReader.h"
+#include "pbbam/PbiBasicTypes.h"
+#include "pbbam/PbiFilter.h"
+#include "pbbam/PbiIndex.h"
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+namespace internal { struct PbiIndexedBamReaderPrivate; }
+
+/// \brief The PbiIndexedBamReader class provides read-only iteration over %BAM
+///        records, limited to some filtering criteria.
+///
+/// The PacBio BAM index (*.pbi) is used to allow random-access operations.
+///
+class PBBAM_EXPORT PbiIndexedBamReader : public BamReader
+{
+public:
+    /// \name Constructors & Related Methods
+    /// \{
+
+    /// \brief Constructs %BAM reader, with an initial filter.
+    ///
+    /// All reads that satisfy the filter will be available.
+    ///
+    /// \param[in] filter       PbiFilter or compatible object
+    /// \param[in] bamFilename  input %BAM filename
+    ///
+    /// \throws std::runtime_error if either file (*.bam or *.pbi) cannot be
+    ///         read
+    ///
+    PbiIndexedBamReader(const PbiFilter& filter, const std::string& bamFilename);
+
+    /// \brief Constructs %BAM reader, with an initial filter.
+    ///
+    /// All reads that satisfy the filter will be available.
+    ///
+    /// \param[in] filter       PbiFilter or compatible object
+    /// \param[in] bamFile      input BamFile object
+    ///
+    /// \throws std::runtime_error if either file (*.bam or *.pbi) cannot be
+    ///         read
+    ///
+    PbiIndexedBamReader(const PbiFilter& filter, const BamFile& bamFile);
+
+    /// \brief Constructs %BAM reader, with an initial filter.
+    ///
+    /// All reads that satisfy the filter will be available.
+    ///
+    /// \param[in] filter       PbiFilter or compatible object
+    /// \param[in] bamFile      input BamFile object
+    ///
+    /// \throws std::runtime_error if either file (*.bam or *.pbi) cannot be
+    ///         read
+    ///
+    PbiIndexedBamReader(const PbiFilter& filter, BamFile&& bamFile);
+
+    /// \brief Constructs %BAM reader, with no initial filter.
+    ///
+    /// Useful for delaying either specifying the filtering criteria or
+    /// performing the PBI lookups.
+    ///
+    /// \param[in] bamFilename  input %BAM filename
+    ///
+    /// \throws std::runtime_error if either file (*.bam or *.pbi) cannot be
+    ///         read
+    ///
+    PbiIndexedBamReader(const std::string& bamFilename);
+
+    /// \brief Constructs %BAM reader, with no initial filter.
+    ///
+    /// Useful for delaying either specifying the filtering criteria or
+    /// performing the PBI lookups.
+    ///
+    /// \param[in] bamFile      input BamFile object
+    ///
+    /// \throws std::runtime_error if either file (*.bam or *.pbi) cannot be
+    ///         read
+    ///
+    PbiIndexedBamReader(const BamFile& bamFile);
+
+    /// \brief Constructs %BAM reader, with no initial filter.
+    ///
+    /// Useful for delaying either specifying the filtering criteria or
+    /// performing the PBI lookups.
+    ///
+    /// \param[in] bamFile      input BamFile object
+    ///
+    /// \throws std::runtime_error if either file (*.bam or *.pbi) cannot be
+    ///         read
+    ///
+    PbiIndexedBamReader(BamFile&& bamFile);
+
+    ~PbiIndexedBamReader(void);
+
+    /// \}
+
+public:
+    /// \name Filtering & Index Data
+    /// \{
+
+    /// \returns the current filter active on this reader
+    const PbiFilter& Filter(void) const;
+
+//    /// \returns the reader's underlying index data
+//    const PbiIndex& Index(void) const;
+
+public:
+    /// \brief Sets a new filter on the reader.
+    ///
+    /// \param[in] filter
+    /// \returns reference to this reader
+    ///
+    PbiIndexedBamReader& Filter(const PbiFilter& filter);
+
+    /// \}
+
+protected:
+    int ReadRawData(BGZF* bgzf, bam1_t* b);
+
+private:
+    std::unique_ptr<internal::PbiIndexedBamReaderPrivate> d_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // PBIINDEXEDBAMREADER_H
diff --git a/include/pbbam/PbiLookupData.h b/include/pbbam/PbiLookupData.h
new file mode 100644
index 0000000..398c349
--- /dev/null
+++ b/include/pbbam/PbiLookupData.h
@@ -0,0 +1,718 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiLookupData.h
+/// \brief Defines the classes used for PBI data lookup.
+//
+// Author: Derek Barnett
+
+#ifndef PBILOOKUPDATA_H
+#define PBILOOKUPDATA_H
+
+#include "pbbam/Config.h"
+#include "pbbam/Compare.h"
+#include "pbbam/PbiBasicTypes.h"
+#include <deque>
+#include <map>
+#include <unordered_map>
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+
+class PbiRawBarcodeData;
+class PbiRawBasicData;
+class PbiRawMappedData;
+class PbiRawReferenceData;
+
+/// \brief The OrderedLookup class provides a quick lookup structure for
+///        PBI index data, where key values are sorted.
+///
+/// The main, underlying lookup structure is essentially a std::map, where the
+/// key is some value (e.g. readAccuracy) and the value is the list of indices
+/// (i-th record) in the %BAM file.
+///
+/// This lookup class is one of the main building blocks for the PBI index
+/// lookup components.
+///
+/// \tparam T   type of key stored (Accuracy for readAccuracy, int32_t for ZMW,
+///             etc.)
+///
+template<typename T>
+class OrderedLookup
+{
+public:
+    typedef T                                       key_type;
+    typedef IndexList                               value_type;
+    typedef std::map<key_type, value_type>          container_type;
+    typedef typename container_type::iterator       iterator;
+    typedef typename container_type::const_iterator const_iterator;
+
+public:
+    /// \name Constructors & Related Methods
+    /// \{
+
+    /// \brief Creates an empty OrderedLookup structure.
+    ///
+    OrderedLookup(void);
+
+    /// \brief Creates an OrderedLookup structure, from another's underlying
+    ///        lookup container.
+    ///
+    /// \param[in] data     lookup data container
+    ///
+    OrderedLookup(const container_type& data);
+
+    /// \brief Creates an OrderedLookup structure, from another's underlying
+    ///        lookup container.
+    ///
+    /// \param[in] data     lookup data container
+    ///
+    OrderedLookup(container_type&& data);
+
+    /// \brief Creates an OrderedLookup structure, from raw data.
+    ///
+    /// \param[in] rawData  raw data values, where i is the index into the %BAM
+    ///                     file, and rawData[i] is the key value
+    ///
+    OrderedLookup(const std::vector<T>& rawData);
+
+    /// \brief Creates an OrderedLookup structure, from raw data.
+    ///
+    /// \param[in] rawData  raw data values, where i is the index into the %BAM
+    ///                     file, and rawData[i] is the key value
+    ///
+    OrderedLookup(std::vector<T>&& rawData);
+
+    /// \}
+
+public:
+    /// \name Operators
+    /// \{
+
+    /// \returns true if this lookup is same as \p other
+    bool operator==(const OrderedLookup<T>& other) const;
+
+    /// \returns true if this lookup is not the same as \p other
+    bool operator!=(const OrderedLookup<T>& other) const;
+
+    /// \}
+
+public:
+    /// \name STL-Compatibility Methods
+    /// \{
+
+    /// \returns an iterator to the first element in the underlying container
+    iterator begin(void);
+
+    /// \returns a const iterator to the first element in the underlying
+    ///          container
+    const_iterator begin(void) const;
+
+    /// \returns a const iterator to the first element in the underlying
+    ///          container
+    const_iterator cbegin(void) const;
+
+    /// \returns an iterator after the last element in the underlying container
+    iterator end(void);
+
+    /// \returns a const iterator after the last element in the underlying
+    ///          container
+    const_iterator end(void) const;
+
+    /// \returns a const iterator after the last element in the underlying
+    ///          container
+    const_iterator cend(void) const;
+
+    /// \returns true if underlying container is empty
+    bool empty(void) const;
+
+    /// \returns number of keys in the container
+    size_t size(void) const;
+
+    /// \}
+
+public:
+    /// \name Lookup Data
+    /// \{
+
+    /// \brief Performs a lookup into the underlying data.
+    ///
+    /// \param[in] key      key value to lookup
+    /// \param[in] compare  compare type
+    ///
+    /// \returns sorted list of unique indices that satisfy the lookup key &
+    ///          compare type
+    ///
+    IndexList LookupIndices(const key_type& key,
+                            const Compare::Type& compare) const;
+
+    /// \brief Converts the lookup structure back into its raw data.
+    ///
+    /// \returns raw data values, where i is the index into the %BAM file, and
+    ///          rawData[i] is the key value
+    ///
+    std::vector<T> Unpack(void) const;
+
+    /// \}
+
+private:
+    IndexList LookupInclusiveRange(const const_iterator& begin,
+                                   const const_iterator& end) const;
+
+    IndexList LookupExclusiveRange(const const_iterator& begin,
+                                   const const_iterator& end,
+                                   const key_type& key) const;
+
+private:
+    container_type data_;
+};
+
+/// \brief The UnorderedLookup class provides a quick lookup structure for
+///        PBI index data, where key values are not sorted.
+///
+/// The main, underlying lookup structure is essentially a std::unordered_map,
+/// where the key is some value (e.g. read group ID) and the value is the list
+/// of indices (i-th record) in the %BAM file.
+///
+/// This lookup class is one of the main building blocks for the PBI index
+/// lookup components.
+///
+/// \tparam T   type of key stored (Accuracy for readAccuracy, int32_t for ZMW,
+///             etc.)
+///
+template<typename T>
+class UnorderedLookup
+{
+public:
+    typedef T                                        key_type;
+    typedef IndexList                                value_type;
+    typedef std::unordered_map<key_type, value_type> container_type;
+    typedef typename container_type::iterator        iterator;
+    typedef typename container_type::const_iterator  const_iterator;
+
+public:
+    /// \name Constructors & Related Methods
+    /// \{
+
+    /// \brief Creates an empty UnorderedLookup structure.
+    ///
+    UnorderedLookup(void);
+
+    /// \brief Creates an UnorderedLookup structure, from another's underlying
+    ///        lookup container.
+    ///
+    /// \param[in] data     lookup data container
+    ///
+    UnorderedLookup(const container_type& data);
+
+    /// \brief Creates an UnorderedLookup structure, from another's underlying
+    ///        lookup container.
+    ///
+    /// \param[in] data     lookup data container
+    ///
+    UnorderedLookup(container_type&& data);
+
+    /// \brief Creates an UnorderedLookup structure, from raw data.
+    ///
+    /// \param[in] rawData  raw data values, where i is the index into the %BAM
+    ///                     file, and rawData[i] is the key value
+    ///
+    UnorderedLookup(const std::vector<T>& rawData);
+
+    /// \brief Creates an UnorderedLookup structure, from raw data.
+    ///
+    /// \param[in] rawData  raw data values, where i is the index into the %BAM
+    ///                     file, and rawData[i] is the key value
+    ///
+    UnorderedLookup(std::vector<T>&& rawData);
+
+    /// \}
+
+public:
+    /// \name Operators
+    /// \{
+
+    /// \returns true if this lookup is same as \p other
+    bool operator==(const UnorderedLookup<T>& other) const;
+
+    /// \returns true if this lookup is not the same as \p other
+    bool operator!=(const UnorderedLookup<T>& other) const;
+
+    /// \}
+
+public:
+    /// \name STL-Compatibility Methods
+    /// \{
+
+    /// \returns an iterator to the first element in the underlying container
+    iterator begin(void);
+
+    /// \returns a const iterator to the first element in the underlying
+    ///          container
+    const_iterator begin(void) const;
+
+    /// \returns a const iterator to the first element in the underlying
+    ///          container
+    const_iterator cbegin(void) const;
+
+    /// \returns an iterator after the last element in the underlying container
+    iterator end(void);
+
+    /// \returns a const iterator after the last element in the underlying
+    ///          container
+    const_iterator end(void) const;
+
+    /// \returns a const iterator after the last element in the underlying
+    ///          container
+    const_iterator cend(void) const;
+
+    /// \returns true if underlying container is empty
+    bool empty(void) const;
+
+    /// \returns number of keys in the container
+    size_t size(void) const;
+
+    /// \}
+
+public:
+    /// \name Lookup Data
+    /// \{
+
+    /// \brief Performs a lookup into the underlying data.
+    ///
+    /// \param[in] key      key value to lookup
+    /// \param[in] compare  compare type
+    ///
+    /// \returns sorted list of unique indices that satisfy the lookup key &
+    ///          compare type
+    ///
+    IndexList LookupIndices(const key_type& key,
+                            const Compare::Type& compare) const;
+
+    /// \brief Converts the lookup structure back into its raw data.
+    ///
+    /// \returns raw data values, where i is the index into the %BAM file, and
+    ///          rawData[i] is the key value
+    ///
+    std::vector<T> Unpack(void) const;
+
+    /// \}
+
+private:
+    template<typename Compare>
+    IndexList LookupHelper(const key_type& key,
+                           const Compare& cmp) const;
+
+private:
+    container_type data_;
+};
+
+/// \brief The BasicLookupData class provides quick lookup access to the
+///        "BasicData" section of the PBI index.
+///
+class PBBAM_EXPORT BasicLookupData
+{
+public:
+    /// \brief This enum describes the component fields of the BasicData
+    ///        section.
+    enum Field
+    {
+        RG_ID
+      , Q_START
+      , Q_END
+      , ZMW
+      , READ_QUALITY
+      , CONTEXT_FLAG
+      , VIRTUAL_OFFSET
+    };
+
+public:
+    /// \name Constructors & Related Methods
+    /// \{
+
+    /// \brief Creates an empty lookup data object.
+    BasicLookupData(void);
+
+    /// \brief Creates a lookup data object from the corresponding raw data.
+    ///
+    /// \param[in] rawData  raw data loaded from a PBI file
+    ///
+    BasicLookupData(const PbiRawBasicData& rawData);
+
+    /// \}
+
+public:
+    /// \name Lookup Data Methods
+    /// \{
+
+    /// \brief Adds \b virtual file offset data to the index lookup result
+    ///        blocks.
+    ///
+    /// A PBI lookup will result in a number of index lists, depending on the
+    /// complexity of the PbiFilter involved. These index lists are then merged
+    /// down into blocks of contiguous values, where each block describes a
+    /// particular record index and the number of subsequent, contiguous reads
+    /// that immediately follow it. In this manner, we need only perform seeks
+    /// to the first record of each block.
+    ///
+    /// This method takes such blocks and annotates them with the corresponding
+    /// \b virtual file offset. Subsequent %BAM readers can use this information
+    /// to control file seeks.
+    ///
+    /// \param[in,out] blocks
+    ///
+    /// \throws std::out_of_range if a block has an invalid index value
+    ///
+    void ApplyOffsets(IndexResultBlocks& blocks) const;
+
+    /// \brief This method dispatches a single-value lookup query to the proper
+    ///         data member.
+    ///
+    /// Client code, such as custom filters, should use this when possible, only
+    /// touching the raw fields for more complex operations (e.g. when unpacking
+    /// is necessary).
+    ///
+    /// \param[in] field            section field to lookup
+    /// \param[in] value            value to lookup
+    /// \param[in] compareType      compare type
+    ///
+    /// \returns sorted list of unique indices that satisfy the lookup
+    ///
+    template<typename T>
+    IndexList Indices(const BasicLookupData::Field& field,
+                      const T& value,
+                      const Compare::Type& compareType = Compare::EQUAL) const;
+
+    /// \brief This method dispatches a multi-value lookup query to the proper
+    ///        data member.
+    ///
+    /// Client code, such as custom filters, should use this when possible, only
+    /// touching the raw fields for more complex operations (e.g. when unpacking
+    /// is necessary).
+    ///
+    /// \note There is no compare type parameter here, it is always
+    ///       Compare::EQUAL. Results will correspond to an exact match on at
+    ///       least one value in the list.
+    ///
+    /// \param[in] field        section field to lookup
+    /// \param[in] values       values to lookup
+    ///
+    /// \returns sorted list of unique indices that satisfy the lookup
+    ///
+    template<typename T>
+    IndexList IndicesMulti(const BasicLookupData::Field& field,
+                           const std::vector<T>& values) const;
+
+    /// \returns the \b virtual file offsets for all records
+    ///
+    const std::vector<int64_t>& VirtualFileOffsets(void) const;
+
+    /// \}
+
+public:
+    /// \name Lookup Data Members
+    /// \{
+
+    // map ordering doesn't make sense, optimize for direct lookup
+    UnorderedLookup<int32_t> rgId_;
+
+    // numeric comparisons make sense, keep key ordering preserved
+    OrderedLookup<int32_t>  qStart_;
+    OrderedLookup<int32_t>  qEnd_;
+    OrderedLookup<int32_t>  holeNumber_;
+    OrderedLookup<float>    readQual_;
+
+    // see if this works, or if can use unordered, 'direct' query
+    OrderedLookup<uint8_t> ctxtFlag_;
+
+    // offsets
+    std::vector<int64_t> fileOffset_;
+
+    /// \}
+};
+
+/// \brief The MappedLookupData class provides quick lookup access to the
+///        "MappedData" section of the PBI index.
+///
+class PBBAM_EXPORT MappedLookupData
+{
+public:
+    /// \brief This enum describes the component fields of the MappedData
+    ///        section.
+    enum Field
+    {
+        T_ID
+      , T_START
+      , T_END
+      , A_START
+      , A_END
+      , N_M
+      , N_MM
+      , N_INS
+      , N_DEL
+      , MAP_QUALITY
+      , STRAND
+    };
+
+public:
+    /// \name Constructors & Related Methods
+    /// \{
+
+    /// \brief Creates an empty lookup data object.
+    MappedLookupData(void);
+
+    /// \brief Creates a lookup data object from the corresponding raw data.
+    ///
+    /// \param[in] rawData  raw data loaded from a PBI file
+    ///
+    MappedLookupData(const PbiRawMappedData& rawData);
+
+    /// \}
+
+public:
+    /// \name Lookup Data Methods
+    /// \{
+
+    /// \brief This method dispatches a single-value lookup query to the proper
+    ///         data member.
+    ///
+    /// Client code, such as custom filters, should use this when possible, only
+    /// touching the raw fields for more complex operations (e.g. when unpacking
+    /// is necessary).
+    ///
+    /// \param[in] field            section field to lookup
+    /// \param[in] value            value to lookup
+    /// \param[in] compareType      compare type
+    ///
+    /// \returns sorted list of unique indices that satisfy the lookup
+    ///
+    template<typename T>
+    IndexList Indices(const MappedLookupData::Field& field,
+                      const T& value,
+                      const Compare::Type& compareType = Compare::EQUAL) const;
+
+    /// \brief This method dispatches a multi-value lookup query to the proper
+    ///        data member.
+    ///
+    /// Client code, such as custom filters, should use this when possible, only
+    /// touching the raw fields for more complex operations (e.g. when unpacking
+    /// is necessary).
+    ///
+    /// \note There is no compare type parameter here, it is always
+    ///       Compare::EQUAL. Results will correspond to an exact match on at
+    ///       least one value in the list.
+    ///
+    /// \param[in] field        section field to lookup
+    /// \param[in] values       values to lookup
+    ///
+    /// \returns sorted list of unique indices that satisfy the lookup
+    ///
+    template<typename T>
+    IndexList IndicesMulti(const MappedLookupData::Field& field,
+                           const std::vector<T>& values) const;
+
+    /// \}
+
+public:
+    /// \name Lookup Data Members
+    /// \{
+
+    // numeric comparisons make sense, keep key ordering preserved
+    OrderedLookup<int32_t>  tId_;
+    OrderedLookup<uint32_t> tStart_;
+    OrderedLookup<uint32_t> tEnd_;
+    OrderedLookup<uint32_t> aStart_;
+    OrderedLookup<uint32_t> aEnd_;
+    OrderedLookup<uint32_t> nM_;
+    OrderedLookup<uint32_t> nMM_;
+    OrderedLookup<uint8_t>  mapQV_;
+
+    // generated values, not stored directly in PBI file
+    OrderedLookup<uint32_t> nIns_;
+    OrderedLookup<uint32_t> nDel_;
+
+    // no need for map overhead, just store direct indices
+    IndexList reverseStrand_;
+    IndexList forwardStrand_;
+
+    /// \}
+};
+
+/// \brief The ReferenceLookupData class provides quick lookup access to the
+///        "CoordinateSortedData" section of the PBI index.
+///
+class PBBAM_EXPORT ReferenceLookupData
+{
+public:
+    /// \name Constructors & Related Methods
+    /// \{
+
+    /// \brief Creates an empty lookup data object.
+    ///
+    ReferenceLookupData(void);
+
+    /// \brief Creates a lookup data object from the corresponding raw data.
+    ///
+    /// \param[in] rawData  raw data loaded from a PBI file
+    ///
+    ReferenceLookupData(const PbiRawReferenceData& rawData);
+
+    /// \}
+
+public:
+    /// \name Lookup Data Methods
+    /// \{
+
+    /// \brief Retrieves the index range for all records that map to a
+    ///        particular reference.
+    ///
+    /// Client code, such as custom filters, should use this when possible, only
+    /// touching the raw fields for more complex operations (e.g. when unpacking
+    /// is necessary).
+    ///
+    /// \param[in] tId      reference ID to lookup
+    ///
+    /// \returns resulting index range [begin, end). If \p tId is unknown,
+    ///          will return IndexRange(-1,-1) .
+    ///
+    IndexRange Indices(const int32_t tId) const;
+
+    /// \}
+
+public:
+    /// \name Lookup Data Members
+    /// \{
+
+    // references_[tId] = [begin, end) indices
+    std::unordered_map<int32_t, IndexRange> references_;
+
+    /// \}
+};
+
+/// \brief The BarcodeLookupData class provides quick lookup access to the
+///        "BarcodeData" section of the PBI index.
+///
+class PBBAM_EXPORT BarcodeLookupData
+{
+public:
+    /// \brief This enum describes the component fields of the BarcodeData
+    ///        section.
+    enum Field
+    {
+        BC_FORWARD
+      , BC_REVERSE
+      , BC_QUALITY
+    };
+
+public:
+    /// \name Constructors & Related Methods
+    /// \{
+
+    /// \brief Creates an empty lookup data object.
+    ///
+    BarcodeLookupData(void);
+
+    /// \brief Creates a lookup data object from the corresponding raw data.
+    ///
+    /// \param[in] rawData  raw data loaded from a PBI file
+    ///
+    BarcodeLookupData(const PbiRawBarcodeData& rawData);
+
+    /// \}
+
+public:
+    /// \name Lookup Data Methods
+    /// \{
+
+    /// \brief This method dispatches a single-value lookup query to the proper
+    ///         data member.
+    ///
+    /// Client code, such as custom filters, should use this when possible, only
+    /// touching the raw fields for more complex operations (e.g. when unpacking
+    /// is necessary).
+    ///
+    /// \param[in] field            section field to lookup
+    /// \param[in] value            value to lookup
+    /// \param[in] compareType      compare type
+    ///
+    /// \returns sorted list of unique indices that satisfy the lookup
+    ///
+    template<typename T>
+    IndexList Indices(const BarcodeLookupData::Field& field,
+                      const T& value,
+                      const Compare::Type& compareType = Compare::EQUAL) const;
+
+    /// \brief This method dispatches a multi-value lookup query to the proper
+    ///        data member.
+    ///
+    /// Client code, such as custom filters, should use this when possible, only
+    /// touching the raw fields for more complex operations (e.g. when unpacking
+    /// is necessary).
+    ///
+    /// \note There is no compare type parameter here, it is always
+    ///       Compare::EQUAL. Results will correspond to an exact match on at
+    ///       least one value in the list.
+    ///
+    /// \param[in] field        section field to lookup
+    /// \param[in] values       values to lookup
+    ///
+    /// \returns sorted list of unique indices that satisfy the lookup
+    ///
+    template<typename T>
+    IndexList IndicesMulti(const BarcodeLookupData::Field& field,
+                           const std::vector<T>& values) const;
+
+    /// \}
+
+public:
+    /// \name Lookup Data Members
+    /// \{
+
+    // numeric comparisons make sense, keep key ordering preserved
+    OrderedLookup<int16_t> bcForward_;
+    OrderedLookup<int16_t> bcReverse_;
+    OrderedLookup<int8_t>  bcQual_;
+
+    /// \}
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "internal/PbiLookupData.inl"
+
+#endif // PBILOOKUPDATA_H
diff --git a/include/pbbam/PbiRawData.h b/include/pbbam/PbiRawData.h
index 37c7706..41ebe00 100644
--- a/include/pbbam/PbiRawData.h
+++ b/include/pbbam/PbiRawData.h
@@ -33,6 +33,10 @@
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
 //
+// File Description
+/// \file PbiRawData.h
+/// \brief Defines the classes used for working with raw PBI data.
+//
 // Author: Derek Barnett
 
 #ifndef PBIRAWDATA_H
@@ -48,46 +52,130 @@ namespace BAM {
 
 class BamRecord;
 
+/// \brief The PbiRawBarcodeData class represents the raw data stored in the
+///        "BarcodeData" section of the PBI index.
+///
 class PBBAM_EXPORT PbiRawBarcodeData
 {
 public:
+    /// \name Constructors & Related Methods
+    /// \{
+
+    /// \brief Creates an empty data structure.
     PbiRawBarcodeData(void);
+
+    /// \brief Creates an empty data structure, preallocating space for a known
+    ///        number of records.
     PbiRawBarcodeData(uint32_t numReads);
+
     PbiRawBarcodeData(const PbiRawBarcodeData& other);
     PbiRawBarcodeData(PbiRawBarcodeData&& other);
     PbiRawBarcodeData& operator=(const PbiRawBarcodeData& other);
     PbiRawBarcodeData& operator=(PbiRawBarcodeData&& other);
 
+    /// \}
+
 public:
-    /// Maybe add barcode data for \p b, if available.
-    /// \returns true if record had barcode data
+    /// \name Index Construction
+    /// \{
+
+    /// \brief Adds a record's barcode data.
     ///
-    bool AddRecord(const BamRecord& b);
+    /// \param[in] b    %BAM record
+    ///
+    void AddRecord(const BamRecord& b);
+
+    /// \}
 
 public:
-    std::vector<uint16_t> bcLeft_;
-    std::vector<uint16_t> bcRight_;
-    std::vector<uint8_t>  bcQual_;
-    std::vector<uint8_t>  ctxtFlag_;
+    /// \name Raw Data Containers
+    /// \{
+
+    std::vector<int16_t> bcForward_;
+    std::vector<int16_t> bcReverse_;
+    std::vector<int8_t>  bcQual_;
+
+    /// \}
 };
 
+/// \brief The PbiRawMappedData class represents the raw data stored in the
+///        "MappedData" section of the PBI index.
+///
 class PBBAM_EXPORT PbiRawMappedData
 {
 public:
+    /// \name Constructors & Related Methods
+    /// \{
+
+    /// \brief Creates an empty data structure.
     PbiRawMappedData(void);
+
+    /// \brief Creates an empty data structure, preallocating space for a known
+    ///        number of records.
     PbiRawMappedData(uint32_t numReads);
+
     PbiRawMappedData(const PbiRawMappedData& other);
     PbiRawMappedData(PbiRawMappedData&& other);
     PbiRawMappedData& operator=(const PbiRawMappedData& other);
     PbiRawMappedData& operator=(PbiRawMappedData&& other);
 
+    /// \}
+
+public:
+    /// \name Index Construction
+    /// \{
+
+    /// \brief Adds a record's mapping data.
+    ///
+    /// \param[in] b    %BAM record
+    ///
+    void AddRecord(const BamRecord& b);
+
+    /// \}
+
 public:
-    /// Maybe add mapping data for \p b, if available.
-    /// \returns true if record had mapping data
+    /// \name Index Data Query
+    /// \{
+
+    /// \brief Calculates the number of deleted bases for a particular record.
+    ///
+    /// Convenience method. Equivalent to:
+    /// \code{.cpp}
+    /// NumDeletedAndInsertedBasesAt(i).first;
+    /// \endcode
+    ///
+    /// \param[in] recordIndex  i-th record
+    /// \returns number of deleted bases
+    ///
+    uint32_t NumDeletedBasesAt(size_t recordIndex) const;
+
+    /// \brief Calculates the number of inserted bases for a particular record.
+    ///
+    /// Convenience method. Equivalent to:
+    /// \code{.cpp}
+    /// NumDeletedAndInsertedBasesAt(i).second;
+    /// \endcode
     ///
-    bool AddRecord(const BamRecord& b);
+    /// \param[in] recordIndex  i-th record
+    /// \returns number of inserted bases
+    ///
+    uint32_t NumInsertedBasesAt(size_t recordIndex) const;
+
+    /// \brief Calculates the number of deleted & inserted bases for a
+    ///        particular record.
+    ///
+    /// \param[in] recordIndex  i-th record in the data set
+    /// \returns a pair consisting of (numDeletions,numInsertions)
+    ///
+    std::pair<uint32_t, uint32_t>
+    NumDeletedAndInsertedBasesAt(size_t recordIndex) const;
+
+    /// \}
 
 public:
+    /// \name Raw Data Containers
+    /// \{
+
     std::vector<int32_t>  tId_;
     std::vector<uint32_t> tStart_;
     std::vector<uint32_t> tEnd_;
@@ -97,8 +185,18 @@ public:
     std::vector<uint32_t> nM_;
     std::vector<uint32_t> nMM_;
     std::vector<uint8_t>  mapQV_;
+
+    /// \}
 };
 
+/// \brief The PbiReferenceEntry class represents a single reference in the PBI
+///        CoordinateSorted section.
+///
+/// A reference entry consists of an associated reference ID (tId), as well as
+/// start and end indices into the %BAM or PBI.
+///
+/// \note Rows are given in the interval [start, end).
+///
 class PBBAM_EXPORT PbiReferenceEntry
 {
 public:
@@ -106,80 +204,168 @@ public:
     typedef uint32_t Row;
 
 public:
+    static const ID  UNMAPPED_ID;
+    static const Row UNSET_ROW;
+
+public:
+    /// \name Constructors & Related Methods
+    /// \{
+
+    /// \brief Creates a default entry.
+    ///
+    /// - default ID:   PbiReferenceEntry::UNMAPPED_ID \n
+    /// - default rows: PbiReferenceEntry::UNSET_ROW
+    ///
     PbiReferenceEntry(void);
+
+    /// \brief Creates a reference entry, with no rows set.
+    ///
+    /// - default rows: PbiReferenceEntry::UNSET_ROW
+    ///
     PbiReferenceEntry(ID id);
+
+    /// \brief Creates a reference entry, with rows set.
+    ///
+    PbiReferenceEntry(ID id, Row beginRow, Row endRow);
+
     PbiReferenceEntry(const PbiReferenceEntry& other);
     PbiReferenceEntry(PbiReferenceEntry&& other);
     PbiReferenceEntry& operator=(const PbiReferenceEntry& other);
     PbiReferenceEntry& operator=(PbiReferenceEntry&& other);
 
-    bool operator==(const PbiReferenceEntry& other) const
-    {
-        return tId_      == other.tId_ &&
-               beginRow_ == other.beginRow_ &&
-               endRow_   == other.endRow_;
-    }
+    bool operator==(const PbiReferenceEntry& other) const;
 
-public:
-    static const ID  UNMAPPED_ID;
-    static const Row UNSET_ROW;
+    /// \}
 
 public:
+    /// \name Reference Data Members
+    /// \{
+
     ID  tId_;
     Row beginRow_;
     Row endRow_;
+
+    /// \}
 };
 
+/// \brief The PbiRawReferenceData class represents the raw data stored in the
+///        "CoordinateSortedData" section of the PBI index.
+///
 class PBBAM_EXPORT PbiRawReferenceData
 {
 public:
+    /// \name Constructors & Related Methods
+    /// \{
+
+    /// \brief Creates an empty data structure.
     PbiRawReferenceData(void);
+
+    /// \brief Creates an empty data structure, preallocating space for a
+    ///        number of references.
+    ///
+    /// This constructor is recommended as this is the safest way to ensure that
+    /// references without observed mappings are included in the final output.
+    ///
     PbiRawReferenceData(uint32_t numRefs);
+
     PbiRawReferenceData(const PbiRawReferenceData& other);
     PbiRawReferenceData(PbiRawReferenceData&& other);
     PbiRawReferenceData& operator=(const PbiRawReferenceData& other);
     PbiRawReferenceData& operator=(PbiRawReferenceData&& other);
 
+    /// \}
+
 public:
+    /// \name Raw Data Containers
+    /// \{
+
     std::vector<PbiReferenceEntry> entries_;
+
+    /// \}
 };
 
-class PBBAM_EXPORT PbiRawSubreadData
+/// \brief The PbiRawBasicData class represents the raw data stored in the
+///        "BasicData" section of the PBI index.
+///
+class PBBAM_EXPORT PbiRawBasicData
 {
 public:
-    PbiRawSubreadData(void);
-    PbiRawSubreadData(uint32_t numReads);
-    PbiRawSubreadData(const PbiRawSubreadData& other);
-    PbiRawSubreadData(PbiRawSubreadData&& other);
-    PbiRawSubreadData& operator=(const PbiRawSubreadData& other);
-    PbiRawSubreadData& operator=(PbiRawSubreadData&& other);
+    /// \name Constructors & Related Methods
+    /// \{
+
+    /// \brief Creates an empty data structure.
+    PbiRawBasicData(void);
+
+    /// \brief Creates an empty data structure, preallocating space for a known
+    ///        number of records.
+    PbiRawBasicData(uint32_t numReads);
+
+    PbiRawBasicData(const PbiRawBasicData& other);
+    PbiRawBasicData(PbiRawBasicData&& other);
+    PbiRawBasicData& operator=(const PbiRawBasicData& other);
+    PbiRawBasicData& operator=(PbiRawBasicData&& other);
+
+    /// \}
 
 public:
+    /// \name Index Construction
+    /// \{
+
+    /// \brief Adds a record's basic data.
+    ///
+    /// \param[in] b        %BAM record
+    /// \param[in] offset   \b virtual file offset where record begins
+    ///
     void AddRecord(const BamRecord& b, int64_t offset);
 
+    /// \}
+
 public:
+    /// \name Raw Data Containers
+    /// \{
+
     std::vector<int32_t>  rgId_;
     std::vector<int32_t>  qStart_;
     std::vector<int32_t>  qEnd_;
     std::vector<int32_t>  holeNumber_;
-    std::vector<uint16_t> readQual_;
+    std::vector<float>    readQual_;
+    std::vector<uint8_t>  ctxtFlag_;
     std::vector<int64_t>  fileOffset_;
+
+    /// \}
 };
 
+/// \deprecated For legacy-code support only, and will be removed soon.
+///             Use PbiRawBasicData instead.
+///
+typedef PbiRawBasicData PbiRawSubreadData;
+
+/// \brief The PbiRawData class provides a representation of raw PBI index
+///        data, used mostly for construction or I/O.
+///
+/// The PbiRawData class itself provides access to a few high-level attributes
+/// (e.g. version, number of records, etc.). The actual index data is stored
+/// in its member components:
+///     PbiRawBasicData,
+///     PbiRawMappedData,
+///     PbiRawReferenceData, &
+///     PbiRawBarcodeData .
+///
 class PBBAM_EXPORT PbiRawData
 {
 public:
     /// \name Constructors & Related Methods
     /// \{
 
-    /// Default ctor. Used in index building
+    /// \brief Creates an empty raw data structure, ready for building.
+    ///
     PbiRawData(void);
 
-    /// Load raw data from \p pbiFilename.
+    /// \brief Loads raw PBI data from a file.
     ///
-    /// \param[in] pbiFilename PBI filename
+    /// \param[in] pbiFilename      ".pbi" filename
     ///
-    /// \throws if file contents cannot be loaded properly
+    /// \throws std::runtime_error if file contents cannot be loaded properly
     ///
     PbiRawData(const std::string& pbiFilename);
 
@@ -192,116 +378,136 @@ public:
     /// \}
 
 public:
-    /// \name Attributes
+    /// \name PBI General Attributes
     /// \{
 
+    /// \returns true if index has BarcodeData section
     bool HasBarcodeData(void) const;
+
+    /// \returns true if index has MappedData section
     bool HasMappedData(void) const;
+
+    /// \returns true if index has ReferenceData section
     bool HasReferenceData(void) const;
+
+    /// \returns true if index has \p section
+    /// \param[in] section PbiFile::Section identifier
+    ///
     bool HasSection(const PbiFile::Section section) const;
 
+    /// \returns index filename ("*.pbi")
+    ///
+    /// \note Returns an empty string if the underlying data was generated, not
+    ///       loaded from file.
+    ///
+    std::string Filename(void) const;
+
+    /// \returns enum flags representing the file sections present
     PbiFile::Sections FileSections(void) const;
+
+    /// \returns the number of records in the PBI (& associated %BAM)
     uint32_t NumReads(void) const;
+
+    /// \returns the PBI file's version
     PbiFile::VersionEnum Version(void) const;
 
     /// \}
 
 public:
-    /// \name Indexed Sections
+    /// \name Raw Data Components
     /// \{
 
-    const PbiRawBarcodeData&   BarcodeData(void) const;
-    const PbiRawMappedData&    MappedData(void) const;
+    /// \returns const reference to raw BarcodeData structure
+    ///
+    /// May be empty, check result of HasBarcodeData.
+    ///
+    const PbiRawBarcodeData& BarcodeData(void) const;
+
+    /// \returns const reference to raw BasicData structure
+    const PbiRawBasicData& BasicData(void) const;
+
+    /// \returns const reference to raw MappedData structure
+    ///
+    /// May be empty, check result of HasMappedData.
+    ///
+    const PbiRawMappedData& MappedData(void) const;
+
+    /// \returns const reference to raw ReferenceData structure
+    ///
+    /// May be empty, check result of HasReferenceData.
+    ///
     const PbiRawReferenceData& ReferenceData(void) const;
-    const PbiRawSubreadData&   SubreadData(void) const;
 
     /// \}
 
 public:
-    /// \name Attributes
+    /// \name PBI General Attributes
     /// \{
 
+    /// \brief Sets the file section flags.
+    ///
+    /// \param[in] sections     section flags
+    /// \returns reference to this index
+    ///
     PbiRawData& FileSections(PbiFile::Sections sections);
+
+    /// \brief Sets the number of indexed records.
+    ///
+    /// \param[in] num  number of records
+    /// \returns reference to this index
+    ///
     PbiRawData& NumReads(uint32_t num);
+
+    /// \brief Sets PBI file version.
+    ///
+    /// \param[in] version  file version
+    /// \returns reference to this index
+    ///
     PbiRawData& Version(PbiFile::VersionEnum version);
 
     /// \}
 
 public:
-    /// \name Indexed Sections
+    /// \name Raw Data Components
+    /// \{
+
+    /// \returns reference to raw BarcodeData structure
+    ///
+    /// May be empty, check result of HasBarcodeData.
+    ///
+    PbiRawBarcodeData& BarcodeData(void);
 
-    PbiRawBarcodeData&   BarcodeData(void);
-    PbiRawMappedData&    MappedData(void);
+    /// \returns reference to raw BasicData structure
+    PbiRawBasicData& BasicData(void);
+
+    /// \returns reference to raw MappedData structure
+    ///
+    /// May be empty, check result of HasMappedData.
+    ///
+    PbiRawMappedData& MappedData(void);
+
+    /// \returns reference to raw ReferenceData structure
+    ///
+    /// May be empty, check result of HasReferenceData.
+    ///
     PbiRawReferenceData& ReferenceData(void);
-    PbiRawSubreadData&   SubreadData(void);
 
     /// \}
 
 private:
+    std::string          filename_;
     PbiFile::VersionEnum version_;
     PbiFile::Sections    sections_;
     uint32_t             numReads_;
     PbiRawBarcodeData    barcodeData_;
     PbiRawMappedData     mappedData_;
     PbiRawReferenceData  referenceData_;
-    PbiRawSubreadData    subreadData_;
+    PbiRawBasicData      basicData_;
 };
 
-inline const PbiRawBarcodeData& PbiRawData::BarcodeData(void) const
-{ return barcodeData_; }
-
-inline PbiRawBarcodeData& PbiRawData::BarcodeData(void)
-{ return barcodeData_; }
-
-inline PbiFile::Sections PbiRawData::FileSections(void) const
-{ return sections_; }
-
-inline PbiRawData& PbiRawData::FileSections(PbiFile::Sections sections)
-{ sections_ = sections; return *this; }
-
-inline bool PbiRawData::HasBarcodeData(void) const
-{ return HasSection(PbiFile::BARCODE); }
-
-inline bool PbiRawData::HasMappedData(void) const
-{ return HasSection(PbiFile::MAPPED); }
-
-inline bool PbiRawData::HasReferenceData(void) const
-{ return HasSection(PbiFile::REFERENCE); }
-
-inline bool PbiRawData::HasSection(const PbiFile::Section section) const
-{ return (sections_ & section) != 0; }
-
-inline uint32_t PbiRawData::NumReads(void) const
-{ return numReads_; }
-
-inline PbiRawData& PbiRawData::NumReads(uint32_t num)
-{ numReads_ = num; return *this; }
-
-inline const PbiRawMappedData& PbiRawData::MappedData(void) const
-{ return mappedData_; }
-
-inline PbiRawMappedData& PbiRawData::MappedData(void)
-{ return mappedData_; }
-
-inline const PbiRawReferenceData& PbiRawData::ReferenceData(void) const
-{ return referenceData_; }
-
-inline PbiRawReferenceData& PbiRawData::ReferenceData(void)
-{ return referenceData_; }
-
-inline const PbiRawSubreadData& PbiRawData::SubreadData(void) const
-{ return subreadData_; }
-
-inline PbiRawSubreadData& PbiRawData::SubreadData(void)
-{ return subreadData_; }
-
-inline PbiFile::VersionEnum PbiRawData::Version(void) const
-{ return version_; }
-
-inline PbiRawData& PbiRawData::Version(PbiFile::VersionEnum version)
-{ version_ = version; return *this; }
-
 } // namespace BAM
 } // namespace PacBio
 
+#include "pbbam/internal/PbiRawData.inl"
+
 #endif // PBIRAWDATA_H
diff --git a/include/pbbam/Position.h b/include/pbbam/Position.h
index 110d7ed..aece8c2 100644
--- a/include/pbbam/Position.h
+++ b/include/pbbam/Position.h
@@ -33,6 +33,10 @@
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
 //
+// File Description
+/// \file Position.h
+/// \brief Defines the Position typedef.
+//
 // Author: Derek Barnett
 
 #ifndef POSITION_H
@@ -43,14 +47,17 @@
 namespace PacBio {
 namespace BAM {
 
+/// \brief This type is used to refer to genomic positions.
 /// \typedef typedef int32_t PacBio::BAM::Position
 ///
-/// This type refers to all genomic positions. We use signed
-/// because SAM/BAM uses the -1 value to indicate unknown, unmapped, etc.
-/// positions.
+/// We use a signed integer because SAM/BAM uses the -1 value to indicate
+/// unknown or unmapped positions.
 ///
 typedef int32_t Position;
 
+/// \brief This constant is widely used as a "missing" or "invalid" position
+///        marker.
+///
 static const Position UnmappedPosition = Position(-1);
 
 } // namespace BAM
diff --git a/include/pbbam/ProgramInfo.h b/include/pbbam/ProgramInfo.h
index d1bbcfe..e137707 100644
--- a/include/pbbam/ProgramInfo.h
+++ b/include/pbbam/ProgramInfo.h
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file ProgramInfo.h
+/// \brief Defines the ProgramInfo class.
+//
 // Author: Derek Barnett
 
 #ifndef PROGRAMINFO_H
@@ -45,14 +49,27 @@
 namespace PacBio {
 namespace BAM {
 
+/// \brief The ProgramInfo class represents a program entry (\@PG) in the SAM
+///        header.
+///
 class PBBAM_EXPORT ProgramInfo
 {
 public:
     /// \name Conversion & Validation
     ///
 
+    /// \brief Creates a ProgramInfo object from SAM-formatted text.
+    ///
+    /// \param[in] sam  SAM-formatted text
+    /// \returns program info object
+    ///
     static ProgramInfo FromSam(const std::string& sam);
 
+    /// \brief Converts a ProgramInfo object to its SAM-formatted text.
+    ///
+    /// \param[in] prog     input ProgramInfo object
+    /// \returns SAM-formatted text (no trailing newline)
+    ///
     static std::string ToSam(const ProgramInfo& prog);
 
     /// \}
@@ -61,8 +78,16 @@ public:
     /// \name Constructors & Related Methods
     /// \{
 
+    /// \brief Creates an empty program info object.
     ProgramInfo(void);
+
+    /// \brief Creates a program info object with an ID.
+    ///
+    /// \param[in] id       program ID (\@PG:ID)
+    ///
     ProgramInfo(const std::string& id);
+
+
     ProgramInfo(const ProgramInfo& other);
     ProgramInfo(ProgramInfo&& other);
     ProgramInfo& operator=(const ProgramInfo& other);
@@ -72,115 +97,126 @@ public:
     /// \}
 
 public:
+    /// \name Conversion & Validation
+    /// \{
+
+    /// \returns true if program info is valid
+    ///
+    /// Currently this checks only that ProgramInfo::Id is not an empty
+    /// string.
+    ///
+    bool IsValid(void) const;
+
+    /// \brief Converts this object to its SAM-formatted text.
+    ///
+    /// \returns SAM-formatted text (no trailing newline)
+    ///
+    std::string ToSam(void) const;
+
+    /// \}
+
+public:
     /// \name Attributes
     /// \{
 
+    /// \returns string value of \@PG:CL
     std::string CommandLine(void) const;
 
+    /// \returns any non-standard tags added to the \@PG entry
+    ///
+    /// Result map consists of {tagName => value}.
+    ///
     std::map<std::string, std::string> CustomTags(void) const;
 
+    /// \returns string value of \@PG:DS
     std::string Description(void) const;
 
+    /// \returns string value of \@PG:ID
     std::string Id(void) const;
 
+    /// \returns string value of \@PG:PN
     std::string Name(void) const;
 
+    /// \returns string value of \@PG:PP
     std::string PreviousProgramId(void) const;
 
+    /// \returns string value of \@PG:VN
     std::string Version(void) const;
 
     /// \}
 
-    /// \name Conversion & Validation
-    ///
-
-    bool IsValid(void) const;
-
-    std::string ToSam(void) const;
-
-    /// \}
-
 public:
     /// \name Attributes
     /// \{
 
+    /// \brief Sets the value for \@PG:CL
+    ///
+    /// \param[in] cmd      new value
+    /// \returns reference to this object
+    ///
     ProgramInfo& CommandLine(const std::string& cmd);
 
+    /// \brief Sets a new collection of non-standard tags.
+    ///
+    /// Custom tag map entries should consist of {tagName => value}.
+    ///
+    /// \param[in] custom      new tags
+    /// \returns reference to this object
+    ///
     ProgramInfo& CustomTags(const std::map<std::string, std::string>& custom);
 
+    /// \brief Sets the value for \@PG:DS
+    ///
+    /// \param[in] description      new value
+    /// \returns reference to this object
+    ///
     ProgramInfo& Description(const std::string& description);
 
+    /// \brief Sets the value for \@PG:ID
+    ///
+    /// \param[in] id      new value
+    /// \returns reference to this object
+    ///
     ProgramInfo& Id(const std::string& id);
 
+    /// \brief Sets the value for \@PG:PN
+    ///
+    /// \param[in] name      new value
+    /// \returns reference to this object
+    ///
     ProgramInfo& Name(const std::string& name);
 
+    /// \brief Sets the value for \@PG:PP
+    ///
+    /// \param[in] id      new value
+    /// \returns reference to this object
+    ///
     ProgramInfo& PreviousProgramId(const std::string& id);
 
+    /// \brief Sets the value for \@PG:VN
+    ///
+    /// \param[in] version      new value
+    /// \returns reference to this object
+    ///
     ProgramInfo& Version(const std::string& version);
 
     /// \}
 
 private:
-    std::string commandLine_;            // CL:<CommandLine>
-    std::string description_;            // DS:<Description>
-    std::string id_;                     // ID:<ID>              * Unique ID required for valid SAM header*
-    std::string name_;                   // PN:<Name>
-    std::string previousProgramId_;      // PP:<PreviousProgramID>
-    std::string version_;                // VN:<Version>
+    std::string commandLine_;       // CL:<CommandLine>
+    std::string description_;       // DS:<Description>
+    std::string id_;                // ID:<ID>  * must be unique for valid SAM *
+    std::string name_;              // PN:<Name>
+    std::string previousProgramId_; // PP:<PreviousProgramID>
+    std::string version_;           // VN:<Version>
 
     // custom attributes
-    std::map<std::string, std::string> custom_; // tag => value
+    std::map<std::string, std::string> custom_;     // tag => value
 };
 
-inline std::string ProgramInfo::CommandLine(void) const
-{ return commandLine_; }
-
-inline ProgramInfo& ProgramInfo::CommandLine(const std::string& cmd)
-{ commandLine_ = cmd; return *this; }
-
-inline std::map<std::string, std::string> ProgramInfo::CustomTags(void) const
-{ return custom_; }
-
-inline ProgramInfo& ProgramInfo::CustomTags(const std::map<std::string, std::string>& custom)
-{ custom_ = custom; return *this; }
-
-inline std::string ProgramInfo::Description(void) const
-{ return description_; }
-
-inline ProgramInfo& ProgramInfo::Description(const std::string& description)
-{ description_ = description; return *this; }
-
-inline std::string ProgramInfo::Id(void) const
-{ return id_; }
-
-inline ProgramInfo& ProgramInfo::Id(const std::string& id)
-{ id_ = id; return *this; }
-
-inline bool ProgramInfo::IsValid(void) const
-{ return !id_.empty(); }
-
-inline std::string ProgramInfo::Name(void) const
-{ return name_; }
-
-inline ProgramInfo& ProgramInfo::Name(const std::string& name)
-{ name_ = name; return *this; }
-
-inline std::string ProgramInfo::PreviousProgramId(void) const
-{ return previousProgramId_; }
-
-inline ProgramInfo& ProgramInfo::PreviousProgramId(const std::string& id)
-{ previousProgramId_ = id; return *this; }
-
-inline std::string ProgramInfo::ToSam(const ProgramInfo& prog)
-{ return prog.ToSam(); }
-
-inline std::string ProgramInfo::Version(void) const
-{ return version_; }
-
-inline ProgramInfo& ProgramInfo::Version(const std::string& version)
-{ version_ = version; return *this; }
-
 } // namespace BAM
 } // namespace PacBio
 
+#include "pbbam/internal/ProgramInfo.inl"
+
 #endif // PROGRAMINFO_H
diff --git a/include/pbbam/BamReader.h b/include/pbbam/QNameQuery.h
similarity index 55%
copy from include/pbbam/BamReader.h
copy to include/pbbam/QNameQuery.h
index bd0ced6..ad93d03 100644
--- a/include/pbbam/BamReader.h
+++ b/include/pbbam/QNameQuery.h
@@ -32,83 +32,63 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file QNameQuery.h
+/// \brief Defines the QNameQuery class.
+//
 // Author: Derek Barnett
 
-#ifndef BAMREADER_H
-#define BAMREADER_H
+#ifndef QNAMEQUERY_H
+#define QNAMEQUERY_H
 
-#include "pbbam/BamHeader.h"
-#include "pbbam/BamRecord.h"
-#include "pbbam/Config.h"
-#include <string>
+#include "pbbam/internal/QueryBase.h"
+#include <memory>
 
 namespace PacBio {
 namespace BAM {
 
-class PBBAM_EXPORT BamReader
+/// \brief The QNameQuery class provides iterable access to a DataSet's records,
+///        with each iteration of the query returning a contiguous block of
+///        records that share a name.
+///
+/// There is no random-access here. It is simply a sequential read-through,
+/// grouping contiguous results that share a BamRecord::FullName.
+///
+/// \note The name is not ideal - but for legacy reasons, it will remain as-is
+///       for now. It will likely become something more explicit, like
+///       "SequentialQNameGroupQuery", so that the name "QNameQuery" will be
+///       available for a built-in query on a QNAME filter (or whitelist). This
+///       will make it more consistent with other queries (ReadAccuracyQuery,
+///       SubreadLengthQuery, ZmwQuery, etc).
+///
+class PBBAM_EXPORT QNameQuery : public internal::IGroupQuery
 {
-
-public:
-    enum ReadError
-    {
-        NoError = 0
-      , OpenFileError
-      , ReadHeaderError
-      , ReadRecordError
-    };
-
-public:
-    BamReader(void);
-    virtual ~BamReader(void);
-
 public:
 
-    /// Closes the BAM file reader.
-    void Close(void);
-
-    /// Opens a BAM file for reading.
+    /// \brief Creates a new QNameQuery.
     ///
-    /// Prefix \p filename with "http://" or "ftp://" for remote files,
-    /// or set to "-" for stdin.
+    /// \param[in] dataset      input data source(s)
     ///
-    /// \param[in] filename path to input BAM file
+    /// \throws std::runtime_error on failure to open/read underlying %BAM files
     ///
-    /// \returns success/failure
-    bool Open(const std::string& filename);
-
-    /// \returns header as BamHeader object
-    BamHeader::SharedPtr Header(void) const;
-
-    /// \returns error status code
-    BamReader::ReadError Error(void) const;
-
-    /// \returns true if error encountered
-    bool HasError(void) const;
+    QNameQuery(const DataSet& dataset);
+    ~QNameQuery(void);
 
-    /// Fetches the next record in a BAM file.
+public:
+    /// \brief Main iteration point for record access.
     ///
-    /// \param[out] record pointer to BamRecord object
+    /// Most client code should not need to use this method directly. Use
+    /// iterators instead.
     ///
-    /// \returns succcess/failure
-    bool GetNext(PBBAM_SHARED_PTR<BamRecord> record);
-
-public:
-    std::string PacBioBamVersion(void) const;
-
-protected:
-    bool GetNext(PBBAM_SHARED_PTR<bam1_t> rawRecord);
-    void InitialOpen(void);
-    PBBAM_SHARED_PTR<bam_hdr_t> RawHeader(void) const;
+    bool GetNext(std::vector<BamRecord>& records);
 
-protected:
-    PBBAM_SHARED_PTR<samFile>   file_;
-    PBBAM_SHARED_PTR<bam_hdr_t> header_;
-    std::string filename_;
-    BamReader::ReadError error_;
+private:
+    struct QNameQueryPrivate;
+    std::unique_ptr<QNameQueryPrivate> d_;
 };
 
 } // namespace BAM
 } // namespace PacBio
 
-#endif // BAMREADER_H
+#endif // QNAMEQUERY_H
diff --git a/include/pbbam/QualityValue.h b/include/pbbam/QualityValue.h
index 7eecc12..ab108d0 100644
--- a/include/pbbam/QualityValue.h
+++ b/include/pbbam/QualityValue.h
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file QualityValue.h
+/// \brief Defines the QualityValue class.
+//
 // Author: Derek Barnett
 
 #ifndef QUALITYVALUE_H
@@ -47,11 +51,12 @@ namespace BAM {
 
 /// \brief The QualityValue class represents a FASTQ-compatible quality value.
 ///
-/// Integers are clamped to [0, 93] (corresponding to ASCII printable chars [!-~]).
+/// Integers are clamped to [0, 93] (corresponding to ASCII printable chars
+/// [!-~]).
 ///
-/// Use the explicitly-named static method for constructing QualityValue entries from
-/// FASTQ encoding characters. Otherwise, the value will be interpreted as the actual
-/// integer value.
+/// Use QualityValue::FromFastq for constructing entries from FASTQ encoding
+/// characters. Otherwise, the resulting QualityValue will be interpreted using
+/// the character's numeric value (ignoring the FASTQ offset of 33).
 ///
 class PBBAM_EXPORT QualityValue
 {
@@ -59,54 +64,52 @@ public:
     static const uint8_t MAX;
 
 public:
-    /// Creates a QualityValue from a FASTQ encoding character.
+    /// \name Conversion Methods
+    /// \{
+
+    /// \brief Creates a QualityValue from a FASTQ-encoding character.
+    ///
+    /// \param[in] c    FASTQ character
+    /// \returns quality value representing (c - 33)
+    ///
     static QualityValue FromFastq(const char c);
 
+    /// \}
+
 public:
     /// \name Constructors & Related Methods
     ///  \{
 
+    /// \brief Creates a QualityValue with specified value.
+    ///
+    /// \param[in] value    quality value
+    ///
     QualityValue(const uint8_t value = 0);
+
     QualityValue(const QualityValue& other);
     ~QualityValue(void);
 
     /// \}
 
 public:
-    /// \returns the FASTQ encoding char for this QualityValue
+    /// \name Conversion Methods
+    /// \{
+
+    /// \returns the FASTQ-encoding char for this QualityValue
     char Fastq(void) const;
 
     /// \returns the integer value of this QualityValue
     operator uint8_t(void) const;
 
+    /// \}
+
 private:
     uint8_t value_;
 };
 
-inline QualityValue::QualityValue(const uint8_t value)
-    : value_(value)
-{
-    // clamp QV
-    if (value_ > QualityValue::MAX)
-        value_ = QualityValue::MAX;
-}
-
-inline QualityValue::QualityValue(const QualityValue& other)
-    : value_(other.value_)
-{ }
-
-inline QualityValue::~QualityValue(void) { }
-
-inline char QualityValue::Fastq(void) const
-{ return static_cast<char>(value_ + 33); }
-
-inline QualityValue::operator uint8_t(void) const
-{ return value_; }
-
-inline QualityValue QualityValue::FromFastq(const char c)
-{ return QualityValue(static_cast<uint8_t>(c-33)); }
-
 } // namespace BAM
 } // namespace PacBio
 
+#include "pbbam/internal/QualityValue.inl"
+
 #endif // QUALITYVALUE_H
diff --git a/include/pbbam/QualityValues.h b/include/pbbam/QualityValues.h
index 240b96f..af054f6 100644
--- a/include/pbbam/QualityValues.h
+++ b/include/pbbam/QualityValues.h
@@ -32,14 +32,17 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file QualityValues.h
+/// \brief Defines the QualityValues class.
+//
 // Author: Derek Barnett
 
 #ifndef QUALITYVALUES_H
 #define QUALITYVALUES_H
 
 #include "pbbam/QualityValue.h"
-#include <algorithm>
 #include <string>
 #include <vector>
 
@@ -47,39 +50,101 @@ namespace PacBio {
 namespace BAM {
 
 /// \brief The QualityValues class represents a sequence of FASTQ-compatible
-/// quality values. See QualityValue documentation for details.
+///        quality values. See QualityValue documentation for more details.
 ///
 class PBBAM_EXPORT QualityValues : public std::vector<QualityValue>
 {
 public:
-    /// Creates a QualityValues collection from a FASTQ-encoded string.
+    /// \brief Creates a QualityValues object from a FASTQ-encoded string.
+    ///
+    /// \param[in] fastq    FASTQ-encoded string
+    /// \returns corresponding QualityValues object
+    ///
     static QualityValues FromFastq(const std::string& fastq);
 
 public:
     /// \name Constructors & Related Methods
     ///  \{
 
+    /// \brief Default constructor - creates an empty QualityValues object.
     QualityValues(void);
+
+    /// \brief Creates a QualityValues object from a FASTQ-encoded string.
+    ///
+    /// \param[in] fastqString  FASTQ-encoded string
+    ///
     explicit QualityValues(const std::string& fastqString);
+
+    /// \brief Creates a QualityValues object from a vector of QualityValue
+    ///        elements.
+    ///
+    /// \param[in] quals    vector of QualityValue elements
+    ///
     explicit QualityValues(const std::vector<QualityValue>& quals);
+
+    /// \brief Creates a QualityValues object by moving from a vector of
+    ///        QualityValue elements.
+    ///
+    /// \param[in] quals    vector of QualityValue elements
+    ///
+    QualityValues(std::vector<QualityValue>&& quals);
+
+    /// \brief Creates a QualityValues object from a vector of (numeric) quality
+    ///        values.
+    ///
+    /// \param[in] quals    vector of quality value numbers
+    ///
     explicit QualityValues(const std::vector<uint8_t>& quals);
 
+    /// \brief Creates a QualityValues object from the contents of the range:
+    ///        [first, last)
+    ///
+    /// \param[in] first    input iterator, whose element is a numeric quality
+    /// \param[in] last     input iterator, whose element is a numeric quality
+    ///
     QualityValues(const std::vector<uint8_t>::const_iterator first,
                   const std::vector<uint8_t>::const_iterator last);
+
+    /// \brief Creates a QualityValues object from the contents of the range:
+    ///        [first, last)
+    ///
+    /// \param[in] first    input iterator, whose element is a QualityValue
+    /// \param[in] last     input iterator, whose element is a QualityValue
+    ///
     QualityValues(const QualityValues::const_iterator first,
                   const QualityValues::const_iterator last);
 
+    /// \brief Copy constructor
     QualityValues(const QualityValues& other);
-    QualityValues(QualityValues&& other);
 
-    QualityValues(std::vector<QualityValue>&& quals);
+    /// \brief Move constructor
+    QualityValues(QualityValues&& other);
 
+    /// \brief Copy assignment operator
+    ///
+    /// \param[in] other    QualityValues object
+    ///
     QualityValues& operator=(const QualityValues& other);
+
+    /// \brief Move assignment operator
+    ///
+    /// \param[in] other    QualityValues object
+    ///
     QualityValues& operator=(QualityValues&& other);
 
+    /// \brief Copy assignment operator
+    ///
+    /// \param[in] quals    vector of QualityValue elements
+    ///
     QualityValues& operator=(const std::vector<QualityValue>& quals);
+
+    /// \brief Move assignment operator
+    ///
+    /// \param[in] quals    vector of QualityValue elements
+    ///
     QualityValues& operator=(std::vector<QualityValue>&& quals);
 
+    /// \brief Destructor
     ~QualityValues(void);
 
     /// \}
@@ -97,137 +162,39 @@ public:
     /// \name Iterators
     /// \{
 
-    /// \returns A const_iterator to the beginning of the sequence.
+    /// \returns a const_iterator to the beginning of the sequence
     std::vector<QualityValue>::const_iterator cbegin(void) const;
 
-    /// \returns A const_iterator to the element past the end of the sequence.
+    /// \returns a const_iterator to the element following the last element
     std::vector<QualityValue>::const_iterator cend(void) const;
 
-    /// \returns A const_iterator to the beginning of the sequence.
+    /// \returns a const_iterator to the beginning of the sequence
     std::vector<QualityValue>::const_iterator begin(void) const;
 
-    /// \returns A const_iterator to the element past the end of the sequence.
+    /// \returns a const_iterator to the element following the last element
     std::vector<QualityValue>::const_iterator end(void) const;
 
-    /// \returns An iterator to the beginning of the sequence.
+    /// \returns an iterator to the beginning of the sequence
     std::vector<QualityValue>::iterator begin(void);
 
-    /// \returns An iterator to the element past the end of the sequence.
+    /// \returns an iterator to the element following the last element
     std::vector<QualityValue>::iterator end(void);
 
     /// \}
 
 public:
-    /// \returns the FASTQ-encoded string for this collection
-    std::string Fastq(void) const;
-};
-
-inline QualityValues::QualityValues(void)
-    : std::vector<QualityValue>()
-{ }
-
-inline QualityValues::QualityValues(const std::string& fastqString)
-    : std::vector<QualityValue>()
-{
-    resize(fastqString.size());
-    std::transform(fastqString.cbegin(), fastqString.cend(),
-                   begin(), QualityValue::FromFastq);
-}
-
-inline QualityValues::QualityValues(const std::vector<QualityValue>& quals)
-    : std::vector<QualityValue>(quals)
-{ }
-
-inline QualityValues::QualityValues(const std::vector<uint8_t>& quals)
-    : std::vector<QualityValue>()
-{
-    resize(quals.size());
-    std::copy(quals.cbegin(), quals.cend(), begin());
-}
-
-inline QualityValues::QualityValues(const std::vector<uint8_t>::const_iterator first,
-                                    const std::vector<uint8_t>::const_iterator last)
-    : std::vector<QualityValue>(first, last)
-{ }
-
-inline QualityValues::QualityValues(const QualityValues::const_iterator first,
-                                    const QualityValues::const_iterator last)
-    : std::vector<QualityValue>()
-{
-    assign(first, last);
-}
-
-inline QualityValues::QualityValues(const QualityValues& other)
-    : std::vector<QualityValue>(other)
-{ }
-
-inline QualityValues::QualityValues(std::vector<QualityValue>&& quals)
-    : std::vector<QualityValue>(std::move(quals))
-{ }
-
-inline QualityValues::QualityValues(QualityValues&& other)
-    : std::vector<QualityValue>(std::move(other))
-{ }
-
-inline QualityValues& QualityValues::operator=(const QualityValues& other)
-{ std::vector<QualityValue>::operator=(other); return *this; }
-
-inline QualityValues& QualityValues::operator=(const std::vector<QualityValue>& quals)
-{ std::vector<QualityValue>::operator=(quals); return *this; }
-
-inline QualityValues& QualityValues::operator=(QualityValues&& other)
-{ std::vector<QualityValue>::operator=(std::move(other)); return *this; }
-
-inline QualityValues& QualityValues::operator=(std::vector<QualityValue>&& quals)
-{ std::vector<QualityValue>::operator=(std::move(quals)); return *this; }
-
-inline QualityValues::~QualityValues(void) { }
-
-inline std::vector<QualityValue>::const_iterator QualityValues::cbegin(void) const
-{ return std::vector<QualityValue>::cbegin(); }
-
-inline std::vector<QualityValue>::const_iterator QualityValues::cend(void) const
-{ return std::vector<QualityValue>::cend(); }
-
-inline std::vector<QualityValue>::const_iterator QualityValues::begin(void) const
-{ return std::vector<QualityValue>::begin(); }
-
-inline std::vector<QualityValue>::const_iterator QualityValues::end(void) const
-{ return std::vector<QualityValue>::end(); }
-
-inline std::vector<QualityValue>::iterator QualityValues::begin(void)
-{ return std::vector<QualityValue>::begin(); }
-
-inline std::vector<QualityValue>::iterator QualityValues::end(void)
-{ return std::vector<QualityValue>::end(); }
-
-inline QualityValues QualityValues::FromFastq(const std::string& fastq)
-{
-    return QualityValues(fastq);
-//    QualityValues result;
-//    result.resize(fastq.size());
-//    std::transform(fastq.cbegin(), fastq.cend(), result.begin(), QualityValue::FromFastq);
-//    return result;
-}
-
-inline std::string QualityValues::Fastq(void) const
-{
-    std::string result;
-    result.reserve(size());
-    auto iter = cbegin();
-    const auto end = cend();
-    for (; iter != end; ++iter)
-        result.push_back((*iter).Fastq());
-    return result;
-}
+    /// \name Conversion Methods
+    /// \{
 
-inline bool QualityValues::operator==(const std::string& fastq) const
-{ return *this == QualityValues(fastq); }
+    /// \returns the FASTQ-encoded string for this sequence of quality values
+    std::string Fastq(void) const;
 
-inline bool QualityValues::operator!=(const std::string& fastq) const
-{ return *this != QualityValues(fastq); }
+    /// \}
+};
 
 } // namespace BAM
 } // namespace PacBio
 
+#include "pbbam/internal/QualityValues.inl"
+
 #endif // QUALITYVALUES_H
diff --git a/include/pbbam/QueryBase.h b/include/pbbam/QueryBase.h
deleted file mode 100644
index 6106a4a..0000000
--- a/include/pbbam/QueryBase.h
+++ /dev/null
@@ -1,241 +0,0 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//
-//  * Redistributions in binary form must reproduce the above
-//    copyright notice, this list of conditions and the following
-//    disclaimer in the documentation and/or other materials provided
-//    with the distribution.
-//
-//  * Neither the name of Pacific Biosciences nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-
-// Author: Derek Barnett
-
-#ifndef QUERYBASE_H
-#define QUERYBASE_H
-
-#include "pbbam/BamRecord.h"
-#include "pbbam/BamFile.h"
-
-namespace PacBio {
-namespace BAM {
-
-class QueryBase;
-
-class QueryIterator
-{
-public:
-    BamRecord& operator*(void);
-    BamRecord* operator->(void);
-    QueryIterator& operator++(void);
-    QueryIterator operator++(int);
-    bool operator==(const QueryIterator& other) const;
-    bool operator!=(const QueryIterator& other) const;
-
-    QueryIterator(void);
-    QueryIterator(QueryBase& parent);
-
-private:
-    QueryBase* query_;
-    BamRecord record_;
-    friend class QueryBase;
-};
-
-class QueryConstIterator
-{
-public:
-    const BamRecord& operator*(void) const;
-    const BamRecord* operator->(void) const;
-    QueryConstIterator& operator++(void);
-    QueryConstIterator operator++(int);
-    bool operator==(const QueryConstIterator& other) const;
-    bool operator!=(const QueryConstIterator& other) const;
-
-    QueryConstIterator(void);
-    QueryConstIterator(const QueryBase& parent);
-
-private:
-    QueryBase* query_;
-    BamRecord record_;
-    friend class QueryBase;
-};
-
-/// This class provides the base functionality and iterators for querying BAM files.
-class PBBAM_EXPORT QueryBase {
-
-public:
-    typedef QueryIterator      iterator;
-    typedef QueryConstIterator const_iterator;
-
-public:
-    virtual ~QueryBase(void);
-
-public:
-
-    /// \name Iterators
-    /// \{
-
-    /// \returns an iterator to the beginning of the query results.
-    QueryBase::iterator begin(void);
-
-    /// \returns a const_iterator to the beginning of the query results.
-    QueryBase::const_iterator begin(void) const;
-
-    /// \returns a const_iterator to the beginning of the query results.
-    QueryBase::const_iterator cbegin(void) const;
-
-    /// \returns an iterator marking the end of query results.
-    QueryBase::iterator end(void);
-
-    /// \returns a const_iterator marking the end of query results.
-    QueryBase::const_iterator end(void) const;
-
-    /// \returns a const_iterator marking the end of query results.
-    QueryBase::const_iterator cend(void) const;
-
-    /// \}
-
-protected:
-    QueryBase(const BamFile& file);
-
-    /// Primary method for iterating through a query. Derived classes will implement this
-    /// method to return
-    virtual bool GetNext(BamRecord& x) =0;
-
-protected:
-    const BamFile& file_;
-
-    friend class QueryIterator;
-    friend class QueryConstIterator;
-};
-
-inline QueryBase::iterator QueryBase::begin(void)
-{ return QueryBase::iterator(*this); }
-
-inline QueryBase::const_iterator QueryBase::begin(void) const
-{ return QueryBase::const_iterator(*this); }
-
-inline QueryBase::const_iterator QueryBase::cbegin(void) const
-{ return QueryBase::const_iterator(*this); }
-
-inline QueryBase::iterator QueryBase::end(void)
-{ return QueryBase::iterator(); }
-
-inline QueryBase::const_iterator QueryBase::end(void) const
-{ return QueryBase::const_iterator(); }
-
-inline QueryBase::const_iterator QueryBase::cend(void) const
-{ return QueryBase::const_iterator(); }
-
-// ---------------
-// QueryIterator
-// ---------------
-
-inline QueryIterator::QueryIterator(void)
-    : query_(0)
-{ }
-
-inline QueryIterator::QueryIterator(QueryBase& parent)
-    : query_(&parent)
-    , record_(parent.file_.Header())
-{
-    if (!(query_->GetNext(record_)))
-        query_ = 0;
-}
-
-inline BamRecord& QueryIterator::operator*(void)
-{ return record_; }
-
-inline BamRecord* QueryIterator::operator->(void)
-{ return &(operator*()); }
-
-inline QueryIterator& QueryIterator::operator++(void)
-{
-    if (!(query_->GetNext(record_)))
-        query_ = 0;
-    return *this;
-}
-
-inline QueryIterator QueryIterator::operator++(int)
-{
-    QueryIterator result(*this);
-    ++(*this);
-    return result;
-}
-
-inline bool QueryIterator::operator==(const QueryIterator& other) const
-{ return query_ == other.query_; }
-
-inline bool QueryIterator::operator!=(const QueryIterator& other) const
-{ return !(*this == other); }
-
-// --------------------
-// QueryConstIterator
-// --------------------
-
-inline const BamRecord& QueryConstIterator::operator*(void) const
-{ return record_; }
-
-inline const BamRecord* QueryConstIterator::operator->(void) const
-{ return &(operator*()); }
-
-inline QueryConstIterator& QueryConstIterator::operator++(void)
-{
-    if (!(query_->GetNext(record_)))
-        query_ = 0;
-    return *this;
-}
-
-inline QueryConstIterator QueryConstIterator::operator++(int)
-{
-    QueryConstIterator result(*this);
-    ++(*this);
-    return result;
-}
-
-inline bool QueryConstIterator::operator==(const QueryConstIterator& other) const
-{ return query_ == other.query_; }
-
-inline bool QueryConstIterator::operator!=(const QueryConstIterator& other) const
-{ return !(*this == other); }
-
-inline QueryConstIterator::QueryConstIterator(void)
-    : query_(0)
-{ }
-
-inline QueryConstIterator::QueryConstIterator(const QueryBase& parent)
-    : record_(parent.file_.Header())
-{
-    query_ = const_cast<QueryBase*>(&parent);
-    if (!(query_->GetNext(record_)))
-        query_ = 0;
-}
-
-} // namespace BAM
-} // namspace PacBio
-
-#endif // QUERYBASE_H
diff --git a/include/pbbam/ZmwQuery.h b/include/pbbam/ReadAccuracyQuery.h
similarity index 54%
copy from include/pbbam/ZmwQuery.h
copy to include/pbbam/ReadAccuracyQuery.h
index fdd1d1d..1eecb6c 100644
--- a/include/pbbam/ZmwQuery.h
+++ b/include/pbbam/ReadAccuracyQuery.h
@@ -32,35 +32,73 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file ReadAccuracyQuery.h
+/// \brief Defines the ReadAccuracyQuery class.
+//
 // Author: Derek Barnett
 
-#ifndef ZMWQUERY_H
-#define ZMWQUERY_H
+#ifndef READACCURACYQUERY_H
+#define READACCURACYQUERY_H
 
+#include "pbbam/Accuracy.h"
+#include "pbbam/Compare.h"
 #include "pbbam/Config.h"
 #include "pbbam/internal/QueryBase.h"
 #include <vector>
 
 namespace PacBio {
 namespace BAM {
-//namespace staging {
 
-class PBBAM_EXPORT ZmwQuery : public internal::IQuery
+/// \brief The ReadAccuracyQuery class provides iterable access to a DataSet's
+///        %BAM records, limiting results to those matching a read accuracy
+///        criterion.
+///
+/// Example:
+/// \include code/ReadAccuracyQuery.txt
+///
+/// \note Currently, all %BAM files must have a corresponding ".pbi" index file.
+///       Use BamFile::EnsurePacBioIndexExists before creating the query if one
+///       may not be present.
+///
+class PBBAM_EXPORT ReadAccuracyQuery : public internal::IQuery
 {
 public:
-    ZmwQuery(const std::vector<int32_t>& zmwWhitelist,
-             const DataSet& dataset);
 
-protected:
-    FileIterPtr CreateIterator(const BamFile& bamFile);
+    /// \brief Creates a new ReadAccuracyQuery, limiting record results to only
+    ///        those matching a read accuracy criterion.
+    ///
+    /// \param[in] accuracy     read accuracy value
+    /// \param[in] compareType  compare operator
+    /// \param[in] dataset      input data source(s)
+    ///
+    /// \sa BamRecord::ReadAccuracy
+    ///
+    /// \throws std::runtime_error on failure to open/read underlying %BAM or PBI
+    ///         files.
+    ///
+    ReadAccuracyQuery(const Accuracy accuracy,
+                      const Compare::Type compareType,
+                      const DataSet& dataset);
+
+    ~ReadAccuracyQuery(void);
+
+public:
+
+    /// \brief Main iteration point for record access.
+    ///
+    /// Most client code should not need to use this method directly. Use
+    /// iterators instead.
+    ///
+    bool GetNext(BamRecord& r);
 
 private:
-    std::vector<int> whitelist_;
+    struct ReadAccuracyQueryPrivate;
+    std::unique_ptr<ReadAccuracyQueryPrivate> d_;
 };
 
-//} // namespace staging
 } // namespace BAM
 } // namespace PacBio
 
-#endif // ZMWQUERY_H
+#endif // READACCURACYQUERY_H
diff --git a/include/pbbam/ReadGroupInfo.h b/include/pbbam/ReadGroupInfo.h
index 86372ee..f29dc46 100644
--- a/include/pbbam/ReadGroupInfo.h
+++ b/include/pbbam/ReadGroupInfo.h
@@ -32,19 +32,29 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file ReadGroupInfo.h
+/// \brief Defines the ReadGroupInfo class.
+//
 // Author: Derek Barnett
 
 #ifndef READGROUPINFO_H
 #define READGROUPINFO_H
 
 #include "pbbam/Config.h"
+#include "pbbam/exception/InvalidSequencingChemistryException.h"
 #include <map>
 #include <string>
 
 namespace PacBio {
 namespace BAM {
 
+/// \brief This enum describes the base features that may be present in a read
+///        group's records.
+///
+/// This information is stored in its description (\@RG:DS).
+///
 enum class BaseFeature
 {
     DELETION_QV
@@ -57,6 +67,8 @@ enum class BaseFeature
   , PULSE_WIDTH
   , PKMID
   , PKMEAN
+  , PKMID2
+  , PKMEAN2
   , LABEL
   , LABEL_QV
   , ALT_LABEL
@@ -65,33 +77,138 @@ enum class BaseFeature
   , PULSE_CALL
   , PRE_PULSE_FRAMES
   , PULSE_CALL_WIDTH
+  , START_FRAME
 };
 
+/// \brief This enum describes the encoding types used for frame data within a
+///        read group's records.
+///
+/// This information is stored in its description (\@RG:DS).
+///
 enum class FrameCodec
 {
     RAW
   , V1
 };
 
+/// \brief This enum describes the experimental design of the barcodes within a
+///        read group's records.
+///
+/// This information is stored in its description (\@RG:DS).
+///
+enum class BarcodeModeType
+{
+   NONE
+ , SYMMETRIC
+ , ASYMMETRIC
+};
+
+/// \brief This enum describes the type of value encoded by barcode quality,
+///        within a read group's records.
+///
+/// This information is stored in its description (\@RG:DS).
+///
+enum class BarcodeQualityType
+{
+    NONE
+  , SCORE
+  , PROBABILITY
+};
+
+/// \brief This enum describes the instrument type / platform model,
+///        within a read group's records.
+///
+/// This information is stored in the platform model tag (\@RG:PM).
+///
+enum class PlatformModelType
+{
+    ASTRO
+  , RS
+  , SEQUEL
+};
+
+/// \brief The ReadGroupInfo class represents a read group entry (\@RG) in the
+///        SAM header.
+///
 class PBBAM_EXPORT ReadGroupInfo
 {
 public:
     /// \name Conversion & Validation
     ///
 
+    /// \brief Creates a ReadGroupInfo object from SAM-formatted text.
+    ///
+    /// \param[in] sam  SAM-formatted text
+    /// \returns read group info object
+    ///
     static ReadGroupInfo FromSam(const std::string& sam);
 
+    /// \brief Converts a ReadGroupInfo object to its SAM-formatted text.
+    ///
+    /// \param[in] rg     input ReadGroupInfo object
+    /// \returns SAM-formatted text (no trailing newline)
+    ///
     static std::string ToSam(const ReadGroupInfo& rg);
 
+    /// \brief Converts a read group ID (string) to its numeric value.
+    ///
+    /// \param[in] rgId     read group ID string
+    /// \returns numeric value of ID
+    ///
+    static int32_t IdToInt(const std::string& rgId);
+
+    /// \brief Converts a read group ID number to its string representation.
+    ///
+    /// \param[in] id     read group ID number
+    /// \returns hexadecimal string representation of ID
+    ///
+    static std::string IntToId(const int32_t id);
+
+    /// \returns sequencing chemistry from (bindingKit, sequencingKit,
+    ///          basecallerVersion)
+    ///
+    static std::string SequencingChemistryFromTriple(const std::string& bindingKit,
+                                                     const std::string& sequencingKit,
+                                                     const std::string& basecallerVersion);
+
     /// \}
 
 public:
     /// \name Constructors & Related Methods
     /// \{
 
+    /// \brief Creates an empty read group info object.
     ReadGroupInfo(void);
+
+    /// \brief Creates a read group info object with an ID.
+    ///
+    /// \param[in] id   string representation of read group ID
+    ///
     ReadGroupInfo(const std::string& id);
-    ReadGroupInfo(const std::string& movieName, const std::string& readType);
+
+    /// \brief Creates a read group info object from a movie name & read type.
+    ///
+    /// \param[in] movieName    sequencing movie name
+    /// \param[in] readType     string version of record type
+    ///
+    /// \sa RecordType
+    ///
+    ReadGroupInfo(const std::string& movieName,
+                  const std::string& readType);
+
+    /// \brief Creates a read group info object from a movie name, read type,
+    ///        and platform model.
+    ///
+    /// \param[in] movieName    sequencing movie name
+    /// \param[in] readType     string version of record type
+    /// \param[in] platform     platform model type
+    ///
+    /// \sa RecordType
+    ///
+    ReadGroupInfo(const std::string& movieName,
+                  const std::string& readType,
+                  const PlatformModelType platform);
+
     ReadGroupInfo(const ReadGroupInfo& other);
     ReadGroupInfo(ReadGroupInfo&& other);
     ReadGroupInfo& operator=(const ReadGroupInfo& other);
@@ -101,68 +218,148 @@ public:
     /// \}
 
 public:
+    /// \name Comparison Operators
+    /// \{
+
+    bool operator==(const ReadGroupInfo& other) const;
+
+    /// \}
+
+public:
+    /// \name Conversion & Validation
+    /// \{
+
+    /// \returns true if read group info is valid
+    ///
+    /// Currently this checks to see that ReadGroupInfo::Id does not contain an
+    /// empty string.
+    ///
+    bool IsValid(void) const;
+
+    /// \brief Converts this object to its SAM-formatted text.
+    ///
+    /// \returns SAM-formatted text (no trailing newline)
+    ///
+    std::string ToSam(void) const;
+
+    /// \}
+
+public:
     /// \name Attributes
     /// \{
 
-    const std::string& BasecallerVersion(void) const;
+    /// \returns the number of barcode sequences in BarcodeFile
+    ///
+    /// \throws std::runtime_error if barcode data not set.
+    ///         Check HasBarcodeData if this data may be absent.
+    ///
+    size_t BarcodeCount(void) const;
 
-    bool HasBaseFeature(const BaseFeature& feature) const;
+    /// \returns name of FASTA file containing barcode sequences
+    ///
+    /// \throws std::runtime_error if barcode data not set.
+    ///         Check HasBarcodeData if this data may be absent.
+    ///
+    std::string  BarcodeFile(void) const;
+
+    /// \returns MD5 hash of the contents of BarcodeFile
+    ///
+    /// \throws std::runtime_error if barcode data not set.
+    ///         Check HasBarcodeData if this data may be absent.
+    ///
+    std::string BarcodeHash(void) const;
+
+    /// \returns experimental design type of barcodes
+    ///
+    /// \throws std::runtime_error if barcode data not set.
+    ///         Check HasBarcodeData if this data may be absent.
+    ///
+    BarcodeModeType BarcodeMode(void) const;
+
+    /// \returns type of value encoded in the 'bq' tag
+    ///
+    /// \throws std::runtime_error if barcode data is not set.
+    ///         Check HasBarcodeData if this data may be absent.
+    ///
+    BarcodeQualityType BarcodeQuality(void) const;
+
+    /// \returns basecaller version number (e.g. "2.1")
+    std::string BasecallerVersion(void) const;
 
+    /// \returns tag name in use for the specified base feature
     std::string BaseFeatureTag(const BaseFeature& feature) const;
 
+    /// \returns binding kit part number (e.g. "100236500")
     std::string BindingKit(void) const;
 
+    /// \returns true if reads are classified as spike-in controls
     bool Control(void) const;
 
+    /// \returns any non-standard tags added to the \@RG entry
+    ///
+    /// Result map consists of {tagName => value}.
+    ///
     std::map<std::string, std::string> CustomTags(void) const;
 
+    /// \returns string value of \@RG:DT
     std::string Date(void) const;
 
+    /// \returns string value of \@RG:FO
     std::string FlowOrder(void) const;
 
+    /// \returns frame rate in Hz
     std::string FrameRateHz(void) const;
 
+    /// \returns true if read group has barcode data
+    bool HasBarcodeData(void) const;
+
+    /// \returns true if read group has an entry for the specified base feature
+    bool HasBaseFeature(const BaseFeature& feature) const;
+
+    /// \returns string value of \@RG:ID
     std::string Id(void) const;
 
+    /// \returns codec type in use for IPD
     FrameCodec IpdCodec(void) const;
 
+    /// \returns string value of \@RG:KS
     std::string KeySequence(void) const;
 
+    /// \returns string value of \@RG:LB
     std::string Library(void) const;
 
+    /// \returns movie name (stored in \@RG:PU)
     std::string MovieName(void) const;
 
+    /// \returns string value of \@RG:PL
     std::string Platform(void) const;
 
+    /// \returns platform model type (stored in \@RG:PM)
+    PlatformModelType PlatformModel(void) const;
+
+    /// \returns string value of \@RG:PI
     std::string PredictedInsertSize(void) const;
 
+    /// \returns string value of \@RG:PG
     std::string Programs(void) const;
 
+    /// \returns codec type in use for PulseWidth
     FrameCodec PulseWidthCodec(void) const;
 
+    /// \returns string value of read type
     std::string ReadType(void) const;
 
+    /// \returns string value of \@RG:SM
     std::string Sample(void) const;
 
+    /// \returns string value of \@RG:CN
     std::string SequencingCenter(void) const;
 
-    std::string SequencingKit(void) const;
-
-    /// \}
-
-    /// \name Conversion & Validation
-    /// \{
-
-    bool IsValid(void) const;
-
-    std::string ToSam(void) const;
-
-    /// \}
-
-    /// \name Comparison
-    /// \{
+    /// \returns sequencing chemistry name
+    std::string SequencingChemistry(void) const;
 
-    bool operator==(const ReadGroupInfo& other) const;
+    /// \returns sequencing kit part number
+    std::string SequencingKit(void) const;
 
     /// \}
 
@@ -170,62 +367,217 @@ public:
     /// \name Attributes
     /// \{
 
+    /// \brief Sets read group's barcode data.
+    ///
+    /// Barcode fields are either absent or all must be present.
+    ///
+    /// \param[in] barcodeFile      barcode filename
+    /// \param[in] barcodeHash      MD5 hash of barcode file
+    /// \param[in] barcodeCount     number of records in barcode file
+    /// \param[in] barcodeMode      experimental design of barcodes
+    /// \param[in] barcodeQuality   type of barcode quality value
+    ///
+    /// \sa BarcodeFile \n
+    ///     BarcodeHash \n
+    ///     BarcodeCount \n
+    ///     BarcodeMode \n
+    ///     BarcodeQuality \n
+    ///     ReadGroupInfo::ClearBarcodeData
+    ///
+    /// \returns reference to this object
+    ///
+    ReadGroupInfo& BarcodeData(const std::string& barcodeFile,
+                               const std::string& barcodeHash,
+                               size_t barcodeCount,
+                               BarcodeModeType barcodeMode,
+                               BarcodeQualityType barcodeQuality);
+
+    /// \brief Sets the basecaller version number.
+    ///
+    /// \param[in] versionNumber   new value
+    /// \returns reference to this object
+    ///
     ReadGroupInfo& BasecallerVersion(const std::string& versionNumber);
 
+    /// \brief Sets the tag to be used for a particular base feature.
+    ///
+    /// \param[in] feature      feature type being updated
+    /// \param[in] tag          new value
+    /// \returns reference to this object
+    ///
     ReadGroupInfo& BaseFeatureTag(const BaseFeature& feature,
                                   const std::string& tag);
 
+    /// \brief Sets the binding kit part number.
+    ///
+    /// \param[in] kitNumber    new value
+    /// \returns reference to this object
+    ///
     ReadGroupInfo& BindingKit(const std::string& kitNumber);
 
+    /// \brief Removes all barcode data from this read group.
+    ///
+    /// \returns reference to this read group
+    ///
+    ReadGroupInfo& ClearBarcodeData(void);
+
+    /// \brief Sets whether read group's records are classified as spike-in
+    ///        controls.
+    ///
+    /// \param[in] ctrl     true if records are spike-in controls
+    /// \returns reference to this object
+    ///
     ReadGroupInfo& Control(const bool ctrl);
 
+    /// \brief Sets a new collection of non-standard tags.
+    ///
+    /// Custom tag map entries should consist of {tagName => value}.
+    ///
+    /// \param[in] custom      new tags
+    /// \returns reference to this object
+    ///
     ReadGroupInfo& CustomTags(const std::map<std::string, std::string>& custom);
 
+    /// \brief Sets the value for \@RG:DT
+    ///
+    /// \param[in] date      new value
+    /// \returns reference to this object
+    ///
     ReadGroupInfo& Date(const std::string& date);
 
+    /// \brief Sets the value for \@RG:FO
+    ///
+    /// \param[in] order     new value
+    /// \returns reference to this object
+    ///
     ReadGroupInfo& FlowOrder(const std::string& order);
 
+    /// \brief Sets the frame rate.
+    ///
+    /// \param[in] frameRateHz     string value of frame rate in Hz
+    /// \returns reference to this object
+    ///
     ReadGroupInfo& FrameRateHz(const std::string& frameRateHz);
 
+    /// \brief Sets the read group's ID.
+    ///
+    /// \param[in] id     string value of ID
+    /// \returns reference to this object
+    ///
     ReadGroupInfo& Id(const std::string& id);
 
-    ReadGroupInfo& Id(const std::string& movieName, const std::string& readType);
+    /// \brief Sets the read group's ID, from movie name & read type
+    ///
+    /// \param[in] movieName    sequencing movie name
+    /// \param[in] readType     string version of read type
+    /// \returns reference to this object
+    ///
+    ReadGroupInfo& Id(const std::string& movieName,
+                      const std::string& readType);
 
-    ReadGroupInfo& IpdCodec(const FrameCodec& codec, const std::string& tag = std::string());
+    /// \brief Sets the codec type used for IPD
+    ///
+    /// \param[in] codec    codec type
+    /// \param[in] tag      IPD tag
+    /// \returns reference to this object
+    ///
+    ReadGroupInfo& IpdCodec(const FrameCodec& codec,
+                            const std::string& tag = std::string());
 
+    /// \brief Sets the value for \@RG:KS
+    ///
+    /// \param[in] sequence      new value
+    /// \returns reference to this object
+    ///
     ReadGroupInfo& KeySequence(const std::string& sequence);
 
+    /// \brief Sets the value for \@RG:LB
+    ///
+    /// \param[in] library      new value
+    /// \returns reference to this object
+    ///
     ReadGroupInfo& Library(const std::string& library);
 
-    ReadGroupInfo& MovieName(const std::string& id);
+    /// \brief Sets the value for movie name (stored in \@RG:PU).
+    ///
+    /// \param[in] movieName    new value
+    /// \returns reference to this object
+    ///
+    ReadGroupInfo& MovieName(const std::string& movieName);
 
+    /// \brief Sets the value for \@RG:PI
+    ///
+    /// \param[in] size         new value
+    /// \returns reference to this object
+    ///
     ReadGroupInfo& PredictedInsertSize(const std::string& size);
 
+    /// \brief Sets the value for \@RG:PG
+    ///
+    /// \param[in] programs     new value
+    /// \returns reference to this object
+    ///
     ReadGroupInfo& Programs(const std::string& programs);
 
-    ReadGroupInfo& PulseWidthCodec(const FrameCodec& codec, const std::string& tag = std::string());
+    /// \brief Sets the value for \@RG:PM
+    ///
+    /// \param[in] platform     new value
+    /// \returns reference to this object
+    ///
+    ReadGroupInfo& PlatformModel(const PlatformModelType& platform);
+
+    /// \brief Sets the codec type used for PulseWidth
+    ///
+    /// \param[in] codec    codec type
+    /// \param[in] tag      pulse width tag
+    /// \returns reference to this object
+    ///
+    ReadGroupInfo& PulseWidthCodec(const FrameCodec& codec,
+                                   const std::string& tag = std::string());
 
+    /// \brief Sets the read type.
+    ///
+    /// \param[in] type    new value
+    /// \returns reference to this object
+    ///
     ReadGroupInfo& ReadType(const std::string& type);
 
+    /// \brief Sets the value for \@RG:SM
+    ///
+    /// \param[in] sample       new value
+    /// \returns reference to this object
+    ///
     ReadGroupInfo& Sample(const std::string& sample);
 
+    /// \brief Sets the value for \@RG:CN
+    ///
+    /// \param[in] center       new value
+    /// \returns reference to this object
+    ///
     ReadGroupInfo& SequencingCenter(const std::string& center);
 
+    /// \brief Sets the sequencing kit part number.
+    ///
+    /// \param[in] kitNumber    new value
+    /// \returns reference to this object
+    ///
     ReadGroupInfo& SequencingKit(const std::string& kitNumber);
 
     /// \}
 
 private:
-    std::string id_;                     // ID * Unique ID required for valid SAM/BAM header *
-    std::string sequencingCenter_;       // CN
-    std::string date_;                   // DT * (ISO 8601) *
-    std::string flowOrder_;              // FO
-    std::string keySequence_;            // KS
-    std::string library_;                // LB
-    std::string programs_;               // PG
-    std::string predictedInsertSize_;    // PI
-    std::string movieName_;              // PU * more explicit, in place of "platform unit" *
-    std::string sample_;                 // SM
+    std::string id_;                    // ID * must be unique for valid SAM *
+    std::string sequencingCenter_;      // CN
+    std::string date_;                  // DT * (ISO-8601) *
+    std::string flowOrder_;             // FO
+    std::string keySequence_;           // KS
+    std::string library_;               // LB
+    std::string programs_;              // PG
+    std::string predictedInsertSize_;   // PI
+    std::string movieName_;             // PU
+    std::string sample_;                // SM
+
+    PlatformModelType platformModel_;   // PM
 
     // DS:<Description> components
     std::string readType_;
@@ -236,6 +588,12 @@ private:
     bool        control_ = false;
     FrameCodec  ipdCodec_;
     FrameCodec  pulseWidthCodec_;
+    bool        hasBarcodeData_ = false;
+    std::string barcodeFile_;
+    std::string barcodeHash_;
+    size_t      barcodeCount_ = 0;
+    BarcodeModeType barcodeMode_ = BarcodeModeType::NONE;
+    BarcodeQualityType barcodeQuality_ = BarcodeQualityType::NONE;
     std::map<BaseFeature, std::string> features_;
 
     // custom attributes
@@ -246,147 +604,20 @@ private:
     void DecodeSamDescription(const std::string& description);
 };
 
+/// \brief Creates a read group ID from a movie name & read type.
+///
+/// \param[in] movieName    sequencing movie name
+/// \param[in] readType     string version of read type
+///
+/// \returns hexadecimal string read group ID
+///
 PBBAM_EXPORT
 std::string MakeReadGroupId(const std::string& movieName,
                             const std::string& readType);
 
-inline const std::string& ReadGroupInfo::BasecallerVersion(void) const
-{ return basecallerVersion_; }
-
-inline ReadGroupInfo& ReadGroupInfo::BasecallerVersion(const std::string& versionNumber)
-{ basecallerVersion_ = versionNumber; return *this; }
-
-inline std::string ReadGroupInfo::BaseFeatureTag(const BaseFeature& feature) const
-{
-    const auto iter = features_.find(feature);
-    if (iter == features_.end())
-        return std::string();
-    return iter->second;
-}
-
-inline ReadGroupInfo& ReadGroupInfo::BaseFeatureTag(const BaseFeature& feature,
-                                                    const std::string& tag)
-{ features_[feature] = tag; return *this; }
-
-inline std::string ReadGroupInfo::BindingKit(void) const
-{ return bindingKit_; }
-
-inline ReadGroupInfo& ReadGroupInfo::BindingKit(const std::string& kitNumber)
-{ bindingKit_ = kitNumber; return *this; }
-
-inline bool ReadGroupInfo::Control(void) const
-{ return control_; }
-
-inline ReadGroupInfo& ReadGroupInfo::Control(const bool ctrl)
-{ control_ = ctrl; return *this; }
-
-inline std::map<std::string, std::string> ReadGroupInfo::CustomTags(void) const
-{ return custom_; }
-
-inline ReadGroupInfo& ReadGroupInfo::CustomTags(const std::map<std::string, std::string>& custom)
-{ custom_ = custom; return *this; }
-
-inline std::string ReadGroupInfo::Date(void) const
-{ return date_; }
-
-inline ReadGroupInfo& ReadGroupInfo::Date(const std::string& date)
-{ date_ = date; return *this; }
-
-inline std::string ReadGroupInfo::FlowOrder(void) const
-{ return flowOrder_; }
-
-inline ReadGroupInfo& ReadGroupInfo::FlowOrder(const std::string& order)
-{ flowOrder_ = order; return *this; }
-
-inline std::string ReadGroupInfo::FrameRateHz(void) const
-{ return frameRateHz_; }
-
-inline ReadGroupInfo& ReadGroupInfo::FrameRateHz(const std::string& frameRateHz)
-{ frameRateHz_ = frameRateHz; return *this; }
-
-inline bool ReadGroupInfo::HasBaseFeature(const BaseFeature& feature) const
-{ return features_.find(feature) != features_.end(); }
-
-inline std::string ReadGroupInfo::Id(void) const
-{ return id_; }
-
-inline ReadGroupInfo& ReadGroupInfo::Id(const std::string& id)
-{ id_ = id; return *this; }
-
-inline ReadGroupInfo& ReadGroupInfo::Id(const std::string& movieName,
-                                        const std::string& readType)
-{ id_ = MakeReadGroupId(movieName, readType); return *this; }
-
-inline FrameCodec ReadGroupInfo::IpdCodec(void) const
-{ return ipdCodec_; }
-
-inline bool ReadGroupInfo::IsValid(void) const
-{ return !id_.empty(); }
-
-inline std::string ReadGroupInfo::KeySequence(void) const
-{ return keySequence_; }
-
-inline ReadGroupInfo& ReadGroupInfo::KeySequence(const std::string& sequence)
-{ keySequence_ = sequence; return *this; }
-
-inline std::string ReadGroupInfo::Library(void) const
-{ return library_; }
-
-inline ReadGroupInfo& ReadGroupInfo::Library(const std::string& library)
-{ library_ = library; return *this; }
-
-inline std::string ReadGroupInfo::MovieName(void) const
-{ return movieName_; }
-
-inline ReadGroupInfo& ReadGroupInfo::MovieName(const std::string& movieName)
-{ movieName_ = movieName; return *this; }
-
-inline std::string ReadGroupInfo::Platform(void) const
-{ return std::string("PACBIO"); }
-
-inline std::string ReadGroupInfo::PredictedInsertSize(void) const
-{ return predictedInsertSize_; }
-
-inline ReadGroupInfo& ReadGroupInfo::PredictedInsertSize(const std::string& size)
-{ predictedInsertSize_ = size; return *this; }
-
-inline std::string ReadGroupInfo::Programs(void) const
-{ return programs_; }
-
-inline ReadGroupInfo& ReadGroupInfo::Programs(const std::string& programs)
-{ programs_ = programs; return *this; }
-
-inline FrameCodec ReadGroupInfo::PulseWidthCodec(void) const
-{ return pulseWidthCodec_; }
-
-inline std::string ReadGroupInfo::ReadType(void) const
-{ return readType_; }
-
-inline ReadGroupInfo& ReadGroupInfo::ReadType(const std::string& type)
-{ readType_ = type; return *this; }
-
-inline std::string ReadGroupInfo::Sample(void) const
-{ return sample_; }
-
-inline ReadGroupInfo& ReadGroupInfo::Sample(const std::string& sample)
-{ sample_ = sample; return *this; }
-
-inline std::string ReadGroupInfo::SequencingCenter(void) const
-{ return sequencingCenter_; }
-
-inline ReadGroupInfo& ReadGroupInfo::SequencingCenter(const std::string& center)
-{ sequencingCenter_ = center; return *this; }
-
-inline std::string ReadGroupInfo::SequencingKit(void) const
-{ return sequencingKit_; }
-
-inline ReadGroupInfo& ReadGroupInfo::SequencingKit(const std::string& kitNumber)
-{ sequencingKit_ = kitNumber; return *this; }
-
-inline std::string ReadGroupInfo::ToSam(const ReadGroupInfo& rg)
-{ return rg.ToSam(); }
-
 } // namespace BAM
 } // namespace PacBio
 
+#include "pbbam/internal/ReadGroupInfo.inl"
+
 #endif // READGROUPINFO_H
diff --git a/include/pbbam/SamTagCodec.h b/include/pbbam/SamTagCodec.h
index b0b6796..cc4def4 100644
--- a/include/pbbam/SamTagCodec.h
+++ b/include/pbbam/SamTagCodec.h
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file SamTagCodec.h
+/// \brief Defines the SamTagCodec class.
+//
 // Author: Derek Barnett
 
 #ifndef SAMTAGCODEC_H
@@ -45,10 +49,30 @@
 namespace PacBio {
 namespace BAM {
 
+/// \brief The SamTagCodec class provides text-based encoding/decoding of %BAM
+///        tag data.
+///
+/// \note SamTagCodec is mostly an implementation and/or testing detail, and may
+///       be removed from the public API.
+///
 class PBBAM_EXPORT SamTagCodec
 {
 public:
+    /// \name Tag Collection Methods
+    /// \{
+
+    /// \brief Creates a TagCollection from SAM-formatted tag data.
+    ///
+    /// \param[in] tagString    SAM-formatted string
+    /// \returns resulting tag collection
+    ///
     static TagCollection Decode(const std::string& tagString);
+
+    /// \brief Creates SAM-formatted string from a TagCollection.
+    ///
+    /// \param[in] tags     TagCollection containing tag data
+    /// \returns SAM-formatted string
+    ///
     static std::string Encode(const PacBio::BAM::TagCollection& tags);
 };
 
diff --git a/include/pbbam/SequenceInfo.h b/include/pbbam/SequenceInfo.h
index 0cf9d04..88b8dd1 100644
--- a/include/pbbam/SequenceInfo.h
+++ b/include/pbbam/SequenceInfo.h
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file SequenceInfo.h
+/// \brief Defines the SequenceInfo class.
+//
 // Author: Derek Barnett
 
 #ifndef SEQUENCEINFO_H
@@ -45,14 +49,27 @@
 namespace PacBio {
 namespace BAM {
 
+/// \brief The SequenceInfo class represents a sequence entry (\@SQ) in the SAM
+///        header.
+///
 class PBBAM_EXPORT SequenceInfo
 {
 public:
     /// \name Conversion & Validation
     ///
 
+    /// \brief Creates a SequenceInfo object from SAM-formatted text.
+    ///
+    /// \param[in] sam  SAM-formatted text
+    /// \returns sequence info object
+    ///
     static SequenceInfo FromSam(const std::string& sam);
 
+    /// \brief Converts a SequenceInfo object to its SAM-formatted text.
+    ///
+    /// \param[in] seq     input SequenceInfo object
+    /// \returns SAM-formatted text (no trailing newline)
+    ///
     static std::string ToSam(const SequenceInfo& seq);
 
     /// \}
@@ -61,8 +78,17 @@ public:
     /// \name Constructors & Related Methods
     /// \{
 
+    /// \brief Creates an empty sequence info object.
     SequenceInfo(void);
-    SequenceInfo(const std::string& name, const std::string& length = "0");
+
+    /// \brief Creates a sequence info object with name & (optional) length.
+    ///
+    /// \param[in] name       sequence name (\@SQ:SN)
+    /// \param[in] length     sequence length (\@SQ:LN)
+    ///
+    SequenceInfo(const std::string& name,
+                 const std::string& length = "0");
+
     SequenceInfo(const SequenceInfo& other);
     SequenceInfo(SequenceInfo&& other);
     SequenceInfo& operator=(const SequenceInfo& other);
@@ -72,111 +98,135 @@ public:
     /// \}
 
 public:
+    /// \name Operators
+    /// \{
+
+    bool operator==(const SequenceInfo& other) const;
+    bool operator!=(const SequenceInfo& other) const;
+
+    /// \}
+
+public:
+    /// \name Conversion & Validation
+    /// \{
+
+    /// \returns true if sequence info is valid
+    ///
+    /// Currently this checks to see that Name is non-empty and Length is within
+    /// the accepted range.
+    ///
+    bool IsValid(void) const;
+
+    /// \brief Converts this object to its SAM-formatted text.
+    ///
+    /// \returns SAM-formatted text (no trailing newline)
+    ///
+    std::string ToSam(void) const;
+
+    /// \}
+
+public:
     /// \name Attributes
     /// \{
 
+    /// \returns string value of \@SQ:AS
     std::string AssemblyId(void) const;
 
+    /// \returns string value of \@SQ:M5
     std::string Checksum(void) const;
 
+    /// \returns any non-standard tags added to the \@SQ entry
+    ///
+    /// Result map consists of {tagName => value}.
+    ///
     std::map<std::string, std::string> CustomTags(void) const;
 
+    /// \returns string value of \@SQ:LN
     std::string Length(void) const;
 
+    /// \returns string value of \@SQ:SN
     std::string Name(void) const;
 
+    /// \returns string value of \@SQ:SP
     std::string Species(void) const;
 
+    /// \returns string value of \@SQ:UR
     std::string Uri(void) const;
 
     /// \}
 
-    /// \name Conversion & Validation
-    ///
-
-    bool IsValid(void) const;
-
-    std::string ToSam(void) const;
-
-    /// \}
-
 public:
     /// \name Attributes
+    /// \{
 
+    /// \brief Sets the value for \@SQ:AS
+    ///
+    /// \param[in] id      new value
+    /// \returns reference to this object
+    ///
     SequenceInfo& AssemblyId(const std::string& id);
 
+    /// \brief Sets the value for \@SQ:M5
+    ///
+    /// \param[in] checksum      new value
+    /// \returns reference to this object
+    ///
     SequenceInfo& Checksum(const std::string& checksum);
 
+    /// \brief Sets a new collection of non-standard tags.
+    ///
+    /// Custom tag map entries should consist of {tagName => value}.
+    ///
+    /// \param[in] custom      new tags
+    /// \returns reference to this object
+    ///
     SequenceInfo& CustomTags(const std::map<std::string, std::string>& custom);
 
+    /// \brief Sets the value for \@SQ:LN
+    ///
+    /// \param[in] length      new value
+    /// \returns reference to this object
+    ///
     SequenceInfo& Length(const std::string& length);
 
+    /// \brief Sets the value for \@SQ:SN
+    ///
+    /// \param[in] name      new value
+    /// \returns reference to this object
+    ///
     SequenceInfo& Name(const std::string& name);
 
+    /// \brief Sets the value for \@SQ:SP
+    ///
+    /// \param[in] species     new value
+    /// \returns reference to this object
+    ///
     SequenceInfo& Species(const std::string& species);
 
+    /// \brief Sets the value for \@SQ:UR
+    ///
+    /// \param[in] uri      new value
+    /// \returns reference to this object
+    ///
     SequenceInfo& Uri(const std::string& uri);
 
     /// \}
 
 private:
-    std::string name_;                   // SN:<Name>            * Unique Name required for valid SAM header*
-    std::string length_;                 // LN:<Length>          * [0 - 2^31-1]
-    std::string assemblyId_;             // AS:<AssemblyId>
-    std::string checksum_;               // M5:<Checksum>
-    std::string species_;                // SP:<Species>
-    std::string uri_;                    // UR:<URI>
+    std::string name_;          // SN:<Name>    * must be unique for valid SAM *
+    std::string length_;        // LN:<Length>  * must be within [0 - 2^31-1] *
+    std::string assemblyId_;    // AS:<AssemblyId>
+    std::string checksum_;      // M5:<Checksum>
+    std::string species_;       // SP:<Species>
+    std::string uri_;           // UR:<URI>
 
     // custom attributes
     std::map<std::string, std::string> custom_; // tag => value
 };
 
-inline std::string SequenceInfo::AssemblyId(void) const
-{ return assemblyId_; }
-
-inline SequenceInfo& SequenceInfo::AssemblyId(const std::string& id)
-{ assemblyId_ = id; return *this; }
-
-inline std::string SequenceInfo::Checksum(void) const
-{ return checksum_; }
-
-inline SequenceInfo& SequenceInfo::Checksum(const std::string& checksum)
-{ checksum_ = checksum; return *this; }
-
-inline std::map<std::string, std::string> SequenceInfo::CustomTags(void) const
-{ return custom_; }
-
-inline SequenceInfo& SequenceInfo::CustomTags(const std::map<std::string, std::string>& custom)
-{ custom_ = custom; return *this; }
-
-inline std::string SequenceInfo::Length(void) const
-{ return length_; }
-
-inline SequenceInfo& SequenceInfo::Length(const std::string& length)
-{ length_ = length; return *this; }
-
-inline std::string SequenceInfo::Name(void) const
-{ return name_; }
-
-inline SequenceInfo& SequenceInfo::Name(const std::string& name)
-{ name_ = name; return *this; }
-
-inline std::string SequenceInfo::Species(void) const
-{ return species_; }
-
-inline SequenceInfo& SequenceInfo::Species(const std::string& species)
-{ species_ = species; return *this; }
-
-inline std::string SequenceInfo::ToSam(const SequenceInfo& seq)
-{ return seq.ToSam(); }
-
-inline std::string SequenceInfo::Uri(void) const
-{ return uri_; }
-
-inline SequenceInfo& SequenceInfo::Uri(const std::string& uri)
-{ uri_ = uri; return *this; }
-
 } // namespace BAM
 } // namespace PacBio
 
+#include "pbbam/internal/SequenceInfo.inl"
+
 #endif // SEQUENCEINFO_H
diff --git a/include/pbbam/Strand.h b/include/pbbam/Strand.h
index aa8535f..6fa5043 100644
--- a/include/pbbam/Strand.h
+++ b/include/pbbam/Strand.h
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file Strand.h
+/// \brief Defines the Strand enum.
+//
 // Author: Derek Barnett
 
 #ifndef STRAND_H
@@ -43,10 +47,13 @@
 namespace PacBio {
 namespace BAM {
 
+/// \brief This enum defines the strand orientations used for reporting
+///        alignment-related information.
+///
 enum class Strand
 {
-    FORWARD
-  , REVERSE
+    FORWARD     ///< Forward strand
+  , REVERSE     ///< Reverse strand
 };
 
 } // namespace BAM
diff --git a/include/pbbam/ZmwGroupQuery.h b/include/pbbam/SubreadLengthQuery.h
similarity index 55%
copy from include/pbbam/ZmwGroupQuery.h
copy to include/pbbam/SubreadLengthQuery.h
index 8b88113..e8839fe 100644
--- a/include/pbbam/ZmwGroupQuery.h
+++ b/include/pbbam/SubreadLengthQuery.h
@@ -32,36 +32,68 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file SubreadLengthQuery.h
+/// \brief Defines the SubreadLengthQuery class.
+//
 // Author: Derek Barnett
 
-#ifndef ZMWGROUPQUERY_H
-#define ZMWGROUPQUERY_H
+#ifndef SUBREADLENGTHQUERY_H
+#define SUBREADLENGTHQUERY_H
 
+#include "pbbam/Compare.h"
 #include "pbbam/Config.h"
 #include "pbbam/internal/QueryBase.h"
 #include <vector>
 
 namespace PacBio {
 namespace BAM {
-//namespace staging {
 
-class PBBAM_EXPORT ZmwGroupQuery : public internal::IGroupQuery
+/// \brief The SubreadLengthQuery class provides iterable access to a DataSet's
+///        %BAM records, limiting results to those matching a subread length
+///        criterion.
+///
+/// Example:
+/// \include code/SubreadLengthQuery.txt
+///
+/// \note Currently, all %BAM files must have a corresponding ".pbi" index file.
+///       Use BamFile::EnsurePacBioIndexExists before creating the query if one
+///       may not be present.
+///
+class PBBAM_EXPORT SubreadLengthQuery : public internal::IQuery
 {
 public:
-    ZmwGroupQuery(const DataSet& dataset);
-    ZmwGroupQuery(const std::vector<int32_t>& zmwWhitelist,
-                  const DataSet& dataset);
+    /// \brief Creates a new SubreadLengthQuery, limiting record results to only
+    ///        those matching a subread length criterion.
+    ///
+    /// \param[in] length       subread length value
+    /// \param[in] compareType  compare operator
+    /// \param[in] dataset      input data source(s)
+    ///
+    /// \throws std::runtime_error on failure to open/read underlying %BAM or PBI
+    ///         files.
+    ///
+    SubreadLengthQuery(const int32_t length,
+                       const Compare::Type compareType,
+                       const DataSet& dataset);
 
-protected:
-    FileIterPtr CreateIterator(const BamFile& file);
+    ~SubreadLengthQuery(void);
+
+public:
+    /// \brief Main iteration point for record access.
+    ///
+    /// Most client code should not need to use this method directly. Use
+    /// iterators instead.
+    ///
+    bool GetNext(BamRecord& r);
 
 private:
-    std::vector<int> whitelist_;
+    struct SubreadLengthQueryPrivate;
+    std::unique_ptr<SubreadLengthQueryPrivate> d_;
 };
 
-//} // namespace staging
 } // namespace BAM
 } // namespace PacBio
 
-#endif // ZMWGROUPQUERY_H
+#endif // SUBREADLENGTHQUERY_H
diff --git a/include/pbbam/Tag.h b/include/pbbam/Tag.h
index c7f3d94..0520e38 100644
--- a/include/pbbam/Tag.h
+++ b/include/pbbam/Tag.h
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file Tag.h
+/// \brief Defines the Tag class.
+//
 // Author: Derek Barnett
 
 #ifndef TAG_H
@@ -46,7 +50,9 @@
 namespace PacBio {
 namespace BAM {
 
-/// \brief Provides information on the exact (C++) data type held by a Tag.
+/// \brief This enum is used to describe the exact (C++) data type held by a
+///        Tag.
+///
 enum class TagDataType
 {
     INVALID      = 0     ///< boost::blank
@@ -67,66 +73,137 @@ enum class TagDataType
   , FLOAT_ARRAY  = 15    ///< std::vector<float>
 };
 
-/// \brief Provides additional instructions on interpreting the tag's value.
+/// \brief This enum provides additional instructions on interpreting the tag's
+///        value.
+///
+/// Some C++ data types (e.g. std::string) may represent more than one BAM tag
+/// type ('H' vs 'Z'). Thus a TagModifier may be used to indicate how to
+/// properly distinguish between these shared data types.
 ///
-/// Some C++ data types (e.g. std::string) may represent more than one BAM tag type
-/// ('H' vs 'Z'). These modifiers indicate how to properly interpret those shared
-/// data types.
 enum class TagModifier
 {
-    /// \brief This indicates the tag has no modifiers set.
+    /// \brief This value indicates that the tag has no modifiers set.
+    ///
     NONE = 0,
 
     /// \brief This modifier marks an integer as ASCII.
     ///
-    /// SAM/BAM has the concept of an ASCII character that is distinct from an 8-bit
-    /// integer. However, there is no such pure separation in C++
-    /// (int8_t/uint8_t are likely implemented as typedefs around char/unsigned char).
-    /// Thus this modifier can be used to indicate a tag's integer data should be
+    /// SAM/BAM has the concept of an ASCII character that is distinct from an
+    /// 8-bit integer. However, there is no such pure separation in C++ - as
+    /// int8_t/uint8_t are likely implemented as typedefs around char/unsigned
+    /// char. Thus this modifier can be used to indicate a tag's value should be
     /// interpreted as a printable, ASCII character.
+    ///
     ASCII_CHAR,
 
-    /// \brief This modifier marks std::string data as "hex string", rather than a regular string.
+    /// \brief This modifier marks std::string data as "hex string", rather than
+    ///        a regular string.
+    ///
+    /// SAM/BAM has a distinction between regular strings and "Hex format"
+    /// strings. However, they are both manipulated in C++ via std::string. Thus
+    /// this modifier can be used to indicate that a tag's string data should be
+    /// interpreted as "Hex format" rather than a regular, literal string.
     ///
-    /// SAM/BAM has a distinction between regular strings and "Hex format" strings.
-    /// However, they are both manipulated in C++ via std::string. Thus this modifier
-    /// can be used to indicate that a tag's string data should be interpreted as
-    /// "Hex format" rather than a regular, literal string.
     HEX_STRING
 };
 
+/// \brief The Tag class represents a SAM/BAM record tag value.
+///
+/// SAM/BAM tags may store values from a variety of types: varying fixed-width
+/// integers, strings, arrays of data, etc.
+///
+/// The Tag class allows tags to be handled in a generic fashion, while
+/// maintaining a high level of type-safety. Only those types recognized by the
+/// SAM/BAM format are allowed, and extracting the value from a tag is subject
+/// to allowed conversion rules, as well.
+///
 // Inspired by (but greatly simplified & modified from) the boost::variant
 // wrapper approach taken by DynamicCpp (https://code.google.com/p/dynamic-cpp)
+//
 class PBBAM_EXPORT Tag
 {
 public:
-
     /// \name Constructors & Related Methods
     /// \{
 
-    /// Constructs a null tag.
-    /// \sa IsNull()
+    /// \brief Creates an empty, null tag
     Tag(void);
 
+    /// \brief Creates a Tag from a signed 8-bit integer or character.
+    ///
+    /// Without a TagModifier, the resulting Tag will be annotated as containing
+    /// an 8-bit integer, whether the input \p value was an integer or a char.
+    /// For ASCII tags, use one of these methods:
+    /// \include code/Tag_AsciiCtor.txt
+    ///
     Tag(int8_t value);
+
+    /// \brief Creates a Tag from a signed 8-bit integer or character,
+    ///        applying the provided modifier.
+    ///
+    /// This method allows direct construction of an ASCII character, rather
+    /// than an 8-bit integer (e.g. Tag('A', TagModifier::ASCII_CHAR) ).
+    ///
+    /// \throws std::runtime_error if \p mod is not valid for int8_t data
+    ///
+    Tag(int8_t value, const TagModifier mod);
+
+    /// \brief Creates a Tag from an unsigned 8-bit integer or character.
+    ///
+    /// Without a TagModifier, the resulting Tag will be annotated as containing
+    /// an 8-bit unsigned integer, whether the input \p value was an integer or
+    /// a char. For ASCII tags, use one of these methods:
+    /// \include code/Tag_AsciiCtor.txt
+    ///
     Tag(uint8_t value);
+
+    /// \brief Creates a Tag from a 16-bit signed integer.
     Tag(int16_t value);
+
+    /// \brief Creates a Tag from a 16-bit unsigned integer.
     Tag(uint16_t value);
+
+    /// \brief Creates a Tag from a 32-bit signed integer.
     Tag(int32_t value);
+
+    /// \brief Creates a Tag from a 32-bit unsigned integer.
     Tag(uint32_t value);
+
+    /// \brief Creates a Tag from a floating-point value.
     Tag(float value);
+
+    /// \brief Creates a Tag from string data.
     Tag(const std::string& value);
+
+    /// \brief Creates a Tag from string data, adding modifier.
+    ///
+    /// \throws std::runtime_error if \p mod is not valid for string data
+    ///
+    Tag(const std::string& value, const TagModifier mod);
+
+    /// \brief Creates a Tag from a vector of 8-bit integers.
     Tag(const std::vector<int8_t>& value);
+
+    /// \brief Creates a Tag from a vector of 8-bit unsigned integers.
     Tag(const std::vector<uint8_t>& value);
+
+    /// \brief Creates a Tag from a vector of 16-bit integers.
     Tag(const std::vector<int16_t>& value);
+
+    /// \brief Creates a Tag from a vector of 16-bit unsigned integers.
     Tag(const std::vector<uint16_t>& value);
+
+    /// \brief Creates a Tag from a vector of 32-bit integers.
     Tag(const std::vector<int32_t>& value);
+
+    /// \brief Creates a Tag from a vector of 32-bit unsigned integers.
     Tag(const std::vector<uint32_t>& value);
+
+    /// \brief Creates a Tag from a vector of floating-point values.
     Tag(const std::vector<float>& value);
     
     Tag(const Tag& other);
     Tag(Tag&& other);
-
     ~Tag(void);
 
     Tag& operator=(boost::blank value);
@@ -157,72 +234,82 @@ public:
     /// \name Data Conversion & Validation
     /// \{
 
-    /// Converts the tag value to an ASCII character
+    /// \brief Converts the tag value to an ASCII character.
+    ///
+    /// Tag must hold an integral type, within the valid ASCII range [33-127].
     ///
-    /// Tag must hold an integer type, within the valid ASCII range [33-127].
+    /// \returns ASCII character value
+    /// \throws std::runtime_error if not ASCII-compatible
     ///
-    /// \returns ASCII character if valid
-    /// \throws if not ASCII-compatible
     char ToAscii(void) const;
 
     /// \returns tag data as signed 8-bit (casting if needed)
-    /// \throws if not integral data, or out of valid range
+    /// \throws std::runtime_error if not integral data, or out of valid range
     int8_t ToInt8(void) const;
 
     /// \returns tag data as unsigned 8-bit (casting if needed)
-    /// \throws if not integral data, or out of valid range
+    /// \throws std::runtime_error if not integral data, or out of valid range
     uint8_t ToUInt8(void) const;
 
     /// \returns tag data as signed 16-bit (casting if needed)
-    /// \throws if not integral data, or out of valid range
+    /// \throws std::runtime_error if not integral data, or out of valid range
     int16_t ToInt16(void) const;
 
     /// \returns tag data as unsigned 16-bit (casting if needed)
-    /// \throws if not integral data, or out of valid range
+    /// \throws std::runtime_error if not integral data, or out of valid range
     uint16_t ToUInt16(void) const;
 
     /// \returns tag data as signed 32-bit (casting if needed)
-    /// \throws if not integral data, or out of valid range
+    /// \throws std::runtime_error if not integral data, or out of valid range
     int32_t ToInt32(void) const;
 
     /// \returns tag data as unsigned 32-bit (casting if needed)
-    /// \throws if not integral data, or out of valid range
+    /// \throws std::runtime_error if not integral data, or out of valid range
     uint32_t ToUInt32(void) const;
 
     /// \returns tag data as float
-    /// \throws if tag does not contain a value of explicit type: float
+    /// \throws std::runtime_error if tag does not contain a value of
+    ///         explicit type: float
     float ToFloat(void) const;
 
     /// \returns tag data as std::string
-    /// \throws if tag does not contain a value of explicit type: std::string
+    /// \throws std::runtime_error if tag does not contain a value of explicit
+    ///         type: std::string
     std::string ToString(void) const;
 
     /// \returns tag data as std::vector<int8_t>
-    /// \throws if tag does not contain a value of explicit type: std::vector<int8_t>
+    /// \throws std::runtime_error if tag does not contain a value of explicit
+    ///         type: std::vector<int8_t>
     std::vector<int8_t> ToInt8Array(void) const;
 
     /// \returns tag data as std::vector<uint8_t>
-    /// \throws if tag does not contain a value of explicit type: std::vector<uint8_t>
+    /// \throws std::runtime_error if tag does not contain a value of explicit
+    ///         type: std::vector<uint8_t>
     std::vector<uint8_t> ToUInt8Array(void) const;
 
     /// \returns tag data as std::vector<int16_t>
-    /// \throws if tag does not contain a value of explicit type: std::vector<int16_t>
+    /// \throws std::runtime_error if tag does not contain a value of explicit
+    ///         type: std::vector<int16_t>
     std::vector<int16_t> ToInt16Array(void) const;
 
     /// \returns tag data as std::vector<uint16_t>
-    /// \throws if tag does not contain a value of explicit type: std::vector<uint16_t>
+    /// \throws std::runtime_error if tag does not contain a value of explicit
+    ///         type: std::vector<uint16_t>
     std::vector<uint16_t> ToUInt16Array(void) const;
 
     /// \returns tag data as std::vector<int32_t>
-    /// \throws if tag does not contain a value of explicit type: std::vector<int32_t>
+    /// \throws std::runtime_error if tag does not contain a value of explicit
+    ///         type: std::vector<int32_t>
     std::vector<int32_t> ToInt32Array(void) const;
 
     /// \returns tag data as std::vector<uint32_t>
-    /// \throws if tag does not contain a value of explicit type: std::vector<uint32_t>
+    /// \throws std::runtime_error if tag does not contain a value of explicit
+    ///         type: std::vector<uint32_t>
     std::vector<uint32_t> ToUInt32Array(void) const;
 
     /// \returns tag data as std::vector<float>
-    /// \throws if tag does not contain a value of explicit type: std::vector<float>
+    /// \throws std::runtime_error if tag does not contain a value of explicit
+    ///         type: std::vector<float>
     std::vector<float> ToFloatArray(void) const;
 
     /// \}
@@ -259,7 +346,8 @@ public:
     /// \returns true if tag contains a value of type: std::string
     bool IsString(void) const;
 
-    /// \returns true if tag contains a value of type: std::string \b AND has a TagModifier of HEX_STRING
+    /// \returns true if tag contains a value of type: std::string \b AND has a
+    ///          TagModifier of TagModifier::HEX_STRING
     bool IsHexString(void) const;
 
     /// \returns true if tag contains a value of type: std::vector<int8_t>
@@ -325,7 +413,10 @@ public:
     /// \returns current tag data modifier
     TagModifier Modifier(void) const;
 
-    /// Sets tag data modifier
+    /// \brief Sets tag data modifier.
+    ///
+    /// \param[in] m    new modifier value
+    ///
     /// \returns reference to this tag
     Tag& Modifier(const TagModifier m);
 
@@ -357,6 +448,6 @@ private :
 } // namespace BAM
 } // namespace PacBio
 
-#include "internal/Tag.inl"
+#include "pbbam/internal/Tag.inl"
 
 #endif // TAG_H
diff --git a/include/pbbam/TagCollection.h b/include/pbbam/TagCollection.h
index 42b4018..11c80ff 100644
--- a/include/pbbam/TagCollection.h
+++ b/include/pbbam/TagCollection.h
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file TagCollection.h
+/// \brief Defines the TagCollection class.
+//
 // Author: Derek Barnett
 
 #ifndef TAGCOLLECTION_H
@@ -46,9 +50,15 @@
 namespace PacBio {
 namespace BAM {
 
+/// \brief The TagCollection class represents a collection (or "dictionary") of
+///        tags.
+///
+/// Tags are mapped to their tag name, a 2-character string.
+///
 class PBBAM_EXPORT TagCollection : public std::map<std::string, Tag>
 {
 public:
+    /// \returns true if the collection contains a tag with \p name
     bool Contains(const std::string& name) const;
 };
 
diff --git a/include/pbbam/ZmwGroupQuery.h b/include/pbbam/ZmwGroupQuery.h
index 8b88113..290d3ad 100644
--- a/include/pbbam/ZmwGroupQuery.h
+++ b/include/pbbam/ZmwGroupQuery.h
@@ -32,35 +32,62 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file ZmwGroupQuery.h
+/// \brief Defines the ZmwGroupQuery class.
+//
 // Author: Derek Barnett
 
 #ifndef ZMWGROUPQUERY_H
 #define ZMWGROUPQUERY_H
 
-#include "pbbam/Config.h"
 #include "pbbam/internal/QueryBase.h"
 #include <vector>
 
 namespace PacBio {
 namespace BAM {
-//namespace staging {
 
+/// \brief The ZmwGroupQuery class provides iterable access to a DataSet's
+///        %BAM records, limiting results to those matching a ZMW hole number
+///        whitelist, and grouping those results by hole number.
+///
+/// Example:
+/// \include code/ZmwGroupQuery.txt
+///
+/// \note Currently, all %BAM files must have a corresponding ".pbi" index file.
+///       Use BamFile::EnsurePacBioIndexExists before creating the query if one
+///       may not be present.
+///
 class PBBAM_EXPORT ZmwGroupQuery : public internal::IGroupQuery
 {
 public:
-    ZmwGroupQuery(const DataSet& dataset);
+    /// \brief Creates a new ZmwGroupQuery, limiting record results to only
+    ///        those matching a ZMW hole number criterion.
+    ///
+    /// \param[in] zmwWhitelist     vector of allowed ZMW hole numbers
+    /// \param[in] dataset          input data source(s)
+    ///
+    /// \throws std::runtime_error on failure to open/read underlying %BAM or
+    ///         PBI files.
+    ///
     ZmwGroupQuery(const std::vector<int32_t>& zmwWhitelist,
                   const DataSet& dataset);
+    ~ZmwGroupQuery(void);
 
-protected:
-    FileIterPtr CreateIterator(const BamFile& file);
+public:
+    /// \brief Main iteration point for record access.
+    ///
+    /// Most client code should not need to use this method directly. Use
+    /// iterators instead.
+    ///
+    bool GetNext(std::vector<BamRecord>& records);
 
 private:
-    std::vector<int> whitelist_;
+    struct ZmwGroupQueryPrivate;
+    std::unique_ptr<ZmwGroupQueryPrivate> d_;
 };
 
-//} // namespace staging
 } // namespace BAM
 } // namespace PacBio
 
diff --git a/include/pbbam/ZmwQuery.h b/include/pbbam/ZmwQuery.h
index fdd1d1d..0d6e166 100644
--- a/include/pbbam/ZmwQuery.h
+++ b/include/pbbam/ZmwQuery.h
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file ZmwQuery.h
+/// \brief Defines the ZmwQuery class.
+//
 // Author: Derek Barnett
 
 #ifndef ZMWQUERY_H
@@ -44,22 +48,48 @@
 
 namespace PacBio {
 namespace BAM {
-//namespace staging {
 
+/// \brief The ZmwQuery class provides iterable access to a DataSet's
+///        %BAM records, limiting results to those matching a ZMW hole number
+///        whitelist.
+///
+/// Example:
+/// \include code/ZmwQuery.txt
+///
+/// \note Currently, all %BAM files must have a corresponding ".pbi" index file.
+///       Use BamFile::EnsurePacBioIndexExists before creating the query if one
+///       may not be present.
+///
 class PBBAM_EXPORT ZmwQuery : public internal::IQuery
 {
 public:
+    /// \brief Creates a new ZmwQuery, limiting record results to only
+    ///        those matching a ZMW hole number criterion.
+    ///
+    /// \param[in] zmwWhitelist     vector of allowed ZMW hole numbers
+    /// \param[in] dataset          input data source(s)
+    ///
+    /// \throws std::runtime_error on failure to open/read underlying %BAM or
+    ///         PBI files.
+    ///
     ZmwQuery(const std::vector<int32_t>& zmwWhitelist,
              const DataSet& dataset);
 
-protected:
-    FileIterPtr CreateIterator(const BamFile& bamFile);
+    ~ZmwQuery(void);
+
+public:
+    /// \brief Main iteration point for record access.
+    ///
+    /// Most client code should not need to use this method directly. Use
+    /// iterators instead.
+    ///
+    bool GetNext(BamRecord& r);
 
 private:
-    std::vector<int> whitelist_;
+    struct ZmwQueryPrivate;
+    std::unique_ptr<ZmwQueryPrivate> d_;
 };
 
-//} // namespace staging
 } // namespace BAM
 } // namespace PacBio
 
diff --git a/include/pbbam/virtual/VirtualRegionType.h b/include/pbbam/ZmwType.h
similarity index 84%
copy from include/pbbam/virtual/VirtualRegionType.h
copy to include/pbbam/ZmwType.h
index 6b917bf..a93e295 100644
--- a/include/pbbam/virtual/VirtualRegionType.h
+++ b/include/pbbam/ZmwType.h
@@ -32,27 +32,32 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
+//
+// File Description
+/// \file ZmwType.h
+/// \brief Defines the ZmwType enum.
+//
+// Author: Armin Töpfer
 
-// Author: Derek Barnett
-
-#ifndef REGIONTYPE_H
-#define REGIONTYPE_H
+#ifndef ZMWTYPE_H
+#define ZMWTYPE_H
 
 #include "pbbam/Config.h"
 
 namespace PacBio {
 namespace BAM {
-/// Type of annotated region.
-enum class VirtualRegionType : char
+
+/// \brief This enum defines the different ZMW categories of scraps.
+///
+enum class ZmwType : char
 {
-	ADAPTER        = 'A',
-	BARCODE        = 'B',
-	SUBREAD        = 'S',
-	HQREGION       = 'H',
-	LQREGION       = 'L'  // Outside the HQ region
+    CONTROL   = 'C',
+    MALFORMED = 'M',
+    NORMAL    = 'N',
+    SENTINEL  = 'S'
 };
 
 } // namespace BAM
 } // namespace PacBio
 
-#endif // REGIONTYPE_H
+#endif // ZMWTYPE_H
diff --git a/include/pbbam/TagCollection.h b/include/pbbam/ZmwTypeMap.h
similarity index 83%
copy from include/pbbam/TagCollection.h
copy to include/pbbam/ZmwTypeMap.h
index 42b4018..4dc781c 100644
--- a/include/pbbam/TagCollection.h
+++ b/include/pbbam/ZmwTypeMap.h
@@ -32,27 +32,34 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
+//
+// File Description
+/// \file ZmwTypeMap.h
+/// \brief Defines the ZmwTypeMap class.
+//
+// Author: Armin Töpfer
 
-// Author: Derek Barnett
+#ifndef ZMWTYPEMAP_H
+#define ZMWTYPEMAP_H
 
-#ifndef TAGCOLLECTION_H
-#define TAGCOLLECTION_H
+#include <map>
 
 #include "pbbam/Config.h"
-#include "pbbam/Tag.h"
-#include <map>
-#include <string>
+#include "pbbam/ZmwType.h"
 
 namespace PacBio {
 namespace BAM {
 
-class PBBAM_EXPORT TagCollection : public std::map<std::string, Tag>
+/// \brief The ZmwTypeMap class provides mapping between char codes and
+///        ZmwType enum keys.
+///
+class ZmwTypeMap
 {
 public:
-    bool Contains(const std::string& name) const;
+	static std::map<char, ZmwType> ParseChar;
 };
 
 } // namespace BAM
 } // namespace PacBio
 
-#endif // TAGCOLLECTION_H
+#endif // ZMWTYPEMAP_H
diff --git a/include/pbbam/BamReader.h b/include/pbbam/exception/InvalidSequencingChemistryException.h
similarity index 53%
copy from include/pbbam/BamReader.h
copy to include/pbbam/exception/InvalidSequencingChemistryException.h
index bd0ced6..9761703 100644
--- a/include/pbbam/BamReader.h
+++ b/include/pbbam/exception/InvalidSequencingChemistryException.h
@@ -32,83 +32,67 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file InvalidSequencingChemistryException.h
+/// \brief Defines the InvalidSequencingChemistryException class.
+//
 // Author: Derek Barnett
 
-#ifndef BAMREADER_H
-#define BAMREADER_H
+#ifndef INVALIDSEQUENCINGCHEMISTRYEXCEPTION_H
+#define INVALIDSEQUENCINGCHEMISTRYEXCEPTION_H
 
-#include "pbbam/BamHeader.h"
-#include "pbbam/BamRecord.h"
-#include "pbbam/Config.h"
+#include <exception>
+#include <sstream>
 #include <string>
 
 namespace PacBio {
 namespace BAM {
 
-class PBBAM_EXPORT BamReader
+/// \brief The InvalidSequencingChemistryException class represents an exception
+///        that will be thrown when an invalid sequencing chemistry combination
+///        is encountered.
+///
+class InvalidSequencingChemistryException : public std::exception
 {
-
 public:
-    enum ReadError
+    InvalidSequencingChemistryException(const std::string& bindingKit,
+                                        const std::string& sequencingKit,
+                                        const std::string& basecallerVersion)
+        : bindingKit_(bindingKit)
+        , sequencingKit_(sequencingKit)
+        , basecallerVersion_(basecallerVersion)
     {
-        NoError = 0
-      , OpenFileError
-      , ReadHeaderError
-      , ReadRecordError
-    };
-
-public:
-    BamReader(void);
-    virtual ~BamReader(void);
+        std::stringstream s;
+        s << "unsupported sequencing chemistry combination: " << std::endl
+          << "    binding kit:        " << bindingKit_ << std::endl
+          << "    sequencing kit:     " << sequencingKit_ << std::endl
+          << "    basecaller version: " << basecallerVersion_ << std::endl;
+        what_ = s.str();
+    }
 
 public:
+    const std::string& BindingKit(void) const
+    { return bindingKit_; }
 
-    /// Closes the BAM file reader.
-    void Close(void);
+    const std::string& SequencingKit(void) const
+    { return sequencingKit_; }
 
-    /// Opens a BAM file for reading.
-    ///
-    /// Prefix \p filename with "http://" or "ftp://" for remote files,
-    /// or set to "-" for stdin.
-    ///
-    /// \param[in] filename path to input BAM file
-    ///
-    /// \returns success/failure
-    bool Open(const std::string& filename);
-
-    /// \returns header as BamHeader object
-    BamHeader::SharedPtr Header(void) const;
-
-    /// \returns error status code
-    BamReader::ReadError Error(void) const;
-
-    /// \returns true if error encountered
-    bool HasError(void) const;
-
-    /// Fetches the next record in a BAM file.
-    ///
-    /// \param[out] record pointer to BamRecord object
-    ///
-    /// \returns succcess/failure
-    bool GetNext(PBBAM_SHARED_PTR<BamRecord> record);
+    const std::string& BasecallerVersion(void) const
+    { return basecallerVersion_; }
 
 public:
-    std::string PacBioBamVersion(void) const;
-
-protected:
-    bool GetNext(PBBAM_SHARED_PTR<bam1_t> rawRecord);
-    void InitialOpen(void);
-    PBBAM_SHARED_PTR<bam_hdr_t> RawHeader(void) const;
+    const char* what(void) const noexcept override  // message is built once in the ctor
+    { return what_.c_str(); }
 
 protected:
-    PBBAM_SHARED_PTR<samFile>   file_;
-    PBBAM_SHARED_PTR<bam_hdr_t> header_;
-    std::string filename_;
-    BamReader::ReadError error_;
+    std::string bindingKit_;
+    std::string sequencingKit_;
+    std::string basecallerVersion_;
+    std::string what_;
 };
 
 } // namespace BAM
 } // namespace PacBio
 
-#endif // BAMREADER_H
+#endif // INVALIDSEQUENCINGCHEMISTRYEXCEPTION_H
diff --git a/include/pbbam/Position.h b/include/pbbam/internal/Accuracy.inl
similarity index 77%
copy from include/pbbam/Position.h
copy to include/pbbam/internal/Accuracy.inl
index 110d7ed..f859662 100644
--- a/include/pbbam/Position.h
+++ b/include/pbbam/internal/Accuracy.inl
@@ -33,27 +33,34 @@
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
 //
+// File Description
+/// \file Accuracy.inl
+/// \brief Inline implementations for the Accuracy class.
+//
 // Author: Derek Barnett
 
-#ifndef POSITION_H
-#define POSITION_H
-
-#include "pbbam/Config.h"
+#include "pbbam/Accuracy.h"
 
 namespace PacBio {
 namespace BAM {
 
-/// \typedef typedef int32_t PacBio::BAM::Position
-///
-/// This type refers to all genomic positions. We use signed
-/// because SAM/BAM uses the -1 value to indicate unknown, unmapped, etc.
-/// positions.
-///
-typedef int32_t Position;
+// Stores 'accuracy', clamped into the range [Accuracy::MIN, Accuracy::MAX].
+inline Accuracy::Accuracy(float accuracy)
+    : accuracy_(accuracy)
+{
+    // values already inside the range fall through both checks untouched
+    if (accuracy_ < Accuracy::MIN)      accuracy_ = Accuracy::MIN;
+    else if (accuracy_ > Accuracy::MAX) accuracy_ = Accuracy::MAX;
+}
+
+inline Accuracy::Accuracy(const Accuracy &other)
+    : accuracy_(other.accuracy_)
+{ }
 
-static const Position UnmappedPosition = Position(-1);
+inline Accuracy::~Accuracy(void) { }
+
+inline Accuracy::operator float(void) const
+{ return accuracy_; }
 
 } // namespace BAM
 } // namespace PacBio
-
-#endif // POSITION_H
diff --git a/include/pbbam/internal/BamHeader.inl b/include/pbbam/internal/BamHeader.inl
new file mode 100644
index 0000000..2445a25
--- /dev/null
+++ b/include/pbbam/internal/BamHeader.inl
@@ -0,0 +1,154 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BamHeader.inl
+/// \brief Inline implementations for the BamHeader class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/BamHeader.h"
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+class BamHeaderPrivate
+{
+public:
+    std::string version_;
+    std::string pacbioBamVersion_;
+    std::string sortOrder_;
+    std::map<std::string, std::string> headerLineCustom_;
+
+    std::map<std::string, ReadGroupInfo> readGroups_; // id => read group info
+    std::map<std::string, ProgramInfo> programs_;     // id => program info
+    std::vector<std::string> comments_;
+
+    // we need to preserve insertion order, use lookup for access by name
+    std::vector<SequenceInfo> sequences_;
+    std::map<std::string, int32_t> sequenceIdLookup_;
+};
+
+} // namespace internal
+
+inline BamHeader::BamHeader(void)
+    : d_(new internal::BamHeaderPrivate)
+{ }
+
+inline BamHeader::BamHeader(const BamHeader& other)
+    : d_(other.d_)
+{ }
+
+inline BamHeader::BamHeader(BamHeader&& other)
+    : d_(std::move(other.d_))
+{ }
+
+inline BamHeader& BamHeader::operator=(const BamHeader& other)
+{ d_ = other.d_; return *this; }
+
+inline BamHeader& BamHeader::operator=(BamHeader&& other)
+{ d_ = std::move(other.d_); return *this; }
+
+inline BamHeader::~BamHeader(void) { }
+
+inline BamHeader BamHeader::operator+(const BamHeader& other) const
+{ return DeepCopy() += other; }
+
+inline BamHeader& BamHeader::AddComment(const std::string& comment)
+{ d_->comments_.push_back(comment); return *this; }
+
+inline BamHeader& BamHeader::AddProgram(const ProgramInfo& pg)
+{ d_->programs_[pg.Id()] = pg; return *this; }
+
+inline BamHeader& BamHeader::AddReadGroup(const ReadGroupInfo& readGroup)
+{ d_->readGroups_[readGroup.Id()] = readGroup; return *this; }
+
+inline BamHeader& BamHeader::ClearComments(void)
+{ d_->comments_.clear(); return *this; }
+
+inline BamHeader& BamHeader::ClearPrograms(void)
+{ d_->programs_.clear(); return *this; }
+
+inline BamHeader& BamHeader::ClearReadGroups(void)
+{ d_->readGroups_.clear(); return *this; }
+
+inline std::vector<std::string> BamHeader::Comments(void) const
+{ return d_->comments_; }
+
+inline BamHeader& BamHeader::Comments(const std::vector<std::string>& comments)
+{ d_->comments_ = comments; return *this; }
+
+inline bool BamHeader::HasProgram(const std::string& id) const
+{ return d_->programs_.find(id) != d_->programs_.cend(); }
+
+inline bool BamHeader::HasReadGroup(const std::string& id) const
+{ return d_->readGroups_.find(id) != d_->readGroups_.cend(); }
+
+inline bool BamHeader::HasSequence(const std::string& name) const
+{ return d_->sequenceIdLookup_.find(name) != d_->sequenceIdLookup_.cend(); }
+
+inline size_t BamHeader::NumSequences(void) const
+{ return d_->sequences_.size(); }
+
+inline std::string BamHeader::PacBioBamVersion(void) const
+{ return d_->pacbioBamVersion_; }
+
+inline SequenceInfo BamHeader::Sequence(const int32_t id) const
+{ return d_->sequences_.at(id); }
+
+inline std::string BamHeader::SequenceLength(const int32_t id) const
+{ return Sequence(id).Length(); }
+
+inline std::string BamHeader::SequenceName(const int32_t id) const
+{ return Sequence(id).Name(); }
+
+inline std::vector<SequenceInfo> BamHeader::Sequences(void) const
+{ return d_->sequences_; }
+
+inline std::string BamHeader::SortOrder(void) const
+{ return d_->sortOrder_; }
+
+inline BamHeader& BamHeader::SortOrder(const std::string& order)
+{ d_->sortOrder_ = order; return *this; }
+
+inline std::string BamHeader::Version(void) const
+{ return d_->version_; }
+
+inline BamHeader& BamHeader::Version(const std::string& version)
+{ d_->version_ = version; return *this; }
+
+} // namespace BAM
+} // namespace PacBio
diff --git a/include/pbbam/internal/BamRecord.inl b/include/pbbam/internal/BamRecord.inl
new file mode 100644
index 0000000..11e2985
--- /dev/null
+++ b/include/pbbam/internal/BamRecord.inl
@@ -0,0 +1,166 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BamRecord.inl
+/// \brief Inline implementations for the BamRecord & BamRecordView classes.
+//
+// Author: Derek Barnett
+
+#include "pbbam/BamRecord.h"
+
+namespace PacBio {
+namespace BAM {
+
+inline BamRecord BamRecord::Clipped(const BamRecord& input,
+                                    const ClipType clipType,
+                                    const PacBio::BAM::Position start,
+                                    const PacBio::BAM::Position end)
+{
+    return input.Clipped(clipType, start, end);
+}
+
+inline BamRecord BamRecord::Clipped(const ClipType clipType,
+                                    const PacBio::BAM::Position start,
+                                    const PacBio::BAM::Position end) const
+{
+    BamRecord result(*this);
+    result.Clip(clipType, start, end);
+    return result;
+}
+
+inline BamRecord BamRecord::Mapped(const BamRecord& input,
+                                   const int32_t referenceId,
+                                   const Position refStart,
+                                   const Strand strand,
+                                   const Cigar& cigar,
+                                   const uint8_t mappingQuality)
+{
+    return input.Mapped(referenceId, refStart, strand, cigar, mappingQuality);
+}
+
+inline BamRecord BamRecord::Mapped(const int32_t referenceId,
+                                   const Position refStart,
+                                   const Strand strand,
+                                   const Cigar& cigar,
+                                   const uint8_t mappingQuality) const
+{
+    BamRecord result(*this);
+    result.Map(referenceId, refStart, strand, cigar, mappingQuality);
+    return result;
+}
+
+
+inline BamRecordView::BamRecordView(const BamRecord& record,
+                                    const Orientation orientation,
+                                    const bool aligned,
+                                    const bool exciseSoftClips)
+    : record_(record)
+    , orientation_(orientation)
+    , aligned_(aligned)
+    , exciseSoftClips_(exciseSoftClips)
+{ }
+
+inline QualityValues BamRecordView::AltLabelQVs(void) const
+{ return record_.AltLabelQV(orientation_); }
+
+inline std::string BamRecordView::AltLabelTags(void) const
+{ return record_.AltLabelTag(orientation_); }
+
+inline QualityValues BamRecordView::DeletionQVs(void) const
+{ return record_.DeletionQV(orientation_, aligned_, exciseSoftClips_); }
+
+inline std::string BamRecordView::DeletionTags(void) const
+{ return record_.DeletionTag(orientation_, aligned_, exciseSoftClips_); }
+
+inline QualityValues BamRecordView::InsertionQVs(void) const
+{ return record_.InsertionQV(orientation_, aligned_, exciseSoftClips_); }
+
+inline Frames BamRecordView::IPD(void) const
+{ return record_.IPD(orientation_, aligned_, exciseSoftClips_); }
+
+inline Frames BamRecordView::PrebaseFrames(void) const
+{ return record_.IPD(orientation_, aligned_, exciseSoftClips_); }
+
+inline QualityValues BamRecordView::LabelQVs(void) const
+{ return record_.LabelQV(orientation_); }
+
+inline QualityValues BamRecordView::MergeQVs(void) const
+{ return record_.MergeQV(orientation_, aligned_, exciseSoftClips_); }
+
+inline QualityValues BamRecordView::PulseMergeQVs(void) const
+{ return record_.PulseMergeQV(orientation_); }
+
+inline std::vector<float> BamRecordView::Pkmean(void) const
+{ return record_.Pkmean(orientation_); }
+
+inline std::vector<float> BamRecordView::Pkmid(void) const
+{ return record_.Pkmid(orientation_); }
+
+inline std::vector<float> BamRecordView::Pkmean2(void) const
+{ return record_.Pkmean2(orientation_); }
+
+inline std::vector<float> BamRecordView::Pkmid2(void) const
+{ return record_.Pkmid2(orientation_); }
+
+inline Frames BamRecordView::PrePulseFrames(void) const
+{ return record_.PrePulseFrames(orientation_); }
+
+inline std::string BamRecordView::PulseCalls(void) const
+{ return record_.PulseCall(orientation_); }
+
+inline Frames BamRecordView::PulseCallWidth(void) const
+{ return record_.PulseCallWidth(orientation_); }
+
+inline Frames BamRecordView::PulseWidths(void) const
+{ return record_.PulseWidth(orientation_, aligned_, exciseSoftClips_); }
+
+inline QualityValues BamRecordView::Qualities(void) const
+{ return record_.Qualities(orientation_, aligned_, exciseSoftClips_); }
+
+inline std::string BamRecordView::Sequence(void) const
+{ return record_.Sequence(orientation_, aligned_, exciseSoftClips_); }
+
+inline std::vector<uint32_t> BamRecordView::StartFrames(void) const
+{ return record_.StartFrame(orientation_); }
+
+inline QualityValues BamRecordView::SubstitutionQVs(void) const
+{ return record_.SubstitutionQV(orientation_, aligned_, exciseSoftClips_); }
+
+inline std::string BamRecordView::SubstitutionTags(void) const
+{ return record_.SubstitutionTag(orientation_, aligned_, exciseSoftClips_); }
+
+} // namespace BAM
+} // namespace PacBio
diff --git a/include/pbbam/DataSetXsd.h b/include/pbbam/internal/BamRecordBuilder.inl
similarity index 55%
copy from include/pbbam/DataSetXsd.h
copy to include/pbbam/internal/BamRecordBuilder.inl
index 29df5e1..212e831 100644
--- a/include/pbbam/DataSetXsd.h
+++ b/include/pbbam/internal/BamRecordBuilder.inl
@@ -32,80 +32,53 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file BamRecordBuilder.inl
+/// \brief Inline implementations for the BamRecordBuilder class.
+//
 // Author: Derek Barnett
 
-#ifndef DATASETXSD_H
-#define DATASETXSD_H
-
-#include "pbbam/Config.h"
-#include <map>
-#include <string>
+#include "pbbam/BamRecordBuilder.h"
 
 namespace PacBio {
 namespace BAM {
 
-enum class XsdType
-{
-    NONE
+inline BamRecordBuilder& BamRecordBuilder::Bin(const uint32_t bin)
+{ core_.bin = bin; return *this; }
 
-  , AUTOMATION_CONSTRAINTS
-  , BASE_DATA_MODEL
-  , COLLECTION_METADATA
-  , COMMON_MESSAGES
-  , DATA_MODEL
-  , DATA_STORE
-  , DATASETS
-  , DECL_DATA
-  , PART_NUMBERS
-  , PRIMARY_METRICS
-  , REAGENT_KIT
-  , RIGHTS_AND_ROLES
-  , SAMPLE_INFO
-  , SEEDING_DATA
-};
+inline BamRecordBuilder& BamRecordBuilder::Flag(const uint32_t flag)
+{ core_.flag = flag; return *this; }
 
-class PBBAM_EXPORT NamespaceInfo
-{
-public:
-    NamespaceInfo(void);
-    NamespaceInfo(const std::string& name,
-                  const std::string& uri);
+inline BamRecordBuilder& BamRecordBuilder::InsertSize(const int32_t iSize)
+{ core_.isize = iSize; return *this; }
 
-public:
-    const std::string& Name(void) const { return name_; }
-    const std::string& Uri(void) const { return uri_; }
+inline BamRecordBuilder& BamRecordBuilder::MapQuality(const uint8_t mapQual)
+{ core_.qual = mapQual; return *this; }
 
-private:
-    std::string name_;
-    std::string uri_;
-};
+inline BamRecordBuilder& BamRecordBuilder::MatePosition(const int32_t pos)
+{ core_.mpos = pos; return *this; }
 
-class PBBAM_EXPORT NamespaceRegistry
-{
-public:
-    NamespaceRegistry(void);
-    NamespaceRegistry(const NamespaceRegistry& other);
-    NamespaceRegistry& operator=(const NamespaceRegistry& other);
-    ~NamespaceRegistry(void);
+inline BamRecordBuilder& BamRecordBuilder::MateReferenceId(const int32_t id)
+{ core_.mtid = id; return *this; }
 
-public:
-    const NamespaceInfo& DefaultNamespace(void) const;
-    XsdType DefaultXsd(void) const;
-    const NamespaceInfo& Namespace(const XsdType& xsd) const;
+inline BamRecordBuilder& BamRecordBuilder::Position(const int32_t pos)
+{ core_.pos = pos; return *this; }
 
-    XsdType XsdForUri(const std::string& uri) const;
+inline BamRecordBuilder& BamRecordBuilder::Qualities(const std::string& qualities)
+{ qualities_ = qualities; return *this; }
 
-public:
-    void Register(const XsdType& xsd, const NamespaceInfo& namespaceInfo);
-    void SetDefaultXsd(const XsdType& xsd);
+inline BamRecordBuilder& BamRecordBuilder::Qualities(std::string&& qualities)
+{ qualities_ = std::move(qualities); return *this; }
 
-private:
-    std::map<XsdType, NamespaceInfo> data_;
-    XsdType defaultXsdType_;
-};
+inline BamRecordBuilder& BamRecordBuilder::ReferenceId(const int32_t id)
+{ core_.tid = id; return *this; }
 
-} // namespace PacBio
-} // namespace BAM
+inline BamRecordBuilder& BamRecordBuilder::Tags(const TagCollection& tags)
+{ tags_ = tags; return *this; }
+
+inline BamRecordBuilder& BamRecordBuilder::Tags(TagCollection&& tags)
+{ tags_ = std::move(tags); return *this; }
 
-#endif // DATASETXSD_H
+} // namespace BAM
+} // namespace PacBio
diff --git a/include/pbbam/internal/BamRecordImpl.inl b/include/pbbam/internal/BamRecordImpl.inl
new file mode 100644
index 0000000..6c0ecef
--- /dev/null
+++ b/include/pbbam/internal/BamRecordImpl.inl
@@ -0,0 +1,216 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BamRecordImpl.inl
+/// \brief Inline implementations for the BamRecordImpl class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/BamRecordImpl.h"
+
+namespace PacBio {
+namespace BAM {
+
+inline uint32_t BamRecordImpl::Bin(void) const
+{ return d_->core.bin; }
+
+inline BamRecordImpl& BamRecordImpl::Bin(uint32_t bin)
+{ d_->core.bin = bin; return *this; }
+
+inline uint32_t BamRecordImpl::Flag(void) const
+{ return d_->core.flag; }
+
+inline BamRecordImpl& BamRecordImpl::Flag(uint32_t flag)
+{ d_->core.flag = flag; return *this; }
+
+inline int32_t BamRecordImpl::InsertSize(void) const
+{ return d_->core.isize; }
+
+inline BamRecordImpl& BamRecordImpl::InsertSize(int32_t iSize)
+{ d_->core.isize = iSize; return *this; }
+
+inline uint8_t BamRecordImpl::MapQuality(void) const
+{ return d_->core.qual; }
+
+inline BamRecordImpl& BamRecordImpl::MapQuality(uint8_t mapQual)
+{ d_->core.qual = mapQual; return *this; }
+
+inline PacBio::BAM::Position BamRecordImpl::MatePosition(void) const
+{ return d_->core.mpos; }
+
+inline BamRecordImpl& BamRecordImpl::MatePosition(PacBio::BAM::Position pos)
+{ d_->core.mpos = pos; return *this; }
+
+inline int32_t BamRecordImpl::MateReferenceId(void) const
+{ return d_->core.mtid; }
+
+inline BamRecordImpl& BamRecordImpl::MateReferenceId(int32_t id)
+{ d_->core.mtid = id; return *this; }
+
+inline PacBio::BAM::Position BamRecordImpl::Position(void) const
+{ return d_->core.pos; }
+
+inline BamRecordImpl& BamRecordImpl::Position(PacBio::BAM::Position pos)
+{ d_->core.pos = pos; return *this; }
+
+inline int32_t BamRecordImpl::ReferenceId(void) const
+{ return d_->core.tid; }
+
+inline BamRecordImpl& BamRecordImpl::ReferenceId(int32_t id)
+{ d_->core.tid = id; return *this; }
+
+inline bool BamRecordImpl::IsDuplicate(void) const
+{ return (d_->core.flag & BamRecordImpl::DUPLICATE) != 0; }
+
+inline BamRecordImpl& BamRecordImpl::SetDuplicate(bool ok)
+{
+    if (ok) d_->core.flag |=  BamRecordImpl::DUPLICATE;
+    else    d_->core.flag &= ~BamRecordImpl::DUPLICATE;
+    return *this;
+}
+
+inline bool BamRecordImpl::IsFailedQC(void) const
+{ return (d_->core.flag & BamRecordImpl::FAILED_QC) != 0; }
+
+inline BamRecordImpl& BamRecordImpl::SetFailedQC(bool ok)
+{
+    if (ok) d_->core.flag |=  BamRecordImpl::FAILED_QC;
+    else    d_->core.flag &= ~BamRecordImpl::FAILED_QC;
+    return *this;
+}
+
+inline bool BamRecordImpl::IsFirstMate(void) const
+{ return (d_->core.flag & BamRecordImpl::MATE_1) != 0; }
+
+inline BamRecordImpl& BamRecordImpl::SetFirstMate(bool ok)
+{
+    if (ok) d_->core.flag |=  BamRecordImpl::MATE_1;
+    else    d_->core.flag &= ~BamRecordImpl::MATE_1;
+    return *this;
+}
+
+inline bool BamRecordImpl::IsMapped(void) const
+{ return (d_->core.flag & BamRecordImpl::UNMAPPED) == 0; }
+
+inline BamRecordImpl& BamRecordImpl::SetMapped(bool ok)
+{
+    if (ok) d_->core.flag &= ~BamRecordImpl::UNMAPPED;
+    else    d_->core.flag |=  BamRecordImpl::UNMAPPED;
+    return *this;
+}
+
+inline bool BamRecordImpl::IsMateMapped(void) const
+{ return (d_->core.flag & BamRecordImpl::MATE_UNMAPPED) == 0; }
+
+inline BamRecordImpl& BamRecordImpl::SetMateMapped(bool ok)
+{
+    if (ok) d_->core.flag &= ~BamRecordImpl::MATE_UNMAPPED;
+    else    d_->core.flag |=  BamRecordImpl::MATE_UNMAPPED;
+    return *this;
+}
+
+inline bool BamRecordImpl::IsMateReverseStrand(void) const
+{ return (d_->core.flag & BamRecordImpl::MATE_REVERSE_STRAND) != 0; }
+
+inline BamRecordImpl& BamRecordImpl::SetMateReverseStrand(bool ok)
+{
+    if (ok) d_->core.flag |=  BamRecordImpl::MATE_REVERSE_STRAND;
+    else    d_->core.flag &= ~BamRecordImpl::MATE_REVERSE_STRAND;
+    return *this;
+}
+
+inline bool BamRecordImpl::IsPaired(void) const
+{ return (d_->core.flag & BamRecordImpl::PAIRED) != 0; }
+
+inline BamRecordImpl& BamRecordImpl::SetPaired(bool ok)
+{
+    if (ok) d_->core.flag |=  BamRecordImpl::PAIRED;
+    else    d_->core.flag &= ~BamRecordImpl::PAIRED;
+    return *this;
+}
+
+inline bool BamRecordImpl::IsPrimaryAlignment(void) const
+{ return (d_->core.flag & BamRecordImpl::SECONDARY) == 0; }
+
+inline BamRecordImpl& BamRecordImpl::SetPrimaryAlignment(bool ok)
+{
+    if (ok) d_->core.flag &= ~BamRecordImpl::SECONDARY;
+    else    d_->core.flag |=  BamRecordImpl::SECONDARY;
+    return *this;
+}
+
+inline bool BamRecordImpl::IsProperPair(void) const
+{ return (d_->core.flag & BamRecordImpl::PROPER_PAIR) != 0; }
+
+inline BamRecordImpl& BamRecordImpl::SetProperPair(bool ok)
+{
+    if (ok) d_->core.flag |=  BamRecordImpl::PROPER_PAIR;
+    else    d_->core.flag &= ~BamRecordImpl::PROPER_PAIR;
+    return *this;
+}
+
+inline bool BamRecordImpl::IsReverseStrand(void) const
+{ return (d_->core.flag & BamRecordImpl::REVERSE_STRAND) != 0; }
+
+inline BamRecordImpl& BamRecordImpl::SetReverseStrand(bool ok)
+{
+    if (ok) d_->core.flag |=  BamRecordImpl::REVERSE_STRAND;
+    else    d_->core.flag &= ~BamRecordImpl::REVERSE_STRAND;
+    return *this;
+}
+
+inline bool BamRecordImpl::IsSecondMate(void) const
+{ return (d_->core.flag & BamRecordImpl::MATE_2) != 0; }
+
+inline BamRecordImpl& BamRecordImpl::SetSecondMate(bool ok)
+{
+    if (ok) d_->core.flag |=  BamRecordImpl::MATE_2;
+    else    d_->core.flag &= ~BamRecordImpl::MATE_2;
+    return *this;
+}
+
+inline bool BamRecordImpl::IsSupplementaryAlignment(void) const
+{ return (d_->core.flag & BamRecordImpl::SUPPLEMENTARY) != 0; }
+
+inline BamRecordImpl& BamRecordImpl::SetSupplementaryAlignment(bool ok)
+{
+    if (ok) d_->core.flag |=  BamRecordImpl::SUPPLEMENTARY;
+    else    d_->core.flag &= ~BamRecordImpl::SUPPLEMENTARY;
+    return *this;
+}
+
+} // namespace BAM
+} // namespace PacBio
diff --git a/include/pbbam/internal/BamRecordSort.h b/include/pbbam/internal/BamRecordSort.h
deleted file mode 100644
index 53dab05..0000000
--- a/include/pbbam/internal/BamRecordSort.h
+++ /dev/null
@@ -1,138 +0,0 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//
-//  * Redistributions in binary form must reproduce the above
-//    copyright notice, this list of conditions and the following
-//    disclaimer in the documentation and/or other materials provided
-//    with the distribution.
-//
-//  * Neither the name of Pacific Biosciences nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-
-// Author: Derek Barnett
-
-#ifndef BAMRECORDSORT_H
-#define BAMRECORDSORT_H
-
-#include "pbbam/BamRecord.h"
-#include <functional>
-#include <cassert>
-
-namespace PacBio {
-namespace BAM {
-namespace internal {
-
-enum class SortOrder {
-    Ascending = 0
-  , Descending
-};
-
-template<typename ElemType>
-inline bool sort_helper(const SortOrder& order,
-                        const ElemType& lhs,
-                        const ElemType& rhs)
-{
-    switch ( order ) {
-        case SortOrder::Ascending   : { std::less<ElemType> comp;    return comp(lhs, rhs); }
-        case SortOrder::Descending  : { std::greater<ElemType> comp; return comp(lhs, rhs); }
-        default :
-            assert(false);
-    }
-    return false; // <-- unreachable
-}
-
-typedef std::binary_function<BamRecord, BamRecord, bool> BamRecordSortBase;
-
-struct Unsorted : public BamRecordSortBase
-{
-public:
-    Unsorted(const SortOrder& order = SortOrder::Ascending)
-    { (void)order; }
-
-    bool operator()(const BamRecord& lhs, const BamRecord& rhs)
-    { (void)lhs; (void)rhs; return false; }
-};
-
-struct ByQName : public BamRecordSortBase
-{
-public:
-    ByQName(const SortOrder& order = SortOrder::Ascending)
-        : m_order(order)
-    { }
-
-    bool operator()(const BamRecord& lhs, const BamRecord& rhs)
-    { return sort_helper(m_order, lhs.FullName(), rhs.FullName()); }
-
-private:
-    const SortOrder m_order;
-};
-
-struct ByPosition : public BamRecordSortBase
-{
-public:
-    ByPosition(const SortOrder& order = SortOrder::Ascending)
-        : m_order(order)
-    { }
-
-    // comparison function
-    bool operator()(const BamRecord& lhs, const BamRecord& rhs) {
-
-        const int32_t lhsId = lhs.ReferenceId();
-        const int32_t rhsId = rhs.ReferenceId();
-
-        // force unmapped aligmnents to end
-        if ( lhsId == -1 ) return false;
-        if ( rhsId == -1 ) return true;
-
-        // if on same reference, sort on position
-        if ( lhsId == rhsId )
-            return sort_helper(m_order, lhs.ReferenceStart(), rhs.ReferenceStart());
-
-        // otherwise sort on reference ID
-        return sort_helper(m_order, lhsId, rhsId);
-    }
-
-private:
-    const SortOrder m_order;
-};
-
-struct ByZmw : public BamRecordSortBase {
-public:
-    ByZmw(const SortOrder& order = SortOrder::Ascending) : m_order(order) { }
-
-    bool operator()(const BamRecord& lhs, const BamRecord& rhs)
-    { return sort_helper(m_order, lhs.HoleNumber(), rhs.HoleNumber()); }
-
-private:
-    const SortOrder m_order;
-};
-
-} // namespace internal
-} // namespace BAM
-} // namespace PacBio
-
-#endif // BAMRECORDSORT_H
diff --git a/src/FilterEngine.cpp b/include/pbbam/internal/Cigar.inl
similarity index 71%
copy from src/FilterEngine.cpp
copy to include/pbbam/internal/Cigar.inl
index 1f47967..4799a72 100644
--- a/src/FilterEngine.cpp
+++ b/include/pbbam/internal/Cigar.inl
@@ -32,45 +32,46 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file Cigar.inl
+/// \brief Inline implementations for the Cigar class.
+//
 // Author: Derek Barnett
 
-#include "pbbam/internal/FilterEngine.h"
-using namespace PacBio;
-using namespace PacBio::BAM;
-using namespace PacBio::BAM::internal;
-using namespace std;
+#include "pbbam/Cigar.h"
 
 namespace PacBio {
 namespace BAM {
-namespace internal {
 
-} // namespace internal
-} // namespace BAM
-} // namespace PacBio
+inline Cigar::Cigar(void)
+    : std::vector<CigarOperation>()
+{ }
 
+inline Cigar::Cigar(const Cigar& other)
+    : std::vector<CigarOperation>(other)
+{ }
 
-FilterEngine::FilterEngine(void) { }
+inline Cigar::Cigar(Cigar&& other)
+    : std::vector<CigarOperation>(std::move(other))
+{ }
 
-bool FilterEngine::Accepts(const BamRecord& r) const
+inline Cigar& Cigar::operator=(const Cigar& other)
 {
-//        foreach ( const FilterParameter& param, parameters_ ) {
-//            if (!param.Accepts(r))
-//                return false;
-//        }
-//        return true;
-    (void)r;
-    return true;
+    std::vector<CigarOperation>::operator=(other);
+    return *this;
 }
 
-bool FilterEngine::Accepts(vector<BamRecord>& r) const
+inline Cigar& Cigar::operator=(Cigar&& other)
 {
-    size_t i = 0;
-    while (i < r.size()) {
-        if (!Accepts(r.at(i)))
-            r.erase(r.begin() + i);
-        else
-            ++i;
-    }
-    return !r.empty();
+    std::vector<CigarOperation>::operator=(std::move(other));
+    return *this;
 }
+
+inline Cigar::~Cigar(void) { }
+
+inline Cigar Cigar::FromStdString(const std::string& stdString)
+{ return Cigar(stdString); }
+
+} // namespace BAM
+} // namespace PacBio
diff --git a/include/pbbam/CigarOperation.h b/include/pbbam/internal/CigarOperation.inl
similarity index 51%
copy from include/pbbam/CigarOperation.h
copy to include/pbbam/internal/CigarOperation.inl
index 951128d..167528c 100644
--- a/include/pbbam/CigarOperation.h
+++ b/include/pbbam/internal/CigarOperation.inl
@@ -32,131 +32,18 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file CigarOperation.inl
+/// \brief Inline implementations for the CigarOperation class.
+//
 // Author: Derek Barnett
 
-#ifndef CIGAROPERATION_H
-#define CIGAROPERATION_H
-
-#include "pbbam/Config.h"
-#include <stdexcept>
+#include "pbbam/CigarOperation.h"
 
 namespace PacBio {
 namespace BAM {
 
-/// Describes a CIGAR operation. Bracketed character is the corresponding SAM/BAM character code.
-///
-/// \warning ALIGNMENT_MATCH ('M') is included in this enum to maintain consistency with htslib.
-/// However, as of PacBio BAM spec version 3.0b7, this CIGAR operation \b forbidden. Attempt to
-/// read or write a record containing this operation will trigger a std::runtime_error.
-///
-enum class CigarOperationType
-{
-    UNKNOWN_OP        = -1 ///< unknown/invalid CIGAR operator
-  , ALIGNMENT_MATCH   = 0  ///< alignment match (can be a sequence match or mismatch) [M]
-  , INSERTION              ///< insertion to the reference [I]
-  , DELETION               ///< deletion from the reference [D]
-  , REFERENCE_SKIP         ///< skipped region from the reference [N]
-  , SOFT_CLIP              ///< soft clipping (clipped sequences present in SEQ) [S]
-  , HARD_CLIP         = 5  ///< hard clipping (clipped sequences NOT present in SEQ) [H]
-  , PADDING                ///< padding (silent deletion from padded reference) [P]
-  , SEQUENCE_MATCH         ///< sequence match [=]
-  , SEQUENCE_MISMATCH      ///< sequence mismatch [X]
-
-    // TODO: looks like there is a new 'B' type in htslib source, referring to some 'back' operation...
-    //       no reference in htslib docs though yet as to what that applies to
-};
-
-class PBBAM_EXPORT CigarOperation
-{
-public:
-
-    /// \name Operation Type Conversion Methods
-    /// \{
-
-    /// Convert between CigarOperationType enum & SAM/BAM character code.
-    ///
-    /// \param[in] type CigarOperationType value
-    /// \returns SAM/BAM character code
-    static char TypeToChar(const CigarOperationType type);
-
-    /// Convert between CigarOperationType enum & SAM/BAM character code.
-    ///
-    /// \param[in] c SAM/BAM character code
-    /// \returns CigarOperationType value
-    static CigarOperationType CharToType(const char c);
-
-    /// \}
-
-public:
-    /// \name Constructors & Related Methods
-    /// \{
-
-    CigarOperation(void);
-    CigarOperation(char c, uint32_t length);
-    CigarOperation(CigarOperationType op, uint32_t length);
-    CigarOperation(const CigarOperation& other);
-    CigarOperation(CigarOperation&& other);
-    CigarOperation& operator=(const CigarOperation& other);
-    CigarOperation& operator=(CigarOperation&& other);
-    ~CigarOperation(void);
-
-    /// \}
-
-public:
-
-    /// \returns operation type as SAM/BAM char code
-    inline char Char(void) const;
-
-    /// \returns operation length
-    inline uint32_t Length(void) const;
-
-    /// \returns operation type as CigarOperationType enum value
-    inline CigarOperationType Type(void) const;
-
-    /// \}
-
-public:
-    /// \name Attributes
-    /// \{
-
-    /// Sets this operation type.
-    ///
-    /// \param[in] opChar SAM/BAM character code
-    /// \returns reference to this operation
-    inline CigarOperation& Char(const char opChar);
-
-    /// Sets this operation length.
-    ///
-    /// \param[in] length
-    /// \returns reference to this operation
-    inline CigarOperation& Length(const uint32_t length);
-
-    /// Sets this operation type.
-    ///
-    /// \param[in] opType CigarOperationType value
-    /// \returns reference to this operation
-    inline CigarOperation& Type(const CigarOperationType opType);
-
-    /// \}
-
-public:
-    /// \name Comparison Operators
-    /// \{
-
-    /// \returns true if both CIGAR operation type & length match
-    inline bool operator==(const CigarOperation& other) const;
-
-    /// \returns true if either CIGAR operation type or length differ
-    inline bool operator!=(const CigarOperation& other) const;
-
-    /// \}
-
-private:
-    CigarOperationType type_;
-    uint32_t length_;
-};
-
 inline CigarOperation::CigarOperation(void)
     : type_(CigarOperationType::UNKNOWN_OP)
     , length_(0)
@@ -222,5 +109,3 @@ inline bool CigarOperation::operator!=(const CigarOperation& other) const
 
 } // namespace BAM
 } // namespace PacBio
-
-#endif // CIGAROPERATION_H
diff --git a/include/pbbam/internal/SequentialMergeStrategy.h b/include/pbbam/internal/Compare.inl
similarity index 63%
rename from include/pbbam/internal/SequentialMergeStrategy.h
rename to include/pbbam/internal/Compare.inl
index cf8e591..4eb5ccf 100644
--- a/include/pbbam/internal/SequentialMergeStrategy.h
+++ b/include/pbbam/internal/Compare.inl
@@ -32,48 +32,47 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file Compare.inl
+/// \brief Inline implementations for the Compare class & inner classes.
+//
 // Author: Derek Barnett
 
-#ifndef SEQUENTIALMERGESTRATEGY_H
-#define SEQUENTIALMERGESTRATEGY_H
-
-#include "pbbam/internal/IMergeStrategy.h"
+#include "pbbam/Compare.h"
 
 namespace PacBio {
 namespace BAM {
 namespace internal {
 
-class SequentialMergeStrategy : public IMergeStrategy
+template <typename T, T> struct MemberFnProxy;
+
+template<typename T, typename R, typename... Args, R (T::*fn)(Args...)const>
+struct MemberFnProxy<R (T::*)(Args...)const, fn>
 {
-public:
-    SequentialMergeStrategy(const std::vector<FileIterPtr>& iters);
-    bool GetNext(BamRecord& r);
-private:
-    std::vector<FileIterPtr> iters_;
+    static R call(const T& obj, Args&&... args)
+    {
+        return (obj.*fn)(std::forward<Args>(args)...);
+    }
 };
 
-inline SequentialMergeStrategy::SequentialMergeStrategy(const std::vector<FileIterPtr>& iters)
-    : IMergeStrategy()
-    , iters_(iters)
-{ }
+} // namespace internal
 
-inline bool SequentialMergeStrategy::GetNext(BamRecord& r)
+template<typename ValueType,
+         typename Compare::MemberFunctionBaseHelper<ValueType>::MemberFnType fn,
+         typename CompareType>
+inline bool Compare::MemberFunctionBase<ValueType, fn, CompareType>::operator()(const BamRecord& lhs,
+                                                                                const BamRecord& rhs) const
 {
-    if (iters_.empty())
-        return false;
+    using MemberFnType = typename Compare::MemberFunctionBaseHelper<ValueType>::MemberFnType;
+    using Proxy = internal::MemberFnProxy<MemberFnType, fn>;
 
-    FileIterPtr iter = iters_.front();
-    if (iter->GetNext(r))
-        return true;
-    else {
-        iters_.erase(iters_.begin());
-        return GetNext(r);
-    }
+    CompareType cmp;
+    return cmp(Proxy::call(lhs), Proxy::call(rhs));
 }
 
-} // namespace internal
+inline bool Compare::None::operator()(const BamRecord&, const BamRecord&) const
+{ return false; }
+
 } // namespace BAM
 } // namespace PacBio
-
-#endif // SEQUENCTIALMERGESTRATEGY_H
diff --git a/include/pbbam/internal/CompositeBamReader.inl b/include/pbbam/internal/CompositeBamReader.inl
new file mode 100644
index 0000000..9703697
--- /dev/null
+++ b/include/pbbam/internal/CompositeBamReader.inl
@@ -0,0 +1,397 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file CompositeBamReader.inl
+/// \brief Inline implementations for the composite BAM readers, for
+///        working with multiple input files.
+//
+// Author: Derek Barnett
+
+#include "pbbam/CompositeBamReader.h"
+#include <algorithm>
+#include <set>
+#include <sstream>
+#include <stdexcept>
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// -----------------------------------
+// Merging helpers
+// -----------------------------------
+
+inline CompositeMergeItem::CompositeMergeItem(std::unique_ptr<BamReader>&& rdr)
+    : reader(std::move(rdr))
+{ }
+
+inline CompositeMergeItem::CompositeMergeItem(std::unique_ptr<BamReader>&& rdr,
+                              BamRecord&& rec)
+    : reader(std::move(rdr))
+    , record(std::move(rec))
+{ }
+
+inline CompositeMergeItem::CompositeMergeItem(CompositeMergeItem&& other)
+    : reader(std::move(other.reader))
+    , record(std::move(other.record))
+{ }
+
+inline CompositeMergeItem& CompositeMergeItem::operator=(CompositeMergeItem&& other)
+{
+    reader = std::move(other.reader);
+    record = std::move(other.record);
+    return *this;
+}
+
+inline CompositeMergeItem::~CompositeMergeItem(void) { }
+
+template<typename CompareType>
+inline bool CompositeMergeItemSorter<CompareType>::operator()(const CompositeMergeItem& lhs,
+                                                              const CompositeMergeItem& rhs)
+{
+    const BamRecord& l = lhs.record;
+    const BamRecord& r = rhs.record;
+    return CompareType()(l, r);
+}
+
+} // namespace internal
+
+// -----------------------------------
+// GenomicIntervalCompositeBamReader
+// -----------------------------------
+
+inline GenomicIntervalCompositeBamReader::GenomicIntervalCompositeBamReader(const GenomicInterval& interval,
+                                                                            const std::vector<BamFile>& bamFiles)
+{
+    filenames_.reserve(bamFiles.size());
+    for(const auto& bamFile : bamFiles)
+        filenames_.push_back(bamFile.Filename());
+    Interval(interval);
+}
+
+inline GenomicIntervalCompositeBamReader::GenomicIntervalCompositeBamReader(const GenomicInterval& interval,
+                                                                            std::vector<BamFile>&& bamFiles)
+{
+    filenames_.reserve(bamFiles.size());
+    for(auto&& bamFile : bamFiles)
+        filenames_.push_back(bamFile.Filename());
+    Interval(interval);
+}
+
+inline GenomicIntervalCompositeBamReader::GenomicIntervalCompositeBamReader(const GenomicInterval& interval,
+                                                                            const DataSet& dataset)
+    : GenomicIntervalCompositeBamReader(interval, std::move(dataset.BamFiles()))
+{ }
+
+inline bool GenomicIntervalCompositeBamReader::GetNext(BamRecord& record)
+{
+    // nothing left to read
+    if (mergeItems_.empty())
+        return false;
+
+    // non-destructive 'pop' of first item from queue
+    auto firstIter = mergeItems_.begin();
+    auto firstItem = internal::CompositeMergeItem{ std::move(firstIter->reader), std::move(firstIter->record) };
+    mergeItems_.pop_front();
+
+    // store its record in our output record
+    std::swap(record, firstItem.record);
+
+    // try fetch 'next' from first item's reader
+    // if successful, re-insert it into container & re-sort on our new values
+    // otherwise, this item will go out of scope & reader destroyed
+    if (firstItem.reader->GetNext(firstItem.record)) {
+        mergeItems_.push_front(std::move(firstItem));
+        UpdateSort();
+    }
+
+    // return success
+    return true;
+}
+
+inline const GenomicInterval& GenomicIntervalCompositeBamReader::Interval(void) const
+{ return interval_; }
+
+inline GenomicIntervalCompositeBamReader& GenomicIntervalCompositeBamReader::Interval(const GenomicInterval& interval)
+{
+    // Re-targets all readers at 'interval' and returns *this; throws std::runtime_error if any file lacks a BAI.
+    auto updatedMergeItems = std::deque<internal::CompositeMergeItem>{ };
+    auto filesToCreate = std::set<std::string>{ filenames_.cbegin(), filenames_.cend() };
+    // update existing readers
+    while (!mergeItems_.empty()) {
+
+        // non-destructive 'pop' of first item from queue
+        auto firstIter = mergeItems_.begin();
+        auto firstItem = internal::CompositeMergeItem{ std::move(firstIter->reader), std::move(firstIter->record) };
+        mergeItems_.pop_front();
+
+        // reset interval
+        BaiIndexedBamReader* baiReader = dynamic_cast<BaiIndexedBamReader*>(firstItem.reader.get());
+        assert(baiReader);
+        baiReader->Interval(interval);
+
+        // try fetch 'next' from first item's reader; if successful, keep the item (sorted at the end),
+        // otherwise this item will go out of scope & its reader is destroyed.
+        // NOTE: the filename must be read BEFORE the move - moving the item leaves its reader pointer null.
+        if (firstItem.reader->GetNext(firstItem.record)) {
+            filesToCreate.erase(firstItem.reader->Filename());
+            updatedMergeItems.push_front(std::move(firstItem));
+        }
+    }
+
+    // create readers for files that were not 'active' for the previous interval
+    std::vector<std::string> missingBai;
+    for (auto&& fn : filesToCreate) {
+        auto bamFile = BamFile{ fn };
+        if (bamFile.StandardIndexExists()) {
+            auto item = internal::CompositeMergeItem{ std::unique_ptr<BamReader>{ new BaiIndexedBamReader{ interval, std::move(bamFile) } } };
+            if (item.reader->GetNext(item.record))
+                updatedMergeItems.push_back(std::move(item));
+            // else not an error, simply no data matching interval
+        }
+        else {
+            // maybe handle PBI-backed interval searches if BAI missing, but for now treat as error
+            missingBai.push_back(bamFile.Filename());
+        }
+    }
+
+    // throw if any files missing BAI
+    if (!missingBai.empty()) {
+        std::stringstream e;
+        e << "failed to open GenomicIntervalCompositeBamReader because the following files are missing a BAI file:" << '\n';
+        for (const auto& fn : missingBai)
+            e << "  " << fn << '\n';
+        throw std::runtime_error(e.str());
+    }
+
+    // update our actual container and return
+    mergeItems_ = std::move(updatedMergeItems);
+    UpdateSort();
+    return *this;
+}
+
+struct OrderByPosition
+{
+    static inline bool less_than(const BamRecord& lhs, const BamRecord& rhs)
+    {
+        const int32_t lhsId = lhs.ReferenceId();
+        const int32_t rhsId = rhs.ReferenceId();
+        if (lhsId == -1) return false;
+        if (rhsId == -1) return true;
+
+        if (lhsId == rhsId)
+            return lhs.ReferenceStart() < rhs.ReferenceStart();
+        else return lhsId < rhsId;
+    }
+
+    static inline bool equals(const BamRecord& lhs, const BamRecord& rhs)
+    {
+        return lhs.ReferenceId() == rhs.ReferenceId() &&
+               lhs.ReferenceStart() == rhs.ReferenceStart();
+    }
+};
+
+// Orders merge items by the position of their current record (delegates to OrderByPosition::less_than).
+struct PositionSorter : std::binary_function<internal::CompositeMergeItem, internal::CompositeMergeItem, bool>
+{
+    // const-qualified so the comparator can also be invoked through const objects/references
+    bool operator()(const internal::CompositeMergeItem& lhs,
+                    const internal::CompositeMergeItem& rhs) const
+    {
+        return OrderByPosition::less_than(lhs.record, rhs.record);
+    }
+};
+
+inline void GenomicIntervalCompositeBamReader::UpdateSort(void)
+{ std::sort(mergeItems_.begin(), mergeItems_.end(), PositionSorter{ }); }
+
+// ------------------------------
+// PbiFilterCompositeBamReader
+// ------------------------------
+
+template<typename OrderByType>
+inline PbiFilterCompositeBamReader<OrderByType>::PbiFilterCompositeBamReader(const PbiFilter& filter,
+                                                                             const std::vector<BamFile>& bamFiles)
+{
+    filenames_.reserve(bamFiles.size());
+    for(const auto& bamFile : bamFiles)
+        filenames_.push_back(bamFile.Filename());
+    Filter(filter);
+}
+
+template<typename OrderByType>
+inline PbiFilterCompositeBamReader<OrderByType>::PbiFilterCompositeBamReader(const PbiFilter& filter,
+                                                                             std::vector<BamFile>&& bamFiles)
+{
+    filenames_.reserve(bamFiles.size());
+    for(auto&& bamFile : bamFiles)
+        filenames_.push_back(bamFile.Filename());
+    Filter(filter);
+}
+
+template<typename OrderByType>
+inline PbiFilterCompositeBamReader<OrderByType>::PbiFilterCompositeBamReader(const PbiFilter& filter,
+                                                                             const DataSet& dataset)
+    : PbiFilterCompositeBamReader(filter, std::move(dataset.BamFiles()))
+{ }
+
+template<typename OrderByType>
+inline bool PbiFilterCompositeBamReader<OrderByType>::GetNext(BamRecord& record)
+{
+    // nothing left to read
+    if (mergeQueue_.empty())
+        return false;
+
+    // non-destructive 'pop' of first item from queue
+    auto firstIter = mergeQueue_.begin();
+    auto firstItem = value_type{ std::move(firstIter->reader), std::move(firstIter->record) };
+    mergeQueue_.pop_front();
+
+    // store its record in our output record
+    std::swap(record, firstItem.record);
+
+    // try fetch 'next' from first item's reader
+    // if successful, re-insert it into container & re-sort on our new values
+    // otherwise, this item will go out of scope & reader destroyed
+    if (firstItem.reader->GetNext(firstItem.record)) {
+        mergeQueue_.push_front(std::move(firstItem));
+        UpdateSort();
+    }
+
+    // return success
+    return true;
+}
+
+template<typename OrderByType>
+inline PbiFilterCompositeBamReader<OrderByType>&
+PbiFilterCompositeBamReader<OrderByType>::Filter(const PbiFilter& filter)
+{
+    // Re-targets all readers at 'filter' and returns *this; throws std::runtime_error if any file lacks a PBI.
+    auto updatedMergeItems = container_type{ };
+    auto filesToCreate = std::set<std::string>{ filenames_.cbegin(), filenames_.cend() };
+    // update existing readers
+    while (!mergeQueue_.empty()) {
+
+        // non-destructive 'pop' of first item from queue
+        auto firstIter = mergeQueue_.begin();
+        auto firstItem = internal::CompositeMergeItem{ std::move(firstIter->reader), std::move(firstIter->record) };
+        mergeQueue_.pop_front();
+
+        // reset request
+        PbiIndexedBamReader* pbiReader = dynamic_cast<PbiIndexedBamReader*>(firstItem.reader.get());
+        assert(pbiReader);
+        pbiReader->Filter(filter);
+
+        // try fetch 'next' from first item's reader; if successful, keep the item (sorted at the end),
+        // otherwise this item will go out of scope & its reader is destroyed.
+        // NOTE: the filename must be read BEFORE the move - moving the item leaves its reader pointer null.
+        if (firstItem.reader->GetNext(firstItem.record)) {
+            filesToCreate.erase(firstItem.reader->Filename());
+            updatedMergeItems.push_front(std::move(firstItem));
+        }
+    }
+
+    // create readers for files that were not 'active' for the previous filter
+    std::vector<std::string> missingPbi;
+    for (auto&& fn : filesToCreate) {
+        auto bamFile = BamFile{ fn };
+        if (bamFile.PacBioIndexExists()) {
+            auto item = internal::CompositeMergeItem{ std::unique_ptr<BamReader>{ new PbiIndexedBamReader{ filter, std::move(bamFile) } } };
+            if (item.reader->GetNext(item.record))
+                updatedMergeItems.push_back(std::move(item));
+            // else not an error, simply no data matching filter
+        }
+        else
+            missingPbi.push_back(fn);
+    }
+
+    // throw if any files missing PBI
+    if (!missingPbi.empty()) {
+        std::stringstream e;
+        e << "failed to open PbiFilterCompositeBamReader because the following files are missing a PBI file:" << '\n';
+        for (const auto& fn : missingPbi)
+            e << "  " << fn << '\n';
+        throw std::runtime_error(e.str());
+    }
+
+    // update our actual container and return
+    mergeQueue_ = std::move(updatedMergeItems);
+    UpdateSort();
+    return *this;
+}
+
+template<typename OrderByType>
+inline void PbiFilterCompositeBamReader<OrderByType>::UpdateSort(void)
+{ std::sort(mergeQueue_.begin(), mergeQueue_.end(), merge_sorter_type{}); }
+
+// ------------------------------
+// SequentialCompositeBamReader
+// ------------------------------
+
+inline SequentialCompositeBamReader::SequentialCompositeBamReader(const std::vector<BamFile>& bamFiles)
+{
+    for (auto&& bamFile : bamFiles)
+        readers_.emplace_back(new BamReader{ bamFile });
+}
+
+inline SequentialCompositeBamReader::SequentialCompositeBamReader(std::vector<BamFile>&& bamFiles)
+{
+    for (auto&& bamFile : bamFiles)
+        readers_.emplace_back(new BamReader{ std::move(bamFile) });
+}
+
+inline SequentialCompositeBamReader::SequentialCompositeBamReader(const DataSet& dataset)
+    : SequentialCompositeBamReader(std::move(dataset.BamFiles()))
+{ }
+
+inline bool SequentialCompositeBamReader::GetNext(BamRecord& record)
+{
+    // try first reader, if successful return true
+    // else pop reader and try next, until all readers exhausted
+    while (!readers_.empty()) {
+        auto& reader = readers_.front();
+        if (reader->GetNext(record))
+            return true;
+        else
+            readers_.pop_front();
+    }
+
+    // no readers available
+    return false;
+}
+
+} // namespace BAM
+} // namespace PacBio
diff --git a/include/pbbam/internal/DataSet.inl b/include/pbbam/internal/DataSet.inl
index 34f4af3..6627ddf 100644
--- a/include/pbbam/internal/DataSet.inl
+++ b/include/pbbam/internal/DataSet.inl
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file DataSet.inl
+/// \brief Inline implementations for the DataSet class.
+//
 // Author: Derek Barnett
 
 #include "pbbam/DataSet.h"
diff --git a/include/pbbam/internal/DataSetBaseTypes.h b/include/pbbam/internal/DataSetBaseTypes.h
index 98fae1f..917162a 100644
--- a/include/pbbam/internal/DataSetBaseTypes.h
+++ b/include/pbbam/internal/DataSetBaseTypes.h
@@ -129,7 +129,8 @@ public:
 class StrictEntityType : public BaseEntityType
 {
 protected:
-    StrictEntityType(const std::string& label,
+    StrictEntityType(const std::string& metatype,
+                     const std::string& label,
                      const XsdType& xsd = XsdType::BASE_DATA_MODEL);
 
 public:
@@ -149,14 +150,18 @@ public:
 class InputOutputDataType : public StrictEntityType
 {
 protected:
-    InputOutputDataType(const std::string& label,
+    InputOutputDataType(const std::string& metatype, 
+                        const std::string& filename,
+                        const std::string& label,
                         const XsdType& xsd = XsdType::BASE_DATA_MODEL);
 };
 
 class IndexedDataType : public InputOutputDataType
 {
 protected:
-    IndexedDataType(const std::string& label,
+    IndexedDataType(const std::string& metatype, 
+                    const std::string& filename,
+                    const std::string& label,
                     const XsdType& xsd = XsdType::BASE_DATA_MODEL);
 
 public:
diff --git a/include/pbbam/internal/DataSetElement.h b/include/pbbam/internal/DataSetElement.h
index e80e55e..c7f7c8d 100644
--- a/include/pbbam/internal/DataSetElement.h
+++ b/include/pbbam/internal/DataSetElement.h
@@ -63,7 +63,7 @@ class XmlName
     //  prefix    local name
 
 public:
-    XmlName(const std::string& fullName);
+    XmlName(const std::string& fullName, bool verbatim = false);
     XmlName(const std::string& localName, const std::string& prefix);
     XmlName(const XmlName& other);
     XmlName(XmlName&& other);
@@ -79,18 +79,23 @@ public:
     const boost::string_ref LocalName(void) const;
     const boost::string_ref Prefix(void) const;
     const std::string& QualifiedName(void) const;
+    bool Verbatim(void) const;
 
 private:
     std::string qualifiedName_;
     size_t prefixSize_;
     size_t localNameOffset_;
     size_t localNameSize_;
+    bool verbatim_;
 };
 
+struct FromInputXml { };  // empty tag type; used as a parameter to select the DataSetElement constructor overload that builds a verbatim label
+
 class DataSetElement
 {
 public:
     DataSetElement(const std::string& label, const XsdType& xsd = XsdType::NONE);
+    DataSetElement(const std::string& label, const FromInputXml& fromInputXml, const XsdType& xsd = XsdType::NONE);
     DataSetElement(const DataSetElement& other);
     DataSetElement(DataSetElement&& other);
     DataSetElement& operator=(const DataSetElement& other);
@@ -115,6 +120,7 @@ public:
     const boost::string_ref LocalNameLabel(void) const;
     const boost::string_ref PrefixLabel(void) const;
     const std::string& QualifiedNameLabel(void) const;
+    bool IsVerbatimLabel(void) const;
 
     const std::string& Text(void) const;
     std::string& Text(void);
diff --git a/include/pbbam/internal/DataSetElement.inl b/include/pbbam/internal/DataSetElement.inl
index 08f15b1..37a673f 100644
--- a/include/pbbam/internal/DataSetElement.inl
+++ b/include/pbbam/internal/DataSetElement.inl
@@ -52,6 +52,13 @@ inline DataSetElement::DataSetElement(const std::string& label, const XsdType& x
     , label_(label)
 { }
 
+inline DataSetElement::DataSetElement(const std::string& label,  // element label, handed to XmlName unchanged
+                                      const FromInputXml&,       // unnamed tag parameter: only selects this overload, its value is never read
+                                      const XsdType& xsd)        // stored as-is in xsd_
+    : xsd_(xsd)
+    , label_(label, true)  // true is XmlName's 'verbatim' constructor argument
+{ }
+
 inline DataSetElement::DataSetElement(const DataSetElement& other)
     : xsd_(other.xsd_)
     , label_(other.label_)
@@ -216,7 +223,7 @@ inline const std::string& DataSetElement::QualifiedNameLabel(void) const
 //{ return label_.QualifiedName(); }
 
 inline void DataSetElement::Label(const std::string& label)
-{ label_ = XmlName(label); }
+{ label_ = XmlName(label, true); }  // replaces the label; the 'true' argument sets the new XmlName's verbatim flag
 
 inline size_t DataSetElement::NumAttributes(void) const
 { return attributes_.size(); }
@@ -246,6 +253,9 @@ inline void DataSetElement::ChildText(const std::string& label,
     }
 }
 
+inline bool DataSetElement::IsVerbatimLabel(void) const  // reports the verbatim flag of this element's label
+{ return label_.Verbatim(); }
+
 inline const std::string& DataSetElement::Text(void) const
 { return text_; }
 
@@ -262,14 +272,13 @@ inline const XsdType& DataSetElement::Xsd(void) const
 // XmlName
 // ----------------
 
-inline XmlName::XmlName(const std::string& fullName)
+inline XmlName::XmlName(const std::string& fullName, bool verbatim)
     : qualifiedName_(fullName)
     , prefixSize_(0)
     , localNameOffset_(0)
     , localNameSize_(0)
+    , verbatim_(verbatim)
 {
-//    std::cerr << "Creating XmlName from fullName: " << fullName << std::endl;
-
     const size_t colonFound = qualifiedName_.find(':');
     if (colonFound == std::string::npos || colonFound == 0)
         localNameSize_ = qualifiedName_.size();
@@ -282,10 +291,6 @@ inline XmlName::XmlName(const std::string& fullName)
     localNameOffset_ = prefixSize_;
     if (prefixSize_ != 0)
         ++localNameOffset_;
-
-//    std::cerr << "  qualName:  " << qualifiedName_ << std::endl;
-//    std::cerr << "  prefix:    " << Prefix() << std::endl;
-//    std::cerr << "  localName: " << LocalName() << std::endl;
 }
 
 inline XmlName::XmlName(const std::string& localName,
@@ -293,6 +298,7 @@ inline XmlName::XmlName(const std::string& localName,
     : prefixSize_(prefix.size())
     , localNameOffset_(prefixSize_)
     , localNameSize_(localName.size())
+    , verbatim_(true)
 {
     qualifiedName_.clear();
     qualifiedName_.reserve(localNameSize_+ prefixSize_ + 1);
@@ -311,6 +317,7 @@ inline XmlName::XmlName(const XmlName& other)
     , prefixSize_(other.prefixSize_)
     , localNameOffset_(other.localNameOffset_)
     , localNameSize_(other.localNameSize_)
+    , verbatim_(other.verbatim_)
 { }
 
 inline XmlName::XmlName(XmlName&& other)
@@ -318,6 +325,7 @@ inline XmlName::XmlName(XmlName&& other)
     , prefixSize_(std::move(other.prefixSize_))
     , localNameOffset_(std::move(other.localNameOffset_))
     , localNameSize_(std::move(other.localNameSize_))
+    , verbatim_(std::move(other.verbatim_))
 { }
 
 inline XmlName& XmlName::operator=(const XmlName& other)
@@ -326,6 +334,7 @@ inline XmlName& XmlName::operator=(const XmlName& other)
     prefixSize_      = other.prefixSize_;
     localNameOffset_ = other.localNameOffset_;
     localNameSize_   = other.localNameSize_;
+    verbatim_        = other.verbatim_;
     return *this;
 }
 
@@ -335,6 +344,7 @@ inline XmlName& XmlName::operator=(XmlName&& other)
     prefixSize_      = std::move(other.prefixSize_);
     localNameOffset_ = std::move(other.localNameOffset_);
     localNameSize_   = std::move(other.localNameSize_);
+    verbatim_        = std::move(other.verbatim_);
     return *this;
 }
 
@@ -346,17 +356,6 @@ inline bool XmlName::operator==(const XmlName& other) const
 inline bool XmlName::operator!=(const XmlName& other) const
 { return !(*this == other); }
 
-//inline void XmlName::CalculateSizes(const size_t localNameSize,
-//                                    const size_t prefixSize)
-//{
-//    size_t offset = prefixSize;
-//    if (offset != 0)
-//        ++offset;
-
-//    localName_ = boost::string_ref(qualifiedName_.data() + offset, localNameSize);
-//    prefix_    = boost::string_ref(qualifiedName_.data(), prefixSize);
-//}
-
 inline const boost::string_ref XmlName::LocalName(void) const
 { return boost::string_ref(qualifiedName_.data() + localNameOffset_, localNameSize_); }
 
@@ -366,6 +365,9 @@ inline const boost::string_ref XmlName::Prefix(void) const
 inline const std::string& XmlName::QualifiedName(void) const
 { return qualifiedName_; }
 
+inline bool XmlName::Verbatim(void) const  // accessor for the stored verbatim_ flag
+{ return verbatim_; }
+
 } // namespace internal
 } // namespace BAM
 } // namespace PacBio
diff --git a/include/pbbam/internal/DataSetTypes.inl b/include/pbbam/internal/DataSetTypes.inl
index 105dffb..dbcbd26 100644
--- a/include/pbbam/internal/DataSetTypes.inl
+++ b/include/pbbam/internal/DataSetTypes.inl
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file DataSetTypes.inl
+/// \brief Inline implementations for the public DataSet component classes.
+//
 // Author: Derek Barnett
 
 #include "pbbam/DataSetTypes.h"
diff --git a/include/pbbam/Accuracy.h b/include/pbbam/internal/Frames.inl
similarity index 56%
copy from include/pbbam/Accuracy.h
copy to include/pbbam/internal/Frames.inl
index 03c233e..37cb64b 100644
--- a/include/pbbam/Accuracy.h
+++ b/include/pbbam/internal/Frames.inl
@@ -32,67 +32,62 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file Frames.inl
+/// \brief Inline implementations for the Frames class.
+//
 // Author: Derek Barnett
 
-#ifndef ACCURACY_H
-#define ACCURACY_H
-
-#include "pbbam/Config.h"
+#include "pbbam/Frames.h"
 
 namespace PacBio {
 namespace BAM {
 
-/// \brief The Accuracy class represents the expected accuracy of a BamRecord.
-/// Values are clamped to [0,1000].
-///
-class PBBAM_EXPORT Accuracy
-{
-public:
-    static const int MIN;
-    static const int MAX;
+inline const std::vector<uint16_t>& Frames::Data(void) const  // read-only reference to the stored data_ vector (no copy)
+{ return data_; }
+
+inline std::vector<uint16_t>& Frames::DataRaw(void)  // mutable reference to data_; callers can modify the stored values in place
+{ return data_; }
 
-public:
-    /// \name Constructors & Related Methods
-    /// \{
+inline std::vector<uint8_t> Frames::Encode(void) const  // encodes this object's own data_ by forwarding to the Encode overload that takes a vector
+{ return Frames::Encode(data_); }
 
-    /// \note This is not an 'explicit' ctor, to make it as easy to use in
-    ///       numeric operations as possible. We really just want to make
-    ///       sure that the acceptable range is respected.
-    Accuracy(int accuracy);
-    Accuracy(const Accuracy& other);
-    ~Accuracy(void);
+inline Frames& Frames::Data(const std::vector<uint16_t>& frames)  // setter: copies 'frames' into data_ and returns *this
+{ data_ = frames; return *this; }
 
-    /// \}
+inline Frames& Frames::Data(std::vector<uint16_t>&& frames)  // setter: moves 'frames' into data_ (no copy) and returns *this
+{ data_ = std::move(frames); return *this; }
 
-public:
+inline std::vector<uint16_t>::const_iterator Frames::begin(void) const  // const iterator to the start of data_
+{ return data_.begin(); }
 
-    /// \returns Accuracy as integer
-    operator int(void) const;
+inline std::vector<uint16_t>::iterator Frames::begin(void)  // mutable iterator to the start of data_
+{ return data_.begin(); }
 
-private:
-    int accuracy_;
-};
+inline std::vector<uint16_t>::const_iterator Frames::cbegin(void) const  // forwards to data_.cbegin()
+{ return data_.cbegin(); }
 
-inline Accuracy::Accuracy(int accuracy)
-{
-    if (accuracy < Accuracy::MIN)
-        accuracy = Accuracy::MIN;
-    else if (accuracy > Accuracy::MAX)
-        accuracy = Accuracy::MAX;
-    accuracy_ = accuracy;
-}
+inline std::vector<uint16_t>::const_iterator Frames::cend(void) const  // forwards to data_.cend()
+{ return data_.cend(); }
 
-inline Accuracy::Accuracy(const Accuracy &other)
-    : accuracy_(other.accuracy_)
-{ }
+inline std::vector<uint16_t>::const_iterator Frames::end(void) const  // const past-the-end iterator of data_
+{ return data_.end(); }
 
-inline Accuracy::~Accuracy(void) { }
+inline std::vector<uint16_t>::iterator Frames::end(void)  // mutable past-the-end iterator of data_
+{ return data_.end(); }
 
-inline Accuracy::operator int(void) const
-{ return accuracy_; }
+inline size_t Frames::size(void) const  // number of elements currently held in data_
+{ return data_.size(); }
+
+inline bool Frames::empty(void) const  // true when data_ holds no elements
+{ return data_.empty(); }
+
+inline bool Frames::operator==(const Frames& other) const  // equality is decided solely by comparing the two data_ vectors
+{ return data_ == other.data_; }
+
+inline bool Frames::operator!=(const Frames& other) const  // defined as the negation of operator==
+{ return !(*this == other); }
 
 } // namespace BAM
 } // namespace PacBio
-
-#endif // ACCURACY_H
diff --git a/include/pbbam/GenomicIntervalQuery.h b/include/pbbam/internal/GenomicInterval.inl
similarity index 55%
copy from include/pbbam/GenomicIntervalQuery.h
copy to include/pbbam/internal/GenomicInterval.inl
index c1e10f9..07c18ef 100644
--- a/include/pbbam/GenomicIntervalQuery.h
+++ b/include/pbbam/internal/GenomicInterval.inl
@@ -32,41 +32,60 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file GenomicInterval.inl
+/// \brief Inline implementations for the GenomicInterval class.
+//
 // Author: Derek Barnett
 
-#ifndef GENOMICINTERVALQUERY_H
-#define GENOMICINTERVALQUERY_H
-
 #include "pbbam/GenomicInterval.h"
-#include "pbbam/QueryBase.h"
-#include "pbbam/internal/QueryBase.h"
-#include <string>
 
 namespace PacBio {
 namespace BAM {
 
-class BamFile;
+inline GenomicInterval::~GenomicInterval(void) { }  // empty body: no explicit cleanup is performed here
+
+inline std::string GenomicInterval::Name(void) const  // returns a copy of name_ (by value, not by reference)
+{ return name_; }
+
+inline GenomicInterval& GenomicInterval::Name(const std::string& name)  // setter: overwrites name_ and returns *this
+{ name_ = name; return *this; }
+
+inline PacBio::BAM::Interval<Position> GenomicInterval::Interval(void) const  // returns a copy of interval_ (by value)
+{ return interval_; }
 
-class PBBAM_EXPORT GenomicIntervalQuery : public internal::IQuery
+inline GenomicInterval& GenomicInterval::Interval(const PacBio::BAM::Interval<Position>& interval)  // setter: overwrites interval_ and returns *this
+{ interval_ = interval; return *this; }
+
+inline bool GenomicInterval::IsValid(void) const  // true only if every one of the four checks below passes
 {
-public:
-    GenomicIntervalQuery(const GenomicInterval& interval,
-                         const DataSet& dataset);
+    return !name_.empty() &&           // a non-empty name is required
+           interval_.Start() >= 0 &&   // start must be non-negative
+           interval_.Stop()  >= 0 &&   // stop must be non-negative
+           interval_.IsValid();        // plus whatever the underlying Interval's own IsValid() requires (defined elsewhere)
+}
 
-public:
-    GenomicIntervalQuery& Interval(const GenomicInterval& interval);
-    GenomicInterval Interval(void) const;
+inline size_t GenomicInterval::Length(void) const  // forwards to interval_.Length()
+{ return interval_.Length(); }
 
-protected:
-    FileIterPtr CreateIterator(const BamFile& bamFile);
+inline Position GenomicInterval::Start(void) const  // forwards to interval_.Start()
+{ return interval_.Start(); }
 
-private:
-    GenomicInterval interval_;
-};
+inline GenomicInterval& GenomicInterval::Start(const Position start)  // setter: updates interval_'s start and returns *this
+{ interval_.Start(start); return *this; }
 
-//} // namespace staging
-} // namespace BAM
-} // namspace PacBio
+inline Position GenomicInterval::Stop(void) const  // forwards to interval_.Stop()
+{ return interval_.Stop(); }
+
+inline GenomicInterval& GenomicInterval::Stop(const Position stop)  // setter: updates interval_'s stop and returns *this
+{ interval_.Stop(stop); return *this; }
 
-#endif // GENOMICINTERVALQUERY_H
+inline bool GenomicInterval::operator==(const GenomicInterval& other) const  // equal only when both name_ and interval_ match
+{ return name_ == other.name_ && interval_ == other.interval_; }
+
+inline bool GenomicInterval::operator!=(const GenomicInterval& other) const  // defined as the negation of operator==
+{ return !(*this == other); }
+
+} // namespace BAM
+} // namespace PacBio
diff --git a/include/pbbam/internal/IMergeStrategy.h b/include/pbbam/internal/IMergeStrategy.h
deleted file mode 100644
index 0677f31..0000000
--- a/include/pbbam/internal/IMergeStrategy.h
+++ /dev/null
@@ -1,67 +0,0 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//
-//  * Redistributions in binary form must reproduce the above
-//    copyright notice, this list of conditions and the following
-//    disclaimer in the documentation and/or other materials provided
-//    with the distribution.
-//
-//  * Neither the name of Pacific Biosciences nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-
-// Author: Derek Barnett
-
-#ifndef IMERGESTRATEGY_H
-#define IMERGESTRATEGY_H
-
-#include "pbbam/internal/IBamFileIterator.h"
-
-namespace PacBio {
-namespace BAM {
-namespace internal {
-
-template<typename T>
-class IMergeStrategyBase
-{
-public:
-    typedef typename IBamFileIteratorBase<T>::Ptr FileIterPtr;
-protected:
-    IMergeStrategyBase(void) { }
-public:
-    virtual ~IMergeStrategyBase(void) { }
-public:
-    virtual bool GetNext(T& result) =0;
-};
-
-typedef IMergeStrategyBase<BamRecord>               IMergeStrategy;
-typedef IMergeStrategyBase<std::vector<BamRecord> > IGroupMergeStrategy;
-
-} // namespace internal
-} // namespace BAM
-} // namespace PacBio
-
-#endif // IMERGESTRATEGY_H
diff --git a/include/pbbam/Interval.h b/include/pbbam/internal/Interval.inl
similarity index 57%
copy from include/pbbam/Interval.h
copy to include/pbbam/internal/Interval.inl
index 6c2e91a..e9c7edd 100644
--- a/include/pbbam/Interval.h
+++ b/include/pbbam/internal/Interval.inl
@@ -32,127 +32,35 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file Interval.inl
+/// \brief Inline implementations for the Interval class.
+//
 // Author: Derek Barnett
 
-#ifndef INTERVAL_H
-#define INTERVAL_H
-
-#include "pbbam/Config.h"
-#include <string>
-
-#define BOOST_ICL_USE_STATIC_BOUNDED_INTERVALS
-#include <boost/icl/discrete_interval.hpp>
-#include <boost/icl/interval_traits.hpp>
+#include "pbbam/Interval.h"
 
 namespace PacBio {
 namespace BAM {
 
-/// \brief Utility class for working with half-open (right-open) intervals. [start, stop)
-///
-/// \note This class is agnostic whether the values are 0-based or 1-based.
-/// \todo Should it be? Should that go here or "higher up"?
-///
-template<typename T>
-class Interval
-{
-public:
-    typedef boost::icl::discrete_interval<T> interval_type;
-
-public:
-
-    /// \name Constructors
-    /// \{
-
-    /** Default constructor; yields an empty interval [0,0) */
-    inline Interval(void);
-
-    /** Constructor for a singleton interval [val,val+1) */
-    inline Interval(const T val);
-
-    /** Constructor for interval from [start, stop) */
-    inline Interval(const T start, const T stop);
-
-    /** Copy constructor */
-    inline Interval(const Interval<T>& other);
-
-    /// \}
-
-public:
-    /// \name Attributes
-    /// \{
-
-    /// \returns interval start coordinate
-    inline T Start(void) const;
-
-    /// Sets this interval's start coordinate.
-    ///
-    /// \param[in] start
-    /// \returns reference to this interval
-    inline Interval<T>& Start(const T& start);
-
-    /// \returns interval stop coordinate
-    inline T Stop(void) const;
-
-    /// Sets this interval's stop coordinate.
-    ///
-    /// \param[in] stop
-    /// \returns reference to this interval
-    inline Interval<T>& Stop(const T& stop);
-
-    /// \}
-
-    /// \name Interval Operations
-
-    /// \returns true if this interval is fully covered by (or contained in) \p other
-    inline bool CoveredBy(const Interval<T>& other) const;
-
-    //// \returns true if this interval covers (or contains) \p other
-    inline bool Covers(const Interval<T>& other) const;
-
-    /// \returns true if intervals interset
-    inline bool Intersects(const Interval<T>& other) const;
-
-    /// \returns true if interval is valid (e.g. start < stop)
-    inline bool IsValid(void) const;
-
-    /// \returns interval length
-    inline size_t Length(void) const;
-
-    /// \}
-
-    /// \name Comparison Operators
-    /// \{
-
-    /// \returns true if both intervals share the same endpoints
-    inline bool operator==(const Interval<T>& other) const;
-
-    /// \returns true if either interval's endpoints differ
-    inline bool operator!=(const Interval<T>& other) const;
-
-    /// \}
-
-private:
-    interval_type data_;
-};
-
 template<typename T>
-Interval<T>::Interval(void)
+inline Interval<T>::Interval(void)  // default constructor: data_ is a right-open interval built from (0,0)
     : data_(boost::icl::discrete_interval<T>::right_open(0,0))
 { }
 
 template<typename T>
-Interval<T>::Interval(const T val)
+inline Interval<T>::Interval(const T val)  // single-value constructor: data_ is the right-open interval (val, val+1)
     : data_(boost::icl::discrete_interval<T>::right_open(val,val+1))
 { }
 
 template<typename T>
-Interval<T>::Interval(const T start, const T stop)
+inline Interval<T>::Interval(const T start, const T stop)  // data_ is the right-open interval (start, stop); arguments are passed through unchecked here
     : data_(boost::icl::discrete_interval<T>::right_open(start,stop))
 { }
 
 template<typename T>
-Interval<T>::Interval(const Interval<T>& other)
+inline Interval<T>::Interval(const Interval<T>& other)  // copy constructor: rebuilds data_ from other's Start()/Stop() rather than copying other.data_ directly
     : data_(boost::icl::discrete_interval<T>::right_open(other.Start(), other.Stop()))
 { }
 
@@ -207,6 +115,4 @@ inline Interval<T>& Interval<T>::Stop(const T& stop)
 }
 
 } // namespace BAM
-} // namspace PacBio
-
-#endif // GENOMICINTERVAL_H
+} // namespace PacBio
diff --git a/include/pbbam/internal/MergeStrategy.h b/include/pbbam/internal/MergeStrategy.h
deleted file mode 100644
index 1a8a564..0000000
--- a/include/pbbam/internal/MergeStrategy.h
+++ /dev/null
@@ -1,239 +0,0 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//
-//  * Redistributions in binary form must reproduce the above
-//    copyright notice, this list of conditions and the following
-//    disclaimer in the documentation and/or other materials provided
-//    with the distribution.
-//
-//  * Neither the name of Pacific Biosciences nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-
-// Author: Derek Barnett
-
-#ifndef MERGESTRATEGY_H
-#define MERGESTRATEGY_H
-
-#include "pbbam/BamRecord.h"
-#include "pbbam/internal/IMergeStrategy.h"
-#include "pbbam/internal/MergeItem.h"
-#include <functional>
-#include <set>
-#include <vector>
-#include <cassert>
-
-namespace PacBio {
-namespace BAM {
-namespace internal {
-
-template<typename Compare>
-struct MergeItemSorter : public std::binary_function<MergeItem, MergeItem, bool>
-{
-public:
-    MergeItemSorter(const Compare& comp = Compare())
-        : comp_(comp)
-    { }
-
-    bool operator()(const MergeItem& lhs, const MergeItem& rhs) {
-        const BamRecord& l = lhs.record_;
-        const BamRecord& r = rhs.record_;
-        return comp_(l, r);
-    }
-
-private:
-    Compare comp_;
-};
-
-template<typename Compare>
-struct GroupMergeItemSorter : public std::binary_function<GroupMergeItem, GroupMergeItem, bool>
-{
-public:
-    GroupMergeItemSorter(const Compare& comp = Compare())
-        : comp_(comp)
-    { }
-
-    bool operator()(const GroupMergeItem& lhs, const GroupMergeItem& rhs) {
-        if ( lhs.record_.empty())
-            return false;
-        if ( rhs.record_.empty())
-            return true;
-        assert(!lhs.record_.empty());
-        assert(!rhs.record_.empty());
-        const BamRecord& l = lhs.record_.front();
-        const BamRecord& r = rhs.record_.front();
-        return comp_(l, r);
-    }
-
-private:
-    Compare comp_;
-};
-
-template<typename Compare>
-class MergeStrategy : public IMergeStrategy
-{
-public:
-    MergeStrategy(const std::vector<FileIterPtr>& iters);
-    bool GetNext(BamRecord& record);
-private:
-    std::multiset<MergeItem, MergeItemSorter<Compare> > mergeItems_;
-};
-
-template<typename Compare>
-class GroupMergeStrategy : public IGroupMergeStrategy
-{
-public:
-    GroupMergeStrategy(const std::vector<FileIterPtr>& iters);
-    bool GetNext(std::vector<BamRecord>& records);
-private:
-    GroupMergeItem nextItem_;
-    std::multiset<GroupMergeItem, GroupMergeItemSorter<Compare> > mergeItems_;
-};
-
-// -----------------------
-// MergeStrategy
-// -----------------------
-
-template<typename Compare>
-inline MergeStrategy<Compare>::MergeStrategy(const std::vector<FileIterPtr>& iters)
-    : IMergeStrategy()
-{
-    BamRecord r;
-    for (FileIterPtr iter : iters) {
-        if (iter->GetNext(r)) {
-            MergeItem item(r, iter);
-            mergeItems_.insert(item);
-        }
-    }
-}
-
-template<typename Compare>
-inline bool MergeStrategy<Compare>::GetNext(BamRecord& record)
-{
-    if (mergeItems_.empty())
-        return false;
-
-    // pop first merge item & record
-    auto firstIter = mergeItems_.begin();
-    MergeItem firstItem = (*firstIter);
-    mergeItems_.erase(firstIter);
-    record = firstItem.record_;
-
-    // try fetch iter's next (if failed, do not replace)
-    if (firstItem.iter_->GetNext(firstItem.record_))
-        mergeItems_.insert(firstItem);
-    return true;
-}
-
-// -----------------------
-// GroupMergeStrategy
-// -----------------------
-
-template<typename Compare>
-inline GroupMergeStrategy<Compare>::GroupMergeStrategy(const std::vector<FileIterPtr>& iters)
-    : IGroupMergeStrategy()
-{
-    std::vector<BamRecord> r;
-    for (FileIterPtr iter : iters) {
-        if (iter->GetNext(r)) {
-            GroupMergeItem item(r, iter);
-            mergeItems_.insert(item);
-        }
-    }
-    if (!mergeItems_.empty()) {
-        auto firstIter = mergeItems_.begin();
-        nextItem_ = (*firstIter);
-        mergeItems_.erase(firstIter);
-    }
-}
-
-template<typename Compare>
-inline bool GroupMergeStrategy<Compare>::GetNext(std::vector<BamRecord>& records)
-{
-    records.clear();
-    if (nextItem_.IsNull())
-        return false;
-
-    // append "nextItem" records
-    records = nextItem_.record_;
-
-    // try fetch iter's next (if failed, do not replace)
-    if (nextItem_.iter_->GetNext(nextItem_.record_))
-        mergeItems_.insert(nextItem_);
-    else
-        nextItem_ = GroupMergeItem();
-
-    while (!mergeItems_.empty()) {
-
-        // pop first merge item
-        auto firstIter = mergeItems_.begin();
-        GroupMergeItem firstItem = (*firstIter);
-        mergeItems_.erase(firstIter);
-
-        // if first item has records
-        if (!firstItem.record_.empty()) {
-
-            // if first block to store
-            if (records.empty())
-                records = firstItem.record_;
-
-            // else see if we match current group
-            else {
-                const BamRecord& lhs = records.front();
-                const BamRecord& rhs = firstItem.record_.front();
-
-                // if match, append to output & fetch next
-                if (firstItem.iter_->InSameGroup(lhs, rhs)) {
-
-                    for (const BamRecord& r : firstItem.record_)
-                        records.push_back(r);
-                    if (firstItem.iter_->GetNext(firstItem.record_))
-                        mergeItems_.insert(firstItem);
-                }
-
-                // no match, item becomes the "next item" to use
-                else {
-                    nextItem_ = firstItem;
-                    break;
-                }
-            }
-        }
-
-        // first item has no records, try fetch next
-        else {
-            if (firstItem.iter_->GetNext(firstItem.record_))
-                mergeItems_.insert(firstItem);
-        }
-    }
-
-    return true;
-}
-
-} // namespace internal
-} // namespace BAM
-} // namespace PacBio
-
-#endif // MERGESTRATEGY_H
diff --git a/src/FilterEngine.cpp b/include/pbbam/internal/PbiBasicTypes.inl
similarity index 71%
copy from src/FilterEngine.cpp
copy to include/pbbam/internal/PbiBasicTypes.inl
index 1f47967..229841e 100644
--- a/src/FilterEngine.cpp
+++ b/include/pbbam/internal/PbiBasicTypes.inl
@@ -32,45 +32,39 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file PbiBasicTypes.inl
+/// \brief Inline implementations for the basic data structures used in PBI lookups.
+//
 // Author: Derek Barnett
 
-#include "pbbam/internal/FilterEngine.h"
-using namespace PacBio;
-using namespace PacBio::BAM;
-using namespace PacBio::BAM::internal;
-using namespace std;
+#include "pbbam/PbiBasicTypes.h"
 
 namespace PacBio {
 namespace BAM {
-namespace internal {
-
-} // namespace internal
-} // namespace BAM
-} // namespace PacBio
 
+inline IndexResultBlock::IndexResultBlock(void)  // default constructor: zero index, zero reads
+    : firstIndex_(0)
+    , numReads_(0)
+    , virtualOffset_(-1)  // NOTE(review): -1 looks like a 'not yet set' sentinel — confirm against the code that fills in offsets
+{ }
 
-FilterEngine::FilterEngine(void) { }
+inline IndexResultBlock::IndexResultBlock(size_t idx, size_t numReads)  // stores the given first index and read count
+    : firstIndex_(idx)
+    , numReads_(numReads)
+    , virtualOffset_(-1)  // not taken from the arguments; always starts at -1 (presumably 'not yet set' — TODO confirm)
+{ }
 
-bool FilterEngine::Accepts(const BamRecord& r) const
+inline bool IndexResultBlock::operator==(const IndexResultBlock& other) const  // member-wise equality over all three fields
 {
-//        foreach ( const FilterParameter& param, parameters_ ) {
-//            if (!param.Accepts(r))
-//                return false;
-//        }
-//        return true;
-    (void)r;
-    return true;
+    return firstIndex_ == other.firstIndex_ &&      // same first index
+           numReads_ == other.numReads_ &&          // same read count
+           virtualOffset_ == other.virtualOffset_;  // same virtual offset
 }
 
-bool FilterEngine::Accepts(vector<BamRecord>& r) const
-{
-    size_t i = 0;
-    while (i < r.size()) {
-        if (!Accepts(r.at(i)))
-            r.erase(r.begin() + i);
-        else
-            ++i;
-    }
-    return !r.empty();
-}
+inline bool IndexResultBlock::operator!=(const IndexResultBlock& other) const  // defined as the negation of operator==
+{ return !(*this == other); }
+
+} // namespace BAM
+} // namespace PacBio
diff --git a/include/pbbam/internal/PbiFilter.inl b/include/pbbam/internal/PbiFilter.inl
new file mode 100644
index 0000000..18c26d0
--- /dev/null
+++ b/include/pbbam/internal/PbiFilter.inl
@@ -0,0 +1,312 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiFilter.inl
+/// \brief Inline implementations for the PbiFilter class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/PbiFilter.h"
+#include <algorithm>
+#include <iostream>
+#include <map>
+#include <set>
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// \internal
+///
+/// This class wraps a basic PBI filter (whether property filter or some operator
+/// e.g. union, intersect, etc.). The wrapper allows PbiFilters to hold heterogeneous,
+/// recursive filter types - without exposing pointers & worrying about memory ownership
+/// issues between client & library.
+///
+/// Filters can be given by value from client code and we will wrap them for composition.
+///
+/// \code{.cpp}
+///    PbiFilter f1(PbiZmwFilter(42));
+///    PbiFilter f2;
+///    f2.Add(PbiQueryLengthFilter(3000, GREATER_THAN_EQUAL));
+///    f2.Add(MyApplicationCustomFilter("foo"));
+///    PbiFilter intersect = PbiFilter::Intersect(f1, f2);
+///    ...
+/// \endcode
+///
+struct FilterWrapper
+{
+public:
+    template<typename T> FilterWrapper(T x); // takes the filter by value and stores it in a heap-allocated WrapperImpl<T>
+
+    FilterWrapper(const FilterWrapper& other); // deep copy: clones other's wrapped filter
+    FilterWrapper(FilterWrapper&&) noexcept = default;
+    FilterWrapper& operator=(const FilterWrapper& other); // replaces the wrapped filter with a clone of other's
+    FilterWrapper& operator=(FilterWrapper&&) noexcept = default;
+    ~FilterWrapper(void);
+
+public:
+    bool Accepts(const PacBio::BAM::PbiRawData& idx, const size_t row) const; // forwards to the wrapped filter's Accepts
+
+private:
+    struct WrapperInterface // type-erased interface; WrapperImpl<T> is the implementation
+    {
+        virtual ~WrapperInterface(void) = default;
+        virtual WrapperInterface* Clone(void) const =0; // returns a newly allocated copy; caller takes ownership
+        virtual bool Accepts(const PacBio::BAM::PbiRawData& idx,
+                             const size_t row) const =0;
+    };
+
+    template<typename T>
+    struct WrapperImpl : public WrapperInterface // holds one concrete filter of type T by value
+    {
+        WrapperImpl(T x);
+        WrapperImpl(const WrapperImpl& other);
+        WrapperInterface* Clone(void) const;
+        bool Accepts(const PacBio::BAM::PbiRawData& idx, const size_t row) const;
+        T data_; // the wrapped filter
+    };
+
+private:
+    std::unique_ptr<WrapperInterface> self_; // owning pointer to the wrapped filter; dereferenced unchecked by copy operations and Accepts
+};
+
+// ---------------
+// FilterWrapper
+// ---------------
+
+template<typename T>
+inline FilterWrapper::FilterWrapper(T x)
+    : self_(new WrapperImpl<T>(std::move(x)))
+{ }
+
+inline FilterWrapper::FilterWrapper(const FilterWrapper& other)
+    : self_(other.self_->Clone())
+{ }
+
+inline FilterWrapper& FilterWrapper::operator=(const FilterWrapper& other)
+{
+    self_.reset(other.self_->Clone());
+    return *this;
+}
+
+inline FilterWrapper::~FilterWrapper(void) { }
+
+inline bool FilterWrapper::Accepts(const PbiRawData& idx, const size_t row) const
+{ return self_->Accepts(idx, row); }
+
+// ----------------
+// WrapperImpl<T>
+// ----------------
+
+template<typename T>
+inline FilterWrapper::WrapperImpl<T>::WrapperImpl(T x)
+    : FilterWrapper::WrapperInterface()
+    , data_(std::move(x))
+{
+    BOOST_CONCEPT_ASSERT((PbiFilterConcept<T>));
+}
+
+template<typename T>
+inline FilterWrapper::WrapperImpl<T>::WrapperImpl(const WrapperImpl& other)
+    : FilterWrapper::WrapperInterface()
+    , data_(other.data_)
+{ }
+
+template<typename T>
+inline FilterWrapper::WrapperInterface* FilterWrapper::WrapperImpl<T>::Clone(void) const
+{ return new WrapperImpl(*this); }
+
+template<typename T>
+inline bool FilterWrapper::WrapperImpl<T>::Accepts(const PbiRawData& idx,
+                                                   const size_t row) const
+{ return data_.Accepts(idx, row); }
+
+struct PbiFilterPrivate // implementation data for PbiFilter: a composition type plus its child filters
+{
+    PbiFilterPrivate(PbiFilter::CompositionType type)
+        : type_(type)
+    { }
+
+    template<typename T>
+    void Add(T&& filter) // appends one child filter, wrapped in a FilterWrapper
+    {
+        filters_.emplace_back(std::forward<T>(filter)); // forward, not move: T&& is a forwarding reference, so an lvalue argument must be copied rather than moved-from
+    }
+
+    std::unique_ptr<internal::PbiFilterPrivate> DeepCopy(void) const // returns a new object with the same type_ and a copy of every child filter
+    {
+        auto copy = std::unique_ptr<PbiFilterPrivate>{ new PbiFilterPrivate{type_} };
+        copy->filters_ = this->filters_;
+        return copy;
+    }
+
+    bool Accepts(const PbiRawData& idx, const size_t row) const // true if 'row' of idx passes the composite; throws std::runtime_error if type_ is neither INTERSECT nor UNION
+    {
+        // no filter -> accepts every record
+        if (filters_.empty())
+            return true;
+
+        // intersection of child filters
+        if (type_ == PbiFilter::INTERSECT) {
+            for (const auto& filter : filters_) {
+                if (!filter.Accepts(idx, row))
+                    return false; // break early on failure
+            }
+            return true; // all passed
+        }
+
+        // union of child filters
+        else if (type_ == PbiFilter::UNION) {
+            for (const auto& filter : filters_) {
+                if (filter.Accepts(idx, row))
+                    return true; // break early on pass
+            }
+            return false; // none passed
+        }
+
+        else
+            //assert(false); // invalid composite filter type
+            throw std::runtime_error("invalid composite filter type in PbiFilterPrivate::Accepts");
+    }
+
+    PbiFilter::CompositionType type_;      // how child results are combined (INTERSECT or UNION)
+    std::vector<FilterWrapper> filters_;   // child filters, evaluated in insertion order
+};
+
+} // namespace internal
+
+inline PbiFilter::PbiFilter(const CompositionType type)
+    : d_{ new internal::PbiFilterPrivate{ type } }
+{ }
+
+template<typename T> inline
+PbiFilter::PbiFilter(const T& filter)
+    : d_{ new internal::PbiFilterPrivate{ PbiFilter::INTERSECT } }
+{
+    Add(filter);
+}
+
+template<typename T> inline
+PbiFilter::PbiFilter(T&& filter) // NOTE(review): T&& is a forwarding reference; unless the declaration constrains T, non-const lvalues bind here too
+    : d_{ new internal::PbiFilterPrivate{ PbiFilter::INTERSECT } }
+{
+    Add(std::move(filter)); // unconditional move: a caller-owned lvalue may be left moved-from - TODO confirm this is intended
+}
+
+inline PbiFilter::PbiFilter(const std::vector<PbiFilter>& filters)
+    : d_{ new internal::PbiFilterPrivate{ PbiFilter::INTERSECT } }
+{
+    Add(filters);
+}
+
+inline PbiFilter::PbiFilter(std::vector<PbiFilter>&& filters)
+    : d_{ new internal::PbiFilterPrivate{ PbiFilter::INTERSECT} }
+{
+    Add(std::move(filters));
+}
+
+inline PbiFilter::PbiFilter(const PbiFilter& other)
+    : d_{ other.d_->DeepCopy() }
+{ }
+
+inline PbiFilter::PbiFilter(PbiFilter&& other) noexcept
+    : d_{ std::move(other.d_) }
+{ }
+
+inline PbiFilter& PbiFilter::operator=(const PbiFilter& other)
+{
+    d_ = other.d_->DeepCopy();
+    return *this;
+}
+
+inline PbiFilter& PbiFilter::operator=(PbiFilter&& other) noexcept
+{
+    d_ = std::move(other.d_);
+    return *this;
+}
+
+inline PbiFilter::~PbiFilter(void) { }
+
+inline bool PbiFilter::Accepts(const PacBio::BAM::PbiRawData& idx,
+                               const size_t row) const
+{ return d_->Accepts(idx, row); }
+
+template<typename T>
+inline PbiFilter& PbiFilter::Add(const T& filter)
+{
+    T copy = filter;
+    return Add(std::move(copy));
+}
+
+template<typename T>
+inline PbiFilter& PbiFilter::Add(T&& filter) // NOTE(review): T&& is a forwarding reference; a non-const lvalue prefers this overload over Add(const T&)
+{
+    d_->Add(std::move(filter)); // unconditional move: a caller-owned lvalue may be left moved-from - TODO confirm this is intended
+    return *this; // returns *this so calls can be chained
+}
+
+inline PbiFilter& PbiFilter::Add(const PbiFilter& filter)
+{
+    PbiFilter copy = filter;
+    return Add(std::move(copy));
+}
+
+inline PbiFilter& PbiFilter::Add(PbiFilter&& filter)
+{
+    d_->Add(std::move(filter));
+    return *this;
+}
+
+inline PbiFilter& PbiFilter::Add(const std::vector<PbiFilter>& filters)
+{
+    std::vector<PbiFilter> copy = filters;
+    return Add(std::move(copy));
+}
+
+inline PbiFilter& PbiFilter::Add(std::vector<PbiFilter>&& filters)
+{
+    for (auto&& filter : filters)
+        d_->Add(std::move(filter));
+    return *this;
+}
+
+inline bool PbiFilter::IsEmpty(void) const
+{ return d_->filters_.empty(); }
+
+} // namespace BAM
+} // namespace PacBio
diff --git a/include/pbbam/internal/PbiFilterTypes.inl b/include/pbbam/internal/PbiFilterTypes.inl
new file mode 100644
index 0000000..89caca4
--- /dev/null
+++ b/include/pbbam/internal/PbiFilterTypes.inl
@@ -0,0 +1,553 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiFilterTypes.inl
+/// \brief Inline implementations for the built-in PBI filters.
+//
+// Author: Derek Barnett
+
+#include "pbbam/PbiFilterTypes.h"
+#include <cassert>
+#include <stdexcept>
+
+namespace PacBio {
+namespace BAM {
+
+namespace internal {
+
+template <typename T>
+inline FilterBase<T>::FilterBase(const T& value, const Compare::Type cmp)
+    : value_(value)
+    , cmp_(cmp)
+{ }
+
+template <typename T>
+inline FilterBase<T>::FilterBase(T&& value, const Compare::Type cmp)
+    : value_(std::move(value))
+    , cmp_(cmp)
+{ }
+
+template <typename T>
+inline FilterBase<T>::FilterBase(const std::vector<T>& values)
+    : multiValue_(values)
+{ }
+
+template <typename T>
+inline FilterBase<T>::FilterBase(std::vector<T>&& values)
+    : multiValue_(std::move(values))
+{ }
+
+template<typename T>
+inline bool FilterBase<T>::CompareHelper(const T& lhs) const
+{
+    // a whitelist, when one was supplied, is used instead of the single value/compare pair
+    const bool hasWhitelist = !(multiValue_ == boost::none);
+    return hasWhitelist ? CompareMultiHelper(lhs)
+                        : CompareSingleHelper(lhs);
+}
+
+template<typename T>
+inline bool FilterBase<T>::CompareMultiHelper(const T& lhs) const
+{
+    // whitelist lookup: linear scan over the stored values,
+    // succeeding as soon as one of them compares equal to
+    // the value under test
+    const auto& whitelist = multiValue_.get();
+    for (const auto& allowed : whitelist) {
+        if (allowed == lhs)
+            return true;
+    }
+    return false; // nothing in the whitelist matched
+}
+
+template<typename T>
+inline bool FilterBase<T>::CompareSingleHelper(const T& lhs) const
+{
+    switch(cmp_) {
+        case Compare::EQUAL:              return lhs == value_;
+        case Compare::LESS_THAN:          return lhs < value_;
+        case Compare::LESS_THAN_EQUAL:    return lhs <= value_;
+        case Compare::GREATER_THAN:       return lhs > value_;
+        case Compare::GREATER_THAN_EQUAL: return lhs >= value_;
+        case Compare::NOT_EQUAL:          return lhs != value_;
+        default:
+            assert(false);
+            throw std::runtime_error("unsupported compare type requested");
+    }
+}
+
+template<>
+inline bool FilterBase<LocalContextFlags>::CompareSingleHelper(const LocalContextFlags& lhs) const
+{
+    switch(cmp_) {
+        case Compare::EQUAL:              return lhs == value_;
+        case Compare::LESS_THAN:          return lhs < value_;
+        case Compare::LESS_THAN_EQUAL:    return lhs <= value_;
+        case Compare::GREATER_THAN:       return lhs > value_;
+        case Compare::GREATER_THAN_EQUAL: return lhs >= value_;
+        case Compare::NOT_EQUAL:          return lhs != value_;
+        case Compare::CONTAINS:           return ((lhs & value_) != 0);
+        case Compare::NOT_CONTAINS:       return ((lhs & value_) == 0);
+
+        default:
+            assert(false);
+            throw std::runtime_error("unsupported compare type requested");
+    }
+}
+
+// BarcodeDataFilterBase
+
+template<typename T, BarcodeLookupData::Field field>
+inline BarcodeDataFilterBase<T, field>::BarcodeDataFilterBase(const T& value, const Compare::Type cmp)
+    : FilterBase<T>(value, cmp)
+{ }
+
+template<typename T, BarcodeLookupData::Field field>
+inline BarcodeDataFilterBase<T, field>::BarcodeDataFilterBase(T&& value, const Compare::Type cmp)
+    : FilterBase<T>(std::move(value), cmp)
+{ }
+
+template<typename T, BarcodeLookupData::Field field>
+inline BarcodeDataFilterBase<T, field>::BarcodeDataFilterBase(const std::vector<T>& values)
+    : FilterBase<T>(values)
+{ }
+
+template<typename T, BarcodeLookupData::Field field>
+inline BarcodeDataFilterBase<T, field>::BarcodeDataFilterBase(std::vector<T>&& values)
+    : FilterBase<T>(std::move(values))
+{ }
+
+template<typename T, BarcodeLookupData::Field field>
+inline bool BarcodeDataFilterBase<T, field>::BarcodeDataFilterBase::Accepts(const PbiRawData& idx,
+                                           const size_t row) const
+{
+    const PbiRawBarcodeData& barcodeData = idx.BarcodeData();
+    switch (field) {
+        case BarcodeLookupData::BC_FORWARD: return FilterBase<T>::CompareHelper(barcodeData.bcForward_.at(row));
+        case BarcodeLookupData::BC_REVERSE: return FilterBase<T>::CompareHelper(barcodeData.bcReverse_.at(row));
+        case BarcodeLookupData::BC_QUALITY: return FilterBase<T>::CompareHelper(barcodeData.bcQual_.at(row));
+        default:
+            assert(false);
+            throw std::runtime_error("unsupported BarcodeData field requested");
+    }
+}
+
+// BasicDataFilterBase
+
+template<typename T, BasicLookupData::Field field>
+inline BasicDataFilterBase<T, field>::BasicDataFilterBase(const T& value, const Compare::Type cmp)
+    : FilterBase<T>(value, cmp)
+{ }
+
+template<typename T, BasicLookupData::Field field>
+inline BasicDataFilterBase<T, field>::BasicDataFilterBase(T&& value, const Compare::Type cmp)
+    : FilterBase<T>(std::move(value), cmp)
+{ }
+
+template<typename T, BasicLookupData::Field field>
+inline BasicDataFilterBase<T, field>::BasicDataFilterBase(const std::vector<T>& values)
+    : FilterBase<T>(values)
+{ }
+
+template<typename T, BasicLookupData::Field field>
+inline BasicDataFilterBase<T, field>::BasicDataFilterBase(std::vector<T>&& values)
+    : FilterBase<T>(std::move(values))
+{ }
+
+template<typename T, BasicLookupData::Field field>
+inline bool BasicDataFilterBase<T, field>::BasicDataFilterBase::Accepts(const PbiRawData& idx,
+                                                                        const size_t row) const
+{
+    const PbiRawBasicData& basicData = idx.BasicData();
+    switch (field) {
+        case BasicLookupData::RG_ID:        return FilterBase<T>::CompareHelper(basicData.rgId_.at(row));
+        case BasicLookupData::Q_START:      return FilterBase<T>::CompareHelper(basicData.qStart_.at(row));
+        case BasicLookupData::Q_END:        return FilterBase<T>::CompareHelper(basicData.qEnd_.at(row));
+        case BasicLookupData::ZMW:          return FilterBase<T>::CompareHelper(basicData.holeNumber_.at(row));
+        case BasicLookupData::READ_QUALITY: return FilterBase<T>::CompareHelper(basicData.readQual_.at(row));
+        //   BasicLookupData::CONTEXT_FLAG has its own specialization
+        default:
+            assert(false);
+            throw std::runtime_error("unsupported BasicData field requested");
+    }
+}
+
+// this typedef exists purely so that the next method signature isn't 2 screen widths long
+typedef BasicDataFilterBase<LocalContextFlags, BasicLookupData::CONTEXT_FLAG> LocalContextFilter__;
+
+template<>
+inline bool LocalContextFilter__::BasicDataFilterBase::Accepts(const PbiRawData& idx,
+                                                               const size_t row) const
+{
+    const PbiRawBasicData& basicData = idx.BasicData();
+    const LocalContextFlags rowFlags = static_cast<LocalContextFlags>(basicData.ctxtFlag_.at(row));
+    return FilterBase<LocalContextFlags>::CompareHelper(rowFlags);
+}
+
+template<typename T, MappedLookupData::Field field>
+inline MappedDataFilterBase<T, field>::MappedDataFilterBase(const T& value, const Compare::Type cmp)
+    : FilterBase<T>(value, cmp)
+{ }
+
+template<typename T, MappedLookupData::Field field>
+inline MappedDataFilterBase<T, field>::MappedDataFilterBase(T&& value, const Compare::Type cmp)
+    : FilterBase<T>(std::move(value), cmp)
+{ }
+
+template<typename T, MappedLookupData::Field field>
+inline MappedDataFilterBase<T, field>::MappedDataFilterBase(const std::vector<T>& values)
+    : FilterBase<T>(values)
+{ }
+
+template<typename T, MappedLookupData::Field field>
+inline MappedDataFilterBase<T, field>::MappedDataFilterBase(std::vector<T>&& values)
+    : FilterBase<T>(std::move(values))
+{ }
+
+template<>
+inline bool MappedDataFilterBase<Strand, MappedLookupData::STRAND>::MappedDataFilterBase::Accepts(const PbiRawData& idx,
+                                                                                                  const size_t row) const
+{
+    const PbiRawMappedData& mappedData = idx.MappedData();
+    const Strand strand = (mappedData.revStrand_.at(row) == 1 ? Strand::REVERSE : Strand::FORWARD);
+    return FilterBase<Strand>::CompareHelper(strand);
+}
+
+template<typename T, MappedLookupData::Field field>
+inline bool MappedDataFilterBase<T, field>::MappedDataFilterBase::Accepts(const PbiRawData& idx,
+                                                                          const size_t row) const
+{
+    const PbiRawMappedData& mappedData = idx.MappedData();
+    switch (field) {
+        case MappedLookupData::T_ID:        return FilterBase<T>::CompareHelper(mappedData.tId_.at(row));
+        case MappedLookupData::T_START:     return FilterBase<T>::CompareHelper(mappedData.tStart_.at(row));
+        case MappedLookupData::T_END:       return FilterBase<T>::CompareHelper(mappedData.tEnd_.at(row));
+        case MappedLookupData::A_START:     return FilterBase<T>::CompareHelper(mappedData.aStart_.at(row));
+        case MappedLookupData::A_END:       return FilterBase<T>::CompareHelper(mappedData.aEnd_.at(row));
+        case MappedLookupData::N_M:         return FilterBase<T>::CompareHelper(mappedData.nM_.at(row));
+        case MappedLookupData::N_MM:        return FilterBase<T>::CompareHelper(mappedData.nMM_.at(row));
+        case MappedLookupData::N_DEL:       return FilterBase<T>::CompareHelper(mappedData.NumDeletedBasesAt(row));
+        case MappedLookupData::N_INS:       return FilterBase<T>::CompareHelper(mappedData.NumInsertedBasesAt(row));
+        case MappedLookupData::MAP_QUALITY: return FilterBase<T>::CompareHelper(mappedData.mapQV_.at(row));
+        default:
+            assert(false);
+            throw std::runtime_error("unsupported MappedData field requested");
+    }
+}
+
+} // namespace internal
+
+// PbiAlignedEndFilter
+
+inline PbiAlignedEndFilter::PbiAlignedEndFilter(const uint32_t position, const Compare::Type cmp)
+    : internal::MappedDataFilterBase<uint32_t, MappedLookupData::A_END>(position, cmp)
+{ }
+
+// PbiAlignedLengthFilter
+
+inline PbiAlignedLengthFilter::PbiAlignedLengthFilter(const uint32_t length, const Compare::Type cmp)
+    : internal::FilterBase<uint32_t>(length, cmp)
+{ }
+
+// PbiAlignedStartFilter
+
+inline PbiAlignedStartFilter::PbiAlignedStartFilter(const uint32_t position, const Compare::Type cmp)
+    : internal::MappedDataFilterBase<uint32_t, MappedLookupData::A_START>(position, cmp)
+{ }
+
+// PbiAlignedStrandFilter
+
+inline PbiAlignedStrandFilter::PbiAlignedStrandFilter(const Strand strand, const Compare::Type cmp)
+    : internal::MappedDataFilterBase<Strand, MappedLookupData::STRAND>(strand, cmp)
+{
+    if (cmp != Compare::EQUAL && cmp != Compare::NOT_EQUAL) {
+        auto msg = std::string{ "Compare type: " };
+        msg += Compare::TypeToName(cmp);
+        msg += " not supported for PbiAlignedStrandFilter (use one of Compare::EQUAL or Compare::NOT_EQUAL).";
+        throw std::runtime_error(msg);
+    }
+}
+
+// PbiBarcodeFilter
+
+inline PbiBarcodeFilter::PbiBarcodeFilter(const uint16_t barcode, const Compare::Type cmp)
+    : compositeFilter_{ PbiFilter::Union({ PbiBarcodeForwardFilter{barcode,cmp},
+                                           PbiBarcodeReverseFilter{barcode,cmp}
+                                         })
+                      }
+{ }
+
+inline PbiBarcodeFilter::PbiBarcodeFilter(const std::vector<uint16_t> &whitelist)
+    : compositeFilter_{ PbiFilter::Union({ PbiBarcodeForwardFilter{whitelist},
+                                           PbiBarcodeReverseFilter{whitelist}
+                                         })
+                      }
+{ }
+
+inline PbiBarcodeFilter::PbiBarcodeFilter(std::vector<uint16_t> &&whitelist) // accepts a record whose forward OR reverse barcode is in the whitelist
+    : compositeFilter_{ PbiFilter::Union({ PbiBarcodeForwardFilter{whitelist}, // copy: both child filters need the full whitelist, and braced-list elements are evaluated left to right
+                                           PbiBarcodeReverseFilter{std::move(whitelist)} // last use, so the buffer can be handed over
+                                         })
+                      }
+{ }
+
+inline bool PbiBarcodeFilter::Accepts(const PbiRawData& idx, const size_t row) const
+{ return compositeFilter_.Accepts(idx, row); }
+
+// PbiBarcodeForwardFilter
+
+inline PbiBarcodeForwardFilter::PbiBarcodeForwardFilter(const uint16_t bcFwdId, const Compare::Type cmp)
+    : internal::BarcodeDataFilterBase<uint16_t, BarcodeLookupData::BC_FORWARD>(bcFwdId, cmp)
+{ }
+
+inline PbiBarcodeForwardFilter::PbiBarcodeForwardFilter(const std::vector<uint16_t>& whitelist)
+    : internal::BarcodeDataFilterBase<uint16_t, BarcodeLookupData::BC_FORWARD>(whitelist)
+{ }
+
+inline PbiBarcodeForwardFilter::PbiBarcodeForwardFilter(std::vector<uint16_t>&& whitelist)
+    : internal::BarcodeDataFilterBase<uint16_t, BarcodeLookupData::BC_FORWARD>(std::move(whitelist))
+{ }
+
+// PbiBarcodeQualityFilter
+
+inline PbiBarcodeQualityFilter::PbiBarcodeQualityFilter(const uint8_t bcQuality, const Compare::Type cmp)
+    : internal::BarcodeDataFilterBase<uint8_t, BarcodeLookupData::BC_QUALITY>(bcQuality, cmp)
+{ }
+
+// PbiBarcodeReverseFilter
+
+inline PbiBarcodeReverseFilter::PbiBarcodeReverseFilter(const uint16_t bcRevId, const Compare::Type cmp)
+    : internal::BarcodeDataFilterBase<uint16_t, BarcodeLookupData::BC_REVERSE>(bcRevId, cmp)
+{ }
+
+inline PbiBarcodeReverseFilter::PbiBarcodeReverseFilter(const std::vector<uint16_t>& whitelist)
+    : internal::BarcodeDataFilterBase<uint16_t, BarcodeLookupData::BC_REVERSE>(whitelist)
+{ }
+
+inline PbiBarcodeReverseFilter::PbiBarcodeReverseFilter(std::vector<uint16_t>&& whitelist)
+    : internal::BarcodeDataFilterBase<uint16_t, BarcodeLookupData::BC_REVERSE>(std::move(whitelist))
+{ }
+
+// PbiBarcodesFilter
+
+inline PbiBarcodesFilter::PbiBarcodesFilter(const std::pair<uint16_t, uint16_t> barcodes, const Compare::Type cmp)
+    : PbiBarcodesFilter(barcodes.first, barcodes.second, cmp)
+{ }
+
+inline PbiBarcodesFilter::PbiBarcodesFilter(const uint16_t bcForward, const uint16_t bcReverse, const Compare::Type cmp)
+    : compositeFilter_{ PbiFilter::Intersection({ PbiBarcodeForwardFilter{bcForward,cmp},
+                                                  PbiBarcodeReverseFilter{bcReverse,cmp}
+                                                })
+                      }
+{ }
+
+inline bool PbiBarcodesFilter::Accepts(const PbiRawData& idx, const size_t row) const
+{ return compositeFilter_.Accepts(idx, row); }
+
+// PbiIdentityFilter
+
+inline PbiIdentityFilter::PbiIdentityFilter(const float identity,
+                                            const Compare::Type cmp)
+    : internal::FilterBase<float>(identity, cmp)
+{ }
+
+// PbiLocalContextFilter
+
+inline PbiLocalContextFilter::PbiLocalContextFilter(const LocalContextFlags& flags,
+                                                    const Compare::Type cmp)
+    : internal::BasicDataFilterBase<LocalContextFlags, BasicLookupData::CONTEXT_FLAG>(flags, cmp)
+{ }
+
+// PbiMapQualityFilter
+
+inline PbiMapQualityFilter::PbiMapQualityFilter(const uint8_t mapQual, const Compare::Type cmp)
+    : internal::MappedDataFilterBase<uint8_t, MappedLookupData::MAP_QUALITY>(mapQual, cmp)
+{ }
+
+// PbiMovieNameFilter
+
+inline bool PbiMovieNameFilter::Accepts(const PbiRawData& idx, const size_t row) const
+{ return compositeFilter_.Accepts(idx, row); }
+
+// PbiNumDeletedBasesFilter
+
+inline PbiNumDeletedBasesFilter::PbiNumDeletedBasesFilter(const size_t numDeletions, const Compare::Type cmp)
+    : internal::MappedDataFilterBase<size_t, MappedLookupData::N_DEL>(numDeletions, cmp)
+{ }
+
+// PbiNumInsertedBasesFilter
+
+inline PbiNumInsertedBasesFilter::PbiNumInsertedBasesFilter(const size_t numInsertions, const Compare::Type cmp)
+    : internal::MappedDataFilterBase<size_t, MappedLookupData::N_INS>(numInsertions, cmp)
+{ }
+
+// PbiNumMatchesFilter
+
+inline PbiNumMatchesFilter::PbiNumMatchesFilter(const size_t numMatchedBases, const Compare::Type cmp)
+    : internal::MappedDataFilterBase<size_t, MappedLookupData::N_M>(numMatchedBases, cmp)
+{ }
+
+// PbiNumMismatchesFilter
+
+inline PbiNumMismatchesFilter::PbiNumMismatchesFilter(const size_t numMismatchedBases, const Compare::Type cmp)
+    : internal::MappedDataFilterBase<size_t, MappedLookupData::N_MM>(numMismatchedBases, cmp)
+{ }
+
+// PbiQueryEndFilter
+
+inline PbiQueryEndFilter::PbiQueryEndFilter(const int32_t position, const Compare::Type cmp)
+    : internal::BasicDataFilterBase<int32_t, BasicLookupData::Q_END>(position, cmp)
+{ }
+
+// PbiQueryLengthFilter
+
+inline PbiQueryLengthFilter::PbiQueryLengthFilter(const int32_t length, const Compare::Type cmp)
+    : internal::FilterBase<int32_t>(length, cmp)
+{ }
+
+// PbiQueryNameFilter
+
+inline bool PbiQueryNameFilter::Accepts(const PbiRawData& idx, const size_t row) const
+{ return compositeFilter_.Accepts(idx, row); }
+
+// PbiQueryStartFilter
+
+inline PbiQueryStartFilter::PbiQueryStartFilter(const int32_t position, const Compare::Type cmp)
+    : internal::BasicDataFilterBase<int32_t, BasicLookupData::Q_START>(position, cmp)
+{ }
+
+// PbiReadAccuracyFilter
+
+inline PbiReadAccuracyFilter::PbiReadAccuracyFilter(const Accuracy accuracy, const Compare::Type cmp)
+    : internal::BasicDataFilterBase<Accuracy, BasicLookupData::READ_QUALITY>(accuracy, cmp)
+{ }
+
+// PbiReadGroupFilter
+
+inline PbiReadGroupFilter::PbiReadGroupFilter(const int32_t rgId, const Compare::Type cmp)
+    : internal::BasicDataFilterBase<int32_t, BasicLookupData::RG_ID>(rgId, cmp)
+{ }
+
+inline PbiReadGroupFilter::PbiReadGroupFilter(const std::string rgId, const Compare::Type cmp)
+    : PbiReadGroupFilter(ReadGroupInfo::IdToInt(rgId), cmp)
+{ }
+
+inline PbiReadGroupFilter::PbiReadGroupFilter(const ReadGroupInfo& rg, const Compare::Type cmp)
+    : PbiReadGroupFilter(rg.Id(), cmp)
+{ }
+
+inline PbiReadGroupFilter::PbiReadGroupFilter(const std::vector<int32_t>& whitelist)
+    : internal::BasicDataFilterBase<int32_t, BasicLookupData::RG_ID>(whitelist)
+{ }
+
+inline PbiReadGroupFilter::PbiReadGroupFilter(std::vector<int32_t>&& whitelist)
+    : internal::BasicDataFilterBase<int32_t, BasicLookupData::RG_ID>(std::move(whitelist))
+{ }
+
+inline PbiReadGroupFilter::PbiReadGroupFilter(const std::vector<std::string>& whitelist)
+    : internal::BasicDataFilterBase<int32_t, BasicLookupData::RG_ID>(std::vector<int32_t>())
+{
+    multiValue_->reserve(whitelist.size());
+    for (const auto& rg : whitelist)
+        multiValue_->push_back(ReadGroupInfo::IdToInt(rg));
+}
+
+inline PbiReadGroupFilter::PbiReadGroupFilter(std::vector<std::string>&& whitelist)
+    : internal::BasicDataFilterBase<int32_t, BasicLookupData::RG_ID>(std::vector<int32_t>())
+{
+    multiValue_->reserve(whitelist.size());
+    for (auto&& rg : whitelist)
+        multiValue_->push_back(ReadGroupInfo::IdToInt(rg));
+}
+
+inline PbiReadGroupFilter::PbiReadGroupFilter(const std::vector<ReadGroupInfo>& whitelist)
+    : internal::BasicDataFilterBase<int32_t, BasicLookupData::RG_ID>(std::vector<int32_t>())
+{
+    multiValue_->reserve(whitelist.size());
+    for (const auto& rg : whitelist)
+        multiValue_->push_back(ReadGroupInfo::IdToInt(rg.Id()));
+}
+
+inline PbiReadGroupFilter::PbiReadGroupFilter(std::vector<ReadGroupInfo>&& whitelist)
+    : internal::BasicDataFilterBase<int32_t, BasicLookupData::RG_ID>(std::vector<int32_t>())
+{
+    multiValue_->reserve(whitelist.size());
+    for (auto&& rg : whitelist)
+        multiValue_->push_back(ReadGroupInfo::IdToInt(rg.Id()));
+}
+
+// PbiReferenceEndFilter
+
+inline PbiReferenceEndFilter::PbiReferenceEndFilter(const uint32_t tEnd, const Compare::Type cmp)
+    : internal::MappedDataFilterBase<uint32_t, MappedLookupData::T_END>(tEnd, cmp)
+{ }
+
+// PbiReferenceIdFilter
+
+inline PbiReferenceIdFilter::PbiReferenceIdFilter(const int32_t tId, const Compare::Type cmp)
+    : internal::MappedDataFilterBase<int32_t, MappedLookupData::T_ID>(tId, cmp)
+{ }
+
+inline PbiReferenceIdFilter::PbiReferenceIdFilter(const std::vector<int32_t>& whitelist)
+    : internal::MappedDataFilterBase<int32_t, MappedLookupData::T_ID>(whitelist)
+{ }
+
+inline PbiReferenceIdFilter::PbiReferenceIdFilter(std::vector<int32_t>&& whitelist)
+    : internal::MappedDataFilterBase<int32_t, MappedLookupData::T_ID>(std::move(whitelist))
+{ }
+
+// PbiReferenceStartFilter
+
+inline PbiReferenceStartFilter::PbiReferenceStartFilter(const uint32_t tStart, const Compare::Type cmp)
+    : internal::MappedDataFilterBase<uint32_t, MappedLookupData::T_START>(tStart, cmp)
+{ }
+
+// PbiZmwFilter
+
+inline PbiZmwFilter::PbiZmwFilter(const int32_t zmw, const Compare::Type cmp)
+    : internal::BasicDataFilterBase<int32_t, BasicLookupData::ZMW>(zmw, cmp)
+{ }
+
+inline PbiZmwFilter::PbiZmwFilter(const std::vector<int32_t>& whitelist)
+    : internal::BasicDataFilterBase<int32_t, BasicLookupData::ZMW>(whitelist)
+{ }
+
+inline PbiZmwFilter::PbiZmwFilter(std::vector<int32_t>&& whitelist)
+    : internal::BasicDataFilterBase<int32_t, BasicLookupData::ZMW>(std::move(whitelist))
+{ }
+
+} // namespace BAM
+} // namespace PacBio
diff --git a/include/pbbam/internal/PbiIndex.inl b/include/pbbam/internal/PbiIndex.inl
new file mode 100644
index 0000000..ca4c4ce
--- /dev/null
+++ b/include/pbbam/internal/PbiIndex.inl
@@ -0,0 +1,165 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiIndex.inl
+/// \brief Inline implementations for the PbiIndex class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/BamRecord.h"
+#include "pbbam/PbiFile.h"
+#include "pbbam/PbiIndex.h"
+#include "pbbam/PbiRawData.h"
+
+#include <algorithm>
+#include <map>
+#include <memory>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+#include <cassert>
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// --------------------------
+// Pbi Lookup Aggregate
+// --------------------------
+
+class PbiIndexPrivate    // aggregate behind PbiIndex: header fields plus the lookup structures
+{
+public:
+    PbiIndexPrivate(void);
+    PbiIndexPrivate(const PbiRawData& rawIndex);
+    PbiIndexPrivate(PbiRawData&& rawIndex);
+
+    std::unique_ptr<PbiIndexPrivate> DeepCopy(void) const;   // defined elsewhere; presumably a full copy - copy ops below are deleted
+
+public:
+    bool HasSection(const PbiFile::Section flag) const;            // tests flag against sections_
+    void SetSection(const PbiFile::Section flag, bool ok = true);  // ok: set flag in sections_; !ok: clear it
+
+public:
+    IndexResultBlocks LookupReference(const int32_t tId) const;    // result blocks for one reference ID
+
+private:
+    IndexResultBlocks MergeBlocksWithOffsets(const IndexList& indices) const;
+
+public:
+    std::string filename_;
+    PbiFile::VersionEnum version_;
+    PbiFile::Sections sections_;    // bit flags; read/written through HasSection()/SetSection()
+    uint32_t numReads_;
+
+    // lookup structures (basic, mapped, reference, barcode)
+    BasicLookupData     basicData_;
+    MappedLookupData    mappedData_;
+    ReferenceLookupData referenceData_;
+    BarcodeLookupData   barcodeData_;
+
+private:
+    // non-copyable: deleted (not merely left undefined) so misuse fails at compile time
+    PbiIndexPrivate(const PbiIndexPrivate& other) = delete;
+    PbiIndexPrivate& operator=(const PbiIndexPrivate& other) = delete;
+};
+
+inline bool PbiIndexPrivate::HasSection(const PbiFile::Section flag) const
+{ const auto overlap = sections_ & flag; return overlap != 0; }   // true when any bit of flag is set in sections_
+
+inline void PbiIndexPrivate::SetSection(const PbiFile::Section flag, bool ok)
+{ if (!ok) sections_ &= ~flag; else sections_ |= flag; }   // ok: turn flag's bits on; !ok: turn them off
+
+inline IndexResultBlocks
+PbiIndexPrivate::LookupReference(const int32_t tId) const
+{
+    // Empty result unless a REFERENCE section exists and holds a non-null range for tId.
+    IndexResultBlocks result;
+    if (HasSection(PbiFile::REFERENCE)) {
+        const auto& range = referenceData_.Indices(tId);
+        if (range.first != nullIndex() || range.second != nullIndex()) {
+            result.push_back(IndexResultBlock(range.first, range.second - range.first));
+            basicData_.ApplyOffsets(result);
+        }
+    }
+    return result;
+}
+
+// Builds result blocks from `indices` via mergedIndexBlocks(), then has basicData_ apply offsets to them.
+inline IndexResultBlocks PbiIndexPrivate::MergeBlocksWithOffsets(const IndexList& indices) const
+{
+    IndexResultBlocks merged = mergedIndexBlocks(indices);
+    basicData_.ApplyOffsets(merged);
+    return merged;
+}
+
+} // namespace internal
+
+inline PbiFile::Sections PbiIndex::FileSections(void) const
+{ return d_->sections_; }   // returns the sections_ value held by d_, by value
+
+inline bool PbiIndex::HasBarcodeData(void) const
+{ return d_->HasSection(PbiFile::BARCODE); }   // asks d_ whether the BARCODE section flag is set
+
+inline bool PbiIndex::HasMappedData(void) const
+{ return d_->HasSection(PbiFile::MAPPED); }   // asks d_ whether the MAPPED section flag is set
+
+inline bool PbiIndex::HasReferenceData(void) const
+{ return d_->HasSection(PbiFile::REFERENCE); }   // asks d_ whether the REFERENCE section flag is set
+
+inline bool PbiIndex::HasSection(const PbiFile::Section section) const
+{ return d_->HasSection(section); }   // generic form: forwards the caller's section flag to d_
+
+inline uint32_t PbiIndex::NumReads(void) const
+{ return d_->numReads_; }   // returns the numReads_ value held by d_
+
+inline PbiFile::VersionEnum PbiIndex::Version(void) const
+{ return d_->version_; }   // returns the version_ value held by d_
+
+inline const BarcodeLookupData& PbiIndex::BarcodeData(void) const
+{ return d_->barcodeData_; }   // const reference to d_'s barcodeData_; nothing is copied
+
+inline const BasicLookupData& PbiIndex::BasicData(void) const
+{ return d_->basicData_; }   // const reference to d_'s basicData_; nothing is copied
+
+inline const MappedLookupData& PbiIndex::MappedData(void) const
+{ return d_->mappedData_; }   // const reference to d_'s mappedData_; nothing is copied
+
+inline const ReferenceLookupData& PbiIndex::ReferenceData(void) const
+{ return d_->referenceData_; }   // const reference to d_'s referenceData_; nothing is copied
+
+} // namespace BAM
+} // namespace PacBio
diff --git a/include/pbbam/internal/PbiIndex_p.h b/include/pbbam/internal/PbiIndex_p.h
deleted file mode 100644
index 5f0db35..0000000
--- a/include/pbbam/internal/PbiIndex_p.h
+++ /dev/null
@@ -1,931 +0,0 @@
-//// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
-////
-//// All rights reserved.
-////
-//// Redistribution and use in source and binary forms, with or without
-//// modification, are permitted (subject to the limitations in the
-//// disclaimer below) provided that the following conditions are met:
-////
-////  * Redistributions of source code must retain the above copyright
-////    notice, this list of conditions and the following disclaimer.
-////
-////  * Redistributions in binary form must reproduce the above
-////    copyright notice, this list of conditions and the following
-////    disclaimer in the documentation and/or other materials provided
-////    with the distribution.
-////
-////  * Neither the name of Pacific Biosciences nor the names of its
-////    contributors may be used to endorse or promote products derived
-////    from this software without specific prior written permission.
-////
-//// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-//// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-//// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-//// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-//// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-//// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-//// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-//// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-//// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-//// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-//// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-//// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-//// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-//// SUCH DAMAGE.
-////
-//// Author: Derek Barnett
-
-//#ifndef PBIINDEX_P_H
-//#define PBIINDEX_P_H
-
-//#include "pbbam/BamRecord.h"
-//#include "pbbam/PbiFile.h"
-//#include "pbbam/PbiIndex.h"
-//#include "pbbam/PbiRawData.h"
-
-//#include <algorithm>
-//#include <map>
-//#include <memory>
-//#include <unordered_map>
-//#include <utility>
-//#include <vector>
-//#include <cassert>
-
-//namespace PacBio {
-//namespace BAM {
-//namespace internal {
-
-//// --------------------------
-//// Ordered Lookup Container (e.g. map)
-//// --------------------------
-
-//template<typename T>
-//class OrderedLookup
-//{
-//public:
-//    typedef T         KeyType;
-//    typedef IndexList ValueType;
-//    typedef std::map<KeyType, ValueType> ContainerType;
-//    typedef typename ContainerType::const_iterator IterType;
-
-//public:
-//    OrderedLookup(void);
-//    OrderedLookup(size_t n);
-//    OrderedLookup(const ContainerType& data);
-//    OrderedLookup(ContainerType&& data);
-//    OrderedLookup(const std::vector<T>& rawData);
-//    OrderedLookup(std::vector<T>&& rawData);
-
-//public:
-//    bool operator==(const OrderedLookup<T>& other) const;
-//    bool operator!=(const OrderedLookup<T>& other) const;
-
-//public:
-//    IndexList LookupIndices(const KeyType& key,
-//                            const CompareType& compare) const;
-
-//private:
-//    IndexList LookupInclusiveRange(const IterType& begin,
-//                                   const IterType& end) const;
-
-//    IndexList LookupExclusiveRange(const IterType& begin,
-//                                   const IterType& end,
-//                                   const KeyType& key) const;
-
-//private:
-//    ContainerType data_;
-//};
-
-//// --------------------------
-//// Unordered Lookup Container (e.g. hash)
-//// --------------------------
-
-//template<typename T>
-//class UnorderedLookup
-//{
-//public:
-//    typedef T         KeyType;
-//    typedef IndexList ValueType;
-//    typedef std::unordered_map<KeyType, ValueType> ContainerType;
-
-//public:
-//    UnorderedLookup(void);
-//    UnorderedLookup(size_t n);
-//    UnorderedLookup(const ContainerType& data);
-//    UnorderedLookup(ContainerType&& data);
-//    UnorderedLookup(const std::vector<T>& rawData);
-//    UnorderedLookup(std::vector<T>&& rawData);
-
-//public:
-//    bool operator==(const UnorderedLookup<T>& other) const;
-//    bool operator!=(const UnorderedLookup<T>& other) const;
-
-//public:
-//    IndexList LookupIndices(const KeyType& key,
-//                            const CompareType& compare) const;
-
-//private:
-//    template<typename Compare>
-//    IndexList LookupHelper(const KeyType& key, const Compare& cmp) const;
-
-//private:
-//    ContainerType data_;
-//};
-
-//// ----------------
-//// Subread Data
-//// ----------------
-
-//struct SubreadLookupData
-//{
-//    // ctors
-//    SubreadLookupData(void);
-//    SubreadLookupData(const PbiRawSubreadData& rawData);
-////    SubreadLookupData(PbiRawSubreadData&& rawData);
-
-//    // add offset data to index result blocks
-//    void ApplyOffsets(IndexResultBlocks& blocks) const;
-
-//    template<typename T>
-//    IndexList Indices(const SubreadField& field,
-//                      const T& value,
-//                      const CompareType& compareType) const;
-
-//    template<typename T>
-//    IndexList IndicesMulti(const SubreadField& field,
-//                           const std::vector<T>& values) const;
-
-//    // map ordering doesn't make sense, optimize for direct lookup
-//    UnorderedLookup<int32_t> rgId_;
-
-//    // numeric comparisons make sense, keep key ordering preserved
-//    OrderedLookup<int32_t>  qStart_;
-//    OrderedLookup<int32_t>  qEnd_;
-//    OrderedLookup<int32_t>  holeNumber_;
-//    OrderedLookup<uint16_t> readQual_;
-
-//    // offsets
-//    std::vector<int64_t> fileOffset_;
-//};
-
-//// -----------------
-//// Mapped Data
-//// -----------------
-
-//struct MappedLookupData
-//{
-//    // ctors
-//    MappedLookupData(void);
-//    MappedLookupData(const PbiRawMappedData& rawData);
-////    MappedLookupData(PbiRawMappedData&& rawData);
-
-//    template<typename T>
-//    IndexList Indices(const MappedField& field,
-//                      const T& value,
-//                      const CompareType& compareType) const;
-
-//    template<typename T>
-//    IndexList IndicesMulti(const MappedField& field,
-//                           const std::vector<T>& values) const;
-
-//    // numeric comparisons make sense, keep key ordering preserved
-//    OrderedLookup<int32_t>  tId_;
-//    OrderedLookup<uint32_t> tStart_;
-//    OrderedLookup<uint32_t> tEnd_;
-//    OrderedLookup<uint32_t> aStart_;
-//    OrderedLookup<uint32_t> aEnd_;
-//    OrderedLookup<uint32_t> nM_;
-//    OrderedLookup<uint32_t> nMM_;
-//    OrderedLookup<uint8_t>  mapQV_;
-
-//    // generated, not stored in PBI
-//    OrderedLookup<uint32_t> nIns_;
-//    OrderedLookup<uint32_t> nDel_;
-
-//    // no need for map overhead, just store direct indices
-//    IndexList reverseStrand_;
-//    IndexList forwardStrand_;
-//};
-
-//// ------------------
-//// Reference Data
-//// ------------------
-
-//struct ReferenceLookupData
-//{
-//    // ctors
-//    ReferenceLookupData(void);
-//    ReferenceLookupData(const PbiRawReferenceData& rawData);
-////    ReferenceLookupData(PbiRawReferenceData&& rawData);
-
-//    IndexRange Indices(const int32_t tId) const;
-
-//    // references_[tId] = (begin, end) indices
-//    // into SubreadLookupData::fileOffset_
-//    std::unordered_map<int32_t, IndexRange> references_;
-//};
-
-//// ---------------
-//// Barcode Data
-//// ---------------
-
-//struct BarcodeLookupData
-//{
-//    // ctors
-//    BarcodeLookupData(void);
-//    BarcodeLookupData(const PbiRawBarcodeData& rawData);
-////    BarcodeLookupData(PbiRawBarcodeData&& rawData);
-
-//    template<typename T>
-//    IndexList Indices(const BarcodeField& field,
-//                      const T& value,
-//                      const CompareType& compareType) const;
-
-//    template<typename T>
-//    IndexList IndicesMulti(const BarcodeField& field,
-//                           const std::vector<T>& values) const;
-
-//    // numeric comparisons make sense, keep key ordering preserved
-//    OrderedLookup<uint16_t> bcLeft_;
-//    OrderedLookup<uint16_t> bcRight_;
-//    OrderedLookup<uint8_t>  bcQual_;
-
-//    // see if this works, or if can use unordered, 'direct' query
-//    OrderedLookup<uint8_t> ctxtFlag_;
-//};
-
-//// --------------------------
-//// Pbi Lookup Aggregate
-//// --------------------------
-
-//class PbiIndexPrivate
-//{
-//public:
-//    PbiIndexPrivate(void);
-//    PbiIndexPrivate(const PbiRawData& rawIndex);
-//    PbiIndexPrivate(PbiRawData&& rawIndex);
-
-//    std::unique_ptr<PbiIndexPrivate> DeepCopy(void) const;
-
-//public:
-//    bool HasSection(const PbiFile::Section flag) const;
-//    void SetSection(const PbiFile::Section flag, bool ok = true);
-
-//public:
-
-//    template<typename T>
-//    IndexList Indices(const SubreadField& field,
-//                      const T& value,
-//                      const CompareType& compareType) const;
-
-//    template<typename T>
-//    IndexList Indices(const MappedField& field,
-//                      const T& value,
-//                      const CompareType& compareType) const;
-
-//    template<typename T>
-//    IndexList Indices(const BarcodeField& field,
-//                      const T& value,
-//                      const CompareType& compareType) const;
-
-//    template<typename T>
-//    IndexList IndicesMulti(const SubreadField& field,
-//                           const T& value) const;
-
-//    template<typename T>
-//    IndexList IndicesMulti(const MappedField& field,
-//                           const T& value) const;
-
-//    template<typename T>
-//    IndexList IndicesMulti(const BarcodeField& field,
-//                           const T& value) const;
-
-//    template<typename T>
-//    IndexResultBlocks Lookup(const SubreadField& field,
-//                             const T& value,
-//                             const CompareType& compareType) const;
-
-//    template<typename T>
-//    IndexResultBlocks Lookup(const MappedField& field,
-//                             const T& value,
-//                             const CompareType& compareType) const;
-
-//    template<typename T>
-//    IndexResultBlocks Lookup(const BarcodeField& field,
-//                             const T& value,
-//                             const CompareType& compareType) const;
-
-//    template<typename T>
-//    IndexResultBlocks LookupMulti(const SubreadField& field,
-//                                  const std::vector<T>& values) const;
-
-//    template<typename T>
-//    IndexResultBlocks LookupMulti(const MappedField& field,
-//                                  const std::vector<T>& values) const;
-
-//    template<typename T>
-//    IndexResultBlocks LookupMulti(const BarcodeField& field,
-//                                  const std::vector<T>& values) const;
-
-//    IndexResultBlocks LookupReference(const int32_t tId) const;
-
-//private:
-//    IndexResultBlocks MergeBlocksWithOffsets(const IndexList& indices) const;
-
-//public:
-//    PbiFile::VersionEnum version_;
-//    PbiFile::Sections sections_;
-//    uint32_t numReads_;
-
-//    // lookup structures
-//    SubreadLookupData   subreadData_;
-//    MappedLookupData    mappedData_;
-//    ReferenceLookupData referenceData_;
-//    BarcodeLookupData   barcodeData_;
-
-//private:
-//    // not-implemented - ensure no copy
-//    PbiIndexPrivate(const PbiIndexPrivate& other);
-//    PbiIndexPrivate& operator=(const PbiIndexPrivate& other);
-//};
-
-//// ----------------
-//// helper methods
-//// ----------------
-
-//inline IndexResultBlocks mergedIndexBlocks(IndexList&& indices)
-//{
-//    if (indices.empty())
-//        return IndexResultBlocks();
-//    std::sort(indices.begin(), indices.end());
-
-//    IndexResultBlocks result;
-//    result.push_back(IndexResultBlock(indices.at(0), 1));
-//    const size_t numIndices = indices.size();
-//    for (size_t i = 1; i < numIndices; ++i) {
-//        if (indices.at(i) == indices.at(i-1)+1)
-//            ++result.back().numReads_;
-//        else
-//            result.push_back(IndexResultBlock(indices.at(i), 1));
-//    }
-//    return result;
-//}
-
-//inline IndexResultBlocks mergedIndexBlocks(const IndexList& indices)
-//{
-//    IndexList copy = indices;
-//    return mergedIndexBlocks(std::move(copy));
-//}
-
-//inline size_t nullIndex(void)
-//{ return static_cast<size_t>(-1); }
-
-//inline
-//void pushBackIndices(IndexList& result,
-//                     const IndexList& toAppend)
-//{
-//    result.reserve(result.size() + toAppend.size());
-//    for (auto element : toAppend)
-//        result.push_back(element);
-//}
-
-//// -----------------
-//// OrderedLookup
-//// -----------------
-
-//template<typename T>
-//inline OrderedLookup<T>::OrderedLookup(void) { }
-
-//template<typename T>
-//inline OrderedLookup<T>::OrderedLookup(size_t n)
-//{ data_.reserve(n); }
-
-//template<typename T>
-//inline OrderedLookup<T>::OrderedLookup(const ContainerType& data)
-//    : data_(data)
-//{ }
-
-//template<typename T>
-//inline OrderedLookup<T>::OrderedLookup(ContainerType&& data)
-//    : data_(std::move(data))
-//{ }
-
-//template<typename T>
-//inline OrderedLookup<T>::OrderedLookup(const std::vector<T>& rawData)
-//{
-//    const size_t numElements = rawData.size();
-//    for (size_t i = 0; i < numElements; ++i)
-//        data_[ rawData.at(i) ].push_back(i);
-//}
-
-//template<typename T>
-//inline OrderedLookup<T>::OrderedLookup(std::vector<T>&& rawData)
-//{
-//    const size_t numElements = rawData.size();
-//    for (size_t i = 0; i < numElements; ++i)
-//        data_[ rawData.at(i) ].push_back(i);
-//}
-
-//template<typename T>
-//inline bool OrderedLookup<T>::operator==(const OrderedLookup<T>& other) const
-//{ return data_ == other.data_; }
-
-//template<typename T>
-//inline bool OrderedLookup<T>::operator!=(const OrderedLookup<T>& other) const
-//{ return !(*this == other); }
-
-//template<typename T>
-//inline IndexList
-//OrderedLookup<T>::LookupInclusiveRange(const IterType& begin,
-//                                       const IterType& end) const
-//{
-//    IndexList result;
-//    for ( auto iter = begin; iter != end; ++iter )
-//        pushBackIndices(result, iter->second);
-//    std::sort(result.begin(), result.end());
-//    return result;
-//}
-
-//template<typename T>
-//inline IndexList
-//OrderedLookup<T>::LookupExclusiveRange(const IterType& begin,
-//                                       const IterType& end,
-//                                       const KeyType& key) const
-//{
-//    IndexList result;
-//    for ( auto iter = begin; iter != end; ++iter ) {
-//        if (iter->first != key)
-//            pushBackIndices(result, iter->second);
-//    }
-//    std::sort(result.begin(), result.end());
-//    return result;
-//}
-
-//template<typename T>
-//inline IndexList
-//OrderedLookup<T>::LookupIndices(const OrderedLookup::KeyType& key,
-//                                const CompareType& compare) const
-//{
-//    const IterType begin = data_.cbegin();
-//    const IterType end   = data_.cend();
-//    switch(compare)
-//    {
-//        case CompareType::EQUAL:
-//        {
-//            const auto found = data_.find(key);
-//            if (found != end)
-//                return found->second;
-//            return IndexList();
-//        }
-//        case CompareType::LESS_THAN:          return LookupExclusiveRange(begin, data_.upper_bound(key), key);
-//        case CompareType::LESS_THAN_EQUAL:    return LookupInclusiveRange(begin, data_.upper_bound(key));
-//        case CompareType::GREATER_THAN:       return LookupExclusiveRange(data_.lower_bound(key), end, key);
-//        case CompareType::GREATER_THAN_EQUAL: return LookupInclusiveRange(data_.lower_bound(key), end);
-//        case CompareType::NOT_EQUAL:          return LookupExclusiveRange(begin, end, key);
-//        default:
-//            assert(false);
-//    }
-//    return IndexList();
-//}
-
-//// -----------------
-//// UnorderedLookup
-//// -----------------
-
-//template<typename T>
-//inline UnorderedLookup<T>::UnorderedLookup(void) { }
-
-//template<typename T>
-//inline UnorderedLookup<T>::UnorderedLookup(size_t n)
-//{ data_.reserve(n); }
-
-//template<typename T>
-//inline UnorderedLookup<T>::UnorderedLookup(const ContainerType& data)
-//    : data_(data)
-//{ }
-
-//template<typename T>
-//inline UnorderedLookup<T>::UnorderedLookup(ContainerType&& data)
-//    : data_(std::move(data))
-//{ }
-
-//template<typename T>
-//inline UnorderedLookup<T>::UnorderedLookup(const std::vector<T> &rawData)
-//{
-//    const size_t numElements = rawData.size();
-//    for (size_t i = 0; i < numElements; ++i)
-//        data_[ rawData.at(i) ].push_back(i);
-//}
-
-//template<typename T>
-//inline UnorderedLookup<T>::UnorderedLookup(std::vector<T>&& rawData)
-//{
-//    const size_t numElements = rawData.size();
-//    for (size_t i = 0; i < numElements; ++i)
-//        data_[ rawData.at(i) ].push_back(i);
-//}
-
-//template<typename T>
-//inline bool UnorderedLookup<T>::operator==(const UnorderedLookup<T>& other) const
-//{ return data_ == other.data_; }
-
-//template<typename T>
-//inline bool UnorderedLookup<T>::operator!=(const UnorderedLookup<T>& other) const
-//{ return !(*this == other); }
-
-//template<typename T>
-//template<typename Compare>
-//inline IndexList
-//UnorderedLookup<T>::LookupHelper(const UnorderedLookup::KeyType& key,
-//                                 const Compare& cmp) const
-//{
-//    auto iter = data_.cbegin();
-//    const auto end = data_.cend();
-//    IndexList result; // init with some avg size ??
-//    for ( ; iter != end; ++iter ) {
-//        const auto e = (iter->first);
-//        if (cmp(e, key))
-//            pushBackIndices(result, iter->second);
-//    }
-//    std::sort(result.begin(), result.end());
-//    return result;
-//}
-
-//template<typename T>
-//inline IndexList
-//UnorderedLookup<T>::LookupIndices(const UnorderedLookup::KeyType& key,
-//                                  const CompareType& compare) const
-//{
-//    switch (compare) {
-//        case CompareType::EQUAL:
-//        {
-//            const auto found = data_.find(key);
-//            if (found != data_.cend())
-//                return found->second;
-//            else
-//                return IndexList();
-//        }
-//        case CompareType::LESS_THAN:          return LookupHelper(key, std::less<KeyType>());
-//        case CompareType::LESS_THAN_EQUAL:    return LookupHelper(key, std::less_equal<KeyType>());
-//        case CompareType::GREATER_THAN:       return LookupHelper(key, std::greater<KeyType>());
-//        case CompareType::GREATER_THAN_EQUAL: return LookupHelper(key, std::greater_equal<KeyType>());
-//        case CompareType::NOT_EQUAL:          return LookupHelper(key, std::not_equal_to<KeyType>());
-//        default:
-//            assert(false);
-//    }
-//    return IndexList();
-//}
-
-//// -------------------
-//// SubreadLookupData
-//// -------------------
-
-//inline
-//void SubreadLookupData::ApplyOffsets(IndexResultBlocks& blocks) const
-//{
-//    for (IndexResultBlock& block : blocks)
-//        block.virtualOffset_ = fileOffset_.at(block.firstIndex_);
-//}
-
-//template<typename T>
-//inline IndexList SubreadLookupData::Indices(const SubreadField& field,
-//                                            const T& value,
-//                                            const CompareType& compareType) const
-//{
-//    switch(field) {
-//        case SubreadField::RG_ID:        return rgId_.LookupIndices(value, compareType);
-//        case SubreadField::Q_START:      return qStart_.LookupIndices(value, compareType);
-//        case SubreadField::Q_END:        return qEnd_.LookupIndices(value, compareType);
-//        case SubreadField::ZMW:          return holeNumber_.LookupIndices(value, compareType);
-//        case SubreadField::READ_QUALITY: return readQual_.LookupIndices(value, compareType);
-
-//        case SubreadField::VIRTUAL_OFFSET : // fall-through, not supported this way
-//        default:
-//            assert(false);
-//    }
-//    return IndexList();
-//}
-
-//template<typename T>
-//inline IndexList SubreadLookupData::IndicesMulti(const SubreadField& field,
-//                                                 const std::vector<T>& values) const
-//{
-//    IndexList result;
-//    for (auto value : values) {
-//        const IndexList& valueIndices = Indices(field, value, CompareType::EQUAL);
-//        result.reserve(result.size() + valueIndices.size());
-//        for (auto i : valueIndices)
-//            result.push_back(i);
-//    }
-//    return result;
-//}
-
-//// -------------------
-//// MappedLookupData
-//// -------------------
-
-//template<typename T>
-//inline IndexList MappedLookupData::Indices(const MappedField& field,
-//                                           const T& value,
-//                                           const CompareType& compareType) const
-//{
-//    switch(field) {
-//        case MappedField::T_ID:        return tId_.LookupIndices(value, compareType);
-//        case MappedField::T_START:     return tStart_.LookupIndices(value, compareType);
-//        case MappedField::T_END:       return tEnd_.LookupIndices(value, compareType);
-//        case MappedField::A_START:     return aStart_.LookupIndices(value, compareType);
-//        case MappedField::A_END:       return aEnd_.LookupIndices(value, compareType);
-//        case MappedField::N_M:         return nM_.LookupIndices(value, compareType);
-//        case MappedField::N_MM:        return nM_.LookupIndices(value, compareType);
-//        case MappedField::MAP_QUALITY: return mapQV_.LookupIndices(value, compareType);
-
-//        // MappedField::STRAND has its own specialization
-
-//        default:
-//            assert(false);
-//    }
-//    return IndexList();
-//}
-
-//template<>
-//inline IndexList MappedLookupData::Indices(const MappedField& field,
-//                                           const Strand& strand,
-//                                           const CompareType& compareType) const
-//{
-//    assert(field == MappedField::STRAND);
-
-//    if (compareType == CompareType::EQUAL) {
-//        if (strand == Strand::FORWARD)
-//            return forwardStrand_;
-//        else
-//            return reverseStrand_;
-//    } else if (compareType == CompareType::NOT_EQUAL) {
-//        if (strand == Strand::FORWARD)
-//            return reverseStrand_;
-//        else
-//            return forwardStrand_;
-//    }
-
-//    // only EQUAL/NOT_EQUAL supported
-//    assert(false);
-//    return IndexList();
-//}
-
-//template<typename T>
-//inline IndexList MappedLookupData::IndicesMulti(const MappedField& field,
-//                                                const std::vector<T>& values) const
-//{
-//    IndexList result;
-//    for (auto value : values) {
-//        const IndexList& valueIndices = Indices(field, value, CompareType::EQUAL);
-//        result.reserve(result.size() + valueIndices.size());
-//        for (auto i : valueIndices)
-//            result.push_back(i);
-//    }
-//    return result;
-//}
-
-
-//// ---------------------
-//// ReferenceLookupData
-//// ---------------------
-
-//inline IndexRange ReferenceLookupData::Indices(const int32_t tId) const
-//{
-//    auto found = references_.find(tId);
-//    if (found == references_.cend())
-//        return IndexRange(nullIndex(), nullIndex());
-//    return found->second;
-//}
-
-//// -------------------
-//// BarcodeLookupData
-//// -------------------
-
-//template<typename T>
-//inline IndexList BarcodeLookupData::Indices(const BarcodeField& field,
-//                                            const T& value,
-//                                            const CompareType& compareType) const
-//{
-//    switch(field) {
-//        case BarcodeField::BC_LEFT:      return bcLeft_.LookupIndices(value, compareType);
-//        case BarcodeField::BC_RIGHT:     return bcRight_.LookupIndices(value, compareType);
-//        case BarcodeField::BC_QUALITY:   return bcQual_.LookupIndices(value, compareType);
-//        case BarcodeField::CONTEXT_FLAG: return ctxtFlag_.LookupIndices(value, compareType);
-//        default:
-//            assert(false);
-//    }
-//    return IndexList();
-//}
-
-//template<typename T>
-//inline IndexList BarcodeLookupData::IndicesMulti(const BarcodeField& field,
-//                                                 const std::vector<T>& values) const
-//{
-//    IndexList result;
-//    for (auto value : values) {
-//        const IndexList& valueIndices = Indices(field, value, CompareType::EQUAL);
-//        result.reserve(result.size() + valueIndices.size());
-//        for (auto i : valueIndices)
-//            result.push_back(i);
-//    }
-//    return result;
-//}
-
-
-//// -----------------
-//// PbiIndexPrivate
-//// -----------------
-
-//inline bool PbiIndexPrivate::HasSection(const PbiFile::Section flag) const
-//{ return (sections_ & flag) != 0; }
-
-//inline void PbiIndexPrivate::SetSection(const PbiFile::Section flag, bool ok)
-//{ if (ok) sections_ |= flag; else sections_ &= ~flag; }
-
-//template<typename T>
-//inline IndexList
-//PbiIndexPrivate::Indices(const SubreadField& field,
-//                         const T& value,
-//                         const CompareType& compareType) const
-//{ return subreadData_.Indices(field, value, compareType); }
-
-//template<typename T>
-//inline IndexList
-//PbiIndexPrivate::Indices(const MappedField& field,
-//                         const T& value,
-//                         const CompareType& compareType) const
-//{ return mappedData_.Indices(field, value, compareType); }
-
-//template<typename T>
-//inline IndexList
-//PbiIndexPrivate::Indices(const BarcodeField& field,
-//                         const T& value,
-//                         const CompareType& compareType) const
-//{ return barcodeData_.Indices(field, value, compareType); }
-
-//template<typename T>
-//inline IndexList
-//PbiIndexPrivate::IndicesMulti(const SubreadField& field,
-//                              const T& value) const
-//{ return subreadData_.IndicesMulti(field, value); }
-
-//template<typename T>
-//inline IndexList
-//PbiIndexPrivate::IndicesMulti(const MappedField& field,
-//                              const T& value) const
-//{ return mappedData_.IndicesMulti(field, value); }
-
-//template<typename T>
-//inline IndexList
-//PbiIndexPrivate::IndicesMulti(const BarcodeField& field,
-//                              const T& value) const
-//{ return barcodeData_.IndicesMulti(field, value); }
-
-//template<typename T>
-//inline IndexResultBlocks
-//PbiIndexPrivate::Lookup(const SubreadField& field,
-//                        const T& value,
-//                        const CompareType& compareType) const
-//{ return MergeBlocksWithOffsets(subreadData_.Indices(field, value, compareType)); }
-
-//template<typename T>
-//inline IndexResultBlocks
-//PbiIndexPrivate::Lookup(const MappedField& field,
-//                        const T& value,
-//                        const CompareType& compareType) const
-//{
-//    if (!HasSection(PbiFile::MAPPED))
-//        return IndexResultBlocks();
-//    return MergeBlocksWithOffsets(mappedData_.Indices(field, value, compareType));
-//}
-
-//template<typename T>
-//inline IndexResultBlocks
-//PbiIndexPrivate::Lookup(const BarcodeField& field,
-//                        const T& value,
-//                        const CompareType& compareType) const
-//{
-//    if (!HasSection(PbiFile::BARCODE))
-//        return IndexResultBlocks();
-//    return MergeBlocksWithOffsets(barcodeData_.Indices(field, value, compareType));
-//}
-
-//template<typename T>
-//inline IndexResultBlocks
-//PbiIndexPrivate::LookupMulti(const SubreadField& field,
-//                             const std::vector<T>& values) const
-//{ return MergeBlocksWithOffsets(subreadData_.IndicesMulti(field, values)); }
-
-//template<typename T>
-//inline IndexResultBlocks
-//PbiIndexPrivate::LookupMulti(const MappedField& field,
-//                             const std::vector<T>& values) const
-//{ return MergeBlocksWithOffsets(mappedData_.IndicesMulti(field, values)); }
-
-//template<typename T>
-//inline IndexResultBlocks
-//PbiIndexPrivate::LookupMulti(const BarcodeField& field,
-//                             const std::vector<T>& values) const
-//{ return MergeBlocksWithOffsets(barcodeData_.IndicesMulti(field, values)); }
-
-//inline IndexResultBlocks
-//PbiIndexPrivate::LookupReference(const int32_t tId) const
-//{
-//    if (!HasSection(PbiFile::REFERENCE))
-//        return IndexResultBlocks();
-//    const IndexRange& indexRange = referenceData_.Indices(tId);
-//    if (indexRange.first == nullIndex() && indexRange.second == nullIndex())
-//        return IndexResultBlocks();
-//    const size_t numReads = indexRange.second - indexRange.first;
-//    IndexResultBlocks blocks(1, IndexResultBlock(indexRange.first, numReads));
-//    subreadData_.ApplyOffsets(blocks);
-//    return blocks;
-//}
-
-//inline IndexResultBlocks
-//PbiIndexPrivate::MergeBlocksWithOffsets(const IndexList& indices) const
-//{
-//    IndexResultBlocks blocks = mergedIndexBlocks(indices);
-//    subreadData_.ApplyOffsets(blocks);
-//    return blocks;
-//}
-
-//} // namespace internal
-
-//template<typename FieldType, typename ValueType>
-//inline IndexRequestBase<FieldType, ValueType>::IndexRequestBase(const FieldType field,
-//                                                                const ValueType& value,
-//                                                                const CompareType compareType)
-//    : field_(field)
-//    , value_(value)
-//    , compareType_(compareType)
-//{ }
-
-//template<typename FieldType, typename ValueType>
-//inline IndexMultiRequestBase<FieldType, ValueType>::IndexMultiRequestBase(const FieldType field,
-//                                                                          const std::vector<ValueType>& values)
-//    : field_(field)
-//    , values_(values)
-//{ }
-
-//template<SubreadField field, typename ValueType>
-//inline SubreadIndexRequest<field, ValueType>::SubreadIndexRequest(const ValueType& value,
-//                                                                  const CompareType& compareType)
-//    : IndexRequestBase<SubreadField, ValueType>(field, value, compareType)
-//{ }
-
-//template<SubreadField field, typename ValueType>
-//inline SubreadIndexMultiRequest<field, ValueType>::SubreadIndexMultiRequest(const std::vector<ValueType>& values)
-//    : IndexMultiRequestBase<SubreadField, ValueType>(field, values)
-//{ }
-
-//template<MappedField field, typename ValueType>
-//inline MappedIndexRequest<field, ValueType>::MappedIndexRequest(const ValueType& value,
-//                                                                const CompareType& compareType)
-//    : IndexRequestBase<MappedField, ValueType>(field, value, compareType)
-//{ }
-
-//template<MappedField field, typename ValueType>
-//inline MappedIndexMultiRequest<field, ValueType>::MappedIndexMultiRequest(const std::vector<ValueType>& values)
-//    : IndexMultiRequestBase<MappedField, ValueType>(field, values)
-//{ }
-
-//template<BarcodeField field, typename ValueType>
-//inline BarcodeIndexRequest<field, ValueType>::BarcodeIndexRequest(const ValueType& value,
-//                                                                  const CompareType& compareType)
-//    : IndexRequestBase<BarcodeField, ValueType>(field, value, compareType)
-//{ }
-
-//template<BarcodeField field, typename ValueType>
-//inline BarcodeIndexMultiRequest<field, ValueType>::BarcodeIndexMultiRequest(const std::vector<ValueType>& values)
-//    : IndexMultiRequestBase<BarcodeField, ValueType>(field, values)
-//{ }
-
-//template <typename FieldType, typename ValueType>
-//inline IndexList
-//PbiIndex::RawIndices(const IndexRequestBase<FieldType, ValueType>& request) const
-//{ return d_->Indices(request.field_, request.value_, request.compareType_); }
-
-//template <typename FieldType, typename ValueType>
-//inline IndexList
-//PbiIndex::RawIndices(const IndexMultiRequestBase<FieldType, ValueType>& request) const
-//{ return d_->Indices(request.field_, request.values_); }
-
-//template <typename FieldType, typename ValueType>
-//inline IndexResultBlocks
-//PbiIndex::Lookup(const IndexRequestBase<FieldType, ValueType>& request) const
-//{ return d_->Lookup(request.field_, request.value_, request.compareType_); }
-
-//template <typename FieldType, typename ValueType>
-//inline IndexResultBlocks
-//PbiIndex::Lookup(const IndexMultiRequestBase<FieldType, ValueType>& request) const
-//{ return d_->LookupMulti(request.field_, request.values_); }
-
-//inline IndexResultBlocks PbiIndex::LookupReference(const int32_t tId) const
-//{ return d_->LookupReference(tId); }
-
-//} // namespace BAM
-//} // namespace PacBio
-
-//#endif // PACBIOINDEX_P_H
diff --git a/include/pbbam/internal/PbiIndex_p.inl b/include/pbbam/internal/PbiIndex_p.inl
deleted file mode 100644
index 41dc831..0000000
--- a/include/pbbam/internal/PbiIndex_p.inl
+++ /dev/null
@@ -1,927 +0,0 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//
-//  * Redistributions in binary form must reproduce the above
-//    copyright notice, this list of conditions and the following
-//    disclaimer in the documentation and/or other materials provided
-//    with the distribution.
-//
-//  * Neither the name of Pacific Biosciences nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-//
-// Author: Derek Barnett
-
-#include "pbbam/BamRecord.h"
-#include "pbbam/PbiFile.h"
-#include "pbbam/PbiIndex.h"
-#include "pbbam/PbiRawData.h"
-
-#include <algorithm>
-#include <map>
-#include <memory>
-#include <unordered_map>
-#include <utility>
-#include <vector>
-#include <cassert>
-
-namespace PacBio {
-namespace BAM {
-namespace internal {
-
-// --------------------------
-// Ordered Lookup Container (e.g. map)
-// --------------------------
-
-template<typename T>
-class OrderedLookup
-{
-public:
-    typedef T         KeyType;
-    typedef IndexList ValueType;
-    typedef std::map<KeyType, ValueType> ContainerType;
-    typedef typename ContainerType::const_iterator IterType;
-
-public:
-    OrderedLookup(void);
-    OrderedLookup(size_t n);
-    OrderedLookup(const ContainerType& data);
-    OrderedLookup(ContainerType&& data);
-    OrderedLookup(const std::vector<T>& rawData);
-    OrderedLookup(std::vector<T>&& rawData);
-
-public:
-    bool operator==(const OrderedLookup<T>& other) const;
-    bool operator!=(const OrderedLookup<T>& other) const;
-
-public:
-    IndexList LookupIndices(const KeyType& key,
-                            const CompareType& compare) const;
-
-private:
-    IndexList LookupInclusiveRange(const IterType& begin,
-                                   const IterType& end) const;
-
-    IndexList LookupExclusiveRange(const IterType& begin,
-                                   const IterType& end,
-                                   const KeyType& key) const;
-
-private:
-    ContainerType data_;
-};
-
-// --------------------------
-// Unordered Lookup Container (e.g. hash)
-// --------------------------
-
-template<typename T>
-class UnorderedLookup
-{
-public:
-    typedef T         KeyType;
-    typedef IndexList ValueType;
-    typedef std::unordered_map<KeyType, ValueType> ContainerType;
-
-public:
-    UnorderedLookup(void);
-    UnorderedLookup(size_t n);
-    UnorderedLookup(const ContainerType& data);
-    UnorderedLookup(ContainerType&& data);
-    UnorderedLookup(const std::vector<T>& rawData);
-    UnorderedLookup(std::vector<T>&& rawData);
-
-public:
-    bool operator==(const UnorderedLookup<T>& other) const;
-    bool operator!=(const UnorderedLookup<T>& other) const;
-
-public:
-    IndexList LookupIndices(const KeyType& key,
-                            const CompareType& compare) const;
-
-private:
-    template<typename Compare>
-    IndexList LookupHelper(const KeyType& key, const Compare& cmp) const;
-
-private:
-    ContainerType data_;
-};
-
-// ----------------
-// Subread Data
-// ----------------
-
-struct SubreadLookupData
-{
-    // ctors
-    SubreadLookupData(void);
-    SubreadLookupData(const PbiRawSubreadData& rawData);
-//    SubreadLookupData(PbiRawSubreadData&& rawData);
-
-    // add offset data to index result blocks
-    void ApplyOffsets(IndexResultBlocks& blocks) const;
-
-    template<typename T>
-    IndexList Indices(const SubreadField& field,
-                      const T& value,
-                      const CompareType& compareType) const;
-
-    template<typename T>
-    IndexList IndicesMulti(const SubreadField& field,
-                           const std::vector<T>& values) const;
-
-    // map ordering doesn't make sense, optimize for direct lookup
-    UnorderedLookup<int32_t> rgId_;
-
-    // numeric comparisons make sense, keep key ordering preserved
-    OrderedLookup<int32_t>  qStart_;
-    OrderedLookup<int32_t>  qEnd_;
-    OrderedLookup<int32_t>  holeNumber_;
-    OrderedLookup<uint16_t> readQual_;
-
-    // offsets
-    std::vector<int64_t> fileOffset_;
-};
-
-// -----------------
-// Mapped Data
-// -----------------
-
-struct MappedLookupData
-{
-    // ctors
-    MappedLookupData(void);
-    MappedLookupData(const PbiRawMappedData& rawData);
-//    MappedLookupData(PbiRawMappedData&& rawData);
-
-    template<typename T>
-    IndexList Indices(const MappedField& field,
-                      const T& value,
-                      const CompareType& compareType) const;
-
-    template<typename T>
-    IndexList IndicesMulti(const MappedField& field,
-                           const std::vector<T>& values) const;
-
-    // numeric comparisons make sense, keep key ordering preserved
-    OrderedLookup<int32_t>  tId_;
-    OrderedLookup<uint32_t> tStart_;
-    OrderedLookup<uint32_t> tEnd_;
-    OrderedLookup<uint32_t> aStart_;
-    OrderedLookup<uint32_t> aEnd_;
-    OrderedLookup<uint32_t> nM_;
-    OrderedLookup<uint32_t> nMM_;
-    OrderedLookup<uint8_t>  mapQV_;
-
-    // generated, not stored in PBI
-    OrderedLookup<uint32_t> nIns_;
-    OrderedLookup<uint32_t> nDel_;
-
-    // no need for map overhead, just store direct indices
-    IndexList reverseStrand_;
-    IndexList forwardStrand_;
-};
-
-// ------------------
-// Reference Data
-// ------------------
-
-struct ReferenceLookupData
-{
-    // ctors
-    ReferenceLookupData(void);
-    ReferenceLookupData(const PbiRawReferenceData& rawData);
-//    ReferenceLookupData(PbiRawReferenceData&& rawData);
-
-    IndexRange Indices(const int32_t tId) const;
-
-    // references_[tId] = (begin, end) indices
-    // into SubreadLookupData::fileOffset_
-    std::unordered_map<int32_t, IndexRange> references_;
-};
-
-// ---------------
-// Barcode Data
-// ---------------
-
-struct BarcodeLookupData
-{
-    // ctors
-    BarcodeLookupData(void);
-    BarcodeLookupData(const PbiRawBarcodeData& rawData);
-//    BarcodeLookupData(PbiRawBarcodeData&& rawData);
-
-    template<typename T>
-    IndexList Indices(const BarcodeField& field,
-                      const T& value,
-                      const CompareType& compareType) const;
-
-    template<typename T>
-    IndexList IndicesMulti(const BarcodeField& field,
-                           const std::vector<T>& values) const;
-
-    // numeric comparisons make sense, keep key ordering preserved
-    OrderedLookup<uint16_t> bcLeft_;
-    OrderedLookup<uint16_t> bcRight_;
-    OrderedLookup<uint8_t>  bcQual_;
-
-    // see if this works, or if can use unordered, 'direct' query
-    OrderedLookup<uint8_t> ctxtFlag_;
-};
-
-// --------------------------
-// Pbi Lookup Aggregate
-// --------------------------
-
-class PbiIndexPrivate
-{
-public:
-    PbiIndexPrivate(void);
-    PbiIndexPrivate(const PbiRawData& rawIndex);
-    PbiIndexPrivate(PbiRawData&& rawIndex);
-
-    std::unique_ptr<PbiIndexPrivate> DeepCopy(void) const;
-
-public:
-    bool HasSection(const PbiFile::Section flag) const;
-    void SetSection(const PbiFile::Section flag, bool ok = true);
-
-public:
-
-    template<typename T>
-    IndexList Indices(const SubreadField& field,
-                      const T& value,
-                      const CompareType& compareType) const;
-
-    template<typename T>
-    IndexList Indices(const MappedField& field,
-                      const T& value,
-                      const CompareType& compareType) const;
-
-    template<typename T>
-    IndexList Indices(const BarcodeField& field,
-                      const T& value,
-                      const CompareType& compareType) const;
-
-    template<typename T>
-    IndexList IndicesMulti(const SubreadField& field,
-                           const T& value) const;
-
-    template<typename T>
-    IndexList IndicesMulti(const MappedField& field,
-                           const T& value) const;
-
-    template<typename T>
-    IndexList IndicesMulti(const BarcodeField& field,
-                           const T& value) const;
-
-    template<typename T>
-    IndexResultBlocks Lookup(const SubreadField& field,
-                             const T& value,
-                             const CompareType& compareType) const;
-
-    template<typename T>
-    IndexResultBlocks Lookup(const MappedField& field,
-                             const T& value,
-                             const CompareType& compareType) const;
-
-    template<typename T>
-    IndexResultBlocks Lookup(const BarcodeField& field,
-                             const T& value,
-                             const CompareType& compareType) const;
-
-    template<typename T>
-    IndexResultBlocks LookupMulti(const SubreadField& field,
-                                  const std::vector<T>& values) const;
-
-    template<typename T>
-    IndexResultBlocks LookupMulti(const MappedField& field,
-                                  const std::vector<T>& values) const;
-
-    template<typename T>
-    IndexResultBlocks LookupMulti(const BarcodeField& field,
-                                  const std::vector<T>& values) const;
-
-    IndexResultBlocks LookupReference(const int32_t tId) const;
-
-private:
-    IndexResultBlocks MergeBlocksWithOffsets(const IndexList& indices) const;
-
-public:
-    PbiFile::VersionEnum version_;
-    PbiFile::Sections sections_;
-    uint32_t numReads_;
-
-    // lookup structures
-    SubreadLookupData   subreadData_;
-    MappedLookupData    mappedData_;
-    ReferenceLookupData referenceData_;
-    BarcodeLookupData   barcodeData_;
-
-private:
-    // not-implemented - ensure no copy
-    PbiIndexPrivate(const PbiIndexPrivate& other);
-    PbiIndexPrivate& operator=(const PbiIndexPrivate& other);
-};
-
-// ----------------
-// helper methods
-// ----------------
-
-inline IndexResultBlocks mergedIndexBlocks(IndexList&& indices)
-{
-    if (indices.empty())
-        return IndexResultBlocks();
-    std::sort(indices.begin(), indices.end());
-
-    IndexResultBlocks result;
-    result.push_back(IndexResultBlock(indices.at(0), 1));
-    const size_t numIndices = indices.size();
-    for (size_t i = 1; i < numIndices; ++i) {
-        if (indices.at(i) == indices.at(i-1)+1)
-            ++result.back().numReads_;
-        else
-            result.push_back(IndexResultBlock(indices.at(i), 1));
-    }
-    return result;
-}
-
-inline IndexResultBlocks mergedIndexBlocks(const IndexList& indices)
-{
-    IndexList copy = indices;
-    return mergedIndexBlocks(std::move(copy));
-}
-
-inline size_t nullIndex(void)
-{ return static_cast<size_t>(-1); }
-
-inline
-void pushBackIndices(IndexList& result,
-                     const IndexList& toAppend)
-{
-    result.reserve(result.size() + toAppend.size());
-    for (auto element : toAppend)
-        result.push_back(element);
-}
-
-// -----------------
-// OrderedLookup
-// -----------------
-
-template<typename T>
-inline OrderedLookup<T>::OrderedLookup(void) { }
-
-template<typename T>
-inline OrderedLookup<T>::OrderedLookup(size_t n)
-{ data_.reserve(n); }
-
-template<typename T>
-inline OrderedLookup<T>::OrderedLookup(const ContainerType& data)
-    : data_(data)
-{ }
-
-template<typename T>
-inline OrderedLookup<T>::OrderedLookup(ContainerType&& data)
-    : data_(std::move(data))
-{ }
-
-template<typename T>
-inline OrderedLookup<T>::OrderedLookup(const std::vector<T>& rawData)
-{
-    const size_t numElements = rawData.size();
-    for (size_t i = 0; i < numElements; ++i)
-        data_[ rawData.at(i) ].push_back(i);
-}
-
-template<typename T>
-inline OrderedLookup<T>::OrderedLookup(std::vector<T>&& rawData)
-{
-    const size_t numElements = rawData.size();
-    for (size_t i = 0; i < numElements; ++i)
-        data_[ rawData.at(i) ].push_back(i);
-}
-
-template<typename T>
-inline bool OrderedLookup<T>::operator==(const OrderedLookup<T>& other) const
-{ return data_ == other.data_; }
-
-template<typename T>
-inline bool OrderedLookup<T>::operator!=(const OrderedLookup<T>& other) const
-{ return !(*this == other); }
-
-template<typename T>
-inline IndexList
-OrderedLookup<T>::LookupInclusiveRange(const IterType& begin,
-                                       const IterType& end) const
-{
-    IndexList result;
-    for ( auto iter = begin; iter != end; ++iter )
-        pushBackIndices(result, iter->second);
-    std::sort(result.begin(), result.end());
-    return result;
-}
-
-template<typename T>
-inline IndexList
-OrderedLookup<T>::LookupExclusiveRange(const IterType& begin,
-                                       const IterType& end,
-                                       const KeyType& key) const
-{
-    IndexList result;
-    for ( auto iter = begin; iter != end; ++iter ) {
-        if (iter->first != key)
-            pushBackIndices(result, iter->second);
-    }
-    std::sort(result.begin(), result.end());
-    return result;
-}
-
-template<typename T>
-inline IndexList
-OrderedLookup<T>::LookupIndices(const OrderedLookup::KeyType& key,
-                                const CompareType& compare) const
-{
-    const IterType begin = data_.cbegin();
-    const IterType end   = data_.cend();
-    switch(compare)
-    {
-        case CompareType::EQUAL:
-        {
-            const auto found = data_.find(key);
-            if (found != end)
-                return found->second;
-            return IndexList();
-        }
-        case CompareType::LESS_THAN:          return LookupExclusiveRange(begin, data_.upper_bound(key), key);
-        case CompareType::LESS_THAN_EQUAL:    return LookupInclusiveRange(begin, data_.upper_bound(key));
-        case CompareType::GREATER_THAN:       return LookupExclusiveRange(data_.lower_bound(key), end, key);
-        case CompareType::GREATER_THAN_EQUAL: return LookupInclusiveRange(data_.lower_bound(key), end);
-        case CompareType::NOT_EQUAL:          return LookupExclusiveRange(begin, end, key);
-        default:
-            assert(false);
-    }
-    return IndexList();
-}
-
-// -----------------
-// UnorderedLookup
-// -----------------
-
-template<typename T>
-inline UnorderedLookup<T>::UnorderedLookup(void) { }
-
-template<typename T>
-inline UnorderedLookup<T>::UnorderedLookup(size_t n)
-{ data_.reserve(n); }
-
-template<typename T>
-inline UnorderedLookup<T>::UnorderedLookup(const ContainerType& data)
-    : data_(data)
-{ }
-
-template<typename T>
-inline UnorderedLookup<T>::UnorderedLookup(ContainerType&& data)
-    : data_(std::move(data))
-{ }
-
-template<typename T>
-inline UnorderedLookup<T>::UnorderedLookup(const std::vector<T> &rawData)
-{
-    const size_t numElements = rawData.size();
-    for (size_t i = 0; i < numElements; ++i)
-        data_[ rawData.at(i) ].push_back(i);
-}
-
-template<typename T>
-inline UnorderedLookup<T>::UnorderedLookup(std::vector<T>&& rawData)
-{
-    const size_t numElements = rawData.size();
-    for (size_t i = 0; i < numElements; ++i)
-        data_[ rawData.at(i) ].push_back(i);
-}
-
-template<typename T>
-inline bool UnorderedLookup<T>::operator==(const UnorderedLookup<T>& other) const
-{ return data_ == other.data_; }
-
-template<typename T>
-inline bool UnorderedLookup<T>::operator!=(const UnorderedLookup<T>& other) const
-{ return !(*this == other); }
-
-template<typename T>
-template<typename Compare>
-inline IndexList
-UnorderedLookup<T>::LookupHelper(const UnorderedLookup::KeyType& key,
-                                 const Compare& cmp) const
-{
-    auto iter = data_.cbegin();
-    const auto end = data_.cend();
-    IndexList result; // init with some avg size ??
-    for ( ; iter != end; ++iter ) {
-        const auto e = (iter->first);
-        if (cmp(e, key))
-            pushBackIndices(result, iter->second);
-    }
-    std::sort(result.begin(), result.end());
-    return result;
-}
-
-template<typename T>
-inline IndexList
-UnorderedLookup<T>::LookupIndices(const UnorderedLookup::KeyType& key,
-                                  const CompareType& compare) const
-{
-    switch (compare) {
-        case CompareType::EQUAL:
-        {
-            const auto found = data_.find(key);
-            if (found != data_.cend())
-                return found->second;
-            else
-                return IndexList();
-        }
-        case CompareType::LESS_THAN:          return LookupHelper(key, std::less<KeyType>());
-        case CompareType::LESS_THAN_EQUAL:    return LookupHelper(key, std::less_equal<KeyType>());
-        case CompareType::GREATER_THAN:       return LookupHelper(key, std::greater<KeyType>());
-        case CompareType::GREATER_THAN_EQUAL: return LookupHelper(key, std::greater_equal<KeyType>());
-        case CompareType::NOT_EQUAL:          return LookupHelper(key, std::not_equal_to<KeyType>());
-        default:
-            assert(false);
-    }
-    return IndexList();
-}
-
-// -------------------
-// SubreadLookupData
-// -------------------
-
-inline
-void SubreadLookupData::ApplyOffsets(IndexResultBlocks& blocks) const
-{
-    for (IndexResultBlock& block : blocks)
-        block.virtualOffset_ = fileOffset_.at(block.firstIndex_);
-}
-
-template<typename T>
-inline IndexList SubreadLookupData::Indices(const SubreadField& field,
-                                            const T& value,
-                                            const CompareType& compareType) const
-{
-    switch(field) {
-        case SubreadField::RG_ID:        return rgId_.LookupIndices(value, compareType);
-        case SubreadField::Q_START:      return qStart_.LookupIndices(value, compareType);
-        case SubreadField::Q_END:        return qEnd_.LookupIndices(value, compareType);
-        case SubreadField::ZMW:          return holeNumber_.LookupIndices(value, compareType);
-        case SubreadField::READ_QUALITY: return readQual_.LookupIndices(value, compareType);
-
-        case SubreadField::VIRTUAL_OFFSET : // fall-through, not supported this way
-        default:
-            assert(false);
-    }
-    return IndexList();
-}
-
-template<typename T>
-inline IndexList SubreadLookupData::IndicesMulti(const SubreadField& field,
-                                                 const std::vector<T>& values) const
-{
-    IndexList result;
-    for (auto value : values) {
-        const IndexList& valueIndices = Indices(field, value, CompareType::EQUAL);
-        result.reserve(result.size() + valueIndices.size());
-        for (auto i : valueIndices)
-            result.push_back(i);
-    }
-    return result;
-}
-
-// -------------------
-// MappedLookupData
-// -------------------
-
-template<typename T>
-inline IndexList MappedLookupData::Indices(const MappedField& field,
-                                           const T& value,
-                                           const CompareType& compareType) const
-{
-    switch(field) {
-        case MappedField::T_ID:        return tId_.LookupIndices(value, compareType);
-        case MappedField::T_START:     return tStart_.LookupIndices(value, compareType);
-        case MappedField::T_END:       return tEnd_.LookupIndices(value, compareType);
-        case MappedField::A_START:     return aStart_.LookupIndices(value, compareType);
-        case MappedField::A_END:       return aEnd_.LookupIndices(value, compareType);
-        case MappedField::N_M:         return nM_.LookupIndices(value, compareType);
-        case MappedField::N_MM:        return nM_.LookupIndices(value, compareType);
-        case MappedField::MAP_QUALITY: return mapQV_.LookupIndices(value, compareType);
-
-        // MappedField::STRAND has its own specialization
-
-        default:
-            assert(false);
-    }
-    return IndexList();
-}
-
-template<>
-inline IndexList MappedLookupData::Indices(const MappedField& field,
-                                           const Strand& strand,
-                                           const CompareType& compareType) const
-{
-    assert(field == MappedField::STRAND);
-
-    if (compareType == CompareType::EQUAL) {
-        if (strand == Strand::FORWARD)
-            return forwardStrand_;
-        else
-            return reverseStrand_;
-    } else if (compareType == CompareType::NOT_EQUAL) {
-        if (strand == Strand::FORWARD)
-            return reverseStrand_;
-        else
-            return forwardStrand_;
-    }
-
-    // only EQUAL/NOT_EQUAL supported
-    assert(false);
-    return IndexList();
-}
-
-template<typename T>
-inline IndexList MappedLookupData::IndicesMulti(const MappedField& field,
-                                                const std::vector<T>& values) const
-{
-    IndexList result;
-    for (auto value : values) {
-        const IndexList& valueIndices = Indices(field, value, CompareType::EQUAL);
-        result.reserve(result.size() + valueIndices.size());
-        for (auto i : valueIndices)
-            result.push_back(i);
-    }
-    return result;
-}
-
-
-// ---------------------
-// ReferenceLookupData
-// ---------------------
-
-inline IndexRange ReferenceLookupData::Indices(const int32_t tId) const
-{
-    auto found = references_.find(tId);
-    if (found == references_.cend())
-        return IndexRange(nullIndex(), nullIndex());
-    return found->second;
-}
-
-// -------------------
-// BarcodeLookupData
-// -------------------
-
-template<typename T>
-inline IndexList BarcodeLookupData::Indices(const BarcodeField& field,
-                                            const T& value,
-                                            const CompareType& compareType) const
-{
-    switch(field) {
-        case BarcodeField::BC_LEFT:      return bcLeft_.LookupIndices(value, compareType);
-        case BarcodeField::BC_RIGHT:     return bcRight_.LookupIndices(value, compareType);
-        case BarcodeField::BC_QUALITY:   return bcQual_.LookupIndices(value, compareType);
-        case BarcodeField::CONTEXT_FLAG: return ctxtFlag_.LookupIndices(value, compareType);
-        default:
-            assert(false);
-    }
-    return IndexList();
-}
-
-template<typename T>
-inline IndexList BarcodeLookupData::IndicesMulti(const BarcodeField& field,
-                                                 const std::vector<T>& values) const
-{
-    IndexList result;
-    for (auto value : values) {
-        const IndexList& valueIndices = Indices(field, value, CompareType::EQUAL);
-        result.reserve(result.size() + valueIndices.size());
-        for (auto i : valueIndices)
-            result.push_back(i);
-    }
-    return result;
-}
-
-
-// -----------------
-// PbiIndexPrivate
-// -----------------
-
-inline bool PbiIndexPrivate::HasSection(const PbiFile::Section flag) const
-{ return (sections_ & flag) != 0; }
-
-inline void PbiIndexPrivate::SetSection(const PbiFile::Section flag, bool ok)
-{ if (ok) sections_ |= flag; else sections_ &= ~flag; }
-
-template<typename T>
-inline IndexList
-PbiIndexPrivate::Indices(const SubreadField& field,
-                         const T& value,
-                         const CompareType& compareType) const
-{ return subreadData_.Indices(field, value, compareType); }
-
-template<typename T>
-inline IndexList
-PbiIndexPrivate::Indices(const MappedField& field,
-                         const T& value,
-                         const CompareType& compareType) const
-{ return mappedData_.Indices(field, value, compareType); }
-
-template<typename T>
-inline IndexList
-PbiIndexPrivate::Indices(const BarcodeField& field,
-                         const T& value,
-                         const CompareType& compareType) const
-{ return barcodeData_.Indices(field, value, compareType); }
-
-template<typename T>
-inline IndexList
-PbiIndexPrivate::IndicesMulti(const SubreadField& field,
-                              const T& value) const
-{ return subreadData_.IndicesMulti(field, value); }
-
-template<typename T>
-inline IndexList
-PbiIndexPrivate::IndicesMulti(const MappedField& field,
-                              const T& value) const
-{ return mappedData_.IndicesMulti(field, value); }
-
-template<typename T>
-inline IndexList
-PbiIndexPrivate::IndicesMulti(const BarcodeField& field,
-                              const T& value) const
-{ return barcodeData_.IndicesMulti(field, value); }
-
-template<typename T>
-inline IndexResultBlocks
-PbiIndexPrivate::Lookup(const SubreadField& field,
-                        const T& value,
-                        const CompareType& compareType) const
-{ return MergeBlocksWithOffsets(subreadData_.Indices(field, value, compareType)); }
-
-template<typename T>
-inline IndexResultBlocks
-PbiIndexPrivate::Lookup(const MappedField& field,
-                        const T& value,
-                        const CompareType& compareType) const
-{
-    if (!HasSection(PbiFile::MAPPED))
-        return IndexResultBlocks();
-    return MergeBlocksWithOffsets(mappedData_.Indices(field, value, compareType));
-}
-
-template<typename T>
-inline IndexResultBlocks
-PbiIndexPrivate::Lookup(const BarcodeField& field,
-                        const T& value,
-                        const CompareType& compareType) const
-{
-    if (!HasSection(PbiFile::BARCODE))
-        return IndexResultBlocks();
-    return MergeBlocksWithOffsets(barcodeData_.Indices(field, value, compareType));
-}
-
-template<typename T>
-inline IndexResultBlocks
-PbiIndexPrivate::LookupMulti(const SubreadField& field,
-                             const std::vector<T>& values) const
-{ return MergeBlocksWithOffsets(subreadData_.IndicesMulti(field, values)); }
-
-template<typename T>
-inline IndexResultBlocks
-PbiIndexPrivate::LookupMulti(const MappedField& field,
-                             const std::vector<T>& values) const
-{ return MergeBlocksWithOffsets(mappedData_.IndicesMulti(field, values)); }
-
-template<typename T>
-inline IndexResultBlocks
-PbiIndexPrivate::LookupMulti(const BarcodeField& field,
-                             const std::vector<T>& values) const
-{ return MergeBlocksWithOffsets(barcodeData_.IndicesMulti(field, values)); }
-
-inline IndexResultBlocks
-PbiIndexPrivate::LookupReference(const int32_t tId) const
-{
-    if (!HasSection(PbiFile::REFERENCE))
-        return IndexResultBlocks();
-    const IndexRange& indexRange = referenceData_.Indices(tId);
-    if (indexRange.first == nullIndex() && indexRange.second == nullIndex())
-        return IndexResultBlocks();
-    const size_t numReads = indexRange.second - indexRange.first;
-    IndexResultBlocks blocks(1, IndexResultBlock(indexRange.first, numReads));
-    subreadData_.ApplyOffsets(blocks);
-    return blocks;
-}
-
-inline IndexResultBlocks
-PbiIndexPrivate::MergeBlocksWithOffsets(const IndexList& indices) const
-{
-    IndexResultBlocks blocks = mergedIndexBlocks(indices);
-    subreadData_.ApplyOffsets(blocks);
-    return blocks;
-}
-
-} // namespace internal
-
-template<typename FieldType, typename ValueType>
-inline IndexRequestBase<FieldType, ValueType>::IndexRequestBase(const FieldType field,
-                                                                const ValueType& value,
-                                                                const CompareType compareType)
-    : field_(field)
-    , value_(value)
-    , compareType_(compareType)
-{ }
-
-template<typename FieldType, typename ValueType>
-inline IndexMultiRequestBase<FieldType, ValueType>::IndexMultiRequestBase(const FieldType field,
-                                                                          const std::vector<ValueType>& values)
-    : field_(field)
-    , values_(values)
-{ }
-
-template<SubreadField field, typename ValueType>
-inline SubreadIndexRequest<field, ValueType>::SubreadIndexRequest(const ValueType& value,
-                                                                  const CompareType& compareType)
-    : IndexRequestBase<SubreadField, ValueType>(field, value, compareType)
-{ }
-
-template<SubreadField field, typename ValueType>
-inline SubreadIndexMultiRequest<field, ValueType>::SubreadIndexMultiRequest(const std::vector<ValueType>& values)
-    : IndexMultiRequestBase<SubreadField, ValueType>(field, values)
-{ }
-
-template<MappedField field, typename ValueType>
-inline MappedIndexRequest<field, ValueType>::MappedIndexRequest(const ValueType& value,
-                                                                const CompareType& compareType)
-    : IndexRequestBase<MappedField, ValueType>(field, value, compareType)
-{ }
-
-template<MappedField field, typename ValueType>
-inline MappedIndexMultiRequest<field, ValueType>::MappedIndexMultiRequest(const std::vector<ValueType>& values)
-    : IndexMultiRequestBase<MappedField, ValueType>(field, values)
-{ }
-
-template<BarcodeField field, typename ValueType>
-inline BarcodeIndexRequest<field, ValueType>::BarcodeIndexRequest(const ValueType& value,
-                                                                  const CompareType& compareType)
-    : IndexRequestBase<BarcodeField, ValueType>(field, value, compareType)
-{ }
-
-template<BarcodeField field, typename ValueType>
-inline BarcodeIndexMultiRequest<field, ValueType>::BarcodeIndexMultiRequest(const std::vector<ValueType>& values)
-    : IndexMultiRequestBase<BarcodeField, ValueType>(field, values)
-{ }
-
-template <typename FieldType, typename ValueType>
-inline IndexList
-PbiIndex::RawIndices(const IndexRequestBase<FieldType, ValueType>& request) const
-{ return d_->Indices(request.field_, request.value_, request.compareType_); }
-
-template <typename FieldType, typename ValueType>
-inline IndexList
-PbiIndex::RawIndices(const IndexMultiRequestBase<FieldType, ValueType>& request) const
-{ return d_->Indices(request.field_, request.values_); }
-
-template <typename FieldType, typename ValueType>
-inline IndexResultBlocks
-PbiIndex::Lookup(const IndexRequestBase<FieldType, ValueType>& request) const
-{ return d_->Lookup(request.field_, request.value_, request.compareType_); }
-
-template <typename FieldType, typename ValueType>
-inline IndexResultBlocks
-PbiIndex::Lookup(const IndexMultiRequestBase<FieldType, ValueType>& request) const
-{ return d_->LookupMulti(request.field_, request.values_); }
-
-inline IndexResultBlocks PbiIndex::LookupReference(const int32_t tId) const
-{ return d_->LookupReference(tId); }
-
-} // namespace BAM
-} // namespace PacBio
-
diff --git a/include/pbbam/internal/PbiLookupData.inl b/include/pbbam/internal/PbiLookupData.inl
new file mode 100644
index 0000000..2ca38f3
--- /dev/null
+++ b/include/pbbam/internal/PbiLookupData.inl
@@ -0,0 +1,531 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiLookupData.inl
+/// \brief Inline implementations for the classes used for PBI data lookup.
+//
+// Author: Derek Barnett
+
+#include "pbbam/PbiLookupData.h"
+#include "pbbam/PbiRawData.h"
+#include "pbbam/Strand.h"
+#include <algorithm>
+#include <unordered_set>
+#include <cassert>
+
+namespace PacBio {
+namespace BAM {
+
+// ----------------
+// helper methods
+// ----------------
+
+inline IndexResultBlocks mergedIndexBlocks(IndexList&& indices)
+{
+    // Returns one block per run of consecutive values found in 'indices'.
+    if (indices.empty())
+        return IndexResultBlocks{ };
+
+    // sort & de-duplicate in place, so the runs can be found in a single pass
+    std::sort(indices.begin(), indices.end());
+    const auto uniqueEnd = std::unique(indices.begin(), indices.end());
+    auto result = IndexResultBlocks{ IndexResultBlock(indices.front(), 1) };
+    for (auto iter = indices.begin() + 1; iter != uniqueEnd; ++iter) {
+        if (*iter == *(iter - 1) + 1)
+            ++result.back().numReads_; // value extends the current block
+        else
+            result.push_back(IndexResultBlock(*iter, 1));
+    }
+    return result;
+}
+
+inline IndexResultBlocks mergedIndexBlocks(const IndexList& indices)
+{
+    // the rvalue overload sorts its argument in place, so pass it a temporary copy
+    return mergedIndexBlocks(IndexList(indices));
+}
+
+inline size_t nullIndex(void)
+{ return ~static_cast<size_t>(0); } // all bits set: the largest size_t value
+
+inline void pushBackIndices(IndexList& result,
+                            const IndexList& toAppend)
+{
+    // Appends 'toAppend' to 'result'. Ranged insert (not an exact reserve per call)
+    // lets the container apply its own growth policy when called in a loop.
+    result.insert(result.end(), toAppend.begin(), toAppend.end());
+}
+
+// -----------------
+// OrderedLookup
+// -----------------
+
+template<typename T>
+inline OrderedLookup<T>::OrderedLookup(void) { } // data_ is default-constructed
+
+template<typename T>
+inline OrderedLookup<T>::OrderedLookup(const container_type& data)
+    : data_(data) // copies an already-built container
+{ }
+
+template<typename T>
+inline OrderedLookup<T>::OrderedLookup(container_type&& data)
+    : data_(std::move(data)) // takes over an already-built container
+{ }
+
+template<typename T>
+inline OrderedLookup<T>::OrderedLookup(const std::vector<T>& rawData)
+{
+    size_t row = 0; // inverts the column: each value maps to the rows holding it
+    for (const auto& value : rawData)
+        data_[value].push_back(row++);
+}
+
+template<typename T>
+inline OrderedLookup<T>::OrderedLookup(std::vector<T>&& rawData)
+{
+    size_t row = 0; // values are copied out as keys; each maps to the rows holding it
+    for (const auto& value : rawData)
+        data_[value].push_back(row++);
+}
+
+template<typename T>
+inline bool OrderedLookup<T>::operator==(const OrderedLookup<T>& other) const
+{ return data_ == other.data_; } // equality is that of the underlying containers
+
+template<typename T>
+inline bool OrderedLookup<T>::operator!=(const OrderedLookup<T>& other) const
+{ return !(*this == other); } // defined as the negation of operator==
+
+template<typename T>
+inline typename OrderedLookup<T>::iterator OrderedLookup<T>::begin(void)
+{ return data_.begin(); } // mutable iteration over the stored entries
+
+template<typename T>
+inline typename OrderedLookup<T>::const_iterator OrderedLookup<T>::begin(void) const
+{ return data_.cbegin(); } // const overload forwards to cbegin()
+
+template<typename T>
+inline typename OrderedLookup<T>::const_iterator OrderedLookup<T>::cbegin(void) const
+{ return data_.cbegin(); }
+
+template<typename T>
+inline typename OrderedLookup<T>::iterator OrderedLookup<T>::end(void)
+{ return data_.end(); }
+
+template<typename T>
+inline typename OrderedLookup<T>::const_iterator OrderedLookup<T>::end(void) const
+{ return data_.cend(); } // const overload forwards to cend()
+
+template<typename T>
+inline typename OrderedLookup<T>::const_iterator OrderedLookup<T>::cend(void) const
+{ return data_.cend(); }
+
+template<typename T>
+inline bool OrderedLookup<T>::empty(void) const
+{ return data_.empty(); } // true when no key is stored
+
+template<typename T>
+inline size_t OrderedLookup<T>::size(void) const
+{ return data_.size(); } // number of stored keys, not of indexed rows
+
+template<typename T>
+inline IndexList
+OrderedLookup<T>::LookupInclusiveRange(const const_iterator &begin,
+                                       const const_iterator &end) const
+{
+    IndexList rows; // collects the indices of every entry in [begin, end)
+    for (const_iterator entry = begin; entry != end; ++entry)
+        pushBackIndices(rows, entry->second);
+    std::sort(rows.begin(), rows.end()); // report indices in ascending order
+    return rows;
+}
+
+template<typename T>
+inline IndexList
+OrderedLookup<T>::LookupExclusiveRange(const const_iterator& begin,
+                                       const const_iterator& end,
+                                       const key_type& key) const
+{
+    IndexList rows; // indices of every entry in [begin, end) whose key is not 'key'
+    for (const_iterator entry = begin; entry != end; ++entry) {
+        if (entry->first != key)
+            pushBackIndices(rows, entry->second);
+    }
+    std::sort(rows.begin(), rows.end()); // report indices in ascending order
+    return rows;
+}
+
+template<typename T>
+inline IndexList
+OrderedLookup<T>::LookupIndices(const OrderedLookup::key_type& key,
+                                const Compare::Type& compare) const
+{
+    const auto first = data_.cbegin();
+    const auto last  = data_.cend();
+    switch(compare)
+    {
+        case Compare::EQUAL:
+        {
+            // exact match: hand back the stored index list for 'key', if any
+            const auto match = data_.find(key);
+            return (match != last) ? match->second : IndexList();
+        }
+        // range queries: the 'exclusive' helper also skips the entry for 'key'
+        case Compare::LESS_THAN:          return LookupExclusiveRange(first, data_.upper_bound(key), key);
+        case Compare::LESS_THAN_EQUAL:    return LookupInclusiveRange(first, data_.upper_bound(key));
+        case Compare::GREATER_THAN:       return LookupExclusiveRange(data_.lower_bound(key), last, key);
+        case Compare::GREATER_THAN_EQUAL: return LookupInclusiveRange(data_.lower_bound(key), last);
+        case Compare::NOT_EQUAL:          return LookupExclusiveRange(first, last, key);
+        default:
+            assert(false);
+    }
+    return IndexList{ };
+}
+
+template<typename T>
+inline std::vector<T> OrderedLookup<T>::Unpack(void) const
+{
+    // Rebuilds the flat column: element [row] is the key whose index list holds
+    // 'row'. Entries are visited per key, not in row order, so the output is
+    // grown on demand.
+    std::vector<T> column;
+    for (const auto& entry : data_) {
+        for (const auto row : entry.second) {
+            if (row >= column.size())
+                column.resize(row + 1);
+            column[row] = entry.first;
+        }
+    }
+    return column;
+}
+
+// -----------------
+// UnorderedLookup
+// -----------------
+
+template<typename T>
+inline UnorderedLookup<T>::UnorderedLookup(void) { } // data_ is default-constructed
+
+template<typename T>
+inline UnorderedLookup<T>::UnorderedLookup(const container_type& data)
+    : data_(data) // copies an already-built container
+{ }
+
+template<typename T>
+inline UnorderedLookup<T>::UnorderedLookup(container_type&& data)
+    : data_(std::move(data)) // takes over an already-built container
+{ }
+
+template<typename T>
+inline UnorderedLookup<T>::UnorderedLookup(const std::vector<T>& rawData)
+{
+    size_t row = 0; // inverts the column: each value maps to the rows holding it
+    for (const auto& value : rawData)
+        data_[value].push_back(row++);
+}
+
+template<typename T>
+inline UnorderedLookup<T>::UnorderedLookup(std::vector<T>&& rawData)
+{
+    size_t row = 0; // values are copied out as keys; each maps to the rows holding it
+    for (const auto& value : rawData)
+        data_[value].push_back(row++);
+}
+
+template<typename T>
+inline bool UnorderedLookup<T>::operator==(const UnorderedLookup<T>& other) const
+{ return data_ == other.data_; } // equality is that of the underlying containers
+
+template<typename T>
+inline bool UnorderedLookup<T>::operator!=(const UnorderedLookup<T>& other) const
+{ return !(*this == other); } // defined as the negation of operator==
+
+template<typename T>
+inline typename UnorderedLookup<T>::iterator UnorderedLookup<T>::begin(void)
+{ return data_.begin(); } // mutable iteration over the stored entries
+
+template<typename T>
+inline typename UnorderedLookup<T>::const_iterator UnorderedLookup<T>::begin(void) const
+{ return data_.cbegin(); } // const overload forwards to cbegin()
+
+template<typename T>
+inline typename UnorderedLookup<T>::const_iterator UnorderedLookup<T>::cbegin(void) const
+{ return data_.cbegin(); }
+
+template<typename T>
+inline typename UnorderedLookup<T>::iterator UnorderedLookup<T>::end(void)
+{ return data_.end(); }
+
+template<typename T>
+inline typename UnorderedLookup<T>::const_iterator UnorderedLookup<T>::end(void) const
+{ return data_.cend(); } // const overload forwards to cend()
+
+template<typename T>
+inline typename UnorderedLookup<T>::const_iterator UnorderedLookup<T>::cend(void) const
+{ return data_.cend(); }
+
+template<typename T>
+inline bool UnorderedLookup<T>::empty(void) const
+{ return data_.empty(); } // true when no key is stored
+
+template<typename T>
+inline size_t UnorderedLookup<T>::size(void) const
+{ return data_.size(); } // number of stored keys, not of indexed rows
+
+template<typename T>
+template<typename Compare>
+inline IndexList
+UnorderedLookup<T>::LookupHelper(const UnorderedLookup::key_type& key,
+                                 const Compare& cmp) const
+{
+    // test every stored key against 'key' via cmp, keeping the indices that pass
+    IndexList rows;
+    for (const auto& entry : data_) {
+        if (cmp(entry.first, key))
+            pushBackIndices(rows, entry.second);
+    }
+
+    std::sort(rows.begin(), rows.end()); // report indices in ascending order
+    return rows;
+}
+
+template<typename T>
+inline IndexList
+UnorderedLookup<T>::LookupIndices(const UnorderedLookup::key_type& key,
+                                  const Compare::Type& compare) const
+{
+    switch (compare) {
+        case Compare::EQUAL:
+        {
+            // exact match: the only query answered by a direct find()
+            const auto match = data_.find(key);
+            return (match != data_.cend()) ? match->second : IndexList();
+        }
+        // every other comparison scans all stored keys with the matching
+        // standard function object
+        case Compare::LESS_THAN:          return LookupHelper(key, std::less<key_type>());
+        case Compare::LESS_THAN_EQUAL:    return LookupHelper(key, std::less_equal<key_type>());
+        case Compare::GREATER_THAN:       return LookupHelper(key, std::greater<key_type>());
+        case Compare::GREATER_THAN_EQUAL: return LookupHelper(key, std::greater_equal<key_type>());
+        case Compare::NOT_EQUAL:          return LookupHelper(key, std::not_equal_to<key_type>());
+        default:
+            assert(false);
+    }
+    return IndexList{ };
+}
+
+template<typename T>
+inline std::vector<T> UnorderedLookup<T>::Unpack(void) const
+{
+    // Rebuilds the flat column: element [row] is the key whose index list holds
+    // 'row'. Entries are visited per key, not in row order, so the output is
+    // grown on demand.
+    std::vector<T> column;
+    for (const auto& entry : data_) {
+        for (const auto row : entry.second) {
+            if (row >= column.size())
+                column.resize(row + 1);
+            column[row] = entry.first;
+        }
+    }
+    return column;
+}
+
+// -------------------
+// BasicLookupData
+// -------------------
+
+inline
+void BasicLookupData::ApplyOffsets(IndexResultBlocks& blocks) const
+{
+    for (auto& resultBlock : blocks) // offset is looked up by the block's first index
+        resultBlock.virtualOffset_ = fileOffset_.at(resultBlock.firstIndex_);
+}
+
+template<typename T>
+inline IndexList BasicLookupData::Indices(const BasicLookupData::Field& field,
+                                            const T& value,
+                                            const Compare::Type& compareType) const
+{
+    switch(field) { // forward the query to the lookup member that matches 'field'
+        case BasicLookupData::RG_ID:        return rgId_.LookupIndices(value, compareType);
+        case BasicLookupData::Q_START:      return qStart_.LookupIndices(value, compareType);
+        case BasicLookupData::Q_END:        return qEnd_.LookupIndices(value, compareType);
+        case BasicLookupData::ZMW:          return holeNumber_.LookupIndices(value, compareType);
+        case BasicLookupData::READ_QUALITY: return readQual_.LookupIndices(value, compareType);
+        case BasicLookupData::CONTEXT_FLAG: return ctxtFlag_.LookupIndices(value, compareType);
+
+        case BasicLookupData::VIRTUAL_OFFSET : // fall-through, not supported this way
+        default:
+            assert(false); // unsupported field: trips in builds with assertions enabled
+    }
+    return IndexList{ }; // reached only for unsupported fields when assert() is compiled out
+}
+
+template<typename T>
+inline IndexList BasicLookupData::IndicesMulti(const BasicLookupData::Field& field,
+                                                 const std::vector<T>& values) const
+{
+    // concatenates the EQUAL matches of each value, in the order the values are given
+    auto result = IndexList{ };
+    for (const auto& value : values) {
+        const auto valueIndices = Indices(field, value, Compare::EQUAL);
+        // ranged insert, rather than an exact reserve per value, lets the
+        // container apply its own growth policy across iterations
+        result.insert(result.end(), valueIndices.begin(), valueIndices.end());
+    }
+    return result;
+}
+
+inline const std::vector<int64_t>& BasicLookupData::VirtualFileOffsets(void) const
+{ return fileOffset_; } // read-only view of the offsets that ApplyOffsets() reads
+
+// -------------------
+// MappedLookupData
+// -------------------
+
+template<typename T>
+inline IndexList MappedLookupData::Indices(const MappedLookupData::Field& field,
+                                           const T& value,
+                                           const Compare::Type& compareType) const
+{
+    switch(field) { // forward the query to the lookup member that matches 'field'
+        case MappedLookupData::T_ID:        return tId_.LookupIndices(value, compareType);
+        case MappedLookupData::T_START:     return tStart_.LookupIndices(value, compareType);
+        case MappedLookupData::T_END:       return tEnd_.LookupIndices(value, compareType);
+        case MappedLookupData::A_START:     return aStart_.LookupIndices(value, compareType);
+        case MappedLookupData::A_END:       return aEnd_.LookupIndices(value, compareType);
+        case MappedLookupData::N_M:         return nM_.LookupIndices(value, compareType);
+        case MappedLookupData::N_MM:        return nMM_.LookupIndices(value, compareType);
+        case MappedLookupData::N_DEL:       return nDel_.LookupIndices(value, compareType);
+        case MappedLookupData::N_INS:       return nIns_.LookupIndices(value, compareType);
+        case MappedLookupData::MAP_QUALITY: return mapQV_.LookupIndices(value, compareType);
+
+        // MappedLookupData::STRAND is handled by the Strand specialization of Indices
+
+        default:
+            assert(false); // unsupported field: trips in builds with assertions enabled
+    }
+    return IndexList{ }; // reached only for unsupported fields when assert() is compiled out
+}
+
+template<>
+inline IndexList MappedLookupData::Indices(const MappedLookupData::Field& field,
+                                           const Strand& strand,
+                                           const Compare::Type& compareType) const
+{
+    assert(field == MappedLookupData::STRAND);
+    (void)field; // quash warnings building in release mode
+
+    // A strand either is FORWARD or is not, so both supported comparisons reduce
+    // to picking one of the two precomputed lists.
+    const bool isForward = (strand == Strand::FORWARD);
+    switch (compareType) {
+        case Compare::EQUAL:
+            return isForward ? forwardStrand_ : reverseStrand_;
+        case Compare::NOT_EQUAL:
+            return isForward ? reverseStrand_ : forwardStrand_;
+        default:
+            break;
+    }
+
+    // only EQUAL/NOT_EQUAL supported
+    assert(false);
+    return IndexList{ };
+}
+
+template<typename T>
+inline IndexList MappedLookupData::IndicesMulti(const MappedLookupData::Field& field,
+                                                const std::vector<T>& values) const
+{
+    // concatenates the EQUAL matches of each value, in the order the values are given
+    auto result = IndexList{ };
+    for (const auto& value : values) {
+        const auto valueIndices = Indices(field, value, Compare::EQUAL);
+        // ranged insert, rather than an exact reserve per value, lets the
+        // container apply its own growth policy across iterations
+        result.insert(result.end(), valueIndices.begin(), valueIndices.end());
+    }
+    return result;
+}
+
+
+// ---------------------
+// ReferenceLookupData
+// ---------------------
+
+inline IndexRange ReferenceLookupData::Indices(const int32_t tId) const
+{
+    const auto entry = references_.find(tId);
+    if (entry != references_.cend())
+        return entry->second;
+    return IndexRange{ nullIndex(), nullIndex() }; // tId is not in references_
+}
+
+// -------------------
+// BarcodeLookupData
+// -------------------
+
+template<typename T>
+inline IndexList BarcodeLookupData::Indices(const BarcodeLookupData::Field &field,
+                                            const T& value,
+                                            const Compare::Type &compareType) const
+{
+    switch(field) { // forward the query to the lookup member that matches 'field'
+        case BarcodeLookupData::BC_FORWARD:      return bcForward_.LookupIndices(value, compareType);
+        case BarcodeLookupData::BC_REVERSE:     return bcReverse_.LookupIndices(value, compareType);
+        case BarcodeLookupData::BC_QUALITY:   return bcQual_.LookupIndices(value, compareType);
+        default:
+            assert(false); // unsupported field: trips in builds with assertions enabled
+    }
+    return IndexList{ }; // reached only for unsupported fields when assert() is compiled out
+}
+
+template<typename T>
+inline IndexList BarcodeLookupData::IndicesMulti(const BarcodeLookupData::Field &field,
+                                                 const std::vector<T>& values) const
+{
+    // concatenates the EQUAL matches of each value, in the order the values are given
+    IndexList result;
+    for (const auto& value : values) {
+        const IndexList valueIndices = Indices(field, value, Compare::EQUAL);
+        // ranged insert, rather than an exact reserve per value, lets the
+        // container apply its own growth policy across iterations
+        result.insert(result.end(), valueIndices.begin(), valueIndices.end());
+    }
+    return result;
+}
+
+} // namespace BAM
+} // namespace PacBio
diff --git a/include/pbbam/internal/PbiRawData.inl b/include/pbbam/internal/PbiRawData.inl
new file mode 100644
index 0000000..af24376
--- /dev/null
+++ b/include/pbbam/internal/PbiRawData.inl
@@ -0,0 +1,113 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiRawData.inl
+/// \brief Inline implementations for the classes used for working with raw PBI
+///        data.
+//
+// Author: Derek Barnett
+
+#include "pbbam/PbiRawData.h"
+
+namespace PacBio {
+namespace BAM {
+
+inline const PbiRawBarcodeData& PbiRawData::BarcodeData(void) const
+{ return barcodeData_; } // read-only access
+
+inline PbiRawBarcodeData& PbiRawData::BarcodeData(void)
+{ return barcodeData_; } // mutable access
+
+inline const PbiRawBasicData& PbiRawData::BasicData(void) const
+{ return basicData_; } // read-only access
+
+inline PbiRawBasicData& PbiRawData::BasicData(void)
+{ return basicData_; } // mutable access
+
+inline std::string PbiRawData::Filename(void) const
+{ return filename_; } // returned by value (a copy)
+
+inline PbiFile::Sections PbiRawData::FileSections(void) const
+{ return sections_; } // the flags that HasSection() tests
+
+inline PbiRawData& PbiRawData::FileSections(PbiFile::Sections sections)
+{ sections_ = sections; return *this; } // returns *this so setters can be chained
+
+inline bool PbiRawData::HasBarcodeData(void) const
+{ return HasSection(PbiFile::BARCODE); }
+
+inline bool PbiRawData::HasMappedData(void) const
+{ return HasSection(PbiFile::MAPPED); }
+
+inline bool PbiRawData::HasReferenceData(void) const
+{ return HasSection(PbiFile::REFERENCE); }
+
+inline bool PbiRawData::HasSection(const PbiFile::Section section) const
+{ return (sections_ & section) != 0; } // true if any bit of 'section' is set in sections_
+
+inline uint32_t PbiRawData::NumReads(void) const
+{ return numReads_; }
+
+inline PbiRawData& PbiRawData::NumReads(uint32_t num)
+{ numReads_ = num; return *this; } // returns *this so setters can be chained
+
+inline const PbiRawMappedData& PbiRawData::MappedData(void) const
+{ return mappedData_; } // read-only access
+
+inline PbiRawMappedData& PbiRawData::MappedData(void)
+{ return mappedData_; } // mutable access
+
+inline const PbiRawReferenceData& PbiRawData::ReferenceData(void) const
+{ return referenceData_; } // read-only access
+
+inline PbiRawReferenceData& PbiRawData::ReferenceData(void)
+{ return referenceData_; } // mutable access
+
+inline PbiFile::VersionEnum PbiRawData::Version(void) const
+{ return version_; }
+
+inline PbiRawData& PbiRawData::Version(PbiFile::VersionEnum version)
+{ version_ = version; return *this; } // returns *this so setters can be chained
+
+inline bool PbiReferenceEntry::operator==(const PbiReferenceEntry& other) const
+{
+    const bool sameReference = (tId_ == other.tId_);
+    const bool sameRows = (beginRow_ == other.beginRow_) && (endRow_ == other.endRow_);
+    return sameReference && sameRows; // equal only if the ID and both row bounds agree
+}
+
+} // namespace BAM
+} // namespace PacBio
diff --git a/include/pbbam/ProgramInfo.h b/include/pbbam/internal/ProgramInfo.inl
similarity index 60%
copy from include/pbbam/ProgramInfo.h
copy to include/pbbam/internal/ProgramInfo.inl
index d1bbcfe..2f0287f 100644
--- a/include/pbbam/ProgramInfo.h
+++ b/include/pbbam/internal/ProgramInfo.inl
@@ -32,106 +32,18 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file ProgramInfo.inl
+/// \brief Inline implementations for the ProgramInfo class.
+//
 // Author: Derek Barnett
 
-#ifndef PROGRAMINFO_H
-#define PROGRAMINFO_H
-
-#include "pbbam/Config.h"
-#include <map>
-#include <string>
+#include "pbbam/ProgramInfo.h"
 
 namespace PacBio {
 namespace BAM {
 
-class PBBAM_EXPORT ProgramInfo
-{
-public:
-    /// \name Conversion & Validation
-    ///
-
-    static ProgramInfo FromSam(const std::string& sam);
-
-    static std::string ToSam(const ProgramInfo& prog);
-
-    /// \}
-
-public:
-    /// \name Constructors & Related Methods
-    /// \{
-
-    ProgramInfo(void);
-    ProgramInfo(const std::string& id);
-    ProgramInfo(const ProgramInfo& other);
-    ProgramInfo(ProgramInfo&& other);
-    ProgramInfo& operator=(const ProgramInfo& other);
-    ProgramInfo& operator=(ProgramInfo&& other);
-    ~ProgramInfo(void);
-
-    /// \}
-
-public:
-    /// \name Attributes
-    /// \{
-
-    std::string CommandLine(void) const;
-
-    std::map<std::string, std::string> CustomTags(void) const;
-
-    std::string Description(void) const;
-
-    std::string Id(void) const;
-
-    std::string Name(void) const;
-
-    std::string PreviousProgramId(void) const;
-
-    std::string Version(void) const;
-
-    /// \}
-
-    /// \name Conversion & Validation
-    ///
-
-    bool IsValid(void) const;
-
-    std::string ToSam(void) const;
-
-    /// \}
-
-public:
-    /// \name Attributes
-    /// \{
-
-    ProgramInfo& CommandLine(const std::string& cmd);
-
-    ProgramInfo& CustomTags(const std::map<std::string, std::string>& custom);
-
-    ProgramInfo& Description(const std::string& description);
-
-    ProgramInfo& Id(const std::string& id);
-
-    ProgramInfo& Name(const std::string& name);
-
-    ProgramInfo& PreviousProgramId(const std::string& id);
-
-    ProgramInfo& Version(const std::string& version);
-
-    /// \}
-
-private:
-    std::string commandLine_;            // CL:<CommandLine>
-    std::string description_;            // DS:<Description>
-    std::string id_;                     // ID:<ID>              * Unique ID required for valid SAM header*
-    std::string name_;                   // PN:<Name>
-    std::string previousProgramId_;      // PP:<PreviousProgramID>
-    std::string version_;                // VN:<Version>
-
-    // custom attributes
-    std::map<std::string, std::string> custom_; // tag => value
-};
-
 inline std::string ProgramInfo::CommandLine(void) const
 { return commandLine_; }
 
@@ -141,7 +53,8 @@ inline ProgramInfo& ProgramInfo::CommandLine(const std::string& cmd)
 inline std::map<std::string, std::string> ProgramInfo::CustomTags(void) const
 { return custom_; }
 
-inline ProgramInfo& ProgramInfo::CustomTags(const std::map<std::string, std::string>& custom)
+inline ProgramInfo& ProgramInfo::CustomTags(const std::map<std::string,
+                                            std::string>& custom)
 { custom_ = custom; return *this; }
 
 inline std::string ProgramInfo::Description(void) const
@@ -182,5 +95,3 @@ inline ProgramInfo& ProgramInfo::Version(const std::string& version)
 
 } // namespace BAM
 } // namespace PacBio
-
-#endif // PROGRAMINFO_H
diff --git a/include/pbbam/internal/MergeItem.h b/include/pbbam/internal/QualityValue.inl
similarity index 72%
rename from include/pbbam/internal/MergeItem.h
rename to include/pbbam/internal/QualityValue.inl
index f3d2347..07db35b 100644
--- a/include/pbbam/internal/MergeItem.h
+++ b/include/pbbam/internal/QualityValue.inl
@@ -32,46 +32,40 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file QualityValue.inl
+/// \brief Inline implementations for the QualityValue class.
+//
 // Author: Derek Barnett
 
-#ifndef MERGEITEM_H
-#define MERGEITEM_H
-
-#include "pbbam/BamRecord.h"
-#include "pbbam/internal/IBamFileIterator.h"
-#include <vector>
+#include "pbbam/QualityValue.h"
 
 namespace PacBio {
 namespace BAM {
-namespace internal {
 
-template<typename T>
-struct MergeItemBase
+inline QualityValue::QualityValue(const uint8_t value)
+    : value_(value)
 {
-public:
-    typedef typename IBamFileIteratorBase<T>::Ptr FileIterPtr;
+    // clamp QV
+    if (value_ > QualityValue::MAX)
+        value_ = QualityValue::MAX;
+}
+
+inline QualityValue::QualityValue(const QualityValue& other)
+    : value_(other.value_)
+{ }
 
-public:
-    MergeItemBase(void) { }
-    MergeItemBase(const T& r, const FileIterPtr& iter)
-        : record_(r), iter_(iter)
-    { }
+inline QualityValue::~QualityValue(void) { }
 
-public:
-    bool IsNull(void) const
-    { return !iter_; }
+inline char QualityValue::Fastq(void) const
+{ return static_cast<char>(value_ + 33); }
 
-public:
-    T record_;
-    FileIterPtr iter_;
-};
+inline QualityValue::operator uint8_t(void) const
+{ return value_; }
 
-typedef MergeItemBase<BamRecord>               MergeItem;
-typedef MergeItemBase<std::vector<BamRecord> > GroupMergeItem;
+inline QualityValue QualityValue::FromFastq(const char c)
+{ return QualityValue(static_cast<uint8_t>(c-33)); }
 
-} // namespace internal
 } // namespace BAM
 } // namespace PacBio
-
-#endif // MERGEITEM_H
diff --git a/include/pbbam/QualityValues.h b/include/pbbam/internal/QualityValues.inl
similarity index 65%
copy from include/pbbam/QualityValues.h
copy to include/pbbam/internal/QualityValues.inl
index 240b96f..0eabf49 100644
--- a/include/pbbam/QualityValues.h
+++ b/include/pbbam/internal/QualityValues.inl
@@ -32,96 +32,19 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file QualityValues.inl
+/// \brief Inline implementations for the QualityValues class.
+//
 // Author: Derek Barnett
 
-#ifndef QUALITYVALUES_H
-#define QUALITYVALUES_H
-
-#include "pbbam/QualityValue.h"
+#include "pbbam/QualityValues.h"
 #include <algorithm>
-#include <string>
-#include <vector>
 
 namespace PacBio {
 namespace BAM {
 
-/// \brief The QualityValues class represents a sequence of FASTQ-compatible
-/// quality values. See QualityValue documentation for details.
-///
-class PBBAM_EXPORT QualityValues : public std::vector<QualityValue>
-{
-public:
-    /// Creates a QualityValues collection from a FASTQ-encoded string.
-    static QualityValues FromFastq(const std::string& fastq);
-
-public:
-    /// \name Constructors & Related Methods
-    ///  \{
-
-    QualityValues(void);
-    explicit QualityValues(const std::string& fastqString);
-    explicit QualityValues(const std::vector<QualityValue>& quals);
-    explicit QualityValues(const std::vector<uint8_t>& quals);
-
-    QualityValues(const std::vector<uint8_t>::const_iterator first,
-                  const std::vector<uint8_t>::const_iterator last);
-    QualityValues(const QualityValues::const_iterator first,
-                  const QualityValues::const_iterator last);
-
-    QualityValues(const QualityValues& other);
-    QualityValues(QualityValues&& other);
-
-    QualityValues(std::vector<QualityValue>&& quals);
-
-    QualityValues& operator=(const QualityValues& other);
-    QualityValues& operator=(QualityValues&& other);
-
-    QualityValues& operator=(const std::vector<QualityValue>& quals);
-    QualityValues& operator=(std::vector<QualityValue>&& quals);
-
-    ~QualityValues(void);
-
-    /// \}
-
-public:
-    /// \name Comparison Operators
-    /// \{
-
-    bool operator==(const std::string& other) const;
-    bool operator!=(const std::string& other) const;
-
-    /// \}
-
-public:
-    /// \name Iterators
-    /// \{
-
-    /// \returns A const_iterator to the beginning of the sequence.
-    std::vector<QualityValue>::const_iterator cbegin(void) const;
-
-    /// \returns A const_iterator to the element past the end of the sequence.
-    std::vector<QualityValue>::const_iterator cend(void) const;
-
-    /// \returns A const_iterator to the beginning of the sequence.
-    std::vector<QualityValue>::const_iterator begin(void) const;
-
-    /// \returns A const_iterator to the element past the end of the sequence.
-    std::vector<QualityValue>::const_iterator end(void) const;
-
-    /// \returns An iterator to the beginning of the sequence.
-    std::vector<QualityValue>::iterator begin(void);
-
-    /// \returns An iterator to the element past the end of the sequence.
-    std::vector<QualityValue>::iterator end(void);
-
-    /// \}
-
-public:
-    /// \returns the FASTQ-encoded string for this collection
-    std::string Fastq(void) const;
-};
-
 inline QualityValues::QualityValues(void)
     : std::vector<QualityValue>()
 { }
@@ -202,13 +125,7 @@ inline std::vector<QualityValue>::iterator QualityValues::end(void)
 { return std::vector<QualityValue>::end(); }
 
 inline QualityValues QualityValues::FromFastq(const std::string& fastq)
-{
-    return QualityValues(fastq);
-//    QualityValues result;
-//    result.resize(fastq.size());
-//    std::transform(fastq.cbegin(), fastq.cend(), result.begin(), QualityValue::FromFastq);
-//    return result;
-}
+{ return QualityValues(fastq); }
 
 inline std::string QualityValues::Fastq(void) const
 {
@@ -229,5 +146,3 @@ inline bool QualityValues::operator!=(const std::string& fastq) const
 
 } // namespace BAM
 } // namespace PacBio
-
-#endif // QUALITYVALUES_H
diff --git a/include/pbbam/internal/QueryBase.h b/include/pbbam/internal/QueryBase.h
index 7c16f87..e012f86 100644
--- a/include/pbbam/internal/QueryBase.h
+++ b/include/pbbam/internal/QueryBase.h
@@ -35,15 +35,12 @@
 
 // Author: Derek Barnett
 
-#ifndef QUERYBASE2_H
-#define QUERYBASE2_H
+#ifndef QUERYBASE_H
+#define QUERYBASE_H
 
 #include "pbbam/BamFile.h"
 #include "pbbam/BamRecord.h"
 #include "pbbam/DataSet.h"
-#include "pbbam/internal/FilterEngine.h"
-#include "pbbam/internal/IBamFileIterator.h"
-#include "pbbam/internal/IMergeStrategy.h"
 #include <memory>
 #include <vector>
 #include <cassert>
@@ -58,23 +55,18 @@ class QueryBase;
 template<typename T>
 class QueryIteratorBase
 {
+public:
+    virtual ~QueryIteratorBase(void);
+
+    bool operator==(const QueryIteratorBase<T>& other) const;
+    bool operator!=(const QueryIteratorBase<T>& other) const;
+
 protected:
     QueryIteratorBase(void);
     QueryIteratorBase(QueryBase<T>& query);
 
-public:
-    virtual ~QueryIteratorBase(void) { }
-
-protected:
     void ReadNext(void);
 
-public:
-    bool operator==(const QueryIteratorBase<T>& other) const
-    { return query_ == other.query_; }
-
-    bool operator!=(const QueryIteratorBase<T>& other) const
-    { return !(*this == other); }
-
 protected:
     QueryBase<T>* query_;
     T record_;
@@ -84,46 +76,28 @@ template<typename T>
 class QueryIterator : public QueryIteratorBase<T>
 {
 public:
-    QueryIterator(void) : QueryIteratorBase<T>() { }
-    QueryIterator(QueryBase<T>& query)
-        : QueryIteratorBase<T>(query)
-    { }
-
-    T& operator*(void) { return QueryIteratorBase<T>::record_; }
-    T* operator->(void) { return &(operator*()); }
-
-    QueryIterator<T>& operator++(void)
-    { QueryIteratorBase<T>::ReadNext(); return *this; }
-
-    QueryIterator<T> operator++(int)
-    {
-        QueryIterator<T> result(*this);
-        ++(*this);
-        return result;
-    }
+    QueryIterator(void);
+    QueryIterator(QueryBase<T>& query);
+
+    T& operator*(void);
+    T* operator->(void);
+
+    QueryIterator<T>& operator++(void);
+    QueryIterator<T> operator++(int);
 };
 
 template<typename T>
 class QueryConstIterator : public QueryIteratorBase<T>
 {
 public:
-    QueryConstIterator(void) : QueryIteratorBase<T>() { }
-    QueryConstIterator(const QueryBase<T>& query)
-        : QueryIteratorBase<T>(const_cast<QueryBase<T>&>(query))
-    { }
-
-    const T& operator*(void) const { return QueryIteratorBase<T>::record_; }
-    const T* operator->(void) const { return &(operator*()); }
-
-    QueryConstIterator<T>& operator++(void)
-    { QueryIteratorBase<T>::ReadNext(); return *this; }
-
-    QueryConstIterator<T> operator++(int)
-    {
-        QueryConstIterator<T> result(*this);
-        ++(*this);
-        return result;
-    }
+    QueryConstIterator(void);
+    QueryConstIterator(const QueryBase<T>& query);
+
+    const T& operator*(void) const;
+    const T* operator->(void) const;
+
+    QueryConstIterator<T>& operator++(void);
+    QueryConstIterator<T> operator++(int);
 };
 
 template<typename T>
@@ -133,86 +107,32 @@ public:
     typedef QueryIterator<T>      iterator;
     typedef QueryConstIterator<T> const_iterator;
 
-    typedef typename IBamFileIteratorBase<T>::Ptr FileIterPtr;
-
-protected:
-    QueryBase(const DataSet& dataset);
 public:
-    virtual ~QueryBase(void) { }
-
-    QueryConstIterator<T> begin(void) const  { return QueryConstIterator<T>(*this); }
-    QueryConstIterator<T> cbegin(void) const { return QueryConstIterator<T>(*this); }
-    QueryIterator<T> begin(void) { return QueryIterator<T>(*this); }
-
-    QueryConstIterator<T> end(void) const { return QueryConstIterator<T>(); }
-    QueryConstIterator<T> cend(void) const { return QueryConstIterator<T>(); }
-    QueryIterator<T> end(void) { return QueryIterator<T>(); }
+    virtual ~QueryBase(void);
 
 public:
-    bool GetNext(T& r);
+    QueryConstIterator<T> begin(void) const;
+    QueryConstIterator<T> cbegin(void) const;
+    QueryIterator<T> begin(void);
 
-    std::vector<BamFile> GetBamFiles(void) const
-    { return dataset_.ExternalResources().BamFiles(); }
+    QueryConstIterator<T> end(void) const;
+    QueryConstIterator<T> cend(void) const;
+    QueryIterator<T> end(void);
 
 public:
-    std::vector<FileIterPtr> CreateIterators(void)
-    {
-        const std::vector<BamFile>& bamFiles = dataset_.ExternalResources().BamFiles();
-        std::vector<FileIterPtr> result;
-        result.reserve(bamFiles.size());
-        for (const BamFile& bamFile : bamFiles)
-            result.push_back(CreateIterator(bamFile));
-        return result;
-    }
-
-protected:
-    virtual FileIterPtr CreateIterator(const BamFile& bamFile) = 0;
+    virtual bool GetNext(T& r) =0;
 
 protected:
-    const DataSet dataset_;
-    std::unique_ptr<IMergeStrategyBase<T> > mergeStrategy_;
-    FilterEngine filterEngine_;
+    QueryBase(void);
 };
 
 typedef QueryBase<BamRecord>               IQuery;
 typedef QueryBase<std::vector<BamRecord> > IGroupQuery;
 
-template<typename T>
-inline QueryIteratorBase<T>::QueryIteratorBase(void)
-    : query_(nullptr)
-{ }
-
-template<typename T>
-inline QueryIteratorBase<T>::QueryIteratorBase(QueryBase<T> &query)
-    : query_(&query)
-{ ReadNext(); }
-
-template<typename T>
-inline QueryBase<T>::QueryBase(const DataSet& dataset)
-    : dataset_(dataset)
-    , mergeStrategy_(nullptr)
-{ }
-
-template<typename T>
-inline bool QueryBase<T>::GetNext(T& r)
-{
-    while (mergeStrategy_->GetNext(r)) {
-        if (filterEngine_.Accepts(r))
-            return true;
-    }
-    return false;
-}
-
-template<typename T>
-inline void QueryIteratorBase<T>::ReadNext(void)
-{
-    assert(query_);
-    if (!query_->GetNext(record_))
-        query_ = nullptr;
-}
-
 } // namespace internal
 } // namespace BAM
 } // namespace PacBio
 
-#endif // QUERYBASE2_H
+#include "pbbam/internal/QueryBase.inl"
+
+#endif // QUERYBASE_H
diff --git a/include/pbbam/internal/QueryBase.inl b/include/pbbam/internal/QueryBase.inl
new file mode 100644
index 0000000..7f2376f
--- /dev/null
+++ b/include/pbbam/internal/QueryBase.inl
@@ -0,0 +1,177 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "pbbam/internal/QueryBase.h"
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// -------------------
+// QueryIteratorBase
+// -------------------
+
+template<typename T>
+inline QueryIteratorBase<T>::QueryIteratorBase(void)
+    : query_(nullptr)
+{ }
+
+template<typename T>
+inline QueryIteratorBase<T>::QueryIteratorBase(QueryBase<T>& query)
+    : query_(&query)
+{ ReadNext(); }
+
+template<typename T> inline
+QueryIteratorBase<T>::~QueryIteratorBase(void) { }
+
+template<typename T> inline
+bool QueryIteratorBase<T>::operator==(const QueryIteratorBase<T>& other) const
+{ return query_ == other.query_; }
+
+template<typename T> inline
+bool QueryIteratorBase<T>::operator!=(const QueryIteratorBase<T>& other) const
+{ return !(*this == other); }
+
+// -------------------
+// QueryIterator
+// -------------------
+
+template<typename T> inline
+QueryIterator<T>::QueryIterator(void) : QueryIteratorBase<T>() { }
+
+template<typename T> inline
+QueryIterator<T>::QueryIterator(QueryBase<T>& query)
+    : QueryIteratorBase<T>(query)
+{ }
+
+template<typename T> inline
+T& QueryIterator<T>::operator*(void)
+{ return QueryIteratorBase<T>::record_; }
+
+template<typename T> inline
+T* QueryIterator<T>::operator->(void)
+{ return &(operator*()); }
+
+template<typename T> inline
+QueryIterator<T>& QueryIterator<T>::operator++(void)
+{ QueryIteratorBase<T>::ReadNext(); return *this; }
+
+template<typename T> inline
+QueryIterator<T> QueryIterator<T>::operator++(int)
+{
+    QueryIterator<T> result(*this);
+    ++(*this);
+    return result;
+}
+
+// --------------------
+// QueryConstIterator
+// --------------------
+
+template<typename T> inline
+QueryConstIterator<T>::QueryConstIterator(void) : QueryIteratorBase<T>() { }
+
+template<typename T> inline
+QueryConstIterator<T>::QueryConstIterator(const QueryBase<T>& query)
+    : QueryIteratorBase<T>(const_cast<QueryBase<T>&>(query))
+{ }
+
+template<typename T> inline
+const T& QueryConstIterator<T>::operator*(void) const
+{ return QueryIteratorBase<T>::record_; }
+
+template<typename T> inline
+const T* QueryConstIterator<T>::operator->(void) const
+{ return &(operator*()); }
+
+template<typename T> inline
+QueryConstIterator<T>& QueryConstIterator<T>::operator++(void)
+{ QueryIteratorBase<T>::ReadNext(); return *this; }
+
+template<typename T> inline
+QueryConstIterator<T> QueryConstIterator<T>::operator++(int)
+{
+    QueryConstIterator<T> result(*this);
+    ++(*this);
+    return result;
+}
+
+// -----------
+// QueryBase
+// -----------
+
+template<typename T> inline
+QueryBase<T>::QueryBase(void) { }
+
+template<typename T> inline
+QueryBase<T>::~QueryBase(void) { }
+
+template<typename T> inline
+QueryConstIterator<T> QueryBase<T>::begin(void) const
+{ return QueryConstIterator<T>(*this); }
+
+template<typename T> inline
+QueryIterator<T> QueryBase<T>::begin(void)
+{ return QueryIterator<T>(*this); }
+
+template<typename T> inline
+QueryConstIterator<T> QueryBase<T>::cbegin(void) const
+{ return QueryConstIterator<T>(*this); }
+
+template<typename T> inline
+QueryConstIterator<T> QueryBase<T>::cend(void) const
+{ return QueryConstIterator<T>(); }
+
+template<typename T> inline
+QueryConstIterator<T> QueryBase<T>::end(void) const
+{ return QueryConstIterator<T>(); }
+
+template<typename T> inline
+QueryIterator<T> QueryBase<T>::end(void)
+{ return QueryIterator<T>(); }
+
+template<typename T>
+inline void QueryIteratorBase<T>::ReadNext(void)
+{
+    assert(query_);
+    if (!query_->GetNext(record_))
+        query_ = nullptr;
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
diff --git a/include/pbbam/ReadGroupInfo.h b/include/pbbam/internal/ReadGroupInfo.inl
similarity index 55%
copy from include/pbbam/ReadGroupInfo.h
copy to include/pbbam/internal/ReadGroupInfo.inl
index 86372ee..c4b208b 100644
--- a/include/pbbam/ReadGroupInfo.h
+++ b/include/pbbam/internal/ReadGroupInfo.inl
@@ -32,225 +32,69 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file ReadGroupInfo.inl
+/// \brief Inline implementations for the ReadGroupInfo class.
+//
 // Author: Derek Barnett
 
-#ifndef READGROUPINFO_H
-#define READGROUPINFO_H
-
-#include "pbbam/Config.h"
-#include <map>
-#include <string>
+#include "pbbam/ReadGroupInfo.h"
 
 namespace PacBio {
 namespace BAM {
 
-enum class BaseFeature
-{
-    DELETION_QV
-  , DELETION_TAG
-  , INSERTION_QV
-  , MERGE_QV
-  , SUBSTITUTION_QV
-  , SUBSTITUTION_TAG
-  , IPD
-  , PULSE_WIDTH
-  , PKMID
-  , PKMEAN
-  , LABEL
-  , LABEL_QV
-  , ALT_LABEL
-  , ALT_LABEL_QV
-  , PULSE_MERGE_QV
-  , PULSE_CALL
-  , PRE_PULSE_FRAMES
-  , PULSE_CALL_WIDTH
-};
-
-enum class FrameCodec
+inline size_t ReadGroupInfo::BarcodeCount(void) const
 {
-    RAW
-  , V1
-};
+    if (!hasBarcodeData_)
+        throw std::runtime_error("barcode count requested but barcode data is missing");
+    return barcodeCount_;
+}
 
-class PBBAM_EXPORT ReadGroupInfo
+inline ReadGroupInfo& ReadGroupInfo::BarcodeData(const std::string& barcodeFile,
+                                                 const std::string& barcodeHash,
+                                                 size_t barcodeCount,
+                                                 BarcodeModeType barcodeMode,
+                                                 BarcodeQualityType barcodeQuality)
 {
-public:
-    /// \name Conversion & Validation
-    ///
-
-    static ReadGroupInfo FromSam(const std::string& sam);
-
-    static std::string ToSam(const ReadGroupInfo& rg);
-
-    /// \}
-
-public:
-    /// \name Constructors & Related Methods
-    /// \{
-
-    ReadGroupInfo(void);
-    ReadGroupInfo(const std::string& id);
-    ReadGroupInfo(const std::string& movieName, const std::string& readType);
-    ReadGroupInfo(const ReadGroupInfo& other);
-    ReadGroupInfo(ReadGroupInfo&& other);
-    ReadGroupInfo& operator=(const ReadGroupInfo& other);
-    ReadGroupInfo& operator=(ReadGroupInfo&& other);
-    ~ReadGroupInfo(void);
-
-    /// \}
-
-public:
-    /// \name Attributes
-    /// \{
-
-    const std::string& BasecallerVersion(void) const;
-
-    bool HasBaseFeature(const BaseFeature& feature) const;
-
-    std::string BaseFeatureTag(const BaseFeature& feature) const;
-
-    std::string BindingKit(void) const;
-
-    bool Control(void) const;
-
-    std::map<std::string, std::string> CustomTags(void) const;
-
-    std::string Date(void) const;
-
-    std::string FlowOrder(void) const;
-
-    std::string FrameRateHz(void) const;
-
-    std::string Id(void) const;
-
-    FrameCodec IpdCodec(void) const;
-
-    std::string KeySequence(void) const;
-
-    std::string Library(void) const;
-
-    std::string MovieName(void) const;
-
-    std::string Platform(void) const;
-
-    std::string PredictedInsertSize(void) const;
-
-    std::string Programs(void) const;
-
-    FrameCodec PulseWidthCodec(void) const;
-
-    std::string ReadType(void) const;
-
-    std::string Sample(void) const;
-
-    std::string SequencingCenter(void) const;
-
-    std::string SequencingKit(void) const;
-
-    /// \}
-
-    /// \name Conversion & Validation
-    /// \{
-
-    bool IsValid(void) const;
-
-    std::string ToSam(void) const;
-
-    /// \}
-
-    /// \name Comparison
-    /// \{
-
-    bool operator==(const ReadGroupInfo& other) const;
-
-    /// \}
-
-public:
-    /// \name Attributes
-    /// \{
-
-    ReadGroupInfo& BasecallerVersion(const std::string& versionNumber);
-
-    ReadGroupInfo& BaseFeatureTag(const BaseFeature& feature,
-                                  const std::string& tag);
-
-    ReadGroupInfo& BindingKit(const std::string& kitNumber);
-
-    ReadGroupInfo& Control(const bool ctrl);
-
-    ReadGroupInfo& CustomTags(const std::map<std::string, std::string>& custom);
-
-    ReadGroupInfo& Date(const std::string& date);
-
-    ReadGroupInfo& FlowOrder(const std::string& order);
-
-    ReadGroupInfo& FrameRateHz(const std::string& frameRateHz);
-
-    ReadGroupInfo& Id(const std::string& id);
-
-    ReadGroupInfo& Id(const std::string& movieName, const std::string& readType);
-
-    ReadGroupInfo& IpdCodec(const FrameCodec& codec, const std::string& tag = std::string());
-
-    ReadGroupInfo& KeySequence(const std::string& sequence);
-
-    ReadGroupInfo& Library(const std::string& library);
-
-    ReadGroupInfo& MovieName(const std::string& id);
-
-    ReadGroupInfo& PredictedInsertSize(const std::string& size);
-
-    ReadGroupInfo& Programs(const std::string& programs);
-
-    ReadGroupInfo& PulseWidthCodec(const FrameCodec& codec, const std::string& tag = std::string());
-
-    ReadGroupInfo& ReadType(const std::string& type);
-
-    ReadGroupInfo& Sample(const std::string& sample);
-
-    ReadGroupInfo& SequencingCenter(const std::string& center);
-
-    ReadGroupInfo& SequencingKit(const std::string& kitNumber);
-
-    /// \}
-
-private:
-    std::string id_;                     // ID * Unique ID required for valid SAM/BAM header *
-    std::string sequencingCenter_;       // CN
-    std::string date_;                   // DT * (ISO 8601) *
-    std::string flowOrder_;              // FO
-    std::string keySequence_;            // KS
-    std::string library_;                // LB
-    std::string programs_;               // PG
-    std::string predictedInsertSize_;    // PI
-    std::string movieName_;              // PU * more explicit, in place of "platform unit" *
-    std::string sample_;                 // SM
+    barcodeFile_ = barcodeFile;
+    barcodeHash_ = barcodeHash;
+    barcodeCount_ = barcodeCount;
+    barcodeMode_ = barcodeMode;
+    barcodeQuality_ = barcodeQuality;
+    hasBarcodeData_ = true;
+    return *this;
+}
 
-    // DS:<Description> components
-    std::string readType_;
-    std::string bindingKit_;
-    std::string sequencingKit_;
-    std::string basecallerVersion_;
-    std::string frameRateHz_;
-    bool        control_ = false;
-    FrameCodec  ipdCodec_;
-    FrameCodec  pulseWidthCodec_;
-    std::map<BaseFeature, std::string> features_;
+inline std::string ReadGroupInfo::BarcodeFile(void) const
+{
+    if (!hasBarcodeData_)
+        throw std::runtime_error("barcode file requested but barcode data is missing");
+    return barcodeFile_;
+}
 
-    // custom attributes
-    std::map<std::string, std::string> custom_; // tag => value
+inline std::string ReadGroupInfo::BarcodeHash(void) const
+{
+    if (!hasBarcodeData_)
+        throw std::runtime_error("barcode hash requested but barcode data is missing");
+    return barcodeHash_;
+}
 
-private:
-    std::string EncodeSamDescription(void) const;
-    void DecodeSamDescription(const std::string& description);
-};
+inline BarcodeModeType ReadGroupInfo::BarcodeMode(void) const
+{
+    if (!hasBarcodeData_)
+        throw std::runtime_error("barcode mode requested but barcode data is missing");
+    return barcodeMode_;
+}
 
-PBBAM_EXPORT
-std::string MakeReadGroupId(const std::string& movieName,
-                            const std::string& readType);
+inline BarcodeQualityType ReadGroupInfo::BarcodeQuality(void) const
+{
+    if (!hasBarcodeData_)
+        throw std::runtime_error("barcode quality requested but barcode data is missing");
+    return barcodeQuality_;
+}
 
-inline const std::string& ReadGroupInfo::BasecallerVersion(void) const
+inline std::string ReadGroupInfo::BasecallerVersion(void) const
 { return basecallerVersion_; }
 
 inline ReadGroupInfo& ReadGroupInfo::BasecallerVersion(const std::string& versionNumber)
@@ -274,6 +118,14 @@ inline std::string ReadGroupInfo::BindingKit(void) const
 inline ReadGroupInfo& ReadGroupInfo::BindingKit(const std::string& kitNumber)
 { bindingKit_ = kitNumber; return *this; }
 
+inline ReadGroupInfo& ReadGroupInfo::ClearBarcodeData(void)
+{
+    barcodeFile_.clear();
+    barcodeHash_.clear();
+    hasBarcodeData_ = false;
+    return *this;
+}
+
 inline bool ReadGroupInfo::Control(void) const
 { return control_; }
 
@@ -304,6 +156,9 @@ inline std::string ReadGroupInfo::FrameRateHz(void) const
 inline ReadGroupInfo& ReadGroupInfo::FrameRateHz(const std::string& frameRateHz)
 { frameRateHz_ = frameRateHz; return *this; }
 
+inline bool ReadGroupInfo::HasBarcodeData(void) const
+{ return hasBarcodeData_; }
+
 inline bool ReadGroupInfo::HasBaseFeature(const BaseFeature& feature) const
 { return features_.find(feature) != features_.end(); }
 
@@ -317,6 +172,12 @@ inline ReadGroupInfo& ReadGroupInfo::Id(const std::string& movieName,
                                         const std::string& readType)
 { id_ = MakeReadGroupId(movieName, readType); return *this; }
 
+inline int32_t ReadGroupInfo::IdToInt(const std::string& rgId)
+{
+    const uint32_t rawid = std::stoul(rgId, nullptr, 16);
+    return static_cast<int32_t>(rawid);
+}
+
 inline FrameCodec ReadGroupInfo::IpdCodec(void) const
 { return ipdCodec_; }
 
@@ -344,6 +205,12 @@ inline ReadGroupInfo& ReadGroupInfo::MovieName(const std::string& movieName)
 inline std::string ReadGroupInfo::Platform(void) const
 { return std::string("PACBIO"); }
 
+inline PlatformModelType ReadGroupInfo::PlatformModel(void) const
+{ return platformModel_; }
+
+inline ReadGroupInfo& ReadGroupInfo::PlatformModel(const PlatformModelType& platform)
+{ platformModel_ = platform; return *this; }
+
 inline std::string ReadGroupInfo::PredictedInsertSize(void) const
 { return predictedInsertSize_; }
 
@@ -377,6 +244,13 @@ inline std::string ReadGroupInfo::SequencingCenter(void) const
 inline ReadGroupInfo& ReadGroupInfo::SequencingCenter(const std::string& center)
 { sequencingCenter_ = center; return *this; }
 
+inline std::string ReadGroupInfo::SequencingChemistry(void) const
+{
+    return SequencingChemistryFromTriple(BindingKit(),
+                                         SequencingKit(),
+                                         BasecallerVersion());
+}
+
 inline std::string ReadGroupInfo::SequencingKit(void) const
 { return sequencingKit_; }
 
@@ -388,5 +262,3 @@ inline std::string ReadGroupInfo::ToSam(const ReadGroupInfo& rg)
 
 } // namespace BAM
 } // namespace PacBio
-
-#endif // READGROUPINFO_H
diff --git a/include/pbbam/SequenceInfo.h b/include/pbbam/internal/SequenceInfo.inl
similarity index 60%
copy from include/pbbam/SequenceInfo.h
copy to include/pbbam/internal/SequenceInfo.inl
index 0cf9d04..93b653d 100644
--- a/include/pbbam/SequenceInfo.h
+++ b/include/pbbam/internal/SequenceInfo.inl
@@ -32,104 +32,31 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file SequenceInfo.inl
+/// \brief Inline implementations for the SequenceInfo class.
+//
 // Author: Derek Barnett
 
-#ifndef SEQUENCEINFO_H
-#define SEQUENCEINFO_H
-
-#include "pbbam/Config.h"
-#include <map>
-#include <string>
+#include "pbbam/SequenceInfo.h"
 
 namespace PacBio {
 namespace BAM {
 
-class PBBAM_EXPORT SequenceInfo
+inline bool SequenceInfo::operator==(const SequenceInfo& other) const
 {
-public:
-    /// \name Conversion & Validation
-    ///
-
-    static SequenceInfo FromSam(const std::string& sam);
-
-    static std::string ToSam(const SequenceInfo& seq);
-
-    /// \}
-
-public:
-    /// \name Constructors & Related Methods
-    /// \{
-
-    SequenceInfo(void);
-    SequenceInfo(const std::string& name, const std::string& length = "0");
-    SequenceInfo(const SequenceInfo& other);
-    SequenceInfo(SequenceInfo&& other);
-    SequenceInfo& operator=(const SequenceInfo& other);
-    SequenceInfo& operator=(SequenceInfo&& other);
-    ~SequenceInfo(void);
-
-    /// \}
-
-public:
-    /// \name Attributes
-    /// \{
-
-    std::string AssemblyId(void) const;
-
-    std::string Checksum(void) const;
-
-    std::map<std::string, std::string> CustomTags(void) const;
-
-    std::string Length(void) const;
-
-    std::string Name(void) const;
-
-    std::string Species(void) const;
-
-    std::string Uri(void) const;
-
-    /// \}
-
-    /// \name Conversion & Validation
-    ///
-
-    bool IsValid(void) const;
-
-    std::string ToSam(void) const;
-
-    /// \}
-
-public:
-    /// \name Attributes
-
-    SequenceInfo& AssemblyId(const std::string& id);
-
-    SequenceInfo& Checksum(const std::string& checksum);
-
-    SequenceInfo& CustomTags(const std::map<std::string, std::string>& custom);
-
-    SequenceInfo& Length(const std::string& length);
-
-    SequenceInfo& Name(const std::string& name);
-
-    SequenceInfo& Species(const std::string& species);
-
-    SequenceInfo& Uri(const std::string& uri);
-
-    /// \}
-
-private:
-    std::string name_;                   // SN:<Name>            * Unique Name required for valid SAM header*
-    std::string length_;                 // LN:<Length>          * [0 - 2^31-1]
-    std::string assemblyId_;             // AS:<AssemblyId>
-    std::string checksum_;               // M5:<Checksum>
-    std::string species_;                // SP:<Species>
-    std::string uri_;                    // UR:<URI>
-
-    // custom attributes
-    std::map<std::string, std::string> custom_; // tag => value
-};
+    return assemblyId_ == other.assemblyId_ &&
+           checksum_   == other.checksum_   &&
+           length_     == other.length_     &&
+           name_       == other.name_       &&
+           species_    == other.species_    &&
+           uri_        == other.uri_        &&
+           custom_     == other.custom_;
+}
+
+inline bool SequenceInfo::operator!=(const SequenceInfo& other) const
+{ return !(*this == other); }
 
 inline std::string SequenceInfo::AssemblyId(void) const
 { return assemblyId_; }
@@ -178,5 +105,3 @@ inline SequenceInfo& SequenceInfo::Uri(const std::string& uri)
 
 } // namespace BAM
 } // namespace PacBio
-
-#endif // SEQUENCEINFO_H
diff --git a/include/pbbam/internal/Tag.inl b/include/pbbam/internal/Tag.inl
index cf9f60a..f8d4af2 100644
--- a/include/pbbam/internal/Tag.inl
+++ b/include/pbbam/internal/Tag.inl
@@ -32,16 +32,17 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file Tag.inl
+/// \brief Inline implementations for the Tag class.
+//
 // Author: Derek Barnett
 
 #include "pbbam/Tag.h"
 #include <boost/numeric/conversion/cast.hpp>
 #include <iostream>
 
-#ifndef TAG_INL
-#define TAG_INL
-
 namespace PacBio {
 namespace BAM {
 namespace internal {
@@ -91,7 +92,7 @@ struct NumericConvertVisitor : public boost::static_visitor<DesiredType>
     {
         const std::string from = typeid(t).name();
         const std::string to   = typeid(DesiredType).name();
-        const std::string msg = std::string("conversion not supported: ") + from + " -> " + to;
+        const std::string msg  = std::string("conversion not supported: ") + from + " -> " + to;
         throw std::runtime_error(msg);
         return 0;
     }
@@ -114,7 +115,7 @@ struct IsEqualVisitor : public boost::static_visitor<bool>
         return false;
     }
 
-    bool operator()(const boost::blank&, const boost::blank&) const
+    bool operator() (const boost::blank&, const boost::blank&) const
     { return true; }
 
     template <typename T>
@@ -273,8 +274,8 @@ inline uint16_t Tag::ToUInt16(void) const
 
 inline int32_t Tag::ToInt32(void) const
 {
-//    if (IsInt32())
-//        return boost::get<int32_t>(data_);
+    if (IsInt32())
+        return boost::get<int32_t>(data_);
     return boost::apply_visitor(internal::ToInt32ConvertVisitor(), data_);
 }
 
@@ -320,5 +321,3 @@ inline std::string Tag::Typename(void) const
 
 } // namespace BAM
 } // namespace PacBio
-
-#endif // TAG_INL
diff --git a/include/pbbam/virtual/VirtualPolymeraseBamRecord.h b/include/pbbam/virtual/VirtualPolymeraseBamRecord.h
index 86f3ef8..9a81790 100644
--- a/include/pbbam/virtual/VirtualPolymeraseBamRecord.h
+++ b/include/pbbam/virtual/VirtualPolymeraseBamRecord.h
@@ -32,11 +32,15 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file VirtualPolymeraseBamRecord.h
+/// \brief Defines the VirtualPolymeraseBamRecord class.
+//
 // Author: Armin Töpfer
 
-#ifndef POLYMERASEBAMRECORD_H
-#define POLYMERASEBAMRECORD_H
+#ifndef VIRTUALPOLYMERASEBAMRECORD_H
+#define VIRTUALPOLYMERASEBAMRECORD_H
 
 #include <vector>
 #include <sstream>
@@ -50,87 +54,68 @@
 namespace PacBio {
 namespace BAM {
 
-/// This class represents a polymerase read stitched on the fly
-/// from subreads|hqregion+scraps.
+/// \brief The VirtualPolymeraseBamRecord class represents a polymerase read stitched on the fly
+///        from subreads|hqregion+scraps.
+///
 class VirtualPolymeraseBamRecord : public BamRecord
 {
 public:
+    /// \name Constructors & Related Methods
+    /// \{
+
+    /// \brief Creates a "virtual" polymerase %BAM record, by re-stitching its constituent segments.
+    ///
+    /// \param[in] unorderedSources source data (subreads, scraps, etc.)
+    /// \param[in] header           %BAM header to associate with the new record
+    ///
+    /// \throws std::runtime_error on failure to stitch virtual record
+    ///
     VirtualPolymeraseBamRecord(std::vector<BamRecord>&& unorderedSources,
                                const BamHeader& header);
 
-    VirtualPolymeraseBamRecord() = delete;
-    // Move constructor
+    VirtualPolymeraseBamRecord(void) = delete;
+    VirtualPolymeraseBamRecord(const VirtualPolymeraseBamRecord&) = default; // un-"delete"-ed for SWIG
     VirtualPolymeraseBamRecord(VirtualPolymeraseBamRecord&&) = default;
-    // Copy constructor
-    VirtualPolymeraseBamRecord(const VirtualPolymeraseBamRecord&) = delete;
-    // Move assignment operator
-    VirtualPolymeraseBamRecord& operator=(VirtualPolymeraseBamRecord&&) = default;
-    // Copy assignment operator
     VirtualPolymeraseBamRecord& operator=(const VirtualPolymeraseBamRecord&) = delete;
-    // Destructor
+    VirtualPolymeraseBamRecord& operator=(VirtualPolymeraseBamRecord&&) = default;
     virtual ~VirtualPolymeraseBamRecord() = default;
 
-public:
-    /// Provides bool if a given VirtualRegionType has been annotated
-    bool HasVirtualRegionType(const VirtualRegionType type) const
-    { return virtualRegionsMap_.find(type) != virtualRegionsMap_.end(); }
+    /// \}
 
-    /// Provides annotations of the polymerase read for a given VirtualRegionType
-    std::vector<VirtualRegion> VirtualRegionsTable(const VirtualRegionType type) const
-    { return virtualRegionsMap_.at(type); }
+public:
+    /// \name Virtual Record Attributes
+    /// \{
 
-    /// Provides all annotations of the polymerase read as a map
-    std::map<VirtualRegionType, std::vector<VirtualRegion>> VirtualRegionsMap() const
-    { return virtualRegionsMap_; }
+    /// \returns true if requested VirtualRegionType has been annotated.
+    ///
+    bool HasVirtualRegionType(const VirtualRegionType regionType) const;
 
-public: // New BamRecord functionality.
+    /// \returns IPD frame data
+    ///
     Frames IPDV1Frames(Orientation orientation = Orientation::NATIVE) const;
 
+    /// \brief Provides all annotations of the polymerase read as a map (type => regions)
+    ///
+    std::map<VirtualRegionType, std::vector<VirtualRegion>> VirtualRegionsMap(void) const;
+
+    /// \brief Provides annotations of the polymerase read for a given VirtualRegionType.
+    ///
+    /// \param[in] regionType  requested region type
+    /// \returns regions that match the requested type (empty vector if none found).
+    ///
+    std::vector<VirtualRegion> VirtualRegionsTable(const VirtualRegionType regionType) const;
+
+    /// \}
+
 private:
     std::vector<BamRecord> sources_;
     std::map<VirtualRegionType, std::vector<VirtualRegion>> virtualRegionsMap_;
 
 private:
-    void StitchSources();
-
-    /// \brief Appends content of src vector to dst vector using move semantics.
-    /// \param[in] src Input vector that will be empty after execution
-    /// \param[in,out] dest Output vector that will be appended to
-    template <typename T>
-    inline void MoveAppend(std::vector<T>& src, std::vector<T>& dst) noexcept
-    {
-        if (dst.empty())
-        {
-            dst = std::move(src);
-        }
-        else
-        {
-            dst.reserve(dst.size() + src.size());
-            std::move(src.begin(), src.end(), std::back_inserter(dst));
-            src.clear();
-        }
-    }
-
-    /// \brief Appends content of src vector to dst vector using move semantics.
-    /// \param[in] src Input vector via perfect forwarding
-    /// \param[in,out] dest Output vector that will be appended to
-    template <typename T>
-    inline void MoveAppend(std::vector<T>&& src, std::vector<T>& dst) noexcept
-    {
-        if (dst.empty())
-        {
-            dst = std::move(src);
-        }
-        else
-        {
-            dst.reserve(dst.size() + src.size());
-            std::move(src.begin(), src.end(), std::back_inserter(dst));
-            src.clear();
-        }
-    }
+    void StitchSources(void);
 };
 
 } // namespace BAM
 } // namespace PacBio
 
-#endif // POLYMERASEBAMRECORD_H
+#endif // VIRTUALPOLYMERASEBAMRECORD_H
diff --git a/include/pbbam/virtual/VirtualPolymeraseCompositeReader.h b/include/pbbam/virtual/VirtualPolymeraseCompositeReader.h
new file mode 100644
index 0000000..9ab025d
--- /dev/null
+++ b/include/pbbam/virtual/VirtualPolymeraseCompositeReader.h
@@ -0,0 +1,111 @@
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file VirtualPolymeraseCompositeReader.h
+/// \brief Defines the VirtualPolymeraseCompositeReader class.
+//
+// Author: Derek Barnett
+
+#ifndef VIRTUALPOLYMERASECOMPOSITEREADER_H
+#define VIRTUALPOLYMERASECOMPOSITEREADER_H
+
+#include "pbbam/DataSet.h"
+#include "pbbam/PbiFilter.h"
+#include "pbbam/virtual/VirtualPolymeraseReader.h"
+#include <deque>
+#include <memory>
+#include <string>
+#include <utility>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The VirtualPolymeraseCompositeReader provides an interface for
+///        re-stitching "virtual" polymerase reads from their constituent parts,
+///        across multiple %BAM resources from a DataSet.
+///
+/// This class is essentially a DataSet-aware wrapper around
+/// VirtualPolymeraseReader, enabling multiple resources as input. See that
+/// class's documentation for more info.
+///
+class PBBAM_EXPORT VirtualPolymeraseCompositeReader
+{
+public:
+    /// \name Constructors & Related Methods
+    /// \{
+
+    VirtualPolymeraseCompositeReader(const DataSet& dataset);
+
+    VirtualPolymeraseCompositeReader(void) = delete;
+    VirtualPolymeraseCompositeReader(const VirtualPolymeraseCompositeReader&) = delete;
+    VirtualPolymeraseCompositeReader(VirtualPolymeraseCompositeReader&&) = delete;
+    VirtualPolymeraseCompositeReader& operator=(const VirtualPolymeraseCompositeReader&) = delete;
+    VirtualPolymeraseCompositeReader& operator=(VirtualPolymeraseCompositeReader&&) = delete;
+    ~VirtualPolymeraseCompositeReader(void) = default;
+
+    /// \}
+
+public:
+    /// \name Stitched Record Reading
+    /// \{
+
+    /// \returns true if more ZMWs/files are available for reading.
+    bool HasNext(void);
+
+    /// \returns the next stitched polymerase read
+    VirtualPolymeraseBamRecord Next(void);
+
+    /// \returns the next set of reads that belong to one ZMW from one %BAM
+    ///          resource (a primary %BAM and/or its scraps file). This enables
+    ///          stitching records in a distinct thread.
+    ///
+    std::vector<BamRecord> NextRaw(void);
+
+    /// \}
+
+private:
+    std::deque< std::pair<std::string, std::string> > sources_;
+    std::unique_ptr<VirtualPolymeraseReader> currentReader_;
+    PbiFilter filter_;
+
+private:
+    void OpenNextReader(void);
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // VIRTUALPOLYMERASECOMPOSITEREADER_H
diff --git a/include/pbbam/virtual/VirtualPolymeraseReader.h b/include/pbbam/virtual/VirtualPolymeraseReader.h
index e166482..0e2e198 100644
--- a/include/pbbam/virtual/VirtualPolymeraseReader.h
+++ b/include/pbbam/virtual/VirtualPolymeraseReader.h
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file VirtualPolymeraseReader.h
+/// \brief Defines the VirtualPolymeraseReader class.
+//
 // Author: Armin Töpfer
 
 #ifndef VIRTUALPOLYMERASEREADER_H
@@ -44,59 +48,87 @@
 #include "pbbam/BamRecord.h"
 #include "pbbam/Config.h"
 #include "pbbam/EntireFileQuery.h"
+#include "pbbam/PbiFilter.h"
+#include "pbbam/PbiFilterQuery.h"
 #include "pbbam/virtual/VirtualPolymeraseBamRecord.h"
 
 namespace PacBio {
 namespace BAM {
 
+/// \brief The VirtualPolymeraseReader class provides an interface for re-stitching
+///        "virtual" polymerase reads from their constituent parts.
+///
 class VirtualPolymeraseReader
 {
 public:
-	/// Constructor takes two input bam file paths.
-	/// \param[in] primaryBamFilePath hqregion.bam or subreads.bam file path
-	/// \param[in] scrapsBamFilePath scraps.bam file path
+    /// \name Constructors & Related Methods
+    /// \{
+
+    /// \brief Creates a reader that will operate on a primary %BAM file (e.g. subread data)
+    ///        and a scraps file, consuming all reads.
+    ///
+    /// \param[in] primaryBamFilePath hqregion.bam or subreads.bam file path
+    /// \param[in] scrapsBamFilePath  scraps.bam file path
+    ///
     VirtualPolymeraseReader(const std::string& primaryBamFilePath,
                             const std::string& scrapsBamFilePath);
 
-    VirtualPolymeraseReader() = delete;
-    // Move constructor
-    VirtualPolymeraseReader(VirtualPolymeraseReader&&) = delete;
-    // Copy constructor
+    /// \brief Creates a reader that will operate on a primary %BAM file (e.g. subread data)
+    ///        and a scraps file, respecting the provided PBI filter.
+    ///
+    /// \note All %BAM files must have a corresponding ".pbi" index file to use
+    ///       the filter. You may need to call BamFile::EnsurePacBioIndexExists
+    ///       before constructing the reader.
+    ///
+    /// \param[in] primaryBamFilePath hqregion.bam or subreads.bam file path
+    /// \param[in] scrapsBamFilePath  scraps.bam file path
+    /// \param[in] filter PBI filter criteria
+    ///
+    VirtualPolymeraseReader(const std::string& primaryBamFilePath,
+                            const std::string& scrapsBamFilePath,
+                            const PbiFilter& filter);
+
+    VirtualPolymeraseReader(void) = delete;
     VirtualPolymeraseReader(const VirtualPolymeraseReader&) = delete;
-    // Move assignment operator
-    VirtualPolymeraseReader& operator=(VirtualPolymeraseReader&&) = delete;
-    // Copy assignment operator
+    VirtualPolymeraseReader(VirtualPolymeraseReader&&) = delete;
     VirtualPolymeraseReader& operator=(const VirtualPolymeraseReader&) = delete;
-    // Destructor
-    ~VirtualPolymeraseReader() = default;
+    VirtualPolymeraseReader& operator=(VirtualPolymeraseReader&&) = delete;
+    ~VirtualPolymeraseReader(void);
+
+    /// \}
 
 public:
-	/// Provides the next stitched polymerase read
-	VirtualPolymeraseBamRecord Next();
+    /// \name File Headers
+    /// \{
 
-	/// Provides the next set of reads that belong to one ZMW.
-	/// Enables stitching records in a distinct thread.
-	std::vector<BamRecord> NextRaw();
+    /// \returns the BamHeader associated with this reader's "primary" %BAM file
+    BamHeader PrimaryHeader(void) const;
 
-	/// Returns true if more ZMWs are available for reading.
-	bool HasNext();
+    /// \returns the BamHeader associated with this reader's "scraps" %BAM file
+    BamHeader ScrapsHeader(void) const;
 
-	BamHeader PrimaryHeader();
-	BamHeader ScrapsHeader();
+    /// \}
 
-private:
-	const std::string                primaryBamFilePath_;
-	const std::string                scrapsBamFilePath_;
+public:
+    /// \name Stitched Record Reading
+    /// \{
 
-	std::unique_ptr<BamFile>         primaryBamFile_;
-	std::unique_ptr<BamFile>         scrapsBamFile_;
-    std::unique_ptr<EntireFileQuery> primaryQuery_;
-    std::unique_ptr<EntireFileQuery> scrapsQuery_;
+    /// \returns true if more ZMWs are available for reading.
+    bool HasNext(void);
 
-    EntireFileQuery::iterator              primaryIt_;
-    EntireFileQuery::iterator              scrapsIt_;
+    /// \returns the next stitched polymerase read
+    VirtualPolymeraseBamRecord Next(void);
 
-	std::unique_ptr<BamHeader>       polyHeader_;
+    /// \returns the next set of reads that belong to one ZMW.
+    ///          This enables stitching records in a distinct thread.
+    ///
+    std::vector<BamRecord> NextRaw(void);
+
+    /// \}
+
+private:
+    struct VirtualPolymeraseReaderPrivate;
+    std::unique_ptr<VirtualPolymeraseReaderPrivate> d_;
 };
 
 } // namespace BAM
diff --git a/include/pbbam/virtual/VirtualRegion.h b/include/pbbam/virtual/VirtualRegion.h
index 69c16df..facce7d 100644
--- a/include/pbbam/virtual/VirtualRegion.h
+++ b/include/pbbam/virtual/VirtualRegion.h
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file VirtualRegion.h
+/// \brief Defines the VirtualRegion class.
+//
 // Author: Armin Töpfer
 
 #ifndef VIRTUALREGION_H
@@ -44,7 +48,9 @@
 
 namespace PacBio {
 namespace BAM {
-/// Represents annotation of a polymerase region.
+
+/// \brief The VirtualRegion represents an annotation of a polymerase region.
+///
 struct VirtualRegion
 {
 public:
@@ -54,49 +60,70 @@ public:
     LocalContextFlags cxTag = LocalContextFlags::NO_LOCAL_CONTEXT;
     int barcodeLeft = -1;
     int barcodeRight = -1;
+    int score = 0;
 
 public:
+    /// \brief Creates a virtual region with basic type & position info.
+    ///
     VirtualRegion(const VirtualRegionType type, 
                   const int beginPos,
-                  const int endPos)
-        : type(type)
-        , beginPos(beginPos)
-        , endPos(endPos), cxTag()
-    {}
+                  const int endPos,
+                  const int score = 0);
+
+    /// \brief Creates a virtual region with type/position info, as well as context & barcode.
+    ///
     VirtualRegion(const VirtualRegionType type, 
                   const int beginPos,
                   const int endPos, 
                   const LocalContextFlags cxTag, 
                   const int barcodeLeft,
-                  const int barcodeRight)
-        : type(type)
-        , beginPos(beginPos)
-        , endPos(endPos)
-        , cxTag(cxTag)
-        , barcodeLeft(barcodeLeft)
-        , barcodeRight(barcodeRight)
-    {}
-    VirtualRegion() = default;
-    // Move constructor
-    VirtualRegion(VirtualRegion&&) = default;
-    // Copy constructor
+                  const int barcodeRight,
+                  const int score = 0);
+
+    VirtualRegion(void) = default;
     VirtualRegion(const VirtualRegion&) = default;
-    // Move assignment operator
+    VirtualRegion(VirtualRegion&&) = default;
+    VirtualRegion& operator=(const VirtualRegion&) = default; // un-"delete"-ed for SWIG
     VirtualRegion& operator=(VirtualRegion&&) = default;
-    // Copy assignment operator
-    VirtualRegion& operator=(const VirtualRegion&) = delete;
-    // Destructor
-    ~VirtualRegion() = default;
+    ~VirtualRegion(void) = default;
+
+    bool operator==(const VirtualRegion &v1) const;
 
-public:
-    bool operator==(const VirtualRegion &v1) const
-    {
-        return (v1.type == this->type &&
-                v1.beginPos == this->beginPos &&
-                v1.endPos == this->endPos);
-    }
 };
 
+inline VirtualRegion::VirtualRegion(const VirtualRegionType type,
+                                    const int beginPos,
+                                    const int endPos,
+                                    const int score)
+    : type(type)
+    , beginPos(beginPos)
+    , endPos(endPos), cxTag()
+    , score(score)
+{}
+
+inline VirtualRegion::VirtualRegion(const VirtualRegionType type,
+                                    const int beginPos,
+                                    const int endPos,
+                                    const LocalContextFlags cxTag,
+                                    const int barcodeLeft,
+                                    const int barcodeRight,
+                                    const int score)
+    : type(type)
+    , beginPos(beginPos)
+    , endPos(endPos)
+    , cxTag(cxTag)
+    , barcodeLeft(barcodeLeft)
+    , barcodeRight(barcodeRight)
+    , score(score)
+{}
+
+inline bool VirtualRegion::operator==(const VirtualRegion& v1) const
+{
+    return (v1.type == this->type &&
+            v1.beginPos == this->beginPos &&
+            v1.endPos == this->endPos);
+}
+
 } // namespace BAM
 } // namespace PacBio
 
diff --git a/include/pbbam/virtual/VirtualRegionType.h b/include/pbbam/virtual/VirtualRegionType.h
index 6b917bf..d359094 100644
--- a/include/pbbam/virtual/VirtualRegionType.h
+++ b/include/pbbam/virtual/VirtualRegionType.h
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file VirtualRegionType.h
+/// \brief Defines the VirtualRegionType enum.
+//
 // Author: Derek Barnett
 
 #ifndef REGIONTYPE_H
@@ -42,14 +46,17 @@
 
 namespace PacBio {
 namespace BAM {
-/// Type of annotated region.
-enum class VirtualRegionType : char
+
+/// \brief This enum defines the types of annotated region.
+///
+enum class VirtualRegionType // : char
 {
-	ADAPTER        = 'A',
-	BARCODE        = 'B',
-	SUBREAD        = 'S',
-	HQREGION       = 'H',
-	LQREGION       = 'L'  // Outside the HQ region
+    ADAPTER        = 0x41,  ///< Adapter region ('A')
+    BARCODE        = 0x42,  ///< Barcode region ('B')
+    FILTERED       = 0x46,  ///< Filtered subread ('F')
+    SUBREAD        = 0x53,  ///< Subread ('S')
+    HQREGION       = 0x48,  ///< High-quality region ('H')
+    LQREGION       = 0x4C   ///< Low-quality region ('L'), i.e. outside the HQ region
 };
 
 } // namespace BAM
diff --git a/include/pbbam/virtual/VirtualRegionTypeMap.h b/include/pbbam/virtual/VirtualRegionTypeMap.h
index 105696c..200f12f 100644
--- a/include/pbbam/virtual/VirtualRegionTypeMap.h
+++ b/include/pbbam/virtual/VirtualRegionTypeMap.h
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file VirtualRegionTypeMap.h
+/// \brief Defines the VirtualRegionTypeMap class.
+//
 // Author: Derek Barnett
 
 #ifndef VIRTUALREGIONTYPEMAP_H
@@ -45,7 +49,10 @@
 
 namespace PacBio {
 namespace BAM {
-/// Allows mapping of char 'A', 'B', 'H', and 'L' to the respective enum keys.
+
+/// \brief The VirtualRegionTypeMap class provides mapping between char codes and
+///        VirtualRegionType enum keys.
+///
 class VirtualRegionTypeMap
 {
 public:
diff --git a/include/pbbam/virtual/ZmwWhitelistVirtualReader.h b/include/pbbam/virtual/ZmwWhitelistVirtualReader.h
new file mode 100644
index 0000000..d40f03c
--- /dev/null
+++ b/include/pbbam/virtual/ZmwWhitelistVirtualReader.h
@@ -0,0 +1,151 @@
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file ZmwWhitelistVirtualReader.h
+/// \brief Defines the ZmwWhitelistVirtualReader class.
+//
+// Author: Derek Barnett
+
+#ifndef ZMWWHITELISTVIRTUALREADER_H
+#define ZMWWHITELISTVIRTUALREADER_H
+
+#include <deque>
+#include <memory>
+#include <vector>
+#include <string>
+#include "pbbam/BamFile.h"
+#include "pbbam/BamRecord.h"
+#include "pbbam/Config.h"
+#include "pbbam/PbiIndexedBamReader.h"
+#include "pbbam/virtual/VirtualPolymeraseBamRecord.h"
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The ZmwWhitelistVirtualReader class provides an interface for re-stitching
+///        "virtual" polymerase reads from their constituent parts, limiting results
+///        to only those reads originating from a 'whitelist' of ZMW hole numbers.
+///
+/// Whitelisted ZMWs that are not present in both primary and scraps BAMs
+/// will be "pre-removed." This ensures that, given client code like this:
+///
+/// \include code/ZmwWhitelistVirtualReader.txt
+///
+/// each iteration will always provide valid data - either a valid virtual record from
+/// Next() or a non-empty vector from NextRaw().
+///
+/// \note This reader requires that both input %BAM files also have associated PBI
+///       files available for query. See BamFile::EnsurePacBioIndexExists .
+///
+class ZmwWhitelistVirtualReader
+{
+public:
+    /// \name Constructors & Related Methods
+    /// \{
+
+    /// \brief Creates a reader that will operate on a primary %BAM file (e.g. subread data)
+    ///        and a scraps file, using a ZMW whitelist to filter the input.
+    ///
+    /// \param[in] zmwWhitelist         list of ZMWs to restrict iteration over
+    /// \param[in] primaryBamFilePath   hqregion.bam or subreads.bam file path
+    /// \param[in] scrapsBamFilePath    scraps.bam file path
+    ///
+    /// \note This reader requires that both input %BAM files also have associated PBI
+    ///       files available for query. See BamFile::EnsurePacBioIndexExists .
+    ///
+    /// \throws std::runtime_error if any files (*.bam and/or *.pbi) were not available for reading, or
+    ///         if malformed data is encountered
+    ///
+    ZmwWhitelistVirtualReader(const std::vector<int32_t>& zmwWhitelist,
+                              const std::string& primaryBamFilePath,
+                              const std::string& scrapsBamFilePath);
+
+    ZmwWhitelistVirtualReader(void) = delete;
+    ZmwWhitelistVirtualReader(const ZmwWhitelistVirtualReader&) = delete;
+    ZmwWhitelistVirtualReader(ZmwWhitelistVirtualReader&&)      = delete;
+    ZmwWhitelistVirtualReader& operator=(const ZmwWhitelistVirtualReader&) = delete;
+    ZmwWhitelistVirtualReader& operator=(ZmwWhitelistVirtualReader&&)      = delete;
+    ~ZmwWhitelistVirtualReader(void) = default;
+
+    /// \}
+
+public:
+    /// \name Stitched Record Reading
+    /// \{
+
+    /// \returns true if more ZMWs are available for reading.
+    bool HasNext(void) const;
+
+    /// \returns the re-stitched polymerase read from the next ZMW in the whitelist
+    VirtualPolymeraseBamRecord Next(void);
+
+    /// \returns the set of reads that belong to the next ZMW in the whitelist.
+    ///          This enables stitching records in a distinct thread.
+    ///
+    std::vector<BamRecord> NextRaw(void);
+
+    /// \}
+
+public:
+    /// \name File Headers
+    /// \{
+
+    /// \returns the BamHeader associated with this reader's "primary" %BAM file
+    BamHeader PrimaryHeader(void) const;
+
+    /// \returns the BamHeader associated with this reader's "scraps" %BAM file
+    BamHeader ScrapsHeader(void) const;
+
+    /// \}
+
+private:
+    const std::string        primaryBamFilePath_;
+    const std::string        scrapsBamFilePath_;
+    std::unique_ptr<BamFile> primaryBamFile_;
+    std::unique_ptr<BamFile> scrapsBamFile_;
+    std::unique_ptr<PbiIndexedBamReader> primaryReader_;
+    std::unique_ptr<PbiIndexedBamReader> scrapsReader_;
+    std::unique_ptr<BamHeader> polyHeader_;
+    std::deque<int32_t>        zmwWhitelist_;
+
+private:
+    void PreFilterZmws(const std::vector<int32_t>& zmwWhitelist);
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // ZMWWHITELISTVIRTUALREADER_H
diff --git a/src/Accuracy.cpp b/src/Accuracy.cpp
index 0c8114a..e335abf 100644
--- a/src/Accuracy.cpp
+++ b/src/Accuracy.cpp
@@ -32,12 +32,16 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file Accuracy.cpp
+/// \brief Implements the Accuracy class.
+//
 // Author: Derek Barnett
 
 #include "pbbam/Accuracy.h"
 using namespace PacBio;
 using namespace PacBio::BAM;
 
-const int Accuracy::MIN = 0;
-const int Accuracy::MAX = 1000;
+const float Accuracy::MIN = 0.0f;
+const float Accuracy::MAX = 1.0f;
diff --git a/src/AlignmentPrinter.cpp b/src/AlignmentPrinter.cpp
index 6692021..5155859 100644
--- a/src/AlignmentPrinter.cpp
+++ b/src/AlignmentPrinter.cpp
@@ -32,24 +32,31 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file AlignmentPrinter.cpp
+/// \brief Implements the AlignmentPrinter class.
+//
 // Author: Armin Töpfer
 
+#include "pbbam/AlignmentPrinter.h"
+
 #include <cmath>
 #include <iostream>
 #include <iomanip>  
 #include <stdexcept>
 #include <sstream>
 
-#include "pbbam/AlignmentPrinter.h"
-
 using namespace PacBio;
 using namespace PacBio::BAM;
 
+AlignmentPrinter::AlignmentPrinter(const IndexedFastaReader& ifr)
+    : ifr_(std::unique_ptr<IndexedFastaReader>(new IndexedFastaReader(ifr)))
+{ }
+
 std::string AlignmentPrinter::Print(const BamRecord& record,
                                     const Orientation orientation)
 {
-
 	std::string seq = record.Sequence(orientation, true, true);
 	std::string ref = ifr_->ReferenceSubsequence(record, orientation, true, true);
 
diff --git a/src/BaiIndexedBamReader.cpp b/src/BaiIndexedBamReader.cpp
new file mode 100644
index 0000000..3f9d538
--- /dev/null
+++ b/src/BaiIndexedBamReader.cpp
@@ -0,0 +1,141 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BaiIndexedBamReader.cpp
+/// \brief Implements the BaiIndexedBamReader class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/BaiIndexedBamReader.h"
+#include "MemoryUtils.h"
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// \brief Private implementation of BaiIndexedBamReader.
+///
+/// Holds the htslib index loaded for the BAM file, the htslib iterator for
+/// the interval currently being read, and a copy of that interval.
+struct BaiIndexedBamReaderPrivate
+{
+public:
+    /// Loads the index for \p file and sets up iteration over \p interval.
+    ///
+    /// \throws std::runtime_error if the index cannot be loaded or if no
+    ///         iterator can be created for \p interval
+    BaiIndexedBamReaderPrivate(const BamFile& file,
+                               const GenomicInterval& interval)
+        : htsIndex_(nullptr)
+        , htsIterator_(nullptr)
+    {
+        LoadIndex(file.Filename());
+        Interval(file.Header(), interval);
+    }
+
+    /// Switches iteration to \p interval.
+    ///
+    /// The interval's reference name must be known to \p header and map to a
+    /// valid sequence ID. The new iterator and the stored interval are only
+    /// committed once the iterator has been created, so a failed call leaves
+    /// the previous iterator and interval untouched.
+    ///
+    /// \throws std::runtime_error if no iterator could be created
+    void Interval(const BamHeader& header,
+                  const GenomicInterval& interval)
+    {
+        std::unique_ptr<hts_itr_t, internal::HtslibIteratorDeleter> candidate(nullptr);
+
+        if (header.HasSequence(interval.Name())) {
+            auto id = header.SequenceId(interval.Name());
+            if (id >= 0 && static_cast<size_t>(id) < header.NumSequences()) {
+                candidate.reset(bam_itr_queryi(htsIndex_.get(),
+                                               id,
+                                               interval.Start(),
+                                               interval.Stop()));
+            }
+        }
+
+        if (!candidate)
+            throw std::runtime_error("could not create iterator for requested region");
+
+        // commit both pieces of state together; interval_ was previously never
+        // assigned, so it always held a default-constructed value
+        htsIterator_ = std::move(candidate);
+        interval_ = interval;
+    }
+
+    /// Loads the index that htslib associates with the BAM file \p fn.
+    ///
+    /// \throws std::runtime_error if the index could not be loaded
+    void LoadIndex(const string& fn)
+    {
+        htsIndex_.reset(bam_index_load(fn.c_str()));
+        if (!htsIndex_)
+            throw std::runtime_error("could not load BAI index data");
+    }
+
+    /// Reads the next record within the current interval into \p b.
+    ///
+    /// \returns the value returned by hts_itr_next()
+    int ReadRawData(BGZF* bgzf, bam1_t* b)
+    {
+        assert(htsIterator_.get());
+        return hts_itr_next(bgzf, htsIterator_.get(), b, nullptr);
+    }
+
+public:
+    GenomicInterval interval_;  // interval the current iterator was built for
+    std::unique_ptr<hts_idx_t, internal::HtslibIndexDeleter>    htsIndex_;
+    std::unique_ptr<hts_itr_t, internal::HtslibIteratorDeleter> htsIterator_;
+};
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+/// Opens the BAM file named \p filename and restricts reading to \p interval.
+///
+/// Builds a temporary BamFile from \p filename and delegates to the BamFile
+/// overload, so any exception from BamFile construction propagates from here.
+BaiIndexedBamReader::BaiIndexedBamReader(const GenomicInterval& interval,
+                                         const std::string& filename)
+    : BaiIndexedBamReader(interval, BamFile(filename))
+{ }
+
+/// Opens \p bamFile for reading and restricts reading to \p interval.
+///
+/// The private implementation is built from File(), i.e. the BamFile held by
+/// the already-constructed BamReader base, rather than from the parameter.
+BaiIndexedBamReader::BaiIndexedBamReader(const GenomicInterval& interval,
+                                         const BamFile& bamFile)
+    : BamReader(bamFile)
+    , d_(new BaiIndexedBamReaderPrivate(File(), interval))
+{ }
+
+/// Takes over \p bamFile and restricts reading to \p interval.
+///
+/// \p bamFile is moved into the BamReader base, so the private implementation
+/// is built from File() instead of the (now moved-from) parameter.
+BaiIndexedBamReader::BaiIndexedBamReader(const GenomicInterval& interval,
+                                         BamFile&& bamFile)
+    : BamReader(std::move(bamFile))
+    , d_(new BaiIndexedBamReaderPrivate(File(), interval))
+{ }
+
+/// \returns a reference to the interval stored in the private implementation
+const GenomicInterval& BaiIndexedBamReader::Interval(void) const
+{
+    assert(d_);
+    return d_->interval_;
+}
+
+/// Reads the next raw record via the private implementation's index iterator.
+///
+/// \returns the status code produced by the private implementation's
+///          ReadRawData()
+int BaiIndexedBamReader::ReadRawData(BGZF* bgzf, bam1_t* b)
+{
+    assert(d_);
+    return d_->ReadRawData(bgzf, b);
+}
+
+/// Requests a new genomic interval to read from, resolved against this
+/// reader's header.
+///
+/// \returns reference to this reader, allowing calls to be chained
+/// \throws std::runtime_error if no iterator can be created for \p interval
+BaiIndexedBamReader& BaiIndexedBamReader::Interval(const GenomicInterval& interval)
+{
+    assert(d_);
+    d_->Interval(Header(), interval);
+    return *this;
+}
diff --git a/src/BamFile.cpp b/src/BamFile.cpp
index f75bc9d..249c3e2 100644
--- a/src/BamFile.cpp
+++ b/src/BamFile.cpp
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file BamFile.cpp
+/// \brief Implements the BamFile class.
+//
 // Author: Derek Barnett
 
 #include "pbbam/BamFile.h"
@@ -41,6 +45,8 @@
 #include "MemoryUtils.h"
 #include <htslib/sam.h>
 #include <memory>
+#include <sstream>
+#include <cassert>
 #include <sys/stat.h>
 using namespace PacBio;
 using namespace PacBio::BAM;
@@ -55,25 +61,52 @@ class BamFilePrivate
 public:
+    /// Opens \p fn read-only through htslib, validates it, and caches its
+    /// header plus the BGZF virtual offset that follows the header.
+    ///
+    /// \throws std::runtime_error if the file cannot be opened, is not BAM,
+    ///         or (unless PBBAM_NO_CHECK_EOF is defined) fails the BGZF
+    ///         EOF-block check
     BamFilePrivate(const string& fn)
         : filename_(fn)
+        , firstAlignmentOffset_(-1)
     {
-        // update verbosity
-        hts_verbose = PacBio::BAM::HtslibVerbosity;
+        // ensure we've updated htslib verbosity with requested verbosity here
+        // NOTE(review): -1 presumably means "no verbosity requested" - confirm
+        hts_verbose = ( PacBio::BAM::HtslibVerbosity == -1 ? 0 : PacBio::BAM::HtslibVerbosity);
 
         // attempt open
         std::unique_ptr<samFile, internal::HtslibFileDeleter> f(sam_open(filename_.c_str(), "rb"));
-        if (!f)
-            throw std::runtime_error("could not open file");
+        if (!f || !f->fp.bgzf)
+            throw std::runtime_error(string("could not open BAM file: ") + filename_);
         if (f->format.format != bam)
             throw std::runtime_error("expected BAM, unknown format");
 
+#ifndef PBBAM_NO_CHECK_EOF
+        // sanity check on file
+        const int eofCheck = bgzf_check_EOF(f->fp.bgzf);
+        if (eofCheck <= 0 ) {
+            // 1:  EOF present & correct
+            // 2:  not seekable (e.g. reading from stdin)
+            // 0:  EOF absent
+            // -1: some other error
+            // (only 0 and negative values reach this branch; 1 and 2 pass)
+            stringstream e;
+            if (eofCheck == 0)
+                e << fn << " : is missing EOF block" << endl;
+            else
+                e << fn << " : unknown error while checking EOF block" << endl;
+            throw std::runtime_error(e.str());
+        }
+#endif
+
         // attempt fetch header
+        // NOTE(review): sam_hdr_read() may return null; presumably
+        // FromRawData() copes with that - confirm
         std::unique_ptr<bam_hdr_t, internal::HtslibHeaderDeleter> hdr(sam_hdr_read(f.get()));
         header_ = internal::BamHeaderMemory::FromRawData(hdr.get());
+
+        // cache first alignment offset
+        firstAlignmentOffset_ = bgzf_tell(f->fp.bgzf);
+    }
+
+    /// Produces an independent BamFilePrivate by constructing a new instance
+    /// from filename_; the constructor re-opens and re-validates the file,
+    /// so this can throw the same exceptions as construction.
+    unique_ptr<BamFilePrivate> DeepCopy(void)
+    {
+        return unique_ptr<BamFilePrivate>(new BamFilePrivate(filename_));
     }
 
 public:
     std::string filename_;
     BamHeader header_;
+    int64_t firstAlignmentOffset_;
 };
 
 } // namespace internal
@@ -89,7 +122,7 @@ BamFile::BamFile(const std::string& filename)
 { }
 
 BamFile::BamFile(const BamFile& other)
-    : d_(other.d_)
+    : d_(other.d_->DeepCopy())
 { }
 
 BamFile::BamFile(BamFile&& other)
@@ -97,30 +130,45 @@ BamFile::BamFile(BamFile&& other)
 { }
 
 BamFile& BamFile::operator=(const BamFile& other)
-{ d_ = other.d_; return *this; }
+{
+    d_ = other.d_->DeepCopy();
+    return *this;
+}
 
 BamFile& BamFile::operator=(BamFile&& other)
 { d_ = std::move(other.d_); return *this; }
 
 BamFile::~BamFile(void) { }
 
+/// Unconditionally builds the PacBio index for this file by calling
+/// PbiFile::CreateFrom(); no check is made for an existing index.
+void BamFile::CreatePacBioIndex(void) const
+{
+    PbiFile::CreateFrom(*this);
+}
+
+/// Unconditionally builds the standard index for this file through htslib's
+/// bam_index_build(); no check is made for an existing index.
+///
+/// \throws std::runtime_error if htslib reports a non-zero status
+void BamFile::CreateStandardIndex(void) const
+{
+    const int status = bam_index_build(d_->filename_.c_str(), 0);
+    if (status != 0)
+        throw std::runtime_error("could not build BAI index");
+}
+
 void BamFile::EnsurePacBioIndexExists(void) const
 {
     if (!PacBioIndexExists())
-        PbiFile::CreateFrom(*this);
+        CreatePacBioIndex();
 }
 
 void BamFile::EnsureStandardIndexExists(void) const
 {
-    if (!StandardIndexExists()) {
-        if (bam_index_build(d_->filename_.c_str(), 0) != 0)
-            throw std::runtime_error("could not build BAI index");
-    }
+    if (!StandardIndexExists())
+        CreateStandardIndex();
 }
 
 std::string BamFile::Filename(void) const
 { return d_->filename_; }
 
+/// \returns the offset cached in the private implementation as
+///          firstAlignmentOffset_
+int64_t BamFile::FirstAlignmentOffset(void) const
+{ return d_->firstAlignmentOffset_; }
+
 bool BamFile::HasReference(const std::string& name) const
 { return d_->header_.HasSequence(name); }
 
@@ -131,20 +179,18 @@ bool BamFile::IsPacBioBAM(void) const
 { return !d_->header_.PacBioBamVersion().empty(); }
 
 bool BamFile::PacBioIndexExists(void) const
-{
-    const string pbiFn = PacBioIndexFilename();
-    if (internal::FileUtils::Exists(pbiFn)) {
-        const time_t bamTimestamp = internal::FileUtils::LastModified(Filename());
-        const time_t pbiTimestamp = internal::FileUtils::LastModified(pbiFn);
-        if (bamTimestamp <= pbiTimestamp)
-            return true;
-    }
-    return false;
-}
+{ return internal::FileUtils::Exists(PacBioIndexFilename()); }
 
 std::string BamFile::PacBioIndexFilename(void) const
 { return d_->filename_ + ".pbi"; }
 
+/// Compares last-modified timestamps of the BAM file and its PacBio index.
+///
+/// \returns true if the index timestamp is the same as or later than the
+///          BAM file's timestamp
+bool BamFile::PacBioIndexIsNewer(void) const
+{
+    const auto bamModified = internal::FileUtils::LastModified(Filename());
+    const auto pbiModified = internal::FileUtils::LastModified(PacBioIndexFilename());
+    return !(pbiModified < bamModified);
+}
+
 int BamFile::ReferenceId(const std::string& name) const
 { return d_->header_.SequenceId(name); }
 
@@ -158,17 +204,15 @@ std::string BamFile::ReferenceName(const int id) const
 { return d_->header_.SequenceName(id); }
 
 bool BamFile::StandardIndexExists(void) const
-{
-    const string bamFn = Filename();
-    const string baiFn = StandardIndexFilename();
-    if (internal::FileUtils::Exists(baiFn)) {
-        const time_t bamTimestamp = internal::FileUtils::LastModified(bamFn);
-        const time_t baiTimestamp = internal::FileUtils::LastModified(baiFn);
-        if (bamTimestamp <= baiTimestamp)
-            return true;
-    }
-    return false;
-}
+{ return internal::FileUtils::Exists(StandardIndexFilename()); }
 
 std::string BamFile::StandardIndexFilename(void) const
 { return d_->filename_ + ".bai"; }
+
+/// Compares last-modified timestamps of the BAM file and its standard index.
+///
+/// \returns true if the index timestamp is the same as or later than the
+///          BAM file's timestamp
+bool BamFile::StandardIndexIsNewer(void) const
+{
+    const auto bamModified = internal::FileUtils::LastModified(Filename());
+    const auto baiModified = internal::FileUtils::LastModified(StandardIndexFilename());
+    return !(baiModified < bamModified);
+}
+
diff --git a/src/BamHeader.cpp b/src/BamHeader.cpp
index 7caa62d..69a9c40 100644
--- a/src/BamHeader.cpp
+++ b/src/BamHeader.cpp
@@ -32,11 +32,15 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file BamHeader.cpp
+/// \brief Implements the BamHeader class.
+//
 // Author: Derek Barnett
 
 #include "pbbam/BamHeader.h"
-#include "SequenceUtils.h"
+#include "StringUtils.h"
 #include <htslib/hts.h>
 #include <sstream>
 #include <set>
@@ -59,33 +63,153 @@ static const string token_VN = string("VN");
 static const string token_SO = string("SO");
 static const string token_pb = string("pb");
 
-static const string current_version = string("3.0b7");
-
-class BamHeaderPrivate
+/// \brief Numeric (major.minor.revision) form of a PacBio BAM version.
+///
+/// Can be parsed from the dotted string found in the header, compared with
+/// other versions, and rendered back to text.
+struct PacBioVersion
 {
 public:
-    std::string version_;
-    std::string pacbioBamVersion_;
-    std::string sortOrder_;
-    std::map<std::string, std::string> headerLineCustom_;
-
-    std::map<std::string, ReadGroupInfo> readGroups_; // id => read group info
-    std::map<std::string, ProgramInfo> programs_;     // id => program info
-    std::vector<std::string> comments_;
-
-    // we need to preserve insertion order, use lookup for access by name
-    std::vector<SequenceInfo> sequences_;
-    std::map<std::string, int32_t> sequenceIdLookup_;
+    /// Builds a version directly from its three numeric components.
+    PacBioVersion(int major, int minor, int revision)
+        : major_(major)
+        , minor_(minor)
+        , revision_(revision)
+    { }
+
+    /// Parses a dotted version string; components that are absent stay 0.
+    ///
+    /// \throws std::runtime_error if \p v is empty, contains a 'b' (beta
+    ///         versions), or cannot be parsed
+    PacBioVersion(const string& v)
+        : major_(0)
+        , minor_(0)
+        , revision_(0)
+    {
+        if (v.empty())
+            throw std::runtime_error(string{ "PacBio BAM version number is missing (@HD pb:<version> tag). See spec for details." });
+
+        if (v.find('b') != string::npos)
+            throw std::runtime_error(string{ "invalid version number (" + v + "): beta version BAMs are no longer supported" });
+
+        try {
+            const auto parts = Split(v, '.');
+            const auto partCount = parts.size();
+
+            // fill in as many components as the string provides, in order
+            if (partCount > 0)
+                major_ = stoi(parts.at(0));
+            if (partCount > 1)
+                minor_ = stoi(parts.at(1));
+            if (partCount > 2)
+                revision_ = stoi(parts.at(2));
+
+        } catch (std::exception&) {
+            throw std::runtime_error(string{ "invalid version number (" + v + "): failed to parse" });
+        }
+    }
+
+public:
+    /// \returns true if all three components match
+    bool operator==(const PacBioVersion& other) const
+    {
+        if (major_ != other.major_)
+            return false;
+        if (minor_ != other.minor_)
+            return false;
+        return revision_ == other.revision_;
+    }
+
+    /// Orders versions by major, then minor, then revision.
+    bool operator<(const PacBioVersion& other) const
+    {
+        // the first component that differs decides (e.g. 2.* < 3.*)
+        if (major_ != other.major_)
+            return major_ < other.major_;
+
+        // same major (e.g. 3.1.* < 3.2.*)
+        if (minor_ != other.minor_)
+            return minor_ < other.minor_;
+
+        // same major & minor (e.g. 3.2.1 < 3.2.2)
+        return revision_ < other.revision_;
+    }
+
+    /// \returns true if this version is not less than \p other
+    bool operator>=(const PacBioVersion& other) const
+    { return !(*this < other); }
+
+public:
+    /// \returns the version as "major.minor.revision"
+    string ToString(void) const
+    {
+        stringstream text;
+        text << major_;
+        text << '.' << minor_;
+        text << '.' << revision_;
+        return text.str();
+    }
+
+    /// \returns ToString() wrapped in parentheses, for use in messages
+    string ToMsgString(void) const
+    {
+        stringstream text;
+        text << '(';
+        text << ToString();
+        text << ')';
+        return text.str();
+    }
+
+private:
+    int major_;
+    int minor_;
+    int revision_;
 };
 
+static const PacBioVersion minimum_version = PacBioVersion(3,0,1);
+static const PacBioVersion current_version = PacBioVersion(3,0,3);
+
+/// Verifies that two headers are compatible for merging.
+///
+/// The SAM version, sort order and PacBio BAM version must match; the
+/// sequence lists must match as well when \p lhs is coordinate-sorted.
+///
+/// \throws std::runtime_error listing every mismatch that was found
+static
+void EnsureCanMerge(const BamHeader& lhs, const BamHeader& rhs)
+{
+    const bool sameSamVersion = (lhs.Version() == rhs.Version());
+    const bool sameSortOrder  = (lhs.SortOrder() == rhs.SortOrder());
+    const bool samePbVersion  = (lhs.PacBioBamVersion() == rhs.PacBioBamVersion());
+
+    // sequence lists are only compared for coordinate-sorted data
+    const bool sequencesCompatible = (lhs.SortOrder() != "coordinate") ||
+                                     (lhs.Sequences() == rhs.Sequences());
+
+    // nothing to report
+    if (sameSamVersion && sameSortOrder && samePbVersion && sequencesCompatible)
+        return;
+
+    // otherwise describe each failed check, one per line
+    stringstream report;
+    report << "could not merge BAM headers:" << endl;
+
+    if (!sameSamVersion) {
+        report << "  mismatched SAM versions (@HD:VN) : (";
+        report << lhs.Version() << ", " << rhs.Version();
+        report << ")" << endl;
+    }
+
+    if (!sameSortOrder) {
+        report << "  mismatched sort orders (@HD:SO) : (";
+        report << lhs.SortOrder() << ", " << rhs.SortOrder();
+        report << ")" << endl;
+    }
+
+    if (!samePbVersion) {
+        report << "  mismatched PacBio BAM versions (@HD:pb) : (";
+        report << lhs.PacBioBamVersion() << ", " << rhs.PacBioBamVersion();
+        report << ")" << endl;
+    }
+
+    if (!sequencesCompatible)
+        report << "  mismatched sequence lists (@SQ entries)" << endl;
+
+    throw std::runtime_error(report.str());
+}
+
 } // namespace internal
 } // namespace BAM
 } // namespace PacBio
 
-BamHeader::BamHeader(void)
-    : d_(new internal::BamHeaderPrivate)
-{ }
-
 BamHeader::BamHeader(const string& samHeaderText)
     : d_(new internal::BamHeaderPrivate)
 {
@@ -134,30 +258,28 @@ BamHeader::BamHeader(const string& samHeaderText)
     }
 }
 
-BamHeader::BamHeader(const BamHeader& other)
-    : d_(other.d_)
-{ }
-
-BamHeader::BamHeader(BamHeader&& other)
-    : d_(std::move(other.d_))
-{ }
-
-BamHeader& BamHeader::operator=(const BamHeader& other)
-{ d_ = other.d_; return *this; }
-
-BamHeader& BamHeader::operator=(BamHeader&& other)
-{ d_ = std::move(other.d_); return *this; }
+/// Merges the read groups, programs and comments of \p other into this
+/// header.
+///
+/// Read groups and programs are added only if their ID is not already
+/// present; comments are always appended.
+///
+/// \returns reference to this header
+/// \throws std::runtime_error (from internal::EnsureCanMerge) if the two
+///         headers are not compatible
+BamHeader& BamHeader::operator+=(const BamHeader& other)
+{
+    internal::EnsureCanMerge(*this, other);
 
-BamHeader::~BamHeader(void) { }
+    // merge read groups
+    for (const auto& rg : other.ReadGroups()) {
+        if (!HasReadGroup(rg.Id()))
+            AddReadGroup(rg);
+    }
 
-BamHeader& BamHeader::AddComment(const std::string& comment)
-{ d_->comments_.push_back(comment); return *this; }
+    // merge programs
+    for (const auto& pg : other.Programs()) {
+        if (!HasProgram(pg.Id()))
+            AddProgram(pg);
+    }
 
-BamHeader& BamHeader::AddProgram(const ProgramInfo& pg)
-{ d_->programs_[pg.Id()] = pg; return *this; }
+    // merge comments
+    // (no duplicate check: a comment present in both headers ends up twice)
+    for (const auto& comment : other.Comments())
+        AddComment(comment);
 
-BamHeader& BamHeader::AddReadGroup(const ReadGroupInfo& readGroup)
-{ d_->readGroups_[readGroup.Id()] = readGroup; return *this; }
+    return *this;
+}
 
 BamHeader& BamHeader::AddSequence(const SequenceInfo& sequence)
 {
@@ -166,15 +288,6 @@ BamHeader& BamHeader::AddSequence(const SequenceInfo& sequence)
     return *this;
 }
 
-BamHeader& BamHeader::ClearComments(void)
-{ d_->comments_.clear(); return* this; }
-
-BamHeader& BamHeader::ClearPrograms(void)
-{ d_->programs_.clear(); return *this; }
-
-BamHeader& BamHeader::ClearReadGroups(void)
-{ d_->readGroups_.clear(); return *this; }
-
 BamHeader& BamHeader::ClearSequences(void)
 {
     d_->sequenceIdLookup_.clear();
@@ -182,12 +295,6 @@ BamHeader& BamHeader::ClearSequences(void)
     return *this;
 }
 
-std::vector<std::string> BamHeader::Comments(void) const
-{ return d_->comments_; }
-
-BamHeader& BamHeader::Comments(const std::vector<std::string>& comments)
-{ d_->comments_ = comments; return *this; }
-
 BamHeader BamHeader::DeepCopy(void) const
 {
     BamHeader result;
@@ -203,20 +310,21 @@ BamHeader BamHeader::DeepCopy(void) const
     return result;
 }
 
-bool BamHeader::HasProgram(const std::string& id) const
-{ return d_->programs_.find(id) != d_->programs_.cend(); }
-
-bool BamHeader::HasReadGroup(const std::string& id) const
-{ return d_->readGroups_.find(id) != d_->readGroups_.cend(); }
-
-bool BamHeader::HasSequence(const std::string& name) const
-{ return d_->sequenceIdLookup_.find(name) != d_->sequenceIdLookup_.cend(); }
-
-std::string BamHeader::PacBioBamVersion(void) const
-{ return d_->pacbioBamVersion_; }
-
 BamHeader& BamHeader::PacBioBamVersion(const std::string& version)
-{ d_->pacbioBamVersion_ = version; return *this; }
+{
+    const auto fileVersion = internal::PacBioVersion{ version };
+    if (fileVersion >= internal::minimum_version)
+        d_->pacbioBamVersion_ = version;
+    else {
+        d_->pacbioBamVersion_.clear();
+        auto msg  = string{ "invalid PacBio BAM version number" };
+             msg += fileVersion.ToMsgString();
+             msg += string{ "is older than the minimum supported version" };
+             msg += internal::minimum_version.ToMsgString();
+        throw std::runtime_error(msg);
+    }
+    return *this;
+}
 
 ProgramInfo BamHeader::Program(const std::string& id) const
 {
@@ -294,14 +402,10 @@ BamHeader& BamHeader::ReadGroups(const vector<ReadGroupInfo>& readGroups)
     return *this;
 }
 
-SequenceInfo BamHeader::Sequence(const int32_t id) const
-{
-    // throws out of range
-    return d_->sequences_.at(id);
-}
-
 SequenceInfo BamHeader::Sequence(const std::string& name) const
 {
+    // TODO: SequenceId(name) throws if not found, should we do so here as well?
+
     const auto iter = d_->sequenceIdLookup_.find(name);
     if (iter == d_->sequenceIdLookup_.cend())
         return SequenceInfo();
@@ -318,12 +422,6 @@ int32_t BamHeader::SequenceId(const std::string& name) const
     return iter->second;
 }
 
-std::string BamHeader::SequenceLength(const int32_t id) const
-{ return Sequence(id).Length(); }
-
-std::string BamHeader::SequenceName(const int32_t id) const
-{ return Sequence(id).Name(); }
-
 vector<string> BamHeader::SequenceNames(void) const
 {
     vector<string> result;
@@ -335,9 +433,6 @@ vector<string> BamHeader::SequenceNames(void) const
     return result;
 }
 
-std::vector<SequenceInfo> BamHeader::Sequences(void) const
-{ return d_->sequences_; }
-
 BamHeader& BamHeader::Sequences(const vector<SequenceInfo>& sequences)
 {
     d_->sequences_.clear();
@@ -346,21 +441,16 @@ BamHeader& BamHeader::Sequences(const vector<SequenceInfo>& sequences)
     return *this;
 }
 
-std::string BamHeader::SortOrder(void) const
-{ return d_->sortOrder_; }
-
-BamHeader& BamHeader::SortOrder(const std::string& order)
-{ d_->sortOrder_ = order; return *this; }
-
 string BamHeader::ToSam(void) const
 {
-    // clear out stream
+    // init stream
     stringstream out("");
 
     // @HD
-    const string& outputVersion   = (d_->version_.empty()   ? string(hts_version()) : d_->version_);
+    const string& outputVersion   = (d_->version_.empty() ? string(hts_version()) : d_->version_);
     const string& outputSortOrder = (d_->sortOrder_.empty() ? string("unknown") : d_->sortOrder_);
-    const string& outputPbBamVersion = (d_->pacbioBamVersion_.empty() ? internal::current_version : d_->pacbioBamVersion_);
+    const string& outputPbBamVersion = (d_->pacbioBamVersion_.empty() ? internal::current_version.ToString()
+                                                                      : d_->pacbioBamVersion_);
 
     out << internal::prefix_HD
         << internal::MakeSamTag(internal::token_VN, outputVersion)
@@ -368,10 +458,6 @@ string BamHeader::ToSam(void) const
         << internal::MakeSamTag(internal::token_pb, outputPbBamVersion)
         << endl;
 
-//    if (!d_->pacbioBamVersion_.empty())
-//        out << internal::MakeSamTag(internal::token_pb, d_->pacbioBamVersion_);
-//     out << endl;
-
     // @SQ
     for (const SequenceInfo& seq : d_->sequences_)
         out << seq.ToSam() << endl;
@@ -382,7 +468,7 @@ string BamHeader::ToSam(void) const
 
     // @PG
     for (const auto& progIter : d_->programs_)
-        out  << progIter.second.ToSam() << endl;
+        out << progIter.second.ToSam() << endl;
 
     // @CO
     for (const string& comment : d_->comments_)
@@ -391,10 +477,3 @@ string BamHeader::ToSam(void) const
     // return result
     return out.str();
 }
-
-std::string BamHeader::Version(void) const
-{ return d_->version_; }
-
-BamHeader& BamHeader::Version(const std::string& version)
-{ d_->version_ = version; return *this; }
-
diff --git a/src/BamReader.cpp b/src/BamReader.cpp
new file mode 100644
index 0000000..f6f4cad
--- /dev/null
+++ b/src/BamReader.cpp
@@ -0,0 +1,189 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BamReader.cpp
+/// \brief Implements the BamReader class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/BamReader.h"
+#include "MemoryUtils.h"
+#include <htslib/bgzf.h>
+#include <htslib/hfile.h>
+#include <htslib/hts.h>
+#include <cassert>
+#include <cstdio>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// \brief Private implementation of BamReader.
+///
+/// Keeps the BamFile being read together with the htslib file handle opened
+/// on it.
+struct BamReaderPrivate
+{
+public:
+    /// Stores a copy of \p bamFile and opens it for reading.
+    ///
+    /// \throws std::runtime_error if the file cannot be opened
+    BamReaderPrivate(const BamFile& bamFile)
+        : htsFile_(nullptr)
+        , bamFile_(bamFile)
+    { DoOpen(); }
+
+    /// Takes over \p bamFile and opens it for reading.
+    ///
+    /// \throws std::runtime_error if the file cannot be opened
+    BamReaderPrivate(BamFile&& bamFile)
+        : htsFile_(nullptr)
+        , bamFile_(std::move(bamFile))
+    { DoOpen(); }
+
+    /// Opens the stored BamFile's filename through htslib in "rb" mode and
+    /// keeps the resulting handle in htsFile_.
+    void DoOpen(void)
+    {
+        const string filename = bamFile_.Filename();
+        htsFile_.reset(sam_open(filename.c_str(), "rb"));
+
+        const bool opened = static_cast<bool>(htsFile_);
+        if (!opened)
+            throw std::runtime_error("could not open BAM file for reading");
+    }
+
+public:
+    std::unique_ptr<samFile, internal::HtslibFileDeleter> htsFile_;
+    BamFile bamFile_;
+};
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+/// Opens the BAM file named \p fn by building a temporary BamFile and
+/// delegating to the BamFile overload.
+BamReader::BamReader(const string& fn)
+    : BamReader(BamFile(fn))
+{ }
+
+/// Opens \p bamFile for reading and positions the reader at the file's
+/// cached first-alignment offset.
+///
+/// \throws std::runtime_error if the file cannot be opened or the seek fails
+BamReader::BamReader(const BamFile& bamFile)
+    : d_(new internal::BamReaderPrivate(bamFile))
+{
+    // skip header
+    VirtualSeek(d_->bamFile_.FirstAlignmentOffset());
+}
+
+/// Takes over \p bamFile, opens it for reading and positions the reader at
+/// the file's cached first-alignment offset.
+///
+/// \throws std::runtime_error if the file cannot be opened or the seek fails
+BamReader::BamReader(BamFile&& bamFile)
+    : d_(new internal::BamReaderPrivate(std::move(bamFile)))
+{
+    // skip header
+    // (read the offset from the stored copy: the parameter has been moved from)
+    VirtualSeek(d_->bamFile_.FirstAlignmentOffset());
+}
+
+/// Empty destructor, defined out of line in this source file.
+BamReader::~BamReader(void) { }
+
+/// \returns the BGZF stream belonging to the open htslib file handle
+BGZF* BamReader::Bgzf(void) const
+{
+    assert(d_);
+    const auto& handle = d_->htsFile_;
+    assert(handle);
+    assert(handle->fp.bgzf);
+    return handle->fp.bgzf;
+}
+
+/// \returns a reference to the BamFile held by this reader
+const BamFile& BamReader::File(void) const
+{
+    assert(d_);
+    return d_->bamFile_;
+}
+
+/// \returns the filename reported by the BamFile held by this reader
+std::string BamReader::Filename(void) const
+{
+    assert(d_);
+    return d_->bamFile_.Filename();
+}
+
+/// \returns the header reported by the BamFile held by this reader
+const BamHeader& BamReader::Header(void) const
+{
+    assert(d_);
+    return d_->bamFile_.Header();
+}
+
+/// Reads the next record into \p record.
+///
+/// The raw data is fetched with ReadRawData(); on success the record's tags
+/// are refreshed and its header is set to this reader's header.
+///
+/// \returns true if a record was read, false when ReadRawData() reports -1
+///          (EOF or end of the requested data range)
+/// \throws std::runtime_error, naming the file, for any other negative
+///         result
+bool BamReader::GetNext(BamRecord& record)
+{
+    assert(Bgzf());
+    assert(internal::BamRecordMemory::GetRawData(record).get());
+
+    auto result = ReadRawData(Bgzf(), internal::BamRecordMemory::GetRawData(record).get());
+
+    // success
+    if (result >= 0) {
+        internal::BamRecordMemory::UpdateRecordTags(record);
+        record.header_ = Header();
+        return true;
+    }
+
+    // EOF or end-of-data range (not an error)
+    if (result == -1)
+        return false;
+
+    // any other code means the file is corrupted
+    auto errorMsg = string{"corrupted BAM file: "};
+    if (result == -2)
+        errorMsg += "probably truncated";
+    else if (result == -3)
+        errorMsg += "could not read BAM record's core data";
+    else if (result == -4)
+        errorMsg += "could not read BAM record's variable-length data";
+    else
+        errorMsg += "unknown reason " + to_string(result);
+    errorMsg += string{" ("};
+    errorMsg += Filename();
+    errorMsg += string{")"};
+    throw std::runtime_error{errorMsg};
+}
+
+/// Reads the next raw record from \p bgzf into \p b using htslib's
+/// bam_read1().
+///
+/// \returns the status code produced by bam_read1()
+int BamReader::ReadRawData(BGZF* bgzf, bam1_t* b)
+{
+    return bam_read1(bgzf, b);
+}
+
+/// Moves the underlying BGZF stream to \p virtualOffset.
+///
+/// \throws std::runtime_error if bgzf_seek() reports a non-zero status
+void BamReader::VirtualSeek(int64_t virtualOffset)
+{
+    const auto status = bgzf_seek(Bgzf(), virtualOffset, SEEK_SET);
+    const bool seekOk = (status == 0);
+    if (!seekOk)
+        throw std::runtime_error("Failed to seek in BAM file");
+}
+
+/// \returns the current position of the underlying BGZF stream, as reported
+///          by bgzf_tell()
+int64_t BamReader::VirtualTell(void) const
+{
+    return bgzf_tell(Bgzf());
+}
diff --git a/src/BamRecord.cpp b/src/BamRecord.cpp
index 11387ae..d0da4a7 100644
--- a/src/BamRecord.cpp
+++ b/src/BamRecord.cpp
@@ -32,11 +32,16 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file BamRecord.cpp
+/// \brief Implements the BamRecord & BamRecordView classes.
+//
 // Author: Derek Barnett
 
 #include "pbbam/BamRecord.h"
 #include "pbbam/virtual/VirtualRegionTypeMap.h"
+#include "pbbam/ZmwTypeMap.h"
 #include "AssertUtils.h"
 #include "MemoryUtils.h"
 #include "SequenceUtils.h"
@@ -54,33 +59,38 @@ namespace BAM {
 namespace internal {
 
 // BAM record tag names
-static const string tagName_readAccuracy            = "rq";
-static const string tagName_holeNumber              = "zm";
-static const string tagName_numPasses               = "np";
+static const string tagName_alternative_labelQV     = "pv";
+static const string tagName_alternative_labelTag    = "pt";
+static const string tagName_barcodes                = "bc";
+static const string tagName_barcode_quality         = "bq";
 static const string tagName_contextFlags            = "cx";
-static const string tagName_snr                     = "sn";
+static const string tagName_holeNumber              = "zm";
 static const string tagName_deletionQV              = "dq";
 static const string tagName_deletionTag             = "dt";
 static const string tagName_insertionQV             = "iq";
 static const string tagName_ipd                     = "ip";
+static const string tagName_labelQV                 = "pq";
 static const string tagName_mergeQV                 = "mq";
-static const string tagName_pulseWidth              = "pw";
-static const string tagName_readGroup               = "RG";
-static const string tagName_queryStart              = "qs";
-static const string tagName_queryEnd                = "qe";
-static const string tagName_substitutionQV          = "sq";
-static const string tagName_substitutionTag         = "st";
+static const string tagName_numPasses               = "np";
 static const string tagName_pkmean                  = "pa";
 static const string tagName_pkmid                   = "pm";
+static const string tagName_pkmean2                 = "ps";
+static const string tagName_pkmid2                  = "pi";
 static const string tagName_pre_pulse_frames        = "pd";
-static const string tagName_pulse_call_width        = "px";
-static const string tagName_labelQV                 = "pq";
-static const string tagName_alternative_labelQV     = "pv";
-static const string tagName_alternative_labelTag    = "pt";
 static const string tagName_pulse_call              = "pc";
-static const string tagName_scrap_type              = "sc";
-static const string tagName_barcodes                = "bc";
+static const string tagName_pulse_call_width        = "px";
 static const string tagName_pulseMergeQV            = "pg";
+static const string tagName_pulseWidth              = "pw";
+static const string tagName_queryStart              = "qs";
+static const string tagName_queryEnd                = "qe";
+static const string tagName_readAccuracy            = "rq";
+static const string tagName_readGroup               = "RG";
+static const string tagName_scrap_region_type       = "sc";
+static const string tagName_scrap_zmw_type          = "sz";
+static const string tagName_snr                     = "sn";
+static const string tagName_startFrame              = "sf";
+static const string tagName_substitutionQV          = "sq";
+static const string tagName_substitutionTag         = "st";
 
 // faux (helper) tag names
 static const string tagName_QUAL = "QUAL";
@@ -97,7 +107,7 @@ static const string recordTypeName_Unknown    = "UNKNOWN";
 static
 int32_t HoleNumberFromName(const string& fullName)
 {
-    const vector<string> mainTokens = std::move(Split(fullName, '/'));
+    const auto mainTokens = Split(fullName, '/');
     if (mainTokens.size() != 3)
         throw std::runtime_error("malformed record name");
     return stoi(mainTokens.at(1));
@@ -106,10 +116,10 @@ int32_t HoleNumberFromName(const string& fullName)
 static
 Position QueryEndFromName(const string& fullName)
 {
-    const vector<string> mainTokens = std::move(Split(fullName, '/'));
+    const auto mainTokens = Split(fullName, '/');
     if (mainTokens.size() != 3)
         throw std::runtime_error("malformed record name");
-    const vector<string> queryTokens = std::move(Split(mainTokens.at(2), '_'));
+    const auto queryTokens = Split(mainTokens.at(2), '_');
     if (queryTokens.size() != 2)
         throw std::runtime_error("malformed record name");
     return stoi(queryTokens.at(1));
@@ -118,10 +128,10 @@ Position QueryEndFromName(const string& fullName)
 static
 Position QueryStartFromName(const string& fullName)
 {
-    const vector<string> mainTokens = std::move(Split(fullName, '/'));
+    const auto mainTokens = Split(fullName, '/');
     if (mainTokens.size() != 3)
         throw std::runtime_error("malformed record name");
-    const vector<string> queryTokens = std::move(Split(mainTokens.at(2), '_'));
+    const auto queryTokens = Split(mainTokens.at(2), '_');
     if (queryTokens.size() != 2)
         throw std::runtime_error("malformed record name");
     return stoi(queryTokens.at(0));
@@ -140,54 +150,52 @@ BamRecordImpl* CreateOrEdit(const string& tagName,
 }
 
 static
-int32_t AlignedEndOffset(const Cigar& cigar,
-                         const int seqLength)
+pair<int32_t, int32_t> AlignedOffsets(const BamRecord& record,
+                                      const int seqLength)
 {
+    int32_t startOffset = 0;
     int32_t endOffset = seqLength;
 
-    if (!cigar.empty()) {
-        Cigar::const_reverse_iterator cigarIter = cigar.crbegin();
-        Cigar::const_reverse_iterator cigarEnd  = cigar.crend();
-        for (; cigarIter != cigarEnd; ++cigarIter) {
-            const CigarOperation& op = (*cigarIter);
-            if (op.Type() == CigarOperationType::HARD_CLIP) {
-                if (endOffset != 0 && endOffset != seqLength)
-                    return -1;
+    PBBAM_SHARED_PTR<bam1_t> b = internal::BamRecordMemory::GetRawData(record);
+    uint32_t* cigarData = bam_get_cigar(b.get());
+    const size_t numCigarOps = b->core.n_cigar;
+    if (numCigarOps > 0) {
+
+        // start offset
+        for (size_t i = 0; i < numCigarOps; ++i) {
+            const CigarOperationType type = static_cast<CigarOperationType>(bam_cigar_op(cigarData[i]));
+            if (type == CigarOperationType::HARD_CLIP) {
+                if (startOffset != 0 && startOffset != seqLength) {
+                    startOffset = -1;
+                    break;
+                }
             }
-            else if (op.Type() == CigarOperationType::SOFT_CLIP)
-                endOffset -= op.Length();
+            else if (type == CigarOperationType::SOFT_CLIP)
+                startOffset += bam_cigar_oplen(cigarData[i]);
             else
                 break;
         }
-    }
 
-    if (endOffset == 0)
-        endOffset = seqLength;
-    return endOffset;
-}
-
-static
-int32_t AlignedStartOffset(const Cigar& cigar,
-                           const int seqLength)
-{
-    int32_t startOffset = 0;
-
-    if (!cigar.empty()) {
-        Cigar::const_iterator cigarIter = cigar.cbegin();
-        Cigar::const_iterator cigarEnd  = cigar.cend();
-        for (; cigarIter != cigarEnd; ++cigarIter) {
-            const CigarOperation& op = (*cigarIter);
-            if (op.Type() == CigarOperationType::HARD_CLIP) {
-                if (startOffset != 0 && startOffset != seqLength)
-                    return -1;
+        // end offset
+        for (int i = numCigarOps-1; i >= 0; --i) {
+            const CigarOperationType type = static_cast<CigarOperationType>(bam_cigar_op(cigarData[i]));
+            if (type == CigarOperationType::HARD_CLIP) {
+                if (endOffset != 0 && endOffset != seqLength) {
+                    endOffset = -1;
+                    break;
+                }
             }
-            else if (op.Type() == CigarOperationType::SOFT_CLIP)
-                startOffset += op.Length();
+            else if (type == CigarOperationType::SOFT_CLIP)
+                endOffset -= bam_cigar_oplen(cigarData[i]);
             else
                 break;
+
         }
+
+        if (endOffset == 0)
+            endOffset = seqLength;
     }
-    return startOffset;
+    return std::make_pair(startOffset, endOffset);
 }
 
 template<typename T>
@@ -208,16 +216,16 @@ void MaybeClipAndGapifyBases(const BamRecordImpl& impl,
     if (impl.IsMapped() && (aligned || exciseSoftClips)) {
 
         size_t seqIndex = 0;
-        const Cigar& cigar = impl.CigarData();
-        Cigar::const_iterator cigarIter = cigar.cbegin();
-        Cigar::const_iterator cigarEnd  = cigar.cend();
+        const auto cigar = impl.CigarData();
+        auto cigarIter = cigar.cbegin();
+        auto cigarEnd  = cigar.cend();
         for (; cigarIter != cigarEnd; ++cigarIter) {
-            const CigarOperation& op = (*cigarIter);
-            const CigarOperationType& type = op.Type();
+            const auto op = (*cigarIter);
+            const auto type = op.Type();
 
             // do nothing for hard clips
             if (type != CigarOperationType::HARD_CLIP) {
-                const size_t opLength = op.Length();
+                const auto opLength = op.Length();
 
                 // maybe remove soft clips
                 if (type == CigarOperationType::SOFT_CLIP && exciseSoftClips)
@@ -254,18 +262,18 @@ void MaybeClipAndGapifyFrames(const BamRecordImpl& impl,
 {
     if (impl.IsMapped() && (aligned || exciseSoftClips)) {
 
-        vector<uint16_t> data = std::move(frames.Data()); // we're going to put it back
+        auto data = std::move(frames.Data()); // we're going to put it back
         size_t frameIndex = 0;
-        const Cigar& cigar = impl.CigarData();
-        Cigar::const_iterator cigarIter = cigar.cbegin();
-        Cigar::const_iterator cigarEnd  = cigar.cend();
+        const auto cigar = impl.CigarData();
+        auto cigarIter = cigar.cbegin();
+        auto cigarEnd  = cigar.cend();
         for (; cigarIter != cigarEnd; ++cigarIter) {
-            const CigarOperation& op = (*cigarIter);
-            const CigarOperationType& type = op.Type();
+            const auto op = (*cigarIter);
+            const auto type = op.Type();
 
             // do nothing for hard clips
             if (type != CigarOperationType::HARD_CLIP) {
-                const size_t opLength = op.Length();
+                const auto opLength = op.Length();
 
                 // maybe remove soft clips
                 if (type == CigarOperationType::SOFT_CLIP && exciseSoftClips)
@@ -300,17 +308,16 @@ void MaybeClipAndGapifyQualities(const BamRecordImpl& impl,
     if (impl.IsMapped() && (aligned || exciseSoftClips)) {
 
         size_t qualIndex = 0;
-        const Cigar& cigar = impl.CigarData();
-        Cigar::const_iterator cigarIter = cigar.cbegin();
-        Cigar::const_iterator cigarEnd  = cigar.cend();
+        const auto cigar = impl.CigarData();
+        auto cigarIter = cigar.cbegin();
+        auto cigarEnd  = cigar.cend();
         for (; cigarIter != cigarEnd; ++cigarIter) {
-
-            const CigarOperation& op = (*cigarIter);
-            const CigarOperationType& type = op.Type();
+            const auto op = (*cigarIter);
+            const auto type = op.Type();
 
             // do nothing for hard clips
             if (type != CigarOperationType::HARD_CLIP) {
-                const size_t opLength = op.Length();
+                const auto opLength = op.Length();
 
                 // maybe remove soft clips
                 if (type == CigarOperationType::SOFT_CLIP && exciseSoftClips)
@@ -401,6 +408,13 @@ RecordType NameToType(const string& name)
     return RecordType::UNKNOWN;
 }
 
+// Predicate: true when the CIGAR operation is a soft clip or a hard clip,
+// false for every other operation type.
+static inline
+bool IsClippingOp(const CigarOperation& op)
+{
+    switch (op.Type()) {
+        case CigarOperationType::SOFT_CLIP :
+        case CigarOperationType::HARD_CLIP :
+            return true;
+        default :
+            return false;
+    }
+}
 
 } // namespace internal
 } // namespace BAM
@@ -507,22 +521,49 @@ BamRecord& BamRecord::AltLabelTag(const std::string& tags)
     return *this;
 }
 
-std::pair<int,int> BamRecord::Barcodes(void) const
+uint16_t BamRecord::BarcodeForward(void) const
+{ return Barcodes().first; }
+
+uint16_t BamRecord::BarcodeReverse(void) const
+{ return Barcodes().second; }
+
+// Returns the value of the barcode-quality tag as a uint8_t.
+// A null tag value yields 0.
+uint8_t BamRecord::BarcodeQuality(void) const
+{
+    const Tag qualityTag = impl_.TagValue(internal::tagName_barcode_quality);
+
+    // open question kept from the original author: is 0 an adequate "missing"
+    // value for tags, or should boost::optional<T> be considered here?
+    if (!qualityTag.IsNull())
+        return qualityTag.ToUInt8();
+    return 0;
+}
+
+BamRecord& BamRecord::BarcodeQuality(const uint8_t quality)
+{
+    internal::CreateOrEdit(internal::tagName_barcode_quality, quality, &impl_);
+    return *this;
+}
+
+std::pair<uint16_t,uint16_t> BamRecord::Barcodes(void) const
 {
     const Tag& bc = impl_.TagValue(internal::tagName_barcodes);
     if (bc.IsNull())
-        return std::make_pair(-1, -1);
+        throw std::runtime_error("barcode tag (bc) was requested but is missing");
 
     if (!bc.IsUInt16Array())
-        throw std::runtime_error("Barcode tag bc is not of type uint16_t array.");
+        throw std::runtime_error("barcode tag (bc) is malformed: should be a uint16_t array of size==2.");
 
     const auto bcArray = bc.ToUInt16Array();
     if (bcArray.size() != 2)
-        throw std::runtime_error("Barcode array is not of size 2");
+        throw std::runtime_error("barcode tag (bc) is malformed: should be a uint16_t array of size==2.");
 
     return std::make_pair(bcArray[0], bcArray[1]);
 }
 
+// Stores the (first, second) barcode ID pair under the barcodes tag as a
+// 2-element uint16_t array, creating the tag or replacing its current value.
+// Returns *this so calls can be chained.
+BamRecord& BamRecord::Barcodes(const std::pair<uint16_t,uint16_t>& barcodeIds)
+{
+    const std::vector<uint16_t> ids{ barcodeIds.first, barcodeIds.second };
+    internal::CreateOrEdit(internal::tagName_barcodes, ids, &impl_);
+    return *this;
+}
+
 void BamRecord::CalculateAlignedPositions(void) const
 {
     // reset
@@ -531,16 +572,20 @@ void BamRecord::CalculateAlignedPositions(void) const
     // skip if unmapped, or has no queryStart/End
     if (!impl_.IsMapped())
         return;
-    const Position qStart = QueryStart();
-    const Position qEnd   = QueryEnd();
+
+    // get the query start/end
+    const size_t seqLength = impl_.SequenceLength();
+    const RecordType type  = Type();
+    const Position qStart  = (type == RecordType::CCS) ? Position(0) : QueryStart();
+    const Position qEnd    = (type == RecordType::CCS) ? Position(seqLength) : QueryEnd();
+    
     if (qStart == PacBio::BAM::UnmappedPosition || qEnd == PacBio::BAM::UnmappedPosition)
         return;
 
     // determine clipped end ranges
-    const Cigar& cigar     = impl_.CigarData();
-    const size_t seqLength = impl_.Sequence().size();
-    const int32_t startOffset = internal::AlignedStartOffset(cigar, seqLength);
-    const int32_t endOffset   = internal::AlignedEndOffset(cigar, seqLength);
+    const std::pair<int32_t, int32_t> alignedOffsets = internal::AlignedOffsets(*this, seqLength);
+    const int32_t startOffset = alignedOffsets.first;
+    const int32_t endOffset = alignedOffsets.second;
     if (endOffset == -1 || startOffset == -1)
         return; // TODO: handle error more??
 
@@ -555,8 +600,17 @@ void BamRecord::CalculateAlignedPositions(void) const
     }
 }
 
-Cigar BamRecord::CigarData(void) const
-{ return impl_.CigarData(); }
+// Returns a copy of the record's CIGAR. When exciseAllClips is true, every
+// soft-clip and hard-clip operation is dropped from the copy; the remaining
+// operations keep their relative order.
+Cigar BamRecord::CigarData(bool exciseAllClips) const
+{
+    auto result = impl_.CigarData();
+    if (!exciseAllClips)
+        return result;
+
+    // erase-remove: shift the non-clipping ops to the front, then trim the tail
+    const auto newEnd = std::remove_if(result.begin(),
+                                       result.end(),
+                                       internal::IsClippingOp);
+    result.erase(newEnd, result.end());
+    return result;
+}
 
 BamRecord& BamRecord::Clip(const ClipType clipType,
                            const Position start,
@@ -740,9 +794,16 @@ BamRecord& BamRecord::Clip(const ClipType clipType,
     string pulseCall = std::move(PulseCall(Orientation::GENOMIC));
     std::vector<float> pkmean = std::move(Pkmean(Orientation::GENOMIC));
     std::vector<float> pkmid = std::move(Pkmid(Orientation::GENOMIC));
+    std::vector<float> pkmean2 = std::move(Pkmean2(Orientation::GENOMIC));
+    std::vector<float> pkmid2 = std::move(Pkmid2(Orientation::GENOMIC));
     Frames prePulseFrames = std::move(PrePulseFrames(Orientation::GENOMIC).Data());
     Frames pulseCallWidth = std::move(PulseCallWidth(Orientation::GENOMIC).Data());
 
+    // TODO: clean this up
+    std::vector<uint32_t> startFrame;
+    if (HasStartFrame())
+        startFrame = std::move(StartFrame(Orientation::GENOMIC));
+
     // restore native orientation
     if (!isForwardStrand) {
         internal::Reverse(altLabelQV);
@@ -760,8 +821,14 @@ BamRecord& BamRecord::Clip(const ClipType clipType,
         internal::ReverseComplementCaseSens(pulseCall);
         internal::Reverse(pkmean);
         internal::Reverse(pkmid);
+        internal::Reverse(pkmean2);
+        internal::Reverse(pkmid2);
         internal::Reverse(prePulseFrames);
         internal::Reverse(pulseCallWidth);
+
+        if (HasStartFrame())
+            internal::Reverse(startFrame);
+
     }
 
     // update BAM tags
@@ -781,8 +848,13 @@ BamRecord& BamRecord::Clip(const ClipType clipType,
     tags[internal::tagName_pulse_call]          = pulseCall;
     tags[internal::tagName_pkmean]              = EncodePhotons(pkmean);
     tags[internal::tagName_pkmid]               = EncodePhotons(pkmid);
+    tags[internal::tagName_pkmean2]             = EncodePhotons(pkmean2);
+    tags[internal::tagName_pkmid2]              = EncodePhotons(pkmid2);
     tags[internal::tagName_pre_pulse_frames]    = prePulseFrames.Data();
     tags[internal::tagName_pulse_call_width]    = pulseCallWidth.Data();
+    if (HasStartFrame())
+        tags[internal::tagName_startFrame] = startFrame;
+
     impl_.Tags(tags);
 
     // update query start/end
@@ -1083,6 +1155,9 @@ bool BamRecord::HasAltLabelTag(void) const
 bool BamRecord::HasBarcodes(void) const
 { return impl_.HasTag(internal::tagName_barcodes); }
 
+bool BamRecord::HasBarcodeQuality(void) const
+{ return impl_.HasTag(internal::tagName_barcode_quality); }
+
 bool BamRecord::HasLabelQV(void) const
 { return impl_.HasTag(internal::tagName_labelQV); }
 
@@ -1100,6 +1175,9 @@ bool BamRecord::HasHoleNumber(void) const
 bool BamRecord::HasInsertionQV(void) const
 { return impl_.HasTag(internal::tagName_insertionQV); }
 
+bool BamRecord::HasNumPasses(void) const
+{ return impl_.HasTag(internal::tagName_numPasses); }
+
 bool BamRecord::HasPreBaseFrames(void) const
 { return HasIPD(); }
 
@@ -1121,6 +1199,12 @@ bool BamRecord::HasPkmean(void) const
 bool BamRecord::HasPkmid(void) const
 { return impl_.HasTag(internal::tagName_pkmid); }
 
+bool BamRecord::HasPkmean2(void) const
+{ return impl_.HasTag(internal::tagName_pkmean2); }
+
+bool BamRecord::HasPkmid2(void) const
+{ return impl_.HasTag(internal::tagName_pkmid2); }
+
 bool BamRecord::HasPrePulseFrames(void) const
 { return impl_.HasTag(internal::tagName_pre_pulse_frames); }
 
@@ -1146,11 +1230,19 @@ bool BamRecord::HasReadAccuracy(void) const
           && !impl_.TagValue(internal::tagName_readAccuracy).IsNull();
 }
 
-bool BamRecord::HasScrapType(void) const
-{ return impl_.HasTag(internal::tagName_scrap_type)
-          && !impl_.TagValue(internal::tagName_scrap_type).IsNull();
+bool BamRecord::HasScrapRegionType(void) const
+{ return impl_.HasTag(internal::tagName_scrap_region_type)
+          && !impl_.TagValue(internal::tagName_scrap_region_type).IsNull();
+}
+
+bool BamRecord::HasScrapZmwType(void) const
+{ return impl_.HasTag(internal::tagName_scrap_zmw_type)
+          && !impl_.TagValue(internal::tagName_scrap_zmw_type).IsNull();
 }
 
+bool BamRecord::HasStartFrame(void) const
+{ return impl_.HasTag(internal::tagName_startFrame); }
+
 bool BamRecord::HasSignalToNoise(void) const
 { return impl_.HasTag(internal::tagName_snr); }
 
@@ -1223,6 +1315,51 @@ BamRecord& BamRecord::IPD(const Frames& frames,
     return *this;
 }
 
+// Computes (ReferenceEnd - ReferenceStart) minus the matched and mismatched
+// base counts reported by NumMatchesAndMismatches().
+size_t BamRecord::NumDeletedBases(void) const
+{
+    const auto refEnd   = ReferenceEnd();
+    const auto refStart = ReferenceStart();
+    const auto counts   = NumMatchesAndMismatches(); // (matches, mismatches)
+    return (refEnd - refStart - counts.first - counts.second);
+}
+
+// Computes (AlignedEnd - AlignedStart) minus the matched and mismatched
+// base counts reported by NumMatchesAndMismatches().
+size_t BamRecord::NumInsertedBases(void) const
+{
+    const auto alnEnd   = AlignedEnd();
+    const auto alnStart = AlignedStart();
+    const auto counts   = NumMatchesAndMismatches(); // (matches, mismatches)
+    return (alnEnd - alnStart - counts.first - counts.second);
+}
+
+size_t BamRecord::NumMatches(void) const
+{
+    return NumMatchesAndMismatches().first;
+}
+
+// Walks the raw CIGAR of the underlying bam1_t and sums operation lengths:
+//   .first  - total length of SEQUENCE_MATCH operations
+//   .second - total length of SEQUENCE_MISMATCH operations
+// All other operation types are ignored.
+pair<size_t, size_t> BamRecord::NumMatchesAndMismatches(void) const
+{
+    size_t matches = 0;
+    size_t mismatches = 0;
+
+    PBBAM_SHARED_PTR<bam1_t> raw = internal::BamRecordMemory::GetRawData(this);
+    uint32_t* ops = bam_get_cigar(raw.get());
+    for (uint32_t idx = 0; idx < raw->core.n_cigar; ++idx) {
+        switch (static_cast<CigarOperationType>(bam_cigar_op(ops[idx]))) {
+            case CigarOperationType::SEQUENCE_MATCH :
+                matches += bam_cigar_oplen(ops[idx]);
+                break;
+            case CigarOperationType::SEQUENCE_MISMATCH :
+                mismatches += bam_cigar_oplen(ops[idx]);
+                break;
+            default :
+                break;
+        }
+    }
+    return pair<size_t, size_t>(matches, mismatches);
+}
+
+size_t BamRecord::NumMismatches(void) const
+{
+    return NumMatchesAndMismatches().second;
+}
+
 Frames BamRecord::PreBaseFrames(Orientation orientation, 
                                 bool aligned,
                                 bool exciseSoftClips) const
@@ -1438,6 +1575,40 @@ BamRecord& BamRecord::Pkmid(const std::vector<uint16_t>& encodedPhotons)
     return *this;
 }
 
+std::vector<float> BamRecord::Pkmean2(Orientation orientation) const
+{
+    return FetchPhotons(internal::tagName_pkmean2, orientation);
+}
+
+BamRecord& BamRecord::Pkmean2(const std::vector<float>& photons)
+{
+    Pkmean2(EncodePhotons(photons));
+    return *this;
+}
+
+BamRecord& BamRecord::Pkmean2(const std::vector<uint16_t>& encodedPhotons)
+{
+    internal::CreateOrEdit(internal::tagName_pkmean2, encodedPhotons, &impl_);
+    return *this;
+}
+
+std::vector<float> BamRecord::Pkmid2(Orientation orientation) const
+{
+    return FetchPhotons(internal::tagName_pkmid2, orientation);
+}
+
+BamRecord& BamRecord::Pkmid2(const std::vector<float>& photons)
+{
+    Pkmid2(EncodePhotons(photons));
+    return *this;
+}
+
+BamRecord& BamRecord::Pkmid2(const std::vector<uint16_t>& encodedPhotons)
+{
+    internal::CreateOrEdit(internal::tagName_pkmid2, encodedPhotons, &impl_);
+    return *this;
+}
+
 Frames BamRecord::PrePulseFrames(Orientation orientation) const
 {
     return FetchFrames(internal::tagName_pre_pulse_frames, orientation);
@@ -1583,13 +1754,13 @@ BamRecord& BamRecord::QueryStart(const Position pos)
 Accuracy BamRecord::ReadAccuracy(void) const
 {
     const Tag& readAccuracy = impl_.TagValue(internal::tagName_readAccuracy);
-    return Accuracy(readAccuracy.ToInt32());
+    return Accuracy(readAccuracy.ToFloat());
 }
 
 BamRecord& BamRecord::ReadAccuracy(const Accuracy& accuracy)
 {
     internal::CreateOrEdit(internal::tagName_readAccuracy,
-                           static_cast<int32_t>(accuracy),
+                           static_cast<float>(accuracy),
                            &impl_);
     return *this;
 }
@@ -1621,6 +1792,10 @@ BamRecord& BamRecord::ReadGroupId(const std::string& id)
    return *this;
 }
 
+int32_t BamRecord::ReadGroupNumericId(void) const
+{ return ReadGroupInfo::IdToInt(ReadGroupId()); }
+
+
 Position BamRecord::ReferenceEnd(void) const
 {
     if (!impl_.IsMapped())
@@ -1657,22 +1832,41 @@ void BamRecord::ResetCachedPositions(void)
     alignedStart_ = PacBio::BAM::UnmappedPosition;
 }
 
-VirtualRegionType BamRecord::ScrapType(void) const
+VirtualRegionType BamRecord::ScrapRegionType(void) const
+{
+    const Tag& srTag = impl_.TagValue(internal::tagName_scrap_region_type);
+    return VirtualRegionTypeMap::ParseChar[srTag.ToUInt8()];
+}
+
+BamRecord& BamRecord::ScrapRegionType(const VirtualRegionType type)
 {
-    const Tag& scTag = impl_.TagValue(internal::tagName_scrap_type);
-    return VirtualRegionTypeMap::ParseChar[scTag.ToUInt8()];
+    internal::CreateOrEdit(internal::tagName_scrap_region_type,
+                           static_cast<uint8_t>(type), &impl_);
+    return *this;
 }
 
-BamRecord& BamRecord::ScrapType(const VirtualRegionType type)
+BamRecord& BamRecord::ScrapRegionType(const char type)
 {
-    internal::CreateOrEdit(internal::tagName_scrap_type,
+    internal::CreateOrEdit(internal::tagName_scrap_region_type, type, &impl_);
+    return *this;
+}
+
+ZmwType BamRecord::ScrapZmwType(void) const
+{
+    const Tag& szTag = impl_.TagValue(internal::tagName_scrap_zmw_type);
+    return ZmwTypeMap::ParseChar[szTag.ToUInt8()];
+}
+
+BamRecord& BamRecord::ScrapZmwType(const ZmwType type)
+{
+    internal::CreateOrEdit(internal::tagName_scrap_zmw_type,
                            static_cast<uint8_t>(type), &impl_);
     return *this;
 }
 
-BamRecord& BamRecord::ScrapType(const char type)
+BamRecord& BamRecord::ScrapZmwType(const char type)
 {
-    internal::CreateOrEdit(internal::tagName_scrap_type, type, &impl_);
+    internal::CreateOrEdit(internal::tagName_scrap_zmw_type, type, &impl_);
     return *this;
 }
 
@@ -1698,6 +1892,18 @@ BamRecord& BamRecord::SignalToNoise(const vector<float>& snr)
     return *this;
 }
 
+// Returns the contents of the start-frame ("sf") tag as a uint32_t array.
+// NOTE(review): 'orientation' is never read in this body, so the data comes
+// back exactly as the tag conversion yields it - confirm whether callers that
+// pass Orientation::GENOMIC expect reorientation.
+std::vector<uint32_t> BamRecord::StartFrame(Orientation orientation) const
+{
+    const Tag& sfTag = impl_.TagValue(internal::tagName_startFrame);
+    return sfTag.ToUInt32Array();
+}
+
+BamRecord& BamRecord::StartFrame(const std::vector<uint32_t>& startFrame)
+{
+    internal::CreateOrEdit(internal::tagName_startFrame, startFrame, &impl_);
+    return *this;
+}
+
 QualityValues BamRecord::SubstitutionQV(Orientation orientation,
                                         bool aligned,
                                         bool exciseSoftClips) const
diff --git a/src/BamRecordImpl.cpp b/src/BamRecordImpl.cpp
index 46632b3..abe7bf0 100644
--- a/src/BamRecordImpl.cpp
+++ b/src/BamRecordImpl.cpp
@@ -56,10 +56,12 @@ BamRecordImpl::BamRecordImpl(void)
 
 BamRecordImpl::BamRecordImpl(const BamRecordImpl& other)
     : d_(bam_dup1(other.d_.get()), internal::HtslibRecordDeleter())
+    , tagOffsets_(other.tagOffsets_)
 { }
 
 BamRecordImpl::BamRecordImpl(BamRecordImpl&& other)
     : d_(nullptr)
+    , tagOffsets_(std::move(other.tagOffsets_))
 {
     d_.swap(other.d_);
     other.d_.reset();
@@ -71,6 +73,7 @@ BamRecordImpl& BamRecordImpl::operator=(const BamRecordImpl& other)
         if (d_ == nullptr)
             InitializeData();
         bam_copy1(d_.get(), other.d_.get());
+        tagOffsets_ = other.tagOffsets_;
     }
     return *this;
 }
@@ -80,27 +83,45 @@ BamRecordImpl& BamRecordImpl::operator=(BamRecordImpl&& other)
     if (this != & other) {
         d_.swap(other.d_);
         other.d_.reset();
+
+        tagOffsets_ = std::move(other.tagOffsets_);
     }
     return *this;
 }
 
 BamRecordImpl::~BamRecordImpl(void) { }
 
-bool BamRecordImpl::AddTag(const string& tagName, const Tag &value)
+bool BamRecordImpl::AddTag(const string& tagName,
+                           const Tag &value)
+{
+    return AddTag(tagName, value, TagModifier::NONE);
+}
+
+bool BamRecordImpl::AddTag(const string& tagName,
+                           const Tag& value,
+                           const TagModifier additionalModifier)
 {
     if (tagName.size() != 2 || HasTag(tagName))
         return false;
+    const bool added = AddTagImpl(tagName, value, additionalModifier);
+    if (added)
+        UpdateTagMap();
+    return added;
+}
 
-    const vector<uint8_t> rawData = std::move(BamTagCodec::ToRawData(value));
+bool BamRecordImpl::AddTagImpl(const string& tagName,
+                               const Tag& value,
+                               const TagModifier additionalModifier)
+{
+    const vector<uint8_t> rawData = std::move(BamTagCodec::ToRawData(value, additionalModifier));
     if (rawData.empty())
         return false;
 
     bam_aux_append(d_.get(),
                    tagName.c_str(),
-                   BamTagCodec::TagTypeCode(value),
+                   BamTagCodec::TagTypeCode(value, additionalModifier),
                    rawData.size(),
                    const_cast<uint8_t*>(rawData.data()));
-
     return true;
 }
 
@@ -111,7 +132,7 @@ Cigar BamRecordImpl::CigarData(void) const
     uint32_t* cigarData = bam_get_cigar(d_);
     for (uint32_t i = 0; i < d_->core.n_cigar; ++i) {
         const uint32_t length = bam_cigar_oplen(cigarData[i]);
-        const char type = bam_cigar_opchr(cigarData[i]);
+        const CigarOperationType type = static_cast<CigarOperationType>(bam_cigar_op(cigarData[i]));
         result.push_back(CigarOperation(type, length));
     }
 
@@ -151,9 +172,26 @@ BamRecordImpl& BamRecordImpl::CigarData(const std::string& cigarString)
     return CigarData(Cigar::FromStdString(cigarString));
 }
 
-bool BamRecordImpl::EditTag(const string& tagName, const Tag &newValue)
+bool BamRecordImpl::EditTag(const string& tagName,
+                            const Tag& newValue)
 {
-    return RemoveTag(tagName) && AddTag(tagName, newValue);
+    return EditTag(tagName, newValue, TagModifier::NONE);
+}
+
+// Replaces the value of an existing tag.
+//
+// tagName            - 2-character tag name
+// newValue           - value to store
+// additionalModifier - extra encoding hint forwarded to AddTagImpl
+//
+// Returns false if the existing tag could not be removed (RemoveTagImpl
+// failed) or if the new value could not be appended (AddTagImpl failed);
+// true otherwise. In the second failure case the old tag has already been
+// removed from the record.
+bool BamRecordImpl::EditTag(const string& tagName,
+                            const Tag& newValue,
+                            const TagModifier additionalModifier)
+{
+    // try remove old value (with delayed tag map update)
+    const bool removed = RemoveTagImpl(tagName);
+    if (!removed)
+        return false;
+
+    // if old value removed, add new value
+    const bool added = AddTagImpl(tagName, newValue, additionalModifier);
+
+    // The removal already changed the aux data, so the cached offsets are
+    // stale whether or not the add succeeded. Refresh unconditionally so a
+    // failed add cannot leave HasTag()/TagValue() reading outdated offsets.
+    UpdateTagMap();
+    return added;
+}
 
 BamRecordImpl BamRecordImpl::FromRawData(const PBBAM_SHARED_PTR<bam1_t>& rawData)
@@ -167,7 +205,10 @@ bool BamRecordImpl::HasTag(const string& tagName) const
 {
     if (tagName.size() != 2)
         return false;
-    return bam_aux_get(d_.get(), tagName.c_str()) != 0;
+    return TagOffset(tagName) != -1;
+
+    // 27635
+//    return bam_aux_get(d_.get(), tagName.c_str()) != 0;
 }
 
 void BamRecordImpl::InitializeData(void)
@@ -242,22 +283,25 @@ QualityValues BamRecordImpl::Qualities(void) const
     for (size_t i = 0; i < numQuals; ++i)
         result.push_back(QualityValue(qualData[i]));
     return result;
-
-//    string result;
-//    result.reserve(d_->core.l_qseq);
-//    for (int i = 0; i < d_->core.l_qseq; ++i)
-//        result.push_back(qualData[i] + 33);
-//    return result;
 }
 
 bool BamRecordImpl::RemoveTag(const string& tagName)
 {
+    const bool removed = RemoveTagImpl(tagName);
+    if (removed)
+        UpdateTagMap();
+    return removed;
+}
+
+bool BamRecordImpl::RemoveTagImpl(const string &tagName)
+{
     if (tagName.size() != 2)
         return false;
     uint8_t* data = bam_aux_get(d_.get(), tagName.c_str());
     if (data == 0)
         return false;
-    return bam_aux_del(d_.get(), data) == 0;
+    const bool ok = bam_aux_del(d_.get(), data) == 0;
+    return ok;
 }
 
 string BamRecordImpl::Sequence(void) const
@@ -271,6 +315,9 @@ string BamRecordImpl::Sequence(void) const
     return result;
 }
 
+size_t BamRecordImpl::SequenceLength(void) const
+{ return d_->core.l_qseq; }
+
 BamRecordImpl& BamRecordImpl::SetSequenceAndQualities(const std::string& sequence,
                                                       const std::string& qualities)
 {
@@ -335,51 +382,11 @@ BamRecordImpl& BamRecordImpl::SetSequenceAndQualitiesInternal(const char* sequen
         memset(pEncodedSequence, 0, encodedSequenceLength);
         for (size_t i = 0; i < sequenceLength; ++i)
             pEncodedSequence[i>>1] |= seq_nt16_table[(int)sequence[i]] << ((~i&1)<<2);
-
-
-
-//        const char* pRawSequence = sequence;
-//        uint8_t nucleotideCode;
-//        bool useHighWord = true;
-//        for (size_t i = 0; i < sequenceLength; ++i) {
-//            switch (*pRawSequence) {
-//                case '=' : nucleotideCode = 0;  break;
-//                case 'A' : nucleotideCode = 1;  break;
-//                case 'C' : nucleotideCode = 2;  break;
-//                case 'M' : nucleotideCode = 3;  break;
-//                case 'G' : nucleotideCode = 4;  break;
-//                case 'R' : nucleotideCode = 5;  break;
-//                case 'S' : nucleotideCode = 6;  break;
-//                case 'V' : nucleotideCode = 7;  break;
-//                case 'T' : nucleotideCode = 8;  break;
-//                case 'W' : nucleotideCode = 9;  break;
-//                case 'Y' : nucleotideCode = 10; break;
-//                case 'H' : nucleotideCode = 11; break;
-//                case 'K' : nucleotideCode = 12; break;
-//                case 'D' : nucleotideCode = 13; break;
-//                case 'B' : nucleotideCode = 14; break;
-//                case 'N' : nucleotideCode = 15; break;
-//                default :
-//                    PB_ASSERT_UNREACHABLE; // graceful way to handle?
-//                    break;
-//            }
-
-//            // pack the nucleotide code
-//            if (useHighWord) {
-//                *pEncodedSequence = nucleotideCode << 4;
-//                useHighWord = false;
-//            } else {
-//                *pEncodedSequence |= nucleotideCode;
-//                ++pEncodedSequence;
-//                useHighWord = true;
-//            }
-//            ++pRawSequence;
-//        }
     }
 
     // fill in quality values
     uint8_t* encodedQualities = bam_get_qual(d_);
-    if ( (qualities == 0 ) || (::strlen(qualities) == 0) )
+    if ( (qualities == 0 ) || (strlen(qualities) == 0) )
         memset(encodedQualities, 0xff, sequenceLength);
     else {
         for (size_t i = 0; i < sequenceLength; ++i)
@@ -388,6 +395,19 @@ BamRecordImpl& BamRecordImpl::SetSequenceAndQualitiesInternal(const char* sequen
     return *this;
 }
 
+// Looks up the cached offset for a tag.
+//
+// Returns the offset stored in tagOffsets_ for tagName, or -1 when the map
+// has no entry for it. Throws std::runtime_error if tagName is not exactly
+// 2 characters long.
+int BamRecordImpl::TagOffset(const string& tagName) const
+{
+    if (tagName.size() != 2)
+        throw std::runtime_error("invalid tag name size");
+
+    // populate the offset map if it currently holds nothing
+    if (tagOffsets_.empty())
+        UpdateTagMap();
+
+    // pack the two name bytes into the 16-bit lookup key (first char in the high byte)
+    const uint8_t highByte = static_cast<uint8_t>(tagName[0]);
+    const uint8_t lowByte  = static_cast<uint8_t>(tagName[1]);
+    const uint16_t key = (highByte << 8) | lowByte;
+
+    const auto entry = tagOffsets_.find(key);
+    if (entry == tagOffsets_.cend())
+        return -1;
+    return entry->second;
+}
+
 BamRecordImpl& BamRecordImpl::Tags(const TagCollection& tags)
 {
     // convert tags to binary
@@ -405,6 +425,9 @@ BamRecordImpl& BamRecordImpl::Tags(const TagCollection& tags)
 
     // fill in new tag data
     memcpy((void*)tagStart, data, numBytes);
+
+    // update tag info
+    UpdateTagMap();
     return *this;
 }
 
@@ -419,8 +442,107 @@ Tag BamRecordImpl::TagValue(const string& tagName) const
 {
     if (tagName.size() != 2)
         return Tag();
-    uint8_t* data = bam_aux_get(d_.get(), tagName.c_str());
-    if (data == 0)
+
+    const int offset = TagOffset(tagName);
+    if (offset == -1)
         return Tag();
-    return BamTagCodec::FromRawData(data);
+
+    bam1_t* b = d_.get();
+    assert(bam_get_aux(b));
+    uint8_t* tagData = bam_get_aux(b) + offset;
+    if (offset >= b->l_data)
+        return Tag();
+
+    // skip tag name
+    return BamTagCodec::FromRawData(tagData);
+}
+
+void BamRecordImpl::UpdateTagMap(void) const
+{
+    // clear out offsets, leave map structure basically intact
+    auto tagIter = tagOffsets_.begin();
+    auto tagEnd  = tagOffsets_.end();
+    for ( ; tagIter != tagEnd; ++tagIter )
+        tagIter->second = -1;
+
+    const uint8_t* tagStart = bam_get_aux(d_);
+    if (tagStart == 0)
+        return;
+    const ptrdiff_t numBytes = d_->l_data - (tagStart - d_->data);
+
+    // NOTE: using a 16-bit 'code' for tag name here instead of string, to avoid
+    // a lot of string constructions & comparisons. All valid tags will be 2 chars
+    // anyway, so this should be a nice lookup mechanism.
+    //
+    uint16_t tagNameCode;
+    int64_t i = 0;
+    while(i < numBytes) {
+
+        // store (tag name code -> start offset into tag data)
+        tagNameCode = static_cast<char>(tagStart[i]) << 8 | static_cast<char>(tagStart[i+1]);
+        i += 2;
+        tagOffsets_[tagNameCode] = i;
+
+        // skip tag contents
+        const char tagType = static_cast<char>(tagStart[i++]);
+        switch (tagType) {
+            case 'A' :
+            case 'a' :
+            case 'c' :
+            case 'C' :
+            {
+                i += 1;
+                break;
+            }
+            case 's' :
+            case 'S' :
+            {
+                i += 2;
+                break;
+            }
+            case 'i' :
+            case 'I' :
+            case 'f' :
+            {
+                i += 4;
+                break;
+            }
+
+            case 'Z' :
+            case 'H' :
+            {
+                // null-terminated string
+                i += strlen((const char*)&tagStart[i]) + 1;
+                break;
+            }
+
+            case 'B' :
+            {
+                const char subTagType = tagStart[i++];
+                size_t elementSize = 0;
+                switch (subTagType) {
+                    case 'c' :
+                    case 'C' : elementSize = 1; break;
+                    case 's' :
+                    case 'S' : elementSize = 2; break;
+                    case 'i' :
+                    case 'I' :
+                    case 'f' : elementSize = 4; break;
+
+                    // unknown subTagType
+                    default:
+                        PB_ASSERT_OR_RETURN(false);
+                }
+
+                uint32_t numElements = 0;
+                memcpy(&numElements, &tagStart[i], sizeof(uint32_t));
+                i += (4 + (elementSize * numElements));
+                break;
+            }
+
+            // unknown tagType
+            default:
+                PB_ASSERT_OR_RETURN(false);
+        }
+    }
 }
diff --git a/src/BamTagCodec.cpp b/src/BamTagCodec.cpp
index 522e41d..fca2cbe 100644
--- a/src/BamTagCodec.cpp
+++ b/src/BamTagCodec.cpp
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file BamTagCodec.cpp
+/// \brief Implements the BamTagCodec class.
+//
 // Author: Derek Barnett
 
 #include "pbbam/BamTagCodec.h"
@@ -44,30 +48,17 @@ using namespace PacBio::BAM;
 using namespace std;
 
 template<typename T>
-inline void appendBamValue(const T& value, kstring_t* str /*vector<uint8_t>& result*/)
+inline void appendBamValue(const T& value, kstring_t* str)
 {
     kputsn_((char*)&value, sizeof(value), str);
-//    const size_t initialResultSize = result.size();
-//    result.resize(initialResultSize + sizeof(T));
-//    memcpy((uint8_t*)&result[initialResultSize],
-//           (uint8_t*)&value,
-//           sizeof(T));
 }
 
 template<typename T>
-inline void appendBamMultiValue(const vector<T>& container, kstring_t* str /*vector<uint8_t>& result*/)
+inline void appendBamMultiValue(const vector<T>& container, kstring_t* str)
 {
     const uint32_t n = container.size();
     kputsn_(&n, sizeof(n), str);
     kputsn_((char*)&container[0], n*sizeof(T), str);
-
-//    const size_t initialResultSize = result.size();
-//    const uint32_t numValues = container.size();
-//    result.resize(initialResultSize + 4 + numValues*sizeof(T));
-//    memcpy((uint8_t*)&result[initialResultSize], (uint32_t*)&numValues, sizeof(numValues));
-//    memcpy((uint8_t*)&result[initialResultSize + 4],
-//           (uint8_t*)&container[0],
-//            numValues*sizeof(T));
 }
 
 template<typename T>
@@ -147,7 +138,6 @@ TagCollection BamTagCodec::Decode(const vector<uint8_t>& data)
             {
                 const char subTagType = pData[i++];
                 switch (subTagType) {
-
                     case 'c' : tags[tagName] = readBamMultiValue<int8_t>(pData, i);   break;
                     case 'C' : tags[tagName] = readBamMultiValue<uint8_t>(pData, i);  break;
                     case 's' : tags[tagName] = readBamMultiValue<int16_t>(pData, i);  break;
@@ -176,8 +166,6 @@ vector<uint8_t> BamTagCodec::Encode(const TagCollection& tags)
 {
     kstring_t str = { 0, 0, NULL };
 
-    vector<uint8_t> result;
-
     const auto tagEnd  = tags.cend();
     for (auto tagIter = tags.cbegin(); tagIter != tagEnd; ++tagIter) {
         const string& name = (*tagIter).first;
@@ -305,12 +293,14 @@ vector<uint8_t> BamTagCodec::Encode(const TagCollection& tags)
                 break;
             }
 
+            // unsupported tag type
             default :
                 free(str.s);
                 PB_ASSERT_OR_RETURN_VALUE(false, vector<uint8_t>());
         }
     }
 
+    vector<uint8_t> result;
     result.resize(str.l);
     memcpy((char*)&result[0], str.s, str.l);
     free(str.s);
@@ -344,7 +334,7 @@ Tag BamTagCodec::FromRawData(uint8_t* rawData)
             const size_t dataLength = strlen((const char*)&rawData[0]);
             string value;
             value.resize(dataLength);
-            memcpy( (char*)value.data(), &rawData[0], dataLength );
+            memcpy((char*)value.data(), &rawData[0], dataLength);
             Tag t(value);
             if (tagType == 'H')
                 t.Modifier(TagModifier::HEX_STRING);
@@ -375,34 +365,36 @@ Tag BamTagCodec::FromRawData(uint8_t* rawData)
         default:
             PB_ASSERT_OR_RETURN_VALUE(false, Tag());
     }
+    return Tag(); // to avoid compiler warning
 }
 
-vector<uint8_t> BamTagCodec::ToRawData(const Tag& tag)
+vector<uint8_t> BamTagCodec::ToRawData(const Tag& tag,
+                                       const TagModifier& additionalModifier)
 {
+    // temp raw data destination (for use with htslib methods)
     kstring_t str = { 0, 0, NULL };
 
     // "<TYPE>:<DATA>" for printable, ASCII char
-    if (tag.HasModifier(TagModifier::ASCII_CHAR)) {
-        char c = tag.ToAscii();
-        if (c != '\0') {
+    if (tag.HasModifier(TagModifier::ASCII_CHAR) || additionalModifier == TagModifier::ASCII_CHAR) {
+        const char c = tag.ToAscii();
+        if (c != '\0')
             kputc_(c, &str);
-        }
     }
 
     // for all others
     else {
-        switch ( tag.Type() ) {
+        switch (tag.Type()) {
 
             // single, numeric values
-            case TagDataType::INT8   : appendBamValue(tag.ToInt8(), &str);   break;
-            case TagDataType::UINT8  : appendBamValue(tag.ToUInt8(), &str);  break;
-            case TagDataType::INT16  : appendBamValue(tag.ToInt16(), &str);  break;
+            case TagDataType::INT8   : appendBamValue(tag.ToInt8(),   &str); break;
+            case TagDataType::UINT8  : appendBamValue(tag.ToUInt8(),  &str); break;
+            case TagDataType::INT16  : appendBamValue(tag.ToInt16(),  &str); break;
             case TagDataType::UINT16 : appendBamValue(tag.ToUInt16(), &str); break;
-            case TagDataType::INT32  : appendBamValue(tag.ToInt32(), &str);  break;
+            case TagDataType::INT32  : appendBamValue(tag.ToInt32(),  &str); break;
             case TagDataType::UINT32 : appendBamValue(tag.ToUInt32(), &str); break;
-            case TagDataType::FLOAT  : appendBamValue(tag.ToFloat(), &str);  break;
+            case TagDataType::FLOAT  : appendBamValue(tag.ToFloat(),  &str); break;
 
-            // string (& hex-string) values
+            // string & hex-string values
             case TagDataType::STRING :
             {
                 const string& s = tag.ToString();
@@ -454,12 +446,14 @@ vector<uint8_t> BamTagCodec::ToRawData(const Tag& tag)
                 break;
             }
 
+            // unsupported tag type
             default :
                 free(str.s);
                 PB_ASSERT_OR_RETURN_VALUE(false, vector<uint8_t>());
         }
     }
 
+    // store temp contents in actual destination
     vector<uint8_t> result;
     result.resize(str.l);
     memcpy((char*)&result[0], str.s, str.l);
@@ -467,11 +461,12 @@ vector<uint8_t> BamTagCodec::ToRawData(const Tag& tag)
     return result;
 }
 
-uint8_t BamTagCodec::TagTypeCode(const Tag &tag)
+uint8_t BamTagCodec::TagTypeCode(const Tag& tag,
+                                 const TagModifier& additionalModifier)
 {
-    if ( tag.HasModifier(TagModifier::ASCII_CHAR) ) {
+    if (tag.HasModifier(TagModifier::ASCII_CHAR) || additionalModifier == TagModifier::ASCII_CHAR) {
         int64_t value = 0;
-        switch ( tag.Type() ) {
+        switch (tag.Type()) {
             case TagDataType::INT8   : value = static_cast<int64_t>(tag.ToInt8());   break;
             case TagDataType::UINT8  : value = static_cast<int64_t>(tag.ToUInt8());  break;
             case TagDataType::INT16  : value = static_cast<int64_t>(tag.ToInt16());  break;
@@ -479,7 +474,7 @@ uint8_t BamTagCodec::TagTypeCode(const Tag &tag)
             case TagDataType::INT32  : value = static_cast<int64_t>(tag.ToInt32());  break;
             case TagDataType::UINT32 : value = static_cast<int64_t>(tag.ToUInt32()); break;
             default:
-                // non integers not
+                // non integers not allowed
                 PB_ASSERT_OR_RETURN_VALUE(false, 0);
         }
         // printable range
@@ -488,7 +483,7 @@ uint8_t BamTagCodec::TagTypeCode(const Tag &tag)
         return static_cast<uint8_t>('A');
     }
 
-    switch ( tag.Type() ) {
+    switch (tag.Type()) {
         case TagDataType::INT8   : return static_cast<uint8_t>('c');
         case TagDataType::UINT8  : return static_cast<uint8_t>('C');
         case TagDataType::INT16  : return static_cast<uint8_t>('s');
@@ -499,8 +494,10 @@ uint8_t BamTagCodec::TagTypeCode(const Tag &tag)
 
         case TagDataType::STRING :
         {
-            return tag.HasModifier(TagModifier::HEX_STRING) ? static_cast<uint8_t>('H')
-                                                            : static_cast<uint8_t>('Z');
+            if (tag.HasModifier(TagModifier::HEX_STRING) || additionalModifier == TagModifier::HEX_STRING)
+                return static_cast<uint8_t>('H');
+            else
+                return static_cast<uint8_t>('Z');
         }
 
         case TagDataType::INT8_ARRAY   : // fall through
@@ -514,4 +511,5 @@ uint8_t BamTagCodec::TagTypeCode(const Tag &tag)
         default:
             PB_ASSERT_OR_RETURN_VALUE(false, 0);
     }
+    return 0; // to avoid compiler warning
 }
diff --git a/src/BamWriter.cpp b/src/BamWriter.cpp
index 0abd6e6..1c12acb 100644
--- a/src/BamWriter.cpp
+++ b/src/BamWriter.cpp
@@ -42,8 +42,8 @@
 #include <htslib/bgzf.h>
 #include <htslib/hfile.h>
 #include <htslib/hts.h>
-#include <thread>
 #include <iostream>
+#include <thread>
 using namespace PacBio;
 using namespace PacBio::BAM;
 using namespace std;
@@ -55,35 +55,33 @@ namespace internal {
 class BamWriterPrivate
 {
 public:
-    BamWriterPrivate(void)
-        : file_(nullptr)
-        , header_(nullptr)
-    { }
+    BamWriterPrivate(const std::string& filename,
+                     const PBBAM_SHARED_PTR<bam_hdr_t> rawHeader,
+                     const BamWriter::CompressionLevel compressionLevel,
+                     const size_t numThreads,
+                     const BamWriter::BinCalculationMode binCalculationMode);
 
 public:
-    void Open(const std::string& filename,
-              const PBBAM_SHARED_PTR<bam_hdr_t> rawHeader,
-              const BamWriter::CompressionLevel compressionLevel = BamWriter::DefaultCompression,
-              size_t numThreads = 4);
     void Write(const PBBAM_SHARED_PTR<bam1_t>& rawRecord);
     void Write(const PBBAM_SHARED_PTR<bam1_t>& rawRecord, int64_t* vOffset);
 
 public:
+    bool calculateBins_;
     std::unique_ptr<samFile, internal::HtslibFileDeleter> file_;
     PBBAM_SHARED_PTR<bam_hdr_t> header_;
     std::string filename_;
 };
 
-void BamWriterPrivate::Open(const string& filename,
-                            const PBBAM_SHARED_PTR<bam_hdr_t> rawHeader,
-                            const BamWriter::CompressionLevel compressionLevel,
-                            size_t numThreads)
+BamWriterPrivate::BamWriterPrivate(const string& filename,
+                                   const PBBAM_SHARED_PTR<bam_hdr_t> rawHeader,
+                                   const BamWriter::CompressionLevel compressionLevel,
+                                   const size_t numThreads,
+                                   const BamWriter::BinCalculationMode binCalculationMode)
+    : calculateBins_(binCalculationMode == BamWriter::BinCalculation_ON)
+    , file_(nullptr)
+    , header_(rawHeader)
+    , filename_(filename)
 {
-    // store filename
-    filename_ = filename;
-
-    // store header
-    header_ = rawHeader;
     if (!header_)
         throw std::runtime_error("null header");
 
@@ -93,18 +91,22 @@ void BamWriterPrivate::Open(const string& filename,
     if (!file_)
         throw std::runtime_error("could not open file for writing");
 
+//    BGZF* bgzf = file_.get()->fp.bgzf;
+//    bgzf_index_build_init(bgzf);
+
     // if no explicit thread count given, attempt built-in check
-    if (numThreads == 0) {
-        numThreads = thread::hardware_concurrency();
+    size_t actualNumThreads = numThreads;
+    if (actualNumThreads == 0) {
+        actualNumThreads = thread::hardware_concurrency();
 
         // if still unknown, default to single-threaded
-        if (numThreads == 0)
-            numThreads = 1;
+        if (actualNumThreads == 0)
+            actualNumThreads = 1;
     }
 
     // if multithreading requested, enable it
-    if (numThreads > 1)
-        hts_set_threads(file_.get(), numThreads);
+    if (actualNumThreads > 1)
+        hts_set_threads(file_.get(), actualNumThreads);
 
     // write header
     const int ret = sam_hdr_write(file_.get(), header_.get());
@@ -114,6 +116,11 @@ void BamWriterPrivate::Open(const string& filename,
 
 void BamWriterPrivate::Write(const PBBAM_SHARED_PTR<bam1_t>& rawRecord)
 {
+    // (probably) store bins
+    if (calculateBins_)
+        rawRecord->core.bin = hts_reg2bin(rawRecord->core.pos, bam_endpos(rawRecord.get()), 14, 5); // min_shift=14 & n_lvls=5 are BAM "magic numbers"
+
+    // write record to file
     const int ret = sam_write1(file_.get(), header_.get(), rawRecord.get());
     if (ret <= 0)
         throw std::runtime_error("could not write record");
@@ -125,10 +132,15 @@ void BamWriterPrivate::Write(const PBBAM_SHARED_PTR<bam1_t>& rawRecord, int64_t*
     assert(bgzf);
     assert(vOffset);
 
+    // ensure offsets up-to-date
+    bgzf_flush(bgzf);
+
+    // capture virtual offset where we’re about to write
     const off_t rawTell = htell(bgzf->fp);
     const int length = bgzf->block_offset;
-
     *vOffset = (rawTell << 16) | length ;
+
+    // now write data
     Write(rawRecord);
 }
 
@@ -139,14 +151,16 @@ void BamWriterPrivate::Write(const PBBAM_SHARED_PTR<bam1_t>& rawRecord, int64_t*
 BamWriter::BamWriter(const std::string& filename,
                      const BamHeader& header,
                      const BamWriter::CompressionLevel compressionLevel,
-                     const size_t numThreads)
-    : d_(new internal::BamWriterPrivate)
-{
-     d_->Open(filename,
-              internal::BamHeaderMemory::MakeRawHeader(header),
-              compressionLevel,
-              numThreads);
-}
+                     const size_t numThreads,
+                     const BinCalculationMode binCalculationMode)
+    : d_{ new internal::BamWriterPrivate{ filename,
+                                          internal::BamHeaderMemory::MakeRawHeader(header),
+                                          compressionLevel,
+                                          numThreads,
+                                          binCalculationMode
+                                        }
+        }
+{ }
 
 BamWriter::~BamWriter(void)
 {
diff --git a/tests/src/test_TimeUtils.cpp b/src/BarcodeQuery.cpp
similarity index 72%
copy from tests/src/test_TimeUtils.cpp
copy to src/BarcodeQuery.cpp
index 7ab9fa5..be45ddb 100644
--- a/tests/src/test_TimeUtils.cpp
+++ b/src/BarcodeQuery.cpp
@@ -32,28 +32,37 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file BarcodeQuery.cpp
+/// \brief Implements the BarcodeQuery class.
+//
 // Author: Derek Barnett
 
-#ifdef PBBAM_TESTING
-#define private public
-#endif
-
-#include <gtest/gtest.h>
-#include <pbbam/../../src/TimeUtils.h>
-
+#include "pbbam/BarcodeQuery.h"
+#include "pbbam/PbiFilterTypes.h"
+#include "pbbam/CompositeBamReader.h"
 using namespace PacBio;
 using namespace PacBio::BAM;
 using namespace PacBio::BAM::internal;
 using namespace std;
 
-TEST(TimeUtilsTest, ToIso8601)
+struct BarcodeQuery::BarcodeQueryPrivate
 {
-    const time_t rawTime = 436428750L;
-    const auto timestamp = std::chrono::system_clock::from_time_t(rawTime);
+    BarcodeQueryPrivate(const uint16_t barcode, const DataSet& dataset)
+        : reader_(PbiBarcodeFilter(barcode), dataset)
+    { }
+
+    PbiFilterCompositeBamReader<Compare::None> reader_; // unsorted
+};
+
+BarcodeQuery::BarcodeQuery(const uint16_t barcode,
+                           const DataSet& dataset)
+    : internal::IQuery()
+    , d_(new BarcodeQueryPrivate(barcode, dataset))
+{ }
+
+BarcodeQuery::~BarcodeQuery(void) { }
 
-    // can't hardcode expected (since we rely on localtime())
-    const std::string& expected = "1983-10-31T06:12:30Z";
-    const std::string& actual = internal::ToIso8601(timestamp);
-    EXPECT_EQ(expected, actual);
-}
+bool BarcodeQuery::GetNext(BamRecord &r)
+{ return d_->reader_.GetNext(r); }
diff --git a/include/pbbam/internal/IBamFileIterator.h b/src/ChemistryTable.cpp
similarity index 67%
rename from include/pbbam/internal/IBamFileIterator.h
rename to src/ChemistryTable.cpp
index 5ea34f3..622524d 100644
--- a/include/pbbam/internal/IBamFileIterator.h
+++ b/src/ChemistryTable.cpp
@@ -33,47 +33,33 @@
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
 
-// Author: Derek Barnett
+// Author: Lance Hepler
 
-#ifndef IBAMFILEITERATOR_H
-#define IBAMFILEITERATOR_H
-
-#include "pbbam/BamFile.h"
-#include "pbbam/BamRecord.h"
-#include <memory>
-#include <vector>
+#include "ChemistryTable.h"
 
 namespace PacBio {
 namespace BAM {
 namespace internal {
 
-template<typename T>
-class IBamFileIteratorBase
-{
-public:
-    typedef std::shared_ptr< IBamFileIteratorBase<T> > Ptr;
-
-protected:
-    IBamFileIteratorBase(const BamFile& file)
-        : header_(file.Header().DeepCopy())
-    { }
-public:
-    virtual ~IBamFileIteratorBase(void) { }
-
-public:
-    virtual bool GetNext(T& result) =0;
-    virtual bool InSameGroup(const BamRecord& lhs, const BamRecord& rhs) const
-    { (void)lhs; (void)rhs; return true; }
+extern const std::vector<std::array<std::string, 4>> ChemistryTable = {
 
-protected:
-    const BamHeader header_;
+    // binding, sequencing, version, chemistry
+    {{"100356300",   "100356200",   "2.1", "P6-C4"}},
+    {{"100356300",   "100356200",   "2.3", "P6-C4"}},
+    {{"100356300",   "100612400",   "2.1", "P6-C4"}},
+    {{"100356300",   "100612400",   "2.3", "P6-C4"}},
+    {{"100372700",   "100356200",   "2.1", "P6-C4"}},
+    {{"100372700",   "100356200",   "2.3", "P6-C4"}},
+    {{"100372700",   "100612400",   "2.1", "P6-C4"}},
+    {{"100372700",   "100612400",   "2.3", "P6-C4"}},
+    {{"100-619-300", "100-619-400", "3.0", "S/P1-C1"}},
+    {{"100-619-300", "100-711-600", "3.0", "S/P1-C1"}},
+    {{"100-619-300", "100-620-000", "3.0", "S/P1-C1"}},
+    {{"100-619-300", "100-619-400", "3.1", "S/P1-C1"}},
+    {{"100-619-300", "100-711-600", "3.1", "S/P1-C1"}},
+    {{"100-619-300", "100-620-000", "3.1", "S/P1-C1"}}
 };
 
-typedef IBamFileIteratorBase<BamRecord>               IBamFileIterator;
-typedef IBamFileIteratorBase<std::vector<BamRecord> > IBamFileGroupIterator;
-
 } // namespace internal
 } // namespace BAM
 } // namespace PacBio
-
-#endif // IBAMFILEITERATOR_H
diff --git a/include/pbbam/Orientation.h b/src/ChemistryTable.h
similarity index 87%
copy from include/pbbam/Orientation.h
copy to src/ChemistryTable.h
index 7582199..6caacaa 100644
--- a/include/pbbam/Orientation.h
+++ b/src/ChemistryTable.h
@@ -33,23 +33,23 @@
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
 
-// Author: Derek Barnett
+// Author: Lance Hepler
 
-#ifndef ORIENTATION_H
-#define ORIENTATION_H
+#ifndef CHEMISTRYTABLE_H
+#define CHEMISTRYTABLE_H
 
-#include "pbbam/Config.h"
+#include <array>
+#include <string>
+#include <vector>
 
 namespace PacBio {
 namespace BAM {
+namespace internal {
 
-enum class Orientation
-{
-    NATIVE
-  , GENOMIC
-};
+extern const std::vector<std::array<std::string, 4>> ChemistryTable;
 
+} // namespace internal
 } // namespace BAM
 } // namespace PacBio
 
-#endif // ORIENTATION_H
+#endif // CHEMISTRYTABLE_H
diff --git a/src/Cigar.cpp b/src/Cigar.cpp
index 8c7efa7..f099f54 100644
--- a/src/Cigar.cpp
+++ b/src/Cigar.cpp
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file Cigar.cpp
+/// \brief Implements the Cigar class.
+//
 // Author: Derek Barnett
 
 #include "pbbam/Cigar.h"
@@ -48,7 +52,7 @@ Cigar::Cigar(const string& cigarString)
     const size_t numChars = cigarString.size();
     for (size_t i = 0; i < numChars; ++i) {
         const char c = cigarString.at(i);
-        if (!::isdigit(c)) {
+        if (!isdigit(c)) {
             const size_t distance = i - numberStart;
             const uint32_t length = stoul(cigarString.substr(numberStart, distance));
             push_back(CigarOperation(c, length));
diff --git a/src/CigarOperation.cpp b/src/CigarOperation.cpp
index 50dc4c2..7289983 100644
--- a/src/CigarOperation.cpp
+++ b/src/CigarOperation.cpp
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file CigarOperation.cpp
+/// \brief Implements the CigarOperation class.
+//
 // Author: Derek Barnett
 
 #include "pbbam/CigarOperation.h"
diff --git a/src/Compare.cpp b/src/Compare.cpp
new file mode 100644
index 0000000..43874f2
--- /dev/null
+++ b/src/Compare.cpp
@@ -0,0 +1,141 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file Compare.cpp
+/// \brief Implements the Compare class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/Compare.h"
+#include <functional>
+#include <unordered_map>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+struct TypeAlias
+{
+    string name_;
+    string op_;
+    string opAlpha_;
+
+    TypeAlias(const string& name = string(),
+              const string& op = string(),
+              const string& opAlpha = string())
+        : name_(name)
+        , op_(op)
+        , opAlpha_(opAlpha)
+    { }
+};
+
+struct CompareTypeHash
+{
+    size_t operator()(const Compare::Type& t) const
+    { return std::hash<int>()(static_cast<int>(t)); }
+};
+
+static const unordered_map<string, Compare::Type> opToTypeMap =
+{
+    // basic operators plus some permissiveness for other representations
+
+    { "==",    Compare::EQUAL },
+    { "=",     Compare::EQUAL },
+    { "eq",    Compare::EQUAL },
+    { "!=",    Compare::NOT_EQUAL },
+    { "ne",    Compare::NOT_EQUAL },
+    { "<",     Compare::LESS_THAN },
+    { "lt",    Compare::LESS_THAN },
+    { "&lt;",  Compare::LESS_THAN },
+    { "<=",    Compare::LESS_THAN_EQUAL },
+    { "lte",   Compare::LESS_THAN_EQUAL },
+    { "&lt;=", Compare::LESS_THAN_EQUAL },
+    { ">",     Compare::GREATER_THAN },
+    { "gt",    Compare::GREATER_THAN },
+    { "&gt;",  Compare::GREATER_THAN },
+    { ">=",    Compare::GREATER_THAN_EQUAL },
+    { "gte",   Compare::GREATER_THAN_EQUAL },
+    { "&gt;=", Compare::GREATER_THAN_EQUAL },
+    { "&",     Compare::CONTAINS },
+    { "~",     Compare::NOT_CONTAINS }
+};
+
+static const unordered_map<Compare::Type, TypeAlias, CompareTypeHash> typeAliases =
+{
+    { Compare::EQUAL,              TypeAlias{ "Compare::EQUAL",              "==", "eq"  } },
+    { Compare::NOT_EQUAL,          TypeAlias{ "Compare::NOT_EQUAL",          "!=", "ne"  } },
+    { Compare::LESS_THAN,          TypeAlias{ "Compare::LESS_THAN",          "<",  "lt"  } },
+    { Compare::LESS_THAN_EQUAL,    TypeAlias{ "Compare::LESS_THAN_EQUAL",    "<=", "lte" } },
+    { Compare::GREATER_THAN,       TypeAlias{ "Compare::GREATER_THAN",       ">",  "gt"  } },
+    { Compare::GREATER_THAN_EQUAL, TypeAlias{ "Compare::GREATER_THAN_EQUAL", ">=", "gte" } },
+    { Compare::CONTAINS,           TypeAlias{ "Compare::CONTAINS",           "&",  "and" } },
+    { Compare::NOT_CONTAINS,       TypeAlias{ "Compare::NOT_CONTAINS",       "~",  "not" } }
+};
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+Compare::Type Compare::TypeFromOperator(const string& opString)
+{
+    try {
+        return internal::opToTypeMap.at(opString);
+    } catch (std::exception&) {
+        throw std::runtime_error(opString + " is not a valid comparison operator." );
+    }
+}
+
+string Compare::TypeToName(const Compare::Type& type)
+{
+    try {
+        return internal::typeAliases.at(type).name_;
+    } catch (std::exception&) {
+        throw std::runtime_error("invalid comparison type encountered" );
+    }
+}
+
+string Compare::TypeToOperator(const Compare::Type& type, bool asAlpha)
+{
+    try {
+        return asAlpha ? internal::typeAliases.at(type).opAlpha_
+                       : internal::typeAliases.at(type).op_;
+    } catch (std::exception&) {
+        throw std::runtime_error("invalid comparison type encountered" );
+    }
+}
diff --git a/src/Config.cpp b/src/Config.cpp
index 677ad08..095aa37 100644
--- a/src/Config.cpp
+++ b/src/Config.cpp
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file Config.cpp
+/// \brief Initializes global variable defaults.
+//
 // Author: Derek Barnett
 
 #include "pbbam/Config.h"
@@ -42,7 +46,15 @@ using namespace PacBio::BAM;
 namespace PacBio {
 namespace BAM {
 
-int HtslibVerbosity = 0;
+// Initialized to -1 to indicate default. Client code may set this or not.
+//
+// To respect client code or else fallback to default[OFF], this value should be used like this:
+//
+//    hts_verbose = ( PacBio::BAM::HtslibVerbosity == -1 ? 0 : PacBio::BAM::HtslibVerbosity);
+//
+//
+//
+int HtslibVerbosity = -1;
 
 } // namespace BAM
 } // namespace PacBio
diff --git a/src/DataSet.cpp b/src/DataSet.cpp
index ee43e9f..a44780b 100644
--- a/src/DataSet.cpp
+++ b/src/DataSet.cpp
@@ -32,14 +32,20 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file DataSet.cpp
+/// \brief Implements the DataSet class.
+//
 // Author: Derek Barnett
 
 #include "pbbam/DataSet.h"
 #include "pbbam/DataSetTypes.h"
 #include "pbbam/internal/DataSetBaseTypes.h"
 #include "DataSetIO.h"
+#include "FileUtils.h"
 #include "TimeUtils.h"
+#include <boost/algorithm/string.hpp>
 #include <unordered_map>
 using namespace PacBio;
 using namespace PacBio::BAM;
@@ -47,13 +53,12 @@ using namespace PacBio::BAM::internal;
 using namespace std;
 
 DataSet::DataSet(void)
-    : d_(new DataSetBase)
-{
-    CreatedAt(internal::ToIso8601(internal::CurrentTime()));
-}
+    : DataSet(DataSet::GENERIC)
+{ }
 
 DataSet::DataSet(const DataSet::TypeEnum type)
     : d_(nullptr)
+    , path_(FileUtils::CurrentWorkingDirectory())
 {
     switch(type) {
         case DataSet::GENERIC             : d_.reset(new DataSetBase); break;
@@ -69,20 +74,39 @@ DataSet::DataSet(const DataSet::TypeEnum type)
             throw std::runtime_error("unsupported dataset type"); // unknown type
     }
 
-    CreatedAt(internal::ToIso8601(internal::CurrentTime()));
+    CreatedAt(internal::ToIso8601(CurrentTime()));
 }
 
 DataSet::DataSet(const BamFile& bamFile)
-    : d_(internal::DataSetIO::FromUri(bamFile.Filename()))
+    : d_(DataSetIO::FromUri(bamFile.Filename()))
+    , path_(FileUtils::CurrentWorkingDirectory())
 {
-    CreatedAt(internal::ToIso8601(internal::CurrentTime()));
+    CreatedAt(internal::ToIso8601(CurrentTime()));
 }
 
 DataSet::DataSet(const string& filename)
-    : d_(internal::DataSetIO::FromUri(filename))
+    : d_(DataSetIO::FromUri(filename))
+    , path_(FileUtils::DirectoryName(filename))
+{
+    // for FOFN contents and raw BAM filenames, we can just use the current
+    // directory as the starting path.
+    //
+    // (any relative paths in the FOFN have already been resolved)
+    //
+    if (boost::algorithm::iends_with(filename, ".fofn") ||
+        boost::algorithm::iends_with(filename, ".bam"))
+    {
+        path_ = FileUtils::CurrentWorkingDirectory();
+    }
+}
+
+DataSet::DataSet(const vector<string>& filenames)
+    : d_(DataSetIO::FromUris(filenames))
+    , path_(FileUtils::CurrentWorkingDirectory())
 { }
 
 DataSet::DataSet(const DataSet& other)
+    : path_(other.path_)
 {
     DataSetBase* otherDataset = other.d_.get();
     DataSetElement* copyDataset = new DataSetElement(*otherDataset);
@@ -91,6 +115,7 @@ DataSet::DataSet(const DataSet& other)
 
 DataSet::DataSet(DataSet&& other)
     : d_(std::move(other.d_))
+    , path_(std::move(other.path_))
 {
     assert(other.d_.get() == nullptr);
 }
@@ -100,12 +125,14 @@ DataSet& DataSet::operator=(const DataSet& other)
     DataSetBase* otherDataset = other.d_.get();
     DataSetElement* copyDataset = new DataSetElement(*otherDataset);
     d_.reset(static_cast<DataSetBase*>(copyDataset));
+    path_ = other.path_;
     return *this;
 }
 
 DataSet& DataSet::operator=(DataSet&& other)
 {
     d_ = std::move(other.d_);
+    path_ = std::move(other.path_);
     return *this;
 }
 
@@ -117,6 +144,29 @@ DataSet& DataSet::operator+=(const DataSet& other)
     return *this;
 }
 
+vector<BamFile> DataSet::BamFiles(void) const
+{
+    // Collects a BamFile for every external resource whose MetaType mentions
+    // "bam" (case-insensitive). Resource IDs are passed through ResolvePath()
+    // before each BamFile is constructed.
+    const PacBio::BAM::ExternalResources& extResources = ExternalResources();
+
+    vector<BamFile> bamFiles;
+    bamFiles.reserve(extResources.Size());
+    for (const ExternalResource& resource : extResources) {
+
+        // skip anything that is not a BAM file - no need to resolve its path
+        const bool isBam = boost::algorithm::icontains(resource.MetaType(), "bam");
+        if (!isBam)
+            continue;
+
+        // resolve the resource's path, then construct the BamFile from it
+        const string resolvedFn = ResolvePath(resource.ResourceId());
+        bamFiles.push_back(BamFile(resolvedFn));
+    }
+    return bamFiles;
+}
+
 DataSet DataSet::FromXml(const string& xml)
 {
     DataSet result;
@@ -136,24 +186,56 @@ DataSet::TypeEnum DataSet::NameToType(const string& typeName)
 {
     static std::unordered_map<std::string, DataSet::TypeEnum> lookup;
     if (lookup.empty()) {
-        lookup["DataSet"] = DataSet::GENERIC;
-        lookup["AlignmentSet"] = DataSet::ALIGNMENT;
-        lookup["BarcodeSet"] = DataSet::BARCODE;
+        lookup["DataSet"]               = DataSet::GENERIC;
+        lookup["AlignmentSet"]          = DataSet::ALIGNMENT;
+        lookup["BarcodeSet"]            = DataSet::BARCODE;
         lookup["ConsensusAlignmentSet"] = DataSet::CONSENSUS_ALIGNMENT;
-        lookup["ConsensusReadSet"] = DataSet::CONSENSUS_READ;
-        lookup["ContigSet"] = DataSet::CONTIG;
-        lookup["HdfSubreadSet"] = DataSet::HDF_SUBREAD;
-        lookup["ReferenceSet"] = DataSet::REFERENCE;
-        lookup["SubreadSet"] = DataSet::SUBREAD;
+        lookup["ConsensusReadSet"]      = DataSet::CONSENSUS_READ;
+        lookup["ContigSet"]             = DataSet::CONTIG;
+        lookup["HdfSubreadSet"]         = DataSet::HDF_SUBREAD;
+        lookup["ReferenceSet"]          = DataSet::REFERENCE;
+        lookup["SubreadSet"]            = DataSet::SUBREAD;
     }
     return lookup.at(typeName); // throws if unknown typename
 }
 
+vector<string> DataSet::ResolvedResourceIds(void) const
+{
+    // One entry per external resource, in iteration order: the resource's
+    // ResourceId after being passed through ResolvePath().
+    const PacBio::BAM::ExternalResources& extResources = ExternalResources();
+
+    vector<string> resolvedIds;
+    resolvedIds.reserve(extResources.Size());
+    for (const ExternalResource& resource : extResources) {
+        resolvedIds.push_back(ResolvePath(resource.ResourceId()));
+    }
+    return resolvedIds;
+}
+
+string DataSet::ResolvePath(const string& originalPath) const
+{ return internal::FileUtils::ResolvedFilePath(originalPath, path_); }
+
 void DataSet::Save(const std::string& outputFilename)
-{ internal::DataSetIO::ToFile(d_, outputFilename); }
+{ DataSetIO::ToFile(d_, outputFilename); }
 
 void DataSet::SaveToStream(ostream& out)
-{ internal::DataSetIO::ToStream(d_, out); }
+{ DataSetIO::ToStream(d_, out); }
+
+set<string> DataSet::SequencingChemistries(void) const
+{
+    // Distinct chemistries over all read groups of this dataset's BAM files.
+    set<string> chemistries;
+    const vector<BamFile> files = BamFiles();
+    for (const BamFile& file : files) {
+        if (!file.IsPacBioBAM())
+            throw std::runtime_error{ "only PacBio BAMs are supported" };
+        const vector<ReadGroupInfo> groups = file.Header().ReadGroups();
+        for (const ReadGroupInfo& group : groups)
+            chemistries.insert(group.SequencingChemistry());
+    }
+    return chemistries;
+}
 
 string DataSet::TypeToName(const DataSet::TypeEnum& type)
 {
@@ -171,3 +253,26 @@ string DataSet::TypeToName(const DataSet::TypeEnum& type)
             throw std::runtime_error("unsupported dataset type"); // unknown type
     }
 }
+
+// Exposed timestamp utils
+
+namespace PacBio {
+namespace BAM {
+
+string CurrentTimestamp(void)
+{ return internal::ToDataSetFormat(internal::CurrentTime()); }
+
+string ToDataSetFormat(const chrono::system_clock::time_point &tp)
+{ return internal::ToDataSetFormat(tp); }
+
+string ToDataSetFormat(const time_t &t)
+{ return ToDataSetFormat(chrono::system_clock::from_time_t(t)); }
+
+string ToIso8601(const chrono::system_clock::time_point &tp)
+{ return internal::ToIso8601(tp); }
+
+string ToIso8601(const time_t &t)
+{ return ToIso8601(chrono::system_clock::from_time_t(t)); }
+
+} // namespace BAM
+} // namespace PacBio
diff --git a/src/DataSetBaseTypes.cpp b/src/DataSetBaseTypes.cpp
index cc133e3..2c19e0b 100644
--- a/src/DataSetBaseTypes.cpp
+++ b/src/DataSetBaseTypes.cpp
@@ -38,6 +38,8 @@
 #include "pbbam/DataSetTypes.h"
 #include "pbbam/internal/DataSetBaseTypes.h"
 #include "DataSetUtils.h"
+#include "TimeUtils.h"
+#include <boost/algorithm/string.hpp>
 using namespace PacBio;
 using namespace PacBio::BAM;
 using namespace PacBio::BAM::internal;
@@ -49,7 +51,10 @@ using namespace std;
 
 BaseEntityType::BaseEntityType(const std::string& label, const XsdType& xsd)
     : DataSetElement(label, xsd)
-{ }
+{
+    if (Version().empty())
+        Version(internal::XML_VERSION);
+}
 
 DEFINE_ACCESSORS(BaseEntityType, Extensions, Extensions)
 
@@ -68,8 +73,11 @@ DataEntityType::DataEntityType(const std::string& label, const XsdType& xsd)
 // IndexedDataType
 // -----------------
 
-IndexedDataType::IndexedDataType(const std::string& label, const XsdType &xsd)
-    : InputOutputDataType(label, xsd)
+IndexedDataType::IndexedDataType(const string& metatype,
+                                 const string& filename,
+                                 const string& label, 
+                                 const XsdType &xsd)
+    : InputOutputDataType(metatype, filename, label, xsd)
 { }
 
 DEFINE_ACCESSORS(IndexedDataType, FileIndices, FileIndices)
@@ -81,14 +89,38 @@ IndexedDataType& IndexedDataType::FileIndices(const PacBio::BAM::FileIndices& in
 // InputOutputDataType
 // ---------------------
 
-InputOutputDataType::InputOutputDataType(const std::string& label, const XsdType &xsd)
-    : StrictEntityType(label, xsd)
-{ }
+InputOutputDataType::InputOutputDataType(const string& metatype,
+                                         const string& filename,
+                                         const string& label,
+                                         const XsdType &xsd)
+    : StrictEntityType(metatype, label, xsd)
+{  
+    ResourceId(filename);
+}
 
 // ----------------
 // StrictEntityType
 // ----------------
 
-StrictEntityType::StrictEntityType(const std::string& label, const XsdType& xsd)
+StrictEntityType::StrictEntityType(const string& metatype,
+                                   const string& label,
+                                   const XsdType& xsd)
     : BaseEntityType(label, xsd)
-{ }
+{
+    // MetaType: stored as given
+    MetaType(metatype);
+
+    // TimeStampedName: metatype (lowercased, '.' replaced by '_') + "-" + current time
+    const size_t numChars = metatype.size();
+    string transformedMetatype(numChars, '\0');
+    for (size_t i = 0; i < numChars; ++i) {
+        // cast first: tolower() on a negative char value is undefined behavior
+        const unsigned char c = static_cast<unsigned char>(metatype.at(i));
+        transformedMetatype[i] = ((c == '.') ? '_' : static_cast<char>(tolower(c)));
+    }
+    const string tsn = transformedMetatype + "-" + internal::ToDataSetFormat(internal::CurrentTime());
+    TimeStampedName(tsn);
+
+    // UniqueId: value produced by internal::GenerateUuid()
+    UniqueId(internal::GenerateUuid());
+}
diff --git a/src/DataSetElement.cpp b/src/DataSetElement.cpp
index 26c0bb6..6854fd2 100644
--- a/src/DataSetElement.cpp
+++ b/src/DataSetElement.cpp
@@ -36,12 +36,12 @@
 // Author: Derek Barnett
 
 #include "pbbam/internal/DataSetElement.h"
+#include "DataSetUtils.h"
 using namespace PacBio;
 using namespace PacBio::BAM;
 using namespace PacBio::BAM::internal;
 
 const std::string& DataSetElement::SharedNullString(void)
 {
-    static const std::string empty = std::string("");
-    return empty;
+    return internal::NullObject<std::string>();
 }
diff --git a/src/DataSetIO.cpp b/src/DataSetIO.cpp
index d618210..e57173f 100644
--- a/src/DataSetIO.cpp
+++ b/src/DataSetIO.cpp
@@ -36,6 +36,7 @@
 // Author: Derek Barnett
 
 #include "DataSetIO.h"
+#include "FileUtils.h"
 #include "FofnReader.h"
 #include "StringUtils.h"
 #include "XmlReader.h"
@@ -77,10 +78,14 @@ unique_ptr<DataSetBase> FromBam(const string& bamFn)
 static
 unique_ptr<DataSetBase> FromFofn(const string& fofn)
 {
+    const string fofnDir = internal::FileUtils::DirectoryName(fofn);
     ifstream in(fofn);
     if (!in)
         throw std::runtime_error("could not open FOFN for reading");
-    const vector<string> filenames = std::move(FofnReader::Files(in));
+    // read the listed filenames, then resolve each one against the FOFN's directory
+    vector<string> filenames = FofnReader::Files(in);
+    for (string& fn : filenames)
+        fn = internal::FileUtils::ResolvedFilePath(fn, fofnDir);
     return DataSetIO::FromUris(filenames);
 }
 
diff --git a/src/DataSetTypes.cpp b/src/DataSetTypes.cpp
index 3144adc..9dd7b27 100644
--- a/src/DataSetTypes.cpp
+++ b/src/DataSetTypes.cpp
@@ -32,12 +32,18 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file DataSetTypes.cpp
+/// \brief Implementations for the public DataSet component classes.
+//
 // Author: Derek Barnett
 
 #include "pbbam/DataSetTypes.h"
 #include "pbbam/internal/DataSetBaseTypes.h"
 #include "DataSetUtils.h"
+#include "FileUtils.h"
+#include "TimeUtils.h"
 #include <set>
 using namespace PacBio;
 using namespace PacBio::BAM;
@@ -49,7 +55,9 @@ using namespace std;
 // -------------------
 
 AlignmentSet::AlignmentSet(void)
-    : DataSetBase("AlignmentSet", XsdType::DATASETS)
+    : DataSetBase("PacBio.DataSet.AlignmentSet",
+                  "AlignmentSet",
+                  XsdType::DATASETS)
 { }
 
 // -------------------
@@ -57,7 +65,9 @@ AlignmentSet::AlignmentSet(void)
 // -------------------
 
 BarcodeSet::BarcodeSet(void)
-    : DataSetBase("BarcodeSet", XsdType::DATASETS)
+    : DataSetBase("PacBio.DataSet.BarcodeSet",
+                  "BarcodeSet",
+                  XsdType::DATASETS)
 { }
 
 // -----------------------
@@ -65,7 +75,9 @@ BarcodeSet::BarcodeSet(void)
 // -----------------------
 
 ConsensusAlignmentSet::ConsensusAlignmentSet(void)
-    : DataSetBase("ConsensusAlignmentSet", XsdType::DATASETS)
+    : DataSetBase("PacBio.DataSet.ConsensusAlignmentSet",
+                  "ConsensusAlignmentSet",
+                  XsdType::DATASETS)
 { }
 
 // -------------------
@@ -73,7 +85,9 @@ ConsensusAlignmentSet::ConsensusAlignmentSet(void)
 // -------------------
 
 ConsensusReadSet::ConsensusReadSet(void)
-    : DataSetBase("ConsensusReadSet", XsdType::DATASETS)
+    : DataSetBase("PacBio.DataSet.ConsensusReadSet",
+                  "ConsensusReadSet",
+                  XsdType::DATASETS)
 { }
 
 // -------------------
@@ -81,7 +95,9 @@ ConsensusReadSet::ConsensusReadSet(void)
 // -------------------
 
 ContigSet::ContigSet(void)
-    : DataSetBase("ContigSet", XsdType::DATASETS)
+    : DataSetBase("PacBio.DataSet.ContigSet",
+                  "ContigSet",
+                  XsdType::DATASETS)
 { }
 
 // -------------------
@@ -89,11 +105,15 @@ ContigSet::ContigSet(void)
 // -------------------
 
 DataSetBase::DataSetBase(void)
-    : StrictEntityType("DataSet", XsdType::DATASETS)
+    : StrictEntityType("PacBio.DataSet.DataSet",
+                       "DataSet",
+                       XsdType::DATASETS)
 { }
 
-DataSetBase::DataSetBase(const string& label, const XsdType& xsd)
-    : StrictEntityType(label, xsd)
+DataSetBase::DataSetBase(const string& metatype,
+                         const string& label,
+                         const XsdType& xsd)
+    : StrictEntityType(metatype, label, xsd)
 { }
 
 DEFINE_ACCESSORS(DataSetBase, ExternalResources, ExternalResources)
@@ -141,7 +161,7 @@ DataSetBase* DataSetBase::DeepCopy(void) const
 DataSetBase& DataSetBase::operator+=(const DataSetBase& other)
 {
     // must be same dataset types (or 'other' must be generic)
-    if (other.QualifiedNameLabel() != QualifiedNameLabel() && other.LocalNameLabel() != "DataSet")
+    if (other.LocalNameLabel() != LocalNameLabel() && other.LocalNameLabel() != "DataSet")
         throw std::runtime_error("cannot merge incompatible dataset types");
 
     // check filter match
@@ -211,27 +231,29 @@ Extensions::Extensions(void)
     : DataSetListElement<ExtensionElement>("Extensions", XsdType::BASE_DATA_MODEL)
 { }
 
-ExternalResource::ExternalResource(void)
-    : IndexedDataType("ExternalResource", XsdType::BASE_DATA_MODEL)
-{ }
-
 // -------------------
 // ExternalResource
 // -------------------
 
-ExternalResource::ExternalResource(const BamFile &bamFile)
-    : IndexedDataType("ExternalResource", XsdType::BASE_DATA_MODEL)
-{
-    MetaType("SubreadFile.SubreadBamFile");
-    ResourceId(bamFile.Filename());
-}
+ExternalResource::ExternalResource(const BamFile& bamFile)
+    : IndexedDataType("PacBio.SubreadFile.SubreadBamFile",
+                      bamFile.Filename(),
+                      "ExternalResource",
+                      XsdType::BASE_DATA_MODEL)
+{ }
 
-ExternalResource::ExternalResource(const string& metatype, const string& filename)
-    : IndexedDataType("ExternalResource", XsdType::BASE_DATA_MODEL)
-{
-    MetaType(metatype);
-    ResourceId(filename);
-}
+ExternalResource::ExternalResource(const string& metatype,
+                                   const string& filename)
+    : IndexedDataType(metatype,
+                      filename,
+                      "ExternalResource",
+                      XsdType::BASE_DATA_MODEL)
+{ }
+
+DEFINE_ACCESSORS(ExternalResource, ExternalResources, ExternalResources)
+
+ExternalResource& ExternalResource::ExternalResources(const PacBio::BAM::ExternalResources& resources)
+{ ExternalResources() = resources; return *this; }
 
 BamFile ExternalResource::ToBamFile(void) const
 { return BamFile(ResourceId()); }
@@ -241,7 +263,8 @@ BamFile ExternalResource::ToBamFile(void) const
 // -------------------
 
 ExternalResources::ExternalResources(void)
-    : DataSetListElement<ExternalResource>("ExternalResources", XsdType::BASE_DATA_MODEL)
+    : DataSetListElement<ExternalResource>("ExternalResources",
+                                           XsdType::BASE_DATA_MODEL)
 { }
 
 ExternalResources& ExternalResources::operator+=(const ExternalResources& other)
@@ -270,16 +293,24 @@ ExternalResources& ExternalResources::operator+=(const ExternalResources& other)
 }
 
 void ExternalResources::Add(const ExternalResource& ext)
-{ AddChild(ext); }
+{
+    // disallow external resources w/ duplicate ResourceIds; stop at the first match
+    const std::string& newId = ext.ResourceId();
+    for (size_t i = 0; i < Size(); ++i) {
+        const ExternalResource& resource = this->operator[](i);
+        if (resource.ResourceId() == newId)
+            return;
+    }
+    AddChild(ext);
+}
 
 vector<BamFile> ExternalResources::BamFiles(void) const
 {
     vector<BamFile> result;
     const int numResources = Size();
     result.reserve(numResources);
-    for( const ExternalResource& ext : *this ) {
+    for( const ExternalResource& ext : *this )
         result.push_back(ext.ToBamFile());
-    }
     return result;
 }
 
@@ -290,8 +321,11 @@ void ExternalResources::Remove(const ExternalResource& ext)
 // FileIndex
 // -------------------
 
-FileIndex::FileIndex(void)
-    : InputOutputDataType("FileIndex", XsdType::BASE_DATA_MODEL)
+FileIndex::FileIndex(const string& metatype, const string& filename)
+    : InputOutputDataType(metatype,
+                          filename,
+                          "FileIndex",
+                          XsdType::BASE_DATA_MODEL)
 { }
 
 // -------------------
@@ -347,7 +381,9 @@ void Filters::Remove(const Filter& filter)
 // -------------------
 
 HdfSubreadSet::HdfSubreadSet(void)
-    : DataSetBase("HdfSubreadSet", XsdType::DATASETS)
+    : DataSetBase("PacBio.DataSet.HdfSubreadSet",
+                  "HdfSubreadSet",
+                  XsdType::DATASETS)
 { }
 
 // -------------------
@@ -401,7 +437,9 @@ DEFINE_ACCESSORS(Provenance, ParentTool, ParentTool)
 // -------------------
 
 ReferenceSet::ReferenceSet(void)
-    : DataSetBase("ReferenceSet", XsdType::DATASETS)
+    : DataSetBase("PacBio.DataSet.ReferenceSet",
+                  "ReferenceSet",
+                  XsdType::DATASETS)
 { }
 
 // -------------------
@@ -436,5 +474,7 @@ void SubDataSets::Remove(const DataSetBase& subdataset)
 // -------------------
 
 SubreadSet::SubreadSet(void)
-    : DataSetBase("SubreadSet", XsdType::DATASETS)
+    : DataSetBase("PacBio.DataSet.SubreadSet",
+                  "SubreadSet",
+                  XsdType::DATASETS)
 { }
diff --git a/src/DataSetUtils.h b/src/DataSetUtils.h
index 2fa1f8c..dcf234c 100644
--- a/src/DataSetUtils.h
+++ b/src/DataSetUtils.h
@@ -39,11 +39,15 @@
 #define DATASETUTILS_H
 
 #include "pbbam/DataSetTypes.h"
+#include <boost/uuid/random_generator.hpp>
+#include <boost/uuid/uuid_io.hpp>
 
 namespace PacBio {
 namespace BAM {
 namespace internal {
 
+static const std::string XML_VERSION = std::string { "3.0.1" };
+
 template<typename T>
 inline const T& NullObject(void)
 {
@@ -58,6 +62,14 @@ inline const PacBio::BAM::DataSetMetadata& NullObject(void)
     return empty;
 }
 
+// Returns a newly generated random UUID in its string form.
+inline std::string GenerateUuid(void)
+{
+    // a single generator instance is shared by all calls
+    static boost::uuids::random_generator uuidGenerator;
+    return boost::uuids::to_string(uuidGenerator());
+}
+
 } // namespace internal
 } // namespace BAM
 } // namespace PacBio
diff --git a/src/DataSetXsd.cpp b/src/DataSetXsd.cpp
index 0474691..161bd2b 100644
--- a/src/DataSetXsd.cpp
+++ b/src/DataSetXsd.cpp
@@ -32,10 +32,15 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file DataSetXsd.cpp
+/// \brief Implements the XSD- and namespace-related classes for DataSetXML.
+//
 // Author: Derek Barnett
 
 #include "pbbam/DataSetXsd.h"
+#include <unordered_map>
 using namespace PacBio;
 using namespace PacBio::BAM;
 using namespace std;
@@ -46,25 +51,133 @@ namespace internal {
 
 static map<XsdType, NamespaceInfo> DefaultRegistry(void)
 {
-    map<XsdType, NamespaceInfo> result;
-    result[XsdType::NONE]                   = NamespaceInfo();
-    result[XsdType::AUTOMATION_CONSTRAINTS] = NamespaceInfo("",       "http://pacificbiosciences.com/PacBioAutomationConstraints.xsd");
-    result[XsdType::BASE_DATA_MODEL]        = NamespaceInfo("pbbase", "http://pacificbiosciences.com/PacBioBaseDataModel.xsd");
-    result[XsdType::COLLECTION_METADATA]    = NamespaceInfo("pbmeta", "http://pacificbiosciences.com/PacBioCollectionMetadata.xsd");
-    result[XsdType::COMMON_MESSAGES]        = NamespaceInfo("",       "http://pacificbiosciences.com/PacBioCommonMessages.xsd");
-    result[XsdType::DATA_MODEL]             = NamespaceInfo("pbdm",   "http://pacificbiosciences.com/PacBioDataModel.xsd");
-    result[XsdType::DATA_STORE]             = NamespaceInfo("",       "http://pacificbiosciences.com/PacBioDataStore.xsd");
-    result[XsdType::DATASETS]               = NamespaceInfo("pbds",   "http://pacificbiosciences.com/PacBioDatasets.xsd");
-    result[XsdType::DECL_DATA]              = NamespaceInfo("",       "http://pacificbiosciences.com/PacBioDeclData.xsd");
-    result[XsdType::PART_NUMBERS]           = NamespaceInfo("pbpn",   "http://pacificbiosciences.com/PacBioPartNumbers.xsd");
-    result[XsdType::PRIMARY_METRICS]        = NamespaceInfo("",       "http://pacificbiosciences.com/PacBioPrimaryMetrics.xsd");
-    result[XsdType::REAGENT_KIT]            = NamespaceInfo("pbrk",   "http://pacificbiosciences.com/PacBioReagentKit.xsd");
-    result[XsdType::RIGHTS_AND_ROLES]       = NamespaceInfo("",       "http://pacificbiosciences.com/PacBioRightsAndRoles.xsd");
-    result[XsdType::SAMPLE_INFO]            = NamespaceInfo("pbsample", "http://pacificbiosciences.com/PacBioSampleInfo.xsd");
-    result[XsdType::SEEDING_DATA]           = NamespaceInfo("",       "http://pacificbiosciences.com/PacBioSeedingData.xsd");
+    const auto result = map<XsdType, NamespaceInfo>
+    {
+        { XsdType::NONE,                   NamespaceInfo{ "", "" } },
+        { XsdType::AUTOMATION_CONSTRAINTS, NamespaceInfo{ "",       "http://pacificbiosciences.com/PacBioAutomationConstraints.xsd" } },
+        { XsdType::BASE_DATA_MODEL,        NamespaceInfo{ "pbbase", "http://pacificbiosciences.com/PacBioBaseDataModel.xsd" } },
+        { XsdType::COLLECTION_METADATA,    NamespaceInfo{ "pbmeta", "http://pacificbiosciences.com/PacBioCollectionMetadata.xsd" } },
+        { XsdType::COMMON_MESSAGES,        NamespaceInfo{ "",       "http://pacificbiosciences.com/PacBioCommonMessages.xsd" } },
+        { XsdType::DATA_MODEL,             NamespaceInfo{ "pbdm",   "http://pacificbiosciences.com/PacBioDataModel.xsd" } },
+        { XsdType::DATA_STORE,             NamespaceInfo{ "",       "http://pacificbiosciences.com/PacBioDataStore.xsd" } },
+        { XsdType::DATASETS,               NamespaceInfo{ "pbds",   "http://pacificbiosciences.com/PacBioDatasets.xsd" } },
+        { XsdType::DECL_DATA,              NamespaceInfo{ "",       "http://pacificbiosciences.com/PacBioDeclData.xsd" } },
+        { XsdType::PART_NUMBERS,           NamespaceInfo{ "pbpn",   "http://pacificbiosciences.com/PacBioPartNumbers.xsd" } },
+        { XsdType::PRIMARY_METRICS,        NamespaceInfo{ "",       "http://pacificbiosciences.com/PacBioPrimaryMetrics.xsd" } },
+        { XsdType::REAGENT_KIT,            NamespaceInfo{ "pbrk",   "http://pacificbiosciences.com/PacBioReagentKit.xsd" } },
+        { XsdType::RIGHTS_AND_ROLES,       NamespaceInfo{ "",       "http://pacificbiosciences.com/PacBioRightsAndRoles.xsd" } },
+        { XsdType::SAMPLE_INFO,            NamespaceInfo{ "pbsample", "http://pacificbiosciences.com/PacBioSampleInfo.xsd" } },
+        { XsdType::SEEDING_DATA,           NamespaceInfo{ "",       "http://pacificbiosciences.com/PacBioSeedingData.xsd" } }
+    };
     return result;
 }
 
+static const auto elementRegistry = unordered_map<string, XsdType> // element label -> owning XSD
+{
+    // 'pbbase' elements
+    //
+    { "AutomationParameter" ,  XsdType::BASE_DATA_MODEL },
+    { "AutomationParameters" , XsdType::BASE_DATA_MODEL },
+    { "BinCount" ,             XsdType::BASE_DATA_MODEL },
+    { "BinCounts" ,            XsdType::BASE_DATA_MODEL },
+    { "BinLabel" ,             XsdType::BASE_DATA_MODEL },
+    { "BinLabels" ,            XsdType::BASE_DATA_MODEL },
+    { "BinWidth" ,             XsdType::BASE_DATA_MODEL },
+    { "ExternalResource" ,     XsdType::BASE_DATA_MODEL },
+    { "ExternalResources" ,    XsdType::BASE_DATA_MODEL },
+    { "FileIndex" ,            XsdType::BASE_DATA_MODEL },
+    { "FileIndices" ,          XsdType::BASE_DATA_MODEL },
+    { "MaxBinValue" ,          XsdType::BASE_DATA_MODEL },
+    { "MaxOutlierValue" ,      XsdType::BASE_DATA_MODEL },
+    { "MetricDescription" ,    XsdType::BASE_DATA_MODEL },
+    { "NumBins" ,              XsdType::BASE_DATA_MODEL },
+    { "Properties" ,           XsdType::BASE_DATA_MODEL },
+    { "Property" ,             XsdType::BASE_DATA_MODEL },
+    { "Sample95thPct" ,        XsdType::BASE_DATA_MODEL },
+    { "SampleMean" ,           XsdType::BASE_DATA_MODEL },
+    { "SampleMed" ,            XsdType::BASE_DATA_MODEL },
+    { "SampleSize" ,           XsdType::BASE_DATA_MODEL },
+    { "SampleStd" ,            XsdType::BASE_DATA_MODEL },
+
+    // 'pbds' elements
+    //
+    { "AdapterDimerFraction",  XsdType::DATASETS },
+    { "AlignmentSet",          XsdType::DATASETS },
+    { "BarcodeConstruction",   XsdType::DATASETS },
+    { "BarcodeSet",            XsdType::DATASETS },
+    { "ConsensusAlignmentSet", XsdType::DATASETS },
+    { "ConsensusReadSet",      XsdType::DATASETS },
+    { "Contig",                XsdType::DATASETS },
+    { "Contigs",               XsdType::DATASETS },
+    { "ContigSet",             XsdType::DATASETS },
+    { "ControlReadLenDist",    XsdType::DATASETS },
+    { "ControlReadQualDist",   XsdType::DATASETS },
+    { "DataSetMetadata",       XsdType::DATASETS },
+    { "DataSet",               XsdType::DATASETS },
+    { "DataSets",              XsdType::DATASETS },
+    { "Filter",                XsdType::DATASETS },
+    { "Filters",               XsdType::DATASETS },
+    { "HdfSubreadSet",         XsdType::DATASETS },
+    { "InsertReadLenDist",     XsdType::DATASETS },
+    { "InsertReadQualDist" ,   XsdType::DATASETS },
+    { "MedianInsertDist",      XsdType::DATASETS },
+    { "NumRecords",            XsdType::DATASETS },
+    { "NumSequencingZmws",     XsdType::DATASETS },
+    { "Organism",              XsdType::DATASETS },
+    { "ParentTool",            XsdType::DATASETS },
+    { "Ploidy",                XsdType::DATASETS },
+    { "ProdDist",              XsdType::DATASETS },
+    { "Provenance",            XsdType::DATASETS },
+    { "ReadLenDist",           XsdType::DATASETS },
+    { "ReadQualDist",          XsdType::DATASETS },
+    { "ReadTypeDist",          XsdType::DATASETS },
+    { "ReferenceSet",          XsdType::DATASETS },
+    { "ShortInsertFraction",   XsdType::DATASETS },
+    { "SubreadSet",            XsdType::DATASETS },
+    { "SummaryStats",          XsdType::DATASETS },
+    { "TotalLength",           XsdType::DATASETS },
+
+    // 'pbmeta' elements
+    //
+    { "Automation",           XsdType::COLLECTION_METADATA },
+    { "AutomationName",       XsdType::COLLECTION_METADATA },
+    { "CellIndex",            XsdType::COLLECTION_METADATA },
+    { "CellPac",              XsdType::COLLECTION_METADATA },
+    { "CollectionFileCopy",   XsdType::COLLECTION_METADATA },
+    { "CollectionMetadata",   XsdType::COLLECTION_METADATA },
+    { "CollectionNumber",     XsdType::COLLECTION_METADATA },
+    { "CollectionPathUri",    XsdType::COLLECTION_METADATA },
+    { "Collections",          XsdType::COLLECTION_METADATA },
+    { "Concentration",        XsdType::COLLECTION_METADATA },
+    { "ConfigFileName",       XsdType::COLLECTION_METADATA },
+    { "CopyFiles",            XsdType::COLLECTION_METADATA },
+    { "InstCtrlVer",          XsdType::COLLECTION_METADATA },
+    { "MetricsVerbosity",     XsdType::COLLECTION_METADATA },
+    { "Name",                 XsdType::COLLECTION_METADATA },
+    { "OutputOptions",        XsdType::COLLECTION_METADATA },
+    { "PlateId",              XsdType::COLLECTION_METADATA },
+    { "Primary",              XsdType::COLLECTION_METADATA },
+    { "Readout",              XsdType::COLLECTION_METADATA },
+    { "ResultsFolder",        XsdType::COLLECTION_METADATA },
+    { "RunDetails",           XsdType::COLLECTION_METADATA },
+    { "RunId",                XsdType::COLLECTION_METADATA },
+    { "SampleReuseEnabled",   XsdType::COLLECTION_METADATA },
+    { "SequencingCondition",  XsdType::COLLECTION_METADATA },
+    { "SigProcVer",           XsdType::COLLECTION_METADATA },
+    { "SizeSelectionEnabled", XsdType::COLLECTION_METADATA },
+    { "StageHotstartEnabled", XsdType::COLLECTION_METADATA },
+    { "UseCount",             XsdType::COLLECTION_METADATA },
+    { "WellName",             XsdType::COLLECTION_METADATA },
+    { "WellSample",           XsdType::COLLECTION_METADATA },
+
+    // 'pbsample' elements
+    //
+    { "BioSample",         XsdType::SAMPLE_INFO },
+    { "BioSamplePointer",  XsdType::SAMPLE_INFO },
+    { "BioSamplePointers", XsdType::SAMPLE_INFO },
+    { "BioSamples",        XsdType::SAMPLE_INFO }
+};
+
 } // namespace internal
 } // namespace BAM
 } // namespace PacBio
@@ -95,6 +208,11 @@ NamespaceRegistry::NamespaceRegistry(const NamespaceRegistry &other)
     , defaultXsdType_(other.defaultXsdType_)
 { }
 
+NamespaceRegistry::NamespaceRegistry(NamespaceRegistry &&other)
+    : data_(std::move(other.data_))
+    , defaultXsdType_(std::move(other.defaultXsdType_))
+{ }
+
 NamespaceRegistry& NamespaceRegistry::operator=(const NamespaceRegistry& other)
 {
     data_ = other.data_;
@@ -102,6 +220,13 @@ NamespaceRegistry& NamespaceRegistry::operator=(const NamespaceRegistry& other)
     return *this;
 }
 
+NamespaceRegistry& NamespaceRegistry::operator=(NamespaceRegistry&& other)
+{
+    data_ = std::move(other.data_);
+    defaultXsdType_ = std::move(other.defaultXsdType_);
+    return *this;
+}
+
 NamespaceRegistry::~NamespaceRegistry(void) { }
 
 const NamespaceInfo& NamespaceRegistry::DefaultNamespace(void) const
@@ -113,6 +238,18 @@ XsdType NamespaceRegistry::DefaultXsd(void) const
 const NamespaceInfo& NamespaceRegistry::Namespace(const XsdType& xsd) const
 { return data_.at(xsd); }
 
+void NamespaceRegistry::Register(const XsdType& xsd, const NamespaceInfo& namespaceInfo)
+{ data_[xsd] = namespaceInfo; }
+
+void NamespaceRegistry::SetDefaultXsd(const XsdType& xsd)
+{ defaultXsdType_ = xsd; }
+
+XsdType NamespaceRegistry::XsdForElement(const std::string& elementLabel) const
+{
+    const auto found = internal::elementRegistry.find(elementLabel); // unknown label -> NONE
+    return (found != internal::elementRegistry.cend()) ? found->second : XsdType::NONE;
+}
+
 XsdType NamespaceRegistry::XsdForUri(const std::string& uri) const
 {
     map<XsdType, NamespaceInfo>::const_iterator iter = data_.cbegin();
@@ -124,9 +261,3 @@ XsdType NamespaceRegistry::XsdForUri(const std::string& uri) const
     }
     return XsdType::NONE;
 }
-
-void NamespaceRegistry::Register(const XsdType& xsd, const NamespaceInfo& namespaceInfo)
-{ data_[xsd] = namespaceInfo; }
-
-void NamespaceRegistry::SetDefaultXsd(const XsdType& xsd)
-{ defaultXsdType_ = xsd; }
diff --git a/src/EntireFileQuery.cpp b/src/EntireFileQuery.cpp
index 814dc51..6813492 100644
--- a/src/EntireFileQuery.cpp
+++ b/src/EntireFileQuery.cpp
@@ -32,71 +32,34 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file EntireFileQuery.cpp
+/// \brief Implements the EntireFileQuery class.
+//
 // Author: Derek Barnett
 
 #include "pbbam/EntireFileQuery.h"
-#include "pbbam/BamFile.h"
-
-#include "pbbam/internal/SequentialMergeStrategy.h"
-
-#include "MemoryUtils.h"
+#include "pbbam/CompositeBamReader.h"
 using namespace PacBio;
 using namespace PacBio::BAM;
 using namespace std;
 
-class EntireFileIterator : public internal::IBamFileIterator
+struct EntireFileQuery::EntireFileQueryPrivate
 {
-public:
-    EntireFileIterator(const BamFile& bamFile)
-        : internal::IBamFileIterator(bamFile)
-    {
-        htsFile_.reset(sam_open(bamFile.Filename().c_str(), "rb"));
-        if (!htsFile_)
-            throw std::runtime_error("could not open BAM file for reading");
-
-        htsHeader_.reset(sam_hdr_read(htsFile_.get()));
-        if (!htsHeader_)
-            throw std::runtime_error("could not read BAM header");
-    }
+    EntireFileQueryPrivate(const DataSet& dataset)
+        : reader_(dataset)
+    { }
 
-public:
-    bool GetNext(BamRecord& record) {
-
-//        record = BamRecord(/*fileData_.Header()*/);
-        const int result = sam_read1(htsFile_.get(),
-                                     htsHeader_.get(),
-                                     internal::BamRecordMemory::GetRawData(record).get());
-        record.header_ = header_;
-
-        // success
-        if (result >= 0)
-            return true;
-
-        // normal EOF
-        else if (result == -1)
-            return false;
-
-        // error (truncated file, etc)
-        else
-            throw std::runtime_error("corrupted file, may be truncated");
-    }
-
-private:
-    unique_ptr<samFile,   internal::HtslibFileDeleter>   htsFile_;
-    unique_ptr<bam_hdr_t, internal::HtslibHeaderDeleter> htsHeader_;
+    SequentialCompositeBamReader reader_;
 };
 
-EntireFileQuery::EntireFileQuery(const DataSet& dataset)
-    : internal::IQuery(dataset)
-{
-    // check files
-    // if SO all coordinate
-    // else if SO all queryname
-    // else SO unsorted/unknown
-    mergeStrategy_.reset(new internal::SequentialMergeStrategy(CreateIterators()));
-}
+EntireFileQuery::EntireFileQuery(const DataSet &dataset)
+    : internal::IQuery()
+    , d_(new EntireFileQueryPrivate(dataset))
+{ }
 
-EntireFileQuery::FileIterPtr EntireFileQuery::CreateIterator(const BamFile& bamFile)
-{ return FileIterPtr(new EntireFileIterator(bamFile)); }
+EntireFileQuery::~EntireFileQuery(void) { }
 
+bool EntireFileQuery::GetNext(BamRecord &r)
+{ return d_->reader_.GetNext(r); }
diff --git a/src/FileUtils.cpp b/src/FileUtils.cpp
new file mode 100644
index 0000000..a0a59af
--- /dev/null
+++ b/src/FileUtils.cpp
@@ -0,0 +1,246 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "FileUtils.h"
+#include "StringUtils.h"
+#include <boost/algorithm/string.hpp>
+#include <exception>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <cassert>
+#include <sys/stat.h>
+#include <unistd.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// pops "file://" scheme off the front of a URI/filepath, if found
+static string removeFileUriScheme(const string& uri)
+{
+    assert(!uri.empty());
+
+    // look for the scheme anywhere in the input; it is only legal at offset 0
+    const string scheme("file://");
+    const size_t schemePos = uri.find(scheme);
+    if (schemePos == string::npos)
+        return uri;                         // no scheme -> hand the input back as-is
+    if (schemePos != 0)
+        throw runtime_error("Malformed URI: scheme not at beginning");
+
+    return uri.substr(scheme.size());       // drop the leading "file://"
+}
+
+#ifdef PBBAM_WIN_FILEPATHS
+
+// strips a leading drive designator such as "C:" from filePath, if present
+static string removeDiskName(const string& filePath)
+{
+    if (filePath.size() >= 2) {
+        const auto firstChar = static_cast<unsigned char>(filePath.at(0)); // isalpha() needs a non-negative value
+        if ((isalpha(firstChar) != 0) && (filePath.at(1) == ':'))
+            return filePath.substr(2);
+    }
+    return filePath;
+}
+
+static const char native_pathSeparator = '\\';
+
+static bool native_pathIsAbsolute(const string& filePath)
+{
+    assert(!filePath.empty());
+
+    // if starts with a single or double backslash -> absolute
+    if (boost::algorithm::starts_with(filePath, "\\"))
+        return true;
+
+    // if starts with single or double-dots -> not absolute
+    if (boost::algorithm::starts_with(filePath, "."))
+        return false;
+
+    // drive name and colon ("C:\foo\bar.txt"): absolute only if what follows the
+    // drive is; a bare "C:" is relative (and must not recurse with an empty path)
+    if (filePath.size() >= 2) {
+        const auto firstChar = static_cast<unsigned char>(filePath.at(0)); // isalpha() needs a non-negative value
+        if ((isalpha(firstChar) != 0) && (filePath.at(1) == ':'))
+            return (filePath.size() > 2) && native_pathIsAbsolute(filePath.substr(2));
+    }
+
+    // otherwise, likely relative
+    return false;
+}
+
+static string native_resolvedFilePath(const string& filePath,
+                                      const string& from)
+{
+    // drop any "file://" prefix before inspecting the path itself
+    string bare = removeFileUriScheme(filePath);
+
+    // nothing to resolve when the path is empty or already absolute
+    // (testing for empty first keeps the parsing below simple)
+    if (bare.empty() || native_pathIsAbsolute(bare))
+        return bare;
+
+    // relative path: anchor it at the caller-supplied 'from' directory
+    //
+    // the disk name goes first, then a leading single-dot component
+    //
+    // 'from' is about to be prepended, so a leading "this directory"
+    // prefix on our file path is redundant. when from == "." we end up
+    // putting the same prefix right back, but dropping it unconditionally
+    // keeps the result uniform across all 'from' directories
+    //
+    string relative = removeDiskName(bare);
+
+    const bool startsWithThisDir = relative.size() >= 2 &&
+                                   relative[0] == '.' &&
+                                   relative[1] == native_pathSeparator;
+    if (startsWithThisDir)
+        relative.erase(0, 2);
+    return from + native_pathSeparator + relative;
+}
+
+#else // else for non-Windows systems
+
+static const char native_pathSeparator = '/';
+
+static bool native_pathIsAbsolute(const string& filePath) // true when the path begins with '/'
+{ return filePath.at(0) == '/'; } // at() throws std::out_of_range on an empty path, so callers check for empty first
+
+static string native_resolvedFilePath(const string& filePath,
+                                      const string& from)
+{
+    // drop any "file://" prefix before inspecting the path itself
+    string resolved = removeFileUriScheme(filePath);
+
+    // nothing to resolve when the path is empty or already absolute
+    // (testing for empty first keeps the parsing below simple)
+    if (resolved.empty() || native_pathIsAbsolute(resolved))
+        return resolved;
+
+    // relative path: anchor it at the caller-supplied 'from' directory
+    //
+    // 'from' is about to be prepended, so a leading "./" on our
+    // file path is redundant. when from == "." we end up putting
+    // the same prefix right back, but dropping it unconditionally
+    // keeps the result uniform across all 'from' directories
+    //
+    const bool startsWithThisDir = resolved.size() >= 2 &&
+                                   resolved[0] == '.' &&
+                                   resolved[1] == native_pathSeparator;
+    if (startsWithThisDir)
+        resolved.erase(0, 2);
+    return from + native_pathSeparator + resolved;
+}
+
+#endif // PBBAM_WIN_FILEPATHS
+
+// see http://stackoverflow.com/questions/2869594/how-return-a-stdstring-from-cs-getcwd-function
+string FileUtils::CurrentWorkingDirectory(void) // \returns getcwd() result; \throws runtime_error if it cannot be obtained
+{
+    const size_t chunkSize = 1024;
+    const size_t maxNumChunks = 20;   // heap attempts stop below 20 chunks (largest buffer tried: 19 * chunkSize)
+
+    // stack-based buffer for 'normal' case
+    char buffer[chunkSize];
+    if (getcwd(buffer, sizeof(buffer)) != nullptr)
+        return string(buffer);
+
+    // if error is not ERANGE, then it's not a problem of too-long name... something else happened
+    if (errno != ERANGE)
+        throw runtime_error("could not determine current working directory path");
+
+    // long path - use heap, trying progressively longer buffers
+    for (size_t chunks = 2; chunks < maxNumChunks; ++chunks) {
+        unique_ptr<char[]> cwd(new char[chunkSize*chunks]); // array form, so the buffer is released with delete[]
+        if (getcwd(cwd.get(), chunkSize*chunks) != nullptr)
+            return string(cwd.get());
+
+        // if error is not ERANGE, then it's not a problem of too-long name... something else happened
+        if (errno != ERANGE)
+            throw runtime_error("could not determine current working directory path");
+    }
+
+    // crazy long path name
+    throw runtime_error("could not determine current working directory - extremely long path");
+}
+
+string FileUtils::DirectoryName(const string& file)
+{
+    // everything before the last separator; "." when there is no separator at all
+    const size_t lastSeparator = file.find_last_of(Separator());
+    return (lastSeparator == string::npos ? string(".")
+                                          : file.substr(0, lastSeparator));
+}
+
+bool FileUtils::Exists(const char* fn)
+{
+    struct stat fileInfo;   // contents ignored: only stat()'s success/failure matters
+    return !(stat(fn, &fileInfo) == -1);
+}
+
+chrono::system_clock::time_point FileUtils::LastModified(const char* fn)
+{
+    struct stat fileInfo;
+    const bool statOk = (stat(fn, &fileInfo) == 0);
+    if (!statOk) throw runtime_error("could not get file timestamp");
+    // convert the modification stamp reported by stat() into a time_point
+    return chrono::system_clock::from_time_t(fileInfo.st_mtime);
+}
+
+string FileUtils::ResolvedFilePath(const string& filePath,
+                                   const string& from)
+{ return native_resolvedFilePath(filePath, from); } // forwards to the variant selected by PBBAM_WIN_FILEPATHS
+
+constexpr char FileUtils::Separator(void) // NOTE(review): constexpr implies inline, yet this definition is only visible in this .cpp - confirm no other file calls it
+{ return native_pathSeparator; } // backslash when PBBAM_WIN_FILEPATHS is defined, '/' otherwise
+
+off_t FileUtils::Size(const char* fn)
+{
+    struct stat fileInfo;
+    const bool statOk = (stat(fn, &fileInfo) == 0);
+    if (!statOk) throw runtime_error("could not determine file size");
+    // st_size exactly as reported by stat()
+    return fileInfo.st_size;
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
diff --git a/src/FileUtils.h b/src/FileUtils.h
index 5cf7c72..112223e 100644
--- a/src/FileUtils.h
+++ b/src/FileUtils.h
@@ -38,12 +38,8 @@
 #ifndef FILEUTILS_H
 #define FILEUTILS_H
 
-#include <exception>
-#include <fstream>
-#include <iostream>
+#include <chrono>
 #include <string>
-#include <ctime>
-#include <sys/stat.h>
 
 namespace PacBio {
 namespace BAM {
@@ -52,78 +48,95 @@ namespace internal {
 struct FileUtils
 {
 public:
+
+    /// \returns application's current working directory
+    static std::string CurrentWorkingDirectory(void);
+
+    /// Parses a filepath for the directory name of a file.
+    ///
+    /// Essentially this method strips the filename from the string provided (/path/to/file => /path/to).
+    /// If only a filename is provided, then "." is returned to indicate the current directory.
+    ///
+    /// \param[in] file name of file (can be just a filename or path/to/filename)
+    /// \returns file's directory name
+    ///
+    static std::string DirectoryName(const std::string& file);
+
+    /// Check for existence of a file.
+    ///
+    /// \param[in] fn full path to file
+    /// \returns true if file exists (its status can be queried; the file is not opened)
+    ///
     static bool Exists(const char* fn);
-    static bool Exists(const std::string& fn);
 
-    // throws if can't read
-    static time_t LastModified(const char* fn);
-    static time_t LastModified(const std::string& fn);
+    /// Check for existence of a file.
+    ///
+    /// \param[in] fn full path to file
+    /// \returns true if file exists (its status can be queried; the file is not opened)
+    ///
+    static bool Exists(const std::string& fn);
 
-    // throws if can't read
+    /// Check "last modified" timestamp for a file.
+    ///
+    /// \param[in] fn full path to file
+    /// \returns time of last modification
+    /// \throws runtime_error if file info can't be accessed
+    ///
+    static std::chrono::system_clock::time_point LastModified(const char* fn);
+
+    /// Check "last modified" timestamp for a file.
+    ///
+    /// \param[in] fn full path to file
+    /// \returns time of last modification
+    /// \throws runtime_error if file info can't be accessed
+    ///
+    static std::chrono::system_clock::time_point LastModified(const std::string& fn);
+
+    /// Resolves input file path using optional starting directory.
+    ///
+    /// \verbatim
+    ///   /absolute/path/to/file.txt   => /absolute/path/to/file.txt
+    ///   ../relative/path/to/file.txt => <from>/../relative/path/to/file.txt
+    ///   file.txt                     => <from>/file.txt
+    /// \endverbatim
+    ///
+    /// \note This method will strip any URI scheme as well ("file://") so that the result is immediately ready for I/O operations.
+    ///
+    /// \param[in] filePath file path to be resolved
+    /// \param[in] from     optional starting directory (useful if not same as application's working directory)
+    /// \returns resolved file path
+    ///
+    static std::string ResolvedFilePath(const std::string& filePath,
+                                        const std::string& from = ".");
+
+    /// \returns native path separator
+    constexpr static char Separator(void);
+
+    /// Check size of file.
+    ///
+    /// \param[in] fn full path to file
+    /// \returns file size in bytes
+    /// \throws runtime_error if file info can't be accessed
+    ///
     static off_t Size(const char* fn);
+
+    /// Check size of file.
+    ///
+    /// \param[in] fn full path to file
+    /// \returns file size in bytes
+    /// \throws runtime_error if file info can't be accessed
+    ///
     static off_t Size(const std::string& fn);
 };
 
-inline bool FileUtils::Exists(const char* fn)
-{ return Exists(std::string(fn)); }
-
 inline bool FileUtils::Exists(const std::string& fn)
-{
-    std::ifstream stream(fn);
-    return !stream.fail();
-}
+{ return FileUtils::Exists(fn.c_str()); }
 
-inline time_t FileUtils::LastModified(const char* fn)
-{
-    struct stat s;
-    if (stat(fn, &s) != 0)
-        throw std::runtime_error("could not get file timestamp");
-
-#ifdef __DARWIN_64_BIT_INO_T
-    return s.st_mtimespec.tv_sec; // 64-bit OSX has a modified stat struct
-#else
-    return s.st_mtime;            // all others?
-#endif
-}
-
-inline time_t FileUtils::LastModified(const std::string& fn)
-{ return LastModified(fn.c_str()); }
-
-inline off_t FileUtils::Size(const char* fn)
-{
-    struct stat s;
-    if (stat(fn, &s) != 0)
-        throw std::runtime_error("could not determine file size");
-    return s.st_size;
-}
+inline std::chrono::system_clock::time_point FileUtils::LastModified(const std::string& fn)
+{ return FileUtils::LastModified(fn.c_str()); }
 
 inline off_t FileUtils::Size(const std::string& fn)
-{ return Size(fn.c_str()); }
-
-//inline std::string FilenameExtension(const std::string& fn)
-//{
-//    const size_t lastDot = fn.find_last_of(".");
-//    return (lastDot != std::string::npos ? fn.substr(lastDot+1) : std::string());
-//}
-
-////
-//// -- examples --
-////
-//// input: /path/to/file.ext      result: file.ext
-//// input: /path/to/file.ext.zip  result: file.ext.zip
-//// input: file.ext               result: file.ext
-////
-//inline std::string FilenameFromPath(const std::string& fullPath)
-//{
-//    struct MatchesPathSeparator {
-//        bool operator()(char c) const { return c == '/'; }
-//    };
-
-//    const auto lastSeparator = std::find_if(fullPath.rbegin(),
-//                                            fullPath.rend(),
-//                                            MatchesPathSeparator()).base();
-//    return std::string(lastSeparator,fullPath.end());
-//}
+{ return FileUtils::Size(fn.c_str()); }
 
 } // namespace internal
 } // namespace BAM
diff --git a/src/Frames.cpp b/src/Frames.cpp
index 36e49d9..323d02e 100644
--- a/src/Frames.cpp
+++ b/src/Frames.cpp
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file Frames.cpp
+/// \brief Implements the Frames class.
+//
 // Author: Derek Barnett
 
 #include "pbbam/Frames.h"
@@ -59,8 +63,6 @@ void InitIpdDownsampling(void)
 
     // liftover from Dave's python code:
     // .../bioinformatics/tools/kineticsTools/kineticsTools/_downsampling.py
-    //
-    // TODO: move this conversion functionality to pbbam
 
     const int B = 2;
     const int t = 6;
@@ -173,7 +175,7 @@ Frames& Frames::operator=(Frames&& other)
 { data_ = std::move(other.data_); return *this; }
 
 Frames Frames::Decode(const std::vector<uint8_t>& codedData)
-{  return Frames(std::move(internal::CodeToFrames(codedData))); }
+{ return Frames(std::move(internal::CodeToFrames(codedData))); }
 
 std::vector<uint8_t> Frames::Encode(const std::vector<uint16_t>& frames)
 { return internal::FramesToCode(frames); }
diff --git a/src/GenomicInterval.cpp b/src/GenomicInterval.cpp
index 7cf0b73..10ebc23 100644
--- a/src/GenomicInterval.cpp
+++ b/src/GenomicInterval.cpp
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file GenomicInterval.cpp
+/// \brief Implements the GenomicInterval class.
+//
 // Author: Derek Barnett
 
 #include "pbbam/GenomicInterval.h"
@@ -41,6 +45,7 @@
 #include <cstdlib>
 #include <cstring>
 #include <ctype.h>
+#include <stdexcept>
 using namespace PacBio;
 using namespace PacBio::BAM;
 using namespace std;
diff --git a/src/GenomicIntervalQuery.cpp b/src/GenomicIntervalQuery.cpp
index 087c626..b6ead9f 100644
--- a/src/GenomicIntervalQuery.cpp
+++ b/src/GenomicIntervalQuery.cpp
@@ -32,109 +32,42 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file GenomicIntervalQuery.cpp
+/// \brief Implements the GenomicIntervalQuery class.
+//
 // Author: Derek Barnett
 
 #include "pbbam/GenomicIntervalQuery.h"
-#include "pbbam/BamFile.h"
-#include "pbbam/internal/BamRecordSort.h"
-#include "pbbam/internal/MergeStrategy.h"
-#include "AssertUtils.h"
-#include "MemoryUtils.h"
-#include <cassert>
+#include "pbbam/CompositeBamReader.h"
 using namespace PacBio;
 using namespace PacBio::BAM;
 using namespace std;
 
-class GenomicIntervalIterator : public internal::IBamFileIterator
+struct GenomicIntervalQuery::GenomicIntervalQueryPrivate
 {
-public:
-    GenomicIntervalIterator(const GenomicInterval& interval,
-                            const BamFile& bamFile)
-        : internal::IBamFileIterator(bamFile)
-        , interval_(interval)
-    {
-        // open file
-        htsFile_.reset(sam_open(bamFile.Filename().c_str(), "rb"));
-        if (!htsFile_)
-            throw std::runtime_error("could not open BAM file for reading");
-
-        // load header info
-        htsHeader_.reset(sam_hdr_read(htsFile_.get()));
-        if (!htsHeader_)
-            throw std::runtime_error("could not read BAM header data");
-
-        // open index
-        htsIndex_.reset(bam_index_load(bamFile.Filename().c_str()));
-        if (!htsIndex_)
-            throw std::runtime_error("could not load BAI index data");
-
-        // initialize iterator
-        if (bamFile.Header().HasSequence(interval_.Name())) {
-            const int id = bamFile.ReferenceId(interval_.Name());
-            if (id >= 0 && id < htsHeader_->n_targets) {
-                htsIterator_.reset(sam_itr_queryi(htsIndex_.get(),
-                                                  id,
-                                                  interval.Start(),
-                                                  interval.Stop()));
-            }
-        }
-        if (!htsIterator_)
-            throw std::runtime_error("could not create iterator for requested region");
-    }
-
-public:
-    bool GetNext(BamRecord& record) {
+    GenomicIntervalQueryPrivate(const GenomicInterval& interval,
+                                const DataSet& dataset)
+        : reader_(interval, dataset)
+    { }
 
-        assert(htsFile_);
-        assert(htsIterator_);
-        const int result = sam_itr_next(htsFile_.get(),
-                                        htsIterator_.get(),
-                                        internal::BamRecordMemory::GetRawData(record).get());
-        record.header_ = header_;
-
-        // success
-        if (result >= 0)
-            return true;
-
-        // normal EOF
-        else if (result == -1)
-            return false;
-
-        // error (truncated file, etc)
-        else
-            throw std::runtime_error("corrupted file, may be truncated");
-    }
-
-private:
-    GenomicInterval interval_;
-    unique_ptr<samFile,   internal::HtslibFileDeleter>     htsFile_;
-    unique_ptr<bam_hdr_t, internal::HtslibHeaderDeleter>   htsHeader_;
-    unique_ptr<hts_idx_t, internal::HtslibIndexDeleter>    htsIndex_;
-    unique_ptr<hts_itr_t, internal::HtslibIteratorDeleter> htsIterator_;
+    GenomicIntervalCompositeBamReader reader_;
 };
 
 GenomicIntervalQuery::GenomicIntervalQuery(const GenomicInterval& interval,
-                                           const DataSet& dataset)
-    : internal::IQuery(dataset)
-    , interval_(interval)
-{
-    Interval(interval_);
-}
+                                           const DataSet &dataset)
+    : internal::IQuery()
+    , d_(new GenomicIntervalQueryPrivate(interval, dataset))
+{ }
+
+GenomicIntervalQuery::~GenomicIntervalQuery(void) { }
 
-GenomicIntervalQuery::FileIterPtr GenomicIntervalQuery::CreateIterator(const BamFile& bamFile)
-{ return FileIterPtr(new GenomicIntervalIterator(interval_, bamFile)); }
+bool GenomicIntervalQuery::GetNext(BamRecord &r)
+{ return d_->reader_.GetNext(r); }
 
 GenomicIntervalQuery& GenomicIntervalQuery::Interval(const GenomicInterval& interval)
-{
-    interval_ = interval;
-    // check files
-    // if SO all coordinate
-    // else if SO all queryname
-    // else SO unsorted/unknown
-    mergeStrategy_.reset(new internal::MergeStrategy<internal::ByPosition>(CreateIterators()));
-    return *this;
-}
+{ d_->reader_.Interval(interval); return *this; }
 
-GenomicInterval GenomicIntervalQuery::Interval(void) const
-{ return interval_; }
+const GenomicInterval& GenomicIntervalQuery::Interval(void) const
+{ return d_->reader_.Interval(); }
diff --git a/src/GroupQuery.cpp b/src/GroupQuery.cpp
deleted file mode 100644
index 4b72b4f..0000000
--- a/src/GroupQuery.cpp
+++ /dev/null
@@ -1,91 +0,0 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//
-//  * Redistributions in binary form must reproduce the above
-//    copyright notice, this list of conditions and the following
-//    disclaimer in the documentation and/or other materials provided
-//    with the distribution.
-//
-//  * Neither the name of Pacific Biosciences nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-
-// Author: Yuan Li
-
-#include "pbbam/GroupQuery.h"
-#include "MemoryUtils.h"
-using namespace PacBio;
-using namespace PacBio::BAM;
-using namespace std;
-
-SequentialGroupQueryBase::SequentialGroupQueryBase(const BamFile & file) 
-    : GroupQueryBase(file)
-    , htsFile_(nullptr)
-    , htsHeader_(nullptr)
-    , nextRecord_()
-{
-    htsFile_.reset(sam_open(file.Filename().c_str(), "rb"), internal::HtslibFileDeleter());
-    if (!htsFile_) 
-        throw std::runtime_error("could not open BAM file for reading");
-
-    htsHeader_.reset(sam_hdr_read(htsFile_.get()), internal::HtslibHeaderDeleter());
-    if (!htsHeader_) 
-        throw std::runtime_error("could not read BAM header data");
-}
-
-bool SequentialGroupQueryBase::GetNext(vector<BamRecord> & records) 
-{
-    records.clear();
-
-    if (nextRecord_.Impl().Name() != "") {
-        records.push_back(nextRecord_);
-        nextRecord_ = BamRecord();
-    }
-
-    while(true) {
-        BamRecord record(file_.Header());
-        const int result = sam_read1(htsFile_.get(),
-                                     htsHeader_.get(),
-                                     internal::BamRecordMemory::GetRawData(record).get());
-        if (result >= 0) { // get next record
-            if (records.size() == 0) {
-                records.push_back(record); // add the first record
-            } else {
-                if (InSameGroup(record, records[0])) {
-                    records.push_back(record); // add remaining record
-                } else {
-                    nextRecord_ = record; // store record from another zmw
-                    return true;
-                }
-            }
-        } else { // unable to get next record
-            if (records.size() > 0) return true; // Has records to return
-            else return false; // Has no records to return
-        }
-    }
-    assert(false); // Should not reach here.
-    return false;
-}
diff --git a/src/IndexedFastaReader.cpp b/src/IndexedFastaReader.cpp
index fffe44b..715dd03 100644
--- a/src/IndexedFastaReader.cpp
+++ b/src/IndexedFastaReader.cpp
@@ -32,29 +32,47 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file IndexedFastaReader.cpp
+/// \brief Implements the IndexedFastaReader class.
+//
 // Author: David Alexander
 
-#include "htslib/faidx.h"
+#include "pbbam/IndexedFastaReader.h"
+
 #include "pbbam/BamRecord.h"
 #include "pbbam/GenomicInterval.h"
-#include "pbbam/IndexedFastaReader.h"
 #include "pbbam/Orientation.h"
 #include "SequenceUtils.h"
-
-#include <cstdlib>
+#include <htslib/faidx.h>
 #include <iostream>
+#include <cstdlib>
 
 namespace PacBio {
 namespace BAM {
 
-
 IndexedFastaReader::IndexedFastaReader(const std::string& filename)
 {
     Open(filename);
 }
 
-IndexedFastaReader::~IndexedFastaReader()
+IndexedFastaReader::IndexedFastaReader(const IndexedFastaReader& src)
+{
+    if (!Open(src.filename_))
+        throw std::runtime_error("Cannot open file " + src.filename_);
+}
+
+IndexedFastaReader& IndexedFastaReader::operator=(const IndexedFastaReader& rhs)
+{
+    if (&rhs == this)   // self-assignment: nothing to reopen
+        return *this;
+    if (!Open(rhs.filename_))   // report failure the same way the copy constructor does
+        throw std::runtime_error("Cannot open file " + rhs.filename_);
+    return *this;
+}
+
+IndexedFastaReader::~IndexedFastaReader(void)
 {
     Close();
 }
@@ -72,7 +90,7 @@ bool IndexedFastaReader::Open(const std::string &filename)
     }
 }
 
-void IndexedFastaReader::Close()
+void IndexedFastaReader::Close(void)
 {
     filename_ = "";
     if (handle_ != nullptr)
@@ -194,7 +212,7 @@ IndexedFastaReader::ReferenceSubsequence(const BamRecord& bamRecord,
 }
 
 
-int IndexedFastaReader::NumSequences() const
+int IndexedFastaReader::NumSequences(void) const
 {
     REQUIRE_FAIDX_LOADED;
     return faidx_nseq(handle_);
@@ -215,5 +233,4 @@ int IndexedFastaReader::SequenceLength(const std::string& name) const
     else return len;
 }
 
-
 }}  // PacBio::BAM
diff --git a/src/MemoryUtils.h b/src/MemoryUtils.h
index dc4be84..c22f9f5 100644
--- a/src/MemoryUtils.h
+++ b/src/MemoryUtils.h
@@ -132,6 +132,9 @@ public:
     static PBBAM_SHARED_PTR<bam1_t> GetRawData(const BamRecord* r);
     static PBBAM_SHARED_PTR<bam1_t> GetRawData(const BamRecordImpl& impl);
     static PBBAM_SHARED_PTR<bam1_t> GetRawData(const BamRecordImpl* impl);
+
+    static void UpdateRecordTags(const BamRecord& r);
+    static void UpdateRecordTags(const BamRecordImpl& r);
 };
 
 inline const BamRecordImpl& BamRecordMemory::GetImpl(const BamRecord& r)
@@ -152,6 +155,12 @@ inline PBBAM_SHARED_PTR<bam1_t> BamRecordMemory::GetRawData(const BamRecordImpl&
 inline PBBAM_SHARED_PTR<bam1_t> BamRecordMemory::GetRawData(const BamRecordImpl* impl)
 { return impl->d_; }
 
+inline void BamRecordMemory::UpdateRecordTags(const BamRecord& r)
+{ UpdateRecordTags(r.impl_); }
+
+inline void BamRecordMemory::UpdateRecordTags(const BamRecordImpl& r)
+{ r.UpdateTagMap(); }
+
 } // namespace internal
 } // namespace BAM
 } // namespace PacBio
diff --git a/src/PbiBuilder.cpp b/src/PbiBuilder.cpp
index 65c87bd..bb172dc 100644
--- a/src/PbiBuilder.cpp
+++ b/src/PbiBuilder.cpp
@@ -33,6 +33,10 @@
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
 //
+// File Description
+/// \file PbiBuilder.cpp
+/// \brief Implements the PbiBuilder class.
+//
 // Author: Derek Barnett
 
 #include "pbbam/PbiBuilder.h"
@@ -41,6 +45,7 @@
 #include "MemoryUtils.h"
 #include "PbiIndexIO.h"
 #include <htslib/bgzf.h>
+#include <thread>
 #include <cassert>
 using namespace PacBio;
 using namespace PacBio::BAM;
@@ -75,10 +80,15 @@ PbiRawReferenceDataBuilder::PbiRawReferenceDataBuilder(const size_t numReference
     : lastRefId_(-1)
     , lastPos_(-1)
 {
-//    const BamHeader& header = bam.Header();
-//    const size_t numReferences = header.Sequences().size();
+    // initialize with number of references we expect to see
+    //
+    // we can add more later, but want to ensure known references have an entry
+    // even if no records are observed mapping to it
+    //
     for (size_t i = 0; i < numReferenceSequences; ++i)
         rawReferenceEntries_[i] = PbiReferenceEntry(i);
+
+    // also create an "unmapped" entry
     rawReferenceEntries_[PbiReferenceEntry::UNMAPPED_ID] = PbiReferenceEntry();
 }
 
@@ -147,54 +157,112 @@ PbiRawReferenceData PbiRawReferenceDataBuilder::Result(void) const {
 class PbiBuilderPrivate
 {
 public:
-    PbiBuilderPrivate(const string& filename, const size_t numReferenceSequences);
+    PbiBuilderPrivate(const string& filename,
+                      const size_t numReferenceSequences,
+                      const PbiBuilder::CompressionLevel compressionLevel,
+                      const size_t numThreads);
+    PbiBuilderPrivate(const string& filename,
+                      const size_t numReferenceSequences,
+                      const bool isCoordinateSorted,
+                      const PbiBuilder::CompressionLevel compressionLevel,
+                      const size_t numThreads);
     ~PbiBuilderPrivate(void);
 
 public:
     void AddRecord(const BamRecord& record, const int64_t vOffset);
 
 public:
+    bool HasBarcodeData(void) const;
+    bool HasMappedData(void) const;
+    bool HasReferenceData(void) const;
+
+public:
     unique_ptr<BGZF, HtslibBgzfDeleter> bgzf_;
     PbiRawData rawData_;
     PbiReferenceEntry::Row currentRow_;
-    bool hasMappedData_;
-    bool hasBarcodeData_;
-    bool hasReferenceData_;
-    unique_ptr<PbiRawReferenceDataBuilder> refDataBuilder_;
+    unique_ptr<PbiRawReferenceDataBuilder> refDataBuilder_;    
 };
 
-PbiBuilderPrivate::PbiBuilderPrivate(const string& filename, const size_t numReferenceSequences)
-    : bgzf_(bgzf_open(filename.c_str(), "wb"))
+PbiBuilderPrivate::PbiBuilderPrivate(const string& filename,
+                                     const size_t numReferenceSequences,
+                                     const PbiBuilder::CompressionLevel compressionLevel,
+                                     const size_t numThreads)
+    : bgzf_(nullptr)
     , currentRow_(0)
-    , hasMappedData_(true)
-    , hasBarcodeData_(true)
-    , hasReferenceData_(true)
     , refDataBuilder_(nullptr)
 {
-    if (bgzf_.get()== 0)
+    const string& mode = string("wb") + to_string(static_cast<int>(compressionLevel));
+    bgzf_.reset(bgzf_open(filename.c_str(), mode.c_str()));
+    if (bgzf_.get() == 0)
         throw std::runtime_error("could not open PBI file for writing");
 
+    size_t actualNumThreads = numThreads;
+    if (actualNumThreads == 0) {
+        actualNumThreads = thread::hardware_concurrency();
+
+        // if still unknown, default to single-threaded
+        if (actualNumThreads == 0)
+            actualNumThreads = 1;
+    }
+
+    // if multithreading requested, enable it
+    if (actualNumThreads > 1)
+        bgzf_mt(bgzf_.get(), actualNumThreads, 256);
+
     if (numReferenceSequences > 0)
         refDataBuilder_.reset(new PbiRawReferenceDataBuilder(numReferenceSequences));
-    else
-        hasReferenceData_ = false;
+}
+
+PbiBuilderPrivate::PbiBuilderPrivate(const string& filename,
+                                     const size_t numReferenceSequences,
+                                     const bool isCoordinateSorted,
+                                     const PbiBuilder::CompressionLevel compressionLevel,
+                                     const size_t numThreads)
+    : bgzf_(nullptr)
+    , currentRow_(0)
+    , refDataBuilder_(nullptr)
+{
+    const string& mode = string("wb") + to_string(static_cast<int>(compressionLevel));
+    bgzf_.reset(bgzf_open(filename.c_str(), mode.c_str()));
+    if (bgzf_.get() == 0)
+        throw std::runtime_error("could not open PBI file for writing");
+    // NOTE(review): if compressionLevel can be negative, mode becomes e.g. "wb-1"; confirm how bgzf_open parses that
+    size_t actualNumThreads = numThreads;
+    if (actualNumThreads == 0) {
+        actualNumThreads = thread::hardware_concurrency();
+        // numThreads == 0 asks for the hardware thread count; that query may itself
+        // report 0 when unknown, in which case fall back to single-threaded
+        if (actualNumThreads == 0)
+            actualNumThreads = 1;
+    }
+
+    // only switch the BGZF handle to multithreaded mode when more than one thread is wanted
+    if (actualNumThreads > 1)
+        bgzf_mt(bgzf_.get(), actualNumThreads, 256);
+    // reference data is collected only for coordinate-sorted input with known references
+    if (isCoordinateSorted && numReferenceSequences > 0)
+        refDataBuilder_.reset(new PbiRawReferenceDataBuilder(numReferenceSequences));
 }
 
 PbiBuilderPrivate::~PbiBuilderPrivate(void)
 {
     rawData_.NumReads(currentRow_);
 
+    const auto hasBarcodeData   = HasBarcodeData();
+    const auto hasMappedData    = HasMappedData();
+    const auto hasReferenceData = HasReferenceData();
+
     // fetch reference data, if available
-    if (hasReferenceData_) {
+    if (hasReferenceData) {
         assert(refDataBuilder_);
         rawData_.ReferenceData() = std::move(refDataBuilder_->Result());
     }
 
     // determine flags
-    PbiFile::Sections sections = PbiFile::SUBREAD;
-    if (hasMappedData_)    sections |= PbiFile::MAPPED;
-    if (hasBarcodeData_)   sections |= PbiFile::BARCODE;
-    if (hasReferenceData_) sections |= PbiFile::REFERENCE;
+    PbiFile::Sections sections = PbiFile::BASIC;
+    if (hasMappedData)    sections |= PbiFile::MAPPED;
+    if (hasBarcodeData)   sections |= PbiFile::BARCODE;
+    if (hasReferenceData) sections |= PbiFile::REFERENCE;
     rawData_.FileSections(sections);
 
     // write index contents to file
@@ -202,34 +270,85 @@ PbiBuilderPrivate::~PbiBuilderPrivate(void)
     PbiIndexIO::WriteHeader(rawData_, fp);
     const uint32_t numReads = rawData_.NumReads();
     if (numReads > 0) {
-        PbiIndexIO::WriteSubreadData(rawData_.SubreadData(), numReads, fp);
-        if (rawData_.HasMappedData())
-            PbiIndexIO::WriteMappedData(rawData_.MappedData(), numReads, fp);
-        if (rawData_.HasReferenceData())
-            PbiIndexIO::WriteReferenceData(rawData_.ReferenceData(), fp);
-        if (rawData_.HasBarcodeData())
-            PbiIndexIO::WriteBarcodeData(rawData_.BarcodeData(), numReads, fp);
+        PbiIndexIO::WriteBasicData(rawData_.BasicData(), numReads, fp);
+        if (hasMappedData)    PbiIndexIO::WriteMappedData(rawData_.MappedData(), numReads, fp);
+        if (hasReferenceData) PbiIndexIO::WriteReferenceData(rawData_.ReferenceData(), fp);
+        if (hasBarcodeData)   PbiIndexIO::WriteBarcodeData(rawData_.BarcodeData(), numReads, fp);
     }
 }
 
 void PbiBuilderPrivate::AddRecord(const BamRecord& record, const int64_t vOffset)
 {
+    // ensure updated data
     record.ResetCachedPositions();
 
-    rawData_.SubreadData().AddRecord(record, vOffset);
-
-    if (hasMappedData_)
-        hasMappedData_ &= rawData_.MappedData().AddRecord(record);
+    // store data
+    rawData_.BarcodeData().AddRecord(record);
+    rawData_.BasicData().AddRecord(record, vOffset);
+    rawData_.MappedData().AddRecord(record);
 
-    if (hasReferenceData_)
-        hasBarcodeData_ &= rawData_.BarcodeData().AddRecord(record);
+    if (refDataBuilder_) {
 
-    if (hasReferenceData_)
-        hasReferenceData_ &= refDataBuilder_->AddRecord(record, currentRow_);
+        // stop storing coordinate-sorted reference data if we encounter out-of-order record
+        const bool sorted = refDataBuilder_->AddRecord(record, currentRow_);
+        if (!sorted)
+            refDataBuilder_.reset();
+    }
 
+    // increment row counter
     ++currentRow_;
 }
 
+// Reports whether any record stored so far carries barcode information, i.e.
+// at least one of bcForward/bcReverse/bcQuality holds a value other than -1.
+// Throws std::runtime_error if the three barcode containers differ in size.
+bool PbiBuilderPrivate::HasBarcodeData(void) const
+{
+    const auto& barcodeData = rawData_.BarcodeData();
+    const auto& bcForward   = barcodeData.bcForward_;
+    const auto& bcReverse   = barcodeData.bcReverse_;
+    const auto& bcQuality   = barcodeData.bcQual_;
+
+    // a mismatch in *either* container is an error ('||', not '&&'): the scan
+    // below indexes all three with the same row number
+    if (bcForward.size() != bcReverse.size() ||
+        bcForward.size() != bcQuality.size())
+    {
+        auto msg = string{ "error: inconsistency in PBI barcode data:\n" };
+        msg +=     string{ "  bcForward has " } + to_string(bcForward.size()) + string{ " elements\n" };
+        msg +=     string{ "  bcReverse has " } + to_string(bcReverse.size()) + string{ " elements\n" };
+        msg +=     string{ "  bcQuality has " } + to_string(bcQuality.size()) + string{ " elements\n" };
+        msg +=     string{ "\n" };
+        msg +=     string{ "  these containers should contain equal number of elements.\n" };
+        throw std::runtime_error(msg);
+    }
+    assert(bcForward.size() == rawData_.NumReads());
+
+    // any value other than -1 in any of the three columns counts as barcode data
+    for (uint32_t i = 0; i < rawData_.NumReads(); ++i) {
+        if (bcForward.at(i) != -1 || bcReverse.at(i) != -1 || bcQuality.at(i) != -1) {
+            return true;
+        }
+    }
+    // no actual data found
+    return false;
+}
+
+// True when at least one stored record has a non-negative reference id.
+bool PbiBuilderPrivate::HasMappedData(void) const
+{
+    const auto& referenceIds = rawData_.MappedData().tId_;
+    assert(referenceIds.size() == rawData_.NumReads());
+
+    bool anyMapped = false;
+    for (auto it = referenceIds.begin(); it != referenceIds.end() && !anyMapped; ++it)
+        anyMapped = (*it >= 0);
+    return anyMapped; // false: all reads unmapped
+}
+
+bool PbiBuilderPrivate::HasReferenceData(void) const
+{ return bool(refDataBuilder_); } // true while a reference-data builder is still held
+
 } // namespace internal
 } // namespace BAM
 } // namespace PacBio
@@ -238,18 +357,44 @@ void PbiBuilderPrivate::AddRecord(const BamRecord& record, const int64_t vOffset
 // PbiBuilder implementation
 // ---------------------------
 
-PbiBuilder::PbiBuilder(const string& pbiFilename)
-    : d_(new internal::PbiBuilderPrivate(pbiFilename, 0))
+PbiBuilder::PbiBuilder(const string& pbiFilename,
+                       const CompressionLevel compressionLevel,
+                       const size_t numThreads)
+    : d_(new internal::PbiBuilderPrivate(pbiFilename,
+                                         0,
+                                         compressionLevel,
+                                         numThreads))
 { }
 
-PbiBuilder::PbiBuilder(const string& pbiFilename, const size_t numReferenceSequences)
-    : d_(new internal::PbiBuilderPrivate(pbiFilename, numReferenceSequences))
+PbiBuilder::PbiBuilder(const string& pbiFilename,
+                       const size_t numReferenceSequences,
+                       const CompressionLevel compressionLevel,
+                       const size_t numThreads)
+    : d_(new internal::PbiBuilderPrivate(pbiFilename,
+                                         numReferenceSequences,
+                                         compressionLevel,
+                                         numThreads))
+{ }
+
+PbiBuilder::PbiBuilder(const string& pbiFilename,
+                       const size_t numReferenceSequences,
+                       const bool isCoordinateSorted,
+                       const CompressionLevel compressionLevel,
+                       const size_t numThreads)
+    : d_(new internal::PbiBuilderPrivate(pbiFilename,
+                                         numReferenceSequences,
+                                         isCoordinateSorted,
+                                         compressionLevel,
+                                         numThreads))
 { }
 
 PbiBuilder::~PbiBuilder(void) { }
 
 void PbiBuilder::AddRecord(const BamRecord& record, const int64_t vOffset)
-{ d_->AddRecord(record, vOffset);  }
+{
+    internal::BamRecordMemory::UpdateRecordTags(record);
+    d_->AddRecord(record, vOffset);
+}
 
 const PbiRawData& PbiBuilder::Index(void) const
 { return d_->rawData_; }
diff --git a/src/PbiFile.cpp b/src/PbiFile.cpp
index 0318e42..144c847 100644
--- a/src/PbiFile.cpp
+++ b/src/PbiFile.cpp
@@ -33,15 +33,16 @@
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
 //
+// File Description
+/// \file PbiFile.cpp
+/// \brief Implements the PbiFile methods.
+//
 // Author: Derek Barnett
 
 #include "pbbam/PbiFile.h"
 #include "pbbam/BamFile.h"
 #include "pbbam/PbiBuilder.h"
-#include "MemoryUtils.h"
-#include <htslib/sam.h>
-#include <cassert>
-
+#include "pbbam/BamReader.h"
 using namespace PacBio;
 using namespace PacBio::BAM;
 using namespace PacBio::BAM::PbiFile;
@@ -51,34 +52,20 @@ namespace PacBio {
 namespace BAM {
 namespace PbiFile {
 
-void CreateFrom(const BamFile& bamFile)
+void CreateFrom(const BamFile& bamFile,
+                const PbiBuilder::CompressionLevel compressionLevel,
+                const size_t numThreads)
 {
-    // open input file for file handle & header
-    unique_ptr<samFile,internal::HtslibFileDeleter> htsFile(sam_open(bamFile.Filename().c_str(), "rb"));
-    if (!htsFile)
-        throw std::runtime_error("could not open BAM file for reading");
-
-    unique_ptr<bam_hdr_t, internal::HtslibHeaderDeleter> htsHeader(sam_hdr_read(htsFile.get()));
-    if (!htsHeader)
-        throw std::runtime_error("could not read BAM header data");
-
-    samFile*   fp  = htsFile.get();
-    bam_hdr_t* hdr = htsHeader.get();
-    assert(fp);
-    assert(hdr);
-
-    // setup our record object
-    BamRecord record;
-    bam1_t* b = internal::BamRecordMemory::GetRawData(record).get();
-    if (b == 0)
-        throw std::runtime_error("could not allocate BAM record");
-
-    // iterate through file, building index data
-    PbiBuilder builder(bamFile.PacBioIndexFilename(), bamFile.Header().Sequences().size());
-    int64_t offset = bgzf_tell(fp->fp.bgzf);
-    while (sam_read1(fp, hdr, b) >= 0) {
-        builder.AddRecord(record, offset);
-        offset = bgzf_tell(fp->fp.bgzf);
+    PbiBuilder builder(bamFile.PacBioIndexFilename(),
+                       bamFile.Header().Sequences().size(),
+                       compressionLevel,
+                       numThreads);
+    BamReader reader(bamFile);
+    BamRecord b;
+    int64_t offset = reader.VirtualTell();
+    while (reader.GetNext(b)) {
+        builder.AddRecord(b, offset);
+        offset = reader.VirtualTell();
     }
 }
 
diff --git a/src/PbiFilter.cpp b/src/PbiFilter.cpp
new file mode 100644
index 0000000..5c738bf
--- /dev/null
+++ b/src/PbiFilter.cpp
@@ -0,0 +1,249 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiFilter.cpp
+/// \brief Implements the PbiFilter class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/PbiFilter.h"
+#include "pbbam/PbiFilterTypes.h"
+#include "StringUtils.h"
+#include <boost/algorithm/string/case_conv.hpp>
+#include <boost/algorithm/string/trim.hpp>
+#include <algorithm>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+#include <cctype>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+enum class BuiltIn
+{
+    AlignedEndFilter
+  , AlignedLengthFilter
+  , AlignedStartFilter
+  , AlignedStrandFilter
+  , BarcodeFilter
+  , BarcodeForwardFilter
+  , BarcodeQualityFilter
+  , BarcodeReverseFilter
+  , BarcodesFilter
+  , IdentityFilter
+  , LocalContextFilter
+  , MovieNameFilter
+  , NumDeletedBasesFilter
+  , NumInsertedBasesFilter
+  , NumMatchesFilter
+  , NumMismatchesFilter
+  , QueryEndFilter
+  , QueryLengthFilter
+  , QueryNameFilter
+  , QueryStartFilter
+  , ReadAccuracyFilter
+  , ReadGroupFilter
+  , ReferenceEndFilter
+  , ReferenceIdFilter
+  , ReferenceNameFilter
+  , ReferenceStartFilter
+  , ZmwFilter
+};
+
+static const unordered_map<string, BuiltIn> builtInLookup =
+{
+    // property name   built-in filter
+    { "ae",            BuiltIn::AlignedEndFilter },
+    { "aend",          BuiltIn::AlignedEndFilter },
+    { "alignedlength", BuiltIn::AlignedLengthFilter },
+    { "as",            BuiltIn::AlignedStartFilter },
+    { "astart",        BuiltIn::AlignedStartFilter },
+    { "readstart",     BuiltIn::AlignedStartFilter },
+    { "bc",            BuiltIn::BarcodeFilter },
+    { "barcode",       BuiltIn::BarcodeFilter },
+    { "accuracy",      BuiltIn::IdentityFilter },
+    { "identity",      BuiltIn::IdentityFilter },
+    { "cx",            BuiltIn::LocalContextFilter },
+    { "movie",         BuiltIn::MovieNameFilter },
+    { "qe",            BuiltIn::QueryEndFilter },
+    { "qend",          BuiltIn::QueryEndFilter },
+    { "length",        BuiltIn::QueryLengthFilter },
+    { "querylength",   BuiltIn::QueryLengthFilter },
+    { "qname",         BuiltIn::QueryNameFilter },
+    { "qs",            BuiltIn::QueryStartFilter },
+    { "qstart",        BuiltIn::QueryStartFilter },
+    { "rq",            BuiltIn::ReadAccuracyFilter },
+    { "te",            BuiltIn::ReferenceEndFilter },
+    { "tend",          BuiltIn::ReferenceEndFilter },
+    { "rname",         BuiltIn::ReferenceNameFilter },
+    { "ts",            BuiltIn::ReferenceStartFilter },
+    { "tstart",        BuiltIn::ReferenceStartFilter },
+    { "pos",           BuiltIn::ReferenceStartFilter },
+    { "zm",            BuiltIn::ZmwFilter },
+    { "zmw",           BuiltIn::ZmwFilter }
+};
+
+static const unordered_map<string, LocalContextFlags> contextFlagNames =
+{
+    { "NO_LOCAL_CONTEXT", LocalContextFlags::NO_LOCAL_CONTEXT },
+    { "ADAPTER_BEFORE",   LocalContextFlags::ADAPTER_BEFORE },
+    { "ADAPTER_AFTER",    LocalContextFlags::ADAPTER_AFTER },
+    { "BARCODE_BEFORE",   LocalContextFlags::BARCODE_BEFORE },
+    { "BARCODE_AFTER",    LocalContextFlags::BARCODE_AFTER },
+    { "FORWARD_PASS",     LocalContextFlags::FORWARD_PASS },
+    { "REVERSE_PASS",     LocalContextFlags::REVERSE_PASS }
+};
+
+// Parses a raw integer or a '|'-separated list of flag names into a filter.
+static
+PbiFilter CreateLocalContextFilter(const std::string& value,
+                                        const Compare::Type compareType)
+{
+    if (value.empty())
+        throw std::runtime_error("empty value for local context filter property");
+
+    LocalContextFlags filterValue = LocalContextFlags::NO_LOCAL_CONTEXT;
+
+    // raw integer (cast: <cctype> needs a value representable as unsigned char)
+    if (isdigit(static_cast<unsigned char>(value.at(0))))
+        filterValue = static_cast<LocalContextFlags>(stoi(value));
+
+    // otherwise OR together the named flags; unknown names throw from at()
+    else {
+        vector<string> tokens = internal::Split(value, '|');
+        for (string& token : tokens) {
+            boost::algorithm::trim(token); // trim whitespace
+            filterValue = (filterValue | contextFlagNames.at(token));
+        }
+    }
+    return PbiFilter{ PbiLocalContextFilter{filterValue, compareType} };
+}
+
+// Converts one DataSet XML Property (name, operator, value) into the matching
+// built-in filter. Any std::exception raised on the way (unknown name, value
+// that fails to parse, ...) is rethrown as std::runtime_error with details.
+static
+PbiFilter FromDataSetProperty(const Property& property)
+{
+    try {
+        const string& value = property.Value();
+        const Compare::Type compareType = Compare::TypeFromOperator(property.Operator());
+        const BuiltIn builtInCode = builtInLookup.at(boost::algorithm::to_lower_copy(property.Name()));
+        switch (builtInCode) {
+            case BuiltIn::AlignedEndFilter     : return PbiAlignedEndFilter{ static_cast<uint32_t>(stoul(value)), compareType };
+            case BuiltIn::AlignedLengthFilter  : return PbiAlignedLengthFilter{ static_cast<uint32_t>(stoul(value)), compareType };
+            case BuiltIn::AlignedStartFilter   : return PbiAlignedStartFilter{ static_cast<uint32_t>(stoul(value)), compareType };
+            case BuiltIn::BarcodeFilter        : return PbiBarcodeFilter{ static_cast<uint16_t>(stoul(value)), compareType };
+            case BuiltIn::IdentityFilter       : return PbiIdentityFilter{ stof(value), compareType };
+            case BuiltIn::MovieNameFilter      : return PbiMovieNameFilter{ value };
+            case BuiltIn::QueryEndFilter       : return PbiQueryEndFilter{ stoi(value), compareType };
+            case BuiltIn::QueryLengthFilter    : return PbiQueryLengthFilter{ stoi(value), compareType };
+            case BuiltIn::QueryNameFilter      : return PbiQueryNameFilter{ value };
+            case BuiltIn::QueryStartFilter     : return PbiQueryStartFilter{ stoi(value), compareType };
+            case BuiltIn::ReadAccuracyFilter   : return PbiReadAccuracyFilter{ stof(value), compareType };
+            case BuiltIn::ReadGroupFilter      : return PbiReadGroupFilter{ value, compareType };
+            case BuiltIn::ReferenceEndFilter   : return PbiReferenceEndFilter{ static_cast<uint32_t>(stoul(value)), compareType };
+            case BuiltIn::ReferenceIdFilter    : return PbiReferenceIdFilter{ stoi(value), compareType };
+            case BuiltIn::ReferenceNameFilter  : return PbiReferenceNameFilter{ value };
+            case BuiltIn::ReferenceStartFilter : return PbiReferenceStartFilter{ static_cast<uint32_t>(stoul(value)), compareType };
+            case BuiltIn::ZmwFilter            : return PbiZmwFilter{ stoi(value), compareType };
+            case BuiltIn::LocalContextFilter   : return CreateLocalContextFilter(value, compareType);
+            default :
+                throw std::runtime_error("built-in filter type is not supported for DataSet properties");
+        }
+        // unreachable
+        return PbiFilter{ };
+
+    } catch (const std::exception& e) {
+        stringstream s;
+        s << "error: could not create filter from XML Property element: " << endl
+          << "  Name:     " << property.Name()     << endl
+          << "  Value:    " << property.Value()    << endl
+          << "  Operator: " << property.Operator() << endl
+          << "  reason:   " << e.what() << endl;
+        throw std::runtime_error(s.str());
+    }
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+PbiFilter PbiFilter::FromDataSet(const DataSet& dataset)
+{
+    PbiFilter combined{ PbiFilter::UNION };        // parent over all DataSet filters
+    for (auto&& filterElement : dataset.Filters()) {
+        PbiFilter fromProperties{ };               // one child per filter element
+        for (auto&& prop : filterElement.Properties())
+            fromProperties.Add(internal::FromDataSetProperty(prop));
+        combined.Add(fromProperties);
+    }
+    return combined;
+}
+
+PbiFilter PbiFilter::Intersection(const std::vector<PbiFilter>& filters)
+{
+    PbiFilter composite{ PbiFilter::INTERSECT }; // INTERSECT-type parent
+    composite.Add(filters);                      // receives the given filters
+    return composite;
+}
+
+PbiFilter PbiFilter::Intersection(std::vector<PbiFilter>&& filters)
+{
+    PbiFilter composite{ PbiFilter::INTERSECT }; // INTERSECT-type parent
+    composite.Add(std::move(filters));           // rvalue overload: vector is handed over
+    return composite;
+}
+
+PbiFilter PbiFilter::Union(const std::vector<PbiFilter>& filters)
+{
+    PbiFilter composite{ PbiFilter::UNION }; // UNION-type parent
+    composite.Add(filters);                  // receives the given filters
+    return composite;
+}
+
+PbiFilter PbiFilter::Union(std::vector<PbiFilter>&& filters)
+{
+    PbiFilter composite{ PbiFilter::UNION }; // UNION-type parent
+    composite.Add(std::move(filters));       // rvalue overload: vector is handed over
+    return composite;
+}
diff --git a/src/FilterEngine.cpp b/src/PbiFilterQuery.cpp
similarity index 72%
copy from src/FilterEngine.cpp
copy to src/PbiFilterQuery.cpp
index 1f47967..19d2b31 100644
--- a/src/FilterEngine.cpp
+++ b/src/PbiFilterQuery.cpp
@@ -32,45 +32,39 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file PbiFilterQuery.cpp
+/// \brief Implements the PbiFilterQuery class.
+//
 // Author: Derek Barnett
 
-#include "pbbam/internal/FilterEngine.h"
+#include "pbbam/PbiFilterQuery.h"
+#include "pbbam/CompositeBamReader.h"
+
+
+#include <iostream>
+
 using namespace PacBio;
 using namespace PacBio::BAM;
 using namespace PacBio::BAM::internal;
 using namespace std;
 
-namespace PacBio {
-namespace BAM {
-namespace internal {
+struct PbiFilterQuery::PbiFilterQueryPrivate
+{
+    PbiFilterQueryPrivate(const PbiFilter& filter, const DataSet& dataset)
+        : reader_(filter, dataset)
+    { }
 
-} // namespace internal
-} // namespace BAM
-} // namespace PacBio
+    PbiFilterCompositeBamReader<Compare::None> reader_; // unsorted
+};
 
+PbiFilterQuery::PbiFilterQuery(const PbiFilter& filter, const DataSet& dataset)
+    : internal::IQuery()
+    , d_(new PbiFilterQueryPrivate(filter, dataset))
+{ }
 
-FilterEngine::FilterEngine(void) { }
+PbiFilterQuery::~PbiFilterQuery(void) { }
 
-bool FilterEngine::Accepts(const BamRecord& r) const
-{
-//        foreach ( const FilterParameter& param, parameters_ ) {
-//            if (!param.Accepts(r))
-//                return false;
-//        }
-//        return true;
-    (void)r;
-    return true;
-}
-
-bool FilterEngine::Accepts(vector<BamRecord>& r) const
-{
-    size_t i = 0;
-    while (i < r.size()) {
-        if (!Accepts(r.at(i)))
-            r.erase(r.begin() + i);
-        else
-            ++i;
-    }
-    return !r.empty();
-}
+bool PbiFilterQuery::GetNext(BamRecord &r)
+{ return d_->reader_.GetNext(r); }
diff --git a/src/PbiFilterTypes.cpp b/src/PbiFilterTypes.cpp
new file mode 100644
index 0000000..e052c63
--- /dev/null
+++ b/src/PbiFilterTypes.cpp
@@ -0,0 +1,313 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiFilterTypes.cpp
+/// \brief Implements the built-in PBI filters.
+//
+// Author: Derek Barnett
+
+#include "pbbam/PbiFilterTypes.h"
+#include "StringUtils.h"
+#include <boost/algorithm/string.hpp>
+#include <sstream>
+#include <string>
+#include <cassert>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// Collects the indices i whose length (end[i] - start[i]) satisfies
+/// `length <cmp> value`. Both vectors are expected to be the same size; an
+/// unrecognized Compare::Type asserts (debug) and throws std::runtime_error.
+template<typename T>
+IndexList readLengthHelper(const std::vector<T>& start,
+                           const std::vector<T>& end,
+                           const T& value,
+                           const Compare::Type cmp)
+{
+    assert(start.size() == end.size());
+    IndexList matches;
+    for (size_t row = 0; row < start.size(); ++row) {
+        const auto length = end[row] - start[row];
+        bool accepted = false;
+        switch(cmp) {
+            case Compare::EQUAL              : accepted = (length == value); break;
+            case Compare::NOT_EQUAL          : accepted = (length != value); break;
+            case Compare::LESS_THAN          : accepted = (length < value); break;
+            case Compare::LESS_THAN_EQUAL    : accepted = (length <= value); break;
+            case Compare::GREATER_THAN       : accepted = (length > value); break;
+            case Compare::GREATER_THAN_EQUAL : accepted = (length >= value); break;
+            default:
+                assert(false);
+                throw std::runtime_error(string{"read length filter encountered unknown Compare::Type: "} +
+                                         Compare::TypeToName(cmp));
+        }
+        if (accepted)
+            matches.push_back(row);
+    }
+    return matches;
+}
+
+/// Builds a UNION filter over the read group IDs made from \p movieName and each
+/// non-CCS read type; the CCS read group is added only when \p includeCcs is set.
+static PbiFilter filterFromMovieName(const string& movieName, bool includeCcs)
+{
+    // we'll match on any one of the rgIds in this candidate list
+    auto rgIdFilter = PbiFilter{ PbiFilter::UNION };
+    rgIdFilter.Add(
+    {
+        PbiReadGroupFilter{ MakeReadGroupId(movieName, "POLYMERASE") },
+        PbiReadGroupFilter{ MakeReadGroupId(movieName, "HQREGION") },
+        PbiReadGroupFilter{ MakeReadGroupId(movieName, "SUBREAD") },
+        PbiReadGroupFilter{ MakeReadGroupId(movieName, "SCRAP") },
+        PbiReadGroupFilter{ MakeReadGroupId(movieName, "UNKNOWN") }
+    });
+    if (includeCcs)
+        rgIdFilter.Add(PbiReadGroupFilter{ MakeReadGroupId(movieName, "CCS") });
+    return rgIdFilter;
+}
+
+/// Builds the filter for one PacBio QNAME, which must look like either
+///   "movieName/holeNumber/ccs"  or  "movieName/holeNumber/qStart_qEnd".
+/// Throws std::runtime_error when the name does not split into those parts;
+/// exceptions from stoi() on the numeric parts are not handled here.
+static PbiFilter filterFromQueryName(const string& queryName)
+{
+    const auto invalidQname = [&queryName]() -> std::runtime_error {
+        auto msg = string{ "PbiQueryNameFilter error: requested QNAME (" } + queryName;
+        msg += string{ ") is not a valid PacBio BAM QNAME. See spec for details"};
+        return std::runtime_error(msg);
+    };
+
+    // split full name into moviename, holenumber, and ccs-or-interval
+    const auto nameParts = internal::Split(queryName, '/');
+    if (nameParts.size() != 3)
+        throw invalidQname();
+    const auto movieName = nameParts.at(0);
+
+    // main filter: zmw && {candidate rgIds} [&& qStart && qEnd](non-CCS reads)
+    auto filter = PbiFilter{ };
+    filter.Add(PbiZmwFilter{ stoi(nameParts.at(1)) }); // hole number
+
+    // CCS: only 1 possible candidate rgId, and no query interval to match
+    if (nameParts.at(2) == "ccs") {
+        filter.Add(PbiReadGroupFilter{ MakeReadGroupId(movieName, "CCS") });
+        return filter;
+    }
+
+    // all other read types: match any non-CCS rgId for this movie
+    // (CCS is excluded since it has a different QNAME anyway)
+    filter.Add(filterFromMovieName(movieName, false));
+
+    // ... plus the qStart/qEnd interval taken from the name
+    const auto interval = internal::Split(nameParts.at(2), '_');
+    if (interval.size() != 2)
+        throw invalidQname();
+    filter.Add(PbiQueryStartFilter{ stoi(interval.at(0)) });
+    filter.Add(PbiQueryEndFilter{ stoi(interval.at(1)) });
+    return filter;
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+// PbiAlignedLengthFilter
+
+bool PbiAlignedLengthFilter::Accepts(const PbiRawData& idx, const size_t row) const
+{
+    // aligned length of this row = aEnd - aStart, read from the mapped data
+    const auto& mapped = idx.MappedData();
+    const auto& alignedEnd   = mapped.aEnd_.at(row);
+    const auto& alignedStart = mapped.aStart_.at(row);
+    return CompareHelper(alignedEnd - alignedStart);
+}
+
+// PbiIdentityFilter
+
+bool PbiIdentityFilter::Accepts(const PbiRawData& idx, const size_t row) const
+{
+    // identity = 1 - (mismatches + deleted + inserted bases) / (qEnd - qStart)
+    const auto& mapped = idx.MappedData();
+    const auto& numMismatches = mapped.nMM_.at(row);
+    const auto& indels = mapped.NumDeletedAndInsertedBasesAt(row); // (nDel, nIns)
+
+    const auto& basic = idx.BasicData();
+    const auto& queryStart = basic.qStart_.at(row);
+    const auto& queryEnd   = basic.qEnd_.at(row);
+
+    // NOTE(review): a zero-length query (qEnd == qStart) divides by zero here,
+    // yielding a non-finite float rather than an error - confirm that is intended
+    const auto queryLength = queryEnd - queryStart;
+    const auto numNonMatches = numMismatches + indels.first + indels.second;
+    const float identity  = 1.0 - (static_cast<float>(numNonMatches)/static_cast<float>(queryLength));
+    return CompareHelper(identity);
+}
+
+// PbiMovieNameFilter: wraps the read-group filters built by internal::filterFromMovieName
+// (CCS included); the whitelist overloads take the UNION over all listed movie names.
+PbiMovieNameFilter::PbiMovieNameFilter(const std::string& movieName)
+    : compositeFilter_(internal::filterFromMovieName(movieName, true)) // include CCS
+{ }
+
+PbiMovieNameFilter::PbiMovieNameFilter(const std::vector<std::string>& whitelist)
+    : compositeFilter_(PbiFilter::UNION)
+{
+    for (const auto& movieName : whitelist)
+        compositeFilter_.Add(internal::filterFromMovieName(movieName, true)); // include CCS
+}
+
+PbiMovieNameFilter::PbiMovieNameFilter(std::vector<std::string>&& whitelist)
+    : compositeFilter_(PbiFilter::UNION)
+{
+    for (auto&& movieName : whitelist)
+        compositeFilter_.Add(internal::filterFromMovieName(movieName, true)); // include CCS
+}
+
+// PbiQueryLengthFilter
+
+bool PbiQueryLengthFilter::Accepts(const PbiRawData& idx, const size_t row) const
+{
+    // query length of this row = qEnd - qStart, read from the basic data
+    const auto& basic = idx.BasicData();
+    const auto& queryStart = basic.qStart_.at(row);
+    const auto& queryEnd   = basic.qEnd_.at(row);
+    return CompareHelper(queryEnd - queryStart);
+}
+
+// PbiQueryNameFilter
+
+namespace {
+
+/// Builds the UNION of the per-QNAME filters for \p whitelist. Our own
+/// std::runtime_error is simply re-thrown; any other exception (e.g. from stoi())
+/// is re-thrown as a std::runtime_error with a bit of extra data pinned on.
+PbiFilter unionOfQueryNames(const std::vector<std::string>& whitelist)
+{
+    try {
+        auto filter = PbiFilter{ PbiFilter::UNION };
+        for (const auto& qname : whitelist)
+            filter.Add(internal::filterFromQueryName(qname));
+        return filter;
+    }
+    catch (std::runtime_error&) {
+        throw;
+    }
+    catch (std::exception& e) {
+        auto msg = string{ "PbiQueryNameFilter encountered error: " } + e.what();
+        throw std::runtime_error(msg);
+    }
+}
+
+} // anonymous namespace
+
+// NOTE(review): unlike the whitelist overloads below, this overload lets
+// non-runtime_error exceptions (e.g. from stoi()) escape untranslated.
+PbiQueryNameFilter::PbiQueryNameFilter(const std::string& qname)
+    : compositeFilter_(internal::filterFromQueryName(qname))
+{ }
+
+// whitelist overloads: UNION of the filters for each listed QNAME
+PbiQueryNameFilter::PbiQueryNameFilter(const std::vector<std::string>& whitelist)
+    : compositeFilter_(unionOfQueryNames(whitelist))
+{ }
+
+// (the rvalue overload only reads the list; nothing is moved out of it)
+PbiQueryNameFilter::PbiQueryNameFilter(std::vector<std::string>&& whitelist)
+    : compositeFilter_(unionOfQueryNames(whitelist))
+{ }
+
+// PbiReferenceNameFilter
+
+PbiReferenceNameFilter::PbiReferenceNameFilter(const std::string& rname,
+                                               const Compare::Type cmp)
+    : initialized_(false)
+    , rname_(rname)
+    , cmp_(cmp)
+{
+    // a reference name can only be tested for equality or inequality
+    const bool supported = (cmp == Compare::EQUAL || cmp == Compare::NOT_EQUAL);
+    if (!supported)
+        throw std::runtime_error(std::string{ "Compare type: " } + Compare::TypeToName(cmp) +
+            " not supported for PbiReferenceNameFilter (use one of Compare::EQUAL or Compare::NOT_EQUAL).");
+}
+
+// whitelist overloads: compare with Compare::EQUAL against each listed name
+PbiReferenceNameFilter::PbiReferenceNameFilter(const std::vector<std::string>& whitelist)
+    : initialized_(false)
+    , rnameWhitelist_(whitelist)
+    , cmp_(Compare::EQUAL)
+{ }
+
+PbiReferenceNameFilter::PbiReferenceNameFilter(std::vector<std::string>&& whitelist)
+    : initialized_(false)
+    , rnameWhitelist_(std::move(whitelist))
+    , cmp_(Compare::EQUAL)
+{ }
+
+bool PbiReferenceNameFilter::Accepts(const PbiRawData& idx, const size_t row) const
+{
+    // sub-filter is built on first use; NOTE(review): no synchronization visible here
+    if (!initialized_) Initialize(idx);
+    return subFilter_.Accepts(idx, row);
+}
+
+/// Resolves the stored reference name(s) to reference IDs and builds the sub-filter.
+void PbiReferenceNameFilter::Initialize(const PbiRawData& idx) const
+{
+    // BAM filename = PBI filename minus its last 4 chars (presumably ".pbi"). The
+    // subtraction is unsigned: unguarded, a shorter name wraps and is used unchanged.
+    const auto pbiFilename = idx.Filename();
+    if (pbiFilename.length() <= 4)
+        throw std::runtime_error("PbiReferenceNameFilter: no BAM filename in PBI filename: " + pbiFilename);
+    const auto bamFile = BamFile{ pbiFilename.substr(0, pbiFilename.length() - 4) };
+
+    if (rnameWhitelist_ == boost::none) { // single-value
+        subFilter_ = PbiReferenceIdFilter{ bamFile.ReferenceId(rname_), cmp_ };
+    }
+    else { // multi-value whitelist: match any of the listed references
+        subFilter_ = PbiFilter(PbiFilter::UNION);
+        for (const auto& rname : rnameWhitelist_.get())
+            subFilter_.Add(PbiReferenceIdFilter{ bamFile.ReferenceId(rname) });
+    }
+    initialized_ = true;
+}
+
diff --git a/src/PbiIndex.cpp b/src/PbiIndex.cpp
index 2225a6c..3f54f7b 100644
--- a/src/PbiIndex.cpp
+++ b/src/PbiIndex.cpp
@@ -33,6 +33,10 @@
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
 //
+// File Description
+/// \file PbiIndex.cpp
+/// \brief Implements the PbiIndex class.
+//
 // Author: Derek Barnett
 
 #include "pbbam/PbiIndex.h"
@@ -43,55 +47,21 @@ using namespace PacBio::BAM::internal;
 using namespace std;
 
 // ----------------------------------
-// IndexResultBlock implementation
-// ----------------------------------
-
-IndexResultBlock::IndexResultBlock(void)
-    : firstIndex_(0)
-    , numReads_(0)
-    , virtualOffset_(-1)
-{ }
-
-IndexResultBlock::IndexResultBlock(size_t idx, size_t numReads)
-    : firstIndex_(idx)
-    , numReads_(numReads)
-    , virtualOffset_(-1)
-{ }
-
-bool IndexResultBlock::operator==(const IndexResultBlock& other) const
-{
-    return firstIndex_ == other.firstIndex_ &&
-           numReads_ == other.numReads_ &&
-           virtualOffset_ == other.virtualOffset_;
-}
-
-bool IndexResultBlock::operator!=(const IndexResultBlock& other) const
-{ return !(*this == other); }
-
-// ----------------------------------
 // SubreadLookupData implementation
 // ----------------------------------
 
-SubreadLookupData::SubreadLookupData(void) { }
+BasicLookupData::BasicLookupData(void) { }
 
-SubreadLookupData::SubreadLookupData(const PbiRawSubreadData& rawData)
+BasicLookupData::BasicLookupData(const PbiRawBasicData& rawData)
     : rgId_(rawData.rgId_)
     , qStart_(rawData.qStart_)
     , qEnd_(rawData.qEnd_)
     , holeNumber_(rawData.holeNumber_)
     , readQual_(rawData.readQual_)
+    , ctxtFlag_(rawData.ctxtFlag_)
     , fileOffset_(rawData.fileOffset_)
 { }
 
-//SubreadLookupData::SubreadLookupData(PbiRawSubreadData&& rawData)
-//    : rgId_(std::move(rawData.rgId_))
-//    , qStart_(std::move(rawData.qStart_))
-//    , qEnd_(std::move(rawData.qEnd_))
-//    , holeNumber_(std::move(rawData.holeNumber_))
-//    , readQual_(std::move(rawData.readQual_))
-//    , fileOffset_(std::move(rawData.fileOffset_))
-//{ }
-
 // ----------------------------------
 // MappedLookupData implementation
 // ----------------------------------
@@ -116,17 +86,10 @@ MappedLookupData::MappedLookupData(const PbiRawMappedData& rawData)
     std::map<uint32_t, IndexList> delRawData;
     for (size_t i = 0; i < numElements; ++i) {
 
-        // nIns, nDel
-        const uint32_t aStart = rawData.aStart_.at(i);
-        const uint32_t aEnd   = rawData.aEnd_.at(i);
-        const uint32_t tStart = rawData.tStart_.at(i);
-        const uint32_t tEnd   = rawData.tEnd_.at(i);
-        const uint32_t nM     = rawData.nM_.at(i);
-        const uint32_t nMM    = rawData.nMM_.at(i);
-        const uint32_t numIns = (aEnd - aStart - nM - nMM);
-        const uint32_t numDel = (tEnd - tStart - nM - nMM);
-        insRawData[numIns].push_back(i);
-        delRawData[numDel].push_back(i);
+        // nDel, nIns
+        const auto indels = rawData.NumDeletedAndInsertedBasesAt(i);
+        delRawData[indels.first].push_back(i);
+        insRawData[indels.second].push_back(i);
 
         // strand
         if (rawData.revStrand_.at(i) == 0)
@@ -139,27 +102,6 @@ MappedLookupData::MappedLookupData(const PbiRawMappedData& rawData)
     nDel_ = OrderedLookup<uint32_t>(std::move(delRawData));
 }
 
-//MappedLookupData::MappedLookupData(PbiRawMappedData&& rawData)
-//    : tId_(std::move(rawData.tId_))
-//    , tStart_(std::move(rawData.tStart_))
-//    , tEnd_(std::move(rawData.tEnd_))
-//    , aStart_(std::move(rawData.aStart_))
-//    , aEnd_(std::move(rawData.aEnd_))
-//    , nM_(std::move(rawData.nM_))
-//    , nMM_(std::move(rawData.nMM_))
-//    , mapQV_(std::move(rawData.mapQV_))
-//{
-//    const size_t numElements = rawData.revStrand_.size();
-//    reverseStrand_.reserve(numElements/2);
-//    forwardStrand_.reserve(numElements/2);
-//    for (size_t i = 0; i < numElements; ++i) {
-//        if (rawData.revStrand_.at(i) == 0)
-//            forwardStrand_.push_back(i);
-//        else
-//            reverseStrand_.push_back(i);
-//    }
-//}
-
 // ----------------------------------
 // BarcodeLookupData implementation
 // ----------------------------------
@@ -167,18 +109,11 @@ MappedLookupData::MappedLookupData(const PbiRawMappedData& rawData)
 BarcodeLookupData::BarcodeLookupData(void) { }
 
 BarcodeLookupData::BarcodeLookupData(const PbiRawBarcodeData& rawData)
-    : bcLeft_(rawData.bcLeft_)
-    , bcRight_(rawData.bcRight_)
+    : bcForward_(rawData.bcForward_)
+    , bcReverse_(rawData.bcReverse_)
     , bcQual_(rawData.bcQual_)
-    , ctxtFlag_(rawData.ctxtFlag_)
-{  }
 
-//BarcodeLookupData::BarcodeLookupData(PbiRawBarcodeData&& rawData)
-//    : bcLeft_(std::move(rawData.bcLeft_))
-//    , bcRight_(std::move(rawData.bcRight_))
-//    , bcQual_(std::move(rawData.bcQual_))
-//    , ctxtFlag_(std::move(rawData.ctxtFlag_))
-//{ }
+{  }
 
 // ----------------------------------
 // ReferenceLookupData implementation
@@ -196,41 +131,33 @@ ReferenceLookupData::ReferenceLookupData(const PbiRawReferenceData& rawData)
     }
 }
 
-//ReferenceLookupData::ReferenceLookupData(PbiRawReferenceData&& rawData)
-//{
-//    const size_t numEntries = rawData.entries_.size();
-//    references_.reserve(numEntries);
-//    for (size_t i = 0; i < numEntries; ++i) {
-//        const PbiReferenceEntry& entry = rawData.entries_.at(i);
-//        references_[entry.tId_] = IndexRange(entry.beginRow_, entry.endRow_);
-//    }
-//}
-
 // --------------------------------
 // PbiIndexPrivate implementation
 // --------------------------------
 
 PbiIndexPrivate::PbiIndexPrivate(void)
     : version_(PbiFile::CurrentVersion)
-    , sections_(PbiFile::SUBREAD)
+    , sections_(PbiFile::BASIC)
     , numReads_(0)
 { }
 
 PbiIndexPrivate::PbiIndexPrivate(const PbiRawData& rawIndex)
-    : version_(rawIndex.Version())
+    : filename_(rawIndex.Filename())
+    , version_(rawIndex.Version())
     , sections_(rawIndex.FileSections())
     , numReads_(rawIndex.NumReads())
-    , subreadData_(rawIndex.SubreadData())
+    , basicData_(rawIndex.BasicData())
     , mappedData_(rawIndex.MappedData())
     , referenceData_(rawIndex.ReferenceData())
     , barcodeData_(rawIndex.BarcodeData())
 { }
 
 PbiIndexPrivate::PbiIndexPrivate(PbiRawData&& rawIndex)
-    : version_(std::move(rawIndex.Version()))
+    : filename_(std::move(rawIndex.Filename()))
+    , version_(std::move(rawIndex.Version()))
     , sections_(std::move(rawIndex.FileSections()))
     , numReads_(std::move(rawIndex.NumReads()))
-    , subreadData_(std::move(rawIndex.SubreadData()))
+    , basicData_(std::move(rawIndex.BasicData()))
     , mappedData_(std::move(rawIndex.MappedData()))
     , referenceData_(std::move(rawIndex.ReferenceData()))
     , barcodeData_(std::move(rawIndex.BarcodeData()))
@@ -239,10 +166,11 @@ PbiIndexPrivate::PbiIndexPrivate(PbiRawData&& rawIndex)
 unique_ptr<PbiIndexPrivate> PbiIndexPrivate::DeepCopy(void) const
 {
     std::unique_ptr<PbiIndexPrivate> copy(new PbiIndexPrivate);
+    copy->filename_ = filename_;
     copy->version_  = version_;
     copy->sections_ = sections_;
     copy->numReads_ = numReads_;
-    copy->subreadData_   = subreadData_;
+    copy->basicData_     = basicData_;
     copy->mappedData_    = mappedData_;
     copy->referenceData_ = referenceData_;
     copy->barcodeData_   = barcodeData_;
@@ -286,26 +214,5 @@ PbiIndex& PbiIndex::operator=(PbiIndex&& other)
 
 PbiIndex::~PbiIndex(void) { }
 
-PbiFile::Sections PbiIndex::FileSections(void) const
-{ return d_->sections_; }
-
-bool PbiIndex::HasBarcodeData(void) const
-{ return d_->HasSection(PbiFile::BARCODE); }
-
-bool PbiIndex::HasMappedData(void) const
-{ return d_->HasSection(PbiFile::MAPPED); }
-
-bool PbiIndex::HasReferenceData(void) const
-{ return d_->HasSection(PbiFile::REFERENCE); }
-
-bool PbiIndex::HasSection(const PbiFile::Section section) const
-{ return d_->HasSection(section); }
-
-uint32_t PbiIndex::NumReads(void) const
-{ return d_->numReads_; }
-
-PbiFile::VersionEnum PbiIndex::Version(void) const
-{ return d_->version_; }
-
-const vector<int64_t>& PbiIndex::VirtualFileOffsets(void) const
-{ return d_->subreadData_.fileOffset_; }
+string PbiIndex::Filename(void) const
+{ return d_->filename_; }
diff --git a/src/PbiIndexIO.cpp b/src/PbiIndexIO.cpp
index 9d0d4e7..7b7733b 100644
--- a/src/PbiIndexIO.cpp
+++ b/src/PbiIndexIO.cpp
@@ -73,10 +73,13 @@ void PbiIndexIO::Load(PbiRawData& rawData,
     LoadHeader(rawData, fp);
     const uint32_t numReads = rawData.NumReads();
     if (numReads > 0) {
-        LoadSubreadData(rawData.SubreadData(), numReads, fp);
-        LoadMappedData(rawData.MappedData(), numReads, fp);
-        LoadReferenceData(rawData.ReferenceData(), fp);
-        LoadBarcodeData(rawData.BarcodeData(), numReads, fp);
+        LoadBasicData(rawData.BasicData(), numReads, fp);
+        if (rawData.HasMappedData())
+            LoadMappedData(rawData.MappedData(), numReads, fp);
+        if (rawData.HasReferenceData())
+            LoadReferenceData(rawData.ReferenceData(), fp);
+        if (rawData.HasBarcodeData())
+            LoadBarcodeData(rawData.BarcodeData(), numReads, fp);
     }
 }
 
@@ -85,16 +88,15 @@ void PbiIndexIO::LoadBarcodeData(PbiRawBarcodeData& barcodeData,
                                  BGZF* fp)
 {
     assert(numReads > 0);
+    (void)numReads; // quash warnings building in release mode
 
-    LoadBgzfVector(fp, barcodeData.bcLeft_,   numReads);
-    LoadBgzfVector(fp, barcodeData.bcRight_,  numReads);
-    LoadBgzfVector(fp, barcodeData.bcQual_,   numReads);
-    LoadBgzfVector(fp, barcodeData.ctxtFlag_, numReads);
+    LoadBgzfVector(fp, barcodeData.bcForward_, numReads);
+    LoadBgzfVector(fp, barcodeData.bcReverse_, numReads);
+    LoadBgzfVector(fp, barcodeData.bcQual_,    numReads);
 
-    assert(barcodeData.bcLeft_.size()   == numReads);
-    assert(barcodeData.bcRight_.size()  == numReads);
-    assert(barcodeData.bcQual_.size()   == numReads);
-    assert(barcodeData.ctxtFlag_.size() == numReads);
+    assert(barcodeData.bcForward_.size() == numReads);
+    assert(barcodeData.bcReverse_.size() == numReads);
+    assert(barcodeData.bcQual_.size()    == numReads);
 }
 
 void PbiIndexIO::LoadHeader(PbiRawData& index,
@@ -137,6 +139,7 @@ void PbiIndexIO::LoadMappedData(PbiRawMappedData& mappedData,
                                 BGZF* fp)
 {
     assert(numReads > 0);
+    (void)numReads; // quash warnings building in release mode
 
     LoadBgzfVector(fp, mappedData.tId_,       numReads);
     LoadBgzfVector(fp, mappedData.tStart_,    numReads);
@@ -187,25 +190,28 @@ void PbiIndexIO::LoadReferenceData(PbiRawReferenceData& referenceData,
     }
 }
 
-void PbiIndexIO::LoadSubreadData(PbiRawSubreadData& subreadData,
+void PbiIndexIO::LoadBasicData(PbiRawBasicData& basicData,
                                  const uint32_t numReads,
                                  BGZF* fp)
 {
     assert(numReads > 0);
-
-    LoadBgzfVector(fp, subreadData.rgId_,       numReads);
-    LoadBgzfVector(fp, subreadData.qStart_,     numReads);
-    LoadBgzfVector(fp, subreadData.qEnd_,       numReads);
-    LoadBgzfVector(fp, subreadData.holeNumber_, numReads);
-    LoadBgzfVector(fp, subreadData.readQual_,   numReads);
-    LoadBgzfVector(fp, subreadData.fileOffset_, numReads);
-
-    assert(subreadData.rgId_.size()       == numReads);
-    assert(subreadData.qStart_.size()     == numReads);
-    assert(subreadData.qEnd_.size()       == numReads);
-    assert(subreadData.holeNumber_.size() == numReads);
-    assert(subreadData.readQual_.size()   == numReads);
-    assert(subreadData.fileOffset_.size() == numReads);
+    (void)numReads; // quash warnings building in release mode
+
+    LoadBgzfVector(fp, basicData.rgId_,       numReads);
+    LoadBgzfVector(fp, basicData.qStart_,     numReads);
+    LoadBgzfVector(fp, basicData.qEnd_,       numReads);
+    LoadBgzfVector(fp, basicData.holeNumber_, numReads);
+    LoadBgzfVector(fp, basicData.readQual_,   numReads);
+    LoadBgzfVector(fp, basicData.ctxtFlag_,   numReads);
+    LoadBgzfVector(fp, basicData.fileOffset_, numReads);
+
+    assert(basicData.rgId_.size()       == numReads);
+    assert(basicData.qStart_.size()     == numReads);
+    assert(basicData.qEnd_.size()       == numReads);
+    assert(basicData.holeNumber_.size() == numReads);
+    assert(basicData.readQual_.size()   == numReads);
+    assert(basicData.ctxtFlag_.size()   == numReads);
+    assert(basicData.fileOffset_.size() == numReads);
 }
 
 void PbiIndexIO::Save(const PbiRawData& index,
@@ -219,7 +225,7 @@ void PbiIndexIO::Save(const PbiRawData& index,
     WriteHeader(index, fp);
     const uint32_t numReads = index.NumReads();
     if (numReads > 0) {
-        WriteSubreadData(index.SubreadData(), numReads, fp);
+        WriteBasicData(index.BasicData(), numReads, fp);
 
         if (index.HasMappedData())
             WriteMappedData(index.MappedData(), numReads, fp);
@@ -235,15 +241,14 @@ void PbiIndexIO::WriteBarcodeData(const PbiRawBarcodeData& barcodeData,
                                   BGZF* fp)
 {
     assert(numReads > 0);
-    assert(barcodeData.bcLeft_.size()   == numReads);
-    assert(barcodeData.bcRight_.size()  == numReads);
-    assert(barcodeData.bcQual_.size()   == numReads);
-    assert(barcodeData.ctxtFlag_.size() == numReads);
+    assert(barcodeData.bcForward_.size()   == numReads);
+    assert(barcodeData.bcReverse_.size()   == numReads);
+    assert(barcodeData.bcQual_.size()      == numReads);
+    (void)numReads; // quash warnings building in release mode
 
-    WriteBgzfVector(fp, barcodeData.bcLeft_);
-    WriteBgzfVector(fp, barcodeData.bcRight_);
+    WriteBgzfVector(fp, barcodeData.bcForward_);
+    WriteBgzfVector(fp, barcodeData.bcReverse_);
     WriteBgzfVector(fp, barcodeData.bcQual_);
-    WriteBgzfVector(fp, barcodeData.ctxtFlag_);
 }
 
 void PbiIndexIO::WriteHeader(const PbiRawData& index,
@@ -286,6 +291,7 @@ void PbiIndexIO::WriteMappedData(const PbiRawMappedData& mappedData,
     assert(mappedData.nM_.size()        == numReads);
     assert(mappedData.nMM_.size()       == numReads);
     assert(mappedData.mapQV_.size()     == numReads);
+    (void)numReads; // quash warnings building in release mode
 
     WriteBgzfVector(fp, mappedData.tId_);
     WriteBgzfVector(fp, mappedData.tStart_);
@@ -325,21 +331,24 @@ void PbiIndexIO::WriteReferenceData(const PbiRawReferenceData& referenceData,
     }
 }
 
-void PbiIndexIO::WriteSubreadData(const PbiRawSubreadData& subreadData,
-                                  const uint32_t numReads,
-                                  BGZF* fp)
+void PbiIndexIO::WriteBasicData(const PbiRawBasicData& basicData,
+                                const uint32_t numReads,
+                                BGZF* fp)
 {
-    assert(subreadData.rgId_.size()       == numReads);
-    assert(subreadData.qStart_.size()     == numReads);
-    assert(subreadData.qEnd_.size()       == numReads);
-    assert(subreadData.holeNumber_.size() == numReads);
-    assert(subreadData.readQual_.size()   == numReads);
-    assert(subreadData.fileOffset_.size() == numReads);
-
-    WriteBgzfVector(fp, subreadData.rgId_);
-    WriteBgzfVector(fp, subreadData.qStart_);
-    WriteBgzfVector(fp, subreadData.qEnd_);
-    WriteBgzfVector(fp, subreadData.holeNumber_);
-    WriteBgzfVector(fp, subreadData.readQual_);
-    WriteBgzfVector(fp, subreadData.fileOffset_);
+    assert(basicData.rgId_.size()       == numReads);
+    assert(basicData.qStart_.size()     == numReads);
+    assert(basicData.qEnd_.size()       == numReads);
+    assert(basicData.holeNumber_.size() == numReads);
+    assert(basicData.readQual_.size()   == numReads);
+    assert(basicData.ctxtFlag_.size()   == numReads);
+    assert(basicData.fileOffset_.size() == numReads);
+    (void)numReads; // quash warnings building in release mode
+
+    WriteBgzfVector(fp, basicData.rgId_);
+    WriteBgzfVector(fp, basicData.qStart_);
+    WriteBgzfVector(fp, basicData.qEnd_);
+    WriteBgzfVector(fp, basicData.holeNumber_);
+    WriteBgzfVector(fp, basicData.readQual_);
+    WriteBgzfVector(fp, basicData.ctxtFlag_);
+    WriteBgzfVector(fp, basicData.fileOffset_);
 }
diff --git a/src/PbiIndexIO.h b/src/PbiIndexIO.h
index 9ec001d..1285a68 100644
--- a/src/PbiIndexIO.h
+++ b/src/PbiIndexIO.h
@@ -74,9 +74,9 @@ public:
                                BGZF* fp);
     static void LoadReferenceData(PbiRawReferenceData& referenceData,
                                   BGZF* fp);
-    static void LoadSubreadData(PbiRawSubreadData& subreadData,
-                                const uint32_t numReads,
-                                BGZF* fp);
+    static void LoadBasicData(PbiRawBasicData& basicData,
+                              const uint32_t numReads,
+                              BGZF* fp);
 
     // per-data-field load
     template<typename T>
@@ -96,7 +96,7 @@ public:
                                 BGZF* fp);
     static void WriteReferenceData(const PbiRawReferenceData& referenceData,
                                    BGZF* fp);
-    static void WriteSubreadData(const PbiRawSubreadData& subreadData,
+    static void WriteBasicData(const PbiRawBasicData& basicData,
                                  const uint32_t numReads,
                                  BGZF* fp);
 
diff --git a/src/PbiIndexedBamReader.cpp b/src/PbiIndexedBamReader.cpp
new file mode 100644
index 0000000..685d4c0
--- /dev/null
+++ b/src/PbiIndexedBamReader.cpp
@@ -0,0 +1,187 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiIndexedBamReader.cpp
+/// \brief Implements the PbiIndexedBamReader class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/PbiIndexedBamReader.h"
+#include <htslib/bgzf.h>
+
+#include <iostream>
+
+
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+struct PbiIndexedBamReaderPrivate // private implementation state for PbiIndexedBamReader
+{
+public:
+    PbiIndexedBamReaderPrivate(const string& pbiFilename)
+        : index_(pbiFilename)       // raw PBI data constructed from the given filename
+        , currentBlockReadCount_(0) // blocks_ starts empty, so nothing is readable until Filter()
+    { }
+
+    void ApplyOffsets(void) // set every block's virtualOffset_ from the index's file offsets
+    {
+        const std::vector<int64_t>& fileOffsets = index_.BasicData().fileOffset_;
+        for (IndexResultBlock& block : blocks_)
+            block.virtualOffset_ = fileOffsets.at(block.firstIndex_); // at() throws if out of range
+    }
+
+    void Filter(const PbiFilter& filter) // rebuild blocks_ so it holds the reads accepted by filter
+    {
+        // store request & reset counters
+        filter_ = filter;
+        currentBlockReadCount_ = 0;
+        blocks_.clear();
+
+        // find blocks of reads passing filter criteria (an empty filter selects every read)
+        const uint32_t numReads = index_.NumReads();
+        if (filter_.IsEmpty()) {
+            if (numReads > 0) // no zero-read block: ApplyOffsets' at() would throw on an empty index
+                blocks_.push_back(IndexResultBlock{0, numReads});
+        } else {
+            IndexList indices;
+            indices.reserve(numReads);
+            for (size_t i = 0; i < numReads; ++i) {
+                if (filter_.Accepts(index_, i))
+                    indices.push_back(i);
+            }
+            blocks_ = mergedIndexBlocks(std::move(indices));
+        }
+
+        ApplyOffsets(); // resolve each block's starting virtual offset
+    }
+
+    int ReadRawData(BGZF* bgzf, bam1_t* b) // returns bam_read1's result, or -1 when no blocks remain
+    {
+        // no blocks left to fetch
+        if (blocks_.empty())
+            return -1; // "EOF"
+
+        // if on new block, seek to its first record
+        if (currentBlockReadCount_ == 0) {
+            auto seekResult = bgzf_seek(bgzf, blocks_.at(0).virtualOffset_, SEEK_SET);
+            if (seekResult == -1)
+                throw std::runtime_error("could not seek in BAM file");
+        }
+
+        // read next record
+        auto result = bam_read1(bgzf, b);
+
+        // update counters. if block finished, pop & reset
+        ++currentBlockReadCount_;
+        if (currentBlockReadCount_ == blocks_.at(0).numReads_) {
+            blocks_.pop_front();
+            currentBlockReadCount_ = 0;
+        }
+
+        return result;
+    }
+
+public:
+    PbiFilter filter_;             // most recent filter passed to Filter()
+    PbiRawData index_;             // raw PBI index data
+    IndexResultBlocks blocks_;     // blocks still to be read; the front block is the current one
+    size_t currentBlockReadCount_; // reads already consumed from the front block
+};
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+PbiIndexedBamReader::PbiIndexedBamReader(const PbiFilter& filter,
+                                         const std::string& filename)
+    : PbiIndexedBamReader(filter, BamFile(filename)) // wrap the filename in a BamFile and delegate
+{ }
+
+PbiIndexedBamReader::PbiIndexedBamReader(const PbiFilter& filter,
+                                         const BamFile& bamFile)
+    : PbiIndexedBamReader(bamFile) // construct the unfiltered reader first
+{
+    Filter(filter); // then compute the blocks of reads accepted by the filter
+}
+
+PbiIndexedBamReader::PbiIndexedBamReader(const PbiFilter& filter,
+                                         BamFile&& bamFile)
+    : PbiIndexedBamReader(std::move(bamFile)) // construct the unfiltered reader, moving the file in
+{
+    Filter(filter); // then compute the blocks of reads accepted by the filter
+}
+
+PbiIndexedBamReader::PbiIndexedBamReader(const std::string& bamFilename)
+    : PbiIndexedBamReader(BamFile(bamFilename)) // wrap the filename in a BamFile and delegate
+{ }
+
+PbiIndexedBamReader::PbiIndexedBamReader(const BamFile& bamFile)
+    : BamReader(bamFile)
+    , d_(new internal::PbiIndexedBamReaderPrivate(File().PacBioIndexFilename())) // private state built from the file's PBI filename
+{ }
+
+PbiIndexedBamReader::PbiIndexedBamReader(BamFile&& bamFile)
+    : BamReader(std::move(bamFile))
+    , d_(new internal::PbiIndexedBamReaderPrivate(File().PacBioIndexFilename())) // private state built from the file's PBI filename
+{ }
+
+PbiIndexedBamReader::~PbiIndexedBamReader(void) { } // no explicit cleanup performed here
+
+int PbiIndexedBamReader::ReadRawData(BGZF* bgzf, bam1_t* b)
+{
+    assert(d_); // private state must exist
+    return d_->ReadRawData(bgzf, b); // forward to the private implementation
+}
+
+const PbiFilter& PbiIndexedBamReader::Filter(void) const
+{
+    assert(d_); // private state must exist
+    return d_->filter_; // the filter most recently stored by the setter overload
+}
+
+PbiIndexedBamReader& PbiIndexedBamReader::Filter(const PbiFilter& filter)
+{
+    assert(d_); // private state must exist
+    d_->Filter(filter); // store the filter and recompute the blocks of reads to return
+    return *this; // returning the reader allows call chaining
+}
+
diff --git a/src/PbiRawData.cpp b/src/PbiRawData.cpp
index edcb6d0..a219a55 100644
--- a/src/PbiRawData.cpp
+++ b/src/PbiRawData.cpp
@@ -33,17 +33,50 @@
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
 //
+// File Description
+/// \file PbiRawData.cpp
+/// \brief Implements the classes used for working with raw PBI data.
+//
 // Author: Derek Barnett
 
 #include "pbbam/PbiRawData.h"
 #include "pbbam/BamFile.h"
 #include "pbbam/BamRecord.h"
 #include "PbiIndexIO.h"
+#include <map>
 #include <cassert>
 using namespace PacBio;
 using namespace PacBio::BAM;
 using namespace std;
 
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+static
+string ToString(const RecordType type)
+{
+    // name table for each known RecordType; unlisted values raise runtime_error
+    static const auto typeNames = map<RecordType, string>
+    {
+        { RecordType::POLYMERASE, "POLYMERASE" },
+        { RecordType::HQREGION,   "HQREGION" },
+        { RecordType::SUBREAD,    "SUBREAD" },
+        { RecordType::CCS,        "CCS" },
+        { RecordType::SCRAP,      "SCRAP" },
+        { RecordType::UNKNOWN,    "UNKNOWN" }
+    };
+
+    const auto found = typeNames.find(type);
+    if (found == typeNames.end())
+        throw std::runtime_error("error: unknown RecordType encountered");
+    return found->second;
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
 // ----------------------------------
 // PbiRawBarcodeData implementation
 // ----------------------------------
@@ -52,63 +85,66 @@ PbiRawBarcodeData::PbiRawBarcodeData(void) { }
 
 PbiRawBarcodeData::PbiRawBarcodeData(uint32_t numReads)
 {
-    bcLeft_.reserve(numReads);
-    bcRight_.reserve(numReads);
+    bcForward_.reserve(numReads);
+    bcReverse_.reserve(numReads);
     bcQual_.reserve(numReads);
-    ctxtFlag_.reserve(numReads);
 }
 
 PbiRawBarcodeData::PbiRawBarcodeData(const PbiRawBarcodeData& other)
-    : bcLeft_(other.bcLeft_)
-    , bcRight_(other.bcRight_)
+    : bcForward_(other.bcForward_)
+    , bcReverse_(other.bcReverse_)
     , bcQual_(other.bcQual_)
-    , ctxtFlag_(other.ctxtFlag_)
 { }
 
 PbiRawBarcodeData::PbiRawBarcodeData(PbiRawBarcodeData&& other)
-    : bcLeft_(std::move(other.bcLeft_))
-    , bcRight_(std::move(other.bcRight_))
+    : bcForward_(std::move(other.bcForward_))
+    , bcReverse_(std::move(other.bcReverse_))
     , bcQual_(std::move(other.bcQual_))
-    , ctxtFlag_(std::move(other.ctxtFlag_))
 { }
 
 PbiRawBarcodeData& PbiRawBarcodeData::operator=(const PbiRawBarcodeData& other)
 {
-    bcLeft_ = other.bcLeft_;
-    bcRight_ = other.bcRight_;
+    bcForward_ = other.bcForward_;
+    bcReverse_ = other.bcReverse_;
     bcQual_ = other.bcQual_;
-    ctxtFlag_ =other.ctxtFlag_;
     return *this;
 }
 
 PbiRawBarcodeData& PbiRawBarcodeData::operator=(PbiRawBarcodeData&& other)
 {
-    bcLeft_ = std::move(other.bcLeft_);
-    bcRight_ = std::move(other.bcRight_);
+    bcForward_ = std::move(other.bcForward_);
+    bcReverse_ = std::move(other.bcReverse_);
     bcQual_ = std::move(other.bcQual_);
-    ctxtFlag_ = std::move(other.ctxtFlag_);
     return *this;
 }
 
-bool PbiRawBarcodeData::AddRecord(const BamRecord& b)
+void PbiRawBarcodeData::AddRecord(const BamRecord& b)
 {
-    const BamRecordImpl& impl = b.Impl();
-    const bool hasBcTag = impl.HasTag("bc");
-    const bool hasBqTag = impl.HasTag("bq");
-    const bool hasCxTag = impl.HasTag("cx");
-    const bool hasBarcodeInfo = hasBcTag && hasBqTag && hasCxTag;
-    if (!hasBarcodeInfo)
-        return false;
-
-    const vector<uint16_t> bcValue = impl.TagValue("bc").ToUInt16Array();
-    assert(bcValue.size() == 2);
-    bcLeft_.push_back(bcValue[0]);
-    bcRight_.push_back(bcValue[1]);
-
-    bcQual_.push_back(impl.TagValue("bq").ToUInt8());
-    ctxtFlag_.push_back(impl.TagValue("cx").ToUInt8());
-
-    return true;
+    // check for any barcode data (both required)
+    if (b.HasBarcodes() && b.HasBarcodeQuality()) {
+
+        // fetch data from record
+        const auto barcodes = b.Barcodes();
+        const auto barcodeQuality = b.BarcodeQuality();
+
+        // convert to signed integers (stored unsigned in BAM)
+        const auto bcForward = static_cast<int16_t>(barcodes.first);
+        const auto bcReverse = static_cast<int16_t>(barcodes.second);
+        const auto bcQuality = static_cast<int8_t>(barcodeQuality);
+
+        // only store actual data if all values >= 0
+        if (bcForward >= 0 && bcReverse >=0 && bcQuality >= 0) {
+            bcForward_.push_back(bcForward);
+            bcReverse_.push_back(bcReverse);
+            bcQual_.push_back(bcQuality);
+            return;
+        }
+    }
+
+    // if we get here, at least one value is either missing or is -1
+    bcForward_.push_back(-1);
+    bcReverse_.push_back(-1);
+    bcQual_.push_back(-1);
 }
 
 // ----------------------------------
@@ -182,11 +218,8 @@ PbiRawMappedData& PbiRawMappedData::operator=(PbiRawMappedData&& other)
     return *this;
 }
 
-bool PbiRawMappedData::AddRecord(const BamRecord& b)
+void PbiRawMappedData::AddRecord(const BamRecord& b)
 {
-    if (!b.IsMapped())
-        return false;
-
     tId_.push_back(b.ReferenceId());
     tStart_.push_back(b.ReferenceStart());
     tEnd_.push_back(b.ReferenceEnd());
@@ -195,25 +228,30 @@ bool PbiRawMappedData::AddRecord(const BamRecord& b)
     revStrand_.push_back( (b.AlignedStrand() == Strand::REVERSE ? 1 : 0) );
     mapQV_.push_back(b.MapQuality());
 
-    uint32_t nM = 0;
-    uint32_t nMM = 0;
-    const Cigar& cigar = b.CigarData();
-    auto cigarIter = cigar.cbegin();
-    auto cigarEnd  = cigar.cend();
-    for (; cigarIter != cigarEnd; ++cigarIter) {
-        const CigarOperation& op = (*cigarIter);
-        if (op.Type() == CigarOperationType::SEQUENCE_MATCH)
-            nM += op.Length();
-        else if (op.Type() == CigarOperationType::SEQUENCE_MISMATCH)
-            nMM += op.Length();
-        else if (op.Type() == CigarOperationType::ALIGNMENT_MATCH)
-            throw std::runtime_error("CIGAR operation 'M' is not allowed in PacBio BAM files. Use 'X/=' instead.");
-    }
-    nM_.push_back(nM);
-    nMM_.push_back(nMM);
+    const auto matchesAndMismatches = b.NumMatchesAndMismatches();
+    nM_.push_back(matchesAndMismatches.first);
+    nMM_.push_back(matchesAndMismatches.second);
+}
 
-    return true;
+uint32_t PbiRawMappedData::NumDeletedBasesAt(size_t recordIndex) const
+{ return NumDeletedAndInsertedBasesAt(recordIndex).first; } // .first holds the deletion count
+
+std::pair<uint32_t, uint32_t> PbiRawMappedData::NumDeletedAndInsertedBasesAt(size_t recordIndex) const
+{
+    const auto aStart = aStart_.at(recordIndex); // at() rejects an out-of-range recordIndex
+    const auto aEnd   = aEnd_.at(recordIndex);
+    const auto tStart = tStart_.at(recordIndex);
+    const auto tEnd   = tEnd_.at(recordIndex);
+    const auto nM     = nM_.at(recordIndex);
+    const auto nMM    = nMM_.at(recordIndex);
+    const auto numIns = (aEnd - aStart - nM - nMM); // aEnd - aStart span left after matches and mismatches
+    const auto numDel = (tEnd - tStart - nM - nMM); // tEnd - tStart span left after matches and mismatches
+    return std::make_pair(numDel, numIns);          // order is (deleted, inserted)
 }
+
+uint32_t PbiRawMappedData::NumInsertedBasesAt(size_t recordIndex) const
+{ return NumDeletedAndInsertedBasesAt(recordIndex).second; } // .second holds the insertion count
+
 // ------------------------------------
 // PbiReferenceEntry implementation
 // ------------------------------------
@@ -233,6 +271,12 @@ PbiReferenceEntry::PbiReferenceEntry(ID id)
     , endRow_(UNSET_ROW)
 { }
 
+PbiReferenceEntry::PbiReferenceEntry(ID id, Row beginRow, Row endRow)
+    : tId_(id)            // reference ID for this entry
+    , beginRow_(beginRow) // caller-supplied rows, instead of the UNSET_ROW used by the ID-only ctor
+    , endRow_(endRow)
+{ }
+
 PbiReferenceEntry::PbiReferenceEntry(const PbiReferenceEntry& other)
     : tId_(other.tId_)
     , beginRow_(other.beginRow_)
@@ -294,70 +338,74 @@ PbiRawReferenceData& PbiRawReferenceData::operator=(PbiRawReferenceData&& other)
 // PbiRawSubreadData implementation
 // ----------------------------------
 
-PbiRawSubreadData::PbiRawSubreadData(void) { }
+PbiRawBasicData::PbiRawBasicData(void) { }
 
-PbiRawSubreadData::PbiRawSubreadData(uint32_t numReads)
+PbiRawBasicData::PbiRawBasicData(uint32_t numReads)
 {
     rgId_.reserve(numReads);
     qStart_.reserve(numReads);
     qEnd_.reserve(numReads);
     holeNumber_.reserve(numReads);
     readQual_.reserve(numReads);
+    ctxtFlag_.reserve(numReads);
     fileOffset_.reserve(numReads);
 }
 
-PbiRawSubreadData::PbiRawSubreadData(const PbiRawSubreadData& other)
+PbiRawBasicData::PbiRawBasicData(const PbiRawBasicData& other)
     : rgId_(other.rgId_)
     , qStart_(other.qStart_)
     , qEnd_(other.qEnd_)
     , holeNumber_(other.holeNumber_)
     , readQual_(other.readQual_)
+    , ctxtFlag_(other.ctxtFlag_)
     , fileOffset_(other.fileOffset_)
 { }
 
-PbiRawSubreadData::PbiRawSubreadData(PbiRawSubreadData&& other)
+PbiRawBasicData::PbiRawBasicData(PbiRawBasicData&& other)
     : rgId_(std::move(other.rgId_))
     , qStart_(std::move(other.qStart_))
     , qEnd_(std::move(other.qEnd_))
     , holeNumber_(std::move(other.holeNumber_))
     , readQual_(std::move(other.readQual_))
+    , ctxtFlag_(std::move(other.ctxtFlag_))
     , fileOffset_(std::move(other.fileOffset_))
 { }
 
-PbiRawSubreadData& PbiRawSubreadData::operator=(const PbiRawSubreadData& other)
+PbiRawBasicData& PbiRawBasicData::operator=(const PbiRawBasicData& other)
 {
     rgId_ = other.rgId_;
     qStart_ = other.qStart_;
     qEnd_ = other.qEnd_;
     holeNumber_ = other.holeNumber_;
     readQual_ = other.readQual_;
+    ctxtFlag_ = other.ctxtFlag_;
     fileOffset_ = other.fileOffset_;
     return *this;
 }
 
-PbiRawSubreadData& PbiRawSubreadData::operator=(PbiRawSubreadData&& other)
+PbiRawBasicData& PbiRawBasicData::operator=(PbiRawBasicData&& other)
 {
     rgId_ = std::move(other.rgId_);
     qStart_ = std::move(other.qStart_);
     qEnd_ = std::move(other.qEnd_);
     holeNumber_ = std::move(other.holeNumber_);
     readQual_ = std::move(other.readQual_);
+    ctxtFlag_ = std::move(other.ctxtFlag_);
     fileOffset_ = std::move(other.fileOffset_);
     return *this;
 }
 
-void PbiRawSubreadData::AddRecord(const BamRecord& b, int64_t offset)
+void PbiRawBasicData::AddRecord(const BamRecord& b, int64_t offset)
 {
-
-    string rgId = b.ReadGroupId();
-    if (rgId.empty()) {
-        // calculate
-    }
+    // read group ID
+    auto rgId = b.ReadGroupId();
+    if (rgId.empty())
+        rgId = MakeReadGroupId(b.MovieName(), internal::ToString(b.Type()));
     const uint32_t rawid = std::stoul(rgId, nullptr, 16);
     const int32_t id = static_cast<int32_t>(rawid);
-
     rgId_.push_back(id);
 
+    // query start/end
     if (b.Type() == RecordType::CCS) {
         qStart_.push_back(-1);
         qEnd_.push_back(-1);
@@ -366,16 +414,12 @@ void PbiRawSubreadData::AddRecord(const BamRecord& b, int64_t offset)
         qEnd_.push_back(b.QueryEnd());
     }
 
-    if (b.HasHoleNumber())
-        holeNumber_.push_back(b.HoleNumber());
-    else
-        holeNumber_.push_back(0); // TODO: what to do?
-
-    if (b.HasReadAccuracy())
-        readQual_.push_back(b.ReadAccuracy());
-    else
-        readQual_.push_back(0); // TODO: what to do?
+    // add'l basic data
+    holeNumber_.push_back(b.HasHoleNumber() ? b.HoleNumber() : 0);
+    readQual_.push_back(b.HasReadAccuracy() ? static_cast<float>(b.ReadAccuracy()) : 0.0f);
+    ctxtFlag_.push_back(b.HasLocalContextFlags() ? b.LocalContextFlags() : LocalContextFlags::NO_LOCAL_CONTEXT);
 
+    // virtual offset of record start
     fileOffset_.push_back(offset);
 }
 
@@ -390,7 +434,8 @@ PbiRawData::PbiRawData(void)
 { }
 
 PbiRawData::PbiRawData(const string& pbiFilename)
-    : version_(PbiFile::CurrentVersion)
+    : filename_(pbiFilename)
+    , version_(PbiFile::CurrentVersion)
     , sections_(PbiFile::ALL)
     , numReads_(0)
 {
@@ -398,46 +443,50 @@ PbiRawData::PbiRawData(const string& pbiFilename)
 }
 
 PbiRawData::PbiRawData(const PbiRawData& other)
-    : version_(other.version_)
+    : filename_(other.filename_)
+    , version_(other.version_)
     , sections_(other.sections_)
     , numReads_(other.numReads_)
     , barcodeData_(other.barcodeData_)
     , mappedData_(other.mappedData_)
     , referenceData_(other.referenceData_)
-    , subreadData_(other.subreadData_)
+    , basicData_(other.basicData_)
 { }
 
 PbiRawData::PbiRawData(PbiRawData&& other)
-    : version_(std::move(other.version_))
+    : filename_(std::move(other.filename_))
+    , version_(std::move(other.version_))
     , sections_(std::move(other.sections_))
     , numReads_(std::move(other.numReads_))
     , barcodeData_(std::move(other.barcodeData_))
     , mappedData_(std::move(other.mappedData_))
     , referenceData_(std::move(other.referenceData_))
-    , subreadData_(std::move(other.subreadData_))
+    , basicData_(std::move(other.basicData_))
 { }
 
 PbiRawData& PbiRawData::operator=(const PbiRawData& other)
 {
+    filename_ = other.filename_;
     version_ = other.version_;
     sections_ = other.sections_;
     numReads_ = other.numReads_;
     barcodeData_ = other.barcodeData_;
     mappedData_ = other.mappedData_;
     referenceData_ = other.referenceData_;
-    subreadData_ = other.subreadData_;
+    basicData_ = other.basicData_;
     return *this;
 }
 
 PbiRawData& PbiRawData::operator=(PbiRawData&& other)
 {
+    filename_ = std::move(other.filename_);
     version_ = std::move(other.version_);
     sections_ = std::move(other.sections_);
     numReads_ = std::move(other.numReads_);
     barcodeData_ = std::move(other.barcodeData_);
     mappedData_ = std::move(other.mappedData_);
     referenceData_ = std::move(other.referenceData_);
-    subreadData_ = std::move(other.subreadData_);
+    basicData_ = std::move(other.basicData_);
     return *this;
 }
 
diff --git a/src/ProgramInfo.cpp b/src/ProgramInfo.cpp
index 45c8680..75f193a 100644
--- a/src/ProgramInfo.cpp
+++ b/src/ProgramInfo.cpp
@@ -33,7 +33,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file ProgramInfo.cpp
+/// \brief Implements the ProgramInfo class.
+//
 // Author: Derek Barnett
 
 #include "pbbam/ProgramInfo.h"
diff --git a/src/FilterEngine.cpp b/src/QNameQuery.cpp
similarity index 53%
rename from src/FilterEngine.cpp
rename to src/QNameQuery.cpp
index 1f47967..e544664 100644
--- a/src/FilterEngine.cpp
+++ b/src/QNameQuery.cpp
@@ -32,45 +32,73 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file QNameQuery.cpp
+/// \brief Implements the QNameQuery class.
+//
 // Author: Derek Barnett
 
-#include "pbbam/internal/FilterEngine.h"
+#include "pbbam/QNameQuery.h"
+#include "pbbam/CompositeBamReader.h"
+#include <boost/optional.hpp>
+#include <cassert>
 using namespace PacBio;
 using namespace PacBio::BAM;
 using namespace PacBio::BAM::internal;
 using namespace std;
 
-namespace PacBio {
-namespace BAM {
-namespace internal {
-
-} // namespace internal
-} // namespace BAM
-} // namespace PacBio
+struct QNameQuery::QNameQueryPrivate
+{
+public:
+    QNameQueryPrivate(const DataSet& dataset)
+        : reader_(new SequentialCompositeBamReader(dataset))
+        , nextRecord_(boost::none)
+    { }
 
+    bool GetNext(vector<BamRecord>& records)
+    {
+        records.clear();
 
-FilterEngine::FilterEngine(void) { }
+        string groupRecordName;
 
-bool FilterEngine::Accepts(const BamRecord& r) const
-{
-//        foreach ( const FilterParameter& param, parameters_ ) {
-//            if (!param.Accepts(r))
-//                return false;
-//        }
-//        return true;
-    (void)r;
-    return true;
-}
+        if (nextRecord_.is_initialized()) {
+            BamRecord r = nextRecord_.get();
+            groupRecordName = r.FullName();
+            records.push_back(std::move(r));
+            nextRecord_ = boost::none;
+        }
 
-bool FilterEngine::Accepts(vector<BamRecord>& r) const
-{
-    size_t i = 0;
-    while (i < r.size()) {
-        if (!Accepts(r.at(i)))
-            r.erase(r.begin() + i);
-        else
-            ++i;
+        BamRecord record;
+        while (reader_->GetNext(record)) {
+            if (records.empty()) {
+                groupRecordName = record.FullName();
+                records.push_back(record);
+            }
+            else {
+                assert(!records.empty());
+                if (record.FullName() == groupRecordName)
+                    records.push_back(record);
+                else {
+                    nextRecord_ = record;
+                    return true;
+                }
+            }
+        }
+        return !records.empty();
     }
-    return !r.empty();
-}
+
+public:
+    unique_ptr<SequentialCompositeBamReader> reader_;
+    boost::optional<BamRecord> nextRecord_;
+};
+
+QNameQuery::QNameQuery(const DataSet& dataset)
+    : internal::IGroupQuery()
+    , d_(new QNameQueryPrivate(dataset)) // private state creates the sequential reader over dataset
+{ }
+
+QNameQuery::~QNameQuery(void) { } // no explicit cleanup performed here
+
+bool QNameQuery::GetNext(vector<BamRecord>& records)
+{ return d_->GetNext(records); } // fills records with the next run of consecutive same-FullName records; false when none remain
diff --git a/src/QualityValue.cpp b/src/QualityValue.cpp
index 200b96b..e9f63c9 100644
--- a/src/QualityValue.cpp
+++ b/src/QualityValue.cpp
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file QualityValue.cpp
+/// \brief Implements the QualityValue class.
+//
 // Author: Derek Barnett
 
 #include "pbbam/QualityValue.h"
diff --git a/tests/src/test_TimeUtils.cpp b/src/ReadAccuracyQuery.cpp
similarity index 65%
copy from tests/src/test_TimeUtils.cpp
copy to src/ReadAccuracyQuery.cpp
index 7ab9fa5..8535189 100644
--- a/tests/src/test_TimeUtils.cpp
+++ b/src/ReadAccuracyQuery.cpp
@@ -32,28 +32,40 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file ReadAccuracyQuery.cpp
+/// \brief Implements the ReadAccuracyQuery class.
+//
 // Author: Derek Barnett
 
-#ifdef PBBAM_TESTING
-#define private public
-#endif
-
-#include <gtest/gtest.h>
-#include <pbbam/../../src/TimeUtils.h>
-
+#include "pbbam/ReadAccuracyQuery.h"
+#include "pbbam/PbiFilterTypes.h"
+#include "pbbam/CompositeBamReader.h"
 using namespace PacBio;
 using namespace PacBio::BAM;
 using namespace PacBio::BAM::internal;
 using namespace std;
 
-TEST(TimeUtilsTest, ToIso8601)
+struct ReadAccuracyQuery::ReadAccuracyQueryPrivate
 {
-    const time_t rawTime = 436428750L;
-    const auto timestamp = std::chrono::system_clock::from_time_t(rawTime);
+    ReadAccuracyQueryPrivate(const Accuracy accuracy,
+                             const Compare::Type compareType,
+                             const DataSet& dataset)
+        : reader_(PbiReadAccuracyFilter(accuracy, compareType), dataset)
+    { }
+
+    PbiFilterCompositeBamReader<Compare::None> reader_; // unsorted
+};
+
+ReadAccuracyQuery::ReadAccuracyQuery(const Accuracy accuracy,
+                                     const Compare::Type compareType,
+                                     const DataSet& dataset)
+    : internal::IQuery()
+    , d_(new ReadAccuracyQueryPrivate(accuracy, compareType, dataset)) // private state builds a reader filtered by PbiReadAccuracyFilter
+{ }
+
+ReadAccuracyQuery::~ReadAccuracyQuery(void) { } // no explicit cleanup performed here
 
-    // can't hardcode expected (since we rely on localtime())
-    const std::string& expected = "1983-10-31T06:12:30Z";
-    const std::string& actual = internal::ToIso8601(timestamp);
-    EXPECT_EQ(expected, actual);
-}
+bool ReadAccuracyQuery::GetNext(BamRecord &r)
+{ return d_->reader_.GetNext(r); } // forwards to the filtered composite reader
diff --git a/src/ReadGroupInfo.cpp b/src/ReadGroupInfo.cpp
index b48c602..023f388 100644
--- a/src/ReadGroupInfo.cpp
+++ b/src/ReadGroupInfo.cpp
@@ -32,15 +32,22 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file ReadGroupInfo.cpp
+/// \brief Implements the ReadGroupInfo class.
+//
 // Author: Derek Barnett
 
 #include "pbbam/ReadGroupInfo.h"
+#include "ChemistryTable.h"
 #include "SequenceUtils.h"
 #include <cram/md5.h>
-#include <cstdio>
+#include <iomanip>
 #include <set>
 #include <sstream>
+#include <stdexcept>
+#include <cstdio>
 using namespace PacBio;
 using namespace PacBio::BAM;
 using namespace std;
@@ -49,47 +56,69 @@ namespace PacBio {
 namespace BAM {
 namespace internal {
 
-static const string token_ID = string("ID");
-static const string token_CN = string("CN");
-static const string token_DS = string("DS");
-static const string token_DT = string("DT");
-static const string token_FO = string("FO");
-static const string token_KS = string("KS");
-static const string token_LB = string("LB");
-static const string token_PG = string("PG");
-static const string token_PI = string("PI");
-static const string token_PL = string("PL");
-static const string token_PU = string("PU");
-static const string token_SM = string("SM");
-
-static const string feature_DQ = string("DeletionQV");
-static const string feature_DT = string("DeletionTag");
-static const string feature_IQ = string("InsertionQV");
-static const string feature_MQ = string("MergeQV");
-static const string feature_SQ = string("SubstitutionQV");
-static const string feature_ST = string("SubstitutionTag");
-static const string feature_IP = string("Ipd");
-static const string feature_PW = string("PulseWidth");
-static const string feature_PM = string("PkMid");
-static const string feature_PA = string("PkMean");
-static const string feature_LT = string("Label");
-static const string feature_PQ = string("LabelQV");
-static const string feature_PT = string("AltLabel");
-static const string feature_PV = string("AltLabelQV");
-static const string feature_PG = string("PulseMergeQV");
-static const string feature_PC = string("PulseCall");
-static const string feature_PD = string("PrePulseFrames");
-static const string feature_PX = string("PulseCallWidth");
-
-static const string token_RT = string("READTYPE");
-static const string token_BK = string("BINDINGKIT");
-static const string token_SK = string("SEQUENCINGKIT");
-static const string token_BV = string("BASECALLERVERSION");
-static const string token_FR = string("FRAMERATEHZ");
-static const string token_CT = string("CONTROL");
-
-static const string codec_RAW = string("Frames");
-static const string codec_V1  = string("CodecV1");
+static const string sam_ID = string{ "ID" };
+static const string sam_CN = string{ "CN" };
+static const string sam_DS = string{ "DS" };
+static const string sam_DT = string{ "DT" };
+static const string sam_FO = string{ "FO" };
+static const string sam_KS = string{ "KS" };
+static const string sam_LB = string{ "LB" };
+static const string sam_PG = string{ "PG" };
+static const string sam_PI = string{ "PI" };
+static const string sam_PL = string{ "PL" };
+static const string sam_PM = string{ "PM" };
+static const string sam_PU = string{ "PU" };
+static const string sam_SM = string{ "SM" };
+
+static const string feature_DQ = string{ "DeletionQV" };
+static const string feature_DT = string{ "DeletionTag" };
+static const string feature_IQ = string{ "InsertionQV" };
+static const string feature_MQ = string{ "MergeQV" };
+static const string feature_SQ = string{ "SubstitutionQV" };
+static const string feature_ST = string{ "SubstitutionTag" };
+static const string feature_IP = string{ "Ipd" };
+static const string feature_PW = string{ "PulseWidth" };
+static const string feature_PM = string{ "PkMid" };
+static const string feature_PA = string{ "PkMean" };
+static const string feature_PI = string{ "PkMid2" };
+static const string feature_PS = string{ "PkMean2" };
+static const string feature_LT = string{ "Label" };
+static const string feature_PQ = string{ "LabelQV" };
+static const string feature_PT = string{ "AltLabel" };
+static const string feature_PV = string{ "AltLabelQV" };
+static const string feature_PG = string{ "PulseMergeQV" };
+static const string feature_PC = string{ "PulseCall" };
+static const string feature_PD = string{ "PrePulseFrames" };
+static const string feature_PX = string{ "PulseCallWidth" };
+static const string feature_SF = string{ "StartFrame" };
+
+static const string token_RT = string{ "READTYPE" };
+static const string token_BK = string{ "BINDINGKIT" };
+static const string token_SK = string{ "SEQUENCINGKIT" };
+static const string token_BV = string{ "BASECALLERVERSION" };
+static const string token_FR = string{ "FRAMERATEHZ" };
+static const string token_CT = string{ "CONTROL" };
+
+static const string token_BF = string{ "BarcodeFile" };
+static const string token_BH = string{ "BarcodeHash" };
+static const string token_BC = string{ "BarcodeCount" };
+static const string token_BM = string{ "BarcodeMode" };
+static const string token_BQ = string{ "BarcodeQuality" };
+
+static const string codec_RAW = string{ "Frames" };
+static const string codec_V1  = string{ "CodecV1" };
+
+static const string barcodemode_NONE = string{ "None" };
+static const string barcodemode_SYM  = string{ "Symmetric" };
+static const string barcodemode_ASYM = string{ "Asymmetric" };
+
+static const string barcodequal_NONE  = string{ "None" };
+static const string barcodequal_SCORE = string{ "Score" };
+static const string barcodequal_PROB  = string{ "Probability" };
+
+static const string platformModelType_ASTRO  = string{ "ASTRO" };
+static const string platformModelType_RS     = string{ "RS" };
+static const string platformModelType_SEQUEL = string{ "SEQUEL" };
 
 static
 string BaseFeatureName(const BaseFeature& feature)
@@ -105,6 +134,8 @@ string BaseFeatureName(const BaseFeature& feature)
         case BaseFeature::PULSE_WIDTH      : return feature_PW;
         case BaseFeature::PKMID            : return feature_PM;
         case BaseFeature::PKMEAN           : return feature_PA;
+        case BaseFeature::PKMID2           : return feature_PI;
+        case BaseFeature::PKMEAN2          : return feature_PS;
         case BaseFeature::LABEL_QV         : return feature_PQ;
         case BaseFeature::ALT_LABEL        : return feature_PT;
         case BaseFeature::ALT_LABEL_QV     : return feature_PV;
@@ -112,10 +143,11 @@ string BaseFeatureName(const BaseFeature& feature)
         case BaseFeature::PULSE_CALL       : return feature_PC;
         case BaseFeature::PRE_PULSE_FRAMES : return feature_PD;
         case BaseFeature::PULSE_CALL_WIDTH : return feature_PX;
+    case BaseFeature::START_FRAME          : return feature_SF;
         default:
-            throw std::runtime_error("unrecognized base feature");
+            throw std::runtime_error{ "unrecognized base feature" };
     }
-    return string();
+    return string{ }; // unreachable
 }
 
 static
@@ -125,13 +157,55 @@ string FrameCodecName(const FrameCodec& codec)
         case FrameCodec::RAW : return codec_RAW;
         case FrameCodec::V1  : return codec_V1;
         default:
-            throw std::runtime_error("unrecognized frame codec");
+            throw std::runtime_error{ "unrecognized frame codec" };
+    }
+    return string{ }; // unreachable
+}
+
+// Text form of a barcode mode; unknown enumerators raise std::runtime_error.
+static string BarcodeModeName(const BarcodeModeType& mode)
+{
+    if (mode == BarcodeModeType::NONE)
+        return barcodemode_NONE;
+    if (mode == BarcodeModeType::SYMMETRIC)
+        return barcodemode_SYM;
+    if (mode == BarcodeModeType::ASYMMETRIC)
+        return barcodemode_ASYM;
+
+    throw std::runtime_error{ "unrecognized barcode mode" };
+}
+
+// Text form of a barcode quality type; unknown enumerators raise std::runtime_error.
+static string BarcodeQualityName(const BarcodeQualityType& type)
+{
+    if (type == BarcodeQualityType::NONE)
+        return barcodequal_NONE;
+    if (type == BarcodeQualityType::SCORE)
+        return barcodequal_SCORE;
+    if (type == BarcodeQualityType::PROBABILITY)
+        return barcodequal_PROB;
+
+    throw std::runtime_error{ "unrecognized barcode quality type" };
+}
+
+static
+string PlatformModelName(const PlatformModelType& type)   // enum -> text name; throws std::runtime_error if unrecognized
+{
+    switch (type) {
+        case PlatformModelType::ASTRO  : return platformModelType_ASTRO;
+        case PlatformModelType::RS     : return platformModelType_RS;
+        case PlatformModelType::SEQUEL : return platformModelType_SEQUEL;
+        default:
+            throw std::runtime_error{ "unrecognized platform model" };
     }
-    return string();
+    return string{ }; // unreachable
 }
 
-static map<string, BaseFeature> nameToFeature;
-static map<string, FrameCodec>  nameToCodec;
+static map<string, BaseFeature>        nameToFeature;   // text name -> enum lookup tables, filled on first use by the InitNameTo*() helpers
+static map<string, FrameCodec>         nameToCodec;
+static map<string, BarcodeModeType>    nameToBarcodeMode;
+static map<string, BarcodeQualityType> nameToBarcodeQuality;
+static map<string, PlatformModelType>  nameToPlatformModel;
 
 static inline
 void InitNameToFeature(void)
@@ -147,6 +221,8 @@ void InitNameToFeature(void)
         nameToFeature[feature_PW] = BaseFeature::PULSE_WIDTH;
         nameToFeature[feature_PM] = BaseFeature::PKMID;
         nameToFeature[feature_PA] = BaseFeature::PKMEAN;
+        nameToFeature[feature_PI] = BaseFeature::PKMID2;
+        nameToFeature[feature_PS] = BaseFeature::PKMEAN2;
         nameToFeature[feature_PQ] = BaseFeature::LABEL_QV;
         nameToFeature[feature_PT] = BaseFeature::ALT_LABEL;
         nameToFeature[feature_PV] = BaseFeature::ALT_LABEL_QV;
@@ -154,6 +230,7 @@ void InitNameToFeature(void)
         nameToFeature[feature_PG] = BaseFeature::PULSE_MERGE_QV;
         nameToFeature[feature_PD] = BaseFeature::PRE_PULSE_FRAMES;
         nameToFeature[feature_PX] = BaseFeature::PULSE_CALL_WIDTH;
+        nameToFeature[feature_SF] = BaseFeature::START_FRAME;
     }
 }
 
@@ -167,14 +244,48 @@ void InitNameToCodec(void)
 }
 
 static inline
-bool IsBaseFeature(const std::string& name)
+void InitNameToBarcodeMode(void)
+{
+    // populate the name -> barcode-mode lookup table on first use only
+    if (!nameToBarcodeMode.empty()) return;
+    nameToBarcodeMode.emplace(barcodemode_NONE, BarcodeModeType::NONE);
+    nameToBarcodeMode.emplace(barcodemode_SYM,  BarcodeModeType::SYMMETRIC);
+    nameToBarcodeMode.emplace(barcodemode_ASYM, BarcodeModeType::ASYMMETRIC);
+}
+
+static inline
+void InitNameToBarcodeQuality(void)
+{
+    // populate the name -> barcode-quality lookup table on first use only
+    if (!nameToBarcodeQuality.empty()) return;
+    nameToBarcodeQuality.emplace(barcodequal_NONE,  BarcodeQualityType::NONE);
+    nameToBarcodeQuality.emplace(barcodequal_SCORE, BarcodeQualityType::SCORE);
+    nameToBarcodeQuality.emplace(barcodequal_PROB,  BarcodeQualityType::PROBABILITY);
+}
+
+static inline
+void InitNameToPlatformModel(void)
+{
+    // populate the name -> platform-model lookup table on first use only
+    if (!nameToPlatformModel.empty()) return;
+    nameToPlatformModel.emplace(platformModelType_ASTRO,  PlatformModelType::ASTRO);
+    nameToPlatformModel.emplace(platformModelType_RS,     PlatformModelType::RS);
+    nameToPlatformModel.emplace(platformModelType_SEQUEL, PlatformModelType::SEQUEL);
+}
+
+static inline   // true when 'name' begins with the literal "Barcode"
+bool IsLikelyBarcodeKey(const string& name)
+{ return name.rfind("Barcode", 0) == 0; }
+
+static inline
+bool IsBaseFeature(const string& name)   // true if 'name' is a key of the nameToFeature table
 {
     InitNameToFeature();
     return nameToFeature.find(name) != nameToFeature.cend();
 }
 
 static inline
-BaseFeature BaseFeatureFromName(const std::string& name)
+BaseFeature BaseFeatureFromName(const string& name)
 {
     InitNameToFeature();
     return nameToFeature.at(name);
@@ -187,16 +298,39 @@ FrameCodec FrameCodecFromName(const string& name)
     return nameToCodec.at(name);
 }
 
+static inline   // text name -> BarcodeModeType; map::at() throws std::out_of_range for an unknown name
+BarcodeModeType BarcodeModeFromName(const string& name)
+{
+    InitNameToBarcodeMode();   // make sure the lookup table has been filled
+    return nameToBarcodeMode.at(name);
+}
+
+static inline   // text name -> BarcodeQualityType; map::at() throws std::out_of_range for an unknown name
+BarcodeQualityType BarcodeQualityFromName(const string& name)
+{
+    InitNameToBarcodeQuality();   // make sure the lookup table has been filled
+    return nameToBarcodeQuality.at(name);
+}
+
+static inline   // text name -> PlatformModelType; map::at() throws std::out_of_range for an unknown name
+PlatformModelType PlatformModelFromName(const string& name)
+{
+    InitNameToPlatformModel();   // make sure the lookup table has been filled
+    return nameToPlatformModel.at(name);
+}
+
 } // namespace internal
 
 ReadGroupInfo::ReadGroupInfo(void)
-    : readType_("UNKNOWN")
+    : platformModel_(PlatformModelType::SEQUEL)   // a default-constructed read group uses the SEQUEL platform model
+    , readType_("UNKNOWN")
     , ipdCodec_(FrameCodec::V1)
     , pulseWidthCodec_(FrameCodec::V1)
 { }
 
 ReadGroupInfo::ReadGroupInfo(const std::string& id)
     : id_(id)
+    , platformModel_(PlatformModelType::SEQUEL)
     , readType_("UNKNOWN")
     , ipdCodec_(FrameCodec::V1)
     , pulseWidthCodec_(FrameCodec::V1)
@@ -206,7 +340,21 @@ ReadGroupInfo::ReadGroupInfo(const std::string& movieName,
                              const std::string& readType)
     : id_(MakeReadGroupId(movieName, readType))
     , movieName_(movieName)
+    , platformModel_(PlatformModelType::SEQUEL)
+    , readType_(readType)
+    , ipdCodec_(FrameCodec::V1)
+    , pulseWidthCodec_(FrameCodec::V1)
+{ }
+
+ReadGroupInfo::ReadGroupInfo(const std::string& movieName,
+                             const std::string& readType,
+                             const PlatformModelType platform)   // same as the two-argument form, with an explicit platform model
+    : id_(MakeReadGroupId(movieName, readType))   // the ID is derived from movie name + read type
+    , movieName_(movieName)
+    , platformModel_(platform)
     , readType_(readType)
+    , ipdCodec_(FrameCodec::V1)   // both frame codecs start out as V1
+    , pulseWidthCodec_(FrameCodec::V1)
 { }
 
 ReadGroupInfo::ReadGroupInfo(const ReadGroupInfo& other)
@@ -220,6 +368,7 @@ ReadGroupInfo::ReadGroupInfo(const ReadGroupInfo& other)
     , predictedInsertSize_(other.predictedInsertSize_)
     , movieName_(other.movieName_)
     , sample_(other.sample_)
+    , platformModel_(other.platformModel_)
     , readType_(other.readType_)
     , bindingKit_(other.bindingKit_)
     , sequencingKit_(other.sequencingKit_)
@@ -228,6 +377,12 @@ ReadGroupInfo::ReadGroupInfo(const ReadGroupInfo& other)
     , control_(other.control_)
     , ipdCodec_(other.ipdCodec_)
     , pulseWidthCodec_(other.pulseWidthCodec_)
+    , hasBarcodeData_(other.hasBarcodeData_)
+    , barcodeFile_(other.barcodeFile_)
+    , barcodeHash_(other.barcodeHash_)
+    , barcodeCount_(other.barcodeCount_)
+    , barcodeMode_(other.barcodeMode_)
+    , barcodeQuality_(other.barcodeQuality_)
     , features_(other.features_)
 {  }
 
@@ -242,6 +397,7 @@ ReadGroupInfo::ReadGroupInfo(ReadGroupInfo&& other)
     , predictedInsertSize_(std::move(other.predictedInsertSize_))
     , movieName_(std::move(other.movieName_))
     , sample_(std::move(other.sample_))
+    , platformModel_(std::move(other.platformModel_))
     , readType_(std::move(other.readType_))
     , bindingKit_(std::move(other.bindingKit_))
     , sequencingKit_(std::move(other.sequencingKit_))
@@ -250,6 +406,12 @@ ReadGroupInfo::ReadGroupInfo(ReadGroupInfo&& other)
     , control_(std::move(other.control_))
     , ipdCodec_(std::move(other.ipdCodec_))
     , pulseWidthCodec_(std::move(other.pulseWidthCodec_))
+    , hasBarcodeData_(std::move(other.hasBarcodeData_))
+    , barcodeFile_(std::move(other.barcodeFile_))
+    , barcodeHash_(std::move(other.barcodeHash_))
+    , barcodeCount_(std::move(other.barcodeCount_))
+    , barcodeMode_(std::move(other.barcodeMode_))
+    , barcodeQuality_(std::move(other.barcodeQuality_))
     , features_(std::move(other.features_))
 { }
 
@@ -264,6 +426,7 @@ ReadGroupInfo& ReadGroupInfo::operator=(const ReadGroupInfo& other)
     keySequence_ = other.keySequence_;
     library_ = other.library_;
     programs_ = other.programs_;
+    platformModel_ = other.platformModel_;
     predictedInsertSize_ = other.predictedInsertSize_;
     movieName_ = other.movieName_;
     sample_ = other.sample_;
@@ -275,6 +438,12 @@ ReadGroupInfo& ReadGroupInfo::operator=(const ReadGroupInfo& other)
     control_ = other.control_;
     ipdCodec_ = other.ipdCodec_;
     pulseWidthCodec_ = other.pulseWidthCodec_;
+    hasBarcodeData_ = other.hasBarcodeData_;
+    barcodeFile_  = other.barcodeFile_;
+    barcodeHash_ = other.barcodeHash_;
+    barcodeCount_ = other.barcodeCount_;
+    barcodeMode_ = other.barcodeMode_;
+    barcodeQuality_ = other.barcodeQuality_;
     features_ = other.features_;
     return *this;
 }
@@ -288,6 +457,7 @@ ReadGroupInfo& ReadGroupInfo::operator=(ReadGroupInfo&& other)
     keySequence_ = std::move(other.keySequence_);
     library_ = std::move(other.library_);
     programs_ = std::move(other.programs_);
+    platformModel_ = std::move(other.platformModel_);
     predictedInsertSize_ = std::move(other.predictedInsertSize_);
     movieName_ = std::move(other.movieName_);
     sample_ = std::move(other.sample_);
@@ -299,6 +469,12 @@ ReadGroupInfo& ReadGroupInfo::operator=(ReadGroupInfo&& other)
     control_ = std::move(other.control_);
     ipdCodec_ = std::move(other.ipdCodec_);
     pulseWidthCodec_ = std::move(other.pulseWidthCodec_);
+    hasBarcodeData_ = std::move(other.hasBarcodeData_);
+    barcodeFile_  = std::move(other.barcodeFile_);
+    barcodeHash_ = std::move(other.barcodeHash_);
+    barcodeCount_ = std::move(other.barcodeCount_);
+    barcodeMode_ = std::move(other.barcodeMode_);
+    barcodeQuality_ = std::move(other.barcodeQuality_);
     features_ = std::move(other.features_);
     return *this;
 }
@@ -309,36 +485,67 @@ void ReadGroupInfo::DecodeSamDescription(const std::string& description)
     // for each, split on equal
     //    determine name ->
 
-    const vector<string>& tokens = internal::Split(description, ';');
+    auto tokens = internal::Split(description, ';');
     if (tokens.empty())
         return;
 
-    // iterate over tokens
-    auto tokenEnd  = tokens.cend();
-    for (auto tokenIter = tokens.cbegin(); tokenIter != tokenEnd; ++tokenIter) {
+    bool hasBarcodeFile = false;
+    bool hasBarcodeHash = false;
+    bool hasBarcodeCount = false;
+    bool hasBarcodeMode = false;
+    bool hasBarcodeQuality = false;
 
-        const string& token = *tokenIter;
+    // iterate over tokens
+    for (auto&& token : tokens) {
 
-        const size_t foundEqual = token.find('=');
+        const auto foundEqual = token.find('=');
         if (foundEqual == string::npos)
             continue;
 
-        const string& key = token.substr(0,foundEqual);
-        const string& value = token.substr(foundEqual+1);
+        const auto key = token.substr(0,foundEqual);
+        const auto value = token.substr(foundEqual+1);
 
+        // 'mandatory' items
         if      (key == internal::token_RT) readType_ = value;
         else if (key == internal::token_BK) bindingKit_ = value;
         else if (key == internal::token_BV) basecallerVersion_ = value;
         else if (key == internal::token_SK) sequencingKit_ = value;
         else if (key == internal::token_FR) frameRateHz_ = value;
-        else if (key == internal::token_CT) control_ = value == "TRUE";
-        else if (internal::IsBaseFeature(key)) {
+        else if (key == internal::token_CT) control_ = (value == "TRUE");
+
+        // base features
+        else if (internal::IsBaseFeature(key))
             features_[internal::BaseFeatureFromName(key)] = value;
-        } 
+
+        // barcode data
+        else if (internal::IsLikelyBarcodeKey(key)) {
+            if (key == internal::token_BF) {
+                barcodeFile_ = value;
+                hasBarcodeFile = true;
+            }
+            else if (key == internal::token_BH) {
+                barcodeHash_ = value;
+                hasBarcodeHash = true;
+            }
+            else if (key == internal::token_BC) {
+                barcodeCount_ = static_cast<size_t>(std::stoul(value));
+                hasBarcodeCount = true;
+            }
+            else if (key == internal::token_BM) {
+                barcodeMode_ = internal::BarcodeModeFromName(value);
+                hasBarcodeMode = true;
+            }
+            else if (key == internal::token_BQ) {
+                barcodeQuality_ = internal::BarcodeQualityFromName(value);
+                hasBarcodeQuality = true;
+            }
+        }
+
+        // frame codecs
         else {
-            const vector<string> keyParts = internal::Split(key, ':');
+            const auto keyParts = internal::Split(key, ':');
             if (keyParts.size() == 2) {
-                const string& subkey = keyParts.at(0);
+                const auto& subkey = keyParts.at(0);
                 if (subkey == internal::feature_IP) {
                     ipdCodec_ = internal::FrameCodecFromName(keyParts.at(1));
                     features_[BaseFeature::IPD] = value;
@@ -350,15 +557,25 @@ void ReadGroupInfo::DecodeSamDescription(const std::string& description)
             }
         }
     }
+
+    hasBarcodeData_ = (hasBarcodeFile  &&
+                       hasBarcodeHash  &&
+                       hasBarcodeCount &&
+                       hasBarcodeMode  &&
+                       hasBarcodeQuality);
 }
 
 std::string ReadGroupInfo::EncodeSamDescription(void) const
 {
-    string result;
+    auto result = string{ };
     result.reserve(256);
     result.append(std::string(internal::token_RT+"=" + readType_));
 
-    string featureName;
+    static const auto SEP   = string{";"};
+    static const auto COLON = string{":"};
+    static const auto EQ    = string{"="};
+
+    auto featureName = string{ };
     const auto featureEnd = features_.cend();
     auto featureIter = features_.cbegin();
     for ( ; featureIter != featureEnd; ++featureIter ) {
@@ -366,21 +583,35 @@ std::string ReadGroupInfo::EncodeSamDescription(void) const
         if (featureName.empty() || featureIter->second.empty())
             continue;
         else if (featureName == internal::feature_IP) {
-            featureName.append(":");
+            featureName.append(COLON);
             featureName.append(internal::FrameCodecName(ipdCodec_));
         }
         else if (featureName == internal::feature_PW) {
-            featureName.append(":");
+            featureName.append(COLON);
             featureName.append(internal::FrameCodecName(pulseWidthCodec_));
         }
-        result.append(string(';' + featureName + '=' + featureIter->second));
+        result.append(string(SEP + featureName + EQ + featureIter->second));
     }
 
-    if (!bindingKit_.empty())        result.append(";"+internal::token_BK+"="+bindingKit_);
-    if (!sequencingKit_.empty())     result.append(";"+internal::token_SK+"="+sequencingKit_);
-    if (!basecallerVersion_.empty()) result.append(";"+internal::token_BV+"="+basecallerVersion_);
-    if (!frameRateHz_.empty())       result.append(";"+internal::token_FR+"="+frameRateHz_);
-    if (control_)                    result.append(";"+internal::token_CT+"="+ (control_ ? "TRUE" : "FALSE"));
+    if (!bindingKit_.empty())        result.append(SEP + internal::token_BK +EQ + bindingKit_);
+    if (!sequencingKit_.empty())     result.append(SEP + internal::token_SK +EQ + sequencingKit_);
+    if (!basecallerVersion_.empty()) result.append(SEP + internal::token_BV +EQ + basecallerVersion_);
+    if (!frameRateHz_.empty())       result.append(SEP + internal::token_FR +EQ + frameRateHz_);
+    if (control_)                    result.append(SEP + internal::token_CT +EQ + (control_ ? "TRUE"
+                                                                                            : "FALSE"));
+
+    if (hasBarcodeData_) {
+        const auto barcodeData =
+            string {
+                SEP + internal::token_BF + EQ + barcodeFile_ +
+                SEP + internal::token_BH + EQ + barcodeHash_ +
+                SEP + internal::token_BC + EQ + std::to_string(barcodeCount_) +
+                SEP + internal::token_BM + EQ + internal::BarcodeModeName(barcodeMode_) +
+                SEP + internal::token_BQ + EQ + internal::BarcodeQualityName(barcodeQuality_)
+            };
+        result.reserve(result.size() + barcodeData.size());
+        result.append(barcodeData);
+    }
 
     return result;
 }
@@ -388,29 +619,30 @@ std::string ReadGroupInfo::EncodeSamDescription(void) const
 ReadGroupInfo ReadGroupInfo::FromSam(const string& sam)
 {
     // pop off '@RG\t', then split rest of line into tokens
-    const vector<string>& tokens = internal::Split(sam.substr(4), '\t');
+    const auto tokens = internal::Split(sam.substr(4), '\t');
     if (tokens.empty())
-        return ReadGroupInfo();
+        return ReadGroupInfo{ };
 
-    ReadGroupInfo rg;
-    map<string, string> custom;
+    auto rg = ReadGroupInfo{ };
+    auto custom = map<string, string>{ };
 
-    for (const string& token : tokens) {
-        const string& tokenTag   = token.substr(0,2);
-        const string& tokenValue = token.substr(3);
+    for (auto&& token : tokens) {
+        const auto tokenTag   = token.substr(0,2);
+        const auto tokenValue = token.substr(3);
 
         // set read group info
-        if      (tokenTag == internal::token_ID) rg.Id(tokenValue);
-        else if (tokenTag == internal::token_CN) rg.SequencingCenter(tokenValue);
-        else if (tokenTag == internal::token_DT) rg.Date(tokenValue);
-        else if (tokenTag == internal::token_FO) rg.FlowOrder(tokenValue);
-        else if (tokenTag == internal::token_KS) rg.KeySequence(tokenValue);
-        else if (tokenTag == internal::token_LB) rg.Library(tokenValue);
-        else if (tokenTag == internal::token_PG) rg.Programs(tokenValue);
-        else if (tokenTag == internal::token_PI) rg.PredictedInsertSize(tokenValue);
-        else if (tokenTag == internal::token_PU) rg.MovieName(tokenValue);
-        else if (tokenTag == internal::token_SM) rg.Sample(tokenValue);
-        else if (tokenTag == internal::token_DS) rg.DecodeSamDescription(tokenValue);
+        if      (tokenTag == internal::sam_ID) rg.Id(tokenValue);
+        else if (tokenTag == internal::sam_CN) rg.SequencingCenter(tokenValue);
+        else if (tokenTag == internal::sam_DT) rg.Date(tokenValue);
+        else if (tokenTag == internal::sam_FO) rg.FlowOrder(tokenValue);
+        else if (tokenTag == internal::sam_KS) rg.KeySequence(tokenValue);
+        else if (tokenTag == internal::sam_LB) rg.Library(tokenValue);
+        else if (tokenTag == internal::sam_PG) rg.Programs(tokenValue);
+        else if (tokenTag == internal::sam_PI) rg.PredictedInsertSize(tokenValue);
+        else if (tokenTag == internal::sam_PU) rg.MovieName(tokenValue);
+        else if (tokenTag == internal::sam_SM) rg.Sample(tokenValue);
+        else if (tokenTag == internal::sam_DS) rg.DecodeSamDescription(tokenValue);
+        else if (tokenTag == internal::sam_PM) rg.PlatformModel(internal::PlatformModelFromName(tokenValue));
 
         // otherwise, "custom" tag
         else
@@ -421,56 +653,83 @@ ReadGroupInfo ReadGroupInfo::FromSam(const string& sam)
     return rg;
 }
 
-ReadGroupInfo& ReadGroupInfo::IpdCodec(const FrameCodec& codec, const string& tag)
+string ReadGroupInfo::IntToId(const int32_t id)   // numeric read group ID -> its text form
+{
+    stringstream s;
+    s << std::setfill('0') << std::setw(8) << std::hex << id;   // hexadecimal, padded with '0' to a width of 8
+    return s.str();
+}
+
+ReadGroupInfo& ReadGroupInfo::IpdCodec(const FrameCodec& codec,
+                                       const string& tag)   // an empty tag falls back to "ip"; returns *this
 {
     // store desired codec type
     ipdCodec_ = codec;
 
     // update base features map
-    string actualTag = tag;
+    auto actualTag = tag;   // mutable copy, so the default can be substituted below
     if (actualTag.empty())
         actualTag = "ip";
     BaseFeatureTag(BaseFeature::IPD, actualTag);
     return *this;
 }
 
-ReadGroupInfo& ReadGroupInfo::PulseWidthCodec(const FrameCodec& codec, const string& tag)
+ReadGroupInfo& ReadGroupInfo::PulseWidthCodec(const FrameCodec& codec,
+                                              const string& tag)   // an empty tag falls back to "pw"; returns *this
 {
     // store desired codec type
     pulseWidthCodec_ = codec;
 
     // update base features map
-    string actualTag = tag;
+    auto actualTag = tag;   // mutable copy, so the default can be substituted below
     if (actualTag.empty())
         actualTag = "pw";
     BaseFeatureTag(BaseFeature::PULSE_WIDTH, actualTag);
     return *this;
 }
 
+string ReadGroupInfo::SequencingChemistryFromTriple(const string& bindingKit,
+                                                    const string& sequencingKit,
+                                                    const string& basecallerVersion)
+{
+    // each table row is matched on {binding kit, sequencing kit, version prefix};
+    // only the first 3 characters of the basecaller version take part in the match
+    const auto versionPrefix = basecallerVersion.substr(0, 3);
+    for (const auto& entry : internal::ChemistryTable) {
+        if (bindingKit != entry[0])    continue;
+        if (sequencingKit != entry[1]) continue;
+        if (versionPrefix == entry[2]) return entry[3];
+    }
+
+    throw InvalidSequencingChemistryException(bindingKit, sequencingKit, basecallerVersion);
+}
+
 std::string ReadGroupInfo::ToSam(void) const
 {
     stringstream out;
     out << "@RG"
-        << internal::MakeSamTag(internal::token_ID, id_)
-        << internal::MakeSamTag(internal::token_PL, Platform());
+        << internal::MakeSamTag(internal::sam_ID, id_)
+        << internal::MakeSamTag(internal::sam_PL, Platform());
 
-    const string& description = EncodeSamDescription();
+    auto description = EncodeSamDescription();
     if (!description.empty())
-        out << internal::MakeSamTag(internal::token_DS, description);
-
-    if (!sequencingCenter_.empty())    out << internal::MakeSamTag(internal::token_CN, sequencingCenter_);
-    if (!date_.empty())                out << internal::MakeSamTag(internal::token_DT, date_);
-    if (!flowOrder_.empty())           out << internal::MakeSamTag(internal::token_FO, flowOrder_);
-    if (!keySequence_.empty())         out << internal::MakeSamTag(internal::token_KS, keySequence_);
-    if (!library_.empty())             out << internal::MakeSamTag(internal::token_LB, library_);
-    if (!programs_.empty())            out << internal::MakeSamTag(internal::token_PG, programs_);
-    if (!predictedInsertSize_.empty()) out << internal::MakeSamTag(internal::token_PI, predictedInsertSize_);
-    if (!movieName_.empty())           out << internal::MakeSamTag(internal::token_PU, movieName_);
-    if (!sample_.empty())              out << internal::MakeSamTag(internal::token_SM, sample_);
+        out << internal::MakeSamTag(internal::sam_DS, description);
+
+    if (!sequencingCenter_.empty())    out << internal::MakeSamTag(internal::sam_CN, sequencingCenter_);
+    if (!date_.empty())                out << internal::MakeSamTag(internal::sam_DT, date_);
+    if (!flowOrder_.empty())           out << internal::MakeSamTag(internal::sam_FO, flowOrder_);
+    if (!keySequence_.empty())         out << internal::MakeSamTag(internal::sam_KS, keySequence_);
+    if (!library_.empty())             out << internal::MakeSamTag(internal::sam_LB, library_);
+    if (!programs_.empty())            out << internal::MakeSamTag(internal::sam_PG, programs_);
+    if (!predictedInsertSize_.empty()) out << internal::MakeSamTag(internal::sam_PI, predictedInsertSize_);
+    if (!movieName_.empty())           out << internal::MakeSamTag(internal::sam_PU, movieName_);
+    if (!sample_.empty())              out << internal::MakeSamTag(internal::sam_SM, sample_);
+
+    out << internal::MakeSamTag(internal::sam_PM, internal::PlatformModelName(platformModel_));
 
     // append any custom tags
-    map<string, string>::const_iterator customIter = custom_.cbegin();
-    map<string, string>::const_iterator customEnd  = custom_.cend();
+    auto customIter = custom_.cbegin();
+    auto customEnd  = custom_.cend();
     for ( ; customIter != customEnd; ++customIter )
         out << internal::MakeSamTag(customIter->first, customIter->second);
 
@@ -493,7 +752,7 @@ std::string MakeReadGroupId(const std::string& movieName,
     for (int i = 0; i < 4; ++i)
         sprintf(&hexdigest[2*i], "%02x", digest[i]);
 
-    return std::string(hexdigest, 8);
+    return std::string{hexdigest, 8};
 }
 
 bool ReadGroupInfo::operator==(const ReadGroupInfo& other) const
@@ -505,6 +764,7 @@ bool ReadGroupInfo::operator==(const ReadGroupInfo& other) const
             && keySequence_ == other.keySequence_             
             && library_ == other.library_                 
             && programs_ == other.programs_                
+            && platformModel_ == other.platformModel_                
             && predictedInsertSize_ == other.predictedInsertSize_     
             && movieName_ == other.movieName_               
             && sample_ == other.sample_                  
@@ -516,9 +776,20 @@ bool ReadGroupInfo::operator==(const ReadGroupInfo& other) const
             && control_ == other.control_ 
             && ipdCodec_ == other.ipdCodec_
             && pulseWidthCodec_ == other.pulseWidthCodec_
+            && hasBarcodeData_ == other.hasBarcodeData_
+            && barcodeFile_ == other.barcodeFile_
+            && barcodeHash_ == other.barcodeHash_
+            && barcodeCount_ == other.barcodeCount_
+            && barcodeMode_ == other.barcodeMode_
+            && barcodeQuality_ == other.barcodeQuality_
             && features_.size() == other.features_.size()
-            && std::equal(features_.begin(), features_.end(),
-                          other.features_.begin());
+            && std::equal(features_.cbegin(),
+                          features_.cend(),
+                          other.features_.cbegin())
+            && custom_.size() == other.custom_.size()
+            && std::equal(custom_.begin(),
+                          custom_.end(),
+                          other.custom_.cbegin());
 }
 
 } // namespace BAM
diff --git a/src/SamTagCodec.cpp b/src/SamTagCodec.cpp
index 532998f..43064b8 100644
--- a/src/SamTagCodec.cpp
+++ b/src/SamTagCodec.cpp
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file SamTagCodec.cpp
+/// \brief Implements the SamTagCodec class.
+//
 // Author: Derek Barnett
 
 #include "pbbam/SamTagCodec.h"
@@ -142,17 +146,17 @@ TagCollection SamTagCodec::Decode(const string& tagString)
 
         switch (type) {
 
-            // technically only 'A' is allowed in SAM chars, but we'll be a little permissive
+            // technically only 'A' is allowed in SAM chars,
+            // but we'll be a little permissive
             case 'A' :
             case 'a' :
             {
-                tags[name] = static_cast<char>(remainder.at(0));
-                tags[name].Modifier(TagModifier::ASCII_CHAR);
+                tags[name] = Tag(static_cast<char>(remainder.at(0)), TagModifier::ASCII_CHAR); // two ctor args: (char value, modifier)
                 break;
             }
 
-            // technically only 'i' is allowed in SAM ints, but we'll be a little permissive
-            // since SAM might be a bit more "user-edited" than BAM
+            // technically only 'i' is allowed in SAM ints, but we'll be a little
+            // permissive since SAM might be a bit more "user-edited" than BAM
             case 'c' :
             case 'C' :
             case 's' :
@@ -160,11 +164,13 @@ TagCollection SamTagCodec::Decode(const string& tagString)
             case 'i' :
             case 'I' :
             {
+                // check out boost::numeric cast for these conversions
+
                 // negative value (force signed int)
                 if (remainder.at(0) == '-') {
                     const int32_t x = boost::lexical_cast<int32_t>(remainder);
                     if ( x >= INT8_MIN )
-                        tags[name] = static_cast<int8_t>(x);  // check out boost::numeric cast
+                        tags[name] = static_cast<int8_t>(x);
                     else if ( x >= INT16_MIN )
                         tags[name] = static_cast<int16_t>(x);
                     else
@@ -198,8 +204,7 @@ TagCollection SamTagCodec::Decode(const string& tagString)
 
             case 'H' :
             {
-                tags[name] = remainder;
-                tags[name].Modifier(TagModifier::HEX_STRING);
+                tags[name] = Tag(remainder, TagModifier::HEX_STRING);
                 break;
             }
 
@@ -230,7 +235,6 @@ TagCollection SamTagCodec::Decode(const string& tagString)
     return tags;
 }
 
-
 string SamTagCodec::Encode(const TagCollection& tags)
 {
     string result;
@@ -263,7 +267,7 @@ string SamTagCodec::Encode(const TagCollection& tags)
         }
 
         // "<TYPE>:<DATA>" for all other data
-        switch ( tag.Type() ) {
+        switch (tag.Type()) {
             case TagDataType::INT8   : result.append("i:"); appendSamValue(tag.ToInt8(),   result, true); break;
             case TagDataType::UINT8  : result.append("i:"); appendSamValue(tag.ToUInt8(),  result, true); break;
             case TagDataType::INT16  : result.append("i:"); appendSamValue(tag.ToInt16(),  result); break;
diff --git a/src/SequenceInfo.cpp b/src/SequenceInfo.cpp
index fa7837d..43e4343 100644
--- a/src/SequenceInfo.cpp
+++ b/src/SequenceInfo.cpp
@@ -33,7 +33,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file SequenceInfo.cpp
+/// \brief Implements the SequenceInfo class.
+//
 // Author: Derek Barnett
 
 #include "pbbam/SequenceInfo.h"
diff --git a/tests/src/test_TimeUtils.cpp b/src/SubreadLengthQuery.cpp
similarity index 65%
copy from tests/src/test_TimeUtils.cpp
copy to src/SubreadLengthQuery.cpp
index 7ab9fa5..1c7ce41 100644
--- a/tests/src/test_TimeUtils.cpp
+++ b/src/SubreadLengthQuery.cpp
@@ -32,28 +32,40 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file SubreadLengthQuery.cpp
+/// \brief Implements the SubreadLengthQuery class.
+//
 // Author: Derek Barnett
 
-#ifdef PBBAM_TESTING
-#define private public
-#endif
-
-#include <gtest/gtest.h>
-#include <pbbam/../../src/TimeUtils.h>
-
+#include "pbbam/SubreadLengthQuery.h"
+#include "pbbam/PbiFilterTypes.h"
+#include "pbbam/CompositeBamReader.h"
 using namespace PacBio;
 using namespace PacBio::BAM;
 using namespace PacBio::BAM::internal;
 using namespace std;
 
-TEST(TimeUtilsTest, ToIso8601)
+struct SubreadLengthQuery::SubreadLengthQueryPrivate   // private implementation: holds the filtered reader
 {
-    const time_t rawTime = 436428750L;
-    const auto timestamp = std::chrono::system_clock::from_time_t(rawTime);
+    SubreadLengthQueryPrivate(const int32_t length,
+                              const Compare::Type compareType,
+                              const DataSet& dataset)
+        : reader_(PbiQueryLengthFilter(length, compareType), dataset)   // reader over 'dataset', filtered by query length
+    { }
+
+    PbiFilterCompositeBamReader<Compare::None> reader_; // unsorted
+};
+
+SubreadLengthQuery::SubreadLengthQuery(const int32_t length,
+                                       const Compare::Type compareType,
+                                       const DataSet& dataset)
+    : internal::IQuery()
+    , d_(new SubreadLengthQueryPrivate(length, compareType, dataset))   // all arguments are forwarded to the private implementation
+{ }
+
+SubreadLengthQuery::~SubreadLengthQuery(void) { }   // NOTE(review): presumably defined here so the private struct is complete when d_ is destroyed — confirm d_'s type
 
-    // can't hardcode expected (since we rely on localtime())
-    const std::string& expected = "1983-10-31T06:12:30Z";
-    const std::string& actual = internal::ToIso8601(timestamp);
-    EXPECT_EQ(expected, actual);
-}
+bool SubreadLengthQuery::GetNext(BamRecord &r)   // delegates to the private reader and returns its result
+{ return d_->reader_.GetNext(r); }
diff --git a/src/Tag.cpp b/src/Tag.cpp
index 7f0a10c..5c51321 100644
--- a/src/Tag.cpp
+++ b/src/Tag.cpp
@@ -32,10 +32,15 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file Tag.cpp
+/// \brief Defines the Tag class.
+//
 // Author: Derek Barnett
 
 #include "pbbam/Tag.h"
+#include <stdexcept>
 using namespace PacBio;
 using namespace PacBio::BAM;
 using namespace std;
@@ -57,6 +62,24 @@ Tag::Tag(const vector<int32_t>& value)  : data_(value), modifier_(TagModifier::N
 Tag::Tag(const vector<uint32_t>& value) : data_(value), modifier_(TagModifier::NONE) { }
 Tag::Tag(const vector<float>& value)    : data_(value), modifier_(TagModifier::NONE) { }
 
+Tag::Tag(int8_t value, const TagModifier mod)
+    : data_(value)
+    , modifier_(mod)
+{
+    if (mod == TagModifier::HEX_STRING)
+        throw runtime_error("HEX_STRING is not a valid tag modifier for int8_t data. "
+                            "It is intended for string-type data only.");
+}
+
+Tag::Tag(const std::string& value, const TagModifier mod)
+    : data_(value)
+    , modifier_(mod)
+{
+    if (mod == TagModifier::ASCII_CHAR)
+        throw runtime_error("ASCII_CHAR is not a valid tag modifier for string-type data. "
+                            "To construct an ASCII char tag, use a single-quoted value (e.g. 'X' instead of \"X\")");
+}
+
 Tag::Tag(const Tag& other)
     : data_(other.data_)
     , modifier_(other.modifier_)
diff --git a/src/TagCollection.cpp b/src/TagCollection.cpp
index 7f50126..98ed22b 100644
--- a/src/TagCollection.cpp
+++ b/src/TagCollection.cpp
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file TagCollection.cpp
+/// \brief Implements the TagCollection class.
+//
 // Author: Derek Barnett
 
 #include "pbbam/TagCollection.h"
diff --git a/src/TimeUtils.h b/src/TimeUtils.h
index 615295d..b3fd75f 100644
--- a/src/TimeUtils.h
+++ b/src/TimeUtils.h
@@ -71,6 +71,25 @@ std::string ToIso8601(const std::chrono::system_clock::time_point& tp)
 }
 
 inline
+std::string ToDataSetFormat(const std::chrono::system_clock::time_point& tp)
+{
+    // get time info
+    const time_t ttime_t = std::chrono::system_clock::to_time_t(tp);
+    const std::chrono::system_clock::time_point tp_sec = std::chrono::system_clock::from_time_t(ttime_t);
+    const std::chrono::milliseconds ms = std::chrono::duration_cast<std::chrono::milliseconds>(tp - tp_sec);
+    const std::tm* ttm = gmtime(&ttime_t);  // static obj, no free needed (may not be thread-safe though)
+
+    // format output
+    char date_time_format[] = "%y%m%d_%H%M%S";
+    char date_time_str[50];
+    strftime(date_time_str, sizeof(date_time_str), date_time_format, ttm);
+    std::string result(date_time_str);
+    if (ms.count() > 0)
+        result.append(std::to_string(ms.count()));
+    return result;
+}
+
+inline
 std::chrono::system_clock::time_point CurrentTime(void)
 { return std::chrono::system_clock::now(); }
 
diff --git a/src/VirtualPolymeraseBamRecord.cpp b/src/VirtualPolymeraseBamRecord.cpp
index 7b3bf7b..eb23d6b 100644
--- a/src/VirtualPolymeraseBamRecord.cpp
+++ b/src/VirtualPolymeraseBamRecord.cpp
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file VirtualPolymeraseBamRecord.cpp
+/// \brief Implements the VirtualPolymeraseBamRecord class.
+//
 // Author: Armin Töpfer
 
 #include <iostream>
@@ -46,6 +50,55 @@
 
 using namespace PacBio;
 using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// \brief Appends content of src vector to dst vector using move semantics.
+///
+/// \param[in,out] src  Input vector that will be empty after execution
+/// \param[in,out] dst  Output vector that will be appended to
+///
+template <typename T>
+inline void MoveAppend(std::vector<T>& src, std::vector<T>& dst) noexcept
+{
+    if (dst.empty())
+    {
+        dst = std::move(src);
+    }
+    else
+    {
+        dst.reserve(dst.size() + src.size());
+        std::move(src.begin(), src.end(), std::back_inserter(dst));
+        src.clear();
+    }
+}
+
+/// \brief Appends content of src vector to dst vector using move semantics.
+///
+/// \param[in,out] src  Rvalue input vector whose elements are moved into dst
+/// \param[in,out] dst  Output vector that will be appended to
+///
+template <typename T>
+inline void MoveAppend(std::vector<T>&& src, std::vector<T>& dst) noexcept
+{
+    if (dst.empty())
+    {
+        dst = std::move(src);
+    }
+    else
+    {
+        dst.reserve(dst.size() + src.size());
+        std::move(src.begin(), src.end(), std::back_inserter(dst));
+        src.clear();
+    }
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
 
 VirtualPolymeraseBamRecord::VirtualPolymeraseBamRecord(
     std::vector<BamRecord>&& unorderedSources, const BamHeader& header)
@@ -59,15 +112,22 @@ VirtualPolymeraseBamRecord::VirtualPolymeraseBamRecord(
     StitchSources();
 }
 
-void VirtualPolymeraseBamRecord::StitchSources()
+bool VirtualPolymeraseBamRecord::HasVirtualRegionType(const VirtualRegionType regionType) const
+{ return virtualRegionsMap_.find(regionType) != virtualRegionsMap_.end(); }
+
+Frames VirtualPolymeraseBamRecord::IPDV1Frames(Orientation orientation) const
+{
+    const auto rawFrames = this->IPDRaw(orientation);
+    const std::vector<uint8_t> rawData(rawFrames.Data().begin(), rawFrames.Data().end());
+    return Frames::Decode(rawData);
+}
+
+
+void VirtualPolymeraseBamRecord::StitchSources(void)
 {
     const auto& firstRecord = sources_[0];
     const auto& lastRecord = sources_[sources_.size() - 1];
 
-    // Temporary variables used for stitching
-    int accuracy = 0;
-    int accuracyCounter = 0;
-
     std::string   sequence;
     std::string   deletionTag;
     std::string   substitutionTag;
@@ -83,12 +143,13 @@ void VirtualPolymeraseBamRecord::StitchSources()
     QualityValues labelQv;
     QualityValues alternativeLabelQv;
 
-    Frames             ipd;
-    Frames             pw;
-    Frames             pd;
-    Frames             px;
-    std::vector<float> pa;
-    std::vector<float> pm;
+    Frames                ipd;
+    Frames                pw;
+    Frames                pd;
+    Frames                px;
+    std::vector<float>    pa;
+    std::vector<float>    pm;
+    std::vector<uint32_t> sf;
 
     // Stitch using tmp vars
     for(auto& b : sources_)
@@ -97,12 +158,6 @@ void VirtualPolymeraseBamRecord::StitchSources()
         
         MoveAppend(b.Qualities(), qualities);
 
-        if (b.HasReadAccuracy())
-        {
-            accuracy += b.ReadAccuracy();
-            ++accuracyCounter;
-        }
-
         if (b.HasDeletionQV())
             MoveAppend(std::move(b.DeletionQV()), deletionQv);
 
@@ -154,10 +209,19 @@ void VirtualPolymeraseBamRecord::StitchSources()
         if (b.HasPkmean())
             MoveAppend(b.Pkmean(), pa);
 
-        if (b.HasScrapType())
-        {
-            const auto regionType = b.ScrapType();
+        if (b.HasPkmid2())
+            MoveAppend(b.Pkmid2(), pm);
 
+        if (b.HasPkmean2())
+            MoveAppend(b.Pkmean2(), pa);
+
+        if (b.HasStartFrame())
+            MoveAppend(b.StartFrame(), sf);
+
+        if (b.HasScrapRegionType())
+        {
+            const VirtualRegionType regionType = b.ScrapRegionType();
+            
             if (!HasVirtualRegionType(regionType))
                 virtualRegionsMap_[regionType] = std::vector<VirtualRegion>();
 
@@ -179,15 +243,28 @@ void VirtualPolymeraseBamRecord::StitchSources()
                 regionType, b.QueryStart(), b.QueryEnd(), b.LocalContextFlags(),
                 barcodes.first, barcodes.second);
         }
+
+        if (b.HasBarcodes() && !this->HasBarcodes())
+            this->Barcodes(b.Barcodes());
+
+        if (b.HasBarcodeQuality() && !this->HasBarcodeQuality())
+            this->BarcodeQuality(b.BarcodeQuality());
+
+        if (b.HasReadAccuracy() && !this->HasReadAccuracy())
+            this->ReadAccuracy(b.ReadAccuracy());
+
+        if (b.HasScrapZmwType())
+        {
+            if (!this->HasScrapZmwType())
+                this->ScrapZmwType(b.ScrapZmwType());
+            else if (this->ScrapZmwType() != b.ScrapZmwType())
+                throw std::runtime_error("ScrapZmwTypes do not match");
+        }
     }
 
     // ReadGroup
     this->ReadGroup(this->header_.ReadGroups()[0]);
 
-    // Avoid division by 0
-    if (accuracyCounter > 0)
-        this->ReadAccuracy(accuracy / accuracyCounter);
-
     this->NumPasses(1);
 
     // All records should contain the same SNR and hole number
@@ -246,6 +323,10 @@ void VirtualPolymeraseBamRecord::StitchSources()
     if (!px.Data().empty())
         this->PulseCallWidth(px, FrameEncodingType::LOSSLESS);
 
+    // 32 bit arrays
+    if (!sf.empty())
+        this->StartFrame(sf);
+
     // Determine HQREGION bases on LQREGIONS
     if (HasVirtualRegionType(VirtualRegionType::LQREGION))
     {
@@ -280,9 +361,17 @@ void VirtualPolymeraseBamRecord::StitchSources()
     }
 }
 
-Frames VirtualPolymeraseBamRecord::IPDV1Frames(Orientation orientation) const
+
+std::map<VirtualRegionType, std::vector<VirtualRegion>>
+VirtualPolymeraseBamRecord::VirtualRegionsMap(void) const
+{ return virtualRegionsMap_; }
+
+std::vector<VirtualRegion>
+VirtualPolymeraseBamRecord::VirtualRegionsTable(const VirtualRegionType regionType) const
 {
-    const auto rawFrames = this->IPDRaw(orientation);
-    const std::vector<uint8_t> rawData(rawFrames.Data().begin(), rawFrames.Data().end());
-    return Frames::Decode(rawData);
-}
\ No newline at end of file
+   const auto iter = virtualRegionsMap_.find(regionType);
+   if (iter != virtualRegionsMap_.cend())
+       return iter->second;
+   return std::vector<VirtualRegion>(); 
+}
+
diff --git a/src/VirtualPolymeraseCompositeReader.cpp b/src/VirtualPolymeraseCompositeReader.cpp
new file mode 100644
index 0000000..a70dfe5
--- /dev/null
+++ b/src/VirtualPolymeraseCompositeReader.cpp
@@ -0,0 +1,146 @@
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file VirtualPolymeraseCompositeReader.cpp
+/// \brief Implements the VirtualPolymeraseCompositeReader class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/virtual/VirtualPolymeraseCompositeReader.h"
+#include <boost/algorithm/string.hpp>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+VirtualPolymeraseCompositeReader::VirtualPolymeraseCompositeReader(const DataSet& dataset)
+    : currentReader_(nullptr)
+    , filter_(PbiFilter::FromDataSet(dataset))
+{
+    // set up source queue
+    string primaryFn;
+    string scrapsFn;
+    const ExternalResources& resources = dataset.ExternalResources();
+    for (const ExternalResource& resource : resources) {
+
+        primaryFn.clear();
+        scrapsFn.clear();
+
+        // if resource is possible "primary" BAM
+        const auto& metatype = resource.MetaType();
+        if (metatype == "PacBio.SubreadFile.SubreadBamFile" ||
+            metatype == "PacBio.SubreadFile.HqRegionBamFile")
+        {
+            // resolve the path, which may be relative
+            primaryFn = dataset.ResolvePath(resource.ResourceId());
+
+            // check for associated scraps file
+            const ExternalResources& childResources = resource.ExternalResources();
+            for (const ExternalResource& childResource : childResources) {
+                const auto& childMetatype = childResource.MetaType();
+                if (childMetatype == "PacBio.SubreadFile.ScrapsBamFile" ||
+                    childMetatype == "PacBio.SubreadFile.HqScrapsBamFile")
+                {
+                    // resolve the path, which may be relative
+                    scrapsFn = dataset.ResolvePath(childResource.ResourceId());
+                    break;
+                }
+            }
+        }
+
+        // queue up source for later
+        if (!primaryFn.empty() && !scrapsFn.empty())
+            sources_.push_back(make_pair(primaryFn,scrapsFn));
+    }
+
+    // open first available source
+    OpenNextReader();
+}
+
+bool VirtualPolymeraseCompositeReader::HasNext(void)
+{
+    return (currentReader_ && currentReader_->HasNext());
+}
+
+VirtualPolymeraseBamRecord VirtualPolymeraseCompositeReader::Next(void)
+{
+    if (currentReader_) {
+        auto result = currentReader_->Next(); // non-const: allows 'return result' to move instead of copy
+        if (!currentReader_->HasNext())       // current source has nothing left: open the next queued one
+            OpenNextReader();
+        return result;
+    }
+
+    // no reader active
+    const string msg = { "no readers active, make sure you use "
+                         "VirtualPolymeraseCompositeReader::HasNext before "
+                         "requesting next record"
+                      };
+    throw std::runtime_error(msg);            // reached only when called without an active reader
+}
+
+vector<BamRecord> VirtualPolymeraseCompositeReader::NextRaw(void)
+{
+    if (currentReader_) {
+        auto result = currentReader_->NextRaw(); // non-const: allows the vector to be moved out on return
+        if (!currentReader_->HasNext())          // current source has nothing left: open the next queued one
+            OpenNextReader();
+        return result;
+    }
+
+    // no reader active
+    const string msg = { "no readers active, make sure you use "
+                         "VirtualPolymeraseCompositeReader::HasNext before "
+                         "requesting next group of records"
+                      };
+    throw std::runtime_error(msg);               // reached only when called without an active reader
+}
+
+void VirtualPolymeraseCompositeReader::OpenNextReader(void)
+{
+    currentReader_.reset(nullptr);
+
+    // find next source pair with data
+    while(!sources_.empty()) {
+        const auto nextSource = sources_.front();
+        sources_.pop_front();
+
+        currentReader_.reset(new VirtualPolymeraseReader(nextSource.first,
+                                                         nextSource.second,
+                                                         filter_));
+        if (currentReader_->HasNext())
+            return;
+    }
+}
diff --git a/src/VirtualPolymeraseReader.cpp b/src/VirtualPolymeraseReader.cpp
index 271a96e..4c9f4b0 100644
--- a/src/VirtualPolymeraseReader.cpp
+++ b/src/VirtualPolymeraseReader.cpp
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file VirtualPolymeraseReader.cpp
+/// \brief Implements the VirtualPolymeraseReader class.
+//
 // Author: Armin Töpfer
 
 #include <stdexcept>
@@ -42,72 +46,240 @@
 
 using namespace PacBio;
 using namespace PacBio::BAM;
+using namespace std;
 
-VirtualPolymeraseReader::VirtualPolymeraseReader(
-    const std::string& primaryBamFilePath, const std::string& scrapsBamFilePath)
-    : primaryBamFilePath_(primaryBamFilePath)
-    , scrapsBamFilePath_(scrapsBamFilePath)
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+class IBackend
 {
-    primaryBamFile_ = std::unique_ptr<BamFile>(new BamFile(primaryBamFilePath_));
-    primaryQuery_   = std::unique_ptr<EntireFileQuery>(new EntireFileQuery(*primaryBamFile_));
-    primaryIt_      = primaryQuery_->begin();
-
-    scrapsBamFile_  = std::unique_ptr<BamFile>(new BamFile(scrapsBamFilePath_));
-    scrapsQuery_    = std::unique_ptr<EntireFileQuery>(new EntireFileQuery(*scrapsBamFile_));
-    scrapsIt_       = scrapsQuery_->begin();
-
-    polyHeader_     = std::unique_ptr<BamHeader>(
-                        new BamHeader(primaryBamFile_->Header().ToSam()));
-
-    auto readGroups = polyHeader_->ReadGroups();
-    if (readGroups.empty())
-        throw std::runtime_error("Bam header of the primary bam has no read groups.");
-    readGroups[0].ReadType("POLYMERASE");
-    readGroups[0].Id(readGroups[0].MovieName(), "POLYMERASE");
-    if (readGroups.size() > 1)
+protected:
+    IBackend(const string& primaryBamFilePath,
+             const string& scrapsBamFilePath)
     {
-        std::vector<ReadGroupInfo> singleGroup;
-        singleGroup.emplace_back(std::move(readGroups[0]));
-        readGroups = std::move(singleGroup);
-        polyHeader_->ClearReadGroups();
+        primaryBamFile_ = std::unique_ptr<BamFile>(new BamFile(primaryBamFilePath));
+        scrapsBamFile_  = std::unique_ptr<BamFile>(new BamFile(scrapsBamFilePath));
+
+        polyHeader_     = std::unique_ptr<BamHeader>(
+                            new BamHeader(primaryBamFile_->Header().ToSam()));
+
+        auto readGroups = polyHeader_->ReadGroups();
+        if (readGroups.empty())
+            throw std::runtime_error("Bam header of the primary bam has no read groups.");
+        readGroups[0].ReadType("POLYMERASE");
+        readGroups[0].Id(readGroups[0].MovieName(), "POLYMERASE");
+        if (readGroups.size() > 1)
+        {
+            std::vector<ReadGroupInfo> singleGroup;
+            singleGroup.emplace_back(std::move(readGroups[0]));
+            readGroups = std::move(singleGroup);
+            polyHeader_->ClearReadGroups();
+        }
+        polyHeader_->ReadGroups(readGroups);
     }
-    polyHeader_->ReadGroups(readGroups);
-}
 
-// This method is not thread safe
-VirtualPolymeraseBamRecord VirtualPolymeraseReader::Next()
+public:
+    virtual ~IBackend(void) { } // virtual: backends are owned and destroyed through unique_ptr<IBackend>
+
+public:
+    virtual bool HasNext(void) =0;
+    virtual std::vector<BamRecord> NextRaw(void) =0;
+
+    const BamHeader& PolyHeader(void) const
+    { return *polyHeader_; }
+
+    BamHeader PrimaryHeader(void) const
+    { return primaryBamFile_->Header(); }
+
+    BamHeader ScrapsHeader(void) const
+    { return scrapsBamFile_->Header(); }
+
+protected:
+    std::unique_ptr<BamFile>   primaryBamFile_;
+    std::unique_ptr<BamFile>   scrapsBamFile_;
+    std::unique_ptr<BamHeader> polyHeader_;
+};
+
+class EntireFileBackend : public IBackend
 {
-    auto bamRecordVec = NextRaw();
-    VirtualPolymeraseBamRecord stitched(std::move(bamRecordVec), *polyHeader_);
-    return std::move(stitched);
-}
+public:
+    EntireFileBackend(const string& primaryBamFilepath,
+                      const string& scrapsBamFilepath)
+        : IBackend(primaryBamFilepath, scrapsBamFilepath)
+    {
+        primaryQuery_   = std::unique_ptr<EntireFileQuery>(new EntireFileQuery(*primaryBamFile_));
+        primaryIt_      = primaryQuery_->begin();
+
+        scrapsQuery_    = std::unique_ptr<EntireFileQuery>(new EntireFileQuery(*scrapsBamFile_));
+        scrapsIt_       = scrapsQuery_->begin();
+    }
+
+    ~EntireFileBackend(void) { }
+
+public:
+    bool HasNext(void)
+    {
+        // Return true until both iterators are at the end of the query
+        return primaryIt_ != primaryQuery_->end() || scrapsIt_ != scrapsQuery_->end();
+    }
+
+    std::vector<BamRecord> NextRaw(void)
+    {
+        std::vector<BamRecord> bamRecordVec;
 
-std::vector<BamRecord> VirtualPolymeraseReader::NextRaw()
+        // Current hole number, the smallest of scraps and primary.
+        // It can be that the next ZMW is scrap only.
+        int currentHoleNumber;
+        if (primaryIt_ == primaryQuery_->end())
+            currentHoleNumber = (*scrapsIt_).HoleNumber();
+        else if (scrapsIt_ == scrapsQuery_->end())
+            currentHoleNumber = (*primaryIt_).HoleNumber();
+        else
+            currentHoleNumber = std::min((*primaryIt_).HoleNumber(), (*scrapsIt_).HoleNumber());
+
+        // collect subreads or hqregions
+        while (primaryIt_ != primaryQuery_->end() && currentHoleNumber == (*primaryIt_).HoleNumber())
+            bamRecordVec.push_back(*primaryIt_++);
+
+        // collect scraps
+        while (scrapsIt_ != scrapsQuery_->end() && currentHoleNumber == (*scrapsIt_).HoleNumber())
+            bamRecordVec.push_back(*scrapsIt_++);
+
+        return bamRecordVec;
+    }
+
+    std::unique_ptr<EntireFileQuery> primaryQuery_;
+    std::unique_ptr<EntireFileQuery> scrapsQuery_;
+    EntireFileQuery::iterator        primaryIt_;
+    EntireFileQuery::iterator        scrapsIt_;
+};
+
+class PbiFilterBackend : public IBackend
 {
-    std::vector<BamRecord> bamRecordVec;
-
-    // Current hole number, the smallest of scraps and primary.
-    // It can be that the next ZMW is scrap only.
-    int currentHoleNumber = std::min((*primaryIt_).HoleNumber(), (*scrapsIt_).HoleNumber());
-    // collect subreads or hqregions
-    while (primaryIt_ != primaryQuery_->end() && currentHoleNumber == (*primaryIt_).HoleNumber())
-        bamRecordVec.push_back(*primaryIt_++);
-    
-    // collect scraps
-    while (scrapsIt_ != scrapsQuery_->end() && currentHoleNumber == (*scrapsIt_).HoleNumber())
-        bamRecordVec.push_back(*scrapsIt_++);
-
-    return bamRecordVec;
-}
+public:
+    PbiFilterBackend(const string& primaryBamFilePath,
+                     const string& scrapsBamFilePath,
+                     const PbiFilter& filter)
+        : IBackend(primaryBamFilePath, scrapsBamFilePath)
+    {
+        primaryQuery_   = std::unique_ptr<PbiFilterQuery>(new PbiFilterQuery(filter, *primaryBamFile_));
+        primaryIt_      = primaryQuery_->begin();
 
-bool VirtualPolymeraseReader::HasNext()
+        scrapsQuery_    = std::unique_ptr<PbiFilterQuery>(new PbiFilterQuery(filter, *scrapsBamFile_));
+        scrapsIt_       = scrapsQuery_->begin();
+    }
+
+    ~PbiFilterBackend(void) { }
+
+public:
+    bool HasNext(void)
+    {
+        // Return true until both iterators are at the end of the query
+        return primaryIt_ != primaryQuery_->end() || scrapsIt_ != scrapsQuery_->end();
+    }
+
+    std::vector<BamRecord> NextRaw(void)
+    {
+        std::vector<BamRecord> bamRecordVec;
+
+        // Current hole number, the smallest of scraps and primary.
+        // It can be that the next ZMW is scrap only.
+        int currentHoleNumber;
+        if (primaryIt_ == primaryQuery_->end())
+            currentHoleNumber = (*scrapsIt_).HoleNumber();
+        else if (scrapsIt_ == scrapsQuery_->end())
+            currentHoleNumber = (*primaryIt_).HoleNumber();
+        else
+            currentHoleNumber = std::min((*primaryIt_).HoleNumber(), (*scrapsIt_).HoleNumber());
+
+        // collect subreads or hqregions
+        while (primaryIt_ != primaryQuery_->end() && currentHoleNumber == (*primaryIt_).HoleNumber())
+            bamRecordVec.push_back(*primaryIt_++);
+
+        // collect scraps
+        while (scrapsIt_ != scrapsQuery_->end() && currentHoleNumber == (*scrapsIt_).HoleNumber())
+            bamRecordVec.push_back(*scrapsIt_++);
+
+        return bamRecordVec;
+    }
+
+private:
+    std::unique_ptr<PbiFilterQuery> primaryQuery_;
+    std::unique_ptr<PbiFilterQuery> scrapsQuery_;
+    PbiFilterQuery::iterator        primaryIt_;
+    PbiFilterQuery::iterator        scrapsIt_;
+};
+
+} // namespace internal
+
+struct VirtualPolymeraseReader::VirtualPolymeraseReaderPrivate
+{
+    VirtualPolymeraseReaderPrivate(const string& primaryBamFilepath,
+                                   const string& scrapsBamFilePath,
+                                   const PbiFilter& filter)
+        : backend_(nullptr)
+    {
+        if (filter.IsEmpty()) {
+            backend_.reset(new internal::EntireFileBackend(primaryBamFilepath,
+                                                           scrapsBamFilePath));
+        } else {
+            backend_.reset(new internal::PbiFilterBackend(primaryBamFilepath,
+                                                          scrapsBamFilePath,
+                                                          filter));
+        }
+    }
+
+    bool HasNext(void)
+    { return backend_->HasNext(); }
+
+    std::vector<BamRecord> NextRaw(void)
+    { return backend_->NextRaw(); }
+
+    const BamHeader& PolyHeader(void) const
+    { return backend_->PolyHeader(); }
+
+    BamHeader PrimaryHeader(void) const
+    { return backend_->PrimaryHeader(); }
+
+    BamHeader ScrapsHeader(void) const
+    { return backend_->ScrapsHeader(); }
+
+    std::unique_ptr<internal::IBackend> backend_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+VirtualPolymeraseReader::VirtualPolymeraseReader(const std::string& primaryBamFilePath,
+                                                 const std::string& scrapsBamFilePath)
+    : d_(new VirtualPolymeraseReaderPrivate(primaryBamFilePath, scrapsBamFilePath, PbiFilter()))
+{ }
+
+VirtualPolymeraseReader::VirtualPolymeraseReader(const std::string& primaryBamFilePath,
+                                                 const std::string& scrapsBamFilePath,
+                                                 const PbiFilter& filter)
+    : d_(new VirtualPolymeraseReaderPrivate(primaryBamFilePath, scrapsBamFilePath, filter))
+{ }
+
+VirtualPolymeraseReader::~VirtualPolymeraseReader(void) { }
+
+bool VirtualPolymeraseReader::HasNext(void)
+{ return d_->HasNext(); }
+
+// This method is not thread safe
+VirtualPolymeraseBamRecord VirtualPolymeraseReader::Next(void)
 {
-	// Return true until both iterators are at the end of the query
-	return primaryIt_ != primaryQuery_->end() || scrapsIt_ != scrapsQuery_->end();
+    auto bamRecordVec = NextRaw();
+    VirtualPolymeraseBamRecord stitched(std::move(bamRecordVec), d_->PolyHeader());
+    return std::move(stitched);
 }
 
-BamHeader VirtualPolymeraseReader::PrimaryHeader()
-{ return primaryBamFile_->Header(); }
+std::vector<BamRecord> VirtualPolymeraseReader::NextRaw(void)
+{ return d_->NextRaw(); }
+
+BamHeader VirtualPolymeraseReader::PrimaryHeader(void) const
+{ return d_->PrimaryHeader(); }
 
-BamHeader VirtualPolymeraseReader::ScrapsHeader()
-{ return scrapsBamFile_->Header(); }
+BamHeader VirtualPolymeraseReader::ScrapsHeader(void) const
+{ return d_->ScrapsHeader(); }
diff --git a/src/VirtualRegionTypeMap.cpp b/src/VirtualRegionTypeMap.cpp
index 4839b35..8c6c757 100644
--- a/src/VirtualRegionTypeMap.cpp
+++ b/src/VirtualRegionTypeMap.cpp
@@ -32,7 +32,11 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file VirtualRegionTypeMap.cpp
+/// \brief Implements the VirtualRegionTypeMap class.
+//
 // Author: Armin Töpfer
 
 #include "pbbam/virtual/VirtualRegionTypeMap.h"
@@ -42,8 +46,9 @@ using namespace PacBio::BAM;
 
 std::map<char, VirtualRegionType> VirtualRegionTypeMap::ParseChar
 {
-	{ 'A' , VirtualRegionType::ADAPTER },
-	{ 'B' , VirtualRegionType::BARCODE },
-	{ 'H' , VirtualRegionType::HQREGION },
-	{ 'L' , VirtualRegionType::LQREGION }
-};
\ No newline at end of file
+    { 'A' , VirtualRegionType::ADAPTER },
+    { 'B' , VirtualRegionType::BARCODE },
+    { 'H' , VirtualRegionType::HQREGION },
+    { 'F' , VirtualRegionType::FILTERED },
+    { 'L' , VirtualRegionType::LQREGION }
+};
diff --git a/src/XmlReader.cpp b/src/XmlReader.cpp
index 5e88e47..df4e782 100644
--- a/src/XmlReader.cpp
+++ b/src/XmlReader.cpp
@@ -88,7 +88,7 @@ void FromXml(const pugi::xml_node& xmlNode, DataSetElement& parent)
         return;
 
     // label & text
-    DataSetElement e(xmlNode.name());
+    DataSetElement e(xmlNode.name(), FromInputXml());
     e.Text(xmlNode.text().get());
 
     // iterate attributes
diff --git a/src/XmlWriter.cpp b/src/XmlWriter.cpp
index bf42e36..6c7b7af 100644
--- a/src/XmlWriter.cpp
+++ b/src/XmlWriter.cpp
@@ -40,6 +40,7 @@
 #include "pugixml/pugixml.hpp"
 #include <fstream>
 #include <iostream>
+#include <map>
 using namespace PacBio;
 using namespace PacBio::BAM;
 using namespace PacBio::BAM::internal;
@@ -50,19 +51,42 @@ namespace BAM {
 namespace internal {
 
 static
+string Prefix(const string& input)
+{
+    const size_t colonFound = input.find(':');
+    if (colonFound == std::string::npos || colonFound == 0)
+        return string();
+    return input.substr(0, colonFound);
+}
+
+static
 string OutputName(const DataSetElement& node,
                   const NamespaceRegistry& registry)
 {
-    if (node.PrefixLabel().empty())
-        return registry.Namespace(node.Xsd()).Name() + ":" + node.LocalNameLabel().to_string();
-    else
-        return node.QualifiedNameLabel(); // is this correct? what if node's contents don't match registry
-                                          // who gets priority?
+    // if from input XML, respect the namespaces given
+    if (node.IsVerbatimLabel()) 
+        return node.QualifiedNameLabel();
+
+    // otherwise, probably user-generated
+    else {
+        // if no namespace prefix, prepend the appropriate one & return
+        if (node.PrefixLabel().empty()) {
+            static const string colon = ":";
+            XsdType xsdType = node.Xsd();
+            if (xsdType == XsdType::NONE)
+                xsdType = registry.XsdForElement(node.LocalNameLabel().to_string());
+            return registry.Namespace(xsdType).Name() + colon + node.LocalNameLabel().to_string();
+        }
+        // otherwise, has prefix - return full name
+        else
+            return node.QualifiedNameLabel();
+    }
 }
 
 static
 void ToXml(const DataSetElement& node,
            const NamespaceRegistry& registry,
+           map<XsdType, string>& xsdPrefixesUsed,
            pugi::xml_node& parentXml)
 {
     // create child of parent, w/ label & text
@@ -74,6 +98,11 @@ void ToXml(const DataSetElement& node,
     if (!node.Text().empty())
         xmlNode.text().set(node.Text().c_str());
 
+    // store XSD type for later
+    const string prefix = Prefix(label);
+    if (!prefix.empty())
+        xsdPrefixesUsed[node.Xsd()] = prefix;
+
     // add attributes
     auto attrIter = node.Attributes().cbegin();
     auto attrEnd  = node.Attributes().cend();
@@ -92,7 +121,7 @@ void ToXml(const DataSetElement& node,
     auto childEnd  = node.Children().cend();
     for ( ; childIter != childEnd; ++childIter) {
         const DataSetElement& child = (*childIter);
-        ToXml(child, registry, xmlNode);
+        ToXml(child, registry, xsdPrefixesUsed, xmlNode);
     }
 }
 
@@ -108,7 +137,7 @@ void XmlWriter::ToStream(const DataSetBase& dataset,
     const NamespaceRegistry& registry = dataset.Namespaces();
 
     // create top-level dataset XML node
-    const string& label = OutputName(dataset, registry);
+    const string& label = internal::OutputName(dataset, registry);
     if (label.empty())
         throw std::runtime_error("could not convert dataset node to XML");
     pugi::xml_node root = doc.append_child(label.c_str());
@@ -129,12 +158,15 @@ void XmlWriter::ToStream(const DataSetBase& dataset,
         attr.set_value(value.c_str());
     }
 
+    map<XsdType, string> xsdPrefixesUsed;
+    xsdPrefixesUsed[dataset.Xsd()] = Prefix(label);
+
     // iterate children, recursively building up subtree
     auto childIter = dataset.Children().cbegin();
     auto childEnd  = dataset.Children().cend();
     for ( ; childIter != childEnd; ++childIter) {
         const DataSetElement& child = (*childIter);
-        ToXml(child, registry, root);
+        ToXml(child, registry, xsdPrefixesUsed, root);
     }
 
     // write XML to stream
@@ -142,6 +174,41 @@ void XmlWriter::ToStream(const DataSetBase& dataset,
     decl.append_attribute("version")  = "1.0";
     decl.append_attribute("encoding") = "utf-8";
 
+    // add XSD namespace attributes
+    pugi::xml_attribute xmlnsDefaultAttribute = root.attribute("xmlns");
+    if (xmlnsDefaultAttribute.empty()) {
+        xmlnsDefaultAttribute = root.append_attribute("xmlns");
+        xmlnsDefaultAttribute.set_value(registry.DefaultNamespace().Uri().c_str());
+    }
+    pugi::xml_attribute xsiAttribute = root.attribute("xmlns:xsi");
+    if (xsiAttribute.empty()) {
+        xsiAttribute = root.append_attribute("xmlns:xsi");
+        xsiAttribute.set_value("http://www.w3.org/2001/XMLSchema-instance");
+    }
+    pugi::xml_attribute xsiSchemaLocationAttribute = root.attribute("xsi:schemaLocation");
+    if (xsiSchemaLocationAttribute.empty()) {
+        xsiSchemaLocationAttribute = root.append_attribute("xsi:schemaLocation");
+        xsiSchemaLocationAttribute.set_value(registry.DefaultNamespace().Uri().c_str());
+    }
+
+    static const string xmlnsPrefix = "xmlns:";
+    map<XsdType, string>::const_iterator prefixIter = xsdPrefixesUsed.cbegin();
+    map<XsdType, string>::const_iterator prefixEnd  = xsdPrefixesUsed.cend();
+    for ( ; prefixIter != prefixEnd; ++prefixIter ) {
+        const XsdType& xsd = prefixIter->first;
+        const string& prefix = prefixIter->second;
+        if (xsd == XsdType::NONE || prefix.empty())
+            continue;
+        const NamespaceInfo& nsInfo = registry.Namespace(xsd);
+        assert(nsInfo.Name() == prefix);
+        const string xmlnsName = xmlnsPrefix + prefix;
+        pugi::xml_attribute xmlnsAttribute = root.attribute(xmlnsName.c_str());
+        if (xmlnsAttribute.empty()) {
+            xmlnsAttribute = root.append_attribute(xmlnsName.c_str());
+            xmlnsAttribute.set_value(nsInfo.Uri().c_str());
+        }
+    }
+
     // "no escapes" to allow explicit ">" "<" comparison operators in filter parameters
     // we may remove this if/when comparison is separated from the value
     doc.save(out, "\t", pugi::format_default | pugi::format_no_escapes, pugi::encoding_utf8);
diff --git a/src/ZmwGroupQuery.cpp b/src/ZmwGroupQuery.cpp
index bf76ce3..d33b34a 100644
--- a/src/ZmwGroupQuery.cpp
+++ b/src/ZmwGroupQuery.cpp
@@ -32,112 +32,81 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file ZmwGroupQuery.cpp
+/// \brief Implements the ZmwGroupQuery class.
+//
 // Author: Derek Barnett
 
 #include "pbbam/ZmwGroupQuery.h"
-#include "pbbam/PbiIndex.h"
-#include "pbbam/internal/BamRecordSort.h"
-#include "pbbam/internal/MergeStrategy.h"
+#include "pbbam/BamRecord.h"
+#include "pbbam/CompositeBamReader.h"
+#include "pbbam/PbiFilterTypes.h"
 #include "MemoryUtils.h"
 #include <algorithm>
-#include <map>
+#include <deque>
 using namespace PacBio;
 using namespace PacBio::BAM;
 using namespace PacBio::BAM::internal;
-//using namespace PacBio::BAM::staging;
 using namespace std;
 
-namespace PacBio {
-namespace BAM {
-namespace internal {
-
-class ZmwQueryGroupIterator : public IBamFileGroupIterator
+struct ZmwGroupQuery::ZmwGroupQueryPrivate
 {
-public:
-    ZmwQueryGroupIterator(const std::vector<int32_t>& zmwWhitelist,
-                          const BamFile& file)
-        : IBamFileGroupIterator(file)
-    {
-        // init BAM file for reading
-        htsFile_.reset(sam_open(file.Filename().c_str(), "rb"));
-        if (!htsFile_)
-            throw std::runtime_error("could not open BAM file for reading");
+    typedef PbiFilterCompositeBamReader<Compare::Zmw> ReaderType;
+    typedef std::unique_ptr<ReaderType> ReaderPtr;
 
-        htsHeader_.reset(sam_hdr_read(htsFile_.get()));
-        if (!htsHeader_)
-            throw std::runtime_error("could not read BAM header data");
-
-        // open index & query for ZMWs
-        PbiIndex index(file.PacBioIndexFilename());
-        for (int32_t zmw : zmwWhitelist)
-            zmwGroups_[zmw] = index.Lookup(ZmwIndexRequest(zmw));
+    ZmwGroupQueryPrivate(const std::vector<int32_t>& zmwWhitelist,
+                         const DataSet& dataset)
+        : whitelist_(zmwWhitelist.cbegin(), zmwWhitelist.cend())
+        , reader_(nullptr)
+    {
+        std::sort(whitelist_.begin(), whitelist_.end());
+        whitelist_.erase(std::unique(whitelist_.begin(),
+                                     whitelist_.end()),
+                         whitelist_.end());
+
+        if (!whitelist_.empty()) {
+            reader_ = ReaderPtr(new ReaderType(PbiZmwFilter{whitelist_.front()}, dataset));
+            whitelist_.pop_front();
+        }
     }
 
-public:
-    bool GetNext(std::vector<BamRecord>& r)
+    bool GetNext(std::vector<BamRecord>& records)
     {
-        r.clear();
-        if (zmwGroups_.empty())
+        records.clear();
+        if (!reader_)
             return false;
 
-        BamRecord record(header_);
-        const IndexResultBlocks& blocks = zmwGroups_.cbegin()->second;
-        for (const IndexResultBlock& block : blocks) {
-
-            // seek to first record in block
-            const int seekResult = bgzf_seek(htsFile_.get()->fp.bgzf, block.virtualOffset_, SEEK_SET);
-            if (seekResult == -1)
-                throw std::runtime_error("could not seek in BAM file");
+        // get all records matching ZMW
+        BamRecord r;
+        while (reader_->GetNext(r))
+            records.push_back(r);
 
-            // read block records
-            for (size_t i = 0; i < block.numReads_; ++i) {
-                const int readResult = sam_read1(htsFile_.get(),
-                                                 htsHeader_.get(),
-                                                 internal::BamRecordMemory::GetRawData(record).get());
-//                record.header_ = fileData_.Header();
-
-                if (readResult >= 0)           // success
-                    r.push_back(record);
-                else if (readResult == -1)     // normal EOF
-                    break;
-                else                           // error (truncated file, etc)
-                    throw std::runtime_error("corrupted file, may be truncated");
-            }
+        // set next ZMW (if any left)
+        if (!whitelist_.empty()) {
+            reader_->Filter(PbiZmwFilter{whitelist_.front()});
+            whitelist_.pop_front();
         }
 
-        // pop zmw info & return success
-        zmwGroups_.erase(zmwGroups_.begin());
-        return !r.empty();
-    }
+        // otherwise destroy reader, next iteration will return false
+        else
+            reader_.reset(nullptr);
 
-    bool InSameGroup(const BamRecord& lhs, const BamRecord& rhs) const
-    { return lhs.HoleNumber() == rhs.HoleNumber(); }
+        return true;
+    }
 
-private:
-    unique_ptr<samFile,   internal::HtslibFileDeleter>   htsFile_;
-    unique_ptr<bam_hdr_t, internal::HtslibHeaderDeleter> htsHeader_;
-    map<int32_t, IndexResultBlocks> zmwGroups_;
+    std::deque<int32_t> whitelist_;
+    ReaderPtr reader_;
 };
 
-} // namespace internal
-} // namespace BAM
-} // namespace PacBio
-
-ZmwGroupQuery::ZmwGroupQuery(const DataSet& dataset)
-    : IGroupQuery(dataset)
-    , whitelist_(/* all dataset ZMWs */)
-{
-    mergeStrategy_.reset(new GroupMergeStrategy<ByZmw>(CreateIterators()));
-}
-
 ZmwGroupQuery::ZmwGroupQuery(const std::vector<int32_t>& zmwWhitelist,
                              const DataSet& dataset)
-    : IGroupQuery(dataset)
-    , whitelist_(zmwWhitelist)
-{
-    mergeStrategy_.reset(new GroupMergeStrategy<ByZmw>(CreateIterators()));
-}
+    : internal::IGroupQuery()
+    , d_(new ZmwGroupQueryPrivate(zmwWhitelist, dataset))
+{ }
+
+ZmwGroupQuery::~ZmwGroupQuery(void) { }
 
-ZmwGroupQuery::FileIterPtr ZmwGroupQuery::CreateIterator(const BamFile& file)
-{ return FileIterPtr(new ZmwQueryGroupIterator(whitelist_, file)); }
+bool ZmwGroupQuery::GetNext(std::vector<BamRecord>& records)
+{ return d_->GetNext(records); }
diff --git a/src/ZmwQuery.cpp b/src/ZmwQuery.cpp
index 2b25723..7a45541 100644
--- a/src/ZmwQuery.cpp
+++ b/src/ZmwQuery.cpp
@@ -32,107 +32,38 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file ZmwQuery.cpp
+/// \brief Implements the ZmwQuery class.
+//
 // Author: Derek Barnett
 
 #include "pbbam/ZmwQuery.h"
-#include "pbbam/PbiIndex.h"
-#include "pbbam/internal/BamRecordSort.h"
-#include "pbbam/internal/MergeStrategy.h"
-#include "MemoryUtils.h"
-#include <htslib/bgzf.h>
-#include <htslib/sam.h>
-#include <algorithm>
+#include "pbbam/PbiFilterTypes.h"
+#include "pbbam/CompositeBamReader.h"
 using namespace PacBio;
 using namespace PacBio::BAM;
 using namespace PacBio::BAM::internal;
-//using namespace PacBio::BAM::staging;
 using namespace std;
 
-namespace PacBio {
-namespace BAM {
-namespace internal {
-
-class ZmwQueryIterator : public IBamFileIterator
+struct ZmwQuery::ZmwQueryPrivate
 {
-public:
-    ZmwQueryIterator(const std::vector<int32_t>& zmwWhitelist,
-                     const BamFile& bamFile)
-        : internal::IBamFileIterator(bamFile)
-        , currentBlockReadCount_(0)
-        , htsFile_(nullptr)
-        , htsHeader_(nullptr)
-    {
-        // init BAM file for reading
-        htsFile_.reset(sam_open(bamFile.Filename().c_str(), "rb"));
-        if (!htsFile_)
-            throw std::runtime_error("could not open BAM file for reading");
-
-        htsHeader_.reset(sam_hdr_read(htsFile_.get()));
-        if (!htsHeader_)
-            throw std::runtime_error("could not read BAM header data");
-
-        // open index & query for ZMWs
-        PbiIndex index(bamFile.PacBioIndexFilename());
-        blocks_ = index.Lookup(ZmwIndexMultiRequest(zmwWhitelist));
-    }
-
-public:
-    bool GetNext(BamRecord& r){
-
-        // no data to fetch, return false
-        if (blocks_.empty())
-            return false;
+    ZmwQueryPrivate(const std::vector<int32_t>& zmwWhitelist,
+                    const DataSet& dataset)
+        : reader_(PbiZmwFilter(zmwWhitelist), dataset)
+    { }
 
-        // maybe seek to block
-        if (currentBlockReadCount_ == 0) {
-            const int seekResult = bgzf_seek(htsFile_.get()->fp.bgzf, blocks_.at(0).virtualOffset_, SEEK_SET);
-            if (seekResult == -1)
-                throw std::runtime_error("could not seek in BAM file");
-        }
-
-        // read next record
-//        r = BamRecord(fileData_.Header());
-        const int readResult = sam_read1(htsFile_.get(),
-                                         htsHeader_.get(),
-                                         internal::BamRecordMemory::GetRawData(r).get());
-//        r.header_ = fileData_.Header();
-        r.header_ = header_;
-
-        // update counters
-        ++currentBlockReadCount_;
-        if (currentBlockReadCount_ == blocks_.at(0).numReads_) {
-            blocks_.pop_front();
-            currentBlockReadCount_ = 0;
-        }
-
-        // return result of reading
-        if (readResult >= 0)           // success
-            return true;
-        else if (readResult == -1)     // normal EOF
-            return false;
-        else                           // error (truncated file, etc)
-            throw std::runtime_error("corrupted file, may be truncated");
-    }
-
-private:
-    IndexResultBlocks blocks_;
-    size_t currentBlockReadCount_;
-    unique_ptr<samFile,   internal::HtslibFileDeleter>   htsFile_;
-    unique_ptr<bam_hdr_t, internal::HtslibHeaderDeleter> htsHeader_;
+    PbiFilterCompositeBamReader<Compare::Zmw> reader_;
 };
 
-} // namespace internal
-} // namespace BAM
-} // namespace PacBio
-
-ZmwQuery::ZmwQuery(const std::vector<int32_t> &zmwWhitelist,
+ZmwQuery::ZmwQuery(const std::vector<int32_t>& zmwWhitelist,
                    const DataSet& dataset)
-    : internal::IQuery(dataset)
-    , whitelist_(zmwWhitelist)
-{
-    mergeStrategy_.reset(new MergeStrategy<ByZmw>(CreateIterators()));
-}
+    : internal::IQuery()
+    , d_(new ZmwQueryPrivate(zmwWhitelist, dataset))
+{ }
+
+ZmwQuery::~ZmwQuery(void) { }
 
-ZmwQuery::FileIterPtr ZmwQuery::CreateIterator(const BamFile& bamFile)
-{ return FileIterPtr(new ZmwQueryIterator(whitelist_, bamFile)); }
+bool ZmwQuery::GetNext(BamRecord &r)
+{ return d_->reader_.GetNext(r); }
diff --git a/src/VirtualRegionTypeMap.cpp b/src/ZmwTypeMap.cpp
similarity index 86%
copy from src/VirtualRegionTypeMap.cpp
copy to src/ZmwTypeMap.cpp
index 4839b35..2eea7b7 100644
--- a/src/VirtualRegionTypeMap.cpp
+++ b/src/ZmwTypeMap.cpp
@@ -32,18 +32,22 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
-
+//
+// File Description
+/// \file ZmwTypeMap.cpp
+/// \brief Implements the ZmwTypeMap class.
+//
 // Author: Armin Töpfer
 
-#include "pbbam/virtual/VirtualRegionTypeMap.h"
+#include "pbbam/ZmwTypeMap.h"
 
 using namespace PacBio;
 using namespace PacBio::BAM;
 
-std::map<char, VirtualRegionType> VirtualRegionTypeMap::ParseChar
+std::map<char, ZmwType> ZmwTypeMap::ParseChar
 {
-	{ 'A' , VirtualRegionType::ADAPTER },
-	{ 'B' , VirtualRegionType::BARCODE },
-	{ 'H' , VirtualRegionType::HQREGION },
-	{ 'L' , VirtualRegionType::LQREGION }
-};
\ No newline at end of file
+	{ 'C' , ZmwType::CONTROL   },
+	{ 'M' , ZmwType::MALFORMED },
+	{ 'N' , ZmwType::NORMAL    },
+	{ 'S' , ZmwType::SENTINEL  }
+};
diff --git a/src/VirtualPolymeraseReader.cpp b/src/ZmwWhitelistVirtualReader.cpp
similarity index 50%
copy from src/VirtualPolymeraseReader.cpp
copy to src/ZmwWhitelistVirtualReader.cpp
index 271a96e..e716e1c 100644
--- a/src/VirtualPolymeraseReader.cpp
+++ b/src/ZmwWhitelistVirtualReader.cpp
@@ -32,33 +32,36 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
+//
+// File Description
+/// \file ZmwWhitelistVirtualReader.cpp
+/// \brief Implements the ZmwWhitelistVirtualReader class.
+//
+// Author: Derek Barnett
 
-// Author: Armin Töpfer
-
-#include <stdexcept>
-
-#include "pbbam/virtual/VirtualPolymeraseReader.h"
+#include "pbbam/virtual/ZmwWhitelistVirtualReader.h"
+#include "pbbam/PbiFilterTypes.h"
 #include "pbbam/ReadGroupInfo.h"
-
+#include <set>
+#include <stdexcept>
 using namespace PacBio;
 using namespace PacBio::BAM;
+using namespace std;
 
-VirtualPolymeraseReader::VirtualPolymeraseReader(
-    const std::string& primaryBamFilePath, const std::string& scrapsBamFilePath)
+ZmwWhitelistVirtualReader::ZmwWhitelistVirtualReader(const vector<int32_t>& zmwWhitelist,
+                                                     const string& primaryBamFilePath,
+                                                     const string& scrapsBamFilePath)
     : primaryBamFilePath_(primaryBamFilePath)
     , scrapsBamFilePath_(scrapsBamFilePath)
 {
-    primaryBamFile_ = std::unique_ptr<BamFile>(new BamFile(primaryBamFilePath_));
-    primaryQuery_   = std::unique_ptr<EntireFileQuery>(new EntireFileQuery(*primaryBamFile_));
-    primaryIt_      = primaryQuery_->begin();
-
-    scrapsBamFile_  = std::unique_ptr<BamFile>(new BamFile(scrapsBamFilePath_));
-    scrapsQuery_    = std::unique_ptr<EntireFileQuery>(new EntireFileQuery(*scrapsBamFile_));
-    scrapsIt_       = scrapsQuery_->begin();
-
-    polyHeader_     = std::unique_ptr<BamHeader>(
-                        new BamHeader(primaryBamFile_->Header().ToSam()));
+    // setup BAM files/readers
+    primaryBamFile_ = unique_ptr<BamFile>(new BamFile(primaryBamFilePath_));
+    scrapsBamFile_  = unique_ptr<BamFile>(new BamFile(scrapsBamFilePath_));
+    primaryReader_  = unique_ptr<PbiIndexedBamReader>(new PbiIndexedBamReader(*primaryBamFile_));
+    scrapsReader_   = unique_ptr<PbiIndexedBamReader>(new PbiIndexedBamReader(*scrapsBamFile_));
 
+    // setup new header for stitched data
+    polyHeader_ = unique_ptr<BamHeader>(new BamHeader(primaryBamFile_->Header().ToSam()));
     auto readGroups = polyHeader_->ReadGroups();
     if (readGroups.empty())
         throw std::runtime_error("Bam header of the primary bam has no read groups.");
@@ -72,42 +75,67 @@ VirtualPolymeraseReader::VirtualPolymeraseReader(
         polyHeader_->ClearReadGroups();
     }
     polyHeader_->ReadGroups(readGroups);
+
+    // up front, drop whitelisted ZMWs that are found in neither file
+    PreFilterZmws(zmwWhitelist);
 }
 
+bool ZmwWhitelistVirtualReader::HasNext(void) const
+{ return !zmwWhitelist_.empty(); }
+
 // This method is not thread safe
-VirtualPolymeraseBamRecord VirtualPolymeraseReader::Next()
+VirtualPolymeraseBamRecord ZmwWhitelistVirtualReader::Next(void)
 {
     auto bamRecordVec = NextRaw();
     VirtualPolymeraseBamRecord stitched(std::move(bamRecordVec), *polyHeader_);
     return std::move(stitched);
 }
 
-std::vector<BamRecord> VirtualPolymeraseReader::NextRaw()
+vector<BamRecord> ZmwWhitelistVirtualReader::NextRaw(void)
 {
-    std::vector<BamRecord> bamRecordVec;
+    auto result = vector<BamRecord>{ };
+    if (!HasNext())
+        return result;
+
+    const auto& zmw = zmwWhitelist_.front();
+    primaryReader_->Filter(PbiZmwFilter{zmw});
+    scrapsReader_->Filter(PbiZmwFilter{zmw});
 
-    // Current hole number, the smallest of scraps and primary.
-    // It can be that the next ZMW is scrap only.
-    int currentHoleNumber = std::min((*primaryIt_).HoleNumber(), (*scrapsIt_).HoleNumber());
-    // collect subreads or hqregions
-    while (primaryIt_ != primaryQuery_->end() && currentHoleNumber == (*primaryIt_).HoleNumber())
-        bamRecordVec.push_back(*primaryIt_++);
-    
-    // collect scraps
-    while (scrapsIt_ != scrapsQuery_->end() && currentHoleNumber == (*scrapsIt_).HoleNumber())
-        bamRecordVec.push_back(*scrapsIt_++);
+    auto record = BamRecord{ };
+    while (primaryReader_->GetNext(record))
+        result.push_back(record);
+    while (scrapsReader_->GetNext(record))
+        result.push_back(record);
 
-    return bamRecordVec;
+    zmwWhitelist_.pop_front();
+    return result;
 }
 
-bool VirtualPolymeraseReader::HasNext()
+void ZmwWhitelistVirtualReader::PreFilterZmws(const std::vector<int32_t>& zmwWhitelist)
 {
-	// Return true until both iterators are at the end of the query
-	return primaryIt_ != primaryQuery_->end() || scrapsIt_ != scrapsQuery_->end();
+    // fetch input ZMWs
+    const PbiRawData primaryIndex(primaryBamFile_->PacBioIndexFilename());
+    const PbiRawData scrapsIndex(scrapsBamFile_->PacBioIndexFilename());
+    const auto& primaryZmws = primaryIndex.BasicData().holeNumber_;
+    const auto& scrapsZmws = scrapsIndex.BasicData().holeNumber_;
+
+    // toss them all into a set (for uniqueness & lookup here soon)
+    set<int32_t> inputZmws;
+    for (const auto& zmw : primaryZmws)
+        inputZmws.insert(zmw);
+    for (const auto& zmw : scrapsZmws)
+        inputZmws.insert(zmw);
+
+    // check our requested whitelist against files' ZMWs, keep if found
+    const auto inputEnd = inputZmws.cend();
+    for (const int32_t zmw : zmwWhitelist) {
+        if (inputZmws.find(zmw) != inputEnd)
+            zmwWhitelist_.push_back(zmw);
+    }
 }
 
-BamHeader VirtualPolymeraseReader::PrimaryHeader()
+BamHeader ZmwWhitelistVirtualReader::PrimaryHeader(void) const
 { return primaryBamFile_->Header(); }
 
-BamHeader VirtualPolymeraseReader::ScrapsHeader()
+BamHeader ZmwWhitelistVirtualReader::ScrapsHeader(void) const
 { return scrapsBamFile_->Header(); }
diff --git a/src/files.cmake b/src/files.cmake
index 3a399d0..29243ea 100644
--- a/src/files.cmake
+++ b/src/files.cmake
@@ -11,9 +11,14 @@ set( PacBioBAM_H
     ${PacBioBAM_IncludeDir}/pbbam/BamRecordBuilder.h
     ${PacBioBAM_IncludeDir}/pbbam/BamRecordImpl.h
     ${PacBioBAM_IncludeDir}/pbbam/BamTagCodec.h
+    ${PacBioBAM_IncludeDir}/pbbam/BaiIndexedBamReader.h
+    ${PacBioBAM_IncludeDir}/pbbam/BamReader.h
+    ${PacBioBAM_IncludeDir}/pbbam/CompositeBamReader.h
     ${PacBioBAM_IncludeDir}/pbbam/BamWriter.h
+    ${PacBioBAM_IncludeDir}/pbbam/BarcodeQuery.h
     ${PacBioBAM_IncludeDir}/pbbam/Cigar.h
     ${PacBioBAM_IncludeDir}/pbbam/CigarOperation.h
+    ${PacBioBAM_IncludeDir}/pbbam/Compare.h
     ${PacBioBAM_IncludeDir}/pbbam/Config.h
     ${PacBioBAM_IncludeDir}/pbbam/DataSet.h
     ${PacBioBAM_IncludeDir}/pbbam/DataSetTypes.h
@@ -22,33 +27,52 @@ set( PacBioBAM_H
     ${PacBioBAM_IncludeDir}/pbbam/Frames.h
     ${PacBioBAM_IncludeDir}/pbbam/GenomicInterval.h
     ${PacBioBAM_IncludeDir}/pbbam/GenomicIntervalQuery.h
-    ${PacBioBAM_IncludeDir}/pbbam/GroupQuery.h
-    ${PacBioBAM_IncludeDir}/pbbam/GroupQueryBase.h
     ${PacBioBAM_IncludeDir}/pbbam/IndexedFastaReader.h
     ${PacBioBAM_IncludeDir}/pbbam/Interval.h
     ${PacBioBAM_IncludeDir}/pbbam/LocalContextFlags.h
     ${PacBioBAM_IncludeDir}/pbbam/Orientation.h
+    ${PacBioBAM_IncludeDir}/pbbam/PbiBasicTypes.h
     ${PacBioBAM_IncludeDir}/pbbam/PbiBuilder.h
     ${PacBioBAM_IncludeDir}/pbbam/PbiFile.h
+    ${PacBioBAM_IncludeDir}/pbbam/PbiFilter.h
+    ${PacBioBAM_IncludeDir}/pbbam/PbiFilterQuery.h
+    ${PacBioBAM_IncludeDir}/pbbam/PbiFilterTypes.h
     ${PacBioBAM_IncludeDir}/pbbam/PbiIndex.h
+    ${PacBioBAM_IncludeDir}/pbbam/PbiIndexedBamReader.h
+    ${PacBioBAM_IncludeDir}/pbbam/PbiLookupData.h
     ${PacBioBAM_IncludeDir}/pbbam/PbiRawData.h
     ${PacBioBAM_IncludeDir}/pbbam/Position.h
     ${PacBioBAM_IncludeDir}/pbbam/ProgramInfo.h
+    ${PacBioBAM_IncludeDir}/pbbam/QNameQuery.h
     ${PacBioBAM_IncludeDir}/pbbam/QualityValue.h
     ${PacBioBAM_IncludeDir}/pbbam/QualityValues.h
-    ${PacBioBAM_IncludeDir}/pbbam/QueryBase.h
+    ${PacBioBAM_IncludeDir}/pbbam/ReadAccuracyQuery.h
     ${PacBioBAM_IncludeDir}/pbbam/ReadGroupInfo.h
     ${PacBioBAM_IncludeDir}/pbbam/SamTagCodec.h
     ${PacBioBAM_IncludeDir}/pbbam/SequenceInfo.h
     ${PacBioBAM_IncludeDir}/pbbam/Strand.h  
+    ${PacBioBAM_IncludeDir}/pbbam/SubreadLengthQuery.h
     ${PacBioBAM_IncludeDir}/pbbam/Tag.h
     ${PacBioBAM_IncludeDir}/pbbam/TagCollection.h
 #    ${PacBioBAM_IncludeDir}/pbbam/UnmappedReadsQuery.h
     ${PacBioBAM_IncludeDir}/pbbam/ZmwGroupQuery.h
     ${PacBioBAM_IncludeDir}/pbbam/ZmwQuery.h
+    ${PacBioBAM_IncludeDir}/pbbam/ZmwType.h
+    ${PacBioBAM_IncludeDir}/pbbam/ZmwTypeMap.h
 
-    # internal headers
-    ${PacBioBAM_IncludeDir}/pbbam/internal/BamRecordSort.h
+    # exception headers
+    ${PacBioBAM_IncludeDir}/pbbam/exception/InvalidSequencingChemistryException.h
+
+    # API-internal headers & inline files
+    ${PacBioBAM_IncludeDir}/pbbam/internal/Accuracy.inl
+    ${PacBioBAM_IncludeDir}/pbbam/internal/BamHeader.inl
+    ${PacBioBAM_IncludeDir}/pbbam/internal/BamRecord.inl
+    ${PacBioBAM_IncludeDir}/pbbam/internal/BamRecordBuilder.inl
+    ${PacBioBAM_IncludeDir}/pbbam/internal/BamRecordImpl.inl
+    ${PacBioBAM_IncludeDir}/pbbam/internal/Cigar.inl
+    ${PacBioBAM_IncludeDir}/pbbam/internal/CigarOperation.inl
+    ${PacBioBAM_IncludeDir}/pbbam/internal/Compare.inl
+    ${PacBioBAM_IncludeDir}/pbbam/internal/CompositeBamReader.inl
     ${PacBioBAM_IncludeDir}/pbbam/internal/DataSet.inl
     ${PacBioBAM_IncludeDir}/pbbam/internal/DataSetBaseTypes.h
     ${PacBioBAM_IncludeDir}/pbbam/internal/DataSetBaseTypes.inl
@@ -57,25 +81,36 @@ set( PacBioBAM_H
     ${PacBioBAM_IncludeDir}/pbbam/internal/DataSetListElement.h
     ${PacBioBAM_IncludeDir}/pbbam/internal/DataSetListElement.inl
     ${PacBioBAM_IncludeDir}/pbbam/internal/DataSetTypes.inl
-    ${PacBioBAM_IncludeDir}/pbbam/internal/FilterEngine.h
-    ${PacBioBAM_IncludeDir}/pbbam/internal/IBamFileIterator.h
-    ${PacBioBAM_IncludeDir}/pbbam/internal/IMergeStrategy.h
-    ${PacBioBAM_IncludeDir}/pbbam/internal/MergeItem.h
-    ${PacBioBAM_IncludeDir}/pbbam/internal/MergeStrategy.h
-    ${PacBioBAM_IncludeDir}/pbbam/internal/PbiIndex_p.h
-    ${PacBioBAM_IncludeDir}/pbbam/internal/PbiIndex_p.inl
+    ${PacBioBAM_IncludeDir}/pbbam/internal/Frames.inl
+    ${PacBioBAM_IncludeDir}/pbbam/internal/GenomicInterval.inl
+    ${PacBioBAM_IncludeDir}/pbbam/internal/Interval.inl
+    ${PacBioBAM_IncludeDir}/pbbam/internal/PbiBasicTypes.inl
+    ${PacBioBAM_IncludeDir}/pbbam/internal/PbiFilter.inl
+    ${PacBioBAM_IncludeDir}/pbbam/internal/PbiFilterTypes.inl
+    ${PacBioBAM_IncludeDir}/pbbam/internal/PbiIndex.inl
+    ${PacBioBAM_IncludeDir}/pbbam/internal/PbiLookupData.inl
+    ${PacBioBAM_IncludeDir}/pbbam/internal/PbiRawData.inl
+    ${PacBioBAM_IncludeDir}/pbbam/internal/ProgramInfo.inl
+    ${PacBioBAM_IncludeDir}/pbbam/internal/QualityValue.inl
+    ${PacBioBAM_IncludeDir}/pbbam/internal/QualityValues.inl
     ${PacBioBAM_IncludeDir}/pbbam/internal/QueryBase.h
-    ${PacBioBAM_IncludeDir}/pbbam/internal/SequentialMergeStrategy.h
+    ${PacBioBAM_IncludeDir}/pbbam/internal/QueryBase.inl
+    ${PacBioBAM_IncludeDir}/pbbam/internal/ReadGroupInfo.inl
+    ${PacBioBAM_IncludeDir}/pbbam/internal/SequenceInfo.inl
     ${PacBioBAM_IncludeDir}/pbbam/internal/Tag.inl
 
     # virtual headers
     ${PacBioBAM_IncludeDir}/pbbam/virtual/VirtualPolymeraseBamRecord.h
+    ${PacBioBAM_IncludeDir}/pbbam/virtual/VirtualPolymeraseCompositeReader.h
     ${PacBioBAM_IncludeDir}/pbbam/virtual/VirtualPolymeraseReader.h
     ${PacBioBAM_IncludeDir}/pbbam/virtual/VirtualRegion.h
     ${PacBioBAM_IncludeDir}/pbbam/virtual/VirtualRegionType.h
     ${PacBioBAM_IncludeDir}/pbbam/virtual/VirtualRegionTypeMap.h
+    ${PacBioBAM_IncludeDir}/pbbam/virtual/ZmwWhitelistVirtualReader.h
 
+    # library-internal headers
     ${PacBioBAM_SourceDir}/AssertUtils.h
+    ${PacBioBAM_SourceDir}/ChemistryTable.h
     ${PacBioBAM_SourceDir}/DataSetIO.h
     ${PacBioBAM_SourceDir}/DataSetUtils.h
     ${PacBioBAM_SourceDir}/FileUtils.h
@@ -94,19 +129,23 @@ set( PacBioBAM_H
 # sources
 set( PacBioBAM_CPP
 
-    # main API headers
     ${PacBioBAM_SourceDir}/Accuracy.cpp
     ${PacBioBAM_SourceDir}/AlignmentPrinter.cpp
     ${PacBioBAM_SourceDir}/AssertUtils.cpp
+    ${PacBioBAM_SourceDir}/BaiIndexedBamReader.cpp
     ${PacBioBAM_SourceDir}/BamFile.cpp
     ${PacBioBAM_SourceDir}/BamHeader.cpp
+    ${PacBioBAM_SourceDir}/BamReader.cpp
     ${PacBioBAM_SourceDir}/BamRecord.cpp
     ${PacBioBAM_SourceDir}/BamRecordBuilder.cpp
     ${PacBioBAM_SourceDir}/BamRecordImpl.cpp
     ${PacBioBAM_SourceDir}/BamTagCodec.cpp
     ${PacBioBAM_SourceDir}/BamWriter.cpp
+    ${PacBioBAM_SourceDir}/BarcodeQuery.cpp
+    ${PacBioBAM_SourceDir}/ChemistryTable.cpp
     ${PacBioBAM_SourceDir}/Cigar.cpp
     ${PacBioBAM_SourceDir}/CigarOperation.cpp
+    ${PacBioBAM_SourceDir}/Compare.cpp
     ${PacBioBAM_SourceDir}/Config.cpp
     ${PacBioBAM_SourceDir}/DataSet.cpp
     ${PacBioBAM_SourceDir}/DataSetBaseTypes.cpp
@@ -115,37 +154,43 @@ set( PacBioBAM_CPP
     ${PacBioBAM_SourceDir}/DataSetTypes.cpp
     ${PacBioBAM_SourceDir}/DataSetXsd.cpp
     ${PacBioBAM_SourceDir}/EntireFileQuery.cpp
-    ${PacBioBAM_SourceDir}/FilterEngine.cpp
+    ${PacBioBAM_SourceDir}/FileUtils.cpp
     ${PacBioBAM_SourceDir}/FofnReader.cpp
     ${PacBioBAM_SourceDir}/Frames.cpp
     ${PacBioBAM_SourceDir}/GenomicInterval.cpp
     ${PacBioBAM_SourceDir}/GenomicIntervalQuery.cpp
-    ${PacBioBAM_SourceDir}/GroupQuery.cpp
     ${PacBioBAM_SourceDir}/IndexedFastaReader.cpp
     ${PacBioBAM_SourceDir}/MemoryUtils.cpp
     ${PacBioBAM_SourceDir}/PbiBuilder.cpp
     ${PacBioBAM_SourceDir}/PbiFile.cpp
+    ${PacBioBAM_SourceDir}/PbiFilter.cpp
+    ${PacBioBAM_SourceDir}/PbiFilterQuery.cpp
+    ${PacBioBAM_SourceDir}/PbiFilterTypes.cpp
     ${PacBioBAM_SourceDir}/PbiIndex.cpp
+    ${PacBioBAM_SourceDir}/PbiIndexedBamReader.cpp
     ${PacBioBAM_SourceDir}/PbiIndexIO.cpp
     ${PacBioBAM_SourceDir}/PbiRawData.cpp
     ${PacBioBAM_SourceDir}/ProgramInfo.cpp
+    ${PacBioBAM_SourceDir}/QNameQuery.cpp
     ${PacBioBAM_SourceDir}/QualityValue.cpp
-    ${PacBioBAM_SourceDir}/QueryBase.cpp
+    ${PacBioBAM_SourceDir}/ReadAccuracyQuery.cpp
     ${PacBioBAM_SourceDir}/ReadGroupInfo.cpp
     ${PacBioBAM_SourceDir}/SamTagCodec.cpp
     ${PacBioBAM_SourceDir}/SequenceInfo.cpp
+    ${PacBioBAM_SourceDir}/SubreadLengthQuery.cpp
     ${PacBioBAM_SourceDir}/Tag.cpp
     ${PacBioBAM_SourceDir}/TagCollection.cpp
 #    ${PacBioBAM_SourceDir}/UnmappedReadsQuery.cpp
+    ${PacBioBAM_SourceDir}/VirtualPolymeraseBamRecord.cpp
+    ${PacBioBAM_SourceDir}/VirtualPolymeraseCompositeReader.cpp
+    ${PacBioBAM_SourceDir}/VirtualPolymeraseReader.cpp
+    ${PacBioBAM_SourceDir}/VirtualRegionTypeMap.cpp
     ${PacBioBAM_SourceDir}/XmlReader.cpp
     ${PacBioBAM_SourceDir}/XmlWriter.cpp
     ${PacBioBAM_SourceDir}/ZmwGroupQuery.cpp
     ${PacBioBAM_SourceDir}/ZmwQuery.cpp
-
-    # virtual
-    ${PacBioBAM_SourceDir}/VirtualPolymeraseBamRecord.cpp
-    ${PacBioBAM_SourceDir}/VirtualPolymeraseReader.cpp
-    ${PacBioBAM_SourceDir}/VirtualRegionTypeMap.cpp
+    ${PacBioBAM_SourceDir}/ZmwTypeMap.cpp
+    ${PacBioBAM_SourceDir}/ZmwWhitelistVirtualReader.cpp
 
     # XML I/O
     ${PacBioBAM_SourceDir}/pugixml/pugixml.cpp
diff --git a/src/swig/Accuracy.i b/src/swig/Accuracy.i
index c315115..1e6015c 100644
--- a/src/swig/Accuracy.i
+++ b/src/swig/Accuracy.i
@@ -9,9 +9,9 @@ using namespace PacBio::BAM;
 %}
 
 #ifdef SWIGPYTHON
-%rename(__int__) PacBio::BAM::Accuracy::operator int;
+%rename(__float__) PacBio::BAM::Accuracy::operator float;
 #else // C#, R
-%rename(ToInt) PacBio::BAM::Accuracy::operator int;
+%rename(ToFloat) PacBio::BAM::Accuracy::operator float;
 #endif
 
 %include <pbbam/Accuracy.h>
\ No newline at end of file
diff --git a/src/swig/BamFile.i b/src/swig/BamFile.i
index 5b8a916..4a429e9 100644
--- a/src/swig/BamFile.i
+++ b/src/swig/BamFile.i
@@ -8,14 +8,11 @@ using namespace PacBio;
 using namespace PacBio::BAM;
 %}
 
+#ifdef SWIGR
+%ignore PacBio::BAM::BamFile::BamFile(const BamFile&);
+#endif 
+
 %ignore PacBio::BAM::BamFile::BamFile(BamFile&&);
 %ignore PacBio::BAM::BamFile::operator=;
 
-HANDLE_STD_EXCEPTION(BamFile);
-HANDLE_STD_EXCEPTION(EnsurePacBioIndexExists);
-HANDLE_STD_EXCEPTION(EnsureStandardIndexExists);
-HANDLE_STD_EXCEPTION(ReferenceId);
-HANDLE_STD_EXCEPTION(ReferenceLength);
-HANDLE_STD_EXCEPTION(ReferenceName);
-
-%include <pbbam/BamFile.h>
\ No newline at end of file
+%include <pbbam/BamFile.h>
diff --git a/src/swig/BamHeader.i b/src/swig/BamHeader.i
index 5f7ea8e..3572f04 100644
--- a/src/swig/BamHeader.i
+++ b/src/swig/BamHeader.i
@@ -18,11 +18,4 @@ using namespace PacBio::BAM;
 %template(ReadGroupInfoList) std::vector<PacBio::BAM::ReadGroupInfo>;
 %template(SequenceInfoList)  std::vector<PacBio::BAM::SequenceInfo>;
 
-HANDLE_STD_EXCEPTION(Program);
-HANDLE_STD_EXCEPTION(ReadGroup);
-HANDLE_STD_EXCEPTION(Sequence);
-HANDLE_STD_EXCEPTION(SequenceId);
-HANDLE_STD_EXCEPTION(SequenceLength);
-HANDLE_STD_EXCEPTION(SequenceName);
-
 %include <pbbam/BamHeader.h>
diff --git a/src/swig/BamRecord.i b/src/swig/BamRecord.i
index 16e6036..4b8cee6 100644
--- a/src/swig/BamRecord.i
+++ b/src/swig/BamRecord.i
@@ -23,11 +23,11 @@ using namespace PacBio::BAM;
 // C# gets confused by the const and nonconst overloads
 %ignore PacBio::BAM::BamRecord::Impl() const;
 
-#ifdef SWIGR
+#if defined(SWIGR) || defined(SWIGPYTHON)
 %rename("EncodedPkmean") PacBio::BAM::BamRecord::Pkmean(const std::vector<uint16_t>&);
 %rename("EncodedPkmid")  PacBio::BAM::BamRecord::Pkmid(const std::vector<uint16_t>&);
-#endif // SWIGR
-
-HANDLE_STD_EXCEPTION(CigarData);
+%rename("EncodedPkmean2") PacBio::BAM::BamRecord::Pkmean2(const std::vector<uint16_t>&);
+%rename("EncodedPkmid2")  PacBio::BAM::BamRecord::Pkmid2(const std::vector<uint16_t>&);
+#endif
 
 %include <pbbam/BamRecord.h>
diff --git a/src/swig/BamRecordBuilder.i b/src/swig/BamRecordBuilder.i
index 7e968f9..52e7690 100644
--- a/src/swig/BamRecordBuilder.i
+++ b/src/swig/BamRecordBuilder.i
@@ -8,5 +8,11 @@ using namespace PacBio;
 using namespace PacBio::BAM;
 %}
 
+%ignore PacBio::BAM::BamRecordBuilder::BamRecordBuilder(BamRecordBuilder&&);      // move ctors not used
+%ignore PacBio::BAM::BamRecordBuilder::operator=;
+
+%ignore PacBio::BAM::BamRecordBuilder::Reset(BamRecord&&);
+%ignore PacBio::BAM::BamRecordBuilder::Cigar(PacBio::BAM::Cigar&&);
+%ignore PacBio::BAM::BamRecordBuilder::Tags(TagCollection&&);
 
 %include <pbbam/BamRecordBuilder.h>
diff --git a/src/swig/BamRecordImpl.i b/src/swig/BamRecordImpl.i
index 3899147..2c8a48f 100644
--- a/src/swig/BamRecordImpl.i
+++ b/src/swig/BamRecordImpl.i
@@ -11,6 +11,4 @@ using namespace PacBio::BAM;
 %ignore PacBio::BAM::BamRecordImpl::BamRecordImpl(BamRecordImpl&&); 
 %ignore PacBio::BAM::BamRecordImpl::operator=;
 
-HANDLE_STD_EXCEPTION(CigarData);
-
-%include <pbbam/BamRecordImpl.h>
\ No newline at end of file
+%include <pbbam/BamRecordImpl.h>
diff --git a/src/swig/BamWriter.i b/src/swig/BamWriter.i
index 87e332f..dd23e5b 100644
--- a/src/swig/BamWriter.i
+++ b/src/swig/BamWriter.i
@@ -12,7 +12,4 @@ using namespace PacBio::BAM;
 %ignore PacBio::BAM::BamWriter(BamWriter&&);       // move ctor not used
 %ignore PacBio::BAM::BamWriter::operator=;         // assignment operators not used
 
-HANDLE_STD_EXCEPTION(BamWriter);
-HANDLE_STD_EXCEPTION(Write);
-
-%include <pbbam/BamWriter.h>
\ No newline at end of file
+%include <pbbam/BamWriter.h>
diff --git a/src/swig/CigarOperation.i b/src/swig/CigarOperation.i
index b2a9586..0a23a17 100644
--- a/src/swig/CigarOperation.i
+++ b/src/swig/CigarOperation.i
@@ -15,8 +15,6 @@ using namespace PacBio::BAM;
 %ignore PacBio::BAM::CigarOperation::CigarOperation(CigarOperationType, uint32_t);
 #endif
 
-HANDLE_STD_EXCEPTION(CigarOperation);
-
 %include <pbbam/CigarOperation.h>
 
 // enums aren't always named consistently (at least between Mac/clang/swig & Linux/gcc/swig)
diff --git a/src/swig/DataSet.i b/src/swig/DataSet.i
index 8ba22c4..f8cba2b 100644
--- a/src/swig/DataSet.i
+++ b/src/swig/DataSet.i
@@ -14,4 +14,36 @@ using namespace PacBio::BAM;
 // assignment operators not used
 %ignore PacBio::BAM::DataSet::operator=;                 
 
-%include <pbbam/DataSet.h>
\ No newline at end of file
+#ifdef SWIGCSHARP
+
+// ignore non-const accessors
+%ignore PacBio::BAM::DataSet::Attribute(const std::string&);
+%ignore PacBio::BAM::DataSet::CreatedAt();
+%ignore PacBio::BAM::DataSet::Extensions();
+%ignore PacBio::BAM::DataSet::ExternalResources();
+%ignore PacBio::BAM::DataSet::Filters();
+%ignore PacBio::BAM::DataSet::Format();
+%ignore PacBio::BAM::DataSet::Metadata();
+%ignore PacBio::BAM::DataSet::MetaType();
+%ignore PacBio::BAM::DataSet::ModifiedAt();
+%ignore PacBio::BAM::DataSet::Name();
+%ignore PacBio::BAM::DataSet::Namespaces();
+%ignore PacBio::BAM::DataSet::ResourceId();
+%ignore PacBio::BAM::DataSet::SubDataSets();
+%ignore PacBio::BAM::DataSet::Tags();
+%ignore PacBio::BAM::DataSet::TimeStampedName();
+%ignore PacBio::BAM::DataSet::UniqueId();
+%ignore PacBio::BAM::DataSet::Version();
+
+// disable operator(s)
+%ignore PacBio::BAM::DataSet::operator+=;
+
+#endif // C#
+
+#ifdef SWIGR
+%ignore PacBio::BAM::DataSet::DataSet(const DataSet::TypeEnum type);
+/*%ignore PacBio::BAM::DataSet::DataSet(const BamFile& bamFile);*/
+#endif // R
+
+
+%include <pbbam/DataSet.h>
diff --git a/src/swig/DataSetTypes.i b/src/swig/DataSetTypes.i
index 2aeb0ff..5644d3f 100644
--- a/src/swig/DataSetTypes.i
+++ b/src/swig/DataSetTypes.i
@@ -18,6 +18,65 @@ using namespace PacBio::BAM::internal;
 %ignore PacBio::BAM::internal::DataSetElement::operator[];
 /*%rename(__getitem__) PacBio::BAM::internal::DataSetElement::operator[];*/
 
+%ignore PacBio::BAM::internal::XmlName::XmlName(XmlName&&);
+%ignore PacBio::BAM::internal::XmlName::operator=;
+
+#ifdef SWIGCSHARP
+
+// ignore non-const accessors
+%ignore PacBio::BAM::DataSetBase::ExternalResources();
+%ignore PacBio::BAM::DataSetBase::Filters();
+%ignore PacBio::BAM::DataSetBase::Metadata();
+%ignore PacBio::BAM::DataSetBase::Namespaces();
+%ignore PacBio::BAM::DataSetBase::SubDataSets();
+%ignore PacBio::BAM::DataSetMetadata::NumRecords();
+%ignore PacBio::BAM::DataSetMetadata::Provenance();
+%ignore PacBio::BAM::DataSetMetadata::TotalLength();
+%ignore PacBio::BAM::ExternalResource::ExternalResources();
+%ignore PacBio::BAM::Filter::Properties();
+%ignore PacBio::BAM::Property::Name();
+%ignore PacBio::BAM::Property::Operator();
+%ignore PacBio::BAM::Property::Value();
+%ignore PacBio::BAM::Provenance::CreatedBy();
+%ignore PacBio::BAM::Provenance::CommonServicesInstanceId();
+%ignore PacBio::BAM::Provenance::CreatorUserId();
+%ignore PacBio::BAM::Provenance::ParentJobId();
+%ignore PacBio::BAM::Provenance::ParentTool();
+%ignore PacBio::BAM::internal::BaseEntityType::Description();
+%ignore PacBio::BAM::internal::BaseEntityType::Extensions();
+%ignore PacBio::BAM::internal::BaseEntityType::Format();
+%ignore PacBio::BAM::internal::BaseEntityType::ModifiedAt();
+%ignore PacBio::BAM::internal::BaseEntityType::Name();
+%ignore PacBio::BAM::internal::BaseEntityType::ResourceId();
+%ignore PacBio::BAM::internal::BaseEntityType::Tags();
+%ignore PacBio::BAM::internal::BaseEntityType::Version();
+%ignore PacBio::BAM::internal::DataEntityType::Checksum();
+%ignore PacBio::BAM::internal::DataEntityType::EncodedValue();
+%ignore PacBio::BAM::internal::DataEntityType::MetaType();
+%ignore PacBio::BAM::internal::DataEntityType::SimpleValue();
+%ignore PacBio::BAM::internal::DataEntityType::TimeStampedName();
+%ignore PacBio::BAM::internal::DataEntityType::UniqueId();
+%ignore PacBio::BAM::internal::DataEntityType::ValueDataType();
+%ignore PacBio::BAM::internal::DataSetElement::Attribute(const std::string&);
+%ignore PacBio::BAM::internal::DataSetElement::Attributes();
+%ignore PacBio::BAM::internal::DataSetElement::Children();
+%ignore PacBio::BAM::internal::DataSetElement::ChildText(const std::string&);
+%ignore PacBio::BAM::internal::DataSetElement::CreatedAt();
+%ignore PacBio::BAM::internal::DataSetElement::Text();
+%ignore PacBio::BAM::internal::IndexedDataType::FileIndices();
+%ignore PacBio::BAM::internal::StrictEntityType::MetaType();
+%ignore PacBio::BAM::internal::StrictEntityType::TimeStampedName();
+%ignore PacBio::BAM::internal::StrictEntityType::UniqueId();
+
+// disable operator(s)
+%ignore PacBio::BAM::DataSetMetadata::operator+=;
+%ignore PacBio::BAM::ExternalResources::operator+=;
+%ignore PacBio::BAM::Filters::operator+=;
+%ignore PacBio::BAM::DataSetBase::operator+=;
+%ignore PacBio::BAM::SubDataSets::operator+=;
+
+#endif // C#
+
 %include <pbbam/internal/DataSetElement.h>
 
 %ignore PacBio::BAM::internal::DataSetElementList::operator[];
diff --git a/src/swig/EntireFileQuery.i b/src/swig/EntireFileQuery.i
index a0571e8..c7c0b06 100644
--- a/src/swig/EntireFileQuery.i
+++ b/src/swig/EntireFileQuery.i
@@ -10,8 +10,6 @@ using namespace PacBio;
 using namespace PacBio::BAM;
 %}
 
-HANDLE_STD_EXCEPTION(EntireFileQuery);
-
 %include <pbbam/DataSet.h>
 %include <pbbam/internal/QueryBase.h>
-%include <pbbam/EntireFileQuery.h>
\ No newline at end of file
+%include <pbbam/EntireFileQuery.h>
diff --git a/src/swig/GenomicInterval.i b/src/swig/GenomicInterval.i
index 626c8ac..199a3c3 100644
--- a/src/swig/GenomicInterval.i
+++ b/src/swig/GenomicInterval.i
@@ -8,4 +8,6 @@ using namespace PacBio;
 using namespace PacBio::BAM;
 %}
 
-%include <pbbam/GenomicInterval.h>
\ No newline at end of file
+%ignore PacBio::BAM::GenomicInterval::operator=;
+
+%include <pbbam/GenomicInterval.h>
diff --git a/src/swig/GenomicIntervalQuery.i b/src/swig/GenomicIntervalQuery.i
index 0ed7886..d3f9fa7 100644
--- a/src/swig/GenomicIntervalQuery.i
+++ b/src/swig/GenomicIntervalQuery.i
@@ -8,8 +8,4 @@ using namespace PacBio;
 using namespace PacBio::BAM;
 %}
 
-HANDLE_STD_EXCEPTION(CreateIterator);
-HANDLE_STD_EXCEPTION(GenomicIntervalQuery);
-HANDLE_STD_EXCEPTION(Interval);
-
-%include <pbbam/GenomicIntervalQuery.h>
\ No newline at end of file
+%include <pbbam/GenomicIntervalQuery.h>
diff --git a/src/swig/LocalContextFlags.i b/src/swig/LocalContextFlags.i
index b47a4d4..66ee990 100644
--- a/src/swig/LocalContextFlags.i
+++ b/src/swig/LocalContextFlags.i
@@ -8,4 +8,8 @@ using namespace PacBio;
 using namespace PacBio::BAM;
 %}
 
+#ifdef SWIGCSHARP
+%ignore operator|(const LocalContextFlags, const LocalContextFlags);
+#endif
+
 %include <pbbam/LocalContextFlags.h>
diff --git a/src/swig/PacBioBam.i b/src/swig/PacBioBam.i
index ed8c746..668f06e 100644
--- a/src/swig/PacBioBam.i
+++ b/src/swig/PacBioBam.i
@@ -33,7 +33,6 @@ endif*/
 
 /********* SWIG includes ************/
 
-%include "exception.i"
 %include "stdint.i"
 %include "std_common.i"
 
@@ -56,35 +55,15 @@ endif*/
 %template(ShortList)  std::vector<short>;
 %template(CharList)   std::vector<char>;
 
-// basic exception-handler helper
-//
-// -- STL builtins --
-// std::invalid_argument -> ValueError
-// std::domain_error     -> ValueError
-// std::overflow_error   -> OverflowError
-// std::out_of_range     -> IndexError
-// std::length_error     -> IndexError
-// std::runtime_error    -> RuntimeError
-// std::exception        -> SystemError
-//
-// (anything else)       -> UnknownError
-//
-// * All pbbam exceptions are simply std::exception (SystemErro) for now,
-//   until (if?) we flesh out a more detailed exception hierarchy.
-//   Either way, new ones will inherit from std::exception, so SystemError
-//   should still remain a valid handler.
-//
-%define HANDLE_STD_EXCEPTION(MethodName)
-%exception MethodName {
+// exception handling
+%include "exception.i"
+%exception {
     try {
-                $action
-        }
-    SWIG_CATCH_STDEXCEPT // catch std::exception
-    catch (...) {
-                SWIG_exception(SWIG_UnknownError, "Unknown exception");
-        }
+        $action
+    } catch (const std::exception& e) {
+        SWIG_exception(SWIG_RuntimeError, e.what());
+    }
 }
-%enddef
 
 /********* PacBioBAM includes ************/
 
@@ -150,8 +129,6 @@ endif*/
 
 // Query/iterator API
 %include "QueryBase.i"
-%include "GroupQueryBase.i"
-%include "GroupQuery.i"
 %include "EntireFileQuery.i"
 %include "GenomicIntervalQuery.i"
 %include "ZmwQuery.i"
@@ -164,3 +141,9 @@ endif*/
 
 // FASTA
 %include "IndexedFastaReader.i"
+
+// VirtualPolymeraseBamRecord 
+%include "VirtualRegion.i"
+%include "VirtualPolymeraseBamRecord.i"
+%include "VirtualPolymeraseReader.i"
+%include "ZmwWhitelistVirtualReader.i"
diff --git a/src/swig/PbiRawData.i b/src/swig/PbiRawData.i
index 4992cc4..3db9ece 100644
--- a/src/swig/PbiRawData.i
+++ b/src/swig/PbiRawData.i
@@ -13,7 +13,7 @@ using namespace PacBio::BAM;
 %ignore PacBio::BAM::PbiRawMappedData::PbiRawMappedData(PbiRawMappedData&&);
 %ignore PacBio::BAM::PbiReferenceEntry::PbiReferenceEntry(PbiReferenceEntry&&);
 %ignore PacBio::BAM::PbiRawReferenceData::PbiRawReferenceData(PbiRawReferenceData&&);
-%ignore PacBio::BAM::PbiRawSubreadData::PbiRawSubreadData(PbiRawSubreadData&&);
+%ignore PacBio::BAM::PbiRawBasicData::PbiRawBasicData(PbiRawBasicData&&);
 %ignore PacBio::BAM::PbiRawData::PbiRawData(PbiRawData&&); 
 
 // assignment operators not used
@@ -21,9 +21,15 @@ using namespace PacBio::BAM;
 %ignore PacBio::BAM::PbiRawMappedData::operator=;
 %ignore PacBio::BAM::PbiReferenceEntry::operator=;
 %ignore PacBio::BAM::PbiRawReferenceData::operator=;
-%ignore PacBio::BAM::PbiRawSubreadData::operator=;
+%ignore PacBio::BAM::PbiRawBasicData::operator=;
 %ignore PacBio::BAM::PbiRawData::operator=;
 
-HANDLE_STD_EXCEPTION(PacBio::BAM::PbiRawMappedData::AddRecord);
+#ifdef SWIGCSHARP
+// ignore non-const accessors
+%ignore PacBio::BAM::PbiRawData::BarcodeData();
+%ignore PacBio::BAM::PbiRawData::MappedData();
+%ignore PacBio::BAM::PbiRawData::ReferenceData();
+%ignore PacBio::BAM::PbiRawData::BasicData();
+#endif // C#
 
 %include <pbbam/PbiRawData.h>
diff --git a/src/swig/Tag.i b/src/swig/Tag.i
index 838454c..832c856 100644
--- a/src/swig/Tag.i
+++ b/src/swig/Tag.i
@@ -11,23 +11,7 @@ using namespace PacBio::BAM;
 %ignore PacBio::BAM::Tag::Tag(Tag&&);
 %ignore PacBio::BAM::Tag::operator=;
 
-HANDLE_STD_EXCEPTION(ToInt8);
-HANDLE_STD_EXCEPTION(ToUInt8);
-HANDLE_STD_EXCEPTION(ToInt16);
-HANDLE_STD_EXCEPTION(ToUInt16);
-HANDLE_STD_EXCEPTION(ToInt32);
-HANDLE_STD_EXCEPTION(ToUInt32);
-HANDLE_STD_EXCEPTION(ToFloat);
-HANDLE_STD_EXCEPTION(ToString);
-HANDLE_STD_EXCEPTION(ToInt8Array);
-HANDLE_STD_EXCEPTION(ToUInt8Array);
-HANDLE_STD_EXCEPTION(ToInt16Array);
-HANDLE_STD_EXCEPTION(ToUInt16Array);
-HANDLE_STD_EXCEPTION(ToInt32Array);
-HANDLE_STD_EXCEPTION(ToUInt32Array);
-HANDLE_STD_EXCEPTION(ToFloatArray);
-
-#ifdef SWIGR
+#if defined(SWIGR) || defined(SWIGPYTHON)
 
 %ignore PacBio::BAM::Tag::Tag(int8_t value);
 %ignore PacBio::BAM::Tag::Tag(uint8_t value);
@@ -47,84 +31,84 @@ HANDLE_STD_EXCEPTION(ToFloatArray);
 
 %extend PacBio::BAM::Tag {
 	
-	PacBio::BAM::Tag FromInt8(int x)   { return PacBio::BAM::Tag(static_cast<int8_t>(x));   }
-	PacBio::BAM::Tag FromUInt8(int x)  { return PacBio::BAM::Tag(static_cast<uint8_t>(x));  }
-	PacBio::BAM::Tag FromInt16(int x)  { return PacBio::BAM::Tag(static_cast<int16_t>(x));  }
-	PacBio::BAM::Tag FromUInt16(int x) { return PacBio::BAM::Tag(static_cast<uint16_t>(x)); }
-	PacBio::BAM::Tag FromInt32(int x)  { return PacBio::BAM::Tag(static_cast<int32_t>(x));  }
-	PacBio::BAM::Tag FromUInt32(int x) { return PacBio::BAM::Tag(static_cast<uint32_t>(x)); }
-	PacBio::BAM::Tag FromFloat(int x)  { return PacBio::BAM::Tag(static_cast<float>(x));    }
-	
-	PacBio::BAM::Tag FromInt8Array(const std::vector<int>& v)
-	{
-		std::vector<int8_t> result;
-		const size_t numElements = v.size();
-		result.reserve(numElements);
-		for (size_t i = 0; i < numElements; ++i) 
-			result.push_back(static_cast<int8_t>(v.at(i)));
-		return PacBio::BAM::Tag(result); 
-	}
-	
-	PacBio::BAM::Tag FromUInt8Array(const std::vector<int>& v)
-	{
-		std::vector<uint8_t> result;
-		const size_t numElements = v.size();
-		result.reserve(numElements);
-		for (size_t i = 0; i < numElements; ++i) 
-			result.push_back(static_cast<uint8_t>(v.at(i)));
-		return PacBio::BAM::Tag(result); 
-	}
-	
-	PacBio::BAM::Tag FromInt16Array(const std::vector<int>& v)
-	{
-		std::vector<int16_t> result;
-		const size_t numElements = v.size();
-		result.reserve(numElements);
-		for (size_t i = 0; i < numElements; ++i) 
-			result.push_back(static_cast<int16_t>(v.at(i)));
-		return PacBio::BAM::Tag(result); 
-	}
-	
-	PacBio::BAM::Tag FromUInt16Array(const std::vector<int>& v)
-	{
-		std::vector<int16_t> result;
-		const size_t numElements = v.size();
-		result.reserve(numElements);
-		for (size_t i = 0; i < numElements; ++i) 
-			result.push_back(static_cast<uint16_t>(v.at(i)));
-		return PacBio::BAM::Tag(result); 
-	}
-	
-	PacBio::BAM::Tag FromInt32Array(const std::vector<int>& v)
-	{
-		std::vector<int16_t> result;
-		const size_t numElements = v.size();
-		result.reserve(numElements);
-		for (size_t i = 0; i < numElements; ++i) 
-			result.push_back(static_cast<int32_t>(v.at(i)));
-		return PacBio::BAM::Tag(result); 
-	}
-	
-	PacBio::BAM::Tag FromUInt32Array(const std::vector<int>& v)
-	{
-		std::vector<int16_t> result;
-		const size_t numElements = v.size();
-		result.reserve(numElements);
-		for (size_t i = 0; i < numElements; ++i) 
-			result.push_back(static_cast<uint32_t>(v.at(i)));
-		return PacBio::BAM::Tag(result); 
-	}
-	
-	PacBio::BAM::Tag FromFloatArray(const std::vector<int>& v)
-	{
-		std::vector<int16_t> result;
-		const size_t numElements = v.size();
-		result.reserve(numElements);
-		for (size_t i = 0; i < numElements; ++i) 
-			result.push_back(static_cast<float>(v.at(i)));
-		return PacBio::BAM::Tag(result); 
-	}
+    PacBio::BAM::Tag FromInt8(int x)   { return PacBio::BAM::Tag(static_cast<int8_t>(x));   }
+    PacBio::BAM::Tag FromUInt8(int x)  { return PacBio::BAM::Tag(static_cast<uint8_t>(x));  }
+    PacBio::BAM::Tag FromInt16(int x)  { return PacBio::BAM::Tag(static_cast<int16_t>(x));  }
+    PacBio::BAM::Tag FromUInt16(int x) { return PacBio::BAM::Tag(static_cast<uint16_t>(x)); }
+    PacBio::BAM::Tag FromInt32(int x)  { return PacBio::BAM::Tag(static_cast<int32_t>(x));  }
+    PacBio::BAM::Tag FromUInt32(int x) { return PacBio::BAM::Tag(static_cast<uint32_t>(x)); }
+    PacBio::BAM::Tag FromFloat(int x)  { return PacBio::BAM::Tag(static_cast<float>(x));    }
+
+    PacBio::BAM::Tag FromInt8Array(const std::vector<int>& v)
+    {
+        std::vector<int8_t> result;
+        const size_t numElements = v.size();
+        result.reserve(numElements);
+        for (size_t i = 0; i < numElements; ++i)
+            result.push_back(static_cast<int8_t>(v.at(i)));
+        return PacBio::BAM::Tag(result);
+    }
+
+    PacBio::BAM::Tag FromUInt8Array(const std::vector<int>& v)
+    {
+        std::vector<uint8_t> result;
+        const size_t numElements = v.size();
+        result.reserve(numElements);
+        for (size_t i = 0; i < numElements; ++i)
+            result.push_back(static_cast<uint8_t>(v.at(i)));
+        return PacBio::BAM::Tag(result);
+    }
+
+    PacBio::BAM::Tag FromInt16Array(const std::vector<int>& v)
+    {
+        std::vector<int16_t> result;
+        const size_t numElements = v.size();
+        result.reserve(numElements);
+        for (size_t i = 0; i < numElements; ++i)
+            result.push_back(static_cast<int16_t>(v.at(i)));
+        return PacBio::BAM::Tag(result);
+    }
+
+    PacBio::BAM::Tag FromUInt16Array(const std::vector<int>& v)  // wraps plain ints as an unsigned 16-bit array Tag
+    {
+        std::vector<uint16_t> result;  // element type must match the cast below (was int16_t: values > 32767 wrapped negative)
+        const size_t numElements = v.size();
+        result.reserve(numElements);
+        for (size_t i = 0; i < numElements; ++i)
+            result.push_back(static_cast<uint16_t>(v.at(i)));
+        return PacBio::BAM::Tag(result);
+    }
+
+    PacBio::BAM::Tag FromInt32Array(const std::vector<int>& v)  // wraps plain ints as a signed 32-bit array Tag
+    {
+        std::vector<int32_t> result;  // element type must match the cast below (was int16_t: values were truncated to 16 bits)
+        const size_t numElements = v.size();
+        result.reserve(numElements);
+        for (size_t i = 0; i < numElements; ++i)
+            result.push_back(static_cast<int32_t>(v.at(i)));
+        return PacBio::BAM::Tag(result);
+    }
+
+    PacBio::BAM::Tag FromUInt32Array(const std::vector<int>& v)  // wraps plain ints as an unsigned 32-bit array Tag
+    {
+        std::vector<uint32_t> result;  // element type must match the cast below (was int16_t: values were truncated to 16 bits)
+        const size_t numElements = v.size();
+        result.reserve(numElements);
+        for (size_t i = 0; i < numElements; ++i)
+            result.push_back(static_cast<uint32_t>(v.at(i)));
+        return PacBio::BAM::Tag(result);
+    }
+
+    PacBio::BAM::Tag FromFloatArray(const std::vector<int>& v)  // wraps plain ints as a float array Tag
+    {
+        std::vector<float> result;  // element type must match the cast below (was int16_t: floats were narrowed back to 16-bit ints)
+        const size_t numElements = v.size();
+        result.reserve(numElements);
+        for (size_t i = 0; i < numElements; ++i)
+            result.push_back(static_cast<float>(v.at(i)));
+        return PacBio::BAM::Tag(result);
+    }
 }
 #endif // SWIGR
 
-%include <pbbam/Tag.h>
\ No newline at end of file
+%include <pbbam/Tag.h>
diff --git a/src/swig/VirtualPolymeraseBamRecord.i b/src/swig/VirtualPolymeraseBamRecord.i
new file mode 100644
index 0000000..1a2a9c5
--- /dev/null
+++ b/src/swig/VirtualPolymeraseBamRecord.i
@@ -0,0 +1,24 @@
+/* VirtualPolymeraseBamRecord.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/virtual/VirtualRegionType.h>
+#include <pbbam/virtual/VirtualRegion.h>
+#include <pbbam/virtual/VirtualPolymeraseBamRecord.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+/*%ignore PacBio::BAM::VirtualPolymeraseBamRecord::VirtualPolymeraseBamRecord(const VirtualPolymeraseBamRecord&);*/
+%ignore PacBio::BAM::VirtualPolymeraseBamRecord::VirtualPolymeraseBamRecord(VirtualPolymeraseBamRecord&&);
+%ignore PacBio::BAM::VirtualPolymeraseBamRecord::operator=;
+
+// disabled - can't get it to work right (at least in Python)
+// but the same info is available (& correct) from record.VirtualRegionsTable(regionType)
+%ignore PacBio::BAM::VirtualPolymeraseBamRecord::VirtualRegionsMap;
+
+%template(VirtualRegionList) std::vector<PacBio::BAM::VirtualRegion>;
+%template(VirtualRegionsMap) std::map<PacBio::BAM::VirtualRegionType, std::vector<PacBio::BAM::VirtualRegion> >;
+
+%include <pbbam/virtual/VirtualPolymeraseBamRecord.h>
\ No newline at end of file
diff --git a/src/swig/VirtualPolymeraseReader.i b/src/swig/VirtualPolymeraseReader.i
new file mode 100644
index 0000000..7ab62fe
--- /dev/null
+++ b/src/swig/VirtualPolymeraseReader.i
@@ -0,0 +1,11 @@
+/* VirtualPolymeraseReader.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/virtual/VirtualPolymeraseReader.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%include <pbbam/virtual/VirtualPolymeraseReader.h>
\ No newline at end of file
diff --git a/src/swig/VirtualRegion.i b/src/swig/VirtualRegion.i
new file mode 100644
index 0000000..2436de2
--- /dev/null
+++ b/src/swig/VirtualRegion.i
@@ -0,0 +1,18 @@
+/* VirtualRegion.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/virtual/VirtualRegionType.h>
+#include <pbbam/virtual/VirtualRegion.h>
+#include <map>
+#include <vector>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%ignore PacBio::BAM::VirtualRegion::VirtualRegion(VirtualRegion&&);
+%ignore PacBio::BAM::VirtualRegion::operator=;
+
+%include <pbbam/virtual/VirtualRegionType.h>
+%include <pbbam/virtual/VirtualRegion.h>
diff --git a/src/swig/ZmwWhitelistVirtualReader.i b/src/swig/ZmwWhitelistVirtualReader.i
new file mode 100644
index 0000000..5647ccf
--- /dev/null
+++ b/src/swig/ZmwWhitelistVirtualReader.i
@@ -0,0 +1,11 @@
+/* ZmwWhitelistVirtualReader.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/virtual/ZmwWhitelistVirtualReader.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%include <pbbam/virtual/ZmwWhitelistVirtualReader.h>
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 38dfe39..8b603a6 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -1,15 +1,10 @@
 find_package(Threads REQUIRED)
 
-# find samtools binary for comparing against 'gold standard' files
-#set(Samtools_Dir "/Users/derek/development/samtools")
-set(Samtools_Dir "${PacBioBAM_RootDir}/../../../../prebuilt.out/samtools/samtools-0.1.19/ubuntu-1404/bin")
-find_program(Samtools_Bin samtools HINTS ${Samtools_Dir})
-
 # ensure tests directory exists
 file(MAKE_DIRECTORY ${PacBioBAM_TestsDir}/bin)
 file(MAKE_DIRECTORY ${PacBioBAM_TestsDir}/data/temp)
 
-# generate paths/values used by for test
+# generate paths/values used by unit tests
 configure_file(
     ${PacBioBAM_TestsDir}/src/TestData.h.in
     ${PacBioBAM_TestsDir}/src/TestData.h
@@ -26,7 +21,7 @@ include_directories(
     ${gtest_SOURCE_DIR}
 )
 
-# grab PacBioBAM test source files
+# grab PacBioBAM unit test source files
 include(files.cmake)
 set(SOURCES
     ${PacBioBAMTest_H}
@@ -34,7 +29,7 @@ set(SOURCES
 )
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${PacBioBAM_CXX_FLAGS}")
 
-# define test executable
+# define unit test executable
 add_definitions(-DPBBAM_TESTING)
 if(MSVC)
     # VS2012+ pooh-pooh's Derek's "#define private public" trick
@@ -42,7 +37,7 @@ if(MSVC)
 endif()
 
 if(PacBioBAM_wrap_r)
-    # SWIG R does not support PBBAM_SHARED_PTR, but it does support boost::shared_ptr
+    # SWIG R does not support std::shared_ptr, but it does support boost::shared_ptr
     # So force boost if we're wrapping for R.
     add_definitions(-DPBBAM_USE_BOOST_SHARED_PTR)
 endif()
@@ -51,7 +46,7 @@ set_target_properties(test_pbbam PROPERTIES
     RUNTIME_OUTPUT_DIRECTORY ${PacBioBAM_TestsDir}/bin
 )
 
-# set up unit test to run our executable
+# add unit tests to test framework
 add_test(
     NAME UnitTests
     WORKING_DIRECTORY ${PacBioBAM_TestsDir}/bin
diff --git a/tests/data/chunking/chunking.subreadset.xml b/tests/data/chunking/chunking.subreadset.xml
new file mode 100644
index 0000000..6d15ff1
--- /dev/null
+++ b/tests/data/chunking/chunking.subreadset.xml
@@ -0,0 +1,65 @@
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:SubreadSet 
+    xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" 
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
+    xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd"
+    xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd"
+    xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd"
+    xmlns:pbds="http://pacificbiosciences.com/PacBioDatasets.xsd"
+    xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd" 
+    UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" 
+    TimeStampedName="subreadset_150304_231155" 
+    MetaType="PacBio.DataSet.SubreadSet" 
+    Name="DataSet_SubreadSet" 
+    Tags="" 
+    Version="3.0.0" 
+    CreatedAt="2015-01-27T09:00:01"> 
+<pbbase:ExternalResources>
+    <pbbase:ExternalResource 
+        UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5193" 
+        TimeStampedName="subread_bam_150304_231155" 
+        MetaType="PacBio.SubreadFile.SubreadBamFile" 
+        ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam">
+        <pbbase:FileIndices>
+            <pbbase:FileIndex 
+                UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5194" 
+                TimeStampedName="bam_index_150304_231155" 
+                MetaType="PacBio.Index.PacBioIndex" 
+                ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam.pbi"/>
+        </pbbase:FileIndices>
+    </pbbase:ExternalResource>
+    <pbbase:ExternalResource 
+        UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5197" 
+        TimeStampedName="subread_bam_150304_231155" 
+        MetaType="PacBio.SubreadFile.SubreadBamFile" 
+        ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.2.subreads.bam">
+        <pbbase:FileIndices>
+            <pbbase:FileIndex 
+                UniqueId="b096d0a3-94b8-4918-b3af-a3f81bbe5198" 
+                TimeStampedName="bam_index_150304_231155" 
+                MetaType="PacBio.Index.PacBioIndex" 
+                ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.2.subreads.bam.pbi"/>
+        </pbbase:FileIndices>
+    </pbbase:ExternalResource><pbbase:ExternalResource 
+        UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5195" 
+        TimeStampedName="subread_bam_150304_231155" 
+        MetaType="PacBio.SubreadFile.SubreadBamFile" 
+        ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.3.subreads.bam">
+        <pbbase:FileIndices>
+            <pbbase:FileIndex 
+                UniqueId="b096d0a3-94b8-4918-b3af-a3f81bbe5196" 
+                TimeStampedName="bam_index_150304_231155" 
+                MetaType="PacBio.Index.PacBioIndex" 
+                ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.3.subreads.bam.pbi"/>
+        </pbbase:FileIndices>
+    </pbbase:ExternalResource>
+</pbbase:ExternalResources>
+<pbds:Filters>
+    <pbds:Filter>
+        <pbbase:Properties>
+            <pbbase:Property Name="movie" Operator="=" Value="m150404_101626_42267_c100807920800000001823174110291514_s1_p0"/>
+            <pbbase:Property Name="zm" Operator="lt" Value="1816"/>
+        </pbbase:Properties>
+    </pbds:Filter>
+</pbds:Filters>
+</pbds:SubreadSet>
diff --git a/tests/data/chunking/chunking_emptyfilters.subreadset.xml b/tests/data/chunking/chunking_emptyfilters.subreadset.xml
new file mode 100644
index 0000000..917872b
--- /dev/null
+++ b/tests/data/chunking/chunking_emptyfilters.subreadset.xml
@@ -0,0 +1,59 @@
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:SubreadSet 
+    xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" 
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
+    xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd"
+    xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd"
+    xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd"
+    xmlns:pbds="http://pacificbiosciences.com/PacBioDatasets.xsd"
+    xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd" 
+    UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" 
+    TimeStampedName="subreadset_150304_231155" 
+    MetaType="PacBio.DataSet.SubreadSet" 
+    Name="DataSet_SubreadSet" 
+    Tags="" 
+    Version="3.0.0" 
+    CreatedAt="2015-01-27T09:00:01"> 
+<pbbase:ExternalResources>
+    <pbbase:ExternalResource 
+        UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5193" 
+        TimeStampedName="subread_bam_150304_231155" 
+        MetaType="PacBio.SubreadFile.SubreadBamFile" 
+        ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam">
+        <pbbase:FileIndices>
+            <pbbase:FileIndex 
+                UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5194" 
+                TimeStampedName="bam_index_150304_231155" 
+                MetaType="PacBio.Index.PacBioIndex" 
+                ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam.pbi"/>
+        </pbbase:FileIndices>
+    </pbbase:ExternalResource>
+    <pbbase:ExternalResource 
+        UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5197" 
+        TimeStampedName="subread_bam_150304_231155" 
+        MetaType="PacBio.SubreadFile.SubreadBamFile" 
+        ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.2.subreads.bam">
+        <pbbase:FileIndices>
+            <pbbase:FileIndex 
+                UniqueId="b096d0a3-94b8-4918-b3af-a3f81bbe5198" 
+                TimeStampedName="bam_index_150304_231155" 
+                MetaType="PacBio.Index.PacBioIndex" 
+                ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.2.subreads.bam.pbi"/>
+        </pbbase:FileIndices>
+    </pbbase:ExternalResource><pbbase:ExternalResource 
+        UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5195" 
+        TimeStampedName="subread_bam_150304_231155" 
+        MetaType="PacBio.SubreadFile.SubreadBamFile" 
+        ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.3.subreads.bam">
+        <pbbase:FileIndices>
+            <pbbase:FileIndex 
+                UniqueId="b096d0a3-94b8-4918-b3af-a3f81bbe5196" 
+                TimeStampedName="bam_index_150304_231155" 
+                MetaType="PacBio.Index.PacBioIndex" 
+                ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.3.subreads.bam.pbi"/>
+        </pbbase:FileIndices>
+    </pbbase:ExternalResource>
+</pbbase:ExternalResources>
+<pbds:Filters>
+</pbds:Filters>
+</pbds:SubreadSet>
diff --git a/tests/data/chunking/chunking_missingfilters.subreadset.xml b/tests/data/chunking/chunking_missingfilters.subreadset.xml
new file mode 100644
index 0000000..b91708e
--- /dev/null
+++ b/tests/data/chunking/chunking_missingfilters.subreadset.xml
@@ -0,0 +1,58 @@
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:SubreadSet 
+    xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" 
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
+    xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd"
+    xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd"
+    xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd"
+    xmlns:pbds="http://pacificbiosciences.com/PacBioDatasets.xsd"
+    xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd" 
+    UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" 
+    TimeStampedName="subreadset_150304_231155" 
+    MetaType="PacBio.DataSet.SubreadSet" 
+    Name="DataSet_SubreadSet" 
+    Tags="" 
+    Version="3.0.0" 
+    CreatedAt="2015-01-27T09:00:01"> 
+<pbbase:ExternalResources>
+    <pbbase:ExternalResource 
+        UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5193" 
+        TimeStampedName="subread_bam_150304_231155" 
+        MetaType="PacBio.SubreadFile.SubreadBamFile" 
+        ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam">
+        <pbbase:FileIndices>
+            <pbbase:FileIndex 
+                UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5194" 
+                TimeStampedName="bam_index_150304_231155" 
+                MetaType="PacBio.Index.PacBioIndex" 
+                ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam.pbi"/>
+        </pbbase:FileIndices>
+    </pbbase:ExternalResource>
+    <pbbase:ExternalResource 
+        UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5197" 
+        TimeStampedName="subread_bam_150304_231155" 
+        MetaType="PacBio.SubreadFile.SubreadBamFile" 
+        ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.2.subreads.bam">
+        <pbbase:FileIndices>
+            <pbbase:FileIndex 
+                UniqueId="b096d0a3-94b8-4918-b3af-a3f81bbe5198" 
+                TimeStampedName="bam_index_150304_231155" 
+                MetaType="PacBio.Index.PacBioIndex" 
+                ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.2.subreads.bam.pbi"/>
+        </pbbase:FileIndices>
+    </pbbase:ExternalResource><pbbase:ExternalResource 
+        UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5195" 
+        TimeStampedName="subread_bam_150304_231155" 
+        MetaType="PacBio.SubreadFile.SubreadBamFile" 
+        ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.3.subreads.bam">
+        <pbbase:FileIndices>
+            <pbbase:FileIndex 
+                UniqueId="b096d0a3-94b8-4918-b3af-a3f81bbe5196" 
+                TimeStampedName="bam_index_150304_231155" 
+                MetaType="PacBio.Index.PacBioIndex" 
+                ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.3.subreads.bam.pbi"/>
+        </pbbase:FileIndices>
+    </pbbase:ExternalResource>
+</pbbase:ExternalResources>
+</pbds:SubreadSet>
+
diff --git a/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam b/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam
new file mode 100644
index 0000000..c4ec7ea
Binary files /dev/null and b/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam differ
diff --git a/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam.pbi b/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam.pbi
new file mode 100644
index 0000000..4af87e2
Binary files /dev/null and b/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam.pbi differ
diff --git a/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.2.subreads.bam b/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.2.subreads.bam
new file mode 100644
index 0000000..e623aca
Binary files /dev/null and b/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.2.subreads.bam differ
diff --git a/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.2.subreads.bam.pbi b/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.2.subreads.bam.pbi
new file mode 100644
index 0000000..6479979
Binary files /dev/null and b/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.2.subreads.bam.pbi differ
diff --git a/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.3.subreads.bam b/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.3.subreads.bam
new file mode 100644
index 0000000..8544f6a
Binary files /dev/null and b/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.3.subreads.bam differ
diff --git a/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.3.subreads.bam.pbi b/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.3.subreads.bam.pbi
new file mode 100644
index 0000000..a9f4edb
Binary files /dev/null and b/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.3.subreads.bam.pbi differ
diff --git a/tests/data/dataset/ali1.xml b/tests/data/dataset/ali1.xml
index 015068e..ab0a82a 100644
--- a/tests/data/dataset/ali1.xml
+++ b/tests/data/dataset/ali1.xml
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="utf-8"?>
-<pbds:AlignmentSet xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" MetaType="PacBio.DataSet.AlignmentSet" Name="DataSet_AlignmentSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" CreatedAt="2015-01-27T09:00:01" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd">
+<pbds:AlignmentSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.AlignmentSet" Name="DataSet_AlignmentSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0"  xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd">
 	<pbbase:ExternalResources>
 		<pbbase:ExternalResource Name="First Alignments BAM" Description="Points to an example Alignments BAM file." MetaType="AlignmentFile.AlignmentBamFile" ResourceId="file:///mnt/path/to/alignments0.bam" Tags="Example">
 			<pbbase:FileIndices>
diff --git a/tests/data/dataset/ali2.xml b/tests/data/dataset/ali2.xml
index f71e2d2..c35f9ec 100644
--- a/tests/data/dataset/ali2.xml
+++ b/tests/data/dataset/ali2.xml
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="utf-8"?>
-<pbds:AlignmentSet xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" MetaType="PacBio.DataSet.AlignmentSet" Name="DataSet_AlignmentSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" CreatedAt="2015-01-27T09:00:01" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd">
+<pbds:AlignmentSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.AlignmentSet" Name="DataSet_AlignmentSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd">
 	<pbbase:ExternalResources>
 		<pbbase:ExternalResource Name="First Alignments BAM" Description="Points to an example Alignments BAM file." MetaType="AlignmentFile.AlignmentBamFile" ResourceId="file:///mnt/path/to/alignments2.bam" Tags="Example">
 			<pbbase:FileIndices>
diff --git a/tests/data/dataset/ali3.xml b/tests/data/dataset/ali3.xml
index d0dc0d6..f58d25f 100644
--- a/tests/data/dataset/ali3.xml
+++ b/tests/data/dataset/ali3.xml
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="utf-8"?>
-<pbds:AlignmentSet xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" MetaType="PacBio.DataSet.AlignmentSet" Name="DataSet_AlignmentSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" CreatedAt="2015-01-27T09:00:01" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd">
+<pbds:AlignmentSet CreatedAt="2015-01-27T09:00:01"  MetaType="PacBio.DataSet.AlignmentSet" Name="DataSet_AlignmentSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd">
 	<pbbase:ExternalResources>
 		<pbbase:ExternalResource Name="First Alignments BAM" Description="Points to an example Alignments BAM file." MetaType="AlignmentFile.AlignmentBamFile" ResourceId="file:///mnt/path/to/alignments2.bam" Tags="Example">
 			<pbbase:FileIndices>
diff --git a/tests/data/dataset/ali4.xml b/tests/data/dataset/ali4.xml
index 015068e..ab0a82a 100644
--- a/tests/data/dataset/ali4.xml
+++ b/tests/data/dataset/ali4.xml
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="utf-8"?>
-<pbds:AlignmentSet xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" MetaType="PacBio.DataSet.AlignmentSet" Name="DataSet_AlignmentSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" CreatedAt="2015-01-27T09:00:01" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd">
+<pbds:AlignmentSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.AlignmentSet" Name="DataSet_AlignmentSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0"  xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd">
 	<pbbase:ExternalResources>
 		<pbbase:ExternalResource Name="First Alignments BAM" Description="Points to an example Alignments BAM file." MetaType="AlignmentFile.AlignmentBamFile" ResourceId="file:///mnt/path/to/alignments0.bam" Tags="Example">
 			<pbbase:FileIndices>
diff --git a/tests/data/dataset/bam_mapping.bam b/tests/data/dataset/bam_mapping.bam
index 00637b9..2d4ae7b 100644
Binary files a/tests/data/dataset/bam_mapping.bam and b/tests/data/dataset/bam_mapping.bam differ
diff --git a/tests/data/dataset/bam_mapping.bam.pbi b/tests/data/dataset/bam_mapping.bam.pbi
index 8ad2fd5..fe7c3be 100644
Binary files a/tests/data/dataset/bam_mapping.bam.pbi and b/tests/data/dataset/bam_mapping.bam.pbi differ
diff --git a/tests/data/dataset/bam_mapping_1.bam b/tests/data/dataset/bam_mapping_1.bam
index 9644940..1e9670e 100644
Binary files a/tests/data/dataset/bam_mapping_1.bam and b/tests/data/dataset/bam_mapping_1.bam differ
diff --git a/tests/data/dataset/bam_mapping_1.bam.pbi b/tests/data/dataset/bam_mapping_1.bam.pbi
index 11e85a0..d99a174 100644
Binary files a/tests/data/dataset/bam_mapping_1.bam.pbi and b/tests/data/dataset/bam_mapping_1.bam.pbi differ
diff --git a/tests/data/dataset/bam_mapping_2.bam b/tests/data/dataset/bam_mapping_2.bam
index 419701d..09678ea 100644
Binary files a/tests/data/dataset/bam_mapping_2.bam and b/tests/data/dataset/bam_mapping_2.bam differ
diff --git a/tests/data/dataset/bam_mapping_2.bam.pbi b/tests/data/dataset/bam_mapping_2.bam.pbi
index 9c46e0e..d1765ef 100644
Binary files a/tests/data/dataset/bam_mapping_2.bam.pbi and b/tests/data/dataset/bam_mapping_2.bam.pbi differ
diff --git a/tests/data/dataset/bam_mapping_new.bam b/tests/data/dataset/bam_mapping_new.bam
new file mode 100644
index 0000000..3039331
Binary files /dev/null and b/tests/data/dataset/bam_mapping_new.bam differ
diff --git a/tests/data/dataset/bam_mapping_new.bam.pbi b/tests/data/dataset/bam_mapping_new.bam.pbi
new file mode 100644
index 0000000..82d497c
Binary files /dev/null and b/tests/data/dataset/bam_mapping_new.bam.pbi differ
diff --git a/tests/data/dataset/bam_mapping_staggered.xml b/tests/data/dataset/bam_mapping_staggered.xml
index 51a8b71..879c193 100644
--- a/tests/data/dataset/bam_mapping_staggered.xml
+++ b/tests/data/dataset/bam_mapping_staggered.xml
@@ -1,5 +1,5 @@
 <?xml version='1.0' encoding='UTF-8'?>
-<pbds:DataSet xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" CreatedAt="2015-05-13T10:58:26" MetaType="PacBio.DataSet.DataSet" Name="" Tags="" UniqueId="30f72098-bc5b-e06b-566c-8b28dda909a8" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd">
+<pbds:DataSet CreatedAt="2015-05-13T10:58:26" MetaType="PacBio.DataSet.DataSet" Name="" Tags="" UniqueId="30f72098-bc5b-e06b-566c-8b28dda909a8" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"  xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd">
     <pbbase:ExternalResources>
         <pbbase:ExternalResource ResourceId="file:tests/data/bam_mapping_1.bam">
 			<pbbase:FileIndices>
diff --git a/tests/data/dataset/barcode.dataset.xml b/tests/data/dataset/barcode.dataset.xml
index 3613e20..1fbbb18 100644
--- a/tests/data/dataset/barcode.dataset.xml
+++ b/tests/data/dataset/barcode.dataset.xml
@@ -1,11 +1,11 @@
 <?xml version="1.0" encoding="utf-8"?>
-<pbds:BarcodeSet xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" MetaType="PacBio.DataSet.BarcodeSet" Name="DataSet_BarcodeSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" CreatedAt="2015-01-27T09:00:01" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd">
+<pbds:BarcodeSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.BarcodeSet" Name="DataSet_BarcodeSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd">
 	<pbbase:ExternalResources>
 		<pbbase:ExternalResource Name="First Barcodes FASTA" Description="Points to an example Barcodes FASTA file." MetaType="BarcodeFile.BarcodeFastaFile" ResourceId="file:///mnt/path/to/barcode.fasta" Tags="Example"/>
 	</pbbase:ExternalResources>
 	<pbds:DataSetMetadata>
 		<pbds:TotalLength>400</pbds:TotalLength>
 		<pbds:NumRecords>30</pbds:NumRecords>
-		<pbsec:BarcodeConstruction>paired</pbsec:BarcodeConstruction>
+		<pbds:BarcodeConstruction>paired</pbds:BarcodeConstruction>
 	</pbds:DataSetMetadata>
 </pbds:BarcodeSet>
diff --git a/tests/data/dataset/ccsread.dataset.xml b/tests/data/dataset/ccsread.dataset.xml
index 9baafab..97b5943 100644
--- a/tests/data/dataset/ccsread.dataset.xml
+++ b/tests/data/dataset/ccsread.dataset.xml
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="utf-8"?>
-<pbds:ConsensusReadSet xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" MetaType="PacBio.DataSet.ConsensusReadSet" Name="DataSet_ConsensusReadSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" CreatedAt="2015-01-27T09:00:01" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd">
+<pbds:ConsensusReadSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.ConsensusReadSet" Name="DataSet_ConsensusReadSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd">
 	<pbbase:ExternalResources>
 		<pbbase:ExternalResource Name="First ConsensusRead BAM" Description="Points to an example ConsensusRead BAM file." MetaType="PacBio.ConsensusReadFile.ConsensusReadBamFile" ResourceId="file:///mnt/path/to/ccsreads0.bam" Tags="Example">
 			<pbbase:FileIndices>
diff --git a/tests/data/dataset/contig.dataset.xml b/tests/data/dataset/contig.dataset.xml
index 77d7c4e..11a9b12 100644
--- a/tests/data/dataset/contig.dataset.xml
+++ b/tests/data/dataset/contig.dataset.xml
@@ -1,18 +1,18 @@
-<?xml version="1.0" encoding="utf-8"?>
-<pbds:ContigSet xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" MetaType="PacBio.DataSet.ContigSet" Name="DataSet_ContigSet" Tags="AHAcontigs" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" CreatedAt="2015-01-27T09:00:01" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd">
-	<pbbase:ExternalResources>
-		<pbbase:ExternalResource Name="First References FASTA" Description="Points to an example references FASTA file." MetaType="PacBio.ReferenceFile.ReferenceFastaFile" ResourceId="file:///mnt/path/to/reference.fasta" Tags="Example">
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:ContigSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.ContigSet" Name="DataSet_ContigSet" Tags="AHAcontigs" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBiosets.xsd">
+	<pbbase:ExternalResources>
+		<pbbase:ExternalResource Name="First References FASTA" Description="Points to an example references FASTA file." MetaType="PacBio.ReferenceFile.ReferenceFastaFile" ResourceId="file:///mnt/path/to/reference.fasta" Tags="Example">
 			<pbbase:FileIndices>
-				<pbbase:FileIndex MetaType="PacBio.Index.SaWriterIndex" ResourceId="file:///mnt/path/to/reference.fasta.sa"/>
-				<pbbase:FileIndex MetaType="PacBio.Index.SamIndex" ResourceId="file:///mnt/path/to/reference.fasta.fai"/>
+				<pbbase:FileIndex MetaType="PacBio.Index.SaWriterIndex" ResourceId="file:///mnt/path/to/reference.fasta.sa"/>
+				<pbbase:FileIndex MetaType="PacBio.Index.SamIndex" ResourceId="file:///mnt/path/to/reference.fasta.fai"/>
 			</pbbase:FileIndices>
-		</pbbase:ExternalResource>
-	</pbbase:ExternalResources>
-	<pbds:DataSetMetadata>
-		<pbds:TotalLength>5000000</pbds:TotalLength>
-		<pbds:NumRecords>500</pbds:NumRecords>
-		<pbsec:Contigs>
-			<pbsec:Contig Name="gi|229359445|emb|AM181176.4|" Description="Pseudomonas fluorescens SBW25 complete genome|quiver" Length="6722109" Digest="f627c795efad7ce0050ed42b942d408e"/>
-		</pbsec:Contigs>
-	</pbds:DataSetMetadata>
+		</pbbase:ExternalResource>
+	</pbbase:ExternalResources>
+	<pbds:DataSetMetadata>
+		<pbds:TotalLength>5000000</pbds:TotalLength>
+		<pbds:NumRecords>500</pbds:NumRecords>
+		<pbds:Contigs>
+			<pbds:Contig Name="gi|229359445|emb|AM181176.4|" Description="Pseudomonas fluorescens SBW25 complete genome|quiver" Length="6722109" Digest="f627c795efad7ce0050ed42b942d408e"/>
+		</pbds:Contigs>
+	</pbds:DataSetMetadata>
 </pbds:ContigSet>
diff --git a/tests/data/dataset/hdfsubread_dataset.xml b/tests/data/dataset/hdfsubread_dataset.xml
index 8a0e0e2..29fdf31 100644
--- a/tests/data/dataset/hdfsubread_dataset.xml
+++ b/tests/data/dataset/hdfsubread_dataset.xml
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="utf-8"?>
-<pbds:HdfSubreadSet xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" MetaType="PacBio.DataSet.HdfSubreadSet" Name="DataSet_HdfSubreadSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" CreatedAt="2015-01-27T09:00:01" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd">
+<pbds:HdfSubreadSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.HdfSubreadSet" Name="DataSet_HdfSubreadSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd" xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd"  [...]
 	<pbbase:ExternalResourcess>
         <pbbase:ExternalResource Name="First Subreads BAM" Description="Points to an example Subreads BAM file." MetaType="SubreadFile.SubreadBamFile" ResourceId="file:///mnt/path/to/subreads0.bam" Tags="Example">
             <pbbase:FileIndices>
diff --git a/tests/data/dataset/lambda_contigs.xml b/tests/data/dataset/lambda_contigs.xml
index e2bfb18..4abc8cc 100644
--- a/tests/data/dataset/lambda_contigs.xml
+++ b/tests/data/dataset/lambda_contigs.xml
@@ -1,2 +1,6 @@
 <?xml version='1.0' encoding='UTF-8'?>
-<pbds:ReferenceSet xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" CreatedAt="2015-05-28T10:56:36" MetaType="PacBio.DataSet.ReferenceSet" Name="" Tags="" UniqueId="596e87db-34f9-d2fd-c905-b017543170e1" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd"><pbbase:ExternalResources><pbbase:ExternalResource ResourceId="file:tests/data/lambda_contigs.fasta" /></pbbase:ExternalResources></pbds:Re [...]
\ No newline at end of file
+<pbds:ReferenceSet CreatedAt="2015-05-28T10:56:36" MetaType="PacBio.DataSet.ReferenceSet" Name="" Tags="" UniqueId="596e87db-34f9-d2fd-c905-b017543170e1" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd">
+    <pbbase:ExternalResources>
+        <pbbase:ExternalResource ResourceId="file:tests/data/lambda_contigs.fasta"/>
+    </pbbase:ExternalResources>
+</pbds:ReferenceSet>
\ No newline at end of file
diff --git a/tests/data/dataset/malformed.xml b/tests/data/dataset/malformed.xml
new file mode 100644
index 0000000..e9000c8
--- /dev/null
+++ b/tests/data/dataset/malformed.xml
@@ -0,0 +1,84 @@
+<?xml version="1.0" encoding="utf-8"?>
+<SubreadSet 
+	Description="Merged dataset from 1 files using DatasetMerger 0.1.2" 
+    MetaType="PacBio.DataSet.HdfSubreadSet" 
+    Name="Subreads from runr000013_42267_150403" 
+    Tags="pacbio.secondary.instrument=RS" 
+    TimeStampedName="hdfsubreadset_2015-08-19T15:39:36.331-07:00" 
+    UniqueId="b4741521-2a4c-42df-8a13-0a755ca9ed1e"
+    Version="0.5" 
+    xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" 
+    xmlns:ns0="http://pacificbiosciences.com/PacBioBaseDataModel.xsd"
+    xmlns:ns1="http://pacificbiosciences.com/PacBioSampleInfo.xsd"
+    xmlns:ns2="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd"
+    xmlns:ns3="http://pacificbiosciences.com/PacBioReagentKit.xsd">
+	<ns0:ExternalResources>
+        <ns0:ExternalResource 
+            MetaType="SubreadFile.SubreadBamFile"
+            TimeStampedName="SubreadFile.SubreadBamFile_00000000000000"
+            UniqueId="251acf71-9eb0-489e-9dd1-cdbd11432753" 
+            ResourceId="file:///mnt/secondary-siv/jenkins/jenkins-bot01/workspace/Ubuntu1404_Mainline_SA3_Tiny_tests/software/smrtanalysis/siv/testkit-jobs/sa3_pipelines/mapping/tiny/job_output-ubuntu1404/tasks/pbsmrtpipe.tasks.h5_subreads_to_subread-0//mnt/secondary-siv/jenkins/jenkins-bot01/workspace/Ubuntu1404_Mainline_SA3_Tiny_tests/software/smrtanalysis/siv/testkit-jobs/sa3_pipelines/mapping/tiny/job_output-ubuntu1404/tasks/pbsmrtpipe.tasks.h5_subreads_to_subread-0/file.subreads.sub [...]
+    </ns0:ExternalResources>
+    <DataSetMetadata>
+        <TotalLength>50000000</TotalLength>
+        <NumRecords>150000</NumRecords>
+        <ns2:Collections>
+            <ns2:CollectionMetadata 
+                Context="m150404_101626_42267_c100807920800000001823174110291514_s1_p0" 
+                InstrumentId="1" 
+                InstrumentName="42267" 
+                MetaType="PacBio.Collection" 
+                TimeStampedName="m150404_101626_42267_c100807920800000001823174110291514_s1_p0" 
+                UniqueId="d66c8372-2b70-4dcf-b64f-9f8b5cc351fd">
+                <ns2:InstCtrlVer>2.3.0.1.142990</ns2:InstCtrlVer>
+                <ns2:SigProcVer>NRT at 172.31.128.10:8082, SwVer=2301.142990, HwVer=1.0</ns2:SigProcVer>
+                <ns2:RunDetails>
+                    <ns2:RunId>r000013_42267_150403</ns2:RunId>
+                    <ns2:Name>Inst42267-040315-SAT-100pM-2kb-P6C4</ns2:Name>
+                </ns2:RunDetails>
+                <ns2:WellSample Name="Inst42267-040315-SAT-100pM-2kb-P6C4">
+                    <ns2:PlateId>Inst42267-040315-SAT-100pM-2kb-P6C4</ns2:PlateId>
+                    <ns2:WellName>Inst42267-040315-SAT-100pM-2kb-P6C4</ns2:WellName>
+                    <ns2:Concentration>0.0</ns2:Concentration>                         
+                    <ns2:SampleReuseEnabled>false</ns2:SampleReuseEnabled>
+                    <ns2:StageHotstartEnabled>false</ns2:StageHotstartEnabled>
+                    <ns2:SizeSelectionEnabled>false</ns2:SizeSelectionEnabled>
+                    <ns2:UseCount>1</ns2:UseCount>
+                    <ns1:BioSamplePointers>
+                        <ns1:BioSamplePointer>251acf71-9eb0-489e-9dd1-cdbd11432752</ns1:BioSamplePointer>
+                    </ns1:BioSamplePointers>
+                </ns2:WellSample>
+                <ns2:Automation>
+                    <ns0:AutomationParameters>
+                        <ns0:AutomationParameter />
+                    </ns0:AutomationParameters>
+                </ns2:Automation>
+                <ns2:CollectionNumber>7</ns2:CollectionNumber>
+                <ns2:CellIndex>4</ns2:CellIndex>
+                <ns2:CellPac Barcode="10080792080000000182317411029151" />
+                <ns2:Primary>
+                    <ns2:AutomationName>BasecallerV1</ns2:AutomationName>
+                    <ns2:ConfigFileName>2-3-0_P6-C4.xml</ns2:ConfigFileName>
+                    <ns2:SequencingCondition />
+                    <ns2:OutputOptions>
+                        <ns2:ResultsFolder>Analysis_Results</ns2:ResultsFolder>
+                        <ns2:CollectionPathUri>rsy://mp-rsync/vol55//RS_DATA_STAGING/42267/Inst42267-040315-SAT-100pM-2kb-P6C4_13/A04_7/</ns2:CollectionPathUri>
+                        <ns2:CopyFiles>
+                            <ns2:CollectionFileCopy>Fasta</ns2:CollectionFileCopy>
+                        </ns2:CopyFiles>
+                        <ns2:Readout>Bases</ns2:Readout>
+                        <ns2:MetricsVerbosity>Minimal</ns2:MetricsVerbosity>
+                     </ns2:OutputOptions>
+                 </ns2:Primary>
+             </ns2:CollectionMetadata>
+         </ns2:Collections>
+         <ns1:BioSamples>
+             <ns1:BioSample
+                 Description="Inst42267-SAT-100pM-2kbLambda-P6C4-Std120_CPS_040315"
+                 MetaType="PacBio.Sample" 
+                 Name="Inst42267-040315-SAT-100pM-2kb-P6C4" 
+                 TimeStampedName="biosample_2015-08-19T15:39:36.331-07:00" 
+                 UniqueId="251acf71-9eb0-489e-9dd1-cdbd11432752" />
+         </ns1:BioSamples>
+      </DataSetMetadata>
+</SubreadSet>
diff --git a/tests/data/dataset/merge.fofn b/tests/data/dataset/merge.fofn
new file mode 100644
index 0000000..6524ba5
--- /dev/null
+++ b/tests/data/dataset/merge.fofn
@@ -0,0 +1,2 @@
+bam_mapping_1.bam
+bam_mapping_2.bam
diff --git a/tests/data/dataset/pbalchemy10kbp.pbalign.sorted.pbver1.bam b/tests/data/dataset/pbalchemy10kbp.pbalign.sorted.pbver1.bam
index fb08bf9..52c0c8e 100644
Binary files a/tests/data/dataset/pbalchemy10kbp.pbalign.sorted.pbver1.bam and b/tests/data/dataset/pbalchemy10kbp.pbalign.sorted.pbver1.bam differ
diff --git a/tests/data/dataset/pbalchemy10kbp.pbalign.sorted.pbver1.bam.bai b/tests/data/dataset/pbalchemy10kbp.pbalign.sorted.pbver1.bam.bai
index b87bfa7..b8892c2 100644
Binary files a/tests/data/dataset/pbalchemy10kbp.pbalign.sorted.pbver1.bam.bai and b/tests/data/dataset/pbalchemy10kbp.pbalign.sorted.pbver1.bam.bai differ
diff --git a/tests/data/dataset/pbalchemy10kbp.xml b/tests/data/dataset/pbalchemy10kbp.xml
index 1b90cec..96189ad 100644
--- a/tests/data/dataset/pbalchemy10kbp.xml
+++ b/tests/data/dataset/pbalchemy10kbp.xml
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="UTF-8"?>
-<pbds:DataSet xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" CreatedAt="2015-05-22T16:56:16" MetaType="PacBio.DataSet.DataSet" Name="" Tags="" UniqueId="58e3f7c5-24c1-b58b-fbd5-37de268cc2f0" Version="2.3.0" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd">
+<pbds:DataSet CreatedAt="2015-05-22T16:56:16" MetaType="PacBio.DataSet.DataSet" Name="" Tags="" UniqueId="58e3f7c5-24c1-b58b-fbd5-37de268cc2f0" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd">
   <pbbase:ExternalResources>
     <pbbase:ExternalResource ResourceId="file:tests/data/pbalchemy10kbp.pbalign.sorted.pbver1.bam">
       <pbbase:FileIndices>
diff --git a/tests/data/dataset/reference.dataset.xml b/tests/data/dataset/reference.dataset.xml
index 953f863..3cfbe8c 100644
--- a/tests/data/dataset/reference.dataset.xml
+++ b/tests/data/dataset/reference.dataset.xml
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="utf-8"?>
-<pbds:ReferenceSet xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" MetaType="PacBio.DataSet.ReferenceSet" Name="DataSet_ReferenceSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" CreatedAt="2015-01-27T09:00:01" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd">
+<pbds:ReferenceSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.ReferenceSet" Name="DataSet_ReferenceSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0"  xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd">
 	<pbbase:ExternalResources>
 		<pbbase:ExternalResource Name="First References FASTA" Description="Points to an example references FASTA file." MetaType="PacBio.ReferenceFile.ReferenceFastaFile" ResourceId="file:///mnt/path/to/reference.fasta" Tags="Example">
 			<pbbase:FileIndices>
diff --git a/tests/data/dataset/subread_dataset1.xml b/tests/data/dataset/subread_dataset1.xml
index ac6325b..1d64e79 100644
--- a/tests/data/dataset/subread_dataset1.xml
+++ b/tests/data/dataset/subread_dataset1.xml
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="utf-8"?>
-<pbds:SubreadSet xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" MetaType="PacBio.DataSet.SubreadSet" Name="DataSet_SubreadSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" CreatedAt="2015-01-27T09:00:01" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd" >
+<pbds:SubreadSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.SubreadSet" Name="DataSet_SubreadSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0"  xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd" xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd" xmlns:xs [...]
     <pbbase:ExternalResources>
         <pbbase:ExternalResource Name="First Subreads BAM" Description="Points to an example Subreads BAM file." MetaType="SubreadFile.SubreadBamFile" ResourceId="file:///mnt/path/to/subreads0.bam" Tags="Example">
             <pbbase:FileIndices>
diff --git a/tests/data/dataset/subread_dataset2.xml b/tests/data/dataset/subread_dataset2.xml
index 63da322..a395330 100644
--- a/tests/data/dataset/subread_dataset2.xml
+++ b/tests/data/dataset/subread_dataset2.xml
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="utf-8"?>
-<pbds:SubreadSet xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" MetaType="PacBio.DataSet.SubreadSet" Name="DataSet_SubreadSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" CreatedAt="2015-01-27T09:00:01" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd" >
+<pbds:SubreadSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.SubreadSet" Name="DataSet_SubreadSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd" xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd" xmlns:xsi [...]
     <pbbase:ExternalResources>
         <pbbase:ExternalResource Name="First Subreads BAM" Description="Points to an example Subreads BAM file." MetaType="SubreadFile.SubreadBamFile" ResourceId="file:///mnt/path/to/subreads2.bam" Tags="Example">
             <pbbase:FileIndices>
diff --git a/tests/data/dataset/subread_dataset3.xml b/tests/data/dataset/subread_dataset3.xml
index 00a1786..91923a8 100644
--- a/tests/data/dataset/subread_dataset3.xml
+++ b/tests/data/dataset/subread_dataset3.xml
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="utf-8"?>
-<pbds:SubreadSet xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" MetaType="PacBio.DataSet.SubreadSet" Name="DataSet_SubreadSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" CreatedAt="2015-01-27T09:00:01" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd" >
+<pbds:SubreadSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.SubreadSet" Name="DataSet_SubreadSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0"  xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd" xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd" xmlns:xs [...]
     <pbbase:ExternalResources>
         <pbbase:ExternalResource Name="First Subreads BAM" Description="Points to an example Subreads BAM file." MetaType="SubreadFile.SubreadBamFile" ResourceId="file:///mnt/path/to/subreads2.bam" Tags="Example">
             <pbbase:FileIndices>
diff --git a/tests/data/dataset/transformed_rs_subread_dataset.xml b/tests/data/dataset/transformed_rs_subread_dataset.xml
index 6b93870..465d9a6 100644
--- a/tests/data/dataset/transformed_rs_subread_dataset.xml
+++ b/tests/data/dataset/transformed_rs_subread_dataset.xml
@@ -1,13 +1,20 @@
 <?xml version="1.0" encoding="UTF-8"?>
-<pbds:HdfSubreadSet xmlns:uuid="java:java.util.UUID" xmlns:bax="http://whatever"
-               xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd"
-               xmlns:xs="http://www.w3.org/2001/XMLSchema"
-               xmlns:fn="http://www.w3.org/2005/xpath-functions"
-               Name="Subreads from run r001173_42129_130607"
-               MetaType="PacBio.DataSet.SubreadSet"
-               Tags="pacbio.secondary.instrument=RS"
-               Version="0.5"
-               UniqueId="abbc9183-b01e-4671-8c12-19efee534647">
+<pbds:HdfSubreadSet 
+    xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd"
+    xmlns:bax="http://whatever"
+    xmlns:fn="http://www.w3.org/2005/xpath-functions"
+    xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd"
+    xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd"
+    xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd" 
+    xmlns:uuid="java:java.util.UUID" 
+    xmlns:xs="http://www.w3.org/2001/XMLSchema"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
+    xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd"
+    Name="Subreads from run r001173_42129_130607"
+    MetaType="PacBio.DataSet.SubreadSet"
+    Tags="pacbio.secondary.instrument=RS"
+    Version="0.5"
+    UniqueId="abbc9183-b01e-4671-8c12-19efee534647">
    <pbbase:ExternalResources>
       <pbbase:ExternalResource MetaType="PacBio.SubreadFile.BaxFile"
           ResourceId="file:///mnt/secondary-siv/testdata/LIMS/2590727/0001/Analysis_Results/m130608_033634_42129_c100515232550000001823076608221351_s1_p0.0.bax.h5"/>
diff --git a/tests/data/ex2.bam b/tests/data/ex2.bam
index efba168..3fb5f49 100644
Binary files a/tests/data/ex2.bam and b/tests/data/ex2.bam differ
diff --git a/tests/data/ex2.bam.bai b/tests/data/ex2.bam.bai
index f44c34d..a0a7868 100644
Binary files a/tests/data/ex2.bam.bai and b/tests/data/ex2.bam.bai differ
diff --git a/tests/data/ex2.sam b/tests/data/ex2.sam
index b609d6c..a984e87 100644
--- a/tests/data/ex2.sam
+++ b/tests/data/ex2.sam
@@ -1,4 +1,4 @@
-@HD	VN:1.0	SO:coordinate	pb:3.0b7
+@HD	VN:1.0	SO:coordinate	pb:3.0.1
 @SQ	SN:seq1	LN:1575
 @SQ	SN:seq2	LN:1584
 B7_591:4:96:693:509	73	seq1	1	99	36M	*	0	0	CACTAGTGGCTCATTGTAAATGTGTGGTTTAACTCG	<<<<<<<<<<<<<<<;<<<<<<<<<5<<<<<;:<;7	MF:i:18	Aq:i:73	NM:i:0	UQ:i:0	H0:i:1	H1:i:0
diff --git a/tests/data/ex2.bam b/tests/data/ex2_copy.bam
similarity index 100%
copy from tests/data/ex2.bam
copy to tests/data/ex2_copy.bam
diff --git a/tests/data/ex2.bam.bai b/tests/data/ex2_copy.bam.bai
similarity index 100%
copy from tests/data/ex2.bam.bai
copy to tests/data/ex2_copy.bam.bai
diff --git a/tests/data/phi29.bam b/tests/data/phi29.bam
new file mode 100644
index 0000000..46176b6
Binary files /dev/null and b/tests/data/phi29.bam differ
diff --git a/tests/data/polymerase/consolidate.subread.dataset.xml b/tests/data/polymerase/consolidate.subread.dataset.xml
new file mode 100644
index 0000000..ca85a7a
--- /dev/null
+++ b/tests/data/polymerase/consolidate.subread.dataset.xml
@@ -0,0 +1,38 @@
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:SubreadSet 
+    xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" 
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
+    xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd"
+    xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd"
+    xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd"
+    xmlns:pbds="http://pacificbiosciences.com/PacBioDatasets.xsd"
+    xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd" 
+    UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" 
+    TimeStampedName="subreadset_150304_231155" 
+    MetaType="PacBio.DataSet.SubreadSet" 
+    Name="DataSet_SubreadSet" 
+    Version="3.0.0" 
+    CreatedAt="2015-01-27T09:00:01"> 
+<pbbase:ExternalResources>
+    <pbbase:ExternalResource 
+        UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5193" 
+        TimeStampedName="subread_bam_150304_231155" 
+        MetaType="PacBio.SubreadFile.SubreadBamFile" 
+        ResourceId="production.subreads.bam">
+    </pbbase:ExternalResource>
+    <pbbase:ExternalResource
+        UniqueId="b096d0a3-94b8-4918-b3af-a3f81bbe5195"
+        TimeStampedName="scraps_bam_150304_231155"
+        MetaType="PacBio.SubreadFile.ScrapsBamFile"
+        ResourceId="production.scraps.bam">
+    </pbbase:ExternalResource>
+</pbbase:ExternalResources>
+<pbds:Filters>
+    <pbds:Filter>
+        <pbbase:Properties>
+            <pbbase:Property Name="qStart" Value="4000" Operator=">"/>
+            <pbbase:Property Name="qStart" Value="5000" Operator="<"/>
+        </pbbase:Properties>
+    </pbds:Filter>
+</pbds:Filters>
+</pbds:SubreadSet>
diff --git a/tests/data/polymerase/filtered_resources.subread.dataset.xml b/tests/data/polymerase/filtered_resources.subread.dataset.xml
new file mode 100644
index 0000000..e414e00
--- /dev/null
+++ b/tests/data/polymerase/filtered_resources.subread.dataset.xml
@@ -0,0 +1,67 @@
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:SubreadSet 
+    xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" 
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
+    xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd"
+    xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd"
+    xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd"
+    xmlns:pbds="http://pacificbiosciences.com/PacBioDatasets.xsd"
+    xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd" 
+    UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" 
+    TimeStampedName="subreadset_150304_231155" 
+    MetaType="PacBio.DataSet.SubreadSet" 
+    Name="DataSet_SubreadSet" 
+    Version="3.0.0" 
+    CreatedAt="2015-01-27T09:00:01"> 
+<pbbase:ExternalResources>
+    <pbbase:ExternalResource 
+        UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5193" 
+        TimeStampedName="subread_bam_150304_231155" 
+        MetaType="PacBio.SubreadFile.SubreadBamFile" 
+        ResourceId="./production.subreads.bam">
+        <pbbase:ExternalResources>
+            <pbbase:ExternalResource 
+                UniqueId="b096d0a3-94b8-4918-b3af-a3f81bbe5195" 
+                TimeStampedName="scraps_bam_150304_231155" 
+                MetaType="PacBio.SubreadFile.ScrapsBamFile" 
+                ResourceId="./production.scraps.bam">
+            </pbbase:ExternalResource>
+        </pbbase:ExternalResources>
+    </pbbase:ExternalResource>
+    <pbbase:ExternalResource 
+        UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5193" 
+        TimeStampedName="subread_bam_150304_231155" 
+        MetaType="PacBio.SubreadFile.SubreadBamFile" 
+        ResourceId="./internal.subreads.bam">
+        <pbbase:ExternalResources>
+            <pbbase:ExternalResource 
+                UniqueId="b096d0a3-94b8-4918-b3af-a3f81bbe5195" 
+                TimeStampedName="scraps_bam_150304_231155" 
+                MetaType="PacBio.SubreadFile.ScrapsBamFile" 
+                ResourceId="./internal.scraps.bam">
+            </pbbase:ExternalResource>
+        </pbbase:ExternalResources>
+    </pbbase:ExternalResource>
+    <pbbase:ExternalResource 
+        UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5197" 
+        TimeStampedName="subread_bam_150304_231155" 
+        MetaType="PacBio.SubreadFile.HqRegionBamFile" 
+        ResourceId="./production_hq.hqregion.bam">
+        <pbbase:ExternalResources>
+            <pbbase:ExternalResource 
+                UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5199" 
+                TimeStampedName="scraps_bam_150304_231155" 
+                MetaType="PacBio.SubreadFile.HqScrapsBamFile" 
+                ResourceId="./production_hq.scraps.bam">
+            </pbbase:ExternalResource>
+        </pbbase:ExternalResources>
+    </pbbase:ExternalResource>
+</pbbase:ExternalResources>
+<pbds:Filters>
+    <pbds:Filter>
+        <pbbase:Properties>
+            <pbbase:Property Name="zm" Value="100000" Operator="=="/>
+        </pbbase:Properties>
+    </pbds:Filter>
+</pbds:Filters>
+</pbds:SubreadSet>
\ No newline at end of file
diff --git a/tests/data/polymerase/internal.hqregions.bam b/tests/data/polymerase/internal.hqregions.bam
new file mode 100644
index 0000000..e2f7f09
Binary files /dev/null and b/tests/data/polymerase/internal.hqregions.bam differ
diff --git a/tests/data/polymerase/internal.lqregions.bam b/tests/data/polymerase/internal.lqregions.bam
new file mode 100644
index 0000000..b8aeed3
Binary files /dev/null and b/tests/data/polymerase/internal.lqregions.bam differ
diff --git a/tests/data/polymerase/internal.polymerase.bam b/tests/data/polymerase/internal.polymerase.bam
index 5a9a3c2..2a01fc3 100644
Binary files a/tests/data/polymerase/internal.polymerase.bam and b/tests/data/polymerase/internal.polymerase.bam differ
diff --git a/tests/data/polymerase/internal.scraps.bam b/tests/data/polymerase/internal.scraps.bam
index ee501d3..2c2f3fc 100644
Binary files a/tests/data/polymerase/internal.scraps.bam and b/tests/data/polymerase/internal.scraps.bam differ
diff --git a/tests/data/polymerase/internal.scraps.bam.pbi b/tests/data/polymerase/internal.scraps.bam.pbi
new file mode 100644
index 0000000..2d0bad9
Binary files /dev/null and b/tests/data/polymerase/internal.scraps.bam.pbi differ
diff --git a/tests/data/polymerase/internal.subreads.bam b/tests/data/polymerase/internal.subreads.bam
index a352448..c45ff59 100644
Binary files a/tests/data/polymerase/internal.subreads.bam and b/tests/data/polymerase/internal.subreads.bam differ
diff --git a/tests/data/polymerase/internal.subreads.bam.pbi b/tests/data/polymerase/internal.subreads.bam.pbi
new file mode 100644
index 0000000..8059402
Binary files /dev/null and b/tests/data/polymerase/internal.subreads.bam.pbi differ
diff --git a/tests/data/polymerase/internal_hq.hqregion.bam b/tests/data/polymerase/internal_hq.hqregion.bam
deleted file mode 100644
index e59134a..0000000
Binary files a/tests/data/polymerase/internal_hq.hqregion.bam and /dev/null differ
diff --git a/tests/data/polymerase/internal_hq.scraps.bam b/tests/data/polymerase/internal_hq.scraps.bam
deleted file mode 100644
index 154d2a1..0000000
Binary files a/tests/data/polymerase/internal_hq.scraps.bam and /dev/null differ
diff --git a/tests/data/polymerase/internal_polymerase.fasta b/tests/data/polymerase/internal_polymerase.fasta
deleted file mode 100644
index 9fb8832..0000000
--- a/tests/data/polymerase/internal_polymerase.fasta
+++ /dev/null
@@ -1,2 +0,0 @@
->m130615_051803_richard_c100541252550000001823084511241346_s1_p0/66617/2659_7034
-CCAGTTTCTCTCTCACGTCACACCCATGAAAAGCAATGGATCTCTCTCTACACAACACAGAGCAAAGCGGAGGTTGGAGCTGTGAAAAAAAGAGATTGAGAATCCAATCCTTAGACCTCTATTAAGTCGACAACACCGCAGAGAACAAGCATCCTATCTGTGTCATTATCCGGTGGTGTTGGAGAGTTTGATATTATACAACAATAAAATACATATAATAGTAGAAATTCATCCCACAACAAAATCTTTATTAGGGTAAGTAGAAGCTTATCTATGAAACTGGGTTCATAAAAAGTTAAGAAAGAATAACAATAGATATAAAGAGGAACACACAAGTTGAATTTTATCGCATATAGCAATTAATCAAGAGGAATTGCAGGTTTTAATCTCATGGCTTTGAGCCTACTATTCTATGAGCTTGGGCTTACACACAGTGTCTTGACCGTAGATGCAGATCTTCTCCTCCAAACACACTAAACACCACCTTCACAA [...]
diff --git a/tests/data/polymerase/multiple_resources.subread.dataset.xml b/tests/data/polymerase/multiple_resources.subread.dataset.xml
new file mode 100644
index 0000000..109535d
--- /dev/null
+++ b/tests/data/polymerase/multiple_resources.subread.dataset.xml
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:SubreadSet 
+    xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" 
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
+    xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd"
+    xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd"
+    xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd"
+    xmlns:pbds="http://pacificbiosciences.com/PacBioDatasets.xsd"
+    xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd" 
+    UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" 
+    TimeStampedName="subreadset_150304_231155" 
+    MetaType="PacBio.DataSet.SubreadSet" 
+    Name="DataSet_SubreadSet" 
+    Version="3.0.0" 
+    CreatedAt="2015-01-27T09:00:01"> 
+<pbbase:ExternalResources>
+    <pbbase:ExternalResource 
+        UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5193" 
+        TimeStampedName="subread_bam_150304_231155" 
+        MetaType="PacBio.SubreadFile.SubreadBamFile" 
+        ResourceId="./production.subreads.bam">
+        <pbbase:ExternalResources>
+            <pbbase:ExternalResource 
+                UniqueId="b096d0a3-94b8-4918-b3af-a3f81bbe5195" 
+                TimeStampedName="scraps_bam_150304_231155" 
+                MetaType="PacBio.SubreadFile.ScrapsBamFile" 
+                ResourceId="./production.scraps.bam">
+            </pbbase:ExternalResource>
+        </pbbase:ExternalResources>
+    </pbbase:ExternalResource>
+    <pbbase:ExternalResource 
+        UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5197" 
+        TimeStampedName="subread_bam_150304_231155" 
+        MetaType="PacBio.SubreadFile.HqRegionBamFile" 
+        ResourceId="./production_hq.hqregion.bam">
+        <pbbase:ExternalResources>
+            <pbbase:ExternalResource 
+                UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5199" 
+                TimeStampedName="scraps_bam_150304_231155" 
+                MetaType="PacBio.SubreadFile.HqScrapsBamFile" 
+                ResourceId="./production_hq.scraps.bam">
+            </pbbase:ExternalResource>
+        </pbbase:ExternalResources>
+    </pbbase:ExternalResource>
+</pbbase:ExternalResources>
+</pbds:SubreadSet>
\ No newline at end of file
diff --git a/tests/data/polymerase/production.polymerase.bam b/tests/data/polymerase/production.polymerase.bam
index 9c192da..4c84b23 100644
Binary files a/tests/data/polymerase/production.polymerase.bam and b/tests/data/polymerase/production.polymerase.bam differ
diff --git a/tests/data/polymerase/production.scraps.bam b/tests/data/polymerase/production.scraps.bam
index c8c20df..a32bdfb 100644
Binary files a/tests/data/polymerase/production.scraps.bam and b/tests/data/polymerase/production.scraps.bam differ
diff --git a/tests/data/polymerase/production.scraps.bam.pbi b/tests/data/polymerase/production.scraps.bam.pbi
new file mode 100644
index 0000000..c3abd5c
Binary files /dev/null and b/tests/data/polymerase/production.scraps.bam.pbi differ
diff --git a/tests/data/polymerase/production.subreads.bam b/tests/data/polymerase/production.subreads.bam
index 9c56583..452aad5 100644
Binary files a/tests/data/polymerase/production.subreads.bam and b/tests/data/polymerase/production.subreads.bam differ
diff --git a/tests/data/polymerase/production.subreads.bam.pbi b/tests/data/polymerase/production.subreads.bam.pbi
new file mode 100644
index 0000000..f504955
Binary files /dev/null and b/tests/data/polymerase/production.subreads.bam.pbi differ
diff --git a/tests/data/polymerase/production_hq.hqregion.bam b/tests/data/polymerase/production_hq.hqregion.bam
index 2993089..66d436b 100644
Binary files a/tests/data/polymerase/production_hq.hqregion.bam and b/tests/data/polymerase/production_hq.hqregion.bam differ
diff --git a/tests/data/polymerase/production_hq.hqregion.bam.pbi b/tests/data/polymerase/production_hq.hqregion.bam.pbi
new file mode 100644
index 0000000..5ffa37c
Binary files /dev/null and b/tests/data/polymerase/production_hq.hqregion.bam.pbi differ
diff --git a/tests/data/polymerase/production_hq.scraps.bam b/tests/data/polymerase/production_hq.scraps.bam
index 1c392fc..716e098 100644
Binary files a/tests/data/polymerase/production_hq.scraps.bam and b/tests/data/polymerase/production_hq.scraps.bam differ
diff --git a/tests/data/polymerase/production_hq.scraps.bam.pbi b/tests/data/polymerase/production_hq.scraps.bam.pbi
new file mode 100644
index 0000000..f719103
Binary files /dev/null and b/tests/data/polymerase/production_hq.scraps.bam.pbi differ
diff --git a/tests/data/polymerase/whitelist/internal.polymerase.bam b/tests/data/polymerase/whitelist/internal.polymerase.bam
new file mode 100644
index 0000000..015ba80
Binary files /dev/null and b/tests/data/polymerase/whitelist/internal.polymerase.bam differ
diff --git a/tests/data/polymerase/whitelist/internal.polymerase.bam.pbi b/tests/data/polymerase/whitelist/internal.polymerase.bam.pbi
new file mode 100644
index 0000000..3961e55
Binary files /dev/null and b/tests/data/polymerase/whitelist/internal.polymerase.bam.pbi differ
diff --git a/tests/data/polymerase/whitelist/internal.scraps.bam b/tests/data/polymerase/whitelist/internal.scraps.bam
new file mode 100644
index 0000000..3ff05a5
Binary files /dev/null and b/tests/data/polymerase/whitelist/internal.scraps.bam differ
diff --git a/tests/data/polymerase/whitelist/internal.scraps.bam.pbi b/tests/data/polymerase/whitelist/internal.scraps.bam.pbi
new file mode 100644
index 0000000..ea72b36
Binary files /dev/null and b/tests/data/polymerase/whitelist/internal.scraps.bam.pbi differ
diff --git a/tests/data/polymerase/whitelist/internal.subreads.bam b/tests/data/polymerase/whitelist/internal.subreads.bam
new file mode 100644
index 0000000..ed5ba3a
Binary files /dev/null and b/tests/data/polymerase/whitelist/internal.subreads.bam differ
diff --git a/tests/data/polymerase/whitelist/internal.subreads.bam.pbi b/tests/data/polymerase/whitelist/internal.subreads.bam.pbi
new file mode 100644
index 0000000..f584738
Binary files /dev/null and b/tests/data/polymerase/whitelist/internal.subreads.bam.pbi differ
diff --git a/tests/data/polymerase/whitelist/scrapless.scraps.bam b/tests/data/polymerase/whitelist/scrapless.scraps.bam
new file mode 100644
index 0000000..7b989c4
Binary files /dev/null and b/tests/data/polymerase/whitelist/scrapless.scraps.bam differ
diff --git a/tests/data/polymerase/whitelist/scrapless.scraps.bam.pbi b/tests/data/polymerase/whitelist/scrapless.scraps.bam.pbi
new file mode 100644
index 0000000..140af8a
Binary files /dev/null and b/tests/data/polymerase/whitelist/scrapless.scraps.bam.pbi differ
diff --git a/tests/data/polymerase/whitelist/scrapless.subreads.bam b/tests/data/polymerase/whitelist/scrapless.subreads.bam
new file mode 100644
index 0000000..739b3b4
Binary files /dev/null and b/tests/data/polymerase/whitelist/scrapless.subreads.bam differ
diff --git a/tests/data/polymerase/whitelist/scrapless.subreads.bam.pbi b/tests/data/polymerase/whitelist/scrapless.subreads.bam.pbi
new file mode 100644
index 0000000..19ce255
Binary files /dev/null and b/tests/data/polymerase/whitelist/scrapless.subreads.bam.pbi differ
diff --git a/tests/data/relative/a/test.bam b/tests/data/relative/a/test.bam
new file mode 100644
index 0000000..26d72fb
Binary files /dev/null and b/tests/data/relative/a/test.bam differ
diff --git a/tests/data/relative/b/test1.bam b/tests/data/relative/b/test1.bam
new file mode 100644
index 0000000..26d72fb
Binary files /dev/null and b/tests/data/relative/b/test1.bam differ
diff --git a/tests/data/relative/b/test2.bam b/tests/data/relative/b/test2.bam
new file mode 100644
index 0000000..26d72fb
Binary files /dev/null and b/tests/data/relative/b/test2.bam differ
diff --git a/tests/data/relative/relative.fofn b/tests/data/relative/relative.fofn
new file mode 100644
index 0000000..755c589
--- /dev/null
+++ b/tests/data/relative/relative.fofn
@@ -0,0 +1,3 @@
+a/test.bam
+b/test1.bam
+b/test2.bam
diff --git a/tests/data/relative/relative.xml b/tests/data/relative/relative.xml
new file mode 100644
index 0000000..0e78fe4
--- /dev/null
+++ b/tests/data/relative/relative.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:SubreadSet xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" MetaType="PacBio.DataSet.AlignmentSet" Name="DataSet_AlignmentSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" CreatedAt="2015-01-27T09:00:01" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd">
+        <pbbase:ExternalResources>
+                <pbbase:ExternalResource MetaType="SubreadFile.SubreadBamFile" ResourceId="./a/test.bam" />
+                <pbbase:ExternalResource MetaType="SubreadFile.SubreadBamFile" ResourceId="./b/test1.bam" />
+                <pbbase:ExternalResource MetaType="SubreadFile.SubreadBamFile" ResourceId="./b/test2.bam"/>
+        </pbbase:ExternalResources>
+</pbds:SubreadSet>
diff --git a/tests/data/relative/relative2.fofn b/tests/data/relative/relative2.fofn
new file mode 100644
index 0000000..f1969ac
--- /dev/null
+++ b/tests/data/relative/relative2.fofn
@@ -0,0 +1,4 @@
+a/test.bam
+b/test1.bam
+b/test2.bam
+relative.xml
diff --git a/tests/data/test_group_query/test1.bam b/tests/data/test_group_query/test1.bam
index f92d6bf..5673abc 100644
Binary files a/tests/data/test_group_query/test1.bam and b/tests/data/test_group_query/test1.bam differ
diff --git a/tests/data/test_group_query/test2.bam b/tests/data/test_group_query/test2.bam
index 53c1d8f..565b224 100644
Binary files a/tests/data/test_group_query/test2.bam and b/tests/data/test_group_query/test2.bam differ
diff --git a/tests/data/test_group_query/test2.bam.pbi b/tests/data/test_group_query/test2.bam.pbi
index 1b0c1b9..384ad28 100644
Binary files a/tests/data/test_group_query/test2.bam.pbi and b/tests/data/test_group_query/test2.bam.pbi differ
diff --git a/tests/data/test_group_query/test3.bam b/tests/data/test_group_query/test3.bam
index 5b8548b..3b1e21b 100644
Binary files a/tests/data/test_group_query/test3.bam and b/tests/data/test_group_query/test3.bam differ
diff --git a/tests/data/truncated.bam b/tests/data/truncated.bam
new file mode 100644
index 0000000..f40e5f1
Binary files /dev/null and b/tests/data/truncated.bam differ
diff --git a/tests/files.cmake b/tests/files.cmake
index ea69cac..27cc8d4 100644
--- a/tests/files.cmake
+++ b/tests/files.cmake
@@ -18,26 +18,34 @@ set( PacBioBAMTest_CPP
     ${PacBioBAM_TestsDir}/src/test_BamRecordImplVariableData.cpp
     ${PacBioBAM_TestsDir}/src/test_BamRecordMapping.cpp
     ${PacBioBAM_TestsDir}/src/test_BamWriter.cpp
+    ${PacBioBAM_TestsDir}/src/test_BarcodeQuery.cpp
     ${PacBioBAM_TestsDir}/src/test_Cigar.cpp
+    ${PacBioBAM_TestsDir}/src/test_Compare.cpp
     ${PacBioBAM_TestsDir}/src/test_DataSetCore.cpp
     ${PacBioBAM_TestsDir}/src/test_DataSetIO.cpp
     ${PacBioBAM_TestsDir}/src/test_DataSetQuery.cpp
     ${PacBioBAM_TestsDir}/src/test_DataSetXsd.cpp
     ${PacBioBAM_TestsDir}/src/test_EndToEnd.cpp
     ${PacBioBAM_TestsDir}/src/test_EntireFileQuery.cpp
+    ${PacBioBAM_TestsDir}/src/test_FileUtils.cpp
     ${PacBioBAM_TestsDir}/src/test_Frames.cpp
     ${PacBioBAM_TestsDir}/src/test_GenomicIntervalQuery.cpp
-    ${PacBioBAM_TestsDir}/src/test_GroupQuery.cpp
     ${PacBioBAM_TestsDir}/src/test_IndexedFastaReader.cpp
     ${PacBioBAM_TestsDir}/src/test_Intervals.cpp
     ${PacBioBAM_TestsDir}/src/test_PacBioIndex.cpp
+    ${PacBioBAM_TestsDir}/src/test_PbiFilter.cpp
+    ${PacBioBAM_TestsDir}/src/test_PbiFilterQuery.cpp
     ${PacBioBAM_TestsDir}/src/test_PolymeraseStitching.cpp
+    ${PacBioBAM_TestsDir}/src/test_QNameQuery.cpp
     ${PacBioBAM_TestsDir}/src/test_QualityValues.cpp
+    ${PacBioBAM_TestsDir}/src/test_ReadAccuracyQuery.cpp
     ${PacBioBAM_TestsDir}/src/test_ReadGroupInfo.cpp
     ${PacBioBAM_TestsDir}/src/test_SequenceUtils.cpp
+    ${PacBioBAM_TestsDir}/src/test_StringUtils.cpp
+    ${PacBioBAM_TestsDir}/src/test_SubreadLengthQuery.cpp
     ${PacBioBAM_TestsDir}/src/test_Tags.cpp
     ${PacBioBAM_TestsDir}/src/test_TimeUtils.cpp
     # ${PacBioBAM_TestsDir}/src/test_UnmappedReadsQuery.cpp
+    ${PacBioBAM_TestsDir}/src/test_VirtualPolymeraseCompositeReader.cpp
     ${PacBioBAM_TestsDir}/src/test_ZmwQuery.cpp
-
 )
diff --git a/tests/scripts/cram.py b/tests/scripts/cram.py
new file mode 100755
index 0000000..20c4681
--- /dev/null
+++ b/tests/scripts/cram.py
@@ -0,0 +1,516 @@
+#!/usr/bin/env python
+"""Functional testing framework for command line applications"""
+
+import difflib
+import itertools
+import optparse
+import os
+import re
+import signal
+import subprocess
+import sys
+import shutil
+import time
+import tempfile
+
+try:
+    import configparser
+except ImportError:
+    import ConfigParser as configparser
+
+__all__ = ['main', 'test']
+
+def findtests(paths):
+    """Yield tests in paths in sorted order"""
+    for p in paths:
+        if os.path.isdir(p):
+            for root, dirs, files in os.walk(p):
+                if os.path.basename(root).startswith('.'):
+                    continue
+                for f in sorted(files):
+                    if not f.startswith('.') and f.endswith('.t'):
+                        yield os.path.normpath(os.path.join(root, f))
+        else:
+            yield os.path.normpath(p)
+
+def regex(pattern, s):
+    """Match a regular expression or return False if invalid.
+
+    >>> [bool(regex(r, 'foobar')) for r in ('foo.*', '***')]
+    [True, False]
+    """
+    try:
+        return re.match(pattern + r'\Z', s)
+    except re.error:
+        return False
+
+def glob(el, l):
+    r"""Match a glob-like pattern.
+
+    Only * and ? are special; a backslash escapes *, ? and itself. Any
+    other backslash (including a trailing one) is matched literally.
+
+    >>> bool(glob(r'\* \\ \? fo?b*', '* \\ ? foobar'))
+    True
+    """
+    i, n = 0, len(el)
+    res = ''
+    while i < n:
+        c = el[i]
+        i += 1
+        if c == '\\' and i < n and el[i] in '*?\\':
+            res += el[i - 1:i + 1]
+            i += 1
+        elif c == '*':
+            res += '.*'
+        elif c == '?':
+            res += '.'
+        else:
+            res += re.escape(c)
+    return regex(res, l)
+
+annotations = {'glob': glob, 're': regex}
+
+def match(el, l):
+    """Match patterns based on annotations"""
+    for k in annotations:
+        ann = ' (%s)\n' % k
+        if el.endswith(ann) and annotations[k](el[:-len(ann)], l[:-1]):
+            return True
+    return False
+
+class SequenceMatcher(difflib.SequenceMatcher, object):
+    """Like difflib.SequenceMatcher, but matches globs and regexes"""
+
+    def find_longest_match(self, alo, ahi, blo, bhi):
+        """Find longest matching block in a[alo:ahi] and b[blo:bhi]"""
+        # SequenceMatcher uses find_longest_match() to slowly whittle down
+        # the differences between a and b until it has each matching block.
+        # Because of this, we can end up doing the same matches many times.
+        matches = []
+        for n, (el, line) in enumerate(zip(self.a[alo:ahi], self.b[blo:bhi])):
+            if el != line and match(el, line):
+                # This fools the superclass's method into thinking that the
+                # regex/glob in a is identical to b by replacing a's line (the
+                # expected output) with b's line (the actual output).
+                self.a[alo + n] = line
+                matches.append((n, el))
+        ret = super(SequenceMatcher, self).find_longest_match(alo, ahi,
+                                                              blo, bhi)
+        # Restore the lines replaced above. Otherwise, the diff output
+        # would seem to imply that the tests never had any regexes/globs.
+        for n, el in matches:
+            self.a[alo + n] = el
+        return ret
+
+def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
+                 tofiledate='', n=3, lineterm='\n', matcher=SequenceMatcher):
+    """Compare two sequences of lines; generate the delta as a unified diff.
+
+    This is like difflib.unified_diff(), but allows custom matchers.
+    """
+    started = False
+    for group in matcher(None, a, b).get_grouped_opcodes(n):
+        if not started:
+            fromdate = fromfiledate and '\t%s' % fromfiledate or ''
+            todate = tofiledate and '\t%s' % tofiledate or ''
+            yield '--- %s%s%s' % (fromfile, fromdate, lineterm)
+            yield '+++ %s%s%s' % (tofile, todate, lineterm)
+            started = True
+        i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
+        yield "@@ -%d,%d +%d,%d @@%s" % (i1 + 1, i2 - i1, j1 + 1, j2 - j1,
+                                         lineterm)
+        for tag, i1, i2, j1, j2 in group:
+            if tag == 'equal':
+                for line in a[i1:i2]:
+                    yield ' ' + line
+                continue
+            if tag == 'replace' or tag == 'delete':
+                for line in a[i1:i2]:
+                    yield '-' + line
+            if tag == 'replace' or tag == 'insert':
+                for line in b[j1:j2]:
+                    yield '+' + line
+
+needescape = re.compile(r'[\x00-\x09\x0b-\x1f\x7f-\xff]').search
+escapesub = re.compile(r'[\x00-\x09\x0b-\x1f\\\x7f-\xff]').sub
+escapemap = dict((chr(i), r'\x%02x' % i) for i in range(256))
+escapemap.update({'\\': '\\\\', '\r': r'\r', '\t': r'\t'})
+
+def escape(s):
+    """Like the string-escape codec, but doesn't escape quotes"""
+    return escapesub(lambda m: escapemap[m.group(0)], s[:-1]) + ' (esc)\n'
+
+def makeresetsigpipe():
+    """Make a function to reset SIGPIPE to SIG_DFL (for use in subprocesses).
+
+    Doing subprocess.Popen(..., preexec_fn=makeresetsigpipe()) will prevent
+    Python's SIGPIPE handler (SIG_IGN) from being inherited by the
+    child process.
+    """
+    if sys.platform == 'win32' or getattr(signal, 'SIGPIPE', None) is None:
+        return None
+    return lambda: signal.signal(signal.SIGPIPE, signal.SIG_DFL)
+
+def test(path, shell, indent=2):
+    """Run test at path and return input, output, and diff.
+
+    This returns a 3-tuple containing the following:
+
+        (list of lines in test, same list with actual output, diff)
+
+    diff is a generator that yields the diff between the two lists.
+
+    If a test exits with return code 80, the actual output is set to
+    None and diff is set to [].
+    """
+    indent = ' ' * indent
+    cmdline = '%s$ ' % indent
+    conline = '%s> ' % indent
+
+    f = open(path)
+    abspath = os.path.abspath(path)
+    env = os.environ.copy()
+    env['TESTDIR'] = os.path.dirname(abspath)
+    env['TESTFILE'] = os.path.basename(abspath)
+    p = subprocess.Popen([shell, '-'], bufsize=-1, stdin=subprocess.PIPE,
+                         stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+                         universal_newlines=True, env=env,
+                         preexec_fn=makeresetsigpipe(),
+                         close_fds=os.name == 'posix')
+    salt = 'CRAM%s' % time.time()
+
+    after = {}
+    refout, postout = [], []
+    i = pos = prepos = -1
+    stdin = []
+    for i, line in enumerate(f):
+        refout.append(line)
+        if line.startswith(cmdline):
+            after.setdefault(pos, []).append(line)
+            prepos = pos
+            pos = i
+            stdin.append('echo "\n%s %s $?"\n' % (salt, i))
+            stdin.append(line[len(cmdline):])
+        elif line.startswith(conline):
+            after.setdefault(prepos, []).append(line)
+            stdin.append(line[len(conline):])
+        elif not line.startswith(indent):
+            after.setdefault(pos, []).append(line)
+    stdin.append('echo "\n%s %s $?"\n' % (salt, i + 1))
+
+    output = p.communicate(input=''.join(stdin))[0]
+    if p.returncode == 80:
+        return (refout, None, [])
+
+    # Add a trailing newline to the input script if it's missing.
+    if refout and not refout[-1].endswith('\n'):
+        refout[-1] += '\n'
+
+    # We use str.split instead of splitlines to get consistent
+    # behavior between Python 2 and 3. In 3, we use unicode strings,
+    # which has more line breaks than \n and \r.
+    pos = -1
+    ret = 0
+    for i, line in enumerate(output[:-1].split('\n')):
+        line += '\n'
+        if line.startswith(salt):
+            presalt = postout.pop()
+            if presalt != '%s\n' % indent:
+                postout.append(presalt[:-1] + ' (no-eol)\n')
+            ret = int(line.split()[2])
+            if ret != 0:
+                postout.append('%s[%s]\n' % (indent, ret))
+            postout += after.pop(pos, [])
+            pos = int(line.split()[1])
+        else:
+            if needescape(line):
+                line = escape(line)
+            postout.append(indent + line)
+    postout += after.pop(pos, [])
+
+    diffpath = os.path.basename(abspath)
+    diff = unified_diff(refout, postout, diffpath, diffpath + '.err')
+    for firstline in diff:
+        return refout, postout, itertools.chain([firstline], diff)
+    return refout, postout, []
+
+def prompt(question, answers, auto=None):
+    """Write a prompt to stdout and ask for answer in stdin.
+
+    answers should be a string, with each character a single
+    answer. An uppercase letter is considered the default answer.
+
+    If an invalid answer is given, this asks again until it gets a
+    valid one.
+
+    If auto is set, the question is answered automatically with the
+    specified value.
+    """
+    default = [c for c in answers if c.isupper()]
+    while True:
+        sys.stdout.write('%s [%s] ' % (question, answers))
+        sys.stdout.flush()
+        if auto is not None:
+            sys.stdout.write(auto + '\n')
+            sys.stdout.flush()
+            return auto
+
+        answer = sys.stdin.readline().strip().lower()
+        if not answer and default:
+            return default[0]
+        elif len(answer) == 1 and answer in answers.lower():
+            return answer
+
+def log(msg=None, verbosemsg=None, verbose=False):
+    """Write msg to standard out and flush.
+
+    If verbose is True, write verbosemsg instead.
+    """
+    if verbose:
+        msg = verbosemsg
+    if msg:
+        sys.stdout.write(msg)
+        sys.stdout.flush()
+
+def patch(cmd, diff, path):
+    """Run echo [lines from diff] | cmd -p0"""
+    p = subprocess.Popen([cmd, '-p0'], bufsize=-1, stdin=subprocess.PIPE,
+                         universal_newlines=True,
+                         preexec_fn=makeresetsigpipe(),
+                         cwd=path,
+                         close_fds=os.name == 'posix')
+    p.communicate(''.join(diff))
+    return p.returncode == 0
+
+def run(paths, tmpdir, shell, quiet=False, verbose=False, patchcmd=None,
+        answer=None, indent=2):
+    """Run tests in paths in tmpdir.
+
+    If quiet is True, diffs aren't printed. If verbose is True,
+    filenames and status information are printed.
+
+    If patchcmd is set, a prompt is written to stdout asking if
+    changed output should be merged back into the original test. The
+    answer is read from stdin. If 'y', the test is patched using patch
+    based on the changed output.
+    """
+    cwd = os.getcwd()
+    seen = set()
+    basenames = set()
+    skipped = failed = 0
+    for i, path in enumerate(findtests(paths)):
+        abspath = os.path.abspath(path)
+        if abspath in seen:
+            continue
+        seen.add(abspath)
+
+        log(None, '%s: ' % path, verbose)
+        if not os.stat(abspath).st_size:
+            skipped += 1
+            log('s', 'empty\n', verbose)
+        else:
+            basename = os.path.basename(path)
+            if basename in basenames:
+                basename = '%s-%s' % (basename, i)
+            else:
+                basenames.add(basename)
+            testdir = os.path.join(tmpdir, basename)
+            os.mkdir(testdir)
+            try:
+                os.chdir(testdir)
+                refout, postout, diff = test(abspath, shell, indent)
+            finally:
+                os.chdir(cwd)
+
+            errpath = abspath + '.err'
+            if postout is None:
+                skipped += 1
+                log('s', 'skipped\n', verbose)
+            elif not diff:
+                log('.', 'passed\n', verbose)
+                if os.path.exists(errpath):
+                    os.remove(errpath)
+            else:
+                failed += 1
+                log('!', 'failed\n', verbose)
+                if not quiet:
+                    log('\n', None, verbose)
+                errfile = open(errpath, 'w')
+                try:
+                    for line in postout:
+                        errfile.write(line)
+                finally:
+                    errfile.close()
+                if not quiet:
+                    if patchcmd:
+                        diff = list(diff)
+                    for line in diff:
+                        log(line)
+                    if (patchcmd and
+                        prompt('Accept this change?', 'yN', answer) == 'y'):
+                        if patch(patchcmd, diff, os.path.dirname(abspath)):
+                            log(None, '%s: merged output\n' % path, verbose)
+                            os.remove(errpath)
+                        else:
+                            log('%s: merge failed\n' % path)
+    log('\n', None, verbose)
+    log('# Ran %s tests, %s skipped, %s failed.\n'
+        % (len(seen), skipped, failed))
+    return bool(failed)
+
+def which(cmd):
+    """Return the path to cmd or None if not found"""
+    for p in os.environ['PATH'].split(os.pathsep):
+        path = os.path.join(p, cmd)
+        if os.path.isfile(path) and os.access(path, os.X_OK):
+            return os.path.abspath(path)
+    return None
+
+def expandpath(path):
+    """Expands ~ and environment variables in path"""
+    return os.path.expanduser(os.path.expandvars(path))
+
+class OptionParser(optparse.OptionParser):
+    """Like optparse.OptionParser, but supports setting values through
+    CRAM= and .cramrc."""
+
+    def __init__(self, *args, **kwargs):
+        self._config_opts = {}
+        optparse.OptionParser.__init__(self, *args, **kwargs)
+
+    def add_option(self, *args, **kwargs):
+        option = optparse.OptionParser.add_option(self, *args, **kwargs)
+        if option.dest and option.dest != 'version':
+            key = option.dest.replace('_', '-')
+            self._config_opts[key] = option.action == 'store_true'
+        return option
+
+    def parse_args(self, args=None, values=None):
+        config = configparser.RawConfigParser()
+        config.read(expandpath(os.environ.get('CRAMRC', '.cramrc')))
+        defaults = {}
+        for key, isbool in self._config_opts.items():
+            try:
+                if isbool:
+                    try:
+                        value = config.getboolean('cram', key)
+                    except ValueError:
+                        value = config.get('cram', key)
+                        self.error('--%s: invalid boolean value: %r'
+                                   % (key, value))
+                else:
+                    value = config.get('cram', key)
+            except (configparser.NoSectionError, configparser.NoOptionError):
+                pass
+            else:
+                defaults[key.replace('-', '_')] = value  # map back to dest
+        self.set_defaults(**defaults)
+
+        eargs = os.environ.get('CRAM', '').strip()
+        if eargs:
+            import shlex
+            args = list(sys.argv[1:] if args is None else args)  # copy
+            args += shlex.split(eargs)
+
+        try:
+            return optparse.OptionParser.parse_args(self, args, values)
+        except optparse.OptionValueError:
+            self.error(str(sys.exc_info()[1]))
+
+def main(args):
+    """Main entry point.
+
+    args should not contain the script name.
+    """
+    p = OptionParser(usage='cram [OPTIONS] TESTS...', prog='cram')
+    p.add_option('-V', '--version', action='store_true',
+                 help='show version information and exit')
+    p.add_option('-q', '--quiet', action='store_true',
+                 help="don't print diffs")
+    p.add_option('-v', '--verbose', action='store_true',
+                 help='show filenames and test status')
+    p.add_option('-i', '--interactive', action='store_true',
+                 help='interactively merge changed test output')
+    p.add_option('-y', '--yes', action='store_true',
+                 help='answer yes to all questions')
+    p.add_option('-n', '--no', action='store_true',
+                 help='answer no to all questions')
+    p.add_option('-E', '--preserve-env', action='store_true',
+                 help="don't reset common environment variables")
+    p.add_option('--keep-tmpdir', action='store_true',
+                 help='keep temporary directories')
+    p.add_option('--shell', action='store', default='/bin/sh', metavar='PATH',
+                 help='shell to use for running tests')
+    p.add_option('--indent', action='store', default=2, metavar='NUM',
+                 type='int', help='number of spaces to use for indentation')
+    opts, paths = p.parse_args(args)
+
+    if opts.version:
+        sys.stdout.write("""Cram CLI testing framework (version 0.6)
+
+Copyright (C) 2010-2011 Brodie Rao <brodie at bitheap.org> and others
+This is free software; see the source for copying conditions. There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+""")
+        return
+
+    conflicts = [('-y', opts.yes, '-n', opts.no),
+                 ('-q', opts.quiet, '-i', opts.interactive)]
+    for s1, o1, s2, o2 in conflicts:
+        if o1 and o2:
+            sys.stderr.write('options %s and %s are mutually exclusive\n'
+                             % (s1, s2))
+            return 2
+
+    patchcmd = None
+    if opts.interactive:
+        patchcmd = which('patch')
+        if not patchcmd:
+            sys.stderr.write('patch(1) required for -i\n')
+            return 2
+
+    if not paths:
+        sys.stdout.write(p.get_usage())
+        return 2
+
+    badpaths = [path for path in paths if not os.path.exists(path)]
+    if badpaths:
+        sys.stderr.write('no such file: %s\n' % badpaths[0])
+        return 2
+
+    tmpdir = os.environ['CRAMTMP'] = tempfile.mkdtemp('', 'cramtests-')
+    proctmp = os.path.join(tmpdir, 'tmp')
+    os.mkdir(proctmp)
+    for s in ('TMPDIR', 'TEMP', 'TMP'):
+        os.environ[s] = proctmp
+
+    if not opts.preserve_env:
+        for s in ('LANG', 'LC_ALL', 'LANGUAGE'):
+            os.environ[s] = 'C'
+        os.environ['TZ'] = 'GMT'
+        os.environ['CDPATH'] = ''
+        os.environ['COLUMNS'] = '80'
+        os.environ['GREP_OPTIONS'] = ''
+
+    if opts.yes:
+        answer = 'y'
+    elif opts.no:
+        answer = 'n'
+    else:
+        answer = None
+
+    try:
+        return run(paths, tmpdir, opts.shell, opts.quiet, opts.verbose,
+                   patchcmd, answer, opts.indent)
+    finally:
+        if opts.keep_tmpdir:
+            log('# Kept temporary directory: %s\n' % tmpdir)
+        else:
+            shutil.rmtree(tmpdir)
+
+if __name__ == '__main__':
+    try:
+        sys.exit(main(sys.argv[1:]))
+    except KeyboardInterrupt:
+        pass
diff --git a/tests/src/R/test_pbbam.sh.in b/tests/src/R/test_pbbam.sh.in
index af6eb89..458b149 100644
--- a/tests/src/R/test_pbbam.sh.in
+++ b/tests/src/R/test_pbbam.sh.in
@@ -37,9 +37,18 @@
 
 #! /usr/bin/sh
 
+GENERATED_BAM=@PacBioBAM_TestsDir@/data/generated.bam
+
+touch $GENERATED_BAM
+chmod 644 $GENERATED_BAM
+
 R --slave --no-save < @RTestRootDir@/test_pbbam.R --args \
 	@RTestRootDir@/tests \
 	@PacBioBAM_RLibDir@ \
 	@PacBioBAM_TestsDir@/data
+    
+STATUS=$?
 	
-rm @PacBioBAM_TestsDir@/data/generated.bam
+rm $GENERATED_BAM
+
+exit $STATUS
\ No newline at end of file
diff --git a/tests/src/R/tests/test_Accuracy.R b/tests/src/R/tests/test_Accuracy.R
index bc29eb0..e7e98e6 100644
--- a/tests/src/R/tests/test_Accuracy.R
+++ b/tests/src/R/tests/test_Accuracy.R
@@ -37,17 +37,26 @@
 
 test_case("Accuracy_Clamp", {
 	
-    a_zero     <- Accuracy(0)
-    a_neg      <- Accuracy(-1)
-    a_min      <- Accuracy(0)
-    a_normal   <- Accuracy(300)
-    a_max      <- Accuracy(1000)
-    a_tooLarge <- Accuracy(2000)
+    a_zero     <- Accuracy(0.0)
+    a_neg      <- Accuracy(-0.5)
+    a_min      <- Accuracy(0.0)
+    a_normal   <- Accuracy(0.9)
+    a_max      <- Accuracy(1.0)
+    a_tooLarge <- Accuracy(1.1)
 	
-    assertEqual(0L,    a_zero$ToInt())
-    assertEqual(0L,    a_neg$ToInt())
-    assertEqual(0L,    a_min$ToInt())
-    assertEqual(300L,  a_normal$ToInt())
-    assertEqual(1000L, a_max$ToInt())
-    assertEqual(1000L, a_tooLarge$ToInt())
+    tolerance = 1e-5
+
+    assertTrue( abs(0.0 - a_zero$ToFloat())     <= tolerance )
+    assertTrue( abs(0.0 - a_neg$ToFloat())      <= tolerance )
+    assertTrue( abs(0.0 - a_min$ToFloat())      <= tolerance )
+    assertTrue( abs(0.9 - a_normal$ToFloat())   <= tolerance )
+    assertTrue( abs(1.0 - a_max$ToFloat())      <= tolerance )
+    assertTrue( abs(1.0 - a_tooLarge$ToFloat()) <= tolerance )
+    
+    # assertEqual(0.0, a_zero$ToFloat())
+    # assertEqual(0.0, a_neg$ToFloat())
+    # assertEqual(0.0, a_min$ToFloat())
+    # assertEqual(0.9, a_normal$ToFloat())
+    # assertEqual(1.0, a_max$ToFloat())
+    # assertEqual(1.0, a_tooLarge$ToFloat())
 })
diff --git a/tests/src/R/tests/test_BamFile.R b/tests/src/R/tests/test_BamFile.R
index 41e419c..93eea2f 100644
--- a/tests/src/R/tests/test_BamFile.R
+++ b/tests/src/R/tests/test_BamFile.R
@@ -61,11 +61,10 @@ test_case("BamFile_Ctor", {
 	result <- tryCatch(
 		{
 			f <- BamFile(fn)
-			assertFalse(f$IsPacBioBAM())
 			invisible()
 		},
 		warning = function(w) {
-			assertTrue(TRUE)
+			assertTrue(FALSE)
 			invisible()
 		},
 		error = function(e) {
diff --git a/tests/src/R/tests/test_BamHeader.R b/tests/src/R/tests/test_BamHeader.R
index eab44e5..b0008ea 100644
--- a/tests/src/R/tests/test_BamHeader.R
+++ b/tests/src/R/tests/test_BamHeader.R
@@ -100,7 +100,7 @@ test_case("BamHeader_Defaults", {
 
 test_case("BamHeader_Decode", { 
 	
-    text <- paste("@HD\tVN:1.1\tSO:queryname\tpb:3.0b3",
+    text <- paste("@HD\tVN:1.1\tSO:queryname\tpb:3.0.1",
 			      "@SQ\tSN:chr1\tLN:2038\tSP:chocobo",
 				  "@SQ\tSN:chr2\tLN:3042\tSP:chocobo",
 				  "@RG\tID:rg1\tSM:control",
@@ -116,7 +116,7 @@ test_case("BamHeader_Decode", {
 	
 	assertEqual("1.1",       header$Version())
 	assertEqual("queryname", header$SortOrder())
-	assertEqual("3.0b3",     header$PacBioBamVersion())
+	assertEqual("3.0.1",     header$PacBioBamVersion())
 
 	assertEqual(3L, header$ReadGroups()$size())
 	assertTrue(header$HasReadGroup("rg1"))
@@ -146,7 +146,7 @@ test_case("BamHeader_Decode", {
 	
 test_case("BamHeader_Encode", { 
 	
-    expectedText <- paste("@HD\tVN:1.1\tSO:queryname\tpb:3.0b3",
+    expectedText <- paste("@HD\tVN:1.1\tSO:queryname\tpb:3.0.1",
 			              "@SQ\tSN:chr1\tLN:2038\tSP:chocobo",
 				          "@SQ\tSN:chr2\tLN:3042\tSP:chocobo",
 				          "@RG\tID:rg1\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:control",
@@ -179,7 +179,7 @@ test_case("BamHeader_Encode", {
 	header <- BamHeader()
 	header$Version("1.1")
 	header$SortOrder("queryname")
-	header$PacBioBamVersion("3.0b3")
+	header$PacBioBamVersion("3.0.1")
 	header$AddReadGroup(rg1)
 	header$AddReadGroup(rg2)
 	header$AddReadGroup(rg3)
diff --git a/tests/src/R/tests/test_EndToEnd.R b/tests/src/R/tests/test_EndToEnd.R
index 04a06ac..ce0a2eb 100644
--- a/tests/src/R/tests/test_EndToEnd.R
+++ b/tests/src/R/tests/test_EndToEnd.R
@@ -41,17 +41,20 @@ originalNames <-function(inputFn, generatedFn) {
 		{
 			file <- BamFile(inputFn)
 			writer <- BamWriter(generatedFn, file$Header())
-			entireFile <- EntireFileQuery(file)
+            
+            ds <- DataSet(file)
+			entireFile <- EntireFileQuery(ds)
 		
 			names_in <- list()
 			iter <- entireFile$begin()
 			end <- entireFile$end()
 			while ( iter$'__ne__'(end) ) {
-				record <- iter$value()
+                record <- iter$value()
 				names_in <- c(names_in, record$FullName())
-				writer$Write(record)
+                writer$Write(record)
 				iter$incr()
 			}
+            writer$TryFlush()
 			return(names_in)
 		},
 		error = function(e) {
@@ -64,18 +67,18 @@ originalNames <-function(inputFn, generatedFn) {
 
 generatedNames <- function(generatedFn) {
 	
-	result <- tryCatch(
-		{
-			file <- BamFile(generatedFn)
-			entireFile <- EntireFileQuery(file)
+    result <- tryCatch(
+        {
+            ds <- DataSet(generatedFn)
+            entireFile <- EntireFileQuery(ds)
 	
 			names_out <- list()
-			iter <- entireFile$begin()
-			end <- entireFile$end()
-			while ( iter$'__ne__'(end) ) {
-				names_out <- c(names_out, iter$FullName())
-				iter$incr()
-			}
+            iter <- entireFile$begin()
+            end <- entireFile$end()
+            while ( iter$'__ne__'(end) ) {
+                names_out <- c(names_out, iter$FullName())
+                iter$incr()
+            }
 			return(names_out)
 		},
 		error = function(e) {
@@ -86,7 +89,7 @@ generatedNames <- function(generatedFn) {
 	return(result)
 }
 
-test_case("EndToEnd_Placeholder", {
+test_case("EndToEnd_CopyFileAndReadBack", {
 	
 	inputFn     <- paste(test_data_path, "ex2.bam", sep="/")
 	generatedFn <- paste(test_data_path, "generated.bam", sep="/")
@@ -94,9 +97,9 @@ test_case("EndToEnd_Placeholder", {
 	# loop over original file, store names, write to generated file
 	names_in  <- originalNames(inputFn, generatedFn)
 	
-	# read names from new file
-	names_out <- generatedNames(generatedFn)
-	
-	# ensure equal
-	assertEqual(names_in, names_out)
+    # read names from new file
+    names_out <- generatedNames(generatedFn)
+
+    # ensure equal
+    assertEqual(names_in, names_out)
 })
diff --git a/tests/src/R/tests/test_Intervals.R b/tests/src/R/tests/test_Intervals.R
index 5160449..0071750 100644
--- a/tests/src/R/tests/test_Intervals.R
+++ b/tests/src/R/tests/test_Intervals.R
@@ -216,20 +216,20 @@ test_case("Intervals_Length",{
 test_case("GenomicIntervals_Ctors", { 
 	
     empty  <- GenomicInterval()
-    normal <- GenomicInterval(0, 100, 200)
+    normal <- GenomicInterval("seq1", 100, 200)
     
-    assertEqual(-1L, empty$Id())
+    assertEqual("",  empty$Name())
     assertEqual(0L,  empty$Start())
     assertEqual(0L,  empty$Stop())
     
-    assertEqual(0L,   normal$Id())
-    assertEqual(100L, normal$Start())
-    assertEqual(200L, normal$Stop())
+    assertEqual("seq1", normal$Name())
+    assertEqual(100L,   normal$Start())
+    assertEqual(200L,   normal$Stop())
 })
 
 test_case("GenomicIntervals_Copy", { 
 	
-    a <- GenomicInterval(1, 10, 20)
+    a <- GenomicInterval("seq1", 10, 20)
     b <- GenomicInterval(a)
     c <- a
     
@@ -241,10 +241,10 @@ test_case("GenomicIntervals_Copy", {
 
 test_case("GenomicIntervals_Modifiers", { 
 	
-    a <- GenomicInterval(1, 10, 20)
+    a <- GenomicInterval("seq1", 10, 20)
     
     b <- GenomicInterval(a)
-    b$Id(5)
+    b$Name("seq5")
 	b$Start(2)
 	b$Stop(10)
     
@@ -253,11 +253,11 @@ test_case("GenomicIntervals_Modifiers", {
     
     assertNotEqual(a, b)
 	
-    assertEqual(5L,  b$Id())
+    assertEqual("seq5",  b$Name())
     assertEqual(2L,  b$Start())
     assertEqual(10L, b$Stop())        
 	
-    assertEqual(a$Id(), c$Id())
+    assertEqual(a$Name(), c$Name())
 	
 	# TODO: fix this to work with == or *anything* cleaner
 	assertTrue(b$Interval()$'__eq__'(c$Interval()))
@@ -265,12 +265,12 @@ test_case("GenomicIntervals_Modifiers", {
 
 test_case("GenomicIntervals_Cover", { 
 	
-    a <- GenomicInterval(0,2,4)
-    b <- GenomicInterval(0,3,5)
-    c <- GenomicInterval(0,6,8)
-    d <- GenomicInterval(0,1,7)
-    e <- GenomicInterval(0,5,8)
-    f <- GenomicInterval(1,3,5)  # same as b, different ref
+    a <- GenomicInterval("seq1",2,4)
+    b <- GenomicInterval("seq1",3,5)
+    c <- GenomicInterval("seq1",6,8)
+    d <- GenomicInterval("seq1",1,7)
+    e <- GenomicInterval("seq1",5,8)
+    f <- GenomicInterval("seq2",3,5)  # same as b, different ref
     
     #   0123456789  
     # a   --
@@ -315,16 +315,16 @@ test_case("GenomicIntervals_Cover", {
 test_case("GenomicIntervals_Validity", { 
 	
     a <- GenomicInterval()       # default
-    b <- GenomicInterval(0,0,0)  # valid id, start == stop (zero)
-    c <- GenomicInterval(0,4,4)  # valid id, start == stop (non-zero)
-    d <- GenomicInterval(0,0,1)  # valid id, start <  stop (start == zero)     OK
-    e <- GenomicInterval(0,4,5)  # valid id, start <  stop (start >  zero)     OK
-    f <- GenomicInterval(0,5,4)  # valid id, start >  stop 
-    g <- GenomicInterval(-1,0,0) # invalid id, start == stop (zero)
-    h <- GenomicInterval(-1,4,4) # invalid id, start == stop (non-zero)
-    i <- GenomicInterval(-1,0,1) # invalid id, start <  stop (start == zero)
-    j <- GenomicInterval(-1,4,5) # invalid id, start <  stop (start >  zero)
-    k <- GenomicInterval(-1,5,4) # invalid id, start >  stop 
+    b <- GenomicInterval("seq1",0,0)  # valid id, start == stop (zero)
+    c <- GenomicInterval("seq1",4,4)  # valid id, start == stop (non-zero)
+    d <- GenomicInterval("seq",0,1)  # valid id, start <  stop (start == zero)     OK
+    e <- GenomicInterval("seq1",4,5)  # valid id, start <  stop (start >  zero)     OK
+    f <- GenomicInterval("seq1",5,4)  # valid id, start >  stop 
+    g <- GenomicInterval("",0,0) # invalid id, start == stop (zero)
+    h <- GenomicInterval("",4,4) # invalid id, start == stop (non-zero)
+    i <- GenomicInterval("",0,1) # invalid id, start <  stop (start == zero)
+    j <- GenomicInterval("",4,5) # invalid id, start <  stop (start >  zero)
+    k <- GenomicInterval("",5,4) # invalid id, start >  stop 
          
     assertTrue(d$IsValid())
     assertTrue(e$IsValid())
diff --git a/tests/src/R/tests/test_PolymeraseStitching.R b/tests/src/R/tests/test_PolymeraseStitching.R
new file mode 100644
index 0000000..3e2a943
--- /dev/null
+++ b/tests/src/R/tests/test_PolymeraseStitching.R
@@ -0,0 +1,427 @@
+# Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted (subject to the limitations in the
+# disclaimer below) provided that the following conditions are met:
+#
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#
+#  * Redistributions in binary form must reproduce the above
+#    copyright notice, this list of conditions and the following
+#    disclaimer in the documentation and/or other materials provided
+#    with the distribution.
+#
+#  * Neither the name of Pacific Biosciences nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+# GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+# BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES LOSS OF
+# USE, DATA, OR PROFITS OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Author: Derek Barnett
+
+compareContainers <- function(c1, c2) {
+    # Asserts that c1 and c2 have the same length and equal elements.
+	assertEqual(length(c1), length(c2))
+    # seq_len() gives an empty loop for empty input; 1:0 would visit 1 and 0.
+	numElements <- length(c1)
+	for (i in seq_len(numElements))
+		assertEqual(c1[i], c2[i])
+}
+
+compareFrames <- function(f1, f2) {
+    # Compares what f1 and f2 return from $Data(), element by element.
+    d1 <- f1$Data()
+    d2 <- f2$Data()
+    compareContainers(d1, d2)
+}
+
+compareRecords <- function(b1, b2) {
+    # Asserts both records report every tag/QV below and that their values match.
+    assertTrue(b1$HasDeletionQV())
+    assertTrue(b1$HasDeletionTag())
+    assertTrue(b1$HasInsertionQV())
+    assertTrue(b1$HasMergeQV())
+    assertTrue(b1$HasSubstitutionQV())
+    assertTrue(b1$HasSubstitutionTag())
+    assertTrue(b1$HasLabelQV())
+    assertTrue(b1$HasAltLabelQV())
+    assertTrue(b1$HasAltLabelTag())
+    assertTrue(b1$HasPkmean())
+    assertTrue(b1$HasPkmid())
+    assertTrue(b1$HasPulseCall())
+    assertTrue(b1$HasIPD())
+    assertTrue(b1$HasPulseWidth())
+    assertTrue(b1$HasPrePulseFrames())
+    assertTrue(b1$HasPulseCallWidth())
+    assertTrue(b1$HasPulseMergeQV())
+    
+    assertTrue(b2$HasDeletionQV())
+    assertTrue(b2$HasDeletionTag())
+    assertTrue(b2$HasInsertionQV())
+    assertTrue(b2$HasMergeQV())
+    assertTrue(b2$HasSubstitutionQV())
+    assertTrue(b2$HasSubstitutionTag())
+    assertTrue(b2$HasLabelQV())
+    assertTrue(b2$HasAltLabelQV())
+    assertTrue(b2$HasAltLabelTag())
+    assertTrue(b2$HasPkmean())
+    assertTrue(b2$HasPkmid())
+    assertTrue(b2$HasPulseCall())
+    assertTrue(b2$HasIPD())
+    assertTrue(b2$HasPulseWidth())
+    assertTrue(b2$HasPrePulseFrames())
+    assertTrue(b2$HasPulseCallWidth())
+    assertTrue(b2$HasPulseMergeQV())
+ 
+    assertEqual(b1$FullName(),        b2$FullName())
+    assertEqual(b1$HoleNumber(),      b2$HoleNumber())
+    assertEqual(b1$NumPasses(),       b2$NumPasses())
+    assertEqual(b1$Sequence(),        b2$Sequence())
+    assertEqual(b1$DeletionTag(),     b2$DeletionTag())
+    assertEqual(b1$SubstitutionTag(), b2$SubstitutionTag())
+    assertEqual(b1$AltLabelTag(),     b2$AltLabelTag())
+    assertEqual(b1$PulseCall(),       b2$PulseCall())
+    
+    # compareContainers(b1$Pkmean(), b2$Pkmean())
+    # compareContainers(b1$Pkmid(), b2$Pkmid())
+    #
+    # compareFrames(b1$IPD(),             b2$IPD())
+    # compareFrames(b1$PulseWidth(),      b2$PulseWidth())
+    # compareFrames(b1$PrePulseFrames(),  b2$PrePulseFrames())
+    # compareFrames(b1$PulseCallWidth(),  b2$PulseCallWidth())
+
+    assertEqual(b1$ReadGroup()$Id(), b2$ReadGroup()$Id())
+    
+    assertEqual(b1$Qualities()$Fastq(),       b2$Qualities()$Fastq())
+    assertEqual(b1$DeletionQV()$Fastq(),      b2$DeletionQV()$Fastq())
+    assertEqual(b1$InsertionQV()$Fastq(),     b2$InsertionQV()$Fastq())
+    assertEqual(b1$MergeQV()$Fastq(),         b2$MergeQV()$Fastq())
+    assertEqual(b1$SubstitutionQV()$Fastq(),  b2$SubstitutionQV()$Fastq())
+    assertEqual(b1$LabelQV()$Fastq(),         b2$LabelQV()$Fastq())
+    assertEqual(b1$AltLabelQV()$Fastq(),      b2$AltLabelQV()$Fastq())
+    assertEqual(b1$PulseMergeQV()$Fastq(),    b2$PulseMergeQV()$Fastq())
+    
+    invisible(NULL)  # a bare `return` would hand back the `return` function itself
+}
+
+getVirtualRecord <- function(fn1, fn2) {
+    # Returns the only record of VirtualPolymeraseReader(fn1, fn2); NULL on error.
+    result <- tryCatch(
+        {
+            vpr <- VirtualPolymeraseReader(fn1, fn2)
+            
+            assertTrue(vpr$HasNext())
+            
+            virtualRecord <- vpr$Next()
+            
+            assertFalse(vpr$HasNext())
+            
+            return(virtualRecord)
+        },
+        error = function(e) {
+            print(paste('e:',e))
+            assertTrue(FALSE) # should not throw
+            return(NULL)      # bare `return` would yield the function object
+        }    
+    )
+    return(result)
+}
+
+getPolymeraseRecord <- function(fn) {
+    # Returns the only record found by EntireFileQuery over fn; NULL on error.
+    result <- tryCatch(
+        {
+            ds <- DataSet(fn)
+            entireFile <- EntireFileQuery(ds)
+            
+            polyIter <- entireFile$begin()
+            polyEnd <- entireFile$end()
+            
+            assertTrue(polyIter$'__ne__'(polyEnd))
+            
+            polyRecord <- polyIter$value()
+            polyIter$incr()
+            
+            assertTrue(polyIter$'__eq__'(polyEnd))
+            
+            return(polyRecord)
+        },
+        error = function(e) {
+            print(paste('e:',e))
+            assertTrue(FALSE) # should not throw
+            return(NULL)      # bare `return` would yield the function object
+        }    
+    )
+    return(result)
+}
+
+test_case("PolymeraseStitching_VirtualRegions", {
+	
+	subreadsFn <- paste(test_data_path, "polymerase/internal.subreads.bam", sep="/")
+	scrapsFn   <- paste(test_data_path, "polymerase/internal.scraps.bam", sep="/")
+    virtualRecord <- getVirtualRecord(subreadsFn, scrapsFn)
+    
+    # -- ADAPTER -- #
+    
+    adapter <- virtualRecord$VirtualRegionsTable('ADAPTER')
+    assertEqual(7L, adapter$size())
+    
+    region <- adapter$'__getitem__'(0)
+    assertEqual(3047L, region$beginPos)
+    assertEqual(3095L, region$endPos)
+    
+    region <- adapter$'__getitem__'(1)
+    assertEqual(3650L, region$beginPos)
+    assertEqual(3700L, region$endPos)
+    
+    region <- adapter$'__getitem__'(2)
+    assertEqual(4289L, region$beginPos)
+    assertEqual(4335L, region$endPos)
+    
+    region <- adapter$'__getitem__'(3)
+    assertEqual(4888L, region$beginPos)
+    assertEqual(4939L, region$endPos)
+    
+    region <- adapter$'__getitem__'(4)
+    assertEqual(5498L, region$beginPos)
+    assertEqual(5546L, region$endPos)
+    
+    region <- adapter$'__getitem__'(5)
+    assertEqual(6116L, region$beginPos)
+    assertEqual(6173L, region$endPos)
+    
+    region <- adapter$'__getitem__'(6)
+    assertEqual(6740L, region$beginPos)
+    assertEqual(6790L, region$endPos)
+
+    # -- BARCODE -- #
+
+    barcode = virtualRecord$VirtualRegionsTable('BARCODE')
+    assertEqual(14L, barcode$size())
+
+    region <- barcode$'__getitem__'(0)
+    assertEqual(3025L, region$beginPos)
+    assertEqual(3047L, region$endPos)
+    
+    region <- barcode$'__getitem__'(1)
+    assertEqual(3095L, region$beginPos)
+    assertEqual(3116L, region$endPos)
+    
+    region <- barcode$'__getitem__'(2)
+    assertEqual(3628L, region$beginPos)
+    assertEqual(3650L, region$endPos)
+    
+    region <- barcode$'__getitem__'(3)
+    assertEqual(3700L, region$beginPos)
+    assertEqual(3722L, region$endPos)
+    
+    region <- barcode$'__getitem__'(4)
+    assertEqual(4267L, region$beginPos)
+    assertEqual(4289L, region$endPos)
+    
+    region <- barcode$'__getitem__'(5)
+    assertEqual(4335L, region$beginPos)
+    assertEqual(4356L, region$endPos)
+    
+    region <- barcode$'__getitem__'(6)
+    assertEqual(4864L, region$beginPos)
+    assertEqual(4888L, region$endPos)
+
+    region <- barcode$'__getitem__'(7)
+    assertEqual(4939L, region$beginPos)
+    assertEqual(4960L, region$endPos)
+    
+    region <- barcode$'__getitem__'(8)
+    assertEqual(5477L, region$beginPos)
+    assertEqual(5498L, region$endPos)
+    
+    region <- barcode$'__getitem__'(9)
+    assertEqual(5546L, region$beginPos)
+    assertEqual(5571L, region$endPos)
+    
+    region <- barcode$'__getitem__'(10)
+    assertEqual(6087L, region$beginPos)
+    assertEqual(6116L, region$endPos)
+    
+    region <- barcode$'__getitem__'(11)
+    assertEqual(6173L, region$beginPos)
+    assertEqual(6199L, region$endPos)
+    
+    region <- barcode$'__getitem__'(12)
+    assertEqual(6719L, region$beginPos)
+    assertEqual(6740L, region$endPos)
+    
+    region <- barcode$'__getitem__'(13)
+    assertEqual(6790L, region$beginPos)
+    assertEqual(6812L, region$endPos)
+
+    # -- LQREGION -- #
+
+    lqregion = virtualRecord$VirtualRegionsTable('LQREGION')
+    assertEqual(2L, lqregion$size())
+    
+    region <- lqregion$'__getitem__'(0)
+    assertEqual(0L, region$beginPos)
+    assertEqual(2659L, region$endPos)
+    
+    region <- lqregion$'__getitem__'(1)
+    assertEqual(7034L, region$beginPos)
+    assertEqual(7035L, region$endPos)
+    
+    # -- HQREGION -- #
+
+    hqregion = virtualRecord$VirtualRegionsTable('HQREGION')
+    assertEqual(1L, hqregion$size())
+    
+    region <- hqregion$'__getitem__'(0)
+    assertEqual(2659L, region$beginPos)
+    assertEqual(7034L, region$endPos)
+})
+
+test_case("PolymeraseStitching_InternalSubreadsToOriginal", {
+  
+    # stitch virtual polymerase record
+    subreadsFn <- paste(test_data_path, "polymerase/internal.subreads.bam", sep="/")
+    scrapsFn   <- paste(test_data_path, "polymerase/internal.scraps.bam", sep="/")
+    virtualRecord <- getVirtualRecord(subreadsFn, scrapsFn)
+
+    # fetch original polymerase record
+    polyFn <- paste(test_data_path, "polymerase/internal.polymerase.bam", sep="/")
+    polyRecord <- getPolymeraseRecord(polyFn)      
+
+    # check
+    compareRecords(polyRecord, virtualRecord)
+})
+
+test_case("PolymeraseStitching_InternalHQToOriginal", {
+  
+    # stitch virtual polymerase record
+    hqRegionFn <- paste(test_data_path, "polymerase/internal.hqregions.bam", sep="/")
+    lqRegionFn <- paste(test_data_path, "polymerase/internal.lqregions.bam", sep="/")
+    virtualRecord <- getVirtualRecord(hqRegionFn, lqRegionFn)
+    
+    # fetch original polymerase record
+    polyFn <- paste(test_data_path, "polymerase/internal.polymerase.bam", sep="/")
+    polyRecord <- getPolymeraseRecord(polyFn)      
+
+    # check
+    compareRecords(polyRecord, virtualRecord)
+})
+
+test_case("PolymeraseStitching_ProductionSubreadsToOriginal", {
+  
+    # stitch virtual polymerase record
+    subreadsFn <- paste(test_data_path, "polymerase/production.subreads.bam", sep="/")
+    scrapsFn   <- paste(test_data_path, "polymerase/production.scraps.bam", sep="/")
+    virtualRecord <- getVirtualRecord(subreadsFn, scrapsFn)
+    
+    # fetch original polymerase record
+    polyFn <- paste(test_data_path, "polymerase/production.polymerase.bam", sep="/")
+    polyRecord <- getPolymeraseRecord(polyFn)  
+    
+    # compare
+    assertEqual(polyRecord$FullName(),        virtualRecord$FullName())
+    assertEqual(polyRecord$HoleNumber(),      virtualRecord$HoleNumber())
+    assertEqual(polyRecord$NumPasses(),       virtualRecord$NumPasses())
+    assertEqual(polyRecord$Sequence(),        virtualRecord$Sequence())
+    assertEqual(polyRecord$DeletionTag(),     virtualRecord$DeletionTag())
+    assertEqual(polyRecord$SubstitutionTag(), virtualRecord$SubstitutionTag())
+    
+    compareFrames(polyRecord$IPD(),                virtualRecord$IPDV1Frames())
+    assertEqual(polyRecord$ReadGroup()$Id(),       virtualRecord$ReadGroup()$Id())
+    
+    tolerance = 1e-5
+    assertTrue( abs(polyRecord$ReadAccuracy()$ToFloat() - virtualRecord$ReadAccuracy()$ToFloat()) <= tolerance )
+    # assertEqual(polyRecord$ReadAccuracy()$ToFloat(), virtualRecord$ReadAccuracy()$ToFloat())
+
+    assertEqual(polyRecord$Qualities()$Fastq(),       virtualRecord$Qualities()$Fastq())
+    assertEqual(polyRecord$DeletionQV()$Fastq(),      virtualRecord$DeletionQV()$Fastq())
+    assertEqual(polyRecord$InsertionQV()$Fastq(),     virtualRecord$InsertionQV()$Fastq())
+    assertEqual(polyRecord$MergeQV()$Fastq(),         virtualRecord$MergeQV()$Fastq())
+    assertEqual(polyRecord$SubstitutionQV()$Fastq(),  virtualRecord$SubstitutionQV()$Fastq())
+})
+
+test_case("PolymeraseStitching_ProductionHQToOriginal", {
+  
+    # stitch virtual polymerase record
+    hqRegionFn <- paste(test_data_path, "polymerase/production_hq.hqregion.bam", sep="/")
+    lqRegionFn <- paste(test_data_path, "polymerase/production_hq.scraps.bam", sep="/")
+    virtualRecord <- getVirtualRecord(hqRegionFn, lqRegionFn)
+  
+    # fetch original polymerase record
+    polyFn <- paste(test_data_path, "polymerase/production.polymerase.bam", sep="/")
+    polyRecord <- getPolymeraseRecord(polyFn)
+  
+    # compare
+    assertEqual(polyRecord$FullName(),        virtualRecord$FullName())
+    assertEqual(polyRecord$HoleNumber(),      virtualRecord$HoleNumber())
+    assertEqual(polyRecord$NumPasses(),       virtualRecord$NumPasses())
+    assertEqual(polyRecord$Sequence(),        virtualRecord$Sequence())
+    assertEqual(polyRecord$DeletionTag(),     virtualRecord$DeletionTag())
+    assertEqual(polyRecord$SubstitutionTag(), virtualRecord$SubstitutionTag())
+
+    compareFrames(polyRecord$IPD(),                virtualRecord$IPDV1Frames())
+    assertEqual(polyRecord$ReadGroup()$Id(),       virtualRecord$ReadGroup()$Id())
+    
+    tolerance = 1e-5
+    assertTrue( abs(polyRecord$ReadAccuracy()$ToFloat() - virtualRecord$ReadAccuracy()$ToFloat()) <= tolerance )
+    # assertEqual(polyRecord$ReadAccuracy()$ToInt(), virtualRecord$ReadAccuracy()$ToInt())
+    
+    assertEqual(polyRecord$Qualities()$Fastq(),       virtualRecord$Qualities()$Fastq())
+    assertEqual(polyRecord$DeletionQV()$Fastq(),      virtualRecord$DeletionQV()$Fastq())
+    assertEqual(polyRecord$InsertionQV()$Fastq(),     virtualRecord$InsertionQV()$Fastq())
+    assertEqual(polyRecord$MergeQV()$Fastq(),         virtualRecord$MergeQV()$Fastq())
+    assertEqual(polyRecord$SubstitutionQV()$Fastq(),  virtualRecord$SubstitutionQV()$Fastq())
+
+    assertTrue(polyRecord$HasDeletionQV())
+    assertTrue(polyRecord$HasDeletionTag())
+    assertTrue(polyRecord$HasInsertionQV())
+    assertTrue(polyRecord$HasMergeQV())
+    assertTrue(polyRecord$HasSubstitutionQV())
+    assertTrue(polyRecord$HasSubstitutionTag())
+    assertTrue(polyRecord$HasIPD())
+    assertFalse(polyRecord$HasLabelQV())
+    assertFalse(polyRecord$HasAltLabelQV())
+    assertFalse(polyRecord$HasAltLabelTag())
+    assertFalse(polyRecord$HasPkmean())
+    assertFalse(polyRecord$HasPkmid())
+    assertFalse(polyRecord$HasPulseCall())
+    assertFalse(polyRecord$HasPulseWidth())
+    assertFalse(polyRecord$HasPrePulseFrames())
+    assertFalse(polyRecord$HasPulseCallWidth())
+    assertFalse(polyRecord$HasPulseMergeQV())  # was a second HasPulseCall() check; assumes production data has no PulseMergeQV - TODO confirm
+
+    assertTrue(virtualRecord$HasDeletionQV())
+    assertTrue(virtualRecord$HasDeletionTag())
+    assertTrue(virtualRecord$HasInsertionQV())
+    assertTrue(virtualRecord$HasMergeQV())
+    assertTrue(virtualRecord$HasSubstitutionQV())
+    assertTrue(virtualRecord$HasSubstitutionTag())
+    assertTrue(virtualRecord$HasIPD())
+    assertFalse(virtualRecord$HasLabelQV())
+    assertFalse(virtualRecord$HasAltLabelQV())
+    assertFalse(virtualRecord$HasAltLabelTag())
+    assertFalse(virtualRecord$HasPkmean())
+    assertFalse(virtualRecord$HasPkmid())
+    assertFalse(virtualRecord$HasPulseCall())
+    assertFalse(virtualRecord$HasPulseWidth())
+    assertFalse(virtualRecord$HasPrePulseFrames())
+    assertFalse(virtualRecord$HasPulseCallWidth()) 
+    assertFalse(virtualRecord$HasPulseMergeQV())  # was a second HasPulseCall() check; assumes production data has no PulseMergeQV - TODO confirm
+})
\ No newline at end of file
diff --git a/tests/src/TestData.h.in b/tests/src/TestData.h.in
index e4d786c..1e1d9ca 100644
--- a/tests/src/TestData.h.in
+++ b/tests/src/TestData.h.in
@@ -44,10 +44,10 @@ namespace PacBio {
 namespace BAM {
 namespace tests {
 
-const std::string Source_Dir   = std::string("@PacBioBAM_TestsDir@");
-const std::string Bin_Dir      = std::string("@CMAKE_CURRENT_BINARY_DIR@");
-const std::string Data_Dir     = std::string("@PacBioBAM_TestsDir@/data");
-const std::string Samtools_Bin = std::string("@Samtools_Bin@");
+const std::string Source_Dir = std::string("@PacBioBAM_TestsDir@");
+const std::string Bin_Dir    = std::string("@CMAKE_CURRENT_BINARY_DIR@");
+const std::string Data_Dir   = std::string("@PacBioBAM_TestsDir@/data");
+const std::string Bam2Sam    = std::string("@PacBioBAM_BinDir@/bam2sam");
 
 } // namespace tests
 } // namespace BAM
diff --git a/tests/src/cram/bam2sam.t b/tests/src/cram/bam2sam.t
new file mode 100644
index 0000000..d306f23
--- /dev/null
+++ b/tests/src/cram/bam2sam.t
@@ -0,0 +1,63 @@
+Setup:
+
+  $ BAM2SAM="$TESTDIR/../../../bin/bam2sam" && export BAM2SAM
+
+  $ DATADIR="$TESTDIR/../../data" && export DATADIR
+
+Normal:
+
+  $ $BAM2SAM < $DATADIR/phi29.bam | head -n 5
+  @HD\tVN:3.0.0\tSO:unknown\tpb:3.0.1 (esc)
+  @RG\tID:a955def6\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3.0.0.140018;FRAMERATEHZ=75.000000\tPU:m140918_150013_42139_c100697631700000001823144703261565_s1_p0 (esc)
+  @PG\tID:bam2bam-0.20.0\tPN:bam2bam\tVN:0.20.0 (esc)
+  @PG\tID:bax2bam-0.0.2\tPN:bax2bam\tVN:0.0.2 (esc)
+  m140918_150013_42139_c100697631700000001823144703261565_s1_p0/30422/2067_4072\t4\t*\t0\t255\t*\t*\t0\t0\tAAGTCATGTATAGAGTTATTGGCTCAGCGGTGGCAAGCAGCCAACTCAGCTCCTTTCGGGCTTGTTAGCAGCCGGATCCACACTCTGAAATTCCTGCAGCTCGAGTTATTTGATAGTAAAAGTGGGTCATCAAACCGCAACTACGCCACCCCGGTACCTGAACAGGCTTCGGTTTCATTTTGAGACGAGAAAAACCCACTTTGAAGTTTTCGAAAATCACTTCCTTTTTTGATTTTGTCCGTCATGCCTGCGCATTTCACAAGAGAACTTGATGTCAGTGTTAGTCGTCAGGAGAGCCTCTACCAGTTTGCCGTCTACTTCTTTCATGTAAATATCCTGGATGTAAGGTTTTCTGACGCAGATATTGTTGGCAACGCTTAAAAAG [...]
+
+Explicit Filename (not stdin):
+
+  $ $BAM2SAM $DATADIR/phi29.bam | head -n 5
+  @HD\tVN:3.0.0\tSO:unknown\tpb:3.0.1 (esc)
+  @RG\tID:a955def6\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3.0.0.140018;FRAMERATEHZ=75.000000\tPU:m140918_150013_42139_c100697631700000001823144703261565_s1_p0 (esc)
+  @PG\tID:bam2bam-0.20.0\tPN:bam2bam\tVN:0.20.0 (esc)
+  @PG\tID:bax2bam-0.0.2\tPN:bax2bam\tVN:0.0.2 (esc)
+  m140918_150013_42139_c100697631700000001823144703261565_s1_p0/30422/2067_4072\t4\t*\t0\t255\t*\t*\t0\t0\tAAGTCATGTATAGAGTTATTGGCTCAGCGGTGGCAAGCAGCCAACTCAGCTCCTTTCGGGCTTGTTAGCAGCCGGATCCACACTCTGAAATTCCTGCAGCTCGAGTTATTTGATAGTAAAAGTGGGTCATCAAACCGCAACTACGCCACCCCGGTACCTGAACAGGCTTCGGTTTCATTTTGAGACGAGAAAAACCCACTTTGAAGTTTTCGAAAATCACTTCCTTTTTTGATTTTGTCCGTCATGCCTGCGCATTTCACAAGAGAACTTGATGTCAGTGTTAGTCGTCAGGAGAGCCTCTACCAGTTTGCCGTCTACTTCTTTCATGTAAATATCCTGGATGTAAGGTTTTCTGACGCAGATATTGTTGGCAACGCTTAAAAAG [...]
+
+Header-Only:
+
+  $ $BAM2SAM --header-only < $DATADIR/phi29.bam | head -n 5
+  @HD\tVN:3.0.0\tSO:unknown\tpb:3.0.1 (esc)
+  @RG\tID:a955def6\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3.0.0.140018;FRAMERATEHZ=75.000000\tPU:m140918_150013_42139_c100697631700000001823144703261565_s1_p0 (esc)
+  @PG\tID:bam2bam-0.20.0\tPN:bam2bam\tVN:0.20.0 (esc)
+  @PG\tID:bax2bam-0.0.2\tPN:bax2bam\tVN:0.0.2 (esc)
+
+No-Header:
+
+  $ $BAM2SAM --no-header < $DATADIR/phi29.bam | head -n 5
+  m140918_150013_42139_c100697631700000001823144703261565_s1_p0/30422/2067_4072\t4\t*\t0\t255\t*\t*\t0\t0\tAAGTCATGTATAGAGTTATTGGCTCAGCGGTGGCAAGCAGCCAACTCAGCTCCTTTCGGGCTTGTTAGCAGCCGGATCCACACTCTGAAATTCCTGCAGCTCGAGTTATTTGATAGTAAAAGTGGGTCATCAAACCGCAACTACGCCACCCCGGTACCTGAACAGGCTTCGGTTTCATTTTGAGACGAGAAAAACCCACTTTGAAGTTTTCGAAAATCACTTCCTTTTTTGATTTTGTCCGTCATGCCTGCGCATTTCACAAGAGAACTTGATGTCAGTGTTAGTCGTCAGGAGAGCCTCTACCAGTTTGCCGTCTACTTCTTTCATGTAAATATCCTGGATGTAAGGTTTTCTGACGCAGATATTGTTGGCAACGCTTAAAAAG [...]
+  m140918_150013_42139_c100697631700000001823144703261565_s1_p0/30422/4151_6183\t4\t*\t0\t255\t*\t*\t0\t0\tGATCCCGCGAATTAATTACGACTCACTATAGGGGAATTGTGAGCGGATAACAATTCCCGCCTCTAGAAATAATTTTGTTTAAACTTTTAAGAAAGGAGATATTACATATGAAACACAGCCACGTAAAATGTATTCCTGCGACTTGGAGACTACCACCAAGGTGAAGATTTGCCGCGTAATGGGCATACGGTTTACATGAAACATCGAAGAACAAACTCGAGTATAAGATTGGTAACTCCCCTGGATGAATTATGGCTTGGGTTACTGAAAGTTCGAGGTCTGACCTGTACTTCGCACAAATCTGAAAATTTGATGGCCGCAAATTTCAATTCATCACTGGCTGGAACGTAAACGGTTTTAAATGGTCCGCAGATCGGTCTGTGCC [...]
+  m140918_150013_42139_c100697631700000001823144703261565_s1_p0/30422/6234_8214\t4\t*\t0\t255\t*\t*\t0\t0\tAGAGTCATGTATAAGAGTTATTGCTCAGCGGTGGCAGCAGACAACTCAGCTTCCTTTCGGGCCTTTGTTAGCAGCCGGATCCAAGCTTGAATTCCTGCAAGCTCGAGTTATTTGATAGTAAAAGTGTCATCAAACCAGCACTACGGCCGAACCCGGTACCTGAACAGATTCGTTTCATTTTACGAGAAAAACCCACTTTGAAGTTTTGCCGAAAGTCACTTCTTTTTGATTTGTCCGTCATGCTGCGCATTTCACAGAGACTTGAATGTCAGTGTAGTCGTCATCGGGGGGGGGAAGAGCCCTCTACCAGTTTTGCCGTCTACTTCTTTCATGTAAATATCTGGATGTAGGTTTTCTGAACGCAGATATTTGCAGCTTAAAAGTG [...]
+  m140918_150013_42139_c100697631700000001823144703261565_s1_p0/30422/8294_10277\t4\t*\t0\t255\t*\t*\t0\t0\tGATTCCCGCGAAATTAATACGAATCACTATAAGGGGAATTGTGAGCGGATAACAATTCCCCTCTAGAAATAATTTTGTTTAACTTTAAGAGGGACGATATACATATGAACACATGCCTACGTAAAATGTATTCCTGCGAACTGTTGAGACTACCACCAAGGTTGAAGATTTGCCGCGTAATGGGCATACGGTTACATGAACATCGAAGACCACTCCGATATGAAGATTGGTTAACCCCTGGATGAATTTATGGCTTGGGTTCTGAAAGTTCAGGCTGACCTGTACTTCACAATCTGAAATTTGATGGCCGCATTCATCAATCACTGGCTGGAACGTAAAACGGTTTAAAAATGGTCCCGCAGATGGTCTGACAAATTAACTACA [...]
+  m140918_150013_42139_c100697631700000001823144703261565_s1_p0/30422/10327_12283\t4\t*\t0\t255\t*\t*\t0\t0\tAGAGTCATGTATAGAGTTATTGCTCAGCGGTGGCAGCACCAACTCAGCTTCCTTTCGGCTTTGTTAGCAGCCGATCCAAGCTTGAATTCCTGCAGCTCGGAGTTATTTGATAGTAAAAGTTGTCATCCAAACGCAGCACTACGCCCACCCGTACCTGAACAGGCTTTCGGTTTCATTTTACGAGAAAAACACTTTTGAAAGTTTTCGAAAGTCACTTCCTTTTTTGATTTTGTCCGTCATGCCTGCGCATTTCACAGAGAACTTGATGTCAGTGTAGTCGTCAGGAGAGCCCTCTACCAGTTTGCCGTCTACTTCTTTCATGTAAATATCCTGGAATGTAGGTTTTTCTGACGCAGATTATTTTGCACGCTTAAAAGTGGATT [...]
+
+Invalid-Args:
+
+  $ $BAM2SAM --header-only --no-header < $DATADIR/phi29.bam 
+  
+  ERROR: conflicting arguments requested: --no-header and --header-only
+  
+  Usage: bam2sam [options] [input]
+  
+  bam2sam converts a BAM file to SAM. It is essentially a stripped-down 'samtools
+  view', mostly useful for testing/debugging without requiring samtools. Input BAM
+  file is read from a file or stdin, and SAM output is written to stdout.
+  
+  Options:
+    -h, --help            show this help message and exit
+    --version             show program's version number and exit
+  
+    Options:
+      input               Input BAM file. If not provided, stdin will be used as input.
+      --no-header         Omit header from output.
+      --header-only       Print only the header (no records).
+  [1]
+
diff --git a/tests/src/cram/pbindexdump_cpp.t b/tests/src/cram/pbindexdump_cpp.t
new file mode 100644
index 0000000..cf318ee
--- /dev/null
+++ b/tests/src/cram/pbindexdump_cpp.t
@@ -0,0 +1,39 @@
+Setup:
+
+  $ PBINDEXDUMP="$TESTDIR/../../../bin/pbindexdump" && export PBINDEXDUMP
+
+  $ DATADIR="$TESTDIR/../../data" && export DATADIR
+
+Normal C++:
+
+  $ $PBINDEXDUMP --format=cpp $DATADIR/polymerase/production_hq.hqregion.bam.pbi
+  PbiRawData rawData;
+  rawData.Version(PbiFile::Version_3_0_1);
+  rawData.FileSections(PbiFile::BASIC);
+  rawData.NumReads(1);
+  
+  PbiRawBasicData& basicData = rawData.BasicData();
+  basicData.rgId_       = {-898246524};
+  basicData.qStart_     = {2659};
+  basicData.qEnd_       = {7034};
+  basicData.holeNumber_ = {0};
+  basicData.readQual_   = {0.01};
+  basicData.ctxtFlag_   = {0};
+  basicData.fileOffset_ = {20054016};
+  
+  
+--(leave the blank lines above this)--
+
+Request C++, with JSON options (stdout includes usage/help, so we just want to check stderr):
+
+  $ $PBINDEXDUMP --format=cpp --json-indent-level=2 $DATADIR/polymerase/production_hq.hqregion.bam.pbi > /dev/null
+  
+  ERROR: JSON formatting options not valid on non-JSON output
+  
+  [1]
+
+  $ $PBINDEXDUMP --format=cpp --json-raw $DATADIR/polymerase/production_hq.hqregion.bam.pbi > /dev/null
+  
+  ERROR: JSON formatting options not valid on non-JSON output
+  
+  [1]
diff --git a/tests/src/cram/pbindexdump_json.t b/tests/src/cram/pbindexdump_json.t
new file mode 100644
index 0000000..676e21a
--- /dev/null
+++ b/tests/src/cram/pbindexdump_json.t
@@ -0,0 +1,83 @@
+Setup:
+
+  $ PBINDEXDUMP="$TESTDIR/../../../bin/pbindexdump" && export PBINDEXDUMP
+
+  $ DATADIR="$TESTDIR/../../data" && export DATADIR
+
+Default settings (JSON):
+
+  $ $PBINDEXDUMP $DATADIR/polymerase/production_hq.hqregion.bam.pbi
+  {
+      "fileSections": [
+          "BasicData"
+      ],
+      "numReads": 1,
+      "reads": [
+          {
+              "contextFlag": 0,
+              "fileOffset": 20054016,
+              "holeNumber": 0,
+              "qEnd": 7034,
+              "qStart": 2659,
+              "readQuality": 0.00999999977648258,
+              "rgId": -898246524
+          }
+      ],
+      "version": "3.0.1"
+  }
+
+JSON indent level(2):
+
+  $ $PBINDEXDUMP --json-indent-level=2 $DATADIR/polymerase/production_hq.hqregion.bam.pbi
+  {
+    "fileSections": [
+      "BasicData"
+    ],
+    "numReads": 1,
+    "reads": [
+      {
+        "contextFlag": 0,
+        "fileOffset": 20054016,
+        "holeNumber": 0,
+        "qEnd": 7034,
+        "qStart": 2659,
+        "readQuality": 0.00999999977648258,
+        "rgId": -898246524
+      }
+    ],
+    "version": "3.0.1"
+  }
+
+JSON raw:
+
+  $ $PBINDEXDUMP --json-raw $DATADIR/polymerase/production_hq.hqregion.bam.pbi
+  {
+      "basicData": {
+          "ctxtFlag": [
+              0
+          ],
+          "fileOffset": [
+              20054016
+          ],
+          "holeNumber": [
+              0
+          ],
+          "qEnd": [
+              7034
+          ],
+          "qStart": [
+              2659
+          ],
+          "readQual": [
+              0.00999999977648258
+          ],
+          "rgId": [
+              -898246524
+          ]
+      },
+      "fileSections": [
+          "BasicData"
+      ],
+      "numReads": 1,
+      "version": "3.0.1"
+  }
diff --git a/tests/src/cram/pbmerge_aligned_ordering.t b/tests/src/cram/pbmerge_aligned_ordering.t
new file mode 100644
index 0000000..48a8553
--- /dev/null
+++ b/tests/src/cram/pbmerge_aligned_ordering.t
@@ -0,0 +1,197 @@
+Setup:
+
+  $ TOOLS_BIN="$TESTDIR/../../../bin" && export TOOLS_BIN
+  $ PBMERGE="$TOOLS_BIN/pbmerge" && export PBMERGE
+  $ BAM2SAM="$TOOLS_BIN/bam2sam" && export BAM2SAM
+
+  $ DATADIR="$TESTDIR/../../data" && export DATADIR
+  $ INPUT_1="$DATADIR/dataset/bam_mapping_1.bam" && export INPUT_1
+  $ INPUT_2="$DATADIR/dataset/bam_mapping_2.bam" && export INPUT_2
+
+  $ MERGED_BAM="/tmp/aligned_ordering_merged.bam" && export MERGED_BAM
+  $ MERGED_BAM_PBI="/tmp/aligned_ordering_merged.bam.pbi" && export MERGED_BAM_PBI
+
+Sanity Check:
+
+  $ $BAM2SAM --header-only $INPUT_1
+  @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.1 (esc)
+  @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+  @RG\tID:a9a22406c5\tDS:READTYPE=SUBREAD;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3;InsertionQV=iq;DeletionQV=dq;SubstitutionQV=sq;MergeQV=mq;SubstitutionTag=st;DeletionTag=dt\tPL:PACBIO\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377 (esc)
+  @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread  (esc)
+
+  $ $BAM2SAM --no-header $INPUT_1 | cut -f 1,3,4 | head -n 10
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49050/48_1132\tlambda_NEB3011\t1 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/0_344\tlambda_NEB3011\t676 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+
+  $ $BAM2SAM --header-only $INPUT_2
+  @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.1 (esc)
+  @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+  @RG\tID:a9a22406c5\tDS:READTYPE=SUBREAD;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3;InsertionQV=iq;DeletionQV=dq;SubstitutionQV=sq;MergeQV=mq;SubstitutionTag=st;DeletionTag=dt\tPL:PACBIO\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377 (esc)
+  @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread  (esc)
+
+  $ $BAM2SAM --no-header $INPUT_2 | cut -f 1,3,4 | head -n 10
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/387_1134\tlambda_NEB3011\t303 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7247/7338_7831\tlambda_NEB3011\t4904 (esc)
+
+Normal Merge:
+
+  $ $PBMERGE $INPUT_1 $INPUT_2 > $MERGED_BAM
+
+  $ $BAM2SAM --header-only $MERGED_BAM
+  @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.1 (esc)
+  @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+  @RG\tID:a9a22406c5\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377\tPM:SEQUEL (esc)
+  @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread  (esc)
+  @PG\tID:pbmerge-0.5.0\tPN:pbmerge\tVN:0.5.0 (esc)
+
+  $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1,3,4 | head -n 20
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49050/48_1132\tlambda_NEB3011\t1 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/387_1134\tlambda_NEB3011\t303 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/0_344\tlambda_NEB3011\t676 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7247/7338_7831\tlambda_NEB3011\t4904 (esc)
+
+  $ rm $MERGED_BAM
+
+Shuffle Input:
+
+  $ $PBMERGE $INPUT_2 $INPUT_2 > $MERGED_BAM
+
+  $ $BAM2SAM --header-only $MERGED_BAM
+  @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.1 (esc)
+  @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+  @RG\tID:a9a22406c5\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377\tPM:SEQUEL (esc)
+  @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread  (esc)
+  @PG\tID:pbmerge-0.5.0\tPN:pbmerge\tVN:0.5.0 (esc)
+
+  $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1,3,4 | head -n 20
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/387_1134\tlambda_NEB3011\t303 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7247/7338_7831\tlambda_NEB3011\t4904 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7247/7046_7293\tlambda_NEB3011\t5136 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/38025/6255_7894\tlambda_NEB3011\t5427 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5311_5508\tlambda_NEB3011\t5943 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/36363/899_1197\tlambda_NEB3011\t6258 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/36363/605_853\tlambda_NEB3011\t6312 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/31174/0_1029\tlambda_NEB3011\t6487 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/31174/1075_1271\tlambda_NEB3011\t6499 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/38025/5743_6211\tlambda_NEB3011\t6606 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/50257/6944_7361\tlambda_NEB3011\t6942 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/50257/6546_6903\tlambda_NEB3011\t7010 (esc)
+
+  $ rm $MERGED_BAM
+
+Explicit Output Filename (also enables PBI):
+
+  $ $PBMERGE -o $MERGED_BAM $INPUT_1 $INPUT_2
+
+  $ $BAM2SAM --header-only $MERGED_BAM
+  @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.1 (esc)
+  @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+  @RG\tID:a9a22406c5\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377\tPM:SEQUEL (esc)
+  @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread  (esc)
+  @PG\tID:pbmerge-0.5.0\tPN:pbmerge\tVN:0.5.0 (esc)
+
+  $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1,3,4 | head -n 20
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49050/48_1132\tlambda_NEB3011\t1 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/387_1134\tlambda_NEB3011\t303 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/0_344\tlambda_NEB3011\t676 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7247/7338_7831\tlambda_NEB3011\t4904 (esc)
+
+  $ [ -f $MERGED_BAM_PBI ] && echo "Found" || echo "Not found"
+  Found
+
+  $ rm $MERGED_BAM
+  $ rm $MERGED_BAM_PBI
+
+Explicit Output Filename (with disabled PBI):
+
+  $ $PBMERGE -o $MERGED_BAM --no-pbi $INPUT_1 $INPUT_2
+
+  $ $BAM2SAM --header-only $MERGED_BAM
+  @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.1 (esc)
+  @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+  @RG\tID:a9a22406c5\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377\tPM:SEQUEL (esc)
+  @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread  (esc)
+  @PG\tID:pbmerge-0.5.0\tPN:pbmerge\tVN:0.5.0 (esc)
+
+  $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1,3,4 | head -n 20
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49050/48_1132\tlambda_NEB3011\t1 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/387_1134\tlambda_NEB3011\t303 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/0_344\tlambda_NEB3011\t676 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7247/7338_7831\tlambda_NEB3011\t4904 (esc)
+
+  $ [ -f $MERGED_BAM_PBI ] && echo "Found" || echo "Not found"
+  Not found
+
+  $ rm $MERGED_BAM
diff --git a/tests/src/cram/pbmerge_dataset.t b/tests/src/cram/pbmerge_dataset.t
new file mode 100644
index 0000000..076bcc0
--- /dev/null
+++ b/tests/src/cram/pbmerge_dataset.t
@@ -0,0 +1,144 @@
+Setup:
+
+  $ TOOLS_BIN="$TESTDIR/../../../bin" && export TOOLS_BIN
+  $ PBMERGE="$TOOLS_BIN/pbmerge" && export PBMERGE
+  $ BAM2SAM="$TOOLS_BIN/bam2sam" && export BAM2SAM
+
+  $ DATADIR="$TESTDIR/../../data" && export DATADIR
+  $ INPUT_XML="$DATADIR/polymerase/consolidate.subread.dataset.xml" && export INPUT_XML
+  $ BAM_1="$DATADIR/polymerase/production.subreads.bam" && export BAM_1
+  $ BAM_2="$DATADIR/polymerase/production.scraps.bam" && export BAM_2
+
+  $ MERGED_BAM="/tmp/merged.bam" && export MERGED_BAM
+  $ MERGED_BAM_PBI="/tmp/merged.bam.pbi" && export MERGED_BAM_PBI
+
+Sanity Check:
+
+  $ $BAM2SAM --no-header $BAM_1 | cut -f 1
+  ArminsFakeMovie/0/2659_3025
+  ArminsFakeMovie/0/3116_3628
+  ArminsFakeMovie/0/3722_4267
+  ArminsFakeMovie/0/4356_4864
+  ArminsFakeMovie/0/4960_5477
+  ArminsFakeMovie/0/5571_6087
+  ArminsFakeMovie/0/6199_6719
+  ArminsFakeMovie/0/6812_7034
+
+  $ $BAM2SAM --no-header $BAM_2  | cut -f 1
+  ArminsFakeMovie/0/0_2659
+  ArminsFakeMovie/0/3025_3047
+  ArminsFakeMovie/0/3047_3095
+  ArminsFakeMovie/0/3095_3116
+  ArminsFakeMovie/0/3628_3650
+  ArminsFakeMovie/0/3650_3700
+  ArminsFakeMovie/0/3700_3722
+  ArminsFakeMovie/0/4267_4289
+  ArminsFakeMovie/0/4289_4335
+  ArminsFakeMovie/0/4335_4356
+  ArminsFakeMovie/0/4864_4888
+  ArminsFakeMovie/0/4888_4939
+  ArminsFakeMovie/0/4939_4960
+  ArminsFakeMovie/0/5477_5498
+  ArminsFakeMovie/0/5498_5546
+  ArminsFakeMovie/0/5546_5571
+  ArminsFakeMovie/0/6087_6116
+  ArminsFakeMovie/0/6116_6173
+  ArminsFakeMovie/0/6173_6199
+  ArminsFakeMovie/0/6719_6740
+  ArminsFakeMovie/0/6740_6790
+  ArminsFakeMovie/0/6790_6812
+  ArminsFakeMovie/0/7034_7035
+
+Normal Merge from XML:
+
+  $ $PBMERGE -o $MERGED_BAM $INPUT_XML
+
+  $ [ -f $MERGED_BAM ] && echo "Found" || echo "Not found"
+  Found
+
+  $ [ -f $MERGED_BAM_PBI ] && echo "Found" || echo "Not found"
+  Found
+
+  $ $BAM2SAM --header-only $MERGED_BAM
+  @HD\tVN:1.1\tSO:unknown\tpb:3.0.1 (esc)
+  @RG\tID:8aaede36\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:CodecV1=ip;BINDINGKIT=FakeBindKit;SEQUENCINGKIT=FakeSeqKit;BASECALLERVERSION=0.2.0;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+  @RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:CodecV1=ip;BINDINGKIT=FakeBindKit;SEQUENCINGKIT=FakeSeqKit;BASECALLERVERSION=0.2.0;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+  @PG\tID:BAZ_FORMAT\tVN:0.3.0 (esc)
+  @PG\tID:PPA-BAZ2BAM\tVN:0.1.0 (esc)
+  @PG\tID:PPA-BAZWRITER\tVN:0.2.0 (esc)
+  @PG\tID:pbmerge-0.5.0\tPN:pbmerge\tVN:0.5.0 (esc)
+
+  $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1
+  ArminsFakeMovie/0/4267_4289
+  ArminsFakeMovie/0/4289_4335
+  ArminsFakeMovie/0/4335_4356
+  ArminsFakeMovie/0/4356_4864
+  ArminsFakeMovie/0/4864_4888
+  ArminsFakeMovie/0/4888_4939
+  ArminsFakeMovie/0/4939_4960
+  ArminsFakeMovie/0/4960_5477
+
+  $ rm $MERGED_BAM
+  $ rm $MERGED_BAM_PBI
+
+Normal Merge from XML (disabled PBI):
+
+  $ $PBMERGE --no-pbi -o $MERGED_BAM $INPUT_XML
+
+  $ [ -f $MERGED_BAM ] && echo "Found" || echo "Not found"
+  Found
+
+  $ [ -f $MERGED_BAM_PBI ] && echo "Found" || echo "Not found"
+  Not found
+
+  $ $BAM2SAM --header-only $MERGED_BAM
+  @HD\tVN:1.1\tSO:unknown\tpb:3.0.1 (esc)
+  @RG\tID:8aaede36\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:CodecV1=ip;BINDINGKIT=FakeBindKit;SEQUENCINGKIT=FakeSeqKit;BASECALLERVERSION=0.2.0;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+  @RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:CodecV1=ip;BINDINGKIT=FakeBindKit;SEQUENCINGKIT=FakeSeqKit;BASECALLERVERSION=0.2.0;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+  @PG\tID:BAZ_FORMAT\tVN:0.3.0 (esc)
+  @PG\tID:PPA-BAZ2BAM\tVN:0.1.0 (esc)
+  @PG\tID:PPA-BAZWRITER\tVN:0.2.0 (esc)
+  @PG\tID:pbmerge-0.5.0\tPN:pbmerge\tVN:0.5.0 (esc)
+
+  $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1
+  ArminsFakeMovie/0/4267_4289
+  ArminsFakeMovie/0/4289_4335
+  ArminsFakeMovie/0/4335_4356
+  ArminsFakeMovie/0/4356_4864
+  ArminsFakeMovie/0/4864_4888
+  ArminsFakeMovie/0/4888_4939
+  ArminsFakeMovie/0/4939_4960
+  ArminsFakeMovie/0/4960_5477
+
+  $ rm $MERGED_BAM
+
+Write to stdout:
+
+  $ $PBMERGE --no-pbi $INPUT_XML > $MERGED_BAM
+
+  $ [ -f $MERGED_BAM ] && echo "Found" || echo "Not found"
+  Found
+
+  $ [ -f $MERGED_BAM_PBI ] && echo "Found" || echo "Not found"
+  Not found
+
+  $ $BAM2SAM --header-only $MERGED_BAM
+  @HD\tVN:1.1\tSO:unknown\tpb:3.0.1 (esc)
+  @RG\tID:8aaede36\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:CodecV1=ip;BINDINGKIT=FakeBindKit;SEQUENCINGKIT=FakeSeqKit;BASECALLERVERSION=0.2.0;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+  @RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:CodecV1=ip;BINDINGKIT=FakeBindKit;SEQUENCINGKIT=FakeSeqKit;BASECALLERVERSION=0.2.0;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+  @PG\tID:BAZ_FORMAT\tVN:0.3.0 (esc)
+  @PG\tID:PPA-BAZ2BAM\tVN:0.1.0 (esc)
+  @PG\tID:PPA-BAZWRITER\tVN:0.2.0 (esc)
+  @PG\tID:pbmerge-0.5.0\tPN:pbmerge\tVN:0.5.0 (esc)
+
+  $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1
+  ArminsFakeMovie/0/4267_4289
+  ArminsFakeMovie/0/4289_4335
+  ArminsFakeMovie/0/4335_4356
+  ArminsFakeMovie/0/4356_4864
+  ArminsFakeMovie/0/4864_4888
+  ArminsFakeMovie/0/4888_4939
+  ArminsFakeMovie/0/4939_4960
+  ArminsFakeMovie/0/4960_5477
+
+  $ rm $MERGED_BAM
diff --git a/tests/src/cram/pbmerge_fofn.t b/tests/src/cram/pbmerge_fofn.t
new file mode 100644
index 0000000..b88e08b
--- /dev/null
+++ b/tests/src/cram/pbmerge_fofn.t
@@ -0,0 +1,134 @@
+Setup:
+
+  $ TOOLS_BIN="$TESTDIR/../../../bin" && export TOOLS_BIN
+  $ PBMERGE="$TOOLS_BIN/pbmerge" && export PBMERGE
+  $ BAM2SAM="$TOOLS_BIN/bam2sam" && export BAM2SAM
+
+  $ DATADIR="$TESTDIR/../../data" && export DATADIR
+  $ INPUT_FOFN="$DATADIR/dataset/merge.fofn" && export INPUT_FOFN
+  $ INPUT_1="$DATADIR/dataset/bam_mapping_1.bam" && export INPUT_1
+  $ INPUT_2="$DATADIR/dataset/bam_mapping_2.bam" && export INPUT_2
+
+  $ MERGED_BAM="/tmp/aligned_ordering_merged.bam" && export MERGED_BAM
+  $ MERGED_BAM_PBI="/tmp/aligned_ordering_merged.bam.pbi" && export MERGED_BAM_PBI
+
+Sanity Check:
+
+  $ $BAM2SAM --header-only $INPUT_1
+  @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.1 (esc)
+  @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+  @RG\tID:a9a22406c5\tDS:READTYPE=SUBREAD;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3;InsertionQV=iq;DeletionQV=dq;SubstitutionQV=sq;MergeQV=mq;SubstitutionTag=st;DeletionTag=dt\tPL:PACBIO\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377 (esc)
+  @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread  (esc)
+
+  $ $BAM2SAM --no-header $INPUT_1 | cut -f 1,3,4 | head -n 10
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49050/48_1132\tlambda_NEB3011\t1 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/0_344\tlambda_NEB3011\t676 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+
+  $ $BAM2SAM --header-only $INPUT_2
+  @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.1 (esc)
+  @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+  @RG\tID:a9a22406c5\tDS:READTYPE=SUBREAD;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3;InsertionQV=iq;DeletionQV=dq;SubstitutionQV=sq;MergeQV=mq;SubstitutionTag=st;DeletionTag=dt\tPL:PACBIO\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377 (esc)
+  @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread  (esc)
+
+  $ $BAM2SAM --no-header $INPUT_2 | cut -f 1,3,4 | head -n 10
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/387_1134\tlambda_NEB3011\t303 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7247/7338_7831\tlambda_NEB3011\t4904 (esc)
+
+Normal Merge from FOFN:
+
+  $ $PBMERGE -o $MERGED_BAM $INPUT_FOFN
+
+  $ [ -f $MERGED_BAM ] && echo "Found" || echo "Not found"
+  Found
+
+  $ [ -f $MERGED_BAM_PBI ] && echo "Found" || echo "Not found"
+  Found
+
+  $ $BAM2SAM --header-only $MERGED_BAM
+  @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.1 (esc)
+  @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+  @RG\tID:a9a22406c5\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377\tPM:SEQUEL (esc)
+  @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread  (esc)
+  @PG\tID:pbmerge-0.5.0\tPN:pbmerge\tVN:0.5.0 (esc)
+
+  $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1,3,4 | head -n 20
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49050/48_1132\tlambda_NEB3011\t1 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/387_1134\tlambda_NEB3011\t303 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/0_344\tlambda_NEB3011\t676 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7247/7338_7831\tlambda_NEB3011\t4904 (esc)
+
+  $ rm $MERGED_BAM
+  $ rm $MERGED_BAM_PBI
+
+Normal Merge from FOFN (disabled PBI):
+
+  $ $PBMERGE --no-pbi -o $MERGED_BAM $INPUT_FOFN
+
+  $ [ -f $MERGED_BAM ] && echo "Found" || echo "Not found"
+  Found
+
+  $ [ -f $MERGED_BAM_PBI ] && echo "Found" || echo "Not found"
+  Not found
+
+  $ $BAM2SAM --header-only $MERGED_BAM
+  @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.1 (esc)
+  @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+  @RG\tID:a9a22406c5\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377\tPM:SEQUEL (esc)
+  @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread  (esc)
+  @PG\tID:pbmerge-0.5.0\tPN:pbmerge\tVN:0.5.0 (esc)
+
+  $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1,3,4 | head -n 20
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49050/48_1132\tlambda_NEB3011\t1 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/387_1134\tlambda_NEB3011\t303 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/0_344\tlambda_NEB3011\t676 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7247/7338_7831\tlambda_NEB3011\t4904 (esc)
+
+  $ rm $MERGED_BAM
diff --git a/tests/src/cram/pbmerge_mixed_ordering.t b/tests/src/cram/pbmerge_mixed_ordering.t
new file mode 100644
index 0000000..70cbe74
--- /dev/null
+++ b/tests/src/cram/pbmerge_mixed_ordering.t
@@ -0,0 +1,57 @@
+Setup:
+
+  $ TOOLS_BIN="$TESTDIR/../../../bin" && export TOOLS_BIN
+  $ PBMERGE="$TOOLS_BIN/pbmerge" && export PBMERGE
+  $ BAM2SAM="$TOOLS_BIN/bam2sam" && export BAM2SAM
+
+  $ DATADIR="$TESTDIR/../../data" && export DATADIR
+  $ UNALIGNED_BAM="$DATADIR/polymerase/internal.hqregions.bam" && export UNALIGNED_BAM
+  $ ALIGNED_BAM="$DATADIR/dataset/bam_mapping_1.bam" && export ALIGNED_BAM
+
+  $ MERGED_BAM="/tmp/mixed_ordering_merged.bam" && export MERGED_BAM
+
+Sanity Check:
+
+  $ $BAM2SAM --header-only $UNALIGNED_BAM
+  @HD\tVN:1.1\tSO:unknown\tpb:3.0.1 (esc)
+  @RG\tID:ca75d884\tPL:PACBIO\tDS:READTYPE=HQREGION;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie (esc)
+  @PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0 (esc)
+  @PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0 (esc)
+  @PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0 (esc)
+
+  $ $BAM2SAM --no-header $UNALIGNED_BAM | cut -f 1
+  ArminsFakeMovie/100000/2659_7034
+
+  $ $BAM2SAM --header-only $ALIGNED_BAM
+  @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.1 (esc)
+  @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+  @RG\tID:a9a22406c5\tDS:READTYPE=SUBREAD;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3;InsertionQV=iq;DeletionQV=dq;SubstitutionQV=sq;MergeQV=mq;SubstitutionTag=st;DeletionTag=dt\tPL:PACBIO\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377 (esc)
+  @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread  (esc)
+
+  $ $BAM2SAM --no-header $ALIGNED_BAM | cut -f 1,3,4 | head -n 10
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49050/48_1132\tlambda_NEB3011\t1 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/0_344\tlambda_NEB3011\t676 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+  m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+
+Normal Merge - should fail:
+
+  $ $PBMERGE $UNALIGNED_BAM $ALIGNED_BAM > $MERGED_BAM
+  ERROR: BAM file sort orders do not match, aborting merge
+  [1]
+
+Shuffle Input - should fail:
+
+  $ $PBMERGE $ALIGNED_BAM $UNALIGNED_BAM > $MERGED_BAM
+  ERROR: BAM file sort orders do not match, aborting merge
+  [1]
+
+Cleanup:
+
+  $ rm $MERGED_BAM
diff --git a/tests/src/cram/pbmerge_pacbio_ordering.t b/tests/src/cram/pbmerge_pacbio_ordering.t
new file mode 100644
index 0000000..729c627
--- /dev/null
+++ b/tests/src/cram/pbmerge_pacbio_ordering.t
@@ -0,0 +1,227 @@
+Setup:
+
+  $ TOOLS_BIN="$TESTDIR/../../../bin" && export TOOLS_BIN
+  $ PBMERGE="$TOOLS_BIN/pbmerge" && export PBMERGE
+  $ BAM2SAM="$TOOLS_BIN/bam2sam" && export BAM2SAM
+
+  $ DATADIR="$TESTDIR/../../data" && export DATADIR
+  $ HQREGION_BAM="$DATADIR/polymerase/internal.hqregions.bam" && export HQREGION_BAM
+  $ SCRAPS_BAM="$DATADIR/polymerase/internal.scraps.bam" && export SCRAPS_BAM
+
+  $ MERGED_BAM="/tmp/pacbio_ordering_merged.bam" && export MERGED_BAM
+  $ MERGED_BAM_PBI="/tmp/pacbio_ordering_merged.bam.pbi" && export MERGED_BAM_PBI
+
+Sanity Check:
+
+  $ $BAM2SAM --header-only $HQREGION_BAM
+  @HD\tVN:1.1\tSO:unknown\tpb:3.0.1 (esc)
+  @RG\tID:ca75d884\tPL:PACBIO\tDS:READTYPE=HQREGION;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie (esc)
+  @PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0 (esc)
+  @PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0 (esc)
+  @PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0 (esc)
+
+  $ $BAM2SAM --no-header $HQREGION_BAM | cut -f 1
+  ArminsFakeMovie/100000/2659_7034
+
+  $ $BAM2SAM --header-only $SCRAPS_BAM
+  @HD\tVN:1.1\tSO:unknown\tpb:3.0.1 (esc)
+  @RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie (esc)
+  @PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0 (esc)
+  @PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0 (esc)
+  @PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0 (esc)
+
+  $ $BAM2SAM --no-header $SCRAPS_BAM | cut -f 1
+  ArminsFakeMovie/100000/0_2659
+  ArminsFakeMovie/100000/3025_3047
+  ArminsFakeMovie/100000/3047_3095
+  ArminsFakeMovie/100000/3095_3116
+  ArminsFakeMovie/100000/3628_3650
+  ArminsFakeMovie/100000/3650_3700
+  ArminsFakeMovie/100000/3700_3722
+  ArminsFakeMovie/100000/4267_4289
+  ArminsFakeMovie/100000/4289_4335
+  ArminsFakeMovie/100000/4335_4356
+  ArminsFakeMovie/100000/4864_4888
+  ArminsFakeMovie/100000/4888_4939
+  ArminsFakeMovie/100000/4939_4960
+  ArminsFakeMovie/100000/5477_5498
+  ArminsFakeMovie/100000/5498_5546
+  ArminsFakeMovie/100000/5546_5571
+  ArminsFakeMovie/100000/6087_6116
+  ArminsFakeMovie/100000/6116_6173
+  ArminsFakeMovie/100000/6173_6199
+  ArminsFakeMovie/100000/6719_6740
+  ArminsFakeMovie/100000/6740_6790
+  ArminsFakeMovie/100000/6790_6812
+  ArminsFakeMovie/100000/7034_7035
+
+Normal Merge:
+
+  $ $PBMERGE $HQREGION_BAM $SCRAPS_BAM > $MERGED_BAM
+
+  $ $BAM2SAM --header-only $MERGED_BAM
+  @HD\tVN:1.1\tSO:unknown\tpb:3.0.1 (esc)
+  @RG\tID:ca75d884\tPL:PACBIO\tDS:READTYPE=HQREGION;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+  @RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+  @PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0 (esc)
+  @PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0 (esc)
+  @PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0 (esc)
+  @PG\tID:pbmerge-0.5.0\tPN:pbmerge\tVN:0.5.0 (esc)
+
+  $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1
+  ArminsFakeMovie/100000/0_2659
+  ArminsFakeMovie/100000/2659_7034
+  ArminsFakeMovie/100000/3025_3047
+  ArminsFakeMovie/100000/3047_3095
+  ArminsFakeMovie/100000/3095_3116
+  ArminsFakeMovie/100000/3628_3650
+  ArminsFakeMovie/100000/3650_3700
+  ArminsFakeMovie/100000/3700_3722
+  ArminsFakeMovie/100000/4267_4289
+  ArminsFakeMovie/100000/4289_4335
+  ArminsFakeMovie/100000/4335_4356
+  ArminsFakeMovie/100000/4864_4888
+  ArminsFakeMovie/100000/4888_4939
+  ArminsFakeMovie/100000/4939_4960
+  ArminsFakeMovie/100000/5477_5498
+  ArminsFakeMovie/100000/5498_5546
+  ArminsFakeMovie/100000/5546_5571
+  ArminsFakeMovie/100000/6087_6116
+  ArminsFakeMovie/100000/6116_6173
+  ArminsFakeMovie/100000/6173_6199
+  ArminsFakeMovie/100000/6719_6740
+  ArminsFakeMovie/100000/6740_6790
+  ArminsFakeMovie/100000/6790_6812
+  ArminsFakeMovie/100000/7034_7035
+
+  $ rm $MERGED_BAM
+
+Shuffle Input:
+
+  $ $PBMERGE $SCRAPS_BAM $HQREGION_BAM  > $MERGED_BAM
+
+  $ $BAM2SAM --header-only $MERGED_BAM
+  @HD\tVN:1.1\tSO:unknown\tpb:3.0.1 (esc)
+  @RG\tID:ca75d884\tPL:PACBIO\tDS:READTYPE=HQREGION;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+  @RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+  @PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0 (esc)
+  @PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0 (esc)
+  @PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0 (esc)
+  @PG\tID:pbmerge-0.5.0\tPN:pbmerge\tVN:0.5.0 (esc)
+
+  $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1
+  ArminsFakeMovie/100000/0_2659
+  ArminsFakeMovie/100000/2659_7034
+  ArminsFakeMovie/100000/3025_3047
+  ArminsFakeMovie/100000/3047_3095
+  ArminsFakeMovie/100000/3095_3116
+  ArminsFakeMovie/100000/3628_3650
+  ArminsFakeMovie/100000/3650_3700
+  ArminsFakeMovie/100000/3700_3722
+  ArminsFakeMovie/100000/4267_4289
+  ArminsFakeMovie/100000/4289_4335
+  ArminsFakeMovie/100000/4335_4356
+  ArminsFakeMovie/100000/4864_4888
+  ArminsFakeMovie/100000/4888_4939
+  ArminsFakeMovie/100000/4939_4960
+  ArminsFakeMovie/100000/5477_5498
+  ArminsFakeMovie/100000/5498_5546
+  ArminsFakeMovie/100000/5546_5571
+  ArminsFakeMovie/100000/6087_6116
+  ArminsFakeMovie/100000/6116_6173
+  ArminsFakeMovie/100000/6173_6199
+  ArminsFakeMovie/100000/6719_6740
+  ArminsFakeMovie/100000/6740_6790
+  ArminsFakeMovie/100000/6790_6812
+  ArminsFakeMovie/100000/7034_7035
+
+  $ rm $MERGED_BAM
+
+Explicit Output Filename (also enables PBI):
+
+  $ $PBMERGE -o $MERGED_BAM $HQREGION_BAM $SCRAPS_BAM
+
+  $ $BAM2SAM --header-only $MERGED_BAM
+  @HD\tVN:1.1\tSO:unknown\tpb:3.0.1 (esc)
+  @RG\tID:ca75d884\tPL:PACBIO\tDS:READTYPE=HQREGION;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+  @RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+  @PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0 (esc)
+  @PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0 (esc)
+  @PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0 (esc)
+  @PG\tID:pbmerge-0.5.0\tPN:pbmerge\tVN:0.5.0 (esc)
+
+  $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1
+  ArminsFakeMovie/100000/0_2659
+  ArminsFakeMovie/100000/2659_7034
+  ArminsFakeMovie/100000/3025_3047
+  ArminsFakeMovie/100000/3047_3095
+  ArminsFakeMovie/100000/3095_3116
+  ArminsFakeMovie/100000/3628_3650
+  ArminsFakeMovie/100000/3650_3700
+  ArminsFakeMovie/100000/3700_3722
+  ArminsFakeMovie/100000/4267_4289
+  ArminsFakeMovie/100000/4289_4335
+  ArminsFakeMovie/100000/4335_4356
+  ArminsFakeMovie/100000/4864_4888
+  ArminsFakeMovie/100000/4888_4939
+  ArminsFakeMovie/100000/4939_4960
+  ArminsFakeMovie/100000/5477_5498
+  ArminsFakeMovie/100000/5498_5546
+  ArminsFakeMovie/100000/5546_5571
+  ArminsFakeMovie/100000/6087_6116
+  ArminsFakeMovie/100000/6116_6173
+  ArminsFakeMovie/100000/6173_6199
+  ArminsFakeMovie/100000/6719_6740
+  ArminsFakeMovie/100000/6740_6790
+  ArminsFakeMovie/100000/6790_6812
+  ArminsFakeMovie/100000/7034_7035
+
+  $ [ -f $MERGED_BAM_PBI ] && echo "Found" || echo "Not found"
+  Found
+
+  $ rm $MERGED_BAM
+  $ rm $MERGED_BAM_PBI
+
+Explicit Output Filename (with disabled PBI):
+
+  $ $PBMERGE -o $MERGED_BAM --no-pbi $HQREGION_BAM $SCRAPS_BAM
+
+  $ $BAM2SAM --header-only $MERGED_BAM
+  @HD\tVN:1.1\tSO:unknown\tpb:3.0.1 (esc)
+  @RG\tID:ca75d884\tPL:PACBIO\tDS:READTYPE=HQREGION;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+  @RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+  @PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0 (esc)
+  @PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0 (esc)
+  @PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0 (esc)
+  @PG\tID:pbmerge-0.5.0\tPN:pbmerge\tVN:0.5.0 (esc)
+
+  $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1
+  ArminsFakeMovie/100000/0_2659
+  ArminsFakeMovie/100000/2659_7034
+  ArminsFakeMovie/100000/3025_3047
+  ArminsFakeMovie/100000/3047_3095
+  ArminsFakeMovie/100000/3095_3116
+  ArminsFakeMovie/100000/3628_3650
+  ArminsFakeMovie/100000/3650_3700
+  ArminsFakeMovie/100000/3700_3722
+  ArminsFakeMovie/100000/4267_4289
+  ArminsFakeMovie/100000/4289_4335
+  ArminsFakeMovie/100000/4335_4356
+  ArminsFakeMovie/100000/4864_4888
+  ArminsFakeMovie/100000/4888_4939
+  ArminsFakeMovie/100000/4939_4960
+  ArminsFakeMovie/100000/5477_5498
+  ArminsFakeMovie/100000/5498_5546
+  ArminsFakeMovie/100000/5546_5571
+  ArminsFakeMovie/100000/6087_6116
+  ArminsFakeMovie/100000/6116_6173
+  ArminsFakeMovie/100000/6173_6199
+  ArminsFakeMovie/100000/6719_6740
+  ArminsFakeMovie/100000/6740_6790
+  ArminsFakeMovie/100000/6790_6812
+  ArminsFakeMovie/100000/7034_7035
+
+  $ [ -f $MERGED_BAM_PBI ] && echo "Found" || echo "Not found"
+  Not found
+
+  $ rm $MERGED_BAM
diff --git a/tests/src/python/test/test_Accuracy.py b/tests/src/python/test/test_Accuracy.py
index 7bef5db..a8b2112 100755
--- a/tests/src/python/test/test_Accuracy.py
+++ b/tests/src/python/test/test_Accuracy.py
@@ -49,17 +49,17 @@ class AccuracyTest(unittest.TestCase):
     # ------------ TESTS --------------
         
     def test_clamp(self):
-        a_zero     = PacBioBam.Accuracy(0)
-        a_neg      = PacBioBam.Accuracy(-1)
-        a_min      = PacBioBam.Accuracy(0)
-        a_normal   = PacBioBam.Accuracy(300)
-        a_max      = PacBioBam.Accuracy(1000)
-        a_tooLarge = PacBioBam.Accuracy(2000)
+        a_zero     = PacBioBam.Accuracy(0.0)
+        a_neg      = PacBioBam.Accuracy(-0.5)
+        a_min      = PacBioBam.Accuracy(0.0)
+        a_normal   = PacBioBam.Accuracy(0.9)
+        a_max      = PacBioBam.Accuracy(1.0)
+        a_tooLarge = PacBioBam.Accuracy(1.1)
         
-        self.assertEqual(0,    int(a_zero))
-        self.assertEqual(0,    int(a_neg))
-        self.assertEqual(0,    int(a_min))
-        self.assertEqual(300,  int(a_normal))
-        self.assertEqual(1000, int(a_max))
-        self.assertEqual(1000, int(a_tooLarge))
+        self.assertAlmostEqual(float(0.0), float(a_zero))
+        self.assertAlmostEqual(float(0.0), float(a_neg))
+        self.assertAlmostEqual(float(0.0), float(a_min))
+        self.assertAlmostEqual(float(0.9), float(a_normal))
+        self.assertAlmostEqual(float(1.0), float(a_max))
+        self.assertAlmostEqual(float(1.0), float(a_tooLarge))
         
\ No newline at end of file
diff --git a/tests/src/python/test/test_BamFile.py b/tests/src/python/test/test_BamFile.py
index 214b2a8..aabfc59 100755
--- a/tests/src/python/test/test_BamFile.py
+++ b/tests/src/python/test/test_BamFile.py
@@ -55,7 +55,6 @@ class BamFileTest(unittest.TestCase):
         
     def test_ctor(self):
         f = PacBioBam.BamFile(self.ex2BamFn)
-        self.assertFalse(f.IsPacBioBAM())
         
     def test_nonExistentFile(self):
         with self.assertRaises(RuntimeError):
diff --git a/tests/src/python/test/test_BamHeader.py b/tests/src/python/test/test_BamHeader.py
index 076683b..3a08fad 100755
--- a/tests/src/python/test/test_BamHeader.py
+++ b/tests/src/python/test/test_BamHeader.py
@@ -71,7 +71,7 @@ class BamHeaderTest(unittest.TestCase):
         
     def test_decode(self):
         
-        text = ("@HD\tVN:1.1\tSO:queryname\tpb:3.0b3\n"
+        text = ("@HD\tVN:1.1\tSO:queryname\tpb:3.0.1\n"
                "@SQ\tSN:chr1\tLN:2038\tSP:chocobo\n"
                "@SQ\tSN:chr2\tLN:3042\tSP:chocobo\n"
                "@RG\tID:rg1\tSM:control\n"
@@ -85,7 +85,7 @@ class BamHeaderTest(unittest.TestCase):
 
         self.assertEqual("1.1",       header.Version())
         self.assertEqual("queryname", header.SortOrder())
-        self.assertEqual("3.0b3",     header.PacBioBamVersion())
+        self.assertEqual("3.0.1",     header.PacBioBamVersion())
 
         self.assertEqual(3, len(header.ReadGroups()))
         self.assertTrue(header.HasReadGroup("rg1"))
@@ -113,7 +113,7 @@ class BamHeaderTest(unittest.TestCase):
         
     def test_encode(self):
         
-        expectedText = ("@HD\tVN:1.1\tSO:queryname\tpb:3.0b3\n"
+        expectedText = ("@HD\tVN:1.1\tSO:queryname\tpb:3.0.1\n"
                         "@SQ\tSN:chr1\tLN:2038\tSP:chocobo\n"
                         "@SQ\tSN:chr2\tLN:3042\tSP:chocobo\n"
                         "@RG\tID:rg1\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:control\n"
@@ -143,7 +143,7 @@ class BamHeaderTest(unittest.TestCase):
         header = PacBioBam.BamHeader()
         header.Version("1.1")
         header.SortOrder("queryname")
-        header.PacBioBamVersion("3.0b3")
+        header.PacBioBamVersion("3.0.1")
         header.AddReadGroup(rg1)
         header.AddReadGroup(rg2)
         header.AddReadGroup(rg3)
diff --git a/tests/src/python/test/test_PolymeraseStitching.py b/tests/src/python/test/test_PolymeraseStitching.py
new file mode 100755
index 0000000..416ac71
--- /dev/null
+++ b/tests/src/python/test/test_PolymeraseStitching.py
@@ -0,0 +1,383 @@
+# Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted (subject to the limitations in the
+# disclaimer below) provided that the following conditions are met:
+#
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#
+#  * Redistributions in binary form must reproduce the above
+#    copyright notice, this list of conditions and the following
+#    disclaimer in the documentation and/or other materials provided
+#    with the distribution.
+#
+#  * Neither the name of Pacific Biosciences nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+# GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+# BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Author: Derek Barnett
+
+import PacBioBam
+import config 
+import unittest
+
+class PolymeraseStitchingTest(unittest.TestCase):
+    
+    # ------------ SETUP --------------
+    
+    def setUp(self):
+        self.data = config.TestData()  # fixture handle; the tests below read self.data.directory
+    
+    def runTest(self):  # calls the five stitching tests listed here, in this fixed order
+        self.test_virtualRegions()
+        self.test_internalSubreadsToOriginal()
+        self.test_internalHqToOriginal()
+        self.test_productionSubreadsToOriginal()
+        self.test_productionHqToOriginal()
+        
+    # ------------ TESTS --------------
+    
+    def test_virtualRegions(self):
+        # Reads one virtual record from internal subreads + scraps and checks its region tables per region type.
+        subreadBam = self.data.directory + "/polymerase/internal.subreads.bam"
+        scrapsBam  = self.data.directory + "/polymerase/internal.scraps.bam"
+        vpr = PacBioBam.VirtualPolymeraseReader(subreadBam, scrapsBam)
+    
+        virtualRecord = vpr.Next()
+        
+        # NOTE: this method (VirtualRegionsMap, see the commented-out call below) is disabled
+        #
+        # Any attempt to retrieve this value resulted in several
+        #   "swig/python detected a memory leak of type 'unknown', no destructor found."
+        # errors (& an empty dictionary result). The same info is available via the
+        # VirtualRegionsTable(regionType) method, though a bit clunkier if you just want
+        # to iterate. But access to region info for specific types is available & correct,
+        # so I'm just going to leave this one out for now. - DB
+        #
+        # regionMap = virtualRecord.VirtualRegionsMap();
+    
+        # ADAPTER: 7 regions expected, each checked by beginPos/endPos
+        adapter = virtualRecord.VirtualRegionsTable(PacBioBam.VirtualRegionType_ADAPTER)
+        self.assertEqual(7, len(adapter))
+        self.assertEqual(3047, adapter[0].beginPos);
+        self.assertEqual(3095, adapter[0].endPos);
+        self.assertEqual(3650, adapter[1].beginPos);
+        self.assertEqual(3700, adapter[1].endPos);
+        self.assertEqual(4289, adapter[2].beginPos);
+        self.assertEqual(4335, adapter[2].endPos);
+        self.assertEqual(4888, adapter[3].beginPos);
+        self.assertEqual(4939, adapter[3].endPos);
+        self.assertEqual(5498, adapter[4].beginPos);
+        self.assertEqual(5546, adapter[4].endPos);
+        self.assertEqual(6116, adapter[5].beginPos);
+        self.assertEqual(6173, adapter[5].endPos);
+        self.assertEqual(6740, adapter[6].beginPos);
+        self.assertEqual(6790, adapter[6].endPos);
+    
+        # BARCODE: 14 regions expected, each checked by beginPos/endPos
+        barcode = virtualRecord.VirtualRegionsTable(PacBioBam.VirtualRegionType_BARCODE)
+        self.assertEqual(14, len(barcode))
+        self.assertEqual(3025, barcode[0].beginPos);
+        self.assertEqual(3047, barcode[0].endPos);
+        self.assertEqual(3095, barcode[1].beginPos);
+        self.assertEqual(3116, barcode[1].endPos);
+        self.assertEqual(3628, barcode[2].beginPos);
+        self.assertEqual(3650, barcode[2].endPos);
+        self.assertEqual(3700, barcode[3].beginPos);
+        self.assertEqual(3722, barcode[3].endPos);
+        self.assertEqual(4267, barcode[4].beginPos);
+        self.assertEqual(4289, barcode[4].endPos);
+        self.assertEqual(4335, barcode[5].beginPos);
+        self.assertEqual(4356, barcode[5].endPos);
+        self.assertEqual(4864, barcode[6].beginPos);
+        self.assertEqual(4888, barcode[6].endPos);
+        self.assertEqual(4939, barcode[7].beginPos);
+        self.assertEqual(4960, barcode[7].endPos);
+        self.assertEqual(5477, barcode[8].beginPos);
+        self.assertEqual(5498, barcode[8].endPos);
+        self.assertEqual(5546, barcode[9].beginPos);
+        self.assertEqual(5571, barcode[9].endPos);
+        self.assertEqual(6087, barcode[10].beginPos);
+        self.assertEqual(6116, barcode[10].endPos);
+        self.assertEqual(6173, barcode[11].beginPos);
+        self.assertEqual(6199, barcode[11].endPos);
+        self.assertEqual(6719, barcode[12].beginPos);
+        self.assertEqual(6740, barcode[12].endPos);
+        self.assertEqual(6790, barcode[13].beginPos);
+        self.assertEqual(6812, barcode[13].endPos);
+    
+        # HQREGION: a single region expected
+        hqregion = virtualRecord.VirtualRegionsTable(PacBioBam.VirtualRegionType_HQREGION)
+        self.assertEqual(1, len(hqregion))
+        
+        self.assertEqual(2659, hqregion[0].beginPos);
+        self.assertEqual(7034, hqregion[0].endPos);
+    
+        # LQREGION: two regions expected
+        lqregion = virtualRecord.VirtualRegionsTable(PacBioBam.VirtualRegionType_LQREGION)
+        self.assertEqual(2, len(lqregion))
+        
+        self.assertEqual(0,    lqregion[0].beginPos);
+        self.assertEqual(2659, lqregion[0].endPos);
+        self.assertEqual(7034, lqregion[1].beginPos);
+        self.assertEqual(7035, lqregion[1].endPos);
+    
+        # SUBREAD: only the region count is checked, not the positions
+        subread = virtualRecord.VirtualRegionsTable(PacBioBam.VirtualRegionType_SUBREAD)
+        self.assertEqual(8, len(subread)) 
+    
+    def test_internalSubreadsToOriginal(self):
+        # Expects internal subreads + scraps to yield exactly one virtual record matching the one in internal.polymerase.bam.
+        # stitch virtual polymerase record
+        subreadsBam = self.data.directory + "/polymerase/internal.subreads.bam"
+        scrapsBam   = self.data.directory + "/polymerase/internal.scraps.bam"
+        vpr = PacBioBam.VirtualPolymeraseReader(subreadsBam, scrapsBam)
+
+        self.assertTrue(vpr.HasNext())
+        virtualRecord = vpr.Next()
+        self.assertFalse(vpr.HasNext())
+
+        # fetch original polymerase record
+        polyBam   = PacBioBam.DataSet(self.data.directory + "/polymerase/internal.polymerase.bam")
+        polyQuery = PacBioBam.EntireFileQuery(polyBam)
+
+        polyIter = polyQuery.begin()
+        polyEnd  = polyQuery.end()
+
+        self.assertTrue(polyIter != polyEnd)
+        polyRecord = polyIter.value()
+        polyIter.incr()
+        self.assertTrue(polyIter == polyEnd)
+
+        # compare (self.compare is a helper defined outside this excerpt)
+        self.compare(polyRecord, virtualRecord)
+
+    def test_internalHqToOriginal(self):
+        # Expects internal hqregions + lqregions to yield exactly one virtual record matching the one in internal.polymerase.bam.
+        # stitch virtual polymerase record
+        hqRegionsBam = self.data.directory + "/polymerase/internal.hqregions.bam"
+        lqRegionsBam = self.data.directory + "/polymerase/internal.lqregions.bam"
+        vpr = PacBioBam.VirtualPolymeraseReader(hqRegionsBam, lqRegionsBam)
+
+        self.assertTrue(vpr.HasNext())
+        virtualRecord = vpr.Next()
+        self.assertFalse(vpr.HasNext())
+
+        # fetch original polymerase record
+        polyBam   = PacBioBam.DataSet(self.data.directory + "/polymerase/internal.polymerase.bam")
+        polyQuery = PacBioBam.EntireFileQuery(polyBam)
+
+        polyIter = polyQuery.begin()
+        polyEnd  = polyQuery.end()
+
+        self.assertTrue(polyIter != polyEnd)
+        polyRecord = polyIter.value()
+        polyIter.incr()
+        self.assertTrue(polyIter == polyEnd)
+       
+        # compare (self.compare is a helper defined outside this excerpt)
+        self.compare(polyRecord, virtualRecord)
+
+    def test_productionSubreadsToOriginal(self):
+        # Expects production subreads + scraps to yield exactly one virtual record; compares it field by field with production.polymerase.bam.
+        # stitch virtual polymerase record
+        subreadsBam = self.data.directory + "/polymerase/production.subreads.bam"
+        scrapsBam   = self.data.directory + "/polymerase/production.scraps.bam"
+        vpr = PacBioBam.VirtualPolymeraseReader(subreadsBam, scrapsBam)
+
+        self.assertTrue(vpr.HasNext())
+        virtualRecord = vpr.Next()
+        self.assertFalse(vpr.HasNext())
+
+        # fetch original polymerase record
+        polyBam   = PacBioBam.DataSet(self.data.directory + "/polymerase/production.polymerase.bam")
+        polyQuery = PacBioBam.EntireFileQuery(polyBam)
+
+        polyIter = polyQuery.begin()
+        polyEnd  = polyQuery.end()
+
+        self.assertTrue(polyIter != polyEnd)
+        polyRecord = polyIter.value()
+        polyIter.incr()
+        self.assertTrue(polyIter == polyEnd)
+        
+        # compare: identity, sequence and tag fields must be equal
+        self.assertEqual(polyRecord.FullName(),        virtualRecord.FullName());
+        self.assertEqual(polyRecord.HoleNumber(),      virtualRecord.HoleNumber());
+        self.assertEqual(polyRecord.NumPasses(),       virtualRecord.NumPasses());
+        self.assertEqual(polyRecord.Sequence(),        virtualRecord.Sequence());
+        self.assertEqual(polyRecord.DeletionTag(),     virtualRecord.DeletionTag());
+        self.assertEqual(polyRecord.SubstitutionTag(), virtualRecord.SubstitutionTag());
+        self.assertEqual(polyRecord.IPD(),             virtualRecord.IPDV1Frames());  # NOTE(review): IPD() vs IPDV1Frames() - presumably intentional; confirm
+        self.assertEqual(polyRecord.ReadGroup(),       virtualRecord.ReadGroup());
+        
+        self.assertAlmostEqual(float(polyRecord.ReadAccuracy()), float(virtualRecord.ReadAccuracy()));
+        
+        self.assertEqual(polyRecord.Qualities().Fastq(),       virtualRecord.Qualities().Fastq());
+        self.assertEqual(polyRecord.DeletionQV().Fastq(),      virtualRecord.DeletionQV().Fastq());
+        self.assertEqual(polyRecord.InsertionQV().Fastq(),     virtualRecord.InsertionQV().Fastq());
+        self.assertEqual(polyRecord.MergeQV().Fastq(),         virtualRecord.MergeQV().Fastq());
+        self.assertEqual(polyRecord.SubstitutionQV().Fastq(),  virtualRecord.SubstitutionQV().Fastq());
+
+    def test_productionHqToOriginal(self):
+        # Builds one virtual record from the hqregion/scraps BAM pair and checks it against the original polymerase BAM.
+        # stitch virtual polymerase record
+        hqRegionsBam = self.data.directory + "/polymerase/production_hq.hqregion.bam"
+        lqRegionsBam = self.data.directory + "/polymerase/production_hq.scraps.bam"
+        vpr = PacBioBam.VirtualPolymeraseReader(hqRegionsBam, lqRegionsBam)
+
+        self.assertTrue(vpr.HasNext())
+        virtualRecord = vpr.Next()
+        self.assertFalse(vpr.HasNext())  # exactly one virtual record is expected
+
+        # fetch original polymerase record
+        polyBam   = PacBioBam.DataSet(self.data.directory + "/polymerase/production.polymerase.bam")
+        polyQuery = PacBioBam.EntireFileQuery(polyBam)
+
+        polyIter = polyQuery.begin()
+        polyEnd  = polyQuery.end()
+
+        self.assertTrue(polyIter != polyEnd)
+        polyRecord = polyIter.value()
+        polyIter.incr()
+        self.assertTrue(polyIter == polyEnd)  # exactly one original record is expected
+        
+        # compare: neither record is expected to carry pulse calls
+        self.assertFalse(polyRecord.HasPulseCall());
+        self.assertFalse(virtualRecord.HasPulseCall());
+        # the following fields must be equal between the original and the virtual record
+        self.assertEqual(polyRecord.FullName(),        virtualRecord.FullName());
+        self.assertEqual(polyRecord.HoleNumber(),      virtualRecord.HoleNumber());
+        self.assertEqual(polyRecord.NumPasses(),       virtualRecord.NumPasses());
+        self.assertEqual(polyRecord.Sequence(),        virtualRecord.Sequence());
+        self.assertEqual(polyRecord.DeletionTag(),     virtualRecord.DeletionTag());
+        self.assertEqual(polyRecord.SubstitutionTag(), virtualRecord.SubstitutionTag());
+        self.assertEqual(polyRecord.IPD(),             virtualRecord.IPDV1Frames());  # NOTE(review): IPD() vs IPDV1Frames() - confirm the asymmetry is intended
+        self.assertEqual(polyRecord.ReadGroup(),       virtualRecord.ReadGroup());
+        # accuracies are converted to float and compared approximately (assertAlmostEqual)
+        self.assertAlmostEqual(float(polyRecord.ReadAccuracy()), float(virtualRecord.ReadAccuracy()));
+        # QV fields are compared through their Fastq() string form
+        self.assertEqual(polyRecord.Qualities().Fastq(),       virtualRecord.Qualities().Fastq());
+        self.assertEqual(polyRecord.DeletionQV().Fastq(),      virtualRecord.DeletionQV().Fastq());
+        self.assertEqual(polyRecord.InsertionQV().Fastq(),     virtualRecord.InsertionQV().Fastq());
+        self.assertEqual(polyRecord.MergeQV().Fastq(),         virtualRecord.MergeQV().Fastq());
+        self.assertEqual(polyRecord.SubstitutionQV().Fastq(),  virtualRecord.SubstitutionQV().Fastq());
+        # optional fields expected to be present (assertTrue) or absent (assertFalse) on the original record
+        self.assertTrue(polyRecord.HasDeletionQV());
+        self.assertTrue(polyRecord.HasDeletionTag());
+        self.assertTrue(polyRecord.HasInsertionQV());
+        self.assertTrue(polyRecord.HasMergeQV());
+        self.assertTrue(polyRecord.HasSubstitutionQV());
+        self.assertTrue(polyRecord.HasSubstitutionTag());
+        self.assertTrue(polyRecord.HasIPD());
+        self.assertFalse(polyRecord.HasLabelQV());
+        self.assertFalse(polyRecord.HasAltLabelQV());
+        self.assertFalse(polyRecord.HasAltLabelTag());
+        self.assertFalse(polyRecord.HasPkmean());
+        self.assertFalse(polyRecord.HasPkmid());
+        self.assertFalse(polyRecord.HasPulseCall());
+        self.assertFalse(polyRecord.HasPulseWidth());
+        self.assertFalse(polyRecord.HasPrePulseFrames());
+        self.assertFalse(polyRecord.HasPulseCallWidth());
+        # the virtual record must report the same presence/absence pattern
+        self.assertTrue(virtualRecord.HasDeletionQV());
+        self.assertTrue(virtualRecord.HasDeletionTag());
+        self.assertTrue(virtualRecord.HasInsertionQV());
+        self.assertTrue(virtualRecord.HasMergeQV());
+        self.assertTrue(virtualRecord.HasSubstitutionQV());
+        self.assertTrue(virtualRecord.HasSubstitutionTag());
+        self.assertTrue(virtualRecord.HasIPD());
+        self.assertFalse(virtualRecord.HasLabelQV());
+        self.assertFalse(virtualRecord.HasAltLabelQV());
+        self.assertFalse(virtualRecord.HasAltLabelTag());
+        self.assertFalse(virtualRecord.HasPkmean());
+        self.assertFalse(virtualRecord.HasPkmid());
+        self.assertFalse(virtualRecord.HasPulseCall());
+        self.assertFalse(virtualRecord.HasPulseWidth());
+        self.assertFalse(virtualRecord.HasPrePulseFrames());
+        self.assertFalse(virtualRecord.HasPulseCallWidth());   
+    
+    # ------------ HELPERS --------------
+    
+    def compare(self, b1, b2):
+        # Helper: asserts that records b1 and b2 both report every optional field below and that their values are equal.
+        self.assertTrue(b1.HasDeletionQV());
+        self.assertTrue(b1.HasDeletionTag());
+        self.assertTrue(b1.HasInsertionQV());
+        self.assertTrue(b1.HasMergeQV());
+        self.assertTrue(b1.HasSubstitutionQV());
+        self.assertTrue(b1.HasSubstitutionTag());
+        self.assertTrue(b1.HasLabelQV());
+        self.assertTrue(b1.HasAltLabelQV());
+        self.assertTrue(b1.HasAltLabelTag());
+        self.assertTrue(b1.HasPkmean());
+        self.assertTrue(b1.HasPkmid());
+        self.assertTrue(b1.HasPulseCall());
+        self.assertTrue(b1.HasIPD());
+        self.assertTrue(b1.HasPulseWidth());
+        self.assertTrue(b1.HasPrePulseFrames());
+        self.assertTrue(b1.HasPulseCallWidth());
+        self.assertTrue(b1.HasPulseMergeQV());
+        # the second record must report the same fields as present
+        self.assertTrue(b2.HasDeletionQV());
+        self.assertTrue(b2.HasDeletionTag());
+        self.assertTrue(b2.HasInsertionQV());
+        self.assertTrue(b2.HasMergeQV());
+        self.assertTrue(b2.HasSubstitutionQV());
+        self.assertTrue(b2.HasSubstitutionTag());
+        self.assertTrue(b2.HasLabelQV());
+        self.assertTrue(b2.HasAltLabelQV());
+        self.assertTrue(b2.HasAltLabelTag());
+        self.assertTrue(b2.HasPkmean());
+        self.assertTrue(b2.HasPkmid());
+        self.assertTrue(b2.HasPulseCall());
+        self.assertTrue(b2.HasIPD());
+        self.assertTrue(b2.HasPulseWidth());
+        self.assertTrue(b2.HasPrePulseFrames());
+        self.assertTrue(b2.HasPulseCallWidth());
+        self.assertTrue(b2.HasPulseMergeQV());
+        # values returned by the plain accessors must be equal
+        self.assertEqual(b1.FullName(),        b2.FullName());
+        self.assertEqual(b1.HoleNumber(),      b2.HoleNumber());
+        self.assertEqual(b1.NumPasses(),       b2.NumPasses());
+        self.assertEqual(b1.Sequence(),        b2.Sequence());
+        self.assertEqual(b1.DeletionTag(),     b2.DeletionTag());
+        self.assertEqual(b1.SubstitutionTag(), b2.SubstitutionTag());
+        self.assertEqual(b1.AltLabelTag(),     b2.AltLabelTag());
+        self.assertEqual(b1.Pkmean(),          b2.Pkmean());
+        self.assertEqual(b1.Pkmid(),           b2.Pkmid());
+        self.assertEqual(b1.PulseCall(),       b2.PulseCall());
+        self.assertEqual(b1.IPD(),             b2.IPD());
+        self.assertEqual(b1.PulseWidth(),      b2.PulseWidth());
+        self.assertEqual(b1.PrePulseFrames(),  b2.PrePulseFrames());
+        self.assertEqual(b1.PulseCallWidth(),  b2.PulseCallWidth());
+        self.assertEqual(b1.ReadGroup(),       b2.ReadGroup());
+        # QV fields are compared through their Fastq() string form
+        self.assertEqual(b1.Qualities().Fastq(),       b2.Qualities().Fastq());
+        self.assertEqual(b1.DeletionQV().Fastq(),      b2.DeletionQV().Fastq());
+        self.assertEqual(b1.InsertionQV().Fastq(),     b2.InsertionQV().Fastq());
+        self.assertEqual(b1.MergeQV().Fastq(),         b2.MergeQV().Fastq());
+        self.assertEqual(b1.SubstitutionQV().Fastq(),  b2.SubstitutionQV().Fastq());
+        self.assertEqual(b1.PulseMergeQV().Fastq(),    b2.PulseMergeQV().Fastq());
+        self.assertEqual(b1.LabelQV().Fastq(),         b2.LabelQV().Fastq());
+        self.assertEqual(b1.AltLabelQV().Fastq(),      b2.AltLabelQV().Fastq());
+        
+        
diff --git a/tests/src/test_Accuracy.cpp b/tests/src/test_Accuracy.cpp
index 17d1f59..9750dd4 100644
--- a/tests/src/test_Accuracy.cpp
+++ b/tests/src/test_Accuracy.cpp
@@ -47,17 +47,17 @@ using namespace std;
 
 TEST(AccuracyTest, ClampValues)
 {
-    Accuracy a_zero(0);
-    Accuracy a_neg(-1);
-    Accuracy a_min(0);
-    Accuracy a_normal(300);
-    Accuracy a_max(1000);
-    Accuracy a_tooLarge(2000);
+    Accuracy a_zero(0.0);
+    Accuracy a_neg(-0.5);       // negative input; expected to read back as 0.0
+    Accuracy a_min(0.0);
+    Accuracy a_normal(0.9);     // in-range input; expected to read back unchanged
+    Accuracy a_max(1.0);
+    Accuracy a_tooLarge(1.1);   // input above 1.0; expected to read back as 1.0
 
-    EXPECT_EQ(0,    a_zero);
-    EXPECT_EQ(0,    a_neg);
-    EXPECT_EQ(0,    a_min);
-    EXPECT_EQ(300,  a_normal);
-    EXPECT_EQ(1000, a_max);
-    EXPECT_EQ(1000, a_tooLarge);
+    EXPECT_FLOAT_EQ(0.0, a_zero);
+    EXPECT_FLOAT_EQ(0.0, a_neg);      // clamped up to the lower bound
+    EXPECT_FLOAT_EQ(0.0, a_min);
+    EXPECT_FLOAT_EQ(0.9, a_normal);
+    EXPECT_FLOAT_EQ(1.0, a_max);
+    EXPECT_FLOAT_EQ(1.0, a_tooLarge); // clamped down to the upper bound
 }
diff --git a/tests/src/test_AlignmentPrinter.cpp b/tests/src/test_AlignmentPrinter.cpp
index 0034f2d..9aa7f5b 100644
--- a/tests/src/test_AlignmentPrinter.cpp
+++ b/tests/src/test_AlignmentPrinter.cpp
@@ -62,41 +62,91 @@ const string singleInsertionBam = tests::Data_Dir + "/aligned.bam";
 TEST(AlignmentPrinterTest, Print)
 {
     IndexedFastaReader r(lambdaFasta);
+    AlignmentPrinter pretty(r);
 
     BamFile bamFile(singleInsertionBam);
     EntireFileQuery bamQuery(bamFile);
-
     auto it = bamQuery.begin();
-    
 
-    // std::cerr << record.AlignedStart() << std::endl;
-    // std::cerr << record.Sequence(Orientation::GENOMIC, true) << std::endl;
-    // std::cerr << record.Sequence(Orientation::GENOMIC, true, true) << std::endl;
+    // expected output for each record, including the raw ANSI escape sequences (\x1B[...m) around individual '|' markers
+    auto expected = string
+    {
+        "Read        : singleInsertion2\n"
+        "Reference   : lambda_NEB3011\n"
+        "\n"
+        "Read-length : 49\n"
+        "Concordance : 0.96\n"
+        "\n"
+        "5210 : GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGG : 5249\n"
+        "       \x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||||| |\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m|||||||||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||||| ||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||||||\n"
+        "   0 : GGCTGCAG-GTACAGCGGTCAGGAGGCCAATTGATGCCGG :   39\n"
+        "\n"
+        "5249 : ACTGGCTGAT : 5259\n"
+        "       |\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||||||\n"
+        "  39 : ACTGGCTGAT :   49\n"
+        "\n"
+    };
 
-    AlignmentPrinter pretty(r);
+    auto record = *it++;  // 1st record
+    EXPECT_EQ(expected, pretty.Print(record, Orientation::GENOMIC));
 
-    // std::string expected =
-    // "Read        : singleInsertion2\n"
-    // "Reference   : lambda_NEB3011\n"
-    // "\n"
-    // "Read-length : 49\n"
-    // "Concordance : 0.96\n"
-    // "\n"
-    // "   GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGGACTGGCTGAT\n"
-    // "   |||||||| ||||||||||||||||||| |||||||||||||||||||||\n"
-    // "   GGCTGCAG-GTACAGCGGTCAGGAGGCCAATTGATGCCGGACTGGCTGAT\n";
-    // EXPECT_EQ(expected, pretty.Print(record, Orientation::NATIVE));
+    expected = {  // 2nd record: same alignment text as the 1st, read name "singleInsertion"
+        "Read        : singleInsertion\n"
+        "Reference   : lambda_NEB3011\n"
+        "\n"
+        "Read-length : 49\n"
+        "Concordance : 0.96\n"
+        "\n"
+        "5210 : GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGG : 5249\n"
+        "       \x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||||| |\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m|||||||||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||||| ||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||||||\n"
+        "   0 : GGCTGCAG-GTACAGCGGTCAGGAGGCCAATTGATGCCGG :   39\n"
+        "\n"
+        "5249 : ACTGGCTGAT : 5259\n"
+        "       |\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||||||\n"
+        "  39 : ACTGGCTGAT :   49\n"
+        "\n"
+    };
 
-    auto record = *it++;
-    std::cerr << pretty.Print(record, Orientation::GENOMIC);
-    std::cerr << std::endl << std::endl;
     record = *it++;
-    std::cerr << pretty.Print(record, Orientation::GENOMIC);
-    std::cerr << std::endl << std::endl;
+    EXPECT_EQ(expected, pretty.Print(record, Orientation::GENOMIC));
+
+    expected = {  // 3rd record
+        "Read        : singleInsertion2\n"
+        "Reference   : lambda_NEB3011\n"
+        "\n"
+        "Read-length : 59\n"
+        "Concordance : 0.951\n"
+        "\n"
+        "9377 : AAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGATCG : 9417\n"
+        "       |||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m|||||||||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m|||||||||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m|||||||||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m|||  |\n"
+        "   0 : AAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGA--G :   38\n"
+        "\n"
+        "9417 : CAGCACGGT-AACAGCGGCAA : 9437\n"
+        "       |||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||| ||||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||||\n"
+        "  38 : CAGCACGGTAAACAGCGGCAA :   59\n"
+        "\n"
+    };
+
     record = *it++;
-    std::cerr << pretty.Print(record, Orientation::GENOMIC);
-    std::cerr << std::endl << std::endl;
+    EXPECT_EQ(expected, pretty.Print(record, Orientation::GENOMIC));
+
+    expected = {  // 4th record: same alignment text as the 3rd, read name "singleInsertion"
+        "Read        : singleInsertion\n"
+        "Reference   : lambda_NEB3011\n"
+        "\n"
+        "Read-length : 59\n"
+        "Concordance : 0.951\n"
+        "\n"
+        "9377 : AAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGATCG : 9417\n"
+        "       |||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m|||||||||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m|||||||||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m|||||||||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m|||  |\n"
+        "   0 : AAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGA--G :   38\n"
+        "\n"
+        "9417 : CAGCACGGT-AACAGCGGCAA : 9437\n"
+        "       |||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||| ||||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||||\n"
+        "  38 : CAGCACGGTAAACAGCGGCAA :   59\n"
+        "\n"
+    };
+
     record = *it++;
-    std::cerr << pretty.Print(record, Orientation::GENOMIC);
-    std::cerr << std::endl << std::endl;
+    EXPECT_EQ(expected, pretty.Print(record, Orientation::GENOMIC));
 }
diff --git a/tests/src/test_BamFile.cpp b/tests/src/test_BamFile.cpp
index 4ca910d..c8f1be8 100644
--- a/tests/src/test_BamFile.cpp
+++ b/tests/src/test_BamFile.cpp
@@ -42,7 +42,11 @@
 #include "TestData.h"
 #include <gtest/gtest.h>
 #include <pbbam/BamFile.h>
+#include <pbbam/EntireFileQuery.h>
+#include <pbbam/../../src/FileUtils.h>
 #include <stdexcept>
+#include <cstdlib>
+#include <unistd.h>
 using namespace PacBio;
 using namespace PacBio::BAM;
 using namespace std;
@@ -67,3 +71,107 @@ TEST(BamFileTest, NonBamFileThrows)
     },
     std::exception);
 }
+
+TEST(BamFileTest, RelativePathBamOk)  // opens ../b/test1.bam via a relative path in three ways; each must yield 10 records
+{
+    const string cwd = internal::FileUtils::CurrentWorkingDirectory();  // saved so it can be restored at the end
+    ASSERT_EQ(0, chdir(tests::Data_Dir.c_str()));
+    ASSERT_EQ(0, chdir("relative/a"));
+    // NOTE(review): if an ASSERT_* fails or a constructor throws, the test exits before the chdir(cwd) restore at the end
+    { // direct BAM
+        BamFile file("../b/test1.bam");
+        EntireFileQuery entireFile(file);
+        int count = 0;
+        for (const BamRecord& r : entireFile) {
+            (void)r;
+            ++count;
+        }
+        EXPECT_EQ(10, count);
+    }
+
+    { // dataset from BAM filename
+        DataSet ds("../b/test1.bam");
+        EntireFileQuery entireFile(ds);
+        int count = 0;
+        for (const BamRecord& r : entireFile) {
+            (void)r;
+            ++count;
+        }
+        EXPECT_EQ(10, count);
+    }
+
+    { // dataset from BamFile object
+        BamFile file("../b/test1.bam");
+        DataSet ds(file);
+        EntireFileQuery entireFile(ds);
+        int count = 0;
+        for (const BamRecord& r : entireFile) {
+            (void)r;
+            ++count;
+        }
+        EXPECT_EQ(10, count);
+    }
+
+    ASSERT_EQ(0, chdir(cwd.c_str()));  // restore the original working directory
+}
+
+TEST(BamFileTest, RelativePathXmlOk)  // a dataset XML opened via a relative path must yield 30 records in total
+{
+    const string cwd = internal::FileUtils::CurrentWorkingDirectory();
+    // the dataset path below is given relative to the test-data directory
+    ASSERT_EQ(0, chdir(tests::Data_Dir.c_str()));
+
+    {
+        DataSet ds("relative/relative.xml");
+        EntireFileQuery entireFile(ds);
+        int count = 0;
+        for (const BamRecord& r : entireFile) {
+            (void)r;
+            ++count;
+        }
+        EXPECT_EQ(30, count);
+    }
+
+    ASSERT_EQ(0, chdir(cwd.c_str()));  // restore the original working directory
+}
+
+TEST(BamFileTest, RelativePathFofnOk)  // a FOFN opened via a relative path must yield 30 records in total
+{
+    const string cwd = internal::FileUtils::CurrentWorkingDirectory();
+    ASSERT_EQ(0, chdir(tests::Data_Dir.c_str()));
+
+    { // FOFN containing BAMs in different subdirs
+
+        DataSet ds("relative/relative.fofn");
+        EntireFileQuery entireFile(ds);
+        int count = 0;
+        for (const BamRecord& r : entireFile) {
+            (void)r;
+            ++count;
+        }
+        EXPECT_EQ(30, count);
+    }
+
+    // NOTE: doesn't yet support a FOFN containing an XML with relative paths
+    // (the disabled block below covers that case and expects 60 records)
+//    { // FOFN containing subdir BAMs + relative.xml
+
+//        DataSet ds("relative/relative2.fofn");
+//        EntireFileQuery entireFile(ds);
+//        int count = 0;
+//        for (const BamRecord& r : entireFile) {
+//            (void)r;
+//            ++count;
+//        }
+//        EXPECT_EQ(60, count);
+//    }
+
+    ASSERT_EQ(0, chdir(cwd.c_str()));  // restore the original working directory
+}
+
+TEST(BamFileTest, TruncatedFileThrowsOk)  // constructing a BamFile on truncated.bam must throw
+{
+    const string fn = tests::Data_Dir + "/truncated.bam";
+    EXPECT_THROW(BamFile file(fn), std::runtime_error);  // the failure is expected from the constructor itself
+}
+
diff --git a/tests/src/test_BamHeader.cpp b/tests/src/test_BamHeader.cpp
index c4f872a..b4e9fd6 100644
--- a/tests/src/test_BamHeader.cpp
+++ b/tests/src/test_BamHeader.cpp
@@ -82,7 +82,7 @@ TEST(BamHeaderTest, DefaultConstruction)
 
 TEST(BamHeaderTest, DecodeTest)
 {
-    const string& text = "@HD\tVN:1.1\tSO:queryname\tpb:3.0b3\n"
+    const string& text = "@HD\tVN:1.1\tSO:queryname\tpb:3.0.1\n"
                          "@SQ\tSN:chr1\tLN:2038\tSP:chocobo\n"
                          "@SQ\tSN:chr2\tLN:3042\tSP:chocobo\n"
                          "@RG\tID:rg1\tSM:control\n"
@@ -96,7 +96,7 @@ TEST(BamHeaderTest, DecodeTest)
 
     EXPECT_EQ(string("1.1"),       header.Version());
     EXPECT_EQ(string("queryname"), header.SortOrder());
-    EXPECT_EQ(string("3.0b3"),     header.PacBioBamVersion());
+    EXPECT_EQ(string("3.0.1"),     header.PacBioBamVersion());
 
     EXPECT_EQ(3, header.ReadGroups().size());
     EXPECT_TRUE(header.HasReadGroup("rg1"));
@@ -124,7 +124,51 @@ TEST(BamHeaderTest, DecodeTest)
     EXPECT_EQ(string("citation needed"), header.Comments().at(1));
 }
 
-TEST(BamHeaderCodecTest, EncodeTest)
+TEST(BamHeaderTest, VersionCheckOk)
+{
+    // Each case constructs a BamHeader from SAM header text that differs only in the @HD pb: version value.
+    // empty
+    EXPECT_THROW({
+        const string text = "@HD\tVN:1.1\tSO:queryname\tpb:\n";
+        BamHeader h(text);
+        (void)h;
+    }, std::runtime_error);
+
+    // old beta version(s)
+    EXPECT_THROW({
+        const string text = "@HD\tVN:1.1\tSO:queryname\tpb:3.0b3\n";
+        BamHeader h(text);
+        (void)h;
+    }, std::runtime_error);
+    EXPECT_THROW({
+        const string text = "@HD\tVN:1.1\tSO:queryname\tpb:3.0b7\n";
+        BamHeader h(text);
+        (void)h;
+    }, std::runtime_error);
+
+    // contains other, invalid info
+    EXPECT_THROW({
+        const string text = "@HD\tVN:1.1\tSO:queryname\tpb:3.0.should_not_work\n";
+        BamHeader h(text);
+        (void)h;
+    }, std::runtime_error);
+
+    // valid syntax, but earlier than minimum allowed version
+    EXPECT_THROW({
+        const string text = "@HD\tVN:1.1\tSO:queryname\tpb:3.0.0\n";
+        BamHeader h(text);
+        (void)h;
+    }, std::runtime_error);
+
+    // well-formed version (3.0.1) that must be accepted without throwing
+    EXPECT_NO_THROW({
+        const string text = "@HD\tVN:1.1\tSO:queryname\tpb:3.0.1\n";
+        BamHeader h(text);
+        (void)h;
+    });
+}
+
+TEST(BamHeaderTest, EncodeTest)
 {
     ReadGroupInfo rg1("rg1");
     rg1.Sample("control");
@@ -144,7 +188,7 @@ TEST(BamHeaderCodecTest, EncodeTest)
     BamHeader header;
     header.Version("1.1")
           .SortOrder("queryname")
-          .PacBioBamVersion("3.0b3")
+          .PacBioBamVersion("3.0.1")
           .AddReadGroup(rg1)
           .AddReadGroup(rg2)
           .AddReadGroup(rg3)
@@ -154,12 +198,12 @@ TEST(BamHeaderCodecTest, EncodeTest)
           .AddComment("ipsum and so on")
           .AddComment("citation needed");
 
-    const string& expectedText = "@HD\tVN:1.1\tSO:queryname\tpb:3.0b3\n"
+    const string& expectedText = "@HD\tVN:1.1\tSO:queryname\tpb:3.0.1\n"
                                  "@SQ\tSN:chr1\tLN:2038\tSP:chocobo\n"
                                  "@SQ\tSN:chr2\tLN:3042\tSP:chocobo\n"
-                                 "@RG\tID:rg1\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:control\n"
-                                 "@RG\tID:rg2\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\n"
-                                 "@RG\tID:rg3\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\n"
+                                 "@RG\tID:rg1\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:control\tPM:SEQUEL\n"
+                                 "@RG\tID:rg2\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
+                                 "@RG\tID:rg3\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
                                  "@PG\tID:_foo_\tPN:ide\n"
                                  "@CO\tipsum and so on\n"
                                  "@CO\tcitation needed\n";
@@ -188,7 +232,7 @@ TEST(BamHeaderTest, ConvertToRawDataOk)
     BamHeader header;
     header.Version("1.1")
           .SortOrder("queryname")
-          .PacBioBamVersion("3.0b3")
+          .PacBioBamVersion("3.0.1")
           .AddReadGroup(rg1)
           .AddReadGroup(rg2)
           .AddReadGroup(rg3)
@@ -198,12 +242,12 @@ TEST(BamHeaderTest, ConvertToRawDataOk)
           .AddComment("ipsum and so on")
           .AddComment("citation needed");
 
-    const string& expectedText = "@HD\tVN:1.1\tSO:queryname\tpb:3.0b3\n"
+    const string& expectedText = "@HD\tVN:1.1\tSO:queryname\tpb:3.0.1\n"
                                  "@SQ\tSN:chr1\tLN:2038\tSP:chocobo\n"
                                  "@SQ\tSN:chr2\tLN:3042\tSP:chocobo\n"
-                                 "@RG\tID:rg1\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:control\n"
-                                 "@RG\tID:rg2\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\n"
-                                 "@RG\tID:rg3\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\n"
+                                 "@RG\tID:rg1\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:control\tPM:SEQUEL\n"
+                                 "@RG\tID:rg2\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
+                                 "@RG\tID:rg3\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
                                  "@PG\tID:_foo_\tPN:ide\n"
                                  "@CO\tipsum and so on\n"
                                  "@CO\tcitation needed\n";
@@ -241,7 +285,7 @@ TEST(BamHeaderTest, ExtractFromRawDataOk)
     BamHeader header;
     header.Version("1.1")
           .SortOrder("queryname")
-          .PacBioBamVersion("3.0b3")
+          .PacBioBamVersion("3.0.1")
           .AddReadGroup(rg1)
           .AddReadGroup(rg2)
           .AddReadGroup(rg3)
@@ -251,12 +295,12 @@ TEST(BamHeaderTest, ExtractFromRawDataOk)
           .AddComment("ipsum and so on")
           .AddComment("citation needed");
 
-    const string& expectedText = "@HD\tVN:1.1\tSO:queryname\tpb:3.0b3\n"
+    const string& expectedText = "@HD\tVN:1.1\tSO:queryname\tpb:3.0.1\n"
                                  "@SQ\tSN:chr1\tLN:2038\tSP:chocobo\n"
                                  "@SQ\tSN:chr2\tLN:3042\tSP:chocobo\n"
-                                 "@RG\tID:rg1\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:control\n"
-                                 "@RG\tID:rg2\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\n"
-                                 "@RG\tID:rg3\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\n"
+                                 "@RG\tID:rg1\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:control\tPM:SEQUEL\n"
+                                 "@RG\tID:rg2\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
+                                 "@RG\tID:rg3\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
                                  "@PG\tID:_foo_\tPN:ide\n"
                                  "@CO\tipsum and so on\n"
                                  "@CO\tcitation needed\n";
@@ -279,3 +323,139 @@ TEST(BamHeaderTest, ExtractFromRawDataOk)
     text = newHeader.ToSam();
     EXPECT_EQ(expectedText, text);
 }
+
+TEST(BamHeaderTest, MergeOk)  // merges two compatible headers via operator+ and operator+= and checks the SAM text
+{
+    const string hdrText1 = {
+        "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n"
+        "@RG\tID:a955def6\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;"
+            "InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BINDINGKIT=100356300;"
+            "SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3.0.0.140018;FRAMERATEHZ=75.000000\t"
+            "PU:m140918_150013_42139_c100697631700000001823144703261565_s1_p0\t"
+            "PM:SEQUEL\n"
+        "@PG\tID:bam2bam-0.20.0\tPN:bam2bam\tVN:0.20.0\n"
+        "@PG\tID:bax2bam-0.0.2\tPN:bax2bam\tVN:0.0.2\n"
+        "@CO\tcomment1\n"
+    };
+    // second header: identical @HD line, different @RG, @PG and @CO entries
+    const string hdrText2 = {
+        "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n"
+        "@RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;"
+            "InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;"
+            "PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;"
+            "PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;"
+            "BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;"
+            "FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\t"
+            "PM:SEQUEL\n"
+        "@PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0\n"
+        "@PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0\n"
+        "@PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0\n"
+        "@CO\tcomment2\n"
+    };
+    // expected merge result: the shared @HD line, then the entries of both inputs grouped by record type (@RG, @PG, @CO)
+    const string mergedText = {
+        "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n"
+        "@RG\tID:a955def6\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;"
+            "InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BINDINGKIT=100356300;"
+            "SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3.0.0.140018;FRAMERATEHZ=75.000000\t"
+            "PU:m140918_150013_42139_c100697631700000001823144703261565_s1_p0\t"
+            "PM:SEQUEL\n"
+        "@RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;"
+            "InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;"
+            "PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;"
+            "PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;"
+            "BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;"
+            "FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\t"
+            "PM:SEQUEL\n"
+        "@PG\tID:bam2bam-0.20.0\tPN:bam2bam\tVN:0.20.0\n"
+        "@PG\tID:bax2bam-0.0.2\tPN:bax2bam\tVN:0.0.2\n"
+        "@PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0\n"
+        "@PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0\n"
+        "@PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0\n"
+        "@CO\tcomment1\n"
+        "@CO\tcomment2\n"
+    };
+
+    { // operator+
+
+        const BamHeader header1(hdrText1);
+        const BamHeader header2(hdrText2);
+        const BamHeader merged = header1 + header2;
+        EXPECT_EQ(mergedText, merged.ToSam());
+
+        // also make sure inputs not changed
+        EXPECT_EQ(hdrText1, header1.ToSam());
+        EXPECT_EQ(hdrText2, header2.ToSam());
+    }
+
+    { // operator+=
+
+        BamHeader header1(hdrText1);
+        header1 += BamHeader(hdrText2);  // in-place merge must produce the same text as operator+
+        EXPECT_EQ(mergedText, header1.ToSam());
+    }
+}
+
+TEST(BamHeaderTest, MergeHandlesDuplicateReadGroups)
+{
+    const string hdrText = {
+        "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n"
+        "@RG\tID:a955def6\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;"
+            "InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BINDINGKIT=100356300;"
+            "SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3.0.0.140018;FRAMERATEHZ=75.000000\t"
+            "PU:m140918_150013_42139_c100697631700000001823144703261565_s1_p0\tPM:SEQUEL\n"
+        "@PG\tID:bam2bam-0.20.0\tPN:bam2bam\tVN:0.20.0\n"
+        "@PG\tID:bax2bam-0.0.2\tPN:bax2bam\tVN:0.0.2\n"
+    };
+
+    // duplicate @RG:IDs handled ok (i.e. not duplicated in output)
+    // merging a header with an identical copy must reproduce the input text exactly
+    const BamHeader header1(hdrText);
+    const BamHeader header2(hdrText);
+    const BamHeader merged = header1 + header2;
+    EXPECT_EQ(hdrText, merged.ToSam());
+}
+
+TEST(BamHeaderTest, IncompatibleMergeFails)  // operator+ must throw std::runtime_error for each mismatch below
+{
+    { // @HD:VN
+        const string hdrText1 = { "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n" };
+        const string hdrText2 = { "@HD\tVN:1.0\tSO:unknown\tpb:3.0.1\n" };  // VN differs (1.0 vs 1.1)
+        const BamHeader header1(hdrText1);
+        const BamHeader header2(hdrText2);
+        EXPECT_THROW(header1 + header2, std::runtime_error);
+    }
+
+    { // @HD:SO
+        const string hdrText1 = { "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n" };
+        const string hdrText2 = { "@HD\tVN:1.1\tSO:coordinate\tpb:3.0.1\n" };  // SO differs (coordinate vs unknown)
+        const BamHeader header1(hdrText1);
+        const BamHeader header2(hdrText2);
+        EXPECT_THROW(header1 + header2, std::runtime_error);
+    }
+
+    { // @HD:pb
+        const string hdrText1 = { "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n" };
+        const string hdrText2 = { "@HD\tVN:1.1\tSO:unknown\tpb:3.0.3\n" };  // pb differs (3.0.3 vs 3.0.1)
+        const BamHeader header1(hdrText1);
+        const BamHeader header2(hdrText2);
+        EXPECT_THROW(header1 + header2, std::runtime_error);
+    }
+
+    { // @SQ list clash
+        // both headers list foo/42 first, but their second @SQ entries differ (bar/24 vs baz/99)
+        const string hdrText1 = {
+            "@HD\tVN:1.1\tSO:coordinate\tpb:3.0.1\n"
+            "@SQ\tSN:foo\tLN:42\n"
+            "@SQ\tSN:bar\tLN:24\n"
+        };
+        const string hdrText2 = {
+            "@HD\tVN:1.1\tSO:coordinate\tpb:3.0.1\n"
+            "@SQ\tSN:foo\tLN:42\n"
+            "@SQ\tSN:baz\tLN:99\n"
+        };
+        const BamHeader header1(hdrText1);
+        const BamHeader header2(hdrText2);
+        EXPECT_THROW(header1 + header2, std::runtime_error);
+    }
+}
diff --git a/tests/src/test_BamRecord.cpp b/tests/src/test_BamRecord.cpp
index e32ad2e..a166044 100644
--- a/tests/src/test_BamRecord.cpp
+++ b/tests/src/test_BamRecord.cpp
@@ -223,6 +223,9 @@ TEST(BamRecordTest, DefaultValues)
     EXPECT_EQ(-1, bam.AlignedStart());
     EXPECT_EQ(-1, bam.AlignedEnd());
     EXPECT_THROW(bam.HoleNumber(), std::exception);
+    EXPECT_FALSE(bam.HasNumPasses());
+    EXPECT_THROW(bam.NumPasses(), std::exception);
+
     // 8888888888888888888888888888888888888
 //    EXPECT_EQ(-1, bam.NumPasses());
 //    EXPECT_EQ(-1, bam.QueryStart());
@@ -442,6 +445,7 @@ TEST(BamRecordTest, CoreSetters)
 //    EXPECT_EQ(42, bam.HoleNumber());
 //    EXPECT_EQ(testQVs, bam.InsertionQVs());
 //    EXPECT_EQ(testQVs, bam.MergeQVs());
+
 //    EXPECT_EQ(42, bam.NumPasses());
 //    EXPECT_EQ(42, bam.QueryEnd());
 //    EXPECT_EQ(42, bam.QueryStart());
diff --git a/tests/src/test_BamRecordClipping.cpp b/tests/src/test_BamRecordClipping.cpp
index 425b1e5..5193868 100644
--- a/tests/src/test_BamRecordClipping.cpp
+++ b/tests/src/test_BamRecordClipping.cpp
@@ -1185,3 +1185,27 @@ TEST(BamRecordClippingTest, StaticClippedToReference)
 //    EXPECT_EQ(s3_tagQuals_clipped, s3.AltLabelQV(Orientation::GENOMIC).Fastq());
     EXPECT_EQ(s3_frames_clipped,   s3.IPD(Orientation::GENOMIC).Data());
 }
+
+TEST(BamRecordTest, ClipCigarData)  // NOTE(review): suite is BamRecordTest, but the neighbouring test in this file uses BamRecordClippingTest
+{
+    const Position qStart = 500;
+    const Position qEnd   = 515;
+    const string seq      = "TTAACCGTTAGCAAA";
+    const string quals    = "--?]?]?]?]?*+++";
+    const string tagBases = "TTAACCGTTAGCAAA";
+    const string tagQuals = "--?]?]?]?]?*+++";
+    const f_data frames   = { 40, 40, 10, 10, 20, 20, 30, 40, 40, 10, 30, 20, 10, 10, 10 };
+    const uint8_t mapQual = 80;
+    BamRecord s3 = tests::MakeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames);
+    BamRecord s3_rev = tests::MakeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames);
+    // NOTE(review): s3_rev is mapped below but no expectation ever reads it
+    const string s3_cigar = "5H2S4=1D2I2D4=3S7H";
+    s3.Map(0, 100, Strand::FORWARD, s3_cigar, mapQual);
+    s3_rev.Map(0, 100, Strand::REVERSE, s3_cigar, mapQual);
+    // CigarData() must return the CIGAR as mapped; CigarData(true) must omit the leading/trailing H and S operations
+    const Cigar s3_cigar_raw     = s3.CigarData();
+    const Cigar s3_cigar_clipped = s3.CigarData(true);
+
+    EXPECT_EQ(s3_cigar, s3_cigar_raw.ToStdString());
+    EXPECT_EQ(string("4=1D2I2D4="), s3_cigar_clipped.ToStdString());
+}
diff --git a/tests/src/test_BamWriter.cpp b/tests/src/test_BamWriter.cpp
index 320c698..37b4b40 100644
--- a/tests/src/test_BamWriter.cpp
+++ b/tests/src/test_BamWriter.cpp
@@ -46,6 +46,9 @@
 #include <pbbam/BamRecord.h>
 #include <pbbam/BamWriter.h>
 #include <pbbam/EntireFileQuery.h>
+
+#include <pbbam/../../src/FileUtils.h>
+
 #include <chrono>
 #include <iostream>
 #include <string>
@@ -196,3 +199,27 @@ TEST(BamWriterTest, SingleWrite_UserRecord)
 
     remove(generatedBamFn.c_str());
 }
+
+//static
+//void CreateBamFile(const string& filename)
+//{
+//    if (internal::FileUtils::Exists(filename))
+//        return;
+
+//    BamHeader header;
+//    BamWriter writer(filename, header);
+
+//    BamRecord r;
+//    for (int i = 0; i < 10; ++i) {
+//        writer.Write(r);
+//    }
+//}
+
+
+//TEST(BamWriterTest, CreateBAMs)
+//{
+//    const string relativeDir = tests::Data_Dir + "/relative";
+//    CreateBamFile(relativeDir + "/a/test.bam");
+//    CreateBamFile(relativeDir + "/b/test1.bam");
+//    CreateBamFile(relativeDir + "/b/test2.bam");
+//}
diff --git a/src/QueryBase.cpp b/tests/src/test_BarcodeQuery.cpp
similarity index 89%
rename from src/QueryBase.cpp
rename to tests/src/test_BarcodeQuery.cpp
index 8bd2a53..6ec02a8 100644
--- a/src/QueryBase.cpp
+++ b/tests/src/test_BarcodeQuery.cpp
@@ -35,14 +35,19 @@
 
 // Author: Derek Barnett
 
-#include "pbbam/QueryBase.h"
-#include "pbbam/BamRecord.h"
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include "TestData.h"
+#include <gtest/gtest.h>
+#include <pbbam/BarcodeQuery.h>
+#include <string>
 using namespace PacBio;
 using namespace PacBio::BAM;
 using namespace std;
 
-QueryBase::QueryBase(const BamFile& file)
-    : file_(file)
-{ }
-
-QueryBase::~QueryBase(void) { }
+TEST(BarcodeQueryTest, QueryOk)
+{
+    // TODO: placeholder, nothing is checked yet - come back with barcoded data
+}
diff --git a/tests/src/test_Compare.cpp b/tests/src/test_Compare.cpp
new file mode 100644
index 0000000..b92a0a7
--- /dev/null
+++ b/tests/src/test_Compare.cpp
@@ -0,0 +1,739 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include <gtest/gtest.h>
+#include <pbbam/Compare.h>
+#include <algorithm>
+#include <string>
+#include <utility>
+#include <vector>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace tests {
+
+// Returns a default-constructed record that carries the single tag (tagName, tag).
+static inline
+BamRecord makeRecordWithTag(const string& tagName,
+                            const Tag& tag)
+{
+    BamRecord record;
+    record.Impl().AddTag(tagName, tag);
+    return record;
+}
+
+// Builds a record from the given sequence & qualities. frames, tagBases and tagQuals
+// are each stored under every tag name of the matching kind.
+static
+BamRecord makeRecord(const Position qStart,
+                     const Position qEnd,
+                     const string& seq,
+                     const string& quals,
+                     const string& tagBases,
+                     const string& tagQuals,
+                     const vector<uint16_t>& frames)
+{
+    TagCollection tags;
+    tags["qs"] = qStart;
+    tags["qe"] = qEnd;
+    tags["ip"] = frames;
+    tags["pw"] = frames;
+    tags["dt"] = tagBases;
+    tags["st"] = tagBases;
+    tags["dq"] = tagQuals;
+    tags["iq"] = tagQuals;
+    tags["mq"] = tagQuals;
+    tags["sq"] = tagQuals;
+    tags["pq"] = tagQuals;
+    tags["pv"] = tagQuals;
+
+    BamRecordImpl impl;
+    impl.SetSequenceAndQualities(seq, quals);
+    impl.Tags(tags);
+    return BamRecord(std::move(impl));
+}
+
+// Returns six mapped records: the three CIGARs on the forward strand, followed by
+// the same three CIGARs on the reverse strand. All records share the same read data.
+static
+std::vector<BamRecord> makeMappedRecords(void)
+{
+    const Position qStart = 500;
+    const Position qEnd   = 510;
+    const string seq      = "AACCGTTAGC";
+    const string quals    = "?]?]?]?]?*";
+    const string tagBases = "AACCGTTAGC";
+    const string tagQuals = "?]?]?]?]?*";
+    const vector<uint16_t> frames  = { 10, 10, 20, 20, 30, 40, 40, 10, 30, 20 };
+    const uint8_t mapQual = 80;
+
+    // builds one record and maps it with the requested strand & CIGAR
+    const auto mapped = [&](const Strand strand, const string& cigar) -> BamRecord
+    {
+        BamRecord record = tests::makeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames);
+        record.Map(0, 100, strand, cigar, mapQual);
+        return record;
+    };
+
+    std::vector<BamRecord> result;
+    result.reserve(6);
+    for (const Strand strand : { Strand::FORWARD, Strand::REVERSE }) {
+        for (const string& cigar : { string{"10="}, string{"5=3D5="}, string{"4=1D2I2D2X2="} })
+            result.push_back(mapped(strand, cigar));
+    }
+    return result;
+}
+
+} // namespace tests
+
+TEST(CompareTest, TypeToNameOk)
+{
+    // every valid type is reported under its qualified name; other values must throw
+    EXPECT_EQ(string("Compare::EQUAL"),              Compare::TypeToName(Compare::EQUAL));
+    EXPECT_EQ(string("Compare::NOT_EQUAL"),          Compare::TypeToName(Compare::NOT_EQUAL));
+    EXPECT_EQ(string("Compare::LESS_THAN"),          Compare::TypeToName(Compare::LESS_THAN));
+    EXPECT_EQ(string("Compare::LESS_THAN_EQUAL"),    Compare::TypeToName(Compare::LESS_THAN_EQUAL));
+    EXPECT_EQ(string("Compare::GREATER_THAN"),       Compare::TypeToName(Compare::GREATER_THAN));
+    EXPECT_EQ(string("Compare::GREATER_THAN_EQUAL"), Compare::TypeToName(Compare::GREATER_THAN_EQUAL));
+    EXPECT_EQ(string("Compare::CONTAINS"),           Compare::TypeToName(Compare::CONTAINS));
+    EXPECT_EQ(string("Compare::NOT_CONTAINS"),       Compare::TypeToName(Compare::NOT_CONTAINS));
+    const auto bogus = static_cast<Compare::Type>(42);
+    EXPECT_THROW(Compare::TypeToName(bogus), std::runtime_error);
+}
+
+TEST(CompareTest, TypeToOperatorOk)
+{
+    // default spelling: symbolic operators
+    EXPECT_EQ(string("=="), Compare::TypeToOperator(Compare::EQUAL));
+    EXPECT_EQ(string("!="), Compare::TypeToOperator(Compare::NOT_EQUAL));
+    EXPECT_EQ(string("<"),  Compare::TypeToOperator(Compare::LESS_THAN));
+    EXPECT_EQ(string("<="), Compare::TypeToOperator(Compare::LESS_THAN_EQUAL));
+    EXPECT_EQ(string(">"),  Compare::TypeToOperator(Compare::GREATER_THAN));
+    EXPECT_EQ(string(">="), Compare::TypeToOperator(Compare::GREATER_THAN_EQUAL));
+    EXPECT_EQ(string("&"),  Compare::TypeToOperator(Compare::CONTAINS));
+    EXPECT_EQ(string("~"),  Compare::TypeToOperator(Compare::NOT_CONTAINS));
+
+    // alphabetic spelling, selected by passing true as the second argument
+    const bool asWord = true;
+    EXPECT_EQ(string("eq"),  Compare::TypeToOperator(Compare::EQUAL, asWord));
+    EXPECT_EQ(string("ne"),  Compare::TypeToOperator(Compare::NOT_EQUAL, asWord));
+    EXPECT_EQ(string("lt"),  Compare::TypeToOperator(Compare::LESS_THAN, asWord));
+    EXPECT_EQ(string("lte"), Compare::TypeToOperator(Compare::LESS_THAN_EQUAL, asWord));
+    EXPECT_EQ(string("gt"),  Compare::TypeToOperator(Compare::GREATER_THAN, asWord));
+    EXPECT_EQ(string("gte"), Compare::TypeToOperator(Compare::GREATER_THAN_EQUAL, asWord));
+    EXPECT_EQ(string("and"), Compare::TypeToOperator(Compare::CONTAINS, asWord));
+    EXPECT_EQ(string("not"), Compare::TypeToOperator(Compare::NOT_CONTAINS, asWord));
+
+    // a value outside the known types must be rejected
+    const auto bogus = static_cast<Compare::Type>(42);
+    EXPECT_THROW(Compare::TypeToOperator(bogus), std::runtime_error);
+}
+
+TEST(CompareTest, FromOperatorOk)
+{
+    EXPECT_EQ(Compare::EQUAL,              Compare::TypeFromOperator("=="));
+    EXPECT_EQ(Compare::EQUAL,              Compare::TypeFromOperator("="));
+    EXPECT_EQ(Compare::EQUAL,              Compare::TypeFromOperator("eq"));
+    EXPECT_EQ(Compare::NOT_EQUAL,          Compare::TypeFromOperator("!="));
+    EXPECT_EQ(Compare::NOT_EQUAL,          Compare::TypeFromOperator("ne"));
+    EXPECT_EQ(Compare::LESS_THAN,          Compare::TypeFromOperator("<"));
+    EXPECT_EQ(Compare::LESS_THAN,          Compare::TypeFromOperator("lt"));
+    EXPECT_EQ(Compare::LESS_THAN,          Compare::TypeFromOperator("&lt;"));   // XML-escaped "<"
+    EXPECT_EQ(Compare::LESS_THAN_EQUAL,    Compare::TypeFromOperator("<="));
+    EXPECT_EQ(Compare::LESS_THAN_EQUAL,    Compare::TypeFromOperator("lte"));
+    EXPECT_EQ(Compare::LESS_THAN_EQUAL,    Compare::TypeFromOperator("&lt;="));  // XML-escaped "<="
+    EXPECT_EQ(Compare::GREATER_THAN,       Compare::TypeFromOperator(">"));
+    EXPECT_EQ(Compare::GREATER_THAN,       Compare::TypeFromOperator("gt"));
+    EXPECT_EQ(Compare::GREATER_THAN,       Compare::TypeFromOperator("&gt;"));   // XML-escaped ">"
+    EXPECT_EQ(Compare::GREATER_THAN_EQUAL, Compare::TypeFromOperator(">="));
+    EXPECT_EQ(Compare::GREATER_THAN_EQUAL, Compare::TypeFromOperator("gte"));
+    EXPECT_EQ(Compare::GREATER_THAN_EQUAL, Compare::TypeFromOperator("&gt;="));  // XML-escaped ">="
+    EXPECT_EQ(Compare::CONTAINS,           Compare::TypeFromOperator("&"));
+    EXPECT_EQ(Compare::NOT_CONTAINS,       Compare::TypeFromOperator("~"));
+
+    // invalid operator strings throw
+    EXPECT_THROW(Compare::TypeFromOperator(""),        std::runtime_error);
+    EXPECT_THROW(Compare::TypeFromOperator("invalid"), std::runtime_error);
+}
+
+TEST(CompareTest, AlignedEndOk)
+{
+    // aligned ends are assigned out of order; sorting must return them ascending
+    BamRecord end300; end300.alignedEnd_ = 300;
+    BamRecord end200; end200.alignedEnd_ = 200;
+    BamRecord end400; end400.alignedEnd_ = 400;
+    BamRecord end100; end100.alignedEnd_ = 100;
+    vector<BamRecord> sorted{ end300, end200, end400, end100 };
+    std::sort(sorted.begin(), sorted.end(), Compare::AlignedEnd());
+
+    EXPECT_EQ(end100.alignedEnd_, sorted.at(0).AlignedEnd());
+    EXPECT_EQ(end200.alignedEnd_, sorted.at(1).AlignedEnd());
+    EXPECT_EQ(end300.alignedEnd_, sorted.at(2).AlignedEnd());
+    EXPECT_EQ(end400.alignedEnd_, sorted.at(3).AlignedEnd());
+}
+
+TEST(CompareTest, AlignedStartOk)
+{
+    // aligned starts are assigned out of order; sorting must return them ascending
+    BamRecord start300; start300.alignedStart_ = 300;
+    BamRecord start200; start200.alignedStart_ = 200;
+    BamRecord start400; start400.alignedStart_ = 400;
+    BamRecord start100; start100.alignedStart_ = 100;
+    vector<BamRecord> sorted{ start300, start200, start400, start100 };
+    std::sort(sorted.begin(), sorted.end(), Compare::AlignedStart());
+
+    EXPECT_EQ(start100.alignedStart_, sorted.at(0).AlignedStart());
+    EXPECT_EQ(start200.alignedStart_, sorted.at(1).AlignedStart());
+    EXPECT_EQ(start300.alignedStart_, sorted.at(2).AlignedStart());
+    EXPECT_EQ(start400.alignedStart_, sorted.at(3).AlignedStart());
+}
+
+TEST(CompareTest, AlignedStrandOk)
+{
+    // strands alternate reverse/forward, so sorting has to regroup the records
+    BamRecord rev1; rev1.Impl().SetReverseStrand(true);
+    BamRecord fwd1; fwd1.Impl().SetReverseStrand(false);
+    BamRecord rev2; rev2.Impl().SetReverseStrand(true);
+    BamRecord fwd2; fwd2.Impl().SetReverseStrand(false);
+    vector<BamRecord> sorted{ rev1, fwd1, rev2, fwd2 };
+    std::sort(sorted.begin(), sorted.end(), Compare::AlignedStrand());
+
+    EXPECT_EQ(Strand::FORWARD, sorted.at(0).AlignedStrand());
+    EXPECT_EQ(Strand::FORWARD, sorted.at(1).AlignedStrand());
+    EXPECT_EQ(Strand::REVERSE, sorted.at(2).AlignedStrand());
+    EXPECT_EQ(Strand::REVERSE, sorted.at(3).AlignedStrand());
+}
+
+TEST(CompareTest, BarcodeForwardOk)
+{
+    // (forward, reverse) barcode pairs; the forward IDs are out of order: 30, 20, 40, 10
+    BamRecord fwd30; fwd30.Barcodes(std::pair<uint16_t,uint16_t>(30,20));
+    BamRecord fwd20; fwd20.Barcodes(std::pair<uint16_t,uint16_t>(20,30));
+    BamRecord fwd40; fwd40.Barcodes(std::pair<uint16_t,uint16_t>(40,10));
+    BamRecord fwd10; fwd10.Barcodes(std::pair<uint16_t,uint16_t>(10,40));
+    vector<BamRecord> sorted{ fwd30, fwd20, fwd40, fwd10 };
+    std::sort(sorted.begin(), sorted.end(), Compare::BarcodeForward());
+
+    EXPECT_EQ(fwd10.BarcodeForward(), sorted.at(0).BarcodeForward());
+    EXPECT_EQ(fwd20.BarcodeForward(), sorted.at(1).BarcodeForward());
+    EXPECT_EQ(fwd30.BarcodeForward(), sorted.at(2).BarcodeForward());
+    EXPECT_EQ(fwd40.BarcodeForward(), sorted.at(3).BarcodeForward());
+}
+
+TEST(CompareTest, BarcodeReverseOk)
+{
+    // (forward, reverse) barcode pairs; the reverse IDs are out of order: 20, 30, 10, 40
+    BamRecord rev20; rev20.Barcodes(std::pair<uint16_t,uint16_t>(30,20));
+    BamRecord rev30; rev30.Barcodes(std::pair<uint16_t,uint16_t>(20,30));
+    BamRecord rev10; rev10.Barcodes(std::pair<uint16_t,uint16_t>(40,10));
+    BamRecord rev40; rev40.Barcodes(std::pair<uint16_t,uint16_t>(10,40));
+    vector<BamRecord> sorted{ rev20, rev30, rev10, rev40 };
+    std::sort(sorted.begin(), sorted.end(), Compare::BarcodeReverse());
+
+    EXPECT_EQ(rev10.BarcodeReverse(), sorted.at(0).BarcodeReverse());
+    EXPECT_EQ(rev20.BarcodeReverse(), sorted.at(1).BarcodeReverse());
+    EXPECT_EQ(rev30.BarcodeReverse(), sorted.at(2).BarcodeReverse());
+    EXPECT_EQ(rev40.BarcodeReverse(), sorted.at(3).BarcodeReverse());
+}
+
+TEST(CompareTest, BarcodeQualityOk)
+{
+    // "bq" tag values in insertion order; sorting must yield 10, 20, 30, 40
+    const uint8_t qual30 = 30;
+    const uint8_t qual20 = 20;
+    const uint8_t qual40 = 40;
+    const uint8_t qual10 = 10;
+    vector<BamRecord> sorted
+    {
+        tests::makeRecordWithTag("bq", Tag(qual30)),
+        tests::makeRecordWithTag("bq", Tag(qual20)),
+        tests::makeRecordWithTag("bq", Tag(qual40)),
+        tests::makeRecordWithTag("bq", Tag(qual10))
+    };
+    std::sort(sorted.begin(), sorted.end(), Compare::BarcodeQuality());
+
+    EXPECT_EQ(qual10, sorted.at(0).BarcodeQuality());
+    EXPECT_EQ(qual20, sorted.at(1).BarcodeQuality());
+    EXPECT_EQ(qual30, sorted.at(2).BarcodeQuality());
+    EXPECT_EQ(qual40, sorted.at(3).BarcodeQuality());
+}
+
+TEST(CompareTest, CustomCompareOk)
+{
+    // user-defined comparator: orders records on the bool returned by HasDeletionTag()
+    struct CustomCompare : public Compare::MemberFunctionBase<bool, &BamRecord::HasDeletionTag> { };
+    // four records carrying a "dt" tag first, then four default records without one
+    auto records = vector<BamRecord>
+    {
+        tests::makeRecordWithTag("dt", Tag(string("foo"))),
+        tests::makeRecordWithTag("dt", Tag(string("foo"))),
+        tests::makeRecordWithTag("dt", Tag(string("foo"))),
+        tests::makeRecordWithTag("dt", Tag(string("foo")))
+    };
+    records.push_back(BamRecord());
+    records.push_back(BamRecord());
+    records.push_back(BamRecord());
+    records.push_back(BamRecord());
+    ASSERT_EQ(8u, records.size());  // unsigned literal matches size_t; stop if setup is off
+    std::sort(records.begin(), records.end(), CustomCompare());
+
+    EXPECT_FALSE(records.at(0).HasDeletionTag());
+    EXPECT_FALSE(records.at(1).HasDeletionTag());
+    EXPECT_FALSE(records.at(2).HasDeletionTag());
+    EXPECT_FALSE(records.at(3).HasDeletionTag());
+    EXPECT_TRUE(records.at(4).HasDeletionTag());
+    EXPECT_TRUE(records.at(5).HasDeletionTag());
+    EXPECT_TRUE(records.at(6).HasDeletionTag());
+    EXPECT_TRUE(records.at(7).HasDeletionTag());
+}
+
+TEST(CompareTest, FullNameOk)
+{
+    // names are set out of order (c, b, d, a); sorting must return a, b, c, d
+    BamRecord nameC; nameC.Impl().Name("c");
+    BamRecord nameB; nameB.Impl().Name("b");
+    BamRecord nameD; nameD.Impl().Name("d");
+    BamRecord nameA; nameA.Impl().Name("a");
+    vector<BamRecord> sorted{ nameC, nameB, nameD, nameA };
+    std::sort(sorted.begin(), sorted.end(), Compare::FullName());
+
+    EXPECT_EQ(nameA.FullName(), sorted.at(0).FullName());
+    EXPECT_EQ(nameB.FullName(), sorted.at(1).FullName());
+    EXPECT_EQ(nameC.FullName(), sorted.at(2).FullName());
+    EXPECT_EQ(nameD.FullName(), sorted.at(3).FullName());
+}
+
+TEST(CompareTest, LocalContextFlagOk)
+{
+    // expected order after sorting: no context, adapter-after, barcode-after, reverse-pass
+    BamRecord barcodeAfter; barcodeAfter.LocalContextFlags(LocalContextFlags::BARCODE_AFTER);
+    BamRecord adapterAfter; adapterAfter.LocalContextFlags(LocalContextFlags::ADAPTER_AFTER);
+    BamRecord reversePass;  reversePass.LocalContextFlags(LocalContextFlags::REVERSE_PASS);
+    BamRecord noContext;    noContext.LocalContextFlags(LocalContextFlags::NO_LOCAL_CONTEXT);
+    vector<BamRecord> sorted{ barcodeAfter, adapterAfter, reversePass, noContext };
+    std::sort(sorted.begin(), sorted.end(), Compare::LocalContextFlag());
+
+    EXPECT_EQ(noContext.LocalContextFlags(),    sorted.at(0).LocalContextFlags());
+    EXPECT_EQ(adapterAfter.LocalContextFlags(), sorted.at(1).LocalContextFlags());
+    EXPECT_EQ(barcodeAfter.LocalContextFlags(), sorted.at(2).LocalContextFlags());
+    EXPECT_EQ(reversePass.LocalContextFlags(),  sorted.at(3).LocalContextFlags());
+}
+
+TEST(CompareTest, MapQualityOk)
+{
+    // map qualities are set out of order; sorting must return them ascending
+    BamRecord mapq30; mapq30.Impl().MapQuality(30);
+    BamRecord mapq20; mapq20.Impl().MapQuality(20);
+    BamRecord mapq40; mapq40.Impl().MapQuality(40);
+    BamRecord mapq10; mapq10.Impl().MapQuality(10);
+    vector<BamRecord> sorted{ mapq30, mapq20, mapq40, mapq10 };
+    std::sort(sorted.begin(), sorted.end(), Compare::MapQuality());
+
+    EXPECT_EQ(mapq10.MapQuality(), sorted.at(0).MapQuality());
+    EXPECT_EQ(mapq20.MapQuality(), sorted.at(1).MapQuality());
+    EXPECT_EQ(mapq30.MapQuality(), sorted.at(2).MapQuality());
+    EXPECT_EQ(mapq40.MapQuality(), sorted.at(3).MapQuality());
+}
+
+TEST(CompareTest, MovieNameOk)
+{
+    // one read group per name a..d, all registered in the header the records share
+    ReadGroupInfo groupA{ "a", "SUBREAD" };
+    ReadGroupInfo groupB{ "b", "SUBREAD" };
+    ReadGroupInfo groupC{ "c", "SUBREAD" };
+    ReadGroupInfo groupD{ "d", "SUBREAD" };
+    BamHeader header;
+    header.AddReadGroup(groupA);
+    header.AddReadGroup(groupB);
+    header.AddReadGroup(groupC);
+    header.AddReadGroup(groupD);
+
+    BamRecord recC(header); recC.ReadGroup(groupC);
+    BamRecord recB(header); recB.ReadGroup(groupB);
+    BamRecord recD(header); recD.ReadGroup(groupD);
+    BamRecord recA(header); recA.ReadGroup(groupA);
+
+    vector<BamRecord> sorted{ recC, recB, recD, recA };
+    std::sort(sorted.begin(), sorted.end(), Compare::MovieName());
+
+    EXPECT_EQ(recA.MovieName(), sorted.at(0).MovieName());
+    EXPECT_EQ(recB.MovieName(), sorted.at(1).MovieName());
+    EXPECT_EQ(recC.MovieName(), sorted.at(2).MovieName());
+    EXPECT_EQ(recD.MovieName(), sorted.at(3).MovieName());
+}
+
+TEST(CompareTest, NoneOk)
+{
+    // input order must survive Compare::None; only stable_sort guarantees that for equivalent elements
+    BamRecord r1; r1.Impl().Name("c");
+    BamRecord r2; r2.Impl().Name("b");
+    BamRecord r3; r3.Impl().Name("d");
+    BamRecord r4; r4.Impl().Name("a");
+    auto records = vector<BamRecord>{ r1, r2, r3, r4 };
+    std::stable_sort(records.begin(), records.end(), Compare::None());
+
+    EXPECT_EQ(r1.FullName(), records.at(0).FullName());
+    EXPECT_EQ(r2.FullName(), records.at(1).FullName());
+    EXPECT_EQ(r3.FullName(), records.at(2).FullName());
+    EXPECT_EQ(r4.FullName(), records.at(3).FullName());
+}
+
+TEST(CompareTest, NumDeletedBasesOk)
+{
+    // create test data
+    auto records = tests::makeMappedRecords();
+
+    // sanity checks on initial conditions (unsigned literal matches size_t; stop if size is off)
+    ASSERT_EQ(6u, records.size());
+    EXPECT_EQ(0, records.at(0).NumDeletedBases());
+    EXPECT_EQ(3, records.at(1).NumDeletedBases());
+    EXPECT_EQ(3, records.at(2).NumDeletedBases());
+    EXPECT_EQ(0, records.at(3).NumDeletedBases());
+    EXPECT_EQ(3, records.at(4).NumDeletedBases());
+    EXPECT_EQ(3, records.at(5).NumDeletedBases());
+
+    // sort & check: counts must come back in ascending order
+    std::sort(records.begin(), records.end(), Compare::NumDeletedBases());
+    EXPECT_EQ(0, records.at(0).NumDeletedBases());
+    EXPECT_EQ(0, records.at(1).NumDeletedBases());
+    EXPECT_EQ(3, records.at(2).NumDeletedBases());
+    EXPECT_EQ(3, records.at(3).NumDeletedBases());
+    EXPECT_EQ(3, records.at(4).NumDeletedBases());
+    EXPECT_EQ(3, records.at(5).NumDeletedBases());
+}
+
+TEST(CompareTest, NumInsertedBasesOk)
+{
+    // create test data
+    auto records = tests::makeMappedRecords();
+
+    // sanity checks on initial conditions (unsigned literal matches size_t; stop if size is off)
+    ASSERT_EQ(6u, records.size());
+    EXPECT_EQ(0, records.at(0).NumInsertedBases());
+    EXPECT_EQ(0, records.at(1).NumInsertedBases());
+    EXPECT_EQ(2, records.at(2).NumInsertedBases());
+    EXPECT_EQ(0, records.at(3).NumInsertedBases());
+    EXPECT_EQ(0, records.at(4).NumInsertedBases());
+    EXPECT_EQ(2, records.at(5).NumInsertedBases());
+
+    // sort & check: counts must come back in ascending order
+    std::sort(records.begin(), records.end(), Compare::NumInsertedBases());
+    EXPECT_EQ(0, records.at(0).NumInsertedBases());
+    EXPECT_EQ(0, records.at(1).NumInsertedBases());
+    EXPECT_EQ(0, records.at(2).NumInsertedBases());
+    EXPECT_EQ(0, records.at(3).NumInsertedBases());
+    EXPECT_EQ(2, records.at(4).NumInsertedBases());
+    EXPECT_EQ(2, records.at(5).NumInsertedBases());
+}
+
+TEST(CompareTest, NumMatchesOk)
+{
+    // create test data
+    auto records = tests::makeMappedRecords();
+
+    // sanity checks on initial conditions (unsigned literal matches size_t; stop if size is off)
+    ASSERT_EQ(6u, records.size());
+    EXPECT_EQ(10, records.at(0).NumMatches());
+    EXPECT_EQ(10, records.at(1).NumMatches());
+    EXPECT_EQ(6,  records.at(2).NumMatches());
+    EXPECT_EQ(10, records.at(3).NumMatches());
+    EXPECT_EQ(10, records.at(4).NumMatches());
+    EXPECT_EQ(6,  records.at(5).NumMatches());
+
+    // sort & check: counts must come back in ascending order
+    std::sort(records.begin(), records.end(), Compare::NumMatches());
+    EXPECT_EQ(6,  records.at(0).NumMatches());
+    EXPECT_EQ(6,  records.at(1).NumMatches());
+    EXPECT_EQ(10, records.at(2).NumMatches());
+    EXPECT_EQ(10, records.at(3).NumMatches());
+    EXPECT_EQ(10, records.at(4).NumMatches());
+    EXPECT_EQ(10, records.at(5).NumMatches());
+}
+
+TEST(CompareTest, NumMismatchesOk)
+{
+    // create test data
+    auto records = tests::makeMappedRecords();
+
+    // sanity checks on initial conditions (unsigned literal matches size_t; stop if size is off)
+    ASSERT_EQ(6u, records.size());
+    EXPECT_EQ(0, records.at(0).NumMismatches());
+    EXPECT_EQ(0, records.at(1).NumMismatches());
+    EXPECT_EQ(2, records.at(2).NumMismatches());
+    EXPECT_EQ(0, records.at(3).NumMismatches());
+    EXPECT_EQ(0, records.at(4).NumMismatches());
+    EXPECT_EQ(2, records.at(5).NumMismatches());
+
+    // sort & check: counts must come back in ascending order
+    std::sort(records.begin(), records.end(), Compare::NumMismatches());
+    EXPECT_EQ(0, records.at(0).NumMismatches());
+    EXPECT_EQ(0, records.at(1).NumMismatches());
+    EXPECT_EQ(0, records.at(2).NumMismatches());
+    EXPECT_EQ(0, records.at(3).NumMismatches());
+    EXPECT_EQ(2, records.at(4).NumMismatches());
+    EXPECT_EQ(2, records.at(5).NumMismatches());
+}
+
+TEST(CompareTest, QueryEndOk)
+{
+    // "qe" tag values in insertion order; sorting must yield 10, 20, 30, 40
+    const Position end30 = 30;
+    const Position end20 = 20;
+    const Position end40 = 40;
+    const Position end10 = 10;
+    vector<BamRecord> sorted
+    {
+        tests::makeRecordWithTag("qe", Tag(end30)),
+        tests::makeRecordWithTag("qe", Tag(end20)),
+        tests::makeRecordWithTag("qe", Tag(end40)),
+        tests::makeRecordWithTag("qe", Tag(end10))
+    };
+    std::sort(sorted.begin(), sorted.end(), Compare::QueryEnd());
+
+    EXPECT_EQ(end10, sorted.at(0).QueryEnd());
+    EXPECT_EQ(end20, sorted.at(1).QueryEnd());
+    EXPECT_EQ(end30, sorted.at(2).QueryEnd());
+    EXPECT_EQ(end40, sorted.at(3).QueryEnd());
+}
+
+TEST(CompareTest, QueryStartOk)
+{
+    // "qs" tag values in insertion order; sorting must yield 10, 20, 30, 40
+    const Position start30 = 30;
+    const Position start20 = 20;
+    const Position start40 = 40;
+    const Position start10 = 10;
+    vector<BamRecord> sorted
+    {
+        tests::makeRecordWithTag("qs", Tag(start30)),
+        tests::makeRecordWithTag("qs", Tag(start20)),
+        tests::makeRecordWithTag("qs", Tag(start40)),
+        tests::makeRecordWithTag("qs", Tag(start10))
+    };
+    std::sort(sorted.begin(), sorted.end(), Compare::QueryStart());
+
+    EXPECT_EQ(start10, sorted.at(0).QueryStart());
+    EXPECT_EQ(start20, sorted.at(1).QueryStart());
+    EXPECT_EQ(start30, sorted.at(2).QueryStart());
+    EXPECT_EQ(start40, sorted.at(3).QueryStart());
+}
+
+TEST(CompareTest, ReadGroupIdOk)
+{
+    // four read groups registered in the header that all records share
+    ReadGroupInfo groupFoo{ "foo", "SUBREAD" };
+    ReadGroupInfo groupBar{ "bar", "SUBREAD" };
+    ReadGroupInfo groupC{ "c",   "SUBREAD" };
+    ReadGroupInfo groupD{ "d",   "SUBREAD" };
+    BamHeader header;
+    header.AddReadGroup(groupFoo);
+    header.AddReadGroup(groupBar);
+    header.AddReadGroup(groupC);
+    header.AddReadGroup(groupD);
+
+    BamRecord recC(header);   recC.ReadGroup(groupC);     // -> 99365356
+    BamRecord recBar(header); recBar.ReadGroup(groupBar); // -> d9f305e4
+    BamRecord recD(header);   recD.ReadGroup(groupD);     // -> 54397cd6
+    BamRecord recFoo(header); recFoo.ReadGroup(groupFoo); // -> a60ddc69
+
+    vector<BamRecord> sorted{ recC, recBar, recD, recFoo };
+    std::sort(sorted.begin(), sorted.end(), Compare::ReadGroupId()); // lexical, NOT numeric ordering
+
+    EXPECT_EQ(recD.ReadGroupId(),   sorted.at(0).ReadGroupId());
+    EXPECT_EQ(recC.ReadGroupId(),   sorted.at(1).ReadGroupId());
+    EXPECT_EQ(recFoo.ReadGroupId(), sorted.at(2).ReadGroupId());
+    EXPECT_EQ(recBar.ReadGroupId(), sorted.at(3).ReadGroupId());
+}
+
+TEST(CompareTest, ReadGroupNumericIdOk)
+{
+    // four read groups registered in the header that all records share
+    ReadGroupInfo groupA{ "a", "SUBREAD" };
+    ReadGroupInfo groupB{ "b", "SUBREAD" };
+    ReadGroupInfo groupC{ "c", "SUBREAD" };
+    ReadGroupInfo groupD{ "d", "SUBREAD" };
+    BamHeader header;
+    header.AddReadGroup(groupA);
+    header.AddReadGroup(groupB);
+    header.AddReadGroup(groupC);
+    header.AddReadGroup(groupD);
+
+    BamRecord recC(header); recC.ReadGroup(groupC); // -> -1724492970
+    BamRecord recB(header); recB.ReadGroup(groupB); // ->   235381373
+    BamRecord recD(header); recD.ReadGroup(groupD); // ->  1413053654
+    BamRecord recA(header); recA.ReadGroup(groupA); // ->  1153643386
+
+    vector<BamRecord> sorted{ recC, recB, recD, recA };
+    std::sort(sorted.begin(), sorted.end(), Compare::ReadGroupNumericId()); // numeric ordering
+
+    EXPECT_EQ(recC.ReadGroupNumericId(), sorted.at(0).ReadGroupNumericId());
+    EXPECT_EQ(recB.ReadGroupNumericId(), sorted.at(1).ReadGroupNumericId());
+    EXPECT_EQ(recA.ReadGroupNumericId(), sorted.at(2).ReadGroupNumericId());
+    EXPECT_EQ(recD.ReadGroupNumericId(), sorted.at(3).ReadGroupNumericId());
+}
+
+TEST(CompareTest, ReadAccuracyOk)
+{
+    // "rq" tag values in insertion order; sorting must yield 10, 20, 30, 40
+    Accuracy acc30 = 30;
+    Accuracy acc20 = 20;
+    Accuracy acc40 = 40;
+    Accuracy acc10 = 10;
+    vector<BamRecord> sorted
+    {
+        tests::makeRecordWithTag("rq", Tag(acc30)),
+        tests::makeRecordWithTag("rq", Tag(acc20)),
+        tests::makeRecordWithTag("rq", Tag(acc40)),
+        tests::makeRecordWithTag("rq", Tag(acc10))
+    };
+    std::sort(sorted.begin(), sorted.end(), Compare::ReadAccuracy());
+
+    EXPECT_EQ(acc10, sorted.at(0).ReadAccuracy());
+    EXPECT_EQ(acc20, sorted.at(1).ReadAccuracy());
+    EXPECT_EQ(acc30, sorted.at(2).ReadAccuracy());
+    EXPECT_EQ(acc40, sorted.at(3).ReadAccuracy());
+}
+
+TEST(CompareTest, ReferenceEndOk)
+{
+    // create test data
+    auto records = tests::makeMappedRecords();
+
+    // sanity checks on initial conditions (unsigned literal matches size_t; stop if size is off)
+    ASSERT_EQ(6u, records.size());
+    EXPECT_EQ(110, records.at(0).ReferenceEnd());
+    EXPECT_EQ(113, records.at(1).ReferenceEnd());
+    EXPECT_EQ(111, records.at(2).ReferenceEnd());
+    EXPECT_EQ(110, records.at(3).ReferenceEnd());
+    EXPECT_EQ(113, records.at(4).ReferenceEnd());
+    EXPECT_EQ(111, records.at(5).ReferenceEnd());
+
+    // sort & check: reference ends must come back in ascending order
+    std::sort(records.begin(), records.end(), Compare::ReferenceEnd());
+    EXPECT_EQ(110, records.at(0).ReferenceEnd());
+    EXPECT_EQ(110, records.at(1).ReferenceEnd());
+    EXPECT_EQ(111, records.at(2).ReferenceEnd());
+    EXPECT_EQ(111, records.at(3).ReferenceEnd());
+    EXPECT_EQ(113, records.at(4).ReferenceEnd());
+    EXPECT_EQ(113, records.at(5).ReferenceEnd());
+}
+
+TEST(CompareTest, ReferenceIdOk)
+{
+    // reference IDs are set out of order; sorting must return them ascending
+    BamRecord ref30; ref30.Impl().ReferenceId(30);
+    BamRecord ref20; ref20.Impl().ReferenceId(20);
+    BamRecord ref40; ref40.Impl().ReferenceId(40);
+    BamRecord ref10; ref10.Impl().ReferenceId(10);
+    vector<BamRecord> sorted{ ref30, ref20, ref40, ref10 };
+    std::sort(sorted.begin(), sorted.end(), Compare::ReferenceId());
+
+    EXPECT_EQ(ref10.ReferenceId(), sorted.at(0).ReferenceId());
+    EXPECT_EQ(ref20.ReferenceId(), sorted.at(1).ReferenceId());
+    EXPECT_EQ(ref30.ReferenceId(), sorted.at(2).ReferenceId());
+    EXPECT_EQ(ref40.ReferenceId(), sorted.at(3).ReferenceId());
+}
+
+TEST(CompareTest, ReferenceNameOk)
+{
+    // four sequences registered in the header; records refer to them by reference ID
+    SequenceInfo first{ "seq1" };
+    SequenceInfo second{ "seq2" };
+    SequenceInfo third{ "seq3" };
+    SequenceInfo fourth{ "seq4" };
+    BamHeader header;
+    header.AddSequence(first);   // -> 0
+    header.AddSequence(second);  // -> 1
+    header.AddSequence(third);   // -> 2
+    header.AddSequence(fourth);  // -> 3
+
+    BamRecord onThird(header);  onThird.Impl().SetMapped(true);  onThird.Impl().ReferenceId(2);
+    BamRecord onSecond(header); onSecond.Impl().SetMapped(true); onSecond.Impl().ReferenceId(1);
+    BamRecord onFourth(header); onFourth.Impl().SetMapped(true); onFourth.Impl().ReferenceId(3);
+    BamRecord onFirst(header);  onFirst.Impl().SetMapped(true);  onFirst.Impl().ReferenceId(0);
+
+    vector<BamRecord> sorted{ onThird, onSecond, onFourth, onFirst };
+    std::sort(sorted.begin(), sorted.end(), Compare::ReferenceName());
+
+    EXPECT_EQ(first.Name(),  sorted.at(0).ReferenceName());
+    EXPECT_EQ(second.Name(), sorted.at(1).ReferenceName());
+    EXPECT_EQ(third.Name(),  sorted.at(2).ReferenceName());
+    EXPECT_EQ(fourth.Name(), sorted.at(3).ReferenceName());
+}
+
+TEST(CompareTest, ReferenceStartOk)
+{
+    // positions are set out of order; sorting must return them ascending
+    BamRecord pos30; pos30.Impl().Position(30);
+    BamRecord pos20; pos20.Impl().Position(20);
+    BamRecord pos40; pos40.Impl().Position(40);
+    BamRecord pos10; pos10.Impl().Position(10);
+    vector<BamRecord> sorted{ pos30, pos20, pos40, pos10 };
+    std::sort(sorted.begin(), sorted.end(), Compare::ReferenceStart());
+
+    EXPECT_EQ(pos10.ReferenceStart(), sorted.at(0).ReferenceStart());
+    EXPECT_EQ(pos20.ReferenceStart(), sorted.at(1).ReferenceStart());
+    EXPECT_EQ(pos30.ReferenceStart(), sorted.at(2).ReferenceStart());
+    EXPECT_EQ(pos40.ReferenceStart(), sorted.at(3).ReferenceStart());
+}
+
+TEST(CompareTest, ZmwOk)
+{
+    // "zm" tag values in insertion order; sorting must yield 10, 20, 30, 40
+    const int32_t hole30 = 30;
+    const int32_t hole20 = 20;
+    const int32_t hole40 = 40;
+    const int32_t hole10 = 10;
+    vector<BamRecord> sorted
+    {
+        tests::makeRecordWithTag("zm", Tag(hole30)),
+        tests::makeRecordWithTag("zm", Tag(hole20)),
+        tests::makeRecordWithTag("zm", Tag(hole40)),
+        tests::makeRecordWithTag("zm", Tag(hole10))
+    };
+    std::sort(sorted.begin(), sorted.end(), Compare::Zmw());
+
+    EXPECT_EQ(hole10, sorted.at(0).HoleNumber());
+    EXPECT_EQ(hole20, sorted.at(1).HoleNumber());
+    EXPECT_EQ(hole30, sorted.at(2).HoleNumber());
+    EXPECT_EQ(hole40, sorted.at(3).HoleNumber());
+}
diff --git a/tests/src/test_DataSetCore.cpp b/tests/src/test_DataSetCore.cpp
index 2ad0a4e..ba92b1a 100644
--- a/tests/src/test_DataSetCore.cpp
+++ b/tests/src/test_DataSetCore.cpp
@@ -76,28 +76,50 @@ TEST(DataSetCoreTest, XmlNameParts)
     EXPECT_EQ(boost::string_ref(":node_name"), leadingColon.QualifiedName());
 }
 
-TEST(DataSetCoreTest, NamespacesOk)
-{
-
-}
-
 TEST(DataSetCoreTest, DefaultsOk)
 {
     DataSet dataset;
     EXPECT_EQ(DataSet::GENERIC, dataset.Type());
-    EXPECT_FALSE(dataset.CreatedAt().empty());    // default init
+    EXPECT_FALSE(dataset.CreatedAt().empty());
+    EXPECT_FALSE(dataset.MetaType().empty());
+    EXPECT_FALSE(dataset.TimeStampedName().empty());
+    EXPECT_FALSE(dataset.UniqueId().empty());
+
+    EXPECT_EQ(0, dataset.TimeStampedName().find("pacbio_dataset_"));
+
     EXPECT_TRUE(dataset.Format().empty());
-    EXPECT_TRUE(dataset.MetaType().empty());
     EXPECT_TRUE(dataset.ModifiedAt().empty());
     EXPECT_TRUE(dataset.Name().empty());
     EXPECT_TRUE(dataset.ResourceId().empty());
     EXPECT_TRUE(dataset.Tags().empty());
-    EXPECT_TRUE(dataset.TimeStampedName().empty());
-    EXPECT_TRUE(dataset.UniqueId().empty());
-    EXPECT_TRUE(dataset.Version().empty());
     EXPECT_EQ(0, dataset.ExternalResources().Size());
     EXPECT_EQ(0, dataset.Filters().Size());
     EXPECT_EQ(0, dataset.SubDataSets().Size());
+
+    EXPECT_EQ(string{"3.0.1"}, dataset.Version());
+}
+
+TEST(DataSetCoreTest, TimeStampedNamesOk)
+{
+    // each dataset type's time-stamped name must start with its own type-specific prefix
+    DataSet generic;
+    AlignmentSet alignments;
+    BarcodeSet barcodes;
+    ContigSet contigs;
+    ConsensusAlignmentSet consensusAlignments;
+    ConsensusReadSet consensusReads;
+    HdfSubreadSet hdfSubreads;
+    ReferenceSet references;
+    SubreadSet subreads;
+    EXPECT_EQ(0, generic.TimeStampedName().find("pacbio_dataset_dataset-"));
+    EXPECT_EQ(0, alignments.TimeStampedName().find("pacbio_dataset_alignmentset-"));
+    EXPECT_EQ(0, barcodes.TimeStampedName().find("pacbio_dataset_barcodeset-"));
+    EXPECT_EQ(0, contigs.TimeStampedName().find("pacbio_dataset_contigset-"));
+    EXPECT_EQ(0, consensusAlignments.TimeStampedName().find("pacbio_dataset_consensusalignmentset-"));
+    EXPECT_EQ(0, consensusReads.TimeStampedName().find("pacbio_dataset_consensusreadset-"));
+    EXPECT_EQ(0, hdfSubreads.TimeStampedName().find("pacbio_dataset_hdfsubreadset-"));
+    EXPECT_EQ(0, references.TimeStampedName().find("pacbio_dataset_referenceset-"));
+    EXPECT_EQ(0, subreads.TimeStampedName().find("pacbio_dataset_subreadset-"));
 }
 
 TEST(DataSetCoreTest, BasicGettersSettersOk)
@@ -161,17 +183,21 @@ TEST(DataSetCoreTest, AddExternalResources)
     DataSet dataset;
     EXPECT_EQ(0, dataset.ExternalResources().Size());
 
-    ExternalResource resource1;
+    ExternalResource resource1("metatype", "id");
     resource1.Name("file1");
 
-    ExternalResource resource2;
+    ExternalResource resource2("metatype", "id2");
     resource2.Name("file2");
-    resource2.MetaType("blah");
 
     dataset.ExternalResources().Add(resource1);
     dataset.ExternalResources().Add(resource2);
     EXPECT_EQ(2, dataset.ExternalResources().Size());
 
+     // disallow duplicates (checking on ResourceId)
+    ExternalResource duplicateResource("metatype", "id");
+    dataset.ExternalResources().Add(duplicateResource);
+    EXPECT_EQ(2, dataset.ExternalResources().Size());
+
     // direct access
     const ExternalResources& resources = dataset.ExternalResources();
     EXPECT_EQ(string("file1"), resources[0].Name());
@@ -192,11 +218,11 @@ TEST(DataSetCoreTest, EditExternalResources)
 {
     DataSet dataset;
 
-    ExternalResource resource;
+    ExternalResource resource("metatype", "id");
     resource.Name("file1");
     dataset.ExternalResources().Add(resource);
 
-    resource.Name("file2");
+    resource.Name("file2").ResourceId("id2");
     dataset.ExternalResources().Add(resource);
     EXPECT_EQ(2, dataset.ExternalResources().Size());
 
@@ -206,6 +232,20 @@ TEST(DataSetCoreTest, EditExternalResources)
     EXPECT_EQ(string("file2"),         dataset.ExternalResources()[1].Name());
 }
 
+TEST(DataSetCoreTest, NestedExternalResources)
+{
+    ExternalResource resource("metatype", "filename");
+    resource.ExternalResources().Add(ExternalResource("metatype.child",  "filename.child"));
+    resource.ExternalResources().Add(ExternalResource("metatype.child2", "filename.child2"));
+
+    const ExternalResources& childResources = resource.ExternalResources();
+    EXPECT_EQ(2, childResources.Size());
+    EXPECT_EQ(string("metatype.child"),  childResources[0].MetaType());
+    EXPECT_EQ(string("metatype.child2"), childResources[1].MetaType());
+    EXPECT_EQ(string("filename.child"),  childResources[0].ResourceId());
+    EXPECT_EQ(string("filename.child2"), childResources[1].ResourceId());
+}
+
 TEST(DataSetCoreTest, AddFilters)
 {
     DataSet dataset;
@@ -407,10 +447,10 @@ TEST(DataSetCoreTest, RemoveExternalResources)
     DataSet dataset;
     EXPECT_EQ(0, dataset.ExternalResources().Size());
 
-    ExternalResource resource1;
+    ExternalResource resource1("metatype", "id");
     resource1.Name("file1");
 
-    ExternalResource resource2;
+    ExternalResource resource2("metatype", "id2");
     resource2.Name("file2");
 
     dataset.ExternalResources().Add(resource1);
diff --git a/tests/src/test_DataSetIO.cpp b/tests/src/test_DataSetIO.cpp
index b075788..8756f87 100644
--- a/tests/src/test_DataSetIO.cpp
+++ b/tests/src/test_DataSetIO.cpp
@@ -40,12 +40,16 @@
 #endif
 
 #include "TestData.h"
+#include "../src/FileUtils.h"
 #include <gtest/gtest.h>
 #include <pbbam/DataSet.h>
 #include <pbbam/internal/DataSetElement.h>
-#include <stdexcept>
+#include <fstream>
 #include <sstream>
+#include <stdexcept>
 #include <string>
+#include <vector>
+#include <unistd.h>
 using namespace PacBio;
 using namespace PacBio::BAM;
 using namespace std;
@@ -87,6 +91,10 @@ static void TestSubread2Xml(void);
 static void TestSubread3Xml(void);
 static void TestTransformedXml(void);
 
+static inline
+void changeCurrentDirectory(const std::string& dir)
+{ ASSERT_EQ(0, chdir(dir.c_str())); }
+
 TEST(DataSetIOTest, FromBamFilename)
 {
     DataSet dataset(ex2BamFn);
@@ -97,6 +105,16 @@ TEST(DataSetIOTest, FromBamFilename)
     EXPECT_EQ(ex2BamFn, bamRef.ResourceId());
 }
 
+TEST(DataSetIOTest, FromBamFilenames)
+{
+    std::ifstream fofn(bamGroupFofn);
+    std::vector<std::string> files;
+    std::string file;
+    while (std::getline(fofn, file)) if (!file.empty()) files.emplace_back(file);
+    DataSet dataset(files);
+    EXPECT_EQ(3, dataset.ExternalResources().Size());
+}
+
 TEST(DataSetIOTest, FromBamFileObject)
 {
     BamFile bamFile(ex2BamFn);
@@ -164,43 +182,43 @@ TEST(DataSetIOTest, ToXml)
     dataset.MetaType("PacBio.DataSet.AlignmentSet");
     dataset.Name("DataSet_AlignmentSet");
     dataset.Tags("barcode moreTags mapping mytags");
+    dataset.TimeStampedName("my_tsn");
     dataset.UniqueId("b095d0a3-94b8-4918-b3af-a3f81bbe519c");
-    dataset.Version("2.3.0");
-    dataset.Attribute("xmlns","http://pacificbiosciences.com/PacBioSecondaryDataModel.xsd")
-           .Attribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance")
-           .Attribute("xsi:schemaLocation",
-                      "http://pacificbiosciences.com/PacBioSecondaryDataModel.xsd PacBioSecondaryDataModel.xsd");
+    dataset.Attribute("xmlns",              "http://pacificbiosciences.com/PacBioDatasets.xsd")
+           .Attribute("xmlns:xsi",          "http://www.w3.org/2001/XMLSchema-instance")
+           .Attribute("xsi:schemaLocation", "http://pacificbiosciences.com/PacBioDatasets.xsd");
 
     // external resources
-    ExternalResource resource1;
+    ExternalResource resource1("AlignmentFile.AlignmentBamFile", "file:/mnt/path/to/alignments2.bam");    
     resource1.Name("Third Alignments BAM");
     resource1.Description("Points to an example Alignments BAM file.");
-    resource1.MetaType("AlignmentFile.AlignmentBamFile");
-    resource1.ResourceId("file:/mnt/path/to/alignments2.bam");
     resource1.Tags("Example");
-    FileIndex pbi1;
-    pbi1.MetaType("PacBio.Index.PacBioIndex");
-    pbi1.ResourceId("file:/mnt/path/to/alignments2.pbi");
+    resource1.TimeStampedName("my_tsn");
+    resource1.UniqueId("my_uuid");
+    FileIndex pbi1("PacBio.Index.PacBioIndex", "file:/mnt/path/to/alignments2.pbi");
+    pbi1.TimeStampedName("my_tsn");
+    pbi1.UniqueId("my_uuid");
     resource1.FileIndices().Add(pbi1);
     dataset.ExternalResources().Add(resource1);
 
-    ExternalResource resource2;
+    ExternalResource resource2("AlignmentFile.AlignmentBamFile", "file:./alignments3.bam");
     resource2.Name("Fourth Alignments BAM");
     resource2.Description("Points to another example Alignments BAM file, by relative path.");
-    resource2.MetaType("AlignmentFile.AlignmentBamFile");
-    resource2.ResourceId("file:./alignments3.bam");
     resource2.Tags("Example");
-    FileIndex pbi2;
-    pbi2.MetaType("PacBio.Index.PacBioIndex");
-    pbi2.ResourceId("file:/mnt/path/to/alignments3.pbi");
+    resource2.TimeStampedName("my_tsn");
+    resource2.UniqueId("my_uuid");
+    FileIndex pbi2("PacBio.Index.PacBioIndex", "file:/mnt/path/to/alignments3.pbi");
+    pbi2.TimeStampedName("my_tsn");
+    pbi2.UniqueId("my_uuid");
+
     resource2.FileIndices().Add(pbi2);
     dataset.ExternalResources().Add(resource2);
 
     // sub-datasets with filters
     DataSetBase subDataSet1;
     subDataSet1.Name("HighQuality Read Alignments");
+    subDataSet1.TimeStampedName("my_tsn");
     subDataSet1.UniqueId("ab95d0a3-94b8-4918-b3af-a3f81bbe519c");
-    subDataSet1.Version("2.3.0");
     Filter filter1;
     filter1.Properties().Add(Property("rq", "0.85", ">"));
     subDataSet1.Filters().Add(filter1);
@@ -208,8 +226,8 @@ TEST(DataSetIOTest, ToXml)
 
     DataSetBase subDataSet2;
     subDataSet2.Name("Alignments to chromosome 1");
+    subDataSet2.TimeStampedName("my_tsn");
     subDataSet2.UniqueId("ac95d0a3-94b8-4918-b3af-a3f81bbe519c");
-    subDataSet2.Version("2.3.0");
     Filter filter2;
     filter2.Properties().Add(Property("RNAME", "chr1", "=="));
     subDataSet2.Filters().Add(filter2);
@@ -218,31 +236,59 @@ TEST(DataSetIOTest, ToXml)
     // write dataset
     const string expectedXml =
         "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
-        "<pbds:AlignmentSet CreatedAt=\"2015-01-27T09:00:01\" MetaType=\"PacBio.DataSet.AlignmentSet\" "
-                "Name=\"DataSet_AlignmentSet\" Tags=\"barcode moreTags mapping mytags\" "
-                "UniqueId=\"b095d0a3-94b8-4918-b3af-a3f81bbe519c\" Version=\"2.3.0\" "
-                "xmlns=\"http://pacificbiosciences.com/PacBioSecondaryDataModel.xsd\" "
+        "<pbds:AlignmentSet "
+                "CreatedAt=\"2015-01-27T09:00:01\" "
+                "MetaType=\"PacBio.DataSet.AlignmentSet\" "
+                "Name=\"DataSet_AlignmentSet\" "
+                "Tags=\"barcode moreTags mapping mytags\" "
+                "TimeStampedName=\"my_tsn\" "
+                "UniqueId=\"b095d0a3-94b8-4918-b3af-a3f81bbe519c\" Version=\"3.0.1\" "
+                "xmlns=\"http://pacificbiosciences.com/PacBioDatasets.xsd\" "
                 "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
-                "xsi:schemaLocation=\"http://pacificbiosciences.com/PacBioSecondaryDataModel.xsd "
-                "PacBioSecondaryDataModel.xsd\">\n"
+                "xsi:schemaLocation=\"http://pacificbiosciences.com/PacBioDatasets.xsd\" "
+                "xmlns:pbbase=\"http://pacificbiosciences.com/PacBioBaseDataModel.xsd\" "
+                "xmlns:pbds=\"http://pacificbiosciences.com/PacBioDatasets.xsd\">\n"
         "\t<pbbase:ExternalResources>\n"
-        "\t\t<pbbase:ExternalResource Description=\"Points to an example Alignments BAM file.\" "
-                "MetaType=\"AlignmentFile.AlignmentBamFile\" Name=\"Third Alignments BAM\" "
-                "ResourceId=\"file:/mnt/path/to/alignments2.bam\" Tags=\"Example\">\n"
+        "\t\t<pbbase:ExternalResource "
+                "Description=\"Points to an example Alignments BAM file.\" "
+                "MetaType=\"AlignmentFile.AlignmentBamFile\" "
+                "Name=\"Third Alignments BAM\" "
+                "ResourceId=\"file:/mnt/path/to/alignments2.bam\" "
+                "Tags=\"Example\" "
+                "TimeStampedName=\"my_tsn\" "
+                "UniqueId=\"my_uuid\" Version=\"3.0.1\">\n"
         "\t\t\t<pbbase:FileIndices>\n"
-        "\t\t\t\t<pbbase:FileIndex MetaType=\"PacBio.Index.PacBioIndex\" ResourceId=\"file:/mnt/path/to/alignments2.pbi\" />\n"
+        "\t\t\t\t<pbbase:FileIndex "
+                "MetaType=\"PacBio.Index.PacBioIndex\" "
+                "ResourceId=\"file:/mnt/path/to/alignments2.pbi\" "
+                "TimeStampedName=\"my_tsn\" "
+                "UniqueId=\"my_uuid\" Version=\"3.0.1\" />\n"
         "\t\t\t</pbbase:FileIndices>\n"
         "\t\t</pbbase:ExternalResource>\n"
-        "\t\t<pbbase:ExternalResource Description=\"Points to another example Alignments BAM file, by relative path.\" "
-                "MetaType=\"AlignmentFile.AlignmentBamFile\" Name=\"Fourth Alignments BAM\" "
-                "ResourceId=\"file:./alignments3.bam\" Tags=\"Example\">\n"
+        "\t\t<pbbase:ExternalResource "
+                "Description=\"Points to another example Alignments BAM file, by relative path.\" "
+                "MetaType=\"AlignmentFile.AlignmentBamFile\" "
+                "Name=\"Fourth Alignments BAM\" "
+                "ResourceId=\"file:./alignments3.bam\" "
+                "Tags=\"Example\" "
+                "TimeStampedName=\"my_tsn\" "
+                "UniqueId=\"my_uuid\" Version=\"3.0.1\">\n"
         "\t\t\t<pbbase:FileIndices>\n"
-        "\t\t\t\t<pbbase:FileIndex MetaType=\"PacBio.Index.PacBioIndex\" ResourceId=\"file:/mnt/path/to/alignments3.pbi\" />\n"
+        "\t\t\t\t<pbbase:FileIndex "
+                "MetaType=\"PacBio.Index.PacBioIndex\" "
+                "ResourceId=\"file:/mnt/path/to/alignments3.pbi\" "
+                "TimeStampedName=\"my_tsn\" "
+                "UniqueId=\"my_uuid\" Version=\"3.0.1\" />\n"
         "\t\t\t</pbbase:FileIndices>\n"
         "\t\t</pbbase:ExternalResource>\n"
         "\t</pbbase:ExternalResources>\n"
         "\t<pbds:DataSets>\n"
-        "\t\t<pbds:DataSet Name=\"HighQuality Read Alignments\" UniqueId=\"ab95d0a3-94b8-4918-b3af-a3f81bbe519c\" Version=\"2.3.0\">\n"
+        "\t\t<pbds:DataSet "
+                "MetaType=\"PacBio.DataSet.DataSet\" "
+                "Name=\"HighQuality Read Alignments\" "
+                "TimeStampedName=\"my_tsn\" "
+                "UniqueId=\"ab95d0a3-94b8-4918-b3af-a3f81bbe519c\" "
+                "Version=\"3.0.1\">\n"
         "\t\t\t<pbds:Filters>\n"
         "\t\t\t\t<pbds:Filter>\n"
         "\t\t\t\t\t<pbbase:Properties>\n"
@@ -251,7 +297,12 @@ TEST(DataSetIOTest, ToXml)
         "\t\t\t\t</pbds:Filter>\n"
         "\t\t\t</pbds:Filters>\n"
         "\t\t</pbds:DataSet>\n"
-        "\t\t<pbds:DataSet Name=\"Alignments to chromosome 1\" UniqueId=\"ac95d0a3-94b8-4918-b3af-a3f81bbe519c\" Version=\"2.3.0\">\n"
+        "\t\t<pbds:DataSet "
+                "MetaType=\"PacBio.DataSet.DataSet\" "
+                "Name=\"Alignments to chromosome 1\" "
+                "TimeStampedName=\"my_tsn\" "
+                "UniqueId=\"ac95d0a3-94b8-4918-b3af-a3f81bbe519c\" "
+                "Version=\"3.0.1\">\n"
         "\t\t\t<pbds:Filters>\n"
         "\t\t\t\t<pbds:Filter>\n"
         "\t\t\t\t\t<pbbase:Properties>\n"
@@ -272,30 +323,47 @@ static void TestFromXmlString(void)
 {
     const string inputXml =
         "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
-        "<pbds:AlignmentSet CreatedAt=\"2015-01-27T09:00:01\" MetaType=\"PacBio.DataSet.AlignmentSet\" "
-                "Name=\"DataSet_AlignmentSet\" Tags=\"barcode moreTags mapping mytags\" "
-                "UniqueId=\"b095d0a3-94b8-4918-b3af-a3f81bbe519c\" Version=\"2.3.0\" "
-                "xmlns=\"http://pacificbiosciences.com/PacBioDataModel.xsd\" "
-                "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
-                "xsi:schemaLocation=\"http://pacificbiosciences.com/PacBioDataModel.xsd\">\n"
+        "<pbds:AlignmentSet "
+            "CreatedAt=\"2015-01-27T09:00:01\" "
+            "MetaType=\"PacBio.DataSet.AlignmentSet\" "
+            "Name=\"DataSet_AlignmentSet\" "
+            "Tags=\"barcode moreTags mapping mytags\" "
+            "UniqueId=\"b095d0a3-94b8-4918-b3af-a3f81bbe519c\" "
+            "Version=\"2.3.0\" "
+            "xmlns=\"http://pacificbiosciences.com/PacBioDataModel.xsd\" "
+            "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
+            "xsi:schemaLocation=\"http://pacificbiosciences.com/PacBioDataModel.xsd\">\n"
         "\t<pbbase:ExternalResources>\n"
-        "\t\t<pbbase:ExternalResource Description=\"Points to an example Alignments BAM file.\" "
-                "MetaType=\"AlignmentFile.AlignmentBamFile\" Name=\"Third Alignments BAM\" "
-                "ResourceId=\"file:/mnt/path/to/alignments2.bam\" Tags=\"Example\">\n"
+        "\t\t<pbbase:ExternalResource "
+                "Description=\"Points to an example Alignments BAM file.\" "
+                "MetaType=\"AlignmentFile.AlignmentBamFile\" "
+                "Name=\"Third Alignments BAM\" "
+                "ResourceId=\"file:/mnt/path/to/alignments2.bam\" "
+                "Tags=\"Example\">\n"
         "\t\t\t<pbbase:FileIndices>\n"
-        "\t\t\t\t<pbbase:FileIndex MetaType=\"PacBio.Index.PacBioIndex\" ResourceId=\"file:/mnt/path/to/alignments2.pbi\" />\n"
+        "\t\t\t\t<pbbase:FileIndex "
+                    "MetaType=\"PacBio.Index.PacBioIndex\" "
+                    "ResourceId=\"file:/mnt/path/to/alignments2.pbi\" />\n"
         "\t\t\t</pbbase:FileIndices>\n"
         "\t\t</pbbase:ExternalResource>\n"
-        "\t\t<pbbase:ExternalResource Description=\"Points to another example Alignments BAM file, by relative path.\" "
-                "MetaType=\"AlignmentFile.AlignmentBamFile\" Name=\"Fourth Alignments BAM\" "
-                "ResourceId=\"file:./alignments3.bam\" Tags=\"Example\">\n"
+        "\t\t<pbbase:ExternalResource "
+                "Description=\"Points to another example Alignments BAM file, by relative path.\" "
+                "MetaType=\"AlignmentFile.AlignmentBamFile\" "
+                "Name=\"Fourth Alignments BAM\" "
+                "ResourceId=\"file:./alignments3.bam\" "
+                "Tags=\"Example\">\n"
         "\t\t\t<pbbase:FileIndices>\n"
-        "\t\t\t\t<pbbase:FileIndex MetaType=\"PacBio.Index.PacBioIndex\" ResourceId=\"file:/mnt/path/to/alignments3.pbi\" />\n"
+        "\t\t\t\t<pbbase:FileIndex "
+                    "MetaType=\"PacBio.Index.PacBioIndex\" "
+                    "ResourceId=\"file:/mnt/path/to/alignments3.pbi\" />\n"
         "\t\t\t</pbbase:FileIndices>\n"
         "\t\t</pbbase:ExternalResource>\n"
         "\t</pbbase:ExternalResources>\n"
         "\t<pbds:DataSets>\n"
-        "\t\t<pbds:DataSet Name=\"HighQuality Read Alignments\" UniqueId=\"ab95d0a3-94b8-4918-b3af-a3f81bbe519c\" Version=\"2.3.0\">\n"
+        "\t\t<pbds:DataSet "
+                "Name=\"HighQuality Read Alignments\" "
+                "UniqueId=\"ab95d0a3-94b8-4918-b3af-a3f81bbe519c\" "
+                "Version=\"2.3.0\">\n"
         "\t\t\t<pbds:Filters>\n"
         "\t\t\t\t<pbds:Filter>\n"
         "\t\t\t\t\t<pbbase:Properties>\n"
@@ -304,7 +372,10 @@ static void TestFromXmlString(void)
         "\t\t\t\t</pbds:Filter>\n"
         "\t\t\t</pbds:Filters>\n"
         "\t\t</pbds:DataSet>\n"
-        "\t\t<pbds:DataSet Name=\"Alignments to chromosome 1\" UniqueId=\"ac95d0a3-94b8-4918-b3af-a3f81bbe519c\" Version=\"2.3.0\">\n"
+        "\t\t<pbds:DataSet "
+                "Name=\"Alignments to chromosome 1\" "
+                "UniqueId=\"ac95d0a3-94b8-4918-b3af-a3f81bbe519c\" "
+                "Version=\"2.3.0\">\n"
         "\t\t\t<pbds:Filters>\n"
         "\t\t\t\t<pbds:Filter>\n"
         "\t\t\t\t\t<pbbase:Properties>\n"
@@ -395,9 +466,9 @@ static void TestAli1Xml(void)
     EXPECT_EQ(string("barcode moreTags mapping mytags"),      dataset.Tags());
     EXPECT_EQ(string("b095d0a3-94b8-4918-b3af-a3f81bbe519c"), dataset.UniqueId());
     EXPECT_EQ(string("2.3.0"),                                dataset.Version());
-    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xmlns"));
-    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"),         dataset.Attribute("xmlns:xsi"));
-    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xsi:schemaLocation"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xmlns"));
+    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"),        dataset.Attribute("xmlns:xsi"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xsi:schemaLocation"));
 
     EXPECT_EQ(0, dataset.Filters().Size());
 
@@ -484,9 +555,9 @@ static void TestAli2Xml(void)
     EXPECT_EQ(string("barcode moreTags mapping mytags"),      dataset.Tags());
     EXPECT_EQ(string("b095d0a3-94b8-4918-b3af-a3f81bbe519c"), dataset.UniqueId());
     EXPECT_EQ(string("2.3.0"),                                dataset.Version());
-    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xmlns"));
-    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"),         dataset.Attribute("xmlns:xsi"));
-    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xsi:schemaLocation"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xmlns"));
+    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"),        dataset.Attribute("xmlns:xsi"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xsi:schemaLocation"));
 
     EXPECT_EQ(0, dataset.Filters().Size());
 
@@ -573,9 +644,9 @@ static void TestAli3Xml(void)
     EXPECT_EQ(string("barcode moreTags mapping mytags"),      dataset.Tags());
     EXPECT_EQ(string("b095d0a3-94b8-4918-b3af-a3f81bbe519c"), dataset.UniqueId());
     EXPECT_EQ(string("2.3.0"),                                dataset.Version());
-    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xmlns"));
-    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"),         dataset.Attribute("xmlns:xsi"));
-    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xsi:schemaLocation"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xmlns"));
+    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"),        dataset.Attribute("xmlns:xsi"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xsi:schemaLocation"));
 
     EXPECT_EQ(0, dataset.Filters().Size());
 
@@ -662,9 +733,9 @@ static void TestAli4Xml(void)
     EXPECT_EQ(string("barcode moreTags mapping mytags"),      dataset.Tags());
     EXPECT_EQ(string("b095d0a3-94b8-4918-b3af-a3f81bbe519c"), dataset.UniqueId());
     EXPECT_EQ(string("2.3.0"),                                dataset.Version());
-    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xmlns"));
-    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"),         dataset.Attribute("xmlns:xsi"));
-    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xsi:schemaLocation"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xmlns"));
+    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"),        dataset.Attribute("xmlns:xsi"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xsi:schemaLocation"));
 
     EXPECT_EQ(0, dataset.Filters().Size());
 
@@ -751,9 +822,9 @@ static void TestMappingStaggeredXml(void)
     EXPECT_EQ(string(""), dataset.Tags());
     EXPECT_EQ(string("30f72098-bc5b-e06b-566c-8b28dda909a8"), dataset.UniqueId());
     EXPECT_EQ(string("2.3.0"), dataset.Version());
-    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xmlns"));
-    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"),         dataset.Attribute("xmlns:xsi"));
-    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xsi:schemaLocation"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xmlns"));
+    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"),        dataset.Attribute("xmlns:xsi"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xsi:schemaLocation"));
 
     EXPECT_EQ(0, dataset.Filters().Size());
 
@@ -838,9 +909,9 @@ static void TestBarcodeXml(void)
     EXPECT_EQ(string("barcode moreTags mapping mytags"), dataset.Tags());
     EXPECT_EQ(string("b095d0a3-94b8-4918-b3af-a3f81bbe519c"), dataset.UniqueId());
     EXPECT_EQ(string("2.3.0"), dataset.Version());
-    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xmlns"));
-    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"),         dataset.Attribute("xmlns:xsi"));
-    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xsi:schemaLocation"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xmlns"));
+    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"),        dataset.Attribute("xmlns:xsi"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xsi:schemaLocation"));
 
     EXPECT_EQ(0, dataset.Filters().Size());
     EXPECT_EQ(0, dataset.SubDataSets().Size());
@@ -872,9 +943,9 @@ static void TestCcsReadXml(void)
     EXPECT_EQ(string("barcode moreTags mapping mytags"), dataset.Tags());
     EXPECT_EQ(string("b095d0a3-94b8-4918-b3af-a3f81bbe519c"), dataset.UniqueId());
     EXPECT_EQ(string("2.3.0"), dataset.Version());
-    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xmlns"));
-    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"),         dataset.Attribute("xmlns:xsi"));
-    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xsi:schemaLocation"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xmlns"));
+    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"),        dataset.Attribute("xmlns:xsi"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xsi:schemaLocation"));
 
     EXPECT_EQ(0, dataset.Filters().Size());
     EXPECT_EQ(0, dataset.SubDataSets().Size());
@@ -927,9 +998,9 @@ static void TestLambdaContigsXml(void)
     EXPECT_EQ(string(""), dataset.Tags());
     EXPECT_EQ(string("596e87db-34f9-d2fd-c905-b017543170e1"), dataset.UniqueId());
     EXPECT_EQ(string("2.3.0"), dataset.Version());
-    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xmlns"));
-    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"),         dataset.Attribute("xmlns:xsi"));
-    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xsi:schemaLocation"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xmlns"));
+    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"),        dataset.Attribute("xmlns:xsi"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xsi:schemaLocation"));
 
     EXPECT_EQ(0, dataset.Filters().Size());
     EXPECT_EQ(0, dataset.SubDataSets().Size());
@@ -950,9 +1021,9 @@ static void TestPbalchemyXml(void)
     EXPECT_EQ(string(""), dataset.Tags());
     EXPECT_EQ(string("58e3f7c5-24c1-b58b-fbd5-37de268cc2f0"), dataset.UniqueId());
     EXPECT_EQ(string("2.3.0"), dataset.Version());
-    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xmlns"));
-    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"),         dataset.Attribute("xmlns:xsi"));
-    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xsi:schemaLocation"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xmlns"));
+    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"),        dataset.Attribute("xmlns:xsi"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xsi:schemaLocation"));
 
     EXPECT_EQ(0, dataset.SubDataSets().Size());
 
@@ -979,9 +1050,9 @@ static void TestReferenceXml(void)
     EXPECT_EQ(string("barcode moreTags mapping mytags"), dataset.Tags());
     EXPECT_EQ(string("b095d0a3-94b8-4918-b3af-a3f81bbe519c"), dataset.UniqueId());
     EXPECT_EQ(string("2.3.0"), dataset.Version());
-    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xmlns"));
-    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"),         dataset.Attribute("xmlns:xsi"));
-    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xsi:schemaLocation"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xmlns"));
+    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"),        dataset.Attribute("xmlns:xsi"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xsi:schemaLocation"));
 
     EXPECT_EQ(0, dataset.Filters().Size());
     EXPECT_EQ(0, dataset.SubDataSets().Size());
@@ -1036,9 +1107,9 @@ static void TestSubread1Xml(void)
     EXPECT_EQ(string("barcode moreTags mapping mytags"), dataset.Tags());
     EXPECT_EQ(string("b095d0a3-94b8-4918-b3af-a3f81bbe519c"), dataset.UniqueId());
     EXPECT_EQ(string("2.3.0"), dataset.Version());
-    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xmlns"));
-    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"),         dataset.Attribute("xmlns:xsi"));
-    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xsi:schemaLocation"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xmlns"));
+    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"),        dataset.Attribute("xmlns:xsi"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xsi:schemaLocation"));
 
     const ExternalResources& resources = dataset.ExternalResources();
     ASSERT_EQ(2, resources.Size());
@@ -1106,9 +1177,9 @@ static void TestSubread2Xml(void)
     EXPECT_EQ(string("barcode moreTags mapping mytags"), dataset.Tags());
     EXPECT_EQ(string("b095d0a3-94b8-4918-b3af-a3f81bbe519c"), dataset.UniqueId());
     EXPECT_EQ(string("2.3.0"), dataset.Version());
-    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xmlns"));
-    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"),         dataset.Attribute("xmlns:xsi"));
-    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xsi:schemaLocation"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xmlns"));
+    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"),        dataset.Attribute("xmlns:xsi"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xsi:schemaLocation"));
 
     const ExternalResources& resources = dataset.ExternalResources();
     ASSERT_EQ(2, resources.Size());
@@ -1176,9 +1247,9 @@ static void TestSubread3Xml(void)
     EXPECT_EQ(string("barcode moreTags mapping mytags"), dataset.Tags());
     EXPECT_EQ(string("b095d0a3-94b8-4918-b3af-a3f81bbe519c"), dataset.UniqueId());
     EXPECT_EQ(string("2.3.0"), dataset.Version());
-    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xmlns"));
-    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"),         dataset.Attribute("xmlns:xsi"));
-    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xsi:schemaLocation"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xmlns"));
+    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"),        dataset.Attribute("xmlns:xsi"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xsi:schemaLocation"));
 
     const ExternalResources& resources = dataset.ExternalResources();
     ASSERT_EQ(2, resources.Size());
@@ -1245,7 +1316,7 @@ static void TestTransformedXml(void)
     EXPECT_EQ(string("pacbio.secondary.instrument=RS"), dataset.Tags());
     EXPECT_EQ(string("abbc9183-b01e-4671-8c12-19efee534647"), dataset.UniqueId());
     EXPECT_EQ(string("0.5"), dataset.Version());
-    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xmlns"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xmlns"));
     EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema"),         dataset.Attribute("xmlns:xs"));
     EXPECT_EQ(string("http://www.w3.org/2005/xpath-functions"), dataset.Attribute("xmlns:fn"));
     EXPECT_EQ(string("java:java.util.UUID"), dataset.Attribute("xmlns:uuid"));
@@ -1279,3 +1350,184 @@ static void TestTransformedXml(void)
     EXPECT_EQ(string("150000"),   metadata.NumRecords());
     EXPECT_EQ(string("50000000"), metadata.TotalLength());
 }
+
+TEST(DataSetIOTest, InspectMalformedXml)
+{
+    const string xmlFn = tests::Data_Dir + "/dataset/malformed.xml";
+
+    DataSet ds(xmlFn);
+    stringstream s;
+    ds.SaveToStream(s);
+
+    const string expected =
+        "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
+        "<SubreadSet Description=\"Merged dataset from 1 files using DatasetMerger 0.1.2\" "
+                    "MetaType=\"PacBio.DataSet.HdfSubreadSet\" Name=\"Subreads from runr000013_42267_150403\" "
+                    "Tags=\"pacbio.secondary.instrument=RS\" TimeStampedName=\"hdfsubreadset_2015-08-19T15:39:36.331-07:00\" "
+                    "UniqueId=\"b4741521-2a4c-42df-8a13-0a755ca9ed1e\" Version=\"0.5\" "
+                    "xmlns=\"http://pacificbiosciences.com/PacBioDatasets.xsd\" "
+                    "xmlns:ns0=\"http://pacificbiosciences.com/PacBioBaseDataModel.xsd\" "
+                    "xmlns:ns1=\"http://pacificbiosciences.com/PacBioSampleInfo.xsd\" "
+                    "xmlns:ns2=\"http://pacificbiosciences.com/PacBioCollectionMetadata.xsd\" "
+                    "xmlns:ns3=\"http://pacificbiosciences.com/PacBioReagentKit.xsd\" "
+                    "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
+                    "xsi:schemaLocation=\"http://pacificbiosciences.com/PacBioDatasets.xsd\">\n"
+        "\t<ns0:ExternalResources>\n"
+        "\t\t<ns0:ExternalResource MetaType=\"SubreadFile.SubreadBamFile\" "
+                                  "ResourceId=\"file:///mnt/secondary-siv/jenkins/jenkins-bot01/workspace/Ubuntu1404_Mainline_SA3_Tiny_tests/software/smrtanalysis/siv/testkit-jobs/sa3_pipelines/mapping/tiny/job_output-ubuntu1404/tasks/pbsmrtpipe.tasks.h5_subreads_to_subread-0//mnt/secondary-siv/jenkins/jenkins-bot01/workspace/Ubuntu1404_Mainline_SA3_Tiny_tests/software/smrtanalysis/siv/testkit-jobs/sa3_pipelines/mapping/tiny/job_output-ubuntu1404/tasks/pbsmrtpipe.tasks.h5_subreads_to_sub [...]
+                                  "TimeStampedName=\"SubreadFile.SubreadBamFile_00000000000000\" "
+                                  "UniqueId=\"251acf71-9eb0-489e-9dd1-cdbd11432753\" />\n"
+        "\t</ns0:ExternalResources>\n"
+        "\t<DataSetMetadata>\n"
+        "\t\t<TotalLength>50000000</TotalLength>\n"
+        "\t\t<NumRecords>150000</NumRecords>\n"
+        "\t\t<ns2:Collections>\n"
+        "\t\t\t<ns2:CollectionMetadata Context=\"m150404_101626_42267_c100807920800000001823174110291514_s1_p0\" "
+                                      "InstrumentId=\"1\" InstrumentName=\"42267\" MetaType=\"PacBio.Collection\" "
+                                      "TimeStampedName=\"m150404_101626_42267_c100807920800000001823174110291514_s1_p0\" "
+                                      "UniqueId=\"d66c8372-2b70-4dcf-b64f-9f8b5cc351fd\">\n"
+        "\t\t\t\t<ns2:InstCtrlVer>2.3.0.1.142990</ns2:InstCtrlVer>\n"
+        "\t\t\t\t<ns2:SigProcVer>NRT@172.31.128.10:8082, SwVer=2301.142990, HwVer=1.0</ns2:SigProcVer>\n"
+        "\t\t\t\t<ns2:RunDetails>\n"
+        "\t\t\t\t\t<ns2:RunId>r000013_42267_150403</ns2:RunId>\n"
+        "\t\t\t\t\t<ns2:Name>Inst42267-040315-SAT-100pM-2kb-P6C4</ns2:Name>\n"
+        "\t\t\t\t</ns2:RunDetails>\n"
+        "\t\t\t\t<ns2:WellSample Name=\"Inst42267-040315-SAT-100pM-2kb-P6C4\">\n"
+        "\t\t\t\t\t<ns2:PlateId>Inst42267-040315-SAT-100pM-2kb-P6C4</ns2:PlateId>\n"
+        "\t\t\t\t\t<ns2:WellName>Inst42267-040315-SAT-100pM-2kb-P6C4</ns2:WellName>\n"
+        "\t\t\t\t\t<ns2:Concentration>0.0</ns2:Concentration>\n"
+        "\t\t\t\t\t<ns2:SampleReuseEnabled>false</ns2:SampleReuseEnabled>\n"
+        "\t\t\t\t\t<ns2:StageHotstartEnabled>false</ns2:StageHotstartEnabled>\n"
+        "\t\t\t\t\t<ns2:SizeSelectionEnabled>false</ns2:SizeSelectionEnabled>\n"
+        "\t\t\t\t\t<ns2:UseCount>1</ns2:UseCount>\n"
+        "\t\t\t\t\t<ns1:BioSamplePointers>\n"
+        "\t\t\t\t\t\t<ns1:BioSamplePointer>251acf71-9eb0-489e-9dd1-cdbd11432752</ns1:BioSamplePointer>\n"
+        "\t\t\t\t\t</ns1:BioSamplePointers>\n"
+        "\t\t\t\t</ns2:WellSample>\n"
+        "\t\t\t\t<ns2:Automation>\n"
+        "\t\t\t\t\t<ns0:AutomationParameters>\n"
+        "\t\t\t\t\t\t<ns0:AutomationParameter />\n"
+        "\t\t\t\t\t</ns0:AutomationParameters>\n"
+        "\t\t\t\t</ns2:Automation>\n"
+        "\t\t\t\t<ns2:CollectionNumber>7</ns2:CollectionNumber>\n"
+        "\t\t\t\t<ns2:CellIndex>4</ns2:CellIndex>\n"
+        "\t\t\t\t<ns2:CellPac Barcode=\"10080792080000000182317411029151\" />\n"
+        "\t\t\t\t<ns2:Primary>\n"
+        "\t\t\t\t\t<ns2:AutomationName>BasecallerV1</ns2:AutomationName>\n"
+        "\t\t\t\t\t<ns2:ConfigFileName>2-3-0_P6-C4.xml</ns2:ConfigFileName>\n"
+        "\t\t\t\t\t<ns2:SequencingCondition />\n"
+        "\t\t\t\t\t<ns2:OutputOptions>\n"
+        "\t\t\t\t\t\t<ns2:ResultsFolder>Analysis_Results</ns2:ResultsFolder>\n"
+        "\t\t\t\t\t\t<ns2:CollectionPathUri>rsy://mp-rsync/vol55//RS_DATA_STAGING/42267/Inst42267-040315-SAT-100pM-2kb-P6C4_13/A04_7/</ns2:CollectionPathUri>\n"
+        "\t\t\t\t\t\t<ns2:CopyFiles>\n"
+        "\t\t\t\t\t\t\t<ns2:CollectionFileCopy>Fasta</ns2:CollectionFileCopy>\n"
+        "\t\t\t\t\t\t</ns2:CopyFiles>\n"
+        "\t\t\t\t\t\t<ns2:Readout>Bases</ns2:Readout>\n"
+        "\t\t\t\t\t\t<ns2:MetricsVerbosity>Minimal</ns2:MetricsVerbosity>\n"
+        "\t\t\t\t\t</ns2:OutputOptions>\n"
+        "\t\t\t\t</ns2:Primary>\n"
+        "\t\t\t</ns2:CollectionMetadata>\n"
+        "\t\t</ns2:Collections>\n"
+        "\t\t<ns1:BioSamples>\n"
+        "\t\t\t<ns1:BioSample Description=\"Inst42267-SAT-100pM-2kbLambda-P6C4-Std120_CPS_040315\" "
+                            "MetaType=\"PacBio.Sample\" Name=\"Inst42267-040315-SAT-100pM-2kb-P6C4\" "
+                            "TimeStampedName=\"biosample_2015-08-19T15:39:36.331-07:00\" UniqueId=\"251acf71-9eb0-489e-9dd1-cdbd11432752\" />\n"
+        "\t\t</ns1:BioSamples>\n"
+        "\t</DataSetMetadata>\n"
+        "</SubreadSet>\n";
+
+    EXPECT_EQ(expected, s.str());
+}
+
+TEST(DataSetIOTest, RelativePathCarriedThroughOk_FromString)
+{
+    const string inputXml =
+        "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
+        "<pbds:AlignmentSet "
+            "CreatedAt=\"2015-01-27T09:00:01\" "
+            "MetaType=\"PacBio.DataSet.AlignmentSet\" "
+            "Name=\"DataSet_AlignmentSet\" "
+            "Tags=\"barcode moreTags mapping mytags\" "
+            "TimeStampedName=\"biosample_2015-08-19T15:39:36.331-07:00\" "
+            "UniqueId=\"b095d0a3-94b8-4918-b3af-a3f81bbe519c\" "
+            "Version=\"2.3.0\" "
+            "xmlns=\"http://pacificbiosciences.com/PacBioDataModel.xsd\" "
+            "xmlns:pbds=\"http://pacificbiosciences.com/PacBioDatasets.xsd\" "
+            "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
+            "xsi:schemaLocation=\"http://pacificbiosciences.com/PacBioDataModel.xsd\">\n"
+        "\t<pbbase:ExternalResources>\n"
+        "\t\t<pbbase:ExternalResource "
+                "Description=\"Points to an example Alignments BAM file.\" "
+                "MetaType=\"AlignmentFile.AlignmentBamFile\" "
+                "Name=\"Third Alignments BAM\" "
+                "ResourceId=\"../path/to/resource1.bam\" "
+                "Tags=\"Example\">\n"
+        "\t\t\t<pbbase:FileIndices>\n"
+        "\t\t\t\t<pbbase:FileIndex "
+                    "MetaType=\"PacBio.Index.PacBioIndex\" "
+                    "ResourceId=\"../path/to/resource1.bam.pbi\" />\n"
+        "\t\t\t</pbbase:FileIndices>\n"
+        "\t\t</pbbase:ExternalResource>\n"
+        "\t\t<pbbase:ExternalResource "
+                "Description=\"Points to another example Alignments BAM file, by relative path.\" "
+                "MetaType=\"AlignmentFile.AlignmentBamFile\" "
+                "Name=\"Fourth Alignments BAM\" "
+                "ResourceId=\"../path/to/resource2.bam\" "
+                "Tags=\"Example\">\n"
+        "\t\t\t<pbbase:FileIndices>\n"
+        "\t\t\t\t<pbbase:FileIndex "
+                    "MetaType=\"PacBio.Index.PacBioIndex\" "
+                    "ResourceId=\"../path/to/resource2.bam.pbi\" />\n"
+        "\t\t\t</pbbase:FileIndices>\n"
+        "\t\t</pbbase:ExternalResource>\n"
+        "\t</pbbase:ExternalResources>\n"
+        "</pbds:AlignmentSet>\n";
+
+    auto dataset = DataSet::FromXml(inputXml);
+
+    stringstream stream;
+    dataset.SaveToStream(stream);
+    auto outputXml = stream.str();
+
+    EXPECT_EQ(inputXml, outputXml);
+}
+
+TEST(DataSetIOTest, RelativePathCarriedThroughOk_FromFile)
+{
+    DataSet dataset(tests::Data_Dir + "/relative/relative.xml");
+    auto resources = dataset.ExternalResources();
+    EXPECT_EQ("./a/test.bam",  resources[0].ResourceId());
+    EXPECT_EQ("./b/test1.bam", resources[1].ResourceId());
+    EXPECT_EQ("./b/test2.bam", resources[2].ResourceId());
+
+    stringstream out;
+    dataset.SaveToStream(out);
+
+    auto newDataset = DataSet::FromXml(out.str());
+    auto newResources = newDataset.ExternalResources();
+    EXPECT_EQ("./a/test.bam",  newResources[0].ResourceId());
+    EXPECT_EQ("./b/test1.bam", newResources[1].ResourceId());
+    EXPECT_EQ("./b/test2.bam", newResources[2].ResourceId());
+}
+
+TEST(DataSetIOTest, DataSetFromRelativeBamFilename)
+{
+    // cache initial directory and move to the target location so we can test that a relative filename works
+    const string startingDirectory = internal::FileUtils::CurrentWorkingDirectory();
+
+    const string targetDirectory = tests::Data_Dir + "/dataset";
+    changeCurrentDirectory(targetDirectory);
+    ASSERT_EQ(targetDirectory, internal::FileUtils::CurrentWorkingDirectory());
+
+    EXPECT_NO_THROW(
+    {
+        const string relativeBamFn = "../phi29.bam";
+        const DataSet ds(relativeBamFn);
+        const auto& files = ds.BamFiles();
+        EXPECT_EQ(1, files.size());
+    });
+
+    // restore working directory
+    changeCurrentDirectory(startingDirectory);
+}
+
diff --git a/tests/src/test_DataSetQuery.cpp b/tests/src/test_DataSetQuery.cpp
index da9b41e..2dc6b2b 100644
--- a/tests/src/test_DataSetQuery.cpp
+++ b/tests/src/test_DataSetQuery.cpp
@@ -49,6 +49,7 @@ using namespace PacBio::BAM;
 using namespace std;
 
 const string ex2BamFn     = tests::Data_Dir + "/ex2.bam";
+const string ex2CopyBamFn = tests::Data_Dir + "/ex2_copy.bam";
 const string bamMappingFn = tests::Data_Dir + "/dataset/bam_mapping.bam";
 const string bamMappingFn_1 = tests::Data_Dir + "/dataset/bam_mapping_1.bam";
 const string bamMappingFn_2 = tests::Data_Dir + "/dataset/bam_mapping_2.bam";
@@ -132,7 +133,7 @@ TEST(DataSetQueryTest, EntireFileQueryTest)
         EXPECT_EQ(3307, count);
     });
 
-    // simple multi-file (actually just same file twice)
+    // duplicate file attempt
     EXPECT_NO_THROW(
     {
         BamFile bamFile(ex2BamFn);
@@ -147,7 +148,7 @@ TEST(DataSetQueryTest, EntireFileQueryTest)
             (void)record;
             ++count;
         }
-        EXPECT_EQ(3307*2, count);
+        EXPECT_EQ(3307, count); // same as single
     });
 
     // true multi-file dataset
@@ -263,7 +264,7 @@ TEST(DataSetQueryTest, GenomicIntervalQueryTest)
         EXPECT_EQ(83, count);
     });
 
-    // multi-file (same twice)
+    // duplicate file
     EXPECT_NO_THROW(
     {
         BamFile bamFile(ex2BamFn);
@@ -288,6 +289,81 @@ TEST(DataSetQueryTest, GenomicIntervalQueryTest)
             prevPos = record.ReferenceStart();
             ++count;
         }
+        EXPECT_EQ(39, count); // same as single file
+
+        // adjust interval and pass back in
+        count = 0;
+        interval.Start(500);
+        interval.Stop(600);
+        query.Interval(interval);
+        for (const BamRecord& record : query) {
+            (void)record;
+            ++count;
+        }
+        EXPECT_EQ(166, count); // same as single file
+
+        // adjust again
+        count = 0;
+        interval.Name("seq2");
+        interval.Start(0);
+        interval.Stop(100);
+        query.Interval(interval);
+        for (const BamRecord& record : query) {
+            (void)record;
+            ++count;
+        }
+        EXPECT_EQ(83, count); // same as single file
+
+        // unknown ref
+        count = 0;
+        interval.Name("does not exist");
+        interval.Start(0);
+        interval.Stop(100);
+        EXPECT_THROW(
+            query.Interval(interval);
+        , std::exception);
+        for (const BamRecord& record : query) {    // iteration is still safe, just returns no data
+            (void)record;
+            ++count;
+        }
+        EXPECT_EQ(0, count); // same as single file
+
+        // adjust again - make sure we can read a real region after an invalid one
+        interval.Name("seq2");
+        interval.Start(0);
+        interval.Stop(100);
+        query.Interval(interval);
+        count = 0;
+        for (const BamRecord& record : query) {
+            (void)record;
+            ++count;
+        }
+        EXPECT_EQ(83, count); // same as single file
+    });
+
+    // multi file BAM (same record content for easy testing, but different filename/ResourceId)
+    EXPECT_NO_THROW(
+    {
+        DataSet dataset;
+        dataset.ExternalResources().Add(BamFile(ex2BamFn));
+        dataset.ExternalResources().Add(BamFile(ex2CopyBamFn));
+
+        // count records & also ensure sorted merge
+        int count = 0;
+        int prevId = 0;
+        int prevPos = 0;
+
+        GenomicInterval interval("seq1", 0, 100);
+        GenomicIntervalQuery query(interval, dataset);
+        for (const BamRecord& record : query) {
+
+            EXPECT_TRUE(record.ReferenceId()   >= prevId);
+            EXPECT_TRUE(record.ReferenceStart() >= prevPos);
+
+            prevId = record.ReferenceId();
+            prevPos = record.ReferenceStart();
+            ++count;
+        }
         EXPECT_EQ(39*2, count);
 
         // adjust interval and pass back in
@@ -339,9 +415,6 @@ TEST(DataSetQueryTest, GenomicIntervalQueryTest)
         }
         EXPECT_EQ(83*2, count);
     });
-
-    // multi file BAM
-
 }
 
 // TODO: implement me
@@ -372,6 +445,30 @@ TEST(DataSetQueryTest, ZmwQueryTest)
     });
 
     // multi-file
+    {
+        BamFile bamFile(bamMappingFn);
+        bamFile.EnsurePacBioIndexExists();
+
+        BamFile bamFile_1(bamMappingFn_1);
+        bamFile_1.EnsurePacBioIndexExists();
+
+        BamFile bamFile_2(bamMappingFn_2);
+        bamFile_2.EnsurePacBioIndexExists();
+
+        DataSet dataset;
+        dataset.ExternalResources().Add(ExternalResource(bamFile));
+        dataset.ExternalResources().Add(ExternalResource(bamFile_1));
+        dataset.ExternalResources().Add(ExternalResource(bamFile_2));
+
+        int count = 0;
+        ZmwQuery query(whitelist, dataset);
+        for (const BamRecord& r : query) {
+            const auto holeNumber = r.HoleNumber();
+            EXPECT_TRUE(holeNumber == 13473 || holeNumber == 38025);
+            ++count;
+        }
+        EXPECT_EQ(15, count);
+    }
 }
 
 TEST(DataSetQueryTest, ZmwGroupQueryTest)
@@ -417,18 +514,29 @@ TEST(DataSetQueryTest, ZmwGroupQueryTest)
         dataset.ExternalResources().Add(ExternalResource(bamFile_1));
         dataset.ExternalResources().Add(ExternalResource(bamFile_2));
 
-        int count = 0;
+        int totalCount = 0;
+        int numRecordsInGroup = 0;
+        int groupCount = 0;
         int32_t groupZmw = -1;
         ZmwGroupQuery query(whitelist, dataset);
         for (const vector<BamRecord>& group : query)  {
             for (const BamRecord& record: group) {
+                ++numRecordsInGroup;
                 if (groupZmw == -1)
                     groupZmw = record.HoleNumber();
                 EXPECT_EQ(groupZmw, record.HoleNumber());
-                ++count;
+                ++totalCount;
             }
+            if (groupCount == 0)
+                EXPECT_EQ(9, numRecordsInGroup);
+            else if (groupCount == 1)
+                EXPECT_EQ(6, numRecordsInGroup);
+            else
+                EXPECT_TRUE(false); // should not get here
+            numRecordsInGroup = 0;
+            ++groupCount;
             groupZmw = -1;
         }
-        EXPECT_EQ(15, count);
+        EXPECT_EQ(15, totalCount);
     });
 }
diff --git a/tests/src/test_DataSetXsd.cpp b/tests/src/test_DataSetXsd.cpp
index 177b758..1238122 100644
--- a/tests/src/test_DataSetXsd.cpp
+++ b/tests/src/test_DataSetXsd.cpp
@@ -94,13 +94,16 @@ TEST(DataSetXsdTest, EditDatasetRegistry)
     dataset.MetaType("PacBio.DataSet.AlignmentSet");
     dataset.Name("DataSet_AlignmentSet");
     dataset.Tags("barcode moreTags mapping mytags");
+    dataset.TimeStampedName("my_time_stamped_name");
     dataset.UniqueId("b095d0a3-94b8-4918-b3af-a3f81bbe519c");
-    dataset.Version("2.3.0");
-    dataset.Attribute("xmlns","http://pacificbiosciences.com/PacBioSecondaryDataModel.xsd")
-           .Attribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance")
-           .Attribute("xsi:schemaLocation",
-                      "http://pacificbiosciences.com/PacBioSecondaryDataModel.xsd PacBioSecondaryDataModel.xsd");
-    dataset.ExternalResources().Add(ExternalResource("Fake.MetaType", "filename"));
+    dataset.Attribute("xmlns",              "http://pacificbiosciences.com/PacBioDatasets.xsd")
+           .Attribute("xmlns:xsi",          "http://www.w3.org/2001/XMLSchema-instance")
+           .Attribute("xsi:schemaLocation", "http://pacificbiosciences.com/PacBioDatasets.xsd");
+
+    ExternalResource ext("Fake.MetaType", "filename");
+    ext.TimeStampedName("custom_tsn")
+       .UniqueId("my_uuid");
+    dataset.ExternalResources().Add(ext);
 
     dataset.Namespaces().Register(XsdType::BASE_DATA_MODEL, NamespaceInfo("custom", "http://custom/uri.xsd"));
 
@@ -108,13 +111,15 @@ TEST(DataSetXsdTest, EditDatasetRegistry)
         "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
         "<pbds:AlignmentSet CreatedAt=\"2015-01-27T09:00:01\" MetaType=\"PacBio.DataSet.AlignmentSet\" "
                 "Name=\"DataSet_AlignmentSet\" Tags=\"barcode moreTags mapping mytags\" "
-                "UniqueId=\"b095d0a3-94b8-4918-b3af-a3f81bbe519c\" Version=\"2.3.0\" "
-                "xmlns=\"http://pacificbiosciences.com/PacBioSecondaryDataModel.xsd\" "
+                "TimeStampedName=\"my_time_stamped_name\" "
+                "UniqueId=\"b095d0a3-94b8-4918-b3af-a3f81bbe519c\" Version=\"3.0.1\" "
+                "xmlns=\"http://pacificbiosciences.com/PacBioDatasets.xsd\" "
                 "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
-                "xsi:schemaLocation=\"http://pacificbiosciences.com/PacBioSecondaryDataModel.xsd "
-                "PacBioSecondaryDataModel.xsd\">\n"
+                "xsi:schemaLocation=\"http://pacificbiosciences.com/PacBioDatasets.xsd\" "
+                "xmlns:custom=\"http://custom/uri.xsd\" "
+                "xmlns:pbds=\"http://pacificbiosciences.com/PacBioDatasets.xsd\">\n"
         "\t<custom:ExternalResources>\n"
-        "\t\t<custom:ExternalResource MetaType=\"Fake.MetaType\" ResourceId=\"filename\" />\n"
+        "\t\t<custom:ExternalResource MetaType=\"Fake.MetaType\" ResourceId=\"filename\" TimeStampedName=\"custom_tsn\" UniqueId=\"my_uuid\" Version=\"3.0.1\" />\n"
         "\t</custom:ExternalResources>\n"
         "</pbds:AlignmentSet>\n";
 
@@ -122,3 +127,56 @@ TEST(DataSetXsdTest, EditDatasetRegistry)
     dataset.SaveToStream(s);
     EXPECT_EQ(expectedXml, s.str());
 }
+
+TEST(DataSetXsdTest, ElementRegistryOk)
+{
+    { // default namespaces
+
+        DataSet ds;
+
+        // append child elements that have neither a built-in C++ class nor a namespace prefix when added
+        DataSetMetadata& metadata = ds.Metadata();
+        metadata.AddChild(internal::DataSetElement("SummaryStats"));
+        metadata.AddChild(internal::DataSetElement("CopyFiles"));
+        metadata.AddChild(internal::DataSetElement("BioSamples"));
+        metadata.AddChild(internal::DataSetElement("AutomationParameters"));
+
+        stringstream s;
+        ds.SaveToStream(s);
+        const string output = s.str();
+
+        // check that default namespace is propagated properly
+        EXPECT_TRUE(output.find("pbds:SummaryStats") != string::npos);
+        EXPECT_TRUE(output.find("pbmeta:CopyFiles") != string::npos);
+        EXPECT_TRUE(output.find("pbsample:BioSamples") != string::npos);
+        EXPECT_TRUE(output.find("pbbase:AutomationParameters") != string::npos);
+    }
+
+    { // custom namespaces
+
+        DataSet ds;
+
+        // setup custom namespaces
+        ds.Namespaces().Register(XsdType::BASE_DATA_MODEL,     NamespaceInfo("custom_base",   "http://custom/base.xsd"));
+        ds.Namespaces().Register(XsdType::COLLECTION_METADATA, NamespaceInfo("custom_meta",   "http://custom/meta.xsd"));
+        ds.Namespaces().Register(XsdType::DATASETS,            NamespaceInfo("custom_ds",     "http://custom/datasets.xsd"));
+        ds.Namespaces().Register(XsdType::SAMPLE_INFO,         NamespaceInfo("custom_sample", "http://custom/base.xsd"));
+
+        // append child elements that have neither a built-in C++ class nor a namespace prefix when added
+        DataSetMetadata& metadata = ds.Metadata();
+        metadata.AddChild(internal::DataSetElement("SummaryStats"));
+        metadata.AddChild(internal::DataSetElement("CopyFiles"));
+        metadata.AddChild(internal::DataSetElement("BioSamples"));
+        metadata.AddChild(internal::DataSetElement("AutomationParameters"));
+
+        stringstream s;
+        ds.SaveToStream(s);
+        const string output = s.str();
+
+        // check that custom namespace is propagated properly
+        EXPECT_TRUE(output.find("custom_ds:SummaryStats") != string::npos);
+        EXPECT_TRUE(output.find("custom_meta:CopyFiles") != string::npos);
+        EXPECT_TRUE(output.find("custom_sample:BioSamples") != string::npos);
+        EXPECT_TRUE(output.find("custom_base:AutomationParameters") != string::npos);
+    }
+}
diff --git a/tests/src/test_EndToEnd.cpp b/tests/src/test_EndToEnd.cpp
index 7fe951e..fc6a740 100644
--- a/tests/src/test_EndToEnd.cpp
+++ b/tests/src/test_EndToEnd.cpp
@@ -48,13 +48,19 @@
 #include <pbbam/EntireFileQuery.h>
 #include <iostream>
 #include <memory>
+#include <sstream>
 #include <string>
 #include <cstdio>
 #include <cstdlib>
 using namespace PacBio;
 using namespace PacBio::BAM;
+using namespace PacBio::BAM::tests;
 using namespace std;
 
+namespace PacBio {
+namespace BAM {
+namespace tests {
+
 struct Bam1Deleter
 {
     void operator()(bam1_t* b) {
@@ -84,77 +90,96 @@ struct BamHdrDeleter
 
 const string inputBamFn        = tests::Data_Dir + "/ex2.bam";
 const string goldStandardSamFn = tests::Data_Dir + "/ex2.sam";
-const string generatedBamFn    = tests::Data_Dir + "/generated.bam";
-const string generatedSamFn    = tests::Data_Dir + "/generated.sam";
+const string generatedBamFn    = "/tmp/generated.bam";
+const string generatedSamFn    = "/tmp/generated.sam";
+const vector<string> generatedFiles = { generatedBamFn, generatedSamFn };
+
+static inline
+int RunBam2Sam(const string& bamFn,
+               const string& samFn,
+               const string& args = string())
+{
+    stringstream s;
+    s << tests::Bam2Sam << " " << args << " " << bamFn << " > " << samFn;
+    return system(s.str().c_str());
+}
 
 static inline
-int Samtools_Bam2Sam(const string& bamFilename,
-                     const string& samFilename)
+int RunDiff(const string& fn1, const string& fn2)
 {
-    const std::string& convertArgs = string("view -h ") + bamFilename + string(" > ")  + samFilename;
-    const std::string& convertCommandLine = tests::Samtools_Bin + string(" ") + convertArgs;
-    return system(convertCommandLine.c_str());
+    stringstream s;
+    s << "diff " << fn1 << " " << fn2;
+    return system(s.str().c_str());
 }
 
 static inline
-int Diff_Sam2Sam(const string& fn1,
-                 const string& fn2)
+void Remove(const vector<string>& files)
 {
-    const std::string& diffCommandLine = string("diff ") + fn1 + string(" ") + fn2;
-    return system(diffCommandLine.c_str());
+    for (const auto& fn : files)
+        remove(fn.c_str());
 }
 
 static inline
-void RemoveGeneratedFiles(const string& fn1,
-                          const string& fn2)
+void CheckGeneratedOutput(void)
 {
-    remove(fn1.c_str());
-    remove(fn2.c_str());
+    // convert to sam & diff against gold standard
+    const int convertRet = RunBam2Sam(generatedBamFn, generatedSamFn);
+    const int diffRet    = RunDiff(goldStandardSamFn, generatedSamFn);
+    EXPECT_EQ(0, convertRet);
+    EXPECT_EQ(0, diffRet);
+
+    // clean up
+    Remove(generatedFiles);
 }
 
+} // namespace tests
+} // namespace BAM
+} // namespace PacBio
+
 // sanity check for rest of tests below
-TEST(EndToEndTest, ReadPureHtslib_WritePureHtslib)
+TEST(EndToEndTest, ReadAndWrite_PureHtslib)
 {
-    // open input BAM file
-    PBBAM_SHARED_PTR<samFile> inputBam(sam_open(inputBamFn.c_str(), "r"), SamFileDeleter());
-    EXPECT_TRUE(inputBam != 0);
-    PBBAM_SHARED_PTR<bam_hdr_t> header(sam_hdr_read(inputBam.get()), BamHdrDeleter());
+    { // scoped to force flush & close before conversion/diff
 
-    // open output BAM file
-    PBBAM_SHARED_PTR<samFile> outputBam(sam_open(generatedBamFn.c_str(), "wb"), SamFileDeleter());
-    sam_hdr_write(outputBam.get(), header.get());
+        // open files
 
-    // copy BAM file
-    PBBAM_SHARED_PTR<bam1_t> record(bam_init1(), Bam1Deleter());
-    while (sam_read1(inputBam.get(), header.get(), record.get()) >= 0)
-        sam_write1(outputBam.get(), header.get(), record.get());
+        unique_ptr<samFile, SamFileDeleter> inWrapper(sam_open(inputBamFn.c_str(), "r"));
+        samFile* in = inWrapper.get();
+        ASSERT_TRUE(in);
 
-    // need to close files before comparing (to flush any buffers)
-    inputBam.reset();
-    outputBam.reset();
+        unique_ptr<samFile, SamFileDeleter> outWrapper(sam_open(generatedBamFn.c_str(), "wb"));
+        samFile* out = outWrapper.get();
+        ASSERT_TRUE(out);
 
-    // convert to sam & diff against gold standard
+        // fetch & write header
 
-    // TODO: disabled for now - need to replace non-PB BAM files in test cases
+        unique_ptr<bam_hdr_t, BamHdrDeleter> headerWrapper(sam_hdr_read(in));
+        bam_hdr_t* hdr = headerWrapper.get();
+        ASSERT_TRUE(hdr);
+        ASSERT_EQ(0, sam_hdr_write(out, hdr));
 
-//    const int convertRet = Samtools_Bam2Sam(generatedBamFn, generatedSamFn);
-//    const int diffRet    = Diff_Sam2Sam(goldStandardSamFn, generatedSamFn);
-//    EXPECT_EQ(0, convertRet);
-//    EXPECT_EQ(0, diffRet);
+        // fetch & write records
 
-    // clean up
-    RemoveGeneratedFiles(generatedBamFn, generatedSamFn);
+        unique_ptr<bam1_t, Bam1Deleter> record(bam_init1());
+        bam1_t* b = record.get();
+        ASSERT_TRUE(b);
+
+        while (sam_read1(in, hdr, b) >= 0)
+            sam_write1(out, hdr, b);
+    }
+
+    CheckGeneratedOutput();
 }
 
-TEST(EndToEndTest, ReadBamRecord_WriteBamRecord_SingleThread)
+TEST(EndToEndTest, ReadAndWrite_SingleThread)
 {
     EXPECT_NO_THROW(
     {
         // open input BAM file
-        BamFile bamFile(inputBamFn);
+        BamFile bamFile(tests::inputBamFn);
 
         // open output BAM file
-        BamWriter writer(generatedBamFn, bamFile.Header(), BamWriter::DefaultCompression, 1);
+        BamWriter writer(tests::generatedBamFn, bamFile.Header(), BamWriter::DefaultCompression, 1);
 
         // copy BAM file
         EntireFileQuery entireFile(bamFile);
@@ -162,17 +187,10 @@ TEST(EndToEndTest, ReadBamRecord_WriteBamRecord_SingleThread)
             writer.Write(record);
     });
 
-    // convert to sam & diff against gold standard
-    const int convertRet = Samtools_Bam2Sam(generatedBamFn, generatedSamFn);
-    const int diffRet    = Diff_Sam2Sam(goldStandardSamFn, generatedSamFn);
-    EXPECT_EQ(0, convertRet);
-    EXPECT_EQ(0, diffRet);
-
-    // clean up
-    RemoveGeneratedFiles(generatedBamFn, generatedSamFn);
+    CheckGeneratedOutput();
 }
 
-TEST(EndToEndTest, ReadBamRecord_WriteBamRecord_APIDefaultThreadCount)
+TEST(EndToEndTest, ReadAndWrite_APIDefaultThreadCount)
 {
     EXPECT_NO_THROW(
     {
@@ -188,18 +206,10 @@ TEST(EndToEndTest, ReadBamRecord_WriteBamRecord_APIDefaultThreadCount)
             writer.Write(record);
     });
 
-    // convert to sam & diff against gold standard
-    const int convertRet = Samtools_Bam2Sam(generatedBamFn, generatedSamFn);
-    const int diffRet    = Diff_Sam2Sam(goldStandardSamFn, generatedSamFn);
-    EXPECT_EQ(0, convertRet);
-    EXPECT_EQ(0, diffRet);
-
-    // clean up
-    RemoveGeneratedFiles(generatedBamFn, generatedSamFn);
-
+    CheckGeneratedOutput();
 }
 
-TEST(EndToEndTest, ReadBamRecord_WriteBamRecord_SystemDefaultThreadCount)
+TEST(EndToEndTest, ReadAndWrite_SystemDefaultThreadCount)
 {
     EXPECT_NO_THROW(
     {
@@ -207,7 +217,10 @@ TEST(EndToEndTest, ReadBamRecord_WriteBamRecord_SystemDefaultThreadCount)
         BamFile bamFile(inputBamFn);
 
         // open output BAM file
-        BamWriter writer(generatedBamFn, bamFile.Header(), BamWriter::DefaultCompression, 0);
+        BamWriter writer(generatedBamFn,
+                         bamFile.Header(),
+                         BamWriter::DefaultCompression,
+                         0);
 
         // copy BAM file
         EntireFileQuery entireFile(bamFile);
@@ -215,17 +228,10 @@ TEST(EndToEndTest, ReadBamRecord_WriteBamRecord_SystemDefaultThreadCount)
             writer.Write(record);
     });
 
-    // convert to sam & diff against gold standard
-    const int convertRet = Samtools_Bam2Sam(generatedBamFn, generatedSamFn);
-    const int diffRet    = Diff_Sam2Sam(goldStandardSamFn, generatedSamFn);
-    EXPECT_EQ(0, convertRet);
-    EXPECT_EQ(0, diffRet);
-
-    // clean up
-    RemoveGeneratedFiles(generatedBamFn, generatedSamFn);
+    CheckGeneratedOutput();
 }
 
-TEST(EndToEndTest, ReadBamRecord_WriteBamRecord_UserThreadCount)
+TEST(EndToEndTest, ReadAndWrite_UserThreadCount)
 {
     EXPECT_NO_THROW(
     {
@@ -233,7 +239,10 @@ TEST(EndToEndTest, ReadBamRecord_WriteBamRecord_UserThreadCount)
         BamFile bamFile(inputBamFn);
 
         // open output BAM file
-        BamWriter writer(generatedBamFn, bamFile.Header(), BamWriter::DefaultCompression, 6);
+        BamWriter writer(generatedBamFn,
+                         bamFile.Header(),
+                         BamWriter::DefaultCompression,
+                         3);
 
         // copy BAM file
         EntireFileQuery entireFile(bamFile);
@@ -241,12 +250,5 @@ TEST(EndToEndTest, ReadBamRecord_WriteBamRecord_UserThreadCount)
             writer.Write(record);
     });
 
-    // convert to sam & diff against gold standard
-    const int convertRet = Samtools_Bam2Sam(generatedBamFn, generatedSamFn);
-    const int diffRet    = Diff_Sam2Sam(goldStandardSamFn, generatedSamFn);
-    EXPECT_EQ(0, convertRet);
-    EXPECT_EQ(0, diffRet);
-
-    // clean up
-    RemoveGeneratedFiles(generatedBamFn, generatedSamFn);
+    CheckGeneratedOutput();
 }
diff --git a/tests/src/test_EntireFileQuery.cpp b/tests/src/test_EntireFileQuery.cpp
index b444f2b..6acfbb2 100644
--- a/tests/src/test_EntireFileQuery.cpp
+++ b/tests/src/test_EntireFileQuery.cpp
@@ -42,6 +42,7 @@
 #include "TestData.h"
 #include <gtest/gtest.h>
 #include <pbbam/EntireFileQuery.h>
+#include <pbbam/BamWriter.h>
 #include <string>
 using namespace PacBio;
 using namespace PacBio::BAM;
@@ -142,7 +143,7 @@ TEST(BamRecordTest, ReferenceName)
 //    {
 //        const string exampleBam  = tests::Data_Dir + "/unmap1.bam";
 //        BamFile bamFile(exampleBam);
-//        staging::EntireFileQuery records(bamFile);
+//        EntireFileQuery records(bamFile);
 
 //        EXPECT_THROW(records.begin()->ReferenceName(), std::exception);
 //    }
diff --git a/tests/src/test_FileUtils.cpp b/tests/src/test_FileUtils.cpp
new file mode 100644
index 0000000..814200f
--- /dev/null
+++ b/tests/src/test_FileUtils.cpp
@@ -0,0 +1,325 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include <gtest/gtest.h>
+#include <pbbam/../../src/FileUtils.h>
+#include <pbbam/../../src/TimeUtils.h>
+
+#include <boost/algorithm/string.hpp>
+
+#include <chrono>
+#include <string>
+#include <vector>
+#include <cctype>
+#include <cstdio>
+#include <cstdlib>
+
+#include <iostream>
+
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+TEST(FileUtilsTest, ExistsOk)
+{
+    EXPECT_FALSE(FileUtils::Exists("does_not_exist.txt"));
+
+    const string tmp = "/tmp/pbbam_exists_check.tmp";
+    const string cmd = string("touch ") + tmp;
+    ASSERT_EQ(0, system(cmd.c_str()));
+    EXPECT_TRUE(FileUtils::Exists(tmp));
+}
+
+TEST(FileUtilsTest, LastModifiedOk)
+{
+    // a little tricky to check without going a full 'mock' filesystem route, but we can approximate
+    //
+    // also, I can't seem to get better than second resolution (on OSX 10.9/clang at least, st_mtimespec.tv_nsec is always zero)
+
+    const auto now = CurrentTime();
+    const auto nowDuration = now.time_since_epoch();
+    const auto nowSeconds = chrono::duration_cast<chrono::seconds>(nowDuration).count();
+
+    const string tmp = "/tmp/pbbam_lastmod_check.tmp";
+    const string rmCmd = string("rm ") + tmp;
+    const string touchCmd = string("touch  ") + tmp;
+    int ret =  system(rmCmd.c_str());
+    (void)ret; // unused
+    ASSERT_EQ(0, system(touchCmd.c_str()));
+
+    const auto stamp = FileUtils::LastModified(tmp);
+    const auto stampDuration = stamp.time_since_epoch();
+    const auto stampSeconds = chrono::duration_cast<chrono::seconds>(stampDuration).count();
+
+    EXPECT_LE(nowSeconds, stampSeconds);
+}
+
+TEST(FileUtilsTest, ResolvedFilePathOk)
+{
+    const string testFrom = "/path/to/myDir";
+
+    // "raw" filenames - no URI scheme
+
+    const string absolutePath = "/absolute/path/to/file.txt";
+    const string relativePath = "../relative/path/to/file.txt";
+    const string noPathFn     = "file.txt";
+    
+    const string resolvedAbsolutePath = FileUtils::ResolvedFilePath(absolutePath, testFrom);
+    const string resolvedRelativePath = FileUtils::ResolvedFilePath(relativePath, testFrom);
+    const string resolvedNoPath       = FileUtils::ResolvedFilePath(noPathFn, testFrom);
+    const string resolvedAbsolutePath_defaultFrom = FileUtils::ResolvedFilePath(absolutePath);
+    const string resolvedRelativePath_defaultFrom = FileUtils::ResolvedFilePath(relativePath);
+    const string resolvedNoPath_defaultFrom       = FileUtils::ResolvedFilePath(noPathFn);
+
+    EXPECT_EQ("/absolute/path/to/file.txt",                  resolvedAbsolutePath);
+    EXPECT_EQ("/path/to/myDir/../relative/path/to/file.txt", resolvedRelativePath);
+    EXPECT_EQ("/path/to/myDir/file.txt",                     resolvedNoPath);
+
+    EXPECT_EQ("/absolute/path/to/file.txt",     resolvedAbsolutePath_defaultFrom);
+    EXPECT_EQ("./../relative/path/to/file.txt", resolvedRelativePath_defaultFrom);
+    EXPECT_EQ("./file.txt",                     resolvedNoPath_defaultFrom);
+
+    // filenames with URI scheme ("file://")
+
+    const string absoluteSchemeFn = "file:///absolute/path/to/file.txt";
+    const string relativeSchemeFn = "file://../relative/path/to/file.txt";
+    const string noPathSchemeFn   = "file://file.txt";
+
+    const string resolvedAbsoluteSchemePath = FileUtils::ResolvedFilePath(absoluteSchemeFn, testFrom);
+    const string resolvedRelativeSchemePath = FileUtils::ResolvedFilePath(relativeSchemeFn, testFrom);
+    const string resolvedNoPathSchemeFn     = FileUtils::ResolvedFilePath(noPathSchemeFn, testFrom);
+    const string resolvedAbsoluteSchemePath_defaultFrom = FileUtils::ResolvedFilePath(absoluteSchemeFn);
+    const string resolvedRelativeSchemePath_defaultFrom = FileUtils::ResolvedFilePath(relativeSchemeFn);
+    const string resolvedNoPathSchemeFn_defaultFrom     = FileUtils::ResolvedFilePath(noPathSchemeFn);
+
+    EXPECT_EQ("/absolute/path/to/file.txt",                  resolvedAbsoluteSchemePath);
+    EXPECT_EQ("/path/to/myDir/../relative/path/to/file.txt", resolvedRelativeSchemePath);
+    EXPECT_EQ("/path/to/myDir/file.txt",                     resolvedNoPathSchemeFn);
+
+    EXPECT_EQ("/absolute/path/to/file.txt",                  resolvedAbsoluteSchemePath_defaultFrom);
+    EXPECT_EQ("./../relative/path/to/file.txt", resolvedRelativeSchemePath_defaultFrom);
+    EXPECT_EQ("./file.txt",                     resolvedNoPathSchemeFn_defaultFrom);
+}
+
+TEST(FileUtilsTest, SizeOk)
+{
+    const string tmp = "/tmp/pbbam_empty_file.tmp";
+    const string cmd = string("touch ") + tmp;
+    ASSERT_EQ(0, system(cmd.c_str()));
+    EXPECT_EQ(0, FileUtils::Size(tmp));
+
+    EXPECT_THROW(FileUtils::Size("does_not_exist.txt"), std::runtime_error);
+}
+
+// ####################################################################################################
+// The code below is part of a simple check whether or not a (Windows-only) file path is absolute.
+//
+// NOTE: (and this is admittedly brittle for maintenance, but) the internal methods used are literally
+// copied here for direct driving. There's likely a better way going forward, than the manual copy/paste.
+// But in the absence of a similar runtime environment to build in & test against, while
+// the motivating behavior is blocking other work, this lets me get the fix in their hands ASAP and still
+// have some test code poking it beforehand. -DB
+//
+namespace test_windows {
+
+static string removeFileUriScheme(const string& uri)
+{
+    assert(!uri.empty());
+
+    auto schemeLess = uri;
+    const auto fileScheme = string{"file://"};
+    const auto schemeFound = schemeLess.find(fileScheme);
+    if (schemeFound != string::npos) {
+        if (schemeFound != 0)
+            throw runtime_error("Malformed URI: scheme not at beginning");
+        schemeLess = schemeLess.substr(fileScheme.size());
+    }
+    return schemeLess;
+}
+
+static
+string removeDiskName(const string& filePath)
+{
+    if (filePath.size() >= 2) {
+        const char firstChar = filePath.at(0);
+        if ((isalpha(firstChar) != 0) && (filePath.at(1) == ':'))
+            return filePath.substr(2);
+    }
+    return filePath;
+}
+
+static const char native_pathSeparator = '\\';
+
+static bool native_pathIsAbsolute(const string& filePath)
+{
+    assert(!filePath.empty());
+
+    // if starts with single slash or double slash [cases 1,3]
+    if (boost::algorithm::starts_with(filePath, "\\"))
+        return true;
+
+    // if starts with single or double-dots -> not absolute [case 4 + ".\file.txt"]
+    if (boost::algorithm::starts_with(filePath, "."))
+        return false;
+
+    // if starts with drive name and colon ("C:\foo\bar.txt")
+    if (filePath.size() >= 2) {
+        const char firstChar = filePath.at(0);
+        if ((isalpha(firstChar) != 0) && (filePath.at(1) == ':'))
+            return native_pathIsAbsolute(removeDiskName(filePath));
+    }
+
+    // otherwise, likely relative
+    return false;
+}
+
+static string native_resolvedFilePath(const string& filePath,
+                                      const string& from)
+{
+    // strip the "file://" scheme, if present
+    auto schemeLess = removeFileUriScheme(filePath);
+
+    // if empty or already an absolute path, return it as-is (scheme removed);
+    // the upfront empty check simplifies the parsing logic below
+    if (schemeLess.empty() || native_pathIsAbsolute(schemeLess))
+        return schemeLess;
+
+    // otherwise, resolve relative to the provided 'from' directory
+    //
+    // first pop the disk name, then any leading single-dot '.'
+    //
+    // since we're prepending the 'from' directory, we can remove
+    // any leading '.\' from our file path. this may just mean that
+    // we pop it off to add it right back (when from == '.'), but this
+    // keeps it consistent with other 'from' parent directories
+    //
+    schemeLess = removeDiskName(schemeLess);
+
+    const bool thisDirAtStart = (schemeLess.find(".") == 0);
+    if (thisDirAtStart) {
+        if (schemeLess.find(native_pathSeparator) == 1)
+            schemeLess = schemeLess.substr(2);
+    }
+    return from + native_pathSeparator + schemeLess;
+}
+
+} // namespace test_windows
+
+TEST(FileUtilsTest, WindowsPathsOk)
+{
+    { // remove disk name: a leading "X:" is dropped, anything else is returned unchanged
+
+        // "C:\tmp.txt"
+        string f1 = "C:\\tmp.txt";
+        EXPECT_EQ(string("\\tmp.txt"), test_windows::removeDiskName(f1));
+
+        // "C:tmp.txt"
+        string f2 = "C:tmp.txt";
+        EXPECT_EQ(string("tmp.txt"), test_windows::removeDiskName(f2));
+
+        // "\tmp.txt"
+        string f3 = "\\tmp.txt";
+        EXPECT_EQ(f3, test_windows::removeDiskName(f3));
+
+        // "tmp.txt"
+        string f4 = "tmp.txt";
+        EXPECT_EQ(f4, test_windows::removeDiskName(f4));
+    }
+
+    { // isAbsolute ? a leading backslash (optionally after a disk name) means absolute
+
+        // "\\server\path\to\tmp.txt" (every backslash must be escaped; "\t" alone would be a TAB)
+        EXPECT_TRUE(test_windows::native_pathIsAbsolute("\\\\server\\path\\to\\tmp.txt"));
+
+        // "..\tmp.txt"
+        EXPECT_FALSE(test_windows::native_pathIsAbsolute("..\\tmp.txt"));
+
+        // ".\tmp.txt"
+        EXPECT_FALSE(test_windows::native_pathIsAbsolute(".\\tmp.txt"));
+
+        // "C:\path\to\tmp.txt"
+        EXPECT_TRUE(test_windows::native_pathIsAbsolute("C:\\path\\to\\tmp.txt"));
+
+        // "C:..\path\to\tmp.txt"
+        EXPECT_FALSE(test_windows::native_pathIsAbsolute("C:..\\path\\to\\tmp.txt"));
+    }
+
+    { // resolve file path: absolute inputs pass through, relative ones are joined onto myRootDir
+
+        const string myRootDir = "C:\\path\\to\\myRootDir";
+
+        // "\\server\path\to\tmp.txt" (every backslash must be escaped; "\t" alone would be a TAB)
+        const string fn1 = "\\\\server\\path\\to\\tmp.txt";
+        const string fn1_expected  = fn1;
+        EXPECT_EQ(fn1_expected, test_windows::native_resolvedFilePath(fn1, myRootDir));
+
+        // "..\tmp.txt"
+        const string fn2 = "..\\tmp.txt";
+        const string fn2_expected = "C:\\path\\to\\myRootDir\\..\\tmp.txt";
+        EXPECT_EQ(fn2_expected, test_windows::native_resolvedFilePath(fn2, myRootDir));
+
+        // ".\tmp.txt"
+        const string fn3 = ".\\tmp.txt";
+        const string fn3_expected = "C:\\path\\to\\myRootDir\\tmp.txt";
+        EXPECT_EQ(fn3_expected, test_windows::native_resolvedFilePath(fn3, myRootDir));
+
+        // "C:\path\to\tmp.txt"
+        const string fn4 = "C:\\path\\to\\tmp.txt";
+        const string fn4_expected  = fn4;
+        EXPECT_EQ(fn4_expected, test_windows::native_resolvedFilePath(fn4, myRootDir));
+
+        // "C:..\path\to\tmp.txt"
+        const string fn5 = "C:..\\path\\to\\tmp.txt";
+        const string fn5_expected = "C:\\path\\to\\myRootDir\\..\\path\\to\\tmp.txt";
+        EXPECT_EQ(fn5_expected, test_windows::native_resolvedFilePath(fn5, myRootDir));
+
+        // "C:tmp.txt"
+        const string fn6 = "C:tmp.txt";
+        const string fn6_expected = "C:\\path\\to\\myRootDir\\tmp.txt";
+        EXPECT_EQ(fn6_expected, test_windows::native_resolvedFilePath(fn6, myRootDir));
+        EXPECT_EQ(fn3_expected, test_windows::native_resolvedFilePath(fn6, myRootDir)); // our path is equivalent to fn3's ".\tmp.txt"
+    }
+}
+//
+// ####################################################################################################
+
+
diff --git a/tests/src/test_GenomicIntervalQuery.cpp b/tests/src/test_GenomicIntervalQuery.cpp
index 0fb98bc..43c8c1e 100644
--- a/tests/src/test_GenomicIntervalQuery.cpp
+++ b/tests/src/test_GenomicIntervalQuery.cpp
@@ -1,182 +1,166 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//
-//  * Redistributions in binary form must reproduce the above
-//    copyright notice, this list of conditions and the following
-//    disclaimer in the documentation and/or other materials provided
-//    with the distribution.
-//
-//  * Neither the name of Pacific Biosciences nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-
-// Author: Derek Barnett
-
-#ifdef PBBAM_TESTING
-#define private public
-#endif
-
-#include "TestData.h"
-#include <gtest/gtest.h>
-#include <pbbam/GenomicIntervalQuery.h>
-#include <iostream>
-#include <string>
-using namespace PacBio;
-using namespace PacBio::BAM;
-using namespace std;
-
-const string inputBamFn = tests::Data_Dir + "/ex2.bam";
-
-TEST(GenomicIntervalQueryTest, ReuseQueryAndCountRecords)
-{
-    const string seq1 = "seq1";
-    const string seq2 = "seq2";
-
-    // open input BAM file
-    BamFile bamFile(inputBamFn);
-
-    // count records
-    int count = 0;
-    GenomicInterval interval(seq1, 0, 100);
-    GenomicIntervalQuery query(interval, bamFile);
-    for (const BamRecord& record : query) {
-        (void)record;
-        ++count;
-    }
-    EXPECT_EQ(39, count);
-
-    // adjust interval and pass back in
-    count = 0;
-    interval.Start(500);
-    interval.Stop(600);
-    query.Interval(interval);
-    for (const BamRecord& record : query) {
-        (void)record;
-        ++count;
-    }
-    EXPECT_EQ(166, count);
-
-    // adjust again
-    count = 0;
-    interval.Name(seq2);
-    interval.Start(0);
-    interval.Stop(100);
-    query.Interval(interval);
-    for (const BamRecord& record : query) {
-        (void)record;
-        ++count;
-    }
-    EXPECT_EQ(83, count);
-
-    // unknown ref
-    count = 0;
-    interval.Name("does not exist");
-    interval.Start(0);
-    interval.Stop(100);
-    EXPECT_THROW(
-        query.Interval(interval);
-    , std::exception);
-    for (const BamRecord& record : query) {    // iteration is still safe, just returns no data
-        (void)record;
-        ++count;
-    }
-    EXPECT_EQ(0, count);
-
-    // adjust again - make sure we can read a real region after an invalid one
-    interval.Name(seq2);
-    interval.Start(0);
-    interval.Stop(100);
-    query.Interval(interval);
-    count = 0;
-    for (const BamRecord& record : query) {
-        (void)record;
-        ++count;
-    }
-    EXPECT_EQ(83, count);
-}
-
-TEST(GenomicIntervalQueryTest, NonConstBamRecord)
-{
-    EXPECT_NO_THROW(
-    {
-        // open input BAM file
-        BamFile bamFile(inputBamFn);
-
-        // count records
-        int count = 0;
-        GenomicInterval interval("seq1", 0, 100);
-        GenomicIntervalQuery query(interval, bamFile);
-        for (BamRecord& record : query) {
-            (void)record;
-            ++count;
-        }
-        EXPECT_EQ(39, count);
-    });
-}
-
-//TEST(GenomicIntervalQueryTest, WorksWithBamRecordImpl)
-//{
-//    // open input BAM file
-//    BamFile bamFile(inputBamFn);
-//    EXPECT_TRUE(bamFile);
-
-//    const int id = bamFile.ReferenceId("seq1");
-//    EXPECT_TRUE(id != -1);
-
-//    // count records
-//    int count = 0;
-//    GenomicInterval interval(id, 0, 100);
-//    GenomicIntervalQuery query(interval, bamFile);
-//    EXPECT_TRUE(query);
-//    for (const BamRecordImpl& record : query) {
-//        (void)record;
-//        ++count;
-//    }
-//    EXPECT_EQ(39, count);
-//}
-
-//TEST(GenomicIntervalQueryTest, WorksWithNonConstBamRecordImpl)
-//{
-//    // open input BAM file
-//    BamFile bamFile(inputBamFn);
-//    EXPECT_TRUE(bamFile);
-
-//    const int id = bamFile.ReferenceId("seq1");
-//    EXPECT_TRUE(id != -1);
-
-//    // count records
-//    int count = 0;
-//    GenomicInterval interval(id, 0, 100);
-//    GenomicIntervalQuery query(interval, bamFile);
-//    EXPECT_TRUE(query);
-//    for (BamRecordImpl& record : query) {
-//        (void)record;
-//        ++count;
-//    }
-//    EXPECT_EQ(39, count);
-//}
-
-// add special cases as needed
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include "TestData.h"
+#include <gtest/gtest.h>
+#include <pbbam/GenomicIntervalQuery.h>
+#include <iostream>
+#include <string>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+const string inputBamFn = tests::Data_Dir + "/ex2.bam";
+
+TEST(GenomicIntervalQueryTest, ReuseQueryAndCountRecords)
+{
+    const string seq1 = "seq1";
+    const string seq2 = "seq2";
+
+    // open input BAM file
+    BamFile bamFile(inputBamFn);
+
+    // count records
+    int count = 0;
+    GenomicInterval interval(seq1, 0, 100);
+    GenomicIntervalQuery query(interval, bamFile);
+    for (const BamRecord& record : query) {
+        (void)record;
+        ++count;
+    }
+    EXPECT_EQ(39, count);
+
+    // adjust interval and pass back in
+    count = 0;
+    interval.Start(500);
+    interval.Stop(600);
+    query.Interval(interval);
+    for (const BamRecord& record : query) {
+        (void)record;
+        ++count;
+    }
+    EXPECT_EQ(166, count);
+
+    // adjust again
+    count = 0;
+    interval.Name(seq2);
+    interval.Start(0);
+    interval.Stop(100);
+    query.Interval(interval);
+    for (const BamRecord& record : query) {
+        (void)record;
+        ++count;
+    }
+    EXPECT_EQ(83, count);
+
+    // unknown ref
+    count = 0;
+    interval.Name("does not exist");
+    interval.Start(0);
+    interval.Stop(100);
+    EXPECT_THROW(
+        query.Interval(interval);
+    , std::exception);
+    for (const BamRecord& record : query) {    // iteration is still safe, just returns no data
+        (void)record;
+        ++count;
+    }
+    EXPECT_EQ(0, count);
+
+    // adjust again - make sure we can read a real region after an invalid one
+    interval.Name(seq2);
+    interval.Start(0);
+    interval.Stop(100);
+    query.Interval(interval);
+    count = 0;
+    for (const BamRecord& record : query) {
+        (void)record;
+        ++count;
+    }
+    EXPECT_EQ(83, count);
+}
+
+TEST(GenomicIntervalQueryTest, NonConstBamRecord)
+{
+    EXPECT_NO_THROW(
+    {
+        // open input BAM file
+        BamFile bamFile(inputBamFn);
+
+        // count records
+        int count = 0;
+        GenomicInterval interval("seq1", 0, 100);
+        GenomicIntervalQuery query(interval, bamFile);
+        for (BamRecord& record : query) {
+            (void)record;
+            ++count;
+        }
+        EXPECT_EQ(39, count);
+    });
+}
+
+TEST(GenomicIntervalQueryTest,  MissingBaiShouldThrow)
+{
+    GenomicInterval interval("seq1", 0, 100);
+    const string phi29Bam = tests::Data_Dir + "/phi29.bam";
+    const string hasBaiBam = tests::Data_Dir + "/dataset/bam_mapping1.bam";
+
+    { // single file, missing BAI
+
+        EXPECT_THROW(GenomicIntervalQuery query(interval, phi29Bam), std::runtime_error);
+    }
+
+    { // from dataset, all missing BAI
+
+        DataSet ds;
+        ds.ExternalResources().Add(ExternalResource("PacBio.SubreadFile.SubreadBamFile", phi29Bam));
+        ds.ExternalResources().Add(ExternalResource("PacBio.SubreadFile.SubreadBamFile", phi29Bam));
+        EXPECT_THROW(GenomicIntervalQuery query(interval, ds), std::runtime_error);
+    }
+
+    { // from dataset, mixed BAI presence
+
+        DataSet ds;
+        ds.ExternalResources().Add(ExternalResource("PacBio.SubreadFile.SubreadBamFile", phi29Bam));
+        ds.ExternalResources().Add(ExternalResource("PacBio.AlignmentFile.AlignmentBamFile", hasBaiBam));
+        EXPECT_THROW(GenomicIntervalQuery query(interval, ds), std::runtime_error);
+    }
+}
diff --git a/tests/src/test_PacBioIndex.cpp b/tests/src/test_PacBioIndex.cpp
index c841c68..c747c9c 100644
--- a/tests/src/test_PacBioIndex.cpp
+++ b/tests/src/test_PacBioIndex.cpp
@@ -42,12 +42,13 @@
 #include "TestData.h"
 #include <gtest/gtest.h>
 #include <pbbam/BamFile.h>
+#include <pbbam/BamReader.h>
 #include <pbbam/BamWriter.h>
 #include <pbbam/EntireFileQuery.h>
 #include <pbbam/PbiBuilder.h>
 #include <pbbam/PbiIndex.h>
+#include <pbbam/PbiLookupData.h>
 #include <pbbam/PbiRawData.h>
-#include <pbbam/internal/PbiIndex_p.h>
 #include <string>
 #include <cstdio>
 #include <cstdlib>
@@ -56,40 +57,70 @@ using namespace PacBio;
 using namespace PacBio::BAM;
 using namespace std;
 
-const string test2BamFn = tests::Data_Dir + "/test_group_query/test2.bam";
+const string test2BamFn = tests::Data_Dir + "/dataset/bam_mapping_new.bam";
+const string phi29BamFn = tests::Data_Dir + "/phi29.bam";
 
 namespace PacBio {
 namespace BAM {
 namespace tests {
 
 static
-PbiRawData Test2Bam_RawIndex(void)
+PbiRawData Test2Bam_CoreIndexData(void)
+
+{
+    PbiRawData rawData;
+    rawData.Version(PbiFile::Version_3_0_1);
+    rawData.FileSections(PbiFile::BASIC | PbiFile::MAPPED | PbiFile::REFERENCE);
+    rawData.NumReads(10);
+
+    PbiRawBasicData& basicData = rawData.BasicData();
+    basicData.rgId_       = {-1574697275,-1574697275,-1574697275,-1574697275,-1574697275,-1574697275,-1574697275,-1574697275,-1574697275,-1574697275};
+    basicData.qStart_     = {48,387,0,9936,10232,7468,5557,7285,426,7064};
+    basicData.qEnd_       = {1132,1134,344,10187,10394,8906,7235,8657,1045,7421};
+    basicData.holeNumber_ = {49050,32328,32328,6469,6469,30983,13473,13473,19915,30983};
+    basicData.readQual_   = {0,0,0,0,0,0,0,0,0,0};
+    basicData.ctxtFlag_   = {0,0,0,0,0,0,0,0,0,0};
+    basicData.fileOffset_ = { };
+
+    PbiRawMappedData& mappedData = rawData.MappedData();
+    mappedData.tId_       = {0,0,0,0,0,0,0,0,0,0};
+    mappedData.tStart_    = {0,302,675,2170,2203,3572,4506,4507,4592,4669};
+    mappedData.tEnd_      = {471,1019,1026,2397,2326,5015,6125,5850,5203,5011};
+    mappedData.aStart_    = {653,395,1,9960,10271,7468,5574,7285,441,7075};
+    mappedData.aEnd_      = {1129,1134,344,10185,10394,8906,7235,8647,1040,7418};
+    mappedData.revStrand_ = {0,1,0,1,0,1,1,0,1,0};
+    mappedData.nM_        = {460,704,339,216,118,1394,1581,1313,583,333};
+    mappedData.nMM_       = {0,0,0,0,0,0,0,0,0,0};
+    mappedData.mapQV_     = {254,254,254,254,254,254,254,254,254,254};
+
+    PbiRawReferenceData& referenceData = rawData.ReferenceData();
+    referenceData.entries_ = {
+        PbiReferenceEntry{0,0,10},
+        PbiReferenceEntry{4294967295,4294967295,4294967295}
+    };
+
+    return rawData;
+}
+
+// NOTE: We have 2 different sets of offsets because the copied, new file differs in size from the existing one.
+//
+//       Unsure which combination of write parameters was used on the original. Things like thread count,
+//       compression level, etc. can affect compression ratio, BGZF block sizes, etc. even though the BAM record
+//       content itself is equal. So we'll just track these index values separately, for now at least.
+//
+static
+PbiRawData Test2Bam_ExistingIndex(void)
 {
-    PbiRawData index;
-    index.Version(PbiFile::Version_3_0_0);
-    index.FileSections(PbiFile::SUBREAD | PbiFile::MAPPED);
-    index.NumReads(4);
-
-    PbiRawSubreadData& subreadData = index.SubreadData();
-    subreadData.rgId_       = { -1197849594, -1197849594, -1197849594, -1197849594 };
-    subreadData.qStart_     = { 2114, 2579, 4101, 5615 };
-    subreadData.qEnd_       = { 2531, 4055, 5571, 6237 };
-    subreadData.holeNumber_ = { 14743, 14743, 14743, 14743 };
-    subreadData.readQual_   = { 901, 901, 901, 901 };
-    subreadData.fileOffset_ =  { 35651584, 35655123, 35667124, 35679164 };
-
-    PbiRawMappedData& mappedData = index.mappedData_;
-    mappedData.tId_       = { 0, 0, 0, 0 };
-    mappedData.tStart_    = { 9507, 8453, 8455, 9291 };
-    mappedData.tEnd_      = { 9903, 9902, 9893, 9900 };
-    mappedData.aStart_    = { 2130, 2581, 4102, 5619 };
-    mappedData.aEnd_      = { 2531, 4055, 5560, 6237 };
-    mappedData.revStrand_ = { 0, 1, 0, 1 };
-    mappedData.mapQV_     = { 254, 254, 254, 254 };
-    mappedData.nM_        = { 384, 1411, 1393, 598 };
-    mappedData.nMM_       = { 0, 0, 0, 0 };             // old 'M' ops were just replaced w/ '=', no 'X'
-
-    // reference & barcode data are empty for this file
+    PbiRawData index = Test2Bam_CoreIndexData();
+    index.BasicData().fileOffset_ = {32636928,32645486,32651627,32654529,32656778,32658272,32669996,32683648,32694741,1388838912};
+    return index;
+}
+
+static
+PbiRawData Test2Bam_NewIndex(void)
+{
+    PbiRawData index = Test2Bam_CoreIndexData();
+    index.BasicData().fileOffset_ = { 33095680, 233766912, 387448832, 463667200, 530317312, 579731456, 857341952, 1171062784, 1436352512, 1567621120 };
     return index;
 }
 
@@ -102,13 +133,14 @@ void ExpectRawIndicesEqual(const PbiRawData& expected, const PbiRawData& actual)
     EXPECT_EQ(expected.NumReads(),     actual.NumReads());
 
     // subread data
-    const PbiRawSubreadData& e = expected.SubreadData();
-    const PbiRawSubreadData& a = actual.SubreadData();
+    const PbiRawBasicData& e = expected.BasicData();
+    const PbiRawBasicData& a = actual.BasicData();
     EXPECT_EQ(e.rgId_,       a.rgId_);
     EXPECT_EQ(e.qStart_,     a.qStart_);
     EXPECT_EQ(e.qEnd_,       a.qEnd_);
     EXPECT_EQ(e.holeNumber_, a.holeNumber_);
     EXPECT_EQ(e.readQual_,   a.readQual_);
+    EXPECT_EQ(e.ctxtFlag_,   a.ctxtFlag_);
     EXPECT_EQ(e.fileOffset_, a.fileOffset_);
 
     // mapped data
@@ -140,28 +172,28 @@ void ExpectRawIndicesEqual(const PbiRawData& expected, const PbiRawData& actual)
     if (expected.HasBarcodeData() && actual.HasBarcodeData()) {
         const PbiRawBarcodeData& e = expected.BarcodeData();
         const PbiRawBarcodeData& a = actual.BarcodeData();
-        EXPECT_EQ(e.bcLeft_,   a.bcLeft_);
-        EXPECT_EQ(e.bcRight_,  a.bcRight_);
+        EXPECT_EQ(e.bcForward_,   a.bcForward_);
+        EXPECT_EQ(e.bcReverse_,  a.bcReverse_);
         EXPECT_EQ(e.bcQual_,   a.bcQual_);
-        EXPECT_EQ(e.ctxtFlag_, a.ctxtFlag_);
     }
 }
 
 static
-bool SubreadLookupsEqual(const internal::SubreadLookupData& lhs,
-                         const internal::SubreadLookupData& rhs)
+bool BasicLookupsEqual(const BasicLookupData& lhs,
+                         const BasicLookupData& rhs)
 {
     return (lhs.rgId_ == rhs.rgId_ &&
             lhs.qStart_ == rhs.qStart_ &&
             lhs.qEnd_ == rhs.qEnd_ &&
             lhs.holeNumber_ == rhs.holeNumber_ &&
             lhs.readQual_ == rhs.readQual_ &&
+            lhs.ctxtFlag_ == rhs.ctxtFlag_ &&
             lhs.fileOffset_ == rhs.fileOffset_);
 }
 
 static
-bool MappedLookupsEqual(const internal::MappedLookupData& lhs,
-                        const internal::MappedLookupData& rhs)
+bool MappedLookupsEqual(const MappedLookupData& lhs,
+                        const MappedLookupData& rhs)
 {
     return (lhs.tId_ == rhs.tId_ &&
             lhs.tStart_ == rhs.tStart_ &&
@@ -176,20 +208,19 @@ bool MappedLookupsEqual(const internal::MappedLookupData& lhs,
 }
 
 static
-bool ReferenceLookupsEqual(const internal::ReferenceLookupData& lhs,
-                           const internal::ReferenceLookupData& rhs)
+bool ReferenceLookupsEqual(const ReferenceLookupData& lhs,
+                           const ReferenceLookupData& rhs)
 {
     return lhs.references_ == rhs.references_;
 }
 
 static
-bool BarcodeLookupsEqual(const internal::BarcodeLookupData& lhs,
-                         const internal::BarcodeLookupData& rhs)
+bool BarcodeLookupsEqual(const BarcodeLookupData& lhs,
+                         const BarcodeLookupData& rhs)
 {
-    return (lhs.bcLeft_ == rhs.bcLeft_ &&
-            lhs.bcRight_ == rhs.bcRight_ &&
-            lhs.bcQual_ == rhs.bcQual_ &&
-            lhs.ctxtFlag_ == rhs.ctxtFlag_);
+    return (lhs.bcForward_ == rhs.bcForward_ &&
+            lhs.bcReverse_ == rhs.bcReverse_ &&
+            lhs.bcQual_ == rhs.bcQual_);
 }
 
 static
@@ -210,7 +241,7 @@ bool PbiIndicesEqual(const PbiIndex& lhs, const PbiIndex& rhs)
     { return false; }
 
     // component compare
-    if ( !SubreadLookupsEqual(lhsImpl->subreadData_,     rhsImpl->subreadData_)   ||
+    if ( !BasicLookupsEqual(lhsImpl->basicData_,         rhsImpl->basicData_)   ||
          !MappedLookupsEqual(lhsImpl->mappedData_,       rhsImpl->mappedData_)    ||
          !ReferenceLookupsEqual(lhsImpl->referenceData_, rhsImpl->referenceData_) ||
          !BarcodeLookupsEqual(lhsImpl->barcodeData_,     rhsImpl->barcodeData_))
@@ -228,7 +259,7 @@ TEST(PacBioIndexTest, CreateFromExistingBam)
 {
     // do this in temp directory, so we can ensure write access
     const string tempDir    = "/tmp/";
-    const string tempBamFn  = tempDir + "test2.bam";
+    const string tempBamFn  = tempDir + "bam_mapping_new.bam";
     const string tempPbiFn  = tempBamFn + ".pbi";
     string cmd("cp ");
     cmd += test2BamFn;
@@ -242,11 +273,11 @@ TEST(PacBioIndexTest, CreateFromExistingBam)
     EXPECT_EQ(tempPbiFn, bamFile.PacBioIndexFilename());
 
     PbiRawData index(bamFile.PacBioIndexFilename());
-    EXPECT_EQ(PbiFile::Version_3_0_0,  index.Version());
-    EXPECT_EQ(4, index.NumReads());
+    EXPECT_EQ(PbiFile::Version_3_0_1,  index.Version());
+    EXPECT_EQ(10, index.NumReads());
     EXPECT_TRUE(index.HasMappedData());
 
-    const PbiRawData& expectedIndex = tests::Test2Bam_RawIndex();
+    const PbiRawData& expectedIndex = tests::Test2Bam_ExistingIndex();
     tests::ExpectRawIndicesEqual(expectedIndex, index);
 
     // clean up temp file(s)
@@ -254,6 +285,14 @@ TEST(PacBioIndexTest, CreateFromExistingBam)
     remove(tempPbiFn.c_str());
 }
 
+::testing::AssertionResult CanRead(BamReader& reader, BamRecord& record, int i)
+{
+    if (reader.GetNext(record))
+        return ::testing::AssertionSuccess() << "i: " << i;
+    else
+        return ::testing::AssertionFailure() << "i: " << i;
+}
+
 TEST(PacBioIndexTest, CreateOnTheFly)
 {
     // do this in temp directory, so we can ensure write access
@@ -261,12 +300,16 @@ TEST(PacBioIndexTest, CreateOnTheFly)
     const string tempBamFn  = tempDir + "temp.bam";
     const string tempPbiFn  = tempBamFn + ".pbi";
 
+    // NOTE: the new file differs in size from the existing one (different write parameters may yield different file sizes, even though the content is the same)
+    const vector<int64_t> expectedNewOffsets = { 33095680, 233766912, 387448832, 463667200, 530317312, 579731456, 857341952, 1171062784, 1436352512, 1567621120 };
+    vector<int64_t> observedOffsets;
+
     // create PBI on the fly from input BAM while we write to new file
     {
         BamFile bamFile(test2BamFn);
         BamHeader header = bamFile.Header();
 
-        BamWriter writer(tempBamFn, header);
+        BamWriter writer(tempBamFn, header); // default compression, default thread count
         PbiBuilder builder(tempPbiFn, header.Sequences().size());
 
         int64_t vOffset = 0;
@@ -274,17 +317,48 @@ TEST(PacBioIndexTest, CreateOnTheFly)
         for (const BamRecord& record : entireFile) {
             writer.Write(record, &vOffset);
             builder.AddRecord(record, vOffset);
+            observedOffsets.push_back(vOffset);
+        }
+    }
+
+    EXPECT_EQ(expectedNewOffsets, observedOffsets);
+
+    // sanity check on original file
+    {
+        const vector<int64_t> originalFileOffsets = {32636928,32645486,32651627,32654529,32656778,32658272,32669996,32683648,32694741,1388838912};
+        BamRecord r;
+        BamReader reader(test2BamFn);
+        for (int i = 0; i < originalFileOffsets.size(); ++i) {
+            reader.VirtualSeek(originalFileOffsets.at(i));
+            EXPECT_TRUE(CanRead(reader, r, i));
+        }
+    }
+
+    // attempt to seek in our new file using both expected & observed offsets
+    {
+        BamRecord r;
+        BamReader reader(tempBamFn);
+        for (int i = 0; i < expectedNewOffsets.size(); ++i) {
+            reader.VirtualSeek(expectedNewOffsets.at(i));
+            EXPECT_TRUE(CanRead(reader, r, i));
+        }
+        for (int i = 0; i < observedOffsets.size(); ++i) {
+            reader.VirtualSeek(observedOffsets.at(i));
+            EXPECT_TRUE(CanRead(reader, r, i));
         }
     }
 
     // compare data in new PBI file, to expected data
-    const PbiRawData& expectedIndex = tests::Test2Bam_RawIndex();
+    const PbiRawData& expectedIndex = tests::Test2Bam_NewIndex();
     const PbiRawData& fromBuilt = PbiRawData(tempPbiFn);
     tests::ExpectRawIndicesEqual(expectedIndex, fromBuilt);
 
     // straight diff of newly-generated PBI file to existing PBI
-    const string pbiDiffCmd = string("diff -q ") + test2BamFn + ".pbi " + tempPbiFn;
-    EXPECT_EQ(0, system(pbiDiffCmd.c_str()));
+    // TODO: Come back to this once pbindexdump is in place.
+    //       We can't exactly do this since file offsets may differ between 2 BAMs of differing compression levels.
+    //       Should add some sort of BAM checksum based on contents, not just size, for this reason.
+//    const string pbiDiffCmd = string("diff -q ") + test2BamFn + ".pbi " + tempPbiFn;
+//    EXPECT_EQ(0, system(pbiDiffCmd.c_str()));
 
     // clean up temp file(s)
     remove(tempBamFn.c_str());
@@ -297,15 +371,64 @@ TEST(PacBioIndexTest, RawLoadFromPbiFile)
     const string& pbiFilename = bamFile.PacBioIndexFilename();
     const PbiRawData loadedIndex(pbiFilename);
 
-    const PbiRawData& expectedIndex = tests::Test2Bam_RawIndex();
+    const PbiRawData& expectedIndex = tests::Test2Bam_ExistingIndex();
     tests::ExpectRawIndicesEqual(expectedIndex, loadedIndex);
 }
 
+TEST(PacBioIndexTest, BasicAndBarodeSectionsOnly)
+{
+    // do this in temp directory, so we can ensure write access
+    const string tempDir    = "/tmp/";
+    const string tempBamFn  = tempDir + "phi29.bam";
+    const string tempPbiFn  = tempBamFn + ".pbi";
+    string cmd("cp ");
+    cmd += phi29BamFn;
+    cmd += " ";
+    cmd += tempDir;
+    int cmdResult = system(cmd.c_str());
+    (void)cmdResult;
+
+    BamFile bamFile(tempBamFn);
+    PbiFile::CreateFrom(bamFile);
+    EXPECT_EQ(tempPbiFn, bamFile.PacBioIndexFilename());
+
+    PbiRawData index(bamFile.PacBioIndexFilename());
+    EXPECT_EQ(PbiFile::Version_3_0_1,  index.Version());
+    EXPECT_EQ(120, index.NumReads());
+    EXPECT_FALSE(index.HasMappedData());
+    EXPECT_TRUE(index.HasBarcodeData());
+
+    const vector<int16_t> expectedBcForward = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+        2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2};
+    const vector<int16_t> expectedBcReverse = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+        2,2,2,2,2,2,2,2,2,2,2,2,2,2};
+    const vector<int8_t>  expectedBcQuality = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+        1,1,1,1,1,1,1,1,1,1,1};
+
+    const PbiRawBarcodeData& barcodeData = index.BarcodeData();
+    EXPECT_EQ(expectedBcForward, barcodeData.bcForward_);
+    EXPECT_EQ(expectedBcReverse, barcodeData.bcReverse_);
+    EXPECT_EQ(expectedBcQuality, barcodeData.bcQual_);
+
+
+    // clean up temp file(s)
+    remove(tempBamFn.c_str());
+    remove(tempPbiFn.c_str());
+
+}
+
+
 TEST(PacBioIndexTest, ReferenceDataNotLoadedOnUnsortedBam)
 {
     BamFile bamFile(test2BamFn);
     PbiRawData raw(bamFile.PacBioIndexFilename());
-    EXPECT_FALSE(raw.HasReferenceData());
+    EXPECT_TRUE(raw.HasReferenceData());
 }
 
 TEST(PacBioIndexTest, LookupLoadFromFileOk)
@@ -314,8 +437,8 @@ TEST(PacBioIndexTest, LookupLoadFromFileOk)
     EXPECT_NO_THROW(
     {
         PbiIndex index(bamFile.PacBioIndexFilename());
-        EXPECT_EQ(4, index.NumReads());
-        EXPECT_EQ(vector<int64_t>({ 35651584, 35655123, 35667124, 35679164 }), index.VirtualFileOffsets());
+        EXPECT_EQ(10, index.NumReads());
+        EXPECT_EQ(vector<int64_t>({32636928,32645486,32651627,32654529,32656778,32658272,32669996,32683648,32694741,1388838912}), index.BasicData().VirtualFileOffsets());
     });
 }
 
@@ -359,11 +482,10 @@ TEST(PacBioIndexTest, Copy_and_Move)
 
 TEST(PacBioIndexTest, OrderedLookup)
 {
-    using PacBio::BAM::CompareType;
     using PacBio::BAM::IndexList;
-    using PacBio::BAM::internal::OrderedLookup;
+    using PacBio::BAM::OrderedLookup;
 
-    OrderedLookup<int>::ContainerType oRawData;
+    OrderedLookup<int>::container_type oRawData;
     oRawData[11] = { 0, 3, 4 };
     oRawData[20] = { 1 };
     oRawData[42] = { 2, 7, 8 };
@@ -374,51 +496,50 @@ TEST(PacBioIndexTest, OrderedLookup)
     OrderedLookup<int> oLookup(oRawData);
 
     // EQUAL
-    EXPECT_EQ(IndexList({5}),       oLookup.LookupIndices(10, CompareType::EQUAL));
-    EXPECT_EQ(IndexList({0, 3, 4}), oLookup.LookupIndices(11, CompareType::EQUAL));
-    EXPECT_EQ(IndexList({6}),       oLookup.LookupIndices(12, CompareType::EQUAL));
-    EXPECT_EQ(IndexList({1}),       oLookup.LookupIndices(20, CompareType::EQUAL));
-    EXPECT_EQ(IndexList({2, 7, 8}), oLookup.LookupIndices(42, CompareType::EQUAL));
-    EXPECT_EQ(IndexList({9}),       oLookup.LookupIndices(99, CompareType::EQUAL));
-    EXPECT_EQ(IndexList(),          oLookup.LookupIndices(66, CompareType::EQUAL)); // does not exist
+    EXPECT_EQ(IndexList({5}),       oLookup.LookupIndices(10, Compare::EQUAL));
+    EXPECT_EQ(IndexList({0, 3, 4}), oLookup.LookupIndices(11, Compare::EQUAL));
+    EXPECT_EQ(IndexList({6}),       oLookup.LookupIndices(12, Compare::EQUAL));
+    EXPECT_EQ(IndexList({1}),       oLookup.LookupIndices(20, Compare::EQUAL));
+    EXPECT_EQ(IndexList({2, 7, 8}), oLookup.LookupIndices(42, Compare::EQUAL));
+    EXPECT_EQ(IndexList({9}),       oLookup.LookupIndices(99, Compare::EQUAL));
+    EXPECT_EQ(IndexList(),          oLookup.LookupIndices(66, Compare::EQUAL)); // does not exist
 
     // NOT_EQUAL
-    EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 6, 7, 8, 9}),    oLookup.LookupIndices(10, CompareType::NOT_EQUAL));
-    EXPECT_EQ(IndexList({1, 2, 5, 6, 7, 8, 9}),          oLookup.LookupIndices(11, CompareType::NOT_EQUAL));
-    EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 5, 7, 8, 9}),    oLookup.LookupIndices(12, CompareType::NOT_EQUAL));
-    EXPECT_EQ(IndexList({0, 2, 3, 4, 5, 6, 7, 8, 9}),    oLookup.LookupIndices(20, CompareType::NOT_EQUAL));
-    EXPECT_EQ(IndexList({0, 1, 3, 4, 5, 6, 9}),          oLookup.LookupIndices(42, CompareType::NOT_EQUAL));
-    EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 5, 6, 7, 8}),    oLookup.LookupIndices(99, CompareType::NOT_EQUAL));
-    EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), oLookup.LookupIndices(66, CompareType::NOT_EQUAL)); // does not exist
+    EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 6, 7, 8, 9}),    oLookup.LookupIndices(10, Compare::NOT_EQUAL));
+    EXPECT_EQ(IndexList({1, 2, 5, 6, 7, 8, 9}),          oLookup.LookupIndices(11, Compare::NOT_EQUAL));
+    EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 5, 7, 8, 9}),    oLookup.LookupIndices(12, Compare::NOT_EQUAL));
+    EXPECT_EQ(IndexList({0, 2, 3, 4, 5, 6, 7, 8, 9}),    oLookup.LookupIndices(20, Compare::NOT_EQUAL));
+    EXPECT_EQ(IndexList({0, 1, 3, 4, 5, 6, 9}),          oLookup.LookupIndices(42, Compare::NOT_EQUAL));
+    EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 5, 6, 7, 8}),    oLookup.LookupIndices(99, Compare::NOT_EQUAL));
+    EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), oLookup.LookupIndices(66, Compare::NOT_EQUAL)); // does not exist
 
     // LESS_THAN
-    EXPECT_EQ(IndexList({0, 3, 4, 5, 6}), oLookup.LookupIndices(13, CompareType::LESS_THAN));
-    EXPECT_EQ(IndexList({0, 3, 4, 5}),    oLookup.LookupIndices(12, CompareType::LESS_THAN));
+    EXPECT_EQ(IndexList({0, 3, 4, 5, 6}), oLookup.LookupIndices(13, Compare::LESS_THAN));
+    EXPECT_EQ(IndexList({0, 3, 4, 5}),    oLookup.LookupIndices(12, Compare::LESS_THAN));
     // do more checks
 
     // LESS_THAN_EQUAL
-    EXPECT_EQ(IndexList({0, 3, 4, 5, 6}), oLookup.LookupIndices(13, CompareType::LESS_THAN_EQUAL));
-    EXPECT_EQ(IndexList({0, 3, 4, 5, 6}), oLookup.LookupIndices(12, CompareType::LESS_THAN_EQUAL));
+    EXPECT_EQ(IndexList({0, 3, 4, 5, 6}), oLookup.LookupIndices(13, Compare::LESS_THAN_EQUAL));
+    EXPECT_EQ(IndexList({0, 3, 4, 5, 6}), oLookup.LookupIndices(12, Compare::LESS_THAN_EQUAL));
     // more checks?
 
     // GREATER_THAN
-    EXPECT_EQ(IndexList({2,7,8,9}), oLookup.LookupIndices(41, CompareType::GREATER_THAN));
-    EXPECT_EQ(IndexList({9}),       oLookup.LookupIndices(42, CompareType::GREATER_THAN));
+    EXPECT_EQ(IndexList({2,7,8,9}), oLookup.LookupIndices(41, Compare::GREATER_THAN));
+    EXPECT_EQ(IndexList({9}),       oLookup.LookupIndices(42, Compare::GREATER_THAN));
     // more checks?
 
     // GREATER_THAN_EQUAL
-    EXPECT_EQ(IndexList({2,7,8,9}), oLookup.LookupIndices(41, CompareType::GREATER_THAN_EQUAL));
-    EXPECT_EQ(IndexList({2,7,8,9}), oLookup.LookupIndices(42, CompareType::GREATER_THAN_EQUAL));
+    EXPECT_EQ(IndexList({2,7,8,9}), oLookup.LookupIndices(41, Compare::GREATER_THAN_EQUAL));
+    EXPECT_EQ(IndexList({2,7,8,9}), oLookup.LookupIndices(42, Compare::GREATER_THAN_EQUAL));
     // more checks?
 }
 
 TEST(PacBioIndexTest, UnorderedLookup)
 {
-    using PacBio::BAM::CompareType;
     using PacBio::BAM::IndexList;
-    using PacBio::BAM::internal::UnorderedLookup;
+    using PacBio::BAM::UnorderedLookup;
 
-    UnorderedLookup<int>::ContainerType uRawData;
+    UnorderedLookup<int>::container_type uRawData;
     uRawData[11] = { 0, 3, 4 };
     uRawData[20] = { 1 };
     uRawData[42] = { 2, 7, 8 };
@@ -429,54 +550,53 @@ TEST(PacBioIndexTest, UnorderedLookup)
     UnorderedLookup<int> uLookup(uRawData);
 
     // EQUAL
-    EXPECT_EQ(IndexList({5}),       uLookup.LookupIndices(10, CompareType::EQUAL));
-    EXPECT_EQ(IndexList({0, 3, 4}), uLookup.LookupIndices(11, CompareType::EQUAL));
-    EXPECT_EQ(IndexList({6}),       uLookup.LookupIndices(12, CompareType::EQUAL));
-    EXPECT_EQ(IndexList({1}),       uLookup.LookupIndices(20, CompareType::EQUAL));
-    EXPECT_EQ(IndexList({2, 7, 8}), uLookup.LookupIndices(42, CompareType::EQUAL));
-    EXPECT_EQ(IndexList({9}),       uLookup.LookupIndices(99, CompareType::EQUAL));
-    EXPECT_EQ(IndexList(),          uLookup.LookupIndices(66, CompareType::EQUAL)); // does not exist
+    EXPECT_EQ(IndexList({5}),       uLookup.LookupIndices(10, Compare::EQUAL));
+    EXPECT_EQ(IndexList({0, 3, 4}), uLookup.LookupIndices(11, Compare::EQUAL));
+    EXPECT_EQ(IndexList({6}),       uLookup.LookupIndices(12, Compare::EQUAL));
+    EXPECT_EQ(IndexList({1}),       uLookup.LookupIndices(20, Compare::EQUAL));
+    EXPECT_EQ(IndexList({2, 7, 8}), uLookup.LookupIndices(42, Compare::EQUAL));
+    EXPECT_EQ(IndexList({9}),       uLookup.LookupIndices(99, Compare::EQUAL));
+    EXPECT_EQ(IndexList(),          uLookup.LookupIndices(66, Compare::EQUAL)); // does not exist
 
     // NOT_EQUAL
-    EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 6, 7, 8, 9}),    uLookup.LookupIndices(10, CompareType::NOT_EQUAL));
-    EXPECT_EQ(IndexList({1, 2, 5, 6, 7, 8, 9}),          uLookup.LookupIndices(11, CompareType::NOT_EQUAL));
-    EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 5, 7, 8, 9}),    uLookup.LookupIndices(12, CompareType::NOT_EQUAL));
-    EXPECT_EQ(IndexList({0, 2, 3, 4, 5, 6, 7, 8, 9}),    uLookup.LookupIndices(20, CompareType::NOT_EQUAL));
-    EXPECT_EQ(IndexList({0, 1, 3, 4, 5, 6, 9}),          uLookup.LookupIndices(42, CompareType::NOT_EQUAL));
-    EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 5, 6, 7, 8}),    uLookup.LookupIndices(99, CompareType::NOT_EQUAL));
-    EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), uLookup.LookupIndices(66, CompareType::NOT_EQUAL)); // does not exist
+    EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 6, 7, 8, 9}),    uLookup.LookupIndices(10, Compare::NOT_EQUAL));
+    EXPECT_EQ(IndexList({1, 2, 5, 6, 7, 8, 9}),          uLookup.LookupIndices(11, Compare::NOT_EQUAL));
+    EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 5, 7, 8, 9}),    uLookup.LookupIndices(12, Compare::NOT_EQUAL));
+    EXPECT_EQ(IndexList({0, 2, 3, 4, 5, 6, 7, 8, 9}),    uLookup.LookupIndices(20, Compare::NOT_EQUAL));
+    EXPECT_EQ(IndexList({0, 1, 3, 4, 5, 6, 9}),          uLookup.LookupIndices(42, Compare::NOT_EQUAL));
+    EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 5, 6, 7, 8}),    uLookup.LookupIndices(99, Compare::NOT_EQUAL));
+    EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), uLookup.LookupIndices(66, Compare::NOT_EQUAL)); // does not exist
 
     // LESS_THAN
-    EXPECT_EQ(IndexList({0, 3, 4, 5, 6}), uLookup.LookupIndices(13, CompareType::LESS_THAN));
-    EXPECT_EQ(IndexList({0, 3, 4, 5}),    uLookup.LookupIndices(12, CompareType::LESS_THAN));
+    EXPECT_EQ(IndexList({0, 3, 4, 5, 6}), uLookup.LookupIndices(13, Compare::LESS_THAN));
+    EXPECT_EQ(IndexList({0, 3, 4, 5}),    uLookup.LookupIndices(12, Compare::LESS_THAN));
     // more checks?
 
     // LESS_THAN_EQUAL
-    EXPECT_EQ(IndexList({0, 3, 4, 5, 6}), uLookup.LookupIndices(13, CompareType::LESS_THAN_EQUAL));
-    EXPECT_EQ(IndexList({0, 3, 4, 5, 6}), uLookup.LookupIndices(12, CompareType::LESS_THAN_EQUAL));
+    EXPECT_EQ(IndexList({0, 3, 4, 5, 6}), uLookup.LookupIndices(13, Compare::LESS_THAN_EQUAL));
+    EXPECT_EQ(IndexList({0, 3, 4, 5, 6}), uLookup.LookupIndices(12, Compare::LESS_THAN_EQUAL));
     // more checks?
 
     // GREATER_THAN
-    EXPECT_EQ(IndexList({2,7,8,9}), uLookup.LookupIndices(41, CompareType::GREATER_THAN));
-    EXPECT_EQ(IndexList({9}),       uLookup.LookupIndices(42, CompareType::GREATER_THAN));
+    EXPECT_EQ(IndexList({2,7,8,9}), uLookup.LookupIndices(41, Compare::GREATER_THAN));
+    EXPECT_EQ(IndexList({9}),       uLookup.LookupIndices(42, Compare::GREATER_THAN));
     // more checks?
 
     // GREATER_THAN_EQUAL
-    EXPECT_EQ(uLookup.LookupIndices(41, CompareType::GREATER_THAN_EQUAL), IndexList({2,7,8,9}));
-    EXPECT_EQ(uLookup.LookupIndices(42, CompareType::GREATER_THAN_EQUAL), IndexList({2,7,8,9}));
+    EXPECT_EQ(uLookup.LookupIndices(41, Compare::GREATER_THAN_EQUAL), IndexList({2,7,8,9}));
+    EXPECT_EQ(uLookup.LookupIndices(42, Compare::GREATER_THAN_EQUAL), IndexList({2,7,8,9}));
     // more checks?
 }
 
 TEST(PacBioIndexTest, MergeBlocks)
 {
-    using PacBio::BAM::CompareType;
     using PacBio::BAM::IndexList;
     using PacBio::BAM::IndexResultBlock;
     using PacBio::BAM::IndexResultBlocks;
-    using PacBio::BAM::internal::mergedIndexBlocks;
-    using PacBio::BAM::internal::OrderedLookup;
+    using PacBio::BAM::mergedIndexBlocks;
+    using PacBio::BAM::OrderedLookup;
 
-    OrderedLookup<int>::ContainerType oRawData;
+    OrderedLookup<int>::container_type oRawData;
     oRawData[11] = { 0, 3, 4 };
     oRawData[20] = { 1 };
     oRawData[42] = { 2, 7, 8 };
@@ -487,82 +607,81 @@ TEST(PacBioIndexTest, MergeBlocks)
     OrderedLookup<int> oLookup(oRawData);
 
     // EQUAL
-    auto mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(10, CompareType::EQUAL));
+    auto mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(10, Compare::EQUAL));
     EXPECT_EQ(1, mergedBlocks.size());
     EXPECT_EQ(IndexResultBlock(5, 1), mergedBlocks.at(0));
 
-    mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(11, CompareType::EQUAL));
+    mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(11, Compare::EQUAL));
     EXPECT_EQ(2, mergedBlocks.size());
     EXPECT_EQ(IndexResultBlock(0, 1), mergedBlocks.at(0));
     EXPECT_EQ(IndexResultBlock(3, 2), mergedBlocks.at(1));
 
-    mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(12, CompareType::EQUAL));
+    mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(12, Compare::EQUAL));
     EXPECT_EQ(1, mergedBlocks.size());
     EXPECT_EQ(IndexResultBlock(6, 1), mergedBlocks.at(0));
 
-    mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(20, CompareType::EQUAL));
+    mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(20, Compare::EQUAL));
     EXPECT_EQ(1, mergedBlocks.size());
     EXPECT_EQ(IndexResultBlock(1, 1), mergedBlocks.at(0));
 
-    mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(42, CompareType::EQUAL));
+    mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(42, Compare::EQUAL));
     EXPECT_EQ(2, mergedBlocks.size());
     EXPECT_EQ(IndexResultBlock(2, 1), mergedBlocks.at(0));
     EXPECT_EQ(IndexResultBlock(7, 2), mergedBlocks.at(1));
 
-    mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(99, CompareType::EQUAL));
+    mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(99, Compare::EQUAL));
     EXPECT_EQ(1, mergedBlocks.size());
     EXPECT_EQ(IndexResultBlock(9, 1), mergedBlocks.at(0));
 
-    mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(66, CompareType::EQUAL));
+    mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(66, Compare::EQUAL));
     EXPECT_TRUE(mergedBlocks.empty());
 
     // NOT_EQUAL
-    mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(10, CompareType::NOT_EQUAL));
+    mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(10, Compare::NOT_EQUAL));
     EXPECT_EQ(2, mergedBlocks.size());
     EXPECT_EQ(IndexResultBlock(0, 5), mergedBlocks.at(0));
     EXPECT_EQ(IndexResultBlock(6, 4), mergedBlocks.at(1));
 
-    mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(11, CompareType::NOT_EQUAL));
+    mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(11, Compare::NOT_EQUAL));
     EXPECT_EQ(2, mergedBlocks.size());
     EXPECT_EQ(IndexResultBlock(1, 2), mergedBlocks.at(0));
     EXPECT_EQ(IndexResultBlock(5, 5), mergedBlocks.at(1));
 
-    mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(12, CompareType::NOT_EQUAL));
+    mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(12, Compare::NOT_EQUAL));
     EXPECT_EQ(2, mergedBlocks.size());
     EXPECT_EQ(IndexResultBlock(0, 6), mergedBlocks.at(0));
     EXPECT_EQ(IndexResultBlock(7, 3), mergedBlocks.at(1));
 
-    mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(20, CompareType::NOT_EQUAL));
+    mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(20, Compare::NOT_EQUAL));
     EXPECT_EQ(2, mergedBlocks.size());
     EXPECT_EQ(IndexResultBlock(0, 1), mergedBlocks.at(0));
     EXPECT_EQ(IndexResultBlock(2, 8), mergedBlocks.at(1));
 
-    mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(42, CompareType::NOT_EQUAL));
+    mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(42, Compare::NOT_EQUAL));
     EXPECT_EQ(3, mergedBlocks.size());
     EXPECT_EQ(IndexResultBlock(0, 2), mergedBlocks.at(0));
     EXPECT_EQ(IndexResultBlock(3, 4), mergedBlocks.at(1));
     EXPECT_EQ(IndexResultBlock(9, 1), mergedBlocks.at(2));
 
-    mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(99, CompareType::NOT_EQUAL));
+    mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(99, Compare::NOT_EQUAL));
     EXPECT_EQ(1, mergedBlocks.size());
     EXPECT_EQ(IndexResultBlock(0, 9), mergedBlocks.at(0));
 
-    mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(66, CompareType::NOT_EQUAL));
+    mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(66, Compare::NOT_EQUAL));
     EXPECT_EQ(1, mergedBlocks.size());
     EXPECT_EQ(IndexResultBlock(0, 10), mergedBlocks.at(0));
 }
 
 TEST(PacBioIndexTest, ApplyOffsetsToBlocks)
 {
-    using PacBio::BAM::CompareType;
+    using PacBio::BAM::BasicLookupData;
     using PacBio::BAM::IndexList;
     using PacBio::BAM::IndexResultBlock;
     using PacBio::BAM::IndexResultBlocks;
-    using PacBio::BAM::internal::mergedIndexBlocks;
-    using PacBio::BAM::internal::OrderedLookup;
-    using PacBio::BAM::internal::SubreadLookupData;
+    using PacBio::BAM::mergedIndexBlocks;
+    using PacBio::BAM::OrderedLookup;
 
-    OrderedLookup<int>::ContainerType oRawData;
+    OrderedLookup<int>::container_type oRawData;
     oRawData[11] = { 0, 3, 4 };
     oRawData[20] = { 1 };
     oRawData[42] = { 2, 7, 8 };
@@ -571,15 +690,15 @@ TEST(PacBioIndexTest, ApplyOffsetsToBlocks)
     oRawData[99] = { 9 };
 
     OrderedLookup<int> oLookup(oRawData);
-    auto mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(10, CompareType::NOT_EQUAL));
+    auto mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(10, Compare::NOT_EQUAL));
 
     EXPECT_EQ(2, mergedBlocks.size());
     EXPECT_EQ(IndexResultBlock(0, 5), mergedBlocks.at(0));
     EXPECT_EQ(IndexResultBlock(6, 4), mergedBlocks.at(1));
 
-    SubreadLookupData subreadIndex;
-    subreadIndex.fileOffset_ = { 0, 10, 20, 30, 40, 50, 60, 70, 80, 90 };
-    subreadIndex.ApplyOffsets(mergedBlocks);
+    BasicLookupData basicLookupData;
+    basicLookupData.fileOffset_ = { 0, 10, 20, 30, 40, 50, 60, 70, 80, 90 };
+    basicLookupData.ApplyOffsets(mergedBlocks);
 
     EXPECT_EQ(2,  mergedBlocks.size());
     EXPECT_EQ(0,  mergedBlocks.at(0).virtualOffset_);
@@ -590,16 +709,14 @@ TEST(PacBioIndexTest, ApplyOffsetsToBlocks)
 
 TEST(PacBioIndexTest, LookupMulti)
 {
-    using PacBio::BAM::CompareType;
+    using PacBio::BAM::BasicLookupData;
     using PacBio::BAM::IndexList;
     using PacBio::BAM::IndexResultBlock;
     using PacBio::BAM::IndexResultBlocks;
-    using PacBio::BAM::SubreadField;
-    using PacBio::BAM::internal::mergedIndexBlocks;
-    using PacBio::BAM::internal::SubreadLookupData;
-    using PacBio::BAM::internal::UnorderedLookup;
+    using PacBio::BAM::mergedIndexBlocks;
+    using PacBio::BAM::UnorderedLookup;
 
-    UnorderedLookup<int32_t>::ContainerType uRawData;
+    UnorderedLookup<int32_t>::container_type uRawData;
     uRawData[11] = { 0, 3, 4 };
     uRawData[20] = { 1 };
     uRawData[42] = { 2, 7, 8 };
@@ -607,15 +724,15 @@ TEST(PacBioIndexTest, LookupMulti)
     uRawData[12] = { 6 };
     uRawData[99] = { 9 };
 
-    SubreadLookupData subreadIndex;
-    subreadIndex.rgId_ = UnorderedLookup<int32_t>(uRawData);
-    subreadIndex.fileOffset_ = { 0, 10, 20, 30, 40, 50, 60, 70, 80, 90 };
+    BasicLookupData basicLookup;
+    basicLookup.rgId_ = UnorderedLookup<int32_t>(uRawData);
+    basicLookup.fileOffset_ = { 0, 10, 20, 30, 40, 50, 60, 70, 80, 90 };
 
     const std::vector<int32_t> whitelist = { 11, 42, 20 };
-    const auto indices = subreadIndex.IndicesMulti(SubreadField::RG_ID, whitelist);
+    const auto indices = basicLookup.IndicesMulti(BasicLookupData::RG_ID, whitelist);
 
     IndexResultBlocks mergedBlocks = mergedIndexBlocks(indices);
-    subreadIndex.ApplyOffsets(mergedBlocks);
+    basicLookup.ApplyOffsets(mergedBlocks);
 
     EXPECT_EQ(IndexList({0, 3, 4, 2, 7, 8, 1}), indices);
     EXPECT_EQ(2, mergedBlocks.size());
@@ -634,56 +751,96 @@ TEST(PacBioIndexTest, LookupMulti)
 TEST(PacBioIndexTest, LookupAPI)
 {
     const PbiIndex index(test2BamFn + ".pbi");
+    const BasicLookupData& basicData = index.BasicData();
+    const MappedLookupData& mappedData = index.MappedData();
+    const BarcodeLookupData& barcodeData = index.BarcodeData();
 
     // rgId == x
-    const IndexResultBlocks rgResult = index.Lookup(ReadGroupIndexRequest(-1197849594));
+    IndexResultBlocks rgResult = mergedIndexBlocks(basicData.Indices(BasicLookupData::RG_ID, -1574697275));
+    basicData.ApplyOffsets(rgResult);
     EXPECT_EQ(1, rgResult.size());
     EXPECT_EQ(0, rgResult.at(0).firstIndex_);
-    EXPECT_EQ(4, rgResult.at(0).numReads_);
-    EXPECT_EQ(35651584, rgResult.at(0).virtualOffset_);
+    EXPECT_EQ(10, rgResult.at(0).numReads_);
+    EXPECT_EQ(32636928, rgResult.at(0).virtualOffset_);
 
     // rg != x
-    const IndexResultBlocks notRgResult = index.Lookup(ReadGroupIndexRequest(-1197849594, CompareType::NOT_EQUAL));
+    IndexResultBlocks notRgResult = mergedIndexBlocks(basicData.Indices(BasicLookupData::RG_ID,
+                                                                        -1574697275,
+                                                                        Compare::NOT_EQUAL));
+    basicData.ApplyOffsets(notRgResult);
     EXPECT_TRUE(notRgResult.empty());
 
     // tEnd <= x
-    const IndexResultBlocks tEndLteResult = index.Lookup(ReferenceEndIndexRequest(9900, CompareType::LESS_THAN_EQUAL));
+    IndexResultBlocks tEndLteResult = mergedIndexBlocks(mappedData.Indices(MappedLookupData::T_END,
+                                                                            4500,
+                                                                            Compare::LESS_THAN_EQUAL));
+    basicData.ApplyOffsets(tEndLteResult);
     EXPECT_EQ(1, tEndLteResult.size());
-    EXPECT_EQ(2, tEndLteResult.at(0).firstIndex_);
-    EXPECT_EQ(2, tEndLteResult.at(0).numReads_);
-    EXPECT_EQ(35667124, tEndLteResult.at(0).virtualOffset_);
+    EXPECT_EQ(0, tEndLteResult.at(0).firstIndex_);
+    EXPECT_EQ(5, tEndLteResult.at(0).numReads_);
+    EXPECT_EQ(32636928, tEndLteResult.at(0).virtualOffset_);
 
     // tEnd >= x
-    const IndexResultBlocks tEndGteResult = index.Lookup(ReferenceEndIndexRequest(9900, CompareType::GREATER_THAN_EQUAL));
-    EXPECT_EQ(2, tEndGteResult.size());
-    EXPECT_EQ(0, tEndGteResult.at(0).firstIndex_);
-    EXPECT_EQ(2, tEndGteResult.at(0).numReads_);
-    EXPECT_EQ(35651584, tEndGteResult.at(0).virtualOffset_);
-    EXPECT_EQ(3, tEndGteResult.at(1).firstIndex_);
-    EXPECT_EQ(1, tEndGteResult.at(1).numReads_);
-    EXPECT_EQ(35679164, tEndGteResult.at(1).virtualOffset_);
+    IndexResultBlocks tEndGteResult = mergedIndexBlocks(mappedData.Indices(MappedLookupData::T_START,
+                                                                           4500,
+                                                                           Compare::GREATER_THAN_EQUAL));
+    basicData.ApplyOffsets(tEndGteResult);
+    EXPECT_EQ(1, tEndGteResult.size());
+    EXPECT_EQ(6, tEndGteResult.at(0).firstIndex_);
+    EXPECT_EQ(4, tEndGteResult.at(0).numReads_);
+    EXPECT_EQ(32669996, tEndGteResult.at(0).virtualOffset_);
 
     // strand query
-    const IndexResultBlocks forward = index.Lookup(StrandIndexRequest(Strand::FORWARD));
-    EXPECT_EQ(2, forward.size());
+    IndexResultBlocks forward = mergedIndexBlocks(mappedData.Indices(MappedLookupData::STRAND,
+                                                                     Strand::FORWARD));
+    basicData.ApplyOffsets(forward);
+    EXPECT_EQ(5, forward.size());
     EXPECT_EQ(0, forward.at(0).firstIndex_);
     EXPECT_EQ(1, forward.at(0).numReads_);
-    EXPECT_EQ(35651584, forward.at(0).virtualOffset_);
+    EXPECT_EQ(32636928, forward.at(0).virtualOffset_);
+
     EXPECT_EQ(2, forward.at(1).firstIndex_);
     EXPECT_EQ(1, forward.at(1).numReads_);
-    EXPECT_EQ(35667124, forward.at(1).virtualOffset_);
+    EXPECT_EQ(32651627, forward.at(1).virtualOffset_);
+
+    EXPECT_EQ(4, forward.at(2).firstIndex_);
+    EXPECT_EQ(1, forward.at(2).numReads_);
+    EXPECT_EQ(32656778, forward.at(2).virtualOffset_);
+
+    EXPECT_EQ(7, forward.at(3).firstIndex_);
+    EXPECT_EQ(1, forward.at(3).numReads_);
+    EXPECT_EQ(32683648, forward.at(3).virtualOffset_);
+
+    EXPECT_EQ(9, forward.at(4).firstIndex_);
+    EXPECT_EQ(1, forward.at(4).numReads_);
+    EXPECT_EQ(1388838912, forward.at(4).virtualOffset_);
 
-    const IndexResultBlocks reverse = index.Lookup(StrandIndexRequest(Strand::REVERSE));
-    EXPECT_EQ(2, reverse.size());
+    // expected strand per record (0 = forward, 1 = reverse): 0,1,0,1,0,1,1,0,1,0
+    IndexResultBlocks reverse = mergedIndexBlocks(mappedData.Indices(MappedLookupData::STRAND,
+                                                                     Strand::REVERSE));
+    basicData.ApplyOffsets(reverse);
+    EXPECT_EQ(4, reverse.size());
     EXPECT_EQ(1, reverse.at(0).firstIndex_);
     EXPECT_EQ(1, reverse.at(0).numReads_);
-    EXPECT_EQ(35655123, reverse.at(0).virtualOffset_);
+    EXPECT_EQ(32645486, reverse.at(0).virtualOffset_);
+
     EXPECT_EQ(3, reverse.at(1).firstIndex_);
     EXPECT_EQ(1, reverse.at(1).numReads_);
-    EXPECT_EQ(35679164, reverse.at(1).virtualOffset_);
+    EXPECT_EQ(32654529, reverse.at(1).virtualOffset_);
+
+    EXPECT_EQ(5, reverse.at(2).firstIndex_);
+    EXPECT_EQ(2, reverse.at(2).numReads_);
+    EXPECT_EQ(32658272, reverse.at(2).virtualOffset_);
+
+    EXPECT_EQ(8, reverse.at(3).firstIndex_);
+    EXPECT_EQ(1, reverse.at(3).numReads_);
+    EXPECT_EQ(32694741, reverse.at(3).virtualOffset_);
 
     // query data field that is not in the PBI
-    const IndexResultBlocks missing = index.Lookup(BarcodeQualityIndexRequest(77, CompareType::GREATER_THAN));
+    IndexResultBlocks missing = mergedIndexBlocks(barcodeData.Indices(BarcodeLookupData::BC_QUALITY,
+                                                                      77,
+                                                                      Compare::GREATER_THAN));
+    basicData.ApplyOffsets(missing);
     EXPECT_TRUE(missing.empty());
 }
 
@@ -692,9 +849,13 @@ TEST(PacBioIndexTest, LookupByZmw)
     BamFile f(tests::Data_Dir + "/dataset/bam_mapping.bam");
     f.EnsurePacBioIndexExists();
 
-    PbiIndex index(f.PacBioIndexFilename());
+    const PbiIndex index(f.PacBioIndexFilename());
+    const BasicLookupData& basicData = index.BasicData();
 
-    const IndexResultBlocks blocks = index.Lookup(ZmwIndexRequest(20000, CompareType::LESS_THAN));
+    IndexResultBlocks blocks =  mergedIndexBlocks(basicData.Indices(BasicLookupData::ZMW,
+                                                                      20000,
+                                                                      Compare::LESS_THAN));
+    basicData.ApplyOffsets(blocks);
     EXPECT_EQ(14, blocks.size());
 
     //
@@ -743,11 +904,12 @@ TEST(PacBioIndexTest, LookupMultiZmw)
     BamFile f(tests::Data_Dir + "/dataset/bam_mapping.bam");
     f.EnsurePacBioIndexExists();
 
-    PbiIndex index(f.PacBioIndexFilename());
+    const PbiIndex index(f.PacBioIndexFilename());
+    const BasicLookupData& basicData = index.BasicData();
 
     const std::vector<int32_t> whitelist = { 13473, 38025 };
-    const ZmwIndexMultiRequest request(whitelist);
-    const IndexResultBlocks& blocks = index.Lookup(request);
+    IndexResultBlocks blocks = mergedIndexBlocks(basicData.IndicesMulti(BasicLookupData::ZMW, whitelist));
+    basicData.ApplyOffsets(blocks);
 
     EXPECT_EQ(3, blocks.size());
 
diff --git a/tests/src/test_PbiFilter.cpp b/tests/src/test_PbiFilter.cpp
new file mode 100644
index 0000000..02d0d4d
--- /dev/null
+++ b/tests/src/test_PbiFilter.cpp
@@ -0,0 +1,1300 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include "TestData.h"
+#include <gtest/gtest.h>
+#include <pbbam/PbiFilter.h>
+#include <string>
+#include <cstdio>
+#include <cstdlib>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace tests {
+
+// helper structs & methods
+
+// Hand-builds a 4-record PbiRawData (basic, mapped, barcode and reference sections) for the filter tests.
+static PbiRawData test2Bam_RawIndex(void)
+{
+    PbiRawData rawIndex;
+    rawIndex.NumReads(4);
+    // basic section: one value per record
+    PbiRawBasicData& basic = rawIndex.BasicData();
+    basic.rgId_       = { -1197849594, -1197849594, -1197849594, -1197849594 };
+    basic.qStart_     = { 2114, 2579, 4101, 5615 };
+    basic.qEnd_       = { 2531, 4055, 5571, 6237 };
+    basic.holeNumber_ = { 14743, 14743, 14743, 14743 };
+    basic.readQual_   = { 0.901, 0.601, 0.901, 0.601 };
+    basic.ctxtFlag_   = { 0, 1, 2, 3 };
+    basic.fileOffset_ = { 35651584, 35655125, 35667128, 35679170 };
+    // mapped section (the member is written directly rather than through an accessor)
+    PbiRawMappedData& mapped = rawIndex.mappedData_;
+    mapped.tId_       = { 0, 0, 0, 0 };
+    mapped.tStart_    = { 9507, 8453, 8455, 9291 };
+    mapped.tEnd_      = { 9903, 9902, 9893, 9900 };
+    mapped.aStart_    = { 2130, 2581, 4102, 5619 };
+    mapped.aEnd_      = { 2531, 4055, 5560, 6237 };
+    mapped.revStrand_ = { 0, 1, 0, 1 };
+    mapped.mapQV_     = { 254, 254, 254, 254 };
+    mapped.nM_        = { 384, 1411, 1393, 598 };
+    mapped.nMM_       = { 0, 0, 0, 0 };
+    // barcode section
+    PbiRawBarcodeData& barcodes = rawIndex.barcodeData_;
+    barcodes.bcForward_ = { 0, 17, 256, 17 };
+    barcodes.bcReverse_ = { 1, 18, 257, 18 };
+    barcodes.bcQual_    = { 42, 80, 42, 110 };
+    // reference section: three entries, the last built from PbiReferenceEntry::UNMAPPED_ID
+    PbiRawReferenceData& references = rawIndex.referenceData_;
+    references.entries_.emplace_back( 0, 0, 3 );
+    references.entries_.emplace_back( 1 );
+    references.entries_.emplace_back( PbiReferenceEntry::UNMAPPED_ID );
+
+    return rawIndex;
+}
+
+static const PbiRawData shared_index = test2Bam_RawIndex();   // built once; this is the index checkFilterRows() queries
+
+// Expects `filter` to accept every row listed in `expectedRows` when queried against shared_index.
+// NOTE: only acceptance is asserted - rows missing from the list are never probed, so an empty list asserts nothing.
+static void checkFilterRows(const PbiFilter& filter, const std::vector<size_t>& expectedRows)   // by reference: no per-call copy
+{
+    for (const size_t row : expectedRows) { EXPECT_TRUE(filter.Accepts(shared_index, row)); }
+}
+
+// Checks the composition type and direct child count read from filter.d_, then the accepted rows (see checkFilterRows).
+static void checkFilterInternals(const PbiFilter& filter,
+                                 const PbiFilter::CompositionType expectedType,
+                                 const size_t expectedNumChildren,
+                                 const std::vector<size_t>& expectedRows)   // by reference: avoids copying the row list
+{
+    EXPECT_EQ(expectedType,        filter.d_->type_);
+    EXPECT_EQ(expectedNumChildren, filter.d_->filters_.size());
+    checkFilterRows(filter, expectedRows);
+}
+
+struct SimpleFilter   // minimal custom filter: accepts every row, both arguments are ignored
+{
+    bool Accepts(const PbiRawData&, const size_t) const
+    { return true; }
+};
+
+struct NoncompliantFilter { };   // declares no Accepts() member; only referenced by the commented-out lines in CustomFilterOk
+
+// Custom filter accepting exactly rows 0-4, 7 and 8; the index argument is ignored.
+struct SortUniqueTestFilter
+{
+    bool Accepts(const PbiRawData&, const size_t row) const
+    {
+        // row is unsigned, so this range test covers 0, 1, 2, 3 and 4
+        if (row <= 4)
+            return true;
+
+        // the two isolated rows beyond that range
+        if (row == 7)
+            return true;
+        if (row == 8)
+            return true;
+
+        return false;
+    }
+};
+
+// Custom filter accepting exactly rows 3, 5 and 7; the index argument is ignored.
+struct SortUniqueTestFilter2
+{
+    bool Accepts(const PbiRawData&, const size_t row) const
+    {
+        if (row == 3)
+            return true;
+        if (row == 5)
+            return true;
+        if (row == 7)
+            return true;
+        return false;
+    }
+};
+
+// Returns a value-initialized PbiFilter as a temporary (used by the move tests).
+static inline PbiFilter emptyFilter(void)
+{ return PbiFilter(); }
+
+// Returns, as a temporary, a PbiFilter constructed from one SimpleFilter.
+static inline PbiFilter simpleFilter(void)
+{ return PbiFilter{ SimpleFilter{ } }; }
+
+} // namespace tests
+} // namespace BAM
+} // namespace PacBio
+
+TEST(PbiFilterTest, DefaultCtorOk)
+{   // a default-constructed filter is expected to be an INTERSECT composition with no children
+    PbiFilter defaultFilter;
+    tests::checkFilterInternals(defaultFilter, PbiFilter::INTERSECT, 0, {0, 1, 2, 3});
+}
+
+TEST(PbiFilterTest, CompositionOk)
+{   // adding one (empty) child keeps the type INTERSECT and gives a child count of 1
+    PbiFilter parent;
+    parent.Add(PbiFilter{ });
+    tests::checkFilterInternals(parent, PbiFilter::INTERSECT, 1, {0, 1, 2, 3});
+}
+
+TEST(PbiFilterTest, CustomFilterOk)
+{
+    { // custom filter handed to the constructor is counted as the single child
+        PbiFilter viaCtor{ tests::SimpleFilter{ } };
+        tests::checkFilterInternals(viaCtor, PbiFilter::INTERSECT, 1, std::vector<size_t>{ });
+    }
+    { // custom filter appended with Add() to an initially empty filter
+        PbiFilter viaAdd;
+        viaAdd.Add(tests::SimpleFilter{ });
+        tests::checkFilterInternals(viaAdd, PbiFilter::INTERSECT, 1, std::vector<size_t>{ });
+    }
+    // A type without Accepts() is meant to be rejected at compile time; uncomment a line below to confirm.
+//    PbiFilter shouldNotCompile = PbiFilter{ tests::NoncompliantFilter{ } };                       // <-- when uncommented, should not compile
+//    PbiFilter shouldNotCompileEither; shouldNotCompileEither.Add(tests::NoncompliantFilter{ });   // <-- when uncommented, should not compile
+}
+
+TEST(PbiFilterTest, CopyOk)
+{
+    { // childless source: both copies must report the same composition as the source
+        const PbiFilter source{ };
+
+        PbiFilter constructed(source);
+        PbiFilter assigned;
+        assigned = source;
+
+        tests::checkFilterInternals(source,      PbiFilter::INTERSECT, 0, {0, 1, 2, 3});
+        tests::checkFilterInternals(constructed, PbiFilter::INTERSECT, 0, {0, 1, 2, 3});
+        tests::checkFilterInternals(assigned,    PbiFilter::INTERSECT, 0, {0, 1, 2, 3});
+    }
+    { // source with one custom child: the child count must survive copying
+        const PbiFilter source{ tests::SimpleFilter{ } };
+
+        PbiFilter constructed(source);
+        PbiFilter assigned;
+        assigned = source;
+
+        tests::checkFilterInternals(source,      PbiFilter::INTERSECT, 1, std::vector<size_t>{ });
+        tests::checkFilterInternals(constructed, PbiFilter::INTERSECT, 1, std::vector<size_t>{ });
+        tests::checkFilterInternals(assigned,    PbiFilter::INTERSECT, 1, std::vector<size_t>{ });
+    }
+}
+
+TEST(PbiFilterTest, MoveOk)
+{
+    { // childless filters, each obtained as a temporary from tests::emptyFilter()
+        const PbiFilter reference = tests::emptyFilter();
+
+        PbiFilter moveConstructed(tests::emptyFilter());
+        PbiFilter moveAssigned;
+        moveAssigned = tests::emptyFilter();
+
+        tests::checkFilterInternals(reference,       PbiFilter::INTERSECT, 0, {0, 1, 2, 3});
+        tests::checkFilterInternals(moveConstructed, PbiFilter::INTERSECT, 0, {0, 1, 2, 3});
+        tests::checkFilterInternals(moveAssigned,    PbiFilter::INTERSECT, 0, {0, 1, 2, 3});
+    }
+    { // single-child filters, each obtained as a temporary from tests::simpleFilter()
+        const PbiFilter reference = tests::simpleFilter();
+
+        PbiFilter moveConstructed(tests::simpleFilter());
+        PbiFilter moveAssigned;
+        moveAssigned = tests::simpleFilter();
+
+        tests::checkFilterInternals(reference,       PbiFilter::INTERSECT, 1, {0, 1, 2, 3});
+        tests::checkFilterInternals(moveConstructed, PbiFilter::INTERSECT, 1, {0, 1, 2, 3});
+        tests::checkFilterInternals(moveAssigned,    PbiFilter::INTERSECT, 1, {0, 1, 2, 3});
+    }
+}
+
+TEST(PbiFilterTest, SortsAndUniquesChildFilterResultsOk)
+{   // rows are listed unordered for the bare child and ascending for the wrapping PbiFilter
+    const tests::SortUniqueTestFilter child{ };
+    const PbiFilter wrapped{ child };
+    tests::checkFilterRows(child,   {2, 7, 0, 3, 4, 1, 8});
+    tests::checkFilterRows(wrapped, {0, 1, 2, 3, 4, 7, 8});
+}
+
+TEST(PbiFilterTest, UnionOk)
+{
+    { // childless operands
+        { // operands are named (lvalue) objects
+            const PbiFilter lhs = tests::emptyFilter();
+            const PbiFilter rhs = tests::emptyFilter();
+            const auto unionOfNamed = PbiFilter::Union({ lhs, rhs });
+            tests::checkFilterInternals(unionOfNamed, PbiFilter::UNION, 2, {0, 1, 2, 3});
+        }
+        { // operands are temporaries
+            const auto unionOfTemps = PbiFilter::Union({ PbiFilter{ }, PbiFilter{ } });
+            tests::checkFilterInternals(unionOfTemps, PbiFilter::UNION, 2, {0, 1, 2, 3});
+        }
+    }
+
+    { // SimpleFilter operands (no data involved) - only the composition is inspected
+        { // named operands
+            const tests::SimpleFilter lhs{ };
+            const tests::SimpleFilter rhs{ };
+            const auto unionOfNamed = PbiFilter::Union({ lhs, rhs });
+            tests::checkFilterInternals(unionOfNamed, PbiFilter::UNION, 2, std::vector<size_t>{ });
+        }
+        { // temporary operands
+            const auto unionOfTemps = PbiFilter::Union({ tests::SimpleFilter{ }, tests::SimpleFilter{ } });
+            tests::checkFilterInternals(unionOfTemps, PbiFilter::UNION, 2, std::vector<size_t>{ });
+        }
+    }
+
+    { // two custom children; the rows expected from the union are listed in ascending order
+
+        const tests::SortUniqueTestFilter  first{ };
+        const tests::SortUniqueTestFilter2 second{ };
+        const auto combined = PbiFilter::Union({ first, second });
+
+        tests::checkFilterRows(first,    {2, 7, 0, 3, 4, 1, 8});
+        tests::checkFilterRows(second,   {3, 7, 5});
+        tests::checkFilterRows(combined, {0, 1, 2, 3, 4, 5, 7, 8});
+    }
+}
+
+TEST(PbiFilterTest, IntersectOk)
+{
+    { // childless operands
+        { // operands are named (lvalue) objects
+            const PbiFilter lhs = tests::emptyFilter();
+            const PbiFilter rhs = tests::emptyFilter();
+            const auto ofNamed = PbiFilter::Intersection({ lhs, rhs });
+            tests::checkFilterInternals(ofNamed, PbiFilter::INTERSECT, 2, {0, 1, 2, 3});
+        }
+        { // operands are temporaries
+            const auto ofTemps = PbiFilter::Intersection({ PbiFilter{ }, PbiFilter{ } });
+            tests::checkFilterInternals(ofTemps, PbiFilter::INTERSECT, 2, {0, 1, 2, 3});
+        }
+    }
+
+    { // SimpleFilter operands (no data involved) - only the composition is inspected
+        { // named operands
+            const tests::SimpleFilter lhs{ };
+            const tests::SimpleFilter rhs{ };
+            const auto ofNamed = PbiFilter::Intersection({ lhs, rhs });
+            tests::checkFilterInternals(ofNamed, PbiFilter::INTERSECT, 2, std::vector<size_t>{ });
+        }
+        { // temporary operands
+            const auto ofTemps = PbiFilter::Intersection({ tests::SimpleFilter{ }, tests::SimpleFilter{ } });
+            tests::checkFilterInternals(ofTemps, PbiFilter::INTERSECT, 2, std::vector<size_t>{ });
+        }
+    }
+
+    { // two custom children; only rows 3 and 7 are accepted by both of them
+
+        const tests::SortUniqueTestFilter  first{ };
+        const tests::SortUniqueTestFilter2 second{ };
+        const auto common = PbiFilter::Intersection({ first, second });
+
+        tests::checkFilterRows(first,  {2, 7, 0, 3, 4, 1, 8});
+        tests::checkFilterRows(second, {3, 7, 5});
+        tests::checkFilterRows(common, {3, 7});
+    }
+}
+
+TEST(PbiFilterTest, AlignedEndFilterOk)
+{   // expected rows line up with the fixture's aEnd_ values { 2531, 4055, 5560, 6237 }
+    { // compare type omitted
+        const PbiFilter filter{ PbiAlignedEndFilter{ 4055 } };
+        tests::checkFilterRows(filter, {1});
+    }
+    { // every row except the one holding 4055
+        const PbiFilter filter{ PbiAlignedEndFilter{ 4055, Compare::NOT_EQUAL } };
+        tests::checkFilterRows(filter, {0, 2, 3});
+    }
+    { // strictly below 4000
+        const PbiFilter filter{ PbiAlignedEndFilter{ 4000, Compare::LESS_THAN } };
+        tests::checkFilterRows(filter, {0});
+    }
+    { // strictly above 5560: the row equal to 5560 is not listed
+        const PbiFilter filter{ PbiAlignedEndFilter{ 5560, Compare::GREATER_THAN } };
+        tests::checkFilterRows(filter, {3});
+    }
+    { // 5560 or above: the row equal to 5560 is listed as well
+        const PbiFilter filter{ PbiAlignedEndFilter{ 5560, Compare::GREATER_THAN_EQUAL } };
+        tests::checkFilterRows(filter, {2, 3});
+    }
+
+    { // threshold above every fixture value: no rows listed
+        const PbiFilter filter{ PbiAlignedEndFilter{ 7000, Compare::GREATER_THAN } };
+        tests::checkFilterRows(filter, std::vector<size_t>{ });
+    }
+}
+
+TEST(PbiFilterTest, AlignedLengthFilterOk)
+{   // expected rows are consistent with the fixture's aEnd_ - aStart_ = { 401, 1474, 1458, 618 }
+    { // at least 500
+        const PbiFilter filter{ PbiAlignedLengthFilter{ 500, Compare::GREATER_THAN_EQUAL } };
+        tests::checkFilterRows(filter, {1, 2, 3});
+    }
+    { // at least 1000
+        const PbiFilter filter{ PbiAlignedLengthFilter{ 1000, Compare::GREATER_THAN_EQUAL } };
+        tests::checkFilterRows(filter, {1, 2});
+    }
+}
+
+TEST(PbiFilterTest, AlignedStartFilterOk)
+{   // expected rows line up with the fixture's aStart_ values { 2130, 2581, 4102, 5619 }
+    { // strictly below 2600
+        const PbiFilter filter{ PbiAlignedStartFilter{ 2600, Compare::LESS_THAN } };
+        tests::checkFilterRows(filter, {0, 1});
+    }
+    { // strictly above 4102: the row equal to 4102 is not listed
+        const PbiFilter filter{ PbiAlignedStartFilter{ 4102, Compare::GREATER_THAN } };
+        tests::checkFilterRows(filter, {3});
+    }
+    { // 4102 or above
+        const PbiFilter filter{ PbiAlignedStartFilter{ 4102, Compare::GREATER_THAN_EQUAL } };
+        tests::checkFilterRows(filter, {2, 3});
+    }
+    { // threshold above every fixture value: no rows listed
+        const PbiFilter filter{ PbiAlignedStartFilter{ 6000, Compare::GREATER_THAN } };
+        tests::checkFilterRows(filter, std::vector<size_t>{ });
+    }
+}
+
+TEST(PbiFilterTest, AlignedStrandFilterOk)
+{   // expected rows line up with the fixture's revStrand_ values { 0, 1, 0, 1 }
+    { // forward strand
+        const PbiFilter forwardOnly{ PbiAlignedStrandFilter{ Strand::FORWARD } };
+        tests::checkFilterRows(forwardOnly, {0, 2});
+    }
+    { // reverse strand
+        const PbiFilter reverseOnly{ PbiAlignedStrandFilter{ Strand::REVERSE } };
+        tests::checkFilterRows(reverseOnly, {1, 3});
+    }
+    { // "not forward" is expected to select the same rows as Strand::REVERSE
+        const PbiFilter notForward{ PbiAlignedStrandFilter{ Strand::FORWARD, Compare::NOT_EQUAL } };
+        tests::checkFilterRows(notForward, {1, 3});
+    }
+
+    // constructing the filter with an ordering comparison must throw std::runtime_error
+    EXPECT_THROW(PbiAlignedStrandFilter(Strand::FORWARD, Compare::LESS_THAN),          std::runtime_error);
+    EXPECT_THROW(PbiAlignedStrandFilter(Strand::FORWARD, Compare::LESS_THAN_EQUAL),    std::runtime_error);
+    EXPECT_THROW(PbiAlignedStrandFilter(Strand::FORWARD, Compare::GREATER_THAN),       std::runtime_error);
+    EXPECT_THROW(PbiAlignedStrandFilter(Strand::FORWARD, Compare::GREATER_THAN_EQUAL), std::runtime_error);
+}
+
+TEST(PbiFilterTest, BarcodeFilterOk)
+{   // rows are consistent with a match in either bcForward_ { 0, 17, 256, 17 } or bcReverse_ { 1, 18, 257, 18 }
+    { // 17 occurs in bcForward_ at rows 1 and 3
+        const PbiFilter filter{ PbiBarcodeFilter{ 17 } };
+        tests::checkFilterRows(filter, {1, 3});
+    }
+    { // 18 occurs in bcReverse_ at rows 1 and 3
+        const PbiFilter filter{ PbiBarcodeFilter{ 18 } };
+        tests::checkFilterRows(filter, {1, 3});
+    }
+    { // 0 occurs in bcForward_ at row 0
+        const PbiFilter filter{ PbiBarcodeFilter{ 0 } };
+        tests::checkFilterRows(filter, {0});
+    }
+}
+
+TEST(PbiFilterTest, BarcodeForwardFilterOk)
+{   // expected rows line up with the fixture's bcForward_ values { 0, 17, 256, 17 }
+    { // single value present in two rows
+        const PbiFilter filter{ PbiBarcodeForwardFilter{ 17 } };
+        tests::checkFilterRows(filter, {1, 3});
+    }
+    { // value absent from the fixture: no rows listed
+        const PbiFilter filter{ PbiBarcodeForwardFilter{ 400 } };
+        tests::checkFilterRows(filter, std::vector<size_t>{ });
+    }
+    { // list of two values
+        const PbiFilter filter{ PbiBarcodeForwardFilter{ {0, 256} } };
+        tests::checkFilterRows(filter, {0, 2});
+    }
+}
+
+TEST(PbiFilterTest, BarcodeQualityFilterOk)
+{   // expected rows line up with the fixture's bcQual_ values { 42, 80, 42, 110 }
+    { // 80 or above
+        const PbiFilter filter{ PbiBarcodeQualityFilter{ 80, Compare::GREATER_THAN_EQUAL } };
+        tests::checkFilterRows(filter, {1, 3});
+    }
+    { // below 40: no fixture value qualifies, no rows listed
+        const PbiFilter filter{ PbiBarcodeQualityFilter{ 40, Compare::LESS_THAN } };
+        tests::checkFilterRows(filter, std::vector<size_t>{ });
+    }
+}
+
+TEST(PbiFilterTest, BarcodeReverseFilterOk)
+{   // expected rows line up with the fixture's bcReverse_ values { 1, 18, 257, 18 }
+    { // single value present in two rows
+        const PbiFilter filter{ PbiBarcodeReverseFilter{ 18 } };
+        tests::checkFilterRows(filter, {1, 3});
+    }
+    { // value absent from the fixture: no rows listed
+        const PbiFilter filter{ PbiBarcodeReverseFilter{ 400 } };
+        tests::checkFilterRows(filter, std::vector<size_t>{ });
+    }
+    { // list of two values
+        const PbiFilter filter{ PbiBarcodeReverseFilter{ {1, 257} } };
+        tests::checkFilterRows(filter, {0, 2});
+    }
+}
+
+TEST(PbiFilterTest, BarcodesFilterOk)
+{   // fixture (bcForward_, bcReverse_) pairs per row: (0,1) (17,18) (256,257) (17,18)
+    { // pair given as two arguments
+        const PbiFilter filter{ PbiBarcodesFilter{ 17, 18 } };
+        tests::checkFilterRows(filter, {1, 3});
+    }
+    { // (17,19) is not a pair found in the fixture: no rows listed
+        const PbiFilter filter{ PbiBarcodesFilter{ 17, 19 } };
+        tests::checkFilterRows(filter, std::vector<size_t>{ });
+    }
+    { // pair given as a std::pair
+        const PbiFilter filter{ PbiBarcodesFilter{ std::make_pair(17,18) } };
+        tests::checkFilterRows(filter, {1, 3});
+    }
+}
+
+TEST(PbiFilterTest, IdentityFilterOk)
+{
+    { // only row 3 is listed for an identity threshold of at least 0.95
+        const PbiFilter atLeast95{ PbiIdentityFilter{ 0.95, Compare::GREATER_THAN_EQUAL } };
+        tests::checkFilterRows(atLeast95, {3});
+    }
+}
+
+TEST(PbiFilterTest, LocalContextFilterOk)
+{   // the fixture's ctxtFlag_ values are { 0, 1, 2, 3 }
+    { // equal to NO_LOCAL_CONTEXT
+        const PbiFilter noContext{ PbiLocalContextFilter{ LocalContextFlags::NO_LOCAL_CONTEXT } };
+        tests::checkFilterRows(noContext, {0});
+    }
+    { // not exactly ADAPTER_BEFORE (whole-value comparison)
+        const PbiFilter notExactlyBefore{ PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::NOT_EQUAL } };
+        tests::checkFilterRows(notExactlyBefore, {0, 2, 3});
+    }
+    { // ADAPTER_BEFORE contained in the flags
+        const PbiFilter hasBefore{ PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::CONTAINS } };
+        tests::checkFilterRows(hasBefore, {1, 3});
+    }
+    { // ADAPTER_BEFORE not contained in the flags
+        const PbiFilter lacksBefore{ PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::NOT_CONTAINS } };
+        tests::checkFilterRows(lacksBefore, {0, 2});
+    }
+    { // both ADAPTER_BEFORE and ADAPTER_AFTER contained
+        const auto hasBoth = PbiFilter::Intersection(
+        {
+            PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::CONTAINS },
+            PbiLocalContextFilter{ LocalContextFlags::ADAPTER_AFTER,  Compare::CONTAINS }
+        });
+        tests::checkFilterRows(hasBoth, {3});
+    }
+    { // neither ADAPTER_BEFORE nor ADAPTER_AFTER contained
+        const auto hasNeither = PbiFilter::Intersection(
+        {
+            PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::NOT_CONTAINS },
+            PbiLocalContextFilter{ LocalContextFlags::ADAPTER_AFTER,  Compare::NOT_CONTAINS }
+        });
+        tests::checkFilterRows(hasNeither, {0});
+    }
+    { // at least one of ADAPTER_BEFORE / ADAPTER_AFTER contained
+        const auto hasEither = PbiFilter::Union(
+        {
+            PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::CONTAINS },
+            PbiLocalContextFilter{ LocalContextFlags::ADAPTER_AFTER,  Compare::CONTAINS }
+        });
+        tests::checkFilterRows(hasEither, {1, 2, 3});
+    }
+    { // exactly one of the two: some context is set, and at least one of the two flags is missing
+        const auto hasExactlyOne = PbiFilter::Intersection(
+        {
+                PbiLocalContextFilter{ LocalContextFlags::NO_LOCAL_CONTEXT, Compare::NOT_EQUAL },
+                PbiFilter::Union(
+                {
+                    PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::NOT_CONTAINS },
+                    PbiLocalContextFilter{ LocalContextFlags::ADAPTER_AFTER,  Compare::NOT_CONTAINS }
+                })
+        });
+        tests::checkFilterRows(hasExactlyOne, {1, 2});
+    }
+}
+
+TEST(PbiFilterTest, MapQualityFilterOk)
+{   // every fixture record has mapQV_ == 254
+    { // compare type omitted: all four rows listed
+        const PbiFilter filter{ PbiMapQualityFilter{ 254 } };
+        tests::checkFilterRows(filter, {0, 1, 2, 3});
+    }
+    { // anything other than 254: no rows listed
+        const PbiFilter filter{ PbiMapQualityFilter{ 254, Compare::NOT_EQUAL } };
+        tests::checkFilterRows(filter, std::vector<size_t>{ });
+    }
+}
+
+TEST(PbiFilterTest, MovieNameFilterOk)
+{   // unlike most tests in this file, rows are checked against an index loaded from disk
+    const BamFile bamFile{ tests::Data_Dir + string{ "/test_group_query/test2.bam" } };
+    const PbiRawData index{ bamFile.PacBioIndexFilename() };
+
+    { // single movie name: rows 0-3 must be accepted
+        const PbiFilter filter{ PbiMovieNameFilter{ "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0" } };
+        const std::vector<size_t> rows{ 0, 1, 2, 3 };
+        for (const size_t row : rows)
+            EXPECT_TRUE(filter.Accepts(index, row));
+    }
+    { // NOTE(review): the row list is empty, so this loop body never runs and nothing is asserted
+        const PbiFilter filter{ PbiMovieNameFilter{ "does_not_exist" } };
+        const std::vector<size_t> rows{ };
+        for (const size_t row : rows)
+            EXPECT_TRUE(filter.Accepts(index, row));
+    }
+    { // list of names, only one of which is the name used in the first case
+        const vector<string> names{ "does_not_exist",
+                                    "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0" };
+        const PbiFilter filter{ PbiMovieNameFilter{ names } };
+        const std::vector<size_t> rows{ 0, 1, 2, 3 };
+        for (const size_t row : rows)
+            EXPECT_TRUE(filter.Accepts(index, row));
+    }
+}
+
+TEST(PbiFilterTest, NumDeletedBasesFilterOk)
+{
+    // del: { 12, 38, 45, 11 } - calculated from raw data, not stored directly in the testing object or read from the PBI file
+
+    { // at most 12 deleted bases
+        const PbiFilter filter{ PbiNumDeletedBasesFilter{ 12, Compare::LESS_THAN_EQUAL } };
+        tests::checkFilterRows(filter, {0, 3});
+    }
+    { // exactly 45 deleted bases
+        const PbiFilter filter{ PbiNumDeletedBasesFilter{ 45, Compare::EQUAL } };
+        tests::checkFilterRows(filter, {2});
+    }
+}
+
+TEST(PbiFilterTest, NumInsertedBasesFilterOk)
+{
+    // ins: { 17, 63, 65, 20 } - calculated from raw data, not stored directly in the testing object or read from the PBI file
+
+    { // at least 63 inserted bases
+        const PbiFilter filter{ PbiNumInsertedBasesFilter{ 63, Compare::GREATER_THAN_EQUAL } };
+        tests::checkFilterRows(filter, {1, 2});
+    }
+    { // any count other than 17
+        const PbiFilter filter{ PbiNumInsertedBasesFilter{ 17, Compare::NOT_EQUAL } };
+        tests::checkFilterRows(filter, {1, 2, 3});
+    }
+}
+
+TEST(PbiFilterTest, NumMatchesFilterOk)
+{   // expected rows line up with the fixture's nM_ values { 384, 1411, 1393, 598 }
+    { // at least 1000
+        const PbiFilter filter{ PbiNumMatchesFilter{ 1000, Compare::GREATER_THAN_EQUAL } };
+        tests::checkFilterRows(filter, {1, 2});
+    }
+    { // below 400
+        const PbiFilter filter{ PbiNumMatchesFilter{ 400, Compare::LESS_THAN } };
+        tests::checkFilterRows(filter, {0});
+    }
+}
+
+TEST(PbiFilterTest, NumMismatchesFilterOk)
+{   // every fixture record has nMM_ == 0
+    { // exactly zero: all four rows listed
+        const PbiFilter filter{ PbiNumMismatchesFilter{ 0, Compare::EQUAL } };
+        tests::checkFilterRows(filter, {0, 1, 2, 3});
+    }
+    { // non-zero: no rows listed
+        const PbiFilter filter{ PbiNumMismatchesFilter{ 0, Compare::NOT_EQUAL } };
+        tests::checkFilterRows(filter, std::vector<size_t>{ });
+    }
+}
+
+TEST(PbiFilterTest, QueryEndFilterOk)
+{   // expected rows line up with the fixture's qEnd_ values { 2531, 4055, 5571, 6237 }
+    { // compare type omitted
+        const PbiFilter filter{ PbiQueryEndFilter{ 4055 } };
+        tests::checkFilterRows(filter, {1});
+    }
+    { // 6200 or above
+        const PbiFilter filter{ PbiQueryEndFilter{ 6200, Compare::GREATER_THAN_EQUAL } };
+        tests::checkFilterRows(filter, {3});
+    }
+}
+
+TEST(PbiFilterTest, QueryLengthFilterOk)
+{   // expected rows are consistent with the fixture's qEnd_ - qStart_ = { 417, 1476, 1470, 622 }
+    { // at least 500
+        const PbiFilter filter{ PbiQueryLengthFilter{ 500, Compare::GREATER_THAN_EQUAL } };
+        tests::checkFilterRows(filter, {1, 2, 3});
+    }
+    { // at least 1000
+        const PbiFilter filter{ PbiQueryLengthFilter{ 1000, Compare::GREATER_THAN_EQUAL } };
+        tests::checkFilterRows(filter, {1, 2});
+    }
+}
+
+TEST(PbiFilterTest, QueryNameFilterOk)
+{
+    const BamFile bamFile{ tests::Data_Dir + string{ "/test_group_query/test2.bam" } };
+    const PbiIndex index{ bamFile.PacBioIndexFilename() };
+    // NOTE(review): `index` is never referenced below - every check goes through tests::checkFilterRows
+    { // name ending in 2579_4055: row 1 is listed
+        const PbiFilter filter{ PbiQueryNameFilter{ "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/14743/2579_4055" } };
+        tests::checkFilterRows(filter, {1});
+    }
+    { // name ending in 5615_6237: row 3 is listed
+        const PbiFilter filter{ PbiQueryNameFilter{ "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/14743/5615_6237" } };
+        tests::checkFilterRows(filter, {3});
+    }
+
+    { // well-formed but unknown name: no rows listed
+        const PbiFilter filter{ PbiQueryNameFilter{ "does_not_exist/0/0_0" } };
+        tests::checkFilterRows(filter, std::vector<size_t>{ });
+    }
+    { // list holding both of the names used above
+        const vector<string> names{ "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/14743/2579_4055",
+                                    "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/14743/5615_6237" };
+        const PbiFilter filter{ PbiQueryNameFilter{ names } };
+        tests::checkFilterRows(filter, {1, 3});
+    }
+
+    // names that do not follow the expected QNAME layout must throw
+    EXPECT_THROW(
+    {
+        const PbiFilter filter{ PbiQueryNameFilter{ "" } };
+        tests::checkFilterRows(filter, std::vector<size_t>{ });
+    },
+    std::runtime_error);
+    EXPECT_THROW(
+    {
+        const PbiFilter filter{ PbiQueryNameFilter{ "foo" } };
+        tests::checkFilterRows(filter, std::vector<size_t>{ });
+    },
+    std::runtime_error);
+    EXPECT_THROW(
+    {
+        const PbiFilter filter{ PbiQueryNameFilter{ "foo/bar" } };
+        tests::checkFilterRows(filter, std::vector<size_t>{ });
+    },
+    std::runtime_error);
+    EXPECT_THROW(
+    {
+        const PbiFilter filter{ PbiQueryNameFilter{ "foo/bar/baz_bam" } };
+        tests::checkFilterRows(filter, std::vector<size_t>{ });
+    },
+    std::exception); // TODO: find out why this is not a runtime_error (possibly thrown while parsing the non-numeric fields - confirm)
+}
+
+TEST(PbiFilterTest, QueryStartFilterOk)
+{   // expected rows line up with the fixture's qStart_ values { 2114, 2579, 4101, 5615 }
+    { // compare type omitted, value present in the fixture
+        const PbiFilter filter{ PbiQueryStartFilter{ 4101 } };
+        tests::checkFilterRows(filter, {2});
+    }
+    { // compare type omitted, value absent from the fixture: no rows listed
+        const PbiFilter filter{ PbiQueryStartFilter{ 5000 } };
+        tests::checkFilterRows(filter, std::vector<size_t>{ });
+    }
+    { // strictly above 5000
+        const PbiFilter filter{ PbiQueryStartFilter{ 5000, Compare::GREATER_THAN } };
+        tests::checkFilterRows(filter, {3});
+    }
+}
+
+TEST(PbiFilterTest, ReadAccuracyFilterOk)
+{   // expected rows line up with the fixture's readQual_ values { 0.901, 0.601, 0.901, 0.601 }
+    { // compare type omitted: no fixture value is exactly 0.9, no rows listed
+        const PbiFilter filter{ PbiReadAccuracyFilter{ 0.9 } };
+        tests::checkFilterRows(filter, std::vector<size_t>{ });
+    }
+    { // strictly above 0.9
+        const PbiFilter filter{ PbiReadAccuracyFilter{ 0.9, Compare::GREATER_THAN } };
+        tests::checkFilterRows(filter, {0, 2});
+    }
+}
+
+TEST(PbiFilterTest, ReadGroupFilterOk)
+{   // every fixture record has rgId_ == -1197849594
+    { // numeric ID: the fixture's ID, then one that no record carries
+        const PbiReadGroupFilter filter{ -1197849594 };
+        tests::checkFilterRows(filter, {0, 1, 2, 3});
+
+        const PbiReadGroupFilter filter2{ 200 };
+        tests::checkFilterRows(filter2, std::vector<size_t>{ });
+    }
+    { // string ID
+        const PbiReadGroupFilter filter{ "b89a4406" };
+        tests::checkFilterRows(filter, {0, 1, 2, 3});
+        // NOTE(review): filter2 repeats the same ID and expectation as filter above
+        const PbiReadGroupFilter filter2{ "b89a4406" };
+        tests::checkFilterRows(filter2, {0, 1, 2, 3});
+    }
+    { // ReadGroupInfo object
+        const ReadGroupInfo rg{ "b89a4406" };
+        const PbiReadGroupFilter filter{ rg };
+        tests::checkFilterRows(filter, {0, 1, 2, 3});
+    }
+    { // list of numeric IDs
+        const vector<int32_t> ids{ -1197849594, 200 };
+        const PbiReadGroupFilter filter{ ids };
+        tests::checkFilterRows(filter, {0, 1, 2, 3});
+    }
+    { // list of string IDs
+        const vector<string> ids{ "b89a4406", "deadbeef" };
+        const PbiReadGroupFilter filter{ ids };
+        tests::checkFilterRows(filter, {0, 1, 2, 3});
+    }
+    { // list of ReadGroupInfo objects
+        const vector<ReadGroupInfo> ids{ ReadGroupInfo("b89a4406"), ReadGroupInfo("deadbeef") };
+        const PbiReadGroupFilter filter{ ids };
+        tests::checkFilterRows(filter, {0, 1, 2, 3});
+    }
+}
+
+TEST(PbiFilterTest, ReferenceEndFilterOk)
+{   // expected rows line up with the fixture's tEnd_ values { 9903, 9902, 9893, 9900 }
+    { // compare type omitted
+        const PbiFilter filter{ PbiReferenceEndFilter{ 9900 } };
+        tests::checkFilterRows(filter, {3});
+    }
+    { // 9900 or above
+        const PbiFilter filter{ PbiReferenceEndFilter{ 9900, Compare::GREATER_THAN_EQUAL } };
+        tests::checkFilterRows(filter, {0, 1, 3});
+    }
+}
+
+TEST(PbiFilterTest, ReferenceIdFilterOk)
+{   // PbiReferenceIdFilter: single ID, negated single ID, and a list of IDs.
+    {   // reference ID 0, no Compare argument: rows 0-3 expected
+        const auto filter = PbiFilter{ PbiReferenceIdFilter{ 0 } };
+        tests::checkFilterRows(filter, std::vector<size_t>{0,1,2,3});
+    }
+    {   // reference ID != 0: no rows expected
+        const auto filter = PbiFilter{ PbiReferenceIdFilter{ 0, Compare::NOT_EQUAL } };
+        tests::checkFilterRows(filter, std::vector<size_t>{});
+    }
+    {   // list form: IDs 0 and 42 together still yield rows 0-3
+        const auto ids = vector<int32_t>({0, 42});
+        const auto filter = PbiFilter{ PbiReferenceIdFilter{ ids } };
+        tests::checkFilterRows(filter, std::vector<size_t>{0,1,2,3});
+    }
+}
+
+TEST(PbiFilterTest, ReferenceNameFilterOk)
+{   // PbiReferenceNameFilter, checked against a BamFile/PbiRawData pair opened here (test2.bam) via filter.Accepts().
+    const auto bamFile = BamFile{ tests::Data_Dir + string{ "/test_group_query/test2.bam" } };
+    const auto index = PbiRawData{ bamFile.PacBioIndexFilename() };
+
+    {
+        const auto filter = PbiFilter{ PbiReferenceNameFilter{ "lambda_NEB3011" } };
+        const auto expectedRows = std::vector<size_t>{0,1,2,3};
+        for (size_t row : expectedRows)
+            EXPECT_TRUE(filter.Accepts(index, row));
+
+    }
+    {
+        const auto filter = PbiFilter{ PbiReferenceNameFilter{ "lambda_NEB3011", Compare::NOT_EQUAL } };
+        // No row should pass. Looping over an empty expected list asserts nothing, so reject-check every row (assumes all records map to lambda_NEB3011).
+        for (size_t row = 0; row < index.NumReads(); ++row)
+            EXPECT_FALSE(filter.Accepts(index, row));
+    }
+    {
+        const auto names = vector<string>({ "lambda_NEB3011" }); // this file only has 1 :(
+        const auto filter = PbiFilter{ PbiReferenceNameFilter{ names } };
+        const auto expectedRows = std::vector<size_t>{0,1,2,3};
+        for (size_t row : expectedRows)
+            EXPECT_TRUE(filter.Accepts(index, row));
+    }
+
+    // unsupported compare types throw
+    EXPECT_THROW(PbiReferenceNameFilter("foo", Compare::LESS_THAN),          std::runtime_error);
+    EXPECT_THROW(PbiReferenceNameFilter("foo", Compare::LESS_THAN_EQUAL),    std::runtime_error);
+    EXPECT_THROW(PbiReferenceNameFilter("foo", Compare::GREATER_THAN),       std::runtime_error);
+    EXPECT_THROW(PbiReferenceNameFilter("foo", Compare::GREATER_THAN_EQUAL), std::runtime_error);
+}
+
+TEST(PbiFilterTest, ReferenceStartFilterOk)
+{   // PbiReferenceStartFilter: form without a Compare argument, then GREATER_THAN_EQUAL.
+    {   // no Compare argument (library default; presumably equality - confirm): only row 1 expected
+        const auto filter = PbiFilter{ PbiReferenceStartFilter{ 8453 } };
+        tests::checkFilterRows(filter, std::vector<size_t>{1});
+    }
+    {   // reference start >= 9200: rows 0 and 3 expected
+        const auto filter = PbiFilter{ PbiReferenceStartFilter{ 9200, Compare::GREATER_THAN_EQUAL } };
+        tests::checkFilterRows(filter, std::vector<size_t>{0,3});
+    }
+}
+
+TEST(PbiFilterTest, ZmwFilterOk)
+{   // PbiZmwFilter: single hole number, negated single hole number, and a list of hole numbers.
+    {   // ZMW 14743, no Compare argument: rows 0-3 expected
+        const auto filter = PbiFilter{ PbiZmwFilter{ 14743 } };
+        tests::checkFilterRows(filter, std::vector<size_t>{0,1,2,3});
+    }
+    {   // ZMW != 14743: no rows expected
+        const auto filter = PbiFilter{ PbiZmwFilter{ 14743, Compare::NOT_EQUAL } };
+        tests::checkFilterRows(filter, std::vector<size_t>{});
+    }
+    {   // list form: 14743 plus two other values still yields rows 0-3
+        const auto zmws = vector<int32_t>({14743,42,200});
+        const auto filter = PbiFilter{ PbiZmwFilter{ zmws } };
+        tests::checkFilterRows(filter, std::vector<size_t>{0,1,2,3});
+    }
+}
+
+TEST(PbiFilterTest, FromDataSetOk)
+{   // A filter generated by PbiFilter::FromDataSet must give the same Accepts() answer as a hand-built PbiFilter on every row.
+    const auto expectedFilter =
+        PbiFilter::Union(
+        {
+            PbiFilter::Intersection(
+            {
+                PbiZmwFilter{ 14743 },
+                PbiReadAccuracyFilter { 0.9, Compare::GREATER_THAN_EQUAL }
+            }),
+
+            PbiReferenceStartFilter { 9200, Compare::GREATER_THAN_EQUAL }
+        });
+
+
+    auto properties1 = Properties{ };   // first dataset filter; the test expects it to match the Intersection above
+    properties1.Add(Property{ "zm", "14743",  "==" });
+    properties1.Add(Property{ "rq", "0.9", ">=" });
+
+    auto datasetFilter1 = Filter{ };
+    datasetFilter1.Properties(properties1);
+
+    auto properties2 = Properties{ };   // second dataset filter; expected to match the PbiReferenceStartFilter above
+    properties2.Add(Property{ "pos", "9200", ">=" });
+
+    auto datasetFilter2 = Filter{ };
+    datasetFilter2.Properties(properties2);
+
+    auto datasetFilters = Filters{ };   // both filters go into one DataSet; the test expects them to combine as a Union
+    datasetFilters.Add(datasetFilter1);
+    datasetFilters.Add(datasetFilter2);
+    auto dataset = DataSet{ };
+    dataset.Filters(datasetFilters);
+
+    const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+
+    for (size_t i = 0; i < tests::shared_index.NumReads(); ++i) {   // compare both filters on every row of the shared index
+        EXPECT_EQ(expectedFilter.Accepts(tests::shared_index, i),
+                  generatedFilter.Accepts(tests::shared_index, i));
+    }
+}
+
+TEST(PbiFilterTest, LocalContextFiltersFromDataSetXmlOk)
+{   // Each scope builds a local-context filter by hand and the same condition from "cx" Property objects, then checks both against the same expected rows.
+    {   // no adapters or barcodes
+
+        const auto expectedFilter =
+                PbiLocalContextFilter{ LocalContextFlags::NO_LOCAL_CONTEXT, Compare::EQUAL };
+
+        // <Property Name="cx" Value="0" Operator="==" />
+
+        Property property("cx", "0", "==");
+
+        auto filter = Filter{ };
+        filter.Properties().Add(property);
+        DataSet dataset = DataSet{ };
+        dataset.Filters().Add(filter);
+
+        const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+        tests::checkFilterRows(expectedFilter,  std::vector<size_t>{0});
+        tests::checkFilterRows(generatedFilter, std::vector<size_t>{0});
+    }
+    {   // any adapters or barcodes
+
+        const auto expectedFilter =
+                PbiLocalContextFilter{ LocalContextFlags::NO_LOCAL_CONTEXT, Compare::NOT_EQUAL };
+
+        // <Property Name="cx" Value="0" Operator="!=" />
+
+        Property property("cx", "0", "!=");
+
+        auto filter = Filter{ };
+        filter.Properties().Add(property);
+        DataSet dataset = DataSet{ };
+        dataset.Filters().Add(filter);
+
+        const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+        tests::checkFilterRows(expectedFilter,  std::vector<size_t>{1,2,3});
+        tests::checkFilterRows(generatedFilter, std::vector<size_t>{1,2,3});
+    }
+    {   // contains adapter_before
+
+        const auto expectedFilter =
+                PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::CONTAINS };
+
+        // <Property Name="cx" Value="1" Operator="&" />
+
+        Property property("cx", "1", "&");
+
+        auto filter = Filter{ };
+        filter.Properties().Add(property);
+        DataSet dataset = DataSet{ };
+        dataset.Filters().Add(filter);
+
+        const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+        tests::checkFilterRows(expectedFilter,  std::vector<size_t>{1,3});
+        tests::checkFilterRows(generatedFilter, std::vector<size_t>{1,3});
+    }
+    {   // contains adapter_before (value given as enum name instead of number)
+
+        const auto expectedFilter =
+                PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::CONTAINS };
+
+        // <Property Name="cx" Value="ADAPTER_BEFORE" Operator="&" />
+
+        Property property("cx", "ADAPTER_BEFORE", "&");
+
+        auto filter = Filter{ };
+        filter.Properties().Add(property);
+        DataSet dataset = DataSet{ };
+        dataset.Filters().Add(filter);
+
+        const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+        tests::checkFilterRows(expectedFilter,  std::vector<size_t>{1,3});
+        tests::checkFilterRows(generatedFilter, std::vector<size_t>{1,3});
+    }
+    {   // contains adapter_after
+
+        const auto expectedFilter =
+                PbiLocalContextFilter{ LocalContextFlags::ADAPTER_AFTER, Compare::CONTAINS };
+
+        // <Property Name="cx" Value="2" Operator="&" />
+
+        Property property("cx", "2", "&");
+
+        auto filter = Filter{ };
+        filter.Properties().Add(property);
+        DataSet dataset = DataSet{ };
+        dataset.Filters().Add(filter);
+
+        const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+        tests::checkFilterRows(expectedFilter,  std::vector<size_t>{2,3});
+        tests::checkFilterRows(generatedFilter, std::vector<size_t>{2,3});
+    }
+    {   // contains adapter_before or adapter_after
+
+        const auto expectedFilter =
+                PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE | LocalContextFlags::ADAPTER_AFTER,
+                                       Compare::CONTAINS };
+
+        // <Property Name="cx" Value="3" Operator="&" />
+
+        Property property("cx", "3", "&");
+
+        auto filter = Filter{ };
+        filter.Properties().Add(property);
+        DataSet dataset = DataSet{ };
+        dataset.Filters().Add(filter);
+
+        const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+        tests::checkFilterRows(expectedFilter,  std::vector<size_t>{1,2,3});
+        tests::checkFilterRows(generatedFilter, std::vector<size_t>{1,2,3});
+    }
+    {   // contains adapter_before or adapter_after (enum names joined with " | ")
+
+        const auto expectedFilter =
+                PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE | LocalContextFlags::ADAPTER_AFTER,
+                                       Compare::CONTAINS };
+
+        // <Property Name="cx" Value="ADAPTER_BEFORE | ADAPTER_AFTER" Operator="&" />
+
+        Property property("cx", "ADAPTER_BEFORE | ADAPTER_AFTER", "&");
+
+        auto filter = Filter{ };
+        filter.Properties().Add(property);
+        DataSet dataset = DataSet{ };
+        dataset.Filters().Add(filter);
+
+        const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+        tests::checkFilterRows(expectedFilter,  std::vector<size_t>{1,2,3});
+        tests::checkFilterRows(generatedFilter, std::vector<size_t>{1,2,3});
+    }
+    {   // contains adapter_before or adapter_after - no whitespace separation
+
+        const auto expectedFilter =
+                PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE | LocalContextFlags::ADAPTER_AFTER,
+                                       Compare::CONTAINS };
+
+        // <Property Name="cx" Value="ADAPTER_BEFORE|ADAPTER_AFTER" Operator="&" />
+
+        Property property("cx", "ADAPTER_BEFORE|ADAPTER_AFTER", "&");
+
+        auto filter = Filter{ };
+        filter.Properties().Add(property);
+        DataSet dataset = DataSet{ };
+        dataset.Filters().Add(filter);
+
+        const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+        tests::checkFilterRows(expectedFilter,  std::vector<size_t>{1,2,3});
+        tests::checkFilterRows(generatedFilter, std::vector<size_t>{1,2,3});
+    }
+    {   // contains adapter_before or adapter_after - a lot of whitespace separation
+
+        const auto expectedFilter =
+                PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE | LocalContextFlags::ADAPTER_AFTER,
+                                       Compare::CONTAINS };
+
+        // <Property Name="cx" Value="ADAPTER_BEFORE        |           ADAPTER_AFTER" Operator="&" />
+
+        Property property("cx", "ADAPTER_BEFORE        |           ADAPTER_AFTER", "&");
+
+        auto filter = Filter{ };
+        filter.Properties().Add(property);
+        DataSet dataset = DataSet{ };
+        dataset.Filters().Add(filter);
+
+        const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+        tests::checkFilterRows(expectedFilter,  std::vector<size_t>{1,2,3});
+        tests::checkFilterRows(generatedFilter, std::vector<size_t>{1,2,3});
+    }
+    {   // contains adapter_before or adapter_after, but not both
+
+        const auto expectedFilter = PbiFilter::Union(
+        {
+            PbiFilter::Intersection(
+            {
+                PbiLocalContextFilter{ LocalContextFlags::NO_LOCAL_CONTEXT, Compare::NOT_EQUAL },
+                PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::NOT_CONTAINS }
+            }),
+            PbiFilter::Intersection(
+            {
+                PbiLocalContextFilter{ LocalContextFlags::NO_LOCAL_CONTEXT, Compare::NOT_EQUAL },
+                PbiLocalContextFilter{ LocalContextFlags::ADAPTER_AFTER, Compare::NOT_CONTAINS }
+            })
+        });
+
+        // <Filters>
+        //   <Filter>
+        //     <Properties>
+        //       <Property Name="cx" Value="0" Operator="!=" />
+        //       <Property Name="cx" Value="1" Operator="~" />
+        //     </Properties>
+        //   </Filter>
+        //   <Filter>
+        //     <Properties>
+        //       <Property Name="cx" Value="0" Operator="!=" />
+        //       <Property Name="cx" Value="2" Operator="~" />
+        //     </Properties>
+        //   </Filter>
+        // </Filters>
+
+        auto filter1 = Filter{ };
+        filter1.Properties().Add(Property("cx", "0", "!="));
+        filter1.Properties().Add(Property("cx", "1", "~"));
+
+        auto filter2 = Filter{ };
+        filter2.Properties().Add(Property("cx", "0", "!="));
+        filter2.Properties().Add(Property("cx", "2", "~"));
+
+        DataSet dataset = DataSet{ };
+        dataset.Filters().Add(filter1);
+        dataset.Filters().Add(filter2);
+
+        const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+        tests::checkFilterRows(expectedFilter,  std::vector<size_t>{1,2});
+        tests::checkFilterRows(generatedFilter, std::vector<size_t>{1,2});
+
+    }
+    {   // contains adapter_before or adapter_after (as two <Filter> elements, i.e. a union)
+
+        const auto expectedFilter = PbiFilter::Union(
+        {
+            PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::CONTAINS },
+            PbiLocalContextFilter{ LocalContextFlags::ADAPTER_AFTER,  Compare::CONTAINS }
+        });
+
+        // <Filters>
+        //   <Filter>
+        //     <Properties>
+        //       <Property Name="cx" Value="1" Operator="&" />
+        //     </Properties>
+        //   </Filter>
+        //   <Filter>
+        //     <Properties>
+        //       <Property Name="cx" Value="2" Operator="&" />
+        //     </Properties>
+        //   </Filter>
+        // </Filters>
+
+        auto filter1 = Filter{ };
+        filter1.Properties().Add(Property("cx", "1", "&"));
+
+        auto filter2 = Filter{ };
+        filter2.Properties().Add(Property("cx", "2", "&"));
+
+        DataSet dataset = DataSet{ };
+        dataset.Filters().Add(filter1);
+        dataset.Filters().Add(filter2);
+
+        const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+        tests::checkFilterRows(expectedFilter,  std::vector<size_t>{1,2,3});
+        tests::checkFilterRows(generatedFilter, std::vector<size_t>{1,2,3});
+    }
+    { // adapter_before and adapter_after
+
+        const auto expectedFilter = PbiFilter::Intersection(
+        {
+            PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::CONTAINS },
+            PbiLocalContextFilter{ LocalContextFlags::ADAPTER_AFTER,  Compare::CONTAINS }
+        });
+
+        // <Property Name="cx" Value="1" Operator="&" />
+        // <Property Name="cx" Value="2" Operator="&" />
+
+        Property property1("cx", "1", "&");
+        Property property2("cx", "2", "&");
+
+        auto filter = Filter{ };
+        filter.Properties().Add(property1);
+        filter.Properties().Add(property2);
+        DataSet dataset = DataSet{ };
+        dataset.Filters().Add(filter);
+
+        const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+        tests::checkFilterRows(expectedFilter,  std::vector<size_t>{3});
+        tests::checkFilterRows(generatedFilter, std::vector<size_t>{3});
+    }
+    {   // adapter_before, but no adapter_after
+
+        const auto expectedFilter = PbiFilter::Intersection(
+        {
+            PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::CONTAINS },
+            PbiLocalContextFilter{ LocalContextFlags::ADAPTER_AFTER,  Compare::NOT_CONTAINS }
+        });
+
+        // <Property Name="cx" Value="1" Operator="&" />
+        // <Property Name="cx" Value="2" Operator="~" />
+
+        Property property1("cx", "1", "&");
+        Property property2("cx", "2", "~");
+
+        auto filter = Filter{ };
+        filter.Properties().Add(property1);
+        filter.Properties().Add(property2);
+        DataSet dataset = DataSet{ };
+        dataset.Filters().Add(filter);
+
+        const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+        tests::checkFilterRows(expectedFilter,  std::vector<size_t>{1});
+        tests::checkFilterRows(generatedFilter, std::vector<size_t>{1});
+    }
+    {   // contains no adapter_before
+
+        const auto expectedFilter =
+                PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::NOT_CONTAINS };
+
+        // <Property Name="cx" Value="1" Operator="~" />
+
+        Property property("cx", "1", "~");
+
+        auto filter = Filter{ };
+        filter.Properties().Add(property);
+        DataSet dataset = DataSet{ };
+        dataset.Filters().Add(filter);
+
+        const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+        tests::checkFilterRows(expectedFilter,  std::vector<size_t>{0,2});
+        tests::checkFilterRows(generatedFilter, std::vector<size_t>{0,2});
+    }
+    {   // contains no adapter_before or adapter_after
+
+        const auto expectedFilter = PbiFilter::Intersection(
+        {
+            PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::NOT_CONTAINS },
+            PbiLocalContextFilter{ LocalContextFlags::ADAPTER_AFTER,  Compare::NOT_CONTAINS }
+        });
+
+        // <Property Name="cx" Value="1" Operator="~" />
+        // <Property Name="cx" Value="2" Operator="~" />
+
+        Property property1("cx", "1", "~");
+        Property property2("cx", "2", "~");
+
+        auto filter = Filter{ };
+        filter.Properties().Add(property1);
+        filter.Properties().Add(property2);
+        DataSet dataset = DataSet{ };
+        dataset.Filters().Add(filter);
+
+        const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+        tests::checkFilterRows(expectedFilter,  std::vector<size_t>{0});
+        tests::checkFilterRows(generatedFilter, std::vector<size_t>{0});
+    }
+    {   // contains no adapter_before or adapter_after (single property with combined value 3)
+
+        const auto expectedFilter =
+                PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE | LocalContextFlags::ADAPTER_AFTER,
+                                       Compare::NOT_CONTAINS };
+
+        // <Property Name="cx" Value="3" Operator="~" />
+
+        Property property("cx", "3", "~");
+
+        auto filter = Filter{ };
+        filter.Properties().Add(property);
+        DataSet dataset = DataSet{ };
+        dataset.Filters().Add(filter);
+
+        const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+        tests::checkFilterRows(expectedFilter,  std::vector<size_t>{0});
+        tests::checkFilterRows(generatedFilter, std::vector<size_t>{0});
+    }
+    {   // throws on invalid enum name
+
+        Property property("cx", "DOES_NOT_EXIST", "~");
+
+        auto filter = Filter{ };
+        filter.Properties().Add(property);
+        DataSet dataset = DataSet{ };
+        dataset.Filters().Add(filter);
+
+        EXPECT_THROW(PbiFilter::FromDataSet(dataset), std::runtime_error);
+    }
+}
diff --git a/tests/src/test_PbiFilterQuery.cpp b/tests/src/test_PbiFilterQuery.cpp
new file mode 100644
index 0000000..9db400f
--- /dev/null
+++ b/tests/src/test_PbiFilterQuery.cpp
@@ -0,0 +1,245 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include "TestData.h"
+#include <gtest/gtest.h>
+#include <pbbam/PbiFilterQuery.h>
+#include <algorithm>
+#include <string>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+TEST(PbiFilterQueryTest, QueryOk)
+{   // Runs PbiFilterQuery over test2.bam with several filters; checks the record count and per-record properties.
+    const auto bamFile = BamFile{ tests::Data_Dir + string{ "/test_group_query/test2.bam" } };
+
+    {   // query length >= 500: 3 records expected
+        int count = 0;
+        PbiFilterQuery query( PbiQueryLengthFilter{ 500, Compare::GREATER_THAN_EQUAL}, bamFile);
+        for (const auto& r: query) {
+            ++count;
+            EXPECT_GE((r.QueryEnd() - r.QueryStart()), 500);
+        }
+        EXPECT_EQ(3, count);
+    }
+    {
+        // all records aligned to reverse strand && pos >= 9200
+        const auto filter = PbiFilter::Intersection(
+        {
+            PbiAlignedStrandFilter{Strand::REVERSE},
+            PbiReferenceStartFilter{9200, Compare::GREATER_THAN_EQUAL}
+        });
+
+        int count = 0;
+        PbiFilterQuery query(filter, bamFile);
+        for (const auto& r: query) {
+            ++count;
+            EXPECT_EQ(Strand::REVERSE, r.AlignedStrand());
+            EXPECT_GE((r.ReferenceStart()), 9200);
+            EXPECT_EQ(string("m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/14743/5615_6237"), r.FullName());
+        }
+        EXPECT_EQ(1, count);
+    }
+    {
+        // all records aligned to forward strand && pos >= 9200
+        const auto filter = PbiFilter::Intersection(
+        {
+            PbiAlignedStrandFilter{Strand::FORWARD},
+            PbiReferenceStartFilter{9200, Compare::GREATER_THAN_EQUAL}
+        });
+
+        int count = 0;
+        PbiFilterQuery query(filter, bamFile);
+        for (const auto& r: query) {
+            ++count;
+            EXPECT_EQ(Strand::FORWARD, r.AlignedStrand());
+            EXPECT_GE((r.ReferenceStart()), 9200);
+            EXPECT_EQ(string("m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/14743/2114_2531"), r.FullName());
+        }
+        EXPECT_EQ(1, count);
+    }
+    {
+        // all records from RG ("b89a4406") with numMatches >= 1200
+        const auto filter = PbiFilter::Intersection(
+        {
+            PbiReadGroupFilter{"b89a4406"},
+            PbiNumMatchesFilter{1200, Compare::GREATER_THAN_EQUAL}
+        });
+
+        int count = 0;
+        PbiFilterQuery query(filter, bamFile);
+        for (const auto& r: query) {
+            ++count;
+            EXPECT_EQ(string("b89a4406"), r.ReadGroupId());
+            EXPECT_GE((r.NumMatches()), 1200);
+            if (count == 1)   // count was already incremented, so 1 is the first record returned and 2 the second
+                EXPECT_EQ(string("m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/14743/2579_4055"), r.FullName());
+            else if (count == 2)
+                EXPECT_EQ(string("m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/14743/4101_5571"), r.FullName());
+        }
+        EXPECT_EQ(2, count);
+    }
+}
+
+TEST(PbiFilterQueryTest, ZmwRangeFromDatasetOk)
+{   // Queries chunking.subreadset.xml with explicit movie/ZMW filters, with the dataset's own filters, and with none.
+    const auto expectedMovieName = string{ "m150404_101626_42267_c100807920800000001823174110291514_s1_p0" };
+
+    const DataSet ds(tests::Data_Dir + "/chunking/chunking.subreadset.xml");
+    EXPECT_EQ(3, ds.BamFiles().size());
+
+    { // movie name
+
+        int count = 0;
+        PbiFilterQuery query{ PbiMovieNameFilter{expectedMovieName}, ds };
+        for (const BamRecord& r : query) {
+            EXPECT_EQ(expectedMovieName, r.MovieName());
+            ++count;
+        }
+        EXPECT_EQ(1220, count);
+    }
+
+    { // sequencing chemistries
+        set<string> chems{ ds.SequencingChemistries() };
+        set<string> expected{ "P6-C4" };
+        EXPECT_EQ(expected, chems);   // whole-set comparison: an empty or larger result set fails instead of slipping through
+    }
+
+    { // min ZMW
+
+        int count = 0;
+        PbiFilterQuery query{ PbiZmwFilter{54, Compare::GREATER_THAN}, ds };
+        for (const BamRecord& r : query) {
+            EXPECT_GT(r.HoleNumber(), 54);
+            ++count;
+        }
+        EXPECT_EQ(1220, count);
+    }
+
+    { // max ZMW
+
+        int count = 0;
+        PbiFilterQuery query{ PbiZmwFilter{1816, Compare::LESS_THAN}, ds };
+        for (const BamRecord& r : query) {
+            EXPECT_LT(r.HoleNumber(),1816);
+            ++count;
+        }
+        EXPECT_EQ(150, count);
+    }
+
+    { // put all together, from DataSet XML
+
+        const PbiFilter filter = PbiFilter::FromDataSet(ds);
+        PbiFilterQuery query(filter, ds);
+        int count = 0;
+        for (const BamRecord& r : query) {
+            EXPECT_EQ(expectedMovieName, r.MovieName());
+            const auto zmw = r.HoleNumber();
+            EXPECT_GT(zmw, 54);
+            EXPECT_LT(zmw, 1816);
+            ++count;
+        }
+        EXPECT_EQ(150, count);
+    }
+    { // empty filter object - should return all records from the same dataset
+
+        PbiFilterQuery query(PbiFilter{ }, ds);
+        int count = 0;
+        for (const BamRecord& r : query) {
+            (void)r;
+            ++count;
+        }
+        EXPECT_EQ(1220, count);
+    }
+    { // no <Filters> element present at all
+
+        const DataSet ds(tests::Data_Dir + "/chunking/chunking_missingfilters.subreadset.xml");   // shadows the outer ds on purpose
+        const PbiFilter filter = PbiFilter::FromDataSet(ds);
+        PbiFilterQuery query(filter, ds);
+        int count = 0;
+        for (const BamRecord& r : query) {
+            (void)r;
+            ++count;
+        }
+        EXPECT_EQ(1220, count);
+    }
+    { // <Filters> element contains no child <Filter> elements
+
+        const DataSet ds(tests::Data_Dir + "/chunking/chunking_emptyfilters.subreadset.xml");   // shadows the outer ds on purpose
+        const PbiFilter filter = PbiFilter::FromDataSet(ds);
+        PbiFilterQuery query(filter, ds);
+        int count = 0;
+        for (const BamRecord& r : query) {
+            (void)r;
+            ++count;
+        }
+        EXPECT_EQ(1220, count);
+    }
+}
+
+TEST(PbiFilterQueryTest, MissingPbiShouldThrow)
+{   // Constructing a PbiFilterQuery must throw std::runtime_error when an input BAM has no PBI (per the scope notes below).
+    const PbiFilter filter{ PbiZmwFilter{31883} };
+    const string phi29Bam = tests::Data_Dir + "/phi29.bam";   // used below as the input without a PBI
+    const string hasPbiBam = tests::Data_Dir + "/polymerase/production.scraps.bam";   // used below as the input that has one
+
+    { // single file, missing PBI
+
+        EXPECT_THROW(PbiFilterQuery(filter, phi29Bam), std::runtime_error);
+    }
+
+    { // from dataset, all missing PBI
+
+        DataSet ds;
+        ds.ExternalResources().Add(ExternalResource("PacBio.SubreadFile.SubreadBamFile", phi29Bam));
+        ds.ExternalResources().Add(ExternalResource("PacBio.SubreadFile.SubreadBamFile", phi29Bam));
+        EXPECT_THROW(PbiFilterQuery(filter, ds), std::runtime_error);
+    }
+
+    { // from dataset, mixed PBI presence
+
+        DataSet ds;
+        ds.ExternalResources().Add(ExternalResource("PacBio.SubreadFile.SubreadBamFile", phi29Bam));
+        ds.ExternalResources().Add(ExternalResource("PacBio.SubreadFile.ScrapsBamFile", hasPbiBam));
+        EXPECT_THROW(PbiFilterQuery(filter, ds), std::runtime_error);
+    }
+}
diff --git a/tests/src/test_PolymeraseStitching.cpp b/tests/src/test_PolymeraseStitching.cpp
index 500f8c6..7c2e332 100644
--- a/tests/src/test_PolymeraseStitching.cpp
+++ b/tests/src/test_PolymeraseStitching.cpp
@@ -44,17 +44,22 @@
 #include <string>
 
 #include <gtest/gtest.h>
+#include <pbbam/BamFile.h>
+#include <pbbam/BamRecord.h>
+#include <pbbam/EntireFileQuery.h>
+#include <pbbam/Frames.h>
+#include <pbbam/virtual/VirtualPolymeraseReader.h>
+#include <pbbam/virtual/ZmwWhitelistVirtualReader.h>
 
-#include "pbbam/virtual/VirtualPolymeraseReader.h"
-#include "pbbam/BamFile.h"
-#include "pbbam/BamRecord.h"
-#include "pbbam/EntireFileQuery.h"
-#include "pbbam/Frames.h"
 #include "TestData.h"
 
 using namespace PacBio;
 using namespace PacBio::BAM;
 
+namespace PacBio {
+namespace BAM {
+namespace tests {
+
 void Compare(const BamRecord& b1, const BamRecord& b2)
 {
     EXPECT_TRUE(b1.HasDeletionQV());
@@ -118,6 +123,10 @@ void Compare(const BamRecord& b1, const BamRecord& b2)
     EXPECT_EQ(b1.PulseMergeQV(),    b2.PulseMergeQV());
 }
 
+} // namespace tests
+} // namespace BAM
+} // namespace PacBio
+
 TEST(VirtualPolymeraseReader, InternalSubreadsToOriginal)
 {
 	// Create virtual polymerase read
@@ -138,7 +147,7 @@ TEST(VirtualPolymeraseReader, InternalSubreadsToOriginal)
     auto polyRecord = *begin++;
 	EXPECT_TRUE(begin == end); 
 
-	Compare(polyRecord, virtualRecord);
+    tests::Compare(polyRecord, virtualRecord);
 }
 
 TEST(VirtualPolymeraseReader, InternalHQToOriginal)
@@ -161,7 +170,7 @@ TEST(VirtualPolymeraseReader, InternalHQToOriginal)
     auto polyRecord = *begin++;
 	EXPECT_TRUE(begin == end);    
 
-	Compare(polyRecord, virtualRecord);
+    tests::Compare(polyRecord, virtualRecord);
 }
 
 TEST(VirtualPolymeraseReader, VirtualRegions)
@@ -248,6 +257,7 @@ TEST(VirtualPolymeraseReader, ProductionSubreadsToOriginal)
     // Create virtual polymerase read
     VirtualPolymeraseReader vpr(tests::Data_Dir + "/polymerase/production.subreads.bam",
                                 tests::Data_Dir + "/polymerase/production.scraps.bam");
+
     EXPECT_TRUE(vpr.HasNext());
     auto virtualRecord = vpr.Next();
     EXPECT_FALSE(vpr.HasNext());
@@ -261,11 +271,11 @@ TEST(VirtualPolymeraseReader, ProductionSubreadsToOriginal)
 
     EXPECT_TRUE(begin != end);
     auto polyRecord = *begin++;
-    EXPECT_TRUE(begin == end); 
+    EXPECT_TRUE(begin == end);
 
     EXPECT_EQ(polyRecord.FullName(),        virtualRecord.FullName());
     EXPECT_EQ(polyRecord.HoleNumber(),      virtualRecord.HoleNumber());
-    EXPECT_EQ(polyRecord.ReadAccuracy(),    virtualRecord.ReadAccuracy());
+    EXPECT_FLOAT_EQ(polyRecord.ReadAccuracy(),    virtualRecord.ReadAccuracy());
     EXPECT_EQ(polyRecord.NumPasses(),       virtualRecord.NumPasses());
     EXPECT_EQ(polyRecord.Sequence(),        virtualRecord.Sequence());
     EXPECT_EQ(polyRecord.Qualities(),       virtualRecord.Qualities());
@@ -350,3 +360,166 @@ TEST(VirtualPolymeraseReader, ProductionHQToOriginal)
     EXPECT_FALSE(virtualRecord.HasPrePulseFrames());
     EXPECT_FALSE(virtualRecord.HasPulseCallWidth());
 }
+
+TEST(ZmwWhitelistVirtualReader, SingleZmwOk)
+{
+    const std::vector<int32_t> whitelist = { 200000 };
+
+    ZmwWhitelistVirtualReader reader(whitelist,
+                                     tests::Data_Dir + "/polymerase/whitelist/internal.subreads.bam",
+                                     tests::Data_Dir + "/polymerase/whitelist/internal.scraps.bam");
+
+    // create virtual record
+    EXPECT_TRUE(reader.HasNext());
+    auto virtualRecord = reader.Next();
+    EXPECT_FALSE(reader.HasNext());
+
+    // fetch original polymerase read (2nd record)
+    BamFile polyBam(tests::Data_Dir + "/polymerase/whitelist/internal.polymerase.bam");
+    EntireFileQuery polyQuery(polyBam);
+    auto begin = polyQuery.begin();
+    auto end = polyQuery.end();
+    EXPECT_TRUE(begin != end);
+    ++begin;
+    EXPECT_TRUE(begin != end);
+    auto polyRecord = *begin++;
+
+    EXPECT_EQ(200000, virtualRecord.HoleNumber());
+
+    tests::Compare(polyRecord, virtualRecord);
+}
+
+TEST(ZmwWhitelistVirtualReader, MultiZmwsOk)
+{
+    const std::vector<int32_t> whitelist = { 100000, 300000 };
+
+    ZmwWhitelistVirtualReader reader(whitelist,
+                                     tests::Data_Dir + "/polymerase/whitelist/internal.subreads.bam",
+                                     tests::Data_Dir + "/polymerase/whitelist/internal.scraps.bam");
+
+
+    // create virtual records
+    EXPECT_TRUE(reader.HasNext());
+    auto virtualRecord1 = reader.Next();
+    EXPECT_TRUE(reader.HasNext());
+    auto virtualRecord2 = reader.Next();
+    EXPECT_FALSE(reader.HasNext());
+
+    // fetch original polymerase reads (1st and 3rd records)
+    BamFile polyBam(tests::Data_Dir + "/polymerase/whitelist/internal.polymerase.bam");
+    EntireFileQuery polyQuery(polyBam);
+    auto begin = polyQuery.begin();
+    auto end = polyQuery.end();
+
+    EXPECT_TRUE(begin != end);
+    auto polyRecord1 = *begin++;
+    EXPECT_TRUE(begin != end);
+    ++begin;
+    EXPECT_TRUE(begin != end);
+    auto polyRecord2 = *begin++;
+    EXPECT_TRUE(begin == end);
+
+    EXPECT_EQ(100000, virtualRecord1.HoleNumber());
+    EXPECT_EQ(300000, virtualRecord2.HoleNumber());
+
+    tests::Compare(polyRecord1, virtualRecord1);
+    tests::Compare(polyRecord2, virtualRecord2);
+}
+
+TEST(ZmwWhitelistVirtualReader, EmptyListOk)
+{
+    const std::vector<int32_t> whitelist = { };
+
+    ZmwWhitelistVirtualReader reader(whitelist,
+                                     tests::Data_Dir + "/polymerase/whitelist/internal.subreads.bam",
+                                     tests::Data_Dir + "/polymerase/whitelist/internal.scraps.bam");
+    EXPECT_FALSE(reader.HasNext());
+    EXPECT_TRUE(reader.NextRaw().empty());
+}
+
+TEST(ZmwWhitelistVirtualReader, EmptyScrapsFileOk)
+{
+    const std::vector<int32_t> whitelist = { 10944689, 10944690 };
+    const std::string primaryBamFn = tests::Data_Dir + "/polymerase/whitelist/scrapless.subreads.bam" ;
+    const std::string scrapsBamFn  = tests::Data_Dir + "/polymerase/whitelist/scrapless.scraps.bam" ;
+
+    int count = 0;
+    ZmwWhitelistVirtualReader reader(whitelist, primaryBamFn, scrapsBamFn);
+    while (reader.HasNext()) {
+        auto record = reader.Next();
+        (void)record;
+        ++count;
+    }
+    EXPECT_EQ(2, count);
+
+    const BamFile primaryBam(primaryBamFn);
+    const BamFile scrapsBam(scrapsBamFn);
+    const PbiRawData primaryIdx(primaryBam.PacBioIndexFilename());
+    const PbiRawData scrapsIdx(scrapsBam.PacBioIndexFilename());
+    EXPECT_EQ(3, primaryIdx.NumReads());
+    EXPECT_EQ(0, scrapsIdx.NumReads());
+}
+
+TEST(ZmwWhitelistVirtualReader, UnknownZmwOk)
+{
+    const std::vector<int32_t> whitelist = { 42 }; // ZMW not in our files
+
+    ZmwWhitelistVirtualReader reader(whitelist,
+                                     tests::Data_Dir + "/polymerase/whitelist/internal.subreads.bam",
+                                     tests::Data_Dir + "/polymerase/whitelist/internal.scraps.bam");
+    EXPECT_FALSE(reader.HasNext());
+    EXPECT_TRUE(reader.NextRaw().empty());
+}
+
+TEST(ZmwWhitelistVirtualReader, MixedKnownAndUnknownZmwsOk)
+{
+    const std::vector<int32_t> whitelist = { 42, 200000, 24 };
+
+    ZmwWhitelistVirtualReader reader(whitelist,
+                                     tests::Data_Dir + "/polymerase/whitelist/internal.subreads.bam",
+                                     tests::Data_Dir + "/polymerase/whitelist/internal.scraps.bam");
+
+    // everything below should behave exactly as 'SingleZmwOk' test,
+    // as the unknown ZMWs will have been removed during construction
+
+    // create virtual record
+    EXPECT_TRUE(reader.HasNext());
+    auto virtualRecord = reader.Next();
+    EXPECT_FALSE(reader.HasNext());
+
+    // fetch original polymerase read (2nd record)
+    BamFile polyBam(tests::Data_Dir + "/polymerase/whitelist/internal.polymerase.bam");
+    EntireFileQuery polyQuery(polyBam);
+    auto begin = polyQuery.begin();
+    auto end = polyQuery.end();
+    EXPECT_TRUE(begin != end);
+    ++begin;
+    EXPECT_TRUE(begin != end);
+    auto polyRecord = *begin++;
+
+    EXPECT_EQ(200000, virtualRecord.HoleNumber());
+
+    tests::Compare(polyRecord, virtualRecord);
+}
+
+TEST(VirtualPolymeraseBamRecord, VirtualRegionsTableOk) 
+{
+    VirtualPolymeraseReader vpr(tests::Data_Dir + "/polymerase/production.subreads.bam",
+                                tests::Data_Dir + "/polymerase/production.scraps.bam");
+    EXPECT_TRUE(vpr.HasNext());
+    const auto virtualRecord = vpr.Next();
+
+    const auto subreads  = virtualRecord.VirtualRegionsTable(VirtualRegionType::SUBREAD);
+    const auto adapters  = virtualRecord.VirtualRegionsTable(VirtualRegionType::ADAPTER);
+    const auto hqRegions = virtualRecord.VirtualRegionsTable(VirtualRegionType::HQREGION);
+    const auto lqRegions = virtualRecord.VirtualRegionsTable(VirtualRegionType::LQREGION);
+    const auto barcodes  = virtualRecord.VirtualRegionsTable(VirtualRegionType::BARCODE);
+    const auto filtered  = virtualRecord.VirtualRegionsTable(VirtualRegionType::FILTERED);
+
+    EXPECT_FALSE(subreads.empty());
+    EXPECT_FALSE(adapters.empty());
+    EXPECT_FALSE(hqRegions.empty());
+    EXPECT_FALSE(lqRegions.empty());
+    EXPECT_FALSE(barcodes.empty());
+    EXPECT_TRUE(filtered.empty());    // this annotation type is not in data set
+}
diff --git a/tests/src/test_GroupQuery.cpp b/tests/src/test_QNameQuery.cpp
similarity index 57%
rename from tests/src/test_GroupQuery.cpp
rename to tests/src/test_QNameQuery.cpp
index 2f6769f..0c6b696 100644
--- a/tests/src/test_GroupQuery.cpp
+++ b/tests/src/test_QNameQuery.cpp
@@ -35,98 +35,45 @@
 
 // Author: Yuan Li
 
-#ifdef PBBAM_TESTING
-#define private public
-#endif
-
 #include "TestData.h"
 #include <gtest/gtest.h>
-#include <pbbam/ZmwGroupQuery.h>
-#include <pbbam/GroupQuery.h>
+#include <pbbam/QNameQuery.h>
 #include <string>
 using namespace PacBio;
 using namespace PacBio::BAM;
 using namespace std;
 
+static const string dataDir = tests::Data_Dir + "/test_group_query/";
+static const string test1fn = string(dataDir) + "test1.bam";
+static const string test2fn = string(dataDir) + "test2.bam";
+static const string test3fn = string(dataDir) + "test3.bam";
 
-const string dataDir = tests::Data_Dir + "/test_group_query/";
-const string test1fn = string(dataDir) + "test1.bam";
-const string test2fn = string(dataDir) + "test2.bam";
-const string test3fn = string(dataDir) + "test3.bam";
-
-//void TestZmwQuery(const string & fn, const vector<int> & expected)
-//{
-//    EXPECT_NO_THROW(
-//    {
-//        BamFile bamFile(fn);
-//        vector<int> counts;
-//        ZmwGroupQuery zmwQuery(bamFile);
-//        for (const vector<BamRecord>& records : zmwQuery)
-//            counts.push_back(records.size());
-//        EXPECT_EQ(expected, counts);
-//    });
-//}
-
-//void TestNoneConstZmwQuery(const string & fn, const vector<int> & expected)
-//{
-//    EXPECT_NO_THROW(
-//    {
-//        BamFile bamFile(fn);
-//        vector<int> counts;
-//        ZmwGroupQuery zmwQuery(bamFile);
-//        for (vector<BamRecord>& records : zmwQuery)
-//            counts.push_back(records.size());
-//        EXPECT_EQ(expected, counts);
-//    });
-//}
-
-void TestQNameQuery(const string & fn, const vector<int> & expected) 
+static
+void TestQNameQuery(const string& fn, const vector<int>& expected)
 {
     EXPECT_NO_THROW(
     {
-        BamFile bamFile(fn);
         vector<int> counts;
-        QNameQuery qQuery(bamFile);
-        for (const vector<BamRecord>& records : qQuery) 
+        QNameQuery qQuery(fn);
+        for (const vector<BamRecord>& records : qQuery)
             counts.push_back(records.size());
         EXPECT_EQ(expected, counts);
     });
 }
 
-void TestNoneConstQNameQuery(const string & fn, const vector<int> & expected) 
-{    
+static
+void TestNoneConstQNameQuery(const string& fn, const vector<int>& expected)
+{
     EXPECT_NO_THROW(
     {
-        BamFile bamFile(fn);
         vector<int> counts;
-        QNameQuery qQuery(bamFile);
-        for (vector<BamRecord>& records : qQuery) 
+        QNameQuery qQuery(fn);
+        for (vector<BamRecord>& records : qQuery)
             counts.push_back(records.size());
         EXPECT_EQ(expected, counts);
     });
 }
 
-TEST(ZmwQueryTest, CountZmwSizes)
-{
-//    // test case 1 has exactly one bamRecord.
-//    string fn = test1fn;
-//    vector<int> expected({1});
-//    TestZmwQuery(fn, expected);
-//    TestNoneConstZmwQuery(fn, expected);
-
-//    // test case 2 has four bamRecords from the same zmw.
-//    fn = test2fn;
-//    expected = vector<int>({4});
-//    TestZmwQuery(fn, expected);
-//    TestNoneConstZmwQuery(fn, expected);
-
-//    // more bamRecords in test case 3.
-//    fn = test3fn;
-//    expected = {2,3,3,2,2,1};
-//    TestZmwQuery(fn, expected);
-//    TestNoneConstZmwQuery(fn, expected);
-}
-
 TEST(QNameQueryTest, CountQSizes)
 {
     // test case 1 has exactly one bamRecord.
diff --git a/tests/src/test_BamFile.cpp b/tests/src/test_ReadAccuracyQuery.cpp
similarity index 74%
copy from tests/src/test_BamFile.cpp
copy to tests/src/test_ReadAccuracyQuery.cpp
index 4ca910d..721bec7 100644
--- a/tests/src/test_BamFile.cpp
+++ b/tests/src/test_ReadAccuracyQuery.cpp
@@ -41,29 +41,32 @@
 
 #include "TestData.h"
 #include <gtest/gtest.h>
-#include <pbbam/BamFile.h>
-#include <stdexcept>
+#include <pbbam/ReadAccuracyQuery.h>
+#include <string>
 using namespace PacBio;
 using namespace PacBio::BAM;
 using namespace std;
 
-TEST(BamFileTest, NonExistentFileThrows)
+TEST(ReadAccuracyQueryTest, QueryOk)
 {
-    EXPECT_THROW(
-    {
-       BamFile file("does_not_exist.bam");
-       (void)file;
-    },
-    std::exception);
-}
+    const auto bamFile = BamFile{ tests::Data_Dir + string{ "/test_group_query/test2.bam" } };
 
-TEST(BamFileTest, NonBamFileThrows)
-{
-    EXPECT_THROW(
     {
-        const std::string& fn = tests::Data_Dir + "/lambdaNEB.fa.fai";
-        BamFile file(fn);
-        (void)file;
-    },
-    std::exception);
+        int count = 0;
+        ReadAccuracyQuery query(0.901, Compare::GREATER_THAN_EQUAL, bamFile);
+        for (const auto& r: query) {
+            ++count;
+            EXPECT_GE(r.ReadAccuracy(), 0.901);
+        }
+        EXPECT_EQ(4, count);
+    }
+    {
+        int count = 0;
+        ReadAccuracyQuery query(0.95, Compare::GREATER_THAN_EQUAL, bamFile);
+        for (const auto& r: query) {
+            ++count;
+            EXPECT_GE(r.ReadAccuracy(), 0.901);
+        }
+        EXPECT_EQ(0, count);
+    }
 }
diff --git a/tests/src/test_ReadGroupInfo.cpp b/tests/src/test_ReadGroupInfo.cpp
index db30fa2..463846c 100644
--- a/tests/src/test_ReadGroupInfo.cpp
+++ b/tests/src/test_ReadGroupInfo.cpp
@@ -41,8 +41,9 @@
 
 #include <gtest/gtest.h>
 #include <pbbam/ReadGroupInfo.h>
+#include <vector>
 using namespace PacBio::BAM;
-
+using namespace std;
 
 TEST(ReadGroupInfoTest, IdFromMovieNameAndReadType)
 {
@@ -59,3 +60,64 @@ TEST(ReadGroupInfoTest, FrameCodecSetOk)
     EXPECT_EQ(FrameCodec::V1, rg.IpdCodec());
 }
 
+TEST(ReadGroupInfoTest, SequencingChemistryOk)
+{
+    using std::string;
+    using std::vector;
+
+    { // P6-C4
+        const vector<string> bindingKits { "100356300", "100372700" };
+        const vector<string> versions { "2.1", "2.3" };
+        ReadGroupInfo rg("P6C4");
+        rg.SequencingKit("100356200");
+        for (const string& bk : bindingKits) {
+            rg.BindingKit(bk);
+            for (const string& ver : versions) {
+                rg.BasecallerVersion(ver);
+                EXPECT_EQ("P6-C4", rg.SequencingChemistry());
+            }
+        }
+    }
+
+    { // S/P1-C1
+        const vector<string> sequencingKits { "100-619-400", "100-711-600" };
+        ReadGroupInfo rg("SP1C1");
+        rg.BindingKit("100-619-300");
+        rg.BasecallerVersion("3.0");
+        for (const string& sk : sequencingKits) {
+            rg.SequencingKit(sk);
+            EXPECT_EQ("S/P1-C1", rg.SequencingChemistry());
+        }
+    }
+
+    // basecaller 3.1.x
+    { 
+        const vector<string> sequencingKits { "100-619-400", "100-711-600", "100-620-000" };
+        ReadGroupInfo rg("3.1");
+        rg.BindingKit("100-619-300");
+        rg.BasecallerVersion("3.1.0.171835");
+        for (const string& sk : sequencingKits) {
+            rg.SequencingKit(sk);
+            EXPECT_EQ("S/P1-C1", rg.SequencingChemistry());
+        }
+    }
+}
+
+TEST(ReadGroupInfoTest, SequencingChemistryThrowsOnBadTriple)
+{
+    try {
+        ReadGroupInfo rg("BAD");
+        rg.BindingKit("100372700");
+        rg.SequencingKit("100-619-400");
+        rg.BasecallerVersion("2.0");
+        //EXPECT_THROW(rg.SequencingChemistry(), InvalidSequencingChemistryException);
+    } catch (InvalidSequencingChemistryException& e) {
+        EXPECT_EQ(string("100372700"),   e.BindingKit());
+        EXPECT_EQ(string("100-619-400"), e.SequencingKit());
+        EXPECT_EQ(string("2.0"),         e.BasecallerVersion());
+    }
+}
+
+
+
+
diff --git a/tests/src/test_SequenceUtils.cpp b/tests/src/test_SequenceUtils.cpp
index a089579..20bf5e6 100644
--- a/tests/src/test_SequenceUtils.cpp
+++ b/tests/src/test_SequenceUtils.cpp
@@ -41,38 +41,14 @@
 
 #include <gtest/gtest.h>
 #include <pbbam/../../src/SequenceUtils.h>
-#include <pbbam/../../src/StringUtils.h>
 #include <string>
 #include <vector>
-
 #include <climits>
-
 using namespace PacBio;
 using namespace PacBio::BAM;
 using namespace PacBio::BAM::internal;
 using namespace std;
 
-TEST(StringUtilsTest, BasicSplit)
-{
-    const string test = "foo\tbar\tbaz";
-    const vector<string> tokens = internal::Split(test, '\t');
-    EXPECT_EQ(3, tokens.size());
-    EXPECT_TRUE(tokens.at(0) == "foo");
-    EXPECT_TRUE(tokens.at(1) == "bar");
-    EXPECT_TRUE(tokens.at(2) == "baz");
-}
-
-TEST(StringUtilsTest, SplitKeepsEmptyTokens)
-{
-    const string test = "foo\tbar\t\tbaz";
-    const vector<string> tokens = internal::Split(test, '\t');
-    EXPECT_EQ(4, tokens.size());
-    EXPECT_TRUE(tokens.at(0) == "foo");
-    EXPECT_TRUE(tokens.at(1) == "bar");
-    EXPECT_TRUE(tokens.at(2) == "");
-    EXPECT_TRUE(tokens.at(3) == "baz");
-}
-
 TEST(SequenceUtilsTest, ComplementChar)
 {
                         // complement
diff --git a/tests/src/test_TimeUtils.cpp b/tests/src/test_StringUtils.cpp
similarity index 73%
copy from tests/src/test_TimeUtils.cpp
copy to tests/src/test_StringUtils.cpp
index 7ab9fa5..d335246 100644
--- a/tests/src/test_TimeUtils.cpp
+++ b/tests/src/test_StringUtils.cpp
@@ -40,20 +40,31 @@
 #endif
 
 #include <gtest/gtest.h>
-#include <pbbam/../../src/TimeUtils.h>
-
+#include <pbbam/../../src/StringUtils.h>
+#include <string>
+#include <vector>
 using namespace PacBio;
 using namespace PacBio::BAM;
 using namespace PacBio::BAM::internal;
 using namespace std;
 
-TEST(TimeUtilsTest, ToIso8601)
+TEST(StringUtilsTest, BasicSplit)
 {
-    const time_t rawTime = 436428750L;
-    const auto timestamp = std::chrono::system_clock::from_time_t(rawTime);
+    const string test = "foo\tbar\tbaz";
+    const vector<string> tokens = internal::Split(test, '\t');
+    EXPECT_EQ(3, tokens.size());
+    EXPECT_TRUE(tokens.at(0) == "foo");
+    EXPECT_TRUE(tokens.at(1) == "bar");
+    EXPECT_TRUE(tokens.at(2) == "baz");
+}
 
-    // can't hardcode expected (since we rely on localtime())
-    const std::string& expected = "1983-10-31T06:12:30Z";
-    const std::string& actual = internal::ToIso8601(timestamp);
-    EXPECT_EQ(expected, actual);
+TEST(StringUtilsTest, SplitKeepsEmptyTokens)
+{
+    const string test = "foo\tbar\t\tbaz";
+    const vector<string> tokens = internal::Split(test, '\t');
+    EXPECT_EQ(4, tokens.size());
+    EXPECT_TRUE(tokens.at(0) == "foo");
+    EXPECT_TRUE(tokens.at(1) == "bar");
+    EXPECT_TRUE(tokens.at(2) == "");
+    EXPECT_TRUE(tokens.at(3) == "baz");
 }
diff --git a/tests/src/test_BamFile.cpp b/tests/src/test_SubreadLengthQuery.cpp
similarity index 66%
copy from tests/src/test_BamFile.cpp
copy to tests/src/test_SubreadLengthQuery.cpp
index 4ca910d..a476823 100644
--- a/tests/src/test_BamFile.cpp
+++ b/tests/src/test_SubreadLengthQuery.cpp
@@ -41,29 +41,41 @@
 
 #include "TestData.h"
 #include <gtest/gtest.h>
-#include <pbbam/BamFile.h>
-#include <stdexcept>
+#include <pbbam/SubreadLengthQuery.h>
+#include <string>
 using namespace PacBio;
 using namespace PacBio::BAM;
 using namespace std;
 
-TEST(BamFileTest, NonExistentFileThrows)
+TEST(SubreadLengthQueryTest, QueryOk)
 {
-    EXPECT_THROW(
-    {
-       BamFile file("does_not_exist.bam");
-       (void)file;
-    },
-    std::exception);
-}
+    const auto bamFile = BamFile{ tests::Data_Dir + string{ "/test_group_query/test2.bam" } };
 
-TEST(BamFileTest, NonBamFileThrows)
-{
-    EXPECT_THROW(
     {
-        const std::string& fn = tests::Data_Dir + "/lambdaNEB.fa.fai";
-        BamFile file(fn);
-        (void)file;
-    },
-    std::exception);
+        int count = 0;
+        SubreadLengthQuery query(500, Compare::GREATER_THAN_EQUAL, bamFile);
+        for (const auto& r: query) {
+            ++count;
+            EXPECT_GE((r.QueryEnd() - r.QueryStart()), 500);
+        }
+        EXPECT_EQ(3, count);
+    }
+    {
+        int count = 0;
+        SubreadLengthQuery query(1000, Compare::GREATER_THAN_EQUAL, bamFile);
+        for (const auto& r: query) {
+            ++count;
+            EXPECT_GE((r.QueryEnd() - r.QueryStart()), 1000);
+        }
+        EXPECT_EQ(2, count);
+    }
+    {
+        int count = 0;
+        SubreadLengthQuery query(5000, Compare::GREATER_THAN_EQUAL, bamFile);
+        for (const auto& r: query) {
+            ++count;
+            EXPECT_GE((r.QueryEnd() - r.QueryStart()), 5000);
+        }
+        EXPECT_EQ(0, count);
+    }
 }
diff --git a/tests/src/test_Tags.cpp b/tests/src/test_Tags.cpp
index 2ed7aa0..6755204 100644
--- a/tests/src/test_Tags.cpp
+++ b/tests/src/test_Tags.cpp
@@ -48,6 +48,9 @@
 #include <iostream>
 #include <map>
 #include <string>
+
+#include <typeinfo>
+
 using namespace PacBio;
 using namespace PacBio::BAM;
 using namespace std;
@@ -70,6 +73,9 @@ TEST(TagTest, TagConstruction)
     vector<uint32_t> u32_Array;
     vector<float>    float_array;
 
+    signed char   c  = 'A';
+    unsigned char uc = 'A';
+
     Tag i8Tag(i8);
     Tag u8Tag(u8);
     Tag i16Tag(i16);
@@ -86,6 +92,9 @@ TEST(TagTest, TagConstruction)
     Tag u32_array_Tag(u32_Array);
     Tag float_array_Tag(float_array);
 
+    Tag charTag(c, TagModifier::ASCII_CHAR);
+    Tag ucharTag(uc, TagModifier::ASCII_CHAR);
+
     EXPECT_TRUE(i8Tag.Type()     == TagDataType::INT8);
     EXPECT_TRUE(u8Tag.Type()     == TagDataType::UINT8);
     EXPECT_TRUE(i16Tag.Type()    == TagDataType::INT16);
@@ -101,6 +110,9 @@ TEST(TagTest, TagConstruction)
     EXPECT_TRUE(i32_array_Tag.Type()   == TagDataType::INT32_ARRAY);
     EXPECT_TRUE(u32_array_Tag.Type()   == TagDataType::UINT32_ARRAY);
     EXPECT_TRUE(float_array_Tag.Type() == TagDataType::FLOAT_ARRAY);
+
+    EXPECT_TRUE(charTag.ToAscii()  == 'A');
+    EXPECT_TRUE(ucharTag.ToAscii() == 'A');
 }
 
 TEST(TagTest, CopyAndCompare)
@@ -235,31 +247,87 @@ TEST(TagTest, Type_UInt8)
 
 TEST(TagTest, Type_Ascii)
 {
-    Tag pureAscii = Tag('$');
-    pureAscii.Modifier(TagModifier::ASCII_CHAR);
-
+    const char          c  = '$';
+    const signed char   sc = '$';
+    const unsigned char uc = '$';
     const uint8_t u8 = 65;
     const int8_t  i8 = 66;
 
-    Tag fromUint8 = Tag(u8);
-    fromUint8.Modifier(TagModifier::ASCII_CHAR);
-    Tag fromInt8  = Tag(i8);
-    fromInt8.Modifier(TagModifier::ASCII_CHAR);
-
-    EXPECT_TRUE(pureAscii.HasModifier(TagModifier::ASCII_CHAR));
-    EXPECT_TRUE(pureAscii.IsIntegral());
-    EXPECT_TRUE(pureAscii.IsNumeric());
-    EXPECT_EQ('$', pureAscii.ToAscii());
-
-    EXPECT_TRUE(fromUint8.HasModifier(TagModifier::ASCII_CHAR));
-    EXPECT_TRUE(fromUint8.IsIntegral());
-    EXPECT_TRUE(fromUint8.IsNumeric());
-    EXPECT_EQ('A', fromUint8.ToAscii());
-
-    EXPECT_TRUE(fromInt8.HasModifier(TagModifier::ASCII_CHAR));
-    EXPECT_TRUE(fromInt8.IsIntegral());
-    EXPECT_TRUE(fromInt8.IsNumeric());
-    EXPECT_EQ('B', fromInt8.ToAscii());
+    { // old style: construct-then-modify
+
+        Tag fromPlainChar = Tag(c);
+        Tag fromSignedChar = Tag(sc);
+        Tag fromUnsignedChar = Tag(uc);
+        Tag fromUint8 = Tag(u8);
+        Tag fromInt8  = Tag(i8);
+        fromPlainChar.Modifier(TagModifier::ASCII_CHAR);
+        fromSignedChar.Modifier(TagModifier::ASCII_CHAR);
+        fromUnsignedChar.Modifier(TagModifier::ASCII_CHAR);
+        fromUint8.Modifier(TagModifier::ASCII_CHAR);
+        fromInt8.Modifier(TagModifier::ASCII_CHAR);
+
+        EXPECT_TRUE(fromPlainChar.HasModifier(TagModifier::ASCII_CHAR));
+        EXPECT_TRUE(fromPlainChar.IsIntegral());
+        EXPECT_TRUE(fromPlainChar.IsNumeric());
+        EXPECT_EQ('$', fromPlainChar.ToAscii());
+
+        EXPECT_TRUE(fromSignedChar.HasModifier(TagModifier::ASCII_CHAR));
+        EXPECT_TRUE(fromSignedChar.IsIntegral());
+        EXPECT_TRUE(fromSignedChar.IsNumeric());
+        EXPECT_EQ('$', fromSignedChar.ToAscii());
+
+        EXPECT_TRUE(fromUnsignedChar.HasModifier(TagModifier::ASCII_CHAR));
+        EXPECT_TRUE(fromUnsignedChar.IsIntegral());
+        EXPECT_TRUE(fromUnsignedChar.IsNumeric());
+        EXPECT_EQ('$', fromUnsignedChar.ToAscii());
+
+        EXPECT_TRUE(fromUint8.HasModifier(TagModifier::ASCII_CHAR));
+        EXPECT_TRUE(fromUint8.IsIntegral());
+        EXPECT_TRUE(fromUint8.IsNumeric());
+        EXPECT_EQ('A', fromUint8.ToAscii());
+
+        EXPECT_TRUE(fromInt8.HasModifier(TagModifier::ASCII_CHAR));
+        EXPECT_TRUE(fromInt8.IsIntegral());
+        EXPECT_TRUE(fromInt8.IsNumeric());
+        EXPECT_EQ('B', fromInt8.ToAscii());
+    }
+
+    { // new style: construct directly as ASCII
+
+        const Tag fromPlainChar    = Tag(c,  TagModifier::ASCII_CHAR);
+        const Tag fromSignedChar   = Tag(sc, TagModifier::ASCII_CHAR);
+        const Tag fromUnsignedChar = Tag(uc, TagModifier::ASCII_CHAR);
+        const Tag fromUint8 = Tag(u8, TagModifier::ASCII_CHAR);
+        const Tag fromInt8  = Tag(i8, TagModifier::ASCII_CHAR);
+
+        EXPECT_TRUE(fromPlainChar.HasModifier(TagModifier::ASCII_CHAR));
+        EXPECT_TRUE(fromPlainChar.IsIntegral());
+        EXPECT_TRUE(fromPlainChar.IsNumeric());
+        EXPECT_EQ('$', fromPlainChar.ToAscii());
+
+        EXPECT_TRUE(fromSignedChar.HasModifier(TagModifier::ASCII_CHAR));
+        EXPECT_TRUE(fromSignedChar.IsIntegral());
+        EXPECT_TRUE(fromSignedChar.IsNumeric());
+        EXPECT_EQ('$', fromSignedChar.ToAscii());
+
+        EXPECT_TRUE(fromUnsignedChar.HasModifier(TagModifier::ASCII_CHAR));
+        EXPECT_TRUE(fromUnsignedChar.IsIntegral());
+        EXPECT_TRUE(fromUnsignedChar.IsNumeric());
+        EXPECT_EQ('$', fromUnsignedChar.ToAscii());
+
+        EXPECT_TRUE(fromUint8.HasModifier(TagModifier::ASCII_CHAR));
+        EXPECT_TRUE(fromUint8.IsIntegral());
+        EXPECT_TRUE(fromUint8.IsNumeric());
+        EXPECT_EQ('A', fromUint8.ToAscii());
+
+        EXPECT_TRUE(fromInt8.HasModifier(TagModifier::ASCII_CHAR));
+        EXPECT_TRUE(fromInt8.IsIntegral());
+        EXPECT_TRUE(fromInt8.IsNumeric());
+        EXPECT_EQ('B', fromInt8.ToAscii());
+    }
+
+    // check invalid constructs
+    EXPECT_THROW(Tag('A', TagModifier::HEX_STRING), std::runtime_error);
 }
 
 TEST(TagTest, Type_Int16)
@@ -396,6 +464,19 @@ TEST(TagTest, Type_String)
     EXPECT_FALSE(tag.IsArray());
 
     EXPECT_EQ(v, v2);
+
+    // "Hex format" string
+    const Tag hex("DEADBEEF", TagModifier::HEX_STRING);
+    EXPECT_TRUE(hex.Type() == TagDataType::STRING);
+    EXPECT_TRUE(hex.Typename() == "string");
+    EXPECT_TRUE(hex.IsString());
+    EXPECT_TRUE(hex.HasModifier(TagModifier::HEX_STRING));
+    EXPECT_FALSE(hex.IsNull());
+    EXPECT_FALSE(hex.IsNumeric());
+    EXPECT_FALSE(hex.IsArray());
+
+    // check invalid constructs
+    EXPECT_THROW(Tag("DEADBEEF", TagModifier::ASCII_CHAR), std::runtime_error);
 }
 
 TEST(TagTest, Type_Int8Array)
@@ -634,10 +715,10 @@ TEST(TagTest, ConvertToInt8)
 
     // not allowed
     EXPECT_THROW(underflow.ToInt8(), std::exception);
-    EXPECT_THROW(overflow.ToInt8(), std::exception);
-    EXPECT_THROW(floatTag.ToInt8(), std::exception);
+    EXPECT_THROW(overflow.ToInt8(),  std::exception);
+    EXPECT_THROW(floatTag.ToInt8(),  std::exception);
     EXPECT_THROW(stringTag.ToInt8(), std::exception);
-    EXPECT_THROW(arrayTag.ToInt8(), std::exception);
+    EXPECT_THROW(arrayTag.ToInt8(),  std::exception);
 }
 
 TEST(TagTest, ConvertToUInt8)
@@ -660,11 +741,11 @@ TEST(TagTest, ConvertToUInt8)
     });
 
     // not allowed
-    EXPECT_THROW(neg.ToUInt8(), std::exception);
-    EXPECT_THROW(overflow.ToUInt8(), std::exception);
-    EXPECT_THROW(floatTag.ToUInt8(), std::exception);
+    EXPECT_THROW(neg.ToUInt8(),       std::exception);
+    EXPECT_THROW(overflow.ToUInt8(),  std::exception);
+    EXPECT_THROW(floatTag.ToUInt8(),  std::exception);
     EXPECT_THROW(stringTag.ToUInt8(), std::exception);
-    EXPECT_THROW(arrayTag.ToUInt8(), std::exception);
+    EXPECT_THROW(arrayTag.ToUInt8(),  std::exception);
 }
 
 TEST(TagTest, ConvertToInt16)
@@ -690,10 +771,10 @@ TEST(TagTest, ConvertToInt16)
 
     // not allowed
     EXPECT_THROW(underflow.ToInt16(), std::exception);
-    EXPECT_THROW(overflow.ToInt16(), std::exception);
-    EXPECT_THROW(floatTag.ToInt16(), std::exception);
+    EXPECT_THROW(overflow.ToInt16(),  std::exception);
+    EXPECT_THROW(floatTag.ToInt16(),  std::exception);
     EXPECT_THROW(stringTag.ToInt16(), std::exception);
-    EXPECT_THROW(arrayTag.ToInt16(), std::exception);
+    EXPECT_THROW(arrayTag.ToInt16(),  std::exception);
 }
 
 TEST(TagTest, ConvertToUInt16)
@@ -716,11 +797,11 @@ TEST(TagTest, ConvertToUInt16)
     });
 
     // not allowed
-    EXPECT_THROW(neg.ToUInt16(), std::exception);
-    EXPECT_THROW(overflow.ToUInt16(), std::exception);
-    EXPECT_THROW(floatTag.ToUInt16(), std::exception);
+    EXPECT_THROW(neg.ToUInt16(),       std::exception);
+    EXPECT_THROW(overflow.ToUInt16(),  std::exception);
+    EXPECT_THROW(floatTag.ToUInt16(),  std::exception);
     EXPECT_THROW(stringTag.ToUInt16(), std::exception);
-    EXPECT_THROW(arrayTag.ToUInt16(), std::exception);
+    EXPECT_THROW(arrayTag.ToUInt16(),  std::exception);
 }
 
 TEST(TagTest, ConvertToInt32)
@@ -748,9 +829,9 @@ TEST(TagTest, ConvertToInt32)
     });
 
     // not allowed
-    EXPECT_THROW(floatTag.ToInt32(), std::exception);
+    EXPECT_THROW(floatTag.ToInt32(),  std::exception);
     EXPECT_THROW(stringTag.ToInt32(), std::exception);
-    EXPECT_THROW(arrayTag.ToInt32(), std::exception);
+    EXPECT_THROW(arrayTag.ToInt32(),  std::exception);
 }
 
 TEST(TagTest, ConvertToUInt32)
@@ -776,10 +857,10 @@ TEST(TagTest, ConvertToUInt32)
     });
 
     // not allowed
-    EXPECT_THROW(neg.ToUInt32(), std::exception);
-    EXPECT_THROW(floatTag.ToUInt32(), std::exception);
+    EXPECT_THROW(neg.ToUInt32(),       std::exception);
+    EXPECT_THROW(floatTag.ToUInt32(),  std::exception);
     EXPECT_THROW(stringTag.ToUInt32(), std::exception);
-    EXPECT_THROW(arrayTag.ToUInt32(), std::exception);
+    EXPECT_THROW(arrayTag.ToUInt32(),  std::exception);
 }
 
 TEST(TagCollectionTest, DefaultConstruction)
@@ -827,8 +908,7 @@ TEST(SamTagCodecTest, DecodeTest)
     TagCollection expected;
     expected["ST"] = string("foo");
     expected["XY"] = int32_t(-42);
-    expected["HX"] = string("1abc75");
-    expected["HX"].Modifier(TagModifier::HEX_STRING);
+    expected["HX"] = Tag("1abc75", TagModifier::HEX_STRING);
     expected["VC"] = vector<int32_t>( { 42, -100, 37, 2048 } );
 
     TagCollection tags = SamTagCodec::Decode(tagString);
@@ -850,8 +930,7 @@ TEST(SamTagCodecTest, EncodeTest)
     TagCollection tags;
     tags["ST"] = string("foo");
     tags["XY"] = int32_t(-42);
-    tags["HX"] = string("1abc75");
-    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["HX"] = Tag("1abc75", TagModifier::HEX_STRING);
     tags["VC"] = vector<int32_t>( { 42, -100, 37, 2048 } );
 
     // "HX:H:1abc75\tST:Z:foo\0\tVC:B:i,42,-100,37,2048\tXY:i:-42"
@@ -981,8 +1060,7 @@ TEST(BamTagCodecTest, EncodeTest)
     expected.push_back(valueBytes[3]);
 
     TagCollection tags;
-    tags["HX"] = string("1abc75");
-    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["HX"] = Tag("1abc75", TagModifier::HEX_STRING);
     tags["CA"] = charArray;
     tags["XY"] = x;
 
@@ -990,3 +1068,77 @@ TEST(BamTagCodecTest, EncodeTest)
     EXPECT_EQ(expected, data);
 }
 
+TEST(BamTagCodecTest, AsciiTagsTest)
+{
+    vector<uint8_t> expected;
+    expected.reserve(20);
+    expected.push_back('I'); // I8:A:B
+    expected.push_back('8');
+    expected.push_back('A');
+    expected.push_back('B');
+    expected.push_back('P'); // PC:A:$
+    expected.push_back('C');
+    expected.push_back('A');
+    expected.push_back('$');
+    expected.push_back('S'); // SC:A:$
+    expected.push_back('C');
+    expected.push_back('A');
+    expected.push_back('$');
+    expected.push_back('U'); // U8:A:A
+    expected.push_back('8');
+    expected.push_back('A');
+    expected.push_back('A');
+    expected.push_back('U'); // UC:A:$
+    expected.push_back('C');
+    expected.push_back('A');
+    expected.push_back('$');
+
+    const char          c  = '$';
+    const signed char   sc = '$';
+    const unsigned char uc = '$';
+    const uint8_t u8 = 65;
+    const int8_t  i8 = 66;
+
+    { // old style: construct-then-modify
+
+        Tag fromPlainChar = Tag(c);
+        Tag fromSignedChar = Tag(sc);
+        Tag fromUnsignedChar = Tag(uc);
+        Tag fromUint8 = Tag(u8);
+        Tag fromInt8  = Tag(i8);
+        fromPlainChar.Modifier(TagModifier::ASCII_CHAR);
+        fromSignedChar.Modifier(TagModifier::ASCII_CHAR);
+        fromUnsignedChar.Modifier(TagModifier::ASCII_CHAR);
+        fromUint8.Modifier(TagModifier::ASCII_CHAR);
+        fromInt8.Modifier(TagModifier::ASCII_CHAR);
+
+        TagCollection tags;
+        tags["PC"] = fromPlainChar;
+        tags["SC"] = fromSignedChar;
+        tags["UC"] = fromUnsignedChar;
+        tags["U8"] = fromUint8;
+        tags["I8"] = fromInt8;
+
+        const vector<uint8_t>& data = BamTagCodec::Encode(tags);
+        EXPECT_EQ(expected, data);
+    }
+
+    { // new style: construct directly as ASCII
+
+        const Tag fromPlainChar    = Tag(c,  TagModifier::ASCII_CHAR);
+        const Tag fromSignedChar   = Tag(sc, TagModifier::ASCII_CHAR);
+        const Tag fromUnsignedChar = Tag(uc, TagModifier::ASCII_CHAR);
+        const Tag fromUint8 = Tag(u8, TagModifier::ASCII_CHAR);
+        const Tag fromInt8  = Tag(i8, TagModifier::ASCII_CHAR);
+
+        TagCollection tags;
+        tags["PC"] = fromPlainChar;
+        tags["SC"] = fromSignedChar;
+        tags["UC"] = fromUnsignedChar;
+        tags["U8"] = fromUint8;
+        tags["I8"] = fromInt8;
+
+        const vector<uint8_t>& data = BamTagCodec::Encode(tags);
+        EXPECT_EQ(expected, data);
+    }
+}
diff --git a/tests/src/test_TimeUtils.cpp b/tests/src/test_TimeUtils.cpp
index 7ab9fa5..90f1489 100644
--- a/tests/src/test_TimeUtils.cpp
+++ b/tests/src/test_TimeUtils.cpp
@@ -41,7 +41,6 @@
 
 #include <gtest/gtest.h>
 #include <pbbam/../../src/TimeUtils.h>
-
 using namespace PacBio;
 using namespace PacBio::BAM;
 using namespace PacBio::BAM::internal;
@@ -52,8 +51,17 @@ TEST(TimeUtilsTest, ToIso8601)
     const time_t rawTime = 436428750L;
     const auto timestamp = std::chrono::system_clock::from_time_t(rawTime);
 
-    // can't hardcode expected (since we rely on localtime())
-    const std::string& expected = "1983-10-31T06:12:30Z";
-    const std::string& actual = internal::ToIso8601(timestamp);
+    const auto expected = string{ "1983-10-31T06:12:30Z" }; // no ms in test case
+    const auto actual = internal::ToIso8601(timestamp);
+    EXPECT_EQ(expected, actual);
+}
+
+TEST(TimeUtilsTest, ToDataSetFormat)
+{
+    const time_t rawTime = 436428750L;
+    const auto timestamp = std::chrono::system_clock::from_time_t(rawTime);
+
+    const auto expected = string{ "831031_061230" }; // no ms in test case
+    const std::string& actual = internal::ToDataSetFormat(timestamp);
     EXPECT_EQ(expected, actual);
 }
diff --git a/tests/src/test_VirtualPolymeraseCompositeReader.cpp b/tests/src/test_VirtualPolymeraseCompositeReader.cpp
new file mode 100644
index 0000000..4652faf
--- /dev/null
+++ b/tests/src/test_VirtualPolymeraseCompositeReader.cpp
@@ -0,0 +1,132 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include "TestData.h"
+#include <gtest/gtest.h>
+#include <pbbam/virtual/VirtualPolymeraseCompositeReader.h>
+#include <string>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace tests {
+
+// Counts the records yielded by a VirtualPolymeraseReader over one primary/scraps pair.
+static size_t NumVirtualRecords(const string& primaryBamFn,
+                                const string& scrapsBamFn)
+{
+    VirtualPolymeraseReader reader(primaryBamFn, scrapsBamFn);
+    // every record is fetched and immediately dropped; only the tally matters
+    size_t numRecords = 0;
+    for (; reader.HasNext(); ++numRecords) {
+        const auto discarded = reader.Next();
+        (void)discarded;
+    }
+    return numRecords;
+}
+
+} // namespace tests
+} // namespace BAM
+} // namespace PacBio
+
+TEST(VirtualPolymeraseCompositeReaderTest, DataSetOk)
+{
+    // expected total: the two BAM pairs listed in the dataset, each read on its own
+    // (subreads/scraps + hqregion/scraps)
+    const size_t numExpectedRecords =
+            tests::NumVirtualRecords(
+                tests::Data_Dir + "/polymerase/production.subreads.bam",
+                tests::Data_Dir + "/polymerase/production.scraps.bam") +
+            tests::NumVirtualRecords(
+                tests::Data_Dir + "/polymerase/production_hq.hqregion.bam",
+                tests::Data_Dir + "/polymerase/production_hq.scraps.bam");
+
+    // observed total: the same resources, read through the dataset XML
+    const string datasetFn = tests::Data_Dir +
+            "/polymerase/multiple_resources.subread.dataset.xml";
+    DataSet ds(datasetFn);
+    VirtualPolymeraseCompositeReader reader(ds);
+    size_t numObservedRecords = 0;
+    for (; reader.HasNext(); ++numObservedRecords) {
+        const auto discarded = reader.Next();
+        (void)discarded;
+    }
+    EXPECT_EQ(numExpectedRecords, numObservedRecords);
+}
+
+TEST(VirtualPolymeraseCompositeReaderTest, EmptyDataSetOk)
+{   // a reader over a default-constructed DataSet must report that it has nothing to return
+    VirtualPolymeraseCompositeReader reader(DataSet{});
+    EXPECT_FALSE(reader.HasNext());
+}
+
+TEST(VirtualPolymeraseCompositeReaderTest, FilteredDataSetOk)
+{
+    // dataset contains these resources (subreads/scraps + hqregion/scraps BAMs)
+    const string primaryFn1 = tests::Data_Dir + "/polymerase/production.subreads.bam";
+    const string scrapsFn1  = tests::Data_Dir + "/polymerase/production.scraps.bam";
+    const string primaryFn2 = tests::Data_Dir + "/polymerase/internal.subreads.bam";
+    const string scrapsFn2  = tests::Data_Dir + "/polymerase/internal.scraps.bam";
+    const string primaryFn3 = tests::Data_Dir + "/polymerase/production_hq.hqregion.bam";
+    const string scrapsFn3  = tests::Data_Dir + "/polymerase/production_hq.scraps.bam";
+    const size_t totalRecords =
+            tests::NumVirtualRecords(primaryFn1, scrapsFn1) +
+            tests::NumVirtualRecords(primaryFn2, scrapsFn2) +
+            tests::NumVirtualRecords(primaryFn3, scrapsFn3);
+    EXPECT_EQ(size_t{3}, totalRecords); // 1 per pair; size_t literal keeps the comparison unsigned
+
+    // our filter will remove the 2 "production" BAM pairs
+    // using a ZMW filter that only the "internal" pair should pass
+    const string datasetFn = tests::Data_Dir +
+            "/polymerase/filtered_resources.subread.dataset.xml";
+
+    DataSet ds(datasetFn);
+    VirtualPolymeraseCompositeReader reader(ds);
+    size_t numObservedRecords = 0;
+    while (reader.HasNext()) {
+        const auto record = reader.Next();
+        (void)record;
+        ++numObservedRecords;
+    }
+    EXPECT_EQ(size_t{1}, numObservedRecords); // only the "internal" record is expected to survive
+}
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index db63408..dd6757e 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -1,3 +1,17 @@
-if(PacBioBAM_build_pbindex)
-    add_subdirectory(pbindex)
+
+set(ToolsCommonDir ${PacBioBAM_ToolsDir}/common)
+set(PacBioBAM_CramTestsDir ${PacBioBAM_TestsDir}/src/cram)
+
+# quash warning with OptionParser
+include(CheckCXXCompilerFlag)
+check_cxx_compiler_flag("-Wno-unused-private-field" HAS_NO_UNUSED_PRIVATE_FIELD)
+if(HAS_NO_UNUSED_PRIVATE_FIELD)
+    set(PacBioBAM_CXX_FLAGS "${PacBioBAM_CXX_FLAGS} -Wno-unused-private-field")
 endif()
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${PacBioBAM_CXX_FLAGS}")
+
+# tools
+add_subdirectory(bam2sam)
+add_subdirectory(pbindex)
+add_subdirectory(pbindexdump)
+add_subdirectory(pbmerge)
diff --git a/tools/bam2sam/CMakeLists.txt b/tools/bam2sam/CMakeLists.txt
new file mode 100644
index 0000000..ef3a919
--- /dev/null
+++ b/tools/bam2sam/CMakeLists.txt
@@ -0,0 +1,32 @@
+
+set(Bam2SamSrcDir ${PacBioBAM_ToolsDir}/bam2sam/src)
+
+# create version header: fill @Bam2Sam_VERSION@ into the template (@ONLY leaves ${...} untouched)
+set(Bam2Sam_VERSION ${PacBioBAM_VERSION})
+configure_file(
+    ${Bam2SamSrcDir}/Bam2SamVersion.h.in Bam2SamVersion.h @ONLY
+)
+
+# list source files (OptionParser.cpp is taken from ToolsCommonDir, not the bam2sam tree)
+set(BAM2SAM_SOURCES
+    ${ToolsCommonDir}/OptionParser.cpp
+    ${Bam2SamSrcDir}/main.cpp
+    ${Bam2SamSrcDir}/Bam2Sam.cpp
+)
+
+# build bam2sam executable via create_pbbam_tool() (presumably provided by the PbbamTool module - confirm)
+include(PbbamTool)
+create_pbbam_tool(
+    TARGET  bam2sam
+    SOURCES ${BAM2SAM_SOURCES}
+)
+
+# cram tests: when tests are enabled, run cram.py on bam2sam.t from the tests' scripts directory
+if (PacBioBAM_build_tests)
+    add_test(
+        NAME bam2sam_CramTests
+        WORKING_DIRECTORY ${PacBioBAM_TestsDir}/scripts
+        COMMAND "python" cram.py
+            ${PacBioBAM_CramTestsDir}/bam2sam.t
+    )
+endif()
diff --git a/tools/bam2sam/src/Bam2Sam.cpp b/tools/bam2sam/src/Bam2Sam.cpp
new file mode 100644
index 0000000..5fde774
--- /dev/null
+++ b/tools/bam2sam/src/Bam2Sam.cpp
@@ -0,0 +1,121 @@
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT  SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "Bam2Sam.h"
+#include <htslib/sam.h>
+#include <stdexcept>
+#include <memory>
+#include <cassert>
+using namespace bam2sam;
+using namespace std;
+
+namespace bam2sam {
+
+struct HtslibFileDeleter
+{
+    // Closes an htslib file handle via sam_close(); a null pointer is ignored.
+    void operator()(samFile* file)
+    {
+        if (file != nullptr)
+            sam_close(file);
+    }
+};
+
+struct HtslibHeaderDeleter
+{
+    // Releases a BAM header via bam_hdr_destroy(); a null pointer is ignored.
+    void operator()(bam_hdr_t* hdr)
+    {
+        if (hdr != nullptr)
+            bam_hdr_destroy(hdr);
+    }
+};
+
+struct HtslibRecordDeleter
+{
+    // Releases a BAM record via bam_destroy1(); a null pointer is ignored.
+    void operator()(bam1_t* b)
+    {
+        if (b != nullptr)
+            bam_destroy1(b);
+    }
+};
+
+} // namespace bam2sam
+
+// Streams the BAM named by settings.inputFilename_ to stdout as SAM, honoring
+// noHeader_ / printHeaderOnly_. Throws std::runtime_error on any htslib failure.
+void PbBam2Sam::Run(const Settings &settings)
+{
+    // open input (its raw BGZF handle is needed to read the header below) & output (stdout)
+    unique_ptr<samFile, HtslibFileDeleter> inFileWrapper(sam_open(settings.inputFilename_.c_str(), "rb"));
+    samFile* in = inFileWrapper.get();
+    if (!in || !in->fp.bgzf)
+        throw std::runtime_error("could not read from input: " + settings.inputFilename_);
+
+    unique_ptr<samFile, HtslibFileDeleter> outFileWrapper(sam_open("-", "w"));
+    samFile* out = outFileWrapper.get();
+    if (!out)
+        throw std::runtime_error("could not write to stdout");
+
+    // fetch & write header
+    unique_ptr<bam_hdr_t, HtslibHeaderDeleter> headerWrapper(bam_hdr_read(in->fp.bgzf));
+    bam_hdr_t* hdr = headerWrapper.get();
+    if (!hdr)
+        throw std::runtime_error("could not read header");
+
+    if (!settings.noHeader_) {
+        if (sam_hdr_write(out, hdr) != 0)
+            throw std::runtime_error("could not write header");
+        if (settings.printHeaderOnly_)
+            return;
+    }
+
+    // fetch & write records
+    unique_ptr<bam1_t, HtslibRecordDeleter> recordWrapper(bam_init1());
+    bam1_t* b = recordWrapper.get();
+    if (!b)
+        throw std::runtime_error("could not allocate record");
+    // sam_read1: >= 0 on success, -1 at end of input, < -1 on error
+    int readResult = 0;
+    while ((readResult = sam_read1(in, hdr, b)) >= 0) {
+        if (sam_write1(out, hdr, b) < 0)
+            throw std::runtime_error("error writing record to stdout");
+    }
+    if (readResult < -1)
+        throw std::runtime_error("error reading record from input (truncated or corrupt file?)");
+}
diff --git a/include/pbbam/Strand.h b/tools/bam2sam/src/Bam2Sam.h
similarity index 83%
copy from include/pbbam/Strand.h
copy to tools/bam2sam/src/Bam2Sam.h
index aa8535f..4a7ffbb 100644
--- a/include/pbbam/Strand.h
+++ b/tools/bam2sam/src/Bam2Sam.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
 //
 // All rights reserved.
 //
@@ -23,7 +23,7 @@
 // BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
 // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// DISCLAIMED. IN NO EVENT  SHALL PACIFIC BIOSCIENCES OR ITS
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
@@ -35,21 +35,19 @@
 
 // Author: Derek Barnett
 
-#ifndef STRAND_H
-#define STRAND_H
+#ifndef BAM2SAM_H
+#define BAM2SAM_H
 
-#include "pbbam/Config.h"
+#include "Settings.h"
 
-namespace PacBio {
-namespace BAM {
+namespace bam2sam {
 
-enum class Strand
+class PbBam2Sam
 {
-    FORWARD
-  , REVERSE
+public:
+    static void Run(const Settings& settings);
 };
 
-} // namespace BAM
-} // namespace PacBio
+} // namespace bam2sam
 
-#endif // STRAND_H
+#endif // BAM2SAM_H
diff --git a/src/Config.cpp b/tools/bam2sam/src/Bam2SamVersion.h.in
similarity index 86%
copy from src/Config.cpp
copy to tools/bam2sam/src/Bam2SamVersion.h.in
index 677ad08..10319b7 100644
--- a/src/Config.cpp
+++ b/tools/bam2sam/src/Bam2SamVersion.h.in
@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
 //
 // All rights reserved.
 //
@@ -35,14 +35,15 @@
 
 // Author: Derek Barnett
 
-#include "pbbam/Config.h"
-using namespace PacBio;
-using namespace PacBio::BAM;
+#ifndef BAM2SAMVERSION_H
+#define BAM2SAMVERSION_H
 
-namespace PacBio {
-namespace BAM {
+#include <string>
 
-int HtslibVerbosity = 0;
+namespace bam2sam {
 
-} // namespace BAM
-} // namespace PacBio
+const std::string Version = std::string("@Bam2Sam_VERSION@");
+
+} // namespace bam2sam
+
+#endif // BAM2SAMVERSION_H
diff --git a/include/pbbam/TagCollection.h b/tools/bam2sam/src/Settings.h
similarity index 78%
copy from include/pbbam/TagCollection.h
copy to tools/bam2sam/src/Settings.h
index 42b4018..d570dc9 100644
--- a/include/pbbam/TagCollection.h
+++ b/tools/bam2sam/src/Settings.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
 //
 // All rights reserved.
 //
@@ -23,7 +23,7 @@
 // BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
 // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// DISCLAIMED. IN NO EVENT  SHALL PACIFIC BIOSCIENCES OR ITS
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
@@ -35,24 +35,29 @@
 
 // Author: Derek Barnett
 
-#ifndef TAGCOLLECTION_H
-#define TAGCOLLECTION_H
+#ifndef SETTINGS_H
+#define SETTINGS_H
 
-#include "pbbam/Config.h"
-#include "pbbam/Tag.h"
-#include <map>
 #include <string>
+#include <vector>
 
-namespace PacBio {
-namespace BAM {
+namespace bam2sam {
 
-class PBBAM_EXPORT TagCollection : public std::map<std::string, Tag>
+class Settings
 {
 public:
-    bool Contains(const std::string& name) const;
+    Settings(void)
+        : noHeader_(false)
+        , printHeaderOnly_(false)
+    { }
+
+public:
+    std::string inputFilename_;
+    bool noHeader_;
+    bool printHeaderOnly_;
+    std::vector<std::string> errors_;
 };
 
-} // namespace BAM
-} // namespace PacBio
+} // namespace bam2sam
 
-#endif // TAGCOLLECTION_H
+#endif // SETTINGS_H
diff --git a/tools/pbindex/src/main.cpp b/tools/bam2sam/src/main.cpp
similarity index 51%
copy from tools/pbindex/src/main.cpp
copy to tools/bam2sam/src/main.cpp
index 59065fa..d27b42f 100644
--- a/tools/pbindex/src/main.cpp
+++ b/tools/bam2sam/src/main.cpp
@@ -35,34 +35,43 @@
 
 // Author: Derek Barnett
 
-#include "OptionParser.h"
-#include "PbIndex.h"
-#include "PbIndexVersion.h"
+#include "../common/OptionParser.h"
+#include "Bam2Sam.h"
+#include "Bam2SamVersion.h"
+#include <string>
+#include <vector>
 #include <cassert>
-#include <iostream>
-using namespace std;
+#include <cstdlib>
 
 static
-pbindex::Settings fromCommandLine(optparse::OptionParser& parser,
-                                  int argc, char* argv[])
+bam2sam::Settings fromCommandLine(optparse::OptionParser& parser,
+                                    int argc, char* argv[])
 {
-    const optparse::Values options = parser.parse_args(argc, argv);
-    (void)options;
+    bam2sam::Settings settings;
 
-    pbindex::Settings settings;
+    const optparse::Values options = parser.parse_args(argc, argv);
 
-    // get input filename
-    const vector<string> positionalArgs = parser.args();
+    // input
+    const std::vector<std::string> positionalArgs = parser.args();
     const size_t numPositionalArgs = positionalArgs.size();
     if (numPositionalArgs == 0)
-        settings.errors_.push_back("pbindex requires an input BAM filename");
+        settings.inputFilename_ = "-"; // stdin
     else if (numPositionalArgs == 1)
-        settings.inputBamFilename_ = parser.args().front();
+        settings.inputFilename_ = parser.args().front();
     else {
         assert(numPositionalArgs > 1);
-        settings.errors_.push_back("pbindex does not support more than one input file per run");
+        settings.errors_.push_back("bam2sam does not support more than one input file per run");
     }
 
+    // header options
+    if (options.is_set("no_header"))
+        settings.noHeader_ = options.get("no_header");
+    if (options.is_set("header_only"))
+        settings.printHeaderOnly_ = options.get("header_only");
+
+    if (settings.noHeader_ && settings.printHeaderOnly_)
+        settings.errors_.push_back("conflicting arguments requested: --no-header and --header-only");
+
     return settings;
 }
 
@@ -70,33 +79,49 @@ int main(int argc, char* argv[])
 {
     // setup help & options
     optparse::OptionParser parser;
-    parser.description("pbindex creates a index file that enables random-access to PacBio-specific data in BAM files. "
-                       "Generated index filename will be the same as input BAM plus .pbi suffix."
+    parser.description("bam2sam converts a BAM file to SAM. It is essentially a stripped-down "
+                       "'samtools view', mostly useful for testing/debugging without requiring samtools. "
+                       "Input BAM file is read from a file or stdin, and SAM output is written to stdout."
                        );
-    parser.prog("pbindex");
-    parser.usage("pbindex <input>");
-    parser.version(pbindex::Version);
+    parser.prog("bam2sam");
+    parser.usage("bam2sam [options] [input]");
+    parser.version(bam2sam::Version);
     parser.add_version_option(true);
     parser.add_help_option(true);
 
-    auto ioGroup = optparse::OptionGroup(parser, "Input/Output");
-    ioGroup.add_option("")
-           .dest("input")
-           .metavar("input")
-           .help("Input BAM file");
-    parser.add_option_group(ioGroup);
+    auto optionGroup = optparse::OptionGroup(parser, "Options");
+    optionGroup.add_option("")
+               .dest("input")
+               .metavar("input")
+               .help("Input BAM file. If not provided, stdin will be used as input.");
+    optionGroup.add_option("--no-header")
+                .dest("no_header")
+                .action("store_true")
+                .help("Omit header from output.");
+    optionGroup.add_option("--header-only")
+               .dest("header_only")
+               .action("store_true")
+               .help("Print only the header (no records).");
+    parser.add_option_group(optionGroup);
 
     // parse command line for settings
-    const pbindex::Settings settings = fromCommandLine(parser, argc, argv);
+    const bam2sam::Settings settings = fromCommandLine(parser, argc, argv);
     if (!settings.errors_.empty()) {
-        cerr << endl;
+        std::cerr << std::endl;
         for (const auto e : settings.errors_)
-            cerr << "ERROR: " << e << endl;
-        cerr << endl;
+            std::cerr << "ERROR: " << e << std::endl;
+        std::cerr << std::endl;
         parser.print_help();
         return EXIT_FAILURE;
     }
 
     // run tool
-    return pbindex::PbIndex::Run(settings);
+    try {
+        bam2sam::PbBam2Sam::Run(settings);
+        return EXIT_SUCCESS;
+    }
+    catch (std::exception& e) {
+        std::cerr << "ERROR: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
 }
diff --git a/include/pbbam/AlignmentPrinter.h b/tools/common/BamFileMerger.h
similarity index 59%
copy from include/pbbam/AlignmentPrinter.h
copy to tools/common/BamFileMerger.h
index 6424c5f..d2a6bb2 100644
--- a/include/pbbam/AlignmentPrinter.h
+++ b/tools/common/BamFileMerger.h
@@ -23,7 +23,7 @@
 // BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
 // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// DISCLAIMED. IN NO EVENT  SHALL PACIFIC BIOSCIENCES OR ITS
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
@@ -33,51 +33,46 @@
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
 
-// Author: Armin Töpfer
+// Author: Derek Barnett
 
-#ifndef ALIGNMENTPRINTER_H
-#define ALIGNMENTPRINTER_H
+#ifndef BAMFILEMERGER_H
+#define BAMFILEMERGER_H
 
-#include <memory>
+#include <pbbam/DataSet.h>
+#include <pbbam/PbiFilter.h>
+#include <pbbam/ProgramInfo.h>
 #include <string>
-
-#include "pbbam/BamRecord.h"
-#include "pbbam/IndexedFastaReader.h"
-#include "pbbam/Orientation.h"
+#include <vector>
 
 namespace PacBio {
 namespace BAM {
+namespace common {
 
-class BamRecord;
-
-class AlignmentPrinter
+class BamFileMerger
 {
 public:
-    AlignmentPrinter(const IndexedFastaReader& ifr)
-        : ifr_(std::unique_ptr<IndexedFastaReader>(new IndexedFastaReader(ifr)))
-    { }
-
-    AlignmentPrinter() = delete;
-    // Move constructor
-    AlignmentPrinter(AlignmentPrinter&&) = default;
-    // Copy constructor
-    AlignmentPrinter(const AlignmentPrinter&) = delete;
-    // Move assignment operator
-    AlignmentPrinter& operator=(AlignmentPrinter&&) = default;
-    // Copy assignment operator
-    AlignmentPrinter& operator=(const AlignmentPrinter&) = delete;
-    // Destructor
-    ~AlignmentPrinter() = default;
-
-public:
-    std::string Print(const BamRecord& record,
-                      const Orientation orientation = Orientation::GENOMIC);
-
-private:
-	const std::unique_ptr<IndexedFastaReader> ifr_;
+    /// \brief Runs merger on a dataset, applying any supplied filters.
+    ///
+    /// When this function exits, a merged BAM (and optional PBI) will have been
+    /// written and closed.
+    ///
+    /// \param[in] dataset          provides input filenames & filters
+    /// \param[in] outputFilename   resulting BAM output
+    /// \param[in] mergeProgram     info about the calling program. Adds a @PG entry to merged header.
+    /// \param[in] createPbi        if true, creates a PBI alongside output BAM
+    ///
+    /// \throws std::runtime_error if any errors are encountered while reading or writing
+    ///
+    static void Merge(const PacBio::BAM::DataSet& dataset,
+                      const std::string& outputFilename,
+                      const PacBio::BAM::ProgramInfo& mergeProgram = PacBio::BAM::ProgramInfo(),
+                      bool createPbi = true);
 };
 
+} // namespace common
 } // namespace BAM
 } // namespace PacBio
 
-#endif // ALIGNMENTPRINTER_H
+#include "BamFileMerger.inl"
+
+#endif // BAMFILEMERGER_H
diff --git a/tools/common/BamFileMerger.inl b/tools/common/BamFileMerger.inl
new file mode 100644
index 0000000..18dfbca
--- /dev/null
+++ b/tools/common/BamFileMerger.inl
@@ -0,0 +1,262 @@
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT  SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "BamFileMerger.h"
+
+#include <pbbam/BamHeader.h>
+#include <pbbam/BamReader.h>
+#include <pbbam/BamRecord.h>
+#include <pbbam/BamWriter.h>
+#include <pbbam/CompositeBamReader.h>
+#include <pbbam/PbiBuilder.h>
+
+#include <deque>
+#include <memory>
+#include <stdexcept>
+#include <cassert>
+
+namespace PacBio {
+namespace BAM {
+namespace common {
+
+// ICollator
+
+class ICollator
+{
+public:
+    virtual ~ICollator(void) { } // virtual: Merge() destroys collators via unique_ptr<ICollator>
+
+    // Hands the front item's record to the caller; returns false once no items remain.
+    bool GetNext(BamRecord& record)
+    {
+        // nothing left to read
+        if (mergeItems_.empty())
+            return false;
+
+        // take over the first item's reader & record, then drop its (moved-from) slot
+        auto firstIter = mergeItems_.begin();
+        auto firstItem = PacBio::BAM::internal::CompositeMergeItem{ std::move(firstIter->reader),
+                                                                    std::move(firstIter->record)
+                                                                  };
+        mergeItems_.pop_front();
+
+        // store its record in our output record
+        std::swap(record, firstItem.record);
+
+        // try fetch 'next' from first item's reader
+        // if successful, re-insert it into container & re-sort on our new values
+        // otherwise, this item will go out of scope & reader destroyed
+        if (firstItem.reader->GetNext(firstItem.record)) {
+            mergeItems_.push_front(std::move(firstItem));
+            UpdateSort();
+        }
+        return true;
+    }
+
+protected:
+    // items still awaiting output; the front one is handed out by the next GetNext()
+    std::deque<PacBio::BAM::internal::CompositeMergeItem> mergeItems_;
+
+    // Takes over the readers, keeping only those that deliver a first record.
+    ICollator(std::vector<std::unique_ptr<PacBio::BAM::BamReader> >&& readers)
+    {
+        for (auto&& reader : readers) {
+            auto item = internal::CompositeMergeItem{std::move(reader)};
+            if (item.reader->GetNext(item.record))
+                mergeItems_.push_back(std::move(item));
+        }
+    }
+
+    virtual void UpdateSort(void) =0; // subclasses re-order mergeItems_ here
+};
+
+// QNameCollator
+
+// Orders merge items by movie name, then hole number; within one ZMW, CCS
+// records go after all other types, and the rest are ordered by query start.
+struct QNameSorter : std::binary_function<internal::CompositeMergeItem,
+                                          internal::CompositeMergeItem,
+                                          bool>
+{
+    bool operator()(const internal::CompositeMergeItem& lhs,
+                    const internal::CompositeMergeItem& rhs)
+    {
+        const BamRecord& left  = lhs.record;
+        const BamRecord& right = rhs.record;
+
+        // 1) movie name (string comparison)
+        const int movieOrder = left.MovieName().compare(right.MovieName());
+        if (movieOrder < 0) return true;
+        if (movieOrder > 0) return false;
+
+        // 2) hole number
+        const auto leftZmw  = left.HoleNumber();
+        const auto rightZmw = right.HoleNumber();
+        if (leftZmw != rightZmw)
+            return leftZmw < rightZmw;
+
+        // 3) CCS reads sort after all other record types
+        const bool leftIsCcs  = (left.Type() == RecordType::CCS);
+        const bool rightIsCcs = (right.Type() == RecordType::CCS);
+        if (leftIsCcs || rightIsCcs)
+            return !leftIsCcs;
+
+        // 4) query start (query end is not consulted)
+        const auto leftQStart  = left.QueryStart();
+        const auto rightQStart = right.QueryStart();
+        return leftQStart < rightQStart;
+    }
+};
+
+class QNameCollator : public ICollator
+{
+public:
+    // Hands the readers to ICollator, then sorts the initial items with QNameSorter.
+    QNameCollator(std::vector<std::unique_ptr<PacBio::BAM::BamReader>>&& readers)
+        : ICollator(std::move(readers)) { UpdateSort(); }
+    // Re-sorts every pending merge item with QNameSorter.
+    void UpdateSort(void)
+    { std::sort(mergeItems_.begin(), mergeItems_.end(), QNameSorter{ }); }
+};
+
+// AlignedCollator
+
+class AlignedCollator : public ICollator
+{
+public:
+    // Hands the readers to ICollator, then sorts the initial items with PositionSorter.
+    AlignedCollator(std::vector<std::unique_ptr<PacBio::BAM::BamReader>>&& readers)
+        : ICollator(std::move(readers)) { UpdateSort(); }
+    // Re-sorts every pending merge item with PacBio::BAM::PositionSorter.
+    void UpdateSort(void)
+    { std::sort(mergeItems_.begin(), mergeItems_.end(), PacBio::BAM::PositionSorter{ }); }
+};
+
+// BamFileMerger
+
+// Merges the dataset's BAM files (honoring its filters) into outputFilename, plus a ".pbi" if requested.
+// Throws std::runtime_error if no inputs/output are named or the inputs' sort orders differ.
+inline
+void BamFileMerger::Merge(const DataSet& dataset,
+                          const std::string& outputFilename,
+                          const ProgramInfo& mergeProgram,
+                          bool createPbi)
+{
+    const PbiFilter filter = PbiFilter::FromDataSet(dataset);
+
+    std::vector<std::string> inputFilenames;
+    const auto& bamFiles = dataset.BamFiles();
+    inputFilenames.reserve(bamFiles.size());
+    for (const auto& file : bamFiles)
+        inputFilenames.push_back(file.Filename());
+
+    if (inputFilenames.empty())
+        throw std::runtime_error("no input filenames provided to BamFileMerger");
+
+    if (outputFilename.empty())
+        throw std::runtime_error("no output filename provided to BamFileMerger");
+
+    // attempt open input files (plain reader when unfiltered, PBI-indexed reader otherwise)
+    std::vector<std::unique_ptr<BamReader> > readers;
+    readers.reserve(inputFilenames.size());
+    for (const auto& fn : inputFilenames) {
+        if (filter.IsEmpty())
+            readers.emplace_back(new BamReader(fn));
+        else
+            readers.emplace_back(new PbiIndexedBamReader(filter, fn));
+    }
+
+    // read headers
+    std::vector<BamHeader> headers;
+    headers.reserve(readers.size());
+    for (auto&& reader : readers)
+        headers.push_back(reader->Header());
+
+    assert(!readers.empty());
+    assert(!headers.empty());
+
+    // merge headers (sort order is copied, so editing mergedHeader below cannot affect it)
+    BamHeader mergedHeader = headers.front();
+    const std::string usingSortOrder = mergedHeader.SortOrder();
+    const bool isCoordinateSorted = (usingSortOrder == "coordinate");
+    for (size_t i = 1; i < headers.size(); ++i) {
+        const BamHeader& header = headers.at(i);
+        if (header.SortOrder() != usingSortOrder)
+            throw std::runtime_error("BAM file sort orders do not match, aborting merge");
+        mergedHeader += header;
+    }
+    if (mergeProgram.IsValid())
+        mergedHeader.AddProgram(mergeProgram);
+
+    // setup collator, based on sort order
+    std::unique_ptr<ICollator> collator;
+    if (isCoordinateSorted)
+        collator.reset(new AlignedCollator(std::move(readers)));
+    else
+        collator.reset(new QNameCollator(std::move(readers)));
+    // NOTE: readers *moved*, so no longer accessible here
+
+    // do merge, creating PBI on-the-fly
+    if (createPbi && (outputFilename != "-")) {
+        // TODO: this implementation recalculates all PBI values, when we really
+        //       only need to collate entries and update offsets
+
+        BamWriter writer(outputFilename, mergedHeader);
+        PbiBuilder builder{ (outputFilename + ".pbi"),
+                            mergedHeader.NumSequences(),
+                            isCoordinateSorted
+                          };
+        BamRecord record;
+        int64_t vOffset = 0;
+        while (collator->GetNext(record)) {
+            writer.Write(record, &vOffset);
+            builder.AddRecord(record, vOffset);
+        }
+    }
+
+    // otherwise just merge BAM
+    else {
+        BamWriter writer(outputFilename, mergedHeader);
+        BamRecord record;
+        while (collator->GetNext(record))
+            writer.Write(record);
+    }
+}
+
+} // namespace common
+} // namespace BAM
+} // namespace PacBio
diff --git a/tools/pbindex/src/OptionParser.cpp b/tools/common/OptionParser.cpp
similarity index 100%
rename from tools/pbindex/src/OptionParser.cpp
rename to tools/common/OptionParser.cpp
diff --git a/tools/pbindex/src/OptionParser.h b/tools/common/OptionParser.h
similarity index 100%
rename from tools/pbindex/src/OptionParser.h
rename to tools/common/OptionParser.h
diff --git a/tools/pbindex/CMakeLists.txt b/tools/pbindex/CMakeLists.txt
index 89572d0..6ebe5c2 100644
--- a/tools/pbindex/CMakeLists.txt
+++ b/tools/pbindex/CMakeLists.txt
@@ -1,27 +1,22 @@
 
+set(PbindexSrcDir ${PacBioBAM_ToolsDir}/pbindex/src)
+
 # create version header
 set(PbIndex_VERSION ${PacBioBAM_VERSION})
 configure_file(
-    ${PacBioBAM_RootDir}/tools/pbindex/src/PbIndexVersion.h.in
-    ${PacBioBAM_RootDir}/tools/pbindex/src/PbIndexVersion.h
+    ${PbindexSrcDir}/PbIndexVersion.h.in PbIndexVersion.h @ONLY
 )
 
-#pbindex sources
+# list source files
 set(PBINDEX_SOURCES
-    ${PacBioBAM_RootDir}/tools/pbindex/src/main.cpp
-    ${PacBioBAM_RootDir}/tools/pbindex/src/OptionParser.h
-    ${PacBioBAM_RootDir}/tools/pbindex/src/OptionParser.cpp
-    ${PacBioBAM_RootDir}/tools/pbindex/src/PbIndex.h
-    ${PacBioBAM_RootDir}/tools/pbindex/src/PbIndex.cpp
-    ${PacBioBAM_RootDir}/tools/pbindex/src/PbIndexVersion.h
+    ${ToolsCommonDir}/OptionParser.cpp
+    ${PbindexSrcDir}/main.cpp
+    ${PbindexSrcDir}/PbIndex.cpp
 )
 
 # build pbindex executable
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${PacBioBAM_CXX_FLAGS}")
-include_directories(${PacBioBAM_RootDir}/tools/pbindex/src ${PacBioBAM_INCLUDE_DIRS})
-add_executable(pbindex ${PBINDEX_SOURCES})
-set_target_properties(pbindex PROPERTIES
-    RUNTIME_OUTPUT_DIRECTORY ${PacBioBAM_BinDir}
+include(PbbamTool)
+create_pbbam_tool(
+    TARGET  pbindex
+    SOURCES ${PBINDEX_SOURCES}
 )
-target_link_libraries(pbindex pbbam)
-
diff --git a/tools/pbindex/src/PbIndex.cpp b/tools/pbindex/src/PbIndex.cpp
index ca8a234..e25fa0e 100644
--- a/tools/pbindex/src/PbIndex.cpp
+++ b/tools/pbindex/src/PbIndex.cpp
@@ -53,7 +53,7 @@ int PbIndex::Create(const Settings& settings)
     try
     {
         PacBio::BAM::BamFile bamFile(settings.inputBamFilename_);
-        bamFile.EnsurePacBioIndexExists();
+        bamFile.CreatePacBioIndex();
         return EXIT_SUCCESS;
     }
     catch (std::runtime_error& e)
diff --git a/tools/pbindex/src/main.cpp b/tools/pbindex/src/main.cpp
index 59065fa..0f1bfb9 100644
--- a/tools/pbindex/src/main.cpp
+++ b/tools/pbindex/src/main.cpp
@@ -35,7 +35,7 @@
 
 // Author: Derek Barnett
 
-#include "OptionParser.h"
+#include "../common/OptionParser.h"
 #include "PbIndex.h"
 #include "PbIndexVersion.h"
 #include <cassert>
diff --git a/tools/pbindexdump/CMakeLists.txt b/tools/pbindexdump/CMakeLists.txt
new file mode 100644
index 0000000..26178e3
--- /dev/null
+++ b/tools/pbindexdump/CMakeLists.txt
@@ -0,0 +1,35 @@
+
+set(PbindexdumpSrcDir ${PacBioBAM_ToolsDir}/pbindexdump/src)
+
+# create version header
+set(PbIndexDump_VERSION ${PacBioBAM_VERSION})
+configure_file(
+    ${PbindexdumpSrcDir}/PbIndexDumpVersion.h.in PbIndexDumpVersion.h @ONLY
+)
+
+# list source files
+set(PBINDEXDUMP_SOURCES
+    ${ToolsCommonDir}/OptionParser.cpp
+    ${PbindexdumpSrcDir}/CppFormatter.cpp
+    ${PbindexdumpSrcDir}/JsonFormatter.cpp
+    ${PbindexdumpSrcDir}/PbIndexDump.cpp
+    ${PbindexdumpSrcDir}/main.cpp
+)
+
+# build pbindexdump executable
+include(PbbamTool)
+create_pbbam_tool(
+    TARGET  pbindexdump
+    SOURCES ${PBINDEXDUMP_SOURCES}
+)
+
+# cram tests
+if (PacBioBAM_build_tests)
+    add_test(
+        NAME pbindexdump_CramTests
+        WORKING_DIRECTORY ${PacBioBAM_TestsDir}/scripts
+        COMMAND "python" cram.py
+            ${PacBioBAM_CramTestsDir}/pbindexdump_json.t
+            ${PacBioBAM_CramTestsDir}/pbindexdump_cpp.t
+    )
+endif()
diff --git a/tools/pbindexdump/src/CppFormatter.cpp b/tools/pbindexdump/src/CppFormatter.cpp
new file mode 100644
index 0000000..696421e
--- /dev/null
+++ b/tools/pbindexdump/src/CppFormatter.cpp
@@ -0,0 +1,177 @@
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT  SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "CppFormatter.h"
+#include <pbbam/PbiRawData.h>
+
+#include <iostream>
+#include <sstream>
+
+using namespace pbindexdump;
+using namespace std;
+
+namespace pbindexdump {
+
+static
+string printCppReferenceData(const PacBio::BAM::PbiRawReferenceData& referenceData)
+{
+    auto result = string{ "" };
+    for (const PacBio::BAM::PbiReferenceEntry& entry : referenceData.entries_) {
+        if (!result.empty())
+            result.append(",\n");
+        result.append( string{ "    PbiReferenceEntry{" }
+                       + to_string(entry.tId_) + "," + to_string(entry.beginRow_) + "," + to_string(entry.endRow_)
+                       + string{ "}" } );
+    }
+    if (!result.empty())
+        result.append("\n");
+    return result;
+}
+
+template<typename T>
+string printVectorElements(const std::vector<T>& c)
+{
+    stringstream s;
+    for (const auto& e : c)
+        s << e << ",";
+    auto result = s.str();
+    if (!result.empty())
+        result.pop_back(); // remove final comma
+    return result;
+}
+
+template<>
+string printVectorElements(const std::vector<uint8_t>& c)
+{
+    stringstream s;
+    for (const auto& e : c)
+        s << static_cast<uint16_t>(e) << ","; // cast to larger uint, force print as number not character
+    auto result = s.str();
+    if (!result.empty())
+        result.pop_back(); // remove final comma
+    return result;
+}
+
+template<>
+string printVectorElements(const std::vector<int8_t>& c)
+{
+    stringstream s;
+    for (const auto& e : c)
+        s << static_cast<int16_t>(e) << ","; // cast to larger int, force print as number not character
+    auto result = s.str();
+    if (!result.empty())
+        result.pop_back(); // remove final comma
+    return result;
+}
+
+} // namespace pbindexdump
+
+CppFormatter::CppFormatter(const Settings& settings)
+    : IFormatter(settings)
+{ }
+
+void CppFormatter::Run(void)
+{
+    using namespace PacBio::BAM;
+
+    const PbiRawData rawData{ settings_.inputPbiFilename_ };
+    const PbiRawBarcodeData& barcodeData = rawData.BarcodeData();
+    const PbiRawBasicData& basicData     = rawData.BasicData();
+    const PbiRawMappedData& mappedData   = rawData.MappedData();
+    const PbiRawReferenceData& referenceData = rawData.ReferenceData();
+
+    auto version = string{ };
+    switch (rawData.Version()) {
+        case PbiFile::Version_3_0_0 : version = "PbiFile::Version_3_0_0"; break;
+        case PbiFile::Version_3_0_1 : version = "PbiFile::Version_3_0_1"; break;
+        default:
+            throw runtime_error("unsupported PBI version encountered");
+    }
+
+    auto fileSections = string{ "PbiFile::BASIC" };
+    if (rawData.HasBarcodeData())   fileSections += string{ " | PbiFile::BARCODE" };
+    if (rawData.HasMappedData())    fileSections += string{ " | PbiFile::MAPPED" };
+    if (rawData.HasReferenceData()) fileSections += string{ " | PbiFile::REFERENCE" };
+
+    stringstream s;
+    s << "PbiRawData rawData;" << endl
+      << "rawData.Version("      << version             << ");" << endl
+      << "rawData.FileSections(" << fileSections        << ");" << endl
+      << "rawData.NumReads("     << rawData.NumReads()  << ");" << endl
+      << endl
+      << "PbiRawBasicData& basicData = rawData.BasicData();" << endl
+      << "basicData.rgId_       = {" << printVectorElements(basicData.rgId_)       << "};" << endl
+      << "basicData.qStart_     = {" << printVectorElements(basicData.qStart_)     << "};" << endl
+      << "basicData.qEnd_       = {" << printVectorElements(basicData.qEnd_)       << "};" << endl
+      << "basicData.holeNumber_ = {" << printVectorElements(basicData.holeNumber_) << "};" << endl
+      << "basicData.readQual_   = {" << printVectorElements(basicData.readQual_)   << "};" << endl
+      << "basicData.ctxtFlag_   = {" << printVectorElements(basicData.ctxtFlag_)   << "};" << endl
+      << "basicData.fileOffset_ = {" << printVectorElements(basicData.fileOffset_) << "};" << endl
+      << endl;
+
+    if (rawData.HasBarcodeData()) {
+        s << "PbiRawBarcodeData& barcodeData = rawData.BarcodeData();" << endl
+          << "barcodeData.bcForward_ = {" << printVectorElements(barcodeData.bcForward_) << "};" << endl
+          << "barcodeData.bcReverse_ = {" << printVectorElements(barcodeData.bcReverse_) << "};" << endl
+          << "barcodeData.bcQual_    = {" << printVectorElements(barcodeData.bcQual_)    << "};" << endl
+          << endl;
+    }
+
+    if (rawData.HasMappedData()) {
+        s << "PbiRawMappedData& mappedData = rawData.MappedData();" << endl
+          << "mappedData.tId_       = {" << printVectorElements(mappedData.tId_)       << "};" << endl
+          << "mappedData.tStart_    = {" << printVectorElements(mappedData.tStart_)    << "};" << endl
+          << "mappedData.tEnd_      = {" << printVectorElements(mappedData.tEnd_)      << "};" << endl
+          << "mappedData.aStart_    = {" << printVectorElements(mappedData.aStart_)    << "};" << endl
+          << "mappedData.aEnd_      = {" << printVectorElements(mappedData.aEnd_)      << "};" << endl
+          << "mappedData.revStrand_ = {" << printVectorElements(mappedData.revStrand_) << "};" << endl
+          << "mappedData.nM_        = {" << printVectorElements(mappedData.nM_)        << "};" << endl
+          << "mappedData.nMM_       = {" << printVectorElements(mappedData.nMM_)       << "};" << endl
+          << "mappedData.mapQV_     = {" << printVectorElements(mappedData.mapQV_)     << "};" << endl
+          << endl;
+    }
+
+    if (rawData.HasReferenceData()) {
+        s << "PbiRawReferenceData& referenceData = rawData.ReferenceData();" << endl
+          << "referenceData.entries_ = { " << endl
+          << printCppReferenceData(referenceData)
+          << "};" << endl
+          << endl;
+    }
+
+    cout << s.str() << endl;
+}
diff --git a/include/pbbam/Strand.h b/tools/pbindexdump/src/CppFormatter.h
similarity index 81%
copy from include/pbbam/Strand.h
copy to tools/pbindexdump/src/CppFormatter.h
index aa8535f..c2cda26 100644
--- a/include/pbbam/Strand.h
+++ b/tools/pbindexdump/src/CppFormatter.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
 //
 // All rights reserved.
 //
@@ -23,7 +23,7 @@
 // BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
 // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// DISCLAIMED. IN NO EVENT  SHALL PACIFIC BIOSCIENCES OR ITS
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
@@ -35,21 +35,20 @@
 
 // Author: Derek Barnett
 
-#ifndef STRAND_H
-#define STRAND_H
+#ifndef CPPFORMATTER_H
+#define CPPFORMATTER_H
 
-#include "pbbam/Config.h"
+#include "IFormatter.h"
 
-namespace PacBio {
-namespace BAM {
+namespace pbindexdump {
 
-enum class Strand
+class CppFormatter : public IFormatter  // writes the PBI contents to stdout as C++ source statements
 {
-    FORWARD
-  , REVERSE
+public:
+    explicit CppFormatter(const Settings& settings);  // explicit: no implicit Settings -> formatter conversion
+    void Run(void) override;                          // override: compiler checks this matches IFormatter::Run
 };
 
-} // namespace BAM
-} // namespace PacBio
+} // namespace pbindexdump
 
-#endif // STRAND_H
+#endif // CPPFORMATTER_H
diff --git a/include/pbbam/TagCollection.h b/tools/pbindexdump/src/IFormatter.h
similarity index 77%
copy from include/pbbam/TagCollection.h
copy to tools/pbindexdump/src/IFormatter.h
index 42b4018..eb7e79b 100644
--- a/include/pbbam/TagCollection.h
+++ b/tools/pbindexdump/src/IFormatter.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
 //
 // All rights reserved.
 //
@@ -23,7 +23,7 @@
 // BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
 // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// DISCLAIMED. IN NO EVENT  SHALL PACIFIC BIOSCIENCES OR ITS
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
@@ -35,24 +35,30 @@
 
 // Author: Derek Barnett
 
-#ifndef TAGCOLLECTION_H
-#define TAGCOLLECTION_H
+#ifndef IFORMATTER_H
+#define IFORMATTER_H
 
-#include "pbbam/Config.h"
-#include "pbbam/Tag.h"
-#include <map>
-#include <string>
+#include "Settings.h"
 
-namespace PacBio {
-namespace BAM {
+namespace pbindexdump {
 
-class PBBAM_EXPORT TagCollection : public std::map<std::string, Tag>
+class IFormatter  // abstract interface implemented by CppFormatter and JsonFormatter
 {
 public:
-    bool Contains(const std::string& name) const;
+    virtual ~IFormatter(void) { }  // virtual: PbIndexDump::Run destroys derived formatters through IFormatter
+
+public:
+    virtual void Run(void) =0;
+
+protected:
+    const Settings& settings_;  // stored by reference (not copied); the Settings must outlive the formatter
+
+protected:
+    IFormatter(const Settings& settings)
+        : settings_(settings)
+    { }
 };
 
-} // namespace BAM
-} // namespace PacBio
+} // namespace pbindexdump
 
-#endif // TAGCOLLECTION_H
+#endif // IFORMATTER_H
diff --git a/tools/pbindexdump/src/JsonFormatter.cpp b/tools/pbindexdump/src/JsonFormatter.cpp
new file mode 100644
index 0000000..368f659
--- /dev/null
+++ b/tools/pbindexdump/src/JsonFormatter.cpp
@@ -0,0 +1,195 @@
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT  SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "JsonFormatter.h"
+#include "json.hpp"
+#include <pbbam/PbiFile.h>
+#include <iostream>
+#include <sstream>
+using namespace pbindexdump;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace pbindexdump {
+
+
+} // namespace pbindexdump
+
+JsonFormatter::JsonFormatter(const Settings& settings)
+    : IFormatter(settings)
+    , index_(settings.inputPbiFilename_)
+{ }
+
+void JsonFormatter::FormatMetadata(void)
+{
+    auto version = string{ };
+    switch (index_.Version()) {
+        case PbiFile::Version_3_0_0 : version = "3.0.0"; break;
+        case PbiFile::Version_3_0_1 : version = "3.0.1"; break;
+        default:
+            throw runtime_error("unsupported PBI version encountered");
+    }
+
+    nlohmann::json fileSections;
+    fileSections.push_back("BasicData");
+    if (index_.HasBarcodeData())   fileSections.push_back("BarcodeData");
+    if (index_.HasMappedData())    fileSections.push_back("MappedData");
+    if (index_.HasReferenceData()) fileSections.push_back("ReferenceData");
+
+    json_["version"]      = version;
+    json_["fileSections"] = fileSections;
+    json_["numReads"]     = index_.NumReads();
+}
+
+void JsonFormatter::FormatRaw(void)  // column-wise output: each PBI field is assigned to json_ as a whole vector
+{
+    const PbiRawBasicData& basicData = index_.BasicData();
+    json_["basicData"]["rgId"]       = basicData.rgId_;
+    json_["basicData"]["qStart"]     = basicData.qStart_;
+    json_["basicData"]["qEnd"]       = basicData.qEnd_;
+    json_["basicData"]["holeNumber"] = basicData.holeNumber_;
+    json_["basicData"]["readQual"]   = basicData.readQual_;
+    json_["basicData"]["ctxtFlag"]   = basicData.ctxtFlag_;
+    json_["basicData"]["fileOffset"] = basicData.fileOffset_;
+
+    if (index_.HasBarcodeData()) {
+        const PbiRawBarcodeData& barcodeData = index_.BarcodeData();
+        json_["barcodeData"]["bcForward"] = barcodeData.bcForward_;
+        json_["barcodeData"]["bcReverse"] = barcodeData.bcReverse_;
+        json_["barcodeData"]["bcQuality"] = barcodeData.bcQual_;
+    }
+
+    if (index_.HasMappedData()) {
+        const PbiRawMappedData& mappedData = index_.MappedData();
+
+        // NOTE: unlike FormatRecords(), no int32_t cast is applied here (NOTE(review): unmapped values may not print as -1 - confirm)
+        json_["mappedData"]["tId"]    = mappedData.tId_;
+        json_["mappedData"]["tStart"] = mappedData.tStart_;
+        json_["mappedData"]["tEnd"]   = mappedData.tEnd_;
+
+        json_["mappedData"]["aStart"]    = mappedData.aStart_;
+        json_["mappedData"]["aEnd"]      = mappedData.aEnd_;
+        json_["mappedData"]["revStrand"] = mappedData.revStrand_;
+        json_["mappedData"]["nM"]        = mappedData.nM_;
+        json_["mappedData"]["nMM"]       = mappedData.nMM_;
+        json_["mappedData"]["mapQV"]     = mappedData.mapQV_;
+    }
+}
+
+void JsonFormatter::FormatRecords(void)
+{
+    nlohmann::json reads;
+    const uint32_t numReads = index_.NumReads();
+    const bool hasBarcodeData = index_.HasBarcodeData();
+    const bool hasMappedData  = index_.HasMappedData();
+    for (uint32_t i = 0; i < numReads; ++i) {
+
+        nlohmann::json read;
+
+        // common data
+        const PbiRawBasicData& basicData = index_.BasicData();
+        read["rgId"]        = basicData.rgId_[i];
+        read["qStart"]      = basicData.qStart_[i];
+        read["qEnd"]        = basicData.qEnd_[i];
+        read["holeNumber"]  = basicData.holeNumber_[i];
+        read["readQuality"] = basicData.readQual_[i];
+        read["contextFlag"] = basicData.ctxtFlag_[i];
+        read["fileOffset"]  = basicData.fileOffset_[i];
+
+        // barcode data, if present
+        if (hasBarcodeData) {
+            const PbiRawBarcodeData& barcodeData = index_.BarcodeData();
+            read["bcForward"] = barcodeData.bcForward_[i];
+            read["bcReverse"] = barcodeData.bcReverse_[i];
+            read["bcQuality"] = barcodeData.bcQual_[i];
+        }
+
+        // mapping data, if present
+        if (hasMappedData) {
+            const PbiRawMappedData& mappedData = index_.MappedData();
+
+            // casts to force -1 if unmapped
+            read["tId"]    = static_cast<int32_t>(mappedData.tId_[i]);
+            read["tStart"] = static_cast<int32_t>(mappedData.tStart_[i]);
+            read["tEnd"]   = static_cast<int32_t>(mappedData.tEnd_[i]);
+
+            read["aStart"] = mappedData.aStart_[i];
+            read["aEnd"]   = mappedData.aEnd_[i];
+            read["nM"]     = mappedData.nM_[i];
+            read["nMM"]    = mappedData.nMM_[i];
+            read["mapQuality"]    = mappedData.mapQV_[i];
+            read["reverseStrand"] = mappedData.revStrand_[i];
+        }
+
+        reads.push_back(std::move(read));
+    }
+    json_["reads"] = reads;
+}
+
+void JsonFormatter::FormatReferences(void)
+{
+    if (index_.HasReferenceData()) {
+        const PbiRawReferenceData& referenceData = index_.ReferenceData();
+        nlohmann::json references;
+        for (const PbiReferenceEntry& entry : referenceData.entries_) {
+            nlohmann::json element;
+            element["tId"]      = static_cast<int32_t>(entry.tId_);
+            element["beginRow"] = static_cast<int32_t>(entry.beginRow_);
+            element["endRow"]   = static_cast<int32_t>(entry.endRow_);
+            references.push_back(std::move(element));
+        }
+        json_["references"] = references;
+    }
+}
+
+void JsonFormatter::Print(void)
+{
+    cout << json_.dump(settings_.jsonIndentLevel_) << endl;
+}
+
+void JsonFormatter::Run(void)
+{
+    FormatMetadata();
+    FormatReferences();
+
+    if (settings_.jsonRaw_)
+        FormatRaw();
+    else
+        FormatRecords();
+
+    Print();
+}
diff --git a/include/pbbam/internal/FilterEngine.h b/tools/pbindexdump/src/JsonFormatter.h
similarity index 72%
rename from include/pbbam/internal/FilterEngine.h
rename to tools/pbindexdump/src/JsonFormatter.h
index bc4b88b..9bd6911 100644
--- a/include/pbbam/internal/FilterEngine.h
+++ b/tools/pbindexdump/src/JsonFormatter.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
 //
 // All rights reserved.
 //
@@ -23,7 +23,7 @@
 // BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
 // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// DISCLAIMED. IN NO EVENT  SHALL PACIFIC BIOSCIENCES OR ITS
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
@@ -35,37 +35,35 @@
 
 // Author: Derek Barnett
 
-#ifndef FILTERENGINE_H
-#define FILTERENGINE_H
+#ifndef JSONFORMATTER_H
+#define JSONFORMATTER_H
 
-#include "pbbam/BamRecord.h"
-#include <memory>
-#include <vector>
+#include "IFormatter.h"
+#include "json.hpp"
+#include <pbbam/PbiRawData.h>
 
-namespace PacBio {
-namespace BAM {
-namespace internal {
-
-
-
-class FilterEngine {
+namespace pbindexdump {
 
+class JsonFormatter : public IFormatter  // builds one JSON document from a PBI file and prints it to stdout
+{
 public:
-    FilterEngine(void);
+    explicit JsonFormatter(const Settings& settings);  // explicit: no implicit Settings -> formatter conversion
+    void Run(void) override;                           // override: compiler checks this matches IFormatter::Run
 
-public:
-    // returns true if record passes filter
-    bool Accepts(const BamRecord& r) const;
+private:
+    void FormatMetadata(void);
+    void FormatReferences(void);
 
-    // removes records that do not pass filter, returns true if !empty()
-    bool Accepts(std::vector<BamRecord>& r) const;
+    void FormatRaw(void);      // chosen by Run() when settings_.jsonRaw_ is true
+    void FormatRecords(void);  // chosen by Run() otherwise
 
-private:
+    void Print(void);
 
+private:
+    PacBio::BAM::PbiRawData index_;  // constructed from settings.inputPbiFilename_
+    nlohmann::json json_;            // filled by the Format*() steps, written out by Print()
 };
 
-} // namespace internal
-} // namespace BAM
-} // namespace PacBio
+} // namespace pbindexdump
 
-#endif // FILTERENGINE_H
+#endif // JSONFORMATTER_H
diff --git a/tools/pbindex/src/PbIndex.cpp b/tools/pbindexdump/src/PbIndexDump.cpp
similarity index 71%
copy from tools/pbindex/src/PbIndex.cpp
copy to tools/pbindexdump/src/PbIndexDump.cpp
index ca8a234..2dc925b 100644
--- a/tools/pbindex/src/PbIndex.cpp
+++ b/tools/pbindexdump/src/PbIndexDump.cpp
@@ -35,44 +35,23 @@
 
 // Author: Derek Barnett
 
-#include "PbIndex.h"
-#include <pbbam/BamFile.h>
-#include <pbbam/PbiRawData.h>
-#include <iostream>
+#include "PbIndexDump.h"
+#include "CppFormatter.h"
+#include "JsonFormatter.h"
 #include <cassert>
-#include <cstdlib>
-using namespace pbindex;
+using namespace pbindexdump;
 using namespace std;
 
-Settings::Settings(void)
-    : printPbiContents_(false)
-{ }
-
-int PbIndex::Create(const Settings& settings)
+void PbIndexDump::Run(const Settings& settings)
 {
-    try
-    {
-        PacBio::BAM::BamFile bamFile(settings.inputBamFilename_);
-        bamFile.EnsurePacBioIndexExists();
-        return EXIT_SUCCESS;
-    }
-    catch (std::runtime_error& e)
-    {
-        cerr << "pbindex ERROR: " << e.what() << endl;
-        return EXIT_FAILURE;
+    std::unique_ptr<IFormatter> formatter(nullptr);
+    if      (settings.format_ == "json") formatter.reset(new JsonFormatter(settings));
+    else if (settings.format_ == "cpp")  formatter.reset(new CppFormatter(settings));
+    else {
+        string msg = { "unsupported output format requested: " };
+        msg += settings.format_;
+        throw runtime_error(msg);
     }
+    assert(formatter);
+    formatter->Run();
 }
-
-//int PbIndex::Print(const Settings& settings)
-//{
-
-//}
-
-int PbIndex::Run(const Settings& settings)
-{
-//    if (settings.printPbiContents_)
-//        return Print(settings);
-//    else
-        return Create(settings);
-}
-
diff --git a/include/pbbam/Strand.h b/tools/pbindexdump/src/PbIndexDump.h
similarity index 83%
copy from include/pbbam/Strand.h
copy to tools/pbindexdump/src/PbIndexDump.h
index aa8535f..e5ec2dc 100644
--- a/include/pbbam/Strand.h
+++ b/tools/pbindexdump/src/PbIndexDump.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
 //
 // All rights reserved.
 //
@@ -23,7 +23,7 @@
 // BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
 // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// DISCLAIMED. IN NO EVENT  SHALL PACIFIC BIOSCIENCES OR ITS
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
@@ -35,21 +35,19 @@
 
 // Author: Derek Barnett
 
-#ifndef STRAND_H
-#define STRAND_H
+#ifndef PBINDEXDUMP_H
+#define PBINDEXDUMP_H
 
-#include "pbbam/Config.h"
+namespace pbindexdump {
 
-namespace PacBio {
-namespace BAM {
+class Settings;
 
-enum class Strand
+class PbIndexDump
 {
-    FORWARD
-  , REVERSE
+public:
+    static void Run(const Settings& settings);
 };
 
-} // namespace BAM
-} // namespace PacBio
+} // namespace pbindexdump
 
-#endif // STRAND_H
+#endif // PBINDEXDUMP_H
diff --git a/include/pbbam/Strand.h b/tools/pbindexdump/src/PbIndexDumpVersion.h.in
similarity index 85%
copy from include/pbbam/Strand.h
copy to tools/pbindexdump/src/PbIndexDumpVersion.h.in
index aa8535f..ec49612 100644
--- a/include/pbbam/Strand.h
+++ b/tools/pbindexdump/src/PbIndexDumpVersion.h.in
@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
 //
 // All rights reserved.
 //
@@ -35,21 +35,15 @@
 
 // Author: Derek Barnett
 
-#ifndef STRAND_H
-#define STRAND_H
+#ifndef PBINDEXDUMPVERSION_H
+#define PBINDEXDUMPVERSION_H
 
-#include "pbbam/Config.h"
+#include <string>
 
-namespace PacBio {
-namespace BAM {
+namespace pbindexdump {
 
-enum class Strand
-{
-    FORWARD
-  , REVERSE
-};
+const std::string Version = std::string("@PbIndexDump_VERSION@");
 
-} // namespace BAM
-} // namespace PacBio
+} // namespace pbindexdump
 
-#endif // STRAND_H
+#endif // PBINDEXDUMPVERSION_H
diff --git a/include/pbbam/TagCollection.h b/tools/pbindexdump/src/Settings.h
similarity index 76%
copy from include/pbbam/TagCollection.h
copy to tools/pbindexdump/src/Settings.h
index 42b4018..a520293 100644
--- a/include/pbbam/TagCollection.h
+++ b/tools/pbindexdump/src/Settings.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
 //
 // All rights reserved.
 //
@@ -23,7 +23,7 @@
 // BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
 // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// DISCLAIMED. IN NO EVENT  SHALL PACIFIC BIOSCIENCES OR ITS
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
@@ -35,24 +35,31 @@
 
 // Author: Derek Barnett
 
-#ifndef TAGCOLLECTION_H
-#define TAGCOLLECTION_H
+#ifndef SETTINGS_H
+#define SETTINGS_H
 
-#include "pbbam/Config.h"
-#include "pbbam/Tag.h"
-#include <map>
 #include <string>
+#include <vector>
 
-namespace PacBio {
-namespace BAM {
+namespace pbindexdump {
 
-class PBBAM_EXPORT TagCollection : public std::map<std::string, Tag>
+class Settings  // pbindexdump run options; defaults are set in the constructor below
 {
 public:
-    bool Contains(const std::string& name) const;
+    Settings(void)
+        : format_("json")
+        , jsonIndentLevel_(4)
+        , jsonRaw_(false)
+    { }
+
+public:
+    std::string inputPbiFilename_;     // PBI file to dump; handed to PbiRawData by both formatters
+    std::string format_;               // "json" or "cpp"; any other value makes PbIndexDump::Run throw
+    int jsonIndentLevel_;              // passed to nlohmann::json::dump() when the JSON output is printed
+    bool jsonRaw_;                     // true: JsonFormatter::FormatRaw; false: JsonFormatter::FormatRecords
+    std::vector<std::string> errors_;  // not read by the formatters; presumably filled during option parsing - TODO confirm
 };
 
-} // namespace BAM
-} // namespace PacBio
+} // namespace pbindexdump
 
-#endif // TAGCOLLECTION_H
+#endif // SETTINGS_H
diff --git a/tools/pbindexdump/src/json.hpp b/tools/pbindexdump/src/json.hpp
new file mode 100644
index 0000000..7e174d7
--- /dev/null
+++ b/tools/pbindexdump/src/json.hpp
@@ -0,0 +1,7295 @@
+/*!
+@mainpage
+
+These pages contain the API documentation of JSON for Modern C++, a C++11
+header-only JSON class.
+
+Class @ref nlohmann::basic_json is a good entry point for the documentation.
+
+@copyright The code is licensed under the [MIT
+           License](http://opensource.org/licenses/MIT):
+           <br>
+           Copyright © 2013-2015 Niels Lohmann.
+           <br>
+           Permission is hereby granted, free of charge, to any person
+           obtaining a copy of this software and associated documentation files
+           (the "Software"), to deal in the Software without restriction,
+           including without limitation the rights to use, copy, modify, merge,
+           publish, distribute, sublicense, and/or sell copies of the Software,
+           and to permit persons to whom the Software is furnished to do so,
+           subject to the following conditions:
+           <br>
+           The above copyright notice and this permission notice shall be
+           included in all copies or substantial portions of the Software.
+           <br>
+           THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+           EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+           MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+           NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+           BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+           ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+           CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+           SOFTWARE.
+
+@author [Niels Lohmann](http://nlohmann.me)
+@see https://github.com/nlohmann/json to download the source code
+*/
+
+#ifndef NLOHMANN_JSON_HPP
+#define NLOHMANN_JSON_HPP
+
+#include <algorithm>
+#include <array>
+#include <ciso646>
+#include <cmath>
+#include <cstdio>
+#include <functional>
+#include <initializer_list>
+#include <iomanip>
+#include <iostream>
+#include <iterator>
+#include <limits>
+#include <map>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+// enable ssize_t on MinGW
+#ifdef __GNUC__
+    #ifdef __MINGW32__
+        #include <sys/types.h>
+    #endif
+#endif
+
+// enable ssize_t for MSVC
+#ifdef _MSC_VER
+    #include <basetsd.h>
+    using ssize_t = SSIZE_T;
+#endif
+
+/*!
+@brief namespace for Niels Lohmann
+@see https://github.com/nlohmann
+*/
+namespace nlohmann
+{
+
+
+/*!
+@brief unnamed namespace with internal helper functions
+*/
+namespace
+{
+/*!
+@brief Helper to determine whether there's a mapped_type for T.
+@sa http://stackoverflow.com/a/7728728/266378
+*/
+template<typename T>
+struct has_mapped_type
+{
+  private:
+    template<typename C> static char test(typename C::mapped_type*);
+    template<typename C> static int  test(...);
+  public:
+    enum { value = sizeof(test<T>(0)) == sizeof(char) };
+};
+
+/// "equality" comparison for floating point numbers
+template<typename T>
+static bool approx(const T a, const T b)
+{
+    return not (a > b or a < b);
+}
+}
+
+/*!
+@brief a class to store JSON values
+
+@tparam ObjectType type for JSON objects (@c std::map by default; will be used
+in @ref object_t)
+@tparam ArrayType type for JSON arrays (@c std::vector by default; will be used
+in @ref array_t)
+@tparam StringType type for JSON strings and object keys (@c std::string by
+default; will be used in @ref string_t)
+@tparam BooleanType type for JSON booleans (@c `bool` by default; will be used
+in @ref boolean_t)
+@tparam NumberIntegerType type for JSON integer numbers (@c `int64_t` by
+default; will be used in @ref number_integer_t)
+@tparam NumberFloatType type for JSON floating-point numbers (@c `double` by
+default; will be used in @ref number_float_t)
+@tparam AllocatorType type of the allocator to use (@c `std::allocator` by
+default)
+
+@requirement The class satisfies the following concept requirements:
+- Basic
+ - [DefaultConstructible](http://en.cppreference.com/w/cpp/concept/DefaultConstructible):
+   JSON values can be default constructed. The result will be a JSON null value.
+ - [MoveConstructible](http://en.cppreference.com/w/cpp/concept/MoveConstructible):
+   A JSON value can be constructed from an rvalue argument.
+ - [CopyConstructible](http://en.cppreference.com/w/cpp/concept/CopyConstructible):
+   A JSON value can be copy-constructed from an lvalue expression.
+ - [MoveAssignable](http://en.cppreference.com/w/cpp/concept/MoveAssignable):
+   A JSON value can be assigned from an rvalue argument.
+ - [CopyAssignable](http://en.cppreference.com/w/cpp/concept/CopyAssignable):
+   A JSON value can be copy-assigned from an lvalue expression.
+ - [Destructible](http://en.cppreference.com/w/cpp/concept/Destructible):
+   JSON values can be destructed.
+- Layout
+ - [StandardLayoutType](http://en.cppreference.com/w/cpp/concept/StandardLayoutType):
+   JSON values have
+   [standard layout](http://en.cppreference.com/w/cpp/language/data_members#Standard_layout):
+   All non-static data members are private and standard layout types, the class
+   has no virtual functions or (virtual) base classes.
+- Library-wide
+ - [EqualityComparable](http://en.cppreference.com/w/cpp/concept/EqualityComparable):
+   JSON values can be compared with `==`, see @ref
+   operator==(const_reference,const_reference).
+ - [LessThanComparable](http://en.cppreference.com/w/cpp/concept/LessThanComparable):
+   JSON values can be compared with `<`, see @ref
+   operator<(const_reference,const_reference).
+ - [Swappable](http://en.cppreference.com/w/cpp/concept/Swappable):
+   Any JSON lvalue or rvalue can be swapped with any lvalue or rvalue of
+   other compatible types, using unqualified function call @ref swap().
+ - [NullablePointer](http://en.cppreference.com/w/cpp/concept/NullablePointer):
+   JSON values can be compared against `std::nullptr_t` objects which are used
+   to model the `null` value.
+- Container
+ - [Container](http://en.cppreference.com/w/cpp/concept/Container):
+   JSON values can be used like STL containers and provide iterator access.
+ - [ReversibleContainer](http://en.cppreference.com/w/cpp/concept/ReversibleContainer):
+   JSON values can be used like STL containers and provide reverse iterator
+   access.
+
+@internal
+@note ObjectType trick from http://stackoverflow.com/a/9860911
+@endinternal
+
+@see RFC 7159 <http://rfc7159.net/rfc7159>
+*/
+template <
+    template<typename U, typename V, typename... Args> class ObjectType = std::map,
+    template<typename U, typename... Args> class ArrayType = std::vector,
+    class StringType = std::string,
+    class BooleanType = bool,
+    class NumberIntegerType = int64_t,
+    class NumberFloatType = double,
+    template<typename U> class AllocatorType = std::allocator
+    >
+class basic_json
+{
+  private:
+    /// workaround type for MSVC
+    using basic_json_t = basic_json<ObjectType,
+          ArrayType,
+          StringType,
+          BooleanType,
+          NumberIntegerType,
+          NumberFloatType,
+          AllocatorType>;
+
+  public:
+
+    /////////////////////
+    // container types //
+    /////////////////////
+
+    /// @name container types
+    /// @{
+
+    /// the type of elements in a basic_json container
+    using value_type = basic_json;
+
+    /// the type of an element reference
+    using reference = value_type&;
+
+    /// the type of an element const reference
+    using const_reference = const value_type&;
+
+    /// a type to represent differences between iterators
+    using difference_type = std::ptrdiff_t;
+
+    /// a type to represent container sizes
+    using size_type = std::size_t;
+
+    /// the allocator type
+    using allocator_type = AllocatorType<basic_json>;
+
+    /// the type of an element pointer
+    using pointer = typename std::allocator_traits<allocator_type>::pointer;
+    /// the type of an element const pointer
+    using const_pointer = typename std::allocator_traits<allocator_type>::const_pointer;
+
+    // forward declaration
+    template<typename Base> class json_reverse_iterator;
+
+    /// an iterator for a basic_json container
+    class iterator;
+    /// a const iterator for a basic_json container
+    class const_iterator;
+    /// a reverse iterator for a basic_json container
+    using reverse_iterator = json_reverse_iterator<typename basic_json::iterator>;
+    /// a const reverse iterator for a basic_json container
+    using const_reverse_iterator = json_reverse_iterator<typename basic_json::const_iterator>;
+
+    /// @}
+
+
+    /*!
+    @brief returns the allocator associated with the container
+    */
+    static allocator_type get_allocator()
+    {
+        return allocator_type();
+    }
+
+
+    ///////////////////////////
+    // JSON value data types //
+    ///////////////////////////
+
+    /// @name JSON value data types
+    /// @{
+
+    /*!
+    @brief a type for an object
+
+    [RFC 7159](http://rfc7159.net/rfc7159) describes JSON objects as follows:
+    > An object is an unordered collection of zero or more name/value pairs,
+    > where a name is a string and a value is a string, number, boolean, null,
+    > object, or array.
+
+    To store objects in C++, a type is defined by the template parameters @a
+    ObjectType which chooses the container (e.g., `std::map` or
+    `std::unordered_map`), @a StringType which chooses the type of the keys or
+    names, and @a AllocatorType which chooses the allocator to use.
+
+    #### Default type
+
+    With the default values for @a ObjectType (`std::map`), @a StringType
+    (`std::string`), and @a AllocatorType (`std::allocator`), the default value
+    for @a object_t is:
+
+    @code {.cpp}
+    std::map<
+      std::string, // key_type
+      basic_json, // value_type
+      std::less<std::string>, // key_compare
+      std::allocator<std::pair<const std::string, basic_json>> // allocator_type
+    >
+    @endcode
+
+    #### Behavior
+
+    The choice of @a object_t influences the behavior of the JSON class. With
+    the default type, objects have the following behavior:
+
+    - When all names are unique, objects will be interoperable in the sense
+      that all software implementations receiving that object will agree on the
+      name-value mappings.
+    - When the names within an object are not unique, later stored name/value
+      pairs overwrite previously stored name/value pairs, leaving the used
+      names unique. For instance, `{"key": 1}` and `{"key": 2, "key": 1}` will
+      be treated as equal and both stored as `{"key": 1}`.
+    - Internally, name/value pairs are stored in lexicographical order of the
+      names. Objects will also be serialized (see @ref dump) in this order. For
+      instance, `{"b": 1, "a": 2}` and `{"a": 2, "b": 1}` will be stored and
+      serialized as `{"a": 2, "b": 1}`.
+    - When comparing objects, the order of the name/value pairs is irrelevant.
+      This makes objects interoperable in the sense that they will not be
+      affected by these differences. For instance, `{"b": 1, "a": 2}` and
+      `{"a": 2, "b": 1}` will be treated as equal.
+
+    #### Limits
+
+    [RFC 7159](http://rfc7159.net/rfc7159) specifies:
+    > An implementation may set limits on the maximum depth of nesting.
+
+    In this class, the object's limit of nesting is not constrained explicitly.
+    However, a maximum depth of nesting may be introduced by the compiler or
+    runtime environment. A theoretical limit can be queried by calling the @ref
+    max_size function of a JSON object.
+
+    #### Storage
+
+    Objects are stored as pointers in a `basic_json` type. That is, for any
+    access to object values, a pointer of type `object_t*` must be dereferenced.
+
+    @sa array_t
+    */
+    using object_t = ObjectType<StringType,
+          basic_json,
+          std::less<StringType>,
+          AllocatorType<std::pair<const StringType,
+          basic_json>>>;
+
+    /*!
+    @brief a type for an array
+
+    [RFC 7159](http://rfc7159.net/rfc7159) describes JSON arrays as follows:
+    > An array is an ordered sequence of zero or more values.
+
+    To store objects in C++, a type is defined by the template parameters @a
+    ArrayType which chooses the container (e.g., `std::vector` or `std::list`)
+    and @a AllocatorType which chooses the allocator to use.
+
+    #### Default type
+
+    With the default values for @a ArrayType (`std::vector`) and @a
+    AllocatorType (`std::allocator`), the default value for @a array_t is:
+
+    @code {.cpp}
+    std::vector<
+      basic_json, // value_type
+      std::allocator<basic_json> // allocator_type
+    >
+    @endcode
+
+    #### Limits
+
+    [RFC 7159](http://rfc7159.net/rfc7159) specifies:
+    > An implementation may set limits on the maximum depth of nesting.
+
+    In this class, the array's limit of nesting is not constrained explicitly.
+    However, a maximum depth of nesting may be introduced by the compiler or
+    runtime environment. A theoretical limit can be queried by calling the @ref
+    max_size function of a JSON array.
+
+    #### Storage
+
+    Arrays are stored as pointers in a `basic_json` type. That is, for any
+    access to array values, a pointer of type `array_t*` must be dereferenced.
+    */
+    using array_t = ArrayType<basic_json, AllocatorType<basic_json>>;
+
+    /*!
+    @brief a type for a string
+
+    [RFC 7159](http://rfc7159.net/rfc7159) describes JSON strings as follows:
+    > A string is a sequence of zero or more Unicode characters.
+
+    To store objects in C++, a type is defined by the template parameters @a
+    StringType which chooses the container (e.g., `std::string`) to use.
+
+    Unicode values are split by the JSON class into byte-sized characters
+    during deserialization.
+
+    #### Default type
+
+    With the default values for @a StringType (`std::string`), the default
+    value for @a string_t is:
+
+    @code {.cpp}
+    std::string
+    @endcode
+
+    #### String comparison
+
+    [RFC 7159](http://rfc7159.net/rfc7159) states:
+    > Software implementations are typically required to test names of object
+    > members for equality. Implementations that transform the textual
+    > representation into sequences of Unicode code units and then perform the
+    > comparison numerically, code unit by code unit, are interoperable in the
+    > sense that implementations will agree in all cases on equality or
+    > inequality of two strings. For example, implementations that compare
+    > strings with escaped characters unconverted may incorrectly find that
+    > `"a\\b"` and `"a\u005Cb"` are not equal.
+
+    This implementation is interoperable as it does compare strings code unit
+    by code unit.
+
+    #### Storage
+
+    String values are stored as pointers in a `basic_json` type. That is, for
+    any access to string values, a pointer of type `string_t*` must be
+    dereferenced.
+    */
+    using string_t = StringType;
+
+    /*!
+    @brief a type for a boolean
+
+    [RFC 7159](http://rfc7159.net/rfc7159) implicitly describes a boolean as a
+    type which differentiates the two literals `true` and `false`.
+
+    To store objects in C++, a type is defined by the template parameter @a
+    BooleanType which chooses the type to use.
+
+    #### Default type
+
+    With the default values for @a BooleanType (`bool`), the default value for
+    @a boolean_t is:
+
+    @code {.cpp}
+    bool
+    @endcode
+
+    #### Storage
+
+    Boolean values are stored directly inside a `basic_json` type.
+    */
+    using boolean_t = BooleanType;
+
+    /*!
+    @brief a type for a number (integer)
+
+    [RFC 7159](http://rfc7159.net/rfc7159) describes numbers as follows:
+    > The representation of numbers is similar to that used in most programming
+    > languages. A number is represented in base 10 using decimal digits. It
+    > contains an integer component that may be prefixed with an optional minus
+    > sign, which may be followed by a fraction part and/or an exponent part.
+    > Leading zeros are not allowed. (...) Numeric values that cannot be
+    > represented in the grammar below (such as Infinity and NaN) are not
+    > permitted.
+
+    This description includes both integer and floating-point numbers. However,
+    C++ allows more precise storage if it is known whether the number is an
+    integer or a floating-point number. Therefore, two different types, @ref
+    number_integer_t and @ref number_float_t are used.
+
+    To store integer numbers in C++, a type is defined by the template
+    parameter @a NumberIntegerType which chooses the type to use.
+
+    #### Default type
+
+    With the default values for @a NumberIntegerType (`int64_t`), the default
+    value for @a number_integer_t is:
+
+    @code {.cpp}
+    int64_t
+    @endcode
+
+    #### Default behavior
+
+    - The restriction about leading zeros is not enforced in C++. Instead,
+      leading zeros in integer literals lead to an interpretation as octal
+      number. Internally, the value will be stored as decimal number. For
+      instance, the C++ integer literal `010` will be serialized to `8`. During
+      deserialization, leading zeros yield an error.
+    - Not-a-number (NaN) values will be serialized to `null`.
+
+    #### Limits
+
+    [RFC 7159](http://rfc7159.net/rfc7159) specifies:
+    > An implementation may set limits on the range and precision of numbers.
+
+    When the default type is used, the maximal integer number that can be
+    stored is `9223372036854775807` (INT64_MAX) and the minimal integer number
+    that can be stored is `-9223372036854775808` (INT64_MIN). Integer numbers
+    that are out of range will yield over/underflow when used in a constructor.
+    During deserialization, too large or small integer numbers will
+    automatically be stored as @ref number_float_t.
+
+    [RFC 7159](http://rfc7159.net/rfc7159) further states:
+    > Note that when such software is used, numbers that are integers and are
+    > in the range \f$[-2^{53}+1, 2^{53}-1]\f$ are interoperable in the sense
+    > that implementations will agree exactly on their numeric values.
+
+    As this range is a subrange of the exactly supported range [INT64_MIN,
+    INT64_MAX], this class's integer type is interoperable.
+
+    #### Storage
+
+    Integer number values are stored directly inside a `basic_json` type.
+    */
+    using number_integer_t = NumberIntegerType;
+
+    /*!
+    @brief a type for a number (floating-point)
+
+    [RFC 7159](http://rfc7159.net/rfc7159) describes numbers as follows:
+    > The representation of numbers is similar to that used in most programming
+    > languages. A number is represented in base 10 using decimal digits. It
+    > contains an integer component that may be prefixed with an optional minus
+    > sign, which may be followed by a fraction part and/or an exponent part.
+    > Leading zeros are not allowed. (...) Numeric values that cannot be
+    > represented in the grammar below (such as Infinity and NaN) are not
+    > permitted.
+
+    This description includes both integer and floating-point numbers. However,
+    C++ allows more precise storage if it is known whether the number is an
+    integer or a floating-point number. Therefore, two different types, @ref
+    number_integer_t and @ref number_float_t are used.
+
+    To store floating-point numbers in C++, a type is defined by the template
+    parameter @a NumberFloatType which chooses the type to use.
+
+    #### Default type
+
+    With the default values for @a NumberFloatType (`double`), the default
+    value for @a number_float_t is:
+
+    @code {.cpp}
+    double
+    @endcode
+
+    #### Default behavior
+
+    - The restriction about leading zeros is not enforced in C++. Instead,
+      leading zeros in floating-point literals will be ignored. Internally, the
+      value will be stored as decimal number. For instance, the C++
+      floating-point literal `01.2` will be serialized to `1.2`. During
+      deserialization, leading zeros yield an error.
+    - Not-a-number (NaN) values will be serialized to `null`.
+
+    #### Limits
+
+    [RFC 7159](http://rfc7159.net/rfc7159) states:
+    > This specification allows implementations to set limits on the range and
+    > precision of numbers accepted. Since software that implements IEEE
+    > 754-2008 binary64 (double precision) numbers is generally available and
+    > widely used, good interoperability can be achieved by implementations that
+    > expect no more precision or range than these provide, in the sense that
+    > implementations will approximate JSON numbers within the expected
+    > precision.
+
+    This implementation does exactly follow this approach, as it uses double
+    precision floating-point numbers. Note values smaller than
+    `-1.79769313486232e+308` and values greater than `1.79769313486232e+308`
+    will be stored as NaN internally and be serialized to `null`.
+
+    #### Storage
+
+    Floating-point number values are stored directly inside a `basic_json` type.
+    */
+    using number_float_t = NumberFloatType;
+
+    /// @}
+
+
+    ///////////////////////////
+    // JSON type enumeration //
+    ///////////////////////////
+
+    /*!
+    @brief the JSON type enumeration
+
+    This enumeration collects the different JSON types. It is internally used
+    to distinguish the stored values, and the functions is_null, is_object,
+    is_array, is_string, is_boolean, is_number, and is_discarded rely on it.
+    */
+    enum class value_t : uint8_t
+    {
+        null,           ///< null value
+        object,         ///< object (unordered set of name/value pairs)
+        array,          ///< array (ordered collection of values)
+        string,         ///< string value
+        boolean,        ///< boolean value
+        number_integer, ///< number value (integer)
+        number_float,   ///< number value (floating-point)
+        discarded       ///< discarded by the parser callback function
+    };
+
+
+  private:
+    /// helper for exception-safe object creation
+    template<typename T, typename... Args>
+    static T* create( Args&& ... args )
+    {
+        AllocatorType<T> alloc;
+        auto deleter = [&](T * object)
+        {
+            alloc.deallocate(object, 1);
+        };
+        std::unique_ptr<T, decltype(deleter)> object(alloc.allocate(1), deleter);
+        alloc.construct(object.get(), std::forward<Args>(args)...);
+        return object.release();
+    }
+
+    ////////////////////////
+    // JSON value storage //
+    ////////////////////////
+
+    /// a JSON value
+    union json_value
+    {
+        /// object (stored with pointer to save storage)
+        object_t* object;
+        /// array (stored with pointer to save storage)
+        array_t* array;
+        /// string (stored with pointer to save storage)
+        string_t* string;
+        /// boolean
+        boolean_t boolean;
+        /// number (integer)
+        number_integer_t number_integer;
+        /// number (floating-point)
+        number_float_t number_float;
+
+        /// default constructor (for null values)
+        json_value() noexcept = default;
+        /// constructor for booleans
+        json_value(boolean_t v) noexcept : boolean(v) {}
+        /// constructor for numbers (integer)
+        json_value(number_integer_t v) noexcept : number_integer(v) {}
+        /// constructor for numbers (floating-point)
+        json_value(number_float_t v) noexcept : number_float(v) {}
+        /// constructor for empty values of a given type
+        json_value(value_t t)
+        {
+            switch (t)
+            {
+                case (value_t::null):
+                case (value_t::discarded):
+                {
+                    break;
+                }
+
+                case (value_t::object):
+                {
+                    object = create<object_t>();
+                    break;
+                }
+
+                case (value_t::array):
+                {
+                    array = create<array_t>();
+                    break;
+                }
+
+                case (value_t::string):
+                {
+                    string = create<string_t>("");
+                    break;
+                }
+
+                case (value_t::boolean):
+                {
+                    boolean = boolean_t(false);
+                    break;
+                }
+
+                case (value_t::number_integer):
+                {
+                    number_integer = number_integer_t(0);
+                    break;
+                }
+
+                case (value_t::number_float):
+                {
+                    number_float = number_float_t(0.0);
+                    break;
+                }
+            }
+        }
+
+        /// constructor for strings
+        json_value(const string_t& value)
+        {
+            string = create<string_t>(value);
+        }
+
+        /// constructor for objects
+        json_value(const object_t& value)
+        {
+            object = create<object_t>(value);
+        }
+
+        /// constructor for arrays
+        json_value(const array_t& value)
+        {
+            array = create<array_t>(value);
+        }
+    };
+
+
+  public:
+    //////////////////////////
+    // JSON parser callback //
+    //////////////////////////
+
+    /*!
+    @brief JSON callback events
+
+    This enumeration lists the parser events that can trigger calling a
+    callback function of type @ref parser_callback_t during parsing.
+    */
+    enum class parse_event_t : uint8_t
+    {
+        /// the parser read `{` and started to process a JSON object
+        object_start,
+        /// the parser read `}` and finished processing a JSON object
+        object_end,
+        /// the parser read `[` and started to process a JSON array
+        array_start,
+        /// the parser read `]` and finished processing a JSON array
+        array_end,
+        /// the parser read a key of a value in an object
+        key,
+        /// the parser finished reading a JSON value
+        value
+    };
+
+    /*!
+    @brief per-element parser callback type
+
+    With a parser callback function, the result of parsing a JSON text can be
+    influenced. When passed to @ref parse(std::istream&, parser_callback_t) or
+    @ref parse(const string_t&, parser_callback_t), it is called on certain
+    events (passed as @ref parse_event_t via parameter @a event) with a set
+    recursion depth @a depth and context JSON value @a parsed. The return value
+    of the callback function is a boolean indicating whether the element that
+    emitted the callback shall be kept or not.
+
+    We distinguish six scenarios (determined by the event type) in which the
+    callback function can be called. The following table describes the values
+    of the parameters @a depth, @a event, and @a parsed.
+
+    parameter @a event | description | parameter @a depth | parameter @a parsed
+    ------------------ | ----------- | ------------------ | -------------------
+    parse_event_t::object_start | the parser read `{` and started to process a JSON object | depth of the parent of the JSON object | a JSON value with type discarded
+    parse_event_t::key | the parser read a key of a value in an object | depth of the currently parsed JSON object | a JSON string containing the key
+    parse_event_t::object_end | the parser read `}` and finished processing a JSON object | depth of the parent of the JSON object | the parsed JSON object
+    parse_event_t::array_start | the parser read `[` and started to process a JSON array | depth of the parent of the JSON array | a JSON value with type discarded
+    parse_event_t::array_end | the parser read `]` and finished processing a JSON array | depth of the parent of the JSON array | the parsed JSON array
+    parse_event_t::value | the parser finished reading a JSON value | depth of the value | the parsed JSON value
+
+    Discarding a value (i.e., returning `false`) has different effects depending on the
+    context in which the function was called:
+
+    - Discarded values in structured types are skipped. That is, the parser
+      will behave as if the discarded value was never read.
+    - In case a value outside a structured type is skipped, it is replaced with
+      `null`. This case happens if the top-level element is skipped.
+
+    @param[in] depth   the depth of the recursion during parsing
+
+    @param[in] event   an event of type parse_event_t indicating the context in
+    which the callback function has been called
+
+    @param[in,out] parsed  the current intermediate parse result; note that
+    writing to this value has no effect for parse_event_t::key events
+
+    @return Whether the JSON value which called the function during parsing
+    should be kept (`true`) or not (`false`). In the latter case, it is either
+    skipped completely or replaced by an empty discarded object.
+
+    @sa @ref parse(std::istream&, parser_callback_t) or
+    @ref parse(const string_t&, parser_callback_t) for examples
+    */
+    using parser_callback_t = std::function<bool(
+                                  int depth, parse_event_t event, basic_json& parsed)>;
+
+
+    //////////////////
+    // constructors //
+    //////////////////
+
+    /*!
+    @brief create an empty value with a given type
+
+    Create an empty JSON value with a given type. The value will be default
+    initialized with an empty value which depends on the type:
+
+    Value type  | initial value
+    ----------- | -------------
+    null        | `null`
+    boolean     | `false`
+    string      | `""`
+    number      | `0`
+    object      | `{}`
+    array       | `[]`
+
+    @param[in] value  the type of the value to create
+
+    @complexity Constant.
+
+    @throw std::bad_alloc if allocation for object, array, or string value
+    fails
+
+    @liveexample{The following code shows the constructor for different @ref
+    value_t values,basic_json__value_t}
+    */
+    basic_json(const value_t value)
+        : m_type(value), m_value(value)
+    {}
+
+    /*!
+    @brief create a null object (implicitly)
+
+    Create a `null` JSON value. This is the implicit version of the `null`
+    value constructor as it takes no parameters.
+
+    @complexity Constant.
+
+    @requirement This function satisfies the Container requirements:
+    - The complexity is constant.
+    - As postcondition, it holds: `basic_json().empty() == true`.
+
+    @liveexample{The following code shows the constructor for a `null` JSON
+    value.,basic_json}
+
+    @sa basic_json(std::nullptr_t)
+    */
+    basic_json() noexcept = default;
+
+    /*!
+    @brief create a null object (explicitly)
+
+    Create a `null` JSON value. This is the explicit version of the `null`
+    value constructor as it takes a null pointer as parameter. It allows to
+    create `null` values by explicitly assigning a @c nullptr to a JSON value.
+    The passed null pointer itself is not read - it is only used to choose the
+    right constructor.
+
+    @complexity Constant.
+
+    @liveexample{The following code shows the constructor with null pointer
+    parameter.,basic_json__nullptr_t}
+
+    @sa basic_json()
+    */
+    basic_json(std::nullptr_t) noexcept
+        : basic_json(value_t::null)
+    {}
+
+    /*!
+    @brief create an object (explicit)
+
+    Create an object JSON value with a given content.
+
+    @param[in] value  a value for the object
+
+    @complexity Linear in the size of the passed @a value.
+
+    @throw std::bad_alloc if allocation for object value fails
+
+    @liveexample{The following code shows the constructor with an @ref object_t
+    parameter.,basic_json__object_t}
+
+    @sa basic_json(const CompatibleObjectType&)
+    */
+    basic_json(const object_t& value)
+        : m_type{value_t::object},
+          m_value{value}   // json_value is built from the passed object
+    {
+    }
+
+    /*!
+    @brief create an object (implicit)
+
+    Create an object JSON value with a given content. This constructor allows
+    any type that can be used to construct values of type @ref object_t.
+    Examples include the types `std::map` and `std::unordered_map`.
+
+    @tparam CompatibleObjectType an object type whose `key_type` and
+    `value_type` is compatible to @ref object_t
+
+    @param[in] value  a value for the object
+
+    @complexity Linear in the size of the passed @a value.
+
+    @throw std::bad_alloc if allocation for object value fails
+
+    @liveexample{The following code shows the constructor with several
+    compatible object type parameters.,basic_json__CompatibleObjectType}
+
+    @sa basic_json(const object_t&)
+    */
+    template <class CompatibleObjectType, typename
+              std::enable_if<
+                  std::is_constructible<typename object_t::key_type, typename CompatibleObjectType::key_type>::value and
+                  std::is_constructible<basic_json, typename CompatibleObjectType::mapped_type>::value, int>::type
+              = 0>
+    basic_json(const CompatibleObjectType& value)
+        : m_type{value_t::object}
+    {
+        // unqualified begin()/end(): the std:: versions are the fallback, and
+        // argument-dependent lookup may select overloads of the container type
+        using std::begin;
+        using std::end;
+        const auto range_first = begin(value);
+        const auto range_last = end(value);
+
+        // build the object from the whole range of the compatible container
+        m_value.object = create<object_t>(range_first, range_last);
+    }
+
+    /*!
+    @brief create an array (explicit)
+
+    Create an array JSON value with a given content.
+
+    @param[in] value  a value for the array
+
+    @complexity Linear in the size of the passed @a value.
+
+    @throw std::bad_alloc if allocation for array value fails
+
+    @liveexample{The following code shows the constructor with an @ref array_t
+    parameter.,basic_json__array_t}
+
+    @sa basic_json(const CompatibleArrayType&)
+    */
+    basic_json(const array_t& value)
+        : m_type{value_t::array},
+          m_value{value}   // json_value is built from the passed array
+    {
+    }
+
+    /*!
+    @brief create an array (implicit)
+
+    Create an array JSON value with a given content. This constructor allows
+    any type that can be used to construct values of type @ref array_t.
+    Examples include the types `std::vector`, `std::list`, and `std::set`.
+
+    @tparam CompatibleArrayType an object type whose `value_type` is compatible
+    to @ref array_t
+
+    @param[in] value  a value for the array
+
+    @complexity Linear in the size of the passed @a value.
+
+    @throw std::bad_alloc if allocation for array value fails
+
+    @liveexample{The following code shows the constructor with several
+    compatible array type parameters.,basic_json__CompatibleArrayType}
+
+    @sa basic_json(const array_t&)
+    */
+    template <class CompatibleArrayType, typename
+              std::enable_if<
+                  not std::is_same<CompatibleArrayType, typename basic_json_t::iterator>::value and
+                  not std::is_same<CompatibleArrayType, typename basic_json_t::const_iterator>::value and
+                  not std::is_same<CompatibleArrayType, typename basic_json_t::reverse_iterator>::value and
+                  not std::is_same<CompatibleArrayType, typename basic_json_t::const_reverse_iterator>::value and
+                  not std::is_same<CompatibleArrayType, typename array_t::iterator>::value and
+                  not std::is_same<CompatibleArrayType, typename array_t::const_iterator>::value and
+                  std::is_constructible<basic_json, typename CompatibleArrayType::value_type>::value, int>::type
+              = 0>
+    basic_json(const CompatibleArrayType& value)
+        : m_type{value_t::array}
+    {
+        // unqualified begin()/end(): the std:: versions are the fallback, and
+        // argument-dependent lookup may select overloads of the container type
+        using std::begin;
+        using std::end;
+        const auto range_first = begin(value);
+        const auto range_last = end(value);
+
+        // build the array from the whole range of the compatible container
+        m_value.array = create<array_t>(range_first, range_last);
+    }
+
+    /*!
+    @brief create a string (explicit)
+
+    Create a string JSON value with a given content.
+
+    @param[in] value  a value for the string
+
+    @complexity Linear in the size of the passed @a value.
+
+    @throw std::bad_alloc if allocation for string value fails
+
+    @liveexample{The following code shows the constructor with an @ref string_t
+    parameter.,basic_json__string_t}
+
+    @sa basic_json(const typename string_t::value_type*)
+    @sa basic_json(const CompatibleStringType&)
+    */
+    basic_json(const string_t& value)
+        : m_type{value_t::string},
+          m_value{value}   // json_value is built from the passed string
+    {
+    }
+
+    /*!
+    @brief create a string (explicit)
+
+    Create a string JSON value with a given content.
+
+    @param[in] value  a literal value for the string
+
+    @complexity Linear in the size of the passed @a value.
+
+    @throw std::bad_alloc if allocation for string value fails
+
+    @liveexample{The following code shows the constructor with string literal
+    parameter.,basic_json__string_t_value_type}
+
+    @sa basic_json(const string_t&)
+    @sa basic_json(const CompatibleStringType&)
+    */
+    basic_json(const typename string_t::value_type* value)
+        // wrap the character pointer in a string_t temporary and store it the
+        // same way the string_t constructor does
+        : m_type{value_t::string},
+          m_value{string_t(value)}
+    {
+    }
+
+    /*!
+    @brief create a string (implicit)
+
+    Create a string JSON value with a given content.
+
+    @param[in] value  a value for the string
+
+    @tparam CompatibleStringType a string type which is compatible to @ref
+    string_t
+
+    @complexity Linear in the size of the passed @a value.
+
+    @throw std::bad_alloc if allocation for string value fails
+
+    @liveexample{The following code shows the construction of a string value
+    from a compatible type.,basic_json__CompatibleStringType}
+
+    @sa basic_json(const string_t&)
+    */
+    template <class CompatibleStringType, typename
+              std::enable_if<
+                  std::is_constructible<string_t, CompatibleStringType>::value, int>::type
+              = 0>
+    basic_json(const CompatibleStringType& value)
+        // convert to string_t first, then store it the same way the string_t
+        // constructor does
+        : m_type{value_t::string},
+          m_value{string_t(value)}
+    {
+    }
+
+    /*!
+    @brief create a boolean (explicit)
+
+    Creates a JSON boolean type from a given value.
+
+    @param[in] value  a boolean value to store
+
+    @complexity Constant.
+
+    @liveexample{The example below demonstrates boolean
+    values.,basic_json__boolean_t}
+    */
+    basic_json(boolean_t value)
+        : m_type{value_t::boolean},
+          m_value{value}   // booleans are kept by value in json_value
+    {
+    }
+
+    /*!
+    @brief create an integer number (explicit)
+
+    Create an integer number JSON value with a given content.
+
+    @tparam T  helper type to compare number_integer_t and int (not visible in
+    the interface).
+
+    @param[in] value  an integer to create a JSON number from
+
+    @note This constructor would have the same signature as @ref
+    basic_json(const int value), so we need to switch this one off in case
+    number_integer_t is the same as int. This is done via the helper type @a T.
+
+    @complexity Constant.
+
+    @liveexample{The example below shows the construction of a JSON integer
+    number value.,basic_json__number_integer_t}
+
+    @sa basic_json(const int)
+    */
+    template<typename T,
+             typename std::enable_if<
+                 not (std::is_same<T, int>::value)
+                 and std::is_same<T, number_integer_t>::value
+                 , int>::type = 0>
+    basic_json(const number_integer_t value)
+        : m_type{value_t::number_integer},
+          m_value{value}   // already number_integer_t, stored unchanged
+    {
+    }
+
+    /*!
+    @brief create an integer number from an enum type (explicit)
+
+    Create an integer number JSON value with a given content.
+
+    @param[in] value  an integer to create a JSON number from
+
+    @note This constructor allows to pass enums directly to a constructor. As
+    C++ has no way of specifying the type of an anonymous enum explicitly, we
+    can only rely on the fact that such values implicitly convert to int. As
+    int may already be the same type of number_integer_t, we may need to switch
+    off the constructor @ref basic_json(const number_integer_t).
+
+    @complexity Constant.
+
+    @liveexample{The example below shows the construction of a JSON integer
+    number value from an anonymous enum.,basic_json__const_int}
+
+    @sa basic_json(const number_integer_t)
+    */
+    basic_json(const int value)
+        : m_type{value_t::number_integer},
+          // explicit conversion to the integer type JSON numbers are stored as
+          m_value{static_cast<number_integer_t>(value)}
+    {
+    }
+
+    /*!
+    @brief create an integer number (implicit)
+
+    Create an integer number JSON value with a given content. This constructor
+    allows any type that can be used to construct values of type @ref
+    number_integer_t. Examples may include the types `int`, `int32_t`, or
+    `short`.
+
+    @tparam CompatibleNumberIntegerType an integer type which is compatible to
+    @ref number_integer_t.
+
+    @param[in] value  an integer to create a JSON number from
+
+    @complexity Constant.
+
+    @liveexample{The example below shows the construction of several JSON
+    integer number values from compatible
+    types.,basic_json__CompatibleIntegerNumberType}
+
+    @sa basic_json(const number_integer_t)
+    */
+    template<typename CompatibleNumberIntegerType, typename
+             std::enable_if<
+                 std::is_constructible<number_integer_t, CompatibleNumberIntegerType>::value and
+                 std::numeric_limits<CompatibleNumberIntegerType>::is_integer, CompatibleNumberIntegerType>::type
+             = 0>
+    basic_json(const CompatibleNumberIntegerType value) noexcept
+        : m_type{value_t::number_integer},
+          // explicit conversion to the integer type JSON numbers are stored as
+          m_value{static_cast<number_integer_t>(value)}
+    {
+    }
+
+    /*!
+    @brief create a floating-point number (explicit)
+
+    Create a floating-point number JSON value with a given content.
+
+    @param[in] value  a floating-point value to create a JSON number from
+
+    @note RFC 7159 <http://www.rfc-editor.org/rfc/rfc7159.txt>, section 6
+    disallows NaN values:
+    > Numeric values that cannot be represented in the grammar below (such
+    > as Infinity and NaN) are not permitted.
+    In case the parameter @a value is not a number, a JSON null value is
+    created instead.
+
+    @complexity Constant.
+
+    @liveexample{The following example creates several floating-point
+    values.,basic_json__number_float_t}
+    */
+    basic_json(const number_float_t value)
+    {
+        if (std::isfinite(value))
+        {
+            // regular number: store it as is
+            m_type = value_t::number_float;
+            m_value = value;
+        }
+        else
+        {
+            // infinity and NaN cannot be expressed in JSON -> store null
+            m_type = value_t::null;
+            m_value = json_value();
+        }
+    }
+
+    /*!
+    @brief create a floating-point number (implicit)
+
+    Create a floating-point number JSON value with a given content. This
+    constructor allows any type that can be used to construct values of type
+    @ref number_float_t. Examples may include the types `float`.
+
+    @tparam CompatibleNumberFloatType a floating-point type which is compatible
+    to @ref number_float_t.
+
+    @param[in] value  a floating-point to create a JSON number from
+
+    @note RFC 7159 <http://www.rfc-editor.org/rfc/rfc7159.txt>, section 6
+    disallows NaN values:
+    > Numeric values that cannot be represented in the grammar below (such
+    > as Infinity and NaN) are not permitted.
+    In case the parameter @a value is not a number, a JSON null value is
+    created instead.
+
+    @complexity Constant.
+
+    @liveexample{The example below shows the construction of several JSON
+    floating-point number values from compatible
+    types.,basic_json__CompatibleNumberFloatType}
+
+    @sa basic_json(const number_float_t)
+    */
+    template<typename CompatibleNumberFloatType, typename = typename
+             std::enable_if<
+                 std::is_constructible<number_float_t, CompatibleNumberFloatType>::value and
+                 std::is_floating_point<CompatibleNumberFloatType>::value>::type
+             >
+    basic_json(const CompatibleNumberFloatType value) noexcept
+        // convert to number_float_t and delegate, so the replacement of
+        // infinity/NaN by null lives in a single constructor
+        : basic_json(static_cast<number_float_t>(value))
+    {
+    }
+
+    /*!
+    @brief create a container (array or object) from an initializer list
+
+    Creates a JSON value of type array or object from the passed initializer
+    list @a init. In case @a type_deduction is `true` (default), the type of
+    the JSON value to be created is deduced from the initializer list @a init
+    according to the following rules:
+
+    1. If the list is empty, an empty JSON object value `{}` is created.
+    2. If the list consists of pairs whose first element is a string, a JSON
+    object value is created where the first elements of the pairs are treated
+    as keys and the second elements as values.
+    3. In all other cases, an array is created.
+
+    The rules aim to create the best fit between a C++ initializer list and
+    JSON values. The rationale is as follows:
+
+    1. The empty initializer list is written as `{}` which is exactly an empty
+    JSON object.
+    2. C++ has no way of describing mapped types other than to provide a list of
+    pairs. As JSON requires that keys must be of type string, rule 2 is the
+    weakest constraint one can pose on initializer lists to interpret them as
+    an object.
+    3. In all other cases, the initializer list could not be interpreted as
+    JSON object type, so interpreting it as JSON array type is safe.
+
+    With the rules described above, the following JSON values cannot be
+    expressed by an initializer list:
+
+    - the empty array (`[]`): use @ref array(std::initializer_list<basic_json>)
+      with an empty initializer list in this case
+    - arrays whose elements satisfy rule 2: use @ref
+      array(std::initializer_list<basic_json>) with the same initializer list
+      in this case
+
+    @note When used without parentheses around an empty initializer list, @ref
+    basic_json() is called instead of this function, yielding the JSON null
+    value.
+
+    @param[in] init  initializer list with JSON values
+
+    @param[in] type_deduction internal parameter; when set to `true`, the type
+    of the JSON value is deduced from the initializer list @a init; when set
+    to `false`, the type provided via @a manual_type is forced. This mode is
+    used by the functions @ref array(std::initializer_list<basic_json>) and
+    @ref object(std::initializer_list<basic_json>).
+
+    @param[in] manual_type internal parameter; when @a type_deduction is set to
+    `false`, the created JSON value will use the provided type (only @ref
+    value_t::array and @ref value_t::object are valid); when @a type_deduction
+    is set to `true`, this parameter has no effect
+
+    @throw std::domain_error if @a type_deduction is `false`, @a manual_type is
+    `value_t::object`, but @a init contains an element which is not a pair
+    whose first element is a string
+
+    @complexity Linear in the size of the initializer list @a init.
+
+    @liveexample{The example below shows how JSON values are created from
+    initializer lists,basic_json__list_init_t}
+
+    @sa basic_json array(std::initializer_list<basic_json>) - create a JSON
+    array value from an initializer list
+    @sa basic_json object(std::initializer_list<basic_json>) - create a JSON
+    object value from an initializer list
+    */
+    basic_json(std::initializer_list<basic_json> init,
+               bool type_deduction = true,
+               value_t manual_type = value_t::array)
+    {
+        // The list can describe an object only if every element is an array
+        // of exactly two elements whose first element is a string (the key).
+        bool is_object = true;
+
+        for (const auto& element : init)
+        {
+            if (element.m_type != value_t::array or element.size() != 2
+                    or element[0].m_type != value_t::string)
+            {
+                // one offending element is enough to rule out an object
+                is_object = false;
+                break;
+            }
+        }
+
+        // adjust type if type deduction is not wanted
+        if (not type_deduction)
+        {
+            // if array is wanted, do not create an object though possible
+            if (manual_type == value_t::array)
+            {
+                is_object = false;
+            }
+
+            // if object is wanted but impossible, throw an exception
+            if (manual_type == value_t::object and not is_object)
+            {
+                throw std::domain_error("cannot create object from initializer list");
+            }
+        }
+
+        if (is_object)
+        {
+            // the initializer list is a list of pairs -> create object
+            m_type = value_t::object;
+            m_value = value_t::object;
+
+            // Copy key and value instead of moving them. The elements of an
+            // initializer list are const and their storage is shared with the
+            // caller's list. Moving from the dereferenced string pointer would
+            // nevertheless steal the key (the pointee is not const) and leave
+            // the caller's list with empty keys; moving the const value is
+            // only a copy anyway.
+            for (const auto& element : init)
+            {
+                m_value.object->emplace(*(element[0].m_value.string), element[1]);
+            }
+        }
+        else
+        {
+            // the initializer list describes an array -> create array
+            // (std::move on an initializer list would be a plain copy)
+            m_type = value_t::array;
+            m_value.array = create<array_t>(init);
+        }
+    }
+
+    /*!
+    @brief explicitly create an array from an initializer list
+
+    Creates a JSON array value from a given initializer list. That is, given a
+    list of values `a, b, c`, creates the JSON value `[a, b, c]`. If the
+    initializer list is empty, the empty array `[]` is created.
+
+    @note This function is only needed to express two edge cases that cannot be
+    realized with the initializer list constructor (@ref
+    basic_json(std::initializer_list<basic_json>, bool, value_t)). These cases
+    are:
+    1. creating an array whose elements are all pairs whose first element is a
+    string - in this case, the initializer list constructor would create an
+    object, taking the first elements as keys
+    2. creating an empty array - passing the empty initializer list to the
+    initializer list constructor yields an empty object
+
+    @param[in] init  initializer list with JSON values to create an array from
+    (optional)
+
+    @return JSON array value
+
+    @complexity Linear in the size of @a init.
+
+    @liveexample{The following code shows an example for the @ref array
+    function.,array}
+
+    @sa basic_json(std::initializer_list<basic_json>, bool, value_t) - create a
+    JSON value from an initializer list
+    @sa basic_json object(std::initializer_list<basic_json>) - create a JSON
+    object value from an initializer list
+    */
+    static basic_json array(std::initializer_list<basic_json> init =
+                                std::initializer_list<basic_json>())
+    {
+        // switch off type deduction and force the array interpretation
+        const bool deduce_type = false;
+        return basic_json(init, deduce_type, value_t::array);
+    }
+
+    /*!
+    @brief explicitly create an object from an initializer list
+
+    Creates a JSON object value from a given initializer list. The initializer
+    list's elements must be pairs, and their first elements must be strings. If
+    the initializer list is empty, the empty object `{}` is created.
+
+    @note This function is only added for symmetry reasons. In contrast to the
+    related function @ref basic_json array(std::initializer_list<basic_json>),
+    there are no cases which can only be expressed by this function. That is,
+    any initializer list @a init can also be passed to the initializer list
+    constructor @ref basic_json(std::initializer_list<basic_json>, bool,
+    value_t).
+
+    @param[in] init  initializer list to create an object from (optional)
+
+    @return JSON object value
+
+    @throw std::domain_error if @a init is not a list of pairs whose first elements are
+    strings; thrown by @ref basic_json(std::initializer_list<basic_json>, bool,
+    value_t)
+
+    @complexity Linear in the size of @a init.
+
+    @liveexample{The following code shows an example for the @ref object
+    function.,object}
+
+    @sa basic_json(std::initializer_list<basic_json>, bool, value_t) - create a
+    JSON value from an initializer list
+    @sa basic_json array(std::initializer_list<basic_json>) - create a JSON
+    array value from an initializer list
+    */
+    static basic_json object(std::initializer_list<basic_json> init =
+                                 std::initializer_list<basic_json>())
+    {
+        // switch off type deduction and force the object interpretation; the
+        // constructor throws if the list cannot be read as key/value pairs
+        const bool deduce_type = false;
+        return basic_json(init, deduce_type, value_t::object);
+    }
+
+    /*!
+    @brief construct an array with count copies of given value
+
+    Constructs a JSON array value by creating @a count copies of a passed
+    value. In case @a count is `0`, an empty array is created. As postcondition,
+    `std::distance(begin(),end()) == count` holds.
+
+    @param[in] count  the number of JSON copies of @a value to create
+    @param[in] value  the JSON value to copy
+
+    @complexity Linear in @a count.
+
+    @liveexample{The following code shows examples for the @ref
+    basic_json(size_type\, const basic_json&)
+    constructor.,basic_json__size_type_basic_json}
+    */
+    basic_json(size_type count, const basic_json& value)
+        : m_type{value_t::array}
+    {
+        // create an array_t from (count, value), i.e. count copies of value
+        m_value.array = create<array_t>(count, value);
+    }
+
+    /*!
+    @brief construct a JSON container given an iterator range
+
+    Constructs the JSON value with the contents of the range `[first, last)`.
+    The semantics depends on the different types a JSON value can have:
+    - In case of primitive types (number, boolean, or string), @a first must
+      be `begin()` and @a last must be `end()`. In this case, the value is
+      copied. Otherwise, std::out_of_range is thrown.
+    - In case of structured types (array, object), the constructor behaves
+      as similar versions for `std::vector`.
+    - In case of a null type, std::domain_error is thrown.
+
+    @tparam InputIT an input iterator type (@ref iterator or @ref
+    const_iterator)
+
+    @param[in] first begin of the range to copy from (included)
+    @param[in] last end of the range to copy from (excluded)
+
+    @throw std::domain_error if iterators are not compatible; that is, do not
+    belong to the same JSON value
+    @throw std::out_of_range if iterators are for a primitive type (number,
+    boolean, or string) where an out of range error can be detected easily
+    @throw std::bad_alloc if allocation for object, array, or string fails
+    @throw std::domain_error if called with a null value
+
+    @complexity Linear in distance between @a first and @a last.
+
+    @liveexample{The example below shows several ways to create JSON values by
+    specifying a subrange with iterators.,basic_json__InputIt_InputIt}
+    */
+    template <class InputIT, typename
+              std::enable_if<
+                  std::is_same<InputIT, typename basic_json_t::iterator>::value or
+                  std::is_same<InputIT, typename basic_json_t::const_iterator>::value
+                  , int>::type
+              = 0>
+    basic_json(InputIT first, InputIT last)
+    {
+        // Validate the iterators before reading anything through them. An
+        // iterator that is not attached to a JSON value carries a null
+        // m_object (presumably the state after default construction), so the
+        // type must not be read in the member initializer list. Such
+        // iterators, and iterators of two different values, do not describe a
+        // range.
+        if (first.m_object == nullptr or first.m_object != last.m_object)
+        {
+            throw std::domain_error("iterators are not compatible");
+        }
+
+        // the new value has the type of the value the range belongs to
+        m_type = first.m_object->m_type;
+
+        // primitive values only offer the complete range [begin(), end())
+        switch (m_type)
+        {
+            case value_t::number_integer:
+            case value_t::number_float:
+            case value_t::boolean:
+            case value_t::string:
+            {
+                if (not first.m_it.primitive_iterator.is_begin() or not last.m_it.primitive_iterator.is_end())
+                {
+                    throw std::out_of_range("iterators out of range");
+                }
+                break;
+            }
+
+            default:
+            {
+                break;
+            }
+        }
+
+        // copy the payload: primitives by value, containers by sub-range
+        switch (m_type)
+        {
+            case value_t::number_integer:
+            {
+                m_value.number_integer = first.m_object->m_value.number_integer;
+                break;
+            }
+
+            case value_t::number_float:
+            {
+                m_value.number_float = first.m_object->m_value.number_float;
+                break;
+            }
+
+            case value_t::boolean:
+            {
+                m_value.boolean = first.m_object->m_value.boolean;
+                break;
+            }
+
+            case value_t::string:
+            {
+                m_value = *first.m_object->m_value.string;
+                break;
+            }
+
+            case value_t::object:
+            {
+                m_value.object = create<object_t>(first.m_it.object_iterator, last.m_it.object_iterator);
+                break;
+            }
+
+            case value_t::array:
+            {
+                m_value.array = create<array_t>(first.m_it.array_iterator, last.m_it.array_iterator);
+                break;
+            }
+
+            default:
+            {
+                // null (and any other type without elements) has no range
+                throw std::domain_error("cannot use construct with iterators from " + first.m_object->type_name());
+            }
+        }
+    }
+
+    ///////////////////////////////////////
+    // other constructors and destructor //
+    ///////////////////////////////////////
+
+    /*!
+    @brief copy constructor
+
+    Creates a copy of a given JSON value.
+
+    @param[in] other  the JSON value to copy
+
+    @complexity Linear in the size of @a other.
+
+    @requirement This function satisfies the Container requirements:
+    - The complexity is linear.
+    - As postcondition, it holds: `other == basic_json(other)`.
+
+    @throw std::bad_alloc if allocation for object, array, or string fails.
+
+    @liveexample{The following code shows an example for the copy
+    constructor.,basic_json__basic_json}
+    */
+    basic_json(const basic_json& other)
+        : m_type{other.m_type}
+    {
+        // The payload is copied according to the stored type: values kept
+        // directly in json_value are assigned as they are, values kept behind
+        // a pointer are dereferenced and handed to json_value as a whole.
+        switch (m_type)
+        {
+            case value_t::boolean:
+                m_value = other.m_value.boolean;
+                break;
+
+            case value_t::number_integer:
+                m_value = other.m_value.number_integer;
+                break;
+
+            case value_t::number_float:
+                m_value = other.m_value.number_float;
+                break;
+
+            case value_t::string:
+                m_value = *other.m_value.string;
+                break;
+
+            case value_t::array:
+                m_value = *other.m_value.array;
+                break;
+
+            case value_t::object:
+                m_value = *other.m_value.object;
+                break;
+
+            case value_t::null:
+            case value_t::discarded:
+                // nothing to copy besides the type
+                break;
+        }
+    }
+
+    /*!
+    @brief move constructor
+
+    Move constructor. Constructs a JSON value with the contents of the given
+    value @a other using move semantics. It "steals" the resources from @a
+    other and leaves it as JSON null value.
+
+    @param[in,out] other  value to move to this object
+
+    @post @a other is a JSON null value
+
+    @complexity Constant.
+
+    @liveexample{The code below shows the move constructor explicitly called
+    via std::move.,basic_json__moveconstructor}
+    */
+    basic_json(basic_json&& other) noexcept
+        : m_type(other.m_type),
+          m_value(std::move(other.m_value))
+    {
+        // The payload now belongs to this value. Reset the source to null so
+        // that its destructor does not release what was taken over.
+        other.m_value = json_value();
+        other.m_type = value_t::null;
+    }
+
+    /*!
+    @brief copy assignment
+
+    Copy assignment operator. Copies a JSON value via the "copy and swap"
+    strategy: It is expressed in terms of the copy constructor, destructor, and
+    the swap() member function.
+
+    @param[in] other  value to copy from
+
+    @complexity Linear.
+
+    @requirement This function satisfies the Container requirements:
+    - The complexity is linear.
+
+    @liveexample{The code below shows an example for the copy assignment. It
+    creates a copy of value `a` which is then swapped with `b`. Finally\, the
+    copy of `a` (which is the null value after the swap) is
+    destroyed.,basic_json__copyassignment}
+    */
+    reference& operator=(basic_json other) noexcept (
+        std::is_nothrow_move_constructible<value_t>::value and
+        std::is_nothrow_move_assignable<value_t>::value and
+        std::is_nothrow_move_constructible<json_value>::value and
+        std::is_nothrow_move_assignable<json_value>::value
+    )
+    {
+        // copy and swap: `other` was passed by value and is therefore already
+        // a separate value; after exchanging both members it holds the
+        // previous state of *this and cleans it up when it goes out of scope
+        using std::swap;
+        swap(m_value, other.m_value);
+        swap(m_type, other.m_type);
+        return *this;
+    }
+
+    /*!
+    @brief destructor
+
+    Destroys the JSON value and frees all allocated memory.
+
+    @complexity Linear.
+
+    @requirement This function satisfies the Container requirements:
+    - The complexity is linear.
+    - All stored elements are destroyed and all memory is freed.
+    */
+    ~basic_json()
+    {
+        // Only objects, arrays and strings are kept behind a pointer. Each is
+        // destroyed and deallocated through an allocator of its own type.
+        if (m_type == value_t::object)
+        {
+            AllocatorType<object_t> object_alloc;
+            object_alloc.destroy(m_value.object);
+            object_alloc.deallocate(m_value.object, 1);
+        }
+        else if (m_type == value_t::array)
+        {
+            AllocatorType<array_t> array_alloc;
+            array_alloc.destroy(m_value.array);
+            array_alloc.deallocate(m_value.array, 1);
+        }
+        else if (m_type == value_t::string)
+        {
+            AllocatorType<string_t> string_alloc;
+            string_alloc.destroy(m_value.string);
+            string_alloc.deallocate(m_value.string, 1);
+        }
+
+        // all other types need no specific destructor
+    }
+
+
+  public:
+    ///////////////////////
+    // object inspection //
+    ///////////////////////
+
+    /// @name object inspection
+    /// @{
+
+    /*!
+    @brief serialization
+
+    Serialization function for JSON values. The function tries to mimic
+    Python's @p json.dumps() function, and currently supports its @p indent
+    parameter.
+
+    @param[in] indent if indent is nonnegative, then array elements and object
+    members will be pretty-printed with that indent level. An indent level of 0
+    will only insert newlines. -1 (the default) selects the most compact
+    representation
+
+    @return string containing the serialization of the JSON value
+
+    @complexity Linear.
+
+    @liveexample{The following example shows the effect of different @a indent
+    parameters to the result of the serialization.,dump}
+
+    @see https://docs.python.org/2/library/json.html#json.dump
+    */
+    string_t dump(const int indent = -1) const
+    {
+        // a negative indent selects the compact form; otherwise the indent is
+        // passed on as the (non-negative) indentation step
+        const bool pretty_print = (indent >= 0);
+        const unsigned int indent_step = pretty_print ? static_cast<unsigned int>(indent) : 0u;
+
+        std::stringstream ss;
+        dump(ss, pretty_print, indent_step);
+        return ss.str();
+    }
+
+    /*!
+    @brief return the type of the JSON value (explicit)
+
+    Return the type of the JSON value as a value from the @ref value_t
+    enumeration.
+
+    @return the type of the JSON value
+
+    @complexity Constant.
+
+    @liveexample{The following code exemplifies @ref type() for all JSON
+    types.,type}
+    */
+    value_t type() const noexcept
+    {
+        // plain accessor: the type tag is kept in m_type
+        return m_type;
+    }
+
+    /*!
+    @brief check whether the JSON type is primitive
+
+    A value is primitive iff it is a string, a number, a boolean, or null.
+
+    @return `true` if type is primitive (string, number, boolean, or null),
+    `false` otherwise.
+
+    @complexity Constant.
+
+    @liveexample{The following code exemplifies @ref is_primitive for all JSON
+    types.,is_primitive}
+    */
+    bool is_primitive() const noexcept
+    {
+        return is_number() or is_boolean() or is_string() or is_null();
+    }
+
+    /*!
+    @brief check whether the JSON type is structured
+
+    A value is structured iff it is an array or an object.
+
+    @return `true` if type is structured (array or object), `false` otherwise.
+
+    @complexity Constant.
+
+    @liveexample{The following code exemplifies @ref is_structured for all JSON
+    types.,is_structured}
+    */
+    bool is_structured() const noexcept
+    {
+        return is_object() or is_array();
+    }
+
+    /*!
+    @brief check whether the JSON value is null
+
+    Compares the stored type tag against @ref value_t::null.
+
+    @return `true` if type is null, `false` otherwise.
+
+    @complexity Constant.
+
+    @liveexample{The following code exemplifies @ref is_null for all JSON
+    types.,is_null}
+    */
+    bool is_null() const noexcept
+    {
+        return value_t::null == m_type;
+    }
+
+    /*!
+    @brief check whether the JSON value is a boolean
+
+    Compares the stored type tag against @ref value_t::boolean.
+
+    @return `true` if type is boolean, `false` otherwise.
+
+    @complexity Constant.
+
+    @liveexample{The following code exemplifies @ref is_boolean for all JSON
+    types.,is_boolean}
+    */
+    bool is_boolean() const noexcept
+    {
+        return value_t::boolean == m_type;
+    }
+
+    /*!
+    @brief check whether the JSON value is a number
+
+    A value is a number iff it is either an integer number or a
+    floating-point number.
+
+    @return `true` if type is number, `false` otherwise.
+
+    @complexity Constant.
+
+    @liveexample{The following code exemplifies @ref is_number for all JSON
+    types.,is_number}
+    */
+    bool is_number() const noexcept
+    {
+        return is_number_float() or is_number_integer();
+    }
+
+    /*!
+    @brief check whether the JSON value is an integer number
+
+    Compares the stored type tag against @ref value_t::number_integer.
+    Floating-point values are not reported as integers.
+
+    @return `true` if type is an integer number, `false` otherwise.
+
+    @complexity Constant.
+
+    @liveexample{The following code exemplifies @ref is_number_integer for all
+    JSON types.,is_number_integer}
+    */
+    bool is_number_integer() const noexcept
+    {
+        return value_t::number_integer == m_type;
+    }
+
+    /*!
+    @brief check whether the JSON value is a floating-point number
+
+    Compares the stored type tag against @ref value_t::number_float. Integer
+    values are not reported as floating-point numbers.
+
+    @return `true` if type is a floating-point number, `false` otherwise.
+
+    @complexity Constant.
+
+    @liveexample{The following code exemplifies @ref is_number_float for all
+    JSON types.,is_number_float}
+    */
+    bool is_number_float() const noexcept
+    {
+        return value_t::number_float == m_type;
+    }
+
+    /*!
+    @brief check whether the JSON value is an object
+
+    Compares the stored type tag against @ref value_t::object.
+
+    @return `true` if type is object, `false` otherwise.
+
+    @complexity Constant.
+
+    @liveexample{The following code exemplifies @ref is_object for all JSON
+    types.,is_object}
+    */
+    bool is_object() const noexcept
+    {
+        return value_t::object == m_type;
+    }
+
+    /*!
+    @brief check whether the JSON value is an array
+
+    Compares the stored type tag against @ref value_t::array.
+
+    @return `true` if type is array, `false` otherwise.
+
+    @complexity Constant.
+
+    @liveexample{The following code exemplifies @ref is_array for all JSON
+    types.,is_array}
+    */
+    bool is_array() const noexcept
+    {
+        return value_t::array == m_type;
+    }
+
+    /*!
+    @brief check whether the JSON value is a string
+
+    Compares the stored type tag against @ref value_t::string.
+
+    @return `true` if type is string, `false` otherwise.
+
+    @complexity Constant.
+
+    @liveexample{The following code exemplifies @ref is_string for all JSON
+    types.,is_string}
+    */
+    bool is_string() const noexcept
+    {
+        return value_t::string == m_type;
+    }
+
+    /*!
+    @brief check whether the JSON value is discarded
+
+    A value is discarded iff it was dropped during parsing by a callback
+    function (see @ref parser_callback_t).
+
+    @note This function will always be `false` for JSON values after parsing.
+    That is, discarded values can only occur during parsing, but will be
+    removed when inside a structured value or replaced by null in other cases.
+
+    @return `true` if type is discarded, `false` otherwise.
+
+    @complexity Constant.
+
+    @liveexample{The following code exemplifies @ref is_discarded for all JSON
+    types.,is_discarded}
+    */
+    bool is_discarded() const noexcept
+    {
+        return value_t::discarded == m_type;
+    }
+
+    /*!
+    @brief return the type of the JSON value (implicit)
+
+    Implicitly return the type of the JSON value as a value from the @ref
+    value_t enumeration. The result is the same stored type tag that the
+    explicit @ref type() accessor returns.
+
+    @return the type of the JSON value
+
+    @complexity Constant.
+
+    @liveexample{The following code exemplifies the value_t operator for all
+    JSON types.,operator__value_t}
+    */
+    operator value_t() const noexcept
+    {
+        return m_type;
+    }
+
+    /// @}
+
+  private:
+    //////////////////
+    // value access //
+    //////////////////
+
+    /// get an object (explicit): build a map-like container @a T from the
+    /// members of the stored object; selected when T's key and mapped types
+    /// are convertible from ours. Throws std::domain_error for non-objects.
+    template <class T, typename
+              std::enable_if<
+                  std::is_convertible<typename object_t::key_type, typename T::key_type>::value and
+                  std::is_convertible<basic_json_t, typename T::mapped_type>::value
+                  , int>::type = 0>
+    T get_impl(T*) const
+    {
+        if (m_type != value_t::object)
+        {
+            throw std::domain_error("type must be object, but is " + type_name());
+        }
+
+        return T(m_value.object->begin(), m_value.object->end());
+    }
+
+    /// get an object (explicit): return a copy of the stored object_t;
+    /// throws std::domain_error if this value is not an object
+    object_t get_impl(object_t*) const
+    {
+        if (m_type != value_t::object)
+        {
+            throw std::domain_error("type must be object, but is " + type_name());
+        }
+
+        return *(m_value.object);
+    }
+
+    /// get an array (explicit): convert every element of the stored array to
+    /// T::value_type and insert it into a freshly constructed container @a T.
+    /// Selected for container types whose value_type is convertible from (but
+    /// not identical to) basic_json and that are neither arithmetic,
+    /// string-like, nor map-like. Throws std::domain_error for non-arrays.
+    template <class T, typename
+              std::enable_if<
+                  std::is_convertible<basic_json_t, typename T::value_type>::value and
+                  not std::is_same<basic_json_t, typename T::value_type>::value and
+                  not std::is_arithmetic<T>::value and
+                  not std::is_convertible<std::string, T>::value and
+                  not has_mapped_type<T>::value
+                  , int>::type = 0>
+    T get_impl(T*) const
+    {
+        switch (m_type)
+        {
+            case (value_t::array):
+            {
+                T to_vector;
+                // take each element by const reference: get<>() is a const
+                // member, so a by-value parameter would only add a deep copy
+                std::transform(m_value.array->begin(), m_value.array->end(),
+                               std::inserter(to_vector, to_vector.end()), [](const basic_json & i)
+                {
+                    return i.get<typename T::value_type>();
+                });
+                return to_vector;
+            }
+            default:
+            {
+                throw std::domain_error("type must be array, but is " + type_name());
+            }
+        }
+    }
+
+    /// get an array (explicit): convert every element of the stored array to
+    /// @a T and collect the results in a std::vector<T>. Storage is reserved
+    /// up front. Throws std::domain_error for non-arrays.
+    template <class T, typename
+              std::enable_if<
+                  std::is_convertible<basic_json_t, T>::value and
+                  not std::is_same<basic_json_t, T>::value
+                  , int>::type = 0>
+    std::vector<T> get_impl(std::vector<T>*) const
+    {
+        switch (m_type)
+        {
+            case (value_t::array):
+            {
+                std::vector<T> to_vector;
+                to_vector.reserve(m_value.array->size());
+                // append at the back; elements are taken by const reference
+                // to avoid deep-copying each one before the conversion
+                std::transform(m_value.array->begin(), m_value.array->end(),
+                               std::back_inserter(to_vector), [](const basic_json & i)
+                {
+                    return i.get<T>();
+                });
+                return to_vector;
+            }
+            default:
+            {
+                throw std::domain_error("type must be array, but is " + type_name());
+            }
+        }
+    }
+
+    /// get an array (explicit): build a container @a T of basic_json values
+    /// directly from the stored array's range; selected when T::value_type is
+    /// basic_json and T is not map-like. Throws std::domain_error for
+    /// non-arrays.
+    template <class T, typename
+              std::enable_if<
+                  std::is_same<basic_json, typename T::value_type>::value and
+                  not has_mapped_type<T>::value
+                  , int>::type = 0>
+    T get_impl(T*) const
+    {
+        if (m_type != value_t::array)
+        {
+            throw std::domain_error("type must be array, but is " + type_name());
+        }
+
+        return T(m_value.array->begin(), m_value.array->end());
+    }
+
+    /// get an array (explicit): return a copy of the stored array_t;
+    /// throws std::domain_error if this value is not an array
+    array_t get_impl(array_t*) const
+    {
+        if (m_type != value_t::array)
+        {
+            throw std::domain_error("type must be array, but is " + type_name());
+        }
+
+        return *(m_value.array);
+    }
+
+    /// get a string (explicit): return the stored string converted to @a T;
+    /// selected when string_t is convertible to T. Throws std::domain_error
+    /// if this value is not a string.
+    template <typename T, typename
+              std::enable_if<
+                  std::is_convertible<string_t, T>::value
+                  , int>::type = 0>
+    T get_impl(T*) const
+    {
+        if (m_type != value_t::string)
+        {
+            throw std::domain_error("type must be string, but is " + type_name());
+        }
+
+        return *m_value.string;
+    }
+
+    /// get a number (explicit): cast the stored integer or floating-point
+    /// number to the arithmetic type @a T via static_cast; throws
+    /// std::domain_error if this value is not a number
+    template<typename T, typename
+             std::enable_if<
+                 std::is_arithmetic<T>::value
+                 , int>::type = 0>
+    T get_impl(T*) const
+    {
+        if (m_type == value_t::number_integer)
+        {
+            return static_cast<T>(m_value.number_integer);
+        }
+
+        if (m_type == value_t::number_float)
+        {
+            return static_cast<T>(m_value.number_float);
+        }
+
+        throw std::domain_error("type must be number, but is " + type_name());
+    }
+
+    /// get a boolean (explicit): return the stored boolean;
+    /// throws std::domain_error if this value is not a boolean
+    boolean_t get_impl(boolean_t*) const
+    {
+        if (m_type != value_t::boolean)
+        {
+            throw std::domain_error("type must be boolean, but is " + type_name());
+        }
+
+        return m_value.boolean;
+    }
+
+    /// get a pointer to the stored object, or nullptr if this is not an object
+    object_t* get_impl_ptr(object_t*) noexcept
+    {
+        if (not is_object())
+        {
+            return nullptr;
+        }
+
+        return m_value.object;
+    }
+
+    /// get a const pointer to the stored object, or nullptr if this is not an object
+    const object_t* get_impl_ptr(const object_t*) const noexcept
+    {
+        if (not is_object())
+        {
+            return nullptr;
+        }
+
+        return m_value.object;
+    }
+
+    /// get a pointer to the stored array, or nullptr if this is not an array
+    array_t* get_impl_ptr(array_t*) noexcept
+    {
+        if (not is_array())
+        {
+            return nullptr;
+        }
+
+        return m_value.array;
+    }
+
+    /// get a const pointer to the stored array, or nullptr if this is not an array
+    const array_t* get_impl_ptr(const array_t*) const noexcept
+    {
+        if (not is_array())
+        {
+            return nullptr;
+        }
+
+        return m_value.array;
+    }
+
+    /// get a pointer to the stored string, or nullptr if this is not a string
+    string_t* get_impl_ptr(string_t*) noexcept
+    {
+        if (not is_string())
+        {
+            return nullptr;
+        }
+
+        return m_value.string;
+    }
+
+    /// get a const pointer to the stored string, or nullptr if this is not a string
+    const string_t* get_impl_ptr(const string_t*) const noexcept
+    {
+        if (not is_string())
+        {
+            return nullptr;
+        }
+
+        return m_value.string;
+    }
+
+    /// get a pointer to the stored boolean, or nullptr if this is not a boolean
+    boolean_t* get_impl_ptr(boolean_t*) noexcept
+    {
+        if (not is_boolean())
+        {
+            return nullptr;
+        }
+
+        return &m_value.boolean;
+    }
+
+    /// get a const pointer to the stored boolean, or nullptr if this is not a boolean
+    const boolean_t* get_impl_ptr(const boolean_t*) const noexcept
+    {
+        if (not is_boolean())
+        {
+            return nullptr;
+        }
+
+        return &m_value.boolean;
+    }
+
+    /// get a pointer to the stored integer number, or nullptr if this is not
+    /// an integer number
+    number_integer_t* get_impl_ptr(number_integer_t*) noexcept
+    {
+        if (not is_number_integer())
+        {
+            return nullptr;
+        }
+
+        return &m_value.number_integer;
+    }
+
+    /// get a const pointer to the stored integer number, or nullptr if this
+    /// is not an integer number
+    const number_integer_t* get_impl_ptr(const number_integer_t*) const noexcept
+    {
+        if (not is_number_integer())
+        {
+            return nullptr;
+        }
+
+        return &m_value.number_integer;
+    }
+
+    /// get a pointer to the stored floating-point number, or nullptr if this
+    /// is not a floating-point number
+    number_float_t* get_impl_ptr(number_float_t*) noexcept
+    {
+        if (not is_number_float())
+        {
+            return nullptr;
+        }
+
+        return &m_value.number_float;
+    }
+
+    /// get a const pointer to the stored floating-point number, or nullptr if
+    /// this is not a floating-point number
+    const number_float_t* get_impl_ptr(const number_float_t*) const noexcept
+    {
+        if (not is_number_float())
+        {
+            return nullptr;
+        }
+
+        return &m_value.number_float;
+    }
+
+  public:
+
+    /// @name value access
+    /// @{
+
+    /*!
+    @brief get a value (explicit)
+
+    Explicitly converts the JSON value to a compatible value of type
+    @a ValueType and returns the result by value.
+
+    @tparam ValueType non-pointer type compatible to the JSON value, for
+    instance `int` for JSON integer numbers, `bool` for JSON booleans, or
+    `std::vector` types for JSON arrays
+
+    @return copy of the JSON value, converted to type @a ValueType
+
+    @throw std::domain_error in case passed type @a ValueType is incompatible
+    to JSON
+
+    @complexity Linear in the size of the JSON value.
+
+    @liveexample{The example below shows several conversions from JSON values
+    to other types. There are a few things to note: (1) Floating-point numbers
+    can be converted to integers\, (2) A JSON array can be converted to a
+    standard `std::vector<short>`\, (3) A JSON object can be converted to C++
+    associative containers such as `std::unordered_map<std::string\,
+    json>`.,get__ValueType_const}
+
+    @internal
+    The idea of using a casted null pointer to choose the correct
+    implementation is from <http://stackoverflow.com/a/8315197/266378>.
+    @endinternal
+
+    @sa @ref operator ValueType() const for implicit conversion
+    @sa @ref get() for pointer-member access
+    */
+    template<typename ValueType, typename
+             std::enable_if<
+                 not std::is_pointer<ValueType>::value
+                 , int>::type = 0>
+    ValueType get() const
+    {
+        // a typed null pointer serves purely as an overload-selection tag
+        ValueType* const dispatch_tag = nullptr;
+        return get_impl(dispatch_tag);
+    }
+
+    /*!
+    @brief get a pointer value (explicit)
+
+    Explicit pointer access to the internally stored JSON value. No copies are
+    made; the call is forwarded to @ref get_ptr().
+
+    @warning Writing data to the pointee of the result yields an undefined
+    state.
+
+    @tparam PointerType pointer type; must be a pointer to @ref array_t, @ref
+    object_t, @ref string_t, @ref boolean_t, @ref number_integer_t, or @ref
+    number_float_t.
+
+    @return pointer to the internally stored JSON value if the requested pointer
+    type @a PointerType fits to the JSON value; `nullptr` otherwise
+
+    @complexity Constant.
+
+    @liveexample{The example below shows how pointers to internal values of a
+    JSON value can be requested. Note that no type conversions are made and a
+    `nullptr` is returned if the value and the requested pointer type do not
+    match.,get__PointerType}
+
+    @sa @ref get_ptr() for explicit pointer-member access
+    */
+    template<typename PointerType, typename
+             std::enable_if<
+                 std::is_pointer<PointerType>::value
+                 , int>::type = 0>
+    PointerType get() noexcept
+    {
+        // get_ptr does the actual lookup
+        PointerType stored = get_ptr<PointerType>();
+        return stored;
+    }
+
+    /*!
+    @brief get a pointer value (explicit)
+    @copydoc get()
+    */
+    template<typename PointerType, typename
+             std::enable_if<
+                 std::is_pointer<PointerType>::value
+                 , int>::type = 0>
+    const PointerType get() const noexcept
+    {
+        // get_ptr does the actual lookup
+        const PointerType stored = get_ptr<PointerType>();
+        return stored;
+    }
+
+    /*!
+    @brief get a pointer value (implicit)
+
+    Implicit pointer access to the internally stored JSON value. No copies are
+    made.
+
+    @warning Writing data to the pointee of the result yields an undefined
+    state.
+
+    @tparam PointerType pointer type; must be a pointer to @ref array_t, @ref
+    object_t, @ref string_t, @ref boolean_t, @ref number_integer_t, or @ref
+    number_float_t.
+
+    @return pointer to the internally stored JSON value if the requested pointer
+    type @a PointerType fits to the JSON value; `nullptr` otherwise
+
+    @complexity Constant.
+
+    @liveexample{The example below shows how pointers to internal values of a
+    JSON value can be requested. Note that no type conversions are made and a
+    `nullptr` is returned if the value and the requested pointer type do not
+    match.,get_ptr}
+    */
+    template<typename PointerType, typename
+             std::enable_if<
+                 std::is_pointer<PointerType>::value
+                 , int>::type = 0>
+    PointerType get_ptr() noexcept
+    {
+        // a typed null pointer picks the matching get_impl_ptr overload
+        PointerType const dispatch_tag = nullptr;
+        return get_impl_ptr(dispatch_tag);
+    }
+
+    /*!
+    @brief get a pointer value (implicit)
+    @copydoc get_ptr()
+
+    @note This overload participates only when @a PointerType is a pointer to
+    a const-qualified type (e.g. `const string_t*`). The constraint inspects
+    the pointee: `std::is_const` applied to the pointer type itself only
+    reports top-level constness and is `false` for `const T*`.
+    */
+    template<typename PointerType, typename
+             std::enable_if<
+                 std::is_pointer<PointerType>::value
+                 and std::is_const<typename std::remove_pointer<PointerType>::type>::value
+                 , int>::type = 0>
+    const PointerType get_ptr() const noexcept
+    {
+        // delegate the call to get_impl_ptr<>() const
+        return get_impl_ptr(static_cast<const PointerType>(nullptr));
+    }
+
+    /*!
+    @brief get a value (implicit)
+
+    Implicit type conversion between the JSON value and a compatible value. The
+    call is realized by calling @ref get() const.
+
+    @tparam ValueType non-pointer type compatible to the JSON value, for
+    instance `int` for JSON integer numbers, `bool` for JSON booleans, or
+    `std::vector` types for JSON arrays
+
+    @return copy of the JSON value, converted to type @a ValueType
+
+    @throw std::domain_error in case passed type @a ValueType is incompatible
+    to JSON, thrown by @ref get() const
+
+    @complexity Linear in the size of the JSON value.
+
+    @liveexample{The example below shows several conversions from JSON values
+    to other types. There are a few things to note: (1) Floating-point numbers
+    can be converted to integers\, (2) A JSON array can be converted to a
+    standard `std::vector<short>`\, (3) A JSON object can be converted to C++
+    associative containers such as `std::unordered_map<std::string\,
+    json>`.,operator__ValueType}
+    */
+    template<typename ValueType, typename
+             std::enable_if<
+                 not std::is_pointer<ValueType>::value
+                 , int>::type = 0>
+    operator ValueType() const
+    {
+        // delegate the call to get<>() const
+        return get<ValueType>();
+    }
+
+    /// @}
+
+
+    ////////////////////
+    // element access //
+    ////////////////////
+
+    /// @name element access
+    /// @{
+
+    /*!
+    @brief access specified array element with bounds checking
+
+    Yields a reference to the array element stored at position @a idx. The
+    access is forwarded to the underlying array's `at()`, which performs the
+    bounds check.
+
+    @param[in] idx  index of the element to access
+
+    @return reference to the element at index @a idx
+
+    @throw std::domain_error if JSON is not an array
+    @throw std::out_of_range if the index @a idx is out of range of the array;
+    that is, `idx >= size()`
+
+    @complexity Constant.
+
+    @liveexample{The example below shows how array elements can be read and
+    written using at.,at__size_type}
+    */
+    reference at(size_type idx)
+    {
+        // only arrays support positional access
+        if (m_type == value_t::array)
+        {
+            return m_value.array->at(idx);
+        }
+
+        throw std::domain_error("cannot use at() with " + type_name());
+    }
+
+    /*!
+    @brief access specified array element with bounds checking
+
+    Yields a const reference to the array element stored at position @a idx.
+    The access is forwarded to the underlying array's `at()`, which performs
+    the bounds check.
+
+    @param[in] idx  index of the element to access
+
+    @return const reference to the element at index @a idx
+
+    @throw std::domain_error if JSON is not an array
+    @throw std::out_of_range if the index @a idx is out of range of the array;
+    that is, `idx >= size()`
+
+    @complexity Constant.
+
+    @liveexample{The example below shows how array elements can be read using
+    at.,at__size_type_const}
+    */
+    const_reference at(size_type idx) const
+    {
+        // only arrays support positional access
+        if (m_type == value_t::array)
+        {
+            return m_value.array->at(idx);
+        }
+
+        throw std::domain_error("cannot use at() with " + type_name());
+    }
+
+    /*!
+    @brief access specified object element with bounds checking
+
+    Yields a reference to the object member stored under @a key. The access is
+    forwarded to the underlying object's `at()`, which checks that the key
+    exists.
+
+    @param[in] key  key of the element to access
+
+    @return reference to the element at key @a key
+
+    @throw std::domain_error if JSON is not an object
+    @throw std::out_of_range if the key @a key is not stored in the object;
+    that is, `find(key) == end()`
+
+    @complexity Logarithmic in the size of the container.
+
+    @liveexample{The example below shows how object elements can be read and
+    written using at.,at__object_t_key_type}
+    */
+    reference at(const typename object_t::key_type& key)
+    {
+        // only objects support keyed access
+        if (m_type == value_t::object)
+        {
+            return m_value.object->at(key);
+        }
+
+        throw std::domain_error("cannot use at() with " + type_name());
+    }
+
+    /*!
+    @brief access specified object element with bounds checking
+
+    Yields a const reference to the object member stored under @a key. The
+    access is forwarded to the underlying object's `at()`, which checks that
+    the key exists.
+
+    @param[in] key  key of the element to access
+
+    @return const reference to the element at key @a key
+
+    @throw std::domain_error if JSON is not an object
+    @throw std::out_of_range if the key @a key is not stored in the object;
+    that is, `find(key) == end()`
+
+    @complexity Logarithmic in the size of the container.
+
+    @liveexample{The example below shows how object elements can be read using
+    at.,at__object_t_key_type_const}
+    */
+    const_reference at(const typename object_t::key_type& key) const
+    {
+        // only objects support keyed access
+        if (m_type == value_t::object)
+        {
+            return m_value.object->at(key);
+        }
+
+        throw std::domain_error("cannot use at() with " + type_name());
+    }
+
+    /*!
+    @brief access specified array element
+
+    Returns a reference to the element at specified location @a idx.
+
+    @note If @a idx is beyond the range of the array (i.e., `idx >= size()`),
+    then the array is silently filled up with `null` values to make `idx` a
+    valid reference to the last stored element.
+
+    @note A `null` value is implicitly converted to an empty array first. The
+    array is allocated before the type is switched, so a failing allocation
+    leaves the value `null`.
+
+    @param[in] idx  index of the element to access
+
+    @return reference to the element at index @a idx
+
+    @throw std::domain_error if JSON is not an array or null
+
+    @complexity Constant if @a idx is in the range of the array. Otherwise
+    linear in `idx - size()`.
+
+    @liveexample{The example below shows how array elements can be read and
+    written using [] operator. Note the addition of `null`
+    values.,operatorarray__size_type}
+    */
+    reference operator[](size_type idx)
+    {
+        // implicitly convert null to array; allocate before switching the
+        // type so that a throwing allocation leaves this value untouched
+        if (m_type == value_t::null)
+        {
+            m_value.array = create<array_t>();
+            m_type = value_t::array;
+        }
+
+        // [] only works for arrays
+        if (m_type != value_t::array)
+        {
+            throw std::domain_error("cannot use operator[] with " + type_name());
+        }
+
+        // pad with null values in one insertion so that idx becomes valid
+        if (idx >= m_value.array->size())
+        {
+            m_value.array->insert(m_value.array->end(),
+                                  idx - m_value.array->size() + 1,
+                                  basic_json());
+        }
+
+        return m_value.array->operator[](idx);
+    }
+
+    /*!
+    @brief access specified array element
+
+    Yields a const reference to the array element stored at position @a idx.
+    The access is forwarded to the underlying array's `operator[]`; no range
+    check is made here.
+
+    @param[in] idx  index of the element to access
+
+    @return const reference to the element at index @a idx
+
+    @throw std::domain_error if JSON is not an array
+
+    @complexity Constant.
+
+    @liveexample{The example below shows how array elements can be read using
+    the [] operator.,operatorarray__size_type_const}
+    */
+    const_reference operator[](size_type idx) const
+    {
+        // [] with an index only works for arrays
+        if (m_type == value_t::array)
+        {
+            return (*m_value.array)[idx];
+        }
+
+        throw std::domain_error("cannot use operator[] with " + type_name());
+    }
+
+    /*!
+    @brief access specified object element
+
+    Returns a reference to the element with specified key @a key.
+
+    @note If @a key is not found in the object, then it is silently added to
+    the object and filled with a `null` value to make `key` a valid reference.
+    In case the value was `null` before, it is converted to an object.
+
+    @note The object for a former `null` value is allocated before the type is
+    switched, so a failing allocation leaves the value `null`.
+
+    @param[in] key  key of the element to access
+
+    @return reference to the element at key @a key
+
+    @throw std::domain_error if JSON is not an object or null
+
+    @complexity Logarithmic in the size of the container.
+
+    @liveexample{The example below shows how object elements can be read and
+    written using the [] operator.,operatorarray__key_type}
+    */
+    reference operator[](const typename object_t::key_type& key)
+    {
+        // implicitly convert null to object; allocate before switching the
+        // type so that a throwing allocation leaves this value untouched
+        if (m_type == value_t::null)
+        {
+            m_value.object = create<object_t>();
+            m_type = value_t::object;
+        }
+
+        // [] only works for objects
+        if (m_type != value_t::object)
+        {
+            throw std::domain_error("cannot use operator[] with " + type_name());
+        }
+
+        return m_value.object->operator[](key);
+    }
+
+    /*!
+    @brief access specified object element
+
+    Yields a const reference to the object member stored under @a key.
+
+    @note NOTE(review): the lookup is forwarded to the underlying object's
+    non-const `operator[]` through the stored pointer. For a `std::map`-like
+    @ref object_t this presumably inserts a `null` member when @a key is
+    missing, even though this function is const -- confirm.
+
+    @param[in] key  key of the element to access
+
+    @return const reference to the element at key @a key
+
+    @throw std::domain_error if JSON is not an object (this includes null)
+
+    @complexity Logarithmic in the size of the container.
+
+    @liveexample{The example below shows how object elements can be read using
+    the [] operator.,operatorarray__key_type_const}
+    */
+    const_reference operator[](const typename object_t::key_type& key) const
+    {
+        // [] with a key only works for objects
+        if (m_type == value_t::object)
+        {
+            return (*m_value.object)[key];
+        }
+
+        throw std::domain_error("cannot use operator[] with " + type_name());
+    }
+
+    /*!
+    @brief access specified object element
+
+    Returns a reference to the element with specified key @a key, where the
+    key is given as an array of characters such as a string literal.
+
+    @note If @a key is not found in the object, then it is silently added to
+    the object and filled with a `null` value to make `key` a valid reference.
+    In case the value was `null` before, it is converted to an object.
+
+    @note The object for a former `null` value is allocated before the type is
+    switched, so a failing allocation leaves the value `null`.
+
+    @note This function is required for compatibility reasons with Clang.
+
+    @param[in] key  key of the element to access
+
+    @return reference to the element at key @a key
+
+    @throw std::domain_error if JSON is not an object or null
+
+    @complexity Logarithmic in the size of the container.
+
+    @liveexample{The example below shows how object elements can be read and
+    written using the [] operator.,operatorarray__key_type}
+    */
+    template<typename T, std::size_t n>
+    reference operator[](const T (&key)[n])
+    {
+        // implicitly convert null to object, exactly as the key_type overload
+        // does; allocate before switching the type so that a throwing
+        // allocation leaves this value untouched
+        if (m_type == value_t::null)
+        {
+            m_value.object = create<object_t>();
+            m_type = value_t::object;
+        }
+
+        // [] only works for objects
+        if (m_type != value_t::object)
+        {
+            throw std::domain_error("cannot use operator[] with " + type_name());
+        }
+
+        return m_value.object->operator[](key);
+    }
+
+    /*!
+    @brief access specified object element
+
+    Yields a const reference to the object member stored under @a key, where
+    the key is given as an array of characters such as a string literal.
+
+    @note NOTE(review): the lookup is forwarded to the underlying object's
+    non-const `operator[]` through the stored pointer. For a `std::map`-like
+    @ref object_t this presumably inserts a `null` member when @a key is
+    missing, even though this function is const -- confirm.
+
+    @note This function is required for compatibility reasons with Clang.
+
+    @param[in] key  key of the element to access
+
+    @return const reference to the element at key @a key
+
+    @throw std::domain_error if JSON is not an object (this includes null)
+
+    @complexity Logarithmic in the size of the container.
+
+    @liveexample{The example below shows how object elements can be read using
+    the [] operator.,operatorarray__key_type_const}
+    */
+    template<typename T, std::size_t n>
+    const_reference operator[](const T (&key)[n]) const
+    {
+        // [] with a key only works for objects
+        if (m_type == value_t::object)
+        {
+            return (*m_value.object)[key];
+        }
+
+        throw std::domain_error("cannot use operator[] with " + type_name());
+    }
+
+    /*!
+    @brief access the first element
+
+    Yields a reference to the first element in the container. For a JSON
+    container `c`, the expression `c.front()` is equivalent to `*c.begin()`.
+
+    @return In case of a structured type (array or object), a reference to the
+    first element is returned. In case of number, string, or boolean values, a
+    reference to the value is returned.
+
+    @complexity Constant.
+
+    @note Calling `front` on an empty container is undefined.
+
+    @throw std::out_of_range when called on null value
+
+    @liveexample{The following code shows an example for @ref front.,front}
+    */
+    reference front()
+    {
+        auto first = begin();
+        return *first;
+    }
+
+    /*!
+    @copydoc basic_json::front()
+    */
+    const_reference front() const
+    {
+        auto first = cbegin();
+        return *first;
+    }
+
+    /*!
+    @brief access the last element
+
+    Yields a reference to the last element in the container. For a JSON
+    container `c`, the expression `c.back()` is equivalent to `{ auto tmp =
+    c.end(); --tmp; return *tmp; }`.
+
+    @return In case of a structured type (array or object), a reference to the
+    last element is returned. In case of number, string, or boolean values, a
+    reference to the value is returned.
+
+    @complexity Constant.
+
+    @note Calling `back` on an empty container is undefined.
+
+    @throw std::out_of_range when called on null value.
+
+    @liveexample{The following code shows an example for @ref back.,back}
+    */
+    reference back()
+    {
+        // step back once from the past-the-end position
+        auto last = end();
+        --last;
+        return *last;
+    }
+
+    /*!
+    @copydoc basic_json::back()
+    */
+    const_reference back() const
+    {
+        // step back once from the past-the-end position
+        auto last = cend();
+        --last;
+        return *last;
+    }
+
+    /*!
+    @brief remove element given an iterator
+
+    Removes the element specified by iterator @a pos. Invalidates iterators and
+    references at or after the point of the erase, including the end()
+    iterator. The iterator @a pos must be valid and dereferenceable. Thus the
+    end() iterator (which is valid, but is not dereferenceable) cannot be used
+    as a value for @a pos.
+
+    If called on a primitive type other than null, the resulting JSON value
+    will be `null`.
+
+    @param[in] pos iterator to the element to remove
+    @return Iterator following the last removed element. If the iterator @a pos
+    refers to the last element, the end() iterator is returned.
+
+    @tparam InteratorType an @ref iterator or @ref const_iterator
+
+    @throw std::domain_error if called on a `null` value
+    @throw std::domain_error if called on an iterator which does not belong to
+    the current JSON value
+    @throw std::out_of_range if called on a primitive type with invalid iterator
+    (i.e., any iterator which is not begin())
+
+    @complexity The complexity depends on the type:
+    - objects: amortized constant
+    - arrays: linear in distance between pos and the end of the container
+    - strings: linear in the length of the string
+    - other types: constant
+
+    @liveexample{The example shows the result of erase for different JSON
+    types.,erase__IteratorType}
+    */
+    template <class InteratorType, typename
+              std::enable_if<
+                  std::is_same<InteratorType, typename basic_json_t::iterator>::value or
+                  std::is_same<InteratorType, typename basic_json_t::const_iterator>::value
+                  , int>::type
+              = 0>
+    InteratorType erase(InteratorType pos)
+    {
+        // reject iterators that were obtained from a different JSON value
+        if (pos.m_object != this)
+        {
+            throw std::domain_error("iterator does not fit current value");
+        }
+
+        // start from end(); the container branches overwrite the part they use
+        InteratorType next = end();
+
+        if (m_type == value_t::array)
+        {
+            next.m_it.array_iterator = m_value.array->erase(pos.m_it.array_iterator);
+        }
+        else if (m_type == value_t::object)
+        {
+            next.m_it.object_iterator = m_value.object->erase(pos.m_it.object_iterator);
+        }
+        else if (m_type == value_t::number_integer or
+                 m_type == value_t::number_float or
+                 m_type == value_t::boolean or
+                 m_type == value_t::string)
+        {
+            // for a primitive the only acceptable position is begin()
+            if (not pos.m_it.primitive_iterator.is_begin())
+            {
+                throw std::out_of_range("iterator out of range");
+            }
+
+            // strings own heap storage that has to be released
+            // NOTE(review): released with plain delete -- confirm this matches
+            // how the string was allocated
+            if (m_type == value_t::string)
+            {
+                delete m_value.string;
+                m_value.string = nullptr;
+            }
+
+            // an erased primitive turns into null
+            m_type = value_t::null;
+        }
+        else
+        {
+            // remaining types (e.g. null) have nothing to erase
+            throw std::domain_error("cannot use erase() with " + type_name());
+        }
+
+        return next;
+    }
+
+    /*!
+    @brief remove elements given an iterator range
+
+    Removes the elements specified by the range `[first; last)`. Invalidates
+    iterators and references at or after the point of the erase, including the
+    end() iterator. The iterator @a first does not need to be dereferenceable
+    if `first == last`: erasing an empty range is a no-op.
+
+    If called on a primitive type other than null, the resulting JSON value
+    will be `null`.
+
+    @param[in] first iterator to the beginning of the range to remove
+    @param[in] last iterator past the end of the range to remove
+    @return Iterator following the last removed element. If @a last is the
+    end() iterator, the end() iterator is returned.
+
+    @tparam InteratorType an @ref iterator or @ref const_iterator
+
+    @throw std::domain_error if called on a `null` value
+    @throw std::domain_error if called on iterators which do not belong to
+    the current JSON value
+    @throw std::out_of_range if called on a primitive type with invalid iterators
+    (i.e., if `first != begin()` or `last != end()`)
+
+    @complexity The complexity depends on the type:
+    - objects: `log(size()) + std::distance(first, last)`
+    - arrays: linear in the distance between @a first and @a last, plus linear
+      in the distance between @a last and end of the container
+    - strings: linear in the length of the string
+    - other types: constant
+
+    @liveexample{The example shows the result of erase for different JSON
+    types.,erase__IteratorType_IteratorType}
+    */
+    template <class InteratorType, typename
+              std::enable_if<
+                  std::is_same<InteratorType, typename basic_json_t::iterator>::value or
+                  std::is_same<InteratorType, typename basic_json_t::const_iterator>::value
+                  , int>::type
+              = 0>
+    InteratorType erase(InteratorType first, InteratorType last)
+    {
+        // both ends of the range must have been obtained from this very value
+        if (this != first.m_object or this != last.m_object)
+        {
+            throw std::domain_error("iterators do not fit current value");
+        }
+
+        // default result; the container branches overwrite the part they use
+        InteratorType result = end();
+
+        switch (m_type)
+        {
+            case value_t::number_integer:
+            case value_t::number_float:
+            case value_t::boolean:
+            case value_t::string:
+            {
+                // for a primitive only the full range [begin(), end()) is accepted
+                if (not first.m_it.primitive_iterator.is_begin() or not last.m_it.primitive_iterator.is_end())
+                {
+                    throw std::out_of_range("iterators out of range");
+                }
+
+                // strings own heap storage that has to be released
+                // NOTE(review): released with plain delete -- confirm this
+                // matches how the string was allocated
+                if (m_type == value_t::string)
+                {
+                    delete m_value.string;
+                    m_value.string = nullptr;
+                }
+
+                // an erased primitive turns into null
+                m_type = value_t::null;
+                break;
+            }
+
+            case value_t::object:
+            {
+                // delegate to the underlying object container
+                result.m_it.object_iterator = m_value.object->erase(first.m_it.object_iterator,
+                                              last.m_it.object_iterator);
+                break;
+            }
+
+            case value_t::array:
+            {
+                // delegate to the underlying array container
+                result.m_it.array_iterator = m_value.array->erase(first.m_it.array_iterator,
+                                             last.m_it.array_iterator);
+                break;
+            }
+
+            default:
+            {
+                // remaining types (e.g. null) have nothing to erase; the
+                // message uses the same "erase()" spelling as the other
+                // erase overloads
+                throw std::domain_error("cannot use erase() with " + type_name());
+            }
+        }
+
+        return result;
+    }
+
+    /*!
+    @brief remove element from a JSON object given a key
+
+    Removes elements from a JSON object with the key value @a key.
+
+    @param[in] key value of the elements to remove
+
+    @return Number of elements removed. If ObjectType is the default `std::map`
+    type, the return value will always be `0` (@a key was not found) or `1` (@a
+    key was found).
+
+    @throw std::domain_error when called on a type other than JSON object
+
+    @complexity `log(size()) + count(key)`
+
+    @liveexample{The example shows the effect of erase.,erase__key_type}
+    */
+    size_type erase(const typename object_t::key_type& key)
+    {
+        // only JSON objects are addressed by key; hand the request to the
+        // underlying object container
+        if (m_type == value_t::object)
+        {
+            return m_value.object->erase(key);
+        }
+
+        // every other type is rejected
+        throw std::domain_error("cannot use erase() with " + type_name());
+    }
+
+    /*!
+    @brief remove element from a JSON array given an index
+
+    Removes element from a JSON array at the index @a idx.
+
+    @param[in] idx index of the element to remove
+
+    @throw std::domain_error when called on a type other than JSON array
+    @throw std::out_of_range when `idx >= size()`
+
+    @complexity Linear in distance between @a idx and the end of the container.
+
+    @liveexample{The example shows the effect of erase.,erase__size_type}
+    */
+    void erase(const size_type idx)
+    {
+        // only JSON arrays are addressed by index
+        if (m_type != value_t::array)
+        {
+            throw std::domain_error("cannot use erase() with " + type_name());
+        }
+
+        // the index has to name an existing element
+        if (idx >= size())
+        {
+            throw std::out_of_range("index out of range");
+        }
+
+        // translate the index into a position in the underlying array
+        const auto target = m_value.array->begin() + static_cast<difference_type>(idx);
+        m_value.array->erase(target);
+    }
+
+    /*!
+    @brief find an element in a JSON object
+
+    Finds an element in a JSON object with key equivalent to @a key. If the
+    element is not found or the JSON value is not an object, end() is returned.
+
+    @param[in] key key value of the element to search for
+
+    @return Iterator to an element with key equivalent to @a key. If no such
+    element is found, past-the-end (see end()) iterator is returned.
+
+    @complexity Logarithmic in the size of the JSON object.
+
+    @liveexample{The example shows how find is used.,find__key_type}
+    */
+    iterator find(typename object_t::key_type key)
+    {
+        // end() doubles as the "not found" answer
+        iterator hit = end();
+
+        // values that are not objects have no keys to search
+        if (m_type != value_t::object)
+        {
+            return hit;
+        }
+
+        // let the underlying object container do the lookup
+        hit.m_it.object_iterator = m_value.object->find(key);
+        return hit;
+    }
+
+    /*!
+    @brief find an element in a JSON object
+    @copydoc find(typename object_t::key_type)
+    */
+    const_iterator find(typename object_t::key_type key) const
+    {
+        // cend() doubles as the "not found" answer
+        const_iterator hit = cend();
+
+        // values that are not objects have no keys to search
+        if (m_type != value_t::object)
+        {
+            return hit;
+        }
+
+        // let the underlying object container do the lookup
+        hit.m_it.object_iterator = m_value.object->find(key);
+        return hit;
+    }
+
+    /*!
+    @brief returns the number of occurrences of a key in a JSON object
+
+    Returns the number of elements with key @a key. If ObjectType is the
+    default `std::map` type, the return value will always be `0` (@a key was
+    not found) or `1` (@a key was found).
+
+    @param[in] key key value of the element to count
+
+    @return Number of elements with key @a key. If the JSON value is not an
+    object, the return value will be `0`.
+
+    @complexity Logarithmic in the size of the JSON object.
+
+    @liveexample{The example shows how count is used.,count}
+    */
+    size_type count(typename object_t::key_type key) const
+    {
+        // values that are not objects never contain a key
+        if (m_type != value_t::object)
+        {
+            return 0;
+        }
+
+        // objects answer through their underlying container
+        return m_value.object->count(key);
+    }
+
+    /// @}
+
+
+    ///////////////
+    // iterators //
+    ///////////////
+
+    /// @name iterators
+    /// @{
+
+    /*!
+    @brief returns an iterator to the first element
+
+    Returns an iterator to the first element.
+
+    @image html range-begin-end.svg "Illustration from cppreference.com"
+
+    @return iterator to the first element
+
+    @complexity Constant.
+
+    @requirement This function satisfies the Container requirements:
+    - The complexity is constant.
+
+    @liveexample{The following code shows an example for @ref begin.,begin}
+    */
+    iterator begin()
+    {
+        // bind an iterator to this value, then move it to the first position
+        iterator first(this);
+        first.set_begin();
+        return first;
+    }
+
+    /*!
+    @copydoc basic_json::cbegin()
+    */
+    const_iterator begin() const
+    {
+        // the const overload simply hands out what cbegin() produces
+        const_iterator first = cbegin();
+        return first;
+    }
+
+    /*!
+    @brief returns a const iterator to the first element
+
+    Returns a const iterator to the first element.
+
+    @image html range-begin-end.svg "Illustration from cppreference.com"
+
+    @return const iterator to the first element
+
+    @complexity Constant.
+
+    @requirement This function satisfies the Container requirements:
+    - The complexity is constant.
+    - Has the semantics of `const_cast<const basic_json&>(*this).begin()`.
+
+    @liveexample{The following code shows an example for @ref cbegin.,cbegin}
+    */
+    const_iterator cbegin() const
+    {
+        // bind a const iterator to this value, then move it to the first position
+        const_iterator first(this);
+        first.set_begin();
+        return first;
+    }
+
+    /*!
+    @brief returns an iterator to one past the last element
+
+    Returns an iterator to one past the last element.
+
+    @image html range-begin-end.svg "Illustration from cppreference.com"
+
+    @return iterator one past the last element
+
+    @complexity Constant.
+
+    @requirement This function satisfies the Container requirements:
+    - The complexity is constant.
+
+    @liveexample{The following code shows an example for @ref end.,end}
+    */
+    iterator end()
+    {
+        // bind an iterator to this value, then move it past the last position
+        iterator past_last(this);
+        past_last.set_end();
+        return past_last;
+    }
+
+    /*!
+    @copydoc basic_json::cend()
+    */
+    const_iterator end() const
+    {
+        // the const overload simply hands out what cend() produces
+        const_iterator past_last = cend();
+        return past_last;
+    }
+
+    /*!
+    @brief returns a const iterator to one past the last element
+
+    Returns a const iterator to one past the last element.
+
+    @image html range-begin-end.svg "Illustration from cppreference.com"
+
+    @return const iterator one past the last element
+
+    @complexity Constant.
+
+    @requirement This function satisfies the Container requirements:
+    - The complexity is constant.
+    - Has the semantics of `const_cast<const basic_json&>(*this).end()`.
+
+    @liveexample{The following code shows an example for @ref cend.,cend}
+    */
+    const_iterator cend() const
+    {
+        // bind a const iterator to this value, then move it past the last position
+        const_iterator past_last(this);
+        past_last.set_end();
+        return past_last;
+    }
+
+    /*!
+    @brief returns an iterator to the reverse-beginning
+
+    Returns an iterator to the reverse-beginning; that is, the last element.
+
+    @image html range-rbegin-rend.svg "Illustration from cppreference.com"
+
+    @complexity Constant.
+
+    @requirement This function satisfies the ReversibleContainer requirements:
+    - The complexity is constant.
+    - Has the semantics of `reverse_iterator(end())`.
+
+    @liveexample{The following code shows an example for @ref rbegin.,rbegin}
+    */
+    reverse_iterator rbegin()
+    {
+        // reverse iteration starts from the forward past-the-end position
+        iterator past_last = end();
+        return reverse_iterator(past_last);
+    }
+
+    /*!
+    @copydoc basic_json::crbegin()
+    */
+    const_reverse_iterator rbegin() const
+    {
+        // the const overload simply hands out what crbegin() produces
+        const_reverse_iterator start = crbegin();
+        return start;
+    }
+
+    /*!
+    @brief returns an iterator to the reverse-end
+
+    Returns an iterator to the reverse-end; that is, one before the first
+    element.
+
+    @image html range-rbegin-rend.svg "Illustration from cppreference.com"
+
+    @complexity Constant.
+
+    @requirement This function satisfies the ReversibleContainer requirements:
+    - The complexity is constant.
+    - Has the semantics of `reverse_iterator(begin())`.
+
+    @liveexample{The following code shows an example for @ref rend.,rend}
+    */
+    reverse_iterator rend()
+    {
+        // reverse iteration stops at the forward begin position
+        iterator first = begin();
+        return reverse_iterator(first);
+    }
+
+    /*!
+    @copydoc basic_json::crend()
+    */
+    const_reverse_iterator rend() const
+    {
+        // the const overload simply hands out what crend() produces
+        const_reverse_iterator stop = crend();
+        return stop;
+    }
+
+    /*!
+    @brief returns a const reverse iterator to the last element
+
+    Returns a const iterator to the reverse-beginning; that is, the last
+    element.
+
+    @image html range-rbegin-rend.svg "Illustration from cppreference.com"
+
+    @complexity Constant.
+
+    @requirement This function satisfies the ReversibleContainer requirements:
+    - The complexity is constant.
+    - Has the semantics of `const_cast<const basic_json&>(*this).rbegin()`.
+
+    @liveexample{The following code shows an example for @ref crbegin.,crbegin}
+    */
+    const_reverse_iterator crbegin() const
+    {
+        // reverse iteration starts from the forward past-the-end position
+        const_iterator past_last = cend();
+        return const_reverse_iterator(past_last);
+    }
+
+    /*!
+    @brief returns a const reverse iterator to one before the first
+
+    Returns a const reverse iterator to the reverse-end; that is, one before
+    the first element.
+
+    @image html range-rbegin-rend.svg "Illustration from cppreference.com"
+
+    @complexity Constant.
+
+    @requirement This function satisfies the ReversibleContainer requirements:
+    - The complexity is constant.
+    - Has the semantics of `const_cast<const basic_json&>(*this).rend()`.
+
+    @liveexample{The following code shows an example for @ref crend.,crend}
+    */
+    const_reverse_iterator crend() const
+    {
+        // reverse iteration stops at the forward begin position
+        const_iterator first = cbegin();
+        return const_reverse_iterator(first);
+    }
+
+    /// @}
+
+
+    //////////////
+    // capacity //
+    //////////////
+
+    /// @name capacity
+    /// @{
+
+    /*!
+    @brief checks whether the container is empty
+
+    Checks if a JSON value has no elements.
+
+    @return The return value depends on the different types and is
+            defined as follows:
+            Value type  | return value
+            ----------- | -------------
+            null        | @c true
+            boolean     | @c false
+            string      | @c false
+            number      | @c false
+            object      | result of function object_t::empty()
+            array       | result of function array_t::empty()
+
+    @complexity Constant, as long as @ref array_t and @ref object_t satisfy the
+                Container concept; that is, their empty() functions have
+                constant complexity.
+
+    @requirement This function satisfies the Container requirements:
+    - The complexity is constant.
+    - Has the semantics of `begin() == end()`.
+
+    @liveexample{The following code uses @ref empty to check if a @ref json
+    object contains any elements.,empty}
+    */
+    bool empty() const noexcept
+    {
+        // null never holds anything
+        if (m_type == value_t::null)
+        {
+            return true;
+        }
+
+        // containers answer for themselves
+        if (m_type == value_t::array)
+        {
+            return m_value.array->empty();
+        }
+
+        if (m_type == value_t::object)
+        {
+            return m_value.object->empty();
+        }
+
+        // every remaining type counts as nonempty
+        return false;
+    }
+
+    /*!
+    @brief returns the number of elements
+
+    Returns the number of elements in a JSON value.
+
+    @return The return value depends on the different types and is
+            defined as follows:
+            Value type  | return value
+            ----------- | -------------
+            null        | @c 0
+            boolean     | @c 1
+            string      | @c 1
+            number      | @c 1
+            object      | result of function object_t::size()
+            array       | result of function array_t::size()
+
+    @complexity Constant, as long as @ref array_t and @ref object_t satisfy the
+                Container concept; that is, their size() functions have
+                constant complexity.
+
+    @requirement This function satisfies the Container requirements:
+    - The complexity is constant.
+    - Has the semantics of `std::distance(begin(), end())`.
+
+    @liveexample{The following code calls @ref size on the different value
+    types.,size}
+    */
+    size_type size() const noexcept
+    {
+        // null holds no element at all
+        if (m_type == value_t::null)
+        {
+            return 0;
+        }
+
+        // containers report their own element count
+        if (m_type == value_t::array)
+        {
+            return m_value.array->size();
+        }
+
+        if (m_type == value_t::object)
+        {
+            return m_value.object->size();
+        }
+
+        // every remaining type counts as exactly one element
+        return 1;
+    }
+
+    /*!
+    @brief returns the maximum possible number of elements
+
+    Returns the maximum number of elements a JSON value is able to hold due to
+    system or library implementation limitations, i.e. `std::distance(begin(),
+    end())` for the JSON value.
+
+    @return The return value depends on the different types and is
+            defined as follows:
+            Value type  | return value
+            ----------- | -------------
+            null        | @c 0 (same as size())
+            boolean     | @c 1 (same as size())
+            string      | @c 1 (same as size())
+            number      | @c 1 (same as size())
+            object      | result of function object_t::max_size()
+            array       | result of function array_t::max_size()
+
+    @complexity Constant, as long as @ref array_t and @ref object_t satisfy the
+                Container concept; that is, their max_size() functions have
+                constant complexity.
+
+    @requirement This function satisfies the Container requirements:
+    - The complexity is constant.
+    - Has the semantics of returning `b.size()` where `b` is the largest
+      possible JSON value.
+
+    @liveexample{The following code calls @ref max_size on the different value
+    types. Note the output is implementation specific.,max_size}
+    */
+    size_type max_size() const noexcept
+    {
+        // containers report the limit of their underlying container
+        if (m_type == value_t::array)
+        {
+            return m_value.array->max_size();
+        }
+
+        if (m_type == value_t::object)
+        {
+            return m_value.object->max_size();
+        }
+
+        // for every remaining type the limit equals the current size()
+        return size();
+    }
+
+    /// @}
+
+
+    ///////////////
+    // modifiers //
+    ///////////////
+
+    /// @name modifiers
+    /// @{
+
+    /*!
+    @brief clears the contents
+
+    Clears the content of a JSON value and resets it to the default value as
+    if @ref basic_json(value_t) would have been called:
+
+    Value type  | initial value
+    ----------- | -------------
+    null        | `null`
+    boolean     | `false`
+    string      | `""`
+    number      | `0`
+    object      | `{}`
+    array       | `[]`
+
+    @note Floating-point numbers are set to `0.0` which will be serialized to
+    `0`. The value type remains @ref number_float_t.
+
+    @complexity Linear in the size of the JSON value.
+
+    @liveexample{The example below shows the effect of @ref clear to different
+    JSON types.,clear}
+    */
+    void clear() noexcept
+    {
+        switch (m_type)
+        {
+            // containers and strings are emptied in place; the type is kept
+            case value_t::object:
+            {
+                m_value.object->clear();
+                break;
+            }
+
+            case value_t::array:
+            {
+                m_value.array->clear();
+                break;
+            }
+
+            case value_t::string:
+            {
+                m_value.string->clear();
+                break;
+            }
+
+            // scalars are reset to their zero value
+            case value_t::boolean:
+            {
+                m_value.boolean = false;
+                break;
+            }
+
+            case value_t::number_float:
+            {
+                m_value.number_float = 0.0;
+                break;
+            }
+
+            case value_t::number_integer:
+            {
+                m_value.number_integer = 0;
+                break;
+            }
+
+            // nothing to reset for these two
+            case value_t::discarded:
+            case value_t::null:
+            {
+                break;
+            }
+        }
+    }
+
+    /*!
+    @brief add an object to an array
+
+    Appends the given element @a value to the end of the JSON value. If the
+    function is called on a JSON null value, an empty array is created before
+    appending @a value.
+
+    @param value the value to add to the JSON array
+
+    @throw std::domain_error when called on a type other than JSON array or null
+
+    @complexity Amortized constant.
+
+    @liveexample{The example shows how `push_back` and `+=` can be used to add
+    elements to a JSON array. Note how the `null` value was silently converted
+    to a JSON array.,push_back}
+    */
+    void push_back(basic_json&& value)
+    {
+        // only a null value or an existing array can be appended to
+        if (m_type != value_t::null and m_type != value_t::array)
+        {
+            throw std::domain_error("cannot use push_back() with " + type_name());
+        }
+
+        // a null value silently becomes an empty array first
+        if (m_type == value_t::null)
+        {
+            m_type = value_t::array;
+            m_value = value_t::array;
+        }
+
+        // hand the element over to the array without copying it
+        m_value.array->push_back(std::move(value));
+
+        // mark the moved-from argument as null
+        value.m_type = value_t::null;
+    }
+
+    /*!
+    @brief add an object to an array
+    @copydoc push_back(basic_json&&)
+    */
+    reference operator+=(basic_json&& value)
+    {
+        // same as push_back, but the call can be chained
+        this->push_back(std::move(value));
+        return *this;
+    }
+
+    /*!
+    @brief add an object to an array
+    @copydoc push_back(basic_json&&)
+    */
+    void push_back(const basic_json& value)
+    {
+        // only a null value or an existing array can be appended to
+        if (m_type != value_t::null and m_type != value_t::array)
+        {
+            throw std::domain_error("cannot use push_back() with " + type_name());
+        }
+
+        // a null value silently becomes an empty array first
+        if (m_type == value_t::null)
+        {
+            m_type = value_t::array;
+            m_value = value_t::array;
+        }
+
+        // append a copy of the element
+        m_value.array->push_back(value);
+    }
+
+    /*!
+    @brief add an object to an array
+    @copydoc push_back(basic_json&&)
+    */
+    reference operator+=(const basic_json& value)
+    {
+        // same as push_back, but the call can be chained
+        this->push_back(value);
+        return *this;
+    }
+
+    /*!
+    @brief add an object to an object
+
+    Inserts the given element @a value to the JSON object. If the function is
+    called on a JSON null value, an empty object is created before inserting @a
+    value.
+
+    @param[in] value the value to add to the JSON object
+
+    @throw std::domain_error when called on a type other than JSON object or
+    null
+
+    @complexity Logarithmic in the size of the container, O(log(`size()`)).
+
+    @liveexample{The example shows how `push_back` and `+=` can be used to add
+    elements to a JSON object. Note how the `null` value was silently converted
+    to a JSON object.,push_back__object_t__value}
+    */
+    void push_back(const typename object_t::value_type& value)
+    {
+        // only a null value or an existing object can take a key/value pair
+        if (m_type != value_t::null and m_type != value_t::object)
+        {
+            throw std::domain_error("cannot use push_back() with " + type_name());
+        }
+
+        // a null value silently becomes an empty object first
+        if (m_type == value_t::null)
+        {
+            m_type = value_t::object;
+            m_value = value_t::object;
+        }
+
+        // insert the key/value pair into the object
+        m_value.object->insert(value);
+    }
+
+    /*!
+    @brief add an object to an object
+    @copydoc push_back(const typename object_t::value_type&)
+    */
+    reference operator+=(const typename object_t::value_type& value)
+    {
+        // insert the pair first
+        this->push_back(value);
+
+        // NOTE(review): unlike the array overloads of operator+=, this returns
+        // whatever operator[] yields for value.first rather than *this --
+        // confirm that callers rely on this before changing it
+        return operator[](value.first);
+    }
+
+    /*!
+    @brief inserts element
+
+    Inserts element @a value before iterator @a pos.
+
+    @param[in] pos iterator before which the content will be inserted; may be
+    the end() iterator
+    @param[in] value element to insert
+    @return iterator pointing to the inserted @a value.
+
+    @throw std::domain_error if called on JSON values other than arrays
+    @throw std::domain_error if @a pos is not an iterator of *this
+
+    @complexity Constant plus linear in the distance between pos and end of the
+    container.
+
+    @liveexample{The example shows how insert is used.,insert}
+    */
+    iterator insert(const_iterator pos, const basic_json& value)
+    {
+        // positional insertion is only defined for arrays
+        if (m_type != value_t::array)
+        {
+            throw std::domain_error("cannot use insert() with " + type_name());
+        }
+
+        // the position must have been obtained from this very value
+        if (this != pos.m_object)
+        {
+            throw std::domain_error("iterator does not fit current value");
+        }
+
+        // delegate to the underlying array and wrap the position it reports
+        iterator inserted(this);
+        inserted.m_it.array_iterator = m_value.array->insert(pos.m_it.array_iterator, value);
+        return inserted;
+    }
+
+    /*!
+    @brief inserts element
+    @copydoc insert(const_iterator, const basic_json&)
+    */
+    iterator insert(const_iterator pos, basic_json&& value)
+    {
+        // insert only works for arrays
+        if (m_type != value_t::array)
+        {
+            throw std::domain_error("cannot use insert() with " + type_name());
+        }
+
+        // check if iterator pos fits to this JSON value
+        if (pos.m_object != this)
+        {
+            throw std::domain_error("iterator does not fit current value");
+        }
+
+        // A named rvalue reference is an lvalue, so forwarding `value` as-is
+        // would pick the copying overload. Move it into the array instead.
+        iterator result(this);
+        result.m_it.array_iterator = m_value.array->insert(pos.m_it.array_iterator, std::move(value));
+
+        // invalidate the moved-from object, as push_back(basic_json&&) does
+        value.m_type = value_t::null;
+        return result;
+    }
+
+    /*!
+    @brief inserts elements
+
+    Inserts @a count copies of @a value before iterator @a pos.
+
+    @param[in] pos iterator before which the content will be inserted; may be
+    the end() iterator
+    @param[in] count number of copies of @a value to insert
+    @param[in] value element to insert
+    @return iterator pointing to the first element inserted, or @a pos if
+    `count==0`
+
+    @throw std::domain_error if called on JSON values other than arrays
+    @throw std::domain_error if @a pos is not an iterator of *this
+
+    @complexity Linear in @a count plus linear in the distance between @a pos
+    and end of the container.
+
+    @liveexample{The example shows how insert is used.,insert__count}
+    */
+    iterator insert(const_iterator pos, size_type count, const basic_json& value)
+    {
+        // positional insertion is only defined for arrays
+        if (m_type != value_t::array)
+        {
+            throw std::domain_error("cannot use insert() with " + type_name());
+        }
+
+        // the position must have been obtained from this very value
+        if (this != pos.m_object)
+        {
+            throw std::domain_error("iterator does not fit current value");
+        }
+
+        // let the underlying array insert the copies and wrap the position
+        // it reports
+        iterator inserted(this);
+        inserted.m_it.array_iterator = m_value.array->insert(pos.m_it.array_iterator, count, value);
+        return inserted;
+    }
+
+    /*!
+    @brief inserts elements
+
+    Inserts elements from range `[first, last)` before iterator @a pos.
+
+    @param[in] pos iterator before which the content will be inserted; may be
+    the end() iterator
+    @param[in] first begin of the range of elements to insert
+    @param[in] last end of the range of elements to insert
+
+    @throw std::domain_error if called on JSON values other than arrays
+    @throw std::domain_error if @a pos is not an iterator of *this
+    @throw std::domain_error if @a first and @a last do not belong to the same
+    JSON value
+    @throw std::domain_error if @a first or @a last are iterators into
+    container for which insert is called
+    @return iterator pointing to the first element inserted, or @a pos if
+    `first==last`
+
+    @complexity Linear in `std::distance(first, last)` plus linear in the
+    distance between @a pos and end of the container.
+
+    @liveexample{The example shows how insert is used.,insert__range}
+    */
+    iterator insert(const_iterator pos, const_iterator first, const_iterator last)
+    {
+        // positional insertion is only defined for arrays
+        if (m_type != value_t::array)
+        {
+            throw std::domain_error("cannot use insert() with " + type_name());
+        }
+
+        // the position must have been obtained from this very value
+        if (this != pos.m_object)
+        {
+            throw std::domain_error("iterator does not fit current value");
+        }
+
+        // both ends of the source range must come from one and the same value
+        if (first.m_object != last.m_object)
+        {
+            throw std::domain_error("iterators does not fit");
+        }
+
+        // the source range must not come from this value; as both ends share
+        // one owner at this point, checking a single end covers both
+        if (this == first.m_object)
+        {
+            throw std::domain_error("passed iterators may not belong to container");
+        }
+
+        // let the underlying array copy the range in and wrap the position
+        // it reports
+        iterator inserted(this);
+        inserted.m_it.array_iterator = m_value.array->insert(pos.m_it.array_iterator,
+                                       first.m_it.array_iterator, last.m_it.array_iterator);
+        return inserted;
+    }
+
+    /*!
+    @brief inserts elements
+
+    Inserts elements from initializer list @a ilist before iterator @a pos.
+
+    @param[in] pos iterator before which the content will be inserted; may be
+    the end() iterator
+    @param[in] ilist initializer list to insert the values from
+
+    @throw std::domain_error if called on JSON values other than arrays
+    @throw std::domain_error if @a pos is not an iterator of *this
+    @return iterator pointing to the first element inserted, or @a pos if
+    `ilist` is empty
+
+    @complexity Linear in `ilist.size()` plus linear in the distance between @a
+    pos and end of the container.
+
+    @liveexample{The example shows how insert is used.,insert__ilist}
+    */
+    iterator insert(const_iterator pos, std::initializer_list<basic_json> ilist)
+    {
+        // positional insertion is only defined for JSON arrays
+        const bool is_array = (m_type == value_t::array);
+        if (not is_array)
+        {
+            throw std::domain_error("cannot use insert() with " + type_name());
+        }
+
+        // pos has to be an iterator into this very JSON value
+        if (this != pos.m_object)
+        {
+            throw std::domain_error("iterator does not fit current value");
+        }
+
+        // delegate to the underlying array and wrap the iterator it returns
+        iterator inserted(this);
+        auto& target = *m_value.array;
+        inserted.m_it.array_iterator = target.insert(pos.m_it.array_iterator, ilist);
+        return inserted;
+    }
+
+    /*!
+    @brief exchanges the values
+
+    Exchanges the contents of the JSON value with those of @a other. Does not
+    invoke any move, copy, or swap operations on individual elements. All
+    iterators and references remain valid. The past-the-end iterator is
+    invalidated.
+
+    @param[in,out] other JSON value to exchange the contents with
+
+    @complexity Constant.
+
+    @liveexample{The example below shows how JSON arrays can be
+    swapped.,swap__reference}
+    */
+    void swap(reference other) noexcept (
+        std::is_nothrow_move_constructible<value_t>::value and
+        std::is_nothrow_move_assignable<value_t>::value and
+        std::is_nothrow_move_constructible<json_value>::value and
+        std::is_nothrow_move_assignable<json_value>::value
+    )
+    {
+        // The payload and its type tag are separate members, so the two
+        // exchanges are independent of each other; both must happen for the
+        // values to trade places completely.
+        std::swap(m_value, other.m_value);
+        std::swap(m_type, other.m_type);
+    }
+
+    /*!
+    @brief exchanges the values
+
+    Exchanges the contents of a JSON array with those of @a other. Does not
+    invoke any move, copy, or swap operations on individual elements. All
+    iterators and references remain valid. The past-the-end iterator is
+    invalidated.
+
+    @param[in,out] other array to exchange the contents with
+
+    @throw std::domain_error when JSON value is not an array
+
+    @complexity Constant.
+
+    @liveexample{The example below shows how JSON values can be
+    swapped.,swap__array_t}
+    */
+    void swap(array_t& other)
+    {
+        // reject every JSON value that does not hold an array
+        const bool holds_array = (m_type == value_t::array);
+        if (not holds_array)
+        {
+            throw std::domain_error("cannot use swap() with " + type_name());
+        }
+
+        // trade the stored array for the caller's container
+        array_t& stored = *(m_value.array);
+        std::swap(stored, other);
+    }
+
+    /*!
+    @brief exchanges the values
+
+    Exchanges the contents of a JSON object with those of @a other. Does not
+    invoke any move, copy, or swap operations on individual elements. All
+    iterators and references remain valid. The past-the-end iterator is
+    invalidated.
+
+    @param[in,out] other object to exchange the contents with
+
+    @throw std::domain_error when JSON value is not an object
+
+    @complexity Constant.
+
+    @liveexample{The example below shows how JSON values can be
+    swapped.,swap__object_t}
+    */
+    void swap(object_t& other)
+    {
+        // reject every JSON value that does not hold an object
+        const bool holds_object = (m_type == value_t::object);
+        if (not holds_object)
+        {
+            throw std::domain_error("cannot use swap() with " + type_name());
+        }
+
+        // trade the stored object for the caller's container
+        object_t& stored = *(m_value.object);
+        std::swap(stored, other);
+    }
+
+    /*!
+    @brief exchanges the values
+
+    Exchanges the contents of a JSON string with those of @a other. Does not
+    invoke any move, copy, or swap operations on individual elements. All
+    iterators and references remain valid. The past-the-end iterator is
+    invalidated.
+
+    @param[in,out] other string to exchange the contents with
+
+    @throw std::domain_error when JSON value is not a string
+
+    @complexity Constant.
+
+    @liveexample{The example below shows how JSON values can be
+    swapped.,swap__string_t}
+    */
+    void swap(string_t& other)
+    {
+        // reject every JSON value that does not hold a string
+        const bool holds_string = (m_type == value_t::string);
+        if (not holds_string)
+        {
+            throw std::domain_error("cannot use swap() with " + type_name());
+        }
+
+        // trade the stored string for the caller's string
+        string_t& stored = *(m_value.string);
+        std::swap(stored, other);
+    }
+
+    /// @}
+
+
+    //////////////////////////////////////////
+    // lexicographical comparison operators //
+    //////////////////////////////////////////
+
+    /// @name lexicographical comparison operators
+    /// @{
+
+  private:
+    /*!
+    @brief comparison operator for JSON types
+
+    Returns an ordering that is similar to Python:
+    - order: null < boolean < number < object < array < string
+    - furthermore, each type is not smaller than itself
+    */
+    friend bool operator<(const value_t lhs, const value_t rhs)
+    {
+        // a discarded value never takes part in an ordering; this check also
+        // keeps the table lookup below within its seven entries
+        const bool any_discarded = (lhs == value_t::discarded) or (rhs == value_t::discarded);
+        if (any_discarded)
+        {
+            return false;
+        }
+
+        // rank of each type, indexed by the numeric value of the enumerator;
+        // integer and float share one rank because both are "number"
+        static constexpr std::array<uint8_t, 7> rank = {{
+                0, // null
+                3, // object
+                4, // array
+                5, // string
+                1, // boolean
+                2, // integer
+                2  // float
+            }
+        };
+
+        const auto lhs_rank = rank[static_cast<std::size_t>(lhs)];
+        const auto rhs_rank = rank[static_cast<std::size_t>(rhs)];
+        return lhs_rank < rhs_rank;
+    }
+
+  public:
+    /*!
+    @brief comparison: equal
+
+    Compares two JSON values for equality according to the following rules:
+    - Two JSON values are equal if (1) they are from the same type and (2)
+      their stored values are the same.
+    - Integer and floating-point numbers are automatically converted before
+      comparison. Floating-point numbers are compared indirectly: two
+      floating-point numbers `f1` and `f2` are considered equal if neither
+      `f1 > f2` nor `f2 > f1` holds.
+    - Two JSON null values are equal.
+
+    @param[in] lhs  first JSON value to consider
+    @param[in] rhs  second JSON value to consider
+    @return whether the values @a lhs and @a rhs are equal
+
+    @complexity Linear.
+
+    @liveexample{The example demonstrates comparing several JSON
+    types.,operator__equal}
+    */
+    friend bool operator==(const_reference lhs, const_reference rhs) noexcept
+    {
+        const auto left_kind = lhs.type();
+        const auto right_kind = rhs.type();
+
+        // Values of differing types are only comparable when both are
+        // numbers; the integer operand is then converted to floating-point.
+        if (left_kind != right_kind)
+        {
+            if (left_kind == value_t::number_integer and right_kind == value_t::number_float)
+            {
+                return approx(static_cast<number_float_t>(lhs.m_value.number_integer),
+                              rhs.m_value.number_float);
+            }
+
+            if (left_kind == value_t::number_float and right_kind == value_t::number_integer)
+            {
+                return approx(lhs.m_value.number_float,
+                              static_cast<number_float_t>(rhs.m_value.number_integer));
+            }
+
+            return false;
+        }
+
+        // same type on both sides: compare the stored payloads
+        switch (left_kind)
+        {
+            case (value_t::null):
+                return true;
+            case (value_t::discarded):
+                return false;
+            case (value_t::boolean):
+                return lhs.m_value.boolean == rhs.m_value.boolean;
+            case (value_t::number_integer):
+                return lhs.m_value.number_integer == rhs.m_value.number_integer;
+            case (value_t::number_float):
+                return approx(lhs.m_value.number_float, rhs.m_value.number_float);
+            case (value_t::string):
+                return *lhs.m_value.string == *rhs.m_value.string;
+            case (value_t::array):
+                return *lhs.m_value.array == *rhs.m_value.array;
+            case (value_t::object):
+                return *lhs.m_value.object == *rhs.m_value.object;
+        }
+
+        // only reached if the type tag matches none of the enumerators
+        return false;
+    }
+
+    /*!
+    @brief comparison: equal
+
+    The function compares the given JSON value against a null pointer. As the
+    null pointer can be used to initialize a JSON value to null, a comparison
+    of JSON value @a v with a null pointer should be equivalent to calling
+    `v.is_null()`.
+
+    @param[in] v  JSON value to consider
+    @return whether @a v is null
+
+    @complexity Constant.
+
+    @liveexample{The example compares several JSON types to the null pointer.
+    ,operator__equal__nullptr_t}
+    */
+    friend bool operator==(const_reference v, std::nullptr_t) noexcept
+    {
+        // the null pointer stands for JSON null, so defer to the null check
+        const bool holds_null = v.is_null();
+        return holds_null;
+    }
+
+    /*!
+    @brief comparison: equal
+    @copydoc operator==(const_reference, std::nullptr_t)
+    */
+    friend bool operator==(std::nullptr_t, const_reference v) noexcept
+    {
+        // mirrored operand order; the result only depends on the JSON value
+        const bool holds_null = v.is_null();
+        return holds_null;
+    }
+
+    /*!
+    @brief comparison: not equal
+
+    Compares two JSON values for inequality by calculating `not (lhs == rhs)`.
+
+    @param[in] lhs  first JSON value to consider
+    @param[in] rhs  second JSON value to consider
+    @return whether the values @a lhs and @a rhs are not equal
+
+    @complexity Linear.
+
+    @liveexample{The example demonstrates comparing several JSON
+    types.,operator__notequal}
+    */
+    friend bool operator!=(const_reference lhs, const_reference rhs) noexcept
+    {
+        // inequality is defined as the exact negation of operator==
+        if (lhs == rhs)
+        {
+            return false;
+        }
+        return true;
+    }
+
+    /*!
+    @brief comparison: not equal
+
+    The function compares the given JSON value against a null pointer. As the
+    null pointer can be used to initialize a JSON value to null, a comparison
+    of JSON value @a v with a null pointer should be equivalent to calling
+    `not v.is_null()`.
+
+    @param[in] v  JSON value to consider
+    @return whether @a v is not null
+
+    @complexity Constant.
+
+    @liveexample{The example compares several JSON types to the null pointer.
+    ,operator__notequal__nullptr_t}
+    */
+    friend bool operator!=(const_reference v, std::nullptr_t) noexcept
+    {
+        // differs from the null pointer exactly when the value is not null
+        const bool holds_null = v.is_null();
+        return not holds_null;
+    }
+
+    /*!
+    @brief comparison: not equal
+    @copydoc operator!=(const_reference, std::nullptr_t)
+    */
+    friend bool operator!=(std::nullptr_t, const_reference v) noexcept
+    {
+        // mirrored operand order; the result only depends on the JSON value
+        const bool holds_null = v.is_null();
+        return not holds_null;
+    }
+
+    /*!
+    @brief comparison: less than
+
+    Compares whether one JSON value @a lhs is less than another JSON value @a
+    rhs according to the following rules:
+    - If @a lhs and @a rhs have the same type, the values are compared using
+      the default `<` operator.
+    - Integer and floating-point numbers are automatically converted before
+      comparison
+    - In case @a lhs and @a rhs have different types, the values are ignored
+      and the order of the types is considered, see
+      @ref operator<(const value_t, const value_t).
+
+    @param[in] lhs  first JSON value to consider
+    @param[in] rhs  second JSON value to consider
+    @return whether @a lhs is less than @a rhs
+
+    @complexity Linear.
+
+    @liveexample{The example demonstrates comparing several JSON
+    types.,operator__less}
+    */
+    friend bool operator<(const_reference lhs, const_reference rhs) noexcept
+    {
+        const auto left_kind = lhs.type();
+        const auto right_kind = rhs.type();
+
+        if (left_kind != right_kind)
+        {
+            // an integer and a float are compared numerically after the
+            // integer operand has been converted to floating-point
+            if (left_kind == value_t::number_integer and right_kind == value_t::number_float)
+            {
+                return static_cast<number_float_t>(lhs.m_value.number_integer) <
+                       rhs.m_value.number_float;
+            }
+
+            if (left_kind == value_t::number_float and right_kind == value_t::number_integer)
+            {
+                return lhs.m_value.number_float <
+                       static_cast<number_float_t>(rhs.m_value.number_integer);
+            }
+
+            // Any other pair of differing types is ordered by type alone.
+            // The operator is called explicitly because MSVC has problems
+            // otherwise.
+            return operator<(left_kind, right_kind);
+        }
+
+        // same type on both sides: compare the stored payloads
+        switch (left_kind)
+        {
+            case (value_t::null):
+                return false;
+            case (value_t::discarded):
+                return false;
+            case (value_t::boolean):
+                return lhs.m_value.boolean < rhs.m_value.boolean;
+            case (value_t::number_integer):
+                return lhs.m_value.number_integer < rhs.m_value.number_integer;
+            case (value_t::number_float):
+                return lhs.m_value.number_float < rhs.m_value.number_float;
+            case (value_t::string):
+                return *lhs.m_value.string < *rhs.m_value.string;
+            case (value_t::array):
+                return *lhs.m_value.array < *rhs.m_value.array;
+            case (value_t::object):
+                return *lhs.m_value.object < *rhs.m_value.object;
+        }
+
+        // only reached if the type tag matches none of the enumerators; fall
+        // back to the type ordering (explicit call, see the MSVC note above)
+        return operator<(left_kind, right_kind);
+    }
+
+    /*!
+    @brief comparison: less than or equal
+
+    Compares whether one JSON value @a lhs is less than or equal to another
+    JSON value by calculating `not (rhs < lhs)`.
+
+    @param[in] lhs  first JSON value to consider
+    @param[in] rhs  second JSON value to consider
+    @return whether @a lhs is less than or equal to @a rhs
+
+    @complexity Linear.
+
+    @liveexample{The example demonstrates comparing several JSON
+    types.,operator__lessequal}
+    */
+    friend bool operator<=(const_reference lhs, const_reference rhs) noexcept
+    {
+        // lhs <= rhs holds exactly when rhs is not strictly smaller than lhs
+        const bool rhs_is_smaller = (rhs < lhs);
+        return not rhs_is_smaller;
+    }
+
+    /*!
+    @brief comparison: greater than
+
+    Compares whether one JSON value @a lhs is greater than another
+    JSON value by calculating `not (lhs <= rhs)`.
+
+    @param[in] lhs  first JSON value to consider
+    @param[in] rhs  second JSON value to consider
+    @return whether @a lhs is greater than @a rhs
+
+    @complexity Linear.
+
+    @liveexample{The example demonstrates comparing several JSON
+    types.,operator__greater}
+    */
+    friend bool operator>(const_reference lhs, const_reference rhs) noexcept
+    {
+        // strictly greater is the negation of less-than-or-equal
+        const bool at_most_rhs = (lhs <= rhs);
+        return not at_most_rhs;
+    }
+
+    /*!
+    @brief comparison: greater than or equal
+
+    Compares whether one JSON value @a lhs is greater than or equal to another
+    JSON value by calculating `not (lhs < rhs)`.
+
+    @param[in] lhs  first JSON value to consider
+    @param[in] rhs  second JSON value to consider
+    @return whether @a lhs is greater than or equal to @a rhs
+
+    @complexity Linear.
+
+    @liveexample{The example demonstrates comparing several JSON
+    types.,operator__greaterequal}
+    */
+    friend bool operator>=(const_reference lhs, const_reference rhs) noexcept
+    {
+        // greater-than-or-equal is the negation of strictly less
+        const bool lhs_is_smaller = (lhs < rhs);
+        return not lhs_is_smaller;
+    }
+
+    /// @}
+
+
+    ///////////////////
+    // serialization //
+    ///////////////////
+
+    /// @name serialization
+    /// @{
+
+    /*!
+    @brief serialize to stream
+
+    Serialize the given JSON value @a j to the output stream @a o. The JSON
+    value will be serialized using the @ref dump member function. The
+    indentation of the output can be controlled with the member variable
+    `width` of the output stream @a o. For instance, using the manipulator
+    `std::setw(4)` on @a o sets the indentation level to `4` and the
+    serialization result is the same as calling `dump(4)`.
+
+    @param[in,out] o  stream to serialize to
+    @param[in] j  JSON value to serialize
+
+    @return the stream @a o
+
+    @complexity Linear.
+
+    @liveexample{The example below shows the serialization with different
+    parameters to `width` to adjust the indentation level.,operator_serialize}
+    */
+    friend std::ostream& operator<<(std::ostream& o, const basic_json& j)
+    {
+        // The stream's width doubles as the requested indentation. Read it
+        // once, then clear it so that it does not affect later output on o.
+        const auto requested_width = o.width();
+        o.width(0);
+
+        // a positive width switches pretty-printing on
+        const bool pretty_print = (requested_width > 0);
+        unsigned int indent_step = 0;
+        if (pretty_print)
+        {
+            indent_step = static_cast<unsigned int>(requested_width);
+        }
+
+        // do the actual serialization
+        j.dump(o, pretty_print, indent_step);
+        return o;
+    }
+
+    /*!
+    @brief serialize to stream
+    @copydoc operator<<(std::ostream&, const basic_json&)
+    */
+    friend std::ostream& operator>>(const basic_json& j, std::ostream& o)
+    {
+        // same as `o << j`, only with the operands in reverse order
+        o << j;
+        return o;
+    }
+
+    /// @}
+
+
+    /////////////////////
+    // deserialization //
+    /////////////////////
+
+    /// @name deserialization
+    /// @{
+
+    /*!
+    @brief deserialize from string
+
+    @param[in] s  string to read a serialized JSON value from
+    @param[in] cb a parser callback function of type @ref parser_callback_t
+    which is used to control the deserialization by filtering unwanted values
+    (optional)
+
+    @return result of the deserialization
+
+    @complexity Linear in the length of the input. The parser is a predictive
+    LL(1) parser. The complexity can be higher if the parser callback function
+    @a cb has a super-linear complexity.
+
+    @liveexample{The example below demonstrates the parse function with and
+    without callback function.,parse__string__parser_callback_t}
+
+    @sa parse(std::istream&, parser_callback_t) for a version that reads from
+    an input stream
+    */
+    static basic_json parse(const string_t& s, parser_callback_t cb = nullptr)
+    {
+        // set up a parser over the string, then let it produce the value
+        parser string_parser(s, cb);
+        return string_parser.parse();
+    }
+
+    /*!
+    @brief deserialize from stream
+
+    @param[in,out] i  stream to read a serialized JSON value from
+    @param[in] cb a parser callback function of type @ref parser_callback_t
+    which is used to control the deserialization by filtering unwanted values
+    (optional)
+
+    @return result of the deserialization
+
+    @complexity Linear in the length of the input. The parser is a predictive
+    LL(1) parser. The complexity can be higher if the parser callback function
+    @a cb has a super-linear complexity.
+
+    @liveexample{The example below demonstrates the parse function with and
+    without callback function.,parse__istream__parser_callback_t}
+
+    @sa parse(const string_t&, parser_callback_t) for a version that reads
+    from a string
+    */
+    static basic_json parse(std::istream& i, parser_callback_t cb = nullptr)
+    {
+        // set up a parser over the stream, then let it produce the value
+        parser stream_parser(i, cb);
+        return stream_parser.parse();
+    }
+
+    /*!
+    @brief deserialize from a temporary stream
+
+    Overload for rvalue streams, e.g. a stream object created directly in the
+    call expression. Inside the function @a i is a named variable and is
+    handed to the parser like any other input stream.
+
+    @param[in,out] i  stream to read a serialized JSON value from
+    @param[in] cb a parser callback function of type @ref parser_callback_t
+    (optional)
+
+    @return result of the deserialization
+    */
+    static basic_json parse(std::istream&& i, parser_callback_t cb = nullptr)
+    {
+        parser stream_parser(i, cb);
+        return stream_parser.parse();
+    }
+
+    /*!
+    @brief deserialize from stream
+
+    Deserializes an input stream to a JSON value.
+
+    @param[in,out] i  input stream to read a serialized JSON value from
+    @param[in,out] j  JSON value to write the deserialized input to
+
+    @throw std::invalid_argument in case of parse errors
+
+    @complexity Linear in the length of the input. The parser is a predictive
+    LL(1) parser.
+
+    @liveexample{The example below shows how a JSON value is constructed by
+    reading a serialization from a stream.,operator_deserialize}
+
+    @sa parse(std::istream&, parser_callback_t) for a variant with a parser
+    callback function to filter values while parsing
+    */
+    friend std::istream& operator<<(basic_json& j, std::istream& i)
+    {
+        // parse the stream without a callback and store the result in j
+        parser stream_parser(i);
+        j = stream_parser.parse();
+        return i;
+    }
+
+    /*!
+    @brief deserialize from stream
+    @copydoc operator<<(basic_json&, std::istream&)
+    */
+    friend std::istream& operator>>(std::istream& i, basic_json& j)
+    {
+        // parse the stream without a callback and store the result in j
+        parser stream_parser(i);
+        j = stream_parser.parse();
+        return i;
+    }
+
+    /// @}
+
+
+  private:
+    ///////////////////////////
+    // convenience functions //
+    ///////////////////////////
+
+    /// return the type as string
+    string_t type_name() const
+    {
+        // every type except the numeric ones has a name of its own
+        if (m_type == value_t::null)
+        {
+            return "null";
+        }
+        if (m_type == value_t::object)
+        {
+            return "object";
+        }
+        if (m_type == value_t::array)
+        {
+            return "array";
+        }
+        if (m_type == value_t::string)
+        {
+            return "string";
+        }
+        if (m_type == value_t::boolean)
+        {
+            return "boolean";
+        }
+        if (m_type == value_t::discarded)
+        {
+            return "discarded";
+        }
+
+        // whatever remains (integer and floating-point) is reported as number
+        return "number";
+    }
+
+    /*!
+    @brief calculates the extra space to escape a JSON string
+
+    @param[in] s  the string to escape
+    @return the number of characters required to escape string @a s
+
+    @complexity Linear in the length of string @a s.
+    */
+    static std::size_t extra_space(const string_t& s) noexcept
+    {
+        std::size_t additional = 0;
+
+        for (const auto& ch : s)
+        {
+            // characters that have a two-character escape sequence
+            const bool has_short_escape =
+                (ch == '"') or (ch == '\\') or (ch == '\b') or (ch == '\f') or
+                (ch == '\n') or (ch == '\r') or (ch == '\t');
+
+            if (has_short_escape)
+            {
+                // from c (1 byte) to \x (2 bytes)
+                additional += 1;
+            }
+            else if (ch >= 0x00 and ch <= 0x1f)
+            {
+                // remaining control characters: from c (1 byte) to \uxxxx
+                // (6 bytes)
+                additional += 5;
+            }
+        }
+
+        return additional;
+    }
+
+    /*!
+    @brief escape a string
+
+    Escape a string by replacing certain special characters by a sequence of an
+    escape character (backslash) and another character and other control
+    characters by a sequence of "\u" followed by a four-digit hex
+    representation.
+
+    @param[in] s  the string to escape
+    @return  the escaped string
+
+    @complexity Linear in the length of string @a s.
+    */
+    static string_t escape_string(const string_t& s) noexcept
+    {
+        const auto space = extra_space(s);
+        if (space == 0)
+        {
+            // nothing needs escaping: return an unmodified copy
+            return s;
+        }
+
+        // Create a result string of the necessary size, pre-filled with
+        // backslashes: every escape sequence starts with one, so only the
+        // characters following it have to be written below.
+        string_t result(s.size() + space, '\\');
+        std::size_t pos = 0;
+
+        for (const auto& c : s)
+        {
+            switch (c)
+            {
+                // quotation mark (0x22)
+                case '"':
+                {
+                    result[pos + 1] = '"';
+                    pos += 2;
+                    break;
+                }
+
+                // reverse solidus (0x5c)
+                case '\\':
+                {
+                    // both characters are already backslashes
+                    pos += 2;
+                    break;
+                }
+
+                // backspace (0x08)
+                case '\b':
+                {
+                    result[pos + 1] = 'b';
+                    pos += 2;
+                    break;
+                }
+
+                // formfeed (0x0c)
+                case '\f':
+                {
+                    result[pos + 1] = 'f';
+                    pos += 2;
+                    break;
+                }
+
+                // newline (0x0a)
+                case '\n':
+                {
+                    result[pos + 1] = 'n';
+                    pos += 2;
+                    break;
+                }
+
+                // carriage return (0x0d)
+                case '\r':
+                {
+                    result[pos + 1] = 'r';
+                    pos += 2;
+                    break;
+                }
+
+                // horizontal tab (0x09)
+                case '\t':
+                {
+                    result[pos + 1] = 't';
+                    pos += 2;
+                    break;
+                }
+
+                default:
+                {
+                    if (c >= 0x00 and c <= 0x1f)
+                    {
+                        // Write the remaining control characters as \u00xx.
+                        // The characters are stored one by one instead of
+                        // via sprintf(): sprintf() appends a '\0' that had to
+                        // be patched back to '\\' afterwards, and for a
+                        // control character at the very end of s that patch
+                        // hit result[result.size()], i.e. it overwrote the
+                        // string's terminator.
+                        static const char hex_digits[] = "0123456789abcdef";
+                        result[pos + 1] = 'u';
+                        result[pos + 2] = '0';
+                        result[pos + 3] = '0';
+                        result[pos + 4] = hex_digits[(c >> 4) & 0x0f];
+                        result[pos + 5] = hex_digits[c & 0x0f];
+                        pos += 6;
+                    }
+                    else
+                    {
+                        // all other characters are added as-is
+                        result[pos++] = c;
+                    }
+                    break;
+                }
+            }
+        }
+
+        return result;
+    }
+
+    /*!
+    @brief internal implementation of the serialization function
+
+    This function is called by the public member function dump and organizes
+    the serialization internally. The indentation level is propagated as
+    additional parameter. In case of arrays and objects, the function is called
+    recursively. Note that
+
+    - strings and object keys are escaped using escape_string()
+    - integer numbers are converted implicitly via operator<<
+    - floating-point numbers are written via operator<< with digits10 precision
+
+    @param[out] o              stream to write to
+    @param[in] pretty_print    whether the output shall be pretty-printed
+    @param[in] indent_step     the indent level
+    @param[in] current_indent  the current indent level (only used internally)
+    */
+    void dump(std::ostream& o, const bool pretty_print, const unsigned int indent_step,
+              const unsigned int current_indent = 0) const
+    {
+        // Indentation used for the elements of an object or array. It only
+        // grows when pretty-printing; the closing bracket is always written
+        // at the indentation this call was entered with.
+        const unsigned int inner_indent =
+            pretty_print ? (current_indent + indent_step) : current_indent;
+
+        switch (m_type)
+        {
+            case (value_t::object):
+            {
+                const auto& members = *m_value.object;
+
+                if (members.empty())
+                {
+                    o << "{}";
+                    return;
+                }
+
+                o << "{";
+                if (pretty_print)
+                {
+                    o << "\n";
+                }
+
+                // a separator is written before every member but the first
+                bool first_member = true;
+                for (auto member = members.cbegin(); member != members.cend(); ++member)
+                {
+                    if (not first_member)
+                    {
+                        o << (pretty_print ? ",\n" : ",");
+                    }
+                    first_member = false;
+
+                    // "key": followed by the recursively dumped value
+                    o << string_t(inner_indent, ' ') << "\""
+                      << escape_string(member->first) << "\":"
+                      << (pretty_print ? " " : "");
+                    member->second.dump(o, pretty_print, indent_step, inner_indent);
+                }
+
+                if (pretty_print)
+                {
+                    o << "\n";
+                }
+
+                o << string_t(current_indent, ' ') + "}";
+                return;
+            }
+
+            case (value_t::array):
+            {
+                const auto& elements = *m_value.array;
+
+                if (elements.empty())
+                {
+                    o << "[]";
+                    return;
+                }
+
+                o << "[";
+                if (pretty_print)
+                {
+                    o << "\n";
+                }
+
+                // a separator is written before every element but the first
+                bool first_element = true;
+                for (auto element = elements.cbegin(); element != elements.cend(); ++element)
+                {
+                    if (not first_element)
+                    {
+                        o << (pretty_print ? ",\n" : ",");
+                    }
+                    first_element = false;
+
+                    o << string_t(inner_indent, ' ');
+                    element->dump(o, pretty_print, indent_step, inner_indent);
+                }
+
+                if (pretty_print)
+                {
+                    o << "\n";
+                }
+
+                o << string_t(current_indent, ' ') << "]";
+                return;
+            }
+
+            case (value_t::string):
+            {
+                // strings are quoted and escaped
+                o << string_t("\"") << escape_string(*m_value.string) << "\"";
+                return;
+            }
+
+            case (value_t::boolean):
+            {
+                o << (m_value.boolean ? "true" : "false");
+                return;
+            }
+
+            case (value_t::number_integer):
+            {
+                o << m_value.number_integer;
+                return;
+            }
+
+            case (value_t::number_float):
+            {
+                // the precision is taken from the floating-point type's
+                // std::numeric_limits<...>::digits10
+                o << std::setprecision(std::numeric_limits<number_float_t>::digits10) << m_value.number_float;
+                return;
+            }
+
+            case (value_t::discarded):
+            {
+                o << "<discarded>";
+                return;
+            }
+
+            default:
+            {
+                // the remaining type is null
+                o << "null";
+                return;
+            }
+        }
+    }
+
+  private:
+    //////////////////////
+    // member variables //
+    //////////////////////
+
+    /// the type of the current element; a value constructed without further
+    /// arguments starts out as JSON null
+    value_t m_type = value_t::null;
+
+    /// the value of the current element; the member functions of this class
+    /// check m_type to decide which member of m_value to access
+    json_value m_value = {};
+
+
+  private:
+    ///////////////
+    // iterators //
+    ///////////////
+
+    /*!
+    @brief an iterator for primitive JSON types
+
+    This class models an iterator for primitive JSON types (boolean, number,
+    string). Its only purpose is to allow the iterator/const_iterator classes
+    to "iterate" over primitive values. Internally, the iterator is modeled by
+    a `difference_type` variable. Value begin_value (`0`) models the begin,
+    end_value (`1`) models past the end.
+    */
+    class primitive_iterator_t
+    {
+        /// position that models the beginning
+        static constexpr difference_type begin_value = 0;
+        /// position that models past the end
+        static constexpr difference_type end_value = begin_value + 1;
+
+        /// current position as signed integer type; initially set to a value
+        /// that is neither begin_value nor end_value
+        difference_type m_position = std::numeric_limits<std::ptrdiff_t>::min();
+
+      public:
+        /// move the iterator to the defined beginning
+        void set_begin()
+        {
+            m_position = begin_value;
+        }
+
+        /// move the iterator to the defined past-the-end position
+        void set_end()
+        {
+            m_position = end_value;
+        }
+
+        /// whether the iterator is at the beginning, i.e. can be dereferenced
+        bool is_begin() const
+        {
+            return (begin_value == m_position);
+        }
+
+        /// whether the iterator is past the end
+        bool is_end() const
+        {
+            return (end_value == m_position);
+        }
+
+        /// expose the position by reference so that it can be changed and
+        /// compared
+        operator difference_type& ()
+        {
+            return m_position;
+        }
+
+        /// expose the position by value for comparisons
+        operator difference_type () const
+        {
+            return m_position;
+        }
+    };
+
+    /*!
+    @brief an iterator value
+
+    @note This structure could easily be a union, but MSVC currently does not
+    allow unions members with complex constructors, see
+    https://github.com/nlohmann/json/pull/105.
+    */
+    struct internal_iterator
+    {
+        /// position inside a JSON object
+        typename object_t::iterator object_iterator;
+        /// position inside a JSON array
+        typename array_t::iterator array_iterator;
+        /// position used for every other value type
+        primitive_iterator_t primitive_iterator;
+
+        /// value-initialize all three member iterators
+        internal_iterator()
+            : object_iterator{}, array_iterator{}, primitive_iterator{}
+        {}
+    };
+
+  public:
+    /*!
+    @brief a const random access iterator for the @ref basic_json class
+
+    This class implements a const iterator for the @ref basic_json class. From
+    this class, the @ref iterator class is derived.
+
+    @requirement The class satisfies the following concept requirements:
+    - [RandomAccessIterator](http://en.cppreference.com/w/cpp/concept/RandomAccessIterator):
+      The iterator that can be moved to point (forward and backward) to any
+      element in constant time.
+    */
+    class const_iterator : public std::iterator<std::random_access_iterator_tag, const basic_json>
+    {
+        /// allow basic_json to access private members
+        friend class basic_json;
+
+      public:
+        /// the type of the values when the iterator is dereferenced
+        using value_type = typename basic_json::value_type;
+        /// a type to represent differences between iterators
+        using difference_type = typename basic_json::difference_type;
+        /// defines a pointer to the type iterated over (value_type)
+        using pointer = typename basic_json::const_pointer;
+        /// defines a reference to the type iterated over (value_type)
+        using reference = typename basic_json::const_reference;
+        /// the category of the iterator
+        /// NOTE(review): the std::iterator base above names
+        /// std::random_access_iterator_tag, while this alias hides it with
+        /// std::bidirectional_iterator_tag - confirm which one is intended
+        using iterator_category = std::bidirectional_iterator_tag;
+
+        /// default constructor: no associated JSON value (m_object is null)
+        const_iterator() = default;
+
+        /*!
+        @brief constructor for a given JSON instance
+
+        Only stores the pointer. m_it is value-initialized by its default
+        member initializer, so no per-type setup is needed and @a object is
+        not dereferenced here. Use set_begin() / set_end() to position it.
+        */
+        const_iterator(pointer object) : m_object(object)
+        {}
+
+        /*!
+        @brief converting constructor from a non-const iterator
+
+        Copies the pointer and the complete internal iterator state, exactly
+        like the copy constructor does. Nothing is dereferenced, so converting
+        a default-constructed iterator (null m_object) is safe.
+        */
+        const_iterator(const iterator& other)
+            : m_object(other.m_object), m_it(other.m_it)
+        {}
+
+        /// copy constructor
+        const_iterator(const const_iterator& other) noexcept
+            : m_object(other.m_object), m_it(other.m_it)
+        {}
+
+        /// copy assignment (copy-and-swap: @a other is taken by value)
+        const_iterator& operator=(const_iterator other) noexcept(
+            std::is_nothrow_move_constructible<pointer>::value and
+            std::is_nothrow_move_assignable<pointer>::value and
+            std::is_nothrow_move_constructible<internal_iterator>::value and
+            std::is_nothrow_move_assignable<internal_iterator>::value
+        )
+        {
+            std::swap(m_object, other.m_object);
+            std::swap(m_it, other.m_it);
+            return *this;
+        }
+
+      private:
+        /// set the iterator to the first value
+        void set_begin()
+        {
+            switch (m_object->m_type)
+            {
+                case (basic_json::value_t::object):
+                {
+                    m_it.object_iterator = m_object->m_value.object->begin();
+                    break;
+                }
+
+                case (basic_json::value_t::array):
+                {
+                    m_it.array_iterator = m_object->m_value.array->begin();
+                    break;
+                }
+
+                case (basic_json::value_t::null):
+                {
+                    // set to end so begin()==end() is true: null is empty
+                    m_it.primitive_iterator.set_end();
+                    break;
+                }
+
+                default:
+                {
+                    m_it.primitive_iterator.set_begin();
+                    break;
+                }
+            }
+        }
+
+        /// set the iterator past the last value
+        void set_end()
+        {
+            switch (m_object->m_type)
+            {
+                case (basic_json::value_t::object):
+                {
+                    m_it.object_iterator = m_object->m_value.object->end();
+                    break;
+                }
+
+                case (basic_json::value_t::array):
+                {
+                    m_it.array_iterator = m_object->m_value.array->end();
+                    break;
+                }
+
+                default:
+                {
+                    m_it.primitive_iterator.set_end();
+                    break;
+                }
+            }
+        }
+
+      public:
+        /*!
+        @brief return a reference to the value pointed to by the iterator
+        @throw std::out_of_range for null values, and for other primitive
+        values when the iterator is not at begin
+        */
+        reference operator*() const
+        {
+            switch (m_object->m_type)
+            {
+                case (basic_json::value_t::object):
+                {
+                    return m_it.object_iterator->second;
+                }
+
+                case (basic_json::value_t::array):
+                {
+                    return *m_it.array_iterator;
+                }
+
+                case (basic_json::value_t::null):
+                {
+                    throw std::out_of_range("cannot get value");
+                }
+
+                default:
+                {
+                    if (m_it.primitive_iterator.is_begin())
+                    {
+                        return *m_object;
+                    }
+                    else
+                    {
+                        throw std::out_of_range("cannot get value");
+                    }
+                }
+            }
+        }
+
+        /*!
+        @brief dereference the iterator
+        @throw std::out_of_range for non-object/non-array values when the
+        iterator is not at begin
+        */
+        pointer operator->() const
+        {
+            switch (m_object->m_type)
+            {
+                case (basic_json::value_t::object):
+                {
+                    return &(m_it.object_iterator->second);
+                }
+
+                case (basic_json::value_t::array):
+                {
+                    return &*m_it.array_iterator;
+                }
+
+                default:
+                {
+                    if (m_it.primitive_iterator.is_begin())
+                    {
+                        return m_object;
+                    }
+                    else
+                    {
+                        throw std::out_of_range("cannot get value");
+                    }
+                }
+            }
+        }
+
+        /// post-increment (it++)
+        const_iterator operator++(int)
+        {
+            auto result = *this;
+            ++(*this);
+
+            return result;
+        }
+
+        /// pre-increment (++it)
+        const_iterator& operator++()
+        {
+            switch (m_object->m_type)
+            {
+                case (basic_json::value_t::object):
+                {
+                    ++m_it.object_iterator;
+                    break;
+                }
+
+                case (basic_json::value_t::array):
+                {
+                    ++m_it.array_iterator;
+                    break;
+                }
+
+                default:
+                {
+                    ++m_it.primitive_iterator;
+                    break;
+                }
+            }
+
+            return *this;
+        }
+
+        /// post-decrement (it--)
+        const_iterator operator--(int)
+        {
+            auto result = *this;
+            --(*this);
+
+            return result;
+        }
+
+        /// pre-decrement (--it)
+        const_iterator& operator--()
+        {
+            switch (m_object->m_type)
+            {
+                case (basic_json::value_t::object):
+                {
+                    --m_it.object_iterator;
+                    break;
+                }
+
+                case (basic_json::value_t::array):
+                {
+                    --m_it.array_iterator;
+                    break;
+                }
+
+                default:
+                {
+                    --m_it.primitive_iterator;
+                    break;
+                }
+            }
+
+            return *this;
+        }
+
+        /*!
+        @brief comparison: equal
+        @throw std::domain_error if the iterators belong to different values
+        */
+        bool operator==(const const_iterator& other) const
+        {
+            // if objects are not the same, the comparison is undefined
+            if (m_object != other.m_object)
+            {
+                throw std::domain_error("cannot compare iterators of different containers");
+            }
+
+            switch (m_object->m_type)
+            {
+                case (basic_json::value_t::object):
+                {
+                    return (m_it.object_iterator == other.m_it.object_iterator);
+                }
+
+                case (basic_json::value_t::array):
+                {
+                    return (m_it.array_iterator == other.m_it.array_iterator);
+                }
+
+                default:
+                {
+                    return (m_it.primitive_iterator == other.m_it.primitive_iterator);
+                }
+            }
+        }
+
+        /// comparison: not equal
+        bool operator!=(const const_iterator& other) const
+        {
+            return not operator==(other);
+        }
+
+        /*!
+        @brief comparison: smaller
+        @throw std::domain_error if the iterators belong to different values
+        or if they iterate over an object
+        */
+        bool operator<(const const_iterator& other) const
+        {
+            // if objects are not the same, the comparison is undefined
+            if (m_object != other.m_object)
+            {
+                throw std::domain_error("cannot compare iterators of different containers");
+            }
+
+            switch (m_object->m_type)
+            {
+                case (basic_json::value_t::object):
+                {
+                    throw std::domain_error("cannot use operator< for object iterators");
+                }
+
+                case (basic_json::value_t::array):
+                {
+                    return (m_it.array_iterator < other.m_it.array_iterator);
+                }
+
+                default:
+                {
+                    return (m_it.primitive_iterator < other.m_it.primitive_iterator);
+                }
+            }
+        }
+
+        /// comparison: less than or equal
+        bool operator<=(const const_iterator& other) const
+        {
+            return not other.operator < (*this);
+        }
+
+        /// comparison: greater than
+        bool operator>(const const_iterator& other) const
+        {
+            return not operator<=(other);
+        }
+
+        /// comparison: greater than or equal
+        bool operator>=(const const_iterator& other) const
+        {
+            return not operator<(other);
+        }
+
+        /*!
+        @brief add to iterator
+        @throw std::domain_error for object iterators
+        */
+        const_iterator& operator+=(difference_type i)
+        {
+            switch (m_object->m_type)
+            {
+                case (basic_json::value_t::object):
+                {
+                    throw std::domain_error("cannot use operator+= for object iterators");
+                }
+
+                case (basic_json::value_t::array):
+                {
+                    m_it.array_iterator += i;
+                    break;
+                }
+
+                default:
+                {
+                    m_it.primitive_iterator += i;
+                    break;
+                }
+            }
+
+            return *this;
+        }
+
+        /// subtract from iterator (implemented as += with the negated offset)
+        const_iterator& operator-=(difference_type i)
+        {
+            return operator+=(-i);
+        }
+
+        /// add to iterator; const because only the returned copy is advanced
+        const_iterator operator+(difference_type i) const
+        {
+            auto result = *this;
+            result += i;
+            return result;
+        }
+
+        /// subtract from iterator; const because only the returned copy moves
+        const_iterator operator-(difference_type i) const
+        {
+            auto result = *this;
+            result -= i;
+            return result;
+        }
+
+        /*!
+        @brief return difference
+        @throw std::domain_error for object iterators
+        */
+        difference_type operator-(const const_iterator& other) const
+        {
+            switch (m_object->m_type)
+            {
+                case (basic_json::value_t::object):
+                {
+                    throw std::domain_error("cannot use operator- for object iterators");
+                }
+
+                case (basic_json::value_t::array):
+                {
+                    return m_it.array_iterator - other.m_it.array_iterator;
+                }
+
+                default:
+                {
+                    return m_it.primitive_iterator - other.m_it.primitive_iterator;
+                }
+            }
+        }
+
+        /*!
+        @brief access to successor
+        @throw std::domain_error for object iterators
+        @throw std::out_of_range for null values, and for other primitive
+        values when offset @a n does not lead to the begin position
+        */
+        reference operator[](difference_type n) const
+        {
+            switch (m_object->m_type)
+            {
+                case (basic_json::value_t::object):
+                {
+                    throw std::domain_error("cannot use operator[] for object iterators");
+                }
+
+                case (basic_json::value_t::array):
+                {
+                    return *(m_it.array_iterator + n);
+                }
+
+                case (basic_json::value_t::null):
+                {
+                    throw std::out_of_range("cannot get value");
+                }
+
+                default:
+                {
+                    // position + n must be the begin position (0)
+                    if (m_it.primitive_iterator == -n)
+                    {
+                        return *m_object;
+                    }
+                    else
+                    {
+                        throw std::out_of_range("cannot get value");
+                    }
+                }
+            }
+        }
+
+        /*!
+        @brief return the key of an object iterator
+        @throw std::domain_error if the iterator does not iterate an object
+        */
+        typename object_t::key_type key() const
+        {
+            switch (m_object->m_type)
+            {
+                case (basic_json::value_t::object):
+                {
+                    return m_it.object_iterator->first;
+                }
+
+                default:
+                {
+                    throw std::domain_error("cannot use key() for non-object iterators");
+                }
+            }
+        }
+
+        /// return the value of an iterator (same as operator*)
+        reference value() const
+        {
+            return operator*();
+        }
+
+      private:
+        /// associated JSON instance
+        pointer m_object = nullptr;
+        /// the actual iterator of the associated instance
+        internal_iterator m_it = internal_iterator();
+    };
+
+    /*!
+    @brief a mutable random access iterator for the @ref basic_json class
+
+    @requirement The class satisfies the following concept requirements:
+    - [RandomAccessIterator](http://en.cppreference.com/w/cpp/concept/RandomAccessIterator):
+      The iterator that can be moved to point (forward and backward) to any
+      element in constant time.
+    - [OutputIterator](http://en.cppreference.com/w/cpp/concept/OutputIterator):
+      It is possible to write to the pointed-to element.
+    */
+    class iterator : public const_iterator
+    {
+      public:
+        using base_iterator = const_iterator;
+        using pointer = typename basic_json::pointer;
+        using reference = typename basic_json::reference;
+
+        /// default constructor
+        iterator() = default;
+
+        /// constructor for a given JSON instance
+        iterator(pointer object) noexcept : base_iterator(object)
+        {}
+
+        /// copy constructor
+        iterator(const iterator& other) noexcept
+            : base_iterator(other)
+        {}
+
+        /// copy assignment (delegates to the base class assignment)
+        iterator& operator=(iterator other) noexcept(
+            std::is_nothrow_move_constructible<pointer>::value and
+            std::is_nothrow_move_assignable<pointer>::value and
+            std::is_nothrow_move_constructible<internal_iterator>::value and
+            std::is_nothrow_move_assignable<internal_iterator>::value
+        )
+        {
+            base_iterator::operator=(other);
+            return *this;
+        }
+
+        /// return a mutable reference to the value pointed to by the iterator;
+        /// const so that a const iterator object can still be dereferenced
+        reference operator*() const
+        {
+            return const_cast<reference>(base_iterator::operator*());
+        }
+
+        /// dereference the iterator; const for the same reason as operator*
+        pointer operator->() const
+        {
+            return const_cast<pointer>(base_iterator::operator->());
+        }
+
+        /// post-increment (it++)
+        iterator operator++(int)
+        {
+            iterator result = *this;
+            base_iterator::operator++();
+            return result;
+        }
+
+        /// pre-increment (++it)
+        iterator& operator++()
+        {
+            base_iterator::operator++();
+            return *this;
+        }
+
+        /// post-decrement (it--)
+        iterator operator--(int)
+        {
+            iterator result = *this;
+            base_iterator::operator--();
+            return result;
+        }
+
+        /// pre-decrement (--it)
+        iterator& operator--()
+        {
+            base_iterator::operator--();
+            return *this;
+        }
+
+        /// add to iterator
+        iterator& operator+=(difference_type i)
+        {
+            base_iterator::operator+=(i);
+            return *this;
+        }
+
+        /// subtract from iterator
+        iterator& operator-=(difference_type i)
+        {
+            base_iterator::operator-=(i);
+            return *this;
+        }
+
+        /// add to iterator; const because only the returned copy is advanced
+        iterator operator+(difference_type i) const
+        {
+            auto result = *this;
+            result += i;
+            return result;
+        }
+
+        /// subtract from iterator; const because only the returned copy moves
+        iterator operator-(difference_type i) const
+        {
+            auto result = *this;
+            result -= i;
+            return result;
+        }
+
+        /// return difference (forwards to the base class implementation)
+        difference_type operator-(const iterator& other) const
+        {
+            return base_iterator::operator-(other);
+        }
+
+        /// access to successor
+        reference operator[](difference_type n) const
+        {
+            return const_cast<reference>(base_iterator::operator[](n));
+        }
+
+        /// return the value of an iterator
+        reference value() const
+        {
+            return const_cast<reference>(base_iterator::value());
+        }
+    };
+
+    /*!
+    @brief a template for a reverse iterator class
+
+    @tparam Base the base iterator type to reverse. Valid types are @ref
+    iterator (to create @ref reverse_iterator) and @ref const_iterator (to
+    create @ref const_reverse_iterator).
+
+    @requirement The class satisfies the following concept requirements:
+    - [RandomAccessIterator](http://en.cppreference.com/w/cpp/concept/RandomAccessIterator):
+      The iterator that can be moved to point (forward and backward) to any
+      element in constant time.
+    - [OutputIterator](http://en.cppreference.com/w/cpp/concept/OutputIterator):
+      It is possible to write to the pointed-to element (only if @a Base is
+      @ref iterator).
+    */
+    template<typename Base>
+    class json_reverse_iterator : public std::reverse_iterator<Base>
+    {
+      public:
+        /// shortcut to the reverse iterator adaptor
+        using base_iterator = std::reverse_iterator<Base>;
+        /// the reference type for the pointed-to element
+        using reference = typename Base::reference;
+
+        /// create reverse iterator from iterator
+        json_reverse_iterator(const typename base_iterator::iterator_type& it)
+            : base_iterator(it) {}
+
+        /// create reverse iterator from base class
+        json_reverse_iterator(const base_iterator& it) : base_iterator(it) {}
+
+        /// post-increment (it++)
+        json_reverse_iterator operator++(int)
+        {
+            return base_iterator::operator++(1);
+        }
+
+        /// pre-increment (++it)
+        json_reverse_iterator& operator++()
+        {
+            base_iterator::operator++();
+            return *this;
+        }
+
+        /// post-decrement (it--)
+        json_reverse_iterator operator--(int)
+        {
+            return base_iterator::operator--(1);
+        }
+
+        /// pre-decrement (--it)
+        json_reverse_iterator& operator--()
+        {
+            base_iterator::operator--();
+            return *this;
+        }
+
+        /// add to iterator
+        json_reverse_iterator& operator+=(difference_type i)
+        {
+            base_iterator::operator+=(i);
+            return *this;
+        }
+
+        /// add to iterator
+        json_reverse_iterator operator+(difference_type i) const
+        {
+            auto result = *this;
+            result += i;
+            return result;
+        }
+
+        /// subtract from iterator
+        json_reverse_iterator operator-(difference_type i) const
+        {
+            auto result = *this;
+            result -= i;
+            return result;
+        }
+
+        /*!
+        @brief return difference
+
+        Advancing a reverse iterator moves its base() backwards, so the
+        distance from @a other to *this is other.base() - this->base(); this
+        keeps (other + (*this - other)) == *this.
+        */
+        difference_type operator-(const json_reverse_iterator& other) const
+        {
+            return other.base() - this->base();
+        }
+
+        /// access to successor
+        reference operator[](difference_type n) const
+        {
+            return *(this->operator+(n));
+        }
+
+        /// return the key of an object iterator
+        typename object_t::key_type key() const
+        {
+            // the element referred to is the one before base()
+            auto it = --this->base();
+            return it.key();
+        }
+
+        /// return the value of an iterator
+        reference value() const
+        {
+            // the element referred to is the one before base()
+            auto it = --this->base();
+            return it.operator * ();
+        }
+    };
+
+    /*!
+    @brief wrapper to access iterator member functions in range-based for
+
+    This class allows to access @ref key() and @ref value() during range-based
+    for loops. In these loops, a reference to the JSON values is returned, so
+    there is no access to the underlying iterator.
+    */
+    class iterator_wrapper
+    {
+      private:
+        /// the container to iterate
+        basic_json& container;
+        /// the type of the iterator to use while iteration
+        using json_iterator = decltype(std::begin(container));
+
+        /// internal iterator wrapper
+        class iterator_wrapper_internal
+        {
+          private:
+            /// the iterator
+            json_iterator anchor;
+            /// number of increments so far; used as the key for arrays
+            std::size_t array_index = 0;
+
+          public:
+            /// construct wrapper given an iterator
+            iterator_wrapper_internal(json_iterator i) : anchor(i)
+            {}
+
+            /// dereference operator (needed for range-based for); yields the
+            /// wrapper itself so that key() and value() are reachable
+            iterator_wrapper_internal& operator*()
+            {
+                return *this;
+            }
+
+            /// increment operator (needed for range-based for)
+            iterator_wrapper_internal& operator++()
+            {
+                ++anchor;
+                ++array_index;
+
+                return *this;
+            }
+
+            /// inequality operator (needed for range-based for); compares
+            /// the wrapped iterators only and does not modify either side
+            bool operator!= (const iterator_wrapper_internal& o) const
+            {
+                return anchor != o.anchor;
+            }
+
+            /// stream operator: writes the current value
+            friend std::ostream& operator<<(std::ostream& o, const iterator_wrapper_internal& w)
+            {
+                return o << w.value();
+            }
+
+            /// return key of the iterator
+            typename basic_json::string_t key() const
+            {
+                switch (anchor.m_object->type())
+                {
+                    /// use integer array index as key
+                    case (value_t::array):
+                    {
+                        return std::to_string(array_index);
+                    }
+
+                    /// use key from the object
+                    case (value_t::object):
+                    {
+                        return anchor.key();
+                    }
+
+                    /// use an empty key for all primitive types
+                    default:
+                    {
+                        return "";
+                    }
+                }
+            }
+
+            /// return value of the iterator
+            typename json_iterator::reference value() const
+            {
+                return anchor.value();
+            }
+        };
+
+      public:
+        /// construct iterator wrapper from a container
+        iterator_wrapper(basic_json& cont)
+            : container(cont)
+        {}
+
+        /// return iterator begin (needed for range-based for)
+        iterator_wrapper_internal begin()
+        {
+            return iterator_wrapper_internal(container.begin());
+        }
+
+        /// return iterator end (needed for range-based for)
+        iterator_wrapper_internal end()
+        {
+            return iterator_wrapper_internal(container.end());
+        }
+    };
+
+  private:
+    //////////////////////
+    // lexer and parser //
+    //////////////////////
+
+    /*!
+    @brief lexical analysis
+
+    This class organizes the lexical analysis during JSON deserialization. The
+    core of it is a scanner generated by re2c <http://re2c.org> that processes
+    a buffer and recognizes tokens according to RFC 7159.
+    */
+    class lexer
+    {
+      public:
+        /// token types for the parser
+        enum class token_type
+        {
+            uninitialized,    ///< indicating the scanner is uninitialized
+            literal_true,     ///< the "true" literal
+            literal_false,    ///< the "false" literal
+            literal_null,     ///< the "null" literal
+            value_string,     ///< a string - use get_string() for actual value
+            value_number,     ///< a number - use get_number() for actual value
+            begin_array,      ///< the character for array begin "["
+            begin_object,     ///< the character for object begin "{"
+            end_array,        ///< the character for array end "]"
+            end_object,       ///< the character for object end "}"
+            name_separator,   ///< the name separator ":"
+            value_separator,  ///< the value separator ","
+            parse_error,      ///< indicating a parse error
+            end_of_input      ///< indicating the end of the input buffer
+        };
+
+        /// the char type to use in the lexer
+        using lexer_char_t = unsigned char;
+
+        /*!
+        @brief constructor with a given buffer
+
+        Copies @a s into m_buffer and sets the scan pointers up over that
+        copy, the same way the stream constructor does.
+
+        @param[in] s  the text to scan
+        */
+        explicit lexer(const string_t& s) noexcept
+            : m_stream(nullptr), m_buffer(s)
+        {
+            // point into the lexer's own copy, not into the argument: the
+            // argument may be destroyed while the lexer is still in use
+            m_content = reinterpret_cast<const lexer_char_t*>(m_buffer.c_str());
+            m_start = m_cursor = m_content;
+            m_limit = m_content + m_buffer.size();
+        }
+        /// constructor with an input stream: reads the first line of @a s into
+        /// the internal buffer and sets the scan pointers up over that line
+        explicit lexer(std::istream* s) noexcept
+            : m_stream(s), m_buffer()
+        {
+            getline(*m_stream, m_buffer);
+
+            // all scan pointers start at the first byte of the buffered line
+            const auto* const first = reinterpret_cast<const lexer_char_t*>(m_buffer.c_str());
+            m_content = first;
+            m_cursor = first;
+            m_start = first;
+            m_limit = first + m_buffer.size();
+        }
+
+        /// default constructor
+        lexer() = default;
+
+        // switch off unwanted functions
+        lexer(const lexer&) = delete;
+        lexer operator=(const lexer&) = delete;
+
+        /*!
+        @brief create a string from a Unicode code point
+
+        @param[in] codepoint1  the code point (can be high surrogate)
+        @param[in] codepoint2  the code point (can be low surrogate or 0)
+        @return string representation of the code point
+        @throw std::out_of_range if code point is >0x10ffff
+        @throw std::invalid_argument if the low surrogate is invalid
+
+        @see <http://en.wikipedia.org/wiki/UTF-8#Sample_code>
+        */
+        static string_t to_unicode(const std::size_t codepoint1,
+                                   const std::size_t codepoint2 = 0)
+        {
+            // the value to encode; replaced below for a surrogate pair
+            std::size_t scalar = codepoint1;
+
+            const bool lead_is_high_surrogate = (codepoint1 >= 0xD800 and codepoint1 <= 0xDBFF);
+            if (lead_is_high_surrogate)
+            {
+                const bool trail_is_low_surrogate = (codepoint2 >= 0xDC00 and codepoint2 <= 0xDFFF);
+                if (not trail_is_low_surrogate)
+                {
+                    throw std::invalid_argument("missing or wrong low surrogate");
+                }
+
+                // combine both halves; the constant removes the two surrogate
+                // offsets and adds the 0x10000 base in a single subtraction:
+                // (0xD800 << 10) + 0xDC00 - 0x10000 = 0x35FDC00
+                scalar = (codepoint1 << 10) + codepoint2 - 0x35FDC00;
+            }
+
+            // reject everything beyond the last valid code point up front
+            if (scalar > 0x10ffff)
+            {
+                throw std::out_of_range("code points above 0x10FFFF are invalid");
+            }
+
+            string_t utf8;
+
+            // append one encoded byte to the result
+            const auto put = [&utf8](const std::size_t byte)
+            {
+                utf8.append(1, static_cast<typename string_t::value_type>(byte));
+            };
+
+            if (scalar < 0x80)
+            {
+                // 1 byte: 0xxxxxxx (ASCII)
+                put(scalar);
+            }
+            else if (scalar <= 0x7ff)
+            {
+                // 2 bytes: 110xxxxx 10xxxxxx
+                put(0xC0 | ((scalar >> 6) & 0x1F));
+                put(0x80 | (scalar & 0x3F));
+            }
+            else if (scalar <= 0xffff)
+            {
+                // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
+                put(0xE0 | ((scalar >> 12) & 0x0F));
+                put(0x80 | ((scalar >> 6) & 0x3F));
+                put(0x80 | (scalar & 0x3F));
+            }
+            else
+            {
+                // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+                put(0xF0 | ((scalar >> 18) & 0x07));
+                put(0x80 | ((scalar >> 12) & 0x3F));
+                put(0x80 | ((scalar >> 6) & 0x3F));
+                put(0x80 | (scalar & 0x3F));
+            }
+
+            return utf8;
+        }
+
+        /*!
+        @brief return name of values of type token_type
+
+        Literal and value tokens are described in words, structural tokens by
+        their character, and every token type not listed explicitly is
+        reported as "<parse error>".
+
+        @param[in] t  token type to describe
+        @return printable name of @a t
+        */
+        static std::string token_type_name(token_type t)
+        {
+            // fallback used for every token type without its own case
+            const char* name = "<parse error>";
+
+            switch (t)
+            {
+                case token_type::uninitialized:
+                    name = "<uninitialized>";
+                    break;
+                case token_type::literal_true:
+                    name = "true literal";
+                    break;
+                case token_type::literal_false:
+                    name = "false literal";
+                    break;
+                case token_type::literal_null:
+                    name = "null literal";
+                    break;
+                case token_type::value_string:
+                    name = "string literal";
+                    break;
+                case token_type::value_number:
+                    name = "number literal";
+                    break;
+                case token_type::begin_array:
+                    name = "[";
+                    break;
+                case token_type::begin_object:
+                    name = "{";
+                    break;
+                case token_type::end_array:
+                    name = "]";
+                    break;
+                case token_type::end_object:
+                    name = "}";
+                    break;
+                case token_type::name_separator:
+                    name = ":";
+                    break;
+                case token_type::value_separator:
+                    name = ",";
+                    break;
+                case token_type::end_of_input:
+                    name = "<end of input>";
+                    break;
+                default:
+                    break;
+            }
+
+            return name;
+        }
+
+        /*!
+        This function implements a scanner for JSON. It is specified using
+        regular expressions that try to follow RFC 7159 as closely as possible.
+        These regular expressions are then translated into a deterministic
+        finite automaton (DFA) by the tool re2c <http://re2c.org>. As a result,
+        the translated code for this function consists of a large block of code
+        with goto jumps.
+
+        On entry, m_marker is reset to nullptr and m_start is set to the
+        current m_cursor. The automaton then advances m_cursor over one token.
+        A run of whitespace (tab, line feed, carriage return, space) is
+        consumed and followed by a recursive call to scan(), so whitespace is
+        never returned as a token.
+
+        The comments at the labels below describe what each generated state
+        does; the code itself should only be changed by regenerating it.
+
+        @return the class of the next token read from the buffer;
+                token_type::parse_error if no token matches
+        */
+        token_type scan() noexcept
+        {
+            // pointer for backtracking information
+            m_marker = nullptr;
+
+            // remember the begin of the token
+            m_start = m_cursor;
+
+
+            {
+                lexer_char_t yych;
+                // selects the fallback in basic_json_parser_32:
+                // 0 = parse error, 1 = number
+                unsigned int yyaccept = 0;
+                // character class table, indexed by the current character:
+                //   bit 32  (tested in basic_json_parser_5):  tab, LF, CR, space
+                //   bit 64  (tested in basic_json_parser_31): characters that
+                //           may appear unescaped inside a string, i.e. all
+                //           values except 0x00-0x0F, '"' and '\\'
+                //   bit 128 (tested in basic_json_parser_41): digits '0'-'9'
+                // NOTE(review): values 0x10-0x1F carry bit 64, so these control
+                // characters are accepted unescaped inside strings, whereas
+                // RFC 7159 requires U+0000-U+001F to be escaped - confirm
+                // whether this is intended before regenerating.
+                static const unsigned char yybm[] =
+                {
+                    0,   0,   0,   0,   0,   0,   0,   0,
+                    0,  32,  32,   0,   0,  32,   0,   0,
+                    64,  64,  64,  64,  64,  64,  64,  64,
+                    64,  64,  64,  64,  64,  64,  64,  64,
+                    96,  64,   0,  64,  64,  64,  64,  64,
+                    64,  64,  64,  64,  64,  64,  64,  64,
+                    192, 192, 192, 192, 192, 192, 192, 192,
+                    192, 192,  64,  64,  64,  64,  64,  64,
+                    64,  64,  64,  64,  64,  64,  64,  64,
+                    64,  64,  64,  64,  64,  64,  64,  64,
+                    64,  64,  64,  64,  64,  64,  64,  64,
+                    64,  64,  64,  64,   0,  64,  64,  64,
+                    64,  64,  64,  64,  64,  64,  64,  64,
+                    64,  64,  64,  64,  64,  64,  64,  64,
+                    64,  64,  64,  64,  64,  64,  64,  64,
+                    64,  64,  64,  64,  64,  64,  64,  64,
+                    64,  64,  64,  64,  64,  64,  64,  64,
+                    64,  64,  64,  64,  64,  64,  64,  64,
+                    64,  64,  64,  64,  64,  64,  64,  64,
+                    64,  64,  64,  64,  64,  64,  64,  64,
+                    64,  64,  64,  64,  64,  64,  64,  64,
+                    64,  64,  64,  64,  64,  64,  64,  64,
+                    64,  64,  64,  64,  64,  64,  64,  64,
+                    64,  64,  64,  64,  64,  64,  64,  64,
+                    64,  64,  64,  64,  64,  64,  64,  64,
+                    64,  64,  64,  64,  64,  64,  64,  64,
+                    64,  64,  64,  64,  64,  64,  64,  64,
+                    64,  64,  64,  64,  64,  64,  64,  64,
+                    64,  64,  64,  64,  64,  64,  64,  64,
+                    64,  64,  64,  64,  64,  64,  64,  64,
+                    64,  64,  64,  64,  64,  64,  64,  64,
+                    64,  64,  64,  64,  64,  64,  64,  64,
+                };
+
+                // ask for more input when fewer than 5 characters are buffered
+                if ((m_limit - m_cursor) < 5)
+                {
+                    yyfill();    // LCOV_EXCL_LINE;
+                }
+                // dispatch on the first character of the token
+                yych = *m_cursor;
+                if (yych <= '9')
+                {
+                    if (yych <= ' ')
+                    {
+                        if (yych <= '\n')
+                        {
+                            if (yych <= 0x00)
+                            {
+                                goto basic_json_parser_27;
+                            }
+                            if (yych <= 0x08)
+                            {
+                                goto basic_json_parser_29;
+                            }
+                            if (yych >= '\n')
+                            {
+                                goto basic_json_parser_4;
+                            }
+                        }
+                        else
+                        {
+                            if (yych == '\r')
+                            {
+                                goto basic_json_parser_2;
+                            }
+                            if (yych <= 0x1F)
+                            {
+                                goto basic_json_parser_29;
+                            }
+                        }
+                    }
+                    else
+                    {
+                        if (yych <= ',')
+                        {
+                            if (yych == '"')
+                            {
+                                goto basic_json_parser_26;
+                            }
+                            if (yych <= '+')
+                            {
+                                goto basic_json_parser_29;
+                            }
+                            goto basic_json_parser_14;
+                        }
+                        else
+                        {
+                            if (yych <= '-')
+                            {
+                                goto basic_json_parser_22;
+                            }
+                            if (yych <= '/')
+                            {
+                                goto basic_json_parser_29;
+                            }
+                            if (yych <= '0')
+                            {
+                                goto basic_json_parser_23;
+                            }
+                            goto basic_json_parser_25;
+                        }
+                    }
+                }
+                else
+                {
+                    if (yych <= 'm')
+                    {
+                        if (yych <= '\\')
+                        {
+                            if (yych <= ':')
+                            {
+                                goto basic_json_parser_16;
+                            }
+                            if (yych == '[')
+                            {
+                                goto basic_json_parser_6;
+                            }
+                            goto basic_json_parser_29;
+                        }
+                        else
+                        {
+                            if (yych <= ']')
+                            {
+                                goto basic_json_parser_8;
+                            }
+                            if (yych == 'f')
+                            {
+                                goto basic_json_parser_21;
+                            }
+                            goto basic_json_parser_29;
+                        }
+                    }
+                    else
+                    {
+                        if (yych <= 'z')
+                        {
+                            if (yych <= 'n')
+                            {
+                                goto basic_json_parser_18;
+                            }
+                            if (yych == 't')
+                            {
+                                goto basic_json_parser_20;
+                            }
+                            goto basic_json_parser_29;
+                        }
+                        else
+                        {
+                            if (yych <= '{')
+                            {
+                                goto basic_json_parser_10;
+                            }
+                            if (yych == '}')
+                            {
+                                goto basic_json_parser_12;
+                            }
+                            goto basic_json_parser_29;
+                        }
+                    }
+                }
+basic_json_parser_2:
+                // whitespace: first character of the run
+                ++m_cursor;
+                yych = *m_cursor;
+                goto basic_json_parser_5;
+basic_json_parser_3:
+                // whitespace run consumed: scan the token that follows it
+                {
+                    return scan();
+                }
+basic_json_parser_4:
+                // whitespace: consume one character of the run
+                ++m_cursor;
+                if (m_limit <= m_cursor)
+                {
+                    yyfill();    // LCOV_EXCL_LINE;
+                }
+                yych = *m_cursor;
+basic_json_parser_5:
+                // keep looping while the current character is whitespace
+                if (yybm[0 + yych] & 32)
+                {
+                    goto basic_json_parser_4;
+                }
+                goto basic_json_parser_3;
+basic_json_parser_6:
+                ++m_cursor;
+                {
+                    return token_type::begin_array;
+                }
+basic_json_parser_8:
+                ++m_cursor;
+                {
+                    return token_type::end_array;
+                }
+basic_json_parser_10:
+                ++m_cursor;
+                {
+                    return token_type::begin_object;
+                }
+basic_json_parser_12:
+                ++m_cursor;
+                {
+                    return token_type::end_object;
+                }
+basic_json_parser_14:
+                ++m_cursor;
+                {
+                    return token_type::value_separator;
+                }
+basic_json_parser_16:
+                ++m_cursor;
+                {
+                    return token_type::name_separator;
+                }
+basic_json_parser_18:
+                // 'n' read: only "null" can follow
+                yyaccept = 0;
+                yych = *(m_marker = ++m_cursor);
+                if (yych == 'u')
+                {
+                    goto basic_json_parser_59;
+                }
+basic_json_parser_19:
+                // no token matches
+                {
+                    return token_type::parse_error;
+                }
+basic_json_parser_20:
+                // 't' read: only "true" can follow
+                yyaccept = 0;
+                yych = *(m_marker = ++m_cursor);
+                if (yych == 'r')
+                {
+                    goto basic_json_parser_55;
+                }
+                goto basic_json_parser_19;
+basic_json_parser_21:
+                // 'f' read: only "false" can follow
+                yyaccept = 0;
+                yych = *(m_marker = ++m_cursor);
+                if (yych == 'a')
+                {
+                    goto basic_json_parser_50;
+                }
+                goto basic_json_parser_19;
+basic_json_parser_22:
+                // '-' read: a digit must follow
+                yych = *++m_cursor;
+                if (yych <= '/')
+                {
+                    goto basic_json_parser_19;
+                }
+                if (yych <= '0')
+                {
+                    goto basic_json_parser_49;
+                }
+                if (yych <= '9')
+                {
+                    goto basic_json_parser_40;
+                }
+                goto basic_json_parser_19;
+basic_json_parser_23:
+                // leading '0' read: only a fraction or an exponent may follow
+                yyaccept = 1;
+                yych = *(m_marker = ++m_cursor);
+                if (yych <= 'D')
+                {
+                    if (yych == '.')
+                    {
+                        goto basic_json_parser_42;
+                    }
+                }
+                else
+                {
+                    if (yych <= 'E')
+                    {
+                        goto basic_json_parser_43;
+                    }
+                    if (yych == 'e')
+                    {
+                        goto basic_json_parser_43;
+                    }
+                }
+basic_json_parser_24:
+                // number complete
+                {
+                    return token_type::value_number;
+                }
+basic_json_parser_25:
+                // digit '1'-'9' read: continue with the integer part
+                yyaccept = 1;
+                yych = *(m_marker = ++m_cursor);
+                goto basic_json_parser_41;
+basic_json_parser_26:
+                // opening quote read: start of a string
+                yyaccept = 0;
+                yych = *(m_marker = ++m_cursor);
+                if (yych <= 0x0F)
+                {
+                    goto basic_json_parser_19;
+                }
+                goto basic_json_parser_31;
+basic_json_parser_27:
+                // a 0x00 character marks the end of the input
+                ++m_cursor;
+                {
+                    return token_type::end_of_input;
+                }
+basic_json_parser_29:
+                // character that cannot start any token
+                yych = *++m_cursor;
+                goto basic_json_parser_19;
+basic_json_parser_30:
+                // string body: consume one character
+                ++m_cursor;
+                if (m_limit <= m_cursor)
+                {
+                    yyfill();    // LCOV_EXCL_LINE;
+                }
+                yych = *m_cursor;
+basic_json_parser_31:
+                // string body: plain characters loop, 0x00-0x0F fail, '"' ends
+                // the string and '\\' starts an escape sequence
+                if (yybm[0 + yych] & 64)
+                {
+                    goto basic_json_parser_30;
+                }
+                if (yych <= 0x0F)
+                {
+                    goto basic_json_parser_32;
+                }
+                if (yych <= '"')
+                {
+                    goto basic_json_parser_34;
+                }
+                goto basic_json_parser_33;
+basic_json_parser_32:
+                // match failed: go back to the marker and either report an
+                // error or accept the number read so far, depending on yyaccept
+                m_cursor = m_marker;
+                if (yyaccept == 0)
+                {
+                    goto basic_json_parser_19;
+                }
+                else
+                {
+                    goto basic_json_parser_24;
+                }
+basic_json_parser_33:
+                // character after a backslash: one of " / \ b f n r t
+                // continues the string, 'u' starts a \uXXXX escape, anything
+                // else fails
+                ++m_cursor;
+                if (m_limit <= m_cursor)
+                {
+                    yyfill();    // LCOV_EXCL_LINE;
+                }
+                yych = *m_cursor;
+                if (yych <= 'e')
+                {
+                    if (yych <= '/')
+                    {
+                        if (yych == '"')
+                        {
+                            goto basic_json_parser_30;
+                        }
+                        if (yych <= '.')
+                        {
+                            goto basic_json_parser_32;
+                        }
+                        goto basic_json_parser_30;
+                    }
+                    else
+                    {
+                        if (yych <= '\\')
+                        {
+                            if (yych <= '[')
+                            {
+                                goto basic_json_parser_32;
+                            }
+                            goto basic_json_parser_30;
+                        }
+                        else
+                        {
+                            if (yych == 'b')
+                            {
+                                goto basic_json_parser_30;
+                            }
+                            goto basic_json_parser_32;
+                        }
+                    }
+                }
+                else
+                {
+                    if (yych <= 'q')
+                    {
+                        if (yych <= 'f')
+                        {
+                            goto basic_json_parser_30;
+                        }
+                        if (yych == 'n')
+                        {
+                            goto basic_json_parser_30;
+                        }
+                        goto basic_json_parser_32;
+                    }
+                    else
+                    {
+                        if (yych <= 's')
+                        {
+                            if (yych <= 'r')
+                            {
+                                goto basic_json_parser_30;
+                            }
+                            goto basic_json_parser_32;
+                        }
+                        else
+                        {
+                            if (yych <= 't')
+                            {
+                                goto basic_json_parser_30;
+                            }
+                            if (yych <= 'u')
+                            {
+                                goto basic_json_parser_36;
+                            }
+                            goto basic_json_parser_32;
+                        }
+                    }
+                }
+basic_json_parser_34:
+                // closing quote read: string complete
+                ++m_cursor;
+                {
+                    return token_type::value_string;
+                }
+basic_json_parser_36:
+                // \uXXXX escape: first hexadecimal digit (0-9, A-F, a-f)
+                ++m_cursor;
+                if (m_limit <= m_cursor)
+                {
+                    yyfill();    // LCOV_EXCL_LINE;
+                }
+                yych = *m_cursor;
+                if (yych <= '@')
+                {
+                    if (yych <= '/')
+                    {
+                        goto basic_json_parser_32;
+                    }
+                    if (yych >= ':')
+                    {
+                        goto basic_json_parser_32;
+                    }
+                }
+                else
+                {
+                    if (yych <= 'F')
+                    {
+                        goto basic_json_parser_37;
+                    }
+                    if (yych <= '`')
+                    {
+                        goto basic_json_parser_32;
+                    }
+                    if (yych >= 'g')
+                    {
+                        goto basic_json_parser_32;
+                    }
+                }
+basic_json_parser_37:
+                // \uXXXX escape: second hexadecimal digit
+                ++m_cursor;
+                if (m_limit <= m_cursor)
+                {
+                    yyfill();    // LCOV_EXCL_LINE;
+                }
+                yych = *m_cursor;
+                if (yych <= '@')
+                {
+                    if (yych <= '/')
+                    {
+                        goto basic_json_parser_32;
+                    }
+                    if (yych >= ':')
+                    {
+                        goto basic_json_parser_32;
+                    }
+                }
+                else
+                {
+                    if (yych <= 'F')
+                    {
+                        goto basic_json_parser_38;
+                    }
+                    if (yych <= '`')
+                    {
+                        goto basic_json_parser_32;
+                    }
+                    if (yych >= 'g')
+                    {
+                        goto basic_json_parser_32;
+                    }
+                }
+basic_json_parser_38:
+                // \uXXXX escape: third hexadecimal digit
+                ++m_cursor;
+                if (m_limit <= m_cursor)
+                {
+                    yyfill();    // LCOV_EXCL_LINE;
+                }
+                yych = *m_cursor;
+                if (yych <= '@')
+                {
+                    if (yych <= '/')
+                    {
+                        goto basic_json_parser_32;
+                    }
+                    if (yych >= ':')
+                    {
+                        goto basic_json_parser_32;
+                    }
+                }
+                else
+                {
+                    if (yych <= 'F')
+                    {
+                        goto basic_json_parser_39;
+                    }
+                    if (yych <= '`')
+                    {
+                        goto basic_json_parser_32;
+                    }
+                    if (yych >= 'g')
+                    {
+                        goto basic_json_parser_32;
+                    }
+                }
+basic_json_parser_39:
+                // \uXXXX escape: fourth hexadecimal digit, then back to the
+                // string body
+                ++m_cursor;
+                if (m_limit <= m_cursor)
+                {
+                    yyfill();    // LCOV_EXCL_LINE;
+                }
+                yych = *m_cursor;
+                if (yych <= '@')
+                {
+                    if (yych <= '/')
+                    {
+                        goto basic_json_parser_32;
+                    }
+                    if (yych <= '9')
+                    {
+                        goto basic_json_parser_30;
+                    }
+                    goto basic_json_parser_32;
+                }
+                else
+                {
+                    if (yych <= 'F')
+                    {
+                        goto basic_json_parser_30;
+                    }
+                    if (yych <= '`')
+                    {
+                        goto basic_json_parser_32;
+                    }
+                    if (yych <= 'f')
+                    {
+                        goto basic_json_parser_30;
+                    }
+                    goto basic_json_parser_32;
+                }
+basic_json_parser_40:
+                // integer part: consume one digit; the marker follows the
+                // cursor because the number read so far is already valid
+                yyaccept = 1;
+                m_marker = ++m_cursor;
+                if ((m_limit - m_cursor) < 3)
+                {
+                    yyfill();    // LCOV_EXCL_LINE;
+                }
+                yych = *m_cursor;
+basic_json_parser_41:
+                // integer part: more digits loop; '.' starts a fraction,
+                // 'e'/'E' an exponent; anything else ends the number
+                if (yybm[0 + yych] & 128)
+                {
+                    goto basic_json_parser_40;
+                }
+                if (yych <= 'D')
+                {
+                    if (yych != '.')
+                    {
+                        goto basic_json_parser_24;
+                    }
+                }
+                else
+                {
+                    if (yych <= 'E')
+                    {
+                        goto basic_json_parser_43;
+                    }
+                    if (yych == 'e')
+                    {
+                        goto basic_json_parser_43;
+                    }
+                    goto basic_json_parser_24;
+                }
+basic_json_parser_42:
+                // '.' read: at least one digit must follow
+                yych = *++m_cursor;
+                if (yych <= '/')
+                {
+                    goto basic_json_parser_32;
+                }
+                if (yych <= '9')
+                {
+                    goto basic_json_parser_47;
+                }
+                goto basic_json_parser_32;
+basic_json_parser_43:
+                // 'e' or 'E' read: a sign or a digit must follow
+                yych = *++m_cursor;
+                if (yych <= ',')
+                {
+                    if (yych != '+')
+                    {
+                        goto basic_json_parser_32;
+                    }
+                }
+                else
+                {
+                    if (yych <= '-')
+                    {
+                        goto basic_json_parser_44;
+                    }
+                    if (yych <= '/')
+                    {
+                        goto basic_json_parser_32;
+                    }
+                    if (yych <= '9')
+                    {
+                        goto basic_json_parser_45;
+                    }
+                    goto basic_json_parser_32;
+                }
+basic_json_parser_44:
+                // exponent sign read: a digit must follow
+                yych = *++m_cursor;
+                if (yych <= '/')
+                {
+                    goto basic_json_parser_32;
+                }
+                if (yych >= ':')
+                {
+                    goto basic_json_parser_32;
+                }
+basic_json_parser_45:
+                // exponent digits: loop until a non-digit ends the number
+                ++m_cursor;
+                if (m_limit <= m_cursor)
+                {
+                    yyfill();    // LCOV_EXCL_LINE;
+                }
+                yych = *m_cursor;
+                if (yych <= '/')
+                {
+                    goto basic_json_parser_24;
+                }
+                if (yych <= '9')
+                {
+                    goto basic_json_parser_45;
+                }
+                goto basic_json_parser_24;
+basic_json_parser_47:
+                // fraction digits: loop; 'e'/'E' starts an exponent, anything
+                // else ends the number
+                yyaccept = 1;
+                m_marker = ++m_cursor;
+                if ((m_limit - m_cursor) < 3)
+                {
+                    yyfill();    // LCOV_EXCL_LINE;
+                }
+                yych = *m_cursor;
+                if (yych <= 'D')
+                {
+                    if (yych <= '/')
+                    {
+                        goto basic_json_parser_24;
+                    }
+                    if (yych <= '9')
+                    {
+                        goto basic_json_parser_47;
+                    }
+                    goto basic_json_parser_24;
+                }
+                else
+                {
+                    if (yych <= 'E')
+                    {
+                        goto basic_json_parser_43;
+                    }
+                    if (yych == 'e')
+                    {
+                        goto basic_json_parser_43;
+                    }
+                    goto basic_json_parser_24;
+                }
+basic_json_parser_49:
+                // "-0" read: only a fraction or an exponent may follow
+                yyaccept = 1;
+                yych = *(m_marker = ++m_cursor);
+                if (yych <= 'D')
+                {
+                    if (yych == '.')
+                    {
+                        goto basic_json_parser_42;
+                    }
+                    goto basic_json_parser_24;
+                }
+                else
+                {
+                    if (yych <= 'E')
+                    {
+                        goto basic_json_parser_43;
+                    }
+                    if (yych == 'e')
+                    {
+                        goto basic_json_parser_43;
+                    }
+                    goto basic_json_parser_24;
+                }
+basic_json_parser_50:
+                // "fa" read: match the remaining "lse"
+                yych = *++m_cursor;
+                if (yych != 'l')
+                {
+                    goto basic_json_parser_32;
+                }
+                yych = *++m_cursor;
+                if (yych != 's')
+                {
+                    goto basic_json_parser_32;
+                }
+                yych = *++m_cursor;
+                if (yych != 'e')
+                {
+                    goto basic_json_parser_32;
+                }
+                ++m_cursor;
+                {
+                    return token_type::literal_false;
+                }
+basic_json_parser_55:
+                // "tr" read: match the remaining "ue"
+                yych = *++m_cursor;
+                if (yych != 'u')
+                {
+                    goto basic_json_parser_32;
+                }
+                yych = *++m_cursor;
+                if (yych != 'e')
+                {
+                    goto basic_json_parser_32;
+                }
+                ++m_cursor;
+                {
+                    return token_type::literal_true;
+                }
+basic_json_parser_59:
+                // "nu" read: match the remaining "ll"
+                yych = *++m_cursor;
+                if (yych != 'l')
+                {
+                    goto basic_json_parser_32;
+                }
+                yych = *++m_cursor;
+                if (yych != 'l')
+                {
+                    goto basic_json_parser_32;
+                }
+                ++m_cursor;
+                {
+                    return token_type::literal_null;
+                }
+            }
+
+
+        }
+
+        /*!
+        @brief append data from the stream to the internal buffer
+
+        Does nothing if no stream is attached or the stream is not in a good
+        state. Otherwise the part of the buffer in front of the current token
+        (everything before m_start) is dropped, one more line is read from the
+        stream and appended to the buffer with a leading newline, and all
+        pointers into the buffer are re-established, because modifying
+        m_buffer may move its storage.
+
+        The positions of m_marker and m_cursor relative to m_start are kept.
+        A null m_marker (scan() resets it at the start of every token) stays
+        null: it takes no part in pointer arithmetic, which would be undefined
+        behavior for a null pointer.
+        */
+        void yyfill() noexcept
+        {
+            if (not m_stream or not * m_stream)
+            {
+                return;
+            }
+
+            // record all positions as offsets before the buffer is touched;
+            // `auto` deduces the standard pointer difference type (the
+            // POSIX-only ssize_t is not available on every platform)
+            const auto offset_start = m_start - m_content;
+            const bool has_marker = (m_marker != nullptr);
+            const auto offset_marker = has_marker ? (m_marker - m_start) : 0;
+            const auto offset_cursor = m_cursor - m_start;
+
+            // drop the characters in front of the current token
+            m_buffer.erase(0, static_cast<size_t>(offset_start));
+            std::string line;
+            std::getline(*m_stream, line);
+            m_buffer += "\n" + line; // add line with newline symbol
+
+            // the buffer may have been reallocated: rebase every pointer
+            m_content = reinterpret_cast<const lexer_char_t*>(m_buffer.c_str());
+            m_start  = m_content;
+            m_marker = has_marker ? (m_start + offset_marker) : nullptr;
+            m_cursor = m_start + offset_cursor;
+            m_limit  = m_start + m_buffer.size() - 1;
+        }
+
+        /// return string representation of last read token, that is, a copy
+        /// of the characters in the range [m_start, m_cursor)
+        string_t get_token() const noexcept
+        {
+            const auto token_begin = reinterpret_cast<typename string_t::const_pointer>(m_start);
+            const auto token_length = static_cast<size_t>(m_cursor - m_start);
+
+            return string_t(token_begin, token_length);
+        }
+
+        /*!
+        @brief return string value for string tokens
+
+        The function iterates the characters between the opening and closing
+        quotes of the string value. The complete string is the range
+        [m_start,m_cursor). Consequently, we iterate from m_start+1 to
+        m_cursor-1.
+
+        We differentiate two cases:
+
+        1. Escaped characters. In this case, a new character is constructed
+           according to the nature of the escape. Some escapes create new
+           characters (e.g., @c "\\n" is replaced by @c "\n"), some are copied
+           as is (e.g., @c "\\\\"). Furthermore, Unicode escapes of the shape
+           @c "\\uxxxx" need special care. In this case, to_unicode takes care
+           of the construction of the values.
+        2. Unescaped characters are copied as is.
+
+        @return string value of current token without opening and closing quotes
+        @throw std::out_of_range if to_unicode fails
+        */
+        string_t get_string() const
+        {
+            string_t result;
+            result.reserve(static_cast<size_t>(m_cursor - m_start - 2));
+
+            // walk the token's characters between the opening and closing quotes
+            for (const lexer_char_t* i = m_start + 1; i < m_cursor - 1; ++i)
+            {
+                // process escaped characters
+                if (*i == '\\')
+                {
+                    // step onto the character after the backslash; it selects the escape
+                    ++i;
+
+                    switch (*i)
+                    {
+                        // the default escapes
+                        case 't':
+                        {
+                            result += "\t";
+                            break;
+                        }
+                        case 'b':
+                        {
+                            result += "\b";
+                            break;
+                        }
+                        case 'f':
+                        {
+                            result += "\f";
+                            break;
+                        }
+                        case 'n':
+                        {
+                            result += "\n";
+                            break;
+                        }
+                        case 'r':
+                        {
+                            result += "\r";
+                            break;
+                        }
+                        case '\\':
+                        {
+                            result += "\\";
+                            break;
+                        }
+                        case '/':
+                        {
+                            result += "/";
+                            break;
+                        }
+                        case '"':
+                        {
+                            result += "\"";
+                            break;
+                        }
+
+                        // unicode
+                        case 'u':
+                        {
+                            // get code xxxx from uxxxx (i currently points at the 'u')
+                            auto codepoint = std::strtoul(std::string(reinterpret_cast<typename string_t::const_pointer>(i + 1),
+                                                          4).c_str(), nullptr, 16);
+
+                            // check if codepoint is a high surrogate
+                            if (codepoint >= 0xD800 and codepoint <= 0xDBFF)
+                            {
+                                // make sure there is a subsequent unicode
+                                if ((i + 6 >= m_limit) or * (i + 5) != '\\' or * (i + 6) != 'u')
+                                {
+                                    throw std::invalid_argument("missing low surrogate");
+                                }
+
+                                // get code yyyy from uxxxx\uyyyy
+                                auto codepoint2 = std::strtoul(std::string(reinterpret_cast<typename string_t::const_pointer>
+                                                               (i + 7), 4).c_str(), nullptr, 16);
+                                result += to_unicode(codepoint, codepoint2);
+                                // move onto the last of the 10 characters (xxxx\uyyyy); the loop's ++i steps past it
+                                i += 10;
+                            }
+                            else
+                            {
+                                // add unicode character(s)
+                                result += to_unicode(codepoint);
+                                // move onto the last of the four hex digits (xxxx); the loop's ++i steps past it
+                                i += 4;
+                            }
+                            break;
+                        }
+                    }
+                }
+                else
+                {
+                    // all other characters are just copied to the end of the
+                    // string
+                    result.append(1, static_cast<typename string_t::value_type>(*i));
+                }
+            }
+
+            return result;
+        }
+
+        /*!
+        @brief return number value for number tokens
+
+        This function translates the last token into a floating point number.
+        The pointer m_start points to the beginning of the parsed number. We
+        pass this pointer to std::strtold which sets endptr to the first
+        character past the converted number. If this pointer is not the same as
+        m_cursor, then either more or less characters have been used during the
+        conversion. This can happen for inputs like "01" which will be treated
+        like number 0 followed by number 1.
+
+        @return the result of the number conversion or NAN if the conversion
+        did not stop exactly at the end of the current token. The latter case
+        needs to be treated by the caller function.
+
+        @note this function contains no throw statement; a mismatch is signalled by NAN only
+        */
+        long double get_number() const
+        {
+            // conversion
+            typename string_t::value_type* endptr;
+            const auto float_val = std::strtold(reinterpret_cast<typename string_t::const_pointer>(m_start),
+                                                &endptr);
+
+            // return float_val if the whole number was translated and NAN
+            // otherwise
+            return (reinterpret_cast<lexer_char_t*>(endptr) == m_cursor) ? float_val : NAN;
+        }
+
+      private:
+        /// optional input stream
+        std::istream* m_stream;
+        /// the buffer
+        string_t m_buffer;
+        /// the buffer pointer
+        const lexer_char_t* m_content = nullptr;
+        /// pointer to the beginning of the current symbol
+        const lexer_char_t* m_start = nullptr;
+        /// pointer for backtracking information
+        const lexer_char_t* m_marker = nullptr;
+        /// pointer to the current symbol
+        const lexer_char_t* m_cursor = nullptr;
+        /// pointer to the end of the buffer
+        const lexer_char_t* m_limit = nullptr;
+    };
+
+    /*!
+    @brief syntax analysis: recursive parser that builds a basic_json from the lexer's tokens
+    */
+    class parser
+    {
+      public:
+        /// a parser reading from a string; the first token is read immediately
+        parser(const string_t& s, parser_callback_t cb = nullptr)
+            : callback(cb), m_lexer(s)
+        {
+            // read first token
+            get_token();
+        }
+
+        /// a parser reading from an input stream; the first token is read immediately
+        parser(std::istream& _is, parser_callback_t cb = nullptr)
+            : callback(cb), m_lexer(&_is)
+        {
+            // read first token
+            get_token();
+        }
+
+        /// public parser interface: parse one value and require end of input after it
+        basic_json parse()
+        {
+            basic_json result = parse_internal(true);
+
+            expect(lexer::token_type::end_of_input);
+
+            // return parser result and replace it with null in case the
+            // top-level value was discarded by the callback function
+            return result.is_discarded() ? basic_json() : result;
+        }
+
+      private:
+        /// the actual parser: consumes one value starting at last_token; values are only stored while @a keep is true
+        basic_json parse_internal(bool keep)
+        {
+            auto result = basic_json(value_t::discarded);
+
+            switch (last_token)
+            {
+                case (lexer::token_type::begin_object):
+                {
+                    if (keep and (not callback or (keep = callback(depth++, parse_event_t::object_start, result))))
+                    {
+                        // explicitly set result to object to cope with {}
+                        result.m_type = value_t::object;
+                        result.m_value = json_value(value_t::object);
+                    }
+
+                    // read next token
+                    get_token();
+
+                    // closing } -> we are done
+                    if (last_token == lexer::token_type::end_object)
+                    {
+                        get_token();
+                        if (keep and callback and not callback(--depth, parse_event_t::object_end, result))
+                        {
+                            result = basic_json(value_t::discarded);
+                        }
+                        return result;
+                    }
+
+                    // no comma is expected here
+                    unexpect(lexer::token_type::value_separator);
+
+                    // otherwise: parse key-value pairs
+                    do
+                    {
+                        // ugly, but could be fixed with loop reorganization
+                        if (last_token == lexer::token_type::value_separator)
+                        {
+                            get_token();
+                        }
+
+                        // store key
+                        expect(lexer::token_type::value_string);
+                        const auto key = m_lexer.get_string();
+
+                        bool keep_tag = false;
+                        if (keep)
+                        {
+                            if (callback)
+                            {
+                                basic_json k(key);
+                                keep_tag = callback(depth, parse_event_t::key, k);
+                            }
+                            else
+                            {
+                                keep_tag = true;
+                            }
+                        }
+
+                        // parse separator (:)
+                        get_token();
+                        expect(lexer::token_type::name_separator);
+
+                        // parse and add value
+                        get_token();
+                        auto value = parse_internal(keep);
+                        if (keep and keep_tag and not value.is_discarded())
+                        {
+                            result[key] = std::move(value);
+                        }
+                    }
+                    while (last_token == lexer::token_type::value_separator);
+
+                    // closing }
+                    expect(lexer::token_type::end_object);
+                    get_token();
+                    if (keep and callback and not callback(--depth, parse_event_t::object_end, result))
+                    {
+                        result = basic_json(value_t::discarded);
+                    }
+
+                    return result;
+                }
+
+                case (lexer::token_type::begin_array):
+                {
+                    if (keep and (not callback or (keep = callback(depth++, parse_event_t::array_start, result))))
+                    {
+                        // explicitly set result to array to cope with []
+                        result.m_type = value_t::array;
+                        result.m_value = json_value(value_t::array);
+                    }
+
+                    // read next token
+                    get_token();
+
+                    // closing ] -> we are done; NOTE(review): unlike the other *_end paths, this callback is not guarded by keep
+                    if (last_token == lexer::token_type::end_array)
+                    {
+                        get_token();
+                        if (callback and not callback(--depth, parse_event_t::array_end, result))
+                        {
+                            result = basic_json(value_t::discarded);
+                        }
+                        return result;
+                    }
+
+                    // no comma is expected here
+                    unexpect(lexer::token_type::value_separator);
+
+                    // otherwise: parse values
+                    do
+                    {
+                        // ugly, but could be fixed with loop reorganization
+                        if (last_token == lexer::token_type::value_separator)
+                        {
+                            get_token();
+                        }
+
+                        // parse value
+                        auto value = parse_internal(keep);
+                        if (keep and not value.is_discarded())
+                        {
+                            result.push_back(std::move(value));
+                        }
+                    }
+                    while (last_token == lexer::token_type::value_separator);
+
+                    // closing ]
+                    expect(lexer::token_type::end_array);
+                    get_token();
+                    if (keep and callback and not callback(--depth, parse_event_t::array_end, result))
+                    {
+                        result = basic_json(value_t::discarded);
+                    }
+
+                    return result;
+                }
+
+                case (lexer::token_type::literal_null):
+                {
+                    get_token();
+                    result.m_type = value_t::null;
+                    break;
+                }
+
+                case (lexer::token_type::value_string):
+                {
+                    const auto s = m_lexer.get_string();
+                    get_token();
+                    result = basic_json(s);
+                    break;
+                }
+
+                case (lexer::token_type::literal_true):
+                {
+                    get_token();
+                    result.m_type = value_t::boolean;
+                    result.m_value = true;
+                    break;
+                }
+
+                case (lexer::token_type::literal_false):
+                {
+                    get_token();
+                    result.m_type = value_t::boolean;
+                    result.m_value = false;
+                    break;
+                }
+
+                case (lexer::token_type::value_number):
+                {
+                    auto float_val = m_lexer.get_number();
+
+                    // NAN is returned if token could not be translated
+                    // completely
+                    if (std::isnan(float_val))
+                    {
+                        throw std::invalid_argument(std::string("parse error - ") +
+                                                    m_lexer.get_token() + " is not a number");
+                    }
+
+                    get_token();
+
+                    // check if conversion loses precision
+                    const auto int_val = static_cast<number_integer_t>(float_val);
+                    if (approx(float_val, static_cast<long double>(int_val)))
+                    {
+                        // we would not lose precision -> return int
+                        result.m_type = value_t::number_integer;
+                        result.m_value = int_val;
+                    }
+                    else
+                    {
+                        // we would lose precision -> return float
+                        result.m_type = value_t::number_float;
+                        result.m_value = static_cast<number_float_t>(float_val);
+                    }
+                    break;
+                }
+
+                default:
+                {
+                    // the last token was unexpected
+                    unexpect(last_token);
+                }
+            }
+
+            if (keep and callback and not callback(depth, parse_event_t::value, result))
+            {
+                result = basic_json(value_t::discarded);
+            }
+            return result;
+        }
+
+        /// get next token from lexer and remember it in last_token
+        typename lexer::token_type get_token()
+        {
+            last_token = m_lexer.scan();
+            return last_token;
+        }
+        /// throw std::invalid_argument unless the last read token has type @a t
+        void expect(typename lexer::token_type t) const
+        {
+            if (t != last_token)
+            {
+                std::string error_msg = "parse error - unexpected \'";
+                error_msg += m_lexer.get_token();
+                error_msg += "\' (" + lexer::token_type_name(last_token);
+                error_msg += "); expected " + lexer::token_type_name(t);
+                throw std::invalid_argument(error_msg);
+            }
+        }
+        /// throw std::invalid_argument if the last read token has type @a t
+        void unexpect(typename lexer::token_type t) const
+        {
+            if (t == last_token)
+            {
+                std::string error_msg = "parse error - unexpected \'";
+                error_msg += m_lexer.get_token();
+                error_msg += "\' (";
+                error_msg += lexer::token_type_name(last_token) + ")";
+                throw std::invalid_argument(error_msg);
+            }
+        }
+
+      private:
+        /// depth value handed to the callback; incremented on *_start events, decremented on *_end events
+        int depth = 0;
+        /// callback function
+        parser_callback_t callback;
+        /// the type of the last read token
+        typename lexer::token_type last_token = lexer::token_type::uninitialized;
+        /// the lexer
+        lexer m_lexer;
+    };
+};
+
+
+/////////////
+// presets //
+/////////////
+
+/*!
+@brief default JSON class
+
+This type is the default specialization of the @ref basic_json class which uses
+the standard template types.
+*/
+using json = basic_json<>;
+}
+
+
+/////////////////////////
+// nonmember functions //
+/////////////////////////
+
+// specializations of std::swap and std::hash for nlohmann::json
+namespace std
+{
+/*!
+@brief exchanges the values of two JSON objects
+*/
+template <>
+inline void swap(nlohmann::json& j1,
+                 nlohmann::json& j2) noexcept(
+                     is_nothrow_move_constructible<nlohmann::json>::value and
+                     is_nothrow_move_assignable<nlohmann::json>::value
+                 )
+{
+    j1.swap(j2);
+}
+
+/// hash value for JSON objects
+template <>
+struct hash<nlohmann::json>
+{
+    /// return a hash value for a JSON object
+    std::size_t operator()(const nlohmann::json& j) const
+    {
+        // a naive hashing via the string representation: serialize with dump(), then hash that string
+        const auto& h = hash<nlohmann::json::string_t>();
+        return h(j.dump());
+    }
+};
+}
+
+/*!
+@brief user-defined string literal for JSON values
+
+This operator implements a user-defined string literal for JSON objects. It can
+be used by adding \p "_json" to a string literal and returns a JSON object if
+no parse error occurred.
+
+@param[in] s  a string representation of a JSON object (the length argument is unnamed and ignored)
+@return a JSON object
+*/
+inline nlohmann::json operator "" _json(const char* s, std::size_t)
+{
+    return nlohmann::json::parse(reinterpret_cast<nlohmann::json::string_t::value_type*>
+                                 (const_cast<char*>(s)));
+}
+
+#endif
diff --git a/tools/pbindex/src/main.cpp b/tools/pbindexdump/src/main.cpp
similarity index 51%
copy from tools/pbindex/src/main.cpp
copy to tools/pbindexdump/src/main.cpp
index 59065fa..a6aefc6 100644
--- a/tools/pbindex/src/main.cpp
+++ b/tools/pbindexdump/src/main.cpp
@@ -35,32 +35,50 @@
 
 // Author: Derek Barnett
 
-#include "OptionParser.h"
-#include "PbIndex.h"
-#include "PbIndexVersion.h"
+#include "../common/OptionParser.h"
+#include "PbIndexDump.h"
+#include "PbIndexDumpVersion.h"
+#include "Settings.h"
 #include <cassert>
 #include <iostream>
 using namespace std;
 
 static
-pbindex::Settings fromCommandLine(optparse::OptionParser& parser,
-                                  int argc, char* argv[])
+pbindexdump::Settings fromCommandLine(optparse::OptionParser& parser,
+                                      int argc,
+                                      char* argv[])
 {
     const optparse::Values options = parser.parse_args(argc, argv);
-    (void)options;
+    pbindexdump::Settings settings;
 
-    pbindex::Settings settings;
-
-    // get input filename
+    // input
     const vector<string> positionalArgs = parser.args();
     const size_t numPositionalArgs = positionalArgs.size();
     if (numPositionalArgs == 0)
-        settings.errors_.push_back("pbindex requires an input BAM filename");
+        settings.inputPbiFilename_ = "-"; // stdin
     else if (numPositionalArgs == 1)
-        settings.inputBamFilename_ = parser.args().front();
+        settings.inputPbiFilename_ = parser.args().front();
     else {
         assert(numPositionalArgs > 1);
-        settings.errors_.push_back("pbindex does not support more than one input file per run");
+        settings.errors_.push_back("pbindexdump does not support more than one input file per run");
+    }
+
+    // output format
+    if (options.is_set("format"))
+        settings.format_ = options["format"];
+
+    // JSON options
+    if (settings.format_ == "json") {
+        if (options.is_set("json_indent_level"))
+            settings.jsonIndentLevel_ = options.get("json_indent_level");
+        if (options.is_set("json_raw"))
+            settings.jsonRaw_ = options.get("json_raw");
+    } else {
+        if (options.is_set("json_indent_level") ||
+            options.is_set("json_raw"))
+        {
+            settings.errors_.push_back("JSON formatting options not valid on non-JSON output");
+        }
     }
 
     return settings;
@@ -70,12 +88,10 @@ int main(int argc, char* argv[])
 {
     // setup help & options
     optparse::OptionParser parser;
-    parser.description("pbindex creates a index file that enables random-access to PacBio-specific data in BAM files. "
-                       "Generated index filename will be the same as input BAM plus .pbi suffix."
-                       );
-    parser.prog("pbindex");
-    parser.usage("pbindex <input>");
-    parser.version(pbindex::Version);
+    parser.description("pbindexdump prints a human-readable view of PBI data to stdout.");
+    parser.prog("pbindexdump");
+    parser.usage("pbindexdump [options] [input]");
+    parser.version(pbindexdump::Version);
     parser.add_version_option(true);
     parser.add_help_option(true);
 
@@ -83,11 +99,32 @@ int main(int argc, char* argv[])
     ioGroup.add_option("")
            .dest("input")
            .metavar("input")
-           .help("Input BAM file");
+           .help("Input PBI file. If not provided, stdin will be used as input.");
+    ioGroup.add_option("--format")
+           .dest("format")
+           .metavar("STRING")
+           .help("Output format, one of:\n"
+                 "    json, cpp\n\n"
+                 "json: pretty-printed JSON [default]\n\n"
+                 "cpp: copy/paste-able C++ code that can be used to construct the"
+                 " equivalent PacBio::BAM::PbiRawData object");
     parser.add_option_group(ioGroup);
 
+    auto jsonGroup = optparse::OptionGroup(parser, "JSON Formatting");
+    jsonGroup.add_option("--json-indent-level")
+             .dest("json_indent_level")
+             .metavar("INT")
+             .help("JSON indent level [4]");
+    jsonGroup.add_option("--json-raw")
+             .dest("json_raw")
+             .action("store_true")
+             .help("Prints fields in a manner that more closely reflects the PBI"
+                   " file format - presenting data as per-field columns, not"
+                   " per-record objects.");
+    parser.add_option_group(jsonGroup);
+
     // parse command line for settings
-    const pbindex::Settings settings = fromCommandLine(parser, argc, argv);
+    const pbindexdump::Settings settings = fromCommandLine(parser, argc, argv);
     if (!settings.errors_.empty()) {
         cerr << endl;
         for (const auto e : settings.errors_)
@@ -98,5 +135,12 @@ int main(int argc, char* argv[])
     }
 
     // run tool
-    return pbindex::PbIndex::Run(settings);
+    try {
+        pbindexdump::PbIndexDump::Run(settings);
+        return EXIT_SUCCESS;
+    }
+    catch (std::exception& e) {
+        cerr << "ERROR: " << e.what() << endl;
+        return EXIT_FAILURE;
+    }
 }
diff --git a/tools/pbmerge/CMakeLists.txt b/tools/pbmerge/CMakeLists.txt
new file mode 100644
index 0000000..c9728d7
--- /dev/null
+++ b/tools/pbmerge/CMakeLists.txt
@@ -0,0 +1,36 @@
+
+set(PbmergeSrcDir ${PacBioBAM_ToolsDir}/pbmerge/src)
+
+# create version header (configure_file substitutes @PbMerge_VERSION@ in the .h.in template)
+set(PbMerge_VERSION ${PacBioBAM_VERSION})
+configure_file(
+    ${PbmergeSrcDir}/PbMergeVersion.h.in PbMergeVersion.h @ONLY
+)
+
+# list source files
+set(PBMERGE_SOURCES
+    ${ToolsCommonDir}/BamFileMerger.h
+    ${ToolsCommonDir}/OptionParser.cpp
+    ${PbmergeSrcDir}/main.cpp
+)
+
+# build pbmerge executable (create_pbbam_tool is presumably provided by the PbbamTool module included here)
+include(PbbamTool)
+create_pbbam_tool(
+    TARGET  pbmerge
+    SOURCES ${PBMERGE_SOURCES}
+)
+
+# cram tests (registered only when PacBioBAM_build_tests is enabled)
+if (PacBioBAM_build_tests)
+    add_test(
+        NAME pbmerge_CramTests
+        WORKING_DIRECTORY ${PacBioBAM_TestsDir}/scripts
+        COMMAND "python" cram.py
+            ${PacBioBAM_CramTestsDir}/pbmerge_pacbio_ordering.t
+            ${PacBioBAM_CramTestsDir}/pbmerge_aligned_ordering.t
+            ${PacBioBAM_CramTestsDir}/pbmerge_mixed_ordering.t
+            ${PacBioBAM_CramTestsDir}/pbmerge_dataset.t
+            ${PacBioBAM_CramTestsDir}/pbmerge_fofn.t
+    )
+endif()
diff --git a/src/Config.cpp b/tools/pbmerge/src/PbMergeVersion.h.in
similarity index 86%
copy from src/Config.cpp
copy to tools/pbmerge/src/PbMergeVersion.h.in
index 677ad08..2bda4f0 100644
--- a/src/Config.cpp
+++ b/tools/pbmerge/src/PbMergeVersion.h.in
@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
 //
 // All rights reserved.
 //
@@ -35,14 +35,15 @@
 
 // Author: Derek Barnett
 
-#include "pbbam/Config.h"
-using namespace PacBio;
-using namespace PacBio::BAM;
+#ifndef PBMERGEVERSION_H
+#define PBMERGEVERSION_H
 
-namespace PacBio {
-namespace BAM {
+#include <string>
 
-int HtslibVerbosity = 0;
+namespace pbmerge {
 
-} // namespace BAM
-} // namespace PacBio
+const std::string Version = std::string("@PbMerge_VERSION@");
+
+} // namespace pbmerge
+
+#endif // PBMERGEVERSION_H
diff --git a/tools/pbmerge/src/main.cpp b/tools/pbmerge/src/main.cpp
new file mode 100644
index 0000000..3056dc1
--- /dev/null
+++ b/tools/pbmerge/src/main.cpp
@@ -0,0 +1,174 @@
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+//  * Redistributions in binary form must reproduce the above
+//    copyright notice, this list of conditions and the following
+//    disclaimer in the documentation and/or other materials provided
+//    with the distribution.
+//
+//  * Neither the name of Pacific Biosciences nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "../common/OptionParser.h"
+#include "../common/BamFileMerger.h"
+#include "PbMergeVersion.h"
+#include <cassert>
+#include <iostream>
+using namespace std;
+
+namespace pbmerge {
+/// Command-line settings for pbmerge; created only via FromCommandLine(), which records detected usage problems in errors_.
+class Settings
+{
+public:
+    static Settings FromCommandLine(optparse::OptionParser& parser,
+                                    int argc, char* argv[])
+    {
+        pbmerge::Settings settings;
+        const optparse::Values options = parser.parse_args(argc, argv);
+
+        // input: every positional argument is taken as an input filename
+        const vector<string> positionalArgs = parser.args();
+        if (positionalArgs.empty())
+            settings.errors_.push_back("at least one input file must be specified");
+        else
+            settings.inputFilenames_ = positionalArgs;
+
+        // output: defaults to "-" (stdout) when the "output" option is not set
+        if (options.is_set("output"))
+            settings.outputFilename_ = options["output"];
+        else
+            settings.outputFilename_ = "-"; // stdout
+
+        // PBI creation: on by default, off for stdout output or when "no_pbi" is set
+        if (settings.outputFilename_ == "-")
+            settings.createPbi_ = false; // always skip PBI if writing to stdout
+        else {
+            if (options.is_set("no_pbi"))
+                settings.createPbi_ = !options.get("no_pbi"); // user-disabled
+            else
+                settings.createPbi_ = true; // not specified, go ahead and generate by default
+        }
+
+        return settings;
+    }
+
+public:
+    std::vector<std::string> inputFilenames_;
+    std::string outputFilename_;
+    bool createPbi_ = true; // always overwritten by FromCommandLine(); initialized so it is never indeterminate
+    std::vector<std::string> errors_;
+
+private:
+    Settings(void) { }
+};
+
+} // namespace pbmerge
+
+int main(int argc, char* argv[]) // pbmerge entry point: returns EXIT_SUCCESS, or EXIT_FAILURE on usage errors / exceptions
+{
+    // setup help & options
+    optparse::OptionParser parser;
+    parser.description("pbmerge merges PacBio BAM files. If the input is DataSetXML, "
+                       "any filters will be applied. If no output filename is specified, "
+                       "new BAM will be written to stdout."
+                       );
+    parser.prog("pbmerge");
+    parser.usage("pbmerge [options] [-o <out.bam>] <INPUT>");
+    parser.version(pbmerge::Version);
+    parser.add_version_option(true);
+    parser.add_help_option(true);
+
+    auto ioGroup = optparse::OptionGroup(parser, "Input/Output");
+    ioGroup.add_option("-o")
+           .dest("output")
+           .metavar("output")
+           .help("Output BAM filename. ");
+    ioGroup.add_option("--no-pbi")
+            .dest("no_pbi")
+            .action("store_true")
+            .help("Set this option to skip PBI index file creation. PBI creation is "
+                  "automatically skipped if no output filename is provided."
+                  );
+    ioGroup.add_option("")
+           .dest("input")
+           .metavar("INPUT")
+           .help("Input may be one of:\n"
+                 "    DataSetXML, list of BAM files, or FOFN\n\n"
+                 "    fofn: pbmerge -o merged.bam bams.fofn\n\n"
+                 "    bams: pbmerge -o merged.bam 1.bam 2.bam 3.bam\n\n"
+                 "    xml:  pbmerge -o merged.bam foo.subreadset.xml\n\n"
+                 );
+    parser.add_option_group(ioGroup);
+
+    // parse command line for settings; on any recorded error print them plus the help text and fail
+    const pbmerge::Settings settings = pbmerge::Settings::FromCommandLine(parser, argc, argv);
+    if (!settings.errors_.empty()) {
+        cerr << endl;
+        for (const auto& e : settings.errors_)
+            cerr << "ERROR: " << e << endl;
+        cerr << endl;
+        parser.print_help();
+        return EXIT_FAILURE;
+    }
+
+    // run tool
+    try {
+        // setup our @PG entry to add to header
+        PacBio::BAM::ProgramInfo mergeProgram;
+        mergeProgram.Id(string("pbmerge-")+pbmerge::Version)
+                    .Name("pbmerge")
+                    .Version(pbmerge::Version);
+
+        PacBio::BAM::DataSet dataset;
+        if (settings.inputFilenames_.size() == 1)
+            dataset = PacBio::BAM::DataSet(settings.inputFilenames_.front());
+        else
+            dataset = PacBio::BAM::DataSet(settings.inputFilenames_);
+
+
+        PacBio::BAM::common::BamFileMerger::Merge(dataset,
+                                                  settings.outputFilename_,
+                                                  mergeProgram,
+                                                  settings.createPbi_);
+
+
+//        PacBio::BAM::common::BamFileMerger merger(dataset,
+//                                                  settings.outputFilename_,
+//                                                  mergeProgram,
+//                                                  settings.createPbi_);
+//        merger.Merge();
+
+        return EXIT_SUCCESS;
+    }
+    catch (const std::exception& e) {
+        cerr << "ERROR: " << e.what() << endl;
+        return EXIT_FAILURE;
+    }
+}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/pbbam.git