[med-svn] [pbbam] 01/12: Imported Upstream version 0.5.0
Afif Elghraoui
afif at moszumanska.debian.org
Tue Jul 5 03:24:42 UTC 2016
This is an automated email from the git hooks/post-receive script.
afif pushed a commit to branch master
in repository pbbam.
commit ce586756f62774b0f76bea862cf29431444e01aa
Author: Afif Elghraoui <afif at ghraoui.name>
Date: Mon Jul 4 15:18:40 2016 -0700
Imported Upstream version 0.5.0
---
.travis.yml | 64 +
CHANGELOG.md | 248 +-
CMakeLists.txt | 80 +-
INSTALL.md | 115 +-
LICENSE.txt | 34 +
README.md | 29 +
cmake/FindCSharp.cmake | 72 +
cmake/FindDotNetFrameworkSdk.cmake | 29 +
cmake/FindMono.cmake | 167 +
cmake/FindR.cmake | 48 +
cmake/PbbamTool.cmake | 23 +
cmake/UseCSharp.cmake | 111 +
cmake/UseDotNetFrameworkSdk.cmake | 16 +
cmake/UseMono.cmake | 16 +
docs/Doxyfile.in | 24 +-
docs/Makefile | 168 +
docs/examples/code/BarcodeQuery.txt | 17 +
docs/examples/code/Compare.txt | 3 +
docs/examples/code/Compare_AlignedEnd.txt | 2 +
docs/examples/code/Compare_AlignedStart.txt | 2 +
docs/examples/code/Compare_AlignedStrand.txt | 2 +
docs/examples/code/Compare_BarcodeForward.txt | 2 +
docs/examples/code/Compare_BarcodeQuality.txt | 2 +
docs/examples/code/Compare_BarcodeReverse.txt | 2 +
docs/examples/code/Compare_FullName.txt | 2 +
docs/examples/code/Compare_LocalContextFlag.txt | 2 +
docs/examples/code/Compare_MapQuality.txt | 2 +
docs/examples/code/Compare_MovieName.txt | 2 +
docs/examples/code/Compare_NumDeletedBases.txt | 2 +
docs/examples/code/Compare_NumInsertedBases.txt | 2 +
docs/examples/code/Compare_NumMatches.txt | 2 +
docs/examples/code/Compare_NumMismatches.txt | 2 +
docs/examples/code/Compare_QueryEnd.txt | 2 +
docs/examples/code/Compare_QueryStart.txt | 2 +
docs/examples/code/Compare_ReadAccuracy.txt | 2 +
docs/examples/code/Compare_ReadGroupId.txt | 2 +
docs/examples/code/Compare_ReadGroupNumericId.txt | 2 +
docs/examples/code/Compare_ReferenceEnd.txt | 2 +
docs/examples/code/Compare_ReferenceId.txt | 2 +
docs/examples/code/Compare_ReferenceName.txt | 2 +
docs/examples/code/Compare_ReferenceStart.txt | 2 +
docs/examples/code/Compare_TypeFromOperator.txt | 2 +
docs/examples/code/Compare_TypeToName.txt | 2 +
docs/examples/code/Compare_Zmw.txt | 2 +
docs/examples/code/EntireFileQuery.txt | 15 +
docs/examples/code/EntireFileQuery_BamFilename.txt | 4 +
docs/examples/code/EntireFileQuery_NonConst.txt | 4 +
docs/examples/code/GenomicIntervalQuery.txt | 16 +
docs/examples/code/GenomicIntervalQuery_Reuse.txt | 8 +
docs/examples/code/PbiAlignedEndFilter.txt | 4 +
docs/examples/code/PbiAlignedLengthFilter.txt | 4 +
docs/examples/code/PbiAlignedStartFilter.txt | 4 +
docs/examples/code/PbiAlignedStrandFilter.txt | 5 +
docs/examples/code/PbiBarcodeFilter.txt | 17 +
docs/examples/code/PbiBarcodeForwardFilter.txt | 15 +
docs/examples/code/PbiBarcodeQualityFilter.txt | 5 +
docs/examples/code/PbiBarcodeReverseFilter.txt | 15 +
docs/examples/code/PbiBarcodesFilter.txt | 6 +
docs/examples/code/PbiBuilder_WithReader.txt | 30 +
docs/examples/code/PbiBuilder_WithWriter.txt | 12 +
docs/examples/code/PbiFilterQuery.txt | 22 +
docs/examples/code/PbiFilter_Composition.txt | 8 +
docs/examples/code/PbiFilter_CustomFilter.txt | 21 +
docs/examples/code/PbiFilter_Interface.txt | 1 +
docs/examples/code/PbiFilter_Intersection_Copy.txt | 3 +
docs/examples/code/PbiFilter_Intersection_Move.txt | 3 +
docs/examples/code/PbiFilter_Union_Copy.txt | 3 +
docs/examples/code/PbiFilter_Union_Move.txt | 3 +
docs/examples/code/PbiIdentityFilter.txt | 6 +
docs/examples/code/PbiLocalContextFilter.txt | 22 +
docs/examples/code/PbiMapQualityFilter.txt | 5 +
docs/examples/code/PbiMovieNameFilter.txt | 14 +
docs/examples/code/PbiNumDeletedBasesFilter.txt | 6 +
docs/examples/code/PbiNumInsertedBasesFilter.txt | 6 +
docs/examples/code/PbiNumMatchesFilter.txt | 6 +
docs/examples/code/PbiNumMismatchesFilter.txt | 6 +
docs/examples/code/PbiQueryEndFilter.txt | 5 +
docs/examples/code/PbiQueryLengthFilter.txt | 5 +
docs/examples/code/PbiQueryNameFilter.txt | 15 +
docs/examples/code/PbiQueryStartFilter.txt | 5 +
docs/examples/code/PbiReadAccuracyFilter.txt | 5 +
docs/examples/code/PbiReadGroupFilter.txt | 64 +
docs/examples/code/PbiReferenceEndFilter.txt | 5 +
docs/examples/code/PbiReferenceIdFilter.txt | 16 +
docs/examples/code/PbiReferenceNameFilter.txt | 15 +
docs/examples/code/PbiReferenceStartFilter.txt | 5 +
docs/examples/code/PbiZmwFilter.txt | 16 +
docs/examples/code/ReadAccuracyQuery.txt | 15 +
docs/examples/code/SubreadLengthQuery.txt | 15 +
docs/examples/code/Tag_AsciiCtor.txt | 10 +
docs/examples/code/ZmwGroupQuery.txt | 23 +
docs/examples/code/ZmwQuery.txt | 6 +
docs/examples/code/ZmwWhitelistVirtualReader.txt | 6 +
docs/examples/plaintext/AlignmentPrinterOutput.txt | 13 +
.../plaintext/PbiFilter_DataSetXmlFilters.txt | 14 +
docs/source/api/Accuracy.rst | 11 +
docs/source/api/AlignmentPrinter.rst | 11 +
docs/source/api/AlignmentSet.rst | 11 +
docs/source/api/BaiIndexedBamReader.rst | 11 +
docs/source/api/BamFile.rst | 11 +
docs/source/api/BamHeader.rst | 11 +
docs/source/api/BamReader.rst | 11 +
docs/source/api/BamRecord.rst | 17 +
docs/source/api/BamRecordBuilder.rst | 11 +
docs/source/api/BamRecordImpl.rst | 11 +
docs/source/api/BamRecordView.rst | 11 +
docs/source/api/BamTagCodec.rst | 11 +
docs/source/api/BamWriter.rst | 11 +
docs/source/api/BarcodeLookupData.rst | 11 +
docs/source/api/BarcodeQuery.rst | 11 +
docs/source/api/BarcodeSet.rst | 11 +
docs/source/api/BasicLookupData.rst | 11 +
docs/source/api/Cigar.rst | 11 +
docs/source/api/CigarOperation.rst | 13 +
docs/source/api/Compare.rst | 8 +
docs/source/api/Config.rst | 8 +
docs/source/api/ConsensusAlignmentSet.rst | 11 +
docs/source/api/ConsensusReadSet.rst | 11 +
docs/source/api/ContigSet.rst | 11 +
docs/source/api/DataSet.rst | 11 +
docs/source/api/DataSetBase.rst | 11 +
docs/source/api/DataSetMetadata.rst | 11 +
docs/source/api/EntireFileQuery.rst | 11 +
docs/source/api/ExtensionElement.rst | 11 +
docs/source/api/Extensions.rst | 11 +
docs/source/api/ExternalResource.rst | 11 +
docs/source/api/ExternalResources.rst | 11 +
docs/source/api/FileIndex.rst | 11 +
docs/source/api/FileIndices.rst | 11 +
docs/source/api/Filter.rst | 11 +
docs/source/api/Filters.rst | 11 +
docs/source/api/Frames.rst | 11 +
docs/source/api/GenomicInterval.rst | 11 +
.../api/GenomicIntervalCompositeBamReader.rst | 11 +
docs/source/api/GenomicIntervalQuery.rst | 11 +
docs/source/api/HdfSubreadSet.rst | 11 +
docs/source/api/IndexResultBlock.rst | 17 +
docs/source/api/IndexedFastaReader.rst | 11 +
docs/source/api/Interval.rst | 11 +
.../api/InvalidSequencingChemistryException.rst | 11 +
docs/source/api/LocalContextFlags.rst | 8 +
docs/source/api/MappedLookupData.rst | 11 +
docs/source/api/NamespaceInfo.rst | 11 +
docs/source/api/NamespaceRegistry.rst | 11 +
docs/source/api/OrderedLookup.rst | 11 +
docs/source/api/Orientation.rst | 8 +
docs/source/api/ParentTool.rst | 11 +
docs/source/api/PbiBuilder.rst | 11 +
docs/source/api/PbiFile.rst | 14 +
docs/source/api/PbiFilter.rst | 11 +
docs/source/api/PbiFilterCompositeBamReader.rst | 11 +
docs/source/api/PbiFilterQuery.rst | 11 +
docs/source/api/PbiFilterTypes.rst | 8 +
docs/source/api/PbiIndex.rst | 11 +
docs/source/api/PbiIndexedBamReader.rst | 11 +
docs/source/api/PbiRawBarcodeData.rst | 11 +
docs/source/api/PbiRawBasicData.rst | 11 +
docs/source/api/PbiRawData.rst | 11 +
docs/source/api/PbiRawMappedData.rst | 11 +
docs/source/api/PbiRawReferenceData.rst | 11 +
docs/source/api/PbiReferenceEntry.rst | 11 +
docs/source/api/Position.rst | 10 +
docs/source/api/ProgramInfo.rst | 11 +
docs/source/api/QNameQuery.rst | 11 +
docs/source/api/QualityValue.rst | 11 +
docs/source/api/QualityValues.rst | 11 +
docs/source/api/ReadAccuracyQuery.rst | 11 +
docs/source/api/ReadGroupInfo.rst | 21 +
docs/source/api/ReferenceLookupData.rst | 11 +
docs/source/api/ReferenceSet.rst | 11 +
docs/source/api/SamTagCodec.rst | 11 +
docs/source/api/SequenceInfo.rst | 11 +
docs/source/api/SequentialCompositeBamReader.rst | 11 +
docs/source/api/Strand.rst | 8 +
docs/source/api/SubDataSets.rst | 11 +
docs/source/api/SubreadLengthQuery.rst | 11 +
docs/source/api/SubreadSet.rst | 11 +
docs/source/api/Tag.rst | 15 +
docs/source/api/TagCollection.rst | 11 +
docs/source/api/UnorderedLookup.rst | 11 +
docs/source/api/VirtualPolymeraseBamRecord.rst | 11 +
.../api/VirtualPolymeraseCompositeReader.rst | 11 +
docs/source/api/VirtualPolymeraseReader.rst | 11 +
docs/source/api/VirtualRegion.rst | 11 +
docs/source/api/VirtualRegionType.rst | 8 +
docs/source/api/VirtualRegionTypeMap.rst | 11 +
docs/source/api/ZmwGroupQuery.rst | 11 +
docs/source/api/ZmwQuery.rst | 11 +
docs/source/api/ZmwWhitelistVirtualReader.rst | 11 +
docs/source/api_reference.rst | 12 +
docs/source/commandline_utilities.rst | 15 +
docs/source/conf.py | 332 +
docs/source/getting_started.rst | 144 +
docs/source/index.rst | 33 +
docs/source/pacbio-theme/static/headerGradient.jpg | Bin 0 -> 7099 bytes
docs/source/pacbio-theme/static/pacbio.css | 238 +
docs/source/pacbio-theme/static/pacbioLogo.png | Bin 0 -> 3128 bytes
docs/source/pacbio-theme/static/pygments.css | 55 +
docs/source/pacbio-theme/theme.conf | 4 +
docs/source/requirements.txt | 1 +
docs/source/swig_bindings.rst | 257 +
docs/source/tools/bam2sam.rst | 21 +
docs/source/tools/pbindex.rst | 18 +
docs/source/tools/pbindexdump.rst | 233 +
docs/source/tools/pbmerge.rst | 30 +
docs/specs/pbbam.rst | 631 ++
include/pbbam/Accuracy.h | 51 +-
include/pbbam/AlignmentPrinter.h | 55 +-
include/pbbam/BaiIndexedBamReader.h | 130 +
include/pbbam/BamFile.h | 105 +-
include/pbbam/BamHeader.h | 240 +-
include/pbbam/BamReader.h | 157 +-
include/pbbam/BamRecord.h | 1312 ++--
include/pbbam/BamRecordBuilder.h | 198 +-
include/pbbam/BamRecordImpl.h | 468 +-
include/pbbam/BamTagCodec.h | 73 +-
include/pbbam/BamWriter.h | 95 +-
include/pbbam/{ZmwQuery.h => BarcodeQuery.h} | 55 +-
include/pbbam/Cigar.h | 66 +-
include/pbbam/CigarOperation.h | 90 +-
include/pbbam/Compare.h | 430 ++
include/pbbam/CompositeBamReader.h | 269 +
include/pbbam/Config.h | 111 +-
include/pbbam/DataSet.h | 620 +-
include/pbbam/DataSetTypes.h | 601 +-
include/pbbam/DataSetXsd.h | 54 +-
include/pbbam/EntireFileQuery.h | 49 +-
include/pbbam/Frames.h | 86 +-
include/pbbam/GenomicInterval.h | 144 +-
include/pbbam/GenomicIntervalQuery.h | 62 +-
include/pbbam/GroupQuery.h | 88 -
include/pbbam/GroupQueryBase.h | 214 -
include/pbbam/IndexedFastaReader.h | 120 +-
include/pbbam/Interval.h | 149 +-
include/pbbam/LocalContextFlags.h | 25 +-
include/pbbam/Orientation.h | 20 +-
include/pbbam/PbiBasicTypes.h | 108 +
include/pbbam/PbiBuilder.h | 143 +-
include/pbbam/PbiFile.h | 36 +-
include/pbbam/PbiFilter.h | 343 +
.../pbbam/{ZmwGroupQuery.h => PbiFilterQuery.h} | 55 +-
include/pbbam/PbiFilterTypes.h | 1028 +++
include/pbbam/PbiIndex.h | 277 +-
include/pbbam/PbiIndexedBamReader.h | 174 +
include/pbbam/PbiLookupData.h | 718 ++
include/pbbam/PbiRawData.h | 398 +-
include/pbbam/Position.h | 13 +-
include/pbbam/ProgramInfo.h | 166 +-
include/pbbam/{BamReader.h => QNameQuery.h} | 96 +-
include/pbbam/QualityValue.h | 63 +-
include/pbbam/QualityValues.h | 203 +-
include/pbbam/QueryBase.h | 241 -
include/pbbam/{ZmwQuery.h => ReadAccuracyQuery.h} | 62 +-
include/pbbam/ReadGroupInfo.h | 571 +-
include/pbbam/SamTagCodec.h | 26 +-
include/pbbam/SequenceInfo.h | 174 +-
include/pbbam/Strand.h | 13 +-
.../{ZmwGroupQuery.h => SubreadLengthQuery.h} | 58 +-
include/pbbam/Tag.h | 175 +-
include/pbbam/TagCollection.h | 12 +-
include/pbbam/ZmwGroupQuery.h | 43 +-
include/pbbam/ZmwQuery.h | 42 +-
.../{virtual/VirtualRegionType.h => ZmwType.h} | 29 +-
include/pbbam/{TagCollection.h => ZmwTypeMap.h} | 25 +-
.../InvalidSequencingChemistryException.h} | 96 +-
.../pbbam/{Position.h => internal/Accuracy.inl} | 35 +-
include/pbbam/internal/BamHeader.inl | 154 +
include/pbbam/internal/BamRecord.inl | 166 +
.../BamRecordBuilder.inl} | 93 +-
include/pbbam/internal/BamRecordImpl.inl | 216 +
include/pbbam/internal/BamRecordSort.h | 138 -
.../pbbam/internal/Cigar.inl | 57 +-
.../CigarOperation.inl} | 127 +-
.../{SequentialMergeStrategy.h => Compare.inl} | 55 +-
include/pbbam/internal/CompositeBamReader.inl | 397 ++
include/pbbam/internal/DataSet.inl | 6 +-
include/pbbam/internal/DataSetBaseTypes.h | 11 +-
include/pbbam/internal/DataSetElement.h | 8 +-
include/pbbam/internal/DataSetElement.inl | 40 +-
include/pbbam/internal/DataSetTypes.inl | 6 +-
include/pbbam/{Accuracy.h => internal/Frames.inl} | 85 +-
.../GenomicInterval.inl} | 67 +-
include/pbbam/internal/IMergeStrategy.h | 67 -
.../pbbam/{Interval.h => internal/Interval.inl} | 116 +-
include/pbbam/internal/MergeStrategy.h | 239 -
.../pbbam/internal/PbiBasicTypes.inl | 56 +-
include/pbbam/internal/PbiFilter.inl | 312 +
include/pbbam/internal/PbiFilterTypes.inl | 553 ++
include/pbbam/internal/PbiIndex.inl | 165 +
include/pbbam/internal/PbiIndex_p.h | 931 ---
include/pbbam/internal/PbiIndex_p.inl | 927 ---
include/pbbam/internal/PbiLookupData.inl | 531 ++
include/pbbam/internal/PbiRawData.inl | 113 +
.../{ProgramInfo.h => internal/ProgramInfo.inl} | 105 +-
.../internal/{MergeItem.h => QualityValue.inl} | 52 +-
.../QualityValues.inl} | 99 +-
include/pbbam/internal/QueryBase.h | 152 +-
include/pbbam/internal/QueryBase.inl | 177 +
.../ReadGroupInfo.inl} | 286 +-
.../{SequenceInfo.h => internal/SequenceInfo.inl} | 111 +-
include/pbbam/internal/Tag.inl | 19 +-
include/pbbam/virtual/VirtualPolymeraseBamRecord.h | 109 +-
.../virtual/VirtualPolymeraseCompositeReader.h | 111 +
include/pbbam/virtual/VirtualPolymeraseReader.h | 96 +-
include/pbbam/virtual/VirtualRegion.h | 89 +-
include/pbbam/virtual/VirtualRegionType.h | 23 +-
include/pbbam/virtual/VirtualRegionTypeMap.h | 11 +-
include/pbbam/virtual/ZmwWhitelistVirtualReader.h | 151 +
src/Accuracy.cpp | 10 +-
src/AlignmentPrinter.cpp | 15 +-
src/BaiIndexedBamReader.cpp | 141 +
src/BamFile.cpp | 110 +-
src/BamHeader.cpp | 289 +-
src/BamReader.cpp | 189 +
src/BamRecord.cpp | 408 +-
src/BamRecordImpl.cpp | 240 +-
src/BamTagCodec.cpp | 76 +-
src/BamWriter.cpp | 80 +-
.../src/test_TimeUtils.cpp => src/BarcodeQuery.cpp | 41 +-
.../IBamFileIterator.h => src/ChemistryTable.cpp | 50 +-
.../pbbam/Orientation.h => src/ChemistryTable.h | 20 +-
src/Cigar.cpp | 8 +-
src/CigarOperation.cpp | 6 +-
src/Compare.cpp | 141 +
src/Config.cpp | 16 +-
src/DataSet.cpp | 143 +-
src/DataSetBaseTypes.cpp | 48 +-
src/DataSetElement.cpp | 4 +-
src/DataSetIO.cpp | 7 +-
src/DataSetTypes.cpp | 110 +-
src/DataSetUtils.h | 12 +
src/DataSetXsd.cpp | 177 +-
src/EntireFileQuery.cpp | 73 +-
src/FileUtils.cpp | 246 +
src/FileUtils.h | 145 +-
src/Frames.cpp | 10 +-
src/GenomicInterval.cpp | 7 +-
src/GenomicIntervalQuery.cpp | 113 +-
src/GroupQuery.cpp | 91 -
src/IndexedFastaReader.cpp | 37 +-
src/MemoryUtils.h | 9 +
src/PbiBuilder.cpp | 225 +-
src/PbiFile.cpp | 49 +-
src/PbiFilter.cpp | 249 +
src/{FilterEngine.cpp => PbiFilterQuery.cpp} | 56 +-
src/PbiFilterTypes.cpp | 313 +
src/PbiIndex.cpp | 143 +-
src/PbiIndexIO.cpp | 111 +-
src/PbiIndexIO.h | 8 +-
src/PbiIndexedBamReader.cpp | 187 +
src/PbiRawData.cpp | 215 +-
src/ProgramInfo.cpp | 6 +-
src/{FilterEngine.cpp => QNameQuery.cpp} | 88 +-
src/QualityValue.cpp | 6 +-
.../ReadAccuracyQuery.cpp | 44 +-
src/ReadGroupInfo.cpp | 503 +-
src/SamTagCodec.cpp | 26 +-
src/SequenceInfo.cpp | 6 +-
.../SubreadLengthQuery.cpp | 44 +-
src/Tag.cpp | 25 +-
src/TagCollection.cpp | 6 +-
src/TimeUtils.h | 19 +
src/VirtualPolymeraseBamRecord.cpp | 149 +-
src/VirtualPolymeraseCompositeReader.cpp | 146 +
src/VirtualPolymeraseReader.cpp | 286 +-
src/VirtualRegionTypeMap.cpp | 17 +-
src/XmlReader.cpp | 2 +-
src/XmlWriter.cpp | 83 +-
src/ZmwGroupQuery.cpp | 133 +-
src/ZmwQuery.cpp | 111 +-
src/{VirtualRegionTypeMap.cpp => ZmwTypeMap.cpp} | 20 +-
...aseReader.cpp => ZmwWhitelistVirtualReader.cpp} | 102 +-
src/files.cmake | 89 +-
src/swig/Accuracy.i | 4 +-
src/swig/BamFile.i | 13 +-
src/swig/BamHeader.i | 7 -
src/swig/BamRecord.i | 8 +-
src/swig/BamRecordBuilder.i | 6 +
src/swig/BamRecordImpl.i | 4 +-
src/swig/BamWriter.i | 5 +-
src/swig/CigarOperation.i | 2 -
src/swig/DataSet.i | 34 +-
src/swig/DataSetTypes.i | 59 +
src/swig/EntireFileQuery.i | 4 +-
src/swig/GenomicInterval.i | 4 +-
src/swig/GenomicIntervalQuery.i | 6 +-
src/swig/LocalContextFlags.i | 4 +
src/swig/PacBioBam.i | 43 +-
src/swig/PbiRawData.i | 12 +-
src/swig/Tag.i | 174 +-
src/swig/VirtualPolymeraseBamRecord.i | 24 +
src/swig/VirtualPolymeraseReader.i | 11 +
src/swig/VirtualRegion.i | 18 +
src/swig/ZmwWhitelistVirtualReader.i | 11 +
tests/CMakeLists.txt | 15 +-
tests/data/chunking/chunking.subreadset.xml | 65 +
.../chunking/chunking_emptyfilters.subreadset.xml | 59 +
.../chunking_missingfilters.subreadset.xml | 58 +
...20800000001823174110291514_s1_p0.1.subreads.bam | Bin 0 -> 1090276 bytes
...0000001823174110291514_s1_p0.1.subreads.bam.pbi | Bin 0 -> 4163 bytes
...20800000001823174110291514_s1_p0.2.subreads.bam | Bin 0 -> 980379 bytes
...0000001823174110291514_s1_p0.2.subreads.bam.pbi | Bin 0 -> 3933 bytes
...20800000001823174110291514_s1_p0.3.subreads.bam | Bin 0 -> 973029 bytes
...0000001823174110291514_s1_p0.3.subreads.bam.pbi | Bin 0 -> 3698 bytes
tests/data/dataset/ali1.xml | 2 +-
tests/data/dataset/ali2.xml | 2 +-
tests/data/dataset/ali3.xml | 2 +-
tests/data/dataset/ali4.xml | 2 +-
tests/data/dataset/bam_mapping.bam | Bin 169668 -> 169668 bytes
tests/data/dataset/bam_mapping.bam.pbi | Bin 2452 -> 2469 bytes
tests/data/dataset/bam_mapping_1.bam | Bin 167530 -> 167530 bytes
tests/data/dataset/bam_mapping_1.bam.pbi | Bin 2437 -> 2448 bytes
tests/data/dataset/bam_mapping_2.bam | Bin 165778 -> 165778 bytes
tests/data/dataset/bam_mapping_2.bam.pbi | Bin 2422 -> 2435 bytes
tests/data/dataset/bam_mapping_new.bam | Bin 0 -> 22428 bytes
tests/data/dataset/bam_mapping_new.bam.pbi | Bin 0 -> 362 bytes
tests/data/dataset/bam_mapping_staggered.xml | 2 +-
tests/data/dataset/barcode.dataset.xml | 4 +-
tests/data/dataset/ccsread.dataset.xml | 2 +-
tests/data/dataset/contig.dataset.xml | 30 +-
tests/data/dataset/hdfsubread_dataset.xml | 2 +-
tests/data/dataset/lambda_contigs.xml | 6 +-
tests/data/dataset/malformed.xml | 84 +
tests/data/dataset/merge.fofn | 2 +
.../pbalchemy10kbp.pbalign.sorted.pbver1.bam | Bin 35251 -> 35235 bytes
.../pbalchemy10kbp.pbalign.sorted.pbver1.bam.bai | Bin 632 -> 632 bytes
tests/data/dataset/pbalchemy10kbp.xml | 2 +-
tests/data/dataset/reference.dataset.xml | 2 +-
tests/data/dataset/subread_dataset1.xml | 2 +-
tests/data/dataset/subread_dataset2.xml | 2 +-
tests/data/dataset/subread_dataset3.xml | 2 +-
.../dataset/transformed_rs_subread_dataset.xml | 25 +-
tests/data/ex2.bam | Bin 125999 -> 126008 bytes
tests/data/ex2.bam.bai | Bin 176 -> 176 bytes
tests/data/ex2.sam | 2 +-
tests/data/{ex2.bam => ex2_copy.bam} | Bin
tests/data/{ex2.bam.bai => ex2_copy.bam.bai} | Bin
tests/data/phi29.bam | Bin 0 -> 71653 bytes
.../polymerase/consolidate.subread.dataset.xml | 38 +
.../filtered_resources.subread.dataset.xml | 67 +
tests/data/polymerase/internal.hqregions.bam | Bin 0 -> 84164 bytes
tests/data/polymerase/internal.lqregions.bam | Bin 0 -> 53088 bytes
tests/data/polymerase/internal.polymerase.bam | Bin 134303 -> 133770 bytes
tests/data/polymerase/internal.scraps.bam | Bin 68735 -> 67986 bytes
tests/data/polymerase/internal.scraps.bam.pbi | Bin 0 -> 275 bytes
tests/data/polymerase/internal.subreads.bam | Bin 73590 -> 73170 bytes
tests/data/polymerase/internal.subreads.bam.pbi | Bin 0 -> 185 bytes
tests/data/polymerase/internal_hq.hqregion.bam | Bin 82035 -> 0 bytes
tests/data/polymerase/internal_hq.scraps.bam | Bin 51450 -> 0 bytes
tests/data/polymerase/internal_polymerase.fasta | 2 -
.../multiple_resources.subread.dataset.xml | 46 +
tests/data/polymerase/production.polymerase.bam | Bin 25072 -> 25082 bytes
tests/data/polymerase/production.scraps.bam | Bin 13530 -> 13535 bytes
tests/data/polymerase/production.scraps.bam.pbi | Bin 0 -> 279 bytes
tests/data/polymerase/production.subreads.bam | Bin 14655 -> 14659 bytes
tests/data/polymerase/production.subreads.bam.pbi | Bin 0 -> 186 bytes
tests/data/polymerase/production_hq.hqregion.bam | Bin 15796 -> 15803 bytes
.../data/polymerase/production_hq.hqregion.bam.pbi | Bin 0 -> 90 bytes
tests/data/polymerase/production_hq.scraps.bam | Bin 10061 -> 10070 bytes
tests/data/polymerase/production_hq.scraps.bam.pbi | Bin 0 -> 104 bytes
.../polymerase/whitelist/internal.polymerase.bam | Bin 0 -> 400494 bytes
.../whitelist/internal.polymerase.bam.pbi | Bin 0 -> 105 bytes
.../data/polymerase/whitelist/internal.scraps.bam | Bin 0 -> 203149 bytes
.../polymerase/whitelist/internal.scraps.bam.pbi | Bin 0 -> 420 bytes
.../polymerase/whitelist/internal.subreads.bam | Bin 0 -> 218703 bytes
.../polymerase/whitelist/internal.subreads.bam.pbi | Bin 0 -> 264 bytes
.../data/polymerase/whitelist/scrapless.scraps.bam | Bin 0 -> 436 bytes
.../polymerase/whitelist/scrapless.scraps.bam.pbi | Bin 0 -> 65 bytes
.../polymerase/whitelist/scrapless.subreads.bam | Bin 0 -> 33466 bytes
.../whitelist/scrapless.subreads.bam.pbi | Bin 0 -> 113 bytes
tests/data/relative/a/test.bam | Bin 0 -> 351 bytes
tests/data/relative/b/test1.bam | Bin 0 -> 351 bytes
tests/data/relative/b/test2.bam | Bin 0 -> 351 bytes
tests/data/relative/relative.fofn | 3 +
tests/data/relative/relative.xml | 8 +
tests/data/relative/relative2.fofn | 4 +
tests/data/test_group_query/test1.bam | Bin 2165 -> 2168 bytes
tests/data/test_group_query/test2.bam | Bin 13004 -> 13008 bytes
tests/data/test_group_query/test2.bam.pbi | Bin 194 -> 197 bytes
tests/data/test_group_query/test3.bam | Bin 19277 -> 19294 bytes
tests/data/truncated.bam | Bin 0 -> 200 bytes
tests/files.cmake | 12 +-
tests/scripts/cram.py | 516 ++
tests/src/R/test_pbbam.sh.in | 11 +-
tests/src/R/tests/test_Accuracy.R | 33 +-
tests/src/R/tests/test_BamFile.R | 3 +-
tests/src/R/tests/test_BamHeader.R | 8 +-
tests/src/R/tests/test_EndToEnd.R | 41 +-
tests/src/R/tests/test_Intervals.R | 52 +-
tests/src/R/tests/test_PolymeraseStitching.R | 427 ++
tests/src/TestData.h.in | 8 +-
tests/src/cram/bam2sam.t | 63 +
tests/src/cram/pbindexdump_cpp.t | 39 +
tests/src/cram/pbindexdump_json.t | 83 +
tests/src/cram/pbmerge_aligned_ordering.t | 197 +
tests/src/cram/pbmerge_dataset.t | 144 +
tests/src/cram/pbmerge_fofn.t | 134 +
tests/src/cram/pbmerge_mixed_ordering.t | 57 +
tests/src/cram/pbmerge_pacbio_ordering.t | 227 +
tests/src/python/test/test_Accuracy.py | 24 +-
tests/src/python/test/test_BamFile.py | 1 -
tests/src/python/test/test_BamHeader.py | 8 +-
tests/src/python/test/test_PolymeraseStitching.py | 383 +
tests/src/test_Accuracy.cpp | 24 +-
tests/src/test_AlignmentPrinter.cpp | 102 +-
tests/src/test_BamFile.cpp | 108 +
tests/src/test_BamHeader.cpp | 216 +-
tests/src/test_BamRecord.cpp | 4 +
tests/src/test_BamRecordClipping.cpp | 24 +
tests/src/test_BamWriter.cpp | 27 +
.../src/test_BarcodeQuery.cpp | 19 +-
tests/src/test_Compare.cpp | 739 ++
tests/src/test_DataSetCore.cpp | 74 +-
tests/src/test_DataSetIO.cpp | 436 +-
tests/src/test_DataSetQuery.cpp | 126 +-
tests/src/test_DataSetXsd.cpp | 80 +-
tests/src/test_EndToEnd.cpp | 160 +-
tests/src/test_EntireFileQuery.cpp | 3 +-
tests/src/test_FileUtils.cpp | 325 +
tests/src/test_GenomicIntervalQuery.cpp | 348 +-
tests/src/test_PacBioIndex.cpp | 514 +-
tests/src/test_PbiFilter.cpp | 1300 ++++
tests/src/test_PbiFilterQuery.cpp | 245 +
tests/src/test_PolymeraseStitching.cpp | 191 +-
.../{test_GroupQuery.cpp => test_QNameQuery.cpp} | 81 +-
...test_BamFile.cpp => test_ReadAccuracyQuery.cpp} | 39 +-
tests/src/test_ReadGroupInfo.cpp | 64 +-
tests/src/test_SequenceUtils.cpp | 24 -
.../{test_TimeUtils.cpp => test_StringUtils.cpp} | 29 +-
...est_BamFile.cpp => test_SubreadLengthQuery.cpp} | 48 +-
tests/src/test_Tags.cpp | 246 +-
tests/src/test_TimeUtils.cpp | 16 +-
.../src/test_VirtualPolymeraseCompositeReader.cpp | 132 +
tools/CMakeLists.txt | 18 +-
tools/bam2sam/CMakeLists.txt | 32 +
tools/bam2sam/src/Bam2Sam.cpp | 121 +
.../pbbam/Strand.h => tools/bam2sam/src/Bam2Sam.h | 24 +-
.../bam2sam/src/Bam2SamVersion.h.in | 19 +-
.../bam2sam/src/Settings.h | 33 +-
tools/{pbindex => bam2sam}/src/main.cpp | 87 +-
.../common/BamFileMerger.h | 65 +-
tools/common/BamFileMerger.inl | 262 +
tools/{pbindex/src => common}/OptionParser.cpp | 0
tools/{pbindex/src => common}/OptionParser.h | 0
tools/pbindex/CMakeLists.txt | 27 +-
tools/pbindex/src/PbIndex.cpp | 2 +-
tools/pbindex/src/main.cpp | 2 +-
tools/pbindexdump/CMakeLists.txt | 35 +
tools/pbindexdump/src/CppFormatter.cpp | 177 +
.../pbindexdump/src/CppFormatter.h | 25 +-
.../pbindexdump/src/IFormatter.h | 36 +-
tools/pbindexdump/src/JsonFormatter.cpp | 195 +
.../pbindexdump/src/JsonFormatter.h | 48 +-
.../src/PbIndexDump.cpp} | 49 +-
.../pbindexdump/src/PbIndexDump.h | 24 +-
.../pbindexdump/src/PbIndexDumpVersion.h.in | 22 +-
.../pbindexdump/src/Settings.h | 35 +-
tools/pbindexdump/src/json.hpp | 7295 ++++++++++++++++++++
tools/{pbindex => pbindexdump}/src/main.cpp | 86 +-
tools/pbmerge/CMakeLists.txt | 36 +
.../pbmerge/src/PbMergeVersion.h.in | 19 +-
tools/pbmerge/src/main.cpp | 174 +
562 files changed, 38529 insertions(+), 9895 deletions(-)
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..b1990e9
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,64 @@
+language: cpp
+compiler:
+ - gcc
+
+before_install:
+
+ # Travis's default installs of gcc, boost, & cmake currently lag behind the minimums we need.
+ # So we need to manually setup them up.
+ #
+ # - gcc 4.8 (current default on Travis is 4.7, which is no good for C++11 work)
+ # - boost 1.55
+ # - cmake 3.x
+
+ # add external repos
+ - sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test # gcc
+ - sudo add-apt-repository -y ppa:boost-latest/ppa # boost
+ - sudo add-apt-repository -y ppa:george-edison55/precise-backports # cmake
+
+ # remove existing cmake install
+ - sudo apt-get remove -qq cmake cmake-data
+ - sudo apt-get autoremove -qq
+
+ # update apt
+ - sudo apt-get update -y -qq
+
+ # install
+ - sudo apt-get install -y -qq g++-4.8 boost1.55 cmake-data cmake
+
+ # make sure we're using new gcc tools
+ - sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-4.8 90
+ - sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-4.8 90
+ - sudo update-alternatives --install /usr/bin/gcov gcov /usr/bin/gcov-4.8 90
+
+ # prep zlib
+ - sudo apt-get install -y -qq zlib1g-dev
+
+ # prep htslib
+ - "cd .. && git clone https://github.com/PacificBiosciences/htslib.git && cd htslib && make && sudo make install; cd $TRAVIS_BUILD_DIR"
+
+ # prep GoogleTest
+ - sudo apt-get install -y -qq libgtest-dev
+
+before_script:
+ # run cmake
+ - mkdir build
+ - cd build
+ - cmake .. -DGTEST_SRC_DIR=/usr/src/gtest -DCMAKE_BUILD_TYPE=Debug
+
+script:
+ # build & test
+ - make -j 3
+ - make test
+
+branches:
+ only:
+ - master
+
+notifications:
+ recipients:
+ - dbarnett at pacb.com
+ email:
+ on_success: change
+ on_failure: always
+
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 68703c1..bd2c228 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,36 +3,199 @@
All notable changes to this project will be documented in this file.
This project adheres to [Semantic Versioning](http://semver.org/).
-**NOTE:** The current series (0.y.z) is under initial development. Anything may change at any time.
-The public API should not be considered stable yet. Once we lock down a version 1.0.0, this will
-define a reference point & compatibility guarantees will be maintained within each major version
-series.
-
+**NOTE:** The current series (0.y.z) is under initial development. Anything may
+change at any time. The public API should not be considered stable yet. Once we
+lock down a version 1.0.0, this will define a reference point & compatibility
+guarantees will be maintained within each major version series.
## Active
+## [0.5.0] - 2016-02-22
+
+### Added
+- Platform model tag added to read group as RG::PM
+- New scrap zmw type sz
+
+### Added
+- pbmerge accepts DataSetXML as input - using top-level resource BAMs as input,
+applying filters, and generating a merged BAM. Also added FOFN support, instead
+of listing out BAMs as command line args.
+- PbiLocalContextFilter to allow filtering on subread local context.
+- PbiBuilder: multithreading & zlib compression-level tuning for PBI output
+
+### Fixed
+- Fixed mishandling of relative BAM filenames in the filename constructor for
+DataSet (e.g. DataSet ds("../data.bam")).
+
+## [0.4.5] - 2016-01-14
+
+### Changed
+- PbiFilterQuery (and any other PBI-backed query, e.g. ZmwQuery ) now throws if
+PBI file(s) missing insted of returning empty result.
+- GenomicIntervalQuery now throws if BAI file(s) missing instead of returning
+empty result.
+- BamFile will throw if file is truncated (e.g. missing the EOF block). Disable
+by defining PBBAM_NO_CHECK_EOF .
+
+## [0.4.4] - 2016-01-07
+
+### Added
+- bam2sam command line utility. The primary benefit is removing the dependency
+on samtools during tests, but also provides users a functioning BAM -> SAM
+converter in the absence of samtools.
+- pbmerge command line utility. Allows merging N BAM files into one, optionally
+creating the PBI file alongside.
+- Added BamRecord::Pkmean2 & Pkmid2, 2D equivalent of Pkmean/Pkmid, for internal
+BAMs.
+
+### Removed
+- samtools dependency
+
+## [0.4.3] - 2015-12-22
+
+### Added
+- Compile using ccache by default, if available. Can be manually disabled using
+-DPacBioBAM_use_ccache=OFF with cmake.
+- pbindexdump: command-line utility that converts PBI file data into human-
+readable formats. (JSON by default).
+
+### Changed
+- CMake option PacBioBAM_build_pbindex is being deprecated. Use
+PacBioBAM_build_tools instead.
+
+## [0.4.2] - 2015-12-22
+
+### Changed
+- BamFile::PacBioIndexExists & StandardIndexExists no longer check timestamps.
+Copying/moving files around can yield timestamps that are not helpful (no longer
+guaranteed that the .pbi will be "newer" than the .bam, even though no content
+changed). Added methods (e.g. bool BamFile::PacBioIndexIsNewer()) to do that
+lookup if needed, but it is no longer done automatically.
+
+## [0.4.1] - 2015-12-18
+
+### Added
+- BamRecord::HasNumPasses
+
+### Changed
+- VirtualPolymeraseBamRecord::VirtualRegionsTable(type) returns an empty vector
+of regions if none are associated with the requested type, instead of throwing.
+
+## [0.4.0] - 2015-12-15
+
+### Changed
+- Redesigned PbiFilter interface and backend. Previous implementation did not
+scale well as intermediate results were far too unwieldy. This redesign provides
+speedups of orders of magnitude in many cases.
+
+## [0.3.2] - 2015-12-10
+
+### Added
+- Support for ReadGroupInfo sequencing chemistry data.
+InvalidSequencingChemistryException thrown if an unsupported combination is
+encountered.
+- VirtualPolymeraseCompositeReader - for re-stitching records, across multiple
+resources (e.g. from DataSetXML). Reader respects DataSet filter criteria.
+
+## [0.3.1] - 2015-10-30
+
+### Added
+- ZmwWhitelistVirtualReader: similar to VirtualPolymeraseReader but restricts
+iteration to a whitelist of ZMW hole numbers, leveraging PBI index data for
+random-access.
+
+### Fixed
+- Fixed error in PBI construction, in which entire file sections (e.g.
+BarcodeData or MappedData) where being dropped when any one record lacked data.
+Correct behavior is to allow file section ommission if all records lack that
+data type.
+
+## [0.3.0] - 2015-10-29
+
+### Fixed
+- Improper reporting of current offset from multi-threaded BamWriter. This had
+the effect of creating broken PBIs that were written alongside the BAM. Added a
+flush step, which incurs a performance hit, but restores correctness.
+
+## [0.2.4] - 2015-10-26
+
+### Fixed
+- Empty PbiFilter now returns all records, instead of filtering away all records.
+
+## [0.2.3] - 2015-10-26
+
+### Added/Fixed
+- Syncing DataSetXML across APIs. Primary changes include output of Version
+attribute ("3.0.1") on appropriate elements, as well as resolution of namespace
+issues.
+
+## [0.2.2] - 2015-10-22
+
+### Added
+- Added BAI bin calculation to BamWriter::Write, to ensure maximal compatibility
+with downstream tools (e.g. 'samtools index'). A new BinCalculationMode enum
+flag in BamWriter constructor cotnrols whether this behavior is enabled[default]
+or not.
+
+## [0.2.1] - 2015-10-19
+
+### Added
+- Exposed the following classes to public API:
+ - BamReader
+ - BaiIndexedBamReader
+ - PbiIndexedBamReader
+ - GenomicIntervalCompositeBamReader
+ - PbiFilterCompositeBamReader
+
+## [0.2.0] - 2015-10-09
+
+### Changed
+- BAM spec v3.0.1 compliance. Previous (betas) versions of the BAM spec are not
+supported and will causean exception to be throw if encountered.
+- PBI lookup interface & backend, see PbiIndex.h & PbiLookupData.h for details.
+
### Added
-- BamFile::PacBioIndexExists() & BamFile::StandardIndexExists() - query the existence of index files
-without auto-building them if they are missing, as in BamFile::Ensure*IndexExists().
-- GenomicInterval now accepts an htslib/samtools-style REGION string in the constructor:
-GenomicInterval("chr1:1000-2000"). Please note though, that pbbam uses 0-based coordinates throughout,
-whereas samtools expects 1-based. The above string is equivalent to "chr1:1001-2000" in samtools.
+- BamFile::PacBioIndexExists() & BamFile::StandardIndexExists() - query the
+existence of index files without auto-building them if they are missing, as in
+BamFile::Ensure*IndexExists().
+- GenomicInterval now accepts an htslib/samtools-style REGION string in the
+constructor: GenomicInterval("chr1:1000-2000"). Please note though, that pbbam
+uses 0-based coordinates throughout, whereas samtools expects 1-based. The above
+string is equivalent to "chr1:1001-2000" in samtools.
+- Built-in PBI filters. See PbiFlter.h & PbiFilterTypes.h for built-in filters
+and constructing composite filters. These can be used in conjunction with the
+new PbiFilterQuery, which takes a generic PbiFilter and applies that to a
+DataSet for iteration.
+- New built-in queries: BarcodeQuery, ReadAccuracyQuery, SubreadLengthQuery.
+These leverage the new filter API to construct a PbiFilter and apply to a
+DataSet.
+- Built-in BamRecord comparators that are STL-compatible. See Compare.h for full
+list. This allows for statements like the following, which sorts records by ZMW
+number:
+``` c++
+ vector<BamRecord> data;
+ std::sort(data.begin(), data.end(), Compare::Zmw());
+```
+- "exciseSoftClips" option to BamRecord::CigarData()
## [0.1.0] - 2015-07-17
### Changed
- BAM spec v3.0b7 compliance
- - Removal of 'M' as allowed CIGAR operation. Attempt to use such a CIGAR op will throw an exception.
+ - Removal of 'M' as allowed CIGAR operation. Attempt to use such a CIGAR op
+ will throw an exception.
- Addition of IPD/PulseWidth codec version info in header
### Added
- Auto-generation of UTC timestamp for DataSet objects
-- PbiBuilder - allows generation of PBI index data alongside generation/modification of BAM record
-data. This obviates the need to wait for a completed BAM, then go through the zlib decompression, etc.
-- Added DataSet::FromXml(string xml) to create DataSets from "raw" XML string, rather than building up
-using DataSet API or loading from existing file.
-- "pbindex" command line tool to generate ".pbi" files from BAM data. The executable is built by default,
-but can be disabled using the cmake option "-DPacBioBAM_build_pbindex=OFF".
+- PbiBuilder - allows generation of PBI index data alongside generation or
+modification of BAM record data. This obviates the need to wait for a completed
+BAM, then go through the zlib decompression, etc.
+- Added DataSet::FromXml(string xml) to create DataSets from "raw" XML string,
+rather than building up using DataSet API or loading from existing file.
+- "pbindex" command line tool to generate ".pbi" files from BAM data. The
+executable is built by default, but can be disabled using the cmake option
+"-DPacBioBAM_build_pbindex=OFF".
### Fixed
- PBI construction failing on CCS reads
@@ -45,23 +208,28 @@ but can be disabled using the cmake option "-DPacBioBAM_build_pbindex=OFF".
## [0.0.7] - 2015-07-02
### Added
-- PBI index lookup API. Not so much intended for client use directly, but will enable construction of
- higher-level semantic queries: grouping by, filtering, etc.
-- DataSet & PBI-aware queries (e.g. ZmwGroupQuery). More PBI-enabled queries to follow.
-- More flexibility in tag access. Samtools has a habit of performing a "shrink-to-fit" when it handles
- integer-valued tag data. Thus we cannot **guarantee** the binary type that our API will have to process.
- Safe conversions are allowed on integer-like data only. Under- or overflows in casting will trigger an
- exception. All other tag data types must be asked for explicitly, or else an exception will be raised,
- as before.
-- BamHeader::DeepCopy - allows creation of editable header data, without overwriting all shared instances
+- PBI index lookup API. Not so much intended for client use directly, but will
+enable construction of higher-level semantic queries: grouping by, filtering,
+etc.
+- DataSet & PBI-aware queries (e.g. ZmwGroupQuery). More PBI-enabled queries to
+follow.
+- More flexibility in tag access. Samtools has a habit of performing a
+"shrink-to-fit" when it handles integer-valued tag data. Thus we cannot
+**guarantee** the binary type that our API will have to process. Safe
+conversions are allowed on integer-like data only. Under- or overflows in
+casting will trigger an exception. All other tag data types must be asked for
+explicitly, or else an exception will be raised, as before.
+- BamHeader::DeepCopy - allows creation of editable header data, without
+overwriting all shared instances
### Fixed
- XSD compliance for DataSet APIs.
### Changed
-- The functionality provided by ZmwQuery (group by hole number), is now available using the ZmwGroupQuery
- object. The new ZmwQuery returns a single-record iterator (a la EntireFileQuery), but limited to a whitelist
- of requested hole numbers.
+- The functionality provided by ZmwQuery (group by hole number), is now
+available using the ZmwGroupQuery object. The new ZmwQuery returns a single-
+record iterator (a la EntireFileQuery), but limited to a whitelist of requested
+hole numbers.
### Removed
- XSD non-compliant classes (e.g. ExternalDataReference)
@@ -98,19 +266,22 @@ but can be disabled using the cmake option "-DPacBioBAM_build_pbindex=OFF".
### Added
-- DataSet support. This includes XML I/O, basic dataset query/manipulation, and multi-BAM-file
- queries. New classes are located in <pbbam/dataset/>. DataSet-capable queries currently reside in the
- PacBio::BAM::staging namespace. These will be ported over to the main namespace once the support is
- stabilized and works seamlessly with either a single BamFile or DataSet object as input. (bug 25941)
-- PBI support. This includes read/write raw data & building from a BamFile. The lookup API for
- random-access queries is under development, but the raw data is available - for creating PBI files &
- generating summary statistics. (bug 26025)
+- DataSet support. This includes XML I/O, basic dataset query/manipulation, and
+multi-BAM-file queries. New classes are located in <pbbam/dataset/>. DataSet-
+capable queries currently reside in the PacBio::BAM::staging namespace. These
+will be ported over to the main namespace once the support is stabilized and
+works seamlessly with either a single BamFile or DataSet object as input. (bug
+25941)
+- PBI support. This includes read/write raw data & building from a BamFile. The
+lookup API for random-access queries is under development, but the raw data is
+available - for creating PBI files & generating summary statistics. (bug 26025)
- C# SWIG bindings, alongside existing Python and R wrappers.
- LocalContextFlags support in BamRecord (bug 26623)
### Fixed
-- BamRecord[Impl] map quality now initialized with 255 (missing) value, instead of 0. (bug 26228)
+- BamRecord[Impl] map quality now initialized with 255 (missing) value, instead
+of 0. (bug 26228)
- ReadGroupId calculation. (bug 25940)
## [0.0.4] - 2015-04-22
@@ -124,7 +295,8 @@ but can be disabled using the cmake option "-DPacBioBAM_build_pbindex=OFF".
### Changed
- Now using exceptions instead of return codes, output parameters, etc.
-- Removed "messy" shared_ptrs across interface (see especially BamHeader). These are now taken care of within the API, not exposed to client code.
+- Removed "messy" shared_ptrs across interface (see especially BamHeader). These
+are now taken care of within the API, not exposed to client code.
### Removed
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f7a646d..4908a52 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,25 +3,58 @@
########################################################################
cmake_policy(SET CMP0048 NEW) # lets us set version in project()
-project(PacBioBAM VERSION 0.1.0 LANGUAGES CXX C)
+project(PacBioBAM VERSION 0.5.0 LANGUAGES CXX C)
cmake_minimum_required(VERSION 3.0)
-# project version
+# project name & version
+set(PacBioBAM_NAME pbbam)
set(PacBioBAM_VERSION
"${PacBioBAM_VERSION_MAJOR}.${PacBioBAM_VERSION_MINOR}.${PacBioBAM_VERSION_PATCH}"
)
# list build-time options
-option(PacBioBAM_build_docs "Build PacBioBAM's API documentation." ON)
-option(PacBioBAM_build_tests "Build PacBioBAM's unit tests." ON)
-option(PacBioBAM_build_shared "Build PacBioBAM as shared library as well." OFF)
-option(PacBioBAM_build_pbindex "Build pbindex tool." ON)
-option(PacBioBAM_wrap_csharp "Build PacBioBAM with SWIG bindings for C#." OFF)
-option(PacBioBAM_wrap_python "Build PacBioBAM with SWIG bindings for Python." OFF)
-option(PacBioBAM_wrap_r "Build PacBioBAM with SWIG bindings for R." OFF)
-option(PacBioBAM_use_modbuild "Build PacBioBAM using Modular Build System." OFF)
+option(PacBioBAM_build_docs "Build PacBioBAM's API documentation." ON)
+option(PacBioBAM_build_tests "Build PacBioBAM's unit tests." ON)
+option(PacBioBAM_build_shared "Build PacBioBAM as shared library as well." OFF)
+option(PacBioBAM_build_tools "Build PacBioBAM command line utilities (e.g. pbindex)" ON)
+option(PacBioBAM_wrap_csharp "Build PacBioBAM with SWIG bindings for C#." OFF)
+option(PacBioBAM_wrap_python "Build PacBioBAM with SWIG bindings for Python." OFF)
+option(PacBioBAM_wrap_r "Build PacBioBAM with SWIG bindings for R." OFF)
+option(PacBioBAM_use_modbuild "Build PacBioBAM using Modular Build System." OFF)
+option(PacBioBAM_use_ccache "Build PacBioBAM using ccache, if available." ON)
+
+# enable ccache, if available
+if(PacBioBAM_use_ccache)
+ find_program(CCACHE_FOUND ccache)
+ if(CCACHE_FOUND)
+ set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
+ set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache)
+ endif()
+endif()
+
+# Deprecating the "PacBioBAM_build_pbindex" command line option in favor of more
+# general "PacBioBAM_build_tools", as we're starting to add new utilities.
+#
+# That said, I don't want to break current auto tests/builds, so I'm providing a
+# warning message so devs are aware.
+#
+if(DEFINED PacBioBAM_build_pbindex)
+
+ # construct warning message
+ set(pbindex_warning "\nDeprecated:\n-DPacBioBAM_build_pbindex\n")
+ if (PacBioBAM_build_pbindex)
+ set(pbindex_warning "${pbindex_warning} Building as requested,")
+ else()
+ set(pbindex_warning "${pbindex_warning} Skipping as requested,")
+ endif()
+ set(pbindex_warning "${pbindex_warning} but support for this option will be removed at some point in the future.\n")
+ message(AUTHOR_WARNING "${pbindex_warning} ** Use -DPacBioBAM_build_tools instead. **\n")
+
+ # force PacBioBAM_build_tools option
+ set(PacBioBAM_build_tools ${PacBioBAM_build_pbindex} CACHE BOOL
+ "Build PacBioBAM with add'l utilities (e.g. pbindex, pbindexdump)." FORCE)
+endif()
-# --check build-time options --
# enable testing if requested
if(PacBioBAM_build_tests)
@@ -54,6 +87,7 @@ set(PacBioBAM_IncludeDir ${PacBioBAM_RootDir}/include)
set(PacBioBAM_SourceDir ${PacBioBAM_RootDir}/src)
set(PacBioBAM_SwigSourceDir ${PacBioBAM_RootDir}/src/swig)
set(PacBioBAM_TestsDir ${PacBioBAM_RootDir}/tests)
+set(PacBioBAM_ToolsDir ${PacBioBAM_RootDir}/tools)
if(NOT PacBioBAM_OutputDir)
set(PacBioBAM_OutputDir ${PacBioBAM_RootDir})
@@ -117,16 +151,36 @@ if(APPLE)
set(CMAKE_MACOSX_RPATH OFF)
endif()
+# Turn on windows-style filepath resolution.
+# We need to add this #define early (not just in the C# SWIG wrapper)
+if(WIN32 AND PacBioBAM_wrap_csharp)
+ add_definitions(-DPBBAM_WIN_FILEPATHS)
+endif()
+
# keep this order (src first, at least)
add_subdirectory(src)
-add_subdirectory(tools)
+
+if(PacBioBAM_build_tools)
+ add_subdirectory(tools)
+endif()
+
if(PacBioBAM_build_docs)
add_subdirectory(docs)
endif()
+
if(PacBioBAM_build_tests)
+
if (NOT GTEST_SRC_DIR)
- set(GTEST_SRC_DIR ../gtest)
+ set(PREBUILT_GTEST_SRC ${PacBioBAM_RootDir}/../../../../prebuilt.tmpout/gtest/gtest_1.7.0/)
+ if(EXISTS ${PREBUILT_GTEST_SRC})
+ set(GTEST_SRC_DIR ${PREBUILT_GTEST_SRC})
+ else()
+ set(GTEST_SRC_DIR ../gtest) # keep old fallback behavior for external builds, for now at least
+ endif()
endif()
+
add_subdirectory(${GTEST_SRC_DIR} external/gtest/build)
add_subdirectory(tests)
+
endif()
+
diff --git a/INSTALL.md b/INSTALL.md
index 1d72d77..86dddda 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -1,114 +1,3 @@
-# PacBio::BAM - building & integration
+# PacBio::BAM - building & integrating
-- [Dependencies](#dependencies)
-- [Build](#build)
-- [Test](#test)
-- [Integration](#integration)
- - [CMake](#cmake)
- - [Other](#other)
-- [SWIG](#swig)
- - [Python](#python)
- - [R](#r)
- - [CSharp](#csharp)
-
-## Dependencies
- - CMake v2.8+
- - Boost 1.54+
- - zlib
- - samtools exe (*)
-
-(*) NOTE: ppbam uses samtools for some of its tests, for now at least. The current
-build system points uses a relative path to one of the "prebuilt" samtools binaries.
-If you have checked out pbbam to any path that is NOT:
-
- ///depot/software/smrtanalysis/bioinformatics/staging/PostPrimary/pbbam
-
-then please edit the Samtools_Dir variable in pbbam/tests/CMakeLists.txt to a place
-that works for your setup. That could just be as simple as "" if you already have
-samtools somewhere in your PATH.
-
-## Build
-
-To perform a simple build of the library (and its tests):
-
- $ cd <pbbam_root>
- $ mkdir build
- $ cd build
- $ cmake ..
- $ make
-
-## Test
-
-There are 2 options for testing the library.
-
-1) Run the test executable directly:
-
- $ <pbbam_root>/tests/bin/test_pbbam
-
-which displays the GoogleTest-formatted results for the 250+ individual tests. This
-provides fine-grained info on any failed test.
-
-2) The other option is to use CMake/CTest-generated 'make' command:
-
- $ cd <pbbam_root>/build
- $ make test
-
-which collapses all of the test output into a single, CTest-formatted pass/fail display.
-
-## Integration
-
-### CMake
-
-If you are using CMake for your library or application, you can use the following steps
-to automate both the building of pbbam and its dependencies (if necessary) and importing
-the proper include paths, library paths, etc. If the pbbam library already exists, then
-the header/lib variables are simply imported.
-
- # just for convenience
- set(PacBioBAM_RootDir </anywhere/on/disk/path/to/pbbam>)
-
- # add_subdirectory() sounds a bit misleading, the path can be *anywhere* on disk.
- # the 2nd arg tells CMake where it should build pbbam if necessary
- add_subdirectory(${PacBioBAM_RootDir} ${PacBioBAM_RootDir}/build)
-
- # setup your client
- add_executable(foo ....)
-
- # PacBioBAM_INCLUDE_DIRS provides all pbbam headers, as well as dependencies
- include_directories( .... ${PacBioBAM_INCLUDE_DIRS} )
-
- # PacBioBAM_LIBRARIES provides libpbbam.a, as well as dependencies
- target_link_libraries( foo ..... ${PacBioBAM_LIBRARIES} )
-
-### Other
-
-The following instructions apply to all non-CMake-based builds. In addition to Boost headers & zlib, the relevant include paths for pbbam are:
-
- <pbbam_root>/include
- <pbbam_root>/third-party/htslib
-
-which allows these statements:
-
- #include <pbbam/BamRecord.h>
- #include <htslib/sam.h>
-
-and so on in your code. And the relevant libraries to link to are:
-
- <pbbam_root>/lib/libpbbam.a
- <pbbam_root>/third-party/htslib/libhts.a
-
-## SWIG
-
-TODO: fill this in
-
-### Python
-
-TODO: fill this in
-
-### R
-
-TODO: fill this in
-
-### CSharp
-
-TODO: fill this in
+Detailed build instructions can be found [here](http://pbbam.readthedocs.org/en/latest/getting_started.html).
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..77e9557
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,34 @@
+Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted (subject to the limitations in the
+disclaimer below) provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+
+ * Neither the name of Pacific Biosciences nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..046296e
--- /dev/null
+++ b/README.md
@@ -0,0 +1,29 @@
+# pbbam
+
+[![Build Status](https://travis-ci.org/PacificBiosciences/pbbam.svg?branch=master)](https://travis-ci.org/PacificBiosciences/pbbam) [![Documentation Status](https://readthedocs.org/projects/pbbam/badge/?version=latest)](http://pbbam.readthedocs.org/en/latest/?badge=latest)
+
+As of the 3.0 release of SMRTanalysis, PacBio is embracing the industry standard BAM
+format for (both aligned and unaligned) basecall data files. We have also formulated
+a BAM companion file format (bam.pbi) enabling fast access to a richer set of per-read
+information as well as compatibility for software built around the legacy cmp.h5 format.
+
+The **pbbam** software package provides components to create, query, & edit PacBio BAM
+files and associated indices. These components include a core C++ library, bindings for
+additional languages, and command-line utilities.
+
+### Note:
+
+This library is **not** intended to be used as a general-purpose BAM utility - all input & output BAMs must adhere to the [PacBio BAM format specification](https://github.com/PacificBiosciences/PacBioFileFormats/blob/3.0/BAM.rst). Non-PacBio BAMs will cause exceptions to be thrown.
+
+## Documentation
+
+ - [Documentation Home](http://pbbam.readthedocs.org/en/latest/index.html)
+ - [Getting Started](http://pbbam.readthedocs.org/en/latest/getting_started.html)
+ - [C++ API Reference](http://pbbam.readthedocs.org/en/latest/api_reference.html)
+
+ - [Changelog](https://github.com/PacificBiosciences/pbbam/blob/master/CHANGELOG.md)
+
+## License
+
+ - [PacBio open source license](https://github.com/PacificBiosciences/pbbam/blob/master/LICENSE.txt)
+
diff --git a/cmake/FindCSharp.cmake b/cmake/FindCSharp.cmake
new file mode 100644
index 0000000..08d09a7
--- /dev/null
+++ b/cmake/FindCSharp.cmake
@@ -0,0 +1,72 @@
+#
+# A CMake Module for finding and using C# (.NET and Mono).
+#
+# The following variables are set:
+# CSHARP_FOUND - set to ON if C# is found
+# CSHARP_USE_FILE - the path to the C# use file
+# CSHARP_TYPE - the type of the C# compiler (eg. ".NET" or "Mono")
+# CSHARP_VERSION - the version of the C# compiler (eg. "v4.0" or "2.10.2")
+# CSHARP_COMPILER - the path to the C# compiler executable (eg. "C:/Windows/Microsoft.NET/Framework/v4.0.30319/csc.exe" or "/usr/bin/gmcs")
+# CSHARP_INTERPRETER - the path to interpreter needed to run CSharp executables
+# CSHARP_PLATFORM - the C# target platform
+# CSHARP_SDK - the SDK commandline switch (empty for .NET, for Mono eg. "/sdk:2" or "/sdk:4")
+#
+# This file is based on the work of GDCM:
+# http://gdcm.svn.sf.net/viewvc/gdcm/trunk/CMake/FindCSharp.cmake
+# Copyright (c) 2006-2010 Mathieu Malaterre <mathieu.malaterre at gmail.com>
+#
+
+# TODO: ADD ABILITY TO SELECT WHICH C# COMPILER eg. .NET or Mono (if both exist). For the moment, .NET is selected above Mono.
+
+# Make sure find package macros are included
+include( FindPackageHandleStandardArgs )
+
+unset( CSHARP_COMPILER CACHE )
+unset( CSHARP_INTERPRETER CACHE )
+unset( CSHARP_TYPE CACHE )
+unset( CSHARP_VERSION CACHE )
+unset( CSHARP_FOUND CACHE )
+
+# By default use anycpu platform, allow the user to override
+set( CSHARP_PLATFORM "anycpu" CACHE STRING "C# target platform: x86, x64, anycpu, or itanium" )
+if( NOT ${CSHARP_PLATFORM} MATCHES "x86|x64|anycpu|itanium" )
+ message( FATAL_ERROR "The C# target platform '${CSHARP_PLATFORM}' is not valid. Please enter one of the following: x86, x64, anycpu, or itanium" )
+endif( )
+
+if( WIN32 )
+ find_package( DotNetFrameworkSdk )
+ if( NOT CSHARP_DOTNET_FOUND )
+ find_package( Mono )
+ endif( )
+else( UNIX )
+ find_package( Mono )
+endif( )
+
+if( CSHARP_DOTNET_FOUND )
+ set( CSHARP_TYPE ".NET" CACHE STRING "Using the .NET compiler" )
+ set( CSHARP_VERSION ${CSHARP_DOTNET_VERSION} CACHE STRING "C# .NET compiler version" FORCE )
+ set( CSHARP_COMPILER ${CSHARP_DOTNET_COMPILER_${CSHARP_DOTNET_VERSION}} CACHE STRING "Full path to .NET compiler" FORCE )
+ set( CSHARP_INTERPRETER "" CACHE INTERNAL "Interpretor not required for .NET" FORCE )
+elseif( CSHARP_MONO_FOUND )
+ set( CSHARP_TYPE "Mono" CACHE STRING "Using the Mono compiler" )
+ set( CSHARP_VERSION ${CSHARP_MONO_VERSION} CACHE STRING "C# Mono compiler version" FORCE )
+ set( CSHARP_COMPILER ${CSHARP_MONO_COMPILER_${CSHARP_MONO_VERSION}} CACHE STRING "Full path to Mono compiler" FORCE )
+ set( CSHARP_INTERPRETER ${CSHARP_MONO_INTERPRETER_${CSHARP_MONO_VERSION}} CACHE STRING "Full path to Mono interpretor" FORCE )
+ set( CSHARP_SDK "/sdk:4.5" CACHE STRING "C# Mono SDK commandline switch (e.g. /sdk:2, /sdk:4, /sdk:5)" )
+endif( )
+
+# Handle WIN32 specific issues
+if ( WIN32 )
+ if ( CSHARP_COMPILER MATCHES "bat" )
+ set( CSHARP_COMPILER "call ${CSHARP_COMPILER}" )
+ endif ( )
+endif( )
+
+FIND_PACKAGE_HANDLE_STANDARD_ARGS(CSharp DEFAULT_MSG CSHARP_TYPE CSHARP_VERSION CSHARP_COMPILER)
+
+mark_as_advanced( CSHARP_TYPE CSHARP_VERSION CSHARP_COMPILER CSHARP_INTERPRETER CSHARP_PLATFORM CSHARP_SDK )
+
+# Set the USE_FILE path
+# http://public.kitware.com/Bug/view.php?id=7757
+get_filename_component( current_list_path ${CMAKE_CURRENT_LIST_FILE} PATH )
+set( CSHARP_USE_FILE ${current_list_path}/UseCSharp.cmake )
diff --git a/cmake/FindDotNetFrameworkSdk.cmake b/cmake/FindDotNetFrameworkSdk.cmake
new file mode 100644
index 0000000..8e12c70
--- /dev/null
+++ b/cmake/FindDotNetFrameworkSdk.cmake
@@ -0,0 +1,29 @@
+# Set paths and vars for .NET compilers
+# This is hand-rolled because I had problems with the one from SimpleITK
+
+#
+# The following variables are set:
+# CSHARP_DOTNET_FOUND
+# CSHARP_DOTNET_COMPILER_${version} eg. "CSHARP_DOTNET_COMPILER_v4.0.30319"
+# CSHARP_DOTNET_VERSION eg. "v4.0.30319"
+# CSHARP_DOTNET_VERSIONS eg. "v2.0.50727, v3.5, v4.0.30319"
+# DotNetFrameworkSdk_USE_FILE
+#
+# CSHARP_PROJECT_BUILDER (xbuild/msbuild)
+
+set(framework_dir "C:/Windows/Microsoft.NET/Framework")
+
+set(CSHARP_DOTNET_VERSION "v4.0.30319")
+set(CSHARP_DOTNET_VERSIONS "")
+set(CSHARP_DOTNET_COMPILER_${CSHARP_DOTNET_VERSION} "${framework_dir}/${CSHARP_DOTNET_VERSION}/csc.exe")
+set(CSHARP_PROJECT_BUILDER "${framework_dir}/${CSHARP_DOTNET_VERSION}/MSBuild.exe")
+
+if(EXISTS ${CSHARP_DOTNET_COMPILER_${CSHARP_DOTNET_VERSION}})
+ set(CSHARP_DOTNET_FOUND 1)
+else()
+ set(CSHARP_DOTNET_FOUND 0)
+endif()
+
+# Set USE_FILE
+get_filename_component( current_list_path ${CMAKE_CURRENT_LIST_FILE} PATH )
+set( DotNetFrameworkSdk_USE_FILE ${current_list_path}/UseDotNetFrameworkSdk.cmake )
\ No newline at end of file
diff --git a/cmake/FindMono.cmake b/cmake/FindMono.cmake
new file mode 100644
index 0000000..0fab116
--- /dev/null
+++ b/cmake/FindMono.cmake
@@ -0,0 +1,167 @@
+#
+# A CMake Module for finding Mono.
+#
+# The following variables are set:
+# CSHARP_MONO_FOUND
+# CSHARP_MONO_COMPILER_${version} eg. "CSHARP_MONO_COMPILER_2.10.2"
+# CSHARP_MONO_INTERPRETOR_${version} eg. "CSHARP_MONO_INTERPRETOR_2.10.2"
+# CSHARP_MONO_VERSION eg. "2.10.2"
+# CSHARP_MONO_VERSIONS eg. "2.10.2, 2.6.7"
+#
+# Additional references can be found here:
+# http://www.mono-project.com/Main_Page
+# http://www.mono-project.com/CSharp_Compiler
+# http://mono-project.com/FAQ:_Technical (How can I tell where the Mono runtime is installed)
+#
+# This file is based on the work of GDCM:
+# http://gdcm.svn.sf.net/viewvc/gdcm/trunk/CMake/FindMono.cmake
+# Copyright (c) 2006-2010 Mathieu Malaterre <mathieu.malaterre at gmail.com>
+#
+
+set( csharp_mono_valid 1 )
+if( DEFINED CSHARP_MONO_FOUND )
+ # The Mono compiler has already been found
+ # It may have been reset by the user, verify it is correct
+ if( NOT DEFINED CSHARP_MONO_COMPILER_${CSHARP_MONO_VERSION} )
+ set( csharp_mono_version_user ${CSHARP_MONO_VERSION} )
+ set( csharp_mono_valid 0 )
+ set( CSHARP_MONO_FOUND 0 )
+ set( CSHARP_MONO_VERSION "CSHARP_MONO_VERSION-NOTVALID" CACHE STRING "C# Mono compiler version, choices: ${CSHARP_MONO_VERSIONS}" FORCE )
+ message( FATAL_ERROR "The C# Mono version '${csharp_mono_version_user}' is not valid. Please enter one of the following: ${CSHARP_MONO_VERSIONS}" )
+ endif( NOT DEFINED CSHARP_MONO_COMPILER_${CSHARP_MONO_VERSION} )
+endif( DEFINED CSHARP_MONO_FOUND )
+
+unset( CSHARP_MONO_VERSIONS CACHE ) # Clear versions
+if( WIN32 )
+ # Search for Mono on Win32 systems
+ # See http://mono-project.com/OldReleases and http://www.go-mono.com/mono-downloads/download.html
+ set( csharp_mono_bin_dirs )
+ set( csharp_mono_search_hints
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.11.2;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.10.9;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.10.8;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.10.7;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.10.6;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.10.5;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.10.4;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.10.3;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.10.2;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.10.1;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.10;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.8;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.6.7;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.6.4;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.6.3;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.6.1;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.6;SdkInstallRoot]/bin"
+ )
+ foreach( csharp_mono_search_hint ${csharp_mono_search_hints} )
+ get_filename_component( csharp_mono_bin_dir "${csharp_mono_search_hint}" ABSOLUTE )
+ if ( EXISTS "${csharp_mono_bin_dir}" )
+ set( csharp_mono_bin_dirs ${csharp_mono_bin_dirs} ${csharp_mono_bin_dir} )
+ endif ( EXISTS "${csharp_mono_bin_dir}" )
+ endforeach( csharp_mono_search_hint )
+ # TODO: Use HKLM_LOCAL_MACHINE\Software\Novell\Mono\DefaultCLR to specify default version
+ # get_filename_component( test "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono;DefaultCLR]" NAME )
+
+ foreach ( csharp_mono_bin_dir ${csharp_mono_bin_dirs} )
+ string( REPLACE "\\" "/" csharp_mono_bin_dir ${csharp_mono_bin_dir} )
+ if (EXISTS "${csharp_mono_bin_dir}/dmcs.bat")
+ set( csharp_mono_executable "${csharp_mono_bin_dir}/dmcs.bat")
+ elseif (EXISTS "${csharp_mono_bin_dir}/gmcs.bat")
+ set( csharp_mono_executable "${csharp_mono_bin_dir}/gmcs.bat")
+ elseif (EXISTS "${csharp_mono_bin_dir}/mcs.bat")
+ set( csharp_mono_executable "${csharp_mono_bin_dir}/mcs.bat")
+ endif (EXISTS "${csharp_mono_bin_dir}/dmcs.bat")
+
+ if( csharp_mono_valid )
+ # Extract version number (eg. 2.10.2)
+ string(REGEX MATCH "([0-9]*)([.])([0-9]*)([.]*)([0-9]*)" csharp_mono_version_temp ${csharp_mono_bin_dir})
+ set( CSHARP_MONO_VERSION ${csharp_mono_version_temp} CACHE STRING "C# Mono compiler version" )
+ mark_as_advanced( CSHARP_MONO_VERSION )
+
+ # Add variable holding executable
+ set( CSHARP_MONO_COMPILER_${csharp_mono_version_temp} ${csharp_mono_executable} CACHE STRING "C# Mono compiler ${csharp_mono_version_temp}" FORCE )
+ mark_as_advanced( CSHARP_MONO_COMPILER_${csharp_mono_version_temp} )
+
+ # Set interpreter
+ if (EXISTS "${csharp_mono_bin_dir}/mono.exe")
+ set( CSHARP_MONO_INTERPRETER_${csharp_mono_version_temp} "${csharp_mono_bin_dir}/mono.exe" CACHE STRING "C# Mono interpreter ${csharp_mono_version_temp}" FORCE )
+ mark_as_advanced( CSHARP_MONO_INTERPRETER_${csharp_mono_version_temp} )
+ endif (EXISTS "${csharp_mono_bin_dir}/mono.exe")
+ endif( csharp_mono_valid )
+
+ # Create a list of supported compiler versions
+ if( NOT DEFINED CSHARP_MONO_VERSIONS )
+ set( CSHARP_MONO_VERSIONS "${csharp_mono_version_temp}" CACHE STRING "Available C# Mono compiler versions" FORCE )
+ else( NOT DEFINED CSHARP_MONO_VERSIONS )
+ set( CSHARP_MONO_VERSIONS "${CSHARP_MONO_VERSIONS}, ${csharp_mono_version_temp}" CACHE STRING "Available C# Mono versions" FORCE )
+ endif( NOT DEFINED CSHARP_MONO_VERSIONS )
+ mark_as_advanced( CSHARP_MONO_VERSIONS )
+
+ # We found at least one Mono compiler version
+ set( CSHARP_MONO_FOUND 1 CACHE INTERNAL "Boolean indicating if C# Mono was found" )
+ endforeach( csharp_mono_bin_dir )
+
+else( UNIX )
+ # Search for Mono on non-Win32 systems
+ set( chsarp_mono_names "mcs" "mcs.exe" "dmcs" "dmcs.exe" "smcs" "smcs.exe" "gmcs" "gmcs.exe" )
+ set(
+ csharp_mono_paths
+ "/usr/bin/"
+ "/usr/local/bin/"
+ "/usr/lib/mono/2.0"
+ "/opt/novell/mono/bin"
+ )
+ find_program(
+ csharp_mono_compiler # variable is added to the cache, we removed it below
+ NAMES ${chsarp_mono_names}
+ PATHS ${csharp_mono_paths}
+ )
+
+ if( EXISTS ${csharp_mono_compiler} )
+ # Determine version
+ find_program(
+ csharp_mono_interpreter # variable is added to the cache, we removed it below
+ NAMES mono
+ PATHS ${csharp_mono_paths}
+ )
+ if ( EXISTS ${csharp_mono_interpreter} )
+ execute_process(
+ COMMAND ${csharp_mono_interpreter} -V
+ OUTPUT_VARIABLE csharp_mono_version_string
+ )
+ string( REGEX MATCH "([0-9]*)([.])([0-9]*)([.]*)([0-9]*)" csharp_mono_version_temp ${csharp_mono_version_string} )
+ set( CSHARP_MONO_INTERPRETER_${CSHARP_MONO_VERSION} ${csharp_mono_interpreter} CACHE STRING "C# Mono interpreter ${csharp_mono_version_temp}" FORCE )
+ mark_as_advanced( CSHARP_MONO_INTERPRETER_${CSHARP_MONO_VERSION} )
+ endif ( EXISTS ${csharp_mono_interpreter} )
+ unset( csharp_mono_interpreter CACHE )
+
+ # We found Mono compiler
+ set( CSHARP_MONO_VERSION ${csharp_mono_version_temp} CACHE STRING "C# Mono compiler version" )
+ mark_as_advanced( CSHARP_MONO_VERSION )
+ set( CSHARP_MONO_COMPILER_${CSHARP_MONO_VERSION} ${csharp_mono_compiler} CACHE STRING "C# Mono compiler ${CSHARP_MONO_VERSION}" FORCE )
+ mark_as_advanced( CSHARP_MONO_COMPILER_${CSHARP_MONO_VERSION} )
+ set( CSHARP_MONO_VERSIONS ${CSHARP_MONO_VERSION} CACHE STRING "Available C# Mono compiler versions" FORCE )
+ mark_as_advanced( CSHARP_MONO_VERSIONS )
+ set( CSHARP_MONO_FOUND 1 CACHE INTERNAL "Boolean indicating if C# Mono was found" )
+
+ # Assume xbuild is just xbuild.
+ set(CSHARP_PROJECT_BUILDER "xbuild")
+
+
+ endif( EXISTS ${csharp_mono_compiler} )
+
+ # Remove temp variable from cache
+ unset( csharp_mono_compiler CACHE )
+
+endif( WIN32 )
+
+if( CSHARP_MONO_FOUND )
+ # Report the found versions
+ message( STATUS "Found the following C# Mono versions: ${CSHARP_MONO_VERSIONS}" )
+endif( CSHARP_MONO_FOUND )
+
+# Set USE_FILE
+get_filename_component( current_list_path ${CMAKE_CURRENT_LIST_FILE} PATH )
+set( Mono_USE_FILE ${current_list_path}/UseMono.cmake )
diff --git a/cmake/FindR.cmake b/cmake/FindR.cmake
new file mode 100644
index 0000000..6ae4354
--- /dev/null
+++ b/cmake/FindR.cmake
@@ -0,0 +1,48 @@
+
+#
+# - This module locates an installed R distribution.
+#
+# Defines the following:
+#
+# R_INCLUDE_DIR - Path to R include directory
+# R_LIBRARIES - Path to R library
+# R_LIBRARY_BASE -
+# R_COMMAND - Path to R command
+# RSCRIPT_EXECUTABLE - Path to Rscript command
+#
+
+
+# Make sure find package macros are included
+include( FindPackageHandleStandardArgs )
+
+set(TEMP_CMAKE_FIND_APPBUNDLE ${CMAKE_FIND_APPBUNDLE})
+set(CMAKE_FIND_APPBUNDLE "NEVER")
+find_program(R_COMMAND R DOC "R executable.")
+if(R_COMMAND)
+ execute_process(WORKING_DIRECTORY . COMMAND ${R_COMMAND} RHOME OUTPUT_VARIABLE R_BASE_DIR OUTPUT_STRIP_TRAILING_WHITESPACE)
+ set(R_HOME ${R_BASE_DIR} CACHE PATH "R home directory obtained from R RHOME")
+ mark_as_advanced(R_HOME)
+endif(R_COMMAND)
+
+find_program(RSCRIPT_EXECUTABLE Rscript DOC "Rscript executable.")
+
+set(CMAKE_FIND_APPBUNDLE ${TEMP_CMAKE_FIND_APPBUNDLE})
+
+# R.h gets installed in all sorts of places -
+# ubuntu: /usr/share/R/include, RHEL/Fedora: /usr/include/R/R.h
+find_path(R_INCLUDE_DIR R.h PATHS ${R_INCLUDE_DIR_HINT} /usr/local/lib /usr/local/lib64 /usr/share /usr/include ${R_BASE_DIR} PATH_SUFFIXES include R R/include DOC "Path to file R.h")
+find_library(R_LIBRARY_BASE R PATHS ${R_BASE_DIR} PATH_SUFFIXES /lib DOC "R library (example libR.a, libR.dylib, etc.).")
+
+set(R_LIBRARIES ${R_LIBRARY_BASE})
+mark_as_advanced(RSCRIPT_EXECUTABLE R_LIBRARIES R_INCLUDE_DIR R_COMMAND R_LIBRARY_BASE)
+
+
+set( _REQUIRED_R_VARIABLES R_INCLUDE_DIR R_COMMAND )
+
+if( APPLE )
+ # On linux platform some times the libR.so is not available, however
+ # on apple a link error results if the library is linked.
+ list( APPEND _REQUIRED_R_VARIABLES R_LIBRARIES R_LIBRARY_BASE )
+endif()
+
+find_package_handle_standard_args(R DEFAULT_MSG ${_REQUIRED_R_VARIABLES} )
diff --git a/cmake/PbbamTool.cmake b/cmake/PbbamTool.cmake
new file mode 100644
index 0000000..a1411a7
--- /dev/null
+++ b/cmake/PbbamTool.cmake
@@ -0,0 +1,23 @@
+include(CMakeParseArguments)
+
+function(create_pbbam_tool)
+
+ # parse args
+ set(oneValueArgs TARGET)
+ set(multiValueArgs SOURCES)
+ cmake_parse_arguments(create_pbbam_tool "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+ # create executable
+ include_directories(
+ ${ToolsCommonDir} # shared tool code
+ ${CMAKE_CURRENT_BINARY_DIR} # generated version headers
+ ${PacBioBAM_INCLUDE_DIRS} # pbbam/htslib includes
+ )
+ add_executable(${create_pbbam_tool_TARGET} ${create_pbbam_tool_SOURCES})
+ set_target_properties(
+ ${create_pbbam_tool_TARGET} PROPERTIES
+ RUNTIME_OUTPUT_DIRECTORY ${PacBioBAM_BinDir}
+ )
+ target_link_libraries(${create_pbbam_tool_TARGET} pbbam)
+
+endfunction(create_pbbam_tool)
diff --git a/cmake/UseCSharp.cmake b/cmake/UseCSharp.cmake
new file mode 100644
index 0000000..dac4537
--- /dev/null
+++ b/cmake/UseCSharp.cmake
@@ -0,0 +1,111 @@
+# CMake Module for finding and using C# (.NET and Mono).
+#
+# The following global variables are assumed to exist:
+# CSHARP_SOURCE_DIRECTORY - path to C# sources
+# CSHARP_BINARY_DIRECTORY - path to place resultant C# binary files
+#
+# The following variables are set:
+# CSHARP_TYPE - the type of the C# compiler (eg. ".NET" or "Mono")
+# CSHARP_COMPILER - the path to the C# compiler executable (eg. "C:/Windows/Microsoft.NET/Framework/v4.0.30319/csc.exe")
+# CSHARP_VERSION - the version number of the C# compiler (eg. "v4.0.30319")
+#
+# The following macros are defined:
+# CSHARP_ADD_EXECUTABLE( name references [files] [output_dir] ) - Define C# executable with the given name
+# CSHARP_ADD_LIBRARY( name references [files] [output_dir] ) - Define C# library with the given name
+#
+# Examples:
+# CSHARP_ADD_EXECUTABLE( MyExecutable "" "Program.cs" )
+# CSHARP_ADD_EXECUTABLE( MyExecutable "ref1.dll ref2.dll" "Program.cs File1.cs" )
+# CSHARP_ADD_EXECUTABLE( MyExecutable "ref1.dll;ref2.dll" "Program.cs;File1.cs" )
+#
+# This file is based on the work of GDCM:
+# http://gdcm.svn.sf.net/viewvc/gdcm/trunk/CMake/UseCSharp.cmake
+# Copyright (c) 2006-2010 Mathieu Malaterre <mathieu.malaterre at gmail.com>
+#
+
+# TODO: ADD SUPPORT FOR LINK LIBRARIES
+
+# Check something was found
+if( NOT CSHARP_COMPILER )
+ message( WARNING "A C# compiler executable was not found on your system" )
+endif( NOT CSHARP_COMPILER )
+
+# Include type-based USE_FILE
+if( CSHARP_TYPE MATCHES ".NET" )
+ include( ${DotNetFrameworkSdk_USE_FILE} )
+elseif ( CSHARP_TYPE MATCHES "Mono" )
+ include( ${Mono_USE_FILE} )
+endif ( CSHARP_TYPE MATCHES ".NET" )
+
+macro( CSHARP_ADD_LIBRARY name )
+ CSHARP_ADD_PROJECT( "library" ${name} ${ARGN} )
+endmacro( CSHARP_ADD_LIBRARY )
+
+macro( CSHARP_ADD_EXECUTABLE name )
+ CSHARP_ADD_PROJECT( "exe" ${name} ${ARGN} )
+endmacro( CSHARP_ADD_EXECUTABLE )
+
+# Private macro
+macro( CSHARP_ADD_PROJECT type name )
+ set( refs "/reference:System.dll" )
+ set( sources )
+ set( sources_dep )
+
+ if( ${type} MATCHES "library" )
+ set( output "dll" )
+ elseif( ${type} MATCHES "exe" )
+ set( output "exe" )
+ endif( ${type} MATCHES "library" )
+
+ # Step through each argument
+ foreach( it ${ARGN} )
+ if( ${it} MATCHES "(.*)(dll)" )
+ # Argument is a dll, add reference
+ list( APPEND refs /reference:${it} )
+ else( )
+ # Argument is a source file
+ if( EXISTS ${it} )
+ list( APPEND sources ${it} )
+ list( APPEND sources_dep ${it} )
+ elseif( EXISTS ${CSHARP_SOURCE_DIRECTORY}/${it} )
+ list( APPEND sources ${CSHARP_SOURCE_DIRECTORY}/${it} )
+ list( APPEND sources_dep ${CSHARP_SOURCE_DIRECTORY}/${it} )
+ elseif( ${it} MATCHES "[*]" )
+ # For dependencies, we need to expand wildcards
+ FILE( GLOB it_glob ${it} )
+ list( APPEND sources ${it} )
+ list( APPEND sources_dep ${it_glob} )
+ endif( )
+ endif ( )
+ endforeach( )
+
+ # Check we have at least one source
+ list( LENGTH sources_dep sources_length )
+ if ( ${sources_length} LESS 1 )
+ MESSAGE( SEND_ERROR "No C# sources were specified for ${type} ${name}" )
+ endif ()
+ list( SORT sources_dep )
+
+ # Perform platform specific actions
+ if (WIN32)
+ string( REPLACE "/" "\\" sources ${sources} )
+ else (UNIX)
+ string( REPLACE "\\" "/" sources ${sources} )
+ endif (WIN32)
+
+ # Add custom target and command
+ MESSAGE( STATUS "Adding C# ${type} ${name}: '${CSHARP_COMPILER} /t:${type} /out:${name}.${output} /platform:${CSHARP_PLATFORM} ${CSHARP_SDK} ${refs} ${sources}'" )
+ add_custom_command(
+ COMMENT "Compiling C# ${type} ${name}: '${CSHARP_COMPILER} /t:${type} /out:${name}.${output} /platform:${CSHARP_PLATFORM} ${CSHARP_SDK} ${refs} ${sources}'"
+ OUTPUT ${CSHARP_BINARY_DIRECTORY}/${name}.${output}
+ COMMAND ${CSHARP_COMPILER}
+ ARGS /t:${type} /out:${name}.${output} /platform:${CSHARP_PLATFORM} ${CSHARP_SDK} ${refs} ${sources}
+ WORKING_DIRECTORY ${CSHARP_BINARY_DIRECTORY}
+ DEPENDS ${sources_dep}
+ )
+ add_custom_target(
+ ${name} ALL
+ DEPENDS ${CSHARP_BINARY_DIRECTORY}/${name}.${output}
+ SOURCES ${sources_dep}
+ )
+endmacro( CSHARP_ADD_PROJECT )
diff --git a/cmake/UseDotNetFrameworkSdk.cmake b/cmake/UseDotNetFrameworkSdk.cmake
new file mode 100644
index 0000000..6be4027
--- /dev/null
+++ b/cmake/UseDotNetFrameworkSdk.cmake
@@ -0,0 +1,16 @@
+#
+# A CMake Module for using Mono.
+#
+# The following variables are set:
+# (none)
+#
+# Additional references can be found here:
+# http://www.mono-project.com/Main_Page
+# http://www.mono-project.com/CSharp_Compiler
+#
+# This file is based on the work of GDCM:
+# http://gdcm.svn.sf.net/viewvc/gdcm/trunk/CMake/FindMono.cmake
+# Copyright (c) 2006-2010 Mathieu Malaterre <mathieu.malaterre at gmail.com>
+#
+
+message( STATUS "Using .NET compiler version ${CSHARP_DOTNET_VERSION}" )
\ No newline at end of file
diff --git a/cmake/UseMono.cmake b/cmake/UseMono.cmake
new file mode 100644
index 0000000..16a80ae
--- /dev/null
+++ b/cmake/UseMono.cmake
@@ -0,0 +1,16 @@
+#
+# A CMake Module for using Mono.
+#
+# The following variables are set:
+# (none)
+#
+# Additional references can be found here:
+# http://www.mono-project.com/Main_Page
+# http://www.mono-project.com/CSharp_Compiler
+#
+# This file is based on the work of GDCM:
+# http://gdcm.svn.sf.net/viewvc/gdcm/trunk/CMake/FindMono.cmake
+# Copyright (c) 2006-2010 Mathieu Malaterre <mathieu.malaterre at gmail.com>
+#
+
+message( STATUS "Using Mono compiler version ${CSHARP_MONO_VERSION}" )
diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in
index 66c4de1..90f6f63 100644
--- a/docs/Doxyfile.in
+++ b/docs/Doxyfile.in
@@ -25,7 +25,7 @@ DOXYFILE_ENCODING = UTF-8
# The PROJECT_NAME tag is a single word (or a sequence of words surrounded
# by quotes) that should identify the project.
-PROJECT_NAME = @CMAKE_PROJECT_NAME@
+PROJECT_NAME = @PacBioBAM_NAME@
# The PROJECT_NUMBER tag can be used to enter a project or revision number.
# This could be handy for archiving the generated documentation or
@@ -115,7 +115,7 @@ INLINE_INHERITED_MEMB = NO
# path before files name in the file list and in the header files. If set
# to NO the shortest path that makes the file name unique will be used.
-FULL_PATH_NAMES = NO
+FULL_PATH_NAMES = YES
# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
# can be used to strip a user-defined part of the path. Stripping is
@@ -133,7 +133,7 @@ STRIP_FROM_PATH =
# definition is used. Otherwise one should specify the include paths that
# are normally passed to the compiler using the -I flag.
-STRIP_FROM_INC_PATH =
+STRIP_FROM_INC_PATH = @PacBioBAM_IncludeDir@
# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
# (but less readable) file names. This can be useful is your file systems
@@ -238,7 +238,7 @@ EXTENSION_MAPPING =
# func(std::string) {}). This also make the inheritance and collaboration
# diagrams that involve STL classes more complete and accurate.
-BUILTIN_STL_SUPPORT = NO
+BUILTIN_STL_SUPPORT = YES
# If you use Microsoft's C++/CLI language, you should set this option to YES to
# enable parsing support.
@@ -310,7 +310,7 @@ SYMBOL_CACHE_SIZE = 0
# Private class members and static file members will be hidden unless
# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
-EXTRACT_ALL = YES
+EXTRACT_ALL = NO
# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
# will be included in the documentation.
@@ -320,13 +320,13 @@ EXTRACT_PRIVATE = NO
# If the EXTRACT_STATIC tag is set to YES all static members of a file
# will be included in the documentation.
-EXTRACT_STATIC = NO
+EXTRACT_STATIC = YES
# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
# defined locally in source files will be included in the documentation.
# If set to NO only classes defined in header files are included.
-EXTRACT_LOCAL_CLASSES = YES
+EXTRACT_LOCAL_CLASSES = NO
# This flag is only useful for Objective-C code. When set to YES local
# methods, which are defined in the implementation section but not in
@@ -592,7 +592,7 @@ WARN_LOGFILE =
# directories like "/usr/src/myproject". Separate the files or directories
# with spaces.
-INPUT = @PacBioBAM_IncludeDir@/pbbam @PacBioBAM_SourceDir@
+INPUT = @PacBioBAM_IncludeDir@
# This tag can be used to specify the character encoding of the source files
# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
@@ -651,7 +651,7 @@ RECURSIVE = YES
# excluded from the INPUT source files. This way you can easily exclude a
# subdirectory from a directory tree whose root is specified with the INPUT tag.
-EXCLUDE =
+EXCLUDE = @PacBioBAM_IncludeDir@/pbbam/internal
# The EXCLUDE_SYMLINKS tag can be used select whether or not files or
# directories that are symbolic links (a Unix filesystem feature) are excluded
@@ -673,13 +673,13 @@ EXCLUDE_PATTERNS =
# wildcard * is used, a substring. Examples: ANamespace, AClass,
# AClass::ANamespace, ANamespace::*Test
-EXCLUDE_SYMBOLS =
+EXCLUDE_SYMBOLS = pugi, PacBio::BAM::internal
# The EXAMPLE_PATH tag can be used to specify one or more files or
# directories that contain example code fragments that are included (see
# the \include command).
-EXAMPLE_PATH =
+EXAMPLE_PATH = examples
# If the value of the EXAMPLE_PATH tag contains directories, you can use the
# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
@@ -1226,7 +1226,7 @@ MAN_LINKS = NO
# generate an XML file that captures the structure of
# the code including all documentation.
-GENERATE_XML = NO
+GENERATE_XML = YES
# The XML_OUTPUT tag is used to specify where the XML pages will be put.
# If a relative path is entered the value of OUTPUT_DIRECTORY will be
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..14e0fb1
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,168 @@
+# Makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS =
+SPHINXBUILD = sphinx-build
+PAPER =
+BUILDDIR = build
+SOURCEDIR = source
+
+# Internal variables.
+PAPEROPT_a4 = -D latex_paper_size=a4
+PAPEROPT_letter = -D latex_paper_size=letter
+ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) $(SOURCEDIR)
+# the i18n builder cannot share the environment and doctrees with the others
+I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) $(SOURCEDIR)
+
+.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext fig
+
+help:
+ @echo "Please use \`make <target>' where <target> is one of"
+ @echo " html to make standalone HTML files"
+ @echo " dirhtml to make HTML files named index.html in directories"
+ @echo " singlehtml to make a single large HTML file"
+ @echo " pickle to make pickle files"
+ @echo " json to make JSON files"
+ @echo " htmlhelp to make HTML files and a HTML help project"
+ @echo " qthelp to make HTML files and a qthelp project"
+ @echo " devhelp to make HTML files and a Devhelp project"
+ @echo " epub to make an epub"
+ @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+ @echo " latexpdf to make LaTeX files and run them through pdflatex"
+ @echo " text to make text files"
+ @echo " man to make manual pages"
+ @echo " texinfo to make Texinfo files"
+ @echo " info to make Texinfo files and run them through makeinfo"
+ @echo " gettext to make PO message catalogs"
+ @echo " changes to make an overview of all changed/added/deprecated items"
+ @echo " linkcheck to check all external links for integrity"
+ @echo " doctest to run all doctests embedded in the documentation (if enabled)"
+
+clean:
+ -rm -rf $(BUILDDIR)/*
+
+html: basefig MANY_CLUSTER.png
+ $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
+ @echo
+ @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
+
+dirhtml:
+ $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
+ @echo
+ @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
+
+singlehtml:
+ $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
+ @echo
+ @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
+
+pickle:
+ $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
+ @echo
+ @echo "Build finished; now you can process the pickle files."
+
+json:
+ $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
+ @echo
+ @echo "Build finished; now you can process the JSON files."
+
+htmlhelp:
+ $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
+ @echo
+ @echo "Build finished; now you can run HTML Help Workshop with the" \
+ ".hhp project file in $(BUILDDIR)/htmlhelp."
+
+qthelp:
+ $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
+ @echo
+ @echo "Build finished; now you can run "qcollectiongenerator" with the" \
+ ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
+ @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pbtoolkits.qhcp"
+ @echo "To view the help file:"
+ @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pbtoolkits.qhc"
+
+devhelp:
+ $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
+ @echo
+ @echo "Build finished."
+ @echo "To view the help file:"
+ @echo "# mkdir -p $$HOME/.local/share/devhelp/pbtoolkits"
+ @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pbtoolkits"
+ @echo "# devhelp"
+
+epub:
+ $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
+ @echo
+ @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
+
+latex:
+ $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+ @echo
+ @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
+ @echo "Run \`make' in that directory to run these through (pdf)latex" \
+ "(use \`make latexpdf' here to do that automatically)."
+
+latexpdf:
+ $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+ @echo "Running LaTeX files through pdflatex..."
+ $(MAKE) -C $(BUILDDIR)/latex all-pdf
+ @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+text:
+ $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
+ @echo
+ @echo "Build finished. The text files are in $(BUILDDIR)/text."
+
+man:
+ $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
+ @echo
+ @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
+
+texinfo:
+ $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+ @echo
+ @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
+ @echo "Run \`make' in that directory to run these through makeinfo" \
+ "(use \`make info' here to do that automatically)."
+
+info:
+ $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+ @echo "Running Texinfo files through makeinfo..."
+ make -C $(BUILDDIR)/texinfo info
+ @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
+
+gettext:
+ $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
+ @echo
+ @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
+
+changes:
+ $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
+ @echo
+ @echo "The overview file is in $(BUILDDIR)/changes."
+
+linkcheck:
+ $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
+ @echo
+ @echo "Link check complete; look for any errors in the above output " \
+ "or in $(BUILDDIR)/linkcheck/output.txt."
+
+doctest:
+ $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
+ @echo "Testing of doctests in the sources finished, look at the " \
+ "results in $(BUILDDIR)/doctest/output.txt."
+
+basefig:
+ dot -Tpng $(SOURCEDIR)/dependencies.dot > $(SOURCEDIR)/$@
+ grep -v "\"pbsmrtpipe\" ->" $(SOURCEDIR)/dependencies.dot \
+ | grep -v "> \"pbcore\"" \
+ | sed 's/All/Sparse/' > $(SOURCEDIR)/sparse_dependencies.dot
+ dot -Tpng $(SOURCEDIR)/sparse_dependencies.dot \
+ > $(SOURCEDIR)/sparse_dependencies.png
+
+%.png: basefig
+ grep -v $* $(SOURCEDIR)/sparse_dependencies.dot | \
+ grep -v \? | sed 's/Sparse dependencies/Module bundles/' | \
+ dot -Tpng > $(SOURCEDIR)/$@
+
diff --git a/docs/examples/code/BarcodeQuery.txt b/docs/examples/code/BarcodeQuery.txt
new file mode 100644
index 0000000..3fe8fce
--- /dev/null
+++ b/docs/examples/code/BarcodeQuery.txt
@@ -0,0 +1,17 @@
+// using C++11 range-based for loop
+BarcodeQuery query(42, dataset);
+for (const BamRecord& r : query) {
+ assert(r.HasBarcodes());
+ assert(r.BarcodeForward() == 42 || r.barcodeReverse() == 42);
+}
+
+// OR
+
+// using iterators directly
+BarcodeQuery query(42, dataset);
+auto iter = query.cbegin();
+auto end = query.cend();
+for (; iter != end; ++iter) {
+ assert(iter->HasBarcodes());
+ assert(iter->BarcodeForward() == 42 || iter->barcodeReverse() == 42);
+}
diff --git a/docs/examples/code/Compare.txt b/docs/examples/code/Compare.txt
new file mode 100644
index 0000000..deecd8d
--- /dev/null
+++ b/docs/examples/code/Compare.txt
@@ -0,0 +1,3 @@
+// sort on increasing ZMW hole number
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::Zmw());
diff --git a/docs/examples/code/Compare_AlignedEnd.txt b/docs/examples/code/Compare_AlignedEnd.txt
new file mode 100644
index 0000000..d34ed67
--- /dev/null
+++ b/docs/examples/code/Compare_AlignedEnd.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::AlignedEnd());
diff --git a/docs/examples/code/Compare_AlignedStart.txt b/docs/examples/code/Compare_AlignedStart.txt
new file mode 100644
index 0000000..68de3e2
--- /dev/null
+++ b/docs/examples/code/Compare_AlignedStart.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::AlignedStart());
diff --git a/docs/examples/code/Compare_AlignedStrand.txt b/docs/examples/code/Compare_AlignedStrand.txt
new file mode 100644
index 0000000..6c22cdc
--- /dev/null
+++ b/docs/examples/code/Compare_AlignedStrand.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::AlignedStrand());
diff --git a/docs/examples/code/Compare_BarcodeForward.txt b/docs/examples/code/Compare_BarcodeForward.txt
new file mode 100644
index 0000000..1967341
--- /dev/null
+++ b/docs/examples/code/Compare_BarcodeForward.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::BarcodeForward());
diff --git a/docs/examples/code/Compare_BarcodeQuality.txt b/docs/examples/code/Compare_BarcodeQuality.txt
new file mode 100644
index 0000000..144f483
--- /dev/null
+++ b/docs/examples/code/Compare_BarcodeQuality.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::BarcodeQuality());
diff --git a/docs/examples/code/Compare_BarcodeReverse.txt b/docs/examples/code/Compare_BarcodeReverse.txt
new file mode 100644
index 0000000..9d3b245
--- /dev/null
+++ b/docs/examples/code/Compare_BarcodeReverse.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::BarcodeReverse());
diff --git a/docs/examples/code/Compare_FullName.txt b/docs/examples/code/Compare_FullName.txt
new file mode 100644
index 0000000..4b392b9
--- /dev/null
+++ b/docs/examples/code/Compare_FullName.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::FullName());
diff --git a/docs/examples/code/Compare_LocalContextFlag.txt b/docs/examples/code/Compare_LocalContextFlag.txt
new file mode 100644
index 0000000..aeab944
--- /dev/null
+++ b/docs/examples/code/Compare_LocalContextFlag.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::LocalContextFlag());
diff --git a/docs/examples/code/Compare_MapQuality.txt b/docs/examples/code/Compare_MapQuality.txt
new file mode 100644
index 0000000..fe22821
--- /dev/null
+++ b/docs/examples/code/Compare_MapQuality.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::MapQuality());
diff --git a/docs/examples/code/Compare_MovieName.txt b/docs/examples/code/Compare_MovieName.txt
new file mode 100644
index 0000000..cddcb64
--- /dev/null
+++ b/docs/examples/code/Compare_MovieName.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::MovieName());
diff --git a/docs/examples/code/Compare_NumDeletedBases.txt b/docs/examples/code/Compare_NumDeletedBases.txt
new file mode 100644
index 0000000..aa6dd4b
--- /dev/null
+++ b/docs/examples/code/Compare_NumDeletedBases.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::NumDeletedBases());
diff --git a/docs/examples/code/Compare_NumInsertedBases.txt b/docs/examples/code/Compare_NumInsertedBases.txt
new file mode 100644
index 0000000..917d87f
--- /dev/null
+++ b/docs/examples/code/Compare_NumInsertedBases.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::NumInsertedBases());
diff --git a/docs/examples/code/Compare_NumMatches.txt b/docs/examples/code/Compare_NumMatches.txt
new file mode 100644
index 0000000..47e3081
--- /dev/null
+++ b/docs/examples/code/Compare_NumMatches.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::NumMatches());
diff --git a/docs/examples/code/Compare_NumMismatches.txt b/docs/examples/code/Compare_NumMismatches.txt
new file mode 100644
index 0000000..12affb1
--- /dev/null
+++ b/docs/examples/code/Compare_NumMismatches.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::NumMismatches());
diff --git a/docs/examples/code/Compare_QueryEnd.txt b/docs/examples/code/Compare_QueryEnd.txt
new file mode 100644
index 0000000..d664d28
--- /dev/null
+++ b/docs/examples/code/Compare_QueryEnd.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::QueryEnd());
diff --git a/docs/examples/code/Compare_QueryStart.txt b/docs/examples/code/Compare_QueryStart.txt
new file mode 100644
index 0000000..12f6244
--- /dev/null
+++ b/docs/examples/code/Compare_QueryStart.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::QueryStart());
diff --git a/docs/examples/code/Compare_ReadAccuracy.txt b/docs/examples/code/Compare_ReadAccuracy.txt
new file mode 100644
index 0000000..9454309
--- /dev/null
+++ b/docs/examples/code/Compare_ReadAccuracy.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::ReadAccuracy());
diff --git a/docs/examples/code/Compare_ReadGroupId.txt b/docs/examples/code/Compare_ReadGroupId.txt
new file mode 100644
index 0000000..dab3497
--- /dev/null
+++ b/docs/examples/code/Compare_ReadGroupId.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::ReadGroupId());
diff --git a/docs/examples/code/Compare_ReadGroupNumericId.txt b/docs/examples/code/Compare_ReadGroupNumericId.txt
new file mode 100644
index 0000000..5ad8f9d
--- /dev/null
+++ b/docs/examples/code/Compare_ReadGroupNumericId.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::ReadGroupNumericId());
diff --git a/docs/examples/code/Compare_ReferenceEnd.txt b/docs/examples/code/Compare_ReferenceEnd.txt
new file mode 100644
index 0000000..ed42d05
--- /dev/null
+++ b/docs/examples/code/Compare_ReferenceEnd.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::ReferenceEnd());
diff --git a/docs/examples/code/Compare_ReferenceId.txt b/docs/examples/code/Compare_ReferenceId.txt
new file mode 100644
index 0000000..5628427
--- /dev/null
+++ b/docs/examples/code/Compare_ReferenceId.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::ReferenceId());
diff --git a/docs/examples/code/Compare_ReferenceName.txt b/docs/examples/code/Compare_ReferenceName.txt
new file mode 100644
index 0000000..1f76e7e
--- /dev/null
+++ b/docs/examples/code/Compare_ReferenceName.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::ReferenceName());
diff --git a/docs/examples/code/Compare_ReferenceStart.txt b/docs/examples/code/Compare_ReferenceStart.txt
new file mode 100644
index 0000000..0ccaf36
--- /dev/null
+++ b/docs/examples/code/Compare_ReferenceStart.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::ReferenceStart());
diff --git a/docs/examples/code/Compare_TypeFromOperator.txt b/docs/examples/code/Compare_TypeFromOperator.txt
new file mode 100644
index 0000000..afb0848
--- /dev/null
+++ b/docs/examples/code/Compare_TypeFromOperator.txt
@@ -0,0 +1,2 @@
+Compare::Type type = Compare::TypeFromOperator("!=");
+assert(type == Compare::NOT_EQUAL);
diff --git a/docs/examples/code/Compare_TypeToName.txt b/docs/examples/code/Compare_TypeToName.txt
new file mode 100644
index 0000000..c44e1cb
--- /dev/null
+++ b/docs/examples/code/Compare_TypeToName.txt
@@ -0,0 +1,2 @@
+string name = Compare::TypeToName(Compare::LESS_THAN);
+assert(name = "Compare::LESS_THAN");
diff --git a/docs/examples/code/Compare_Zmw.txt b/docs/examples/code/Compare_Zmw.txt
new file mode 100644
index 0000000..b02c426
--- /dev/null
+++ b/docs/examples/code/Compare_Zmw.txt
@@ -0,0 +1,2 @@
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::Zmw());
diff --git a/docs/examples/code/EntireFileQuery.txt b/docs/examples/code/EntireFileQuery.txt
new file mode 100644
index 0000000..d3fcc2c
--- /dev/null
+++ b/docs/examples/code/EntireFileQuery.txt
@@ -0,0 +1,15 @@
+// using C++11 range-based for loop
+EntireFileQuery query(dataset);
+for (const BamRecord& record : query) {
+ // ... do stuff ...
+}
+
+// OR
+
+// using iterators
+EntireFileQuery query(dataset);
+auto iter = query.cbegin();
+auto end = query.cend();
+for (; iter != end; ++iter) {
+ // ... do stuff ...
+}
diff --git a/docs/examples/code/EntireFileQuery_BamFilename.txt b/docs/examples/code/EntireFileQuery_BamFilename.txt
new file mode 100644
index 0000000..484db61
--- /dev/null
+++ b/docs/examples/code/EntireFileQuery_BamFilename.txt
@@ -0,0 +1,4 @@
+EntireFileQuery query("foo.bam");
+for (const BamRecord& record : query) {
+ // do stuff
+}
diff --git a/docs/examples/code/EntireFileQuery_NonConst.txt b/docs/examples/code/EntireFileQuery_NonConst.txt
new file mode 100644
index 0000000..a0a092e
--- /dev/null
+++ b/docs/examples/code/EntireFileQuery_NonConst.txt
@@ -0,0 +1,4 @@
+EntireFileQuery query("foo.bam");
+for (BamRecord& record : query) {
+ // ok to modify 'record' here
+}
diff --git a/docs/examples/code/GenomicIntervalQuery.txt b/docs/examples/code/GenomicIntervalQuery.txt
new file mode 100644
index 0000000..651f254
--- /dev/null
+++ b/docs/examples/code/GenomicIntervalQuery.txt
@@ -0,0 +1,16 @@
+// using C++11 range-based for loop
+GenomicIntervalQuery query(GenomicInterval("chr1:1000-2000"), dataset);
+for (const BamRecord& record : query) {
+ // ... do stuff ...
+}
+
+// OR
+
+// using iterators directly
+GenomicIntervalQuery query(GenomicInterval("chr1:1000-2000"), dataset);
+auto iter = query.cbegin();
+auto end = query.cend();
+for (; iter != end; ++iter) {
+ // ... do stuff ...
+}
+
diff --git a/docs/examples/code/GenomicIntervalQuery_Reuse.txt b/docs/examples/code/GenomicIntervalQuery_Reuse.txt
new file mode 100644
index 0000000..339ae95
--- /dev/null
+++ b/docs/examples/code/GenomicIntervalQuery_Reuse.txt
@@ -0,0 +1,8 @@
+DataSet ds("data.xml");
+GenomicIntervalQuery query(GenomicInterval(), ds);
+for (const GenomicInterval& interval : intervals) {
+ query.Interval(interval);
+ for (const BamRecord& record : query) {}
+ // do stuff
+ }
+}
\ No newline at end of file
diff --git a/docs/examples/code/PbiAlignedEndFilter.txt b/docs/examples/code/PbiAlignedEndFilter.txt
new file mode 100644
index 0000000..bac1a46
--- /dev/null
+++ b/docs/examples/code/PbiAlignedEndFilter.txt
@@ -0,0 +1,4 @@
+PbiFilterQuery query(PbiAlignedEndFilter{3000, Compare::GREATER_THAN});
+for (const BamRecord& record : query) {
+ assert(record.AlignedEnd() > 3000);
+}
diff --git a/docs/examples/code/PbiAlignedLengthFilter.txt b/docs/examples/code/PbiAlignedLengthFilter.txt
new file mode 100644
index 0000000..38dc3ff
--- /dev/null
+++ b/docs/examples/code/PbiAlignedLengthFilter.txt
@@ -0,0 +1,4 @@
+PbiFilterQuery query(PbiAlignedLengthFilter{1000, Compare::GREATER_THAN});
+for (const BamRecord& record : query) {
+ assert((record.AlignedEnd() - record.AlignedStart()) > 1000);
+}
diff --git a/docs/examples/code/PbiAlignedStartFilter.txt b/docs/examples/code/PbiAlignedStartFilter.txt
new file mode 100644
index 0000000..b78bb2c
--- /dev/null
+++ b/docs/examples/code/PbiAlignedStartFilter.txt
@@ -0,0 +1,4 @@
+PbiFilterQuery query(PbiAlignedStartFilter{3000, Compare::GREATER_THAN});
+for (const BamRecord& record : query) {
+ assert(record.AlignedStart() > 3000);
+}
diff --git a/docs/examples/code/PbiAlignedStrandFilter.txt b/docs/examples/code/PbiAlignedStrandFilter.txt
new file mode 100644
index 0000000..9f9a885
--- /dev/null
+++ b/docs/examples/code/PbiAlignedStrandFilter.txt
@@ -0,0 +1,5 @@
+PbiFilterQuery query(PbiAlignedStrandFilter{Strand::FORWARD});
+for (const BamRecord& record : query) {
+ assert(record.AlignedStrand() == Strand::FORWARD);
+}
+
diff --git a/docs/examples/code/PbiBarcodeFilter.txt b/docs/examples/code/PbiBarcodeFilter.txt
new file mode 100644
index 0000000..3913479
--- /dev/null
+++ b/docs/examples/code/PbiBarcodeFilter.txt
@@ -0,0 +1,17 @@
+// single value
+PbiFilter filter{ PbiBarcodeFilter{17} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ const auto barcodes = record.Barcodes();
+ assert(barcodes.first == 17 || barcodes.second == 17);
+}
+
+// whitelist
+vector<uint16_t> whitelist = { 50, 100 };
+PbiFilter filter{ PbiBarcodeFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ const auto barcodes = record.Barcodes();
+ assert(barcodes.first == 50 || barcodes.second == 50 ||
+ barcodes.first == 100 || barcodes.second == 100);
+}
diff --git a/docs/examples/code/PbiBarcodeForwardFilter.txt b/docs/examples/code/PbiBarcodeForwardFilter.txt
new file mode 100644
index 0000000..af88be6
--- /dev/null
+++ b/docs/examples/code/PbiBarcodeForwardFilter.txt
@@ -0,0 +1,15 @@
+// single value
+PbiFilter filter{ PbiBarcodeForwardFilter{50} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.BarcodeForward() == 50);
+}
+
+// whitelist
+vector<uint16_t> whitelist = { 50, 100 };
+PbiFilter filter{ PbiBarcodeForwardFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.BarcodeForward() == 50 || record.BarcodeForward() == 100);
+}
+
diff --git a/docs/examples/code/PbiBarcodeQualityFilter.txt b/docs/examples/code/PbiBarcodeQualityFilter.txt
new file mode 100644
index 0000000..34311d0
--- /dev/null
+++ b/docs/examples/code/PbiBarcodeQualityFilter.txt
@@ -0,0 +1,5 @@
+PbiFilter filter{ PbiBarcodeQualityFilter{42, Compare::GREATER_THAN_EQUAL} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.BarcodeQuality() >= 42);
+}
diff --git a/docs/examples/code/PbiBarcodeReverseFilter.txt b/docs/examples/code/PbiBarcodeReverseFilter.txt
new file mode 100644
index 0000000..27e3e3d
--- /dev/null
+++ b/docs/examples/code/PbiBarcodeReverseFilter.txt
@@ -0,0 +1,15 @@
+// single value
+PbiFilter filter{ PbiBarcodeReverseFilter{50} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.BarcodeReverse() == 50);
+}
+
+// whitelist
+vector<uint16_t> whitelist = { 50, 100 };
+PbiFilter filter{ PbiBarcodeReverseFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.BarcodeReverse() == 50 || record.BarcodeReverse() == 100);
+}
+
diff --git a/docs/examples/code/PbiBarcodesFilter.txt b/docs/examples/code/PbiBarcodesFilter.txt
new file mode 100644
index 0000000..a655c57
--- /dev/null
+++ b/docs/examples/code/PbiBarcodesFilter.txt
@@ -0,0 +1,6 @@
+PbiFilter filter{ PbiBarcodesFilter{17, 18} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.BarcodeForward() == 17 &&
+ record.BarcodeReverse() == 18);
+}
diff --git a/docs/examples/code/PbiBuilder_WithReader.txt b/docs/examples/code/PbiBuilder_WithReader.txt
new file mode 100644
index 0000000..e2748c2
--- /dev/null
+++ b/docs/examples/code/PbiBuilder_WithReader.txt
@@ -0,0 +1,30 @@
+// To simply create a PBI file from BAM, the following is the easiest method:
+//
+#include <pbbam/BamFile.h>
+#include <pbbam/PbiFile.h>
+
+BamFile bamFile("data.bam");
+PbiFile::CreateFrom(bamFile);
+
+
+// However if you need to perform additional operations while reading the BAM file,
+// you can do something like the following:
+//
+{
+ BamFile bamFile("data.bam");
+ PbiBuilder builder(bamFile.PacBioIndexFilename(),
+ bamFile.Header().Sequences().size());
+ BamReader reader(bamFile);
+ BamRecord b;
+ int64_t offset = reader.VirtualTell(); // first record's vOffset
+ while (reader.GetNext(b)) {
+
+ // store PBI recrod entry & get next record's vOffset
+ builder.AddRecord(b, offset);
+ offset = reader.VirtualTell();
+
+ // ... additional stuff as needed ...
+ }
+
+} // <-- PBI data will only be written here, as PbiBuilder goes out of scope
+
diff --git a/docs/examples/code/PbiBuilder_WithWriter.txt b/docs/examples/code/PbiBuilder_WithWriter.txt
new file mode 100644
index 0000000..0c7d6d1
--- /dev/null
+++ b/docs/examples/code/PbiBuilder_WithWriter.txt
@@ -0,0 +1,12 @@
+BamWriter writer(...);
+PbiBuilder pbiBuilder(...);
+int64_t vOffset;
+BamRecord record;
+while (...) {
+
+ // ... populate record data ...
+
+ // write record to BAM and add PBI entry
+ writer.Write(record, &vOffset);
+ pbiBuilder.AddRecord(record, vOffset);
+}
diff --git a/docs/examples/code/PbiFilterQuery.txt b/docs/examples/code/PbiFilterQuery.txt
new file mode 100644
index 0000000..4914eab
--- /dev/null
+++ b/docs/examples/code/PbiFilterQuery.txt
@@ -0,0 +1,22 @@
+// setup filter
+PbiFilter filter;
+filter.Add(PbiZmwFilter(42));
+filter.Add(PbiReadAccuracyFilter(0.9, Compare::GREATER_THAN_EQUAL));
+
+// using C++11 range-based for loop
+PbiFilterQuery query(filter, dataset);
+for (const BamRecord& r : query) {
+ assert(r.HoleNumber() == 42);
+ assert(r.ReadAccuracy() >= 0.9);
+}
+
+// OR
+
+// using iterators directly
+PbiFilterQuery query(filter, dataset);
+auto iter = query.cbegin();
+auto end = query.cend();
+for (; iter != end; ++iter) {
+ assert(iter->HoleNumber() == 42);
+ assert(iter->ReadAccuracy() >= 0.9);
+}
diff --git a/docs/examples/code/PbiFilter_Composition.txt b/docs/examples/code/PbiFilter_Composition.txt
new file mode 100644
index 0000000..22cc6ff
--- /dev/null
+++ b/docs/examples/code/PbiFilter_Composition.txt
@@ -0,0 +1,8 @@
+// (f1 && f2) || f3
+
+PbiFilter f1;
+PbiFilter f2;
+PbiFilter intersect_f1_f2 = PbiFilter::Intersection(f1, f2);
+
+PbiFilter f3;
+PbiFilter final = PbiFilter::Union(intersect_f1_f2, f3);
diff --git a/docs/examples/code/PbiFilter_CustomFilter.txt b/docs/examples/code/PbiFilter_CustomFilter.txt
new file mode 100644
index 0000000..f9cdd21
--- /dev/null
+++ b/docs/examples/code/PbiFilter_CustomFilter.txt
@@ -0,0 +1,21 @@
+struct MyCustomFilter
+{
+ bool Accepts(const PbiRawData& index, const size_t row) const
+ {
+ // Look up data for record at the provided row. Do any calculations
+ // necessary, then return whether that record passes your
+ // filter criteria.
+
+ return true;
+ }
+};
+
+// use in composite filters
+PbiFilter f;
+f.Add(PbiMovieNameFilter("foo"));
+f.Add(MyCustomFilter());
+
+// pass directly to PbiFilterQuery
+PbiFilterQuery query(MyCustomFilter(), "foo.bam");
+for (const BamRecord& record : query)
+ // ... do stuff ...
diff --git a/docs/examples/code/PbiFilter_Interface.txt b/docs/examples/code/PbiFilter_Interface.txt
new file mode 100644
index 0000000..0fea900
--- /dev/null
+++ b/docs/examples/code/PbiFilter_Interface.txt
@@ -0,0 +1 @@
+bool Accepts(const PbiRawData& index, const size_t row) const;
diff --git a/docs/examples/code/PbiFilter_Intersection_Copy.txt b/docs/examples/code/PbiFilter_Intersection_Copy.txt
new file mode 100644
index 0000000..ec0a7ac
--- /dev/null
+++ b/docs/examples/code/PbiFilter_Intersection_Copy.txt
@@ -0,0 +1,3 @@
+PbiFilter result{ PbiFilter::INTERSECT };
+result.Add(filters);
+return result;
diff --git a/docs/examples/code/PbiFilter_Intersection_Move.txt b/docs/examples/code/PbiFilter_Intersection_Move.txt
new file mode 100644
index 0000000..2b06106
--- /dev/null
+++ b/docs/examples/code/PbiFilter_Intersection_Move.txt
@@ -0,0 +1,3 @@
+PbiFilter result{ PbiFilter::INTERSECT };
+result.Add(std::move(filters));
+return result;
diff --git a/docs/examples/code/PbiFilter_Union_Copy.txt b/docs/examples/code/PbiFilter_Union_Copy.txt
new file mode 100644
index 0000000..7e2a192
--- /dev/null
+++ b/docs/examples/code/PbiFilter_Union_Copy.txt
@@ -0,0 +1,3 @@
+PbiFilter result{ PbiFilter::UNION };
+result.Add(filters);
+return result;
diff --git a/docs/examples/code/PbiFilter_Union_Move.txt b/docs/examples/code/PbiFilter_Union_Move.txt
new file mode 100644
index 0000000..2e98d91
--- /dev/null
+++ b/docs/examples/code/PbiFilter_Union_Move.txt
@@ -0,0 +1,3 @@
+PbiFilter result{ PbiFilter::UNION };
+result.Add(std::move(filters));
+return result;
diff --git a/docs/examples/code/PbiIdentityFilter.txt b/docs/examples/code/PbiIdentityFilter.txt
new file mode 100644
index 0000000..6fcb8d0
--- /dev/null
+++ b/docs/examples/code/PbiIdentityFilter.txt
@@ -0,0 +1,6 @@
+// single value
+PbiFilter filter{ PbiIdentityFilter{ 0.5, Compare::GREATER_THAN_EQUAL } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ // ... at least 50% of record was aligned ...
+}
diff --git a/docs/examples/code/PbiLocalContextFilter.txt b/docs/examples/code/PbiLocalContextFilter.txt
new file mode 100644
index 0000000..0aaa3eb
--- /dev/null
+++ b/docs/examples/code/PbiLocalContextFilter.txt
@@ -0,0 +1,22 @@
+
+// --------------------
+// has adapter_before
+// --------------------
+
+PbiFilter filter{ PbiLocalContextFilter{LocalContextFlags::ADAPTER_BEFORE, Compare::CONTAINS} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ const bool hasAdapterBefore = (record.LocalContextFlags() & LocalContextFlags::ADAPTER_BEFORE) != 0;
+ assert(hasAdapterBefore);
+}
+
+// ----------------------------------
+// has any adapters, barcodes, etc.
+// ----------------------------------
+
+PbiFilter filter{ PbiLocalContextFilter{LocalContextFlags::NO_LOCAL_CONTEXT, Compare::NOT_EQUAL} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ const bool hasContext = (record.LocalContextFlags() != LocalContextFlags::NO_LOCAL_CONTEXT);
+ assert(hasContext);
+}
diff --git a/docs/examples/code/PbiMapQualityFilter.txt b/docs/examples/code/PbiMapQualityFilter.txt
new file mode 100644
index 0000000..67fb5dc
--- /dev/null
+++ b/docs/examples/code/PbiMapQualityFilter.txt
@@ -0,0 +1,5 @@
+PbiFilter filter{ PbiMapQualityFilter{75, Compare::GREATER_THAN_EQUAL} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.MapQuality() >= 75);
+}
diff --git a/docs/examples/code/PbiMovieNameFilter.txt b/docs/examples/code/PbiMovieNameFilter.txt
new file mode 100644
index 0000000..dd124e2
--- /dev/null
+++ b/docs/examples/code/PbiMovieNameFilter.txt
@@ -0,0 +1,14 @@
+// single value
+PbiFilter filter{ PbiMovieFilter{ "foo" } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.MovieName() == "foo");
+}
+
+// whitelist
+vector<string> whitelist = { "foo", "bar" };
+PbiFilter filter{ PbiMovieNameFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.MovieName() == "foo" || record.MovieName() == "bar");
+}
diff --git a/docs/examples/code/PbiNumDeletedBasesFilter.txt b/docs/examples/code/PbiNumDeletedBasesFilter.txt
new file mode 100644
index 0000000..e1e3d1f
--- /dev/null
+++ b/docs/examples/code/PbiNumDeletedBasesFilter.txt
@@ -0,0 +1,6 @@
+PbiFilter filter{ PbiNumDeletedBasesFilter{50, Compare::LESS_THAN} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.NumDeletedBases() < 50);
+}
+
diff --git a/docs/examples/code/PbiNumInsertedBasesFilter.txt b/docs/examples/code/PbiNumInsertedBasesFilter.txt
new file mode 100644
index 0000000..ab385e4
--- /dev/null
+++ b/docs/examples/code/PbiNumInsertedBasesFilter.txt
@@ -0,0 +1,6 @@
+PbiFilter filter{ PbiNumInsertedBasesFilter{50, Compare::LESS_THAN} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.NumInsertedBases() < 50);
+}
+
diff --git a/docs/examples/code/PbiNumMatchesFilter.txt b/docs/examples/code/PbiNumMatchesFilter.txt
new file mode 100644
index 0000000..4e1b97d
--- /dev/null
+++ b/docs/examples/code/PbiNumMatchesFilter.txt
@@ -0,0 +1,6 @@
+PbiFilter filter{ PbiNumMatchesFilter{2000, Compare::GREATER_THAN_EQUAL} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.NumMatches() >= 2000);
+}
+
diff --git a/docs/examples/code/PbiNumMismatchesFilter.txt b/docs/examples/code/PbiNumMismatchesFilter.txt
new file mode 100644
index 0000000..690e4a1
--- /dev/null
+++ b/docs/examples/code/PbiNumMismatchesFilter.txt
@@ -0,0 +1,6 @@
+PbiFilter filter{ PbiNumMismatchesFilter{500, Compare::LESS_THAN} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.NumMismatches() < 500);
+}
+
diff --git a/docs/examples/code/PbiQueryEndFilter.txt b/docs/examples/code/PbiQueryEndFilter.txt
new file mode 100644
index 0000000..f85166b
--- /dev/null
+++ b/docs/examples/code/PbiQueryEndFilter.txt
@@ -0,0 +1,5 @@
+PbiFilter filter{ PbiQueryEndFilter{3000, Compare::GREATER_THAN} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.QueryEnd() > 3000);
+}
diff --git a/docs/examples/code/PbiQueryLengthFilter.txt b/docs/examples/code/PbiQueryLengthFilter.txt
new file mode 100644
index 0000000..123412a
--- /dev/null
+++ b/docs/examples/code/PbiQueryLengthFilter.txt
@@ -0,0 +1,5 @@
+PbiFilter filter{ PbiQueryLengthFilter{2000, Compare::GREATER_THAN} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert( (record.QueryEnd() - record.QueryStart()) > 2000 );
+}
diff --git a/docs/examples/code/PbiQueryNameFilter.txt b/docs/examples/code/PbiQueryNameFilter.txt
new file mode 100644
index 0000000..f1e51c7
--- /dev/null
+++ b/docs/examples/code/PbiQueryNameFilter.txt
@@ -0,0 +1,15 @@
+// single value
+PbiFilter filter{ PbiQueryNameFilter{ "movie_1/42/100_200" } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.FullName() == "movie_1/42/100_200");
+}
+
+// whitelist
+vector<string> whitelist = { "movie_1/42/100_200", "movie_3/24/300_500" };
+PbiFilter filter{ PbiQueryNameFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.FullName() == "movie_1/42/100_200" ||
+ record.FullName() == "movie_3/24/300_500");
+}
diff --git a/docs/examples/code/PbiQueryStartFilter.txt b/docs/examples/code/PbiQueryStartFilter.txt
new file mode 100644
index 0000000..56353df
--- /dev/null
+++ b/docs/examples/code/PbiQueryStartFilter.txt
@@ -0,0 +1,5 @@
+PbiFilter filter{ PbiQueryStartFilter{3000, Compare::GREATER_THAN} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.QueryStart() > 3000);
+}
diff --git a/docs/examples/code/PbiReadAccuracyFilter.txt b/docs/examples/code/PbiReadAccuracyFilter.txt
new file mode 100644
index 0000000..dd2df32
--- /dev/null
+++ b/docs/examples/code/PbiReadAccuracyFilter.txt
@@ -0,0 +1,5 @@
+PbiFilter filter{ PbiReadAccuracyFilter{0.8, Compare::GREATER_THAN_EQUAL} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.ReadAccuracy() >= 0.8);
+}
diff --git a/docs/examples/code/PbiReadGroupFilter.txt b/docs/examples/code/PbiReadGroupFilter.txt
new file mode 100644
index 0000000..9af096d
--- /dev/null
+++ b/docs/examples/code/PbiReadGroupFilter.txt
@@ -0,0 +1,64 @@
+// -------------------------
+// numeric ID
+// -------------------------
+
+// single value
+PbiFilter filter{ PbiReadGroupFilter{ 2458765 } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.ReadGroupNumericId() == 2458765);
+}
+
+// whitelist
+vector<int32_t> whitelist = { 2458765, -32143 };
+PbiFilter filter{ PbiReadGroupFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.ReadGroupNumericId() == 2458765 ||
+ record.ReadGroupNumericId() == -32143);
+}
+
+// -------------------------
+// printable ID
+// -------------------------
+
+// single value
+PbiFilter filter{ PbiReadGroupFilter{ "12B33F00" } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.ReadGroupId() == "12B33F00");
+}
+
+// whitelist
+vector<string> whitelist = { "12B33F00", "123ABC77" };
+PbiFilter filter{ PbiReadGroupFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.ReadGroupId() == "12B33F00" ||
+ record.ReadGroupId() == "123ABC77");
+}
+
+
+// -------------------------
+// read group
+// -------------------------
+
+BamFile file("foo.bam");
+BamHeader header = file.Header();
+assert(header.ReadGroups().size() > 1);
+
+// single value
+PbiFilter filter{ PbiReadGroupFilter{ header.ReadGroups()[0] } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.ReadGroup() == header.ReadGroups()[0]);
+}
+
+// whitelist
+vector<ReadGroupInfo> whitelist = { header.ReadGroups()[0], header.ReadGroups()[1] };
+PbiFilter filter{ PbiReadGroupFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.ReadGroup() == header.ReadGroups()[0] ||
+ record.ReadGroup() == header.ReadGroups()[1]);
+}
diff --git a/docs/examples/code/PbiReferenceEndFilter.txt b/docs/examples/code/PbiReferenceEndFilter.txt
new file mode 100644
index 0000000..ce005c6
--- /dev/null
+++ b/docs/examples/code/PbiReferenceEndFilter.txt
@@ -0,0 +1,5 @@
+PbiFilter filter{ PbiReferenceEndFilter{ 2000 } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.ReferenceEnd() == 2000);
+}
diff --git a/docs/examples/code/PbiReferenceIdFilter.txt b/docs/examples/code/PbiReferenceIdFilter.txt
new file mode 100644
index 0000000..d963d28
--- /dev/null
+++ b/docs/examples/code/PbiReferenceIdFilter.txt
@@ -0,0 +1,16 @@
+// single value
+PbiFilter filter{ PbiReferenceIdFilter{ 4 } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.ReferenceId() == 4);
+}
+
+// whitelist
+vector<int32_t> whitelist = { 0, 1 };
+PbiFilter filter{ PbiReferenceIdFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.ReferenceId() == 0 ||
+ record.ReferenceId() == 1);
+}
+
diff --git a/docs/examples/code/PbiReferenceNameFilter.txt b/docs/examples/code/PbiReferenceNameFilter.txt
new file mode 100644
index 0000000..c86b14a
--- /dev/null
+++ b/docs/examples/code/PbiReferenceNameFilter.txt
@@ -0,0 +1,15 @@
+// single value
+PbiFilter filter{ PbiReferenceNameFilter{ "chr1" } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.ReferenceName() == "chr1");
+}
+
+// whitelist
+vector<string> whitelist = { "chr1", "chr5" };
+PbiFilter filter{ PbiReferenceNameFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.ReferenceName() == "chr1" ||
+ record.ReferenceName() == "chr5");
+}
diff --git a/docs/examples/code/PbiReferenceStartFilter.txt b/docs/examples/code/PbiReferenceStartFilter.txt
new file mode 100644
index 0000000..d3ffdbb
--- /dev/null
+++ b/docs/examples/code/PbiReferenceStartFilter.txt
@@ -0,0 +1,5 @@
+PbiFilter filter{ PbiReferenceStartFilter{ 2000 } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.ReferenceStart() == 2000);
+}
diff --git a/docs/examples/code/PbiZmwFilter.txt b/docs/examples/code/PbiZmwFilter.txt
new file mode 100644
index 0000000..c63a804
--- /dev/null
+++ b/docs/examples/code/PbiZmwFilter.txt
@@ -0,0 +1,16 @@
+// single value
+PbiFilter filter{ PbiZmwFilter{ 4000 } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.HoleNumber() == 4000);
+}
+
+// whitelist
+vector<int32_t> whitelist = { 4000, 8000 };
+PbiFilter filter{ PbiZmwFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.HoleNumber() == 4000 ||
+ record.HoleNumber() == 8000);
+}
+
diff --git a/docs/examples/code/ReadAccuracyQuery.txt b/docs/examples/code/ReadAccuracyQuery.txt
new file mode 100644
index 0000000..5b0404f
--- /dev/null
+++ b/docs/examples/code/ReadAccuracyQuery.txt
@@ -0,0 +1,15 @@
+// using C++11 range-based for loop
+ReadAccuracyQuery query(0.9, Compare::GREATER_THAN_EQUAL, dataset);
+for (const BamRecord& r : query) {
+ assert(r.ReadAccuracy() >= 0.9);
+}
+
+// OR
+
+// using iterators directly
+ReadAccuracyQuery query(0.9, Compare::GREATER_THAN_EQUAL, dataset);
+auto iter = query.cbegin();
+auto end = query.cend();
+for (; iter != end; ++iter) {
+ assert(iter->ReadAccuracy() >= 0.9);
+}
diff --git a/docs/examples/code/SubreadLengthQuery.txt b/docs/examples/code/SubreadLengthQuery.txt
new file mode 100644
index 0000000..466a1d9
--- /dev/null
+++ b/docs/examples/code/SubreadLengthQuery.txt
@@ -0,0 +1,15 @@
+// using C++11 range-based for loop
+SubreadLengthQuery query(500, Compare::GREATER_THAN_EQUAL, dataset);
+for (const BamRecord& r : query) {
+ assert((r.QueryEnd() - r.QueryStart()) >= 500);
+}
+
+// OR
+
+// using iterators directly
+SubreadLengthQuery query(500, Compare::GREATER_THAN_EQUAL, dataset);
+auto iter = query.cbegin();
+auto end = query.cend();
+for (; iter != end; ++iter) {
+ assert((iter->QueryEnd() - iter->QueryStart()) >= 500);
+}
diff --git a/docs/examples/code/Tag_AsciiCtor.txt b/docs/examples/code/Tag_AsciiCtor.txt
new file mode 100644
index 0000000..057d22f
--- /dev/null
+++ b/docs/examples/code/Tag_AsciiCtor.txt
@@ -0,0 +1,10 @@
+// One-step construction
+//
+// This is useful in situations that require a const Tag.
+//
+const auto t = Tag('A', TagModifier::ASCII_CHAR);
+
+// or two-step construction
+auto t = Tag('A');
+t.Modifier(TagModifier::ASCII_CHAR);
+
diff --git a/docs/examples/code/ZmwGroupQuery.txt b/docs/examples/code/ZmwGroupQuery.txt
new file mode 100644
index 0000000..1d728ac
--- /dev/null
+++ b/docs/examples/code/ZmwGroupQuery.txt
@@ -0,0 +1,23 @@
+bool allHoleNumbersEqual(const vector<BamRecord>& group)
+{
+ if (group.empty())
+ return true;
+ const auto firstHoleNumber = group[0].HoleNumber();
+ for (size_t i = 1; i < group.size(); ++i) {
+ if (group[i].HoleNumber() != firstHoleNumber)
+ return false;
+ }
+ return true;
+}
+
+vector<int32_t> whitelist = { 50, 100 };
+ZmwGroupQuery query(whitelist, dataset);
+for(const vector<BamRecord>& group : query) {
+
+ assert(allHoleNumbersEqual(group));
+
+ for (const BamRecord& record : group) {
+ assert(record.HoleNumber() == 50 ||
+ record.HoleNumber() == 100);
+ }
+}
diff --git a/docs/examples/code/ZmwQuery.txt b/docs/examples/code/ZmwQuery.txt
new file mode 100644
index 0000000..59c22c4
--- /dev/null
+++ b/docs/examples/code/ZmwQuery.txt
@@ -0,0 +1,6 @@
+vector<int32_t> whitelist = { 50, 100 };
+ZmwQuery query(whitelist, dataset);
+for (const BamRecord& record : query) {
+ assert(record.HoleNumber() == 50 ||
+ record.HoleNumber() == 100);
+}
diff --git a/docs/examples/code/ZmwWhitelistVirtualReader.txt b/docs/examples/code/ZmwWhitelistVirtualReader.txt
new file mode 100644
index 0000000..ae1facf
--- /dev/null
+++ b/docs/examples/code/ZmwWhitelistVirtualReader.txt
@@ -0,0 +1,6 @@
+vector<int32_t> zmws = { ... };
+ZmwWhitelistVirtualReader reader(zmws, "primary.bam", "scraps.bam");
+while(reader.HasNext()) {
+ auto virtualRecord = reader.Next();
+ // ... do stuff ...
+}
diff --git a/docs/examples/plaintext/AlignmentPrinterOutput.txt b/docs/examples/plaintext/AlignmentPrinterOutput.txt
new file mode 100644
index 0000000..21d948b
--- /dev/null
+++ b/docs/examples/plaintext/AlignmentPrinterOutput.txt
@@ -0,0 +1,13 @@
+Read : singleInsertion2
+Reference : lambda_NEB3011
+
+Read-length : 49
+Concordance : 0.96
+
+5210 : GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGG : 5249
+ |||||||| ||||||||||||||||||| |||||||||||
+ 0 : GGCTGCAG-GTACAGCGGTCAGGAGGCCAATTGATGCCGG : 39
+
+5249 : ACTGGCTGAT : 5259
+ ||||||||||
+ 39 : ACTGGCTGAT : 49
diff --git a/docs/examples/plaintext/PbiFilter_DataSetXmlFilters.txt b/docs/examples/plaintext/PbiFilter_DataSetXmlFilters.txt
new file mode 100644
index 0000000..5b5e8c2
--- /dev/null
+++ b/docs/examples/plaintext/PbiFilter_DataSetXmlFilters.txt
@@ -0,0 +1,14 @@
+<Filters>
+ <Filter>
+ <Properties>
+ <Property /> # A
+ <Property /> # B
+ </Properties>
+ </Filter>
+ <Filter>
+ <Properties>
+ <Property /> # C
+ <Property /> # D
+ </Properties>
+ </Filter>
+</Filters>
diff --git a/docs/source/api/Accuracy.rst b/docs/source/api/Accuracy.rst
new file mode 100644
index 0000000..f88b722
--- /dev/null
+++ b/docs/source/api/Accuracy.rst
@@ -0,0 +1,11 @@
+Accuracy
+========
+
+.. code-block:: cpp
+
+ #include <pbbam/Accuracy.h>
+
+.. doxygenclass:: PacBio::BAM::Accuracy
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/AlignmentPrinter.rst b/docs/source/api/AlignmentPrinter.rst
new file mode 100644
index 0000000..ef0b191
--- /dev/null
+++ b/docs/source/api/AlignmentPrinter.rst
@@ -0,0 +1,11 @@
+AlignmentPrinter
+================
+
+.. code-block:: cpp
+
+ #include <pbbam/AlignmentPrinter.h>
+
+.. doxygenclass:: PacBio::BAM::AlignmentPrinter
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/AlignmentSet.rst b/docs/source/api/AlignmentSet.rst
new file mode 100644
index 0000000..1817962
--- /dev/null
+++ b/docs/source/api/AlignmentSet.rst
@@ -0,0 +1,11 @@
+AlignmentSet
+============
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::AlignmentSet
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/BaiIndexedBamReader.rst b/docs/source/api/BaiIndexedBamReader.rst
new file mode 100644
index 0000000..aab136f
--- /dev/null
+++ b/docs/source/api/BaiIndexedBamReader.rst
@@ -0,0 +1,11 @@
+BaiIndexedBamReader
+===================
+
+.. code-block:: cpp
+
+ #include <pbbam/BaiIndexedBamReader.h>
+
+.. doxygenclass:: PacBio::BAM::BaiIndexedBamReader
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/BamFile.rst b/docs/source/api/BamFile.rst
new file mode 100644
index 0000000..c7e48fb
--- /dev/null
+++ b/docs/source/api/BamFile.rst
@@ -0,0 +1,11 @@
+BamFile
+=======
+
+.. code-block:: cpp
+
+ #include <pbbam/BamFile.h>
+
+.. doxygenclass:: PacBio::BAM::BamFile
+ :members:
+ :protected-members:
+ :undoc-members:
diff --git a/docs/source/api/BamHeader.rst b/docs/source/api/BamHeader.rst
new file mode 100644
index 0000000..6cf06af
--- /dev/null
+++ b/docs/source/api/BamHeader.rst
@@ -0,0 +1,11 @@
+BamHeader
+=========
+
+.. code-block:: cpp
+
+ #include <pbbam/BamHeader.h>
+
+.. doxygenclass:: PacBio::BAM::BamHeader
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/BamReader.rst b/docs/source/api/BamReader.rst
new file mode 100644
index 0000000..e0b6f3c
--- /dev/null
+++ b/docs/source/api/BamReader.rst
@@ -0,0 +1,11 @@
+BamReader
+=========
+
+.. code-block:: cpp
+
+ #include <pbbam/BamReader.h>
+
+.. doxygenclass:: PacBio::BAM::BamReader
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/BamRecord.rst b/docs/source/api/BamRecord.rst
new file mode 100644
index 0000000..a749775
--- /dev/null
+++ b/docs/source/api/BamRecord.rst
@@ -0,0 +1,17 @@
+BamRecord
+=========
+
+.. code-block:: cpp
+
+ #include <pbbam/BamRecord.h>
+
+.. doxygenenum:: PacBio::BAM::ClipType
+
+.. doxygenenum:: PacBio::BAM::RecordType
+
+.. doxygenenum:: PacBio::BAM::FrameEncodingType
+
+.. doxygenclass:: PacBio::BAM::BamRecord
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/BamRecordBuilder.rst b/docs/source/api/BamRecordBuilder.rst
new file mode 100644
index 0000000..ce477b4
--- /dev/null
+++ b/docs/source/api/BamRecordBuilder.rst
@@ -0,0 +1,11 @@
+BamRecordBuilder
+================
+
+.. code-block:: cpp
+
+ #include <pbbam/BamRecordBuilder.h>
+
+.. doxygenclass:: PacBio::BAM::BamRecordBuilder
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/BamRecordImpl.rst b/docs/source/api/BamRecordImpl.rst
new file mode 100644
index 0000000..92b6759
--- /dev/null
+++ b/docs/source/api/BamRecordImpl.rst
@@ -0,0 +1,11 @@
+BamRecordImpl
+=============
+
+.. code-block:: cpp
+
+ #include <pbbam/BamRecordImpl.h>
+
+.. doxygenclass:: PacBio::BAM::BamRecordImpl
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/BamRecordView.rst b/docs/source/api/BamRecordView.rst
new file mode 100644
index 0000000..2bc8fc4
--- /dev/null
+++ b/docs/source/api/BamRecordView.rst
@@ -0,0 +1,11 @@
+BamRecordView
+=============
+
+.. code-block:: cpp
+
+ #include <pbbam/BamRecord.h>
+
+.. doxygenclass:: PacBio::BAM::BamRecordView
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/BamTagCodec.rst b/docs/source/api/BamTagCodec.rst
new file mode 100644
index 0000000..9307421
--- /dev/null
+++ b/docs/source/api/BamTagCodec.rst
@@ -0,0 +1,11 @@
+BamTagCodec
+===========
+
+.. code-block:: cpp
+
+ #include <pbbam/BamTagCodec.h>
+
+.. doxygenclass:: PacBio::BAM::BamTagCodec
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/BamWriter.rst b/docs/source/api/BamWriter.rst
new file mode 100644
index 0000000..2e2951b
--- /dev/null
+++ b/docs/source/api/BamWriter.rst
@@ -0,0 +1,11 @@
+BamWriter
+=========
+
+.. code-block:: cpp
+
+ #include <pbbam/BamWriter.h>
+
+.. doxygenclass:: PacBio::BAM::BamWriter
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/BarcodeLookupData.rst b/docs/source/api/BarcodeLookupData.rst
new file mode 100644
index 0000000..2dac47d
--- /dev/null
+++ b/docs/source/api/BarcodeLookupData.rst
@@ -0,0 +1,11 @@
+BarcodeLookupData
+=================
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiLookupData.h>
+
+.. doxygenclass:: PacBio::BAM::BarcodeLookupData
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/BarcodeQuery.rst b/docs/source/api/BarcodeQuery.rst
new file mode 100644
index 0000000..5836059
--- /dev/null
+++ b/docs/source/api/BarcodeQuery.rst
@@ -0,0 +1,11 @@
+BarcodeQuery
+============
+
+.. code-block:: cpp
+
+ #include <pbbam/BarcodeQuery.h>
+
+.. doxygenclass:: PacBio::BAM::BarcodeQuery
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/BarcodeSet.rst b/docs/source/api/BarcodeSet.rst
new file mode 100644
index 0000000..a7ee056
--- /dev/null
+++ b/docs/source/api/BarcodeSet.rst
@@ -0,0 +1,11 @@
+BarcodeSet
+==========
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::BarcodeSet
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/BasicLookupData.rst b/docs/source/api/BasicLookupData.rst
new file mode 100644
index 0000000..b991fdf
--- /dev/null
+++ b/docs/source/api/BasicLookupData.rst
@@ -0,0 +1,11 @@
+BasicLookupData
+===============
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiLookupData.h>
+
+.. doxygenclass:: PacBio::BAM::BasicLookupData
+ :members:
+ :protected-members:
+ :undoc-members:
diff --git a/docs/source/api/Cigar.rst b/docs/source/api/Cigar.rst
new file mode 100644
index 0000000..cea30d5
--- /dev/null
+++ b/docs/source/api/Cigar.rst
@@ -0,0 +1,11 @@
+Cigar
+=====
+
+.. code-block:: cpp
+
+ #include <pbbam/Cigar.h>
+
+.. doxygenclass:: PacBio::BAM::Cigar
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/CigarOperation.rst b/docs/source/api/CigarOperation.rst
new file mode 100644
index 0000000..856400a
--- /dev/null
+++ b/docs/source/api/CigarOperation.rst
@@ -0,0 +1,13 @@
+CigarOperation
+==============
+
+.. code-block:: cpp
+
+ #include <pbbam/CigarOperation.h>
+
+.. doxygenenum:: PacBio::BAM::CigarOperationType
+
+.. doxygenclass:: PacBio::BAM::CigarOperation
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/Compare.rst b/docs/source/api/Compare.rst
new file mode 100644
index 0000000..bb28a7e
--- /dev/null
+++ b/docs/source/api/Compare.rst
@@ -0,0 +1,8 @@
+Compare
+=======
+
+.. code-block:: cpp
+
+ #include <pbbam/Compare.h>
+
+.. doxygenfile:: Compare.h
\ No newline at end of file
diff --git a/docs/source/api/Config.rst b/docs/source/api/Config.rst
new file mode 100644
index 0000000..c4be9e4
--- /dev/null
+++ b/docs/source/api/Config.rst
@@ -0,0 +1,8 @@
+Config
+=======
+
+.. code-block:: cpp
+
+ #include <pbbam/Conifig.h>
+
+.. doxygenfile:: Config.h
\ No newline at end of file
diff --git a/docs/source/api/ConsensusAlignmentSet.rst b/docs/source/api/ConsensusAlignmentSet.rst
new file mode 100644
index 0000000..bc5a7e5
--- /dev/null
+++ b/docs/source/api/ConsensusAlignmentSet.rst
@@ -0,0 +1,11 @@
+ConsensusAlignmentSet
+=====================
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::ConsensusAlignmentSet
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/ConsensusReadSet.rst b/docs/source/api/ConsensusReadSet.rst
new file mode 100644
index 0000000..846698d
--- /dev/null
+++ b/docs/source/api/ConsensusReadSet.rst
@@ -0,0 +1,11 @@
+ConsensusReadSet
+================
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::ConsensusReadSet
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/ContigSet.rst b/docs/source/api/ContigSet.rst
new file mode 100644
index 0000000..96bb20b
--- /dev/null
+++ b/docs/source/api/ContigSet.rst
@@ -0,0 +1,11 @@
+ContigSet
+=========
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::ContigSet
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/DataSet.rst b/docs/source/api/DataSet.rst
new file mode 100644
index 0000000..8b3f0db
--- /dev/null
+++ b/docs/source/api/DataSet.rst
@@ -0,0 +1,11 @@
+DataSet
+=======
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSet.h>
+
+.. doxygenclass:: PacBio::BAM::DataSet
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/DataSetBase.rst b/docs/source/api/DataSetBase.rst
new file mode 100644
index 0000000..f23fbb5
--- /dev/null
+++ b/docs/source/api/DataSetBase.rst
@@ -0,0 +1,11 @@
+DataSetBase
+=======
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::DataSetBase
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/DataSetMetadata.rst b/docs/source/api/DataSetMetadata.rst
new file mode 100644
index 0000000..eea260d
--- /dev/null
+++ b/docs/source/api/DataSetMetadata.rst
@@ -0,0 +1,11 @@
+DataSetMetadata
+===============
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::DataSetMetadata
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/EntireFileQuery.rst b/docs/source/api/EntireFileQuery.rst
new file mode 100644
index 0000000..4e7b86b
--- /dev/null
+++ b/docs/source/api/EntireFileQuery.rst
@@ -0,0 +1,11 @@
+EntireFileQuery
+===============
+
+.. code-block:: cpp
+
+ #include <pbbam/EntireFileQuery.h>
+
+.. doxygenclass:: PacBio::BAM::EntireFileQuery
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/ExtensionElement.rst b/docs/source/api/ExtensionElement.rst
new file mode 100644
index 0000000..980303e
--- /dev/null
+++ b/docs/source/api/ExtensionElement.rst
@@ -0,0 +1,11 @@
+ExtensionElement
+================
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::ExtensionElement
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/Extensions.rst b/docs/source/api/Extensions.rst
new file mode 100644
index 0000000..6704807
--- /dev/null
+++ b/docs/source/api/Extensions.rst
@@ -0,0 +1,11 @@
+Extensions
+================
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::Extensions
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/ExternalResource.rst b/docs/source/api/ExternalResource.rst
new file mode 100644
index 0000000..03ab0d3
--- /dev/null
+++ b/docs/source/api/ExternalResource.rst
@@ -0,0 +1,11 @@
+ExternalResource
+================
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::ExternalResource
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/ExternalResources.rst b/docs/source/api/ExternalResources.rst
new file mode 100644
index 0000000..bd72ea4
--- /dev/null
+++ b/docs/source/api/ExternalResources.rst
@@ -0,0 +1,11 @@
+ExternalResources
+=================
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::ExternalResources
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/FileIndex.rst b/docs/source/api/FileIndex.rst
new file mode 100644
index 0000000..c117214
--- /dev/null
+++ b/docs/source/api/FileIndex.rst
@@ -0,0 +1,11 @@
+FileIndex
+=========
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::FileIndex
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/FileIndices.rst b/docs/source/api/FileIndices.rst
new file mode 100644
index 0000000..b25720c
--- /dev/null
+++ b/docs/source/api/FileIndices.rst
@@ -0,0 +1,11 @@
+FileIndices
+=========
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::FileIndices
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/Filter.rst b/docs/source/api/Filter.rst
new file mode 100644
index 0000000..6faa8aa
--- /dev/null
+++ b/docs/source/api/Filter.rst
@@ -0,0 +1,11 @@
+Filter
+======
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::Filter
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/Filters.rst b/docs/source/api/Filters.rst
new file mode 100644
index 0000000..7ea1620
--- /dev/null
+++ b/docs/source/api/Filters.rst
@@ -0,0 +1,11 @@
+Filters
+=======
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::Filters
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/Frames.rst b/docs/source/api/Frames.rst
new file mode 100644
index 0000000..cf260f2
--- /dev/null
+++ b/docs/source/api/Frames.rst
@@ -0,0 +1,11 @@
+Frames
+======
+
+.. code-block:: cpp
+
+ #include <pbbam/Frames.h>
+
+.. doxygenclass:: PacBio::BAM::Frames
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/GenomicInterval.rst b/docs/source/api/GenomicInterval.rst
new file mode 100644
index 0000000..811b83a
--- /dev/null
+++ b/docs/source/api/GenomicInterval.rst
@@ -0,0 +1,11 @@
+GenomicInterval
+===============
+
+.. code-block:: cpp
+
+ #include <pbbam/GenomicInterval.h>
+
+.. doxygenclass:: PacBio::BAM::GenomicInterval
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/GenomicIntervalCompositeBamReader.rst b/docs/source/api/GenomicIntervalCompositeBamReader.rst
new file mode 100644
index 0000000..f658621
--- /dev/null
+++ b/docs/source/api/GenomicIntervalCompositeBamReader.rst
@@ -0,0 +1,11 @@
+GenomicIntervalCompositeBamReader
+=================================
+
+.. code-block:: cpp
+
+ #include <pbbam/CompositeBamReader.h>
+
+.. doxygenclass:: PacBio::BAM::GenomicIntervalCompositeBamReader
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/GenomicIntervalQuery.rst b/docs/source/api/GenomicIntervalQuery.rst
new file mode 100644
index 0000000..7bae558
--- /dev/null
+++ b/docs/source/api/GenomicIntervalQuery.rst
@@ -0,0 +1,11 @@
+GenomicIntervalQuery
+====================
+
+.. code-block:: cpp
+
+ #include <pbbam/GenomicIntervalQuery.h>
+
+.. doxygenclass:: PacBio::BAM::GenomicIntervalQuery
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/HdfSubreadSet.rst b/docs/source/api/HdfSubreadSet.rst
new file mode 100644
index 0000000..88bf008
--- /dev/null
+++ b/docs/source/api/HdfSubreadSet.rst
@@ -0,0 +1,11 @@
+HdfSubreadSet
+=============
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::HdfSubreadSet
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/IndexResultBlock.rst b/docs/source/api/IndexResultBlock.rst
new file mode 100644
index 0000000..fac804a
--- /dev/null
+++ b/docs/source/api/IndexResultBlock.rst
@@ -0,0 +1,17 @@
+IndexResultBlock
+================
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiBasicTypes.h>
+
+.. doxygenstruct:: PacBio::BAM::IndexResultBlock
+ :members:
+ :protected-members:
+ :undoc-members:
+
+.. doxygentypedef:: PacBio::BAM::IndexResultBlocks
+
+.. doxygentypedef:: PacBio::BAM::IndexList
+
+.. doxygentypedef:: PacBio::BAM::IndexRange
\ No newline at end of file
diff --git a/docs/source/api/IndexedFastaReader.rst b/docs/source/api/IndexedFastaReader.rst
new file mode 100644
index 0000000..7c46064
--- /dev/null
+++ b/docs/source/api/IndexedFastaReader.rst
@@ -0,0 +1,11 @@
+IndexedFastaReader
+==================
+
+.. code-block:: cpp
+
+ #include <pbbam/IndexedFastaReader.h>
+
+.. doxygenclass:: PacBio::BAM::IndexedFastaReader
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/Interval.rst b/docs/source/api/Interval.rst
new file mode 100644
index 0000000..f506a19
--- /dev/null
+++ b/docs/source/api/Interval.rst
@@ -0,0 +1,11 @@
+Interval
+========
+
+.. code-block:: cpp
+
+ #include <pbbam/Interval.h>
+
+.. doxygenclass:: PacBio::BAM::Interval
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/InvalidSequencingChemistryException.rst b/docs/source/api/InvalidSequencingChemistryException.rst
new file mode 100644
index 0000000..d521ecc
--- /dev/null
+++ b/docs/source/api/InvalidSequencingChemistryException.rst
@@ -0,0 +1,11 @@
+InvalidSequencingChemistryException
+===================================
+
+.. code-block:: cpp
+
+ #include <pbbam/exception/InvalidSequencingChemistryException.h>
+
+.. doxygenclass:: PacBio::BAM::InvalidSequencingChemistryException
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/LocalContextFlags.rst b/docs/source/api/LocalContextFlags.rst
new file mode 100644
index 0000000..8cd63be
--- /dev/null
+++ b/docs/source/api/LocalContextFlags.rst
@@ -0,0 +1,8 @@
+LocalContextFlags
+=================
+
+.. code-block:: cpp
+
+ #include <pbbam/LocalContextFlags.h>
+
+.. doxygenenum:: PacBio::BAM::LocalContextFlags
diff --git a/docs/source/api/MappedLookupData.rst b/docs/source/api/MappedLookupData.rst
new file mode 100644
index 0000000..7cf3c8b
--- /dev/null
+++ b/docs/source/api/MappedLookupData.rst
@@ -0,0 +1,11 @@
+MappedLookupData
+================
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiLookupData.h>
+
+.. doxygenclass:: PacBio::BAM::MappedLookupData
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/NamespaceInfo.rst b/docs/source/api/NamespaceInfo.rst
new file mode 100644
index 0000000..c7613ec
--- /dev/null
+++ b/docs/source/api/NamespaceInfo.rst
@@ -0,0 +1,11 @@
+NamespaceInfo
+=============
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetXsd.h>
+
+.. doxygenclass:: PacBio::BAM::NamespaceInfo
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/NamespaceRegistry.rst b/docs/source/api/NamespaceRegistry.rst
new file mode 100644
index 0000000..2f8f9a7
--- /dev/null
+++ b/docs/source/api/NamespaceRegistry.rst
@@ -0,0 +1,11 @@
+NamespaceRegistry
+=================
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetXsd.h>
+
+.. doxygenclass:: PacBio::BAM::NamespaceRegistry
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/OrderedLookup.rst b/docs/source/api/OrderedLookup.rst
new file mode 100644
index 0000000..d5b81b6
--- /dev/null
+++ b/docs/source/api/OrderedLookup.rst
@@ -0,0 +1,11 @@
+OrderedLookup
+=============
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiLookupData.h>
+
+.. doxygenclass:: PacBio::BAM::OrderedLookup
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/Orientation.rst b/docs/source/api/Orientation.rst
new file mode 100644
index 0000000..e9bbc42
--- /dev/null
+++ b/docs/source/api/Orientation.rst
@@ -0,0 +1,8 @@
+Orientation
+===========
+
+.. code-block:: cpp
+
+ #include <pbbam/Orientation.h>
+
+.. doxygenenum:: PacBio::BAM::Orientation
diff --git a/docs/source/api/ParentTool.rst b/docs/source/api/ParentTool.rst
new file mode 100644
index 0000000..e2ffa1b
--- /dev/null
+++ b/docs/source/api/ParentTool.rst
@@ -0,0 +1,11 @@
+ParentTool
+==========
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::ParentTool
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/PbiBuilder.rst b/docs/source/api/PbiBuilder.rst
new file mode 100644
index 0000000..d795d0f
--- /dev/null
+++ b/docs/source/api/PbiBuilder.rst
@@ -0,0 +1,11 @@
+PbiBuilder
+==========
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiBuilder.h>
+
+.. doxygenclass:: PacBio::BAM::PbiBuilder
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/PbiFile.rst b/docs/source/api/PbiFile.rst
new file mode 100644
index 0000000..5a8b85a
--- /dev/null
+++ b/docs/source/api/PbiFile.rst
@@ -0,0 +1,14 @@
+PbiFile
+=======
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiFile.h>
+
+.. doxygenenum:: PacBio::BAM::PbiFile::Section
+
+.. doxygentypedef:: PacBio::BAM::PbiFile::Sections
+
+.. doxygenenum:: PacBio::BAM::PbiFile::VersionEnum
+
+.. doxygenfunction:: PacBio::BAM::PbiFile::CreateFrom
diff --git a/docs/source/api/PbiFilter.rst b/docs/source/api/PbiFilter.rst
new file mode 100644
index 0000000..261498b
--- /dev/null
+++ b/docs/source/api/PbiFilter.rst
@@ -0,0 +1,11 @@
+PbiFilter
+=========
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiFilter.h>
+
+.. doxygenclass:: PacBio::BAM::PbiFilter
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/PbiFilterCompositeBamReader.rst b/docs/source/api/PbiFilterCompositeBamReader.rst
new file mode 100644
index 0000000..7a69df3
--- /dev/null
+++ b/docs/source/api/PbiFilterCompositeBamReader.rst
@@ -0,0 +1,11 @@
+PbiFilterCompositeBamReader
+===========================
+
+.. code-block:: cpp
+
+ #include <pbbam/CompositeBamReader.h>
+
+.. doxygenclass:: PacBio::BAM::PbiFilterCompositeBamReader
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/PbiFilterQuery.rst b/docs/source/api/PbiFilterQuery.rst
new file mode 100644
index 0000000..75bbc12
--- /dev/null
+++ b/docs/source/api/PbiFilterQuery.rst
@@ -0,0 +1,11 @@
+PbiFilterQuery
+==============
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiFilterQuery.h>
+
+.. doxygenclass:: PacBio::BAM::PbiFilterQuery
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/PbiFilterTypes.rst b/docs/source/api/PbiFilterTypes.rst
new file mode 100644
index 0000000..052389b
--- /dev/null
+++ b/docs/source/api/PbiFilterTypes.rst
@@ -0,0 +1,8 @@
+PbiFilterTypes
+==============
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiFilterTypes.h>
+
+.. doxygenfile:: PbiFilterTypes.h
\ No newline at end of file
diff --git a/docs/source/api/PbiIndex.rst b/docs/source/api/PbiIndex.rst
new file mode 100644
index 0000000..811bc68
--- /dev/null
+++ b/docs/source/api/PbiIndex.rst
@@ -0,0 +1,11 @@
+PbiIndex
+========
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiIndex.h>
+
+.. doxygenclass:: PacBio::BAM::PbiIndex
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/PbiIndexedBamReader.rst b/docs/source/api/PbiIndexedBamReader.rst
new file mode 100644
index 0000000..5450c8a
--- /dev/null
+++ b/docs/source/api/PbiIndexedBamReader.rst
@@ -0,0 +1,11 @@
+PbiIndexedBamReader
+===================
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiIndexedBamReader.h>
+
+.. doxygenclass:: PacBio::BAM::PbiIndexedBamReader
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/PbiRawBarcodeData.rst b/docs/source/api/PbiRawBarcodeData.rst
new file mode 100644
index 0000000..c72ebfb
--- /dev/null
+++ b/docs/source/api/PbiRawBarcodeData.rst
@@ -0,0 +1,11 @@
+PbiRawBarcodeData
+=================
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiRawData.h>
+
+.. doxygenclass:: PacBio::BAM::PbiRawBarcodeData
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/PbiRawBasicData.rst b/docs/source/api/PbiRawBasicData.rst
new file mode 100644
index 0000000..2282387
--- /dev/null
+++ b/docs/source/api/PbiRawBasicData.rst
@@ -0,0 +1,11 @@
+PbiRawBasicData
+===============
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiRawData.h>
+
+.. doxygenclass:: PacBio::BAM::PbiRawBasicData
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/PbiRawData.rst b/docs/source/api/PbiRawData.rst
new file mode 100644
index 0000000..1a974e8
--- /dev/null
+++ b/docs/source/api/PbiRawData.rst
@@ -0,0 +1,11 @@
+PbiRawData
+==========
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiRawData.h>
+
+.. doxygenclass:: PacBio::BAM::PbiRawData
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/PbiRawMappedData.rst b/docs/source/api/PbiRawMappedData.rst
new file mode 100644
index 0000000..42e1de1
--- /dev/null
+++ b/docs/source/api/PbiRawMappedData.rst
@@ -0,0 +1,11 @@
+PbiRawMappedData
+================
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiRawData.h>
+
+.. doxygenclass:: PacBio::BAM::PbiRawMappedData
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/PbiRawReferenceData.rst b/docs/source/api/PbiRawReferenceData.rst
new file mode 100644
index 0000000..460cde4
--- /dev/null
+++ b/docs/source/api/PbiRawReferenceData.rst
@@ -0,0 +1,11 @@
+PbiRawReferenceData
+===================
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiRawData.h>
+
+.. doxygenclass:: PacBio::BAM::PbiRawReferenceData
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/PbiReferenceEntry.rst b/docs/source/api/PbiReferenceEntry.rst
new file mode 100644
index 0000000..472e586
--- /dev/null
+++ b/docs/source/api/PbiReferenceEntry.rst
@@ -0,0 +1,11 @@
+PbiReferenceEntry
+=================
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiRawData.h>
+
+.. doxygenclass:: PacBio::BAM::PbiReferenceEntry
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/Position.rst b/docs/source/api/Position.rst
new file mode 100644
index 0000000..3c945f2
--- /dev/null
+++ b/docs/source/api/Position.rst
@@ -0,0 +1,10 @@
+Position
+========
+
+.. code-block:: cpp
+
+ #include <pbbam/Position.h>
+
+.. doxygentypedef:: PacBio::BAM::Position
+
+.. doxygenvariable:: PacBio::BAM::UnmappedPosition
\ No newline at end of file
diff --git a/docs/source/api/ProgramInfo.rst b/docs/source/api/ProgramInfo.rst
new file mode 100644
index 0000000..b58c93a
--- /dev/null
+++ b/docs/source/api/ProgramInfo.rst
@@ -0,0 +1,11 @@
+ProgramInfo
+===========
+
+.. code-block:: cpp
+
+ #include <pbbam/ProgramInfo.h>
+
+.. doxygenclass:: PacBio::BAM::ProgramInfo
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/QNameQuery.rst b/docs/source/api/QNameQuery.rst
new file mode 100644
index 0000000..b549436
--- /dev/null
+++ b/docs/source/api/QNameQuery.rst
@@ -0,0 +1,11 @@
+QNameQuery
+==========
+
+.. code-block:: cpp
+
+ #include <pbbam/QNameQuery.h>
+
+.. doxygenclass:: PacBio::BAM::QNameQuery
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/QualityValue.rst b/docs/source/api/QualityValue.rst
new file mode 100644
index 0000000..3520c5a
--- /dev/null
+++ b/docs/source/api/QualityValue.rst
@@ -0,0 +1,11 @@
+QualityValue
+============
+
+.. code-block:: cpp
+
+ #include <pbbam/QualityValue.h>
+
+.. doxygenclass:: PacBio::BAM::QualityValue
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/QualityValues.rst b/docs/source/api/QualityValues.rst
new file mode 100644
index 0000000..8f6dfa5
--- /dev/null
+++ b/docs/source/api/QualityValues.rst
@@ -0,0 +1,11 @@
+QualityValues
+=============
+
+.. code-block:: cpp
+
+ #include <pbbam/QualityValues.h>
+
+.. doxygenclass:: PacBio::BAM::QualityValues
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/ReadAccuracyQuery.rst b/docs/source/api/ReadAccuracyQuery.rst
new file mode 100644
index 0000000..abfd1e6
--- /dev/null
+++ b/docs/source/api/ReadAccuracyQuery.rst
@@ -0,0 +1,11 @@
+ReadAccuracyQuery
+=================
+
+.. code-block:: cpp
+
+ #include <pbbam/ReadAccuracyQuery.h>
+
+.. doxygenclass:: PacBio::BAM::ReadAccuracyQuery
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/ReadGroupInfo.rst b/docs/source/api/ReadGroupInfo.rst
new file mode 100644
index 0000000..7fb4f69
--- /dev/null
+++ b/docs/source/api/ReadGroupInfo.rst
@@ -0,0 +1,21 @@
+ReadGroupInfo
+=============
+
+.. code-block:: cpp
+
+ #include <pbbam/ReadGroupInfo.h>
+
+.. doxygenenum:: PacBio::BAM::BaseFeature
+
+.. doxygenenum:: PacBio::BAM::FrameCodec
+
+.. doxygenenum:: PacBio::BAM::BarcodeModeType
+
+.. doxygenenum:: PacBio::BAM::BarcodeQualityType
+
+.. doxygenclass:: PacBio::BAM::ReadGroupInfo
+ :members:
+ :protected-members:
+ :undoc-members:
+
+.. doxygenfunction:: PacBio::BAM::MakeReadGroupId
\ No newline at end of file
diff --git a/docs/source/api/ReferenceLookupData.rst b/docs/source/api/ReferenceLookupData.rst
new file mode 100644
index 0000000..20316fc
--- /dev/null
+++ b/docs/source/api/ReferenceLookupData.rst
@@ -0,0 +1,11 @@
+ReferenceLookupData
+===================
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiLookupData.h>
+
+.. doxygenclass:: PacBio::BAM::ReferenceLookupData
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/ReferenceSet.rst b/docs/source/api/ReferenceSet.rst
new file mode 100644
index 0000000..22e4703
--- /dev/null
+++ b/docs/source/api/ReferenceSet.rst
@@ -0,0 +1,11 @@
+ReferenceSet
+============
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::ReferenceSet
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/SamTagCodec.rst b/docs/source/api/SamTagCodec.rst
new file mode 100644
index 0000000..4f8d65d
--- /dev/null
+++ b/docs/source/api/SamTagCodec.rst
@@ -0,0 +1,11 @@
+SamTagCodec
+===========
+
+.. code-block:: cpp
+
+ #include <pbbam/SamTagCodec.h>
+
+.. doxygenclass:: PacBio::BAM::SamTagCodec
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/SequenceInfo.rst b/docs/source/api/SequenceInfo.rst
new file mode 100644
index 0000000..393d5bb
--- /dev/null
+++ b/docs/source/api/SequenceInfo.rst
@@ -0,0 +1,11 @@
+SequenceInfo
+============
+
+.. code-block:: cpp
+
+ #include <pbbam/SequenceInfo.h>
+
+.. doxygenclass:: PacBio::BAM::SequenceInfo
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/SequentialCompositeBamReader.rst b/docs/source/api/SequentialCompositeBamReader.rst
new file mode 100644
index 0000000..31ed3b1
--- /dev/null
+++ b/docs/source/api/SequentialCompositeBamReader.rst
@@ -0,0 +1,11 @@
+SequentialCompositeBamReader
+============================
+
+.. code-block:: cpp
+
+ #include <pbbam/CompositeBamReader.h>
+
+.. doxygenclass:: PacBio::BAM::SequentialCompositeBamReader
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/Strand.rst b/docs/source/api/Strand.rst
new file mode 100644
index 0000000..4978f72
--- /dev/null
+++ b/docs/source/api/Strand.rst
@@ -0,0 +1,8 @@
+Strand
+======
+
+.. code-block:: cpp
+
+ #include <pbbam/Strand.h>
+
+.. doxygenenum:: PacBio::BAM::Strand
diff --git a/docs/source/api/SubDataSets.rst b/docs/source/api/SubDataSets.rst
new file mode 100644
index 0000000..d179065
--- /dev/null
+++ b/docs/source/api/SubDataSets.rst
@@ -0,0 +1,11 @@
+SubDataSets
+===========
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::SubDataSets
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/SubreadLengthQuery.rst b/docs/source/api/SubreadLengthQuery.rst
new file mode 100644
index 0000000..23000b3
--- /dev/null
+++ b/docs/source/api/SubreadLengthQuery.rst
@@ -0,0 +1,11 @@
+SubreadLengthQuery
+==================
+
+.. code-block:: cpp
+
+ #include <pbbam/SubreadLengthQuery.h>
+
+.. doxygenclass:: PacBio::BAM::SubreadLengthQuery
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/SubreadSet.rst b/docs/source/api/SubreadSet.rst
new file mode 100644
index 0000000..bfc3c13
--- /dev/null
+++ b/docs/source/api/SubreadSet.rst
@@ -0,0 +1,11 @@
+SubreadSet
+==========
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::SubreadSet
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/Tag.rst b/docs/source/api/Tag.rst
new file mode 100644
index 0000000..50b85c7
--- /dev/null
+++ b/docs/source/api/Tag.rst
@@ -0,0 +1,15 @@
+Tag
+===
+
+.. code-block:: cpp
+
+ #include <pbbam/Tag.h>
+
+.. doxygenenum:: PacBio::BAM::TagDataType
+
+.. doxygenenum:: PacBio::BAM::TagModifier
+
+.. doxygenclass:: PacBio::BAM::Tag
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/TagCollection.rst b/docs/source/api/TagCollection.rst
new file mode 100644
index 0000000..1314b13
--- /dev/null
+++ b/docs/source/api/TagCollection.rst
@@ -0,0 +1,11 @@
+TagCollection
+=============
+
+.. code-block:: cpp
+
+ #include <pbbam/TagCollection.h>
+
+.. doxygenclass:: PacBio::BAM::TagCollection
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/UnorderedLookup.rst b/docs/source/api/UnorderedLookup.rst
new file mode 100644
index 0000000..718e4e7
--- /dev/null
+++ b/docs/source/api/UnorderedLookup.rst
@@ -0,0 +1,11 @@
+UnorderedLookup
+===============
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiLookupData.h>
+
+.. doxygenclass:: PacBio::BAM::UnorderedLookup
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/VirtualPolymeraseBamRecord.rst b/docs/source/api/VirtualPolymeraseBamRecord.rst
new file mode 100644
index 0000000..06d5531
--- /dev/null
+++ b/docs/source/api/VirtualPolymeraseBamRecord.rst
@@ -0,0 +1,11 @@
+VirtualPolymeraseBamRecord
+==========================
+
+.. code-block:: cpp
+
+ #include <pbbam/virtual/VirtualPolymeraseBamRecord.h>
+
+.. doxygenclass:: PacBio::BAM::VirtualPolymeraseBamRecord
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/VirtualPolymeraseCompositeReader.rst b/docs/source/api/VirtualPolymeraseCompositeReader.rst
new file mode 100644
index 0000000..e6cab4e
--- /dev/null
+++ b/docs/source/api/VirtualPolymeraseCompositeReader.rst
@@ -0,0 +1,11 @@
+VirtualPolymeraseCompositeReader
+================================
+
+.. code-block:: cpp
+
+ #include <pbbam/virtual/VirtualPolymeraseCompositeReader.h>
+
+.. doxygenclass:: PacBio::BAM::VirtualPolymeraseCompositeReader
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/VirtualPolymeraseReader.rst b/docs/source/api/VirtualPolymeraseReader.rst
new file mode 100644
index 0000000..14a46e8
--- /dev/null
+++ b/docs/source/api/VirtualPolymeraseReader.rst
@@ -0,0 +1,11 @@
+VirtualPolymeraseReader
+=======================
+
+.. code-block:: cpp
+
+ #include <pbbam/virtual/VirtualPolymeraseReader.h>
+
+.. doxygenclass:: PacBio::BAM::VirtualPolymeraseReader
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/VirtualRegion.rst b/docs/source/api/VirtualRegion.rst
new file mode 100644
index 0000000..7a09846
--- /dev/null
+++ b/docs/source/api/VirtualRegion.rst
@@ -0,0 +1,11 @@
+VirtualRegion
+=============
+
+.. code-block:: cpp
+
+ #include <pbbam/virtual/VirtualRegion.h>
+
+.. doxygenclass:: PacBio::BAM::VirtualRegion
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/VirtualRegionType.rst b/docs/source/api/VirtualRegionType.rst
new file mode 100644
index 0000000..4279200
--- /dev/null
+++ b/docs/source/api/VirtualRegionType.rst
@@ -0,0 +1,8 @@
+VirtualRegionType
+=================
+
+.. code-block:: cpp
+
+ #include <pbbam/virtual/VirtualRegionType.h>
+
+.. doxygenenum:: PacBio::BAM::VirtualRegionType
diff --git a/docs/source/api/VirtualRegionTypeMap.rst b/docs/source/api/VirtualRegionTypeMap.rst
new file mode 100644
index 0000000..eebe637
--- /dev/null
+++ b/docs/source/api/VirtualRegionTypeMap.rst
@@ -0,0 +1,11 @@
+VirtualRegionTypeMap
+====================
+
+.. code-block:: cpp
+
+ #include <pbbam/virtual/VirtualRegionTypeMap.h>
+
+.. doxygenclass:: PacBio::BAM::VirtualRegionTypeMap
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/ZmwGroupQuery.rst b/docs/source/api/ZmwGroupQuery.rst
new file mode 100644
index 0000000..01fc18a
--- /dev/null
+++ b/docs/source/api/ZmwGroupQuery.rst
@@ -0,0 +1,11 @@
+ZmwGroupQuery
+=============
+
+.. code-block:: cpp
+
+ #include <pbbam/ZmwGroupQuery.h>
+
+.. doxygenclass:: PacBio::BAM::ZmwGroupQuery
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/ZmwQuery.rst b/docs/source/api/ZmwQuery.rst
new file mode 100644
index 0000000..375fcb0
--- /dev/null
+++ b/docs/source/api/ZmwQuery.rst
@@ -0,0 +1,11 @@
+ZmwQuery
+========
+
+.. code-block:: cpp
+
+ #include <pbbam/ZmwQuery.h>
+
+.. doxygenclass:: PacBio::BAM::ZmwQuery
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api/ZmwWhitelistVirtualReader.rst b/docs/source/api/ZmwWhitelistVirtualReader.rst
new file mode 100644
index 0000000..95d2d1a
--- /dev/null
+++ b/docs/source/api/ZmwWhitelistVirtualReader.rst
@@ -0,0 +1,11 @@
+ZmwWhitelistVirtualReader
+=========================
+
+.. code-block:: cpp
+
+ #include <pbbam/virtual/ZmwWhitelistVirtualReader.h>
+
+.. doxygenclass:: PacBio::BAM::ZmwWhitelistVirtualReader
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
diff --git a/docs/source/api_reference.rst b/docs/source/api_reference.rst
new file mode 100644
index 0000000..354c0de
--- /dev/null
+++ b/docs/source/api_reference.rst
@@ -0,0 +1,12 @@
+.. _api_reference:
+
+C++ API Reference
+=================
+
+Watch this space for more recipes & how-tos.
+
+.. toctree::
+ :maxdepth: 1
+ :glob:
+
+ api/*
diff --git a/docs/source/commandline_utilities.rst b/docs/source/commandline_utilities.rst
new file mode 100644
index 0000000..7f1bdaf
--- /dev/null
+++ b/docs/source/commandline_utilities.rst
@@ -0,0 +1,15 @@
+.. _command_line:
+
+Command Line Utilities
+======================
+
+In addition to the main library and wrappers, pbbam also provides a few basic
+utilities for working with PacBio indices (".pbi" files).
+
+.. toctree::
+ :maxdepth: 1
+
+ tools/bam2sam
+ tools/pbindex
+ tools/pbindexdump
+ tools/pbmerge
diff --git a/docs/source/conf.py b/docs/source/conf.py
new file mode 100755
index 0000000..13a512d
--- /dev/null
+++ b/docs/source/conf.py
@@ -0,0 +1,332 @@
+# -*- coding: utf-8 -*-
+#
+# pbbam documentation build configuration file, created by
+# sphinx-quickstart on Fri Dec 4 10:08:52 2015.
+#
+# This file is execfile()d with the current directory set to its
+# containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+import sys
+import os
+import shlex
+import re
+import subprocess
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#sys.path.insert(0, os.path.abspath('.'))
+
+# get RTD to run doxygen first, per http://breathe.readthedocs.org/en/latest/readthedocs.html
+# but... we generate our actual Doxyfile via CMake in a normal build,
+# so we need to create one here, subbing actual values
+read_the_docs_build = os.environ.get('READTHEDOCS', None) == 'True'
+if read_the_docs_build:
+
+ # fetch directory info
+ this_dir = os.path.abspath(os.getcwd())
+ docs_dir = os.path.abspath(os.path.join(this_dir, '..'))
+ root_dir = os.path.abspath(os.path.join(docs_dir, '..'))
+ include_dir = os.path.abspath(os.path.join(root_dir, 'include'))
+
+ # get project version
+ version = ''
+ with open(os.path.abspath(os.path.join(root_dir, 'CMakeLists.txt')), 'r') as cmakeFile:
+ for line in cmakeFile:
+ if line.startswith('project'):
+ version = re.search(r'VERSION\s*([\d.]+)', line).group(1)
+ break
+
+ # read Doxyfile.in, replace markers with real values, and write Doxyfile
+ inDoxyfile = open(os.path.abspath(os.path.join(docs_dir, 'Doxyfile.in')), 'r')
+ configIn = inDoxyfile.read()
+ configOut = re.sub('@PacBioBAM_NAME@', 'pbbam', \
+ re.sub('@PacBioBAM_VERSION@', version, \
+ re.sub('@PacBioBAM_DocsDir@', docs_dir, \
+ re.sub('@PacBioBAM_IncludeDir@', include_dir, configIn))))
+ outDoxyfile = open(os.path.abspath(os.path.join(docs_dir, 'Doxyfile')), 'w')
+ #print(configOut, outDoxyfile)
+ print >>outDoxyfile, configOut
+ outDoxyfile.close()
+
+ # now run Doxygen
+ subprocess.call('cd ..; doxygen', shell=True)
+
+# -- General configuration ------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = ['breathe']
+#extensions = [
+# 'sphinx.ext.autodoc',
+ # 'sphinx.ext.coverage',
+ # 'breathe',
+#]
+
+# Setup Breathe extension varialbes
+breathe_projects = { 'pbbam' : os.path.join(os.getcwd(), '..', 'xml') + os.path.sep }
+breathe_default_project = 'pbbam'
+breathe_default_members = ('members', 'undoc-members')
+breathe_implementation_filename_extensions = [ '.cpp', '.inl' ]
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffix as a list of string:
+# source_suffix = ['.rst', '.md']
+source_suffix = '.rst'
+
+# The encoding of source files.
+#source_encoding = 'utf-8-sig'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'pbbam'
+copyright = u'2015, Derek Barnett'
+author = u'Derek Barnett'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = '0.4.5'
+# The full version, including alpha/beta/rc tags.
+release = '0.4.5'
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#
+# This is also used if you do content translation via gettext catalogs.
+# Usually you set "language" from the command line for these cases.
+language = None
+
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+#today = ''
+# Else, today_fmt is used as the format for a strftime call.
+#today_fmt = '%B %d, %Y'
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+exclude_patterns = []
+
+# The reST default role (used for this markup: `text`) to use for all
+# documents.
+#default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+#add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+#add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+#show_authors = False
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# A list of ignored prefixes for module index sorting.
+#modindex_common_prefix = []
+
+# If true, keep warnings as "system message" paragraphs in the built documents.
+#keep_warnings = False
+
+# If true, `todo` and `todoList` produce output, else they produce nothing.
+todo_include_todos = False
+
+
+# -- Options for HTML output ----------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+html_theme = 'pacbio-theme'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further. For a list of options available for each theme, see the
+# documentation.
+#html_theme_options = {}
+
+# Add any paths that contain custom themes here, relative to this directory.
+html_theme_path = ['.']
+
+# The name for this set of Sphinx documents. If None, it defaults to
+# "<project> v<release> documentation".
+#html_title = None
+
+# A shorter title for the navigation bar. Default is the same as html_title.
+#html_short_title = None
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+#html_logo = None
+
+# The name of an image file (within the static path) to use as favicon of the
+# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+#html_favicon = None
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+# Add any extra paths that contain custom files (such as robots.txt or
+# .htaccess) here, relative to this directory. These files are copied
+# directly to the root of the documentation.
+#html_extra_path = []
+
+# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
+# using the given strftime format.
+#html_last_updated_fmt = '%b %d, %Y'
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+#html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+#html_sidebars = {}
+
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+#html_additional_pages = {}
+
+# If false, no module index is generated.
+#html_domain_indices = True
+
+# If false, no index is generated.
+#html_use_index = True
+
+# If true, the index is split into individual pages for each letter.
+#html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+#html_show_sourcelink = True
+
+# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
+#html_show_sphinx = True
+
+# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
+#html_show_copyright = True
+
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it. The value of this option must be the
+# base URL from which the finished HTML is served.
+#html_use_opensearch = ''
+
+# This is the file name suffix for HTML files (e.g. ".xhtml").
+#html_file_suffix = None
+
+# Language to be used for generating the HTML full-text search index.
+# Sphinx supports the following languages:
+# 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
+# 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr'
+#html_search_language = 'en'
+
+# A dictionary with options for the search language support, empty by default.
+# Now only 'ja' uses this config value
+#html_search_options = {'type': 'default'}
+
+# The name of a javascript file (relative to the configuration directory) that
+# implements a search results scorer. If empty, the default will be used.
+#html_search_scorer = 'scorer.js'
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'pbbamdoc'
+
+# -- Options for LaTeX output ---------------------------------------------
+
+latex_elements = {
+# The paper size ('letterpaper' or 'a4paper').
+#'papersize': 'letterpaper',
+
+# The font size ('10pt', '11pt' or '12pt').
+#'pointsize': '10pt',
+
+# Additional stuff for the LaTeX preamble.
+#'preamble': '',
+
+# Latex figure (float) alignment
+#'figure_align': 'htbp',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+# author, documentclass [howto, manual, or own class]).
+latex_documents = [
+ (master_doc, 'pbbam.tex', u'pbbam Documentation',
+ u'Derek Barnett', 'manual'),
+]
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+#latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+#latex_use_parts = False
+
+# If true, show page references after internal links.
+#latex_show_pagerefs = False
+
+# If true, show URL addresses after external links.
+#latex_show_urls = False
+
+# Documents to append as an appendix to all manuals.
+#latex_appendices = []
+
+# If false, no module index is generated.
+#latex_domain_indices = True
+
+
+# -- Options for manual page output ---------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+ (master_doc, 'pbbam', u'pbbam Documentation',
+ [author], 1)
+]
+
+# If true, show URL addresses after external links.
+#man_show_urls = False
+
+
+# -- Options for Texinfo output -------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+# dir menu entry, description, category)
+texinfo_documents = [
+ (master_doc, 'pbbam', u'pbbam Documentation',
+ author, 'pbbam', 'One line description of project.',
+ 'Miscellaneous'),
+]
+
+# Documents to append as an appendix to all manuals.
+#texinfo_appendices = []
+
+# If false, no module index is generated.
+#texinfo_domain_indices = True
+
+# How to display URL addresses: 'footnote', 'no', or 'inline'.
+#texinfo_show_urls = 'footnote'
+
+# If true, do not generate a @detailmenu in the "Top" node's menu.
+#texinfo_no_detailmenu = False
diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst
new file mode 100644
index 0000000..6860f9f
--- /dev/null
+++ b/docs/source/getting_started.rst
@@ -0,0 +1,144 @@
+
+.. _getting_started:
+
+Getting Started
+===============
+
+.. _getting_started-requirements:
+
+Requirements
+------------
+
+These components will almost certainly already be on your system.
+
+* `gcc`_ (4.8+) OR `clang`_ (v3.1+)
+* pthreads
+* zlib
+
+Double-check your compiler version, to be sure it is compatible.
+
+.. code-block:: console
+
+ $ g++ -v
+ $ clang -v
+
+Additional requirements:
+
+* `Boost`_ (1.55+)
+* `CMake`_ (3.0+)
+* `Google Test`_
+* `htslib`_ (PacBio fork)
+
+For additional languages:
+
+* `SWIG`_ (3.0.5+)
+
+For building API documentation locally:
+
+* `Doxygen`_
+
+For maximal convenience, install htslib and google test in the same parent directory you plan to install pbbam.
+
+.. _Boost: http://www.boost.org/
+.. _clang: http://clang.llvm.org/
+.. _CMake: https://cmake.org/
+.. _Doxygen: http://www.stack.nl/~dimitri/doxygen/
+.. _gcc: https://gcc.gnu.org/
+.. _Google Test: https://github.com/google/googletest
+.. _htslib: https://github.com/PacificBiosciences/htslib.git
+.. _SWIG: http://www.swig.org/
+
+.. _getting_started-build:
+
+Clone & Build
+-------------
+
+.. note::
+
+ The following steps are for building the C++ library and command-line utilities.
+ If you are integrating pbbam into a C#, Python, or R project, take a look at the
+ instructions for :ref:`additional languages <swig_bindings>`.
+
+The basic steps for obtaining pbbam and building it from source are as follows:
+
+.. code-block:: console
+
+ $ git clone https://github.com/PacificBiosciences/pbbam.git
+ $ cd pbbam
+ $ mkdir build
+ $ cd build
+ $ cmake ..
+ $ make -j 4 # compiles using 4 threads
+
+Output:
+
+ * Library : <pbbam_root>/lib
+ * Headers : <pbbam_root>/include
+ * Utilities : <pbbam_root>/bin
+
+You may need to set a few options on the cmake command, to point to dependencies' install locations.
+Common installation-related options include:
+
+ * HTSLIB_ROOTDIR
+ * GTEST_SRC_DIR
+
+Add these using the '-D' argument, like this:
+
+.. code-block:: console
+
+ $ cmake .. -DHTSLIB_ROOTDIR="path/to/htslib"
+
+To run the test suite, run:
+
+.. code-block:: console
+
+ $ make test
+
+To build a local copy of the (Doxygen-style) API documentation, run:
+
+.. code-block:: console
+
+ $ make doc
+
+And then open <pbbam_root>/docs/html/index.html in your favorite browser.
+
+.. _getting_started-integrate:
+
+Integrate
+---------
+
+CMake-based projects
+````````````````````
+
+For CMake-based projects that will "ship with" or otherwise live alongside pbbam, you can
+use the approach described here.
+
+Before defining your library or executable, add the following:
+
+.. code-block:: cmake
+
+ add_subdirectory(<path/to/pbbam> external/build/pbbam)
+
+When it's time to run "make" this will ensure that pbbam will be built, inside your own project's
+build directory. After this point in the CMakeLists.txt file(s), a few variables will be available
+that can be used to setup your include paths and library linking targets:
+
+.. code-block:: cmake
+
+ include_directories(
+ ${PacBioBAM_INCLUDE_DIRS}
+ # other includes that your project needs
+ )
+
+ add_executable(foo)
+
+ target_link_libraries(foo
+ ${PacBioBAM_LIBRARIES}
+ # other libs that your project needs
+ )
+
+Non-CMake projects
+``````````````````
+
+If you're using something other than CMake for your project's build system, then you need to point
+it to pbbam's include directory & library, as well as those of its dependencies (primarily htslib).
diff --git a/docs/source/index.rst b/docs/source/index.rst
new file mode 100644
index 0000000..426c3c5
--- /dev/null
+++ b/docs/source/index.rst
@@ -0,0 +1,33 @@
+.. pbbam documentation master file, created by
+ sphinx-quickstart on Fri Dec 4 10:08:52 2015.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+.. _home:
+
+pbbam documentation
+===================
+
+As of the 3.0 release of SMRTanalysis, PacBio is embracing the industry standard BAM
+format for (both aligned and unaligned) basecall data files. We have also formulated
+a BAM companion file format (bam.pbi) enabling fast access to a richer set of per-read
+information as well as compatibility for software built around the legacy cmp.h5 format.
+
+The **pbbam** software package provides components to create, query, & edit PacBio BAM
+files and associated indices. These components include a core C++ library, bindings for
+additional languages, and command-line utilities.
+
+.. toctree::
+ :maxdepth: 1
+
+ getting_started
+ api_reference
+ swig_bindings
+ commandline_utilities
+
+
+Search:
+
+* :ref:`genindex`
+* :ref:`search`
+
diff --git a/docs/source/pacbio-theme/static/headerGradient.jpg b/docs/source/pacbio-theme/static/headerGradient.jpg
new file mode 100644
index 0000000..883f147
Binary files /dev/null and b/docs/source/pacbio-theme/static/headerGradient.jpg differ
diff --git a/docs/source/pacbio-theme/static/pacbio.css b/docs/source/pacbio-theme/static/pacbio.css
new file mode 100644
index 0000000..b4ab87f
--- /dev/null
+++ b/docs/source/pacbio-theme/static/pacbio.css
@@ -0,0 +1,238 @@
+/**
+ * Sphinx stylesheet -- default theme
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+ at import url("basic.css");
+
+/* -- page layout ----------------------------------------------------------- */
+
+body {
+ font-family: Arial, sans-serif;
+ font-size: 100%;
+ background-color: #555;
+ color: #555;
+ margin: 0;
+ padding: 0;
+ min-width: 500px;
+ max-width: 956px;
+ margin: 0 auto;
+}
+
+div.documentwrapper {
+ float: left;
+ width: 100%;
+}
+
+div.bodywrapper {
+ margin: 0 0 0 230px;
+}
+
+hr{
+ border: 1px solid #B1B4B6;
+
+}
+
+div.document {
+ background-color: #eee;
+}
+
+div.body {
+ background-color: #ffffff;
+ color: #3E4349;
+ padding: 30px 30px 30px 30px;
+ font-size: 0.8em;
+}
+
+div.footer {
+ color: #555;
+ background-color: #fff;
+ padding: 13px 0;
+ text-align: center;
+ font-size: 75%;
+
+}
+div.footer a {
+ color: #444;
+ text-decoration: underline;
+}
+
+div.related {
+ background: #fff url(headerGradient.jpg);
+ line-height: 80px;
+ color: #fff;
+ font-size: 0.80em;
+ height: 79px;
+ z-index: -1;
+}
+
+div.related ul {
+ background: url(pacbioLogo.png) 10px no-repeat;
+ padding: 0 0 0 200px;
+}
+
+div.related a {
+ color: #E2F3CC;
+}
+
+div.sphinxsidebar {
+ font-size: 0.75em;
+ line-height: 1.5em;
+}
+
+div.sphinxsidebarwrapper{
+ padding: 20px 0;
+}
+
+div.sphinxsidebar h3,
+div.sphinxsidebar h4 {
+ font-family: Arial, sans-serif;
+ color: #222;
+ font-size: 1.2em;
+ font-weight: bold;
+ margin: 0;
+ padding: 5px 10px 0 10px;
+}
+
+div.sphinxsidebar h4{
+ font-size: 1.1em;
+}
+
+div.sphinxsidebar h3 a {
+ color: #444;
+}
+
+
+div.sphinxsidebar p {
+ color: #888;
+ padding: 0px 20px;
+ margin-top: 5px;
+}
+
+div.sphinxsidebar p.topless {
+}
+
+div.sphinxsidebar ul {
+ margin: 5px 20px 10px 20px;
+ padding: 0;
+ color: #000;
+}
+
+div.sphinxsidebar a {
+ color: #444;
+}
+
+div.sphinxsidebar input {
+ border: 1px solid #ccc;
+ font-family: sans-serif;
+ font-size: 1em;
+}
+
+div.sphinxsidebar input[type=text]{
+ margin-left: 20px;
+}
+
+/* -- body styles ----------------------------------------------------------- */
+
+a {
+ color: #005B81;
+ text-decoration: none;
+}
+
+a:hover {
+ color: #E32E00;
+ text-decoration: underline;
+}
+
+div.body h1,
+div.body h2,
+div.body h3,
+div.body h4,
+div.body h5,
+div.body h6 {
+ font-family: Arial, sans-serif;
+ font-weight: bold;
+ color: #264868;
+ margin: 30px 0px 10px 0px;
+ padding: 5px 0 5px 0px;
+}
+
+div.body h1 { border-top: 20px solid white; margin-top: 0; font-size: 180%; font-weight: normal; }
+div.body h2 { font-size: 125%; }
+div.body h3 { font-size: 110%; }
+div.body h4 { font-size: 100%; }
+div.body h5 { font-size: 100%; }
+div.body h6 { font-size: 100%; }
+
+a.headerlink {
+ color: #c60f0f;
+ font-size: 0.8em;
+ padding: 0 4px 0 4px;
+ text-decoration: none;
+}
+
+a.headerlink:hover {
+ background-color: #c60f0f;
+ color: white;
+}
+
+div.body p, div.body dd, div.body li {
+ line-height: 1.5em;
+ font-size: 1em;
+}
+
+div.admonition p.admonition-title + p {
+ display: inline;
+}
+
+div.highlight{
+ background-color: white;
+}
+
+div.note {
+ background-color: #eee;
+ border: 1px solid #ccc;
+}
+
+div.seealso {
+ background-color: #ffc;
+ border: 1px solid #ff6;
+}
+
+div.topic {
+ background-color: #eee;
+}
+
+div.warning {
+ background-color: #ffe4e4;
+ border: 1px solid #f66;
+}
+
+p.admonition-title {
+ display: inline;
+}
+
+p.admonition-title:after {
+ content: ":";
+}
+
+pre {
+ padding: 10px;
+ background-color: White;
+ color: #222;
+ line-height: 1.2em;
+ border: 1px solid #C6C9CB;
+ font-size: 1.2em;
+ margin: 1.5em 0 1.5em 0;
+ -webkit-box-shadow: 1px 1px 1px #d8d8d8;
+ -moz-box-shadow: 1px 1px 1px #d8d8d8;
+}
+
+tt {
+ background-color: #ecf0f3;
+ color: #222;
+ padding: 1px 2px;
+ font-size: 1.2em;
+ font-family: monospace;
+}
+
diff --git a/docs/source/pacbio-theme/static/pacbioLogo.png b/docs/source/pacbio-theme/static/pacbioLogo.png
new file mode 100644
index 0000000..b2e4887
Binary files /dev/null and b/docs/source/pacbio-theme/static/pacbioLogo.png differ
diff --git a/docs/source/pacbio-theme/static/pygments.css b/docs/source/pacbio-theme/static/pygments.css
new file mode 100644
index 0000000..4588cde
--- /dev/null
+++ b/docs/source/pacbio-theme/static/pygments.css
@@ -0,0 +1,55 @@
+.c { color: #999988; font-style: italic } /* Comment */
+.k { font-weight: bold } /* Keyword */
+.o { font-weight: bold } /* Operator */
+.cm { color: #999988; font-style: italic } /* Comment.Multiline */
+.cp { color: #999999; font-weight: bold } /* Comment.preproc */
+.c1 { color: #999988; font-style: italic } /* Comment.Single */
+.gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
+.ge { font-style: italic } /* Generic.Emph */
+.gr { color: #aa0000 } /* Generic.Error */
+.gh { color: #999999 } /* Generic.Heading */
+.gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
+.go { color: #111 } /* Generic.Output */
+.gp { color: #555555 } /* Generic.Prompt */
+.gs { font-weight: bold } /* Generic.Strong */
+.gu { color: #aaaaaa } /* Generic.Subheading */
+.gt { color: #aa0000 } /* Generic.Traceback */
+.kc { font-weight: bold } /* Keyword.Constant */
+.kd { font-weight: bold } /* Keyword.Declaration */
+.kp { font-weight: bold } /* Keyword.Pseudo */
+.kr { font-weight: bold } /* Keyword.Reserved */
+.kt { color: #445588; font-weight: bold } /* Keyword.Type */
+.m { color: #009999 } /* Literal.Number */
+.s { color: #bb8844 } /* Literal.String */
+.na { color: #008080 } /* Name.Attribute */
+.nb { color: #999999 } /* Name.Builtin */
+.nc { color: #445588; font-weight: bold } /* Name.Class */
+.no { color: #ff99ff } /* Name.Constant */
+.ni { color: #800080 } /* Name.Entity */
+.ne { color: #990000; font-weight: bold } /* Name.Exception */
+.nf { color: #990000; font-weight: bold } /* Name.Function */
+.nn { color: #555555 } /* Name.Namespace */
+.nt { color: #000080 } /* Name.Tag */
+.nv { color: purple } /* Name.Variable */
+.ow { font-weight: bold } /* Operator.Word */
+.mf { color: #009999 } /* Literal.Number.Float */
+.mh { color: #009999 } /* Literal.Number.Hex */
+.mi { color: #009999 } /* Literal.Number.Integer */
+.mo { color: #009999 } /* Literal.Number.Oct */
+.sb { color: #bb8844 } /* Literal.String.Backtick */
+.sc { color: #bb8844 } /* Literal.String.Char */
+.sd { color: #bb8844 } /* Literal.String.Doc */
+.s2 { color: #bb8844 } /* Literal.String.Double */
+.se { color: #bb8844 } /* Literal.String.Escape */
+.sh { color: #bb8844 } /* Literal.String.Heredoc */
+.si { color: #bb8844 } /* Literal.String.Interpol */
+.sx { color: #bb8844 } /* Literal.String.Other */
+.sr { color: #808000 } /* Literal.String.Regex */
+.s1 { color: #bb8844 } /* Literal.String.Single */
+.ss { color: #bb8844 } /* Literal.String.Symbol */
+.bp { color: #999999 } /* Name.Builtin.Pseudo */
+.vc { color: #ff99ff } /* Name.Variable.Class */
+.vg { color: #ff99ff } /* Name.Variable.Global */
+.vi { color: #ff99ff } /* Name.Variable.Instance */
+.il { color: #009999 } /* Literal.Number.Integer.Long */
+
diff --git a/docs/source/pacbio-theme/theme.conf b/docs/source/pacbio-theme/theme.conf
new file mode 100644
index 0000000..dd24a1a
--- /dev/null
+++ b/docs/source/pacbio-theme/theme.conf
@@ -0,0 +1,4 @@
+[theme]
+inherit = default
+stylesheet = pacbio.css
+pygments_style = tango
diff --git a/docs/source/requirements.txt b/docs/source/requirements.txt
new file mode 100644
index 0000000..cd6467e
--- /dev/null
+++ b/docs/source/requirements.txt
@@ -0,0 +1 @@
+breathe
diff --git a/docs/source/swig_bindings.rst b/docs/source/swig_bindings.rst
new file mode 100644
index 0000000..e9dc33a
--- /dev/null
+++ b/docs/source/swig_bindings.rst
@@ -0,0 +1,257 @@
+.. _swig_bindings:
+
+Additional Languages
+====================
+
+pbbam uses SWIG to generate bindings for other languages. Currently this includes support for C#, Python, and R.
+
+These bindings are disabled by default. See the entry below for your target language to configure pbbam & integrate
+the bindings into your project.
+
+.. _swig_bindings-csharp:
+
+C#
+------
+
+Building
+````````
+
+To build the support for C#, you need to tell CMake to enable it before building:
+
+.. code-block:: console
+
+ $ cmake .. -DPacBioBAM_wrap_csharp
+ $ make
+
+The 'make' step will build relevant libraries/wrappers, and then run a simple program using them,
+as a quick sanity-check.
+
+After building, the libraries and wrappers can be found under the pbbam/lib/csharp directory.
+
+API Example
+```````````
+
+.. code-block:: c#
+
+ using PacBio.BAM;
+
+ namespace TestStuff
+ {
+ public class TestPbbam
+ {
+ public static void TestZmwQuery()
+ {
+ var d = new DataSet("foo.bam");
+ var q = new ZmwQuery(new IntList {1, 2, 3}, d);
+ var q2 = new ZmwQuery(new IntList { 14743 }, d);
+ if (0 != q.Count() || 4 != q2.Count())
+ {
+ throw new Exception("ZmwQuery not working");
+ }
+ Console.WriteLine("TestZmwQuery - OK!");
+ }
+ }
+ }
+
+.. _swig_bindings-python:
+
+Python
+------
+
+Building
+````````
+
+To build the support for Python, you need to tell CMake to enable it:
+
+.. code-block:: console
+
+ $ cmake .. -DPacBioBAM_wrap_python
+ $ make
+
+The 'make' step will build relevant libraries/wrappers, and then run a simple program using them,
+as a quick sanity-check.
+
+After building, the libraries and wrappers can be found in the pbbam/lib/python directory.
+'make test' will also include some Python-side unit tests as well.
+
+To use the PacBioBam module, you can set your PYTHONPATH before invoking your script:
+
+.. code-block:: console
+
+ $ PYTHONPATH="path/to/pbbam/lib/python" python myScript.py
+
+Or otherwise configure your environment to find the PacBioBam module.
+
+API Example
+```````````
+
+.. code-block:: python
+
+ import PacBioBam
+
+ try:
+ file = PacBioBam.BamFile('foo.bam')
+ writer = PacBioBam.BamWriter('new.bam', file.Header())
+ dataset = PacBioBam.DataSet(file)
+ entireFile = PacBioBam.EntireFileQuery(dataset)
+ for record in PacBioBam.Iterate(entireFile):
+ writer.Write(record)
+ except RuntimeError:
+ # found error
+
+Python-Specific Notes
+`````````````````````
+
+Iteration
+.........
+
+Iteration over dataset queries in Python will likely need to use the PacBioBam.Iterate() method. Thus
+file iteration loops will look something like the following:
+
+.. code-block:: python
+
+ entireFile = PacBioBam.EntireFileQuery("input.bam")
+ for record in PacBioBam.Iterate(entireFile):
+ foo.bar(record)
+
+Exception Handling
+..................
+
+Exceptions are used widely by the C++ library. To handle them from Python, you can use try blocks, looking for
+any RuntimeError:
+
+.. code-block:: python
+
+ try:
+ file = PacBioBam.BamFile("does_not_exist.bam")
+ except RuntimeError:
+ print("caught expected error")
+
+.. _swig_bindings-r:
+
+R
+------
+
+Building
+````````
+
+To build the support for R, you need to tell CMake to enable it:
+
+.. code-block:: console
+
+ $ cmake .. -DPacBioBAM_wrap_r
+ $ make
+
+The 'make' step will build relevant libraries/wrappers, and then run a simple program using them,
+as a quick sanity-check.
+
+After building, the libraries and wrappers can be found in the pbbam/lib/R directory.
+'make test' will also include some R-side unit tests as well.
+
+To use the PacBioBam module in your script, nothing should be needed up front - simply invoke 'R' as normal.
+You'll do the dynamic load of the R module near the beginning of your script:
+
+.. code-block:: r
+
+ # load pbbam R library
+ lib_path <- "path/to/pbbam/lib/R"
+ pbbam_libname <- paste(lib_path, "PacBioBam", sep="/")
+ pbbam_wrapper <- paste(lib_path, "PacBioBam.R", sep="/")
+ dyn.load(paste(pbbam_libname, .Platform$dynlib.ext, sep=""))
+ source(pbbam_wrapper)
+ cacheMetaData(1)
+
+
+API Example
+```````````
+
+.. code-block:: r
+
+ # load pbbam R library
+ lib_path <- "path/to/pbbam/lib/R"
+ pbbam_libname <- paste(lib_path, "PacBioBam", sep="/")
+ pbbam_wrapper <- paste(lib_path, "PacBioBam.R", sep="/")
+ dyn.load(paste(pbbam_libname, .Platform$dynlib.ext, sep=""))
+ source(pbbam_wrapper)
+ cacheMetaData(1)
+
+ # sample method
+ copyFileAndFetchRecordNames <-function(inputFn, outputFn) {
+
+ result <- tryCatch(
+ {
+ file <- BamFile(inputFn)
+ writer <- BamWriter(outputFn, file$Header())
+ ds <- DataSet(file)
+
+ entireFile <- EntireFileQuery(ds)
+ iter <- entireFile$begin()
+ end <- entireFile$end()
+
+ while ( iter$'__ne__'(end) ) {
+ record <- iter$value()
+
+ names_in <- c(names_in, record$FullName())
+ writer$Write(record)
+ iter$incr()
+ }
+ writer$TryFlush()
+ return(names_in)
+ },
+ error = function(e) {
+ # handle error
+ return(list())
+ })
+ return(result)
+ }
+
+R-Specific Notes
+````````````````
+
+Iteration
+.........
+
+To compare iterators, you'll need to explicitly use the '__eq__' or '__ne__' methods. Thus iterating over
+a data query, will look something like this:
+
+.. code-block:: r
+
+ iter <- query$begin()
+ end <- query$end()
+ while ( iter$'__ne__'(end) ) {
+ record <- iter$value()
+
+ # do stuff with record
+ }
+
+operator[]
+..........
+
+In C++, operator[] can be used in some classes to directly access elements in a sequence, e.g. Cigar string
+
+.. code-block:: cpp
+
+ CigarOperation op = cigar[0];
+
+For the R wrapper, if you want to do the same sort of thing, you'll need to use the '__getitem__' method.
+Please note that these are **0-based** indices, not 1-based as in much of R.
+
+.. code-block:: r
+
+ op <- cigar$'__getitem__'(0)
+
+Exception Handling
+..................
+
+Exceptions are used widely by the C++ library. To handle them from R, you can use the 'tryCatch' block, listening for
+'error' type exceptions.
+
+ .. code-block:: r
+
+ result <- tryCatch(
+ {
+ f <- BamFile("does_not_exist.bam") # this statement will throw
+ },
+ error = function(e) {
+ print(paste("caught expected erorr: ",e))
+ })
diff --git a/docs/source/tools/bam2sam.rst b/docs/source/tools/bam2sam.rst
new file mode 100644
index 0000000..4577686
--- /dev/null
+++ b/docs/source/tools/bam2sam.rst
@@ -0,0 +1,21 @@
+.. _bam2sam:
+
+bam2sam
+=======
+
+::
+
+ Usage: bam2sam [options] [input]
+
+ bam2sam converts a BAM file to SAM. It is essentially a stripped-down 'samtools
+ view', mostly useful for testing/debugging without requiring samtools. Input BAM
+ file is read from a file or stdin, and SAM output is written to stdout.
+
+ Options:
+ -h, --help show this help message and exit
+ --version show program's version number and exit
+
+ Options:
+ input Input BAM file. If not provided, stdin will be used as input.
+ --no-header Omit header from output.
+ --header-only Print only the header (no records).
diff --git a/docs/source/tools/pbindex.rst b/docs/source/tools/pbindex.rst
new file mode 100644
index 0000000..e7c491f
--- /dev/null
+++ b/docs/source/tools/pbindex.rst
@@ -0,0 +1,18 @@
+.. _pbindex:
+
+pbindex
+=======
+
+::
+
+ Usage: pbindex <input>
+
+ pbindex creates a index file that enables random-access to PacBio-specific data
+ in BAM files. Generated index filename will be the same as input BAM plus .pbi suffix.
+
+ Options:
+ -h, --help show this help message and exit
+ --version show program's version number and exit
+
+ Input/Output:
+ input Input BAM file
diff --git a/docs/source/tools/pbindexdump.rst b/docs/source/tools/pbindexdump.rst
new file mode 100644
index 0000000..6829064
--- /dev/null
+++ b/docs/source/tools/pbindexdump.rst
@@ -0,0 +1,233 @@
+.. _pbindexdump:
+
+pbindexdump
+===========
+
+::
+
+ Usage: pbindexdump [options] [input]
+
+ pbindexdump prints a human-readable view of PBI data to stdout.
+
+ Options:
+ -h, --help show this help message and exit
+ --version show program's version number and exit
+
+ Input/Output:
+ input Input PBI file. If not provided, stdin will be used as input.
+ --format=STRING Output format, one of:
+ json, cpp
+
+ json: pretty-printed JSON [default]
+
+ cpp: copy/paste-able C++ code that can be used to
+ construct the equivalent PacBio::BAM::PbiRawData object
+
+ JSON Formatting:
+ --json-indent-level=INT
+ JSON indent level [4]
+ --json-raw Prints fields in a manner that more closely reflects the
+ PBI file format - presenting data as per-field columns,
+ not per-record objects.
+
+JSON Output Schemas
+-------------------
+
+Normal JSON:
+
+.. code-block:: JSON
+
+ {
+ "type": "object",
+ "properties": {
+ "fileSections": {
+ "type": "array",
+ "items": { "type": "string" },
+ },
+ "numReads": { "type": "integer" },
+ "reads": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "aEnd": { "type": "integer" },
+ "aStart": { "type": "integer" },
+ "bcForward": { "type": "integer" },
+ "bcQuality": { "type": "integer" },
+ "bcReverse": { "type": "integer" },
+ "contextFlag": { "type": "integer" },
+ "fileOffset": { "type": "integer" },
+ "holeNumber": { "type": "integer" },
+ "mapQuality": { "type": "integer" },
+ "nM": { "type": "integer" },
+ "nMM": { "type": "integer" },
+ "qEnd": { "type": "integer" },
+ "qStart": { "type": "integer" },
+ "readQuality": { "type": "number" },
+ "reverseStrand": { "type": "integer" },
+ "rgId": { "type": "integer" },
+ "tEnd": { "type": "integer" },
+ "tId": { "type": "integer" },
+ "tStart: { "type": "integer" }
+ },
+ "required": [
+ "contextFlag",
+ "fileOffset",
+ "holeNumber",
+ "qEnd",
+ "qStart",
+ "readQuality",
+ "rgId"
+ ]
+ }
+ },
+ "references": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "beginRow": { "type": "integer" },
+ "endRow": { "type": "integer" },
+ "tId": { "type": "integer" }
+ },
+ "required" : [ "beginRow", "endRow","tId" ]
+ }
+ }q
+ "version": { "type": "string" }
+ },
+ "required": [
+ "fileSections",
+ "numReads",
+ "reads",
+ "version"
+ ]
+ }
+
+"Raw" JSON:
+
+.. code-block:: JSON
+
+ {
+ "type": "object",
+ "properties": {
+ "barcodeData" : {
+ "type" : "object",
+ "properties: {
+ "bcForward" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ },
+ "bcQuality" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ },
+ "bcReverse" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ }
+ }
+ },
+ "basicData" : {
+ "type" : "object",
+ "properties: {
+ "contextFlag" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ },
+ "fileOffset" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ },
+ "holeNumber" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ },
+ "qEnd" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ },
+ "qStart" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ },
+ "readQuality" : {
+ "type": "array",
+ "items" : { "type": "number" }
+ },
+ "rgId : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ }
+ }
+ },
+ "fileSections": {
+ "type": "array",
+ "items": { "type": "string" },
+ },
+ "mappedData" : {
+ "type" : "object",
+ "properties: {
+ "aEnd" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ },
+ "aStart" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ },
+ "mapQuality" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ },
+ "nM" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ },
+ "nMM" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ },
+ "readQuality" : {
+ "type": "array",
+ "items" : { "type": "number" }
+ },
+ "reverseStrand" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ },
+ "tEnd" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ },
+ "tId" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ },
+ "tStart" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ }
+ }
+ },
+ "numReads": { "type": "integer" },
+ "references": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "beginRow": { "type": "integer" },
+ "endRow": { "type": "integer" },
+ "tId": { "type": "integer" }
+ },
+ "required" : [ "beginRow", "endRow","tId" ]
+ }
+ },
+ "version" : { "type": "string" }
+ },
+ "required": [
+ "fileSections",
+ "numReads",
+ "basicData",
+ "version"
+ ]
+ }
diff --git a/docs/source/tools/pbmerge.rst b/docs/source/tools/pbmerge.rst
new file mode 100644
index 0000000..937ec56
--- /dev/null
+++ b/docs/source/tools/pbmerge.rst
@@ -0,0 +1,30 @@
+.. _pbmerge:
+
+pbmerge
+=======
+
+::
+
+ Usage: pbmerge [options] [-o <out.bam>] <INPUT>
+
+ pbmerge merges PacBio BAM files. If the input is DataSetXML, any filters will be
+ applied. If no output filename is specified, new BAM will be written to stdout.
+
+ Options:
+ -h, --help show this help message and exit
+ --version show program's version number and exit
+
+ Input/Output:
+ -o output Output BAM filename.
+ --no-pbi Set this option to skip PBI index file creation. PBI
+ creation is automatically skipped if no output filename
+ is provided.
+ INPUT Input may be one of:
+ DataSetXML, list of BAM files, or FOFN
+
+ fofn: pbmerge -o merged.bam bams.fofn
+
+ bams: pbmerge -o merged.bam 1.bam 2.bam 3.bam
+
+ xml: pbmerge -o merged.bam foo.subreadset.xml
+
diff --git a/docs/specs/pbbam.rst b/docs/specs/pbbam.rst
new file mode 100644
index 0000000..6842371
--- /dev/null
+++ b/docs/specs/pbbam.rst
@@ -0,0 +1,631 @@
+=================================================================
+**pbbam Software Design & Functional Specification**
+=================================================================
+| *Version 0.1*
+| *Pacific Biosciences Engineering Group*
+| *Jan 29, 2016*
+
+1. Revision History
+===================
+
++-------------+---------------+--------------------+---------------------------+
+| **Date** | **Revision** | **Author(s)** | **Comments** |
++=============+===============+====================+===========================+
+| 01-29-2016 | 0.1 | Derek Barnett | Initial draft created |
+| | | | |
++-------------+---------------+--------------------+---------------------------+
+
+2. Introduction
+===============
+
+2.1. Document Specification Identifier
+--------------------------------------
+
++-----------------------------------+------------------------------------------+
+| **Document Specification Prefix** | **Description** |
++===================================+==========================================+
+| FS\_SA\_PBBAM\_ | Functional spec for pbbam |
++-----------------------------------+------------------------------------------+
+
+2.2. Purpose
+------------
+
+This document is intended to describe the requirements and interface of the pbbam
+library, which provides functionality for creating, querying, and editing PacBio
+BAM files and associated file formats.
+
+2.3. Scope of Document
+----------------------
+
+This document covers the expected usage of the pbbam library, as well as any
+desired or required performance characteristics with respect to quality or speed.
+
+This document does not provide installation instructions or API documentation.
+
+2.4. Glossary of Terms
+----------------------
+
+The table below specifies only terms specific to this document, and skips
+acronyms/terms that are specified in `Pacific Biosciences Software Glossary`_.
+
+.. _Pacific Biosciences Software Glossary: http://smrtanalysis-docs/pb_sw_glossary.html
+
++------------------+-----------------------------------------------------------+
+| **Acronym/Term** | **Description** |
++==================+===========================================================+
+| API | Application Programming Interface - a set of routines, |
+| | protocols, and tools for building software applications. |
+| | In this document , this will consist of one or more |
+| | cooperating libraries that specify data structures, |
+| | methods, etc. for use within a target programming |
+| | language. |
++------------------+-----------------------------------------------------------+
+| Client | An application that uses the library. |
++------------------+-----------------------------------------------------------+
+| I/O | Input/output of data. |
++------------------+-----------------------------------------------------------+
+
+2.5. References
+---------------
+
++-------------+------------------------------+--------------------------------------+
+| **Ref No.** | **Document Name, Link** | **Description** |
++=============+==============================+======================================+
+| (1) | `BAM format`_ | General SAM/BAM specification |
++-------------+------------------------------+--------------------------------------+
+| (2) | `PacBio BAM`_ | PacBio BAM specification |
++-------------+------------------------------+--------------------------------------+
+| (3) | `PacBio BAM index`_ | PacBio BAM index specification |
++-------------+------------------------------+--------------------------------------+
+| (4) | `DataSet XML`_ | PacBio DataSet XML specification |
++-------------+------------------------------+--------------------------------------+
+| (5) | `Software Style Guide`_ | PacBio coding standards |
++-------------+------------------------------+--------------------------------------+
+| (6) | `SMRT Analysis`_ | General SMRT Analysis infrastructure |
++-------------+------------------------------+--------------------------------------+
+
+.. _BAM format: https://samtools.github.io/hts-specs/SAMv1.pdf
+.. _PacBio BAM: http://pacbiofileformats.readthedocs.org/en/3.0/BAM.html
+.. _PacBio BAM index: http://pacbiofileformats.readthedocs.org/en/3.0/PacBioBamIndex.html
+.. _DataSet XML: https://github.com/PacificBiosciences/PacBioFileFormats/blob/3.0/DataSet.rst
+.. _Software Style Guide: http://smrtanalysis-docs/_downloads/PBISoftwareStyleGuide.doc
+.. _SMRT Analysis: http://smrtanalysis-docs/smrt_docs.html
+
+3. Software Overview
+====================
+
+3.1. Product Description
+------------------------
+
+As of the 3.0 release of SMRTanalysis, PacBio is embracing the industry standard
+`BAM format`_ (1) for (both aligned and unaligned) basecall data files. We have
+also formulated a BAM companion file format (.bam.pbi) enabling fast access to a
+richer set of per-read information as well as compatibility for software built
+around the legacy cmp.h5 format.
+
+The pbbam library provides components to create, query, & transform PacBio BAM
+data: sequence files and their associated indices. This includes a core C++
+library as well as bindings for additional programming languages.
+
+3.2. Product Functional Capabilities
+------------------------------------
+
+The library must be able to read and write BAM files that conform to the
+`PacBio BAM`_ specification (2). BAM records must be editable e.g. adding
+alignment information. Random access must be supported, whether by genomic
+region or by filtering record features. To this end, the library will be able to
+read, write, and create associated index files - both the standard BAM index
+(.bai) and the `PacBio BAM index`_ (.pbi) (3). In addition to working with
+individual files, datasets of related BAM files will be supported. These are
+described in a `DataSet XML`_ document. (4)
+
+3.3. User Characteristics
+-------------------------
+
++---------------------+--------------------------------------------------------+
+| **User Class/Role** | **User Knowledge and Skill Levels** |
++=====================+========================================================+
+| Developer | Competence in one or more programming languages |
+| | supported (C++, R, Python, C#). No knowledge of |
+| | molecular biology wet lab techniques required. |
++---------------------+--------------------------------------------------------+
+
+3.4. User Operations and Practices
+----------------------------------
+
+Developer users will interact with the software by incorporating the library
+into a client application.
+
+3.5. Operating Environment
+--------------------------
+
+The software is intended to be run in a Linux or OSX environment, with ideally 4
+or more cores.
+
+3.6. Design and Implementation Constraints
+------------------------------------------
+
+Currently there are no constraints outside the operating environment and speed
+requirements. In particular, as the library will be used for writing the BAM
+files coming off a Sequel instrument, it should be able to keep pace.
+
+3.7. Assumptions and Dependencies
+---------------------------------
+
+Input routines for the library will expect to receive files that conform to the
+`PacBio BAM`_ (2) or `DataSet XML`_ (4) specifications.
+
+The pbbam library depends on Boost, zlib, and htslib libraries.
+
+3.8. Other Software
+-------------------
+
+Output PacBio BAMs will be compatible with the `PacBio BAM`_ specification (2)
+and thus compatible with the general `BAM format`_ specification (1). This
+ensures that a wide variety of downstream tools can interact with data files.
+
+The software uses `CMake`_ as its build system.
+
+The core C++ API relies on the following 3rd party components:
+
+* `zlib`_
+* `htslib`_
+* `Boost`_ (header-only modules)
+
+Wrapper APIs for additional languages (Python, R, C#) are generated by `SWIG`_.
+
+API documentation is generated via `Doxygen`_.
+
+.. _CMake: https://cmake.org/
+.. _zlib: http://www.zlib.net/
+.. _htslib: https://github.com/samtools/htslib
+.. _Boost: http://www.boost.org/
+.. _SWIG: http://www.swig.org/
+.. _Doxygen: http://www.stack.nl/~dimitri/doxygen/
+
+4. External Interfaces
+======================
+
+4.1. User Interfaces
+--------------------
+
+N/A
+
+4.2. Software Interfaces
+------------------------
+
+pbbam will require the following software:
+
+* `htslib`_ & `zlib`_ - provides low-level handling of compressed BAM data
+* `Boost`_ - provides utility classes
+
+Incoming data from upstream components will be compliant with
+PacBio BAM format - see `PacBio BAM`_ specification (2) for more detail.
+
+4.3. Hardware Interfaces
+------------------------
+
+N/A
+
+4.4. Communications Interfaces
+------------------------------
+
+N/A
+
+5. Functional Requirements
+==========================
+
+5.1. Query BAM data by genomic region
+-----------------------------------------
+
+5.1.1. Description
+~~~~~~~~~~~~~~~~~~
+
+pbbam shall allow client applications to query data, limited to some genomic
+region of interest.
+
+5.1.2. Inputs
+~~~~~~~~~~~~~
+
+* BAM file(s) or DataSet XML
+* a standard index (.bai) for each source BAM file
+* genomic interval (e.g. "chr1:1000-2000")
+
+5.1.3. Processing
+~~~~~~~~~~~~~~~~~
+
+Obtain an `htslib`_ "iterator" object for a given file and region. This will be
+wrapped by pbbam to hide the low-level nature of this type, as well as handling
+memory lifetime.
+
+5.1.4. Outputs
+~~~~~~~~~~~~~~
+
+Iterator providing access to individual BAM records from the input data sources,
+which are aligned to the requested genomic interval.
+
+For example:
+
+.. code:: c++
+
+ GenomicIntervalQuery query(interval, dataset);
+ for (const BamRecord& record : query) {
+ // ... do stuff ...
+ }
+
+
+5.1.5. Regulatory Compliance
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+N/A
+
+5.2. Query BAM data by filter criteria
+-----------------------------------------
+
+5.2.1. Description
+~~~~~~~~~~~~~~~~~~
+
+pbbam shall allow client applications to query data, limited to some filter
+criteria (e.g. only reads from ZMW hole number 200 with a read quality of >0.5).
+
+5.2.2. Inputs
+~~~~~~~~~~~~~
+
+* BAM file(s) or DataSet XML
+* a `PacBio BAM index`_ (.pbi) for each source BAM file
+* filters supported by data contained in the PBI
+
+5.2.3. Processing
+~~~~~~~~~~~~~~~~~
+
+Query PBI files(s) for records that match the provided filter criteria. Merge
+contiguous runs of records into record blocks, to minimize seeks. Advancing the
+iterator either reads the next read from the current block or seeks to the next
+block and fetches the next record.
+
+5.2.4. Outputs
+~~~~~~~~~~~~~~
+
+Iterator providing access to individual BAM records from the input data sources,
+which satisfy the requested filter criteria.
+
+For example:
+
+.. code:: c++
+
+ PbiFilterQuery query(filter, dataset);
+ for (const BamRecord& record : query) {
+ // ... do stuff ...
+ }
+
+5.2.5. Regulatory Compliance
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+N/A
+
+5.3. Write PacBio BAM data
+------------------------------------------
+
+5.3.1. Description
+~~~~~~~~~~~~~~~~~~
+
+pbbam shall be able to write `PacBio BAM`_ files conforming to the specification.
+
+5.3.2. Inputs
+~~~~~~~~~~~~~
+
+* filename
+* header information
+* BAM records
+
+5.3.3. Processing
+~~~~~~~~~~~~~~~~~
+
+Create file handle for the provided filename, output initial header information.
+As records are passed in, write to file. Upon completion, flush any buffers and
+close file handle.
+
+Multithreading, provided by `htslib`_, will be utilized where possible to speed
+up the compression process - often then main bottleneck of BAM throughput.
+
+5.3.4. Outputs
+~~~~~~~~~~~~~~
+
+BAM file conforming to the `PacBio BAM`_ specification.
+
+5.3.5. Regulatory Compliance
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+N/A
+
+5.4. Create PacBio BAM index file
+------------------------------------------
+
+5.4.1. Description
+~~~~~~~~~~~~~~~~~~
+
+Much of PacBio BAM data processing relies on the presence of a `PacBio BAM index`_
+file. pbbam shall be able to generate this file type for a `PacBio BAM`_ file.
+
+5.4.2. Inputs
+~~~~~~~~~~~~~
+
+`PacBio BAM`_ file
+
+5.4.3. Processing
+~~~~~~~~~~~~~~~~~
+
+Read through the input BAM records, storing the values relevant to a PBI index.
+At end of file, write the index contents to a file and close.
+
+5.4.4. Outputs
+~~~~~~~~~~~~~~
+
+`PacBio BAM index`_ file
+
+5.4.5. Regulatory Compliance
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+N/A
+
+6. Non-Functional Requirements
+==============================
+
+6.1. Performance Requirements
+-----------------------------
+
+Since pbbam will be used to write all BAM files coming off a Sequel device, the
+library must keep pace with data generation requirements.
+
+** come back to this, hard numbers ?? **
+
+6.2. Safety Requirements
+------------------------
+
+N/A
+
+6.3. Security Requirements
+--------------------------
+
+N/A
+
+6.4. Quality Attributes
+-----------------------
+
+6.4.1. Availability
+~~~~~~~~~~~~~~~~~~~
+
+N/A
+
+6.4.2. Integrity
+~~~~~~~~~~~~~~~~
+
+Inputs and outputs shall adhere to the PacBio BAM or DataSet XML specifications.
+Files that do not meet this requirement will raise exceptions and will not be
+accepted.
+
+6.4.3. Interoperability
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Inputs and outputs shall adhere to the PacBio BAM or DataSet XML specifications.
+
+6.4.4. Reliability
+~~~~~~~~~~~~~~~~~~
+
+The developed software shall meet the overall product reliability requirements.
+
+6.4.5. Robustness
+~~~~~~~~~~~~~~~~~
+
+pbbam will raise exceptions upon encountering failure cases, allowing client
+applications to recover or report the error to a UI.
+
+6.4.6. Usability
+~~~~~~~~~~~~~~~~
+
+pbbam shall have comprehensive API documentation, available both on- and offline.
+Further documentation will be provided for installation, API usage tips, etc.
+
+Raised exceptions shall carry as much information as possible so that client
+applications can respond with appropriate actions or display useful messages.
+
+6.4.7. Maintainability
+~~~~~~~~~~~~~~~~~~~~~~
+
+The source code of the software covered in this functional specification shall
+adhere to the PacBio `Software Style Guide`_ (9) work instruction, to guarantee
+high quality of code that facilitates maintainability.
+
+6.4.8. Customizability
+~~~~~~~~~~~~~~~~~~~~~~
+
+N/A
+
+6.5. Business Rules
+-------------------
+
+N/A
+
+6.6. Installation and Upgrade
+-----------------------------
+
+Installation and Upgrade of this software will be handled as part of the SMRT
+Analysis subsystem. See `SMRT Analysis`_ (6) specifications for more detail.
+
+Additionally, the library may be built independently, either from internal
+version control (Perforce) or from the public-facing Github repository. In
+either case, `CMake`_ is used to drive the build process.
+
+6.7. Administration
+-------------------
+
+N/A
+
+6.8. User Documentation
+-----------------------
+
+pbbam shall have comprehensive API documentation, available both on- and offline.
+Further documentation will be provided for installation, API usage tips, etc.
+
+The "offline" API documentation may be built directly from the source code, using
+`Doxygen`_. Online documentation will be generated via a continuous integration
+server, thus ensuring it is always pointing to the current codebase.
+
+7. High Level Design
+====================
+
+7.1. Top Level Context
+----------------------
+
+The pbbam library is intended to be linked in with client applications,
+providing programmatic access to data files.
+
+7.2. Use Cases
+--------------
+
+Primary use cases for pbbam include:
+
+* BAM file creation
+* BAM file query - iterable access to various subsets of data
+
+8. Detailed Design
+==================
+
+8.1. Structural Representation
+------------------------------
+
+ *image(s) here*
+
+8.2. Behavioral Representation
+------------------------------
+
+This section provides behavioral (dynamic) representation of how the
+elements of the system realize the required use cases.
+
+Describe how the significant subsystems and classes interact with each
+other to realize the architecturally significant use cases.
+
+Provide a link to a file containing Sequence Diagram or Activity Diagram, when applicable.
+The link may be provided with use of 'image' directive.
+
+Sequence Diagram shows one use case scenario, executed by class model,
+with sequence of operations over period of time (time increased from top
+to bottom). It shows interactions between objects, but does not show
+relationships between them.
+
+Activity Diagram is a virtual representation of the sequential flow and
+control logic of a set of related activities or actions. It is a type of
+flowchart, frequently called Swim Lane Diagram, because activities of
+each entity are presented within its swim lane.
+
+Note: You may use http://wsd tool to auto-generate a sequence diagram from
+a descriptive text file, save the diagram to the wsd site, get link to the image,
+and add this link to the document with use of 'image' directive.
+
+8.3. Information Storage
+------------------------
+
+pbbam software requires no persistent storage outside of availability of input
+and output during analysis.
+
+8.4. Technology Overview
+------------------------
+
+pbbam is implemented in C++-11 and should perform as designed on any UNIX-like
+operating system (Linux distributions, Apple OSX, etc.).
+
+8.5. SOUP Components
+--------------------
+
+pbbam utilizes CMake for its build system. The C++ library uses the following
+3rd-party software components: Boost, htslib, & zlib. Wrappers for additional
+languages are generated using SWIG.
+
+8.6. Deployment and Configuration
+---------------------------------
+
+Please refer to `SMRT Analysis`_ (6) documentation
+
+9. Automated Tests
+==================
+
+9.1. Unit Testing
+-----------------
+
+The library shall have unit tests for all classes & components.
+
+9.2. Performance Testing
+------------------------
+
+Unit tests may evaluate performance requirements as desired.
+
+9.3. Regression Testing
+-----------------------
+
+As its role is primarily in data I/O, pbbam has no "scientific quality/validity"
+metrics that would indicate a regression. Instead, passing its unit tests and
+end-to-end tests will indicate that a regression has not been introduced.
+
+These tests will be run after each check-in and nightly.
+
+10. Requirements Traceability Matrices
+======================================
+
+This section provides traces from requirements specified in PRD/DIR documents to the
+requirements covered in this functional specification, and from these
+functional requirements to corresponding Test Cases/Procedures.
+
+10.1. HPQC Functional Specifications
+------------------------------------
+
++-------------+---------------------------+---------------------------------------------------+-------------+------------+-----------+-------------------------------------------+
+| **PBI_ID** | **Name** | **Description** | **Comment** | **Metric** | **Owner** | **PRD/DIR Path** |
++=============+===========================+===================================================+=============+============+===========+===========================================+
+| 5.1 | Query BAM data by | pbbam shall allow client applications to query | | | dbarnett | |
+| | genomic region | data, limited to some genomic region of interest. | | | | |
+| | | | | | | |
++-------------+---------------------------+---------------------------------------------------+-------------+------------+-----------+-------------------------------------------+
+| 5.2 | Query BAM data by | pbbam shall allow client applications to query | | | dbarnett | |
+| | filter criteria | data, limited to some filter criteria (e.g. only | | | | |
+| | | reads from ZMW hole number 200 with a read | | | | |
+| | | quality of >0.5). | | | | |
+| | | | | | | |
++-------------+---------------------------+---------------------------------------------------+-------------+------------+-----------+-------------------------------------------+
+| 5.3 | Write PacBio BAM data | pbbam shall be able to write files conforming to | | | dbarnett | |
+| | | the `PacBio BAM`_ specifictation. | | | | |
+| | | | | | | |
++-------------+---------------------------+---------------------------------------------------+-------------+------------+-----------+-------------------------------------------+
+| 5.4 | Create PacBio BAM index | Much of PacBio BAM data processing relies on the | | | dbarnett | |
+| | file | presence of a `PacBio BAM index`_ file. pbbam | | | | |
+| | | shall be able to generate this file type for a | | | | |
+| | | `PacBio BAM`_ file. | | | | |
+| | | | | | | |
++-------------+---------------------------+---------------------------------------------------+-------------+------------+-----------+-------------------------------------------+
+
+10.2. Automated Tests Coverage
+------------------------------
+
++-------------+---------------------------+----------------------------------------------------+------------------------------------------------------------------+
+| **FS Item** | **FS Item Title** | **Use Case Description** | **Test Case Name/ID** |
++=============+===========================+====================================================+==================================================================+
+| 5.1 | Query BAM data by | pbbam shall allow client applications to query | TODO |
+| | genomic region | data, limited to some genomic region of interest. | |
+| | | | |
++-------------+---------------------------+----------------------------------------------------+------------------------------------------------------------------+
+| 5.2 | Query BAM data by | pbbam shall allow client applications to query | TODO |
+| | filter criteria | data, limited to some filter criteria (e.g. only | |
+| | | reads from ZMW hole number 200 with a read | |
+| | | quality of >0.5). | |
+| | | | |
++-------------+---------------------------+----------------------------------------------------+------------------------------------------------------------------+
+| 5.3 | Write PacBio BAM data | pbbam shall be able to write files conforming to | TODO |
+| | | the `PacBio BAM`_ specifictation. | |
+| | | | |
++-------------+---------------------------+----------------------------------------------------+------------------------------------------------------------------+
+| 5.4 | Create PacBio BAM index | Much of PacBio BAM data processing relies on the | TODO |
+| | file | presence of a `PacBio BAM index`_ file. pbbam | |
+| | | shall be able to generate this file type for a | |
+| | | `PacBio BAM`_ file. | |
+| | | | |
++-------------+---------------------------+----------------------------------------------------+------------------------------------------------------------------+
+
diff --git a/include/pbbam/Accuracy.h b/include/pbbam/Accuracy.h
index 03c233e..f1db014 100644
--- a/include/pbbam/Accuracy.h
+++ b/include/pbbam/Accuracy.h
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file Accuracy.h
+/// \brief Defines the Accuracy class.
+//
// Author: Derek Barnett
#ifndef ACCURACY_H
@@ -44,55 +48,42 @@ namespace PacBio {
namespace BAM {
/// \brief The Accuracy class represents the expected accuracy of a BamRecord.
-/// Values are clamped to [0,1000].
+///
+/// Values are clamped to fall within [0,1].
///
class PBBAM_EXPORT Accuracy
{
public:
- static const int MIN;
- static const int MAX;
+ static const float MIN; ///< Minimum valid accuracy value [0.0]
+ static const float MAX; ///< Maximum valid accuracy value [1.0]
public:
/// \name Constructors & Related Methods
/// \{
- /// \note This is not an 'explicit' ctor, to make it as easy to use in
- /// numeric operations as possible. We really just want to make
- /// sure that the acceptable range is respected.
- Accuracy(int accuracy);
+ /// Constructs an Accuracy object from a floating-point number.
+ ///
+ /// \note This is not an \b explicit ctor, to make it as easy as
+ /// possible to use in numeric operations. We really just want
+ /// to make sure that the acceptable range is respected.
+ ///
+ Accuracy(float accuracy);
Accuracy(const Accuracy& other);
~Accuracy(void);
/// \}
public:
-
- /// \returns Accuracy as integer
- operator int(void) const;
+ /// \returns Accuracy as float primitive
+ operator float(void) const;
private:
- int accuracy_;
+ float accuracy_;
};
-inline Accuracy::Accuracy(int accuracy)
-{
- if (accuracy < Accuracy::MIN)
- accuracy = Accuracy::MIN;
- else if (accuracy > Accuracy::MAX)
- accuracy = Accuracy::MAX;
- accuracy_ = accuracy;
-}
-
-inline Accuracy::Accuracy(const Accuracy &other)
- : accuracy_(other.accuracy_)
-{ }
-
-inline Accuracy::~Accuracy(void) { }
-
-inline Accuracy::operator int(void) const
-{ return accuracy_; }
-
} // namespace BAM
} // namespace PacBio
+#include "pbbam/internal/Accuracy.inl"
+
#endif // ACCURACY_H
diff --git a/include/pbbam/AlignmentPrinter.h b/include/pbbam/AlignmentPrinter.h
index 6424c5f..4dda6cd 100644
--- a/include/pbbam/AlignmentPrinter.h
+++ b/include/pbbam/AlignmentPrinter.h
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file AlignmentPrinter.h
+/// \brief Defines the AlignmentPrinter class.
+//
// Author: Armin Töpfer
#ifndef ALIGNMENTPRINTER_H
@@ -40,7 +44,6 @@
#include <memory>
#include <string>
-
#include "pbbam/BamRecord.h"
#include "pbbam/IndexedFastaReader.h"
#include "pbbam/Orientation.h"
@@ -50,29 +53,53 @@ namespace BAM {
class BamRecord;
+/// \brief The AlignmentPrinter class "pretty-prints" an alignment with respect
+/// to its associated reference sequence.
+///
+/// Example output:
+/// \verbinclude plaintext/AlignmentPrinterOutput.txt
+///
class AlignmentPrinter
{
public:
- AlignmentPrinter(const IndexedFastaReader& ifr)
- : ifr_(std::unique_ptr<IndexedFastaReader>(new IndexedFastaReader(ifr)))
- { }
+ /// \name Constructors & Related Methods
+ /// \{
- AlignmentPrinter() = delete;
- // Move constructor
- AlignmentPrinter(AlignmentPrinter&&) = default;
- // Copy constructor
+ /// Constructs the alignment printer with an associated FASTA file reader.
+ ///
+ /// \param[in] ifr FASTA reader
+ ///
+ /// \throws std::runtime_error if FASTA file cannot be opened for reading.
+ ///
+ AlignmentPrinter(const IndexedFastaReader& ifr);
+
+ AlignmentPrinter(void) = delete;
AlignmentPrinter(const AlignmentPrinter&) = delete;
- // Move assignment operator
- AlignmentPrinter& operator=(AlignmentPrinter&&) = default;
- // Copy assignment operator
+ AlignmentPrinter(AlignmentPrinter&&) = default;
AlignmentPrinter& operator=(const AlignmentPrinter&) = delete;
- // Destructor
- ~AlignmentPrinter() = default;
+ AlignmentPrinter& operator=(AlignmentPrinter&&) = default;
+ ~AlignmentPrinter(void) = default;
+
+ /// \}
public:
+ /// \name Printing
+ /// \{
+
+ /// Pretty-prints an aligned BamRecord to std::string.
+ ///
+ /// \note The current implementation includes ANSI escape sequences for
+ /// coloring terminal output. Future versions of this method will
+ /// likely make this optional.
+ ///
+ /// \returns formatted string containing the alignment and summary
+ /// information
+ ///
std::string Print(const BamRecord& record,
const Orientation orientation = Orientation::GENOMIC);
+ /// \}
+
private:
const std::unique_ptr<IndexedFastaReader> ifr_;
};
diff --git a/include/pbbam/BaiIndexedBamReader.h b/include/pbbam/BaiIndexedBamReader.h
new file mode 100644
index 0000000..7441c69
--- /dev/null
+++ b/include/pbbam/BaiIndexedBamReader.h
@@ -0,0 +1,130 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BaiIndexedBamReader.h
+/// \brief Defines the BaiIndexedBamReader class.
+//
+// Author: Derek Barnett
+
+#ifndef BAIINDEXEDBAMREADER_H
+#define BAIINDEXEDBAMREADER_H
+
+#include "pbbam/BamReader.h"
+#include "pbbam/BamFile.h"
+#include "pbbam/GenomicInterval.h"
+
+namespace PacBio {
+namespace BAM {
+
+namespace internal { struct BaiIndexedBamReaderPrivate; }
+
+/// \brief The BaiIndexedBamReader class provides read-only iteration over %BAM
+/// records, bounded by a particular genomic interval.
+///
+/// The SAM/BAM standard index (*.bai) is used to allow random-access operations.
+///
+class PBBAM_EXPORT BaiIndexedBamReader : public BamReader
+{
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Constructs %BAM reader, bounded by a genomic interval.
+ ///
+ /// All reads that overlap the interval will be available.
+ ///
+ /// \param[in] interval iteration will be bounded by this GenomicInterval.
+ /// \param[in] filename input %BAM filename
+ ///
+ /// \throws std::runtime_error if either file (*.bam or *.bai) fails to open
+ /// for reading, or if the interval is invalid
+ ///
+ BaiIndexedBamReader(const GenomicInterval& interval,
+ const std::string& filename);
+
+ /// \brief Constructs BAM reader, bounded by a genomic interval.
+ ///
+ /// All reads that overlap the interval will be available.
+ ///
+ /// \param[in] interval iteration will be bounded by this GenomicInterval.
+ /// \param[in] bamFile input BamFile object
+ ///
+ /// \throws std::runtime_error if either file (*.bam or *.bai) fails to open
+ /// for reading, or if the interval is invalid
+ ///
+ BaiIndexedBamReader(const GenomicInterval& interval, const BamFile& bamFile);
+
+ /// \brief Constructs %BAM reader, bounded by a genomic interval.
+ ///
+ /// All reads that overlap the interval will be available.
+ ///
+ /// \param[in] interval iteration will be bounded by this GenomicInterval.
+ /// \param[in] bamFile input BamFile object
+ ///
+ /// \throws std::runtime_error if either file (*.bam or *.bai) fails to open
+ /// for reading, or if the interval is invalid
+ ///
+ BaiIndexedBamReader(const GenomicInterval& interval, BamFile&& bamFile);
+
+ /// \}
+
+public:
+ /// \name Random-Access
+ /// \{
+
+ /// \returns the current GenomicInterval in use by this reader
+ const GenomicInterval& Interval(void) const;
+
+ /// \brief Sets a new genomic interval on the reader.
+ ///
+ /// \param[in] interval
+ /// \returns reference to this reader
+ ///
+ BaiIndexedBamReader& Interval(const GenomicInterval& interval);
+
+ /// \}
+
+protected:
+ int ReadRawData(BGZF* bgzf, bam1_t* b);
+
+private:
+ std::unique_ptr<internal::BaiIndexedBamReaderPrivate> d_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // BAIINDEXEDBAMREADER_H
diff --git a/include/pbbam/BamFile.h b/include/pbbam/BamFile.h
index 62da044..8a20299 100644
--- a/include/pbbam/BamFile.h
+++ b/include/pbbam/BamFile.h
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file BamFile.h
+/// \brief Defines the BamFile class.
+//
// Author: Derek Barnett
#ifndef BAMFILE_H
@@ -47,6 +51,11 @@ namespace BAM {
namespace internal { class BamFilePrivate; }
+/// \brief The BamFile class represents a %BAM file.
+///
+/// It provides access to header metadata and methods for finding/creating
+/// associated index files.
+///
class PBBAM_EXPORT BamFile
{
public:
@@ -54,10 +63,12 @@ public:
/// \name Constructors & Related Methods
/// \{
- /// \brief Creates a BamFile object on the provided \p filename & loads header information.
+ /// \brief Creates a BamFile object on the provided \p filename &
+ /// loads header information.
+ ///
+ /// \param[in] filename %BAM filename
+ /// \throws std::exception on failure to open %BAM file for reading
///
- /// \param[in] filename BAM filename
- /// \throws std::exception on failure
BamFile(const std::string& filename);
BamFile(const BamFile& other);
@@ -73,45 +84,87 @@ public:
/// \name Index & Filename Methods
/// \{
- /// Check that ".pbi" exists and is newer than this BAM file.
- /// If not, one will be created.
+ /// \brief Creates a ".pbi" file for this %BAM file.
+ ///
+ /// \note Existing index file will be overwritten. Use
+ /// EnsurePacBioIndexExists() if this is not desired.
///
/// \throws if PBI file could not be properly created and/or
- /// written to disk
+ /// written to disk
+ ///
+ void CreatePacBioIndex(void) const;
+
+ /// \brief Creates a ".bai" file for this %BAM file.
+ ///
+ /// \note Existing index file will be overwritten. Use
+ /// EnsureStandardIndexExists() if this is not desired.
+ ///
+ /// \throws if BAI file could not be properly created (e.g. this
+ /// %BAM is not coordinate-sorted) or could not be written to disk
+ ///
+ void CreateStandardIndex(void) const;
+
+ /// \brief Creates a ".pbi" file if one does not exist or is older than its
+ /// %BAM file.
+ ///
+ /// Equivalent to:
+ /// \code{.cpp}
+ /// if (!file.PacBioIndexExists())
+ /// file.CreatePacBioIndex();
+ /// \endcode
+ ///
+ /// \note As of v0.4.02+, no timestamp check is performed. Previously we requr
+ /// with an additional timestamp check.
+ ///
+ /// \throws if PBI file could not be properly created and/or
+ /// written to disk
///
void EnsurePacBioIndexExists(void) const;
- /// Check that ".bai" exists and is newer than this BAM file.
- /// If not, one will be created.
+ /// \brief Creates a ".bai" file if one does not exist or is older than its
+ /// %BAM file.
+ ///
+ /// Equivalent to:
+ /// \code{.cpp}
+ /// if (!file.StandardIndexExists())
+ /// file.CreateStandardIndex();
+ /// \endcode
+ ///
+ /// \note As of v0.4.2, no timestamp check is performed.
///
/// \throws if BAI file could not be properly created (e.g. this
- /// BAM is not coordinate-sorted) or could not be written to disk
+ /// %BAM is not coordinate-sorted) or could not be written to disk
///
void EnsureStandardIndexExists(void) const;
- /// \returns BAM filename
+ /// \returns %BAM filename
std::string Filename(void) const;
- /// \returns true if ".pbi" exists and is newer than this BAM file.
+ /// \returns true if ".pbi" exists and is newer than this %BAM file.
bool PacBioIndexExists(void) const;
- /// \returns filename of PacBio index file (".pbi")
+ /// \returns filename of %PacBio index file (".pbi")
/// \note No guarantee is made on the existence of this file.
/// This method simply returns the expected filename.
std::string PacBioIndexFilename(void) const;
- /// \returns true if ".bai" exists and is newer than this BAM file.
+ /// \returns true if ".pbi" has a more recent timestamp than this file
+ bool PacBioIndexIsNewer(void) const;
+
+ /// \returns true if ".bai" exists
bool StandardIndexExists(void) const;
- /// \returns filename of standard index file (".bai")
/// \note No guarantee is made on the existence of this file.
/// This method simply returns the expected filename.
std::string StandardIndexFilename(void) const;
+ /// \returns true if ".bai" has a more recent timestamp than this file
+ bool StandardIndexIsNewer(void) const;
/// \}
- /// \name Header Metadata Methods
+public:
+ /// \name File Header Data
/// \{
/// \returns true if header metadata has this reference name
@@ -120,10 +173,12 @@ public:
/// \returns const reference to BamHeader containing the file's metadata
const BamHeader& Header(void) const;
- /// \returns true if BAM file is a PacBio BAM file (i.e. has non-empty version associated with header "pb" tag)
+ /// \returns true if file is a %PacBio %BAM file (i.e. has non-empty version
+ /// associated with header "pb" tag)
bool IsPacBioBAM(void) const;
- /// \returns ID for reference \p name (can be used for e.g. GenomicIntervalQuery), -1 if not found
+ /// \returns ID for reference \p name (can be used for e.g.
+ /// GenomicIntervalQuery), or -1 if not found
int ReferenceId(const std::string& name) const;
/// \return name of reference matching \p id, empty string if not found
@@ -137,8 +192,20 @@ public:
/// \}
+public:
+ /// \name Additional Attributes
+ /// \{
+
+ /// \returns virtual offset of first alignment. Intended mostly for internal
+ /// use. Note that this is a BGZF \b virtual offset, not a
+ /// 'normal' file position.
+ ///
+ int64_t FirstAlignmentOffset(void) const;
+
+ /// \}
+
private:
- PBBAM_SHARED_PTR<internal::BamFilePrivate> d_;
+ std::unique_ptr<internal::BamFilePrivate> d_;
};
} // namespace BAM
diff --git a/include/pbbam/BamHeader.h b/include/pbbam/BamHeader.h
index 9dea3cc..eada466 100644
--- a/include/pbbam/BamHeader.h
+++ b/include/pbbam/BamHeader.h
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file BamHeader.h
+/// \brief Defines the BamHeader class.
+//
// Author: Derek Barnett
#ifndef BAMHEADER_H
@@ -51,6 +55,24 @@ namespace BAM {
namespace internal { class BamHeaderPrivate; }
+/// \brief The BamHeader class represents the header section of the %BAM file.
+///
+/// It provides metadata about the file including file version, reference
+/// sequences, read groups, comments, etc.
+///
+/// A BamHeader may be fetched from a BamFile to view an existing file's
+/// metadata. Or one may be created/edited for use with writing to a new file
+/// (via BamWriter).
+///
+/// \note A particular BamHeader is likely to be re-used in lots of places
+/// throughout the library, for read-only purposes. For this reason, even
+/// though a BamHeader may be returned by value, it is essentially a thin
+/// wrapper for a shared-pointer to the actual data. This means, though,
+/// that if you need to edit an existing BamHeader for use with a
+/// BamWriter, please consider using BamHeader::DeepCopy. Otherwise any
+/// modifications will affect all BamHeaders that are sharing its
+/// underlying data.
+///
class PBBAM_EXPORT BamHeader
{
public:
@@ -65,111 +87,321 @@ public:
BamHeader& operator=(BamHeader&& other);
~BamHeader(void);
+ /// \brief Detaches underlying data from the shared-pointer, returning a
+ /// independent copy of the header contents.
+ ///
+ /// This ensures that any modifications to the newly returned BamHeader do
+ /// not affect other BamHeader objects that were sharing its underlying data.
+ ///
BamHeader DeepCopy(void) const;
/// \}
public:
- /// \name General
+ /// \name Operators
/// \{
+ /// \brief Merges another header with this one.
+ ///
+ /// Headers must be compatible for merging. This means that their Version,
+ /// SortOrder, PacBioBamVersion (and in the case of aligned BAM data,
+ /// Sequences) must all match. If not, an exception will be thrown.
+ ///
+ /// \param[in] other header to merge with this one
+ /// \returns reference to this header
+ ///
+ /// \throws std::runtime_error if the headers are not compatible
+ ///
+ BamHeader& operator+=(const BamHeader& other);
+
+ /// \brief Creates a new, merged header.
+ ///
+ /// Headers must be compatible for merging. This means that their Version,
+ /// SortOrder, PacBioBamVersion (and in the case of aligned BAM data,
+ /// Sequences) must all match. If not, an exception will be thrown.
+ ///
+ /// Both original headers (this header and \p other) will not be modified.
+ ///
+ /// \param[in] other header to merge with this one
+ /// \returns merged header
+ ///
+ /// \throws std::runtime_error if the headers are not compatible
+ ///
+ BamHeader operator+(const BamHeader& other) const;
+
+ /// \}
+
+public:
+ /// \name General Attributes
+ /// \{
+
+ /// \returns the %PacBio %BAM version number (\@HD:pb)
+ ///
+ /// \note This is different from the SAM/BAM version number
+ /// \sa BamHeader::Version.
+ ///
std::string PacBioBamVersion(void) const;
+
+ /// \returns the sort order used
+ ///
+ /// Valid values: "unknown", "unsorted", "queryname", or "coordinate"
+ ///
std::string SortOrder(void) const;
+
+ /// \returns the SAM/BAM version number (\@HD:VN)
+ ///
+ /// \note This is different from the %PacBio %BAM version number
+ /// \sa BamHeader::PacBioBamVersion
+ ///
std::string Version(void) const;
/// \}
+public:
/// \name Read Groups
/// \{
+ /// \returns true if the header contains a read group with \p id (\@RG:ID)
bool HasReadGroup(const std::string& id) const;
+
+ /// \returns a ReadGroupInfo object representing the read group matching
+ /// \p id (\@RG:ID)
+ /// \throws std::runtime_error if \p id is unknown
+ ///
ReadGroupInfo ReadGroup(const std::string& id) const;
+
+ /// \returns vector of read group IDs listed in this header
std::vector<std::string> ReadGroupIds(void) const;
+
+ /// \returns vector of ReadGroupInfo objects, representing all read groups
+ /// listed in this header
+ ///
std::vector<ReadGroupInfo> ReadGroups(void) const;
/// \}
+public:
/// \name Sequences
/// \{
+ /// \returns true if header contains a sequence with \p name (\@SQ:SN)
bool HasSequence(const std::string& name) const;
+
+ /// \returns number of sequences (\@SQ entries) stored in this header
+ size_t NumSequences(void) const;
+
+ /// \returns numeric ID for sequence matching \p name (\@SQ:SN)
+ ///
+ /// This is the numeric ID used elsewhere throughout the API.
+ ///
+ /// \throws std::runtime_error if \p name is unknown
+ /// \sa BamReader::ReferenceId, PbiReferenceIdFilter,
+ /// PbiRawMappedData::tId_
+ ///
int32_t SequenceId(const std::string& name) const;
+
+ /// \returns the length of the sequence (\@SQ:LN, e.g. chromosome length) at
+ /// index \p id
+ ///
+ /// \sa SequenceInfo::Length, BamHeader::SequenceId
+ ///
std::string SequenceLength(const int32_t id) const;
+
+ /// \returns the name of the sequence (\@SQ:SN) at index \p id
+ ///
+ /// \sa SequenceInfo::Name, BamHeader::SequenceId
+ ///
std::string SequenceName(const int32_t id) const;
+
+ /// \returns vector of sequence names (\@SQ:SN) stored in this header
+ ///
+ /// Position in the vector is equivalent to SequenceId.
+ ///
std::vector<std::string> SequenceNames(void) const;
+
+ /// \returns SequenceInfo object at index \p id
+ ///
+ /// \throws std::out_of_range if \p is an invalid or unknown index
+ /// \sa BamHeader::SequenceId
+ ///
SequenceInfo Sequence(const int32_t id) const;
+
+ /// \returns SequenceInfo for the sequence matching \p name
SequenceInfo Sequence(const std::string& name) const;
+
+ /// \returns vector of SequenceInfo objects representing the sequences
+ /// (\@SQ entries) stored in this header
+ ///
std::vector<SequenceInfo> Sequences(void) const;
/// \}
+public:
/// \name Programs
/// \{
+ /// \returns true if this header contains a program entry with ID (\@PG:ID)
+ /// matching \p id
+ ///
bool HasProgram(const std::string& id) const;
+
+ /// \returns ProgramInfo object for the program entry matching \p id
+ /// \throws std::runtime_error if \p id is unknown
+ ///
ProgramInfo Program(const std::string& id) const;
+
+ /// \returns vector of program IDs (\@PG:ID)
std::vector<std::string> ProgramIds(void) const;
+
+ /// \returns vector of ProgramInfo objects representing program entries
+ /// (\@PG) stored in this heder
+ ///
std::vector<ProgramInfo> Programs(void) const;
/// \}
+public:
/// \name Comments
/// \{
+ /// \returns vector of comment (\@CO) strings
std::vector<std::string> Comments(void) const;
/// \}
+public:
/// \name Conversion Methods
/// \{
+ /// \returns SAM-header-formatted string representing this header's data
std::string ToSam(void) const;
/// \}
public:
- /// \name General
+ /// \name General Attributes
/// \{
+ /// \brief Sets this header's PacBioBAM version number (\@HD:pb).
+ ///
+ /// \returns reference to this object
+ /// \throws std::runtime_error if version number cannot be parsed or
+ /// is less than the minimum version allowed.
+ ///
BamHeader& PacBioBamVersion(const std::string& version);
+
+ /// \brief Sets this header's sort order label (\@HD:SO).
+ ///
+ /// Valid values: "unknown", "unsorted", "queryname", or "coordinate"
+ ///
+ /// \returns reference to this object
+ ///
BamHeader& SortOrder(const std::string& order);
+
+ /// \brief Sets this header's SAM/BAM version number (\@HD:VN).
+ ///
+ /// \returns reference to this object
+ ///
BamHeader& Version(const std::string& version);
/// \}
+public:
/// \name Read Groups
/// \{
+ /// \brief Appends a read group entry (\@RG) to this header.
+ ///
+ /// \returns reference to this object
+ ///
BamHeader& AddReadGroup(const ReadGroupInfo& readGroup);
+
+ /// \brief Removes all read group entries from this header.
+ ///
+ /// \returns reference to this object
+ ///
BamHeader& ClearReadGroups(void);
+
+ /// \brief Replaces this header's list of read group entries with those in
+ /// \p readGroups.
+ ///
+ /// \returns reference to this object
+ ///
BamHeader& ReadGroups(const std::vector<ReadGroupInfo>& readGroups);
/// \}
+public:
/// \name Sequences
/// \{
+ /// \brief Appends a sequence entry (\@SQ) to this header.
+ ///
+ /// \returns reference to this object
+ ///
BamHeader& AddSequence(const SequenceInfo& sequence);
+
+ /// \brief Removes all sequence entries from this header.
+ ///
+ /// \returns reference to this object
+ ///
BamHeader& ClearSequences(void);
+
+ /// \brief Replaces this header's list of sequence entries with those in
+ /// \p sequences.
+ ///
+ /// \returns reference to this object
+ ///
BamHeader& Sequences(const std::vector<SequenceInfo>& sequences);
/// \}
+public:
/// \name Programs
/// \{
+ /// \brief Appends a program entry (\@PG) to this header.
+ ///
+ /// \returns reference to this object
+ ///
BamHeader& AddProgram(const ProgramInfo& pg);
+
+ /// \brief Removes all program entries from this header.
+ ///
+ /// \returns reference to this object
+ ///
BamHeader& ClearPrograms(void);
+
+ /// \brief Replaces this header's list of program entries with those in
+ /// \p programs.
+ ///
+ /// \returns reference to this object
+ ///
BamHeader& Programs(const std::vector<ProgramInfo>& programs);
/// \}
+public:
/// \name Comments
/// \{
+ /// \brief Appends a comment (\@CO) to this header.
+ ///
+ /// \returns reference to this object
+ ///
BamHeader& AddComment(const std::string& comment);
+
+ /// \brief Removes all comments from this header.
+ ///
+ /// \returns reference to this object
+ ///
BamHeader& ClearComments(void);
+
+ /// \brief Replaces this header's list of comments with those in \p comments.
+ ///
+ /// \returns reference to this object
+ ///
BamHeader& Comments(const std::vector<std::string>& comments);
/// \}
@@ -181,4 +413,6 @@ private:
} // namespace BAM
} // namespace PacBio
+#include "pbbam/internal/BamHeader.inl"
+
#endif // BAMHEADER_H
diff --git a/include/pbbam/BamReader.h b/include/pbbam/BamReader.h
index bd0ced6..774a2ec 100644
--- a/include/pbbam/BamReader.h
+++ b/include/pbbam/BamReader.h
@@ -32,80 +32,157 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file BamReader.h
+/// \brief Defines the BamReader class.
+//
// Author: Derek Barnett
#ifndef BAMREADER_H
#define BAMREADER_H
+#include "pbbam/BamFile.h"
#include "pbbam/BamHeader.h"
#include "pbbam/BamRecord.h"
#include "pbbam/Config.h"
+#include "pbbam/GenomicInterval.h"
+
+#include <htslib/sam.h>
+#include <memory>
#include <string>
namespace PacBio {
namespace BAM {
+namespace internal { struct BamReaderPrivate; }
+
+/// \brief The BamReader class provides basic read-access to a %BAM file.
+///
+/// The base-class implementation provides a sequential read-through of BAM
+/// records. Derived classes may implement other access schemes (e.g. genomic
+/// region, PBI-enabled record filtering).
+///
class PBBAM_EXPORT BamReader
{
-
public:
- enum ReadError
- {
- NoError = 0
- , OpenFileError
- , ReadHeaderError
- , ReadRecordError
- };
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Opens BAM file for reading.
+ ///
+ /// \param[in] fn %BAM filename
+ /// \throws std::runtime_error if failed to open
+ ///
+ explicit BamReader(const std::string& fn);
+
+ /// \brief Opens BAM file for reading.
+ ///
+ /// \param[in] bamFile BamFile object
+ /// \throws std::runtime_error if failed to open
+ ///
+ explicit BamReader(const BamFile& bamFile);
+
+ /// \brief Opens BAM file for reading.
+ ///
+ /// \param[in] bamFile BamFile object
+ /// \throws std::runtime_error if failed to open
+ ///
+ explicit BamReader(BamFile&& bamFile);
-public:
- BamReader(void);
virtual ~BamReader(void);
+ /// \}
+
public:
+ /// \name BAM File Attributes
+ /// \{
+
+ /// \returns the underlying BamFile
+ const BamFile& File(void) const;
+
+ /// \returns %BAM filename
+ std::string Filename(void) const;
+
+ /// \returns BamHeader object from %BAM header contents
+ const BamHeader& Header(void) const;
- /// Closes the BAM file reader.
- void Close(void);
+ /// \}
- /// Opens a BAM file for reading.
+public:
+ /// \name BAM File I/O
+ /// \{
+
+ /// \brief Fetches the "next" %BAM record.
+ ///
+ /// Default implementation will read records until EOF. Derived readers may
+ /// use additional criteria to decide which record is "next" and when
+ /// reading is done.
///
- /// Prefix \p filename with "http://" or "ftp://" for remote files,
- /// or set to "-" for stdin.
+ /// \param[out] record next BamRecord object. Should not be used if method
+ /// returns false.
///
- /// \param[in] filename path to input BAM file
+ /// \returns true if record was read successfully. Returns false if EOF (or
+ /// end of iterator in derived readers). False is not an error,
+ /// it indicates "end of data".
///
- /// \returns success/failure
- bool Open(const std::string& filename);
+ /// \throws std::runtime_error if failed to read from file (e.g. possible
+ /// truncated or corrupted file).
+ ///
+ bool GetNext(BamRecord& record);
- /// \returns header as BamHeader object
- BamHeader::SharedPtr Header(void) const;
+ /// \brief Seeks to virtual offset in %BAM.
+ ///
+ /// \note This is \b NOT a normal file offset, but the virtual offset used
+ /// in %BAM indexing.
+ ///
+ /// \throws std::runtime_error if failed to seek
+ ///
+ void VirtualSeek(int64_t virtualOffset);
- /// \returns error status code
- BamReader::ReadError Error(void) const;
+ /// \returns current (virtual) file position.
+ ///
+ /// \note This is \b NOT a normal file offset, but the virtual offset used
+ /// in %BAM indexing.
+ ///
+ int64_t VirtualTell(void) const;
- /// \returns true if error encountered
- bool HasError(void) const;
+ /// \}
- /// Fetches the next record in a BAM file.
+protected:
+ /// \name BAM File I/O
+ /// \{
+
+ /// \brief Helper method for access to underlying BGZF stream pointer.
+ ///
+ /// Useful for derived readers' contact points with htslib methods.
///
- /// \param[out] record pointer to BamRecord object
+ /// \returns BGZF stream pointer
///
- /// \returns succcess/failure
- bool GetNext(PBBAM_SHARED_PTR<BamRecord> record);
+ BGZF* Bgzf(void) const;
-public:
- std::string PacBioBamVersion(void) const;
+ /// \brief Performs the actual raw read of the next record from the BAM
+ /// file.
+ ///
+ /// Default implementation will read records, sequentially, until EOF.
+ /// Derived readers may use additional criteria to decide which record is
+ /// "next" and when reading is done.
+ ///
+ /// Return value should be equivalent to htslib's bam_read1():
+ /// >= 0 : normal
+ /// -1 : EOF (not an error)
+ /// < -1 : error
+ ///
+ /// \param[in] bgzf BGZF stream pointer
+ /// \param[out] b %BAM record pointer
+ /// \returns integer status code, see description
+ ///
+ virtual int ReadRawData(BGZF* bgzf, bam1_t* b);
-protected:
- bool GetNext(PBBAM_SHARED_PTR<bam1_t> rawRecord);
- void InitialOpen(void);
- PBBAM_SHARED_PTR<bam_hdr_t> RawHeader(void) const;
+ /// \}
-protected:
- PBBAM_SHARED_PTR<samFile> file_;
- PBBAM_SHARED_PTR<bam_hdr_t> header_;
- std::string filename_;
- BamReader::ReadError error_;
+private:
+ std::unique_ptr<internal::BamReaderPrivate> d_;
};
} // namespace BAM
diff --git a/include/pbbam/BamRecord.h b/include/pbbam/BamRecord.h
index 8630e4b..9184121 100644
--- a/include/pbbam/BamRecord.h
+++ b/include/pbbam/BamRecord.h
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file BamRecord.h
+/// \brief Defines the BamRecord & BamRecordView classes.
+//
// Author: Derek Barnett
#ifndef BAMRECORD_H
@@ -48,8 +52,10 @@
#include "pbbam/Strand.h"
#include "pbbam/QualityValues.h"
#include "pbbam/virtual/VirtualRegionType.h"
+#include "pbbam/ZmwType.h"
#include <memory>
#include <string>
+#include <utility>
#include <vector>
namespace PacBio {
@@ -57,34 +63,66 @@ namespace BAM {
namespace internal { class BamRecordMemory; }
+/// \brief This enum defines the modes supported by BamRecord clipping
+/// operations.
+///
+/// Methods like BamRecord::Clip accept Position parameters - which may be in
+/// either polymerase or reference coorindates. Using this enum as a flag
+/// indicates how the positions should be interpreted.
+///
enum class ClipType
{
- CLIP_NONE
- , CLIP_TO_QUERY
- , CLIP_TO_REFERENCE
+ CLIP_NONE ///< No clipping will be performed.
+ , CLIP_TO_QUERY ///< Clipping positions are in polymerase coordinates.
+ , CLIP_TO_REFERENCE ///< Clipping positions are in genomic coordinates.
};
+/// \brief This enum defines the possible PacBio BAM record types.
+///
+/// \sa ReadGroupInfo::ReadType
+///
enum class RecordType
{
- POLYMERASE
- , HQREGION
- , SUBREAD
- , CCS
- , SCRAP
- , UNKNOWN
+ POLYMERASE ///< Polymerase read
+ , HQREGION ///< High-quality region
+ , SUBREAD ///< Subread (
+ , CCS ///< Circular consensus sequence
+ , SCRAP ///< Additional sequence (barcodes, adapters, etc.)
+ , UNKNOWN ///< Unknown read type
};
+/// \brief This enum defines the possible encoding modes used in Frames data
+/// (e.g. BamRecord::IPD or BamRecord::PulseWidth).
+///
+/// The LOSSY mode is the default in production output; LOSSLESS mode
+/// being used primarily for internal applications.
+///
+/// \sa https://github.com/PacificBiosciences/PacBioFileFormats/blob/3.0/BAM.rst
+/// for more information on pulse frame encoding schemes.
+///
enum class FrameEncodingType
{
- LOSSY
- , LOSSLESS
+ LOSSY ///< 8-bit compression (using CodecV1) of frame data
+ , LOSSLESS ///< 16-bit native frame data
};
+/// \brief The BamRecord class represents a %PacBio %BAM record.
+///
+/// %PacBio %BAM records are extensions of normal SAM/BAM records. Thus in
+/// addition to normal fields like bases, qualities, mapping coordinates, etc.,
+/// tags are used extensively to annotate records with additional
+/// PacBio-specific data.
+///
+/// Mapping and clipping APIs are provided as well to ensure that such
+/// operations "trickle down" to all data fields properly.
+///
+/// \sa https://samtools.github.io/hts-specs/SAMv1.pdf
+/// for more information on standard %BAM data, and
+/// https://github.com/PacificBiosciences/PacBioFileFormats/blob/3.0/BAM.rst
+/// for more information on %PacBio %BAM fields.
+///
class PBBAM_EXPORT BamRecord
{
-public: // static data
- static const float photonFactor;
-
public:
/// \name Constructors & Related Methods
/// \{
@@ -102,34 +140,208 @@ public:
/// \}
public:
- /// \name Per-Record Data
+ /// \name General Data
/// \{
- /// \note AlignedStart is in polymerase read coordinates, NOT genomic coordinates.
+ /// \returns this record's full name
+ /// \sa BamRecordImpl::Name
///
- /// \returns the record's aligned start position
- Position AlignedStart(void) const;
+ std::string FullName(void) const;
- /// \note AlignedEnd is in polymerase read coordinates, NOT genomic coordinates.
+ /// \returns shared pointer to this record's associated BamHeader
+ BamHeader Header(void) const;
+
+ /// \returns ZMW hole number
+ /// \throws if missing zm tag & record name does not contain hole number
///
+ int32_t HoleNumber(void) const;
+
+ /// \returns this record's LocalContextFlags
+ PacBio::BAM::LocalContextFlags LocalContextFlags(void) const;
+
+ /// \returns this record's movie name
+ std::string MovieName(void) const;
+
+ /// \returns "number of complete passes of the insert"
+ int32_t NumPasses(void) const;
+
+ /// \returns the record's query end position, or Sequence().length() if not
+ /// stored
+ /// \note QueryEnd is in polymerase read coordinates, NOT genomic
+ /// coordinates.
+ ///
+ Position QueryEnd(void) const;
+
+ /// \returns the record's query start position, or 0 if not stored
+ ///
+ /// \note QueryStart is in polymerase read coordinates, NOT genomic
+ /// coordinates.
+ ///
+ Position QueryStart(void) const;
+
+ /// \returns this record's expected read accuracy [0, 1000]
+ Accuracy ReadAccuracy(void) const;
+
+ /// \returns ReadGroupInfo object for this record
+ ReadGroupInfo ReadGroup(void) const;
+
+ /// \returns string ID of this record's read group
+ /// \sa ReadGroupInfo::Id
+ ///
+ std::string ReadGroupId(void) const;
+
+ /// \returns integer value for this record's read group ID
+ int32_t ReadGroupNumericId(void) const;
+
+ /// \returns this scrap record's scrap region type
+ VirtualRegionType ScrapRegionType(void) const;
+
+ /// \returns this scrap record's scrap ZMW type
+ ZmwType ScrapZmwType(void) const;
+
+ /// \returns this record's average signal-to-noise for each of A, C, G,
+ /// and T
+ ///
+ std::vector<float> SignalToNoise(void) const;
+
+ /// \returns this record's type
+ /// \sa RecordType
+ RecordType Type(void) const;
+
+ /// \}
+
+public:
+ /// \name Mapping Data
+ /// \{
+
/// \returns the record's aligned end position
+ ///
+ /// \note AlignedEnd is in polymerase read coordinates, NOT genomic
+ /// coordinates.
+ ///
Position AlignedEnd(void) const;
+ /// \returns the record's aligned start position
+ ///
+ /// \note AlignedStart is in polymerase read coordinates, NOT genomic
+ /// coordinates.
+ ///
+ Position AlignedStart(void) const;
+
/// \returns the record's strand as a Strand enum value
Strand AlignedStrand(void) const;
/// \returns the record's CIGAR data as a Cigar object
- Cigar CigarData(void) const;
+ ///
+ /// \param[in] exciseAllClips if true, remove all clipping operations
+ /// (hard & soft) [default:false]
+ ///
+ Cigar CigarData(bool exciseAllClips = false) const;
- /// \returns this record's full name
- /// \sa BamRecordImpl::Name
- std::string FullName(void) const;
+ /// \returns true if this record was mapped by aligner
+ bool IsMapped(void) const;
+
+ /// \returns this record's mapping quality. A value of 255 indicates
+ /// "unknown"
+ ///
+ uint8_t MapQuality(void) const;
+
+ /// \returns the number of deleted bases (relative to reference)
+ size_t NumDeletedBases(void) const;
+
+ /// \returns the number of inserted bases (relative to reference)
+ size_t NumInsertedBases(void) const;
+
+ /// \returns the number of matching bases (sum of '=' CIGAR op lengths)
+ size_t NumMatches(void) const;
+
+ /// \returns a tuple containing NumMatches (first) and NumMismatches
+ /// (second)
+ ///
+ std::pair<size_t, size_t> NumMatchesAndMismatches(void) const;
+
+ /// \returns the number of mismatching bases (sum of 'X' CIGAR op lengths)
+ size_t NumMismatches(void) const;
+
+ /// \returns this record's reference ID, or -1 if unmapped.
+ ///
+ /// \note This is only a valid identifier within this %BAM file
+ ///
+ int32_t ReferenceId(void) const;
+
+ /// \returns this record's reference name.
+ ///
+ /// \throws an exception if unmapped record.
+ ///
+ std::string ReferenceName(void) const;
+
+ /// \returns the record's reference end position, or UnmappedPosition if
+ /// unmapped
+ ///
+ /// \note ReferenceEnd is in reference coordinates, NOT polymerase read
+ /// coordinates.
+ ///
+ Position ReferenceEnd(void) const;
+
+ /// \returns the record's reference start position, or UnmappedPosition if
+ /// unmapped
+ ///
+ /// \note ReferenceStart is in reference coordinates, NOT polymerase read
+ /// coordinates.
+ ///
+ Position ReferenceStart(void) const;
+
+ /// \}
+
+public:
+ /// \name Barcode Data
+ /// \{
+
+ /// \returns forward barcode id
+ ///
+ /// \throws std::runtime_error if barcode data is absent or malformed.
+ /// \sa HasBarcodes
+ ///
+ uint16_t BarcodeForward(void) const;
+
+ /// \returns barcode call confidence (Phred-scaled posterior probability
+ /// of correct barcode call)
+ ///
+ /// \sa HasBarcodeQuality
+ ///
+ uint8_t BarcodeQuality(void) const;
+
+ /// \returns reverse barcode id
+ ///
+ /// \throws std::runtime_error if barcode data is absent or malformed.
+ /// \sa HasBarcodes
+ ///
+ uint16_t BarcodeReverse(void) const;
+
+ /// \returns the forward and reverse barcode ids
+ ///
+ /// \throws std::runtime_error if barcode data is absent or malformed.
+ /// \sa HasBarcodes
+ ///
+ std::pair<uint16_t,uint16_t> Barcodes(void) const;
+
+ /// \}
+
+public:
+ /// \name Auxiliary Data Queries
+ /// \{
/// \returns true if this record has AltLabelQV data
bool HasAltLabelQV(void) const;
- /// \returns true if this record has LabelQV data
- bool HasLabelQV(void) const;
+ /// \returns true if this record has AltLabelTag data
+ bool HasAltLabelTag(void) const;
+
+ /// \returns true if this record has Barcode data
+ bool HasBarcodes(void) const;
+
+ /// \returns true is this record has BarcodeQuality data
+ bool HasBarcodeQuality(void) const;
/// \returns true if this record has DeletionQV data
bool HasDeletionQV(void) const;
@@ -137,20 +349,38 @@ public:
/// \returns true if this record has DeletionTag data
bool HasDeletionTag(void) const;
- /// \returns true if this record has LocalContextFlags (absent in CCS)
- bool HasLocalContextFlags(void) const;
+ /// \returns true if this record has a HoleNumber
+ bool HasHoleNumber(void) const;
/// \returns true if this record has InsertionQV data
bool HasInsertionQV(void) const;
+ /// \returns true if this record has IPD data
+ bool HasIPD(void) const;
+
+ /// \returns true if this record has LabelQV data
+ bool HasLabelQV(void) const;
+
+ /// \returns true if this record has LocalContextFlags (absent in CCS)
+ bool HasLocalContextFlags(void) const;
+
+ /// \returns true if this record has MergeQV data
+ bool HasMergeQV(void) const;
+
+ /// \returns true if this record has NumPasses data
+ bool HasNumPasses(void) const;
+
/// \returns true if this record has Pkmean data
bool HasPkmean(void) const;
/// \returns true if this record has Pkmid data
bool HasPkmid(void) const;
- /// \returns true if this record has IPD data
- bool HasIPD(void) const;
+ /// \returns true if this record has Pkmean2 data
+ bool HasPkmean2(void) const;
+
+ /// \returns true if this record has Pkmid2 data
+ bool HasPkmid2(void) const;
/// \returns true if this record has PreBaseFrames aka IPD data
bool HasPreBaseFrames(void) const;
@@ -158,135 +388,125 @@ public:
/// \returns true if this record has PrePulseFrames data
bool HasPrePulseFrames(void) const;
+ /// \returns true if this record has PulseCall data
+ bool HasPulseCall(void) const;
+
/// \returns true if this record has PulseCallWidth data
bool HasPulseCallWidth(void) const;
- /// \returns true if this record has MergeQV data
- bool HasMergeQV(void) const;
-
/// \returns true if this record has PulseMergeQV data
bool HasPulseMergeQV(void) const;
/// \returns true if this record has PulseWidth data
bool HasPulseWidth(void) const;
- /// \returns true if this record has signal-to-noise data (absent in POLYMERASE)
- bool HasSignalToNoise(void) const;
-
- /// \returns true if this record has ScrapType data (only in SCRAP)
- bool HasScrapType(void) const;
-
- /// \returns true if this record has SubstitutionQV data
- bool HasSubstitutionQV(void) const;
-
- /// \returns true if this record has SubstitutionTag data
- bool HasSubstitutionTag(void) const;
-
- /// \returns true if this record has AltLabelTag data
- bool HasAltLabelTag(void) const;
-
- /// \returns true if this record has PulseCall data
- bool HasPulseCall(void) const;
-
/// \returns true if this record has ReadAccuracyTag data
bool HasReadAccuracy(void) const;
- /// \returns true if this record has a HoleNumber
- bool HasHoleNumber(void) const;
+ /// \returns true if this record has QueryEnd data
+ bool HasQueryEnd(void) const;
/// \returns true if this record has QueryStart data
bool HasQueryStart(void) const;
- /// \returns true if this record has QueryEnd data
- bool HasQueryEnd(void) const;
+ /// \returns true if this record has ScrapRegionType data (only in SCRAP)
+ bool HasScrapRegionType(void) const;
- /// \returns true if this record has Barcode data
- bool HasBarcodes(void) const;
-
- /// \returns shared pointer to this record's associated BamHeader
- BamHeader Header(void) const;
+ /// \returns true if this record has scrap ZMW type data (only in SCRAP)
+ bool HasScrapZmwType(void) const;
- /// \returns ZMW hole number
- /// \throws if missing zm tag & record name does not contain hole number
- int32_t HoleNumber(void) const;
+ /// \returns true if this record has signal-to-noise data (absent in
+ /// POLYMERASE)
+ ///
+ bool HasSignalToNoise(void) const;
- /// \returns true if this record was mapped by aligner
- /// \sa BamRecordImpl::IsMapped
- bool IsMapped(void) const;
+ /// \returns true if this record has StartFrame data
+ bool HasStartFrame(void) const;
- /// \returns this record's LocalContextFlags
- PacBio::BAM::LocalContextFlags LocalContextFlags(void) const;
+ /// \returns true if this record has SubstitutionQV data
+ bool HasSubstitutionQV(void) const;
- /// \returns this record's mapping quality. A value of 255 indicates "unknown"
- uint8_t MapQuality(void) const;
+ /// \returns true if this record has SubstitutionTag data
+ bool HasSubstitutionTag(void) const;
- /// \returns this record's movie name
- std::string MovieName(void) const;
+ /// \}
- /// \returns "number of complete passes of the insert"
- int32_t NumPasses(void) const;
+public:
+ /// \name Sequence & Tag Data
+ /// \{
- /// \note QueryStart is in polymerase read coordinates, NOT genomic coordinates.
+ /// \brief Fetches this record's AltLabelTag values ("pt" tag).
///
- /// \returns the record's query start position, or 0 if not stored
- Position QueryStart(void) const;
-
- /// \note QueryEnd is in polymerase read coordinates, NOT genomic coordinates.
+ /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+ /// new gap chars will be '-' and padding chars will be '*'.
///
- /// \returns the record's query end position, or Sequence().length() if not stored
- Position QueryEnd(void) const;
-
- /// \returns the left and right barcode ids
- std::pair<int,int> Barcodes(void) const;
-
- /// \returns this record's expected read accuracy [0, 1000]
- Accuracy ReadAccuracy(void) const;
-
- /// \returns ReadGroupInfo object for this record
- ReadGroupInfo ReadGroup(void) const;
-
- /// \returns ID of this record's read group
- /// \sa ReadGroupInfo::Id
- std::string ReadGroupId(void) const;
-
- /// \returns this record's reference ID, or -1 if unmapped.
- /// \note This is only a valid identifier within this BAM file
- int32_t ReferenceId(void) const;
-
- /// \returns this record's reference name.
- /// \throws an exception if unmapped record.
- std::string ReferenceName(void) const;
-
- /// \note ReferenceStart is in reference coordinates, NOT polymerase read coordinates.
+ /// \param[in] orientation Orientation of output.
///
- /// \returns the record's reference start position, or UnmappedPosition if unmapped
- Position ReferenceStart(void) const;
-
- /// \note ReferenceEnd is in reference coordinates, NOT polymerase read coordinates.
+ /// \returns AltLabelTags string
///
- /// \returns the record's reference end position, or UnmappedPosition if unmapped
- Position ReferenceEnd(void) const;
+ std::string AltLabelTag(Orientation orientation = Orientation::NATIVE) const;
- /// \returns this scrap record's ScrapType
- VirtualRegionType ScrapType(void) const;
+ /// \brief Fetches this record's DeletionTag values ("dt" tag).
+ ///
+ /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+ /// new gap chars will be '-' and padding chars will be '*'.
+ ///
+ /// \param[in] orientation Orientation of output.
+ /// \param[in] aligned if true, gaps/padding will be inserted, per
+ /// Cigar info.
+ /// \param[in] exciseSoftClips if true, any soft-clipped positions will be
+ /// removed from query ends
+ ///
+ /// \returns DeletionTag string
+ ///
+ std::string DeletionTag(Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false) const;
- /// \returns this record's average signal-to-noise for each of A, C, G, and T
- std::vector<float> SignalToNoise(void) const;
+ /// \brief Fetches this record's DNA sequence (SEQ field).
+ ///
+ /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+ /// new gap chars will be '-' and padding chars will be '*'.
+ ///
+ /// \param[in] orientation Orientation of output.
+ /// \param[in] aligned if true, gaps/padding will be inserted, per
+ /// Cigar info.
+ /// \param[in] exciseSoftClips if true, any soft-clipped positions will be
+ /// removed from query ends
+ ///
+ /// \returns sequence string
+ ///
+ std::string Sequence(const Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false) const;
- /// \returns this record's type
- /// \sa RecordType
- RecordType Type(void) const;
+ /// \brief Fetches this record's SubstitutionTag values ("st" tag).
+ ///
+ /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+ /// new gap chars will be '-' and padding chars will be '*'.
+ ///
+ /// \param[in] orientation Orientation of output.
+ /// \param[in] aligned if true, gaps/padding will be inserted, per
+ /// Cigar info.
+ /// \param[in] exciseSoftClips if true, any soft-clipped positions will be
+ /// removed from query ends
+ ///
+ /// \returns SubstitutionTags string
+ ///
+ std::string SubstitutionTag(Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false) const;
/// \}
public:
- /// \name Per-Base Data
+ /// \name Quality Data
/// \{
- /// \brief Fetch this record's AltLabelQV values ("pv" tag).
+ /// \brief Fetches this record's AltLabelQV values ("pv" tag).
///
- /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
- /// QVs will have a value of 0.
+ /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+ /// new QVs will have a value of 0.
///
/// \param[in] orientation Orientation of output.
///
@@ -294,70 +514,118 @@ public:
///
QualityValues AltLabelQV(Orientation orientation = Orientation::NATIVE) const;
- /// \brief Fetch this record's AltLabelTag values ("pt" tag).
+ /// \brief Fetches this record's DeletionQV values ("dq" tag).
///
- /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
- /// gap chars will be '-' and padding chars will be '*'.
+ /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+ /// new QVs will have a value of 0.
///
- /// \param[in] orientation Orientation of output.
+ /// \param[in] orientation Orientation of output.
+ /// \param[in] aligned if true, gaps/padding will be inserted, per
+ /// Cigar info.
+ /// \param[in] exciseSoftClips if true, any soft-clipped positions will be
+ /// removed from query ends
///
- /// \returns AltLabelTags string
+ /// \returns DeletionQV as QualityValues object
///
- std::string AltLabelTag(Orientation orientation = Orientation::NATIVE) const;
+ QualityValues DeletionQV(Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false) const;
- /// \brief Fetch this record's DeletionQV values ("dq" tag).
+ /// \brief Fetches this record's InsertionQV values ("iq" tag).
///
- /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
- /// QVs will have a value of 0.
+ /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+ /// new QVs will have a value of 0.
///
- /// \param[in] orientation Orientation of output.
- /// \param[in] aligned if true, gaps/padding will be inserted, per Cigar info.
- /// \param[in] exciseSoftClips if true, any soft-clipped positions will be removed from query ends
+ /// \param[in] orientation Orientation of output.
+ /// \param[in] aligned if true, gaps/padding will be inserted, per
+ /// Cigar info.
+ /// \param[in] exciseSoftClips if true, any soft-clipped positions will be
+ /// removed from query ends
///
- /// \returns DeletionQV as QualityValues object
+ /// \returns InsertionQVs as QualityValues object
///
- QualityValues DeletionQV(Orientation orientation = Orientation::NATIVE,
- bool aligned = false,
- bool exciseSoftClips = false) const;
+ QualityValues InsertionQV(Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false) const;
- /// \brief Fetch this record's DeletionTag values ("dt" tag).
+ /// \brief Fetches this record's LabelQV values ("pq" tag).
///
- /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
- /// gap chars will be '-' and padding chars will be '*'.
+ /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+ /// new QVs will have a value of 0.
///
/// \param[in] orientation Orientation of output.
- /// \param[in] aligned if true, gaps/padding will be inserted, per Cigar info.
- /// \param[in] exciseSoftClips if true, any soft-clipped positions will be removed from query ends
///
- /// \returns DeletionTag string
+ /// \returns LabelQV as QualityValues object
///
- std::string DeletionTag(Orientation orientation = Orientation::NATIVE,
+ QualityValues LabelQV(Orientation orientation = Orientation::NATIVE) const;
+
+ /// \brief Fetches this record's MergeQV values ("mq" tag).
+ ///
+ /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+ /// new QVs will have a value of 0.
+ ///
+ /// \param[in] orientation Orientation of output.
+ /// \param[in] aligned if true, gaps/padding will be inserted, per
+ /// Cigar info.
+ /// \param[in] exciseSoftClips if true, any soft-clipped positions will be
+ /// removed from query ends
+ ///
+ /// \returns MergeQV as QualityValues object
+ ///
+ QualityValues MergeQV(Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false) const;
+
+ /// \brief Fetches this record's %BAM quality values (QUAL field).
+ ///
+ /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+ /// new QVs will have a value of 0.
+ ///
+ /// \param[in] orientation Orientation of output.
+ /// \param[in] aligned if true, gaps/padding will be inserted, per
+ /// Cigar info.
+ /// \param[in] exciseSoftClips if true, any soft-clipped positions will be
+ /// removed from query ends
+ ///
+ /// \returns %BAM qualities as QualityValues object
+ ///
+ QualityValues Qualities(Orientation orientation = Orientation::NATIVE,
bool aligned = false,
bool exciseSoftClips = false) const;
- /// \brief Fetch this record's InsertionQV values ("iq" tag).
+ /// \brief Fetches this record's SubstitutionQV values ("sq" tag).
///
- /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
- /// QVs will have a value of 0.
+ /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+ /// new QVs will have a value of 0.
///
- /// \param[in] orientation Orientation of output.
- /// \param[in] aligned if true, gaps/padding will be inserted, per Cigar info.
- /// \param[in] exciseSoftClips if true, any soft-clipped positions will be removed from query ends
+ /// \param[in] orientation Orientation of output.
+ /// \param[in] aligned if true, gaps/padding will be inserted, per
+ /// Cigar info.
+ /// \param[in] exciseSoftClips if true, any soft-clipped positions will be
+ /// removed from query ends
///
- /// \returns InsertionQVs as QualityValues object
+ /// \returns SubstitutionQV as QualityValues object
///
- QualityValues InsertionQV(Orientation orientation = Orientation::NATIVE,
- bool aligned = false,
- bool exciseSoftClips = false) const;
+ QualityValues SubstitutionQV(Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false) const;
- /// \brief Fetch this record's IPD values ("ip" tag).
+ /// \}
+
+public:
+ /// \name Pulse Data
+ /// \{
+
+ /// \brief Fetches this record's IPD values ("ip" tag).
///
- /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
- /// frames will have a value of 0;
+ /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+ /// new frames will have a value of 0;
///
- /// \param[in] orientation Orientation of output.
- /// \param[in] aligned if true, gaps/padding will be inserted, per Cigar info.
- /// \param[in] exciseSoftClips if true, any soft-clipped positions will be removed from query ends
+ /// \param[in] orientation Orientation of output.
+ /// \param[in] aligned if true, gaps/padding will be inserted, per
+ /// Cigar info.
+ /// \param[in] exciseSoftClips if true, any soft-clipped positions will be
+ /// removed from query ends
///
/// \returns IPD as Frames object
///
@@ -365,432 +633,514 @@ public:
bool aligned = false,
bool exciseSoftClips = false) const;
- /// \brief Fetch this record's PreBaseFrames aka IPD values ("ip" tag).
- ///
- /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
- /// frames will have a value of 0;
+ /// \brief Fetches this record's IPD values ("ip" tag), but does not upscale.
///
/// \param[in] orientation Orientation of output.
- /// \param[in] aligned if true, gaps/padding will be inserted, per Cigar info.
- /// \param[in] exciseSoftClips if true, any soft-clipped positions will be removed from query ends
- ///
/// \returns IPD as Frames object
///
- Frames PreBaseFrames(Orientation orientation = Orientation::NATIVE,
- bool aligned = false,
- bool exciseSoftClips = false) const;
+ Frames IPDRaw(Orientation orientation = Orientation::NATIVE) const;
- /// \brief Fetch this record's IPD values ("ip" tag), but does not upscale.
+ /// \brief Fetches this record's Pkmean values ("pa" tag).
///
/// \param[in] orientation Orientation of output.
+ /// \returns Pkmean as vector<float> object
///
- /// \returns IPD as Frames object
+ std::vector<float> Pkmean(Orientation orientation = Orientation::NATIVE) const;
+
+ /// \brief Fetches this record's Pkmid values ("pm" tag).
///
- Frames IPDRaw(Orientation orientation = Orientation::NATIVE) const;
+ /// \param[in] orientation Orientation of output.
+ /// \returns Pkmid as vector<float> object
+ ///
+ std::vector<float> Pkmid(Orientation orientation = Orientation::NATIVE) const;
- /// \brief Fetch this record's LabelQV values ("pq" tag).
+ /// \brief Fetches this record's Pkmean2 values ("pi" tag).
+ ///
+ /// \param[in] orientation Orientation of output.
+ /// \returns Pkmean as vector<float> object
///
- /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
- /// QVs will have a value of 0.
+ std::vector<float> Pkmean2(Orientation orientation = Orientation::NATIVE) const;
+
+ /// \brief Fetches this record's Pkmid2 values ("ps" tag).
///
/// \param[in] orientation Orientation of output.
+ /// \returns Pkmid as vector<float> object
///
- /// \returns LabelQV as QualityValues object
+ std::vector<float> Pkmid2(Orientation orientation = Orientation::NATIVE) const;
+
+ /// \brief Fetches this record's PreBaseFrames aka IPD values ("ip" tag).
///
- QualityValues LabelQV(Orientation orientation = Orientation::NATIVE) const;
+ /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+ /// new frames will have a value of 0;
+ ///
+ /// \param[in] orientation Orientation of output.
+ /// \param[in] aligned if true, gaps/padding will be inserted, per
+ /// Cigar info.
+ /// \param[in] exciseSoftClips if true, any soft-clipped positions will be
+ /// removed from query ends
+ ///
+ /// \returns IPD as Frames object
+ ///
+ Frames PreBaseFrames(Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false) const;
- /// \brief Fetch this record's MergeQV values ("mq" tag).
+ /// \brief Fetches this record's PrePulseFrames values ("pd" tag).
///
- /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
- /// QVs will have a value of 0.
+ /// \param[in] orientation Orientation of output.
+ /// \returns PrePulseFrames as Frames object
+ ///
+ Frames PrePulseFrames(Orientation orientation = Orientation::NATIVE) const;
+
+ /// \brief Fetches this record's PulseCall values ("pc" tag).
///
/// \param[in] orientation Orientation of output.
- /// \param[in] aligned if true, gaps/padding will be inserted, per Cigar info.
- /// \param[in] exciseSoftClips if true, any soft-clipped positions will be removed from query ends
+ /// \returns PulseCalls string
///
- /// \returns MergeQV as QualityValues object
+ std::string PulseCall(Orientation orientation = Orientation::NATIVE) const;
+
+ /// \brief Fetches this record's PulseCallWidth values ("px" tag).
///
- QualityValues MergeQV(Orientation orientation = Orientation::NATIVE,
- bool aligned = false,
- bool exciseSoftClips = false) const;
+ /// \param[in] orientation Orientation of output.
+ /// \returns PulseCallWidth as Frames object
+ ///
+ Frames PulseCallWidth(Orientation orientation = Orientation::NATIVE) const;
/// \brief Fetch this record's PulseMergeQV values ("pg" tag).
///
- /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
- /// QVs will have a value of 0.
- ///
/// \param[in] orientation Orientation of output.
- /// \param[in] aligned if true, gaps/padding will be inserted, per Cigar info.
- /// \param[in] exciseSoftClips if true, any soft-clipped positions will be removed from query ends
- ///
/// \returns PulseMergeQV as QualityValues object
///
QualityValues PulseMergeQV(Orientation orientation = Orientation::NATIVE) const;
- /// \brief Fetch this record's Pkmean values ("pa" tag).
+ /// \brief Fetches this record's PulseWidth values ("pw" tag).
///
- /// \param[in] orientation Orientation of output.
+ /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+ /// new frames will have a value of 0.
///
- /// \returns Pkmean as vector<float> object
+ /// \param[in] orientation Orientation of output.
+ /// \param[in] aligned if true, gaps/padding will be inserted, per
+ /// Cigar info.
+ /// \param[in] exciseSoftClips if true, any soft-clipped positions will be
+ /// removed from query ends
///
- std::vector<float> Pkmean(Orientation orientation = Orientation::NATIVE) const;
+ /// \returns PulseWidths as Frames object
+ ///
+ Frames PulseWidth(Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false) const;
- /// \brief Fetch this record's Pkmid values ("pm" tag).
+ /// \brief Fetches this record's PulseWidth values ("pw" tag), but does not
+ /// upscale.
///
/// \param[in] orientation Orientation of output.
+ /// \returns PulseWidth as Frames object
///
- /// \returns Pkmid as vector<float> object
- ///
- std::vector<float> Pkmid(Orientation orientation = Orientation::NATIVE) const;
+ Frames PulseWidthRaw(Orientation orientation = Orientation::NATIVE) const;
- /// \brief Fetch this record's PrePulseFrames values ("pd" tag).
- ///
- /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
- /// frames will have a value of 0;
+ /// \brief Fetches this record's StartFrame values ("sf" tag).
///
- /// \param[in] orientation Orientation of output.
+ /// \param[in] orientation Orientation of output
///
- /// \returns PrePulseFrames as Frames object
+ /// \returns StartFrame as uint32_t vector
///
- Frames PrePulseFrames(Orientation orientation = Orientation::NATIVE) const;
+ std::vector<uint32_t> StartFrame(Orientation orientation = Orientation::NATIVE) const;
+
+ /// \}
+
+public:
+ /// \name Low-Level Access & Operations
+ /// \{
- /// \brief Fetch this record's PulseCall values ("pc" tag).
+ /// \warning This method should be considered temporary and avoided as much
+ /// as possible. Direct access to the internal object is likely to
+ /// disappear as BamRecord interface matures.
///
- /// \param[in] orientation Orientation of output.
+ /// \returns const reference to underlying BamRecordImpl object
///
- /// \returns PulseCalls string
+ const BamRecordImpl& Impl(void) const;
+
+ /// \warning This method should be considered temporary and avoided as much
+ /// as possible. Direct access to the internal object is likely to
+ /// disappear as BamRecord interface matures.
///
- std::string PulseCall(Orientation orientation = Orientation::NATIVE) const;
+ /// \returns reference to underlying BamRecordImpl object
+ ///
+ BamRecordImpl& Impl(void);
+
+ /// \}
+
+public:
+ /// \name General Data
+ /// \{
- /// \brief Fetch this record's PulseCallWidth values ("px" tag).
+ /// \brief Sets this record's ZMW hole number.
///
- /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
- /// frames will have a value of 0;
+ /// \param[in] holeNumber
+ /// \returns reference to this record
///
- /// \param[in] orientation Orientation of output.
+ BamRecord& HoleNumber(const int32_t holeNumber);
+
+ /// \brief Sets this record's local context flags
///
- /// \returns PulseCallWidth as Frames object
+ /// \param[in] flags
+ /// \returns reference to this record
///
- Frames PulseCallWidth(Orientation orientation = Orientation::NATIVE) const;
+ BamRecord& LocalContextFlags(const PacBio::BAM::LocalContextFlags flags);
- /// \brief Fetch this record's PulseWidth values ("pw" tag).
+ /// \brief Sets this record's "number of complete passes of the insert".
+ ///
+ /// \param[in] numPasses
+ /// \returns reference to this record
///
- /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
- /// frames will have a value of 0.
+ BamRecord& NumPasses(const int32_t numPasses);
+
+ /// \brief Sets this record's query end position.
///
- /// \param[in] orientation Orientation of output.
- /// \param[in] aligned if true, gaps/padding will be inserted, per Cigar info.
- /// \param[in] exciseSoftClips if true, any soft-clipped positions will be removed from query ends
+ /// \note Changing this will modify the name of non-CCS records.
///
- /// \returns PulseWidths as Frames object
+ /// \param[in] pos
+ /// \returns reference to this record
///
- Frames PulseWidth(Orientation orientation = Orientation::NATIVE,
- bool aligned = false,
- bool exciseSoftClips = false) const;
+ BamRecord& QueryEnd(const PacBio::BAM::Position pos);
- /// \brief Fetch this record's PulseWidth values ("pw" tag), but does not upscale.
+ /// \brief Sets this record's query start position.
///
- /// \param[in] orientation Orientation of output.
+ /// \note Changing this will modify the name of non-CCS records.
///
- /// \returns PulseWidth as Frames object
+ /// \param[in] pos
+ /// \returns reference to this record
///
- Frames PulseWidthRaw(Orientation orientation = Orientation::NATIVE) const;
+ BamRecord& QueryStart(const PacBio::BAM::Position pos);
- /// \brief Fetch this record's BAM quality values (QUAL field).
+ /// \brief Sets this record's expected read accuracy [0, 1000]
///
- /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
- /// QVs will have a value of 0.
+ /// \param[in] accuracy
+ /// \returns reference to this record
///
- /// \param[in] orientation Orientation of output.
- /// \param[in] aligned if true, gaps/padding will be inserted, per Cigar info.
- /// \param[in] exciseSoftClips if true, any soft-clipped positions will be removed from query ends
+ BamRecord& ReadAccuracy(const Accuracy& accuracy);
+
+ /// \brief Attaches this record to the provided read group, changing the
+ /// record name & 'RG' tag.
///
- /// \returns BAM qualities as QualityValues object
+ /// \param[in] rg
+ /// \returns reference to this record
///
- QualityValues Qualities(Orientation orientation = Orientation::NATIVE,
- bool aligned = false,
- bool exciseSoftClips = false) const;
+ BamRecord& ReadGroup(const ReadGroupInfo& rg);
- /// \brief Fetch this record's DNA sequence (SEQ field).
+ /// \brief Attaches this record to the provided read group, changing the
+ /// record name & 'RG' tag.
///
- /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
- /// gap chars will be '-' and padding chars will be '*'.
+ /// \param[in] id
+ /// \returns reference to this record
///
- /// \param[in] orientation Orientation of output.
- /// \param[in] aligned if true, gaps/padding will be inserted, per Cigar info.
- /// \param[in] exciseSoftClips if true, any soft-clipped positions will be removed from query ends
+ BamRecord& ReadGroupId(const std::string& id);
+
+ /// \brief Sets this scrap record's ScrapRegionType
///
- /// \returns sequence string
+ /// \param[in] type
+ /// \returns reference to this record
///
- std::string Sequence(const Orientation orientation = Orientation::NATIVE,
- bool aligned = false,
- bool exciseSoftClips = false) const;
+ BamRecord& ScrapRegionType(const VirtualRegionType type);
- /// \brief Fetch this record's SubstitutionQV values ("sq" tag).
+ /// \brief Sets this scrap record's ScrapRegionType
///
- /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
- /// QVs will have a value of 0.
+ /// \param[in] type character equivalent of VirtualRegionType
+ /// \returns reference to this record
///
- /// \param[in] orientation Orientation of output.
- /// \param[in] aligned if true, gaps/padding will be inserted, per Cigar info.
- /// \param[in] exciseSoftClips if true, any soft-clipped positions will be removed from query ends
+ BamRecord& ScrapRegionType(const char type);
+
+ /// \brief Sets this scrap record's ScrapZmwType
///
- /// \returns SubstitutionQV as QualityValues object
+ /// \param[in] type
+ /// \returns reference to this record
///
- QualityValues SubstitutionQV(Orientation orientation = Orientation::NATIVE,
- bool aligned = false,
- bool exciseSoftClips = false) const;
+ BamRecord& ScrapZmwType(const ZmwType type);
- /// \brief Fetch this record's SubstitutionTag values ("st" tag).
+ /// \brief Sets this scrap record's ScrapZmwType
///
- /// \note If \p aligned is true, and gaps/padding need to be inserted, the new
- /// gap chars will be '-' and padding chars will be '*'.
+ /// \param[in] type character equivalent of ZmwType
+ /// \returns reference to this record
///
- /// \param[in] orientation Orientation of output.
- /// \param[in] aligned if true, gaps/padding will be inserted, per Cigar info.
- /// \param[in] exciseSoftClips if true, any soft-clipped positions will be removed from query ends
+ BamRecord& ScrapZmwType(const char type);
+
+ /// \brief Sets this record's average signal-to-noise in each of A, C, G,
+ /// and T
///
- /// \returns SubstitutionTags string
+ /// \param[in] snr average signal-to-noise of A, C, G, and T (in this order)
+ /// \returns reference to this record
///
- std::string SubstitutionTag(Orientation orientation = Orientation::NATIVE,
- bool aligned = false,
- bool exciseSoftClips = false) const;
+ BamRecord& SignalToNoise(const std::vector<float>& snr);
/// \}
public:
- /// \name Low-Level
+ /// \name Barcode Data
/// \{
- /// \warning This method should be considered temporary and avoided as much as possible.
- /// Direct access to the internal object is likely to disappear as BamRecord interface matures.
+ /// \brief Sets this record's barcode IDs ('bc' tag)
///
- /// \returns const reference to underlying BamRecordImpl object
- const BamRecordImpl& Impl(void) const;
+ /// \param[in] barcodeIds
+ /// \returns reference to this record
+ ///
+ BamRecord& Barcodes(const std::pair<uint16_t,uint16_t>& barcodeIds);
- /// \warning This method should be considered temporary and avoided as much as possible.
- /// Direct access to the internal object is likely to disappear as BamRecord interface matures.
+ /// \brief Sets this record's barcode quality ('bq' tag)
///
- /// \returns reference to underlying BamRecordImpl object
- BamRecordImpl& Impl(void);
+ /// \param[in] quality Phred-scaled confidence call
+ /// \returns reference to this record
+ ///
+ BamRecord& BarcodeQuality(const uint8_t quality);
/// \}
public:
- /// \name Per-Record Data
+ /// \name Sequence & Tag Data
/// \{
+
+ /// \brief Sets this record's AltLabelTag values ("at" tag).
+ ///
+ /// \param[in] tags
+ /// \returns reference to this record
///
+ BamRecord& AltLabelTag(const std::string& tags);
- /// Sets this record's ZMW hole number.
+ /// \brief Sets this record's DeletionTag values ("dt" tag).
///
- /// \param[in] numPasses
+ /// \param[in] tags
/// \returns reference to this record
- BamRecord& HoleNumber(const int32_t holeNumber);
+ ///
+ BamRecord& DeletionTag(const std::string& tags);
- /// Sets this record's local context flags
+ /// \brief Sets this record's SubstitutionTag values ("st" tag).
///
- /// \param[in] flags
+ /// \param[in] tags
/// \returns reference to this record
- BamRecord& LocalContextFlags(const PacBio::BAM::LocalContextFlags flags);
+ ///
+ BamRecord& SubstitutionTag(const std::string& tags);
+
+ /// \}
+
+public:
+ /// \name Quality Data
+ /// \{
- /// Sets this record's "number of complete passes of the insert".
+ /// \brief Sets this record's AltLabelQV values ("pv" tag).
///
- /// \param[in] numPasses
+ /// \param[in] altLabelQVs
/// \returns reference to this record
- BamRecord& NumPasses(const int32_t numPasses);
+ ///
+ BamRecord& AltLabelQV(const QualityValues& altLabelQVs);
- /// Sets this record's expected read accuracy [0, 1000]
+ /// \brief Sets this record's DeletionQV values ("dq" tag).
///
- /// \param[in] accuracy
+ /// \param[in] deletionQVs
/// \returns reference to this record
- BamRecord& ReadAccuracy(const Accuracy& accuracy);
+ ///
+ BamRecord& DeletionQV(const QualityValues& deletionQVs);
- /// Sets this record's average signal-to-noise in each of A, C, G, and T
+ /// \brief Sets this record's InsertionQV values ("iq" tag).
///
- /// \param[in] average signal-to-noise of A, C, G, and T (in this order)
+ /// \param[in] insertionQVs
/// \returns reference to this record
- BamRecord& SignalToNoise(const std::vector<float>& snr);
+ ///
+ BamRecord& InsertionQV(const QualityValues& insertionQVs);
+
+ /// \brief Sets this record's LabelQV values ("pq" tag).
+ ///
+ /// \param[in] labelQVs
+ /// \returns reference to this record
+ ///
+ BamRecord& LabelQV(const QualityValues& labelQVs);
- /// Sets this scrap record's ScrapType
+ /// \brief Sets this record's MergeQV values ("mq" tag).
///
- /// \param[in] ScrapType of type VirtualRegionType
+ /// \param[in] mergeQVs
/// \returns reference to this record
- BamRecord& ScrapType(const VirtualRegionType type);
+ ///
+ BamRecord& MergeQV(const QualityValues& mergeQVs);
- /// Sets this scrap record's ScrapType
+ /// \brief Sets this record's SubstitutionQV values ("sq" tag).
///
- /// \param[in] ScrapType as char
+ /// \param[in] substitutionQVs
/// \returns reference to this record
- BamRecord& ScrapType(const char type);
+ ///
+ BamRecord& SubstitutionQV(const QualityValues& substitutionQVs);
/// \}
public:
- /// \name Per-Base Data
+ /// \name Pulse Data
/// \{
- /// Sets this record's AltLabelQV values ("pv" tag).
+ /// \brief Sets this record's IPD values ("ip" tag).
///
- /// \param[in] altLabelQVs
+ /// \param[in] frames
+ /// \param[in] encoding specify how to encode the data (8-bit lossy, or
+ /// 16-bit lossless)
/// \returns reference to this record
- BamRecord& AltLabelQV(const QualityValues& altLabelQVs);
-
- /// Sets this record's LabelQV values ("pq" tag).
///
- /// \param[in] labelQVs
- /// \returns reference to this record
- BamRecord& LabelQV(const QualityValues& labelQVs);
+ BamRecord& IPD(const Frames& frames,
+ const FrameEncodingType encoding);
- /// Sets this record's DeletionQV values ("dq" tag).
+ /// \brief Sets this record's Pkmean values ("pm" tag).
///
- /// \param[in] deletionQVs
+ /// \param[in] photons
/// \returns reference to this record
- BamRecord& DeletionQV(const QualityValues& deletionQVs);
-
- /// Sets this record's DeletionTag values ("dt" tag).
///
- /// \param[in] tags
- /// \returns reference to this record
- BamRecord& DeletionTag(const std::string& tags);
+ BamRecord& Pkmean(const std::vector<float>& photons);
- /// Sets this record's InsertionQV values ("iq" tag).
+ /// \brief Sets this record's Pkmean values ("pm" tag).
///
- /// \param[in] insertionQVs
+ /// \param[in] encodedPhotons
/// \returns reference to this record
- BamRecord& InsertionQV(const QualityValues& insertionQVs);
+ ///
+ BamRecord& Pkmean(const std::vector<uint16_t>& encodedPhotons);
- /// Sets this record's Pkmid values ("pa" tag).
+ /// \brief Sets this record's Pkmid values ("pa" tag).
///
/// \param[in] photons
/// \returns reference to this record
+ ///
BamRecord& Pkmid(const std::vector<float>& photons);
- /// Sets this record's Pkmid values ("pa" tag).
+ /// \brief Sets this record's Pkmid values ("pa" tag).
///
- /// \param[in] encoded photons
+ /// \param[in] encodedPhotons
/// \returns reference to this record
+ ///
BamRecord& Pkmid(const std::vector<uint16_t>& encodedPhotons);
- /// Sets this record's Pkmean values ("pm" tag).
+ /// \brief Sets this record's Pkmean2 values ("ps" tag).
///
/// \param[in] photons
/// \returns reference to this record
- BamRecord& Pkmean(const std::vector<float>& photons);
+ ///
+ BamRecord& Pkmean2(const std::vector<float>& photons);
- /// Sets this record's Pkmean values ("pm" tag).
+ /// \brief Sets this record's Pkmean2 values ("ps" tag).
///
- /// \param[in] encoded photons
+ /// \param[in] encodedPhotons
/// \returns reference to this record
- BamRecord& Pkmean(const std::vector<uint16_t>& encodedPhotons);
+ ///
+ BamRecord& Pkmean2(const std::vector<uint16_t>& encodedPhotons);
- /// Sets this record's IPD values ("ip" tag).
+ /// \brief Sets this record's Pkmid2 values ("pi" tag).
///
- /// \param[in] frames
- /// \param[in] encoding specify how to encode the data (8-bit lossy, or 16-bit lossless)
+ /// \param[in] photons
/// \returns reference to this record
- BamRecord& IPD(const Frames& frames,
- const FrameEncodingType encoding);
+ ///
+ BamRecord& Pkmid2(const std::vector<float>& photons);
+
+ /// \brief Sets this record's Pkmid2 values ("pi" tag).
+ ///
+ /// \param[in] encodedPhotons
+ /// \returns reference to this record
+ ///
+ BamRecord& Pkmid2(const std::vector<uint16_t>& encodedPhotons);
- /// Sets this record's PreBaseFrames aka IPD values ("ip" tag).
+ /// \brief Sets this record's PreBaseFrames aka IPD values ("ip" tag).
///
/// \param[in] frames
- /// \param[in] encoding specify how to encode the data (8-bit lossy, or 16-bit lossless)
+ /// \param[in] encoding specify how to encode the data (8-bit lossy, or
+ /// 16-bit lossless)
/// \returns reference to this record
+ ///
BamRecord& PreBaseFrames(const Frames& frames,
const FrameEncodingType encoding);
- /// Sets this record's PrePulseFrames values ("pd" tag).
+ /// \brief Sets this record's PrePulseFrames values ("pd" tag).
///
/// \param[in] frames
- /// \param[in] encoding specify how to encode the data (8-bit lossy, or 16-bit lossless)
+ /// \param[in] encoding specify how to encode the data (8-bit lossy, or
+ /// 16-bit lossless)
/// \returns reference to this record
+ ///
BamRecord& PrePulseFrames(const Frames& frames,
const FrameEncodingType encoding);
- /// Sets this record's PulseCallWidth values ("px" tag).
+ /// \brief Sets this record's PulseCall values ("pc" tag).
///
- /// \param[in] frames
- /// \param[in] encoding specify how to encode the data (8-bit lossy, or 16-bit lossless)
+ /// \param[in] tags
/// \returns reference to this record
- BamRecord& PulseCallWidth(const Frames& frames,
- const FrameEncodingType encoding);
+ ///
+ BamRecord& PulseCall(const std::string& tags);
- /// Sets this record's MergeQV values ("mq" tag).
+ /// \brief Sets this record's PulseCallWidth values ("px" tag).
///
- /// \param[in] mergeQVs
+ /// \param[in] frames
+ /// \param[in] encoding specify how to encode the data (8-bit lossy, or
+ /// 16-bit lossless)
/// \returns reference to this record
- BamRecord& MergeQV(const QualityValues& mergeQVs);
+ ///
+ BamRecord& PulseCallWidth(const Frames& frames,
+ const FrameEncodingType encoding);
- /// Sets this record's PulseMergeQV values ("pg" tag).
+ /// \brief Sets this record's PulseMergeQV values ("pg" tag).
///
/// \param[in] pulseMergeQVs
/// \returns reference to this record
+ ///
BamRecord& PulseMergeQV(const QualityValues& pulseMergeQVs);
- /// Sets this record's PulseWidth values ("pw" tag).
+ /// \brief Sets this record's PulseWidth values ("pw" tag).
///
/// \param[in] frames
- /// \param[in] encoding specify how to encode the data (8-bit lossy, or 16-bit lossless)
+ /// \param[in] encoding specify how to encode the data (8-bit lossy, or
+ /// 16-bit lossless)
/// \returns reference to this record
+ ///
BamRecord& PulseWidth(const Frames& frames,
const FrameEncodingType encoding);
- /// Sets this record's SubstitutionQV values ("sq" tag).
+ /// \brief Sets this record's StartFrame values ("sf" tag).
///
- /// \param[in] substitutionQVs
+ /// \param[in] startFrame
/// \returns reference to this record
- BamRecord& SubstitutionQV(const QualityValues& substitutionQVs);
+ ///
+ BamRecord& StartFrame(const std::vector<uint32_t>& startFrame);
+
+ /// \}
+
+public:
+ /// \name Low-Level Access & Operations
+ /// \{
- /// Sets this record's SubstitutionTag values ("st" tag).
+ /// \brief Resets cached aligned start/end.
///
- /// \param[in] tags
- /// \returns reference to this record
- BamRecord& SubstitutionTag(const std::string& tags);
+ /// \note This method should not be needed in most client code. It exists
+ /// primarily as a hook for internal reading loops (queries, index
+ /// build, etc.) It's essentially a workaround and will likely be
+ /// removed from the API.
+ ///
+ void ResetCachedPositions(void) const;
- /// Sets this record's AltLabelTag values ("at" tag).
+ /// \brief Resets cached aligned start/end.
///
- /// \param[in] tags
- /// \returns reference to this record
- BamRecord& AltLabelTag(const std::string& tags);
+ /// \note This method should not be needed in most client code. It exists
+ /// primarily as a hook for internal reading loops (queries, index
+ /// build, etc.) It's essentially a workaround and will likely be
+ /// removed from the API.
+ ///
+ void ResetCachedPositions(void);
- /// Sets this record's PulseCall values ("pc" tag).
+ /// \brief Updates the record's name (BamRecord::FullName) to reflect
+ /// modifications to name components (movie name, ZMW hole number,
+ /// etc.)
///
- /// \param[in] tags
- /// \returns reference to this record
- BamRecord& PulseCall(const std::string& tags);
+ void UpdateName(void);
/// \}
public:
- BamRecord& QueryEnd(const PacBio::BAM::Position pos);
- BamRecord& QueryStart(const PacBio::BAM::Position pos);
-
- /// Resets cached aligned start/end.
- ///
- /// \note This method should not be needed in most client code. It exists
- /// primarily as a hook for internal reading loops (queries, index build, etc.)
- /// It's essentially a workaround and will likely be removed from the API as
- /// soon as possible.
- ///
- void ResetCachedPositions(void) const;
-
- /// Resets cached aligned start/end.
- ///
- /// \note This method should not be needed in most client code. It exists
- /// primarily as a hook for internal reading loops (queries, index build, etc.)
- /// It's essentially a workaround and will likely be removed from the API as
- /// soon as possible.
- ///
- void ResetCachedPositions(void);
-
- void UpdateName(void);
+ /// \name Pulse Data
+ /// \{
- static std::vector<uint16_t> EncodePhotons(const std::vector<float>& data);
+ static const float photonFactor;
- BamRecord& ReadGroup(const ReadGroupInfo& rg);
- BamRecord& ReadGroupId(const std::string& id);
+ static std::vector<uint16_t> EncodePhotons(const std::vector<float>& data);
-// BamRecord& ReferenceStart(const PacBio::BAM::Position pos);
+ /// \}
public:
/// \name Clipping & Mapping
@@ -833,22 +1183,24 @@ public:
const Strand strand,
const Cigar& cigar,
const uint8_t mappingQuality) const;
-
/// \}
+
private:
BamRecordImpl impl_;
public:
- // public & mutable so that queries can directly set the header info,
- // even on a record that is const from client code's perspective
+ /// public & mutable so that queries can directly set the header info,
+ /// even on a record that is const from client code's perspective
mutable BamHeader header_;
-
+
private:
- // cached positions (mutable to allow lazy-calc in const methods)
+ /// \internal
+ /// cached positions (mutable to allow lazy-calc in const methods)
mutable Position alignedStart_;
mutable Position alignedEnd_;
private:
+ /// \internal
std::vector<float> FetchPhotons(const std::string& tagName,
const Orientation orientation) const;
std::string FetchBasesRaw(const std::string& tagName) const;
@@ -889,121 +1241,109 @@ private:
friend class internal::BamRecordMemory;
};
-inline
-BamRecord BamRecord::Clipped(const BamRecord& input,
- const ClipType clipType,
- const PacBio::BAM::Position start,
- const PacBio::BAM::Position end)
-{
- return input.Clipped(clipType, start, end);
-}
-
-inline
-BamRecord BamRecord::Clipped(const ClipType clipType,
- const PacBio::BAM::Position start,
- const PacBio::BAM::Position end) const
-{
- BamRecord result(*this);
- result.Clip(clipType, start, end);
- return result;
-}
-
-inline
-BamRecord BamRecord::Mapped(const BamRecord& input,
- const int32_t referenceId,
- const Position refStart,
- const Strand strand,
- const Cigar& cigar,
- const uint8_t mappingQuality)
-{
- return input.Mapped(referenceId, refStart, strand, cigar, mappingQuality);
-}
-
-inline
-BamRecord BamRecord::Mapped(const int32_t referenceId,
- const Position refStart,
- const Strand strand,
- const Cigar& cigar,
- const uint8_t mappingQuality) const
-{
- BamRecord result(*this);
- result.Map(referenceId, refStart, strand, cigar, mappingQuality);
- return result;
-}
-
+/// \brief Provides a re-usable "view" onto a BamRecord
+///
+/// This class acts a convenience wrapper for working with per-base BamRecord
+/// data. Most of these BamRecord methods take a list of parameters, to adjust
+/// how the underlying data are presented to client code. Often these parameters
+/// will be re-used for each BamRecord method call. Thus, to simplify such
+/// client code, a BamRecordView can be used to state those parameters once, and
+/// then simply request the desired fields.
+///
+/// \internal
+/// \todo Sync up method names with BamRecord
+/// \endinternal
+///
class PBBAM_EXPORT BamRecordView
{
public:
+ /// \brief Constructs a view onto \p record using the supplied parameters.
+ ///
+ /// For frame or QV data, if \p aligned is true, a value of 0 (Accuracy or
+ /// QualityValue) will be used at each inserted or padded base location.
+ ///
+ /// \param[in] record BamRecord data source.
+ /// \param[in] orientation Orientation of output.
+ /// \param[in] aligned if true, gaps/padding will be inserted, per
+ /// Cigar info.
+ /// \param[in] exciseSoftClips if true, any soft-clipped positions will be
+ /// removed from query ends
+ ///
BamRecordView(const BamRecord& record,
const Orientation orientation,
const bool aligned,
- const bool exciseSoftClips)
- : record_(record)
- , orientation_(orientation)
- , aligned_(aligned)
- , exciseSoftClips_(exciseSoftClips)
- { }
+ const bool exciseSoftClips);
public:
- QualityValues AltLabelQVs(void) const
- { return record_.AltLabelQV(orientation_); }
- std::string AltLabelTags(void) const
- { return record_.AltLabelTag(orientation_); }
+ /// \returns BamRecord::AltLabelQV with this view's parameters applied
+ QualityValues AltLabelQVs(void) const;
+
+ /// \returns BamRecord::AltLabelTag with this view's parameters applied
+ std::string AltLabelTags(void) const;
- QualityValues DeletionQVs(void) const
- { return record_.DeletionQV(orientation_, aligned_, exciseSoftClips_); }
+ /// \returns BamRecord::DeletionQV with this view's parameters applied
+ QualityValues DeletionQVs(void) const;
- std::string DeletionTags(void) const
- { return record_.DeletionTag(orientation_, aligned_, exciseSoftClips_); }
+ /// \returns BamRecord::DeletionTag with this view's parameters applied
+ std::string DeletionTags(void) const;
- QualityValues InsertionQVs(void) const
- { return record_.InsertionQV(orientation_, aligned_, exciseSoftClips_); }
+ /// \returns BamRecord::InsertionQV with this view's parameters applied
+ QualityValues InsertionQVs(void) const;
- Frames IPD(void) const
- { return record_.IPD(orientation_, aligned_, exciseSoftClips_); }
+ /// \returns BamRecord::IPD with this view's parameters applied
+ Frames IPD(void) const;
- Frames PrebaseFrames(void) const
- { return record_.IPD(orientation_, aligned_, exciseSoftClips_); }
+ /// \returns BamRecord::LabelQV with this view's parameters applied
+ QualityValues LabelQVs(void) const;
- QualityValues LabelQVs(void) const
- { return record_.LabelQV(orientation_); }
+ /// \returns BamRecord::MergeQV with this view's parameters applied
+ QualityValues MergeQVs(void) const;
- QualityValues MergeQVs(void) const
- { return record_.MergeQV(orientation_, aligned_, exciseSoftClips_); }
+ /// \returns BamRecord::PulseMergeQV with this view's parameters applied
+ QualityValues PulseMergeQVs(void) const;
- QualityValues PulseMergeQVs(void) const
- { return record_.PulseMergeQV(orientation_); }
+ /// \returns BamRecord::Pkmean with this view's parameters applied
+ std::vector<float> Pkmean(void) const;
- std::vector<float> Pkmean(void) const
- { return record_.Pkmean(orientation_); }
+ /// \returns BamRecord::Pkmid with this view's parameters applied
+ std::vector<float> Pkmid(void) const;
- std::vector<float> Pkmid(void) const
- { return record_.Pkmid(orientation_); }
+ /// \returns BamRecord::Pkmean2 with this view's parameters applied
+ std::vector<float> Pkmean2(void) const;
- Frames PrePulseFrames(void) const
- { return record_.PrePulseFrames(orientation_); }
+ /// \returns BamRecord::Pkmid2 with this view's parameters applied
+ std::vector<float> Pkmid2(void) const;
- std::string PulseCalls(void) const
- { return record_.PulseCall(orientation_); }
+ /// \returns BamRecord::PreBaseFrames with this view's parameters applied
+ Frames PrebaseFrames(void) const;
- Frames PulseCallWidth(void) const
- { return record_.PulseCallWidth(orientation_); }
+ /// \returns BamRecord::PrePulseFrames with this view's parameters applied
+ Frames PrePulseFrames(void) const;
- Frames PulseWidths(void) const
- { return record_.PulseWidth(orientation_, aligned_, exciseSoftClips_); }
+ /// \returns BamRecord::PulseCalls with this view's parameters applied
+ std::string PulseCalls(void) const;
- QualityValues Qualities(void) const
- { return record_.Qualities(orientation_, aligned_, exciseSoftClips_); }
+ /// \returns BamRecord::PulseCallWidth with this view's parameters applied
+ Frames PulseCallWidth(void) const;
- std::string Sequence(void) const
- { return record_.Sequence(orientation_, aligned_, exciseSoftClips_); }
+ /// \returns BamRecord::PulseWidths with this view's parameters applied
+ Frames PulseWidths(void) const;
- QualityValues SubstitutionQVs(void) const
- { return record_.SubstitutionQV(orientation_, aligned_, exciseSoftClips_); }
+ /// \returns BamRecord::Qualities with this view's parameters applied
+ QualityValues Qualities(void) const;
- std::string SubstitutionTags(void) const
- { return record_.SubstitutionTag(orientation_, aligned_, exciseSoftClips_); }
+ /// \returns BamRecord::Sequence with this view's parameters applied
+ std::string Sequence(void) const;
+
+ /// \returns BamRecord::StartFrame with this view's parameters applied
+ std::vector<uint32_t> StartFrames(void) const;
+
+ /// \returns BamRecord::SubstitutionQV with this view's parameters applied
+ QualityValues SubstitutionQVs(void) const;
+
+ /// \returns BamRecord::SubstitutionTag with this view's parameters applied
+ std::string SubstitutionTags(void) const;
private:
const BamRecord& record_;
@@ -1015,4 +1355,6 @@ private:
} // namespace BAM
} // namespace PacBio
+#include "pbbam/internal/BamRecord.inl"
+
#endif // BAMRECORD_H
diff --git a/include/pbbam/BamRecordBuilder.h b/include/pbbam/BamRecordBuilder.h
index 81002da..c6ff877 100644
--- a/include/pbbam/BamRecordBuilder.h
+++ b/include/pbbam/BamRecordBuilder.h
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file BamRecordBuilder.h
+/// \brief Defines the BamRecordBuilder class.
+//
// Author: Derek Barnett
#ifndef BAMRECORDBUILDER_H
@@ -46,21 +50,37 @@
namespace PacBio {
namespace BAM {
-class PBBAM_EXPORT BamImplBuilder
-{
-
-};
-
-
+/// \brief The BamRecordBuilder class provides a helper utility for building
+/// BamRecords.
+///
+/// This class provides a mechanism for building up %BAM data and
+/// lazy-encoding/constructing the actual BamRecord. Currently, the methods here
+/// really only support filling in the low-level SAM/BAM-style fields, not so
+/// much the PacBio-specific fields.
+///
class PBBAM_EXPORT BamRecordBuilder
{
public:
/// \name Constructors & Related Methods
/// \{
+ /// \brief Creates an empty %BAM record builder.
BamRecordBuilder(void);
+
+ /// \brief Creates an empty %BAM record builder, with header info to apply
+ /// to built records.
+ ///
+ /// \param[in] header BamHeader object
+ ///
explicit BamRecordBuilder(const BamHeader& header);
+
+ /// \brief Creates record builder with inital record data.
+ ///
+ /// \param[in] prototype data from this record will be used to seed the
+ /// builder
+ ///
BamRecordBuilder(const BamRecord& prototype);
+
BamRecordBuilder(const BamRecordBuilder& other);
BamRecordBuilder(BamRecordBuilder&& other);
BamRecordBuilder& operator=(const BamRecordBuilder& other);
@@ -73,28 +93,34 @@ public:
/// \name Record-Building
/// \{
- /// Builds a BamRecord from current builder attributes
+ /// \brief Builds a BamRecord from current builder attributes.
+ ///
+ /// \returns newly-built BamRecord object
///
- /// \returns BamRecord object
BamRecord Build(void) const;
- /// Replaces an existing BamRecord's data with current builder attributes
+ /// \brief Replaces an existing BamRecord's data with current builder
+ /// attributes.
///
/// \param[out] record resulting record
/// \returns true if successful
+ ///
bool BuildInPlace(BamRecord& record) const;
- /// Resets builder attributes to default values
+ /// \brief Resets builder attributes to default values.
+ ///
void Reset(void);
- /// Resets builder attributes with existing BamRecord data
+ /// \brief Resets builder attributes with \p prototype's data.
///
/// \param[in] prototype
+ ///
void Reset(const BamRecord& prototype);
- /// Resets builder attributes with existing BamRecord data
+ /// \brief Resets builder attributes with \p prototype's data.
///
/// \param[in] prototype
+ ///
void Reset(BamRecord&& prototype);
/// \}
@@ -104,52 +130,60 @@ public:
/// \name Core Attribute Setup
/// \{
- /// Sets the record's (BAI) index bin ID.
+ /// \brief Sets the record's (BAI) index bin ID.
///
/// \param[in] bin BAI index bin ID.
/// \returns reference to this builder
+ ///
BamRecordBuilder& Bin(const uint32_t bin);
- /// Sets this record's alignment flag, using a raw integer.
+ /// \brief Sets this record's alignment flag, using a raw integer.
///
/// \param[in] flag raw alignment flag
/// \returns reference to this record
+ ///
BamRecordBuilder& Flag(const uint32_t flag);
- /// Sets this record's insert size.
+ /// \brief Sets this record's insert size.
///
/// \param[in] iSize insert size
/// \returns reference to this record
+ ///
BamRecordBuilder& InsertSize(const int32_t iSize);
- /// Sets this record's map quality.
+ /// \brief Sets this record's map quality.
///
/// \param[in] mapQual mapping quality - value of 255 indicates "unknown"
/// \returns reference to this record
+ ///
BamRecordBuilder& MapQuality(const uint8_t mapQual);
- /// Sets this record's mate's mapped position.
+ /// \brief Sets this record's mate's mapped position.
///
/// \param[in] pos mapped position. A value of -1 indicates unmapped.
/// \returns reference to this record
+ ///
BamRecordBuilder& MatePosition(const int32_t pos);
- /// Sets this record's mate's mapped reference ID
+ /// \brief Sets this record's mate's mapped reference ID
///
/// \param[in] id reference ID. A value of -1 indicates unmapped.
/// \returns reference to this record
+ ///
BamRecordBuilder& MateReferenceId(const int32_t id);
- /// Sets this record's mapped position.
+ /// \brief Sets this record's mapped position.
///
/// \param[in] pos mapped position. A value of -1 indicates unmapped.
/// \returns reference to this record
+ ///
BamRecordBuilder& Position(const int32_t pos);
- /// Sets this record's mapped reference ID
+ /// \brief Sets this record's mapped reference ID
///
/// \param[in] id reference ID. A value of -1 indicates unmapped.
/// \returns reference to this record
+ ///
BamRecordBuilder& ReferenceId(const int32_t id);
/// \}
@@ -158,40 +192,42 @@ public:
/// \name Alignment Flag Setup
/// \{
- /// Sets whether this record is a PCR/optical duplicate
+ /// \brief Sets whether this record is a PCR/optical duplicate
BamRecordBuilder& SetDuplicate(bool ok);
- /// Sets whether this record failed quality controls
+ /// \brief Sets whether this record failed quality controls
BamRecordBuilder& SetFailedQC(bool ok);
- /// Sets whether this record is the first mate of a pair.
+ /// \brief Sets whether this record is the first mate of a pair.
BamRecordBuilder& SetFirstMate(bool ok);
- /// Sets whether this record was aligned.
+ /// \brief Sets whether this record was aligned.
BamRecordBuilder& SetMapped(bool ok);
- /// Sets whether this record's mate was aligned.
+ /// \brief Sets whether this record's mate was aligned.
BamRecordBuilder& SetMateMapped(bool ok);
- /// Sets whether this record's mate mapped to reverse strand.
+ /// \brief Sets whether this record's mate mapped to reverse strand.
BamRecordBuilder& SetMateReverseStrand(bool ok);
- /// Sets whether this record came from paired-end sequencing.
+ /// \brief Sets whether this record came from paired-end sequencing.
BamRecordBuilder& SetPaired(bool ok);
- /// Sets whether this record is a read's primary alignment.
+ /// \brief Sets whether this record is a read's primary alignment.
BamRecordBuilder& SetPrimaryAlignment(bool ok);
- /// Sets whether this record & its mate were properly mapped, per the aligner.
+ /// \brief Sets whether this record & its mate were properly mapped, per the
+ /// aligner.
+ ///
BamRecordBuilder& SetProperPair(bool ok);
- /// Sets whether this record mapped to reverse strand.
+ /// \brief Sets whether this record mapped to reverse strand.
BamRecordBuilder& SetReverseStrand(bool ok);
- /// Sets whether this record is the second mate of a pair.
+ /// \brief Sets whether this record is the second mate of a pair.
BamRecordBuilder& SetSecondMate(bool ok);
- /// Sets whether this record is a supplementary alignment.
+ /// \brief Sets whether this record is a supplementary alignment.
BamRecordBuilder& SetSupplementaryAlignment(bool ok);
/// \}
@@ -200,24 +236,70 @@ public:
/// \name Variable-Length Data Setup
/// \{
+ /// \brief Sets the record's CIGAR data.
+ ///
+ /// \returns reference to this builder
+ ///
+ BamRecordBuilder& Cigar(const PacBio::BAM::Cigar& cigar);
+
+ /// \brief Sets the record's CIGAR data.
+ ///
+ /// \returns reference to this builder
+ ///
+ BamRecordBuilder& Cigar(PacBio::BAM::Cigar&& cigar);
+
+ /// \brief Sets the record's name.
+ ///
+ /// \returns reference to this builder
+ ///
BamRecordBuilder& Name(const std::string& name);
- BamRecordBuilder& Name(std::string&& name);
- BamRecordBuilder& Sequence(const std::string& sequence);
- BamRecordBuilder& Sequence(std::string&& sequence);
+ /// \brief Sets the record's name.
+ ///
+ /// \returns reference to this builder
+ ///
+ BamRecordBuilder& Name(std::string&& name);
+ /// \brief Sets the record's qualities.
+ ///
+ /// \returns reference to this builder
+ ///
BamRecordBuilder& Qualities(const std::string& qualities);
+
+ /// \brief Sets the record's qualities.
+ ///
+ /// \returns reference to this builder
+ ///
BamRecordBuilder& Qualities(std::string&& qualities);
- BamRecordBuilder& Cigar(const PacBio::BAM::Cigar& cigar);
- BamRecordBuilder& Cigar(PacBio::BAM::Cigar&& cigar);
+ /// \brief Sets the record's sequence.
+ ///
+ /// \returns reference to this builder
+ ///
+ BamRecordBuilder& Sequence(const std::string& sequence);
+ /// \brief Sets the record's sequence.
+ ///
+ /// \returns reference to this builder
+ ///
+ BamRecordBuilder& Sequence(std::string&& sequence);
+
+ /// \brief Sets the record's tags.
+ ///
+ /// \returns reference to this builder
+ ///
BamRecordBuilder& Tags(const TagCollection& tags);
+
+ /// \brief Sets the record's tags.
+ ///
+ /// \returns reference to this builder
+ ///
BamRecordBuilder& Tags(TagCollection&& tags);
+ /// \}
+
private:
BamHeader header_;
-
bam1_core_t core_;
std::string name_;
std::string sequence_;
@@ -226,43 +308,9 @@ private:
TagCollection tags_;
};
-inline BamRecordBuilder& BamRecordBuilder::Bin(const uint32_t bin)
-{ core_.bin = bin; return *this; }
-
-inline BamRecordBuilder& BamRecordBuilder::Flag(const uint32_t flag)
-{ core_.flag = flag; return *this; }
-
-inline BamRecordBuilder& BamRecordBuilder::InsertSize(const int32_t iSize)
-{ core_.isize = iSize; return *this; }
-
-inline BamRecordBuilder& BamRecordBuilder::MapQuality(const uint8_t mapQual)
-{ core_.qual = mapQual; return *this; }
-
-inline BamRecordBuilder& BamRecordBuilder::MatePosition(const int32_t pos)
-{ core_.mpos = pos; return *this; }
-
-inline BamRecordBuilder& BamRecordBuilder::MateReferenceId(const int32_t id)
-{ core_.mtid = id; return *this; }
-
-inline BamRecordBuilder& BamRecordBuilder::Position(const int32_t pos)
-{ core_.pos = pos; return *this; }
-
-inline BamRecordBuilder& BamRecordBuilder::Qualities(const std::string& qualities)
-{ qualities_ = qualities; return *this; }
-
-inline BamRecordBuilder& BamRecordBuilder::Qualities(std::string&& qualities)
-{ qualities_ = std::move(qualities); return *this; }
-
-inline BamRecordBuilder& BamRecordBuilder::ReferenceId(const int32_t id)
-{ core_.tid = id; return *this; }
-
-inline BamRecordBuilder& BamRecordBuilder::Tags(const TagCollection& tags)
-{ tags_ = tags; return *this; }
-
-inline BamRecordBuilder& BamRecordBuilder::Tags(TagCollection&& tags)
-{ tags_ = std::move(tags); return *this; }
-
} // namespace BAM
} // namespace PacBio
+#include "pbbam/internal/BamRecordBuilder.inl"
+
#endif // BAMRECORDBUILDER_H
diff --git a/include/pbbam/BamRecordImpl.h b/include/pbbam/BamRecordImpl.h
index c42ef0a..a4f23b1 100644
--- a/include/pbbam/BamRecordImpl.h
+++ b/include/pbbam/BamRecordImpl.h
@@ -32,18 +32,23 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file BamRecordImpl.h
+/// \brief Defines the BamRecordImpl class.
+//
// Author: Derek Barnett
#ifndef BAMRECORDIMPL_H
#define BAMRECORDIMPL_H
-#include "htslib/sam.h"
#include "pbbam/Cigar.h"
#include "pbbam/Config.h"
#include "pbbam/Position.h"
#include "pbbam/QualityValues.h"
#include "pbbam/TagCollection.h"
+#include <htslib/sam.h>
+#include <map>
#include <string>
namespace PacBio {
@@ -52,10 +57,14 @@ namespace BAM {
namespace internal { class BamRecordMemory; }
/// \brief The BamRecordImpl class holds all data necessary for creating,
-/// querying or editing a valid, generic BAM record.
+/// querying or editing a generic %BAM record.
///
/// For PacBio-specific extensions and convenience methods, see BamRecord.
///
+/// \note This class is mostly an internal implementation detail and will
+/// likely be removed from the public API in the future. Please use
+/// BamRecord as much as possible.
+///
class PBBAM_EXPORT BamRecordImpl
{
public:
@@ -91,168 +100,172 @@ public:
/// \}
public:
-
- /** @name Core Data
- */
- ///@{
+ /// \name Core Data
+ /// \{
/// \returns this record's assigned (BAI) index bin ID.
- inline uint32_t Bin(void) const;
+ uint32_t Bin(void) const;
/// \returns this record's alignment flag, in raw integer form.
- inline uint32_t Flag(void) const;
+ uint32_t Flag(void) const;
/// \returns this record's insert size
- inline int32_t InsertSize(void) const;
+ int32_t InsertSize(void) const;
/// \returns this record's mapping quality. A value of 255 indicates "unknown"
- inline uint8_t MapQuality(void) const;
+ uint8_t MapQuality(void) const;
/// \returns this record's mate's mapped position, or -1 if unmapped
- inline PacBio::BAM::Position MatePosition(void) const;
+ PacBio::BAM::Position MatePosition(void) const;
/// \returns this record's mate's mapped reference ID, or -1 if unmapped
- inline int32_t MateReferenceId(void) const;
+ int32_t MateReferenceId(void) const;
/// \returns this record's mapped position, or -1 if unmapped
- inline PacBio::BAM::Position Position(void) const;
+ PacBio::BAM::Position Position(void) const;
/// \returns this record's mate's mapped reference ID, or -1 if unmapped
- inline int32_t ReferenceId(void) const;
+ int32_t ReferenceId(void) const;
/// Sets the record's (BAI) index bin ID.
///
/// \param[in] bin BAI index bin ID.
/// \returns reference to this record
- inline BamRecordImpl& Bin(uint32_t bin);
+ ///
+ BamRecordImpl& Bin(uint32_t bin);
/// Sets this record's alignment flag, using a raw integer.
///
/// \param[in] flag raw alignment flag
/// \returns reference to this record
- inline BamRecordImpl& Flag(uint32_t flag);
+ ///
+ BamRecordImpl& Flag(uint32_t flag);
/// Sets this record's insert size.
///
/// \param[in] iSize insert size
/// \returns reference to this record
- inline BamRecordImpl& InsertSize(int32_t iSize);
+ ///
+ BamRecordImpl& InsertSize(int32_t iSize);
/// Sets this record's map quality.
///
/// \param[in] mapQual mapping quality - value of 255 indicates "unknown"
/// \returns reference to this record
- inline BamRecordImpl& MapQuality(uint8_t mapQual);
+ ///
+ BamRecordImpl& MapQuality(uint8_t mapQual);
/// Sets this record's mate's mapped position.
///
/// \param[in] pos mapped position. A value of -1 indicates unmapped.
/// \returns reference to this record
- inline BamRecordImpl& MatePosition(PacBio::BAM::Position pos);
+ ///
+ BamRecordImpl& MatePosition(PacBio::BAM::Position pos);
/// Sets this record's mate's mapped reference ID
///
/// \param[in] id reference ID. A value of -1 indicates unmapped.
/// \returns reference to this record
- inline BamRecordImpl& MateReferenceId(int32_t id);
+ ///
+ BamRecordImpl& MateReferenceId(int32_t id);
/// Sets this record's mapped position.
///
/// \param[in] pos mapped position. A value of -1 indicates unmapped.
/// \returns reference to this record
- inline BamRecordImpl& Position(PacBio::BAM::Position pos);
+ ///
+ BamRecordImpl& Position(PacBio::BAM::Position pos);
/// Sets this record's mapped reference ID
///
/// \param[in] id reference ID. A value of -1 indicates unmapped.
/// \returns reference to this record
- inline BamRecordImpl& ReferenceId(int32_t id);
+ ///
+ BamRecordImpl& ReferenceId(int32_t id);
- ///@}
+ /// \}
public:
- /** @name Alignment Flags
- */
- ///@{
+ /// \name Alignment Flags
+ /// \{
/// \returns true if this record is a PCR/optical duplicate
- inline bool IsDuplicate(void) const;
+ bool IsDuplicate(void) const;
/// \returns true if this record failed quality controls
- inline bool IsFailedQC(void) const;
+ bool IsFailedQC(void) const;
/// \returns true if this record is the first mate of a pair
- inline bool IsFirstMate(void) const;
+ bool IsFirstMate(void) const;
/// \returns true if this record was mapped by aligner
- inline bool IsMapped(void) const;
+ bool IsMapped(void) const;
/// \returns true if this record's mate was mapped by aligner
- inline bool IsMateMapped(void) const;
+ bool IsMateMapped(void) const;
/// \returns true if this record's mate was mapped to the reverse strand
- inline bool IsMateReverseStrand(void) const;
+ bool IsMateReverseStrand(void) const;
/// \returns true if this record comes from paired-end sequencing
- inline bool IsPaired(void) const;
+ bool IsPaired(void) const;
/// \returns true if this record is a read's primary alignment
- inline bool IsPrimaryAlignment(void) const;
+ bool IsPrimaryAlignment(void) const;
/// \returns true if this record & its mate were properly aligned
- inline bool IsProperPair(void) const;
+ bool IsProperPair(void) const;
/// \returns true if this record was mapped to the reverse strand
- inline bool IsReverseStrand(void) const;
+ bool IsReverseStrand(void) const;
/// \returns true if this record is the second mate of a pair
- inline bool IsSecondMate(void) const;
+ bool IsSecondMate(void) const;
/// \returns true if this record is a supplementary alignment
- inline bool IsSupplementaryAlignment(void) const;
+ bool IsSupplementaryAlignment(void) const;
/// Sets whether this record is a PCR/optical duplicate
- inline BamRecordImpl& SetDuplicate(bool ok);
+ BamRecordImpl& SetDuplicate(bool ok);
/// Sets whether this record failed quality controls
- inline BamRecordImpl& SetFailedQC(bool ok);
+ BamRecordImpl& SetFailedQC(bool ok);
/// Sets whether this record is the first mate of a pair.
- inline BamRecordImpl& SetFirstMate(bool ok);
+ BamRecordImpl& SetFirstMate(bool ok);
/// Sets whether this record was aligned.
- inline BamRecordImpl& SetMapped(bool ok);
+ BamRecordImpl& SetMapped(bool ok);
/// Sets whether this record's mate was aligned.
- inline BamRecordImpl& SetMateMapped(bool ok);
+ BamRecordImpl& SetMateMapped(bool ok);
/// Sets whether this record's mate mapped to reverse strand.
- inline BamRecordImpl& SetMateReverseStrand(bool ok);
+ BamRecordImpl& SetMateReverseStrand(bool ok);
/// Sets whether this record came from paired-end sequencing.
- inline BamRecordImpl& SetPaired(bool ok);
+ BamRecordImpl& SetPaired(bool ok);
/// Sets whether this record is a read's primary alignment.
- inline BamRecordImpl& SetPrimaryAlignment(bool ok);
+ BamRecordImpl& SetPrimaryAlignment(bool ok);
/// Sets whether this record & its mate were properly mapped, per the aligner.
- inline BamRecordImpl& SetProperPair(bool ok);
+ BamRecordImpl& SetProperPair(bool ok);
/// Sets whether this record mapped to reverse strand.
- inline BamRecordImpl& SetReverseStrand(bool ok);
+ BamRecordImpl& SetReverseStrand(bool ok);
/// Sets whether this record is the second mate of a pair.
- inline BamRecordImpl& SetSecondMate(bool ok);
+ BamRecordImpl& SetSecondMate(bool ok);
/// Sets whether this record is a supplementary alignment.
- inline BamRecordImpl& SetSupplementaryAlignment(bool ok);
+ BamRecordImpl& SetSupplementaryAlignment(bool ok);
- ///@}
+ /// \}
public:
- /** @name Variable-length Data (sequence, qualities, etc.)
- */
- ///@{
+ /// \name Variable-length Data (sequence, qualities, etc.)
+ /// \{
/// \returns the record's CIGAR data as a Cigar object
Cigar CigarData(void) const;
@@ -260,15 +273,15 @@ public:
/// Sets the record's CIGAR data using a Cigar object
///
/// \param[in] cigar PacBio::BAM::Cigar object
- ///
/// \returns reference to this record
+ ///
BamRecordImpl& CigarData(const Cigar& cigar);
/// Sets the record's CIGAR data using a CIGAR-formatted string.
///
/// \param[in] cigarString CIGAR-formatted string
- ///
/// \returns reference to this record
+ ///
BamRecordImpl& CigarData(const std::string& cigarString);
// TODO: CIGAR iterator - Cigar only or here as well ??
@@ -279,8 +292,8 @@ public:
/// Sets the record's "query name".
///
/// \param name new name
- ///
/// \returns reference to this record
+ ///
BamRecordImpl& Name(const std::string& name);
/// \returns the record's quality values (phred-style ASCII)
@@ -288,27 +301,31 @@ public:
/// \note Usually Qualities().size() == Sequence.size(). However, in
/// some data sets, the quality values are not provided. In that
/// case, this method will return an empty container.
+ ///
QualityValues Qualities(void) const;
/// \returns the record's DNA sequence.
std::string Sequence(void) const;
+ size_t SequenceLength(void) const;
+
/// \brief Sets the record's DNA sequence and quality values
///
- /// This is an overloaded function. Sets the DNA sequence and quality values,
- /// using the length of \p sequence.
+ /// This is an overloaded function. Sets the DNA sequence and quality
+ /// values, using the length of \p sequence.
///
- /// \note When using this overload (and \p qualities is non-empty), the lengths
- /// of \p sequence and \p qualities \b must be equal.
+ /// \note When using this overload (and \p qualities is non-empty), the
+ /// lengths of \p sequence and \p qualities \b must be equal.
///
- /// \todo How to handle mismatched lenths?
+ /// \todo How to handle mismatched lengths?
///
/// \param[in] sequence std::string containing DNA sequence
/// \param[in] qualities std::string containing ASCII quality values
///
/// \returns reference to this record.
///
- /// \sa SetSequenceAndQualities(const char* sequence, const size_t sequenceLength, const char* qualities)
+ /// \sa SetSequenceAndQualities(const char* sequence,
+ /// const size_t sequenceLength, const char* qualities)
///
BamRecordImpl& SetSequenceAndQualities(const std::string& sequence,
const std::string& qualities = std::string());
@@ -316,16 +333,17 @@ public:
/// \brief Sets the record's DNA sequence and quality values.
///
/// The \p sequence must consist of IUPAC nucleotide codes {=ACMGRSVTWYHKDBN}.
- /// The \p qualities, if not empty, must consist of 'phred'-style ASCII quality
- /// values. \p qualities may be an empty string or NULL pointer in cases where
- /// there are no such data available.
+ /// The \p qualities, if not empty, must consist of 'phred'-style ASCII
+ /// quality values. \p qualities may be an empty string or NULL pointer in
+ /// cases where there are no such data available.
///
- /// \param[in] sequence C-string containing DNA sequence
- /// \param[in] sequenceLength length of DNA sequence
- /// \param[in] qualities C-string containing 'phred-style' ASCII quality values
+ /// \param[in] sequence C-string containing DNA sequence
+ /// \param[in] sequenceLength length of DNA sequence
+ /// \param[in] qualities C-string containing 'phred-style' ASCII
+ /// quality values
///
- /// \note \p sequence does \b NOT have to be NULL-terminated. Length is explicitly
- /// determined by the value of \p sequenceLength provided.
+ /// \note \p sequence does \b NOT have to be NULL-terminated. Length is
+ /// explicitly determined by the value of \p sequenceLength provided.
///
/// \returns reference to this record.
///
@@ -335,41 +353,49 @@ public:
/// \brief Sets the record's DNA sequence and quality values.
///
- /// The \p encodedSequence should be preencoded/packed into the BAM binary format.
- /// The \p qualities, if not empty, must consist of 'phred'-style ASCII quality values.
- /// \p qualities may be an empty string or NULL pointer in cases where there are no
- /// such data available.
+ /// The \p encodedSequence should be preencoded/packed into the BAM binary
+ /// format. The \p qualities, if not empty, must consist of 'phred'-style
+ /// ASCII quality values. \p qualities may be an empty string or NULL
+ /// pointer in cases where there are no such data available.
///
- /// \param[in] encodedSequence C-string containing BAM-format-encoded DNA sequence
- /// \param[in] rawSequenceLength length of DNA sequence (not the encoded length)
- /// \param[in] qualities C-string containing 'phred-style' ASCII quality values
+ /// \param[in] encodedSequence C-string containing BAM-format-encoded
+ /// DNA sequence
+ /// \param[in] rawSequenceLength length of DNA sequence (not the encoded
+ /// length)
+ /// \param[in] qualities C-string containing 'phred-style' ASCII
+ /// quality values
///
- /// \note \p encodedSequence does \b NOT have to be NULL-terminated. Length is explicitly
- /// determined by the value of \p sequenceLength provided.
+ /// \note \p encodedSequence does \b NOT have to be NULL-terminated. Length
+ /// is explicitly determined by the value of \p sequenceLength
+ /// provided.
///
/// \returns reference to this record.
///
- /// \sa SetSequenceAndQualities(const char* sequence, const size_t sequenceLength, const char* qualities)
+ /// \sa SetSequenceAndQualities(const char* sequence,
+ /// const size_t sequenceLength, const char* qualities)
///
BamRecordImpl& SetPreencodedSequenceAndQualities(const char* encodedSequence,
const size_t rawSequenceLength,
const char* qualities = 0);
+ /// \}
+
public:
- /** @name Tag Data
- */
- ///@{
+ /// \name Tag Data
+ /// \{
/// \returns record's full tag data as a TagCollection object
TagCollection Tags(void) const;
- /// Sets the record's full tag data via a TagCollection object
+ /// \brief Sets the record's full tag data via a TagCollection object
+ ///
BamRecordImpl& Tags(const TagCollection& tags);
- /// Adds a new tag to this record.
+ /// \brief Adds a new tag to this record.
///
- /// \param[in] tagName 2-character tag name.
- /// \param[in] value Tag object that describes the type & value of data to be added
+ /// \param[in] tagName 2-character tag name.
+ /// \param[in] value Tag object that describes the type & value of data
+ /// to be added
///
/// \note Any value that can be used to implicitly construct a Tag is valid.
/// \code
@@ -380,41 +406,95 @@ public:
/// \endcode
///
/// \returns true if tag was successfully added.
- bool AddTag(const std::string& tagName, const Tag& value);
+ ///
+ bool AddTag(const std::string& tagName,
+ const Tag& value);
+
+ /// \brief Adds a new tag to this record, with an optional modifier.
+ ///
+ /// \param[in] tagName 2-character tag name.
+ /// \param[in] value Tag object that describes the type &
+ /// value of data to be added
+ /// \param[in] additionalModifier optional extra modifier (for explicit
+ /// modification of an otherwise const Tag)
+ ///
+ /// \note Any value that can be used to implicitly construct a Tag is valid.
+ /// \code
+ /// char c;
+ /// string h;
+ /// record.AddTag("XX", c, TagModifier::ASCII_CHAR); // will add a char-type tag
+ /// record.AddTag("YY", h, TagModifier::HEX_STRING); // will add a hex string-type tag
+ /// \endcode
+ ///
+ /// \returns true if tag was successfully added.
+ ///
+ bool AddTag(const std::string& tagName,
+ const Tag& value,
+ const TagModifier additionalModifier);
- /// Edits an existing tag on this record.
+ /// \brief Edits an existing tag on this record.
///
- /// \param[in] tagName 2-character tag name. Name must be present (see HasTag)
- /// \param[in] newValue Tag object that describes the type & value of new data to be added
+ /// \param[in] tagName 2-character tag name. Name must be present
+ /// (see HasTag)
+ /// \param[in] newValue Tag object that describes the type & value of
+ /// new data to be added
///
/// \note Any value that can be used to implicitly construct a Tag is valid.
/// \code
/// string s;
/// vector<uint32_t> v;
- /// record.EditTag("XX", s); // will overwrite tag XX with a string-type Tag
- /// record.EditTag("YY", v); // will overwrite tag YY with a uint32-array-type Tag
+ /// record.EditTag("XX", s); // will overwrite tag XX with a string-type tag
+ /// record.EditTag("YY", v); // will overwrite tag YY with a uint32-array-type tag
/// \endcode
///
/// \returns true if tag was successfully edited.
- bool EditTag(const std::string& tagName, const Tag& newValue);
+ ///
+ bool EditTag(const std::string& tagName,
+ const Tag& newValue);
+
+ /// \brief Edits an existing tag on this record.
+ ///
+ /// \param[in] tagName 2-character tag name. Name must be
+ /// present (see HasTag)
+ /// \param[in] value Tag object that describes the type &
+ /// value of new data to be added
+ /// \param[in] additionalModifier optional extra modifier (for explicit
+ /// modification of an otherwise const Tag)
+ ///
+ /// \note Any value that can be used to implicitly construct a Tag is valid.
+ /// \code
+ /// char c;
+ /// string h;
+ /// record.EditTag("XX", c, TagModifier::ASCII_CHAR); // will overwrite tag XX with a char-type tag
+ /// record.EditTag("YY", h, TagModifier::HEX_STRING); // will overwrite tag YY with a hex string-type tag
+ /// \endcode
+ ///
+ /// \returns true if tag was successfully edited.
+ ///
+ bool EditTag(const std::string& tagName,
+ const Tag& value,
+ const TagModifier additionalModifier);
/// \returns true if a tag with this name is present in this record.
bool HasTag(const std::string& tagName) const;
- /// Removes an existing tag from this record.
+ /// \brief Removes an existing tag from this record.
///
- /// \param[in] tagName 2-character tag name.
+ /// \param[in] tagName 2-character tag name.
///
- /// \returns true if tag was actaully removed (i.e. false if tagName previously unknown)
+ /// \returns true if tag was actaully removed (i.e. false if tagName
+ /// previously unknown)
/// \sa HasTag
+ ///
bool RemoveTag(const std::string& tagName);
- /// Fetches a tag from this record.
+ /// \brief Fetches a tag from this record.
///
- /// \param[in] tagName 2-character tag name.
+ /// \param[in] tagName 2-character tag name.
+ ///
+ /// \returns Tag object for the requested name. If name is unknown, a
+ /// default constructed Tag is returned (Tag::IsNull() is true).
///
- /// \returns Tag object for the requested name. If name is unknown, a default constructed
- /// Tag is returned (Tag::IsNull() is true).
Tag TagValue(const std::string& tagName) const;
// change above to Tag();
@@ -423,7 +503,7 @@ public:
// T TagValue(const std::string& tagName) const;
- ///@}
+ /// \}
private:
// returns a BamRecordImpl object, with a deep copy of @rawData contents
@@ -432,6 +512,15 @@ private:
// internal memory setup/expand methods
void InitializeData(void);
void MaybeReallocData(void);
+ void UpdateTagMap(void) const; // allowed to be called from const methods
+ // (lazy update on request)
+
+ // internal tag helper methods
+ bool AddTagImpl(const std::string& tagName,
+ const Tag& value,
+ const TagModifier additionalModifier);
+ bool RemoveTagImpl(const std::string& tagName);
+ int TagOffset(const std::string& tagName) const;
// core seq/qual logic shared by the public API
BamRecordImpl& SetSequenceAndQualitiesInternal(const char* sequence,
@@ -443,180 +532,15 @@ private:
// data members
PBBAM_SHARED_PTR<bam1_t> d_;
+ mutable std::map<uint16_t, int> tagOffsets_;
// friends
friend class internal::BamRecordMemory;
};
-inline uint32_t BamRecordImpl::Bin(void) const
-{ return d_->core.bin; }
-
-inline BamRecordImpl& BamRecordImpl::Bin(uint32_t bin)
-{ d_->core.bin = bin; return *this; }
-
-inline uint32_t BamRecordImpl::Flag(void) const
-{ return d_->core.flag; }
-
-inline BamRecordImpl& BamRecordImpl::Flag(uint32_t flag)
-{ d_->core.flag = flag; return *this; }
-
-inline int32_t BamRecordImpl::InsertSize(void) const
-{ return d_->core.isize; }
-
-inline BamRecordImpl& BamRecordImpl::InsertSize(int32_t iSize)
-{ d_->core.isize = iSize; return *this; }
-
-inline uint8_t BamRecordImpl::MapQuality(void) const
-{ return d_->core.qual; }
-
-inline BamRecordImpl& BamRecordImpl::MapQuality(uint8_t mapQual)
-{ d_->core.qual = mapQual; return *this; }
-
-inline PacBio::BAM::Position BamRecordImpl::MatePosition(void) const
-{ return d_->core.mpos; }
-
-inline BamRecordImpl& BamRecordImpl::MatePosition(PacBio::BAM::Position pos)
-{ d_->core.mpos = pos; return *this; }
-
-inline int32_t BamRecordImpl::MateReferenceId(void) const
-{ return d_->core.mtid; }
-
-inline BamRecordImpl& BamRecordImpl::MateReferenceId(int32_t id)
-{ d_->core.mtid = id; return *this; }
-
-inline PacBio::BAM::Position BamRecordImpl::Position(void) const
-{ return d_->core.pos; }
-
-inline BamRecordImpl& BamRecordImpl::Position(PacBio::BAM::Position pos)
-{ d_->core.pos = pos; return *this; }
-
-inline int32_t BamRecordImpl::ReferenceId(void) const
-{ return d_->core.tid; }
-
-inline BamRecordImpl& BamRecordImpl::ReferenceId(int32_t id)
-{ d_->core.tid = id; return *this; }
-
-inline bool BamRecordImpl::IsDuplicate(void) const
-{ return (d_->core.flag & BamRecordImpl::DUPLICATE) != 0; }
-
-inline BamRecordImpl& BamRecordImpl::SetDuplicate(bool ok)
-{
- if (ok) d_->core.flag |= BamRecordImpl::DUPLICATE;
- else d_->core.flag &= ~BamRecordImpl::DUPLICATE;
- return *this;
-}
-
-inline bool BamRecordImpl::IsFailedQC(void) const
-{ return (d_->core.flag & BamRecordImpl::FAILED_QC) != 0; }
-
-inline BamRecordImpl& BamRecordImpl::SetFailedQC(bool ok)
-{
- if (ok) d_->core.flag |= BamRecordImpl::FAILED_QC;
- else d_->core.flag &= ~BamRecordImpl::FAILED_QC;
- return *this;
-}
-
-inline bool BamRecordImpl::IsFirstMate(void) const
-{ return (d_->core.flag & BamRecordImpl::MATE_1) != 0; }
-
-inline BamRecordImpl& BamRecordImpl::SetFirstMate(bool ok)
-{
- if (ok) d_->core.flag |= BamRecordImpl::MATE_1;
- else d_->core.flag &= ~BamRecordImpl::MATE_1;
- return *this;
-}
-
-inline bool BamRecordImpl::IsMapped(void) const
-{ return (d_->core.flag & BamRecordImpl::UNMAPPED) == 0; }
-
-inline BamRecordImpl& BamRecordImpl::SetMapped(bool ok)
-{
- if (ok) d_->core.flag &= ~BamRecordImpl::UNMAPPED;
- else d_->core.flag |= BamRecordImpl::UNMAPPED;
- return *this;
-}
-
-inline bool BamRecordImpl::IsMateMapped(void) const
-{ return (d_->core.flag & BamRecordImpl::MATE_UNMAPPED) == 0; }
-
-inline BamRecordImpl& BamRecordImpl::SetMateMapped(bool ok)
-{
- if (ok) d_->core.flag &= ~BamRecordImpl::MATE_UNMAPPED;
- else d_->core.flag |= BamRecordImpl::MATE_UNMAPPED;
- return *this;
-}
-
-inline bool BamRecordImpl::IsMateReverseStrand(void) const
-{ return (d_->core.flag & BamRecordImpl::MATE_REVERSE_STRAND) != 0; }
-
-inline BamRecordImpl& BamRecordImpl::SetMateReverseStrand(bool ok)
-{
- if (ok) d_->core.flag |= BamRecordImpl::MATE_REVERSE_STRAND;
- else d_->core.flag &= ~BamRecordImpl::MATE_REVERSE_STRAND;
- return *this;
-}
-
-inline bool BamRecordImpl::IsPaired(void) const
-{ return (d_->core.flag & BamRecordImpl::PAIRED) != 0; }
-
-inline BamRecordImpl& BamRecordImpl::SetPaired(bool ok)
-{
- if (ok) d_->core.flag |= BamRecordImpl::PAIRED;
- else d_->core.flag &= ~BamRecordImpl::PAIRED;
- return *this;
-}
-
-inline bool BamRecordImpl::IsPrimaryAlignment(void) const
-{ return (d_->core.flag & BamRecordImpl::SECONDARY) == 0; }
-
-inline BamRecordImpl& BamRecordImpl::SetPrimaryAlignment(bool ok)
-{
- if (ok) d_->core.flag &= ~BamRecordImpl::SECONDARY;
- else d_->core.flag |= BamRecordImpl::SECONDARY;
- return *this;
-}
-
-inline bool BamRecordImpl::IsProperPair(void) const
-{ return (d_->core.flag & BamRecordImpl::PROPER_PAIR) != 0; }
-
-inline BamRecordImpl& BamRecordImpl::SetProperPair(bool ok)
-{
- if (ok) d_->core.flag |= BamRecordImpl::PROPER_PAIR;
- else d_->core.flag &= ~BamRecordImpl::PROPER_PAIR;
- return *this;
-}
-
-inline bool BamRecordImpl::IsReverseStrand(void) const
-{ return (d_->core.flag & BamRecordImpl::REVERSE_STRAND) != 0; }
-
-inline BamRecordImpl& BamRecordImpl::SetReverseStrand(bool ok)
-{
- if (ok) d_->core.flag |= BamRecordImpl::REVERSE_STRAND;
- else d_->core.flag &= ~BamRecordImpl::REVERSE_STRAND;
- return *this;
-}
-
-inline bool BamRecordImpl::IsSecondMate(void) const
-{ return (d_->core.flag & BamRecordImpl::MATE_2) != 0; }
-
-inline BamRecordImpl& BamRecordImpl::SetSecondMate(bool ok)
-{
- if (ok) d_->core.flag |= BamRecordImpl::MATE_2;
- else d_->core.flag &= ~BamRecordImpl::MATE_2;
- return *this;
-}
-
-inline bool BamRecordImpl::IsSupplementaryAlignment(void) const
-{ return (d_->core.flag & BamRecordImpl::SUPPLEMENTARY) != 0; }
-
-inline BamRecordImpl& BamRecordImpl::SetSupplementaryAlignment(bool ok)
-{
- if (ok) d_->core.flag |= BamRecordImpl::SUPPLEMENTARY;
- else d_->core.flag &= ~BamRecordImpl::SUPPLEMENTARY;
- return *this;
-}
-
} // namespace BAM
} // namespace PacBio
+#include "pbbam/internal/BamRecordImpl.inl"
+
#endif // BAMRECORDIMPL_H
diff --git a/include/pbbam/BamTagCodec.h b/include/pbbam/BamTagCodec.h
index 5aad239..9126900 100644
--- a/include/pbbam/BamTagCodec.h
+++ b/include/pbbam/BamTagCodec.h
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file BamTagCodec.h
+/// \brief Defines the BamTagCodec class.
+//
// Author: Derek Barnett
#ifndef BAMTAGCODEC_H
@@ -45,28 +49,73 @@
namespace PacBio {
namespace BAM {
+/// \brief The BamTagCodec class provides binary encoding/decoding of %BAM tag
+/// data.
+///
+/// \note BamTagCodec is mostly an implementation and/or testing detail, and may
+/// be removed from the public API.
+///
class PBBAM_EXPORT BamTagCodec
{
-
-// high-level, operate on a full collection
public:
+ /// \name Tag Collection Methods
+ /// \{
+
+ /// \brief Creates a TagCollection from raw BAM data.
+ ///
+ /// \param[in] data BAM-formatted (binary) tag data
+ /// \returns TagCollection containing tag data
+ ///
static TagCollection Decode(const std::vector<uint8_t>& data);
+
+ /// \brief Creates binary BAM data from a TagCollection.
+ ///
+ /// \param[in] tags TagCollection containing tag data
+ /// \returns vector of bytes (encoded BAM data)
+ ///
static std::vector<uint8_t> Encode(const PacBio::BAM::TagCollection& tags);
-// per-tag methods
+ /// \}
+
public:
+ /// \name Per-Tag Methods
+ /// \{
- // returns the SAM/BAM single char code for tag type
- static uint8_t TagTypeCode(const PacBio::BAM::Tag& tag);
+ /// \brief Determines the SAM/BAM tag code for a Tag.
+ ///
+ /// \param[in] tag Tag object to check
+ /// \param[in] additionalModifier optional extra modifier (allows explicit
+ /// modification of an otherwise const Tag)
+ ///
+ /// \returns the SAM/BAM single char code for tag type
+ ///
+ static uint8_t TagTypeCode(const PacBio::BAM::Tag& tag,
+ const TagModifier& additionalModifier = TagModifier::NONE);
- // returns the tag value's raw data in bytes
- // NOTE: does *NOT* encode name & tag type. It does however,
- // include the element type of an array tag
- static std::vector<uint8_t> ToRawData(const PacBio::BAM::Tag& tag);
+ /// \brief Encodes a single Tag's contents in %BAM binary
+ ///
+ /// \note This method does \b NOT encode the tag name & tag type. It does
+ /// include the element type for array-type tags.
+ ///
+ /// \param[in] tag Tag object containing data to encode
+ /// \param[in] additionalModifier optional extra modifier (allows explicit
+ /// modification of an otherwise const Tag)
+ ///
+ /// \returns vector of bytes (encoded BAM data)
+ ///
+ static std::vector<uint8_t> ToRawData(const PacBio::BAM::Tag& tag,
+ const TagModifier& additionalModifier = TagModifier::NONE);
- // TODO: make this hidden a bit more, maybe this whole class in fact
- // rawData should be the result of sam.h:bam_aux_get(...)
+ /// \brief Creates a Tag object from binary BAM data.
+ ///
+ /// \param[in] rawData raw BAM bytes (assumed to be the result of
+ /// htslib's bam_aux_get())
+ ///
+ /// \returns resulting Tag object
+ ///
static PacBio::BAM::Tag FromRawData(uint8_t* rawData);
+
+ /// \}
};
} // namespace BAM
diff --git a/include/pbbam/BamWriter.h b/include/pbbam/BamWriter.h
index e66df0a..3bbe2a5 100644
--- a/include/pbbam/BamWriter.h
+++ b/include/pbbam/BamWriter.h
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file BamWriter.h
+/// \brief Defines the BamWriter class.
+//
// Author: Derek Barnett
#ifndef BAMWRITER_H
@@ -51,13 +55,34 @@ class BamFile;
namespace internal { class BamWriterPrivate; }
+/// \brief The BamWriter class provides a writing interface for creating
+/// new %BAM files.
+///
+/// \note The underlying buffered data may not be flushed to the file until the
+/// destructor is called. Trying to access the file (reading, stat-ing,
+/// indexing, etc.) before the BamWriter is destroyed yields undefined
+/// behavior. Enclose the BamWriter in some form of local scope (curly
+/// braces, a separate function, etc.) to ensure that its destructor is
+/// called before proceeding to read-based operations.
+///
+/// \code{.cpp}
+/// {
+/// BamWriter w(...);
+/// // write data
+/// }
+/// // now safe to access the new file
+/// \endcode
+///
+///
class PBBAM_EXPORT BamWriter
{
public:
- /// This enum allows you to control the compression level of the output BAM file.
+ /// \brief This enum allows you to control the compression level of the
+ /// output %BAM file.
+ ///
+ /// Values are equivalent to zlib compression levels. See its documentation
+ /// for more details: http://www.zlib.net/manual.html
///
- /// Values are equivalent to zlib compression levels. See its documentation for more details:
- /// http://www.zlib.net/manual.html
enum CompressionLevel
{
CompressionLevel_0 = 0
@@ -77,28 +102,54 @@ public:
, BestCompression = CompressionLevel_9
};
+ /// \brief This enum allows you to control whether BAI bin numbers are
+ /// calculated for output records.
+ ///
+ /// For most cases, the default behavior (ON) should be retained for maximum
+ /// compatibility with downstream tools (e.g. samtools index). Disabling bin
+ /// calculation should only be used if all records are known to never be
+ /// mapped, and even then only if profiling revelas the calculation to
+ /// affect extremely performance-sensitive, "critical paths".
+ ///
+ enum BinCalculationMode
+ {
+ BinCalculation_ON = 0
+ , BinCalculation_OFF
+ };
+
public:
/// \name Constructors & Related Methods
/// \{
- /// Opens a BAM file for writing & writes the header information.
+ /// \brief Opens a %BAM file for writing & writes the header information.
///
/// The error status will be set if either operation fails.
///
/// \note Set \p filename to "-" for stdout.
///
- /// \param[in] filename path to output BAM file
+ /// \param[in] filename path to output %BAM file
/// \param[in] header BamHeader object
/// \param[in] compressionLevel zlib compression level
- /// \param[in] numThreads number of threads for compression.
- /// If set to 0, BamWriter will attempt to determine a reasonable estimate.
- /// If set to 1, this will force single-threaded execution.
- /// No checks are made against an upper limit.
+ /// \param[in] numThreads number of threads for compression. If set to
+ /// 0, BamWriter will attempt to determine a
+ /// reasonable estimate. If set to 1, this will
+ /// force single-threaded execution. No checks
+ /// are made against an upper limit.
+ ///
+ /// \param[in] binCalculationMode BAI bin calculation mode. The default
+ /// behavior will ensure proper bin numbers are provided for all
+ /// records written. This extra step may turned off when bin
+ /// numbers are not needed. Though if in doubt, keep the default.
+ ///
+ /// \throws std::runtmie_error if there was a problem opening the file for
+ /// writing or if an error occurred while writing the header
+ ///
BamWriter(const std::string& filename,
const BamHeader& header,
const BamWriter::CompressionLevel compressionLevel = BamWriter::DefaultCompression,
- const size_t numThreads = 4);
+ const size_t numThreads = 4,
+ const BinCalculationMode binCalculationMode = BamWriter::BinCalculation_ON);
/// Fully flushes all buffered data & closes file.
~BamWriter(void);
@@ -108,33 +159,41 @@ public:
public:
/// \name Data Writing & Resource Management
+ /// \{
- /// Try to flush any buffered data to file.
+ /// \brief Try to flush any buffered data to file.
///
- /// \note The underlying implementation doesn't necessarily flush buffered data
- /// immediately, especially in a multithreaded writer situation.
+ /// \note The underlying implementation doesn't necessarily flush buffered
+ /// data immediately, especially in a multithreaded writer situation.
/// Let the BamWriter go out of scope to fully ensure flushing.
///
- /// \throws
+ /// \throws std::runtime_error if flush fails
+ ///
void TryFlush(void);
- /// Write a record to the output BAM file.
+ /// \brief Write a record to the output %BAM file.
///
/// \param[in] record BamRecord object
+ ///
/// \throws std::runtime_error on failure to write
+ ///
void Write(const BamRecord& record);
- /// Write a record to the output BAM file.
+ /// \brief Write a record to the output %BAM file.
///
/// \param[in] record BamRecord object
/// \param[out] vOffset BGZF virtual offset to start of \p record
+ ///
/// \throws std::runtime_error on failure to write
+ ///
void Write(const BamRecord& record, int64_t* vOffset);
- /// Write a record to the output BAM file.
+ /// \brief Write a record to the output %BAM file.
///
/// \param[in] recordImpl BamRecordImpl object
+ ///
/// \throws std::runtime_error on failure to write
+ ///
void Write(const BamRecordImpl& recordImpl);
/// \}
diff --git a/include/pbbam/ZmwQuery.h b/include/pbbam/BarcodeQuery.h
similarity index 58%
copy from include/pbbam/ZmwQuery.h
copy to include/pbbam/BarcodeQuery.h
index fdd1d1d..3072ddf 100644
--- a/include/pbbam/ZmwQuery.h
+++ b/include/pbbam/BarcodeQuery.h
@@ -32,11 +32,15 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file BarcodeQuery.h
+/// \brief Defines the BarcodeQuery class.
+//
// Author: Derek Barnett
-#ifndef ZMWQUERY_H
-#define ZMWQUERY_H
+#ifndef BARCODEQUERY_H
+#define BARCODEQUERY_H
#include "pbbam/Config.h"
#include "pbbam/internal/QueryBase.h"
@@ -44,23 +48,50 @@
namespace PacBio {
namespace BAM {
-//namespace staging {
-class PBBAM_EXPORT ZmwQuery : public internal::IQuery
+/// \brief The BarcodeQuery class provides iterable access to a DataSet's %BAM
+/// records, limiting results to those matching a particular barcode.
+///
+/// Example:
+/// \include code/BarcodeQuery.txt
+///
+/// \note Currently, all %BAM files must have a corresponding ".pbi" index file.
+/// Use BamFile::EnsurePacBioIndexExists before creating the query if one
+/// may not be present.
+///
+class PBBAM_EXPORT BarcodeQuery : public internal::IQuery
{
public:
- ZmwQuery(const std::vector<int32_t>& zmwWhitelist,
- const DataSet& dataset);
+ /// \brief Creates a new BarcodeQuery, limiting record results to only those
+ /// annotated with a particular barcode ID.
+ ///
+ /// \param[in] barcode filtering criteria
+ /// \param[in] dataset input data source(s)
+ ///
+ /// \sa BamRecord::Barcodes
+ ///
+ /// \throws std::runtime_error on failure to open/read underlying %BAM or PBI
+ /// files.
+ ///
+ BarcodeQuery(const uint16_t barcode, const DataSet& dataset);
+
+ ~BarcodeQuery(void);
+
+public:
-protected:
- FileIterPtr CreateIterator(const BamFile& bamFile);
+ /// \brief Main iteration point for record access.
+ ///
+ /// Most client code should not need to use this method directly. Use
+ /// iterators instead.
+ ///
+ bool GetNext(BamRecord& r);
private:
- std::vector<int> whitelist_;
+ struct BarcodeQueryPrivate;
+ std::unique_ptr<BarcodeQueryPrivate> d_;
};
-//} // namespace staging
} // namespace BAM
} // namespace PacBio
-#endif // ZMWQUERY_H
+#endif // BARCODEQUERY_H
diff --git a/include/pbbam/Cigar.h b/include/pbbam/Cigar.h
index 1e0bc46..c391057 100644
--- a/include/pbbam/Cigar.h
+++ b/include/pbbam/Cigar.h
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file Cigar.h
+/// \brief Defines the Cigar class.
+//
// Author: Derek Barnett
#ifndef CIGAR_H
@@ -46,27 +50,39 @@
namespace PacBio {
namespace BAM {
+/// \brief The Cigar class represents the CIGAR string used to report alignment
+/// charateristics in SAM/BAM.
+///
+/// \note Use of the 'M' operator is forbidden in PacBio BAMs. See
+/// CigarOperationType description for more information.
+///
+/// \sa https://samtools.github.io/hts-specs/SAMv1.pdf for more information on CIGAR in general.
+///
class PBBAM_EXPORT Cigar : public std::vector<CigarOperation>
{
-
public:
- /// \name Static Constructor
+ /// \name Constructors & Related Methods
/// \{
- /// Creates a Cigar object from SAM/BAM string input
+ /// \brief Creates a Cigar object from SAM/BAM string input
+ ///
+ /// \param [in] stdString SAM/BAM formatted CIGAR data
+ /// \returns a Cigar object representing the input data
+ ///
+ /// \note This class may be removed from the public API in the future,
+ /// as the constructor taking a std::string accomplishes the same end.
///
- /// \param [in] stdString SAM/BAM formatted CIGAR data
- /// \returns Cigar object representing the input data
static Cigar FromStdString(const std::string& stdString);
- /// \}
-
-public:
- /// \name Constructors & Related Methods
- /// \{
-
+ /// \brief Creates an empty Cigar.
Cigar(void);
+
+ /// \brief Creates a Cigar object from SAM/BAM string input
+ ///
+ /// \param [in] cigarString SAM/BAM formatted CIGAR data
+ ///
Cigar(const std::string& cigarString);
+
Cigar(const Cigar& other);
Cigar(Cigar&& other);
Cigar& operator=(const Cigar& other);
@@ -82,35 +98,15 @@ public:
/// Converts Cigar object data to SAM/BAM formatted string
///
/// \returns SAM/BAM formatted std::string
+ ///
std::string ToStdString(void) const;
/// \}
};
-inline Cigar::Cigar(void)
- : std::vector<CigarOperation>()
-{ }
-
-inline Cigar::Cigar(const Cigar& other)
- : std::vector<CigarOperation>(other)
-{ }
-
-inline Cigar::Cigar(Cigar&& other)
- : std::vector<CigarOperation>(std::move(other))
-{ }
-
-inline Cigar& Cigar::operator=(const Cigar& other)
-{ std::vector<CigarOperation>::operator= (other); return *this; }
-
-inline Cigar& Cigar::operator=(Cigar&& other)
-{ std::vector<CigarOperation>::operator= (std::move(other)); return *this; }
-
-inline Cigar::~Cigar(void) { }
-
-inline Cigar Cigar::FromStdString(const std::string& stdString)
-{ return Cigar(stdString); }
-
} // namespace BAM
} // namespace PacBio
+#include "pbbam/internal/Cigar.inl"
+
#endif // CIGAR_H
diff --git a/include/pbbam/CigarOperation.h b/include/pbbam/CigarOperation.h
index 951128d..9b936ef 100644
--- a/include/pbbam/CigarOperation.h
+++ b/include/pbbam/CigarOperation.h
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file CigarOperation.h
+/// \brief Defines the CigarOperationType enum & CigarOperation class.
+//
// Author: Derek Barnett
#ifndef CIGAROPERATION_H
@@ -44,11 +48,16 @@
namespace PacBio {
namespace BAM {
-/// Describes a CIGAR operation. Bracketed character is the corresponding SAM/BAM character code.
+/// \brief Describes a CIGAR operation.
+///
+/// Bracketed character is the corresponding SAM/BAM character code.
///
-/// \warning ALIGNMENT_MATCH ('M') is included in this enum to maintain consistency with htslib.
-/// However, as of PacBio BAM spec version 3.0b7, this CIGAR operation \b forbidden. Attempt to
-/// read or write a record containing this operation will trigger a std::runtime_error.
+/// \warning ALIGNMENT_MATCH ('M') is included in this enum to maintain
+/// consistency with htslib. However, as of PacBio BAM spec version
+/// 3.0b7, this CIGAR operation \b forbidden. Any attempt to read or
+/// write a record containing this operation will trigger a
+/// std::runtime_error. SEQUENCE_MATCH('=) or SEQUENCE_MISMATCH('X')
+/// should be used instead.
///
enum class CigarOperationType
{
@@ -62,11 +71,11 @@ enum class CigarOperationType
, PADDING ///< padding (silent deletion from padded reference) [P]
, SEQUENCE_MATCH ///< sequence match [=]
, SEQUENCE_MISMATCH ///< sequence mismatch [X]
-
- // TODO: looks like there is a new 'B' type in htslib source, referring to some 'back' operation...
- // no reference in htslib docs though yet as to what that applies to
};
+/// \brief The CigarOperation class represents a single CIGAR operation
+/// (consisting of a type & length).
+///
class PBBAM_EXPORT CigarOperation
{
public:
@@ -157,70 +166,9 @@ private:
uint32_t length_;
};
-inline CigarOperation::CigarOperation(void)
- : type_(CigarOperationType::UNKNOWN_OP)
- , length_(0)
-{ }
-
-inline CigarOperation::CigarOperation(char c, uint32_t length)
- : type_(CigarOperation::CharToType(c))
- , length_(length)
-{
- if (type_ == CigarOperationType::ALIGNMENT_MATCH)
- throw std::runtime_error("CIGAR operation 'M' is not allowed in PacBio BAM files. Use 'X/=' instead.");
-}
-
-inline CigarOperation::CigarOperation(CigarOperationType op, uint32_t length)
- : type_(op)
- , length_(length)
-{
- if (type_ == CigarOperationType::ALIGNMENT_MATCH)
- throw std::runtime_error("CIGAR operation 'M' is not allowed in PacBio BAM files. Use 'X/=' instead.");
-}
-
-inline CigarOperation::CigarOperation(const CigarOperation& other)
- : type_(other.type_)
- , length_(other.length_)
-{ }
-
-inline CigarOperation::CigarOperation(CigarOperation&& other)
- : type_(std::move(other.type_))
- , length_(std::move(other.length_))
-{ }
-
-inline CigarOperation::~CigarOperation(void) { }
-
-inline uint32_t CigarOperation::Length(void) const
-{ return length_; }
-
-inline CigarOperation& CigarOperation::Length(const uint32_t length)
-{ length_ = length; return *this; }
-
-inline CigarOperationType CigarOperation::Type(void) const
-{ return type_; }
-
-inline CigarOperation &CigarOperation::Type(const CigarOperationType opType)
-{ type_ = opType; return *this; }
-
-inline char CigarOperation::Char(void) const
-{ return CigarOperation::TypeToChar(type_); }
-
-inline CigarOperation &CigarOperation::Char(const char opChar)
-{ type_ = CigarOperation::CharToType(opChar);return *this; }
-
-inline CigarOperation& CigarOperation::operator=(const CigarOperation& other)
-{ type_ = other.type_; length_ = other.length_; return *this; }
-
-inline CigarOperation& CigarOperation::operator=(CigarOperation&& other)
-{ type_ = std::move(other.type_); length_ = std::move(other.length_); return *this; }
-
-inline bool CigarOperation::operator==(const CigarOperation& other) const
-{ return type_ == other.type_ && length_ == other.length_; }
-
-inline bool CigarOperation::operator!=(const CigarOperation& other) const
-{ return !(*this == other); }
-
} // namespace BAM
} // namespace PacBio
+#include "pbbam/internal/CigarOperation.inl"
+
#endif // CIGAROPERATION_H
diff --git a/include/pbbam/Compare.h b/include/pbbam/Compare.h
new file mode 100644
index 0000000..da44b48
--- /dev/null
+++ b/include/pbbam/Compare.h
@@ -0,0 +1,430 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file Compare.h
+/// \brief Defines the Compare class & a number of function objects for
+/// comparing BamRecords.
+//
+// Author: Derek Barnett
+
+#ifndef COMPARE_H
+#define COMPARE_H
+
+#include "pbbam/BamRecord.h"
+#include <functional>
+#include <string>
+#include <utility>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The Compare class provides utilities for sorting collections of
+/// BamRecords.
+///
+/// \note The functors provided here currently only support std::less<T>
+/// comparisons (i.e. sorting by ascending value).
+///
+/// \include code/Compare.txt
+///
+struct PBBAM_EXPORT Compare
+{
+public:
+
+ /// \name Comparison Type
+ /// \{
+
+ /// \brief This enum defines the supported comparison types
+ /// { ==, !=, <, <=, >, >=, & (contains), ~ (not contains) }.
+ ///
+ enum Type {
+ EQUAL = 0
+ , NOT_EQUAL
+ , LESS_THAN
+ , LESS_THAN_EQUAL
+ , GREATER_THAN
+ , GREATER_THAN_EQUAL
+ , CONTAINS
+ , NOT_CONTAINS
+ };
+
+ /// \brief Convert operator string to Compare::Type.
+ ///
+ /// \include code/Compare_TypeFromOperator.txt
+ ///
+ /// \param[in] opString operator string. Can be C++-style operators
+ /// ("==", "!=", "<=", etc) or alpha equivalents
+ /// ("eq", "ne", "lte", etc).
+ ///
+ /// \returns comparison type from an operator string
+ /// \throws std::runtime_error if cannot convert opString to Compare::Type
+ /// \sa Compare::TypeToOperator
+ ///
+ static Compare::Type TypeFromOperator(const std::string& opString);
+
+ /// \brief Convert a Compare::Type to printable enum name.
+ ///
+ /// \include code/Compare_TypeToName.txt
+ ///
+ /// \param[in] type Compare::Type to convert
+ /// \returns the printable name for a Compare::Type enum value.are::Type
+ /// \throws std::runtime_error on unknown Compare::Type
+ ///
+ static std::string TypeToName(const Compare::Type& type);
+
+ /// \brief Convert a Compare::Type to printable operator.
+ ///
+ /// \param[in] type Compare::Type to convert
+ /// \param[in] asAlpha (optional) flag to print using alpha equivalents
+ /// e.g. "lte" rather than "<="
+ /// \returns the printable operator string
+ /// \throws std::runtime_error on unknown Compare::Type
+ ///
+ static std::string TypeToOperator(const Compare::Type& type,
+ bool asAlpha = false);
+
+ /// \}
+
+public:
+
+ /// \name Comparison Function Objects
+ /// \{
+
+ /// %Base class for all BamRecord compare functors.
+ ///
+ /// Mostly used for method signatures that can accept any comparator.
+ ///
+ /// Custom comparators may be used by inheriting from this class.
+ ///
+ struct Base : public std::function<bool(const BamRecord&, const BamRecord&)> { };
+
+private:
+ /// \internal
+ ///
+ /// Exists to provide the typedef we'll use in the actual
+ /// MemberFunctionBase, since we need to use it in the template signature.
+ /// This keeps that a lot easier to read.
+ ///
+ template<typename ValueType>
+ struct MemberFunctionBaseHelper : public Compare::Base
+ {
+ typedef ValueType (BamRecord::*MemberFnType)(void) const;
+ };
+
+public:
+ /// \brief %Base class for all BamRecord compare functors that take a
+ /// BamRecord function pointer and compare on its return type.
+ ///
+ /// Derived comparators usually need only declare the return value &
+ /// function pointer in the template signature. This class implements the
+ /// basic method-calling machinery.
+ ///
+ /// Custom comparators will work for any BamRecord member function that does
+ /// not take any input parameters.
+ ///
+ template<typename ValueType,
+ typename MemberFunctionBaseHelper<ValueType>::MemberFnType fn,
+ typename CompareType = std::less<ValueType> >
+ struct MemberFunctionBase : public Compare::MemberFunctionBaseHelper<ValueType>
+ {
+ bool operator()(const BamRecord& lhs, const BamRecord& rhs) const;
+ };
+
+public:
+
+ /// \brief Compares on BamRecord::AlignedEnd.
+ ///
+ /// Example:
+ /// \include code/Compare_AlignedEnd.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct AlignedEnd : public MemberFunctionBase<Position, &BamRecord::AlignedEnd> { };
+
+ /// \brief Compares on BamRecord::AlignedStart.
+ ///
+ /// Example:
+ /// \include code/Compare_AlignedStart.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct AlignedStart : public MemberFunctionBase<Position, &BamRecord::AlignedStart> { };
+
+ /// \brief Compares on BamRecord::AlignedStrand
+ ///
+ /// Example:
+ /// \include code/Compare_AlignedStrand.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct AlignedStrand : public MemberFunctionBase<Strand, &BamRecord::AlignedStrand> { };
+
+ /// \brief Compares on BamRecord::BarcodeForward.
+ ///
+ /// Example:
+ /// \include code/Compare_BarcodeForward.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct BarcodeForward : public MemberFunctionBase<uint16_t, &BamRecord::BarcodeForward> { };
+
+ /// \brief Compares on BamRecord::BarcodeQuality.
+ ///
+ /// Example:
+ /// \include code/Compare_BarcodeQuality.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct BarcodeQuality : public MemberFunctionBase<uint8_t, &BamRecord::BarcodeQuality> { };
+
+ /// \brief Compares on BamRecord::BarcodeReverse.
+ ///
+ /// Example:
+ /// \include code/Compare_BarcodeReverse.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct BarcodeReverse: public MemberFunctionBase<uint16_t, &BamRecord::BarcodeReverse> { };
+
+ /// \brief Compares on BamRecord::FullName.
+ ///
+ /// Example:
+ /// \include code/Compare_FullName.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct FullName : public MemberFunctionBase<std::string, &BamRecord::FullName> { };
+
+ /// \brief Compares on BamRecord::LocalContextFlags.
+ ///
+ /// Example:
+ /// \include code/Compare_LocalContextFlag.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct LocalContextFlag : public MemberFunctionBase<LocalContextFlags, &BamRecord::LocalContextFlags> { };
+
+ /// \brief Compares on BamRecord::MapQuality.
+ ///
+ /// Example:
+ /// \include code/Compare_MapQuality.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct MapQuality : public MemberFunctionBase<uint8_t, &BamRecord::MapQuality> { };
+
+ /// \brief Compares on BamRecord::MovieName.
+ ///
+ /// Example:
+ /// \include code/Compare_MovieName.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct MovieName : public MemberFunctionBase<std::string, &BamRecord::MovieName> { };
+
+ /// \brief Provides an operator() is essentially a no-op for
+ /// comparing/sorting.
+ ///
+ /// If used in a sorting operation, then no change will occur.
+ ///
+ struct None : public Compare::Base
+ {
+ bool operator()(const BamRecord&, const BamRecord&) const;
+ };
+
+ ///\brief Compares on BamRecord::NumDeletedBases.
+ ///
+ /// Example:
+ /// \include code/Compare_NumDeletedBases.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct NumDeletedBases : public MemberFunctionBase<size_t, &BamRecord::NumDeletedBases> { };
+
+ /// \brief Compares on BamRecord::NumInsertedBases.
+ ///
+ /// Example:
+ /// \include code/Compare_NumInsertedBases.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct NumInsertedBases : public MemberFunctionBase<size_t, &BamRecord::NumInsertedBases> { };
+
+ /// \brief Compares on BamRecord::NumMatches.
+ ///
+ /// Example:
+ /// \include code/Compare_NumMatches.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct NumMatches : public MemberFunctionBase<size_t, &BamRecord::NumMatches> { };
+
+ /// \brief Compares on BamRecord::NumMismatches.
+ ///
+ /// Example:
+ /// \include code/Compare_NumMismatches.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct NumMismatches : public MemberFunctionBase<size_t, &BamRecord::NumMismatches> { };
+
+ /// \brief Compares on BamRecord::QueryEnd.
+ ///
+ /// Example:
+ /// \include code/Compare_QueryEnd.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct QueryEnd : public MemberFunctionBase<Position, &BamRecord::QueryEnd> { };
+
+ /// \brief Compares on BamRecord::QueryStart.
+ ///
+ /// Example:
+ /// \include code/Compare_QueryStart.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct QueryStart : public MemberFunctionBase<Position, &BamRecord::QueryStart> { };
+
+ /// \brief Compares on BamRecord::ReadAccuracy.
+ ///
+ /// Example:
+ /// \include code/Compare_ReadAccuracy.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct ReadAccuracy : public MemberFunctionBase<Accuracy, &BamRecord::ReadAccuracy> { };
+
+ /// \brief Compares on BamRecord::ReadGroupId.
+ ///
+ /// \note Even though the ReadGroupId string contains hex values, it is
+ /// still just a std::string. Comparisons will use lexical, not
+ /// numeric ordering. If numeric ordering is desired, use
+ /// Compare::ReadGroupNumericId instead.
+ ///
+ /// Example:
+ /// \include code/Compare_ReadGroupId.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct ReadGroupId : public MemberFunctionBase<std::string, &BamRecord::ReadGroupId> { };
+
+ /// \brief Compares on BamRecord::ReadGroupNumericId.
+ ///
+ /// Example:
+ /// \include code/Compare_ReadGroupNumericId.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct ReadGroupNumericId : public MemberFunctionBase<int32_t, &BamRecord::ReadGroupNumericId> { };
+
+ /// \brief Compares on BamRecord::ReferenceEnd.
+ ///
+ /// Example:
+ /// \include code/Compare_ReferenceEnd.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct ReferenceEnd : public MemberFunctionBase<Position, &BamRecord::ReferenceEnd> { };
+
+ /// \brief Compares on BamRecord::ReferenceId.
+ ///
+ /// Example:
+ /// \include code/Compare_ReferenceId.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct ReferenceId : public MemberFunctionBase<int32_t, &BamRecord::ReferenceId> { };
+
+ /// \brief Compares on BamRecord::ReferenceName.
+ ///
+ /// Example:
+ /// \include code/Compare_ReferenceName.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct ReferenceName : public MemberFunctionBase<std::string, &BamRecord::ReferenceName> { };
+
+ /// \brief Compares on BamRecord::ReferenceStart.
+ ///
+ /// Example:
+ /// \include code/Compare_ReferenceStart.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct ReferenceStart : public MemberFunctionBase<Position, &BamRecord::ReferenceStart> { };
+
+ /// \brief Compares on BamRecord::HoleNumber.
+ ///
+ /// Example:
+ /// \include code/Compare_Zmw.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct Zmw : public MemberFunctionBase<int32_t, &BamRecord::HoleNumber> { };
+
+ /// \}
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/Compare.inl"
+
+#endif // COMPARE_H
diff --git a/include/pbbam/CompositeBamReader.h b/include/pbbam/CompositeBamReader.h
new file mode 100644
index 0000000..f0de942
--- /dev/null
+++ b/include/pbbam/CompositeBamReader.h
@@ -0,0 +1,269 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file CompositeBamReader.h
+/// \brief Defines the composite BAM readers, for working with multiple input
+/// files.
+//
+// Author: Derek Barnett
+
+#ifndef COMPOSITEBAMREADER_H
+#define COMPOSITEBAMREADER_H
+
+#include "pbbam/BaiIndexedBamReader.h"
+#include "pbbam/BamFile.h"
+#include "pbbam/BamHeader.h"
+#include "pbbam/BamReader.h"
+#include "pbbam/BamRecord.h"
+#include "pbbam/Config.h"
+#include "pbbam/DataSet.h"
+#include "pbbam/GenomicInterval.h"
+#include "pbbam/PbiIndexedBamReader.h"
+#include <deque>
+#include <functional>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+
+namespace internal {
+
+/// \internal
+/// \brief The CompositeMergeItem class provides a helper struct for composite
+/// readers, containing a single-file reader and its "next" record.
+///
+struct CompositeMergeItem
+{
+public:
+ std::unique_ptr<BamReader> reader;
+ BamRecord record;
+
+public:
+ CompositeMergeItem(std::unique_ptr<BamReader>&& rdr);
+ CompositeMergeItem(std::unique_ptr<BamReader>&& rdr, BamRecord&& rec);
+ CompositeMergeItem(CompositeMergeItem&& other);
+ CompositeMergeItem& operator=(CompositeMergeItem&& other);
+ ~CompositeMergeItem(void);
+};
+
+/// \internal
+/// \brief The CompositeMergeItemSorter class provides a helper function object
+/// for ordering composite reader results.
+///
+/// Essentially just exracts a BamRecord from its parent CompositeMergeItem for
+/// further checks.
+///
+template<typename CompareType>
+struct CompositeMergeItemSorter : public std::function<bool(const CompositeMergeItem&,
+ const CompositeMergeItem&)>
+{
+ bool operator()(const CompositeMergeItem& lhs,
+ const CompositeMergeItem& rhs);
+};
+
+} // namespace internal
+
+/// \brief The GenomicIntervalCompositeBamReader class provides read access to
+/// multipe %BAM files, limiting results to a genomic region.
+///
+/// Requires a ".bai" file for each input %BAM file.
+///
+/// Results will be returned in order of genomic coordinate (first by reference
+/// ID, then by position).
+///
+class PBBAM_EXPORT GenomicIntervalCompositeBamReader
+{
+public:
+ /// \name Contstructors & Related Methods
+ /// \{
+
+ GenomicIntervalCompositeBamReader(const GenomicInterval& interval,
+ const std::vector<BamFile>& bamFiles);
+ GenomicIntervalCompositeBamReader(const GenomicInterval& interval,
+ std::vector<BamFile>&& bamFiles);
+ GenomicIntervalCompositeBamReader(const GenomicInterval& interval,
+ const DataSet& dataset);
+
+ /// \}
+
+public:
+ /// \name Data Access
+ /// \{
+
+ /// Fetches next BAM record in the interval specified, storing in \p record
+ ///
+ /// \param[out] record
+ /// \returns true on success, false if no more data available.
+ ///
+ bool GetNext(BamRecord& record);
+
+ /// Sets a new genomic interval of interest.
+ ///
+ /// \returns reference to this reader
+ ///
+ GenomicIntervalCompositeBamReader& Interval(const GenomicInterval& interval);
+
+ /// \returns the current specified interval
+ ///
+ const GenomicInterval& Interval(void) const;
+
+ /// \}
+
+private:
+ void UpdateSort(void);
+
+private:
+ GenomicInterval interval_;
+ std::deque<internal::CompositeMergeItem> mergeItems_;
+ std::vector<std::string> filenames_;
+};
+
+/// \brief Provides read access to multipe %BAM files, limiting results to those
+/// passing a PbiFilter.
+///
+/// Requires a ".pbi" file for each input %BAM file.
+///
+/// \note The template parameter OrderByType is not fully implemented at this
+/// time. Use of comparison functor (e.g. Compare::Zmw) for this will
+/// currently result in the proper "next" value <b> at each iteration
+/// step, independently, but not over the full data set. </b> If all
+/// files' "order-by" data values are accessible in increasing order
+/// within each file, then the expected ordering will be observed,
+/// However, if these data are not sorted within a file, the final results
+/// will appear unordered. \n
+/// \n
+/// Example:\n
+/// file 1: { 1, 5, 2, 6 } \n
+/// file 2: { 3, 8, 4, 7 } \n
+/// results: { 1, 3, 5, 2, 6, 8, 4, 7 } \n
+/// \n
+/// This a known issue and will be addressed in a future update. But in
+/// the meantime, use of Compare::None as the OrderByType is recommended,
+/// to explicitly indicate that no particular ordering is expected.
+///
+template<typename OrderByType>
+class PBBAM_EXPORT PbiFilterCompositeBamReader
+{
+public:
+ typedef internal::CompositeMergeItem value_type;
+ typedef internal::CompositeMergeItemSorter<OrderByType> merge_sorter_type;
+ typedef std::deque<value_type> container_type;
+ typedef typename container_type::iterator iterator;
+ typedef typename container_type::const_iterator const_iterator;
+
+public:
+ /// \name Contstructors & Related Methods
+ /// \{
+
+ PbiFilterCompositeBamReader(const PbiFilter& filter,
+ const std::vector<BamFile>& bamFiles);
+ PbiFilterCompositeBamReader(const PbiFilter& filter,
+ std::vector<BamFile>&& bamFiles);
+ PbiFilterCompositeBamReader(const PbiFilter& filter,
+ const DataSet& dataset);
+
+ /// \}
+
+public:
+ /// \name Data Access
+ /// \{
+
+ /// Fetches next BAM record in the interval specified.
+ ///
+ /// \returns true on success, false if no more data available.
+ ///
+ bool GetNext(BamRecord& record);
+
+ /// Sets a new PBI filter
+ ///
+ /// \returns reference to this reader
+ ///
+ PbiFilterCompositeBamReader& Filter(const PbiFilter& filter);
+
+ /// \}
+
+private:
+ void UpdateSort(void);
+
+private:
+ container_type mergeQueue_;
+ std::vector<std::string> filenames_;
+};
+
+/// \brief The SequentialCompositeBamReader class provides read access to
+/// multiple %BAM files, reading through the entire contents of each
+/// file.
+///
+/// Input files will be accessed in the order provided to the constructor. Each
+/// file's contents will be exhausted before moving on to the next one (as
+/// opposed to a "round-robin" scheme).
+///
+class PBBAM_EXPORT SequentialCompositeBamReader
+{
+public:
+ /// \name Contstructors & Related Methods
+ /// \{
+
+ SequentialCompositeBamReader(const std::vector<BamFile>& bamFiles);
+ SequentialCompositeBamReader(std::vector<BamFile>&& bamFiles);
+ SequentialCompositeBamReader(const DataSet& dataset);
+
+ /// \}
+
+public:
+ /// \name Data Access
+ /// \{
+
+ /// Fetches next BAM record from the .
+ ///
+ /// \returns true on success, false if no more data available.
+ ///
+ bool GetNext(BamRecord& record);
+
+ /// \}
+
+private:
+ std::deque<std::unique_ptr<BamReader> > readers_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/CompositeBamReader.inl"
+
+#endif // COMPOSITEBAMREADER_H
diff --git a/include/pbbam/Config.h b/include/pbbam/Config.h
index 3d2b5d7..4fbc417 100644
--- a/include/pbbam/Config.h
+++ b/include/pbbam/Config.h
@@ -32,21 +32,20 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file Config.h
+/// \brief Defines library-wide macros & global variables.
+//
// Author: Derek Barnett
#ifndef PBBAM_CONFIG_H
#define PBBAM_CONFIG_H
-// --------------------------------
-// standard types
-// --------------------------------
-
#include <cstdint>
-// -------------------------------------
-// library symbol import/export macros
-// -------------------------------------
+/// \name Library Import/Export
+/// \{
#ifndef PBBAM_LIBRARY_EXPORT
# if defined(WIN32)
@@ -72,12 +71,14 @@
# endif
#endif
-// ----------------------------------------------------
-// setup the shared_ptr implementation we'll be using
-// ----------------------------------------------------
+/// \}
+
+/// \name Shared Pointer Settings
+/// \{
// uncomment this define, or pass via command-line (-DPBBAM_USE_BOOST_SHARED_PTR),
// to use boost::shared_ptr<T> instead of std::shared_ptr<T>
+//
//#define PBBAM_USE_BOOST_SHARED_PTR
#ifdef PBBAM_USE_BOOST_SHARED_PTR
@@ -88,38 +89,88 @@
# define PBBAM_SHARED_PTR std::shared_ptr
#endif
-// ----------------------------------------------------
-// htslib verbosity level
-// ----------------------------------------------------
+/// \}
-namespace PacBio {
-namespace BAM {
+/// \name Class Definition Helpers
+/// \{
-/// \brief Sets the desired verbosity level of htslib warnings.
+/// \brief Disables the use of copy constructors and assignment operators for a
+/// class.
///
-/// Change this value to allow debug/warning statements from htslib.
-/// The valid range seems to be [0-3], where 0->OFF, and 3->most verbose.
+/// To use, place the macro in a class's private section:
+/// \code{.cpp}
+/// struct Foo {
+/// private:
+/// DISABLE_COPY(Foo);
+/// };
+/// \endcode
///
-extern int HtslibVerbosity;
-
-} // namespace BAM
-} // namespace PacBio
-
-// ----------------------------------------------------
-// additional helper macros
-// ----------------------------------------------------
-
#ifndef DISABLE_COPY
#define DISABLE_COPY(Class) \
Class(const Class&); \
Class& operator=(const Class&)
#endif
+/// \brief Disables the use of move constructors and assignment operators for a
+/// class.
+///
+/// To use, place the macro in a class's private section:
+/// \code{.cpp}
+/// struct Foo {
+/// private:
+/// DISABLE_MOVE(Foo);
+/// };
+/// \endcode
+///
+#ifndef DISABLE_MOVE
+#define DISABLE_MOVE(Class) \
+ Class(Class&&); \
+ Class& operator=(Class&&);
+#endif
+
+/// \brief Disables the use of copy & move constructors and assignment operators f
+/// or a class.
+///
+/// To use, place the macro in a class's private section:
+/// \code{.cpp}
+/// struct Foo {
+/// private:
+/// DISABLE_MOVE_AND_COPY(Foo);
+/// };
+/// \endcode
+///
#ifndef DISABLE_MOVE_AND_COPY
#define DISABLE_MOVE_AND_COPY(Class) \
- Class(Class&&); \
- Class& operator=(Class&&); \
+ DISABLE_MOVE(Class) \
DISABLE_COPY(Class)
#endif
+/// \}
+
+namespace PacBio {
+namespace BAM {
+
+/// \name Verbosity Settings
+/// \{
+
+/// \brief Sets the desired verbosity level of htslib warnings.
+///
+/// Change this value to allow debug/warning statements from htslib itself.
+/// The valid range seems to be [0-3], where 0 indicates OFF, and 3 is the
+/// most verbose.
+///
+/// By default, pbbam disables htslib statements to keep output channels clean.
+/// We rely on exceptions & their associated messages instead.
+///
+/// This global variable is obviously not thread-safe by any means. But as a
+/// debug flag, it is unlikely to cause any real issues. The worst case would be
+/// unexpected presence/absence of output statements.
+///
+extern int HtslibVerbosity;
+
+/// \}
+
+} // namespace BAM
+} // namespace PacBio
+
#endif // PBBAM_CONFIG_H
diff --git a/include/pbbam/DataSet.h b/include/pbbam/DataSet.h
index 21a6aa2..af1b14f 100644
--- a/include/pbbam/DataSet.h
+++ b/include/pbbam/DataSet.h
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file DataSet.h
+/// \brief Defines the DataSet class.
+//
// Author: Derek Barnett
#ifndef DATASET_H
@@ -41,20 +45,31 @@
#include "pbbam/BamFile.h"
#include "pbbam/Config.h"
#include "pbbam/DataSetTypes.h"
+#include <chrono>
#include <memory>
+#include <set>
#include <string>
#include <vector>
namespace PacBio {
namespace BAM {
+/// \brief The DataSet class represents a %PacBio analyis dataset (e.g. from
+/// XML).
+///
+/// \nosubgrouping
+///
+/// It provides resource paths, filters, and metadata associated with a dataset
+/// under analysis.
+///
class PBBAM_EXPORT DataSet
{
public:
-
- /// \name DataSet Types
+ /// \name DataSet Type
/// \{
+ /// \brief This enum defines the currently-supported DataSet types.
+ ///
enum TypeEnum {
GENERIC = 0
, ALIGNMENT
@@ -67,9 +82,22 @@ public:
, SUBREAD
};
+ /// \brief Converts printable dataset type to type enum.
+ ///
+ /// \param[in] typeName printable dataset type
+ /// \returns dataset type enum
+ /// \throws std::runtime_error if \p typeName is unknown
+ ///
static DataSet::TypeEnum NameToType(const std::string& typeName);
+ /// \brief Converts dataset type enum to printable name.
+ ///
+ /// \param[in] type dataset type enum
+ /// \returns printable dataset type
+ /// \throws std::runtime_error if \p type is unknown
+ ///
static std::string TypeToName(const DataSet::TypeEnum& type);
+
/// \}
public:
@@ -77,17 +105,57 @@ public:
/// \name Constructors & Related Methods
/// \{
+ /// \brief Constructs an empty, generic DataSet.
+ ///
DataSet(void);
+
+ /// \brief Constructs an empty DataSet of the type specified.
+ ///
+ /// \param[in] type dataset type
+ /// \throws std::runtime_error if \p type is unknown
+ ///
DataSet(const DataSet::TypeEnum type);
+
+ /// \brief Constructs a DataSet from a %BAM file.
+ ///
+ /// This currently defaults to a SubreadSet, with an ExternalResource
+ /// pointing to BamFile::Filename.
+ ///
+ /// \param[in] bamFile BamFile object
+ ///
DataSet(const BamFile& bamFile);
+
+ /// \brief Loads a DataSet from a file.
+ ///
+ /// \p filename may be one of three types, indicated by its extension:\n
+ /// - %BAM ("*.bam") \n
+ /// - FOFN ("*.fofn") \n
+ /// - DataSetXML ("*.xml") \n
+ ///
+ /// \param[in] filename input filename
+ /// \throws std::runtime_error if \p filename has an unsupported extension,
+ /// or if a valid DataSet could not be created from its contents
+ ///
DataSet(const std::string& filename);
+
+ /// \brief Constructs a DataSet from a list of files.
+ ///
+ /// \param[in] filenames input filenames
+ /// \throws std::runtime_error if DataSet could not be created from
+ /// \p filenames
+ ///
+ DataSet(const std::vector<std::string>& filenames);
+
DataSet(const DataSet& other);
DataSet(DataSet&& other);
DataSet& operator=(const DataSet& other);
DataSet& operator=(DataSet&& other);
~DataSet(void);
- /// Creates a DataSet from "raw" XML data.
+ /// \brief Creates a DataSet from "raw" XML data.
+ ///
+ /// \param[in] xml DataSetXML text
+ ///
static DataSet FromXml(const std::string& xml);
/// \}
@@ -96,37 +164,149 @@ public:
/// \name Operators
/// \{
+ /// \brief Merges DataSet contents.
+ ///
+ /// Adds contents of \p other to this dataset object
+ ///
+ /// \param[in] other some other dataset to add to this one
+ /// \returns reference to this dataset object
+ ///
DataSet& operator+=(const DataSet& other);
/// \}
public:
+ /// \name Serialization
+ /// \{
+
+ /// \brief Saves dataset XML to file.
+ ///
+ /// \param[in] outputFilename destination for XML contents
+ ///
+ /// \throws std::runtime_error if file could be opened or if DataSet
+ /// elements could not be converted to XML
+ ///
void Save(const std::string& outputFilename);
+
+ /// \brief Saves dataset XML to output stream, e.g. std::cout,
+ /// std::stringstream.
+ ///
+ /// \param[out] out destination for XML contents
+ ///
+ /// \throws std::runtime_error if DataSet elements could not be converted to
+ /// XML
+ ///
void SaveToStream(std::ostream& out);
+ /// \}
+
public:
/// \name Attributes
/// \{
///
+ /// \brief Fetches the value of a DataSet root element's attribute.
+ ///
+ /// These are the attributes attached to the root dataset element: \n
+ /// \verbatim <SubreadSet foo="x" bar="y" /> \endverbatim
+ ///
+ /// Built-in accessors exist for the standard attributes (e.g. CreatedAt)
+ /// but additional attributes can be used as well via these generic
+ /// Attribute methods.
+ ///
+ /// \param[in] name root element's attribute name
+ /// \returns const reference to attribute's value (empty string if not
+ /// present)
+ ///
const std::string& Attribute(const std::string& name) const;
- std::string& Attribute(const std::string& name);
- DataSet& Attribute(const std::string& name, const std::string& value);
+ /// \brief Fetches the value of dataset's CreatedAt attribute.
+ ///
+ /// \returns const reference to attribute's value (empty string if not
+ /// present)
+ ///
const std::string& CreatedAt(void) const;
- const PacBio::BAM::Extensions& Extensions(void) const;
+
+ /// \brief Fetches the value of dataset's Format attribute.
+ ///
+ /// \returns const reference to attribute's value (empty string if not
+ /// present)
+ ///
const std::string& Format(void) const;
+
+ /// \brief Fetches the value of dataset's MetaType attribute.
+ ///
+ /// \returns const reference to attribute's value (empty string if not
+ /// present)
+ ///
const std::string& MetaType(void) const;
+
+ /// \brief Fetches the value of dataset's ModifiedAt attribute.
+ ///
+ /// \returns const reference to attribute's value (empty string if not
+ /// present)
+ ///
const std::string& ModifiedAt(void) const;
+
+ /// \brief Fetches the value of dataset's Name attribute.
+ ///
+ /// \returns const reference to attribute's value (empty string if not
+ /// present)
+ ///
const std::string& Name(void) const;
+
+ /// \brief Fetches the value of dataset's ResourceId attribute.
+ ///
+ /// \returns const reference to attribute's value (empty string if not
+ /// present)
+ ///
const std::string& ResourceId(void) const;
+
+ /// \brief Fetches the value of dataset's Tags attribute.
+ ///
+ /// \returns const reference to attribute's value (empty string if not
+ /// present)
+ ///
const std::string& Tags(void) const;
+
+ /// \brief Fetches the value of dataset's TimeStampedName attribute.
+ ///
+ /// \returns const reference to attribute's value (empty string if not
+ /// present)
+ ///
const std::string& TimeStampedName(void) const;
+
+ /// \brief Fetches the value of dataset's UniqueId attribute.
+ ///
+ /// \returns const reference to attribute's value (empty string if not
+ /// present)
+ ///
const std::string& UniqueId(void) const;
+
+ /// \brief Fetches the value of dataset's Version attribute.
+ ///
+ /// \returns const reference to attribute's value (empty string if not
+ /// present)
+ ///
const std::string& Version(void) const;
+ /// \}
+
+public:
+ /// \name DataSet Type
+ /// \{
+
+ /// \brief Fetches the dataset's type.
+ ///
+ /// \returns dataset type enum
+ ///
PacBio::BAM::DataSet::TypeEnum Type(void) const;
+
+ /// \brief Fetches the dataset's type.
+ ///
+ /// \returns printable dataset type
+ ///
std::string TypeName(void) const;
/// \}
@@ -135,17 +315,101 @@ public:
/// \name Child Elements
/// \{
+ /// \brief Fetches the dataset's Extensions element.
+ ///
+ /// \returns const reference to child element
+ /// \throws std::runtime_error if element does not exist
+ ///
+ const PacBio::BAM::Extensions& Extensions(void) const;
+
+ /// \brief Fetches the dataset's ExternalResources element.
+ ///
+ /// \returns const reference to child element
+ /// \throws std::runtime_error if element does not exist
+ ///
const PacBio::BAM::ExternalResources& ExternalResources(void) const;
+
+ /// \brief Fetches the dataset's Filters element.
+ ///
+ /// \returns const reference to child element
+ ///
const PacBio::BAM::Filters& Filters(void) const;
+
+ /// \brief Fetches the dataset's DataSetMetadata element.
+ ///
+ /// \returns const reference to child element
+ ///
const PacBio::BAM::DataSetMetadata& Metadata(void) const;
+
+ /// \brief Fetches the dataset's DataSets element.
+ ///
+ /// \returns const reference to child element
+ ///
const PacBio::BAM::SubDataSets& SubDataSets(void) const;
/// \}
public:
+ /// \name Resource Handling
+ /// \{
+
+ /// \brief Returns this dataset's primary %BAM resources, with relative
+ /// filepaths already resolved.
+ ///
+ /// Primary resources are those listed as top-level %ExternalResources, not
+ /// associated files (indices, references, scraps %BAMs, etc.).
+ ///
+ /// \returns vector of BamFiles
+ ///
+ /// \sa DataSet::ResolvedResourceIds
+ ///
+ std::vector<BamFile> BamFiles(void) const;
+
+ /// \brief Returns all primary external resource filepaths, with relative
+ /// paths resolved.
+ ///
+ /// Primary resources are those listed as top-level %ExternalResources, not
+ /// associated files (indices, references, scraps %BAMs, etc.).
+ ///
+ /// \sa ResolvePath
+ ///
+ /// \returns resourceIds
+ ///
+ std::vector<std::string> ResolvedResourceIds(void) const;
+
+ /// \brief Resolves a filepath (that may be relative to the dataset).
+ ///
+ /// A DataSet's resources may be described using absolute filepaths or with
+ /// relative paths. For absolute paths, nothing is changed from the input.
+ /// For relative paths, these are resolved using the DataSet's own path
+ /// as a starting point. A DataSet's own path will be one of:\n
+ /// 1 - the location of its XML or %BAM input file, e.g. created using
+ /// DataSet("foo.xml") or DataSet("foo.bam")\n
+ /// 2 - application's current working directory for all other DataSet
+ /// construction methods { DataSet(), DataSet(type),
+ /// DataSet("foo.fofn") }\n
+ ///
+ /// \param[in] originalPath input file path (absolute or relative)
+ /// \returns resolved path
+ ///
+ std::string ResolvePath(const std::string& originalPath) const;
+
+ /// \returns sequence chemistry info for all read groups in this dataset
+ ///
+ /// \sa ReadGroupInfo::SequencingChemistry
+ ///
+ std::set<std::string> SequencingChemistries(void) const;
+
+ /// \}
+
+public:
/// \name XML Namespace Handling
/// \{
+ /// \brief Access this dataset's namespace info.
+ ///
+ /// \returns const reference to dataset's NamespaceRegistry
+ ///
const NamespaceRegistry& Namespaces(void) const;
/// \}
@@ -154,30 +418,235 @@ public:
/// \name Attributes
/// \{
+ /// \brief Fetches the value of a DataSet root element's attribute.
+ ///
+ /// These are the attributes attached to the root dataset element: \n
+ /// \verbatim <SubreadSet foo="x" bar="y" /> \endverbatim
+ ///
+ /// Built-in accessors exist for the standard attributes (e.g. CreatedAt)
+ /// but additional attributes can be used as well via these generic methods.
+ ///
+ /// A new attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] name root element's attribute name
+ /// \returns non-const reference to attribute's value (empty string if this
+ /// is a new attribute)
+ ///
+ std::string& Attribute(const std::string& name);
+
+ /// \brief Fetches the value of dataset's CreatedAt attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to attribute's value (empty string if this
+ /// is a new attribute)
+ ///
std::string& CreatedAt(void);
- PacBio::BAM::Extensions& Extensions(void);
+
+ /// \brief Fetches the value of dataset's Format attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to attribute's value (empty string if this
+ /// is a new attribute)
+ ///
std::string& Format(void);
+
+ /// \brief Fetches the value of dataset's MetaType attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to attribute's value (empty string if this
+ /// is a new attribute)
+ ///
std::string& MetaType(void);
+
+ /// \brief Fetches the value of dataset's ModifiedAt attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to attribute's value (empty string if this
+ /// is a new attribute)
+ ///
std::string& ModifiedAt(void);
+
+ /// \brief Fetches the value of dataset's Name attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to attribute's value (empty string if this
+ /// is a new attribute)
+ ///
std::string& Name(void);
+
+ /// \brief Fetches the value of dataset's ResourceId attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to attribute's value (empty string if this
+ /// is a new attribute)
+ ///
std::string& ResourceId(void);
+
+ /// \brief Fetches the value of dataset's Tags attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to attribute's value (empty string if this
+ /// is a new attribute)
+ ///
std::string& Tags(void);
+
+ /// \brief Fetches the value of dataset's TimeStampedName attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to attribute's value (empty string if this
+ /// is a new attribute)
+ ///
std::string& TimeStampedName(void);
+
+ /// \brief Fetches the value of dataset's UniqueId attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to attribute's value (empty string if this
+ /// is a new attribute)
+ ///
std::string& UniqueId(void);
+
+ /// \brief Fetches the value of dataset's Version attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to attribute's value (empty string if this
+ /// is a new attribute)
+ ///
std::string& Version(void);
+ /// \}
+
+public:
+ /// \name Attributes
+ /// \{
+
+ /// \brief Sets this dataset's XML attribute \p name, with \p value
+ ///
+ /// These are the attributes attached to the root dataset element: \n
+ /// \verbatim <SubreadSet foo="x" bar="y" /> \endverbatim
+ ///
+ /// Built-in accessors exist for the standard attributes (e.g. CreatedAt)
+ /// but additional attributes can be used as well via these generic methods.
+ ///
+ /// The attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] name root element's attribute name
+ /// \param[in] value new value for the attribute
+ /// \returns reference to this dataset object
+ ///
+ DataSet& Attribute(const std::string& name, const std::string& value);
+
+ /// \brief Sets this dataset's CreatedAt attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] createdAt new value for the attribute
+ /// \returns reference to this dataset object
+ ///
DataSet& CreatedAt(const std::string& createdAt);
- DataSet& Extensions(const PacBio::BAM::Extensions& extensions);
+
+ /// \brief Sets this dataset's Format attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] format new value for the attribute
+ /// \returns reference to this dataset object
+ ///
DataSet& Format(const std::string& format);
+
+ /// \brief Sets this dataset's MetaType attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] metatype new value for the attribute
+ /// \returns reference to this dataset object
+ ///
DataSet& MetaType(const std::string& metatype);
+
+ /// \brief Sets this dataset's ModifiedAt attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] modifiedAt new value for the attribute
+ /// \returns reference to this dataset object
+ ///
DataSet& ModifiedAt(const std::string& modifiedAt);
+
+ /// \brief Sets this dataset's Name attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] name new value for the attribute
+ /// \returns reference to this dataset object
+ ///
DataSet& Name(const std::string& name);
+
+ /// \brief Sets this dataset's ResourceId attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] resourceId new value for the attribute
+ /// \returns reference to this dataset object
+ ///
DataSet& ResourceId(const std::string& resourceId);
+
+ /// \brief Sets this dataset's Tags attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] tags new value for the attribute
+ /// \returns reference to this dataset object
+ ///
DataSet& Tags(const std::string& tags);
+
+ /// \brief Sets this dataset's TimeStampedName attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] timeStampedName new value for the attribute
+ /// \returns reference to this dataset object
+ ///
DataSet& TimeStampedName(const std::string& timeStampedName);
+
+ /// \brief Sets this dataset's UniqueId attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] uuid new value for the attribute
+ /// \returns reference to this dataset object
+ ///
DataSet& UniqueId(const std::string& uuid);
+
+ /// \brief Sets this dataset's Version attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] version new value for the attribute
+ /// \returns reference to this dataset object
+ ///
DataSet& Version(const std::string& version);
+ /// \}
+
+public:
+ /// \name DataSet Type
+ /// \{
+
+ /// \brief Edits dataset type.
+ ///
+ /// \param[in] type new dataset type
+ /// \returns reference to this dataset object
+ ///
DataSet& Type(const PacBio::BAM::DataSet::TypeEnum type);
/// \}
@@ -186,14 +655,95 @@ public:
/// \name Child Elements
/// \{
+ /// \brief Fetches the dataset's Extensions element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to child element
+ ///
+ PacBio::BAM::Extensions& Extensions(void);
+
+ /// \brief Fetches the dataset's ExternalResources element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to child element
+ ///
PacBio::BAM::ExternalResources& ExternalResources(void);
+
+ /// \brief Fetches the dataset's Filters element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to child element
+ ///
PacBio::BAM::Filters& Filters(void);
+
+ /// \brief Fetches the dataset's DataSetMetadata element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to child element
+ ///
PacBio::BAM::DataSetMetadata& Metadata(void);
+
+ /// \brief Fetches the dataset's DataSets element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to child element
+ ///
PacBio::BAM::SubDataSets& SubDataSets(void);
+ /// \}
+
+public:
+ /// \name Child Elements
+ /// \{
+
+ /// \brief Sets this dataset's Extensions element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \param[in] extensions new value for the element
+ /// \returns reference to this dataset object
+ ///
+ DataSet& Extensions(const PacBio::BAM::Extensions& extensions);
+
+ /// \brief Sets this dataset's ExternalResources element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \param[in] resources new value for the element
+ /// \returns reference to this dataset object
+ ///
DataSet& ExternalResources(const PacBio::BAM::ExternalResources& resources);
+
+ /// \brief Sets this dataset's Filters element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \param[in] filters new value for the element
+ /// \returns reference to this dataset object
+ ///
DataSet& Filters(const PacBio::BAM::Filters& filters);
+
+ /// \brief Sets this dataset's DataSetMetadata element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \param[in] metadata new value for the element
+ /// \returns reference to this dataset object
+ ///
DataSet& Metadata(const PacBio::BAM::DataSetMetadata& metadata);
+
+ /// \brief Sets this dataset's DataSets element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \param[in] subdatasets new value for the element
+ /// \returns reference to this dataset object
+ ///
DataSet& SubDataSets(const PacBio::BAM::SubDataSets& subdatasets);
/// \}
@@ -202,14 +752,66 @@ public:
/// \name XML Namespace Handling
/// \{
+ /// \brief Access this dataset's namespace info.
+ ///
+ /// \returns non-const reference to dataset's NamespaceRegistry
+ ///
NamespaceRegistry& Namespaces(void);
/// \}
private:
std::unique_ptr<DataSetBase> d_;
+ std::string path_;
};
+/// \name DataSet Timestamp Utilities
+/// \{
+
+/// \brief Fetches current time, in "DataSetXML format".
+///
+/// \returns DataSetXML formatted timestamp
+///
+/// \sa ToDataSetFormat
+///
+PBBAM_EXPORT std::string CurrentTimestamp(void);
+
+/// \brief Converts a time_point to "DataSetXML-formatted" timestamp.
+///
+/// This is the format used as a component of the DataSet::TimeStampedName
+/// (yymmdd_HHmmssttt>.
+///
+/// \returns "DataSetXML-formatted" timestamp
+///
+PBBAM_EXPORT std::string ToDataSetFormat(const std::chrono::system_clock::time_point& tp);
+
+/// \brief Converts a time_t to "DataSetXML-formatted" timestamp.
+///
+/// This is the format used as a component of the DataSet::TimeStampedName
+/// (yymmdd_HHmmssttt>.
+///
+/// \returns "DataSetXML-formatted" timestamp
+///
+PBBAM_EXPORT std::string ToDataSetFormat(const time_t& tp);
+
+/// \brief Converts a time_point to ISO-8601 formatted timestamp.
+///
+/// This is the format used in DataSet::CreatedAt and DataSet::ModifiedAt.
+///
+/// \returns ISO-8601 formatted timestamp
+///
+PBBAM_EXPORT std::string ToIso8601(const std::chrono::system_clock::time_point& tp);
+
+/// \brief Converts a time_t to ISO-8601 formatted timestamp.
+///
+/// This is the format used in DataSet::CreatedAt and DataSet::ModifiedAt.
+///
+/// \returns ISO-8601 formatted timestamp
+///
+PBBAM_EXPORT std::string ToIso8601(const time_t& t);
+
+/// \}
+
} // namespace BAM
} // namespace PacBio
diff --git a/include/pbbam/DataSetTypes.h b/include/pbbam/DataSetTypes.h
index dd4c496..23df643 100644
--- a/include/pbbam/DataSetTypes.h
+++ b/include/pbbam/DataSetTypes.h
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file DataSetTypes.h
+/// \brief Defines the public DataSet component classes.
+//
// Author: Derek Barnett
#ifndef DATASETTYPES_H
@@ -47,263 +51,848 @@
namespace PacBio {
namespace BAM {
+/// \brief The DataSetMetadata class represents the %DataSetMetadata child
+/// element in DataSetXML.
+///
+/// A few top-level elements are built-in, but as pbbam is not primarily a
+/// DataSetXML API, most of the metadata hierarchy needs to be manually managed.
+///
class PBBAM_EXPORT DataSetMetadata : public internal::DataSetElement
{
public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Constructs a DataSetMetadata with required fields.
DataSetMetadata(const std::string& numRecords,
const std::string& totalLength);
+ /// \}
+
public:
+ /// \name Operators
+ /// \{
+
+ /// \brief Merges DataSetMetadata contents.
+ ///
+ /// Adds contents of \p other to this metadata object
+ ///
+ /// \param[in] other some other metadata to add to this one
+ /// \returns reference to this object
+ ///
DataSetMetadata& operator+=(const DataSetMetadata& other);
+ /// \}
+
public:
+ /// \name Child Elements
+ /// \{
+
+ /// \brief Fetches the text of the NumRecords element.
+ ///
+ /// \returns const reference to element text (empty string if not present)
+ ///
const std::string& NumRecords(void) const;
+
+ /// \brief Fetches the text of the TotalLength element.
+ ///
+ /// \returns const reference to element text (empty string if not present)
+ ///
const std::string& TotalLength(void) const;
+
+ /// \brief Fetches the Provenance element.
+ ///
+ /// \returns const reference to child element
+ /// \throws std::runtime_error if element does not exist
+ ///
const PacBio::BAM::Provenance& Provenance(void) const;
+ /// \}
+
+public:
+ /// \name Child Elements
+ /// \{
+
+ /// \brief Fetches the text of the NumRecords element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to element text
+ ///
std::string& NumRecords(void);
+
+ /// \brief Fetches the text of the TotalLength element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to element text
+ ///
std::string& TotalLength(void);
+
+ /// \brief Fetches Provenance element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to child element
+ ///
PacBio::BAM::Provenance& Provenance(void);
+ /// \}
+
+public:
+ /// \name Child Elements
+ /// \{
+
+ /// \brief Sets the text of the NumRecords element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns reference to this metadata object
+ ///
DataSetMetadata& NumRecords(const std::string& numRecords);
+
+ /// \brief Sets the text of the TotalLength element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns reference to this metadata object
+ ///
DataSetMetadata& TotalLength(const std::string& totalLength);
+
+ /// \brief Sets the Provenance child element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns reference to this metadata object
+ ///
DataSetMetadata& Provenance(const PacBio::BAM::Provenance& provenance);
+
+ /// \}
};
+/// \brief The ExtensionElement class represents an %ExtensionElement element in
+/// DataSetXML.
+///
class PBBAM_EXPORT ExtensionElement : public internal::DataSetElement {
public:
ExtensionElement(void);
};
+/// \brief The Extensions class represents an %Extensions element in DataSetXML.
+///
+/// The Extensions element is essentially just a list of ExtensionElement
+/// objects.
+///
class PBBAM_EXPORT Extensions : public internal::DataSetListElement<ExtensionElement>
{
public:
+ /// \brief Creates an empty extensions list.
Extensions(void);
};
+class ExternalResources;
+
+/// \brief The ExternalResource class represents an %ExternalResource element in
+/// DataSetXML.
+///
+/// An ExternalResource can itself have a child element, ExternalResources, that
+/// lists related files (e.g. index files).
+///
class PBBAM_EXPORT ExternalResource : public internal::IndexedDataType
{
public:
- ExternalResource(void);
+ /// \brief Creates an ExternalResource from a BamFile object.
+ ///
+ /// The metatype & resourceId are automatically set.
+ ///
ExternalResource(const BamFile& bamFile);
+
+ /// \brief Creates an ExternalResource with provided \p metatype and
+ /// \p filename as resource ID.
+ ///
ExternalResource(const std::string& metatype,
const std::string& filename);
public:
+ /// \brief Fetches the resource's ExternalResources child element.
+ ///
+ /// \returns const reference to child element
+ /// \throws std::runtime_error if element does not exist
+ ///
+ const PacBio::BAM::ExternalResources& ExternalResources(void) const;
+
+public:
+ /// \brief Fetches the resource's ExternalResources child element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to child element
+ ///
+ PacBio::BAM::ExternalResources& ExternalResources(void);
+
+ /// \brief Sets this resource's ExternalResources child element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \param[in] resources new value for the element
+ /// \returns reference to this resource object
+ ///
+ ExternalResource& ExternalResources(const PacBio::BAM::ExternalResources& resources);
+
+public:
+ /// \brief Converts an ExternalResource to a BamFile object
+ ///
+ /// \returns corresponding BamFile object for this ExternalResource
+ /// \throws std::runtime_error if fails to open %BAM file (e.g. does not
+ /// exist, not a %BAM file, etc.)
+ ///
+ /// \deprecated Use the results from DataSet::BamFiles instead. This method
+ /// cannot resolve relative filepaths and will be removed in the
+ /// near future.
+ ///
BamFile ToBamFile(void) const;
};
+/// \brief The ExternalResources class represents an %ExternalResources element
+/// in DataSetXML.
+///
+/// The ExternalResources element is essentially just a list of ExternalResource
+/// elements.
+///
class PBBAM_EXPORT ExternalResources : public internal::DataSetListElement<ExternalResource>
{
public:
+ /// \brief Creates an empty resource list.
ExternalResources(void);
+ /// \brief Merges \p other resource list with this one.
ExternalResources& operator+=(const ExternalResources& other);
public:
+ /// \brief Adds an ExternalResource to this list.
void Add(const ExternalResource& ext);
+
+ /// \brief Removes an ExternalResource from this list.
void Remove(const ExternalResource& ext);
public:
+ /// \brief Converts resource list to BamFile objects.
+ ///
+ /// \deprecated Use DataSet::BamFiles instead. This method cannot resolve
+ /// relative filepaths and will be removed in the near future.
+ ///
std::vector<BamFile> BamFiles(void) const;
};
+/// \brief The FileIndex class represents a %FileIndex element in DataSetXML.
+///
+/// A FileIndex is used as an auxiliary to an ExternalResource, providing
+/// information about a data file's index file (e.g. for %BAM files, *.bai or
+/// *.pbi).
+///
class PBBAM_EXPORT FileIndex : public internal::InputOutputDataType
{
public:
- FileIndex(void);
+ /// \brief Creates a FileIndex with provided \p metatype and \p filename as
+ /// resource ID.
+ ///
+ FileIndex(const std::string& metatype,
+ const std::string& filename);
};
+/// \brief The FileIndices class represents a %FileIndices element in DataSetXML.
+///
+/// The FileIndices element is essentially just a list of FileIndex elements,
+/// providing information about a data file's index files (e.g. for %BAM files
+/// this will usually be *.bai and/or *.pbi).
+///
class PBBAM_EXPORT FileIndices : public internal::DataSetListElement<FileIndex>
{
public:
+ /// \brief Creates an empty index list.
FileIndices(void);
+public:
+ /// \brief Adds a FileIndex to this list.
void Add(const FileIndex& index);
+
+ /// \brief Removes a FileIndex from this list.
void Remove(const FileIndex& index);
};
+/// \brief The Filter class represents a %Filter element in DataSetXML.
+///
+/// The Filter element allows analysis pipelines to describe filters on data
+/// that should be respected downstream, without needing to create filtered
+/// intermediate files.
+///
+/// A filter consists of a list of Property elements, each of which must be
+/// passed (logical AND) to pass the filter, e.g. property1 && property2 &&
+/// property3.
+///
class PBBAM_EXPORT Filter : public internal::DataSetElement
{
public:
+ /// \brief Creates an empty filter.
Filter(void);
public:
+ /// \brief Fetches the filter's property list element.
+ ///
+ /// \returns const reference to child element
+ /// \throws std::runtime_error if element does not exist
+ ///
const PacBio::BAM::Properties& Properties(void) const;
+
+public:
+ /// \brief Fetches the filter's property list child element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to child element
+ ///
PacBio::BAM::Properties& Properties(void);
+
+ /// \brief Sets this filter's Properties child element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \param[in] properties new value for the element
+ /// \returns reference to this filter object
+ ///
Filter& Properties(const PacBio::BAM::Properties& properties);
};
+/// \brief The Filters class represents a %Filters list element in DataSetXML.
+///
+/// The Filters element is essentially a list of Filter elements. For analysis
+/// purpose, each filter is considered separately (logical OR) to consider which
+/// data passes, e.g. filter1 || filter2 || filter3.
+///
class PBBAM_EXPORT Filters : public internal::DataSetListElement<Filter>
{
public:
+ /// \brief Creates an empty filter list.
Filters(void);
+ /// \brief Merges \p other filter list with this one.
Filters& operator+=(const Filters& other);
+public:
+ /// \brief Adds a filter to this list.
void Add(const Filter& filter);
+
+ /// \brief Removes a filter from this list.
void Remove(const Filter& filter);
};
+/// \brief The ParentTool class represents a %ParentTool element in DataSetXML.
+///
class PBBAM_EXPORT ParentTool : public internal::BaseEntityType {
public:
+ /// \brief Creates an empty %ParentTool element.
ParentTool(void);
};
+/// \brief The Property class represents a %Property element in DataSetXML.
+///
+/// A Property is the primary building block of %DataSetXML filtering. The
+/// %Property element describes a data record's property (or field), some value,
+/// and a comparison operator.
+///
+/// For example, one could filter all %BAM records with a read accuracy at or
+/// above 0.9. In C++ this could be constructed like:
+/// \code{.cpp}
+/// Property p("accuracy", "0.9", ">=");
+/// \endcode
+///
class PBBAM_EXPORT Property : public internal::DataSetElement
{
public:
+ /// \brief Constructs a filter property.
Property(const std::string& name,
const std::string& value,
const std::string& op);
public:
+
+ /// \brief Fetches the value of property's Name attribute.
+ ///
+ /// \returns const reference to attribute value
+ ///
const std::string& Name(void) const;
+
+ /// \brief Fetches the value of property's Operator attribute.
+ ///
+ /// \returns const reference to attribute value
+ ///
const std::string& Operator(void) const;
+
+ /// \brief Fetches the value of property's Value attribute.
+ ///
+ /// \returns const reference to attribute value
+ ///
const std::string& Value(void) const;
+public:
+
+ /// \brief Fetches the value of property's Name attribute.
+ ///
+ /// \returns non-const reference to attribute value
+ ///
std::string& Name(void);
+
+ /// \brief Fetches the value of property's Operator attribute.
+ ///
+ /// \returns non-const reference to attribute value
+ ///
std::string& Operator(void);
+
+ /// \brief Fetches the value of property's Value attribute.
+ ///
+ /// \returns nonconst reference to attribute value
+ ///
std::string& Value(void);
+public:
+ /// \brief Sets this property's Name attribute.
+ ///
+ /// \param[in] name new value for the attribute
+ /// \returns reference to this property object
+ ///
Property& Name(const std::string& name);
+
+ /// \brief Sets this property's Operator attribute.
+ ///
+ /// \param[in] op new value for the attribute
+ /// \returns reference to this property object
+ ///
Property& Operator(const std::string& op);
+
+ /// \brief Sets this property's Value attribute.
+ ///
+ /// \param[in] value new value for the attribute
+ /// \returns reference to this property object
+ ///
Property& Value(const std::string& value);
};
+/// \brief The Properties class represents a %Properties list element in
+/// DataSetXML.
+///
+/// The Properties element is essentially a list of Property elements.
+///
class PBBAM_EXPORT Properties : public internal::DataSetListElement<Property>
{
public:
+ /// \brief Creates an empty property list.
Properties(void);
+public:
+ /// \brief Adds a property to this list.
void Add(const Property& property);
+
+ /// \brief Removes a property from this list.
void Remove(const Property& property);
};
+/// \brief The Provenance class represents a %Provenance element in DataSetXML.
+///
class PBBAM_EXPORT Provenance : public internal::DataSetElement
{
public:
+ /// \brief Creates a empty provenance element.
Provenance(void);
public:
+ /// \brief Fetches the value of CreatedBy attribute.
+ ///
+ /// \returns const reference to attribute value (empty string if not
+ /// present)
+ ///
const std::string& CreatedBy(void) const;
+
+ /// \brief Fetches the value of CommonServicesInstanceId attribute.
+ ///
+ /// \returns const reference to attribute value (empty string if not
+ /// present)
+ ///
const std::string& CommonServicesInstanceId(void) const;
+
+ /// \brief Fetches the value of CreatorUserId attribute.
+ ///
+ /// \returns const reference to attribute value (empty string if not
+ /// present)
+ ///
const std::string& CreatorUserId(void) const;
+
+ /// \brief Fetches the value of ParentJobId attribute.
+ ///
+ /// \returns const reference to attribute value (empty string if not
+ /// present)
+ ///
const std::string& ParentJobId(void) const;
+
+ /// \brief Fetches the ParentTool child element.
+ ///
+ /// \returns const reference to child element
+ /// \throws std::runtime_error if element does not exist
+ ///
const PacBio::BAM::ParentTool& ParentTool(void) const;
+public:
+
+ /// \brief Fetches the value of CreatedBy attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to attribute value (empty string if this is
+ /// a new attribute)
+ ///
std::string& CreatedBy(void);
+
+ /// \brief Fetches the value of CommonServicesInstanceId attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to attribute value (empty string if this is
+ /// a new attribute)
+ ///
std::string& CommonServicesInstanceId(void);
+
+ /// \brief Fetches the value of CreatorUserId attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to attribute value (empty string if this is
+ /// a new attribute)
+ ///
std::string& CreatorUserId(void);
+
+ /// \brief Fetches the value of ParentJobId attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to attribute value (empty string if this is
+ /// a new attribute)
+ ///
std::string& ParentJobId(void);
+
+ /// \brief Fetches the ParentTool element element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to child element
+ ///
PacBio::BAM::ParentTool& ParentTool(void);
+public:
+
+ /// \brief Sets the CreatedBy attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] createdBy new value for the attribute
+ /// \returns reference to this object
+ ///
Provenance& CreatedBy(const std::string& createdBy);
+
+ /// \brief Sets the CommonServicesInstanceId attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] id new value for the attribute
+ /// \returns reference to this object
+ ///
Provenance& CommonServicesInstanceId(const std::string& id);
+
+ /// \brief Sets the CreatorUserId attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] id new value for the attribute
+ /// \returns reference to this object
+ ///
Provenance& CreatorUserId(const std::string& id);
+
+ /// \brief Sets the ParentJobId attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] id new value for the attribute
+ /// \returns reference to this object
+ ///
Provenance& ParentJobId(const std::string& id);
+
+ /// \brief Sets the ParentTool child element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \param[in] tool new value for the element
+ /// \returns reference to this dataset object
+ ///
Provenance& ParentTool(const PacBio::BAM::ParentTool& tool);
};
class SubDataSets;
+/// \brief The DataSetBase class provides the attributes & child elements shared
+/// by all dataset types.
+///
+/// Client code should not need to use this class directly. It should be
+/// considered as more of an implementation detail and may in fact be removed
+/// from public API in the future. The top-level DataSet is the recommended
+/// entry point.
+///
class PBBAM_EXPORT DataSetBase : public internal::StrictEntityType
{
public:
+
+ /// \brief Creates a DataSetBase object, or one of its subclasses, from an
+ /// XML element name (e.g. SubreadSet)
+ ///
static std::shared_ptr<DataSetBase> Create(const std::string& typeName);
public:
+ /// \brief Creates an empty, generic DataSetBase.
DataSetBase(void);
protected:
- DataSetBase(const std::string& label, const XsdType& xsd);
+ /// \brief Creates a DataSetBase with key values initialized.
+ DataSetBase(const std::string& metatype,
+ const std::string& label,
+ const XsdType& xsd);
+
+ /// \brief Returns a new DataSetBase containing a deep copy of contents
DataSetBase* DeepCopy(void) const;
public:
+ /// \brief Merges dataset contents.
+ ///
+ /// Adds contents of \p other to this dataset object
+ ///
+ /// \param[in] other some other dataset to add to this one
+ /// \returns reference to this dataset object
+ ///
DataSetBase& operator+=(const DataSetBase& other);
public:
+ /// \brief Fetches the dataset's ExternalResources element.
+ ///
+ /// \returns const reference to child element
+ /// \throws std::runtime_error if element does not exist
+ ///
const PacBio::BAM::ExternalResources& ExternalResources(void) const;
+
+ /// \brief Fetches the dataset's Filters element.
+ ///
+ /// \returns const reference to child element
+ ///
const PacBio::BAM::Filters& Filters(void) const;
+
+ /// \brief Fetches the dataset's DataSetMetadata element.
+ ///
+ /// \returns const reference to child element
+ ///
const PacBio::BAM::DataSetMetadata& Metadata(void) const;
+
+ /// \brief Fetches the dataset's DataSets element.
+ ///
+ /// \returns const reference to child element
+ ///
const PacBio::BAM::SubDataSets& SubDataSets(void) const;
+public:
+ /// \brief Access this dataset's namespace info.
+ ///
+ /// \returns const reference to dataset's NamespaceRegistry
+ ///
+ const NamespaceRegistry& Namespaces(void) const;
+
+public:
+ /// \brief Fetches the dataset's ExternalResources element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to child element
+ ///
PacBio::BAM::ExternalResources& ExternalResources(void);
+
+ /// \brief Fetches the dataset's Filters element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to child element
+ ///
PacBio::BAM::Filters& Filters(void);
+
+ /// \brief Fetches the dataset's DataSetMetadata element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to child element
+ ///
PacBio::BAM::DataSetMetadata& Metadata(void);
+
+ /// \brief Fetches the dataset's DataSets element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to child element
+ ///
PacBio::BAM::SubDataSets& SubDataSets(void);
+public:
+ /// \brief Sets this dataset's ExternalResources element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \param[in] resources new value for the element
+ /// \returns reference to this dataset object
+ ///
DataSetBase& ExternalResources(const PacBio::BAM::ExternalResources& resources);
+
+ /// \brief Sets this dataset's Filters element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \param[in] filters new value for the element
+ /// \returns reference to this dataset object
+ ///
DataSetBase& Filters(const PacBio::BAM::Filters& filters);
+
+ /// \brief Sets this dataset's DataSetMetadata element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \param[in] metadata new value for the element
+ /// \returns reference to this dataset object
+ ///
DataSetBase& Metadata(const PacBio::BAM::DataSetMetadata& metadata);
- DataSetBase& SubDataSets(const PacBio::BAM::SubDataSets& subdatasets);
+ /// \brief Sets this dataset's DataSets element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \param[in] subdatasets new value for the element
+ /// \returns reference to this dataset object
+ ///
+ DataSetBase& SubDataSets(const PacBio::BAM::SubDataSets& subdatasets);
public:
- const NamespaceRegistry& Namespaces(void) const;
+ /// \brief Access this dataset's namespace info.
+ ///
+ /// \returns non-const reference to dataset's NamespaceRegistry
+ ///
NamespaceRegistry& Namespaces(void);
private:
NamespaceRegistry registry_;
};
+/// \brief The AlignmentSet class represents an %AlignmentSet root element in
+/// DataSetXML.
+///
class PBBAM_EXPORT AlignmentSet : public DataSetBase
{
public:
+ /// \brief Creates an empty AlignmentSet dataset.
AlignmentSet(void);
};
+/// \brief The BarcodeSet class represents a %BarcodeSet root element in
+/// DataSetXML.
+///
class PBBAM_EXPORT BarcodeSet : public DataSetBase
{
public:
+ /// \brief Creates an empty BarcodeSet dataset.
BarcodeSet(void);
};
+/// \brief The ConsensusAlignmentSet class represents a %ConsensusAlignmentSet
+/// root element in DataSetXML.
+///
class PBBAM_EXPORT ConsensusAlignmentSet : public DataSetBase
{
public:
+ /// \brief Creates an empty ConsensusAlignmentSet dataset.
ConsensusAlignmentSet(void);
};
+/// \brief The ConsensusReadSet class represents a %ConsensusReadSet root
+/// element in DataSetXML.
+///
class PBBAM_EXPORT ConsensusReadSet : public DataSetBase
{
public:
+ /// \brief Creates an empty ConsensusReadSet dataset.
ConsensusReadSet(void);
};
+/// \brief The ContigSet class represents a %ContigSet root element in
+/// DataSetXML.
+///
class PBBAM_EXPORT ContigSet : public DataSetBase
{
public:
+ /// \brief Creates an empty ContigSet dataset.
ContigSet(void);
};
+/// \brief The HdfSubreadSet class represents a %HdfSubreadSet root element in
+/// DataSetXML.
+///
class PBBAM_EXPORT HdfSubreadSet : public DataSetBase
{
public:
+ /// \brief Creates an empty HdfSubreadSet dataset.
HdfSubreadSet(void);
};
+/// \brief The ReferenceSet class represents a %ReferenceSet root element in
+/// DataSetXML.
+///
class PBBAM_EXPORT ReferenceSet : public DataSetBase
{
public:
+ /// \brief Creates an empty ReferenceSet dataset.
ReferenceSet(void);
};
+/// \brief The SubDataSets class represents a %DataSets list element in
+/// DataSetXML.
+///
+/// The SubDataSets element is essentially a list of DataSets.
+///
class PBBAM_EXPORT SubDataSets : public internal::DataSetListElement<DataSetBase>
{
public:
+ /// \brief Creates an empty list of sub-datasets.
SubDataSets(void);
+public:
+ /// \brief Adds \p other sub-dataset to this list.
SubDataSets& operator+=(const DataSetBase& other); // single
+
+ /// \brief Adds \p other sub-dataset list to this list.
SubDataSets& operator+=(const SubDataSets& other); // list
+public:
+ /// \brief Adds a sub-dataset to this list.
void Add(const DataSetBase& subdataset);
+
+ /// \brief Removes a sub-dataset from this list.
void Remove(const DataSetBase& subdataset);
};
+/// \brief The SubreadSet class represents a %SubreadSet root element in
+/// DataSetXML.
+///
class PBBAM_EXPORT SubreadSet : public DataSetBase
{
public:
+ /// \brief Creates an empty SubreadSet dataset.
SubreadSet(void);
};
diff --git a/include/pbbam/DataSetXsd.h b/include/pbbam/DataSetXsd.h
index 29df5e1..8d0ec38 100644
--- a/include/pbbam/DataSetXsd.h
+++ b/include/pbbam/DataSetXsd.h
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file DataSetXsd.h
+/// \brief Defines the XSD- and namespace-related classes for DataSetXML.
+//
// Author: Derek Barnett
#ifndef DATASETXSD_H
@@ -45,6 +49,8 @@
namespace PacBio {
namespace BAM {
+/// \brief The XsdType enum defines the supported XSD namespaces.
+///
enum class XsdType
{
NONE
@@ -65,15 +71,26 @@ enum class XsdType
, SEEDING_DATA
};
+/// \brief The NamespaceInfo class provides XML namespace info (prefix & URI).
+///
class PBBAM_EXPORT NamespaceInfo
{
public:
+ /// \brief Creates an empty entry.
+ ///
+ /// This constructor only exists for STL container compatibility.
+ ///
NamespaceInfo(void);
+
+ /// \brief Creates a valid info entry.
NamespaceInfo(const std::string& name,
const std::string& uri);
public:
+ /// \brief Fetches namespace name (i.e. prefix)
const std::string& Name(void) const { return name_; }
+
+ /// \brief Fetches namespace URI.
const std::string& Uri(void) const { return uri_; }
private:
@@ -81,25 +98,54 @@ private:
std::string uri_;
};
+/// \brief The NamespaceRegistry class provides a per-dataset registry of XML
+/// namespace information.
+///
+/// This is used to format XML output - properly prefixing element labels with
+/// namespace as appropriate.
+///
class PBBAM_EXPORT NamespaceRegistry
{
public:
+ /// \name Constructors & Related Methods
+ /// \{
+
NamespaceRegistry(void);
NamespaceRegistry(const NamespaceRegistry& other);
+ NamespaceRegistry(NamespaceRegistry&& other);
NamespaceRegistry& operator=(const NamespaceRegistry& other);
+ NamespaceRegistry& operator=(NamespaceRegistry&& other);
~NamespaceRegistry(void);
+ /// \}
+
public:
+ /// \name Registry Access
+ /// \{
+
+ /// \brief Fetches namespace info for the dataset's default XSD type.
const NamespaceInfo& DefaultNamespace(void) const;
+
+ /// \brief Fetches dataset's default XSD type.
XsdType DefaultXsd(void) const;
- const NamespaceInfo& Namespace(const XsdType& xsd) const;
- XsdType XsdForUri(const std::string& uri) const;
+ /// \brief Fetches namespace info for the requested XSD type.
+ const NamespaceInfo& Namespace(const XsdType& xsd) const;
-public:
+ /// \brief Registers namespace info for a particular XSD type.
void Register(const XsdType& xsd, const NamespaceInfo& namespaceInfo);
+
+ /// \brief Updates dataset's default XSD type.
void SetDefaultXsd(const XsdType& xsd);
+ /// \brief Fetches the XSD type for \p elementLabel.
+ XsdType XsdForElement(const std::string& elementLabel) const;
+
+ /// \brief Fetches the XSD type for a particular URI.
+ XsdType XsdForUri(const std::string& uri) const;
+
+ /// \}
+
private:
std::map<XsdType, NamespaceInfo> data_;
XsdType defaultXsdType_;
diff --git a/include/pbbam/EntireFileQuery.h b/include/pbbam/EntireFileQuery.h
index cd5809e..10c06ff 100644
--- a/include/pbbam/EntireFileQuery.h
+++ b/include/pbbam/EntireFileQuery.h
@@ -32,26 +32,63 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file EntireFileQuery.h
+/// \brief Defines the EntireFileQuery class.
+//
// Author: Derek Barnett
#ifndef ENTIREFILEQUERY_H
#define ENTIREFILEQUERY_H
#include "pbbam/internal/QueryBase.h"
-#include <htslib/sam.h>
+#include <memory>
namespace PacBio {
namespace BAM {
-class BamFile;
-
+/// \brief The EntireFileQuery class provides iterable access to a DataSet's
+/// %BAM records, reading through the entire contents of each file.
+///
+/// Input files will be accessed in the order listed in the DataSet.
+///
+/// \include code/EntireFileQuery.txt
+///
+/// Iteration is not limited to only 'const' records. The files themselves will
+/// not be affected, but individual records may be modified if needed.
+///
+/// \include code/EntireFileQuery_NonConst.txt
+///
+/// \note DataSets can be implicitly constructed from %BAM filenames as well.
+/// Thus a single %BAM file can be read through using the following:
+///
+/// \include code/EntireFileQuery_BamFilename.txt
+///
class PBBAM_EXPORT EntireFileQuery : public internal::IQuery
{
public:
+ /// \brief Creates a new EntireFileQuery, reading through the entire
+ /// contents of a dataset.
+ ///
+ /// \param[in] dataset input data source(s)
+ /// \throws std::runtime_error on failure to open/read underlying %BAM
+ /// files.
+ ///
EntireFileQuery(const PacBio::BAM::DataSet& dataset);
-protected:
- FileIterPtr CreateIterator(const BamFile& bamFile);
+ ~EntireFileQuery(void);
+
+public:
+ /// \brief Main iteration point for record access.
+ ///
+ /// Most client code should not need to use this method directly. Use
+ /// iterators instead.
+ ///
+ bool GetNext(BamRecord& r);
+
+private:
+ struct EntireFileQueryPrivate;
+ std::unique_ptr<EntireFileQueryPrivate> d_;
};
} // namespace BAM
diff --git a/include/pbbam/Frames.h b/include/pbbam/Frames.h
index f11598c..326701b 100644
--- a/include/pbbam/Frames.h
+++ b/include/pbbam/Frames.h
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file Frames.h
+/// \brief Defines the Frames class.
+//
// Author: Derek Barnett
#ifndef FRAMES_H
@@ -44,26 +48,36 @@
namespace PacBio {
namespace BAM {
+/// \brief The Frames class represents pulse frame data.
+///
+/// Frame data may be stored in either their raw, 16-bit values or
+/// using a lossy, 8-bit compression scheme.
+///
+/// This class is used to store the data and convert between the 2 storage types.
+///
class PBBAM_EXPORT Frames
{
public:
/// \name Conversion Methods
/// \{
- /// Constructs a Frames object from encoded (lossy, 8-bit data).
+ /// \brief Constructs a Frames object from encoded (lossy, 8-bit) data.
///
- /// \note This method should probably not be needed often by client code working with frame data.
- /// It exists primarily for (internal) parsing & interpretation of the BAM file contents. The
- /// method is available, though, should the conversion operation be needed.
+ /// \note This method should probably not be needed often by client code
+ /// working with frame data. It exists primarily for (internal)
+ /// parsing & interpretation of the %BAM file contents. The method is
+ /// available, though, should the conversion operation be needed.
///
- /// \param[in] codedData encoded data
+ /// \param[in] codedData encoded data
/// \returns Frames object
+ ///
static Frames Decode(const std::vector<uint8_t>& codedData);
- /// Encodes a container of (raw) frames values in our 8-bit encoding.
+ /// \brief Creates encoded, compressed frame data from raw input data.
+ ///
+ /// \param[in] frames raw frame data
+ /// \returns lossy, 8-bit encoded frame data
///
- /// \param[in] frames expanded frame data
- /// \returns lossy, 8-bit encoded frame codes
static std::vector<uint8_t> Encode(const std::vector<uint16_t>& frames);
/// \}
@@ -112,7 +126,7 @@ public:
/// \}
public:
- /// \name Iterators
+ /// \name STL Compatbility
/// \{
/// \returns A const_iterator to the beginning of the sequence.
@@ -133,6 +147,12 @@ public:
/// \returns An iterator to the element past the end of the sequence.
std::vector<uint16_t>::iterator end(void);
+ /// \returns The number of frame data points.
+ size_t size(void) const;
+
+ /// \returns True if the container is empty, false otherwise.
+ bool empty(void) const;
+
/// \}
public:
@@ -143,15 +163,14 @@ public:
///
/// \param[in] frames data in expanded (not encoded) form
/// \returns reference to this object
+ ///
Frames& Data(const std::vector<uint16_t>& frames);
/// Sets this record's data.
///
- /// This is an overloaded function, allowing move semantics
- /// (instead of copying the data).
- ///
/// \param[in] frames data in expanded (not encoded) form
/// \returns reference to this object
+ ///
Frames& Data(std::vector<uint16_t>&& frames);
/// \}
@@ -160,46 +179,9 @@ private:
std::vector<uint16_t> data_;
};
-inline const std::vector<uint16_t>& Frames::Data(void) const
-{ return data_; }
-
-inline std::vector<uint16_t>& Frames::DataRaw(void)
-{ return data_; }
-
-inline std::vector<uint8_t> Frames::Encode(void) const
-{ return Frames::Encode(data_); }
-
-inline Frames& Frames::Data(const std::vector<uint16_t>& frames)
-{ data_ = frames; return *this; }
-
-inline Frames& Frames::Data(std::vector<uint16_t>&& frames)
-{ data_ = std::move(frames); return *this; }
-
-inline std::vector<uint16_t>::const_iterator Frames::cbegin(void) const
-{ return data_.cbegin(); }
-
-inline std::vector<uint16_t>::const_iterator Frames::cend(void) const
-{ return data_.cend(); }
-
-inline std::vector<uint16_t>::const_iterator Frames::begin(void) const
-{ return data_.begin(); }
-
-inline std::vector<uint16_t>::const_iterator Frames::end(void) const
-{ return data_.end(); }
-
-inline std::vector<uint16_t>::iterator Frames::begin(void)
-{ return data_.begin(); }
-
-inline std::vector<uint16_t>::iterator Frames::end(void)
-{ return data_.end(); }
-
-inline bool Frames::operator==(const Frames& other) const
-{ return data_ == other.data_; }
-
-inline bool Frames::operator!=(const Frames& other) const
-{ return !(*this == other); }
-
} // namespace BAM
} // namespace PacBio
+#include "pbbam/internal/Frames.inl"
+
#endif // FRAMES_H
diff --git a/include/pbbam/GenomicInterval.h b/include/pbbam/GenomicInterval.h
index 12ebb9a..a7d4986 100644
--- a/include/pbbam/GenomicInterval.h
+++ b/include/pbbam/GenomicInterval.h
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file GenomicInterval.h
+/// \brief Defines the GenomicInterval class.
+//
// Author: Derek Barnett
#ifndef GENOMICINTERVAL_H
@@ -46,29 +50,33 @@
namespace PacBio {
namespace BAM {
-/// This class represents a genomic interval (reference name, and 0-based coordinates)
+/// \brief The GenomicInterval class represents a genomic interval (reference
+/// name and 0-based coordinates).
+///
class PBBAM_EXPORT GenomicInterval
{
public:
/// \name Constructors & Related Methods
/// \{
- /// Default constructor; yields an empty genomic interval
+ /// \brief Creates an empty genomic interval
GenomicInterval(void);
- /// Constructor for interval on sequence with \p name, using range: [\p start, \p stop)
+ /// \brief Creates a genomic interval on sequence with \p name, using range:
+ /// [\p start, \p stop)
GenomicInterval(const std::string& name,
const Position& start,
const Position& stop);
- /// Constructor for interval, using REGION string
+ /// \brief Creates a genomic interval, using REGION string
///
/// "<ref>:<start>-<stop>" ("chr8:200-600")
///
- /// \note The htslib/samtools REGION string expects start positions to be 1-based.
- /// However, throughout pbbam (including the rest of this class), we stick
- /// to 0-based start coordinates. Thus, while the syntax matches that of samtools,
- /// we are using a 0-based start coordinate here.
+ /// \note The htslib/samtools REGION string expects start positions to be
+ /// 1-based. However, throughout pbbam (including the rest of this
+ /// class), we stick to 0-based start coordinates. Thus, while the
+ /// syntax matches that of samtools, we are using a 0-based start
+ /// coordinate here.
///
GenomicInterval(const std::string& zeroBasedRegionString);
@@ -80,6 +88,42 @@ public:
/// \}
public:
+ /// \name Comparison Operators
+ /// \{
+
+ /// \returns true if same id & underlying interval
+ bool operator==(const GenomicInterval& other) const;
+
+ /// \returns true if either ids or underlying intervals differ
+ bool operator!=(const GenomicInterval& other) const;
+
+ /// \}
+
+public:
+ /// \name Interval Operations
+ /// \{
+
+ /// \returns true if same id and underlying Interval::CoveredBy() other.
+ bool CoveredBy(const GenomicInterval& other) const;
+
+ /// \returns true if same id and underlying Interval::Covers() other.
+ bool Covers(const GenomicInterval& other) const;
+
+ /// \returns true if same id and underlying Interval::Intersects() other.
+ bool Intersects(const GenomicInterval& other) const;
+
+ /// \returns true if underlying Interval::IsValid(), and id/endpoints are
+ /// non-negative.
+ ///
+ bool IsValid(void) const;
+
+ /// \returns length of underlying
+ size_t Length(void) const;
+
+ /// \}
+
+
+public:
/// \name Attributes
/// \{
@@ -105,110 +149,40 @@ public:
///
/// \param[in] name
/// \returns reference to this interval
+ ///
GenomicInterval& Name(const std::string& name);
/// Sets this underlying Interval
///
/// \param[in] interval
/// \returns reference to this interval
+ ///
GenomicInterval& Interval(const PacBio::BAM::Interval<Position>& interval);
/// Sets this interval's start coordinate.
///
/// \param[in] start
/// \returns reference to this interval
+ ///
GenomicInterval& Start(const Position start);
/// Sets this interval's stop coordinate.
///
/// \param[in] stop
/// \returns reference to this interval
+ ///
GenomicInterval& Stop(const Position stop);
/// \}
-public:
- /// \name Interval Operations
- /// \{
-
- /// \returns true if same id and underlying Interval::CoveredBy() other.
- bool CoveredBy(const GenomicInterval& other) const;
-
- /// \returns true if same id and underlying Interval::Covers() other.
- bool Covers(const GenomicInterval& other) const;
-
- /// \returns true if same id and underlying Interval::Intersects() other.
- bool Intersects(const GenomicInterval& other) const;
-
- /// \returns true if underlying Interval::IsValid(), and id/endpoints are non-negative.
- bool IsValid(void) const;
-
- /// \returns length of underlying
- size_t Length(void) const;
-
- /// \}
-
-public:
- /// \name Comparison Operators
- /// \{
-
- /// \returns true if same id & underlying interval
- bool operator==(const GenomicInterval& other) const;
-
- /// \returns true if either ids or underlying intervals differ
- bool operator!=(const GenomicInterval& other) const;
-
- /// \}
-
private:
std::string name_;
PacBio::BAM::Interval<Position> interval_;
};
-inline GenomicInterval::~GenomicInterval(void) { }
-
-inline std::string GenomicInterval::Name(void) const
-{ return name_; }
-
-inline GenomicInterval& GenomicInterval::Name(const std::string& name)
-{ name_ = name; return *this; }
-
-inline PacBio::BAM::Interval<Position> GenomicInterval::Interval(void) const
-{ return interval_; }
-
-inline GenomicInterval& GenomicInterval::Interval(const PacBio::BAM::Interval<Position>& interval)
-{ interval_ = interval; return *this; }
-
-inline bool GenomicInterval::IsValid(void) const
-{
- return !name_.empty() &&
- interval_.Start() >= 0 &&
- interval_.Stop() >= 0 &&
- interval_.IsValid();
-}
-
-inline size_t GenomicInterval::Length(void) const
-{ return interval_.Length(); }
-
-inline Position GenomicInterval::Start(void) const
-{ return interval_.Start(); }
-
-inline GenomicInterval& GenomicInterval::Start(const Position start)
-{ interval_.Start(start); return *this; }
-
-inline Position GenomicInterval::Stop(void) const
-{ return interval_.Stop(); }
-
-inline GenomicInterval& GenomicInterval::Stop(const Position stop)
-{ interval_.Stop(stop); return *this; }
-
-inline bool GenomicInterval::operator==(const GenomicInterval& other) const
-{ return name_ == other.name_ && interval_ == other.interval_; }
-
-inline bool GenomicInterval::operator!=(const GenomicInterval& other) const
-{ return !(*this == other); }
-
} // namespace BAM
} // namspace PacBio
+#include "pbbam/internal/GenomicInterval.inl"
+
#endif // GENOMICINTERVAL_H
diff --git a/include/pbbam/GenomicIntervalQuery.h b/include/pbbam/GenomicIntervalQuery.h
index c1e10f9..7df7721 100644
--- a/include/pbbam/GenomicIntervalQuery.h
+++ b/include/pbbam/GenomicIntervalQuery.h
@@ -32,40 +32,80 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file GenomicIntervalQuery.h
+/// \brief Defines the GenomicIntervalQuery class.
+//
// Author: Derek Barnett
#ifndef GENOMICINTERVALQUERY_H
#define GENOMICINTERVALQUERY_H
#include "pbbam/GenomicInterval.h"
-#include "pbbam/QueryBase.h"
#include "pbbam/internal/QueryBase.h"
-#include <string>
+#include <memory>
namespace PacBio {
namespace BAM {
-class BamFile;
-
+/// \brief The GenomicIntervalQuery class provides iterable access to a
+/// DataSet's %BAM records, limiting results to those overlapping a
+/// GenomicInterval.
+///
+/// Example:
+/// \include code/GenomicIntervalQuery.txt
+///
+/// \note Currently, all %BAM files must have a corresponding ".bai" index file.
+/// Use BamFile::EnsureStandardIndexExists before creating the query if
+/// one may not be present.
+///
class PBBAM_EXPORT GenomicIntervalQuery : public internal::IQuery
{
public:
+
+ /// \brief Constructs a new GenomiIntervalQuery, limiting record results to
+ /// only those overalpping a GenomicInterval.
+ ///
+ /// \param[in] interval genomic interval of interest
+ /// \param[in] dataset input data source(s)
+ ///
+ /// \throws std::runtime_error on failure to open/read underlying %BAM or
+ /// BAI files.
+ ///
GenomicIntervalQuery(const GenomicInterval& interval,
- const DataSet& dataset);
+ const PacBio::BAM::DataSet& dataset);
+ ~GenomicIntervalQuery(void);
+
+public:
+ /// \brief Main iteration point for record access.
+ ///
+ /// Most client code should not need to use this method directly. Use
+ /// iterators instead.
+ ///
+ bool GetNext(BamRecord& r);
public:
+ /// \brief Sets a new genomic interval.
+ ///
+ /// This allows the same dataset/query to be re-used over multiple regions of
+ /// interest:
+ ///
+ /// \include code/GenomicIntervalQuery_Reuse.txt
+ ///
+ /// \param[in] interval new genomic interval
+ /// \returns reference to this query
+ ///
GenomicIntervalQuery& Interval(const GenomicInterval& interval);
- GenomicInterval Interval(void) const;
-protected:
- FileIterPtr CreateIterator(const BamFile& bamFile);
+ /// \returns Current genomic interval active on this query.
+ const GenomicInterval& Interval(void) const;
private:
- GenomicInterval interval_;
+ struct GenomicIntervalQueryPrivate;
+ std::unique_ptr<GenomicIntervalQueryPrivate> d_;
};
-//} // namespace staging
} // namespace BAM
} // namspace PacBio
diff --git a/include/pbbam/GroupQuery.h b/include/pbbam/GroupQuery.h
deleted file mode 100644
index abc1f5c..0000000
--- a/include/pbbam/GroupQuery.h
+++ /dev/null
@@ -1,88 +0,0 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-//
-// * Neither the name of Pacific Biosciences nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-
-// Author: Yuan Li
-
-#ifndef _GROUP_QUERY_H_
-#define _GROUP_QUERY_H_
-#include "GroupQueryBase.h"
-#include <htslib/sam.h>
-#include <vector>
-
-namespace PacBio {
-namespace BAM {
-
-class PBBAM_EXPORT SequentialGroupQueryBase: public GroupQueryBase
-{
-public:
- SequentialGroupQueryBase(const BamFile & bamFile);
-
-protected:
- virtual bool InSameGroup(const BamRecord & record, const BamRecord & another) = 0;
- bool GetNext(std::vector<BamRecord> & records);
- PBBAM_SHARED_PTR<samFile> htsFile_;
- PBBAM_SHARED_PTR<bam_hdr_t> htsHeader_;
- BamRecord nextRecord_;
-};
-
-//class PBBAM_EXPORT ZmwQuery: public SequentialGroupQueryBase
-//{
-//public:
-// ZmwQuery(const BamFile & bamFile)
-// : SequentialGroupQueryBase(bamFile) { }
-
-//private:
-// bool InSameGroup(const BamRecord & record, const BamRecord & another) {
-// return (record.MovieName() == another.MovieName() &&
-// record.HoleNumber() == another.HoleNumber());
-// }
-//};
-
-class PBBAM_EXPORT QNameQuery: public SequentialGroupQueryBase
-{
-public:
- QNameQuery(const BamFile & bamFile)
- : SequentialGroupQueryBase(bamFile) { }
-
-private:
- bool InSameGroup(const BamRecord & record, const BamRecord & another) {
- return (record.Impl().Name() == another.Impl().Name());
- }
-};
-
-} // namespace BAM
-} // namespace PacBio
-
-#endif // _SEQUENTIAL_GROUP_QUERY_BASE_H_
diff --git a/include/pbbam/GroupQueryBase.h b/include/pbbam/GroupQueryBase.h
deleted file mode 100644
index 624bdb1..0000000
--- a/include/pbbam/GroupQueryBase.h
+++ /dev/null
@@ -1,214 +0,0 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-//
-// * Neither the name of Pacific Biosciences nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-
-// Author: Yuan Li
-// TODO: Up to Derek's decision. This class mostly references
-// QueryBase.h. We may make QueryBase a template class and make
-// GroupQueryBase a specialization of the template.
-
-#ifndef _GROUP_QUERY_BASE_H_
-#define _GROUP_QUERY_BASE_H_
-
-#include "pbbam/QueryBase.h"
-#include "pbbam/BamRecord.h"
-#include <memory>
-#include <vector>
-
-namespace PacBio {
-namespace BAM {
-
-class GroupQueryBase;
-
-class GroupQueryIterator
-{
-public:
- std::vector<BamRecord> & operator* (void);
- std::vector<BamRecord> * operator-> (void);
- GroupQueryIterator& operator++ (void);
- GroupQueryIterator operator++ (int);
-
- bool operator== (const GroupQueryIterator & other) const;
- bool operator!= (const GroupQueryIterator & other) const;
-
- GroupQueryIterator(void);
- GroupQueryIterator(GroupQueryBase & parent);
-
-private:
- GroupQueryBase * query_;
- std::vector<BamRecord> records_;
- friend class GroupQueryBase;
-};
-
-class GroupQueryConstIterator
-{
-public:
- const std::vector<BamRecord>& operator*(void) const;
- const std::vector<BamRecord>* operator->(void) const;
- GroupQueryConstIterator& operator++(void);
- GroupQueryConstIterator operator++(int);
- bool operator==(const GroupQueryConstIterator& other) const;
- bool operator!=(const GroupQueryConstIterator& other) const;
-
- GroupQueryConstIterator(void);
- GroupQueryConstIterator(const GroupQueryBase& parent);
-
-private:
- GroupQueryBase* query_;
- std::vector<BamRecord> records_;
- friend class GroupQueryBase;
-};
-
-
-class PBBAM_EXPORT GroupQueryBase
-{
-public:
- typedef GroupQueryIterator iterator;
-
-protected:
- BamFile file_;
-
-public:
- virtual ~GroupQueryBase(void);
-
-public:
- GroupQueryBase::iterator begin(void);
- GroupQueryBase::iterator end(void);
-
-protected:
- GroupQueryBase(const BamFile & file);
- virtual bool GetNext(std::vector<BamRecord>& records) = 0;
-
- friend class GroupQueryIterator;
- friend class GroupQueryConstIterator;
-};
-
-inline GroupQueryBase::iterator GroupQueryBase::begin(void)
-{ return GroupQueryBase::iterator(*this); }
-
-inline GroupQueryBase::iterator GroupQueryBase::end(void)
-{ return GroupQueryBase::iterator(); }
-
-
-inline GroupQueryBase::GroupQueryBase(const BamFile & file)
- : file_(file)
-{ }
-
-inline GroupQueryBase::~GroupQueryBase(void) { }
-
-// -------------------
-// GroupQueryIterator
-// -------------------
-
-inline GroupQueryIterator::GroupQueryIterator(void): query_(0) {}
-
-inline GroupQueryIterator::GroupQueryIterator(GroupQueryBase & parent)
- : query_(& parent)
- , records_()
-{
- if (!(query_->GetNext(records_)))
- query_ = 0;
-}
-
-inline std::vector<BamRecord>& GroupQueryIterator::operator* (void)
-{ return records_; }
-
-inline std::vector<BamRecord>* GroupQueryIterator::operator-> (void)
-{ return &(operator*()); }
-
-inline GroupQueryIterator& GroupQueryIterator::operator++ (void)
-{
- if (!(query_->GetNext(records_)))
- query_ = 0;
- return *this;
-}
-
-inline GroupQueryIterator GroupQueryIterator::operator++ (int)
-{
- GroupQueryIterator result(*this);
- ++(*this);
- return result;
-}
-
-inline bool GroupQueryIterator::operator==(const GroupQueryIterator& other) const
-{ return query_ == other.query_; }
-
-inline bool GroupQueryIterator::operator!=(const GroupQueryIterator& other) const
-{ return !(*this == other); }
-
-
-// -------------------
-// GroupQueryConstIterator
-// -------------------
-
-inline const std::vector<BamRecord>& GroupQueryConstIterator::operator*(void) const
-{ return records_; }
-
-inline const std::vector<BamRecord>* GroupQueryConstIterator::operator->(void) const
-{ return &(operator*()); }
-
-inline GroupQueryConstIterator& GroupQueryConstIterator::operator++(void)
-{
- if (!(query_->GetNext(records_)))
- query_ = 0;
- return *this;
-}
-
-inline GroupQueryConstIterator GroupQueryConstIterator::operator++(int)
-{
- GroupQueryConstIterator result(*this);
- ++(*this);
- return result;
-}
-
-inline bool GroupQueryConstIterator::operator==(const GroupQueryConstIterator& other) const
-{ return query_ == other.query_; }
-
-inline bool GroupQueryConstIterator::operator!=(const GroupQueryConstIterator& other) const
-{ return !(*this == other); }
-
-inline GroupQueryConstIterator::GroupQueryConstIterator(void): query_(0) { }
-
-inline GroupQueryConstIterator::GroupQueryConstIterator(const GroupQueryBase& parent)
- : query_(const_cast<GroupQueryBase*>(&parent))
- , records_()
-{
- if (!(query_->GetNext(records_)))
- query_ = 0;
-}
-
-} // namespace BAM
-} // namespace PacBio
-
-#endif // _GROUP_QUERY_BASE_H_
diff --git a/include/pbbam/IndexedFastaReader.h b/include/pbbam/IndexedFastaReader.h
index aa485db..b382d96 100644
--- a/include/pbbam/IndexedFastaReader.h
+++ b/include/pbbam/IndexedFastaReader.h
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file IndexedFastaReader.h
+/// \brief Defines the IndexedFastaReader class.
+//
// Author: David Alexander
#ifndef INDEXEDFASTAREADER_H
@@ -40,8 +44,7 @@
#include "pbbam/Orientation.h"
#include "pbbam/Position.h"
-#include "htslib/faidx.h"
-
+#include <htslib/faidx.h>
#include <string>
#include <iostream>
#include <stdexcept>
@@ -52,50 +55,106 @@ namespace BAM {
class GenomicInterval;
class BamRecord;
+/// \brief The IndexedFastaReader class provides random-access to FASTA file
+/// data.
+///
class IndexedFastaReader {
public:
- IndexedFastaReader() = delete;
+ /// \name Constructors & Related Methods
+ /// \{
+
+ IndexedFastaReader(void) = delete;
IndexedFastaReader(const std::string& filename);
- ~IndexedFastaReader();
+ IndexedFastaReader(const IndexedFastaReader& src);
+ IndexedFastaReader& operator=(const IndexedFastaReader& rhs);
+ ~IndexedFastaReader(void);
-public:
- // Copy constructor
- IndexedFastaReader(const IndexedFastaReader& src)
- {
- if (!Open(src.filename_))
- throw std::runtime_error("Cannot open file " + src.filename_);
- }
-
- // Copy assignment operator
- IndexedFastaReader& operator=(const IndexedFastaReader& rhs)
- {
- if(&rhs == this) return *this;
-
- Open(rhs.filename_);
- return *this;
- }
+ /// \}
public:
- std::string Subsequence(const std::string& id, Position begin, Position end) const;
+ /// name Sequence Access
+ /// \{
+
+ /// \brief Fetches FASTA sequence for desired interval.
+ ///
+ /// \param[in] id reference sequence name
+ /// \param[in] begin start position
+ /// \param[in] end end position
+ ///
+ /// \returns sequence string at desired interval
+ ///
+ /// \throws std::runtime_error on failure to fetch sequence
+ ///
+ std::string Subsequence(const std::string& id,
+ Position begin,
+ Position end) const;
+
+ /// \brief Fetches FASTA sequence for desired interval.
+ ///
+ /// \param[in] interval desired interval
+ ///
+ /// \returns sequence string at desired interval
+ ///
+ /// \throws std::runtime_error on failure to fetch sequence
+ ///
std::string Subsequence(const GenomicInterval& interval) const;
+
+ /// \brief Fetches FASTA sequence for desired interval.
+ ///
+ /// \param[in] htslibRegion htslib/samtools-formatted REGION string
+ /// representing the desired interval
+ ///
+ /// \returns sequence string at desired interval
+ ///
+ /// \throws std::runtime_error on failure to fetch sequence
+ ///
std::string Subsequence(const char* htslibRegion) const;
-public:
- // \returns subsequence of the reference corresponding to the bamRecord,
- // oriented and gapped as requested. For example, "native" orientation
- // and "gapped" will return the reference sequence with gaps inserted, as
- // would align against the read in "native" orientation
+ /// \brief Fetches FASTA sequence corresponding to a BamRecord, oriented and
+ /// gapped as requested.
+ ///
+ /// For example, "native" orientation and "gapped" will return the reference
+ /// sequence with gaps inserted, as would align against the read in "native"
+ /// orientation.
+ ///
+ /// \param[in] bamRecord input BamRecord to derive interval/CIGAR
+ /// data
+ /// \param[in] orientation orientation of output
+ /// \param[in] gapped if true, gaps/padding will be inserted, per
+ /// record's CIGAR info.
+ /// \param[in] exciseSoftClips if true, any soft-clipped positions will be
+ /// removed from query ends
+ ///
+ /// \returns sequence string over the record's interval
+ ///
+ /// \throws std::runtime_error on failure to fetch sequence
+ ///
std::string ReferenceSubsequence(const BamRecord& bamRecord,
const Orientation orientation=Orientation::GENOMIC,
const bool gapped=false,
const bool exciseSoftClips=false) const;
+ /// \}
+
public:
- int NumSequences() const;
+ /// \name File Attributes
+ /// \{
+
+ /// \returns true if FASTA file contains a sequence matching \p name
bool HasSequence(const std::string& name) const;
+
+ /// \returns number of sequences stored in FASTA file
+ int NumSequences(void) const;
+
+ /// \returns length of FASTA sequence
+ ///
+ /// \throws std::runtime_error if length could not be determined
+ ///
int SequenceLength(const std::string& name) const;
+ /// \}
+
private:
std::string filename_;
faidx_t* handle_;
@@ -105,8 +164,7 @@ private:
bool Open(const std::string& filename);
};
+} // namespace BAM
+} // namespace PacBio
-
-} // PacBio
-} // BAM
#endif // INDEXEDFASTAREADER_H
diff --git a/include/pbbam/Interval.h b/include/pbbam/Interval.h
index 6c2e91a..3f5a40e 100644
--- a/include/pbbam/Interval.h
+++ b/include/pbbam/Interval.h
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file Interval.h
+/// \brief Defines the Interval class.
+//
// Author: Derek Barnett
#ifndef INTERVAL_H
@@ -48,10 +52,11 @@
namespace PacBio {
namespace BAM {
-/// \brief Utility class for working with half-open (right-open) intervals. [start, stop)
+/// \brief Represents a half-open (right-open) interval [start, stop)
///
/// \note This class is agnostic whether the values are 0-based or 1-based.
-/// \todo Should it be? Should that go here or "higher up"?
+/// Client code should primarily work with GenomicInterval, which does
+/// enforce this distinction.
///
template<typename T>
class Interval
@@ -60,21 +65,31 @@ public:
typedef boost::icl::discrete_interval<T> interval_type;
public:
-
- /// \name Constructors
+ /// \name Constructors & Related Methods
/// \{
- /** Default constructor; yields an empty interval [0,0) */
- inline Interval(void);
+ /// \brief Creates an empty interval [0,0)
+ Interval(void);
- /** Constructor for a singleton interval [val,val+1) */
- inline Interval(const T val);
+ /// \brief Creates a 'singleton' interval [val,val+1)
+ Interval(const T val);
- /** Constructor for interval from [start, stop) */
- inline Interval(const T start, const T stop);
+ /// brief Creates an interval from [start, stop) */
+ Interval(const T start, const T stop);
- /** Copy constructor */
- inline Interval(const Interval<T>& other);
+ Interval(const Interval<T>& other);
+
+ /// \}
+
+public:
+ /// \name Comparison Operators
+ /// \{
+
+ /// \returns true if both intervals share the same endpoints
+ bool operator==(const Interval<T>& other) const;
+
+ /// \returns true if either interval's endpoints differ
+ bool operator!=(const Interval<T>& other) const;
/// \}
@@ -82,53 +97,45 @@ public:
/// \name Attributes
/// \{
- /// \returns interval start coordinate
- inline T Start(void) const;
+ /// \returns interval's start coordinate
+ T Start(void) const;
/// Sets this interval's start coordinate.
///
/// \param[in] start
/// \returns reference to this interval
- inline Interval<T>& Start(const T& start);
+ ///
+ Interval<T>& Start(const T& start);
- /// \returns interval stop coordinate
- inline T Stop(void) const;
+ /// \returns interval's stop coordinate
+ T Stop(void) const;
/// Sets this interval's stop coordinate.
///
/// \param[in] stop
/// \returns reference to this interval
- inline Interval<T>& Stop(const T& stop);
+ ///
+ Interval<T>& Stop(const T& stop);
/// \}
+public:
/// \name Interval Operations
/// \returns true if this interval is fully covered by (or contained in) \p other
- inline bool CoveredBy(const Interval<T>& other) const;
+ bool CoveredBy(const Interval<T>& other) const;
//// \returns true if this interval covers (or contains) \p other
- inline bool Covers(const Interval<T>& other) const;
+ bool Covers(const Interval<T>& other) const;
/// \returns true if intervals interset
- inline bool Intersects(const Interval<T>& other) const;
+ bool Intersects(const Interval<T>& other) const;
/// \returns true if interval is valid (e.g. start < stop)
- inline bool IsValid(void) const;
+ bool IsValid(void) const;
/// \returns interval length
- inline size_t Length(void) const;
-
- /// \}
-
- /// \name Comparison Operators
- /// \{
-
- /// \returns true if both intervals share the same endpoints
- inline bool operator==(const Interval<T>& other) const;
-
- /// \returns true if either interval's endpoints differ
- inline bool operator!=(const Interval<T>& other) const;
+ size_t Length(void) const;
/// \}
@@ -136,77 +143,9 @@ private:
interval_type data_;
};
-template<typename T>
-Interval<T>::Interval(void)
- : data_(boost::icl::discrete_interval<T>::right_open(0,0))
-{ }
-
-template<typename T>
-Interval<T>::Interval(const T val)
- : data_(boost::icl::discrete_interval<T>::right_open(val,val+1))
-{ }
-
-template<typename T>
-Interval<T>::Interval(const T start, const T stop)
- : data_(boost::icl::discrete_interval<T>::right_open(start,stop))
-{ }
-
-template<typename T>
-Interval<T>::Interval(const Interval<T>& other)
- : data_(boost::icl::discrete_interval<T>::right_open(other.Start(), other.Stop()))
-{ }
-
-template<typename T>
-inline bool Interval<T>::operator==(const Interval<T>& other) const
-{ return data_ == other.data_; }
-
-template<typename T>
-inline bool Interval<T>::operator!=(const Interval<T>& other) const
-{ return !(data_ == other.data_); }
-
-template<typename T>
-inline bool Interval<T>::CoveredBy(const Interval<T>& other) const
-{ return boost::icl::within(data_, other.data_); }
-
-template<typename T>
-inline bool Interval<T>::Covers(const Interval<T>& other) const
-{ return boost::icl::contains(data_, other.data_); }
-
-template<typename T>
-inline bool Interval<T>::Intersects(const Interval<T>& other) const
-{ return boost::icl::intersects(data_, other.data_); }
-
-template<typename T>
-inline bool Interval<T>::IsValid(void) const
-{ return !boost::icl::is_empty(data_); }
-
-template<typename T>
-inline size_t Interval<T>::Length(void) const
-{ return boost::icl::length(data_); }
-
-template<typename T>
-inline T Interval<T>::Start(void) const
-{ return data_.lower(); }
-
-template<typename T>
-inline Interval<T>& Interval<T>::Start(const T& start)
-{
- data_ = boost::icl::discrete_interval<T>::right_open(start, data_.upper());
- return *this;
-}
-
-template<typename T>
-inline T Interval<T>::Stop(void) const
-{ return data_.upper(); }
-
-template<typename T>
-inline Interval<T>& Interval<T>::Stop(const T& stop)
-{
- data_ = boost::icl::discrete_interval<T>::right_open(data_.lower(), stop);
- return *this;
-}
-
} // namespace BAM
} // namspace PacBio
+#include "pbbam/internal/Interval.inl"
+
#endif // GENOMICINTERVAL_H
diff --git a/include/pbbam/LocalContextFlags.h b/include/pbbam/LocalContextFlags.h
index 53e8c9e..0c59707 100644
--- a/include/pbbam/LocalContextFlags.h
+++ b/include/pbbam/LocalContextFlags.h
@@ -33,6 +33,10 @@
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
//
+// File Description
+/// \file LocalContextFlags.h
+/// \brief Defines the LocalContextFlags enum & helper method(s).
+//
// Author: Lance Hepler
#ifndef LOCALCONTEXTFLAGS_H
@@ -43,17 +47,24 @@
namespace PacBio {
namespace BAM {
+/// \brief The LocalContextFlags enum defines the flags that can be used
+/// to describe a subread's "local context", i.e. whether it is
+/// flanked by barcodes/adapters or its pass orientation.
+///
enum LocalContextFlags : uint8_t
{
- NO_LOCAL_CONTEXT = 0,
- ADAPTER_BEFORE = 1,
- ADAPTER_AFTER = 2,
- BARCODE_BEFORE = 4,
- BARCODE_AFTER = 8,
- FORWARD_PASS = 16,
- REVERSE_PASS = 32
+ NO_LOCAL_CONTEXT = 0, ///< No context information available
+ ADAPTER_BEFORE = 1, ///< Adapter precedes subread
+ ADAPTER_AFTER = 2, ///< Adapter follows subread
+ BARCODE_BEFORE = 4, ///< Barcode precedes subread
+ BARCODE_AFTER = 8, ///< Barcode follows subread
+ FORWARD_PASS = 16, ///< Subread's orientation is 'forward pass'
+ REVERSE_PASS = 32 ///< Subread's orientation is 'reverse pass'
};
+
+/// \returns a LocalContextFlags value containing the result of the bitwise-OR
+/// operation of \p lhs and \p rhs.
// constexpr is implicitly inline
constexpr LocalContextFlags operator|(const LocalContextFlags lhs, const LocalContextFlags rhs)
{
diff --git a/include/pbbam/Orientation.h b/include/pbbam/Orientation.h
index 7582199..c354822 100644
--- a/include/pbbam/Orientation.h
+++ b/include/pbbam/Orientation.h
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file Orientation.h
+/// \brief Defines the Orientation enum.
+//
// Author: Derek Barnett
#ifndef ORIENTATION_H
@@ -43,10 +47,20 @@
namespace PacBio {
namespace BAM {
+/// \brief This enum defines the orientations recognized by BamRecord, for
+/// presenting "per-base" data.
+///
+/// Orientation::NATIVE indicates that data should be presented in the subread's
+/// original form.
+///
+/// Orientation::GENOMIC indicates that data should be presented relative to
+/// genomic forward strand. This means that data will be reversed (or
+/// reverse-complemented) if the subread was aligned to the reverse strand.
+///
enum class Orientation
{
- NATIVE
- , GENOMIC
+ NATIVE ///< Present data in 'raw' original orientation, regardless of aligned Strand
+ , GENOMIC ///< Present data in aligned orientation, always relative to Strand::FORWARD.
};
} // namespace BAM
diff --git a/include/pbbam/PbiBasicTypes.h b/include/pbbam/PbiBasicTypes.h
new file mode 100644
index 0000000..4006ed4
--- /dev/null
+++ b/include/pbbam/PbiBasicTypes.h
@@ -0,0 +1,108 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiBasicTypes.h
+/// \brief Defines the basic data structures used in PBI lookups.
+//
+// Author: Derek Barnett
+
+#ifndef PBIBASICTYPES_H
+#define PBIBASICTYPES_H
+
+#include "pbbam/Compare.h"
+#include "pbbam/Config.h"
+#include <deque>
+#include <utility>
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The IndexResultBlock class represents a contiguous group of records
+/// returned from a PBI lookup.
+///
+/// Contiguous reads that satisfy a PBI lookup query will be merged down into a
+/// single block. This helps to minimize the number of seeks in subsequent read
+/// operations.
+///
+/// An PBI-enabled reader or query can iterate over a list of IndexResultBlocks;
+/// for each block, seeking to the first record and then sequentially reading
+/// 'numReads' consecutive records before needing to seek again.
+///
+struct PBBAM_EXPORT IndexResultBlock
+{
+public:
+ IndexResultBlock(void);
+ IndexResultBlock(size_t idx, size_t numReads);
+
+public:
+ bool operator==(const IndexResultBlock& other) const;
+ bool operator!=(const IndexResultBlock& other) const;
+
+public:
+ size_t firstIndex_; ///< index of block's first record in BAM/PBI files (e.g. i-th record)
+ size_t numReads_; ///< number of reads in this block
+ int64_t virtualOffset_; ///< virtual offset of first record in this block
+};
+
+/// \brief container of PBI result blocks
+///
+typedef std::deque<IndexResultBlock> IndexResultBlocks;
+
+/// \brief container of raw PBI indices
+///
+/// This is the primary result of PbiFilter -associated classes. This raw list
+/// can participate in set operations (union, intersect) for compound filters,
+/// and then be merged down into IndexResultBlocks for actual data file
+/// random-access.
+///
+typedef std::vector<size_t> IndexList;
+
+/// \brief pair representing a range of PBI indices: where interval
+/// is [first, second)
+///
+/// Used primarily by the PBI's CoordinateSortedData components.
+///
+/// \sa PbiReferenceEntry, PbiRawReferenceData, & ReferenceLookupData
+///
+typedef std::pair<size_t, size_t> IndexRange;
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/PbiBasicTypes.inl"
+
+#endif // PBIBASICTYPES_H
diff --git a/include/pbbam/PbiBuilder.h b/include/pbbam/PbiBuilder.h
index 6e99302..d1d83bc 100644
--- a/include/pbbam/PbiBuilder.h
+++ b/include/pbbam/PbiBuilder.h
@@ -33,6 +33,10 @@
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
//
+// File Description
+/// \file PbiBuilder.h
+/// \brief Defines the PbiBuilder class.
+//
// Author: Derek Barnett
#ifndef PBIBUILDER_H
@@ -50,31 +54,117 @@ class PbiRawData;
namespace internal { class PbiBuilderPrivate; }
-/// This class may be used to construct PBI index data while a BAM file is being
-/// written, rather than waiting to process it at the end.
+/// \brief The PbiBuilder class construct PBI index data from %BAM record data.
+///
+/// Records are added one-by-one. This allows for either whole-file indexing of
+/// existing %BAM files or for indexing "on-the-fly" alongside a %BAM file as it
+/// is generated.
+///
+/// For simple PBI creation from existing %BAM files, see PbiFile::CreateFrom.
+/// This is the recommended approach, unless finer control or additional
+/// processing is needed.
///
class PBBAM_EXPORT PbiBuilder
{
public:
+ /// \brief This enum allows you to control the compression level of the
+ /// output PBI file.
+ ///
+ /// Values are equivalent to zlib compression levels. See its documentation
+ /// for more details: http://www.zlib.net/manual.html
+ ///
+ enum CompressionLevel
+ {
+ CompressionLevel_0 = 0
+ , CompressionLevel_1 = 1
+ , CompressionLevel_2 = 2
+ , CompressionLevel_3 = 3
+ , CompressionLevel_4 = 4
+ , CompressionLevel_5 = 5
+ , CompressionLevel_6 = 6
+ , CompressionLevel_7 = 7
+ , CompressionLevel_8 = 8
+ , CompressionLevel_9 = 9
+
+ , DefaultCompression = -1
+ , NoCompression = CompressionLevel_0
+ , FastCompression = CompressionLevel_1
+ , BestCompression = CompressionLevel_9
+ };
+
+public:
/// \name Constructors & Related Methods
/// \{
- /// Initialize builder to write data to \p pbiFilename.
+ /// \brief Initializes builder to write data to \p pbiFilename.
+ ///
+ /// \param[in] pbiFilename output filename
+ /// \param[in] compressionLevel zlib compression level
+ /// \param[in] numThreads number of threads for compression. If set to
+ /// 0, PbiBuilder will attempt to determine a
+ /// reasonable estimate. If set to 1, this will
+ /// force single-threaded execution. No checks
+ /// are made against an upper limit.
+ ///
+ /// \throws std::runtime_error if PBI file cannot be opened for writing
+ ///
+ PbiBuilder(const std::string& pbiFilename,
+ const PbiBuilder::CompressionLevel compressionLevel = PbiBuilder::DefaultCompression,
+ const size_t numThreads = 4);
+
+ /// \brief Initializes builder to write data to \p pbiFilename.
+ ///
+ /// Reference data-tracking structures will be initialized to expect
+ /// \p numReferenceSequences. (This is useful so that we can mark any
+ /// references that lack observed data appropriately).
+ ///
+ /// \param[in] pbiFilename output filename
+ /// \param[in] numReferenceSequences number of possible reference
+ /// sequences, e.g. BamHeader::NumSequences
+ /// \param[in] compressionLevel zlib compression level
+ /// \param[in] numThreads number of threads for compression. If set to
+ /// 0, PbiBuilder will attempt to determine a
+ /// reasonable estimate. If set to 1, this will
+ /// force single-threaded execution. No checks
+ /// are made against an upper limit.
///
/// \throws std::runtime_error if PBI file cannot be opened for writing
///
- PbiBuilder(const std::string& pbiFilename);
+ PbiBuilder(const std::string& pbiFilename,
+ const size_t numReferenceSequences,
+ const PbiBuilder::CompressionLevel compressionLevel = PbiBuilder::DefaultCompression,
+ const size_t numThreads = 4);
- /// Initialize builder to write data to \p pbiFilename. Reference data-tracking
- /// structures will be initialized to expect \p numReferenceSequences. (This is
- /// useful so that we can mark any references that lack observed data appropriately).
+ /// \brief Initializes builder to write data to \p pbiFilename.
+ ///
+ /// Reference data-tracking structures will be initialized to expect
+ /// \p numReferenceSequences, but only if \p isCoordinateSorted is true.
+ ///
+ /// \param[in] pbiFilename output filename
+ /// \param[in] numReferenceSequences number of possible reference
+ /// sequences, e.g. BamHeader::NumSequences
+ /// \param[in] isCoordinateSorted if false, disables reference
+ /// sequence tracking
+ /// (BamHeader::SortOrder != "coordinate")
+ /// \param[in] compressionLevel zlib compression level
+ /// \param[in] numThreads number of threads for compression. If set to
+ /// 0, PbiBuilder will attempt to determine a
+ /// reasonable estimate. If set to 1, this will
+ /// force single-threaded execution. No checks
+ /// are made against an upper limit.
///
/// \throws std::runtime_error if PBI file cannot be opened for writing
///
- PbiBuilder(const std::string& pbiFilename, const size_t numReferenceSequences);
+ PbiBuilder(const std::string& pbiFilename,
+ const size_t numReferenceSequences,
+ const bool isCoordinateSorted,
+ const PbiBuilder::CompressionLevel compressionLevel = PbiBuilder::DefaultCompression,
+ const size_t numThreads = 4);
- /// On destruction, data summaries are calculated, raw data is written to file, and
- /// file handle closed.
+ /// \brief Destroys builder, writing its data out to PBI file.
+ ///
+ /// On destruction, data summaries are calculated, raw data is written to
+ /// file, and file handle closed.
///
~PbiBuilder(void);
@@ -84,28 +174,27 @@ public:
/// \name Index Building
/// \{
- /// Adds \p record's data to underlying raw data structure. \p vOffset is the BGZF
- /// virtual offset into the BAM file where the record begins.
+ /// \brief Adds \p record's data to underlying raw data structure.
+ ///
+ /// \note \p vOffset is a BGZF \b virtual offset into the %BAM file. To get
+ /// this value, you should use one of the following: \n
+ /// - while reading existing %BAM: BamReader::VirtualTell \n
+ /// - while writing new %BAM: BamWriter::Write(const BamRecord& record, int64_t* vOffset) \n
+ ///
+ ///
+ /// To build a PBI index while generating a %BAM file:
+ /// \include code/PbiBuilder_WithWriter.txt
///
- /// \sa BamWriter::Write(const BamRecord& record, int64_t* vOffset) for the easiest
- /// way to retrieve this information while generating a BAM file. See example below:
+ /// To build a PBI index from an existing %BAM file:
+ /// \include code/PbiBuilder_WithReader.txt
///
- /// \code{.cpp}
- /// BamWriter writer(...);
- /// PbiBuilder pbiBuilder(...);
- /// int64_t vOffset;
- /// while (...) {
- /// BamRecord record;
- /// // ... generate record data ...
- /// writer.Write(record, &vOffset);
- /// pbiBuilder.AddRecord(record, &vOffset);
- /// }
- /// \endcode
+ /// \param[in] record input BamRecord to pull index data from
+ /// \param[in] vOffset \b virtual offset into %BAM file where record begins
///
void AddRecord(const BamRecord& record, const int64_t vOffset);
- /// \returns const reference to current raw index data. Mostly only used for testing;
- /// shouldn't be needed by most client code.
+ /// \returns const reference to current raw index data. Mostly only used for
+ /// testing; shouldn't be needed by most client code.
///
const PbiRawData& Index(void) const;
diff --git a/include/pbbam/PbiFile.h b/include/pbbam/PbiFile.h
index 81c3de3..89bffa3 100644
--- a/include/pbbam/PbiFile.h
+++ b/include/pbbam/PbiFile.h
@@ -33,12 +33,17 @@
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
//
+// File Description
+/// \file PbiFile.h
+/// \brief Defines the PbiFile enums, typedefs, and methods.
+//
// Author: Derek Barnett
#ifndef PBIFILE_H
#define PBIFILE_H
#include "pbbam/Config.h"
+#include "pbbam/PbiBuilder.h"
#include <string>
namespace PacBio {
@@ -46,40 +51,43 @@ namespace BAM {
class BamFile;
-// class-like namespace
namespace PbiFile
{
-
- /// PBI File Sections
- ///
- /// See (spec/doc links) for more details.
+ /// \brief This enum describes the PBI file sections
///
enum Section
{
- SUBREAD = 0x0000 ///< SubreadData (required)
+ BASIC = 0x0000 ///< BasicData (required)
, MAPPED = 0x0001 ///< MappedData (always optional)
, REFERENCE = 0x0002 ///< ReferenceData (always optional)
, BARCODE = 0x0004 ///< BarcodeData (always optional)
- , ALL = SUBREAD | MAPPED | REFERENCE | BARCODE ///< synonym for building
+ , ALL = BASIC | MAPPED | REFERENCE | BARCODE ///< Synonym for 'all sections'
};
+
+ /// \brief Helper typedef for storing multiple Section flags.
+ ///
typedef uint16_t Sections;
- /// PBI File Version
+ /// \brief This enum describes the PBI file version.
enum VersionEnum
{
- Version_3_0_0 = 0x030000
+ Version_3_0_0 = 0x030000 ///< v3.0.0
+ , Version_3_0_1 = 0x030001 ///< v3.0.1
- , CurrentVersion = Version_3_0_0
+ , CurrentVersion = Version_3_0_1 ///< Synonym for the current PBI version.
};
- /// Builds PBI index data from the supplied ".bam" file and writes a ".pbi" file.
+ /// \brief Builds PBI index data from the supplied %BAM file and writes a
+ /// ".pbi" file.
///
- /// \param[in] bamFile The source BamFile.
+ /// \param[in] bamFile source %BAM file
///
- /// \throws std::exception if index file could not be created
+ /// \throws std::runtime_error if index file could not be created
///
- PBBAM_EXPORT void CreateFrom(const BamFile& bamFile);
+ PBBAM_EXPORT void CreateFrom(const BamFile& bamFile,
+ const PbiBuilder::CompressionLevel compressionLevel = PbiBuilder::DefaultCompression,
+ const size_t numThreads = 4);
} // namespace PbiFile
} // namespace BAM
diff --git a/include/pbbam/PbiFilter.h b/include/pbbam/PbiFilter.h
new file mode 100644
index 0000000..65ef7ef
--- /dev/null
+++ b/include/pbbam/PbiFilter.h
@@ -0,0 +1,343 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiFilter.h
+/// \brief Defines the PbiFilter class & helper 'concept'.
+//
+// Author: Derek Barnett
+
+#ifndef PBIFILTER_H
+#define PBIFILTER_H
+
+#include "pbbam/DataSet.h"
+#include "pbbam/PbiBasicTypes.h"
+#include "pbbam/PbiIndex.h"
+#include <boost/concept_check.hpp>
+#include <memory>
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+namespace internal { struct PbiFilterPrivate; }
+
+/// \brief The PbiFilterConcept class provides compile-time enforcement of the
+/// required interface for PbiFilter's child filters.
+///
+template<typename T>
+struct PbiFilterConcept
+{
+ BOOST_CONCEPT_USAGE(PbiFilterConcept)
+ {
+ // All PBI filters (built-in or client-define) need only provide this
+ // interface:
+ //
+ // bool Accepts(const PbiRawData& index, const size_t row) const;
+ //
+ const PbiRawData index;
+ bool result = filter.Accepts(index, 0);
+ (void)result;
+ }
+
+private:
+ T filter;
+// PbiRawData index;
+};
+
+/// \brief The PbiFilter class provides a mechanism for performing PBI-enabled
+/// lookups.
+///
+/// The PbiFilter API is designed to be flexible, both built-in and for
+/// client-side customization. Built-in filters are provided for common queries,
+/// and client code can define and use custom filters as well. More complex
+/// filtering rules can be constructed via composition of simpler child filters.
+///
+/// Filter objects used as children of PbiFilter need only provide a method that
+/// matches this signature:
+///
+/// \include code/PbiFilter_Interface.txt
+///
+/// This requirement is enforced internally, using the PbiFilterConcept to
+/// require a compatible interface without requiring inheritance. This approach
+/// allows composition of heterogeneous filter types without worrying about a
+/// class hierarchy, pointer ownership across library/client boundaries, etc.
+///
+/// Thus a client application can define a custom filter if the built-in filters
+/// do not quite meet requirements. This filter may then be used in further
+/// PbiFilter composition, or directly to PbiFilterQuery
+///
+/// \include code/PbiFilter_CustomFilter.txt
+///
+/// As mentioned above, complex filters can be built up using multiple "child"
+/// filters. These complex filters are constructed by using either
+/// PbiFilter::Union (logical-OR over all direct children) or
+/// PbiFilter::Intersection (logical-AND over direct children).
+///
+/// \include code/PbiFilter_Composition.txt
+///
+class PBBAM_EXPORT PbiFilter
+{
+public:
+ enum CompositionType
+ {
+ INTERSECT
+ , UNION
+ };
+
+public:
+ /// \name Set Operations
+ /// \{
+
+ /// \brief Creates a PbiFilter that acts as intersection of the input
+ /// filters.
+ ///
+ /// A record must satisfy \b all of this filter's direct "child" filters.
+ ///
+ /// Equivalent to:
+ /// \include code/PbiFilter_Intersection_Copy.txt
+ ///
+ /// \param[in] filters vector of child filters
+ /// \returns composite filter
+ ///
+ static PbiFilter Intersection(const std::vector<PbiFilter>& filters);
+
+ /// \brief Creates a PbiFilter that acts as an intersection of the input
+ /// filters.
+ ///
+ /// A record must satisfy \b all of this filter's direct "child" filters.
+ ///
+ /// Equivalent to:
+ /// \include code/PbiFilter_Intersection_Move.txt
+ ///
+ /// \param[in] filters vector of child filters
+ /// \returns composite filter
+ ///
+ static PbiFilter Intersection(std::vector<PbiFilter>&& filters);
+
+ /// \brief Creates a PbiFilter that acts as a union of the input filters.
+ ///
+ /// A record must satisfy \b any of this filter's direct "child" filters.
+ ///
+ /// Equivalent to:
+ /// \include code/PbiFilter_Union_Copy.txt
+ ///
+ /// \param[in] filters vector of child filters
+ /// \returns composite filter
+ ///
+ static PbiFilter Union(const std::vector<PbiFilter>& filters);
+
+ /// \brief Creates a PbiFilter that acts as a union of the input filters.
+ ///
+ /// A record must satisfy \b any of this filter's direct "child" filters.
+ ///
+ /// Equivalent to:
+ /// \include code/PbiFilter_Union_Move.txt
+ ///
+ /// \param[in] filters vector of child filters
+ /// \returns composite filter
+ ///
+ static PbiFilter Union(std::vector<PbiFilter>&& filters);
+
+ /// \}
+
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates a PbiFilter from a %DataSet's described filters.
+ ///
+ /// A DataSet may contain a Filters element, itself a list of Filter
+ /// elements. Each Filter element will contain a Properties element, itself
+ /// a list of Property elements.
+ ///
+ /// The Filters hierarchy looks like this (in its XML output):
+ /// \verbinclude examples/plaintext/PbiFilter_DataSetXmlFilters.txt
+ ///
+ /// The resulting PbiFilter represents a union over all Filter elements,
+ /// with each Filter element requiring an intersection of all of its
+ /// Property criteria. These Property elements are mapped to built-in PBI
+ /// filter types. To use the labels in the example XML above, the filter
+ /// created here is equivalent to:
+ ///
+ /// (A && B) || (C && D)
+ ///
+ /// If a DataSet lacks any Filters, then an empty PbiFilter will be created
+ /// - corresponding to the dataset's entire contents.
+ ///
+ /// \param[in] dataset maybe containing filters
+ /// \returns composite filter
+ ///
+ static PbiFilter FromDataSet(const DataSet& dataset);
+
+public:
+
+ /// \brief Creates an empty filter.
+ ///
+ /// \note An empty filter will result in all records being returned, e.g.
+ /// for query iteration.
+ ///
+ /// \param[in] type composition type. Any additional child filters added to
+ /// this composite will be treated according to this type.
+ /// If INTERSECT, a record must match all child filters. If
+ /// UNION, a record must match any child filter.
+ ///
+ PbiFilter(const CompositionType type = INTERSECT);
+
+ /// \brief Creates a composite filter (of INTERSECT type) with an initial
+ /// child filter.
+ ///
+ /// \note T must satisfy PbiFilterConcept
+ ///
+ /// \param[in] filter initial child filter
+ ///
+ template<typename T>
+ PbiFilter(const T& filter);
+
+ /// \brief Creates a composite filter (of INTERSECT type) with an initial
+ /// child filter.
+ ///
+ /// \note T must satisfy PbiFilterConcept
+ ///
+ /// \param[in] filter initial child filter
+ ///
+ template<typename T>
+ PbiFilter(T&& filter);
+
+ /// \brief Creates a composite filter (of INTERSECT type) with a list of
+ /// initial child filters.
+ ///
+ /// \param[in] filters initial child filters
+ ///
+ PbiFilter(const std::vector<PbiFilter>& filters);
+
+ /// \brief Creates composite filter (of INTERSECT type) with a list of
+ /// initial child filters.
+ ///
+ /// \param[in] filters initial child filters
+ ///
+ PbiFilter(std::vector<PbiFilter>&& filters);
+
+ PbiFilter(const PbiFilter& other);
+ PbiFilter(PbiFilter&& other) noexcept;
+ PbiFilter& operator=(const PbiFilter& other);
+ PbiFilter& operator=(PbiFilter&& other) noexcept;
+ ~PbiFilter(void);
+
+ /// \}
+
+public:
+ /// \name Composition
+ /// \{
+
+ /// \brief Adds a new child filter of type T.
+ ///
+ /// \param[in] filter additional child filter. Type T must satisfy
+ /// PbiFilterConcept.
+ /// \returns reference to this filter
+ ///
+ template<typename T>
+ PbiFilter& Add(const T& filter);
+
+ /// \brief Adds a new child filter of type T.
+ ///
+ /// \param[in] filter additional child filter. Type T must satisfy
+ /// PbiFilterConcept.
+ /// \returns reference to this filter
+ ///
+ template<typename T>
+ PbiFilter& Add(T&& filter);
+
+ /// \brief Adds a new child filter.
+ ///
+ /// \param[in] filter additional child filter
+ /// \returns reference to this filter
+ ///
+ PbiFilter& Add(const PbiFilter& filter);
+
+ /// \brief Adds a new child filter.
+ ///
+ /// \param[in] filter additional child filter
+ /// \returns reference to this filter
+ ///
+ PbiFilter& Add(PbiFilter&& filter);
+
+ /// \brief Add child filters.
+ ///
+ /// \param[in] filters additional child filters
+ /// \returns reference to this filter
+ ///
+ PbiFilter& Add(const std::vector<PbiFilter>& filters);
+
+ /// \brief Add child filters.
+ ///
+ /// \param[in] filters additional child filters
+ /// \returns reference to this filter
+ ///
+ PbiFilter& Add(std::vector<PbiFilter>&& filters);
+
+ /// \returns true if this filter has no child filters.
+ bool IsEmpty(void) const;
+
+ /// \}
+
+public:
+ /// \name Lookup
+ /// \{
+
+ /// \brief Performs the PBI index lookup, combining child results a
+ /// composite filter.
+ ///
+ /// \param[in] idx PBI (raw) index object
+ /// \param[in] row record number in %BAM/PBI files
+ ///
+ /// \returns true if record at \p row passes this filter criteria,
+ /// including children (if any)
+ ///
+ bool Accepts(const BAM::PbiRawData& idx, const size_t row) const;
+
+ /// \}
+
+private:
+ std::unique_ptr<internal::PbiFilterPrivate> d_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/PbiFilter.inl"
+#include "pbbam/PbiFilterTypes.h"
+
+#endif // PBIFILTER_H
diff --git a/include/pbbam/ZmwGroupQuery.h b/include/pbbam/PbiFilterQuery.h
similarity index 58%
copy from include/pbbam/ZmwGroupQuery.h
copy to include/pbbam/PbiFilterQuery.h
index 8b88113..120a30d 100644
--- a/include/pbbam/ZmwGroupQuery.h
+++ b/include/pbbam/PbiFilterQuery.h
@@ -32,36 +32,65 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file PbiFilterQuery.h
+/// \brief Defines the PbiFilterQuery class.
+//
// Author: Derek Barnett
-#ifndef ZMWGROUPQUERY_H
-#define ZMWGROUPQUERY_H
+#ifndef PBIFILTERQUERY_H
+#define PBIFILTERQUERY_H
#include "pbbam/Config.h"
+#include "pbbam/PbiFilter.h"
#include "pbbam/internal/QueryBase.h"
#include <vector>
namespace PacBio {
namespace BAM {
-//namespace staging {
-class PBBAM_EXPORT ZmwGroupQuery : public internal::IGroupQuery
+/// \brief The PbiFilter class provides iterable access to a DataSet's %BAM
+/// records, limiting results to those matching filter criteria.
+///
+/// Example:
+/// \include code/PbiFilterQuery.txt
+///
+/// \note Currently, all %BAM files must have a corresponding ".pbi" index file.
+/// Use BamFile::EnsurePacBioIndexExists before creating the query if one
+/// may not be present.
+///
+class PBBAM_EXPORT PbiFilterQuery : public internal::IQuery
{
public:
- ZmwGroupQuery(const DataSet& dataset);
- ZmwGroupQuery(const std::vector<int32_t>& zmwWhitelist,
- const DataSet& dataset);
+ /// \brief Creates a new PbiFilterQuery, limiting record results to only
+ /// those matching filter criteria
+ ///
+ /// \param[in] filter filtering criteria
+ /// \param[in] dataset input data source(s)
+ ///
+ /// \throws std::runtime_error on failure to open/read underlying %BAM or
+ /// PBI files.
+ ///
+ PbiFilterQuery(const PbiFilter& filter, const DataSet& dataset);
+
+ ~PbiFilterQuery(void);
+
+public:
-protected:
- FileIterPtr CreateIterator(const BamFile& file);
+ /// \brief Main iteration point for record access.
+ ///
+ /// Most client code should not need to use this method directly. Use
+ /// iterators instead.
+ ///
+ bool GetNext(BamRecord& r);
private:
- std::vector<int> whitelist_;
+ struct PbiFilterQueryPrivate;
+ std::unique_ptr<PbiFilterQueryPrivate> d_;
};
-//} // namespace staging
} // namespace BAM
} // namespace PacBio
-#endif // ZMWGROUPQUERY_H
+#endif // PBIFILTERQUERY_H
diff --git a/include/pbbam/PbiFilterTypes.h b/include/pbbam/PbiFilterTypes.h
new file mode 100644
index 0000000..b6ae4ad
--- /dev/null
+++ b/include/pbbam/PbiFilterTypes.h
@@ -0,0 +1,1028 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiFilterTypes.h
+/// \brief Defines the built-in PBI filters.
+//
+// Author: Derek Barnett
+
+#ifndef PBIFILTERTYPES_H
+#define PBIFILTERTYPES_H
+
+#include "pbbam/Compare.h"
+#include "pbbam/PbiFilter.h"
+#include "pbbam/PbiIndex.h"
+#include <boost/optional.hpp>
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+namespace internal {
+
+/// \internal
+///
+/// Provides basic container for value/compare-type pair
+///
+template<typename T>
+struct FilterBase
+{
+public:
+ T value_;
+ boost::optional<std::vector<T> > multiValue_;
+ Compare::Type cmp_;
+protected:
+ FilterBase(const T& value, const Compare::Type cmp);
+ FilterBase(T&& value, const Compare::Type cmp);
+ FilterBase(const std::vector<T>& values);
+ FilterBase(std::vector<T>&& values);
+protected:
+ bool CompareHelper(const T& lhs) const;
+private:
+ bool CompareSingleHelper(const T& lhs) const;
+ bool CompareMultiHelper(const T& lhs) const;
+};
+
+/// \internal
+///
+/// Dispatches the lookup to BarcodeLookupData
+///
+template<typename T, BarcodeLookupData::Field field>
+struct BarcodeDataFilterBase : public FilterBase<T>
+{
+protected:
+ BarcodeDataFilterBase(const T& value, const Compare::Type cmp);
+ BarcodeDataFilterBase(T&& value, const Compare::Type cmp);
+ BarcodeDataFilterBase(const std::vector<T>& values);
+ BarcodeDataFilterBase(std::vector<T>&& values);
+public:
+ bool Accepts(const PbiRawData& idx, const size_t row) const;
+};
+
+/// \internal
+///
+/// Dispatches the lookup to BasicLookupData
+///
+template<typename T, BasicLookupData::Field field>
+struct BasicDataFilterBase : public FilterBase<T>
+{
+protected:
+ BasicDataFilterBase(const T& value, const Compare::Type cmp);
+ BasicDataFilterBase(T&& value, const Compare::Type cmp);
+ BasicDataFilterBase(const std::vector<T>& values);
+ BasicDataFilterBase(std::vector<T>&& values);
+public:
+ bool Accepts(const PbiRawData& idx, const size_t row) const;
+};
+
+/// \internal
+///
+/// Dispatches the lookup to MappedLookupData
+///
+template<typename T, MappedLookupData::Field field>
+struct MappedDataFilterBase : public FilterBase<T>
+{
+protected:
+ MappedDataFilterBase(const T& value, const Compare::Type cmp);
+ MappedDataFilterBase(T&& value, const Compare::Type cmp);
+ MappedDataFilterBase(const std::vector<T>& values);
+ MappedDataFilterBase(std::vector<T>&& values);
+public:
+ bool Accepts(const PbiRawData& idx, const size_t row) const;
+};
+
+} // namespace internal
+
+/// \brief The PbiAlignedEndFilter class provides a PbiFilter-compatible filter
+/// on aligned end.
+///
+/// Example: \include code/PbiAlignedEndFilter.txt
+///
+/// \sa BamRecord::AlignedEnd
+///
+struct PbiAlignedEndFilter
+ : public internal::MappedDataFilterBase<uint32_t, MappedLookupData::A_END>
+{
+public:
+ /// \brief Creates a filter on aligned end.
+ ///
+ /// \param[in] position value to compare on
+ /// \param[in] cmp compare type
+ ///
+ PbiAlignedEndFilter(const uint32_t position,
+ const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiAlignedLengthFilter class provides a PbiFilter-compatible
+/// filter on aligned length.
+///
+/// Example: \include code/PbiAlignedLengthFilter.txt
+///
+/// \sa BamRecord::AlignedEnd, BamRecord::AlignedStart
+///
+struct PbiAlignedLengthFilter : public internal::FilterBase<uint32_t>
+{
+public:
+ /// \brief Creates a filter on aligned length.
+ ///
+ /// \param[in] length value to compare on
+ /// \param[in] cmp compare type
+ ///
+ PbiAlignedLengthFilter(const uint32_t length,
+ const Compare::Type cmp = Compare::EQUAL);
+
+public:
+ /// \brief Performs the actual index lookup.
+ ///
+ /// Most client code should not need to use this method directly.
+ ///
+ bool Accepts(const PbiRawData& idx, const size_t row) const;
+};
+
+/// \brief The PbiAlignedStartFilter class provides a PbiFilter-compatible
+/// filter on aligned start.
+///
+/// Example: \include code/PbiAlignedStartFilter.txt
+///
+/// \sa BamRecord::AlignedStart
+///
+struct PbiAlignedStartFilter
+ : public internal::MappedDataFilterBase<uint32_t, MappedLookupData::A_START>
+{
+public:
+ /// \brief Creates a filter on aligned start.
+ ///
+ /// \param[in] position value to compare on
+ /// \param[in] cmp compare type
+ ///
+ PbiAlignedStartFilter(const uint32_t position,
+ const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiAlignedStrandFilter class provides a PbiFilter-compatible
+/// filter on aligned strand.
+///
+/// Example: \include code/PbiAlignedStrandFilter.txt
+///
+/// \sa BamRecord::AlignedStrand
+///
+struct PbiAlignedStrandFilter
+ : public internal::MappedDataFilterBase<Strand, MappedLookupData::STRAND>
+{
+public:
+ /// \brief Creates a strand filter.
+ ///
+ /// \param[in] strand strand value to compare on
+ /// \param[in] cmp compare type
+ ///
+ PbiAlignedStrandFilter(const Strand strand,
+ const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiBarcodeFilter class provides a PbiFilter-compatible filter on
+/// barcode ID.
+///
+/// Any record with this barcode ID (forward or reverse) will pass this filter.
+///
+/// Example: \include code/PbiBarcodeFilter.txt
+///
+/// \sa BamRecord::BarcodeForward, BamRecord::BarcodeReverse
+///
+struct PbiBarcodeFilter
+{
+public:
+ /// \brief Creates a single-value barcode filter.
+ ///
+ /// \param[in] barcode barcode ID to compare on
+ /// \param[in] cmp compare type
+ ///
+ PbiBarcodeFilter(const uint16_t barcode,
+ const Compare::Type cmp = Compare::EQUAL);
+
+ /// \brief Creates a 'whitelisted' barcode filter.
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly, in either bc_forward or bc_reverse.
+ ///
+ /// \param[in] whitelist barcode IDs to compare on
+ ///
+ PbiBarcodeFilter(const std::vector<uint16_t>& whitelist);
+
+ /// \brief Creates a 'whitelisted' barcode filter.
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly, in either bc_forward or bc_reverse.
+ ///
+ /// \param[in] whitelist barcode IDs to compare on
+ ///
+ PbiBarcodeFilter(std::vector<uint16_t>&& whitelist);
+
+public:
+ /// \brief Performs the actual index lookup.
+ ///
+ /// Most client code should not need to use this method directly.
+ ///
+ bool Accepts(const PbiRawData& idx, const size_t row) const;
+
+private:
+ PbiFilter compositeFilter_;
+};
+
+/// \brief The PbiBarcodeForwardFilter class provides a PbiFilter-compatible
+/// filter on forward barcode ID.
+///
+/// Example: \include code/PbiBarcodeForwardFilter.txt
+///
+/// \sa BamRecord::BarcodeForward
+///
+struct PbiBarcodeForwardFilter
+ : public internal::BarcodeDataFilterBase<uint16_t, BarcodeLookupData::BC_FORWARD>
+{
+public:
+ /// \brief Creates a single-value forward barcode filter.
+ ///
+ /// \param[in] bcFwdId (forward) barcode ID to compare on
+ /// \param[in] cmp compare type
+ ///
+ PbiBarcodeForwardFilter(const uint16_t bcFwdId,
+ const Compare::Type cmp = Compare::EQUAL);
+
+ /// \brief Creates a 'whitelisted' forward barcode filter.
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly, in bc_forward.
+ ///
+ /// \param[in] whitelist barcode IDs to compare on
+ ///
+ PbiBarcodeForwardFilter(const std::vector<uint16_t>& whitelist);
+
+ /// \brief Creates a 'whitelisted' forward barcode filter.
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly, in bc_forward.
+ ///
+ /// \param[in] whitelist barcode IDs to compare on
+ ///
+ PbiBarcodeForwardFilter(std::vector<uint16_t>&& whitelist);
+};
+
+/// \brief The PbiBarcodeQualityFilter class provides a PbiFilter-compatible
+/// filter on barcode quality.
+///
+/// Example: \include code/PbiBarcodeQualityFilter.txt
+///
+/// \sa BamRecord::BarcodeQuality
+///
+struct PbiBarcodeQualityFilter
+ : public internal::BarcodeDataFilterBase<uint8_t, BarcodeLookupData::BC_QUALITY>
+{
+public:
+ /// \brief Creates a single-value barcode quality filter.
+ ///
+ /// \param[in] bcQuality barcode quality to compare on
+ /// \param[in] cmp compare type
+ ///
+ PbiBarcodeQualityFilter(const uint8_t bcQuality,
+ const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiBarcodeReverseFilter class provides a PbiFilter-compatible
+/// filter on forward barcode ID.
+///
+/// Example: \include code/PbiBarcodeReverseFilter.txt
+///
+/// \sa BamRecord::BarcodeReverse
+///
+struct PbiBarcodeReverseFilter
+ : public internal::BarcodeDataFilterBase<uint16_t, BarcodeLookupData::BC_REVERSE>
+{
+public:
+ /// \brief Creates a single-value reverse barcode filter.
+ ///
+ /// \param[in] bcRevId (reverse) barcode ID to compare on
+ /// \param[in] cmp compare type
+ ///
+ PbiBarcodeReverseFilter(const uint16_t bcRevId,
+ const Compare::Type cmp = Compare::EQUAL);
+
+ /// \brief Creates a 'whitelisted' reverse barcode filter.
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly, in bc_reverse.
+ ///
+ /// \param[in] whitelist barcode IDs to compare on
+ ///
+ PbiBarcodeReverseFilter(const std::vector<uint16_t>& whitelist);
+
+ /// \brief Creates a 'whitelisted' reverse barcode filter.
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly, in bc_reverse.
+ ///
+ /// \param[in] whitelist barcode IDs to compare on
+ ///
+ PbiBarcodeReverseFilter(std::vector<uint16_t>&& whitelist);
+};
+
+/// \brief The PbiBarcodesFilter class provides a PbiFilter-compatible filter on
+/// both forward & reverse barcode IDs.
+///
+/// A record must match both IDs to pass the filter.
+///
+/// Example: \include code/PbiBarcodesFilter.txt
+///
+/// \sa BamRecord::Barcodes
+///
+struct PbiBarcodesFilter
+{
+public:
+ /// \brief Creates a barcodes filter from a std::pair of IDs.
+ ///
+ /// pair.first -> BarcodeForward\n
+ /// pair.second -> BarcodeReverse
+ ///
+ /// \param[in] barcodes barcode IDs to compare on
+ /// \param[in] cmp compare type
+ ///
+ PbiBarcodesFilter(const std::pair<uint16_t, uint16_t> barcodes,
+ const Compare::Type cmp = Compare::EQUAL);
+
+ /// \brief Creates a barcodes filter from forward & reverse IDs.
+ ///
+ /// \param[in] bcForward forward barcode ID to compare on
+ /// \param[in] bcReverse reverse barcode ID to compare on
+ /// \param[in] cmp compare type
+ ///
+ PbiBarcodesFilter(const uint16_t bcForward,
+ const uint16_t bcReverse,
+ const Compare::Type cmp = Compare::EQUAL);
+public:
+ /// \brief Performs the actual index lookup.
+ ///
+ /// Most client code should not need to use this method directly.
+ ///
+ bool Accepts(const PbiRawData& idx, const size_t row) const;
+
+private:
+ PbiFilter compositeFilter_;
+};
+
+/// \brief The PbiIdentityFilter class provides a PbiFilter-compatible filter on
+/// read identity (% aligned match).
+///
+/// Read identity is equivalent to: 1.0 - (nMM + nDel + nIns)/readLength.
+///
+/// Example: \include code/PbiIdentityFilter.txt
+///
+struct PbiIdentityFilter : public internal::FilterBase<float>
+{
+public:
+ /// \brief Creates a read identity filter.
+ ///
+ /// \param[in] identity value to compare on
+ /// \param[in] cmp compare type
+ ///
+ PbiIdentityFilter(const float identity,
+ const Compare::Type cmp = Compare::EQUAL);
+
+public:
+ /// \brief Performs the actual index lookup.
+ ///
+ /// Most client code should not need to use this method directly.
+ ///
+ bool Accepts(const PbiRawData& idx, const size_t row) const;
+};
+
+/// \brief The PbiLocalContextFilter class provides a PbiFilter-compatible
+/// filter on local context (adapter, barcode, etc.).
+///
+/// The primary Compare::Type operators intended for this filter are:
+/// Compare::EQUAL, Compare::NOT_EQUAL, Compare::CONTAINS, and
+/// Compare::NOT_CONTAINS.
+///
+/// Example: \include code/PbiLocalContextFilter.txt
+///
+struct PbiLocalContextFilter
+ : public internal::BasicDataFilterBase<LocalContextFlags,
+ BasicLookupData::CONTEXT_FLAG >
+{
+public:
+ PbiLocalContextFilter(const LocalContextFlags& flags,
+ const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiMapQualityFilter class provides a PbiFilter-compatible filter on
+/// mapping quality.
+///
+/// Example: \include code/PbiMapQualityFilter.txt
+///
+/// \sa BamRecord::MapQuality
+///
+struct PbiMapQualityFilter
+ : public internal::MappedDataFilterBase<uint8_t, MappedLookupData::MAP_QUALITY>
+{
+public:
+ /// \brief Creates a map quality filter.
+ ///
+ /// \param[in] mapQual value to compare on
+ /// \param[in] cmp compare type
+ ///
+ PbiMapQualityFilter(const uint8_t mapQual,
+ const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiMovieNameFilter class provides a PbiFilter-compatible filter
+/// on movie name.
+///
+/// Example: \include code/PbiMovieNameFilter.txt
+///
+/// \sa BamRecord::MovieName
+///
+struct PbiMovieNameFilter
+{
+public:
+ /// \brief Creates a single-value movie name filter.
+ ///
+ /// \param[in] movieName movie name to compare on
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match movie name, exactly.
+ ///
+ PbiMovieNameFilter(const std::string& movieName);
+
+ /// \brief Creates a 'whitelisted' movie name filter.
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly.
+ ///
+ /// \param[in] whitelist movie names to compare on
+ ///
+ PbiMovieNameFilter(const std::vector<std::string>& whitelist);
+
+ /// \brief Creates a 'whitelisted' movie name filter.
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly.
+ ///
+ /// \param[in] whitelist movie names to compare on
+ ///
+ PbiMovieNameFilter(std::vector<std::string>&& whitelist);
+
+public:
+ /// \brief Performs the actual index lookup.
+ ///
+ /// Most client code should not need to use this method directly.
+ ///
+ bool Accepts(const PbiRawData& idx, const size_t row) const;
+
+private:
+ PbiFilter compositeFilter_;
+};
+
+/// \brief The PbiNumDeletedBasesFilter class provides a PbiFilter-compatible
+/// filter on the number of deleted bases.
+///
+/// Example: \include code/PbiNumDeletedBasesFilter.txt
+///
+/// \sa BamRecord::NumDeletedBases
+///
+struct PbiNumDeletedBasesFilter
+ : public internal::MappedDataFilterBase<size_t, MappedLookupData::N_DEL>
+{
+public:
+ /// \brief Creates a filter on the number of deleted bases.
+ ///
+ /// \param[in] numDeletions value to compare on
+ /// \param[in] cmp compare type
+ ///
+ PbiNumDeletedBasesFilter(const size_t numDeletions,
+ const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiNumInsertededBasesFilter class provides a PbiFilter-compatible
+/// filter on the number of inserted bases.
+///
+/// Example: \include code/PbiNumInsertedBasesFilter.txt
+///
+/// \sa BamRecord::NumInsertedBases
+///
+struct PbiNumInsertedBasesFilter
+ : public internal::MappedDataFilterBase<size_t, MappedLookupData::N_INS>
+{
+public:
+ /// \brief Creates a filter on the number of inserted bases.
+ ///
+ /// \param[in] numInsertions value to compare on
+ /// \param[in] cmp compare type
+ ///
+ PbiNumInsertedBasesFilter(const size_t numInsertions,
+ const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiNumMatchesFilter class provides a PbiFilter-compatible filter
+/// on the number of matched bases.
+///
+/// Example: \include code/PbiNumMatchesFilter.txt
+///
+/// \sa BamRecord::NumMatches
+///
+struct PbiNumMatchesFilter
+ : public internal::MappedDataFilterBase<size_t, MappedLookupData::N_M>
+{
+public:
+ /// \brief Creates a filter on the number of matched bases.
+ ///
+ /// \param[in] numMatchedBases value to compare on
+ /// \param[in] cmp compare type
+ ///
+ PbiNumMatchesFilter(const size_t numMatchedBases,
+ const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiNumMismatchesFilter class provides a PbiFilter-compatible
+/// filter on the number of mismatched bases.
+///
+/// Example: \include code/PbiNumMismatchesFilter.txt
+///
+/// \sa BamRecord::NumMismatches
+///
+struct PbiNumMismatchesFilter
+ : public internal::MappedDataFilterBase<size_t, MappedLookupData::N_MM>
+{
+public:
+ /// \brief Creates a filter on the number of mismatched bases.
+ ///
+ /// \param[in] numMismatchedBases value to compare on
+ /// \param[in] cmp compare type
+ ///
+ PbiNumMismatchesFilter(const size_t numMismatchedBases,
+ const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiQueryEndFilter class provides a PbiFilter-compatible filter
+/// on query end.
+///
+/// Example: \include code/PbiQueryEndFilter.txt
+///
+/// \sa BamRecord::QueryEnd
+///
+struct PbiQueryEndFilter
+ : public internal::BasicDataFilterBase<int32_t, BasicLookupData::Q_END>
+{
+public:
+ /// \brief Creates a filter on query end position.
+ ///
+ /// \param[in] position value to compare on
+ /// \param[in] cmp compare type
+ ///
+ PbiQueryEndFilter(const int32_t position,
+ const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiQueryLengthFilter class provides a PbiFilter-compatible filter
+/// on query length.
+///
+/// queryLength = (queryEnd - queryStart)
+///
+/// Example: \include code/PbiQueryLengthFilter.txt
+///
+/// \sa BamRecord::QueryEnd, BamRecord::QueryStart
+///
+struct PbiQueryLengthFilter : public internal::FilterBase<int32_t>
+{
+public:
+ /// \brief Creates a filter on query length
+ ///
+ /// \param[in] length value to compare on
+ /// \param[in] cmp compare type
+ ///
+ PbiQueryLengthFilter(const int32_t length,
+ const Compare::Type cmp = Compare::EQUAL);
+
+public:
+ /// \brief Performs the actual index lookup.
+ ///
+ /// Most client code should not need to use this method directly.
+ ///
+ bool Accepts(const PbiRawData& idx, const size_t row) const;
+};
+
+/// \brief The PbiQueryNameFilter class provides a PbiFilter-compatible filter
+/// on name length.
+///
+/// Example: \include code/PbiQueryNameFilter.txt
+///
+/// \sa BamRecord::FullName
+///
+struct PbiQueryNameFilter
+{
+public:
+ /// \brief Creates a single-value query name filter.
+ ///
+ /// \param[in] qname query name to compare on
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match query name, exactly.
+ ///
+ PbiQueryNameFilter(const std::string& qname);
+
+ /// \brief Creates a 'whitelisted' query name filter.
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly.
+ ///
+ /// \param[in] whitelist query names to compare on
+ ///
+ PbiQueryNameFilter(const std::vector<std::string>& whitelist);
+
+ /// \brief Creates a 'whitelisted' query name filter.
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly.
+ ///
+ /// \param[in] whitelist query names to compare on
+ ///
+ PbiQueryNameFilter(std::vector<std::string>&& whitelist);
+
+public:
+ /// \brief Performs the actual index lookup.
+ ///
+ /// Most client code should not need to use this method directly.
+ ///
+ bool Accepts(const PbiRawData& idx, const size_t row) const;
+
+private:
+ PbiFilter compositeFilter_;
+};
+
+/// \brief The PbiQueryStartFilter class provides a PbiFilter-compatible filter
+/// on query start.
+///
+/// Example: \include code/PbiQueryStartFilter.txt
+///
+/// \sa BamRecord::QueryStart
+///
+struct PbiQueryStartFilter
+ : public internal::BasicDataFilterBase<int32_t, BasicLookupData::Q_START>
+{
+public:
+ /// \brief Creates a filter on query start position.
+ ///
+ /// \param[in] position value to compare on
+ /// \param[in] cmp compare type
+ ///
+ PbiQueryStartFilter(const int32_t position,
+ const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiReadAccuracyFilter class provides a PbiFilter-compatible filter
+/// on read accuracy.
+///
+/// Example: \include code/PbiReadAccuracyFilter.txt
+///
+/// \sa BamRecord::ReadAccuracy
+///
+struct PbiReadAccuracyFilter
+ : public internal::BasicDataFilterBase<Accuracy, BasicLookupData::READ_QUALITY>
+{
+public:
+ /// \brief Creates a filter on read accuracy.
+ ///
+ /// \param[in] accuracy value to compare on
+ /// \param[in] cmp compare type
+ ///
+ PbiReadAccuracyFilter(const Accuracy accuracy,
+ const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiReadGroupFilter class provides a PbiFilter-compatible filter
+/// on read group.
+///
+/// Example: \include code/PbiReadGroupFilter.txt
+///
+/// \sa BamRecord::ReadGroup,
+/// BamRecord::ReadGroupId,
+/// BamRecord::ReadGroupNumericId
+///
+struct PbiReadGroupFilter
+ : public internal::BasicDataFilterBase<int32_t, BasicLookupData::RG_ID>
+{
+public:
+ /// \brief Creates a filter on read group (numeric) ID value
+ ///
+ /// \param[in] rgId numeric read group ID
+ /// \param[in] cmp compare type
+ ///
+ /// \sa BamRecord::ReadGroupNumericId
+ ///
+ PbiReadGroupFilter(const int32_t rgId,
+ const Compare::Type cmp = Compare::EQUAL);
+
+ /// \brief Creates a filter on printable read group ID value
+ ///
+ /// \param[in] rgId read group ID string
+ /// \param[in] cmp compare type
+ ///
+ /// \sa BamRecord::ReadGroupId
+ ///
+ PbiReadGroupFilter(const std::string rgId,
+ const Compare::Type cmp = Compare::EQUAL);
+
+ /// \brief Creates a filter on read group (object).
+ ///
+ /// \param[in] rg read group object
+ /// \param[in] cmp compare type
+ ///
+ /// \sa BamRecord::ReadGroup
+ ///
+ PbiReadGroupFilter(const ReadGroupInfo& rg,
+ const Compare::Type cmp = Compare::EQUAL);
+
+ /// \brief Creates a 'whitelisted' filter on read group numeric IDs.
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly.
+ ///
+ /// \param[in] whitelist read group IDs to compare on
+ ///
+ PbiReadGroupFilter(const std::vector<int32_t>& whitelist);
+
+ /// \brief Creates a 'whitelisted' filter on read group numeric IDs.
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly.
+ ///
+ /// \param[in] whitelist read group IDs to compare on
+ ///
+ PbiReadGroupFilter(std::vector<int32_t>&& whitelist);
+
+ /// \brief Creates a 'whitelisted' filter on read group printable IDs.
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly.
+ ///
+ /// \param[in] whitelist read group ID strings to compare on
+ ///
+ PbiReadGroupFilter(const std::vector<std::string>& whitelist);
+
+ /// \brief Creates a 'whitelisted' filter on read group printable IDs.
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly.
+ ///
+ /// \param[in] whitelist read group ID strings to compare on
+ ///
+ PbiReadGroupFilter(std::vector<std::string>&& whitelist);
+
+ /// \brief Creates a 'whitelisted' filter using read group objects.
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly.
+ ///
+ /// \param[in] whitelist read group objects to compare on
+ ///
+ PbiReadGroupFilter(const std::vector<ReadGroupInfo>& whitelist);
+
+ /// \brief Creates a 'whitelisted' filter using read group objects.
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly.
+ ///
+ /// \param[in] whitelist read group objects to compare on
+ ///
+ PbiReadGroupFilter(std::vector<ReadGroupInfo>&& whitelist);
+};
+
+/// \brief The PbiReferenceEndFilter class provides a PbiFilter-compatible
+/// filter on reference end.
+///
+/// Example: \include code/PbiReferenceEndFilter.txt
+///
+/// \sa BamRecord::ReferenceEnd
+///
+struct PbiReferenceEndFilter
+ : public internal::MappedDataFilterBase<uint32_t, MappedLookupData::T_END>
+{
+public:
+ /// \brief Creates a filter on reference end.
+ ///
+ /// \param[in] tEnd value to compare on
+ /// \param[in] cmp compare type
+ ///
+ PbiReferenceEndFilter(const uint32_t tEnd,
+ const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiReferenceIdFilter class provides a PbiFilter-compatible
+/// filter on reference ID.
+///
+/// Example: \include code/PbiReferenceIdFilter.txt
+///
+/// \sa BamRecord::ReferenceId
+///
+struct PbiReferenceIdFilter
+ : public internal::MappedDataFilterBase<int32_t, MappedLookupData::T_ID>
+{
+public:
+ /// \brief Creates a single-value reference ID filter.
+ ///
+ /// \param[in] tId reference ID to compare on
+ /// \param[in] cmp compare type
+ ///
+ PbiReferenceIdFilter(const int32_t tId,
+ const Compare::Type cmp = Compare::EQUAL);
+
+ /// \brief Creates a 'whitelisted' reference ID filter.
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly.
+ ///
+ /// \param[in] whitelist reference IDs to compare on
+ ///
+ PbiReferenceIdFilter(const std::vector<int32_t>& whitelist);
+
+ /// \brief Creates a 'whitelisted' reference ID filter.
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly.
+ ///
+ /// \param[in] whitelist reference IDs to compare on
+ ///
+ PbiReferenceIdFilter(std::vector<int32_t>&& whitelist);
+};
+
+/// \brief The PbiReferenceNameFilter class provides a PbiFilter-compatible
+/// filter on reference name.
+///
+/// Example: \include code/PbiReferenceNameFilter.txt
+///
+/// \sa BamRecord::ReferenceName
+///
+struct PbiReferenceNameFilter
+{
+public:
+ /// \brief Creates a single-value reference name filter.
+ ///
+ /// \param[in] rname reference ID to compare on
+ /// \param[in] cmp compare type
+ ///
+ PbiReferenceNameFilter(const std::string& rname,
+ const Compare::Type cmp = Compare::EQUAL);
+
+ /// \brief Creates a 'whitelisted' reference name filter.
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly.
+ ///
+ /// \param[in] whitelist reference names to compare on
+ ///
+ PbiReferenceNameFilter(const std::vector<std::string>& whitelist);
+
+ /// \brief Creates a 'whitelisted' reference name filter.
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly.
+ ///
+ /// \param[in] whitelist reference names to compare on
+ ///
+ PbiReferenceNameFilter(std::vector<std::string>&& whitelist);
+
+public:
+ /// \brief Performs the actual index lookup.
+ ///
+ /// Most client code should not need to use this method directly.
+ ///
+ bool Accepts(const PbiRawData& idx, const size_t row) const;
+
+private:
+ mutable bool initialized_;
+ mutable PbiFilter subFilter_;
+ std::string rname_;
+ boost::optional<std::vector<std::string> > rnameWhitelist_;
+ Compare::Type cmp_;
+
+private:
+ // marked const so we can delay setup of filter in Accepts(), once we have
+ // access to PBI/BAM input. modified values marked mutable accordingly
+ void Initialize(const PbiRawData& idx) const;
+};
+
+/// \brief The PbiReferenceStartFilter class provides a PbiFilter-compatible
+/// filter on reference start.
+///
+/// Example: \include code/PbiReferenceStartFilter.txt
+///
+/// \sa BamRecord::ReferenceStart
+///
+struct PbiReferenceStartFilter
+ : public internal::MappedDataFilterBase<uint32_t, MappedLookupData::T_START>
+{
+public:
+ /// \brief Creates a filter on reference start.
+ ///
+ /// \param[in] tStart value to compare on
+ /// \param[in] cmp compare type
+ ///
+ PbiReferenceStartFilter(const uint32_t tStart,
+ const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiZmwFilter class provides a PbiFilter-compatible filter on
+/// ZMW hole number.
+///
+/// Example: \include code/PbiZmwFilter.txt
+///
+/// \sa BamRecord::HoleNumber
+///
+struct PbiZmwFilter : public internal::BasicDataFilterBase<int32_t,
+ BasicLookupData::ZMW>
+{
+public:
+ /// \brief Creates a single-value ZMW hole number filter.
+ ///
+ /// \param[in] zmw value to compare on
+ /// \param[in] cmp compare type
+ ///
+ PbiZmwFilter(const int32_t zmw,
+ const Compare::Type cmp = Compare::EQUAL);
+
+ /// \brief Creates a 'whitelisted' ZMW hole number filter.
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly.
+ ///
+ /// \param[in] whitelist ZMW hole numbers to compare on
+ ///
+ PbiZmwFilter(const std::vector<int32_t>& whitelist);
+
+ /// \brief Creates a 'whitelisted' ZMW hole number filter.
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly.
+ ///
+ /// \param[in] whitelist ZMW hole numbers to compare on
+ ///
+ PbiZmwFilter(std::vector<int32_t>&& whitelist);
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/PbiFilterTypes.inl"
+
+#endif // PBIFILTERTYPES_H
diff --git a/include/pbbam/PbiIndex.h b/include/pbbam/PbiIndex.h
index 79aa880..09b61b8 100644
--- a/include/pbbam/PbiIndex.h
+++ b/include/pbbam/PbiIndex.h
@@ -33,131 +33,51 @@
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
//
+// File Description
+/// \file PbiIndex.h
+/// \brief Defines the PbiIndex class.
+//
// Author: Derek Barnett
#ifndef PBIINDEX_H
#define PBIINDEX_H
#include "pbbam/Config.h"
-#include "pbbam/LocalContextFlags.h"
#include "pbbam/PbiFile.h"
-#include "pbbam/Strand.h"
-#include <deque>
+#include "pbbam/PbiLookupData.h"
#include <memory>
#include <string>
-#include <vector>
namespace PacBio {
namespace BAM {
namespace internal { class PbiIndexPrivate; }
-enum class SubreadField
-{
- RG_ID
- , Q_START
- , Q_END
- , ZMW
- , READ_QUALITY
- , VIRTUAL_OFFSET
-};
-
-enum class MappedField
-{
- T_ID
- , T_START
- , T_END
- , A_START
- , A_END
- , N_M
- , N_MM
- , N_INS
- , N_DEL
- , MAP_QUALITY
- , STRAND
-};
-
-enum class BarcodeField
-{
- BC_LEFT
- , BC_RIGHT
- , BC_QUALITY
- , CONTEXT_FLAG
-};
-
-enum class CompareType
-{
- EQUAL
- , LESS_THAN
- , LESS_THAN_EQUAL
- , GREATER_THAN
- , GREATER_THAN_EQUAL
- , NOT_EQUAL
-};
-
-//
-// Contiguous reads that satisfy a query will be returned as a block.
-// This is to help minimize number of seeks (or even unneccesary checks).
-//
-// An index query can iterate over the lookup result 'IndexResultBlocks' list to
-// perform a seek and fetch 'numReads' consecutive records before needing to
-// seek again.
-//
-struct PBBAM_EXPORT IndexResultBlock
-{
-public:
- IndexResultBlock(void);
- IndexResultBlock(size_t idx, size_t numReads);
-
-public:
- bool operator==(const IndexResultBlock& other) const;
- bool operator!=(const IndexResultBlock& other) const;
-
-public:
- size_t firstIndex_;
- size_t numReads_;
- int64_t virtualOffset_;
-};
-
-typedef std::deque<IndexResultBlock> IndexResultBlocks;
-
-typedef std::vector<size_t> IndexList;
-typedef std::pair<size_t, size_t> IndexRange;
-
-template<typename FieldType, typename ValueType>
-struct IndexRequestBase
-{
-public:
- FieldType field_;
- ValueType value_;
- CompareType compareType_;
-
-protected:
- IndexRequestBase(const FieldType field,
- const ValueType& value,
- const CompareType compareType = CompareType::EQUAL);
-};
-
-// all multi-requests use CompareType::EQUAL
-template<typename FieldType, typename ValueType>
-struct IndexMultiRequestBase
-{
-public:
- FieldType field_;
- std::vector<ValueType> values_;
-
-protected:
- IndexMultiRequestBase(const FieldType field,
- const std::vector<ValueType>& values);
-};
-
+/// \brief The PbiIndex class provides an representation of PBI index data that
+/// is rearranged for quick lookups.
+///
+/// The PbiIndex class itself provides access to a few high-level attributes
+/// (e.g. version, number of records, etc.). The actual lookup data is stored
+/// in its member components:
+/// BasicLookupData,
+/// MappedLookupData,
+/// ReferenceLookupData, &
+/// BarcodeLookupData .
+///
class PBBAM_EXPORT PbiIndex
{
public:
/// \name Constructors & Related Methods
/// \{
+ /// \brief Creates a PbiIndex lookup structure from a PBI file.
+ ///
+ /// \param[in] pbiFilename filename
+ ///
+ /// \throws std::runtime_error if failed to load data from file
+ ///
PbiIndex(const std::string& pbiFilename);
+
PbiIndex(const PbiIndex& other);
PbiIndex(PbiIndex&& other);
PbiIndex& operator=(const PbiIndex& other);
@@ -167,133 +87,76 @@ public:
/// \}
public:
- // PBI attributes
+ /// \name PBI General Attributes
+ /// \{
+
+ /// \returns true if index has BarcodeData section
bool HasBarcodeData(void) const;
+
+ /// \returns true if index has MappedData section
bool HasMappedData(void) const;
+
+ /// \returns true if index has ReferenceData section
bool HasReferenceData(void) const;
+
+ /// \returns true if index has \b section
+ /// \param[in] section PbiFile::Section identifier
+ ///
bool HasSection(const PbiFile::Section section) const;
+ /// \returns index filename ("*.pbi")
+ ///
+ /// \note Returns an empty string if the underlying data was generated, not
+ /// loaded from file.
+ ///
+ std::string Filename(void) const;
+
+ /// \returns enum flags representing the file sections present
PbiFile::Sections FileSections(void) const;
+
+ /// \returns the number of records in the PBI (& associated %BAM)
uint32_t NumReads(void) const;
+
+ /// \returns the PBI file's version
PbiFile::VersionEnum Version(void) const;
-public:
+ /// \}
- template<typename FieldType, typename ValueType>
- IndexList RawIndices(const IndexRequestBase<FieldType, ValueType>& request) const;
+public:
+ /// \name Lookup Data Components
+ /// \{
- template<typename FieldType, typename ValueType>
- IndexList RawIndices(const IndexMultiRequestBase<FieldType, ValueType>& request) const;
+ /// \returns const reference to BarcodeData lookup structure
+ ///
+ /// May be empty, check result of HasBarcodeData.
+ ///
+ const BarcodeLookupData& BarcodeData(void) const;
- template<typename FieldType, typename ValueType>
- IndexResultBlocks Lookup(const IndexRequestBase<FieldType, ValueType>& request) const;
+ /// \returns const reference to BasicData lookup structure
+ const BasicLookupData& BasicData(void) const;
- template<typename FieldType, typename ValueType>
- IndexResultBlocks Lookup(const IndexMultiRequestBase<FieldType, ValueType>& request) const;
+ /// \returns const reference to MappedData lookup structure
+ ///
+ /// May be empty, check result of HasMappedData.
+ ///
+ const MappedLookupData& MappedData(void) const;
- IndexResultBlocks LookupReference(const int32_t tId) const;
+ /// \returns const reference to reference data lookup structure
+ ///
+ /// May be empty, check result of HasReferenceData.
+ ///
+ const ReferenceLookupData& ReferenceData(void) const;
- const std::vector<int64_t>& VirtualFileOffsets(void) const;
+ /// }
private:
PbiIndex(void);
std::unique_ptr<internal::PbiIndexPrivate> d_;
};
-template<SubreadField field, typename ValueType>
-class SubreadIndexRequest : public IndexRequestBase<SubreadField, ValueType>
-{
-public:
- SubreadIndexRequest(const ValueType& value,
- const CompareType& compareType = CompareType::EQUAL);
-};
-
-template<SubreadField field, typename ValueType>
-class SubreadIndexMultiRequest : public IndexMultiRequestBase<SubreadField, ValueType>
-{
-public:
- SubreadIndexMultiRequest(const std::vector<ValueType>& values);
-};
-
-typedef SubreadIndexRequest<SubreadField::RG_ID, int32_t> ReadGroupIndexRequest;
-typedef SubreadIndexRequest<SubreadField::Q_START, int32_t> QueryStartIndexRequest;
-typedef SubreadIndexRequest<SubreadField::Q_END, int32_t> QueryEndIndexRequest;
-typedef SubreadIndexRequest<SubreadField::ZMW, int32_t> ZmwIndexRequest;
-typedef SubreadIndexRequest<SubreadField::READ_QUALITY, uint16_t> ReadQualityIndexRequest;
-
-typedef SubreadIndexMultiRequest<SubreadField::RG_ID, int32_t> ReadGroupIndexMultiRequest;
-typedef SubreadIndexMultiRequest<SubreadField::Q_START, int32_t> QueryStartIndexMultiRequest;
-typedef SubreadIndexMultiRequest<SubreadField::Q_END, int32_t> QueryEndIndexMultiRequest;
-typedef SubreadIndexMultiRequest<SubreadField::ZMW, int32_t> ZmwIndexMultiRequest;
-typedef SubreadIndexMultiRequest<SubreadField::READ_QUALITY, uint16_t> ReadQualityIndexMultiRequest;
-
-template<MappedField field, typename ValueType>
-class MappedIndexRequest : public IndexRequestBase<MappedField, ValueType>
-{
-public:
- MappedIndexRequest(const ValueType& value, const
- CompareType& compareType = CompareType::EQUAL);
-};
-
-template<MappedField field, typename ValueType>
-class MappedIndexMultiRequest : public IndexMultiRequestBase<SubreadField, ValueType>
-{
-public:
- MappedIndexMultiRequest(const std::vector<ValueType>& values);
-};
-
-typedef MappedIndexRequest<MappedField::T_ID, int32_t> ReferenceIdIndexRequest;
-typedef MappedIndexRequest<MappedField::T_START, int32_t> ReferenceStartIndexRequest;
-typedef MappedIndexRequest<MappedField::T_END, int32_t> ReferenceEndIndexRequest;
-typedef MappedIndexRequest<MappedField::A_START, int32_t> AlignedStartIndexRequest;
-typedef MappedIndexRequest<MappedField::A_END, int32_t> AlignedEndIndexRequest;
-typedef MappedIndexRequest<MappedField::N_M, int32_t> NumMatchesIndexRequest;
-typedef MappedIndexRequest<MappedField::N_MM, int32_t> NumMismatchesIndexRequest;
-typedef MappedIndexRequest<MappedField::N_INS, int32_t> NumInsertionsIndexRequest;
-typedef MappedIndexRequest<MappedField::N_DEL, int32_t> NumDeletionsIndexRequest;
-typedef MappedIndexRequest<MappedField::MAP_QUALITY, uint8_t> MapQualityIndexRequest;
-typedef MappedIndexRequest<MappedField::STRAND, Strand> StrandIndexRequest;
-
-typedef MappedIndexMultiRequest<MappedField::T_ID, int32_t> ReferenceIdIndexMultiRequest;
-typedef MappedIndexMultiRequest<MappedField::T_START, int32_t> ReferenceStartIndexMultiRequest;
-typedef MappedIndexMultiRequest<MappedField::T_END, int32_t> ReferenceEndIndexMultiRequest;
-typedef MappedIndexMultiRequest<MappedField::A_START, int32_t> AlignedStartIndexMultiRequest;
-typedef MappedIndexMultiRequest<MappedField::A_END, int32_t> AlignedEndIndexMultiRequest;
-typedef MappedIndexMultiRequest<MappedField::N_M, int32_t> NumMatchesIndexMultiRequest;
-typedef MappedIndexMultiRequest<MappedField::N_MM, int32_t> NumMismatchesIndexMultiRequest;
-typedef MappedIndexMultiRequest<MappedField::N_INS, int32_t> NumInsertionsIndexMultiRequest;
-typedef MappedIndexMultiRequest<MappedField::N_DEL, int32_t> NumDeletionsIndexMultiRequest;
-typedef MappedIndexMultiRequest<MappedField::MAP_QUALITY, uint8_t> MapQualityIndexMultiRequest;
-typedef MappedIndexMultiRequest<MappedField::STRAND, Strand> StrandIndexMultiRequest;
-
-template<BarcodeField field, typename ValueType>
-class BarcodeIndexRequest : public IndexRequestBase<BarcodeField, ValueType>
-{
-public:
- BarcodeIndexRequest(const ValueType& value,
- const CompareType& compareType = CompareType::EQUAL);
-};
-
-template<BarcodeField field, typename ValueType>
-class BarcodeIndexMultiRequest : public IndexMultiRequestBase<BarcodeField, ValueType>
-{
-public:
- BarcodeIndexMultiRequest(const std::vector<ValueType>& values);
-};
-
-typedef BarcodeIndexRequest<BarcodeField::BC_LEFT, uint16_t> BarcodeLeftIndexRequest;
-typedef BarcodeIndexRequest<BarcodeField::BC_RIGHT, uint16_t> BarcodeRightIndexRequest;
-typedef BarcodeIndexRequest<BarcodeField::BC_QUALITY, uint8_t> BarcodeQualityIndexRequest;
-typedef BarcodeIndexRequest<BarcodeField::CONTEXT_FLAG, LocalContextFlags> ContextFlagIndexRequest;
-
-typedef BarcodeIndexMultiRequest<BarcodeField::BC_LEFT, uint16_t> BarcodeLeftIndexMultiRequest;
-typedef BarcodeIndexMultiRequest<BarcodeField::BC_RIGHT, uint16_t> BarcodeRightIndexMultiRequest;
-typedef BarcodeIndexMultiRequest<BarcodeField::BC_QUALITY, uint8_t> BarcodeQualityIndexMultiRequest;
-typedef BarcodeIndexMultiRequest<BarcodeField::CONTEXT_FLAG, LocalContextFlags> ContextFlagIndexMultiRequest;
-
} // namespace BAM
} // namespace PacBio
-#include "internal/PbiIndex_p.inl"
+#include "internal/PbiIndex.inl"
#endif // PBIINDEX_H
diff --git a/include/pbbam/PbiIndexedBamReader.h b/include/pbbam/PbiIndexedBamReader.h
new file mode 100644
index 0000000..17c46b5
--- /dev/null
+++ b/include/pbbam/PbiIndexedBamReader.h
@@ -0,0 +1,174 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiIndexedBamReader.h
+/// \brief Defines the PbiIndexedBamReader class.
+//
+// Author: Derek Barnett
+
+#ifndef PBIINDEXEDBAMREADER_H
+#define PBIINDEXEDBAMREADER_H
+
+#include "pbbam/BamFile.h"
+#include "pbbam/BamReader.h"
+#include "pbbam/PbiBasicTypes.h"
+#include "pbbam/PbiFilter.h"
+#include "pbbam/PbiIndex.h"
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+namespace internal { struct PbiIndexedBamReaderPrivate; }
+
+/// \brief The PbiIndexedBamReader class provides read-only iteration over %BAM
+/// records, limited to some filtering criteria.
+///
+/// The PacBio BAM index (*.pbi) is used to allow random-access operations.
+///
+class PBBAM_EXPORT PbiIndexedBamReader : public BamReader
+{
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Constructs %BAM reader, with an initial filter.
+ ///
+ /// All reads that satisfy the filter will be available.
+ ///
+ /// \param[in] filter PbiFilter or compatible object
+ /// \param[in] bamFilename input %BAM filename
+ ///
+ /// \throws std::runtime_error if either file (*.bam or *.pbi) cannot be
+ /// read
+ ///
+ PbiIndexedBamReader(const PbiFilter& filter, const std::string& bamFilename);
+
+ /// \brief Constructs %BAM reader, with an initial filter.
+ ///
+ /// All reads that satisfy the filter will be available.
+ ///
+ /// \param[in] filter PbiFilter or compatible object
+ /// \param[in] bamFile input BamFile object
+ ///
+ /// \throws std::runtime_error if either file (*.bam or *.pbi) cannot be
+ /// read
+ ///
+ PbiIndexedBamReader(const PbiFilter& filter, const BamFile& bamFile);
+
+ /// \brief Constructs %BAM reader, with an initial filter.
+ ///
+ /// All reads that satisfy the filter will be available.
+ ///
+ /// \param[in] filter PbiFilter or compatible object
+ /// \param[in] bamFile input BamFile object
+ ///
+ /// \throws std::runtime_error if either file (*.bam or *.pbi) cannot be
+ /// read
+ ///
+ PbiIndexedBamReader(const PbiFilter& filter, BamFile&& bamFile);
+
+ /// \brief Constructs %BAM reader, with no initial filter.
+ ///
+ /// Useful for delaying either specifying the filtering criteria or
+ /// performing the PBI lookups.
+ ///
+ /// \param[in] bamFilename input %BAM filename
+ ///
+ /// \throws std::runtime_error if either file (*.bam or *.pbi) cannot be
+ /// read
+ ///
+ PbiIndexedBamReader(const std::string& bamFilename);
+
+ /// \brief Constructs %BAM reader, with no initial filter.
+ ///
+ /// Useful for delaying either specifying the filtering criteria or
+ /// performing the PBI lookups.
+ ///
+ /// \param[in] bamFile input BamFile object
+ ///
+ /// \throws std::runtime_error if either file (*.bam or *.pbi) cannot be
+ /// read
+ ///
+ PbiIndexedBamReader(const BamFile& bamFile);
+
+ /// \brief Constructs %BAM reader, with no initial filter.
+ ///
+ /// Useful for delaying either specifying the filtering criteria or
+ /// performing the PBI lookups.
+ ///
+ /// \param[in] bamFile input BamFile object
+ ///
+ /// \throws std::runtime_error if either file (*.bam or *.pbi) cannot be
+ /// read
+ ///
+ PbiIndexedBamReader(BamFile&& bamFile);
+
+ ~PbiIndexedBamReader(void);
+
+ /// \}
+
+public:
+ /// \name Filtering & Index Data
+ /// \{
+
+ /// \returns the current filter active on this reader
+ const PbiFilter& Filter(void) const;
+
+// /// \returns the reader's underlying index data
+// const PbiIndex& Index(void) const;
+
+public:
+ /// \brief Sets a new filter on the reader.
+ ///
+ /// \param[in] filter
+ /// \returns reference to this reader
+ ///
+ PbiIndexedBamReader& Filter(const PbiFilter& filter);
+
+ /// \}
+
+protected:
+ int ReadRawData(BGZF* bgzf, bam1_t* b);
+
+private:
+ std::unique_ptr<internal::PbiIndexedBamReaderPrivate> d_;
+};
+
+} // namespace internal
+} // namespace BAM
+
+#endif // PBIINDEXEDBAMREADER_H
diff --git a/include/pbbam/PbiLookupData.h b/include/pbbam/PbiLookupData.h
new file mode 100644
index 0000000..398c349
--- /dev/null
+++ b/include/pbbam/PbiLookupData.h
@@ -0,0 +1,718 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiLookupData.h
+/// \brief Defines the classes used for PBI data lookup.
+//
+// Author: Derek Barnett
+
+#ifndef PBILOOKUPDATA_H
+#define PBILOOKUPDATA_H
+
+#include "pbbam/Config.h"
+#include "pbbam/Compare.h"
+#include "pbbam/PbiBasicTypes.h"
+#include <deque>
+#include <map>
+#include <unordered_map>
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+
+class PbiRawBarcodeData;
+class PbiRawBasicData;
+class PbiRawMappedData;
+class PbiRawReferenceData;
+
+/// \brief The OrderedLookup class provides a quick lookup structure for
+/// PBI index data, where key values are sorted.
+///
+/// The main, underlying lookup structure is essentailly a std::map, where the
+/// key is some value (e.g. readAccuracy) and the value is the list of indices
+/// (i-th record) in the %BAM file.
+///
+/// This lookup class is one of the main building blocks for the PBI index
+/// lookup components.
+///
+/// \param T type of key stored (Accuracy for readAccuracy, int32_t for ZMW,
+/// etc.)
+///
+template<typename T>
+class OrderedLookup
+{
+public:
+ typedef T key_type;
+ typedef IndexList value_type;
+ typedef std::map<key_type, value_type> container_type;
+ typedef typename container_type::iterator iterator;
+ typedef typename container_type::const_iterator const_iterator;
+
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates an empty OrderedLookup structure.
+ ///
+ OrderedLookup(void);
+
+ /// \brief Creates an OrderedLookup struture, from another's underlying
+ /// lookup container.
+ ///
+ /// \param[in] data lookup data container
+ ///
+ OrderedLookup(const container_type& data);
+
+ /// \brief Creates an OrderedLookup struture, from another's underlying
+ /// lookup container.
+ ///
+ /// \param[in] data lookup data container
+ ///
+ OrderedLookup(container_type&& data);
+
+ /// \brief Creates an OrderedLookup struture, from raw data.
+ ///
+ /// \param[in] rawData raw data values, where i is the index into the %BAM
+ /// file, and rawData[i] is the key value
+ ///
+ OrderedLookup(const std::vector<T>& rawData);
+
+ /// \brief Creates an OrderedLookup struture, from raw data.
+ ///
+ /// \param[in] rawData raw data values, where i is the index into the %BAM
+ /// file, and rawData[i] is the key value
+ ///
+ OrderedLookup(std::vector<T>&& rawData);
+
+ /// \}
+
+public:
+ /// \name Operators
+ /// \{
+
+ /// \returns true if this lookup is same as \p other
+ bool operator==(const OrderedLookup<T>& other) const;
+
+ /// \returns true if this lookup is not the same as \p other
+ bool operator!=(const OrderedLookup<T>& other) const;
+
+ /// \}
+
+public:
+ /// \name STL-Compatibility Methods
+ /// \{
+
+ /// \returns an iterator to the first element in the underlying container
+ iterator begin(void);
+
+ /// \returns a const iterator to the first element in the underlying
+ /// container
+ const_iterator begin(void) const;
+
+ /// \returns a const iterator to the first element in the underlying
+ ///
+ const_iterator cbegin(void) const;
+
+ /// \returns an iterator after the last element in the underlying container
+ iterator end(void);
+
+ /// \returns a const iterator after the last element in the underlying
+ /// container
+ const_iterator end(void) const;
+
+ /// \returns a const iterator after the last element in the underlying
+ /// container
+ const_iterator cend(void) const;
+
+ /// \returns true if underlying container is empty
+ bool empty(void) const;
+
+ /// \returns number of keys in the container
+ size_t size(void) const;
+
+ /// \}
+
+public:
+ /// \name Lookup Data
+ /// \{
+
+ /// \brief Performs a lookup into the underlying data.
+ ///
+ /// \param[in] key key value to lookup
+ /// \param[in] compare compare type
+ ///
+ /// \returns sorted list of unique indices that satisfy the lookup key &
+ /// compare type
+ ///
+ IndexList LookupIndices(const key_type& key,
+ const Compare::Type& compare) const;
+
+ /// \brief Converts the lookup structure back into its raw data.
+ ///
+ /// \returns raw data values, where i is the index into the %BAM file, and
+ /// rawData[i] is the key value
+ ///
+ std::vector<T> Unpack(void) const;
+
+ /// \}
+
+private:
+ IndexList LookupInclusiveRange(const const_iterator& begin,
+ const const_iterator& end) const;
+
+ IndexList LookupExclusiveRange(const const_iterator& begin,
+ const const_iterator& end,
+ const key_type& key) const;
+
+private:
+ container_type data_;
+};
+
+/// \brief The UnorderedLookup class provides a quick lookup structure for
+/// PBI index data, where key values are not sorted.
+///
+/// The main, underlying lookup structure is essentailly a std::unordered_map,
+/// where the key is some value (e.g. read group ID) and the value is the list
+/// of indices (i-th record) in the %BAM file.
+///
+/// This lookup class is one of the main building blocks for the PBI index
+/// lookup components.
+///
+/// \param T type of key stored (Accuracy for readAccuracy, int32_t for ZMW,
+/// etc.)
+///
+template<typename T>
+class UnorderedLookup
+{
+public:
+ typedef T key_type;
+ typedef IndexList value_type;
+ typedef std::unordered_map<key_type, value_type> container_type;
+ typedef typename container_type::iterator iterator;
+ typedef typename container_type::const_iterator const_iterator;
+
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates an empty UnorderedLookup structure.
+ ///
+ UnorderedLookup(void);
+
+ /// \brief Creates an UnorderedLookup struture, from another's underlying
+ /// lookup container.
+ ///
+ /// \param[in] data lookup data container
+ ///
+ UnorderedLookup(const container_type& data);
+
+ /// \brief Creates an UnorderedLookup struture, from another's underlying
+ /// lookup container.
+ ///
+ /// \param[in] data lookup data container
+ ///
+ UnorderedLookup(container_type&& data);
+
+ /// \brief Creates an UnorderedLookup struture, from raw data.
+ ///
+ /// \param[in] rawData raw data values, where i is the index into the %BAM
+ /// file, and rawData[i] is the key value
+ ///
+ UnorderedLookup(const std::vector<T>& rawData);
+
+ /// \brief Creates an UnorderedLookup struture, from raw data.
+ ///
+ /// \param[in] rawData raw data values, where i is the index into the %BAM
+ /// file, and rawData[i] is the key value
+ ///
+ UnorderedLookup(std::vector<T>&& rawData);
+
+ /// \}
+
+public:
+ /// \name Operators
+ /// \{
+
+ /// \returns true if this lookup is same as \p other
+ bool operator==(const UnorderedLookup<T>& other) const;
+
+ /// \returns true if this lookup is not the same as \p other
+ bool operator!=(const UnorderedLookup<T>& other) const;
+
+ /// \}
+
+public:
+ /// \name STL-Compatibility Methods
+ /// \{
+
+ /// \returns an iterator to the first element in the underlying container
+ iterator begin(void);
+
+ /// \returns a const iterator to the first element in the underlying
+ /// container
+ const_iterator begin(void) const;
+
+ /// \returns a const iterator to the first element in the underlying
+ ///
+ const_iterator cbegin(void) const;
+
+ /// \returns an iterator after the last element in the underlying container
+ iterator end(void);
+
+ /// \returns a const iterator after the last element in the underlying
+ /// container
+ const_iterator end(void) const;
+
+ /// \returns a const iterator after the last element in the underlying
+ /// container
+ const_iterator cend(void) const;
+
+ /// \returns true if underlying container is empty
+ bool empty(void) const;
+
+ /// \returns number of keys in the container
+ size_t size(void) const;
+
+ /// \}
+
+public:
+ /// \name Lookup Data
+ /// \{
+
+ /// \brief Performs a lookup into the underlying data.
+ ///
+ /// \param[in] key key value to lookup
+ /// \param[in] compare compare type
+ ///
+ /// \returns sorted list of unique indices that satisfy the lookup key &
+ /// compare type
+ ///
+ IndexList LookupIndices(const key_type& key,
+ const Compare::Type& compare) const;
+
+ /// \brief Converts the lookup structure back into its raw data.
+ ///
+ /// \returns raw data values, where i is the index into the %BAM file, and
+ /// rawData[i] is the key value
+ ///
+ std::vector<T> Unpack(void) const;
+
+ /// \}
+
+private:
+ template<typename Compare>
+ IndexList LookupHelper(const key_type& key,
+ const Compare& cmp) const;
+
+private:
+ container_type data_;
+};
+
+/// \brief The BasicLookupData class provides quick lookup access to the
+/// "BasicData" section of the PBI index.
+///
+class PBBAM_EXPORT BasicLookupData
+{
+public:
+ /// \brief This enum describes the component fields of the BasicData
+ /// section.
+ enum Field
+ {
+ RG_ID
+ , Q_START
+ , Q_END
+ , ZMW
+ , READ_QUALITY
+ , CONTEXT_FLAG
+ , VIRTUAL_OFFSET
+ };
+
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates an empty lookup data object.
+ BasicLookupData(void);
+
+ /// \brief Creates a lookup data object from the corresponding raw data.
+ ///
+ /// \param[in] rawData raw data loaded from a PBI file
+ ///
+ BasicLookupData(const PbiRawBasicData& rawData);
+
+ /// \}
+
+public:
+ /// \name Lookup Data Methods
+ /// \{
+
+ /// \brief Adds \b virtual file offset data to the index lookup result
+ /// blocks.
+ ///
+ /// A PBI lookup will result in a number of index lists, depending on the
+ /// complexity of the PbiFilter involved. These index lists are then merged
+ /// down into blocks of contiguous values, where each block describes a
+ /// particular record index and the number of subsequent, contiguous reads
+ /// that immediately follow it. In this manner, we need only perform seeks
+ /// to the first record of each block.
+ ///
+ /// This method takes such blocks and annotates them with the corresponding
+ /// \b virtual file offset. Subsequent %BAM readers can use this information
+ /// to control file seeks.
+ ///
+ /// \param[in,out] blocks
+ ///
+ /// \throws std::out_of_range if a block has an invalid index value
+ ///
+ void ApplyOffsets(IndexResultBlocks& blocks) const;
+
+ /// \brief This method dispatches a single-value lookup query to the proper
+ /// data member.
+ ///
+ /// Client code, such as custom filters, should use this when possible, only
+ /// touching the raw fields for more complex operations (e.g. when unpacking
+ /// is necessary).
+ ///
+ /// \param[in] field section field to lookup
+ /// \param[in] value value to lookup
+ /// \param[in] compareType compare type
+ ///
+ /// \returns sorted list of unique indices that satisfy the lookup
+ ///
+ template<typename T>
+ IndexList Indices(const BasicLookupData::Field& field,
+ const T& value,
+ const Compare::Type& compareType = Compare::EQUAL) const;
+
+ /// \brief This method dispatches a multi-value lookup query to the proper
+ /// data member.
+ ///
+ /// Client code, such as custom filters, should use this when possible, only
+ /// touching the raw fields for more complex operations (e.g. when unpacking
+ /// is necessary).
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Results will correspond to an exact match on at
+ /// least one value in the list.
+ ///
+ /// \param[in] field section field to lookup
+ /// \param[in] values values to lookup
+ ///
+ /// \returns sorted list of unique indices that satisfy the lookup
+ ///
+ template<typename T>
+ IndexList IndicesMulti(const BasicLookupData::Field& field,
+ const std::vector<T>& values) const;
+
+ /// \returns the \b virtual file offsets for all records
+ ///
+ const std::vector<int64_t>& VirtualFileOffsets(void) const;
+
+ /// \}
+
+public:
+ /// \brief Lookup Data Members
+ /// \{
+
+ // map ordering doesn't make sense, optimize for direct lookup
+ UnorderedLookup<int32_t> rgId_;
+
+ // numeric comparisons make sense, keep key ordering preserved
+ OrderedLookup<int32_t> qStart_;
+ OrderedLookup<int32_t> qEnd_;
+ OrderedLookup<int32_t> holeNumber_;
+ OrderedLookup<float> readQual_;
+
+ // see if this works, or if can use unordered, 'direct' query
+ OrderedLookup<uint8_t> ctxtFlag_;
+
+ // offsets
+ std::vector<int64_t> fileOffset_;
+
+ /// \}
+};
+
+/// \brief The MappedLookupData class provides quick lookup access to the
+/// "MappedData" section of the PBI index.
+///
+class PBBAM_EXPORT MappedLookupData
+{
+public:
+ /// \brief This enum describes the component fields of the MappedData
+ /// section.
+ enum Field
+ {
+ T_ID
+ , T_START
+ , T_END
+ , A_START
+ , A_END
+ , N_M
+ , N_MM
+ , N_INS
+ , N_DEL
+ , MAP_QUALITY
+ , STRAND
+ };
+
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates an empty lookup data object.
+ MappedLookupData(void);
+
+ /// \brief Creates a lookup data object from the corresponding raw data.
+ ///
+ /// \param[in] rawData raw data loaded from a PBI file
+ ///
+ MappedLookupData(const PbiRawMappedData& rawData);
+
+ /// \}
+
+public:
+ /// \name Lookup Data Methods
+ /// \{
+
+ /// \brief This method dispatches a single-value lookup query to the proper
+ /// data member.
+ ///
+ /// Client code, such as custom filters, should use this when possible, only
+ /// touching the raw fields for more complex operations (e.g. when unpacking
+ /// is necessary).
+ ///
+ /// \param[in] field section field to lookup
+ /// \param[in] value value to lookup
+ /// \param[in] compareType compare type
+ ///
+ /// \returns sorted list of unique indices that satisfy the lookup
+ ///
+ template<typename T>
+ IndexList Indices(const MappedLookupData::Field& field,
+ const T& value,
+ const Compare::Type& compareType = Compare::EQUAL) const;
+
+ /// \brief This method dispatches a multi-value lookup query to the proper
+ /// data member.
+ ///
+ /// Client code, such as custom filters, should use this when possible, only
+ /// touching the raw fields for more complex operations (e.g. when unpacking
+ /// is necessary).
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Results will correspond to an exact match on at
+ /// least one value in the list.
+ ///
+ /// \param[in] field section field to lookup
+ /// \param[in] values values to lookup
+ ///
+ /// \returns sorted list of unique indices that satisfy the lookup
+ ///
+ template<typename T>
+ IndexList IndicesMulti(const MappedLookupData::Field& field,
+ const std::vector<T>& values) const;
+
+ /// \}
+
+public:
+ /// \name Lookup Data Members
+ /// \{
+
+ // numeric comparisons make sense, keep key ordering preserved
+ OrderedLookup<int32_t> tId_;
+ OrderedLookup<uint32_t> tStart_;
+ OrderedLookup<uint32_t> tEnd_;
+ OrderedLookup<uint32_t> aStart_;
+ OrderedLookup<uint32_t> aEnd_;
+ OrderedLookup<uint32_t> nM_;
+ OrderedLookup<uint32_t> nMM_;
+ OrderedLookup<uint8_t> mapQV_;
+
+ // generated values, not stored directly in PBI file
+ OrderedLookup<uint32_t> nIns_;
+ OrderedLookup<uint32_t> nDel_;
+
+ // no need for map overhead, just store direct indices
+ IndexList reverseStrand_;
+ IndexList forwardStrand_;
+
+ /// \}
+};
+
+/// \brief The ReferenceLookupData class provides quick lookup access to the
+/// "CoordinateSortedData" section of the PBI index.
+///
+class PBBAM_EXPORT ReferenceLookupData
+{
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates an empty lookup data object.
+ ///
+ ReferenceLookupData(void);
+
+ /// \brief Creates a lookup data object from the corresponding raw data.
+ ///
+ /// \param[in] rawData raw data loaded from a PBI file
+ ///
+ ReferenceLookupData(const PbiRawReferenceData& rawData);
+
+ /// \}
+
+public:
+ /// \name Lookup Data Methods
+ /// \{
+
+ /// \brief Retrieves the index range for all records that map to a
+ /// particular reference.
+ ///
+ /// Client code, such as custom filters, should use this when possible, only
+ /// touching the raw fields for more complex operations (e.g. when unpacking
+ /// is necessary).
+ ///
+ /// \param[in] tId reference ID to lookup
+ ///
+ /// \returns resulting index range [begin, end). If \p tId is unknown,
+ /// will return IndexRange(-1,-1) .
+ ///
+ IndexRange Indices(const int32_t tId) const;
+
+ /// \}
+
+public:
+ /// \name Lookup Data Members
+ /// \{
+
+ // references_[tId] = [begin, end) indices
+ std::unordered_map<int32_t, IndexRange> references_;
+
+ /// \}
+};
+
+/// \brief The BarcodeLookupData class provides quick lookup access to the
+/// "BarcodeData" section of the PBI index.
+///
+class PBBAM_EXPORT BarcodeLookupData
+{
+public:
+ /// \brief This enum describes the component fields of the BarcodeData
+ /// section.
+ enum Field
+ {
+ BC_FORWARD
+ , BC_REVERSE
+ , BC_QUALITY
+ };
+
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates an empty lookup data object.
+ ///
+ BarcodeLookupData(void);
+
+ /// \brief Creates a lookup data object from the corresponding raw data.
+ ///
+ /// \param[in] rawData raw data loaded from a PBI file
+ ///
+ BarcodeLookupData(const PbiRawBarcodeData& rawData);
+
+ /// \}
+
+public:
+ /// \name Lookup Data Methods
+ /// \{
+
+ /// \brief This method dispatches a single-value lookup query to the proper
+ /// data member.
+ ///
+ /// Client code, such as custom filters, should use this when possible, only
+ /// touching the raw fields for more complex operations (e.g. when unpacking
+ /// is necessary).
+ ///
+ /// \param[in] field section field to lookup
+ /// \param[in] value value to lookup
+ /// \param[in] compareType compare type
+ ///
+ /// \returns sorted list of unique indices that satisfy the lookup
+ ///
+ template<typename T>
+ IndexList Indices(const BarcodeLookupData::Field& field,
+ const T& value,
+ const Compare::Type& compareType = Compare::EQUAL) const;
+
+ /// \brief This method dispatches a multi-value lookup query to the proper
+ /// data member.
+ ///
+ /// Client code, such as custom filters, should use this when possible, only
+ /// touching the raw fields for more complex operations (e.g. when unpacking
+ /// is necessary).
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Results will correspond to an exact match on at
+ /// least one value in the list.
+ ///
+ /// \param[in] field section field to lookup
+ /// \param[in] values values to lookup
+ ///
+ /// \returns sorted list of unique indices that satisfy the lookup
+ ///
+ template<typename T>
+ IndexList IndicesMulti(const BarcodeLookupData::Field& field,
+ const std::vector<T>& values) const;
+
+ /// \}
+
+public:
+ /// \name Lookup Data Members
+ /// \{
+
+ // numeric comparisons make sense, keep key ordering preserved
+ OrderedLookup<int16_t> bcForward_;
+ OrderedLookup<int16_t> bcReverse_;
+ OrderedLookup<int8_t> bcQual_;
+
+ /// \}
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "internal/PbiLookupData.inl"
+
+#endif // PBILOOKUPDATA_H
diff --git a/include/pbbam/PbiRawData.h b/include/pbbam/PbiRawData.h
index 37c7706..41ebe00 100644
--- a/include/pbbam/PbiRawData.h
+++ b/include/pbbam/PbiRawData.h
@@ -33,6 +33,10 @@
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
//
+// File Description
+/// \file PbiRawData.h
+/// \brief Defines the classes used for working with raw PBI data.
+//
// Author: Derek Barnett
#ifndef PBIRAWDATA_H
@@ -48,46 +52,130 @@ namespace BAM {
class BamRecord;
+/// \brief The PbiRawBarcodeData class represents the raw data stored in the
+/// "BarcodeData" section of the PBI index.
+///
class PBBAM_EXPORT PbiRawBarcodeData
{
public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates an empty data structure.
PbiRawBarcodeData(void);
+
+ /// \brief Creates an empty data structure, preallocating space for a known
+ /// number of records.
PbiRawBarcodeData(uint32_t numReads);
+
PbiRawBarcodeData(const PbiRawBarcodeData& other);
PbiRawBarcodeData(PbiRawBarcodeData&& other);
PbiRawBarcodeData& operator=(const PbiRawBarcodeData& other);
PbiRawBarcodeData& operator=(PbiRawBarcodeData&& other);
+ /// \}
+
public:
- /// Maybe add barcode data for \p b, if available.
- /// \returns true if record had barcode data
+ /// \name Index Construction
+ /// \{
+
+ /// \brief Adds a record's barcode data.
///
- bool AddRecord(const BamRecord& b);
+ /// \param[in] b %BAM record
+ ///
+ void AddRecord(const BamRecord& b);
+
+ /// \}
public:
- std::vector<uint16_t> bcLeft_;
- std::vector<uint16_t> bcRight_;
- std::vector<uint8_t> bcQual_;
- std::vector<uint8_t> ctxtFlag_;
+ /// \name Raw Data Containers
+ /// \{
+
+ std::vector<int16_t> bcForward_;
+ std::vector<int16_t> bcReverse_;
+ std::vector<int8_t> bcQual_;
+
+ /// \}
};
+/// \brief The PbiRawMappedData class represents the raw data stored in the
+/// "MappedData" section of the PBI index.
+///
class PBBAM_EXPORT PbiRawMappedData
{
public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates an empty data structure.
PbiRawMappedData(void);
+
+ /// \brief Creates an empty data structure, preallocating space for a known
+ /// number of records.
PbiRawMappedData(uint32_t numReads);
+
PbiRawMappedData(const PbiRawMappedData& other);
PbiRawMappedData(PbiRawMappedData&& other);
PbiRawMappedData& operator=(const PbiRawMappedData& other);
PbiRawMappedData& operator=(PbiRawMappedData&& other);
+ /// \}
+
+public:
+ /// \name Index Construction
+ /// \{
+
+ /// \brief Adds a record's mapping data.
+ ///
+ /// \param[in] b %BAM record
+ ///
+ void AddRecord(const BamRecord& b);
+
+ /// \}
+
public:
- /// Maybe add mapping data for \p b, if available.
- /// \returns true if record had mapping data
+ /// \name Index Data Query
+ /// \{
+
+ /// \brief Calculates the number of deleted bases for a particular record.
+ ///
+ /// Convenvience method. Equivalent to:
+ /// \code{.cpp}
+ /// NumDeletedAndInsertedBasesAt(i).first;
+ /// \endcode
+ ///
+ /// \param[in] recordIndex i-th record
+ /// \returns number of deleted bases
+ ///
+ uint32_t NumDeletedBasesAt(size_t recordIndex) const;
+
+ /// \brief Calculates the number of inserted bases for a particular record.
+ ///
+ /// Convenvience method. Equivalent to:
+ /// \code{.cpp}
+ /// NumDeletedAndInsertedBasesAt(i).second;
+ /// \endcode
///
- bool AddRecord(const BamRecord& b);
+ /// \param[in] recordIndex i-th record
+ /// \returns number of inserted bases
+ ///
+ uint32_t NumInsertedBasesAt(size_t recordIndex) const;
+
+ /// \brief Calculates the number of deleted & inserted bases for a
+ /// particular record.
+ ///
+ /// \param[in] recordIndex i-th record in the data set
+ /// \returns a pair consisting of (numDeletions,numInsertions)
+ ///
+ std::pair<uint32_t, uint32_t>
+ NumDeletedAndInsertedBasesAt(size_t recordIndex) const;
+
+ /// \}
public:
+ /// \name Raw Data Containers
+ /// \{
+
std::vector<int32_t> tId_;
std::vector<uint32_t> tStart_;
std::vector<uint32_t> tEnd_;
@@ -97,8 +185,18 @@ public:
std::vector<uint32_t> nM_;
std::vector<uint32_t> nMM_;
std::vector<uint8_t> mapQV_;
+
+ /// \}
};
+/// \brief The PbiReferenceEntryClass represents a single reference in the PBI
+/// CoordinateSorted section.
+///
+/// A reference entry consists of an associated reference ID (tId), as well as
+/// start and end indices into the %BAM or PBI.
+///
+/// \note Rows are given in the interval [start, end).
+///
class PBBAM_EXPORT PbiReferenceEntry
{
public:
@@ -106,80 +204,168 @@ public:
typedef uint32_t Row;
public:
+ static const ID UNMAPPED_ID;
+ static const Row UNSET_ROW;
+
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates a default entry.
+ ///
+ /// - default ID: PbiReferenceEntry::UNMAPPED_ID \n
+ /// - default rows: PbiReferenceEntry::UNSET_ROW
+ ///
PbiReferenceEntry(void);
+
+ /// \brief Creates a reference entry, with no rows set.
+ ///
+ /// - default rows: PbiReferenceEntry::UNSET_ROW
+ ///
PbiReferenceEntry(ID id);
+
+ /// \brief Creates a reference entry, with rows set.
+ ///
+ PbiReferenceEntry(ID id, Row beginRow, Row endRow);
+
PbiReferenceEntry(const PbiReferenceEntry& other);
PbiReferenceEntry(PbiReferenceEntry&& other);
PbiReferenceEntry& operator=(const PbiReferenceEntry& other);
PbiReferenceEntry& operator=(PbiReferenceEntry&& other);
- bool operator==(const PbiReferenceEntry& other) const
- {
- return tId_ == other.tId_ &&
- beginRow_ == other.beginRow_ &&
- endRow_ == other.endRow_;
- }
+ bool operator==(const PbiReferenceEntry& other) const;
-public:
- static const ID UNMAPPED_ID;
- static const Row UNSET_ROW;
+ /// \}
public:
+ /// \name Reference Data Members
+ /// \{
+
ID tId_;
Row beginRow_;
Row endRow_;
+
+ /// \}
};
+/// \brief The PbiRawReferenceData class represents the raw data stored in the
+/// "CoordinateSortedData" section of the PBI index.
+///
class PBBAM_EXPORT PbiRawReferenceData
{
public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates an empty data structure.
PbiRawReferenceData(void);
+
+ /// \brief Creates an empty data structure, preallocating space for a
+ /// number of references.
+ ///
+ /// This constructor is recommended as this is the safest way to ensure that
+ /// references without observed mappings are included in the final output.
+ ///
PbiRawReferenceData(uint32_t numRefs);
+
PbiRawReferenceData(const PbiRawReferenceData& other);
PbiRawReferenceData(PbiRawReferenceData&& other);
PbiRawReferenceData& operator=(const PbiRawReferenceData& other);
PbiRawReferenceData& operator=(PbiRawReferenceData&& other);
+ /// \}
+
public:
+ /// \name Raw Data Containers
+ /// \{
+
std::vector<PbiReferenceEntry> entries_;
+
+ /// \}
};
-class PBBAM_EXPORT PbiRawSubreadData
+/// \brief The PbiRawBasicData class represents the raw data stored in the
+/// "BasicData" section of the PBI index.
+///
+class PBBAM_EXPORT PbiRawBasicData
{
public:
- PbiRawSubreadData(void);
- PbiRawSubreadData(uint32_t numReads);
- PbiRawSubreadData(const PbiRawSubreadData& other);
- PbiRawSubreadData(PbiRawSubreadData&& other);
- PbiRawSubreadData& operator=(const PbiRawSubreadData& other);
- PbiRawSubreadData& operator=(PbiRawSubreadData&& other);
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates an empty data structure.
+ PbiRawBasicData(void);
+
+ /// \brief Creates an empty data structure, preallocating space for a known
+ /// number of records.
+ PbiRawBasicData(uint32_t numReads);
+
+ PbiRawBasicData(const PbiRawBasicData& other);
+ PbiRawBasicData(PbiRawBasicData&& other);
+ PbiRawBasicData& operator=(const PbiRawBasicData& other);
+ PbiRawBasicData& operator=(PbiRawBasicData&& other);
+
+ /// \}
public:
+ /// \name Index Construction
+ /// \{
+
+ /// \brief Adds a record's mapping data.
+ ///
+ /// \param[in] b %BAM record
+ /// \param[in] offset \b virtual file offset where record begins
+ ///
void AddRecord(const BamRecord& b, int64_t offset);
+ /// \}
+
public:
+ /// \name Raw Data Containers
+ /// \{
+
std::vector<int32_t> rgId_;
std::vector<int32_t> qStart_;
std::vector<int32_t> qEnd_;
std::vector<int32_t> holeNumber_;
- std::vector<uint16_t> readQual_;
+ std::vector<float> readQual_;
+ std::vector<uint8_t> ctxtFlag_;
std::vector<int64_t> fileOffset_;
+
+ /// \}
};
+/// \deprecated For legacy-code support only, and will be removed soon.
+/// Use PbiRawBasicData instead.
+///
+typedef PbiRawBasicData PbiRawSubreadData;
+
+/// \brief The PbiRawData class provides an representation of raw PBI index
+/// data, used mostly for construction or I/O.
+///
+/// The PbiRawData class itself provides access to a few high-level attributes
+/// (e.g. version, number of records, etc.). The actual index data is stored
+/// in its member components:
+/// PbiRawBasicData,
+/// PbiRawMappedData,
+/// PbiRawReferenceData, &
+/// PbiRawBarcodeData .
+///
class PBBAM_EXPORT PbiRawData
{
public:
/// \name Constructors & Related Methods
/// \{
- /// Default ctor. Used in index building
+ /// \brief Creates an empty raw data structure, ready for building.
+ ///
PbiRawData(void);
- /// Load raw data from \p pbiFilename.
+ /// \brief Loads raw PBI data from a file.
///
- /// \param[in] pbiFilename PBI filename
+ /// \param[in] pbiFilename ".pbi" filename
///
- /// \throws if file contents cannot be loaded properly
+ /// \throws std::runtime_error if file contents cannot be loaded properly
///
PbiRawData(const std::string& pbiFilename);
@@ -192,116 +378,136 @@ public:
/// \}
public:
- /// \name Attributes
+ /// \name PBI General Attributes
/// \{
+ /// \returns true if index has BarcodeData section
bool HasBarcodeData(void) const;
+
+ /// \returns true if index has MappedData section
bool HasMappedData(void) const;
+
+ /// \returns true if index has ReferenceData section
bool HasReferenceData(void) const;
+
+ /// \returns true if index has \b section
+ /// \param[in] section PbiFile::Section identifier
+ ///
bool HasSection(const PbiFile::Section section) const;
+ /// \returns index filename ("*.pbi")
+ ///
+ /// \note Returns an empty string if the underlying data was generated, not
+ /// loaded from file.
+ ///
+ std::string Filename(void) const;
+
+ /// \returns enum flags representing the file sections present
PbiFile::Sections FileSections(void) const;
+
+ /// \returns the number of records in the PBI (& associated %BAM)
uint32_t NumReads(void) const;
+
+ /// \returns the PBI file's version
PbiFile::VersionEnum Version(void) const;
/// \}
public:
- /// \name Indexed Sections
+ /// \name Raw Data Components
/// \{
- const PbiRawBarcodeData& BarcodeData(void) const;
- const PbiRawMappedData& MappedData(void) const;
+ /// \returns const reference to BarcodeData lookup structure
+ ///
+ /// May be empty, check result of HasBarcodeData.
+ ///
+ const PbiRawBarcodeData& BarcodeData(void) const;
+
+ /// \returns const reference to BasicData lookup structure
+ const PbiRawBasicData& BasicData(void) const;
+
+ /// \returns const reference to MappedData lookup structure
+ ///
+ /// May be empty, check result of HasMappedData.
+ ///
+ const PbiRawMappedData& MappedData(void) const;
+
+ /// \returns const reference to reference data lookup structure
+ ///
+ /// May be empty, check result of HasReferenceData.
+ ///
const PbiRawReferenceData& ReferenceData(void) const;
- const PbiRawSubreadData& SubreadData(void) const;
/// \}
public:
- /// \name Attributes
+ /// \name PBI General Attributes
/// \{
+ /// \brief Sets the file section flags.
+ ///
+ /// \param[in] sections section flags
+ /// \returns reference to this index
+ ///
PbiRawData& FileSections(PbiFile::Sections sections);
+
+ /// \brief Sets the number of indexed records.
+ ///
+ /// \param[in] num number of records
+ /// \returns reference to this index
+ ///
PbiRawData& NumReads(uint32_t num);
+
+ /// \brief Sets PBI file version.
+ ///
+ /// \param[in] version file version
+ /// \returns reference to this index
+ ///
PbiRawData& Version(PbiFile::VersionEnum version);
/// \}
public:
- /// \name Indexed Sections
+ /// \name Raw Data Components
+ /// \{
+
+ /// \returns reference to BarcodeData lookup structure
+ ///
+ /// May be empty, check result of HasBarcodeData.
+ ///
+ PbiRawBarcodeData& BarcodeData(void);
- PbiRawBarcodeData& BarcodeData(void);
- PbiRawMappedData& MappedData(void);
+ /// \returns reference to BasicData lookup structure
+ PbiRawBasicData& BasicData(void);
+
+ /// \returns reference to MappedData lookup structure
+ ///
+ /// May be empty, check result of HasMappedData.
+ ///
+ PbiRawMappedData& MappedData(void);
+
+ /// \returns reference to reference data lookup structure
+ ///
+ /// May be empty, check result of HasReferenceData.
+ ///
PbiRawReferenceData& ReferenceData(void);
- PbiRawSubreadData& SubreadData(void);
/// \}
private:
+ std::string filename_;
PbiFile::VersionEnum version_;
PbiFile::Sections sections_;
uint32_t numReads_;
PbiRawBarcodeData barcodeData_;
PbiRawMappedData mappedData_;
PbiRawReferenceData referenceData_;
- PbiRawSubreadData subreadData_;
+ PbiRawBasicData basicData_;
};
-inline const PbiRawBarcodeData& PbiRawData::BarcodeData(void) const
-{ return barcodeData_; }
-
-inline PbiRawBarcodeData& PbiRawData::BarcodeData(void)
-{ return barcodeData_; }
-
-inline PbiFile::Sections PbiRawData::FileSections(void) const
-{ return sections_; }
-
-inline PbiRawData& PbiRawData::FileSections(PbiFile::Sections sections)
-{ sections_ = sections; return *this; }
-
-inline bool PbiRawData::HasBarcodeData(void) const
-{ return HasSection(PbiFile::BARCODE); }
-
-inline bool PbiRawData::HasMappedData(void) const
-{ return HasSection(PbiFile::MAPPED); }
-
-inline bool PbiRawData::HasReferenceData(void) const
-{ return HasSection(PbiFile::REFERENCE); }
-
-inline bool PbiRawData::HasSection(const PbiFile::Section section) const
-{ return (sections_ & section) != 0; }
-
-inline uint32_t PbiRawData::NumReads(void) const
-{ return numReads_; }
-
-inline PbiRawData& PbiRawData::NumReads(uint32_t num)
-{ numReads_ = num; return *this; }
-
-inline const PbiRawMappedData& PbiRawData::MappedData(void) const
-{ return mappedData_; }
-
-inline PbiRawMappedData& PbiRawData::MappedData(void)
-{ return mappedData_; }
-
-inline const PbiRawReferenceData& PbiRawData::ReferenceData(void) const
-{ return referenceData_; }
-
-inline PbiRawReferenceData& PbiRawData::ReferenceData(void)
-{ return referenceData_; }
-
-inline const PbiRawSubreadData& PbiRawData::SubreadData(void) const
-{ return subreadData_; }
-
-inline PbiRawSubreadData& PbiRawData::SubreadData(void)
-{ return subreadData_; }
-
-inline PbiFile::VersionEnum PbiRawData::Version(void) const
-{ return version_; }
-
-inline PbiRawData& PbiRawData::Version(PbiFile::VersionEnum version)
-{ version_ = version; return *this; }
-
} // namespace BAM
} // namespace PacBio
+#include "pbbam/internal/PbiRawData.inl"
+
#endif // PBIRAWDATA_H
diff --git a/include/pbbam/Position.h b/include/pbbam/Position.h
index 110d7ed..aece8c2 100644
--- a/include/pbbam/Position.h
+++ b/include/pbbam/Position.h
@@ -33,6 +33,10 @@
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
//
+// File Description
+/// \file Position.h
+/// \brief Defines the Position typedef.
+//
// Author: Derek Barnett
#ifndef POSITION_H
@@ -43,14 +47,17 @@
namespace PacBio {
namespace BAM {
+/// \brief This type is used to refer to genomic positions.
/// \typedef typedef int32_t PacBio::BAM::Position
///
-/// This type refers to all genomic positions. We use signed
-/// because SAM/BAM uses the -1 value to indicate unknown, unmapped, etc.
-/// positions.
+/// We use a signed integer because SAM/BAM uses the -1 value to indicate
+/// unknown or unmapped positions.
///
typedef int32_t Position;
+/// \brief This constant is widely used as a "missing" or "invalid" position
+/// marker.
+///
static const Position UnmappedPosition = Position(-1);
} // namespace BAM
diff --git a/include/pbbam/ProgramInfo.h b/include/pbbam/ProgramInfo.h
index d1bbcfe..e137707 100644
--- a/include/pbbam/ProgramInfo.h
+++ b/include/pbbam/ProgramInfo.h
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file ProgramInfo.h
+/// \brief Defines the ProgramInfo class.
+//
// Author: Derek Barnett
#ifndef PROGRAMINFO_H
@@ -45,14 +49,27 @@
namespace PacBio {
namespace BAM {
+/// \brief The ProgramInfo class represents a program entry (\@PG) in the SAM
+/// header.
+///
class PBBAM_EXPORT ProgramInfo
{
public:
/// \name Conversion & Validation
///
+ /// \brief Creates a ProgramInfo object from SAM-formatted text.
+ ///
+ /// \param[in] sam SAM-formatted text
+ /// \returns program info object
+ ///
static ProgramInfo FromSam(const std::string& sam);
+ /// \brief Converts a ProgramInfo object to its SAM-formatted text.
+ ///
+ /// \param[in] prog input ProgramInfo object
+ /// \returns SAM-formatted text (no trailing newline)
+ ///
static std::string ToSam(const ProgramInfo& prog);
/// \}
@@ -61,8 +78,16 @@ public:
/// \name Constructors & Related Methods
/// \{
+ /// \brief Creates an empty program info object.
ProgramInfo(void);
+
+ /// \brief Creates a program info object with an ID.
+ ///
+ /// \param[in] id program ID (\@PG:ID)
+ ///
ProgramInfo(const std::string& id);
+
+
ProgramInfo(const ProgramInfo& other);
ProgramInfo(ProgramInfo&& other);
ProgramInfo& operator=(const ProgramInfo& other);
@@ -72,115 +97,126 @@ public:
/// \}
public:
+ /// \name Conversion & Validation
+ ///
+
+ /// \returns true if program info is valid
+ ///
+ /// Currently this checks to see that ProgramInfo::Id does not contain an
+ /// empty string.
+ ///
+ bool IsValid(void) const;
+
+ /// \brief Converts this object to its SAM-formatted text.
+ ///
+ /// \returns SAM-formatted text (no trailing newline)
+ ///
+ std::string ToSam(void) const;
+
+ /// \}
+
+public:
/// \name Attributes
/// \{
+ /// \returns string value of \@PG:CL
std::string CommandLine(void) const;
+ /// \returns any non-standard tags added to the \@PG entry
+ ///
+ /// Result map consists of {tagName => value}.
+ ///
std::map<std::string, std::string> CustomTags(void) const;
+ /// \returns string value of \@PG:DS
std::string Description(void) const;
+ /// \returns string value of \@PG:ID
std::string Id(void) const;
+ /// \returns string value of \@PG:PN
std::string Name(void) const;
+ /// \returns string value of \@PG:PP
std::string PreviousProgramId(void) const;
+ /// \returns string value of \@PG:VN
std::string Version(void) const;
/// \}
- /// \name Conversion & Validation
- ///
-
- bool IsValid(void) const;
-
- std::string ToSam(void) const;
-
- /// \}
-
public:
/// \name Attributes
/// \{
+ /// \brief Sets the value for \@PG:CL
+ ///
+ /// \param[in] cmd new value
+ /// \returns reference to this object
+ ///
ProgramInfo& CommandLine(const std::string& cmd);
+ /// \brief Sets a new collection of non-standard tags.
+ ///
+ /// Custom tag map entries should consist of {tagName => value}.
+ ///
+ /// \param[in] custom new tags
+ /// \returns reference to this object
+ ///
ProgramInfo& CustomTags(const std::map<std::string, std::string>& custom);
+ /// \brief Sets the value for \@PG:DS
+ ///
+ /// \param[in] description new value
+ /// \returns reference to this object
+ ///
ProgramInfo& Description(const std::string& description);
+ /// \brief Sets the value for \@PG:ID
+ ///
+ /// \param[in] id new value
+ /// \returns reference to this object
+ ///
ProgramInfo& Id(const std::string& id);
+ /// \brief Sets the value for \@PG:PN
+ ///
+ /// \param[in] name new value
+ /// \returns reference to this object
+ ///
ProgramInfo& Name(const std::string& name);
+ /// \brief Sets the value for \@PG:PP
+ ///
+ /// \param[in] id new value
+ /// \returns reference to this object
+ ///
ProgramInfo& PreviousProgramId(const std::string& id);
+ /// \brief Sets the value for \@PG:VN
+ ///
+ /// \param[in] version new value
+ /// \returns reference to this object
+ ///
ProgramInfo& Version(const std::string& version);
/// \}
private:
- std::string commandLine_; // CL:<CommandLine>
- std::string description_; // DS:<Description>
- std::string id_; // ID:<ID> * Unique ID required for valid SAM header*
- std::string name_; // PN:<Name>
- std::string previousProgramId_; // PP:<PreviousProgramID>
- std::string version_; // VN:<Version>
+ std::string commandLine_; // CL:<CommandLine>
+ std::string description_; // DS:<Description>
+ std::string id_; // ID:<ID> * must be unique for valid SAM *
+ std::string name_; // PN:<Name>
+ std::string previousProgramId_; // PP:<PreviousProgramID>
+ std::string version_; // VN:<Version>
// custom attributes
- std::map<std::string, std::string> custom_; // tag => value
+ std::map<std::string, std::string> custom_; // tag => value
};
-inline std::string ProgramInfo::CommandLine(void) const
-{ return commandLine_; }
-
-inline ProgramInfo& ProgramInfo::CommandLine(const std::string& cmd)
-{ commandLine_ = cmd; return *this; }
-
-inline std::map<std::string, std::string> ProgramInfo::CustomTags(void) const
-{ return custom_; }
-
-inline ProgramInfo& ProgramInfo::CustomTags(const std::map<std::string, std::string>& custom)
-{ custom_ = custom; return *this; }
-
-inline std::string ProgramInfo::Description(void) const
-{ return description_; }
-
-inline ProgramInfo& ProgramInfo::Description(const std::string& description)
-{ description_ = description; return *this; }
-
-inline std::string ProgramInfo::Id(void) const
-{ return id_; }
-
-inline ProgramInfo& ProgramInfo::Id(const std::string& id)
-{ id_ = id; return *this; }
-
-inline bool ProgramInfo::IsValid(void) const
-{ return !id_.empty(); }
-
-inline std::string ProgramInfo::Name(void) const
-{ return name_; }
-
-inline ProgramInfo& ProgramInfo::Name(const std::string& name)
-{ name_ = name; return *this; }
-
-inline std::string ProgramInfo::PreviousProgramId(void) const
-{ return previousProgramId_; }
-
-inline ProgramInfo& ProgramInfo::PreviousProgramId(const std::string& id)
-{ previousProgramId_ = id; return *this; }
-
-inline std::string ProgramInfo::ToSam(const ProgramInfo& prog)
-{ return prog.ToSam(); }
-
-inline std::string ProgramInfo::Version(void) const
-{ return version_; }
-
-inline ProgramInfo& ProgramInfo::Version(const std::string& version)
-{ version_ = version; return *this; }
-
} // namespace BAM
} // namespace PacBio
+#include "pbbam/internal/ProgramInfo.inl"
+
#endif // PROGRAMINFO_H
diff --git a/include/pbbam/BamReader.h b/include/pbbam/QNameQuery.h
similarity index 55%
copy from include/pbbam/BamReader.h
copy to include/pbbam/QNameQuery.h
index bd0ced6..ad93d03 100644
--- a/include/pbbam/BamReader.h
+++ b/include/pbbam/QNameQuery.h
@@ -32,83 +32,63 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file QNameQuery.h
+/// \brief Defines the QNameQuery class.
+//
// Author: Derek Barnett
-#ifndef BAMREADER_H
-#define BAMREADER_H
+#ifndef QNAMEQUERY_H
+#define QNAMEQUERY_H
-#include "pbbam/BamHeader.h"
-#include "pbbam/BamRecord.h"
-#include "pbbam/Config.h"
-#include <string>
+#include "pbbam/internal/QueryBase.h"
+#include <memory>
namespace PacBio {
namespace BAM {
-class PBBAM_EXPORT BamReader
+/// \brief The QNameQuery class provides iterable access to a DataSet's records,
+/// with each iteration of the query returning a contiguous block of
+/// records that share a name.
+///
+/// There is no random-access here. It is simply a sequential read-through,
+/// grouping contiguous results that share a BamRecord::FullName.
+///
+/// \note The name is not ideal - but for legacy reasons, it will remain as-is
+/// for now. It will likely become something more explicit, like
+/// "SequentialQNameGroupQuery", so that the name "QNameQuery" will be
+/// available for a built-in query on a QNAME filter (or whitelist). This
+/// will make it more consistent with other queries (ReadAccuracyQuery,
+/// SubreadLengthQuery, ZmwQuery, etc).
+///
+class PBBAM_EXPORT QNameQuery : public internal::IGroupQuery
{
-
-public:
- enum ReadError
- {
- NoError = 0
- , OpenFileError
- , ReadHeaderError
- , ReadRecordError
- };
-
-public:
- BamReader(void);
- virtual ~BamReader(void);
-
public:
- /// Closes the BAM file reader.
- void Close(void);
-
- /// Opens a BAM file for reading.
+ /// \brief Creates a new QNameQuery.
///
- /// Prefix \p filename with "http://" or "ftp://" for remote files,
- /// or set to "-" for stdin.
+ /// \param[in] dataset input data source(s)
///
- /// \param[in] filename path to input BAM file
+ /// \throws std::runtime_error on failure to open/read underlying %BAM files
///
- /// \returns success/failure
- bool Open(const std::string& filename);
-
- /// \returns header as BamHeader object
- BamHeader::SharedPtr Header(void) const;
-
- /// \returns error status code
- BamReader::ReadError Error(void) const;
-
- /// \returns true if error encountered
- bool HasError(void) const;
+ QNameQuery(const DataSet& dataset);
+ ~QNameQuery(void);
- /// Fetches the next record in a BAM file.
+public:
+ /// \brief Main iteration point for record access.
///
- /// \param[out] record pointer to BamRecord object
+ /// Most client code should not need to use this method directly. Use
+ /// iterators instead.
///
- /// \returns succcess/failure
- bool GetNext(PBBAM_SHARED_PTR<BamRecord> record);
-
-public:
- std::string PacBioBamVersion(void) const;
-
-protected:
- bool GetNext(PBBAM_SHARED_PTR<bam1_t> rawRecord);
- void InitialOpen(void);
- PBBAM_SHARED_PTR<bam_hdr_t> RawHeader(void) const;
+ bool GetNext(std::vector<BamRecord>& records);
-protected:
- PBBAM_SHARED_PTR<samFile> file_;
- PBBAM_SHARED_PTR<bam_hdr_t> header_;
- std::string filename_;
- BamReader::ReadError error_;
+private:
+ struct QNameQueryPrivate;
+ std::unique_ptr<QNameQueryPrivate> d_;
};
} // namespace BAM
} // namespace PacBio
-#endif // BAMREADER_H
+#endif // QNAMEQUERY_H
diff --git a/include/pbbam/QualityValue.h b/include/pbbam/QualityValue.h
index 7eecc12..ab108d0 100644
--- a/include/pbbam/QualityValue.h
+++ b/include/pbbam/QualityValue.h
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file QualityValue.h
+/// \brief Defines the QualityValue class.
+//
// Author: Derek Barnett
#ifndef QUALITYVALUE_H
@@ -47,11 +51,12 @@ namespace BAM {
/// \brief The QualityValue class represents a FASTQ-compatible quality value.
///
-/// Integers are clamped to [0, 93] (corresponding to ASCII printable chars [!-~]).
+/// Integers are clamped to [0, 93] (corresponding to ASCII printable chars
+/// [!-~]).
///
-/// Use the explicitly-named static method for constructing QualityValue entries from
-/// FASTQ encoding characters. Otherwise, the value will be interpreted as the actual
-/// integer value.
+/// Use QualityValue::FromFastq for constructing entries from FASTQ encoding
+/// characters. Otherwise, the resulting QualityValue will be interpreted using
+/// the character's numeric value (ignoring the FASTQ offset of 33).
///
class PBBAM_EXPORT QualityValue
{
@@ -59,54 +64,52 @@ public:
static const uint8_t MAX;
public:
- /// Creates a QualityValue from a FASTQ encoding character.
+ /// \name Conversion Methods
+ /// \{
+
+ /// \brief Creates a QualityValue from a FASTQ-encoding character.
+ ///
+ /// \param[in] c FASTQ character
+ /// \returns quality value representing (c - 33)
+ ///
static QualityValue FromFastq(const char c);
+ /// \}
+
public:
/// \name Constructors & Related Methods
/// \{
+ /// \brief Creates a QualityValue with specified value.
+ ///
+ /// \param[in] value quality value
+ ///
QualityValue(const uint8_t value = 0);
+
QualityValue(const QualityValue& other);
~QualityValue(void);
/// \}
public:
- /// \returns the FASTQ encoding char for this QualityValue
+ /// \name Conversion Methods
+ /// \{
+
+ /// \returns the FASTQ-encoding char for this QualityValue
char Fastq(void) const;
/// \returns the integer value of this QualityValue
operator uint8_t(void) const;
+ /// \}
+
private:
uint8_t value_;
};
-inline QualityValue::QualityValue(const uint8_t value)
- : value_(value)
-{
- // clamp QV
- if (value_ > QualityValue::MAX)
- value_ = QualityValue::MAX;
-}
-
-inline QualityValue::QualityValue(const QualityValue& other)
- : value_(other.value_)
-{ }
-
-inline QualityValue::~QualityValue(void) { }
-
-inline char QualityValue::Fastq(void) const
-{ return static_cast<char>(value_ + 33); }
-
-inline QualityValue::operator uint8_t(void) const
-{ return value_; }
-
-inline QualityValue QualityValue::FromFastq(const char c)
-{ return QualityValue(static_cast<uint8_t>(c-33)); }
-
} // namespace BAM
} // namespace PacBio
+#include "pbbam/internal/QualityValue.inl"
+
#endif // QUALITYVALUE_H
diff --git a/include/pbbam/QualityValues.h b/include/pbbam/QualityValues.h
index 240b96f..af054f6 100644
--- a/include/pbbam/QualityValues.h
+++ b/include/pbbam/QualityValues.h
@@ -32,14 +32,17 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file QualityValues.h
+/// \brief Defines the QualityValues class.
+//
// Author: Derek Barnett
#ifndef QUALITYVALUES_H
#define QUALITYVALUES_H
#include "pbbam/QualityValue.h"
-#include <algorithm>
#include <string>
#include <vector>
@@ -47,39 +50,101 @@ namespace PacBio {
namespace BAM {
/// \brief The QualityValues class represents a sequence of FASTQ-compatible
-/// quality values. See QualityValue documentation for details.
+/// quality values. See QualityValue documentation for more details.
///
class PBBAM_EXPORT QualityValues : public std::vector<QualityValue>
{
public:
- /// Creates a QualityValues collection from a FASTQ-encoded string.
+ /// \brief Creates a QualityValues object from a FASTQ-encoded string.
+ ///
+ /// \param[in] fastq FASTQ-encoded string
+ /// \returns corresponding QualityValues object
+ ///
static QualityValues FromFastq(const std::string& fastq);
public:
/// \name Constructors & Related Methods
/// \{
+ /// \brief Default constructor - creates an empty QualityValues object.
QualityValues(void);
+
+ /// \brief Creates a QualityValues object from a FASTQ-encoded string.
+ ///
+ /// \param[in] fastqString FASTQ-encoded string
+ ///
explicit QualityValues(const std::string& fastqString);
+
+ /// \brief Creates a QualityValues object from a vector of QualityValue
+ /// elements.
+ ///
+ /// \param[in] quals vector of QualityValue elements
+ ///
explicit QualityValues(const std::vector<QualityValue>& quals);
+
+ /// \brief Creates a QualityValues object from a vector of QualityValue
+ /// elements.
+ ///
+ /// \param[in] quals vector of QualityValue elements
+ ///
+ QualityValues(std::vector<QualityValue>&& quals);
+
+ /// \brief Creates a QualityValues object from a vector of (numeric) quality
+ /// values.
+ ///
+ /// \param[in] quals vector of quality value numbers
+ ///
explicit QualityValues(const std::vector<uint8_t>& quals);
+ /// \brief Creates a QualityValues object from the contents of the range:
+ /// [first, last)
+ ///
+ /// \param[in] first input iterator, whose element is a numeric quality
+ /// \param[in] last input iterator, whose element is a numeric quality
+ ///
QualityValues(const std::vector<uint8_t>::const_iterator first,
const std::vector<uint8_t>::const_iterator last);
+
+ /// \brief Creates a QualityValues object from the contents of the range:
+ /// [first, last)
+ ///
+ /// \param[in] first input iterator, whose element is a QualityValue
+ /// \param[in] last input iterator, whose element is a QualityValue
+ ///
QualityValues(const QualityValues::const_iterator first,
const QualityValues::const_iterator last);
+ /// \brief Copy constructor
QualityValues(const QualityValues& other);
- QualityValues(QualityValues&& other);
- QualityValues(std::vector<QualityValue>&& quals);
+ /// \brief Move constructor
+ QualityValues(QualityValues&& other);
+ /// \brief Copy assignment operator
+ ///
+ /// \param[in] other QualityValues object
+ ///
QualityValues& operator=(const QualityValues& other);
+
+ /// \brief Move assignment operator
+ ///
+ /// \param[in] other QualityValues object
+ ///
QualityValues& operator=(QualityValues&& other);
+ /// \brief Copy assignment operator
+ ///
+ /// \param[in] quals vector of QualityValue elements
+ ///
QualityValues& operator=(const std::vector<QualityValue>& quals);
+
+ /// \brief Move assignment operator
+ ///
+ /// \param[in] quals vector of QualityValue elements
+ ///
QualityValues& operator=(std::vector<QualityValue>&& quals);
+ /// \brief Destructor
~QualityValues(void);
/// \}
@@ -97,137 +162,39 @@ public:
/// \name Iterators
/// \{
- /// \returns A const_iterator to the beginning of the sequence.
+ /// \returns a const_iterator to the beginning of the sequence
std::vector<QualityValue>::const_iterator cbegin(void) const;
- /// \returns A const_iterator to the element past the end of the sequence.
+ /// \returns a const_iterator to the element following the last element
std::vector<QualityValue>::const_iterator cend(void) const;
- /// \returns A const_iterator to the beginning of the sequence.
+ /// \returns a const_iterator to the beginning of the sequence
std::vector<QualityValue>::const_iterator begin(void) const;
- /// \returns A const_iterator to the element past the end of the sequence.
+ /// \returns a const_iterator to the element following the last element
std::vector<QualityValue>::const_iterator end(void) const;
- /// \returns An iterator to the beginning of the sequence.
+ /// \returns an iterator to the beginning of the sequence
std::vector<QualityValue>::iterator begin(void);
- /// \returns An iterator to the element past the end of the sequence.
+ /// \returns an iterator to the element following the last element
std::vector<QualityValue>::iterator end(void);
/// \}
public:
- /// \returns the FASTQ-encoded string for this collection
- std::string Fastq(void) const;
-};
-
-inline QualityValues::QualityValues(void)
- : std::vector<QualityValue>()
-{ }
-
-inline QualityValues::QualityValues(const std::string& fastqString)
- : std::vector<QualityValue>()
-{
- resize(fastqString.size());
- std::transform(fastqString.cbegin(), fastqString.cend(),
- begin(), QualityValue::FromFastq);
-}
-
-inline QualityValues::QualityValues(const std::vector<QualityValue>& quals)
- : std::vector<QualityValue>(quals)
-{ }
-
-inline QualityValues::QualityValues(const std::vector<uint8_t>& quals)
- : std::vector<QualityValue>()
-{
- resize(quals.size());
- std::copy(quals.cbegin(), quals.cend(), begin());
-}
-
-inline QualityValues::QualityValues(const std::vector<uint8_t>::const_iterator first,
- const std::vector<uint8_t>::const_iterator last)
- : std::vector<QualityValue>(first, last)
-{ }
-
-inline QualityValues::QualityValues(const QualityValues::const_iterator first,
- const QualityValues::const_iterator last)
- : std::vector<QualityValue>()
-{
- assign(first, last);
-}
-
-inline QualityValues::QualityValues(const QualityValues& other)
- : std::vector<QualityValue>(other)
-{ }
-
-inline QualityValues::QualityValues(std::vector<QualityValue>&& quals)
- : std::vector<QualityValue>(std::move(quals))
-{ }
-
-inline QualityValues::QualityValues(QualityValues&& other)
- : std::vector<QualityValue>(std::move(other))
-{ }
-
-inline QualityValues& QualityValues::operator=(const QualityValues& other)
-{ std::vector<QualityValue>::operator=(other); return *this; }
-
-inline QualityValues& QualityValues::operator=(const std::vector<QualityValue>& quals)
-{ std::vector<QualityValue>::operator=(quals); return *this; }
-
-inline QualityValues& QualityValues::operator=(QualityValues&& other)
-{ std::vector<QualityValue>::operator=(std::move(other)); return *this; }
-
-inline QualityValues& QualityValues::operator=(std::vector<QualityValue>&& quals)
-{ std::vector<QualityValue>::operator=(std::move(quals)); return *this; }
-
-inline QualityValues::~QualityValues(void) { }
-
-inline std::vector<QualityValue>::const_iterator QualityValues::cbegin(void) const
-{ return std::vector<QualityValue>::cbegin(); }
-
-inline std::vector<QualityValue>::const_iterator QualityValues::cend(void) const
-{ return std::vector<QualityValue>::cend(); }
-
-inline std::vector<QualityValue>::const_iterator QualityValues::begin(void) const
-{ return std::vector<QualityValue>::begin(); }
-
-inline std::vector<QualityValue>::const_iterator QualityValues::end(void) const
-{ return std::vector<QualityValue>::end(); }
-
-inline std::vector<QualityValue>::iterator QualityValues::begin(void)
-{ return std::vector<QualityValue>::begin(); }
-
-inline std::vector<QualityValue>::iterator QualityValues::end(void)
-{ return std::vector<QualityValue>::end(); }
-
-inline QualityValues QualityValues::FromFastq(const std::string& fastq)
-{
- return QualityValues(fastq);
-// QualityValues result;
-// result.resize(fastq.size());
-// std::transform(fastq.cbegin(), fastq.cend(), result.begin(), QualityValue::FromFastq);
-// return result;
-}
-
-inline std::string QualityValues::Fastq(void) const
-{
- std::string result;
- result.reserve(size());
- auto iter = cbegin();
- const auto end = cend();
- for (; iter != end; ++iter)
- result.push_back((*iter).Fastq());
- return result;
-}
+ /// \name Conversion Methods
+ /// \{
-inline bool QualityValues::operator==(const std::string& fastq) const
-{ return *this == QualityValues(fastq); }
+ /// \returns the FASTQ-encoded string for this sequence of quality values
+ std::string Fastq(void) const;
-inline bool QualityValues::operator!=(const std::string& fastq) const
-{ return *this != QualityValues(fastq); }
+ /// \}
+};
} // namespace BAM
} // namespace PacBio
+#include "pbbam/internal/QualityValues.inl"
+
#endif // QUALITYVALUES_H
diff --git a/include/pbbam/QueryBase.h b/include/pbbam/QueryBase.h
deleted file mode 100644
index 6106a4a..0000000
--- a/include/pbbam/QueryBase.h
+++ /dev/null
@@ -1,241 +0,0 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-//
-// * Neither the name of Pacific Biosciences nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-
-// Author: Derek Barnett
-
-#ifndef QUERYBASE_H
-#define QUERYBASE_H
-
-#include "pbbam/BamRecord.h"
-#include "pbbam/BamFile.h"
-
-namespace PacBio {
-namespace BAM {
-
-class QueryBase;
-
-class QueryIterator
-{
-public:
- BamRecord& operator*(void);
- BamRecord* operator->(void);
- QueryIterator& operator++(void);
- QueryIterator operator++(int);
- bool operator==(const QueryIterator& other) const;
- bool operator!=(const QueryIterator& other) const;
-
- QueryIterator(void);
- QueryIterator(QueryBase& parent);
-
-private:
- QueryBase* query_;
- BamRecord record_;
- friend class QueryBase;
-};
-
-class QueryConstIterator
-{
-public:
- const BamRecord& operator*(void) const;
- const BamRecord* operator->(void) const;
- QueryConstIterator& operator++(void);
- QueryConstIterator operator++(int);
- bool operator==(const QueryConstIterator& other) const;
- bool operator!=(const QueryConstIterator& other) const;
-
- QueryConstIterator(void);
- QueryConstIterator(const QueryBase& parent);
-
-private:
- QueryBase* query_;
- BamRecord record_;
- friend class QueryBase;
-};
-
-/// This class provides the base functionality and iterators for querying BAM files.
-class PBBAM_EXPORT QueryBase {
-
-public:
- typedef QueryIterator iterator;
- typedef QueryConstIterator const_iterator;
-
-public:
- virtual ~QueryBase(void);
-
-public:
-
- /// \name Iterators
- /// \{
-
- /// \returns an iterator to the beginning of the query results.
- QueryBase::iterator begin(void);
-
- /// \returns a const_iterator to the beginning of the query results.
- QueryBase::const_iterator begin(void) const;
-
- /// \returns a const_iterator to the beginning of the query results.
- QueryBase::const_iterator cbegin(void) const;
-
- /// \returns an iterator marking the end of query results.
- QueryBase::iterator end(void);
-
- /// \returns a const_iterator marking the end of query results.
- QueryBase::const_iterator end(void) const;
-
- /// \returns a const_iterator marking the end of query results.
- QueryBase::const_iterator cend(void) const;
-
- /// \}
-
-protected:
- QueryBase(const BamFile& file);
-
- /// Primary method for iterating through a query. Derived classes will implement this
- /// method to return
- virtual bool GetNext(BamRecord& x) =0;
-
-protected:
- const BamFile& file_;
-
- friend class QueryIterator;
- friend class QueryConstIterator;
-};
-
-inline QueryBase::iterator QueryBase::begin(void)
-{ return QueryBase::iterator(*this); }
-
-inline QueryBase::const_iterator QueryBase::begin(void) const
-{ return QueryBase::const_iterator(*this); }
-
-inline QueryBase::const_iterator QueryBase::cbegin(void) const
-{ return QueryBase::const_iterator(*this); }
-
-inline QueryBase::iterator QueryBase::end(void)
-{ return QueryBase::iterator(); }
-
-inline QueryBase::const_iterator QueryBase::end(void) const
-{ return QueryBase::const_iterator(); }
-
-inline QueryBase::const_iterator QueryBase::cend(void) const
-{ return QueryBase::const_iterator(); }
-
-// ---------------
-// QueryIterator
-// ---------------
-
-inline QueryIterator::QueryIterator(void)
- : query_(0)
-{ }
-
-inline QueryIterator::QueryIterator(QueryBase& parent)
- : query_(&parent)
- , record_(parent.file_.Header())
-{
- if (!(query_->GetNext(record_)))
- query_ = 0;
-}
-
-inline BamRecord& QueryIterator::operator*(void)
-{ return record_; }
-
-inline BamRecord* QueryIterator::operator->(void)
-{ return &(operator*()); }
-
-inline QueryIterator& QueryIterator::operator++(void)
-{
- if (!(query_->GetNext(record_)))
- query_ = 0;
- return *this;
-}
-
-inline QueryIterator QueryIterator::operator++(int)
-{
- QueryIterator result(*this);
- ++(*this);
- return result;
-}
-
-inline bool QueryIterator::operator==(const QueryIterator& other) const
-{ return query_ == other.query_; }
-
-inline bool QueryIterator::operator!=(const QueryIterator& other) const
-{ return !(*this == other); }
-
-// --------------------
-// QueryConstIterator
-// --------------------
-
-inline const BamRecord& QueryConstIterator::operator*(void) const
-{ return record_; }
-
-inline const BamRecord* QueryConstIterator::operator->(void) const
-{ return &(operator*()); }
-
-inline QueryConstIterator& QueryConstIterator::operator++(void)
-{
- if (!(query_->GetNext(record_)))
- query_ = 0;
- return *this;
-}
-
-inline QueryConstIterator QueryConstIterator::operator++(int)
-{
- QueryConstIterator result(*this);
- ++(*this);
- return result;
-}
-
-inline bool QueryConstIterator::operator==(const QueryConstIterator& other) const
-{ return query_ == other.query_; }
-
-inline bool QueryConstIterator::operator!=(const QueryConstIterator& other) const
-{ return !(*this == other); }
-
-inline QueryConstIterator::QueryConstIterator(void)
- : query_(0)
-{ }
-
-inline QueryConstIterator::QueryConstIterator(const QueryBase& parent)
- : record_(parent.file_.Header())
-{
- query_ = const_cast<QueryBase*>(&parent);
- if (!(query_->GetNext(record_)))
- query_ = 0;
-}
-
-} // namespace BAM
-} // namspace PacBio
-
-#endif // QUERYBASE_H
diff --git a/include/pbbam/ZmwQuery.h b/include/pbbam/ReadAccuracyQuery.h
similarity index 54%
copy from include/pbbam/ZmwQuery.h
copy to include/pbbam/ReadAccuracyQuery.h
index fdd1d1d..1eecb6c 100644
--- a/include/pbbam/ZmwQuery.h
+++ b/include/pbbam/ReadAccuracyQuery.h
@@ -32,35 +32,73 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file ReadAccuracyQuery.h
+/// \brief Defines the ReadAccuracyQuery class.
+//
// Author: Derek Barnett
-#ifndef ZMWQUERY_H
-#define ZMWQUERY_H
+#ifndef READACCURACYQUERY_H
+#define READACCURACYQUERY_H
+#include "pbbam/Accuracy.h"
+#include "pbbam/Compare.h"
#include "pbbam/Config.h"
#include "pbbam/internal/QueryBase.h"
#include <vector>
namespace PacBio {
namespace BAM {
-//namespace staging {
-class PBBAM_EXPORT ZmwQuery : public internal::IQuery
+/// \brief The ReadAccuracyQuery class provides iterable access to a DataSet's
+/// %BAM records, limiting results to those matching a read accuracy
+/// criterion.
+///
+/// Example:
+/// \include code/ReadAccuracyQuery.txt
+///
+/// \note Currently, all %BAM files must have a corresponding ".pbi" index file.
+/// Use BamFile::EnsurePacBioIndexExists before creating the query if one
+/// may not be present.
+///
+class PBBAM_EXPORT ReadAccuracyQuery : public internal::IQuery
{
public:
- ZmwQuery(const std::vector<int32_t>& zmwWhitelist,
- const DataSet& dataset);
-protected:
- FileIterPtr CreateIterator(const BamFile& bamFile);
+ /// \brief Creates a new ReadAccuracyQuery, limiting record results to only
+ /// those matching a read accuracy criterion.
+ ///
+ /// \param[in] accuracy read accuracy value
+ /// \param[in] compareType compare operator
+ /// \param[in] dataset input data source(s)
+ ///
+ /// \sa BamRecord::ReadAccuracy
+ ///
+ /// \throws std::runtime_error on failure to open/read underlying %BAM or PBI
+ /// files.
+ ///
+ ReadAccuracyQuery(const Accuracy accuracy,
+ const Compare::Type compareType,
+ const DataSet& dataset);
+
+ ~ReadAccuracyQuery(void);
+
+public:
+
+ /// \brief Main iteration point for record access.
+ ///
+ /// Most client code should not need to use this method directly. Use
+ /// iterators instead.
+ ///
+ bool GetNext(BamRecord& r);
private:
- std::vector<int> whitelist_;
+ struct ReadAccuracyQueryPrivate;
+ std::unique_ptr<ReadAccuracyQueryPrivate> d_;
};
-//} // namespace staging
} // namespace BAM
} // namespace PacBio
-#endif // ZMWQUERY_H
+#endif // READACCURACYQUERY_H
diff --git a/include/pbbam/ReadGroupInfo.h b/include/pbbam/ReadGroupInfo.h
index 86372ee..f29dc46 100644
--- a/include/pbbam/ReadGroupInfo.h
+++ b/include/pbbam/ReadGroupInfo.h
@@ -32,19 +32,29 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file ReadGroupInfo.h
+/// \brief Defines the ReadGroupInfo class.
+//
// Author: Derek Barnett
#ifndef READGROUPINFO_H
#define READGROUPINFO_H
#include "pbbam/Config.h"
+#include "pbbam/exception/InvalidSequencingChemistryException.h"
#include <map>
#include <string>
namespace PacBio {
namespace BAM {
+/// \brief This enum describes the base features that may be present in a read
+/// group's records.
+///
+/// This information is stored in its description (\@RG:DS).
+///
enum class BaseFeature
{
DELETION_QV
@@ -57,6 +67,8 @@ enum class BaseFeature
, PULSE_WIDTH
, PKMID
, PKMEAN
+ , PKMID2
+ , PKMEAN2
, LABEL
, LABEL_QV
, ALT_LABEL
@@ -65,33 +77,138 @@ enum class BaseFeature
, PULSE_CALL
, PRE_PULSE_FRAMES
, PULSE_CALL_WIDTH
+ , START_FRAME
};
+/// \brief This enum describes the encoding types used for frame data within a
+/// read group's records.
+///
+/// This information is stored in its description (\@RG:DS).
+///
enum class FrameCodec
{
RAW
, V1
};
+/// \brief This enum describes the experimental design of the barcodes within a
+/// read group's records.
+///
+/// This information is stored in its description (\@RG:DS).
+///
+enum class BarcodeModeType
+{
+ NONE
+ , SYMMETRIC
+ , ASYMMETRIC
+};
+
+/// \brief This enum describes the type of value encoded by barcode quality,
+/// within a read group's records.
+///
+/// This information is stored in its description (\@RG:DS).
+///
+enum class BarcodeQualityType
+{
+ NONE
+ , SCORE
+ , PROBABILITY
+};
+
+/// \brief This enum describes the instrument type / platform model,
+/// within a read group's records.
+///
+/// This information is stored in its description (\@RG:PM).
+///
+enum class PlatformModelType
+{
+ ASTRO
+ , RS
+ , SEQUEL
+};
+
+/// \brief The ReadGroupInfo class represents a read group entry (\@RG) in the
+/// SAM header.
+///
class PBBAM_EXPORT ReadGroupInfo
{
public:
/// \name Conversion & Validation
///
+ /// \brief Creates a ReadGroupInfo object from SAM-formatted text.
+ ///
+ /// \param[in] sam SAM-formatted text
+ /// \returns read group info object
+ ///
static ReadGroupInfo FromSam(const std::string& sam);
+ /// \brief Converts a ReadGroupInfo object to its SAM-formatted text.
+ ///
+ /// \param[in] rg input ReadGroupInfo object
+ /// \returns SAM-formatted text (no trailing newline)
+ ///
static std::string ToSam(const ReadGroupInfo& rg);
+ /// \brief Converts a read group ID (string) to its numeric value.
+ ///
+ /// \param[in] rgId read group ID string
+ /// \returns numeric value of ID
+ ///
+ static int32_t IdToInt(const std::string& rgId);
+
+ /// \brief Converts a read group ID number to its string representation.
+ ///
+ /// \param[in] id read group ID number
+ /// \returns hexadecimal string representation of ID
+ ///
+ static std::string IntToId(const int32_t id);
+
+ /// \returns sequencing chemistry from (bindingKig, sequencingKit,
+ /// basecallerVersion)
+ ///
+ static std::string SequencingChemistryFromTriple(const std::string& bindingKit,
+ const std::string& sequencingKit,
+ const std::string& basecallerVersion);
+
/// \}
public:
/// \name Constructors & Related Methods
/// \{
+ /// \brief Creates an empty read group info object.
ReadGroupInfo(void);
+
+ /// \brief Creates a read group info object with an ID.
+ ///
+ /// \param[in] id string representation of read group ID
+ ///
ReadGroupInfo(const std::string& id);
- ReadGroupInfo(const std::string& movieName, const std::string& readType);
+
+ /// \brief Creates a read group info object from a movie name & read type.
+ ///
+ /// \param[in] movieName sequencing movie name
+ /// \param[in] readType string version of record type
+ ///
+ /// \sa RecordType
+ ///
+ ReadGroupInfo(const std::string& movieName,
+ const std::string& readType);
+
+ /// \brief Creates a read group info object from a movie name, read type,
+ /// and platform model.
+ ///
+ /// \param[in] movieName sequencing movie name
+ /// \param[in] readType string version of record type
+ /// \param[in] platform platform model type
+ ///
+ /// \sa RecordType
+ ///
+ ReadGroupInfo(const std::string& movieName,
+ const std::string& readType,
+ const PlatformModelType platform);
+
ReadGroupInfo(const ReadGroupInfo& other);
ReadGroupInfo(ReadGroupInfo&& other);
ReadGroupInfo& operator=(const ReadGroupInfo& other);
@@ -101,68 +218,148 @@ public:
/// \}
public:
+ /// \name Comparison Operators
+ /// \{
+
+ bool operator==(const ReadGroupInfo& other) const;
+
+ /// \}
+
+public:
+ /// \name Conversion & Validation
+ /// \{
+
+ /// \returns true if read group info is valid
+ ///
+ /// Currently this checks to see that ReadGroupInfo::Id does not contain an
+ /// empty string.
+ ///
+ bool IsValid(void) const;
+
+ /// \brief Converts this object to its SAM-formatted text.
+ ///
+ /// \returns SAM-formatted text (no trailing newline)
+ ///
+ std::string ToSam(void) const;
+
+ /// \}
+
+public:
/// \name Attributes
/// \{
- const std::string& BasecallerVersion(void) const;
+ /// \returns the number of barcode sequences in BarcodeFile
+ ///
+ /// \throws std::runtime_error if barcode data not set.
+ /// Check HasBarcodeData if this data may be absent.
+ ///
+ size_t BarcodeCount(void) const;
- bool HasBaseFeature(const BaseFeature& feature) const;
+ /// \returns name of FASTA file containing barcode sequences
+ ///
+ /// \throws std::runtime_error if barcode data not set.
+ /// Check HasBarcodeData if this data may be absent.
+ ///
+ std::string BarcodeFile(void) const;
+
+ /// \returns MD5 hash of the contents of BarcodeFile
+ ///
+ /// \throws std::runtime_error if barcode data not set.
+ /// Check HasBarcodeData if this data may be absent.
+ ///
+ std::string BarcodeHash(void) const;
+
+ /// \returns experimental design type of barcodes
+ ///
+ /// \throws std::runtime_error if barcode data not set.
+ /// Check HasBarcodeData if this data may be absent.
+ ///
+ BarcodeModeType BarcodeMode(void) const;
+
+ /// \returns type of value encoded in the 'bq' tag
+ ///
+ /// \throws std::runtime_error if barcode data is not set.
+ /// Check HasBarcodeData if this data may be absent.
+ ///
+ BarcodeQualityType BarcodeQuality(void) const;
+
+ /// \returns basecaller version number (e.g. "2.1")
+ std::string BasecallerVersion(void) const;
+ /// \returns tag name in use for the specified for base feature
std::string BaseFeatureTag(const BaseFeature& feature) const;
+ /// \returns binding kit part number (e.g. "100236500")
std::string BindingKit(void) const;
+ /// \returns true if reads are classified as spike-in controls
bool Control(void) const;
+ /// \returns any non-standard tags added to the \@PG entry
+ ///
+ /// Result map consists of {tagName => value}.
+ ///
std::map<std::string, std::string> CustomTags(void) const;
+ /// \returns string value of \@RG:DT
std::string Date(void) const;
+ /// \returns string value of \@RG:FO
std::string FlowOrder(void) const;
+ /// \returns frame rate in Hz
std::string FrameRateHz(void) const;
+ /// \returns true if read group has barcode data
+ bool HasBarcodeData(void) const;
+
+ /// \returns true if read group has an entry for the specified base feature
+ bool HasBaseFeature(const BaseFeature& feature) const;
+
+ /// \returns string value of \@RG:ID
std::string Id(void) const;
+ /// \returns codec type in use for IPD
FrameCodec IpdCodec(void) const;
+ /// \returns string value of \@RG:KS
std::string KeySequence(void) const;
+ /// \returns string value of \@RG:LB
std::string Library(void) const;
+ /// \returns movie name (stored in \@RG:PU)
std::string MovieName(void) const;
+ /// \returns string value of \@RG:PL
std::string Platform(void) const;
+ /// \returns string value of \@RG:PM
+ PlatformModelType PlatformModel(void) const;
+
+ /// \returns string value of \@RG:PI
std::string PredictedInsertSize(void) const;
+ /// \returns string value of \@RG:PG
std::string Programs(void) const;
+ /// \returns codec type in use for PulseWidth
FrameCodec PulseWidthCodec(void) const;
+ /// \returns string value of read type
std::string ReadType(void) const;
+ /// \returns string value of \@RG:SM
std::string Sample(void) const;
+ /// \returns string value of \@RG:CN
std::string SequencingCenter(void) const;
- std::string SequencingKit(void) const;
-
- /// \}
-
- /// \name Conversion & Validation
- /// \{
-
- bool IsValid(void) const;
-
- std::string ToSam(void) const;
-
- /// \}
-
- /// \name Comparison
- /// \{
+ /// \returns sequencing chemistry name
+ std::string SequencingChemistry(void) const;
- bool operator==(const ReadGroupInfo& other) const;
+ /// \returns sequencing kit part number
+ std::string SequencingKit(void) const;
/// \}
@@ -170,62 +367,217 @@ public:
/// \name Attributes
/// \{
+ /// \brief Sets read group's barcode data.
+ ///
+ /// Barcode fields are either absent or all must be present.
+ ///
+ /// \param[in] barcodeFile barcode filename
+ /// \param[in] barcodeHash MD5 hash of barcode file
+ /// \param[in] barcodeCount number of records in barcode file
+ /// \param[in] barcodeMode experimental design of barcodes
+ /// \param[in] barcodeQuality type of barcode quality value
+ ///
+ /// \sa BarcodeFile \n
+ /// BarcodeHash \n
+ /// BarcodeCount \n
+ /// BarcodeMode \n
+ /// BarcodeQuality \n
+ /// ReadGroupInfo::ClearBarcodeData
+ ///
+ /// \returns reference to this object
+ ///
+ ReadGroupInfo& BarcodeData(const std::string& barcodeFile,
+ const std::string& barcodeHash,
+ size_t barcodeCount,
+ BarcodeModeType barcodeMode,
+ BarcodeQualityType barcodeQuality);
+
+ /// \brief Sets the basecaller version number.
+ ///
+ /// \param[in] versionNumber new value
+ /// \returns reference to this object
+ ///
ReadGroupInfo& BasecallerVersion(const std::string& versionNumber);
+ /// \brief Sets the tag to be used for a particular base feature.
+ ///
+ /// \param[in] feature feature type begin updated
+ /// \param[in] tag new value
+ /// \returns reference to this object
+ ///
ReadGroupInfo& BaseFeatureTag(const BaseFeature& feature,
const std::string& tag);
+ /// \brief Sets the binding kit part number.
+ ///
+ /// \param[in] kitNumber new value
+ /// \returns reference to this object
+ ///
ReadGroupInfo& BindingKit(const std::string& kitNumber);
+ /// \brief Removes all barcode data from this read group.
+ ///
+ /// \returns reference to this read group
+ ///
+ ReadGroupInfo& ClearBarcodeData(void);
+
+ /// \brief Sets whether read group's records are classifed as spike-in
+ /// controls.
+ ///
+ /// \param[in] ctrl true if records are spike-in controls
+ /// \returns reference to this object
+ ///
ReadGroupInfo& Control(const bool ctrl);
+ /// \brief Sets a new collection of non-standard tags.
+ ///
+ /// Custom tag map entries should consist of {tagName => value}.
+ ///
+ /// \param[in] custom new tags
+ /// \returns reference to this object
+ ///
ReadGroupInfo& CustomTags(const std::map<std::string, std::string>& custom);
+ /// \brief Sets the value for \@RG:DT
+ ///
+ /// \param[in] date new value
+ /// \returns reference to this object
+ ///
ReadGroupInfo& Date(const std::string& date);
+ /// \brief Sets the value for \@RG:FO
+ ///
+ /// \param[in] order new value
+ /// \returns reference to this object
+ ///
ReadGroupInfo& FlowOrder(const std::string& order);
+ /// \brief Sets the frame rate.
+ ///
+ /// \param[in] frameRateHz string value of frame rate in Hz
+ /// \returns reference to this object
+ ///
ReadGroupInfo& FrameRateHz(const std::string& frameRateHz);
+ /// \brief Sets the read group's ID.
+ ///
+ /// \param[in] id string value of ID
+ /// \returns reference to this object
+ ///
ReadGroupInfo& Id(const std::string& id);
- ReadGroupInfo& Id(const std::string& movieName, const std::string& readType);
+ /// \brief Sets the read group's ID, from movie name & read type
+ ///
+ /// \param[in] movieName sequencing movie name
+ /// \param[in] readType string version of read type
+ /// \returns reference to this object
+ ///
+ ReadGroupInfo& Id(const std::string& movieName,
+ const std::string& readType);
- ReadGroupInfo& IpdCodec(const FrameCodec& codec, const std::string& tag = std::string());
+ /// \brief Sets the codec type used for IPD
+ ///
+ /// \param[in] codec codec type
+ /// \param[in] tag IPD tag
+ /// \returns reference to this object
+ ///
+ ReadGroupInfo& IpdCodec(const FrameCodec& codec,
+ const std::string& tag = std::string());
+ /// \brief Sets the value for \@RG:KS
+ ///
+ /// \param[in] sequence new value
+ /// \returns reference to this object
+ ///
ReadGroupInfo& KeySequence(const std::string& sequence);
+ /// \brief Sets the value for \@RG:LB
+ ///
+ /// \param[in] library new value
+ /// \returns reference to this object
+ ///
ReadGroupInfo& Library(const std::string& library);
- ReadGroupInfo& MovieName(const std::string& id);
+ /// \brief Sets the value for movie name (stored in \@RG:PU).
+ ///
+ /// \param[in] movieName new value
+ /// \returns reference to this object
+ ///
+ ReadGroupInfo& MovieName(const std::string& movieName);
+ /// \brief Sets the value for \@RG:PI
+ ///
+ /// \param[in] size new value
+ /// \returns reference to this object
+ ///
ReadGroupInfo& PredictedInsertSize(const std::string& size);
+ /// \brief Sets the value for \@RG:PG
+ ///
+ /// \param[in] programs new value
+ /// \returns reference to this object
+ ///
ReadGroupInfo& Programs(const std::string& programs);
- ReadGroupInfo& PulseWidthCodec(const FrameCodec& codec, const std::string& tag = std::string());
+ /// \brief Sets the value for \@RG:PM
+ ///
+ /// \param[in] platformModel new value
+ /// \returns reference to this object
+ ///
+ ReadGroupInfo& PlatformModel(const PlatformModelType& platform);
+
+ /// \brief Sets the codec type used for PulseWidth
+ ///
+ /// \param[in] codec codec type
+ /// \param[in] tag pulse width tag
+ /// \returns reference to this object
+ ///
+ ReadGroupInfo& PulseWidthCodec(const FrameCodec& codec,
+ const std::string& tag = std::string());
+ /// \brief Sets the read type.
+ ///
+ /// \param[in] type new value
+ /// \returns reference to this object
+ ///
ReadGroupInfo& ReadType(const std::string& type);
+ /// \brief Sets the value for \@RG:SM
+ ///
+ /// \param[in] sample new value
+ /// \returns reference to this object
+ ///
ReadGroupInfo& Sample(const std::string& sample);
+ /// \brief Sets the value for \@RG:CN
+ ///
+ /// \param[in] center new value
+ /// \returns reference to this object
+ ///
ReadGroupInfo& SequencingCenter(const std::string& center);
+ /// \brief Sets the sequencing kit part number.
+ ///
+ /// \param[in] kitNumber new value
+ /// \returns reference to this object
+ ///
ReadGroupInfo& SequencingKit(const std::string& kitNumber);
/// \}
private:
- std::string id_; // ID * Unique ID required for valid SAM/BAM header *
- std::string sequencingCenter_; // CN
- std::string date_; // DT * (ISO 8601) *
- std::string flowOrder_; // FO
- std::string keySequence_; // KS
- std::string library_; // LB
- std::string programs_; // PG
- std::string predictedInsertSize_; // PI
- std::string movieName_; // PU * more explicit, in place of "platform unit" *
- std::string sample_; // SM
+ std::string id_; // ID * must be unique for valid SAM *
+ std::string sequencingCenter_; // CN
+ std::string date_; // DT * (ISO-8601) *
+ std::string flowOrder_; // FO
+ std::string keySequence_; // KS
+ std::string library_; // LB
+ std::string programs_; // PG
+ std::string predictedInsertSize_; // PI
+ std::string movieName_; // PU
+ std::string sample_; // SM
+
+ PlatformModelType platformModel_; // PM
// DS:<Description> components
std::string readType_;
@@ -236,6 +588,12 @@ private:
bool control_ = false;
FrameCodec ipdCodec_;
FrameCodec pulseWidthCodec_;
+ bool hasBarcodeData_ = false;
+ std::string barcodeFile_;
+ std::string barcodeHash_;
+ size_t barcodeCount_ = 0;
+ BarcodeModeType barcodeMode_ = BarcodeModeType::NONE;
+ BarcodeQualityType barcodeQuality_ = BarcodeQualityType::NONE;
std::map<BaseFeature, std::string> features_;
// custom attributes
@@ -246,147 +604,20 @@ private:
void DecodeSamDescription(const std::string& description);
};
+/// \brief Creates a read group ID from a movie name & read type.
+///
+/// \param[in] movieName sequencing movie name
+/// \param[in] readType string version of read type
+///
+/// \returns hexadecimal string read group ID
+///
PBBAM_EXPORT
std::string MakeReadGroupId(const std::string& movieName,
const std::string& readType);
-inline const std::string& ReadGroupInfo::BasecallerVersion(void) const
-{ return basecallerVersion_; }
-
-inline ReadGroupInfo& ReadGroupInfo::BasecallerVersion(const std::string& versionNumber)
-{ basecallerVersion_ = versionNumber; return *this; }
-
-inline std::string ReadGroupInfo::BaseFeatureTag(const BaseFeature& feature) const
-{
- const auto iter = features_.find(feature);
- if (iter == features_.end())
- return std::string();
- return iter->second;
-}
-
-inline ReadGroupInfo& ReadGroupInfo::BaseFeatureTag(const BaseFeature& feature,
- const std::string& tag)
-{ features_[feature] = tag; return *this; }
-
-inline std::string ReadGroupInfo::BindingKit(void) const
-{ return bindingKit_; }
-
-inline ReadGroupInfo& ReadGroupInfo::BindingKit(const std::string& kitNumber)
-{ bindingKit_ = kitNumber; return *this; }
-
-inline bool ReadGroupInfo::Control(void) const
-{ return control_; }
-
-inline ReadGroupInfo& ReadGroupInfo::Control(const bool ctrl)
-{ control_ = ctrl; return *this; }
-
-inline std::map<std::string, std::string> ReadGroupInfo::CustomTags(void) const
-{ return custom_; }
-
-inline ReadGroupInfo& ReadGroupInfo::CustomTags(const std::map<std::string, std::string>& custom)
-{ custom_ = custom; return *this; }
-
-inline std::string ReadGroupInfo::Date(void) const
-{ return date_; }
-
-inline ReadGroupInfo& ReadGroupInfo::Date(const std::string& date)
-{ date_ = date; return *this; }
-
-inline std::string ReadGroupInfo::FlowOrder(void) const
-{ return flowOrder_; }
-
-inline ReadGroupInfo& ReadGroupInfo::FlowOrder(const std::string& order)
-{ flowOrder_ = order; return *this; }
-
-inline std::string ReadGroupInfo::FrameRateHz(void) const
-{ return frameRateHz_; }
-
-inline ReadGroupInfo& ReadGroupInfo::FrameRateHz(const std::string& frameRateHz)
-{ frameRateHz_ = frameRateHz; return *this; }
-
-inline bool ReadGroupInfo::HasBaseFeature(const BaseFeature& feature) const
-{ return features_.find(feature) != features_.end(); }
-
-inline std::string ReadGroupInfo::Id(void) const
-{ return id_; }
-
-inline ReadGroupInfo& ReadGroupInfo::Id(const std::string& id)
-{ id_ = id; return *this; }
-
-inline ReadGroupInfo& ReadGroupInfo::Id(const std::string& movieName,
- const std::string& readType)
-{ id_ = MakeReadGroupId(movieName, readType); return *this; }
-
-inline FrameCodec ReadGroupInfo::IpdCodec(void) const
-{ return ipdCodec_; }
-
-inline bool ReadGroupInfo::IsValid(void) const
-{ return !id_.empty(); }
-
-inline std::string ReadGroupInfo::KeySequence(void) const
-{ return keySequence_; }
-
-inline ReadGroupInfo& ReadGroupInfo::KeySequence(const std::string& sequence)
-{ keySequence_ = sequence; return *this; }
-
-inline std::string ReadGroupInfo::Library(void) const
-{ return library_; }
-
-inline ReadGroupInfo& ReadGroupInfo::Library(const std::string& library)
-{ library_ = library; return *this; }
-
-inline std::string ReadGroupInfo::MovieName(void) const
-{ return movieName_; }
-
-inline ReadGroupInfo& ReadGroupInfo::MovieName(const std::string& movieName)
-{ movieName_ = movieName; return *this; }
-
-inline std::string ReadGroupInfo::Platform(void) const
-{ return std::string("PACBIO"); }
-
-inline std::string ReadGroupInfo::PredictedInsertSize(void) const
-{ return predictedInsertSize_; }
-
-inline ReadGroupInfo& ReadGroupInfo::PredictedInsertSize(const std::string& size)
-{ predictedInsertSize_ = size; return *this; }
-
-inline std::string ReadGroupInfo::Programs(void) const
-{ return programs_; }
-
-inline ReadGroupInfo& ReadGroupInfo::Programs(const std::string& programs)
-{ programs_ = programs; return *this; }
-
-inline FrameCodec ReadGroupInfo::PulseWidthCodec(void) const
-{ return pulseWidthCodec_; }
-
-inline std::string ReadGroupInfo::ReadType(void) const
-{ return readType_; }
-
-inline ReadGroupInfo& ReadGroupInfo::ReadType(const std::string& type)
-{ readType_ = type; return *this; }
-
-inline std::string ReadGroupInfo::Sample(void) const
-{ return sample_; }
-
-inline ReadGroupInfo& ReadGroupInfo::Sample(const std::string& sample)
-{ sample_ = sample; return *this; }
-
-inline std::string ReadGroupInfo::SequencingCenter(void) const
-{ return sequencingCenter_; }
-
-inline ReadGroupInfo& ReadGroupInfo::SequencingCenter(const std::string& center)
-{ sequencingCenter_ = center; return *this; }
-
-inline std::string ReadGroupInfo::SequencingKit(void) const
-{ return sequencingKit_; }
-
-inline ReadGroupInfo& ReadGroupInfo::SequencingKit(const std::string& kitNumber)
-{ sequencingKit_ = kitNumber; return *this; }
-
-inline std::string ReadGroupInfo::ToSam(const ReadGroupInfo& rg)
-{ return rg.ToSam(); }
-
} // namespace BAM
} // namespace PacBio
+#include "pbbam/internal/ReadGroupInfo.inl"
+
#endif // READGROUPINFO_H
diff --git a/include/pbbam/SamTagCodec.h b/include/pbbam/SamTagCodec.h
index b0b6796..cc4def4 100644
--- a/include/pbbam/SamTagCodec.h
+++ b/include/pbbam/SamTagCodec.h
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file SamTagCodec.h
+/// \brief Defines the SamTagCodec class.
+//
// Author: Derek Barnett
#ifndef SAMTAGCODEC_H
@@ -45,10 +49,30 @@
namespace PacBio {
namespace BAM {
+/// \brief The SamTagCodec class provides text-based encoding/decoding of %BAM
+/// tag data.
+///
+/// \note SamTagCodec is mostly an implementation and/or testing detail, and may
+/// be removed from the public API.
+///
class PBBAM_EXPORT SamTagCodec
{
public:
+ /// \name Tag Collection Methods
+ /// \{
+
+ /// \brief Creates a TagCollection from SAM-formatted tag data.
+ ///
+ /// \param[in] tagString SAM-formmated string
+ /// \returns resulting tag collection
+ ///
static TagCollection Decode(const std::string& tagString);
+
+ /// \brief Creates SAM-formatted string from a TagCollection.
+ ///
+ /// \param[in] tags TagCollection containing tag data
+ /// \returns SAM-formatted string
+ ///
static std::string Encode(const PacBio::BAM::TagCollection& tags);
};
diff --git a/include/pbbam/SequenceInfo.h b/include/pbbam/SequenceInfo.h
index 0cf9d04..88b8dd1 100644
--- a/include/pbbam/SequenceInfo.h
+++ b/include/pbbam/SequenceInfo.h
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file SequenceInfo.h
+/// \brief Defines the SequenceInfo class.
+//
// Author: Derek Barnett
#ifndef SEQUENCEINFO_H
@@ -45,14 +49,27 @@
namespace PacBio {
namespace BAM {
+/// \brief The SequenceInfo class represents a program entry (\@SQ) in the SAM
+/// header.
+///
class PBBAM_EXPORT SequenceInfo
{
public:
/// \name Conversion & Validation
///
+ /// \brief Creates a SequenceInfo object from SAM-formatted text.
+ ///
+ /// \param[in] sam SAM-formatted text
+ /// \returns program info object
+ ///
static SequenceInfo FromSam(const std::string& sam);
+ /// \brief Converts a SequenceInfo object to its SAM-formatted text.
+ ///
+ /// \param[in] seq input SequenceInfo object
+ /// \returns SAM-formatted text (no trailing newline)
+ ///
static std::string ToSam(const SequenceInfo& seq);
/// \}
@@ -61,8 +78,17 @@ public:
/// \name Constructors & Related Methods
/// \{
+ /// \brief Creates an empty sequence info object.
SequenceInfo(void);
- SequenceInfo(const std::string& name, const std::string& length = "0");
+
+ /// \brief Creates a sequence info object with name & (optional) length.
+ ///
+ /// \param[in] name sequence name (\@SQ:SN)
+ /// \param[in] length sequence length (\@SQ:LN)
+ ///
+ SequenceInfo(const std::string& name,
+ const std::string& length = "0");
+
SequenceInfo(const SequenceInfo& other);
SequenceInfo(SequenceInfo&& other);
SequenceInfo& operator=(const SequenceInfo& other);
@@ -72,111 +98,135 @@ public:
/// \}
public:
+ /// \name Operators
+ /// \{
+
+ bool operator==(const SequenceInfo& other) const;
+ bool operator!=(const SequenceInfo& other) const;
+
+ /// \}
+
+public:
+ /// \name Conversion & Validation
+ ///
+
+ /// \returns true if sequence info is valid
+ ///
+ /// Currently this checks to see that Name is non-empty and Length is within
+ /// the accepted range.
+ ///
+ bool IsValid(void) const;
+
+ /// \brief Converts this object to its SAM-formatted text.
+ ///
+ /// \returns SAM-formatted text (no trailing newline)
+ ///
+ std::string ToSam(void) const;
+
+ /// \}
+
+public:
/// \name Attributes
/// \{
+ /// \returns string value of \@SQ:AS
std::string AssemblyId(void) const;
+ /// \returns string value of \@SQ:M5
std::string Checksum(void) const;
+ /// \returns any non-standard tags added to the \@PG entry
+ ///
+ /// Result map consists of {tagName => value}.
+ ///
std::map<std::string, std::string> CustomTags(void) const;
+ /// \returns string value of \@SQ:LN
std::string Length(void) const;
+ /// \returns string value of \@SQ:SN
std::string Name(void) const;
+ /// \returns string value of \@SQ:SP
std::string Species(void) const;
+ /// \returns string value of \@SQ:UR
std::string Uri(void) const;
/// \}
- /// \name Conversion & Validation
- ///
-
- bool IsValid(void) const;
-
- std::string ToSam(void) const;
-
- /// \}
-
public:
/// \name Attributes
+ /// \{
+ /// \brief Sets the value for \@SQ:AS
+ ///
+ /// \param[in] id new value
+ /// \returns reference to this object
+ ///
SequenceInfo& AssemblyId(const std::string& id);
+ /// \brief Sets the value for \@SQ:M5
+ ///
+ /// \param[in] checksum new value
+ /// \returns reference to this object
+ ///
SequenceInfo& Checksum(const std::string& checksum);
+ /// \brief Sets a new collection of non-standard tags.
+ ///
+ /// Custom tag map entries should consist of {tagName => value}.
+ ///
+ /// \param[in] custom new tags
+ /// \returns reference to this object
+ ///
SequenceInfo& CustomTags(const std::map<std::string, std::string>& custom);
+ /// \brief Sets the value for \@SQ:LN
+ ///
+ /// \param[in] length new value
+ /// \returns reference to this object
+ ///
SequenceInfo& Length(const std::string& length);
+ /// \brief Sets the value for \@SQ:SN
+ ///
+ /// \param[in] name new value
+ /// \returns reference to this object
+ ///
SequenceInfo& Name(const std::string& name);
+ /// \brief Sets the value for \@SQ:SP
+ ///
+ /// \param[in] species new value
+ /// \returns reference to this object
+ ///
SequenceInfo& Species(const std::string& species);
+ /// \brief Sets the value for \@SQ:UR
+ ///
+ /// \param[in] uri new value
+ /// \returns reference to this object
+ ///
SequenceInfo& Uri(const std::string& uri);
/// \}
private:
- std::string name_; // SN:<Name> * Unique Name required for valid SAM header*
- std::string length_; // LN:<Length> * [0 - 2^31-1]
- std::string assemblyId_; // AS:<AssemblyId>
- std::string checksum_; // M5:<Checksum>
- std::string species_; // SP:<Species>
- std::string uri_; // UR:<URI>
+ std::string name_; // SN:<Name> * must be unique for valid SAM *
+ std::string length_; // LN:<Length> * must be within [0 - 2^31-1] *
+ std::string assemblyId_; // AS:<AssemblyId>
+ std::string checksum_; // M5:<Checksum>
+ std::string species_; // SP:<Species>
+ std::string uri_; // UR:<URI>
// custom attributes
std::map<std::string, std::string> custom_; // tag => value
};
-inline std::string SequenceInfo::AssemblyId(void) const
-{ return assemblyId_; }
-
-inline SequenceInfo& SequenceInfo::AssemblyId(const std::string& id)
-{ assemblyId_ = id; return *this; }
-
-inline std::string SequenceInfo::Checksum(void) const
-{ return checksum_; }
-
-inline SequenceInfo& SequenceInfo::Checksum(const std::string& checksum)
-{ checksum_ = checksum; return *this; }
-
-inline std::map<std::string, std::string> SequenceInfo::CustomTags(void) const
-{ return custom_; }
-
-inline SequenceInfo& SequenceInfo::CustomTags(const std::map<std::string, std::string>& custom)
-{ custom_ = custom; return *this; }
-
-inline std::string SequenceInfo::Length(void) const
-{ return length_; }
-
-inline SequenceInfo& SequenceInfo::Length(const std::string& length)
-{ length_ = length; return *this; }
-
-inline std::string SequenceInfo::Name(void) const
-{ return name_; }
-
-inline SequenceInfo& SequenceInfo::Name(const std::string& name)
-{ name_ = name; return *this; }
-
-inline std::string SequenceInfo::Species(void) const
-{ return species_; }
-
-inline SequenceInfo& SequenceInfo::Species(const std::string& species)
-{ species_ = species; return *this; }
-
-inline std::string SequenceInfo::ToSam(const SequenceInfo& seq)
-{ return seq.ToSam(); }
-
-inline std::string SequenceInfo::Uri(void) const
-{ return uri_; }
-
-inline SequenceInfo& SequenceInfo::Uri(const std::string& uri)
-{ uri_ = uri; return *this; }
-
} // namespace BAM
} // namespace PacBio
+#include "pbbam/internal/SequenceInfo.inl"
+
#endif // SEQUENCEINFO_H
diff --git a/include/pbbam/Strand.h b/include/pbbam/Strand.h
index aa8535f..6fa5043 100644
--- a/include/pbbam/Strand.h
+++ b/include/pbbam/Strand.h
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file Strand.h
+/// \brief Defines the Strand enum.
+//
// Author: Derek Barnett
#ifndef STRAND_H
@@ -43,10 +47,13 @@
namespace PacBio {
namespace BAM {
+/// \brief This enum defines the strand orientations used for reporting
+/// alignment-related information.
+///
enum class Strand
{
- FORWARD
- , REVERSE
+ FORWARD ///< Forward strand
+ , REVERSE ///< Reverse strand
};
} // namespace BAM
diff --git a/include/pbbam/ZmwGroupQuery.h b/include/pbbam/SubreadLengthQuery.h
similarity index 55%
copy from include/pbbam/ZmwGroupQuery.h
copy to include/pbbam/SubreadLengthQuery.h
index 8b88113..e8839fe 100644
--- a/include/pbbam/ZmwGroupQuery.h
+++ b/include/pbbam/SubreadLengthQuery.h
@@ -32,36 +32,68 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file SubreadLengthQuery.h
+/// \brief Defines the SubreadLengthQuery class.
+//
// Author: Derek Barnett
-#ifndef ZMWGROUPQUERY_H
-#define ZMWGROUPQUERY_H
+#ifndef SUBREADLENGTHQUERY_H
+#define SUBREADLENGTHQUERY_H
+#include "pbbam/Compare.h"
#include "pbbam/Config.h"
#include "pbbam/internal/QueryBase.h"
#include <vector>
namespace PacBio {
namespace BAM {
-//namespace staging {
-class PBBAM_EXPORT ZmwGroupQuery : public internal::IGroupQuery
+/// \brief The SubreadLengthQuery class provides iterable access to a DataSet's
+/// %BAM records, limiting results to those matching a subread length
+/// criterion.
+///
+/// Example:
+/// \include code/SubreadLengthQuery.txt
+///
+/// \note Currently, all %BAM files must have a corresponding ".pbi" index file.
+/// Use BamFile::EnsurePacBioIndexExists before creating the query if one
+/// may not be present.
+///
+class PBBAM_EXPORT SubreadLengthQuery : public internal::IQuery
{
public:
- ZmwGroupQuery(const DataSet& dataset);
- ZmwGroupQuery(const std::vector<int32_t>& zmwWhitelist,
- const DataSet& dataset);
+ /// \brief Creates a new SubreadLengthQuery, limiting record results to only
+ /// those matching a subread length criterion.
+ ///
+ /// \param[in] length subread length value
+ /// \param[in] compareType compare operator
+ /// \param[in] dataset input data source(s)
+ ///
+ /// \throws std::runtime_error on failure to open/read underlying %BAM or PBI
+ /// files.
+ ///
+ SubreadLengthQuery(const int32_t length,
+ const Compare::Type compareType,
+ const DataSet& dataset);
-protected:
- FileIterPtr CreateIterator(const BamFile& file);
+ ~SubreadLengthQuery(void);
+
+public:
+ /// \brief Main iteration point for record access.
+ ///
+ /// Most client code should not need to use this method directly. Use
+ /// iterators instead.
+ ///
+ bool GetNext(BamRecord& r);
private:
- std::vector<int> whitelist_;
+ struct SubreadLengthQueryPrivate;
+ std::unique_ptr<SubreadLengthQueryPrivate> d_;
};
-//} // namespace staging
} // namespace BAM
} // namespace PacBio
-#endif // ZMWGROUPQUERY_H
+#endif // SUBREADLENGTHQUERY_H
diff --git a/include/pbbam/Tag.h b/include/pbbam/Tag.h
index c7f3d94..0520e38 100644
--- a/include/pbbam/Tag.h
+++ b/include/pbbam/Tag.h
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file Tag.h
+/// \brief Defines the Tag class.
+//
// Author: Derek Barnett
#ifndef TAG_H
@@ -46,7 +50,9 @@
namespace PacBio {
namespace BAM {
-/// \brief Provides information on the exact (C++) data type held by a Tag.
+/// \brief This enum is used to describe the exact (C++) data type held by a
+/// Tag.
+///
enum class TagDataType
{
INVALID = 0 ///< boost::blank
@@ -67,66 +73,137 @@ enum class TagDataType
, FLOAT_ARRAY = 15 ///< std::vector<float>
};
-/// \brief Provides additional instructions on interpreting the tag's value.
+/// \brief This enum provides additional instructions on interpreting the tag's
+/// value.
+///
+/// Some C++ data types (e.g. std::string) may represent more than one BAM tag
+/// type ('H' vs 'Z'). Thus a TagModifier may be used to indicate how to
+/// properly distinguish between these shared data types.
///
-/// Some C++ data types (e.g. std::string) may represent more than one BAM tag type
-/// ('H' vs 'Z'). These modifiers indicate how to properly interpret those shared
-/// data types.
enum class TagModifier
{
- /// \brief This indicates the tag has no modifiers set.
+ /// \brief This value indicates that the tag has no modifiers set.
+ ///
NONE = 0,
/// \brief This modifier marks an integer as ASCII.
///
- /// SAM/BAM has the concept of an ASCII character that is distinct from an 8-bit
- /// integer. However, there is no such pure separation in C++
- /// (int8_t/uint8_t are likely implemented as typedefs around char/unsigned char).
- /// Thus this modifier can be used to indicate a tag's integer data should be
+ /// SAM/BAM has the concept of an ASCII character that is distinct from an
+ /// 8-bit integer. However, there is no such pure separation in C++ - as
+ /// int8_t/uint8_t are likely implemented as typedefs around char/unsigned
+ /// char. Thus this modifier can be used to indicate a tag's value should be
/// interpreted as a printable, ASCII character.
+ ///
ASCII_CHAR,
- /// \brief This modifier marks std::string data as "hex string", rather than a regular string.
+ /// \brief This modifier marks std::string data as "hex string", rather than
+ /// a regular string.
+ ///
+ /// SAM/BAM has a distinction between regular strings and "Hex format"
+ /// strings. However, they are both manipulated in C++ via std::string. Thus
+ /// this modifier can be used to indicate that a tag's string data should be
+ /// interpreted as "Hex format" rather than a regular, literal string.
///
- /// SAM/BAM has a distinction between regular strings and "Hex format" strings.
- /// However, they are both manipulated in C++ via std::string. Thus this modifier
- /// can be used to indicate that a tag's string data should be interpreted as
- /// "Hex format" rather than a regular, literal string.
HEX_STRING
};
+/// \brief The Tag class represents a SAM/BAM record tag value.
+///
+/// SAM/BAM tags may store values from a variety of types: varying fixed-width
+/// integers, strings, arrays of data, etc.
+///
+/// The Tag class allow tags to be handled in a generic fashion, while
+/// maintaining a high level of type-safety. Only those types recognized by the
+/// SAM/BAM format are allowed, and extracting the value from a tag is subject
+/// to allowed conversion rules, as well.
+///
// Inspired by (but greatly simplified & modified from) the boost::variant
// wrapper approach taken by DynamicCpp (https://code.google.com/p/dynamic-cpp)
+//
class PBBAM_EXPORT Tag
{
public:
-
/// \name Constructors & Related Methods
/// \{
- /// Constructs a null tag.
- /// \sa IsNull()
+ /// \brief Creates an empty, null tag
Tag(void);
+ /// \brief Creates a Tag from a signed 8-bit integer or character.
+ ///
+ /// Without a TagModifier, the resulting Tag will be annotated as containing
+ /// an 8-bit integer, whether the input \p value was an integer or a char.
+ /// For ASCII tags, use one of these methods:
+ /// \include code/Tag_AsciiCtor.txt
+ ///
Tag(int8_t value);
+
+ /// \brief Creates a Tag from a signed 8-bit integer or character,
+ /// applying the provided modifier.
+ ///
+ /// This method allows direct construction of an ASCII character, rather
+ /// than an 8-bit integer (e.g. Tag('A', TagModifier::ASCII_CHAR) ).
+ ///
+ /// \throws runtime_error if \p modifier is not valid for int8_t data
+ ///
+ Tag(int8_t value, const TagModifier mod);
+
+ /// \brief Creates a Tag from an unsigned 8-bit integer or character.
+ ///
+ /// Without a TagModifier, the resulting Tag will be annotated as containing
+ /// an 8-bit unsigned integer, whether the input \p value was an integer or
+ /// a char. For ASCII tags, use one of these methods:
+ /// \include code/Tag_AsciiCtor.txt
+ ///
Tag(uint8_t value);
+
+ /// \brief Creates a Tag from 16-bit integer.
Tag(int16_t value);
+
+ /// \brief Creates a Tag from 16-bit unsigned integer.
Tag(uint16_t value);
+
+ /// \brief Creates a Tag from 32-bit signed integer.
Tag(int32_t value);
+
+ /// \brief Creates a Tag from 32-bit unsigned integer.
Tag(uint32_t value);
+
+ /// \brief Creates a Tag from floating-point value.
Tag(float value);
+
+ /// \brief Creates a Tag from string data.
Tag(const std::string& value);
+
+ /// \brief Creates a Tag from string data, adding modifier.
+ ///
+ /// \throws runtime_error if \p modifier is not valid for string data
+ ///
+ Tag(const std::string& value, const TagModifier mod);
+
+ /// \brief Creates a Tag from a vector of 8-bit integers.
Tag(const std::vector<int8_t>& value);
+
+ /// \brief Creates a Tag from a vector of 8-bit unsigned integers.
Tag(const std::vector<uint8_t>& value);
+
+ /// \brief Creates a Tag from a vector of 16-bit integers.
Tag(const std::vector<int16_t>& value);
+
+ /// \brief Creates a Tag from a vector of 16-bit unsigned integers.
Tag(const std::vector<uint16_t>& value);
+
+ /// Constructs a Tag from a vector of 32-bit integers.
Tag(const std::vector<int32_t>& value);
+
+ /// \brief Creates a Tag from a vector of 32-bit unsigned integers.
Tag(const std::vector<uint32_t>& value);
+
+ /// \brief Creates a Tag from a vector of floating-point values.
Tag(const std::vector<float>& value);
Tag(const Tag& other);
Tag(Tag&& other);
-
~Tag(void);
Tag& operator=(boost::blank value);
@@ -157,72 +234,82 @@ public:
/// \name Data Conversion & Validation
/// \{
- /// Converts the tag value to an ASCII character
+ /// \brief Converts the tag value to an ASCII character.
+ ///
+ /// Tag must hold an integral type, within the valid ASCII range [33-127].
///
- /// Tag must hold an integer type, within the valid ASCII range [33-127].
+ /// \returns ASCII character value
+ /// \throws std::runtime_error if not ASCII-compatible
///
- /// \returns ASCII character if valid
- /// \throws if not ASCII-compatible
char ToAscii(void) const;
/// \returns tag data as signed 8-bit (casting if needed)
- /// \throws if not integral data, or out of valid range
+ /// \throws std::runtime_error if not integral data, or out of valid range
int8_t ToInt8(void) const;
/// \returns tag data as unsigned 8-bit (casting if needed)
- /// \throws if not integral data, or out of valid range
+ /// \throws std::runtime_error if not integral data, or out of valid range
uint8_t ToUInt8(void) const;
/// \returns tag data as signed 16-bit (casting if needed)
- /// \throws if not integral data, or out of valid range
+ /// \throws std::runtime_error if not integral data, or out of valid range
int16_t ToInt16(void) const;
/// \returns tag data as unsigned 16-bit (casting if needed)
- /// \throws if not integral data, or out of valid range
+ /// \throws std::runtime_error if not integral data, or out of valid range
uint16_t ToUInt16(void) const;
/// \returns tag data as signed 32-bit (casting if needed)
- /// \throws if not integral data, or out of valid range
+ /// \throws std::runtime_error if not integral data, or out of valid range
int32_t ToInt32(void) const;
/// \returns tag data as unsigned 32-bit (casting if needed)
- /// \throws if not integral data, or out of valid range
+ /// \throws std::runtime_error if not integral data, or out of valid range
uint32_t ToUInt32(void) const;
/// \returns tag data as float
- /// \throws if tag does not contain a value of explicit type: float
+ /// \throws std::runtime_error if tag does not contain a value of
+ /// explicit type: float
float ToFloat(void) const;
/// \returns tag data as std::string
- /// \throws if tag does not contain a value of explicit type: std::string
+ /// \throws std::runtime_error if tag does not contain a value of explicit
+ /// type: std::string
std::string ToString(void) const;
/// \returns tag data as std::vector<int8_t>
- /// \throws if tag does not contain a value of explicit type: std::vector<int8_t>
+ /// \throws std::runtime_error if tag does not contain a value of explicit
+ /// type: std::vector<int8_t>
std::vector<int8_t> ToInt8Array(void) const;
/// \returns tag data as std::vector<uint8_t>
- /// \throws if tag does not contain a value of explicit type: std::vector<uint8_t>
+ /// \throws std::runtime_error if tag does not contain a value of explicit
+ /// type: std::vector<uint8_t>
std::vector<uint8_t> ToUInt8Array(void) const;
/// \returns tag data as std::vector<int16_t>
- /// \throws if tag does not contain a value of explicit type: std::vector<int16_t>
+ /// \throws std::runtime_error if tag does not contain a value of explicit
+ /// type: std::vector<int16_t>
std::vector<int16_t> ToInt16Array(void) const;
/// \returns tag data as std::vector<uint16_t>
- /// \throws if tag does not contain a value of explicit type: std::vector<uint16_t>
+ /// \throws std::runtime_error if tag does not contain a value of explicit
+ /// type: std::vector<uint16_t>
std::vector<uint16_t> ToUInt16Array(void) const;
/// \returns tag data as std::vector<int32_t>
- /// \throws if tag does not contain a value of explicit type: std::vector<int32_t>
+ /// \throws std::runtime_error if tag does not contain a value of explicit
+ /// type: std::vector<int32_t>
std::vector<int32_t> ToInt32Array(void) const;
/// \returns tag data as std::vector<uint32_t>
- /// \throws if tag does not contain a value of explicit type: std::vector<uint32_t>
+ /// \throws std::runtime_error if tag does not contain a value of explicit
+ /// type: std::vector<uint32_t>
std::vector<uint32_t> ToUInt32Array(void) const;
/// \returns tag data as std::vector<float>
- /// \throws if tag does not contain a value of explicit type: std::vector<float>
+ /// \throws std::runtime_error if tag does not contain a value of explicit
+ /// type: std::vector<float>
std::vector<float> ToFloatArray(void) const;
/// \}
@@ -259,7 +346,8 @@ public:
/// \returns true if tag contains a value of type: std::string
bool IsString(void) const;
- /// \returns true if tag contains a value of type: std::string \b AND has a TagModifier of HEX_STRING
+ /// \returns true if tag contains a value of type: std::string \b AND has a
+ /// TagModifier of TagModifier::HEX_STRING
bool IsHexString(void) const;
/// \returns true if tag contains a value of type: std::vector<int8_t>
@@ -325,7 +413,10 @@ public:
/// \returns current tag data modifier
TagModifier Modifier(void) const;
- /// Sets tag data modifier
+ /// \brief Sets tag data modifier.
+ ///
+ /// \param[in] m new modifier value
+ ///
/// \returns reference to this tag
Tag& Modifier(const TagModifier m);
@@ -357,6 +448,6 @@ private :
} // namespace BAM
} // namespace PacBio
-#include "internal/Tag.inl"
+#include "pbbam/internal/Tag.inl"
#endif // TAG_H
diff --git a/include/pbbam/TagCollection.h b/include/pbbam/TagCollection.h
index 42b4018..11c80ff 100644
--- a/include/pbbam/TagCollection.h
+++ b/include/pbbam/TagCollection.h
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file TagCollection.h
+/// \brief Defines the TagCollection class.
+//
// Author: Derek Barnett
#ifndef TAGCOLLECTION_H
@@ -46,9 +50,15 @@
namespace PacBio {
namespace BAM {
+/// \brief The TagCollection class represents a collection (or "dictionary") of
+/// tags.
+///
+/// Tags are mapped to their tag name, a 2-character string.
+///
class PBBAM_EXPORT TagCollection : public std::map<std::string, Tag>
{
public:
+ /// \returns true if the collection contains a tag with \p name
bool Contains(const std::string& name) const;
};
diff --git a/include/pbbam/ZmwGroupQuery.h b/include/pbbam/ZmwGroupQuery.h
index 8b88113..290d3ad 100644
--- a/include/pbbam/ZmwGroupQuery.h
+++ b/include/pbbam/ZmwGroupQuery.h
@@ -32,35 +32,62 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file ZmwGroupQuery.h
+/// \brief Defines the ZmwGroupQuery class.
+//
// Author: Derek Barnett
#ifndef ZMWGROUPQUERY_H
#define ZMWGROUPQUERY_H
-#include "pbbam/Config.h"
#include "pbbam/internal/QueryBase.h"
#include <vector>
namespace PacBio {
namespace BAM {
-//namespace staging {
+/// \brief The ZmwGroupQuery class provides iterable access to a DataSet's
+/// %BAM records, limiting results to those matching a ZMW hole number
+/// whitelist, and grouping those results by hole number.
+///
+/// Example:
+/// \include code/ZmwGroupQuery.txt
+///
+/// \note Currently, all %BAM files must have a corresponding ".pbi" index file.
+/// Use BamFile::EnsurePacBioIndexExists before creating the query if one
+/// may not be present.
+///
class PBBAM_EXPORT ZmwGroupQuery : public internal::IGroupQuery
{
public:
- ZmwGroupQuery(const DataSet& dataset);
+ /// \brief Creates a new ZmwGroupQuery, limiting record results to only
+ /// those matching a ZMW hole number criterion.
+ ///
+ /// \param[in] zmwWhitelist vector of allowed ZMW hole numbers
+ /// \param[in] dataset input data source(s)
+ ///
+ /// \throws std::runtime_error on failure to open/read underlying %BAM or
+ /// PBI files.
+ ///
ZmwGroupQuery(const std::vector<int32_t>& zmwWhitelist,
const DataSet& dataset);
+ ~ZmwGroupQuery(void);
-protected:
- FileIterPtr CreateIterator(const BamFile& file);
+public:
+ /// \brief Main iteration point for record access.
+ ///
+ /// Most client code should not need to use this method directly. Use
+ /// iterators instead.
+ ///
+ bool GetNext(std::vector<BamRecord>& records);
private:
- std::vector<int> whitelist_;
+ struct ZmwGroupQueryPrivate;
+ std::unique_ptr<ZmwGroupQueryPrivate> d_;
};
-//} // namespace staging
} // namespace BAM
} // namespace PacBio
diff --git a/include/pbbam/ZmwQuery.h b/include/pbbam/ZmwQuery.h
index fdd1d1d..0d6e166 100644
--- a/include/pbbam/ZmwQuery.h
+++ b/include/pbbam/ZmwQuery.h
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file ZmwQuery.h
+/// \brief Defines the ZmwQuery class.
+//
// Author: Derek Barnett
#ifndef ZMWQUERY_H
@@ -44,22 +48,48 @@
namespace PacBio {
namespace BAM {
-//namespace staging {
+/// \brief The ZmwQuery class provides iterable access to a DataSet's
+/// %BAM records, limiting results to those matching a ZMW hole number
+/// whitelist.
+///
+/// Example:
+/// \include code/ZmwQuery.txt
+///
+/// \note Currently, all %BAM files must have a corresponding ".pbi" index file.
+/// Use BamFile::EnsurePacBioIndexExists before creating the query if one
+/// may not be present.
+///
class PBBAM_EXPORT ZmwQuery : public internal::IQuery
{
public:
+ /// \brief Creates a new ZmwQuery, limiting record results to only
+ /// those matching a ZMW hole number criterion.
+ ///
+ /// \param[in] zmwWhitelist vector of allowed ZMW hole numbers
+ /// \param[in] dataset input data source(s)
+ ///
+ /// \throws std::runtime_error on failure to open/read underlying %BAM or
+ /// PBI files.
+ ///
ZmwQuery(const std::vector<int32_t>& zmwWhitelist,
const DataSet& dataset);
-protected:
- FileIterPtr CreateIterator(const BamFile& bamFile);
+ ~ZmwQuery(void);
+
+public:
+ /// \brief Main iteration point for record access.
+ ///
+ /// Most client code should not need to use this method directly. Use
+ /// iterators instead.
+ ///
+ bool GetNext(BamRecord& r);
private:
- std::vector<int> whitelist_;
+ struct ZmwQueryPrivate;
+ std::unique_ptr<ZmwQueryPrivate> d_;
};
-//} // namespace staging
} // namespace BAM
} // namespace PacBio
diff --git a/include/pbbam/virtual/VirtualRegionType.h b/include/pbbam/ZmwType.h
similarity index 84%
copy from include/pbbam/virtual/VirtualRegionType.h
copy to include/pbbam/ZmwType.h
index 6b917bf..a93e295 100644
--- a/include/pbbam/virtual/VirtualRegionType.h
+++ b/include/pbbam/ZmwType.h
@@ -32,27 +32,32 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
+//
+// File Description
+/// \file ZmwType.h
+/// \brief Defines the ZmwType enum.
+//
+// Author: Armin Töpfer
-// Author: Derek Barnett
-
-#ifndef REGIONTYPE_H
-#define REGIONTYPE_H
+#ifndef ZMWTYPE_H
+#define ZMWTYPE_H
#include "pbbam/Config.h"
namespace PacBio {
namespace BAM {
-/// Type of annotated region.
-enum class VirtualRegionType : char
+
+/// \brief This enum defines the different ZMW categories of scraps
+///
+enum class ZmwType : char
{
- ADAPTER = 'A',
- BARCODE = 'B',
- SUBREAD = 'S',
- HQREGION = 'H',
- LQREGION = 'L' // Outside the HQ region
+ CONTROL = 'C',
+ MALFORMED = 'M',
+ NORMAL = 'N',
+ SENTINEL = 'S'
};
} // namespace BAM
} // namespace PacBio
-#endif // REGIONTYPE_H
+#endif // ZMWTYPE_H
diff --git a/include/pbbam/TagCollection.h b/include/pbbam/ZmwTypeMap.h
similarity index 83%
copy from include/pbbam/TagCollection.h
copy to include/pbbam/ZmwTypeMap.h
index 42b4018..4dc781c 100644
--- a/include/pbbam/TagCollection.h
+++ b/include/pbbam/ZmwTypeMap.h
@@ -32,27 +32,34 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
+//
+// File Description
+/// \file ZmwTypeMap.h
+/// \brief Defines the ZmwTypeMap class.
+//
+// Author: Armin Töpfer
-// Author: Derek Barnett
+#ifndef ZMWTYPEMAP_H
+#define ZMWTYPEMAP_H
-#ifndef TAGCOLLECTION_H
-#define TAGCOLLECTION_H
+#include <map>
#include "pbbam/Config.h"
-#include "pbbam/Tag.h"
-#include <map>
-#include <string>
+#include "pbbam/ZmwType.h"
namespace PacBio {
namespace BAM {
-class PBBAM_EXPORT TagCollection : public std::map<std::string, Tag>
+/// \brief The ZmwTypeMap class provides mapping between char codes and
+/// ZmwType enum keys.
+///
+class ZmwTypeMap
{
public:
- bool Contains(const std::string& name) const;
+ static std::map<char, ZmwType> ParseChar;
};
} // namespace BAM
} // namespace PacBio
-#endif // TAGCOLLECTION_H
+#endif // ZMWTYPEMAP_H
diff --git a/include/pbbam/BamReader.h b/include/pbbam/exception/InvalidSequencingChemistryException.h
similarity index 53%
copy from include/pbbam/BamReader.h
copy to include/pbbam/exception/InvalidSequencingChemistryException.h
index bd0ced6..9761703 100644
--- a/include/pbbam/BamReader.h
+++ b/include/pbbam/exception/InvalidSequencingChemistryException.h
@@ -32,83 +32,67 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file InvalidSequencingChemistryException.h
+/// \brief Defines the InvalidSequencingChemistryException class.
+//
// Author: Derek Barnett
-#ifndef BAMREADER_H
-#define BAMREADER_H
+#ifndef INVALIDSEQUENCINGCHEMISTRYEXCEPTION_H
+#define INVALIDSEQUENCINGCHEMISTRYEXCEPTION_H
-#include "pbbam/BamHeader.h"
-#include "pbbam/BamRecord.h"
-#include "pbbam/Config.h"
+#include <exception>
+#include <sstream>
#include <string>
namespace PacBio {
namespace BAM {
-class PBBAM_EXPORT BamReader
+/// \brief The InvalidSequencingChemistryException class represents an exception
+/// that will be thrown when an invalid sequencing chemistry combination
+/// is encountered.
+///
+class InvalidSequencingChemistryException : public std::exception
{
-
public:
- enum ReadError
+ InvalidSequencingChemistryException(const std::string& bindingKit,
+ const std::string& sequencingKit,
+ const std::string& basecallerVersion)
+ : bindingKit_(bindingKit)
+ , sequencingKit_(sequencingKit)
+ , basecallerVersion_(basecallerVersion)
{
- NoError = 0
- , OpenFileError
- , ReadHeaderError
- , ReadRecordError
- };
-
-public:
- BamReader(void);
- virtual ~BamReader(void);
+ std::stringstream s;
+ s << "unsupported sequencing chemistry combination: " << std::endl
+ << " binding kit: " << bindingKit_ << std::endl
+ << " sequencing kit: " << sequencingKit_ << std::endl
+ << " basecaller version: " << basecallerVersion_ << std::endl;
+ what_ = s.str();
+ }
public:
+ const std::string& BindingKit(void) const
+ { return bindingKit_; }
- /// Closes the BAM file reader.
- void Close(void);
+ const std::string& SequencingKit(void) const
+ { return sequencingKit_; }
- /// Opens a BAM file for reading.
- ///
- /// Prefix \p filename with "http://" or "ftp://" for remote files,
- /// or set to "-" for stdin.
- ///
- /// \param[in] filename path to input BAM file
- ///
- /// \returns success/failure
- bool Open(const std::string& filename);
-
- /// \returns header as BamHeader object
- BamHeader::SharedPtr Header(void) const;
-
- /// \returns error status code
- BamReader::ReadError Error(void) const;
-
- /// \returns true if error encountered
- bool HasError(void) const;
-
- /// Fetches the next record in a BAM file.
- ///
- /// \param[out] record pointer to BamRecord object
- ///
- /// \returns succcess/failure
- bool GetNext(PBBAM_SHARED_PTR<BamRecord> record);
+ const std::string& BasecallerVersion(void) const
+ { return basecallerVersion_; }
public:
- std::string PacBioBamVersion(void) const;
-
-protected:
- bool GetNext(PBBAM_SHARED_PTR<bam1_t> rawRecord);
- void InitialOpen(void);
- PBBAM_SHARED_PTR<bam_hdr_t> RawHeader(void) const;
+ virtual const char* what(void) const noexcept
+ { return what_.c_str(); }
protected:
- PBBAM_SHARED_PTR<samFile> file_;
- PBBAM_SHARED_PTR<bam_hdr_t> header_;
- std::string filename_;
- BamReader::ReadError error_;
+ std::string bindingKit_;
+ std::string sequencingKit_;
+ std::string basecallerVersion_;
+ std::string what_;
};
} // namespace BAM
} // namespace PacBio
-#endif // BAMREADER_H
+#endif // INVALIDSEQUENCINGCHEMISTRYEXCEPTION_H
diff --git a/include/pbbam/Position.h b/include/pbbam/internal/Accuracy.inl
similarity index 77%
copy from include/pbbam/Position.h
copy to include/pbbam/internal/Accuracy.inl
index 110d7ed..f859662 100644
--- a/include/pbbam/Position.h
+++ b/include/pbbam/internal/Accuracy.inl
@@ -33,27 +33,34 @@
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
//
+// File Description
+/// \file Accuracy.inl
+/// \brief Inline implementations for the Accuracy class.
+//
// Author: Derek Barnett
-#ifndef POSITION_H
-#define POSITION_H
-
-#include "pbbam/Config.h"
+#include "pbbam/Accuracy.h"
namespace PacBio {
namespace BAM {
-/// \typedef typedef int32_t PacBio::BAM::Position
-///
-/// This type refers to all genomic positions. We use signed
-/// because SAM/BAM uses the -1 value to indicate unknown, unmapped, etc.
-/// positions.
-///
-typedef int32_t Position;
+inline Accuracy::Accuracy(float accuracy)
+{
+ if (accuracy < Accuracy::MIN)
+ accuracy = Accuracy::MIN;
+ else if (accuracy > Accuracy::MAX)
+ accuracy = Accuracy::MAX;
+ accuracy_ = accuracy;
+}
+
+inline Accuracy::Accuracy(const Accuracy &other)
+ : accuracy_(other.accuracy_)
+{ }
-static const Position UnmappedPosition = Position(-1);
+inline Accuracy::~Accuracy(void) { }
+
+inline Accuracy::operator float(void) const
+{ return accuracy_; }
} // namespace BAM
} // namespace PacBio
-
-#endif // POSITION_H
diff --git a/include/pbbam/internal/BamHeader.inl b/include/pbbam/internal/BamHeader.inl
new file mode 100644
index 0000000..2445a25
--- /dev/null
+++ b/include/pbbam/internal/BamHeader.inl
@@ -0,0 +1,154 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BamHeader.inl
+/// \brief Inline implementations for the BamHeader class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/BamHeader.h"
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+class BamHeaderPrivate
+{
+public:
+ std::string version_;
+ std::string pacbioBamVersion_;
+ std::string sortOrder_;
+ std::map<std::string, std::string> headerLineCustom_;
+
+ std::map<std::string, ReadGroupInfo> readGroups_; // id => read group info
+ std::map<std::string, ProgramInfo> programs_; // id => program info
+ std::vector<std::string> comments_;
+
+ // we need to preserve insertion order, use lookup for access by name
+ std::vector<SequenceInfo> sequences_;
+ std::map<std::string, int32_t> sequenceIdLookup_;
+};
+
+} // namespace internal
+
+inline BamHeader::BamHeader(void)
+ : d_(new internal::BamHeaderPrivate)
+{ }
+
+inline BamHeader::BamHeader(const BamHeader& other)
+ : d_(other.d_)
+{ }
+
+inline BamHeader::BamHeader(BamHeader&& other)
+ : d_(std::move(other.d_))
+{ }
+
+inline BamHeader& BamHeader::operator=(const BamHeader& other)
+{ d_ = other.d_; return *this; }
+
+inline BamHeader& BamHeader::operator=(BamHeader&& other)
+{ d_ = std::move(other.d_); return *this; }
+
+inline BamHeader::~BamHeader(void) { }
+
+inline BamHeader BamHeader::operator+(const BamHeader& other) const
+{ return DeepCopy() += other; }
+
+inline BamHeader& BamHeader::AddComment(const std::string& comment)
+{ d_->comments_.push_back(comment); return *this; }
+
+inline BamHeader& BamHeader::AddProgram(const ProgramInfo& pg)
+{ d_->programs_[pg.Id()] = pg; return *this; }
+
+inline BamHeader& BamHeader::AddReadGroup(const ReadGroupInfo& readGroup)
+{ d_->readGroups_[readGroup.Id()] = readGroup; return *this; }
+
+inline BamHeader& BamHeader::ClearComments(void)
+{ d_->comments_.clear(); return* this; }
+
+inline BamHeader& BamHeader::ClearPrograms(void)
+{ d_->programs_.clear(); return *this; }
+
+inline BamHeader& BamHeader::ClearReadGroups(void)
+{ d_->readGroups_.clear(); return *this; }
+
+inline std::vector<std::string> BamHeader::Comments(void) const
+{ return d_->comments_; }
+
+inline BamHeader& BamHeader::Comments(const std::vector<std::string>& comments)
+{ d_->comments_ = comments; return *this; }
+
+inline bool BamHeader::HasProgram(const std::string& id) const
+{ return d_->programs_.find(id) != d_->programs_.cend(); }
+
+inline bool BamHeader::HasReadGroup(const std::string& id) const
+{ return d_->readGroups_.find(id) != d_->readGroups_.cend(); }
+
+inline bool BamHeader::HasSequence(const std::string& name) const
+{ return d_->sequenceIdLookup_.find(name) != d_->sequenceIdLookup_.cend(); }
+
+inline size_t BamHeader::NumSequences(void) const
+{ return d_->sequences_.size(); }
+
+inline std::string BamHeader::PacBioBamVersion(void) const
+{ return d_->pacbioBamVersion_; }
+
+inline SequenceInfo BamHeader::Sequence(const int32_t id) const
+{ return d_->sequences_.at(id); }
+
+inline std::string BamHeader::SequenceLength(const int32_t id) const
+{ return Sequence(id).Length(); }
+
+inline std::string BamHeader::SequenceName(const int32_t id) const
+{ return Sequence(id).Name(); }
+
+inline std::vector<SequenceInfo> BamHeader::Sequences(void) const
+{ return d_->sequences_; }
+
+inline std::string BamHeader::SortOrder(void) const
+{ return d_->sortOrder_; }
+
+inline BamHeader& BamHeader::SortOrder(const std::string& order)
+{ d_->sortOrder_ = order; return *this; }
+
+inline std::string BamHeader::Version(void) const
+{ return d_->version_; }
+
+inline BamHeader& BamHeader::Version(const std::string& version)
+{ d_->version_ = version; return *this; }
+
+} // namespace BAM
+} // namespace PacBio
diff --git a/include/pbbam/internal/BamRecord.inl b/include/pbbam/internal/BamRecord.inl
new file mode 100644
index 0000000..11e2985
--- /dev/null
+++ b/include/pbbam/internal/BamRecord.inl
@@ -0,0 +1,166 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BamRecord.inl
+/// \brief Inline implementations for the BamRecord & BamRecordView classes.
+//
+// Author: Derek Barnett
+
+#include "pbbam/BamRecord.h"
+
+namespace PacBio {
+namespace BAM {
+
+inline BamRecord BamRecord::Clipped(const BamRecord& input,
+ const ClipType clipType,
+ const PacBio::BAM::Position start,
+ const PacBio::BAM::Position end)
+{
+ return input.Clipped(clipType, start, end);
+}
+
+inline BamRecord BamRecord::Clipped(const ClipType clipType,
+ const PacBio::BAM::Position start,
+ const PacBio::BAM::Position end) const
+{
+ BamRecord result(*this);
+ result.Clip(clipType, start, end);
+ return result;
+}
+
+inline BamRecord BamRecord::Mapped(const BamRecord& input,
+ const int32_t referenceId,
+ const Position refStart,
+ const Strand strand,
+ const Cigar& cigar,
+ const uint8_t mappingQuality)
+{
+ return input.Mapped(referenceId, refStart, strand, cigar, mappingQuality);
+}
+
+inline BamRecord BamRecord::Mapped(const int32_t referenceId,
+ const Position refStart,
+ const Strand strand,
+ const Cigar& cigar,
+ const uint8_t mappingQuality) const
+{
+ BamRecord result(*this);
+ result.Map(referenceId, refStart, strand, cigar, mappingQuality);
+ return result;
+}
+
+
+inline BamRecordView::BamRecordView(const BamRecord& record,
+ const Orientation orientation,
+ const bool aligned,
+ const bool exciseSoftClips)
+ : record_(record)
+ , orientation_(orientation)
+ , aligned_(aligned)
+ , exciseSoftClips_(exciseSoftClips)
+{ }
+
+inline QualityValues BamRecordView::AltLabelQVs(void) const
+{ return record_.AltLabelQV(orientation_); }
+
+inline std::string BamRecordView::AltLabelTags(void) const
+{ return record_.AltLabelTag(orientation_); }
+
+inline QualityValues BamRecordView::DeletionQVs(void) const
+{ return record_.DeletionQV(orientation_, aligned_, exciseSoftClips_); }
+
+inline std::string BamRecordView::DeletionTags(void) const
+{ return record_.DeletionTag(orientation_, aligned_, exciseSoftClips_); }
+
+inline QualityValues BamRecordView::InsertionQVs(void) const
+{ return record_.InsertionQV(orientation_, aligned_, exciseSoftClips_); }
+
+inline Frames BamRecordView::IPD(void) const
+{ return record_.IPD(orientation_, aligned_, exciseSoftClips_); }
+
+inline Frames BamRecordView::PrebaseFrames(void) const
+{ return record_.IPD(orientation_, aligned_, exciseSoftClips_); }
+
+inline QualityValues BamRecordView::LabelQVs(void) const
+{ return record_.LabelQV(orientation_); }
+
+inline QualityValues BamRecordView::MergeQVs(void) const
+{ return record_.MergeQV(orientation_, aligned_, exciseSoftClips_); }
+
+inline QualityValues BamRecordView::PulseMergeQVs(void) const
+{ return record_.PulseMergeQV(orientation_); }
+
+inline std::vector<float> BamRecordView::Pkmean(void) const
+{ return record_.Pkmean(orientation_); }
+
+inline std::vector<float> BamRecordView::Pkmid(void) const
+{ return record_.Pkmid(orientation_); }
+
+inline std::vector<float> BamRecordView::Pkmean2(void) const
+{ return record_.Pkmean2(orientation_); }
+
+inline std::vector<float> BamRecordView::Pkmid2(void) const
+{ return record_.Pkmid2(orientation_); }
+
+inline Frames BamRecordView::PrePulseFrames(void) const
+{ return record_.PrePulseFrames(orientation_); }
+
+inline std::string BamRecordView::PulseCalls(void) const
+{ return record_.PulseCall(orientation_); }
+
+inline Frames BamRecordView::PulseCallWidth(void) const
+{ return record_.PulseCallWidth(orientation_); }
+
+inline Frames BamRecordView::PulseWidths(void) const
+{ return record_.PulseWidth(orientation_, aligned_, exciseSoftClips_); }
+
+inline QualityValues BamRecordView::Qualities(void) const
+{ return record_.Qualities(orientation_, aligned_, exciseSoftClips_); }
+
+inline std::string BamRecordView::Sequence(void) const
+{ return record_.Sequence(orientation_, aligned_, exciseSoftClips_); }
+
+inline std::vector<uint32_t> BamRecordView::StartFrames(void) const
+{ return record_.StartFrame(orientation_); }
+
+inline QualityValues BamRecordView::SubstitutionQVs(void) const
+{ return record_.SubstitutionQV(orientation_, aligned_, exciseSoftClips_); }
+
+inline std::string BamRecordView::SubstitutionTags(void) const
+{ return record_.SubstitutionTag(orientation_, aligned_, exciseSoftClips_); }
+
+} // namespace BAM
+} // namespace PacBio
diff --git a/include/pbbam/DataSetXsd.h b/include/pbbam/internal/BamRecordBuilder.inl
similarity index 55%
copy from include/pbbam/DataSetXsd.h
copy to include/pbbam/internal/BamRecordBuilder.inl
index 29df5e1..212e831 100644
--- a/include/pbbam/DataSetXsd.h
+++ b/include/pbbam/internal/BamRecordBuilder.inl
@@ -32,80 +32,53 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file BamRecordBuilder.inl
+/// \brief Inline implementations for the BamRecordBuilder class.
+//
// Author: Derek Barnett
-#ifndef DATASETXSD_H
-#define DATASETXSD_H
-
-#include "pbbam/Config.h"
-#include <map>
-#include <string>
+#include "pbbam/BamRecordBuilder.h"
namespace PacBio {
namespace BAM {
-enum class XsdType
-{
- NONE
+inline BamRecordBuilder& BamRecordBuilder::Bin(const uint32_t bin)
+{ core_.bin = bin; return *this; }
- , AUTOMATION_CONSTRAINTS
- , BASE_DATA_MODEL
- , COLLECTION_METADATA
- , COMMON_MESSAGES
- , DATA_MODEL
- , DATA_STORE
- , DATASETS
- , DECL_DATA
- , PART_NUMBERS
- , PRIMARY_METRICS
- , REAGENT_KIT
- , RIGHTS_AND_ROLES
- , SAMPLE_INFO
- , SEEDING_DATA
-};
+inline BamRecordBuilder& BamRecordBuilder::Flag(const uint32_t flag)
+{ core_.flag = flag; return *this; }
-class PBBAM_EXPORT NamespaceInfo
-{
-public:
- NamespaceInfo(void);
- NamespaceInfo(const std::string& name,
- const std::string& uri);
+inline BamRecordBuilder& BamRecordBuilder::InsertSize(const int32_t iSize)
+{ core_.isize = iSize; return *this; }
-public:
- const std::string& Name(void) const { return name_; }
- const std::string& Uri(void) const { return uri_; }
+inline BamRecordBuilder& BamRecordBuilder::MapQuality(const uint8_t mapQual)
+{ core_.qual = mapQual; return *this; }
-private:
- std::string name_;
- std::string uri_;
-};
+inline BamRecordBuilder& BamRecordBuilder::MatePosition(const int32_t pos)
+{ core_.mpos = pos; return *this; }
-class PBBAM_EXPORT NamespaceRegistry
-{
-public:
- NamespaceRegistry(void);
- NamespaceRegistry(const NamespaceRegistry& other);
- NamespaceRegistry& operator=(const NamespaceRegistry& other);
- ~NamespaceRegistry(void);
+inline BamRecordBuilder& BamRecordBuilder::MateReferenceId(const int32_t id)
+{ core_.mtid = id; return *this; }
-public:
- const NamespaceInfo& DefaultNamespace(void) const;
- XsdType DefaultXsd(void) const;
- const NamespaceInfo& Namespace(const XsdType& xsd) const;
+inline BamRecordBuilder& BamRecordBuilder::Position(const int32_t pos)
+{ core_.pos = pos; return *this; }
- XsdType XsdForUri(const std::string& uri) const;
+inline BamRecordBuilder& BamRecordBuilder::Qualities(const std::string& qualities)
+{ qualities_ = qualities; return *this; }
-public:
- void Register(const XsdType& xsd, const NamespaceInfo& namespaceInfo);
- void SetDefaultXsd(const XsdType& xsd);
+inline BamRecordBuilder& BamRecordBuilder::Qualities(std::string&& qualities)
+{ qualities_ = std::move(qualities); return *this; }
-private:
- std::map<XsdType, NamespaceInfo> data_;
- XsdType defaultXsdType_;
-};
+inline BamRecordBuilder& BamRecordBuilder::ReferenceId(const int32_t id)
+{ core_.tid = id; return *this; }
-} // namespace PacBio
-} // namespace BAM
+inline BamRecordBuilder& BamRecordBuilder::Tags(const TagCollection& tags)
+{ tags_ = tags; return *this; }
+
+inline BamRecordBuilder& BamRecordBuilder::Tags(TagCollection&& tags)
+{ tags_ = std::move(tags); return *this; }
-#endif // DATASETXSD_H
+} // namespace BAM
+} // namespace PacBio
diff --git a/include/pbbam/internal/BamRecordImpl.inl b/include/pbbam/internal/BamRecordImpl.inl
new file mode 100644
index 0000000..6c0ecef
--- /dev/null
+++ b/include/pbbam/internal/BamRecordImpl.inl
@@ -0,0 +1,216 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BamRecordImpl.inl
+/// \brief Inline implementations for the BamRecordImpl class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/BamRecordImpl.h"
+
+namespace PacBio {
+namespace BAM {
+
+inline uint32_t BamRecordImpl::Bin(void) const
+{ return d_->core.bin; }
+
+inline BamRecordImpl& BamRecordImpl::Bin(uint32_t bin)
+{ d_->core.bin = bin; return *this; }
+
+inline uint32_t BamRecordImpl::Flag(void) const
+{ return d_->core.flag; }
+
+inline BamRecordImpl& BamRecordImpl::Flag(uint32_t flag)
+{ d_->core.flag = flag; return *this; }
+
+inline int32_t BamRecordImpl::InsertSize(void) const
+{ return d_->core.isize; }
+
+inline BamRecordImpl& BamRecordImpl::InsertSize(int32_t iSize)
+{ d_->core.isize = iSize; return *this; }
+
+inline uint8_t BamRecordImpl::MapQuality(void) const
+{ return d_->core.qual; }
+
+inline BamRecordImpl& BamRecordImpl::MapQuality(uint8_t mapQual)
+{ d_->core.qual = mapQual; return *this; }
+
+inline PacBio::BAM::Position BamRecordImpl::MatePosition(void) const
+{ return d_->core.mpos; }
+
+inline BamRecordImpl& BamRecordImpl::MatePosition(PacBio::BAM::Position pos)
+{ d_->core.mpos = pos; return *this; }
+
+inline int32_t BamRecordImpl::MateReferenceId(void) const
+{ return d_->core.mtid; }
+
+inline BamRecordImpl& BamRecordImpl::MateReferenceId(int32_t id)
+{ d_->core.mtid = id; return *this; }
+
+inline PacBio::BAM::Position BamRecordImpl::Position(void) const
+{ return d_->core.pos; }
+
+inline BamRecordImpl& BamRecordImpl::Position(PacBio::BAM::Position pos)
+{ d_->core.pos = pos; return *this; }
+
+inline int32_t BamRecordImpl::ReferenceId(void) const
+{ return d_->core.tid; }
+
+inline BamRecordImpl& BamRecordImpl::ReferenceId(int32_t id)
+{ d_->core.tid = id; return *this; }
+
+inline bool BamRecordImpl::IsDuplicate(void) const
+{ return (d_->core.flag & BamRecordImpl::DUPLICATE) != 0; }
+
+inline BamRecordImpl& BamRecordImpl::SetDuplicate(bool ok)
+{
+ if (ok) d_->core.flag |= BamRecordImpl::DUPLICATE;
+ else d_->core.flag &= ~BamRecordImpl::DUPLICATE;
+ return *this;
+}
+
+inline bool BamRecordImpl::IsFailedQC(void) const
+{ return (d_->core.flag & BamRecordImpl::FAILED_QC) != 0; }
+
+inline BamRecordImpl& BamRecordImpl::SetFailedQC(bool ok)
+{
+ if (ok) d_->core.flag |= BamRecordImpl::FAILED_QC;
+ else d_->core.flag &= ~BamRecordImpl::FAILED_QC;
+ return *this;
+}
+
+inline bool BamRecordImpl::IsFirstMate(void) const
+{ return (d_->core.flag & BamRecordImpl::MATE_1) != 0; }
+
+inline BamRecordImpl& BamRecordImpl::SetFirstMate(bool ok)
+{
+ if (ok) d_->core.flag |= BamRecordImpl::MATE_1;
+ else d_->core.flag &= ~BamRecordImpl::MATE_1;
+ return *this;
+}
+
+inline bool BamRecordImpl::IsMapped(void) const
+{ return (d_->core.flag & BamRecordImpl::UNMAPPED) == 0; }
+
+inline BamRecordImpl& BamRecordImpl::SetMapped(bool ok)
+{
+ if (ok) d_->core.flag &= ~BamRecordImpl::UNMAPPED;
+ else d_->core.flag |= BamRecordImpl::UNMAPPED;
+ return *this;
+}
+
+inline bool BamRecordImpl::IsMateMapped(void) const
+{ return (d_->core.flag & BamRecordImpl::MATE_UNMAPPED) == 0; }
+
+inline BamRecordImpl& BamRecordImpl::SetMateMapped(bool ok)
+{
+ if (ok) d_->core.flag &= ~BamRecordImpl::MATE_UNMAPPED;
+ else d_->core.flag |= BamRecordImpl::MATE_UNMAPPED;
+ return *this;
+}
+
+inline bool BamRecordImpl::IsMateReverseStrand(void) const
+{ return (d_->core.flag & BamRecordImpl::MATE_REVERSE_STRAND) != 0; }
+
+inline BamRecordImpl& BamRecordImpl::SetMateReverseStrand(bool ok)
+{
+ if (ok) d_->core.flag |= BamRecordImpl::MATE_REVERSE_STRAND;
+ else d_->core.flag &= ~BamRecordImpl::MATE_REVERSE_STRAND;
+ return *this;
+}
+
+inline bool BamRecordImpl::IsPaired(void) const
+{ return (d_->core.flag & BamRecordImpl::PAIRED) != 0; }
+
+inline BamRecordImpl& BamRecordImpl::SetPaired(bool ok)
+{
+ if (ok) d_->core.flag |= BamRecordImpl::PAIRED;
+ else d_->core.flag &= ~BamRecordImpl::PAIRED;
+ return *this;
+}
+
+inline bool BamRecordImpl::IsPrimaryAlignment(void) const
+{ return (d_->core.flag & BamRecordImpl::SECONDARY) == 0; }
+
+inline BamRecordImpl& BamRecordImpl::SetPrimaryAlignment(bool ok)
+{
+ if (ok) d_->core.flag &= ~BamRecordImpl::SECONDARY;
+ else d_->core.flag |= BamRecordImpl::SECONDARY;
+ return *this;
+}
+
+inline bool BamRecordImpl::IsProperPair(void) const
+{ return (d_->core.flag & BamRecordImpl::PROPER_PAIR) != 0; }
+
+inline BamRecordImpl& BamRecordImpl::SetProperPair(bool ok)
+{
+ if (ok) d_->core.flag |= BamRecordImpl::PROPER_PAIR;
+ else d_->core.flag &= ~BamRecordImpl::PROPER_PAIR;
+ return *this;
+}
+
+inline bool BamRecordImpl::IsReverseStrand(void) const
+{ return (d_->core.flag & BamRecordImpl::REVERSE_STRAND) != 0; }
+
+inline BamRecordImpl& BamRecordImpl::SetReverseStrand(bool ok)
+{
+ if (ok) d_->core.flag |= BamRecordImpl::REVERSE_STRAND;
+ else d_->core.flag &= ~BamRecordImpl::REVERSE_STRAND;
+ return *this;
+}
+
+inline bool BamRecordImpl::IsSecondMate(void) const
+{ return (d_->core.flag & BamRecordImpl::MATE_2) != 0; }
+
+inline BamRecordImpl& BamRecordImpl::SetSecondMate(bool ok)
+{
+ if (ok) d_->core.flag |= BamRecordImpl::MATE_2;
+ else d_->core.flag &= ~BamRecordImpl::MATE_2;
+ return *this;
+}
+
+inline bool BamRecordImpl::IsSupplementaryAlignment(void) const
+{ return (d_->core.flag & BamRecordImpl::SUPPLEMENTARY) != 0; }
+
+inline BamRecordImpl& BamRecordImpl::SetSupplementaryAlignment(bool ok)
+{
+ if (ok) d_->core.flag |= BamRecordImpl::SUPPLEMENTARY;
+ else d_->core.flag &= ~BamRecordImpl::SUPPLEMENTARY;
+ return *this;
+}
+
+} // namespace BAM
+} // namespace PacBio
diff --git a/include/pbbam/internal/BamRecordSort.h b/include/pbbam/internal/BamRecordSort.h
deleted file mode 100644
index 53dab05..0000000
--- a/include/pbbam/internal/BamRecordSort.h
+++ /dev/null
@@ -1,138 +0,0 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-//
-// * Neither the name of Pacific Biosciences nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-
-// Author: Derek Barnett
-
-#ifndef BAMRECORDSORT_H
-#define BAMRECORDSORT_H
-
-#include "pbbam/BamRecord.h"
-#include <functional>
-#include <cassert>
-
-namespace PacBio {
-namespace BAM {
-namespace internal {
-
-enum class SortOrder {
- Ascending = 0
- , Descending
-};
-
-template<typename ElemType>
-inline bool sort_helper(const SortOrder& order,
- const ElemType& lhs,
- const ElemType& rhs)
-{
- switch ( order ) {
- case SortOrder::Ascending : { std::less<ElemType> comp; return comp(lhs, rhs); }
- case SortOrder::Descending : { std::greater<ElemType> comp; return comp(lhs, rhs); }
- default :
- assert(false);
- }
- return false; // <-- unreachable
-}
-
-typedef std::binary_function<BamRecord, BamRecord, bool> BamRecordSortBase;
-
-struct Unsorted : public BamRecordSortBase
-{
-public:
- Unsorted(const SortOrder& order = SortOrder::Ascending)
- { (void)order; }
-
- bool operator()(const BamRecord& lhs, const BamRecord& rhs)
- { (void)lhs; (void)rhs; return false; }
-};
-
-struct ByQName : public BamRecordSortBase
-{
-public:
- ByQName(const SortOrder& order = SortOrder::Ascending)
- : m_order(order)
- { }
-
- bool operator()(const BamRecord& lhs, const BamRecord& rhs)
- { return sort_helper(m_order, lhs.FullName(), rhs.FullName()); }
-
-private:
- const SortOrder m_order;
-};
-
-struct ByPosition : public BamRecordSortBase
-{
-public:
- ByPosition(const SortOrder& order = SortOrder::Ascending)
- : m_order(order)
- { }
-
- // comparison function
- bool operator()(const BamRecord& lhs, const BamRecord& rhs) {
-
- const int32_t lhsId = lhs.ReferenceId();
- const int32_t rhsId = rhs.ReferenceId();
-
- // force unmapped aligmnents to end
- if ( lhsId == -1 ) return false;
- if ( rhsId == -1 ) return true;
-
- // if on same reference, sort on position
- if ( lhsId == rhsId )
- return sort_helper(m_order, lhs.ReferenceStart(), rhs.ReferenceStart());
-
- // otherwise sort on reference ID
- return sort_helper(m_order, lhsId, rhsId);
- }
-
-private:
- const SortOrder m_order;
-};
-
-struct ByZmw : public BamRecordSortBase {
-public:
- ByZmw(const SortOrder& order = SortOrder::Ascending) : m_order(order) { }
-
- bool operator()(const BamRecord& lhs, const BamRecord& rhs)
- { return sort_helper(m_order, lhs.HoleNumber(), rhs.HoleNumber()); }
-
-private:
- const SortOrder m_order;
-};
-
-} // namespace internal
-} // namespace BAM
-} // namespace PacBio
-
-#endif // BAMRECORDSORT_H
diff --git a/src/FilterEngine.cpp b/include/pbbam/internal/Cigar.inl
similarity index 71%
copy from src/FilterEngine.cpp
copy to include/pbbam/internal/Cigar.inl
index 1f47967..4799a72 100644
--- a/src/FilterEngine.cpp
+++ b/include/pbbam/internal/Cigar.inl
@@ -32,45 +32,46 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file Cigar.inl
+/// \brief Inline implemenations for the Cigar class.
+//
// Author: Derek Barnett
-#include "pbbam/internal/FilterEngine.h"
-using namespace PacBio;
-using namespace PacBio::BAM;
-using namespace PacBio::BAM::internal;
-using namespace std;
+#include "pbbam/Cigar.h"
namespace PacBio {
namespace BAM {
-namespace internal {
-} // namespace internal
-} // namespace BAM
-} // namespace PacBio
+inline Cigar::Cigar(void)
+ : std::vector<CigarOperation>()
+{ }
+inline Cigar::Cigar(const Cigar& other)
+ : std::vector<CigarOperation>(other)
+{ }
-FilterEngine::FilterEngine(void) { }
+inline Cigar::Cigar(Cigar&& other)
+ : std::vector<CigarOperation>(std::move(other))
+{ }
-bool FilterEngine::Accepts(const BamRecord& r) const
+inline Cigar& Cigar::operator=(const Cigar& other)
{
-// foreach ( const FilterParameter& param, parameters_ ) {
-// if (!param.Accepts(r))
-// return false;
-// }
-// return true;
- (void)r;
- return true;
+ std::vector<CigarOperation>::operator=(other);
+ return *this;
}
-bool FilterEngine::Accepts(vector<BamRecord>& r) const
+inline Cigar& Cigar::operator=(Cigar&& other)
{
- size_t i = 0;
- while (i < r.size()) {
- if (!Accepts(r.at(i)))
- r.erase(r.begin() + i);
- else
- ++i;
- }
- return !r.empty();
+ std::vector<CigarOperation>::operator=(std::move(other));
+ return *this;
}
+
+inline Cigar::~Cigar(void) { }
+
+inline Cigar Cigar::FromStdString(const std::string& stdString)
+{ return Cigar(stdString); }
+
+} // namespace BAM
+} // namespace PacBio
diff --git a/include/pbbam/CigarOperation.h b/include/pbbam/internal/CigarOperation.inl
similarity index 51%
copy from include/pbbam/CigarOperation.h
copy to include/pbbam/internal/CigarOperation.inl
index 951128d..167528c 100644
--- a/include/pbbam/CigarOperation.h
+++ b/include/pbbam/internal/CigarOperation.inl
@@ -32,131 +32,18 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file CigarOperation.inl
+/// \brief Inline implemenations for the CigarOperation class.
+//
// Author: Derek Barnett
-#ifndef CIGAROPERATION_H
-#define CIGAROPERATION_H
-
-#include "pbbam/Config.h"
-#include <stdexcept>
+#include "pbbam/CigarOperation.h"
namespace PacBio {
namespace BAM {
-/// Describes a CIGAR operation. Bracketed character is the corresponding SAM/BAM character code.
-///
-/// \warning ALIGNMENT_MATCH ('M') is included in this enum to maintain consistency with htslib.
-/// However, as of PacBio BAM spec version 3.0b7, this CIGAR operation \b forbidden. Attempt to
-/// read or write a record containing this operation will trigger a std::runtime_error.
-///
-enum class CigarOperationType
-{
- UNKNOWN_OP = -1 ///< unknown/invalid CIGAR operator
- , ALIGNMENT_MATCH = 0 ///< alignment match (can be a sequence match or mismatch) [M]
- , INSERTION ///< insertion to the reference [I]
- , DELETION ///< deletion from the reference [D]
- , REFERENCE_SKIP ///< skipped region from the reference [N]
- , SOFT_CLIP ///< soft clipping (clipped sequences present in SEQ) [S]
- , HARD_CLIP = 5 ///< hard clipping (clipped sequences NOT present in SEQ) [H]
- , PADDING ///< padding (silent deletion from padded reference) [P]
- , SEQUENCE_MATCH ///< sequence match [=]
- , SEQUENCE_MISMATCH ///< sequence mismatch [X]
-
- // TODO: looks like there is a new 'B' type in htslib source, referring to some 'back' operation...
- // no reference in htslib docs though yet as to what that applies to
-};
-
-class PBBAM_EXPORT CigarOperation
-{
-public:
-
- /// \name Operation Type Conversion Methods
- /// \{
-
- /// Convert between CigarOperationType enum & SAM/BAM character code.
- ///
- /// \param[in] type CigarOperationType value
- /// \returns SAM/BAM character code
- static char TypeToChar(const CigarOperationType type);
-
- /// Convert between CigarOperationType enum & SAM/BAM character code.
- ///
- /// \param[in] c SAM/BAM character code
- /// \returns CigarOperationType value
- static CigarOperationType CharToType(const char c);
-
- /// \}
-
-public:
- /// \name Constructors & Related Methods
- /// \{
-
- CigarOperation(void);
- CigarOperation(char c, uint32_t length);
- CigarOperation(CigarOperationType op, uint32_t length);
- CigarOperation(const CigarOperation& other);
- CigarOperation(CigarOperation&& other);
- CigarOperation& operator=(const CigarOperation& other);
- CigarOperation& operator=(CigarOperation&& other);
- ~CigarOperation(void);
-
- /// \}
-
-public:
-
- /// \returns operation type as SAM/BAM char code
- inline char Char(void) const;
-
- /// \returns operation length
- inline uint32_t Length(void) const;
-
- /// \returns operation type as CigarOperationType enum value
- inline CigarOperationType Type(void) const;
-
- /// \}
-
-public:
- /// \name Attributes
- /// \{
-
- /// Sets this operation type.
- ///
- /// \param[in] opChar SAM/BAM character code
- /// \returns reference to this operation
- inline CigarOperation& Char(const char opChar);
-
- /// Sets this operation length.
- ///
- /// \param[in] length
- /// \returns reference to this operation
- inline CigarOperation& Length(const uint32_t length);
-
- /// Sets this operation type.
- ///
- /// \param[in] opType CigarOperationType value
- /// \returns reference to this operation
- inline CigarOperation& Type(const CigarOperationType opType);
-
- /// \}
-
-public:
- /// \name Comparison Operators
- /// \{
-
- /// \returns true if both CIGAR operation type & length match
- inline bool operator==(const CigarOperation& other) const;
-
- /// \returns true if either CIGAR operation type or length differ
- inline bool operator!=(const CigarOperation& other) const;
-
- /// \}
-
-private:
- CigarOperationType type_;
- uint32_t length_;
-};
-
inline CigarOperation::CigarOperation(void)
: type_(CigarOperationType::UNKNOWN_OP)
, length_(0)
@@ -222,5 +109,3 @@ inline bool CigarOperation::operator!=(const CigarOperation& other) const
} // namespace BAM
} // namespace PacBio
-
-#endif // CIGAROPERATION_H
diff --git a/include/pbbam/internal/SequentialMergeStrategy.h b/include/pbbam/internal/Compare.inl
similarity index 63%
rename from include/pbbam/internal/SequentialMergeStrategy.h
rename to include/pbbam/internal/Compare.inl
index cf8e591..4eb5ccf 100644
--- a/include/pbbam/internal/SequentialMergeStrategy.h
+++ b/include/pbbam/internal/Compare.inl
@@ -32,48 +32,47 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file Compare.inl
+/// \brief Inline implementations for the Compare class & inner classes.
+//
// Author: Derek Barnett
-#ifndef SEQUENTIALMERGESTRATEGY_H
-#define SEQUENTIALMERGESTRATEGY_H
-
-#include "pbbam/internal/IMergeStrategy.h"
+#include "pbbam/Compare.h"
namespace PacBio {
namespace BAM {
namespace internal {
-class SequentialMergeStrategy : public IMergeStrategy
+template <typename T, T> struct MemberFnProxy;
+
+template<typename T, typename R, typename... Args, R (T::*fn)(Args...)const>
+struct MemberFnProxy<R (T::*)(Args...)const, fn>
{
-public:
- SequentialMergeStrategy(const std::vector<FileIterPtr>& iters);
- bool GetNext(BamRecord& r);
-private:
- std::vector<FileIterPtr> iters_;
+ static R call(const T& obj, Args&&... args)
+ {
+ return (obj.*fn)(std::forward<Args>(args)...);
+ }
};
-inline SequentialMergeStrategy::SequentialMergeStrategy(const std::vector<FileIterPtr>& iters)
- : IMergeStrategy()
- , iters_(iters)
-{ }
+} // namespace internal
-inline bool SequentialMergeStrategy::GetNext(BamRecord& r)
+template<typename ValueType,
+ typename Compare::MemberFunctionBaseHelper<ValueType>::MemberFnType fn,
+ typename CompareType>
+inline bool Compare::MemberFunctionBase<ValueType, fn, CompareType>::operator()(const BamRecord& lhs,
+ const BamRecord& rhs) const
{
- if (iters_.empty())
- return false;
+ using MemberFnType = typename Compare::MemberFunctionBaseHelper<ValueType>::MemberFnType;
+ using Proxy = internal::MemberFnProxy<MemberFnType, fn>;
- FileIterPtr iter = iters_.front();
- if (iter->GetNext(r))
- return true;
- else {
- iters_.erase(iters_.begin());
- return GetNext(r);
- }
+ CompareType cmp;
+ return cmp(Proxy::call(lhs), Proxy::call(rhs));
}
-} // namespace internal
+inline bool Compare::None::operator()(const BamRecord&, const BamRecord&) const
+{ return false; }
+
} // namespace BAM
} // namespace PacBio
-
-#endif // SEQUENCTIALMERGESTRATEGY_H
diff --git a/include/pbbam/internal/CompositeBamReader.inl b/include/pbbam/internal/CompositeBamReader.inl
new file mode 100644
index 0000000..9703697
--- /dev/null
+++ b/include/pbbam/internal/CompositeBamReader.inl
@@ -0,0 +1,397 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file CompositeBamReader.inl
+/// \brief Inline implementations for the composite BAM readers, for
+/// working with multiple input files.
+//
+// Author: Derek Barnett
+
+#include "pbbam/CompositeBamReader.h"
+#include <algorithm>
+#include <set>
+#include <sstream>
+#include <stdexcept>
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// -----------------------------------
+// Merging helpers
+// -----------------------------------
+
+inline CompositeMergeItem::CompositeMergeItem(std::unique_ptr<BamReader>&& rdr)
+ : reader(std::move(rdr))
+{ }
+
+inline CompositeMergeItem::CompositeMergeItem(std::unique_ptr<BamReader>&& rdr,
+ BamRecord&& rec)
+ : reader(std::move(rdr))
+ , record(std::move(rec))
+{ }
+
+inline CompositeMergeItem::CompositeMergeItem(CompositeMergeItem&& other)
+ : reader(std::move(other.reader))
+ , record(std::move(other.record))
+{ }
+
+inline CompositeMergeItem& CompositeMergeItem::operator=(CompositeMergeItem&& other)
+{
+ reader = std::move(other.reader);
+ record = std::move(other.record);
+ return *this;
+}
+
+inline CompositeMergeItem::~CompositeMergeItem(void) { }
+
+template<typename CompareType>
+inline bool CompositeMergeItemSorter<CompareType>::operator()(const CompositeMergeItem& lhs,
+ const CompositeMergeItem& rhs)
+{
+ const BamRecord& l = lhs.record;
+ const BamRecord& r = rhs.record;
+ return CompareType()(l, r);
+}
+
+} // namespace internal
+
+// -----------------------------------
+// GenomicIntervalCompositeBamReader
+// -----------------------------------
+
+inline GenomicIntervalCompositeBamReader::GenomicIntervalCompositeBamReader(const GenomicInterval& interval,
+ const std::vector<BamFile>& bamFiles)
+{
+ filenames_.reserve(bamFiles.size());
+ for(const auto& bamFile : bamFiles)
+ filenames_.push_back(bamFile.Filename());
+ Interval(interval);
+}
+
+inline GenomicIntervalCompositeBamReader::GenomicIntervalCompositeBamReader(const GenomicInterval& interval,
+ std::vector<BamFile>&& bamFiles)
+{
+ filenames_.reserve(bamFiles.size());
+ for(auto&& bamFile : bamFiles)
+ filenames_.push_back(bamFile.Filename());
+ Interval(interval);
+}
+
+inline GenomicIntervalCompositeBamReader::GenomicIntervalCompositeBamReader(const GenomicInterval& interval,
+ const DataSet& dataset)
+ : GenomicIntervalCompositeBamReader(interval, std::move(dataset.BamFiles()))
+{ }
+
+inline bool GenomicIntervalCompositeBamReader::GetNext(BamRecord& record)
+{
+ // nothing left to read
+ if (mergeItems_.empty())
+ return false;
+
+ // non-destructive 'pop' of first item from queue
+ auto firstIter = mergeItems_.begin();
+ auto firstItem = internal::CompositeMergeItem{ std::move(firstIter->reader), std::move(firstIter->record) };
+ mergeItems_.pop_front();
+
+ // store its record in our output record
+ std::swap(record, firstItem.record);
+
+ // try fetch 'next' from first item's reader
+ // if successful, re-insert it into container & re-sort on our new values
+ // otherwise, this item will go out of scope & reader destroyed
+ if (firstItem.reader->GetNext(firstItem.record)) {
+ mergeItems_.push_front(std::move(firstItem));
+ UpdateSort();
+ }
+
+ // return success
+ return true;
+}
+
+inline const GenomicInterval& GenomicIntervalCompositeBamReader::Interval(void) const
+{ return interval_; }
+
+inline GenomicIntervalCompositeBamReader& GenomicIntervalCompositeBamReader::Interval(const GenomicInterval& interval)
+{
+ auto updatedMergeItems = std::deque<internal::CompositeMergeItem>{ };
+ auto filesToCreate = std::set<std::string>{ filenames_.cbegin(), filenames_.cend() };
+
+ // update existing readers
+ while (!mergeItems_.empty()) {
+
+ // non-destructive 'pop' of first item from queue
+ auto firstIter = mergeItems_.begin();
+ auto firstItem = internal::CompositeMergeItem{ std::move(firstIter->reader), std::move(firstIter->record) };
+ mergeItems_.pop_front();
+
+ // reset interval
+ BaiIndexedBamReader* baiReader = dynamic_cast<BaiIndexedBamReader*>(firstItem.reader.get());
+ assert(baiReader);
+ baiReader->Interval(interval);
+
+ // try fetch 'next' from first item's reader
+ // if successful, re-insert it into container & re-sort on our new values
+ // otherwise, this item will go out of scope & reader destroyed
+ if (firstItem.reader->GetNext(firstItem.record)) {
+ updatedMergeItems.push_front(std::move(firstItem));
+ filesToCreate.erase(firstItem.reader->Filename());
+ }
+ }
+
+ // create readers for files that were not 'active' for the previous
+ std::vector<std::string> missingBai;
+ for (auto&& fn : filesToCreate) {
+ auto bamFile = BamFile{ fn };
+ if (bamFile.StandardIndexExists()) {
+ auto item = internal::CompositeMergeItem{ std::unique_ptr<BamReader>{ new BaiIndexedBamReader{ interval, std::move(bamFile) } } };
+ if (item.reader->GetNext(item.record))
+ updatedMergeItems.push_back(std::move(item));
+ // else not an error, simply no data matching interval
+ }
+ else {
+ // maybe handle PBI-backed interval searches if BAI missing, but for now treat as error
+ missingBai.push_back(bamFile.Filename());
+ }
+ }
+
+ // throw if any files missing BAI
+ if (!missingBai.empty()) {
+ std::stringstream e;
+ e << "failed to open GenomicIntervalCompositeBamReader because the following files are missing a BAI file:" << std::endl;
+ for (const auto& fn : missingBai)
+ e << " " << fn << std::endl;
+ throw std::runtime_error(e.str());
+ }
+
+ // update our actual container and return
+ mergeItems_ = std::move(updatedMergeItems);
+ UpdateSort();
+ return *this;
+}
+
+struct OrderByPosition
+{
+ static inline bool less_than(const BamRecord& lhs, const BamRecord& rhs)
+ {
+ const int32_t lhsId = lhs.ReferenceId();
+ const int32_t rhsId = rhs.ReferenceId();
+ if (lhsId == -1) return false;
+ if (rhsId == -1) return true;
+
+ if (lhsId == rhsId)
+ return lhs.ReferenceStart() < rhs.ReferenceStart();
+ else return lhsId < rhsId;
+ }
+
+ static inline bool equals(const BamRecord& lhs, const BamRecord& rhs)
+ {
+ return lhs.ReferenceId() == rhs.ReferenceId() &&
+ lhs.ReferenceStart() == rhs.ReferenceStart();
+ }
+};
+
+struct PositionSorter : std::binary_function<internal::CompositeMergeItem, internal::CompositeMergeItem, bool>
+{
+ bool operator()(const internal::CompositeMergeItem& lhs,
+ const internal::CompositeMergeItem& rhs)
+ {
+ const BamRecord& l = lhs.record;
+ const BamRecord& r = rhs.record;
+ return OrderByPosition::less_than(l, r);
+ }
+};
+
+inline void GenomicIntervalCompositeBamReader::UpdateSort(void)
+{ std::sort(mergeItems_.begin(), mergeItems_.end(), PositionSorter{ }); }
+
+// ------------------------------
+// PbiRequestCompositeBamReader
+// ------------------------------
+
+template<typename OrderByType>
+inline PbiFilterCompositeBamReader<OrderByType>::PbiFilterCompositeBamReader(const PbiFilter& filter,
+ const std::vector<BamFile>& bamFiles)
+{
+ filenames_.reserve(bamFiles.size());
+ for(const auto& bamFile : bamFiles)
+ filenames_.push_back(bamFile.Filename());
+ Filter(filter);
+}
+
+template<typename OrderByType>
+inline PbiFilterCompositeBamReader<OrderByType>::PbiFilterCompositeBamReader(const PbiFilter& filter,
+ std::vector<BamFile>&& bamFiles)
+{
+ filenames_.reserve(bamFiles.size());
+ for(auto&& bamFile : bamFiles)
+ filenames_.push_back(bamFile.Filename());
+ Filter(filter);
+}
+
+template<typename OrderByType>
+inline PbiFilterCompositeBamReader<OrderByType>::PbiFilterCompositeBamReader(const PbiFilter& filter,
+ const DataSet& dataset)
+ : PbiFilterCompositeBamReader(filter, std::move(dataset.BamFiles()))
+{ }
+
+template<typename OrderByType>
+inline bool PbiFilterCompositeBamReader<OrderByType>::GetNext(BamRecord& record)
+{
+ // nothing left to read
+ if (mergeQueue_.empty())
+ return false;
+
+ // non-destructive 'pop' of first item from queue
+ auto firstIter = mergeQueue_.begin();
+ auto firstItem = value_type{ std::move(firstIter->reader), std::move(firstIter->record) };
+ mergeQueue_.pop_front();
+
+ // store its record in our output record
+ std::swap(record, firstItem.record);
+
+ // try fetch 'next' from first item's reader
+ // if successful, re-insert it into container & re-sort on our new values
+ // otherwise, this item will go out of scope & reader destroyed
+ if (firstItem.reader->GetNext(firstItem.record)) {
+ mergeQueue_.push_front(std::move(firstItem));
+ UpdateSort();
+ }
+
+ // return success
+ return true;
+}
+
+template<typename OrderByType>
+inline PbiFilterCompositeBamReader<OrderByType>&
+PbiFilterCompositeBamReader<OrderByType>::Filter(const PbiFilter& filter)
+{
+ auto updatedMergeItems = container_type{ };
+ auto filesToCreate = std::set<std::string>{ filenames_.cbegin(), filenames_.cend() };
+
+ // update existing readers
+ while (!mergeQueue_.empty()) {
+
+ // non-destructive 'pop' of first item from queue
+ auto firstIter = mergeQueue_.begin();
+ auto firstItem = internal::CompositeMergeItem{ std::move(firstIter->reader), std::move(firstIter->record) };
+ mergeQueue_.pop_front();
+
+ // reset request
+ PbiIndexedBamReader* pbiReader = dynamic_cast<PbiIndexedBamReader*>(firstItem.reader.get());
+ assert(pbiReader);
+ pbiReader->Filter(filter);
+
+ // try fetch 'next' from first item's reader
+ // if successful, re-insert it into container & re-sort on our new values
+ // otherwise, this item will go out of scope & reader destroyed
+ if (firstItem.reader->GetNext(firstItem.record)) {
+ updatedMergeItems.push_front(std::move(firstItem));
+ filesToCreate.erase(firstItem.reader->Filename());
+ }
+ }
+
+ // create readers for files that were not 'active' for the previous
+ std::vector<std::string> missingPbi;
+ for (auto&& fn : filesToCreate) {
+ auto bamFile = BamFile{ fn };
+ if (bamFile.PacBioIndexExists()) {
+ auto item = internal::CompositeMergeItem{ std::unique_ptr<BamReader>{ new PbiIndexedBamReader{ filter, std::move(bamFile) } } };
+ if (item.reader->GetNext(item.record))
+ updatedMergeItems.push_back(std::move(item));
+ // else not an error, simply no data matching filter
+ }
+ else
+ missingPbi.push_back(fn);
+ }
+
+ // throw if any files missing PBI
+ if (!missingPbi.empty()) {
+ std::stringstream e;
+ e << "failed to open PbiFilterCompositeBamReader because the following files are missing a PBI file:" << std::endl;
+ for (const auto& fn : missingPbi)
+ e << " " << fn << std::endl;
+ throw std::runtime_error(e.str());
+ }
+
+ // update our actual container and return
+ mergeQueue_ = std::move(updatedMergeItems);
+ UpdateSort();
+ return *this;
+}
+
+template<typename OrderByType>
+inline void PbiFilterCompositeBamReader<OrderByType>::UpdateSort(void)
+{ std::sort(mergeQueue_.begin(), mergeQueue_.end(), merge_sorter_type{}); }
+
+// ------------------------------
+// SequentialCompositeBamReader
+// ------------------------------
+
+inline SequentialCompositeBamReader::SequentialCompositeBamReader(const std::vector<BamFile>& bamFiles)
+{
+ for (auto&& bamFile : bamFiles)
+ readers_.emplace_back(new BamReader{ bamFile });
+}
+
+inline SequentialCompositeBamReader::SequentialCompositeBamReader(std::vector<BamFile>&& bamFiles)
+{
+ for (auto&& bamFile : bamFiles)
+ readers_.emplace_back(new BamReader{ std::move(bamFile) });
+}
+
+inline SequentialCompositeBamReader::SequentialCompositeBamReader(const DataSet& dataset)
+ : SequentialCompositeBamReader(std::move(dataset.BamFiles()))
+{ }
+
+inline bool SequentialCompositeBamReader::GetNext(BamRecord& record)
+{
+ // try first reader, if successful return true
+ // else pop reader and try next, until all readers exhausted
+ while (!readers_.empty()) {
+ auto& reader = readers_.front();
+ if (reader->GetNext(record))
+ return true;
+ else
+ readers_.pop_front();
+ }
+
+ // no readers available
+ return false;
+}
+
+} // namespace BAM
+} // namespace PacBio
diff --git a/include/pbbam/internal/DataSet.inl b/include/pbbam/internal/DataSet.inl
index 34f4af3..6627ddf 100644
--- a/include/pbbam/internal/DataSet.inl
+++ b/include/pbbam/internal/DataSet.inl
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file DataSet.inl
+/// \brief Inline implementations for the DataSet class.
+//
// Author: Derek Barnett
#include "pbbam/DataSet.h"
diff --git a/include/pbbam/internal/DataSetBaseTypes.h b/include/pbbam/internal/DataSetBaseTypes.h
index 98fae1f..917162a 100644
--- a/include/pbbam/internal/DataSetBaseTypes.h
+++ b/include/pbbam/internal/DataSetBaseTypes.h
@@ -129,7 +129,8 @@ public:
class StrictEntityType : public BaseEntityType
{
protected:
- StrictEntityType(const std::string& label,
+ StrictEntityType(const std::string& metatype,
+ const std::string& label,
const XsdType& xsd = XsdType::BASE_DATA_MODEL);
public:
@@ -149,14 +150,18 @@ public:
class InputOutputDataType : public StrictEntityType
{
protected:
- InputOutputDataType(const std::string& label,
+ InputOutputDataType(const std::string& metatype,
+ const std::string& filename,
+ const std::string& label,
const XsdType& xsd = XsdType::BASE_DATA_MODEL);
};
class IndexedDataType : public InputOutputDataType
{
protected:
- IndexedDataType(const std::string& label,
+ IndexedDataType(const std::string& metatype,
+ const std::string& filename,
+ const std::string& label,
const XsdType& xsd = XsdType::BASE_DATA_MODEL);
public:
diff --git a/include/pbbam/internal/DataSetElement.h b/include/pbbam/internal/DataSetElement.h
index e80e55e..c7f7c8d 100644
--- a/include/pbbam/internal/DataSetElement.h
+++ b/include/pbbam/internal/DataSetElement.h
@@ -63,7 +63,7 @@ class XmlName
// prefix local name
public:
- XmlName(const std::string& fullName);
+ XmlName(const std::string& fullName, bool verbatim = false);
XmlName(const std::string& localName, const std::string& prefix);
XmlName(const XmlName& other);
XmlName(XmlName&& other);
@@ -79,18 +79,23 @@ public:
const boost::string_ref LocalName(void) const;
const boost::string_ref Prefix(void) const;
const std::string& QualifiedName(void) const;
+ bool Verbatim(void) const;
private:
std::string qualifiedName_;
size_t prefixSize_;
size_t localNameOffset_;
size_t localNameSize_;
+ bool verbatim_;
};
+struct FromInputXml { };
+
class DataSetElement
{
public:
DataSetElement(const std::string& label, const XsdType& xsd = XsdType::NONE);
+ DataSetElement(const std::string& label, const FromInputXml& fromInputXml, const XsdType& xsd = XsdType::NONE);
DataSetElement(const DataSetElement& other);
DataSetElement(DataSetElement&& other);
DataSetElement& operator=(const DataSetElement& other);
@@ -115,6 +120,7 @@ public:
const boost::string_ref LocalNameLabel(void) const;
const boost::string_ref PrefixLabel(void) const;
const std::string& QualifiedNameLabel(void) const;
+ bool IsVerbatimLabel(void) const;
const std::string& Text(void) const;
std::string& Text(void);
diff --git a/include/pbbam/internal/DataSetElement.inl b/include/pbbam/internal/DataSetElement.inl
index 08f15b1..37a673f 100644
--- a/include/pbbam/internal/DataSetElement.inl
+++ b/include/pbbam/internal/DataSetElement.inl
@@ -52,6 +52,13 @@ inline DataSetElement::DataSetElement(const std::string& label, const XsdType& x
, label_(label)
{ }
+inline DataSetElement::DataSetElement(const std::string& label,
+ const FromInputXml&,
+ const XsdType& xsd)
+ : xsd_(xsd)
+ , label_(label, true)
+{ }
+
inline DataSetElement::DataSetElement(const DataSetElement& other)
: xsd_(other.xsd_)
, label_(other.label_)
@@ -216,7 +223,7 @@ inline const std::string& DataSetElement::QualifiedNameLabel(void) const
//{ return label_.QualifiedName(); }
inline void DataSetElement::Label(const std::string& label)
-{ label_ = XmlName(label); }
+{ label_ = XmlName(label, true); }
inline size_t DataSetElement::NumAttributes(void) const
{ return attributes_.size(); }
@@ -246,6 +253,9 @@ inline void DataSetElement::ChildText(const std::string& label,
}
}
+inline bool DataSetElement::IsVerbatimLabel(void) const
+{ return label_.Verbatim(); }
+
inline const std::string& DataSetElement::Text(void) const
{ return text_; }
@@ -262,14 +272,13 @@ inline const XsdType& DataSetElement::Xsd(void) const
// XmlName
// ----------------
-inline XmlName::XmlName(const std::string& fullName)
+inline XmlName::XmlName(const std::string& fullName, bool verbatim)
: qualifiedName_(fullName)
, prefixSize_(0)
, localNameOffset_(0)
, localNameSize_(0)
+ , verbatim_(verbatim)
{
-// std::cerr << "Creating XmlName from fullName: " << fullName << std::endl;
-
const size_t colonFound = qualifiedName_.find(':');
if (colonFound == std::string::npos || colonFound == 0)
localNameSize_ = qualifiedName_.size();
@@ -282,10 +291,6 @@ inline XmlName::XmlName(const std::string& fullName)
localNameOffset_ = prefixSize_;
if (prefixSize_ != 0)
++localNameOffset_;
-
-// std::cerr << " qualName: " << qualifiedName_ << std::endl;
-// std::cerr << " prefix: " << Prefix() << std::endl;
-// std::cerr << " localName: " << LocalName() << std::endl;
}
inline XmlName::XmlName(const std::string& localName,
@@ -293,6 +298,7 @@ inline XmlName::XmlName(const std::string& localName,
: prefixSize_(prefix.size())
, localNameOffset_(prefixSize_)
, localNameSize_(localName.size())
+ , verbatim_(true)
{
qualifiedName_.clear();
qualifiedName_.reserve(localNameSize_+ prefixSize_ + 1);
@@ -311,6 +317,7 @@ inline XmlName::XmlName(const XmlName& other)
, prefixSize_(other.prefixSize_)
, localNameOffset_(other.localNameOffset_)
, localNameSize_(other.localNameSize_)
+ , verbatim_(other.verbatim_)
{ }
inline XmlName::XmlName(XmlName&& other)
@@ -318,6 +325,7 @@ inline XmlName::XmlName(XmlName&& other)
, prefixSize_(std::move(other.prefixSize_))
, localNameOffset_(std::move(other.localNameOffset_))
, localNameSize_(std::move(other.localNameSize_))
+ , verbatim_(std::move(other.verbatim_))
{ }
inline XmlName& XmlName::operator=(const XmlName& other)
@@ -326,6 +334,7 @@ inline XmlName& XmlName::operator=(const XmlName& other)
prefixSize_ = other.prefixSize_;
localNameOffset_ = other.localNameOffset_;
localNameSize_ = other.localNameSize_;
+ verbatim_ = other.verbatim_;
return *this;
}
@@ -335,6 +344,7 @@ inline XmlName& XmlName::operator=(XmlName&& other)
prefixSize_ = std::move(other.prefixSize_);
localNameOffset_ = std::move(other.localNameOffset_);
localNameSize_ = std::move(other.localNameSize_);
+ verbatim_ = std::move(other.verbatim_);
return *this;
}
@@ -346,17 +356,6 @@ inline bool XmlName::operator==(const XmlName& other) const
inline bool XmlName::operator!=(const XmlName& other) const
{ return !(*this == other); }
-//inline void XmlName::CalculateSizes(const size_t localNameSize,
-// const size_t prefixSize)
-//{
-// size_t offset = prefixSize;
-// if (offset != 0)
-// ++offset;
-
-// localName_ = boost::string_ref(qualifiedName_.data() + offset, localNameSize);
-// prefix_ = boost::string_ref(qualifiedName_.data(), prefixSize);
-//}
-
inline const boost::string_ref XmlName::LocalName(void) const
{ return boost::string_ref(qualifiedName_.data() + localNameOffset_, localNameSize_); }
@@ -366,6 +365,9 @@ inline const boost::string_ref XmlName::Prefix(void) const
inline const std::string& XmlName::QualifiedName(void) const
{ return qualifiedName_; }
+inline bool XmlName::Verbatim(void) const
+{ return verbatim_; }
+
} // namespace internal
} // namespace BAM
} // namespace PacBio
diff --git a/include/pbbam/internal/DataSetTypes.inl b/include/pbbam/internal/DataSetTypes.inl
index 105dffb..dbcbd26 100644
--- a/include/pbbam/internal/DataSetTypes.inl
+++ b/include/pbbam/internal/DataSetTypes.inl
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file DataSetTypes.inl
+/// \brief Inline implementations for the public DataSet component classes.
+//
// Author: Derek Barnett
#include "pbbam/DataSetTypes.h"
diff --git a/include/pbbam/Accuracy.h b/include/pbbam/internal/Frames.inl
similarity index 56%
copy from include/pbbam/Accuracy.h
copy to include/pbbam/internal/Frames.inl
index 03c233e..37cb64b 100644
--- a/include/pbbam/Accuracy.h
+++ b/include/pbbam/internal/Frames.inl
@@ -32,67 +32,62 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file Frames.inl
+/// \brief Inline implementations for the Frames class.
+//
// Author: Derek Barnett
-#ifndef ACCURACY_H
-#define ACCURACY_H
-
-#include "pbbam/Config.h"
+#include "pbbam/Frames.h"
namespace PacBio {
namespace BAM {
-/// \brief The Accuracy class represents the expected accuracy of a BamRecord.
-/// Values are clamped to [0,1000].
-///
-class PBBAM_EXPORT Accuracy
-{
-public:
- static const int MIN;
- static const int MAX;
+inline const std::vector<uint16_t>& Frames::Data(void) const
+{ return data_; }
+
+inline std::vector<uint16_t>& Frames::DataRaw(void)
+{ return data_; }
-public:
- /// \name Constructors & Related Methods
- /// \{
+inline std::vector<uint8_t> Frames::Encode(void) const
+{ return Frames::Encode(data_); }
- /// \note This is not an 'explicit' ctor, to make it as easy to use in
- /// numeric operations as possible. We really just want to make
- /// sure that the acceptable range is respected.
- Accuracy(int accuracy);
- Accuracy(const Accuracy& other);
- ~Accuracy(void);
+inline Frames& Frames::Data(const std::vector<uint16_t>& frames)
+{ data_ = frames; return *this; }
- /// \}
+inline Frames& Frames::Data(std::vector<uint16_t>&& frames)
+{ data_ = std::move(frames); return *this; }
-public:
+inline std::vector<uint16_t>::const_iterator Frames::begin(void) const
+{ return data_.begin(); }
- /// \returns Accuracy as integer
- operator int(void) const;
+inline std::vector<uint16_t>::iterator Frames::begin(void)
+{ return data_.begin(); }
-private:
- int accuracy_;
-};
+inline std::vector<uint16_t>::const_iterator Frames::cbegin(void) const
+{ return data_.cbegin(); }
-inline Accuracy::Accuracy(int accuracy)
-{
- if (accuracy < Accuracy::MIN)
- accuracy = Accuracy::MIN;
- else if (accuracy > Accuracy::MAX)
- accuracy = Accuracy::MAX;
- accuracy_ = accuracy;
-}
+inline std::vector<uint16_t>::const_iterator Frames::cend(void) const
+{ return data_.cend(); }
-inline Accuracy::Accuracy(const Accuracy &other)
- : accuracy_(other.accuracy_)
-{ }
+inline std::vector<uint16_t>::const_iterator Frames::end(void) const
+{ return data_.end(); }
-inline Accuracy::~Accuracy(void) { }
+inline std::vector<uint16_t>::iterator Frames::end(void)
+{ return data_.end(); }
-inline Accuracy::operator int(void) const
-{ return accuracy_; }
+inline size_t Frames::size(void) const
+{ return data_.size(); }
+
+inline bool Frames::empty(void) const
+{ return data_.empty(); }
+
+inline bool Frames::operator==(const Frames& other) const
+{ return data_ == other.data_; }
+
+inline bool Frames::operator!=(const Frames& other) const
+{ return !(*this == other); }
} // namespace BAM
} // namespace PacBio
-
-#endif // ACCURACY_H
diff --git a/include/pbbam/GenomicIntervalQuery.h b/include/pbbam/internal/GenomicInterval.inl
similarity index 55%
copy from include/pbbam/GenomicIntervalQuery.h
copy to include/pbbam/internal/GenomicInterval.inl
index c1e10f9..07c18ef 100644
--- a/include/pbbam/GenomicIntervalQuery.h
+++ b/include/pbbam/internal/GenomicInterval.inl
@@ -32,41 +32,60 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file GenomicInterval.inl
+/// \brief Inline implementations for the GenomicInterval class.
+//
// Author: Derek Barnett
-#ifndef GENOMICINTERVALQUERY_H
-#define GENOMICINTERVALQUERY_H
-
#include "pbbam/GenomicInterval.h"
-#include "pbbam/QueryBase.h"
-#include "pbbam/internal/QueryBase.h"
-#include <string>
namespace PacBio {
namespace BAM {
-class BamFile;
+inline GenomicInterval::~GenomicInterval(void) { }
+
+inline std::string GenomicInterval::Name(void) const
+{ return name_; }
+
+inline GenomicInterval& GenomicInterval::Name(const std::string& name)
+{ name_ = name; return *this; }
+
+inline PacBio::BAM::Interval<Position> GenomicInterval::Interval(void) const
+{ return interval_; }
-class PBBAM_EXPORT GenomicIntervalQuery : public internal::IQuery
+inline GenomicInterval& GenomicInterval::Interval(const PacBio::BAM::Interval<Position>& interval)
+{ interval_ = interval; return *this; }
+
+inline bool GenomicInterval::IsValid(void) const
{
-public:
- GenomicIntervalQuery(const GenomicInterval& interval,
- const DataSet& dataset);
+ return !name_.empty() &&
+ interval_.Start() >= 0 &&
+ interval_.Stop() >= 0 &&
+ interval_.IsValid();
+}
-public:
- GenomicIntervalQuery& Interval(const GenomicInterval& interval);
- GenomicInterval Interval(void) const;
+inline size_t GenomicInterval::Length(void) const
+{ return interval_.Length(); }
-protected:
- FileIterPtr CreateIterator(const BamFile& bamFile);
+inline Position GenomicInterval::Start(void) const
+{ return interval_.Start(); }
-private:
- GenomicInterval interval_;
-};
+inline GenomicInterval& GenomicInterval::Start(const Position start)
+{ interval_.Start(start); return *this; }
-//} // namespace staging
-} // namespace BAM
-} // namspace PacBio
+inline Position GenomicInterval::Stop(void) const
+{ return interval_.Stop(); }
+
+inline GenomicInterval& GenomicInterval::Stop(const Position stop)
+{ interval_.Stop(stop); return *this; }
-#endif // GENOMICINTERVALQUERY_H
+inline bool GenomicInterval::operator==(const GenomicInterval& other) const
+{ return name_ == other.name_ && interval_ == other.interval_; }
+
+inline bool GenomicInterval::operator!=(const GenomicInterval& other) const
+{ return !(*this == other); }
+
+} // namespace BAM
+} // namespace PacBio
diff --git a/include/pbbam/internal/IMergeStrategy.h b/include/pbbam/internal/IMergeStrategy.h
deleted file mode 100644
index 0677f31..0000000
--- a/include/pbbam/internal/IMergeStrategy.h
+++ /dev/null
@@ -1,67 +0,0 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-//
-// * Neither the name of Pacific Biosciences nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-
-// Author: Derek Barnett
-
-#ifndef IMERGESTRATEGY_H
-#define IMERGESTRATEGY_H
-
-#include "pbbam/internal/IBamFileIterator.h"
-
-namespace PacBio {
-namespace BAM {
-namespace internal {
-
-template<typename T>
-class IMergeStrategyBase
-{
-public:
- typedef typename IBamFileIteratorBase<T>::Ptr FileIterPtr;
-protected:
- IMergeStrategyBase(void) { }
-public:
- virtual ~IMergeStrategyBase(void) { }
-public:
- virtual bool GetNext(T& result) =0;
-};
-
-typedef IMergeStrategyBase<BamRecord> IMergeStrategy;
-typedef IMergeStrategyBase<std::vector<BamRecord> > IGroupMergeStrategy;
-
-} // namespace internal
-} // namespace BAM
-} // namespace PacBio
-
-#endif // IMERGESTRATEGY_H
diff --git a/include/pbbam/Interval.h b/include/pbbam/internal/Interval.inl
similarity index 57%
copy from include/pbbam/Interval.h
copy to include/pbbam/internal/Interval.inl
index 6c2e91a..e9c7edd 100644
--- a/include/pbbam/Interval.h
+++ b/include/pbbam/internal/Interval.inl
@@ -32,127 +32,35 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file Interval.inl
+/// \brief Inline implementations for the Interval class.
+//
// Author: Derek Barnett
-#ifndef INTERVAL_H
-#define INTERVAL_H
-
-#include "pbbam/Config.h"
-#include <string>
-
-#define BOOST_ICL_USE_STATIC_BOUNDED_INTERVALS
-#include <boost/icl/discrete_interval.hpp>
-#include <boost/icl/interval_traits.hpp>
+#include "pbbam/Interval.h"
namespace PacBio {
namespace BAM {
-/// \brief Utility class for working with half-open (right-open) intervals. [start, stop)
-///
-/// \note This class is agnostic whether the values are 0-based or 1-based.
-/// \todo Should it be? Should that go here or "higher up"?
-///
-template<typename T>
-class Interval
-{
-public:
- typedef boost::icl::discrete_interval<T> interval_type;
-
-public:
-
- /// \name Constructors
- /// \{
-
- /** Default constructor; yields an empty interval [0,0) */
- inline Interval(void);
-
- /** Constructor for a singleton interval [val,val+1) */
- inline Interval(const T val);
-
- /** Constructor for interval from [start, stop) */
- inline Interval(const T start, const T stop);
-
- /** Copy constructor */
- inline Interval(const Interval<T>& other);
-
- /// \}
-
-public:
- /// \name Attributes
- /// \{
-
- /// \returns interval start coordinate
- inline T Start(void) const;
-
- /// Sets this interval's start coordinate.
- ///
- /// \param[in] start
- /// \returns reference to this interval
- inline Interval<T>& Start(const T& start);
-
- /// \returns interval stop coordinate
- inline T Stop(void) const;
-
- /// Sets this interval's stop coordinate.
- ///
- /// \param[in] stop
- /// \returns reference to this interval
- inline Interval<T>& Stop(const T& stop);
-
- /// \}
-
- /// \name Interval Operations
-
- /// \returns true if this interval is fully covered by (or contained in) \p other
- inline bool CoveredBy(const Interval<T>& other) const;
-
- //// \returns true if this interval covers (or contains) \p other
- inline bool Covers(const Interval<T>& other) const;
-
- /// \returns true if intervals interset
- inline bool Intersects(const Interval<T>& other) const;
-
- /// \returns true if interval is valid (e.g. start < stop)
- inline bool IsValid(void) const;
-
- /// \returns interval length
- inline size_t Length(void) const;
-
- /// \}
-
- /// \name Comparison Operators
- /// \{
-
- /// \returns true if both intervals share the same endpoints
- inline bool operator==(const Interval<T>& other) const;
-
- /// \returns true if either interval's endpoints differ
- inline bool operator!=(const Interval<T>& other) const;
-
- /// \}
-
-private:
- interval_type data_;
-};
-
template<typename T>
-Interval<T>::Interval(void)
+inline Interval<T>::Interval(void)
: data_(boost::icl::discrete_interval<T>::right_open(0,0))
{ }
template<typename T>
-Interval<T>::Interval(const T val)
+inline Interval<T>::Interval(const T val)
: data_(boost::icl::discrete_interval<T>::right_open(val,val+1))
{ }
template<typename T>
-Interval<T>::Interval(const T start, const T stop)
+inline Interval<T>::Interval(const T start, const T stop)
: data_(boost::icl::discrete_interval<T>::right_open(start,stop))
{ }
template<typename T>
-Interval<T>::Interval(const Interval<T>& other)
+inline Interval<T>::Interval(const Interval<T>& other)
: data_(boost::icl::discrete_interval<T>::right_open(other.Start(), other.Stop()))
{ }
@@ -207,6 +115,4 @@ inline Interval<T>& Interval<T>::Stop(const T& stop)
}
} // namespace BAM
-} // namspace PacBio
-
-#endif // GENOMICINTERVAL_H
+} // namespace PacBio
diff --git a/include/pbbam/internal/MergeStrategy.h b/include/pbbam/internal/MergeStrategy.h
deleted file mode 100644
index 1a8a564..0000000
--- a/include/pbbam/internal/MergeStrategy.h
+++ /dev/null
@@ -1,239 +0,0 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-//
-// * Neither the name of Pacific Biosciences nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-
-// Author: Derek Barnett
-
-#ifndef MERGESTRATEGY_H
-#define MERGESTRATEGY_H
-
-#include "pbbam/BamRecord.h"
-#include "pbbam/internal/IMergeStrategy.h"
-#include "pbbam/internal/MergeItem.h"
-#include <functional>
-#include <set>
-#include <vector>
-#include <cassert>
-
-namespace PacBio {
-namespace BAM {
-namespace internal {
-
-template<typename Compare>
-struct MergeItemSorter : public std::binary_function<MergeItem, MergeItem, bool>
-{
-public:
- MergeItemSorter(const Compare& comp = Compare())
- : comp_(comp)
- { }
-
- bool operator()(const MergeItem& lhs, const MergeItem& rhs) {
- const BamRecord& l = lhs.record_;
- const BamRecord& r = rhs.record_;
- return comp_(l, r);
- }
-
-private:
- Compare comp_;
-};
-
-template<typename Compare>
-struct GroupMergeItemSorter : public std::binary_function<GroupMergeItem, GroupMergeItem, bool>
-{
-public:
- GroupMergeItemSorter(const Compare& comp = Compare())
- : comp_(comp)
- { }
-
- bool operator()(const GroupMergeItem& lhs, const GroupMergeItem& rhs) {
- if ( lhs.record_.empty())
- return false;
- if ( rhs.record_.empty())
- return true;
- assert(!lhs.record_.empty());
- assert(!rhs.record_.empty());
- const BamRecord& l = lhs.record_.front();
- const BamRecord& r = rhs.record_.front();
- return comp_(l, r);
- }
-
-private:
- Compare comp_;
-};
-
-template<typename Compare>
-class MergeStrategy : public IMergeStrategy
-{
-public:
- MergeStrategy(const std::vector<FileIterPtr>& iters);
- bool GetNext(BamRecord& record);
-private:
- std::multiset<MergeItem, MergeItemSorter<Compare> > mergeItems_;
-};
-
-template<typename Compare>
-class GroupMergeStrategy : public IGroupMergeStrategy
-{
-public:
- GroupMergeStrategy(const std::vector<FileIterPtr>& iters);
- bool GetNext(std::vector<BamRecord>& records);
-private:
- GroupMergeItem nextItem_;
- std::multiset<GroupMergeItem, GroupMergeItemSorter<Compare> > mergeItems_;
-};
-
-// -----------------------
-// MergeStrategy
-// -----------------------
-
-template<typename Compare>
-inline MergeStrategy<Compare>::MergeStrategy(const std::vector<FileIterPtr>& iters)
- : IMergeStrategy()
-{
- BamRecord r;
- for (FileIterPtr iter : iters) {
- if (iter->GetNext(r)) {
- MergeItem item(r, iter);
- mergeItems_.insert(item);
- }
- }
-}
-
-template<typename Compare>
-inline bool MergeStrategy<Compare>::GetNext(BamRecord& record)
-{
- if (mergeItems_.empty())
- return false;
-
- // pop first merge item & record
- auto firstIter = mergeItems_.begin();
- MergeItem firstItem = (*firstIter);
- mergeItems_.erase(firstIter);
- record = firstItem.record_;
-
- // try fetch iter's next (if failed, do not replace)
- if (firstItem.iter_->GetNext(firstItem.record_))
- mergeItems_.insert(firstItem);
- return true;
-}
-
-// -----------------------
-// GroupMergeStrategy
-// -----------------------
-
-template<typename Compare>
-inline GroupMergeStrategy<Compare>::GroupMergeStrategy(const std::vector<FileIterPtr>& iters)
- : IGroupMergeStrategy()
-{
- std::vector<BamRecord> r;
- for (FileIterPtr iter : iters) {
- if (iter->GetNext(r)) {
- GroupMergeItem item(r, iter);
- mergeItems_.insert(item);
- }
- }
- if (!mergeItems_.empty()) {
- auto firstIter = mergeItems_.begin();
- nextItem_ = (*firstIter);
- mergeItems_.erase(firstIter);
- }
-}
-
-template<typename Compare>
-inline bool GroupMergeStrategy<Compare>::GetNext(std::vector<BamRecord>& records)
-{
- records.clear();
- if (nextItem_.IsNull())
- return false;
-
- // append "nextItem" records
- records = nextItem_.record_;
-
- // try fetch iter's next (if failed, do not replace)
- if (nextItem_.iter_->GetNext(nextItem_.record_))
- mergeItems_.insert(nextItem_);
- else
- nextItem_ = GroupMergeItem();
-
- while (!mergeItems_.empty()) {
-
- // pop first merge item
- auto firstIter = mergeItems_.begin();
- GroupMergeItem firstItem = (*firstIter);
- mergeItems_.erase(firstIter);
-
- // if first item has records
- if (!firstItem.record_.empty()) {
-
- // if first block to store
- if (records.empty())
- records = firstItem.record_;
-
- // else see if we match current group
- else {
- const BamRecord& lhs = records.front();
- const BamRecord& rhs = firstItem.record_.front();
-
- // if match, append to output & fetch next
- if (firstItem.iter_->InSameGroup(lhs, rhs)) {
-
- for (const BamRecord& r : firstItem.record_)
- records.push_back(r);
- if (firstItem.iter_->GetNext(firstItem.record_))
- mergeItems_.insert(firstItem);
- }
-
- // no match, item becomes the "next item" to use
- else {
- nextItem_ = firstItem;
- break;
- }
- }
- }
-
- // first item has no records, try fetch next
- else {
- if (firstItem.iter_->GetNext(firstItem.record_))
- mergeItems_.insert(firstItem);
- }
- }
-
- return true;
-}
-
-} // namespace internal
-} // namespace BAM
-} // namespace PacBio
-
-#endif // MERGESTRATEGY_H
diff --git a/src/FilterEngine.cpp b/include/pbbam/internal/PbiBasicTypes.inl
similarity index 71%
copy from src/FilterEngine.cpp
copy to include/pbbam/internal/PbiBasicTypes.inl
index 1f47967..229841e 100644
--- a/src/FilterEngine.cpp
+++ b/include/pbbam/internal/PbiBasicTypes.inl
@@ -32,45 +32,39 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file PbiBasicTypes.inl
+/// \brief Inline implementations for the basic data structures used in PBI lookups.
+//
// Author: Derek Barnett
-#include "pbbam/internal/FilterEngine.h"
-using namespace PacBio;
-using namespace PacBio::BAM;
-using namespace PacBio::BAM::internal;
-using namespace std;
+#include "pbbam/PbiBasicTypes.h"
namespace PacBio {
namespace BAM {
-namespace internal {
-
-} // namespace internal
-} // namespace BAM
-} // namespace PacBio
+inline IndexResultBlock::IndexResultBlock(void)
+ : firstIndex_(0)
+ , numReads_(0)
+ , virtualOffset_(-1)
+{ }
-FilterEngine::FilterEngine(void) { }
+inline IndexResultBlock::IndexResultBlock(size_t idx, size_t numReads)
+ : firstIndex_(idx)
+ , numReads_(numReads)
+ , virtualOffset_(-1)
+{ }
-bool FilterEngine::Accepts(const BamRecord& r) const
+inline bool IndexResultBlock::operator==(const IndexResultBlock& other) const
{
-// foreach ( const FilterParameter& param, parameters_ ) {
-// if (!param.Accepts(r))
-// return false;
-// }
-// return true;
- (void)r;
- return true;
+ return firstIndex_ == other.firstIndex_ &&
+ numReads_ == other.numReads_ &&
+ virtualOffset_ == other.virtualOffset_;
}
-bool FilterEngine::Accepts(vector<BamRecord>& r) const
-{
- size_t i = 0;
- while (i < r.size()) {
- if (!Accepts(r.at(i)))
- r.erase(r.begin() + i);
- else
- ++i;
- }
- return !r.empty();
-}
+inline bool IndexResultBlock::operator!=(const IndexResultBlock& other) const
+{ return !(*this == other); }
+
+} // namespace BAM
+} // namespace PacBio
diff --git a/include/pbbam/internal/PbiFilter.inl b/include/pbbam/internal/PbiFilter.inl
new file mode 100644
index 0000000..18c26d0
--- /dev/null
+++ b/include/pbbam/internal/PbiFilter.inl
@@ -0,0 +1,312 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiFilter.inl
+/// \brief Inline implementations for the PbiFilter class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/PbiFilter.h"
+#include <algorithm>
+#include <iostream>
+#include <map>
+#include <set>
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// \internal
+///
+/// This class wraps a the basic PBI filter (whether property filter or some operator
+/// e.g. union, intersect, etc.). The wrapper allows PbiFilters to hold heterogeneous,
+/// recursive filter types - without exposing pointers & worrying about memory ownership
+/// issues between client & library.
+///
+/// Filters can be given by value from client code and we will wrap them for composition.
+///
+/// \code{.cpp}
+/// PbiFilter f1(PbiZmwFilter(42));
+/// PbiFilter f2;
+/// f2.Add(PbiQueryLengthFilter(3000, GREATER_THAN_EQUAL));
+/// f2.Add(MyApplicationCustomFilter("foo"));
+/// PbiFilter intersect = PbiFilter::Intersect(f1, f2);
+/// ...
+/// \endcode
+///
+struct FilterWrapper
+{
+public:
+ template<typename T> FilterWrapper(T x);
+
+ FilterWrapper(const FilterWrapper& other);
+ FilterWrapper(FilterWrapper&&) noexcept = default;
+ FilterWrapper& operator=(const FilterWrapper& other);
+ FilterWrapper& operator=(FilterWrapper&&) noexcept = default;
+ ~FilterWrapper(void);
+
+public:
+ bool Accepts(const PacBio::BAM::PbiRawData& idx, const size_t row) const;
+
+private:
+ struct WrapperInterface
+ {
+ virtual ~WrapperInterface(void) = default;
+ virtual WrapperInterface* Clone(void) const =0;
+ virtual bool Accepts(const PacBio::BAM::PbiRawData& idx,
+ const size_t row) const =0;
+ };
+
+ template<typename T>
+ struct WrapperImpl : public WrapperInterface
+ {
+ WrapperImpl(T x);
+ WrapperImpl(const WrapperImpl& other);
+ WrapperInterface* Clone(void) const;
+ bool Accepts(const PacBio::BAM::PbiRawData& idx, const size_t row) const;
+ T data_;
+ };
+
+private:
+ std::unique_ptr<WrapperInterface> self_;
+};
+
+// ---------------
+// FilterWrapper
+// ---------------
+
+template<typename T>
+inline FilterWrapper::FilterWrapper(T x)
+ : self_(new WrapperImpl<T>(std::move(x)))
+{ }
+
+inline FilterWrapper::FilterWrapper(const FilterWrapper& other)
+ : self_(other.self_->Clone())
+{ }
+
+inline FilterWrapper& FilterWrapper::operator=(const FilterWrapper& other)
+{
+ self_.reset(other.self_->Clone());
+ return *this;
+}
+
+inline FilterWrapper::~FilterWrapper(void) { }
+
+inline bool FilterWrapper::Accepts(const PbiRawData& idx, const size_t row) const
+{ return self_->Accepts(idx, row); }
+
+// ----------------
+// WrapperImpl<T>
+// ----------------
+
+template<typename T>
+inline FilterWrapper::WrapperImpl<T>::WrapperImpl(T x)
+ : FilterWrapper::WrapperInterface()
+ , data_(std::move(x))
+{
+ BOOST_CONCEPT_ASSERT((PbiFilterConcept<T>));
+}
+
+template<typename T>
+inline FilterWrapper::WrapperImpl<T>::WrapperImpl(const WrapperImpl& other)
+ : FilterWrapper::WrapperInterface()
+ , data_(other.data_)
+{ }
+
+template<typename T>
+inline FilterWrapper::WrapperInterface* FilterWrapper::WrapperImpl<T>::Clone(void) const
+{ return new WrapperImpl(*this); }
+
+template<typename T>
+inline bool FilterWrapper::WrapperImpl<T>::Accepts(const PbiRawData& idx,
+ const size_t row) const
+{ return data_.Accepts(idx, row); }
+
+struct PbiFilterPrivate
+{
+ PbiFilterPrivate(PbiFilter::CompositionType type)
+ : type_(type)
+ { }
+
+ template<typename T>
+ void Add(T&& filter)
+ {
+ filters_.emplace_back(std::move(filter));
+ }
+
+ std::unique_ptr<internal::PbiFilterPrivate> DeepCopy(void)
+ {
+ auto copy = std::unique_ptr<PbiFilterPrivate>{ new PbiFilterPrivate{type_} };
+ copy->filters_ = this->filters_;
+ return copy;
+ }
+
+ bool Accepts(const PbiRawData& idx, const size_t row) const
+ {
+ // no filter -> accepts every record
+ if (filters_.empty())
+ return true;
+
+ // intersection of child filters
+ if (type_ == PbiFilter::INTERSECT) {
+ for (const auto& filter : filters_) {
+ if (!filter.Accepts(idx, row))
+ return false; // break early on failure
+ }
+ return true; // all passed
+ }
+
+ // union of child filters
+ else if (type_ == PbiFilter::UNION) {
+ for (const auto& filter : filters_) {
+ if (filter.Accepts(idx, row))
+ return true; // break early on pass
+ }
+ return false; // none passed
+ }
+
+ else
+ //assert(false); // invalid composite filter type
+ throw std::runtime_error("invalid composite filter type in PbiFilterPrivate::Accepts");
+ }
+
+ PbiFilter::CompositionType type_;
+ std::vector<FilterWrapper> filters_;
+};
+
+} // namespace internal
+
+inline PbiFilter::PbiFilter(const CompositionType type)
+ : d_{ new internal::PbiFilterPrivate{ type } }
+{ }
+
+template<typename T> inline
+PbiFilter::PbiFilter(const T& filter)
+ : d_{ new internal::PbiFilterPrivate{ PbiFilter::INTERSECT } }
+{
+ Add(filter);
+}
+
+template<typename T> inline
+PbiFilter::PbiFilter(T&& filter)
+ : d_{ new internal::PbiFilterPrivate{ PbiFilter::INTERSECT } }
+{
+ Add(std::move(filter));
+}
+
+inline PbiFilter::PbiFilter(const std::vector<PbiFilter>& filters)
+ : d_{ new internal::PbiFilterPrivate{ PbiFilter::INTERSECT } }
+{
+ Add(filters);
+}
+
+inline PbiFilter::PbiFilter(std::vector<PbiFilter>&& filters)
+ : d_{ new internal::PbiFilterPrivate{ PbiFilter::INTERSECT} }
+{
+ Add(std::move(filters));
+}
+
+inline PbiFilter::PbiFilter(const PbiFilter& other)
+ : d_{ other.d_->DeepCopy() }
+{ }
+
+inline PbiFilter::PbiFilter(PbiFilter&& other) noexcept
+ : d_{ std::move(other.d_) }
+{ }
+
+inline PbiFilter& PbiFilter::operator=(const PbiFilter& other)
+{
+ d_ = other.d_->DeepCopy();
+ return *this;
+}
+
+inline PbiFilter& PbiFilter::operator=(PbiFilter&& other) noexcept
+{
+ d_ = std::move(other.d_);
+ return *this;
+}
+
+inline PbiFilter::~PbiFilter(void) { }
+
+inline bool PbiFilter::Accepts(const PacBio::BAM::PbiRawData& idx,
+ const size_t row) const
+{ return d_->Accepts(idx, row); }
+
+template<typename T>
+inline PbiFilter& PbiFilter::Add(const T& filter)
+{
+ T copy = filter;
+ return Add(std::move(copy));
+}
+
+template<typename T>
+inline PbiFilter& PbiFilter::Add(T&& filter)
+{
+ d_->Add(std::move(filter));
+ return *this;
+}
+
+inline PbiFilter& PbiFilter::Add(const PbiFilter& filter)
+{
+ PbiFilter copy = filter;
+ return Add(std::move(copy));
+}
+
+inline PbiFilter& PbiFilter::Add(PbiFilter&& filter)
+{
+ d_->Add(std::move(filter));
+ return *this;
+}
+
+inline PbiFilter& PbiFilter::Add(const std::vector<PbiFilter>& filters)
+{
+ std::vector<PbiFilter> copy = filters;
+ return Add(std::move(copy));
+}
+
+inline PbiFilter& PbiFilter::Add(std::vector<PbiFilter>&& filters)
+{
+ for (auto&& filter : filters)
+ d_->Add(std::move(filter));
+ return *this;
+}
+
+inline bool PbiFilter::IsEmpty(void) const
+{ return d_->filters_.empty(); }
+
+} // namespace BAM
+} // namespace PacBio
diff --git a/include/pbbam/internal/PbiFilterTypes.inl b/include/pbbam/internal/PbiFilterTypes.inl
new file mode 100644
index 0000000..89caca4
--- /dev/null
+++ b/include/pbbam/internal/PbiFilterTypes.inl
@@ -0,0 +1,553 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiFilterTypes.inl
+/// \brief Inline implementations for the built-in PBI filters.
+//
+// Author: Derek Barnett
+
+#include "pbbam/PbiFilterTypes.h"
+#include <cassert>
+#include <stdexcept>
+
+namespace PacBio {
+namespace BAM {
+
+namespace internal {
+
+template <typename T>
+inline FilterBase<T>::FilterBase(const T& value, const Compare::Type cmp)
+ : value_(value)
+ , cmp_(cmp)
+{ }
+
+template <typename T>
+inline FilterBase<T>::FilterBase(T&& value, const Compare::Type cmp)
+ : value_(std::move(value))
+ , cmp_(cmp)
+{ }
+
+template <typename T>
+inline FilterBase<T>::FilterBase(const std::vector<T>& values)
+ : multiValue_(values)
+{ }
+
+template <typename T>
+inline FilterBase<T>::FilterBase(std::vector<T>&& values)
+ : multiValue_(std::move(values))
+{ }
+
+template<typename T>
+inline bool FilterBase<T>::CompareHelper(const T& lhs) const
+{
+ if (multiValue_ == boost::none)
+ return CompareSingleHelper(lhs);
+ else
+ return CompareMultiHelper(lhs);
+}
+
+template<typename T>
+inline bool FilterBase<T>::CompareMultiHelper(const T& lhs) const
+{
+ // check provided value against all filter criteria,
+ // return true on any exact match
+ auto iter = multiValue_.get().cbegin();
+ const auto end = multiValue_.get().cend();
+ for (; iter != end; ++iter) {
+ if (*iter == lhs)
+ return true;
+ }
+ return false; // no matches
+}
+
+template<typename T>
+inline bool FilterBase<T>::CompareSingleHelper(const T& lhs) const
+{
+ switch(cmp_) {
+ case Compare::EQUAL: return lhs == value_;
+ case Compare::LESS_THAN: return lhs < value_;
+ case Compare::LESS_THAN_EQUAL: return lhs <= value_;
+ case Compare::GREATER_THAN: return lhs > value_;
+ case Compare::GREATER_THAN_EQUAL: return lhs >= value_;
+ case Compare::NOT_EQUAL: return lhs != value_;
+ default:
+ assert(false);
+ throw std::runtime_error("unsupported compare type requested");
+ }
+}
+
+template<>
+inline bool FilterBase<LocalContextFlags>::CompareSingleHelper(const LocalContextFlags& lhs) const
+{
+ switch(cmp_) {
+ case Compare::EQUAL: return lhs == value_;
+ case Compare::LESS_THAN: return lhs < value_;
+ case Compare::LESS_THAN_EQUAL: return lhs <= value_;
+ case Compare::GREATER_THAN: return lhs > value_;
+ case Compare::GREATER_THAN_EQUAL: return lhs >= value_;
+ case Compare::NOT_EQUAL: return lhs != value_;
+ case Compare::CONTAINS: return ((lhs & value_) != 0);
+ case Compare::NOT_CONTAINS: return ((lhs & value_) == 0);
+
+ default:
+ assert(false);
+ throw std::runtime_error("unsupported compare type requested");
+ }
+}
+
+// BarcodeDataFilterBase
+
+template<typename T, BarcodeLookupData::Field field>
+inline BarcodeDataFilterBase<T, field>::BarcodeDataFilterBase(const T& value, const Compare::Type cmp)
+ : FilterBase<T>(value, cmp)
+{ }
+
+template<typename T, BarcodeLookupData::Field field>
+inline BarcodeDataFilterBase<T, field>::BarcodeDataFilterBase(T&& value, const Compare::Type cmp)
+ : FilterBase<T>(std::move(value), cmp)
+{ }
+
+template<typename T, BarcodeLookupData::Field field>
+inline BarcodeDataFilterBase<T, field>::BarcodeDataFilterBase(const std::vector<T>& values)
+ : FilterBase<T>(values)
+{ }
+
+template<typename T, BarcodeLookupData::Field field>
+inline BarcodeDataFilterBase<T, field>::BarcodeDataFilterBase(std::vector<T>&& values)
+ : FilterBase<T>(std::move(values))
+{ }
+
+template<typename T, BarcodeLookupData::Field field>
+inline bool BarcodeDataFilterBase<T, field>::BarcodeDataFilterBase::Accepts(const PbiRawData& idx,
+ const size_t row) const
+{
+ const PbiRawBarcodeData& barcodeData = idx.BarcodeData();
+ switch (field) {
+ case BarcodeLookupData::BC_FORWARD: return FilterBase<T>::CompareHelper(barcodeData.bcForward_.at(row));
+ case BarcodeLookupData::BC_REVERSE: return FilterBase<T>::CompareHelper(barcodeData.bcReverse_.at(row));
+ case BarcodeLookupData::BC_QUALITY: return FilterBase<T>::CompareHelper(barcodeData.bcQual_.at(row));
+ default:
+ assert(false);
+ throw std::runtime_error("unsupported BarcodeData field requested");
+ }
+}
+
+// BasicDataFilterBase
+
+template<typename T, BasicLookupData::Field field>
+inline BasicDataFilterBase<T, field>::BasicDataFilterBase(const T& value, const Compare::Type cmp)
+ : FilterBase<T>(value, cmp)
+{ }
+
+template<typename T, BasicLookupData::Field field>
+inline BasicDataFilterBase<T, field>::BasicDataFilterBase(T&& value, const Compare::Type cmp)
+ : FilterBase<T>(std::move(value), cmp)
+{ }
+
+template<typename T, BasicLookupData::Field field>
+inline BasicDataFilterBase<T, field>::BasicDataFilterBase(const std::vector<T>& values)
+ : FilterBase<T>(values)
+{ }
+
+template<typename T, BasicLookupData::Field field>
+inline BasicDataFilterBase<T, field>::BasicDataFilterBase(std::vector<T>&& values)
+ : FilterBase<T>(std::move(values))
+{ }
+
+template<typename T, BasicLookupData::Field field>
+inline bool BasicDataFilterBase<T, field>::BasicDataFilterBase::Accepts(const PbiRawData& idx,
+ const size_t row) const
+{
+ const PbiRawBasicData& basicData = idx.BasicData();
+ switch (field) {
+ case BasicLookupData::RG_ID: return FilterBase<T>::CompareHelper(basicData.rgId_.at(row));
+ case BasicLookupData::Q_START: return FilterBase<T>::CompareHelper(basicData.qStart_.at(row));
+ case BasicLookupData::Q_END: return FilterBase<T>::CompareHelper(basicData.qEnd_.at(row));
+ case BasicLookupData::ZMW: return FilterBase<T>::CompareHelper(basicData.holeNumber_.at(row));
+ case BasicLookupData::READ_QUALITY: return FilterBase<T>::CompareHelper(basicData.readQual_.at(row));
+ // BasicLookupData::CONTEXT_FLAG has its own specialization
+ default:
+ assert(false);
+ throw std::runtime_error("unsupported BasicData field requested");
+ }
+}
+
+// this typedef exists purely so that the next method signature isn't 2 screen widths long
+typedef BasicDataFilterBase<LocalContextFlags, BasicLookupData::CONTEXT_FLAG> LocalContextFilter__;
+
+template<>
+inline bool LocalContextFilter__::BasicDataFilterBase::Accepts(const PbiRawData& idx,
+ const size_t row) const
+{
+ const PbiRawBasicData& basicData = idx.BasicData();
+ const LocalContextFlags rowFlags = static_cast<LocalContextFlags>(basicData.ctxtFlag_.at(row));
+ return FilterBase<LocalContextFlags>::CompareHelper(rowFlags);
+}
+
+template<typename T, MappedLookupData::Field field>
+inline MappedDataFilterBase<T, field>::MappedDataFilterBase(const T& value, const Compare::Type cmp)
+ : FilterBase<T>(value, cmp)
+{ }
+
+template<typename T, MappedLookupData::Field field>
+inline MappedDataFilterBase<T, field>::MappedDataFilterBase(T&& value, const Compare::Type cmp)
+ : FilterBase<T>(std::move(value), cmp)
+{ }
+
+template<typename T, MappedLookupData::Field field>
+inline MappedDataFilterBase<T, field>::MappedDataFilterBase(const std::vector<T>& values)
+ : FilterBase<T>(values)
+{ }
+
+template<typename T, MappedLookupData::Field field>
+inline MappedDataFilterBase<T, field>::MappedDataFilterBase(std::vector<T>&& values)
+ : FilterBase<T>(std::move(values))
+{ }
+
+template<>
+inline bool MappedDataFilterBase<Strand, MappedLookupData::STRAND>::MappedDataFilterBase::Accepts(const PbiRawData& idx,
+ const size_t row) const
+{
+ const PbiRawMappedData& mappedData = idx.MappedData();
+ const Strand strand = (mappedData.revStrand_.at(row) == 1 ? Strand::REVERSE : Strand::FORWARD);
+ return FilterBase<Strand>::CompareHelper(strand);
+}
+
+template<typename T, MappedLookupData::Field field>
+inline bool MappedDataFilterBase<T, field>::MappedDataFilterBase::Accepts(const PbiRawData& idx,
+ const size_t row) const
+{
+ const PbiRawMappedData& mappedData = idx.MappedData();
+ switch (field) {
+ case MappedLookupData::T_ID: return FilterBase<T>::CompareHelper(mappedData.tId_.at(row));
+ case MappedLookupData::T_START: return FilterBase<T>::CompareHelper(mappedData.tStart_.at(row));
+ case MappedLookupData::T_END: return FilterBase<T>::CompareHelper(mappedData.tEnd_.at(row));
+ case MappedLookupData::A_START: return FilterBase<T>::CompareHelper(mappedData.aStart_.at(row));
+ case MappedLookupData::A_END: return FilterBase<T>::CompareHelper(mappedData.aEnd_.at(row));
+ case MappedLookupData::N_M: return FilterBase<T>::CompareHelper(mappedData.nM_.at(row));
+ case MappedLookupData::N_MM: return FilterBase<T>::CompareHelper(mappedData.nMM_.at(row));
+ case MappedLookupData::N_DEL: return FilterBase<T>::CompareHelper(mappedData.NumDeletedBasesAt(row));
+ case MappedLookupData::N_INS: return FilterBase<T>::CompareHelper(mappedData.NumInsertedBasesAt(row));
+ case MappedLookupData::MAP_QUALITY: return FilterBase<T>::CompareHelper(mappedData.mapQV_.at(row));
+ default:
+ assert(false);
+ throw std::runtime_error("unsupported MappedData field requested");
+ }
+}
+
+} // namespace internal
+
+// PbiAlignedEndFilter
+
+inline PbiAlignedEndFilter::PbiAlignedEndFilter(const uint32_t position, const Compare::Type cmp)
+ : internal::MappedDataFilterBase<uint32_t, MappedLookupData::A_END>(position, cmp)
+{ }
+
+// PbiAlignedLengthFilter
+
+inline PbiAlignedLengthFilter::PbiAlignedLengthFilter(const uint32_t length, const Compare::Type cmp)
+ : internal::FilterBase<uint32_t>(length, cmp)
+{ }
+
+// PbiAlignedStartFilter
+
+inline PbiAlignedStartFilter::PbiAlignedStartFilter(const uint32_t position, const Compare::Type cmp)
+ : internal::MappedDataFilterBase<uint32_t, MappedLookupData::A_START>(position, cmp)
+{ }
+
+// PbiAlignedStrandFilter
+
+inline PbiAlignedStrandFilter::PbiAlignedStrandFilter(const Strand strand, const Compare::Type cmp)
+ : internal::MappedDataFilterBase<Strand, MappedLookupData::STRAND>(strand, cmp)
+{
+ if (cmp != Compare::EQUAL && cmp != Compare::NOT_EQUAL) {
+ auto msg = std::string{ "Compare type: " };
+ msg += Compare::TypeToName(cmp);
+ msg += " not supported for PbiAlignedStrandFilter (use one of Compare::EQUAL or Compare::NOT_EQUAL).";
+ throw std::runtime_error(msg);
+ }
+}
+
+// PbiBarcodeFilter
+
+inline PbiBarcodeFilter::PbiBarcodeFilter(const uint16_t barcode, const Compare::Type cmp)
+ : compositeFilter_{ PbiFilter::Union({ PbiBarcodeForwardFilter{barcode,cmp},
+ PbiBarcodeReverseFilter{barcode,cmp}
+ })
+ }
+{ }
+
+inline PbiBarcodeFilter::PbiBarcodeFilter(const std::vector<uint16_t> &whitelist)
+ : compositeFilter_{ PbiFilter::Union({ PbiBarcodeForwardFilter{whitelist},
+ PbiBarcodeReverseFilter{whitelist}
+ })
+ }
+{ }
+
+inline PbiBarcodeFilter::PbiBarcodeFilter(std::vector<uint16_t> &&whitelist)
+ : compositeFilter_{ PbiFilter::Union({ PbiBarcodeForwardFilter{std::move(whitelist)},
+ PbiBarcodeReverseFilter{std::move(whitelist)}
+ })
+ }
+{ }
+
+inline bool PbiBarcodeFilter::Accepts(const PbiRawData& idx, const size_t row) const
+{ return compositeFilter_.Accepts(idx, row); }
+
+// PbiBarcodeForwardFilter
+
+inline PbiBarcodeForwardFilter::PbiBarcodeForwardFilter(const uint16_t bcFwdId, const Compare::Type cmp)
+ : internal::BarcodeDataFilterBase<uint16_t, BarcodeLookupData::BC_FORWARD>(bcFwdId, cmp)
+{ }
+
+inline PbiBarcodeForwardFilter::PbiBarcodeForwardFilter(const std::vector<uint16_t>& whitelist)
+ : internal::BarcodeDataFilterBase<uint16_t, BarcodeLookupData::BC_FORWARD>(whitelist)
+{ }
+
+inline PbiBarcodeForwardFilter::PbiBarcodeForwardFilter(std::vector<uint16_t>&& whitelist)
+ : internal::BarcodeDataFilterBase<uint16_t, BarcodeLookupData::BC_FORWARD>(std::move(whitelist))
+{ }
+
+// PbiBarcodeQualityFilter
+
+inline PbiBarcodeQualityFilter::PbiBarcodeQualityFilter(const uint8_t bcQuality, const Compare::Type cmp)
+ : internal::BarcodeDataFilterBase<uint8_t, BarcodeLookupData::BC_QUALITY>(bcQuality, cmp)
+{ }
+
+// PbiBarcodeReverseFilter
+
+inline PbiBarcodeReverseFilter::PbiBarcodeReverseFilter(const uint16_t bcRevId, const Compare::Type cmp)
+ : internal::BarcodeDataFilterBase<uint16_t, BarcodeLookupData::BC_REVERSE>(bcRevId, cmp)
+{ }
+
+inline PbiBarcodeReverseFilter::PbiBarcodeReverseFilter(const std::vector<uint16_t>& whitelist)
+ : internal::BarcodeDataFilterBase<uint16_t, BarcodeLookupData::BC_REVERSE>(whitelist)
+{ }
+
+inline PbiBarcodeReverseFilter::PbiBarcodeReverseFilter(std::vector<uint16_t>&& whitelist)
+ : internal::BarcodeDataFilterBase<uint16_t, BarcodeLookupData::BC_REVERSE>(std::move(whitelist))
+{ }
+
+// PbiBarcodesFilter
+
+inline PbiBarcodesFilter::PbiBarcodesFilter(const std::pair<uint16_t, uint16_t> barcodes, const Compare::Type cmp)
+ : PbiBarcodesFilter(barcodes.first, barcodes.second, cmp)
+{ }
+
+inline PbiBarcodesFilter::PbiBarcodesFilter(const uint16_t bcForward, const uint16_t bcReverse, const Compare::Type cmp)
+ : compositeFilter_{ PbiFilter::Intersection({ PbiBarcodeForwardFilter{bcForward,cmp},
+ PbiBarcodeReverseFilter{bcReverse,cmp}
+ })
+ }
+{ }
+
+inline bool PbiBarcodesFilter::Accepts(const PbiRawData& idx, const size_t row) const
+{ return compositeFilter_.Accepts(idx, row); }
+
+// PbiIdentityFilter
+
+inline PbiIdentityFilter::PbiIdentityFilter(const float identity,
+ const Compare::Type cmp)
+ : internal::FilterBase<float>(identity, cmp)
+{ }
+
+// PbiLocalContextFilter
+
+inline PbiLocalContextFilter::PbiLocalContextFilter(const LocalContextFlags& flags,
+ const Compare::Type cmp)
+ : internal::BasicDataFilterBase<LocalContextFlags, BasicLookupData::CONTEXT_FLAG>(flags, cmp)
+{ }
+
+// PbiMapQualityFilter
+
+inline PbiMapQualityFilter::PbiMapQualityFilter(const uint8_t mapQual, const Compare::Type cmp)
+ : internal::MappedDataFilterBase<uint8_t, MappedLookupData::MAP_QUALITY>(mapQual, cmp)
+{ }
+
+// PbiMovieNameFilter
+
+inline bool PbiMovieNameFilter::Accepts(const PbiRawData& idx, const size_t row) const
+{ return compositeFilter_.Accepts(idx, row); }
+
+// PbiNumDeletedBasesFilter
+
+inline PbiNumDeletedBasesFilter::PbiNumDeletedBasesFilter(const size_t numDeletions, const Compare::Type cmp)
+ : internal::MappedDataFilterBase<size_t, MappedLookupData::N_DEL>(numDeletions, cmp)
+{ }
+
+// PbiNumInsertedBasesFilter
+
+inline PbiNumInsertedBasesFilter::PbiNumInsertedBasesFilter(const size_t numInsertions, const Compare::Type cmp)
+ : internal::MappedDataFilterBase<size_t, MappedLookupData::N_INS>(numInsertions, cmp)
+{ }
+
+// PbiNumMatchesFilter
+
+inline PbiNumMatchesFilter::PbiNumMatchesFilter(const size_t numMatchedBases, const Compare::Type cmp)
+ : internal::MappedDataFilterBase<size_t, MappedLookupData::N_M>(numMatchedBases, cmp)
+{ }
+
+// PbiNumMismatchesFilter
+
+inline PbiNumMismatchesFilter::PbiNumMismatchesFilter(const size_t numMismatchedBases, const Compare::Type cmp)
+ : internal::MappedDataFilterBase<size_t, MappedLookupData::N_MM>(numMismatchedBases, cmp)
+{ }
+
+// PbiQueryEndFilter
+
+inline PbiQueryEndFilter::PbiQueryEndFilter(const int32_t position, const Compare::Type cmp)
+ : internal::BasicDataFilterBase<int32_t, BasicLookupData::Q_END>(position, cmp)
+{ }
+
+// PbiQueryLengthFilter
+
+inline PbiQueryLengthFilter::PbiQueryLengthFilter(const int32_t length, const Compare::Type cmp)
+ : internal::FilterBase<int32_t>(length, cmp)
+{ }
+
+// PbiQueryNameFilter
+
+inline bool PbiQueryNameFilter::Accepts(const PbiRawData& idx, const size_t row) const
+{ return compositeFilter_.Accepts(idx, row); }
+
+// PbiQueryStartFilter
+
+inline PbiQueryStartFilter::PbiQueryStartFilter(const int32_t position, const Compare::Type cmp)
+ : internal::BasicDataFilterBase<int32_t, BasicLookupData::Q_START>(position, cmp)
+{ }
+
+// PbiReadAccuracyFilter
+
+inline PbiReadAccuracyFilter::PbiReadAccuracyFilter(const Accuracy accuracy, const Compare::Type cmp)
+ : internal::BasicDataFilterBase<Accuracy, BasicLookupData::READ_QUALITY>(accuracy, cmp)
+{ }
+
+// PbiReadGroupFilter
+
+inline PbiReadGroupFilter::PbiReadGroupFilter(const int32_t rgId, const Compare::Type cmp)
+ : internal::BasicDataFilterBase<int32_t, BasicLookupData::RG_ID>(rgId, cmp)
+{ }
+
+inline PbiReadGroupFilter::PbiReadGroupFilter(const std::string rgId, const Compare::Type cmp)
+ : PbiReadGroupFilter(ReadGroupInfo::IdToInt(rgId), cmp)
+{ }
+
+inline PbiReadGroupFilter::PbiReadGroupFilter(const ReadGroupInfo& rg, const Compare::Type cmp)
+ : PbiReadGroupFilter(rg.Id(), cmp)
+{ }
+
+inline PbiReadGroupFilter::PbiReadGroupFilter(const std::vector<int32_t>& whitelist)
+ : internal::BasicDataFilterBase<int32_t, BasicLookupData::RG_ID>(whitelist)
+{ }
+
+inline PbiReadGroupFilter::PbiReadGroupFilter(std::vector<int32_t>&& whitelist)
+ : internal::BasicDataFilterBase<int32_t, BasicLookupData::RG_ID>(std::move(whitelist))
+{ }
+
+inline PbiReadGroupFilter::PbiReadGroupFilter(const std::vector<std::string>& whitelist)
+ : internal::BasicDataFilterBase<int32_t, BasicLookupData::RG_ID>(std::vector<int32_t>())
+{
+ multiValue_->reserve(whitelist.size());
+ for (const auto& rg : whitelist)
+ multiValue_->push_back(ReadGroupInfo::IdToInt(rg));
+}
+
+inline PbiReadGroupFilter::PbiReadGroupFilter(std::vector<std::string>&& whitelist)
+ : internal::BasicDataFilterBase<int32_t, BasicLookupData::RG_ID>(std::vector<int32_t>())
+{
+ multiValue_->reserve(whitelist.size());
+ for (auto&& rg : whitelist)
+ multiValue_->push_back(ReadGroupInfo::IdToInt(rg));
+}
+
+inline PbiReadGroupFilter::PbiReadGroupFilter(const std::vector<ReadGroupInfo>& whitelist)
+ : internal::BasicDataFilterBase<int32_t, BasicLookupData::RG_ID>(std::vector<int32_t>())
+{
+ multiValue_->reserve(whitelist.size());
+ for (const auto& rg : whitelist)
+ multiValue_->push_back(ReadGroupInfo::IdToInt(rg.Id()));
+}
+
+inline PbiReadGroupFilter::PbiReadGroupFilter(std::vector<ReadGroupInfo>&& whitelist)
+ : internal::BasicDataFilterBase<int32_t, BasicLookupData::RG_ID>(std::vector<int32_t>())
+{
+ multiValue_->reserve(whitelist.size());
+ for (auto&& rg : whitelist)
+ multiValue_->push_back(ReadGroupInfo::IdToInt(rg.Id()));
+}
+
+// PbiReferenceEndFilter
+
+inline PbiReferenceEndFilter::PbiReferenceEndFilter(const uint32_t tEnd, const Compare::Type cmp)
+ : internal::MappedDataFilterBase<uint32_t, MappedLookupData::T_END>(tEnd, cmp)
+{ }
+
+// PbiReferenceIdFilter
+
+inline PbiReferenceIdFilter::PbiReferenceIdFilter(const int32_t tId, const Compare::Type cmp)
+ : internal::MappedDataFilterBase<int32_t, MappedLookupData::T_ID>(tId, cmp)
+{ }
+
+inline PbiReferenceIdFilter::PbiReferenceIdFilter(const std::vector<int32_t>& whitelist)
+ : internal::MappedDataFilterBase<int32_t, MappedLookupData::T_ID>(whitelist)
+{ }
+
+inline PbiReferenceIdFilter::PbiReferenceIdFilter(std::vector<int32_t>&& whitelist)
+ : internal::MappedDataFilterBase<int32_t, MappedLookupData::T_ID>(std::move(whitelist))
+{ }
+
+// PbiReferenceStartFilter
+
+inline PbiReferenceStartFilter::PbiReferenceStartFilter(const uint32_t tStart, const Compare::Type cmp)
+ : internal::MappedDataFilterBase<uint32_t, MappedLookupData::T_START>(tStart, cmp)
+{ }
+
+// PbiZmwFilter
+
+inline PbiZmwFilter::PbiZmwFilter(const int32_t zmw, const Compare::Type cmp)
+ : internal::BasicDataFilterBase<int32_t, BasicLookupData::ZMW>(zmw, cmp)
+{ }
+
+inline PbiZmwFilter::PbiZmwFilter(const std::vector<int32_t>& whitelist)
+ : internal::BasicDataFilterBase<int32_t, BasicLookupData::ZMW>(whitelist)
+{ }
+
+inline PbiZmwFilter::PbiZmwFilter(std::vector<int32_t>&& whitelist)
+ : internal::BasicDataFilterBase<int32_t, BasicLookupData::ZMW>(std::move(whitelist))
+{ }
+
+} // namespace BAM
+} // namespace PacBio
diff --git a/include/pbbam/internal/PbiIndex.inl b/include/pbbam/internal/PbiIndex.inl
new file mode 100644
index 0000000..ca4c4ce
--- /dev/null
+++ b/include/pbbam/internal/PbiIndex.inl
@@ -0,0 +1,165 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiIndex.inl
+/// \brief Inline implementations for the PbiIndex class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/BamRecord.h"
+#include "pbbam/PbiFile.h"
+#include "pbbam/PbiIndex.h"
+#include "pbbam/PbiRawData.h"
+
+#include <algorithm>
+#include <map>
+#include <memory>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+#include <cassert>
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// --------------------------
+// Pbi Lookup Aggregate
+// --------------------------
+
+class PbiIndexPrivate
+{
+public:
+ PbiIndexPrivate(void);
+ PbiIndexPrivate(const PbiRawData& rawIndex);
+ PbiIndexPrivate(PbiRawData&& rawIndex);
+
+ std::unique_ptr<PbiIndexPrivate> DeepCopy(void) const;
+
+public:
+ bool HasSection(const PbiFile::Section flag) const;
+ void SetSection(const PbiFile::Section flag, bool ok = true);
+
+public:
+ IndexResultBlocks LookupReference(const int32_t tId) const;
+
+private:
+ IndexResultBlocks MergeBlocksWithOffsets(const IndexList& indices) const;
+
+public:
+ std::string filename_;
+ PbiFile::VersionEnum version_;
+ PbiFile::Sections sections_;
+ uint32_t numReads_;
+
+ // lookup structures
+ BasicLookupData basicData_;
+ MappedLookupData mappedData_;
+ ReferenceLookupData referenceData_;
+ BarcodeLookupData barcodeData_;
+
+private:
+ // not-implemented - ensure no copy
+ PbiIndexPrivate(const PbiIndexPrivate& other);
+ PbiIndexPrivate& operator=(const PbiIndexPrivate& other);
+};
+
+inline bool PbiIndexPrivate::HasSection(const PbiFile::Section flag) const
+{ return (sections_ & flag) != 0; }
+
+inline void PbiIndexPrivate::SetSection(const PbiFile::Section flag, bool ok)
+{ if (ok) sections_ |= flag; else sections_ &= ~flag; }
+
+inline IndexResultBlocks
+PbiIndexPrivate::LookupReference(const int32_t tId) const
+{
+ if (!HasSection(PbiFile::REFERENCE))
+ return IndexResultBlocks{ };
+
+ const auto& indexRange = referenceData_.Indices(tId);
+ if (indexRange.first == nullIndex() && indexRange.second == nullIndex())
+ return IndexResultBlocks{ };
+ const auto numReads = indexRange.second - indexRange.first;
+ auto blocks = IndexResultBlocks{ IndexResultBlock(indexRange.first, numReads) };
+ basicData_.ApplyOffsets(blocks);
+ return blocks;
+}
+
+inline IndexResultBlocks
+PbiIndexPrivate::MergeBlocksWithOffsets(const IndexList& indices) const
+{
+ auto blocks = mergedIndexBlocks(indices);
+ basicData_.ApplyOffsets(blocks);
+ return blocks;
+}
+
+} // namespace internal
+
+inline PbiFile::Sections PbiIndex::FileSections(void) const
+{ return d_->sections_; }
+
+inline bool PbiIndex::HasBarcodeData(void) const
+{ return d_->HasSection(PbiFile::BARCODE); }
+
+inline bool PbiIndex::HasMappedData(void) const
+{ return d_->HasSection(PbiFile::MAPPED); }
+
+inline bool PbiIndex::HasReferenceData(void) const
+{ return d_->HasSection(PbiFile::REFERENCE); }
+
+inline bool PbiIndex::HasSection(const PbiFile::Section section) const
+{ return d_->HasSection(section); }
+
+inline uint32_t PbiIndex::NumReads(void) const
+{ return d_->numReads_; }
+
+inline PbiFile::VersionEnum PbiIndex::Version(void) const
+{ return d_->version_; }
+
+inline const BarcodeLookupData& PbiIndex::BarcodeData(void) const
+{ return d_->barcodeData_; }
+
+inline const BasicLookupData& PbiIndex::BasicData(void) const
+{ return d_->basicData_; }
+
+inline const MappedLookupData& PbiIndex::MappedData(void) const
+{ return d_->mappedData_; }
+
+inline const ReferenceLookupData& PbiIndex::ReferenceData(void) const
+{ return d_->referenceData_; }
+
+} // namespace BAM
+} // namespace PacBio
diff --git a/include/pbbam/internal/PbiIndex_p.h b/include/pbbam/internal/PbiIndex_p.h
deleted file mode 100644
index 5f0db35..0000000
--- a/include/pbbam/internal/PbiIndex_p.h
+++ /dev/null
@@ -1,931 +0,0 @@
-//// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
-////
-//// All rights reserved.
-////
-//// Redistribution and use in source and binary forms, with or without
-//// modification, are permitted (subject to the limitations in the
-//// disclaimer below) provided that the following conditions are met:
-////
-//// * Redistributions of source code must retain the above copyright
-//// notice, this list of conditions and the following disclaimer.
-////
-//// * Redistributions in binary form must reproduce the above
-//// copyright notice, this list of conditions and the following
-//// disclaimer in the documentation and/or other materials provided
-//// with the distribution.
-////
-//// * Neither the name of Pacific Biosciences nor the names of its
-//// contributors may be used to endorse or promote products derived
-//// from this software without specific prior written permission.
-////
-//// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-//// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-//// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-//// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-//// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-//// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-//// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-//// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-//// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-//// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-//// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-//// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-//// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-//// SUCH DAMAGE.
-////
-//// Author: Derek Barnett
-
-//#ifndef PBIINDEX_P_H
-//#define PBIINDEX_P_H
-
-//#include "pbbam/BamRecord.h"
-//#include "pbbam/PbiFile.h"
-//#include "pbbam/PbiIndex.h"
-//#include "pbbam/PbiRawData.h"
-
-//#include <algorithm>
-//#include <map>
-//#include <memory>
-//#include <unordered_map>
-//#include <utility>
-//#include <vector>
-//#include <cassert>
-
-//namespace PacBio {
-//namespace BAM {
-//namespace internal {
-
-//// --------------------------
-//// Ordered Lookup Container (e.g. map)
-//// --------------------------
-
-//template<typename T>
-//class OrderedLookup
-//{
-//public:
-// typedef T KeyType;
-// typedef IndexList ValueType;
-// typedef std::map<KeyType, ValueType> ContainerType;
-// typedef typename ContainerType::const_iterator IterType;
-
-//public:
-// OrderedLookup(void);
-// OrderedLookup(size_t n);
-// OrderedLookup(const ContainerType& data);
-// OrderedLookup(ContainerType&& data);
-// OrderedLookup(const std::vector<T>& rawData);
-// OrderedLookup(std::vector<T>&& rawData);
-
-//public:
-// bool operator==(const OrderedLookup<T>& other) const;
-// bool operator!=(const OrderedLookup<T>& other) const;
-
-//public:
-// IndexList LookupIndices(const KeyType& key,
-// const CompareType& compare) const;
-
-//private:
-// IndexList LookupInclusiveRange(const IterType& begin,
-// const IterType& end) const;
-
-// IndexList LookupExclusiveRange(const IterType& begin,
-// const IterType& end,
-// const KeyType& key) const;
-
-//private:
-// ContainerType data_;
-//};
-
-//// --------------------------
-//// Unordered Lookup Container (e.g. hash)
-//// --------------------------
-
-//template<typename T>
-//class UnorderedLookup
-//{
-//public:
-// typedef T KeyType;
-// typedef IndexList ValueType;
-// typedef std::unordered_map<KeyType, ValueType> ContainerType;
-
-//public:
-// UnorderedLookup(void);
-// UnorderedLookup(size_t n);
-// UnorderedLookup(const ContainerType& data);
-// UnorderedLookup(ContainerType&& data);
-// UnorderedLookup(const std::vector<T>& rawData);
-// UnorderedLookup(std::vector<T>&& rawData);
-
-//public:
-// bool operator==(const UnorderedLookup<T>& other) const;
-// bool operator!=(const UnorderedLookup<T>& other) const;
-
-//public:
-// IndexList LookupIndices(const KeyType& key,
-// const CompareType& compare) const;
-
-//private:
-// template<typename Compare>
-// IndexList LookupHelper(const KeyType& key, const Compare& cmp) const;
-
-//private:
-// ContainerType data_;
-//};
-
-//// ----------------
-//// Subread Data
-//// ----------------
-
-//struct SubreadLookupData
-//{
-// // ctors
-// SubreadLookupData(void);
-// SubreadLookupData(const PbiRawSubreadData& rawData);
-//// SubreadLookupData(PbiRawSubreadData&& rawData);
-
-// // add offset data to index result blocks
-// void ApplyOffsets(IndexResultBlocks& blocks) const;
-
-// template<typename T>
-// IndexList Indices(const SubreadField& field,
-// const T& value,
-// const CompareType& compareType) const;
-
-// template<typename T>
-// IndexList IndicesMulti(const SubreadField& field,
-// const std::vector<T>& values) const;
-
-// // map ordering doesn't make sense, optimize for direct lookup
-// UnorderedLookup<int32_t> rgId_;
-
-// // numeric comparisons make sense, keep key ordering preserved
-// OrderedLookup<int32_t> qStart_;
-// OrderedLookup<int32_t> qEnd_;
-// OrderedLookup<int32_t> holeNumber_;
-// OrderedLookup<uint16_t> readQual_;
-
-// // offsets
-// std::vector<int64_t> fileOffset_;
-//};
-
-//// -----------------
-//// Mapped Data
-//// -----------------
-
-//struct MappedLookupData
-//{
-// // ctors
-// MappedLookupData(void);
-// MappedLookupData(const PbiRawMappedData& rawData);
-//// MappedLookupData(PbiRawMappedData&& rawData);
-
-// template<typename T>
-// IndexList Indices(const MappedField& field,
-// const T& value,
-// const CompareType& compareType) const;
-
-// template<typename T>
-// IndexList IndicesMulti(const MappedField& field,
-// const std::vector<T>& values) const;
-
-// // numeric comparisons make sense, keep key ordering preserved
-// OrderedLookup<int32_t> tId_;
-// OrderedLookup<uint32_t> tStart_;
-// OrderedLookup<uint32_t> tEnd_;
-// OrderedLookup<uint32_t> aStart_;
-// OrderedLookup<uint32_t> aEnd_;
-// OrderedLookup<uint32_t> nM_;
-// OrderedLookup<uint32_t> nMM_;
-// OrderedLookup<uint8_t> mapQV_;
-
-// // generated, not stored in PBI
-// OrderedLookup<uint32_t> nIns_;
-// OrderedLookup<uint32_t> nDel_;
-
-// // no need for map overhead, just store direct indices
-// IndexList reverseStrand_;
-// IndexList forwardStrand_;
-//};
-
-//// ------------------
-//// Reference Data
-//// ------------------
-
-//struct ReferenceLookupData
-//{
-// // ctors
-// ReferenceLookupData(void);
-// ReferenceLookupData(const PbiRawReferenceData& rawData);
-//// ReferenceLookupData(PbiRawReferenceData&& rawData);
-
-// IndexRange Indices(const int32_t tId) const;
-
-// // references_[tId] = (begin, end) indices
-// // into SubreadLookupData::fileOffset_
-// std::unordered_map<int32_t, IndexRange> references_;
-//};
-
-//// ---------------
-//// Barcode Data
-//// ---------------
-
-//struct BarcodeLookupData
-//{
-// // ctors
-// BarcodeLookupData(void);
-// BarcodeLookupData(const PbiRawBarcodeData& rawData);
-//// BarcodeLookupData(PbiRawBarcodeData&& rawData);
-
-// template<typename T>
-// IndexList Indices(const BarcodeField& field,
-// const T& value,
-// const CompareType& compareType) const;
-
-// template<typename T>
-// IndexList IndicesMulti(const BarcodeField& field,
-// const std::vector<T>& values) const;
-
-// // numeric comparisons make sense, keep key ordering preserved
-// OrderedLookup<uint16_t> bcLeft_;
-// OrderedLookup<uint16_t> bcRight_;
-// OrderedLookup<uint8_t> bcQual_;
-
-// // see if this works, or if can use unordered, 'direct' query
-// OrderedLookup<uint8_t> ctxtFlag_;
-//};
-
-//// --------------------------
-//// Pbi Lookup Aggregate
-//// --------------------------
-
-//class PbiIndexPrivate
-//{
-//public:
-// PbiIndexPrivate(void);
-// PbiIndexPrivate(const PbiRawData& rawIndex);
-// PbiIndexPrivate(PbiRawData&& rawIndex);
-
-// std::unique_ptr<PbiIndexPrivate> DeepCopy(void) const;
-
-//public:
-// bool HasSection(const PbiFile::Section flag) const;
-// void SetSection(const PbiFile::Section flag, bool ok = true);
-
-//public:
-
-// template<typename T>
-// IndexList Indices(const SubreadField& field,
-// const T& value,
-// const CompareType& compareType) const;
-
-// template<typename T>
-// IndexList Indices(const MappedField& field,
-// const T& value,
-// const CompareType& compareType) const;
-
-// template<typename T>
-// IndexList Indices(const BarcodeField& field,
-// const T& value,
-// const CompareType& compareType) const;
-
-// template<typename T>
-// IndexList IndicesMulti(const SubreadField& field,
-// const T& value) const;
-
-// template<typename T>
-// IndexList IndicesMulti(const MappedField& field,
-// const T& value) const;
-
-// template<typename T>
-// IndexList IndicesMulti(const BarcodeField& field,
-// const T& value) const;
-
-// template<typename T>
-// IndexResultBlocks Lookup(const SubreadField& field,
-// const T& value,
-// const CompareType& compareType) const;
-
-// template<typename T>
-// IndexResultBlocks Lookup(const MappedField& field,
-// const T& value,
-// const CompareType& compareType) const;
-
-// template<typename T>
-// IndexResultBlocks Lookup(const BarcodeField& field,
-// const T& value,
-// const CompareType& compareType) const;
-
-// template<typename T>
-// IndexResultBlocks LookupMulti(const SubreadField& field,
-// const std::vector<T>& values) const;
-
-// template<typename T>
-// IndexResultBlocks LookupMulti(const MappedField& field,
-// const std::vector<T>& values) const;
-
-// template<typename T>
-// IndexResultBlocks LookupMulti(const BarcodeField& field,
-// const std::vector<T>& values) const;
-
-// IndexResultBlocks LookupReference(const int32_t tId) const;
-
-//private:
-// IndexResultBlocks MergeBlocksWithOffsets(const IndexList& indices) const;
-
-//public:
-// PbiFile::VersionEnum version_;
-// PbiFile::Sections sections_;
-// uint32_t numReads_;
-
-// // lookup structures
-// SubreadLookupData subreadData_;
-// MappedLookupData mappedData_;
-// ReferenceLookupData referenceData_;
-// BarcodeLookupData barcodeData_;
-
-//private:
-// // not-implemented - ensure no copy
-// PbiIndexPrivate(const PbiIndexPrivate& other);
-// PbiIndexPrivate& operator=(const PbiIndexPrivate& other);
-//};
-
-//// ----------------
-//// helper methods
-//// ----------------
-
-//inline IndexResultBlocks mergedIndexBlocks(IndexList&& indices)
-//{
-// if (indices.empty())
-// return IndexResultBlocks();
-// std::sort(indices.begin(), indices.end());
-
-// IndexResultBlocks result;
-// result.push_back(IndexResultBlock(indices.at(0), 1));
-// const size_t numIndices = indices.size();
-// for (size_t i = 1; i < numIndices; ++i) {
-// if (indices.at(i) == indices.at(i-1)+1)
-// ++result.back().numReads_;
-// else
-// result.push_back(IndexResultBlock(indices.at(i), 1));
-// }
-// return result;
-//}
-
-//inline IndexResultBlocks mergedIndexBlocks(const IndexList& indices)
-//{
-// IndexList copy = indices;
-// return mergedIndexBlocks(std::move(copy));
-//}
-
-//inline size_t nullIndex(void)
-//{ return static_cast<size_t>(-1); }
-
-//inline
-//void pushBackIndices(IndexList& result,
-// const IndexList& toAppend)
-//{
-// result.reserve(result.size() + toAppend.size());
-// for (auto element : toAppend)
-// result.push_back(element);
-//}
-
-//// -----------------
-//// OrderedLookup
-//// -----------------
-
-//template<typename T>
-//inline OrderedLookup<T>::OrderedLookup(void) { }
-
-//template<typename T>
-//inline OrderedLookup<T>::OrderedLookup(size_t n)
-//{ data_.reserve(n); }
-
-//template<typename T>
-//inline OrderedLookup<T>::OrderedLookup(const ContainerType& data)
-// : data_(data)
-//{ }
-
-//template<typename T>
-//inline OrderedLookup<T>::OrderedLookup(ContainerType&& data)
-// : data_(std::move(data))
-//{ }
-
-//template<typename T>
-//inline OrderedLookup<T>::OrderedLookup(const std::vector<T>& rawData)
-//{
-// const size_t numElements = rawData.size();
-// for (size_t i = 0; i < numElements; ++i)
-// data_[ rawData.at(i) ].push_back(i);
-//}
-
-//template<typename T>
-//inline OrderedLookup<T>::OrderedLookup(std::vector<T>&& rawData)
-//{
-// const size_t numElements = rawData.size();
-// for (size_t i = 0; i < numElements; ++i)
-// data_[ rawData.at(i) ].push_back(i);
-//}
-
-//template<typename T>
-//inline bool OrderedLookup<T>::operator==(const OrderedLookup<T>& other) const
-//{ return data_ == other.data_; }
-
-//template<typename T>
-//inline bool OrderedLookup<T>::operator!=(const OrderedLookup<T>& other) const
-//{ return !(*this == other); }
-
-//template<typename T>
-//inline IndexList
-//OrderedLookup<T>::LookupInclusiveRange(const IterType& begin,
-// const IterType& end) const
-//{
-// IndexList result;
-// for ( auto iter = begin; iter != end; ++iter )
-// pushBackIndices(result, iter->second);
-// std::sort(result.begin(), result.end());
-// return result;
-//}
-
-//template<typename T>
-//inline IndexList
-//OrderedLookup<T>::LookupExclusiveRange(const IterType& begin,
-// const IterType& end,
-// const KeyType& key) const
-//{
-// IndexList result;
-// for ( auto iter = begin; iter != end; ++iter ) {
-// if (iter->first != key)
-// pushBackIndices(result, iter->second);
-// }
-// std::sort(result.begin(), result.end());
-// return result;
-//}
-
-//template<typename T>
-//inline IndexList
-//OrderedLookup<T>::LookupIndices(const OrderedLookup::KeyType& key,
-// const CompareType& compare) const
-//{
-// const IterType begin = data_.cbegin();
-// const IterType end = data_.cend();
-// switch(compare)
-// {
-// case CompareType::EQUAL:
-// {
-// const auto found = data_.find(key);
-// if (found != end)
-// return found->second;
-// return IndexList();
-// }
-// case CompareType::LESS_THAN: return LookupExclusiveRange(begin, data_.upper_bound(key), key);
-// case CompareType::LESS_THAN_EQUAL: return LookupInclusiveRange(begin, data_.upper_bound(key));
-// case CompareType::GREATER_THAN: return LookupExclusiveRange(data_.lower_bound(key), end, key);
-// case CompareType::GREATER_THAN_EQUAL: return LookupInclusiveRange(data_.lower_bound(key), end);
-// case CompareType::NOT_EQUAL: return LookupExclusiveRange(begin, end, key);
-// default:
-// assert(false);
-// }
-// return IndexList();
-//}
-
-//// -----------------
-//// UnorderedLookup
-//// -----------------
-
-//template<typename T>
-//inline UnorderedLookup<T>::UnorderedLookup(void) { }
-
-//template<typename T>
-//inline UnorderedLookup<T>::UnorderedLookup(size_t n)
-//{ data_.reserve(n); }
-
-//template<typename T>
-//inline UnorderedLookup<T>::UnorderedLookup(const ContainerType& data)
-// : data_(data)
-//{ }
-
-//template<typename T>
-//inline UnorderedLookup<T>::UnorderedLookup(ContainerType&& data)
-// : data_(std::move(data))
-//{ }
-
-//template<typename T>
-//inline UnorderedLookup<T>::UnorderedLookup(const std::vector<T> &rawData)
-//{
-// const size_t numElements = rawData.size();
-// for (size_t i = 0; i < numElements; ++i)
-// data_[ rawData.at(i) ].push_back(i);
-//}
-
-//template<typename T>
-//inline UnorderedLookup<T>::UnorderedLookup(std::vector<T>&& rawData)
-//{
-// const size_t numElements = rawData.size();
-// for (size_t i = 0; i < numElements; ++i)
-// data_[ rawData.at(i) ].push_back(i);
-//}
-
-//template<typename T>
-//inline bool UnorderedLookup<T>::operator==(const UnorderedLookup<T>& other) const
-//{ return data_ == other.data_; }
-
-//template<typename T>
-//inline bool UnorderedLookup<T>::operator!=(const UnorderedLookup<T>& other) const
-//{ return !(*this == other); }
-
-//template<typename T>
-//template<typename Compare>
-//inline IndexList
-//UnorderedLookup<T>::LookupHelper(const UnorderedLookup::KeyType& key,
-// const Compare& cmp) const
-//{
-// auto iter = data_.cbegin();
-// const auto end = data_.cend();
-// IndexList result; // init with some avg size ??
-// for ( ; iter != end; ++iter ) {
-// const auto e = (iter->first);
-// if (cmp(e, key))
-// pushBackIndices(result, iter->second);
-// }
-// std::sort(result.begin(), result.end());
-// return result;
-//}
-
-//template<typename T>
-//inline IndexList
-//UnorderedLookup<T>::LookupIndices(const UnorderedLookup::KeyType& key,
-// const CompareType& compare) const
-//{
-// switch (compare) {
-// case CompareType::EQUAL:
-// {
-// const auto found = data_.find(key);
-// if (found != data_.cend())
-// return found->second;
-// else
-// return IndexList();
-// }
-// case CompareType::LESS_THAN: return LookupHelper(key, std::less<KeyType>());
-// case CompareType::LESS_THAN_EQUAL: return LookupHelper(key, std::less_equal<KeyType>());
-// case CompareType::GREATER_THAN: return LookupHelper(key, std::greater<KeyType>());
-// case CompareType::GREATER_THAN_EQUAL: return LookupHelper(key, std::greater_equal<KeyType>());
-// case CompareType::NOT_EQUAL: return LookupHelper(key, std::not_equal_to<KeyType>());
-// default:
-// assert(false);
-// }
-// return IndexList();
-//}
-
-//// -------------------
-//// SubreadLookupData
-//// -------------------
-
-//inline
-//void SubreadLookupData::ApplyOffsets(IndexResultBlocks& blocks) const
-//{
-// for (IndexResultBlock& block : blocks)
-// block.virtualOffset_ = fileOffset_.at(block.firstIndex_);
-//}
-
-//template<typename T>
-//inline IndexList SubreadLookupData::Indices(const SubreadField& field,
-// const T& value,
-// const CompareType& compareType) const
-//{
-// switch(field) {
-// case SubreadField::RG_ID: return rgId_.LookupIndices(value, compareType);
-// case SubreadField::Q_START: return qStart_.LookupIndices(value, compareType);
-// case SubreadField::Q_END: return qEnd_.LookupIndices(value, compareType);
-// case SubreadField::ZMW: return holeNumber_.LookupIndices(value, compareType);
-// case SubreadField::READ_QUALITY: return readQual_.LookupIndices(value, compareType);
-
-// case SubreadField::VIRTUAL_OFFSET : // fall-through, not supported this way
-// default:
-// assert(false);
-// }
-// return IndexList();
-//}
-
-//template<typename T>
-//inline IndexList SubreadLookupData::IndicesMulti(const SubreadField& field,
-// const std::vector<T>& values) const
-//{
-// IndexList result;
-// for (auto value : values) {
-// const IndexList& valueIndices = Indices(field, value, CompareType::EQUAL);
-// result.reserve(result.size() + valueIndices.size());
-// for (auto i : valueIndices)
-// result.push_back(i);
-// }
-// return result;
-//}
-
-//// -------------------
-//// MappedLookupData
-//// -------------------
-
-//template<typename T>
-//inline IndexList MappedLookupData::Indices(const MappedField& field,
-// const T& value,
-// const CompareType& compareType) const
-//{
-// switch(field) {
-// case MappedField::T_ID: return tId_.LookupIndices(value, compareType);
-// case MappedField::T_START: return tStart_.LookupIndices(value, compareType);
-// case MappedField::T_END: return tEnd_.LookupIndices(value, compareType);
-// case MappedField::A_START: return aStart_.LookupIndices(value, compareType);
-// case MappedField::A_END: return aEnd_.LookupIndices(value, compareType);
-// case MappedField::N_M: return nM_.LookupIndices(value, compareType);
-// case MappedField::N_MM: return nM_.LookupIndices(value, compareType);
-// case MappedField::MAP_QUALITY: return mapQV_.LookupIndices(value, compareType);
-
-// // MappedField::STRAND has its own specialization
-
-// default:
-// assert(false);
-// }
-// return IndexList();
-//}
-
-//template<>
-//inline IndexList MappedLookupData::Indices(const MappedField& field,
-// const Strand& strand,
-// const CompareType& compareType) const
-//{
-// assert(field == MappedField::STRAND);
-
-// if (compareType == CompareType::EQUAL) {
-// if (strand == Strand::FORWARD)
-// return forwardStrand_;
-// else
-// return reverseStrand_;
-// } else if (compareType == CompareType::NOT_EQUAL) {
-// if (strand == Strand::FORWARD)
-// return reverseStrand_;
-// else
-// return forwardStrand_;
-// }
-
-// // only EQUAL/NOT_EQUAL supported
-// assert(false);
-// return IndexList();
-//}
-
-//template<typename T>
-//inline IndexList MappedLookupData::IndicesMulti(const MappedField& field,
-// const std::vector<T>& values) const
-//{
-// IndexList result;
-// for (auto value : values) {
-// const IndexList& valueIndices = Indices(field, value, CompareType::EQUAL);
-// result.reserve(result.size() + valueIndices.size());
-// for (auto i : valueIndices)
-// result.push_back(i);
-// }
-// return result;
-//}
-
-
-//// ---------------------
-//// ReferenceLookupData
-//// ---------------------
-
-//inline IndexRange ReferenceLookupData::Indices(const int32_t tId) const
-//{
-// auto found = references_.find(tId);
-// if (found == references_.cend())
-// return IndexRange(nullIndex(), nullIndex());
-// return found->second;
-//}
-
-//// -------------------
-//// BarcodeLookupData
-//// -------------------
-
-//template<typename T>
-//inline IndexList BarcodeLookupData::Indices(const BarcodeField& field,
-// const T& value,
-// const CompareType& compareType) const
-//{
-// switch(field) {
-// case BarcodeField::BC_LEFT: return bcLeft_.LookupIndices(value, compareType);
-// case BarcodeField::BC_RIGHT: return bcRight_.LookupIndices(value, compareType);
-// case BarcodeField::BC_QUALITY: return bcQual_.LookupIndices(value, compareType);
-// case BarcodeField::CONTEXT_FLAG: return ctxtFlag_.LookupIndices(value, compareType);
-// default:
-// assert(false);
-// }
-// return IndexList();
-//}
-
-//template<typename T>
-//inline IndexList BarcodeLookupData::IndicesMulti(const BarcodeField& field,
-// const std::vector<T>& values) const
-//{
-// IndexList result;
-// for (auto value : values) {
-// const IndexList& valueIndices = Indices(field, value, CompareType::EQUAL);
-// result.reserve(result.size() + valueIndices.size());
-// for (auto i : valueIndices)
-// result.push_back(i);
-// }
-// return result;
-//}
-
-
-//// -----------------
-//// PbiIndexPrivate
-//// -----------------
-
-//inline bool PbiIndexPrivate::HasSection(const PbiFile::Section flag) const
-//{ return (sections_ & flag) != 0; }
-
-//inline void PbiIndexPrivate::SetSection(const PbiFile::Section flag, bool ok)
-//{ if (ok) sections_ |= flag; else sections_ &= ~flag; }
-
-//template<typename T>
-//inline IndexList
-//PbiIndexPrivate::Indices(const SubreadField& field,
-// const T& value,
-// const CompareType& compareType) const
-//{ return subreadData_.Indices(field, value, compareType); }
-
-//template<typename T>
-//inline IndexList
-//PbiIndexPrivate::Indices(const MappedField& field,
-// const T& value,
-// const CompareType& compareType) const
-//{ return mappedData_.Indices(field, value, compareType); }
-
-//template<typename T>
-//inline IndexList
-//PbiIndexPrivate::Indices(const BarcodeField& field,
-// const T& value,
-// const CompareType& compareType) const
-//{ return barcodeData_.Indices(field, value, compareType); }
-
-//template<typename T>
-//inline IndexList
-//PbiIndexPrivate::IndicesMulti(const SubreadField& field,
-// const T& value) const
-//{ return subreadData_.IndicesMulti(field, value); }
-
-//template<typename T>
-//inline IndexList
-//PbiIndexPrivate::IndicesMulti(const MappedField& field,
-// const T& value) const
-//{ return mappedData_.IndicesMulti(field, value); }
-
-//template<typename T>
-//inline IndexList
-//PbiIndexPrivate::IndicesMulti(const BarcodeField& field,
-// const T& value) const
-//{ return barcodeData_.IndicesMulti(field, value); }
-
-//template<typename T>
-//inline IndexResultBlocks
-//PbiIndexPrivate::Lookup(const SubreadField& field,
-// const T& value,
-// const CompareType& compareType) const
-//{ return MergeBlocksWithOffsets(subreadData_.Indices(field, value, compareType)); }
-
-//template<typename T>
-//inline IndexResultBlocks
-//PbiIndexPrivate::Lookup(const MappedField& field,
-// const T& value,
-// const CompareType& compareType) const
-//{
-// if (!HasSection(PbiFile::MAPPED))
-// return IndexResultBlocks();
-// return MergeBlocksWithOffsets(mappedData_.Indices(field, value, compareType));
-//}
-
-//template<typename T>
-//inline IndexResultBlocks
-//PbiIndexPrivate::Lookup(const BarcodeField& field,
-// const T& value,
-// const CompareType& compareType) const
-//{
-// if (!HasSection(PbiFile::BARCODE))
-// return IndexResultBlocks();
-// return MergeBlocksWithOffsets(barcodeData_.Indices(field, value, compareType));
-//}
-
-//template<typename T>
-//inline IndexResultBlocks
-//PbiIndexPrivate::LookupMulti(const SubreadField& field,
-// const std::vector<T>& values) const
-//{ return MergeBlocksWithOffsets(subreadData_.IndicesMulti(field, values)); }
-
-//template<typename T>
-//inline IndexResultBlocks
-//PbiIndexPrivate::LookupMulti(const MappedField& field,
-// const std::vector<T>& values) const
-//{ return MergeBlocksWithOffsets(mappedData_.IndicesMulti(field, values)); }
-
-//template<typename T>
-//inline IndexResultBlocks
-//PbiIndexPrivate::LookupMulti(const BarcodeField& field,
-// const std::vector<T>& values) const
-//{ return MergeBlocksWithOffsets(barcodeData_.IndicesMulti(field, values)); }
-
-//inline IndexResultBlocks
-//PbiIndexPrivate::LookupReference(const int32_t tId) const
-//{
-// if (!HasSection(PbiFile::REFERENCE))
-// return IndexResultBlocks();
-// const IndexRange& indexRange = referenceData_.Indices(tId);
-// if (indexRange.first == nullIndex() && indexRange.second == nullIndex())
-// return IndexResultBlocks();
-// const size_t numReads = indexRange.second - indexRange.first;
-// IndexResultBlocks blocks(1, IndexResultBlock(indexRange.first, numReads));
-// subreadData_.ApplyOffsets(blocks);
-// return blocks;
-//}
-
-//inline IndexResultBlocks
-//PbiIndexPrivate::MergeBlocksWithOffsets(const IndexList& indices) const
-//{
-// IndexResultBlocks blocks = mergedIndexBlocks(indices);
-// subreadData_.ApplyOffsets(blocks);
-// return blocks;
-//}
-
-//} // namespace internal
-
-//template<typename FieldType, typename ValueType>
-//inline IndexRequestBase<FieldType, ValueType>::IndexRequestBase(const FieldType field,
-// const ValueType& value,
-// const CompareType compareType)
-// : field_(field)
-// , value_(value)
-// , compareType_(compareType)
-//{ }
-
-//template<typename FieldType, typename ValueType>
-//inline IndexMultiRequestBase<FieldType, ValueType>::IndexMultiRequestBase(const FieldType field,
-// const std::vector<ValueType>& values)
-// : field_(field)
-// , values_(values)
-//{ }
-
-//template<SubreadField field, typename ValueType>
-//inline SubreadIndexRequest<field, ValueType>::SubreadIndexRequest(const ValueType& value,
-// const CompareType& compareType)
-// : IndexRequestBase<SubreadField, ValueType>(field, value, compareType)
-//{ }
-
-//template<SubreadField field, typename ValueType>
-//inline SubreadIndexMultiRequest<field, ValueType>::SubreadIndexMultiRequest(const std::vector<ValueType>& values)
-// : IndexMultiRequestBase<SubreadField, ValueType>(field, values)
-//{ }
-
-//template<MappedField field, typename ValueType>
-//inline MappedIndexRequest<field, ValueType>::MappedIndexRequest(const ValueType& value,
-// const CompareType& compareType)
-// : IndexRequestBase<MappedField, ValueType>(field, value, compareType)
-//{ }
-
-//template<MappedField field, typename ValueType>
-//inline MappedIndexMultiRequest<field, ValueType>::MappedIndexMultiRequest(const std::vector<ValueType>& values)
-// : IndexMultiRequestBase<MappedField, ValueType>(field, values)
-//{ }
-
-//template<BarcodeField field, typename ValueType>
-//inline BarcodeIndexRequest<field, ValueType>::BarcodeIndexRequest(const ValueType& value,
-// const CompareType& compareType)
-// : IndexRequestBase<BarcodeField, ValueType>(field, value, compareType)
-//{ }
-
-//template<BarcodeField field, typename ValueType>
-//inline BarcodeIndexMultiRequest<field, ValueType>::BarcodeIndexMultiRequest(const std::vector<ValueType>& values)
-// : IndexMultiRequestBase<BarcodeField, ValueType>(field, values)
-//{ }
-
-//template <typename FieldType, typename ValueType>
-//inline IndexList
-//PbiIndex::RawIndices(const IndexRequestBase<FieldType, ValueType>& request) const
-//{ return d_->Indices(request.field_, request.value_, request.compareType_); }
-
-//template <typename FieldType, typename ValueType>
-//inline IndexList
-//PbiIndex::RawIndices(const IndexMultiRequestBase<FieldType, ValueType>& request) const
-//{ return d_->Indices(request.field_, request.values_); }
-
-//template <typename FieldType, typename ValueType>
-//inline IndexResultBlocks
-//PbiIndex::Lookup(const IndexRequestBase<FieldType, ValueType>& request) const
-//{ return d_->Lookup(request.field_, request.value_, request.compareType_); }
-
-//template <typename FieldType, typename ValueType>
-//inline IndexResultBlocks
-//PbiIndex::Lookup(const IndexMultiRequestBase<FieldType, ValueType>& request) const
-//{ return d_->LookupMulti(request.field_, request.values_); }
-
-//inline IndexResultBlocks PbiIndex::LookupReference(const int32_t tId) const
-//{ return d_->LookupReference(tId); }
-
-//} // namespace BAM
-//} // namespace PacBio
-
-//#endif // PACBIOINDEX_P_H
diff --git a/include/pbbam/internal/PbiIndex_p.inl b/include/pbbam/internal/PbiIndex_p.inl
deleted file mode 100644
index 41dc831..0000000
--- a/include/pbbam/internal/PbiIndex_p.inl
+++ /dev/null
@@ -1,927 +0,0 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-//
-// * Neither the name of Pacific Biosciences nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-//
-// Author: Derek Barnett
-
-#include "pbbam/BamRecord.h"
-#include "pbbam/PbiFile.h"
-#include "pbbam/PbiIndex.h"
-#include "pbbam/PbiRawData.h"
-
-#include <algorithm>
-#include <map>
-#include <memory>
-#include <unordered_map>
-#include <utility>
-#include <vector>
-#include <cassert>
-
-namespace PacBio {
-namespace BAM {
-namespace internal {
-
-// --------------------------
-// Ordered Lookup Container (e.g. map)
-// --------------------------
-
-template<typename T>
-class OrderedLookup
-{
-public:
- typedef T KeyType;
- typedef IndexList ValueType;
- typedef std::map<KeyType, ValueType> ContainerType;
- typedef typename ContainerType::const_iterator IterType;
-
-public:
- OrderedLookup(void);
- OrderedLookup(size_t n);
- OrderedLookup(const ContainerType& data);
- OrderedLookup(ContainerType&& data);
- OrderedLookup(const std::vector<T>& rawData);
- OrderedLookup(std::vector<T>&& rawData);
-
-public:
- bool operator==(const OrderedLookup<T>& other) const;
- bool operator!=(const OrderedLookup<T>& other) const;
-
-public:
- IndexList LookupIndices(const KeyType& key,
- const CompareType& compare) const;
-
-private:
- IndexList LookupInclusiveRange(const IterType& begin,
- const IterType& end) const;
-
- IndexList LookupExclusiveRange(const IterType& begin,
- const IterType& end,
- const KeyType& key) const;
-
-private:
- ContainerType data_;
-};
-
-// --------------------------
-// Unordered Lookup Container (e.g. hash)
-// --------------------------
-
-template<typename T>
-class UnorderedLookup
-{
-public:
- typedef T KeyType;
- typedef IndexList ValueType;
- typedef std::unordered_map<KeyType, ValueType> ContainerType;
-
-public:
- UnorderedLookup(void);
- UnorderedLookup(size_t n);
- UnorderedLookup(const ContainerType& data);
- UnorderedLookup(ContainerType&& data);
- UnorderedLookup(const std::vector<T>& rawData);
- UnorderedLookup(std::vector<T>&& rawData);
-
-public:
- bool operator==(const UnorderedLookup<T>& other) const;
- bool operator!=(const UnorderedLookup<T>& other) const;
-
-public:
- IndexList LookupIndices(const KeyType& key,
- const CompareType& compare) const;
-
-private:
- template<typename Compare>
- IndexList LookupHelper(const KeyType& key, const Compare& cmp) const;
-
-private:
- ContainerType data_;
-};
-
-// ----------------
-// Subread Data
-// ----------------
-
-struct SubreadLookupData
-{
- // ctors
- SubreadLookupData(void);
- SubreadLookupData(const PbiRawSubreadData& rawData);
-// SubreadLookupData(PbiRawSubreadData&& rawData);
-
- // add offset data to index result blocks
- void ApplyOffsets(IndexResultBlocks& blocks) const;
-
- template<typename T>
- IndexList Indices(const SubreadField& field,
- const T& value,
- const CompareType& compareType) const;
-
- template<typename T>
- IndexList IndicesMulti(const SubreadField& field,
- const std::vector<T>& values) const;
-
- // map ordering doesn't make sense, optimize for direct lookup
- UnorderedLookup<int32_t> rgId_;
-
- // numeric comparisons make sense, keep key ordering preserved
- OrderedLookup<int32_t> qStart_;
- OrderedLookup<int32_t> qEnd_;
- OrderedLookup<int32_t> holeNumber_;
- OrderedLookup<uint16_t> readQual_;
-
- // offsets
- std::vector<int64_t> fileOffset_;
-};
-
-// -----------------
-// Mapped Data
-// -----------------
-
-struct MappedLookupData
-{
- // ctors
- MappedLookupData(void);
- MappedLookupData(const PbiRawMappedData& rawData);
-// MappedLookupData(PbiRawMappedData&& rawData);
-
- template<typename T>
- IndexList Indices(const MappedField& field,
- const T& value,
- const CompareType& compareType) const;
-
- template<typename T>
- IndexList IndicesMulti(const MappedField& field,
- const std::vector<T>& values) const;
-
- // numeric comparisons make sense, keep key ordering preserved
- OrderedLookup<int32_t> tId_;
- OrderedLookup<uint32_t> tStart_;
- OrderedLookup<uint32_t> tEnd_;
- OrderedLookup<uint32_t> aStart_;
- OrderedLookup<uint32_t> aEnd_;
- OrderedLookup<uint32_t> nM_;
- OrderedLookup<uint32_t> nMM_;
- OrderedLookup<uint8_t> mapQV_;
-
- // generated, not stored in PBI
- OrderedLookup<uint32_t> nIns_;
- OrderedLookup<uint32_t> nDel_;
-
- // no need for map overhead, just store direct indices
- IndexList reverseStrand_;
- IndexList forwardStrand_;
-};
-
-// ------------------
-// Reference Data
-// ------------------
-
-struct ReferenceLookupData
-{
- // ctors
- ReferenceLookupData(void);
- ReferenceLookupData(const PbiRawReferenceData& rawData);
-// ReferenceLookupData(PbiRawReferenceData&& rawData);
-
- IndexRange Indices(const int32_t tId) const;
-
- // references_[tId] = (begin, end) indices
- // into SubreadLookupData::fileOffset_
- std::unordered_map<int32_t, IndexRange> references_;
-};
-
-// ---------------
-// Barcode Data
-// ---------------
-
-struct BarcodeLookupData
-{
- // ctors
- BarcodeLookupData(void);
- BarcodeLookupData(const PbiRawBarcodeData& rawData);
-// BarcodeLookupData(PbiRawBarcodeData&& rawData);
-
- template<typename T>
- IndexList Indices(const BarcodeField& field,
- const T& value,
- const CompareType& compareType) const;
-
- template<typename T>
- IndexList IndicesMulti(const BarcodeField& field,
- const std::vector<T>& values) const;
-
- // numeric comparisons make sense, keep key ordering preserved
- OrderedLookup<uint16_t> bcLeft_;
- OrderedLookup<uint16_t> bcRight_;
- OrderedLookup<uint8_t> bcQual_;
-
- // see if this works, or if can use unordered, 'direct' query
- OrderedLookup<uint8_t> ctxtFlag_;
-};
-
-// --------------------------
-// Pbi Lookup Aggregate
-// --------------------------
-
-class PbiIndexPrivate
-{
-public:
- PbiIndexPrivate(void);
- PbiIndexPrivate(const PbiRawData& rawIndex);
- PbiIndexPrivate(PbiRawData&& rawIndex);
-
- std::unique_ptr<PbiIndexPrivate> DeepCopy(void) const;
-
-public:
- bool HasSection(const PbiFile::Section flag) const;
- void SetSection(const PbiFile::Section flag, bool ok = true);
-
-public:
-
- template<typename T>
- IndexList Indices(const SubreadField& field,
- const T& value,
- const CompareType& compareType) const;
-
- template<typename T>
- IndexList Indices(const MappedField& field,
- const T& value,
- const CompareType& compareType) const;
-
- template<typename T>
- IndexList Indices(const BarcodeField& field,
- const T& value,
- const CompareType& compareType) const;
-
- template<typename T>
- IndexList IndicesMulti(const SubreadField& field,
- const T& value) const;
-
- template<typename T>
- IndexList IndicesMulti(const MappedField& field,
- const T& value) const;
-
- template<typename T>
- IndexList IndicesMulti(const BarcodeField& field,
- const T& value) const;
-
- template<typename T>
- IndexResultBlocks Lookup(const SubreadField& field,
- const T& value,
- const CompareType& compareType) const;
-
- template<typename T>
- IndexResultBlocks Lookup(const MappedField& field,
- const T& value,
- const CompareType& compareType) const;
-
- template<typename T>
- IndexResultBlocks Lookup(const BarcodeField& field,
- const T& value,
- const CompareType& compareType) const;
-
- template<typename T>
- IndexResultBlocks LookupMulti(const SubreadField& field,
- const std::vector<T>& values) const;
-
- template<typename T>
- IndexResultBlocks LookupMulti(const MappedField& field,
- const std::vector<T>& values) const;
-
- template<typename T>
- IndexResultBlocks LookupMulti(const BarcodeField& field,
- const std::vector<T>& values) const;
-
- IndexResultBlocks LookupReference(const int32_t tId) const;
-
-private:
- IndexResultBlocks MergeBlocksWithOffsets(const IndexList& indices) const;
-
-public:
- PbiFile::VersionEnum version_;
- PbiFile::Sections sections_;
- uint32_t numReads_;
-
- // lookup structures
- SubreadLookupData subreadData_;
- MappedLookupData mappedData_;
- ReferenceLookupData referenceData_;
- BarcodeLookupData barcodeData_;
-
-private:
- // not-implemented - ensure no copy
- PbiIndexPrivate(const PbiIndexPrivate& other);
- PbiIndexPrivate& operator=(const PbiIndexPrivate& other);
-};
-
-// ----------------
-// helper methods
-// ----------------
-
-inline IndexResultBlocks mergedIndexBlocks(IndexList&& indices)
-{
- if (indices.empty())
- return IndexResultBlocks();
- std::sort(indices.begin(), indices.end());
-
- IndexResultBlocks result;
- result.push_back(IndexResultBlock(indices.at(0), 1));
- const size_t numIndices = indices.size();
- for (size_t i = 1; i < numIndices; ++i) {
- if (indices.at(i) == indices.at(i-1)+1)
- ++result.back().numReads_;
- else
- result.push_back(IndexResultBlock(indices.at(i), 1));
- }
- return result;
-}
-
-inline IndexResultBlocks mergedIndexBlocks(const IndexList& indices)
-{
- IndexList copy = indices;
- return mergedIndexBlocks(std::move(copy));
-}
-
-inline size_t nullIndex(void)
-{ return static_cast<size_t>(-1); }
-
-inline
-void pushBackIndices(IndexList& result,
- const IndexList& toAppend)
-{
- result.reserve(result.size() + toAppend.size());
- for (auto element : toAppend)
- result.push_back(element);
-}
-
-// -----------------
-// OrderedLookup
-// -----------------
-
-template<typename T>
-inline OrderedLookup<T>::OrderedLookup(void) { }
-
-template<typename T>
-inline OrderedLookup<T>::OrderedLookup(size_t n)
-{ data_.reserve(n); }
-
-template<typename T>
-inline OrderedLookup<T>::OrderedLookup(const ContainerType& data)
- : data_(data)
-{ }
-
-template<typename T>
-inline OrderedLookup<T>::OrderedLookup(ContainerType&& data)
- : data_(std::move(data))
-{ }
-
-template<typename T>
-inline OrderedLookup<T>::OrderedLookup(const std::vector<T>& rawData)
-{
- const size_t numElements = rawData.size();
- for (size_t i = 0; i < numElements; ++i)
- data_[ rawData.at(i) ].push_back(i);
-}
-
-template<typename T>
-inline OrderedLookup<T>::OrderedLookup(std::vector<T>&& rawData)
-{
- const size_t numElements = rawData.size();
- for (size_t i = 0; i < numElements; ++i)
- data_[ rawData.at(i) ].push_back(i);
-}
-
-template<typename T>
-inline bool OrderedLookup<T>::operator==(const OrderedLookup<T>& other) const
-{ return data_ == other.data_; }
-
-template<typename T>
-inline bool OrderedLookup<T>::operator!=(const OrderedLookup<T>& other) const
-{ return !(*this == other); }
-
-template<typename T>
-inline IndexList
-OrderedLookup<T>::LookupInclusiveRange(const IterType& begin,
- const IterType& end) const
-{
- IndexList result;
- for ( auto iter = begin; iter != end; ++iter )
- pushBackIndices(result, iter->second);
- std::sort(result.begin(), result.end());
- return result;
-}
-
-template<typename T>
-inline IndexList
-OrderedLookup<T>::LookupExclusiveRange(const IterType& begin,
- const IterType& end,
- const KeyType& key) const
-{
- IndexList result;
- for ( auto iter = begin; iter != end; ++iter ) {
- if (iter->first != key)
- pushBackIndices(result, iter->second);
- }
- std::sort(result.begin(), result.end());
- return result;
-}
-
-template<typename T>
-inline IndexList
-OrderedLookup<T>::LookupIndices(const OrderedLookup::KeyType& key,
- const CompareType& compare) const
-{
- const IterType begin = data_.cbegin();
- const IterType end = data_.cend();
- switch(compare)
- {
- case CompareType::EQUAL:
- {
- const auto found = data_.find(key);
- if (found != end)
- return found->second;
- return IndexList();
- }
- case CompareType::LESS_THAN: return LookupExclusiveRange(begin, data_.upper_bound(key), key);
- case CompareType::LESS_THAN_EQUAL: return LookupInclusiveRange(begin, data_.upper_bound(key));
- case CompareType::GREATER_THAN: return LookupExclusiveRange(data_.lower_bound(key), end, key);
- case CompareType::GREATER_THAN_EQUAL: return LookupInclusiveRange(data_.lower_bound(key), end);
- case CompareType::NOT_EQUAL: return LookupExclusiveRange(begin, end, key);
- default:
- assert(false);
- }
- return IndexList();
-}
-
-// -----------------
-// UnorderedLookup
-// -----------------
-
-template<typename T>
-inline UnorderedLookup<T>::UnorderedLookup(void) { }
-
-template<typename T>
-inline UnorderedLookup<T>::UnorderedLookup(size_t n)
-{ data_.reserve(n); }
-
-template<typename T>
-inline UnorderedLookup<T>::UnorderedLookup(const ContainerType& data)
- : data_(data)
-{ }
-
-template<typename T>
-inline UnorderedLookup<T>::UnorderedLookup(ContainerType&& data)
- : data_(std::move(data))
-{ }
-
-template<typename T>
-inline UnorderedLookup<T>::UnorderedLookup(const std::vector<T> &rawData)
-{
- const size_t numElements = rawData.size();
- for (size_t i = 0; i < numElements; ++i)
- data_[ rawData.at(i) ].push_back(i);
-}
-
-template<typename T>
-inline UnorderedLookup<T>::UnorderedLookup(std::vector<T>&& rawData)
-{
- const size_t numElements = rawData.size();
- for (size_t i = 0; i < numElements; ++i)
- data_[ rawData.at(i) ].push_back(i);
-}
-
-template<typename T>
-inline bool UnorderedLookup<T>::operator==(const UnorderedLookup<T>& other) const
-{ return data_ == other.data_; }
-
-template<typename T>
-inline bool UnorderedLookup<T>::operator!=(const UnorderedLookup<T>& other) const
-{ return !(*this == other); }
-
-template<typename T>
-template<typename Compare>
-inline IndexList
-UnorderedLookup<T>::LookupHelper(const UnorderedLookup::KeyType& key,
- const Compare& cmp) const
-{
- auto iter = data_.cbegin();
- const auto end = data_.cend();
- IndexList result; // init with some avg size ??
- for ( ; iter != end; ++iter ) {
- const auto e = (iter->first);
- if (cmp(e, key))
- pushBackIndices(result, iter->second);
- }
- std::sort(result.begin(), result.end());
- return result;
-}
-
-template<typename T>
-inline IndexList
-UnorderedLookup<T>::LookupIndices(const UnorderedLookup::KeyType& key,
- const CompareType& compare) const
-{
- switch (compare) {
- case CompareType::EQUAL:
- {
- const auto found = data_.find(key);
- if (found != data_.cend())
- return found->second;
- else
- return IndexList();
- }
- case CompareType::LESS_THAN: return LookupHelper(key, std::less<KeyType>());
- case CompareType::LESS_THAN_EQUAL: return LookupHelper(key, std::less_equal<KeyType>());
- case CompareType::GREATER_THAN: return LookupHelper(key, std::greater<KeyType>());
- case CompareType::GREATER_THAN_EQUAL: return LookupHelper(key, std::greater_equal<KeyType>());
- case CompareType::NOT_EQUAL: return LookupHelper(key, std::not_equal_to<KeyType>());
- default:
- assert(false);
- }
- return IndexList();
-}
-
-// -------------------
-// SubreadLookupData
-// -------------------
-
-inline
-void SubreadLookupData::ApplyOffsets(IndexResultBlocks& blocks) const
-{
- for (IndexResultBlock& block : blocks)
- block.virtualOffset_ = fileOffset_.at(block.firstIndex_);
-}
-
-template<typename T>
-inline IndexList SubreadLookupData::Indices(const SubreadField& field,
- const T& value,
- const CompareType& compareType) const
-{
- switch(field) {
- case SubreadField::RG_ID: return rgId_.LookupIndices(value, compareType);
- case SubreadField::Q_START: return qStart_.LookupIndices(value, compareType);
- case SubreadField::Q_END: return qEnd_.LookupIndices(value, compareType);
- case SubreadField::ZMW: return holeNumber_.LookupIndices(value, compareType);
- case SubreadField::READ_QUALITY: return readQual_.LookupIndices(value, compareType);
-
- case SubreadField::VIRTUAL_OFFSET : // fall-through, not supported this way
- default:
- assert(false);
- }
- return IndexList();
-}
-
-template<typename T>
-inline IndexList SubreadLookupData::IndicesMulti(const SubreadField& field,
- const std::vector<T>& values) const
-{
- IndexList result;
- for (auto value : values) {
- const IndexList& valueIndices = Indices(field, value, CompareType::EQUAL);
- result.reserve(result.size() + valueIndices.size());
- for (auto i : valueIndices)
- result.push_back(i);
- }
- return result;
-}
-
-// -------------------
-// MappedLookupData
-// -------------------
-
-template<typename T>
-inline IndexList MappedLookupData::Indices(const MappedField& field,
- const T& value,
- const CompareType& compareType) const
-{
- switch(field) {
- case MappedField::T_ID: return tId_.LookupIndices(value, compareType);
- case MappedField::T_START: return tStart_.LookupIndices(value, compareType);
- case MappedField::T_END: return tEnd_.LookupIndices(value, compareType);
- case MappedField::A_START: return aStart_.LookupIndices(value, compareType);
- case MappedField::A_END: return aEnd_.LookupIndices(value, compareType);
- case MappedField::N_M: return nM_.LookupIndices(value, compareType);
- case MappedField::N_MM: return nM_.LookupIndices(value, compareType);
- case MappedField::MAP_QUALITY: return mapQV_.LookupIndices(value, compareType);
-
- // MappedField::STRAND has its own specialization
-
- default:
- assert(false);
- }
- return IndexList();
-}
-
-template<>
-inline IndexList MappedLookupData::Indices(const MappedField& field,
- const Strand& strand,
- const CompareType& compareType) const
-{
- assert(field == MappedField::STRAND);
-
- if (compareType == CompareType::EQUAL) {
- if (strand == Strand::FORWARD)
- return forwardStrand_;
- else
- return reverseStrand_;
- } else if (compareType == CompareType::NOT_EQUAL) {
- if (strand == Strand::FORWARD)
- return reverseStrand_;
- else
- return forwardStrand_;
- }
-
- // only EQUAL/NOT_EQUAL supported
- assert(false);
- return IndexList();
-}
-
-template<typename T>
-inline IndexList MappedLookupData::IndicesMulti(const MappedField& field,
- const std::vector<T>& values) const
-{
- IndexList result;
- for (auto value : values) {
- const IndexList& valueIndices = Indices(field, value, CompareType::EQUAL);
- result.reserve(result.size() + valueIndices.size());
- for (auto i : valueIndices)
- result.push_back(i);
- }
- return result;
-}
-
-
-// ---------------------
-// ReferenceLookupData
-// ---------------------
-
-inline IndexRange ReferenceLookupData::Indices(const int32_t tId) const
-{
- auto found = references_.find(tId);
- if (found == references_.cend())
- return IndexRange(nullIndex(), nullIndex());
- return found->second;
-}
-
-// -------------------
-// BarcodeLookupData
-// -------------------
-
-template<typename T>
-inline IndexList BarcodeLookupData::Indices(const BarcodeField& field,
- const T& value,
- const CompareType& compareType) const
-{
- switch(field) {
- case BarcodeField::BC_LEFT: return bcLeft_.LookupIndices(value, compareType);
- case BarcodeField::BC_RIGHT: return bcRight_.LookupIndices(value, compareType);
- case BarcodeField::BC_QUALITY: return bcQual_.LookupIndices(value, compareType);
- case BarcodeField::CONTEXT_FLAG: return ctxtFlag_.LookupIndices(value, compareType);
- default:
- assert(false);
- }
- return IndexList();
-}
-
-template<typename T>
-inline IndexList BarcodeLookupData::IndicesMulti(const BarcodeField& field,
- const std::vector<T>& values) const
-{
- IndexList result;
- for (auto value : values) {
- const IndexList& valueIndices = Indices(field, value, CompareType::EQUAL);
- result.reserve(result.size() + valueIndices.size());
- for (auto i : valueIndices)
- result.push_back(i);
- }
- return result;
-}
-
-
-// -----------------
-// PbiIndexPrivate
-// -----------------
-
-inline bool PbiIndexPrivate::HasSection(const PbiFile::Section flag) const
-{ return (sections_ & flag) != 0; }
-
-inline void PbiIndexPrivate::SetSection(const PbiFile::Section flag, bool ok)
-{ if (ok) sections_ |= flag; else sections_ &= ~flag; }
-
-template<typename T>
-inline IndexList
-PbiIndexPrivate::Indices(const SubreadField& field,
- const T& value,
- const CompareType& compareType) const
-{ return subreadData_.Indices(field, value, compareType); }
-
-template<typename T>
-inline IndexList
-PbiIndexPrivate::Indices(const MappedField& field,
- const T& value,
- const CompareType& compareType) const
-{ return mappedData_.Indices(field, value, compareType); }
-
-template<typename T>
-inline IndexList
-PbiIndexPrivate::Indices(const BarcodeField& field,
- const T& value,
- const CompareType& compareType) const
-{ return barcodeData_.Indices(field, value, compareType); }
-
-template<typename T>
-inline IndexList
-PbiIndexPrivate::IndicesMulti(const SubreadField& field,
- const T& value) const
-{ return subreadData_.IndicesMulti(field, value); }
-
-template<typename T>
-inline IndexList
-PbiIndexPrivate::IndicesMulti(const MappedField& field,
- const T& value) const
-{ return mappedData_.IndicesMulti(field, value); }
-
-template<typename T>
-inline IndexList
-PbiIndexPrivate::IndicesMulti(const BarcodeField& field,
- const T& value) const
-{ return barcodeData_.IndicesMulti(field, value); }
-
-template<typename T>
-inline IndexResultBlocks
-PbiIndexPrivate::Lookup(const SubreadField& field,
- const T& value,
- const CompareType& compareType) const
-{ return MergeBlocksWithOffsets(subreadData_.Indices(field, value, compareType)); }
-
-template<typename T>
-inline IndexResultBlocks
-PbiIndexPrivate::Lookup(const MappedField& field,
- const T& value,
- const CompareType& compareType) const
-{
- if (!HasSection(PbiFile::MAPPED))
- return IndexResultBlocks();
- return MergeBlocksWithOffsets(mappedData_.Indices(field, value, compareType));
-}
-
-template<typename T>
-inline IndexResultBlocks
-PbiIndexPrivate::Lookup(const BarcodeField& field,
- const T& value,
- const CompareType& compareType) const
-{
- if (!HasSection(PbiFile::BARCODE))
- return IndexResultBlocks();
- return MergeBlocksWithOffsets(barcodeData_.Indices(field, value, compareType));
-}
-
-template<typename T>
-inline IndexResultBlocks
-PbiIndexPrivate::LookupMulti(const SubreadField& field,
- const std::vector<T>& values) const
-{ return MergeBlocksWithOffsets(subreadData_.IndicesMulti(field, values)); }
-
-template<typename T>
-inline IndexResultBlocks
-PbiIndexPrivate::LookupMulti(const MappedField& field,
- const std::vector<T>& values) const
-{ return MergeBlocksWithOffsets(mappedData_.IndicesMulti(field, values)); }
-
-template<typename T>
-inline IndexResultBlocks
-PbiIndexPrivate::LookupMulti(const BarcodeField& field,
- const std::vector<T>& values) const
-{ return MergeBlocksWithOffsets(barcodeData_.IndicesMulti(field, values)); }
-
-inline IndexResultBlocks
-PbiIndexPrivate::LookupReference(const int32_t tId) const
-{
- if (!HasSection(PbiFile::REFERENCE))
- return IndexResultBlocks();
- const IndexRange& indexRange = referenceData_.Indices(tId);
- if (indexRange.first == nullIndex() && indexRange.second == nullIndex())
- return IndexResultBlocks();
- const size_t numReads = indexRange.second - indexRange.first;
- IndexResultBlocks blocks(1, IndexResultBlock(indexRange.first, numReads));
- subreadData_.ApplyOffsets(blocks);
- return blocks;
-}
-
-inline IndexResultBlocks
-PbiIndexPrivate::MergeBlocksWithOffsets(const IndexList& indices) const
-{
- IndexResultBlocks blocks = mergedIndexBlocks(indices);
- subreadData_.ApplyOffsets(blocks);
- return blocks;
-}
-
-} // namespace internal
-
-template<typename FieldType, typename ValueType>
-inline IndexRequestBase<FieldType, ValueType>::IndexRequestBase(const FieldType field,
- const ValueType& value,
- const CompareType compareType)
- : field_(field)
- , value_(value)
- , compareType_(compareType)
-{ }
-
-template<typename FieldType, typename ValueType>
-inline IndexMultiRequestBase<FieldType, ValueType>::IndexMultiRequestBase(const FieldType field,
- const std::vector<ValueType>& values)
- : field_(field)
- , values_(values)
-{ }
-
-template<SubreadField field, typename ValueType>
-inline SubreadIndexRequest<field, ValueType>::SubreadIndexRequest(const ValueType& value,
- const CompareType& compareType)
- : IndexRequestBase<SubreadField, ValueType>(field, value, compareType)
-{ }
-
-template<SubreadField field, typename ValueType>
-inline SubreadIndexMultiRequest<field, ValueType>::SubreadIndexMultiRequest(const std::vector<ValueType>& values)
- : IndexMultiRequestBase<SubreadField, ValueType>(field, values)
-{ }
-
-template<MappedField field, typename ValueType>
-inline MappedIndexRequest<field, ValueType>::MappedIndexRequest(const ValueType& value,
- const CompareType& compareType)
- : IndexRequestBase<MappedField, ValueType>(field, value, compareType)
-{ }
-
-template<MappedField field, typename ValueType>
-inline MappedIndexMultiRequest<field, ValueType>::MappedIndexMultiRequest(const std::vector<ValueType>& values)
- : IndexMultiRequestBase<MappedField, ValueType>(field, values)
-{ }
-
-template<BarcodeField field, typename ValueType>
-inline BarcodeIndexRequest<field, ValueType>::BarcodeIndexRequest(const ValueType& value,
- const CompareType& compareType)
- : IndexRequestBase<BarcodeField, ValueType>(field, value, compareType)
-{ }
-
-template<BarcodeField field, typename ValueType>
-inline BarcodeIndexMultiRequest<field, ValueType>::BarcodeIndexMultiRequest(const std::vector<ValueType>& values)
- : IndexMultiRequestBase<BarcodeField, ValueType>(field, values)
-{ }
-
-template <typename FieldType, typename ValueType>
-inline IndexList
-PbiIndex::RawIndices(const IndexRequestBase<FieldType, ValueType>& request) const
-{ return d_->Indices(request.field_, request.value_, request.compareType_); }
-
-template <typename FieldType, typename ValueType>
-inline IndexList
-PbiIndex::RawIndices(const IndexMultiRequestBase<FieldType, ValueType>& request) const
-{ return d_->Indices(request.field_, request.values_); }
-
-template <typename FieldType, typename ValueType>
-inline IndexResultBlocks
-PbiIndex::Lookup(const IndexRequestBase<FieldType, ValueType>& request) const
-{ return d_->Lookup(request.field_, request.value_, request.compareType_); }
-
-template <typename FieldType, typename ValueType>
-inline IndexResultBlocks
-PbiIndex::Lookup(const IndexMultiRequestBase<FieldType, ValueType>& request) const
-{ return d_->LookupMulti(request.field_, request.values_); }
-
-inline IndexResultBlocks PbiIndex::LookupReference(const int32_t tId) const
-{ return d_->LookupReference(tId); }
-
-} // namespace BAM
-} // namespace PacBio
-
diff --git a/include/pbbam/internal/PbiLookupData.inl b/include/pbbam/internal/PbiLookupData.inl
new file mode 100644
index 0000000..2ca38f3
--- /dev/null
+++ b/include/pbbam/internal/PbiLookupData.inl
@@ -0,0 +1,531 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiLookupData.inl
+/// \brief Inline implementations for the classes used for PBI data lookup.
+//
+// Author: Derek Barnett
+
+#include "pbbam/PbiLookupData.h"
+#include "pbbam/PbiRawData.h"
+#include "pbbam/Strand.h"
+#include <algorithm>
+#include <unordered_set>
+#include <cassert>
+
+namespace PacBio {
+namespace BAM {
+
+// ----------------
+// helper methods
+// ----------------
+
+inline IndexResultBlocks mergedIndexBlocks(IndexList&& indices)
+{
+ if (indices.empty())
+ return IndexResultBlocks{ };
+
+ std::sort(indices.begin(), indices.end());
+ auto newEndIter = std::unique(indices.begin(), indices.end());
+ auto numIndices = std::distance(indices.begin(), newEndIter);
+ assert(!indices.empty());
+ auto result = IndexResultBlocks{ IndexResultBlock(indices.at(0), 1) };
+ for (auto i = 1; i < numIndices; ++i) {
+ if (indices.at(i) == indices.at(i-1)+1)
+ ++result.back().numReads_;
+ else
+ result.push_back(IndexResultBlock(indices.at(i), 1));
+ }
+ return result;
+}
+
+inline IndexResultBlocks mergedIndexBlocks(const IndexList& indices)
+{
+ auto copy = indices;
+ return mergedIndexBlocks(std::move(copy));
+}
+
+inline size_t nullIndex(void)
+{ return static_cast<size_t>(-1); }
+
+inline void pushBackIndices(IndexList& result,
+ const IndexList& toAppend)
+{
+ result.reserve(result.size() + toAppend.size());
+ for (auto element : toAppend)
+ result.push_back(element);
+}
+
+// -----------------
+// OrderedLookup
+// -----------------
+
+template<typename T>
+inline OrderedLookup<T>::OrderedLookup(void) { }
+
+template<typename T>
+inline OrderedLookup<T>::OrderedLookup(const container_type& data)
+ : data_(data)
+{ }
+
+template<typename T>
+inline OrderedLookup<T>::OrderedLookup(container_type&& data)
+ : data_(std::move(data))
+{ }
+
+template<typename T>
+inline OrderedLookup<T>::OrderedLookup(const std::vector<T>& rawData)
+{
+ const auto numElements = rawData.size();
+ for (auto i = decltype(numElements){0}; i < numElements; ++i)
+ data_[rawData.at(i)].push_back(i);
+}
+
+template<typename T>
+inline OrderedLookup<T>::OrderedLookup(std::vector<T>&& rawData)
+{
+ const auto numElements = rawData.size();
+ for (auto i = decltype(numElements){0}; i < numElements; ++i)
+ data_[rawData.at(i)].push_back(i);
+}
+
+template<typename T>
+inline bool OrderedLookup<T>::operator==(const OrderedLookup<T>& other) const
+{ return data_ == other.data_; }
+
+template<typename T>
+inline bool OrderedLookup<T>::operator!=(const OrderedLookup<T>& other) const
+{ return !(*this == other); }
+
+template<typename T>
+inline typename OrderedLookup<T>::iterator OrderedLookup<T>::begin(void)
+{ return data_.begin(); }
+
+template<typename T>
+inline typename OrderedLookup<T>::const_iterator OrderedLookup<T>::begin(void) const
+{ return data_.cbegin(); }
+
+template<typename T>
+inline typename OrderedLookup<T>::const_iterator OrderedLookup<T>::cbegin(void) const
+{ return data_.cbegin(); }
+
+template<typename T>
+inline typename OrderedLookup<T>::iterator OrderedLookup<T>::end(void)
+{ return data_.end(); }
+
+template<typename T>
+inline typename OrderedLookup<T>::const_iterator OrderedLookup<T>::end(void) const
+{ return data_.cend(); }
+
+template<typename T>
+inline typename OrderedLookup<T>::const_iterator OrderedLookup<T>::cend(void) const
+{ return data_.cend(); }
+
+template<typename T>
+inline bool OrderedLookup<T>::empty(void) const
+{ return data_.empty(); }
+
+template<typename T>
+inline size_t OrderedLookup<T>::size(void) const
+{ return data_.size(); }
+
+template<typename T>
+inline IndexList
+OrderedLookup<T>::LookupInclusiveRange(const const_iterator &begin,
+ const const_iterator &end) const
+{
+ auto result = IndexList{ };
+ for (auto iter = begin; iter != end; ++iter)
+ pushBackIndices(result, iter->second);
+ std::sort(result.begin(), result.end());
+ return result;
+}
+
+template<typename T>
+inline IndexList
+OrderedLookup<T>::LookupExclusiveRange(const const_iterator& begin,
+ const const_iterator& end,
+ const key_type& key) const
+{
+ auto result = IndexList{ };
+ for (auto iter = begin; iter != end; ++iter) {
+ if (iter->first != key)
+ pushBackIndices(result, iter->second);
+ }
+ std::sort(result.begin(), result.end());
+ return result;
+}
+
+template<typename T>
+inline IndexList
+OrderedLookup<T>::LookupIndices(const OrderedLookup::key_type& key,
+ const Compare::Type& compare) const
+{
+ auto begin = data_.cbegin();
+ auto end = data_.cend();
+ switch(compare)
+ {
+ case Compare::EQUAL:
+ {
+ const auto found = data_.find(key);
+ if (found != end)
+ return found->second;
+ return IndexList();
+ }
+ case Compare::LESS_THAN: return LookupExclusiveRange(begin, data_.upper_bound(key), key);
+ case Compare::LESS_THAN_EQUAL: return LookupInclusiveRange(begin, data_.upper_bound(key));
+ case Compare::GREATER_THAN: return LookupExclusiveRange(data_.lower_bound(key), end, key);
+ case Compare::GREATER_THAN_EQUAL: return LookupInclusiveRange(data_.lower_bound(key), end);
+ case Compare::NOT_EQUAL: return LookupExclusiveRange(begin, end, key);
+ default:
+ assert(false);
+ }
+ return IndexList{ };
+}
+
+template<typename T>
+inline std::vector<T> OrderedLookup<T>::Unpack(void) const
+{
+ auto result = std::vector<T>{ };
+ auto iter = cbegin();
+ const auto end = cend();
+ for ( ; iter != end; ++iter ) {
+ const auto& indices = iter->second;
+ for (auto&& i : indices) {
+ if (result.size() <= i)
+ result.resize(i+1);
+ result[i] = iter->first;
+ }
+ }
+ return result;
+}
+
+// -----------------
+// UnorderedLookup
+// -----------------
+
+template<typename T>
+inline UnorderedLookup<T>::UnorderedLookup(void) { }
+
+template<typename T>
+inline UnorderedLookup<T>::UnorderedLookup(const container_type& data)
+ : data_(data)
+{ }
+
+template<typename T>
+inline UnorderedLookup<T>::UnorderedLookup(container_type&& data)
+ : data_(std::move(data))
+{ }
+
+template<typename T>
+inline UnorderedLookup<T>::UnorderedLookup(const std::vector<T>& rawData)
+{
+ const auto numElements = rawData.size();
+ for (auto i = decltype(numElements){0}; i < numElements; ++i)
+ data_[rawData.at(i)].push_back(i);
+}
+
+template<typename T>
+inline UnorderedLookup<T>::UnorderedLookup(std::vector<T>&& rawData)
+{
+ const auto numElements = rawData.size();
+ for (auto i = decltype(numElements){0}; i < numElements; ++i)
+ data_[rawData.at(i)].push_back(i);
+}
+
+template<typename T>
+inline bool UnorderedLookup<T>::operator==(const UnorderedLookup<T>& other) const
+{ return data_ == other.data_; }
+
+template<typename T>
+inline bool UnorderedLookup<T>::operator!=(const UnorderedLookup<T>& other) const
+{ return !(*this == other); }
+
+template<typename T>
+inline typename UnorderedLookup<T>::iterator UnorderedLookup<T>::begin(void)
+{ return data_.begin(); }
+
+template<typename T>
+inline typename UnorderedLookup<T>::const_iterator UnorderedLookup<T>::begin(void) const
+{ return data_.cbegin(); }
+
+template<typename T>
+inline typename UnorderedLookup<T>::const_iterator UnorderedLookup<T>::cbegin(void) const
+{ return data_.cbegin(); }
+
+template<typename T>
+inline typename UnorderedLookup<T>::iterator UnorderedLookup<T>::end(void)
+{ return data_.end(); }
+
+template<typename T>
+inline typename UnorderedLookup<T>::const_iterator UnorderedLookup<T>::end(void) const
+{ return data_.cend(); }
+
+template<typename T>
+inline typename UnorderedLookup<T>::const_iterator UnorderedLookup<T>::cend(void) const
+{ return data_.cend(); }
+
+template<typename T>
+inline bool UnorderedLookup<T>::empty(void) const
+{ return data_.empty(); }
+
+template<typename T>
+inline size_t UnorderedLookup<T>::size(void) const
+{ return data_.size(); }
+
+template<typename T>
+template<typename Compare>
+inline IndexList
+UnorderedLookup<T>::LookupHelper(const UnorderedLookup::key_type& key,
+ const Compare& cmp) const
+{
+ auto result = IndexList{ }; // init with some avg size ??
+ const auto end = data_.cend();
+ for (auto iter = data_.cbegin(); iter != end; ++iter) {
+ const auto e = (iter->first);
+ if (cmp(e, key))
+ pushBackIndices(result, iter->second);
+ }
+ std::sort(result.begin(), result.end());
+ return result;
+}
+
+template<typename T>
+inline IndexList
+UnorderedLookup<T>::LookupIndices(const UnorderedLookup::key_type& key,
+ const Compare::Type& compare) const
+{
+ switch (compare) {
+ case Compare::EQUAL:
+ {
+ const auto found = data_.find(key);
+ if (found != data_.cend())
+ return found->second;
+ else
+ return IndexList();
+ }
+ case Compare::LESS_THAN: return LookupHelper(key, std::less<key_type>());
+ case Compare::LESS_THAN_EQUAL: return LookupHelper(key, std::less_equal<key_type>());
+ case Compare::GREATER_THAN: return LookupHelper(key, std::greater<key_type>());
+ case Compare::GREATER_THAN_EQUAL: return LookupHelper(key, std::greater_equal<key_type>());
+ case Compare::NOT_EQUAL: return LookupHelper(key, std::not_equal_to<key_type>());
+ default:
+ assert(false);
+ }
+ return IndexList{ };
+}
+
+template<typename T>
+inline std::vector<T> UnorderedLookup<T>::Unpack(void) const
+{
+ auto result = std::vector<T>{ };
+ auto iter = cbegin();
+ const auto end = cend();
+ for ( ; iter != end; ++iter ) {
+ const auto& indices = iter->second;
+ for (auto&& i : indices) {
+ if (result.size() <= i)
+ result.resize(i+1);
+ result[i] = iter->first;
+ }
+ }
+ return result;
+}
+
+// -------------------
+// SubreadLookupData
+// -------------------
+
+inline
+void BasicLookupData::ApplyOffsets(IndexResultBlocks& blocks) const
+{
+ for (IndexResultBlock& block : blocks)
+ block.virtualOffset_ = fileOffset_.at(block.firstIndex_);
+}
+
+template<typename T>
+inline IndexList BasicLookupData::Indices(const BasicLookupData::Field& field,
+ const T& value,
+ const Compare::Type& compareType) const
+{
+ switch(field) {
+ case BasicLookupData::RG_ID: return rgId_.LookupIndices(value, compareType);
+ case BasicLookupData::Q_START: return qStart_.LookupIndices(value, compareType);
+ case BasicLookupData::Q_END: return qEnd_.LookupIndices(value, compareType);
+ case BasicLookupData::ZMW: return holeNumber_.LookupIndices(value, compareType);
+ case BasicLookupData::READ_QUALITY: return readQual_.LookupIndices(value, compareType);
+ case BasicLookupData::CONTEXT_FLAG: return ctxtFlag_.LookupIndices(value, compareType);
+
+ case BasicLookupData::VIRTUAL_OFFSET : // fall-through, not supported this way
+ default:
+ assert(false);
+ }
+ return IndexList{ };
+}
+
+template<typename T>
+inline IndexList BasicLookupData::IndicesMulti(const BasicLookupData::Field& field,
+ const std::vector<T>& values) const
+{
+ auto result = IndexList{ };
+ for (auto value : values) {
+ const auto valueIndices = Indices(field, value, Compare::EQUAL);
+ result.reserve(result.size() + valueIndices.size());
+ for (auto i : valueIndices)
+ result.push_back(i);
+ }
+ return result;
+}
+
+inline const std::vector<int64_t>& BasicLookupData::VirtualFileOffsets(void) const
+{ return fileOffset_; }
+
+// -------------------
+// MappedLookupData
+// -------------------
+
+template<typename T>
+inline IndexList MappedLookupData::Indices(const MappedLookupData::Field& field,
+ const T& value,
+ const Compare::Type& compareType) const
+{
+ switch(field) {
+ case MappedLookupData::T_ID: return tId_.LookupIndices(value, compareType);
+ case MappedLookupData::T_START: return tStart_.LookupIndices(value, compareType);
+ case MappedLookupData::T_END: return tEnd_.LookupIndices(value, compareType);
+ case MappedLookupData::A_START: return aStart_.LookupIndices(value, compareType);
+ case MappedLookupData::A_END: return aEnd_.LookupIndices(value, compareType);
+ case MappedLookupData::N_M: return nM_.LookupIndices(value, compareType);
+ case MappedLookupData::N_MM: return nMM_.LookupIndices(value, compareType);
+ case MappedLookupData::N_DEL: return nDel_.LookupIndices(value, compareType);
+ case MappedLookupData::N_INS: return nIns_.LookupIndices(value, compareType);
+ case MappedLookupData::MAP_QUALITY: return mapQV_.LookupIndices(value, compareType);
+
+ // MappedField::STRAND has its own specialization
+
+ default:
+ assert(false);
+ }
+ return IndexList{ };
+}
+
+template<>
+inline IndexList MappedLookupData::Indices(const MappedLookupData::Field& field,
+ const Strand& strand,
+ const Compare::Type& compareType) const
+{
+ assert(field == MappedLookupData::STRAND);
+ (void)field; // quash warnings building in release mode
+
+ if (compareType == Compare::EQUAL) {
+ if (strand == Strand::FORWARD)
+ return forwardStrand_;
+ else
+ return reverseStrand_;
+ } else if (compareType == Compare::NOT_EQUAL) {
+ if (strand == Strand::FORWARD)
+ return reverseStrand_;
+ else
+ return forwardStrand_;
+ }
+
+ // only EQUAL/NOT_EQUAL supported
+ assert(false);
+ return IndexList{ };
+}
+
+template<typename T>
+inline IndexList MappedLookupData::IndicesMulti(const MappedLookupData::Field& field,
+ const std::vector<T>& values) const
+{
+ auto result = IndexList{ };
+ for (auto value : values) {
+ auto valueIndices = Indices(field, value, Compare::EQUAL);
+ result.reserve(result.size() + valueIndices.size());
+ for (auto i : valueIndices)
+ result.push_back(i);
+ }
+ return result;
+}
+
+
+// ---------------------
+// ReferenceLookupData
+// ---------------------
+
+inline IndexRange ReferenceLookupData::Indices(const int32_t tId) const
+{
+ auto found = references_.find(tId);
+ if (found == references_.cend())
+ return IndexRange{ nullIndex(), nullIndex() };
+ return found->second;
+}
+
+// -------------------
+// BarcodeLookupData
+// -------------------
+
+template<typename T>
+inline IndexList BarcodeLookupData::Indices(const BarcodeLookupData::Field &field,
+ const T& value,
+ const Compare::Type &compareType) const
+{
+ switch(field) {
+ case BarcodeLookupData::BC_FORWARD: return bcForward_.LookupIndices(value, compareType);
+ case BarcodeLookupData::BC_REVERSE: return bcReverse_.LookupIndices(value, compareType);
+ case BarcodeLookupData::BC_QUALITY: return bcQual_.LookupIndices(value, compareType);
+ default:
+ assert(false);
+ }
+ return IndexList{ };
+}
+
+template<typename T>
+inline IndexList BarcodeLookupData::IndicesMulti(const BarcodeLookupData::Field &field,
+ const std::vector<T>& values) const
+{
+ IndexList result;
+ for (auto value : values) {
+ const IndexList& valueIndices = Indices(field, value, Compare::EQUAL);
+ result.reserve(result.size() + valueIndices.size());
+ for (auto i : valueIndices)
+ result.push_back(i);
+ }
+ return result;
+}
+
+} // namespace BAM
+} // namespace PacBio
diff --git a/include/pbbam/internal/PbiRawData.inl b/include/pbbam/internal/PbiRawData.inl
new file mode 100644
index 0000000..af24376
--- /dev/null
+++ b/include/pbbam/internal/PbiRawData.inl
@@ -0,0 +1,113 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiRawData.inl
+/// \brief Inline implementations for the classes used for working with raw PBI
+/// data.
+//
+// Author: Derek Barnett
+
+#include "pbbam/PbiRawData.h"
+
+namespace PacBio {
+namespace BAM {
+
+inline const PbiRawBarcodeData& PbiRawData::BarcodeData(void) const
+{ return barcodeData_; }
+
+inline PbiRawBarcodeData& PbiRawData::BarcodeData(void)
+{ return barcodeData_; }
+
+inline const PbiRawBasicData& PbiRawData::BasicData(void) const
+{ return basicData_; }
+
+inline PbiRawBasicData& PbiRawData::BasicData(void)
+{ return basicData_; }
+
+inline std::string PbiRawData::Filename(void) const
+{ return filename_; }
+
+inline PbiFile::Sections PbiRawData::FileSections(void) const
+{ return sections_; }
+
+inline PbiRawData& PbiRawData::FileSections(PbiFile::Sections sections)
+{ sections_ = sections; return *this; }
+
+inline bool PbiRawData::HasBarcodeData(void) const
+{ return HasSection(PbiFile::BARCODE); }
+
+inline bool PbiRawData::HasMappedData(void) const
+{ return HasSection(PbiFile::MAPPED); }
+
+inline bool PbiRawData::HasReferenceData(void) const
+{ return HasSection(PbiFile::REFERENCE); }
+
+inline bool PbiRawData::HasSection(const PbiFile::Section section) const
+{ return (sections_ & section) != 0; }
+
+inline uint32_t PbiRawData::NumReads(void) const
+{ return numReads_; }
+
+inline PbiRawData& PbiRawData::NumReads(uint32_t num)
+{ numReads_ = num; return *this; }
+
+inline const PbiRawMappedData& PbiRawData::MappedData(void) const
+{ return mappedData_; }
+
+inline PbiRawMappedData& PbiRawData::MappedData(void)
+{ return mappedData_; }
+
+inline const PbiRawReferenceData& PbiRawData::ReferenceData(void) const
+{ return referenceData_; }
+
+inline PbiRawReferenceData& PbiRawData::ReferenceData(void)
+{ return referenceData_; }
+
+inline PbiFile::VersionEnum PbiRawData::Version(void) const
+{ return version_; }
+
+inline PbiRawData& PbiRawData::Version(PbiFile::VersionEnum version)
+{ version_ = version; return *this; }
+
+inline bool PbiReferenceEntry::operator==(const PbiReferenceEntry& other) const
+{
+ return tId_ == other.tId_ &&
+ beginRow_ == other.beginRow_ &&
+ endRow_ == other.endRow_;
+}
+
+} // namespace BAM
+} // namespace PacBio
diff --git a/include/pbbam/ProgramInfo.h b/include/pbbam/internal/ProgramInfo.inl
similarity index 60%
copy from include/pbbam/ProgramInfo.h
copy to include/pbbam/internal/ProgramInfo.inl
index d1bbcfe..2f0287f 100644
--- a/include/pbbam/ProgramInfo.h
+++ b/include/pbbam/internal/ProgramInfo.inl
@@ -32,106 +32,18 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file ProgramInfo.inl
+/// \brief Inline implementations for the ProgramInfo class.
+//
// Author: Derek Barnett
-#ifndef PROGRAMINFO_H
-#define PROGRAMINFO_H
-
-#include "pbbam/Config.h"
-#include <map>
-#include <string>
+#include "pbbam/ProgramInfo.h"
namespace PacBio {
namespace BAM {
-class PBBAM_EXPORT ProgramInfo
-{
-public:
- /// \name Conversion & Validation
- ///
-
- static ProgramInfo FromSam(const std::string& sam);
-
- static std::string ToSam(const ProgramInfo& prog);
-
- /// \}
-
-public:
- /// \name Constructors & Related Methods
- /// \{
-
- ProgramInfo(void);
- ProgramInfo(const std::string& id);
- ProgramInfo(const ProgramInfo& other);
- ProgramInfo(ProgramInfo&& other);
- ProgramInfo& operator=(const ProgramInfo& other);
- ProgramInfo& operator=(ProgramInfo&& other);
- ~ProgramInfo(void);
-
- /// \}
-
-public:
- /// \name Attributes
- /// \{
-
- std::string CommandLine(void) const;
-
- std::map<std::string, std::string> CustomTags(void) const;
-
- std::string Description(void) const;
-
- std::string Id(void) const;
-
- std::string Name(void) const;
-
- std::string PreviousProgramId(void) const;
-
- std::string Version(void) const;
-
- /// \}
-
- /// \name Conversion & Validation
- ///
-
- bool IsValid(void) const;
-
- std::string ToSam(void) const;
-
- /// \}
-
-public:
- /// \name Attributes
- /// \{
-
- ProgramInfo& CommandLine(const std::string& cmd);
-
- ProgramInfo& CustomTags(const std::map<std::string, std::string>& custom);
-
- ProgramInfo& Description(const std::string& description);
-
- ProgramInfo& Id(const std::string& id);
-
- ProgramInfo& Name(const std::string& name);
-
- ProgramInfo& PreviousProgramId(const std::string& id);
-
- ProgramInfo& Version(const std::string& version);
-
- /// \}
-
-private:
- std::string commandLine_; // CL:<CommandLine>
- std::string description_; // DS:<Description>
- std::string id_; // ID:<ID> * Unique ID required for valid SAM header*
- std::string name_; // PN:<Name>
- std::string previousProgramId_; // PP:<PreviousProgramID>
- std::string version_; // VN:<Version>
-
- // custom attributes
- std::map<std::string, std::string> custom_; // tag => value
-};
-
inline std::string ProgramInfo::CommandLine(void) const
{ return commandLine_; }
@@ -141,7 +53,8 @@ inline ProgramInfo& ProgramInfo::CommandLine(const std::string& cmd)
inline std::map<std::string, std::string> ProgramInfo::CustomTags(void) const
{ return custom_; }
-inline ProgramInfo& ProgramInfo::CustomTags(const std::map<std::string, std::string>& custom)
+inline ProgramInfo& ProgramInfo::CustomTags(const std::map<std::string,
+ std::string>& custom)
{ custom_ = custom; return *this; }
inline std::string ProgramInfo::Description(void) const
@@ -182,5 +95,3 @@ inline ProgramInfo& ProgramInfo::Version(const std::string& version)
} // namespace BAM
} // namespace PacBio
-
-#endif // PROGRAMINFO_H
diff --git a/include/pbbam/internal/MergeItem.h b/include/pbbam/internal/QualityValue.inl
similarity index 72%
rename from include/pbbam/internal/MergeItem.h
rename to include/pbbam/internal/QualityValue.inl
index f3d2347..07db35b 100644
--- a/include/pbbam/internal/MergeItem.h
+++ b/include/pbbam/internal/QualityValue.inl
@@ -32,46 +32,40 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file QualityValue.inl
+/// \brief Inline implementations for the QualityValue class.
+//
// Author: Derek Barnett
-#ifndef MERGEITEM_H
-#define MERGEITEM_H
-
-#include "pbbam/BamRecord.h"
-#include "pbbam/internal/IBamFileIterator.h"
-#include <vector>
+#include "pbbam/QualityValue.h"
namespace PacBio {
namespace BAM {
-namespace internal {
-template<typename T>
-struct MergeItemBase
+inline QualityValue::QualityValue(const uint8_t value)
+ : value_(value)
{
-public:
- typedef typename IBamFileIteratorBase<T>::Ptr FileIterPtr;
+ // clamp QV
+ if (value_ > QualityValue::MAX)
+ value_ = QualityValue::MAX;
+}
+
+inline QualityValue::QualityValue(const QualityValue& other)
+ : value_(other.value_)
+{ }
-public:
- MergeItemBase(void) { }
- MergeItemBase(const T& r, const FileIterPtr& iter)
- : record_(r), iter_(iter)
- { }
+inline QualityValue::~QualityValue(void) { }
-public:
- bool IsNull(void) const
- { return !iter_; }
+inline char QualityValue::Fastq(void) const
+{ return static_cast<char>(value_ + 33); }
-public:
- T record_;
- FileIterPtr iter_;
-};
+inline QualityValue::operator uint8_t(void) const
+{ return value_; }
-typedef MergeItemBase<BamRecord> MergeItem;
-typedef MergeItemBase<std::vector<BamRecord> > GroupMergeItem;
+inline QualityValue QualityValue::FromFastq(const char c)
+{ return QualityValue(static_cast<uint8_t>(c-33)); }
-} // namespace internal
} // namespace BAM
} // namespace PacBio
-
-#endif // MERGEITEM_H
diff --git a/include/pbbam/QualityValues.h b/include/pbbam/internal/QualityValues.inl
similarity index 65%
copy from include/pbbam/QualityValues.h
copy to include/pbbam/internal/QualityValues.inl
index 240b96f..0eabf49 100644
--- a/include/pbbam/QualityValues.h
+++ b/include/pbbam/internal/QualityValues.inl
@@ -32,96 +32,19 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file QualityValues.inl
+/// \brief Inline implementations for the QualityValues class.
+//
// Author: Derek Barnett
-#ifndef QUALITYVALUES_H
-#define QUALITYVALUES_H
-
-#include "pbbam/QualityValue.h"
+#include "pbbam/QualityValues.h"
#include <algorithm>
-#include <string>
-#include <vector>
namespace PacBio {
namespace BAM {
-/// \brief The QualityValues class represents a sequence of FASTQ-compatible
-/// quality values. See QualityValue documentation for details.
-///
-class PBBAM_EXPORT QualityValues : public std::vector<QualityValue>
-{
-public:
- /// Creates a QualityValues collection from a FASTQ-encoded string.
- static QualityValues FromFastq(const std::string& fastq);
-
-public:
- /// \name Constructors & Related Methods
- /// \{
-
- QualityValues(void);
- explicit QualityValues(const std::string& fastqString);
- explicit QualityValues(const std::vector<QualityValue>& quals);
- explicit QualityValues(const std::vector<uint8_t>& quals);
-
- QualityValues(const std::vector<uint8_t>::const_iterator first,
- const std::vector<uint8_t>::const_iterator last);
- QualityValues(const QualityValues::const_iterator first,
- const QualityValues::const_iterator last);
-
- QualityValues(const QualityValues& other);
- QualityValues(QualityValues&& other);
-
- QualityValues(std::vector<QualityValue>&& quals);
-
- QualityValues& operator=(const QualityValues& other);
- QualityValues& operator=(QualityValues&& other);
-
- QualityValues& operator=(const std::vector<QualityValue>& quals);
- QualityValues& operator=(std::vector<QualityValue>&& quals);
-
- ~QualityValues(void);
-
- /// \}
-
-public:
- /// \name Comparison Operators
- /// \{
-
- bool operator==(const std::string& other) const;
- bool operator!=(const std::string& other) const;
-
- /// \}
-
-public:
- /// \name Iterators
- /// \{
-
- /// \returns A const_iterator to the beginning of the sequence.
- std::vector<QualityValue>::const_iterator cbegin(void) const;
-
- /// \returns A const_iterator to the element past the end of the sequence.
- std::vector<QualityValue>::const_iterator cend(void) const;
-
- /// \returns A const_iterator to the beginning of the sequence.
- std::vector<QualityValue>::const_iterator begin(void) const;
-
- /// \returns A const_iterator to the element past the end of the sequence.
- std::vector<QualityValue>::const_iterator end(void) const;
-
- /// \returns An iterator to the beginning of the sequence.
- std::vector<QualityValue>::iterator begin(void);
-
- /// \returns An iterator to the element past the end of the sequence.
- std::vector<QualityValue>::iterator end(void);
-
- /// \}
-
-public:
- /// \returns the FASTQ-encoded string for this collection
- std::string Fastq(void) const;
-};
-
inline QualityValues::QualityValues(void)
: std::vector<QualityValue>()
{ }
@@ -202,13 +125,7 @@ inline std::vector<QualityValue>::iterator QualityValues::end(void)
{ return std::vector<QualityValue>::end(); }
inline QualityValues QualityValues::FromFastq(const std::string& fastq)
-{
- return QualityValues(fastq);
-// QualityValues result;
-// result.resize(fastq.size());
-// std::transform(fastq.cbegin(), fastq.cend(), result.begin(), QualityValue::FromFastq);
-// return result;
-}
+{ return QualityValues(fastq); }
inline std::string QualityValues::Fastq(void) const
{
@@ -229,5 +146,3 @@ inline bool QualityValues::operator!=(const std::string& fastq) const
} // namespace BAM
} // namespace PacBio
-
-#endif // QUALITYVALUES_H
diff --git a/include/pbbam/internal/QueryBase.h b/include/pbbam/internal/QueryBase.h
index 7c16f87..e012f86 100644
--- a/include/pbbam/internal/QueryBase.h
+++ b/include/pbbam/internal/QueryBase.h
@@ -35,15 +35,12 @@
// Author: Derek Barnett
-#ifndef QUERYBASE2_H
-#define QUERYBASE2_H
+#ifndef QUERYBASE_H
+#define QUERYBASE_H
#include "pbbam/BamFile.h"
#include "pbbam/BamRecord.h"
#include "pbbam/DataSet.h"
-#include "pbbam/internal/FilterEngine.h"
-#include "pbbam/internal/IBamFileIterator.h"
-#include "pbbam/internal/IMergeStrategy.h"
#include <memory>
#include <vector>
#include <cassert>
@@ -58,23 +55,18 @@ class QueryBase;
template<typename T>
class QueryIteratorBase
{
+public:
+ virtual ~QueryIteratorBase(void);
+
+ bool operator==(const QueryIteratorBase<T>& other) const;
+ bool operator!=(const QueryIteratorBase<T>& other) const;
+
protected:
QueryIteratorBase(void);
QueryIteratorBase(QueryBase<T>& query);
-public:
- virtual ~QueryIteratorBase(void) { }
-
-protected:
void ReadNext(void);
-public:
- bool operator==(const QueryIteratorBase<T>& other) const
- { return query_ == other.query_; }
-
- bool operator!=(const QueryIteratorBase<T>& other) const
- { return !(*this == other); }
-
protected:
QueryBase<T>* query_;
T record_;
@@ -84,46 +76,28 @@ template<typename T>
class QueryIterator : public QueryIteratorBase<T>
{
public:
- QueryIterator(void) : QueryIteratorBase<T>() { }
- QueryIterator(QueryBase<T>& query)
- : QueryIteratorBase<T>(query)
- { }
-
- T& operator*(void) { return QueryIteratorBase<T>::record_; }
- T* operator->(void) { return &(operator*()); }
-
- QueryIterator<T>& operator++(void)
- { QueryIteratorBase<T>::ReadNext(); return *this; }
-
- QueryIterator<T> operator++(int)
- {
- QueryIterator<T> result(*this);
- ++(*this);
- return result;
- }
+ QueryIterator(void);
+ QueryIterator(QueryBase<T>& query);
+
+ T& operator*(void);
+ T* operator->(void);
+
+ QueryIterator<T>& operator++(void);
+ QueryIterator<T> operator++(int);
};
template<typename T>
class QueryConstIterator : public QueryIteratorBase<T>
{
public:
- QueryConstIterator(void) : QueryIteratorBase<T>() { }
- QueryConstIterator(const QueryBase<T>& query)
- : QueryIteratorBase<T>(const_cast<QueryBase<T>&>(query))
- { }
-
- const T& operator*(void) const { return QueryIteratorBase<T>::record_; }
- const T* operator->(void) const { return &(operator*()); }
-
- QueryConstIterator<T>& operator++(void)
- { QueryIteratorBase<T>::ReadNext(); return *this; }
-
- QueryConstIterator<T> operator++(int)
- {
- QueryConstIterator<T> result(*this);
- ++(*this);
- return result;
- }
+ QueryConstIterator(void);
+ QueryConstIterator(const QueryBase<T>& query);
+
+ const T& operator*(void) const;
+ const T* operator->(void) const;
+
+ QueryConstIterator<T>& operator++(void);
+ QueryConstIterator<T> operator++(int);
};
template<typename T>
@@ -133,86 +107,32 @@ public:
typedef QueryIterator<T> iterator;
typedef QueryConstIterator<T> const_iterator;
- typedef typename IBamFileIteratorBase<T>::Ptr FileIterPtr;
-
-protected:
- QueryBase(const DataSet& dataset);
public:
- virtual ~QueryBase(void) { }
-
- QueryConstIterator<T> begin(void) const { return QueryConstIterator<T>(*this); }
- QueryConstIterator<T> cbegin(void) const { return QueryConstIterator<T>(*this); }
- QueryIterator<T> begin(void) { return QueryIterator<T>(*this); }
-
- QueryConstIterator<T> end(void) const { return QueryConstIterator<T>(); }
- QueryConstIterator<T> cend(void) const { return QueryConstIterator<T>(); }
- QueryIterator<T> end(void) { return QueryIterator<T>(); }
+ virtual ~QueryBase(void);
public:
- bool GetNext(T& r);
+ QueryConstIterator<T> begin(void) const;
+ QueryConstIterator<T> cbegin(void) const;
+ QueryIterator<T> begin(void);
- std::vector<BamFile> GetBamFiles(void) const
- { return dataset_.ExternalResources().BamFiles(); }
+ QueryConstIterator<T> end(void) const;
+ QueryConstIterator<T> cend(void) const;
+ QueryIterator<T> end(void);
public:
- std::vector<FileIterPtr> CreateIterators(void)
- {
- const std::vector<BamFile>& bamFiles = dataset_.ExternalResources().BamFiles();
- std::vector<FileIterPtr> result;
- result.reserve(bamFiles.size());
- for (const BamFile& bamFile : bamFiles)
- result.push_back(CreateIterator(bamFile));
- return result;
- }
-
-protected:
- virtual FileIterPtr CreateIterator(const BamFile& bamFile) = 0;
+ virtual bool GetNext(T& r) =0;
protected:
- const DataSet dataset_;
- std::unique_ptr<IMergeStrategyBase<T> > mergeStrategy_;
- FilterEngine filterEngine_;
+ QueryBase(void);
};
typedef QueryBase<BamRecord> IQuery;
typedef QueryBase<std::vector<BamRecord> > IGroupQuery;
-template<typename T>
-inline QueryIteratorBase<T>::QueryIteratorBase(void)
- : query_(nullptr)
-{ }
-
-template<typename T>
-inline QueryIteratorBase<T>::QueryIteratorBase(QueryBase<T> &query)
- : query_(&query)
-{ ReadNext(); }
-
-template<typename T>
-inline QueryBase<T>::QueryBase(const DataSet& dataset)
- : dataset_(dataset)
- , mergeStrategy_(nullptr)
-{ }
-
-template<typename T>
-inline bool QueryBase<T>::GetNext(T& r)
-{
- while (mergeStrategy_->GetNext(r)) {
- if (filterEngine_.Accepts(r))
- return true;
- }
- return false;
-}
-
-template<typename T>
-inline void QueryIteratorBase<T>::ReadNext(void)
-{
- assert(query_);
- if (!query_->GetNext(record_))
- query_ = nullptr;
-}
-
} // namespace internal
} // namespace BAM
} // namespace PacBio
-#endif // QUERYBASE2_H
+#include "pbbam/internal/QueryBase.inl"
+
+#endif // QUERYBASE_H
diff --git a/include/pbbam/internal/QueryBase.inl b/include/pbbam/internal/QueryBase.inl
new file mode 100644
index 0000000..7f2376f
--- /dev/null
+++ b/include/pbbam/internal/QueryBase.inl
@@ -0,0 +1,177 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "pbbam/internal/QueryBase.h"
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// -------------------
+// QueryIteratorBase
+// -------------------
+
+template<typename T>
+inline QueryIteratorBase<T>::QueryIteratorBase(void)
+ : query_(nullptr)
+{ }
+
+template<typename T>
+inline QueryIteratorBase<T>::QueryIteratorBase(QueryBase<T>& query)
+ : query_(&query)
+{ ReadNext(); }
+
+template<typename T> inline
+QueryIteratorBase<T>::~QueryIteratorBase(void) { }
+
+template<typename T> inline
+bool QueryIteratorBase<T>::operator==(const QueryIteratorBase<T>& other) const
+{ return query_ == other.query_; }
+
+template<typename T> inline
+bool QueryIteratorBase<T>::operator!=(const QueryIteratorBase<T>& other) const
+{ return !(*this == other); }
+
+// -------------------
+// QueryIterator
+// -------------------
+
+template<typename T> inline
+QueryIterator<T>::QueryIterator(void) : QueryIteratorBase<T>() { }
+
+template<typename T> inline
+QueryIterator<T>::QueryIterator(QueryBase<T>& query)
+ : QueryIteratorBase<T>(query)
+{ }
+
+template<typename T> inline
+T& QueryIterator<T>::operator*(void)
+{ return QueryIteratorBase<T>::record_; }
+
+template<typename T> inline
+T* QueryIterator<T>::operator->(void)
+{ return &(operator*()); }
+
+template<typename T> inline
+QueryIterator<T>& QueryIterator<T>::operator++(void)
+{ QueryIteratorBase<T>::ReadNext(); return *this; }
+
+template<typename T> inline
+QueryIterator<T> QueryIterator<T>::operator++(int)
+{
+ QueryIterator<T> result(*this);
+ ++(*this);
+ return result;
+}
+
+// --------------------
+// QueryConstIterator
+// --------------------
+
+template<typename T> inline
+QueryConstIterator<T>::QueryConstIterator(void) : QueryIteratorBase<T>() { }
+
+template<typename T> inline
+QueryConstIterator<T>::QueryConstIterator(const QueryBase<T>& query)
+ : QueryIteratorBase<T>(const_cast<QueryBase<T>&>(query))
+{ }
+
+template<typename T> inline
+const T& QueryConstIterator<T>::operator*(void) const
+{ return QueryIteratorBase<T>::record_; }
+
+template<typename T> inline
+const T* QueryConstIterator<T>::operator->(void) const
+{ return &(operator*()); }
+
+template<typename T> inline
+QueryConstIterator<T>& QueryConstIterator<T>::operator++(void)
+{ QueryIteratorBase<T>::ReadNext(); return *this; }
+
+template<typename T> inline
+QueryConstIterator<T> QueryConstIterator<T>::operator++(int)
+{
+ QueryConstIterator<T> result(*this);
+ ++(*this);
+ return result;
+}
+
+// -----------
+// QueryBase
+// -----------
+
+template<typename T> inline
+QueryBase<T>::QueryBase(void) { }
+
+template<typename T> inline
+QueryBase<T>::~QueryBase(void) { }
+
+template<typename T> inline
+QueryConstIterator<T> QueryBase<T>::begin(void) const
+{ return QueryConstIterator<T>(*this); }
+
+template<typename T> inline
+QueryIterator<T> QueryBase<T>::begin(void)
+{ return QueryIterator<T>(*this); }
+
+template<typename T> inline
+QueryConstIterator<T> QueryBase<T>::cbegin(void) const
+{ return QueryConstIterator<T>(*this); }
+
+template<typename T> inline
+QueryConstIterator<T> QueryBase<T>::cend(void) const
+{ return QueryConstIterator<T>(); }
+
+template<typename T> inline
+QueryConstIterator<T> QueryBase<T>::end(void) const
+{ return QueryConstIterator<T>(); }
+
+template<typename T> inline
+QueryIterator<T> QueryBase<T>::end(void)
+{ return QueryIterator<T>(); }
+
+template<typename T>
+inline void QueryIteratorBase<T>::ReadNext(void)
+{
+ assert(query_);
+ if (!query_->GetNext(record_))
+ query_ = nullptr;
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
diff --git a/include/pbbam/ReadGroupInfo.h b/include/pbbam/internal/ReadGroupInfo.inl
similarity index 55%
copy from include/pbbam/ReadGroupInfo.h
copy to include/pbbam/internal/ReadGroupInfo.inl
index 86372ee..c4b208b 100644
--- a/include/pbbam/ReadGroupInfo.h
+++ b/include/pbbam/internal/ReadGroupInfo.inl
@@ -32,225 +32,69 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file ReadGroupInfo.inl
+/// \brief Inline implementations for the ReadGroupInfo class.
+//
// Author: Derek Barnett
-#ifndef READGROUPINFO_H
-#define READGROUPINFO_H
-
-#include "pbbam/Config.h"
-#include <map>
-#include <string>
+#include "pbbam/ReadGroupInfo.h"
namespace PacBio {
namespace BAM {
-enum class BaseFeature
-{
- DELETION_QV
- , DELETION_TAG
- , INSERTION_QV
- , MERGE_QV
- , SUBSTITUTION_QV
- , SUBSTITUTION_TAG
- , IPD
- , PULSE_WIDTH
- , PKMID
- , PKMEAN
- , LABEL
- , LABEL_QV
- , ALT_LABEL
- , ALT_LABEL_QV
- , PULSE_MERGE_QV
- , PULSE_CALL
- , PRE_PULSE_FRAMES
- , PULSE_CALL_WIDTH
-};
-
-enum class FrameCodec
+inline size_t ReadGroupInfo::BarcodeCount(void) const
{
- RAW
- , V1
-};
+ if (!hasBarcodeData_)
+ throw std::runtime_error("barcode count requested but barcode data is missing");
+ return barcodeCount_;
+}
-class PBBAM_EXPORT ReadGroupInfo
+inline ReadGroupInfo& ReadGroupInfo::BarcodeData(const std::string& barcodeFile,
+ const std::string& barcodeHash,
+ size_t barcodeCount,
+ BarcodeModeType barcodeMode,
+ BarcodeQualityType barcodeQuality)
{
-public:
- /// \name Conversion & Validation
- ///
-
- static ReadGroupInfo FromSam(const std::string& sam);
-
- static std::string ToSam(const ReadGroupInfo& rg);
-
- /// \}
-
-public:
- /// \name Constructors & Related Methods
- /// \{
-
- ReadGroupInfo(void);
- ReadGroupInfo(const std::string& id);
- ReadGroupInfo(const std::string& movieName, const std::string& readType);
- ReadGroupInfo(const ReadGroupInfo& other);
- ReadGroupInfo(ReadGroupInfo&& other);
- ReadGroupInfo& operator=(const ReadGroupInfo& other);
- ReadGroupInfo& operator=(ReadGroupInfo&& other);
- ~ReadGroupInfo(void);
-
- /// \}
-
-public:
- /// \name Attributes
- /// \{
-
- const std::string& BasecallerVersion(void) const;
-
- bool HasBaseFeature(const BaseFeature& feature) const;
-
- std::string BaseFeatureTag(const BaseFeature& feature) const;
-
- std::string BindingKit(void) const;
-
- bool Control(void) const;
-
- std::map<std::string, std::string> CustomTags(void) const;
-
- std::string Date(void) const;
-
- std::string FlowOrder(void) const;
-
- std::string FrameRateHz(void) const;
-
- std::string Id(void) const;
-
- FrameCodec IpdCodec(void) const;
-
- std::string KeySequence(void) const;
-
- std::string Library(void) const;
-
- std::string MovieName(void) const;
-
- std::string Platform(void) const;
-
- std::string PredictedInsertSize(void) const;
-
- std::string Programs(void) const;
-
- FrameCodec PulseWidthCodec(void) const;
-
- std::string ReadType(void) const;
-
- std::string Sample(void) const;
-
- std::string SequencingCenter(void) const;
-
- std::string SequencingKit(void) const;
-
- /// \}
-
- /// \name Conversion & Validation
- /// \{
-
- bool IsValid(void) const;
-
- std::string ToSam(void) const;
-
- /// \}
-
- /// \name Comparison
- /// \{
-
- bool operator==(const ReadGroupInfo& other) const;
-
- /// \}
-
-public:
- /// \name Attributes
- /// \{
-
- ReadGroupInfo& BasecallerVersion(const std::string& versionNumber);
-
- ReadGroupInfo& BaseFeatureTag(const BaseFeature& feature,
- const std::string& tag);
-
- ReadGroupInfo& BindingKit(const std::string& kitNumber);
-
- ReadGroupInfo& Control(const bool ctrl);
-
- ReadGroupInfo& CustomTags(const std::map<std::string, std::string>& custom);
-
- ReadGroupInfo& Date(const std::string& date);
-
- ReadGroupInfo& FlowOrder(const std::string& order);
-
- ReadGroupInfo& FrameRateHz(const std::string& frameRateHz);
-
- ReadGroupInfo& Id(const std::string& id);
-
- ReadGroupInfo& Id(const std::string& movieName, const std::string& readType);
-
- ReadGroupInfo& IpdCodec(const FrameCodec& codec, const std::string& tag = std::string());
-
- ReadGroupInfo& KeySequence(const std::string& sequence);
-
- ReadGroupInfo& Library(const std::string& library);
-
- ReadGroupInfo& MovieName(const std::string& id);
-
- ReadGroupInfo& PredictedInsertSize(const std::string& size);
-
- ReadGroupInfo& Programs(const std::string& programs);
-
- ReadGroupInfo& PulseWidthCodec(const FrameCodec& codec, const std::string& tag = std::string());
-
- ReadGroupInfo& ReadType(const std::string& type);
-
- ReadGroupInfo& Sample(const std::string& sample);
-
- ReadGroupInfo& SequencingCenter(const std::string& center);
-
- ReadGroupInfo& SequencingKit(const std::string& kitNumber);
-
- /// \}
-
-private:
- std::string id_; // ID * Unique ID required for valid SAM/BAM header *
- std::string sequencingCenter_; // CN
- std::string date_; // DT * (ISO 8601) *
- std::string flowOrder_; // FO
- std::string keySequence_; // KS
- std::string library_; // LB
- std::string programs_; // PG
- std::string predictedInsertSize_; // PI
- std::string movieName_; // PU * more explicit, in place of "platform unit" *
- std::string sample_; // SM
+ barcodeFile_ = barcodeFile;
+ barcodeHash_ = barcodeHash;
+ barcodeCount_ = barcodeCount;
+ barcodeMode_ = barcodeMode;
+ barcodeQuality_ = barcodeQuality;
+ hasBarcodeData_ = true;
+ return *this;
+}
- // DS:<Description> components
- std::string readType_;
- std::string bindingKit_;
- std::string sequencingKit_;
- std::string basecallerVersion_;
- std::string frameRateHz_;
- bool control_ = false;
- FrameCodec ipdCodec_;
- FrameCodec pulseWidthCodec_;
- std::map<BaseFeature, std::string> features_;
+inline std::string ReadGroupInfo::BarcodeFile(void) const
+{
+ if (!hasBarcodeData_)
+ throw std::runtime_error("barcode file requested but barcode data is missing");
+ return barcodeFile_;
+}
- // custom attributes
- std::map<std::string, std::string> custom_; // tag => value
+inline std::string ReadGroupInfo::BarcodeHash(void) const
+{
+ if (!hasBarcodeData_)
+ throw std::runtime_error("barcode hash requested but barcode data is missing");
+ return barcodeHash_;
+}
-private:
- std::string EncodeSamDescription(void) const;
- void DecodeSamDescription(const std::string& description);
-};
+inline BarcodeModeType ReadGroupInfo::BarcodeMode(void) const
+{
+ if (!hasBarcodeData_)
+ throw std::runtime_error("barcode mode requested but barcode data is missing");
+ return barcodeMode_;
+}
-PBBAM_EXPORT
-std::string MakeReadGroupId(const std::string& movieName,
- const std::string& readType);
+inline BarcodeQualityType ReadGroupInfo::BarcodeQuality(void) const
+{
+ if (!hasBarcodeData_)
+ throw std::runtime_error("barcode quality requested but barcode data is missing");
+ return barcodeQuality_;
+}
-inline const std::string& ReadGroupInfo::BasecallerVersion(void) const
+inline std::string ReadGroupInfo::BasecallerVersion(void) const
{ return basecallerVersion_; }
inline ReadGroupInfo& ReadGroupInfo::BasecallerVersion(const std::string& versionNumber)
@@ -274,6 +118,14 @@ inline std::string ReadGroupInfo::BindingKit(void) const
inline ReadGroupInfo& ReadGroupInfo::BindingKit(const std::string& kitNumber)
{ bindingKit_ = kitNumber; return *this; }
+inline ReadGroupInfo& ReadGroupInfo::ClearBarcodeData(void)
+{
+ barcodeFile_.clear();
+ barcodeHash_.clear();
+ hasBarcodeData_ = false;
+ return *this;
+}
+
inline bool ReadGroupInfo::Control(void) const
{ return control_; }
@@ -304,6 +156,9 @@ inline std::string ReadGroupInfo::FrameRateHz(void) const
inline ReadGroupInfo& ReadGroupInfo::FrameRateHz(const std::string& frameRateHz)
{ frameRateHz_ = frameRateHz; return *this; }
+inline bool ReadGroupInfo::HasBarcodeData(void) const
+{ return hasBarcodeData_; }
+
inline bool ReadGroupInfo::HasBaseFeature(const BaseFeature& feature) const
{ return features_.find(feature) != features_.end(); }
@@ -317,6 +172,12 @@ inline ReadGroupInfo& ReadGroupInfo::Id(const std::string& movieName,
const std::string& readType)
{ id_ = MakeReadGroupId(movieName, readType); return *this; }
+inline int32_t ReadGroupInfo::IdToInt(const std::string& rgId)
+{
+ const uint32_t rawid = std::stoul(rgId, nullptr, 16);
+ return static_cast<int32_t>(rawid);
+}
+
inline FrameCodec ReadGroupInfo::IpdCodec(void) const
{ return ipdCodec_; }
@@ -344,6 +205,12 @@ inline ReadGroupInfo& ReadGroupInfo::MovieName(const std::string& movieName)
inline std::string ReadGroupInfo::Platform(void) const
{ return std::string("PACBIO"); }
+inline PlatformModelType ReadGroupInfo::PlatformModel(void) const
+{ return platformModel_; }
+
+inline ReadGroupInfo& ReadGroupInfo::PlatformModel(const PlatformModelType& platform)
+{ platformModel_ = platform; return *this; }
+
inline std::string ReadGroupInfo::PredictedInsertSize(void) const
{ return predictedInsertSize_; }
@@ -377,6 +244,13 @@ inline std::string ReadGroupInfo::SequencingCenter(void) const
inline ReadGroupInfo& ReadGroupInfo::SequencingCenter(const std::string& center)
{ sequencingCenter_ = center; return *this; }
+inline std::string ReadGroupInfo::SequencingChemistry(void) const
+{
+ return SequencingChemistryFromTriple(BindingKit(),
+ SequencingKit(),
+ BasecallerVersion());
+}
+
inline std::string ReadGroupInfo::SequencingKit(void) const
{ return sequencingKit_; }
@@ -388,5 +262,3 @@ inline std::string ReadGroupInfo::ToSam(const ReadGroupInfo& rg)
} // namespace BAM
} // namespace PacBio
-
-#endif // READGROUPINFO_H
diff --git a/include/pbbam/SequenceInfo.h b/include/pbbam/internal/SequenceInfo.inl
similarity index 60%
copy from include/pbbam/SequenceInfo.h
copy to include/pbbam/internal/SequenceInfo.inl
index 0cf9d04..93b653d 100644
--- a/include/pbbam/SequenceInfo.h
+++ b/include/pbbam/internal/SequenceInfo.inl
@@ -32,104 +32,31 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file SequenceInfo.inl
+/// \brief Inline implementations for the SequenceInfo class.
+//
// Author: Derek Barnett
-#ifndef SEQUENCEINFO_H
-#define SEQUENCEINFO_H
-
-#include "pbbam/Config.h"
-#include <map>
-#include <string>
+#include "pbbam/SequenceInfo.h"
namespace PacBio {
namespace BAM {
-class PBBAM_EXPORT SequenceInfo
+inline bool SequenceInfo::operator==(const SequenceInfo& other) const
{
-public:
- /// \name Conversion & Validation
- ///
-
- static SequenceInfo FromSam(const std::string& sam);
-
- static std::string ToSam(const SequenceInfo& seq);
-
- /// \}
-
-public:
- /// \name Constructors & Related Methods
- /// \{
-
- SequenceInfo(void);
- SequenceInfo(const std::string& name, const std::string& length = "0");
- SequenceInfo(const SequenceInfo& other);
- SequenceInfo(SequenceInfo&& other);
- SequenceInfo& operator=(const SequenceInfo& other);
- SequenceInfo& operator=(SequenceInfo&& other);
- ~SequenceInfo(void);
-
- /// \}
-
-public:
- /// \name Attributes
- /// \{
-
- std::string AssemblyId(void) const;
-
- std::string Checksum(void) const;
-
- std::map<std::string, std::string> CustomTags(void) const;
-
- std::string Length(void) const;
-
- std::string Name(void) const;
-
- std::string Species(void) const;
-
- std::string Uri(void) const;
-
- /// \}
-
- /// \name Conversion & Validation
- ///
-
- bool IsValid(void) const;
-
- std::string ToSam(void) const;
-
- /// \}
-
-public:
- /// \name Attributes
-
- SequenceInfo& AssemblyId(const std::string& id);
-
- SequenceInfo& Checksum(const std::string& checksum);
-
- SequenceInfo& CustomTags(const std::map<std::string, std::string>& custom);
-
- SequenceInfo& Length(const std::string& length);
-
- SequenceInfo& Name(const std::string& name);
-
- SequenceInfo& Species(const std::string& species);
-
- SequenceInfo& Uri(const std::string& uri);
-
- /// \}
-
-private:
- std::string name_; // SN:<Name> * Unique Name required for valid SAM header*
- std::string length_; // LN:<Length> * [0 - 2^31-1]
- std::string assemblyId_; // AS:<AssemblyId>
- std::string checksum_; // M5:<Checksum>
- std::string species_; // SP:<Species>
- std::string uri_; // UR:<URI>
-
- // custom attributes
- std::map<std::string, std::string> custom_; // tag => value
-};
+ return assemblyId_ == other.assemblyId_ &&
+ checksum_ == other.checksum_ &&
+ length_ == other.length_ &&
+ name_ == other.name_ &&
+ species_ == other.species_ &&
+ uri_ == other.uri_ &&
+ custom_ == other.custom_;
+}
+
+inline bool SequenceInfo::operator!=(const SequenceInfo& other) const
+{ return !(*this == other); }
inline std::string SequenceInfo::AssemblyId(void) const
{ return assemblyId_; }
@@ -178,5 +105,3 @@ inline SequenceInfo& SequenceInfo::Uri(const std::string& uri)
} // namespace BAM
} // namespace PacBio
-
-#endif // SEQUENCEINFO_H
diff --git a/include/pbbam/internal/Tag.inl b/include/pbbam/internal/Tag.inl
index cf9f60a..f8d4af2 100644
--- a/include/pbbam/internal/Tag.inl
+++ b/include/pbbam/internal/Tag.inl
@@ -32,16 +32,17 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file Tag.inl
+/// \brief Inline implementations for the Tag class.
+//
// Author: Derek Barnett
#include "pbbam/Tag.h"
#include <boost/numeric/conversion/cast.hpp>
#include <iostream>
-#ifndef TAG_INL
-#define TAG_INL
-
namespace PacBio {
namespace BAM {
namespace internal {
@@ -91,7 +92,7 @@ struct NumericConvertVisitor : public boost::static_visitor<DesiredType>
{
const std::string from = typeid(t).name();
const std::string to = typeid(DesiredType).name();
- const std::string msg = std::string("conversion not supported: ") + from + " -> " + to;
+ const std::string msg = std::string("conversion not supported: ") + from + " -> " + to;
throw std::runtime_error(msg);
return 0;
}
@@ -114,7 +115,7 @@ struct IsEqualVisitor : public boost::static_visitor<bool>
return false;
}
- bool operator()(const boost::blank&, const boost::blank&) const
+ bool operator() (const boost::blank&, const boost::blank&) const
{ return true; }
template <typename T>
@@ -273,8 +274,8 @@ inline uint16_t Tag::ToUInt16(void) const
inline int32_t Tag::ToInt32(void) const
{
-// if (IsInt32())
-// return boost::get<int32_t>(data_);
+ if (IsInt32())
+ return boost::get<int32_t>(data_);
return boost::apply_visitor(internal::ToInt32ConvertVisitor(), data_);
}
@@ -320,5 +321,3 @@ inline std::string Tag::Typename(void) const
} // namespace BAM
} // namespace PacBio
-
-#endif // TAG_INL
diff --git a/include/pbbam/virtual/VirtualPolymeraseBamRecord.h b/include/pbbam/virtual/VirtualPolymeraseBamRecord.h
index 86f3ef8..9a81790 100644
--- a/include/pbbam/virtual/VirtualPolymeraseBamRecord.h
+++ b/include/pbbam/virtual/VirtualPolymeraseBamRecord.h
@@ -32,11 +32,15 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file VirtualPolymeraseBamRecord.h
+/// \brief Defines the VirtualPolymeraseBamRecord class.
+//
// Author: Armin Töpfer
-#ifndef POLYMERASEBAMRECORD_H
-#define POLYMERASEBAMRECORD_H
+#ifndef VIRTUALPOLYMERASEBAMRECORD_H
+#define VIRTUALPOLYMERASEBAMRECORD_H
#include <vector>
#include <sstream>
@@ -50,87 +54,68 @@
namespace PacBio {
namespace BAM {
-/// This class represents a polymerase read stitched on the fly
-/// from subreads|hqregion+scraps.
+/// \brief The VirtualPolymeraseBamRecord class represents a polymerase read stitched on the fly
+/// from subreads|hqregion+scraps.
+///
class VirtualPolymeraseBamRecord : public BamRecord
{
public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates a "virtual" polymerase %BAM record, by re-stitching its constituent segments.
+ ///
+ /// \param[in] unorderedSources source data (subreads, scraps, etc.)
+ /// \param[in] header %BAM header to associate with the new record
+ ///
+ /// \throws std::runtime_error on failure to stitch virtual record
+ ///
VirtualPolymeraseBamRecord(std::vector<BamRecord>&& unorderedSources,
const BamHeader& header);
- VirtualPolymeraseBamRecord() = delete;
- // Move constructor
+ VirtualPolymeraseBamRecord(void) = delete;
+ VirtualPolymeraseBamRecord(const VirtualPolymeraseBamRecord&) = default; // un-"delete"-ed for SWIG
VirtualPolymeraseBamRecord(VirtualPolymeraseBamRecord&&) = default;
- // Copy constructor
- VirtualPolymeraseBamRecord(const VirtualPolymeraseBamRecord&) = delete;
- // Move assignment operator
- VirtualPolymeraseBamRecord& operator=(VirtualPolymeraseBamRecord&&) = default;
- // Copy assignment operator
VirtualPolymeraseBamRecord& operator=(const VirtualPolymeraseBamRecord&) = delete;
- // Destructor
+ VirtualPolymeraseBamRecord& operator=(VirtualPolymeraseBamRecord&&) = default;
virtual ~VirtualPolymeraseBamRecord() = default;
-public:
- /// Provides bool if a given VirtualRegionType has been annotated
- bool HasVirtualRegionType(const VirtualRegionType type) const
- { return virtualRegionsMap_.find(type) != virtualRegionsMap_.end(); }
+ /// \}
- /// Provides annotations of the polymerase read for a given VirtualRegionType
- std::vector<VirtualRegion> VirtualRegionsTable(const VirtualRegionType type) const
- { return virtualRegionsMap_.at(type); }
+public:
+ /// \name Virtual Record Attributes
+ ///
- /// Provides all annotations of the polymerase read as a map
- std::map<VirtualRegionType, std::vector<VirtualRegion>> VirtualRegionsMap() const
- { return virtualRegionsMap_; }
+ /// \returns true if requested VirtualRegionType has been annotated.
+ ///
+ bool HasVirtualRegionType(const VirtualRegionType regionType) const;
-public: // New BamRecord functionality.
+ /// \returns IPD frame data
+ ///
Frames IPDV1Frames(Orientation orientation = Orientation::NATIVE) const;
+ /// \brief Provides all annotations of the polymerase read as a map (type => regions)
+ ///
+ std::map<VirtualRegionType, std::vector<VirtualRegion>> VirtualRegionsMap(void) const;
+
+ /// \brief Provides annotations of the polymerase read for a given VirtualRegionType.
+ ///
+ /// \param[in] regionType requested region type
+ /// \returns regions that match the requested type (empty vector if none found).
+ ///
+ std::vector<VirtualRegion> VirtualRegionsTable(const VirtualRegionType regionType) const;
+
+ /// \}
+
private:
std::vector<BamRecord> sources_;
std::map<VirtualRegionType, std::vector<VirtualRegion>> virtualRegionsMap_;
private:
- void StitchSources();
-
- /// \brief Appends content of src vector to dst vector using move semantics.
- /// \param[in] src Input vector that will be empty after execution
- /// \param[in,out] dest Output vector that will be appended to
- template <typename T>
- inline void MoveAppend(std::vector<T>& src, std::vector<T>& dst) noexcept
- {
- if (dst.empty())
- {
- dst = std::move(src);
- }
- else
- {
- dst.reserve(dst.size() + src.size());
- std::move(src.begin(), src.end(), std::back_inserter(dst));
- src.clear();
- }
- }
-
- /// \brief Appends content of src vector to dst vector using move semantics.
- /// \param[in] src Input vector via perfect forwarding
- /// \param[in,out] dest Output vector that will be appended to
- template <typename T>
- inline void MoveAppend(std::vector<T>&& src, std::vector<T>& dst) noexcept
- {
- if (dst.empty())
- {
- dst = std::move(src);
- }
- else
- {
- dst.reserve(dst.size() + src.size());
- std::move(src.begin(), src.end(), std::back_inserter(dst));
- src.clear();
- }
- }
+ void StitchSources(void);
};
} // namespace BAM
} // namespace PacBio
-#endif // POLYMERASEBAMRECORD_H
+#endif // VIRTUALPOLYMERASEBAMRECORD_H
diff --git a/include/pbbam/virtual/VirtualPolymeraseCompositeReader.h b/include/pbbam/virtual/VirtualPolymeraseCompositeReader.h
new file mode 100644
index 0000000..9ab025d
--- /dev/null
+++ b/include/pbbam/virtual/VirtualPolymeraseCompositeReader.h
@@ -0,0 +1,111 @@
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file VirtualPolymeraseCompositeReader.h
+/// \brief Defines the VirtualPolymeraseCompositeReader class.
+//
+// Author: Derek Barnett
+
+#ifndef VIRTUALPOLYMERASECOMPOSITEREADER_H
+#define VIRTUALPOLYMERASECOMPOSITEREADER_H
+
+#include "pbbam/DataSet.h"
+#include "pbbam/PbiFilter.h"
+#include "pbbam/virtual/VirtualPolymeraseReader.h"
+#include <deque>
+#include <memory>
+#include <string>
+#include <utility>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The VirtualPolymeraseCompositeReader provides an interface for
+/// re-stitching "virtual" polymerase reads from their constituent parts,
+/// across multiple %BAM resources from a DataSet.
+///
+/// This class is essentially a DataSet-aware wrapper around
+/// VirtualPolymeraseReader, enabling multiple resources as input. See that
+/// class's documentation for more info.
+///
+class PBBAM_EXPORT VirtualPolymeraseCompositeReader
+{
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ VirtualPolymeraseCompositeReader(const DataSet& dataset);
+
+ VirtualPolymeraseCompositeReader(void) = delete;
+ VirtualPolymeraseCompositeReader(const VirtualPolymeraseCompositeReader&) = delete;
+ VirtualPolymeraseCompositeReader(VirtualPolymeraseCompositeReader&&) = delete;
+ VirtualPolymeraseCompositeReader& operator=(const VirtualPolymeraseCompositeReader&) = delete;
+ VirtualPolymeraseCompositeReader& operator=(VirtualPolymeraseCompositeReader&&) = delete;
+ ~VirtualPolymeraseCompositeReader(void) = default;
+
+ /// \}
+
+public:
+ /// \name Stitched Record Reading
+ ///
+
+ /// \returns true if more ZMWs/files are available for reading.
+ bool HasNext(void);
+
+ /// \returns the next stitched polymerase read
+ VirtualPolymeraseBamRecord Next(void);
+
+ /// \returns the next set of reads that belong to one ZMW from one %BAM
+ /// resource (a primary %BAM and/or its scraps file). This enables
+ /// stitching records in a distinct thread.
+ ///
+ std::vector<BamRecord> NextRaw(void);
+
+ /// \}
+
+private:
+ std::deque< std::pair<std::string, std::string> > sources_;
+ std::unique_ptr<VirtualPolymeraseReader> currentReader_;
+ PbiFilter filter_;
+
+private:
+ void OpenNextReader(void);
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // VIRTUALPOLYMERASECOMPOSITEREADER_H
diff --git a/include/pbbam/virtual/VirtualPolymeraseReader.h b/include/pbbam/virtual/VirtualPolymeraseReader.h
index e166482..0e2e198 100644
--- a/include/pbbam/virtual/VirtualPolymeraseReader.h
+++ b/include/pbbam/virtual/VirtualPolymeraseReader.h
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file VirtualPolymeraseReader.h
+/// \brief Defines the VirtualPolymeraseReader class.
+//
// Author: Armin Töpfer
#ifndef VIRTUALPOLYMERASEREADER_H
@@ -44,59 +48,87 @@
#include "pbbam/BamRecord.h"
#include "pbbam/Config.h"
#include "pbbam/EntireFileQuery.h"
+#include "pbbam/PbiFilter.h"
+#include "pbbam/PbiFilterQuery.h"
#include "pbbam/virtual/VirtualPolymeraseBamRecord.h"
namespace PacBio {
namespace BAM {
+/// \brief The VirtualPolymeraseReader class provides an interface for re-stitching
+/// "virtual" polymerase reads from their constituent parts.
+///
class VirtualPolymeraseReader
{
public:
- /// Constructor takes two input bam file paths.
- /// \param[in] primaryBamFilePath hqregion.bam or subreads.bam file path
- /// \param[in] scrapsBamFilePath scraps.bam file path
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates a reader that will operate on a primary %BAM file (e.g. subread data)
+ /// and a scraps file, consuming all reads.
+ ///
+ /// \param[in] primaryBamFilePath hqregion.bam or subreads.bam file path
+ /// \param[in] scrapsBamFilePath scraps.bam file path
+ ///
VirtualPolymeraseReader(const std::string& primaryBamFilePath,
const std::string& scrapsBamFilePath);
- VirtualPolymeraseReader() = delete;
- // Move constructor
- VirtualPolymeraseReader(VirtualPolymeraseReader&&) = delete;
- // Copy constructor
+ /// \brief Creates a reader that will operate on a primary %BAM file (e.g. subread data)
+ /// and a scraps file, respecting the provided PBI filter.
+ ///
+ /// \note All %BAM files must have a corresponding ".pbi" index file to use
+ /// the filter. You may need to call BamFile::EnsurePacBioIndexExists
+ /// before constructing the reader.
+ ///
+ /// \param[in] primaryBamFilePath hqregion.bam or subreads.bam file path
+ /// \param[in] scrapsBamFilePath scraps.bam file path
+ /// \param[in] filter PBI filter criteria
+ ///
+ VirtualPolymeraseReader(const std::string& primaryBamFilePath,
+ const std::string& scrapsBamFilePath,
+ const PbiFilter& filter);
+
+ VirtualPolymeraseReader(void) = delete;
VirtualPolymeraseReader(const VirtualPolymeraseReader&) = delete;
- // Move assignment operator
- VirtualPolymeraseReader& operator=(VirtualPolymeraseReader&&) = delete;
- // Copy assignment operator
+ VirtualPolymeraseReader(VirtualPolymeraseReader&&) = delete;
VirtualPolymeraseReader& operator=(const VirtualPolymeraseReader&) = delete;
- // Destructor
- ~VirtualPolymeraseReader() = default;
+ VirtualPolymeraseReader& operator=(VirtualPolymeraseReader&&) = delete;
+ ~VirtualPolymeraseReader(void);
+
+ /// \}
public:
- /// Provides the next stitched polymerase read
- VirtualPolymeraseBamRecord Next();
+ /// \name File Headers
+ /// \{
- /// Provides the next set of reads that belong to one ZMW.
- /// Enables stitching records in a distinct thread.
- std::vector<BamRecord> NextRaw();
+ /// \returns the BamHeader associated with this reader's "primary" %BAM file
+ BamHeader PrimaryHeader(void) const;
- /// Returns true if more ZMWs are available for reading.
- bool HasNext();
+ /// \returns the BamHeader associated with this reader's "scraps" %BAM file
+ BamHeader ScrapsHeader(void) const;
- BamHeader PrimaryHeader();
- BamHeader ScrapsHeader();
+ /// \}
-private:
- const std::string primaryBamFilePath_;
- const std::string scrapsBamFilePath_;
+public:
+ /// \name Stitched Record Reading
+ ///
- std::unique_ptr<BamFile> primaryBamFile_;
- std::unique_ptr<BamFile> scrapsBamFile_;
- std::unique_ptr<EntireFileQuery> primaryQuery_;
- std::unique_ptr<EntireFileQuery> scrapsQuery_;
+ /// \returns true if more ZMWs are available for reading.
+ bool HasNext(void);
- EntireFileQuery::iterator primaryIt_;
- EntireFileQuery::iterator scrapsIt_;
+ /// \returns the next stitched polymerase read
+ VirtualPolymeraseBamRecord Next(void);
- std::unique_ptr<BamHeader> polyHeader_;
+ /// \returns the next set of reads that belong to one ZMW.
+ /// This enables stitching records in a distinct thread.
+ ///
+ std::vector<BamRecord> NextRaw(void);
+
+ /// \}
+
+private:
+ struct VirtualPolymeraseReaderPrivate;
+ std::unique_ptr<VirtualPolymeraseReaderPrivate> d_;
};
} // namespace BAM
diff --git a/include/pbbam/virtual/VirtualRegion.h b/include/pbbam/virtual/VirtualRegion.h
index 69c16df..facce7d 100644
--- a/include/pbbam/virtual/VirtualRegion.h
+++ b/include/pbbam/virtual/VirtualRegion.h
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file VirtualRegion.h
+/// \brief Defines the VirtualRegion class.
+//
// Author: Armin Töpfer
#ifndef VIRTUALREGION_H
@@ -44,7 +48,9 @@
namespace PacBio {
namespace BAM {
-/// Represents annotation of a polymerase region.
+
+/// \brief The VirtualRegion represents an annotation of a polymerase region.
+///
struct VirtualRegion
{
public:
@@ -54,49 +60,70 @@ public:
LocalContextFlags cxTag = LocalContextFlags::NO_LOCAL_CONTEXT;
int barcodeLeft = -1;
int barcodeRight = -1;
+ int score = 0;
public:
+ /// \brief Creates a virtual region with basic type & position info.
+ ///
VirtualRegion(const VirtualRegionType type,
const int beginPos,
- const int endPos)
- : type(type)
- , beginPos(beginPos)
- , endPos(endPos), cxTag()
- {}
+ const int endPos,
+ const int score = 0);
+
+ /// \brief Creates a virtual region with type/position info, as well as context & barcode.
+ ///
VirtualRegion(const VirtualRegionType type,
const int beginPos,
const int endPos,
const LocalContextFlags cxTag,
const int barcodeLeft,
- const int barcodeRight)
- : type(type)
- , beginPos(beginPos)
- , endPos(endPos)
- , cxTag(cxTag)
- , barcodeLeft(barcodeLeft)
- , barcodeRight(barcodeRight)
- {}
- VirtualRegion() = default;
- // Move constructor
- VirtualRegion(VirtualRegion&&) = default;
- // Copy constructor
+ const int barcodeRight,
+ const int score = 0);
+
+ VirtualRegion(void) = default;
VirtualRegion(const VirtualRegion&) = default;
- // Move assignment operator
+ VirtualRegion(VirtualRegion&&) = default;
+ VirtualRegion& operator=(const VirtualRegion&) = default; // un-"delete"-ed for SWIG
VirtualRegion& operator=(VirtualRegion&&) = default;
- // Copy assignment operator
- VirtualRegion& operator=(const VirtualRegion&) = delete;
- // Destructor
- ~VirtualRegion() = default;
+ ~VirtualRegion(void) = default;
+
+ bool operator==(const VirtualRegion &v1) const;
-public:
- bool operator==(const VirtualRegion &v1) const
- {
- return (v1.type == this->type &&
- v1.beginPos == this->beginPos &&
- v1.endPos == this->endPos);
- }
};
+inline VirtualRegion::VirtualRegion(const VirtualRegionType type,
+ const int beginPos,
+ const int endPos,
+ const int score)
+ : type(type)
+ , beginPos(beginPos)
+ , endPos(endPos), cxTag()
+ , score(score)
+{}
+
+inline VirtualRegion::VirtualRegion(const VirtualRegionType type,
+ const int beginPos,
+ const int endPos,
+ const LocalContextFlags cxTag,
+ const int barcodeLeft,
+ const int barcodeRight,
+ const int score)
+ : type(type)
+ , beginPos(beginPos)
+ , endPos(endPos)
+ , cxTag(cxTag)
+ , barcodeLeft(barcodeLeft)
+ , barcodeRight(barcodeRight)
+ , score(score)
+{}
+
+inline bool VirtualRegion::operator==(const VirtualRegion& v1) const
+{
+ return (v1.type == this->type &&
+ v1.beginPos == this->beginPos &&
+ v1.endPos == this->endPos);
+}
+
} // namespace BAM
} // namespace PacBio
diff --git a/include/pbbam/virtual/VirtualRegionType.h b/include/pbbam/virtual/VirtualRegionType.h
index 6b917bf..d359094 100644
--- a/include/pbbam/virtual/VirtualRegionType.h
+++ b/include/pbbam/virtual/VirtualRegionType.h
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file VirtualRegionType.h
+/// \brief Defines the VirtualRegionType enum.
+//
// Author: Derek Barnett
#ifndef REGIONTYPE_H
@@ -42,14 +46,17 @@
namespace PacBio {
namespace BAM {
-/// Type of annotated region.
-enum class VirtualRegionType : char
+
+/// \brief This enum defines the types of annotated region.
+///
+enum class VirtualRegionType // : char
{
- ADAPTER = 'A',
- BARCODE = 'B',
- SUBREAD = 'S',
- HQREGION = 'H',
- LQREGION = 'L' // Outside the HQ region
+ ADAPTER = 0x41, ///< Adapter region ('A')
+ BARCODE = 0x42, ///< Barcode region ('B')
+ FILTERED = 0x46, ///< Filtered subread ('F')
+ SUBREAD = 0x53, ///< Subread ('S')
+ HQREGION = 0x48, ///< High-quality region ('H')
+ LQREGION = 0x4C ///< Low-quality region ('L'), i.e. outside the HQ region
};
} // namespace BAM
diff --git a/include/pbbam/virtual/VirtualRegionTypeMap.h b/include/pbbam/virtual/VirtualRegionTypeMap.h
index 105696c..200f12f 100644
--- a/include/pbbam/virtual/VirtualRegionTypeMap.h
+++ b/include/pbbam/virtual/VirtualRegionTypeMap.h
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file VirtualRegionTypeMap.h
+/// \brief Defines the VirtualRegionTypeMap class.
+//
// Author: Derek Barnett
#ifndef VIRTUALREGIONTYPEMAP_H
@@ -45,7 +49,10 @@
namespace PacBio {
namespace BAM {
-/// Allows mapping of char 'A', 'B', 'H', and 'L' to the respective enum keys.
+
+/// \brief The VirtualRegionTypeMap class provides mapping between char codes and
+/// VirtualRegionType enum keys.
+///
class VirtualRegionTypeMap
{
public:
diff --git a/include/pbbam/virtual/ZmwWhitelistVirtualReader.h b/include/pbbam/virtual/ZmwWhitelistVirtualReader.h
new file mode 100644
index 0000000..d40f03c
--- /dev/null
+++ b/include/pbbam/virtual/ZmwWhitelistVirtualReader.h
@@ -0,0 +1,151 @@
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file ZmwWhitelistVirtualReader.h
+/// \brief Defines the ZmwWhitelistVirtualReader class.
+//
+// Author: Derek Barnett
+
+#ifndef ZMWWHITELISTVIRTUALREADER_H
+#define ZMWWHITELISTVIRTUALREADER_H
+
+#include <deque>
+#include <memory>
+#include <vector>
+#include <string>
+#include "pbbam/BamFile.h"
+#include "pbbam/BamRecord.h"
+#include "pbbam/Config.h"
+#include "pbbam/PbiIndexedBamReader.h"
+#include "pbbam/virtual/VirtualPolymeraseBamRecord.h"
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The ZmwWhitelistVirtualReader class provides an interface for re-stitching
+/// "virtual" polymerase reads from their constituent parts, limiting results
+/// to only those reads originating from a 'whitelist' of ZMW hole numbers.
+///
+/// Whitelisted ZMWs that are not present in both primary and scraps BAMs
+/// will be "pre-removed." This ensures that, given client code like this:
+///
+/// \include code/ZmwWhitelistVirtualReader.txt
+///
+/// each iteration will always provide valid data - either a valid virtual record from
+/// Next() or a non-empty vector from NextRaw().
+///
+/// \note This reader requires that both input %BAM files also have associated PBI
+/// files available for query. See BamFile::EnsurePacBioIndexExists .
+///
+class ZmwWhitelistVirtualReader
+{
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates a reader that will operate on a primary %BAM file (e.g. subread data)
+ /// and a scraps file, using a ZMW whitelist to filter the input.
+ ///
+ /// \param[in] zmwWhitelist list of ZMWs to restrict iteration over
+ /// \param[in] primaryBamFilePath hqregion.bam or subreads.bam file path
+ /// \param[in] scrapsBamFilePath scraps.bam file path
+ ///
+ /// \note This reader requires that both input %BAM files also have associated PBI
+ /// files available for query. See BamFile::EnsurePacBioIndexExists .
+ ///
+ /// \throws std::runtime_error if any files (*.bam and/or *.pbi) were not available for reading, or
+ /// if malformed data encountered
+ ///
+ ZmwWhitelistVirtualReader(const std::vector<int32_t>& zmwWhitelist,
+ const std::string& primaryBamFilePath,
+ const std::string& scrapsBamFilePath);
+
+ ZmwWhitelistVirtualReader(void) = delete;
+ ZmwWhitelistVirtualReader(const ZmwWhitelistVirtualReader&) = delete;
+ ZmwWhitelistVirtualReader(ZmwWhitelistVirtualReader&&) = delete;
+ ZmwWhitelistVirtualReader& operator=(const ZmwWhitelistVirtualReader&) = delete;
+ ZmwWhitelistVirtualReader& operator=(ZmwWhitelistVirtualReader&&) = delete;
+ ~ZmwWhitelistVirtualReader(void) = default;
+
+ /// \}
+
+public:
+ /// \name Stitched Record Reading
+ /// \{
+
+ /// \returns true if more ZMWs are available for reading.
+ bool HasNext(void) const;
+
+ /// \returns the re-stitched polymerase read from the next ZMW in the whitelist
+ VirtualPolymeraseBamRecord Next(void);
+
+ /// \returns the set of reads that belong to the next ZMW in the whitelist.
+ /// This enables stitching records in a distinct thread.
+ ///
+ std::vector<BamRecord> NextRaw(void);
+
+ /// \}
+
+public:
+ /// \name File Headers
+ /// \{
+
+ /// \returns the BamHeader associated with this reader's "primary" %BAM file
+ BamHeader PrimaryHeader(void) const;
+
+ /// \returns the BamHeader associated with this reader's "scraps" %BAM file
+ BamHeader ScrapsHeader(void) const;
+
+ /// \}
+
+private:
+ const std::string primaryBamFilePath_;
+ const std::string scrapsBamFilePath_;
+ std::unique_ptr<BamFile> primaryBamFile_;
+ std::unique_ptr<BamFile> scrapsBamFile_;
+ std::unique_ptr<PbiIndexedBamReader> primaryReader_;
+ std::unique_ptr<PbiIndexedBamReader> scrapsReader_;
+ std::unique_ptr<BamHeader> polyHeader_;
+ std::deque<int32_t> zmwWhitelist_;
+
+private:
+ void PreFilterZmws(const std::vector<int32_t>& zmwWhitelist);
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // ZMWWHITELISTVIRTUALREADER_H
diff --git a/src/Accuracy.cpp b/src/Accuracy.cpp
index 0c8114a..e335abf 100644
--- a/src/Accuracy.cpp
+++ b/src/Accuracy.cpp
@@ -32,12 +32,16 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file Accuracy.cpp
+/// \brief Implements the Accuracy class.
+//
// Author: Derek Barnett
#include "pbbam/Accuracy.h"
using namespace PacBio;
using namespace PacBio::BAM;
-const int Accuracy::MIN = 0;
-const int Accuracy::MAX = 1000;
+const float Accuracy::MIN = 0.0f;
+const float Accuracy::MAX = 1.0f;
diff --git a/src/AlignmentPrinter.cpp b/src/AlignmentPrinter.cpp
index 6692021..5155859 100644
--- a/src/AlignmentPrinter.cpp
+++ b/src/AlignmentPrinter.cpp
@@ -32,24 +32,31 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file AlignmentPrinter.cpp
+/// \brief Implements the AlignmentPrinter class.
+//
// Author: Armin Töpfer
+#include "pbbam/AlignmentPrinter.h"
+
#include <cmath>
#include <iostream>
#include <iomanip>
#include <stdexcept>
#include <sstream>
-#include "pbbam/AlignmentPrinter.h"
-
using namespace PacBio;
using namespace PacBio::BAM;
+AlignmentPrinter::AlignmentPrinter(const IndexedFastaReader& ifr)
+ : ifr_(std::unique_ptr<IndexedFastaReader>(new IndexedFastaReader(ifr)))
+{ }
+
std::string AlignmentPrinter::Print(const BamRecord& record,
const Orientation orientation)
{
-
std::string seq = record.Sequence(orientation, true, true);
std::string ref = ifr_->ReferenceSubsequence(record, orientation, true, true);
diff --git a/src/BaiIndexedBamReader.cpp b/src/BaiIndexedBamReader.cpp
new file mode 100644
index 0000000..3f9d538
--- /dev/null
+++ b/src/BaiIndexedBamReader.cpp
@@ -0,0 +1,141 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BaiIndexedBamReader.cpp
+/// \brief Implements the BaiIndexedBamReader class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/BaiIndexedBamReader.h"
+#include "MemoryUtils.h"
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+struct BaiIndexedBamReaderPrivate
+{
+public:
+ BaiIndexedBamReaderPrivate(const BamFile& file,
+ const GenomicInterval& interval)
+ : htsIndex_(nullptr)
+ , htsIterator_(nullptr)
+ {
+ LoadIndex(file.Filename());
+ Interval(file.Header(), interval);
+ }
+
+ void Interval(const BamHeader& header,
+ const GenomicInterval& interval)
+ {
+ htsIterator_.reset(nullptr);
+
+ if (header.HasSequence(interval.Name())) {
+ auto id = header.SequenceId(interval.Name());
+ if (id >= 0 && static_cast<size_t>(id) < header.NumSequences()) {
+ htsIterator_.reset(bam_itr_queryi(htsIndex_.get(),
+ id,
+ interval.Start(),
+ interval.Stop()));
+ }
+ }
+
+ if (!htsIterator_)
+ throw std::runtime_error("could not create iterator for requested region");
+ }
+
+ void LoadIndex(const string& fn)
+ {
+ htsIndex_.reset(bam_index_load(fn.c_str()));
+ if (!htsIndex_)
+ throw std::runtime_error("could not load BAI index data");
+ }
+
+ int ReadRawData(BGZF* bgzf, bam1_t* b)
+ {
+ assert(htsIterator_.get());
+ return hts_itr_next(bgzf, htsIterator_.get(), b, nullptr);
+ }
+
+public:
+ GenomicInterval interval_;
+ std::unique_ptr<hts_idx_t, internal::HtslibIndexDeleter> htsIndex_;
+ std::unique_ptr<hts_itr_t, internal::HtslibIteratorDeleter> htsIterator_;
+};
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+BaiIndexedBamReader::BaiIndexedBamReader(const GenomicInterval& interval,
+ const std::string& filename)
+ : BaiIndexedBamReader(interval, BamFile(filename))
+{ }
+
+BaiIndexedBamReader::BaiIndexedBamReader(const GenomicInterval& interval,
+ const BamFile& bamFile)
+ : BamReader(bamFile)
+ , d_(new BaiIndexedBamReaderPrivate(File(), interval))
+{ }
+
+BaiIndexedBamReader::BaiIndexedBamReader(const GenomicInterval& interval,
+ BamFile&& bamFile)
+ : BamReader(std::move(bamFile))
+ , d_(new BaiIndexedBamReaderPrivate(File(), interval))
+{ }
+
+const GenomicInterval& BaiIndexedBamReader::Interval(void) const
+{
+ assert(d_);
+ return d_->interval_;
+}
+
+int BaiIndexedBamReader::ReadRawData(BGZF* bgzf, bam1_t* b)
+{
+ assert(d_);
+ return d_->ReadRawData(bgzf, b);
+}
+
+BaiIndexedBamReader& BaiIndexedBamReader::Interval(const GenomicInterval& interval)
+{
+ assert(d_);
+ d_->Interval(Header(), interval);
+ return *this;
+}
diff --git a/src/BamFile.cpp b/src/BamFile.cpp
index f75bc9d..249c3e2 100644
--- a/src/BamFile.cpp
+++ b/src/BamFile.cpp
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file BamFile.cpp
+/// \brief Implements the BamFile class.
+//
// Author: Derek Barnett
#include "pbbam/BamFile.h"
@@ -41,6 +45,8 @@
#include "MemoryUtils.h"
#include <htslib/sam.h>
#include <memory>
+#include <sstream>
+#include <cassert>
#include <sys/stat.h>
using namespace PacBio;
using namespace PacBio::BAM;
@@ -55,25 +61,52 @@ class BamFilePrivate
public:
BamFilePrivate(const string& fn)
: filename_(fn)
+ , firstAlignmentOffset_(-1)
{
- // update verbosity
- hts_verbose = PacBio::BAM::HtslibVerbosity;
+ // ensure we've updated htslib verbosity with requested verbosity here
+ hts_verbose = ( PacBio::BAM::HtslibVerbosity == -1 ? 0 : PacBio::BAM::HtslibVerbosity);
// attempt open
std::unique_ptr<samFile, internal::HtslibFileDeleter> f(sam_open(filename_.c_str(), "rb"));
- if (!f)
- throw std::runtime_error("could not open file");
+ if (!f || !f->fp.bgzf)
+ throw std::runtime_error(string("could not open BAM file: ") + filename_);
if (f->format.format != bam)
throw std::runtime_error("expected BAM, unknown format");
+#ifndef PBBAM_NO_CHECK_EOF
+ // sanity check on file
+ const int eofCheck = bgzf_check_EOF(f->fp.bgzf);
+ if (eofCheck <= 0 ) {
+ // 1: EOF present & correct
+ // 2: not seekable (e.g. reading from stdin)
+ // 0: EOF absent
+ // -1: some other error
+ stringstream e;
+ if (eofCheck == 0)
+ e << fn << " : is missing EOF block" << endl;
+ else
+ e << fn << " : unknown error while checking EOF block" << endl;
+ throw std::runtime_error(e.str());
+ }
+#endif
+
// attempt fetch header
std::unique_ptr<bam_hdr_t, internal::HtslibHeaderDeleter> hdr(sam_hdr_read(f.get()));
header_ = internal::BamHeaderMemory::FromRawData(hdr.get());
+
+ // cache first alignment offset
+ firstAlignmentOffset_ = bgzf_tell(f->fp.bgzf);
+ }
+
+ unique_ptr<BamFilePrivate> DeepCopy(void)
+ {
+ return unique_ptr<BamFilePrivate>(new BamFilePrivate(filename_));
}
public:
std::string filename_;
BamHeader header_;
+ int64_t firstAlignmentOffset_;
};
} // namespace internal
@@ -89,7 +122,7 @@ BamFile::BamFile(const std::string& filename)
{ }
BamFile::BamFile(const BamFile& other)
- : d_(other.d_)
+ : d_(other.d_->DeepCopy())
{ }
BamFile::BamFile(BamFile&& other)
@@ -97,30 +130,45 @@ BamFile::BamFile(BamFile&& other)
{ }
BamFile& BamFile::operator=(const BamFile& other)
-{ d_ = other.d_; return *this; }
+{
+ d_ = other.d_->DeepCopy();
+ return *this;
+}
BamFile& BamFile::operator=(BamFile&& other)
{ d_ = std::move(other.d_); return *this; }
BamFile::~BamFile(void) { }
+void BamFile::CreatePacBioIndex(void) const
+{
+ PbiFile::CreateFrom(*this);
+}
+
+void BamFile::CreateStandardIndex(void) const
+{
+ if (bam_index_build(d_->filename_.c_str(), 0) != 0)
+ throw std::runtime_error("could not build BAI index");
+}
+
void BamFile::EnsurePacBioIndexExists(void) const
{
if (!PacBioIndexExists())
- PbiFile::CreateFrom(*this);
+ CreatePacBioIndex();
}
void BamFile::EnsureStandardIndexExists(void) const
{
- if (!StandardIndexExists()) {
- if (bam_index_build(d_->filename_.c_str(), 0) != 0)
- throw std::runtime_error("could not build BAI index");
- }
+ if (!StandardIndexExists())
+ CreateStandardIndex();
}
std::string BamFile::Filename(void) const
{ return d_->filename_; }
+int64_t BamFile::FirstAlignmentOffset(void) const
+{ return d_->firstAlignmentOffset_; }
+
bool BamFile::HasReference(const std::string& name) const
{ return d_->header_.HasSequence(name); }
@@ -131,20 +179,18 @@ bool BamFile::IsPacBioBAM(void) const
{ return !d_->header_.PacBioBamVersion().empty(); }
bool BamFile::PacBioIndexExists(void) const
-{
- const string pbiFn = PacBioIndexFilename();
- if (internal::FileUtils::Exists(pbiFn)) {
- const time_t bamTimestamp = internal::FileUtils::LastModified(Filename());
- const time_t pbiTimestamp = internal::FileUtils::LastModified(pbiFn);
- if (bamTimestamp <= pbiTimestamp)
- return true;
- }
- return false;
-}
+{ return internal::FileUtils::Exists(PacBioIndexFilename()); }
std::string BamFile::PacBioIndexFilename(void) const
{ return d_->filename_ + ".pbi"; }
+bool BamFile::PacBioIndexIsNewer(void) const
+{
+ const auto bamTimestamp = internal::FileUtils::LastModified(Filename());
+ const auto pbiTimestamp = internal::FileUtils::LastModified(PacBioIndexFilename());
+ return bamTimestamp <= pbiTimestamp;
+}
+
int BamFile::ReferenceId(const std::string& name) const
{ return d_->header_.SequenceId(name); }
@@ -158,17 +204,15 @@ std::string BamFile::ReferenceName(const int id) const
{ return d_->header_.SequenceName(id); }
bool BamFile::StandardIndexExists(void) const
-{
- const string bamFn = Filename();
- const string baiFn = StandardIndexFilename();
- if (internal::FileUtils::Exists(baiFn)) {
- const time_t bamTimestamp = internal::FileUtils::LastModified(bamFn);
- const time_t baiTimestamp = internal::FileUtils::LastModified(baiFn);
- if (bamTimestamp <= baiTimestamp)
- return true;
- }
- return false;
-}
+{ return internal::FileUtils::Exists(StandardIndexFilename()); }
std::string BamFile::StandardIndexFilename(void) const
{ return d_->filename_ + ".bai"; }
+
+bool BamFile::StandardIndexIsNewer(void) const
+{
+ const auto bamTimestamp = internal::FileUtils::LastModified(Filename());
+ const auto baiTimestamp = internal::FileUtils::LastModified(StandardIndexFilename());
+ return bamTimestamp <= baiTimestamp;
+}
+
diff --git a/src/BamHeader.cpp b/src/BamHeader.cpp
index 7caa62d..69a9c40 100644
--- a/src/BamHeader.cpp
+++ b/src/BamHeader.cpp
@@ -32,11 +32,15 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file BamHeader.cpp
+/// \brief Implements the BamHeader class.
+//
// Author: Derek Barnett
#include "pbbam/BamHeader.h"
-#include "SequenceUtils.h"
+#include "StringUtils.h"
#include <htslib/hts.h>
#include <sstream>
#include <set>
@@ -59,33 +63,153 @@ static const string token_VN = string("VN");
static const string token_SO = string("SO");
static const string token_pb = string("pb");
-static const string current_version = string("3.0b7");
-
-class BamHeaderPrivate
+struct PacBioVersion
{
public:
- std::string version_;
- std::string pacbioBamVersion_;
- std::string sortOrder_;
- std::map<std::string, std::string> headerLineCustom_;
-
- std::map<std::string, ReadGroupInfo> readGroups_; // id => read group info
- std::map<std::string, ProgramInfo> programs_; // id => program info
- std::vector<std::string> comments_;
-
- // we need to preserve insertion order, use lookup for access by name
- std::vector<SequenceInfo> sequences_;
- std::map<std::string, int32_t> sequenceIdLookup_;
+ PacBioVersion(int major, int minor, int revision)
+ : major_(major)
+ , minor_(minor)
+ , revision_(revision)
+ { }
+
+ PacBioVersion(const string& v)
+ : major_(0)
+ , minor_(0)
+ , revision_(0)
+ {
+ if (v.empty()) {
+ auto msg = string{ "PacBio BAM version number is missing (@HD pb:<version> tag). See spec for details." };
+ throw std::runtime_error(msg);
+ }
+
+ if (v.find('b') != string::npos) {
+ auto msg = string{ "invalid version number (" + v + "): beta version BAMs are no longer supported" };
+ throw std::runtime_error(msg);
+ }
+
+ try {
+ const auto fields = Split(v, '.');
+ const auto numFields = fields.size();
+ if (numFields > 0) {
+ major_ = stoi(fields.at(0));
+ if (numFields > 1) {
+ minor_ = stoi(fields.at(1));
+ if (numFields > 2 )
+ revision_ = stoi(fields.at(2));
+ }
+ }
+ } catch (std::exception&) {
+ auto msg = string{ "invalid version number (" + v + "): failed to parse" };
+ throw std::runtime_error(msg);
+ }
+ }
+
+public:
+ bool operator==(const PacBioVersion& other) const
+ {
+ return major_ == other.major_ &&
+ minor_ == other.minor_ &&
+ revision_ == other.revision_;
+ }
+
+ bool operator<(const PacBioVersion& other) const
+ {
+ // 2.* < 3.*
+ if (major_ < other.major_)
+ return true;
+
+ // 3. == 3.
+ else if (major_ == other.major_) {
+
+ // 3.1.* < 3.2.*
+ if (minor_ < other.minor_)
+ return true;
+
+ // 3.2. == 3.2.
+ else if (minor_ == other.minor_) {
+
+ // 3.2.1 < 3.2.2
+ if (revision_ < other.revision_)
+ return true;
+ }
+ }
+
+ // otherwise not less-than
+ return false;
+ }
+ bool operator>=(const PacBioVersion& other) const
+ { return !operator<(other); }
+
+public:
+ string ToString(void) const
+ {
+ stringstream s;
+ s << major_ << '.' << minor_ << '.' << revision_;
+ return s.str();
+ }
+
+ string ToMsgString(void) const
+ {
+ stringstream s;
+ s << '(' << ToString() << ')';
+ return s.str();
+ }
+
+private:
+ int major_;
+ int minor_;
+ int revision_;
};
+static const PacBioVersion minimum_version = PacBioVersion(3,0,1);
+static const PacBioVersion current_version = PacBioVersion(3,0,3);
+
+static
+void EnsureCanMerge(const BamHeader& lhs, const BamHeader& rhs)
+{
+ // check compatibility
+ const bool samVersionOk = lhs.Version() == rhs.Version();
+ const bool sortOrderOk = lhs.SortOrder() == rhs.SortOrder();
+ const bool pbVersionOk = lhs.PacBioBamVersion() == rhs.PacBioBamVersion();
+ const bool sequencesOk = ( (lhs.SortOrder() == "coordinate") ? lhs.Sequences() == rhs.Sequences()
+ : true);
+
+ // if all checks out, return
+ if (samVersionOk && sortOrderOk && pbVersionOk && sequencesOk)
+ return;
+
+ // else, format error message & throw
+ stringstream e;
+ e << "could not merge BAM headers:" << endl;
+
+ if (!samVersionOk) {
+ e << " mismatched SAM versions (@HD:VN) : ("
+ << lhs.Version() << ", " << rhs.Version()
+ << ")" << endl;
+ }
+
+ if (!sortOrderOk) {
+ e << " mismatched sort orders (@HD:SO) : ("
+ << lhs.SortOrder() << ", " << rhs.SortOrder()
+ << ")" << endl;
+ }
+
+ if (!pbVersionOk) {
+ e << " mismatched PacBio BAM versions (@HD:pb) : ("
+ << lhs.PacBioBamVersion() << ", " << rhs.PacBioBamVersion()
+ << ")" << endl;
+ }
+
+ if (!sequencesOk)
+ e << " mismatched sequence lists (@SQ entries)" << endl;
+
+ throw std::runtime_error(e.str());
+}
+
} // namespace internal
} // namespace BAM
} // namespace PacBio
-BamHeader::BamHeader(void)
- : d_(new internal::BamHeaderPrivate)
-{ }
-
BamHeader::BamHeader(const string& samHeaderText)
: d_(new internal::BamHeaderPrivate)
{
@@ -134,30 +258,28 @@ BamHeader::BamHeader(const string& samHeaderText)
}
}
-BamHeader::BamHeader(const BamHeader& other)
- : d_(other.d_)
-{ }
-
-BamHeader::BamHeader(BamHeader&& other)
- : d_(std::move(other.d_))
-{ }
-
-BamHeader& BamHeader::operator=(const BamHeader& other)
-{ d_ = other.d_; return *this; }
-
-BamHeader& BamHeader::operator=(BamHeader&& other)
-{ d_ = std::move(other.d_); return *this; }
+BamHeader& BamHeader::operator+=(const BamHeader& other)
+{
+ internal::EnsureCanMerge(*this, other);
-BamHeader::~BamHeader(void) { }
+ // merge read groups
+ for (const auto& rg : other.ReadGroups()) {
+ if (!HasReadGroup(rg.Id()))
+ AddReadGroup(rg);
+ }
-BamHeader& BamHeader::AddComment(const std::string& comment)
-{ d_->comments_.push_back(comment); return *this; }
+ // merge programs
+ for (const auto& pg : other.Programs()) {
+ if (!HasProgram(pg.Id()))
+ AddProgram(pg);
+ }
-BamHeader& BamHeader::AddProgram(const ProgramInfo& pg)
-{ d_->programs_[pg.Id()] = pg; return *this; }
+ // merge comments
+ for (const auto& comment : other.Comments())
+ AddComment(comment);
-BamHeader& BamHeader::AddReadGroup(const ReadGroupInfo& readGroup)
-{ d_->readGroups_[readGroup.Id()] = readGroup; return *this; }
+ return *this;
+}
BamHeader& BamHeader::AddSequence(const SequenceInfo& sequence)
{
@@ -166,15 +288,6 @@ BamHeader& BamHeader::AddSequence(const SequenceInfo& sequence)
return *this;
}
-BamHeader& BamHeader::ClearComments(void)
-{ d_->comments_.clear(); return* this; }
-
-BamHeader& BamHeader::ClearPrograms(void)
-{ d_->programs_.clear(); return *this; }
-
-BamHeader& BamHeader::ClearReadGroups(void)
-{ d_->readGroups_.clear(); return *this; }
-
BamHeader& BamHeader::ClearSequences(void)
{
d_->sequenceIdLookup_.clear();
@@ -182,12 +295,6 @@ BamHeader& BamHeader::ClearSequences(void)
return *this;
}
-std::vector<std::string> BamHeader::Comments(void) const
-{ return d_->comments_; }
-
-BamHeader& BamHeader::Comments(const std::vector<std::string>& comments)
-{ d_->comments_ = comments; return *this; }
-
BamHeader BamHeader::DeepCopy(void) const
{
BamHeader result;
@@ -203,20 +310,21 @@ BamHeader BamHeader::DeepCopy(void) const
return result;
}
-bool BamHeader::HasProgram(const std::string& id) const
-{ return d_->programs_.find(id) != d_->programs_.cend(); }
-
-bool BamHeader::HasReadGroup(const std::string& id) const
-{ return d_->readGroups_.find(id) != d_->readGroups_.cend(); }
-
-bool BamHeader::HasSequence(const std::string& name) const
-{ return d_->sequenceIdLookup_.find(name) != d_->sequenceIdLookup_.cend(); }
-
-std::string BamHeader::PacBioBamVersion(void) const
-{ return d_->pacbioBamVersion_; }
-
BamHeader& BamHeader::PacBioBamVersion(const std::string& version)
-{ d_->pacbioBamVersion_ = version; return *this; }
+{
+ const auto fileVersion = internal::PacBioVersion{ version };
+ if (fileVersion >= internal::minimum_version)
+ d_->pacbioBamVersion_ = version;
+ else {
+ d_->pacbioBamVersion_.clear();
+ auto msg = string{ "invalid PacBio BAM version number" };
+ msg += fileVersion.ToMsgString();
+ msg += string{ "is older than the minimum supported version" };
+ msg += internal::minimum_version.ToMsgString();
+ throw std::runtime_error(msg);
+ }
+ return *this;
+}
ProgramInfo BamHeader::Program(const std::string& id) const
{
@@ -294,14 +402,10 @@ BamHeader& BamHeader::ReadGroups(const vector<ReadGroupInfo>& readGroups)
return *this;
}
-SequenceInfo BamHeader::Sequence(const int32_t id) const
-{
- // throws out of range
- return d_->sequences_.at(id);
-}
-
SequenceInfo BamHeader::Sequence(const std::string& name) const
{
+ // TODO: SequenceId(name) throws if not found, should we do so here as well?
+
const auto iter = d_->sequenceIdLookup_.find(name);
if (iter == d_->sequenceIdLookup_.cend())
return SequenceInfo();
@@ -318,12 +422,6 @@ int32_t BamHeader::SequenceId(const std::string& name) const
return iter->second;
}
-std::string BamHeader::SequenceLength(const int32_t id) const
-{ return Sequence(id).Length(); }
-
-std::string BamHeader::SequenceName(const int32_t id) const
-{ return Sequence(id).Name(); }
-
vector<string> BamHeader::SequenceNames(void) const
{
vector<string> result;
@@ -335,9 +433,6 @@ vector<string> BamHeader::SequenceNames(void) const
return result;
}
-std::vector<SequenceInfo> BamHeader::Sequences(void) const
-{ return d_->sequences_; }
-
BamHeader& BamHeader::Sequences(const vector<SequenceInfo>& sequences)
{
d_->sequences_.clear();
@@ -346,21 +441,16 @@ BamHeader& BamHeader::Sequences(const vector<SequenceInfo>& sequences)
return *this;
}
-std::string BamHeader::SortOrder(void) const
-{ return d_->sortOrder_; }
-
-BamHeader& BamHeader::SortOrder(const std::string& order)
-{ d_->sortOrder_ = order; return *this; }
-
string BamHeader::ToSam(void) const
{
- // clear out stream
+ // init stream
stringstream out("");
// @HD
- const string& outputVersion = (d_->version_.empty() ? string(hts_version()) : d_->version_);
+ const string& outputVersion = (d_->version_.empty() ? string(hts_version()) : d_->version_);
const string& outputSortOrder = (d_->sortOrder_.empty() ? string("unknown") : d_->sortOrder_);
- const string& outputPbBamVersion = (d_->pacbioBamVersion_.empty() ? internal::current_version : d_->pacbioBamVersion_);
+ const string& outputPbBamVersion = (d_->pacbioBamVersion_.empty() ? internal::current_version.ToString()
+ : d_->pacbioBamVersion_);
out << internal::prefix_HD
<< internal::MakeSamTag(internal::token_VN, outputVersion)
@@ -368,10 +458,6 @@ string BamHeader::ToSam(void) const
<< internal::MakeSamTag(internal::token_pb, outputPbBamVersion)
<< endl;
-// if (!d_->pacbioBamVersion_.empty())
-// out << internal::MakeSamTag(internal::token_pb, d_->pacbioBamVersion_);
-// out << endl;
-
// @SQ
for (const SequenceInfo& seq : d_->sequences_)
out << seq.ToSam() << endl;
@@ -382,7 +468,7 @@ string BamHeader::ToSam(void) const
// @PG
for (const auto& progIter : d_->programs_)
- out << progIter.second.ToSam() << endl;
+ out << progIter.second.ToSam() << endl;
// @CO
for (const string& comment : d_->comments_)
@@ -391,10 +477,3 @@ string BamHeader::ToSam(void) const
// return result
return out.str();
}
-
-std::string BamHeader::Version(void) const
-{ return d_->version_; }
-
-BamHeader& BamHeader::Version(const std::string& version)
-{ d_->version_ = version; return *this; }
-
diff --git a/src/BamReader.cpp b/src/BamReader.cpp
new file mode 100644
index 0000000..f6f4cad
--- /dev/null
+++ b/src/BamReader.cpp
@@ -0,0 +1,189 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BamReader.cpp
+/// \brief Implements the BamReader class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/BamReader.h"
+#include "MemoryUtils.h"
+#include <htslib/bgzf.h>
+#include <htslib/hfile.h>
+#include <htslib/hts.h>
+#include <cassert>
+#include <cstdio>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+struct BamReaderPrivate
+{
+public:
+ BamReaderPrivate(const BamFile& bamFile)
+ : htsFile_(nullptr)
+ , bamFile_(bamFile)
+ {
+ DoOpen();
+ }
+
+ BamReaderPrivate(BamFile&& bamFile)
+ : htsFile_(nullptr)
+ , bamFile_(std::move(bamFile))
+ {
+ DoOpen();
+ }
+
+ void DoOpen(void) {
+
+ // fetch file pointer
+ htsFile_.reset(sam_open(bamFile_.Filename().c_str(), "rb"));
+ if (!htsFile_)
+ throw std::runtime_error("could not open BAM file for reading");
+ }
+
+public:
+ std::unique_ptr<samFile, internal::HtslibFileDeleter> htsFile_;
+ BamFile bamFile_;
+};
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+BamReader::BamReader(const string& fn)
+ : BamReader(BamFile(fn))
+{ }
+
+BamReader::BamReader(const BamFile& bamFile)
+ : d_(new internal::BamReaderPrivate(bamFile))
+{
+ // skip header
+ VirtualSeek(d_->bamFile_.FirstAlignmentOffset());
+}
+
+BamReader::BamReader(BamFile&& bamFile)
+ : d_(new internal::BamReaderPrivate(std::move(bamFile)))
+{
+ // skip header
+ VirtualSeek(d_->bamFile_.FirstAlignmentOffset());
+}
+
+BamReader::~BamReader(void) { }
+
+BGZF* BamReader::Bgzf(void) const
+{
+ assert(d_);
+ assert(d_->htsFile_);
+ assert(d_->htsFile_->fp.bgzf);
+ return d_->htsFile_->fp.bgzf;
+}
+
+const BamFile& BamReader::File(void) const
+{
+ assert(d_);
+ return d_->bamFile_;
+}
+
+std::string BamReader::Filename(void) const
+{
+ assert(d_);
+ return d_->bamFile_.Filename();
+}
+
+const BamHeader& BamReader::Header(void) const
+{
+ assert(d_);
+ return d_->bamFile_.Header();
+}
+
+bool BamReader::GetNext(BamRecord& record)
+{
+ assert(Bgzf());
+ assert(internal::BamRecordMemory::GetRawData(record).get());
+
+ auto result = ReadRawData(Bgzf(), internal::BamRecordMemory::GetRawData(record).get());
+
+ // success
+ if (result >= 0) {
+ internal::BamRecordMemory::UpdateRecordTags(record);
+ record.header_ = Header();
+ return true;
+ }
+
+ // EOF or end-of-data range (not an error)
+ else if (result == -1)
+ return false;
+
+ // error corrupted file
+ else {
+ auto errorMsg = string{"corrupted BAM file: "};
+ if (result == -2)
+ errorMsg += "probably truncated";
+ else if (result == -3)
+ errorMsg += "could not read BAM record's' core data";
+ else if (result == -4)
+ errorMsg += "could not read BAM record's' variable-length data";
+ else
+ errorMsg += "unknown reason " + to_string(result);
+ errorMsg += string{" ("};
+ errorMsg += Filename();
+ errorMsg += string{")"};
+ throw std::runtime_error{errorMsg};
+ }
+}
+
+int BamReader::ReadRawData(BGZF* bgzf, bam1_t* b)
+{
+ return bam_read1(bgzf, b);
+}
+
+void BamReader::VirtualSeek(int64_t virtualOffset)
+{
+ auto result = bgzf_seek(Bgzf(), virtualOffset, SEEK_SET);
+ if (result != 0)
+ throw std::runtime_error("Failed to seek in BAM file");
+}
+
+int64_t BamReader::VirtualTell(void) const
+{
+ return bgzf_tell(Bgzf());
+}
diff --git a/src/BamRecord.cpp b/src/BamRecord.cpp
index 11387ae..d0da4a7 100644
--- a/src/BamRecord.cpp
+++ b/src/BamRecord.cpp
@@ -32,11 +32,16 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file BamRecord.cpp
+/// \brief Implements the BamRecord & BamRecordView classes.
+//
// Author: Derek Barnett
#include "pbbam/BamRecord.h"
#include "pbbam/virtual/VirtualRegionTypeMap.h"
+#include "pbbam/ZmwTypeMap.h"
#include "AssertUtils.h"
#include "MemoryUtils.h"
#include "SequenceUtils.h"
@@ -54,33 +59,38 @@ namespace BAM {
namespace internal {
// BAM record tag names
-static const string tagName_readAccuracy = "rq";
-static const string tagName_holeNumber = "zm";
-static const string tagName_numPasses = "np";
+static const string tagName_alternative_labelQV = "pv";
+static const string tagName_alternative_labelTag = "pt";
+static const string tagName_barcodes = "bc";
+static const string tagName_barcode_quality = "bq";
static const string tagName_contextFlags = "cx";
-static const string tagName_snr = "sn";
+static const string tagName_holeNumber = "zm";
static const string tagName_deletionQV = "dq";
static const string tagName_deletionTag = "dt";
static const string tagName_insertionQV = "iq";
static const string tagName_ipd = "ip";
+static const string tagName_labelQV = "pq";
static const string tagName_mergeQV = "mq";
-static const string tagName_pulseWidth = "pw";
-static const string tagName_readGroup = "RG";
-static const string tagName_queryStart = "qs";
-static const string tagName_queryEnd = "qe";
-static const string tagName_substitutionQV = "sq";
-static const string tagName_substitutionTag = "st";
+static const string tagName_numPasses = "np";
static const string tagName_pkmean = "pa";
static const string tagName_pkmid = "pm";
+static const string tagName_pkmean2 = "ps";
+static const string tagName_pkmid2 = "pi";
static const string tagName_pre_pulse_frames = "pd";
-static const string tagName_pulse_call_width = "px";
-static const string tagName_labelQV = "pq";
-static const string tagName_alternative_labelQV = "pv";
-static const string tagName_alternative_labelTag = "pt";
static const string tagName_pulse_call = "pc";
-static const string tagName_scrap_type = "sc";
-static const string tagName_barcodes = "bc";
+static const string tagName_pulse_call_width = "px";
static const string tagName_pulseMergeQV = "pg";
+static const string tagName_pulseWidth = "pw";
+static const string tagName_queryStart = "qs";
+static const string tagName_queryEnd = "qe";
+static const string tagName_readAccuracy = "rq";
+static const string tagName_readGroup = "RG";
+static const string tagName_scrap_region_type = "sc";
+static const string tagName_scrap_zmw_type = "sz";
+static const string tagName_snr = "sn";
+static const string tagName_startFrame = "sf";
+static const string tagName_substitutionQV = "sq";
+static const string tagName_substitutionTag = "st";
// faux (helper) tag names
static const string tagName_QUAL = "QUAL";
@@ -97,7 +107,7 @@ static const string recordTypeName_Unknown = "UNKNOWN";
static
int32_t HoleNumberFromName(const string& fullName)
{
- const vector<string> mainTokens = std::move(Split(fullName, '/'));
+ const auto mainTokens = Split(fullName, '/');
if (mainTokens.size() != 3)
throw std::runtime_error("malformed record name");
return stoi(mainTokens.at(1));
@@ -106,10 +116,10 @@ int32_t HoleNumberFromName(const string& fullName)
static
Position QueryEndFromName(const string& fullName)
{
- const vector<string> mainTokens = std::move(Split(fullName, '/'));
+ const auto mainTokens = Split(fullName, '/');
if (mainTokens.size() != 3)
throw std::runtime_error("malformed record name");
- const vector<string> queryTokens = std::move(Split(mainTokens.at(2), '_'));
+ const auto queryTokens = Split(mainTokens.at(2), '_');
if (queryTokens.size() != 2)
throw std::runtime_error("malformed record name");
return stoi(queryTokens.at(1));
@@ -118,10 +128,10 @@ Position QueryEndFromName(const string& fullName)
static
Position QueryStartFromName(const string& fullName)
{
- const vector<string> mainTokens = std::move(Split(fullName, '/'));
+ const auto mainTokens = Split(fullName, '/');
if (mainTokens.size() != 3)
throw std::runtime_error("malformed record name");
- const vector<string> queryTokens = std::move(Split(mainTokens.at(2), '_'));
+ const auto queryTokens = Split(mainTokens.at(2), '_');
if (queryTokens.size() != 2)
throw std::runtime_error("malformed record name");
return stoi(queryTokens.at(0));
@@ -140,54 +150,52 @@ BamRecordImpl* CreateOrEdit(const string& tagName,
}
static
-int32_t AlignedEndOffset(const Cigar& cigar,
- const int seqLength)
+pair<int32_t, int32_t> AlignedOffsets(const BamRecord& record,
+ const int seqLength)
{
+ int32_t startOffset = 0;
int32_t endOffset = seqLength;
- if (!cigar.empty()) {
- Cigar::const_reverse_iterator cigarIter = cigar.crbegin();
- Cigar::const_reverse_iterator cigarEnd = cigar.crend();
- for (; cigarIter != cigarEnd; ++cigarIter) {
- const CigarOperation& op = (*cigarIter);
- if (op.Type() == CigarOperationType::HARD_CLIP) {
- if (endOffset != 0 && endOffset != seqLength)
- return -1;
+ PBBAM_SHARED_PTR<bam1_t> b = internal::BamRecordMemory::GetRawData(record);
+ uint32_t* cigarData = bam_get_cigar(b.get());
+ const size_t numCigarOps = b->core.n_cigar;
+ if (numCigarOps > 0) {
+
+ // start offset
+ for (size_t i = 0; i < numCigarOps; ++i) {
+ const CigarOperationType type = static_cast<CigarOperationType>(bam_cigar_op(cigarData[i]));
+ if (type == CigarOperationType::HARD_CLIP) {
+ if (startOffset != 0 && startOffset != seqLength) {
+ startOffset = -1;
+ break;
+ }
}
- else if (op.Type() == CigarOperationType::SOFT_CLIP)
- endOffset -= op.Length();
+ else if (type == CigarOperationType::SOFT_CLIP)
+ startOffset += bam_cigar_oplen(cigarData[i]);
else
break;
}
- }
- if (endOffset == 0)
- endOffset = seqLength;
- return endOffset;
-}
-
-static
-int32_t AlignedStartOffset(const Cigar& cigar,
- const int seqLength)
-{
- int32_t startOffset = 0;
-
- if (!cigar.empty()) {
- Cigar::const_iterator cigarIter = cigar.cbegin();
- Cigar::const_iterator cigarEnd = cigar.cend();
- for (; cigarIter != cigarEnd; ++cigarIter) {
- const CigarOperation& op = (*cigarIter);
- if (op.Type() == CigarOperationType::HARD_CLIP) {
- if (startOffset != 0 && startOffset != seqLength)
- return -1;
+ // end offset
+ for (int i = numCigarOps-1; i >= 0; --i) {
+ const CigarOperationType type = static_cast<CigarOperationType>(bam_cigar_op(cigarData[i]));
+ if (type == CigarOperationType::HARD_CLIP) {
+ if (endOffset != 0 && endOffset != seqLength) {
+ endOffset = -1;
+ break;
+ }
}
- else if (op.Type() == CigarOperationType::SOFT_CLIP)
- startOffset += op.Length();
+ else if (type == CigarOperationType::SOFT_CLIP)
+ endOffset -= bam_cigar_oplen(cigarData[i]);
else
break;
+
}
+
+ if (endOffset == 0)
+ endOffset = seqLength;
}
- return startOffset;
+ return std::make_pair(startOffset, endOffset);
}
template<typename T>
@@ -208,16 +216,16 @@ void MaybeClipAndGapifyBases(const BamRecordImpl& impl,
if (impl.IsMapped() && (aligned || exciseSoftClips)) {
size_t seqIndex = 0;
- const Cigar& cigar = impl.CigarData();
- Cigar::const_iterator cigarIter = cigar.cbegin();
- Cigar::const_iterator cigarEnd = cigar.cend();
+ const auto cigar = impl.CigarData();
+ auto cigarIter = cigar.cbegin();
+ auto cigarEnd = cigar.cend();
for (; cigarIter != cigarEnd; ++cigarIter) {
- const CigarOperation& op = (*cigarIter);
- const CigarOperationType& type = op.Type();
+ const auto op = (*cigarIter);
+ const auto type = op.Type();
// do nothing for hard clips
if (type != CigarOperationType::HARD_CLIP) {
- const size_t opLength = op.Length();
+ const auto opLength = op.Length();
// maybe remove soft clips
if (type == CigarOperationType::SOFT_CLIP && exciseSoftClips)
@@ -254,18 +262,18 @@ void MaybeClipAndGapifyFrames(const BamRecordImpl& impl,
{
if (impl.IsMapped() && (aligned || exciseSoftClips)) {
- vector<uint16_t> data = std::move(frames.Data()); // we're going to put it back
+ auto data = std::move(frames.Data()); // we're going to put it back
size_t frameIndex = 0;
- const Cigar& cigar = impl.CigarData();
- Cigar::const_iterator cigarIter = cigar.cbegin();
- Cigar::const_iterator cigarEnd = cigar.cend();
+ const auto cigar = impl.CigarData();
+ auto cigarIter = cigar.cbegin();
+ auto cigarEnd = cigar.cend();
for (; cigarIter != cigarEnd; ++cigarIter) {
- const CigarOperation& op = (*cigarIter);
- const CigarOperationType& type = op.Type();
+ const auto op = (*cigarIter);
+ const auto type = op.Type();
// do nothing for hard clips
if (type != CigarOperationType::HARD_CLIP) {
- const size_t opLength = op.Length();
+ const auto opLength = op.Length();
// maybe remove soft clips
if (type == CigarOperationType::SOFT_CLIP && exciseSoftClips)
@@ -300,17 +308,16 @@ void MaybeClipAndGapifyQualities(const BamRecordImpl& impl,
if (impl.IsMapped() && (aligned || exciseSoftClips)) {
size_t qualIndex = 0;
- const Cigar& cigar = impl.CigarData();
- Cigar::const_iterator cigarIter = cigar.cbegin();
- Cigar::const_iterator cigarEnd = cigar.cend();
+ const auto cigar = impl.CigarData();
+ auto cigarIter = cigar.cbegin();
+ auto cigarEnd = cigar.cend();
for (; cigarIter != cigarEnd; ++cigarIter) {
-
- const CigarOperation& op = (*cigarIter);
- const CigarOperationType& type = op.Type();
+ const auto op = (*cigarIter);
+ const auto type = op.Type();
// do nothing for hard clips
if (type != CigarOperationType::HARD_CLIP) {
- const size_t opLength = op.Length();
+ const auto opLength = op.Length();
// maybe remove soft clips
if (type == CigarOperationType::SOFT_CLIP && exciseSoftClips)
@@ -401,6 +408,13 @@ RecordType NameToType(const string& name)
return RecordType::UNKNOWN;
}
+static inline
+bool IsClippingOp(const CigarOperation& op)
+{
+ const auto opType = op.Type();
+ return opType == CigarOperationType::SOFT_CLIP ||
+ opType == CigarOperationType::HARD_CLIP;
+}
} // namespace internal
} // namespace BAM
@@ -507,22 +521,49 @@ BamRecord& BamRecord::AltLabelTag(const std::string& tags)
return *this;
}
-std::pair<int,int> BamRecord::Barcodes(void) const
+uint16_t BamRecord::BarcodeForward(void) const
+{ return Barcodes().first; }
+
+uint16_t BamRecord::BarcodeReverse(void) const
+{ return Barcodes().second; }
+
+uint8_t BamRecord::BarcodeQuality(void) const
+{
+ const auto bq = impl_.TagValue(internal::tagName_barcode_quality);
+ if (bq.IsNull())
+ return 0; // ?? "missing" value for tags ?? should we consider boost::optional<T> for these kind of guys ??
+ return bq.ToUInt8();
+}
+
+BamRecord& BamRecord::BarcodeQuality(const uint8_t quality)
+{
+ internal::CreateOrEdit(internal::tagName_barcode_quality, quality, &impl_);
+ return *this;
+}
+
+std::pair<uint16_t,uint16_t> BamRecord::Barcodes(void) const
{
const Tag& bc = impl_.TagValue(internal::tagName_barcodes);
if (bc.IsNull())
- return std::make_pair(-1, -1);
+ throw std::runtime_error("barcode tag (bc) was requested but is missing");
if (!bc.IsUInt16Array())
- throw std::runtime_error("Barcode tag bc is not of type uint16_t array.");
+ throw std::runtime_error("barcode tag (bc) is malformed: should be a uint16_t array of size==2.");
const auto bcArray = bc.ToUInt16Array();
if (bcArray.size() != 2)
- throw std::runtime_error("Barcode array is not of size 2");
+ throw std::runtime_error("barcode tag (bc) is malformed: should be a uint16_t array of size==2.");
return std::make_pair(bcArray[0], bcArray[1]);
}
+BamRecord& BamRecord::Barcodes(const std::pair<uint16_t,uint16_t>& barcodeIds)
+{
+ const auto data = std::vector<uint16_t>{ barcodeIds.first, barcodeIds.second };
+ internal::CreateOrEdit(internal::tagName_barcodes, data, &impl_);
+ return *this;
+}
+
void BamRecord::CalculateAlignedPositions(void) const
{
// reset
@@ -531,16 +572,20 @@ void BamRecord::CalculateAlignedPositions(void) const
// skip if unmapped, or has no queryStart/End
if (!impl_.IsMapped())
return;
- const Position qStart = QueryStart();
- const Position qEnd = QueryEnd();
+
+ // get the query start/end
+ const size_t seqLength = impl_.SequenceLength();
+ const RecordType type = Type();
+ const Position qStart = (type == RecordType::CCS) ? Position(0) : QueryStart();
+ const Position qEnd = (type == RecordType::CCS) ? Position(seqLength) : QueryEnd();
+
if (qStart == PacBio::BAM::UnmappedPosition || qEnd == PacBio::BAM::UnmappedPosition)
return;
// determine clipped end ranges
- const Cigar& cigar = impl_.CigarData();
- const size_t seqLength = impl_.Sequence().size();
- const int32_t startOffset = internal::AlignedStartOffset(cigar, seqLength);
- const int32_t endOffset = internal::AlignedEndOffset(cigar, seqLength);
+ const std::pair<int32_t, int32_t> alignedOffsets = internal::AlignedOffsets(*this, seqLength);
+ const int32_t startOffset = alignedOffsets.first;
+ const int32_t endOffset = alignedOffsets.second;
if (endOffset == -1 || startOffset == -1)
return; // TODO: handle error more??
@@ -555,8 +600,17 @@ void BamRecord::CalculateAlignedPositions(void) const
}
}
-Cigar BamRecord::CigarData(void) const
-{ return impl_.CigarData(); }
+Cigar BamRecord::CigarData(bool exciseAllClips) const
+{
+ auto cigar = impl_.CigarData();
+ if (exciseAllClips) {
+ cigar.erase(std::remove_if(cigar.begin(),
+ cigar.end(),
+ internal::IsClippingOp),
+ cigar.end());
+ }
+ return cigar;
+}
BamRecord& BamRecord::Clip(const ClipType clipType,
const Position start,
@@ -740,9 +794,16 @@ BamRecord& BamRecord::Clip(const ClipType clipType,
string pulseCall = std::move(PulseCall(Orientation::GENOMIC));
std::vector<float> pkmean = std::move(Pkmean(Orientation::GENOMIC));
std::vector<float> pkmid = std::move(Pkmid(Orientation::GENOMIC));
+ std::vector<float> pkmean2 = std::move(Pkmean2(Orientation::GENOMIC));
+ std::vector<float> pkmid2 = std::move(Pkmid2(Orientation::GENOMIC));
Frames prePulseFrames = std::move(PrePulseFrames(Orientation::GENOMIC).Data());
Frames pulseCallWidth = std::move(PulseCallWidth(Orientation::GENOMIC).Data());
+ // TODO: clean this up
+ std::vector<uint32_t> startFrame;
+ if (HasStartFrame())
+ startFrame = std::move(StartFrame(Orientation::GENOMIC));
+
// restore native orientation
if (!isForwardStrand) {
internal::Reverse(altLabelQV);
@@ -760,8 +821,14 @@ BamRecord& BamRecord::Clip(const ClipType clipType,
internal::ReverseComplementCaseSens(pulseCall);
internal::Reverse(pkmean);
internal::Reverse(pkmid);
+ internal::Reverse(pkmean2);
+ internal::Reverse(pkmid2);
internal::Reverse(prePulseFrames);
internal::Reverse(pulseCallWidth);
+
+ if (HasStartFrame())
+ internal::Reverse(startFrame);
+
}
// update BAM tags
@@ -781,8 +848,13 @@ BamRecord& BamRecord::Clip(const ClipType clipType,
tags[internal::tagName_pulse_call] = pulseCall;
tags[internal::tagName_pkmean] = EncodePhotons(pkmean);
tags[internal::tagName_pkmid] = EncodePhotons(pkmid);
+ tags[internal::tagName_pkmean2] = EncodePhotons(pkmean2);
+ tags[internal::tagName_pkmid2] = EncodePhotons(pkmid2);
tags[internal::tagName_pre_pulse_frames] = prePulseFrames.Data();
tags[internal::tagName_pulse_call_width] = pulseCallWidth.Data();
+ if (HasStartFrame())
+ tags[internal::tagName_startFrame] = startFrame;
+
impl_.Tags(tags);
// update query start/end
@@ -1083,6 +1155,9 @@ bool BamRecord::HasAltLabelTag(void) const
bool BamRecord::HasBarcodes(void) const
{ return impl_.HasTag(internal::tagName_barcodes); }
+bool BamRecord::HasBarcodeQuality(void) const
+{ return impl_.HasTag(internal::tagName_barcode_quality); }
+
bool BamRecord::HasLabelQV(void) const
{ return impl_.HasTag(internal::tagName_labelQV); }
@@ -1100,6 +1175,9 @@ bool BamRecord::HasHoleNumber(void) const
bool BamRecord::HasInsertionQV(void) const
{ return impl_.HasTag(internal::tagName_insertionQV); }
+bool BamRecord::HasNumPasses(void) const
+{ return impl_.HasTag(internal::tagName_numPasses); }
+
bool BamRecord::HasPreBaseFrames(void) const
{ return HasIPD(); }
@@ -1121,6 +1199,12 @@ bool BamRecord::HasPkmean(void) const
bool BamRecord::HasPkmid(void) const
{ return impl_.HasTag(internal::tagName_pkmid); }
+bool BamRecord::HasPkmean2(void) const
+{ return impl_.HasTag(internal::tagName_pkmean2); }
+
+bool BamRecord::HasPkmid2(void) const
+{ return impl_.HasTag(internal::tagName_pkmid2); }
+
bool BamRecord::HasPrePulseFrames(void) const
{ return impl_.HasTag(internal::tagName_pre_pulse_frames); }
@@ -1146,11 +1230,19 @@ bool BamRecord::HasReadAccuracy(void) const
&& !impl_.TagValue(internal::tagName_readAccuracy).IsNull();
}
-bool BamRecord::HasScrapType(void) const
-{ return impl_.HasTag(internal::tagName_scrap_type)
- && !impl_.TagValue(internal::tagName_scrap_type).IsNull();
+bool BamRecord::HasScrapRegionType(void) const
+{ return impl_.HasTag(internal::tagName_scrap_region_type)
+ && !impl_.TagValue(internal::tagName_scrap_region_type).IsNull();
+}
+
+bool BamRecord::HasScrapZmwType(void) const
+{ return impl_.HasTag(internal::tagName_scrap_zmw_type)
+ && !impl_.TagValue(internal::tagName_scrap_zmw_type).IsNull();
}
+bool BamRecord::HasStartFrame(void) const
+{ return impl_.HasTag(internal::tagName_startFrame); }
+
bool BamRecord::HasSignalToNoise(void) const
{ return impl_.HasTag(internal::tagName_snr); }
@@ -1223,6 +1315,51 @@ BamRecord& BamRecord::IPD(const Frames& frames,
return *this;
}
+size_t BamRecord::NumDeletedBases(void) const
+{
+ auto tEnd = ReferenceEnd();
+ auto tStart = ReferenceStart();
+ auto numMatchesAndMismatches = NumMatchesAndMismatches();
+ auto nM = numMatchesAndMismatches.first;
+ auto nMM = numMatchesAndMismatches.second;
+ return (tEnd - tStart - nM - nMM);
+}
+
+size_t BamRecord::NumInsertedBases(void) const
+{
+ auto aEnd = AlignedEnd();
+ auto aStart = AlignedStart();
+ auto numMatchesAndMismatches = NumMatchesAndMismatches();
+ auto nM = numMatchesAndMismatches.first;
+ auto nMM = numMatchesAndMismatches.second;
+ return (aEnd - aStart - nM - nMM);
+}
+
+size_t BamRecord::NumMatches(void) const
+{
+ return NumMatchesAndMismatches().first;
+}
+
+pair<size_t, size_t> BamRecord::NumMatchesAndMismatches(void) const
+{
+ pair<size_t, size_t> result = make_pair(0,0);
+ PBBAM_SHARED_PTR<bam1_t> b = internal::BamRecordMemory::GetRawData(this);
+ uint32_t* cigarData = bam_get_cigar(b.get());
+ for (uint32_t i = 0; i < b->core.n_cigar; ++i) {
+ const CigarOperationType type = static_cast<CigarOperationType>(bam_cigar_op(cigarData[i]));
+ if (type == CigarOperationType::SEQUENCE_MATCH)
+ result.first += bam_cigar_oplen(cigarData[i]);
+ else if (type == CigarOperationType::SEQUENCE_MISMATCH)
+ result.second += bam_cigar_oplen(cigarData[i]);
+ }
+ return result;
+}
+
+size_t BamRecord::NumMismatches(void) const
+{
+ return NumMatchesAndMismatches().second;
+}
+
Frames BamRecord::PreBaseFrames(Orientation orientation,
bool aligned,
bool exciseSoftClips) const
@@ -1438,6 +1575,40 @@ BamRecord& BamRecord::Pkmid(const std::vector<uint16_t>& encodedPhotons)
return *this;
}
+std::vector<float> BamRecord::Pkmean2(Orientation orientation) const
+{
+ return FetchPhotons(internal::tagName_pkmean2, orientation);
+}
+
+BamRecord& BamRecord::Pkmean2(const std::vector<float>& photons)
+{
+ Pkmean2(EncodePhotons(photons));
+ return *this;
+}
+
+BamRecord& BamRecord::Pkmean2(const std::vector<uint16_t>& encodedPhotons)
+{
+ internal::CreateOrEdit(internal::tagName_pkmean2, encodedPhotons, &impl_);
+ return *this;
+}
+
+std::vector<float> BamRecord::Pkmid2(Orientation orientation) const
+{
+ return FetchPhotons(internal::tagName_pkmid2, orientation);
+}
+
+BamRecord& BamRecord::Pkmid2(const std::vector<float>& photons)
+{
+ Pkmid2(EncodePhotons(photons));
+ return *this;
+}
+
+BamRecord& BamRecord::Pkmid2(const std::vector<uint16_t>& encodedPhotons)
+{
+ internal::CreateOrEdit(internal::tagName_pkmid2, encodedPhotons, &impl_);
+ return *this;
+}
+
Frames BamRecord::PrePulseFrames(Orientation orientation) const
{
return FetchFrames(internal::tagName_pre_pulse_frames, orientation);
@@ -1583,13 +1754,13 @@ BamRecord& BamRecord::QueryStart(const Position pos)
Accuracy BamRecord::ReadAccuracy(void) const
{
const Tag& readAccuracy = impl_.TagValue(internal::tagName_readAccuracy);
- return Accuracy(readAccuracy.ToInt32());
+ return Accuracy(readAccuracy.ToFloat());
}
BamRecord& BamRecord::ReadAccuracy(const Accuracy& accuracy)
{
internal::CreateOrEdit(internal::tagName_readAccuracy,
- static_cast<int32_t>(accuracy),
+ static_cast<float>(accuracy),
&impl_);
return *this;
}
@@ -1621,6 +1792,10 @@ BamRecord& BamRecord::ReadGroupId(const std::string& id)
return *this;
}
+int32_t BamRecord::ReadGroupNumericId(void) const
+{ return ReadGroupInfo::IdToInt(ReadGroupId()); }
+
+
Position BamRecord::ReferenceEnd(void) const
{
if (!impl_.IsMapped())
@@ -1657,22 +1832,41 @@ void BamRecord::ResetCachedPositions(void)
alignedStart_ = PacBio::BAM::UnmappedPosition;
}
-VirtualRegionType BamRecord::ScrapType(void) const
+VirtualRegionType BamRecord::ScrapRegionType(void) const
+{
+ const Tag& srTag = impl_.TagValue(internal::tagName_scrap_region_type);
+ return VirtualRegionTypeMap::ParseChar[srTag.ToUInt8()];
+}
+
+BamRecord& BamRecord::ScrapRegionType(const VirtualRegionType type)
{
- const Tag& scTag = impl_.TagValue(internal::tagName_scrap_type);
- return VirtualRegionTypeMap::ParseChar[scTag.ToUInt8()];
+ internal::CreateOrEdit(internal::tagName_scrap_region_type,
+ static_cast<uint8_t>(type), &impl_);
+ return *this;
}
-BamRecord& BamRecord::ScrapType(const VirtualRegionType type)
+BamRecord& BamRecord::ScrapRegionType(const char type)
{
- internal::CreateOrEdit(internal::tagName_scrap_type,
+ internal::CreateOrEdit(internal::tagName_scrap_region_type, type, &impl_);
+ return *this;
+}
+
+ZmwType BamRecord::ScrapZmwType(void) const
+{
+ const Tag& szTag = impl_.TagValue(internal::tagName_scrap_zmw_type);
+ return ZmwTypeMap::ParseChar[szTag.ToUInt8()];
+}
+
+BamRecord& BamRecord::ScrapZmwType(const ZmwType type)
+{
+ internal::CreateOrEdit(internal::tagName_scrap_zmw_type,
static_cast<uint8_t>(type), &impl_);
return *this;
}
-BamRecord& BamRecord::ScrapType(const char type)
+BamRecord& BamRecord::ScrapZmwType(const char type)
{
- internal::CreateOrEdit(internal::tagName_scrap_type, type, &impl_);
+ internal::CreateOrEdit(internal::tagName_scrap_zmw_type, type, &impl_);
return *this;
}
@@ -1698,6 +1892,18 @@ BamRecord& BamRecord::SignalToNoise(const vector<float>& snr)
return *this;
}
+std::vector<uint32_t> BamRecord::StartFrame(Orientation orientation) const
+{
+ const Tag& sfTag = impl_.TagValue(internal::tagName_startFrame);
+ return sfTag.ToUInt32Array();
+}
+
+BamRecord& BamRecord::StartFrame(const std::vector<uint32_t>& startFrame)
+{
+ internal::CreateOrEdit(internal::tagName_startFrame, startFrame, &impl_);
+ return *this;
+}
+
QualityValues BamRecord::SubstitutionQV(Orientation orientation,
bool aligned,
bool exciseSoftClips) const
diff --git a/src/BamRecordImpl.cpp b/src/BamRecordImpl.cpp
index 46632b3..abe7bf0 100644
--- a/src/BamRecordImpl.cpp
+++ b/src/BamRecordImpl.cpp
@@ -56,10 +56,12 @@ BamRecordImpl::BamRecordImpl(void)
BamRecordImpl::BamRecordImpl(const BamRecordImpl& other)
: d_(bam_dup1(other.d_.get()), internal::HtslibRecordDeleter())
+ , tagOffsets_(other.tagOffsets_)
{ }
BamRecordImpl::BamRecordImpl(BamRecordImpl&& other)
: d_(nullptr)
+ , tagOffsets_(std::move(other.tagOffsets_))
{
d_.swap(other.d_);
other.d_.reset();
@@ -71,6 +73,7 @@ BamRecordImpl& BamRecordImpl::operator=(const BamRecordImpl& other)
if (d_ == nullptr)
InitializeData();
bam_copy1(d_.get(), other.d_.get());
+ tagOffsets_ = other.tagOffsets_;
}
return *this;
}
@@ -80,27 +83,45 @@ BamRecordImpl& BamRecordImpl::operator=(BamRecordImpl&& other)
if (this != & other) {
d_.swap(other.d_);
other.d_.reset();
+
+ tagOffsets_ = std::move(other.tagOffsets_);
}
return *this;
}
BamRecordImpl::~BamRecordImpl(void) { }
-bool BamRecordImpl::AddTag(const string& tagName, const Tag &value)
+bool BamRecordImpl::AddTag(const string& tagName,
+ const Tag &value)
+{
+ return AddTag(tagName, value, TagModifier::NONE);
+}
+
+bool BamRecordImpl::AddTag(const string& tagName,
+ const Tag& value,
+ const TagModifier additionalModifier)
{
if (tagName.size() != 2 || HasTag(tagName))
return false;
+ const bool added = AddTagImpl(tagName, value, additionalModifier);
+ if (added)
+ UpdateTagMap();
+ return added;
+}
- const vector<uint8_t> rawData = std::move(BamTagCodec::ToRawData(value));
+bool BamRecordImpl::AddTagImpl(const string& tagName,
+ const Tag& value,
+ const TagModifier additionalModifier)
+{
+ const vector<uint8_t> rawData = std::move(BamTagCodec::ToRawData(value, additionalModifier));
if (rawData.empty())
return false;
bam_aux_append(d_.get(),
tagName.c_str(),
- BamTagCodec::TagTypeCode(value),
+ BamTagCodec::TagTypeCode(value, additionalModifier),
rawData.size(),
const_cast<uint8_t*>(rawData.data()));
-
return true;
}
@@ -111,7 +132,7 @@ Cigar BamRecordImpl::CigarData(void) const
uint32_t* cigarData = bam_get_cigar(d_);
for (uint32_t i = 0; i < d_->core.n_cigar; ++i) {
const uint32_t length = bam_cigar_oplen(cigarData[i]);
- const char type = bam_cigar_opchr(cigarData[i]);
+ const CigarOperationType type = static_cast<CigarOperationType>(bam_cigar_op(cigarData[i]));
result.push_back(CigarOperation(type, length));
}
@@ -151,9 +172,26 @@ BamRecordImpl& BamRecordImpl::CigarData(const std::string& cigarString)
return CigarData(Cigar::FromStdString(cigarString));
}
-bool BamRecordImpl::EditTag(const string& tagName, const Tag &newValue)
+bool BamRecordImpl::EditTag(const string& tagName,
+ const Tag& newValue)
{
- return RemoveTag(tagName) && AddTag(tagName, newValue);
+ return EditTag(tagName, newValue, TagModifier::NONE);
+}
+
+bool BamRecordImpl::EditTag(const string& tagName,
+ const Tag& newValue,
+ const TagModifier additionalModifier)
+{
+ // try remove old value (with delayed tag map update)
+ const bool removed = RemoveTagImpl(tagName);
+ if (!removed)
+ return false;
+
+ // if old value removed, add new value
+ const bool added = AddTagImpl(tagName, newValue, additionalModifier);
+ if (added)
+ UpdateTagMap();
+ return added;
}
BamRecordImpl BamRecordImpl::FromRawData(const PBBAM_SHARED_PTR<bam1_t>& rawData)
@@ -167,7 +205,10 @@ bool BamRecordImpl::HasTag(const string& tagName) const
{
if (tagName.size() != 2)
return false;
- return bam_aux_get(d_.get(), tagName.c_str()) != 0;
+ return TagOffset(tagName) != -1;
+
+ // 27635
+// return bam_aux_get(d_.get(), tagName.c_str()) != 0;
}
void BamRecordImpl::InitializeData(void)
@@ -242,22 +283,25 @@ QualityValues BamRecordImpl::Qualities(void) const
for (size_t i = 0; i < numQuals; ++i)
result.push_back(QualityValue(qualData[i]));
return result;
-
-// string result;
-// result.reserve(d_->core.l_qseq);
-// for (int i = 0; i < d_->core.l_qseq; ++i)
-// result.push_back(qualData[i] + 33);
-// return result;
}
bool BamRecordImpl::RemoveTag(const string& tagName)
{
+ const bool removed = RemoveTagImpl(tagName);
+ if (removed)
+ UpdateTagMap();
+ return removed;
+}
+
+bool BamRecordImpl::RemoveTagImpl(const string &tagName)
+{
if (tagName.size() != 2)
return false;
uint8_t* data = bam_aux_get(d_.get(), tagName.c_str());
if (data == 0)
return false;
- return bam_aux_del(d_.get(), data) == 0;
+ const bool ok = bam_aux_del(d_.get(), data) == 0;
+ return ok;
}
string BamRecordImpl::Sequence(void) const
@@ -271,6 +315,9 @@ string BamRecordImpl::Sequence(void) const
return result;
}
+size_t BamRecordImpl::SequenceLength(void) const
+{ return d_->core.l_qseq; }
+
BamRecordImpl& BamRecordImpl::SetSequenceAndQualities(const std::string& sequence,
const std::string& qualities)
{
@@ -335,51 +382,11 @@ BamRecordImpl& BamRecordImpl::SetSequenceAndQualitiesInternal(const char* sequen
memset(pEncodedSequence, 0, encodedSequenceLength);
for (size_t i = 0; i < sequenceLength; ++i)
pEncodedSequence[i>>1] |= seq_nt16_table[(int)sequence[i]] << ((~i&1)<<2);
-
-
-
-// const char* pRawSequence = sequence;
-// uint8_t nucleotideCode;
-// bool useHighWord = true;
-// for (size_t i = 0; i < sequenceLength; ++i) {
-// switch (*pRawSequence) {
-// case '=' : nucleotideCode = 0; break;
-// case 'A' : nucleotideCode = 1; break;
-// case 'C' : nucleotideCode = 2; break;
-// case 'M' : nucleotideCode = 3; break;
-// case 'G' : nucleotideCode = 4; break;
-// case 'R' : nucleotideCode = 5; break;
-// case 'S' : nucleotideCode = 6; break;
-// case 'V' : nucleotideCode = 7; break;
-// case 'T' : nucleotideCode = 8; break;
-// case 'W' : nucleotideCode = 9; break;
-// case 'Y' : nucleotideCode = 10; break;
-// case 'H' : nucleotideCode = 11; break;
-// case 'K' : nucleotideCode = 12; break;
-// case 'D' : nucleotideCode = 13; break;
-// case 'B' : nucleotideCode = 14; break;
-// case 'N' : nucleotideCode = 15; break;
-// default :
-// PB_ASSERT_UNREACHABLE; // graceful way to handle?
-// break;
-// }
-
-// // pack the nucleotide code
-// if (useHighWord) {
-// *pEncodedSequence = nucleotideCode << 4;
-// useHighWord = false;
-// } else {
-// *pEncodedSequence |= nucleotideCode;
-// ++pEncodedSequence;
-// useHighWord = true;
-// }
-// ++pRawSequence;
-// }
}
// fill in quality values
uint8_t* encodedQualities = bam_get_qual(d_);
- if ( (qualities == 0 ) || (::strlen(qualities) == 0) )
+ if ( (qualities == 0 ) || (strlen(qualities) == 0) )
memset(encodedQualities, 0xff, sequenceLength);
else {
for (size_t i = 0; i < sequenceLength; ++i)
@@ -388,6 +395,19 @@ BamRecordImpl& BamRecordImpl::SetSequenceAndQualitiesInternal(const char* sequen
return *this;
}
+int BamRecordImpl::TagOffset(const string& tagName) const
+{
+ if (tagName.size() != 2)
+ throw std::runtime_error("invalid tag name size");
+
+ if (tagOffsets_.empty())
+ UpdateTagMap();
+
+ const uint16_t tagCode = (static_cast<uint8_t>(tagName.at(0)) << 8) | static_cast<uint8_t>(tagName.at(1));
+ const auto found = tagOffsets_.find(tagCode);
+ return (found != tagOffsets_.cend() ? found->second : -1);
+}
+
BamRecordImpl& BamRecordImpl::Tags(const TagCollection& tags)
{
// convert tags to binary
@@ -405,6 +425,9 @@ BamRecordImpl& BamRecordImpl::Tags(const TagCollection& tags)
// fill in new tag data
memcpy((void*)tagStart, data, numBytes);
+
+ // update tag info
+ UpdateTagMap();
return *this;
}
@@ -419,8 +442,107 @@ Tag BamRecordImpl::TagValue(const string& tagName) const
{
if (tagName.size() != 2)
return Tag();
- uint8_t* data = bam_aux_get(d_.get(), tagName.c_str());
- if (data == 0)
+
+ const int offset = TagOffset(tagName);
+ if (offset == -1)
return Tag();
- return BamTagCodec::FromRawData(data);
+
+ bam1_t* b = d_.get();
+ assert(bam_get_aux(b));
+ uint8_t* tagData = bam_get_aux(b) + offset;
+ if (offset >= b->l_data)
+ return Tag();
+
+ // skip tag name
+ return BamTagCodec::FromRawData(tagData);
+}
+
+void BamRecordImpl::UpdateTagMap(void) const
+{
+ // clear out offsets, leave map structure basically intact
+ auto tagIter = tagOffsets_.begin();
+ auto tagEnd = tagOffsets_.end();
+ for ( ; tagIter != tagEnd; ++tagIter )
+ tagIter->second = -1;
+
+ const uint8_t* tagStart = bam_get_aux(d_);
+ if (tagStart == 0)
+ return;
+ const ptrdiff_t numBytes = d_->l_data - (tagStart - d_->data);
+
+ // NOTE: using a 16-bit 'code' for tag name here instead of string, to avoid
+ // a lot of string constructions & comparisons. All valid tags will be 2 chars
+ // anyway, so this should be a nice lookup mechanism.
+ //
+ uint16_t tagNameCode;
+ int64_t i = 0;
+ while(i < numBytes) {
+
+ // store (tag name code -> start offset into tag data)
+ tagNameCode = static_cast<char>(tagStart[i]) << 8 | static_cast<char>(tagStart[i+1]);
+ i += 2;
+ tagOffsets_[tagNameCode] = i;
+
+ // skip tag contents
+ const char tagType = static_cast<char>(tagStart[i++]);
+ switch (tagType) {
+ case 'A' :
+ case 'a' :
+ case 'c' :
+ case 'C' :
+ {
+ i += 1;
+ break;
+ }
+ case 's' :
+ case 'S' :
+ {
+ i += 2;
+ break;
+ }
+ case 'i' :
+ case 'I' :
+ case 'f' :
+ {
+ i += 4;
+ break;
+ }
+
+ case 'Z' :
+ case 'H' :
+ {
+ // null-terminated string
+ i += strlen((const char*)&tagStart[i]) + 1;
+ break;
+ }
+
+ case 'B' :
+ {
+ const char subTagType = tagStart[i++];
+ size_t elementSize = 0;
+ switch (subTagType) {
+ case 'c' :
+ case 'C' : elementSize = 1; break;
+ case 's' :
+ case 'S' : elementSize = 2; break;
+ case 'i' :
+ case 'I' :
+ case 'f' : elementSize = 4; break;
+
+ // unknown subTagType
+ default:
+ PB_ASSERT_OR_RETURN(false);
+ }
+
+ uint32_t numElements = 0;
+ memcpy(&numElements, &tagStart[i], sizeof(uint32_t));
+ i += (4 + (elementSize * numElements));
+ break;
+ }
+
+ // unknown tagType
+ default:
+ PB_ASSERT_OR_RETURN(false);
+ }
+ }
}
diff --git a/src/BamTagCodec.cpp b/src/BamTagCodec.cpp
index 522e41d..fca2cbe 100644
--- a/src/BamTagCodec.cpp
+++ b/src/BamTagCodec.cpp
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file BamTagCodec.cpp
+/// \brief Implements the BamTagCodec class.
+//
// Author: Derek Barnett
#include "pbbam/BamTagCodec.h"
@@ -44,30 +48,17 @@ using namespace PacBio::BAM;
using namespace std;
template<typename T>
-inline void appendBamValue(const T& value, kstring_t* str /*vector<uint8_t>& result*/)
+inline void appendBamValue(const T& value, kstring_t* str)
{
kputsn_((char*)&value, sizeof(value), str);
-// const size_t initialResultSize = result.size();
-// result.resize(initialResultSize + sizeof(T));
-// memcpy((uint8_t*)&result[initialResultSize],
-// (uint8_t*)&value,
-// sizeof(T));
}
template<typename T>
-inline void appendBamMultiValue(const vector<T>& container, kstring_t* str /*vector<uint8_t>& result*/)
+inline void appendBamMultiValue(const vector<T>& container, kstring_t* str)
{
const uint32_t n = container.size();
kputsn_(&n, sizeof(n), str);
kputsn_((char*)&container[0], n*sizeof(T), str);
-
-// const size_t initialResultSize = result.size();
-// const uint32_t numValues = container.size();
-// result.resize(initialResultSize + 4 + numValues*sizeof(T));
-// memcpy((uint8_t*)&result[initialResultSize], (uint32_t*)&numValues, sizeof(numValues));
-// memcpy((uint8_t*)&result[initialResultSize + 4],
-// (uint8_t*)&container[0],
-// numValues*sizeof(T));
}
template<typename T>
@@ -147,7 +138,6 @@ TagCollection BamTagCodec::Decode(const vector<uint8_t>& data)
{
const char subTagType = pData[i++];
switch (subTagType) {
-
case 'c' : tags[tagName] = readBamMultiValue<int8_t>(pData, i); break;
case 'C' : tags[tagName] = readBamMultiValue<uint8_t>(pData, i); break;
case 's' : tags[tagName] = readBamMultiValue<int16_t>(pData, i); break;
@@ -176,8 +166,6 @@ vector<uint8_t> BamTagCodec::Encode(const TagCollection& tags)
{
kstring_t str = { 0, 0, NULL };
- vector<uint8_t> result;
-
const auto tagEnd = tags.cend();
for (auto tagIter = tags.cbegin(); tagIter != tagEnd; ++tagIter) {
const string& name = (*tagIter).first;
@@ -305,12 +293,14 @@ vector<uint8_t> BamTagCodec::Encode(const TagCollection& tags)
break;
}
+ // unsupported tag type
default :
free(str.s);
PB_ASSERT_OR_RETURN_VALUE(false, vector<uint8_t>());
}
}
+ vector<uint8_t> result;
result.resize(str.l);
memcpy((char*)&result[0], str.s, str.l);
free(str.s);
@@ -344,7 +334,7 @@ Tag BamTagCodec::FromRawData(uint8_t* rawData)
const size_t dataLength = strlen((const char*)&rawData[0]);
string value;
value.resize(dataLength);
- memcpy( (char*)value.data(), &rawData[0], dataLength );
+ memcpy((char*)value.data(), &rawData[0], dataLength);
Tag t(value);
if (tagType == 'H')
t.Modifier(TagModifier::HEX_STRING);
@@ -375,34 +365,36 @@ Tag BamTagCodec::FromRawData(uint8_t* rawData)
default:
PB_ASSERT_OR_RETURN_VALUE(false, Tag());
}
+ return Tag(); // to avoid compiler warning
}
-vector<uint8_t> BamTagCodec::ToRawData(const Tag& tag)
+vector<uint8_t> BamTagCodec::ToRawData(const Tag& tag,
+ const TagModifier& additionalModifier)
{
+ // temp raw data destination (for use with htslib methods)
kstring_t str = { 0, 0, NULL };
// "<TYPE>:<DATA>" for printable, ASCII char
- if (tag.HasModifier(TagModifier::ASCII_CHAR)) {
- char c = tag.ToAscii();
- if (c != '\0') {
+ if (tag.HasModifier(TagModifier::ASCII_CHAR) || additionalModifier == TagModifier::ASCII_CHAR) {
+ const char c = tag.ToAscii();
+ if (c != '\0')
kputc_(c, &str);
- }
}
// for all others
else {
- switch ( tag.Type() ) {
+ switch (tag.Type()) {
// single, numeric values
- case TagDataType::INT8 : appendBamValue(tag.ToInt8(), &str); break;
- case TagDataType::UINT8 : appendBamValue(tag.ToUInt8(), &str); break;
- case TagDataType::INT16 : appendBamValue(tag.ToInt16(), &str); break;
+ case TagDataType::INT8 : appendBamValue(tag.ToInt8(), &str); break;
+ case TagDataType::UINT8 : appendBamValue(tag.ToUInt8(), &str); break;
+ case TagDataType::INT16 : appendBamValue(tag.ToInt16(), &str); break;
case TagDataType::UINT16 : appendBamValue(tag.ToUInt16(), &str); break;
- case TagDataType::INT32 : appendBamValue(tag.ToInt32(), &str); break;
+ case TagDataType::INT32 : appendBamValue(tag.ToInt32(), &str); break;
case TagDataType::UINT32 : appendBamValue(tag.ToUInt32(), &str); break;
- case TagDataType::FLOAT : appendBamValue(tag.ToFloat(), &str); break;
+ case TagDataType::FLOAT : appendBamValue(tag.ToFloat(), &str); break;
- // string (& hex-string) values
+ // string & hex-string values
case TagDataType::STRING :
{
const string& s = tag.ToString();
@@ -454,12 +446,14 @@ vector<uint8_t> BamTagCodec::ToRawData(const Tag& tag)
break;
}
+ // unsupported tag type
default :
free(str.s);
PB_ASSERT_OR_RETURN_VALUE(false, vector<uint8_t>());
}
}
+ // store temp contents in actual destination
vector<uint8_t> result;
result.resize(str.l);
memcpy((char*)&result[0], str.s, str.l);
@@ -467,11 +461,12 @@ vector<uint8_t> BamTagCodec::ToRawData(const Tag& tag)
return result;
}
-uint8_t BamTagCodec::TagTypeCode(const Tag &tag)
+uint8_t BamTagCodec::TagTypeCode(const Tag& tag,
+ const TagModifier& additionalModifier)
{
- if ( tag.HasModifier(TagModifier::ASCII_CHAR) ) {
+ if (tag.HasModifier(TagModifier::ASCII_CHAR) || additionalModifier == TagModifier::ASCII_CHAR) {
int64_t value = 0;
- switch ( tag.Type() ) {
+ switch (tag.Type()) {
case TagDataType::INT8 : value = static_cast<int64_t>(tag.ToInt8()); break;
case TagDataType::UINT8 : value = static_cast<int64_t>(tag.ToUInt8()); break;
case TagDataType::INT16 : value = static_cast<int64_t>(tag.ToInt16()); break;
@@ -479,7 +474,7 @@ uint8_t BamTagCodec::TagTypeCode(const Tag &tag)
case TagDataType::INT32 : value = static_cast<int64_t>(tag.ToInt32()); break;
case TagDataType::UINT32 : value = static_cast<int64_t>(tag.ToUInt32()); break;
default:
- // non integers not
+ // non integers not allowed
PB_ASSERT_OR_RETURN_VALUE(false, 0);
}
// printable range
@@ -488,7 +483,7 @@ uint8_t BamTagCodec::TagTypeCode(const Tag &tag)
return static_cast<uint8_t>('A');
}
- switch ( tag.Type() ) {
+ switch (tag.Type()) {
case TagDataType::INT8 : return static_cast<uint8_t>('c');
case TagDataType::UINT8 : return static_cast<uint8_t>('C');
case TagDataType::INT16 : return static_cast<uint8_t>('s');
@@ -499,8 +494,10 @@ uint8_t BamTagCodec::TagTypeCode(const Tag &tag)
case TagDataType::STRING :
{
- return tag.HasModifier(TagModifier::HEX_STRING) ? static_cast<uint8_t>('H')
- : static_cast<uint8_t>('Z');
+ if (tag.HasModifier(TagModifier::HEX_STRING) || additionalModifier == TagModifier::HEX_STRING)
+ return static_cast<uint8_t>('H');
+ else
+ return static_cast<uint8_t>('Z');
}
case TagDataType::INT8_ARRAY : // fall through
@@ -514,4 +511,5 @@ uint8_t BamTagCodec::TagTypeCode(const Tag &tag)
default:
PB_ASSERT_OR_RETURN_VALUE(false, 0);
}
+ return 0; // to avoid compiler warning
}
diff --git a/src/BamWriter.cpp b/src/BamWriter.cpp
index 0abd6e6..1c12acb 100644
--- a/src/BamWriter.cpp
+++ b/src/BamWriter.cpp
@@ -42,8 +42,8 @@
#include <htslib/bgzf.h>
#include <htslib/hfile.h>
#include <htslib/hts.h>
-#include <thread>
#include <iostream>
+#include <thread>
using namespace PacBio;
using namespace PacBio::BAM;
using namespace std;
@@ -55,35 +55,33 @@ namespace internal {
class BamWriterPrivate
{
public:
- BamWriterPrivate(void)
- : file_(nullptr)
- , header_(nullptr)
- { }
+ BamWriterPrivate(const std::string& filename,
+ const PBBAM_SHARED_PTR<bam_hdr_t> rawHeader,
+ const BamWriter::CompressionLevel compressionLevel,
+ const size_t numThreads,
+ const BamWriter::BinCalculationMode binCalculationMode);
public:
- void Open(const std::string& filename,
- const PBBAM_SHARED_PTR<bam_hdr_t> rawHeader,
- const BamWriter::CompressionLevel compressionLevel = BamWriter::DefaultCompression,
- size_t numThreads = 4);
void Write(const PBBAM_SHARED_PTR<bam1_t>& rawRecord);
void Write(const PBBAM_SHARED_PTR<bam1_t>& rawRecord, int64_t* vOffset);
public:
+ bool calculateBins_;
std::unique_ptr<samFile, internal::HtslibFileDeleter> file_;
PBBAM_SHARED_PTR<bam_hdr_t> header_;
std::string filename_;
};
-void BamWriterPrivate::Open(const string& filename,
- const PBBAM_SHARED_PTR<bam_hdr_t> rawHeader,
- const BamWriter::CompressionLevel compressionLevel,
- size_t numThreads)
+BamWriterPrivate::BamWriterPrivate(const string& filename,
+ const PBBAM_SHARED_PTR<bam_hdr_t> rawHeader,
+ const BamWriter::CompressionLevel compressionLevel,
+ const size_t numThreads,
+ const BamWriter::BinCalculationMode binCalculationMode)
+ : calculateBins_(binCalculationMode == BamWriter::BinCalculation_ON)
+ , file_(nullptr)
+ , header_(rawHeader)
+ , filename_(filename)
{
- // store filename
- filename_ = filename;
-
- // store header
- header_ = rawHeader;
if (!header_)
throw std::runtime_error("null header");
@@ -93,18 +91,22 @@ void BamWriterPrivate::Open(const string& filename,
if (!file_)
throw std::runtime_error("could not open file for writing");
+// BGZF* bgzf = file_.get()->fp.bgzf;
+// bgzf_index_build_init(bgzf);
+
// if no explicit thread count given, attempt built-in check
- if (numThreads == 0) {
- numThreads = thread::hardware_concurrency();
+ size_t actualNumThreads = numThreads;
+ if (actualNumThreads == 0) {
+ actualNumThreads = thread::hardware_concurrency();
// if still unknown, default to single-threaded
- if (numThreads == 0)
- numThreads = 1;
+ if (actualNumThreads == 0)
+ actualNumThreads = 1;
}
// if multithreading requested, enable it
- if (numThreads > 1)
- hts_set_threads(file_.get(), numThreads);
+ if (actualNumThreads > 1)
+ hts_set_threads(file_.get(), actualNumThreads);
// write header
const int ret = sam_hdr_write(file_.get(), header_.get());
@@ -114,6 +116,11 @@ void BamWriterPrivate::Open(const string& filename,
void BamWriterPrivate::Write(const PBBAM_SHARED_PTR<bam1_t>& rawRecord)
{
+ // (probably) store bins
+ if (calculateBins_)
+ rawRecord->core.bin = hts_reg2bin(rawRecord->core.pos, bam_endpos(rawRecord.get()), 14, 5); // min_shift=14 & n_lvls=5 are BAM "magic numbers"
+
+ // write record to file
const int ret = sam_write1(file_.get(), header_.get(), rawRecord.get());
if (ret <= 0)
throw std::runtime_error("could not write record");
@@ -125,10 +132,15 @@ void BamWriterPrivate::Write(const PBBAM_SHARED_PTR<bam1_t>& rawRecord, int64_t*
assert(bgzf);
assert(vOffset);
+ // ensure offsets up-to-date
+ bgzf_flush(bgzf);
+
+ // capture virtual offset where we’re about to write
const off_t rawTell = htell(bgzf->fp);
const int length = bgzf->block_offset;
-
*vOffset = (rawTell << 16) | length ;
+
+ // now write data
Write(rawRecord);
}
@@ -139,14 +151,16 @@ void BamWriterPrivate::Write(const PBBAM_SHARED_PTR<bam1_t>& rawRecord, int64_t*
BamWriter::BamWriter(const std::string& filename,
const BamHeader& header,
const BamWriter::CompressionLevel compressionLevel,
- const size_t numThreads)
- : d_(new internal::BamWriterPrivate)
-{
- d_->Open(filename,
- internal::BamHeaderMemory::MakeRawHeader(header),
- compressionLevel,
- numThreads);
-}
+ const size_t numThreads,
+ const BinCalculationMode binCalculationMode)
+ : d_{ new internal::BamWriterPrivate{ filename,
+ internal::BamHeaderMemory::MakeRawHeader(header),
+ compressionLevel,
+ numThreads,
+ binCalculationMode
+ }
+ }
+{ }
BamWriter::~BamWriter(void)
{
diff --git a/tests/src/test_TimeUtils.cpp b/src/BarcodeQuery.cpp
similarity index 72%
copy from tests/src/test_TimeUtils.cpp
copy to src/BarcodeQuery.cpp
index 7ab9fa5..be45ddb 100644
--- a/tests/src/test_TimeUtils.cpp
+++ b/src/BarcodeQuery.cpp
@@ -32,28 +32,37 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file BarcodeQuery.cpp
+/// \brief Implements the BarcodeQuery class.
+//
// Author: Derek Barnett
-#ifdef PBBAM_TESTING
-#define private public
-#endif
-
-#include <gtest/gtest.h>
-#include <pbbam/../../src/TimeUtils.h>
-
+#include "pbbam/BarcodeQuery.h"
+#include "pbbam/PbiFilterTypes.h"
+#include "pbbam/CompositeBamReader.h"
using namespace PacBio;
using namespace PacBio::BAM;
using namespace PacBio::BAM::internal;
using namespace std;
-TEST(TimeUtilsTest, ToIso8601)
+struct BarcodeQuery::BarcodeQueryPrivate
{
- const time_t rawTime = 436428750L;
- const auto timestamp = std::chrono::system_clock::from_time_t(rawTime);
+ BarcodeQueryPrivate(const uint16_t barcode, const DataSet& dataset)
+ : reader_(PbiBarcodeFilter(barcode), dataset)
+ { }
+
+ PbiFilterCompositeBamReader<Compare::None> reader_; // unsorted
+};
+
+BarcodeQuery::BarcodeQuery(const uint16_t barcode,
+ const DataSet& dataset)
+ : internal::IQuery()
+ , d_(new BarcodeQueryPrivate(barcode, dataset))
+{ }
+
+BarcodeQuery::~BarcodeQuery(void) { }
- // can't hardcode expected (since we rely on localtime())
- const std::string& expected = "1983-10-31T06:12:30Z";
- const std::string& actual = internal::ToIso8601(timestamp);
- EXPECT_EQ(expected, actual);
-}
+bool BarcodeQuery::GetNext(BamRecord &r)
+{ return d_->reader_.GetNext(r); }
diff --git a/include/pbbam/internal/IBamFileIterator.h b/src/ChemistryTable.cpp
similarity index 67%
rename from include/pbbam/internal/IBamFileIterator.h
rename to src/ChemistryTable.cpp
index 5ea34f3..622524d 100644
--- a/include/pbbam/internal/IBamFileIterator.h
+++ b/src/ChemistryTable.cpp
@@ -33,47 +33,33 @@
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-// Author: Derek Barnett
+// Author: Lance Hepler
-#ifndef IBAMFILEITERATOR_H
-#define IBAMFILEITERATOR_H
-
-#include "pbbam/BamFile.h"
-#include "pbbam/BamRecord.h"
-#include <memory>
-#include <vector>
+#include "ChemistryTable.h"
namespace PacBio {
namespace BAM {
namespace internal {
-template<typename T>
-class IBamFileIteratorBase
-{
-public:
- typedef std::shared_ptr< IBamFileIteratorBase<T> > Ptr;
-
-protected:
- IBamFileIteratorBase(const BamFile& file)
- : header_(file.Header().DeepCopy())
- { }
-public:
- virtual ~IBamFileIteratorBase(void) { }
-
-public:
- virtual bool GetNext(T& result) =0;
- virtual bool InSameGroup(const BamRecord& lhs, const BamRecord& rhs) const
- { (void)lhs; (void)rhs; return true; }
+extern const std::vector<std::array<std::string, 4>> ChemistryTable = {
-protected:
- const BamHeader header_;
+ // binding, sequencing, version, chemistry
+ {{"100356300", "100356200", "2.1", "P6-C4"}},
+ {{"100356300", "100356200", "2.3", "P6-C4"}},
+ {{"100356300", "100612400", "2.1", "P6-C4"}},
+ {{"100356300", "100612400", "2.3", "P6-C4"}},
+ {{"100372700", "100356200", "2.1", "P6-C4"}},
+ {{"100372700", "100356200", "2.3", "P6-C4"}},
+ {{"100372700", "100612400", "2.1", "P6-C4"}},
+ {{"100372700", "100612400", "2.3", "P6-C4"}},
+ {{"100-619-300", "100-619-400", "3.0", "S/P1-C1"}},
+ {{"100-619-300", "100-711-600", "3.0", "S/P1-C1"}},
+ {{"100-619-300", "100-620-000", "3.0", "S/P1-C1"}},
+ {{"100-619-300", "100-619-400", "3.1", "S/P1-C1"}},
+ {{"100-619-300", "100-711-600", "3.1", "S/P1-C1"}},
+ {{"100-619-300", "100-620-000", "3.1", "S/P1-C1"}}
};
-typedef IBamFileIteratorBase<BamRecord> IBamFileIterator;
-typedef IBamFileIteratorBase<std::vector<BamRecord> > IBamFileGroupIterator;
-
} // namespace internal
} // namespace BAM
} // namespace PacBio
-
-#endif // IBAMFILEITERATOR_H
diff --git a/include/pbbam/Orientation.h b/src/ChemistryTable.h
similarity index 87%
copy from include/pbbam/Orientation.h
copy to src/ChemistryTable.h
index 7582199..6caacaa 100644
--- a/include/pbbam/Orientation.h
+++ b/src/ChemistryTable.h
@@ -33,23 +33,23 @@
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-// Author: Derek Barnett
+// Author: Lance Hepler
-#ifndef ORIENTATION_H
-#define ORIENTATION_H
+#ifndef CHEMISTRYTABLE_H
+#define CHEMISTRYTABLE_H
-#include "pbbam/Config.h"
+#include <array>
+#include <string>
+#include <vector>
namespace PacBio {
namespace BAM {
+namespace internal {
-enum class Orientation
-{
- NATIVE
- , GENOMIC
-};
+extern const std::vector<std::array<std::string, 4>> ChemistryTable;
+} // namespace internal
} // namespace BAM
} // namespace PacBio
-#endif // ORIENTATION_H
+#endif // CHEMISTRYTABLE_H
diff --git a/src/Cigar.cpp b/src/Cigar.cpp
index 8c7efa7..f099f54 100644
--- a/src/Cigar.cpp
+++ b/src/Cigar.cpp
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file Cigar.cpp
+/// \brief Implements the Cigar class.
+//
// Author: Derek Barnett
#include "pbbam/Cigar.h"
@@ -48,7 +52,7 @@ Cigar::Cigar(const string& cigarString)
const size_t numChars = cigarString.size();
for (size_t i = 0; i < numChars; ++i) {
const char c = cigarString.at(i);
- if (!::isdigit(c)) {
+ if (!isdigit(c)) {
const size_t distance = i - numberStart;
const uint32_t length = stoul(cigarString.substr(numberStart, distance));
push_back(CigarOperation(c, length));
diff --git a/src/CigarOperation.cpp b/src/CigarOperation.cpp
index 50dc4c2..7289983 100644
--- a/src/CigarOperation.cpp
+++ b/src/CigarOperation.cpp
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file CigarOperation.cpp
+/// \brief Implements the CigarOperation class.
+//
// Author: Derek Barnett
#include "pbbam/CigarOperation.h"
diff --git a/src/Compare.cpp b/src/Compare.cpp
new file mode 100644
index 0000000..43874f2
--- /dev/null
+++ b/src/Compare.cpp
@@ -0,0 +1,141 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file Compare.cpp
+/// \brief Implements the Compare class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/Compare.h"
+#include <functional>
+#include <unordered_map>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+struct TypeAlias
+{
+ string name_;
+ string op_;
+ string opAlpha_;
+
+ TypeAlias(const string& name = string(),
+ const string& op = string(),
+ const string& opAlpha = string())
+ : name_(name)
+ , op_(op)
+ , opAlpha_(opAlpha)
+ { }
+};
+
+struct CompareTypeHash
+{
+ size_t operator()(const Compare::Type& t) const
+ { return std::hash<int>()(static_cast<int>(t)); }
+};
+
+static const unordered_map<string, Compare::Type> opToTypeMap =
+{
+ // basic operators plus some permissiveness for other representations
+
+ { "==", Compare::EQUAL },
+ { "=", Compare::EQUAL },
+ { "eq", Compare::EQUAL },
+ { "!=", Compare::NOT_EQUAL },
+ { "ne", Compare::NOT_EQUAL },
+ { "<", Compare::LESS_THAN },
+ { "lt", Compare::LESS_THAN },
+ { "<", Compare::LESS_THAN },
+ { "<=", Compare::LESS_THAN_EQUAL },
+ { "lte", Compare::LESS_THAN_EQUAL },
+ { "<=", Compare::LESS_THAN_EQUAL },
+ { ">", Compare::GREATER_THAN },
+ { "gt", Compare::GREATER_THAN },
+ { ">", Compare::GREATER_THAN },
+ { ">=", Compare::GREATER_THAN_EQUAL },
+ { "gte", Compare::GREATER_THAN_EQUAL },
+ { ">=", Compare::GREATER_THAN_EQUAL },
+ { "&", Compare::CONTAINS },
+ { "~", Compare::NOT_CONTAINS }
+};
+
+static const unordered_map<Compare::Type, TypeAlias, CompareTypeHash> typeAliases =
+{
+ { Compare::EQUAL, TypeAlias{ "Compare::EQUAL", "==", "eq" } },
+ { Compare::NOT_EQUAL, TypeAlias{ "Compare::NOT_EQUAL", "!=", "ne" } },
+ { Compare::LESS_THAN, TypeAlias{ "Compare::LESS_THAN", "<", "lt" } },
+ { Compare::LESS_THAN_EQUAL, TypeAlias{ "Compare::LESS_THAN_EQUAL", "<=", "lte" } },
+ { Compare::GREATER_THAN, TypeAlias{ "Compare::GREATER_THAN", ">", "gt" } },
+ { Compare::GREATER_THAN_EQUAL, TypeAlias{ "Compare::GREATER_THAN_EQUAL", ">=", "gte" } },
+ { Compare::CONTAINS, TypeAlias{ "Compare::CONTAINS", "&", "and" } },
+ { Compare::NOT_CONTAINS, TypeAlias{ "Compare::NOT_CONTAINS", "~", "not" } }
+};
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+Compare::Type Compare::TypeFromOperator(const string& opString)
+{
+ try {
+ return internal::opToTypeMap.at(opString);
+ } catch (std::exception&) {
+ throw std::runtime_error(opString + " is not a valid comparison operator." );
+ }
+}
+
+string Compare::TypeToName(const Compare::Type& type)
+{
+ try {
+ return internal::typeAliases.at(type).name_;
+ } catch (std::exception&) {
+ throw std::runtime_error("invalid comparison type encountered" );
+ }
+}
+
+string Compare::TypeToOperator(const Compare::Type& type, bool asAlpha)
+{
+ try {
+ return asAlpha ? internal::typeAliases.at(type).opAlpha_
+ : internal::typeAliases.at(type).op_;
+ } catch (std::exception&) {
+ throw std::runtime_error("invalid comparison type encountered" );
+ }
+}
diff --git a/src/Config.cpp b/src/Config.cpp
index 677ad08..095aa37 100644
--- a/src/Config.cpp
+++ b/src/Config.cpp
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file Config.cpp
+/// \brief Initializes global variable defaults.
+//
// Author: Derek Barnett
#include "pbbam/Config.h"
@@ -42,7 +46,15 @@ using namespace PacBio::BAM;
namespace PacBio {
namespace BAM {
-int HtslibVerbosity = 0;
+// Initialized to -1 to indicate default. Client code may set this or not.
+//
+// To respect client code or else fallback to default[OFF], this value should be used like this:
+//
+// hts_verbose = ( PacBio::BAM::HtslibVerbosity == -1 ? 0 : PacBio::BAM::HtslibVerbosity);
+//
+//
+//
+int HtslibVerbosity = -1;
} // namespace BAM
} // namespace PacBio
diff --git a/src/DataSet.cpp b/src/DataSet.cpp
index ee43e9f..a44780b 100644
--- a/src/DataSet.cpp
+++ b/src/DataSet.cpp
@@ -32,14 +32,20 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file DataSet.cpp
+/// \brief Implements the DataSet class.
+//
// Author: Derek Barnett
#include "pbbam/DataSet.h"
#include "pbbam/DataSetTypes.h"
#include "pbbam/internal/DataSetBaseTypes.h"
#include "DataSetIO.h"
+#include "FileUtils.h"
#include "TimeUtils.h"
+#include <boost/algorithm/string.hpp>
#include <unordered_map>
using namespace PacBio;
using namespace PacBio::BAM;
@@ -47,13 +53,12 @@ using namespace PacBio::BAM::internal;
using namespace std;
DataSet::DataSet(void)
- : d_(new DataSetBase)
-{
- CreatedAt(internal::ToIso8601(internal::CurrentTime()));
-}
+ : DataSet(DataSet::GENERIC)
+{ }
DataSet::DataSet(const DataSet::TypeEnum type)
: d_(nullptr)
+ , path_(FileUtils::CurrentWorkingDirectory())
{
switch(type) {
case DataSet::GENERIC : d_.reset(new DataSetBase); break;
@@ -69,20 +74,39 @@ DataSet::DataSet(const DataSet::TypeEnum type)
throw std::runtime_error("unsupported dataset type"); // unknown type
}
- CreatedAt(internal::ToIso8601(internal::CurrentTime()));
+ CreatedAt(internal::ToIso8601(CurrentTime()));
}
DataSet::DataSet(const BamFile& bamFile)
- : d_(internal::DataSetIO::FromUri(bamFile.Filename()))
+ : d_(DataSetIO::FromUri(bamFile.Filename()))
+ , path_(FileUtils::CurrentWorkingDirectory())
{
- CreatedAt(internal::ToIso8601(internal::CurrentTime()));
+ CreatedAt(internal::ToIso8601(CurrentTime()));
}
DataSet::DataSet(const string& filename)
- : d_(internal::DataSetIO::FromUri(filename))
+ : d_(DataSetIO::FromUri(filename))
+ , path_(FileUtils::DirectoryName(filename))
+{
+ // for FOFN contents and raw BAM filenames, we can just use the current
+ // directory as the starting path.
+ //
+ // (any relative paths in the FOFN have already been resolved)
+ //
+ if (boost::algorithm::iends_with(filename, ".fofn") ||
+ boost::algorithm::iends_with(filename, ".bam"))
+ {
+ path_ = FileUtils::CurrentWorkingDirectory();
+ }
+}
+
+DataSet::DataSet(const vector<string>& filenames)
+ : d_(DataSetIO::FromUris(filenames))
+ , path_(FileUtils::CurrentWorkingDirectory())
{ }
DataSet::DataSet(const DataSet& other)
+ : path_(other.path_)
{
DataSetBase* otherDataset = other.d_.get();
DataSetElement* copyDataset = new DataSetElement(*otherDataset);
@@ -91,6 +115,7 @@ DataSet::DataSet(const DataSet& other)
DataSet::DataSet(DataSet&& other)
: d_(std::move(other.d_))
+ , path_(std::move(other.path_))
{
assert(other.d_.get() == nullptr);
}
@@ -100,12 +125,14 @@ DataSet& DataSet::operator=(const DataSet& other)
DataSetBase* otherDataset = other.d_.get();
DataSetElement* copyDataset = new DataSetElement(*otherDataset);
d_.reset(static_cast<DataSetBase*>(copyDataset));
+ path_ = other.path_;
return *this;
}
DataSet& DataSet::operator=(DataSet&& other)
{
d_ = std::move(other.d_);
+ path_ = std::move(other.path_);
return *this;
}
@@ -117,6 +144,29 @@ DataSet& DataSet::operator+=(const DataSet& other)
return *this;
}
+vector<BamFile> DataSet::BamFiles(void) const
+{
+ const PacBio::BAM::ExternalResources& resources = ExternalResources();
+
+// cerr << "path: " << this->path_ << endl;
+
+ vector<BamFile> result;
+ result.reserve(resources.Size());
+ for(const ExternalResource& ext : resources) {
+
+// cerr << ext.ResourceId() << std::endl;
+
+ // only bother resolving file path if this is a BAM file
+ boost::iterator_range<string::const_iterator> bamFound = boost::algorithm::ifind_first(ext.MetaType(), "bam");
+ if (!bamFound.empty()) {
+ const string fn = ResolvePath(ext.ResourceId());
+// const string fn = internal::FileUtils::ResolvedFilePath(ext.ResourceId(), path_);
+ result.push_back(BamFile(fn));
+ }
+ }
+ return result;
+}
+
DataSet DataSet::FromXml(const string& xml)
{
DataSet result;
@@ -136,24 +186,56 @@ DataSet::TypeEnum DataSet::NameToType(const string& typeName)
{
static std::unordered_map<std::string, DataSet::TypeEnum> lookup;
if (lookup.empty()) {
- lookup["DataSet"] = DataSet::GENERIC;
- lookup["AlignmentSet"] = DataSet::ALIGNMENT;
- lookup["BarcodeSet"] = DataSet::BARCODE;
+ lookup["DataSet"] = DataSet::GENERIC;
+ lookup["AlignmentSet"] = DataSet::ALIGNMENT;
+ lookup["BarcodeSet"] = DataSet::BARCODE;
lookup["ConsensusAlignmentSet"] = DataSet::CONSENSUS_ALIGNMENT;
- lookup["ConsensusReadSet"] = DataSet::CONSENSUS_READ;
- lookup["ContigSet"] = DataSet::CONTIG;
- lookup["HdfSubreadSet"] = DataSet::HDF_SUBREAD;
- lookup["ReferenceSet"] = DataSet::REFERENCE;
- lookup["SubreadSet"] = DataSet::SUBREAD;
+ lookup["ConsensusReadSet"] = DataSet::CONSENSUS_READ;
+ lookup["ContigSet"] = DataSet::CONTIG;
+ lookup["HdfSubreadSet"] = DataSet::HDF_SUBREAD;
+ lookup["ReferenceSet"] = DataSet::REFERENCE;
+ lookup["SubreadSet"] = DataSet::SUBREAD;
}
return lookup.at(typeName); // throws if unknown typename
}
+vector<string> DataSet::ResolvedResourceIds(void) const
+{
+ const PacBio::BAM::ExternalResources& resources = ExternalResources();
+
+ vector<string> result;
+ result.reserve(resources.Size());
+ for(const ExternalResource& ext : resources) {
+// const string fn = ;
+// const string fn = internal::FileUtils::ResolvedFilePath(ext.ResourceId(), path_);
+ result.push_back(ResolvePath(ext.ResourceId()));
+ }
+ return result;
+}
+
+string DataSet::ResolvePath(const string& originalPath) const
+{ return internal::FileUtils::ResolvedFilePath(originalPath, path_); }
+
void DataSet::Save(const std::string& outputFilename)
-{ internal::DataSetIO::ToFile(d_, outputFilename); }
+{ DataSetIO::ToFile(d_, outputFilename); }
void DataSet::SaveToStream(ostream& out)
-{ internal::DataSetIO::ToStream(d_, out); }
+{ DataSetIO::ToStream(d_, out); }
+
+set<string> DataSet::SequencingChemistries(void) const
+{
+ const vector<BamFile> bamFiles{ BamFiles() };
+
+ set<string> result;
+ for(const BamFile& bf : bamFiles) {
+ if (!bf.IsPacBioBAM())
+ throw std::runtime_error{ "only PacBio BAMs are supported" };
+ const vector<ReadGroupInfo> readGroups{ bf.Header().ReadGroups() };
+ for (const ReadGroupInfo& rg : readGroups)
+ result.insert(rg.SequencingChemistry());
+ }
+ return result;
+}
string DataSet::TypeToName(const DataSet::TypeEnum& type)
{
@@ -171,3 +253,26 @@ string DataSet::TypeToName(const DataSet::TypeEnum& type)
throw std::runtime_error("unsupported dataset type"); // unknown type
}
}
+
+// Exposed timestamp utils
+
+namespace PacBio {
+namespace BAM {
+
+string CurrentTimestamp(void)
+{ return internal::ToDataSetFormat(internal::CurrentTime()); }
+
+string ToDataSetFormat(const chrono::system_clock::time_point &tp)
+{ return internal::ToDataSetFormat(tp); }
+
+string ToDataSetFormat(const time_t &t)
+{ return ToDataSetFormat(chrono::system_clock::from_time_t(t)); }
+
+string ToIso8601(const chrono::system_clock::time_point &tp)
+{ return internal::ToIso8601(tp); }
+
+string ToIso8601(const time_t &t)
+{ return ToIso8601(chrono::system_clock::from_time_t(t)); }
+
+} // namespace BAM
+} // namespace PacBio
diff --git a/src/DataSetBaseTypes.cpp b/src/DataSetBaseTypes.cpp
index cc133e3..2c19e0b 100644
--- a/src/DataSetBaseTypes.cpp
+++ b/src/DataSetBaseTypes.cpp
@@ -38,6 +38,8 @@
#include "pbbam/DataSetTypes.h"
#include "pbbam/internal/DataSetBaseTypes.h"
#include "DataSetUtils.h"
+#include "TimeUtils.h"
+#include <boost/algorithm/string.hpp>
using namespace PacBio;
using namespace PacBio::BAM;
using namespace PacBio::BAM::internal;
@@ -49,7 +51,10 @@ using namespace std;
BaseEntityType::BaseEntityType(const std::string& label, const XsdType& xsd)
: DataSetElement(label, xsd)
-{ }
+{
+ if (Version().empty())
+ Version(internal::XML_VERSION);
+}
DEFINE_ACCESSORS(BaseEntityType, Extensions, Extensions)
@@ -68,8 +73,11 @@ DataEntityType::DataEntityType(const std::string& label, const XsdType& xsd)
// IndexedDataType
// -----------------
-IndexedDataType::IndexedDataType(const std::string& label, const XsdType &xsd)
- : InputOutputDataType(label, xsd)
+IndexedDataType::IndexedDataType(const string& metatype,
+ const string& filename,
+ const string& label,
+ const XsdType &xsd)
+ : InputOutputDataType(metatype, filename, label, xsd)
{ }
DEFINE_ACCESSORS(IndexedDataType, FileIndices, FileIndices)
@@ -81,14 +89,38 @@ IndexedDataType& IndexedDataType::FileIndices(const PacBio::BAM::FileIndices& in
// InputOutputDataType
// ---------------------
-InputOutputDataType::InputOutputDataType(const std::string& label, const XsdType &xsd)
- : StrictEntityType(label, xsd)
-{ }
+InputOutputDataType::InputOutputDataType(const string& metatype,
+ const string& filename,
+ const string& label,
+ const XsdType &xsd)
+ : StrictEntityType(metatype, label, xsd)
+{
+ ResourceId(filename);
+}
// ----------------
// StrictEntityType
// ----------------
-StrictEntityType::StrictEntityType(const std::string& label, const XsdType& xsd)
+StrictEntityType::StrictEntityType(const string& metatype,
+ const string& label,
+ const XsdType& xsd)
: BaseEntityType(label, xsd)
-{ }
+{
+ // MetaType
+ MetaType(metatype);
+
+ // TimeStampedName
+ const size_t numChars = metatype.size();
+ string transformedMetatype;
+ transformedMetatype.resize(numChars);
+ for (size_t i = 0; i < numChars; ++i) {
+ const char c = metatype.at(i);
+ transformedMetatype[i] = ((c == '.') ? '_' : tolower(c));
+ }
+ const string& tsn = transformedMetatype + "-" + internal::ToDataSetFormat(internal::CurrentTime());
+ TimeStampedName(tsn);
+
+ // UniqueId
+ UniqueId(internal::GenerateUuid());
+}
diff --git a/src/DataSetElement.cpp b/src/DataSetElement.cpp
index 26c0bb6..6854fd2 100644
--- a/src/DataSetElement.cpp
+++ b/src/DataSetElement.cpp
@@ -36,12 +36,12 @@
// Author: Derek Barnett
#include "pbbam/internal/DataSetElement.h"
+#include "DataSetUtils.h"
using namespace PacBio;
using namespace PacBio::BAM;
using namespace PacBio::BAM::internal;
const std::string& DataSetElement::SharedNullString(void)
{
- static const std::string empty = std::string("");
- return empty;
+ return internal::NullObject<std::string>();
}
diff --git a/src/DataSetIO.cpp b/src/DataSetIO.cpp
index d618210..e57173f 100644
--- a/src/DataSetIO.cpp
+++ b/src/DataSetIO.cpp
@@ -36,6 +36,7 @@
// Author: Derek Barnett
#include "DataSetIO.h"
+#include "FileUtils.h"
#include "FofnReader.h"
#include "StringUtils.h"
#include "XmlReader.h"
@@ -77,10 +78,14 @@ unique_ptr<DataSetBase> FromBam(const string& bamFn)
static
unique_ptr<DataSetBase> FromFofn(const string& fofn)
{
+ const string fofnDir = internal::FileUtils::DirectoryName(fofn);
ifstream in(fofn);
if (!in)
throw std::runtime_error("could not open FOFN for reading");
- const vector<string> filenames = std::move(FofnReader::Files(in));
+
+ vector<string> filenames = std::move(FofnReader::Files(in));
+ for (size_t i = 0; i < filenames.size(); ++i)
+ filenames[i] = internal::FileUtils::ResolvedFilePath(filenames[i], fofnDir);
return DataSetIO::FromUris(filenames);
}
diff --git a/src/DataSetTypes.cpp b/src/DataSetTypes.cpp
index 3144adc..9dd7b27 100644
--- a/src/DataSetTypes.cpp
+++ b/src/DataSetTypes.cpp
@@ -32,12 +32,18 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file DataSetTypes.cpp
+/// \brief Implementations for the public DataSet component classes.
+//
// Author: Derek Barnett
#include "pbbam/DataSetTypes.h"
#include "pbbam/internal/DataSetBaseTypes.h"
#include "DataSetUtils.h"
+#include "FileUtils.h"
+#include "TimeUtils.h"
#include <set>
using namespace PacBio;
using namespace PacBio::BAM;
@@ -49,7 +55,9 @@ using namespace std;
// -------------------
AlignmentSet::AlignmentSet(void)
- : DataSetBase("AlignmentSet", XsdType::DATASETS)
+ : DataSetBase("PacBio.DataSet.AlignmentSet",
+ "AlignmentSet",
+ XsdType::DATASETS)
{ }
// -------------------
@@ -57,7 +65,9 @@ AlignmentSet::AlignmentSet(void)
// -------------------
BarcodeSet::BarcodeSet(void)
- : DataSetBase("BarcodeSet", XsdType::DATASETS)
+ : DataSetBase("PacBio.DataSet.BarcodeSet",
+ "BarcodeSet",
+ XsdType::DATASETS)
{ }
// -----------------------
@@ -65,7 +75,9 @@ BarcodeSet::BarcodeSet(void)
// -----------------------
ConsensusAlignmentSet::ConsensusAlignmentSet(void)
- : DataSetBase("ConsensusAlignmentSet", XsdType::DATASETS)
+ : DataSetBase("PacBio.DataSet.ConsensusAlignmentSet",
+ "ConsensusAlignmentSet",
+ XsdType::DATASETS)
{ }
// -------------------
@@ -73,7 +85,9 @@ ConsensusAlignmentSet::ConsensusAlignmentSet(void)
// -------------------
ConsensusReadSet::ConsensusReadSet(void)
- : DataSetBase("ConsensusReadSet", XsdType::DATASETS)
+ : DataSetBase("PacBio.DataSet.ConsensusReadSet",
+ "ConsensusReadSet",
+ XsdType::DATASETS)
{ }
// -------------------
@@ -81,7 +95,9 @@ ConsensusReadSet::ConsensusReadSet(void)
// -------------------
ContigSet::ContigSet(void)
- : DataSetBase("ContigSet", XsdType::DATASETS)
+ : DataSetBase("PacBio.DataSet.ContigSet",
+ "ContigSet",
+ XsdType::DATASETS)
{ }
// -------------------
@@ -89,11 +105,15 @@ ContigSet::ContigSet(void)
// -------------------
DataSetBase::DataSetBase(void)
- : StrictEntityType("DataSet", XsdType::DATASETS)
+ : StrictEntityType("PacBio.DataSet.DataSet",
+ "DataSet",
+ XsdType::DATASETS)
{ }
-DataSetBase::DataSetBase(const string& label, const XsdType& xsd)
- : StrictEntityType(label, xsd)
+DataSetBase::DataSetBase(const string& metatype,
+ const string& label,
+ const XsdType& xsd)
+ : StrictEntityType(metatype, label, xsd)
{ }
DEFINE_ACCESSORS(DataSetBase, ExternalResources, ExternalResources)
@@ -141,7 +161,7 @@ DataSetBase* DataSetBase::DeepCopy(void) const
DataSetBase& DataSetBase::operator+=(const DataSetBase& other)
{
// must be same dataset types (or 'other' must be generic)
- if (other.QualifiedNameLabel() != QualifiedNameLabel() && other.LocalNameLabel() != "DataSet")
+ if (other.LocalNameLabel() != LocalNameLabel() && other.LocalNameLabel() != "DataSet")
throw std::runtime_error("cannot merge incompatible dataset types");
// check filter match
@@ -211,27 +231,29 @@ Extensions::Extensions(void)
: DataSetListElement<ExtensionElement>("Extensions", XsdType::BASE_DATA_MODEL)
{ }
-ExternalResource::ExternalResource(void)
- : IndexedDataType("ExternalResource", XsdType::BASE_DATA_MODEL)
-{ }
-
// -------------------
// ExternalResource
// -------------------
-ExternalResource::ExternalResource(const BamFile &bamFile)
- : IndexedDataType("ExternalResource", XsdType::BASE_DATA_MODEL)
-{
- MetaType("SubreadFile.SubreadBamFile");
- ResourceId(bamFile.Filename());
-}
+ExternalResource::ExternalResource(const BamFile& bamFile)
+ : IndexedDataType("PacBio.SubreadFile.SubreadBamFile",
+ bamFile.Filename(),
+ "ExternalResource",
+ XsdType::BASE_DATA_MODEL)
+{ }
-ExternalResource::ExternalResource(const string& metatype, const string& filename)
- : IndexedDataType("ExternalResource", XsdType::BASE_DATA_MODEL)
-{
- MetaType(metatype);
- ResourceId(filename);
-}
+ExternalResource::ExternalResource(const string& metatype,
+ const string& filename)
+ : IndexedDataType(metatype,
+ filename,
+ "ExternalResource",
+ XsdType::BASE_DATA_MODEL)
+{ }
+
+DEFINE_ACCESSORS(ExternalResource, ExternalResources, ExternalResources)
+
+ExternalResource& ExternalResource::ExternalResources(const PacBio::BAM::ExternalResources& resources)
+{ ExternalResources() = resources; return *this; }
BamFile ExternalResource::ToBamFile(void) const
{ return BamFile(ResourceId()); }
@@ -241,7 +263,8 @@ BamFile ExternalResource::ToBamFile(void) const
// -------------------
ExternalResources::ExternalResources(void)
- : DataSetListElement<ExternalResource>("ExternalResources", XsdType::BASE_DATA_MODEL)
+ : DataSetListElement<ExternalResource>("ExternalResources",
+ XsdType::BASE_DATA_MODEL)
{ }
ExternalResources& ExternalResources::operator+=(const ExternalResources& other)
@@ -270,16 +293,24 @@ ExternalResources& ExternalResources::operator+=(const ExternalResources& other)
}
void ExternalResources::Add(const ExternalResource& ext)
-{ AddChild(ext); }
+{
+ // disallow external resources w/ duplicate ResourceIds
+ set<std::string> myResourceIds;
+ for (size_t i = 0; i < Size(); ++i) {
+ const ExternalResource& resource = this->operator[](i);
+ myResourceIds.insert(resource.ResourceId());
+ }
+ if (myResourceIds.find(ext.ResourceId()) == myResourceIds.cend())
+ AddChild(ext);
+}
vector<BamFile> ExternalResources::BamFiles(void) const
{
vector<BamFile> result;
const int numResources = Size();
result.reserve(numResources);
- for( const ExternalResource& ext : *this ) {
+ for( const ExternalResource& ext : *this )
result.push_back(ext.ToBamFile());
- }
return result;
}
@@ -290,8 +321,11 @@ void ExternalResources::Remove(const ExternalResource& ext)
// FileIndex
// -------------------
-FileIndex::FileIndex(void)
- : InputOutputDataType("FileIndex", XsdType::BASE_DATA_MODEL)
+FileIndex::FileIndex(const string& metatype, const string& filename)
+ : InputOutputDataType(metatype,
+ filename,
+ "FileIndex",
+ XsdType::BASE_DATA_MODEL)
{ }
// -------------------
@@ -347,7 +381,9 @@ void Filters::Remove(const Filter& filter)
// -------------------
HdfSubreadSet::HdfSubreadSet(void)
- : DataSetBase("HdfSubreadSet", XsdType::DATASETS)
+ : DataSetBase("PacBio.DataSet.HdfSubreadSet",
+ "HdfSubreadSet",
+ XsdType::DATASETS)
{ }
// -------------------
@@ -401,7 +437,9 @@ DEFINE_ACCESSORS(Provenance, ParentTool, ParentTool)
// -------------------
ReferenceSet::ReferenceSet(void)
- : DataSetBase("ReferenceSet", XsdType::DATASETS)
+ : DataSetBase("PacBio.DataSet.ReferenceSet",
+ "ReferenceSet",
+ XsdType::DATASETS)
{ }
// -------------------
@@ -436,5 +474,7 @@ void SubDataSets::Remove(const DataSetBase& subdataset)
// -------------------
SubreadSet::SubreadSet(void)
- : DataSetBase("SubreadSet", XsdType::DATASETS)
+ : DataSetBase("PacBio.DataSet.SubreadSet",
+ "SubreadSet",
+ XsdType::DATASETS)
{ }
diff --git a/src/DataSetUtils.h b/src/DataSetUtils.h
index 2fa1f8c..dcf234c 100644
--- a/src/DataSetUtils.h
+++ b/src/DataSetUtils.h
@@ -39,11 +39,15 @@
#define DATASETUTILS_H
#include "pbbam/DataSetTypes.h"
+#include <boost/uuid/random_generator.hpp>
+#include <boost/uuid/uuid_io.hpp>
namespace PacBio {
namespace BAM {
namespace internal {
+static const std::string XML_VERSION = std::string { "3.0.1" };
+
template<typename T>
inline const T& NullObject(void)
{
@@ -58,6 +62,14 @@ inline const PacBio::BAM::DataSetMetadata& NullObject(void)
return empty;
}
+inline
+std::string GenerateUuid(void)
+{
+ static boost::uuids::random_generator gen;
+ const boost::uuids::uuid uuid = gen();
+ return boost::uuids::to_string(uuid);
+}
+
} // namespace internal
} // namespace BAM
} // namespace PacBio
diff --git a/src/DataSetXsd.cpp b/src/DataSetXsd.cpp
index 0474691..161bd2b 100644
--- a/src/DataSetXsd.cpp
+++ b/src/DataSetXsd.cpp
@@ -32,10 +32,15 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file DataSetXsd.cpp
+/// \brief Implements the XSD- and namespace-related classes for DataSetXML.
+//
// Author: Derek Barnett
#include "pbbam/DataSetXsd.h"
+#include <unordered_map>
using namespace PacBio;
using namespace PacBio::BAM;
using namespace std;
@@ -46,25 +51,133 @@ namespace internal {
static map<XsdType, NamespaceInfo> DefaultRegistry(void)
{
- map<XsdType, NamespaceInfo> result;
- result[XsdType::NONE] = NamespaceInfo();
- result[XsdType::AUTOMATION_CONSTRAINTS] = NamespaceInfo("", "http://pacificbiosciences.com/PacBioAutomationConstraints.xsd");
- result[XsdType::BASE_DATA_MODEL] = NamespaceInfo("pbbase", "http://pacificbiosciences.com/PacBioBaseDataModel.xsd");
- result[XsdType::COLLECTION_METADATA] = NamespaceInfo("pbmeta", "http://pacificbiosciences.com/PacBioCollectionMetadata.xsd");
- result[XsdType::COMMON_MESSAGES] = NamespaceInfo("", "http://pacificbiosciences.com/PacBioCommonMessages.xsd");
- result[XsdType::DATA_MODEL] = NamespaceInfo("pbdm", "http://pacificbiosciences.com/PacBioDataModel.xsd");
- result[XsdType::DATA_STORE] = NamespaceInfo("", "http://pacificbiosciences.com/PacBioDataStore.xsd");
- result[XsdType::DATASETS] = NamespaceInfo("pbds", "http://pacificbiosciences.com/PacBioDatasets.xsd");
- result[XsdType::DECL_DATA] = NamespaceInfo("", "http://pacificbiosciences.com/PacBioDeclData.xsd");
- result[XsdType::PART_NUMBERS] = NamespaceInfo("pbpn", "http://pacificbiosciences.com/PacBioPartNumbers.xsd");
- result[XsdType::PRIMARY_METRICS] = NamespaceInfo("", "http://pacificbiosciences.com/PacBioPrimaryMetrics.xsd");
- result[XsdType::REAGENT_KIT] = NamespaceInfo("pbrk", "http://pacificbiosciences.com/PacBioReagentKit.xsd");
- result[XsdType::RIGHTS_AND_ROLES] = NamespaceInfo("", "http://pacificbiosciences.com/PacBioRightsAndRoles.xsd");
- result[XsdType::SAMPLE_INFO] = NamespaceInfo("pbsample", "http://pacificbiosciences.com/PacBioSampleInfo.xsd");
- result[XsdType::SEEDING_DATA] = NamespaceInfo("", "http://pacificbiosciences.com/PacBioSeedingData.xsd");
+ const auto result = map<XsdType, NamespaceInfo>
+ {
+ { XsdType::NONE, NamespaceInfo{ "", "" } },
+ { XsdType::AUTOMATION_CONSTRAINTS, NamespaceInfo{ "", "http://pacificbiosciences.com/PacBioAutomationConstraints.xsd" } },
+ { XsdType::BASE_DATA_MODEL, NamespaceInfo{ "pbbase", "http://pacificbiosciences.com/PacBioBaseDataModel.xsd" } },
+ { XsdType::COLLECTION_METADATA, NamespaceInfo{ "pbmeta", "http://pacificbiosciences.com/PacBioCollectionMetadata.xsd" } },
+ { XsdType::COMMON_MESSAGES, NamespaceInfo{ "", "http://pacificbiosciences.com/PacBioCommonMessages.xsd" } },
+ { XsdType::DATA_MODEL, NamespaceInfo{ "pbdm", "http://pacificbiosciences.com/PacBioDataModel.xsd" } },
+ { XsdType::DATA_STORE, NamespaceInfo{ "", "http://pacificbiosciences.com/PacBioDataStore.xsd" } },
+ { XsdType::DATASETS, NamespaceInfo{ "pbds", "http://pacificbiosciences.com/PacBioDatasets.xsd" } },
+ { XsdType::DECL_DATA, NamespaceInfo{ "", "http://pacificbiosciences.com/PacBioDeclData.xsd" } },
+ { XsdType::PART_NUMBERS, NamespaceInfo{ "pbpn", "http://pacificbiosciences.com/PacBioPartNumbers.xsd" } },
+ { XsdType::PRIMARY_METRICS, NamespaceInfo{ "", "http://pacificbiosciences.com/PacBioPrimaryMetrics.xsd" } },
+ { XsdType::REAGENT_KIT, NamespaceInfo{ "pbrk", "http://pacificbiosciences.com/PacBioReagentKit.xsd" } },
+ { XsdType::RIGHTS_AND_ROLES, NamespaceInfo{ "", "http://pacificbiosciences.com/PacBioRightsAndRoles.xsd" } },
+ { XsdType::SAMPLE_INFO, NamespaceInfo{ "pbsample", "http://pacificbiosciences.com/PacBioSampleInfo.xsd" } },
+ { XsdType::SEEDING_DATA, NamespaceInfo{ "", "http://pacificbiosciences.com/PacBioSeedingData.xsd" } }
+ };
return result;
}
+static const auto elementRegistry = unordered_map<string, XsdType>
+{
+ // 'pbbase' elements
+ //
+ { "AutomationParameter" , XsdType::BASE_DATA_MODEL },
+ { "AutomationParameters" , XsdType::BASE_DATA_MODEL },
+ { "BinCount" , XsdType::BASE_DATA_MODEL },
+ { "BinCounts" , XsdType::BASE_DATA_MODEL },
+ { "BinLabel" , XsdType::BASE_DATA_MODEL },
+ { "BinLabels" , XsdType::BASE_DATA_MODEL },
+ { "BinWidth" , XsdType::BASE_DATA_MODEL },
+ { "ExternalResource" , XsdType::BASE_DATA_MODEL },
+ { "ExternalResources" , XsdType::BASE_DATA_MODEL },
+ { "FileIndex" , XsdType::BASE_DATA_MODEL },
+ { "FileIndices" , XsdType::BASE_DATA_MODEL },
+ { "MaxBinValue" , XsdType::BASE_DATA_MODEL },
+ { "MaxOutlierValue" , XsdType::BASE_DATA_MODEL },
+ { "MetricDescription" , XsdType::BASE_DATA_MODEL },
+ { "NumBins" , XsdType::BASE_DATA_MODEL },
+ { "Properties" , XsdType::BASE_DATA_MODEL },
+ { "Property" , XsdType::BASE_DATA_MODEL },
+ { "Sample95thPct" , XsdType::BASE_DATA_MODEL },
+ { "SampleMean" , XsdType::BASE_DATA_MODEL },
+ { "SampleMed" , XsdType::BASE_DATA_MODEL },
+ { "SampleSize" , XsdType::BASE_DATA_MODEL },
+ { "SampleStd" , XsdType::BASE_DATA_MODEL },
+
+ // 'pbds' elements
+ //
+ { "AdapterDimerFraction", XsdType::DATASETS },
+ { "AlignmentSet", XsdType::DATASETS },
+ { "BarcodeConstruction", XsdType::DATASETS },
+ { "BarcodeSet", XsdType::DATASETS },
+ { "ConsensusAlignmentSet", XsdType::DATASETS },
+ { "ConsensusReadSet", XsdType::DATASETS },
+ { "Contig", XsdType::DATASETS },
+ { "Contigs", XsdType::DATASETS },
+ { "ContigSet", XsdType::DATASETS },
+ { "ControlReadLenDist", XsdType::DATASETS },
+ { "ControlReadQualDist", XsdType::DATASETS },
+ { "DataSetMetdata", XsdType::DATASETS },
+ { "DataSet", XsdType::DATASETS },
+ { "DataSets", XsdType::DATASETS },
+ { "Filter", XsdType::DATASETS },
+ { "Filters", XsdType::DATASETS },
+ { "HdfSubreadSet", XsdType::DATASETS },
+ { "InsertReadLenDist", XsdType::DATASETS },
+ { "InsertReadQualDist" , XsdType::DATASETS },
+ { "MedianInsertDist", XsdType::DATASETS },
+ { "NumRecords", XsdType::DATASETS },
+ { "NumSequencingZmws", XsdType::DATASETS },
+ { "Organism", XsdType::DATASETS },
+ { "ParentTool", XsdType::DATASETS },
+ { "Ploidy", XsdType::DATASETS },
+ { "ProdDist", XsdType::DATASETS },
+ { "Provenance", XsdType::DATASETS },
+ { "ReadLenDist", XsdType::DATASETS },
+ { "ReadQualDist", XsdType::DATASETS },
+ { "ReadTypeDist", XsdType::DATASETS },
+ { "ReferenceSet", XsdType::DATASETS },
+ { "ShortInsertFraction", XsdType::DATASETS },
+ { "SubreadSet", XsdType::DATASETS },
+ { "SummaryStats", XsdType::DATASETS },
+ { "TotalLength", XsdType::DATASETS },
+
+ // 'pbmeta' elements
+ //
+ { "Automation", XsdType::COLLECTION_METADATA },
+ { "AutomationName", XsdType::COLLECTION_METADATA },
+ { "CellIndex", XsdType::COLLECTION_METADATA },
+ { "CellPac", XsdType::COLLECTION_METADATA },
+ { "CollectionFileCopy", XsdType::COLLECTION_METADATA },
+ { "CollectionMetadata", XsdType::COLLECTION_METADATA },
+ { "CollectionNumber", XsdType::COLLECTION_METADATA },
+ { "CollectionPathUri", XsdType::COLLECTION_METADATA },
+ { "Collections", XsdType::COLLECTION_METADATA },
+ { "Concentration", XsdType::COLLECTION_METADATA },
+ { "ConfigFileName", XsdType::COLLECTION_METADATA },
+ { "CopyFiles", XsdType::COLLECTION_METADATA },
+ { "InstCtrlVer", XsdType::COLLECTION_METADATA },
+ { "MetricsVerbosity", XsdType::COLLECTION_METADATA },
+ { "Name", XsdType::COLLECTION_METADATA },
+ { "OutputOptions", XsdType::COLLECTION_METADATA },
+ { "PlateId", XsdType::COLLECTION_METADATA },
+ { "Primary", XsdType::COLLECTION_METADATA },
+ { "Readout", XsdType::COLLECTION_METADATA },
+ { "ResultsFolder", XsdType::COLLECTION_METADATA },
+ { "RunDetails", XsdType::COLLECTION_METADATA },
+ { "RunId", XsdType::COLLECTION_METADATA },
+ { "SampleReuseEnabled", XsdType::COLLECTION_METADATA },
+ { "SequencingCondition", XsdType::COLLECTION_METADATA },
+ { "SigProcVer", XsdType::COLLECTION_METADATA },
+ { "SizeSelectionEnabled", XsdType::COLLECTION_METADATA },
+ { "StageHotstartEnabled", XsdType::COLLECTION_METADATA },
+ { "UseCount", XsdType::COLLECTION_METADATA },
+ { "WellName", XsdType::COLLECTION_METADATA },
+ { "WellSample", XsdType::COLLECTION_METADATA },
+
+ // 'pbsample' elements
+ //
+ { "BioSample", XsdType::SAMPLE_INFO },
+ { "BioSamplePointer", XsdType::SAMPLE_INFO },
+ { "BioSamplePointers", XsdType::SAMPLE_INFO },
+ { "BioSamples", XsdType::SAMPLE_INFO }
+};
+
} // namespace internal
} // namespace BAM
} // namespace PacBio
@@ -95,6 +208,11 @@ NamespaceRegistry::NamespaceRegistry(const NamespaceRegistry &other)
, defaultXsdType_(other.defaultXsdType_)
{ }
+NamespaceRegistry::NamespaceRegistry(NamespaceRegistry &&other)
+ : data_(std::move(other.data_))
+ , defaultXsdType_(std::move(other.defaultXsdType_))
+{ }
+
NamespaceRegistry& NamespaceRegistry::operator=(const NamespaceRegistry& other)
{
data_ = other.data_;
@@ -102,6 +220,13 @@ NamespaceRegistry& NamespaceRegistry::operator=(const NamespaceRegistry& other)
return *this;
}
+NamespaceRegistry& NamespaceRegistry::operator=(NamespaceRegistry&& other)
+{
+ data_ = std::move(other.data_);
+ defaultXsdType_ = std::move(other.defaultXsdType_);
+ return *this;
+}
+
NamespaceRegistry::~NamespaceRegistry(void) { }
const NamespaceInfo& NamespaceRegistry::DefaultNamespace(void) const
@@ -113,6 +238,18 @@ XsdType NamespaceRegistry::DefaultXsd(void) const
const NamespaceInfo& NamespaceRegistry::Namespace(const XsdType& xsd) const
{ return data_.at(xsd); }
+void NamespaceRegistry::Register(const XsdType& xsd, const NamespaceInfo& namespaceInfo)
+{ data_[xsd] = namespaceInfo; }
+
+void NamespaceRegistry::SetDefaultXsd(const XsdType& xsd)
+{ defaultXsdType_ = xsd; }
+
+XsdType NamespaceRegistry::XsdForElement(const std::string& elementLabel) const
+{
+ const auto iter = internal::elementRegistry.find(elementLabel);
+ return (iter == internal::elementRegistry.cend() ? XsdType::NONE : iter->second);
+}
+
XsdType NamespaceRegistry::XsdForUri(const std::string& uri) const
{
map<XsdType, NamespaceInfo>::const_iterator iter = data_.cbegin();
@@ -124,9 +261,3 @@ XsdType NamespaceRegistry::XsdForUri(const std::string& uri) const
}
return XsdType::NONE;
}
-
-void NamespaceRegistry::Register(const XsdType& xsd, const NamespaceInfo& namespaceInfo)
-{ data_[xsd] = namespaceInfo; }
-
-void NamespaceRegistry::SetDefaultXsd(const XsdType& xsd)
-{ defaultXsdType_ = xsd; }
diff --git a/src/EntireFileQuery.cpp b/src/EntireFileQuery.cpp
index 814dc51..6813492 100644
--- a/src/EntireFileQuery.cpp
+++ b/src/EntireFileQuery.cpp
@@ -32,71 +32,34 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file EntireFileQuery.cpp
+/// \brief Implements the EntireFileQuery class.
+//
// Author: Derek Barnett
#include "pbbam/EntireFileQuery.h"
-#include "pbbam/BamFile.h"
-
-#include "pbbam/internal/SequentialMergeStrategy.h"
-
-#include "MemoryUtils.h"
+#include "pbbam/CompositeBamReader.h"
using namespace PacBio;
using namespace PacBio::BAM;
using namespace std;
-class EntireFileIterator : public internal::IBamFileIterator
+struct EntireFileQuery::EntireFileQueryPrivate
{
-public:
- EntireFileIterator(const BamFile& bamFile)
- : internal::IBamFileIterator(bamFile)
- {
- htsFile_.reset(sam_open(bamFile.Filename().c_str(), "rb"));
- if (!htsFile_)
- throw std::runtime_error("could not open BAM file for reading");
-
- htsHeader_.reset(sam_hdr_read(htsFile_.get()));
- if (!htsHeader_)
- throw std::runtime_error("could not read BAM header");
- }
+ EntireFileQueryPrivate(const DataSet& dataset)
+ : reader_(dataset)
+ { }
-public:
- bool GetNext(BamRecord& record) {
-
-// record = BamRecord(/*fileData_.Header()*/);
- const int result = sam_read1(htsFile_.get(),
- htsHeader_.get(),
- internal::BamRecordMemory::GetRawData(record).get());
- record.header_ = header_;
-
- // success
- if (result >= 0)
- return true;
-
- // normal EOF
- else if (result == -1)
- return false;
-
- // error (truncated file, etc)
- else
- throw std::runtime_error("corrupted file, may be truncated");
- }
-
-private:
- unique_ptr<samFile, internal::HtslibFileDeleter> htsFile_;
- unique_ptr<bam_hdr_t, internal::HtslibHeaderDeleter> htsHeader_;
+ SequentialCompositeBamReader reader_;
};
-EntireFileQuery::EntireFileQuery(const DataSet& dataset)
- : internal::IQuery(dataset)
-{
- // check files
- // if SO all coordinate
- // else if SO all queryname
- // else SO unsorted/unknown
- mergeStrategy_.reset(new internal::SequentialMergeStrategy(CreateIterators()));
-}
+EntireFileQuery::EntireFileQuery(const DataSet &dataset)
+ : internal::IQuery()
+ , d_(new EntireFileQueryPrivate(dataset))
+{ }
-EntireFileQuery::FileIterPtr EntireFileQuery::CreateIterator(const BamFile& bamFile)
-{ return FileIterPtr(new EntireFileIterator(bamFile)); }
+EntireFileQuery::~EntireFileQuery(void) { }
+bool EntireFileQuery::GetNext(BamRecord &r)
+{ return d_->reader_.GetNext(r); }
diff --git a/src/FileUtils.cpp b/src/FileUtils.cpp
new file mode 100644
index 0000000..a0a59af
--- /dev/null
+++ b/src/FileUtils.cpp
@@ -0,0 +1,246 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "FileUtils.h"
+#include "StringUtils.h"
+#include <boost/algorithm/string.hpp>
+#include <exception>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <cassert>
+#include <sys/stat.h>
+#include <unistd.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// pops "file://" scheme off the front of a URI/filepath, if found
+static string removeFileUriScheme(const string& uri)
+{
+ assert(!uri.empty());
+
+ auto schemeLess = uri;
+ const auto fileScheme = string{"file://"};
+ const auto schemeFound = schemeLess.find(fileScheme);
+ if (schemeFound != string::npos) {
+ if (schemeFound != 0)
+ throw runtime_error("Malformed URI: scheme not at beginning");
+ schemeLess = schemeLess.substr(fileScheme.size());
+ }
+ return schemeLess;
+}
+
+#ifdef PBBAM_WIN_FILEPATHS
+
+static
+string removeDiskName(const string& filePath)
+{
+ if (filePath.size() >= 2) {
+ const char firstChar = filePath.at(0);
+ if ((isalpha(firstChar) != 0) && (filePath.at(1) == ':'))
+ return filePath.substr(2);
+ }
+ return filePath;
+}
+
+static const char native_pathSeparator = '\\';
+
+static bool native_pathIsAbsolute(const string& filePath)
+{
+ assert(!filePath.empty());
+
+ // if starts with single slash or double slash
+ if (boost::algorithm::starts_with(filePath, "\\"))
+ return true;
+
+ // if starts with single or double-dots -> not absolute
+ if (boost::algorithm::starts_with(filePath, "."))
+ return false;
+
+ // if starts with disk drive name and colon ("C:\foo\bar.txt")
+ // strip the drive name and check to see if the remaining path is absolute
+ if (filePath.size() >= 2) {
+ const char firstChar = filePath.at(0);
+ if ((isalpha(firstChar) != 0) && (filePath.at(1) == ':'))
+ return native_pathIsAbsolute(removeDiskName(filePath));
+ }
+
+ // otherwise, likely relative
+ return false;
+}
+
+static string native_resolvedFilePath(const string& filePath,
+ const string& from)
+{
+ // strip file:// scheme if present
+ auto schemeLess = removeFileUriScheme(filePath);
+
+ // if empty or already absolute path, just return it
+ // upfront empty check simplifies further parsing logic
+ if (schemeLess.empty() || native_pathIsAbsolute(schemeLess))
+ return schemeLess;
+
+ // else make relative from the provided 'from' directory
+ //
+ // first pop disk name, then any leading single-dot '.'
+ //
+ // since we're prepending the 'from' directory, we can remove
+ // any leading './' form our file path. this may just mean that
+ // we pop it off to add it right back (when from == '.'), but this
+ // keeps it consistent with other 'from' parent directories
+ //
+ schemeLess = removeDiskName(schemeLess);
+
+ const bool thisDirAtStart = (schemeLess.find(".") == 0);
+ if (thisDirAtStart) {
+ if (schemeLess.find(native_pathSeparator) == 1)
+ schemeLess = schemeLess.substr(2);
+ }
+ return from + native_pathSeparator + schemeLess;
+}
+
+#else // else for non-Windows systems
+
+static const char native_pathSeparator = '/';
+
+static bool native_pathIsAbsolute(const string& filePath)
+{ return filePath.at(0) == '/'; }
+
+static string native_resolvedFilePath(const string& filePath,
+ const string& from)
+{
+ // strip file:// scheme if present
+ auto schemeLess = removeFileUriScheme(filePath);
+
+ // if empty or already absolute path, just return it
+ // upfront empty check simplifies further parsing logic
+ if (schemeLess.empty() || native_pathIsAbsolute(schemeLess))
+ return schemeLess;
+
+ // else make relative from the provided 'from' directory
+ //
+ // since we're prepending the 'from' directory, we can remove
+ // any leading './' form our file path. this may just mean that
+ // we pop it off to add it right back (when from == '.'), but this
+ // keeps it consistent with other 'from' parent directories
+ //
+ const bool thisDirAtStart = (schemeLess.find(".") == 0);
+ if (thisDirAtStart) {
+ if (schemeLess.find(native_pathSeparator) == 1)
+ schemeLess = schemeLess.substr(2);
+ }
+ return from + native_pathSeparator + schemeLess;
+}
+
+#endif // PBBAM_WIN_FILEPATHS
+
+// see http://stackoverflow.com/questions/2869594/how-return-a-stdstring-from-cs-getcwd-function
+string FileUtils::CurrentWorkingDirectory(void)
+{
+ const size_t chunkSize = 1024;
+ const size_t maxNumChunks = 20;
+
+ // stack-based buffer for 'normal' case
+ char buffer[chunkSize];
+ if (getcwd(buffer, sizeof(buffer)) != NULL)
+ return string(buffer);
+
+ // if error is not ERANGE, then it's not a problem of too-long name... something else happened
+ if (errno != ERANGE)
+ throw runtime_error("could not determine current working directory path");
+
+ // long path - use heap, trying progressively longer buffers
+ for (size_t chunks = 2; chunks < maxNumChunks; ++chunks) {
+ unique_ptr<char> cwd(new char[chunkSize*chunks]);
+ if (getcwd(cwd.get(), chunkSize*chunks) != NULL)
+ return string(cwd.get());
+
+ // if error is not ERANGE, then it's not a problem of too-long name... something else happened
+ if (errno != ERANGE)
+ throw runtime_error("could not determine current working directory path");
+ }
+
+ // crazy long path name
+ throw runtime_error("could determine current working directory - extremely long path");
+}
+
+string FileUtils::DirectoryName(const string& file)
+{
+ const size_t found = file.rfind(Separator(), file.length());
+ if (found != string::npos)
+ return file.substr(0, found);
+ return string(".");
+}
+
+bool FileUtils::Exists(const char* fn)
+{
+ struct stat buf;
+ return (stat(fn, &buf) != -1);
+}
+
+chrono::system_clock::time_point FileUtils::LastModified(const char* fn)
+{
+ struct stat s;
+ if (stat(fn, &s) != 0)
+ throw runtime_error("could not get file timestamp");
+ return chrono::system_clock::from_time_t(s.st_mtime);
+}
+
+string FileUtils::ResolvedFilePath(const string& filePath,
+ const string& from)
+{ return native_resolvedFilePath(filePath, from); }
+
+constexpr char FileUtils::Separator(void)
+{ return native_pathSeparator; }
+
+off_t FileUtils::Size(const char* fn)
+{
+ struct stat s;
+ if (stat(fn, &s) != 0)
+ throw runtime_error("could not determine file size");
+ return s.st_size;
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
diff --git a/src/FileUtils.h b/src/FileUtils.h
index 5cf7c72..112223e 100644
--- a/src/FileUtils.h
+++ b/src/FileUtils.h
@@ -38,12 +38,8 @@
#ifndef FILEUTILS_H
#define FILEUTILS_H
-#include <exception>
-#include <fstream>
-#include <iostream>
+#include <chrono>
#include <string>
-#include <ctime>
-#include <sys/stat.h>
namespace PacBio {
namespace BAM {
@@ -52,78 +48,95 @@ namespace internal {
struct FileUtils
{
public:
+
+ /// \returns application's current working directory
+ static std::string CurrentWorkingDirectory(void);
+
+ /// Parses a filepath for the the directory name for a file.
+ ///
+ /// Essentially this method strips the filename from the string provided (/path/to/file => /path/to).
+ /// If only a filename is provided, then "." is returned to indicate the current directory.
+ ///
+ /// \param[in] file name of file (can be just a filename or path/to/filename)
+ /// \returns file's directory name
+ ///
+ static std::string DirectoryName(const std::string& file);
+
+ /// Check for existence of a file.
+ ///
+ /// \param[in] fn full path to file
+ /// \returns true if file exists & can be opened
+ ///
static bool Exists(const char* fn);
- static bool Exists(const std::string& fn);
- // throws if can't read
- static time_t LastModified(const char* fn);
- static time_t LastModified(const std::string& fn);
+ /// Check for existence of a file.
+ ///
+ /// \param[in] fn full path to file
+ /// \returns true if file exists & can be opened
+ ///
+ static bool Exists(const std::string& fn);
- // throws if can't read
+ /// Check "last modified" timestamp for a file.
+ ///
+ /// \param[in] fn full path to file
+ /// \returns time of last modification
+ /// \throws runtime_error if file info can't be accessed
+ ///
+ static std::chrono::system_clock::time_point LastModified(const char* fn);
+
+ /// Check "last modified" timestamp for a file.
+ ///
+ /// \param[in] fn full path to file
+ /// \returns time of last modification
+ /// \throws runtime_error if file info can't be accessed
+ ///
+ static std::chrono::system_clock::time_point LastModified(const std::string& fn);
+
+ /// Resolves input file path using optional starting directory.
+ ///
+ /// \verbatim
+ /// /absolute/path/to/file.txt => /absolute/path/to/file.txt
+ /// ../relative/path/to/file.txt => <from>/../relative/path/to/file.txt
+ /// file.txt => <from>/file.txt
+ /// \endverbatim
+ ///
+ /// \note This method will strip any URI scheme as well ("file://") so that the result is immediately ready from I/O operations.
+ ///
+ /// \param[in] filePath file path to be resolved
+ /// \param[in] from optional starting directory (useful if not same as application's working directory)
+ /// \returns resolved file path
+ ///
+ static std::string ResolvedFilePath(const std::string& filePath,
+ const std::string& from = ".");
+
+ /// \returns native path separator
+ constexpr static char Separator(void);
+
+ /// Check size of file.
+ ///
+ /// \param[in] fn full path to file
+ /// \returns file size in bytes
+ /// \throws runtime_error if file info can't be accessed
+ ///
static off_t Size(const char* fn);
+
+ /// Check size of file.
+ ///
+ /// \param[in] fn full path to file
+ /// \returns file size in bytes
+ /// \throws runtime_error if file info can't be accessed
+ ///
static off_t Size(const std::string& fn);
};
-inline bool FileUtils::Exists(const char* fn)
-{ return Exists(std::string(fn)); }
-
inline bool FileUtils::Exists(const std::string& fn)
-{
- std::ifstream stream(fn);
- return !stream.fail();
-}
+{ return FileUtils::Exists(fn.c_str()); }
-inline time_t FileUtils::LastModified(const char* fn)
-{
- struct stat s;
- if (stat(fn, &s) != 0)
- throw std::runtime_error("could not get file timestamp");
-
-#ifdef __DARWIN_64_BIT_INO_T
- return s.st_mtimespec.tv_sec; // 64-bit OSX has a modified stat struct
-#else
- return s.st_mtime; // all others?
-#endif
-}
-
-inline time_t FileUtils::LastModified(const std::string& fn)
-{ return LastModified(fn.c_str()); }
-
-inline off_t FileUtils::Size(const char* fn)
-{
- struct stat s;
- if (stat(fn, &s) != 0)
- throw std::runtime_error("could not determine file size");
- return s.st_size;
-}
+inline std::chrono::system_clock::time_point FileUtils::LastModified(const std::string& fn)
+{ return FileUtils::LastModified(fn.c_str()); }
inline off_t FileUtils::Size(const std::string& fn)
-{ return Size(fn.c_str()); }
-
-//inline std::string FilenameExtension(const std::string& fn)
-//{
-// const size_t lastDot = fn.find_last_of(".");
-// return (lastDot != std::string::npos ? fn.substr(lastDot+1) : std::string());
-//}
-
-////
-//// -- examples --
-////
-//// input: /path/to/file.ext result: file.ext
-//// input: /path/to/file.ext.zip result: file.ext.zip
-//// input: file.ext result: file.ext
-////
-//inline std::string FilenameFromPath(const std::string& fullPath)
-//{
-// struct MatchesPathSeparator {
-// bool operator()(char c) const { return c == '/'; }
-// };
-
-// const auto lastSeparator = std::find_if(fullPath.rbegin(),
-// fullPath.rend(),
-// MatchesPathSeparator()).base();
-// return std::string(lastSeparator,fullPath.end());
-//}
+{ return FileUtils::Size(fn.c_str()); }
} // namespace internal
} // namespace BAM
diff --git a/src/Frames.cpp b/src/Frames.cpp
index 36e49d9..323d02e 100644
--- a/src/Frames.cpp
+++ b/src/Frames.cpp
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file Frames.cpp
+/// \brief Implements the Frames class.
+//
// Author: Derek Barnett
#include "pbbam/Frames.h"
@@ -59,8 +63,6 @@ void InitIpdDownsampling(void)
// liftover from Dave's python code:
// .../bioinformatics/tools/kineticsTools/kineticsTools/_downsampling.py
- //
- // TODO: move this conversion functionality to pbbam
const int B = 2;
const int t = 6;
@@ -173,7 +175,7 @@ Frames& Frames::operator=(Frames&& other)
{ data_ = std::move(other.data_); return *this; }
Frames Frames::Decode(const std::vector<uint8_t>& codedData)
-{ return Frames(std::move(internal::CodeToFrames(codedData))); }
+{ return Frames(std::move(internal::CodeToFrames(codedData))); }
std::vector<uint8_t> Frames::Encode(const std::vector<uint16_t>& frames)
{ return internal::FramesToCode(frames); }
diff --git a/src/GenomicInterval.cpp b/src/GenomicInterval.cpp
index 7cf0b73..10ebc23 100644
--- a/src/GenomicInterval.cpp
+++ b/src/GenomicInterval.cpp
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file GenomicInterval.cpp
+/// \brief Implements the GenomicInterval class.
+//
// Author: Derek Barnett
#include "pbbam/GenomicInterval.h"
@@ -41,6 +45,7 @@
#include <cstdlib>
#include <cstring>
#include <ctype.h>
+#include <stdexcept>
using namespace PacBio;
using namespace PacBio::BAM;
using namespace std;
diff --git a/src/GenomicIntervalQuery.cpp b/src/GenomicIntervalQuery.cpp
index 087c626..b6ead9f 100644
--- a/src/GenomicIntervalQuery.cpp
+++ b/src/GenomicIntervalQuery.cpp
@@ -32,109 +32,42 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file GenomicIntervalQuery.cpp
+/// \brief Implements the GenomicIntervalQuery class.
+//
// Author: Derek Barnett
#include "pbbam/GenomicIntervalQuery.h"
-#include "pbbam/BamFile.h"
-#include "pbbam/internal/BamRecordSort.h"
-#include "pbbam/internal/MergeStrategy.h"
-#include "AssertUtils.h"
-#include "MemoryUtils.h"
-#include <cassert>
+#include "pbbam/CompositeBamReader.h"
using namespace PacBio;
using namespace PacBio::BAM;
using namespace std;
-class GenomicIntervalIterator : public internal::IBamFileIterator
+struct GenomicIntervalQuery::GenomicIntervalQueryPrivate
{
-public:
- GenomicIntervalIterator(const GenomicInterval& interval,
- const BamFile& bamFile)
- : internal::IBamFileIterator(bamFile)
- , interval_(interval)
- {
- // open file
- htsFile_.reset(sam_open(bamFile.Filename().c_str(), "rb"));
- if (!htsFile_)
- throw std::runtime_error("could not open BAM file for reading");
-
- // load header info
- htsHeader_.reset(sam_hdr_read(htsFile_.get()));
- if (!htsHeader_)
- throw std::runtime_error("could not read BAM header data");
-
- // open index
- htsIndex_.reset(bam_index_load(bamFile.Filename().c_str()));
- if (!htsIndex_)
- throw std::runtime_error("could not load BAI index data");
-
- // initialize iterator
- if (bamFile.Header().HasSequence(interval_.Name())) {
- const int id = bamFile.ReferenceId(interval_.Name());
- if (id >= 0 && id < htsHeader_->n_targets) {
- htsIterator_.reset(sam_itr_queryi(htsIndex_.get(),
- id,
- interval.Start(),
- interval.Stop()));
- }
- }
- if (!htsIterator_)
- throw std::runtime_error("could not create iterator for requested region");
- }
-
-public:
- bool GetNext(BamRecord& record) {
+ GenomicIntervalQueryPrivate(const GenomicInterval& interval,
+ const DataSet& dataset)
+ : reader_(interval, dataset)
+ { }
- assert(htsFile_);
- assert(htsIterator_);
- const int result = sam_itr_next(htsFile_.get(),
- htsIterator_.get(),
- internal::BamRecordMemory::GetRawData(record).get());
- record.header_ = header_;
-
- // success
- if (result >= 0)
- return true;
-
- // normal EOF
- else if (result == -1)
- return false;
-
- // error (truncated file, etc)
- else
- throw std::runtime_error("corrupted file, may be truncated");
- }
-
-private:
- GenomicInterval interval_;
- unique_ptr<samFile, internal::HtslibFileDeleter> htsFile_;
- unique_ptr<bam_hdr_t, internal::HtslibHeaderDeleter> htsHeader_;
- unique_ptr<hts_idx_t, internal::HtslibIndexDeleter> htsIndex_;
- unique_ptr<hts_itr_t, internal::HtslibIteratorDeleter> htsIterator_;
+ GenomicIntervalCompositeBamReader reader_;
};
GenomicIntervalQuery::GenomicIntervalQuery(const GenomicInterval& interval,
- const DataSet& dataset)
- : internal::IQuery(dataset)
- , interval_(interval)
-{
- Interval(interval_);
-}
+ const DataSet &dataset)
+ : internal::IQuery()
+ , d_(new GenomicIntervalQueryPrivate(interval, dataset))
+{ }
+
+GenomicIntervalQuery::~GenomicIntervalQuery(void) { }
-GenomicIntervalQuery::FileIterPtr GenomicIntervalQuery::CreateIterator(const BamFile& bamFile)
-{ return FileIterPtr(new GenomicIntervalIterator(interval_, bamFile)); }
+bool GenomicIntervalQuery::GetNext(BamRecord &r)
+{ return d_->reader_.GetNext(r); }
GenomicIntervalQuery& GenomicIntervalQuery::Interval(const GenomicInterval& interval)
-{
- interval_ = interval;
- // check files
- // if SO all coordinate
- // else if SO all queryname
- // else SO unsorted/unknown
- mergeStrategy_.reset(new internal::MergeStrategy<internal::ByPosition>(CreateIterators()));
- return *this;
-}
+{ d_->reader_.Interval(interval); return *this; }
-GenomicInterval GenomicIntervalQuery::Interval(void) const
-{ return interval_; }
+const GenomicInterval& GenomicIntervalQuery::Interval(void) const
+{ return d_->reader_.Interval(); }
diff --git a/src/GroupQuery.cpp b/src/GroupQuery.cpp
deleted file mode 100644
index 4b72b4f..0000000
--- a/src/GroupQuery.cpp
+++ /dev/null
@@ -1,91 +0,0 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-//
-// * Neither the name of Pacific Biosciences nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-
-// Author: Yuan Li
-
-#include "pbbam/GroupQuery.h"
-#include "MemoryUtils.h"
-using namespace PacBio;
-using namespace PacBio::BAM;
-using namespace std;
-
-SequentialGroupQueryBase::SequentialGroupQueryBase(const BamFile & file)
- : GroupQueryBase(file)
- , htsFile_(nullptr)
- , htsHeader_(nullptr)
- , nextRecord_()
-{
- htsFile_.reset(sam_open(file.Filename().c_str(), "rb"), internal::HtslibFileDeleter());
- if (!htsFile_)
- throw std::runtime_error("could not open BAM file for reading");
-
- htsHeader_.reset(sam_hdr_read(htsFile_.get()), internal::HtslibHeaderDeleter());
- if (!htsHeader_)
- throw std::runtime_error("could not read BAM header data");
-}
-
-bool SequentialGroupQueryBase::GetNext(vector<BamRecord> & records)
-{
- records.clear();
-
- if (nextRecord_.Impl().Name() != "") {
- records.push_back(nextRecord_);
- nextRecord_ = BamRecord();
- }
-
- while(true) {
- BamRecord record(file_.Header());
- const int result = sam_read1(htsFile_.get(),
- htsHeader_.get(),
- internal::BamRecordMemory::GetRawData(record).get());
- if (result >= 0) { // get next record
- if (records.size() == 0) {
- records.push_back(record); // add the first record
- } else {
- if (InSameGroup(record, records[0])) {
- records.push_back(record); // add remaining record
- } else {
- nextRecord_ = record; // store record from another zmw
- return true;
- }
- }
- } else { // unable to get next record
- if (records.size() > 0) return true; // Has records to return
- else return false; // Has no records to return
- }
- }
- assert(false); // Should not reach here.
- return false;
-}
diff --git a/src/IndexedFastaReader.cpp b/src/IndexedFastaReader.cpp
index fffe44b..715dd03 100644
--- a/src/IndexedFastaReader.cpp
+++ b/src/IndexedFastaReader.cpp
@@ -32,29 +32,47 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file IndexedFastaReader.cpp
+/// \brief Implements the IndexedFastaReader class.
+//
// Author: David Alexander
-#include "htslib/faidx.h"
+#include "pbbam/IndexedFastaReader.h"
+
#include "pbbam/BamRecord.h"
#include "pbbam/GenomicInterval.h"
-#include "pbbam/IndexedFastaReader.h"
#include "pbbam/Orientation.h"
#include "SequenceUtils.h"
-
-#include <cstdlib>
+#include <htslib/faidx.h>
#include <iostream>
+#include <cstdlib>
namespace PacBio {
namespace BAM {
-
IndexedFastaReader::IndexedFastaReader(const std::string& filename)
{
Open(filename);
}
-IndexedFastaReader::~IndexedFastaReader()
+IndexedFastaReader::IndexedFastaReader(const IndexedFastaReader& src)
+{
+ if (!Open(src.filename_))
+ throw std::runtime_error("Cannot open file " + src.filename_);
+}
+
+IndexedFastaReader& IndexedFastaReader::operator=(const IndexedFastaReader& rhs)
+{
+ if (&rhs == this)
+ return *this;
+
+ Open(rhs.filename_);
+ return *this;
+}
+
+IndexedFastaReader::~IndexedFastaReader(void)
{
Close();
}
@@ -72,7 +90,7 @@ bool IndexedFastaReader::Open(const std::string &filename)
}
}
-void IndexedFastaReader::Close()
+void IndexedFastaReader::Close(void)
{
filename_ = "";
if (handle_ != nullptr)
@@ -194,7 +212,7 @@ IndexedFastaReader::ReferenceSubsequence(const BamRecord& bamRecord,
}
-int IndexedFastaReader::NumSequences() const
+int IndexedFastaReader::NumSequences(void) const
{
REQUIRE_FAIDX_LOADED;
return faidx_nseq(handle_);
@@ -215,5 +233,4 @@ int IndexedFastaReader::SequenceLength(const std::string& name) const
else return len;
}
-
}} // PacBio::BAM
diff --git a/src/MemoryUtils.h b/src/MemoryUtils.h
index dc4be84..c22f9f5 100644
--- a/src/MemoryUtils.h
+++ b/src/MemoryUtils.h
@@ -132,6 +132,9 @@ public:
static PBBAM_SHARED_PTR<bam1_t> GetRawData(const BamRecord* r);
static PBBAM_SHARED_PTR<bam1_t> GetRawData(const BamRecordImpl& impl);
static PBBAM_SHARED_PTR<bam1_t> GetRawData(const BamRecordImpl* impl);
+
+ static void UpdateRecordTags(const BamRecord& r);
+ static void UpdateRecordTags(const BamRecordImpl& r);
};
inline const BamRecordImpl& BamRecordMemory::GetImpl(const BamRecord& r)
@@ -152,6 +155,12 @@ inline PBBAM_SHARED_PTR<bam1_t> BamRecordMemory::GetRawData(const BamRecordImpl&
inline PBBAM_SHARED_PTR<bam1_t> BamRecordMemory::GetRawData(const BamRecordImpl* impl)
{ return impl->d_; }
+inline void BamRecordMemory::UpdateRecordTags(const BamRecord& r)
+{ UpdateRecordTags(r.impl_); }
+
+inline void BamRecordMemory::UpdateRecordTags(const BamRecordImpl& r)
+{ r.UpdateTagMap(); }
+
} // namespace internal
} // namespace BAM
} // namespace PacBio
diff --git a/src/PbiBuilder.cpp b/src/PbiBuilder.cpp
index 65c87bd..bb172dc 100644
--- a/src/PbiBuilder.cpp
+++ b/src/PbiBuilder.cpp
@@ -33,6 +33,10 @@
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
//
+// File Description
+/// \file PbiBuilder.cpp
+/// \brief Implements the PbiBuilder class.
+//
// Author: Derek Barnett
#include "pbbam/PbiBuilder.h"
@@ -41,6 +45,7 @@
#include "MemoryUtils.h"
#include "PbiIndexIO.h"
#include <htslib/bgzf.h>
+#include <thread>
#include <cassert>
using namespace PacBio;
using namespace PacBio::BAM;
@@ -75,10 +80,15 @@ PbiRawReferenceDataBuilder::PbiRawReferenceDataBuilder(const size_t numReference
: lastRefId_(-1)
, lastPos_(-1)
{
-// const BamHeader& header = bam.Header();
-// const size_t numReferences = header.Sequences().size();
+ // initialize with number of references we expect to see
+ //
+ // we can add more later, but want to ensure known references have an entry
+ // even if no records are observed mapping to it
+ //
for (size_t i = 0; i < numReferenceSequences; ++i)
rawReferenceEntries_[i] = PbiReferenceEntry(i);
+
+ // also create an "unmapped" entry
rawReferenceEntries_[PbiReferenceEntry::UNMAPPED_ID] = PbiReferenceEntry();
}
@@ -147,54 +157,112 @@ PbiRawReferenceData PbiRawReferenceDataBuilder::Result(void) const {
class PbiBuilderPrivate
{
public:
- PbiBuilderPrivate(const string& filename, const size_t numReferenceSequences);
+ PbiBuilderPrivate(const string& filename,
+ const size_t numReferenceSequences,
+ const PbiBuilder::CompressionLevel compressionLevel,
+ const size_t numThreads);
+ PbiBuilderPrivate(const string& filename,
+ const size_t numReferenceSequences,
+ const bool isCoordinateSorted,
+ const PbiBuilder::CompressionLevel compressionLevel,
+ const size_t numThreads);
~PbiBuilderPrivate(void);
public:
void AddRecord(const BamRecord& record, const int64_t vOffset);
public:
+ bool HasBarcodeData(void) const;
+ bool HasMappedData(void) const;
+ bool HasReferenceData(void) const;
+
+public:
unique_ptr<BGZF, HtslibBgzfDeleter> bgzf_;
PbiRawData rawData_;
PbiReferenceEntry::Row currentRow_;
- bool hasMappedData_;
- bool hasBarcodeData_;
- bool hasReferenceData_;
- unique_ptr<PbiRawReferenceDataBuilder> refDataBuilder_;
+ unique_ptr<PbiRawReferenceDataBuilder> refDataBuilder_;
};
-PbiBuilderPrivate::PbiBuilderPrivate(const string& filename, const size_t numReferenceSequences)
- : bgzf_(bgzf_open(filename.c_str(), "wb"))
+PbiBuilderPrivate::PbiBuilderPrivate(const string& filename,
+ const size_t numReferenceSequences,
+ const PbiBuilder::CompressionLevel compressionLevel,
+ const size_t numThreads)
+ : bgzf_(nullptr)
, currentRow_(0)
- , hasMappedData_(true)
- , hasBarcodeData_(true)
- , hasReferenceData_(true)
, refDataBuilder_(nullptr)
{
- if (bgzf_.get()== 0)
+ const string& mode = string("wb") + to_string(static_cast<int>(compressionLevel));
+ bgzf_.reset(bgzf_open(filename.c_str(), mode.c_str()));
+ if (bgzf_.get() == 0)
throw std::runtime_error("could not open PBI file for writing");
+ size_t actualNumThreads = numThreads;
+ if (actualNumThreads == 0) {
+ actualNumThreads = thread::hardware_concurrency();
+
+ // if still unknown, default to single-threaded
+ if (actualNumThreads == 0)
+ actualNumThreads = 1;
+ }
+
+ // if multithreading requested, enable it
+ if (actualNumThreads > 1)
+ bgzf_mt(bgzf_.get(), actualNumThreads, 256);
+
if (numReferenceSequences > 0)
refDataBuilder_.reset(new PbiRawReferenceDataBuilder(numReferenceSequences));
- else
- hasReferenceData_ = false;
+}
+
+PbiBuilderPrivate::PbiBuilderPrivate(const string& filename,
+ const size_t numReferenceSequences,
+ const bool isCoordinateSorted,
+ const PbiBuilder::CompressionLevel compressionLevel,
+ const size_t numThreads)
+ : bgzf_(nullptr)
+ , currentRow_(0)
+ , refDataBuilder_(nullptr)
+{
+ const string& mode = string("wb") + to_string(static_cast<int>(compressionLevel));
+ bgzf_.reset(bgzf_open(filename.c_str(), mode.c_str()));
+ if (bgzf_.get() == 0)
+ throw std::runtime_error("could not open PBI file for writing");
+
+ size_t actualNumThreads = numThreads;
+ if (actualNumThreads == 0) {
+ actualNumThreads = thread::hardware_concurrency();
+
+ // if still unknown, default to single-threaded
+ if (actualNumThreads == 0)
+ actualNumThreads = 1;
+ }
+
+ // if multithreading requested, enable it
+ if (actualNumThreads > 1)
+ bgzf_mt(bgzf_.get(), actualNumThreads, 256);
+
+ if (isCoordinateSorted && numReferenceSequences > 0)
+ refDataBuilder_.reset(new PbiRawReferenceDataBuilder(numReferenceSequences));
}
PbiBuilderPrivate::~PbiBuilderPrivate(void)
{
rawData_.NumReads(currentRow_);
+ const auto hasBarcodeData = HasBarcodeData();
+ const auto hasMappedData = HasMappedData();
+ const auto hasReferenceData = HasReferenceData();
+
// fetch reference data, if available
- if (hasReferenceData_) {
+ if (hasReferenceData) {
assert(refDataBuilder_);
rawData_.ReferenceData() = std::move(refDataBuilder_->Result());
}
// determine flags
- PbiFile::Sections sections = PbiFile::SUBREAD;
- if (hasMappedData_) sections |= PbiFile::MAPPED;
- if (hasBarcodeData_) sections |= PbiFile::BARCODE;
- if (hasReferenceData_) sections |= PbiFile::REFERENCE;
+ PbiFile::Sections sections = PbiFile::BASIC;
+ if (hasMappedData) sections |= PbiFile::MAPPED;
+ if (hasBarcodeData) sections |= PbiFile::BARCODE;
+ if (hasReferenceData) sections |= PbiFile::REFERENCE;
rawData_.FileSections(sections);
// write index contents to file
@@ -202,34 +270,85 @@ PbiBuilderPrivate::~PbiBuilderPrivate(void)
PbiIndexIO::WriteHeader(rawData_, fp);
const uint32_t numReads = rawData_.NumReads();
if (numReads > 0) {
- PbiIndexIO::WriteSubreadData(rawData_.SubreadData(), numReads, fp);
- if (rawData_.HasMappedData())
- PbiIndexIO::WriteMappedData(rawData_.MappedData(), numReads, fp);
- if (rawData_.HasReferenceData())
- PbiIndexIO::WriteReferenceData(rawData_.ReferenceData(), fp);
- if (rawData_.HasBarcodeData())
- PbiIndexIO::WriteBarcodeData(rawData_.BarcodeData(), numReads, fp);
+ PbiIndexIO::WriteBasicData(rawData_.BasicData(), numReads, fp);
+ if (hasMappedData) PbiIndexIO::WriteMappedData(rawData_.MappedData(), numReads, fp);
+ if (hasReferenceData) PbiIndexIO::WriteReferenceData(rawData_.ReferenceData(), fp);
+ if (hasBarcodeData) PbiIndexIO::WriteBarcodeData(rawData_.BarcodeData(), numReads, fp);
}
}
void PbiBuilderPrivate::AddRecord(const BamRecord& record, const int64_t vOffset)
{
+ // ensure updated data
record.ResetCachedPositions();
- rawData_.SubreadData().AddRecord(record, vOffset);
-
- if (hasMappedData_)
- hasMappedData_ &= rawData_.MappedData().AddRecord(record);
+ // store data
+ rawData_.BarcodeData().AddRecord(record);
+ rawData_.BasicData().AddRecord(record, vOffset);
+ rawData_.MappedData().AddRecord(record);
- if (hasReferenceData_)
- hasBarcodeData_ &= rawData_.BarcodeData().AddRecord(record);
+ if (refDataBuilder_) {
- if (hasReferenceData_)
- hasReferenceData_ &= refDataBuilder_->AddRecord(record, currentRow_);
+ // stop storing coordinate-sorted reference data if we encounter out-of-order record
+ const bool sorted = refDataBuilder_->AddRecord(record, currentRow_);
+ if (!sorted)
+ refDataBuilder_.reset();
+ }
+ // increment row counter
++currentRow_;
}
+bool PbiBuilderPrivate::HasBarcodeData(void) const
+{
+ // fetch data components
+ const auto& barcodeData = rawData_.BarcodeData();
+ const auto& bcForward = barcodeData.bcForward_;
+ const auto& bcReverse = barcodeData.bcReverse_;
+ const auto& bcQuality = barcodeData.bcQual_;
+
+ // ensure valid sizes
+ if (bcForward.size() != bcReverse.size() &&
+ bcForward.size() != bcQuality.size())
+ {
+ auto msg = string{ "error: inconsistency in PBI barcode data:\n" };
+ msg += string{ " bcForward has " } + to_string(bcForward.size()) + string{ " elements\n" };
+ msg += string{ " bcReverse has " } + to_string(bcReverse.size()) + string{ " elements\n" };
+ msg += string{ " bcQuality has " } + to_string(bcQuality.size()) + string{ " elements\n" };
+ msg += string{ "\n" };
+ msg += string{ " these containers should contain equal number of elements.\n" };
+ throw std::runtime_error(msg);
+ }
+ assert(bcForward.size() == rawData_.NumReads());
+
+ // check for data
+ for (uint32_t i = 0; i < rawData_.NumReads(); ++i) {
+ if (bcForward.at(i) != -1 ||
+ bcReverse.at(i) != -1 ||
+ bcQuality.at(i) != -1 )
+ {
+ return true;
+ }
+ }
+ // no actual data found
+ return false;
+}
+
+bool PbiBuilderPrivate::HasMappedData(void) const
+{
+ const auto& mappedData = rawData_.MappedData();
+ const auto& tIds = mappedData.tId_;
+ assert(tIds.size() == rawData_.NumReads());
+ for (const auto tId : tIds) {
+ if (tId >= 0)
+ return true;
+ }
+ return false; // all reads unmapped
+}
+
+bool PbiBuilderPrivate::HasReferenceData(void) const
+{ return bool(refDataBuilder_); }
+
} // namespace internal
} // namespace BAM
} // namespace PacBio
@@ -238,18 +357,44 @@ void PbiBuilderPrivate::AddRecord(const BamRecord& record, const int64_t vOffset
// PbiBuilder implementation
// ---------------------------
-PbiBuilder::PbiBuilder(const string& pbiFilename)
- : d_(new internal::PbiBuilderPrivate(pbiFilename, 0))
+PbiBuilder::PbiBuilder(const string& pbiFilename,
+ const CompressionLevel compressionLevel,
+ const size_t numThreads)
+ : d_(new internal::PbiBuilderPrivate(pbiFilename,
+ 0,
+ compressionLevel,
+ numThreads))
{ }
-PbiBuilder::PbiBuilder(const string& pbiFilename, const size_t numReferenceSequences)
- : d_(new internal::PbiBuilderPrivate(pbiFilename, numReferenceSequences))
+PbiBuilder::PbiBuilder(const string& pbiFilename,
+ const size_t numReferenceSequences,
+ const CompressionLevel compressionLevel,
+ const size_t numThreads)
+ : d_(new internal::PbiBuilderPrivate(pbiFilename,
+ numReferenceSequences,
+ compressionLevel,
+ numThreads))
+{ }
+
+PbiBuilder::PbiBuilder(const string& pbiFilename,
+ const size_t numReferenceSequences,
+ const bool isCoordinateSorted,
+ const CompressionLevel compressionLevel,
+ const size_t numThreads)
+ : d_(new internal::PbiBuilderPrivate(pbiFilename,
+ numReferenceSequences,
+ isCoordinateSorted,
+ compressionLevel,
+ numThreads))
{ }
PbiBuilder::~PbiBuilder(void) { }
void PbiBuilder::AddRecord(const BamRecord& record, const int64_t vOffset)
-{ d_->AddRecord(record, vOffset); }
+{
+ internal::BamRecordMemory::UpdateRecordTags(record);
+ d_->AddRecord(record, vOffset);
+}
const PbiRawData& PbiBuilder::Index(void) const
{ return d_->rawData_; }
diff --git a/src/PbiFile.cpp b/src/PbiFile.cpp
index 0318e42..144c847 100644
--- a/src/PbiFile.cpp
+++ b/src/PbiFile.cpp
@@ -33,15 +33,16 @@
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
//
+// File Description
+/// \file PbiFile.cpp
+/// \brief Implements the PbiFile methods.
+//
// Author: Derek Barnett
#include "pbbam/PbiFile.h"
#include "pbbam/BamFile.h"
#include "pbbam/PbiBuilder.h"
-#include "MemoryUtils.h"
-#include <htslib/sam.h>
-#include <cassert>
-
+#include "pbbam/BamReader.h"
using namespace PacBio;
using namespace PacBio::BAM;
using namespace PacBio::BAM::PbiFile;
@@ -51,34 +52,20 @@ namespace PacBio {
namespace BAM {
namespace PbiFile {
-void CreateFrom(const BamFile& bamFile)
+void CreateFrom(const BamFile& bamFile,
+ const PbiBuilder::CompressionLevel compressionLevel,
+ const size_t numThreads)
{
- // open input file for file handle & header
- unique_ptr<samFile,internal::HtslibFileDeleter> htsFile(sam_open(bamFile.Filename().c_str(), "rb"));
- if (!htsFile)
- throw std::runtime_error("could not open BAM file for reading");
-
- unique_ptr<bam_hdr_t, internal::HtslibHeaderDeleter> htsHeader(sam_hdr_read(htsFile.get()));
- if (!htsHeader)
- throw std::runtime_error("could not read BAM header data");
-
- samFile* fp = htsFile.get();
- bam_hdr_t* hdr = htsHeader.get();
- assert(fp);
- assert(hdr);
-
- // setup our record object
- BamRecord record;
- bam1_t* b = internal::BamRecordMemory::GetRawData(record).get();
- if (b == 0)
- throw std::runtime_error("could not allocate BAM record");
-
- // iterate through file, building index data
- PbiBuilder builder(bamFile.PacBioIndexFilename(), bamFile.Header().Sequences().size());
- int64_t offset = bgzf_tell(fp->fp.bgzf);
- while (sam_read1(fp, hdr, b) >= 0) {
- builder.AddRecord(record, offset);
- offset = bgzf_tell(fp->fp.bgzf);
+ PbiBuilder builder(bamFile.PacBioIndexFilename(),
+ bamFile.Header().Sequences().size(),
+ compressionLevel,
+ numThreads);
+ BamReader reader(bamFile);
+ BamRecord b;
+ int64_t offset = reader.VirtualTell();
+ while (reader.GetNext(b)) {
+ builder.AddRecord(b, offset);
+ offset = reader.VirtualTell();
}
}
diff --git a/src/PbiFilter.cpp b/src/PbiFilter.cpp
new file mode 100644
index 0000000..5c738bf
--- /dev/null
+++ b/src/PbiFilter.cpp
@@ -0,0 +1,249 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiFilter.cpp
+/// \brief Implements the PbiFilter class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/PbiFilter.h"
+#include "pbbam/PbiFilterTypes.h"
+#include "StringUtils.h"
+#include <boost/algorithm/string/case_conv.hpp>
+#include <boost/algorithm/string/trim.hpp>
+#include <algorithm>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+#include <cctype>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+enum class BuiltIn
+{
+ AlignedEndFilter
+ , AlignedLengthFilter
+ , AlignedStartFilter
+ , AlignedStrandFilter
+ , BarcodeFilter
+ , BarcodeForwardFilter
+ , BarcodeQualityFilter
+ , BarcodeReverseFilter
+ , BarcodesFilter
+ , IdentityFilter
+ , LocalContextFilter
+ , MovieNameFilter
+ , NumDeletedBasesFilter
+ , NumInsertedBasesFilter
+ , NumMatchesFilter
+ , NumMismatchesFilter
+ , QueryEndFilter
+ , QueryLengthFilter
+ , QueryNameFilter
+ , QueryStartFilter
+ , ReadAccuracyFilter
+ , ReadGroupFilter
+ , ReferenceEndFilter
+ , ReferenceIdFilter
+ , ReferenceNameFilter
+ , ReferenceStartFilter
+ , ZmwFilter
+};
+
+static const unordered_map<string, BuiltIn> builtInLookup =
+{
+ // property name built-in filter
+ { "ae", BuiltIn::AlignedEndFilter },
+ { "aend", BuiltIn::AlignedEndFilter },
+ { "alignedlength", BuiltIn::AlignedLengthFilter },
+ { "as", BuiltIn::AlignedStartFilter },
+ { "astart", BuiltIn::AlignedStartFilter },
+ { "readstart", BuiltIn::AlignedStartFilter },
+ { "bc", BuiltIn::BarcodeFilter },
+ { "barcode", BuiltIn::BarcodeFilter },
+ { "accuracy", BuiltIn::IdentityFilter },
+ { "identity", BuiltIn::IdentityFilter },
+ { "cx", BuiltIn::LocalContextFilter },
+ { "movie", BuiltIn::MovieNameFilter },
+ { "qe", BuiltIn::QueryEndFilter },
+ { "qend", BuiltIn::QueryEndFilter },
+ { "length", BuiltIn::QueryLengthFilter },
+ { "querylength", BuiltIn::QueryLengthFilter },
+ { "qname", BuiltIn::QueryNameFilter },
+ { "qs", BuiltIn::QueryStartFilter },
+ { "qstart", BuiltIn::QueryStartFilter },
+ { "rq", BuiltIn::ReadAccuracyFilter },
+ { "te", BuiltIn::ReferenceEndFilter },
+ { "tend", BuiltIn::ReferenceEndFilter },
+ { "rname", BuiltIn::ReferenceNameFilter },
+ { "ts", BuiltIn::ReferenceStartFilter },
+ { "tstart", BuiltIn::ReferenceStartFilter },
+ { "pos", BuiltIn::ReferenceStartFilter },
+ { "zm", BuiltIn::ZmwFilter },
+ { "zmw", BuiltIn::ZmwFilter }
+};
+
+static const unordered_map<string, LocalContextFlags> contextFlagNames =
+{
+ { "NO_LOCAL_CONTEXT", LocalContextFlags::NO_LOCAL_CONTEXT },
+ { "ADAPTER_BEFORE", LocalContextFlags::ADAPTER_BEFORE },
+ { "ADAPTER_AFTER", LocalContextFlags::ADAPTER_AFTER },
+ { "BARCODE_BEFORE", LocalContextFlags::BARCODE_BEFORE },
+ { "BARCODE_AFTER", LocalContextFlags::BARCODE_AFTER },
+ { "FORWARD_PASS", LocalContextFlags::FORWARD_PASS },
+ { "REVERSE_PASS", LocalContextFlags::REVERSE_PASS }
+};
+
+static
+PbiFilter CreateLocalContextFilter(const std::string& value,
+ const Compare::Type compareType)
+{
+ if (value.empty())
+ throw std::runtime_error("empty value for local context filter property");
+
+ LocalContextFlags filterValue = LocalContextFlags::NO_LOCAL_CONTEXT;
+
+ // if raw integer
+ if (isdigit(value.at(0)))
+ filterValue = static_cast<LocalContextFlags>(stoi(value));
+
+ // else interpret as flag names
+ else {
+ vector<string> tokens = std::move(internal::Split(value, '|'));
+ for (string& token : tokens) {
+ boost::algorithm::trim(token); // trim whitespace
+ filterValue = (filterValue | contextFlagNames.at(token));
+ }
+ }
+
+ return PbiFilter{ PbiLocalContextFilter{filterValue, compareType} };
+}
+
+static
+PbiFilter FromDataSetProperty(const Property& property)
+{
+ try {
+ const string& value = property.Value();
+ const Compare::Type compareType = Compare::TypeFromOperator(property.Operator());
+ const BuiltIn builtInCode = builtInLookup.at(boost::algorithm::to_lower_copy(property.Name()));
+ switch (builtInCode) {
+ case BuiltIn::AlignedEndFilter : return PbiAlignedEndFilter{ static_cast<uint32_t>(stoul(value)), compareType };
+ case BuiltIn::AlignedLengthFilter : return PbiAlignedLengthFilter{ static_cast<uint32_t>(stoul(value)), compareType };
+ case BuiltIn::AlignedStartFilter : return PbiAlignedStartFilter{ static_cast<uint32_t>(stoul(value)), compareType };
+ case BuiltIn::BarcodeFilter : return PbiBarcodeFilter{ static_cast<uint16_t>(stoul(value)), compareType };
+ case BuiltIn::IdentityFilter : return PbiIdentityFilter{ stof(value), compareType };
+ case BuiltIn::MovieNameFilter : return PbiMovieNameFilter{ value };
+ case BuiltIn::QueryEndFilter : return PbiQueryEndFilter{ stoi(value), compareType };
+ case BuiltIn::QueryLengthFilter : return PbiQueryLengthFilter{ stoi(value), compareType };
+ case BuiltIn::QueryNameFilter : return PbiQueryNameFilter{ value };
+ case BuiltIn::QueryStartFilter : return PbiQueryStartFilter{ stoi(value), compareType };
+ case BuiltIn::ReadAccuracyFilter : return PbiReadAccuracyFilter{ stof(value), compareType };
+ case BuiltIn::ReadGroupFilter : return PbiReadGroupFilter{ value, compareType };
+ case BuiltIn::ReferenceEndFilter : return PbiReferenceEndFilter{ static_cast<uint32_t>(stoul(value)), compareType };
+ case BuiltIn::ReferenceIdFilter : return PbiReferenceIdFilter{ stoi(value), compareType };
+ case BuiltIn::ReferenceNameFilter : return PbiReferenceNameFilter{ value };
+ case BuiltIn::ReferenceStartFilter : return PbiReferenceStartFilter{ static_cast<uint32_t>(stoul(value)), compareType };
+ case BuiltIn::ZmwFilter : return PbiZmwFilter{ stoi(value), compareType };
+ case BuiltIn::LocalContextFilter :
+ {
+ return CreateLocalContextFilter(value, compareType);
+ }
+ default :
+ throw std::exception();
+ }
+ // unreachable
+ return PbiFilter{ };
+
+ } catch (std::exception& e) {
+ stringstream s;
+ s << "error: could not create filter from XML Property element: " << endl
+ << " Name: " << property.Name() << endl
+ << " Value: " << property.Value() << endl
+ << " Operator: " << property.Operator() << endl
+ << " reason: " << e.what() << endl;
+ throw std::runtime_error(s.str());
+ }
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+PbiFilter PbiFilter::FromDataSet(const DataSet& dataset)
+{
+ auto datasetFilter = PbiFilter{ PbiFilter::UNION };
+ for (auto&& xmlFilter : dataset.Filters()) {
+ auto propertiesFilter = PbiFilter{ };
+ for (auto&& xmlProperty : xmlFilter.Properties())
+ propertiesFilter.Add(internal::FromDataSetProperty(xmlProperty));
+ datasetFilter.Add(propertiesFilter);
+ }
+ return datasetFilter;
+}
+
+PbiFilter PbiFilter::Intersection(const std::vector<PbiFilter>& filters)
+{
+ auto result = PbiFilter{ PbiFilter::INTERSECT };
+ result.Add(filters);
+ return result;
+}
+
+PbiFilter PbiFilter::Intersection(std::vector<PbiFilter>&& filters)
+{
+ auto result = PbiFilter{ PbiFilter::INTERSECT };
+ result.Add(std::move(filters));
+ return result;
+}
+
+PbiFilter PbiFilter::Union(const std::vector<PbiFilter>& filters)
+{
+ auto result = PbiFilter{ PbiFilter::UNION };
+ result.Add(filters);
+ return result;
+}
+
+PbiFilter PbiFilter::Union(std::vector<PbiFilter>&& filters)
+{
+ auto result = PbiFilter{ PbiFilter::UNION };
+ result.Add(std::move(filters));
+ return result;
+}
diff --git a/src/FilterEngine.cpp b/src/PbiFilterQuery.cpp
similarity index 72%
copy from src/FilterEngine.cpp
copy to src/PbiFilterQuery.cpp
index 1f47967..19d2b31 100644
--- a/src/FilterEngine.cpp
+++ b/src/PbiFilterQuery.cpp
@@ -32,45 +32,39 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file PbiFilterQuery.cpp
+/// \brief Implements the PbiFilterQuery class.
+//
// Author: Derek Barnett
-#include "pbbam/internal/FilterEngine.h"
+#include "pbbam/PbiFilterQuery.h"
+#include "pbbam/CompositeBamReader.h"
+
+
+#include <iostream>
+
using namespace PacBio;
using namespace PacBio::BAM;
using namespace PacBio::BAM::internal;
using namespace std;
-namespace PacBio {
-namespace BAM {
-namespace internal {
+struct PbiFilterQuery::PbiFilterQueryPrivate
+{
+ PbiFilterQueryPrivate(const PbiFilter& filter, const DataSet& dataset)
+ : reader_(filter, dataset)
+ { }
-} // namespace internal
-} // namespace BAM
-} // namespace PacBio
+ PbiFilterCompositeBamReader<Compare::None> reader_; // unsorted
+};
+PbiFilterQuery::PbiFilterQuery(const PbiFilter& filter, const DataSet& dataset)
+ : internal::IQuery()
+ , d_(new PbiFilterQueryPrivate(filter, dataset))
+{ }
-FilterEngine::FilterEngine(void) { }
+PbiFilterQuery::~PbiFilterQuery(void) { }
-bool FilterEngine::Accepts(const BamRecord& r) const
-{
-// foreach ( const FilterParameter& param, parameters_ ) {
-// if (!param.Accepts(r))
-// return false;
-// }
-// return true;
- (void)r;
- return true;
-}
-
-bool FilterEngine::Accepts(vector<BamRecord>& r) const
-{
- size_t i = 0;
- while (i < r.size()) {
- if (!Accepts(r.at(i)))
- r.erase(r.begin() + i);
- else
- ++i;
- }
- return !r.empty();
-}
+bool PbiFilterQuery::GetNext(BamRecord &r)
+{ return d_->reader_.GetNext(r); }
diff --git a/src/PbiFilterTypes.cpp b/src/PbiFilterTypes.cpp
new file mode 100644
index 0000000..e052c63
--- /dev/null
+++ b/src/PbiFilterTypes.cpp
@@ -0,0 +1,313 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiFilterTypes.cpp
+/// \brief Implements the built-in PBI filters.
+//
+// Author: Derek Barnett
+
+#include "pbbam/PbiFilterTypes.h"
+#include "StringUtils.h"
+#include <boost/algorithm/string.hpp>
+#include <sstream>
+#include <string>
+#include <cassert>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+template<typename T>
+IndexList readLengthHelper(const std::vector<T>& start,
+ const std::vector<T>& end,
+ const T& value,
+ const Compare::Type cmp)
+{
+ assert(start.size() == end.size());
+
+ auto result = IndexList{ };
+ const auto numElements = start.size();
+ for (size_t i = 0; i < numElements; ++i) {
+ const auto readLength = end[i] - start[i];
+ bool keep = false;
+ switch(cmp) {
+ case Compare::EQUAL : keep = (readLength == value); break;
+ case Compare::NOT_EQUAL : keep = (readLength != value); break;
+ case Compare::LESS_THAN : keep = (readLength < value); break;
+ case Compare::LESS_THAN_EQUAL : keep = (readLength <= value); break;
+ case Compare::GREATER_THAN : keep = (readLength > value); break;
+ case Compare::GREATER_THAN_EQUAL : keep = (readLength >= value); break;
+ default:
+ assert(false);
+ throw std::runtime_error(string{"read length filter encountered unknown Compare::Type: "} +
+ Compare::TypeToName(cmp));
+ }
+
+ if (keep)
+ result.push_back(i);
+ }
+ return result;
+}
+
+static
+PbiFilter filterFromMovieName(const string& movieName, bool includeCcs)
+{
+ // we'll match on any rgIds from our candidate list
+ auto filter = PbiFilter{ PbiFilter::UNION };
+ filter.Add(
+ {
+ PbiReadGroupFilter{ MakeReadGroupId(movieName, "POLYMERASE") },
+ PbiReadGroupFilter{ MakeReadGroupId(movieName, "HQREGION") },
+ PbiReadGroupFilter{ MakeReadGroupId(movieName, "SUBREAD") },
+ PbiReadGroupFilter{ MakeReadGroupId(movieName, "SCRAP") },
+ PbiReadGroupFilter{ MakeReadGroupId(movieName, "UNKNOWN") }
+ });
+ if (includeCcs)
+ filter.Add(PbiReadGroupFilter{ MakeReadGroupId(movieName, "CCS") });
+
+ return filter;
+}
+
+static
+PbiFilter filterFromQueryName(const string& queryName)
+{
+ // split full name into moviename, holenumber
+ const auto nameParts = internal::Split(queryName, '/');
+ if (nameParts.size() != 3) {
+ auto msg = string{ "PbiQueryNameFilter error: requested QNAME (" } + queryName;
+ msg += string{ ") is not a valid PacBio BAM QNAME. See spec for details"};
+ throw std::runtime_error(msg);
+ }
+
+ // main filter: {union of candidate rgIds} && zmw [&& qStart && qEnd](non-CCS reads)
+ auto filter = PbiFilter{ };
+ filter.Add(PbiZmwFilter{ stoi(nameParts.at(1)) }); // hole number
+
+ const auto movieName = nameParts.at(0);
+
+ // CCS (only 1 possible candidate rgId)
+ if (nameParts.at(2) == "ccs")
+ filter.Add(PbiReadGroupFilter{ MakeReadGroupId(movieName, "CCS") });
+
+ // all other read types
+ else {
+ // we'll match on any read type that matches our qname
+ // (except for CCS since it has a different QNAME anyway)
+ const auto rgIdFilter = filterFromMovieName(movieName, false);
+ filter.Add(rgIdFilter);
+
+ // add qStart/qEnd filters to our main filter
+ const auto queryIntervalParts = internal::Split(nameParts.at(2), '_');
+ if (queryIntervalParts.size() != 2) {
+ auto msg = string{ "PbiQueryNameFilter error: requested QNAME (" } + queryName;
+ msg += string{ ") is not a valid PacBio BAM QNAME. See spec for details"};
+ throw std::runtime_error(msg);
+ }
+ filter.Add(PbiQueryStartFilter{ stoi(queryIntervalParts.at(0)) });
+ filter.Add(PbiQueryEndFilter{ stoi(queryIntervalParts.at(1)) });
+ }
+ return filter;
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+// PbiAlignedLengthFilter
+
+bool PbiAlignedLengthFilter::Accepts(const PbiRawData& idx, const size_t row) const
+{
+ const auto& mappedData = idx.MappedData();
+ const auto& aEnd = mappedData.aEnd_.at(row) ;
+ const auto& aStart = mappedData.aStart_.at(row);
+ const auto aLength = aEnd - aStart;
+ return CompareHelper(aLength);
+}
+
+// PbiIdentityFilter
+
+bool PbiIdentityFilter::Accepts(const PbiRawData& idx, const size_t row) const
+{
+ const auto& mappedData = idx.MappedData();
+ const auto& nMM = mappedData.nMM_.at(row);
+ const auto& nIndels = mappedData.NumDeletedAndInsertedBasesAt(row);
+ const auto& nDel = nIndels.first;
+ const auto& nIns = nIndels.second;
+
+ const auto& basicData = idx.BasicData();
+ const auto& qStart = basicData.qStart_.at(row);
+ const auto& qEnd = basicData.qEnd_.at(row);
+
+ const auto readLength = qEnd - qStart;
+ const auto nonMatches = nMM + nDel + nIns;
+ const float identity = 1.0 - (static_cast<float>(nonMatches)/static_cast<float>(readLength));
+
+ return CompareHelper(identity);
+}
+
+// PbiMovieNameFilter
+
+PbiMovieNameFilter::PbiMovieNameFilter(const std::string& movieName)
+ : compositeFilter_(internal::filterFromMovieName(movieName, true)) // include CCS
+{ }
+
+PbiMovieNameFilter::PbiMovieNameFilter(const std::vector<std::string>& whitelist)
+ : compositeFilter_(PbiFilter::UNION)
+{
+ for (const auto& movieName : whitelist)
+ compositeFilter_.Add(internal::filterFromMovieName(movieName, true)); // include CCS
+}
+
+PbiMovieNameFilter::PbiMovieNameFilter(std::vector<std::string>&& whitelist)
+ : compositeFilter_(PbiFilter::UNION)
+{
+ for (auto&& movieName : whitelist)
+ compositeFilter_.Add(internal::filterFromMovieName(movieName, true)); // include CCS
+}
+
+// PbiQueryLengthFilter
+
+bool PbiQueryLengthFilter::Accepts(const PbiRawData& idx, const size_t row) const
+{
+ const auto& basicData = idx.BasicData();
+ const auto& qStart = basicData.qStart_.at(row);
+ const auto& qEnd = basicData.qEnd_.at(row);
+ const auto readLength = qEnd - qStart;
+ return CompareHelper(readLength);
+}
+
+// PbiQueryNameFilter
+
+PbiQueryNameFilter::PbiQueryNameFilter(const std::string& qname)
+ : compositeFilter_(internal::filterFromQueryName(qname))
+{ }
+
+PbiQueryNameFilter::PbiQueryNameFilter(const std::vector<std::string>& whitelist)
+ : compositeFilter_(PbiFilter::UNION)
+{
+ try {
+ for (const auto& qname : whitelist)
+ compositeFilter_.Add(internal::filterFromQueryName(qname));
+ }
+ // simply re-throw our own exception
+ catch (std::runtime_error&) {
+ throw;
+ }
+ // we may hit other exceptions (e.g. in stoi()) - but we'll pin on a bit of extra data
+ catch (std::exception& e) {
+ auto msg = string{ "PbiQueryNameFilter encountered error: " } + e.what();
+ throw std::runtime_error(msg);
+ }
+}
+
+PbiQueryNameFilter::PbiQueryNameFilter(std::vector<std::string>&& whitelist)
+ : compositeFilter_(PbiFilter::UNION)
+{
+ try {
+ for (const auto& qname : whitelist)
+ compositeFilter_.Add(internal::filterFromQueryName(qname));
+ }
+ // simply re-throw our own exception
+ catch (std::runtime_error&) {
+ throw;
+ }
+ // we may hit other exceptions (e.g. in stoi()) - but we'll pin on a bit of extra data
+ catch (std::exception& e) {
+ auto msg = string{ "PbiQueryNameFilter encountered error: " } + e.what();
+ throw std::runtime_error(msg);
+ }
+}
+
+// PbiReferenceNameFilter
+
+PbiReferenceNameFilter::PbiReferenceNameFilter(const std::string& rname,
+ const Compare::Type cmp)
+ : initialized_(false)
+ , rname_(rname)
+ , cmp_(cmp)
+{
+ if (cmp != Compare::EQUAL && cmp != Compare::NOT_EQUAL) {
+ auto msg = std::string{ "Compare type: " };
+ msg += Compare::TypeToName(cmp);
+ msg += " not supported for PbiReferenceNameFilter (use one of Compare::EQUAL or Compare::NOT_EQUAL).";
+ throw std::runtime_error(msg);
+ }
+}
+
+PbiReferenceNameFilter::PbiReferenceNameFilter(const std::vector<std::string>& whitelist)
+ : initialized_(false)
+ , rnameWhitelist_(whitelist)
+ , cmp_(Compare::EQUAL)
+{ }
+
+PbiReferenceNameFilter::PbiReferenceNameFilter(std::vector<std::string>&& whitelist)
+ : initialized_(false)
+ , rnameWhitelist_(std::move(whitelist))
+ , cmp_(Compare::EQUAL)
+{ }
+
+bool PbiReferenceNameFilter::Accepts(const PbiRawData& idx, const size_t row) const
+{
+ if (!initialized_)
+ Initialize(idx);
+ return subFilter_.Accepts(idx, row);
+}
+
+void PbiReferenceNameFilter::Initialize(const PbiRawData& idx) const
+{
+ const auto pbiFilename = idx.Filename();
+ const auto bamFilename = pbiFilename.substr(0, pbiFilename.length() - 4);
+ const auto bamFile = BamFile{ bamFilename };
+
+ // single-value
+ if (rnameWhitelist_ == boost::none) {
+ const auto tId = bamFile.ReferenceId(rname_);
+ subFilter_ = PbiReferenceIdFilter{ tId, cmp_ };
+ }
+
+ // multi-value whitelist
+ else {
+ subFilter_ = PbiFilter(PbiFilter::UNION);
+ for (const auto& rname : rnameWhitelist_.get())
+ subFilter_.Add(PbiReferenceIdFilter{ bamFile.ReferenceId(rname) });
+ }
+ initialized_ = true;
+}
+
diff --git a/src/PbiIndex.cpp b/src/PbiIndex.cpp
index 2225a6c..3f54f7b 100644
--- a/src/PbiIndex.cpp
+++ b/src/PbiIndex.cpp
@@ -33,6 +33,10 @@
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
//
+// File Description
+/// \file PbiIndex.cpp
+/// \brief Implements the PbiIndex class.
+//
// Author: Derek Barnett
#include "pbbam/PbiIndex.h"
@@ -43,55 +47,21 @@ using namespace PacBio::BAM::internal;
using namespace std;
// ----------------------------------
-// IndexResultBlock implementation
-// ----------------------------------
-
-IndexResultBlock::IndexResultBlock(void)
- : firstIndex_(0)
- , numReads_(0)
- , virtualOffset_(-1)
-{ }
-
-IndexResultBlock::IndexResultBlock(size_t idx, size_t numReads)
- : firstIndex_(idx)
- , numReads_(numReads)
- , virtualOffset_(-1)
-{ }
-
-bool IndexResultBlock::operator==(const IndexResultBlock& other) const
-{
- return firstIndex_ == other.firstIndex_ &&
- numReads_ == other.numReads_ &&
- virtualOffset_ == other.virtualOffset_;
-}
-
-bool IndexResultBlock::operator!=(const IndexResultBlock& other) const
-{ return !(*this == other); }
-
-// ----------------------------------
// SubreadLookupData implementation
// ----------------------------------
-SubreadLookupData::SubreadLookupData(void) { }
+BasicLookupData::BasicLookupData(void) { }
-SubreadLookupData::SubreadLookupData(const PbiRawSubreadData& rawData)
+BasicLookupData::BasicLookupData(const PbiRawBasicData& rawData)
: rgId_(rawData.rgId_)
, qStart_(rawData.qStart_)
, qEnd_(rawData.qEnd_)
, holeNumber_(rawData.holeNumber_)
, readQual_(rawData.readQual_)
+ , ctxtFlag_(rawData.ctxtFlag_)
, fileOffset_(rawData.fileOffset_)
{ }
-//SubreadLookupData::SubreadLookupData(PbiRawSubreadData&& rawData)
-// : rgId_(std::move(rawData.rgId_))
-// , qStart_(std::move(rawData.qStart_))
-// , qEnd_(std::move(rawData.qEnd_))
-// , holeNumber_(std::move(rawData.holeNumber_))
-// , readQual_(std::move(rawData.readQual_))
-// , fileOffset_(std::move(rawData.fileOffset_))
-//{ }
-
// ----------------------------------
// MappedLookupData implementation
// ----------------------------------
@@ -116,17 +86,10 @@ MappedLookupData::MappedLookupData(const PbiRawMappedData& rawData)
std::map<uint32_t, IndexList> delRawData;
for (size_t i = 0; i < numElements; ++i) {
- // nIns, nDel
- const uint32_t aStart = rawData.aStart_.at(i);
- const uint32_t aEnd = rawData.aEnd_.at(i);
- const uint32_t tStart = rawData.tStart_.at(i);
- const uint32_t tEnd = rawData.tEnd_.at(i);
- const uint32_t nM = rawData.nM_.at(i);
- const uint32_t nMM = rawData.nMM_.at(i);
- const uint32_t numIns = (aEnd - aStart - nM - nMM);
- const uint32_t numDel = (tEnd - tStart - nM - nMM);
- insRawData[numIns].push_back(i);
- delRawData[numDel].push_back(i);
+ // nDel, nIns
+ const auto indels = rawData.NumDeletedAndInsertedBasesAt(i);
+ delRawData[indels.first].push_back(i);
+ insRawData[indels.second].push_back(i);
// strand
if (rawData.revStrand_.at(i) == 0)
@@ -139,27 +102,6 @@ MappedLookupData::MappedLookupData(const PbiRawMappedData& rawData)
nDel_ = OrderedLookup<uint32_t>(std::move(delRawData));
}
-//MappedLookupData::MappedLookupData(PbiRawMappedData&& rawData)
-// : tId_(std::move(rawData.tId_))
-// , tStart_(std::move(rawData.tStart_))
-// , tEnd_(std::move(rawData.tEnd_))
-// , aStart_(std::move(rawData.aStart_))
-// , aEnd_(std::move(rawData.aEnd_))
-// , nM_(std::move(rawData.nM_))
-// , nMM_(std::move(rawData.nMM_))
-// , mapQV_(std::move(rawData.mapQV_))
-//{
-// const size_t numElements = rawData.revStrand_.size();
-// reverseStrand_.reserve(numElements/2);
-// forwardStrand_.reserve(numElements/2);
-// for (size_t i = 0; i < numElements; ++i) {
-// if (rawData.revStrand_.at(i) == 0)
-// forwardStrand_.push_back(i);
-// else
-// reverseStrand_.push_back(i);
-// }
-//}
-
// ----------------------------------
// BarcodeLookupData implementation
// ----------------------------------
@@ -167,18 +109,11 @@ MappedLookupData::MappedLookupData(const PbiRawMappedData& rawData)
BarcodeLookupData::BarcodeLookupData(void) { }
BarcodeLookupData::BarcodeLookupData(const PbiRawBarcodeData& rawData)
- : bcLeft_(rawData.bcLeft_)
- , bcRight_(rawData.bcRight_)
+ : bcForward_(rawData.bcForward_)
+ , bcReverse_(rawData.bcReverse_)
, bcQual_(rawData.bcQual_)
- , ctxtFlag_(rawData.ctxtFlag_)
-{ }
-//BarcodeLookupData::BarcodeLookupData(PbiRawBarcodeData&& rawData)
-// : bcLeft_(std::move(rawData.bcLeft_))
-// , bcRight_(std::move(rawData.bcRight_))
-// , bcQual_(std::move(rawData.bcQual_))
-// , ctxtFlag_(std::move(rawData.ctxtFlag_))
-//{ }
+{ }
// ----------------------------------
// ReferenceLookupData implementation
@@ -196,41 +131,33 @@ ReferenceLookupData::ReferenceLookupData(const PbiRawReferenceData& rawData)
}
}
-//ReferenceLookupData::ReferenceLookupData(PbiRawReferenceData&& rawData)
-//{
-// const size_t numEntries = rawData.entries_.size();
-// references_.reserve(numEntries);
-// for (size_t i = 0; i < numEntries; ++i) {
-// const PbiReferenceEntry& entry = rawData.entries_.at(i);
-// references_[entry.tId_] = IndexRange(entry.beginRow_, entry.endRow_);
-// }
-//}
-
// --------------------------------
// PbiIndexPrivate implementation
// --------------------------------
PbiIndexPrivate::PbiIndexPrivate(void)
: version_(PbiFile::CurrentVersion)
- , sections_(PbiFile::SUBREAD)
+ , sections_(PbiFile::BASIC)
, numReads_(0)
{ }
PbiIndexPrivate::PbiIndexPrivate(const PbiRawData& rawIndex)
- : version_(rawIndex.Version())
+ : filename_(rawIndex.Filename())
+ , version_(rawIndex.Version())
, sections_(rawIndex.FileSections())
, numReads_(rawIndex.NumReads())
- , subreadData_(rawIndex.SubreadData())
+ , basicData_(rawIndex.BasicData())
, mappedData_(rawIndex.MappedData())
, referenceData_(rawIndex.ReferenceData())
, barcodeData_(rawIndex.BarcodeData())
{ }
PbiIndexPrivate::PbiIndexPrivate(PbiRawData&& rawIndex)
- : version_(std::move(rawIndex.Version()))
+ : filename_(std::move(rawIndex.Filename()))
+ , version_(std::move(rawIndex.Version()))
, sections_(std::move(rawIndex.FileSections()))
, numReads_(std::move(rawIndex.NumReads()))
- , subreadData_(std::move(rawIndex.SubreadData()))
+ , basicData_(std::move(rawIndex.BasicData()))
, mappedData_(std::move(rawIndex.MappedData()))
, referenceData_(std::move(rawIndex.ReferenceData()))
, barcodeData_(std::move(rawIndex.BarcodeData()))
@@ -239,10 +166,11 @@ PbiIndexPrivate::PbiIndexPrivate(PbiRawData&& rawIndex)
unique_ptr<PbiIndexPrivate> PbiIndexPrivate::DeepCopy(void) const
{
std::unique_ptr<PbiIndexPrivate> copy(new PbiIndexPrivate);
+ copy->filename_ = filename_;
copy->version_ = version_;
copy->sections_ = sections_;
copy->numReads_ = numReads_;
- copy->subreadData_ = subreadData_;
+ copy->basicData_ = basicData_;
copy->mappedData_ = mappedData_;
copy->referenceData_ = referenceData_;
copy->barcodeData_ = barcodeData_;
@@ -286,26 +214,5 @@ PbiIndex& PbiIndex::operator=(PbiIndex&& other)
PbiIndex::~PbiIndex(void) { }
-PbiFile::Sections PbiIndex::FileSections(void) const
-{ return d_->sections_; }
-
-bool PbiIndex::HasBarcodeData(void) const
-{ return d_->HasSection(PbiFile::BARCODE); }
-
-bool PbiIndex::HasMappedData(void) const
-{ return d_->HasSection(PbiFile::MAPPED); }
-
-bool PbiIndex::HasReferenceData(void) const
-{ return d_->HasSection(PbiFile::REFERENCE); }
-
-bool PbiIndex::HasSection(const PbiFile::Section section) const
-{ return d_->HasSection(section); }
-
-uint32_t PbiIndex::NumReads(void) const
-{ return d_->numReads_; }
-
-PbiFile::VersionEnum PbiIndex::Version(void) const
-{ return d_->version_; }
-
-const vector<int64_t>& PbiIndex::VirtualFileOffsets(void) const
-{ return d_->subreadData_.fileOffset_; }
+string PbiIndex::Filename(void) const
+{ return d_->filename_; }
diff --git a/src/PbiIndexIO.cpp b/src/PbiIndexIO.cpp
index 9d0d4e7..7b7733b 100644
--- a/src/PbiIndexIO.cpp
+++ b/src/PbiIndexIO.cpp
@@ -73,10 +73,13 @@ void PbiIndexIO::Load(PbiRawData& rawData,
LoadHeader(rawData, fp);
const uint32_t numReads = rawData.NumReads();
if (numReads > 0) {
- LoadSubreadData(rawData.SubreadData(), numReads, fp);
- LoadMappedData(rawData.MappedData(), numReads, fp);
- LoadReferenceData(rawData.ReferenceData(), fp);
- LoadBarcodeData(rawData.BarcodeData(), numReads, fp);
+ LoadBasicData(rawData.BasicData(), numReads, fp);
+ if (rawData.HasMappedData())
+ LoadMappedData(rawData.MappedData(), numReads, fp);
+ if (rawData.HasReferenceData())
+ LoadReferenceData(rawData.ReferenceData(), fp);
+ if (rawData.HasBarcodeData())
+ LoadBarcodeData(rawData.BarcodeData(), numReads, fp);
}
}
@@ -85,16 +88,15 @@ void PbiIndexIO::LoadBarcodeData(PbiRawBarcodeData& barcodeData,
BGZF* fp)
{
assert(numReads > 0);
+ (void)numReads; // quash warnings building in release mode
- LoadBgzfVector(fp, barcodeData.bcLeft_, numReads);
- LoadBgzfVector(fp, barcodeData.bcRight_, numReads);
- LoadBgzfVector(fp, barcodeData.bcQual_, numReads);
- LoadBgzfVector(fp, barcodeData.ctxtFlag_, numReads);
+ LoadBgzfVector(fp, barcodeData.bcForward_, numReads);
+ LoadBgzfVector(fp, barcodeData.bcReverse_, numReads);
+ LoadBgzfVector(fp, barcodeData.bcQual_, numReads);
- assert(barcodeData.bcLeft_.size() == numReads);
- assert(barcodeData.bcRight_.size() == numReads);
- assert(barcodeData.bcQual_.size() == numReads);
- assert(barcodeData.ctxtFlag_.size() == numReads);
+ assert(barcodeData.bcForward_.size() == numReads);
+ assert(barcodeData.bcReverse_.size() == numReads);
+ assert(barcodeData.bcQual_.size() == numReads);
}
void PbiIndexIO::LoadHeader(PbiRawData& index,
@@ -137,6 +139,7 @@ void PbiIndexIO::LoadMappedData(PbiRawMappedData& mappedData,
BGZF* fp)
{
assert(numReads > 0);
+ (void)numReads; // quash warnings building in release mode
LoadBgzfVector(fp, mappedData.tId_, numReads);
LoadBgzfVector(fp, mappedData.tStart_, numReads);
@@ -187,25 +190,28 @@ void PbiIndexIO::LoadReferenceData(PbiRawReferenceData& referenceData,
}
}
-void PbiIndexIO::LoadSubreadData(PbiRawSubreadData& subreadData,
+void PbiIndexIO::LoadBasicData(PbiRawBasicData& basicData,
const uint32_t numReads,
BGZF* fp)
{
assert(numReads > 0);
-
- LoadBgzfVector(fp, subreadData.rgId_, numReads);
- LoadBgzfVector(fp, subreadData.qStart_, numReads);
- LoadBgzfVector(fp, subreadData.qEnd_, numReads);
- LoadBgzfVector(fp, subreadData.holeNumber_, numReads);
- LoadBgzfVector(fp, subreadData.readQual_, numReads);
- LoadBgzfVector(fp, subreadData.fileOffset_, numReads);
-
- assert(subreadData.rgId_.size() == numReads);
- assert(subreadData.qStart_.size() == numReads);
- assert(subreadData.qEnd_.size() == numReads);
- assert(subreadData.holeNumber_.size() == numReads);
- assert(subreadData.readQual_.size() == numReads);
- assert(subreadData.fileOffset_.size() == numReads);
+ (void)numReads; // quash warnings building in release mode
+
+ LoadBgzfVector(fp, basicData.rgId_, numReads);
+ LoadBgzfVector(fp, basicData.qStart_, numReads);
+ LoadBgzfVector(fp, basicData.qEnd_, numReads);
+ LoadBgzfVector(fp, basicData.holeNumber_, numReads);
+ LoadBgzfVector(fp, basicData.readQual_, numReads);
+ LoadBgzfVector(fp, basicData.ctxtFlag_, numReads);
+ LoadBgzfVector(fp, basicData.fileOffset_, numReads);
+
+ assert(basicData.rgId_.size() == numReads);
+ assert(basicData.qStart_.size() == numReads);
+ assert(basicData.qEnd_.size() == numReads);
+ assert(basicData.holeNumber_.size() == numReads);
+ assert(basicData.readQual_.size() == numReads);
+ assert(basicData.ctxtFlag_.size() == numReads);
+ assert(basicData.fileOffset_.size() == numReads);
}
void PbiIndexIO::Save(const PbiRawData& index,
@@ -219,7 +225,7 @@ void PbiIndexIO::Save(const PbiRawData& index,
WriteHeader(index, fp);
const uint32_t numReads = index.NumReads();
if (numReads > 0) {
- WriteSubreadData(index.SubreadData(), numReads, fp);
+ WriteBasicData(index.BasicData(), numReads, fp);
if (index.HasMappedData())
WriteMappedData(index.MappedData(), numReads, fp);
@@ -235,15 +241,14 @@ void PbiIndexIO::WriteBarcodeData(const PbiRawBarcodeData& barcodeData,
BGZF* fp)
{
assert(numReads > 0);
- assert(barcodeData.bcLeft_.size() == numReads);
- assert(barcodeData.bcRight_.size() == numReads);
- assert(barcodeData.bcQual_.size() == numReads);
- assert(barcodeData.ctxtFlag_.size() == numReads);
+ assert(barcodeData.bcForward_.size() == numReads);
+ assert(barcodeData.bcReverse_.size() == numReads);
+ assert(barcodeData.bcQual_.size() == numReads);
+ (void)numReads; // quash warnings building in release mode
- WriteBgzfVector(fp, barcodeData.bcLeft_);
- WriteBgzfVector(fp, barcodeData.bcRight_);
+ WriteBgzfVector(fp, barcodeData.bcForward_);
+ WriteBgzfVector(fp, barcodeData.bcReverse_);
WriteBgzfVector(fp, barcodeData.bcQual_);
- WriteBgzfVector(fp, barcodeData.ctxtFlag_);
}
void PbiIndexIO::WriteHeader(const PbiRawData& index,
@@ -286,6 +291,7 @@ void PbiIndexIO::WriteMappedData(const PbiRawMappedData& mappedData,
assert(mappedData.nM_.size() == numReads);
assert(mappedData.nMM_.size() == numReads);
assert(mappedData.mapQV_.size() == numReads);
+ (void)numReads; // quash warnings building in release mode
WriteBgzfVector(fp, mappedData.tId_);
WriteBgzfVector(fp, mappedData.tStart_);
@@ -325,21 +331,24 @@ void PbiIndexIO::WriteReferenceData(const PbiRawReferenceData& referenceData,
}
}
-void PbiIndexIO::WriteSubreadData(const PbiRawSubreadData& subreadData,
- const uint32_t numReads,
- BGZF* fp)
+void PbiIndexIO::WriteBasicData(const PbiRawBasicData& basicData,
+ const uint32_t numReads,
+ BGZF* fp)
{
- assert(subreadData.rgId_.size() == numReads);
- assert(subreadData.qStart_.size() == numReads);
- assert(subreadData.qEnd_.size() == numReads);
- assert(subreadData.holeNumber_.size() == numReads);
- assert(subreadData.readQual_.size() == numReads);
- assert(subreadData.fileOffset_.size() == numReads);
-
- WriteBgzfVector(fp, subreadData.rgId_);
- WriteBgzfVector(fp, subreadData.qStart_);
- WriteBgzfVector(fp, subreadData.qEnd_);
- WriteBgzfVector(fp, subreadData.holeNumber_);
- WriteBgzfVector(fp, subreadData.readQual_);
- WriteBgzfVector(fp, subreadData.fileOffset_);
+ assert(basicData.rgId_.size() == numReads);
+ assert(basicData.qStart_.size() == numReads);
+ assert(basicData.qEnd_.size() == numReads);
+ assert(basicData.holeNumber_.size() == numReads);
+ assert(basicData.readQual_.size() == numReads);
+ assert(basicData.ctxtFlag_.size() == numReads);
+ assert(basicData.fileOffset_.size() == numReads);
+ (void)numReads; // quash warnings building in release mode
+
+ WriteBgzfVector(fp, basicData.rgId_);
+ WriteBgzfVector(fp, basicData.qStart_);
+ WriteBgzfVector(fp, basicData.qEnd_);
+ WriteBgzfVector(fp, basicData.holeNumber_);
+ WriteBgzfVector(fp, basicData.readQual_);
+ WriteBgzfVector(fp, basicData.ctxtFlag_);
+ WriteBgzfVector(fp, basicData.fileOffset_);
}
diff --git a/src/PbiIndexIO.h b/src/PbiIndexIO.h
index 9ec001d..1285a68 100644
--- a/src/PbiIndexIO.h
+++ b/src/PbiIndexIO.h
@@ -74,9 +74,9 @@ public:
BGZF* fp);
static void LoadReferenceData(PbiRawReferenceData& referenceData,
BGZF* fp);
- static void LoadSubreadData(PbiRawSubreadData& subreadData,
- const uint32_t numReads,
- BGZF* fp);
+ static void LoadBasicData(PbiRawBasicData& basicData,
+ const uint32_t numReads,
+ BGZF* fp);
// per-data-field load
template<typename T>
@@ -96,7 +96,7 @@ public:
BGZF* fp);
static void WriteReferenceData(const PbiRawReferenceData& referenceData,
BGZF* fp);
- static void WriteSubreadData(const PbiRawSubreadData& subreadData,
+ static void WriteBasicData(const PbiRawBasicData& subreadData,
const uint32_t numReads,
BGZF* fp);
diff --git a/src/PbiIndexedBamReader.cpp b/src/PbiIndexedBamReader.cpp
new file mode 100644
index 0000000..685d4c0
--- /dev/null
+++ b/src/PbiIndexedBamReader.cpp
@@ -0,0 +1,187 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiIndexedBamReader.cpp
+/// \brief Implements the PbiIndexedBamReader class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/PbiIndexedBamReader.h"
+#include <htslib/bgzf.h>
+
+#include <iostream>
+
+
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+struct PbiIndexedBamReaderPrivate
+{
+public:
+ PbiIndexedBamReaderPrivate(const string& pbiFilename)
+ : index_(pbiFilename)
+ , currentBlockReadCount_(0)
+ { }
+
+ void ApplyOffsets(void)
+ {
+ const std::vector<int64_t>& fileOffsets = index_.BasicData().fileOffset_;
+ for (IndexResultBlock& block : blocks_)
+ block.virtualOffset_ = fileOffsets.at(block.firstIndex_);
+ }
+
+ void Filter(const PbiFilter& filter)
+ {
+ // store request & reset counters
+ filter_ = filter;
+ currentBlockReadCount_ = 0;
+ blocks_.clear();
+
+ // find blocks of reads passing filter criteria
+ const uint32_t numReads = index_.NumReads();
+ if (filter_.IsEmpty()) {
+ blocks_.push_back(IndexResultBlock{0, numReads});
+ } else {
+ IndexList indices;
+ indices.reserve(numReads);
+ for (size_t i = 0; i < numReads; ++i) {
+ if (filter_.Accepts(index_, i))
+ indices.push_back(i);
+ }
+ blocks_ = mergedIndexBlocks(std::move(indices));
+ }
+
+ // apply offsets
+ ApplyOffsets();
+ }
+
+ int ReadRawData(BGZF* bgzf, bam1_t* b)
+ {
+ // no data to fetch, return false
+ if (blocks_.empty())
+ return -1; // "EOF"
+
+ // if on new block, seek to its first record
+ if (currentBlockReadCount_ == 0) {
+ auto seekResult = bgzf_seek(bgzf, blocks_.at(0).virtualOffset_, SEEK_SET);
+ if (seekResult == -1)
+ throw std::runtime_error("could not seek in BAM file");
+ }
+
+ // read next record
+ auto result = bam_read1(bgzf, b);
+
+ // update counters. if block finished, pop & reset
+ ++currentBlockReadCount_;
+ if (currentBlockReadCount_ == blocks_.at(0).numReads_) {
+ blocks_.pop_front();
+ currentBlockReadCount_ = 0;
+ }
+
+ return result;
+ }
+
+public:
+ PbiFilter filter_;
+ PbiRawData index_;
+ IndexResultBlocks blocks_;
+ size_t currentBlockReadCount_;
+};
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+PbiIndexedBamReader::PbiIndexedBamReader(const PbiFilter& filter,
+ const std::string& filename)
+ : PbiIndexedBamReader(filter, BamFile(filename))
+{ }
+
+PbiIndexedBamReader::PbiIndexedBamReader(const PbiFilter& filter,
+ const BamFile& bamFile)
+ : PbiIndexedBamReader(bamFile)
+{
+ Filter(filter);
+}
+
+PbiIndexedBamReader::PbiIndexedBamReader(const PbiFilter& filter,
+ BamFile&& bamFile)
+ : PbiIndexedBamReader(std::move(bamFile))
+{
+ Filter(filter);
+}
+
+PbiIndexedBamReader::PbiIndexedBamReader(const std::string& bamFilename)
+ : PbiIndexedBamReader(BamFile(bamFilename))
+{ }
+
+PbiIndexedBamReader::PbiIndexedBamReader(const BamFile& bamFile)
+ : BamReader(bamFile)
+ , d_(new internal::PbiIndexedBamReaderPrivate(File().PacBioIndexFilename()))
+{ }
+
+PbiIndexedBamReader::PbiIndexedBamReader(BamFile&& bamFile)
+ : BamReader(std::move(bamFile))
+ , d_(new internal::PbiIndexedBamReaderPrivate(File().PacBioIndexFilename()))
+{ }
+
+PbiIndexedBamReader::~PbiIndexedBamReader(void) { }
+
+int PbiIndexedBamReader::ReadRawData(BGZF* bgzf, bam1_t* b)
+{
+ assert(d_);
+ return d_->ReadRawData(bgzf, b);
+}
+
+const PbiFilter& PbiIndexedBamReader::Filter(void) const
+{
+ assert(d_);
+ return d_->filter_;
+}
+
+PbiIndexedBamReader& PbiIndexedBamReader::Filter(const PbiFilter& filter)
+{
+ assert(d_);
+ d_->Filter(filter);
+ return *this;
+}
+
diff --git a/src/PbiRawData.cpp b/src/PbiRawData.cpp
index edcb6d0..a219a55 100644
--- a/src/PbiRawData.cpp
+++ b/src/PbiRawData.cpp
@@ -33,17 +33,50 @@
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
//
+// File Description
+/// \file PbiRawData.cpp
+/// \brief Implements the classes used for working with raw PBI data.
+//
// Author: Derek Barnett
#include "pbbam/PbiRawData.h"
#include "pbbam/BamFile.h"
#include "pbbam/BamRecord.h"
#include "PbiIndexIO.h"
+#include <map>
#include <cassert>
using namespace PacBio;
using namespace PacBio::BAM;
using namespace std;
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+static
+string ToString(const RecordType type)
+{
+ static const auto lookup = map<RecordType, string>
+ {
+ { RecordType::POLYMERASE, "POLYMERASE" },
+ { RecordType::HQREGION, "HQREGION" },
+ { RecordType::SUBREAD, "SUBREAD" },
+ { RecordType::CCS, "CCS" },
+ { RecordType::SCRAP, "SCRAP" },
+ { RecordType::UNKNOWN, "UNKNOWN" }
+ };
+
+ try {
+ return lookup.at(type);
+ } catch (std::exception&) {
+ throw std::runtime_error("error: unknown RecordType encountered");
+ }
+}
+
+} // namespace internal
+} // namespace BAM
+} // namesapce PacBio
+
// ----------------------------------
// PbiRawBarcodeData implementation
// ----------------------------------
@@ -52,63 +85,66 @@ PbiRawBarcodeData::PbiRawBarcodeData(void) { }
PbiRawBarcodeData::PbiRawBarcodeData(uint32_t numReads)
{
- bcLeft_.reserve(numReads);
- bcRight_.reserve(numReads);
+ bcForward_.reserve(numReads);
+ bcReverse_.reserve(numReads);
bcQual_.reserve(numReads);
- ctxtFlag_.reserve(numReads);
}
PbiRawBarcodeData::PbiRawBarcodeData(const PbiRawBarcodeData& other)
- : bcLeft_(other.bcLeft_)
- , bcRight_(other.bcRight_)
+ : bcForward_(other.bcForward_)
+ , bcReverse_(other.bcReverse_)
, bcQual_(other.bcQual_)
- , ctxtFlag_(other.ctxtFlag_)
{ }
PbiRawBarcodeData::PbiRawBarcodeData(PbiRawBarcodeData&& other)
- : bcLeft_(std::move(other.bcLeft_))
- , bcRight_(std::move(other.bcRight_))
+ : bcForward_(std::move(other.bcForward_))
+ , bcReverse_(std::move(other.bcReverse_))
, bcQual_(std::move(other.bcQual_))
- , ctxtFlag_(std::move(other.ctxtFlag_))
{ }
PbiRawBarcodeData& PbiRawBarcodeData::operator=(const PbiRawBarcodeData& other)
{
- bcLeft_ = other.bcLeft_;
- bcRight_ = other.bcRight_;
+ bcForward_ = other.bcForward_;
+ bcReverse_ = other.bcReverse_;
bcQual_ = other.bcQual_;
- ctxtFlag_ =other.ctxtFlag_;
return *this;
}
PbiRawBarcodeData& PbiRawBarcodeData::operator=(PbiRawBarcodeData&& other)
{
- bcLeft_ = std::move(other.bcLeft_);
- bcRight_ = std::move(other.bcRight_);
+ bcForward_ = std::move(other.bcForward_);
+ bcReverse_ = std::move(other.bcReverse_);
bcQual_ = std::move(other.bcQual_);
- ctxtFlag_ = std::move(other.ctxtFlag_);
return *this;
}
-bool PbiRawBarcodeData::AddRecord(const BamRecord& b)
+void PbiRawBarcodeData::AddRecord(const BamRecord& b)
{
- const BamRecordImpl& impl = b.Impl();
- const bool hasBcTag = impl.HasTag("bc");
- const bool hasBqTag = impl.HasTag("bq");
- const bool hasCxTag = impl.HasTag("cx");
- const bool hasBarcodeInfo = hasBcTag && hasBqTag && hasCxTag;
- if (!hasBarcodeInfo)
- return false;
-
- const vector<uint16_t> bcValue = impl.TagValue("bc").ToUInt16Array();
- assert(bcValue.size() == 2);
- bcLeft_.push_back(bcValue[0]);
- bcRight_.push_back(bcValue[1]);
-
- bcQual_.push_back(impl.TagValue("bq").ToUInt8());
- ctxtFlag_.push_back(impl.TagValue("cx").ToUInt8());
-
- return true;
+ // check for any barcode data (both required)
+ if (b.HasBarcodes() && b.HasBarcodeQuality()) {
+
+ // fetch data from record
+ const auto barcodes = b.Barcodes();
+ const auto barcodeQuality = b.BarcodeQuality();
+
+ // convert to signed integers (stored unsigned in BAM)
+ const auto bcForward = static_cast<int16_t>(barcodes.first);
+ const auto bcReverse = static_cast<int16_t>(barcodes.second);
+ const auto bcQuality = static_cast<int8_t>(barcodeQuality);
+
+ // only store actual data if all values >= 0
+ if (bcForward >= 0 && bcReverse >=0 && bcQuality >= 0) {
+ bcForward_.push_back(bcForward);
+ bcReverse_.push_back(bcReverse);
+ bcQual_.push_back(bcQuality);
+ return;
+ }
+ }
+
+ // if we get here, at least one value is either missing or is -1
+ bcForward_.push_back(-1);
+ bcReverse_.push_back(-1);
+ bcQual_.push_back(-1);
}
// ----------------------------------
@@ -182,11 +218,8 @@ PbiRawMappedData& PbiRawMappedData::operator=(PbiRawMappedData&& other)
return *this;
}
-bool PbiRawMappedData::AddRecord(const BamRecord& b)
+void PbiRawMappedData::AddRecord(const BamRecord& b)
{
- if (!b.IsMapped())
- return false;
-
tId_.push_back(b.ReferenceId());
tStart_.push_back(b.ReferenceStart());
tEnd_.push_back(b.ReferenceEnd());
@@ -195,25 +228,30 @@ bool PbiRawMappedData::AddRecord(const BamRecord& b)
revStrand_.push_back( (b.AlignedStrand() == Strand::REVERSE ? 1 : 0) );
mapQV_.push_back(b.MapQuality());
- uint32_t nM = 0;
- uint32_t nMM = 0;
- const Cigar& cigar = b.CigarData();
- auto cigarIter = cigar.cbegin();
- auto cigarEnd = cigar.cend();
- for (; cigarIter != cigarEnd; ++cigarIter) {
- const CigarOperation& op = (*cigarIter);
- if (op.Type() == CigarOperationType::SEQUENCE_MATCH)
- nM += op.Length();
- else if (op.Type() == CigarOperationType::SEQUENCE_MISMATCH)
- nMM += op.Length();
- else if (op.Type() == CigarOperationType::ALIGNMENT_MATCH)
- throw std::runtime_error("CIGAR operation 'M' is not allowed in PacBio BAM files. Use 'X/=' instead.");
- }
- nM_.push_back(nM);
- nMM_.push_back(nMM);
+ const auto matchesAndMismatches = b.NumMatchesAndMismatches();
+ nM_.push_back(matchesAndMismatches.first);
+ nMM_.push_back(matchesAndMismatches.second);
+}
- return true;
+uint32_t PbiRawMappedData::NumDeletedBasesAt(size_t recordIndex) const
+{ return NumDeletedAndInsertedBasesAt(recordIndex).first; }
+
+std::pair<uint32_t, uint32_t> PbiRawMappedData::NumDeletedAndInsertedBasesAt(size_t recordIndex) const
+{
+ const auto aStart = aStart_.at(recordIndex);
+ const auto aEnd = aEnd_.at(recordIndex);
+ const auto tStart = tStart_.at(recordIndex);
+ const auto tEnd = tEnd_.at(recordIndex);
+ const auto nM = nM_.at(recordIndex);
+ const auto nMM = nMM_.at(recordIndex);
+ const auto numIns = (aEnd - aStart - nM - nMM);
+ const auto numDel = (tEnd - tStart - nM - nMM);
+ return std::make_pair(numDel, numIns);
}
+
+uint32_t PbiRawMappedData::NumInsertedBasesAt(size_t recordIndex) const
+{ return NumDeletedAndInsertedBasesAt(recordIndex).second; }
+
// ------------------------------------
// PbiReferenceEntry implementation
// ------------------------------------
@@ -233,6 +271,12 @@ PbiReferenceEntry::PbiReferenceEntry(ID id)
, endRow_(UNSET_ROW)
{ }
+PbiReferenceEntry::PbiReferenceEntry(ID id, Row beginRow, Row endRow)
+ : tId_(id)
+ , beginRow_(beginRow)
+ , endRow_(endRow)
+{ }
+
PbiReferenceEntry::PbiReferenceEntry(const PbiReferenceEntry& other)
: tId_(other.tId_)
, beginRow_(other.beginRow_)
@@ -294,70 +338,74 @@ PbiRawReferenceData& PbiRawReferenceData::operator=(PbiRawReferenceData&& other)
// PbiRawSubreadData implementation
// ----------------------------------
-PbiRawSubreadData::PbiRawSubreadData(void) { }
+PbiRawBasicData::PbiRawBasicData(void) { }
-PbiRawSubreadData::PbiRawSubreadData(uint32_t numReads)
+PbiRawBasicData::PbiRawBasicData(uint32_t numReads)
{
rgId_.reserve(numReads);
qStart_.reserve(numReads);
qEnd_.reserve(numReads);
holeNumber_.reserve(numReads);
readQual_.reserve(numReads);
+ ctxtFlag_.reserve(numReads);
fileOffset_.reserve(numReads);
}
-PbiRawSubreadData::PbiRawSubreadData(const PbiRawSubreadData& other)
+PbiRawBasicData::PbiRawBasicData(const PbiRawBasicData& other)
: rgId_(other.rgId_)
, qStart_(other.qStart_)
, qEnd_(other.qEnd_)
, holeNumber_(other.holeNumber_)
, readQual_(other.readQual_)
+ , ctxtFlag_(other.ctxtFlag_)
, fileOffset_(other.fileOffset_)
{ }
-PbiRawSubreadData::PbiRawSubreadData(PbiRawSubreadData&& other)
+PbiRawBasicData::PbiRawBasicData(PbiRawBasicData&& other)
: rgId_(std::move(other.rgId_))
, qStart_(std::move(other.qStart_))
, qEnd_(std::move(other.qEnd_))
, holeNumber_(std::move(other.holeNumber_))
, readQual_(std::move(other.readQual_))
+ , ctxtFlag_(std::move(other.ctxtFlag_))
, fileOffset_(std::move(other.fileOffset_))
{ }
-PbiRawSubreadData& PbiRawSubreadData::operator=(const PbiRawSubreadData& other)
+PbiRawBasicData& PbiRawBasicData::operator=(const PbiRawBasicData& other)
{
rgId_ = other.rgId_;
qStart_ = other.qStart_;
qEnd_ = other.qEnd_;
holeNumber_ = other.holeNumber_;
readQual_ = other.readQual_;
+ ctxtFlag_ = other.ctxtFlag_;
fileOffset_ = other.fileOffset_;
return *this;
}
-PbiRawSubreadData& PbiRawSubreadData::operator=(PbiRawSubreadData&& other)
+PbiRawBasicData& PbiRawBasicData::operator=(PbiRawBasicData&& other)
{
rgId_ = std::move(other.rgId_);
qStart_ = std::move(other.qStart_);
qEnd_ = std::move(other.qEnd_);
holeNumber_ = std::move(other.holeNumber_);
readQual_ = std::move(other.readQual_);
+ ctxtFlag_ = std::move(other.ctxtFlag_);
fileOffset_ = std::move(other.fileOffset_);
return *this;
}
-void PbiRawSubreadData::AddRecord(const BamRecord& b, int64_t offset)
+void PbiRawBasicData::AddRecord(const BamRecord& b, int64_t offset)
{
-
- string rgId = b.ReadGroupId();
- if (rgId.empty()) {
- // calculate
- }
+ // read group ID
+ auto rgId = b.ReadGroupId();
+ if (rgId.empty())
+ rgId = MakeReadGroupId(b.MovieName(), internal::ToString(b.Type()));
const uint32_t rawid = std::stoul(rgId, nullptr, 16);
const int32_t id = static_cast<int32_t>(rawid);
-
rgId_.push_back(id);
+ // query start/end
if (b.Type() == RecordType::CCS) {
qStart_.push_back(-1);
qEnd_.push_back(-1);
@@ -366,16 +414,12 @@ void PbiRawSubreadData::AddRecord(const BamRecord& b, int64_t offset)
qEnd_.push_back(b.QueryEnd());
}
- if (b.HasHoleNumber())
- holeNumber_.push_back(b.HoleNumber());
- else
- holeNumber_.push_back(0); // TODO: what to do?
-
- if (b.HasReadAccuracy())
- readQual_.push_back(b.ReadAccuracy());
- else
- readQual_.push_back(0); // TODO: what to do?
+ // add'l basic data
+ holeNumber_.push_back(b.HasHoleNumber() ? b.HoleNumber() : 0);
+ readQual_.push_back(b.HasReadAccuracy() ? static_cast<float>(b.ReadAccuracy()) : 0.0f);
+ ctxtFlag_.push_back(b.HasLocalContextFlags() ? b.LocalContextFlags() : LocalContextFlags::NO_LOCAL_CONTEXT);
+ // virtual offset of record start
fileOffset_.push_back(offset);
}
@@ -390,7 +434,8 @@ PbiRawData::PbiRawData(void)
{ }
PbiRawData::PbiRawData(const string& pbiFilename)
- : version_(PbiFile::CurrentVersion)
+ : filename_(pbiFilename)
+ , version_(PbiFile::CurrentVersion)
, sections_(PbiFile::ALL)
, numReads_(0)
{
@@ -398,46 +443,50 @@ PbiRawData::PbiRawData(const string& pbiFilename)
}
PbiRawData::PbiRawData(const PbiRawData& other)
- : version_(other.version_)
+ : filename_(other.filename_)
+ , version_(other.version_)
, sections_(other.sections_)
, numReads_(other.numReads_)
, barcodeData_(other.barcodeData_)
, mappedData_(other.mappedData_)
, referenceData_(other.referenceData_)
- , subreadData_(other.subreadData_)
+ , basicData_(other.basicData_)
{ }
PbiRawData::PbiRawData(PbiRawData&& other)
- : version_(std::move(other.version_))
+ : filename_(std::move(other.filename_))
+ , version_(std::move(other.version_))
, sections_(std::move(other.sections_))
, numReads_(std::move(other.numReads_))
, barcodeData_(std::move(other.barcodeData_))
, mappedData_(std::move(other.mappedData_))
, referenceData_(std::move(other.referenceData_))
- , subreadData_(std::move(other.subreadData_))
+ , basicData_(std::move(other.basicData_))
{ }
PbiRawData& PbiRawData::operator=(const PbiRawData& other)
{
+ filename_ = other.filename_;
version_ = other.version_;
sections_ = other.sections_;
numReads_ = other.numReads_;
barcodeData_ = other.barcodeData_;
mappedData_ = other.mappedData_;
referenceData_ = other.referenceData_;
- subreadData_ = other.subreadData_;
+ basicData_ = other.basicData_;
return *this;
}
PbiRawData& PbiRawData::operator=(PbiRawData&& other)
{
+ filename_ = std::move(other.filename_);
version_ = std::move(other.version_);
sections_ = std::move(other.sections_);
numReads_ = std::move(other.numReads_);
barcodeData_ = std::move(other.barcodeData_);
mappedData_ = std::move(other.mappedData_);
referenceData_ = std::move(other.referenceData_);
- subreadData_ = std::move(other.subreadData_);
+ basicData_ = std::move(other.basicData_);
return *this;
}
diff --git a/src/ProgramInfo.cpp b/src/ProgramInfo.cpp
index 45c8680..75f193a 100644
--- a/src/ProgramInfo.cpp
+++ b/src/ProgramInfo.cpp
@@ -33,7 +33,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file ProgramInfo.cpp
+/// \brief Implements the ProgramInfo class.
+//
// Author: Derek Barnett
#include "pbbam/ProgramInfo.h"
diff --git a/src/FilterEngine.cpp b/src/QNameQuery.cpp
similarity index 53%
rename from src/FilterEngine.cpp
rename to src/QNameQuery.cpp
index 1f47967..e544664 100644
--- a/src/FilterEngine.cpp
+++ b/src/QNameQuery.cpp
@@ -32,45 +32,73 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file QNameQuery.cpp
+/// \brief Implements the QNameQuery class.
+//
// Author: Derek Barnett
-#include "pbbam/internal/FilterEngine.h"
+#include "pbbam/QNameQuery.h"
+#include "pbbam/CompositeBamReader.h"
+#include <boost/optional.hpp>
+#include <cassert>
using namespace PacBio;
using namespace PacBio::BAM;
using namespace PacBio::BAM::internal;
using namespace std;
-namespace PacBio {
-namespace BAM {
-namespace internal {
-
-} // namespace internal
-} // namespace BAM
-} // namespace PacBio
+struct QNameQuery::QNameQueryPrivate
+{
+public:
+ QNameQueryPrivate(const DataSet& dataset)
+ : reader_(new SequentialCompositeBamReader(dataset))
+ , nextRecord_(boost::none)
+ { }
+ bool GetNext(vector<BamRecord>& records)
+ {
+ records.clear();
-FilterEngine::FilterEngine(void) { }
+ string groupRecordName;
-bool FilterEngine::Accepts(const BamRecord& r) const
-{
-// foreach ( const FilterParameter& param, parameters_ ) {
-// if (!param.Accepts(r))
-// return false;
-// }
-// return true;
- (void)r;
- return true;
-}
+ if (nextRecord_.is_initialized()) {
+ BamRecord r = nextRecord_.get();
+ groupRecordName = r.FullName();
+ records.push_back(std::move(r));
+ nextRecord_ = boost::none;
+ }
-bool FilterEngine::Accepts(vector<BamRecord>& r) const
-{
- size_t i = 0;
- while (i < r.size()) {
- if (!Accepts(r.at(i)))
- r.erase(r.begin() + i);
- else
- ++i;
+ BamRecord record;
+ while (reader_->GetNext(record)) {
+ if (records.empty()) {
+ groupRecordName = record.FullName();
+ records.push_back(record);
+ }
+ else {
+ assert(!records.empty());
+ if (record.FullName() == groupRecordName)
+ records.push_back(record);
+ else {
+ nextRecord_ = record;
+ return true;
+ }
+ }
+ }
+ return !records.empty();
}
- return !r.empty();
-}
+
+public:
+ unique_ptr<SequentialCompositeBamReader> reader_;
+ boost::optional<BamRecord> nextRecord_;
+};
+
+QNameQuery::QNameQuery(const DataSet& dataset)
+ : internal::IGroupQuery()
+ , d_(new QNameQueryPrivate(dataset))
+{ }
+
+QNameQuery::~QNameQuery(void) { }
+
+bool QNameQuery::GetNext(vector<BamRecord>& records)
+{ return d_->GetNext(records); }
diff --git a/src/QualityValue.cpp b/src/QualityValue.cpp
index 200b96b..e9f63c9 100644
--- a/src/QualityValue.cpp
+++ b/src/QualityValue.cpp
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file QualityValue.h
+/// \brief Implements the QualityValue class.
+//
// Author: Derek Barnett
#include "pbbam/QualityValue.h"
diff --git a/tests/src/test_TimeUtils.cpp b/src/ReadAccuracyQuery.cpp
similarity index 65%
copy from tests/src/test_TimeUtils.cpp
copy to src/ReadAccuracyQuery.cpp
index 7ab9fa5..8535189 100644
--- a/tests/src/test_TimeUtils.cpp
+++ b/src/ReadAccuracyQuery.cpp
@@ -32,28 +32,40 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file ReadAccuracyQuery.cpp
+/// \brief Implements the ReadAccuracyQuery class.
+//
// Author: Derek Barnett
-#ifdef PBBAM_TESTING
-#define private public
-#endif
-
-#include <gtest/gtest.h>
-#include <pbbam/../../src/TimeUtils.h>
-
+#include "pbbam/ReadAccuracyQuery.h"
+#include "pbbam/PbiFilterTypes.h"
+#include "pbbam/CompositeBamReader.h"
using namespace PacBio;
using namespace PacBio::BAM;
using namespace PacBio::BAM::internal;
using namespace std;
-TEST(TimeUtilsTest, ToIso8601)
+struct ReadAccuracyQuery::ReadAccuracyQueryPrivate
{
- const time_t rawTime = 436428750L;
- const auto timestamp = std::chrono::system_clock::from_time_t(rawTime);
+ ReadAccuracyQueryPrivate(const Accuracy accuracy,
+ const Compare::Type compareType,
+ const DataSet& dataset)
+ : reader_(PbiReadAccuracyFilter(accuracy, compareType), dataset)
+ { }
+
+ PbiFilterCompositeBamReader<Compare::None> reader_; // unsorted
+};
+
+ReadAccuracyQuery::ReadAccuracyQuery(const Accuracy accuracy,
+ const Compare::Type compareType,
+ const DataSet& dataset)
+ : internal::IQuery()
+ , d_(new ReadAccuracyQueryPrivate(accuracy, compareType, dataset))
+{ }
+
+ReadAccuracyQuery::~ReadAccuracyQuery(void) { }
- // can't hardcode expected (since we rely on localtime())
- const std::string& expected = "1983-10-31T06:12:30Z";
- const std::string& actual = internal::ToIso8601(timestamp);
- EXPECT_EQ(expected, actual);
-}
+bool ReadAccuracyQuery::GetNext(BamRecord &r)
+{ return d_->reader_.GetNext(r); }
diff --git a/src/ReadGroupInfo.cpp b/src/ReadGroupInfo.cpp
index b48c602..023f388 100644
--- a/src/ReadGroupInfo.cpp
+++ b/src/ReadGroupInfo.cpp
@@ -32,15 +32,22 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file ReadGroupInfo.cpp
+/// \brief Implements the ReadGroupInfo class.
+//
// Author: Derek Barnett
#include "pbbam/ReadGroupInfo.h"
+#include "ChemistryTable.h"
#include "SequenceUtils.h"
#include <cram/md5.h>
-#include <cstdio>
+#include <iomanip>
#include <set>
#include <sstream>
+#include <stdexcept>
+#include <cstdio>
using namespace PacBio;
using namespace PacBio::BAM;
using namespace std;
@@ -49,47 +56,69 @@ namespace PacBio {
namespace BAM {
namespace internal {
-static const string token_ID = string("ID");
-static const string token_CN = string("CN");
-static const string token_DS = string("DS");
-static const string token_DT = string("DT");
-static const string token_FO = string("FO");
-static const string token_KS = string("KS");
-static const string token_LB = string("LB");
-static const string token_PG = string("PG");
-static const string token_PI = string("PI");
-static const string token_PL = string("PL");
-static const string token_PU = string("PU");
-static const string token_SM = string("SM");
-
-static const string feature_DQ = string("DeletionQV");
-static const string feature_DT = string("DeletionTag");
-static const string feature_IQ = string("InsertionQV");
-static const string feature_MQ = string("MergeQV");
-static const string feature_SQ = string("SubstitutionQV");
-static const string feature_ST = string("SubstitutionTag");
-static const string feature_IP = string("Ipd");
-static const string feature_PW = string("PulseWidth");
-static const string feature_PM = string("PkMid");
-static const string feature_PA = string("PkMean");
-static const string feature_LT = string("Label");
-static const string feature_PQ = string("LabelQV");
-static const string feature_PT = string("AltLabel");
-static const string feature_PV = string("AltLabelQV");
-static const string feature_PG = string("PulseMergeQV");
-static const string feature_PC = string("PulseCall");
-static const string feature_PD = string("PrePulseFrames");
-static const string feature_PX = string("PulseCallWidth");
-
-static const string token_RT = string("READTYPE");
-static const string token_BK = string("BINDINGKIT");
-static const string token_SK = string("SEQUENCINGKIT");
-static const string token_BV = string("BASECALLERVERSION");
-static const string token_FR = string("FRAMERATEHZ");
-static const string token_CT = string("CONTROL");
-
-static const string codec_RAW = string("Frames");
-static const string codec_V1 = string("CodecV1");
+static const string sam_ID = string{ "ID" };
+static const string sam_CN = string{ "CN" };
+static const string sam_DS = string{ "DS" };
+static const string sam_DT = string{ "DT" };
+static const string sam_FO = string{ "FO" };
+static const string sam_KS = string{ "KS" };
+static const string sam_LB = string{ "LB" };
+static const string sam_PG = string{ "PG" };
+static const string sam_PI = string{ "PI" };
+static const string sam_PL = string{ "PL" };
+static const string sam_PM = string{ "PM" };
+static const string sam_PU = string{ "PU" };
+static const string sam_SM = string{ "SM" };
+
+static const string feature_DQ = string{ "DeletionQV" };
+static const string feature_DT = string{ "DeletionTag" };
+static const string feature_IQ = string{ "InsertionQV" };
+static const string feature_MQ = string{ "MergeQV" };
+static const string feature_SQ = string{ "SubstitutionQV" };
+static const string feature_ST = string{ "SubstitutionTag" };
+static const string feature_IP = string{ "Ipd" };
+static const string feature_PW = string{ "PulseWidth" };
+static const string feature_PM = string{ "PkMid" };
+static const string feature_PA = string{ "PkMean" };
+static const string feature_PI = string{ "PkMid2" };
+static const string feature_PS = string{ "PkMean2" };
+static const string feature_LT = string{ "Label" };
+static const string feature_PQ = string{ "LabelQV" };
+static const string feature_PT = string{ "AltLabel" };
+static const string feature_PV = string{ "AltLabelQV" };
+static const string feature_PG = string{ "PulseMergeQV" };
+static const string feature_PC = string{ "PulseCall" };
+static const string feature_PD = string{ "PrePulseFrames" };
+static const string feature_PX = string{ "PulseCallWidth" };
+static const string feature_SF = string{ "StartFrame" };
+
+static const string token_RT = string{ "READTYPE" };
+static const string token_BK = string{ "BINDINGKIT" };
+static const string token_SK = string{ "SEQUENCINGKIT" };
+static const string token_BV = string{ "BASECALLERVERSION" };
+static const string token_FR = string{ "FRAMERATEHZ" };
+static const string token_CT = string{ "CONTROL" };
+
+static const string token_BF = string{ "BarcodeFile" };
+static const string token_BH = string{ "BarcodeHash" };
+static const string token_BC = string{ "BarcodeCount" };
+static const string token_BM = string{ "BarcodeMode" };
+static const string token_BQ = string{ "BarcodeQuality" };
+
+static const string codec_RAW = string{ "Frames" };
+static const string codec_V1 = string{ "CodecV1" };
+
+static const string barcodemode_NONE = string{ "None" };
+static const string barcodemode_SYM = string{ "Symmetric" };
+static const string barcodemode_ASYM = string{ "Asymmetric" };
+
+static const string barcodequal_NONE = string{ "None" };
+static const string barcodequal_SCORE = string{ "Score" };
+static const string barcodequal_PROB = string{ "Probability" };
+
+static const string platformModelType_ASTRO = string{ "ASTRO" };
+static const string platformModelType_RS = string{ "RS" };
+static const string platformModelType_SEQUEL = string{ "SEQUEL" };
static
string BaseFeatureName(const BaseFeature& feature)
@@ -105,6 +134,8 @@ string BaseFeatureName(const BaseFeature& feature)
case BaseFeature::PULSE_WIDTH : return feature_PW;
case BaseFeature::PKMID : return feature_PM;
case BaseFeature::PKMEAN : return feature_PA;
+ case BaseFeature::PKMID2 : return feature_PI;
+ case BaseFeature::PKMEAN2 : return feature_PS;
case BaseFeature::LABEL_QV : return feature_PQ;
case BaseFeature::ALT_LABEL : return feature_PT;
case BaseFeature::ALT_LABEL_QV : return feature_PV;
@@ -112,10 +143,11 @@ string BaseFeatureName(const BaseFeature& feature)
case BaseFeature::PULSE_CALL : return feature_PC;
case BaseFeature::PRE_PULSE_FRAMES : return feature_PD;
case BaseFeature::PULSE_CALL_WIDTH : return feature_PX;
+ case BaseFeature::START_FRAME : return feature_SF;
default:
- throw std::runtime_error("unrecognized base feature");
+ throw std::runtime_error{ "unrecognized base feature" };
}
- return string();
+ return string{ }; // unreachable
}
static
@@ -125,13 +157,55 @@ string FrameCodecName(const FrameCodec& codec)
case FrameCodec::RAW : return codec_RAW;
case FrameCodec::V1 : return codec_V1;
default:
- throw std::runtime_error("unrecognized frame codec");
+ throw std::runtime_error{ "unrecognized frame codec" };
+ }
+ return string{ }; // unreachable
+}
+
+static
+string BarcodeModeName(const BarcodeModeType& mode)
+{
+ switch (mode) {
+ case BarcodeModeType::NONE : return barcodemode_NONE;
+ case BarcodeModeType::SYMMETRIC : return barcodemode_SYM;
+ case BarcodeModeType::ASYMMETRIC : return barcodemode_ASYM;
+ default:
+ throw std::runtime_error{ "unrecognized barcode mode" };
+ }
+ return string{ }; // unreachable
+}
+
+static
+string BarcodeQualityName(const BarcodeQualityType& type)
+{
+ switch (type) {
+ case BarcodeQualityType::NONE : return barcodequal_NONE;
+ case BarcodeQualityType::SCORE : return barcodequal_SCORE;
+ case BarcodeQualityType::PROBABILITY : return barcodequal_PROB;
+ default:
+ throw std::runtime_error{ "unrecognized barcode quality type" };
+ }
+ return string{ }; // unreachable
+}
+
+static
+string PlatformModelName(const PlatformModelType& type)
+{
+ switch (type) {
+ case PlatformModelType::ASTRO : return platformModelType_ASTRO;
+ case PlatformModelType::RS : return platformModelType_RS;
+ case PlatformModelType::SEQUEL : return platformModelType_SEQUEL;
+ default:
+ throw std::runtime_error{ "unrecognized platform model" };
}
- return string();
+ return string{ }; // unreachable
}
-static map<string, BaseFeature> nameToFeature;
-static map<string, FrameCodec> nameToCodec;
+static map<string, BaseFeature> nameToFeature;
+static map<string, FrameCodec> nameToCodec;
+static map<string, BarcodeModeType> nameToBarcodeMode;
+static map<string, BarcodeQualityType> nameToBarcodeQuality;
+static map<string, PlatformModelType> nameToPlatformModel;
static inline
void InitNameToFeature(void)
@@ -147,6 +221,8 @@ void InitNameToFeature(void)
nameToFeature[feature_PW] = BaseFeature::PULSE_WIDTH;
nameToFeature[feature_PM] = BaseFeature::PKMID;
nameToFeature[feature_PA] = BaseFeature::PKMEAN;
+ nameToFeature[feature_PI] = BaseFeature::PKMID2;
+ nameToFeature[feature_PS] = BaseFeature::PKMEAN2;
nameToFeature[feature_PQ] = BaseFeature::LABEL_QV;
nameToFeature[feature_PT] = BaseFeature::ALT_LABEL;
nameToFeature[feature_PV] = BaseFeature::ALT_LABEL_QV;
@@ -154,6 +230,7 @@ void InitNameToFeature(void)
nameToFeature[feature_PG] = BaseFeature::PULSE_MERGE_QV;
nameToFeature[feature_PD] = BaseFeature::PRE_PULSE_FRAMES;
nameToFeature[feature_PX] = BaseFeature::PULSE_CALL_WIDTH;
+ nameToFeature[feature_SF] = BaseFeature::START_FRAME;
}
}
@@ -167,14 +244,48 @@ void InitNameToCodec(void)
}
static inline
-bool IsBaseFeature(const std::string& name)
+void InitNameToBarcodeMode(void)
+{
+ if (nameToBarcodeMode.empty()) {
+ nameToBarcodeMode[barcodemode_NONE] = BarcodeModeType::NONE;
+ nameToBarcodeMode[barcodemode_SYM] = BarcodeModeType::SYMMETRIC;
+ nameToBarcodeMode[barcodemode_ASYM] = BarcodeModeType::ASYMMETRIC;
+ }
+}
+
+static inline
+void InitNameToBarcodeQuality(void)
+{
+ if (nameToBarcodeQuality.empty()) {
+ nameToBarcodeQuality[barcodequal_NONE] = BarcodeQualityType::NONE;
+ nameToBarcodeQuality[barcodequal_SCORE] = BarcodeQualityType::SCORE;
+ nameToBarcodeQuality[barcodequal_PROB] = BarcodeQualityType::PROBABILITY;
+ }
+}
+
+static inline
+void InitNameToPlatformModel(void)
+{
+ if (nameToPlatformModel.empty()) {
+ nameToPlatformModel[platformModelType_ASTRO] = PlatformModelType::ASTRO;
+ nameToPlatformModel[platformModelType_RS] = PlatformModelType::RS;
+ nameToPlatformModel[platformModelType_SEQUEL] = PlatformModelType::SEQUEL;
+ }
+}
+
+static inline
+bool IsLikelyBarcodeKey(const string& name)
+{ return name.find("Barcode") == 0; }
+
+static inline
+bool IsBaseFeature(const string& name)
{
InitNameToFeature();
return nameToFeature.find(name) != nameToFeature.cend();
}
static inline
-BaseFeature BaseFeatureFromName(const std::string& name)
+BaseFeature BaseFeatureFromName(const string& name)
{
InitNameToFeature();
return nameToFeature.at(name);
@@ -187,16 +298,39 @@ FrameCodec FrameCodecFromName(const string& name)
return nameToCodec.at(name);
}
+static inline
+BarcodeModeType BarcodeModeFromName(const string& name)
+{
+ InitNameToBarcodeMode();
+ return nameToBarcodeMode.at(name);
+}
+
+static inline
+BarcodeQualityType BarcodeQualityFromName(const string& name)
+{
+ InitNameToBarcodeQuality();
+ return nameToBarcodeQuality.at(name);
+}
+
+static inline
+PlatformModelType PlatformModelFromName(const string& name)
+{
+ InitNameToPlatformModel();
+ return nameToPlatformModel.at(name);
+}
+
} // namespace internal
ReadGroupInfo::ReadGroupInfo(void)
- : readType_("UNKNOWN")
+ : platformModel_(PlatformModelType::SEQUEL)
+ , readType_("UNKNOWN")
, ipdCodec_(FrameCodec::V1)
, pulseWidthCodec_(FrameCodec::V1)
{ }
ReadGroupInfo::ReadGroupInfo(const std::string& id)
: id_(id)
+ , platformModel_(PlatformModelType::SEQUEL)
, readType_("UNKNOWN")
, ipdCodec_(FrameCodec::V1)
, pulseWidthCodec_(FrameCodec::V1)
@@ -206,7 +340,21 @@ ReadGroupInfo::ReadGroupInfo(const std::string& movieName,
const std::string& readType)
: id_(MakeReadGroupId(movieName, readType))
, movieName_(movieName)
+ , platformModel_(PlatformModelType::SEQUEL)
+ , readType_(readType)
+ , ipdCodec_(FrameCodec::V1)
+ , pulseWidthCodec_(FrameCodec::V1)
+{ }
+
+ReadGroupInfo::ReadGroupInfo(const std::string& movieName,
+ const std::string& readType,
+ const PlatformModelType platform)
+ : id_(MakeReadGroupId(movieName, readType))
+ , movieName_(movieName)
+ , platformModel_(platform)
, readType_(readType)
+ , ipdCodec_(FrameCodec::V1)
+ , pulseWidthCodec_(FrameCodec::V1)
{ }
ReadGroupInfo::ReadGroupInfo(const ReadGroupInfo& other)
@@ -220,6 +368,7 @@ ReadGroupInfo::ReadGroupInfo(const ReadGroupInfo& other)
, predictedInsertSize_(other.predictedInsertSize_)
, movieName_(other.movieName_)
, sample_(other.sample_)
+ , platformModel_(other.platformModel_)
, readType_(other.readType_)
, bindingKit_(other.bindingKit_)
, sequencingKit_(other.sequencingKit_)
@@ -228,6 +377,12 @@ ReadGroupInfo::ReadGroupInfo(const ReadGroupInfo& other)
, control_(other.control_)
, ipdCodec_(other.ipdCodec_)
, pulseWidthCodec_(other.pulseWidthCodec_)
+ , hasBarcodeData_(other.hasBarcodeData_)
+ , barcodeFile_(other.barcodeFile_)
+ , barcodeHash_(other.barcodeHash_)
+ , barcodeCount_(other.barcodeCount_)
+ , barcodeMode_(other.barcodeMode_)
+ , barcodeQuality_(other.barcodeQuality_)
, features_(other.features_)
{ }
@@ -242,6 +397,7 @@ ReadGroupInfo::ReadGroupInfo(ReadGroupInfo&& other)
, predictedInsertSize_(std::move(other.predictedInsertSize_))
, movieName_(std::move(other.movieName_))
, sample_(std::move(other.sample_))
+ , platformModel_(std::move(other.platformModel_))
, readType_(std::move(other.readType_))
, bindingKit_(std::move(other.bindingKit_))
, sequencingKit_(std::move(other.sequencingKit_))
@@ -250,6 +406,12 @@ ReadGroupInfo::ReadGroupInfo(ReadGroupInfo&& other)
, control_(std::move(other.control_))
, ipdCodec_(std::move(other.ipdCodec_))
, pulseWidthCodec_(std::move(other.pulseWidthCodec_))
+ , hasBarcodeData_(std::move(other.hasBarcodeData_))
+ , barcodeFile_(std::move(other.barcodeFile_))
+ , barcodeHash_(std::move(other.barcodeHash_))
+ , barcodeCount_(std::move(other.barcodeCount_))
+ , barcodeMode_(std::move(other.barcodeMode_))
+ , barcodeQuality_(std::move(other.barcodeQuality_))
, features_(std::move(other.features_))
{ }
@@ -264,6 +426,7 @@ ReadGroupInfo& ReadGroupInfo::operator=(const ReadGroupInfo& other)
keySequence_ = other.keySequence_;
library_ = other.library_;
programs_ = other.programs_;
+ platformModel_ = other.platformModel_;
predictedInsertSize_ = other.predictedInsertSize_;
movieName_ = other.movieName_;
sample_ = other.sample_;
@@ -275,6 +438,12 @@ ReadGroupInfo& ReadGroupInfo::operator=(const ReadGroupInfo& other)
control_ = other.control_;
ipdCodec_ = other.ipdCodec_;
pulseWidthCodec_ = other.pulseWidthCodec_;
+ hasBarcodeData_ = other.hasBarcodeData_;
+ barcodeFile_ = other.barcodeFile_;
+ barcodeHash_ = other.barcodeHash_;
+ barcodeCount_ = other.barcodeCount_;
+ barcodeMode_ = other.barcodeMode_;
+ barcodeQuality_ = other.barcodeQuality_;
features_ = other.features_;
return *this;
}
@@ -288,6 +457,7 @@ ReadGroupInfo& ReadGroupInfo::operator=(ReadGroupInfo&& other)
keySequence_ = std::move(other.keySequence_);
library_ = std::move(other.library_);
programs_ = std::move(other.programs_);
+ platformModel_ = std::move(other.platformModel_);
predictedInsertSize_ = std::move(other.predictedInsertSize_);
movieName_ = std::move(other.movieName_);
sample_ = std::move(other.sample_);
@@ -299,6 +469,12 @@ ReadGroupInfo& ReadGroupInfo::operator=(ReadGroupInfo&& other)
control_ = std::move(other.control_);
ipdCodec_ = std::move(other.ipdCodec_);
pulseWidthCodec_ = std::move(other.pulseWidthCodec_);
+ hasBarcodeData_ = std::move(other.hasBarcodeData_);
+ barcodeFile_ = std::move(other.barcodeFile_);
+ barcodeHash_ = std::move(other.barcodeHash_);
+ barcodeCount_ = std::move(other.barcodeCount_);
+ barcodeMode_ = std::move(other.barcodeMode_);
+ barcodeQuality_ = std::move(other.barcodeQuality_);
features_ = std::move(other.features_);
return *this;
}
@@ -309,36 +485,67 @@ void ReadGroupInfo::DecodeSamDescription(const std::string& description)
// for each, split on equal
// determine name ->
- const vector<string>& tokens = internal::Split(description, ';');
+ auto tokens = internal::Split(description, ';');
if (tokens.empty())
return;
- // iterate over tokens
- auto tokenEnd = tokens.cend();
- for (auto tokenIter = tokens.cbegin(); tokenIter != tokenEnd; ++tokenIter) {
+ bool hasBarcodeFile = false;
+ bool hasBarcodeHash = false;
+ bool hasBarcodeCount = false;
+ bool hasBarcodeMode = false;
+ bool hasBarcodeQuality = false;
- const string& token = *tokenIter;
+ // iterate over tokens
+ for (auto&& token : tokens) {
- const size_t foundEqual = token.find('=');
+ const auto foundEqual = token.find('=');
if (foundEqual == string::npos)
continue;
- const string& key = token.substr(0,foundEqual);
- const string& value = token.substr(foundEqual+1);
+ const auto key = token.substr(0,foundEqual);
+ const auto value = token.substr(foundEqual+1);
+ // 'mandatory' items
if (key == internal::token_RT) readType_ = value;
else if (key == internal::token_BK) bindingKit_ = value;
else if (key == internal::token_BV) basecallerVersion_ = value;
else if (key == internal::token_SK) sequencingKit_ = value;
else if (key == internal::token_FR) frameRateHz_ = value;
- else if (key == internal::token_CT) control_ = value == "TRUE";
- else if (internal::IsBaseFeature(key)) {
+ else if (key == internal::token_CT) control_ = (value == "TRUE");
+
+ // base features
+ else if (internal::IsBaseFeature(key))
features_[internal::BaseFeatureFromName(key)] = value;
- }
+
+ // barcode data
+ else if (internal::IsLikelyBarcodeKey(key)) {
+ if (key == internal::token_BF) {
+ barcodeFile_ = value;
+ hasBarcodeFile = true;
+ }
+ else if (key == internal::token_BH) {
+ barcodeHash_ = value;
+ hasBarcodeHash = true;
+ }
+ else if (key == internal::token_BC) {
+ barcodeCount_ = static_cast<size_t>(std::stoul(value));
+ hasBarcodeCount = true;
+ }
+ else if (key == internal::token_BM) {
+ barcodeMode_ = internal::BarcodeModeFromName(value);
+ hasBarcodeMode = true;
+ }
+ else if (key == internal::token_BQ) {
+ barcodeQuality_ = internal::BarcodeQualityFromName(value);
+ hasBarcodeQuality = true;
+ }
+ }
+
+ // frame codecs
else {
- const vector<string> keyParts = internal::Split(key, ':');
+ const auto keyParts = internal::Split(key, ':');
if (keyParts.size() == 2) {
- const string& subkey = keyParts.at(0);
+ const auto& subkey = keyParts.at(0);
if (subkey == internal::feature_IP) {
ipdCodec_ = internal::FrameCodecFromName(keyParts.at(1));
features_[BaseFeature::IPD] = value;
@@ -350,15 +557,25 @@ void ReadGroupInfo::DecodeSamDescription(const std::string& description)
}
}
}
+
+ hasBarcodeData_ = (hasBarcodeFile &&
+ hasBarcodeHash &&
+ hasBarcodeCount &&
+ hasBarcodeMode &&
+ hasBarcodeQuality);
}
std::string ReadGroupInfo::EncodeSamDescription(void) const
{
- string result;
+ auto result = string{ };
result.reserve(256);
result.append(std::string(internal::token_RT+"=" + readType_));
- string featureName;
+ static const auto SEP = string{";"};
+ static const auto COLON = string{":"};
+ static const auto EQ = string{"="};
+
+ auto featureName = string{ };
const auto featureEnd = features_.cend();
auto featureIter = features_.cbegin();
for ( ; featureIter != featureEnd; ++featureIter ) {
@@ -366,21 +583,35 @@ std::string ReadGroupInfo::EncodeSamDescription(void) const
if (featureName.empty() || featureIter->second.empty())
continue;
else if (featureName == internal::feature_IP) {
- featureName.append(":");
+ featureName.append(COLON);
featureName.append(internal::FrameCodecName(ipdCodec_));
}
else if (featureName == internal::feature_PW) {
- featureName.append(":");
+ featureName.append(COLON);
featureName.append(internal::FrameCodecName(pulseWidthCodec_));
}
- result.append(string(';' + featureName + '=' + featureIter->second));
+ result.append(string(SEP + featureName + EQ + featureIter->second));
}
- if (!bindingKit_.empty()) result.append(";"+internal::token_BK+"="+bindingKit_);
- if (!sequencingKit_.empty()) result.append(";"+internal::token_SK+"="+sequencingKit_);
- if (!basecallerVersion_.empty()) result.append(";"+internal::token_BV+"="+basecallerVersion_);
- if (!frameRateHz_.empty()) result.append(";"+internal::token_FR+"="+frameRateHz_);
- if (control_) result.append(";"+internal::token_CT+"="+ (control_ ? "TRUE" : "FALSE"));
+ if (!bindingKit_.empty()) result.append(SEP + internal::token_BK +EQ + bindingKit_);
+ if (!sequencingKit_.empty()) result.append(SEP + internal::token_SK +EQ + sequencingKit_);
+ if (!basecallerVersion_.empty()) result.append(SEP + internal::token_BV +EQ + basecallerVersion_);
+ if (!frameRateHz_.empty()) result.append(SEP + internal::token_FR +EQ + frameRateHz_);
+ if (control_) result.append(SEP + internal::token_CT +EQ + (control_ ? "TRUE"
+ : "FALSE"));
+
+ if (hasBarcodeData_) {
+ const auto barcodeData =
+ string {
+ SEP + internal::token_BF + EQ + barcodeFile_ +
+ SEP + internal::token_BH + EQ + barcodeHash_ +
+ SEP + internal::token_BC + EQ + std::to_string(barcodeCount_) +
+ SEP + internal::token_BM + EQ + internal::BarcodeModeName(barcodeMode_) +
+ SEP + internal::token_BQ + EQ + internal::BarcodeQualityName(barcodeQuality_)
+ };
+ result.reserve(result.size() + barcodeData.size());
+ result.append(barcodeData);
+ }
return result;
}
@@ -388,29 +619,30 @@ std::string ReadGroupInfo::EncodeSamDescription(void) const
ReadGroupInfo ReadGroupInfo::FromSam(const string& sam)
{
// pop off '@RG\t', then split rest of line into tokens
- const vector<string>& tokens = internal::Split(sam.substr(4), '\t');
+ const auto tokens = internal::Split(sam.substr(4), '\t');
if (tokens.empty())
- return ReadGroupInfo();
+ return ReadGroupInfo{ };
- ReadGroupInfo rg;
- map<string, string> custom;
+ auto rg = ReadGroupInfo{ };
+ auto custom = map<string, string>{ };
- for (const string& token : tokens) {
- const string& tokenTag = token.substr(0,2);
- const string& tokenValue = token.substr(3);
+ for (auto&& token : tokens) {
+ const auto tokenTag = token.substr(0,2);
+ const auto tokenValue = token.substr(3);
// set read group info
- if (tokenTag == internal::token_ID) rg.Id(tokenValue);
- else if (tokenTag == internal::token_CN) rg.SequencingCenter(tokenValue);
- else if (tokenTag == internal::token_DT) rg.Date(tokenValue);
- else if (tokenTag == internal::token_FO) rg.FlowOrder(tokenValue);
- else if (tokenTag == internal::token_KS) rg.KeySequence(tokenValue);
- else if (tokenTag == internal::token_LB) rg.Library(tokenValue);
- else if (tokenTag == internal::token_PG) rg.Programs(tokenValue);
- else if (tokenTag == internal::token_PI) rg.PredictedInsertSize(tokenValue);
- else if (tokenTag == internal::token_PU) rg.MovieName(tokenValue);
- else if (tokenTag == internal::token_SM) rg.Sample(tokenValue);
- else if (tokenTag == internal::token_DS) rg.DecodeSamDescription(tokenValue);
+ if (tokenTag == internal::sam_ID) rg.Id(tokenValue);
+ else if (tokenTag == internal::sam_CN) rg.SequencingCenter(tokenValue);
+ else if (tokenTag == internal::sam_DT) rg.Date(tokenValue);
+ else if (tokenTag == internal::sam_FO) rg.FlowOrder(tokenValue);
+ else if (tokenTag == internal::sam_KS) rg.KeySequence(tokenValue);
+ else if (tokenTag == internal::sam_LB) rg.Library(tokenValue);
+ else if (tokenTag == internal::sam_PG) rg.Programs(tokenValue);
+ else if (tokenTag == internal::sam_PI) rg.PredictedInsertSize(tokenValue);
+ else if (tokenTag == internal::sam_PU) rg.MovieName(tokenValue);
+ else if (tokenTag == internal::sam_SM) rg.Sample(tokenValue);
+ else if (tokenTag == internal::sam_DS) rg.DecodeSamDescription(tokenValue);
+ else if (tokenTag == internal::sam_PM) rg.PlatformModel(internal::PlatformModelFromName(tokenValue));
// otherwise, "custom" tag
else
@@ -421,56 +653,83 @@ ReadGroupInfo ReadGroupInfo::FromSam(const string& sam)
return rg;
}
-ReadGroupInfo& ReadGroupInfo::IpdCodec(const FrameCodec& codec, const string& tag)
+string ReadGroupInfo::IntToId(const int32_t id)
+{
+ stringstream s;
+ s << std::setfill('0') << std::setw(8) << std::hex << id;
+ return s.str();
+}
+
+ReadGroupInfo& ReadGroupInfo::IpdCodec(const FrameCodec& codec,
+ const string& tag)
{
// store desired codec type
ipdCodec_ = codec;
// update base features map
- string actualTag = tag;
+ auto actualTag = tag;
if (actualTag.empty())
actualTag = "ip";
BaseFeatureTag(BaseFeature::IPD, actualTag);
return *this;
}
-ReadGroupInfo& ReadGroupInfo::PulseWidthCodec(const FrameCodec& codec, const string& tag)
+ReadGroupInfo& ReadGroupInfo::PulseWidthCodec(const FrameCodec& codec,
+ const string& tag)
{
// store desired codec type
pulseWidthCodec_ = codec;
// update base features map
- string actualTag = tag;
+ auto actualTag = tag;
if (actualTag.empty())
actualTag = "pw";
BaseFeatureTag(BaseFeature::PULSE_WIDTH, actualTag);
return *this;
}
+string ReadGroupInfo::SequencingChemistryFromTriple(const string& bindingKit,
+ const string& sequencingKit,
+ const string& basecallerVersion)
+{
+ const string ver{ basecallerVersion.substr(0, 3) };
+ for (const auto& row : internal::ChemistryTable) {
+ if (bindingKit == row[0] && sequencingKit == row[1] && ver == row[2])
+ return row[3];
+ }
+
+ // not found
+ throw InvalidSequencingChemistryException(bindingKit,
+ sequencingKit,
+ basecallerVersion);
+}
+
std::string ReadGroupInfo::ToSam(void) const
{
stringstream out;
out << "@RG"
- << internal::MakeSamTag(internal::token_ID, id_)
- << internal::MakeSamTag(internal::token_PL, Platform());
+ << internal::MakeSamTag(internal::sam_ID, id_)
+ << internal::MakeSamTag(internal::sam_PL, Platform());
- const string& description = EncodeSamDescription();
+ auto description = EncodeSamDescription();
if (!description.empty())
- out << internal::MakeSamTag(internal::token_DS, description);
-
- if (!sequencingCenter_.empty()) out << internal::MakeSamTag(internal::token_CN, sequencingCenter_);
- if (!date_.empty()) out << internal::MakeSamTag(internal::token_DT, date_);
- if (!flowOrder_.empty()) out << internal::MakeSamTag(internal::token_FO, flowOrder_);
- if (!keySequence_.empty()) out << internal::MakeSamTag(internal::token_KS, keySequence_);
- if (!library_.empty()) out << internal::MakeSamTag(internal::token_LB, library_);
- if (!programs_.empty()) out << internal::MakeSamTag(internal::token_PG, programs_);
- if (!predictedInsertSize_.empty()) out << internal::MakeSamTag(internal::token_PI, predictedInsertSize_);
- if (!movieName_.empty()) out << internal::MakeSamTag(internal::token_PU, movieName_);
- if (!sample_.empty()) out << internal::MakeSamTag(internal::token_SM, sample_);
+ out << internal::MakeSamTag(internal::sam_DS, description);
+
+ if (!sequencingCenter_.empty()) out << internal::MakeSamTag(internal::sam_CN, sequencingCenter_);
+ if (!date_.empty()) out << internal::MakeSamTag(internal::sam_DT, date_);
+ if (!flowOrder_.empty()) out << internal::MakeSamTag(internal::sam_FO, flowOrder_);
+ if (!keySequence_.empty()) out << internal::MakeSamTag(internal::sam_KS, keySequence_);
+ if (!library_.empty()) out << internal::MakeSamTag(internal::sam_LB, library_);
+ if (!programs_.empty()) out << internal::MakeSamTag(internal::sam_PG, programs_);
+ if (!predictedInsertSize_.empty()) out << internal::MakeSamTag(internal::sam_PI, predictedInsertSize_);
+ if (!movieName_.empty()) out << internal::MakeSamTag(internal::sam_PU, movieName_);
+ if (!sample_.empty()) out << internal::MakeSamTag(internal::sam_SM, sample_);
+
+ out << internal::MakeSamTag(internal::sam_PM, internal::PlatformModelName(platformModel_));
// append any custom tags
- map<string, string>::const_iterator customIter = custom_.cbegin();
- map<string, string>::const_iterator customEnd = custom_.cend();
+ auto customIter = custom_.cbegin();
+ auto customEnd = custom_.cend();
for ( ; customIter != customEnd; ++customIter )
out << internal::MakeSamTag(customIter->first, customIter->second);
@@ -493,7 +752,7 @@ std::string MakeReadGroupId(const std::string& movieName,
for (int i = 0; i < 4; ++i)
sprintf(&hexdigest[2*i], "%02x", digest[i]);
- return std::string(hexdigest, 8);
+ return std::string{hexdigest, 8};
}
bool ReadGroupInfo::operator==(const ReadGroupInfo& other) const
@@ -505,6 +764,7 @@ bool ReadGroupInfo::operator==(const ReadGroupInfo& other) const
&& keySequence_ == other.keySequence_
&& library_ == other.library_
&& programs_ == other.programs_
+ && platformModel_ == other.platformModel_
&& predictedInsertSize_ == other.predictedInsertSize_
&& movieName_ == other.movieName_
&& sample_ == other.sample_
@@ -516,9 +776,20 @@ bool ReadGroupInfo::operator==(const ReadGroupInfo& other) const
&& control_ == other.control_
&& ipdCodec_ == other.ipdCodec_
&& pulseWidthCodec_ == other.pulseWidthCodec_
+ && hasBarcodeData_ == other.hasBarcodeData_
+ && barcodeFile_ == other.barcodeFile_
+ && barcodeHash_ == other.barcodeHash_
+ && barcodeCount_ == other.barcodeCount_
+ && barcodeMode_ == other.barcodeMode_
+ && barcodeQuality_ == other.barcodeQuality_
&& features_.size() == other.features_.size()
- && std::equal(features_.begin(), features_.end(),
- other.features_.begin());
+ && std::equal(features_.cbegin(),
+ features_.cend(),
+ other.features_.cbegin())
+ && custom_.size() == other.custom_.size()
+ && std::equal(custom_.begin(),
+ custom_.end(),
+ other.custom_.cbegin());
}
} // namespace BAM
diff --git a/src/SamTagCodec.cpp b/src/SamTagCodec.cpp
index 532998f..43064b8 100644
--- a/src/SamTagCodec.cpp
+++ b/src/SamTagCodec.cpp
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file SamTagCodec.h
+/// \brief Implements the SamTagCodec class.
+//
// Author: Derek Barnett
#include "pbbam/SamTagCodec.h"
@@ -142,17 +146,17 @@ TagCollection SamTagCodec::Decode(const string& tagString)
switch (type) {
- // technically only 'A' is allowed in SAM chars, but we'll be a little permissive
+ // technically only 'A' is allowed in SAM chars,
+ // but we'll be a little permissive
case 'A' :
case 'a' :
{
- tags[name] = static_cast<char>(remainder.at(0));
- tags[name].Modifier(TagModifier::ASCII_CHAR);
+ tags[name] = Tag(static_cast<char>(remainder.at(0), TagModifier::ASCII_CHAR));
break;
}
- // technically only 'i' is allowed in SAM ints, but we'll be a little permissive
- // since SAM might be a bit more "user-edited" than BAM
+ // technically only 'i' is allowed in SAM ints, but we'll be a little
+ // permissive since SAM might be a bit more "user-edited" than BAM
case 'c' :
case 'C' :
case 's' :
@@ -160,11 +164,13 @@ TagCollection SamTagCodec::Decode(const string& tagString)
case 'i' :
case 'I' :
{
+ // check out boost::numeric cast for these conversions
+
// negative value (force signed int)
if (remainder.at(0) == '-') {
const int32_t x = boost::lexical_cast<int32_t>(remainder);
if ( x >= INT8_MIN )
- tags[name] = static_cast<int8_t>(x); // check out boost::numeric cast
+ tags[name] = static_cast<int8_t>(x);
else if ( x >= INT16_MIN )
tags[name] = static_cast<int16_t>(x);
else
@@ -198,8 +204,7 @@ TagCollection SamTagCodec::Decode(const string& tagString)
case 'H' :
{
- tags[name] = remainder;
- tags[name].Modifier(TagModifier::HEX_STRING);
+ tags[name] = Tag(remainder, TagModifier::HEX_STRING);
break;
}
@@ -230,7 +235,6 @@ TagCollection SamTagCodec::Decode(const string& tagString)
return tags;
}
-
string SamTagCodec::Encode(const TagCollection& tags)
{
string result;
@@ -263,7 +267,7 @@ string SamTagCodec::Encode(const TagCollection& tags)
}
// "<TYPE>:<DATA>" for all other data
- switch ( tag.Type() ) {
+ switch (tag.Type()) {
case TagDataType::INT8 : result.append("i:"); appendSamValue(tag.ToInt8(), result, true); break;
case TagDataType::UINT8 : result.append("i:"); appendSamValue(tag.ToUInt8(), result, true); break;
case TagDataType::INT16 : result.append("i:"); appendSamValue(tag.ToInt16(), result); break;
diff --git a/src/SequenceInfo.cpp b/src/SequenceInfo.cpp
index fa7837d..43e4343 100644
--- a/src/SequenceInfo.cpp
+++ b/src/SequenceInfo.cpp
@@ -33,7 +33,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file SequenceInfo.cpp
+/// \brief Implements the SequenceInfo class.
+//
// Author: Derek Barnett
#include "pbbam/SequenceInfo.h"
diff --git a/tests/src/test_TimeUtils.cpp b/src/SubreadLengthQuery.cpp
similarity index 65%
copy from tests/src/test_TimeUtils.cpp
copy to src/SubreadLengthQuery.cpp
index 7ab9fa5..1c7ce41 100644
--- a/tests/src/test_TimeUtils.cpp
+++ b/src/SubreadLengthQuery.cpp
@@ -32,28 +32,40 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file SubreadLengthQuery.cpp
+/// \brief Implements the SubreadLengthQuery class.
+//
// Author: Derek Barnett
-#ifdef PBBAM_TESTING
-#define private public
-#endif
-
-#include <gtest/gtest.h>
-#include <pbbam/../../src/TimeUtils.h>
-
+#include "pbbam/SubreadLengthQuery.h"
+#include "pbbam/PbiFilterTypes.h"
+#include "pbbam/CompositeBamReader.h"
using namespace PacBio;
using namespace PacBio::BAM;
using namespace PacBio::BAM::internal;
using namespace std;
-TEST(TimeUtilsTest, ToIso8601)
+struct SubreadLengthQuery::SubreadLengthQueryPrivate
{
- const time_t rawTime = 436428750L;
- const auto timestamp = std::chrono::system_clock::from_time_t(rawTime);
+ SubreadLengthQueryPrivate(const int32_t length,
+ const Compare::Type compareType,
+ const DataSet& dataset)
+ : reader_(PbiQueryLengthFilter(length, compareType), dataset)
+ { }
+
+ PbiFilterCompositeBamReader<Compare::None> reader_; // unsorted
+};
+
+SubreadLengthQuery::SubreadLengthQuery(const int32_t length,
+ const Compare::Type compareType,
+ const DataSet& dataset)
+ : internal::IQuery()
+ , d_(new SubreadLengthQueryPrivate(length, compareType, dataset))
+{ }
+
+SubreadLengthQuery::~SubreadLengthQuery(void) { }
- // can't hardcode expected (since we rely on localtime())
- const std::string& expected = "1983-10-31T06:12:30Z";
- const std::string& actual = internal::ToIso8601(timestamp);
- EXPECT_EQ(expected, actual);
-}
+bool SubreadLengthQuery::GetNext(BamRecord &r)
+{ return d_->reader_.GetNext(r); }
diff --git a/src/Tag.cpp b/src/Tag.cpp
index 7f0a10c..5c51321 100644
--- a/src/Tag.cpp
+++ b/src/Tag.cpp
@@ -32,10 +32,15 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file Tag.cpp
+/// \brief Defines the Tag class.
+//
// Author: Derek Barnett
#include "pbbam/Tag.h"
+#include <stdexcept>
using namespace PacBio;
using namespace PacBio::BAM;
using namespace std;
@@ -57,6 +62,24 @@ Tag::Tag(const vector<int32_t>& value) : data_(value), modifier_(TagModifier::N
Tag::Tag(const vector<uint32_t>& value) : data_(value), modifier_(TagModifier::NONE) { }
Tag::Tag(const vector<float>& value) : data_(value), modifier_(TagModifier::NONE) { }
+Tag::Tag(int8_t value, const TagModifier mod)
+ : data_(value)
+ , modifier_(mod)
+{
+ if (mod == TagModifier::HEX_STRING)
+ throw runtime_error("HEX_STRING is not a valid tag modifier for int8_t data. "
+ "It is intended for string-type data only.");
+}
+
+Tag::Tag(const std::string& value, const TagModifier mod)
+ : data_(value)
+ , modifier_(mod)
+{
+ if (mod == TagModifier::ASCII_CHAR)
+ throw runtime_error("ASCII_CHAR is not a valid tag modifier for string-type data. "
+ "To construct an ASCII char tag, use a single-quoted value (e.g. 'X' instead of \"X\")");
+}
+
Tag::Tag(const Tag& other)
: data_(other.data_)
, modifier_(other.modifier_)
diff --git a/src/TagCollection.cpp b/src/TagCollection.cpp
index 7f50126..98ed22b 100644
--- a/src/TagCollection.cpp
+++ b/src/TagCollection.cpp
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file TagCollection.cpp
+/// \brief Implements the TagCollection class.
+//
// Author: Derek Barnett
#include "pbbam/TagCollection.h"
diff --git a/src/TimeUtils.h b/src/TimeUtils.h
index 615295d..b3fd75f 100644
--- a/src/TimeUtils.h
+++ b/src/TimeUtils.h
@@ -71,6 +71,25 @@ std::string ToIso8601(const std::chrono::system_clock::time_point& tp)
}
inline
+std::string ToDataSetFormat(const std::chrono::system_clock::time_point& tp)
+{
+ // get time info
+ const time_t ttime_t = std::chrono::system_clock::to_time_t(tp);
+ const std::chrono::system_clock::time_point tp_sec = std::chrono::system_clock::from_time_t(ttime_t);
+ const std::chrono::milliseconds ms = std::chrono::duration_cast<std::chrono::milliseconds>(tp - tp_sec);
+ const std::tm* ttm = gmtime(&ttime_t); // static obj, no free needed (may not be thread-safe though)
+
+ // format output
+ char date_time_format[] = "%y%m%d_%H%M%S";
+ char date_time_str[50];
+ strftime(date_time_str, sizeof(date_time_str), date_time_format, ttm);
+ std::string result(date_time_str);
+ if (ms.count() > 0)
+ result.append(std::to_string(ms.count()));
+ return result;
+}
+
+inline
std::chrono::system_clock::time_point CurrentTime(void)
{ return std::chrono::system_clock::now(); }
diff --git a/src/VirtualPolymeraseBamRecord.cpp b/src/VirtualPolymeraseBamRecord.cpp
index 7b3bf7b..eb23d6b 100644
--- a/src/VirtualPolymeraseBamRecord.cpp
+++ b/src/VirtualPolymeraseBamRecord.cpp
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file VirtualPolymeraseBamRecord.cpp
+/// \brief Implements the VirtualPolymeraseBamRecord class.
+//
// Author: Armin Töpfer
#include <iostream>
@@ -46,6 +50,55 @@
using namespace PacBio;
using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// \brief Appends content of src vector to dst vector using move semantics.
+///
+/// \param[in] src Input vector that will be empty after execution
+/// \param[in,out] dst Output vector that will be appended to
+///
+template <typename T>
+inline void MoveAppend(std::vector<T>& src, std::vector<T>& dst) noexcept
+{
+ if (dst.empty())
+ {
+ dst = std::move(src);
+ }
+ else
+ {
+ dst.reserve(dst.size() + src.size());
+ std::move(src.begin(), src.end(), std::back_inserter(dst));
+ src.clear();
+ }
+}
+
+/// \brief Appends content of src vector to dst vector using move semantics.
+///
+/// \param[in] src Input vector via perfect forwarding
+/// \param[in,out] dst Output vector that will be appended to
+///
+template <typename T>
+inline void MoveAppend(std::vector<T>&& src, std::vector<T>& dst) noexcept
+{
+ if (dst.empty())
+ {
+ dst = std::move(src);
+ }
+ else
+ {
+ dst.reserve(dst.size() + src.size());
+ std::move(src.begin(), src.end(), std::back_inserter(dst));
+ src.clear();
+ }
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
VirtualPolymeraseBamRecord::VirtualPolymeraseBamRecord(
std::vector<BamRecord>&& unorderedSources, const BamHeader& header)
@@ -59,15 +112,22 @@ VirtualPolymeraseBamRecord::VirtualPolymeraseBamRecord(
StitchSources();
}
-void VirtualPolymeraseBamRecord::StitchSources()
+bool VirtualPolymeraseBamRecord::HasVirtualRegionType(const VirtualRegionType regionType) const
+{ return virtualRegionsMap_.find(regionType) != virtualRegionsMap_.end(); }
+
+Frames VirtualPolymeraseBamRecord::IPDV1Frames(Orientation orientation) const
+{
+ const auto rawFrames = this->IPDRaw(orientation);
+ const std::vector<uint8_t> rawData(rawFrames.Data().begin(), rawFrames.Data().end());
+ return Frames::Decode(rawData);
+}
+
+
+void VirtualPolymeraseBamRecord::StitchSources(void)
{
const auto& firstRecord = sources_[0];
const auto& lastRecord = sources_[sources_.size() - 1];
- // Temporary variables used for stitching
- int accuracy = 0;
- int accuracyCounter = 0;
-
std::string sequence;
std::string deletionTag;
std::string substitutionTag;
@@ -83,12 +143,13 @@ void VirtualPolymeraseBamRecord::StitchSources()
QualityValues labelQv;
QualityValues alternativeLabelQv;
- Frames ipd;
- Frames pw;
- Frames pd;
- Frames px;
- std::vector<float> pa;
- std::vector<float> pm;
+ Frames ipd;
+ Frames pw;
+ Frames pd;
+ Frames px;
+ std::vector<float> pa;
+ std::vector<float> pm;
+ std::vector<uint32_t> sf;
// Stitch using tmp vars
for(auto& b : sources_)
@@ -97,12 +158,6 @@ void VirtualPolymeraseBamRecord::StitchSources()
MoveAppend(b.Qualities(), qualities);
- if (b.HasReadAccuracy())
- {
- accuracy += b.ReadAccuracy();
- ++accuracyCounter;
- }
-
if (b.HasDeletionQV())
MoveAppend(std::move(b.DeletionQV()), deletionQv);
@@ -154,10 +209,19 @@ void VirtualPolymeraseBamRecord::StitchSources()
if (b.HasPkmean())
MoveAppend(b.Pkmean(), pa);
- if (b.HasScrapType())
- {
- const auto regionType = b.ScrapType();
+ if (b.HasPkmid2())
+ MoveAppend(b.Pkmid2(), pm);
+ if (b.HasPkmean2())
+ MoveAppend(b.Pkmean2(), pa);
+
+ if (b.HasStartFrame())
+ MoveAppend(b.StartFrame(), sf);
+
+ if (b.HasScrapRegionType())
+ {
+ const VirtualRegionType regionType = b.ScrapRegionType();
+
if (!HasVirtualRegionType(regionType))
virtualRegionsMap_[regionType] = std::vector<VirtualRegion>();
@@ -179,15 +243,28 @@ void VirtualPolymeraseBamRecord::StitchSources()
regionType, b.QueryStart(), b.QueryEnd(), b.LocalContextFlags(),
barcodes.first, barcodes.second);
}
+
+ if (b.HasBarcodes() && !this->HasBarcodes())
+ this->Barcodes(b.Barcodes());
+
+ if (b.HasBarcodeQuality() && !this->HasBarcodeQuality())
+ this->BarcodeQuality(b.BarcodeQuality());
+
+ if (b.HasReadAccuracy() && !this->HasReadAccuracy())
+ this->ReadAccuracy(b.ReadAccuracy());
+
+ if (b.HasScrapZmwType())
+ {
+ if (!this->HasScrapZmwType())
+ this->ScrapZmwType(b.ScrapZmwType());
+ else if (this->ScrapZmwType() != b.ScrapZmwType())
+ throw std::runtime_error("ScrapZmwTypes do not match");
+ }
}
// ReadGroup
this->ReadGroup(this->header_.ReadGroups()[0]);
- // Avoid division by 0
- if (accuracyCounter > 0)
- this->ReadAccuracy(accuracy / accuracyCounter);
-
this->NumPasses(1);
// All records should contain the same SNR and hole number
@@ -246,6 +323,10 @@ void VirtualPolymeraseBamRecord::StitchSources()
if (!px.Data().empty())
this->PulseCallWidth(px, FrameEncodingType::LOSSLESS);
+ // 32 bit arrays
+ if (!sf.empty())
+ this->StartFrame(sf);
+
// Determine HQREGION bases on LQREGIONS
if (HasVirtualRegionType(VirtualRegionType::LQREGION))
{
@@ -280,9 +361,17 @@ void VirtualPolymeraseBamRecord::StitchSources()
}
}
-Frames VirtualPolymeraseBamRecord::IPDV1Frames(Orientation orientation) const
+
+std::map<VirtualRegionType, std::vector<VirtualRegion>>
+VirtualPolymeraseBamRecord::VirtualRegionsMap(void) const
+{ return virtualRegionsMap_; }
+
+std::vector<VirtualRegion>
+VirtualPolymeraseBamRecord::VirtualRegionsTable(const VirtualRegionType regionType) const
{
- const auto rawFrames = this->IPDRaw(orientation);
- const std::vector<uint8_t> rawData(rawFrames.Data().begin(), rawFrames.Data().end());
- return Frames::Decode(rawData);
-}
\ No newline at end of file
+ const auto iter = virtualRegionsMap_.find(regionType);
+ if (iter != virtualRegionsMap_.cend())
+ return iter->second;
+ return std::vector<VirtualRegion>();
+}
+
diff --git a/src/VirtualPolymeraseCompositeReader.cpp b/src/VirtualPolymeraseCompositeReader.cpp
new file mode 100644
index 0000000..a70dfe5
--- /dev/null
+++ b/src/VirtualPolymeraseCompositeReader.cpp
@@ -0,0 +1,146 @@
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file VirtualPolymeraseCompositeReader.cpp
+/// \brief Implements the VirtualPolymeraseCompositeReader class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/virtual/VirtualPolymeraseCompositeReader.h"
+#include <boost/algorithm/string.hpp>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+VirtualPolymeraseCompositeReader::VirtualPolymeraseCompositeReader(const DataSet& dataset)
+ : currentReader_(nullptr)
+ , filter_(PbiFilter::FromDataSet(dataset))
+{
+ // set up source queue
+ string primaryFn;
+ string scrapsFn;
+ const ExternalResources& resources = dataset.ExternalResources();
+ for (const ExternalResource& resource : resources) {
+
+ primaryFn.clear();
+ scrapsFn.clear();
+
+ // if resource is possible "primary" BAM
+ const auto& metatype = resource.MetaType();
+ if (metatype == "PacBio.SubreadFile.SubreadBamFile" ||
+ metatype == "PacBio.SubreadFile.HqRegionBamFile")
+ {
+ // possible resolve relative path
+ primaryFn = dataset.ResolvePath(resource.ResourceId());
+
+ // check for associated scraps file
+ const ExternalResources& childResources = resource.ExternalResources();
+ for (const ExternalResource& childResource : childResources) {
+ const auto& childMetatype = childResource.MetaType();
+ if (childMetatype == "PacBio.SubreadFile.ScrapsBamFile" ||
+ childMetatype == "PacBio.SubreadFile.HqScrapsBamFile")
+ {
+ // possible resolve relative path
+ scrapsFn = dataset.ResolvePath(childResource.ResourceId());
+ break;
+ }
+ }
+ }
+
+ // queue up source for later
+ if (!primaryFn.empty() && !scrapsFn.empty())
+ sources_.push_back(make_pair(primaryFn,scrapsFn));
+ }
+
+ // open first available source
+ OpenNextReader();
+}
+
+bool VirtualPolymeraseCompositeReader::HasNext(void)
+{
+ return (currentReader_ && currentReader_->HasNext());
+}
+
+VirtualPolymeraseBamRecord VirtualPolymeraseCompositeReader::Next(void)
+{
+ if (currentReader_) {
+ const auto result = currentReader_->Next();
+ if (!currentReader_->HasNext())
+ OpenNextReader();
+ return result;
+ }
+
+ // no reader active
+ const string msg = { "no readers active, make sure you use "
+ "VirtualPolymeraseCompositeReader::HasNext before "
+ "requesting next record"
+ };
+ throw std::runtime_error(msg);
+}
+
+vector<BamRecord> VirtualPolymeraseCompositeReader::NextRaw(void)
+{
+ if (currentReader_) {
+ const auto result = currentReader_->NextRaw();
+ if (!currentReader_->HasNext())
+ OpenNextReader();
+ return result;
+ }
+
+ // no reader active
+ const string msg = { "no readers active, make sure you use "
+ "VirtualPolymeraseCompositeReader::HasNext before "
+ "requesting next group of records"
+ };
+ throw std::runtime_error(msg);
+}
+
+void VirtualPolymeraseCompositeReader::OpenNextReader(void)
+{
+ currentReader_.reset(nullptr);
+
+ // find next source pair with data
+ while(!sources_.empty()) {
+ const auto nextSource = sources_.front();
+ sources_.pop_front();
+
+ currentReader_.reset(new VirtualPolymeraseReader(nextSource.first,
+ nextSource.second,
+ filter_));
+ if (currentReader_->HasNext())
+ return;
+ }
+}
diff --git a/src/VirtualPolymeraseReader.cpp b/src/VirtualPolymeraseReader.cpp
index 271a96e..4c9f4b0 100644
--- a/src/VirtualPolymeraseReader.cpp
+++ b/src/VirtualPolymeraseReader.cpp
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file VirtualPolymeraseReader.cpp
+/// \brief Implements the VirtualPolymeraseReader class.
+//
// Author: Armin Töpfer
#include <stdexcept>
@@ -42,72 +46,240 @@
using namespace PacBio;
using namespace PacBio::BAM;
+using namespace std;
-VirtualPolymeraseReader::VirtualPolymeraseReader(
- const std::string& primaryBamFilePath, const std::string& scrapsBamFilePath)
- : primaryBamFilePath_(primaryBamFilePath)
- , scrapsBamFilePath_(scrapsBamFilePath)
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+class IBackend
{
- primaryBamFile_ = std::unique_ptr<BamFile>(new BamFile(primaryBamFilePath_));
- primaryQuery_ = std::unique_ptr<EntireFileQuery>(new EntireFileQuery(*primaryBamFile_));
- primaryIt_ = primaryQuery_->begin();
-
- scrapsBamFile_ = std::unique_ptr<BamFile>(new BamFile(scrapsBamFilePath_));
- scrapsQuery_ = std::unique_ptr<EntireFileQuery>(new EntireFileQuery(*scrapsBamFile_));
- scrapsIt_ = scrapsQuery_->begin();
-
- polyHeader_ = std::unique_ptr<BamHeader>(
- new BamHeader(primaryBamFile_->Header().ToSam()));
-
- auto readGroups = polyHeader_->ReadGroups();
- if (readGroups.empty())
- throw std::runtime_error("Bam header of the primary bam has no read groups.");
- readGroups[0].ReadType("POLYMERASE");
- readGroups[0].Id(readGroups[0].MovieName(), "POLYMERASE");
- if (readGroups.size() > 1)
+protected:
+ IBackend(const string& primaryBamFilePath,
+ const string& scrapsBamFilePath)
{
- std::vector<ReadGroupInfo> singleGroup;
- singleGroup.emplace_back(std::move(readGroups[0]));
- readGroups = std::move(singleGroup);
- polyHeader_->ClearReadGroups();
+ primaryBamFile_ = std::unique_ptr<BamFile>(new BamFile(primaryBamFilePath));
+ scrapsBamFile_ = std::unique_ptr<BamFile>(new BamFile(scrapsBamFilePath));
+
+ polyHeader_ = std::unique_ptr<BamHeader>(
+ new BamHeader(primaryBamFile_->Header().ToSam()));
+
+ auto readGroups = polyHeader_->ReadGroups();
+ if (readGroups.empty())
+ throw std::runtime_error("Bam header of the primary bam has no read groups.");
+ readGroups[0].ReadType("POLYMERASE");
+ readGroups[0].Id(readGroups[0].MovieName(), "POLYMERASE");
+ if (readGroups.size() > 1)
+ {
+ std::vector<ReadGroupInfo> singleGroup;
+ singleGroup.emplace_back(std::move(readGroups[0]));
+ readGroups = std::move(singleGroup);
+ polyHeader_->ClearReadGroups();
+ }
+ polyHeader_->ReadGroups(readGroups);
}
- polyHeader_->ReadGroups(readGroups);
-}
-// This method is not thread safe
-VirtualPolymeraseBamRecord VirtualPolymeraseReader::Next()
+public:
+ ~IBackend(void) { }
+
+public:
+ virtual bool HasNext(void) =0;
+ virtual std::vector<BamRecord> NextRaw(void) =0;
+
+ const BamHeader& PolyHeader(void) const
+ { return *polyHeader_; }
+
+ BamHeader PrimaryHeader(void) const
+ { return primaryBamFile_->Header(); }
+
+ BamHeader ScrapsHeader(void) const
+ { return scrapsBamFile_->Header(); }
+
+protected:
+ std::unique_ptr<BamFile> primaryBamFile_;
+ std::unique_ptr<BamFile> scrapsBamFile_;
+ std::unique_ptr<BamHeader> polyHeader_;
+};
+
+class EntireFileBackend : public IBackend
{
- auto bamRecordVec = NextRaw();
- VirtualPolymeraseBamRecord stitched(std::move(bamRecordVec), *polyHeader_);
- return std::move(stitched);
-}
+public:
+ EntireFileBackend(const string& primaryBamFilepath,
+ const string& scrapsBamFilepath)
+ : IBackend(primaryBamFilepath, scrapsBamFilepath)
+ {
+ primaryQuery_ = std::unique_ptr<EntireFileQuery>(new EntireFileQuery(*primaryBamFile_));
+ primaryIt_ = primaryQuery_->begin();
+
+ scrapsQuery_ = std::unique_ptr<EntireFileQuery>(new EntireFileQuery(*scrapsBamFile_));
+ scrapsIt_ = scrapsQuery_->begin();
+ }
+
+ ~EntireFileBackend(void) { }
+
+public:
+ bool HasNext(void)
+ {
+ // Return true until both iterators are at the end of the query
+ return primaryIt_ != primaryQuery_->end() || scrapsIt_ != scrapsQuery_->end();
+ }
+
+ std::vector<BamRecord> NextRaw(void)
+ {
+ std::vector<BamRecord> bamRecordVec;
-std::vector<BamRecord> VirtualPolymeraseReader::NextRaw()
+ // Current hole number, the smallest of scraps and primary.
+ // It can be that the next ZMW is scrap only.
+ int currentHoleNumber;
+ if (primaryIt_ == primaryQuery_->end())
+ currentHoleNumber = (*scrapsIt_).HoleNumber();
+ else if (scrapsIt_ == scrapsQuery_->end())
+ currentHoleNumber = (*primaryIt_).HoleNumber();
+ else
+ currentHoleNumber = std::min((*primaryIt_).HoleNumber(), (*scrapsIt_).HoleNumber());
+
+ // collect subreads or hqregions
+ while (primaryIt_ != primaryQuery_->end() && currentHoleNumber == (*primaryIt_).HoleNumber())
+ bamRecordVec.push_back(*primaryIt_++);
+
+ // collect scraps
+ while (scrapsIt_ != scrapsQuery_->end() && currentHoleNumber == (*scrapsIt_).HoleNumber())
+ bamRecordVec.push_back(*scrapsIt_++);
+
+ return bamRecordVec;
+ }
+
+ std::unique_ptr<EntireFileQuery> primaryQuery_;
+ std::unique_ptr<EntireFileQuery> scrapsQuery_;
+ EntireFileQuery::iterator primaryIt_;
+ EntireFileQuery::iterator scrapsIt_;
+};
+
+class PbiFilterBackend : public IBackend
{
- std::vector<BamRecord> bamRecordVec;
-
- // Current hole number, the smallest of scraps and primary.
- // It can be that the next ZMW is scrap only.
- int currentHoleNumber = std::min((*primaryIt_).HoleNumber(), (*scrapsIt_).HoleNumber());
- // collect subreads or hqregions
- while (primaryIt_ != primaryQuery_->end() && currentHoleNumber == (*primaryIt_).HoleNumber())
- bamRecordVec.push_back(*primaryIt_++);
-
- // collect scraps
- while (scrapsIt_ != scrapsQuery_->end() && currentHoleNumber == (*scrapsIt_).HoleNumber())
- bamRecordVec.push_back(*scrapsIt_++);
-
- return bamRecordVec;
-}
+public:
+ PbiFilterBackend(const string& primaryBamFilePath,
+ const string& scrapsBamFilePath,
+ const PbiFilter& filter)
+ : IBackend(primaryBamFilePath, scrapsBamFilePath)
+ {
+ primaryQuery_ = std::unique_ptr<PbiFilterQuery>(new PbiFilterQuery(filter, *primaryBamFile_));
+ primaryIt_ = primaryQuery_->begin();
-bool VirtualPolymeraseReader::HasNext()
+ scrapsQuery_ = std::unique_ptr<PbiFilterQuery>(new PbiFilterQuery(filter, *scrapsBamFile_));
+ scrapsIt_ = scrapsQuery_->begin();
+ }
+
+ ~PbiFilterBackend(void) { }
+
+public:
+ bool HasNext(void)
+ {
+ // Return true until both iterators are at the end of the query
+ return primaryIt_ != primaryQuery_->end() || scrapsIt_ != scrapsQuery_->end();
+ }
+
+ std::vector<BamRecord> NextRaw(void)
+ {
+ std::vector<BamRecord> bamRecordVec;
+
+ // Current hole number, the smallest of scraps and primary.
+ // It can be that the next ZMW is scrap only.
+ int currentHoleNumber;
+ if (primaryIt_ == primaryQuery_->end())
+ currentHoleNumber = (*scrapsIt_).HoleNumber();
+ else if (scrapsIt_ == scrapsQuery_->end())
+ currentHoleNumber = (*primaryIt_).HoleNumber();
+ else
+ currentHoleNumber = std::min((*primaryIt_).HoleNumber(), (*scrapsIt_).HoleNumber());
+
+ // collect subreads or hqregions
+ while (primaryIt_ != primaryQuery_->end() && currentHoleNumber == (*primaryIt_).HoleNumber())
+ bamRecordVec.push_back(*primaryIt_++);
+
+ // collect scraps
+ while (scrapsIt_ != scrapsQuery_->end() && currentHoleNumber == (*scrapsIt_).HoleNumber())
+ bamRecordVec.push_back(*scrapsIt_++);
+
+ return bamRecordVec;
+ }
+
+private:
+ std::unique_ptr<PbiFilterQuery> primaryQuery_;
+ std::unique_ptr<PbiFilterQuery> scrapsQuery_;
+ PbiFilterQuery::iterator primaryIt_;
+ PbiFilterQuery::iterator scrapsIt_;
+};
+
+} // namespace internal
+
+struct VirtualPolymeraseReader::VirtualPolymeraseReaderPrivate
+{
+ VirtualPolymeraseReaderPrivate(const string& primaryBamFilepath,
+ const string& scrapsBamFilePath,
+ const PbiFilter& filter)
+ : backend_(nullptr)
+ {
+ if (filter.IsEmpty()) {
+ backend_.reset(new internal::EntireFileBackend(primaryBamFilepath,
+ scrapsBamFilePath));
+ } else {
+ backend_.reset(new internal::PbiFilterBackend(primaryBamFilepath,
+ scrapsBamFilePath,
+ filter));
+ }
+ }
+
+ bool HasNext(void)
+ { return backend_->HasNext(); }
+
+ std::vector<BamRecord> NextRaw(void)
+ { return backend_->NextRaw(); }
+
+ const BamHeader& PolyHeader(void) const
+ { return backend_->PolyHeader(); }
+
+ BamHeader PrimaryHeader(void) const
+ { return backend_->PrimaryHeader(); }
+
+ BamHeader ScrapsHeader(void) const
+ { return backend_->ScrapsHeader(); }
+
+ std::unique_ptr<internal::IBackend> backend_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+VirtualPolymeraseReader::VirtualPolymeraseReader(const std::string& primaryBamFilePath,
+ const std::string& scrapsBamFilePath)
+ : d_(new VirtualPolymeraseReaderPrivate(primaryBamFilePath, scrapsBamFilePath, PbiFilter()))
+{ }
+
+VirtualPolymeraseReader::VirtualPolymeraseReader(const std::string& primaryBamFilePath,
+ const std::string& scrapsBamFilePath,
+ const PbiFilter& filter)
+ : d_(new VirtualPolymeraseReaderPrivate(primaryBamFilePath, scrapsBamFilePath, filter))
+{ }
+
+VirtualPolymeraseReader::~VirtualPolymeraseReader(void) { }
+
+bool VirtualPolymeraseReader::HasNext(void)
+{ return d_->HasNext(); }
+
+// This method is not thread safe
+VirtualPolymeraseBamRecord VirtualPolymeraseReader::Next(void)
{
- // Return true until both iterators are at the end of the query
- return primaryIt_ != primaryQuery_->end() || scrapsIt_ != scrapsQuery_->end();
+ auto bamRecordVec = NextRaw();
+ VirtualPolymeraseBamRecord stitched(std::move(bamRecordVec), d_->PolyHeader());
+ return std::move(stitched);
}
-BamHeader VirtualPolymeraseReader::PrimaryHeader()
-{ return primaryBamFile_->Header(); }
+std::vector<BamRecord> VirtualPolymeraseReader::NextRaw(void)
+{ return d_->NextRaw(); }
+
+BamHeader VirtualPolymeraseReader::PrimaryHeader(void) const
+{ return d_->PrimaryHeader(); }
-BamHeader VirtualPolymeraseReader::ScrapsHeader()
-{ return scrapsBamFile_->Header(); }
+BamHeader VirtualPolymeraseReader::ScrapsHeader(void) const
+{ return d_->ScrapsHeader(); }
diff --git a/src/VirtualRegionTypeMap.cpp b/src/VirtualRegionTypeMap.cpp
index 4839b35..8c6c757 100644
--- a/src/VirtualRegionTypeMap.cpp
+++ b/src/VirtualRegionTypeMap.cpp
@@ -32,7 +32,11 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file VirtualRegionTypeMap.cpp
+/// \brief Implements the VirtualRegionTypeMap class.
+//
// Author: Armin Töpfer
#include "pbbam/virtual/VirtualRegionTypeMap.h"
@@ -42,8 +46,9 @@ using namespace PacBio::BAM;
std::map<char, VirtualRegionType> VirtualRegionTypeMap::ParseChar
{
- { 'A' , VirtualRegionType::ADAPTER },
- { 'B' , VirtualRegionType::BARCODE },
- { 'H' , VirtualRegionType::HQREGION },
- { 'L' , VirtualRegionType::LQREGION }
-};
\ No newline at end of file
+ { 'A' , VirtualRegionType::ADAPTER },
+ { 'B' , VirtualRegionType::BARCODE },
+ { 'H' , VirtualRegionType::HQREGION },
+ { 'F' , VirtualRegionType::FILTERED },
+ { 'L' , VirtualRegionType::LQREGION }
+};
diff --git a/src/XmlReader.cpp b/src/XmlReader.cpp
index 5e88e47..df4e782 100644
--- a/src/XmlReader.cpp
+++ b/src/XmlReader.cpp
@@ -88,7 +88,7 @@ void FromXml(const pugi::xml_node& xmlNode, DataSetElement& parent)
return;
// label & text
- DataSetElement e(xmlNode.name());
+ DataSetElement e(xmlNode.name(), FromInputXml());
e.Text(xmlNode.text().get());
// iterate attributes
diff --git a/src/XmlWriter.cpp b/src/XmlWriter.cpp
index bf42e36..6c7b7af 100644
--- a/src/XmlWriter.cpp
+++ b/src/XmlWriter.cpp
@@ -40,6 +40,7 @@
#include "pugixml/pugixml.hpp"
#include <fstream>
#include <iostream>
+#include <map>
using namespace PacBio;
using namespace PacBio::BAM;
using namespace PacBio::BAM::internal;
@@ -50,19 +51,42 @@ namespace BAM {
namespace internal {
static
+string Prefix(const string& input)
+{
+ const size_t colonFound = input.find(':');
+ if (colonFound == std::string::npos || colonFound == 0)
+ return string();
+ return input.substr(0, colonFound);
+}
+
+static
string OutputName(const DataSetElement& node,
const NamespaceRegistry& registry)
{
- if (node.PrefixLabel().empty())
- return registry.Namespace(node.Xsd()).Name() + ":" + node.LocalNameLabel().to_string();
- else
- return node.QualifiedNameLabel(); // is this correct? what if node's contents don't match registry
- // who gets priority?
+ // if from input XML, respect the namespaces given
+ if (node.IsVerbatimLabel())
+ return node.QualifiedNameLabel();
+
+ // otherwise, probably user-generated
+ else {
+ // if no namespace prefix, prepend the appropriate one & return
+ if (node.PrefixLabel().empty()) {
+ static const string colon = ":";
+ XsdType xsdType = node.Xsd();
+ if (xsdType == XsdType::NONE)
+ xsdType = registry.XsdForElement(node.LocalNameLabel().to_string());
+ return registry.Namespace(xsdType).Name() + colon + node.LocalNameLabel().to_string();
+ }
+ // otherwise, has prefix - return full name
+ else
+ return node.QualifiedNameLabel();
+ }
}
static
void ToXml(const DataSetElement& node,
const NamespaceRegistry& registry,
+ map<XsdType, string>& xsdPrefixesUsed,
pugi::xml_node& parentXml)
{
// create child of parent, w/ label & text
@@ -74,6 +98,11 @@ void ToXml(const DataSetElement& node,
if (!node.Text().empty())
xmlNode.text().set(node.Text().c_str());
+ // store XSD type for later
+ const string prefix = Prefix(label);
+ if (!prefix.empty())
+ xsdPrefixesUsed[node.Xsd()] = prefix;
+
// add attributes
auto attrIter = node.Attributes().cbegin();
auto attrEnd = node.Attributes().cend();
@@ -92,7 +121,7 @@ void ToXml(const DataSetElement& node,
auto childEnd = node.Children().cend();
for ( ; childIter != childEnd; ++childIter) {
const DataSetElement& child = (*childIter);
- ToXml(child, registry, xmlNode);
+ ToXml(child, registry, xsdPrefixesUsed, xmlNode);
}
}
@@ -108,7 +137,7 @@ void XmlWriter::ToStream(const DataSetBase& dataset,
const NamespaceRegistry& registry = dataset.Namespaces();
// create top-level dataset XML node
- const string& label = OutputName(dataset, registry);
+ const string& label = internal::OutputName(dataset, registry);
if (label.empty())
throw std::runtime_error("could not convert dataset node to XML");
pugi::xml_node root = doc.append_child(label.c_str());
@@ -129,12 +158,15 @@ void XmlWriter::ToStream(const DataSetBase& dataset,
attr.set_value(value.c_str());
}
+ map<XsdType, string> xsdPrefixesUsed;
+ xsdPrefixesUsed[dataset.Xsd()] = Prefix(label);
+
// iterate children, recursively building up subtree
auto childIter = dataset.Children().cbegin();
auto childEnd = dataset.Children().cend();
for ( ; childIter != childEnd; ++childIter) {
const DataSetElement& child = (*childIter);
- ToXml(child, registry, root);
+ ToXml(child, registry, xsdPrefixesUsed, root);
}
// write XML to stream
@@ -142,6 +174,41 @@ void XmlWriter::ToStream(const DataSetBase& dataset,
decl.append_attribute("version") = "1.0";
decl.append_attribute("encoding") = "utf-8";
+ // add XSD namespace attributes
+ pugi::xml_attribute xmlnsDefaultAttribute = root.attribute("xmlns");
+ if (xmlnsDefaultAttribute.empty()) {
+ xmlnsDefaultAttribute = root.append_attribute("xmlns");
+ xmlnsDefaultAttribute.set_value(registry.DefaultNamespace().Uri().c_str());
+ }
+ pugi::xml_attribute xsiAttribute = root.attribute("xmlns:xsi");
+ if (xsiAttribute.empty()) {
+ xsiAttribute = root.append_attribute("xmlns:xsi");
+ xsiAttribute.set_value("http://www.w3.org/2001/XMLSchema-instance");
+ }
+ pugi::xml_attribute xsiSchemaLocationAttribute = root.attribute("xsi:schemaLocation");
+ if (xsiSchemaLocationAttribute.empty()) {
+ xsiSchemaLocationAttribute = root.append_attribute("xsi:schemaLocation");
+ xsiSchemaLocationAttribute.set_value(registry.DefaultNamespace().Uri().c_str());
+ }
+
+ static const string xmlnsPrefix = "xmlns:";
+ map<XsdType, string>::const_iterator prefixIter = xsdPrefixesUsed.cbegin();
+ map<XsdType, string>::const_iterator prefixEnd = xsdPrefixesUsed.cend();
+ for ( ; prefixIter != prefixEnd; ++prefixIter ) {
+ const XsdType& xsd = prefixIter->first;
+ const string& prefix = prefixIter->second;
+ if (xsd == XsdType::NONE || prefix.empty())
+ continue;
+ const NamespaceInfo& nsInfo = registry.Namespace(xsd);
+ assert(nsInfo.Name() == prefix);
+ const string xmlnsName = xmlnsPrefix + prefix;
+ pugi::xml_attribute xmlnsAttribute = root.attribute(xmlnsName.c_str());
+ if (xmlnsAttribute.empty()) {
+ xmlnsAttribute = root.append_attribute(xmlnsName.c_str());
+ xmlnsAttribute.set_value(nsInfo.Uri().c_str());
+ }
+ }
+
// "no escapes" to allow explicit ">" "<" comparison operators in filter parameters
// we may remove this if/when comparison is separated from the value
doc.save(out, "\t", pugi::format_default | pugi::format_no_escapes, pugi::encoding_utf8);
diff --git a/src/ZmwGroupQuery.cpp b/src/ZmwGroupQuery.cpp
index bf76ce3..d33b34a 100644
--- a/src/ZmwGroupQuery.cpp
+++ b/src/ZmwGroupQuery.cpp
@@ -32,112 +32,81 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file ZmwQuery.cpp
+/// \brief Implements the ZmwQuery class.
+//
// Author: Derek Barnett
#include "pbbam/ZmwGroupQuery.h"
-#include "pbbam/PbiIndex.h"
-#include "pbbam/internal/BamRecordSort.h"
-#include "pbbam/internal/MergeStrategy.h"
+#include "pbbam/BamRecord.h"
+#include "pbbam/CompositeBamReader.h"
+#include "pbbam/PbiFilterTypes.h"
#include "MemoryUtils.h"
#include <algorithm>
-#include <map>
+#include <deque>
using namespace PacBio;
using namespace PacBio::BAM;
using namespace PacBio::BAM::internal;
-//using namespace PacBio::BAM::staging;
using namespace std;
-namespace PacBio {
-namespace BAM {
-namespace internal {
-
-class ZmwQueryGroupIterator : public IBamFileGroupIterator
+struct ZmwGroupQuery::ZmwGroupQueryPrivate
{
-public:
- ZmwQueryGroupIterator(const std::vector<int32_t>& zmwWhitelist,
- const BamFile& file)
- : IBamFileGroupIterator(file)
- {
- // init BAM file for reading
- htsFile_.reset(sam_open(file.Filename().c_str(), "rb"));
- if (!htsFile_)
- throw std::runtime_error("could not open BAM file for reading");
+ typedef PbiFilterCompositeBamReader<Compare::Zmw> ReaderType;
+ typedef std::unique_ptr<ReaderType> ReaderPtr;
- htsHeader_.reset(sam_hdr_read(htsFile_.get()));
- if (!htsHeader_)
- throw std::runtime_error("could not read BAM header data");
-
- // open index & query for ZMWs
- PbiIndex index(file.PacBioIndexFilename());
- for (int32_t zmw : zmwWhitelist)
- zmwGroups_[zmw] = index.Lookup(ZmwIndexRequest(zmw));
+ ZmwGroupQueryPrivate(const std::vector<int32_t>& zmwWhitelist,
+ const DataSet& dataset)
+ : whitelist_(zmwWhitelist.cbegin(), zmwWhitelist.cend())
+ , reader_(nullptr)
+ {
+ std::sort(whitelist_.begin(), whitelist_.end());
+ whitelist_.erase(std::unique(whitelist_.begin(),
+ whitelist_.end()),
+ whitelist_.end());
+
+ if (!whitelist_.empty()) {
+ reader_ = ReaderPtr(new ReaderType(PbiZmwFilter{whitelist_.front()}, dataset));
+ whitelist_.pop_front();
+ }
}
-public:
- bool GetNext(std::vector<BamRecord>& r)
+ bool GetNext(std::vector<BamRecord>& records)
{
- r.clear();
- if (zmwGroups_.empty())
+ records.clear();
+ if (!reader_)
return false;
- BamRecord record(header_);
- const IndexResultBlocks& blocks = zmwGroups_.cbegin()->second;
- for (const IndexResultBlock& block : blocks) {
-
- // seek to first record in block
- const int seekResult = bgzf_seek(htsFile_.get()->fp.bgzf, block.virtualOffset_, SEEK_SET);
- if (seekResult == -1)
- throw std::runtime_error("could not seek in BAM file");
+ // get all records matching ZMW
+ BamRecord r;
+ while (reader_->GetNext(r))
+ records.push_back(r);
- // read block records
- for (size_t i = 0; i < block.numReads_; ++i) {
- const int readResult = sam_read1(htsFile_.get(),
- htsHeader_.get(),
- internal::BamRecordMemory::GetRawData(record).get());
-// record.header_ = fileData_.Header();
-
- if (readResult >= 0) // success
- r.push_back(record);
- else if (readResult == -1) // normal EOF
- break;
- else // error (truncated file, etc)
- throw std::runtime_error("corrupted file, may be truncated");
- }
+ // set next ZMW (if any left)
+ if (!whitelist_.empty()) {
+ reader_->Filter(PbiZmwFilter{whitelist_.front()});
+ whitelist_.pop_front();
}
- // pop zmw info & return success
- zmwGroups_.erase(zmwGroups_.begin());
- return !r.empty();
- }
+ // otherwise destroy reader, next iteration will return false
+ else
+ reader_.reset(nullptr);
- bool InSameGroup(const BamRecord& lhs, const BamRecord& rhs) const
- { return lhs.HoleNumber() == rhs.HoleNumber(); }
+ return true;
+ }
-private:
- unique_ptr<samFile, internal::HtslibFileDeleter> htsFile_;
- unique_ptr<bam_hdr_t, internal::HtslibHeaderDeleter> htsHeader_;
- map<int32_t, IndexResultBlocks> zmwGroups_;
+ std::deque<int32_t> whitelist_;
+ ReaderPtr reader_;
};
-} // namespace internal
-} // namespace BAM
-} // namespace PacBio
-
-ZmwGroupQuery::ZmwGroupQuery(const DataSet& dataset)
- : IGroupQuery(dataset)
- , whitelist_(/* all dataset ZMWs */)
-{
- mergeStrategy_.reset(new GroupMergeStrategy<ByZmw>(CreateIterators()));
-}
-
ZmwGroupQuery::ZmwGroupQuery(const std::vector<int32_t>& zmwWhitelist,
const DataSet& dataset)
- : IGroupQuery(dataset)
- , whitelist_(zmwWhitelist)
-{
- mergeStrategy_.reset(new GroupMergeStrategy<ByZmw>(CreateIterators()));
-}
+ : internal::IGroupQuery()
+ , d_(new ZmwGroupQueryPrivate(zmwWhitelist, dataset))
+{ }
+
+ZmwGroupQuery::~ZmwGroupQuery(void) { }
-ZmwGroupQuery::FileIterPtr ZmwGroupQuery::CreateIterator(const BamFile& file)
-{ return FileIterPtr(new ZmwQueryGroupIterator(whitelist_, file)); }
+bool ZmwGroupQuery::GetNext(std::vector<BamRecord>& records)
+{ return d_->GetNext(records); }
diff --git a/src/ZmwQuery.cpp b/src/ZmwQuery.cpp
index 2b25723..7a45541 100644
--- a/src/ZmwQuery.cpp
+++ b/src/ZmwQuery.cpp
@@ -32,107 +32,38 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file ZmwQuery.cpp
+/// \brief Implements the ZmwQuery class.
+//
// Author: Derek Barnett
#include "pbbam/ZmwQuery.h"
-#include "pbbam/PbiIndex.h"
-#include "pbbam/internal/BamRecordSort.h"
-#include "pbbam/internal/MergeStrategy.h"
-#include "MemoryUtils.h"
-#include <htslib/bgzf.h>
-#include <htslib/sam.h>
-#include <algorithm>
+#include "pbbam/PbiFilterTypes.h"
+#include "pbbam/CompositeBamReader.h"
using namespace PacBio;
using namespace PacBio::BAM;
using namespace PacBio::BAM::internal;
-//using namespace PacBio::BAM::staging;
using namespace std;
-namespace PacBio {
-namespace BAM {
-namespace internal {
-
-class ZmwQueryIterator : public IBamFileIterator
+struct ZmwQuery::ZmwQueryPrivate
{
-public:
- ZmwQueryIterator(const std::vector<int32_t>& zmwWhitelist,
- const BamFile& bamFile)
- : internal::IBamFileIterator(bamFile)
- , currentBlockReadCount_(0)
- , htsFile_(nullptr)
- , htsHeader_(nullptr)
- {
- // init BAM file for reading
- htsFile_.reset(sam_open(bamFile.Filename().c_str(), "rb"));
- if (!htsFile_)
- throw std::runtime_error("could not open BAM file for reading");
-
- htsHeader_.reset(sam_hdr_read(htsFile_.get()));
- if (!htsHeader_)
- throw std::runtime_error("could not read BAM header data");
-
- // open index & query for ZMWs
- PbiIndex index(bamFile.PacBioIndexFilename());
- blocks_ = index.Lookup(ZmwIndexMultiRequest(zmwWhitelist));
- }
-
-public:
- bool GetNext(BamRecord& r){
-
- // no data to fetch, return false
- if (blocks_.empty())
- return false;
+ ZmwQueryPrivate(const std::vector<int32_t>& zmwWhitelist,
+ const DataSet& dataset)
+ : reader_(PbiZmwFilter(zmwWhitelist), dataset)
+ { }
- // maybe seek to block
- if (currentBlockReadCount_ == 0) {
- const int seekResult = bgzf_seek(htsFile_.get()->fp.bgzf, blocks_.at(0).virtualOffset_, SEEK_SET);
- if (seekResult == -1)
- throw std::runtime_error("could not seek in BAM file");
- }
-
- // read next record
-// r = BamRecord(fileData_.Header());
- const int readResult = sam_read1(htsFile_.get(),
- htsHeader_.get(),
- internal::BamRecordMemory::GetRawData(r).get());
-// r.header_ = fileData_.Header();
- r.header_ = header_;
-
- // update counters
- ++currentBlockReadCount_;
- if (currentBlockReadCount_ == blocks_.at(0).numReads_) {
- blocks_.pop_front();
- currentBlockReadCount_ = 0;
- }
-
- // return result of reading
- if (readResult >= 0) // success
- return true;
- else if (readResult == -1) // normal EOF
- return false;
- else // error (truncated file, etc)
- throw std::runtime_error("corrupted file, may be truncated");
- }
-
-private:
- IndexResultBlocks blocks_;
- size_t currentBlockReadCount_;
- unique_ptr<samFile, internal::HtslibFileDeleter> htsFile_;
- unique_ptr<bam_hdr_t, internal::HtslibHeaderDeleter> htsHeader_;
+ PbiFilterCompositeBamReader<Compare::Zmw> reader_;
};
-} // namespace internal
-} // namespace BAM
-} // namespace PacBio
-
-ZmwQuery::ZmwQuery(const std::vector<int32_t> &zmwWhitelist,
+ZmwQuery::ZmwQuery(const std::vector<int32_t>& zmwWhitelist,
const DataSet& dataset)
- : internal::IQuery(dataset)
- , whitelist_(zmwWhitelist)
-{
- mergeStrategy_.reset(new MergeStrategy<ByZmw>(CreateIterators()));
-}
+ : internal::IQuery()
+ , d_(new ZmwQueryPrivate(zmwWhitelist, dataset))
+{ }
+
+ZmwQuery::~ZmwQuery(void) { }
-ZmwQuery::FileIterPtr ZmwQuery::CreateIterator(const BamFile& bamFile)
-{ return FileIterPtr(new ZmwQueryIterator(whitelist_, bamFile)); }
+bool ZmwQuery::GetNext(BamRecord &r)
+{ return d_->reader_.GetNext(r); }
diff --git a/src/VirtualRegionTypeMap.cpp b/src/ZmwTypeMap.cpp
similarity index 86%
copy from src/VirtualRegionTypeMap.cpp
copy to src/ZmwTypeMap.cpp
index 4839b35..2eea7b7 100644
--- a/src/VirtualRegionTypeMap.cpp
+++ b/src/ZmwTypeMap.cpp
@@ -32,18 +32,22 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file ZmwTypeMap.cpp
+/// \brief Implements the ZmwTypeMap class.
+//
// Author: Armin Töpfer
-#include "pbbam/virtual/VirtualRegionTypeMap.h"
+#include "pbbam/ZmwTypeMap.h"
using namespace PacBio;
using namespace PacBio::BAM;
-std::map<char, VirtualRegionType> VirtualRegionTypeMap::ParseChar
+std::map<char, ZmwType> ZmwTypeMap::ParseChar
{
- { 'A' , VirtualRegionType::ADAPTER },
- { 'B' , VirtualRegionType::BARCODE },
- { 'H' , VirtualRegionType::HQREGION },
- { 'L' , VirtualRegionType::LQREGION }
-};
\ No newline at end of file
+ { 'C' , ZmwType::CONTROL },
+ { 'M' , ZmwType::MALFORMED },
+ { 'N' , ZmwType::NORMAL },
+ { 'S' , ZmwType::SENTINEL }
+};
diff --git a/src/VirtualPolymeraseReader.cpp b/src/ZmwWhitelistVirtualReader.cpp
similarity index 50%
copy from src/VirtualPolymeraseReader.cpp
copy to src/ZmwWhitelistVirtualReader.cpp
index 271a96e..e716e1c 100644
--- a/src/VirtualPolymeraseReader.cpp
+++ b/src/ZmwWhitelistVirtualReader.cpp
@@ -32,33 +32,36 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
+//
+// File Description
+/// \file ZmwWhitelistVirtualReader.cpp
+/// \brief Implements the ZmwWhitelistVirtualReader class.
+//
+// Author: Derek Barnett
-// Author: Armin Töpfer
-
-#include <stdexcept>
-
-#include "pbbam/virtual/VirtualPolymeraseReader.h"
+#include "pbbam/virtual/ZmwWhitelistVirtualReader.h"
+#include "pbbam/PbiFilterTypes.h"
#include "pbbam/ReadGroupInfo.h"
-
+#include <set>
+#include <stdexcept>
using namespace PacBio;
using namespace PacBio::BAM;
+using namespace std;
-VirtualPolymeraseReader::VirtualPolymeraseReader(
- const std::string& primaryBamFilePath, const std::string& scrapsBamFilePath)
+ZmwWhitelistVirtualReader::ZmwWhitelistVirtualReader(const vector<int32_t>& zmwWhitelist,
+ const string& primaryBamFilePath,
+ const string& scrapsBamFilePath)
: primaryBamFilePath_(primaryBamFilePath)
, scrapsBamFilePath_(scrapsBamFilePath)
{
- primaryBamFile_ = std::unique_ptr<BamFile>(new BamFile(primaryBamFilePath_));
- primaryQuery_ = std::unique_ptr<EntireFileQuery>(new EntireFileQuery(*primaryBamFile_));
- primaryIt_ = primaryQuery_->begin();
-
- scrapsBamFile_ = std::unique_ptr<BamFile>(new BamFile(scrapsBamFilePath_));
- scrapsQuery_ = std::unique_ptr<EntireFileQuery>(new EntireFileQuery(*scrapsBamFile_));
- scrapsIt_ = scrapsQuery_->begin();
-
- polyHeader_ = std::unique_ptr<BamHeader>(
- new BamHeader(primaryBamFile_->Header().ToSam()));
+ // setup BAM files/readers
+ primaryBamFile_ = unique_ptr<BamFile>(new BamFile(primaryBamFilePath_));
+ scrapsBamFile_ = unique_ptr<BamFile>(new BamFile(scrapsBamFilePath_));
+ primaryReader_ = unique_ptr<PbiIndexedBamReader>(new PbiIndexedBamReader(*primaryBamFile_));
+ scrapsReader_ = unique_ptr<PbiIndexedBamReader>(new PbiIndexedBamReader(*scrapsBamFile_));
+ // setup new header for stitched data
+ polyHeader_ = unique_ptr<BamHeader>(new BamHeader(primaryBamFile_->Header().ToSam()));
auto readGroups = polyHeader_->ReadGroups();
if (readGroups.empty())
throw std::runtime_error("Bam header of the primary bam has no read groups.");
@@ -72,42 +75,67 @@ VirtualPolymeraseReader::VirtualPolymeraseReader(
polyHeader_->ClearReadGroups();
}
polyHeader_->ReadGroups(readGroups);
+
+ // remove ZMWs up front, that are not found in either file
+ PreFilterZmws(zmwWhitelist);
}
+bool ZmwWhitelistVirtualReader::HasNext(void) const
+{ return !zmwWhitelist_.empty(); }
+
// This method is not thread safe
-VirtualPolymeraseBamRecord VirtualPolymeraseReader::Next()
+VirtualPolymeraseBamRecord ZmwWhitelistVirtualReader::Next(void)
{
auto bamRecordVec = NextRaw();
VirtualPolymeraseBamRecord stitched(std::move(bamRecordVec), *polyHeader_);
return std::move(stitched);
}
-std::vector<BamRecord> VirtualPolymeraseReader::NextRaw()
+vector<BamRecord> ZmwWhitelistVirtualReader::NextRaw(void)
{
- std::vector<BamRecord> bamRecordVec;
+ auto result = vector<BamRecord>{ };
+ if (!HasNext())
+ return result;
+
+ const auto& zmw = zmwWhitelist_.front();
+ primaryReader_->Filter(PbiZmwFilter{zmw});
+ scrapsReader_->Filter(PbiZmwFilter{zmw});
- // Current hole number, the smallest of scraps and primary.
- // It can be that the next ZMW is scrap only.
- int currentHoleNumber = std::min((*primaryIt_).HoleNumber(), (*scrapsIt_).HoleNumber());
- // collect subreads or hqregions
- while (primaryIt_ != primaryQuery_->end() && currentHoleNumber == (*primaryIt_).HoleNumber())
- bamRecordVec.push_back(*primaryIt_++);
-
- // collect scraps
- while (scrapsIt_ != scrapsQuery_->end() && currentHoleNumber == (*scrapsIt_).HoleNumber())
- bamRecordVec.push_back(*scrapsIt_++);
+ auto record = BamRecord{ };
+ while (primaryReader_->GetNext(record))
+ result.push_back(record);
+ while (scrapsReader_->GetNext(record))
+ result.push_back(record);
- return bamRecordVec;
+ zmwWhitelist_.pop_front();
+ return result;
}
-bool VirtualPolymeraseReader::HasNext()
+void ZmwWhitelistVirtualReader::PreFilterZmws(const std::vector<int32_t>& zmwWhitelist)
{
- // Return true until both iterators are at the end of the query
- return primaryIt_ != primaryQuery_->end() || scrapsIt_ != scrapsQuery_->end();
+ // fetch input ZMWs
+ const PbiRawData primaryIndex(primaryBamFile_->PacBioIndexFilename());
+ const PbiRawData scrapsIndex(scrapsBamFile_->PacBioIndexFilename());
+ const auto& primaryZmws = primaryIndex.BasicData().holeNumber_;
+ const auto& scrapsZmws = scrapsIndex.BasicData().holeNumber_;
+
+ // toss them all into a set (for uniqueness & lookup here soon)
+ set<int32_t> inputZmws;
+ for (const auto& zmw : primaryZmws)
+ inputZmws.insert(zmw);
+ for (const auto& zmw : scrapsZmws)
+ inputZmws.insert(zmw);
+
+ // check our requested whitelist against files' ZMWs, keep if found
+ const auto inputEnd = inputZmws.cend();
+ for (const int32_t zmw : zmwWhitelist) {
+ if (inputZmws.find(zmw) != inputEnd)
+ zmwWhitelist_.push_back(zmw);
+ }
}
-BamHeader VirtualPolymeraseReader::PrimaryHeader()
+BamHeader ZmwWhitelistVirtualReader::PrimaryHeader(void) const
{ return primaryBamFile_->Header(); }
-BamHeader VirtualPolymeraseReader::ScrapsHeader()
+BamHeader ZmwWhitelistVirtualReader::ScrapsHeader(void) const
{ return scrapsBamFile_->Header(); }
diff --git a/src/files.cmake b/src/files.cmake
index 3a399d0..29243ea 100644
--- a/src/files.cmake
+++ b/src/files.cmake
@@ -11,9 +11,14 @@ set( PacBioBAM_H
${PacBioBAM_IncludeDir}/pbbam/BamRecordBuilder.h
${PacBioBAM_IncludeDir}/pbbam/BamRecordImpl.h
${PacBioBAM_IncludeDir}/pbbam/BamTagCodec.h
+ ${PacBioBAM_IncludeDir}/pbbam/BaiIndexedBamReader.h
+ ${PacBioBAM_IncludeDir}/pbbam/BamReader.h
+ ${PacBioBAM_IncludeDir}/pbbam/CompositeBamReader.h
${PacBioBAM_IncludeDir}/pbbam/BamWriter.h
+ ${PacBioBAM_IncludeDir}/pbbam/BarcodeQuery.h
${PacBioBAM_IncludeDir}/pbbam/Cigar.h
${PacBioBAM_IncludeDir}/pbbam/CigarOperation.h
+ ${PacBioBAM_IncludeDir}/pbbam/Compare.h
${PacBioBAM_IncludeDir}/pbbam/Config.h
${PacBioBAM_IncludeDir}/pbbam/DataSet.h
${PacBioBAM_IncludeDir}/pbbam/DataSetTypes.h
@@ -22,33 +27,52 @@ set( PacBioBAM_H
${PacBioBAM_IncludeDir}/pbbam/Frames.h
${PacBioBAM_IncludeDir}/pbbam/GenomicInterval.h
${PacBioBAM_IncludeDir}/pbbam/GenomicIntervalQuery.h
- ${PacBioBAM_IncludeDir}/pbbam/GroupQuery.h
- ${PacBioBAM_IncludeDir}/pbbam/GroupQueryBase.h
${PacBioBAM_IncludeDir}/pbbam/IndexedFastaReader.h
${PacBioBAM_IncludeDir}/pbbam/Interval.h
${PacBioBAM_IncludeDir}/pbbam/LocalContextFlags.h
${PacBioBAM_IncludeDir}/pbbam/Orientation.h
+ ${PacBioBAM_IncludeDir}/pbbam/PbiBasicTypes.h
${PacBioBAM_IncludeDir}/pbbam/PbiBuilder.h
${PacBioBAM_IncludeDir}/pbbam/PbiFile.h
+ ${PacBioBAM_IncludeDir}/pbbam/PbiFilter.h
+ ${PacBioBAM_IncludeDir}/pbbam/PbiFilterQuery.h
+ ${PacBioBAM_IncludeDir}/pbbam/PbiFilterTypes.h
${PacBioBAM_IncludeDir}/pbbam/PbiIndex.h
+ ${PacBioBAM_IncludeDir}/pbbam/PbiIndexedBamReader.h
+ ${PacBioBAM_IncludeDir}/pbbam/PbiLookupData.h
${PacBioBAM_IncludeDir}/pbbam/PbiRawData.h
${PacBioBAM_IncludeDir}/pbbam/Position.h
${PacBioBAM_IncludeDir}/pbbam/ProgramInfo.h
+ ${PacBioBAM_IncludeDir}/pbbam/QNameQuery.h
${PacBioBAM_IncludeDir}/pbbam/QualityValue.h
${PacBioBAM_IncludeDir}/pbbam/QualityValues.h
- ${PacBioBAM_IncludeDir}/pbbam/QueryBase.h
+ ${PacBioBAM_IncludeDir}/pbbam/ReadAccuracyQuery.h
${PacBioBAM_IncludeDir}/pbbam/ReadGroupInfo.h
${PacBioBAM_IncludeDir}/pbbam/SamTagCodec.h
${PacBioBAM_IncludeDir}/pbbam/SequenceInfo.h
${PacBioBAM_IncludeDir}/pbbam/Strand.h
+ ${PacBioBAM_IncludeDir}/pbbam/SubreadLengthQuery.h
${PacBioBAM_IncludeDir}/pbbam/Tag.h
${PacBioBAM_IncludeDir}/pbbam/TagCollection.h
# ${PacBioBAM_IncludeDir}/pbbam/UnmappedReadsQuery.h
${PacBioBAM_IncludeDir}/pbbam/ZmwGroupQuery.h
${PacBioBAM_IncludeDir}/pbbam/ZmwQuery.h
+ ${PacBioBAM_IncludeDir}/pbbam/ZmwType.h
+ ${PacBioBAM_IncludeDir}/pbbam/ZmwTypeMap.h
- # internal headers
- ${PacBioBAM_IncludeDir}/pbbam/internal/BamRecordSort.h
+ # exception headers
+ ${PacBioBAM_IncludeDir}/pbbam/exception/InvalidSequencingChemistryException.h
+
+ # API-internal headers & inline files
+ ${PacBioBAM_IncludeDir}/pbbam/internal/Accuracy.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/BamHeader.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/BamRecord.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/BamRecordBuilder.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/BamRecordImpl.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/Cigar.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/CigarOperation.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/Compare.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/CompositeBamReader.inl
${PacBioBAM_IncludeDir}/pbbam/internal/DataSet.inl
${PacBioBAM_IncludeDir}/pbbam/internal/DataSetBaseTypes.h
${PacBioBAM_IncludeDir}/pbbam/internal/DataSetBaseTypes.inl
@@ -57,25 +81,36 @@ set( PacBioBAM_H
${PacBioBAM_IncludeDir}/pbbam/internal/DataSetListElement.h
${PacBioBAM_IncludeDir}/pbbam/internal/DataSetListElement.inl
${PacBioBAM_IncludeDir}/pbbam/internal/DataSetTypes.inl
- ${PacBioBAM_IncludeDir}/pbbam/internal/FilterEngine.h
- ${PacBioBAM_IncludeDir}/pbbam/internal/IBamFileIterator.h
- ${PacBioBAM_IncludeDir}/pbbam/internal/IMergeStrategy.h
- ${PacBioBAM_IncludeDir}/pbbam/internal/MergeItem.h
- ${PacBioBAM_IncludeDir}/pbbam/internal/MergeStrategy.h
- ${PacBioBAM_IncludeDir}/pbbam/internal/PbiIndex_p.h
- ${PacBioBAM_IncludeDir}/pbbam/internal/PbiIndex_p.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/Frames.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/GenomicInterval.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/Interval.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/PbiBasicTypes.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/PbiFilter.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/PbiFilterTypes.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/PbiIndex.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/PbiLookupData.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/PbiRawData.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/ProgramInfo.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/QualityValue.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/QualityValues.inl
${PacBioBAM_IncludeDir}/pbbam/internal/QueryBase.h
- ${PacBioBAM_IncludeDir}/pbbam/internal/SequentialMergeStrategy.h
+ ${PacBioBAM_IncludeDir}/pbbam/internal/QueryBase.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/ReadGroupInfo.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/SequenceInfo.inl
${PacBioBAM_IncludeDir}/pbbam/internal/Tag.inl
# virtual headers
${PacBioBAM_IncludeDir}/pbbam/virtual/VirtualPolymeraseBamRecord.h
+ ${PacBioBAM_IncludeDir}/pbbam/virtual/VirtualPolymeraseCompositeReader.h
${PacBioBAM_IncludeDir}/pbbam/virtual/VirtualPolymeraseReader.h
${PacBioBAM_IncludeDir}/pbbam/virtual/VirtualRegion.h
${PacBioBAM_IncludeDir}/pbbam/virtual/VirtualRegionType.h
${PacBioBAM_IncludeDir}/pbbam/virtual/VirtualRegionTypeMap.h
+ ${PacBioBAM_IncludeDir}/pbbam/virtual/ZmwWhitelistVirtualReader.h
+ # library-internal headers
${PacBioBAM_SourceDir}/AssertUtils.h
+ ${PacBioBAM_SourceDir}/ChemistryTable.h
${PacBioBAM_SourceDir}/DataSetIO.h
${PacBioBAM_SourceDir}/DataSetUtils.h
${PacBioBAM_SourceDir}/FileUtils.h
@@ -94,19 +129,23 @@ set( PacBioBAM_H
# sources
set( PacBioBAM_CPP
- # main API headers
${PacBioBAM_SourceDir}/Accuracy.cpp
${PacBioBAM_SourceDir}/AlignmentPrinter.cpp
${PacBioBAM_SourceDir}/AssertUtils.cpp
+ ${PacBioBAM_SourceDir}/BaiIndexedBamReader.cpp
${PacBioBAM_SourceDir}/BamFile.cpp
${PacBioBAM_SourceDir}/BamHeader.cpp
+ ${PacBioBAM_SourceDir}/BamReader.cpp
${PacBioBAM_SourceDir}/BamRecord.cpp
${PacBioBAM_SourceDir}/BamRecordBuilder.cpp
${PacBioBAM_SourceDir}/BamRecordImpl.cpp
${PacBioBAM_SourceDir}/BamTagCodec.cpp
${PacBioBAM_SourceDir}/BamWriter.cpp
+ ${PacBioBAM_SourceDir}/BarcodeQuery.cpp
+ ${PacBioBAM_SourceDir}/ChemistryTable.cpp
${PacBioBAM_SourceDir}/Cigar.cpp
${PacBioBAM_SourceDir}/CigarOperation.cpp
+ ${PacBioBAM_SourceDir}/Compare.cpp
${PacBioBAM_SourceDir}/Config.cpp
${PacBioBAM_SourceDir}/DataSet.cpp
${PacBioBAM_SourceDir}/DataSetBaseTypes.cpp
@@ -115,37 +154,43 @@ set( PacBioBAM_CPP
${PacBioBAM_SourceDir}/DataSetTypes.cpp
${PacBioBAM_SourceDir}/DataSetXsd.cpp
${PacBioBAM_SourceDir}/EntireFileQuery.cpp
- ${PacBioBAM_SourceDir}/FilterEngine.cpp
+ ${PacBioBAM_SourceDir}/FileUtils.cpp
${PacBioBAM_SourceDir}/FofnReader.cpp
${PacBioBAM_SourceDir}/Frames.cpp
${PacBioBAM_SourceDir}/GenomicInterval.cpp
${PacBioBAM_SourceDir}/GenomicIntervalQuery.cpp
- ${PacBioBAM_SourceDir}/GroupQuery.cpp
${PacBioBAM_SourceDir}/IndexedFastaReader.cpp
${PacBioBAM_SourceDir}/MemoryUtils.cpp
${PacBioBAM_SourceDir}/PbiBuilder.cpp
${PacBioBAM_SourceDir}/PbiFile.cpp
+ ${PacBioBAM_SourceDir}/PbiFilter.cpp
+ ${PacBioBAM_SourceDir}/PbiFilterQuery.cpp
+ ${PacBioBAM_SourceDir}/PbiFilterTypes.cpp
${PacBioBAM_SourceDir}/PbiIndex.cpp
+ ${PacBioBAM_SourceDir}/PbiIndexedBamReader.cpp
${PacBioBAM_SourceDir}/PbiIndexIO.cpp
${PacBioBAM_SourceDir}/PbiRawData.cpp
${PacBioBAM_SourceDir}/ProgramInfo.cpp
+ ${PacBioBAM_SourceDir}/QNameQuery.cpp
${PacBioBAM_SourceDir}/QualityValue.cpp
- ${PacBioBAM_SourceDir}/QueryBase.cpp
+ ${PacBioBAM_SourceDir}/ReadAccuracyQuery.cpp
${PacBioBAM_SourceDir}/ReadGroupInfo.cpp
${PacBioBAM_SourceDir}/SamTagCodec.cpp
${PacBioBAM_SourceDir}/SequenceInfo.cpp
+ ${PacBioBAM_SourceDir}/SubreadLengthQuery.cpp
${PacBioBAM_SourceDir}/Tag.cpp
${PacBioBAM_SourceDir}/TagCollection.cpp
# ${PacBioBAM_SourceDir}/UnmappedReadsQuery.cpp
+ ${PacBioBAM_SourceDir}/VirtualPolymeraseBamRecord.cpp
+ ${PacBioBAM_SourceDir}/VirtualPolymeraseCompositeReader.cpp
+ ${PacBioBAM_SourceDir}/VirtualPolymeraseReader.cpp
+ ${PacBioBAM_SourceDir}/VirtualRegionTypeMap.cpp
${PacBioBAM_SourceDir}/XmlReader.cpp
${PacBioBAM_SourceDir}/XmlWriter.cpp
${PacBioBAM_SourceDir}/ZmwGroupQuery.cpp
${PacBioBAM_SourceDir}/ZmwQuery.cpp
-
- # virtual
- ${PacBioBAM_SourceDir}/VirtualPolymeraseBamRecord.cpp
- ${PacBioBAM_SourceDir}/VirtualPolymeraseReader.cpp
- ${PacBioBAM_SourceDir}/VirtualRegionTypeMap.cpp
+ ${PacBioBAM_SourceDir}/ZmwTypeMap.cpp
+ ${PacBioBAM_SourceDir}/ZmwWhitelistVirtualReader.cpp
# XML I/O
${PacBioBAM_SourceDir}/pugixml/pugixml.cpp
diff --git a/src/swig/Accuracy.i b/src/swig/Accuracy.i
index c315115..1e6015c 100644
--- a/src/swig/Accuracy.i
+++ b/src/swig/Accuracy.i
@@ -9,9 +9,9 @@ using namespace PacBio::BAM;
%}
#ifdef SWIGPYTHON
-%rename(__int__) PacBio::BAM::Accuracy::operator int;
+%rename(__float__) PacBio::BAM::Accuracy::operator float;
#else // C#, R
-%rename(ToInt) PacBio::BAM::Accuracy::operator int;
+%rename(ToFloat) PacBio::BAM::Accuracy::operator float;
#endif
%include <pbbam/Accuracy.h>
\ No newline at end of file
diff --git a/src/swig/BamFile.i b/src/swig/BamFile.i
index 5b8a916..4a429e9 100644
--- a/src/swig/BamFile.i
+++ b/src/swig/BamFile.i
@@ -8,14 +8,11 @@ using namespace PacBio;
using namespace PacBio::BAM;
%}
+#ifdef SWIGR
+%ignore PacBio::BAM::BamFile::BamFile(const BamFile&);
+#endif
+
%ignore PacBio::BAM::BamFile::BamFile(BamFile&&);
%ignore PacBio::BAM::BamFile::operator=;
-HANDLE_STD_EXCEPTION(BamFile);
-HANDLE_STD_EXCEPTION(EnsurePacBioIndexExists);
-HANDLE_STD_EXCEPTION(EnsureStandardIndexExists);
-HANDLE_STD_EXCEPTION(ReferenceId);
-HANDLE_STD_EXCEPTION(ReferenceLength);
-HANDLE_STD_EXCEPTION(ReferenceName);
-
-%include <pbbam/BamFile.h>
\ No newline at end of file
+%include <pbbam/BamFile.h>
diff --git a/src/swig/BamHeader.i b/src/swig/BamHeader.i
index 5f7ea8e..3572f04 100644
--- a/src/swig/BamHeader.i
+++ b/src/swig/BamHeader.i
@@ -18,11 +18,4 @@ using namespace PacBio::BAM;
%template(ReadGroupInfoList) std::vector<PacBio::BAM::ReadGroupInfo>;
%template(SequenceInfoList) std::vector<PacBio::BAM::SequenceInfo>;
-HANDLE_STD_EXCEPTION(Program);
-HANDLE_STD_EXCEPTION(ReadGroup);
-HANDLE_STD_EXCEPTION(Sequence);
-HANDLE_STD_EXCEPTION(SequenceId);
-HANDLE_STD_EXCEPTION(SequenceLength);
-HANDLE_STD_EXCEPTION(SequenceName);
-
%include <pbbam/BamHeader.h>
diff --git a/src/swig/BamRecord.i b/src/swig/BamRecord.i
index 16e6036..4b8cee6 100644
--- a/src/swig/BamRecord.i
+++ b/src/swig/BamRecord.i
@@ -23,11 +23,11 @@ using namespace PacBio::BAM;
// C# gets confused by the const and nonconst overloads
%ignore PacBio::BAM::BamRecord::Impl() const;
-#ifdef SWIGR
+#if defined(SWIGR) || defined(SWIGPYTHON)
%rename("EncodedPkmean") PacBio::BAM::BamRecord::Pkmean(const std::vector<uint16_t>&);
%rename("EncodedPkmid") PacBio::BAM::BamRecord::Pkmid(const std::vector<uint16_t>&);
-#endif // SWIGR
-
-HANDLE_STD_EXCEPTION(CigarData);
+%rename("EncodedPkmean2") PacBio::BAM::BamRecord::Pkmean2(const std::vector<uint16_t>&);
+%rename("EncodedPkmid2") PacBio::BAM::BamRecord::Pkmid2(const std::vector<uint16_t>&);
+#endif
%include <pbbam/BamRecord.h>
diff --git a/src/swig/BamRecordBuilder.i b/src/swig/BamRecordBuilder.i
index 7e968f9..52e7690 100644
--- a/src/swig/BamRecordBuilder.i
+++ b/src/swig/BamRecordBuilder.i
@@ -8,5 +8,11 @@ using namespace PacBio;
using namespace PacBio::BAM;
%}
+%ignore PacBio::BAM::BamRecordBuilder::BamRecordBuilder(BamRecordBuilder&&); // move ctors not used
+%ignore PacBio::BAM::BamRecordBuilder::operator=;
+
+%ignore PacBio::BAM::BamRecordBuilder::Reset(BamRecord&&);
+%ignore PacBio::BAM::BamRecordBuilder::Cigar(PacBio::BAM::Cigar&&);
+%ignore PacBio::BAM::BamRecordBuilder::Tags(TagCollection&&);
%include <pbbam/BamRecordBuilder.h>
diff --git a/src/swig/BamRecordImpl.i b/src/swig/BamRecordImpl.i
index 3899147..2c8a48f 100644
--- a/src/swig/BamRecordImpl.i
+++ b/src/swig/BamRecordImpl.i
@@ -11,6 +11,4 @@ using namespace PacBio::BAM;
%ignore PacBio::BAM::BamRecordImpl::BamRecordImpl(BamRecordImpl&&);
%ignore PacBio::BAM::BamRecordImpl::operator=;
-HANDLE_STD_EXCEPTION(CigarData);
-
-%include <pbbam/BamRecordImpl.h>
\ No newline at end of file
+%include <pbbam/BamRecordImpl.h>
diff --git a/src/swig/BamWriter.i b/src/swig/BamWriter.i
index 87e332f..dd23e5b 100644
--- a/src/swig/BamWriter.i
+++ b/src/swig/BamWriter.i
@@ -12,7 +12,4 @@ using namespace PacBio::BAM;
%ignore PacBio::BAM::BamWriter(BamWriter&&); // move ctor not used
%ignore PacBio::BAM::BamWriter::operator=; // assignment operators not used
-HANDLE_STD_EXCEPTION(BamWriter);
-HANDLE_STD_EXCEPTION(Write);
-
-%include <pbbam/BamWriter.h>
\ No newline at end of file
+%include <pbbam/BamWriter.h>
diff --git a/src/swig/CigarOperation.i b/src/swig/CigarOperation.i
index b2a9586..0a23a17 100644
--- a/src/swig/CigarOperation.i
+++ b/src/swig/CigarOperation.i
@@ -15,8 +15,6 @@ using namespace PacBio::BAM;
%ignore PacBio::BAM::CigarOperation::CigarOperation(CigarOperationType, uint32_t);
#endif
-HANDLE_STD_EXCEPTION(CigarOperation);
-
%include <pbbam/CigarOperation.h>
// enums aren't always named consistently (at least between Mac/clang/swig & Linux/gcc/swig)
diff --git a/src/swig/DataSet.i b/src/swig/DataSet.i
index 8ba22c4..f8cba2b 100644
--- a/src/swig/DataSet.i
+++ b/src/swig/DataSet.i
@@ -14,4 +14,36 @@ using namespace PacBio::BAM;
// assignment operators not used
%ignore PacBio::BAM::DataSet::operator=;
-%include <pbbam/DataSet.h>
\ No newline at end of file
+#ifdef SWIGCSHARP
+
+// ignore non-const accessors
+%ignore PacBio::BAM::DataSet::Attribute(const std::string&);
+%ignore PacBio::BAM::DataSet::CreatedAt();
+%ignore PacBio::BAM::DataSet::Extensions();
+%ignore PacBio::BAM::DataSet::ExternalResources();
+%ignore PacBio::BAM::DataSet::Filters();
+%ignore PacBio::BAM::DataSet::Format();
+%ignore PacBio::BAM::DataSet::Metadata();
+%ignore PacBio::BAM::DataSet::MetaType();
+%ignore PacBio::BAM::DataSet::ModifiedAt();
+%ignore PacBio::BAM::DataSet::Name();
+%ignore PacBio::BAM::DataSet::Namespaces();
+%ignore PacBio::BAM::DataSet::ResourceId();
+%ignore PacBio::BAM::DataSet::SubDataSets();
+%ignore PacBio::BAM::DataSet::Tags();
+%ignore PacBio::BAM::DataSet::TimeStampedName();
+%ignore PacBio::BAM::DataSet::UniqueId();
+%ignore PacBio::BAM::DataSet::Version();
+
+// disable operator(s)
+%ignore PacBio::BAM::DataSet::operator+=;
+
+#endif // C#
+
+#ifdef SWIGR
+%ignore PacBio::BAM::DataSet::DataSet(const DataSet::TypeEnum type);
+/*%ignore PacBio::BAM::DataSet::DataSet(const BamFile& bamFile);*/
+#endif // R
+
+
+%include <pbbam/DataSet.h>
diff --git a/src/swig/DataSetTypes.i b/src/swig/DataSetTypes.i
index 2aeb0ff..5644d3f 100644
--- a/src/swig/DataSetTypes.i
+++ b/src/swig/DataSetTypes.i
@@ -18,6 +18,65 @@ using namespace PacBio::BAM::internal;
%ignore PacBio::BAM::internal::DataSetElement::operator[];
/*%rename(__getitem__) PacBio::BAM::internal::DataSetElement::operator[];*/
+%ignore PacBio::BAM::internal::XmlName::XmlName(XmlName&&);
+%ignore PacBio::BAM::internal::XmlName::operator=;
+
+#ifdef SWIGCSHARP
+
+// ignore non-const accessors
+%ignore PacBio::BAM::DataSetBase::ExternalResources();
+%ignore PacBio::BAM::DataSetBase::Filters();
+%ignore PacBio::BAM::DataSetBase::Metadata();
+%ignore PacBio::BAM::DataSetBase::Namespaces();
+%ignore PacBio::BAM::DataSetBase::SubDataSets();
+%ignore PacBio::BAM::DataSetMetadata::NumRecords();
+%ignore PacBio::BAM::DataSetMetadata::Provenance();
+%ignore PacBio::BAM::DataSetMetadata::TotalLength();
+%ignore PacBio::BAM::ExternalResource::ExternalResources();
+%ignore PacBio::BAM::Filter::Properties();
+%ignore PacBio::BAM::Property::Name();
+%ignore PacBio::BAM::Property::Operator();
+%ignore PacBio::BAM::Property::Value();
+%ignore PacBio::BAM::Provenance::CreatedBy();
+%ignore PacBio::BAM::Provenance::CommonServicesInstanceId();
+%ignore PacBio::BAM::Provenance::CreatorUserId();
+%ignore PacBio::BAM::Provenance::ParentJobId();
+%ignore PacBio::BAM::Provenance::ParentTool();
+%ignore PacBio::BAM::internal::BaseEntityType::Description();
+%ignore PacBio::BAM::internal::BaseEntityType::Extensions();
+%ignore PacBio::BAM::internal::BaseEntityType::Format();
+%ignore PacBio::BAM::internal::BaseEntityType::ModifiedAt();
+%ignore PacBio::BAM::internal::BaseEntityType::Name();
+%ignore PacBio::BAM::internal::BaseEntityType::ResourceId();
+%ignore PacBio::BAM::internal::BaseEntityType::Tags();
+%ignore PacBio::BAM::internal::BaseEntityType::Version();
+%ignore PacBio::BAM::internal::DataEntityType::Checksum();
+%ignore PacBio::BAM::internal::DataEntityType::EncodedValue();
+%ignore PacBio::BAM::internal::DataEntityType::MetaType();
+%ignore PacBio::BAM::internal::DataEntityType::SimpleValue();
+%ignore PacBio::BAM::internal::DataEntityType::TimeStampedName();
+%ignore PacBio::BAM::internal::DataEntityType::UniqueId();
+%ignore PacBio::BAM::internal::DataEntityType::ValueDataType();
+%ignore PacBio::BAM::internal::DataSetElement::Attribute(const std::string&);
+%ignore PacBio::BAM::internal::DataSetElement::Attributes();
+%ignore PacBio::BAM::internal::DataSetElement::Children();
+%ignore PacBio::BAM::internal::DataSetElement::ChildText(const std::string&);
+%ignore PacBio::BAM::internal::DataSetElement::CreatedAt();
+%ignore PacBio::BAM::internal::DataSetElement::Text();
+%ignore PacBio::BAM::internal::IndexedDataType::FileIndices();
+%ignore PacBio::BAM::internal::StrictEntityType::MetaType();
+%ignore PacBio::BAM::internal::StrictEntityType::TimeStampedName();
+%ignore PacBio::BAM::internal::StrictEntityType::UniqueId();
+
+// disable operator(s)
+%ignore PacBio::BAM::DataSetMetadata::operator+=;
+%ignore PacBio::BAM::ExternalResources::operator+=;
+%ignore PacBio::BAM::Filters::operator+=;
+%ignore PacBio::BAM::DataSetBase::operator+=;
+%ignore PacBio::BAM::SubDataSets::operator+=;
+
+#endif // C#
+
%include <pbbam/internal/DataSetElement.h>
%ignore PacBio::BAM::internal::DataSetElementList::operator[];
diff --git a/src/swig/EntireFileQuery.i b/src/swig/EntireFileQuery.i
index a0571e8..c7c0b06 100644
--- a/src/swig/EntireFileQuery.i
+++ b/src/swig/EntireFileQuery.i
@@ -10,8 +10,6 @@ using namespace PacBio;
using namespace PacBio::BAM;
%}
-HANDLE_STD_EXCEPTION(EntireFileQuery);
-
%include <pbbam/DataSet.h>
%include <pbbam/internal/QueryBase.h>
-%include <pbbam/EntireFileQuery.h>
\ No newline at end of file
+%include <pbbam/EntireFileQuery.h>
diff --git a/src/swig/GenomicInterval.i b/src/swig/GenomicInterval.i
index 626c8ac..199a3c3 100644
--- a/src/swig/GenomicInterval.i
+++ b/src/swig/GenomicInterval.i
@@ -8,4 +8,6 @@ using namespace PacBio;
using namespace PacBio::BAM;
%}
-%include <pbbam/GenomicInterval.h>
\ No newline at end of file
+%ignore PacBio::BAM::GenomicInterval::operator=;
+
+%include <pbbam/GenomicInterval.h>
diff --git a/src/swig/GenomicIntervalQuery.i b/src/swig/GenomicIntervalQuery.i
index 0ed7886..d3f9fa7 100644
--- a/src/swig/GenomicIntervalQuery.i
+++ b/src/swig/GenomicIntervalQuery.i
@@ -8,8 +8,4 @@ using namespace PacBio;
using namespace PacBio::BAM;
%}
-HANDLE_STD_EXCEPTION(CreateIterator);
-HANDLE_STD_EXCEPTION(GenomicIntervalQuery);
-HANDLE_STD_EXCEPTION(Interval);
-
-%include <pbbam/GenomicIntervalQuery.h>
\ No newline at end of file
+%include <pbbam/GenomicIntervalQuery.h>
diff --git a/src/swig/LocalContextFlags.i b/src/swig/LocalContextFlags.i
index b47a4d4..66ee990 100644
--- a/src/swig/LocalContextFlags.i
+++ b/src/swig/LocalContextFlags.i
@@ -8,4 +8,8 @@ using namespace PacBio;
using namespace PacBio::BAM;
%}
+#ifdef SWIGCSHARP
+%ignore operator|(const LocalContextFlags, const LocalContextFlags);
+#endif
+
%include <pbbam/LocalContextFlags.h>
diff --git a/src/swig/PacBioBam.i b/src/swig/PacBioBam.i
index ed8c746..668f06e 100644
--- a/src/swig/PacBioBam.i
+++ b/src/swig/PacBioBam.i
@@ -33,7 +33,6 @@ endif*/
/********* SWIG includes ************/
-%include "exception.i"
%include "stdint.i"
%include "std_common.i"
@@ -56,35 +55,15 @@ endif*/
%template(ShortList) std::vector<short>;
%template(CharList) std::vector<char>;
-// basic exception-handler helper
-//
-// -- STL builtins --
-// std::invalid_argument -> ValueError
-// std::domain_error -> ValueError
-// std::overflow_error -> OverflowError
-// std::out_of_range -> IndexError
-// std::length_error -> IndexError
-// std::runtime_error -> RuntimeError
-// std::exception -> SystemError
-//
-// (anything else) -> UnknownError
-//
-// * All pbbam exceptions are simply std::exception (SystemErro) for now,
-// until (if?) we flesh out a more detailed exception hierarchy.
-// Either way, new ones will inherit from std::exception, so SystemError
-// should still remain a valid handler.
-//
-%define HANDLE_STD_EXCEPTION(MethodName)
-%exception MethodName {
+// exception handling
+%include "exception.i"
+%exception {
try {
- $action
- }
- SWIG_CATCH_STDEXCEPT // catch std::exception
- catch (...) {
- SWIG_exception(SWIG_UnknownError, "Unknown exception");
- }
+ $action
+ } catch (const std::exception& e) {
+ SWIG_exception(SWIG_RuntimeError, e.what());
+ }
}
-%enddef
/********* PacBioBAM includes ************/
@@ -150,8 +129,6 @@ endif*/
// Query/iterator API
%include "QueryBase.i"
-%include "GroupQueryBase.i"
-%include "GroupQuery.i"
%include "EntireFileQuery.i"
%include "GenomicIntervalQuery.i"
%include "ZmwQuery.i"
@@ -164,3 +141,9 @@ endif*/
// FASTA
%include "IndexedFastaReader.i"
+
+// VirtualPolymeraseBamRecord
+%include "VirtualRegion.i"
+%include "VirtualPolymeraseBamRecord.i"
+%include "VirtualPolymeraseReader.i"
+%include "ZmwWhitelistVirtualReader.i"
diff --git a/src/swig/PbiRawData.i b/src/swig/PbiRawData.i
index 4992cc4..3db9ece 100644
--- a/src/swig/PbiRawData.i
+++ b/src/swig/PbiRawData.i
@@ -13,7 +13,7 @@ using namespace PacBio::BAM;
%ignore PacBio::BAM::PbiRawMappedData::PbiRawMappedData(PbiRawMappedData&&);
%ignore PacBio::BAM::PbiReferenceEntry::PbiReferenceEntry(PbiReferenceEntry&&);
%ignore PacBio::BAM::PbiRawReferenceData::PbiRawReferenceData(PbiRawReferenceData&&);
-%ignore PacBio::BAM::PbiRawSubreadData::PbiRawSubreadData(PbiRawSubreadData&&);
+%ignore PacBio::BAM::PbiRawBasicData::PbiRawBasicData(PbiRawBasicData&&);
%ignore PacBio::BAM::PbiRawData::PbiRawData(PbiRawData&&);
// assignment operators not used
@@ -21,9 +21,15 @@ using namespace PacBio::BAM;
%ignore PacBio::BAM::PbiRawMappedData::operator=;
%ignore PacBio::BAM::PbiReferenceEntry::operator=;
%ignore PacBio::BAM::PbiRawReferenceData::operator=;
-%ignore PacBio::BAM::PbiRawSubreadData::operator=;
+%ignore PacBio::BAM::PbiRawBasicData::operator=;
%ignore PacBio::BAM::PbiRawData::operator=;
-HANDLE_STD_EXCEPTION(PacBio::BAM::PbiRawMappedData::AddRecord);
+#ifdef SWIGCSHARP
+// ignore non-const accessors
+%ignore PacBio::BAM::PbiRawData::BarcodeData();
+%ignore PacBio::BAM::PbiRawData::MappedData();
+%ignore PacBio::BAM::PbiRawData::ReferenceData();
+%ignore PacBio::BAM::PbiRawData::BasicData();
+#endif // C#
%include <pbbam/PbiRawData.h>
diff --git a/src/swig/Tag.i b/src/swig/Tag.i
index 838454c..832c856 100644
--- a/src/swig/Tag.i
+++ b/src/swig/Tag.i
@@ -11,23 +11,7 @@ using namespace PacBio::BAM;
%ignore PacBio::BAM::Tag::Tag(Tag&&);
%ignore PacBio::BAM::Tag::operator=;
-HANDLE_STD_EXCEPTION(ToInt8);
-HANDLE_STD_EXCEPTION(ToUInt8);
-HANDLE_STD_EXCEPTION(ToInt16);
-HANDLE_STD_EXCEPTION(ToUInt16);
-HANDLE_STD_EXCEPTION(ToInt32);
-HANDLE_STD_EXCEPTION(ToUInt32);
-HANDLE_STD_EXCEPTION(ToFloat);
-HANDLE_STD_EXCEPTION(ToString);
-HANDLE_STD_EXCEPTION(ToInt8Array);
-HANDLE_STD_EXCEPTION(ToUInt8Array);
-HANDLE_STD_EXCEPTION(ToInt16Array);
-HANDLE_STD_EXCEPTION(ToUInt16Array);
-HANDLE_STD_EXCEPTION(ToInt32Array);
-HANDLE_STD_EXCEPTION(ToUInt32Array);
-HANDLE_STD_EXCEPTION(ToFloatArray);
-
-#ifdef SWIGR
+#if defined(SWIGR) || defined(SWIGPYTHON)
%ignore PacBio::BAM::Tag::Tag(int8_t value);
%ignore PacBio::BAM::Tag::Tag(uint8_t value);
@@ -47,84 +31,84 @@ HANDLE_STD_EXCEPTION(ToFloatArray);
%extend PacBio::BAM::Tag {
- PacBio::BAM::Tag FromInt8(int x) { return PacBio::BAM::Tag(static_cast<int8_t>(x)); }
- PacBio::BAM::Tag FromUInt8(int x) { return PacBio::BAM::Tag(static_cast<uint8_t>(x)); }
- PacBio::BAM::Tag FromInt16(int x) { return PacBio::BAM::Tag(static_cast<int16_t>(x)); }
- PacBio::BAM::Tag FromUInt16(int x) { return PacBio::BAM::Tag(static_cast<uint16_t>(x)); }
- PacBio::BAM::Tag FromInt32(int x) { return PacBio::BAM::Tag(static_cast<int32_t>(x)); }
- PacBio::BAM::Tag FromUInt32(int x) { return PacBio::BAM::Tag(static_cast<uint32_t>(x)); }
- PacBio::BAM::Tag FromFloat(int x) { return PacBio::BAM::Tag(static_cast<float>(x)); }
-
- PacBio::BAM::Tag FromInt8Array(const std::vector<int>& v)
- {
- std::vector<int8_t> result;
- const size_t numElements = v.size();
- result.reserve(numElements);
- for (size_t i = 0; i < numElements; ++i)
- result.push_back(static_cast<int8_t>(v.at(i)));
- return PacBio::BAM::Tag(result);
- }
-
- PacBio::BAM::Tag FromUInt8Array(const std::vector<int>& v)
- {
- std::vector<uint8_t> result;
- const size_t numElements = v.size();
- result.reserve(numElements);
- for (size_t i = 0; i < numElements; ++i)
- result.push_back(static_cast<uint8_t>(v.at(i)));
- return PacBio::BAM::Tag(result);
- }
-
- PacBio::BAM::Tag FromInt16Array(const std::vector<int>& v)
- {
- std::vector<int16_t> result;
- const size_t numElements = v.size();
- result.reserve(numElements);
- for (size_t i = 0; i < numElements; ++i)
- result.push_back(static_cast<int16_t>(v.at(i)));
- return PacBio::BAM::Tag(result);
- }
-
- PacBio::BAM::Tag FromUInt16Array(const std::vector<int>& v)
- {
- std::vector<int16_t> result;
- const size_t numElements = v.size();
- result.reserve(numElements);
- for (size_t i = 0; i < numElements; ++i)
- result.push_back(static_cast<uint16_t>(v.at(i)));
- return PacBio::BAM::Tag(result);
- }
-
- PacBio::BAM::Tag FromInt32Array(const std::vector<int>& v)
- {
- std::vector<int16_t> result;
- const size_t numElements = v.size();
- result.reserve(numElements);
- for (size_t i = 0; i < numElements; ++i)
- result.push_back(static_cast<int32_t>(v.at(i)));
- return PacBio::BAM::Tag(result);
- }
-
- PacBio::BAM::Tag FromUInt32Array(const std::vector<int>& v)
- {
- std::vector<int16_t> result;
- const size_t numElements = v.size();
- result.reserve(numElements);
- for (size_t i = 0; i < numElements; ++i)
- result.push_back(static_cast<uint32_t>(v.at(i)));
- return PacBio::BAM::Tag(result);
- }
-
- PacBio::BAM::Tag FromFloatArray(const std::vector<int>& v)
- {
- std::vector<int16_t> result;
- const size_t numElements = v.size();
- result.reserve(numElements);
- for (size_t i = 0; i < numElements; ++i)
- result.push_back(static_cast<float>(v.at(i)));
- return PacBio::BAM::Tag(result);
- }
+ PacBio::BAM::Tag FromInt8(int x) { return PacBio::BAM::Tag(static_cast<int8_t>(x)); }
+ PacBio::BAM::Tag FromUInt8(int x) { return PacBio::BAM::Tag(static_cast<uint8_t>(x)); }
+ PacBio::BAM::Tag FromInt16(int x) { return PacBio::BAM::Tag(static_cast<int16_t>(x)); }
+ PacBio::BAM::Tag FromUInt16(int x) { return PacBio::BAM::Tag(static_cast<uint16_t>(x)); }
+ PacBio::BAM::Tag FromInt32(int x) { return PacBio::BAM::Tag(static_cast<int32_t>(x)); }
+ PacBio::BAM::Tag FromUInt32(int x) { return PacBio::BAM::Tag(static_cast<uint32_t>(x)); }
+ PacBio::BAM::Tag FromFloat(int x) { return PacBio::BAM::Tag(static_cast<float>(x)); }
+
+ PacBio::BAM::Tag FromInt8Array(const std::vector<int>& v)
+ {
+ std::vector<int8_t> result;
+ const size_t numElements = v.size();
+ result.reserve(numElements);
+ for (size_t i = 0; i < numElements; ++i)
+ result.push_back(static_cast<int8_t>(v.at(i)));
+ return PacBio::BAM::Tag(result);
+ }
+
+ PacBio::BAM::Tag FromUInt8Array(const std::vector<int>& v)
+ {
+ std::vector<uint8_t> result;
+ const size_t numElements = v.size();
+ result.reserve(numElements);
+ for (size_t i = 0; i < numElements; ++i)
+ result.push_back(static_cast<uint8_t>(v.at(i)));
+ return PacBio::BAM::Tag(result);
+ }
+
+ PacBio::BAM::Tag FromInt16Array(const std::vector<int>& v)
+ {
+ std::vector<int16_t> result;
+ const size_t numElements = v.size();
+ result.reserve(numElements);
+ for (size_t i = 0; i < numElements; ++i)
+ result.push_back(static_cast<int16_t>(v.at(i)));
+ return PacBio::BAM::Tag(result);
+ }
+
+ PacBio::BAM::Tag FromUInt16Array(const std::vector<int>& v)
+ {
+ std::vector<int16_t> result;
+ const size_t numElements = v.size();
+ result.reserve(numElements);
+ for (size_t i = 0; i < numElements; ++i)
+ result.push_back(static_cast<uint16_t>(v.at(i)));
+ return PacBio::BAM::Tag(result);
+ }
+
+ PacBio::BAM::Tag FromInt32Array(const std::vector<int>& v)
+ {
+ std::vector<int16_t> result;
+ const size_t numElements = v.size();
+ result.reserve(numElements);
+ for (size_t i = 0; i < numElements; ++i)
+ result.push_back(static_cast<int32_t>(v.at(i)));
+ return PacBio::BAM::Tag(result);
+ }
+
+ PacBio::BAM::Tag FromUInt32Array(const std::vector<int>& v)
+ {
+ std::vector<int16_t> result;
+ const size_t numElements = v.size();
+ result.reserve(numElements);
+ for (size_t i = 0; i < numElements; ++i)
+ result.push_back(static_cast<uint32_t>(v.at(i)));
+ return PacBio::BAM::Tag(result);
+ }
+
+ PacBio::BAM::Tag FromFloatArray(const std::vector<int>& v)
+ {
+ std::vector<int16_t> result;
+ const size_t numElements = v.size();
+ result.reserve(numElements);
+ for (size_t i = 0; i < numElements; ++i)
+ result.push_back(static_cast<float>(v.at(i)));
+ return PacBio::BAM::Tag(result);
+ }
}
#endif // SWIGR
-%include <pbbam/Tag.h>
\ No newline at end of file
+%include <pbbam/Tag.h>
diff --git a/src/swig/VirtualPolymeraseBamRecord.i b/src/swig/VirtualPolymeraseBamRecord.i
new file mode 100644
index 0000000..1a2a9c5
--- /dev/null
+++ b/src/swig/VirtualPolymeraseBamRecord.i
@@ -0,0 +1,24 @@
+/* VirtualPolymeraseBamRecord.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/virtual/VirtualRegionType.h>
+#include <pbbam/virtual/VirtualRegion.h>
+#include <pbbam/virtual/VirtualPolymeraseBamRecord.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+/*%ignore PacBio::BAM::VirtualPolymeraseBamRecord::VirtualPolymeraseBamRecord(const VirtualPolymeraseBamRecord&);*/
+%ignore PacBio::BAM::VirtualPolymeraseBamRecord::VirtualPolymeraseBamRecord(VirtualPolymeraseBamRecord&&);
+%ignore PacBio::BAM::VirtualPolymeraseBamRecord::operator=;
+
+// disabled - can't get it to work right (at least in Python)
+// but the same info is available (& correct) from record.VirtualRegionsTable(regionType)
+%ignore PacBio::BAM::VirtualPolymeraseBamRecord::VirtualRegionsMap;
+
+%template(VirtualRegionList) std::vector<PacBio::BAM::VirtualRegion>;
+%template(VirtualRegionsMap) std::map<PacBio::BAM::VirtualRegionType, std::vector<PacBio::BAM::VirtualRegion> >;
+
+%include <pbbam/virtual/VirtualPolymeraseBamRecord.h>
\ No newline at end of file
diff --git a/src/swig/VirtualPolymeraseReader.i b/src/swig/VirtualPolymeraseReader.i
new file mode 100644
index 0000000..7ab62fe
--- /dev/null
+++ b/src/swig/VirtualPolymeraseReader.i
@@ -0,0 +1,11 @@
+/* VirtualPolymeraseReader.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/virtual/VirtualPolymeraseReader.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%include <pbbam/virtual/VirtualPolymeraseReader.h>
\ No newline at end of file
diff --git a/src/swig/VirtualRegion.i b/src/swig/VirtualRegion.i
new file mode 100644
index 0000000..2436de2
--- /dev/null
+++ b/src/swig/VirtualRegion.i
@@ -0,0 +1,18 @@
+/* VirtualRegion.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/virtual/VirtualRegionType.h>
+#include <pbbam/virtual/VirtualRegion.h>
+#include <map>
+#include <vector>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%ignore PacBio::BAM::VirtualRegion::VirtualRegion(VirtualRegion&&);
+%ignore PacBio::BAM::VirtualRegion::operator=;
+
+%include <pbbam/virtual/VirtualRegionType.h>
+%include <pbbam/virtual/VirtualRegion.h>
diff --git a/src/swig/ZmwWhitelistVirtualReader.i b/src/swig/ZmwWhitelistVirtualReader.i
new file mode 100644
index 0000000..5647ccf
--- /dev/null
+++ b/src/swig/ZmwWhitelistVirtualReader.i
@@ -0,0 +1,11 @@
+/* ZmwWhitelistVirtualReader.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/virtual/ZmwWhitelistVirtualReader.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%include <pbbam/virtual/ZmwWhitelistVirtualReader.h>
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 38dfe39..8b603a6 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -1,15 +1,10 @@
find_package(Threads REQUIRED)
-# find samtools binary for comparing against 'gold standard' files
-#set(Samtools_Dir "/Users/derek/development/samtools")
-set(Samtools_Dir "${PacBioBAM_RootDir}/../../../../prebuilt.out/samtools/samtools-0.1.19/ubuntu-1404/bin")
-find_program(Samtools_Bin samtools HINTS ${Samtools_Dir})
-
# ensure tests directory exists
file(MAKE_DIRECTORY ${PacBioBAM_TestsDir}/bin)
file(MAKE_DIRECTORY ${PacBioBAM_TestsDir}/data/temp)
-# generate paths/values used by for test
+# generate paths/values used by for unit tests
configure_file(
${PacBioBAM_TestsDir}/src/TestData.h.in
${PacBioBAM_TestsDir}/src/TestData.h
@@ -26,7 +21,7 @@ include_directories(
${gtest_SOURCE_DIR}
)
-# grab PacBioBAM test source files
+# grab PacBioBAM unit test source files
include(files.cmake)
set(SOURCES
${PacBioBAMTest_H}
@@ -34,7 +29,7 @@ set(SOURCES
)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${PacBioBAM_CXX_FLAGS}")
-# define test executable
+# define unit test executable
add_definitions(-DPBBAM_TESTING)
if(MSVC)
# VS2012+ pooh-pooh's Derek's "#define private public" trick
@@ -42,7 +37,7 @@ if(MSVC)
endif()
if(PacBioBAM_wrap_r)
- # SWIG R does not support PBBAM_SHARED_PTR, but it does support boost::shared_ptr
+ # SWIG R does not support std::shared_ptr, but it does support boost::shared_ptr
# So force boost if we're wrapping for R.
add_definitions(-DPBBAM_USE_BOOST_SHARED_PTR)
endif()
@@ -51,7 +46,7 @@ set_target_properties(test_pbbam PROPERTIES
RUNTIME_OUTPUT_DIRECTORY ${PacBioBAM_TestsDir}/bin
)
-# set up unit test to run our executable
+# add unit tests to test framework
add_test(
NAME UnitTests
WORKING_DIRECTORY ${PacBioBAM_TestsDir}/bin
diff --git a/tests/data/chunking/chunking.subreadset.xml b/tests/data/chunking/chunking.subreadset.xml
new file mode 100644
index 0000000..6d15ff1
--- /dev/null
+++ b/tests/data/chunking/chunking.subreadset.xml
@@ -0,0 +1,65 @@
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:SubreadSet
+ xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd"
+ xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd"
+ xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd"
+ xmlns:pbds="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd"
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c"
+ TimeStampedName="subreadset_150304_231155"
+ MetaType="PacBio.DataSet.SubreadSet"
+ Name="DataSet_SubreadSet"
+ Tags=""
+ Version="3.0.0"
+ CreatedAt="2015-01-27T09:00:01">
+<pbbase:ExternalResources>
+ <pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5193"
+ TimeStampedName="subread_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.SubreadBamFile"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5194"
+ TimeStampedName="bam_index_150304_231155"
+ MetaType="PacBio.Index.PacBioIndex"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+ <pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5197"
+ TimeStampedName="subread_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.SubreadBamFile"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.2.subreads.bam">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex
+ UniqueId="b096d0a3-94b8-4918-b3af-a3f81bbe5198"
+ TimeStampedName="bam_index_150304_231155"
+ MetaType="PacBio.Index.PacBioIndex"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.2.subreads.bam.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource><pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5195"
+ TimeStampedName="subread_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.SubreadBamFile"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.3.subreads.bam">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex
+ UniqueId="b096d0a3-94b8-4918-b3af-a3f81bbe5196"
+ TimeStampedName="bam_index_150304_231155"
+ MetaType="PacBio.Index.PacBioIndex"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.3.subreads.bam.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+</pbbase:ExternalResources>
+<pbds:Filters>
+ <pbds:Filter>
+ <pbbase:Properties>
+ <pbbase:Property Name="movie" Operator="=" Value="m150404_101626_42267_c100807920800000001823174110291514_s1_p0"/>
+ <pbbase:Property Name="zm" Operator="lt" Value="1816"/>
+ </pbbase:Properties>
+ </pbds:Filter>
+</pbds:Filters>
+</pbds:SubreadSet>
diff --git a/tests/data/chunking/chunking_emptyfilters.subreadset.xml b/tests/data/chunking/chunking_emptyfilters.subreadset.xml
new file mode 100644
index 0000000..917872b
--- /dev/null
+++ b/tests/data/chunking/chunking_emptyfilters.subreadset.xml
@@ -0,0 +1,59 @@
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:SubreadSet
+ xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd"
+ xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd"
+ xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd"
+ xmlns:pbds="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd"
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c"
+ TimeStampedName="subreadset_150304_231155"
+ MetaType="PacBio.DataSet.SubreadSet"
+ Name="DataSet_SubreadSet"
+ Tags=""
+ Version="3.0.0"
+ CreatedAt="2015-01-27T09:00:01">
+<pbbase:ExternalResources>
+ <pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5193"
+ TimeStampedName="subread_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.SubreadBamFile"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5194"
+ TimeStampedName="bam_index_150304_231155"
+ MetaType="PacBio.Index.PacBioIndex"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+ <pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5197"
+ TimeStampedName="subread_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.SubreadBamFile"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.2.subreads.bam">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex
+ UniqueId="b096d0a3-94b8-4918-b3af-a3f81bbe5198"
+ TimeStampedName="bam_index_150304_231155"
+ MetaType="PacBio.Index.PacBioIndex"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.2.subreads.bam.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource><pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5195"
+ TimeStampedName="subread_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.SubreadBamFile"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.3.subreads.bam">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex
+ UniqueId="b096d0a3-94b8-4918-b3af-a3f81bbe5196"
+ TimeStampedName="bam_index_150304_231155"
+ MetaType="PacBio.Index.PacBioIndex"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.3.subreads.bam.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+</pbbase:ExternalResources>
+<pbds:Filters>
+</pbds:Filters>
+</pbds:SubreadSet>
diff --git a/tests/data/chunking/chunking_missingfilters.subreadset.xml b/tests/data/chunking/chunking_missingfilters.subreadset.xml
new file mode 100644
index 0000000..b91708e
--- /dev/null
+++ b/tests/data/chunking/chunking_missingfilters.subreadset.xml
@@ -0,0 +1,58 @@
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:SubreadSet
+ xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd"
+ xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd"
+ xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd"
+ xmlns:pbds="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd"
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c"
+ TimeStampedName="subreadset_150304_231155"
+ MetaType="PacBio.DataSet.SubreadSet"
+ Name="DataSet_SubreadSet"
+ Tags=""
+ Version="3.0.0"
+ CreatedAt="2015-01-27T09:00:01">
+<pbbase:ExternalResources>
+ <pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5193"
+ TimeStampedName="subread_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.SubreadBamFile"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5194"
+ TimeStampedName="bam_index_150304_231155"
+ MetaType="PacBio.Index.PacBioIndex"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+ <pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5197"
+ TimeStampedName="subread_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.SubreadBamFile"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.2.subreads.bam">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex
+ UniqueId="b096d0a3-94b8-4918-b3af-a3f81bbe5198"
+ TimeStampedName="bam_index_150304_231155"
+ MetaType="PacBio.Index.PacBioIndex"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.2.subreads.bam.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource><pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5195"
+ TimeStampedName="subread_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.SubreadBamFile"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.3.subreads.bam">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex
+ UniqueId="b096d0a3-94b8-4918-b3af-a3f81bbe5196"
+ TimeStampedName="bam_index_150304_231155"
+ MetaType="PacBio.Index.PacBioIndex"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.3.subreads.bam.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+</pbbase:ExternalResources>
+</pbds:SubreadSet>
+
diff --git a/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam b/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam
new file mode 100644
index 0000000..c4ec7ea
Binary files /dev/null and b/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam differ
diff --git a/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam.pbi b/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam.pbi
new file mode 100644
index 0000000..4af87e2
Binary files /dev/null and b/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam.pbi differ
diff --git a/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.2.subreads.bam b/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.2.subreads.bam
new file mode 100644
index 0000000..e623aca
Binary files /dev/null and b/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.2.subreads.bam differ
diff --git a/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.2.subreads.bam.pbi b/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.2.subreads.bam.pbi
new file mode 100644
index 0000000..6479979
Binary files /dev/null and b/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.2.subreads.bam.pbi differ
diff --git a/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.3.subreads.bam b/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.3.subreads.bam
new file mode 100644
index 0000000..8544f6a
Binary files /dev/null and b/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.3.subreads.bam differ
diff --git a/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.3.subreads.bam.pbi b/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.3.subreads.bam.pbi
new file mode 100644
index 0000000..a9f4edb
Binary files /dev/null and b/tests/data/chunking/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.3.subreads.bam.pbi differ
diff --git a/tests/data/dataset/ali1.xml b/tests/data/dataset/ali1.xml
index 015068e..ab0a82a 100644
--- a/tests/data/dataset/ali1.xml
+++ b/tests/data/dataset/ali1.xml
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
-<pbds:AlignmentSet xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" MetaType="PacBio.DataSet.AlignmentSet" Name="DataSet_AlignmentSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" CreatedAt="2015-01-27T09:00:01" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd">
+<pbds:AlignmentSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.AlignmentSet" Name="DataSet_AlignmentSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd">
<pbbase:ExternalResources>
<pbbase:ExternalResource Name="First Alignments BAM" Description="Points to an example Alignments BAM file." MetaType="AlignmentFile.AlignmentBamFile" ResourceId="file:///mnt/path/to/alignments0.bam" Tags="Example">
<pbbase:FileIndices>
diff --git a/tests/data/dataset/ali2.xml b/tests/data/dataset/ali2.xml
index f71e2d2..c35f9ec 100644
--- a/tests/data/dataset/ali2.xml
+++ b/tests/data/dataset/ali2.xml
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
-<pbds:AlignmentSet xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" MetaType="PacBio.DataSet.AlignmentSet" Name="DataSet_AlignmentSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" CreatedAt="2015-01-27T09:00:01" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd">
+<pbds:AlignmentSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.AlignmentSet" Name="DataSet_AlignmentSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd">
<pbbase:ExternalResources>
<pbbase:ExternalResource Name="First Alignments BAM" Description="Points to an example Alignments BAM file." MetaType="AlignmentFile.AlignmentBamFile" ResourceId="file:///mnt/path/to/alignments2.bam" Tags="Example">
<pbbase:FileIndices>
diff --git a/tests/data/dataset/ali3.xml b/tests/data/dataset/ali3.xml
index d0dc0d6..f58d25f 100644
--- a/tests/data/dataset/ali3.xml
+++ b/tests/data/dataset/ali3.xml
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
-<pbds:AlignmentSet xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" MetaType="PacBio.DataSet.AlignmentSet" Name="DataSet_AlignmentSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" CreatedAt="2015-01-27T09:00:01" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd">
+<pbds:AlignmentSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.AlignmentSet" Name="DataSet_AlignmentSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd">
<pbbase:ExternalResources>
<pbbase:ExternalResource Name="First Alignments BAM" Description="Points to an example Alignments BAM file." MetaType="AlignmentFile.AlignmentBamFile" ResourceId="file:///mnt/path/to/alignments2.bam" Tags="Example">
<pbbase:FileIndices>
diff --git a/tests/data/dataset/ali4.xml b/tests/data/dataset/ali4.xml
index 015068e..ab0a82a 100644
--- a/tests/data/dataset/ali4.xml
+++ b/tests/data/dataset/ali4.xml
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
-<pbds:AlignmentSet xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" MetaType="PacBio.DataSet.AlignmentSet" Name="DataSet_AlignmentSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" CreatedAt="2015-01-27T09:00:01" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd">
+<pbds:AlignmentSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.AlignmentSet" Name="DataSet_AlignmentSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd">
<pbbase:ExternalResources>
<pbbase:ExternalResource Name="First Alignments BAM" Description="Points to an example Alignments BAM file." MetaType="AlignmentFile.AlignmentBamFile" ResourceId="file:///mnt/path/to/alignments0.bam" Tags="Example">
<pbbase:FileIndices>
diff --git a/tests/data/dataset/bam_mapping.bam b/tests/data/dataset/bam_mapping.bam
index 00637b9..2d4ae7b 100644
Binary files a/tests/data/dataset/bam_mapping.bam and b/tests/data/dataset/bam_mapping.bam differ
diff --git a/tests/data/dataset/bam_mapping.bam.pbi b/tests/data/dataset/bam_mapping.bam.pbi
index 8ad2fd5..fe7c3be 100644
Binary files a/tests/data/dataset/bam_mapping.bam.pbi and b/tests/data/dataset/bam_mapping.bam.pbi differ
diff --git a/tests/data/dataset/bam_mapping_1.bam b/tests/data/dataset/bam_mapping_1.bam
index 9644940..1e9670e 100644
Binary files a/tests/data/dataset/bam_mapping_1.bam and b/tests/data/dataset/bam_mapping_1.bam differ
diff --git a/tests/data/dataset/bam_mapping_1.bam.pbi b/tests/data/dataset/bam_mapping_1.bam.pbi
index 11e85a0..d99a174 100644
Binary files a/tests/data/dataset/bam_mapping_1.bam.pbi and b/tests/data/dataset/bam_mapping_1.bam.pbi differ
diff --git a/tests/data/dataset/bam_mapping_2.bam b/tests/data/dataset/bam_mapping_2.bam
index 419701d..09678ea 100644
Binary files a/tests/data/dataset/bam_mapping_2.bam and b/tests/data/dataset/bam_mapping_2.bam differ
diff --git a/tests/data/dataset/bam_mapping_2.bam.pbi b/tests/data/dataset/bam_mapping_2.bam.pbi
index 9c46e0e..d1765ef 100644
Binary files a/tests/data/dataset/bam_mapping_2.bam.pbi and b/tests/data/dataset/bam_mapping_2.bam.pbi differ
diff --git a/tests/data/dataset/bam_mapping_new.bam b/tests/data/dataset/bam_mapping_new.bam
new file mode 100644
index 0000000..3039331
Binary files /dev/null and b/tests/data/dataset/bam_mapping_new.bam differ
diff --git a/tests/data/dataset/bam_mapping_new.bam.pbi b/tests/data/dataset/bam_mapping_new.bam.pbi
new file mode 100644
index 0000000..82d497c
Binary files /dev/null and b/tests/data/dataset/bam_mapping_new.bam.pbi differ
diff --git a/tests/data/dataset/bam_mapping_staggered.xml b/tests/data/dataset/bam_mapping_staggered.xml
index 51a8b71..879c193 100644
--- a/tests/data/dataset/bam_mapping_staggered.xml
+++ b/tests/data/dataset/bam_mapping_staggered.xml
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='UTF-8'?>
-<pbds:DataSet xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" CreatedAt="2015-05-13T10:58:26" MetaType="PacBio.DataSet.DataSet" Name="" Tags="" UniqueId="30f72098-bc5b-e06b-566c-8b28dda909a8" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd">
+<pbds:DataSet CreatedAt="2015-05-13T10:58:26" MetaType="PacBio.DataSet.DataSet" Name="" Tags="" UniqueId="30f72098-bc5b-e06b-566c-8b28dda909a8" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd">
<pbbase:ExternalResources>
<pbbase:ExternalResource ResourceId="file:tests/data/bam_mapping_1.bam">
<pbbase:FileIndices>
diff --git a/tests/data/dataset/barcode.dataset.xml b/tests/data/dataset/barcode.dataset.xml
index 3613e20..1fbbb18 100644
--- a/tests/data/dataset/barcode.dataset.xml
+++ b/tests/data/dataset/barcode.dataset.xml
@@ -1,11 +1,11 @@
<?xml version="1.0" encoding="utf-8"?>
-<pbds:BarcodeSet xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" MetaType="PacBio.DataSet.BarcodeSet" Name="DataSet_BarcodeSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" CreatedAt="2015-01-27T09:00:01" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd">
+<pbds:BarcodeSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.BarcodeSet" Name="DataSet_BarcodeSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd">
<pbbase:ExternalResources>
<pbbase:ExternalResource Name="First Barcodes FASTA" Description="Points to an example Barcodes FASTA file." MetaType="BarcodeFile.BarcodeFastaFile" ResourceId="file:///mnt/path/to/barcode.fasta" Tags="Example"/>
</pbbase:ExternalResources>
<pbds:DataSetMetadata>
<pbds:TotalLength>400</pbds:TotalLength>
<pbds:NumRecords>30</pbds:NumRecords>
- <pbsec:BarcodeConstruction>paired</pbsec:BarcodeConstruction>
+ <pbds:BarcodeConstruction>paired</pbds:BarcodeConstruction>
</pbds:DataSetMetadata>
</pbds:BarcodeSet>
diff --git a/tests/data/dataset/ccsread.dataset.xml b/tests/data/dataset/ccsread.dataset.xml
index 9baafab..97b5943 100644
--- a/tests/data/dataset/ccsread.dataset.xml
+++ b/tests/data/dataset/ccsread.dataset.xml
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
-<pbds:ConsensusReadSet xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" MetaType="PacBio.DataSet.ConsensusReadSet" Name="DataSet_ConsensusReadSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" CreatedAt="2015-01-27T09:00:01" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd">
+<pbds:ConsensusReadSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.ConsensusReadSet" Name="DataSet_ConsensusReadSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd">
<pbbase:ExternalResources>
<pbbase:ExternalResource Name="First ConsensusRead BAM" Description="Points to an example ConsensusRead BAM file." MetaType="PacBio.ConsensusReadFile.ConsensusReadBamFile" ResourceId="file:///mnt/path/to/ccsreads0.bam" Tags="Example">
<pbbase:FileIndices>
diff --git a/tests/data/dataset/contig.dataset.xml b/tests/data/dataset/contig.dataset.xml
index 77d7c4e..11a9b12 100644
--- a/tests/data/dataset/contig.dataset.xml
+++ b/tests/data/dataset/contig.dataset.xml
@@ -1,18 +1,18 @@
-<?xml version="1.0" encoding="utf-8"?>
-<pbds:ContigSet xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" MetaType="PacBio.DataSet.ContigSet" Name="DataSet_ContigSet" Tags="AHAcontigs" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" CreatedAt="2015-01-27T09:00:01" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd">
- <pbbase:ExternalResources>
- <pbbase:ExternalResource Name="First References FASTA" Description="Points to an example references FASTA file." MetaType="PacBio.ReferenceFile.ReferenceFastaFile" ResourceId="file:///mnt/path/to/reference.fasta" Tags="Example">
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:ContigSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.ContigSet" Name="DataSet_ContigSet" Tags="AHAcontigs" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBiosets.xsd">
+ <pbbase:ExternalResources>
+ <pbbase:ExternalResource Name="First References FASTA" Description="Points to an example references FASTA file." MetaType="PacBio.ReferenceFile.ReferenceFastaFile" ResourceId="file:///mnt/path/to/reference.fasta" Tags="Example">
<pbbase:FileIndices>
- <pbbase:FileIndex MetaType="PacBio.Index.SaWriterIndex" ResourceId="file:///mnt/path/to/reference.fasta.sa"/>
- <pbbase:FileIndex MetaType="PacBio.Index.SamIndex" ResourceId="file:///mnt/path/to/reference.fasta.fai"/>
+ <pbbase:FileIndex MetaType="PacBio.Index.SaWriterIndex" ResourceId="file:///mnt/path/to/reference.fasta.sa"/>
+ <pbbase:FileIndex MetaType="PacBio.Index.SamIndex" ResourceId="file:///mnt/path/to/reference.fasta.fai"/>
</pbbase:FileIndices>
- </pbbase:ExternalResource>
- </pbbase:ExternalResources>
- <pbds:DataSetMetadata>
- <pbds:TotalLength>5000000</pbds:TotalLength>
- <pbds:NumRecords>500</pbds:NumRecords>
- <pbsec:Contigs>
- <pbsec:Contig Name="gi|229359445|emb|AM181176.4|" Description="Pseudomonas fluorescens SBW25 complete genome|quiver" Length="6722109" Digest="f627c795efad7ce0050ed42b942d408e"/>
- </pbsec:Contigs>
- </pbds:DataSetMetadata>
+ </pbbase:ExternalResource>
+ </pbbase:ExternalResources>
+ <pbds:DataSetMetadata>
+ <pbds:TotalLength>5000000</pbds:TotalLength>
+ <pbds:NumRecords>500</pbds:NumRecords>
+ <pbds:Contigs>
+ <pbds:Contig Name="gi|229359445|emb|AM181176.4|" Description="Pseudomonas fluorescens SBW25 complete genome|quiver" Length="6722109" Digest="f627c795efad7ce0050ed42b942d408e"/>
+ </pbds:Contigs>
+ </pbds:DataSetMetadata>
</pbds:ContigSet>
diff --git a/tests/data/dataset/hdfsubread_dataset.xml b/tests/data/dataset/hdfsubread_dataset.xml
index 8a0e0e2..29fdf31 100644
--- a/tests/data/dataset/hdfsubread_dataset.xml
+++ b/tests/data/dataset/hdfsubread_dataset.xml
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
-<pbds:HdfSubreadSet xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" MetaType="PacBio.DataSet.HdfSubreadSet" Name="DataSet_HdfSubreadSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" CreatedAt="2015-01-27T09:00:01" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd">
+<pbds:HdfSubreadSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.HdfSubreadSet" Name="DataSet_HdfSubreadSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd" xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd" [...]
<pbbase:ExternalResourcess>
<pbbase:ExternalResource Name="First Subreads BAM" Description="Points to an example Subreads BAM file." MetaType="SubreadFile.SubreadBamFile" ResourceId="file:///mnt/path/to/subreads0.bam" Tags="Example">
<pbbase:FileIndices>
diff --git a/tests/data/dataset/lambda_contigs.xml b/tests/data/dataset/lambda_contigs.xml
index e2bfb18..4abc8cc 100644
--- a/tests/data/dataset/lambda_contigs.xml
+++ b/tests/data/dataset/lambda_contigs.xml
@@ -1,2 +1,6 @@
<?xml version='1.0' encoding='UTF-8'?>
-<pbds:ReferenceSet xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" CreatedAt="2015-05-28T10:56:36" MetaType="PacBio.DataSet.ReferenceSet" Name="" Tags="" UniqueId="596e87db-34f9-d2fd-c905-b017543170e1" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd"><pbbase:ExternalResources><pbbase:ExternalResource ResourceId="file:tests/data/lambda_contigs.fasta" /></pbbase:ExternalResources></pbds:Re [...]
\ No newline at end of file
+<pbds:ReferenceSet CreatedAt="2015-05-28T10:56:36" MetaType="PacBio.DataSet.ReferenceSet" Name="" Tags="" UniqueId="596e87db-34f9-d2fd-c905-b017543170e1" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd">
+ <pbbase:ExternalResources>
+ <pbbase:ExternalResource ResourceId="file:tests/data/lambda_contigs.fasta"/>
+ </pbbase:ExternalResources>
+</pbds:ReferenceSet>
\ No newline at end of file
diff --git a/tests/data/dataset/malformed.xml b/tests/data/dataset/malformed.xml
new file mode 100644
index 0000000..e9000c8
--- /dev/null
+++ b/tests/data/dataset/malformed.xml
@@ -0,0 +1,84 @@
+<?xml version="1.0" encoding="utf-8"?>
+<SubreadSet
+ Description="Merged dataset from 1 files using DatasetMerger 0.1.2"
+ MetaType="PacBio.DataSet.HdfSubreadSet"
+ Name="Subreads from runr000013_42267_150403"
+ Tags="pacbio.secondary.instrument=RS"
+ TimeStampedName="hdfsubreadset_2015-08-19T15:39:36.331-07:00"
+ UniqueId="b4741521-2a4c-42df-8a13-0a755ca9ed1e"
+ Version="0.5"
+ xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xmlns:ns0="http://pacificbiosciences.com/PacBioBaseDataModel.xsd"
+ xmlns:ns1="http://pacificbiosciences.com/PacBioSampleInfo.xsd"
+ xmlns:ns2="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd"
+ xmlns:ns3="http://pacificbiosciences.com/PacBioReagentKit.xsd">
+ <ns0:ExternalResources>
+ <ns0:ExternalResource
+ MetaType="SubreadFile.SubreadBamFile"
+ TimeStampedName="SubreadFile.SubreadBamFile_00000000000000"
+ UniqueId="251acf71-9eb0-489e-9dd1-cdbd11432753"
+ ResourceId="file:///mnt/secondary-siv/jenkins/jenkins-bot01/workspace/Ubuntu1404_Mainline_SA3_Tiny_tests/software/smrtanalysis/siv/testkit-jobs/sa3_pipelines/mapping/tiny/job_output-ubuntu1404/tasks/pbsmrtpipe.tasks.h5_subreads_to_subread-0//mnt/secondary-siv/jenkins/jenkins-bot01/workspace/Ubuntu1404_Mainline_SA3_Tiny_tests/software/smrtanalysis/siv/testkit-jobs/sa3_pipelines/mapping/tiny/job_output-ubuntu1404/tasks/pbsmrtpipe.tasks.h5_subreads_to_subread-0/file.subreads.sub [...]
+ </ns0:ExternalResources>
+ <DataSetMetadata>
+ <TotalLength>50000000</TotalLength>
+ <NumRecords>150000</NumRecords>
+ <ns2:Collections>
+ <ns2:CollectionMetadata
+ Context="m150404_101626_42267_c100807920800000001823174110291514_s1_p0"
+ InstrumentId="1"
+ InstrumentName="42267"
+ MetaType="PacBio.Collection"
+ TimeStampedName="m150404_101626_42267_c100807920800000001823174110291514_s1_p0"
+ UniqueId="d66c8372-2b70-4dcf-b64f-9f8b5cc351fd">
+ <ns2:InstCtrlVer>2.3.0.1.142990</ns2:InstCtrlVer>
+ <ns2:SigProcVer>NRT at 172.31.128.10:8082, SwVer=2301.142990, HwVer=1.0</ns2:SigProcVer>
+ <ns2:RunDetails>
+ <ns2:RunId>r000013_42267_150403</ns2:RunId>
+ <ns2:Name>Inst42267-040315-SAT-100pM-2kb-P6C4</ns2:Name>
+ </ns2:RunDetails>
+ <ns2:WellSample Name="Inst42267-040315-SAT-100pM-2kb-P6C4">
+ <ns2:PlateId>Inst42267-040315-SAT-100pM-2kb-P6C4</ns2:PlateId>
+ <ns2:WellName>Inst42267-040315-SAT-100pM-2kb-P6C4</ns2:WellName>
+ <ns2:Concentration>0.0</ns2:Concentration>
+ <ns2:SampleReuseEnabled>false</ns2:SampleReuseEnabled>
+ <ns2:StageHotstartEnabled>false</ns2:StageHotstartEnabled>
+ <ns2:SizeSelectionEnabled>false</ns2:SizeSelectionEnabled>
+ <ns2:UseCount>1</ns2:UseCount>
+ <ns1:BioSamplePointers>
+ <ns1:BioSamplePointer>251acf71-9eb0-489e-9dd1-cdbd11432752</ns1:BioSamplePointer>
+ </ns1:BioSamplePointers>
+ </ns2:WellSample>
+ <ns2:Automation>
+ <ns0:AutomationParameters>
+ <ns0:AutomationParameter />
+ </ns0:AutomationParameters>
+ </ns2:Automation>
+ <ns2:CollectionNumber>7</ns2:CollectionNumber>
+ <ns2:CellIndex>4</ns2:CellIndex>
+ <ns2:CellPac Barcode="10080792080000000182317411029151" />
+ <ns2:Primary>
+ <ns2:AutomationName>BasecallerV1</ns2:AutomationName>
+ <ns2:ConfigFileName>2-3-0_P6-C4.xml</ns2:ConfigFileName>
+ <ns2:SequencingCondition />
+ <ns2:OutputOptions>
+ <ns2:ResultsFolder>Analysis_Results</ns2:ResultsFolder>
+ <ns2:CollectionPathUri>rsy://mp-rsync/vol55//RS_DATA_STAGING/42267/Inst42267-040315-SAT-100pM-2kb-P6C4_13/A04_7/</ns2:CollectionPathUri>
+ <ns2:CopyFiles>
+ <ns2:CollectionFileCopy>Fasta</ns2:CollectionFileCopy>
+ </ns2:CopyFiles>
+ <ns2:Readout>Bases</ns2:Readout>
+ <ns2:MetricsVerbosity>Minimal</ns2:MetricsVerbosity>
+ </ns2:OutputOptions>
+ </ns2:Primary>
+ </ns2:CollectionMetadata>
+ </ns2:Collections>
+ <ns1:BioSamples>
+ <ns1:BioSample
+ Description="Inst42267-SAT-100pM-2kbLambda-P6C4-Std120_CPS_040315"
+ MetaType="PacBio.Sample"
+ Name="Inst42267-040315-SAT-100pM-2kb-P6C4"
+ TimeStampedName="biosample_2015-08-19T15:39:36.331-07:00"
+ UniqueId="251acf71-9eb0-489e-9dd1-cdbd11432752" />
+ </ns1:BioSamples>
+ </DataSetMetadata>
+</SubreadSet>
diff --git a/tests/data/dataset/merge.fofn b/tests/data/dataset/merge.fofn
new file mode 100644
index 0000000..6524ba5
--- /dev/null
+++ b/tests/data/dataset/merge.fofn
@@ -0,0 +1,2 @@
+bam_mapping_1.bam
+bam_mapping_2.bam
diff --git a/tests/data/dataset/pbalchemy10kbp.pbalign.sorted.pbver1.bam b/tests/data/dataset/pbalchemy10kbp.pbalign.sorted.pbver1.bam
index fb08bf9..52c0c8e 100644
Binary files a/tests/data/dataset/pbalchemy10kbp.pbalign.sorted.pbver1.bam and b/tests/data/dataset/pbalchemy10kbp.pbalign.sorted.pbver1.bam differ
diff --git a/tests/data/dataset/pbalchemy10kbp.pbalign.sorted.pbver1.bam.bai b/tests/data/dataset/pbalchemy10kbp.pbalign.sorted.pbver1.bam.bai
index b87bfa7..b8892c2 100644
Binary files a/tests/data/dataset/pbalchemy10kbp.pbalign.sorted.pbver1.bam.bai and b/tests/data/dataset/pbalchemy10kbp.pbalign.sorted.pbver1.bam.bai differ
diff --git a/tests/data/dataset/pbalchemy10kbp.xml b/tests/data/dataset/pbalchemy10kbp.xml
index 1b90cec..96189ad 100644
--- a/tests/data/dataset/pbalchemy10kbp.xml
+++ b/tests/data/dataset/pbalchemy10kbp.xml
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
-<pbds:DataSet xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" CreatedAt="2015-05-22T16:56:16" MetaType="PacBio.DataSet.DataSet" Name="" Tags="" UniqueId="58e3f7c5-24c1-b58b-fbd5-37de268cc2f0" Version="2.3.0" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd">
+<pbds:DataSet CreatedAt="2015-05-22T16:56:16" MetaType="PacBio.DataSet.DataSet" Name="" Tags="" UniqueId="58e3f7c5-24c1-b58b-fbd5-37de268cc2f0" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd">
<pbbase:ExternalResources>
<pbbase:ExternalResource ResourceId="file:tests/data/pbalchemy10kbp.pbalign.sorted.pbver1.bam">
<pbbase:FileIndices>
diff --git a/tests/data/dataset/reference.dataset.xml b/tests/data/dataset/reference.dataset.xml
index 953f863..3cfbe8c 100644
--- a/tests/data/dataset/reference.dataset.xml
+++ b/tests/data/dataset/reference.dataset.xml
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
-<pbds:ReferenceSet xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" MetaType="PacBio.DataSet.ReferenceSet" Name="DataSet_ReferenceSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" CreatedAt="2015-01-27T09:00:01" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd">
+<pbds:ReferenceSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.ReferenceSet" Name="DataSet_ReferenceSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd">
<pbbase:ExternalResources>
<pbbase:ExternalResource Name="First References FASTA" Description="Points to an example references FASTA file." MetaType="PacBio.ReferenceFile.ReferenceFastaFile" ResourceId="file:///mnt/path/to/reference.fasta" Tags="Example">
<pbbase:FileIndices>
diff --git a/tests/data/dataset/subread_dataset1.xml b/tests/data/dataset/subread_dataset1.xml
index ac6325b..1d64e79 100644
--- a/tests/data/dataset/subread_dataset1.xml
+++ b/tests/data/dataset/subread_dataset1.xml
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
-<pbds:SubreadSet xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" MetaType="PacBio.DataSet.SubreadSet" Name="DataSet_SubreadSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" CreatedAt="2015-01-27T09:00:01" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd" >
+<pbds:SubreadSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.SubreadSet" Name="DataSet_SubreadSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd" xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd" xmlns:xs [...]
<pbbase:ExternalResources>
<pbbase:ExternalResource Name="First Subreads BAM" Description="Points to an example Subreads BAM file." MetaType="SubreadFile.SubreadBamFile" ResourceId="file:///mnt/path/to/subreads0.bam" Tags="Example">
<pbbase:FileIndices>
diff --git a/tests/data/dataset/subread_dataset2.xml b/tests/data/dataset/subread_dataset2.xml
index 63da322..a395330 100644
--- a/tests/data/dataset/subread_dataset2.xml
+++ b/tests/data/dataset/subread_dataset2.xml
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
-<pbds:SubreadSet xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" MetaType="PacBio.DataSet.SubreadSet" Name="DataSet_SubreadSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" CreatedAt="2015-01-27T09:00:01" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd" >
+<pbds:SubreadSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.SubreadSet" Name="DataSet_SubreadSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd" xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd" xmlns:xsi [...]
<pbbase:ExternalResources>
<pbbase:ExternalResource Name="First Subreads BAM" Description="Points to an example Subreads BAM file." MetaType="SubreadFile.SubreadBamFile" ResourceId="file:///mnt/path/to/subreads2.bam" Tags="Example">
<pbbase:FileIndices>
diff --git a/tests/data/dataset/subread_dataset3.xml b/tests/data/dataset/subread_dataset3.xml
index 00a1786..91923a8 100644
--- a/tests/data/dataset/subread_dataset3.xml
+++ b/tests/data/dataset/subread_dataset3.xml
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
-<pbds:SubreadSet xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" MetaType="PacBio.DataSet.SubreadSet" Name="DataSet_SubreadSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" CreatedAt="2015-01-27T09:00:01" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd" >
+<pbds:SubreadSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.SubreadSet" Name="DataSet_SubreadSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd" xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd" xmlns:xs [...]
<pbbase:ExternalResources>
<pbbase:ExternalResource Name="First Subreads BAM" Description="Points to an example Subreads BAM file." MetaType="SubreadFile.SubreadBamFile" ResourceId="file:///mnt/path/to/subreads2.bam" Tags="Example">
<pbbase:FileIndices>
diff --git a/tests/data/dataset/transformed_rs_subread_dataset.xml b/tests/data/dataset/transformed_rs_subread_dataset.xml
index 6b93870..465d9a6 100644
--- a/tests/data/dataset/transformed_rs_subread_dataset.xml
+++ b/tests/data/dataset/transformed_rs_subread_dataset.xml
@@ -1,13 +1,20 @@
<?xml version="1.0" encoding="UTF-8"?>
-<pbds:HdfSubreadSet xmlns:uuid="java:java.util.UUID" xmlns:bax="http://whatever"
- xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd"
- xmlns:xs="http://www.w3.org/2001/XMLSchema"
- xmlns:fn="http://www.w3.org/2005/xpath-functions"
- Name="Subreads from run r001173_42129_130607"
- MetaType="PacBio.DataSet.SubreadSet"
- Tags="pacbio.secondary.instrument=RS"
- Version="0.5"
- UniqueId="abbc9183-b01e-4671-8c12-19efee534647">
+<pbds:HdfSubreadSet
+ xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xmlns:bax="http://whatever"
+ xmlns:fn="http://www.w3.org/2005/xpath-functions"
+ xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd"
+ xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd"
+ xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd"
+ xmlns:uuid="java:java.util.UUID"
+ xmlns:xs="http://www.w3.org/2001/XMLSchema"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ Name="Subreads from run r001173_42129_130607"
+ MetaType="PacBio.DataSet.SubreadSet"
+ Tags="pacbio.secondary.instrument=RS"
+ Version="0.5"
+ UniqueId="abbc9183-b01e-4671-8c12-19efee534647">
<pbbase:ExternalResources>
<pbbase:ExternalResource MetaType="PacBio.SubreadFile.BaxFile"
ResourceId="file:///mnt/secondary-siv/testdata/LIMS/2590727/0001/Analysis_Results/m130608_033634_42129_c100515232550000001823076608221351_s1_p0.0.bax.h5"/>
diff --git a/tests/data/ex2.bam b/tests/data/ex2.bam
index efba168..3fb5f49 100644
Binary files a/tests/data/ex2.bam and b/tests/data/ex2.bam differ
diff --git a/tests/data/ex2.bam.bai b/tests/data/ex2.bam.bai
index f44c34d..a0a7868 100644
Binary files a/tests/data/ex2.bam.bai and b/tests/data/ex2.bam.bai differ
diff --git a/tests/data/ex2.sam b/tests/data/ex2.sam
index b609d6c..a984e87 100644
--- a/tests/data/ex2.sam
+++ b/tests/data/ex2.sam
@@ -1,4 +1,4 @@
- at HD VN:1.0 SO:coordinate pb:3.0b7
+ at HD VN:1.0 SO:coordinate pb:3.0.1
@SQ SN:seq1 LN:1575
@SQ SN:seq2 LN:1584
B7_591:4:96:693:509 73 seq1 1 99 36M * 0 0 CACTAGTGGCTCATTGTAAATGTGTGGTTTAACTCG <<<<<<<<<<<<<<<;<<<<<<<<<5<<<<<;:<;7 MF:i:18 Aq:i:73 NM:i:0 UQ:i:0 H0:i:1 H1:i:0
diff --git a/tests/data/ex2.bam b/tests/data/ex2_copy.bam
similarity index 100%
copy from tests/data/ex2.bam
copy to tests/data/ex2_copy.bam
diff --git a/tests/data/ex2.bam.bai b/tests/data/ex2_copy.bam.bai
similarity index 100%
copy from tests/data/ex2.bam.bai
copy to tests/data/ex2_copy.bam.bai
diff --git a/tests/data/phi29.bam b/tests/data/phi29.bam
new file mode 100644
index 0000000..46176b6
Binary files /dev/null and b/tests/data/phi29.bam differ
diff --git a/tests/data/polymerase/consolidate.subread.dataset.xml b/tests/data/polymerase/consolidate.subread.dataset.xml
new file mode 100644
index 0000000..ca85a7a
--- /dev/null
+++ b/tests/data/polymerase/consolidate.subread.dataset.xml
@@ -0,0 +1,38 @@
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:SubreadSet
+ xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd"
+ xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd"
+ xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd"
+ xmlns:pbds="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd"
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c"
+ TimeStampedName="subreadset_150304_231155"
+ MetaType="PacBio.DataSet.SubreadSet"
+ Name="DataSet_SubreadSet"
+ Version="3.0.0"
+ CreatedAt="2015-01-27T09:00:01">
+<pbbase:ExternalResources>
+ <pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5193"
+ TimeStampedName="subread_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.SubreadBamFile"
+ ResourceId="production.subreads.bam">
+ </pbbase:ExternalResource>
+ <pbbase:ExternalResource
+ UniqueId="b096d0a3-94b8-4918-b3af-a3f81bbe5195"
+ TimeStampedName="scraps_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.ScrapsBamFile"
+ ResourceId="production.scraps.bam">
+ </pbbase:ExternalResource>
+</pbbase:ExternalResources>
+<pbds:Filters>
+ <pbds:Filter>
+ <pbbase:Properties>
+ <pbbase:Property Name="qStart" Value="4000" Operator=">"/>
+ <pbbase:Property Name="qStart" Value="5000" Operator="<"/>
+ </pbbase:Properties>
+ </pbds:Filter>
+</pbds:Filters>
+</pbds:SubreadSet>
diff --git a/tests/data/polymerase/filtered_resources.subread.dataset.xml b/tests/data/polymerase/filtered_resources.subread.dataset.xml
new file mode 100644
index 0000000..e414e00
--- /dev/null
+++ b/tests/data/polymerase/filtered_resources.subread.dataset.xml
@@ -0,0 +1,67 @@
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:SubreadSet
+ xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd"
+ xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd"
+ xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd"
+ xmlns:pbds="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd"
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c"
+ TimeStampedName="subreadset_150304_231155"
+ MetaType="PacBio.DataSet.SubreadSet"
+ Name="DataSet_SubreadSet"
+ Version="3.0.0"
+ CreatedAt="2015-01-27T09:00:01">
+<pbbase:ExternalResources>
+ <pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5193"
+ TimeStampedName="subread_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.SubreadBamFile"
+ ResourceId="./production.subreads.bam">
+ <pbbase:ExternalResources>
+ <pbbase:ExternalResource
+ UniqueId="b096d0a3-94b8-4918-b3af-a3f81bbe5195"
+ TimeStampedName="scraps_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.ScrapsBamFile"
+ ResourceId="./production.scraps.bam">
+ </pbbase:ExternalResource>
+ </pbbase:ExternalResources>
+ </pbbase:ExternalResource>
+ <pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5193"
+ TimeStampedName="subread_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.SubreadBamFile"
+ ResourceId="./internal.subreads.bam">
+ <pbbase:ExternalResources>
+ <pbbase:ExternalResource
+ UniqueId="b096d0a3-94b8-4918-b3af-a3f81bbe5195"
+ TimeStampedName="scraps_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.ScrapsBamFile"
+ ResourceId="./internal.scraps.bam">
+ </pbbase:ExternalResource>
+ </pbbase:ExternalResources>
+ </pbbase:ExternalResource>
+ <pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5197"
+ TimeStampedName="subread_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.HqRegionBamFile"
+ ResourceId="./production_hq.hqregion.bam">
+ <pbbase:ExternalResources>
+ <pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5199"
+ TimeStampedName="scraps_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.HqScrapsBamFile"
+ ResourceId="./production_hq.scraps.bam">
+ </pbbase:ExternalResource>
+ </pbbase:ExternalResources>
+ </pbbase:ExternalResource>
+</pbbase:ExternalResources>
+<pbds:Filters>
+ <pbds:Filter>
+ <pbbase:Properties>
+ <pbbase:Property Name="zm" Value="100000" Operator="=="/>
+ </pbbase:Properties>
+ </pbds:Filter>
+</pbds:Filters>
+</pbds:SubreadSet>
\ No newline at end of file
diff --git a/tests/data/polymerase/internal.hqregions.bam b/tests/data/polymerase/internal.hqregions.bam
new file mode 100644
index 0000000..e2f7f09
Binary files /dev/null and b/tests/data/polymerase/internal.hqregions.bam differ
diff --git a/tests/data/polymerase/internal.lqregions.bam b/tests/data/polymerase/internal.lqregions.bam
new file mode 100644
index 0000000..b8aeed3
Binary files /dev/null and b/tests/data/polymerase/internal.lqregions.bam differ
diff --git a/tests/data/polymerase/internal.polymerase.bam b/tests/data/polymerase/internal.polymerase.bam
index 5a9a3c2..2a01fc3 100644
Binary files a/tests/data/polymerase/internal.polymerase.bam and b/tests/data/polymerase/internal.polymerase.bam differ
diff --git a/tests/data/polymerase/internal.scraps.bam b/tests/data/polymerase/internal.scraps.bam
index ee501d3..2c2f3fc 100644
Binary files a/tests/data/polymerase/internal.scraps.bam and b/tests/data/polymerase/internal.scraps.bam differ
diff --git a/tests/data/polymerase/internal.scraps.bam.pbi b/tests/data/polymerase/internal.scraps.bam.pbi
new file mode 100644
index 0000000..2d0bad9
Binary files /dev/null and b/tests/data/polymerase/internal.scraps.bam.pbi differ
diff --git a/tests/data/polymerase/internal.subreads.bam b/tests/data/polymerase/internal.subreads.bam
index a352448..c45ff59 100644
Binary files a/tests/data/polymerase/internal.subreads.bam and b/tests/data/polymerase/internal.subreads.bam differ
diff --git a/tests/data/polymerase/internal.subreads.bam.pbi b/tests/data/polymerase/internal.subreads.bam.pbi
new file mode 100644
index 0000000..8059402
Binary files /dev/null and b/tests/data/polymerase/internal.subreads.bam.pbi differ
diff --git a/tests/data/polymerase/internal_hq.hqregion.bam b/tests/data/polymerase/internal_hq.hqregion.bam
deleted file mode 100644
index e59134a..0000000
Binary files a/tests/data/polymerase/internal_hq.hqregion.bam and /dev/null differ
diff --git a/tests/data/polymerase/internal_hq.scraps.bam b/tests/data/polymerase/internal_hq.scraps.bam
deleted file mode 100644
index 154d2a1..0000000
Binary files a/tests/data/polymerase/internal_hq.scraps.bam and /dev/null differ
diff --git a/tests/data/polymerase/internal_polymerase.fasta b/tests/data/polymerase/internal_polymerase.fasta
deleted file mode 100644
index 9fb8832..0000000
--- a/tests/data/polymerase/internal_polymerase.fasta
+++ /dev/null
@@ -1,2 +0,0 @@
->m130615_051803_richard_c100541252550000001823084511241346_s1_p0/66617/2659_7034
-CCAGTTTCTCTCTCACGTCACACCCATGAAAAGCAATGGATCTCTCTCTACACAACACAGAGCAAAGCGGAGGTTGGAGCTGTGAAAAAAAGAGATTGAGAATCCAATCCTTAGACCTCTATTAAGTCGACAACACCGCAGAGAACAAGCATCCTATCTGTGTCATTATCCGGTGGTGTTGGAGAGTTTGATATTATACAACAATAAAATACATATAATAGTAGAAATTCATCCCACAACAAAATCTTTATTAGGGTAAGTAGAAGCTTATCTATGAAACTGGGTTCATAAAAAGTTAAGAAAGAATAACAATAGATATAAAGAGGAACACACAAGTTGAATTTTATCGCATATAGCAATTAATCAAGAGGAATTGCAGGTTTTAATCTCATGGCTTTGAGCCTACTATTCTATGAGCTTGGGCTTACACACAGTGTCTTGACCGTAGATGCAGATCTTCTCCTCCAAACACACTAAACACCACCTTCACAA [...]
diff --git a/tests/data/polymerase/multiple_resources.subread.dataset.xml b/tests/data/polymerase/multiple_resources.subread.dataset.xml
new file mode 100644
index 0000000..109535d
--- /dev/null
+++ b/tests/data/polymerase/multiple_resources.subread.dataset.xml
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:SubreadSet
+ xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd"
+ xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd"
+ xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd"
+ xmlns:pbds="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd"
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c"
+ TimeStampedName="subreadset_150304_231155"
+ MetaType="PacBio.DataSet.SubreadSet"
+ Name="DataSet_SubreadSet"
+ Version="3.0.0"
+ CreatedAt="2015-01-27T09:00:01">
+<pbbase:ExternalResources>
+ <pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5193"
+ TimeStampedName="subread_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.SubreadBamFile"
+ ResourceId="./production.subreads.bam">
+ <pbbase:ExternalResources>
+ <pbbase:ExternalResource
+ UniqueId="b096d0a3-94b8-4918-b3af-a3f81bbe5195"
+ TimeStampedName="scraps_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.ScrapsBamFile"
+ ResourceId="./production.scraps.bam">
+ </pbbase:ExternalResource>
+ </pbbase:ExternalResources>
+ </pbbase:ExternalResource>
+ <pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5197"
+ TimeStampedName="subread_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.HqRegionBamFile"
+ ResourceId="./production_hq.hqregion.bam">
+ <pbbase:ExternalResources>
+ <pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5199"
+ TimeStampedName="scraps_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.HqScrapsBamFile"
+ ResourceId="./production_hq.scraps.bam">
+ </pbbase:ExternalResource>
+ </pbbase:ExternalResources>
+ </pbbase:ExternalResource>
+</pbbase:ExternalResources>
+</pbds:SubreadSet>
\ No newline at end of file
diff --git a/tests/data/polymerase/production.polymerase.bam b/tests/data/polymerase/production.polymerase.bam
index 9c192da..4c84b23 100644
Binary files a/tests/data/polymerase/production.polymerase.bam and b/tests/data/polymerase/production.polymerase.bam differ
diff --git a/tests/data/polymerase/production.scraps.bam b/tests/data/polymerase/production.scraps.bam
index c8c20df..a32bdfb 100644
Binary files a/tests/data/polymerase/production.scraps.bam and b/tests/data/polymerase/production.scraps.bam differ
diff --git a/tests/data/polymerase/production.scraps.bam.pbi b/tests/data/polymerase/production.scraps.bam.pbi
new file mode 100644
index 0000000..c3abd5c
Binary files /dev/null and b/tests/data/polymerase/production.scraps.bam.pbi differ
diff --git a/tests/data/polymerase/production.subreads.bam b/tests/data/polymerase/production.subreads.bam
index 9c56583..452aad5 100644
Binary files a/tests/data/polymerase/production.subreads.bam and b/tests/data/polymerase/production.subreads.bam differ
diff --git a/tests/data/polymerase/production.subreads.bam.pbi b/tests/data/polymerase/production.subreads.bam.pbi
new file mode 100644
index 0000000..f504955
Binary files /dev/null and b/tests/data/polymerase/production.subreads.bam.pbi differ
diff --git a/tests/data/polymerase/production_hq.hqregion.bam b/tests/data/polymerase/production_hq.hqregion.bam
index 2993089..66d436b 100644
Binary files a/tests/data/polymerase/production_hq.hqregion.bam and b/tests/data/polymerase/production_hq.hqregion.bam differ
diff --git a/tests/data/polymerase/production_hq.hqregion.bam.pbi b/tests/data/polymerase/production_hq.hqregion.bam.pbi
new file mode 100644
index 0000000..5ffa37c
Binary files /dev/null and b/tests/data/polymerase/production_hq.hqregion.bam.pbi differ
diff --git a/tests/data/polymerase/production_hq.scraps.bam b/tests/data/polymerase/production_hq.scraps.bam
index 1c392fc..716e098 100644
Binary files a/tests/data/polymerase/production_hq.scraps.bam and b/tests/data/polymerase/production_hq.scraps.bam differ
diff --git a/tests/data/polymerase/production_hq.scraps.bam.pbi b/tests/data/polymerase/production_hq.scraps.bam.pbi
new file mode 100644
index 0000000..f719103
Binary files /dev/null and b/tests/data/polymerase/production_hq.scraps.bam.pbi differ
diff --git a/tests/data/polymerase/whitelist/internal.polymerase.bam b/tests/data/polymerase/whitelist/internal.polymerase.bam
new file mode 100644
index 0000000..015ba80
Binary files /dev/null and b/tests/data/polymerase/whitelist/internal.polymerase.bam differ
diff --git a/tests/data/polymerase/whitelist/internal.polymerase.bam.pbi b/tests/data/polymerase/whitelist/internal.polymerase.bam.pbi
new file mode 100644
index 0000000..3961e55
Binary files /dev/null and b/tests/data/polymerase/whitelist/internal.polymerase.bam.pbi differ
diff --git a/tests/data/polymerase/whitelist/internal.scraps.bam b/tests/data/polymerase/whitelist/internal.scraps.bam
new file mode 100644
index 0000000..3ff05a5
Binary files /dev/null and b/tests/data/polymerase/whitelist/internal.scraps.bam differ
diff --git a/tests/data/polymerase/whitelist/internal.scraps.bam.pbi b/tests/data/polymerase/whitelist/internal.scraps.bam.pbi
new file mode 100644
index 0000000..ea72b36
Binary files /dev/null and b/tests/data/polymerase/whitelist/internal.scraps.bam.pbi differ
diff --git a/tests/data/polymerase/whitelist/internal.subreads.bam b/tests/data/polymerase/whitelist/internal.subreads.bam
new file mode 100644
index 0000000..ed5ba3a
Binary files /dev/null and b/tests/data/polymerase/whitelist/internal.subreads.bam differ
diff --git a/tests/data/polymerase/whitelist/internal.subreads.bam.pbi b/tests/data/polymerase/whitelist/internal.subreads.bam.pbi
new file mode 100644
index 0000000..f584738
Binary files /dev/null and b/tests/data/polymerase/whitelist/internal.subreads.bam.pbi differ
diff --git a/tests/data/polymerase/whitelist/scrapless.scraps.bam b/tests/data/polymerase/whitelist/scrapless.scraps.bam
new file mode 100644
index 0000000..7b989c4
Binary files /dev/null and b/tests/data/polymerase/whitelist/scrapless.scraps.bam differ
diff --git a/tests/data/polymerase/whitelist/scrapless.scraps.bam.pbi b/tests/data/polymerase/whitelist/scrapless.scraps.bam.pbi
new file mode 100644
index 0000000..140af8a
Binary files /dev/null and b/tests/data/polymerase/whitelist/scrapless.scraps.bam.pbi differ
diff --git a/tests/data/polymerase/whitelist/scrapless.subreads.bam b/tests/data/polymerase/whitelist/scrapless.subreads.bam
new file mode 100644
index 0000000..739b3b4
Binary files /dev/null and b/tests/data/polymerase/whitelist/scrapless.subreads.bam differ
diff --git a/tests/data/polymerase/whitelist/scrapless.subreads.bam.pbi b/tests/data/polymerase/whitelist/scrapless.subreads.bam.pbi
new file mode 100644
index 0000000..19ce255
Binary files /dev/null and b/tests/data/polymerase/whitelist/scrapless.subreads.bam.pbi differ
diff --git a/tests/data/relative/a/test.bam b/tests/data/relative/a/test.bam
new file mode 100644
index 0000000..26d72fb
Binary files /dev/null and b/tests/data/relative/a/test.bam differ
diff --git a/tests/data/relative/b/test1.bam b/tests/data/relative/b/test1.bam
new file mode 100644
index 0000000..26d72fb
Binary files /dev/null and b/tests/data/relative/b/test1.bam differ
diff --git a/tests/data/relative/b/test2.bam b/tests/data/relative/b/test2.bam
new file mode 100644
index 0000000..26d72fb
Binary files /dev/null and b/tests/data/relative/b/test2.bam differ
diff --git a/tests/data/relative/relative.fofn b/tests/data/relative/relative.fofn
new file mode 100644
index 0000000..755c589
--- /dev/null
+++ b/tests/data/relative/relative.fofn
@@ -0,0 +1,3 @@
+a/test.bam
+b/test1.bam
+b/test2.bam
diff --git a/tests/data/relative/relative.xml b/tests/data/relative/relative.xml
new file mode 100644
index 0000000..0e78fe4
--- /dev/null
+++ b/tests/data/relative/relative.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:SubreadSet xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" MetaType="PacBio.DataSet.AlignmentSet" Name="DataSet_AlignmentSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" CreatedAt="2015-01-27T09:00:01" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd">
+ <pbbase:ExternalResources>
+ <pbbase:ExternalResource MetaType="SubreadFile.SubreadBamFile" ResourceId="./a/test.bam" />
+ <pbbase:ExternalResource MetaType="SubreadFile.SubreadBamFile" ResourceId="./b/test1.bam" />
+ <pbbase:ExternalResource MetaType="SubreadFile.SubreadBamFile" ResourceId="./b/test2.bam"/>
+ </pbbase:ExternalResources>
+</pbds:SubreadSet>
diff --git a/tests/data/relative/relative2.fofn b/tests/data/relative/relative2.fofn
new file mode 100644
index 0000000..f1969ac
--- /dev/null
+++ b/tests/data/relative/relative2.fofn
@@ -0,0 +1,4 @@
+a/test.bam
+b/test1.bam
+b/test2.bam
+relative.xml
diff --git a/tests/data/test_group_query/test1.bam b/tests/data/test_group_query/test1.bam
index f92d6bf..5673abc 100644
Binary files a/tests/data/test_group_query/test1.bam and b/tests/data/test_group_query/test1.bam differ
diff --git a/tests/data/test_group_query/test2.bam b/tests/data/test_group_query/test2.bam
index 53c1d8f..565b224 100644
Binary files a/tests/data/test_group_query/test2.bam and b/tests/data/test_group_query/test2.bam differ
diff --git a/tests/data/test_group_query/test2.bam.pbi b/tests/data/test_group_query/test2.bam.pbi
index 1b0c1b9..384ad28 100644
Binary files a/tests/data/test_group_query/test2.bam.pbi and b/tests/data/test_group_query/test2.bam.pbi differ
diff --git a/tests/data/test_group_query/test3.bam b/tests/data/test_group_query/test3.bam
index 5b8548b..3b1e21b 100644
Binary files a/tests/data/test_group_query/test3.bam and b/tests/data/test_group_query/test3.bam differ
diff --git a/tests/data/truncated.bam b/tests/data/truncated.bam
new file mode 100644
index 0000000..f40e5f1
Binary files /dev/null and b/tests/data/truncated.bam differ
diff --git a/tests/files.cmake b/tests/files.cmake
index ea69cac..27cc8d4 100644
--- a/tests/files.cmake
+++ b/tests/files.cmake
@@ -18,26 +18,34 @@ set( PacBioBAMTest_CPP
${PacBioBAM_TestsDir}/src/test_BamRecordImplVariableData.cpp
${PacBioBAM_TestsDir}/src/test_BamRecordMapping.cpp
${PacBioBAM_TestsDir}/src/test_BamWriter.cpp
+ ${PacBioBAM_TestsDir}/src/test_BarcodeQuery.cpp
${PacBioBAM_TestsDir}/src/test_Cigar.cpp
+ ${PacBioBAM_TestsDir}/src/test_Compare.cpp
${PacBioBAM_TestsDir}/src/test_DataSetCore.cpp
${PacBioBAM_TestsDir}/src/test_DataSetIO.cpp
${PacBioBAM_TestsDir}/src/test_DataSetQuery.cpp
${PacBioBAM_TestsDir}/src/test_DataSetXsd.cpp
${PacBioBAM_TestsDir}/src/test_EndToEnd.cpp
${PacBioBAM_TestsDir}/src/test_EntireFileQuery.cpp
+ ${PacBioBAM_TestsDir}/src/test_FileUtils.cpp
${PacBioBAM_TestsDir}/src/test_Frames.cpp
${PacBioBAM_TestsDir}/src/test_GenomicIntervalQuery.cpp
- ${PacBioBAM_TestsDir}/src/test_GroupQuery.cpp
${PacBioBAM_TestsDir}/src/test_IndexedFastaReader.cpp
${PacBioBAM_TestsDir}/src/test_Intervals.cpp
${PacBioBAM_TestsDir}/src/test_PacBioIndex.cpp
+ ${PacBioBAM_TestsDir}/src/test_PbiFilter.cpp
+ ${PacBioBAM_TestsDir}/src/test_PbiFilterQuery.cpp
${PacBioBAM_TestsDir}/src/test_PolymeraseStitching.cpp
+ ${PacBioBAM_TestsDir}/src/test_QNameQuery.cpp
${PacBioBAM_TestsDir}/src/test_QualityValues.cpp
+ ${PacBioBAM_TestsDir}/src/test_ReadAccuracyQuery.cpp
${PacBioBAM_TestsDir}/src/test_ReadGroupInfo.cpp
${PacBioBAM_TestsDir}/src/test_SequenceUtils.cpp
+ ${PacBioBAM_TestsDir}/src/test_StringUtils.cpp
+ ${PacBioBAM_TestsDir}/src/test_SubreadLengthQuery.cpp
${PacBioBAM_TestsDir}/src/test_Tags.cpp
${PacBioBAM_TestsDir}/src/test_TimeUtils.cpp
# ${PacBioBAM_TestsDir}/src/test_UnmappedReadsQuery.cpp
+ ${PacBioBAM_TestsDir}/src/test_VirtualPolymeraseCompositeReader.cpp
${PacBioBAM_TestsDir}/src/test_ZmwQuery.cpp
-
)
diff --git a/tests/scripts/cram.py b/tests/scripts/cram.py
new file mode 100755
index 0000000..20c4681
--- /dev/null
+++ b/tests/scripts/cram.py
@@ -0,0 +1,516 @@
+#!/usr/bin/env python
+"""Functional testing framework for command line applications"""
+
+import difflib
+import itertools
+import optparse
+import os
+import re
+import signal
+import subprocess
+import sys
+import shutil
+import time
+import tempfile
+
+try:
+ import configparser
+except ImportError:
+ import ConfigParser as configparser
+
+__all__ = ['main', 'test']
+
+def findtests(paths):
+ """Yield tests in paths in sorted order"""
+ for p in paths:
+ if os.path.isdir(p):
+ for root, dirs, files in os.walk(p):
+ if os.path.basename(root).startswith('.'):
+ continue
+ for f in sorted(files):
+ if not f.startswith('.') and f.endswith('.t'):
+ yield os.path.normpath(os.path.join(root, f))
+ else:
+ yield os.path.normpath(p)
+
+def regex(pattern, s):
+ """Match a regular expression or return False if invalid.
+
+ >>> [bool(regex(r, 'foobar')) for r in ('foo.*', '***')]
+ [True, False]
+ """
+ try:
+ return re.match(pattern + r'\Z', s)
+ except re.error:
+ return False
+
+def glob(el, l):
+ r"""Match a glob-like pattern.
+
+ The only supported special characters are * and ?. Escaping is
+ supported.
+
+ >>> bool(glob(r'\* \\ \? fo?b*', '* \\ ? foobar'))
+ True
+ """
+ i, n = 0, len(el)
+ res = ''
+ while i < n:
+ c = el[i]
+ i += 1
+ if c == '\\' and el[i] in '*?\\':
+ res += el[i - 1:i + 1]
+ i += 1
+ elif c == '*':
+ res += '.*'
+ elif c == '?':
+ res += '.'
+ else:
+ res += re.escape(c)
+ return regex(res, l)
+
+annotations = {'glob': glob, 're': regex}
+
+def match(el, l):
+ """Match patterns based on annotations"""
+ for k in annotations:
+ ann = ' (%s)\n' % k
+ if el.endswith(ann) and annotations[k](el[:-len(ann)], l[:-1]):
+ return True
+ return False
+
+class SequenceMatcher(difflib.SequenceMatcher, object):
+ """Like difflib.SequenceMatcher, but matches globs and regexes"""
+
+ def find_longest_match(self, alo, ahi, blo, bhi):
+ """Find longest matching block in a[alo:ahi] and b[blo:bhi]"""
+ # SequenceMatcher uses find_longest_match() to slowly whittle down
+ # the differences between a and b until it has each matching block.
+ # Because of this, we can end up doing the same matches many times.
+ matches = []
+ for n, (el, line) in enumerate(zip(self.a[alo:ahi], self.b[blo:bhi])):
+ if el != line and match(el, line):
+ # This fools the superclass's method into thinking that the
+ # regex/glob in a is identical to b by replacing a's line (the
+ # expected output) with b's line (the actual output).
+ self.a[alo + n] = line
+ matches.append((n, el))
+ ret = super(SequenceMatcher, self).find_longest_match(alo, ahi,
+ blo, bhi)
+ # Restore the lines replaced above. Otherwise, the diff output
+ # would seem to imply that the tests never had any regexes/globs.
+ for n, el in matches:
+ self.a[alo + n] = el
+ return ret
+
+def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
+ tofiledate='', n=3, lineterm='\n', matcher=SequenceMatcher):
+ """Compare two sequences of lines; generate the delta as a unified diff.
+
+ This is like difflib.unified_diff(), but allows custom matchers.
+ """
+ started = False
+ for group in matcher(None, a, b).get_grouped_opcodes(n):
+ if not started:
+ fromdate = fromfiledate and '\t%s' % fromfiledate or ''
+ todate = fromfiledate and '\t%s' % tofiledate or ''
+ yield '--- %s%s%s' % (fromfile, fromdate, lineterm)
+ yield '+++ %s%s%s' % (tofile, todate, lineterm)
+ started = True
+ i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
+ yield "@@ -%d,%d +%d,%d @@%s" % (i1 + 1, i2 - i1, j1 + 1, j2 - j1,
+ lineterm)
+ for tag, i1, i2, j1, j2 in group:
+ if tag == 'equal':
+ for line in a[i1:i2]:
+ yield ' ' + line
+ continue
+ if tag == 'replace' or tag == 'delete':
+ for line in a[i1:i2]:
+ yield '-' + line
+ if tag == 'replace' or tag == 'insert':
+ for line in b[j1:j2]:
+ yield '+' + line
+
+needescape = re.compile(r'[\x00-\x09\x0b-\x1f\x7f-\xff]').search
+escapesub = re.compile(r'[\x00-\x09\x0b-\x1f\\\x7f-\xff]').sub
+escapemap = dict((chr(i), r'\x%02x' % i) for i in range(256))
+escapemap.update({'\\': '\\\\', '\r': r'\r', '\t': r'\t'})
+
+def escape(s):
+ """Like the string-escape codec, but doesn't escape quotes"""
+ return escapesub(lambda m: escapemap[m.group(0)], s[:-1]) + ' (esc)\n'
+
+def makeresetsigpipe():
+ """Make a function to reset SIGPIPE to SIG_DFL (for use in subprocesses).
+
+ Doing subprocess.Popen(..., preexec_fn=makeresetsigpipe()) will prevent
+ Python's SIGPIPE handler (SIG_IGN) from being inherited by the
+ child process.
+ """
+ if sys.platform == 'win32' or getattr(signal, 'SIGPIPE', None) is None:
+ return None
+ return lambda: signal.signal(signal.SIGPIPE, signal.SIG_DFL)
+
+def test(path, shell, indent=2):
+ """Run test at path and return input, output, and diff.
+
+ This returns a 3-tuple containing the following:
+
+ (list of lines in test, same list with actual output, diff)
+
+ diff is a generator that yields the diff between the two lists.
+
+ If a test exits with return code 80, the actual output is set to
+ None and diff is set to [].
+ """
+ indent = ' ' * indent
+ cmdline = '%s$ ' % indent
+ conline = '%s> ' % indent
+
+ f = open(path)
+ abspath = os.path.abspath(path)
+ env = os.environ.copy()
+ env['TESTDIR'] = os.path.dirname(abspath)
+ env['TESTFILE'] = os.path.basename(abspath)
+ p = subprocess.Popen([shell, '-'], bufsize=-1, stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+ universal_newlines=True, env=env,
+ preexec_fn=makeresetsigpipe(),
+ close_fds=os.name == 'posix')
+ salt = 'CRAM%s' % time.time()
+
+ after = {}
+ refout, postout = [], []
+ i = pos = prepos = -1
+ stdin = []
+ for i, line in enumerate(f):
+ refout.append(line)
+ if line.startswith(cmdline):
+ after.setdefault(pos, []).append(line)
+ prepos = pos
+ pos = i
+ stdin.append('echo "\n%s %s $?"\n' % (salt, i))
+ stdin.append(line[len(cmdline):])
+ elif line.startswith(conline):
+ after.setdefault(prepos, []).append(line)
+ stdin.append(line[len(conline):])
+ elif not line.startswith(indent):
+ after.setdefault(pos, []).append(line)
+ stdin.append('echo "\n%s %s $?"\n' % (salt, i + 1))
+
+ output = p.communicate(input=''.join(stdin))[0]
+ if p.returncode == 80:
+ return (refout, None, [])
+
+ # Add a trailing newline to the input script if it's missing.
+ if refout and not refout[-1].endswith('\n'):
+ refout[-1] += '\n'
+
+ # We use str.split instead of splitlines to get consistent
+ # behavior between Python 2 and 3. In 3, we use unicode strings,
+ # which has more line breaks than \n and \r.
+ pos = -1
+ ret = 0
+ for i, line in enumerate(output[:-1].split('\n')):
+ line += '\n'
+ if line.startswith(salt):
+ presalt = postout.pop()
+ if presalt != '%s\n' % indent:
+ postout.append(presalt[:-1] + ' (no-eol)\n')
+ ret = int(line.split()[2])
+ if ret != 0:
+ postout.append('%s[%s]\n' % (indent, ret))
+ postout += after.pop(pos, [])
+ pos = int(line.split()[1])
+ else:
+ if needescape(line):
+ line = escape(line)
+ postout.append(indent + line)
+ postout += after.pop(pos, [])
+
+ diffpath = os.path.basename(abspath)
+ diff = unified_diff(refout, postout, diffpath, diffpath + '.err')
+ for firstline in diff:
+ return refout, postout, itertools.chain([firstline], diff)
+ return refout, postout, []
+
+def prompt(question, answers, auto=None):
+ """Write a prompt to stdout and ask for answer in stdin.
+
+ answers should be a string, with each character a single
+ answer. An uppercase letter is considered the default answer.
+
+ If an invalid answer is given, this asks again until it gets a
+ valid one.
+
+ If auto is set, the question is answered automatically with the
+ specified value.
+ """
+ default = [c for c in answers if c.isupper()]
+ while True:
+ sys.stdout.write('%s [%s] ' % (question, answers))
+ sys.stdout.flush()
+ if auto is not None:
+ sys.stdout.write(auto + '\n')
+ sys.stdout.flush()
+ return auto
+
+ answer = sys.stdin.readline().strip().lower()
+ if not answer and default:
+ return default[0]
+ elif answer and answer in answers.lower():
+ return answer
+
+def log(msg=None, verbosemsg=None, verbose=False):
+ """Write msg to standard out and flush.
+
+ If verbose is True, write verbosemsg instead.
+ """
+ if verbose:
+ msg = verbosemsg
+ if msg:
+ sys.stdout.write(msg)
+ sys.stdout.flush()
+
+def patch(cmd, diff, path):
+ """Run echo [lines from diff] | cmd -p0"""
+ p = subprocess.Popen([cmd, '-p0'], bufsize=-1, stdin=subprocess.PIPE,
+ universal_newlines=True,
+ preexec_fn=makeresetsigpipe(),
+ cwd=path,
+ close_fds=os.name == 'posix')
+ p.communicate(''.join(diff))
+ return p.returncode == 0
+
+def run(paths, tmpdir, shell, quiet=False, verbose=False, patchcmd=None,
+ answer=None, indent=2):
+ """Run tests in paths in tmpdir.
+
+ If quiet is True, diffs aren't printed. If verbose is True,
+ filenames and status information are printed.
+
+ If patchcmd is set, a prompt is written to stdout asking if
+ changed output should be merged back into the original test. The
+ answer is read from stdin. If 'y', the test is patched using patch
+ based on the changed output.
+ """
+ cwd = os.getcwd()
+ seen = set()
+ basenames = set()
+ skipped = failed = 0
+ for i, path in enumerate(findtests(paths)):
+ abspath = os.path.abspath(path)
+ if abspath in seen:
+ continue
+ seen.add(abspath)
+
+ log(None, '%s: ' % path, verbose)
+ if not os.stat(abspath).st_size:
+ skipped += 1
+ log('s', 'empty\n', verbose)
+ else:
+ basename = os.path.basename(path)
+ if basename in basenames:
+ basename = '%s-%s' % (basename, i)
+ else:
+ basenames.add(basename)
+ testdir = os.path.join(tmpdir, basename)
+ os.mkdir(testdir)
+ try:
+ os.chdir(testdir)
+ refout, postout, diff = test(abspath, shell, indent)
+ finally:
+ os.chdir(cwd)
+
+ errpath = abspath + '.err'
+ if postout is None:
+ skipped += 1
+ log('s', 'skipped\n', verbose)
+ elif not diff:
+ log('.', 'passed\n', verbose)
+ if os.path.exists(errpath):
+ os.remove(errpath)
+ else:
+ failed += 1
+ log('!', 'failed\n', verbose)
+ if not quiet:
+ log('\n', None, verbose)
+ errfile = open(errpath, 'w')
+ try:
+ for line in postout:
+ errfile.write(line)
+ finally:
+ errfile.close()
+ if not quiet:
+ if patchcmd:
+ diff = list(diff)
+ for line in diff:
+ log(line)
+ if (patchcmd and
+ prompt('Accept this change?', 'yN', answer) == 'y'):
+ if patch(patchcmd, diff, os.path.dirname(abspath)):
+ log(None, '%s: merged output\n' % path, verbose)
+ os.remove(errpath)
+ else:
+ log('%s: merge failed\n' % path)
+ log('\n', None, verbose)
+ log('# Ran %s tests, %s skipped, %s failed.\n'
+ % (len(seen), skipped, failed))
+ return bool(failed)
+
+def which(cmd):
+ """Return the patch to cmd or None if not found"""
+ for p in os.environ['PATH'].split(os.pathsep):
+ path = os.path.join(p, cmd)
+ if os.path.isfile(path) and os.access(path, os.X_OK):
+ return os.path.abspath(path)
+ return None
+
+def expandpath(path):
+ """Expands ~ and environment variables in path"""
+ return os.path.expanduser(os.path.expandvars(path))
+
+class OptionParser(optparse.OptionParser):
+ """Like optparse.OptionParser, but supports setting values through
+ CRAM= and .cramrc."""
+
+ def __init__(self, *args, **kwargs):
+ self._config_opts = {}
+ optparse.OptionParser.__init__(self, *args, **kwargs)
+
+ def add_option(self, *args, **kwargs):
+ option = optparse.OptionParser.add_option(self, *args, **kwargs)
+ if option.dest and option.dest != 'version':
+ key = option.dest.replace('_', '-')
+ self._config_opts[key] = option.action == 'store_true'
+ return option
+
+ def parse_args(self, args=None, values=None):
+ config = configparser.RawConfigParser()
+ config.read(expandpath(os.environ.get('CRAMRC', '.cramrc')))
+ defaults = {}
+ for key, isbool in self._config_opts.items():
+ try:
+ if isbool:
+ try:
+ value = config.getboolean('cram', key)
+ except ValueError:
+ value = config.get('cram', key)
+ self.error('--%s: invalid boolean value: %r'
+ % (key, value))
+ else:
+ value = config.get('cram', key)
+ except (configparser.NoSectionError, configparser.NoOptionError):
+ pass
+ else:
+ defaults[key] = value
+ self.set_defaults(**defaults)
+
+ eargs = os.environ.get('CRAM', '').strip()
+ if eargs:
+ import shlex
+ args = args or []
+ args += shlex.split(eargs)
+
+ try:
+ return optparse.OptionParser.parse_args(self, args, values)
+ except optparse.OptionValueError:
+ self.error(str(sys.exc_info()[1]))
+
+def main(args):
+ """Main entry point.
+
+ args should not contain the script name.
+ """
+ p = OptionParser(usage='cram [OPTIONS] TESTS...', prog='cram')
+ p.add_option('-V', '--version', action='store_true',
+ help='show version information and exit')
+ p.add_option('-q', '--quiet', action='store_true',
+ help="don't print diffs")
+ p.add_option('-v', '--verbose', action='store_true',
+ help='show filenames and test status')
+ p.add_option('-i', '--interactive', action='store_true',
+ help='interactively merge changed test output')
+ p.add_option('-y', '--yes', action='store_true',
+ help='answer yes to all questions')
+ p.add_option('-n', '--no', action='store_true',
+ help='answer no to all questions')
+ p.add_option('-E', '--preserve-env', action='store_true',
+ help="don't reset common environment variables")
+ p.add_option('--keep-tmpdir', action='store_true',
+ help='keep temporary directories')
+ p.add_option('--shell', action='store', default='/bin/sh', metavar='PATH',
+ help='shell to use for running tests')
+ p.add_option('--indent', action='store', default=2, metavar='NUM',
+ type='int', help='number of spaces to use for indentation')
+ opts, paths = p.parse_args(args)
+
+ if opts.version:
+ sys.stdout.write("""Cram CLI testing framework (version 0.6)
+
+Copyright (C) 2010-2011 Brodie Rao <brodie at bitheap.org> and others
+This is free software; see the source for copying conditions. There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+""")
+ return
+
+ conflicts = [('-y', opts.yes, '-n', opts.no),
+ ('-q', opts.quiet, '-i', opts.interactive)]
+ for s1, o1, s2, o2 in conflicts:
+ if o1 and o2:
+ sys.stderr.write('options %s and %s are mutually exclusive\n'
+ % (s1, s2))
+ return 2
+
+ patchcmd = None
+ if opts.interactive:
+ patchcmd = which('patch')
+ if not patchcmd:
+ sys.stderr.write('patch(1) required for -i\n')
+ return 2
+
+ if not paths:
+ sys.stdout.write(p.get_usage())
+ return 2
+
+ badpaths = [path for path in paths if not os.path.exists(path)]
+ if badpaths:
+ sys.stderr.write('no such file: %s\n' % badpaths[0])
+ return 2
+
+ tmpdir = os.environ['CRAMTMP'] = tempfile.mkdtemp('', 'cramtests-')
+ proctmp = os.path.join(tmpdir, 'tmp')
+ os.mkdir(proctmp)
+ for s in ('TMPDIR', 'TEMP', 'TMP'):
+ os.environ[s] = proctmp
+
+ if not opts.preserve_env:
+ for s in ('LANG', 'LC_ALL', 'LANGUAGE'):
+ os.environ[s] = 'C'
+ os.environ['TZ'] = 'GMT'
+ os.environ['CDPATH'] = ''
+ os.environ['COLUMNS'] = '80'
+ os.environ['GREP_OPTIONS'] = ''
+
+ if opts.yes:
+ answer = 'y'
+ elif opts.no:
+ answer = 'n'
+ else:
+ answer = None
+
+ try:
+ return run(paths, tmpdir, opts.shell, opts.quiet, opts.verbose,
+ patchcmd, answer, opts.indent)
+ finally:
+ if opts.keep_tmpdir:
+ log('# Kept temporary directory: %s\n' % tmpdir)
+ else:
+ shutil.rmtree(tmpdir)
+
+if __name__ == '__main__':
+ try:
+ sys.exit(main(sys.argv[1:]))
+ except KeyboardInterrupt:
+ pass
diff --git a/tests/src/R/test_pbbam.sh.in b/tests/src/R/test_pbbam.sh.in
index af6eb89..458b149 100644
--- a/tests/src/R/test_pbbam.sh.in
+++ b/tests/src/R/test_pbbam.sh.in
@@ -37,9 +37,18 @@
#! /usr/bin/sh
+GENERATED_BAM=@PacBioBAM_TestsDir@/data/generated.bam
+
+touch $GENERATED_BAM
+chmod 644 $GENERATED_BAM
+
R --slave --no-save < @RTestRootDir@/test_pbbam.R --args \
@RTestRootDir@/tests \
@PacBioBAM_RLibDir@ \
@PacBioBAM_TestsDir@/data
+
+STATUS=$?
-rm @PacBioBAM_TestsDir@/data/generated.bam
+rm $GENERATED_BAM
+
+exit $STATUS
\ No newline at end of file
diff --git a/tests/src/R/tests/test_Accuracy.R b/tests/src/R/tests/test_Accuracy.R
index bc29eb0..e7e98e6 100644
--- a/tests/src/R/tests/test_Accuracy.R
+++ b/tests/src/R/tests/test_Accuracy.R
@@ -37,17 +37,26 @@
test_case("Accuracy_Clamp", {
- a_zero <- Accuracy(0)
- a_neg <- Accuracy(-1)
- a_min <- Accuracy(0)
- a_normal <- Accuracy(300)
- a_max <- Accuracy(1000)
- a_tooLarge <- Accuracy(2000)
+ a_zero <- Accuracy(0.0)
+ a_neg <- Accuracy(-0.5)
+ a_min <- Accuracy(0.0)
+ a_normal <- Accuracy(0.9)
+ a_max <- Accuracy(1.0)
+ a_tooLarge <- Accuracy(1.1)
- assertEqual(0L, a_zero$ToInt())
- assertEqual(0L, a_neg$ToInt())
- assertEqual(0L, a_min$ToInt())
- assertEqual(300L, a_normal$ToInt())
- assertEqual(1000L, a_max$ToInt())
- assertEqual(1000L, a_tooLarge$ToInt())
+ tolerance = 1e-5
+
+ assertTrue( abs(0.0 - a_zero$ToFloat()) <= tolerance )
+ assertTrue( abs(0.0 - a_neg$ToFloat()) <= tolerance )
+ assertTrue( abs(0.0 - a_min$ToFloat()) <= tolerance )
+ assertTrue( abs(0.9 - a_normal$ToFloat()) <= tolerance )
+ assertTrue( abs(1.0 - a_max$ToFloat()) <= tolerance )
+ assertTrue( abs(1.0 - a_tooLarge$ToFloat()) <= tolerance )
+
+ # assertEqual(0.0, a_zero$ToFloat())
+ # assertEqual(0.0, a_neg$ToFloat())
+ # assertEqual(0.0, a_min$ToFloat())
+ # assertEqual(0.9, a_normal$ToFloat())
+ # assertEqual(1.0, a_max$ToFloat())
+ # assertEqual(1.0, a_tooLarge$ToFloat())
})
diff --git a/tests/src/R/tests/test_BamFile.R b/tests/src/R/tests/test_BamFile.R
index 41e419c..93eea2f 100644
--- a/tests/src/R/tests/test_BamFile.R
+++ b/tests/src/R/tests/test_BamFile.R
@@ -61,11 +61,10 @@ test_case("BamFile_Ctor", {
result <- tryCatch(
{
f <- BamFile(fn)
- assertFalse(f$IsPacBioBAM())
invisible()
},
warning = function(w) {
- assertTrue(TRUE)
+ assertTrue(FALSE)
invisible()
},
error = function(e) {
diff --git a/tests/src/R/tests/test_BamHeader.R b/tests/src/R/tests/test_BamHeader.R
index eab44e5..b0008ea 100644
--- a/tests/src/R/tests/test_BamHeader.R
+++ b/tests/src/R/tests/test_BamHeader.R
@@ -100,7 +100,7 @@ test_case("BamHeader_Defaults", {
test_case("BamHeader_Decode", {
- text <- paste("@HD\tVN:1.1\tSO:queryname\tpb:3.0b3",
+ text <- paste("@HD\tVN:1.1\tSO:queryname\tpb:3.0.1",
"@SQ\tSN:chr1\tLN:2038\tSP:chocobo",
"@SQ\tSN:chr2\tLN:3042\tSP:chocobo",
"@RG\tID:rg1\tSM:control",
@@ -116,7 +116,7 @@ test_case("BamHeader_Decode", {
assertEqual("1.1", header$Version())
assertEqual("queryname", header$SortOrder())
- assertEqual("3.0b3", header$PacBioBamVersion())
+ assertEqual("3.0.1", header$PacBioBamVersion())
assertEqual(3L, header$ReadGroups()$size())
assertTrue(header$HasReadGroup("rg1"))
@@ -146,7 +146,7 @@ test_case("BamHeader_Decode", {
test_case("BamHeader_Encode", {
- expectedText <- paste("@HD\tVN:1.1\tSO:queryname\tpb:3.0b3",
+ expectedText <- paste("@HD\tVN:1.1\tSO:queryname\tpb:3.0.1",
"@SQ\tSN:chr1\tLN:2038\tSP:chocobo",
"@SQ\tSN:chr2\tLN:3042\tSP:chocobo",
"@RG\tID:rg1\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:control",
@@ -179,7 +179,7 @@ test_case("BamHeader_Encode", {
header <- BamHeader()
header$Version("1.1")
header$SortOrder("queryname")
- header$PacBioBamVersion("3.0b3")
+ header$PacBioBamVersion("3.0.1")
header$AddReadGroup(rg1)
header$AddReadGroup(rg2)
header$AddReadGroup(rg3)
diff --git a/tests/src/R/tests/test_EndToEnd.R b/tests/src/R/tests/test_EndToEnd.R
index 04a06ac..ce0a2eb 100644
--- a/tests/src/R/tests/test_EndToEnd.R
+++ b/tests/src/R/tests/test_EndToEnd.R
@@ -41,17 +41,20 @@ originalNames <-function(inputFn, generatedFn) {
{
file <- BamFile(inputFn)
writer <- BamWriter(generatedFn, file$Header())
- entireFile <- EntireFileQuery(file)
+
+ ds <- DataSet(file)
+ entireFile <- EntireFileQuery(ds)
names_in <- list()
iter <- entireFile$begin()
end <- entireFile$end()
while ( iter$'__ne__'(end) ) {
- record <- iter$value()
+ record <- iter$value()
names_in <- c(names_in, record$FullName())
- writer$Write(record)
+ writer$Write(record)
iter$incr()
}
+ writer$TryFlush()
return(names_in)
},
error = function(e) {
@@ -64,18 +67,18 @@ originalNames <-function(inputFn, generatedFn) {
generatedNames <- function(generatedFn) {
- result <- tryCatch(
- {
- file <- BamFile(generatedFn)
- entireFile <- EntireFileQuery(file)
+ result <- tryCatch(
+ {
+ ds <- DataSet(generatedFn)
+ entireFile <- EntireFileQuery(ds)
names_out <- list()
- iter <- entireFile$begin()
- end <- entireFile$end()
- while ( iter$'__ne__'(end) ) {
- names_out <- c(names_out, iter$FullName())
- iter$incr()
- }
+ iter <- entireFile$begin()
+ end <- entireFile$end()
+ while ( iter$'__ne__'(end) ) {
+ names_out <- c(names_out, iter$FullName())
+ iter$incr()
+ }
return(names_out)
},
error = function(e) {
@@ -86,7 +89,7 @@ generatedNames <- function(generatedFn) {
return(result)
}
-test_case("EndToEnd_Placeholder", {
+test_case("EndToEnd_CopyFileAndReadBack", {
inputFn <- paste(test_data_path, "ex2.bam", sep="/")
generatedFn <- paste(test_data_path, "generated.bam", sep="/")
@@ -94,9 +97,9 @@ test_case("EndToEnd_Placeholder", {
# loop over original file, store names, write to generated file
names_in <- originalNames(inputFn, generatedFn)
- # read names from new file
- names_out <- generatedNames(generatedFn)
-
- # ensure equal
- assertEqual(names_in, names_out)
+ # read names from new file
+ names_out <- generatedNames(generatedFn)
+
+ # ensure equal
+ assertEqual(names_in, names_out)
})
diff --git a/tests/src/R/tests/test_Intervals.R b/tests/src/R/tests/test_Intervals.R
index 5160449..0071750 100644
--- a/tests/src/R/tests/test_Intervals.R
+++ b/tests/src/R/tests/test_Intervals.R
@@ -216,20 +216,20 @@ test_case("Intervals_Length",{
test_case("GenomicIntervals_Ctors", {
empty <- GenomicInterval()
- normal <- GenomicInterval(0, 100, 200)
+ normal <- GenomicInterval("seq1", 100, 200)
- assertEqual(-1L, empty$Id())
+ assertEqual("", empty$Name())
assertEqual(0L, empty$Start())
assertEqual(0L, empty$Stop())
- assertEqual(0L, normal$Id())
- assertEqual(100L, normal$Start())
- assertEqual(200L, normal$Stop())
+ assertEqual("seq1", normal$Name())
+ assertEqual(100L, normal$Start())
+ assertEqual(200L, normal$Stop())
})
test_case("GenomicIntervals_Copy", {
- a <- GenomicInterval(1, 10, 20)
+ a <- GenomicInterval("seq1", 10, 20)
b <- GenomicInterval(a)
c <- a
@@ -241,10 +241,10 @@ test_case("GenomicIntervals_Copy", {
test_case("GenomicIntervals_Modifiers", {
- a <- GenomicInterval(1, 10, 20)
+ a <- GenomicInterval("seq1", 10, 20)
b <- GenomicInterval(a)
- b$Id(5)
+ b$Name("seq5")
b$Start(2)
b$Stop(10)
@@ -253,11 +253,11 @@ test_case("GenomicIntervals_Modifiers", {
assertNotEqual(a, b)
- assertEqual(5L, b$Id())
+ assertEqual("seq5", b$Name())
assertEqual(2L, b$Start())
assertEqual(10L, b$Stop())
- assertEqual(a$Id(), c$Id())
+ assertEqual(a$Name(), c$Name())
# TODO: fix this to work with == or *anything* cleaner
assertTrue(b$Interval()$'__eq__'(c$Interval()))
@@ -265,12 +265,12 @@ test_case("GenomicIntervals_Modifiers", {
test_case("GenomicIntervals_Cover", {
- a <- GenomicInterval(0,2,4)
- b <- GenomicInterval(0,3,5)
- c <- GenomicInterval(0,6,8)
- d <- GenomicInterval(0,1,7)
- e <- GenomicInterval(0,5,8)
- f <- GenomicInterval(1,3,5) # same as b, different ref
+ a <- GenomicInterval("seq1",2,4)
+ b <- GenomicInterval("seq1",3,5)
+ c <- GenomicInterval("seq1",6,8)
+ d <- GenomicInterval("seq1",1,7)
+ e <- GenomicInterval("seq1",5,8)
+ f <- GenomicInterval("seq2",3,5) # same as b, different ref
# 0123456789
# a --
@@ -315,16 +315,16 @@ test_case("GenomicIntervals_Cover", {
test_case("GenomicIntervals_Validity", {
a <- GenomicInterval() # default
- b <- GenomicInterval(0,0,0) # valid id, start == stop (zero)
- c <- GenomicInterval(0,4,4) # valid id, start == stop (non-zero)
- d <- GenomicInterval(0,0,1) # valid id, start < stop (start == zero) OK
- e <- GenomicInterval(0,4,5) # valid id, start < stop (start > zero) OK
- f <- GenomicInterval(0,5,4) # valid id, start > stop
- g <- GenomicInterval(-1,0,0) # invalid id, start == stop (zero)
- h <- GenomicInterval(-1,4,4) # invalid id, start == stop (non-zero)
- i <- GenomicInterval(-1,0,1) # invalid id, start < stop (start == zero)
- j <- GenomicInterval(-1,4,5) # invalid id, start < stop (start > zero)
- k <- GenomicInterval(-1,5,4) # invalid id, start > stop
+ b <- GenomicInterval("seq1",0,0) # valid id, start == stop (zero)
+ c <- GenomicInterval("seq1",4,4) # valid id, start == stop (non-zero)
+ d <- GenomicInterval("seq",0,1) # valid id, start < stop (start == zero) OK
+ e <- GenomicInterval("seq1",4,5) # valid id, start < stop (start > zero) OK
+ f <- GenomicInterval("seq1",5,4) # valid id, start > stop
+ g <- GenomicInterval("",0,0) # invalid id, start == stop (zero)
+ h <- GenomicInterval("",4,4) # invalid id, start == stop (non-zero)
+ i <- GenomicInterval("",0,1) # invalid id, start < stop (start == zero)
+ j <- GenomicInterval("",4,5) # invalid id, start < stop (start > zero)
+ k <- GenomicInterval("",5,4) # invalid id, start > stop
assertTrue(d$IsValid())
assertTrue(e$IsValid())
diff --git a/tests/src/R/tests/test_PolymeraseStitching.R b/tests/src/R/tests/test_PolymeraseStitching.R
new file mode 100644
index 0000000..3e2a943
--- /dev/null
+++ b/tests/src/R/tests/test_PolymeraseStitching.R
@@ -0,0 +1,427 @@
+# Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted (subject to the limitations in the
+# disclaimer below) provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Pacific Biosciences nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+# GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+# BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES LOSS OF
+# USE, DATA, OR PROFITS OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Author: Derek Barnett
+
+compareContainers <- function(c1, c2) {
+
+ assertEqual(length(c1), length(c2))
+
+ numElements <- length(c1)
+ for (i in 1:numElements)
+ assertEqual(c1[i], c2[i])
+}
+
+compareFrames <- function(f1, f2) {
+
+ d1 <- f1$Data()
+ d2 <- f2$Data()
+ compareContainers(d1, d2)
+}
+
+compareRecords <- function(b1, b2) {
+
+ assertTrue(b1$HasDeletionQV())
+ assertTrue(b1$HasDeletionTag())
+ assertTrue(b1$HasInsertionQV())
+ assertTrue(b1$HasMergeQV())
+ assertTrue(b1$HasSubstitutionQV())
+ assertTrue(b1$HasSubstitutionTag())
+ assertTrue(b1$HasLabelQV())
+ assertTrue(b1$HasAltLabelQV())
+ assertTrue(b1$HasAltLabelTag())
+ assertTrue(b1$HasPkmean())
+ assertTrue(b1$HasPkmid())
+ assertTrue(b1$HasPulseCall())
+ assertTrue(b1$HasIPD())
+ assertTrue(b1$HasPulseWidth())
+ assertTrue(b1$HasPrePulseFrames())
+ assertTrue(b1$HasPulseCallWidth())
+ assertTrue(b1$HasPulseMergeQV())
+
+ assertTrue(b2$HasDeletionQV())
+ assertTrue(b2$HasDeletionTag())
+ assertTrue(b2$HasInsertionQV())
+ assertTrue(b2$HasMergeQV())
+ assertTrue(b2$HasSubstitutionQV())
+ assertTrue(b2$HasSubstitutionTag())
+ assertTrue(b2$HasLabelQV())
+ assertTrue(b2$HasAltLabelQV())
+ assertTrue(b2$HasAltLabelTag())
+ assertTrue(b2$HasPkmean())
+ assertTrue(b2$HasPkmid())
+ assertTrue(b2$HasPulseCall())
+ assertTrue(b2$HasIPD())
+ assertTrue(b2$HasPulseWidth())
+ assertTrue(b2$HasPrePulseFrames())
+ assertTrue(b2$HasPulseCallWidth())
+ assertTrue(b2$HasPulseMergeQV())
+
+ assertEqual(b1$FullName(), b2$FullName())
+ assertEqual(b1$HoleNumber(), b2$HoleNumber())
+ assertEqual(b1$NumPasses(), b2$NumPasses())
+ assertEqual(b1$Sequence(), b2$Sequence())
+ assertEqual(b1$DeletionTag(), b2$DeletionTag())
+ assertEqual(b1$SubstitutionTag(), b2$SubstitutionTag())
+ assertEqual(b1$AltLabelTag(), b2$AltLabelTag())
+ assertEqual(b1$PulseCall(), b2$PulseCall())
+
+ # compareContainers(b1$Pkmean(), b2$Pkmean())
+ # compareContainers(b1$Pkmid(), b2$Pkmid())
+ #
+ # compareFrames(b1$IPD(), b2$IPD())
+ # compareFrames(b1$PulseWidth(), b2$PulseWidth())
+ # compareFrames(b1$PrePulseFrames(), b2$PrePulseFrames())
+ # compareFrames(b1$PulseCallWidth(), b2$PulseCallWidth())
+
+ assertEqual(b1$ReadGroup()$Id(), b2$ReadGroup()$Id())
+
+ assertEqual(b1$Qualities()$Fastq(), b2$Qualities()$Fastq())
+ assertEqual(b1$DeletionQV()$Fastq(), b2$DeletionQV()$Fastq())
+ assertEqual(b1$InsertionQV()$Fastq(), b2$InsertionQV()$Fastq())
+ assertEqual(b1$MergeQV()$Fastq(), b2$MergeQV()$Fastq())
+ assertEqual(b1$SubstitutionQV()$Fastq(), b2$SubstitutionQV()$Fastq())
+ assertEqual(b1$LabelQV()$Fastq(), b2$LabelQV()$Fastq())
+ assertEqual(b1$AltLabelQV()$Fastq(), b2$AltLabelQV()$Fastq())
+ assertEqual(b1$PulseMergeQV()$Fastq(), b2$PulseMergeQV()$Fastq())
+
+ return
+}
+
+getVirtualRecord <- function(fn1, fn2) {
+
+ result <- tryCatch(
+ {
+ vpr <- VirtualPolymeraseReader(fn1, fn2)
+
+ assertTrue(vpr$HasNext())
+
+ virtualRecord <- vpr$Next()
+
+ assertFalse(vpr$HasNext())
+
+ return(virtualRecord)
+ },
+ error = function(e) {
+ print(paste('e:',e))
+ assertTrue(FALSE) # should not throw
+ return
+ }
+ )
+ return(result)
+}
+
+getPolymeraseRecord <- function(fn) {
+
+ result <- tryCatch(
+ {
+ ds <- DataSet(fn)
+ entireFile <- EntireFileQuery(ds)
+
+ polyIter <- entireFile$begin()
+ polyEnd <- entireFile$end()
+
+ assertTrue(polyIter$'__ne__'(polyEnd))
+
+ polyRecord <- polyIter$value()
+ polyIter$incr()
+
+ assertTrue(polyIter$'__eq__'(polyEnd))
+
+ return(polyRecord)
+ },
+ error = function(e) {
+ print(paste('e:',e))
+ assertTrue(FALSE) # should not throw
+ return
+ }
+ )
+ return(result)
+}
+
+test_case("PolymeraseStitching_VirtualRegions", {
+
+ subreadsFn <- paste(test_data_path, "polymerase/internal.subreads.bam", sep="/")
+ scrapsFn <- paste(test_data_path, "polymerase/internal.scraps.bam", sep="/")
+ virtualRecord <- getVirtualRecord(subreadsFn, scrapsFn)
+
+ # -- ADAPTER -- #
+
+ adapter <- virtualRecord$VirtualRegionsTable('ADAPTER')
+ assertEqual(7L, adapter$size())
+
+ region <- adapter$'__getitem__'(0)
+ assertEqual(3047L, region$beginPos)
+ assertEqual(3095L, region$endPos)
+
+ region <- adapter$'__getitem__'(1)
+ assertEqual(3650L, region$beginPos)
+ assertEqual(3700L, region$endPos)
+
+ region <- adapter$'__getitem__'(2)
+ assertEqual(4289L, region$beginPos)
+ assertEqual(4335L, region$endPos)
+
+ region <- adapter$'__getitem__'(3)
+ assertEqual(4888L, region$beginPos)
+ assertEqual(4939L, region$endPos)
+
+ region <- adapter$'__getitem__'(4)
+ assertEqual(5498L, region$beginPos)
+ assertEqual(5546L, region$endPos)
+
+ region <- adapter$'__getitem__'(5)
+ assertEqual(6116L, region$beginPos)
+ assertEqual(6173L, region$endPos)
+
+ region <- adapter$'__getitem__'(6)
+ assertEqual(6740L, region$beginPos)
+ assertEqual(6790L, region$endPos)
+
+ # -- BARCODE -- #
+
+ barcode = virtualRecord$VirtualRegionsTable('BARCODE')
+ assertEqual(14L, barcode$size())
+
+ region <- barcode$'__getitem__'(0)
+ assertEqual(3025L, region$beginPos)
+ assertEqual(3047L, region$endPos)
+
+ region <- barcode$'__getitem__'(1)
+ assertEqual(3095L, region$beginPos)
+ assertEqual(3116L, region$endPos)
+
+ region <- barcode$'__getitem__'(2)
+ assertEqual(3628L, region$beginPos)
+ assertEqual(3650L, region$endPos)
+
+ region <- barcode$'__getitem__'(3)
+ assertEqual(3700L, region$beginPos)
+ assertEqual(3722L, region$endPos)
+
+ region <- barcode$'__getitem__'(4)
+ assertEqual(4267L, region$beginPos)
+ assertEqual(4289L, region$endPos)
+
+ region <- barcode$'__getitem__'(5)
+ assertEqual(4335L, region$beginPos)
+ assertEqual(4356L, region$endPos)
+
+ region <- barcode$'__getitem__'(6)
+ assertEqual(4864L, region$beginPos)
+ assertEqual(4888L, region$endPos)
+
+ region <- barcode$'__getitem__'(7)
+ assertEqual(4939L, region$beginPos)
+ assertEqual(4960L, region$endPos)
+
+ region <- barcode$'__getitem__'(8)
+ assertEqual(5477L, region$beginPos)
+ assertEqual(5498L, region$endPos)
+
+ region <- barcode$'__getitem__'(9)
+ assertEqual(5546L, region$beginPos)
+ assertEqual(5571L, region$endPos)
+
+ region <- barcode$'__getitem__'(10)
+ assertEqual(6087L, region$beginPos)
+ assertEqual(6116L, region$endPos)
+
+ region <- barcode$'__getitem__'(11)
+ assertEqual(6173L, region$beginPos)
+ assertEqual(6199L, region$endPos)
+
+ region <- barcode$'__getitem__'(12)
+ assertEqual(6719L, region$beginPos)
+ assertEqual(6740L, region$endPos)
+
+ region <- barcode$'__getitem__'(13)
+ assertEqual(6790L, region$beginPos)
+ assertEqual(6812L, region$endPos)
+
+ # -- LQREGION -- #
+
+ lqregion = virtualRecord$VirtualRegionsTable('LQREGION')
+ assertEqual(2L, lqregion$size())
+
+ region <- lqregion$'__getitem__'(0)
+ assertEqual(0L, region$beginPos)
+ assertEqual(2659L, region$endPos)
+
+ region <- lqregion$'__getitem__'(1)
+ assertEqual(7034L, region$beginPos)
+ assertEqual(7035L, region$endPos)
+
+ # -- HQREGION -- #
+
+ hqregion = virtualRecord$VirtualRegionsTable('HQREGION')
+ assertEqual(1L, hqregion$size())
+
+ region <- hqregion$'__getitem__'(0)
+ assertEqual(2659L, region$beginPos)
+ assertEqual(7034L, region$endPos)
+})
+
+test_case("PolymeraseStitching_InternalSubreadsToOriginal", {
+
+ # stitch virtual polymerase record
+ subreadsFn <- paste(test_data_path, "polymerase/internal.subreads.bam", sep="/")
+ scrapsFn <- paste(test_data_path, "polymerase/internal.scraps.bam", sep="/")
+ virtualRecord <- getVirtualRecord(subreadsFn, scrapsFn)
+
+ # fetch original polymerase record
+ polyFn <- paste(test_data_path, "polymerase/internal.polymerase.bam", sep="/")
+ polyRecord <- getPolymeraseRecord(polyFn)
+
+ # check
+ compareRecords(polyRecord, virtualRecord)
+})
+
+test_case("PolymeraseStitching_InternalHQToOriginal", {
+
+ # stitch virtual polymerase record
+ hqRegionFn <- paste(test_data_path, "polymerase/internal.hqregions.bam", sep="/")
+ lqRegionFn <- paste(test_data_path, "polymerase/internal.lqregions.bam", sep="/")
+ virtualRecord <- getVirtualRecord(hqRegionFn, lqRegionFn)
+
+ # fetch original polymerase record
+ polyFn <- paste(test_data_path, "polymerase/internal.polymerase.bam", sep="/")
+ polyRecord <- getPolymeraseRecord(polyFn)
+
+ # check
+ compareRecords(polyRecord, virtualRecord)
+})
+
+test_case("PolymeraseStitching_ProductionSubreadsToOriginal", {
+
+ # stitch virtual polymerase record
+ subreadsFn <- paste(test_data_path, "polymerase/production.subreads.bam", sep="/")
+ scrapsFn <- paste(test_data_path, "polymerase/production.scraps.bam", sep="/")
+ virtualRecord <- getVirtualRecord(subreadsFn, scrapsFn)
+
+ # fetch original polymerase record
+ polyFn <- paste(test_data_path, "polymerase/production.polymerase.bam", sep="/")
+ polyRecord <- getPolymeraseRecord(polyFn)
+
+ # compare
+ assertEqual(polyRecord$FullName(), virtualRecord$FullName())
+ assertEqual(polyRecord$HoleNumber(), virtualRecord$HoleNumber())
+ assertEqual(polyRecord$NumPasses(), virtualRecord$NumPasses())
+ assertEqual(polyRecord$Sequence(), virtualRecord$Sequence())
+ assertEqual(polyRecord$DeletionTag(), virtualRecord$DeletionTag())
+ assertEqual(polyRecord$SubstitutionTag(), virtualRecord$SubstitutionTag())
+
+ compareFrames(polyRecord$IPD(), virtualRecord$IPDV1Frames())
+ assertEqual(polyRecord$ReadGroup()$Id(), virtualRecord$ReadGroup()$Id())
+
+ tolerance = 1e-5
+ assertTrue( abs(polyRecord$ReadAccuracy()$ToFloat() - virtualRecord$ReadAccuracy()$ToFloat()) <= tolerance )
+ # assertEqual(polyRecord$ReadAccuracy()$ToFloat(), virtualRecord$ReadAccuracy()$ToFloat())
+
+ assertEqual(polyRecord$Qualities()$Fastq(), virtualRecord$Qualities()$Fastq())
+ assertEqual(polyRecord$DeletionQV()$Fastq(), virtualRecord$DeletionQV()$Fastq())
+ assertEqual(polyRecord$InsertionQV()$Fastq(), virtualRecord$InsertionQV()$Fastq())
+ assertEqual(polyRecord$MergeQV()$Fastq(), virtualRecord$MergeQV()$Fastq())
+ assertEqual(polyRecord$SubstitutionQV()$Fastq(), virtualRecord$SubstitutionQV()$Fastq())
+})
+
+test_case("PolymeraseStitching_ProductionHQToOriginal", {
+
+ # stitch virtual polymerase record
+ hqRegionFn <- paste(test_data_path, "polymerase/production_hq.hqregion.bam", sep="/")
+ lqRegionFn <- paste(test_data_path, "polymerase/production_hq.scraps.bam", sep="/")
+ virtualRecord <- getVirtualRecord(hqRegionFn, lqRegionFn)
+
+ # fetch original polymerase record
+ polyFn <- paste(test_data_path, "polymerase/production.polymerase.bam", sep="/")
+ polyRecord <- getPolymeraseRecord(polyFn)
+
+ # compare
+ assertEqual(polyRecord$FullName(), virtualRecord$FullName())
+ assertEqual(polyRecord$HoleNumber(), virtualRecord$HoleNumber())
+ assertEqual(polyRecord$NumPasses(), virtualRecord$NumPasses())
+ assertEqual(polyRecord$Sequence(), virtualRecord$Sequence())
+ assertEqual(polyRecord$DeletionTag(), virtualRecord$DeletionTag())
+ assertEqual(polyRecord$SubstitutionTag(), virtualRecord$SubstitutionTag())
+
+ compareFrames(polyRecord$IPD(), virtualRecord$IPDV1Frames())
+ assertEqual(polyRecord$ReadGroup()$Id(), virtualRecord$ReadGroup()$Id())
+
+ tolerance = 1e-5
+ assertTrue( abs(polyRecord$ReadAccuracy()$ToFloat() - virtualRecord$ReadAccuracy()$ToFloat()) <= tolerance )
+ # assertEqual(polyRecord$ReadAccuracy()$ToInt(), virtualRecord$ReadAccuracy()$ToInt())
+
+ assertEqual(polyRecord$Qualities()$Fastq(), virtualRecord$Qualities()$Fastq())
+ assertEqual(polyRecord$DeletionQV()$Fastq(), virtualRecord$DeletionQV()$Fastq())
+ assertEqual(polyRecord$InsertionQV()$Fastq(), virtualRecord$InsertionQV()$Fastq())
+ assertEqual(polyRecord$MergeQV()$Fastq(), virtualRecord$MergeQV()$Fastq())
+ assertEqual(polyRecord$SubstitutionQV()$Fastq(), virtualRecord$SubstitutionQV()$Fastq())
+
+ assertTrue(polyRecord$HasDeletionQV())
+ assertTrue(polyRecord$HasDeletionTag())
+ assertTrue(polyRecord$HasInsertionQV())
+ assertTrue(polyRecord$HasMergeQV())
+ assertTrue(polyRecord$HasSubstitutionQV())
+ assertTrue(polyRecord$HasSubstitutionTag())
+ assertTrue(polyRecord$HasIPD())
+ assertFalse(polyRecord$HasLabelQV())
+ assertFalse(polyRecord$HasAltLabelQV())
+ assertFalse(polyRecord$HasAltLabelTag())
+ assertFalse(polyRecord$HasPkmean())
+ assertFalse(polyRecord$HasPkmid())
+ assertFalse(polyRecord$HasPulseCall())
+ assertFalse(polyRecord$HasPulseWidth())
+ assertFalse(polyRecord$HasPrePulseFrames())
+ assertFalse(polyRecord$HasPulseCallWidth())
+ assertFalse(polyRecord$HasPulseCall())
+
+ assertTrue(virtualRecord$HasDeletionQV())
+ assertTrue(virtualRecord$HasDeletionTag())
+ assertTrue(virtualRecord$HasInsertionQV())
+ assertTrue(virtualRecord$HasMergeQV())
+ assertTrue(virtualRecord$HasSubstitutionQV())
+ assertTrue(virtualRecord$HasSubstitutionTag())
+ assertTrue(virtualRecord$HasIPD())
+ assertFalse(virtualRecord$HasLabelQV())
+ assertFalse(virtualRecord$HasAltLabelQV())
+ assertFalse(virtualRecord$HasAltLabelTag())
+ assertFalse(virtualRecord$HasPkmean())
+ assertFalse(virtualRecord$HasPkmid())
+ assertFalse(virtualRecord$HasPulseCall())
+ assertFalse(virtualRecord$HasPulseWidth())
+ assertFalse(virtualRecord$HasPrePulseFrames())
+ assertFalse(virtualRecord$HasPulseCallWidth())
+ assertFalse(virtualRecord$HasPulseCall())
+})
\ No newline at end of file
diff --git a/tests/src/TestData.h.in b/tests/src/TestData.h.in
index e4d786c..1e1d9ca 100644
--- a/tests/src/TestData.h.in
+++ b/tests/src/TestData.h.in
@@ -44,10 +44,10 @@ namespace PacBio {
namespace BAM {
namespace tests {
-const std::string Source_Dir = std::string("@PacBioBAM_TestsDir@");
-const std::string Bin_Dir = std::string("@CMAKE_CURRENT_BINARY_DIR@");
-const std::string Data_Dir = std::string("@PacBioBAM_TestsDir@/data");
-const std::string Samtools_Bin = std::string("@Samtools_Bin@");
+const std::string Source_Dir = std::string("@PacBioBAM_TestsDir@");
+const std::string Bin_Dir = std::string("@CMAKE_CURRENT_BINARY_DIR@");
+const std::string Data_Dir = std::string("@PacBioBAM_TestsDir@/data");
+const std::string Bam2Sam = std::string("@PacBioBAM_BinDir@/bam2sam");
} // namespace tests
} // namespace BAM
diff --git a/tests/src/cram/bam2sam.t b/tests/src/cram/bam2sam.t
new file mode 100644
index 0000000..d306f23
--- /dev/null
+++ b/tests/src/cram/bam2sam.t
@@ -0,0 +1,63 @@
+Setup:
+
+ $ BAM2SAM="$TESTDIR/../../../bin/bam2sam" && export BAM2SAM
+
+ $ DATADIR="$TESTDIR/../../data" && export DATADIR
+
+Normal:
+
+ $ $BAM2SAM < $DATADIR/phi29.bam | head -n 5
+ @HD\tVN:3.0.0\tSO:unknown\tpb:3.0.1 (esc)
+ @RG\tID:a955def6\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3.0.0.140018;FRAMERATEHZ=75.000000\tPU:m140918_150013_42139_c100697631700000001823144703261565_s1_p0 (esc)
+ @PG\tID:bam2bam-0.20.0\tPN:bam2bam\tVN:0.20.0 (esc)
+ @PG\tID:bax2bam-0.0.2\tPN:bax2bam\tVN:0.0.2 (esc)
+ m140918_150013_42139_c100697631700000001823144703261565_s1_p0/30422/2067_4072\t4\t*\t0\t255\t*\t*\t0\t0\tAAGTCATGTATAGAGTTATTGGCTCAGCGGTGGCAAGCAGCCAACTCAGCTCCTTTCGGGCTTGTTAGCAGCCGGATCCACACTCTGAAATTCCTGCAGCTCGAGTTATTTGATAGTAAAAGTGGGTCATCAAACCGCAACTACGCCACCCCGGTACCTGAACAGGCTTCGGTTTCATTTTGAGACGAGAAAAACCCACTTTGAAGTTTTCGAAAATCACTTCCTTTTTTGATTTTGTCCGTCATGCCTGCGCATTTCACAAGAGAACTTGATGTCAGTGTTAGTCGTCAGGAGAGCCTCTACCAGTTTGCCGTCTACTTCTTTCATGTAAATATCCTGGATGTAAGGTTTTCTGACGCAGATATTGTTGGCAACGCTTAAAAAG [...]
+
+Explicit Filename (not stdin):
+
+ $ $BAM2SAM $DATADIR/phi29.bam | head -n 5
+ @HD\tVN:3.0.0\tSO:unknown\tpb:3.0.1 (esc)
+ @RG\tID:a955def6\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3.0.0.140018;FRAMERATEHZ=75.000000\tPU:m140918_150013_42139_c100697631700000001823144703261565_s1_p0 (esc)
+ @PG\tID:bam2bam-0.20.0\tPN:bam2bam\tVN:0.20.0 (esc)
+ @PG\tID:bax2bam-0.0.2\tPN:bax2bam\tVN:0.0.2 (esc)
+ m140918_150013_42139_c100697631700000001823144703261565_s1_p0/30422/2067_4072\t4\t*\t0\t255\t*\t*\t0\t0\tAAGTCATGTATAGAGTTATTGGCTCAGCGGTGGCAAGCAGCCAACTCAGCTCCTTTCGGGCTTGTTAGCAGCCGGATCCACACTCTGAAATTCCTGCAGCTCGAGTTATTTGATAGTAAAAGTGGGTCATCAAACCGCAACTACGCCACCCCGGTACCTGAACAGGCTTCGGTTTCATTTTGAGACGAGAAAAACCCACTTTGAAGTTTTCGAAAATCACTTCCTTTTTTGATTTTGTCCGTCATGCCTGCGCATTTCACAAGAGAACTTGATGTCAGTGTTAGTCGTCAGGAGAGCCTCTACCAGTTTGCCGTCTACTTCTTTCATGTAAATATCCTGGATGTAAGGTTTTCTGACGCAGATATTGTTGGCAACGCTTAAAAAG [...]
+
+Header-Only:
+
+ $ $BAM2SAM --header-only < $DATADIR/phi29.bam | head -n 5
+ @HD\tVN:3.0.0\tSO:unknown\tpb:3.0.1 (esc)
+ @RG\tID:a955def6\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3.0.0.140018;FRAMERATEHZ=75.000000\tPU:m140918_150013_42139_c100697631700000001823144703261565_s1_p0 (esc)
+ @PG\tID:bam2bam-0.20.0\tPN:bam2bam\tVN:0.20.0 (esc)
+ @PG\tID:bax2bam-0.0.2\tPN:bax2bam\tVN:0.0.2 (esc)
+
+No-Header:
+
+ $ $BAM2SAM --no-header < $DATADIR/phi29.bam | head -n 5
+ m140918_150013_42139_c100697631700000001823144703261565_s1_p0/30422/2067_4072\t4\t*\t0\t255\t*\t*\t0\t0\tAAGTCATGTATAGAGTTATTGGCTCAGCGGTGGCAAGCAGCCAACTCAGCTCCTTTCGGGCTTGTTAGCAGCCGGATCCACACTCTGAAATTCCTGCAGCTCGAGTTATTTGATAGTAAAAGTGGGTCATCAAACCGCAACTACGCCACCCCGGTACCTGAACAGGCTTCGGTTTCATTTTGAGACGAGAAAAACCCACTTTGAAGTTTTCGAAAATCACTTCCTTTTTTGATTTTGTCCGTCATGCCTGCGCATTTCACAAGAGAACTTGATGTCAGTGTTAGTCGTCAGGAGAGCCTCTACCAGTTTGCCGTCTACTTCTTTCATGTAAATATCCTGGATGTAAGGTTTTCTGACGCAGATATTGTTGGCAACGCTTAAAAAG [...]
+ m140918_150013_42139_c100697631700000001823144703261565_s1_p0/30422/4151_6183\t4\t*\t0\t255\t*\t*\t0\t0\tGATCCCGCGAATTAATTACGACTCACTATAGGGGAATTGTGAGCGGATAACAATTCCCGCCTCTAGAAATAATTTTGTTTAAACTTTTAAGAAAGGAGATATTACATATGAAACACAGCCACGTAAAATGTATTCCTGCGACTTGGAGACTACCACCAAGGTGAAGATTTGCCGCGTAATGGGCATACGGTTTACATGAAACATCGAAGAACAAACTCGAGTATAAGATTGGTAACTCCCCTGGATGAATTATGGCTTGGGTTACTGAAAGTTCGAGGTCTGACCTGTACTTCGCACAAATCTGAAAATTTGATGGCCGCAAATTTCAATTCATCACTGGCTGGAACGTAAACGGTTTTAAATGGTCCGCAGATCGGTCTGTGCC [...]
+ m140918_150013_42139_c100697631700000001823144703261565_s1_p0/30422/6234_8214\t4\t*\t0\t255\t*\t*\t0\t0\tAGAGTCATGTATAAGAGTTATTGCTCAGCGGTGGCAGCAGACAACTCAGCTTCCTTTCGGGCCTTTGTTAGCAGCCGGATCCAAGCTTGAATTCCTGCAAGCTCGAGTTATTTGATAGTAAAAGTGTCATCAAACCAGCACTACGGCCGAACCCGGTACCTGAACAGATTCGTTTCATTTTACGAGAAAAACCCACTTTGAAGTTTTGCCGAAAGTCACTTCTTTTTGATTTGTCCGTCATGCTGCGCATTTCACAGAGACTTGAATGTCAGTGTAGTCGTCATCGGGGGGGGGAAGAGCCCTCTACCAGTTTTGCCGTCTACTTCTTTCATGTAAATATCTGGATGTAGGTTTTCTGAACGCAGATATTTGCAGCTTAAAAGTG [...]
+ m140918_150013_42139_c100697631700000001823144703261565_s1_p0/30422/8294_10277\t4\t*\t0\t255\t*\t*\t0\t0\tGATTCCCGCGAAATTAATACGAATCACTATAAGGGGAATTGTGAGCGGATAACAATTCCCCTCTAGAAATAATTTTGTTTAACTTTAAGAGGGACGATATACATATGAACACATGCCTACGTAAAATGTATTCCTGCGAACTGTTGAGACTACCACCAAGGTTGAAGATTTGCCGCGTAATGGGCATACGGTTACATGAACATCGAAGACCACTCCGATATGAAGATTGGTTAACCCCTGGATGAATTTATGGCTTGGGTTCTGAAAGTTCAGGCTGACCTGTACTTCACAATCTGAAATTTGATGGCCGCATTCATCAATCACTGGCTGGAACGTAAAACGGTTTAAAAATGGTCCCGCAGATGGTCTGACAAATTAACTACA [...]
+ m140918_150013_42139_c100697631700000001823144703261565_s1_p0/30422/10327_12283\t4\t*\t0\t255\t*\t*\t0\t0\tAGAGTCATGTATAGAGTTATTGCTCAGCGGTGGCAGCACCAACTCAGCTTCCTTTCGGCTTTGTTAGCAGCCGATCCAAGCTTGAATTCCTGCAGCTCGGAGTTATTTGATAGTAAAAGTTGTCATCCAAACGCAGCACTACGCCCACCCGTACCTGAACAGGCTTTCGGTTTCATTTTACGAGAAAAACACTTTTGAAAGTTTTCGAAAGTCACTTCCTTTTTTGATTTTGTCCGTCATGCCTGCGCATTTCACAGAGAACTTGATGTCAGTGTAGTCGTCAGGAGAGCCCTCTACCAGTTTGCCGTCTACTTCTTTCATGTAAATATCCTGGAATGTAGGTTTTTCTGACGCAGATTATTTTGCACGCTTAAAAGTGGATT [...]
+
+Invalid-Args:
+
+ $ $BAM2SAM --header-only --no-header < $DATADIR/phi29.bam
+
+ ERROR: conflicting arguments requested: --no-header and --header-only
+
+ Usage: bam2sam [options] [input]
+
+ bam2sam converts a BAM file to SAM. It is essentially a stripped-down 'samtools
+ view', mostly useful for testing/debugging without requiring samtools. Input BAM
+ file is read from a file or stdin, and SAM output is written to stdout.
+
+ Options:
+ -h, --help show this help message and exit
+ --version show program's version number and exit
+
+ Options:
+ input Input BAM file. If not provided, stdin will be used as input.
+ --no-header Omit header from output.
+ --header-only Print only the header (no records).
+ [1]
+
diff --git a/tests/src/cram/pbindexdump_cpp.t b/tests/src/cram/pbindexdump_cpp.t
new file mode 100644
index 0000000..cf318ee
--- /dev/null
+++ b/tests/src/cram/pbindexdump_cpp.t
@@ -0,0 +1,39 @@
+Setup:
+
+ $ PBINDEXDUMP="$TESTDIR/../../../bin/pbindexdump" && export PBINDEXDUMP
+
+ $ DATADIR="$TESTDIR/../../data" && export DATADIR
+
+Normal C++:
+
+ $ $PBINDEXDUMP --format=cpp $DATADIR/polymerase/production_hq.hqregion.bam.pbi
+ PbiRawData rawData;
+ rawData.Version(PbiFile::Version_3_0_1);
+ rawData.FileSections(PbiFile::BASIC);
+ rawData.NumReads(1);
+
+ PbiRawBasicData& basicData = rawData.BasicData();
+ basicData.rgId_ = {-898246524};
+ basicData.qStart_ = {2659};
+ basicData.qEnd_ = {7034};
+ basicData.holeNumber_ = {0};
+ basicData.readQual_ = {0.01};
+ basicData.ctxtFlag_ = {0};
+ basicData.fileOffset_ = {20054016};
+
+
+--(leave the blank lines above this)--
+
+Request C++, with JSON options (stdout includes usage/help, so we just want to check stderr):
+
+ $ $PBINDEXDUMP --format=cpp --json-indent-level=2 $DATADIR/polymerase/production_hq.hqregion.bam.pbi > /dev/null
+
+ ERROR: JSON formatting options not valid on non-JSON output
+
+ [1]
+
+ $ $PBINDEXDUMP --format=cpp --json-raw $DATADIR/polymerase/production_hq.hqregion.bam.pbi > /dev/null
+
+ ERROR: JSON formatting options not valid on non-JSON output
+
+ [1]
diff --git a/tests/src/cram/pbindexdump_json.t b/tests/src/cram/pbindexdump_json.t
new file mode 100644
index 0000000..676e21a
--- /dev/null
+++ b/tests/src/cram/pbindexdump_json.t
@@ -0,0 +1,83 @@
+Setup:
+
+ $ PBINDEXDUMP="$TESTDIR/../../../bin/pbindexdump" && export PBINDEXDUMP
+
+ $ DATADIR="$TESTDIR/../../data" && export DATADIR
+
+Default settings (JSON):
+
+ $ $PBINDEXDUMP $DATADIR/polymerase/production_hq.hqregion.bam.pbi
+ {
+ "fileSections": [
+ "BasicData"
+ ],
+ "numReads": 1,
+ "reads": [
+ {
+ "contextFlag": 0,
+ "fileOffset": 20054016,
+ "holeNumber": 0,
+ "qEnd": 7034,
+ "qStart": 2659,
+ "readQuality": 0.00999999977648258,
+ "rgId": -898246524
+ }
+ ],
+ "version": "3.0.1"
+ }
+
+JSON indent level(2):
+
+ $ $PBINDEXDUMP --json-indent-level=2 $DATADIR/polymerase/production_hq.hqregion.bam.pbi
+ {
+ "fileSections": [
+ "BasicData"
+ ],
+ "numReads": 1,
+ "reads": [
+ {
+ "contextFlag": 0,
+ "fileOffset": 20054016,
+ "holeNumber": 0,
+ "qEnd": 7034,
+ "qStart": 2659,
+ "readQuality": 0.00999999977648258,
+ "rgId": -898246524
+ }
+ ],
+ "version": "3.0.1"
+ }
+
+JSON raw:
+
+ $ $PBINDEXDUMP --json-raw $DATADIR/polymerase/production_hq.hqregion.bam.pbi
+ {
+ "basicData": {
+ "ctxtFlag": [
+ 0
+ ],
+ "fileOffset": [
+ 20054016
+ ],
+ "holeNumber": [
+ 0
+ ],
+ "qEnd": [
+ 7034
+ ],
+ "qStart": [
+ 2659
+ ],
+ "readQual": [
+ 0.00999999977648258
+ ],
+ "rgId": [
+ -898246524
+ ]
+ },
+ "fileSections": [
+ "BasicData"
+ ],
+ "numReads": 1,
+ "version": "3.0.1"
+ }
diff --git a/tests/src/cram/pbmerge_aligned_ordering.t b/tests/src/cram/pbmerge_aligned_ordering.t
new file mode 100644
index 0000000..48a8553
--- /dev/null
+++ b/tests/src/cram/pbmerge_aligned_ordering.t
@@ -0,0 +1,197 @@
+Setup:
+
+ $ TOOLS_BIN="$TESTDIR/../../../bin" && export TOOLS_BIN
+ $ PBMERGE="$TOOLS_BIN/pbmerge" && export PBMERGE
+ $ BAM2SAM="$TOOLS_BIN/bam2sam" && export BAM2SAM
+
+ $ DATADIR="$TESTDIR/../../data" && export DATADIR
+ $ INPUT_1="$DATADIR/dataset/bam_mapping_1.bam" && export INPUT_1
+ $ INPUT_2="$DATADIR/dataset/bam_mapping_2.bam" && export INPUT_2
+
+ $ MERGED_BAM="/tmp/aligned_ordering_merged.bam" && export MERGED_BAM
+ $ MERGED_BAM_PBI="/tmp/aligned_ordering_merged.bam.pbi" && export MERGED_BAM_PBI
+
+Sanity Check:
+
+ $ $BAM2SAM --header-only $INPUT_1
+ @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.1 (esc)
+ @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+ @RG\tID:a9a22406c5\tDS:READTYPE=SUBREAD;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3;InsertionQV=iq;DeletionQV=dq;SubstitutionQV=sq;MergeQV=mq;SubstitutionTag=st;DeletionTag=dt\tPL:PACBIO\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377 (esc)
+ @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread (esc)
+
+ $ $BAM2SAM --no-header $INPUT_1 | cut -f 1,3,4 | head -n 10
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49050/48_1132\tlambda_NEB3011\t1 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/0_344\tlambda_NEB3011\t676 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+
+ $ $BAM2SAM --header-only $INPUT_2
+ @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.1 (esc)
+ @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+ @RG\tID:a9a22406c5\tDS:READTYPE=SUBREAD;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3;InsertionQV=iq;DeletionQV=dq;SubstitutionQV=sq;MergeQV=mq;SubstitutionTag=st;DeletionTag=dt\tPL:PACBIO\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377 (esc)
+ @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread (esc)
+
+ $ $BAM2SAM --no-header $INPUT_2 | cut -f 1,3,4 | head -n 10
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/387_1134\tlambda_NEB3011\t303 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7247/7338_7831\tlambda_NEB3011\t4904 (esc)
+
+Normal Merge:
+
+ $ $PBMERGE $INPUT_1 $INPUT_2 > $MERGED_BAM
+
+ $ $BAM2SAM --header-only $MERGED_BAM
+ @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.1 (esc)
+ @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+ @RG\tID:a9a22406c5\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377\tPM:SEQUEL (esc)
+ @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread (esc)
+ @PG\tID:pbmerge-0.5.0\tPN:pbmerge\tVN:0.5.0 (esc)
+
+ $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1,3,4 | head -n 20
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49050/48_1132\tlambda_NEB3011\t1 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/387_1134\tlambda_NEB3011\t303 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/0_344\tlambda_NEB3011\t676 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7247/7338_7831\tlambda_NEB3011\t4904 (esc)
+
+ $ rm $MERGED_BAM
+
+Shuffle Input:
+
+ $ $PBMERGE $INPUT_2 $INPUT_2 > $MERGED_BAM
+
+ $ $BAM2SAM --header-only $MERGED_BAM
+ @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.1 (esc)
+ @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+ @RG\tID:a9a22406c5\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377\tPM:SEQUEL (esc)
+ @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread (esc)
+ @PG\tID:pbmerge-0.5.0\tPN:pbmerge\tVN:0.5.0 (esc)
+
+ $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1,3,4 | head -n 20
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/387_1134\tlambda_NEB3011\t303 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7247/7338_7831\tlambda_NEB3011\t4904 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7247/7046_7293\tlambda_NEB3011\t5136 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/38025/6255_7894\tlambda_NEB3011\t5427 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5311_5508\tlambda_NEB3011\t5943 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/36363/899_1197\tlambda_NEB3011\t6258 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/36363/605_853\tlambda_NEB3011\t6312 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/31174/0_1029\tlambda_NEB3011\t6487 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/31174/1075_1271\tlambda_NEB3011\t6499 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/38025/5743_6211\tlambda_NEB3011\t6606 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/50257/6944_7361\tlambda_NEB3011\t6942 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/50257/6546_6903\tlambda_NEB3011\t7010 (esc)
+
+ $ rm $MERGED_BAM
+
+Explicit Output Filename (also enables PBI):
+
+ $ $PBMERGE -o $MERGED_BAM $INPUT_1 $INPUT_2
+
+ $ $BAM2SAM --header-only $MERGED_BAM
+ @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.1 (esc)
+ @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+ @RG\tID:a9a22406c5\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377\tPM:SEQUEL (esc)
+ @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread (esc)
+ @PG\tID:pbmerge-0.5.0\tPN:pbmerge\tVN:0.5.0 (esc)
+
+ $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1,3,4 | head -n 20
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49050/48_1132\tlambda_NEB3011\t1 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/387_1134\tlambda_NEB3011\t303 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/0_344\tlambda_NEB3011\t676 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7247/7338_7831\tlambda_NEB3011\t4904 (esc)
+
+ $ [ -f $MERGED_BAM_PBI ] && echo "Found" || echo "Not found"
+ Found
+
+ $ rm $MERGED_BAM
+ $ rm $MERGED_BAM_PBI
+
+Explicit Output Filename (with disabled PBI):
+
+ $ $PBMERGE -o $MERGED_BAM --no-pbi $INPUT_1 $INPUT_2
+
+ $ $BAM2SAM --header-only $MERGED_BAM
+ @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.1 (esc)
+ @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+ @RG\tID:a9a22406c5\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377\tPM:SEQUEL (esc)
+ @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread (esc)
+ @PG\tID:pbmerge-0.5.0\tPN:pbmerge\tVN:0.5.0 (esc)
+
+ $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1,3,4 | head -n 20
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49050/48_1132\tlambda_NEB3011\t1 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/387_1134\tlambda_NEB3011\t303 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/0_344\tlambda_NEB3011\t676 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7247/7338_7831\tlambda_NEB3011\t4904 (esc)
+
+ $ [ -f $MERGED_BAM_PBI ] && echo "Found" || echo "Not found"
+ Not found
+
+ $ rm $MERGED_BAM
diff --git a/tests/src/cram/pbmerge_dataset.t b/tests/src/cram/pbmerge_dataset.t
new file mode 100644
index 0000000..076bcc0
--- /dev/null
+++ b/tests/src/cram/pbmerge_dataset.t
@@ -0,0 +1,144 @@
+Setup:
+
+ $ TOOLS_BIN="$TESTDIR/../../../bin" && export TOOLS_BIN
+ $ PBMERGE="$TOOLS_BIN/pbmerge" && export PBMERGE
+ $ BAM2SAM="$TOOLS_BIN/bam2sam" && export BAM2SAM
+
+ $ DATADIR="$TESTDIR/../../data" && export DATADIR
+ $ INPUT_XML="$DATADIR/polymerase/consolidate.subread.dataset.xml" && export INPUT_XML
+ $ BAM_1="$DATADIR/polymerase/production.subreads.bam" && export BAM_1
+ $ BAM_2="$DATADIR/polymerase/production.scraps.bam" && export BAM_2
+
+ $ MERGED_BAM="/tmp/merged.bam" && export MERGED_BAM
+ $ MERGED_BAM_PBI="/tmp/merged.bam.pbi" && export MERGED_BAM_PBI
+
+Sanity Check:
+
+ $ $BAM2SAM --no-header $BAM_1 | cut -f 1
+ ArminsFakeMovie/0/2659_3025
+ ArminsFakeMovie/0/3116_3628
+ ArminsFakeMovie/0/3722_4267
+ ArminsFakeMovie/0/4356_4864
+ ArminsFakeMovie/0/4960_5477
+ ArminsFakeMovie/0/5571_6087
+ ArminsFakeMovie/0/6199_6719
+ ArminsFakeMovie/0/6812_7034
+
+ $ $BAM2SAM --no-header $BAM_2 | cut -f 1
+ ArminsFakeMovie/0/0_2659
+ ArminsFakeMovie/0/3025_3047
+ ArminsFakeMovie/0/3047_3095
+ ArminsFakeMovie/0/3095_3116
+ ArminsFakeMovie/0/3628_3650
+ ArminsFakeMovie/0/3650_3700
+ ArminsFakeMovie/0/3700_3722
+ ArminsFakeMovie/0/4267_4289
+ ArminsFakeMovie/0/4289_4335
+ ArminsFakeMovie/0/4335_4356
+ ArminsFakeMovie/0/4864_4888
+ ArminsFakeMovie/0/4888_4939
+ ArminsFakeMovie/0/4939_4960
+ ArminsFakeMovie/0/5477_5498
+ ArminsFakeMovie/0/5498_5546
+ ArminsFakeMovie/0/5546_5571
+ ArminsFakeMovie/0/6087_6116
+ ArminsFakeMovie/0/6116_6173
+ ArminsFakeMovie/0/6173_6199
+ ArminsFakeMovie/0/6719_6740
+ ArminsFakeMovie/0/6740_6790
+ ArminsFakeMovie/0/6790_6812
+ ArminsFakeMovie/0/7034_7035
+
+Normal Merge from XML:
+
+ $ $PBMERGE -o $MERGED_BAM $INPUT_XML
+
+ $ [ -f $MERGED_BAM ] && echo "Found" || echo "Not found"
+ Found
+
+ $ [ -f $MERGED_BAM_PBI ] && echo "Found" || echo "Not found"
+ Found
+
+ $ $BAM2SAM --header-only $MERGED_BAM
+ @HD\tVN:1.1\tSO:unknown\tpb:3.0.1 (esc)
+ @RG\tID:8aaede36\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:CodecV1=ip;BINDINGKIT=FakeBindKit;SEQUENCINGKIT=FakeSeqKit;BASECALLERVERSION=0.2.0;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+ @RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:CodecV1=ip;BINDINGKIT=FakeBindKit;SEQUENCINGKIT=FakeSeqKit;BASECALLERVERSION=0.2.0;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+ @PG\tID:BAZ_FORMAT\tVN:0.3.0 (esc)
+ @PG\tID:PPA-BAZ2BAM\tVN:0.1.0 (esc)
+ @PG\tID:PPA-BAZWRITER\tVN:0.2.0 (esc)
+ @PG\tID:pbmerge-0.5.0\tPN:pbmerge\tVN:0.5.0 (esc)
+
+ $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1
+ ArminsFakeMovie/0/4267_4289
+ ArminsFakeMovie/0/4289_4335
+ ArminsFakeMovie/0/4335_4356
+ ArminsFakeMovie/0/4356_4864
+ ArminsFakeMovie/0/4864_4888
+ ArminsFakeMovie/0/4888_4939
+ ArminsFakeMovie/0/4939_4960
+ ArminsFakeMovie/0/4960_5477
+
+ $ rm $MERGED_BAM
+ $ rm $MERGED_BAM_PBI
+
+Normal Merge from XML (disabled PBI):
+
+ $ $PBMERGE --no-pbi -o $MERGED_BAM $INPUT_XML
+
+ $ [ -f $MERGED_BAM ] && echo "Found" || echo "Not found"
+ Found
+
+ $ [ -f $MERGED_BAM_PBI ] && echo "Found" || echo "Not found"
+ Not found
+
+ $ $BAM2SAM --header-only $MERGED_BAM
+ @HD\tVN:1.1\tSO:unknown\tpb:3.0.1 (esc)
+ @RG\tID:8aaede36\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:CodecV1=ip;BINDINGKIT=FakeBindKit;SEQUENCINGKIT=FakeSeqKit;BASECALLERVERSION=0.2.0;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+ @RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:CodecV1=ip;BINDINGKIT=FakeBindKit;SEQUENCINGKIT=FakeSeqKit;BASECALLERVERSION=0.2.0;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+ @PG\tID:BAZ_FORMAT\tVN:0.3.0 (esc)
+ @PG\tID:PPA-BAZ2BAM\tVN:0.1.0 (esc)
+ @PG\tID:PPA-BAZWRITER\tVN:0.2.0 (esc)
+ @PG\tID:pbmerge-0.5.0\tPN:pbmerge\tVN:0.5.0 (esc)
+
+ $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1
+ ArminsFakeMovie/0/4267_4289
+ ArminsFakeMovie/0/4289_4335
+ ArminsFakeMovie/0/4335_4356
+ ArminsFakeMovie/0/4356_4864
+ ArminsFakeMovie/0/4864_4888
+ ArminsFakeMovie/0/4888_4939
+ ArminsFakeMovie/0/4939_4960
+ ArminsFakeMovie/0/4960_5477
+
+ $ rm $MERGED_BAM
+
+Write to stdout:
+
+ $ $PBMERGE --no-pbi $INPUT_XML > $MERGED_BAM
+
+ $ [ -f $MERGED_BAM ] && echo "Found" || echo "Not found"
+ Found
+
+ $ [ -f $MERGED_BAM_PBI ] && echo "Found" || echo "Not found"
+ Not found
+
+ $ $BAM2SAM --header-only $MERGED_BAM
+ @HD\tVN:1.1\tSO:unknown\tpb:3.0.1 (esc)
+ @RG\tID:8aaede36\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:CodecV1=ip;BINDINGKIT=FakeBindKit;SEQUENCINGKIT=FakeSeqKit;BASECALLERVERSION=0.2.0;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+ @RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:CodecV1=ip;BINDINGKIT=FakeBindKit;SEQUENCINGKIT=FakeSeqKit;BASECALLERVERSION=0.2.0;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+ @PG\tID:BAZ_FORMAT\tVN:0.3.0 (esc)
+ @PG\tID:PPA-BAZ2BAM\tVN:0.1.0 (esc)
+ @PG\tID:PPA-BAZWRITER\tVN:0.2.0 (esc)
+ @PG\tID:pbmerge-0.5.0\tPN:pbmerge\tVN:0.5.0 (esc)
+
+ $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1
+ ArminsFakeMovie/0/4267_4289
+ ArminsFakeMovie/0/4289_4335
+ ArminsFakeMovie/0/4335_4356
+ ArminsFakeMovie/0/4356_4864
+ ArminsFakeMovie/0/4864_4888
+ ArminsFakeMovie/0/4888_4939
+ ArminsFakeMovie/0/4939_4960
+ ArminsFakeMovie/0/4960_5477
+
+ $ rm $MERGED_BAM
diff --git a/tests/src/cram/pbmerge_fofn.t b/tests/src/cram/pbmerge_fofn.t
new file mode 100644
index 0000000..b88e08b
--- /dev/null
+++ b/tests/src/cram/pbmerge_fofn.t
@@ -0,0 +1,134 @@
+Setup:
+
+ $ TOOLS_BIN="$TESTDIR/../../../bin" && export TOOLS_BIN
+ $ PBMERGE="$TOOLS_BIN/pbmerge" && export PBMERGE
+ $ BAM2SAM="$TOOLS_BIN/bam2sam" && export BAM2SAM
+
+ $ DATADIR="$TESTDIR/../../data" && export DATADIR
+ $ INPUT_FOFN="$DATADIR/dataset/merge.fofn" && export INPUT_FOFN
+ $ INPUT_1="$DATADIR/dataset/bam_mapping_1.bam" && export INPUT_1
+ $ INPUT_2="$DATADIR/dataset/bam_mapping_2.bam" && export INPUT_2
+
+ $ MERGED_BAM="/tmp/aligned_ordering_merged.bam" && export MERGED_BAM
+ $ MERGED_BAM_PBI="/tmp/aligned_ordering_merged.bam.pbi" && export MERGED_BAM_PBI
+
+Sanity Check:
+
+ $ $BAM2SAM --header-only $INPUT_1
+ @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.1 (esc)
+ @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+ @RG\tID:a9a22406c5\tDS:READTYPE=SUBREAD;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3;InsertionQV=iq;DeletionQV=dq;SubstitutionQV=sq;MergeQV=mq;SubstitutionTag=st;DeletionTag=dt\tPL:PACBIO\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377 (esc)
+ @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread (esc)
+
+ $ $BAM2SAM --no-header $INPUT_1 | cut -f 1,3,4 | head -n 10
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49050/48_1132\tlambda_NEB3011\t1 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/0_344\tlambda_NEB3011\t676 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+
+ $ $BAM2SAM --header-only $INPUT_2
+ @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.1 (esc)
+ @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+ @RG\tID:a9a22406c5\tDS:READTYPE=SUBREAD;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3;InsertionQV=iq;DeletionQV=dq;SubstitutionQV=sq;MergeQV=mq;SubstitutionTag=st;DeletionTag=dt\tPL:PACBIO\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377 (esc)
+ @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread (esc)
+
+ $ $BAM2SAM --no-header $INPUT_2 | cut -f 1,3,4 | head -n 10
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/387_1134\tlambda_NEB3011\t303 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7247/7338_7831\tlambda_NEB3011\t4904 (esc)
+
+Normal Merge from FOFN:
+
+ $ $PBMERGE -o $MERGED_BAM $INPUT_FOFN
+
+ $ [ -f $MERGED_BAM ] && echo "Found" || echo "Not found"
+ Found
+
+ $ [ -f $MERGED_BAM_PBI ] && echo "Found" || echo "Not found"
+ Found
+
+ $ $BAM2SAM --header-only $MERGED_BAM
+ @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.1 (esc)
+ @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+ @RG\tID:a9a22406c5\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377\tPM:SEQUEL (esc)
+ @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread (esc)
+ @PG\tID:pbmerge-0.5.0\tPN:pbmerge\tVN:0.5.0 (esc)
+
+ $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1,3,4 | head -n 20
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49050/48_1132\tlambda_NEB3011\t1 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/387_1134\tlambda_NEB3011\t303 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/0_344\tlambda_NEB3011\t676 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7247/7338_7831\tlambda_NEB3011\t4904 (esc)
+
+ $ rm $MERGED_BAM
+ $ rm $MERGED_BAM_PBI
+
+Normal Merge from FOFN (disabled PBI):
+
+ $ $PBMERGE --no-pbi -o $MERGED_BAM $INPUT_FOFN
+
+ $ [ -f $MERGED_BAM ] && echo "Found" || echo "Not found"
+ Found
+
+ $ [ -f $MERGED_BAM_PBI ] && echo "Found" || echo "Not found"
+ Not found
+
+ $ $BAM2SAM --header-only $MERGED_BAM
+ @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.1 (esc)
+ @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+ @RG\tID:a9a22406c5\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377\tPM:SEQUEL (esc)
+ @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread (esc)
+ @PG\tID:pbmerge-0.5.0\tPN:pbmerge\tVN:0.5.0 (esc)
+
+ $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1,3,4 | head -n 20
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49050/48_1132\tlambda_NEB3011\t1 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/387_1134\tlambda_NEB3011\t303 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/0_344\tlambda_NEB3011\t676 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7247/7338_7831\tlambda_NEB3011\t4904 (esc)
+
+ $ rm $MERGED_BAM
diff --git a/tests/src/cram/pbmerge_mixed_ordering.t b/tests/src/cram/pbmerge_mixed_ordering.t
new file mode 100644
index 0000000..70cbe74
--- /dev/null
+++ b/tests/src/cram/pbmerge_mixed_ordering.t
@@ -0,0 +1,57 @@
+Setup:
+
+ $ TOOLS_BIN="$TESTDIR/../../../bin" && export TOOLS_BIN
+ $ PBMERGE="$TOOLS_BIN/pbmerge" && export PBMERGE
+ $ BAM2SAM="$TOOLS_BIN/bam2sam" && export BAM2SAM
+
+ $ DATADIR="$TESTDIR/../../data" && export DATADIR
+ $ UNALIGNED_BAM="$DATADIR/polymerase/internal.hqregions.bam" && export UNALIGNED_BAM
+ $ ALIGNED_BAM="$DATADIR/dataset/bam_mapping_1.bam" && export ALIGNED_BAM
+
+ $ MERGED_BAM="/tmp/mixed_ordering_merged.bam" && export MERGED_BAM
+
+Sanity Check:
+
+ $ $BAM2SAM --header-only $UNALIGNED_BAM
+ @HD\tVN:1.1\tSO:unknown\tpb:3.0.1 (esc)
+ @RG\tID:ca75d884\tPL:PACBIO\tDS:READTYPE=HQREGION;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie (esc)
+ @PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0 (esc)
+ @PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0 (esc)
+ @PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0 (esc)
+
+ $ $BAM2SAM --no-header $UNALIGNED_BAM | cut -f 1
+ ArminsFakeMovie/100000/2659_7034
+
+ $ $BAM2SAM --header-only $ALIGNED_BAM
+ @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.1 (esc)
+ @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+ @RG\tID:a9a22406c5\tDS:READTYPE=SUBREAD;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3;InsertionQV=iq;DeletionQV=dq;SubstitutionQV=sq;MergeQV=mq;SubstitutionTag=st;DeletionTag=dt\tPL:PACBIO\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377 (esc)
+ @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread (esc)
+
+ $ $BAM2SAM --no-header $ALIGNED_BAM | cut -f 1,3,4 | head -n 10
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49050/48_1132\tlambda_NEB3011\t1 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/0_344\tlambda_NEB3011\t676 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+
+Normal Merge - should fail:
+
+ $ $PBMERGE $UNALIGNED_BAM $ALIGNED_BAM > $MERGED_BAM
+ ERROR: BAM file sort orders do not match, aborting merge
+ [1]
+
+Shuffle Input - should fail:
+
+ $ $PBMERGE $ALIGNED_BAM $UNALIGNED_BAM > $MERGED_BAM
+ ERROR: BAM file sort orders do not match, aborting merge
+ [1]
+
+Cleanup:
+
+ $ rm $MERGED_BAM
diff --git a/tests/src/cram/pbmerge_pacbio_ordering.t b/tests/src/cram/pbmerge_pacbio_ordering.t
new file mode 100644
index 0000000..729c627
--- /dev/null
+++ b/tests/src/cram/pbmerge_pacbio_ordering.t
@@ -0,0 +1,227 @@
+Setup:
+
+ $ TOOLS_BIN="$TESTDIR/../../../bin" && export TOOLS_BIN
+ $ PBMERGE="$TOOLS_BIN/pbmerge" && export PBMERGE
+ $ BAM2SAM="$TOOLS_BIN/bam2sam" && export BAM2SAM
+
+ $ DATADIR="$TESTDIR/../../data" && export DATADIR
+ $ HQREGION_BAM="$DATADIR/polymerase/internal.hqregions.bam" && export HQREGION_BAM
+ $ SCRAPS_BAM="$DATADIR/polymerase/internal.scraps.bam" && export SCRAPS_BAM
+
+ $ MERGED_BAM="/tmp/pacbio_ordering_merged.bam" && export MERGED_BAM
+ $ MERGED_BAM_PBI="/tmp/pacbio_ordering_merged.bam.pbi" && export MERGED_BAM_PBI
+
+Sanity Check:
+
+ $ $BAM2SAM --header-only $HQREGION_BAM
+ @HD\tVN:1.1\tSO:unknown\tpb:3.0.1 (esc)
+ @RG\tID:ca75d884\tPL:PACBIO\tDS:READTYPE=HQREGION;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie (esc)
+ @PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0 (esc)
+ @PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0 (esc)
+ @PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0 (esc)
+
+ $ $BAM2SAM --no-header $HQREGION_BAM | cut -f 1
+ ArminsFakeMovie/100000/2659_7034
+
+ $ $BAM2SAM --header-only $SCRAPS_BAM
+ @HD\tVN:1.1\tSO:unknown\tpb:3.0.1 (esc)
+ @RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie (esc)
+ @PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0 (esc)
+ @PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0 (esc)
+ @PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0 (esc)
+
+ $ $BAM2SAM --no-header $SCRAPS_BAM | cut -f 1
+ ArminsFakeMovie/100000/0_2659
+ ArminsFakeMovie/100000/3025_3047
+ ArminsFakeMovie/100000/3047_3095
+ ArminsFakeMovie/100000/3095_3116
+ ArminsFakeMovie/100000/3628_3650
+ ArminsFakeMovie/100000/3650_3700
+ ArminsFakeMovie/100000/3700_3722
+ ArminsFakeMovie/100000/4267_4289
+ ArminsFakeMovie/100000/4289_4335
+ ArminsFakeMovie/100000/4335_4356
+ ArminsFakeMovie/100000/4864_4888
+ ArminsFakeMovie/100000/4888_4939
+ ArminsFakeMovie/100000/4939_4960
+ ArminsFakeMovie/100000/5477_5498
+ ArminsFakeMovie/100000/5498_5546
+ ArminsFakeMovie/100000/5546_5571
+ ArminsFakeMovie/100000/6087_6116
+ ArminsFakeMovie/100000/6116_6173
+ ArminsFakeMovie/100000/6173_6199
+ ArminsFakeMovie/100000/6719_6740
+ ArminsFakeMovie/100000/6740_6790
+ ArminsFakeMovie/100000/6790_6812
+ ArminsFakeMovie/100000/7034_7035
+
+Normal Merge:
+
+ $ $PBMERGE $HQREGION_BAM $SCRAPS_BAM > $MERGED_BAM
+
+ $ $BAM2SAM --header-only $MERGED_BAM
+ @HD\tVN:1.1\tSO:unknown\tpb:3.0.1 (esc)
+ @RG\tID:ca75d884\tPL:PACBIO\tDS:READTYPE=HQREGION;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+ @RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+ @PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0 (esc)
+ @PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0 (esc)
+ @PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0 (esc)
+ @PG\tID:pbmerge-0.5.0\tPN:pbmerge\tVN:0.5.0 (esc)
+
+ $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1
+ ArminsFakeMovie/100000/0_2659
+ ArminsFakeMovie/100000/2659_7034
+ ArminsFakeMovie/100000/3025_3047
+ ArminsFakeMovie/100000/3047_3095
+ ArminsFakeMovie/100000/3095_3116
+ ArminsFakeMovie/100000/3628_3650
+ ArminsFakeMovie/100000/3650_3700
+ ArminsFakeMovie/100000/3700_3722
+ ArminsFakeMovie/100000/4267_4289
+ ArminsFakeMovie/100000/4289_4335
+ ArminsFakeMovie/100000/4335_4356
+ ArminsFakeMovie/100000/4864_4888
+ ArminsFakeMovie/100000/4888_4939
+ ArminsFakeMovie/100000/4939_4960
+ ArminsFakeMovie/100000/5477_5498
+ ArminsFakeMovie/100000/5498_5546
+ ArminsFakeMovie/100000/5546_5571
+ ArminsFakeMovie/100000/6087_6116
+ ArminsFakeMovie/100000/6116_6173
+ ArminsFakeMovie/100000/6173_6199
+ ArminsFakeMovie/100000/6719_6740
+ ArminsFakeMovie/100000/6740_6790
+ ArminsFakeMovie/100000/6790_6812
+ ArminsFakeMovie/100000/7034_7035
+
+ $ rm $MERGED_BAM
+
+Shuffle Input:
+
+ $ $PBMERGE $SCRAPS_BAM $HQREGION_BAM > $MERGED_BAM
+
+ $ $BAM2SAM --header-only $MERGED_BAM
+ @HD\tVN:1.1\tSO:unknown\tpb:3.0.1 (esc)
+ @RG\tID:ca75d884\tPL:PACBIO\tDS:READTYPE=HQREGION;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+ @RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+ @PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0 (esc)
+ @PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0 (esc)
+ @PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0 (esc)
+ @PG\tID:pbmerge-0.5.0\tPN:pbmerge\tVN:0.5.0 (esc)
+
+ $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1
+ ArminsFakeMovie/100000/0_2659
+ ArminsFakeMovie/100000/2659_7034
+ ArminsFakeMovie/100000/3025_3047
+ ArminsFakeMovie/100000/3047_3095
+ ArminsFakeMovie/100000/3095_3116
+ ArminsFakeMovie/100000/3628_3650
+ ArminsFakeMovie/100000/3650_3700
+ ArminsFakeMovie/100000/3700_3722
+ ArminsFakeMovie/100000/4267_4289
+ ArminsFakeMovie/100000/4289_4335
+ ArminsFakeMovie/100000/4335_4356
+ ArminsFakeMovie/100000/4864_4888
+ ArminsFakeMovie/100000/4888_4939
+ ArminsFakeMovie/100000/4939_4960
+ ArminsFakeMovie/100000/5477_5498
+ ArminsFakeMovie/100000/5498_5546
+ ArminsFakeMovie/100000/5546_5571
+ ArminsFakeMovie/100000/6087_6116
+ ArminsFakeMovie/100000/6116_6173
+ ArminsFakeMovie/100000/6173_6199
+ ArminsFakeMovie/100000/6719_6740
+ ArminsFakeMovie/100000/6740_6790
+ ArminsFakeMovie/100000/6790_6812
+ ArminsFakeMovie/100000/7034_7035
+
+ $ rm $MERGED_BAM
+
+Explicit Output Filename (also enables PBI):
+
+ $ $PBMERGE -o $MERGED_BAM $HQREGION_BAM $SCRAPS_BAM
+
+ $ $BAM2SAM --header-only $MERGED_BAM
+ @HD\tVN:1.1\tSO:unknown\tpb:3.0.1 (esc)
+ @RG\tID:ca75d884\tPL:PACBIO\tDS:READTYPE=HQREGION;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+ @RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+ @PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0 (esc)
+ @PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0 (esc)
+ @PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0 (esc)
+ @PG\tID:pbmerge-0.5.0\tPN:pbmerge\tVN:0.5.0 (esc)
+
+ $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1
+ ArminsFakeMovie/100000/0_2659
+ ArminsFakeMovie/100000/2659_7034
+ ArminsFakeMovie/100000/3025_3047
+ ArminsFakeMovie/100000/3047_3095
+ ArminsFakeMovie/100000/3095_3116
+ ArminsFakeMovie/100000/3628_3650
+ ArminsFakeMovie/100000/3650_3700
+ ArminsFakeMovie/100000/3700_3722
+ ArminsFakeMovie/100000/4267_4289
+ ArminsFakeMovie/100000/4289_4335
+ ArminsFakeMovie/100000/4335_4356
+ ArminsFakeMovie/100000/4864_4888
+ ArminsFakeMovie/100000/4888_4939
+ ArminsFakeMovie/100000/4939_4960
+ ArminsFakeMovie/100000/5477_5498
+ ArminsFakeMovie/100000/5498_5546
+ ArminsFakeMovie/100000/5546_5571
+ ArminsFakeMovie/100000/6087_6116
+ ArminsFakeMovie/100000/6116_6173
+ ArminsFakeMovie/100000/6173_6199
+ ArminsFakeMovie/100000/6719_6740
+ ArminsFakeMovie/100000/6740_6790
+ ArminsFakeMovie/100000/6790_6812
+ ArminsFakeMovie/100000/7034_7035
+
+ $ [ -f $MERGED_BAM_PBI ] && echo "Found" || echo "Not found"
+ Found
+
+ $ rm $MERGED_BAM
+ $ rm $MERGED_BAM_PBI
+
+Explicit Output Filename (with disabled PBI):
+
+ $ $PBMERGE -o $MERGED_BAM --no-pbi $HQREGION_BAM $SCRAPS_BAM
+
+ $ $BAM2SAM --header-only $MERGED_BAM
+ @HD\tVN:1.1\tSO:unknown\tpb:3.0.1 (esc)
+ @RG\tID:ca75d884\tPL:PACBIO\tDS:READTYPE=HQREGION;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+ @RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+ @PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0 (esc)
+ @PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0 (esc)
+ @PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0 (esc)
+ @PG\tID:pbmerge-0.5.0\tPN:pbmerge\tVN:0.5.0 (esc)
+
+ $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1
+ ArminsFakeMovie/100000/0_2659
+ ArminsFakeMovie/100000/2659_7034
+ ArminsFakeMovie/100000/3025_3047
+ ArminsFakeMovie/100000/3047_3095
+ ArminsFakeMovie/100000/3095_3116
+ ArminsFakeMovie/100000/3628_3650
+ ArminsFakeMovie/100000/3650_3700
+ ArminsFakeMovie/100000/3700_3722
+ ArminsFakeMovie/100000/4267_4289
+ ArminsFakeMovie/100000/4289_4335
+ ArminsFakeMovie/100000/4335_4356
+ ArminsFakeMovie/100000/4864_4888
+ ArminsFakeMovie/100000/4888_4939
+ ArminsFakeMovie/100000/4939_4960
+ ArminsFakeMovie/100000/5477_5498
+ ArminsFakeMovie/100000/5498_5546
+ ArminsFakeMovie/100000/5546_5571
+ ArminsFakeMovie/100000/6087_6116
+ ArminsFakeMovie/100000/6116_6173
+ ArminsFakeMovie/100000/6173_6199
+ ArminsFakeMovie/100000/6719_6740
+ ArminsFakeMovie/100000/6740_6790
+ ArminsFakeMovie/100000/6790_6812
+ ArminsFakeMovie/100000/7034_7035
+
+ $ [ -f $MERGED_BAM_PBI ] && echo "Found" || echo "Not found"
+ Not found
+
+ $ rm $MERGED_BAM
diff --git a/tests/src/python/test/test_Accuracy.py b/tests/src/python/test/test_Accuracy.py
index 7bef5db..a8b2112 100755
--- a/tests/src/python/test/test_Accuracy.py
+++ b/tests/src/python/test/test_Accuracy.py
@@ -49,17 +49,17 @@ class AccuracyTest(unittest.TestCase):
# ------------ TESTS --------------
def test_clamp(self):
- a_zero = PacBioBam.Accuracy(0)
- a_neg = PacBioBam.Accuracy(-1)
- a_min = PacBioBam.Accuracy(0)
- a_normal = PacBioBam.Accuracy(300)
- a_max = PacBioBam.Accuracy(1000)
- a_tooLarge = PacBioBam.Accuracy(2000)
+ a_zero = PacBioBam.Accuracy(0.0)
+ a_neg = PacBioBam.Accuracy(-0.5)
+ a_min = PacBioBam.Accuracy(0.0)
+ a_normal = PacBioBam.Accuracy(0.9)
+ a_max = PacBioBam.Accuracy(1.0)
+ a_tooLarge = PacBioBam.Accuracy(1.1)
- self.assertEqual(0, int(a_zero))
- self.assertEqual(0, int(a_neg))
- self.assertEqual(0, int(a_min))
- self.assertEqual(300, int(a_normal))
- self.assertEqual(1000, int(a_max))
- self.assertEqual(1000, int(a_tooLarge))
+ self.assertAlmostEqual(float(0.0), float(a_zero))
+ self.assertAlmostEqual(float(0.0), float(a_neg))
+ self.assertAlmostEqual(float(0.0), float(a_min))
+ self.assertAlmostEqual(float(0.9), float(a_normal))
+ self.assertAlmostEqual(float(1.0), float(a_max))
+ self.assertAlmostEqual(float(1.0), float(a_tooLarge))
\ No newline at end of file
diff --git a/tests/src/python/test/test_BamFile.py b/tests/src/python/test/test_BamFile.py
index 214b2a8..aabfc59 100755
--- a/tests/src/python/test/test_BamFile.py
+++ b/tests/src/python/test/test_BamFile.py
@@ -55,7 +55,6 @@ class BamFileTest(unittest.TestCase):
def test_ctor(self):
f = PacBioBam.BamFile(self.ex2BamFn)
- self.assertFalse(f.IsPacBioBAM())
def test_nonExistentFile(self):
with self.assertRaises(RuntimeError):
diff --git a/tests/src/python/test/test_BamHeader.py b/tests/src/python/test/test_BamHeader.py
index 076683b..3a08fad 100755
--- a/tests/src/python/test/test_BamHeader.py
+++ b/tests/src/python/test/test_BamHeader.py
@@ -71,7 +71,7 @@ class BamHeaderTest(unittest.TestCase):
def test_decode(self):
- text = ("@HD\tVN:1.1\tSO:queryname\tpb:3.0b3\n"
+ text = ("@HD\tVN:1.1\tSO:queryname\tpb:3.0.1\n"
"@SQ\tSN:chr1\tLN:2038\tSP:chocobo\n"
"@SQ\tSN:chr2\tLN:3042\tSP:chocobo\n"
"@RG\tID:rg1\tSM:control\n"
@@ -85,7 +85,7 @@ class BamHeaderTest(unittest.TestCase):
self.assertEqual("1.1", header.Version())
self.assertEqual("queryname", header.SortOrder())
- self.assertEqual("3.0b3", header.PacBioBamVersion())
+ self.assertEqual("3.0.1", header.PacBioBamVersion())
self.assertEqual(3, len(header.ReadGroups()))
self.assertTrue(header.HasReadGroup("rg1"))
@@ -113,7 +113,7 @@ class BamHeaderTest(unittest.TestCase):
def test_encode(self):
- expectedText = ("@HD\tVN:1.1\tSO:queryname\tpb:3.0b3\n"
+ expectedText = ("@HD\tVN:1.1\tSO:queryname\tpb:3.0.1\n"
"@SQ\tSN:chr1\tLN:2038\tSP:chocobo\n"
"@SQ\tSN:chr2\tLN:3042\tSP:chocobo\n"
"@RG\tID:rg1\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:control\n"
@@ -143,7 +143,7 @@ class BamHeaderTest(unittest.TestCase):
header = PacBioBam.BamHeader()
header.Version("1.1")
header.SortOrder("queryname")
- header.PacBioBamVersion("3.0b3")
+ header.PacBioBamVersion("3.0.1")
header.AddReadGroup(rg1)
header.AddReadGroup(rg2)
header.AddReadGroup(rg3)
diff --git a/tests/src/python/test/test_PolymeraseStitching.py b/tests/src/python/test/test_PolymeraseStitching.py
new file mode 100755
index 0000000..416ac71
--- /dev/null
+++ b/tests/src/python/test/test_PolymeraseStitching.py
@@ -0,0 +1,383 @@
+# Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted (subject to the limitations in the
+# disclaimer below) provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Pacific Biosciences nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+# GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+# BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Author: Derek Barnett
+
+import PacBioBam
+import config
+import unittest
+
+class PolymeraseStitchingTest(unittest.TestCase):
+
+ # ------------ SETUP --------------
+
+ def setUp(self):
+ self.data = config.TestData()
+
+ def runTest(self):
+ self.test_virtualRegions()
+ self.test_internalSubreadsToOriginal()
+ self.test_internalHqToOriginal()
+ self.test_productionSubreadsToOriginal()
+ self.test_productionHqToOriginal()
+
+ # ------------ TESTS --------------
+
+ def test_virtualRegions(self):
+
+ subreadBam = self.data.directory + "/polymerase/internal.subreads.bam"
+ scrapsBam = self.data.directory + "/polymerase/internal.scraps.bam"
+ vpr = PacBioBam.VirtualPolymeraseReader(subreadBam, scrapsBam)
+
+ virtualRecord = vpr.Next()
+
+ # NOTE: this method is disabled
+ #
+ # Any attempt to retrive this value resulted in several
+ # "swig/python detected a memory leak of type 'unknown', no destructor found."
+ # errors (& an empty dictionary result). The same info is available via the
+ # VirtualRegionsTable(regionType) method, though a bit clunkier if you just want
+ # to iterate. But access to region info for specific types are available & correct,
+ # so I'm just going to leave this one out for now. - DB
+ #
+ # regionMap = virtualRecord.VirtualRegionsMap();
+
+ # ADAPTER
+ adapter = virtualRecord.VirtualRegionsTable(PacBioBam.VirtualRegionType_ADAPTER)
+ self.assertEqual(7, len(adapter))
+ self.assertEqual(3047, adapter[0].beginPos);
+ self.assertEqual(3095, adapter[0].endPos);
+ self.assertEqual(3650, adapter[1].beginPos);
+ self.assertEqual(3700, adapter[1].endPos);
+ self.assertEqual(4289, adapter[2].beginPos);
+ self.assertEqual(4335, adapter[2].endPos);
+ self.assertEqual(4888, adapter[3].beginPos);
+ self.assertEqual(4939, adapter[3].endPos);
+ self.assertEqual(5498, adapter[4].beginPos);
+ self.assertEqual(5546, adapter[4].endPos);
+ self.assertEqual(6116, adapter[5].beginPos);
+ self.assertEqual(6173, adapter[5].endPos);
+ self.assertEqual(6740, adapter[6].beginPos);
+ self.assertEqual(6790, adapter[6].endPos);
+
+ # BARCODE
+ barcode = virtualRecord.VirtualRegionsTable(PacBioBam.VirtualRegionType_BARCODE)
+ self.assertEqual(14, len(barcode))
+ self.assertEqual(3025, barcode[0].beginPos);
+ self.assertEqual(3047, barcode[0].endPos);
+ self.assertEqual(3095, barcode[1].beginPos);
+ self.assertEqual(3116, barcode[1].endPos);
+ self.assertEqual(3628, barcode[2].beginPos);
+ self.assertEqual(3650, barcode[2].endPos);
+ self.assertEqual(3700, barcode[3].beginPos);
+ self.assertEqual(3722, barcode[3].endPos);
+ self.assertEqual(4267, barcode[4].beginPos);
+ self.assertEqual(4289, barcode[4].endPos);
+ self.assertEqual(4335, barcode[5].beginPos);
+ self.assertEqual(4356, barcode[5].endPos);
+ self.assertEqual(4864, barcode[6].beginPos);
+ self.assertEqual(4888, barcode[6].endPos);
+ self.assertEqual(4939, barcode[7].beginPos);
+ self.assertEqual(4960, barcode[7].endPos);
+ self.assertEqual(5477, barcode[8].beginPos);
+ self.assertEqual(5498, barcode[8].endPos);
+ self.assertEqual(5546, barcode[9].beginPos);
+ self.assertEqual(5571, barcode[9].endPos);
+ self.assertEqual(6087, barcode[10].beginPos);
+ self.assertEqual(6116, barcode[10].endPos);
+ self.assertEqual(6173, barcode[11].beginPos);
+ self.assertEqual(6199, barcode[11].endPos);
+ self.assertEqual(6719, barcode[12].beginPos);
+ self.assertEqual(6740, barcode[12].endPos);
+ self.assertEqual(6790, barcode[13].beginPos);
+ self.assertEqual(6812, barcode[13].endPos);
+
+ # HQREGION
+ hqregion = virtualRecord.VirtualRegionsTable(PacBioBam.VirtualRegionType_HQREGION)
+ self.assertEqual(1, len(hqregion))
+
+ self.assertEqual(2659, hqregion[0].beginPos);
+ self.assertEqual(7034, hqregion[0].endPos);
+
+ # LQREGION
+ lqregion = virtualRecord.VirtualRegionsTable(PacBioBam.VirtualRegionType_LQREGION)
+ self.assertEqual(2, len(lqregion))
+
+ self.assertEqual(0, lqregion[0].beginPos);
+ self.assertEqual(2659, lqregion[0].endPos);
+ self.assertEqual(7034, lqregion[1].beginPos);
+ self.assertEqual(7035, lqregion[1].endPos);
+
+ # SUBREAD
+ subread = virtualRecord.VirtualRegionsTable(PacBioBam.VirtualRegionType_SUBREAD)
+ self.assertEqual(8, len(subread))
+
+ def test_internalSubreadsToOriginal(self):
+
+ # stitch virtual polymerase record
+ subreadsBam = self.data.directory + "/polymerase/internal.subreads.bam"
+ scrapsBam = self.data.directory + "/polymerase/internal.scraps.bam"
+ vpr = PacBioBam.VirtualPolymeraseReader(subreadsBam, scrapsBam)
+
+ self.assertTrue(vpr.HasNext())
+ virtualRecord = vpr.Next()
+ self.assertFalse(vpr.HasNext())
+
+ # fetch original polymerase record
+ polyBam = PacBioBam.DataSet(self.data.directory + "/polymerase/internal.polymerase.bam")
+ polyQuery = PacBioBam.EntireFileQuery(polyBam)
+
+ polyIter = polyQuery.begin()
+ polyEnd = polyQuery.end()
+
+ self.assertTrue(polyIter != polyEnd)
+ polyRecord = polyIter.value()
+ polyIter.incr()
+ self.assertTrue(polyIter == polyEnd)
+
+ # compare
+ self.compare(polyRecord, virtualRecord)
+
+ def test_internalHqToOriginal(self):
+
+ # stitch virtual polymerase record
+ hqRegionsBam = self.data.directory + "/polymerase/internal.hqregions.bam"
+ lqRegionsBam = self.data.directory + "/polymerase/internal.lqregions.bam"
+ vpr = PacBioBam.VirtualPolymeraseReader(hqRegionsBam, lqRegionsBam)
+
+ self.assertTrue(vpr.HasNext())
+ virtualRecord = vpr.Next()
+ self.assertFalse(vpr.HasNext())
+
+ # fetch original polymerase record
+ polyBam = PacBioBam.DataSet(self.data.directory + "/polymerase/internal.polymerase.bam")
+ polyQuery = PacBioBam.EntireFileQuery(polyBam)
+
+ polyIter = polyQuery.begin()
+ polyEnd = polyQuery.end()
+
+ self.assertTrue(polyIter != polyEnd)
+ polyRecord = polyIter.value()
+ polyIter.incr()
+ self.assertTrue(polyIter == polyEnd)
+
+ # # compare
+ self.compare(polyRecord, virtualRecord)
+
+ def test_productionSubreadsToOriginal(self):
+
+ # stitch virtual polymerase record
+ subreadsBam = self.data.directory + "/polymerase/production.subreads.bam"
+ scrapsBam = self.data.directory + "/polymerase/production.scraps.bam"
+ vpr = PacBioBam.VirtualPolymeraseReader(subreadsBam, scrapsBam)
+
+ self.assertTrue(vpr.HasNext())
+ virtualRecord = vpr.Next()
+ self.assertFalse(vpr.HasNext())
+
+ # fetch original polymerase record
+ polyBam = PacBioBam.DataSet(self.data.directory + "/polymerase/production.polymerase.bam")
+ polyQuery = PacBioBam.EntireFileQuery(polyBam)
+
+ polyIter = polyQuery.begin()
+ polyEnd = polyQuery.end()
+
+ self.assertTrue(polyIter != polyEnd)
+ polyRecord = polyIter.value()
+ polyIter.incr()
+ self.assertTrue(polyIter == polyEnd)
+
+ # compare
+ self.assertEqual(polyRecord.FullName(), virtualRecord.FullName());
+ self.assertEqual(polyRecord.HoleNumber(), virtualRecord.HoleNumber());
+ self.assertEqual(polyRecord.NumPasses(), virtualRecord.NumPasses());
+ self.assertEqual(polyRecord.Sequence(), virtualRecord.Sequence());
+ self.assertEqual(polyRecord.DeletionTag(), virtualRecord.DeletionTag());
+ self.assertEqual(polyRecord.SubstitutionTag(), virtualRecord.SubstitutionTag());
+ self.assertEqual(polyRecord.IPD(), virtualRecord.IPDV1Frames());
+ self.assertEqual(polyRecord.ReadGroup(), virtualRecord.ReadGroup());
+
+ self.assertAlmostEqual(float(polyRecord.ReadAccuracy()), float(virtualRecord.ReadAccuracy()));
+
+ self.assertEqual(polyRecord.Qualities().Fastq(), virtualRecord.Qualities().Fastq());
+ self.assertEqual(polyRecord.DeletionQV().Fastq(), virtualRecord.DeletionQV().Fastq());
+ self.assertEqual(polyRecord.InsertionQV().Fastq(), virtualRecord.InsertionQV().Fastq());
+ self.assertEqual(polyRecord.MergeQV().Fastq(), virtualRecord.MergeQV().Fastq());
+ self.assertEqual(polyRecord.SubstitutionQV().Fastq(), virtualRecord.SubstitutionQV().Fastq());
+
+ def test_productionHqToOriginal(self):
+
+ # stitch virtual polymerase record
+ hqRegionsBam = self.data.directory + "/polymerase/production_hq.hqregion.bam"
+ lqRegionsBam = self.data.directory + "/polymerase/production_hq.scraps.bam"
+ vpr = PacBioBam.VirtualPolymeraseReader(hqRegionsBam, lqRegionsBam)
+
+ self.assertTrue(vpr.HasNext())
+ virtualRecord = vpr.Next()
+ self.assertFalse(vpr.HasNext())
+
+ # fetch original polymerase record
+ polyBam = PacBioBam.DataSet(self.data.directory + "/polymerase/production.polymerase.bam")
+ polyQuery = PacBioBam.EntireFileQuery(polyBam)
+
+ polyIter = polyQuery.begin()
+ polyEnd = polyQuery.end()
+
+ self.assertTrue(polyIter != polyEnd)
+ polyRecord = polyIter.value()
+ polyIter.incr()
+ self.assertTrue(polyIter == polyEnd)
+
+ # compare
+ self.assertFalse(polyRecord.HasPulseCall());
+ self.assertFalse(virtualRecord.HasPulseCall());
+
+ self.assertEqual(polyRecord.FullName(), virtualRecord.FullName());
+ self.assertEqual(polyRecord.HoleNumber(), virtualRecord.HoleNumber());
+ self.assertEqual(polyRecord.NumPasses(), virtualRecord.NumPasses());
+ self.assertEqual(polyRecord.Sequence(), virtualRecord.Sequence());
+ self.assertEqual(polyRecord.DeletionTag(), virtualRecord.DeletionTag());
+ self.assertEqual(polyRecord.SubstitutionTag(), virtualRecord.SubstitutionTag());
+ self.assertEqual(polyRecord.IPD(), virtualRecord.IPDV1Frames());
+ self.assertEqual(polyRecord.ReadGroup(), virtualRecord.ReadGroup());
+
+ self.assertAlmostEqual(float(polyRecord.ReadAccuracy()), float(virtualRecord.ReadAccuracy()));
+
+ self.assertEqual(polyRecord.Qualities().Fastq(), virtualRecord.Qualities().Fastq());
+ self.assertEqual(polyRecord.DeletionQV().Fastq(), virtualRecord.DeletionQV().Fastq());
+ self.assertEqual(polyRecord.InsertionQV().Fastq(), virtualRecord.InsertionQV().Fastq());
+ self.assertEqual(polyRecord.MergeQV().Fastq(), virtualRecord.MergeQV().Fastq());
+ self.assertEqual(polyRecord.SubstitutionQV().Fastq(), virtualRecord.SubstitutionQV().Fastq());
+
+ self.assertTrue(polyRecord.HasDeletionQV());
+ self.assertTrue(polyRecord.HasDeletionTag());
+ self.assertTrue(polyRecord.HasInsertionQV());
+ self.assertTrue(polyRecord.HasMergeQV());
+ self.assertTrue(polyRecord.HasSubstitutionQV());
+ self.assertTrue(polyRecord.HasSubstitutionTag());
+ self.assertTrue(polyRecord.HasIPD());
+ self.assertFalse(polyRecord.HasLabelQV());
+ self.assertFalse(polyRecord.HasAltLabelQV());
+ self.assertFalse(polyRecord.HasAltLabelTag());
+ self.assertFalse(polyRecord.HasPkmean());
+ self.assertFalse(polyRecord.HasPkmid());
+ self.assertFalse(polyRecord.HasPulseCall());
+ self.assertFalse(polyRecord.HasPulseWidth());
+ self.assertFalse(polyRecord.HasPrePulseFrames());
+ self.assertFalse(polyRecord.HasPulseCallWidth());
+
+ self.assertTrue(virtualRecord.HasDeletionQV());
+ self.assertTrue(virtualRecord.HasDeletionTag());
+ self.assertTrue(virtualRecord.HasInsertionQV());
+ self.assertTrue(virtualRecord.HasMergeQV());
+ self.assertTrue(virtualRecord.HasSubstitutionQV());
+ self.assertTrue(virtualRecord.HasSubstitutionTag());
+ self.assertTrue(virtualRecord.HasIPD());
+ self.assertFalse(virtualRecord.HasLabelQV());
+ self.assertFalse(virtualRecord.HasAltLabelQV());
+ self.assertFalse(virtualRecord.HasAltLabelTag());
+ self.assertFalse(virtualRecord.HasPkmean());
+ self.assertFalse(virtualRecord.HasPkmid());
+ self.assertFalse(virtualRecord.HasPulseCall());
+ self.assertFalse(virtualRecord.HasPulseWidth());
+ self.assertFalse(virtualRecord.HasPrePulseFrames());
+ self.assertFalse(virtualRecord.HasPulseCallWidth());
+
+ # ------------ HELPERS --------------
+
+ def compare(self, b1, b2):
+
+ self.assertTrue(b1.HasDeletionQV());
+ self.assertTrue(b1.HasDeletionTag());
+ self.assertTrue(b1.HasInsertionQV());
+ self.assertTrue(b1.HasMergeQV());
+ self.assertTrue(b1.HasSubstitutionQV());
+ self.assertTrue(b1.HasSubstitutionTag());
+ self.assertTrue(b1.HasLabelQV());
+ self.assertTrue(b1.HasAltLabelQV());
+ self.assertTrue(b1.HasAltLabelTag());
+ self.assertTrue(b1.HasPkmean());
+ self.assertTrue(b1.HasPkmid());
+ self.assertTrue(b1.HasPulseCall());
+ self.assertTrue(b1.HasIPD());
+ self.assertTrue(b1.HasPulseWidth());
+ self.assertTrue(b1.HasPrePulseFrames());
+ self.assertTrue(b1.HasPulseCallWidth());
+ self.assertTrue(b1.HasPulseMergeQV());
+
+ self.assertTrue(b2.HasDeletionQV());
+ self.assertTrue(b2.HasDeletionTag());
+ self.assertTrue(b2.HasInsertionQV());
+ self.assertTrue(b2.HasMergeQV());
+ self.assertTrue(b2.HasSubstitutionQV());
+ self.assertTrue(b2.HasSubstitutionTag());
+ self.assertTrue(b2.HasLabelQV());
+ self.assertTrue(b2.HasAltLabelQV());
+ self.assertTrue(b2.HasAltLabelTag());
+ self.assertTrue(b2.HasPkmean());
+ self.assertTrue(b2.HasPkmid());
+ self.assertTrue(b2.HasPulseCall());
+ self.assertTrue(b2.HasIPD());
+ self.assertTrue(b2.HasPulseWidth());
+ self.assertTrue(b2.HasPrePulseFrames());
+ self.assertTrue(b2.HasPulseCallWidth());
+ self.assertTrue(b2.HasPulseMergeQV());
+
+ self.assertEqual(b1.FullName(), b2.FullName());
+ self.assertEqual(b1.HoleNumber(), b2.HoleNumber());
+ self.assertEqual(b1.NumPasses(), b2.NumPasses());
+ self.assertEqual(b1.Sequence(), b2.Sequence());
+ self.assertEqual(b1.DeletionTag(), b2.DeletionTag());
+ self.assertEqual(b1.SubstitutionTag(), b2.SubstitutionTag());
+ self.assertEqual(b1.AltLabelTag(), b2.AltLabelTag());
+ self.assertEqual(b1.Pkmean(), b2.Pkmean());
+ self.assertEqual(b1.Pkmid(), b2.Pkmid());
+ self.assertEqual(b1.PulseCall(), b2.PulseCall());
+ self.assertEqual(b1.IPD(), b2.IPD());
+ self.assertEqual(b1.PulseWidth(), b2.PulseWidth());
+ self.assertEqual(b1.PrePulseFrames(), b2.PrePulseFrames());
+ self.assertEqual(b1.PulseCallWidth(), b2.PulseCallWidth());
+ self.assertEqual(b1.ReadGroup(), b2.ReadGroup());
+
+ self.assertEqual(b1.Qualities().Fastq(), b2.Qualities().Fastq());
+ self.assertEqual(b1.DeletionQV().Fastq(), b2.DeletionQV().Fastq());
+ self.assertEqual(b1.InsertionQV().Fastq(), b2.InsertionQV().Fastq());
+ self.assertEqual(b1.MergeQV().Fastq(), b2.MergeQV().Fastq());
+ self.assertEqual(b1.SubstitutionQV().Fastq(), b2.SubstitutionQV().Fastq());
+ self.assertEqual(b1.PulseMergeQV().Fastq(), b2.PulseMergeQV().Fastq());
+ self.assertEqual(b1.LabelQV().Fastq(), b2.LabelQV().Fastq());
+ self.assertEqual(b1.AltLabelQV().Fastq(), b2.AltLabelQV().Fastq());
+
+
diff --git a/tests/src/test_Accuracy.cpp b/tests/src/test_Accuracy.cpp
index 17d1f59..9750dd4 100644
--- a/tests/src/test_Accuracy.cpp
+++ b/tests/src/test_Accuracy.cpp
@@ -47,17 +47,17 @@ using namespace std;
TEST(AccuracyTest, ClampValues)
{
- Accuracy a_zero(0);
- Accuracy a_neg(-1);
- Accuracy a_min(0);
- Accuracy a_normal(300);
- Accuracy a_max(1000);
- Accuracy a_tooLarge(2000);
+ Accuracy a_zero(0.0);
+ Accuracy a_neg(-0.5);
+ Accuracy a_min(0.0);
+ Accuracy a_normal(0.9);
+ Accuracy a_max(1.0);
+ Accuracy a_tooLarge(1.1);
- EXPECT_EQ(0, a_zero);
- EXPECT_EQ(0, a_neg);
- EXPECT_EQ(0, a_min);
- EXPECT_EQ(300, a_normal);
- EXPECT_EQ(1000, a_max);
- EXPECT_EQ(1000, a_tooLarge);
+ EXPECT_FLOAT_EQ(0.0, a_zero);
+ EXPECT_FLOAT_EQ(0.0, a_neg);
+ EXPECT_FLOAT_EQ(0.0, a_min);
+ EXPECT_FLOAT_EQ(0.9, a_normal);
+ EXPECT_FLOAT_EQ(1.0, a_max);
+ EXPECT_FLOAT_EQ(1.0, a_tooLarge);
}
diff --git a/tests/src/test_AlignmentPrinter.cpp b/tests/src/test_AlignmentPrinter.cpp
index 0034f2d..9aa7f5b 100644
--- a/tests/src/test_AlignmentPrinter.cpp
+++ b/tests/src/test_AlignmentPrinter.cpp
@@ -62,41 +62,91 @@ const string singleInsertionBam = tests::Data_Dir + "/aligned.bam";
TEST(AlignmentPrinterTest, Print)
{
IndexedFastaReader r(lambdaFasta);
+ AlignmentPrinter pretty(r);
BamFile bamFile(singleInsertionBam);
EntireFileQuery bamQuery(bamFile);
-
auto it = bamQuery.begin();
-
- // std::cerr << record.AlignedStart() << std::endl;
- // std::cerr << record.Sequence(Orientation::GENOMIC, true) << std::endl;
- // std::cerr << record.Sequence(Orientation::GENOMIC, true, true) << std::endl;
+ // funky formatting used to format alignments
+ auto expected = string
+ {
+ "Read : singleInsertion2\n"
+ "Reference : lambda_NEB3011\n"
+ "\n"
+ "Read-length : 49\n"
+ "Concordance : 0.96\n"
+ "\n"
+ "5210 : GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGG : 5249\n"
+ " \x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||||| |\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m|||||||||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||||| ||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||||||\n"
+ " 0 : GGCTGCAG-GTACAGCGGTCAGGAGGCCAATTGATGCCGG : 39\n"
+ "\n"
+ "5249 : ACTGGCTGAT : 5259\n"
+ " |\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||||||\n"
+ " 39 : ACTGGCTGAT : 49\n"
+ "\n"
+ };
- AlignmentPrinter pretty(r);
+ auto record = *it++;
+ EXPECT_EQ(expected, pretty.Print(record, Orientation::GENOMIC));
- // std::string expected =
- // "Read : singleInsertion2\n"
- // "Reference : lambda_NEB3011\n"
- // "\n"
- // "Read-length : 49\n"
- // "Concordance : 0.96\n"
- // "\n"
- // " GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGGACTGGCTGAT\n"
- // " |||||||| ||||||||||||||||||| |||||||||||||||||||||\n"
- // " GGCTGCAG-GTACAGCGGTCAGGAGGCCAATTGATGCCGGACTGGCTGAT\n";
- // EXPECT_EQ(expected, pretty.Print(record, Orientation::NATIVE));
+ expected = {
+ "Read : singleInsertion\n"
+ "Reference : lambda_NEB3011\n"
+ "\n"
+ "Read-length : 49\n"
+ "Concordance : 0.96\n"
+ "\n"
+ "5210 : GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGG : 5249\n"
+ " \x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||||| |\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m|||||||||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||||| ||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||||||\n"
+ " 0 : GGCTGCAG-GTACAGCGGTCAGGAGGCCAATTGATGCCGG : 39\n"
+ "\n"
+ "5249 : ACTGGCTGAT : 5259\n"
+ " |\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||||||\n"
+ " 39 : ACTGGCTGAT : 49\n"
+ "\n"
+ };
- auto record = *it++;
- std::cerr << pretty.Print(record, Orientation::GENOMIC);
- std::cerr << std::endl << std::endl;
record = *it++;
- std::cerr << pretty.Print(record, Orientation::GENOMIC);
- std::cerr << std::endl << std::endl;
+ EXPECT_EQ(expected, pretty.Print(record, Orientation::GENOMIC));
+
+ expected = {
+ "Read : singleInsertion2\n"
+ "Reference : lambda_NEB3011\n"
+ "\n"
+ "Read-length : 59\n"
+ "Concordance : 0.951\n"
+ "\n"
+ "9377 : AAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGATCG : 9417\n"
+ " |||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m|||||||||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m|||||||||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m|||||||||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||| |\n"
+ " 0 : AAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGA--G : 38\n"
+ "\n"
+ "9417 : CAGCACGGT-AACAGCGGCAA : 9437\n"
+ " |||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||| ||||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||||\n"
+ " 38 : CAGCACGGTAAACAGCGGCAA : 59\n"
+ "\n"
+ };
+
record = *it++;
- std::cerr << pretty.Print(record, Orientation::GENOMIC);
- std::cerr << std::endl << std::endl;
+ EXPECT_EQ(expected, pretty.Print(record, Orientation::GENOMIC));
+
+ expected = {
+ "Read : singleInsertion\n"
+ "Reference : lambda_NEB3011\n"
+ "\n"
+ "Read-length : 59\n"
+ "Concordance : 0.951\n"
+ "\n"
+ "9377 : AAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGATCG : 9417\n"
+ " |||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m|||||||||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m|||||||||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m|||||||||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||| |\n"
+ " 0 : AAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGA--G : 38\n"
+ "\n"
+ "9417 : CAGCACGGT-AACAGCGGCAA : 9437\n"
+ " |||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||| ||||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||||\n"
+ " 38 : CAGCACGGTAAACAGCGGCAA : 59\n"
+ "\n"
+ };
+
record = *it++;
- std::cerr << pretty.Print(record, Orientation::GENOMIC);
- std::cerr << std::endl << std::endl;
+ EXPECT_EQ(expected, pretty.Print(record, Orientation::GENOMIC));
}
diff --git a/tests/src/test_BamFile.cpp b/tests/src/test_BamFile.cpp
index 4ca910d..c8f1be8 100644
--- a/tests/src/test_BamFile.cpp
+++ b/tests/src/test_BamFile.cpp
@@ -42,7 +42,11 @@
#include "TestData.h"
#include <gtest/gtest.h>
#include <pbbam/BamFile.h>
+#include <pbbam/EntireFileQuery.h>
+#include <pbbam/../../src/FileUtils.h>
#include <stdexcept>
+#include <cstdlib>
+#include <unistd.h>
using namespace PacBio;
using namespace PacBio::BAM;
using namespace std;
@@ -67,3 +71,107 @@ TEST(BamFileTest, NonBamFileThrows)
},
std::exception);
}
+
+TEST(BamFileTest, RelativePathBamOk)
+{
+ const string cwd = internal::FileUtils::CurrentWorkingDirectory();
+ ASSERT_EQ(0, chdir(tests::Data_Dir.c_str()));
+ ASSERT_EQ(0, chdir("relative/a"));
+
+ { // direct BAM
+ BamFile file("../b/test1.bam");
+ EntireFileQuery entireFile(file);
+ int count = 0;
+ for (const BamRecord& r : entireFile) {
+ (void)r;
+ ++count;
+ }
+ EXPECT_EQ(10, count);
+ }
+
+ { // dataset from BAM filename
+ DataSet ds("../b/test1.bam");
+ EntireFileQuery entireFile(ds);
+ int count = 0;
+ for (const BamRecord& r : entireFile) {
+ (void)r;
+ ++count;
+ }
+ EXPECT_EQ(10, count);
+ }
+
+ { // dataset from BamFile object
+ BamFile file("../b/test1.bam");
+ DataSet ds(file);
+ EntireFileQuery entireFile(ds);
+ int count = 0;
+ for (const BamRecord& r : entireFile) {
+ (void)r;
+ ++count;
+ }
+ EXPECT_EQ(10, count);
+ }
+
+ ASSERT_EQ(0, chdir(cwd.c_str()));
+}
+
+TEST(BamFileTest, RelativePathXmlOk)
+{
+ const string cwd = internal::FileUtils::CurrentWorkingDirectory();
+
+ ASSERT_EQ(0, chdir(tests::Data_Dir.c_str()));
+
+ {
+ DataSet ds("relative/relative.xml");
+ EntireFileQuery entireFile(ds);
+ int count = 0;
+ for (const BamRecord& r : entireFile) {
+ (void)r;
+ ++count;
+ }
+ EXPECT_EQ(30, count);
+ }
+
+ ASSERT_EQ(0, chdir(cwd.c_str()));
+}
+
+TEST(BamFileTest, RelativePathFofnOk)
+{
+ const string cwd = internal::FileUtils::CurrentWorkingDirectory();
+ ASSERT_EQ(0, chdir(tests::Data_Dir.c_str()));
+
+ { // FOFN containing BAMs in different subdirs
+
+ DataSet ds("relative/relative.fofn");
+ EntireFileQuery entireFile(ds);
+ int count = 0;
+ for (const BamRecord& r : entireFile) {
+ (void)r;
+ ++count;
+ }
+ EXPECT_EQ(30, count);
+ }
+
+ // NOTE: doesn't yet support a FOFN containing an XML with relative paths
+
+// { // FOFN containing subdir BAMs + relative.xml
+
+// DataSet ds("relative/relative2.fofn");
+// EntireFileQuery entireFile(ds);
+// int count = 0;
+// for (const BamRecord& r : entireFile) {
+// (void)r;
+// ++count;
+// }
+// EXPECT_EQ(60, count);
+// }
+
+ ASSERT_EQ(0, chdir(cwd.c_str()));
+}
+
+TEST(BamFileTest, TruncatedFileThrowsOk)
+{
+ const string fn = tests::Data_Dir + "/truncated.bam";
+ EXPECT_THROW(BamFile file(fn), std::runtime_error);
+}
+
diff --git a/tests/src/test_BamHeader.cpp b/tests/src/test_BamHeader.cpp
index c4f872a..b4e9fd6 100644
--- a/tests/src/test_BamHeader.cpp
+++ b/tests/src/test_BamHeader.cpp
@@ -82,7 +82,7 @@ TEST(BamHeaderTest, DefaultConstruction)
TEST(BamHeaderTest, DecodeTest)
{
- const string& text = "@HD\tVN:1.1\tSO:queryname\tpb:3.0b3\n"
+ const string& text = "@HD\tVN:1.1\tSO:queryname\tpb:3.0.1\n"
"@SQ\tSN:chr1\tLN:2038\tSP:chocobo\n"
"@SQ\tSN:chr2\tLN:3042\tSP:chocobo\n"
"@RG\tID:rg1\tSM:control\n"
@@ -96,7 +96,7 @@ TEST(BamHeaderTest, DecodeTest)
EXPECT_EQ(string("1.1"), header.Version());
EXPECT_EQ(string("queryname"), header.SortOrder());
- EXPECT_EQ(string("3.0b3"), header.PacBioBamVersion());
+ EXPECT_EQ(string("3.0.1"), header.PacBioBamVersion());
EXPECT_EQ(3, header.ReadGroups().size());
EXPECT_TRUE(header.HasReadGroup("rg1"));
@@ -124,7 +124,51 @@ TEST(BamHeaderTest, DecodeTest)
EXPECT_EQ(string("citation needed"), header.Comments().at(1));
}
-TEST(BamHeaderCodecTest, EncodeTest)
+TEST(BamHeaderTest, VersionCheckOk)
+{
+
+ // empty
+ EXPECT_THROW({
+ const string text = "@HD\tVN:1.1\tSO:queryname\tpb:\n";
+ BamHeader h(text);
+ (void)h;
+ }, std::runtime_error);
+
+ // old beta version(s)
+ EXPECT_THROW({
+ const string text = "@HD\tVN:1.1\tSO:queryname\tpb:3.0b3\n";
+ BamHeader h(text);
+ (void)h;
+ }, std::runtime_error);
+ EXPECT_THROW({
+ const string text = "@HD\tVN:1.1\tSO:queryname\tpb:3.0b7\n";
+ BamHeader h(text);
+ (void)h;
+ }, std::runtime_error);
+
+ // contains other, invalid info
+ EXPECT_THROW({
+ const string text = "@HD\tVN:1.1\tSO:queryname\tpb:3.0.should_not_work\n";
+ BamHeader h(text);
+ (void)h;
+ }, std::runtime_error);
+
+ // valid syntax, but earlier than minimum allowed version
+ EXPECT_THROW({
+ const string text = "@HD\tVN:1.1\tSO:queryname\tpb:3.0.0\n";
+ BamHeader h(text);
+ (void)h;
+ }, std::runtime_error);
+
+ // correct version syntax, number
+ EXPECT_NO_THROW({
+ const string text = "@HD\tVN:1.1\tSO:queryname\tpb:3.0.1\n";
+ BamHeader h(text);
+ (void)h;
+ });
+}
+
+TEST(BamHeaderTest, EncodeTest)
{
ReadGroupInfo rg1("rg1");
rg1.Sample("control");
@@ -144,7 +188,7 @@ TEST(BamHeaderCodecTest, EncodeTest)
BamHeader header;
header.Version("1.1")
.SortOrder("queryname")
- .PacBioBamVersion("3.0b3")
+ .PacBioBamVersion("3.0.1")
.AddReadGroup(rg1)
.AddReadGroup(rg2)
.AddReadGroup(rg3)
@@ -154,12 +198,12 @@ TEST(BamHeaderCodecTest, EncodeTest)
.AddComment("ipsum and so on")
.AddComment("citation needed");
- const string& expectedText = "@HD\tVN:1.1\tSO:queryname\tpb:3.0b3\n"
+ const string& expectedText = "@HD\tVN:1.1\tSO:queryname\tpb:3.0.1\n"
"@SQ\tSN:chr1\tLN:2038\tSP:chocobo\n"
"@SQ\tSN:chr2\tLN:3042\tSP:chocobo\n"
- "@RG\tID:rg1\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:control\n"
- "@RG\tID:rg2\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\n"
- "@RG\tID:rg3\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\n"
+ "@RG\tID:rg1\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:control\tPM:SEQUEL\n"
+ "@RG\tID:rg2\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
+ "@RG\tID:rg3\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
"@PG\tID:_foo_\tPN:ide\n"
"@CO\tipsum and so on\n"
"@CO\tcitation needed\n";
@@ -188,7 +232,7 @@ TEST(BamHeaderTest, ConvertToRawDataOk)
BamHeader header;
header.Version("1.1")
.SortOrder("queryname")
- .PacBioBamVersion("3.0b3")
+ .PacBioBamVersion("3.0.1")
.AddReadGroup(rg1)
.AddReadGroup(rg2)
.AddReadGroup(rg3)
@@ -198,12 +242,12 @@ TEST(BamHeaderTest, ConvertToRawDataOk)
.AddComment("ipsum and so on")
.AddComment("citation needed");
- const string& expectedText = "@HD\tVN:1.1\tSO:queryname\tpb:3.0b3\n"
+ const string& expectedText = "@HD\tVN:1.1\tSO:queryname\tpb:3.0.1\n"
"@SQ\tSN:chr1\tLN:2038\tSP:chocobo\n"
"@SQ\tSN:chr2\tLN:3042\tSP:chocobo\n"
- "@RG\tID:rg1\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:control\n"
- "@RG\tID:rg2\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\n"
- "@RG\tID:rg3\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\n"
+ "@RG\tID:rg1\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:control\tPM:SEQUEL\n"
+ "@RG\tID:rg2\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
+ "@RG\tID:rg3\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
"@PG\tID:_foo_\tPN:ide\n"
"@CO\tipsum and so on\n"
"@CO\tcitation needed\n";
@@ -241,7 +285,7 @@ TEST(BamHeaderTest, ExtractFromRawDataOk)
BamHeader header;
header.Version("1.1")
.SortOrder("queryname")
- .PacBioBamVersion("3.0b3")
+ .PacBioBamVersion("3.0.1")
.AddReadGroup(rg1)
.AddReadGroup(rg2)
.AddReadGroup(rg3)
@@ -251,12 +295,12 @@ TEST(BamHeaderTest, ExtractFromRawDataOk)
.AddComment("ipsum and so on")
.AddComment("citation needed");
- const string& expectedText = "@HD\tVN:1.1\tSO:queryname\tpb:3.0b3\n"
+ const string& expectedText = "@HD\tVN:1.1\tSO:queryname\tpb:3.0.1\n"
"@SQ\tSN:chr1\tLN:2038\tSP:chocobo\n"
"@SQ\tSN:chr2\tLN:3042\tSP:chocobo\n"
- "@RG\tID:rg1\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:control\n"
- "@RG\tID:rg2\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\n"
- "@RG\tID:rg3\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\n"
+ "@RG\tID:rg1\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:control\tPM:SEQUEL\n"
+ "@RG\tID:rg2\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
+ "@RG\tID:rg3\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
"@PG\tID:_foo_\tPN:ide\n"
"@CO\tipsum and so on\n"
"@CO\tcitation needed\n";
@@ -279,3 +323,139 @@ TEST(BamHeaderTest, ExtractFromRawDataOk)
text = newHeader.ToSam();
EXPECT_EQ(expectedText, text);
}
+
+TEST(BamHeaderTest, MergeOk)
+{
+ const string hdrText1 = {
+ "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n"
+ "@RG\tID:a955def6\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;"
+ "InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BINDINGKIT=100356300;"
+ "SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3.0.0.140018;FRAMERATEHZ=75.000000\t"
+ "PU:m140918_150013_42139_c100697631700000001823144703261565_s1_p0\t"
+ "PM:SEQUEL\n"
+ "@PG\tID:bam2bam-0.20.0\tPN:bam2bam\tVN:0.20.0\n"
+ "@PG\tID:bax2bam-0.0.2\tPN:bax2bam\tVN:0.0.2\n"
+ "@CO\tcomment1\n"
+ };
+
+ const string hdrText2 = {
+ "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n"
+ "@RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;"
+ "InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;"
+ "PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;"
+ "PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;"
+ "BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;"
+ "FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\t"
+ "PM:SEQUEL\n"
+ "@PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0\n"
+ "@PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0\n"
+ "@PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0\n"
+ "@CO\tcomment2\n"
+ };
+
+ const string mergedText = {
+ "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n"
+ "@RG\tID:a955def6\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;"
+ "InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BINDINGKIT=100356300;"
+ "SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3.0.0.140018;FRAMERATEHZ=75.000000\t"
+ "PU:m140918_150013_42139_c100697631700000001823144703261565_s1_p0\t"
+ "PM:SEQUEL\n"
+ "@RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;"
+ "InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;"
+ "PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;"
+ "PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;"
+ "BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;"
+ "FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\t"
+ "PM:SEQUEL\n"
+ "@PG\tID:bam2bam-0.20.0\tPN:bam2bam\tVN:0.20.0\n"
+ "@PG\tID:bax2bam-0.0.2\tPN:bax2bam\tVN:0.0.2\n"
+ "@PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0\n"
+ "@PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0\n"
+ "@PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0\n"
+ "@CO\tcomment1\n"
+ "@CO\tcomment2\n"
+ };
+
+ { // operator+
+
+ const BamHeader header1(hdrText1);
+ const BamHeader header2(hdrText2);
+ const BamHeader merged = header1 + header2;
+ EXPECT_EQ(mergedText, merged.ToSam());
+
+ // also make sure inputs not changed
+ EXPECT_EQ(hdrText1, header1.ToSam());
+ EXPECT_EQ(hdrText2, header2.ToSam());
+ }
+
+ { // operator+=
+
+ BamHeader header1(hdrText1);
+ header1 += BamHeader(hdrText2);
+ EXPECT_EQ(mergedText, header1.ToSam());
+ }
+}
+
+TEST(BamHeaderTest, MergeHandlesDuplicateReadGroups)
+{
+ const string hdrText = {
+ "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n"
+ "@RG\tID:a955def6\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;"
+ "InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BINDINGKIT=100356300;"
+ "SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3.0.0.140018;FRAMERATEHZ=75.000000\t"
+ "PU:m140918_150013_42139_c100697631700000001823144703261565_s1_p0\tPM:SEQUEL\n"
+ "@PG\tID:bam2bam-0.20.0\tPN:bam2bam\tVN:0.20.0\n"
+ "@PG\tID:bax2bam-0.0.2\tPN:bax2bam\tVN:0.0.2\n"
+ };
+
+ // duplicate @RG:IDs handled ok (i.e. not duplicated in output)
+
+ const BamHeader header1(hdrText);
+ const BamHeader header2(hdrText);
+ const BamHeader merged = header1 + header2;
+ EXPECT_EQ(hdrText, merged.ToSam());
+}
+
+TEST(BamHeaderTest, IncompatibleMergeFails)
+{
+ { // @HD:VN
+ const string hdrText1 = { "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n" };
+ const string hdrText2 = { "@HD\tVN:1.0\tSO:unknown\tpb:3.0.1\n" };
+ const BamHeader header1(hdrText1);
+ const BamHeader header2(hdrText2);
+ EXPECT_THROW(header1 + header2, std::runtime_error);
+ }
+
+ { // @HD:SO
+ const string hdrText1 = { "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n" };
+ const string hdrText2 = { "@HD\tVN:1.1\tSO:coordinate\tpb:3.0.1\n" };
+ const BamHeader header1(hdrText1);
+ const BamHeader header2(hdrText2);
+ EXPECT_THROW(header1 + header2, std::runtime_error);
+ }
+
+ { // @HD:pb
+ const string hdrText1 = { "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n" };
+ const string hdrText2 = { "@HD\tVN:1.1\tSO:unknown\tpb:3.0.3\n" };
+ const BamHeader header1(hdrText1);
+ const BamHeader header2(hdrText2);
+ EXPECT_THROW(header1 + header2, std::runtime_error);
+ }
+
+ { // @SQ list clash
+
+ const string hdrText1 = {
+ "@HD\tVN:1.1\tSO:coordinate\tpb:3.0.1\n"
+ "@SQ\tSN:foo\tLN:42\n"
+ "@SQ\tSN:bar\tLN:24\n"
+ };
+ const string hdrText2 = {
+ "@HD\tVN:1.1\tSO:coordinate\tpb:3.0.1\n"
+ "@SQ\tSN:foo\tLN:42\n"
+ "@SQ\tSN:baz\tLN:99\n"
+ };
+ const BamHeader header1(hdrText1);
+ const BamHeader header2(hdrText2);
+ EXPECT_THROW(header1 + header2, std::runtime_error);
+ }
+}
diff --git a/tests/src/test_BamRecord.cpp b/tests/src/test_BamRecord.cpp
index e32ad2e..a166044 100644
--- a/tests/src/test_BamRecord.cpp
+++ b/tests/src/test_BamRecord.cpp
@@ -223,6 +223,9 @@ TEST(BamRecordTest, DefaultValues)
EXPECT_EQ(-1, bam.AlignedStart());
EXPECT_EQ(-1, bam.AlignedEnd());
EXPECT_THROW(bam.HoleNumber(), std::exception);
+ EXPECT_FALSE(bam.HasNumPasses());
+ EXPECT_THROW(bam.NumPasses(), std::exception);
+
// 8888888888888888888888888888888888888
// EXPECT_EQ(-1, bam.NumPasses());
// EXPECT_EQ(-1, bam.QueryStart());
@@ -442,6 +445,7 @@ TEST(BamRecordTest, CoreSetters)
// EXPECT_EQ(42, bam.HoleNumber());
// EXPECT_EQ(testQVs, bam.InsertionQVs());
// EXPECT_EQ(testQVs, bam.MergeQVs());
+
// EXPECT_EQ(42, bam.NumPasses());
// EXPECT_EQ(42, bam.QueryEnd());
// EXPECT_EQ(42, bam.QueryStart());
diff --git a/tests/src/test_BamRecordClipping.cpp b/tests/src/test_BamRecordClipping.cpp
index 425b1e5..5193868 100644
--- a/tests/src/test_BamRecordClipping.cpp
+++ b/tests/src/test_BamRecordClipping.cpp
@@ -1185,3 +1185,27 @@ TEST(BamRecordClippingTest, StaticClippedToReference)
// EXPECT_EQ(s3_tagQuals_clipped, s3.AltLabelQV(Orientation::GENOMIC).Fastq());
EXPECT_EQ(s3_frames_clipped, s3.IPD(Orientation::GENOMIC).Data());
}
+
+TEST(BamRecordTest, ClipCigarData)
+{
+ const Position qStart = 500;
+ const Position qEnd = 515;
+ const string seq = "TTAACCGTTAGCAAA";
+ const string quals = "--?]?]?]?]?*+++";
+ const string tagBases = "TTAACCGTTAGCAAA";
+ const string tagQuals = "--?]?]?]?]?*+++";
+ const f_data frames = { 40, 40, 10, 10, 20, 20, 30, 40, 40, 10, 30, 20, 10, 10, 10 };
+ const uint8_t mapQual = 80;
+ BamRecord s3 = tests::MakeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames);
+ BamRecord s3_rev = tests::MakeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames);
+
+ const string s3_cigar = "5H2S4=1D2I2D4=3S7H";
+ s3.Map(0, 100, Strand::FORWARD, s3_cigar, mapQual);
+ s3_rev.Map(0, 100, Strand::REVERSE, s3_cigar, mapQual);
+
+ const Cigar s3_cigar_raw = s3.CigarData();
+ const Cigar s3_cigar_clipped = s3.CigarData(true);
+
+ EXPECT_EQ(s3_cigar, s3_cigar_raw.ToStdString());
+ EXPECT_EQ(string("4=1D2I2D4="), s3_cigar_clipped.ToStdString());
+}
diff --git a/tests/src/test_BamWriter.cpp b/tests/src/test_BamWriter.cpp
index 320c698..37b4b40 100644
--- a/tests/src/test_BamWriter.cpp
+++ b/tests/src/test_BamWriter.cpp
@@ -46,6 +46,9 @@
#include <pbbam/BamRecord.h>
#include <pbbam/BamWriter.h>
#include <pbbam/EntireFileQuery.h>
+
+#include <pbbam/../../src/FileUtils.h>
+
#include <chrono>
#include <iostream>
#include <string>
@@ -196,3 +199,27 @@ TEST(BamWriterTest, SingleWrite_UserRecord)
remove(generatedBamFn.c_str());
}
+
+//static
+//void CreateBamFile(const string& filename)
+//{
+// if (internal::FileUtils::Exists(filename))
+// return;
+
+// BamHeader header;
+// BamWriter writer(filename, header);
+
+// BamRecord r;
+// for (int i = 0; i < 10; ++i) {
+// writer.Write(r);
+// }
+//}
+
+
+//TEST(BamWriterTest, CreateBAMs)
+//{
+// const string relativeDir = tests::Data_Dir + "/relative";
+// CreateBamFile(relativeDir + "/a/test.bam");
+// CreateBamFile(relativeDir + "/b/test1.bam");
+// CreateBamFile(relativeDir + "/b/test2.bam");
+//}
diff --git a/src/QueryBase.cpp b/tests/src/test_BarcodeQuery.cpp
similarity index 89%
rename from src/QueryBase.cpp
rename to tests/src/test_BarcodeQuery.cpp
index 8bd2a53..6ec02a8 100644
--- a/src/QueryBase.cpp
+++ b/tests/src/test_BarcodeQuery.cpp
@@ -35,14 +35,19 @@
// Author: Derek Barnett
-#include "pbbam/QueryBase.h"
-#include "pbbam/BamRecord.h"
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include "TestData.h"
+#include <gtest/gtest.h>
+#include <pbbam/BarcodeQuery.h>
+#include <string>
using namespace PacBio;
using namespace PacBio::BAM;
using namespace std;
-QueryBase::QueryBase(const BamFile& file)
- : file_(file)
-{ }
-
-QueryBase::~QueryBase(void) { }
+TEST(BarcodeQueryTest, QueryOk)
+{
+ // come back with barcoded data
+}
diff --git a/tests/src/test_Compare.cpp b/tests/src/test_Compare.cpp
new file mode 100644
index 0000000..b92a0a7
--- /dev/null
+++ b/tests/src/test_Compare.cpp
@@ -0,0 +1,739 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include <gtest/gtest.h>
+#include <pbbam/Compare.h>
+#include <algorithm>
+#include <string>
+#include <utility>
+#include <vector>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace tests {
+
+static inline
+BamRecord makeRecordWithTag(const string& tagName,
+ const Tag& tag)
+{
+ auto r = BamRecord{ };
+ r.Impl().AddTag(tagName, tag);
+ return r;
+}
+
+static
+BamRecord makeRecord(const Position qStart,
+ const Position qEnd,
+ const string& seq,
+ const string& quals,
+ const string& tagBases,
+ const string& tagQuals,
+ const vector<uint16_t>& frames)
+{
+ BamRecordImpl impl;
+ impl.SetSequenceAndQualities(seq, quals);
+
+ TagCollection tags;
+ tags["qs"] = qStart;
+ tags["qe"] = qEnd;
+ tags["ip"] = frames;
+ tags["pw"] = frames;
+ tags["dt"] = tagBases;
+ tags["st"] = tagBases;
+ tags["dq"] = tagQuals;
+ tags["iq"] = tagQuals;
+ tags["mq"] = tagQuals;
+ tags["sq"] = tagQuals;
+ tags["pq"] = tagQuals;
+ tags["pv"] = tagQuals;
+ impl.Tags(tags);
+
+ return BamRecord(std::move(impl));
+}
+
+static
+std::vector<BamRecord> makeMappedRecords(void)
+{
+ const Position qStart = 500;
+ const Position qEnd = 510;
+ const string seq = "AACCGTTAGC";
+ const string quals = "?]?]?]?]?*";
+ const string tagBases = "AACCGTTAGC";
+ const string tagQuals = "?]?]?]?]?*";
+ const vector<uint16_t> frames = { 10, 10, 20, 20, 30, 40, 40, 10, 30, 20 };
+ const uint8_t mapQual = 80;
+
+ const string s1_cigar = "10=";
+ const string s2_cigar = "5=3D5=";
+ const string s3_cigar = "4=1D2I2D2X2=";
+
+ BamRecord s1 = tests::makeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames);
+ BamRecord s2 = tests::makeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames);
+ BamRecord s3 = tests::makeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames);
+ BamRecord s1_rev = tests::makeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames);
+ BamRecord s2_rev = tests::makeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames);
+ BamRecord s3_rev = tests::makeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames);
+
+ s1.Map(0, 100, Strand::FORWARD, s1_cigar, mapQual);
+ s2.Map(0, 100, Strand::FORWARD, s2_cigar, mapQual);
+ s3.Map(0, 100, Strand::FORWARD, s3_cigar, mapQual);
+ s1_rev.Map(0, 100, Strand::REVERSE, s1_cigar, mapQual);
+ s2_rev.Map(0, 100, Strand::REVERSE, s2_cigar, mapQual);
+ s3_rev.Map(0, 100, Strand::REVERSE, s3_cigar, mapQual);
+
+ return std::vector<BamRecord> { s1, s2, s3, s1_rev, s2_rev, s3_rev };
+}
+
+} // namespace tests
+
+TEST(CompareTest, TypeToNameOk)
+{
+ EXPECT_EQ(string{"Compare::EQUAL"}, Compare::TypeToName(Compare::EQUAL));
+ EXPECT_EQ(string{"Compare::NOT_EQUAL"}, Compare::TypeToName(Compare::NOT_EQUAL));
+ EXPECT_EQ(string{"Compare::LESS_THAN"}, Compare::TypeToName(Compare::LESS_THAN));
+ EXPECT_EQ(string{"Compare::LESS_THAN_EQUAL"}, Compare::TypeToName(Compare::LESS_THAN_EQUAL));
+ EXPECT_EQ(string{"Compare::GREATER_THAN"}, Compare::TypeToName(Compare::GREATER_THAN));
+ EXPECT_EQ(string{"Compare::GREATER_THAN_EQUAL"}, Compare::TypeToName(Compare::GREATER_THAN_EQUAL));
+ EXPECT_EQ(string{"Compare::CONTAINS"}, Compare::TypeToName(Compare::CONTAINS));
+ EXPECT_EQ(string{"Compare::NOT_CONTAINS"}, Compare::TypeToName(Compare::NOT_CONTAINS));
+
+ // invalid type throws
+ EXPECT_THROW(Compare::TypeToName(static_cast<Compare::Type>(42)), std::runtime_error);
+}
+
+TEST(CompareTest, TypeToOperatorOk)
+{
+ { // normal
+ EXPECT_EQ(Compare::TypeToOperator(Compare::EQUAL), string{"=="});
+ EXPECT_EQ(Compare::TypeToOperator(Compare::NOT_EQUAL), string{"!="});
+ EXPECT_EQ(Compare::TypeToOperator(Compare::LESS_THAN), string{"<"});
+ EXPECT_EQ(Compare::TypeToOperator(Compare::LESS_THAN_EQUAL), string{"<="});
+ EXPECT_EQ(Compare::TypeToOperator(Compare::GREATER_THAN), string{">"});
+ EXPECT_EQ(Compare::TypeToOperator(Compare::GREATER_THAN_EQUAL), string{">="});
+ EXPECT_EQ(Compare::TypeToOperator(Compare::CONTAINS), string{"&"});
+ EXPECT_EQ(Compare::TypeToOperator(Compare::NOT_CONTAINS), string{"~"});
+ }
+
+ { // alpha
+ EXPECT_EQ(Compare::TypeToOperator(Compare::EQUAL, true), string{"eq"});
+ EXPECT_EQ(Compare::TypeToOperator(Compare::NOT_EQUAL, true), string{"ne"});
+ EXPECT_EQ(Compare::TypeToOperator(Compare::LESS_THAN, true), string{"lt"});
+ EXPECT_EQ(Compare::TypeToOperator(Compare::LESS_THAN_EQUAL, true), string{"lte"});
+ EXPECT_EQ(Compare::TypeToOperator(Compare::GREATER_THAN, true), string{"gt"});
+ EXPECT_EQ(Compare::TypeToOperator(Compare::GREATER_THAN_EQUAL, true), string{"gte"});
+ EXPECT_EQ(Compare::TypeToOperator(Compare::CONTAINS, true), string{"and"});
+ EXPECT_EQ(Compare::TypeToOperator(Compare::NOT_CONTAINS, true), string{"not"});
+ }
+
+ // invalid type throws
+ EXPECT_THROW(Compare::TypeToOperator(static_cast<Compare::Type>(42)), std::runtime_error);
+}
+
+TEST(CompareTest, FromOperatorOk)
+{
+ EXPECT_EQ(Compare::EQUAL, Compare::TypeFromOperator("=="));
+ EXPECT_EQ(Compare::EQUAL, Compare::TypeFromOperator("="));
+ EXPECT_EQ(Compare::EQUAL, Compare::TypeFromOperator("eq"));
+ EXPECT_EQ(Compare::NOT_EQUAL, Compare::TypeFromOperator("!="));
+ EXPECT_EQ(Compare::NOT_EQUAL, Compare::TypeFromOperator("ne"));
+ EXPECT_EQ(Compare::LESS_THAN, Compare::TypeFromOperator("<"));
+ EXPECT_EQ(Compare::LESS_THAN, Compare::TypeFromOperator("lt"));
+ EXPECT_EQ(Compare::LESS_THAN, Compare::TypeFromOperator("<"));
+ EXPECT_EQ(Compare::LESS_THAN_EQUAL, Compare::TypeFromOperator("<="));
+ EXPECT_EQ(Compare::LESS_THAN_EQUAL, Compare::TypeFromOperator("lte"));
+ EXPECT_EQ(Compare::LESS_THAN_EQUAL, Compare::TypeFromOperator("<="));
+ EXPECT_EQ(Compare::GREATER_THAN, Compare::TypeFromOperator(">"));
+ EXPECT_EQ(Compare::GREATER_THAN, Compare::TypeFromOperator("gt"));
+ EXPECT_EQ(Compare::GREATER_THAN, Compare::TypeFromOperator(">"));
+ EXPECT_EQ(Compare::GREATER_THAN_EQUAL, Compare::TypeFromOperator(">="));
+ EXPECT_EQ(Compare::GREATER_THAN_EQUAL, Compare::TypeFromOperator("gte"));
+ EXPECT_EQ(Compare::GREATER_THAN_EQUAL, Compare::TypeFromOperator(">="));
+ EXPECT_EQ(Compare::CONTAINS, Compare::TypeFromOperator("&"));
+ EXPECT_EQ(Compare::NOT_CONTAINS, Compare::TypeFromOperator("~"));
+
+ // invalid operator strings throw
+ EXPECT_THROW(Compare::TypeFromOperator(""), std::runtime_error);
+ EXPECT_THROW(Compare::TypeFromOperator("invalid"), std::runtime_error);
+}
+
+TEST(CompareTest, AlignedEndOk)
+{
+ BamRecord r1; r1.alignedEnd_ = 300;
+ BamRecord r2; r2.alignedEnd_ = 200;
+ BamRecord r3; r3.alignedEnd_ = 400;
+ BamRecord r4; r4.alignedEnd_ = 100;
+
+ auto records = vector<BamRecord>{ r1, r2, r3, r4 };
+ std::sort(records.begin(), records.end(), Compare::AlignedEnd());
+
+ EXPECT_EQ(r4.alignedEnd_, records.at(0).AlignedEnd());
+ EXPECT_EQ(r2.alignedEnd_, records.at(1).AlignedEnd());
+ EXPECT_EQ(r1.alignedEnd_, records.at(2).AlignedEnd());
+ EXPECT_EQ(r3.alignedEnd_, records.at(3).AlignedEnd());
+}
+
+TEST(CompareTest, AlignedStartOk)
+{
+ BamRecord r1; r1.alignedStart_ = 300;
+ BamRecord r2; r2.alignedStart_ = 200;
+ BamRecord r3; r3.alignedStart_ = 400;
+ BamRecord r4; r4.alignedStart_ = 100;
+
+ auto records = vector<BamRecord>{ r1, r2, r3, r4 };
+ std::sort(records.begin(), records.end(), Compare::AlignedStart());
+
+ EXPECT_EQ(r4.alignedStart_, records.at(0).AlignedStart());
+ EXPECT_EQ(r2.alignedStart_, records.at(1).AlignedStart());
+ EXPECT_EQ(r1.alignedStart_, records.at(2).AlignedStart());
+ EXPECT_EQ(r3.alignedStart_, records.at(3).AlignedStart());
+}
+
+TEST(CompareTest, AlignedStrandOk)
+{
+ BamRecord r1; r1.Impl().SetReverseStrand(true);
+ BamRecord r2; r2.Impl().SetReverseStrand(false);
+ BamRecord r3; r3.Impl().SetReverseStrand(true);
+ BamRecord r4; r4.Impl().SetReverseStrand(false);
+
+ auto records = vector<BamRecord>{ r1, r2, r3, r4 };
+ std::sort(records.begin(), records.end(), Compare::AlignedStrand());
+
+ EXPECT_EQ(Strand::FORWARD, records.at(0).AlignedStrand());
+ EXPECT_EQ(Strand::FORWARD, records.at(1).AlignedStrand());
+ EXPECT_EQ(Strand::REVERSE, records.at(2).AlignedStrand());
+ EXPECT_EQ(Strand::REVERSE, records.at(3).AlignedStrand());
+}
+
+TEST(CompareTest, BarcodeForwardOk)
+{
+ BamRecord r1; r1.Barcodes(std::make_pair<uint16_t,uint16_t>(30,20));
+ BamRecord r2; r2.Barcodes(std::make_pair<uint16_t,uint16_t>(20,30));
+ BamRecord r3; r3.Barcodes(std::make_pair<uint16_t,uint16_t>(40,10));
+ BamRecord r4; r4.Barcodes(std::make_pair<uint16_t,uint16_t>(10,40));
+
+ auto records = vector<BamRecord>{ r1, r2, r3, r4 };
+ std::sort(records.begin(), records.end(), Compare::BarcodeForward());
+
+ EXPECT_EQ(r4.BarcodeForward(), records.at(0).BarcodeForward());
+ EXPECT_EQ(r2.BarcodeForward(), records.at(1).BarcodeForward());
+ EXPECT_EQ(r1.BarcodeForward(), records.at(2).BarcodeForward());
+ EXPECT_EQ(r3.BarcodeForward(), records.at(3).BarcodeForward());
+}
+
+TEST(CompareTest, BarcodeReverseOk)
+{
+ BamRecord r1; r1.Barcodes(std::make_pair<uint16_t,uint16_t>(30,20));
+ BamRecord r2; r2.Barcodes(std::make_pair<uint16_t,uint16_t>(20,30));
+ BamRecord r3; r3.Barcodes(std::make_pair<uint16_t,uint16_t>(40,10));
+ BamRecord r4; r4.Barcodes(std::make_pair<uint16_t,uint16_t>(10,40));
+
+ auto records = vector<BamRecord>{ r1, r2, r3, r4 };
+ std::sort(records.begin(), records.end(), Compare::BarcodeReverse());
+
+ EXPECT_EQ(r3.BarcodeReverse(), records.at(0).BarcodeReverse());
+ EXPECT_EQ(r1.BarcodeReverse(), records.at(1).BarcodeReverse());
+ EXPECT_EQ(r2.BarcodeReverse(), records.at(2).BarcodeReverse());
+ EXPECT_EQ(r4.BarcodeReverse(), records.at(3).BarcodeReverse());
+}
+
+TEST(CompareTest, BarcodeQualityOk)
+{
+ uint8_t q1 = 30;
+ uint8_t q2 = 20;
+ uint8_t q3 = 40;
+ uint8_t q4 = 10;
+
+ auto records = vector<BamRecord>
+ {
+ tests::makeRecordWithTag("bq", Tag(q1)),
+ tests::makeRecordWithTag("bq", Tag(q2)),
+ tests::makeRecordWithTag("bq", Tag(q3)),
+ tests::makeRecordWithTag("bq", Tag(q4))
+ };
+ std::sort(records.begin(), records.end(), Compare::BarcodeQuality());
+
+ EXPECT_EQ(q4, records.at(0).BarcodeQuality());
+ EXPECT_EQ(q2, records.at(1).BarcodeQuality());
+ EXPECT_EQ(q1, records.at(2).BarcodeQuality());
+ EXPECT_EQ(q3, records.at(3).BarcodeQuality());
+}
+
+TEST(CompareTest, CustomCompareOk)
+{
+ struct CustomCompare : public Compare::MemberFunctionBase<bool, &BamRecord::HasDeletionTag> { };
+
+ auto records = vector<BamRecord>
+ {
+ tests::makeRecordWithTag("dt", Tag(string("foo"))),
+ tests::makeRecordWithTag("dt", Tag(string("foo"))),
+ tests::makeRecordWithTag("dt", Tag(string("foo"))),
+ tests::makeRecordWithTag("dt", Tag(string("foo")))
+ };
+ records.push_back(BamRecord());
+ records.push_back(BamRecord());
+ records.push_back(BamRecord());
+ records.push_back(BamRecord());
+ EXPECT_EQ(8, records.size());
+
+ std::sort(records.begin(), records.end(), CustomCompare());
+
+ EXPECT_FALSE(records.at(0).HasDeletionTag());
+ EXPECT_FALSE(records.at(1).HasDeletionTag());
+ EXPECT_FALSE(records.at(2).HasDeletionTag());
+ EXPECT_FALSE(records.at(3).HasDeletionTag());
+ EXPECT_TRUE(records.at(4).HasDeletionTag());
+ EXPECT_TRUE(records.at(5).HasDeletionTag());
+ EXPECT_TRUE(records.at(6).HasDeletionTag());
+ EXPECT_TRUE(records.at(7).HasDeletionTag());
+}
+
+TEST(CompareTest, FullNameOk)
+{
+ BamRecord r1; r1.Impl().Name("c");
+ BamRecord r2; r2.Impl().Name("b");
+ BamRecord r3; r3.Impl().Name("d");
+ BamRecord r4; r4.Impl().Name("a");
+
+ auto records = vector<BamRecord>{ r1, r2, r3, r4 };
+ std::sort(records.begin(), records.end(), Compare::FullName());
+
+ EXPECT_EQ(r4.FullName(), records.at(0).FullName());
+ EXPECT_EQ(r2.FullName(), records.at(1).FullName());
+ EXPECT_EQ(r1.FullName(), records.at(2).FullName());
+ EXPECT_EQ(r3.FullName(), records.at(3).FullName());
+}
+
+TEST(CompareTest, LocalContextFlagOk)
+{
+ BamRecord r1; r1.LocalContextFlags(LocalContextFlags::BARCODE_AFTER);
+ BamRecord r2; r2.LocalContextFlags(LocalContextFlags::ADAPTER_AFTER);
+ BamRecord r3; r3.LocalContextFlags(LocalContextFlags::REVERSE_PASS);
+ BamRecord r4; r4.LocalContextFlags(LocalContextFlags::NO_LOCAL_CONTEXT);
+
+ auto records = vector<BamRecord>{ r1, r2, r3, r4 };
+ std::sort(records.begin(), records.end(), Compare::LocalContextFlag());
+
+ EXPECT_EQ(r4.LocalContextFlags(), records.at(0).LocalContextFlags());
+ EXPECT_EQ(r2.LocalContextFlags(), records.at(1).LocalContextFlags());
+ EXPECT_EQ(r1.LocalContextFlags(), records.at(2).LocalContextFlags());
+ EXPECT_EQ(r3.LocalContextFlags(), records.at(3).LocalContextFlags());
+}
+
+TEST(CompareTest, MapQualityOk)
+{
+ BamRecord r1; r1.Impl().MapQuality(30);
+ BamRecord r2; r2.Impl().MapQuality(20);
+ BamRecord r3; r3.Impl().MapQuality(40);
+ BamRecord r4; r4.Impl().MapQuality(10);
+
+ auto records = vector<BamRecord>{ r1, r2, r3, r4 };
+ std::sort(records.begin(), records.end(), Compare::MapQuality());
+
+ EXPECT_EQ(r4.MapQuality(), records.at(0).MapQuality());
+ EXPECT_EQ(r2.MapQuality(), records.at(1).MapQuality());
+ EXPECT_EQ(r1.MapQuality(), records.at(2).MapQuality());
+ EXPECT_EQ(r3.MapQuality(), records.at(3).MapQuality());
+}
+
+TEST(CompareTest, MovieNameOk)
+{
+ auto rg1 = ReadGroupInfo { "a", "SUBREAD" };
+ auto rg2 = ReadGroupInfo { "b", "SUBREAD" };
+ auto rg3 = ReadGroupInfo { "c", "SUBREAD" };
+ auto rg4 = ReadGroupInfo { "d", "SUBREAD" };
+
+ BamHeader header;
+ header.AddReadGroup(rg1)
+ .AddReadGroup(rg2)
+ .AddReadGroup(rg3)
+ .AddReadGroup(rg4);
+
+ BamRecord r1(header); r1.ReadGroup(rg3);
+ BamRecord r2(header); r2.ReadGroup(rg2);
+ BamRecord r3(header); r3.ReadGroup(rg4);
+ BamRecord r4(header); r4.ReadGroup(rg1);
+
+ auto records = vector<BamRecord>{ r1, r2, r3, r4 };
+ std::sort(records.begin(), records.end(), Compare::MovieName());
+
+ EXPECT_EQ(r4.MovieName(), records.at(0).MovieName());
+ EXPECT_EQ(r2.MovieName(), records.at(1).MovieName());
+ EXPECT_EQ(r1.MovieName(), records.at(2).MovieName());
+ EXPECT_EQ(r3.MovieName(), records.at(3).MovieName());
+}
+
+TEST(CompareTest, NoneOk)
+{
+ BamRecord r1; r1.Impl().Name("c");
+ BamRecord r2; r2.Impl().Name("b");
+ BamRecord r3; r3.Impl().Name("d");
+ BamRecord r4; r4.Impl().Name("a");
+
+ auto records = vector<BamRecord>{ r1, r2, r3, r4 };
+ std::sort(records.begin(), records.end(), Compare::None());
+
+ EXPECT_EQ(r1.FullName(), records.at(0).FullName());
+ EXPECT_EQ(r2.FullName(), records.at(1).FullName());
+ EXPECT_EQ(r3.FullName(), records.at(2).FullName());
+ EXPECT_EQ(r4.FullName(), records.at(3).FullName());
+}
+
+TEST(CompareTest, NumDeletedBasesOk)
+{
+ // create test data
+ auto records = tests::makeMappedRecords();
+
+ // sanity checks on initial conditions
+ EXPECT_EQ(6, records.size());
+ EXPECT_EQ(0, records.at(0).NumDeletedBases());
+ EXPECT_EQ(3, records.at(1).NumDeletedBases());
+ EXPECT_EQ(3, records.at(2).NumDeletedBases());
+ EXPECT_EQ(0, records.at(3).NumDeletedBases());
+ EXPECT_EQ(3, records.at(4).NumDeletedBases());
+ EXPECT_EQ(3, records.at(5).NumDeletedBases());
+
+ // sort & check
+ std::sort(records.begin(), records.end(), Compare::NumDeletedBases());
+ EXPECT_EQ(0, records.at(0).NumDeletedBases());
+ EXPECT_EQ(0, records.at(1).NumDeletedBases());
+ EXPECT_EQ(3, records.at(2).NumDeletedBases());
+ EXPECT_EQ(3, records.at(3).NumDeletedBases());
+ EXPECT_EQ(3, records.at(4).NumDeletedBases());
+ EXPECT_EQ(3, records.at(5).NumDeletedBases());
+}
+
+TEST(CompareTest, NumInsertedBasesOk)
+{
+ // create test data
+ auto records = tests::makeMappedRecords();
+
+ // sanity checks on initial conditions
+ EXPECT_EQ(6, records.size());
+ EXPECT_EQ(0, records.at(0).NumInsertedBases());
+ EXPECT_EQ(0, records.at(1).NumInsertedBases());
+ EXPECT_EQ(2, records.at(2).NumInsertedBases());
+ EXPECT_EQ(0, records.at(3).NumInsertedBases());
+ EXPECT_EQ(0, records.at(4).NumInsertedBases());
+ EXPECT_EQ(2, records.at(5).NumInsertedBases());
+
+ // sort & check
+ std::sort(records.begin(), records.end(), Compare::NumInsertedBases());
+ EXPECT_EQ(0, records.at(0).NumInsertedBases());
+ EXPECT_EQ(0, records.at(1).NumInsertedBases());
+ EXPECT_EQ(0, records.at(2).NumInsertedBases());
+ EXPECT_EQ(0, records.at(3).NumInsertedBases());
+ EXPECT_EQ(2, records.at(4).NumInsertedBases());
+ EXPECT_EQ(2, records.at(5).NumInsertedBases());
+}
+
+TEST(CompareTest, NumMatchesOk)
+{
+ // create test data
+ auto records = tests::makeMappedRecords();
+
+ // sanity checks on initial conditions
+ EXPECT_EQ(6, records.size());
+ EXPECT_EQ(10, records.at(0).NumMatches());
+ EXPECT_EQ(10, records.at(1).NumMatches());
+ EXPECT_EQ(6, records.at(2).NumMatches());
+ EXPECT_EQ(10, records.at(3).NumMatches());
+ EXPECT_EQ(10, records.at(4).NumMatches());
+ EXPECT_EQ(6, records.at(5).NumMatches());
+
+ // sort & check
+ std::sort(records.begin(), records.end(), Compare::NumMatches());
+ EXPECT_EQ(6, records.at(0).NumMatches());
+ EXPECT_EQ(6, records.at(1).NumMatches());
+ EXPECT_EQ(10, records.at(2).NumMatches());
+ EXPECT_EQ(10, records.at(3).NumMatches());
+ EXPECT_EQ(10, records.at(4).NumMatches());
+ EXPECT_EQ(10, records.at(5).NumMatches());
+}
+
+TEST(CompareTest, NumMismatchesOk)
+{
+ // create test data
+ auto records = tests::makeMappedRecords();
+
+ // sanity checks on initial conditions
+ EXPECT_EQ(6, records.size());
+ EXPECT_EQ(0, records.at(0).NumMismatches());
+ EXPECT_EQ(0, records.at(1).NumMismatches());
+ EXPECT_EQ(2, records.at(2).NumMismatches());
+ EXPECT_EQ(0, records.at(3).NumMismatches());
+ EXPECT_EQ(0, records.at(4).NumMismatches());
+ EXPECT_EQ(2, records.at(5).NumMismatches());
+
+ // sort & check
+ std::sort(records.begin(), records.end(), Compare::NumMismatches());
+ EXPECT_EQ(0, records.at(0).NumMismatches());
+ EXPECT_EQ(0, records.at(1).NumMismatches());
+ EXPECT_EQ(0, records.at(2).NumMismatches());
+ EXPECT_EQ(0, records.at(3).NumMismatches());
+ EXPECT_EQ(2, records.at(4).NumMismatches());
+ EXPECT_EQ(2, records.at(5).NumMismatches());
+}
+
+TEST(CompareTest, QueryEndOk)
+{
+ Position q1 = 30;
+ Position q2 = 20;
+ Position q3 = 40;
+ Position q4 = 10;
+
+ auto records = vector<BamRecord>
+ {
+ tests::makeRecordWithTag("qe", Tag(q1)),
+ tests::makeRecordWithTag("qe", Tag(q2)),
+ tests::makeRecordWithTag("qe", Tag(q3)),
+ tests::makeRecordWithTag("qe", Tag(q4))
+ };
+ std::sort(records.begin(), records.end(), Compare::QueryEnd());
+
+ EXPECT_EQ(q4, records.at(0).QueryEnd());
+ EXPECT_EQ(q2, records.at(1).QueryEnd());
+ EXPECT_EQ(q1, records.at(2).QueryEnd());
+ EXPECT_EQ(q3, records.at(3).QueryEnd());
+}
+
+TEST(CompareTest, QueryStartOk)
+{
+ Position q1 = 30;
+ Position q2 = 20;
+ Position q3 = 40;
+ Position q4 = 10;
+
+ auto records = vector<BamRecord>
+ {
+ tests::makeRecordWithTag("qs", Tag(q1)),
+ tests::makeRecordWithTag("qs", Tag(q2)),
+ tests::makeRecordWithTag("qs", Tag(q3)),
+ tests::makeRecordWithTag("qs", Tag(q4))
+ };
+ std::sort(records.begin(), records.end(), Compare::QueryStart());
+
+ EXPECT_EQ(q4, records.at(0).QueryStart());
+ EXPECT_EQ(q2, records.at(1).QueryStart());
+ EXPECT_EQ(q1, records.at(2).QueryStart());
+ EXPECT_EQ(q3, records.at(3).QueryStart());
+}
+
+TEST(CompareTest, ReadGroupIdOk)
+{
+ auto rg1 = ReadGroupInfo { "foo", "SUBREAD" };
+ auto rg2 = ReadGroupInfo { "bar", "SUBREAD" };
+ auto rg3 = ReadGroupInfo { "c", "SUBREAD" };
+ auto rg4 = ReadGroupInfo { "d", "SUBREAD" };
+
+ BamHeader header;
+ header.AddReadGroup(rg1)
+ .AddReadGroup(rg2)
+ .AddReadGroup(rg3)
+ .AddReadGroup(rg4);
+
+ BamRecord r1(header); r1.ReadGroup(rg3); // -> 99365356
+ BamRecord r2(header); r2.ReadGroup(rg2); // -> d9f305e4
+ BamRecord r3(header); r3.ReadGroup(rg4); // -> 54397cd6
+ BamRecord r4(header); r4.ReadGroup(rg1); // -> a60ddc69
+
+ auto records = vector<BamRecord>{ r1, r2, r3, r4 };
+ std::sort(records.begin(), records.end(), Compare::ReadGroupId()); // lexical, NOT numeric ordering
+
+ EXPECT_EQ(r3.ReadGroupId(), records.at(0).ReadGroupId());
+ EXPECT_EQ(r1.ReadGroupId(), records.at(1).ReadGroupId());
+ EXPECT_EQ(r4.ReadGroupId(), records.at(2).ReadGroupId());
+ EXPECT_EQ(r2.ReadGroupId(), records.at(3).ReadGroupId());
+}
+
+TEST(CompareTest, ReadGroupNumericIdOk)
+{
+ auto rg1 = ReadGroupInfo { "a", "SUBREAD" };
+ auto rg2 = ReadGroupInfo { "b", "SUBREAD" };
+ auto rg3 = ReadGroupInfo { "c", "SUBREAD" };
+ auto rg4 = ReadGroupInfo { "d", "SUBREAD" };
+
+ BamHeader header;
+ header.AddReadGroup(rg1)
+ .AddReadGroup(rg2)
+ .AddReadGroup(rg3)
+ .AddReadGroup(rg4);
+
+ BamRecord r1(header); r1.ReadGroup(rg3); // -> -1724492970
+ BamRecord r2(header); r2.ReadGroup(rg2); // -> 235381373
+ BamRecord r3(header); r3.ReadGroup(rg4); // -> 1413053654
+ BamRecord r4(header); r4.ReadGroup(rg1); // -> 1153643386
+
+ auto records = vector<BamRecord>{ r1, r2, r3, r4 };
+ std::sort(records.begin(), records.end(), Compare::ReadGroupNumericId()); // numeric ordering
+
+ EXPECT_EQ(r1.ReadGroupNumericId(), records.at(0).ReadGroupNumericId());
+ EXPECT_EQ(r2.ReadGroupNumericId(), records.at(1).ReadGroupNumericId());
+ EXPECT_EQ(r4.ReadGroupNumericId(), records.at(2).ReadGroupNumericId());
+ EXPECT_EQ(r3.ReadGroupNumericId(), records.at(3).ReadGroupNumericId());
+}
+
+TEST(CompareTest, ReadAccuracyOk)
+{
+ Accuracy a1 = 30;
+ Accuracy a2 = 20;
+ Accuracy a3 = 40;
+ Accuracy a4 = 10;
+
+ auto records = vector<BamRecord>
+ {
+ tests::makeRecordWithTag("rq", Tag(a1)),
+ tests::makeRecordWithTag("rq", Tag(a2)),
+ tests::makeRecordWithTag("rq", Tag(a3)),
+ tests::makeRecordWithTag("rq", Tag(a4))
+ };
+ std::sort(records.begin(), records.end(), Compare::ReadAccuracy());
+
+ EXPECT_EQ(a4, records.at(0).ReadAccuracy());
+ EXPECT_EQ(a2, records.at(1).ReadAccuracy());
+ EXPECT_EQ(a1, records.at(2).ReadAccuracy());
+ EXPECT_EQ(a3, records.at(3).ReadAccuracy());
+}
+
+TEST(CompareTest, ReferenceEndOk)
+{
+ // create test data
+ auto records = tests::makeMappedRecords();
+
+ // sanity checks on initial conditions
+ EXPECT_EQ(6, records.size());
+ EXPECT_EQ(110, records.at(0).ReferenceEnd());
+ EXPECT_EQ(113, records.at(1).ReferenceEnd());
+ EXPECT_EQ(111, records.at(2).ReferenceEnd());
+ EXPECT_EQ(110, records.at(3).ReferenceEnd());
+ EXPECT_EQ(113, records.at(4).ReferenceEnd());
+ EXPECT_EQ(111, records.at(5).ReferenceEnd());
+
+ // sort & check
+ std::sort(records.begin(), records.end(), Compare::ReferenceEnd());
+ EXPECT_EQ(110, records.at(0).ReferenceEnd());
+ EXPECT_EQ(110, records.at(1).ReferenceEnd());
+ EXPECT_EQ(111, records.at(2).ReferenceEnd());
+ EXPECT_EQ(111, records.at(3).ReferenceEnd());
+ EXPECT_EQ(113, records.at(4).ReferenceEnd());
+ EXPECT_EQ(113, records.at(5).ReferenceEnd());
+}
+
+TEST(CompareTest, ReferenceIdOk)
+{
+ BamRecord r1; r1.Impl().ReferenceId(30);
+ BamRecord r2; r2.Impl().ReferenceId(20);
+ BamRecord r3; r3.Impl().ReferenceId(40);
+ BamRecord r4; r4.Impl().ReferenceId(10);
+
+ auto records = vector<BamRecord>{ r1, r2, r3, r4 };
+ std::sort(records.begin(), records.end(), Compare::ReferenceId());
+
+ EXPECT_EQ(r4.ReferenceId(), records.at(0).ReferenceId());
+ EXPECT_EQ(r2.ReferenceId(), records.at(1).ReferenceId());
+ EXPECT_EQ(r1.ReferenceId(), records.at(2).ReferenceId());
+ EXPECT_EQ(r3.ReferenceId(), records.at(3).ReferenceId());
+}
+
+TEST(CompareTest, ReferenceNameOk)
+{
+ auto seq1 = SequenceInfo { "seq1" };
+ auto seq2 = SequenceInfo { "seq2" };
+ auto seq3 = SequenceInfo { "seq3" };
+ auto seq4 = SequenceInfo { "seq4" };
+
+ BamHeader header;
+ header.AddSequence(seq1) // -> 0
+ .AddSequence(seq2) // -> 1
+ .AddSequence(seq3) // -> 2
+ .AddSequence(seq4); // -> 3
+
+ BamRecord r1(header); r1.Impl().SetMapped(true); r1.Impl().ReferenceId(2);
+ BamRecord r2(header); r2.Impl().SetMapped(true); r2.Impl().ReferenceId(1);
+ BamRecord r3(header); r3.Impl().SetMapped(true); r3.Impl().ReferenceId(3);
+ BamRecord r4(header); r4.Impl().SetMapped(true); r4.Impl().ReferenceId(0);
+
+ auto records = vector<BamRecord>{ r1, r2, r3, r4 };
+ std::sort(records.begin(), records.end(), Compare::ReferenceName());
+
+ EXPECT_EQ(seq1.Name(), records.at(0).ReferenceName());
+ EXPECT_EQ(seq2.Name(), records.at(1).ReferenceName());
+ EXPECT_EQ(seq3.Name(), records.at(2).ReferenceName());
+ EXPECT_EQ(seq4.Name(), records.at(3).ReferenceName());
+}
+
+TEST(CompareTest, ReferenceStartOk)
+{
+ BamRecord r1; r1.Impl().Position(30);
+ BamRecord r2; r2.Impl().Position(20);
+ BamRecord r3; r3.Impl().Position(40);
+ BamRecord r4; r4.Impl().Position(10);
+
+ auto records = vector<BamRecord>{ r1, r2, r3, r4 };
+ std::sort(records.begin(), records.end(), Compare::ReferenceStart());
+
+ EXPECT_EQ(r4.ReferenceStart(), records.at(0).ReferenceStart());
+ EXPECT_EQ(r2.ReferenceStart(), records.at(1).ReferenceStart());
+ EXPECT_EQ(r1.ReferenceStart(), records.at(2).ReferenceStart());
+ EXPECT_EQ(r3.ReferenceStart(), records.at(3).ReferenceStart());
+}
+
+TEST(CompareTest, ZmwOk)
+{
+ int32_t z1 = 30;
+ int32_t z2 = 20;
+ int32_t z3 = 40;
+ int32_t z4 = 10;
+
+ auto records = vector<BamRecord>
+ {
+ tests::makeRecordWithTag("zm", Tag(z1)),
+ tests::makeRecordWithTag("zm", Tag(z2)),
+ tests::makeRecordWithTag("zm", Tag(z3)),
+ tests::makeRecordWithTag("zm", Tag(z4))
+ };
+ std::sort(records.begin(), records.end(), Compare::Zmw());
+
+ EXPECT_EQ(z4, records.at(0).HoleNumber());
+ EXPECT_EQ(z2, records.at(1).HoleNumber());
+ EXPECT_EQ(z1, records.at(2).HoleNumber());
+ EXPECT_EQ(z3, records.at(3).HoleNumber());
+}
diff --git a/tests/src/test_DataSetCore.cpp b/tests/src/test_DataSetCore.cpp
index 2ad0a4e..ba92b1a 100644
--- a/tests/src/test_DataSetCore.cpp
+++ b/tests/src/test_DataSetCore.cpp
@@ -76,28 +76,50 @@ TEST(DataSetCoreTest, XmlNameParts)
EXPECT_EQ(boost::string_ref(":node_name"), leadingColon.QualifiedName());
}
-TEST(DataSetCoreTest, NamespacesOk)
-{
-
-}
-
TEST(DataSetCoreTest, DefaultsOk)
{
DataSet dataset;
EXPECT_EQ(DataSet::GENERIC, dataset.Type());
- EXPECT_FALSE(dataset.CreatedAt().empty()); // default init
+ EXPECT_FALSE(dataset.CreatedAt().empty());
+ EXPECT_FALSE(dataset.MetaType().empty());
+ EXPECT_FALSE(dataset.TimeStampedName().empty());
+ EXPECT_FALSE(dataset.UniqueId().empty());
+
+ EXPECT_EQ(0, dataset.TimeStampedName().find("pacbio_dataset_"));
+
EXPECT_TRUE(dataset.Format().empty());
- EXPECT_TRUE(dataset.MetaType().empty());
EXPECT_TRUE(dataset.ModifiedAt().empty());
EXPECT_TRUE(dataset.Name().empty());
EXPECT_TRUE(dataset.ResourceId().empty());
EXPECT_TRUE(dataset.Tags().empty());
- EXPECT_TRUE(dataset.TimeStampedName().empty());
- EXPECT_TRUE(dataset.UniqueId().empty());
- EXPECT_TRUE(dataset.Version().empty());
EXPECT_EQ(0, dataset.ExternalResources().Size());
EXPECT_EQ(0, dataset.Filters().Size());
EXPECT_EQ(0, dataset.SubDataSets().Size());
+
+ EXPECT_EQ(string{"3.0.1"}, dataset.Version());
+}
+
+TEST(DataSetCoreTest, TimeStampedNamesOk)
+{
+ DataSet dataset;
+ AlignmentSet alignmentSet;
+ BarcodeSet barcodeSet;
+ ContigSet contigSet;
+ ConsensusAlignmentSet consensusAlignmentSet;
+ ConsensusReadSet consensusReadSet;
+ HdfSubreadSet hdfSubreadSet;
+ ReferenceSet referenceSet;
+ SubreadSet subreadSet;
+
+ EXPECT_EQ(0, dataset.TimeStampedName().find("pacbio_dataset_dataset-"));
+ EXPECT_EQ(0, alignmentSet.TimeStampedName().find("pacbio_dataset_alignmentset-"));
+ EXPECT_EQ(0, barcodeSet.TimeStampedName().find("pacbio_dataset_barcodeset-"));
+ EXPECT_EQ(0, contigSet.TimeStampedName().find("pacbio_dataset_contigset-"));
+ EXPECT_EQ(0, consensusAlignmentSet.TimeStampedName().find("pacbio_dataset_consensusalignmentset-"));
+ EXPECT_EQ(0, consensusReadSet.TimeStampedName().find("pacbio_dataset_consensusreadset-"));
+ EXPECT_EQ(0, hdfSubreadSet.TimeStampedName().find("pacbio_dataset_hdfsubreadset-"));
+ EXPECT_EQ(0, referenceSet.TimeStampedName().find("pacbio_dataset_referenceset-"));
+ EXPECT_EQ(0, subreadSet.TimeStampedName().find("pacbio_dataset_subreadset-"));
}
TEST(DataSetCoreTest, BasicGettersSettersOk)
@@ -161,17 +183,21 @@ TEST(DataSetCoreTest, AddExternalResources)
DataSet dataset;
EXPECT_EQ(0, dataset.ExternalResources().Size());
- ExternalResource resource1;
+ ExternalResource resource1("metatype", "id");
resource1.Name("file1");
- ExternalResource resource2;
+ ExternalResource resource2("metatype", "id2");
resource2.Name("file2");
- resource2.MetaType("blah");
dataset.ExternalResources().Add(resource1);
dataset.ExternalResources().Add(resource2);
EXPECT_EQ(2, dataset.ExternalResources().Size());
+ // disallow duplicates (checking on ResourceId)
+ ExternalResource duplicateResource("metatype", "id");
+ dataset.ExternalResources().Add(duplicateResource);
+ EXPECT_EQ(2, dataset.ExternalResources().Size());
+
// direct access
const ExternalResources& resources = dataset.ExternalResources();
EXPECT_EQ(string("file1"), resources[0].Name());
@@ -192,11 +218,11 @@ TEST(DataSetCoreTest, EditExternalResources)
{
DataSet dataset;
- ExternalResource resource;
+ ExternalResource resource("metatype", "id");
resource.Name("file1");
dataset.ExternalResources().Add(resource);
- resource.Name("file2");
+ resource.Name("file2").ResourceId("id2");
dataset.ExternalResources().Add(resource);
EXPECT_EQ(2, dataset.ExternalResources().Size());
@@ -206,6 +232,20 @@ TEST(DataSetCoreTest, EditExternalResources)
EXPECT_EQ(string("file2"), dataset.ExternalResources()[1].Name());
}
+TEST(DataSetCoreTest, NestedExternalResources)
+{
+ ExternalResource resource("metatype", "filename");
+ resource.ExternalResources().Add(ExternalResource("metatype.child", "filename.child"));
+ resource.ExternalResources().Add(ExternalResource("metatype.child2", "filename.child2"));
+
+ const ExternalResources& childResources = resource.ExternalResources();
+ EXPECT_EQ(2, childResources.Size());
+ EXPECT_EQ(string("metatype.child"), childResources[0].MetaType());
+ EXPECT_EQ(string("metatype.child2"), childResources[1].MetaType());
+ EXPECT_EQ(string("filename.child"), childResources[0].ResourceId());
+ EXPECT_EQ(string("filename.child2"), childResources[1].ResourceId());
+}
+
TEST(DataSetCoreTest, AddFilters)
{
DataSet dataset;
@@ -407,10 +447,10 @@ TEST(DataSetCoreTest, RemoveExternalResources)
DataSet dataset;
EXPECT_EQ(0, dataset.ExternalResources().Size());
- ExternalResource resource1;
+ ExternalResource resource1("metatype", "id");
resource1.Name("file1");
- ExternalResource resource2;
+ ExternalResource resource2("metatype", "id2");
resource2.Name("file2");
dataset.ExternalResources().Add(resource1);
diff --git a/tests/src/test_DataSetIO.cpp b/tests/src/test_DataSetIO.cpp
index b075788..8756f87 100644
--- a/tests/src/test_DataSetIO.cpp
+++ b/tests/src/test_DataSetIO.cpp
@@ -40,12 +40,16 @@
#endif
#include "TestData.h"
+#include "../src/FileUtils.h"
#include <gtest/gtest.h>
#include <pbbam/DataSet.h>
#include <pbbam/internal/DataSetElement.h>
-#include <stdexcept>
+#include <fstream>
#include <sstream>
+#include <stdexcept>
#include <string>
+#include <vector>
+#include <unistd.h>
using namespace PacBio;
using namespace PacBio::BAM;
using namespace std;
@@ -87,6 +91,10 @@ static void TestSubread2Xml(void);
static void TestSubread3Xml(void);
static void TestTransformedXml(void);
+static inline
+void changeCurrentDirectory(const std::string& dir)
+{ ASSERT_EQ(0, chdir(dir.c_str())); }
+
TEST(DataSetIOTest, FromBamFilename)
{
DataSet dataset(ex2BamFn);
@@ -97,6 +105,16 @@ TEST(DataSetIOTest, FromBamFilename)
EXPECT_EQ(ex2BamFn, bamRef.ResourceId());
}
+TEST(DataSetIOTest, FromBamFilenames)
+{
+ std::ifstream fofn(bamGroupFofn);
+ std::vector<std::string> files;
+ std::string file;
+ while (std::getline(fofn, file)) if (!file.empty()) files.emplace_back(file);
+ DataSet dataset(files);
+ EXPECT_EQ(3, dataset.ExternalResources().Size());
+}
+
TEST(DataSetIOTest, FromBamFileObject)
{
BamFile bamFile(ex2BamFn);
@@ -164,43 +182,43 @@ TEST(DataSetIOTest, ToXml)
dataset.MetaType("PacBio.DataSet.AlignmentSet");
dataset.Name("DataSet_AlignmentSet");
dataset.Tags("barcode moreTags mapping mytags");
+ dataset.TimeStampedName("my_tsn");
dataset.UniqueId("b095d0a3-94b8-4918-b3af-a3f81bbe519c");
- dataset.Version("2.3.0");
- dataset.Attribute("xmlns","http://pacificbiosciences.com/PacBioSecondaryDataModel.xsd")
- .Attribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance")
- .Attribute("xsi:schemaLocation",
- "http://pacificbiosciences.com/PacBioSecondaryDataModel.xsd PacBioSecondaryDataModel.xsd");
+ dataset.Attribute("xmlns", "http://pacificbiosciences.com/PacBioDatasets.xsd")
+ .Attribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance")
+ .Attribute("xsi:schemaLocation", "http://pacificbiosciences.com/PacBioDatasets.xsd");
// external resources
- ExternalResource resource1;
+ ExternalResource resource1("AlignmentFile.AlignmentBamFile", "file:/mnt/path/to/alignments2.bam");
resource1.Name("Third Alignments BAM");
resource1.Description("Points to an example Alignments BAM file.");
- resource1.MetaType("AlignmentFile.AlignmentBamFile");
- resource1.ResourceId("file:/mnt/path/to/alignments2.bam");
resource1.Tags("Example");
- FileIndex pbi1;
- pbi1.MetaType("PacBio.Index.PacBioIndex");
- pbi1.ResourceId("file:/mnt/path/to/alignments2.pbi");
+ resource1.TimeStampedName("my_tsn");
+ resource1.UniqueId("my_uuid");
+ FileIndex pbi1("PacBio.Index.PacBioIndex", "file:/mnt/path/to/alignments2.pbi");
+ pbi1.TimeStampedName("my_tsn");
+ pbi1.UniqueId("my_uuid");
resource1.FileIndices().Add(pbi1);
dataset.ExternalResources().Add(resource1);
- ExternalResource resource2;
+ ExternalResource resource2("AlignmentFile.AlignmentBamFile", "file:./alignments3.bam");
resource2.Name("Fourth Alignments BAM");
resource2.Description("Points to another example Alignments BAM file, by relative path.");
- resource2.MetaType("AlignmentFile.AlignmentBamFile");
- resource2.ResourceId("file:./alignments3.bam");
resource2.Tags("Example");
- FileIndex pbi2;
- pbi2.MetaType("PacBio.Index.PacBioIndex");
- pbi2.ResourceId("file:/mnt/path/to/alignments3.pbi");
+ resource2.TimeStampedName("my_tsn");
+ resource2.UniqueId("my_uuid");
+ FileIndex pbi2("PacBio.Index.PacBioIndex", "file:/mnt/path/to/alignments3.pbi");
+ pbi2.TimeStampedName("my_tsn");
+ pbi2.UniqueId("my_uuid");
+
resource2.FileIndices().Add(pbi2);
dataset.ExternalResources().Add(resource2);
// sub-datasets with filters
DataSetBase subDataSet1;
subDataSet1.Name("HighQuality Read Alignments");
+ subDataSet1.TimeStampedName("my_tsn");
subDataSet1.UniqueId("ab95d0a3-94b8-4918-b3af-a3f81bbe519c");
- subDataSet1.Version("2.3.0");
Filter filter1;
filter1.Properties().Add(Property("rq", "0.85", ">"));
subDataSet1.Filters().Add(filter1);
@@ -208,8 +226,8 @@ TEST(DataSetIOTest, ToXml)
DataSetBase subDataSet2;
subDataSet2.Name("Alignments to chromosome 1");
+ subDataSet2.TimeStampedName("my_tsn");
subDataSet2.UniqueId("ac95d0a3-94b8-4918-b3af-a3f81bbe519c");
- subDataSet2.Version("2.3.0");
Filter filter2;
filter2.Properties().Add(Property("RNAME", "chr1", "=="));
subDataSet2.Filters().Add(filter2);
@@ -218,31 +236,59 @@ TEST(DataSetIOTest, ToXml)
// write dataset
const string expectedXml =
"<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
- "<pbds:AlignmentSet CreatedAt=\"2015-01-27T09:00:01\" MetaType=\"PacBio.DataSet.AlignmentSet\" "
- "Name=\"DataSet_AlignmentSet\" Tags=\"barcode moreTags mapping mytags\" "
- "UniqueId=\"b095d0a3-94b8-4918-b3af-a3f81bbe519c\" Version=\"2.3.0\" "
- "xmlns=\"http://pacificbiosciences.com/PacBioSecondaryDataModel.xsd\" "
+ "<pbds:AlignmentSet "
+ "CreatedAt=\"2015-01-27T09:00:01\" "
+ "MetaType=\"PacBio.DataSet.AlignmentSet\" "
+ "Name=\"DataSet_AlignmentSet\" "
+ "Tags=\"barcode moreTags mapping mytags\" "
+ "TimeStampedName=\"my_tsn\" "
+ "UniqueId=\"b095d0a3-94b8-4918-b3af-a3f81bbe519c\" Version=\"3.0.1\" "
+ "xmlns=\"http://pacificbiosciences.com/PacBioDatasets.xsd\" "
"xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
- "xsi:schemaLocation=\"http://pacificbiosciences.com/PacBioSecondaryDataModel.xsd "
- "PacBioSecondaryDataModel.xsd\">\n"
+ "xsi:schemaLocation=\"http://pacificbiosciences.com/PacBioDatasets.xsd\" "
+ "xmlns:pbbase=\"http://pacificbiosciences.com/PacBioBaseDataModel.xsd\" "
+ "xmlns:pbds=\"http://pacificbiosciences.com/PacBioDatasets.xsd\">\n"
"\t<pbbase:ExternalResources>\n"
- "\t\t<pbbase:ExternalResource Description=\"Points to an example Alignments BAM file.\" "
- "MetaType=\"AlignmentFile.AlignmentBamFile\" Name=\"Third Alignments BAM\" "
- "ResourceId=\"file:/mnt/path/to/alignments2.bam\" Tags=\"Example\">\n"
+ "\t\t<pbbase:ExternalResource "
+ "Description=\"Points to an example Alignments BAM file.\" "
+ "MetaType=\"AlignmentFile.AlignmentBamFile\" "
+ "Name=\"Third Alignments BAM\" "
+ "ResourceId=\"file:/mnt/path/to/alignments2.bam\" "
+ "Tags=\"Example\" "
+ "TimeStampedName=\"my_tsn\" "
+ "UniqueId=\"my_uuid\" Version=\"3.0.1\">\n"
"\t\t\t<pbbase:FileIndices>\n"
- "\t\t\t\t<pbbase:FileIndex MetaType=\"PacBio.Index.PacBioIndex\" ResourceId=\"file:/mnt/path/to/alignments2.pbi\" />\n"
+ "\t\t\t\t<pbbase:FileIndex "
+ "MetaType=\"PacBio.Index.PacBioIndex\" "
+ "ResourceId=\"file:/mnt/path/to/alignments2.pbi\" "
+ "TimeStampedName=\"my_tsn\" "
+ "UniqueId=\"my_uuid\" Version=\"3.0.1\" />\n"
"\t\t\t</pbbase:FileIndices>\n"
"\t\t</pbbase:ExternalResource>\n"
- "\t\t<pbbase:ExternalResource Description=\"Points to another example Alignments BAM file, by relative path.\" "
- "MetaType=\"AlignmentFile.AlignmentBamFile\" Name=\"Fourth Alignments BAM\" "
- "ResourceId=\"file:./alignments3.bam\" Tags=\"Example\">\n"
+ "\t\t<pbbase:ExternalResource "
+ "Description=\"Points to another example Alignments BAM file, by relative path.\" "
+ "MetaType=\"AlignmentFile.AlignmentBamFile\" "
+ "Name=\"Fourth Alignments BAM\" "
+ "ResourceId=\"file:./alignments3.bam\" "
+ "Tags=\"Example\" "
+ "TimeStampedName=\"my_tsn\" "
+ "UniqueId=\"my_uuid\" Version=\"3.0.1\">\n"
"\t\t\t<pbbase:FileIndices>\n"
- "\t\t\t\t<pbbase:FileIndex MetaType=\"PacBio.Index.PacBioIndex\" ResourceId=\"file:/mnt/path/to/alignments3.pbi\" />\n"
+ "\t\t\t\t<pbbase:FileIndex "
+ "MetaType=\"PacBio.Index.PacBioIndex\" "
+ "ResourceId=\"file:/mnt/path/to/alignments3.pbi\" "
+ "TimeStampedName=\"my_tsn\" "
+ "UniqueId=\"my_uuid\" Version=\"3.0.1\" />\n"
"\t\t\t</pbbase:FileIndices>\n"
"\t\t</pbbase:ExternalResource>\n"
"\t</pbbase:ExternalResources>\n"
"\t<pbds:DataSets>\n"
- "\t\t<pbds:DataSet Name=\"HighQuality Read Alignments\" UniqueId=\"ab95d0a3-94b8-4918-b3af-a3f81bbe519c\" Version=\"2.3.0\">\n"
+ "\t\t<pbds:DataSet "
+ "MetaType=\"PacBio.DataSet.DataSet\" "
+ "Name=\"HighQuality Read Alignments\" "
+ "TimeStampedName=\"my_tsn\" "
+ "UniqueId=\"ab95d0a3-94b8-4918-b3af-a3f81bbe519c\" "
+ "Version=\"3.0.1\">\n"
"\t\t\t<pbds:Filters>\n"
"\t\t\t\t<pbds:Filter>\n"
"\t\t\t\t\t<pbbase:Properties>\n"
@@ -251,7 +297,12 @@ TEST(DataSetIOTest, ToXml)
"\t\t\t\t</pbds:Filter>\n"
"\t\t\t</pbds:Filters>\n"
"\t\t</pbds:DataSet>\n"
- "\t\t<pbds:DataSet Name=\"Alignments to chromosome 1\" UniqueId=\"ac95d0a3-94b8-4918-b3af-a3f81bbe519c\" Version=\"2.3.0\">\n"
+ "\t\t<pbds:DataSet "
+ "MetaType=\"PacBio.DataSet.DataSet\" "
+ "Name=\"Alignments to chromosome 1\" "
+ "TimeStampedName=\"my_tsn\" "
+ "UniqueId=\"ac95d0a3-94b8-4918-b3af-a3f81bbe519c\" "
+ "Version=\"3.0.1\">\n"
"\t\t\t<pbds:Filters>\n"
"\t\t\t\t<pbds:Filter>\n"
"\t\t\t\t\t<pbbase:Properties>\n"
@@ -272,30 +323,47 @@ static void TestFromXmlString(void)
{
const string inputXml =
"<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
- "<pbds:AlignmentSet CreatedAt=\"2015-01-27T09:00:01\" MetaType=\"PacBio.DataSet.AlignmentSet\" "
- "Name=\"DataSet_AlignmentSet\" Tags=\"barcode moreTags mapping mytags\" "
- "UniqueId=\"b095d0a3-94b8-4918-b3af-a3f81bbe519c\" Version=\"2.3.0\" "
- "xmlns=\"http://pacificbiosciences.com/PacBioDataModel.xsd\" "
- "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
- "xsi:schemaLocation=\"http://pacificbiosciences.com/PacBioDataModel.xsd\">\n"
+ "<pbds:AlignmentSet "
+ "CreatedAt=\"2015-01-27T09:00:01\" "
+ "MetaType=\"PacBio.DataSet.AlignmentSet\" "
+ "Name=\"DataSet_AlignmentSet\" "
+ "Tags=\"barcode moreTags mapping mytags\" "
+ "UniqueId=\"b095d0a3-94b8-4918-b3af-a3f81bbe519c\" "
+ "Version=\"2.3.0\" "
+ "xmlns=\"http://pacificbiosciences.com/PacBioDataModel.xsd\" "
+ "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
+ "xsi:schemaLocation=\"http://pacificbiosciences.com/PacBioDataModel.xsd\">\n"
"\t<pbbase:ExternalResources>\n"
- "\t\t<pbbase:ExternalResource Description=\"Points to an example Alignments BAM file.\" "
- "MetaType=\"AlignmentFile.AlignmentBamFile\" Name=\"Third Alignments BAM\" "
- "ResourceId=\"file:/mnt/path/to/alignments2.bam\" Tags=\"Example\">\n"
+ "\t\t<pbbase:ExternalResource "
+ "Description=\"Points to an example Alignments BAM file.\" "
+ "MetaType=\"AlignmentFile.AlignmentBamFile\" "
+ "Name=\"Third Alignments BAM\" "
+ "ResourceId=\"file:/mnt/path/to/alignments2.bam\" "
+ "Tags=\"Example\">\n"
"\t\t\t<pbbase:FileIndices>\n"
- "\t\t\t\t<pbbase:FileIndex MetaType=\"PacBio.Index.PacBioIndex\" ResourceId=\"file:/mnt/path/to/alignments2.pbi\" />\n"
+ "\t\t\t\t<pbbase:FileIndex "
+ "MetaType=\"PacBio.Index.PacBioIndex\" "
+ "ResourceId=\"file:/mnt/path/to/alignments2.pbi\" />\n"
"\t\t\t</pbbase:FileIndices>\n"
"\t\t</pbbase:ExternalResource>\n"
- "\t\t<pbbase:ExternalResource Description=\"Points to another example Alignments BAM file, by relative path.\" "
- "MetaType=\"AlignmentFile.AlignmentBamFile\" Name=\"Fourth Alignments BAM\" "
- "ResourceId=\"file:./alignments3.bam\" Tags=\"Example\">\n"
+ "\t\t<pbbase:ExternalResource "
+ "Description=\"Points to another example Alignments BAM file, by relative path.\" "
+ "MetaType=\"AlignmentFile.AlignmentBamFile\" "
+ "Name=\"Fourth Alignments BAM\" "
+ "ResourceId=\"file:./alignments3.bam\" "
+ "Tags=\"Example\">\n"
"\t\t\t<pbbase:FileIndices>\n"
- "\t\t\t\t<pbbase:FileIndex MetaType=\"PacBio.Index.PacBioIndex\" ResourceId=\"file:/mnt/path/to/alignments3.pbi\" />\n"
+ "\t\t\t\t<pbbase:FileIndex "
+ "MetaType=\"PacBio.Index.PacBioIndex\" "
+ "ResourceId=\"file:/mnt/path/to/alignments3.pbi\" />\n"
"\t\t\t</pbbase:FileIndices>\n"
"\t\t</pbbase:ExternalResource>\n"
"\t</pbbase:ExternalResources>\n"
"\t<pbds:DataSets>\n"
- "\t\t<pbds:DataSet Name=\"HighQuality Read Alignments\" UniqueId=\"ab95d0a3-94b8-4918-b3af-a3f81bbe519c\" Version=\"2.3.0\">\n"
+ "\t\t<pbds:DataSet "
+ "Name=\"HighQuality Read Alignments\" "
+ "UniqueId=\"ab95d0a3-94b8-4918-b3af-a3f81bbe519c\" "
+ "Version=\"2.3.0\">\n"
"\t\t\t<pbds:Filters>\n"
"\t\t\t\t<pbds:Filter>\n"
"\t\t\t\t\t<pbbase:Properties>\n"
@@ -304,7 +372,10 @@ static void TestFromXmlString(void)
"\t\t\t\t</pbds:Filter>\n"
"\t\t\t</pbds:Filters>\n"
"\t\t</pbds:DataSet>\n"
- "\t\t<pbds:DataSet Name=\"Alignments to chromosome 1\" UniqueId=\"ac95d0a3-94b8-4918-b3af-a3f81bbe519c\" Version=\"2.3.0\">\n"
+ "\t\t<pbds:DataSet "
+ "Name=\"Alignments to chromosome 1\" "
+ "UniqueId=\"ac95d0a3-94b8-4918-b3af-a3f81bbe519c\" "
+ "Version=\"2.3.0\">\n"
"\t\t\t<pbds:Filters>\n"
"\t\t\t\t<pbds:Filter>\n"
"\t\t\t\t\t<pbbase:Properties>\n"
@@ -395,9 +466,9 @@ static void TestAli1Xml(void)
EXPECT_EQ(string("barcode moreTags mapping mytags"), dataset.Tags());
EXPECT_EQ(string("b095d0a3-94b8-4918-b3af-a3f81bbe519c"), dataset.UniqueId());
EXPECT_EQ(string("2.3.0"), dataset.Version());
- EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xmlns"));
- EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), dataset.Attribute("xmlns:xsi"));
- EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xsi:schemaLocation"));
+ EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xmlns"));
+ EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), dataset.Attribute("xmlns:xsi"));
+ EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xsi:schemaLocation"));
EXPECT_EQ(0, dataset.Filters().Size());
@@ -484,9 +555,9 @@ static void TestAli2Xml(void)
EXPECT_EQ(string("barcode moreTags mapping mytags"), dataset.Tags());
EXPECT_EQ(string("b095d0a3-94b8-4918-b3af-a3f81bbe519c"), dataset.UniqueId());
EXPECT_EQ(string("2.3.0"), dataset.Version());
- EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xmlns"));
- EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), dataset.Attribute("xmlns:xsi"));
- EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xsi:schemaLocation"));
+ EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xmlns"));
+ EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), dataset.Attribute("xmlns:xsi"));
+ EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xsi:schemaLocation"));
EXPECT_EQ(0, dataset.Filters().Size());
@@ -573,9 +644,9 @@ static void TestAli3Xml(void)
EXPECT_EQ(string("barcode moreTags mapping mytags"), dataset.Tags());
EXPECT_EQ(string("b095d0a3-94b8-4918-b3af-a3f81bbe519c"), dataset.UniqueId());
EXPECT_EQ(string("2.3.0"), dataset.Version());
- EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xmlns"));
- EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), dataset.Attribute("xmlns:xsi"));
- EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xsi:schemaLocation"));
+ EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xmlns"));
+ EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), dataset.Attribute("xmlns:xsi"));
+ EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xsi:schemaLocation"));
EXPECT_EQ(0, dataset.Filters().Size());
@@ -662,9 +733,9 @@ static void TestAli4Xml(void)
EXPECT_EQ(string("barcode moreTags mapping mytags"), dataset.Tags());
EXPECT_EQ(string("b095d0a3-94b8-4918-b3af-a3f81bbe519c"), dataset.UniqueId());
EXPECT_EQ(string("2.3.0"), dataset.Version());
- EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xmlns"));
- EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), dataset.Attribute("xmlns:xsi"));
- EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xsi:schemaLocation"));
+ EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xmlns"));
+ EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), dataset.Attribute("xmlns:xsi"));
+ EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xsi:schemaLocation"));
EXPECT_EQ(0, dataset.Filters().Size());
@@ -751,9 +822,9 @@ static void TestMappingStaggeredXml(void)
EXPECT_EQ(string(""), dataset.Tags());
EXPECT_EQ(string("30f72098-bc5b-e06b-566c-8b28dda909a8"), dataset.UniqueId());
EXPECT_EQ(string("2.3.0"), dataset.Version());
- EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xmlns"));
- EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), dataset.Attribute("xmlns:xsi"));
- EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xsi:schemaLocation"));
+ EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xmlns"));
+ EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), dataset.Attribute("xmlns:xsi"));
+ EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xsi:schemaLocation"));
EXPECT_EQ(0, dataset.Filters().Size());
@@ -838,9 +909,9 @@ static void TestBarcodeXml(void)
EXPECT_EQ(string("barcode moreTags mapping mytags"), dataset.Tags());
EXPECT_EQ(string("b095d0a3-94b8-4918-b3af-a3f81bbe519c"), dataset.UniqueId());
EXPECT_EQ(string("2.3.0"), dataset.Version());
- EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xmlns"));
- EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), dataset.Attribute("xmlns:xsi"));
- EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xsi:schemaLocation"));
+ EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xmlns"));
+ EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), dataset.Attribute("xmlns:xsi"));
+ EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xsi:schemaLocation"));
EXPECT_EQ(0, dataset.Filters().Size());
EXPECT_EQ(0, dataset.SubDataSets().Size());
@@ -872,9 +943,9 @@ static void TestCcsReadXml(void)
EXPECT_EQ(string("barcode moreTags mapping mytags"), dataset.Tags());
EXPECT_EQ(string("b095d0a3-94b8-4918-b3af-a3f81bbe519c"), dataset.UniqueId());
EXPECT_EQ(string("2.3.0"), dataset.Version());
- EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xmlns"));
- EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), dataset.Attribute("xmlns:xsi"));
- EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xsi:schemaLocation"));
+ EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xmlns"));
+ EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), dataset.Attribute("xmlns:xsi"));
+ EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xsi:schemaLocation"));
EXPECT_EQ(0, dataset.Filters().Size());
EXPECT_EQ(0, dataset.SubDataSets().Size());
@@ -927,9 +998,9 @@ static void TestLambdaContigsXml(void)
EXPECT_EQ(string(""), dataset.Tags());
EXPECT_EQ(string("596e87db-34f9-d2fd-c905-b017543170e1"), dataset.UniqueId());
EXPECT_EQ(string("2.3.0"), dataset.Version());
- EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xmlns"));
- EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), dataset.Attribute("xmlns:xsi"));
- EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xsi:schemaLocation"));
+ EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xmlns"));
+ EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), dataset.Attribute("xmlns:xsi"));
+ EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xsi:schemaLocation"));
EXPECT_EQ(0, dataset.Filters().Size());
EXPECT_EQ(0, dataset.SubDataSets().Size());
@@ -950,9 +1021,9 @@ static void TestPbalchemyXml(void)
EXPECT_EQ(string(""), dataset.Tags());
EXPECT_EQ(string("58e3f7c5-24c1-b58b-fbd5-37de268cc2f0"), dataset.UniqueId());
EXPECT_EQ(string("2.3.0"), dataset.Version());
- EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xmlns"));
- EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), dataset.Attribute("xmlns:xsi"));
- EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xsi:schemaLocation"));
+ EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xmlns"));
+ EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), dataset.Attribute("xmlns:xsi"));
+ EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xsi:schemaLocation"));
EXPECT_EQ(0, dataset.SubDataSets().Size());
@@ -979,9 +1050,9 @@ static void TestReferenceXml(void)
EXPECT_EQ(string("barcode moreTags mapping mytags"), dataset.Tags());
EXPECT_EQ(string("b095d0a3-94b8-4918-b3af-a3f81bbe519c"), dataset.UniqueId());
EXPECT_EQ(string("2.3.0"), dataset.Version());
- EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xmlns"));
- EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), dataset.Attribute("xmlns:xsi"));
- EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xsi:schemaLocation"));
+ EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xmlns"));
+ EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), dataset.Attribute("xmlns:xsi"));
+ EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xsi:schemaLocation"));
EXPECT_EQ(0, dataset.Filters().Size());
EXPECT_EQ(0, dataset.SubDataSets().Size());
@@ -1036,9 +1107,9 @@ static void TestSubread1Xml(void)
EXPECT_EQ(string("barcode moreTags mapping mytags"), dataset.Tags());
EXPECT_EQ(string("b095d0a3-94b8-4918-b3af-a3f81bbe519c"), dataset.UniqueId());
EXPECT_EQ(string("2.3.0"), dataset.Version());
- EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xmlns"));
- EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), dataset.Attribute("xmlns:xsi"));
- EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xsi:schemaLocation"));
+ EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xmlns"));
+ EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), dataset.Attribute("xmlns:xsi"));
+ EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xsi:schemaLocation"));
const ExternalResources& resources = dataset.ExternalResources();
ASSERT_EQ(2, resources.Size());
@@ -1106,9 +1177,9 @@ static void TestSubread2Xml(void)
EXPECT_EQ(string("barcode moreTags mapping mytags"), dataset.Tags());
EXPECT_EQ(string("b095d0a3-94b8-4918-b3af-a3f81bbe519c"), dataset.UniqueId());
EXPECT_EQ(string("2.3.0"), dataset.Version());
- EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xmlns"));
- EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), dataset.Attribute("xmlns:xsi"));
- EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xsi:schemaLocation"));
+ EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xmlns"));
+ EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), dataset.Attribute("xmlns:xsi"));
+ EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xsi:schemaLocation"));
const ExternalResources& resources = dataset.ExternalResources();
ASSERT_EQ(2, resources.Size());
@@ -1176,9 +1247,9 @@ static void TestSubread3Xml(void)
EXPECT_EQ(string("barcode moreTags mapping mytags"), dataset.Tags());
EXPECT_EQ(string("b095d0a3-94b8-4918-b3af-a3f81bbe519c"), dataset.UniqueId());
EXPECT_EQ(string("2.3.0"), dataset.Version());
- EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xmlns"));
- EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), dataset.Attribute("xmlns:xsi"));
- EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xsi:schemaLocation"));
+ EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xmlns"));
+ EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), dataset.Attribute("xmlns:xsi"));
+ EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xsi:schemaLocation"));
const ExternalResources& resources = dataset.ExternalResources();
ASSERT_EQ(2, resources.Size());
@@ -1245,7 +1316,7 @@ static void TestTransformedXml(void)
EXPECT_EQ(string("pacbio.secondary.instrument=RS"), dataset.Tags());
EXPECT_EQ(string("abbc9183-b01e-4671-8c12-19efee534647"), dataset.UniqueId());
EXPECT_EQ(string("0.5"), dataset.Version());
- EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDataModel.xsd"), dataset.Attribute("xmlns"));
+ EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xmlns"));
EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema"), dataset.Attribute("xmlns:xs"));
EXPECT_EQ(string("http://www.w3.org/2005/xpath-functions"), dataset.Attribute("xmlns:fn"));
EXPECT_EQ(string("java:java.util.UUID"), dataset.Attribute("xmlns:uuid"));
@@ -1279,3 +1350,184 @@ static void TestTransformedXml(void)
EXPECT_EQ(string("150000"), metadata.NumRecords());
EXPECT_EQ(string("50000000"), metadata.TotalLength());
}
+
+TEST(DataSetIOTest, InspectMalformedXml)
+{
+ const string xmlFn = tests::Data_Dir + "/dataset/malformed.xml";
+
+ DataSet ds(xmlFn);
+ stringstream s;
+ ds.SaveToStream(s);
+
+ const string expected =
+ "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
+ "<SubreadSet Description=\"Merged dataset from 1 files using DatasetMerger 0.1.2\" "
+ "MetaType=\"PacBio.DataSet.HdfSubreadSet\" Name=\"Subreads from runr000013_42267_150403\" "
+ "Tags=\"pacbio.secondary.instrument=RS\" TimeStampedName=\"hdfsubreadset_2015-08-19T15:39:36.331-07:00\" "
+ "UniqueId=\"b4741521-2a4c-42df-8a13-0a755ca9ed1e\" Version=\"0.5\" "
+ "xmlns=\"http://pacificbiosciences.com/PacBioDatasets.xsd\" "
+ "xmlns:ns0=\"http://pacificbiosciences.com/PacBioBaseDataModel.xsd\" "
+ "xmlns:ns1=\"http://pacificbiosciences.com/PacBioSampleInfo.xsd\" "
+ "xmlns:ns2=\"http://pacificbiosciences.com/PacBioCollectionMetadata.xsd\" "
+ "xmlns:ns3=\"http://pacificbiosciences.com/PacBioReagentKit.xsd\" "
+ "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
+ "xsi:schemaLocation=\"http://pacificbiosciences.com/PacBioDatasets.xsd\">\n"
+ "\t<ns0:ExternalResources>\n"
+ "\t\t<ns0:ExternalResource MetaType=\"SubreadFile.SubreadBamFile\" "
+ "ResourceId=\"file:///mnt/secondary-siv/jenkins/jenkins-bot01/workspace/Ubuntu1404_Mainline_SA3_Tiny_tests/software/smrtanalysis/siv/testkit-jobs/sa3_pipelines/mapping/tiny/job_output-ubuntu1404/tasks/pbsmrtpipe.tasks.h5_subreads_to_subread-0//mnt/secondary-siv/jenkins/jenkins-bot01/workspace/Ubuntu1404_Mainline_SA3_Tiny_tests/software/smrtanalysis/siv/testkit-jobs/sa3_pipelines/mapping/tiny/job_output-ubuntu1404/tasks/pbsmrtpipe.tasks.h5_subreads_to_sub [...]
+ "TimeStampedName=\"SubreadFile.SubreadBamFile_00000000000000\" "
+ "UniqueId=\"251acf71-9eb0-489e-9dd1-cdbd11432753\" />\n"
+ "\t</ns0:ExternalResources>\n"
+ "\t<DataSetMetadata>\n"
+ "\t\t<TotalLength>50000000</TotalLength>\n"
+ "\t\t<NumRecords>150000</NumRecords>\n"
+ "\t\t<ns2:Collections>\n"
+ "\t\t\t<ns2:CollectionMetadata Context=\"m150404_101626_42267_c100807920800000001823174110291514_s1_p0\" "
+ "InstrumentId=\"1\" InstrumentName=\"42267\" MetaType=\"PacBio.Collection\" "
+ "TimeStampedName=\"m150404_101626_42267_c100807920800000001823174110291514_s1_p0\" "
+ "UniqueId=\"d66c8372-2b70-4dcf-b64f-9f8b5cc351fd\">\n"
+ "\t\t\t\t<ns2:InstCtrlVer>2.3.0.1.142990</ns2:InstCtrlVer>\n"
+ "\t\t\t\t<ns2:SigProcVer>NRT at 172.31.128.10:8082, SwVer=2301.142990, HwVer=1.0</ns2:SigProcVer>\n"
+ "\t\t\t\t<ns2:RunDetails>\n"
+ "\t\t\t\t\t<ns2:RunId>r000013_42267_150403</ns2:RunId>\n"
+ "\t\t\t\t\t<ns2:Name>Inst42267-040315-SAT-100pM-2kb-P6C4</ns2:Name>\n"
+ "\t\t\t\t</ns2:RunDetails>\n"
+ "\t\t\t\t<ns2:WellSample Name=\"Inst42267-040315-SAT-100pM-2kb-P6C4\">\n"
+ "\t\t\t\t\t<ns2:PlateId>Inst42267-040315-SAT-100pM-2kb-P6C4</ns2:PlateId>\n"
+ "\t\t\t\t\t<ns2:WellName>Inst42267-040315-SAT-100pM-2kb-P6C4</ns2:WellName>\n"
+ "\t\t\t\t\t<ns2:Concentration>0.0</ns2:Concentration>\n"
+ "\t\t\t\t\t<ns2:SampleReuseEnabled>false</ns2:SampleReuseEnabled>\n"
+ "\t\t\t\t\t<ns2:StageHotstartEnabled>false</ns2:StageHotstartEnabled>\n"
+ "\t\t\t\t\t<ns2:SizeSelectionEnabled>false</ns2:SizeSelectionEnabled>\n"
+ "\t\t\t\t\t<ns2:UseCount>1</ns2:UseCount>\n"
+ "\t\t\t\t\t<ns1:BioSamplePointers>\n"
+ "\t\t\t\t\t\t<ns1:BioSamplePointer>251acf71-9eb0-489e-9dd1-cdbd11432752</ns1:BioSamplePointer>\n"
+ "\t\t\t\t\t</ns1:BioSamplePointers>\n"
+ "\t\t\t\t</ns2:WellSample>\n"
+ "\t\t\t\t<ns2:Automation>\n"
+ "\t\t\t\t\t<ns0:AutomationParameters>\n"
+ "\t\t\t\t\t\t<ns0:AutomationParameter />\n"
+ "\t\t\t\t\t</ns0:AutomationParameters>\n"
+ "\t\t\t\t</ns2:Automation>\n"
+ "\t\t\t\t<ns2:CollectionNumber>7</ns2:CollectionNumber>\n"
+ "\t\t\t\t<ns2:CellIndex>4</ns2:CellIndex>\n"
+ "\t\t\t\t<ns2:CellPac Barcode=\"10080792080000000182317411029151\" />\n"
+ "\t\t\t\t<ns2:Primary>\n"
+ "\t\t\t\t\t<ns2:AutomationName>BasecallerV1</ns2:AutomationName>\n"
+ "\t\t\t\t\t<ns2:ConfigFileName>2-3-0_P6-C4.xml</ns2:ConfigFileName>\n"
+ "\t\t\t\t\t<ns2:SequencingCondition />\n"
+ "\t\t\t\t\t<ns2:OutputOptions>\n"
+ "\t\t\t\t\t\t<ns2:ResultsFolder>Analysis_Results</ns2:ResultsFolder>\n"
+ "\t\t\t\t\t\t<ns2:CollectionPathUri>rsy://mp-rsync/vol55//RS_DATA_STAGING/42267/Inst42267-040315-SAT-100pM-2kb-P6C4_13/A04_7/</ns2:CollectionPathUri>\n"
+ "\t\t\t\t\t\t<ns2:CopyFiles>\n"
+ "\t\t\t\t\t\t\t<ns2:CollectionFileCopy>Fasta</ns2:CollectionFileCopy>\n"
+ "\t\t\t\t\t\t</ns2:CopyFiles>\n"
+ "\t\t\t\t\t\t<ns2:Readout>Bases</ns2:Readout>\n"
+ "\t\t\t\t\t\t<ns2:MetricsVerbosity>Minimal</ns2:MetricsVerbosity>\n"
+ "\t\t\t\t\t</ns2:OutputOptions>\n"
+ "\t\t\t\t</ns2:Primary>\n"
+ "\t\t\t</ns2:CollectionMetadata>\n"
+ "\t\t</ns2:Collections>\n"
+ "\t\t<ns1:BioSamples>\n"
+ "\t\t\t<ns1:BioSample Description=\"Inst42267-SAT-100pM-2kbLambda-P6C4-Std120_CPS_040315\" "
+ "MetaType=\"PacBio.Sample\" Name=\"Inst42267-040315-SAT-100pM-2kb-P6C4\" "
+ "TimeStampedName=\"biosample_2015-08-19T15:39:36.331-07:00\" UniqueId=\"251acf71-9eb0-489e-9dd1-cdbd11432752\" />\n"
+ "\t\t</ns1:BioSamples>\n"
+ "\t</DataSetMetadata>\n"
+ "</SubreadSet>\n";
+
+ EXPECT_EQ(expected, s.str());
+}
+
+TEST(DataSetIOTest, RelativePathCarriedThroughOk_FromString)
+{
+ const string inputXml =
+ "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
+ "<pbds:AlignmentSet "
+ "CreatedAt=\"2015-01-27T09:00:01\" "
+ "MetaType=\"PacBio.DataSet.AlignmentSet\" "
+ "Name=\"DataSet_AlignmentSet\" "
+ "Tags=\"barcode moreTags mapping mytags\" "
+ "TimeStampedName=\"biosample_2015-08-19T15:39:36.331-07:00\" "
+ "UniqueId=\"b095d0a3-94b8-4918-b3af-a3f81bbe519c\" "
+ "Version=\"2.3.0\" "
+ "xmlns=\"http://pacificbiosciences.com/PacBioDataModel.xsd\" "
+ "xmlns:pbds=\"http://pacificbiosciences.com/PacBioDatasets.xsd\" "
+ "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
+ "xsi:schemaLocation=\"http://pacificbiosciences.com/PacBioDataModel.xsd\">\n"
+ "\t<pbbase:ExternalResources>\n"
+ "\t\t<pbbase:ExternalResource "
+ "Description=\"Points to an example Alignments BAM file.\" "
+ "MetaType=\"AlignmentFile.AlignmentBamFile\" "
+ "Name=\"Third Alignments BAM\" "
+ "ResourceId=\"../path/to/resource1.bam\" "
+ "Tags=\"Example\">\n"
+ "\t\t\t<pbbase:FileIndices>\n"
+ "\t\t\t\t<pbbase:FileIndex "
+ "MetaType=\"PacBio.Index.PacBioIndex\" "
+ "ResourceId=\"../path/to/resource1.bam.pbi\" />\n"
+ "\t\t\t</pbbase:FileIndices>\n"
+ "\t\t</pbbase:ExternalResource>\n"
+ "\t\t<pbbase:ExternalResource "
+ "Description=\"Points to another example Alignments BAM file, by relative path.\" "
+ "MetaType=\"AlignmentFile.AlignmentBamFile\" "
+ "Name=\"Fourth Alignments BAM\" "
+ "ResourceId=\"../path/to/resource2.bam\" "
+ "Tags=\"Example\">\n"
+ "\t\t\t<pbbase:FileIndices>\n"
+ "\t\t\t\t<pbbase:FileIndex "
+ "MetaType=\"PacBio.Index.PacBioIndex\" "
+ "ResourceId=\"../path/to/resource2.bam.pbi\" />\n"
+ "\t\t\t</pbbase:FileIndices>\n"
+ "\t\t</pbbase:ExternalResource>\n"
+ "\t</pbbase:ExternalResources>\n"
+ "</pbds:AlignmentSet>\n";
+
+ auto dataset = DataSet::FromXml(inputXml);
+
+ stringstream stream;
+ dataset.SaveToStream(stream);
+ auto outputXml = stream.str();
+
+ EXPECT_EQ(inputXml, outputXml);
+}
+
+TEST(DataSetIOTest, RelativePathCarriedThroughOk_FromFile)
+{
+ DataSet dataset(tests::Data_Dir + "/relative/relative.xml");
+ auto resources = dataset.ExternalResources();
+ EXPECT_EQ("./a/test.bam", resources[0].ResourceId());
+ EXPECT_EQ("./b/test1.bam", resources[1].ResourceId());
+ EXPECT_EQ("./b/test2.bam", resources[2].ResourceId());
+
+ stringstream out;
+ dataset.SaveToStream(out);
+
+ auto newDataset = DataSet::FromXml(out.str());
+ auto newResources = newDataset.ExternalResources();
+ EXPECT_EQ("./a/test.bam", newResources[0].ResourceId());
+ EXPECT_EQ("./b/test1.bam", newResources[1].ResourceId());
+ EXPECT_EQ("./b/test2.bam", newResources[2].ResourceId());
+}
+
+TEST(DataSetIOTest, DataSetFromRelativeBamFilename)
+{
+ // cache initial directory and move to location so we can test relatvie filename ok
+ const string startingDirectory = internal::FileUtils::CurrentWorkingDirectory();
+
+ const string targetDirectory = tests::Data_Dir + "/dataset";
+ changeCurrentDirectory(targetDirectory);
+ ASSERT_EQ(targetDirectory, internal::FileUtils::CurrentWorkingDirectory());
+
+ EXPECT_NO_THROW(
+ {
+ const string relativeBamFn = "../phi29.bam";
+ const DataSet ds(relativeBamFn);
+ const auto& files = ds.BamFiles();
+ EXPECT_EQ(1, files.size());
+ });
+
+ // restore working directory
+ changeCurrentDirectory(startingDirectory);
+}
+
diff --git a/tests/src/test_DataSetQuery.cpp b/tests/src/test_DataSetQuery.cpp
index da9b41e..2dc6b2b 100644
--- a/tests/src/test_DataSetQuery.cpp
+++ b/tests/src/test_DataSetQuery.cpp
@@ -49,6 +49,7 @@ using namespace PacBio::BAM;
using namespace std;
const string ex2BamFn = tests::Data_Dir + "/ex2.bam";
+const string ex2CopyBamFn = tests::Data_Dir + "/ex2_copy.bam";
const string bamMappingFn = tests::Data_Dir + "/dataset/bam_mapping.bam";
const string bamMappingFn_1 = tests::Data_Dir + "/dataset/bam_mapping_1.bam";
const string bamMappingFn_2 = tests::Data_Dir + "/dataset/bam_mapping_2.bam";
@@ -132,7 +133,7 @@ TEST(DataSetQueryTest, EntireFileQueryTest)
EXPECT_EQ(3307, count);
});
- // simple multi-file (actually just same file twice)
+ // duplicate file attempt
EXPECT_NO_THROW(
{
BamFile bamFile(ex2BamFn);
@@ -147,7 +148,7 @@ TEST(DataSetQueryTest, EntireFileQueryTest)
(void)record;
++count;
}
- EXPECT_EQ(3307*2, count);
+ EXPECT_EQ(3307, count); // same as single
});
// true multi-file dataset
@@ -263,7 +264,7 @@ TEST(DataSetQueryTest, GenomicIntervalQueryTest)
EXPECT_EQ(83, count);
});
- // multi-file (same twice)
+ // duplicate file
EXPECT_NO_THROW(
{
BamFile bamFile(ex2BamFn);
@@ -288,6 +289,81 @@ TEST(DataSetQueryTest, GenomicIntervalQueryTest)
prevPos = record.ReferenceStart();
++count;
}
+ EXPECT_EQ(39, count); // same as single file
+
+ // adjust interval and pass back in
+ count = 0;
+ interval.Start(500);
+ interval.Stop(600);
+ query.Interval(interval);
+ for (const BamRecord& record : query) {
+ (void)record;
+ ++count;
+ }
+ EXPECT_EQ(166, count); // same as single file
+
+ // adjust again
+ count = 0;
+ interval.Name("seq2");
+ interval.Start(0);
+ interval.Stop(100);
+ query.Interval(interval);
+ for (const BamRecord& record : query) {
+ (void)record;
+ ++count;
+ }
+ EXPECT_EQ(83, count); // same as single file
+
+ // unknown ref
+ count = 0;
+ interval.Name("does not exist");
+ interval.Start(0);
+ interval.Stop(100);
+ EXPECT_THROW(
+ query.Interval(interval);
+ , std::exception);
+ for (const BamRecord& record : query) { // iteration is still safe, just returns no data
+ (void)record;
+ ++count;
+ }
+ EXPECT_EQ(0, count); // same as single file
+
+ // adjust again - make sure we can read a real region after an invalid one
+ interval.Name("seq2");
+ interval.Start(0);
+ interval.Stop(100);
+ query.Interval(interval);
+ count = 0;
+ for (const BamRecord& record : query) {
+ (void)record;
+ ++count;
+ }
+ EXPECT_EQ(83, count); // same as single file
+ });
+
+ // multi file BAM (same record content for easy testing, but different filename(ResourceId)
+ EXPECT_NO_THROW(
+ {
+ DataSet dataset;
+ dataset.ExternalResources().Add(BamFile(ex2BamFn));
+ dataset.ExternalResources().Add(BamFile(ex2CopyBamFn));
+
+ // count records & also ensure sorted merge
+ int count = 0;
+ int prevId = 0;
+ int prevPos = 0;
+
+ GenomicInterval interval("seq1", 0, 100);
+ GenomicIntervalQuery query(interval, dataset);
+ for (const BamRecord& record : query) {
+
+ EXPECT_TRUE(record.ReferenceId() >= prevId);
+ EXPECT_TRUE(record.ReferenceStart() >= prevPos);
+
+ prevId = record.ReferenceId();
+ prevPos = record.ReferenceStart();
+ ++count;
+ }
EXPECT_EQ(39*2, count);
// adjust interval and pass back in
@@ -339,9 +415,6 @@ TEST(DataSetQueryTest, GenomicIntervalQueryTest)
}
EXPECT_EQ(83*2, count);
});
-
- // multi file BAM
-
}
// TODO: implement me
@@ -372,6 +445,30 @@ TEST(DataSetQueryTest, ZmwQueryTest)
});
// multi-file
+ {
+ BamFile bamFile(bamMappingFn);
+ bamFile.EnsurePacBioIndexExists();
+
+ BamFile bamFile_1(bamMappingFn_1);
+ bamFile_1.EnsurePacBioIndexExists();
+
+ BamFile bamFile_2(bamMappingFn_2);
+ bamFile_2.EnsurePacBioIndexExists();
+
+ DataSet dataset;
+ dataset.ExternalResources().Add(ExternalResource(bamFile));
+ dataset.ExternalResources().Add(ExternalResource(bamFile_1));
+ dataset.ExternalResources().Add(ExternalResource(bamFile_2));
+
+ int count = 0;
+ ZmwQuery query(whitelist, dataset);
+ for (const BamRecord& r : query) {
+ const auto holeNumber = r.HoleNumber();
+ EXPECT_TRUE(holeNumber == 13473 || holeNumber == 38025);
+ ++count;
+ }
+ EXPECT_EQ(15, count);
+ }
}
TEST(DataSetQueryTest, ZmwGroupQueryTest)
@@ -417,18 +514,29 @@ TEST(DataSetQueryTest, ZmwGroupQueryTest)
dataset.ExternalResources().Add(ExternalResource(bamFile_1));
dataset.ExternalResources().Add(ExternalResource(bamFile_2));
- int count = 0;
+ int totalCount = 0;
+ int numRecordsInGroup = 0;
+ int groupCount = 0;
int32_t groupZmw = -1;
ZmwGroupQuery query(whitelist, dataset);
for (const vector<BamRecord>& group : query) {
for (const BamRecord& record: group) {
+ ++numRecordsInGroup;
if (groupZmw == -1)
groupZmw = record.HoleNumber();
EXPECT_EQ(groupZmw, record.HoleNumber());
- ++count;
+ ++totalCount;
}
+ if (groupCount == 0)
+ EXPECT_EQ(9, numRecordsInGroup);
+ else if (groupCount == 1)
+ EXPECT_EQ(6, numRecordsInGroup);
+ else
+ EXPECT_TRUE(false); // should not get here
+ numRecordsInGroup = 0;
+ ++groupCount;
groupZmw = -1;
}
- EXPECT_EQ(15, count);
+ EXPECT_EQ(15, totalCount);
});
}
diff --git a/tests/src/test_DataSetXsd.cpp b/tests/src/test_DataSetXsd.cpp
index 177b758..1238122 100644
--- a/tests/src/test_DataSetXsd.cpp
+++ b/tests/src/test_DataSetXsd.cpp
@@ -94,13 +94,16 @@ TEST(DataSetXsdTest, EditDatasetRegistry)
dataset.MetaType("PacBio.DataSet.AlignmentSet");
dataset.Name("DataSet_AlignmentSet");
dataset.Tags("barcode moreTags mapping mytags");
+ dataset.TimeStampedName("my_time_stamped_name");
dataset.UniqueId("b095d0a3-94b8-4918-b3af-a3f81bbe519c");
- dataset.Version("2.3.0");
- dataset.Attribute("xmlns","http://pacificbiosciences.com/PacBioSecondaryDataModel.xsd")
- .Attribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance")
- .Attribute("xsi:schemaLocation",
- "http://pacificbiosciences.com/PacBioSecondaryDataModel.xsd PacBioSecondaryDataModel.xsd");
- dataset.ExternalResources().Add(ExternalResource("Fake.MetaType", "filename"));
+ dataset.Attribute("xmlns", "http://pacificbiosciences.com/PacBioDatasets.xsd")
+ .Attribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance")
+ .Attribute("xsi:schemaLocation", "http://pacificbiosciences.com/PacBioDatasets.xsd");
+
+ ExternalResource ext("Fake.MetaType", "filename");
+ ext.TimeStampedName("custom_tsn")
+ .UniqueId("my_uuid");
+ dataset.ExternalResources().Add(ext);
dataset.Namespaces().Register(XsdType::BASE_DATA_MODEL, NamespaceInfo("custom", "http://custom/uri.xsd"));
@@ -108,13 +111,15 @@ TEST(DataSetXsdTest, EditDatasetRegistry)
"<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
"<pbds:AlignmentSet CreatedAt=\"2015-01-27T09:00:01\" MetaType=\"PacBio.DataSet.AlignmentSet\" "
"Name=\"DataSet_AlignmentSet\" Tags=\"barcode moreTags mapping mytags\" "
- "UniqueId=\"b095d0a3-94b8-4918-b3af-a3f81bbe519c\" Version=\"2.3.0\" "
- "xmlns=\"http://pacificbiosciences.com/PacBioSecondaryDataModel.xsd\" "
+ "TimeStampedName=\"my_time_stamped_name\" "
+ "UniqueId=\"b095d0a3-94b8-4918-b3af-a3f81bbe519c\" Version=\"3.0.1\" "
+ "xmlns=\"http://pacificbiosciences.com/PacBioDatasets.xsd\" "
"xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
- "xsi:schemaLocation=\"http://pacificbiosciences.com/PacBioSecondaryDataModel.xsd "
- "PacBioSecondaryDataModel.xsd\">\n"
+ "xsi:schemaLocation=\"http://pacificbiosciences.com/PacBioDatasets.xsd\" "
+ "xmlns:custom=\"http://custom/uri.xsd\" "
+ "xmlns:pbds=\"http://pacificbiosciences.com/PacBioDatasets.xsd\">\n"
"\t<custom:ExternalResources>\n"
- "\t\t<custom:ExternalResource MetaType=\"Fake.MetaType\" ResourceId=\"filename\" />\n"
+ "\t\t<custom:ExternalResource MetaType=\"Fake.MetaType\" ResourceId=\"filename\" TimeStampedName=\"custom_tsn\" UniqueId=\"my_uuid\" Version=\"3.0.1\" />\n"
"\t</custom:ExternalResources>\n"
"</pbds:AlignmentSet>\n";
@@ -122,3 +127,56 @@ TEST(DataSetXsdTest, EditDatasetRegistry)
dataset.SaveToStream(s);
EXPECT_EQ(expectedXml, s.str());
}
+
+TEST(DataSetXsdTest, ElementRegistryOk)
+{
+ { // default namespaces
+
+ DataSet ds;
+
+ // append child elements that do not have a C++ built-in, nor namespace prefix with addition
+ DataSetMetadata& metadata = ds.Metadata();
+ metadata.AddChild(internal::DataSetElement("SummaryStats"));
+ metadata.AddChild(internal::DataSetElement("CopyFiles"));
+ metadata.AddChild(internal::DataSetElement("BioSamples"));
+ metadata.AddChild(internal::DataSetElement("AutomationParameters"));
+
+ stringstream s;
+ ds.SaveToStream(s);
+ const string output = s.str();
+
+ // check that default namespace is propagated properly
+ EXPECT_TRUE(output.find("pbds:SummaryStats") != string::npos);
+ EXPECT_TRUE(output.find("pbmeta:CopyFiles") != string::npos);
+ EXPECT_TRUE(output.find("pbsample:BioSamples") != string::npos);
+ EXPECT_TRUE(output.find("pbbase:AutomationParameters") != string::npos);
+ }
+
+ { // custom namespaces
+
+ DataSet ds;
+
+ // setup custom namespaces
+ ds.Namespaces().Register(XsdType::BASE_DATA_MODEL, NamespaceInfo("custom_base", "http://custom/base.xsd"));
+ ds.Namespaces().Register(XsdType::COLLECTION_METADATA, NamespaceInfo("custom_meta", "http://custom/meta.xsd"));
+ ds.Namespaces().Register(XsdType::DATASETS, NamespaceInfo("custom_ds", "http://custom/datasets.xsd"));
+ ds.Namespaces().Register(XsdType::SAMPLE_INFO, NamespaceInfo("custom_sample", "http://custom/base.xsd"));
+
+ // append child elements that do not have a C++ built-in, nor namespace prefix with addition
+ DataSetMetadata& metadata = ds.Metadata();
+ metadata.AddChild(internal::DataSetElement("SummaryStats"));
+ metadata.AddChild(internal::DataSetElement("CopyFiles"));
+ metadata.AddChild(internal::DataSetElement("BioSamples"));
+ metadata.AddChild(internal::DataSetElement("AutomationParameters"));
+
+ stringstream s;
+ ds.SaveToStream(s);
+ const string output = s.str();
+
+ // check that custom namespace is propagated properly
+ EXPECT_TRUE(output.find("custom_ds:SummaryStats") != string::npos);
+ EXPECT_TRUE(output.find("custom_meta:CopyFiles") != string::npos);
+ EXPECT_TRUE(output.find("custom_sample:BioSamples") != string::npos);
+ EXPECT_TRUE(output.find("custom_base:AutomationParameters") != string::npos);
+ }
+}
diff --git a/tests/src/test_EndToEnd.cpp b/tests/src/test_EndToEnd.cpp
index 7fe951e..fc6a740 100644
--- a/tests/src/test_EndToEnd.cpp
+++ b/tests/src/test_EndToEnd.cpp
@@ -48,13 +48,19 @@
#include <pbbam/EntireFileQuery.h>
#include <iostream>
#include <memory>
+#include <sstream>
#include <string>
#include <cstdio>
#include <cstdlib>
using namespace PacBio;
using namespace PacBio::BAM;
+using namespace PacBio::BAM::tests;
using namespace std;
+namespace PacBio {
+namespace BAM {
+namespace tests {
+
struct Bam1Deleter
{
void operator()(bam1_t* b) {
@@ -84,77 +90,96 @@ struct BamHdrDeleter
const string inputBamFn = tests::Data_Dir + "/ex2.bam";
const string goldStandardSamFn = tests::Data_Dir + "/ex2.sam";
-const string generatedBamFn = tests::Data_Dir + "/generated.bam";
-const string generatedSamFn = tests::Data_Dir + "/generated.sam";
+const string generatedBamFn = "/tmp/generated.bam";
+const string generatedSamFn = "/tmp/generated.sam";
+const vector<string> generatedFiles = { generatedBamFn, generatedSamFn };
+
+static inline
+int RunBam2Sam(const string& bamFn,
+ const string& samFn,
+ const string& args = string())
+{
+ stringstream s;
+ s << tests::Bam2Sam << " " << args << " " << bamFn << " > " << samFn;
+ return system(s.str().c_str());
+}
static inline
-int Samtools_Bam2Sam(const string& bamFilename,
- const string& samFilename)
+int RunDiff(const string& fn1, const string& fn2)
{
- const std::string& convertArgs = string("view -h ") + bamFilename + string(" > ") + samFilename;
- const std::string& convertCommandLine = tests::Samtools_Bin + string(" ") + convertArgs;
- return system(convertCommandLine.c_str());
+ stringstream s;
+ s << "diff " << fn1 << " " << fn2;
+ return system(s.str().c_str());
}
static inline
-int Diff_Sam2Sam(const string& fn1,
- const string& fn2)
+void Remove(const vector<string>& files)
{
- const std::string& diffCommandLine = string("diff ") + fn1 + string(" ") + fn2;
- return system(diffCommandLine.c_str());
+ for (const auto& fn : files)
+ remove(fn.c_str());
}
static inline
-void RemoveGeneratedFiles(const string& fn1,
- const string& fn2)
+void CheckGeneratedOutput(void)
{
- remove(fn1.c_str());
- remove(fn2.c_str());
+ // convert to sam & diff against gold standard
+ const int convertRet = RunBam2Sam(generatedBamFn, generatedSamFn);
+ const int diffRet = RunDiff(goldStandardSamFn, generatedSamFn);
+ EXPECT_EQ(0, convertRet);
+ EXPECT_EQ(0, diffRet);
+
+ // clean up
+ Remove(generatedFiles);
}
+} // namespace tests
+} // namespace BAM
+} // namespace PacBio
+
// sanity check for rest of tests below
-TEST(EndToEndTest, ReadPureHtslib_WritePureHtslib)
+TEST(EndToEndTest, ReadAndWrite_PureHtslib)
{
- // open input BAM file
- PBBAM_SHARED_PTR<samFile> inputBam(sam_open(inputBamFn.c_str(), "r"), SamFileDeleter());
- EXPECT_TRUE(inputBam != 0);
- PBBAM_SHARED_PTR<bam_hdr_t> header(sam_hdr_read(inputBam.get()), BamHdrDeleter());
+ { // scoped to force flush & close before conversion/diff
- // open output BAM file
- PBBAM_SHARED_PTR<samFile> outputBam(sam_open(generatedBamFn.c_str(), "wb"), SamFileDeleter());
- sam_hdr_write(outputBam.get(), header.get());
+ // open files
- // copy BAM file
- PBBAM_SHARED_PTR<bam1_t> record(bam_init1(), Bam1Deleter());
- while (sam_read1(inputBam.get(), header.get(), record.get()) >= 0)
- sam_write1(outputBam.get(), header.get(), record.get());
+ unique_ptr<samFile, SamFileDeleter> inWrapper(sam_open(inputBamFn.c_str(), "r"));
+ samFile* in = inWrapper.get();
+ ASSERT_TRUE(in);
- // need to close files before comparing (to flush any buffers)
- inputBam.reset();
- outputBam.reset();
+ unique_ptr<samFile, SamFileDeleter> outWrapper(sam_open(generatedBamFn.c_str(), "wb"));
+ samFile* out = outWrapper.get();
+ ASSERT_TRUE(out);
- // convert to sam & diff against gold standard
+ // fetch & write header
- // TODO: disabled for now - need to replace non-PB BAM files in test cases
+ unique_ptr<bam_hdr_t, BamHdrDeleter> headerWrapper(sam_hdr_read(in));
+ bam_hdr_t* hdr = headerWrapper.get();
+ ASSERT_TRUE(hdr);
+ ASSERT_EQ(0, sam_hdr_write(out, hdr));
-// const int convertRet = Samtools_Bam2Sam(generatedBamFn, generatedSamFn);
-// const int diffRet = Diff_Sam2Sam(goldStandardSamFn, generatedSamFn);
-// EXPECT_EQ(0, convertRet);
-// EXPECT_EQ(0, diffRet);
+ // fetch & write records
- // clean up
- RemoveGeneratedFiles(generatedBamFn, generatedSamFn);
+ unique_ptr<bam1_t, Bam1Deleter> record(bam_init1());
+ bam1_t* b = record.get();
+ ASSERT_TRUE(b);
+
+ while (sam_read1(in, hdr, b) >= 0)
+ sam_write1(out, hdr, b);
+ }
+
+ CheckGeneratedOutput();
}
-TEST(EndToEndTest, ReadBamRecord_WriteBamRecord_SingleThread)
+TEST(EndToEndTest, ReadAndWrite_SingleThread)
{
EXPECT_NO_THROW(
{
// open input BAM file
- BamFile bamFile(inputBamFn);
+ BamFile bamFile(tests::inputBamFn);
// open output BAM file
- BamWriter writer(generatedBamFn, bamFile.Header(), BamWriter::DefaultCompression, 1);
+ BamWriter writer(tests::generatedBamFn, bamFile.Header(), BamWriter::DefaultCompression, 1);
// copy BAM file
EntireFileQuery entireFile(bamFile);
@@ -162,17 +187,10 @@ TEST(EndToEndTest, ReadBamRecord_WriteBamRecord_SingleThread)
writer.Write(record);
});
- // convert to sam & diff against gold standard
- const int convertRet = Samtools_Bam2Sam(generatedBamFn, generatedSamFn);
- const int diffRet = Diff_Sam2Sam(goldStandardSamFn, generatedSamFn);
- EXPECT_EQ(0, convertRet);
- EXPECT_EQ(0, diffRet);
-
- // clean up
- RemoveGeneratedFiles(generatedBamFn, generatedSamFn);
+ CheckGeneratedOutput();
}
-TEST(EndToEndTest, ReadBamRecord_WriteBamRecord_APIDefaultThreadCount)
+TEST(EndToEndTest, ReadAndWrite_APIDefaultThreadCount)
{
EXPECT_NO_THROW(
{
@@ -188,18 +206,10 @@ TEST(EndToEndTest, ReadBamRecord_WriteBamRecord_APIDefaultThreadCount)
writer.Write(record);
});
- // convert to sam & diff against gold standard
- const int convertRet = Samtools_Bam2Sam(generatedBamFn, generatedSamFn);
- const int diffRet = Diff_Sam2Sam(goldStandardSamFn, generatedSamFn);
- EXPECT_EQ(0, convertRet);
- EXPECT_EQ(0, diffRet);
-
- // clean up
- RemoveGeneratedFiles(generatedBamFn, generatedSamFn);
-
+ CheckGeneratedOutput();
}
-TEST(EndToEndTest, ReadBamRecord_WriteBamRecord_SystemDefaultThreadCount)
+TEST(EndToEndTest, ReadAndWrite_SystemDefaultThreadCount)
{
EXPECT_NO_THROW(
{
@@ -207,7 +217,10 @@ TEST(EndToEndTest, ReadBamRecord_WriteBamRecord_SystemDefaultThreadCount)
BamFile bamFile(inputBamFn);
// open output BAM file
- BamWriter writer(generatedBamFn, bamFile.Header(), BamWriter::DefaultCompression, 0);
+ BamWriter writer(generatedBamFn,
+ bamFile.Header(),
+ BamWriter::DefaultCompression,
+ 0);
// copy BAM file
EntireFileQuery entireFile(bamFile);
@@ -215,17 +228,10 @@ TEST(EndToEndTest, ReadBamRecord_WriteBamRecord_SystemDefaultThreadCount)
writer.Write(record);
});
- // convert to sam & diff against gold standard
- const int convertRet = Samtools_Bam2Sam(generatedBamFn, generatedSamFn);
- const int diffRet = Diff_Sam2Sam(goldStandardSamFn, generatedSamFn);
- EXPECT_EQ(0, convertRet);
- EXPECT_EQ(0, diffRet);
-
- // clean up
- RemoveGeneratedFiles(generatedBamFn, generatedSamFn);
+ CheckGeneratedOutput();
}
-TEST(EndToEndTest, ReadBamRecord_WriteBamRecord_UserThreadCount)
+TEST(EndToEndTest, ReadAndWrite_UserThreadCount)
{
EXPECT_NO_THROW(
{
@@ -233,7 +239,10 @@ TEST(EndToEndTest, ReadBamRecord_WriteBamRecord_UserThreadCount)
BamFile bamFile(inputBamFn);
// open output BAM file
- BamWriter writer(generatedBamFn, bamFile.Header(), BamWriter::DefaultCompression, 6);
+ BamWriter writer(generatedBamFn,
+ bamFile.Header(),
+ BamWriter::DefaultCompression,
+ 3);
// copy BAM file
EntireFileQuery entireFile(bamFile);
@@ -241,12 +250,5 @@ TEST(EndToEndTest, ReadBamRecord_WriteBamRecord_UserThreadCount)
writer.Write(record);
});
- // convert to sam & diff against gold standard
- const int convertRet = Samtools_Bam2Sam(generatedBamFn, generatedSamFn);
- const int diffRet = Diff_Sam2Sam(goldStandardSamFn, generatedSamFn);
- EXPECT_EQ(0, convertRet);
- EXPECT_EQ(0, diffRet);
-
- // clean up
- RemoveGeneratedFiles(generatedBamFn, generatedSamFn);
+ CheckGeneratedOutput();
}
diff --git a/tests/src/test_EntireFileQuery.cpp b/tests/src/test_EntireFileQuery.cpp
index b444f2b..6acfbb2 100644
--- a/tests/src/test_EntireFileQuery.cpp
+++ b/tests/src/test_EntireFileQuery.cpp
@@ -42,6 +42,7 @@
#include "TestData.h"
#include <gtest/gtest.h>
#include <pbbam/EntireFileQuery.h>
+#include <pbbam/BamWriter.h>
#include <string>
using namespace PacBio;
using namespace PacBio::BAM;
@@ -142,7 +143,7 @@ TEST(BamRecordTest, ReferenceName)
// {
// const string exampleBam = tests::Data_Dir + "/unmap1.bam";
// BamFile bamFile(exampleBam);
-// staging::EntireFileQuery records(bamFile);
+// EntireFileQuery records(bamFile);
// EXPECT_THROW(records.begin()->ReferenceName(), std::exception);
// }
diff --git a/tests/src/test_FileUtils.cpp b/tests/src/test_FileUtils.cpp
new file mode 100644
index 0000000..814200f
--- /dev/null
+++ b/tests/src/test_FileUtils.cpp
@@ -0,0 +1,325 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include <gtest/gtest.h>
+#include <pbbam/../../src/FileUtils.h>
+#include <pbbam/../../src/TimeUtils.h>
+
+#include <boost/algorithm/string.hpp>
+
+#include <chrono>
+#include <string>
+#include <vector>
+#include <cctype>
+#include <cstdio>
+#include <cstdlib>
+
+#include <iostream>
+
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+TEST(FileUtilsTest, ExistsOk)
+{
+ EXPECT_FALSE(FileUtils::Exists("does_not_exist.txt"));
+
+ const string tmp = "/tmp/pbbam_exists_check.tmp";
+ const string cmd = string("touch ") + tmp;
+ ASSERT_EQ(0, system(cmd.c_str()));
+ EXPECT_TRUE(FileUtils::Exists(tmp));
+}
+
+TEST(FileUtilsTest, LastModifiedOk)
+{
+ // a little tricky to check without going a full 'mock' filesystem route, but we can approximate
+ //
+ // also, I can't seem to get better than second resolution (on OSX 10.9/clang at least, st_mtimespec.tv_nsec is always zero)
+
+ const auto now = CurrentTime();
+ const auto nowDuration = now.time_since_epoch();
+ const auto nowSeconds = chrono::duration_cast<chrono::seconds>(nowDuration).count();
+
+ const string tmp = "/tmp/pbbam_lastmod_check.tmp";
+ const string rmCmd = string("rm ") + tmp;
+ const string touchCmd = string("touch ") + tmp;
+ int ret = system(rmCmd.c_str());
+ (void)ret; // unused
+ ASSERT_EQ(0, system(touchCmd.c_str()));
+
+ const auto stamp = FileUtils::LastModified(tmp);
+ const auto stampDuration = stamp.time_since_epoch();
+ const auto stampSeconds = chrono::duration_cast<chrono::seconds>(stampDuration).count();
+
+ EXPECT_LE(nowSeconds, stampSeconds);
+}
+
+TEST(FileUtilsTest, ResolvedFilePathOk)
+{
+ const string testFrom = "/path/to/myDir";
+
+ // "raw" filenames - no URI scheme
+
+ const string absolutePath = "/absolute/path/to/file.txt";
+ const string relativePath = "../relative/path/to/file.txt";
+ const string noPathFn = "file.txt";
+
+ const string resolvedAbsolutePath = FileUtils::ResolvedFilePath(absolutePath, testFrom);
+ const string resolvedRelativePath = FileUtils::ResolvedFilePath(relativePath, testFrom);
+ const string resolvedNoPath = FileUtils::ResolvedFilePath(noPathFn, testFrom);
+ const string resolvedAbsolutePath_defaultFrom = FileUtils::ResolvedFilePath(absolutePath);
+ const string resolvedRelativePath_defaultFrom = FileUtils::ResolvedFilePath(relativePath);
+ const string resolvedNoPath_defaultFrom = FileUtils::ResolvedFilePath(noPathFn);
+
+ EXPECT_EQ("/absolute/path/to/file.txt", resolvedAbsolutePath);
+ EXPECT_EQ("/path/to/myDir/../relative/path/to/file.txt", resolvedRelativePath);
+ EXPECT_EQ("/path/to/myDir/file.txt", resolvedNoPath);
+
+ EXPECT_EQ("/absolute/path/to/file.txt", resolvedAbsolutePath_defaultFrom);
+ EXPECT_EQ("./../relative/path/to/file.txt", resolvedRelativePath_defaultFrom);
+ EXPECT_EQ("./file.txt", resolvedNoPath_defaultFrom);
+
+ // filenames with URI scheme ("file://")
+
+ const string absoluteSchemeFn = "file:///absolute/path/to/file.txt";
+ const string relativeSchemeFn = "file://../relative/path/to/file.txt";
+ const string noPathSchemeFn = "file://file.txt";
+
+ const string resolvedAbsoluteSchemePath = FileUtils::ResolvedFilePath(absoluteSchemeFn, testFrom);
+ const string resolvedRelativeSchemePath = FileUtils::ResolvedFilePath(relativeSchemeFn, testFrom);
+ const string resolvedNoPathSchemeFn = FileUtils::ResolvedFilePath(noPathSchemeFn, testFrom);
+ const string resolvedAbsoluteSchemePath_defaultFrom = FileUtils::ResolvedFilePath(absoluteSchemeFn);
+ const string resolvedRelativeSchemePath_defaultFrom = FileUtils::ResolvedFilePath(relativeSchemeFn);
+ const string resolvedNoPathSchemeFn_defaultFrom = FileUtils::ResolvedFilePath(noPathSchemeFn);
+
+ EXPECT_EQ("/absolute/path/to/file.txt", resolvedAbsoluteSchemePath);
+ EXPECT_EQ("/path/to/myDir/../relative/path/to/file.txt", resolvedRelativeSchemePath);
+ EXPECT_EQ("/path/to/myDir/file.txt", resolvedNoPathSchemeFn);
+
+ EXPECT_EQ("/absolute/path/to/file.txt", resolvedAbsoluteSchemePath_defaultFrom);
+ EXPECT_EQ("./../relative/path/to/file.txt", resolvedRelativeSchemePath_defaultFrom);
+ EXPECT_EQ("./file.txt", resolvedNoPathSchemeFn_defaultFrom);
+}
+
+TEST(FileUtilsTest, SizeOk)
+{
+ const string tmp = "/tmp/pbbam_empty_file.tmp";
+ const string cmd = string("touch ") + tmp;
+ ASSERT_EQ(0, system(cmd.c_str()));
+ EXPECT_EQ(0, FileUtils::Size(tmp));
+
+ EXPECT_THROW(FileUtils::Size("does_not_exist.txt"), std::runtime_error);
+}
+
+// ####################################################################################################
+// The code below is part of a simple check whether or not a (Windows-only) file path is absolute.
+//
+// NOTE: (and this is admittedly brittle for maintenance, but) the internal methods used are literally
+// copied here for direct driving. There's likely a better way going forward, than the manual copy/paste.
+// But in the absence of a similar runtime environment to build in & test against, while
+// the motivating behavior is blocking other work, this lets me get the fix in their hands ASAP and still
+// have some test code poking it beforehand. -DB
+//
+namespace test_windows {
+
+static string removeFileUriScheme(const string& uri)
+{
+ assert(!uri.empty());
+
+ auto schemeLess = uri;
+ const auto fileScheme = string{"file://"};
+ const auto schemeFound = schemeLess.find(fileScheme);
+ if (schemeFound != string::npos) {
+ if (schemeFound != 0)
+ throw runtime_error("Malformed URI: scheme not at beginning");
+ schemeLess = schemeLess.substr(fileScheme.size());
+ }
+ return schemeLess;
+}
+
+static
+string removeDiskName(const string& filePath)
+{
+ if (filePath.size() >= 2) {
+ const char firstChar = filePath.at(0);
+ if ((isalpha(firstChar) != 0) && (filePath.at(1) == ':'))
+ return filePath.substr(2);
+ }
+ return filePath;
+}
+
+static const char native_pathSeparator = '\\';
+
+static bool native_pathIsAbsolute(const string& filePath)
+{
+ assert(!filePath.empty());
+
+ // if starts with single slash or double slash [cases 1,3]
+ if (boost::algorithm::starts_with(filePath, "\\"))
+ return true;
+
+ // if starts with single or double-dots -> not absolute [case 4 + ".\file.txt"]
+ if (boost::algorithm::starts_with(filePath, "."))
+ return false;
+
+ // if starts with drive name and colon ("C:\foo\bar.txt")
+ if (filePath.size() >= 2) {
+ const char firstChar = filePath.at(0);
+ if ((isalpha(firstChar) != 0) && (filePath.at(1) == ':'))
+ return native_pathIsAbsolute(removeDiskName(filePath));
+ }
+
+ // otherwise, likely relative
+ return false;
+}
+
+static string native_resolvedFilePath(const string& filePath,
+ const string& from)
+{
+ // strip file:// scheme if present
+ auto schemeLess = removeFileUriScheme(filePath);
+
+ // if empty or already absolute path, just return it
+ // upfront empty check simplifies further parsing logic
+ if (schemeLess.empty() || native_pathIsAbsolute(schemeLess))
+ return schemeLess;
+
+ // else make relative from the provided 'from' directory
+ //
+ // first pop disk name, then any leading single-dot '.'
+ //
+ // since we're prepending the 'from' directory, we can remove
+ // any leading './' form our file path. this may just mean that
+ // we pop it off to add it right back (when from == '.'), but this
+ // keeps it consistent with other 'from' parent directories
+ //
+ schemeLess = removeDiskName(schemeLess);
+
+ const bool thisDirAtStart = (schemeLess.find(".") == 0);
+ if (thisDirAtStart) {
+ if (schemeLess.find(native_pathSeparator) == 1)
+ schemeLess = schemeLess.substr(2);
+ }
+ return from + native_pathSeparator + schemeLess;
+}
+
+} // namespace test_windows
+
+TEST(FileUtilsTest, WindowsPathsOk)
+{
+ { // remove disk name
+
+ // "C:\tmp.txt"
+ string f1 = "C:\\tmp.txt";
+ EXPECT_EQ(string("\\tmp.txt"), test_windows::removeDiskName(f1));
+
+ // "C:tmp.txt"
+ string f2 = "C:tmp.txt";
+ EXPECT_EQ(string("tmp.txt"), test_windows::removeDiskName(f2));
+
+ // "\tmp.txt"
+ string f3 = "\\tmp.txt";
+ EXPECT_EQ(f3, test_windows::removeDiskName(f3));
+
+ // "tmp.txt"
+ string f4 = "tmp.txt";
+ EXPECT_EQ(f4, test_windows::removeDiskName(f4));
+ }
+
+ { // isAbsolute ?
+
+ // "\\server\path\to\tmp.txt"
+ EXPECT_TRUE(test_windows::native_pathIsAbsolute("\\\\server\\path\\to\tmp.txt"));
+
+ // "..\tmp.txt"
+ EXPECT_FALSE(test_windows::native_pathIsAbsolute("..\\tmp.txt"));
+
+ // ".\tmp.txt"
+ EXPECT_FALSE(test_windows::native_pathIsAbsolute(".\\tmp.txt"));
+
+ // "C:\path\to\tmp.txt"
+ EXPECT_TRUE(test_windows::native_pathIsAbsolute("C:\\path\\to\\tmp.txt"));
+
+ // "C:..\path\to\tmp.txt"
+ EXPECT_FALSE(test_windows::native_pathIsAbsolute("C:..\\path\\to\\tmp.txt"));
+ }
+
+ { // resolve file path
+
+ const string myRootDir = "C:\\path\\to\\myRootDir";
+
+ // "\\server\path\to\tmp.txt"
+ const string fn1 = "\\\\server\\path\\to\tmp.txt";
+ const string fn1_expected = fn1;
+ EXPECT_EQ(fn1_expected, test_windows::native_resolvedFilePath(fn1, myRootDir));
+
+ // "..\tmp.txt"
+ const string fn2 = "..\\tmp.txt";
+ const string fn2_expected = "C:\\path\\to\\myRootDir\\..\\tmp.txt";
+ EXPECT_EQ(fn2_expected, test_windows::native_resolvedFilePath(fn2, myRootDir));
+
+ // ".\tmp.txt"
+ const string fn3 = ".\\tmp.txt";
+ const string fn3_expected = "C:\\path\\to\\myRootDir\\tmp.txt";
+ EXPECT_EQ(fn3_expected, test_windows::native_resolvedFilePath(fn3, myRootDir));
+
+ // "C:\path\to\tmp.txt"
+ const string fn4 = "C:\\path\\to\\tmp.txt";
+ const string fn4_expected = fn4;
+ EXPECT_EQ(fn4_expected, test_windows::native_resolvedFilePath(fn4, myRootDir));
+
+ // "C:..\path\to\tmp.txt"
+ const string fn5 = "C:..\\path\\to\\tmp.txt";
+ const string fn5_expected = "C:\\path\\to\\myRootDir\\..\\path\\to\\tmp.txt";
+ EXPECT_EQ(fn5_expected, test_windows::native_resolvedFilePath(fn5, myRootDir));
+
+ // "C:tmp.txt"
+ const string fn6 = "C:tmp.txt";
+ const string fn6_expected = "C:\\path\\to\\myRootDir\\tmp.txt";
+ EXPECT_EQ(fn6_expected, test_windows::native_resolvedFilePath(fn6, myRootDir));
+ EXPECT_EQ(fn3_expected, test_windows::native_resolvedFilePath(fn6, myRootDir)); // our path is equivalent to fn3's "./temp.txt"
+ }
+}
+//
+// ####################################################################################################
+
+
diff --git a/tests/src/test_GenomicIntervalQuery.cpp b/tests/src/test_GenomicIntervalQuery.cpp
index 0fb98bc..43c8c1e 100644
--- a/tests/src/test_GenomicIntervalQuery.cpp
+++ b/tests/src/test_GenomicIntervalQuery.cpp
@@ -1,182 +1,166 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-//
-// * Neither the name of Pacific Biosciences nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-
-// Author: Derek Barnett
-
-#ifdef PBBAM_TESTING
-#define private public
-#endif
-
-#include "TestData.h"
-#include <gtest/gtest.h>
-#include <pbbam/GenomicIntervalQuery.h>
-#include <iostream>
-#include <string>
-using namespace PacBio;
-using namespace PacBio::BAM;
-using namespace std;
-
-const string inputBamFn = tests::Data_Dir + "/ex2.bam";
-
-TEST(GenomicIntervalQueryTest, ReuseQueryAndCountRecords)
-{
- const string seq1 = "seq1";
- const string seq2 = "seq2";
-
- // open input BAM file
- BamFile bamFile(inputBamFn);
-
- // count records
- int count = 0;
- GenomicInterval interval(seq1, 0, 100);
- GenomicIntervalQuery query(interval, bamFile);
- for (const BamRecord& record : query) {
- (void)record;
- ++count;
- }
- EXPECT_EQ(39, count);
-
- // adjust interval and pass back in
- count = 0;
- interval.Start(500);
- interval.Stop(600);
- query.Interval(interval);
- for (const BamRecord& record : query) {
- (void)record;
- ++count;
- }
- EXPECT_EQ(166, count);
-
- // adjust again
- count = 0;
- interval.Name(seq2);
- interval.Start(0);
- interval.Stop(100);
- query.Interval(interval);
- for (const BamRecord& record : query) {
- (void)record;
- ++count;
- }
- EXPECT_EQ(83, count);
-
- // unknown ref
- count = 0;
- interval.Name("does not exist");
- interval.Start(0);
- interval.Stop(100);
- EXPECT_THROW(
- query.Interval(interval);
- , std::exception);
- for (const BamRecord& record : query) { // iteration is still safe, just returns no data
- (void)record;
- ++count;
- }
- EXPECT_EQ(0, count);
-
- // adjust again - make sure we can read a real region after an invalid one
- interval.Name(seq2);
- interval.Start(0);
- interval.Stop(100);
- query.Interval(interval);
- count = 0;
- for (const BamRecord& record : query) {
- (void)record;
- ++count;
- }
- EXPECT_EQ(83, count);
-}
-
-TEST(GenomicIntervalQueryTest, NonConstBamRecord)
-{
- EXPECT_NO_THROW(
- {
- // open input BAM file
- BamFile bamFile(inputBamFn);
-
- // count records
- int count = 0;
- GenomicInterval interval("seq1", 0, 100);
- GenomicIntervalQuery query(interval, bamFile);
- for (BamRecord& record : query) {
- (void)record;
- ++count;
- }
- EXPECT_EQ(39, count);
- });
-}
-
-//TEST(GenomicIntervalQueryTest, WorksWithBamRecordImpl)
-//{
-// // open input BAM file
-// BamFile bamFile(inputBamFn);
-// EXPECT_TRUE(bamFile);
-
-// const int id = bamFile.ReferenceId("seq1");
-// EXPECT_TRUE(id != -1);
-
-// // count records
-// int count = 0;
-// GenomicInterval interval(id, 0, 100);
-// GenomicIntervalQuery query(interval, bamFile);
-// EXPECT_TRUE(query);
-// for (const BamRecordImpl& record : query) {
-// (void)record;
-// ++count;
-// }
-// EXPECT_EQ(39, count);
-//}
-
-//TEST(GenomicIntervalQueryTest, WorksWithNonConstBamRecordImpl)
-//{
-// // open input BAM file
-// BamFile bamFile(inputBamFn);
-// EXPECT_TRUE(bamFile);
-
-// const int id = bamFile.ReferenceId("seq1");
-// EXPECT_TRUE(id != -1);
-
-// // count records
-// int count = 0;
-// GenomicInterval interval(id, 0, 100);
-// GenomicIntervalQuery query(interval, bamFile);
-// EXPECT_TRUE(query);
-// for (BamRecordImpl& record : query) {
-// (void)record;
-// ++count;
-// }
-// EXPECT_EQ(39, count);
-//}
-
-// add special cases as needed
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include "TestData.h"
+#include <gtest/gtest.h>
+#include <pbbam/GenomicIntervalQuery.h>
+#include <iostream>
+#include <string>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+const string inputBamFn = tests::Data_Dir + "/ex2.bam";
+
+TEST(GenomicIntervalQueryTest, ReuseQueryAndCountRecords)
+{
+ const string seq1 = "seq1";
+ const string seq2 = "seq2";
+
+ // open input BAM file
+ BamFile bamFile(inputBamFn);
+
+ // count records
+ int count = 0;
+ GenomicInterval interval(seq1, 0, 100);
+ GenomicIntervalQuery query(interval, bamFile);
+ for (const BamRecord& record : query) {
+ (void)record;
+ ++count;
+ }
+ EXPECT_EQ(39, count);
+
+ // adjust interval and pass back in
+ count = 0;
+ interval.Start(500);
+ interval.Stop(600);
+ query.Interval(interval);
+ for (const BamRecord& record : query) {
+ (void)record;
+ ++count;
+ }
+ EXPECT_EQ(166, count);
+
+ // adjust again
+ count = 0;
+ interval.Name(seq2);
+ interval.Start(0);
+ interval.Stop(100);
+ query.Interval(interval);
+ for (const BamRecord& record : query) {
+ (void)record;
+ ++count;
+ }
+ EXPECT_EQ(83, count);
+
+ // unknown ref
+ count = 0;
+ interval.Name("does not exist");
+ interval.Start(0);
+ interval.Stop(100);
+ EXPECT_THROW(
+ query.Interval(interval);
+ , std::exception);
+ for (const BamRecord& record : query) { // iteration is still safe, just returns no data
+ (void)record;
+ ++count;
+ }
+ EXPECT_EQ(0, count);
+
+ // adjust again - make sure we can read a real region after an invalid one
+ interval.Name(seq2);
+ interval.Start(0);
+ interval.Stop(100);
+ query.Interval(interval);
+ count = 0;
+ for (const BamRecord& record : query) {
+ (void)record;
+ ++count;
+ }
+ EXPECT_EQ(83, count);
+}
+
+TEST(GenomicIntervalQueryTest, NonConstBamRecord)
+{
+ EXPECT_NO_THROW(
+ {
+ // open input BAM file
+ BamFile bamFile(inputBamFn);
+
+ // count records
+ int count = 0;
+ GenomicInterval interval("seq1", 0, 100);
+ GenomicIntervalQuery query(interval, bamFile);
+ for (BamRecord& record : query) {
+ (void)record;
+ ++count;
+ }
+ EXPECT_EQ(39, count);
+ });
+}
+
+TEST(GenomicIntervalQueryTest, MissingBaiShouldThrow)
+{
+ GenomicInterval interval("seq1", 0, 100);
+ const string phi29Bam = tests::Data_Dir + "/phi29.bam";
+ const string hasBaiBam = tests::Data_Dir + "/dataset/bam_mapping1.bam";
+
+ { // single file, missing BAI
+
+ EXPECT_THROW(GenomicIntervalQuery query(interval, phi29Bam), std::runtime_error);
+ }
+
+ { // from dataset, all missing BAI
+
+ DataSet ds;
+ ds.ExternalResources().Add(ExternalResource("PacBio.SubreadFile.SubreadBamFile", phi29Bam));
+ ds.ExternalResources().Add(ExternalResource("PacBio.SubreadFile.SubreadBamFile", phi29Bam));
+ EXPECT_THROW(GenomicIntervalQuery query(interval, ds), std::runtime_error);
+ }
+
+ { // from dataset, mixed BAI presence
+
+ DataSet ds;
+ ds.ExternalResources().Add(ExternalResource("PacBio.SubreadFile.SubreadBamFile", phi29Bam));
+ ds.ExternalResources().Add(ExternalResource("PacBio.AlignmentFile.AlignmentBamFile", hasBaiBam));
+ EXPECT_THROW(GenomicIntervalQuery query(interval, ds), std::runtime_error);
+ }
+}
diff --git a/tests/src/test_PacBioIndex.cpp b/tests/src/test_PacBioIndex.cpp
index c841c68..c747c9c 100644
--- a/tests/src/test_PacBioIndex.cpp
+++ b/tests/src/test_PacBioIndex.cpp
@@ -42,12 +42,13 @@
#include "TestData.h"
#include <gtest/gtest.h>
#include <pbbam/BamFile.h>
+#include <pbbam/BamReader.h>
#include <pbbam/BamWriter.h>
#include <pbbam/EntireFileQuery.h>
#include <pbbam/PbiBuilder.h>
#include <pbbam/PbiIndex.h>
+#include <pbbam/PbiLookupData.h>
#include <pbbam/PbiRawData.h>
-#include <pbbam/internal/PbiIndex_p.h>
#include <string>
#include <cstdio>
#include <cstdlib>
@@ -56,40 +57,70 @@ using namespace PacBio;
using namespace PacBio::BAM;
using namespace std;
-const string test2BamFn = tests::Data_Dir + "/test_group_query/test2.bam";
+const string test2BamFn = tests::Data_Dir + "/dataset/bam_mapping_new.bam";
+const string phi29BamFn = tests::Data_Dir + "/phi29.bam";
namespace PacBio {
namespace BAM {
namespace tests {
static
-PbiRawData Test2Bam_RawIndex(void)
+PbiRawData Test2Bam_CoreIndexData(void)
+
+{
+ PbiRawData rawData;
+ rawData.Version(PbiFile::Version_3_0_1);
+ rawData.FileSections(PbiFile::BASIC | PbiFile::MAPPED | PbiFile::REFERENCE);
+ rawData.NumReads(10);
+
+ PbiRawBasicData& basicData = rawData.BasicData();
+ basicData.rgId_ = {-1574697275,-1574697275,-1574697275,-1574697275,-1574697275,-1574697275,-1574697275,-1574697275,-1574697275,-1574697275};
+ basicData.qStart_ = {48,387,0,9936,10232,7468,5557,7285,426,7064};
+ basicData.qEnd_ = {1132,1134,344,10187,10394,8906,7235,8657,1045,7421};
+ basicData.holeNumber_ = {49050,32328,32328,6469,6469,30983,13473,13473,19915,30983};
+ basicData.readQual_ = {0,0,0,0,0,0,0,0,0,0};
+ basicData.ctxtFlag_ = {0,0,0,0,0,0,0,0,0,0};
+ basicData.fileOffset_ = { };
+
+ PbiRawMappedData& mappedData = rawData.MappedData();
+ mappedData.tId_ = {0,0,0,0,0,0,0,0,0,0};
+ mappedData.tStart_ = {0,302,675,2170,2203,3572,4506,4507,4592,4669};
+ mappedData.tEnd_ = {471,1019,1026,2397,2326,5015,6125,5850,5203,5011};
+ mappedData.aStart_ = {653,395,1,9960,10271,7468,5574,7285,441,7075};
+ mappedData.aEnd_ = {1129,1134,344,10185,10394,8906,7235,8647,1040,7418};
+ mappedData.revStrand_ = {0,1,0,1,0,1,1,0,1,0};
+ mappedData.nM_ = {460,704,339,216,118,1394,1581,1313,583,333};
+ mappedData.nMM_ = {0,0,0,0,0,0,0,0,0,0};
+ mappedData.mapQV_ = {254,254,254,254,254,254,254,254,254,254};
+
+ PbiRawReferenceData& referenceData = rawData.ReferenceData();
+ referenceData.entries_ = {
+ PbiReferenceEntry{0,0,10},
+ PbiReferenceEntry{4294967295,4294967295,4294967295}
+ };
+
+ return rawData;
+}
+
+// NOTE: We have 2 different sets of offsets because the copied, new file differs in size than the existing one.
+//
+// Unsure which combination of write parameters were used on the original. Things like thread count,
+// compression level, etc. can effect compression ratio, BGZF block sizes, etc. even though the BAM record
+// content itself is equal. So we'll just track these index values separately, for now at least.
+//
+static
+PbiRawData Test2Bam_ExistingIndex(void)
{
- PbiRawData index;
- index.Version(PbiFile::Version_3_0_0);
- index.FileSections(PbiFile::SUBREAD | PbiFile::MAPPED);
- index.NumReads(4);
-
- PbiRawSubreadData& subreadData = index.SubreadData();
- subreadData.rgId_ = { -1197849594, -1197849594, -1197849594, -1197849594 };
- subreadData.qStart_ = { 2114, 2579, 4101, 5615 };
- subreadData.qEnd_ = { 2531, 4055, 5571, 6237 };
- subreadData.holeNumber_ = { 14743, 14743, 14743, 14743 };
- subreadData.readQual_ = { 901, 901, 901, 901 };
- subreadData.fileOffset_ = { 35651584, 35655123, 35667124, 35679164 };
-
- PbiRawMappedData& mappedData = index.mappedData_;
- mappedData.tId_ = { 0, 0, 0, 0 };
- mappedData.tStart_ = { 9507, 8453, 8455, 9291 };
- mappedData.tEnd_ = { 9903, 9902, 9893, 9900 };
- mappedData.aStart_ = { 2130, 2581, 4102, 5619 };
- mappedData.aEnd_ = { 2531, 4055, 5560, 6237 };
- mappedData.revStrand_ = { 0, 1, 0, 1 };
- mappedData.mapQV_ = { 254, 254, 254, 254 };
- mappedData.nM_ = { 384, 1411, 1393, 598 };
- mappedData.nMM_ = { 0, 0, 0, 0 }; // old 'M' ops were just replaced w/ '=', no 'X'
-
- // reference & barcode data are empty for this file
+ PbiRawData index = Test2Bam_CoreIndexData();
+ index.BasicData().fileOffset_ = {32636928,32645486,32651627,32654529,32656778,32658272,32669996,32683648,32694741,1388838912};
+ return index;
+}
+
+static
+PbiRawData Test2Bam_NewIndex(void)
+{
+ PbiRawData index = Test2Bam_CoreIndexData();
+ index.BasicData().fileOffset_ = { 33095680, 233766912, 387448832, 463667200, 530317312, 579731456, 857341952, 1171062784, 1436352512, 1567621120 };
return index;
}
@@ -102,13 +133,14 @@ void ExpectRawIndicesEqual(const PbiRawData& expected, const PbiRawData& actual)
EXPECT_EQ(expected.NumReads(), actual.NumReads());
// subread data
- const PbiRawSubreadData& e = expected.SubreadData();
- const PbiRawSubreadData& a = actual.SubreadData();
+ const PbiRawBasicData& e = expected.BasicData();
+ const PbiRawBasicData& a = actual.BasicData();
EXPECT_EQ(e.rgId_, a.rgId_);
EXPECT_EQ(e.qStart_, a.qStart_);
EXPECT_EQ(e.qEnd_, a.qEnd_);
EXPECT_EQ(e.holeNumber_, a.holeNumber_);
EXPECT_EQ(e.readQual_, a.readQual_);
+ EXPECT_EQ(e.ctxtFlag_, a.ctxtFlag_);
EXPECT_EQ(e.fileOffset_, a.fileOffset_);
// mapped data
@@ -140,28 +172,28 @@ void ExpectRawIndicesEqual(const PbiRawData& expected, const PbiRawData& actual)
if (expected.HasBarcodeData() && actual.HasBarcodeData()) {
const PbiRawBarcodeData& e = expected.BarcodeData();
const PbiRawBarcodeData& a = actual.BarcodeData();
- EXPECT_EQ(e.bcLeft_, a.bcLeft_);
- EXPECT_EQ(e.bcRight_, a.bcRight_);
+ EXPECT_EQ(e.bcForward_, a.bcForward_);
+ EXPECT_EQ(e.bcReverse_, a.bcReverse_);
EXPECT_EQ(e.bcQual_, a.bcQual_);
- EXPECT_EQ(e.ctxtFlag_, a.ctxtFlag_);
}
}
static
-bool SubreadLookupsEqual(const internal::SubreadLookupData& lhs,
- const internal::SubreadLookupData& rhs)
+bool BasicLookupsEqual(const BasicLookupData& lhs,
+ const BasicLookupData& rhs)
{
return (lhs.rgId_ == rhs.rgId_ &&
lhs.qStart_ == rhs.qStart_ &&
lhs.qEnd_ == rhs.qEnd_ &&
lhs.holeNumber_ == rhs.holeNumber_ &&
lhs.readQual_ == rhs.readQual_ &&
+ lhs.ctxtFlag_ == rhs.ctxtFlag_ &&
lhs.fileOffset_ == rhs.fileOffset_);
}
static
-bool MappedLookupsEqual(const internal::MappedLookupData& lhs,
- const internal::MappedLookupData& rhs)
+bool MappedLookupsEqual(const MappedLookupData& lhs,
+ const MappedLookupData& rhs)
{
return (lhs.tId_ == rhs.tId_ &&
lhs.tStart_ == rhs.tStart_ &&
@@ -176,20 +208,19 @@ bool MappedLookupsEqual(const internal::MappedLookupData& lhs,
}
static
-bool ReferenceLookupsEqual(const internal::ReferenceLookupData& lhs,
- const internal::ReferenceLookupData& rhs)
+bool ReferenceLookupsEqual(const ReferenceLookupData& lhs,
+ const ReferenceLookupData& rhs)
{
return lhs.references_ == rhs.references_;
}
static
-bool BarcodeLookupsEqual(const internal::BarcodeLookupData& lhs,
- const internal::BarcodeLookupData& rhs)
+bool BarcodeLookupsEqual(const BarcodeLookupData& lhs,
+ const BarcodeLookupData& rhs)
{
- return (lhs.bcLeft_ == rhs.bcLeft_ &&
- lhs.bcRight_ == rhs.bcRight_ &&
- lhs.bcQual_ == rhs.bcQual_ &&
- lhs.ctxtFlag_ == rhs.ctxtFlag_);
+ return (lhs.bcForward_ == rhs.bcForward_ &&
+ lhs.bcReverse_ == rhs.bcReverse_ &&
+ lhs.bcQual_ == rhs.bcQual_);
}
static
@@ -210,7 +241,7 @@ bool PbiIndicesEqual(const PbiIndex& lhs, const PbiIndex& rhs)
{ return false; }
// component compare
- if ( !SubreadLookupsEqual(lhsImpl->subreadData_, rhsImpl->subreadData_) ||
+ if ( !BasicLookupsEqual(lhsImpl->basicData_, rhsImpl->basicData_) ||
!MappedLookupsEqual(lhsImpl->mappedData_, rhsImpl->mappedData_) ||
!ReferenceLookupsEqual(lhsImpl->referenceData_, rhsImpl->referenceData_) ||
!BarcodeLookupsEqual(lhsImpl->barcodeData_, rhsImpl->barcodeData_))
@@ -228,7 +259,7 @@ TEST(PacBioIndexTest, CreateFromExistingBam)
{
// do this in temp directory, so we can ensure write access
const string tempDir = "/tmp/";
- const string tempBamFn = tempDir + "test2.bam";
+ const string tempBamFn = tempDir + "bam_mapping_new.bam";
const string tempPbiFn = tempBamFn + ".pbi";
string cmd("cp ");
cmd += test2BamFn;
@@ -242,11 +273,11 @@ TEST(PacBioIndexTest, CreateFromExistingBam)
EXPECT_EQ(tempPbiFn, bamFile.PacBioIndexFilename());
PbiRawData index(bamFile.PacBioIndexFilename());
- EXPECT_EQ(PbiFile::Version_3_0_0, index.Version());
- EXPECT_EQ(4, index.NumReads());
+ EXPECT_EQ(PbiFile::Version_3_0_1, index.Version());
+ EXPECT_EQ(10, index.NumReads());
EXPECT_TRUE(index.HasMappedData());
- const PbiRawData& expectedIndex = tests::Test2Bam_RawIndex();
+ const PbiRawData& expectedIndex = tests::Test2Bam_ExistingIndex();
tests::ExpectRawIndicesEqual(expectedIndex, index);
// clean up temp file(s)
@@ -254,6 +285,14 @@ TEST(PacBioIndexTest, CreateFromExistingBam)
remove(tempPbiFn.c_str());
}
+::testing::AssertionResult CanRead(BamReader& reader, BamRecord& record, int i)
+{
+ if (reader.GetNext(record))
+ return ::testing::AssertionSuccess() << "i: " << i;
+ else
+ return ::testing::AssertionFailure() << "i: " << i;
+}
+
TEST(PacBioIndexTest, CreateOnTheFly)
{
// do this in temp directory, so we can ensure write access
@@ -261,12 +300,16 @@ TEST(PacBioIndexTest, CreateOnTheFly)
const string tempBamFn = tempDir + "temp.bam";
const string tempPbiFn = tempBamFn + ".pbi";
+ // NOTE: new file differs in size than existing (different write parameters may yield different file sizes, even though content is same)
+ const vector<int64_t> expectedNewOffsets = { 33095680, 233766912, 387448832, 463667200, 530317312, 579731456, 857341952, 1171062784, 1436352512, 1567621120 };
+ vector<int64_t> observedOffsets;
+
// create PBI on the fly from input BAM while we write to new file
{
BamFile bamFile(test2BamFn);
BamHeader header = bamFile.Header();
- BamWriter writer(tempBamFn, header);
+ BamWriter writer(tempBamFn, header); // default compression, default thread count
PbiBuilder builder(tempPbiFn, header.Sequences().size());
int64_t vOffset = 0;
@@ -274,17 +317,48 @@ TEST(PacBioIndexTest, CreateOnTheFly)
for (const BamRecord& record : entireFile) {
writer.Write(record, &vOffset);
builder.AddRecord(record, vOffset);
+ observedOffsets.push_back(vOffset);
+ }
+ }
+
+ EXPECT_EQ(expectedNewOffsets, observedOffsets);
+
+ // sanity check on original file
+ {
+ const vector<int64_t> originalFileOffsets = {32636928,32645486,32651627,32654529,32656778,32658272,32669996,32683648,32694741,1388838912};
+ BamRecord r;
+ BamReader reader(test2BamFn);
+ for (int i = 0; i < originalFileOffsets.size(); ++i) {
+ reader.VirtualSeek(originalFileOffsets.at(i));
+ EXPECT_TRUE(CanRead(reader, r, i));
+ }
+ }
+
+ // attempt to seek in our new file using both expected & observed offsets
+ {
+ BamRecord r;
+ BamReader reader(tempBamFn);
+ for (int i = 0; i < expectedNewOffsets.size(); ++i) {
+ reader.VirtualSeek(expectedNewOffsets.at(i));
+ EXPECT_TRUE(CanRead(reader, r, i));
+ }
+ for (int i = 0; i < observedOffsets.size(); ++i) {
+ reader.VirtualSeek(observedOffsets.at(i));
+ EXPECT_TRUE(CanRead(reader, r, i));
}
}
// compare data in new PBI file, to expected data
- const PbiRawData& expectedIndex = tests::Test2Bam_RawIndex();
+ const PbiRawData& expectedIndex = tests::Test2Bam_NewIndex();
const PbiRawData& fromBuilt = PbiRawData(tempPbiFn);
tests::ExpectRawIndicesEqual(expectedIndex, fromBuilt);
// straight diff of newly-generated PBI file to existing PBI
- const string pbiDiffCmd = string("diff -q ") + test2BamFn + ".pbi " + tempPbiFn;
- EXPECT_EQ(0, system(pbiDiffCmd.c_str()));
+ // TODO: Come back to this once pbindexump is in place.
+ // We can't exactly do this since file offsets may differ between 2 BAMs of differing compression levels.
+ // Should add some sort of BAM checksum based on contents, not just size, for this reason.
+// const string pbiDiffCmd = string("diff -q ") + test2BamFn + ".pbi " + tempPbiFn;
+// EXPECT_EQ(0, system(pbiDiffCmd.c_str()));
// clean up temp file(s)
remove(tempBamFn.c_str());
@@ -297,15 +371,64 @@ TEST(PacBioIndexTest, RawLoadFromPbiFile)
const string& pbiFilename = bamFile.PacBioIndexFilename();
const PbiRawData loadedIndex(pbiFilename);
- const PbiRawData& expectedIndex = tests::Test2Bam_RawIndex();
+ const PbiRawData& expectedIndex = tests::Test2Bam_ExistingIndex();
tests::ExpectRawIndicesEqual(expectedIndex, loadedIndex);
}
+TEST(PacBioIndexTest, BasicAndBarodeSectionsOnly)
+{
+ // do this in temp directory, so we can ensure write access
+ const string tempDir = "/tmp/";
+ const string tempBamFn = tempDir + "phi29.bam";
+ const string tempPbiFn = tempBamFn + ".pbi";
+ string cmd("cp ");
+ cmd += phi29BamFn;
+ cmd += " ";
+ cmd += tempDir;
+ int cmdResult = system(cmd.c_str());
+ (void)cmdResult;
+
+ BamFile bamFile(tempBamFn);
+ PbiFile::CreateFrom(bamFile);
+ EXPECT_EQ(tempPbiFn, bamFile.PacBioIndexFilename());
+
+ PbiRawData index(bamFile.PacBioIndexFilename());
+ EXPECT_EQ(PbiFile::Version_3_0_1, index.Version());
+ EXPECT_EQ(120, index.NumReads());
+ EXPECT_FALSE(index.HasMappedData());
+ EXPECT_TRUE(index.HasBarcodeData());
+
+ const vector<int16_t> expectedBcForward = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2};
+ const vector<int16_t> expectedBcReverse = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2};
+ const vector<int8_t> expectedBcQuality = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1};
+
+ const PbiRawBarcodeData& barcodeData = index.BarcodeData();
+ EXPECT_EQ(expectedBcForward, barcodeData.bcForward_);
+ EXPECT_EQ(expectedBcReverse, barcodeData.bcReverse_);
+ EXPECT_EQ(expectedBcQuality, barcodeData.bcQual_);
+
+
+ // clean up temp file(s)
+ remove(tempBamFn.c_str());
+ remove(tempPbiFn.c_str());
+
+}
+
+
TEST(PacBioIndexTest, ReferenceDataNotLoadedOnUnsortedBam)
{
BamFile bamFile(test2BamFn);
PbiRawData raw(bamFile.PacBioIndexFilename());
- EXPECT_FALSE(raw.HasReferenceData());
+ EXPECT_TRUE(raw.HasReferenceData());
}
TEST(PacBioIndexTest, LookupLoadFromFileOk)
@@ -314,8 +437,8 @@ TEST(PacBioIndexTest, LookupLoadFromFileOk)
EXPECT_NO_THROW(
{
PbiIndex index(bamFile.PacBioIndexFilename());
- EXPECT_EQ(4, index.NumReads());
- EXPECT_EQ(vector<int64_t>({ 35651584, 35655123, 35667124, 35679164 }), index.VirtualFileOffsets());
+ EXPECT_EQ(10, index.NumReads());
+ EXPECT_EQ(vector<int64_t>({32636928,32645486,32651627,32654529,32656778,32658272,32669996,32683648,32694741,1388838912}), index.BasicData().VirtualFileOffsets());
});
}
@@ -359,11 +482,10 @@ TEST(PacBioIndexTest, Copy_and_Move)
TEST(PacBioIndexTest, OrderedLookup)
{
- using PacBio::BAM::CompareType;
using PacBio::BAM::IndexList;
- using PacBio::BAM::internal::OrderedLookup;
+ using PacBio::BAM::OrderedLookup;
- OrderedLookup<int>::ContainerType oRawData;
+ OrderedLookup<int>::container_type oRawData;
oRawData[11] = { 0, 3, 4 };
oRawData[20] = { 1 };
oRawData[42] = { 2, 7, 8 };
@@ -374,51 +496,50 @@ TEST(PacBioIndexTest, OrderedLookup)
OrderedLookup<int> oLookup(oRawData);
// EQUAL
- EXPECT_EQ(IndexList({5}), oLookup.LookupIndices(10, CompareType::EQUAL));
- EXPECT_EQ(IndexList({0, 3, 4}), oLookup.LookupIndices(11, CompareType::EQUAL));
- EXPECT_EQ(IndexList({6}), oLookup.LookupIndices(12, CompareType::EQUAL));
- EXPECT_EQ(IndexList({1}), oLookup.LookupIndices(20, CompareType::EQUAL));
- EXPECT_EQ(IndexList({2, 7, 8}), oLookup.LookupIndices(42, CompareType::EQUAL));
- EXPECT_EQ(IndexList({9}), oLookup.LookupIndices(99, CompareType::EQUAL));
- EXPECT_EQ(IndexList(), oLookup.LookupIndices(66, CompareType::EQUAL)); // does not exist
+ EXPECT_EQ(IndexList({5}), oLookup.LookupIndices(10, Compare::EQUAL));
+ EXPECT_EQ(IndexList({0, 3, 4}), oLookup.LookupIndices(11, Compare::EQUAL));
+ EXPECT_EQ(IndexList({6}), oLookup.LookupIndices(12, Compare::EQUAL));
+ EXPECT_EQ(IndexList({1}), oLookup.LookupIndices(20, Compare::EQUAL));
+ EXPECT_EQ(IndexList({2, 7, 8}), oLookup.LookupIndices(42, Compare::EQUAL));
+ EXPECT_EQ(IndexList({9}), oLookup.LookupIndices(99, Compare::EQUAL));
+ EXPECT_EQ(IndexList(), oLookup.LookupIndices(66, Compare::EQUAL)); // does not exist
// NOT_EQUAL
- EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 6, 7, 8, 9}), oLookup.LookupIndices(10, CompareType::NOT_EQUAL));
- EXPECT_EQ(IndexList({1, 2, 5, 6, 7, 8, 9}), oLookup.LookupIndices(11, CompareType::NOT_EQUAL));
- EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 5, 7, 8, 9}), oLookup.LookupIndices(12, CompareType::NOT_EQUAL));
- EXPECT_EQ(IndexList({0, 2, 3, 4, 5, 6, 7, 8, 9}), oLookup.LookupIndices(20, CompareType::NOT_EQUAL));
- EXPECT_EQ(IndexList({0, 1, 3, 4, 5, 6, 9}), oLookup.LookupIndices(42, CompareType::NOT_EQUAL));
- EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 5, 6, 7, 8}), oLookup.LookupIndices(99, CompareType::NOT_EQUAL));
- EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), oLookup.LookupIndices(66, CompareType::NOT_EQUAL)); // does not exist
+ EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 6, 7, 8, 9}), oLookup.LookupIndices(10, Compare::NOT_EQUAL));
+ EXPECT_EQ(IndexList({1, 2, 5, 6, 7, 8, 9}), oLookup.LookupIndices(11, Compare::NOT_EQUAL));
+ EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 5, 7, 8, 9}), oLookup.LookupIndices(12, Compare::NOT_EQUAL));
+ EXPECT_EQ(IndexList({0, 2, 3, 4, 5, 6, 7, 8, 9}), oLookup.LookupIndices(20, Compare::NOT_EQUAL));
+ EXPECT_EQ(IndexList({0, 1, 3, 4, 5, 6, 9}), oLookup.LookupIndices(42, Compare::NOT_EQUAL));
+ EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 5, 6, 7, 8}), oLookup.LookupIndices(99, Compare::NOT_EQUAL));
+ EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), oLookup.LookupIndices(66, Compare::NOT_EQUAL)); // does not exist
// LESS_THAN
- EXPECT_EQ(IndexList({0, 3, 4, 5, 6}), oLookup.LookupIndices(13, CompareType::LESS_THAN));
- EXPECT_EQ(IndexList({0, 3, 4, 5}), oLookup.LookupIndices(12, CompareType::LESS_THAN));
+ EXPECT_EQ(IndexList({0, 3, 4, 5, 6}), oLookup.LookupIndices(13, Compare::LESS_THAN));
+ EXPECT_EQ(IndexList({0, 3, 4, 5}), oLookup.LookupIndices(12, Compare::LESS_THAN));
// do more checks
// LESS_THAN_EQUAL
- EXPECT_EQ(IndexList({0, 3, 4, 5, 6}), oLookup.LookupIndices(13, CompareType::LESS_THAN_EQUAL));
- EXPECT_EQ(IndexList({0, 3, 4, 5, 6}), oLookup.LookupIndices(12, CompareType::LESS_THAN_EQUAL));
+ EXPECT_EQ(IndexList({0, 3, 4, 5, 6}), oLookup.LookupIndices(13, Compare::LESS_THAN_EQUAL));
+ EXPECT_EQ(IndexList({0, 3, 4, 5, 6}), oLookup.LookupIndices(12, Compare::LESS_THAN_EQUAL));
// more checks?
// GREATER_THAN
- EXPECT_EQ(IndexList({2,7,8,9}), oLookup.LookupIndices(41, CompareType::GREATER_THAN));
- EXPECT_EQ(IndexList({9}), oLookup.LookupIndices(42, CompareType::GREATER_THAN));
+ EXPECT_EQ(IndexList({2,7,8,9}), oLookup.LookupIndices(41, Compare::GREATER_THAN));
+ EXPECT_EQ(IndexList({9}), oLookup.LookupIndices(42, Compare::GREATER_THAN));
// more checks?
// GREATER_THAN_EQUAL
- EXPECT_EQ(IndexList({2,7,8,9}), oLookup.LookupIndices(41, CompareType::GREATER_THAN_EQUAL));
- EXPECT_EQ(IndexList({2,7,8,9}), oLookup.LookupIndices(42, CompareType::GREATER_THAN_EQUAL));
+ EXPECT_EQ(IndexList({2,7,8,9}), oLookup.LookupIndices(41, Compare::GREATER_THAN_EQUAL));
+ EXPECT_EQ(IndexList({2,7,8,9}), oLookup.LookupIndices(42, Compare::GREATER_THAN_EQUAL));
// more checks?
}
TEST(PacBioIndexTest, UnorderedLookup)
{
- using PacBio::BAM::CompareType;
using PacBio::BAM::IndexList;
- using PacBio::BAM::internal::UnorderedLookup;
+ using PacBio::BAM::UnorderedLookup;
- UnorderedLookup<int>::ContainerType uRawData;
+ UnorderedLookup<int>::container_type uRawData;
uRawData[11] = { 0, 3, 4 };
uRawData[20] = { 1 };
uRawData[42] = { 2, 7, 8 };
@@ -429,54 +550,53 @@ TEST(PacBioIndexTest, UnorderedLookup)
UnorderedLookup<int> uLookup(uRawData);
// EQUAL
- EXPECT_EQ(IndexList({5}), uLookup.LookupIndices(10, CompareType::EQUAL));
- EXPECT_EQ(IndexList({0, 3, 4}), uLookup.LookupIndices(11, CompareType::EQUAL));
- EXPECT_EQ(IndexList({6}), uLookup.LookupIndices(12, CompareType::EQUAL));
- EXPECT_EQ(IndexList({1}), uLookup.LookupIndices(20, CompareType::EQUAL));
- EXPECT_EQ(IndexList({2, 7, 8}), uLookup.LookupIndices(42, CompareType::EQUAL));
- EXPECT_EQ(IndexList({9}), uLookup.LookupIndices(99, CompareType::EQUAL));
- EXPECT_EQ(IndexList(), uLookup.LookupIndices(66, CompareType::EQUAL)); // does not exist
+ EXPECT_EQ(IndexList({5}), uLookup.LookupIndices(10, Compare::EQUAL));
+ EXPECT_EQ(IndexList({0, 3, 4}), uLookup.LookupIndices(11, Compare::EQUAL));
+ EXPECT_EQ(IndexList({6}), uLookup.LookupIndices(12, Compare::EQUAL));
+ EXPECT_EQ(IndexList({1}), uLookup.LookupIndices(20, Compare::EQUAL));
+ EXPECT_EQ(IndexList({2, 7, 8}), uLookup.LookupIndices(42, Compare::EQUAL));
+ EXPECT_EQ(IndexList({9}), uLookup.LookupIndices(99, Compare::EQUAL));
+ EXPECT_EQ(IndexList(), uLookup.LookupIndices(66, Compare::EQUAL)); // does not exist
// NOT_EQUAL
- EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 6, 7, 8, 9}), uLookup.LookupIndices(10, CompareType::NOT_EQUAL));
- EXPECT_EQ(IndexList({1, 2, 5, 6, 7, 8, 9}), uLookup.LookupIndices(11, CompareType::NOT_EQUAL));
- EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 5, 7, 8, 9}), uLookup.LookupIndices(12, CompareType::NOT_EQUAL));
- EXPECT_EQ(IndexList({0, 2, 3, 4, 5, 6, 7, 8, 9}), uLookup.LookupIndices(20, CompareType::NOT_EQUAL));
- EXPECT_EQ(IndexList({0, 1, 3, 4, 5, 6, 9}), uLookup.LookupIndices(42, CompareType::NOT_EQUAL));
- EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 5, 6, 7, 8}), uLookup.LookupIndices(99, CompareType::NOT_EQUAL));
- EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), uLookup.LookupIndices(66, CompareType::NOT_EQUAL)); // does not exist
+ EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 6, 7, 8, 9}), uLookup.LookupIndices(10, Compare::NOT_EQUAL));
+ EXPECT_EQ(IndexList({1, 2, 5, 6, 7, 8, 9}), uLookup.LookupIndices(11, Compare::NOT_EQUAL));
+ EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 5, 7, 8, 9}), uLookup.LookupIndices(12, Compare::NOT_EQUAL));
+ EXPECT_EQ(IndexList({0, 2, 3, 4, 5, 6, 7, 8, 9}), uLookup.LookupIndices(20, Compare::NOT_EQUAL));
+ EXPECT_EQ(IndexList({0, 1, 3, 4, 5, 6, 9}), uLookup.LookupIndices(42, Compare::NOT_EQUAL));
+ EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 5, 6, 7, 8}), uLookup.LookupIndices(99, Compare::NOT_EQUAL));
+ EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), uLookup.LookupIndices(66, Compare::NOT_EQUAL)); // does not exist
// LESS_THAN
- EXPECT_EQ(IndexList({0, 3, 4, 5, 6}), uLookup.LookupIndices(13, CompareType::LESS_THAN));
- EXPECT_EQ(IndexList({0, 3, 4, 5}), uLookup.LookupIndices(12, CompareType::LESS_THAN));
+ EXPECT_EQ(IndexList({0, 3, 4, 5, 6}), uLookup.LookupIndices(13, Compare::LESS_THAN));
+ EXPECT_EQ(IndexList({0, 3, 4, 5}), uLookup.LookupIndices(12, Compare::LESS_THAN));
// more checks?
// LESS_THAN_EQUAL
- EXPECT_EQ(IndexList({0, 3, 4, 5, 6}), uLookup.LookupIndices(13, CompareType::LESS_THAN_EQUAL));
- EXPECT_EQ(IndexList({0, 3, 4, 5, 6}), uLookup.LookupIndices(12, CompareType::LESS_THAN_EQUAL));
+ EXPECT_EQ(IndexList({0, 3, 4, 5, 6}), uLookup.LookupIndices(13, Compare::LESS_THAN_EQUAL));
+ EXPECT_EQ(IndexList({0, 3, 4, 5, 6}), uLookup.LookupIndices(12, Compare::LESS_THAN_EQUAL));
// more checks?
// GREATER_THAN
- EXPECT_EQ(IndexList({2,7,8,9}), uLookup.LookupIndices(41, CompareType::GREATER_THAN));
- EXPECT_EQ(IndexList({9}), uLookup.LookupIndices(42, CompareType::GREATER_THAN));
+ EXPECT_EQ(IndexList({2,7,8,9}), uLookup.LookupIndices(41, Compare::GREATER_THAN));
+ EXPECT_EQ(IndexList({9}), uLookup.LookupIndices(42, Compare::GREATER_THAN));
// more checks?
// GREATER_THAN_EQUAL
- EXPECT_EQ(uLookup.LookupIndices(41, CompareType::GREATER_THAN_EQUAL), IndexList({2,7,8,9}));
- EXPECT_EQ(uLookup.LookupIndices(42, CompareType::GREATER_THAN_EQUAL), IndexList({2,7,8,9}));
+ EXPECT_EQ(uLookup.LookupIndices(41, Compare::GREATER_THAN_EQUAL), IndexList({2,7,8,9}));
+ EXPECT_EQ(uLookup.LookupIndices(42, Compare::GREATER_THAN_EQUAL), IndexList({2,7,8,9}));
// more checks?
}
TEST(PacBioIndexTest, MergeBlocks)
{
- using PacBio::BAM::CompareType;
using PacBio::BAM::IndexList;
using PacBio::BAM::IndexResultBlock;
using PacBio::BAM::IndexResultBlocks;
- using PacBio::BAM::internal::mergedIndexBlocks;
- using PacBio::BAM::internal::OrderedLookup;
+ using PacBio::BAM::mergedIndexBlocks;
+ using PacBio::BAM::OrderedLookup;
- OrderedLookup<int>::ContainerType oRawData;
+ OrderedLookup<int>::container_type oRawData;
oRawData[11] = { 0, 3, 4 };
oRawData[20] = { 1 };
oRawData[42] = { 2, 7, 8 };
@@ -487,82 +607,81 @@ TEST(PacBioIndexTest, MergeBlocks)
OrderedLookup<int> oLookup(oRawData);
// EQUAL
- auto mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(10, CompareType::EQUAL));
+ auto mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(10, Compare::EQUAL));
EXPECT_EQ(1, mergedBlocks.size());
EXPECT_EQ(IndexResultBlock(5, 1), mergedBlocks.at(0));
- mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(11, CompareType::EQUAL));
+ mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(11, Compare::EQUAL));
EXPECT_EQ(2, mergedBlocks.size());
EXPECT_EQ(IndexResultBlock(0, 1), mergedBlocks.at(0));
EXPECT_EQ(IndexResultBlock(3, 2), mergedBlocks.at(1));
- mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(12, CompareType::EQUAL));
+ mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(12, Compare::EQUAL));
EXPECT_EQ(1, mergedBlocks.size());
EXPECT_EQ(IndexResultBlock(6, 1), mergedBlocks.at(0));
- mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(20, CompareType::EQUAL));
+ mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(20, Compare::EQUAL));
EXPECT_EQ(1, mergedBlocks.size());
EXPECT_EQ(IndexResultBlock(1, 1), mergedBlocks.at(0));
- mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(42, CompareType::EQUAL));
+ mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(42, Compare::EQUAL));
EXPECT_EQ(2, mergedBlocks.size());
EXPECT_EQ(IndexResultBlock(2, 1), mergedBlocks.at(0));
EXPECT_EQ(IndexResultBlock(7, 2), mergedBlocks.at(1));
- mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(99, CompareType::EQUAL));
+ mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(99, Compare::EQUAL));
EXPECT_EQ(1, mergedBlocks.size());
EXPECT_EQ(IndexResultBlock(9, 1), mergedBlocks.at(0));
- mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(66, CompareType::EQUAL));
+ mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(66, Compare::EQUAL));
EXPECT_TRUE(mergedBlocks.empty());
// NOT_EQUAL
- mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(10, CompareType::NOT_EQUAL));
+ mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(10, Compare::NOT_EQUAL));
EXPECT_EQ(2, mergedBlocks.size());
EXPECT_EQ(IndexResultBlock(0, 5), mergedBlocks.at(0));
EXPECT_EQ(IndexResultBlock(6, 4), mergedBlocks.at(1));
- mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(11, CompareType::NOT_EQUAL));
+ mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(11, Compare::NOT_EQUAL));
EXPECT_EQ(2, mergedBlocks.size());
EXPECT_EQ(IndexResultBlock(1, 2), mergedBlocks.at(0));
EXPECT_EQ(IndexResultBlock(5, 5), mergedBlocks.at(1));
- mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(12, CompareType::NOT_EQUAL));
+ mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(12, Compare::NOT_EQUAL));
EXPECT_EQ(2, mergedBlocks.size());
EXPECT_EQ(IndexResultBlock(0, 6), mergedBlocks.at(0));
EXPECT_EQ(IndexResultBlock(7, 3), mergedBlocks.at(1));
- mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(20, CompareType::NOT_EQUAL));
+ mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(20, Compare::NOT_EQUAL));
EXPECT_EQ(2, mergedBlocks.size());
EXPECT_EQ(IndexResultBlock(0, 1), mergedBlocks.at(0));
EXPECT_EQ(IndexResultBlock(2, 8), mergedBlocks.at(1));
- mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(42, CompareType::NOT_EQUAL));
+ mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(42, Compare::NOT_EQUAL));
EXPECT_EQ(3, mergedBlocks.size());
EXPECT_EQ(IndexResultBlock(0, 2), mergedBlocks.at(0));
EXPECT_EQ(IndexResultBlock(3, 4), mergedBlocks.at(1));
EXPECT_EQ(IndexResultBlock(9, 1), mergedBlocks.at(2));
- mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(99, CompareType::NOT_EQUAL));
+ mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(99, Compare::NOT_EQUAL));
EXPECT_EQ(1, mergedBlocks.size());
EXPECT_EQ(IndexResultBlock(0, 9), mergedBlocks.at(0));
- mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(66, CompareType::NOT_EQUAL));
+ mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(66, Compare::NOT_EQUAL));
EXPECT_EQ(1, mergedBlocks.size());
EXPECT_EQ(IndexResultBlock(0, 10), mergedBlocks.at(0));
}
TEST(PacBioIndexTest, ApplyOffsetsToBlocks)
{
- using PacBio::BAM::CompareType;
+ using PacBio::BAM::BasicLookupData;
using PacBio::BAM::IndexList;
using PacBio::BAM::IndexResultBlock;
using PacBio::BAM::IndexResultBlocks;
- using PacBio::BAM::internal::mergedIndexBlocks;
- using PacBio::BAM::internal::OrderedLookup;
- using PacBio::BAM::internal::SubreadLookupData;
+ using PacBio::BAM::mergedIndexBlocks;
+ using PacBio::BAM::OrderedLookup;
- OrderedLookup<int>::ContainerType oRawData;
+ OrderedLookup<int>::container_type oRawData;
oRawData[11] = { 0, 3, 4 };
oRawData[20] = { 1 };
oRawData[42] = { 2, 7, 8 };
@@ -571,15 +690,15 @@ TEST(PacBioIndexTest, ApplyOffsetsToBlocks)
oRawData[99] = { 9 };
OrderedLookup<int> oLookup(oRawData);
- auto mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(10, CompareType::NOT_EQUAL));
+ auto mergedBlocks = mergedIndexBlocks(oLookup.LookupIndices(10, Compare::NOT_EQUAL));
EXPECT_EQ(2, mergedBlocks.size());
EXPECT_EQ(IndexResultBlock(0, 5), mergedBlocks.at(0));
EXPECT_EQ(IndexResultBlock(6, 4), mergedBlocks.at(1));
- SubreadLookupData subreadIndex;
- subreadIndex.fileOffset_ = { 0, 10, 20, 30, 40, 50, 60, 70, 80, 90 };
- subreadIndex.ApplyOffsets(mergedBlocks);
+ BasicLookupData basicLookupData;
+ basicLookupData.fileOffset_ = { 0, 10, 20, 30, 40, 50, 60, 70, 80, 90 };
+ basicLookupData.ApplyOffsets(mergedBlocks);
EXPECT_EQ(2, mergedBlocks.size());
EXPECT_EQ(0, mergedBlocks.at(0).virtualOffset_);
@@ -590,16 +709,14 @@ TEST(PacBioIndexTest, ApplyOffsetsToBlocks)
TEST(PacBioIndexTest, LookupMulti)
{
- using PacBio::BAM::CompareType;
+ using PacBio::BAM::BasicLookupData;
using PacBio::BAM::IndexList;
using PacBio::BAM::IndexResultBlock;
using PacBio::BAM::IndexResultBlocks;
- using PacBio::BAM::SubreadField;
- using PacBio::BAM::internal::mergedIndexBlocks;
- using PacBio::BAM::internal::SubreadLookupData;
- using PacBio::BAM::internal::UnorderedLookup;
+ using PacBio::BAM::mergedIndexBlocks;
+ using PacBio::BAM::UnorderedLookup;
- UnorderedLookup<int32_t>::ContainerType uRawData;
+ UnorderedLookup<int32_t>::container_type uRawData;
uRawData[11] = { 0, 3, 4 };
uRawData[20] = { 1 };
uRawData[42] = { 2, 7, 8 };
@@ -607,15 +724,15 @@ TEST(PacBioIndexTest, LookupMulti)
uRawData[12] = { 6 };
uRawData[99] = { 9 };
- SubreadLookupData subreadIndex;
- subreadIndex.rgId_ = UnorderedLookup<int32_t>(uRawData);
- subreadIndex.fileOffset_ = { 0, 10, 20, 30, 40, 50, 60, 70, 80, 90 };
+ BasicLookupData basicLookup;
+ basicLookup.rgId_ = UnorderedLookup<int32_t>(uRawData);
+ basicLookup.fileOffset_ = { 0, 10, 20, 30, 40, 50, 60, 70, 80, 90 };
const std::vector<int32_t> whitelist = { 11, 42, 20 };
- const auto indices = subreadIndex.IndicesMulti(SubreadField::RG_ID, whitelist);
+ const auto indices = basicLookup.IndicesMulti(BasicLookupData::RG_ID, whitelist);
IndexResultBlocks mergedBlocks = mergedIndexBlocks(indices);
- subreadIndex.ApplyOffsets(mergedBlocks);
+ basicLookup.ApplyOffsets(mergedBlocks);
EXPECT_EQ(IndexList({0, 3, 4, 2, 7, 8, 1}), indices);
EXPECT_EQ(2, mergedBlocks.size());
@@ -634,56 +751,96 @@ TEST(PacBioIndexTest, LookupMulti)
TEST(PacBioIndexTest, LookupAPI)
{
const PbiIndex index(test2BamFn + ".pbi");
+ const BasicLookupData& basicData = index.BasicData();
+ const MappedLookupData& mappedData = index.MappedData();
+ const BarcodeLookupData& barcodeData = index.BarcodeData();
// rgId == x
- const IndexResultBlocks rgResult = index.Lookup(ReadGroupIndexRequest(-1197849594));
+ IndexResultBlocks rgResult = mergedIndexBlocks(basicData.Indices(BasicLookupData::RG_ID, -1574697275));
+ basicData.ApplyOffsets(rgResult);
EXPECT_EQ(1, rgResult.size());
EXPECT_EQ(0, rgResult.at(0).firstIndex_);
- EXPECT_EQ(4, rgResult.at(0).numReads_);
- EXPECT_EQ(35651584, rgResult.at(0).virtualOffset_);
+ EXPECT_EQ(10, rgResult.at(0).numReads_);
+ EXPECT_EQ(32636928, rgResult.at(0).virtualOffset_);
// rg != x
- const IndexResultBlocks notRgResult = index.Lookup(ReadGroupIndexRequest(-1197849594, CompareType::NOT_EQUAL));
+ IndexResultBlocks notRgResult = mergedIndexBlocks(basicData.Indices(BasicLookupData::RG_ID,
+ -1574697275,
+ Compare::NOT_EQUAL));
+ basicData.ApplyOffsets(notRgResult);
EXPECT_TRUE(notRgResult.empty());
// tEnd <= x
- const IndexResultBlocks tEndLteResult = index.Lookup(ReferenceEndIndexRequest(9900, CompareType::LESS_THAN_EQUAL));
+ IndexResultBlocks tEndLteResult = mergedIndexBlocks(mappedData.Indices(MappedLookupData::T_END,
+ 4500,
+ Compare::LESS_THAN_EQUAL));
+ basicData.ApplyOffsets(tEndLteResult);
EXPECT_EQ(1, tEndLteResult.size());
- EXPECT_EQ(2, tEndLteResult.at(0).firstIndex_);
- EXPECT_EQ(2, tEndLteResult.at(0).numReads_);
- EXPECT_EQ(35667124, tEndLteResult.at(0).virtualOffset_);
+ EXPECT_EQ(0, tEndLteResult.at(0).firstIndex_);
+ EXPECT_EQ(5, tEndLteResult.at(0).numReads_);
+ EXPECT_EQ(32636928, tEndLteResult.at(0).virtualOffset_);
// tEnd >= x
- const IndexResultBlocks tEndGteResult = index.Lookup(ReferenceEndIndexRequest(9900, CompareType::GREATER_THAN_EQUAL));
- EXPECT_EQ(2, tEndGteResult.size());
- EXPECT_EQ(0, tEndGteResult.at(0).firstIndex_);
- EXPECT_EQ(2, tEndGteResult.at(0).numReads_);
- EXPECT_EQ(35651584, tEndGteResult.at(0).virtualOffset_);
- EXPECT_EQ(3, tEndGteResult.at(1).firstIndex_);
- EXPECT_EQ(1, tEndGteResult.at(1).numReads_);
- EXPECT_EQ(35679164, tEndGteResult.at(1).virtualOffset_);
+ IndexResultBlocks tEndGteResult = mergedIndexBlocks(mappedData.Indices(MappedLookupData::T_START,
+ 4500,
+ Compare::GREATER_THAN_EQUAL));
+ basicData.ApplyOffsets(tEndGteResult);
+ EXPECT_EQ(1, tEndGteResult.size());
+ EXPECT_EQ(6, tEndGteResult.at(0).firstIndex_);
+ EXPECT_EQ(4, tEndGteResult.at(0).numReads_);
+ EXPECT_EQ(32669996, tEndGteResult.at(0).virtualOffset_);
// strand query
- const IndexResultBlocks forward = index.Lookup(StrandIndexRequest(Strand::FORWARD));
- EXPECT_EQ(2, forward.size());
+ IndexResultBlocks forward = mergedIndexBlocks(mappedData.Indices(MappedLookupData::STRAND,
+ Strand::FORWARD));
+ basicData.ApplyOffsets(forward);
+ EXPECT_EQ(5, forward.size());
EXPECT_EQ(0, forward.at(0).firstIndex_);
EXPECT_EQ(1, forward.at(0).numReads_);
- EXPECT_EQ(35651584, forward.at(0).virtualOffset_);
+ EXPECT_EQ(32636928, forward.at(0).virtualOffset_);
+
EXPECT_EQ(2, forward.at(1).firstIndex_);
EXPECT_EQ(1, forward.at(1).numReads_);
- EXPECT_EQ(35667124, forward.at(1).virtualOffset_);
+ EXPECT_EQ(32651627, forward.at(1).virtualOffset_);
+
+ EXPECT_EQ(4, forward.at(2).firstIndex_);
+ EXPECT_EQ(1, forward.at(2).numReads_);
+ EXPECT_EQ(32656778, forward.at(2).virtualOffset_);
+
+ EXPECT_EQ(7, forward.at(3).firstIndex_);
+ EXPECT_EQ(1, forward.at(3).numReads_);
+ EXPECT_EQ(32683648, forward.at(3).virtualOffset_);
+
+ EXPECT_EQ(9, forward.at(4).firstIndex_);
+ EXPECT_EQ(1, forward.at(4).numReads_);
+ EXPECT_EQ(1388838912, forward.at(4).virtualOffset_);
- const IndexResultBlocks reverse = index.Lookup(StrandIndexRequest(Strand::REVERSE));
- EXPECT_EQ(2, reverse.size());
+ // 0,1,0,1,0,1,1,0,1,0
+ IndexResultBlocks reverse = mergedIndexBlocks(mappedData.Indices(MappedLookupData::STRAND,
+ Strand::REVERSE));
+ basicData.ApplyOffsets(reverse);
+ EXPECT_EQ(4, reverse.size());
EXPECT_EQ(1, reverse.at(0).firstIndex_);
EXPECT_EQ(1, reverse.at(0).numReads_);
- EXPECT_EQ(35655123, reverse.at(0).virtualOffset_);
+ EXPECT_EQ(32645486, reverse.at(0).virtualOffset_);
+
EXPECT_EQ(3, reverse.at(1).firstIndex_);
EXPECT_EQ(1, reverse.at(1).numReads_);
- EXPECT_EQ(35679164, reverse.at(1).virtualOffset_);
+ EXPECT_EQ(32654529, reverse.at(1).virtualOffset_);
+
+ EXPECT_EQ(5, reverse.at(2).firstIndex_);
+ EXPECT_EQ(2, reverse.at(2).numReads_);
+ EXPECT_EQ(32658272, reverse.at(2).virtualOffset_);
+
+ EXPECT_EQ(8, reverse.at(3).firstIndex_);
+ EXPECT_EQ(1, reverse.at(3).numReads_);
+ EXPECT_EQ(32694741, reverse.at(3).virtualOffset_);
// query data field that is not in the PBI
- const IndexResultBlocks missing = index.Lookup(BarcodeQualityIndexRequest(77, CompareType::GREATER_THAN));
+ IndexResultBlocks missing = mergedIndexBlocks(barcodeData.Indices(BarcodeLookupData::BC_QUALITY,
+ 77,
+ Compare::GREATER_THAN));
+ basicData.ApplyOffsets(missing);
EXPECT_TRUE(missing.empty());
}
@@ -692,9 +849,13 @@ TEST(PacBioIndexTest, LookupByZmw)
BamFile f(tests::Data_Dir + "/dataset/bam_mapping.bam");
f.EnsurePacBioIndexExists();
- PbiIndex index(f.PacBioIndexFilename());
+ const PbiIndex index(f.PacBioIndexFilename());
+ const BasicLookupData& basicData = index.BasicData();
- const IndexResultBlocks blocks = index.Lookup(ZmwIndexRequest(20000, CompareType::LESS_THAN));
+ IndexResultBlocks blocks = mergedIndexBlocks(basicData.Indices(BasicLookupData::ZMW,
+ 20000,
+ Compare::LESS_THAN));
+ basicData.ApplyOffsets(blocks);
EXPECT_EQ(14, blocks.size());
//
@@ -743,11 +904,12 @@ TEST(PacBioIndexTest, LookupMultiZmw)
BamFile f(tests::Data_Dir + "/dataset/bam_mapping.bam");
f.EnsurePacBioIndexExists();
- PbiIndex index(f.PacBioIndexFilename());
+ const PbiIndex index(f.PacBioIndexFilename());
+ const BasicLookupData& basicData = index.BasicData();
const std::vector<int32_t> whitelist = { 13473, 38025 };
- const ZmwIndexMultiRequest request(whitelist);
- const IndexResultBlocks& blocks = index.Lookup(request);
+ IndexResultBlocks blocks = mergedIndexBlocks(basicData.IndicesMulti(BasicLookupData::ZMW, whitelist));
+ basicData.ApplyOffsets(blocks);
EXPECT_EQ(3, blocks.size());
diff --git a/tests/src/test_PbiFilter.cpp b/tests/src/test_PbiFilter.cpp
new file mode 100644
index 0000000..02d0d4d
--- /dev/null
+++ b/tests/src/test_PbiFilter.cpp
@@ -0,0 +1,1300 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include "TestData.h"
+#include <gtest/gtest.h>
+#include <pbbam/PbiFilter.h>
+#include <string>
+#include <cstdio>
+#include <cstdlib>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace tests {
+
+// helper structs & methods
+
+static
+PbiRawData test2Bam_RawIndex(void)
+{
+ PbiRawData index;
+ index.NumReads(4);
+
+ PbiRawBasicData& subreadData = index.BasicData();
+ subreadData.rgId_ = { -1197849594, -1197849594, -1197849594, -1197849594 };
+ subreadData.qStart_ = { 2114, 2579, 4101, 5615 };
+ subreadData.qEnd_ = { 2531, 4055, 5571, 6237 };
+ subreadData.holeNumber_ = { 14743, 14743, 14743, 14743 };
+ subreadData.readQual_ = { 0.901, 0.601, 0.901, 0.601 };
+ subreadData.ctxtFlag_ = { 0, 1, 2, 3 };
+ subreadData.fileOffset_ = { 35651584, 35655125, 35667128, 35679170 };
+
+ PbiRawMappedData& mappedData = index.mappedData_;
+ mappedData.tId_ = { 0, 0, 0, 0 };
+ mappedData.tStart_ = { 9507, 8453, 8455, 9291 };
+ mappedData.tEnd_ = { 9903, 9902, 9893, 9900 };
+ mappedData.aStart_ = { 2130, 2581, 4102, 5619 };
+ mappedData.aEnd_ = { 2531, 4055, 5560, 6237 };
+ mappedData.revStrand_ = { 0, 1, 0, 1 };
+ mappedData.mapQV_ = { 254, 254, 254, 254 };
+ mappedData.nM_ = { 384, 1411, 1393, 598 };
+ mappedData.nMM_ = { 0, 0, 0, 0 };
+
+ PbiRawBarcodeData& barcodeData = index.barcodeData_;
+ barcodeData.bcForward_ = { 0, 17, 256, 17 };
+ barcodeData.bcReverse_ = { 1, 18, 257, 18 };
+ barcodeData.bcQual_ = { 42, 80, 42, 110 };
+
+ PbiRawReferenceData& referenceData = index.referenceData_;
+ referenceData.entries_.emplace_back( 0, 0, 3 );
+ referenceData.entries_.emplace_back( 1 );
+ referenceData.entries_.emplace_back( PbiReferenceEntry::UNMAPPED_ID );
+
+ return index;
+}
+
+static const PbiRawData shared_index = test2Bam_RawIndex();
+
+static
+void checkFilterRows(const PbiFilter& filter, const std::vector<size_t> expectedRows)
+{
+ for (size_t row : expectedRows)
+ EXPECT_TRUE(filter.Accepts(shared_index, row));
+}
+
+static
+void checkFilterInternals(const PbiFilter& filter,
+ const PbiFilter::CompositionType expectedType,
+ const size_t expectedNumChildren,
+ const std::vector<size_t> expectedRows)
+{
+ EXPECT_EQ(expectedType, filter.d_->type_);
+ EXPECT_EQ(expectedNumChildren, filter.d_->filters_.size());
+ checkFilterRows(filter, expectedRows);
+}
+
+struct SimpleFilter
+{
+ bool Accepts(const PbiRawData& idx, const size_t row) const
+ { (void)idx; (void)row; return true; }
+};
+
+struct NoncompliantFilter { };
+
+struct SortUniqueTestFilter
+{
+ bool Accepts(const PbiRawData& idx, const size_t row) const
+ {
+ (void)idx;
+ switch(row) {
+ case 0: // fall through
+ case 1: // .
+ case 2: // .
+ case 3: // .
+ case 4: // .
+ case 7: // .
+ case 8: return true;
+ default:
+ return false;
+ }
+ }
+};
+
+struct SortUniqueTestFilter2
+{
+ bool Accepts(const PbiRawData& idx, const size_t row) const
+ {
+ (void)idx;
+ switch(row) {
+ case 3: // fall through
+ case 7: // .
+ case 5: return true;
+ default:
+ return false;
+ }
+ }
+};
+
+static inline
+PbiFilter emptyFilter(void)
+{ return PbiFilter{ }; }
+
+static inline
+PbiFilter simpleFilter(void)
+{ return PbiFilter{ SimpleFilter{ } }; }
+
+} // namespace tests
+} // namespace BAM
+} // namespace PacBio
+
+TEST(PbiFilterTest, DefaultCtorOk)
+{
+ auto filter = PbiFilter{ };
+ tests::checkFilterInternals(filter, PbiFilter::INTERSECT, 0, std::vector<size_t>{0,1,2,3});
+}
+
+TEST(PbiFilterTest, CompositionOk)
+{
+ auto filter = PbiFilter{ };
+ filter.Add(PbiFilter{ });
+ tests::checkFilterInternals(filter, PbiFilter::INTERSECT, 1, std::vector<size_t>{0,1,2,3});
+}
+
+TEST(PbiFilterTest, CustomFilterOk)
+{
+ { // ctor
+ auto filter = PbiFilter{ tests::SimpleFilter{ } };
+ tests::checkFilterInternals(filter, PbiFilter::INTERSECT, 1, std::vector<size_t>{});
+ }
+ { // Add
+ auto filter = PbiFilter{ };
+ filter.Add(tests::SimpleFilter{ });
+ tests::checkFilterInternals(filter, PbiFilter::INTERSECT, 1, std::vector<size_t>{});
+ }
+
+// PbiFilter shouldNotCompile = PbiFilter{ tests::NoncompliantFilter{ } }; // <-- when uncommented, should not compile
+// PbiFilter shouldNotCompileEither; shouldNotCompileEither.Add(tests::NoncompliantFilter{ }); // <-- when uncommented, should not compile
+}
+
+TEST(PbiFilterTest, CopyOk)
+{
+ { // empty
+ const auto original = PbiFilter{ };
+
+ PbiFilter copyCtor(original);
+ PbiFilter copyAssign;
+ copyAssign = original;
+
+ tests::checkFilterInternals(original, PbiFilter::INTERSECT, 0, std::vector<size_t>{0,1,2,3});
+ tests::checkFilterInternals(copyCtor, PbiFilter::INTERSECT, 0, std::vector<size_t>{0,1,2,3});
+ tests::checkFilterInternals(copyAssign, PbiFilter::INTERSECT, 0, std::vector<size_t>{0,1,2,3});
+ }
+ { // with children
+ const auto original = PbiFilter{ tests::SimpleFilter{ } };
+
+ PbiFilter copyCtor(original);
+ PbiFilter copyAssign;
+ copyAssign = original;
+
+ tests::checkFilterInternals(original, PbiFilter::INTERSECT, 1, std::vector<size_t>{});
+ tests::checkFilterInternals(copyCtor, PbiFilter::INTERSECT, 1, std::vector<size_t>{});
+ tests::checkFilterInternals(copyAssign, PbiFilter::INTERSECT, 1, std::vector<size_t>{});
+ }
+}
+
+TEST(PbiFilterTest, MoveOk)
+{
+ { // empty
+ const auto original = tests::emptyFilter();
+
+ PbiFilter moveCtor(tests::emptyFilter());
+ PbiFilter moveAssign;
+ moveAssign = tests::emptyFilter();
+
+ tests::checkFilterInternals(original, PbiFilter::INTERSECT, 0, std::vector<size_t>{0,1,2,3});
+ tests::checkFilterInternals(moveCtor, PbiFilter::INTERSECT, 0, std::vector<size_t>{0,1,2,3});
+ tests::checkFilterInternals(moveAssign, PbiFilter::INTERSECT, 0, std::vector<size_t>{0,1,2,3});
+ }
+ { // with children
+ const auto original = tests::simpleFilter();
+
+ PbiFilter moveCtor(tests::simpleFilter());
+ PbiFilter moveAssign;
+ moveAssign = tests::simpleFilter();
+
+ tests::checkFilterInternals(original, PbiFilter::INTERSECT, 1, std::vector<size_t>{0,1,2,3});
+ tests::checkFilterInternals(moveCtor, PbiFilter::INTERSECT, 1, std::vector<size_t>{0,1,2,3});
+ tests::checkFilterInternals(moveAssign, PbiFilter::INTERSECT, 1, std::vector<size_t>{0,1,2,3});
+ }
+}
+
+TEST(PbiFilterTest, SortsAndUniquesChildFilterResultsOk)
+{
+ const auto childFilter = tests::SortUniqueTestFilter{ };
+ const auto filter = PbiFilter{ childFilter };
+ tests::checkFilterRows(childFilter, std::vector<size_t>{2, 7, 0, 3, 4, 1, 8});
+ tests::checkFilterRows(filter, std::vector<size_t>{0, 1, 2, 3, 4, 7, 8});
+}
+
+TEST(PbiFilterTest, UnionOk)
+{
+ { // empty
+ { // copy
+ const auto emptyFilter = tests::emptyFilter();
+ const auto emptyFilter2 = tests::emptyFilter();
+ const auto u = PbiFilter::Union({ emptyFilter, emptyFilter2 });
+ tests::checkFilterInternals(u, PbiFilter::UNION, 2, std::vector<size_t>{0,1,2,3});
+ }
+ { // move
+ const auto u = PbiFilter::Union({ PbiFilter{ }, PbiFilter{ } });
+ tests::checkFilterInternals(u, PbiFilter::UNION, 2, std::vector<size_t>{0,1,2,3});
+ }
+ }
+
+ { // with (no-data) children - just checking composition
+ { // copy
+ const auto simpleFilter = tests::SimpleFilter{ };
+ const auto simpleFilter2 = tests::SimpleFilter{ };
+ const auto u = PbiFilter::Union({ simpleFilter, simpleFilter2 });
+ tests::checkFilterInternals(u, PbiFilter::UNION, 2, std::vector<size_t>{});
+ }
+ { // move
+ const auto u = PbiFilter::Union({ tests::SimpleFilter{ }, tests::SimpleFilter{ } });
+ tests::checkFilterInternals(u, PbiFilter::UNION, 2, std::vector<size_t>{});
+ }
+ }
+
+ { // 2-child union, results sorted & unique-d by PbiFilter
+
+ const auto child1 = tests::SortUniqueTestFilter{ };
+ const auto child2 = tests::SortUniqueTestFilter2{ };
+ const auto u = PbiFilter::Union({ child1, child2 });
+
+ tests::checkFilterRows(child1, std::vector<size_t>{2, 7, 0, 3, 4, 1, 8});
+ tests::checkFilterRows(child2, std::vector<size_t>{3, 7, 5});
+ tests::checkFilterRows(u, std::vector<size_t>{0, 1, 2, 3, 4, 5, 7, 8});
+ }
+}
+
+TEST(PbiFilterTest, IntersectOk)
+{
+ { // empty
+ { // copy
+ const auto emptyFilter = tests::emptyFilter();
+ const auto emptyFilter2 = tests::emptyFilter();
+ const auto i = PbiFilter::Intersection({ emptyFilter, emptyFilter2 });
+ tests::checkFilterInternals(i, PbiFilter::INTERSECT, 2, std::vector<size_t>{0,1,2,3});
+ }
+ { // move
+ const auto i = PbiFilter::Intersection({ PbiFilter{ }, PbiFilter{ } });
+ tests::checkFilterInternals(i, PbiFilter::INTERSECT, 2, std::vector<size_t>{0,1,2,3});
+ }
+ }
+
+ { // with (no-data) children - just checking composition
+ { // copy
+ const auto simpleFilter = tests::SimpleFilter{ };
+ const auto simpleFilter2 = tests::SimpleFilter{ };
+ const auto i = PbiFilter::Intersection({ simpleFilter, simpleFilter2 });
+ tests::checkFilterInternals(i, PbiFilter::INTERSECT, 2, std::vector<size_t>{});
+ }
+ { // move
+ const auto i = PbiFilter::Intersection({ tests::SimpleFilter{ }, tests::SimpleFilter{ } });
+ tests::checkFilterInternals(i, PbiFilter::INTERSECT, 2, std::vector<size_t>{});
+ }
+ }
+
+ { // 2-child intersect, sorted & unique-d by PbiFilter
+
+ const auto child1 = tests::SortUniqueTestFilter{ };
+ const auto child2 = tests::SortUniqueTestFilter2{ };
+ const auto i = PbiFilter::Intersection({ child1, child2 });
+
+ tests::checkFilterRows(child1, std::vector<size_t>{2, 7, 0, 3, 4, 1, 8});
+ tests::checkFilterRows(child2, std::vector<size_t>{3, 7, 5 });
+ tests::checkFilterRows(i, std::vector<size_t>{3, 7});
+ }
+}
+
+TEST(PbiFilterTest, AlignedEndFilterOk)
+{
+ {
+ const auto filter = PbiFilter{ PbiAlignedEndFilter{ 4055 } };
+ tests::checkFilterRows(filter, std::vector<size_t>{1});
+ }
+ {
+ const auto filter = PbiFilter{ PbiAlignedEndFilter{ 4055, Compare::NOT_EQUAL } };
+ tests::checkFilterRows(filter, std::vector<size_t>{0,2,3});
+ }
+ {
+ const auto filter = PbiFilter{ PbiAlignedEndFilter{ 4000, Compare::LESS_THAN } };
+ tests::checkFilterRows(filter, std::vector<size_t>{0});
+ }
+ {
+ const auto filter = PbiFilter{ PbiAlignedEndFilter{ 5560, Compare::GREATER_THAN } };
+ tests::checkFilterRows(filter, std::vector<size_t>{3});
+ }
+ {
+ const auto filter = PbiFilter{ PbiAlignedEndFilter{ 5560, Compare::GREATER_THAN_EQUAL } };
+ tests::checkFilterRows(filter, std::vector<size_t>{2,3});
+ }
+
+ {
+ const auto filter = PbiFilter{ PbiAlignedEndFilter{ 7000, Compare::GREATER_THAN } };
+ tests::checkFilterRows(filter, std::vector<size_t>{});
+ }
+}
+
+TEST(PbiFilterTest, AlignedLengthFilterOk)
+{
+ {
+ const auto filter = PbiFilter{ PbiAlignedLengthFilter{ 500, Compare::GREATER_THAN_EQUAL } };
+ tests::checkFilterRows(filter, std::vector<size_t>{1,2,3});
+ }
+ {
+ const auto filter = PbiFilter{ PbiAlignedLengthFilter{ 1000, Compare::GREATER_THAN_EQUAL } };
+ tests::checkFilterRows(filter, std::vector<size_t>{1,2});
+ }
+}
+
+TEST(PbiFilterTest, AlignedStartFilterOk)
+{
+ {
+ const auto filter = PbiFilter{ PbiAlignedStartFilter{ 2600, Compare::LESS_THAN } };
+ tests::checkFilterRows(filter, std::vector<size_t>{0,1});
+ }
+ {
+ const auto filter = PbiFilter{ PbiAlignedStartFilter{ 4102, Compare::GREATER_THAN } };
+ tests::checkFilterRows(filter, std::vector<size_t>{3});
+ }
+ {
+ const auto filter = PbiFilter{ PbiAlignedStartFilter{ 4102, Compare::GREATER_THAN_EQUAL } };
+ tests::checkFilterRows(filter, std::vector<size_t>{2,3});
+ }
+ {
+ const auto filter = PbiFilter{ PbiAlignedStartFilter{ 6000, Compare::GREATER_THAN } };
+ tests::checkFilterRows(filter, std::vector<size_t>{ });
+ }
+}
+
+TEST(PbiFilterTest, AlignedStrandFilterOk)
+{
+ {
+ const auto filter = PbiFilter{ PbiAlignedStrandFilter{ Strand::FORWARD } };
+ tests::checkFilterRows(filter, std::vector<size_t>{0,2});
+ }
+ {
+ const auto filter = PbiFilter{ PbiAlignedStrandFilter{ Strand::REVERSE } };
+ tests::checkFilterRows(filter, std::vector<size_t>{1,3});
+ }
+ {
+ const auto filter = PbiFilter{ PbiAlignedStrandFilter{ Strand::FORWARD, Compare::NOT_EQUAL } }; // same as Strand::REVERSE
+ tests::checkFilterRows(filter, std::vector<size_t>{1,3});
+ }
+
+ // unsupported compare types throw
+ EXPECT_THROW(PbiAlignedStrandFilter(Strand::FORWARD, Compare::LESS_THAN), std::runtime_error);
+ EXPECT_THROW(PbiAlignedStrandFilter(Strand::FORWARD, Compare::LESS_THAN_EQUAL), std::runtime_error);
+ EXPECT_THROW(PbiAlignedStrandFilter(Strand::FORWARD, Compare::GREATER_THAN), std::runtime_error);
+ EXPECT_THROW(PbiAlignedStrandFilter(Strand::FORWARD, Compare::GREATER_THAN_EQUAL), std::runtime_error);
+}
+
+TEST(PbiFilterTest, BarcodeFilterOk)
+{
+ {
+ const auto filter = PbiFilter{ PbiBarcodeFilter{ 17 } };
+ tests::checkFilterRows(filter, std::vector<size_t>{1,3});
+ }
+ {
+ const auto filter = PbiFilter{ PbiBarcodeFilter{ 18 } };
+ tests::checkFilterRows(filter, std::vector<size_t>{1,3});
+ }
+ {
+ const auto filter = PbiFilter{ PbiBarcodeFilter{ 0 } };
+ tests::checkFilterRows(filter, std::vector<size_t>{0});
+ }
+}
+
+TEST(PbiFilterTest, BarcodeForwardFilterOk)
+{
+ {
+ const auto filter = PbiFilter{ PbiBarcodeForwardFilter{ 17 } };
+ tests::checkFilterRows(filter, std::vector<size_t>{1,3});
+ }
+ {
+ const auto filter = PbiFilter{ PbiBarcodeForwardFilter{ 400 } };
+ tests::checkFilterRows(filter, std::vector<size_t>{});
+ }
+ {
+ const auto filter = PbiFilter{ PbiBarcodeForwardFilter{ {0, 256} } };
+ tests::checkFilterRows(filter, std::vector<size_t>{0,2});
+ }
+}
+
+TEST(PbiFilterTest, BarcodeQualityFilterOk)
+{
+ {
+ const auto filter = PbiFilter{ PbiBarcodeQualityFilter{ 80, Compare::GREATER_THAN_EQUAL } };
+ tests::checkFilterRows(filter, std::vector<size_t>{1,3});
+ }
+ {
+ const auto filter = PbiFilter{ PbiBarcodeQualityFilter{ 40, Compare::LESS_THAN } };
+ tests::checkFilterRows(filter, std::vector<size_t>{});
+ }
+}
+
+TEST(PbiFilterTest, BarcodeReverseFilterOk)
+{
+ {
+ const auto filter = PbiFilter{ PbiBarcodeReverseFilter{ 18 } };
+ tests::checkFilterRows(filter, std::vector<size_t>{1,3});
+ }
+ {
+ const auto filter = PbiFilter{ PbiBarcodeReverseFilter{ 400 } };
+ tests::checkFilterRows(filter, std::vector<size_t>{ });
+ }
+ {
+ const auto filter = PbiFilter{ PbiBarcodeReverseFilter{ {1, 257} } };
+ tests::checkFilterRows(filter, std::vector<size_t>{0,2});
+ }
+}
+
+TEST(PbiFilterTest, BarcodesFilterOk)
+{
+ {
+ const auto filter = PbiFilter{ PbiBarcodesFilter{ 17, 18 } };
+ tests::checkFilterRows(filter, std::vector<size_t>{1,3});
+ }
+ {
+ const auto filter = PbiFilter{ PbiBarcodesFilter{ 17, 19 } };
+ tests::checkFilterRows(filter, std::vector<size_t>{ });
+ }
+ {
+ const auto filter = PbiFilter{ PbiBarcodesFilter{ std::make_pair(17,18) } };
+ tests::checkFilterRows(filter, std::vector<size_t>{1,3});
+ }
+}
+
+TEST(PbiFilterTest, IdentityFilterOk)
+{
+ {
+ const auto filter = PbiFilter{ PbiIdentityFilter{ 0.95, Compare::GREATER_THAN_EQUAL } };
+ tests::checkFilterRows(filter, std::vector<size_t>{3});
+ }
+}
+
+TEST(PbiFilterTest, LocalContextFilterOk)
+{
+ { // == NO_LOCAL_CONTEXT
+ const auto filter = PbiFilter { PbiLocalContextFilter{ LocalContextFlags::NO_LOCAL_CONTEXT } };
+ tests::checkFilterRows(filter, std::vector<size_t>{0});
+ }
+ { // != ADAPTER_BEFORE (exact match)
+ const auto filter = PbiFilter { PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::NOT_EQUAL } };
+ tests::checkFilterRows(filter, std::vector<size_t>{0,2,3});
+ }
+ { // contains ADAPTER_BEFORE
+ const auto filter = PbiFilter { PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::CONTAINS } };
+ tests::checkFilterRows(filter, std::vector<size_t>{1,3});
+ }
+ { // does not contain ADAPTER_BEFORE
+ const auto filter = PbiFilter { PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::NOT_CONTAINS } };
+ tests::checkFilterRows(filter, std::vector<size_t>{0,2});
+ }
+ { // include both ADAPTER_BEFORE and ADAPTER_AFTER
+ const auto filter = PbiFilter::Intersection(
+ {
+ PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::CONTAINS },
+ PbiLocalContextFilter{ LocalContextFlags::ADAPTER_AFTER, Compare::CONTAINS }
+ });
+ tests::checkFilterRows(filter, std::vector<size_t>{3});
+ }
+ { // exclude both ADAPTER_BEFORE and ADAPTER_AFTER
+ const auto filter = PbiFilter::Intersection(
+ {
+ PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::NOT_CONTAINS },
+ PbiLocalContextFilter{ LocalContextFlags::ADAPTER_AFTER, Compare::NOT_CONTAINS }
+ });
+ tests::checkFilterRows(filter, std::vector<size_t>{0});
+ }
+ { // include everything with either ADAPTER_BEFORE or ADAPTER_AFTER
+ const auto filter = PbiFilter::Union(
+ {
+ PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::CONTAINS },
+ PbiLocalContextFilter{ LocalContextFlags::ADAPTER_AFTER, Compare::CONTAINS }
+ });
+ tests::checkFilterRows(filter, std::vector<size_t>{1,2,3});
+ }
+ { // include everything with either ADAPTER_BEFORE or ADAPTER_AFTER, but not both
+ const auto filter = PbiFilter::Intersection(
+ {
+ PbiLocalContextFilter{ LocalContextFlags::NO_LOCAL_CONTEXT, Compare::NOT_EQUAL },
+ PbiFilter::Union(
+ {
+ PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::NOT_CONTAINS },
+ PbiLocalContextFilter{ LocalContextFlags::ADAPTER_AFTER, Compare::NOT_CONTAINS }
+ })
+ });
+ tests::checkFilterRows(filter, std::vector<size_t>{1,2});
+ }
+}
+
+TEST(PbiFilterTest, MapQualityFilterOk)
+{
+ {
+ const auto filter = PbiFilter{ PbiMapQualityFilter{ 254 } };
+ tests::checkFilterRows(filter, std::vector<size_t>{0,1,2,3});
+ }
+ {
+ const auto filter = PbiFilter{ PbiMapQualityFilter{ 254, Compare::NOT_EQUAL } };
+ tests::checkFilterRows(filter, std::vector<size_t>{});
+ }
+}
+
+TEST(PbiFilterTest, MovieNameFilterOk)
+{
+ const auto bamFile = BamFile{ tests::Data_Dir + string{ "/test_group_query/test2.bam" } };
+ const auto index = PbiRawData{ bamFile.PacBioIndexFilename() };
+
+ {
+ const auto filter = PbiFilter{ PbiMovieNameFilter{ "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0" } };
+ const auto expectedRows = std::vector<size_t>{0,1,2,3};
+ for (size_t row : expectedRows)
+ EXPECT_TRUE(filter.Accepts(index, row));
+ }
+ {
+ const auto filter = PbiFilter{ PbiMovieNameFilter{ "does_not_exist" } };
+ const auto expectedRows = std::vector<size_t>{};
+ for (size_t row : expectedRows)
+ EXPECT_TRUE(filter.Accepts(index, row));
+ }
+ {
+ const auto names = vector<string>{"does_not_exist",
+ "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0"};
+ const auto filter = PbiFilter{ PbiMovieNameFilter{ names } };
+ const auto expectedRows = std::vector<size_t>{0,1,2,3};
+ for (size_t row : expectedRows)
+ EXPECT_TRUE(filter.Accepts(index, row));
+ }
+}
+
+TEST(PbiFilterTest, NumDeletedBasesFilterOk)
+{
+ // del: { 12, 38, 45, 11} - calculated from raw data, not stored directly in testing object or read from PBI file
+
+ {
+ const auto filter = PbiFilter{ PbiNumDeletedBasesFilter{ 12, Compare::LESS_THAN_EQUAL } };
+ tests::checkFilterRows(filter, std::vector<size_t>{0,3});
+ }
+ {
+ const auto filter = PbiFilter{ PbiNumDeletedBasesFilter{ 45, Compare::EQUAL } };
+ tests::checkFilterRows(filter, std::vector<size_t>{2});
+ }
+}
+
+TEST(PbiFilterTest, NumInsertedBasesFilterOk)
+{
+ // ins: { 17, 63, 65, 20 } - calculated from raw data, not stored directly testing object or read from PBI file
+
+ {
+ const auto filter = PbiFilter{ PbiNumInsertedBasesFilter{ 63, Compare::GREATER_THAN_EQUAL } };
+ tests::checkFilterRows(filter, std::vector<size_t>{1,2});
+ }
+ {
+ const auto filter = PbiFilter{ PbiNumInsertedBasesFilter{ 17, Compare::NOT_EQUAL } };
+ tests::checkFilterRows(filter, std::vector<size_t>{1,2,3});
+ }
+}
+
+TEST(PbiFilterTest, NumMatchesFilterOk)
+{
+ {
+ const auto filter = PbiFilter{ PbiNumMatchesFilter{ 1000, Compare::GREATER_THAN_EQUAL } };
+ tests::checkFilterRows(filter, std::vector<size_t>{1,2});
+ }
+ {
+ const auto filter = PbiFilter{ PbiNumMatchesFilter{ 400, Compare::LESS_THAN } };
+ tests::checkFilterRows(filter, std::vector<size_t>{0});
+ }
+}
+
+TEST(PbiFilterTest, NumMismatchesFilterOk)
+{
+ {
+ const auto filter = PbiFilter{ PbiNumMismatchesFilter{ 0, Compare::EQUAL } };
+ tests::checkFilterRows(filter, std::vector<size_t>{0,1,2,3});
+ }
+ {
+ const auto filter = PbiFilter{ PbiNumMismatchesFilter{ 0, Compare::NOT_EQUAL } };
+ tests::checkFilterRows(filter, std::vector<size_t>{});
+ }
+}
+
+TEST(PbiFilterTest, QueryEndFilterOk)
+{
+ {
+ const auto filter = PbiFilter{ PbiQueryEndFilter{ 4055 } };
+ tests::checkFilterRows(filter, std::vector<size_t>{1});
+ }
+ {
+ const auto filter = PbiFilter{ PbiQueryEndFilter{ 6200, Compare::GREATER_THAN_EQUAL } };
+ tests::checkFilterRows(filter, std::vector<size_t>{3});
+ }
+}
+
+TEST(PbiFilterTest, QueryLengthFilterOk)
+{
+ {
+ const auto filter = PbiFilter{ PbiQueryLengthFilter{ 500, Compare::GREATER_THAN_EQUAL } };
+ tests::checkFilterRows(filter, std::vector<size_t>{1,2,3});
+ }
+ {
+ const auto filter = PbiFilter{ PbiQueryLengthFilter{ 1000, Compare::GREATER_THAN_EQUAL } };
+ tests::checkFilterRows(filter, std::vector<size_t>{1,2});
+ }
+}
+
+TEST(PbiFilterTest, QueryNameFilterOk)
+{
+ const auto bamFile = BamFile{ tests::Data_Dir + string{ "/test_group_query/test2.bam" } };
+ const auto index = PbiIndex{ bamFile.PacBioIndexFilename() };
+
+ {
+ const auto filter = PbiFilter{ PbiQueryNameFilter{ "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/14743/2579_4055" } };
+ tests::checkFilterRows(filter, std::vector<size_t>{1});
+ }
+ {
+ const auto filter = PbiFilter{ PbiQueryNameFilter{ "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/14743/5615_6237" } };
+ tests::checkFilterRows(filter, std::vector<size_t>{3});
+ }
+
+ {
+ const auto filter = PbiFilter{ PbiQueryNameFilter{ "does_not_exist/0/0_0" } };
+ tests::checkFilterRows(filter, std::vector<size_t>{});
+ }
+ {
+ const auto names = vector<string>{"m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/14743/2579_4055",
+ "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/14743/5615_6237"};
+ const auto filter = PbiFilter{ PbiQueryNameFilter{ names } };
+ tests::checkFilterRows(filter, std::vector<size_t>{1,3});
+ }
+
+ // invalid QNAME syntax throws
+ EXPECT_THROW(
+ {
+ const auto filter = PbiFilter{ PbiQueryNameFilter{ "" } };
+ tests::checkFilterRows(filter, std::vector<size_t>{});
+ },
+ std::runtime_error);
+ EXPECT_THROW(
+ {
+ const auto filter = PbiFilter{ PbiQueryNameFilter{ "foo" } };
+ tests::checkFilterRows(filter, std::vector<size_t>{});
+ },
+ std::runtime_error);
+ EXPECT_THROW(
+ {
+ const auto filter = PbiFilter{ PbiQueryNameFilter{ "foo/bar" } };
+ tests::checkFilterRows(filter, std::vector<size_t>{});
+ },
+ std::runtime_error);
+ EXPECT_THROW(
+ {
+ const auto filter = PbiFilter{ PbiQueryNameFilter{ "foo/bar/baz_bam" } };
+ tests::checkFilterRows(filter, std::vector<size_t>{});
+ },
+ std::exception); // come back to see why this is not runtime_error but something else
+}
+
+TEST(PbiFilterTest, QueryStartFilterOk)
+{
+ {
+ const auto filter = PbiFilter{ PbiQueryStartFilter{ 4101 } };
+ tests::checkFilterRows(filter, std::vector<size_t>{2});
+ }
+ {
+ const auto filter = PbiFilter{ PbiQueryStartFilter{ 5000 } };
+ tests::checkFilterRows(filter, std::vector<size_t>{});
+ }
+ {
+ const auto filter = PbiFilter{ PbiQueryStartFilter{ 5000, Compare::GREATER_THAN } };
+ tests::checkFilterRows(filter, std::vector<size_t>{3});
+ }
+}
+
+TEST(PbiFilterTest, ReadAccuracyFilterOk)
+{
+ {
+ const auto filter = PbiFilter{ PbiReadAccuracyFilter{ 0.9 } };
+ tests::checkFilterRows(filter, std::vector<size_t>{});
+ }
+ {
+ const auto filter = PbiFilter{ PbiReadAccuracyFilter{ 0.9, Compare::GREATER_THAN } };
+ tests::checkFilterRows(filter, std::vector<size_t>{0,2});
+ }
+}
+
+TEST(PbiFilterTest, ReadGroupFilterOk)
+{
+ { // numeric ID
+ const auto filter = PbiReadGroupFilter{ -1197849594 };
+ tests::checkFilterRows(filter, std::vector<size_t>{0,1,2,3});
+
+ const auto filter2 = PbiReadGroupFilter{ 200 };
+ tests::checkFilterRows(filter2, std::vector<size_t>{});
+ }
+ { // string ID
+ const auto filter = PbiReadGroupFilter{ "b89a4406" };
+ tests::checkFilterRows(filter, std::vector<size_t>{0,1,2,3});
+
+ const auto filter2 = PbiReadGroupFilter{ "b89a4406" };
+ tests::checkFilterRows(filter2, std::vector<size_t>{0,1,2,3});
+ }
+ { // ReadGroupInfo object
+ const auto rg = ReadGroupInfo{ "b89a4406" };
+ const auto filter = PbiReadGroupFilter{ rg };
+ tests::checkFilterRows(filter, std::vector<size_t>{0,1,2,3});
+ }
+ { // multi-ID
+ const auto ids = vector<int32_t>({-1197849594, 200});
+ const auto filter = PbiReadGroupFilter{ ids };
+ tests::checkFilterRows(filter, std::vector<size_t>{0,1,2,3});
+ }
+ { // multi-string
+ const auto ids = vector<string>({"b89a4406", "deadbeef"});
+ const auto filter = PbiReadGroupFilter{ ids };
+ tests::checkFilterRows(filter, std::vector<size_t>{0,1,2,3});
+ }
+ { // multi-ReadGroupInfo
+ const auto ids = vector<ReadGroupInfo>({ ReadGroupInfo("b89a4406"), ReadGroupInfo("deadbeef")});
+ const auto filter = PbiReadGroupFilter{ ids };
+ tests::checkFilterRows(filter, std::vector<size_t>{0,1,2,3});
+ }
+}
+
+TEST(PbiFilterTest, ReferenceEndFilterOk)
+{
+ {
+ const auto filter = PbiFilter{ PbiReferenceEndFilter{ 9900 } };
+ tests::checkFilterRows(filter, std::vector<size_t>{3});
+ }
+ {
+ const auto filter = PbiFilter{ PbiReferenceEndFilter{ 9900, Compare::GREATER_THAN_EQUAL } };
+ tests::checkFilterRows(filter, std::vector<size_t>{0,1,3});
+ }
+}
+
+TEST(PbiFilterTest, ReferenceIdFilterOk)
+{
+ {
+ const auto filter = PbiFilter{ PbiReferenceIdFilter{ 0 } };
+ tests::checkFilterRows(filter, std::vector<size_t>{0,1,2,3});
+ }
+ {
+ const auto filter = PbiFilter{ PbiReferenceIdFilter{ 0, Compare::NOT_EQUAL } };
+ tests::checkFilterRows(filter, std::vector<size_t>{});
+ }
+ {
+ const auto ids = vector<int32_t>({0, 42});
+ const auto filter = PbiFilter{ PbiReferenceIdFilter{ ids } };
+ tests::checkFilterRows(filter, std::vector<size_t>{0,1,2,3});
+ }
+}
+
+TEST(PbiFilterTest, ReferenceNameFilterOk)
+{
+ const auto bamFile = BamFile{ tests::Data_Dir + string{ "/test_group_query/test2.bam" } };
+ const auto index = PbiRawData{ bamFile.PacBioIndexFilename() };
+
+ {
+ const auto filter = PbiFilter{ PbiReferenceNameFilter{ "lambda_NEB3011" } };
+ const auto expectedRows = std::vector<size_t>{0,1,2,3};
+ for (size_t row : expectedRows)
+ EXPECT_TRUE(filter.Accepts(index, row));
+
+ }
+ {
+ const auto filter = PbiFilter{ PbiReferenceNameFilter{ "lambda_NEB3011", Compare::NOT_EQUAL } };
+ const auto expectedRows = std::vector<size_t>{};
+ for (size_t row : expectedRows)
+ EXPECT_TRUE(filter.Accepts(index, row));
+ }
+ {
+ const auto names = vector<string>({ "lambda_NEB3011" }); // this file only has 1 :(
+ const auto filter = PbiFilter{ PbiReferenceNameFilter{ names } };
+ const auto expectedRows = std::vector<size_t>{0,1,2,3};
+ for (size_t row : expectedRows)
+ EXPECT_TRUE(filter.Accepts(index, row));
+ }
+
+ // unsupported compare types throw
+ EXPECT_THROW(PbiReferenceNameFilter("foo", Compare::LESS_THAN), std::runtime_error);
+ EXPECT_THROW(PbiReferenceNameFilter("foo", Compare::LESS_THAN_EQUAL), std::runtime_error);
+ EXPECT_THROW(PbiReferenceNameFilter("foo", Compare::GREATER_THAN), std::runtime_error);
+ EXPECT_THROW(PbiReferenceNameFilter("foo", Compare::GREATER_THAN_EQUAL), std::runtime_error);
+}
+
+TEST(PbiFilterTest, ReferenceStartFilterOk)
+{
+ {
+ const auto filter = PbiFilter{ PbiReferenceStartFilter{ 8453 } };
+ tests::checkFilterRows(filter, std::vector<size_t>{1});
+ }
+ {
+ const auto filter = PbiFilter{ PbiReferenceStartFilter{ 9200, Compare::GREATER_THAN_EQUAL } };
+ tests::checkFilterRows(filter, std::vector<size_t>{0,3});
+ }
+}
+
+TEST(PbiFilterTest, ZmwFilterOk)
+{
+ {
+ const auto filter = PbiFilter{ PbiZmwFilter{ 14743 } };
+ tests::checkFilterRows(filter, std::vector<size_t>{0,1,2,3});
+ }
+ {
+ const auto filter = PbiFilter{ PbiZmwFilter{ 14743, Compare::NOT_EQUAL } };
+ tests::checkFilterRows(filter, std::vector<size_t>{});
+ }
+ {
+ const auto zmws = vector<int32_t>({14743,42,200});
+ const auto filter = PbiFilter{ PbiZmwFilter{ zmws } };
+ tests::checkFilterRows(filter, std::vector<size_t>{0,1,2,3});
+ }
+}
+
+TEST(PbiFilterTest, FromDataSetOk)
+{
+ const auto expectedFilter =
+ PbiFilter::Union(
+ {
+ PbiFilter::Intersection(
+ {
+ PbiZmwFilter{ 14743 },
+ PbiReadAccuracyFilter { 0.9, Compare::GREATER_THAN_EQUAL }
+ }),
+
+ PbiReferenceStartFilter { 9200, Compare::GREATER_THAN_EQUAL }
+ });
+
+
+ auto properties1 = Properties{ };
+ properties1.Add(Property{ "zm", "14743", "==" });
+ properties1.Add(Property{ "rq", "0.9", ">=" });
+
+ auto datasetFilter1 = Filter{ };
+ datasetFilter1.Properties(properties1);
+
+ auto properties2 = Properties{ };
+ properties2.Add(Property{ "pos", "9200", ">=" });
+
+ auto datasetFilter2 = Filter{ };
+ datasetFilter2.Properties(properties2);
+
+ auto datasetFilters = Filters{ };
+ datasetFilters.Add(datasetFilter1);
+ datasetFilters.Add(datasetFilter2);
+ auto dataset = DataSet{ };
+ dataset.Filters(datasetFilters);
+
+ const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+
+ for (size_t i = 0; i < tests::shared_index.NumReads(); ++i) {
+ EXPECT_EQ(expectedFilter.Accepts(tests::shared_index, i),
+ generatedFilter.Accepts(tests::shared_index, i));
+ }
+}
+
+TEST(PbiFilterTest, LocalContextFiltersFromDataSetXmlOk)
+{
+ { // no adapters or barcodes
+
+ const auto expectedFilter =
+ PbiLocalContextFilter{ LocalContextFlags::NO_LOCAL_CONTEXT, Compare::EQUAL };
+
+ // <Property Name="cx" Value="0" Operator="==" />
+
+ Property property("cx", "0", "==");
+
+ auto filter = Filter{ };
+ filter.Properties().Add(property);
+ DataSet dataset = DataSet{ };
+ dataset.Filters().Add(filter);
+
+ const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+ tests::checkFilterRows(expectedFilter, std::vector<size_t>{0});
+ tests::checkFilterRows(generatedFilter, std::vector<size_t>{0});
+ }
+ { // any adapters or barcodes
+
+ const auto expectedFilter =
+ PbiLocalContextFilter{ LocalContextFlags::NO_LOCAL_CONTEXT, Compare::NOT_EQUAL };
+
+ // <Property Name="cx" Value="0" Operator="!=" />
+
+ Property property("cx", "0", "!=");
+
+ auto filter = Filter{ };
+ filter.Properties().Add(property);
+ DataSet dataset = DataSet{ };
+ dataset.Filters().Add(filter);
+
+ const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+ tests::checkFilterRows(expectedFilter, std::vector<size_t>{1,2,3});
+ tests::checkFilterRows(generatedFilter, std::vector<size_t>{1,2,3});
+ }
+ { // contains adapter_before
+
+ const auto expectedFilter =
+ PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::CONTAINS };
+
+ // <Property Name="cx" Value="1" Operator="&" />
+
+ Property property("cx", "1", "&");
+
+ auto filter = Filter{ };
+ filter.Properties().Add(property);
+ DataSet dataset = DataSet{ };
+ dataset.Filters().Add(filter);
+
+ const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+ tests::checkFilterRows(expectedFilter, std::vector<size_t>{1,3});
+ tests::checkFilterRows(generatedFilter, std::vector<size_t>{1,3});
+ }
+ { // contains adapter_before
+
+ const auto expectedFilter =
+ PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::CONTAINS };
+
+ // <Property Name="cx" Value="ADAPTER_BEFORE" Operator="&" />
+
+ Property property("cx", "ADAPTER_BEFORE", "&");
+
+ auto filter = Filter{ };
+ filter.Properties().Add(property);
+ DataSet dataset = DataSet{ };
+ dataset.Filters().Add(filter);
+
+ const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+ tests::checkFilterRows(expectedFilter, std::vector<size_t>{1,3});
+ tests::checkFilterRows(generatedFilter, std::vector<size_t>{1,3});
+ }
+ { // contains adapter_after
+
+ const auto expectedFilter =
+ PbiLocalContextFilter{ LocalContextFlags::ADAPTER_AFTER, Compare::CONTAINS };
+
+ // <Property Name="cx" Value="2" Operator="&" />
+
+ Property property("cx", "2", "&");
+
+ auto filter = Filter{ };
+ filter.Properties().Add(property);
+ DataSet dataset = DataSet{ };
+ dataset.Filters().Add(filter);
+
+ const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+ tests::checkFilterRows(expectedFilter, std::vector<size_t>{2,3});
+ tests::checkFilterRows(generatedFilter, std::vector<size_t>{2,3});
+ }
+ { // contains adapter_before or adapter_after
+
+ const auto expectedFilter =
+ PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE | LocalContextFlags::ADAPTER_AFTER,
+ Compare::CONTAINS };
+
+ // <Property Name="cx" Value="3" Operator="&" />
+
+ Property property("cx", "3", "&");
+
+ auto filter = Filter{ };
+ filter.Properties().Add(property);
+ DataSet dataset = DataSet{ };
+ dataset.Filters().Add(filter);
+
+ const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+ tests::checkFilterRows(expectedFilter, std::vector<size_t>{1,2,3});
+ tests::checkFilterRows(generatedFilter, std::vector<size_t>{1,2,3});
+ }
+ { // contains adapter_before or adapter_after
+
+ const auto expectedFilter =
+ PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE | LocalContextFlags::ADAPTER_AFTER,
+ Compare::CONTAINS };
+
+ // <Property Name="cx" Value="ADAPTER_BEFORE | ADAPTER_AFTER" Operator="&" />
+
+ Property property("cx", "ADAPTER_BEFORE | ADAPTER_AFTER", "&");
+
+ auto filter = Filter{ };
+ filter.Properties().Add(property);
+ DataSet dataset = DataSet{ };
+ dataset.Filters().Add(filter);
+
+ const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+ tests::checkFilterRows(expectedFilter, std::vector<size_t>{1,2,3});
+ tests::checkFilterRows(generatedFilter, std::vector<size_t>{1,2,3});
+ }
+ { // contains adapter_before or adapter_after - no whitespace separation
+
+ const auto expectedFilter =
+ PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE | LocalContextFlags::ADAPTER_AFTER,
+ Compare::CONTAINS };
+
+ // <Property Name="cx" Value="ADAPTER_BEFORE|ADAPTER_AFTER" Operator="&" />
+
+ Property property("cx", "ADAPTER_BEFORE|ADAPTER_AFTER", "&");
+
+ auto filter = Filter{ };
+ filter.Properties().Add(property);
+ DataSet dataset = DataSet{ };
+ dataset.Filters().Add(filter);
+
+ const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+ tests::checkFilterRows(expectedFilter, std::vector<size_t>{1,2,3});
+ tests::checkFilterRows(generatedFilter, std::vector<size_t>{1,2,3});
+ }
+ { // contains adapter_before or adapter_after - a lot of whitespace separation
+
+ const auto expectedFilter =
+ PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE | LocalContextFlags::ADAPTER_AFTER,
+ Compare::CONTAINS };
+
+ // <Property Name="cx" Value="ADAPTER_BEFORE | ADAPTER_AFTER" Operator="&" />
+
+ Property property("cx", "ADAPTER_BEFORE | ADAPTER_AFTER", "&");
+
+ auto filter = Filter{ };
+ filter.Properties().Add(property);
+ DataSet dataset = DataSet{ };
+ dataset.Filters().Add(filter);
+
+ const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+ tests::checkFilterRows(expectedFilter, std::vector<size_t>{1,2,3});
+ tests::checkFilterRows(generatedFilter, std::vector<size_t>{1,2,3});
+ }
+ { // contains adapter_before or adapter_after, but not both
+
+ const auto expectedFilter = PbiFilter::Union(
+ {
+ PbiFilter::Intersection(
+ {
+ PbiLocalContextFilter{ LocalContextFlags::NO_LOCAL_CONTEXT, Compare::NOT_EQUAL },
+ PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::NOT_CONTAINS }
+ }),
+ PbiFilter::Intersection(
+ {
+ PbiLocalContextFilter{ LocalContextFlags::NO_LOCAL_CONTEXT, Compare::NOT_EQUAL },
+ PbiLocalContextFilter{ LocalContextFlags::ADAPTER_AFTER, Compare::NOT_CONTAINS }
+ })
+ });
+
+ // <Filters>
+ // <Filter>
+ // <Properties>
+ // <Property Name="cx" Value="0" Operator="!=" />
+ // <Property Name="cx" Value="1" Operator="~" />
+ // </Properties>
+ // </Filter>
+ // <Filter>
+ // <Properties>
+ // <Property Name="cx" Value="0" Operator="!=" />
+ // <Property Name="cx" Value="2" Operator="~" />
+ // </Properties>
+ // </Filter>
+ // </Filters>
+
+ auto filter1 = Filter{ };
+ filter1.Properties().Add(Property("cx", "0", "!="));
+ filter1.Properties().Add(Property("cx", "1", "~"));
+
+ auto filter2 = Filter{ };
+ filter2.Properties().Add(Property("cx", "0", "!="));
+ filter2.Properties().Add(Property("cx", "2", "~"));
+
+ DataSet dataset = DataSet{ };
+ dataset.Filters().Add(filter1);
+ dataset.Filters().Add(filter2);
+
+ const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+ tests::checkFilterRows(expectedFilter, std::vector<size_t>{1,2});
+ tests::checkFilterRows(generatedFilter, std::vector<size_t>{1,2});
+
+ }
+ { // contains adapter_before or adapter_after
+
+ const auto expectedFilter = PbiFilter::Union(
+ {
+ PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::CONTAINS },
+ PbiLocalContextFilter{ LocalContextFlags::ADAPTER_AFTER, Compare::CONTAINS }
+ });
+
+ // <Filters>
+ // <Filter>
+ // <Properties>
+ // <Property Name="cx" Value="1" Operator="&" />
+ // </Properties>
+ // </Filter>
+ // <Filter>
+ // <Properties>
+ // <Property Name="cx" Value="2" Operator="&" />
+ // </Properties>
+ // </Filter>
+ // </Filters>
+
+ auto filter1 = Filter{ };
+ filter1.Properties().Add(Property("cx", "1", "&"));
+
+ auto filter2 = Filter{ };
+ filter2.Properties().Add(Property("cx", "2", "&"));
+
+ DataSet dataset = DataSet{ };
+ dataset.Filters().Add(filter1);
+ dataset.Filters().Add(filter2);
+
+ const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+ tests::checkFilterRows(expectedFilter, std::vector<size_t>{1,2,3});
+ tests::checkFilterRows(generatedFilter, std::vector<size_t>{1,2,3});
+ }
+ { // adapter_before and adapter_after
+
+ const auto expectedFilter = PbiFilter::Intersection(
+ {
+ PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::CONTAINS },
+ PbiLocalContextFilter{ LocalContextFlags::ADAPTER_AFTER, Compare::CONTAINS }
+ });
+
+ // <Property Name="cx" Value="1" Operator="&" />
+ // <Property Name="cx" Value="2" Operator="&" />
+
+ Property property1("cx", "1", "&");
+ Property property2("cx", "2", "&");
+
+ auto filter = Filter{ };
+ filter.Properties().Add(property1);
+ filter.Properties().Add(property2);
+ DataSet dataset = DataSet{ };
+ dataset.Filters().Add(filter);
+
+ const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+ tests::checkFilterRows(expectedFilter, std::vector<size_t>{3});
+ tests::checkFilterRows(generatedFilter, std::vector<size_t>{3});
+ }
+ { // adapter_before, but no adapter_after
+
+ const auto expectedFilter = PbiFilter::Intersection(
+ {
+ PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::CONTAINS },
+ PbiLocalContextFilter{ LocalContextFlags::ADAPTER_AFTER, Compare::NOT_CONTAINS }
+ });
+
+ // <Property Name="cx" Value="1" Operator="&" />
+ // <Property Name="cx" Value="2" Operator="~" />
+
+ Property property1("cx", "1", "&");
+ Property property2("cx", "2", "~");
+
+ auto filter = Filter{ };
+ filter.Properties().Add(property1);
+ filter.Properties().Add(property2);
+ DataSet dataset = DataSet{ };
+ dataset.Filters().Add(filter);
+
+ const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+ tests::checkFilterRows(expectedFilter, std::vector<size_t>{1});
+ tests::checkFilterRows(generatedFilter, std::vector<size_t>{1});
+ }
+ { // contains no adapter_before
+
+ const auto expectedFilter =
+ PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::NOT_CONTAINS };
+
+ // <Property Name="cx" Value="1" Operator="~" />
+
+ Property property("cx", "1", "~");
+
+ auto filter = Filter{ };
+ filter.Properties().Add(property);
+ DataSet dataset = DataSet{ };
+ dataset.Filters().Add(filter);
+
+ const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+ tests::checkFilterRows(expectedFilter, std::vector<size_t>{0,2});
+ tests::checkFilterRows(generatedFilter, std::vector<size_t>{0,2});
+ }
+ { // contains no adapter_before or adapter_after
+
+ const auto expectedFilter = PbiFilter::Intersection(
+ {
+ PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::NOT_CONTAINS },
+ PbiLocalContextFilter{ LocalContextFlags::ADAPTER_AFTER, Compare::NOT_CONTAINS }
+ });
+
+ // <Property Name="cx" Value="1" Operator="~" />
+ // <Property Name="cx" Value="2" Operator="~" />
+
+ Property property1("cx", "1", "~");
+ Property property2("cx", "2", "~");
+
+ auto filter = Filter{ };
+ filter.Properties().Add(property1);
+ filter.Properties().Add(property2);
+ DataSet dataset = DataSet{ };
+ dataset.Filters().Add(filter);
+
+ const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+ tests::checkFilterRows(expectedFilter, std::vector<size_t>{0});
+ tests::checkFilterRows(generatedFilter, std::vector<size_t>{0});
+ }
+ { // contains no adapter_before or adapter_after
+
+ const auto expectedFilter =
+ PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE | LocalContextFlags::ADAPTER_AFTER,
+ Compare::NOT_CONTAINS };
+
+ // <Property Name="cx" Value="3" Operator="~" />
+
+ Property property("cx", "3", "~");
+
+ auto filter = Filter{ };
+ filter.Properties().Add(property);
+ DataSet dataset = DataSet{ };
+ dataset.Filters().Add(filter);
+
+ const auto generatedFilter = PbiFilter::FromDataSet(dataset);
+ tests::checkFilterRows(expectedFilter, std::vector<size_t>{0});
+ tests::checkFilterRows(generatedFilter, std::vector<size_t>{0});
+ }
+ { // throws on invalid enum name
+
+ Property property("cx", "DOES_NOT_EXIST", "~");
+
+ auto filter = Filter{ };
+ filter.Properties().Add(property);
+ DataSet dataset = DataSet{ };
+ dataset.Filters().Add(filter);
+
+ EXPECT_THROW(PbiFilter::FromDataSet(dataset), std::runtime_error);
+ }
+}
diff --git a/tests/src/test_PbiFilterQuery.cpp b/tests/src/test_PbiFilterQuery.cpp
new file mode 100644
index 0000000..9db400f
--- /dev/null
+++ b/tests/src/test_PbiFilterQuery.cpp
@@ -0,0 +1,245 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include "TestData.h"
+#include <gtest/gtest.h>
+#include <pbbam/PbiFilterQuery.h>
+#include <algorithm>
+#include <string>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+TEST(PbiFilterQueryTest, QueryOk)
+{
+ const auto bamFile = BamFile{ tests::Data_Dir + string{ "/test_group_query/test2.bam" } };
+
+ {
+ int count = 0;
+ PbiFilterQuery query( PbiQueryLengthFilter{ 500, Compare::GREATER_THAN_EQUAL}, bamFile);
+ for (const auto& r: query) {
+ ++count;
+ EXPECT_GE((r.QueryEnd() - r.QueryStart()), 500);
+ }
+ EXPECT_EQ(3, count);
+ }
+ {
+ // all records aligned to reverse strand && pos >= 9200
+ const auto filter = PbiFilter::Intersection(
+ {
+ PbiAlignedStrandFilter{Strand::REVERSE},
+ PbiReferenceStartFilter{9200, Compare::GREATER_THAN_EQUAL}
+ });
+
+ int count = 0;
+ PbiFilterQuery query(filter, bamFile);
+ for (const auto& r: query) {
+ ++count;
+ EXPECT_EQ(Strand::REVERSE, r.AlignedStrand());
+ EXPECT_GE((r.ReferenceStart()), 9200);
+ EXPECT_EQ(string("m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/14743/5615_6237"), r.FullName());
+ }
+ EXPECT_EQ(1, count);
+ }
+ {
+ // all records aligned to forward strand && pos >= 9200
+ const auto filter = PbiFilter::Intersection(
+ {
+ PbiAlignedStrandFilter{Strand::FORWARD},
+ PbiReferenceStartFilter{9200, Compare::GREATER_THAN_EQUAL}
+ });
+
+ int count = 0;
+ PbiFilterQuery query(filter, bamFile);
+ for (const auto& r: query) {
+ ++count;
+ EXPECT_EQ(Strand::FORWARD, r.AlignedStrand());
+ EXPECT_GE((r.ReferenceStart()), 9200);
+ EXPECT_EQ(string("m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/14743/2114_2531"), r.FullName());
+ }
+ EXPECT_EQ(1, count);
+ }
+ {
+ // all records from RG ("b89a4406") with numMatches >= 1200
+ const auto filter = PbiFilter::Intersection(
+ {
+ PbiReadGroupFilter{"b89a4406"},
+ PbiNumMatchesFilter{1200, Compare::GREATER_THAN_EQUAL}
+ });
+
+ int count = 0;
+ PbiFilterQuery query(filter, bamFile);
+ for (const auto& r: query) {
+ ++count;
+ EXPECT_EQ(string("b89a4406"), r.ReadGroupId());
+ EXPECT_GE((r.NumMatches()), 1200);
+ if (count == 1)
+ EXPECT_EQ(string("m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/14743/2579_4055"), r.FullName());
+ else if (count == 2)
+ EXPECT_EQ(string("m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/14743/4101_5571"), r.FullName());
+ }
+ EXPECT_EQ(2, count);
+ }
+}
+
+TEST(PbiFilterQueryTest, ZmwRangeFromDatasetOk)
+{
+ const auto expectedMovieName = string{ "m150404_101626_42267_c100807920800000001823174110291514_s1_p0" };
+
+ const DataSet ds(tests::Data_Dir + "/chunking/chunking.subreadset.xml");
+ EXPECT_EQ(3, ds.BamFiles().size());
+
+ { // movie name
+
+ int count = 0;
+ PbiFilterQuery query{ PbiMovieNameFilter{expectedMovieName}, ds };
+ for (const BamRecord& r : query) {
+ EXPECT_EQ(expectedMovieName, r.MovieName());
+ ++count;
+ }
+ EXPECT_EQ(1220, count);
+ }
+
+ { // sequencing chemistries
+ set<string> chems{ ds.SequencingChemistries() };
+ set<string> expected{ "P6-C4" };
+ EXPECT_TRUE(equal(chems.begin(), chems.end(), expected.begin()));
+ }
+
+ { // min ZMW
+
+ int count = 0;
+ PbiFilterQuery query{ PbiZmwFilter{54, Compare::GREATER_THAN}, ds };
+ for (const BamRecord& r : query) {
+ EXPECT_GT(r.HoleNumber(), 54);
+ ++count;
+ }
+ EXPECT_EQ(1220, count);
+ }
+
+ { // max ZMW
+
+ int count = 0;
+ PbiFilterQuery query{ PbiZmwFilter{1816, Compare::LESS_THAN}, ds };
+ for (const BamRecord& r : query) {
+ EXPECT_LT(r.HoleNumber(),1816);
+ ++count;
+ }
+ EXPECT_EQ(150, count);
+ }
+
+ { // put all together, from DataSet XML
+
+ const PbiFilter filter = PbiFilter::FromDataSet(ds);
+ PbiFilterQuery query(filter, ds);
+ int count = 0;
+ for (const BamRecord& r : query) {
+ EXPECT_EQ(expectedMovieName, r.MovieName());
+ const auto zmw = r.HoleNumber();
+ EXPECT_GT(zmw, 54);
+ EXPECT_LT(zmw, 1816);
+ ++count;
+ }
+ EXPECT_EQ(150, count);
+ }
+ { // empty filter object - should return all records from the same dataset
+
+ PbiFilterQuery query(PbiFilter{ }, ds);
+ int count = 0;
+ for (const BamRecord& r : query) {
+ (void)r;
+ ++count;
+ }
+ EXPECT_EQ(1220, count);
+ }
+ { // no <Filters> element present at all
+
+ const DataSet ds(tests::Data_Dir + "/chunking/chunking_missingfilters.subreadset.xml");
+ const PbiFilter filter = PbiFilter::FromDataSet(ds);
+ PbiFilterQuery query(filter, ds);
+ int count = 0;
+ for (const BamRecord& r : query) {
+ (void)r;
+ ++count;
+ }
+ EXPECT_EQ(1220, count);
+ }
+ { // <Filters> element contains no child <Filter> elements
+
+ const DataSet ds(tests::Data_Dir + "/chunking/chunking_emptyfilters.subreadset.xml");
+ const PbiFilter filter = PbiFilter::FromDataSet(ds);
+ PbiFilterQuery query(filter, ds);
+ int count = 0;
+ for (const BamRecord& r : query) {
+ (void)r;
+ ++count;
+ }
+ EXPECT_EQ(1220, count);
+ }
+}
+
+TEST(PbiFilterQueryTest, MissingPbiShouldThrow)
+{
+ const PbiFilter filter{ PbiZmwFilter{31883} };
+ const string phi29Bam = tests::Data_Dir + "/phi29.bam";
+ const string hasPbiBam = tests::Data_Dir + "/polymerase/production.scraps.bam";
+
+ { // single file, missing PBI
+
+ EXPECT_THROW(PbiFilterQuery(filter, phi29Bam), std::runtime_error);
+ }
+
+ { // from dataset, all missing PBI
+
+ DataSet ds;
+ ds.ExternalResources().Add(ExternalResource("PacBio.SubreadFile.SubreadBamFile", phi29Bam));
+ ds.ExternalResources().Add(ExternalResource("PacBio.SubreadFile.SubreadBamFile", phi29Bam));
+ EXPECT_THROW(PbiFilterQuery(filter, ds), std::runtime_error);
+ }
+
+ { // from dataset, mixed PBI presence
+
+ DataSet ds;
+ ds.ExternalResources().Add(ExternalResource("PacBio.SubreadFile.SubreadBamFile", phi29Bam));
+ ds.ExternalResources().Add(ExternalResource("PacBio.SubreadFile.ScrapsBamFile", hasPbiBam));
+ EXPECT_THROW(PbiFilterQuery(filter, ds), std::runtime_error);
+ }
+}
diff --git a/tests/src/test_PolymeraseStitching.cpp b/tests/src/test_PolymeraseStitching.cpp
index 500f8c6..7c2e332 100644
--- a/tests/src/test_PolymeraseStitching.cpp
+++ b/tests/src/test_PolymeraseStitching.cpp
@@ -44,17 +44,22 @@
#include <string>
#include <gtest/gtest.h>
+#include <pbbam/BamFile.h>
+#include <pbbam/BamRecord.h>
+#include <pbbam/EntireFileQuery.h>
+#include <pbbam/Frames.h>
+#include <pbbam/virtual/VirtualPolymeraseReader.h>
+#include <pbbam/virtual/ZmwWhitelistVirtualReader.h>
-#include "pbbam/virtual/VirtualPolymeraseReader.h"
-#include "pbbam/BamFile.h"
-#include "pbbam/BamRecord.h"
-#include "pbbam/EntireFileQuery.h"
-#include "pbbam/Frames.h"
#include "TestData.h"
using namespace PacBio;
using namespace PacBio::BAM;
+namespace PacBio {
+namespace BAM {
+namespace tests {
+
void Compare(const BamRecord& b1, const BamRecord& b2)
{
EXPECT_TRUE(b1.HasDeletionQV());
@@ -118,6 +123,10 @@ void Compare(const BamRecord& b1, const BamRecord& b2)
EXPECT_EQ(b1.PulseMergeQV(), b2.PulseMergeQV());
}
+} // namespace tests
+} // namespace BAM
+} // namespace PacBio
+
TEST(VirtualPolymeraseReader, InternalSubreadsToOriginal)
{
// Create virtual polymerase read
@@ -138,7 +147,7 @@ TEST(VirtualPolymeraseReader, InternalSubreadsToOriginal)
auto polyRecord = *begin++;
EXPECT_TRUE(begin == end);
- Compare(polyRecord, virtualRecord);
+ tests::Compare(polyRecord, virtualRecord);
}
TEST(VirtualPolymeraseReader, InternalHQToOriginal)
@@ -161,7 +170,7 @@ TEST(VirtualPolymeraseReader, InternalHQToOriginal)
auto polyRecord = *begin++;
EXPECT_TRUE(begin == end);
- Compare(polyRecord, virtualRecord);
+ tests::Compare(polyRecord, virtualRecord);
}
TEST(VirtualPolymeraseReader, VirtualRegions)
@@ -248,6 +257,7 @@ TEST(VirtualPolymeraseReader, ProductionSubreadsToOriginal)
// Create virtual polymerase read
VirtualPolymeraseReader vpr(tests::Data_Dir + "/polymerase/production.subreads.bam",
tests::Data_Dir + "/polymerase/production.scraps.bam");
+
EXPECT_TRUE(vpr.HasNext());
auto virtualRecord = vpr.Next();
EXPECT_FALSE(vpr.HasNext());
@@ -261,11 +271,11 @@ TEST(VirtualPolymeraseReader, ProductionSubreadsToOriginal)
EXPECT_TRUE(begin != end);
auto polyRecord = *begin++;
- EXPECT_TRUE(begin == end);
+ EXPECT_TRUE(begin == end);
EXPECT_EQ(polyRecord.FullName(), virtualRecord.FullName());
EXPECT_EQ(polyRecord.HoleNumber(), virtualRecord.HoleNumber());
- EXPECT_EQ(polyRecord.ReadAccuracy(), virtualRecord.ReadAccuracy());
+ EXPECT_FLOAT_EQ(polyRecord.ReadAccuracy(), virtualRecord.ReadAccuracy());
EXPECT_EQ(polyRecord.NumPasses(), virtualRecord.NumPasses());
EXPECT_EQ(polyRecord.Sequence(), virtualRecord.Sequence());
EXPECT_EQ(polyRecord.Qualities(), virtualRecord.Qualities());
@@ -350,3 +360,166 @@ TEST(VirtualPolymeraseReader, ProductionHQToOriginal)
EXPECT_FALSE(virtualRecord.HasPrePulseFrames());
EXPECT_FALSE(virtualRecord.HasPulseCallWidth());
}
+
+TEST(ZmwWhitelistVirtualReader, SingleZmwOk)
+{
+ const std::vector<int32_t> whitelist = { 200000 };
+
+ ZmwWhitelistVirtualReader reader(whitelist,
+ tests::Data_Dir + "/polymerase/whitelist/internal.subreads.bam",
+ tests::Data_Dir + "/polymerase/whitelist/internal.scraps.bam");
+
+ // create virtual record
+ EXPECT_TRUE(reader.HasNext());
+ auto virtualRecord = reader.Next();
+ EXPECT_FALSE(reader.HasNext());
+
+ // fetch original polymerase read (2nd record)
+ BamFile polyBam(tests::Data_Dir + "/polymerase/whitelist/internal.polymerase.bam");
+ EntireFileQuery polyQuery(polyBam);
+ auto begin = polyQuery.begin();
+ auto end = polyQuery.end();
+ EXPECT_TRUE(begin != end);
+ ++begin;
+ EXPECT_TRUE(begin != end);
+ auto polyRecord = *begin++;
+
+ EXPECT_EQ(200000, virtualRecord.HoleNumber());
+
+ tests::Compare(polyRecord, virtualRecord);
+}
+
+TEST(ZmwWhitelistVirtualReader, MultiZmwsOk)
+{
+ const std::vector<int32_t> whitelist = { 100000, 300000 };
+
+ ZmwWhitelistVirtualReader reader(whitelist,
+ tests::Data_Dir + "/polymerase/whitelist/internal.subreads.bam",
+ tests::Data_Dir + "/polymerase/whitelist/internal.scraps.bam");
+
+
+ // create virtual records
+ EXPECT_TRUE(reader.HasNext());
+ auto virtualRecord1 = reader.Next();
+ EXPECT_TRUE(reader.HasNext());
+ auto virtualRecord2 = reader.Next();
+ EXPECT_FALSE(reader.HasNext());
+
+ // fetch original polymerase reads (2nd record)
+ BamFile polyBam(tests::Data_Dir + "/polymerase/whitelist/internal.polymerase.bam");
+ EntireFileQuery polyQuery(polyBam);
+ auto begin = polyQuery.begin();
+ auto end = polyQuery.end();
+
+ EXPECT_TRUE(begin != end);
+ auto polyRecord1 = *begin++;
+ EXPECT_TRUE(begin != end);
+ ++begin;
+ EXPECT_TRUE(begin != end);
+ auto polyRecord2 = *begin++;
+ EXPECT_TRUE(begin == end);
+
+ EXPECT_EQ(100000, virtualRecord1.HoleNumber());
+ EXPECT_EQ(300000, virtualRecord2.HoleNumber());
+
+ tests::Compare(polyRecord1, virtualRecord1);
+ tests::Compare(polyRecord2, virtualRecord2);
+}
+
+TEST(ZmwWhitelistVirtualReader, EmptyListOk)
+{
+ const std::vector<int32_t> whitelist = { };
+
+ ZmwWhitelistVirtualReader reader(whitelist,
+ tests::Data_Dir + "/polymerase/whitelist/internal.subreads.bam",
+ tests::Data_Dir + "/polymerase/whitelist/internal.scraps.bam");
+ EXPECT_FALSE(reader.HasNext());
+ EXPECT_TRUE(reader.NextRaw().empty());
+}
+
+TEST(ZmwWhitelistVirtualReader, EmptyScrapsFileOk)
+{
+ const std::vector<int32_t> whitelist = { 10944689, 10944690 };
+ const std::string primaryBamFn = tests::Data_Dir + "/polymerase/whitelist/scrapless.subreads.bam" ;
+ const std::string scrapsBamFn = tests::Data_Dir + "/polymerase/whitelist/scrapless.scraps.bam" ;
+
+ int count = 0;
+ ZmwWhitelistVirtualReader reader(whitelist, primaryBamFn, scrapsBamFn);
+ while (reader.HasNext()) {
+ auto record = reader.Next();
+ (void)record;
+ ++count;
+ }
+ EXPECT_EQ(2, count);
+
+ const BamFile primaryBam(primaryBamFn);
+ const BamFile scrapsBam(scrapsBamFn);
+ const PbiRawData primaryIdx(primaryBam.PacBioIndexFilename());
+ const PbiRawData scrapsIdx(scrapsBam.PacBioIndexFilename());
+ EXPECT_EQ(3, primaryIdx.NumReads());
+ EXPECT_EQ(0, scrapsIdx.NumReads());
+}
+
+TEST(ZmwWhitelistVirtualReader, UnknownZmwOk)
+{
+ const std::vector<int32_t> whitelist = { 42 }; // ZMW not in our files
+
+ ZmwWhitelistVirtualReader reader(whitelist,
+ tests::Data_Dir + "/polymerase/whitelist/internal.subreads.bam",
+ tests::Data_Dir + "/polymerase/whitelist/internal.scraps.bam");
+ EXPECT_FALSE(reader.HasNext());
+ EXPECT_TRUE(reader.NextRaw().empty());
+}
+
+TEST(ZmwWhitelistVirtualReader, MixedKnownAndUnknownZmwsOk)
+{
+ const std::vector<int32_t> whitelist = { 42, 200000, 24 };
+
+ ZmwWhitelistVirtualReader reader(whitelist,
+ tests::Data_Dir + "/polymerase/whitelist/internal.subreads.bam",
+ tests::Data_Dir + "/polymerase/whitelist/internal.scraps.bam");
+
+ // everything below should behave exactly as 'SingleValueOk' test,
+ // as the unknown ZMWs will have been removed during construction
+
+ // create virtual record
+ EXPECT_TRUE(reader.HasNext());
+ auto virtualRecord = reader.Next();
+ EXPECT_FALSE(reader.HasNext());
+
+ // fetch original polymerase read (2nd record)
+ BamFile polyBam(tests::Data_Dir + "/polymerase/whitelist/internal.polymerase.bam");
+ EntireFileQuery polyQuery(polyBam);
+ auto begin = polyQuery.begin();
+ auto end = polyQuery.end();
+ EXPECT_TRUE(begin != end);
+ ++begin;
+ EXPECT_TRUE(begin != end);
+ auto polyRecord = *begin++;
+
+ EXPECT_EQ(200000, virtualRecord.HoleNumber());
+
+ tests::Compare(polyRecord, virtualRecord);
+}
+
+TEST(VirtualPolymeraseBamRecord, VirtualRegionsTableOk)
+{
+ VirtualPolymeraseReader vpr(tests::Data_Dir + "/polymerase/production.subreads.bam",
+ tests::Data_Dir + "/polymerase/production.scraps.bam");
+ EXPECT_TRUE(vpr.HasNext());
+ const auto virtualRecord = vpr.Next();
+
+ const auto subreads = virtualRecord.VirtualRegionsTable(VirtualRegionType::SUBREAD);
+ const auto adapters = virtualRecord.VirtualRegionsTable(VirtualRegionType::ADAPTER);
+ const auto hqRegions = virtualRecord.VirtualRegionsTable(VirtualRegionType::HQREGION);
+ const auto lqRegions = virtualRecord.VirtualRegionsTable(VirtualRegionType::LQREGION);
+ const auto barcodes = virtualRecord.VirtualRegionsTable(VirtualRegionType::BARCODE);
+ const auto filtered = virtualRecord.VirtualRegionsTable(VirtualRegionType::FILTERED);
+
+ EXPECT_FALSE(subreads.empty());
+ EXPECT_FALSE(adapters.empty());
+ EXPECT_FALSE(hqRegions.empty());
+ EXPECT_FALSE(lqRegions.empty());
+ EXPECT_FALSE(barcodes.empty());
+ EXPECT_TRUE(filtered.empty()); // this annnotation type is not in data set
+}
diff --git a/tests/src/test_GroupQuery.cpp b/tests/src/test_QNameQuery.cpp
similarity index 57%
rename from tests/src/test_GroupQuery.cpp
rename to tests/src/test_QNameQuery.cpp
index 2f6769f..0c6b696 100644
--- a/tests/src/test_GroupQuery.cpp
+++ b/tests/src/test_QNameQuery.cpp
@@ -35,98 +35,45 @@
// Author: Yuan Li
-#ifdef PBBAM_TESTING
-#define private public
-#endif
-
#include "TestData.h"
#include <gtest/gtest.h>
-#include <pbbam/ZmwGroupQuery.h>
-#include <pbbam/GroupQuery.h>
+#include <pbbam/QNameQuery.h>
#include <string>
using namespace PacBio;
using namespace PacBio::BAM;
using namespace std;
+static const string dataDir = tests::Data_Dir + "/test_group_query/";
+static const string test1fn = string(dataDir) + "test1.bam";
+static const string test2fn = string(dataDir) + "test2.bam";
+static const string test3fn = string(dataDir) + "test3.bam";
-const string dataDir = tests::Data_Dir + "/test_group_query/";
-const string test1fn = string(dataDir) + "test1.bam";
-const string test2fn = string(dataDir) + "test2.bam";
-const string test3fn = string(dataDir) + "test3.bam";
-
-//void TestZmwQuery(const string & fn, const vector<int> & expected)
-//{
-// EXPECT_NO_THROW(
-// {
-// BamFile bamFile(fn);
-// vector<int> counts;
-// ZmwGroupQuery zmwQuery(bamFile);
-// for (const vector<BamRecord>& records : zmwQuery)
-// counts.push_back(records.size());
-// EXPECT_EQ(expected, counts);
-// });
-//}
-
-//void TestNoneConstZmwQuery(const string & fn, const vector<int> & expected)
-//{
-// EXPECT_NO_THROW(
-// {
-// BamFile bamFile(fn);
-// vector<int> counts;
-// ZmwGroupQuery zmwQuery(bamFile);
-// for (vector<BamRecord>& records : zmwQuery)
-// counts.push_back(records.size());
-// EXPECT_EQ(expected, counts);
-// });
-//}
-
-void TestQNameQuery(const string & fn, const vector<int> & expected)
+static
+void TestQNameQuery(const string& fn, const vector<int>& expected)
{
EXPECT_NO_THROW(
{
- BamFile bamFile(fn);
vector<int> counts;
- QNameQuery qQuery(bamFile);
- for (const vector<BamRecord>& records : qQuery)
+ QNameQuery qQuery(fn);
+ for (const vector<BamRecord>& records : qQuery)
counts.push_back(records.size());
EXPECT_EQ(expected, counts);
});
}
-void TestNoneConstQNameQuery(const string & fn, const vector<int> & expected)
-{
+static
+void TestNoneConstQNameQuery(const string& fn, const vector<int>& expected)
+{
EXPECT_NO_THROW(
{
- BamFile bamFile(fn);
vector<int> counts;
- QNameQuery qQuery(bamFile);
- for (vector<BamRecord>& records : qQuery)
+ QNameQuery qQuery(fn);
+ for (vector<BamRecord>& records : qQuery)
counts.push_back(records.size());
EXPECT_EQ(expected, counts);
});
}
-TEST(ZmwQueryTest, CountZmwSizes)
-{
-// // test case 1 has exactly one bamRecord.
-// string fn = test1fn;
-// vector<int> expected({1});
-// TestZmwQuery(fn, expected);
-// TestNoneConstZmwQuery(fn, expected);
-
-// // test case 2 has four bamRecords from the same zmw.
-// fn = test2fn;
-// expected = vector<int>({4});
-// TestZmwQuery(fn, expected);
-// TestNoneConstZmwQuery(fn, expected);
-
-// // more bamRecords in test case 3.
-// fn = test3fn;
-// expected = {2,3,3,2,2,1};
-// TestZmwQuery(fn, expected);
-// TestNoneConstZmwQuery(fn, expected);
-}
-
TEST(QNameQueryTest, CountQSizes)
{
// test case 1 has exactly one bamRecord.
diff --git a/tests/src/test_BamFile.cpp b/tests/src/test_ReadAccuracyQuery.cpp
similarity index 74%
copy from tests/src/test_BamFile.cpp
copy to tests/src/test_ReadAccuracyQuery.cpp
index 4ca910d..721bec7 100644
--- a/tests/src/test_BamFile.cpp
+++ b/tests/src/test_ReadAccuracyQuery.cpp
@@ -41,29 +41,32 @@
#include "TestData.h"
#include <gtest/gtest.h>
-#include <pbbam/BamFile.h>
-#include <stdexcept>
+#include <pbbam/ReadAccuracyQuery.h>
+#include <string>
using namespace PacBio;
using namespace PacBio::BAM;
using namespace std;
-TEST(BamFileTest, NonExistentFileThrows)
+TEST(ReadAccuracyQueryTest, QueryOk)
{
- EXPECT_THROW(
- {
- BamFile file("does_not_exist.bam");
- (void)file;
- },
- std::exception);
-}
+ const auto bamFile = BamFile{ tests::Data_Dir + string{ "/test_group_query/test2.bam" } };
-TEST(BamFileTest, NonBamFileThrows)
-{
- EXPECT_THROW(
{
- const std::string& fn = tests::Data_Dir + "/lambdaNEB.fa.fai";
- BamFile file(fn);
- (void)file;
- },
- std::exception);
+ int count = 0;
+ ReadAccuracyQuery query(0.901, Compare::GREATER_THAN_EQUAL, bamFile);
+ for (const auto& r: query) {
+ ++count;
+ EXPECT_GE(r.ReadAccuracy(), 0.901);
+ }
+ EXPECT_EQ(4, count);
+ }
+ {
+ int count = 0;
+ ReadAccuracyQuery query(0.95, Compare::GREATER_THAN_EQUAL, bamFile);
+ for (const auto& r: query) {
+ ++count;
+ EXPECT_GE(r.ReadAccuracy(), 0.901);
+ }
+ EXPECT_EQ(0, count);
+ }
}
diff --git a/tests/src/test_ReadGroupInfo.cpp b/tests/src/test_ReadGroupInfo.cpp
index db30fa2..463846c 100644
--- a/tests/src/test_ReadGroupInfo.cpp
+++ b/tests/src/test_ReadGroupInfo.cpp
@@ -41,8 +41,9 @@
#include <gtest/gtest.h>
#include <pbbam/ReadGroupInfo.h>
+#include <vector>
using namespace PacBio::BAM;
-
+using namespace std;
TEST(ReadGroupInfoTest, IdFromMovieNameAndReadType)
{
@@ -59,3 +60,64 @@ TEST(ReadGroupInfoTest, FrameCodecSetOk)
EXPECT_EQ(FrameCodec::V1, rg.IpdCodec());
}
+TEST(ReadGroupInfoTest, SequencingChemistryOk)
+{
+ using std::string;
+ using std::vector;
+
+ { // P6-C4
+ const vector<string> bindingKits { "100356300", "100372700" };
+ const vector<string> versions { "2.1", "2.3" };
+ ReadGroupInfo rg("P6C4");
+ rg.SequencingKit("100356200");
+ for (const string& bk : bindingKits) {
+ rg.BindingKit(bk);
+ for (const string& ver : versions) {
+ rg.BasecallerVersion(ver);
+ EXPECT_EQ("P6-C4", rg.SequencingChemistry());
+ }
+ }
+ }
+
+ { // S/P1-C1
+ const vector<string> sequencingKits { "100-619-400", "100-711-600" };
+ ReadGroupInfo rg("SP1C1");
+ rg.BindingKit("100-619-300");
+ rg.BasecallerVersion("3.0");
+ for (const string& sk : sequencingKits) {
+ rg.SequencingKit(sk);
+ EXPECT_EQ("S/P1-C1", rg.SequencingChemistry());
+ }
+ }
+
+ // basecaller 3.1.x
+ {
+ const vector<string> sequencingKits { "100-619-400", "100-711-600", "100-620-000" };
+ ReadGroupInfo rg("3.1");
+ rg.BindingKit("100-619-300");
+ rg.BasecallerVersion("3.1.0.171835");
+ for (const string& sk : sequencingKits) {
+ rg.SequencingKit(sk);
+ EXPECT_EQ("S/P1-C1", rg.SequencingChemistry());
+ }
+ }
+}
+
+TEST(ReadGroupInfoTest, SequencingChemistryThrowsOnBadTriple)
+{
+ try {
+ ReadGroupInfo rg("BAD");
+ rg.BindingKit("100372700");
+ rg.SequencingKit("100-619-400");
+ rg.BasecallerVersion("2.0");
+ //EXPECT_THROW(rg.SequencingChemistry(), InvalidSequencingChemistryException);
+ } catch (InvalidSequencingChemistryException& e) {
+ EXPECT_EQ(string("100372700"), e.BindingKit());
+ EXPECT_EQ(string("100-619-400"), e.SequencingKit());
+ EXPECT_EQ(string("2.0"), e.BasecallerVersion());
+ }
+}
+
+
+
+
diff --git a/tests/src/test_SequenceUtils.cpp b/tests/src/test_SequenceUtils.cpp
index a089579..20bf5e6 100644
--- a/tests/src/test_SequenceUtils.cpp
+++ b/tests/src/test_SequenceUtils.cpp
@@ -41,38 +41,14 @@
#include <gtest/gtest.h>
#include <pbbam/../../src/SequenceUtils.h>
-#include <pbbam/../../src/StringUtils.h>
#include <string>
#include <vector>
-
#include <climits>
-
using namespace PacBio;
using namespace PacBio::BAM;
using namespace PacBio::BAM::internal;
using namespace std;
-TEST(StringUtilsTest, BasicSplit)
-{
- const string test = "foo\tbar\tbaz";
- const vector<string> tokens = internal::Split(test, '\t');
- EXPECT_EQ(3, tokens.size());
- EXPECT_TRUE(tokens.at(0) == "foo");
- EXPECT_TRUE(tokens.at(1) == "bar");
- EXPECT_TRUE(tokens.at(2) == "baz");
-}
-
-TEST(StringUtilsTest, SplitKeepsEmptyTokens)
-{
- const string test = "foo\tbar\t\tbaz";
- const vector<string> tokens = internal::Split(test, '\t');
- EXPECT_EQ(4, tokens.size());
- EXPECT_TRUE(tokens.at(0) == "foo");
- EXPECT_TRUE(tokens.at(1) == "bar");
- EXPECT_TRUE(tokens.at(2) == "");
- EXPECT_TRUE(tokens.at(3) == "baz");
-}
-
TEST(SequenceUtilsTest, ComplementChar)
{
// complement
diff --git a/tests/src/test_TimeUtils.cpp b/tests/src/test_StringUtils.cpp
similarity index 73%
copy from tests/src/test_TimeUtils.cpp
copy to tests/src/test_StringUtils.cpp
index 7ab9fa5..d335246 100644
--- a/tests/src/test_TimeUtils.cpp
+++ b/tests/src/test_StringUtils.cpp
@@ -40,20 +40,31 @@
#endif
#include <gtest/gtest.h>
-#include <pbbam/../../src/TimeUtils.h>
-
+#include <pbbam/../../src/StringUtils.h>
+#include <string>
+#include <vector>
using namespace PacBio;
using namespace PacBio::BAM;
using namespace PacBio::BAM::internal;
using namespace std;
-TEST(TimeUtilsTest, ToIso8601)
+TEST(StringUtilsTest, BasicSplit)
{
- const time_t rawTime = 436428750L;
- const auto timestamp = std::chrono::system_clock::from_time_t(rawTime);
+ const string test = "foo\tbar\tbaz";
+ const vector<string> tokens = internal::Split(test, '\t');
+ EXPECT_EQ(3, tokens.size());
+ EXPECT_TRUE(tokens.at(0) == "foo");
+ EXPECT_TRUE(tokens.at(1) == "bar");
+ EXPECT_TRUE(tokens.at(2) == "baz");
+}
- // can't hardcode expected (since we rely on localtime())
- const std::string& expected = "1983-10-31T06:12:30Z";
- const std::string& actual = internal::ToIso8601(timestamp);
- EXPECT_EQ(expected, actual);
+TEST(StringUtilsTest, SplitKeepsEmptyTokens)
+{
+ const string test = "foo\tbar\t\tbaz";
+ const vector<string> tokens = internal::Split(test, '\t');
+ EXPECT_EQ(4, tokens.size());
+ EXPECT_TRUE(tokens.at(0) == "foo");
+ EXPECT_TRUE(tokens.at(1) == "bar");
+ EXPECT_TRUE(tokens.at(2) == "");
+ EXPECT_TRUE(tokens.at(3) == "baz");
}
diff --git a/tests/src/test_BamFile.cpp b/tests/src/test_SubreadLengthQuery.cpp
similarity index 66%
copy from tests/src/test_BamFile.cpp
copy to tests/src/test_SubreadLengthQuery.cpp
index 4ca910d..a476823 100644
--- a/tests/src/test_BamFile.cpp
+++ b/tests/src/test_SubreadLengthQuery.cpp
@@ -41,29 +41,41 @@
#include "TestData.h"
#include <gtest/gtest.h>
-#include <pbbam/BamFile.h>
-#include <stdexcept>
+#include <pbbam/SubreadLengthQuery.h>
+#include <string>
using namespace PacBio;
using namespace PacBio::BAM;
using namespace std;
-TEST(BamFileTest, NonExistentFileThrows)
+TEST(SubreadLengthQueryTest, QueryOk)
{
- EXPECT_THROW(
- {
- BamFile file("does_not_exist.bam");
- (void)file;
- },
- std::exception);
-}
+ const auto bamFile = BamFile{ tests::Data_Dir + string{ "/test_group_query/test2.bam" } };
-TEST(BamFileTest, NonBamFileThrows)
-{
- EXPECT_THROW(
{
- const std::string& fn = tests::Data_Dir + "/lambdaNEB.fa.fai";
- BamFile file(fn);
- (void)file;
- },
- std::exception);
+ int count = 0;
+ SubreadLengthQuery query(500, Compare::GREATER_THAN_EQUAL, bamFile);
+ for (const auto& r: query) {
+ ++count;
+ EXPECT_GE((r.QueryEnd() - r.QueryStart()), 500);
+ }
+ EXPECT_EQ(3, count);
+ }
+ {
+ int count = 0;
+ SubreadLengthQuery query(1000, Compare::GREATER_THAN_EQUAL, bamFile);
+ for (const auto& r: query) {
+ ++count;
+ EXPECT_GE((r.QueryEnd() - r.QueryStart()), 1000);
+ }
+ EXPECT_EQ(2, count);
+ }
+ {
+ int count = 0;
+ SubreadLengthQuery query(5000, Compare::GREATER_THAN_EQUAL, bamFile);
+ for (const auto& r: query) {
+ ++count;
+ EXPECT_GE((r.QueryEnd() - r.QueryStart()), 5000);
+ }
+ EXPECT_EQ(0, count);
+ }
}
diff --git a/tests/src/test_Tags.cpp b/tests/src/test_Tags.cpp
index 2ed7aa0..6755204 100644
--- a/tests/src/test_Tags.cpp
+++ b/tests/src/test_Tags.cpp
@@ -48,6 +48,9 @@
#include <iostream>
#include <map>
#include <string>
+
+#include <typeinfo>
+
using namespace PacBio;
using namespace PacBio::BAM;
using namespace std;
@@ -70,6 +73,9 @@ TEST(TagTest, TagConstruction)
vector<uint32_t> u32_Array;
vector<float> float_array;
+ signed char c = 'A';
+ unsigned char uc = 'A';
+
Tag i8Tag(i8);
Tag u8Tag(u8);
Tag i16Tag(i16);
@@ -86,6 +92,9 @@ TEST(TagTest, TagConstruction)
Tag u32_array_Tag(u32_Array);
Tag float_array_Tag(float_array);
+ Tag charTag(c, TagModifier::ASCII_CHAR);
+ Tag ucharTag(uc, TagModifier::ASCII_CHAR);
+
EXPECT_TRUE(i8Tag.Type() == TagDataType::INT8);
EXPECT_TRUE(u8Tag.Type() == TagDataType::UINT8);
EXPECT_TRUE(i16Tag.Type() == TagDataType::INT16);
@@ -101,6 +110,9 @@ TEST(TagTest, TagConstruction)
EXPECT_TRUE(i32_array_Tag.Type() == TagDataType::INT32_ARRAY);
EXPECT_TRUE(u32_array_Tag.Type() == TagDataType::UINT32_ARRAY);
EXPECT_TRUE(float_array_Tag.Type() == TagDataType::FLOAT_ARRAY);
+
+ EXPECT_TRUE(charTag.ToAscii() == 'A');
+ EXPECT_TRUE(ucharTag.ToAscii() == 'A');
}
TEST(TagTest, CopyAndCompare)
@@ -235,31 +247,87 @@ TEST(TagTest, Type_UInt8)
TEST(TagTest, Type_Ascii)
{
- Tag pureAscii = Tag('$');
- pureAscii.Modifier(TagModifier::ASCII_CHAR);
-
+ const char c = '$';
+ const signed char sc = '$';
+ const unsigned char uc = '$';
const uint8_t u8 = 65;
const int8_t i8 = 66;
- Tag fromUint8 = Tag(u8);
- fromUint8.Modifier(TagModifier::ASCII_CHAR);
- Tag fromInt8 = Tag(i8);
- fromInt8.Modifier(TagModifier::ASCII_CHAR);
-
- EXPECT_TRUE(pureAscii.HasModifier(TagModifier::ASCII_CHAR));
- EXPECT_TRUE(pureAscii.IsIntegral());
- EXPECT_TRUE(pureAscii.IsNumeric());
- EXPECT_EQ('$', pureAscii.ToAscii());
-
- EXPECT_TRUE(fromUint8.HasModifier(TagModifier::ASCII_CHAR));
- EXPECT_TRUE(fromUint8.IsIntegral());
- EXPECT_TRUE(fromUint8.IsNumeric());
- EXPECT_EQ('A', fromUint8.ToAscii());
-
- EXPECT_TRUE(fromInt8.HasModifier(TagModifier::ASCII_CHAR));
- EXPECT_TRUE(fromInt8.IsIntegral());
- EXPECT_TRUE(fromInt8.IsNumeric());
- EXPECT_EQ('B', fromInt8.ToAscii());
+ { // old style: construct-then-modify
+
+ Tag fromPlainChar = Tag(c);
+ Tag fromSignedChar = Tag(sc);
+ Tag fromUnsignedChar = Tag(uc);
+ Tag fromUint8 = Tag(u8);
+ Tag fromInt8 = Tag(i8);
+ fromPlainChar.Modifier(TagModifier::ASCII_CHAR);
+ fromSignedChar.Modifier(TagModifier::ASCII_CHAR);
+ fromUnsignedChar.Modifier(TagModifier::ASCII_CHAR);
+ fromUint8.Modifier(TagModifier::ASCII_CHAR);
+ fromInt8.Modifier(TagModifier::ASCII_CHAR);
+
+ EXPECT_TRUE(fromPlainChar.HasModifier(TagModifier::ASCII_CHAR));
+ EXPECT_TRUE(fromPlainChar.IsIntegral());
+ EXPECT_TRUE(fromPlainChar.IsNumeric());
+ EXPECT_EQ('$', fromPlainChar.ToAscii());
+
+ EXPECT_TRUE(fromSignedChar.HasModifier(TagModifier::ASCII_CHAR));
+ EXPECT_TRUE(fromSignedChar.IsIntegral());
+ EXPECT_TRUE(fromSignedChar.IsNumeric());
+ EXPECT_EQ('$', fromSignedChar.ToAscii());
+
+ EXPECT_TRUE(fromUnsignedChar.HasModifier(TagModifier::ASCII_CHAR));
+ EXPECT_TRUE(fromUnsignedChar.IsIntegral());
+ EXPECT_TRUE(fromUnsignedChar.IsNumeric());
+ EXPECT_EQ('$', fromUnsignedChar.ToAscii());
+
+ EXPECT_TRUE(fromUint8.HasModifier(TagModifier::ASCII_CHAR));
+ EXPECT_TRUE(fromUint8.IsIntegral());
+ EXPECT_TRUE(fromUint8.IsNumeric());
+ EXPECT_EQ('A', fromUint8.ToAscii());
+
+ EXPECT_TRUE(fromInt8.HasModifier(TagModifier::ASCII_CHAR));
+ EXPECT_TRUE(fromInt8.IsIntegral());
+ EXPECT_TRUE(fromInt8.IsNumeric());
+ EXPECT_EQ('B', fromInt8.ToAscii());
+ }
+
+ { // new style: construct directly as ASCII
+
+ const Tag fromPlainChar = Tag(c, TagModifier::ASCII_CHAR);
+ const Tag fromSignedChar = Tag(sc, TagModifier::ASCII_CHAR);
+ const Tag fromUnsignedChar = Tag(uc, TagModifier::ASCII_CHAR);
+ const Tag fromUint8 = Tag(u8, TagModifier::ASCII_CHAR);
+ const Tag fromInt8 = Tag(i8, TagModifier::ASCII_CHAR);
+
+ EXPECT_TRUE(fromPlainChar.HasModifier(TagModifier::ASCII_CHAR));
+ EXPECT_TRUE(fromPlainChar.IsIntegral());
+ EXPECT_TRUE(fromPlainChar.IsNumeric());
+ EXPECT_EQ('$', fromPlainChar.ToAscii());
+
+ EXPECT_TRUE(fromSignedChar.HasModifier(TagModifier::ASCII_CHAR));
+ EXPECT_TRUE(fromSignedChar.IsIntegral());
+ EXPECT_TRUE(fromSignedChar.IsNumeric());
+ EXPECT_EQ('$', fromSignedChar.ToAscii());
+
+ EXPECT_TRUE(fromUnsignedChar.HasModifier(TagModifier::ASCII_CHAR));
+ EXPECT_TRUE(fromUnsignedChar.IsIntegral());
+ EXPECT_TRUE(fromUnsignedChar.IsNumeric());
+ EXPECT_EQ('$', fromUnsignedChar.ToAscii());
+
+ EXPECT_TRUE(fromUint8.HasModifier(TagModifier::ASCII_CHAR));
+ EXPECT_TRUE(fromUint8.IsIntegral());
+ EXPECT_TRUE(fromUint8.IsNumeric());
+ EXPECT_EQ('A', fromUint8.ToAscii());
+
+ EXPECT_TRUE(fromInt8.HasModifier(TagModifier::ASCII_CHAR));
+ EXPECT_TRUE(fromInt8.IsIntegral());
+ EXPECT_TRUE(fromInt8.IsNumeric());
+ EXPECT_EQ('B', fromInt8.ToAscii());
+ }
+
+ // check invalid constructs
+ EXPECT_THROW(Tag('A', TagModifier::HEX_STRING), std::runtime_error);
}
TEST(TagTest, Type_Int16)
@@ -396,6 +464,19 @@ TEST(TagTest, Type_String)
EXPECT_FALSE(tag.IsArray());
EXPECT_EQ(v, v2);
+
+ // "Hex format" string
+ const Tag hex("DEADBEEF", TagModifier::HEX_STRING);
+ EXPECT_TRUE(hex.Type() == TagDataType::STRING);
+ EXPECT_TRUE(hex.Typename() == "string");
+ EXPECT_TRUE(hex.IsString());
+ EXPECT_TRUE(hex.HasModifier(TagModifier::HEX_STRING));
+ EXPECT_FALSE(hex.IsNull());
+ EXPECT_FALSE(hex.IsNumeric());
+ EXPECT_FALSE(hex.IsArray());
+
+ // check invalid constructs
+ EXPECT_THROW(Tag("DEADBEEF", TagModifier::ASCII_CHAR), std::runtime_error);
}
TEST(TagTest, Type_Int8Array)
@@ -634,10 +715,10 @@ TEST(TagTest, ConvertToInt8)
// not allowed
EXPECT_THROW(underflow.ToInt8(), std::exception);
- EXPECT_THROW(overflow.ToInt8(), std::exception);
- EXPECT_THROW(floatTag.ToInt8(), std::exception);
+ EXPECT_THROW(overflow.ToInt8(), std::exception);
+ EXPECT_THROW(floatTag.ToInt8(), std::exception);
EXPECT_THROW(stringTag.ToInt8(), std::exception);
- EXPECT_THROW(arrayTag.ToInt8(), std::exception);
+ EXPECT_THROW(arrayTag.ToInt8(), std::exception);
}
TEST(TagTest, ConvertToUInt8)
@@ -660,11 +741,11 @@ TEST(TagTest, ConvertToUInt8)
});
// not allowed
- EXPECT_THROW(neg.ToUInt8(), std::exception);
- EXPECT_THROW(overflow.ToUInt8(), std::exception);
- EXPECT_THROW(floatTag.ToUInt8(), std::exception);
+ EXPECT_THROW(neg.ToUInt8(), std::exception);
+ EXPECT_THROW(overflow.ToUInt8(), std::exception);
+ EXPECT_THROW(floatTag.ToUInt8(), std::exception);
EXPECT_THROW(stringTag.ToUInt8(), std::exception);
- EXPECT_THROW(arrayTag.ToUInt8(), std::exception);
+ EXPECT_THROW(arrayTag.ToUInt8(), std::exception);
}
TEST(TagTest, ConvertToInt16)
@@ -690,10 +771,10 @@ TEST(TagTest, ConvertToInt16)
// not allowed
EXPECT_THROW(underflow.ToInt16(), std::exception);
- EXPECT_THROW(overflow.ToInt16(), std::exception);
- EXPECT_THROW(floatTag.ToInt16(), std::exception);
+ EXPECT_THROW(overflow.ToInt16(), std::exception);
+ EXPECT_THROW(floatTag.ToInt16(), std::exception);
EXPECT_THROW(stringTag.ToInt16(), std::exception);
- EXPECT_THROW(arrayTag.ToInt16(), std::exception);
+ EXPECT_THROW(arrayTag.ToInt16(), std::exception);
}
TEST(TagTest, ConvertToUInt16)
@@ -716,11 +797,11 @@ TEST(TagTest, ConvertToUInt16)
});
// not allowed
- EXPECT_THROW(neg.ToUInt16(), std::exception);
- EXPECT_THROW(overflow.ToUInt16(), std::exception);
- EXPECT_THROW(floatTag.ToUInt16(), std::exception);
+ EXPECT_THROW(neg.ToUInt16(), std::exception);
+ EXPECT_THROW(overflow.ToUInt16(), std::exception);
+ EXPECT_THROW(floatTag.ToUInt16(), std::exception);
EXPECT_THROW(stringTag.ToUInt16(), std::exception);
- EXPECT_THROW(arrayTag.ToUInt16(), std::exception);
+ EXPECT_THROW(arrayTag.ToUInt16(), std::exception);
}
TEST(TagTest, ConvertToInt32)
@@ -748,9 +829,9 @@ TEST(TagTest, ConvertToInt32)
});
// not allowed
- EXPECT_THROW(floatTag.ToInt32(), std::exception);
+ EXPECT_THROW(floatTag.ToInt32(), std::exception);
EXPECT_THROW(stringTag.ToInt32(), std::exception);
- EXPECT_THROW(arrayTag.ToInt32(), std::exception);
+ EXPECT_THROW(arrayTag.ToInt32(), std::exception);
}
TEST(TagTest, ConvertToUInt32)
@@ -776,10 +857,10 @@ TEST(TagTest, ConvertToUInt32)
});
// not allowed
- EXPECT_THROW(neg.ToUInt32(), std::exception);
- EXPECT_THROW(floatTag.ToUInt32(), std::exception);
+ EXPECT_THROW(neg.ToUInt32(), std::exception);
+ EXPECT_THROW(floatTag.ToUInt32(), std::exception);
EXPECT_THROW(stringTag.ToUInt32(), std::exception);
- EXPECT_THROW(arrayTag.ToUInt32(), std::exception);
+ EXPECT_THROW(arrayTag.ToUInt32(), std::exception);
}
TEST(TagCollectionTest, DefaultConstruction)
@@ -827,8 +908,7 @@ TEST(SamTagCodecTest, DecodeTest)
TagCollection expected;
expected["ST"] = string("foo");
expected["XY"] = int32_t(-42);
- expected["HX"] = string("1abc75");
- expected["HX"].Modifier(TagModifier::HEX_STRING);
+ expected["HX"] = Tag("1abc75", TagModifier::HEX_STRING);
expected["VC"] = vector<int32_t>( { 42, -100, 37, 2048 } );
TagCollection tags = SamTagCodec::Decode(tagString);
@@ -850,8 +930,7 @@ TEST(SamTagCodecTest, EncodeTest)
TagCollection tags;
tags["ST"] = string("foo");
tags["XY"] = int32_t(-42);
- tags["HX"] = string("1abc75");
- tags["HX"].Modifier(TagModifier::HEX_STRING);
+ tags["HX"] = Tag("1abc75", TagModifier::HEX_STRING);
tags["VC"] = vector<int32_t>( { 42, -100, 37, 2048 } );
// "HX:H:1abc75\tST:Z:foo\0\tVC:B:i,42,-100,37,2048\tXY:i:-42"
@@ -981,8 +1060,7 @@ TEST(BamTagCodecTest, EncodeTest)
expected.push_back(valueBytes[3]);
TagCollection tags;
- tags["HX"] = string("1abc75");
- tags["HX"].Modifier(TagModifier::HEX_STRING);
+ tags["HX"] = Tag("1abc75", TagModifier::HEX_STRING);
tags["CA"] = charArray;
tags["XY"] = x;
@@ -990,3 +1068,77 @@ TEST(BamTagCodecTest, EncodeTest)
EXPECT_EQ(expected, data);
}
+TEST(BamTagCodecTest, AsciiTagsTest)
+{
+ vector<uint8_t> expected;
+ expected.reserve(20);
+ expected.push_back('I'); // I8:A:B
+ expected.push_back('8');
+ expected.push_back('A');
+ expected.push_back('B');
+ expected.push_back('P'); // PC:A:$
+ expected.push_back('C');
+ expected.push_back('A');
+ expected.push_back('$');
+ expected.push_back('S'); // SC:A:$
+ expected.push_back('C');
+ expected.push_back('A');
+ expected.push_back('$');
+ expected.push_back('U'); // U8:A:A
+ expected.push_back('8');
+ expected.push_back('A');
+ expected.push_back('A');
+ expected.push_back('U'); // UC:A:$
+ expected.push_back('C');
+ expected.push_back('A');
+ expected.push_back('$');
+
+ const char c = '$';
+ const signed char sc = '$';
+ const unsigned char uc = '$';
+ const uint8_t u8 = 65;
+ const int8_t i8 = 66;
+
+ { // old style: construct-then-modify
+
+ Tag fromPlainChar = Tag(c);
+ Tag fromSignedChar = Tag(sc);
+ Tag fromUnsignedChar = Tag(uc);
+ Tag fromUint8 = Tag(u8);
+ Tag fromInt8 = Tag(i8);
+ fromPlainChar.Modifier(TagModifier::ASCII_CHAR);
+ fromSignedChar.Modifier(TagModifier::ASCII_CHAR);
+ fromUnsignedChar.Modifier(TagModifier::ASCII_CHAR);
+ fromUint8.Modifier(TagModifier::ASCII_CHAR);
+ fromInt8.Modifier(TagModifier::ASCII_CHAR);
+
+ TagCollection tags;
+ tags["PC"] = fromPlainChar;
+ tags["SC"] = fromSignedChar;
+ tags["UC"] = fromUnsignedChar;
+ tags["U8"] = fromUint8;
+ tags["I8"] = fromInt8;
+
+ const vector<uint8_t>& data = BamTagCodec::Encode(tags);
+ EXPECT_EQ(expected, data);
+ }
+
+ { // new style: construct directly as ASCII
+
+ const Tag fromPlainChar = Tag(c, TagModifier::ASCII_CHAR);
+ const Tag fromSignedChar = Tag(sc, TagModifier::ASCII_CHAR);
+ const Tag fromUnsignedChar = Tag(uc, TagModifier::ASCII_CHAR);
+ const Tag fromUint8 = Tag(u8, TagModifier::ASCII_CHAR);
+ const Tag fromInt8 = Tag(i8, TagModifier::ASCII_CHAR);
+
+ TagCollection tags;
+ tags["PC"] = fromPlainChar;
+ tags["SC"] = fromSignedChar;
+ tags["UC"] = fromUnsignedChar;
+ tags["U8"] = fromUint8;
+ tags["I8"] = fromInt8;
+
+ const vector<uint8_t>& data = BamTagCodec::Encode(tags);
+ EXPECT_EQ(expected, data);
+ }
+}
diff --git a/tests/src/test_TimeUtils.cpp b/tests/src/test_TimeUtils.cpp
index 7ab9fa5..90f1489 100644
--- a/tests/src/test_TimeUtils.cpp
+++ b/tests/src/test_TimeUtils.cpp
@@ -41,7 +41,6 @@
#include <gtest/gtest.h>
#include <pbbam/../../src/TimeUtils.h>
-
using namespace PacBio;
using namespace PacBio::BAM;
using namespace PacBio::BAM::internal;
@@ -52,8 +51,17 @@ TEST(TimeUtilsTest, ToIso8601)
const time_t rawTime = 436428750L;
const auto timestamp = std::chrono::system_clock::from_time_t(rawTime);
- // can't hardcode expected (since we rely on localtime())
- const std::string& expected = "1983-10-31T06:12:30Z";
- const std::string& actual = internal::ToIso8601(timestamp);
+ const auto expected = string{ "1983-10-31T06:12:30Z" }; // no ms in test case
+ const auto actual = internal::ToIso8601(timestamp);
+ EXPECT_EQ(expected, actual);
+}
+
+TEST(TimeUtilsTest, ToDataSetFormat)
+{
+ const time_t rawTime = 436428750L;
+ const auto timestamp = std::chrono::system_clock::from_time_t(rawTime);
+
+ const auto expected = string{ "831031_061230" }; // no ms in test case
+ const std::string& actual = internal::ToDataSetFormat(timestamp);
EXPECT_EQ(expected, actual);
}
diff --git a/tests/src/test_VirtualPolymeraseCompositeReader.cpp b/tests/src/test_VirtualPolymeraseCompositeReader.cpp
new file mode 100644
index 0000000..4652faf
--- /dev/null
+++ b/tests/src/test_VirtualPolymeraseCompositeReader.cpp
@@ -0,0 +1,132 @@
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include "TestData.h"
+#include <gtest/gtest.h>
+#include <pbbam/virtual/VirtualPolymeraseCompositeReader.h>
+#include <string>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace tests {
+
+static
+size_t NumVirtualRecords(const string& primaryBamFn,
+ const string& scrapsBamFn)
+{
+ VirtualPolymeraseReader reader(primaryBamFn, scrapsBamFn);
+ size_t count = 0;
+ while (reader.HasNext()) {
+ const auto record = reader.Next();
+ (void)record;
+ ++count;
+ }
+ return count;
+}
+
+} // namespace tests
+} // namespace BAM
+} // namespace PacBio
+
+TEST(VirtualPolymeraseCompositeReaderTest, DataSetOk)
+{
+ // dataset contains these resources (subreads/scraps + hqregion/scraps BAMs)
+ const string primaryFn1 = tests::Data_Dir + "/polymerase/production.subreads.bam";
+ const string scrapsFn1 = tests::Data_Dir + "/polymerase/production.scraps.bam";
+ const string primaryFn2 = tests::Data_Dir + "/polymerase/production_hq.hqregion.bam";
+ const string scrapsFn2 = tests::Data_Dir + "/polymerase/production_hq.scraps.bam";
+ const size_t numExpectedRecords =
+ tests::NumVirtualRecords(primaryFn1, scrapsFn1) +
+ tests::NumVirtualRecords(primaryFn2, scrapsFn2);
+
+ const string datasetFn = tests::Data_Dir +
+ "/polymerase/multiple_resources.subread.dataset.xml";
+
+ DataSet ds(datasetFn);
+ VirtualPolymeraseCompositeReader reader(ds);
+ size_t numObservedRecords = 0;
+ while (reader.HasNext()) {
+ const auto record = reader.Next();
+ (void)record;
+ ++numObservedRecords;
+ }
+ EXPECT_EQ(numExpectedRecords, numObservedRecords);
+}
+
+TEST(VirtualPolymeraseCompositeReaderTest, EmptyDataSetOk)
+{
+ VirtualPolymeraseCompositeReader reader(DataSet{});
+ EXPECT_FALSE(reader.HasNext());
+}
+
+TEST(VirtualPolymeraseCompositeReaderTest, FilteredDataSetOk)
+{
+ // dataset contains these resources (subreads/scraps + hqregion/scraps BAMs)
+ const string primaryFn1 = tests::Data_Dir + "/polymerase/production.subreads.bam";
+ const string scrapsFn1 = tests::Data_Dir + "/polymerase/production.scraps.bam";
+ const string primaryFn2 = tests::Data_Dir + "/polymerase/internal.subreads.bam";
+ const string scrapsFn2 = tests::Data_Dir + "/polymerase/internal.scraps.bam";
+ const string primaryFn3 = tests::Data_Dir + "/polymerase/production_hq.hqregion.bam";
+ const string scrapsFn3 = tests::Data_Dir + "/polymerase/production_hq.scraps.bam";
+ const size_t totalRecords =
+ tests::NumVirtualRecords(primaryFn1, scrapsFn1) +
+ tests::NumVirtualRecords(primaryFn2, scrapsFn2) +
+ tests::NumVirtualRecords(primaryFn3, scrapsFn3);
+ EXPECT_EQ(3, totalRecords); // 1 per pair
+
+ // our filter will remove the 2 "production" BAM pairs
+ // using a ZMW filter that only the "internal" pair should pass
+ const string datasetFn = tests::Data_Dir +
+ "/polymerase/filtered_resources.subread.dataset.xml";
+
+ DataSet ds(datasetFn);
+ VirtualPolymeraseCompositeReader reader(ds);
+ size_t numObservedRecords = 0;
+ while (reader.HasNext()) {
+ const auto record = reader.Next();
+ (void)record;
+ ++numObservedRecords;
+ }
+ EXPECT_EQ(1, numObservedRecords);
+}
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index db63408..dd6757e 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -1,3 +1,17 @@
-if(PacBioBAM_build_pbindex)
- add_subdirectory(pbindex)
+
+set(ToolsCommonDir ${PacBioBAM_ToolsDir}/common)
+set(PacBioBAM_CramTestsDir ${PacBioBAM_TestsDir}/src/cram)
+
+# quash warning with OptionParser
+include(CheckCXXCompilerFlag)
+check_cxx_compiler_flag("-Wno-unused-private-field" HAS_NO_UNUSED_PRIVATE_FIELD)
+if(HAS_NO_UNUSED_PRIVATE_FIELD)
+ set(PacBioBAM_CXX_FLAGS "${PacBioBAM_CXX_FLAGS} -Wno-unused-private-field")
endif()
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${PacBioBAM_CXX_FLAGS}")
+
+# tools
+add_subdirectory(bam2sam)
+add_subdirectory(pbindex)
+add_subdirectory(pbindexdump)
+add_subdirectory(pbmerge)
diff --git a/tools/bam2sam/CMakeLists.txt b/tools/bam2sam/CMakeLists.txt
new file mode 100644
index 0000000..ef3a919
--- /dev/null
+++ b/tools/bam2sam/CMakeLists.txt
@@ -0,0 +1,32 @@
+
+set(Bam2SamSrcDir ${PacBioBAM_ToolsDir}/bam2sam/src)
+
+# create version header
+set(Bam2Sam_VERSION ${PacBioBAM_VERSION})
+configure_file(
+ ${Bam2SamSrcDir}/Bam2SamVersion.h.in Bam2SamVersion.h @ONLY
+)
+
+# list source files
+set(BAM2SAM_SOURCES
+ ${ToolsCommonDir}/OptionParser.cpp
+ ${Bam2SamSrcDir}/main.cpp
+ ${Bam2SamSrcDir}/Bam2Sam.cpp
+)
+
+# build bam2sam executable
+include(PbbamTool)
+create_pbbam_tool(
+ TARGET bam2sam
+ SOURCES ${BAM2SAM_SOURCES}
+)
+
+# cram tests
+if (PacBioBAM_build_tests)
+ add_test(
+ NAME bam2sam_CramTests
+ WORKING_DIRECTORY ${PacBioBAM_TestsDir}/scripts
+ COMMAND "python" cram.py
+ ${PacBioBAM_CramTestsDir}/bam2sam.t
+ )
+endif()
diff --git a/tools/bam2sam/src/Bam2Sam.cpp b/tools/bam2sam/src/Bam2Sam.cpp
new file mode 100644
index 0000000..5fde774
--- /dev/null
+++ b/tools/bam2sam/src/Bam2Sam.cpp
@@ -0,0 +1,121 @@
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "Bam2Sam.h"
+#include <htslib/sam.h>
+#include <stdexcept>
+#include <memory>
+#include <cassert>
+using namespace bam2sam;
+using namespace std;
+
+namespace bam2sam {
+
+struct HtslibFileDeleter
+{
+ void operator()(samFile* file)
+ {
+ if (file)
+ sam_close(file);
+ file = nullptr;
+ }
+};
+
+struct HtslibHeaderDeleter
+{
+ void operator()(bam_hdr_t* hdr)
+ {
+ if (hdr)
+ bam_hdr_destroy(hdr);
+ hdr = nullptr;
+ }
+};
+
+struct HtslibRecordDeleter
+{
+ void operator()(bam1_t* b)
+ {
+ if (b)
+ bam_destroy1(b);
+ b = nullptr;
+ }
+};
+
+} // namespace bam2sam
+
+void PbBam2Sam::Run(const Settings &settings)
+{
+ int htslibResult = 0;
+
+ // open files
+
+ unique_ptr<samFile, HtslibFileDeleter> inFileWrapper(sam_open(settings.inputFilename_.c_str(), "rb"));
+ samFile* in = inFileWrapper.get();
+ if (!in || !in->fp.bgzf)
+ throw std::runtime_error("could not read from stdin");
+
+ unique_ptr<samFile, HtslibFileDeleter> outFileWrapper(sam_open("-", "w"));
+ samFile* out = outFileWrapper.get();
+ if (!out)
+ throw std::runtime_error("could not write to stdout");
+
+ // fetch & write header
+
+ unique_ptr<bam_hdr_t, HtslibHeaderDeleter> headerWrapper(bam_hdr_read(in->fp.bgzf));
+ bam_hdr_t* hdr = headerWrapper.get();
+ if (!hdr)
+ throw std::runtime_error("could not read header");
+
+ if (!settings.noHeader_) {
+ htslibResult = sam_hdr_write(out, hdr);
+ if (htslibResult != 0)
+ throw std::runtime_error("could not write header");
+ if (settings.printHeaderOnly_)
+ return;
+ }
+
+ // fetch & write records
+
+ unique_ptr<bam1_t, HtslibRecordDeleter> recordWrapper(bam_init1());
+ bam1_t* b = recordWrapper.get();
+
+ while ((htslibResult = sam_read1(in, hdr, b)) >= 0) {
+ htslibResult = sam_write1(out, hdr, b);
+ if (htslibResult < 0)
+ throw std::runtime_error("error writing record to stdout");
+ }
+}
diff --git a/include/pbbam/Strand.h b/tools/bam2sam/src/Bam2Sam.h
similarity index 83%
copy from include/pbbam/Strand.h
copy to tools/bam2sam/src/Bam2Sam.h
index aa8535f..4a7ffbb 100644
--- a/include/pbbam/Strand.h
+++ b/tools/bam2sam/src/Bam2Sam.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
@@ -23,7 +23,7 @@
// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
@@ -35,21 +35,19 @@
// Author: Derek Barnett
-#ifndef STRAND_H
-#define STRAND_H
+#ifndef BAM2SAM_H
+#define BAM2SAM_H
-#include "pbbam/Config.h"
+#include "Settings.h"
-namespace PacBio {
-namespace BAM {
+namespace bam2sam {
-enum class Strand
+class PbBam2Sam
{
- FORWARD
- , REVERSE
+public:
+ static void Run(const Settings& settings);
};
-} // namespace BAM
-} // namespace PacBio
+} // namespace bam2sam
-#endif // STRAND_H
+#endif // PBIBAM2SAM_H
diff --git a/src/Config.cpp b/tools/bam2sam/src/Bam2SamVersion.h.in
similarity index 86%
copy from src/Config.cpp
copy to tools/bam2sam/src/Bam2SamVersion.h.in
index 677ad08..10319b7 100644
--- a/src/Config.cpp
+++ b/tools/bam2sam/src/Bam2SamVersion.h.in
@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
@@ -35,14 +35,15 @@
// Author: Derek Barnett
-#include "pbbam/Config.h"
-using namespace PacBio;
-using namespace PacBio::BAM;
+#ifndef BAM2SAMVERSION_H
+#define BAM2SAMVERSION_H
-namespace PacBio {
-namespace BAM {
+#include <string>
-int HtslibVerbosity = 0;
+namespace bam2sam {
-} // namespace BAM
-} // namespace PacBio
+const std::string Version = std::string("@Bam2Sam_VERSION@");
+
+} // namespace bam2sam
+
+#endif // BAM2SAMVERSION_H
diff --git a/include/pbbam/TagCollection.h b/tools/bam2sam/src/Settings.h
similarity index 78%
copy from include/pbbam/TagCollection.h
copy to tools/bam2sam/src/Settings.h
index 42b4018..d570dc9 100644
--- a/include/pbbam/TagCollection.h
+++ b/tools/bam2sam/src/Settings.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
@@ -23,7 +23,7 @@
// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
@@ -35,24 +35,29 @@
// Author: Derek Barnett
-#ifndef TAGCOLLECTION_H
-#define TAGCOLLECTION_H
+#ifndef SETTINGS_H
+#define SETTINGS_H
-#include "pbbam/Config.h"
-#include "pbbam/Tag.h"
-#include <map>
#include <string>
+#include <vector>
-namespace PacBio {
-namespace BAM {
+namespace bam2sam {
-class PBBAM_EXPORT TagCollection : public std::map<std::string, Tag>
+class Settings
{
public:
- bool Contains(const std::string& name) const;
+ Settings(void)
+ : noHeader_(false)
+ , printHeaderOnly_(false)
+ { }
+
+public:
+ std::string inputFilename_;
+ bool noHeader_;
+ bool printHeaderOnly_;
+ std::vector<std::string> errors_;
};
-} // namespace BAM
-} // namespace PacBio
+} // namespace bam2sam
-#endif // TAGCOLLECTION_H
+#endif // SETTINGS_H
diff --git a/tools/pbindex/src/main.cpp b/tools/bam2sam/src/main.cpp
similarity index 51%
copy from tools/pbindex/src/main.cpp
copy to tools/bam2sam/src/main.cpp
index 59065fa..d27b42f 100644
--- a/tools/pbindex/src/main.cpp
+++ b/tools/bam2sam/src/main.cpp
@@ -35,34 +35,43 @@
// Author: Derek Barnett
-#include "OptionParser.h"
-#include "PbIndex.h"
-#include "PbIndexVersion.h"
+#include "../common/OptionParser.h"
+#include "Bam2Sam.h"
+#include "Bam2SamVersion.h"
+#include <string>
+#include <vector>
#include <cassert>
-#include <iostream>
-using namespace std;
+#include <cstdlib>
static
-pbindex::Settings fromCommandLine(optparse::OptionParser& parser,
- int argc, char* argv[])
+bam2sam::Settings fromCommandLine(optparse::OptionParser& parser,
+ int argc, char* argv[])
{
- const optparse::Values options = parser.parse_args(argc, argv);
- (void)options;
+ bam2sam::Settings settings;
- pbindex::Settings settings;
+ const optparse::Values options = parser.parse_args(argc, argv);
- // get input filename
- const vector<string> positionalArgs = parser.args();
+ // input
+ const std::vector<std::string> positionalArgs = parser.args();
const size_t numPositionalArgs = positionalArgs.size();
if (numPositionalArgs == 0)
- settings.errors_.push_back("pbindex requires an input BAM filename");
+ settings.inputFilename_ = "-"; // stdin
else if (numPositionalArgs == 1)
- settings.inputBamFilename_ = parser.args().front();
+ settings.inputFilename_ = parser.args().front();
else {
assert(numPositionalArgs > 1);
- settings.errors_.push_back("pbindex does not support more than one input file per run");
+ settings.errors_.push_back("bam2sam does not support more than one input file per run");
}
+ // header options
+ if (options.is_set("no_header"))
+ settings.noHeader_ = options.get("no_header");
+ if (options.is_set("header_only"))
+ settings.printHeaderOnly_ = options.get("header_only");
+
+ if (settings.noHeader_ && settings.printHeaderOnly_)
+ settings.errors_.push_back("conflicting arguments requested: --no-header and --header-only");
+
return settings;
}
@@ -70,33 +79,49 @@ int main(int argc, char* argv[])
{
// setup help & options
optparse::OptionParser parser;
- parser.description("pbindex creates a index file that enables random-access to PacBio-specific data in BAM files. "
- "Generated index filename will be the same as input BAM plus .pbi suffix."
+ parser.description("bam2sam converts a BAM file to SAM. It is essentially a stripped-down "
+ "'samtools view', mostly useful for testing/debugging without requiring samtools. "
+ "Input BAM file is read from a file or stdin, and SAM output is written to stdout."
);
- parser.prog("pbindex");
- parser.usage("pbindex <input>");
- parser.version(pbindex::Version);
+ parser.prog("bam2sam");
+ parser.usage("bam2sam [options] [input]");
+ parser.version(bam2sam::Version);
parser.add_version_option(true);
parser.add_help_option(true);
- auto ioGroup = optparse::OptionGroup(parser, "Input/Output");
- ioGroup.add_option("")
- .dest("input")
- .metavar("input")
- .help("Input BAM file");
- parser.add_option_group(ioGroup);
+ auto optionGroup = optparse::OptionGroup(parser, "Options");
+ optionGroup.add_option("")
+ .dest("input")
+ .metavar("input")
+ .help("Input BAM file. If not provided, stdin will be used as input.");
+ optionGroup.add_option("--no-header")
+ .dest("no_header")
+ .action("store_true")
+ .help("Omit header from output.");
+ optionGroup.add_option("--header-only")
+ .dest("header_only")
+ .action("store_true")
+ .help("Print only the header (no records).");
+ parser.add_option_group(optionGroup);
// parse command line for settings
- const pbindex::Settings settings = fromCommandLine(parser, argc, argv);
+ const bam2sam::Settings settings = fromCommandLine(parser, argc, argv);
if (!settings.errors_.empty()) {
- cerr << endl;
+ std::cerr << std::endl;
for (const auto e : settings.errors_)
- cerr << "ERROR: " << e << endl;
- cerr << endl;
+ std::cerr << "ERROR: " << e << std::endl;
+ std::cerr << std::endl;
parser.print_help();
return EXIT_FAILURE;
}
// run tool
- return pbindex::PbIndex::Run(settings);
+ try {
+ bam2sam::PbBam2Sam::Run(settings);
+ return EXIT_SUCCESS;
+ }
+ catch (std::exception& e) {
+ std::cerr << "ERROR: " << e.what() << std::endl;
+ return EXIT_FAILURE;
+ }
}
diff --git a/include/pbbam/AlignmentPrinter.h b/tools/common/BamFileMerger.h
similarity index 59%
copy from include/pbbam/AlignmentPrinter.h
copy to tools/common/BamFileMerger.h
index 6424c5f..d2a6bb2 100644
--- a/include/pbbam/AlignmentPrinter.h
+++ b/tools/common/BamFileMerger.h
@@ -23,7 +23,7 @@
// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
@@ -33,51 +33,46 @@
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-// Author: Armin Töpfer
+// Author: Derek Barnett
-#ifndef ALIGNMENTPRINTER_H
-#define ALIGNMENTPRINTER_H
+#ifndef BAMFILEMERGER_H
+#define BAMFILEMERGER_H
-#include <memory>
+#include <pbbam/DataSet.h>
+#include <pbbam/PbiFilter.h>
+#include <pbbam/ProgramInfo.h>
#include <string>
-
-#include "pbbam/BamRecord.h"
-#include "pbbam/IndexedFastaReader.h"
-#include "pbbam/Orientation.h"
+#include <vector>
namespace PacBio {
namespace BAM {
+namespace common {
-class BamRecord;
-
-class AlignmentPrinter
+class BamFileMerger
{
public:
- AlignmentPrinter(const IndexedFastaReader& ifr)
- : ifr_(std::unique_ptr<IndexedFastaReader>(new IndexedFastaReader(ifr)))
- { }
-
- AlignmentPrinter() = delete;
- // Move constructor
- AlignmentPrinter(AlignmentPrinter&&) = default;
- // Copy constructor
- AlignmentPrinter(const AlignmentPrinter&) = delete;
- // Move assignment operator
- AlignmentPrinter& operator=(AlignmentPrinter&&) = default;
- // Copy assignment operator
- AlignmentPrinter& operator=(const AlignmentPrinter&) = delete;
- // Destructor
- ~AlignmentPrinter() = default;
-
-public:
- std::string Print(const BamRecord& record,
- const Orientation orientation = Orientation::GENOMIC);
-
-private:
- const std::unique_ptr<IndexedFastaReader> ifr_;
+ /// \brief Runs merger on a dataset, applying any supplied filters.
+ ///
+ /// When this function exits, a merged BAM (and optional PBI) will have been
+ /// written and closed.
+ ///
+ /// \param[in] dataset provides input filenames & filters
+ /// \param[in] outputFilename resulting BAM output
+ /// \param[in] mergeProgram info about the calling program. Adds a @PG entry to merged header.
+ /// \param[in] createPbi if true, creates a PBI alongside output BAM
+ ///
+ /// \throws std::runtime_error if any any errors encountered while reading or writing
+ ///
+ static void Merge(const PacBio::BAM::DataSet& dataset,
+ const std::string& outputFilename,
+ const PacBio::BAM::ProgramInfo& mergeProgram = PacBio::BAM::ProgramInfo(),
+ bool createPbi = true);
};
+} // namespace common
} // namespace BAM
} // namespace PacBio
-#endif // ALIGNMENTPRINTER_H
+#include "BamFileMerger.inl"
+
+#endif // BAMFILEMERGER_H
diff --git a/tools/common/BamFileMerger.inl b/tools/common/BamFileMerger.inl
new file mode 100644
index 0000000..18dfbca
--- /dev/null
+++ b/tools/common/BamFileMerger.inl
@@ -0,0 +1,262 @@
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "BamFileMerger.h"
+
+#include <pbbam/BamHeader.h>
+#include <pbbam/BamReader.h>
+#include <pbbam/BamRecord.h>
+#include <pbbam/BamWriter.h>
+#include <pbbam/CompositeBamReader.h>
+#include <pbbam/PbiBuilder.h>
+
+#include <deque>
+#include <memory>
+#include <stdexcept>
+#include <cassert>
+
+namespace PacBio {
+namespace BAM {
+namespace common {
+
+// ICollator
+
+class ICollator
+{
+public:
+ ~ICollator(void) { }
+
+ bool GetNext(BamRecord& record)
+ {
+ // nothing left to read
+ if (mergeItems_.empty())
+ return false;
+
+ // non-destructive 'pop' of first item from queue
+ auto firstIter = mergeItems_.begin();
+ auto firstItem = PacBio::BAM::internal::CompositeMergeItem{ std::move(firstIter->reader),
+ std::move(firstIter->record)
+ };
+ mergeItems_.pop_front();
+
+ // store its record in our output record
+ std::swap(record, firstItem.record);
+
+ // try fetch 'next' from first item's reader
+ // if successful, re-insert it into container & re-sort on our new values
+ // otherwise, this item will go out of scope & reader destroyed
+ if (firstItem.reader->GetNext(firstItem.record)) {
+ mergeItems_.push_front(std::move(firstItem));
+ UpdateSort();
+ }
+
+ // return success
+ return true;
+ }
+
+protected:
+ std::deque<PacBio::BAM::internal::CompositeMergeItem> mergeItems_;
+
+protected:
+ ICollator(std::vector<std::unique_ptr<PacBio::BAM::BamReader> >&& readers)
+ {
+ for (auto&& reader : readers) {
+ auto item = internal::CompositeMergeItem{std::move(reader)};
+ if (item.reader->GetNext(item.record))
+ mergeItems_.push_back(std::move(item));
+ }
+ }
+
+ virtual void UpdateSort(void) =0;
+};
+
+// QNameCollator
+
+struct QNameSorter : std::binary_function<internal::CompositeMergeItem,
+ internal::CompositeMergeItem,
+ bool>
+{
+ bool operator()(const internal::CompositeMergeItem& lhs,
+ const internal::CompositeMergeItem& rhs)
+ {
+ const BamRecord& l = lhs.record;
+ const BamRecord& r = rhs.record;
+
+ // movie name
+ const int cmp = l.MovieName().compare(r.MovieName());
+ if (cmp != 0)
+ return cmp < 0;
+
+ // hole number
+ const auto lhsZmw = l.HoleNumber();
+ const auto rhsZmw = r.HoleNumber();
+ if (lhsZmw != rhsZmw)
+ return lhsZmw < rhsZmw;
+
+ // shuffle CCS reads after all others
+ const auto lhsReadType = l.Type();
+ const auto rhsReadType = r.Type();
+ if (lhsReadType == RecordType::CCS)
+ return false;
+ if (rhsReadType == RecordType::CCS)
+ return true;
+
+ // sort on qStart, then finally qEnd
+ const auto lhsQStart = l.QueryStart();
+ const auto rhsQStart = r.QueryStart();
+ return lhsQStart < rhsQStart;
+ }
+};
+
+class QNameCollator : public ICollator
+{
+public:
+ QNameCollator(std::vector<std::unique_ptr<PacBio::BAM::BamReader>>&& readers)
+ : ICollator(std::move(readers))
+ { UpdateSort(); }
+
+ void UpdateSort(void)
+ { std::sort(mergeItems_.begin(), mergeItems_.end(), QNameSorter{ }); }
+};
+
+// AlignedCollator
+
+class AlignedCollator : public ICollator
+{
+public:
+ AlignedCollator(std::vector<std::unique_ptr<PacBio::BAM::BamReader>>&& readers)
+ : ICollator(std::move(readers))
+ { UpdateSort(); }
+
+ void UpdateSort(void)
+ { std::sort(mergeItems_.begin(), mergeItems_.end(), PacBio::BAM::PositionSorter{ }); }
+};
+
+// BamFileMerger
+
+inline
+void BamFileMerger::Merge(const DataSet& dataset,
+ const std::string& outputFilename,
+ const ProgramInfo& mergeProgram,
+ bool createPbi)
+{
+ const PbiFilter filter = PbiFilter::FromDataSet(dataset);
+
+ std::vector<std::string> inputFilenames_;
+ const auto& bamFiles = dataset.BamFiles();
+ inputFilenames_.reserve(bamFiles.size());
+ for (const auto& file : bamFiles)
+ inputFilenames_.push_back(file.Filename());
+
+ if (inputFilenames_.empty())
+ throw std::runtime_error("no input filenames provided to BamFileMerger");
+
+ if (outputFilename.empty())
+ throw std::runtime_error("no output filename provide to BamFileMerger");
+
+
+ // attempt open input files
+ std::vector<std::unique_ptr<BamReader> > readers;
+ readers.reserve(inputFilenames_.size());
+ for (const auto& fn : inputFilenames_) {
+ if (filter.IsEmpty())
+ readers.emplace_back(new BamReader(fn));
+ else
+ readers.emplace_back(new PbiIndexedBamReader(filter, fn));
+ }
+
+ // read headers
+ std::vector<BamHeader> headers;
+ headers.reserve(readers.size());
+ for (auto&& reader : readers)
+ headers.push_back(reader->Header());
+
+ assert(!readers.empty());
+ assert(!headers.empty());
+
+ // merge headers
+ BamHeader mergedHeader = headers.front();
+ const std::string& usingSortOrder = mergedHeader.SortOrder();
+ const bool isCoordinateSorted = (usingSortOrder == "coordinate");
+ for (size_t i = 1; i < headers.size(); ++i) {
+ const BamHeader& header = headers.at(i);
+ if (header.SortOrder() != usingSortOrder)
+ throw std::runtime_error("BAM file sort orders do not match, aborting merge");
+ mergedHeader += headers.at(i);
+ }
+ if (mergeProgram.IsValid())
+ mergedHeader.AddProgram(mergeProgram);
+
+ // setup collator, based on sort order
+ std::unique_ptr<ICollator> collator;
+ if (isCoordinateSorted)
+ collator.reset(new AlignedCollator(std::move(readers)));
+ else
+ collator.reset(new QNameCollator(std::move(readers)));
+ // NOTE: readers *moved*, so no longer accessible here
+
+ // do merge, creating PBI on-the-fly
+ if (createPbi && (outputFilename != "-")) {
+
+ // TODO: this implementation recalculates all PBI values, when we really
+ // only need to collate entries and update offsets
+
+ BamWriter writer(outputFilename, mergedHeader);
+ PbiBuilder builder{ (outputFilename + ".pbi"),
+ mergedHeader.NumSequences(),
+ isCoordinateSorted
+ };
+ BamRecord record;
+ int64_t vOffset = 0;
+ while (collator->GetNext(record)) {
+ writer.Write(record, &vOffset);
+ builder.AddRecord(record, vOffset);
+ }
+ }
+
+ // otherwise just merge BAM
+ else {
+ BamWriter writer(outputFilename, mergedHeader);
+ BamRecord record;
+ while (collator->GetNext(record))
+ writer.Write(record);
+ }
+}
+
+} // namespace common
+} // namespace BAM
+} // namespace PacBio
diff --git a/tools/pbindex/src/OptionParser.cpp b/tools/common/OptionParser.cpp
similarity index 100%
rename from tools/pbindex/src/OptionParser.cpp
rename to tools/common/OptionParser.cpp
diff --git a/tools/pbindex/src/OptionParser.h b/tools/common/OptionParser.h
similarity index 100%
rename from tools/pbindex/src/OptionParser.h
rename to tools/common/OptionParser.h
diff --git a/tools/pbindex/CMakeLists.txt b/tools/pbindex/CMakeLists.txt
index 89572d0..6ebe5c2 100644
--- a/tools/pbindex/CMakeLists.txt
+++ b/tools/pbindex/CMakeLists.txt
@@ -1,27 +1,22 @@
+set(PbindexSrcDir ${PacBioBAM_ToolsDir}/pbindex/src)
+
# create version header
set(PbIndex_VERSION ${PacBioBAM_VERSION})
configure_file(
- ${PacBioBAM_RootDir}/tools/pbindex/src/PbIndexVersion.h.in
- ${PacBioBAM_RootDir}/tools/pbindex/src/PbIndexVersion.h
+ ${PbindexSrcDir}/PbIndexVersion.h.in PbIndexVersion.h @ONLY
)
-#pbindex sources
+# list source files
set(PBINDEX_SOURCES
- ${PacBioBAM_RootDir}/tools/pbindex/src/main.cpp
- ${PacBioBAM_RootDir}/tools/pbindex/src/OptionParser.h
- ${PacBioBAM_RootDir}/tools/pbindex/src/OptionParser.cpp
- ${PacBioBAM_RootDir}/tools/pbindex/src/PbIndex.h
- ${PacBioBAM_RootDir}/tools/pbindex/src/PbIndex.cpp
- ${PacBioBAM_RootDir}/tools/pbindex/src/PbIndexVersion.h
+ ${ToolsCommonDir}/OptionParser.cpp
+ ${PbindexSrcDir}/main.cpp
+ ${PbindexSrcDir}/PbIndex.cpp
)
# build pbindex executable
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${PacBioBAM_CXX_FLAGS}")
-include_directories(${PacBioBAM_RootDir}/tools/pbindex/src ${PacBioBAM_INCLUDE_DIRS})
-add_executable(pbindex ${PBINDEX_SOURCES})
-set_target_properties(pbindex PROPERTIES
- RUNTIME_OUTPUT_DIRECTORY ${PacBioBAM_BinDir}
+include(PbbamTool)
+create_pbbam_tool(
+ TARGET pbindex
+ SOURCES ${PBINDEX_SOURCES}
)
-target_link_libraries(pbindex pbbam)
-
diff --git a/tools/pbindex/src/PbIndex.cpp b/tools/pbindex/src/PbIndex.cpp
index ca8a234..e25fa0e 100644
--- a/tools/pbindex/src/PbIndex.cpp
+++ b/tools/pbindex/src/PbIndex.cpp
@@ -53,7 +53,7 @@ int PbIndex::Create(const Settings& settings)
try
{
PacBio::BAM::BamFile bamFile(settings.inputBamFilename_);
- bamFile.EnsurePacBioIndexExists();
+ bamFile.CreatePacBioIndex();
return EXIT_SUCCESS;
}
catch (std::runtime_error& e)
diff --git a/tools/pbindex/src/main.cpp b/tools/pbindex/src/main.cpp
index 59065fa..0f1bfb9 100644
--- a/tools/pbindex/src/main.cpp
+++ b/tools/pbindex/src/main.cpp
@@ -35,7 +35,7 @@
// Author: Derek Barnett
-#include "OptionParser.h"
+#include "../common/OptionParser.h"
#include "PbIndex.h"
#include "PbIndexVersion.h"
#include <cassert>
diff --git a/tools/pbindexdump/CMakeLists.txt b/tools/pbindexdump/CMakeLists.txt
new file mode 100644
index 0000000..26178e3
--- /dev/null
+++ b/tools/pbindexdump/CMakeLists.txt
@@ -0,0 +1,35 @@
+
+set(PbindexdumpSrcDir ${PacBioBAM_ToolsDir}/pbindexdump/src)
+
+# create version header
+set(PbIndexDump_VERSION ${PacBioBAM_VERSION})
+configure_file(
+ ${PbindexdumpSrcDir}/PbIndexDumpVersion.h.in PbIndexDumpVersion.h @ONLY
+)
+
+# list source files
+set(PBINDEXDUMP_SOURCES
+ ${ToolsCommonDir}/OptionParser.cpp
+ ${PbindexdumpSrcDir}/CppFormatter.cpp
+ ${PbindexdumpSrcDir}/JsonFormatter.cpp
+ ${PbindexdumpSrcDir}/PbIndexDump.cpp
+ ${PbindexdumpSrcDir}/main.cpp
+)
+
+# build pbindexdump executable
+include(PbbamTool)
+create_pbbam_tool(
+ TARGET pbindexdump
+ SOURCES ${PBINDEXDUMP_SOURCES}
+)
+
+# cram tests
+if (PacBioBAM_build_tests)
+ add_test(
+ NAME pbindexdump_CramTests
+ WORKING_DIRECTORY ${PacBioBAM_TestsDir}/scripts
+ COMMAND "python" cram.py
+ ${PacBioBAM_CramTestsDir}/pbindexdump_json.t
+ ${PacBioBAM_CramTestsDir}/pbindexdump_cpp.t
+ )
+endif()
diff --git a/tools/pbindexdump/src/CppFormatter.cpp b/tools/pbindexdump/src/CppFormatter.cpp
new file mode 100644
index 0000000..696421e
--- /dev/null
+++ b/tools/pbindexdump/src/CppFormatter.cpp
@@ -0,0 +1,177 @@
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "CppFormatter.h"
+#include <pbbam/PbiRawData.h>
+
+#include <iostream>
+#include <sstream>
+
+using namespace pbindexdump;
+using namespace std;
+
+namespace pbindexdump {
+
+static
+string printCppReferenceData(const PacBio::BAM::PbiRawReferenceData& referenceData)
+{
+ auto result = string{ "" };
+ for (const PacBio::BAM::PbiReferenceEntry& entry : referenceData.entries_) {
+ if (!result.empty())
+ result.append(",\n");
+ result.append( string{ " PbiReferenceEntry{" }
+ + to_string(entry.tId_) + "," + to_string(entry.beginRow_) + "," + to_string(entry.endRow_)
+ + string{ "}" } );
+ }
+ if (!result.empty())
+ result.append("\n");
+ return result;
+}
+
+template<typename T>
+string printVectorElements(const std::vector<T>& c)
+{
+ stringstream s;
+ for (const auto& e : c)
+ s << e << ",";
+ auto result = s.str();
+ if (!result.empty())
+ result.pop_back(); // remove final comma
+ return result;
+}
+
+template<>
+string printVectorElements(const std::vector<uint8_t>& c)
+{
+ stringstream s;
+ for (const auto& e : c)
+ s << static_cast<uint16_t>(e) << ","; // cast to larger uint, force print as number not character
+ auto result = s.str();
+ if (!result.empty())
+ result.pop_back(); // remove final comma
+ return result;
+}
+
+template<>
+string printVectorElements(const std::vector<int8_t>& c)
+{
+ stringstream s;
+ for (const auto& e : c)
+ s << static_cast<int16_t>(e) << ","; // cast to larger int, force print as number not character
+ auto result = s.str();
+ if (!result.empty())
+ result.pop_back(); // remove final comma
+ return result;
+}
+
+} // namespace pbindexdump
+
+CppFormatter::CppFormatter(const Settings& settings)
+ : IFormatter(settings)
+{ }
+
+void CppFormatter::Run(void)
+{
+ using namespace PacBio::BAM;
+
+ const PbiRawData rawData{ settings_.inputPbiFilename_ };
+ const PbiRawBarcodeData& barcodeData = rawData.BarcodeData();
+ const PbiRawBasicData& basicData = rawData.BasicData();
+ const PbiRawMappedData& mappedData = rawData.MappedData();
+ const PbiRawReferenceData& referenceData = rawData.ReferenceData();
+
+ auto version = string{ };
+ switch (rawData.Version()) {
+ case PbiFile::Version_3_0_0 : version = "PbiFile::Version_3_0_0"; break;
+ case PbiFile::Version_3_0_1 : version = "PbiFile::Version_3_0_1"; break;
+ default:
+ throw runtime_error("unsupported PBI version encountered");
+ }
+
+ auto fileSections = string{ "PbiFile::BASIC" };
+ if (rawData.HasBarcodeData()) fileSections += string{ " | PbiFile::BARCODE" };
+ if (rawData.HasMappedData()) fileSections += string{ " | PbiFile::MAPPED" };
+ if (rawData.HasReferenceData()) fileSections += string{ " | PbiFile::REFERENCE" };
+
+ stringstream s;
+ s << "PbiRawData rawData;" << endl
+ << "rawData.Version(" << version << ");" << endl
+ << "rawData.FileSections(" << fileSections << ");" << endl
+ << "rawData.NumReads(" << rawData.NumReads() << ");" << endl
+ << endl
+ << "PbiRawBasicData& basicData = rawData.BasicData();" << endl
+ << "basicData.rgId_ = {" << printVectorElements(basicData.rgId_) << "};" << endl
+ << "basicData.qStart_ = {" << printVectorElements(basicData.qStart_) << "};" << endl
+ << "basicData.qEnd_ = {" << printVectorElements(basicData.qEnd_) << "};" << endl
+ << "basicData.holeNumber_ = {" << printVectorElements(basicData.holeNumber_) << "};" << endl
+ << "basicData.readQual_ = {" << printVectorElements(basicData.readQual_) << "};" << endl
+ << "basicData.ctxtFlag_ = {" << printVectorElements(basicData.ctxtFlag_) << "};" << endl
+ << "basicData.fileOffset_ = {" << printVectorElements(basicData.fileOffset_) << "};" << endl
+ << endl;
+
+ if (rawData.HasBarcodeData()) {
+ s << "PbiRawBarcodeData& barcodeData = rawData.BarcodeData();" << endl
+ << "barcodeData.bcForward_ = {" << printVectorElements(barcodeData.bcForward_) << "};" << endl
+ << "barcodeData.bcReverse_ = {" << printVectorElements(barcodeData.bcReverse_) << "};" << endl
+ << "barcodeData.bcQual_ = {" << printVectorElements(barcodeData.bcQual_) << "};" << endl
+ << endl;
+ }
+
+ if (rawData.HasMappedData()) {
+ s << "PbiRawMappedData& mappedData = rawData.MappedData();" << endl
+ << "mappedData.tId_ = {" << printVectorElements(mappedData.tId_) << "};" << endl
+ << "mappedData.tStart_ = {" << printVectorElements(mappedData.tStart_) << "};" << endl
+ << "mappedData.tEnd_ = {" << printVectorElements(mappedData.tEnd_) << "};" << endl
+ << "mappedData.aStart_ = {" << printVectorElements(mappedData.aStart_) << "};" << endl
+ << "mappedData.aEnd_ = {" << printVectorElements(mappedData.aEnd_) << "};" << endl
+ << "mappedData.revStrand_ = {" << printVectorElements(mappedData.revStrand_) << "};" << endl
+ << "mappedData.nM_ = {" << printVectorElements(mappedData.nM_) << "};" << endl
+ << "mappedData.nMM_ = {" << printVectorElements(mappedData.nMM_) << "};" << endl
+ << "mappedData.mapQV_ = {" << printVectorElements(mappedData.mapQV_) << "};" << endl
+ << endl;
+ }
+
+ if (rawData.HasReferenceData()) {
+ s << "PbiRawReferenceData& referenceData = rawData.ReferenceData();" << endl
+ << "referenceData.entries_ = { " << endl
+ << printCppReferenceData(referenceData)
+ << "};" << endl
+ << endl;
+ }
+
+ cout << s.str() << endl;
+}
diff --git a/include/pbbam/Strand.h b/tools/pbindexdump/src/CppFormatter.h
similarity index 81%
copy from include/pbbam/Strand.h
copy to tools/pbindexdump/src/CppFormatter.h
index aa8535f..c2cda26 100644
--- a/include/pbbam/Strand.h
+++ b/tools/pbindexdump/src/CppFormatter.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
@@ -23,7 +23,7 @@
// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
@@ -35,21 +35,20 @@
// Author: Derek Barnett
-#ifndef STRAND_H
-#define STRAND_H
+#ifndef CPPFORMATTER_H
+#define CPPFORMATTER_H
-#include "pbbam/Config.h"
+#include "IFormatter.h"
-namespace PacBio {
-namespace BAM {
+namespace pbindexdump {
-enum class Strand
+class CppFormatter : public IFormatter
{
- FORWARD
- , REVERSE
+public:
+ CppFormatter(const Settings& settings);
+ void Run(void);
};
-} // namespace BAM
-} // namespace PacBio
+} // namespace pbindexdump
-#endif // STRAND_H
+#endif // CPPFORMATTER_H
diff --git a/include/pbbam/TagCollection.h b/tools/pbindexdump/src/IFormatter.h
similarity index 77%
copy from include/pbbam/TagCollection.h
copy to tools/pbindexdump/src/IFormatter.h
index 42b4018..eb7e79b 100644
--- a/include/pbbam/TagCollection.h
+++ b/tools/pbindexdump/src/IFormatter.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
@@ -23,7 +23,7 @@
// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
@@ -35,24 +35,30 @@
// Author: Derek Barnett
-#ifndef TAGCOLLECTION_H
-#define TAGCOLLECTION_H
+#ifndef IFORMATTER_H
+#define IFORMATTER_H
-#include "pbbam/Config.h"
-#include "pbbam/Tag.h"
-#include <map>
-#include <string>
+#include "Settings.h"
-namespace PacBio {
-namespace BAM {
+namespace pbindexdump {
-class PBBAM_EXPORT TagCollection : public std::map<std::string, Tag>
+class IFormatter
{
public:
- bool Contains(const std::string& name) const;
+ ~IFormatter(void) { }
+
+public:
+ virtual void Run(void) =0;
+
+protected:
+ const Settings& settings_;
+
+protected:
+ IFormatter(const Settings& settings)
+ : settings_(settings)
+ { }
};
-} // namespace BAM
-} // namespace PacBio
+} // namespace pbindexdump
-#endif // TAGCOLLECTION_H
+#endif // IFORMATTER_H
diff --git a/tools/pbindexdump/src/JsonFormatter.cpp b/tools/pbindexdump/src/JsonFormatter.cpp
new file mode 100644
index 0000000..368f659
--- /dev/null
+++ b/tools/pbindexdump/src/JsonFormatter.cpp
@@ -0,0 +1,195 @@
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "JsonFormatter.h"
+#include "json.hpp"
+#include <pbbam/PbiFile.h>
+#include <iostream>
+#include <sstream>
+using namespace pbindexdump;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace pbindexdump {
+
+
+} // namespace pbindexdump
+
+JsonFormatter::JsonFormatter(const Settings& settings)
+ : IFormatter(settings)
+ , index_(settings.inputPbiFilename_)
+{ }
+
+void JsonFormatter::FormatMetadata(void)
+{
+ auto version = string{ };
+ switch (index_.Version()) {
+ case PbiFile::Version_3_0_0 : version = "3.0.0"; break;
+ case PbiFile::Version_3_0_1 : version = "3.0.1"; break;
+ default:
+ throw runtime_error("unsupported PBI version encountered");
+ }
+
+ nlohmann::json fileSections;
+ fileSections.push_back("BasicData");
+ if (index_.HasBarcodeData()) fileSections.push_back("BarcodeData");
+ if (index_.HasMappedData()) fileSections.push_back("MappedData");
+ if (index_.HasReferenceData()) fileSections.push_back("ReferenceData");
+
+ json_["version"] = version;
+ json_["fileSections"] = fileSections;
+ json_["numReads"] = index_.NumReads();
+}
+
+void JsonFormatter::FormatRaw(void)
+{
+ const PbiRawBasicData& basicData = index_.BasicData();
+ json_["basicData"]["rgId"] = basicData.rgId_;
+ json_["basicData"]["qStart"] = basicData.qStart_;
+ json_["basicData"]["qEnd"] = basicData.qEnd_;
+ json_["basicData"]["holeNumber"] = basicData.holeNumber_;
+ json_["basicData"]["readQual"] = basicData.readQual_;
+ json_["basicData"]["ctxtFlag"] = basicData.ctxtFlag_;
+ json_["basicData"]["fileOffset"] = basicData.fileOffset_;
+
+ if (index_.HasBarcodeData()) {
+ const PbiRawBarcodeData& barcodeData = index_.BarcodeData();
+ json_["barcodeData"]["bcForward"] = barcodeData.bcForward_;
+ json_["barcodeData"]["bcReverse"] = barcodeData.bcReverse_;
+ json_["barcodeData"]["bcQuality"] = barcodeData.bcQual_;
+ }
+
+ if (index_.HasMappedData()) {
+ const PbiRawMappedData& mappedData = index_.MappedData();
+
+ // casts to force -1 if unmapped
+ json_["mappedData"]["tId"] = mappedData.tId_;
+ json_["mappedData"]["tStart"] = mappedData.tStart_;
+ json_["mappedData"]["tEnd"] = mappedData.tEnd_;
+
+ json_["mappedData"]["aStart"] = mappedData.aStart_;
+ json_["mappedData"]["aEnd"] = mappedData.aEnd_;
+ json_["mappedData"]["revStrand"] = mappedData.revStrand_;
+ json_["mappedData"]["nM"] = mappedData.nM_;
+ json_["mappedData"]["nMM"] = mappedData.nMM_;
+ json_["mappedData"]["mapQV"] = mappedData.mapQV_;
+ }
+}
+
+void JsonFormatter::FormatRecords(void)
+{
+ nlohmann::json reads;
+ const uint32_t numReads = index_.NumReads();
+ const bool hasBarcodeData = index_.HasBarcodeData();
+ const bool hasMappedData = index_.HasMappedData();
+ for (uint32_t i = 0; i < numReads; ++i) {
+
+ nlohmann::json read;
+
+ // common data
+ const PbiRawBasicData& basicData = index_.BasicData();
+ read["rgId"] = basicData.rgId_[i];
+ read["qStart"] = basicData.qStart_[i];
+ read["qEnd"] = basicData.qEnd_[i];
+ read["holeNumber"] = basicData.holeNumber_[i];
+ read["readQuality"] = basicData.readQual_[i];
+ read["contextFlag"] = basicData.ctxtFlag_[i];
+ read["fileOffset"] = basicData.fileOffset_[i];
+
+ // barcode data, if present
+ if (hasBarcodeData) {
+ const PbiRawBarcodeData& barcodeData = index_.BarcodeData();
+ read["bcForward"] = barcodeData.bcForward_[i];
+ read["bcReverse"] = barcodeData.bcReverse_[i];
+ read["bcQuality"] = barcodeData.bcQual_[i];
+ }
+
+ // mapping data, if present
+ if (hasMappedData) {
+ const PbiRawMappedData& mappedData = index_.MappedData();
+
+ // casts to force -1 if unmapped
+ read["tId"] = static_cast<int32_t>(mappedData.tId_[i]);
+ read["tStart"] = static_cast<int32_t>(mappedData.tStart_[i]);
+ read["tEnd"] = static_cast<int32_t>(mappedData.tEnd_[i]);
+
+ read["aStart"] = mappedData.aStart_[i];
+ read["aEnd"] = mappedData.aEnd_[i];
+ read["nM"] = mappedData.nM_[i];
+ read["nMM"] = mappedData.nMM_[i];
+ read["mapQuality"] = mappedData.mapQV_[i];
+ read["reverseStrand"] = mappedData.revStrand_[i];
+ }
+
+ reads.push_back(std::move(read));
+ }
+ json_["reads"] = reads;
+}
+
+void JsonFormatter::FormatReferences(void)
+{
+ if (index_.HasReferenceData()) {
+ const PbiRawReferenceData& referenceData = index_.ReferenceData();
+ nlohmann::json references;
+ for (const PbiReferenceEntry& entry : referenceData.entries_) {
+ nlohmann::json element;
+ element["tId"] = static_cast<int32_t>(entry.tId_);
+ element["beginRow"] = static_cast<int32_t>(entry.beginRow_);
+ element["endRow"] = static_cast<int32_t>(entry.endRow_);
+ references.push_back(std::move(element));
+ }
+ json_["references"] = references;
+ }
+}
+
+void JsonFormatter::Print(void)
+{
+ cout << json_.dump(settings_.jsonIndentLevel_) << endl;
+}
+
+void JsonFormatter::Run(void)
+{
+ FormatMetadata();
+ FormatReferences();
+
+ if (settings_.jsonRaw_)
+ FormatRaw();
+ else
+ FormatRecords();
+
+ Print();
+}
diff --git a/include/pbbam/internal/FilterEngine.h b/tools/pbindexdump/src/JsonFormatter.h
similarity index 72%
rename from include/pbbam/internal/FilterEngine.h
rename to tools/pbindexdump/src/JsonFormatter.h
index bc4b88b..9bd6911 100644
--- a/include/pbbam/internal/FilterEngine.h
+++ b/tools/pbindexdump/src/JsonFormatter.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
@@ -23,7 +23,7 @@
// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
@@ -35,37 +35,35 @@
// Author: Derek Barnett
-#ifndef FILTERENGINE_H
-#define FILTERENGINE_H
+#ifndef JSONFORMATTER_H
+#define JSONFORMATTER_H
-#include "pbbam/BamRecord.h"
-#include <memory>
-#include <vector>
+#include "IFormatter.h"
+#include "json.hpp"
+#include <pbbam/PbiRawData.h>
-namespace PacBio {
-namespace BAM {
-namespace internal {
-
-
-
-class FilterEngine {
+namespace pbindexdump {
+class JsonFormatter : public IFormatter
+{
public:
- FilterEngine(void);
+ JsonFormatter(const Settings& settings);
+ void Run(void);
-public:
- // returns true if record passes filter
- bool Accepts(const BamRecord& r) const;
+private:
+ void FormatMetadata(void);
+ void FormatReferences(void);
- // removes records that do not pass filter, returns true if !empty()
- bool Accepts(std::vector<BamRecord>& r) const;
+ void FormatRaw(void);
+ void FormatRecords(void);
-private:
+ void Print(void);
+private:
+ PacBio::BAM::PbiRawData index_;
+ nlohmann::json json_;
};
-} // namespace internal
-} // namespace BAM
-} // namespace PacBio
+} // namespace pbindexdump
-#endif // FILTERENGINE_H
+#endif // JSONFORMATTER_H
diff --git a/tools/pbindex/src/PbIndex.cpp b/tools/pbindexdump/src/PbIndexDump.cpp
similarity index 71%
copy from tools/pbindex/src/PbIndex.cpp
copy to tools/pbindexdump/src/PbIndexDump.cpp
index ca8a234..2dc925b 100644
--- a/tools/pbindex/src/PbIndex.cpp
+++ b/tools/pbindexdump/src/PbIndexDump.cpp
@@ -35,44 +35,23 @@
// Author: Derek Barnett
-#include "PbIndex.h"
-#include <pbbam/BamFile.h>
-#include <pbbam/PbiRawData.h>
-#include <iostream>
+#include "PbIndexDump.h"
+#include "CppFormatter.h"
+#include "JsonFormatter.h"
#include <cassert>
-#include <cstdlib>
-using namespace pbindex;
+using namespace pbindexdump;
using namespace std;
-Settings::Settings(void)
- : printPbiContents_(false)
-{ }
-
-int PbIndex::Create(const Settings& settings)
+void PbIndexDump::Run(const Settings& settings)
{
- try
- {
- PacBio::BAM::BamFile bamFile(settings.inputBamFilename_);
- bamFile.EnsurePacBioIndexExists();
- return EXIT_SUCCESS;
- }
- catch (std::runtime_error& e)
- {
- cerr << "pbindex ERROR: " << e.what() << endl;
- return EXIT_FAILURE;
+ std::unique_ptr<IFormatter> formatter(nullptr);
+ if (settings.format_ == "json") formatter.reset(new JsonFormatter(settings));
+ else if (settings.format_ == "cpp") formatter.reset(new CppFormatter(settings));
+ else {
+ string msg = { "unsupported output format requested: " };
+ msg += settings.format_;
+ throw runtime_error(msg);
}
+ assert(formatter);
+ formatter->Run();
}
-
-//int PbIndex::Print(const Settings& settings)
-//{
-
-//}
-
-int PbIndex::Run(const Settings& settings)
-{
-// if (settings.printPbiContents_)
-// return Print(settings);
-// else
- return Create(settings);
-}
-
diff --git a/include/pbbam/Strand.h b/tools/pbindexdump/src/PbIndexDump.h
similarity index 83%
copy from include/pbbam/Strand.h
copy to tools/pbindexdump/src/PbIndexDump.h
index aa8535f..e5ec2dc 100644
--- a/include/pbbam/Strand.h
+++ b/tools/pbindexdump/src/PbIndexDump.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
@@ -23,7 +23,7 @@
// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
@@ -35,21 +35,19 @@
// Author: Derek Barnett
-#ifndef STRAND_H
-#define STRAND_H
+#ifndef PBINDEXDUMP_H
+#define PBINDEXDUMP_H
-#include "pbbam/Config.h"
+namespace pbindexdump {
-namespace PacBio {
-namespace BAM {
+class Settings;
-enum class Strand
+class PbIndexDump
{
- FORWARD
- , REVERSE
+public:
+ static void Run(const Settings& settings);
};
-} // namespace BAM
-} // namespace PacBio
+} // namespace pbindex
-#endif // STRAND_H
+#endif // PBINDEXDUMP_H
diff --git a/include/pbbam/Strand.h b/tools/pbindexdump/src/PbIndexDumpVersion.h.in
similarity index 85%
copy from include/pbbam/Strand.h
copy to tools/pbindexdump/src/PbIndexDumpVersion.h.in
index aa8535f..ec49612 100644
--- a/include/pbbam/Strand.h
+++ b/tools/pbindexdump/src/PbIndexDumpVersion.h.in
@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
@@ -35,21 +35,15 @@
// Author: Derek Barnett
-#ifndef STRAND_H
-#define STRAND_H
+#ifndef PBINDEXDUMPVERSION_H
+#define PBINDEXDUMPVERSION_H
-#include "pbbam/Config.h"
+#include <string>
-namespace PacBio {
-namespace BAM {
+namespace pbindexdump {
-enum class Strand
-{
- FORWARD
- , REVERSE
-};
+const std::string Version = std::string("@PbIndexDump_VERSION@");
-} // namespace BAM
-} // namespace PacBio
+} // namespace pbindexdump
-#endif // STRAND_H
+#endif // PBINDEXDUMPVERSION_H
diff --git a/include/pbbam/TagCollection.h b/tools/pbindexdump/src/Settings.h
similarity index 76%
copy from include/pbbam/TagCollection.h
copy to tools/pbindexdump/src/Settings.h
index 42b4018..a520293 100644
--- a/include/pbbam/TagCollection.h
+++ b/tools/pbindexdump/src/Settings.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
@@ -23,7 +23,7 @@
// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
@@ -35,24 +35,31 @@
// Author: Derek Barnett
-#ifndef TAGCOLLECTION_H
-#define TAGCOLLECTION_H
+#ifndef SETTINGS_H
+#define SETTINGS_H
-#include "pbbam/Config.h"
-#include "pbbam/Tag.h"
-#include <map>
#include <string>
+#include <vector>
-namespace PacBio {
-namespace BAM {
+namespace pbindexdump {
-class PBBAM_EXPORT TagCollection : public std::map<std::string, Tag>
+class Settings
{
public:
- bool Contains(const std::string& name) const;
+ Settings(void)
+ : format_("json")
+ , jsonIndentLevel_(4)
+ , jsonRaw_(false)
+ { }
+
+public:
+ std::string inputPbiFilename_;
+ std::string format_;
+ int jsonIndentLevel_;
+ bool jsonRaw_;
+ std::vector<std::string> errors_;
};
-} // namespace BAM
-} // namespace PacBio
+} // namespace pbindexdump
-#endif // TAGCOLLECTION_H
+#endif // SETTINGS_H
diff --git a/tools/pbindexdump/src/json.hpp b/tools/pbindexdump/src/json.hpp
new file mode 100644
index 0000000..7e174d7
--- /dev/null
+++ b/tools/pbindexdump/src/json.hpp
@@ -0,0 +1,7295 @@
+/*!
+ at mainpage
+
+These pages contain the API documentation of JSON for Modern C++, a C++11
+header-only JSON class.
+
+Class @ref nlohmann::basic_json is a good entry point for the documentation.
+
+ at copyright The code is licensed under the [MIT
+ License](http://opensource.org/licenses/MIT):
+ <br>
+ Copyright © 2013-2015 Niels Lohmann.
+ <br>
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation files
+ (the "Software"), to deal in the Software without restriction,
+ including without limitation the rights to use, copy, modify, merge,
+ publish, distribute, sublicense, and/or sell copies of the Software,
+ and to permit persons to whom the Software is furnished to do so,
+ subject to the following conditions:
+ <br>
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+ <br>
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+
+ at author [Niels Lohmann](http://nlohmann.me)
+ at see https://github.com/nlohmann/json to download the source code
+*/
+
+#ifndef NLOHMANN_JSON_HPP
+#define NLOHMANN_JSON_HPP
+
+#include <algorithm>
+#include <array>
+#include <ciso646>
+#include <cmath>
+#include <cstdio>
+#include <functional>
+#include <initializer_list>
+#include <iomanip>
+#include <iostream>
+#include <iterator>
+#include <limits>
+#include <map>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+// enable ssize_t on MinGW
+#ifdef __GNUC__
+ #ifdef __MINGW32__
+ #include <sys/types.h>
+ #endif
+#endif
+
+// enable ssize_t for MSVC
+#ifdef _MSC_VER
+ #include <basetsd.h>
+ using ssize_t = SSIZE_T;
+#endif
+
+/*!
+ at brief namespace for Niels Lohmann
+ at see https://github.com/nlohmann
+*/
+namespace nlohmann
+{
+
+
+/*!
+ at brief unnamed namespace with internal helper functions
+*/
+namespace
+{
+/*!
+ at brief Helper to determine whether there's a key_type for T.
+ at sa http://stackoverflow.com/a/7728728/266378
+*/
+template<typename T>
+struct has_mapped_type
+{
+ private:
+ template<typename C> static char test(typename C::mapped_type*);
+ template<typename C> static int test(...);
+ public:
+ enum { value = sizeof(test<T>(0)) == sizeof(char) };
+};
+
+/// "equality" comparison for floating point numbers
+template<typename T>
+static bool approx(const T a, const T b)
+{
+ return not (a > b or a < b);
+}
+}
+
+/*!
+ at brief a class to store JSON values
+
+ at tparam ObjectType type for JSON objects (@c std::map by default; will be used
+in @ref object_t)
+ at tparam ArrayType type for JSON arrays (@c std::vector by default; will be used
+in @ref array_t)
+ at tparam StringType type for JSON strings and object keys (@c std::string by
+default; will be used in @ref string_t)
+ at tparam BooleanType type for JSON booleans (@c `bool` by default; will be used
+in @ref boolean_t)
+ at tparam NumberIntegerType type for JSON integer numbers (@c `int64_t` by
+default; will be used in @ref number_integer_t)
+ at tparam NumberFloatType type for JSON floating-point numbers (@c `double` by
+default; will be used in @ref number_float_t)
+ at tparam AllocatorType type of the allocator to use (@c `std::allocator` by
+default)
+
+ at requirement The class satisfies the following concept requirements:
+- Basic
+ - [DefaultConstructible](http://en.cppreference.com/w/cpp/concept/DefaultConstructible):
+ JSON values can be default constructed. The result will be a JSON null value.
+ - [MoveConstructible](http://en.cppreference.com/w/cpp/concept/MoveConstructible):
+ A JSON value can be constructed from an rvalue argument.
+ - [CopyConstructible](http://en.cppreference.com/w/cpp/concept/CopyConstructible):
+ A JSON value can be copy-constrcuted from an lvalue expression.
+ - [MoveAssignable](http://en.cppreference.com/w/cpp/concept/MoveAssignable):
+ A JSON value van be assigned from an rvalue argument.
+ - [CopyAssignable](http://en.cppreference.com/w/cpp/concept/CopyAssignable):
+ A JSON value can be copy-assigned from an lvalue expression.
+ - [Destructible](http://en.cppreference.com/w/cpp/concept/Destructible):
+ JSON values can be destructed.
+- Layout
+ - [StandardLayoutType](http://en.cppreference.com/w/cpp/concept/StandardLayoutType):
+ JSON values have
+ [standard layout](http://en.cppreference.com/w/cpp/language/data_members#Standard_layout):
+ All non-static data members are private and standard layout types, the class
+ has no virtual functions or (virtual) base classes.
+- Library-wide
+ - [EqualityComparable](http://en.cppreference.com/w/cpp/concept/EqualityComparable):
+ JSON values can be compared with `==`, see @ref
+ operator==(const_reference,const_reference).
+ - [LessThanComparable](http://en.cppreference.com/w/cpp/concept/LessThanComparable):
+ JSON values can be compared with `<`, see @ref
+ operator<(const_reference,const_reference).
+ - [Swappable](http://en.cppreference.com/w/cpp/concept/Swappable):
+ Any JSON lvalue or rvalue of can be swapped with any lvalue or rvalue of
+ other compatible types, using unqualified function call @ref swap().
+ - [NullablePointer](http://en.cppreference.com/w/cpp/concept/NullablePointer):
+ JSON values can be compared against `std::nullptr_t` objects which are used
+ to model the `null` value.
+- Container
+ - [Container](http://en.cppreference.com/w/cpp/concept/Container):
+ JSON values can be used like STL containers and provide iterator access.
+ - [ReversibleContainer](http://en.cppreference.com/w/cpp/concept/ReversibleContainer);
+ JSON values can be used like STL containers and provide reverse iterator
+ access.
+
+ at internal
+ at note ObjectType trick from http://stackoverflow.com/a/9860911
+ at endinternal
+
+ at see RFC 7159 <http://rfc7159.net/rfc7159>
+*/
+template <
+ template<typename U, typename V, typename... Args> class ObjectType = std::map,
+ template<typename U, typename... Args> class ArrayType = std::vector,
+ class StringType = std::string,
+ class BooleanType = bool,
+ class NumberIntegerType = int64_t,
+ class NumberFloatType = double,
+ template<typename U> class AllocatorType = std::allocator
+ >
+class basic_json
+{
+ private:
+ /// workaround type for MSVC
+ using basic_json_t = basic_json<ObjectType,
+ ArrayType,
+ StringType,
+ BooleanType,
+ NumberIntegerType,
+ NumberFloatType,
+ AllocatorType>;
+
+ public:
+
+ /////////////////////
+ // container types //
+ /////////////////////
+
+ /// @name container types
+ /// @{
+
+ /// the type of elements in a basic_json container
+ using value_type = basic_json;
+
+ /// the type of an element reference
+ using reference = value_type&;
+
+ /// the type of an element const reference
+ using const_reference = const value_type&;
+
+ /// a type to represent differences between iterators
+ using difference_type = std::ptrdiff_t;
+
+ /// a type to represent container sizes
+ using size_type = std::size_t;
+
+ /// the allocator type
+ using allocator_type = AllocatorType<basic_json>;
+
+ /// the type of an element pointer
+ using pointer = typename std::allocator_traits<allocator_type>::pointer;
+ /// the type of an element const pointer
+ using const_pointer = typename std::allocator_traits<allocator_type>::const_pointer;
+
+ // forward declaration
+ template<typename Base> class json_reverse_iterator;
+
+ /// an iterator for a basic_json container
+ class iterator;
+ /// a const iterator for a basic_json container
+ class const_iterator;
+ /// a reverse iterator for a basic_json container
+ using reverse_iterator = json_reverse_iterator<typename basic_json::iterator>;
+ /// a const reverse iterator for a basic_json container
+ using const_reverse_iterator = json_reverse_iterator<typename basic_json::const_iterator>;
+
+ /// @}
+
+
+ /*!
+ @brief returns the allocator associated with the container
+ */
+ static allocator_type get_allocator()
+ {
+ return allocator_type();
+ }
+
+
+ ///////////////////////////
+ // JSON value data types //
+ ///////////////////////////
+
+ /// @name JSON value data types
+ /// @{
+
+ /*!
+ @brief a type for an object
+
+ [RFC 7159](http://rfc7159.net/rfc7159) describes JSON objects as follows:
+ > An object is an unordered collection of zero or more name/value pairs,
+ > where a name is a string and a value is a string, number, boolean, null,
+ > object, or array.
+
+ To store objects in C++, a type is defined by the template parameters @a
+ ObjectType which chooses the container (e.g., `std::map` or
+ `std::unordered_map`), @a StringType which chooses the type of the keys or
+ names, and @a AllocatorType which chooses the allocator to use.
+
+ #### Default type
+
+ With the default values for @a ObjectType (`std::map`), @a StringType
+ (`std::string`), and @a AllocatorType (`std::allocator`), the default value
+ for @a object_t is:
+
+ @code {.cpp}
+ std::map<
+ std::string, // key_type
+ basic_json, // value_type
+ std::less<std::string>, // key_compare
+ std::allocator<std::pair<const std::string, basic_json>> // allocator_type
+ >
+ @endcode
+
+ #### Behavior
+
+ The choice of @a object_t influences the behavior of the JSON class. With
+ the default type, objects have the following behavior:
+
+ - When all names are unique, objects will be interoperable in the sense
+ that all software implementations receiving that object will agree on the
+ name-value mappings.
+ - When the names within an object are not unique, later stored name/value
+ pairs overwrite previously stored name/value pairs, leaving the used
+ names unique. For instance, `{"key": 1}` and `{"key": 2, "key": 1}` will
+ be treated as equal and both stored as `{"key": 1}`.
+ - Internally, name/value pairs are stored in lexicographical order of the
+ names. Objects will also be serialized (see @ref dump) in this order. For
+ instance, `{"b": 1, "a": 2}` and `{"a": 2, "b": 1}` will be stored and
+ serialized as `{"a": 2, "b": 1}`.
+ - When comparing objects, the order of the name/value pairs is irrelevant.
+ This makes objects interoperable in the sense that they will not be
+ affected by these differences. For instance, `{"b": 1, "a": 2}` and
+ `{"a": 2, "b": 1}` will be treated as equal.
+
+ #### Limits
+
+ [RFC 7159](http://rfc7159.net/rfc7159) specifies:
+ > An implementation may set limits on the maximum depth of nesting.
+
+ In this class, the object's limit of nesting is not constraint explicitly.
+ However, a maximum depth of nesting may be introduced by the compiler or
+ runtime environment. A theoretical limit can be queried by calling the @ref
+ max_size function of a JSON object.
+
+ #### Storage
+
+ Objects are stored as pointers in a `basic_json` type. That is, for any
+ access to object values, a pointer of type `object_t*` must be dereferenced.
+
+ @sa array_t
+ */
+ using object_t = ObjectType<StringType,
+ basic_json,
+ std::less<StringType>,
+ AllocatorType<std::pair<const StringType,
+ basic_json>>>;
+
+ /*!
+ @brief a type for an array
+
+ [RFC 7159](http://rfc7159.net/rfc7159) describes JSON arrays as follows:
+ > An array is an ordered sequence of zero or more values.
+
+ To store objects in C++, a type is defined by the template parameters @a
+ ArrayType which chooses the container (e.g., `std::vector` or `std::list`)
+ and @a AllocatorType which chooses the allocator to use.
+
+ #### Default type
+
+ With the default values for @a ArrayType (`std::vector`) and @a
+ AllocatorType (`std::allocator`), the default value for @a array_t is:
+
+ @code {.cpp}
+ std::vector<
+ basic_json, // value_type
+ std::allocator<basic_json> // allocator_type
+ >
+ @endcode
+
+ #### Limits
+
+ [RFC 7159](http://rfc7159.net/rfc7159) specifies:
+ > An implementation may set limits on the maximum depth of nesting.
+
+ In this class, the array's limit of nesting is not constraint explicitly.
+ However, a maximum depth of nesting may be introduced by the compiler or
+ runtime environment. A theoretical limit can be queried by calling the @ref
+ max_size function of a JSON array.
+
+ #### Storage
+
+ Arrays are stored as pointers in a `basic_json` type. That is, for any
+ access to array values, a pointer of type `array_t*` must be dereferenced.
+ */
+ using array_t = ArrayType<basic_json, AllocatorType<basic_json>>;
+
+ /*!
+ @brief a type for a string
+
+ [RFC 7159](http://rfc7159.net/rfc7159) describes JSON strings as follows:
+ > A string is a sequence of zero or more Unicode characters.
+
+ To store objects in C++, a type is defined by the template parameters @a
+ StringType which chooses the container (e.g., `std::string`) to use.
+
+ Unicode values are split by the JSON class into byte-sized characters
+ during deserialization.
+
+ #### Default type
+
+ With the default values for @a StringType (`std::string`), the default
+ value for @a string_t is:
+
+ @code {.cpp}
+ std::string
+ @endcode
+
+ #### String comparison
+
+ [RFC 7159](http://rfc7159.net/rfc7159) states:
+ > Software implementations are typically required to test names of object
+ > members for equality. Implementations that transform the textual
+ > representation into sequences of Unicode code units and then perform the
+ > comparison numerically, code unit by code unit, are interoperable in the
+ > sense that implementations will agree in all cases on equality or
+ > inequality of two strings. For example, implementations that compare
+ > strings with escaped characters unconverted may incorrectly find that
+ > `"a\\b"` and `"a\u005Cb"` are not equal.
+
+ This implementation is interoperable as it does compare strings code unit
+ by code unit.
+
+ #### Storage
+
+ String values are stored as pointers in a `basic_json` type. That is, for
+ any access to string values, a pointer of type `string_t*` must be
+ dereferenced.
+ */
+ using string_t = StringType;
+
+ /*!
+ @brief a type for a boolean
+
+ [RFC 7159](http://rfc7159.net/rfc7159) implicitly describes a boolean as a
+ type which differentiates the two literals `true` and `false`.
+
+ To store objects in C++, a type is defined by the template parameter @a
+ BooleanType which chooses the type to use.
+
+ #### Default type
+
+ With the default values for @a BooleanType (`bool`), the default value for
+ @a boolean_t is:
+
+ @code {.cpp}
+ bool
+ @endcode
+
+ #### Storage
+
+ Boolean values are stored directly inside a `basic_json` type.
+ */
+ using boolean_t = BooleanType;
+
+ /*!
+ @brief a type for a number (integer)
+
+ [RFC 7159](http://rfc7159.net/rfc7159) describes numbers as follows:
+ > The representation of numbers is similar to that used in most programming
+ > languages. A number is represented in base 10 using decimal digits. It
+ > contains an integer component that may be prefixed with an optional minus
+ > sign, which may be followed by a fraction part and/or an exponent part.
+ > Leading zeros are not allowed. (...) Numeric values that cannot be
+ > represented in the grammar below (such as Infinity and NaN) are not
+ > permitted.
+
+ This description includes both integer and floating-point numbers. However,
+ C++ allows more precise storage if it is known whether the number is an
+ integer or a floating-point number. Therefore, two different types, @ref
+ number_integer_t and @ref number_float_t are used.
+
+ To store integer numbers in C++, a type is defined by the template
+ parameter @a NumberIntegerType which chooses the type to use.
+
+ #### Default type
+
+ With the default values for @a NumberIntegerType (`int64_t`), the default
+ value for @a number_integer_t is:
+
+ @code {.cpp}
+ int64_t
+ @endcode
+
+ #### Default behavior
+
+ - The restrictions about leading zeros is not enforced in C++. Instead,
+ leading zeros in integer literals lead to an interpretation as octal
+ number. Internally, the value will be stored as decimal number. For
+ instance, the C++ integer literal `010` will be serialized to `8`. During
+ deserialization, leading zeros yield an error.
+ - Not-a-number (NaN) values will be serialized to `null`.
+
+ #### Limits
+
+ [RFC 7159](http://rfc7159.net/rfc7159) specifies:
+ > An implementation may set limits on the range and precision of numbers.
+
+ When the default type is used, the maximal integer number that can be
+ stored is `9223372036854775807` (INT64_MAX) and the minimal integer number
+ that can be stored is `-9223372036854775808` (INT64_MIN). Integer numbers
+ that are out of range will yield over/underflow when used in a constructor.
+ During deserialization, too large or small integer numbers will be
+ automatically be stored as @ref number_float_t.
+
+ [RFC 7159](http://rfc7159.net/rfc7159) further states:
+ > Note that when such software is used, numbers that are integers and are
+ > in the range \f$[-2^{53}+1, 2^{53}-1]\f$ are interoperable in the sense
+ > that implementations will agree exactly on their numeric values.
+
+ As this range is a subrange of the exactly supported range [INT64_MIN,
+ INT64_MAX], this class's integer type is interoperable.
+
+ #### Storage
+
+ Integer number values are stored directly inside a `basic_json` type.
+ */
+ using number_integer_t = NumberIntegerType;
+
+ /*!
+ @brief a type for a number (floating-point)
+
+ [RFC 7159](http://rfc7159.net/rfc7159) describes numbers as follows:
+ > The representation of numbers is similar to that used in most programming
+ > languages. A number is represented in base 10 using decimal digits. It
+ > contains an integer component that may be prefixed with an optional minus
+ > sign, which may be followed by a fraction part and/or an exponent part.
+ > Leading zeros are not allowed. (...) Numeric values that cannot be
+ > represented in the grammar below (such as Infinity and NaN) are not
+ > permitted.
+
+ This description includes both integer and floating-point numbers. However,
+ C++ allows more precise storage if it is known whether the number is an
+ integer or a floating-point number. Therefore, two different types, @ref
+ number_integer_t and @ref number_float_t are used.
+
+ To store floating-point numbers in C++, a type is defined by the template
+ parameter @a NumberFloatType which chooses the type to use.
+
+ #### Default type
+
+ With the default values for @a NumberFloatType (`double`), the default
+ value for @a number_float_t is:
+
+ @code {.cpp}
+ double
+ @endcode
+
+ #### Default behavior
+
+ - The restrictions about leading zeros is not enforced in C++. Instead,
+ leading zeros in floating-point literals will be ignored. Internally, the
+ value will be stored as decimal number. For instance, the C++
+ floating-point literal `01.2` will be serialized to `1.2`. During
+ deserialization, leading zeros yield an error.
+ - Not-a-number (NaN) values will be serialized to `null`.
+
+ #### Limits
+
+ [RFC 7159](http://rfc7159.net/rfc7159) states:
+ > This specification allows implementations to set limits on the range and
+ > precision of numbers accepted. Since software that implements IEEE
+ > 754-2008 binary64 (double precision) numbers is generally available and
+ > widely used, good interoperability can be achieved by implementations that
+ > expect no more precision or range than these provide, in the sense that
+ > implementations will approximate JSON numbers within the expected
+ > precision.
+
+ This implementation does exactly follow this approach, as it uses double
+ precision floating-point numbers. Note values smaller than
+ `-1.79769313486232e+308` and values greather than `1.79769313486232e+308`
+ will be stored as NaN internally and be serialized to `null`.
+
+ #### Storage
+
+ Floating-point number values are stored directly inside a `basic_json` type.
+ */
+ using number_float_t = NumberFloatType;
+
+ /// @}
+
+
+ ///////////////////////////
+ // JSON type enumeration //
+ ///////////////////////////
+
+ /*!
+ @brief the JSON type enumeration
+
+ This enumeration collects the different JSON types. It is internally used
+ to distinguish the stored values, and the functions is_null, is_object,
+ is_array, is_string, is_boolean, is_number, and is_discarded rely on it.
+ */
+ enum class value_t : uint8_t
+ {
+ null, ///< null value
+ object, ///< object (unordered set of name/value pairs)
+ array, ///< array (ordered collection of values)
+ string, ///< string value
+ boolean, ///< boolean value
+ number_integer, ///< number value (integer)
+ number_float, ///< number value (floating-point)
+ discarded ///< discarded by the the parser callback function
+ };
+
+
+ private:
+ /// helper for exception-safe object creation
+ template<typename T, typename... Args>
+ static T* create( Args&& ... args )
+ {
+ AllocatorType<T> alloc;
+ auto deleter = [&](T * object)
+ {
+ alloc.deallocate(object, 1);
+ };
+ std::unique_ptr<T, decltype(deleter)> object(alloc.allocate(1), deleter);
+ alloc.construct(object.get(), std::forward<Args>(args)...);
+ return object.release();
+ }
+
+ ////////////////////////
+ // JSON value storage //
+ ////////////////////////
+
+ /// a JSON value
+ union json_value
+ {
+ /// object (stored with pointer to save storage)
+ object_t* object;
+ /// array (stored with pointer to save storage)
+ array_t* array;
+ /// string (stored with pointer to save storage)
+ string_t* string;
+ /// boolean
+ boolean_t boolean;
+ /// number (integer)
+ number_integer_t number_integer;
+ /// number (floating-point)
+ number_float_t number_float;
+
+ /// default constructor (for null values)
+ json_value() noexcept = default;
+ /// constructor for booleans
+ json_value(boolean_t v) noexcept : boolean(v) {}
+ /// constructor for numbers (integer)
+ json_value(number_integer_t v) noexcept : number_integer(v) {}
+ /// constructor for numbers (floating-point)
+ json_value(number_float_t v) noexcept : number_float(v) {}
+ /// constructor for empty values of a given type
+ json_value(value_t t)
+ {
+ switch (t)
+ {
+ case (value_t::null):
+ case (value_t::discarded):
+ {
+ break;
+ }
+
+ case (value_t::object):
+ {
+ object = create<object_t>();
+ break;
+ }
+
+ case (value_t::array):
+ {
+ array = create<array_t>();
+ break;
+ }
+
+ case (value_t::string):
+ {
+ string = create<string_t>("");
+ break;
+ }
+
+ case (value_t::boolean):
+ {
+ boolean = boolean_t(false);
+ break;
+ }
+
+ case (value_t::number_integer):
+ {
+ number_integer = number_integer_t(0);
+ break;
+ }
+
+ case (value_t::number_float):
+ {
+ number_float = number_float_t(0.0);
+ break;
+ }
+ }
+ }
+
+ /// constructor for strings
+ json_value(const string_t& value)
+ {
+ string = create<string_t>(value);
+ }
+
+ /// constructor for objects
+ json_value(const object_t& value)
+ {
+ object = create<object_t>(value);
+ }
+
+ /// constructor for arrays
+ json_value(const array_t& value)
+ {
+ array = create<array_t>(value);
+ }
+ };
+
+
+ public:
+ //////////////////////////
+ // JSON parser callback //
+ //////////////////////////
+
+ /*!
+ @brief JSON callback events
+
+ This enumeration lists the parser events that can trigger calling a
+ callback function of type @ref parser_callback_t during parsing.
+ */
+ enum class parse_event_t : uint8_t
+ {
+ /// the parser read `{` and started to process a JSON object
+ object_start,
+ /// the parser read `}` and finished processing a JSON object
+ object_end,
+ /// the parser read `[` and started to process a JSON array
+ array_start,
+ /// the parser read `]` and finished processing a JSON array
+ array_end,
+ /// the parser read a key of a value in an object
+ key,
+ /// the parser finished reading a JSON value
+ value
+ };
+
+ /*!
+ @brief per-element parser callback type
+
+ With a parser callback function, the result of parsing a JSON text can be
+ influenced. When passed to @ref parse(std::istream&, parser_callback_t) or
+ @ref parse(const string_t&, parser_callback_t), it is called on certain
+ events (passed as @ref parse_event_t via parameter @a event) with a set
+ recursion depth @a depth and context JSON value @a parsed. The return value
+ of the callback function is a boolean indicating whether the element that
+ emitted the callback shall be kept or not.
+
+ We distinguish six scenarios (determined by the event type) in which the
+ callback function can be called. The following table describes the values
+ of the parameters @a depth, @a event, and @a parsed.
+
+ parameter @a event | description | parameter @a depth | parameter @a parsed
+ ------------------ | ----------- | ------------------ | -------------------
+ parse_event_t::object_start | the parser read `{` and started to process a JSON object | depth of the parent of the JSON object | a JSON value with type discarded
+ parse_event_t::key | the parser read a key of a value in an object | depth of the currently parsed JSON object | a JSON string containing the key
+ parse_event_t::object_end | the parser read `}` and finished processing a JSON object | depth of the parent of the JSON object | the parsed JSON object
+ parse_event_t::array_start | the parser read `[` and started to process a JSON array | depth of the parent of the JSON array | a JSON value with type discarded
+ parse_event_t::array_end | the parser read `]` and finished processing a JSON array | depth of the parent of the JSON array | the parsed JSON array
+ parse_event_t::value | the parser finished reading a JSON value | depth of the value | the parsed JSON value
+
+ Discarding a value (i.e., returning `false`) has different effects depending on the
+ context in which function was called:
+
+ - Discarded values in structured types are skipped. That is, the parser
+ will behave as if the discarded value was never read.
+ - In case a value outside a structured type is skipped, it is replaced with
+ `null`. This case happens if the top-level element is skipped.
+
+ @param[in] depth the depth of the recursion during parsing
+
+ @param[in] event an event of type parse_event_t indicating the context in
+ the callback function has been called
+
+ @param[in,out] parsed the current intermediate parse result; note that
+ writing to this value has no effect for parse_event_t::key events
+
+ @return Whether the JSON value which called the function during parsing
+ should be kept (`true`) or not (`false`). In the latter case, it is either
+ skipped completely or replaced by an empty discarded object.
+
+ @sa @ref parse(std::istream&, parser_callback_t) or
+ @ref parse(const string_t&, parser_callback_t) for examples
+ */
+ using parser_callback_t = std::function<bool(
+ int depth, parse_event_t event, basic_json& parsed)>;
+
+
+ //////////////////
+ // constructors //
+ //////////////////
+
+ /*!
+ @brief create an empty value with a given type
+
+ Create an empty JSON value with a given type. The value will be default
+ initialized with an empty value which depends on the type:
+
+ Value type | initial value
+ ----------- | -------------
+ null | `null`
+ boolean | `false`
+ string | `""`
+ number | `0`
+ object | `{}`
+ array | `[]`
+
+ @param[in] value the type of the value to create
+
+ @complexity Constant.
+
+ @throw std::bad_alloc if allocation for object, array, or string value
+ fails
+
+ @liveexample{The following code shows the constructor for different @ref
+ value_t values,basic_json__value_t}
+ */
+ basic_json(const value_t value)
+ : m_type(value), m_value(value)
+ {}
+
+ /*!
+ @brief create a null object (implicitly)
+
+ Create a `null` JSON value. This is the implicit version of the `null`
+ value constructor as it takes no parameters.
+
+ @complexity Constant.
+
+ @requirement This function satisfies the Container requirements:
+ - The complexity is constant.
+ - As postcondition, it holds: `basic_json().empty() == true`.
+
+ @liveexample{The following code shows the constructor for a `null` JSON
+ value.,basic_json}
+
+ @sa basic_json(std::nullptr_t)
+ */
+ basic_json() noexcept = default;
+
+ /*!
+ @brief create a null object (explicitly)
+
+ Create a `null` JSON value. This is the explicitly version of the `null`
+ value constructor as it takes a null pointer as parameter. It allows to
+ create `null` values by explicitly assigning a @c nullptr to a JSON value.
+ The passed null pointer itself is not read - it is only used to choose the
+ right constructor.
+
+ @complexity Constant.
+
+ @liveexample{The following code shows the constructor with null pointer
+ parameter.,basic_json__nullptr_t}
+
+ @sa basic_json()
+ */
+ basic_json(std::nullptr_t) noexcept
+ : basic_json(value_t::null)
+ {}
+
+ /*!
+ @brief create an object (explicit)
+
+ Create an object JSON value with a given content.
+
+ @param[in] value a value for the object
+
+ @complexity Linear in the size of the passed @a value.
+
+ @throw std::bad_alloc if allocation for object value fails
+
+ @liveexample{The following code shows the constructor with an @ref object_t
+ parameter.,basic_json__object_t}
+
+ @sa basic_json(const CompatibleObjectType&)
+ */
+ basic_json(const object_t& value)
+ : m_type(value_t::object), m_value(value)
+ {}
+
+ /*!
+ @brief create an object (implicit)
+
+ Create an object JSON value with a given content. This constructor allows
+ any type that can be used to construct values of type @ref object_t.
+ Examples include the types `std::map` and `std::unordered_map`.
+
+ @tparam CompatibleObjectType an object type whose `key_type` and
+ `value_type` is compatible to @ref object_t
+
+ @param[in] value a value for the object
+
+ @complexity Linear in the size of the passed @a value.
+
+ @throw std::bad_alloc if allocation for object value fails
+
+ @liveexample{The following code shows the constructor with several
+ compatible object type parameters.,basic_json__CompatibleObjectType}
+
+ @sa basic_json(const object_t&)
+ */
+ template <class CompatibleObjectType, typename
+ std::enable_if<
+ std::is_constructible<typename object_t::key_type, typename CompatibleObjectType::key_type>::value and
+ std::is_constructible<basic_json, typename CompatibleObjectType::mapped_type>::value, int>::type
+ = 0>
+ basic_json(const CompatibleObjectType& value)
+ : m_type(value_t::object)
+ {
+ using std::begin;
+ using std::end;
+ m_value.object = create<object_t>(begin(value), end(value));
+ }
+
+ /*!
+ @brief create an array (explicit)
+
+ Create an array JSON value with a given content.
+
+ @param[in] value a value for the array
+
+ @complexity Linear in the size of the passed @a value.
+
+ @throw std::bad_alloc if allocation for array value fails
+
+ @liveexample{The following code shows the constructor with an @ref array_t
+ parameter.,basic_json__array_t}
+
+ @sa basic_json(const CompatibleArrayType&)
+ */
+ basic_json(const array_t& value)
+ : m_type(value_t::array), m_value(value)
+ {}
+
+ /*!
+ @brief create an array (implicit)
+
+ Create an array JSON value with a given content. This constructor allows
+ any type that can be used to construct values of type @ref array_t.
+ Examples include the types `std::vector`, `std::list`, and `std::set`.
+
+ @tparam CompatibleArrayType an object type whose `value_type` is compatible
+ to @ref array_t
+
+ @param[in] value a value for the array
+
+ @complexity Linear in the size of the passed @a value.
+
+ @throw std::bad_alloc if allocation for array value fails
+
+ @liveexample{The following code shows the constructor with several
+ compatible array type parameters.,basic_json__CompatibleArrayType}
+
+ @sa basic_json(const array_t&)
+ */
+ template <class CompatibleArrayType, typename
+ std::enable_if<
+ not std::is_same<CompatibleArrayType, typename basic_json_t::iterator>::value and
+ not std::is_same<CompatibleArrayType, typename basic_json_t::const_iterator>::value and
+ not std::is_same<CompatibleArrayType, typename basic_json_t::reverse_iterator>::value and
+ not std::is_same<CompatibleArrayType, typename basic_json_t::const_reverse_iterator>::value and
+ not std::is_same<CompatibleArrayType, typename array_t::iterator>::value and
+ not std::is_same<CompatibleArrayType, typename array_t::const_iterator>::value and
+ std::is_constructible<basic_json, typename CompatibleArrayType::value_type>::value, int>::type
+ = 0>
+ basic_json(const CompatibleArrayType& value)
+ : m_type(value_t::array)
+ {
+ using std::begin;
+ using std::end;
+ m_value.array = create<array_t>(begin(value), end(value));
+ }
+
+ /*!
+ @brief create a string (explicit)
+
+ Create an string JSON value with a given content.
+
+ @param[in] value a value for the string
+
+ @complexity Linear in the size of the passed @a value.
+
+ @throw std::bad_alloc if allocation for string value fails
+
+ @liveexample{The following code shows the constructor with an @ref string_t
+ parameter.,basic_json__string_t}
+
+ @sa basic_json(const typename string_t::value_type*)
+ @sa basic_json(const CompatibleStringType&)
+ */
+ basic_json(const string_t& value)
+ : m_type(value_t::string), m_value(value)
+ {}
+
+ /*!
+ @brief create a string (explicit)
+
+ Create a string JSON value with a given content.
+
+ @param[in] value a literal value for the string
+
+ @complexity Linear in the size of the passed @a value.
+
+ @throw std::bad_alloc if allocation for string value fails
+
+ @liveexample{The following code shows the constructor with string literal
+ parameter.,basic_json__string_t_value_type}
+
+ @sa basic_json(const string_t&)
+ @sa basic_json(const CompatibleStringType&)
+ */
+ basic_json(const typename string_t::value_type* value)
+ : basic_json(string_t(value))
+ {}
+
+ /*!
+ @brief create a string (implicit)
+
+ Create a string JSON value with a given content.
+
+ @param[in] value a value for the string
+
+ @tparam CompatibleStringType an string type which is compatible to @ref
+ string_t
+
+ @complexity Linear in the size of the passed @a value.
+
+ @throw std::bad_alloc if allocation for string value fails
+
+ @liveexample{The following code shows the construction of a string value
+ from a compatible type.,basic_json__CompatibleStringType}
+
+ @sa basic_json(const string_t&)
+ */
+ template <class CompatibleStringType, typename
+ std::enable_if<
+ std::is_constructible<string_t, CompatibleStringType>::value, int>::type
+ = 0>
+ basic_json(const CompatibleStringType& value)
+ : basic_json(string_t(value))
+ {}
+
+ /*!
+ @brief create a boolean (explicit)
+
+ Creates a JSON boolean type from a given value.
+
+ @param[in] value a boolean value to store
+
+ @complexity Constant.
+
+ @liveexample{The example below demonstrates boolean
+ values.,basic_json__boolean_t}
+ */
+ basic_json(boolean_t value)
+ : m_type(value_t::boolean), m_value(value)
+ {}
+
+ /*!
+ @brief create an integer number (explicit)
+
+ Create an interger number JSON value with a given content.
+
+ @tparam T helper type to compare number_integer_t and int (not visible in)
+ the interface.
+
+ @param[in] value an integer to create a JSON number from
+
+ @note This constructor would have the same signature as @ref
+ basic_json(const int value), so we need to switch this one off in case
+ number_integer_t is the same as int. This is done via the helper type @a T.
+
+ @complexity Constant.
+
+ @liveexample{The example below shows the construction of a JSON integer
+ number value.,basic_json__number_integer_t}
+
+ @sa basic_json(const int)
+ */
+ template<typename T,
+ typename std::enable_if<
+ not (std::is_same<T, int>::value)
+ and std::is_same<T, number_integer_t>::value
+ , int>::type = 0>
+ basic_json(const number_integer_t value)
+ : m_type(value_t::number_integer), m_value(value)
+ {}
+
+ /*!
+ @brief create an integer number from an enum type (explicit)
+
+ Create an integer number JSON value with a given content.
+
+ @param[in] value an integer to create a JSON number from
+
+ @note This constructor allows to pass enums directly to a constructor. As
+ C++ has no way of specifying the type of an anonymous enum explicitly, we
+ can only rely on the fact that such values implicitly convert to int. As
+ int may already be the same type of number_integer_t, we may need to switch
+ off the constructor @ref basic_json(const number_integer_t).
+
+ @complexity Constant.
+
+ @liveexample{The example below shows the construction of a JSON integer
+ number value from an anonymous enum.,basic_json__const_int}
+
+ @sa basic_json(const number_integer_t)
+ */
+ basic_json(const int value)
+ : m_type(value_t::number_integer),
+ m_value(static_cast<number_integer_t>(value))
+ {}
+
+ /*!
+ @brief create an integer number (implicit)
+
+ Create an integer number JSON value with a given content. This constructor
+ allows any type that can be used to construct values of type @ref
+ number_integer_t. Examples may include the types `int`, `int32_t`, or
+ `short`.
+
+ @tparam CompatibleNumberIntegerType an integer type which is compatible to
+ @ref number_integer_t.
+
+ @param[in] value an integer to create a JSON number from
+
+ @complexity Constant.
+
+ @liveexample{The example below shows the construction of several JSON
+ integer number values from compatible
+ types.,basic_json__CompatibleIntegerNumberType}
+
+ @sa basic_json(const number_integer_t)
+ */
+ template<typename CompatibleNumberIntegerType, typename
+ std::enable_if<
+ std::is_constructible<number_integer_t, CompatibleNumberIntegerType>::value and
+ std::numeric_limits<CompatibleNumberIntegerType>::is_integer, CompatibleNumberIntegerType>::type
+ = 0>
+ basic_json(const CompatibleNumberIntegerType value) noexcept
+ : m_type(value_t::number_integer),
+ m_value(static_cast<number_integer_t>(value))
+ {}
+
+ /*!
+ @brief create a floating-point number (explicit)
+
+ Create a floating-point number JSON value with a given content.
+
+ @param[in] value a floating-point value to create a JSON number from
+
+ @note RFC 7159 <http://www.rfc-editor.org/rfc/rfc7159.txt>, section 6
+ disallows NaN values:
+ > Numeric values that cannot be represented in the grammar below (such
+ > as Infinity and NaN) are not permitted.
+ In case the parameter @a value is not a number, a JSON null value is
+ created instead.
+
+ @complexity Constant.
+
+ @liveexample{The following example creates several floating-point
+ values.,basic_json__number_float_t}
+ */
+ basic_json(const number_float_t value)
+ : m_type(value_t::number_float), m_value(value)
+ {
+ // replace infinity and NAN by null
+ if (not std::isfinite(value))
+ {
+ m_type = value_t::null;
+ m_value = json_value();
+ }
+ }
+
+ /*!
+ @brief create an floating-point number (implicit)
+
+ Create an floating-point number JSON value with a given content. This
+ constructor allows any type that can be used to construct values of type
+ @ref number_float_t. Examples may include the types `float`.
+
+ @tparam CompatibleNumberFloatType a floating-point type which is compatible
+ to @ref number_float_t.
+
+ @param[in] value a floating-point to create a JSON number from
+
+ @note RFC 7159 <http://www.rfc-editor.org/rfc/rfc7159.txt>, section 6
+ disallows NaN values:
+ > Numeric values that cannot be represented in the grammar below (such
+ > as Infinity and NaN) are not permitted.
+ In case the parameter @a value is not a number, a JSON null value is
+ created instead.
+
+ @complexity Constant.
+
+ @liveexample{The example below shows the construction of several JSON
+ floating-point number values from compatible
+ types.,basic_json__CompatibleNumberFloatType}
+
+ @sa basic_json(const number_float_t)
+ */
+ template<typename CompatibleNumberFloatType, typename = typename
+ std::enable_if<
+ std::is_constructible<number_float_t, CompatibleNumberFloatType>::value and
+ std::is_floating_point<CompatibleNumberFloatType>::value>::type
+ >
+ basic_json(const CompatibleNumberFloatType value) noexcept
+ : basic_json(number_float_t(value))
+ {}
+
+ /*!
+ @brief create a container (array or object) from an initializer list
+
+ Creates a JSON value of type array or object from the passed initializer
+ list @a init. In case @a type_deduction is `true` (default), the type of
+ the JSON value to be created is deducted from the initializer list @a init
+ according to the following rules:
+
+ 1. If the list is empty, an empty JSON object value `{}` is created.
+ 2. If the list consists of pairs whose first element is a string, a JSON
+ object value is created where the first elements of the pairs are treated
+ as keys and the second elements are as values.
+ 3. In all other cases, an array is created.
+
+ The rules aim to create the best fit between a C++ initializer list and
+ JSON values. The ratioinale is as follows:
+
+ 1. The empty initializer list is written as `{}` which is exactly an empty
+ JSON object.
+ 2. C++ has now way of describing mapped types other than to list a list of
+ pairs. As JSON requires that keys must be of type string, rule 2 is the
+ weakest constraint one can pose on initializer lists to interpret them as
+ an object.
+ 3. In all other cases, the initializer list could not be interpreted as
+ JSON object type, so interpreting it as JSON array type is safe.
+
+ With the rules described above, the following JSON values cannot be
+ expressed by an initializer list:
+
+ - the empty array (`[]`): use @ref array(std::initializer_list<basic_json>)
+ with an empty initializer list in this case
+ - arrays whose elements satisfy rule 2: use @ref
+ array(std::initializer_list<basic_json>) with the same initializer list
+ in this case
+
+ @note When used without parentheses around an empty initializer list, @ref
+ basic_json() is called instead of this function, yielding the JSON null
+ value.
+
+ @param[in] init initializer list with JSON values
+
+ @param[in] type_deduction internal parameter; when set to `true`, the type
+ of the JSON value is deducted from the initializer list @a init; when set
+ to `false`, the type provided via @a manual_type is forced. This mode is
+ used by the functions @ref array(std::initializer_list<basic_json>) and
+ @ref object(std::initializer_list<basic_json>).
+
+ @param[in] manual_type internal parameter; when @a type_deduction is set to
+ `false`, the created JSON value will use the provided type (only @ref
+ value_t::array and @ref value_t::object are valid); when @a type_deduction
+ is set to `true`, this parameter has no effect
+
+ @throw std::domain_error if @a type_deduction is `false`, @a manual_type is
+ `value_t::object`, but @a init contains an element which is not a pair
+ whose first element is a string
+
+ @complexity Linear in the size of the initializer list @a init.
+
+ @liveexample{The example below shows how JSON values are created from
+ initializer lists,basic_json__list_init_t}
+
+ @sa basic_json array(std::initializer_list<basic_json>) - create a JSON
+ array value from an initializer list
+ @sa basic_json object(std::initializer_list<basic_json>) - create a JSON
+ object value from an initializer list
+ */
+ basic_json(std::initializer_list<basic_json> init,
+ bool type_deduction = true,
+ value_t manual_type = value_t::array)
+ {
+ // the initializer list could describe an object
+ bool is_object = true;
+
+ // check if each element is an array with two elements whose first element
+ // is a string
+ for (const auto& element : init)
+ {
+ if (element.m_type != value_t::array or element.size() != 2
+ or element[0].m_type != value_t::string)
+ {
+ // we found an element that makes it impossible to use the
+ // initializer list as object
+ is_object = false;
+ break;
+ }
+ }
+
+ // adjust type if type deduction is not wanted
+ if (not type_deduction)
+ {
+ // if array is wanted, do not create an object though possible
+ if (manual_type == value_t::array)
+ {
+ is_object = false;
+ }
+
+ // if object is wanted but impossible, throw an exception
+ if (manual_type == value_t::object and not is_object)
+ {
+ throw std::domain_error("cannot create object from initializer list");
+ }
+ }
+
+ if (is_object)
+ {
+ // the initializer list is a list of pairs -> create object
+ m_type = value_t::object;
+ m_value = value_t::object;
+
+ for (auto& element : init)
+ {
+ m_value.object->emplace(std::move(*(element[0].m_value.string)), std::move(element[1]));
+ }
+ }
+ else
+ {
+ // the initializer list describes an array -> create array
+ m_type = value_t::array;
+ m_value.array = create<array_t>(std::move(init));
+ }
+ }
+
+ /*!
+ @brief explicitly create an array from an initializer list
+
+ Creates a JSON array value from a given initializer list. That is, given a
+ list of values `a, b, c`, creates the JSON value `[a, b, c]`. If the
+ initializer list is empty, the empty array `[]` is created.
+
+ @note This function is only needed to express two edge cases that cannot be
+ realized with the initializer list constructor (@ref
+ basic_json(std::initializer_list<basic_json>, bool, value_t)). These cases
+ are:
+ 1. creating an array whose elements are all pairs whose first element is a
+ string - in this case, the initializer list constructor would create an
+ object, taking the first elements as keys
+ 2. creating an empty array - passing the empty initializer list to the
+ initializer list constructor yields an empty object
+
+ @param[in] init initializer list with JSON values to create an array from
+ (optional)
+
+ @return JSON array value
+
+ @complexity Linear in the size of @a init.
+
+ @liveexample{The following code shows an example for the @ref array
+ function.,array}
+
+ @sa basic_json(std::initializer_list<basic_json>, bool, value_t) - create a
+ JSON value from an initializer list
+ @sa basic_json object(std::initializer_list<basic_json>) - create a JSON
+ object value from an initializer list
+ */
+ static basic_json array(std::initializer_list<basic_json> init =
+ std::initializer_list<basic_json>())
+ {
+ return basic_json(init, false, value_t::array);
+ }
+
+ /*!
+ @brief explicitly create an object from an initializer list
+
+ Creates a JSON object value from a given initializer list. The initializer
+ lists elements must be pairs, and their first elments must be strings. If
+ the initializer list is empty, the empty object `{}` is created.
+
+ @note This function is only added for symmetry reasons. In contrast to the
+ related function @ref basic_json array(std::initializer_list<basic_json>),
+ there are no cases which can only be expressed by this function. That is,
+ any initializer list @a init can also be passed to the initializer list
+ constructor @ref basic_json(std::initializer_list<basic_json>, bool,
+ value_t).
+
+ @param[in] init initializer list to create an object from (optional)
+
+ @return JSON object value
+
+ @throw std::domain_error if @a init is not a pair whose first elements are
+ strings; thrown by @ref basic_json(std::initializer_list<basic_json>, bool,
+ value_t)
+
+ @complexity Linear in the size of @a init.
+
+ @liveexample{The following code shows an example for the @ref object
+ function.,object}
+
+ @sa basic_json(std::initializer_list<basic_json>, bool, value_t) - create a
+ JSON value from an initializer list
+ @sa basic_json array(std::initializer_list<basic_json>) - create a JSON
+ array value from an initializer list
+ */
+ static basic_json object(std::initializer_list<basic_json> init =
+ std::initializer_list<basic_json>())
+ {
+ return basic_json(init, false, value_t::object);
+ }
+
+ /*!
+ @brief construct an array with count copies of given value
+
+ Constructs a JSON array value by creating @a count copies of a passed
+ value. In case @a count is `0`, an empty array is created. As postcondition,
+ `std::distance(begin(),end()) == count` holds.
+
+ @param[in] count the number of JSON copies of @a value to create
+ @param[in] value the JSON value to copy
+
+ @complexity Linear in @a count.
+
+ @liveexample{The following code shows examples for the @ref
+ basic_json(size_type\, const basic_json&)
+ constructor.,basic_json__size_type_basic_json}
+ */
+ basic_json(size_type count, const basic_json& value)
+ : m_type(value_t::array)
+ {
+ m_value.array = create<array_t>(count, value);
+ }
+
+ /*!
+ @brief construct a JSON container given an iterator range
+
+ Constructs the JSON value with the contents of the range `[first, last)`.
+ The semantics depends on the different types a JSON value can have:
+ - In case of primitive types (number, boolean, or string), @a first must
+ be `begin()` and @a last must be `end()`. In this case, the value is
+ copied. Otherwise, std::out_of_range is thrown.
+ - In case of structured types (array, object), the constructor behaves
+ as similar versions for `std::vector`.
+ - In case of a null type, std::domain_error is thrown.
+
+ @tparam InputIT an input iterator type (@ref iterator or @ref
+ const_iterator)
+
+ @param[in] first begin of the range to copy from (included)
+ @param[in] last end of the range to copy from (excluded)
+
+ @throw std::domain_error if iterators are not compatible; that is, do not
+ belong to the same JSON value
+ @throw std::out_of_range if iterators are for a primitive type (number,
+ boolean, or string) where an out of range error can be detected easily
+ @throw std::bad_alloc if allocation for object, array, or string fails
+ @throw std::domain_error if called with a null value
+
+ @complexity Linear in distance between @a first and @a last.
+
+ @liveexample{The example below shows several ways to create JSON values by
+ specifying a subrange with iterators.,basic_json__InputIt_InputIt}
+ */
+ template <class InputIT, typename
+ std::enable_if<
+ std::is_same<InputIT, typename basic_json_t::iterator>::value or
+ std::is_same<InputIT, typename basic_json_t::const_iterator>::value
+ , int>::type
+ = 0>
+ basic_json(InputIT first, InputIT last) : m_type(first.m_object->m_type)
+ {
+ // make sure iterator fits the current value
+ if (first.m_object != last.m_object)
+ {
+ throw std::domain_error("iterators are not compatible");
+ }
+
+ // check if iterator range is complete for primitive values
+ switch (m_type)
+ {
+ case value_t::number_integer:
+ case value_t::number_float:
+ case value_t::boolean:
+ case value_t::string:
+ {
+ if (not first.m_it.primitive_iterator.is_begin() or not last.m_it.primitive_iterator.is_end())
+ {
+ throw std::out_of_range("iterators out of range");
+ }
+ break;
+ }
+
+ default:
+ {
+ break;
+ }
+ }
+
+ switch (m_type)
+ {
+ case value_t::number_integer:
+ {
+ m_value.number_integer = first.m_object->m_value.number_integer;
+ break;
+ }
+
+ case value_t::number_float:
+ {
+ m_value.number_float = first.m_object->m_value.number_float;
+ break;
+ }
+
+ case value_t::boolean:
+ {
+ m_value.boolean = first.m_object->m_value.boolean;
+ break;
+ }
+
+ case value_t::string:
+ {
+ m_value = *first.m_object->m_value.string;
+ break;
+ }
+
+ case value_t::object:
+ {
+ m_value.object = create<object_t>(first.m_it.object_iterator, last.m_it.object_iterator);
+ break;
+ }
+
+ case value_t::array:
+ {
+ m_value.array = create<array_t>(first.m_it.array_iterator, last.m_it.array_iterator);
+ break;
+ }
+
+ default:
+ {
+ throw std::domain_error("cannot use construct with iterators from " + first.m_object->type_name());
+ }
+ }
+ }
+
+ ///////////////////////////////////////
+ // other constructors and destructor //
+ ///////////////////////////////////////
+
+ /*!
+ @brief copy constructor
+
+ Creates a copy of a given JSON value.
+
+ @param[in] other the JSON value to copy
+
+ @complexity Linear in the size of @a other.
+
+ @requirement This function satisfies the Container requirements:
+ - The complexity is linear.
+ - As postcondition, it holds: `other == basic_json(other)`.
+
+ @throw std::bad_alloc if allocation for object, array, or string fails.
+
+ @liveexample{The following code shows an example for the copy
+ constructor.,basic_json__basic_json}
+ */
+ basic_json(const basic_json& other)
+ : m_type(other.m_type)
+ {
+ switch (m_type)
+ {
+ case (value_t::null):
+ case (value_t::discarded):
+ {
+ break;
+ }
+
+ case (value_t::object):
+ {
+ m_value = *other.m_value.object;
+ break;
+ }
+
+ case (value_t::array):
+ {
+ m_value = *other.m_value.array;
+ break;
+ }
+
+ case (value_t::string):
+ {
+ m_value = *other.m_value.string;
+ break;
+ }
+
+ case (value_t::boolean):
+ {
+ m_value = other.m_value.boolean;
+ break;
+ }
+
+ case (value_t::number_integer):
+ {
+ m_value = other.m_value.number_integer;
+ break;
+ }
+
+ case (value_t::number_float):
+ {
+ m_value = other.m_value.number_float;
+ break;
+ }
+ }
+ }
+
+ /*!
+ @brief move constructor
+
+ Move constructor. Constructs a JSON value with the contents of the given
+ value @a other using move semantics. It "steals" the resources from @a
+ other and leaves it as JSON null value.
+
+ @param[in,out] other value to move to this object
+
+ @post @a other is a JSON null value
+
+ @complexity Constant.
+
+ @liveexample{The code below shows the move constructor explicitly called
+ via std::move.,basic_json__moveconstructor}
+ */
+ basic_json(basic_json&& other) noexcept
+ : m_type(std::move(other.m_type)),
+ m_value(std::move(other.m_value))
+ {
+ // invalidate payload
+ other.m_type = value_t::null;
+ other.m_value = {};
+ }
+
+ /*!
+ @brief copy assignment
+
+ Copy assignment operator. Copies a JSON value via the "copy and swap"
+ strategy: It is expressed in terms of the copy constructor, destructor, and
+ the swap() member function.
+
+ @param[in] other value to copy from
+
+ @complexity Linear.
+
+ @requirement This function satisfies the Container requirements:
+ - The complexity is linear.
+
+ @liveexample{The code below shows and example for the copy assignment. It
+ creates a copy of value `a` which is then swapped with `b`. Finally\, the
+ copy of `a` (which is the null value after the swap) is
+ destroyed.,basic_json__copyassignment}
+ */
+ reference& operator=(basic_json other) noexcept (
+ std::is_nothrow_move_constructible<value_t>::value and
+ std::is_nothrow_move_assignable<value_t>::value and
+ std::is_nothrow_move_constructible<json_value>::value and
+ std::is_nothrow_move_assignable<json_value>::value
+ )
+ {
+ using std::swap;
+ swap(m_type, other.m_type);
+ swap(m_value, other.m_value);
+ return *this;
+ }
+
+ /*!
+ @brief destructor
+
+ Destroys the JSON value and frees all allocated memory.
+
+ @complexity Linear.
+
+ @requirement This function satisfies the Container requirements:
+ - The complexity is linear.
+ - All stored elements are destroyed and all memory is freed.
+ */
+ ~basic_json()
+ {
+ switch (m_type)
+ {
+ case (value_t::object):
+ {
+ AllocatorType<object_t> alloc;
+ alloc.destroy(m_value.object);
+ alloc.deallocate(m_value.object, 1);
+ break;
+ }
+
+ case (value_t::array):
+ {
+ AllocatorType<array_t> alloc;
+ alloc.destroy(m_value.array);
+ alloc.deallocate(m_value.array, 1);
+ break;
+ }
+
+ case (value_t::string):
+ {
+ AllocatorType<string_t> alloc;
+ alloc.destroy(m_value.string);
+ alloc.deallocate(m_value.string, 1);
+ break;
+ }
+
+ default:
+ {
+ // all other types need no specific destructor
+ break;
+ }
+ }
+ }
+
+
+ public:
+ ///////////////////////
+ // object inspection //
+ ///////////////////////
+
+ /// @name object inspection
+ /// @{
+
+ /*!
+ @brief serialization
+
+ Serialization function for JSON values. The function tries to mimick
+ Python's @p json.dumps() function, and currently supports its @p indent
+ parameter.
+
+ @param[in] indent if indent is nonnegative, then array elements and object
+ members will be pretty-printed with that indent level. An indent level of 0
+ will only insert newlines. -1 (the default) selects the most compact
+ representation
+
+ @return string containing the serialization of the JSON value
+
+ @complexity Linear.
+
+ @liveexample{The following example shows the effect of different @a indent
+ parameters to the result of the serializaion.,dump}
+
+ @see https://docs.python.org/2/library/json.html#json.dump
+ */
+ string_t dump(const int indent = -1) const
+ {
+ std::stringstream ss;
+
+ if (indent >= 0)
+ {
+ dump(ss, true, static_cast<unsigned int>(indent));
+ }
+ else
+ {
+ dump(ss, false, 0);
+ }
+
+ return ss.str();
+ }
+
+ /*!
+ @brief return the type of the JSON value (explicit)
+
+ Return the type of the JSON value as a value from the @ref value_t
+ enumeration.
+
+ @return the type of the JSON value
+
+ @complexity Constant.
+
+ @liveexample{The following code exemplifies @ref type() for all JSON
+ types.,type}
+ */
+ value_t type() const noexcept
+ {
+ return m_type;
+ }
+
+ /*!
+ @brief return whether type is primitive
+
+ This function returns true iff the JSON type is primitive (string, number,
+ boolean, or null).
+
+ @return `true` if type is primitive (string, number, boolean, or null),
+ `false` otherwise.
+
+ @complexity Constant.
+
+ @liveexample{The following code exemplifies @ref is_primitive for all JSON
+ types.,is_primitive}
+ */
+ bool is_primitive() const noexcept
+ {
+ return is_null() or is_string() or is_boolean() or is_number();
+ }
+
+ /*!
+ @brief return whether type is structured
+
+ This function returns true iff the JSON type is structured (array or
+ object).
+
+ @return `true` if type is structured (array or object), `false` otherwise.
+
+ @complexity Constant.
+
+ @liveexample{The following code exemplifies @ref is_structured for all JSON
+ types.,is_structured}
+ */
+ bool is_structured() const noexcept
+ {
+ return is_array() or is_object();
+ }
+
+ /*!
+ @brief return whether value is null
+
+ This function returns true iff the JSON value is null.
+
+ @return `true` if type is null, `false` otherwise.
+
+ @complexity Constant.
+
+ @liveexample{The following code exemplifies @ref is_null for all JSON
+ types.,is_null}
+ */
+ bool is_null() const noexcept
+ {
+ return m_type == value_t::null;
+ }
+
+ /*!
+ @brief return whether value is a boolean
+
+ This function returns true iff the JSON value is a boolean.
+
+ @return `true` if type is boolean, `false` otherwise.
+
+ @complexity Constant.
+
+ @liveexample{The following code exemplifies @ref is_boolean for all JSON
+ types.,is_boolean}
+ */
+ bool is_boolean() const noexcept
+ {
+ return m_type == value_t::boolean;
+ }
+
+ /*!
+ @brief return whether value is a number
+
+ This function returns true iff the JSON value is a number. This includes
+ both integer and floating-point values.
+
+ @return `true` if type is number, `false` otherwise.
+
+ @complexity Constant.
+
+ @liveexample{The following code exemplifies @ref is_number for all JSON
+ types.,is_number}
+ */
+ bool is_number() const noexcept
+ {
+ return is_number_integer() or is_number_float();
+ }
+
+ /*!
+ @brief return whether value is an integer number
+
+ This function returns true iff the JSON value is an integer number. This
+ excludes floating-point values.
+
+ @return `true` if type is an integer number, `false` otherwise.
+
+ @complexity Constant.
+
+ @liveexample{The following code exemplifies @ref is_number_integer for all
+ JSON types.,is_number_integer}
+ */
+ bool is_number_integer() const noexcept
+ {
+ return m_type == value_t::number_integer;
+ }
+
+ /*!
+ @brief return whether value is a floating-point number
+
+ This function returns true iff the JSON value is a floating-point number.
+ This excludes integer values.
+
+ @return `true` if type is a floating-point number, `false` otherwise.
+
+ @complexity Constant.
+
+ @liveexample{The following code exemplifies @ref is_number_float for all
+ JSON types.,is_number_float}
+ */
+ bool is_number_float() const noexcept
+ {
+ return m_type == value_t::number_float;
+ }
+
+ /*!
+ @brief return whether value is an object
+
+ This function returns true iff the JSON value is an object.
+
+ @return `true` if type is object, `false` otherwise.
+
+ @complexity Constant.
+
+ @liveexample{The following code exemplifies @ref is_object for all JSON
+ types.,is_object}
+ */
+ bool is_object() const noexcept
+ {
+ return m_type == value_t::object;
+ }
+
+ /*!
+ @brief return whether value is an array
+
+ This function returns true iff the JSON value is an array.
+
+ @return `true` if type is array, `false` otherwise.
+
+ @complexity Constant.
+
+ @liveexample{The following code exemplifies @ref is_array for all JSON
+ types.,is_array}
+ */
+ bool is_array() const noexcept
+ {
+ return m_type == value_t::array;
+ }
+
+ /*!
+ @brief return whether value is a string
+
+ This function returns true iff the JSON value is a string.
+
+ @return `true` if type is string, `false` otherwise.
+
+ @complexity Constant.
+
+ @liveexample{The following code exemplifies @ref is_string for all JSON
+ types.,is_string}
+ */
+ bool is_string() const noexcept
+ {
+ return m_type == value_t::string;
+ }
+
+ /*!
+ @brief return whether value is discarded
+
+ This function returns true iff the JSON value was discarded during parsing
+ with a callback function (see @ref parser_callback_t).
+
+ @note This function will always be `false` for JSON values after parsing.
+ That is, discarded values can only occur during parsing, but will be
+ removed when inside a structured value or replaced by null in other cases.
+
+ @return `true` if type is discarded, `false` otherwise.
+
+ @complexity Constant.
+
+ @liveexample{The following code exemplifies @ref is_discarded for all JSON
+ types.,is_discarded}
+ */
+ bool is_discarded() const noexcept
+ {
+ return m_type == value_t::discarded;
+ }
+
+ /*!
+ @brief return the type of the JSON value (implicit)
+
+ Implicitly return the type of the JSON value as a value from the @ref
+ value_t enumeration.
+
+ @return the type of the JSON value
+
+ @complexity Constant.
+
+ @liveexample{The following code exemplifies the value_t operator for all
+ JSON types.,operator__value_t}
+ */
+ operator value_t() const noexcept
+ {
+ return m_type;
+ }
+
+ /// @}
+
+ private:
+ //////////////////
+ // value access //
+ //////////////////
+
+ /// get an object (explicit)
+ template <class T, typename
+ std::enable_if<
+ std::is_convertible<typename object_t::key_type, typename T::key_type>::value and
+ std::is_convertible<basic_json_t, typename T::mapped_type>::value
+ , int>::type = 0>
+ T get_impl(T*) const
+ {
+ switch (m_type)
+ {
+ case (value_t::object):
+ {
+ return T(m_value.object->begin(), m_value.object->end());
+ }
+ default:
+ {
+ throw std::domain_error("type must be object, but is " + type_name());
+ }
+ }
+ }
+
+ /// get an object (explicit)
+ object_t get_impl(object_t*) const
+ {
+ switch (m_type)
+ {
+ case (value_t::object):
+ {
+ return *(m_value.object);
+ }
+ default:
+ {
+ throw std::domain_error("type must be object, but is " + type_name());
+ }
+ }
+ }
+
+ /// get an array (explicit)
+ template <class T, typename
+ std::enable_if<
+ std::is_convertible<basic_json_t, typename T::value_type>::value and
+ not std::is_same<basic_json_t, typename T::value_type>::value and
+ not std::is_arithmetic<T>::value and
+ not std::is_convertible<std::string, T>::value and
+ not has_mapped_type<T>::value
+ , int>::type = 0>
+ T get_impl(T*) const
+ {
+ switch (m_type)
+ {
+ case (value_t::array):
+ {
+ T to_vector;
+ std::transform(m_value.array->begin(), m_value.array->end(),
+ std::inserter(to_vector, to_vector.end()), [](basic_json i)
+ {
+ return i.get<typename T::value_type>();
+ });
+ return to_vector;
+ }
+ default:
+ {
+ throw std::domain_error("type must be array, but is " + type_name());
+ }
+ }
+ }
+
+ /// get an array (explicit)
+ template <class T, typename
+ std::enable_if<
+ std::is_convertible<basic_json_t, T>::value and
+ not std::is_same<basic_json_t, T>::value
+ , int>::type = 0>
+ std::vector<T> get_impl(std::vector<T>*) const
+ {
+ switch (m_type)
+ {
+ case (value_t::array):
+ {
+ std::vector<T> to_vector;
+ to_vector.reserve(m_value.array->size());
+ std::transform(m_value.array->begin(), m_value.array->end(),
+ std::inserter(to_vector, to_vector.end()), [](basic_json i)
+ {
+ return i.get<T>();
+ });
+ return to_vector;
+ }
+ default:
+ {
+ throw std::domain_error("type must be array, but is " + type_name());
+ }
+ }
+ }
+
+ /// get an array (explicit)
+ template <class T, typename
+ std::enable_if<
+ std::is_same<basic_json, typename T::value_type>::value and
+ not has_mapped_type<T>::value
+ , int>::type = 0>
+ T get_impl(T*) const
+ {
+ switch (m_type)
+ {
+ case (value_t::array):
+ {
+ return T(m_value.array->begin(), m_value.array->end());
+ }
+ default:
+ {
+ throw std::domain_error("type must be array, but is " + type_name());
+ }
+ }
+ }
+
+ /// get an array (explicit)
+ array_t get_impl(array_t*) const
+ {
+ switch (m_type)
+ {
+ case (value_t::array):
+ {
+ return *(m_value.array);
+ }
+ default:
+ {
+ throw std::domain_error("type must be array, but is " + type_name());
+ }
+ }
+ }
+
+ /// get a string (explicit)
+ template <typename T, typename
+ std::enable_if<
+ std::is_convertible<string_t, T>::value
+ , int>::type = 0>
+ T get_impl(T*) const
+ {
+ switch (m_type)
+ {
+ case (value_t::string):
+ {
+ return *m_value.string;
+ }
+ default:
+ {
+ throw std::domain_error("type must be string, but is " + type_name());
+ }
+ }
+ }
+
+ /// get a number (explicit)
+ template<typename T, typename
+ std::enable_if<
+ std::is_arithmetic<T>::value
+ , int>::type = 0>
+ T get_impl(T*) const
+ {
+ switch (m_type)
+ {
+ case (value_t::number_integer):
+ {
+ return static_cast<T>(m_value.number_integer);
+ }
+ case (value_t::number_float):
+ {
+ return static_cast<T>(m_value.number_float);
+ }
+ default:
+ {
+ throw std::domain_error("type must be number, but is " + type_name());
+ }
+ }
+ }
+
+ /// get a boolean (explicit)
+ boolean_t get_impl(boolean_t*) const
+ {
+ switch (m_type)
+ {
+ case (value_t::boolean):
+ {
+ return m_value.boolean;
+ }
+ default:
+ {
+ throw std::domain_error("type must be boolean, but is " + type_name());
+ }
+ }
+ }
+
+ /// get a pointer to the value (object)
+ object_t* get_impl_ptr(object_t*) noexcept
+ {
+ return is_object() ? m_value.object : nullptr;
+ }
+
+ /// get a pointer to the value (object)
+ const object_t* get_impl_ptr(const object_t*) const noexcept
+ {
+ return is_object() ? m_value.object : nullptr;
+ }
+
+ /// get a pointer to the value (array)
+ array_t* get_impl_ptr(array_t*) noexcept
+ {
+ return is_array() ? m_value.array : nullptr;
+ }
+
+ /// get a pointer to the value (array)
+ const array_t* get_impl_ptr(const array_t*) const noexcept
+ {
+ return is_array() ? m_value.array : nullptr;
+ }
+
+ /// get a pointer to the value (string)
+ string_t* get_impl_ptr(string_t*) noexcept
+ {
+ return is_string() ? m_value.string : nullptr;
+ }
+
+ /// get a pointer to the value (string)
+ const string_t* get_impl_ptr(const string_t*) const noexcept
+ {
+ return is_string() ? m_value.string : nullptr;
+ }
+
+ /// get a pointer to the value (boolean)
+ boolean_t* get_impl_ptr(boolean_t*) noexcept
+ {
+ return is_boolean() ? &m_value.boolean : nullptr;
+ }
+
+ /// get a pointer to the value (boolean)
+ const boolean_t* get_impl_ptr(const boolean_t*) const noexcept
+ {
+ return is_boolean() ? &m_value.boolean : nullptr;
+ }
+
+ /// get a pointer to the value (integer number)
+ number_integer_t* get_impl_ptr(number_integer_t*) noexcept
+ {
+ return is_number_integer() ? &m_value.number_integer : nullptr;
+ }
+
+ /// get a pointer to the value (integer number)
+ const number_integer_t* get_impl_ptr(const number_integer_t*) const noexcept
+ {
+ return is_number_integer() ? &m_value.number_integer : nullptr;
+ }
+
+ /// get a pointer to the value (floating-point number)
+ number_float_t* get_impl_ptr(number_float_t*) noexcept
+ {
+ return is_number_float() ? &m_value.number_float : nullptr;
+ }
+
+ /// get a pointer to the value (floating-point number)
+ const number_float_t* get_impl_ptr(const number_float_t*) const noexcept
+ {
+ return is_number_float() ? &m_value.number_float : nullptr;
+ }
+
+ public:
+
+ /// @name value access
+ /// @{
+
+ /*!
+ @brief get a value (explicit)
+
+ Explicit type conversion between the JSON value and a compatible value.
+
+ @tparam ValueType non-pointer type compatible to the JSON value, for
+ instance `int` for JSON integer numbers, `bool` for JSON booleans, or
+ `std::vector` types for JSON arrays
+
+ @return copy of the JSON value, converted to type @a ValueType
+
+ @throw std::domain_error in case passed type @a ValueType is incompatible
+ to JSON
+
+ @complexity Linear in the size of the JSON value.
+
+ @liveexample{The example below shows serveral conversions from JSON values
+ to other types. There a few things to note: (1) Floating-point numbers can
+ be converted to integers\, (2) A JSON array can be converted to a standard
+ `std::vector<short>`\, (3) A JSON object can be converted to C++
+ assiciative containers such as `std::unordered_map<std::string\,
+ json>`.,get__ValueType_const}
+
+ @internal
+ The idea of using a casted null pointer to choose the correct
+ implementation is from <http://stackoverflow.com/a/8315197/266378>.
+ @endinternal
+
+ @sa @ref operator ValueType() const for implicit conversion
+ @sa @ref get() for pointer-member access
+ */
+ template<typename ValueType, typename
+ std::enable_if<
+ not std::is_pointer<ValueType>::value
+ , int>::type = 0>
+ ValueType get() const
+ {
+ return get_impl(static_cast<ValueType*>(nullptr));
+ }
+
+ /*!
+ @brief get a pointer value (explicit)
+
+ Explicit pointer access to the internally stored JSON value. No copies are
+ made.
+
+ @warning Writing data to the pointee of the result yields an undefined
+ state.
+
+ @tparam PointerType pointer type; must be a pointer to @ref array_t, @ref
+ object_t, @ref string_t, @ref boolean_t, @ref number_integer_t, or @ref
+ number_float_t.
+
+ @return pointer to the internally stored JSON value if the requested pointer
+ type @a PointerType fits to the JSON value; `nullptr` otherwise
+
+ @complexity Constant.
+
+ @liveexample{The example below shows how pointers to internal values of a
+ JSON value can be requested. Note that no type conversions are made and a
+ `nullptr` is returned if the value and the requested pointer type does not
+ match.,get__PointerType}
+
+ @sa @ref get_ptr() for explicit pointer-member access
+ */
+ template<typename PointerType, typename
+ std::enable_if<
+ std::is_pointer<PointerType>::value
+ , int>::type = 0>
+ PointerType get() noexcept
+ {
+ // delegate the call to get_ptr
+ return get_ptr<PointerType>();
+ }
+
+ /*!
+ @brief get a pointer value (explicit)
+ @copydoc get()
+ */
+ template<typename PointerType, typename
+ std::enable_if<
+ std::is_pointer<PointerType>::value
+ , int>::type = 0>
+ const PointerType get() const noexcept
+ {
+ // delegate the call to get_ptr
+ return get_ptr<PointerType>();
+ }
+
+ /*!
+ @brief get a pointer value (implicit)
+
+ Implict pointer access to the internally stored JSON value. No copies are
+ made.
+
+ @warning Writing data to the pointee of the result yields an undefined
+ state.
+
+ @tparam PointerType pointer type; must be a pointer to @ref array_t, @ref
+ object_t, @ref string_t, @ref boolean_t, @ref number_integer_t, or @ref
+ number_float_t.
+
+ @return pointer to the internally stored JSON value if the requested pointer
+ type @a PointerType fits to the JSON value; `nullptr` otherwise
+
+ @complexity Constant.
+
+ @liveexample{The example below shows how pointers to internal values of a
+ JSON value can be requested. Note that no type conversions are made and a
+ `nullptr` is returned if the value and the requested pointer type does not
+ match.,get_ptr}
+ */
+ template<typename PointerType, typename
+ std::enable_if<
+ std::is_pointer<PointerType>::value
+ , int>::type = 0>
+ PointerType get_ptr() noexcept
+ {
+ // delegate the call to get_impl_ptr<>()
+ return get_impl_ptr(static_cast<PointerType>(nullptr));
+ }
+
+ /*!
+ @brief get a pointer value (implicit)
+ @copydoc get_ptr()
+ */
+ template<typename PointerType, typename
+ std::enable_if<
+ std::is_pointer<PointerType>::value
+ and std::is_const<PointerType>::value
+ , int>::type = 0>
+ const PointerType get_ptr() const noexcept
+ {
+ // delegate the call to get_impl_ptr<>() const
+ return get_impl_ptr(static_cast<const PointerType>(nullptr));
+ }
+
+ /*!
+ @brief get a value (implicit)
+
+ Implict type conversion between the JSON value and a compatible value. The
+ call is realized by calling @ref get() const.
+
+ @tparam ValueType non-pointer type compatible to the JSON value, for
+ instance `int` for JSON integer numbers, `bool` for JSON booleans, or
+ `std::vector` types for JSON arrays
+
+ @return copy of the JSON value, converted to type @a ValueType
+
+ @throw std::domain_error in case passed type @a ValueType is incompatible
+ to JSON, thrown by @ref get() const
+
+ @complexity Linear in the size of the JSON value.
+
+ @liveexample{The example below shows serveral conversions from JSON values
+ to other types. There a few things to note: (1) Floating-point numbers can
+ be converted to integers\, (2) A JSON array can be converted to a standard
+ `std::vector<short>`\, (3) A JSON object can be converted to C++
+ assiciative containers such as `std::unordered_map<std::string\,
+ json>`.,operator__ValueType}
+ */
+ template<typename ValueType, typename
+ std::enable_if<
+ not std::is_pointer<ValueType>::value
+ , int>::type = 0>
+ operator ValueType() const
+ {
+ // delegate the call to get<>() const
+ return get<ValueType>();
+ }
+
+ /// @}
+
+
+ ////////////////////
+ // element access //
+ ////////////////////
+
+ /// @name element access
+ /// @{
+
+ /*!
+ @brief access specified array element with bounds checking
+
+ Returns a reference to the element at specified location @a idx, with
+ bounds checking.
+
+ @param[in] idx index of the element to access
+
+ @return reference to the element at index @a idx
+
+ @throw std::domain_error if JSON is not an array
+ @throw std::out_of_range if the index @a idx is out of range of the array;
+ that is, `idx >= size()`
+
+ @complexity Constant.
+
+ @liveexample{The example below shows how array elements can be read and
+ written using at.,at__size_type}
+ */
+ reference at(size_type idx)
+ {
+ // at only works for arrays
+ if (m_type != value_t::array)
+ {
+ throw std::domain_error("cannot use at() with " + type_name());
+ }
+
+ return m_value.array->at(idx);
+ }
+
+ /*!
+ @brief access specified array element with bounds checking
+
+ Returns a const reference to the element at specified location @a idx, with
+ bounds checking.
+
+ @param[in] idx index of the element to access
+
+ @return const reference to the element at index @a idx
+
+ @throw std::domain_error if JSON is not an array
+ @throw std::out_of_range if the index @a idx is out of range of the array;
+ that is, `idx >= size()`
+
+ @complexity Constant.
+
+ @liveexample{The example below shows how array elements can be read using
+ at.,at__size_type_const}
+ */
+ const_reference at(size_type idx) const
+ {
+ // at only works for arrays
+ if (m_type != value_t::array)
+ {
+ throw std::domain_error("cannot use at() with " + type_name());
+ }
+
+ return m_value.array->at(idx);
+ }
+
+ /*!
+ @brief access specified object element with bounds checking
+
+ Returns a reference to the element at with specified key @a key, with
+ bounds checking.
+
+ @param[in] key key of the element to access
+
+ @return reference to the element at key @a key
+
+ @throw std::domain_error if JSON is not an object
+ @throw std::out_of_range if the key @a key is is not stored in the object;
+ that is, `find(key) == end()`
+
+ @complexity Logarithmic in the size of the container.
+
+ @liveexample{The example below shows how object elements can be read and
+ written using at.,at__object_t_key_type}
+ */
+ reference at(const typename object_t::key_type& key)
+ {
+ // at only works for objects
+ if (m_type != value_t::object)
+ {
+ throw std::domain_error("cannot use at() with " + type_name());
+ }
+
+ return m_value.object->at(key);
+ }
+
+ /*!
+ @brief access specified object element with bounds checking
+
+ Returns a const reference to the element at with specified key @a key, with
+ bounds checking.
+
+ @param[in] key key of the element to access
+
+ @return const reference to the element at key @a key
+
+ @throw std::domain_error if JSON is not an object
+ @throw std::out_of_range if the key @a key is is not stored in the object;
+ that is, `find(key) == end()`
+
+ @complexity Logarithmic in the size of the container.
+
+ @liveexample{The example below shows how object elements can be read using
+ at.,at__object_t_key_type_const}
+ */
+ const_reference at(const typename object_t::key_type& key) const
+ {
+ // at only works for objects
+ if (m_type != value_t::object)
+ {
+ throw std::domain_error("cannot use at() with " + type_name());
+ }
+
+ return m_value.object->at(key);
+ }
+
+ /*!
+ @brief access specified array element
+
+ Returns a reference to the element at specified location @a idx.
+
+ @note If @a idx is beyond the range of the array (i.e., `idx >= size()`),
+ then the array is silently filled up with `null` values to make `idx` a
+ valid reference to the last stored element.
+
+ @param[in] idx index of the element to access
+
+ @return reference to the element at index @a idx
+
+ @throw std::domain_error if JSON is not an array or null
+
+ @complexity Constant if @a idx is in the range of the array. Otherwise
+ linear in `idx - size()`.
+
+ @liveexample{The example below shows how array elements can be read and
+ written using [] operator. Note the addition of `null`
+ values.,operatorarray__size_type}
+ */
+ reference operator[](size_type idx)
+ {
+ // implicitly convert null to object
+ if (m_type == value_t::null)
+ {
+ m_type = value_t::array;
+ m_value.array = create<array_t>();
+ }
+
+ // [] only works for arrays
+ if (m_type != value_t::array)
+ {
+ throw std::domain_error("cannot use operator[] with " + type_name());
+ }
+
+ for (size_t i = m_value.array->size(); i <= idx; ++i)
+ {
+ m_value.array->push_back(basic_json());
+ }
+
+ return m_value.array->operator[](idx);
+ }
+
+ /*!
+ @brief access specified array element
+
+ Returns a const reference to the element at specified location @a idx.
+
+ @param[in] idx index of the element to access
+
+ @return const reference to the element at index @a idx
+
+ @throw std::domain_error if JSON is not an array
+
+ @complexity Constant.
+
+ @liveexample{The example below shows how array elements can be read using
+ the [] operator.,operatorarray__size_type_const}
+ */
+ const_reference operator[](size_type idx) const
+ {
+ // at only works for arrays
+ if (m_type != value_t::array)
+ {
+ throw std::domain_error("cannot use operator[] with " + type_name());
+ }
+
+ return m_value.array->operator[](idx);
+ }
+
+ /*!
+ @brief access specified object element
+
+ Returns a reference to the element at with specified key @a key.
+
+ @note If @a key is not found in the object, then it is silently added to
+ the object and filled with a `null` value to make `key` a valid reference.
+ In case the value was `null` before, it is converted to an object.
+
+ @param[in] key key of the element to access
+
+ @return reference to the element at key @a key
+
+ @throw std::domain_error if JSON is not an object or null
+
+ @complexity Logarithmic in the size of the container.
+
+ @liveexample{The example below shows how object elements can be read and
+ written using the [] operator.,operatorarray__key_type}
+ */
+ reference operator[](const typename object_t::key_type& key)
+ {
+ // implicitly convert null to object
+ if (m_type == value_t::null)
+ {
+ m_type = value_t::object;
+ m_value.object = create<object_t>();
+ }
+
+ // [] only works for objects
+ if (m_type != value_t::object)
+ {
+ throw std::domain_error("cannot use operator[] with " + type_name());
+ }
+
+ return m_value.object->operator[](key);
+ }
+
+ /*!
+ @brief access specified object element
+
+ Returns a reference to the element at with specified key @a key.
+
+ @param[in] key key of the element to access
+
+ @return reference to the element at key @a key
+
+ @throw std::domain_error if JSON is not an object or null
+
+ @complexity Logarithmic in the size of the container.
+
+ @liveexample{The example below shows how object elements can be read using
+ the [] operator.,operatorarray__key_type_const}
+ */
+ const_reference operator[](const typename object_t::key_type& key) const
+ {
+ // at only works for objects
+ if (m_type != value_t::object)
+ {
+ throw std::domain_error("cannot use operator[] with " + type_name());
+ }
+
+ return m_value.object->operator[](key);
+ }
+
+ /*!
+ @brief access specified object element
+
+ Returns a reference to the element at with specified key @a key.
+
+ @note If @a key is not found in the object, then it is silently added to
+ the object and filled with a `null` value to make `key` a valid reference.
+ In case the value was `null` before, it is converted to an object.
+
+ @note This function is required for compatibility reasons with Clang.
+
+ @param[in] key key of the element to access
+
+ @return reference to the element at key @a key
+
+ @throw std::domain_error if JSON is not an object or null
+
+ @complexity Logarithmic in the size of the container.
+
+ @liveexample{The example below shows how object elements can be read and
+ written using the [] operator.,operatorarray__key_type}
+ */
+ template<typename T, std::size_t n>
+ reference operator[](const T (&key)[n])
+ {
+ // implicitly convert null to object
+ if (m_type == value_t::null)
+ {
+ m_type = value_t::object;
+ m_value = value_t::object;
+ }
+
+ // at only works for objects
+ if (m_type != value_t::object)
+ {
+ throw std::domain_error("cannot use operator[] with " + type_name());
+ }
+
+ return m_value.object->operator[](key);
+ }
+
+ /*!
+ @brief access specified object element
+
+ Returns a reference to the element at with specified key @a key.
+
+ @note This function is required for compatibility reasons with Clang.
+
+ @param[in] key key of the element to access
+
+ @return reference to the element at key @a key
+
+ @throw std::domain_error if JSON is not an object or null
+
+ @complexity Logarithmic in the size of the container.
+
+ @liveexample{The example below shows how object elements can be read using
+ the [] operator.,operatorarray__key_type_const}
+ */
+ template<typename T, std::size_t n>
+ const_reference operator[](const T (&key)[n]) const
+ {
+ // at only works for objects
+ if (m_type != value_t::object)
+ {
+ throw std::domain_error("cannot use operator[] with " + type_name());
+ }
+
+ return m_value.object->operator[](key);
+ }
+
+ /*!
+ @brief access the first element
+
+ Returns a reference to the first element in the container. For a JSON
+ container `c`, the expression `c.front()` is equivalent to `*c.begin()`.
+
+ @return In case of a structured type (array or object), a reference to the
+ first element is returned. In cast of number, string, or boolean values, a
+ reference to the value is returned.
+
+ @complexity Constant.
+
+ @note Calling `front` on an empty container is undefined.
+
+ @throw std::out_of_range when called on null value
+
+ @liveexample{The following code shows an example for @ref front.,front}
+ */
+ reference front()
+ {
+ return *begin();
+ }
+
+ /*!
+ @copydoc basic_json::front()
+ */
+ const_reference front() const
+ {
+ return *cbegin();
+ }
+
+ /*!
+ @brief access the last element
+
+ Returns a reference to the last element in the container. For a JSON
+ container `c`, the expression `c.back()` is equivalent to `{ auto tmp =
+ c.end(); --tmp; return *tmp; }`.
+
+ @return In case of a structured type (array or object), a reference to the
+ last element is returned. In cast of number, string, or boolean values, a
+ reference to the value is returned.
+
+ @complexity Constant.
+
+ @note Calling `back` on an empty container is undefined.
+
+ @throw std::out_of_range when called on null value.
+
+ @liveexample{The following code shows an example for @ref back.,back}
+ */
+ reference back()
+ {
+ auto tmp = end();
+ --tmp;
+ return *tmp;
+ }
+
+ /*!
+ @copydoc basic_json::back()
+ */
+ const_reference back() const
+ {
+ auto tmp = cend();
+ --tmp;
+ return *tmp;
+ }
+
+ /*!
+ @brief remove element given an iterator
+
+ Removes the element specified by iterator @a pos. Invalidates iterators and
+ references at or after the point of the erase, including the end()
+ iterator. The iterator @a pos must be valid and dereferenceable. Thus the
+ end() iterator (which is valid, but is not dereferencable) cannot be used
+ as a value for @a pos.
+
+ If called on a primitive type other than null, the resulting JSON value
+ will be `null`.
+
+ @param[in] pos iterator to the element to remove
+ @return Iterator following the last removed element. If the iterator @a pos
+ refers to the last element, the end() iterator is returned.
+
+ @tparam InteratorType an @ref iterator or @ref const_iterator
+
+ @throw std::domain_error if called on a `null` value
+ @throw std::domain_error if called on an iterator which does not belong to
+ the current JSON value
+ @throw std::out_of_range if called on a primitive type with invalid iterator
+ (i.e., any iterator which is not end())
+
+ @complexity The complexity depends on the type:
+ - objects: amortized constant
+ - arrays: linear in distance between pos and the end of the container
+ - strings: linear in the length of the string
+ - other types: constant
+
+ @liveexample{The example shows the result of erase for different JSON
+ types.,erase__IteratorType}
+ */
+ template <class InteratorType, typename
+ std::enable_if<
+ std::is_same<InteratorType, typename basic_json_t::iterator>::value or
+ std::is_same<InteratorType, typename basic_json_t::const_iterator>::value
+ , int>::type
+ = 0>
+ InteratorType erase(InteratorType pos)
+ {
+ // make sure iterator fits the current value
+ if (this != pos.m_object)
+ {
+ throw std::domain_error("iterator does not fit current value");
+ }
+
+ InteratorType result = end();
+
+ switch (m_type)
+ {
+ case value_t::number_integer:
+ case value_t::number_float:
+ case value_t::boolean:
+ case value_t::string:
+ {
+ if (not pos.m_it.primitive_iterator.is_begin())
+ {
+ throw std::out_of_range("iterator out of range");
+ }
+
+ if (m_type == value_t::string)
+ {
+ delete m_value.string;
+ m_value.string = nullptr;
+ }
+
+ m_type = value_t::null;
+ break;
+ }
+
+ case value_t::object:
+ {
+ result.m_it.object_iterator = m_value.object->erase(pos.m_it.object_iterator);
+ break;
+ }
+
+ case value_t::array:
+ {
+ result.m_it.array_iterator = m_value.array->erase(pos.m_it.array_iterator);
+ break;
+ }
+
+ default:
+ {
+ throw std::domain_error("cannot use erase() with " + type_name());
+ }
+ }
+
+ return result;
+ }
+
+ /*!
+ @brief remove elements given an iterator range
+
+ Removes the element specified by the range `[first; last)`. Invalidates
+ iterators and references at or after the point of the erase, including the
+ end() iterator. The iterator @a first does not need to be dereferenceable
+ if `first == last`: erasing an empty range is a no-op.
+
+ If called on a primitive type other than null, the resulting JSON value
+ will be `null`.
+
+ @param[in] first iterator to the beginning of the range to remove
+ @param[in] last iterator past the end of the range to remove
+ @return Iterator following the last removed element. If the iterator @a
+ second refers to the last element, the end() iterator is returned.
+
+ @tparam InteratorType an @ref iterator or @ref const_iterator
+
+ @throw std::domain_error if called on a `null` value
+ @throw std::domain_error if called on iterators which does not belong to
+ the current JSON value
+ @throw std::out_of_range if called on a primitive type with invalid iterators
+ (i.e., if `first != begin()` and `last != end()`)
+
+ @complexity The complexity depends on the type:
+ - objects: `log(size()) + std::distance(first, last)`
+ - arrays: linear in the distance between @a first and @a last, plus linear
+ in the distance between @a last and end of the container
+ - strings: linear in the length of the string
+ - other types: constant
+
+ @liveexample{The example shows the result of erase for different JSON
+ types.,erase__IteratorType_IteratorType}
+ */
+ template <class InteratorType, typename
+ std::enable_if<
+ std::is_same<InteratorType, typename basic_json_t::iterator>::value or
+ std::is_same<InteratorType, typename basic_json_t::const_iterator>::value
+ , int>::type
+ = 0>
+ InteratorType erase(InteratorType first, InteratorType last)
+ {
+ // make sure iterator fits the current value
+ if (this != first.m_object or this != last.m_object)
+ {
+ throw std::domain_error("iterators do not fit current value");
+ }
+
+ InteratorType result = end();
+
+ switch (m_type)
+ {
+ case value_t::number_integer:
+ case value_t::number_float:
+ case value_t::boolean:
+ case value_t::string:
+ {
+ if (not first.m_it.primitive_iterator.is_begin() or not last.m_it.primitive_iterator.is_end())
+ {
+ throw std::out_of_range("iterators out of range");
+ }
+
+ if (m_type == value_t::string)
+ {
+ delete m_value.string;
+ m_value.string = nullptr;
+ }
+
+ m_type = value_t::null;
+ break;
+ }
+
+ case value_t::object:
+ {
+ result.m_it.object_iterator = m_value.object->erase(first.m_it.object_iterator,
+ last.m_it.object_iterator);
+ break;
+ }
+
+ case value_t::array:
+ {
+ result.m_it.array_iterator = m_value.array->erase(first.m_it.array_iterator,
+ last.m_it.array_iterator);
+ break;
+ }
+
+ default:
+ {
+ throw std::domain_error("cannot use erase with " + type_name());
+ }
+ }
+
+ return result;
+ }
+
+ /*!
+ @brief remove element from a JSON object given a key
+
+ Removes elements from a JSON object with the key value @a key.
+
+ @param[in] key value of the elements to remove
+
+ @return Number of elements removed. If ObjectType is the default `std::map`
+ type, the return value will always be `0` (@a key was not found) or `1` (@a
+ key was found).
+
+ @throw std::domain_error when called on a type other than JSON object
+
+ @complexity `log(size()) + count(key)`
+
+ @liveexample{The example shows the effect of erase.,erase__key_type}
+ */
+ size_type erase(const typename object_t::key_type& key)
+ {
+ // this erase only works for objects
+ if (m_type != value_t::object)
+ {
+ throw std::domain_error("cannot use erase() with " + type_name());
+ }
+
+ return m_value.object->erase(key);
+ }
+
+ /*!
+ @brief remove element from a JSON array given an index
+
+ Removes element from a JSON array at the index @a idx.
+
+ @param[in] idx index of the element to remove
+
+ @throw std::domain_error when called on a type other than JSON array
+ @throw std::out_of_range when `idx >= size()`
+
+ @complexity Linear in distance between @a idx and the end of the container.
+
+ @liveexample{The example shows the effect of erase.,erase__size_type}
+ */
+ void erase(const size_type idx)
+ {
+ // this erase only works for arrays
+ if (m_type != value_t::array)
+ {
+ throw std::domain_error("cannot use erase() with " + type_name());
+ }
+
+ if (idx >= size())
+ {
+ throw std::out_of_range("index out of range");
+ }
+
+ m_value.array->erase(m_value.array->begin() + static_cast<difference_type>(idx));
+ }
+
+ /*!
+ @brief find an element in a JSON object
+
+ Finds an element in a JSON object with key equivalent to @a key. If the
+ element is not found or the JSON value is not an object, end() is returned.
+
+ @param[in] key key value of the element to search for
+
+ @return Iterator to an element with key equivalent to @a key. If no such
+ element is found, past-the-end (see end()) iterator is returned.
+
+ @complexity Logarithmic in the size of the JSON object.
+
+ @liveexample{The example shows how find is used.,find__key_type}
+ */
+ iterator find(typename object_t::key_type key)
+ {
+ auto result = end();
+
+ if (m_type == value_t::object)
+ {
+ result.m_it.object_iterator = m_value.object->find(key);
+ }
+
+ return result;
+ }
+
+ /*!
+ @brief find an element in a JSON object
+ @copydoc find(typename object_t::key_type)
+ */
+ const_iterator find(typename object_t::key_type key) const
+ {
+ auto result = cend();
+
+ if (m_type == value_t::object)
+ {
+ result.m_it.object_iterator = m_value.object->find(key);
+ }
+
+ return result;
+ }
+
+ /*!
+ @brief returns the number of occurrences of a key in a JSON object
+
+ Returns the number of elements with key @a key. If ObjectType is the
+ default `std::map` type, the return value will always be `0` (@a key was
+ not found) or `1` (@a key was found).
+
+ @param[in] key key value of the element to count
+
+ @return Number of elements with key @a key. If the JSON value is not an
+ object, the return value will be `0`.
+
+ @complexity Logarithmic in the size of the JSON object.
+
+ @liveexample{The example shows how count is used.,count}
+ */
+ size_type count(typename object_t::key_type key) const
+ {
+ // return 0 for all nonobject types
+ return (m_type == value_t::object) ? m_value.object->count(key) : 0;
+ }
+
+ /// @}
+
+
+ ///////////////
+ // iterators //
+ ///////////////
+
+ /// @name iterators
+ /// @{
+
+ /*!
+ @brief returns an iterator to the first element
+
+ Returns an iterator to the first element.
+
+ @image html range-begin-end.svg "Illustration from cppreference.com"
+
+ @return iterator to the first element
+
+ @complexity Constant.
+
+ @requirement This function satisfies the Container requirements:
+ - The complexity is constant.
+
+ @liveexample{The following code shows an example for @ref begin.,begin}
+ */
+ iterator begin()
+ {
+ iterator result(this);
+ result.set_begin();
+ return result;
+ }
+
+ /*!
+ @copydoc basic_json::cbegin()
+ */
+ const_iterator begin() const
+ {
+ return cbegin();
+ }
+
+ /*!
+ @brief returns a const iterator to the first element
+
+ Returns a const iterator to the first element.
+
+ @image html range-begin-end.svg "Illustration from cppreference.com"
+
+ @return const iterator to the first element
+
+ @complexity Constant.
+
+ @requirement This function satisfies the Container requirements:
+ - The complexity is constant.
+ - Has the semantics of `const_cast<const basic_json&>(*this).begin()`.
+
+ @liveexample{The following code shows an example for @ref cbegin.,cbegin}
+ */
+ const_iterator cbegin() const
+ {
+ const_iterator result(this);
+ result.set_begin();
+ return result;
+ }
+
+ /*!
+ @brief returns an iterator to one past the last element
+
+ Returns an iterator to one past the last element.
+
+ @image html range-begin-end.svg "Illustration from cppreference.com"
+
+ @return iterator one past the last element
+
+ @complexity Constant.
+
+ @requirement This function satisfies the Container requirements:
+ - The complexity is constant.
+
+ @liveexample{The following code shows an example for @ref end.,end}
+ */
+ iterator end()
+ {
+ iterator result(this);
+ result.set_end();
+ return result;
+ }
+
+ /*!
+ @copydoc basic_json::cend()
+ */
+ const_iterator end() const
+ {
+ return cend();
+ }
+
+ /*!
+ @brief returns a const iterator to one past the last element
+
+ Returns a const iterator to one past the last element.
+
+ @image html range-begin-end.svg "Illustration from cppreference.com"
+
+ @return const iterator one past the last element
+
+ @complexity Constant.
+
+ @requirement This function satisfies the Container requirements:
+ - The complexity is constant.
+ - Has the semantics of `const_cast<const basic_json&>(*this).end()`.
+
+ @liveexample{The following code shows an example for @ref cend.,cend}
+ */
+ const_iterator cend() const
+ {
+ const_iterator result(this);
+ result.set_end();
+ return result;
+ }
+
+ /*!
+ @brief returns an iterator to the reverse-beginning
+
+ Returns an iterator to the reverse-beginning; that is, the last element.
+
+ @image html range-rbegin-rend.svg "Illustration from cppreference.com"
+
+ @complexity Constant.
+
+ @requirement This function satisfies the ReversibleContainer requirements:
+ - The complexity is constant.
+ - Has the semantics of `reverse_iterator(end())`.
+
+ @liveexample{The following code shows an example for @ref rbegin.,rbegin}
+ */
+ reverse_iterator rbegin()
+ {
+ return reverse_iterator(end());
+ }
+
+ /*!
+ @copydoc basic_json::crbegin()
+ */
+ const_reverse_iterator rbegin() const
+ {
+ return crbegin();
+ }
+
+ /*!
+ @brief returns an iterator to the reverse-end
+
+ Returns an iterator to the reverse-end; that is, one before the first
+ element.
+
+ @image html range-rbegin-rend.svg "Illustration from cppreference.com"
+
+ @complexity Constant.
+
+ @requirement This function satisfies the ReversibleContainer requirements:
+ - The complexity is constant.
+ - Has the semantics of `reverse_iterator(begin())`.
+
+ @liveexample{The following code shows an example for @ref rend.,rend}
+ */
+ reverse_iterator rend()
+ {
+ return reverse_iterator(begin());
+ }
+
+ /*!
+ @copydoc basic_json::crend()
+ */
+ const_reverse_iterator rend() const
+ {
+ return crend();
+ }
+
+ /*!
+ @brief returns a const reverse iterator to the last element
+
+ Returns a const iterator to the reverse-beginning; that is, the last
+ element.
+
+ @image html range-rbegin-rend.svg "Illustration from cppreference.com"
+
+ @complexity Constant.
+
+ @requirement This function satisfies the ReversibleContainer requirements:
+ - The complexity is constant.
+ - Has the semantics of `const_cast<const basic_json&>(*this).rbegin()`.
+
+ @liveexample{The following code shows an example for @ref crbegin.,crbegin}
+ */
+ const_reverse_iterator crbegin() const
+ {
+ return const_reverse_iterator(cend());
+ }
+
+ /*!
+ @brief returns a const reverse iterator to one before the first
+
+ Returns a const reverse iterator to the reverse-end; that is, one before
+ the first element.
+
+ @image html range-rbegin-rend.svg "Illustration from cppreference.com"
+
+ @complexity Constant.
+
+ @requirement This function satisfies the ReversibleContainer requirements:
+ - The complexity is constant.
+ - Has the semantics of `const_cast<const basic_json&>(*this).rend()`.
+
+ @liveexample{The following code shows an example for @ref crend.,crend}
+ */
+ const_reverse_iterator crend() const
+ {
+ return const_reverse_iterator(cbegin());
+ }
+
+ /// @}
+
+
+ //////////////
+ // capacity //
+ //////////////
+
+ /// @name capacity
+ /// @{
+
+ /*!
+ @brief checks whether the container is empty
+
+ Checks if a JSON value has no elements.
+
+ @return The return value depends on the different types and is
+ defined as follows:
+ Value type | return value
+ ----------- | -------------
+ null | @c true
+ boolean | @c false
+ string | @c false
+ number | @c false
+ object | result of function object_t::empty()
+ array | result of function array_t::empty()
+
+ @complexity Constant, as long as @ref array_t and @ref object_t satisfy the
+ Container concept; that is, their empty() functions have
+ constant complexity.
+
+ @requirement This function satisfies the Container requirements:
+ - The complexity is constant.
+ - Has the semantics of `begin() == end()`.
+
+ @liveexample{The following code uses @ref empty to check if a @ref json
+ object contains any elements.,empty}
+ */
+ bool empty() const noexcept
+ {
+ switch (m_type)
+ {
+ case (value_t::null):
+ {
+ return true;
+ }
+
+ case (value_t::array):
+ {
+ return m_value.array->empty();
+ }
+
+ case (value_t::object):
+ {
+ return m_value.object->empty();
+ }
+
+ default:
+ {
+ // all other types are nonempty
+ return false;
+ }
+ }
+ }
+
+ /*!
+ @brief returns the number of elements
+
+ Returns the number of elements in a JSON value.
+
+ @return The return value depends on the different types and is
+ defined as follows:
+ Value type | return value
+ ----------- | -------------
+ null | @c 0
+ boolean | @c 1
+ string | @c 1
+ number | @c 1
+ object | result of function object_t::size()
+ array | result of function array_t::size()
+
+ @complexity Constant, as long as @ref array_t and @ref object_t satisfy the
+ Container concept; that is, their size() functions have
+ constant complexity.
+
+ @requirement This function satisfies the Container requirements:
+ - The complexity is constant.
+ - Has the semantics of `std::distance(begin(), end())`.
+
+ @liveexample{The following code calls @ref size on the different value
+ types.,size}
+ */
+ size_type size() const noexcept
+ {
+ switch (m_type)
+ {
+ case (value_t::null):
+ {
+ return 0;
+ }
+
+ case (value_t::array):
+ {
+ return m_value.array->size();
+ }
+
+ case (value_t::object):
+ {
+ return m_value.object->size();
+ }
+
+ default:
+ {
+ // all other types have size 1
+ return 1;
+ }
+ }
+ }
+
+ /*!
+ @brief returns the maximum possible number of elements
+
+ Returns the maximum number of elements a JSON value is able to hold due to
+ system or library implementation limitations, i.e. `std::distance(begin(),
+ end())` for the JSON value.
+
+ @return The return value depends on the different types and is
+ defined as follows:
+ Value type | return value
+ ----------- | -------------
+ null | @c 0 (same as size())
+ boolean | @c 1 (same as size())
+ string | @c 1 (same as size())
+ number | @c 1 (same as size())
+ object | result of function object_t::max_size()
+ array | result of function array_t::max_size()
+
+ @complexity Constant, as long as @ref array_t and @ref object_t satisfy the
+ Container concept; that is, their max_size() functions have
+ constant complexity.
+
+ @requirement This function satisfies the Container requirements:
+ - The complexity is constant.
+ - Has the semantics of returning `b.size()` where `b` is the largest
+ possible JSON value.
+
+ @liveexample{The following code calls @ref max_size on the different value
+ types. Note the output is implementation specific.,max_size}
+ */
+ size_type max_size() const noexcept
+ {
+ switch (m_type)
+ {
+ case (value_t::array):
+ {
+ return m_value.array->max_size();
+ }
+
+ case (value_t::object):
+ {
+ return m_value.object->max_size();
+ }
+
+ default:
+ {
+ // all other types have max_size() == size()
+ return size();
+ }
+ }
+ }
+
+ /// @}
+
+
+ ///////////////
+ // modifiers //
+ ///////////////
+
+ /// @name modifiers
+ /// @{
+
+ /*!
+ @brief clears the contents
+
+ Clears the content of a JSON value and resets it to the default value as
+ if @ref basic_json(value_t) would have been called:
+
+ Value type | initial value
+ ----------- | -------------
+ null | `null`
+ boolean | `false`
+ string | `""`
+ number | `0`
+ object | `{}`
+ array | `[]`
+
+ @note Floating-point numbers are set to `0.0` which will be serialized to
+ `0`. The vale type remains @ref number_float_t.
+
+ @complexity Linear in the size of the JSON value.
+
+ @liveexample{The example below shows the effect of @ref clear to different
+ JSON types.,clear}
+ */
+ void clear() noexcept
+ {
+ switch (m_type)
+ {
+ case (value_t::null):
+ case (value_t::discarded):
+ {
+ break;
+ }
+
+ case (value_t::number_integer):
+ {
+ m_value.number_integer = 0;
+ break;
+ }
+
+ case (value_t::number_float):
+ {
+ m_value.number_float = 0.0;
+ break;
+ }
+
+ case (value_t::boolean):
+ {
+ m_value.boolean = false;
+ break;
+ }
+
+ case (value_t::string):
+ {
+ m_value.string->clear();
+ break;
+ }
+
+ case (value_t::array):
+ {
+ m_value.array->clear();
+ break;
+ }
+
+ case (value_t::object):
+ {
+ m_value.object->clear();
+ break;
+ }
+ }
+ }
+
+ /*!
+ @brief add an object to an array
+
+ Appends the given element @a value to the end of the JSON value. If the
+ function is called on a JSON null value, an empty array is created before
+ appending @a value.
+
+ @param value the value to add to the JSON array
+
+ @throw std::domain_error when called on a type other than JSON array or null
+
+ @complexity Amortized constant.
+
+ @liveexample{The example shows how `push_back` and `+=` can be used to add
+ elements to a JSON array. Note how the `null` value was silently converted
+ to a JSON array.,push_back}
+ */
+ void push_back(basic_json&& value)
+ {
+ // push_back only works for null objects or arrays
+ if (not(m_type == value_t::null or m_type == value_t::array))
+ {
+ throw std::domain_error("cannot use push_back() with " + type_name());
+ }
+
+ // transform null object into an array
+ if (m_type == value_t::null)
+ {
+ m_type = value_t::array;
+ m_value = value_t::array;
+ }
+
+ // add element to array (move semantics)
+ m_value.array->push_back(std::move(value));
+ // invalidate object
+ value.m_type = value_t::null;
+ }
+
+ /*!
+ @brief add an object to an array
+ @copydoc push_back(basic_json&&)
+ */
+ reference operator+=(basic_json&& value)
+ {
+ push_back(std::move(value));
+ return *this;
+ }
+
+ /*!
+ @brief add an object to an array
+ @copydoc push_back(basic_json&&)
+ */
+ void push_back(const basic_json& value)
+ {
+ // push_back only works for null objects or arrays
+ if (not(m_type == value_t::null or m_type == value_t::array))
+ {
+ throw std::domain_error("cannot use push_back() with " + type_name());
+ }
+
+ // transform null object into an array
+ if (m_type == value_t::null)
+ {
+ m_type = value_t::array;
+ m_value = value_t::array;
+ }
+
+ // add element to array
+ m_value.array->push_back(value);
+ }
+
+ /*!
+ @brief add an object to an array
+ @copydoc push_back(basic_json&&)
+ */
+ reference operator+=(const basic_json& value)
+ {
+ push_back(value);
+ return *this;
+ }
+
+ /*!
+ @brief add an object to an object
+
+ Inserts the given element @a value to the JSON object. If the function is
+ called on a JSON null value, an empty object is created before inserting @a
+ value.
+
+ @param[in] value the value to add to the JSON object
+
+ @throw std::domain_error when called on a type other than JSON object or
+ null
+
+ @complexity Logarithmic in the size of the container, O(log(`size()`)).
+
+ @liveexample{The example shows how `push_back` and `+=` can be used to add
+ elements to a JSON object. Note how the `null` value was silently converted
+ to a JSON object.,push_back__object_t__value}
+ */
+ void push_back(const typename object_t::value_type& value)
+ {
+ // push_back only works for null objects or objects
+ if (not(m_type == value_t::null or m_type == value_t::object))
+ {
+ throw std::domain_error("cannot use push_back() with " + type_name());
+ }
+
+ // transform null object into an object
+ if (m_type == value_t::null)
+ {
+ m_type = value_t::object;
+ m_value = value_t::object;
+ }
+
+ // add element to array
+ m_value.object->insert(value);
+ }
+
+ /*!
+ @brief add an object to an object
+ @copydoc push_back(const typename object_t::value_type&)
+ */
+ reference operator+=(const typename object_t::value_type& value)
+ {
+ push_back(value);
+ return operator[](value.first);
+ }
+
+ /*!
+ @brief inserts element
+
+ Inserts element @a value before iterator @a pos.
+
+ @param[in] pos iterator before which the content will be inserted; may be
+ the end() iterator
+ @param[in] value element to insert
+ @return iterator pointing to the inserted @a value.
+
+ @throw std::domain_error if called on JSON values other than arrays
+ @throw std::domain_error if @a pos is not an iterator of *this
+
+ @complexity Constant plus linear in the distance between pos and end of the
+ container.
+
+ @liveexample{The example shows how insert is used.,insert}
+ */
+ iterator insert(const_iterator pos, const basic_json& value)
+ {
+ // insert only works for arrays
+ if (m_type != value_t::array)
+ {
+ throw std::domain_error("cannot use insert() with " + type_name());
+ }
+
+ // check if iterator pos fits to this JSON value
+ if (pos.m_object != this)
+ {
+ throw std::domain_error("iterator does not fit current value");
+ }
+
+ // insert to array and return iterator
+ iterator result(this);
+ result.m_it.array_iterator = m_value.array->insert(pos.m_it.array_iterator, value);
+ return result;
+ }
+
+ /*!
+ @brief inserts element
+ @copydoc insert(const_iterator, const basic_json&)
+ */
+ iterator insert(const_iterator pos, basic_json&& value)
+ {
+ return insert(pos, value);
+ }
+
+ /*!
+ @brief inserts elements
+
+ Inserts @a count copies of @a value before iterator @a pos.
+
+ @param[in] pos iterator before which the content will be inserted; may be
+ the end() iterator
+ @param[in] count number of copies of @a value to insert
+ @param[in] value element to insert
+ @return iterator pointing to the first element inserted, or @a pos if
+ `count==0`
+
+ @throw std::domain_error if called on JSON values other than arrays
+ @throw std::domain_error if @a pos is not an iterator of *this
+
+ @complexity Linear in @a count plus linear in the distance between @a pos
+ and end of the container.
+
+ @liveexample{The example shows how insert is used.,insert__count}
+ */
+ iterator insert(const_iterator pos, size_type count, const basic_json& value)
+ {
+ // insert only works for arrays
+ if (m_type != value_t::array)
+ {
+ throw std::domain_error("cannot use insert() with " + type_name());
+ }
+
+ // check if iterator pos fits to this JSON value
+ if (pos.m_object != this)
+ {
+ throw std::domain_error("iterator does not fit current value");
+ }
+
+ // insert to array and return iterator
+ iterator result(this);
+ result.m_it.array_iterator = m_value.array->insert(pos.m_it.array_iterator, count, value);
+ return result;
+ }
+
+ /*!
+ @brief inserts elements
+
+ Inserts elements from range `[first, last)` before iterator @a pos.
+
+ @param[in] pos iterator before which the content will be inserted; may be
+ the end() iterator
+ @param[in] first begin of the range of elements to insert
+ @param[in] last end of the range of elements to insert
+
+ @throw std::domain_error if called on JSON values other than arrays
+ @throw std::domain_error if @a pos is not an iterator of *this
+ @throw std::domain_error if @a first and @a last do not belong to the same
+ JSON value
+ @throw std::domain_error if @a first or @a last are iterators into
+ container for which insert is called
+ @return iterator pointing to the first element inserted, or @a pos if
+ `first==last`
+
+ @complexity Linear in `std::distance(first, last)` plus linear in the
+ distance between @a pos and end of the container.
+
+ @liveexample{The example shows how insert is used.,insert__range}
+ */
+ iterator insert(const_iterator pos, const_iterator first, const_iterator last)
+ {
+ // insert only works for arrays
+ if (m_type != value_t::array)
+ {
+ throw std::domain_error("cannot use insert() with " + type_name());
+ }
+
+ // check if iterator pos fits to this JSON value
+ if (pos.m_object != this)
+ {
+ throw std::domain_error("iterator does not fit current value");
+ }
+
+ if (first.m_object != last.m_object)
+ {
+ throw std::domain_error("iterators does not fit");
+ }
+
+ if (first.m_object == this or last.m_object == this)
+ {
+ throw std::domain_error("passed iterators may not belong to container");
+ }
+
+ // insert to array and return iterator
+ iterator result(this);
+ result.m_it.array_iterator = m_value.array->insert(pos.m_it.array_iterator,
+ first.m_it.array_iterator, last.m_it.array_iterator);
+ return result;
+ }
+
+ /*!
+ @brief inserts elements
+
+ Inserts elements from initializer list @a ilist before iterator @a pos.
+
+ @param[in] pos iterator before which the content will be inserted; may be
+ the end() iterator
+ @param[in] ilist initializer list to insert the values from
+
+ @throw std::domain_error if called on JSON values other than arrays
+ @throw std::domain_error if @a pos is not an iterator of *this
+ @return iterator pointing to the first element inserted, or @a pos if
+ `ilist` is empty
+
+ @complexity Linear in `ilist.size()` plus linear in the distance between @a
+ pos and end of the container.
+
+ @liveexample{The example shows how insert is used.,insert__ilist}
+ */
+ iterator insert(const_iterator pos, std::initializer_list<basic_json> ilist)
+ {
+ // insert only works for arrays
+ if (m_type != value_t::array)
+ {
+ throw std::domain_error("cannot use insert() with " + type_name());
+ }
+
+ // check if iterator pos fits to this JSON value
+ if (pos.m_object != this)
+ {
+ throw std::domain_error("iterator does not fit current value");
+ }
+
+ // insert to array and return iterator
+ iterator result(this);
+ result.m_it.array_iterator = m_value.array->insert(pos.m_it.array_iterator, ilist);
+ return result;
+ }
+
+ /*!
+ @brief exchanges the values
+
+ Exchanges the contents of the JSON value with those of @a other. Does not
+ invoke any move, copy, or swap operations on individual elements. All
+ iterators and references remain valid. The past-the-end iterator is
+ invalidated.
+
+ @param[in,out] other JSON value to exchange the contents with
+
+ @complexity Constant.
+
+ @liveexample{The example below shows how JSON arrays can be
+ swapped.,swap__reference}
+ */
+ void swap(reference other) noexcept (
+ std::is_nothrow_move_constructible<value_t>::value and
+ std::is_nothrow_move_assignable<value_t>::value and
+ std::is_nothrow_move_constructible<json_value>::value and
+ std::is_nothrow_move_assignable<json_value>::value
+ )
+ {
+ std::swap(m_type, other.m_type);
+ std::swap(m_value, other.m_value);
+ }
+
+ /*!
+ @brief exchanges the values
+
+ Exchanges the contents of a JSON array with those of @a other. Does not
+ invoke any move, copy, or swap operations on individual elements. All
+ iterators and references remain valid. The past-the-end iterator is
+ invalidated.
+
+ @param[in,out] other array to exchange the contents with
+
+ @throw std::domain_error when JSON value is not an array
+
+ @complexity Constant.
+
+ @liveexample{The example below shows how JSON values can be
+ swapped.,swap__array_t}
+ */
+ void swap(array_t& other)
+ {
+ // swap only works for arrays
+ if (m_type != value_t::array)
+ {
+ throw std::domain_error("cannot use swap() with " + type_name());
+ }
+
+ // swap arrays
+ std::swap(*(m_value.array), other);
+ }
+
+ /*!
+ @brief exchanges the values
+
+ Exchanges the contents of a JSON object with those of @a other. Does not
+ invoke any move, copy, or swap operations on individual elements. All
+ iterators and references remain valid. The past-the-end iterator is
+ invalidated.
+
+ @param[in,out] other object to exchange the contents with
+
+ @throw std::domain_error when JSON value is not an object
+
+ @complexity Constant.
+
+ @liveexample{The example below shows how JSON values can be
+ swapped.,swap__object_t}
+ */
+ void swap(object_t& other)
+ {
+ // swap only works for objects
+ if (m_type != value_t::object)
+ {
+ throw std::domain_error("cannot use swap() with " + type_name());
+ }
+
+ // swap objects
+ std::swap(*(m_value.object), other);
+ }
+
+ /*!
+ @brief exchanges the values
+
+ Exchanges the contents of a JSON string with those of @a other. Does not
+ invoke any move, copy, or swap operations on individual elements. All
+ iterators and references remain valid. The past-the-end iterator is
+ invalidated.
+
+ @param[in,out] other string to exchange the contents with
+
+ @throw std::domain_error when JSON value is not a string
+
+ @complexity Constant.
+
+ @liveexample{The example below shows how JSON values can be
+ swapped.,swap__string_t}
+ */
+ void swap(string_t& other)
+ {
+ // swap only works for strings
+ if (m_type != value_t::string)
+ {
+ throw std::domain_error("cannot use swap() with " + type_name());
+ }
+
+ // swap strings
+ std::swap(*(m_value.string), other);
+ }
+
+ /// @}
+
+
+ //////////////////////////////////////////
+ // lexicographical comparison operators //
+ //////////////////////////////////////////
+
+ /// @name lexicographical comparison operators
+ /// @{
+
+ private:
+ /*!
+ @brief comparison operator for JSON types
+
+ Returns an ordering that is similar to Python:
+ - order: null < boolean < number < object < array < string
+ - furthermore, each type is not smaller than itself
+ */
+ friend bool operator<(const value_t lhs, const value_t rhs)
+ {
+ static constexpr std::array<uint8_t, 7> order = {{
+ 0, // null
+ 3, // object
+ 4, // array
+ 5, // string
+ 1, // boolean
+ 2, // integer
+ 2 // float
+ }
+ };
+
+ // discarded values are not comparable
+ if (lhs == value_t::discarded or rhs == value_t::discarded)
+ {
+ return false;
+ }
+
+ return order[static_cast<std::size_t>(lhs)] < order[static_cast<std::size_t>(rhs)];
+ }
+
+ public:
+ /*!
+ @brief comparison: equal
+
+ Compares two JSON values for equality according to the following rules:
+ - Two JSON values are equal if (1) they are from the same type and (2)
+ their stored values are the same.
+ - Integer and floating-point numbers are automatically converted before
+ comparison. Floating-point numbers are compared indirectly: two
+ floating-point numbers `f1` and `f2` are considered equal if neither
+ `f1 > f2` nor `f2 > f1` holds.
+ - Two JSON null values are equal.
+
+ @param[in] lhs first JSON value to consider
+ @param[in] rhs second JSON value to consider
+ @return whether the values @a lhs and @a rhs are equal
+
+ @complexity Linear.
+
+ @liveexample{The example demonstrates comparing several JSON
+ types.,operator__equal}
+ */
+ friend bool operator==(const_reference lhs, const_reference rhs) noexcept
+ {
+ const auto lhs_type = lhs.type();
+ const auto rhs_type = rhs.type();
+
+ if (lhs_type == rhs_type)
+ {
+ switch (lhs_type)
+ {
+ case (value_t::array):
+ return *lhs.m_value.array == *rhs.m_value.array;
+ case (value_t::object):
+ return *lhs.m_value.object == *rhs.m_value.object;
+ case (value_t::null):
+ return true;
+ case (value_t::string):
+ return *lhs.m_value.string == *rhs.m_value.string;
+ case (value_t::boolean):
+ return lhs.m_value.boolean == rhs.m_value.boolean;
+ case (value_t::number_integer):
+ return lhs.m_value.number_integer == rhs.m_value.number_integer;
+ case (value_t::number_float):
+ return approx(lhs.m_value.number_float, rhs.m_value.number_float);
+ case (value_t::discarded):
+ return false;
+ }
+ }
+ else if (lhs_type == value_t::number_integer and rhs_type == value_t::number_float)
+ {
+ return approx(static_cast<number_float_t>(lhs.m_value.number_integer),
+ rhs.m_value.number_float);
+ }
+ else if (lhs_type == value_t::number_float and rhs_type == value_t::number_integer)
+ {
+ return approx(lhs.m_value.number_float,
+ static_cast<number_float_t>(rhs.m_value.number_integer));
+ }
+ return false;
+ }
+
+ /*!
+ @brief comparison: equal
+
+ The functions compares the given JSON value against a null pointer. As the
+ null pointer can be used to initialize a JSON value to null, a comparison
+ of JSON value @a v with a null pointer should be equivalent to call
+ `v.is_null()`.
+
+ @param[in] v JSON value to consider
+ @return whether @a v is null
+
+ @complexity Constant.
+
+ @liveexample{The example compares several JSON types to the null pointer.
+ ,operator__equal__nullptr_t}
+ */
+ friend bool operator==(const_reference v, std::nullptr_t) noexcept
+ {
+ return v.is_null();
+ }
+
+ /*!
+ @brief comparison: equal
+ @copydoc operator==(const_reference, std::nullptr_t)
+ */
+ friend bool operator==(std::nullptr_t, const_reference v) noexcept
+ {
+ return v.is_null();
+ }
+
+ /*!
+ @brief comparison: not equal
+
+ Compares two JSON values for inequality by calculating `not (lhs == rhs)`.
+
+ @param[in] lhs first JSON value to consider
+ @param[in] rhs second JSON value to consider
+ @return whether the values @a lhs and @a rhs are not equal
+
+ @complexity Linear.
+
+ @liveexample{The example demonstrates comparing several JSON
+ types.,operator__notequal}
+ */
+ friend bool operator!=(const_reference lhs, const_reference rhs) noexcept
+ {
+ return not (lhs == rhs);
+ }
+
+ /*!
+ @brief comparison: not equal
+
+ The functions compares the given JSON value against a null pointer. As the
+ null pointer can be used to initialize a JSON value to null, a comparison
+ of JSON value @a v with a null pointer should be equivalent to call
+ `not v.is_null()`.
+
+ @param[in] v JSON value to consider
+ @return whether @a v is not null
+
+ @complexity Constant.
+
+ @liveexample{The example compares several JSON types to the null pointer.
+ ,operator__notequal__nullptr_t}
+ */
+ friend bool operator!=(const_reference v, std::nullptr_t) noexcept
+ {
+ return not v.is_null();
+ }
+
+ /*!
+ @brief comparison: not equal
+ @copydoc operator!=(const_reference, std::nullptr_t)
+ */
+ friend bool operator!=(std::nullptr_t, const_reference v) noexcept
+ {
+ return not v.is_null();
+ }
+
+ /*!
+ @brief comparison: less than
+
+ Compares whether one JSON value @a lhs is less than another JSON value @a
+ rhs according to the following rules:
+ - If @a lhs and @a rhs have the same type, the values are compared using
+ the default `<` operator.
+ - Integer and floating-point numbers are automatically converted before
+ comparison
+ - In case @a lhs and @a rhs have different types, the values are ignored
+ and the order of the types is considered, see
+ @ref operator<(const value_t, const value_t).
+
+ @param[in] lhs first JSON value to consider
+ @param[in] rhs second JSON value to consider
+ @return whether @a lhs is less than @a rhs
+
+ @complexity Linear.
+
+ @liveexample{The example demonstrates comparing several JSON
+ types.,operator__less}
+ */
+ friend bool operator<(const_reference lhs, const_reference rhs) noexcept
+ {
+ const auto lhs_type = lhs.type();
+ const auto rhs_type = rhs.type();
+
+ if (lhs_type == rhs_type)
+ {
+ switch (lhs_type)
+ {
+ case (value_t::array):
+ return *lhs.m_value.array < *rhs.m_value.array;
+ case (value_t::object):
+ return *lhs.m_value.object < *rhs.m_value.object;
+ case (value_t::null):
+ return false;
+ case (value_t::string):
+ return *lhs.m_value.string < *rhs.m_value.string;
+ case (value_t::boolean):
+ return lhs.m_value.boolean < rhs.m_value.boolean;
+ case (value_t::number_integer):
+ return lhs.m_value.number_integer < rhs.m_value.number_integer;
+ case (value_t::number_float):
+ return lhs.m_value.number_float < rhs.m_value.number_float;
+ case (value_t::discarded):
+ return false;
+ }
+ }
+ else if (lhs_type == value_t::number_integer and rhs_type == value_t::number_float)
+ {
+ return static_cast<number_float_t>(lhs.m_value.number_integer) <
+ rhs.m_value.number_float;
+ }
+ else if (lhs_type == value_t::number_float and rhs_type == value_t::number_integer)
+ {
+ return lhs.m_value.number_float <
+ static_cast<number_float_t>(rhs.m_value.number_integer);
+ }
+
+ // We only reach this line if we cannot compare values. In that case,
+ // we compare types. Note we have to call the operator explicitly,
+ // because MSVC has problems otherwise.
+ return operator<(lhs_type, rhs_type);
+ }
+
+ /*!
+ @brief comparison: less than or equal
+
+ Compares whether one JSON value @a lhs is less than or equal to another
+ JSON value by calculating `not (rhs < lhs)`.
+
+ @param[in] lhs first JSON value to consider
+ @param[in] rhs second JSON value to consider
+ @return whether @a lhs is less than or equal to @a rhs
+
+ @complexity Linear.
+
+ @liveexample{The example demonstrates comparing several JSON
+ types.,operator__greater}
+ */
+ friend bool operator<=(const_reference lhs, const_reference rhs) noexcept
+ {
+ return not (rhs < lhs);
+ }
+
+ /*!
+ @brief comparison: greater than
+
+ Compares whether one JSON value @a lhs is greater than another
+ JSON value by calculating `not (lhs <= rhs)`.
+
+ @param[in] lhs first JSON value to consider
+ @param[in] rhs second JSON value to consider
+ @return whether @a lhs is greater than to @a rhs
+
+ @complexity Linear.
+
+ @liveexample{The example demonstrates comparing several JSON
+ types.,operator__lessequal}
+ */
+ friend bool operator>(const_reference lhs, const_reference rhs) noexcept
+ {
+ return not (lhs <= rhs);
+ }
+
+ /*!
+ @brief comparison: greater than or equal
+
+ Compares whether one JSON value @a lhs is greater than or equal to another
+ JSON value by calculating `not (lhs < rhs)`.
+
+ @param[in] lhs first JSON value to consider
+ @param[in] rhs second JSON value to consider
+ @return whether @a lhs is greater than or equal to @a rhs
+
+ @complexity Linear.
+
+ @liveexample{The example demonstrates comparing several JSON
+ types.,operator__greaterequal}
+ */
+ friend bool operator>=(const_reference lhs, const_reference rhs) noexcept
+ {
+ return not (lhs < rhs);
+ }
+
+ /// @}
+
+
+ ///////////////////
+ // serialization //
+ ///////////////////
+
+ /// @name serialization
+ /// @{
+
+ /*!
+ @brief serialize to stream
+
+ Serialize the given JSON value @a j to the output stream @a o. The JSON
+ value will be serialized using the @ref dump member function. The
+ indentation of the output can be controlled with the member variable
+ `width` of the output stream @a o. For instance, using the manipulator
+ `std::setw(4)` on @a o sets the indentation level to `4` and the
+ serialization result is the same as calling `dump(4)`.
+
+ @param[in,out] o stream to serialize to
+ @param[in] j JSON value to serialize
+
+ @return the stream @a o
+
+ @complexity Linear.
+
+ @liveexample{The example below shows the serialization with different
+ parameters to `width` to adjust the indentation level.,operator_serialize}
+ */
+ friend std::ostream& operator<<(std::ostream& o, const basic_json& j)
+ {
+ // read width member and use it as indentation parameter if nonzero
+ const bool pretty_print = (o.width() > 0);
+ const auto indentation = (pretty_print ? o.width() : 0);
+
+ // reset width to 0 for subsequent calls to this stream
+ o.width(0);
+
+ // do the actual serialization
+ j.dump(o, pretty_print, static_cast<unsigned int>(indentation));
+ return o;
+ }
+
+ /*!
+ @brief serialize to stream
+ @copydoc operator<<(std::ostream&, const basic_json&)
+ */
+ friend std::ostream& operator>>(const basic_json& j, std::ostream& o)
+ {
+ return o << j;
+ }
+
+ /// @}
+
+
+ /////////////////////
+ // deserialization //
+ /////////////////////
+
+ /// @name deserialization
+ /// @{
+
+ /*!
+ @brief deserialize from string
+
+ @param[in] s string to read a serialized JSON value from
+ @param[in] cb a parser callback function of type @ref parser_callback_t
+ which is used to control the deserialization by filtering unwanted values
+ (optional)
+
+ @return result of the deserialization
+
+ @complexity Linear in the length of the input. The parser is a predictive
+ LL(1) parser. The complexity can be higher if the parser callback function
+ @a cb has a super-linear complexity.
+
+ @liveexample{The example below demonstrates the parse function with and
+ without callback function.,parse__string__parser_callback_t}
+
+ @sa parse(std::istream&, parser_callback_t) for a version that reads from
+ an input stream
+ */
+ static basic_json parse(const string_t& s, parser_callback_t cb = nullptr)
+ {
+ return parser(s, cb).parse();
+ }
+
+ /*!
+ @brief deserialize from stream
+
+ @param[in,out] i stream to read a serialized JSON value from
+ @param[in] cb a parser callback function of type @ref parser_callback_t
+ which is used to control the deserialization by filtering unwanted values
+ (optional)
+
+ @return result of the deserialization
+
+ @complexity Linear in the length of the input. The parser is a predictive
+ LL(1) parser. The complexity can be higher if the parser callback function
+ @a cb has a super-linear complexity.
+
+ @liveexample{The example below demonstrates the parse function with and
+ without callback function.,parse__istream__parser_callback_t}
+
+ @sa parse(const string_t&, parser_callback_t) for a version that reads
+ from a string
+ */
+ static basic_json parse(std::istream& i, parser_callback_t cb = nullptr)
+ {
+ return parser(i, cb).parse();
+ }
+
+ static basic_json parse(std::istream&& i, parser_callback_t cb = nullptr)
+ {
+ return parser(i, cb).parse();
+ }
+
+ /*!
+ @brief deserialize from stream
+
+ Deserializes an input stream to a JSON value.
+
+ @param[in,out] i input stream to read a serialized JSON value from
+ @param[in,out] j JSON value to write the deserialized input to
+
+ @throw std::invalid_argument in case of parse errors
+
+ @complexity Linear in the length of the input. The parser is a predictive
+ LL(1) parser.
+
+ @liveexample{The example below shows how a JSON value is constructed by
+ reading a serialization from a stream.,operator_deserialize}
+
+ @sa parse(std::istream&, parser_callback_t) for a variant with a parser
+ callback function to filter values while parsing
+ */
+ friend std::istream& operator<<(basic_json& j, std::istream& i)
+ {
+ j = parser(i).parse();
+ return i;
+ }
+
+ /*!
+ @brief deserialize from stream
+ @copydoc operator<<(basic_json&, std::istream&)
+ */
+ friend std::istream& operator>>(std::istream& i, basic_json& j)
+ {
+ j = parser(i).parse();
+ return i;
+ }
+
+ /// @}
+
+
+ private:
+ ///////////////////////////
+ // convenience functions //
+ ///////////////////////////
+
+ /// return the type as string
+ string_t type_name() const
+ {
+ switch (m_type)
+ {
+ case (value_t::null):
+ {
+ return "null";
+ }
+
+ case (value_t::object):
+ {
+ return "object";
+ }
+
+ case (value_t::array):
+ {
+ return "array";
+ }
+
+ case (value_t::string):
+ {
+ return "string";
+ }
+
+ case (value_t::boolean):
+ {
+ return "boolean";
+ }
+
+ case (value_t::discarded):
+ {
+ return "discarded";
+ }
+
+ default:
+ {
+ return "number";
+ }
+ }
+ }
+
+ /*!
+ @brief calculates the extra space to escape a JSON string
+
+ @param[in] s the string to escape
+ @return the number of characters required to escape string @a s
+
+ @complexity Linear in the length of string @a s.
+ */
+ static std::size_t extra_space(const string_t& s) noexcept
+ {
+ std::size_t result = 0;
+
+ for (const auto& c : s)
+ {
+ switch (c)
+ {
+ case '"':
+ case '\\':
+ case '\b':
+ case '\f':
+ case '\n':
+ case '\r':
+ case '\t':
+ {
+ // from c (1 byte) to \x (2 bytes)
+ result += 1;
+ break;
+ }
+
+ default:
+ {
+ if (c >= 0x00 and c <= 0x1f)
+ {
+ // from c (1 byte) to \uxxxx (6 bytes)
+ result += 5;
+ }
+ break;
+ }
+ }
+ }
+
+ return result;
+ }
+
+ /*!
+ @brief escape a string
+
+ Escape a string by replacing certain special characters by a sequence of an
+ escape character (backslash) and another character and other control
+ characters by a sequence of "\u" followed by a four-digit hex
+ representation.
+
+ @param[in] s the string to escape
+ @return the escaped string
+
+ @complexity Linear in the length of string @a s.
+ */
+ static string_t escape_string(const string_t& s) noexcept
+ {
+ const auto space = extra_space(s);
+ if (space == 0)
+ {
+ return s;
+ }
+
+ // create a result string of necessary size
+ string_t result(s.size() + space, '\\');
+ std::size_t pos = 0;
+
+ for (const auto& c : s)
+ {
+ switch (c)
+ {
+ // quotation mark (0x22)
+ case '"':
+ {
+ result[pos + 1] = '"';
+ pos += 2;
+ break;
+ }
+
+ // reverse solidus (0x5c)
+ case '\\':
+ {
+ // nothing to change
+ pos += 2;
+ break;
+ }
+
+ // backspace (0x08)
+ case '\b':
+ {
+ result[pos + 1] = 'b';
+ pos += 2;
+ break;
+ }
+
+ // formfeed (0x0c)
+ case '\f':
+ {
+ result[pos + 1] = 'f';
+ pos += 2;
+ break;
+ }
+
+ // newline (0x0a)
+ case '\n':
+ {
+ result[pos + 1] = 'n';
+ pos += 2;
+ break;
+ }
+
+ // carriage return (0x0d)
+ case '\r':
+ {
+ result[pos + 1] = 'r';
+ pos += 2;
+ break;
+ }
+
+ // horizontal tab (0x09)
+ case '\t':
+ {
+ result[pos + 1] = 't';
+ pos += 2;
+ break;
+ }
+
+ default:
+ {
+ if (c >= 0x00 and c <= 0x1f)
+ {
+ // print character c as \uxxxx
+ sprintf(&result[pos + 1], "u%04x", int(c));
+ pos += 6;
+ // overwrite trailing null character
+ result[pos] = '\\';
+ }
+ else
+ {
+ // all other characters are added as-is
+ result[pos++] = c;
+ }
+ break;
+ }
+ }
+ }
+
+ return result;
+ }
+
+ /*!
+ @brief internal implementation of the serialization function
+
+ This function is called by the public member function dump and organizes
+ the serializaion internally. The indentation level is propagated as
+ additional parameter. In case of arrays and objects, the function is called
+ recursively. Note that
+
+ - strings and object keys are escaped using escape_string()
+ - integer numbers are converted implictly via operator<<
+ - floating-point numbers are converted to a string using "%g" format
+
+ @param[out] o stream to write to
+ @param[in] pretty_print whether the output shall be pretty-printed
+ @param[in] indent_step the indent level
+ @param[in] current_indent the current indent level (only used internally)
+ */
+ void dump(std::ostream& o, const bool pretty_print, const unsigned int indent_step,
+ const unsigned int current_indent = 0) const
+ {
+ // variable to hold indentation for recursive calls
+ unsigned int new_indent = current_indent;
+
+ switch (m_type)
+ {
+ case (value_t::object):
+ {
+ if (m_value.object->empty())
+ {
+ o << "{}";
+ return;
+ }
+
+ o << "{";
+
+ // increase indentation
+ if (pretty_print)
+ {
+ new_indent += indent_step;
+ o << "\n";
+ }
+
+ for (auto i = m_value.object->cbegin(); i != m_value.object->cend(); ++i)
+ {
+ if (i != m_value.object->cbegin())
+ {
+ o << (pretty_print ? ",\n" : ",");
+ }
+ o << string_t(new_indent, ' ') << "\""
+ << escape_string(i->first) << "\":"
+ << (pretty_print ? " " : "");
+ i->second.dump(o, pretty_print, indent_step, new_indent);
+ }
+
+ // decrease indentation
+ if (pretty_print)
+ {
+ new_indent -= indent_step;
+ o << "\n";
+ }
+
+ o << string_t(new_indent, ' ') + "}";
+ return;
+ }
+
+ case (value_t::array):
+ {
+ if (m_value.array->empty())
+ {
+ o << "[]";
+ return;
+ }
+
+ o << "[";
+
+ // increase indentation
+ if (pretty_print)
+ {
+ new_indent += indent_step;
+ o << "\n";
+ }
+
+ for (auto i = m_value.array->cbegin(); i != m_value.array->cend(); ++i)
+ {
+ if (i != m_value.array->cbegin())
+ {
+ o << (pretty_print ? ",\n" : ",");
+ }
+ o << string_t(new_indent, ' ');
+ i->dump(o, pretty_print, indent_step, new_indent);
+ }
+
+ // decrease indentation
+ if (pretty_print)
+ {
+ new_indent -= indent_step;
+ o << "\n";
+ }
+
+ o << string_t(new_indent, ' ') << "]";
+ return;
+ }
+
+ case (value_t::string):
+ {
+ o << string_t("\"") << escape_string(*m_value.string) << "\"";
+ return;
+ }
+
+ case (value_t::boolean):
+ {
+ o << (m_value.boolean ? "true" : "false");
+ return;
+ }
+
+ case (value_t::number_integer):
+ {
+ o << m_value.number_integer;
+ return;
+ }
+
+ case (value_t::number_float):
+ {
+ // 15 digits of precision allows round-trip IEEE 754
+ // string->double->string; to be safe, we read this value from
+ // std::numeric_limits<number_float_t>::digits10
+ o << std::setprecision(std::numeric_limits<number_float_t>::digits10) << m_value.number_float;
+ return;
+ }
+
+ case (value_t::discarded):
+ {
+ o << "<discarded>";
+ return;
+ }
+
+ default:
+ {
+ o << "null";
+ return;
+ }
+ }
+ }
+
+ private:
+ //////////////////////
+ // member variables //
+ //////////////////////
+
+ /// the type of the current element
+ value_t m_type = value_t::null;
+
+ /// the value of the current element
+ json_value m_value = {};
+
+
+ private:
+ ///////////////
+ // iterators //
+ ///////////////
+
+ /*!
+ @brief an iterator for primitive JSON types
+
+ This class models an iterator for primitive JSON types (boolean, number,
+ string). It's only purpose is to allow the iterator/const_iterator classes
+ to "iterate" over primitive values. Internally, the iterator is modeled by
+ a `difference_type` variable. Value begin_value (`0`) models the begin,
+ end_value (`1`) models past the end.
+ */
+ class primitive_iterator_t
+ {
+ public:
+ /// set iterator to a defined beginning
+ void set_begin()
+ {
+ m_it = begin_value;
+ }
+
+ /// set iterator to a defined past the end
+ void set_end()
+ {
+ m_it = end_value;
+ }
+
+ /// return whether the iterator can be dereferenced
+ bool is_begin() const
+ {
+ return (m_it == begin_value);
+ }
+
+ /// return whether the iterator is at end
+ bool is_end() const
+ {
+ return (m_it == end_value);
+ }
+
+ /// return reference to the value to change and compare
+ operator difference_type& ()
+ {
+ return m_it;
+ }
+
+ /// return value to compare
+ operator difference_type () const
+ {
+ return m_it;
+ }
+
+ private:
+ static constexpr difference_type begin_value = 0;
+ static constexpr difference_type end_value = begin_value + 1;
+
+ /// iterator as signed integer type
+ difference_type m_it = std::numeric_limits<std::ptrdiff_t>::min();
+ };
+
+ /*!
+ @brief an iterator value
+
+ @note This structure could easily be a union, but MSVC currently does not
+ allow unions members with complex constructors, see
+ https://github.com/nlohmann/json/pull/105.
+ */
+ struct internal_iterator
+ {
+ /// iterator for JSON objects
+ typename object_t::iterator object_iterator;
+ /// iterator for JSON arrays
+ typename array_t::iterator array_iterator;
+ /// generic iterator for all other types
+ primitive_iterator_t primitive_iterator;
+
+ /// create an uninitialized internal_iterator
+ internal_iterator()
+ : object_iterator(), array_iterator(), primitive_iterator()
+ {}
+ };
+
+ public:
+ /*!
+ @brief a const random access iterator for the @ref basic_json class
+
+ This class implements a const iterator for the @ref basic_json class. From
+ this class, the @ref iterator class is derived.
+
+ @requirement The class satisfies the following concept requirements:
+ - [RandomAccessIterator](http://en.cppreference.com/w/cpp/concept/RandomAccessIterator):
+ The iterator that can be moved to point (forward and backward) to any
+ element in constant time.
+ */
+ class const_iterator : public std::iterator<std::random_access_iterator_tag, const basic_json>
+ {
+ /// allow basic_json to access private members
+ friend class basic_json;
+
+ public:
+ /// the type of the values when the iterator is dereferenced
+ using value_type = typename basic_json::value_type;
+ /// a type to represent differences between iterators
+ using difference_type = typename basic_json::difference_type;
+ /// defines a pointer to the type iterated over (value_type)
+ using pointer = typename basic_json::const_pointer;
+ /// defines a reference to the type iterated over (value_type)
+ using reference = typename basic_json::const_reference;
+ /// the category of the iterator
+ using iterator_category = std::bidirectional_iterator_tag;
+
+ /// default constructor
+ const_iterator() = default;
+
+ /// constructor for a given JSON instance
+ const_iterator(pointer object) : m_object(object)
+ {
+ switch (m_object->m_type)
+ {
+ case (basic_json::value_t::object):
+ {
+ m_it.object_iterator = typename object_t::iterator();
+ break;
+ }
+ case (basic_json::value_t::array):
+ {
+ m_it.array_iterator = typename array_t::iterator();
+ break;
+ }
+ default:
+ {
+ m_it.primitive_iterator = primitive_iterator_t();
+ break;
+ }
+ }
+ }
+
+ /// copy constructor given a nonconst iterator
+ const_iterator(const iterator& other) : m_object(other.m_object)
+ {
+ switch (m_object->m_type)
+ {
+ case (basic_json::value_t::object):
+ {
+ m_it.object_iterator = other.m_it.object_iterator;
+ break;
+ }
+
+ case (basic_json::value_t::array):
+ {
+ m_it.array_iterator = other.m_it.array_iterator;
+ break;
+ }
+
+ default:
+ {
+ m_it.primitive_iterator = other.m_it.primitive_iterator;
+ break;
+ }
+ }
+ }
+
+ /// copy constructor
+ const_iterator(const const_iterator& other) noexcept
+ : m_object(other.m_object), m_it(other.m_it)
+ {}
+
+ /// copy assignment
+ const_iterator& operator=(const_iterator other) noexcept(
+ std::is_nothrow_move_constructible<pointer>::value and
+ std::is_nothrow_move_assignable<pointer>::value and
+ std::is_nothrow_move_constructible<internal_iterator>::value and
+ std::is_nothrow_move_assignable<internal_iterator>::value
+ )
+ {
+ std::swap(m_object, other.m_object);
+ std::swap(m_it, other.m_it);
+ return *this;
+ }
+
+ private:
+ /// set the iterator to the first value
+ void set_begin()
+ {
+ switch (m_object->m_type)
+ {
+ case (basic_json::value_t::object):
+ {
+ m_it.object_iterator = m_object->m_value.object->begin();
+ break;
+ }
+
+ case (basic_json::value_t::array):
+ {
+ m_it.array_iterator = m_object->m_value.array->begin();
+ break;
+ }
+
+ case (basic_json::value_t::null):
+ {
+ // set to end so begin()==end() is true: null is empty
+ m_it.primitive_iterator.set_end();
+ break;
+ }
+
+ default:
+ {
+ m_it.primitive_iterator.set_begin();
+ break;
+ }
+ }
+ }
+
+ /// set the iterator past the last value
+ void set_end()
+ {
+ switch (m_object->m_type)
+ {
+ case (basic_json::value_t::object):
+ {
+ m_it.object_iterator = m_object->m_value.object->end();
+ break;
+ }
+
+ case (basic_json::value_t::array):
+ {
+ m_it.array_iterator = m_object->m_value.array->end();
+ break;
+ }
+
+ default:
+ {
+ m_it.primitive_iterator.set_end();
+ break;
+ }
+ }
+ }
+
+ public:
+ /// return a reference to the value pointed to by the iterator
+ reference operator*() const
+ {
+ switch (m_object->m_type)
+ {
+ case (basic_json::value_t::object):
+ {
+ return m_it.object_iterator->second;
+ }
+
+ case (basic_json::value_t::array):
+ {
+ return *m_it.array_iterator;
+ }
+
+ case (basic_json::value_t::null):
+ {
+ throw std::out_of_range("cannot get value");
+ }
+
+ default:
+ {
+ if (m_it.primitive_iterator.is_begin())
+ {
+ return *m_object;
+ }
+ else
+ {
+ throw std::out_of_range("cannot get value");
+ }
+ }
+ }
+ }
+
+ /// dereference the iterator
+ pointer operator->() const
+ {
+ switch (m_object->m_type)
+ {
+ case (basic_json::value_t::object):
+ {
+ return &(m_it.object_iterator->second);
+ }
+
+ case (basic_json::value_t::array):
+ {
+ return &*m_it.array_iterator;
+ }
+
+ default:
+ {
+ if (m_it.primitive_iterator.is_begin())
+ {
+ return m_object;
+ }
+ else
+ {
+ throw std::out_of_range("cannot get value");
+ }
+ }
+ }
+ }
+
+ /// post-increment (it++)
+ const_iterator operator++(int)
+ {
+ auto result = *this;
+ ++(*this);
+
+ return result;
+ }
+
+ /// pre-increment (++it)
+ const_iterator& operator++()
+ {
+ switch (m_object->m_type)
+ {
+ case (basic_json::value_t::object):
+ {
+ ++m_it.object_iterator;
+ break;
+ }
+
+ case (basic_json::value_t::array):
+ {
+ ++m_it.array_iterator;
+ break;
+ }
+
+ default:
+ {
+ ++m_it.primitive_iterator;
+ break;
+ }
+ }
+
+ return *this;
+ }
+
+ /// post-decrement (it--)
+ const_iterator operator--(int)
+ {
+ auto result = *this;
+ --(*this);
+
+ return result;
+ }
+
+ /// pre-decrement (--it)
+ const_iterator& operator--()
+ {
+ switch (m_object->m_type)
+ {
+ case (basic_json::value_t::object):
+ {
+ --m_it.object_iterator;
+ break;
+ }
+
+ case (basic_json::value_t::array):
+ {
+ --m_it.array_iterator;
+ break;
+ }
+
+ default:
+ {
+ --m_it.primitive_iterator;
+ break;
+ }
+ }
+
+ return *this;
+ }
+
+ /// comparison: equal
+ bool operator==(const const_iterator& other) const
+ {
+ // if objects are not the same, the comparison is undefined
+ if (m_object != other.m_object)
+ {
+ throw std::domain_error("cannot compare iterators of different containers");
+ }
+
+ switch (m_object->m_type)
+ {
+ case (basic_json::value_t::object):
+ {
+ return (m_it.object_iterator == other.m_it.object_iterator);
+ }
+
+ case (basic_json::value_t::array):
+ {
+ return (m_it.array_iterator == other.m_it.array_iterator);
+ }
+
+ default:
+ {
+ return (m_it.primitive_iterator == other.m_it.primitive_iterator);
+ }
+ }
+ }
+
+ /// comparison: not equal
+ bool operator!=(const const_iterator& other) const
+ {
+ return not operator==(other);
+ }
+
+ /// comparison: smaller
+ bool operator<(const const_iterator& other) const
+ {
+ // if objects are not the same, the comparison is undefined
+ if (m_object != other.m_object)
+ {
+ throw std::domain_error("cannot compare iterators of different containers");
+ }
+
+ switch (m_object->m_type)
+ {
+ case (basic_json::value_t::object):
+ {
+ throw std::domain_error("cannot use operator< for object iterators");
+ }
+
+ case (basic_json::value_t::array):
+ {
+ return (m_it.array_iterator < other.m_it.array_iterator);
+ }
+
+ default:
+ {
+ return (m_it.primitive_iterator < other.m_it.primitive_iterator);
+ }
+ }
+ }
+
+ /// comparison: less than or equal
+ bool operator<=(const const_iterator& other) const
+ {
+ return not other.operator < (*this);
+ }
+
+ /// comparison: greater than
+ bool operator>(const const_iterator& other) const
+ {
+ return not operator<=(other);
+ }
+
+ /// comparison: greater than or equal
+ bool operator>=(const const_iterator& other) const
+ {
+ return not operator<(other);
+ }
+
+ /// add to iterator
+ const_iterator& operator+=(difference_type i)
+ {
+ switch (m_object->m_type)
+ {
+ case (basic_json::value_t::object):
+ {
+ throw std::domain_error("cannot use operator+= for object iterators");
+ }
+
+ case (basic_json::value_t::array):
+ {
+ m_it.array_iterator += i;
+ break;
+ }
+
+ default:
+ {
+ m_it.primitive_iterator += i;
+ break;
+ }
+ }
+
+ return *this;
+ }
+
+ /// subtract from iterator
+ const_iterator& operator-=(difference_type i)
+ {
+ return operator+=(-i);
+ }
+
+ /// add to iterator
+ const_iterator operator+(difference_type i)
+ {
+ auto result = *this;
+ result += i;
+ return result;
+ }
+
+ /// subtract from iterator
+ const_iterator operator-(difference_type i)
+ {
+ auto result = *this;
+ result -= i;
+ return result;
+ }
+
+ /// return difference
+ difference_type operator-(const const_iterator& other) const
+ {
+ switch (m_object->m_type)
+ {
+ case (basic_json::value_t::object):
+ {
+ throw std::domain_error("cannot use operator- for object iterators");
+ }
+
+ case (basic_json::value_t::array):
+ {
+ return m_it.array_iterator - other.m_it.array_iterator;
+ }
+
+ default:
+ {
+ return m_it.primitive_iterator - other.m_it.primitive_iterator;
+ }
+ }
+ }
+
+ /// access to successor
+ reference operator[](difference_type n) const
+ {
+ switch (m_object->m_type)
+ {
+ case (basic_json::value_t::object):
+ {
+ throw std::domain_error("cannot use operator[] for object iterators");
+ }
+
+ case (basic_json::value_t::array):
+ {
+ return *(m_it.array_iterator + n);
+ }
+
+ case (basic_json::value_t::null):
+ {
+ throw std::out_of_range("cannot get value");
+ }
+
+ default:
+ {
+ if (m_it.primitive_iterator == -n)
+ {
+ return *m_object;
+ }
+ else
+ {
+ throw std::out_of_range("cannot get value");
+ }
+ }
+ }
+ }
+
+ /// return the key of an object iterator
+ typename object_t::key_type key() const
+ {
+ switch (m_object->m_type)
+ {
+ case (basic_json::value_t::object):
+ {
+ return m_it.object_iterator->first;
+ }
+
+ default:
+ {
+ throw std::domain_error("cannot use key() for non-object iterators");
+ }
+ }
+ }
+
+ /// return the value of an iterator
+ reference value() const
+ {
+ return operator*();
+ }
+
+ private:
+ /// associated JSON instance
+ pointer m_object = nullptr;
+ /// the actual iterator of the associated instance
+ internal_iterator m_it = internal_iterator();
+ };
+
+ /*!
+ @brief a mutable random access iterator for the @ref basic_json class
+
+ @requirement The class satisfies the following concept requirements:
+ - [RandomAccessIterator](http://en.cppreference.com/w/cpp/concept/RandomAccessIterator):
+ The iterator that can be moved to point (forward and backward) to any
+ element in constant time.
+ - [OutputIterator](http://en.cppreference.com/w/cpp/concept/OutputIterator):
+ It is possible to write to the pointed-to element.
+ */
+ class iterator : public const_iterator
+ {
+ public:
+ using base_iterator = const_iterator;
+ using pointer = typename basic_json::pointer;
+ using reference = typename basic_json::reference;
+
+ /// default constructor
+ iterator() = default;
+
+ /// constructor for a given JSON instance
+ iterator(pointer object) noexcept : base_iterator(object)
+ {}
+
+ /// copy constructor
+ iterator(const iterator& other) noexcept
+ : base_iterator(other)
+ {}
+
+ /// copy assignment
+ iterator& operator=(iterator other) noexcept(
+ std::is_nothrow_move_constructible<pointer>::value and
+ std::is_nothrow_move_assignable<pointer>::value and
+ std::is_nothrow_move_constructible<internal_iterator>::value and
+ std::is_nothrow_move_assignable<internal_iterator>::value
+ )
+ {
+ base_iterator::operator=(other);
+ return *this;
+ }
+
+ /// return a reference to the value pointed to by the iterator
+ reference operator*()
+ {
+ return const_cast<reference>(base_iterator::operator*());
+ }
+
+ /// dereference the iterator
+ pointer operator->()
+ {
+ return const_cast<pointer>(base_iterator::operator->());
+ }
+
+ /// post-increment (it++)
+ iterator operator++(int)
+ {
+ iterator result = *this;
+ base_iterator::operator++();
+ return result;
+ }
+
+ /// pre-increment (++it)
+ iterator& operator++()
+ {
+ base_iterator::operator++();
+ return *this;
+ }
+
+ /// post-decrement (it--)
+ iterator operator--(int)
+ {
+ iterator result = *this;
+ base_iterator::operator--();
+ return result;
+ }
+
+ /// pre-decrement (--it)
+ iterator& operator--()
+ {
+ base_iterator::operator--();
+ return *this;
+ }
+
+ /// add to iterator
+ iterator& operator+=(difference_type i)
+ {
+ base_iterator::operator+=(i);
+ return *this;
+ }
+
+ /// subtract from iterator
+ iterator& operator-=(difference_type i)
+ {
+ base_iterator::operator-=(i);
+ return *this;
+ }
+
+ /// add to iterator
+ iterator operator+(difference_type i)
+ {
+ auto result = *this;
+ result += i;
+ return result;
+ }
+
+ /// subtract from iterator
+ iterator operator-(difference_type i)
+ {
+ auto result = *this;
+ result -= i;
+ return result;
+ }
+
+ difference_type operator-(const iterator& other) const
+ {
+ return base_iterator::operator-(other);
+ }
+
+ /// access to successor
+ reference operator[](difference_type n) const
+ {
+ return const_cast<reference>(base_iterator::operator[](n));
+ }
+
+ /// return the value of an iterator
+ reference value() const
+ {
+ return const_cast<reference>(base_iterator::value());
+ }
+ };
+
+ /*!
+ @brief a template for a reverse iterator class
+
+ @tparam Base the base iterator type to reverse. Valid types are @ref
+ iterator (to create @ref reverse_iterator) and @ref const_iterator (to
+ create @ref const_reverse_iterator).
+
+ @requirement The class satisfies the following concept requirements:
+ - [RandomAccessIterator](http://en.cppreference.com/w/cpp/concept/RandomAccessIterator):
+ The iterator that can be moved to point (forward and backward) to any
+ element in constant time.
+ - [OutputIterator](http://en.cppreference.com/w/cpp/concept/OutputIterator):
+ It is possible to write to the pointed-to element (only if @a Base is
+ @ref iterator).
+ */
+ template<typename Base>
+ class json_reverse_iterator : public std::reverse_iterator<Base>
+ {
+ public:
+ /// shortcut to the reverse iterator adaptor
+ using base_iterator = std::reverse_iterator<Base>;
+ /// the reference type for the pointed-to element
+ using reference = typename Base::reference;
+
+ /// create reverse iterator from iterator
+ json_reverse_iterator(const typename base_iterator::iterator_type& it)
+ : base_iterator(it) {}
+
+ /// create reverse iterator from base class
+ json_reverse_iterator(const base_iterator& it) : base_iterator(it) {}
+
+ /// post-increment (it++)
+ json_reverse_iterator operator++(int)
+ {
+ return base_iterator::operator++(1);
+ }
+
+ /// pre-increment (++it)
+ json_reverse_iterator& operator++()
+ {
+ base_iterator::operator++();
+ return *this;
+ }
+
+ /// post-decrement (it--)
+ json_reverse_iterator operator--(int)
+ {
+ return base_iterator::operator--(1);
+ }
+
+ /// pre-decrement (--it)
+ json_reverse_iterator& operator--()
+ {
+ base_iterator::operator--();
+ return *this;
+ }
+
+ /// add to iterator
+ json_reverse_iterator& operator+=(difference_type i)
+ {
+ base_iterator::operator+=(i);
+ return *this;
+ }
+
+ /// add to iterator
+ json_reverse_iterator operator+(difference_type i) const
+ {
+ auto result = *this;
+ result += i;
+ return result;
+ }
+
+ /// subtract from iterator
+ json_reverse_iterator operator-(difference_type i) const
+ {
+ auto result = *this;
+ result -= i;
+ return result;
+ }
+
+ /// return difference
+ difference_type operator-(const json_reverse_iterator& other) const
+ {
+ return this->base() - other.base();
+ }
+
+ /// access to successor
+ reference operator[](difference_type n) const
+ {
+ return *(this->operator+(n));
+ }
+
+ /// return the key of an object iterator
+ typename object_t::key_type key() const
+ {
+ auto it = --this->base();
+ return it.key();
+ }
+
+ /// return the value of an iterator
+ reference value() const
+ {
+ auto it = --this->base();
+ return it.operator * ();
+ }
+ };
+
+ /*!
+ @brief wrapper to access iterator member functions in range-based for
+
+ This class allows to access @ref key() and @ref value() during range-based
+ for loops. In these loops, a reference to the JSON values is returned, so
+ there is no access to the underlying iterator.
+ */
+ class iterator_wrapper
+ {
+ private:
+ /// the container to iterate
+ basic_json& container;
+ /// the type of the iterator to use while iteration
+ using json_iterator = decltype(std::begin(container));
+
+ /// internal iterator wrapper
+ class iterator_wrapper_internal
+ {
+ private:
+ /// the iterator
+ json_iterator anchor;
+ /// an index for arrays
+ size_t array_index = 0;
+
+ public:
+ /// construct wrapper given an iterator
+ iterator_wrapper_internal(json_iterator i) : anchor(i)
+ {}
+
+ /// dereference operator (needed for range-based for)
+ iterator_wrapper_internal& operator*()
+ {
+ return *this;
+ }
+
+ /// increment operator (needed for range-based for)
+ iterator_wrapper_internal& operator++()
+ {
+ ++anchor;
+ ++array_index;
+
+ return *this;
+ }
+
+ /// inequality operator (needed for range-based for)
+ bool operator!= (const iterator_wrapper_internal& o)
+ {
+ return anchor != o.anchor;
+ }
+
+ /// stream operator
+ friend std::ostream& operator<<(std::ostream& o, const iterator_wrapper_internal& w)
+ {
+ return o << w.value();
+ }
+
+ /// return key of the iterator
+ typename basic_json::string_t key() const
+ {
+ switch (anchor.m_object->type())
+ {
+ /// use integer array index as key
+ case (value_t::array):
+ {
+ return std::to_string(array_index);
+ }
+
+ /// use key from the object
+ case (value_t::object):
+ {
+ return anchor.key();
+ }
+
+ /// use an empty key for all primitive types
+ default:
+ {
+ return "";
+ }
+ }
+ }
+
+ /// return value of the iterator
+ typename json_iterator::reference value() const
+ {
+ return anchor.value();
+ }
+ };
+
+ public:
+ /// construct iterator wrapper from a container
+ iterator_wrapper(basic_json& cont)
+ : container(cont)
+ {}
+
+ /// return iterator begin (needed for range-based for)
+ iterator_wrapper_internal begin()
+ {
+ return iterator_wrapper_internal(container.begin());
+ }
+
+ /// return iterator end (needed for range-based for)
+ iterator_wrapper_internal end()
+ {
+ return iterator_wrapper_internal(container.end());
+ }
+ };
+
+ private:
+ //////////////////////
+ // lexer and parser //
+ //////////////////////
+
+ /*!
+ @brief lexical analysis
+
+ This class organizes the lexical analysis during JSON deserialization. The
+ core of it is a scanner generated by re2c <http://re2c.org> that processes
+ a buffer and recognizes tokens according to RFC 7159.
+ */
+ class lexer
+ {
+ public:
+ /// token types for the parser
+ enum class token_type
+ {
+ uninitialized, ///< indicating the scanner is uninitialized
+ literal_true, ///< the "true" literal
+ literal_false, ///< the "false" literal
+ literal_null, ///< the "null" literal
+ value_string, ///< a string - use get_string() for actual value
+ value_number, ///< a number - use get_number() for actual value
+ begin_array, ///< the character for array begin "["
+ begin_object, ///< the character for object begin "{"
+ end_array, ///< the character for array end "]"
+ end_object, ///< the character for object end "}"
+ name_separator, ///< the name separator ":"
+ value_separator, ///< the value separator ","
+ parse_error, ///< indicating a parse error
+ end_of_input ///< indicating the end of the input buffer
+ };
+
+ /// the char type to use in the lexer
+ using lexer_char_t = unsigned char;
+
+ /// constructor with a given buffer
+ explicit lexer(const string_t& s) noexcept
+ : m_stream(nullptr), m_buffer(s)
+ {
+ m_content = reinterpret_cast<const lexer_char_t*>(s.c_str());
+ m_start = m_cursor = m_content;
+ m_limit = m_content + s.size();
+ }
+ explicit lexer(std::istream* s) noexcept
+ : m_stream(s), m_buffer()
+ {
+ getline(*m_stream, m_buffer);
+ m_content = reinterpret_cast<const lexer_char_t*>(m_buffer.c_str());
+ m_start = m_cursor = m_content;
+ m_limit = m_content + m_buffer.size();
+ }
+
+ /// default constructor
+ lexer() = default;
+
+ // switch of unwanted functions
+ lexer(const lexer&) = delete;
+ lexer operator=(const lexer&) = delete;
+
+ /*!
+ @brief create a string from a Unicode code point
+
+ @param[in] codepoint1 the code point (can be high surrogate)
+ @param[in] codepoint2 the code point (can be low surrogate or 0)
+ @return string representation of the code point
+ @throw std::out_of_range if code point is >0x10ffff
+ @throw std::invalid_argument if the low surrogate is invalid
+
+ @see <http://en.wikipedia.org/wiki/UTF-8#Sample_code>
+ */
+ static string_t to_unicode(const std::size_t codepoint1,
+ const std::size_t codepoint2 = 0)
+ {
+ string_t result;
+
+ // calculate the codepoint from the given code points
+ std::size_t codepoint = codepoint1;
+
+ // check if codepoint1 is a high surrogate
+ if (codepoint1 >= 0xD800 and codepoint1 <= 0xDBFF)
+ {
+ // check if codepoint2 is a low surrogate
+ if (codepoint2 >= 0xDC00 and codepoint2 <= 0xDFFF)
+ {
+ codepoint =
+ // high surrogate occupies the most significant 22 bits
+ (codepoint1 << 10)
+ // low surrogate occupies the least significant 15 bits
+ + codepoint2
+ // there is still the 0xD800, 0xDC00 and 0x10000 noise
+ // in the result so we have to substract with:
+ // (0xD800 << 10) + DC00 - 0x10000 = 0x35FDC00
+ - 0x35FDC00;
+ }
+ else
+ {
+ throw std::invalid_argument("missing or wrong low surrogate");
+ }
+ }
+
+ if (codepoint < 0x80)
+ {
+ // 1-byte characters: 0xxxxxxx (ASCII)
+ result.append(1, static_cast<typename string_t::value_type>(codepoint));
+ }
+ else if (codepoint <= 0x7ff)
+ {
+ // 2-byte characters: 110xxxxx 10xxxxxx
+ result.append(1, static_cast<typename string_t::value_type>(0xC0 | ((codepoint >> 6) & 0x1F)));
+ result.append(1, static_cast<typename string_t::value_type>(0x80 | (codepoint & 0x3F)));
+ }
+ else if (codepoint <= 0xffff)
+ {
+ // 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx
+ result.append(1, static_cast<typename string_t::value_type>(0xE0 | ((codepoint >> 12) & 0x0F)));
+ result.append(1, static_cast<typename string_t::value_type>(0x80 | ((codepoint >> 6) & 0x3F)));
+ result.append(1, static_cast<typename string_t::value_type>(0x80 | (codepoint & 0x3F)));
+ }
+ else if (codepoint <= 0x10ffff)
+ {
+ // 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ result.append(1, static_cast<typename string_t::value_type>(0xF0 | ((codepoint >> 18) & 0x07)));
+ result.append(1, static_cast<typename string_t::value_type>(0x80 | ((codepoint >> 12) & 0x3F)));
+ result.append(1, static_cast<typename string_t::value_type>(0x80 | ((codepoint >> 6) & 0x3F)));
+ result.append(1, static_cast<typename string_t::value_type>(0x80 | (codepoint & 0x3F)));
+ }
+ else
+ {
+ throw std::out_of_range("code points above 0x10FFFF are invalid");
+ }
+
+ return result;
+ }
+
+ /// return name of values of type token_type
+ static std::string token_type_name(token_type t)
+ {
+ switch (t)
+ {
+ case (token_type::uninitialized):
+ return "<uninitialized>";
+ case (token_type::literal_true):
+ return "true literal";
+ case (token_type::literal_false):
+ return "false literal";
+ case (token_type::literal_null):
+ return "null literal";
+ case (token_type::value_string):
+ return "string literal";
+ case (token_type::value_number):
+ return "number literal";
+ case (token_type::begin_array):
+ return "[";
+ case (token_type::begin_object):
+ return "{";
+ case (token_type::end_array):
+ return "]";
+ case (token_type::end_object):
+ return "}";
+ case (token_type::name_separator):
+ return ":";
+ case (token_type::value_separator):
+ return ",";
+ case (token_type::end_of_input):
+ return "<end of input>";
+ default:
+ return "<parse error>";
+ }
+ }
+
+ /*!
+ This function implements a scanner for JSON. It is specified using
+ regular expressions that try to follow RFC 7159 as close as possible.
+ These regular expressions are then translated into a deterministic
+ finite automaton (DFA) by the tool re2c <http://re2c.org>. As a result,
+ the translated code for this function consists of a large block of code
+ with goto jumps.
+
+ @return the class of the next token read from the buffer
+ */
+ token_type scan() noexcept
+ {
+ // pointer for backtracking information
+ m_marker = nullptr;
+
+ // remember the begin of the token
+ m_start = m_cursor;
+
+
+ {
+ lexer_char_t yych;
+ unsigned int yyaccept = 0;
+ static const unsigned char yybm[] =
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 32, 32, 0, 0, 32, 0, 0,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 96, 64, 0, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 0, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ };
+
+ if ((m_limit - m_cursor) < 5)
+ {
+ yyfill(); // LCOV_EXCL_LINE;
+ }
+ yych = *m_cursor;
+ if (yych <= '9')
+ {
+ if (yych <= ' ')
+ {
+ if (yych <= '\n')
+ {
+ if (yych <= 0x00)
+ {
+ goto basic_json_parser_27;
+ }
+ if (yych <= 0x08)
+ {
+ goto basic_json_parser_29;
+ }
+ if (yych >= '\n')
+ {
+ goto basic_json_parser_4;
+ }
+ }
+ else
+ {
+ if (yych == '\r')
+ {
+ goto basic_json_parser_2;
+ }
+ if (yych <= 0x1F)
+ {
+ goto basic_json_parser_29;
+ }
+ }
+ }
+ else
+ {
+ if (yych <= ',')
+ {
+ if (yych == '"')
+ {
+ goto basic_json_parser_26;
+ }
+ if (yych <= '+')
+ {
+ goto basic_json_parser_29;
+ }
+ goto basic_json_parser_14;
+ }
+ else
+ {
+ if (yych <= '-')
+ {
+ goto basic_json_parser_22;
+ }
+ if (yych <= '/')
+ {
+ goto basic_json_parser_29;
+ }
+ if (yych <= '0')
+ {
+ goto basic_json_parser_23;
+ }
+ goto basic_json_parser_25;
+ }
+ }
+ }
+ else
+ {
+ if (yych <= 'm')
+ {
+ if (yych <= '\\')
+ {
+ if (yych <= ':')
+ {
+ goto basic_json_parser_16;
+ }
+ if (yych == '[')
+ {
+ goto basic_json_parser_6;
+ }
+ goto basic_json_parser_29;
+ }
+ else
+ {
+ if (yych <= ']')
+ {
+ goto basic_json_parser_8;
+ }
+ if (yych == 'f')
+ {
+ goto basic_json_parser_21;
+ }
+ goto basic_json_parser_29;
+ }
+ }
+ else
+ {
+ if (yych <= 'z')
+ {
+ if (yych <= 'n')
+ {
+ goto basic_json_parser_18;
+ }
+ if (yych == 't')
+ {
+ goto basic_json_parser_20;
+ }
+ goto basic_json_parser_29;
+ }
+ else
+ {
+ if (yych <= '{')
+ {
+ goto basic_json_parser_10;
+ }
+ if (yych == '}')
+ {
+ goto basic_json_parser_12;
+ }
+ goto basic_json_parser_29;
+ }
+ }
+ }
+basic_json_parser_2:
+ ++m_cursor;
+ yych = *m_cursor;
+ goto basic_json_parser_5;
+basic_json_parser_3:
+ {
+ return scan();
+ }
+basic_json_parser_4:
+ ++m_cursor;
+ if (m_limit <= m_cursor)
+ {
+ yyfill(); // LCOV_EXCL_LINE;
+ }
+ yych = *m_cursor;
+basic_json_parser_5:
+ if (yybm[0 + yych] & 32)
+ {
+ goto basic_json_parser_4;
+ }
+ goto basic_json_parser_3;
+basic_json_parser_6:
+ ++m_cursor;
+ {
+ return token_type::begin_array;
+ }
+basic_json_parser_8:
+ ++m_cursor;
+ {
+ return token_type::end_array;
+ }
+basic_json_parser_10:
+ ++m_cursor;
+ {
+ return token_type::begin_object;
+ }
+basic_json_parser_12:
+ ++m_cursor;
+ {
+ return token_type::end_object;
+ }
+basic_json_parser_14:
+ ++m_cursor;
+ {
+ return token_type::value_separator;
+ }
+basic_json_parser_16:
+ ++m_cursor;
+ {
+ return token_type::name_separator;
+ }
+basic_json_parser_18:
+ yyaccept = 0;
+ yych = *(m_marker = ++m_cursor);
+ if (yych == 'u')
+ {
+ goto basic_json_parser_59;
+ }
+basic_json_parser_19:
+ {
+ return token_type::parse_error;
+ }
+basic_json_parser_20:
+ yyaccept = 0;
+ yych = *(m_marker = ++m_cursor);
+ if (yych == 'r')
+ {
+ goto basic_json_parser_55;
+ }
+ goto basic_json_parser_19;
+basic_json_parser_21:
+ yyaccept = 0;
+ yych = *(m_marker = ++m_cursor);
+ if (yych == 'a')
+ {
+ goto basic_json_parser_50;
+ }
+ goto basic_json_parser_19;
+basic_json_parser_22:
+ yych = *++m_cursor;
+ if (yych <= '/')
+ {
+ goto basic_json_parser_19;
+ }
+ if (yych <= '0')
+ {
+ goto basic_json_parser_49;
+ }
+ if (yych <= '9')
+ {
+ goto basic_json_parser_40;
+ }
+ goto basic_json_parser_19;
+basic_json_parser_23:
+ yyaccept = 1;
+ yych = *(m_marker = ++m_cursor);
+ if (yych <= 'D')
+ {
+ if (yych == '.')
+ {
+ goto basic_json_parser_42;
+ }
+ }
+ else
+ {
+ if (yych <= 'E')
+ {
+ goto basic_json_parser_43;
+ }
+ if (yych == 'e')
+ {
+ goto basic_json_parser_43;
+ }
+ }
+basic_json_parser_24:
+ {
+ return token_type::value_number;
+ }
+basic_json_parser_25:
+ yyaccept = 1;
+ yych = *(m_marker = ++m_cursor);
+ goto basic_json_parser_41;
+basic_json_parser_26:
+ yyaccept = 0;
+ yych = *(m_marker = ++m_cursor);
+ if (yych <= 0x0F)
+ {
+ goto basic_json_parser_19;
+ }
+ goto basic_json_parser_31;
+basic_json_parser_27:
+ ++m_cursor;
+ {
+ return token_type::end_of_input;
+ }
+basic_json_parser_29:
+ yych = *++m_cursor;
+ goto basic_json_parser_19;
+basic_json_parser_30:
+ ++m_cursor;
+ if (m_limit <= m_cursor)
+ {
+ yyfill(); // LCOV_EXCL_LINE;
+ }
+ yych = *m_cursor;
+basic_json_parser_31:
+ if (yybm[0 + yych] & 64)
+ {
+ goto basic_json_parser_30;
+ }
+ if (yych <= 0x0F)
+ {
+ goto basic_json_parser_32;
+ }
+ if (yych <= '"')
+ {
+ goto basic_json_parser_34;
+ }
+ goto basic_json_parser_33;
+basic_json_parser_32:
+ m_cursor = m_marker;
+ if (yyaccept == 0)
+ {
+ goto basic_json_parser_19;
+ }
+ else
+ {
+ goto basic_json_parser_24;
+ }
+basic_json_parser_33:
+ ++m_cursor;
+ if (m_limit <= m_cursor)
+ {
+ yyfill(); // LCOV_EXCL_LINE;
+ }
+ yych = *m_cursor;
+ if (yych <= 'e')
+ {
+ if (yych <= '/')
+ {
+ if (yych == '"')
+ {
+ goto basic_json_parser_30;
+ }
+ if (yych <= '.')
+ {
+ goto basic_json_parser_32;
+ }
+ goto basic_json_parser_30;
+ }
+ else
+ {
+ if (yych <= '\\')
+ {
+ if (yych <= '[')
+ {
+ goto basic_json_parser_32;
+ }
+ goto basic_json_parser_30;
+ }
+ else
+ {
+ if (yych == 'b')
+ {
+ goto basic_json_parser_30;
+ }
+ goto basic_json_parser_32;
+ }
+ }
+ }
+ else
+ {
+ if (yych <= 'q')
+ {
+ if (yych <= 'f')
+ {
+ goto basic_json_parser_30;
+ }
+ if (yych == 'n')
+ {
+ goto basic_json_parser_30;
+ }
+ goto basic_json_parser_32;
+ }
+ else
+ {
+ if (yych <= 's')
+ {
+ if (yych <= 'r')
+ {
+ goto basic_json_parser_30;
+ }
+ goto basic_json_parser_32;
+ }
+ else
+ {
+ if (yych <= 't')
+ {
+ goto basic_json_parser_30;
+ }
+ if (yych <= 'u')
+ {
+ goto basic_json_parser_36;
+ }
+ goto basic_json_parser_32;
+ }
+ }
+ }
+basic_json_parser_34:
+ ++m_cursor;
+ {
+ return token_type::value_string;
+ }
+basic_json_parser_36:
+ ++m_cursor;
+ if (m_limit <= m_cursor)
+ {
+ yyfill(); // LCOV_EXCL_LINE;
+ }
+ yych = *m_cursor;
+ if (yych <= '@')
+ {
+ if (yych <= '/')
+ {
+ goto basic_json_parser_32;
+ }
+ if (yych >= ':')
+ {
+ goto basic_json_parser_32;
+ }
+ }
+ else
+ {
+ if (yych <= 'F')
+ {
+ goto basic_json_parser_37;
+ }
+ if (yych <= '`')
+ {
+ goto basic_json_parser_32;
+ }
+ if (yych >= 'g')
+ {
+ goto basic_json_parser_32;
+ }
+ }
+basic_json_parser_37:
+ ++m_cursor;
+ if (m_limit <= m_cursor)
+ {
+ yyfill(); // LCOV_EXCL_LINE;
+ }
+ yych = *m_cursor;
+ if (yych <= '@')
+ {
+ if (yych <= '/')
+ {
+ goto basic_json_parser_32;
+ }
+ if (yych >= ':')
+ {
+ goto basic_json_parser_32;
+ }
+ }
+ else
+ {
+ if (yych <= 'F')
+ {
+ goto basic_json_parser_38;
+ }
+ if (yych <= '`')
+ {
+ goto basic_json_parser_32;
+ }
+ if (yych >= 'g')
+ {
+ goto basic_json_parser_32;
+ }
+ }
+basic_json_parser_38:
+ ++m_cursor;
+ if (m_limit <= m_cursor)
+ {
+ yyfill(); // LCOV_EXCL_LINE;
+ }
+ yych = *m_cursor;
+ if (yych <= '@')
+ {
+ if (yych <= '/')
+ {
+ goto basic_json_parser_32;
+ }
+ if (yych >= ':')
+ {
+ goto basic_json_parser_32;
+ }
+ }
+ else
+ {
+ if (yych <= 'F')
+ {
+ goto basic_json_parser_39;
+ }
+ if (yych <= '`')
+ {
+ goto basic_json_parser_32;
+ }
+ if (yych >= 'g')
+ {
+ goto basic_json_parser_32;
+ }
+ }
+basic_json_parser_39:
+ ++m_cursor;
+ if (m_limit <= m_cursor)
+ {
+ yyfill(); // LCOV_EXCL_LINE;
+ }
+ yych = *m_cursor;
+ if (yych <= '@')
+ {
+ if (yych <= '/')
+ {
+ goto basic_json_parser_32;
+ }
+ if (yych <= '9')
+ {
+ goto basic_json_parser_30;
+ }
+ goto basic_json_parser_32;
+ }
+ else
+ {
+ if (yych <= 'F')
+ {
+ goto basic_json_parser_30;
+ }
+ if (yych <= '`')
+ {
+ goto basic_json_parser_32;
+ }
+ if (yych <= 'f')
+ {
+ goto basic_json_parser_30;
+ }
+ goto basic_json_parser_32;
+ }
+basic_json_parser_40:
+ yyaccept = 1;
+ m_marker = ++m_cursor;
+ if ((m_limit - m_cursor) < 3)
+ {
+ yyfill(); // LCOV_EXCL_LINE;
+ }
+ yych = *m_cursor;
+basic_json_parser_41:
+ if (yybm[0 + yych] & 128)
+ {
+ goto basic_json_parser_40;
+ }
+ if (yych <= 'D')
+ {
+ if (yych != '.')
+ {
+ goto basic_json_parser_24;
+ }
+ }
+ else
+ {
+ if (yych <= 'E')
+ {
+ goto basic_json_parser_43;
+ }
+ if (yych == 'e')
+ {
+ goto basic_json_parser_43;
+ }
+ goto basic_json_parser_24;
+ }
+basic_json_parser_42:
+ yych = *++m_cursor;
+ if (yych <= '/')
+ {
+ goto basic_json_parser_32;
+ }
+ if (yych <= '9')
+ {
+ goto basic_json_parser_47;
+ }
+ goto basic_json_parser_32;
+basic_json_parser_43:
+ yych = *++m_cursor;
+ if (yych <= ',')
+ {
+ if (yych != '+')
+ {
+ goto basic_json_parser_32;
+ }
+ }
+ else
+ {
+ if (yych <= '-')
+ {
+ goto basic_json_parser_44;
+ }
+ if (yych <= '/')
+ {
+ goto basic_json_parser_32;
+ }
+ if (yych <= '9')
+ {
+ goto basic_json_parser_45;
+ }
+ goto basic_json_parser_32;
+ }
+basic_json_parser_44:
+ yych = *++m_cursor;
+ if (yych <= '/')
+ {
+ goto basic_json_parser_32;
+ }
+ if (yych >= ':')
+ {
+ goto basic_json_parser_32;
+ }
+basic_json_parser_45:
+ ++m_cursor;
+ if (m_limit <= m_cursor)
+ {
+ yyfill(); // LCOV_EXCL_LINE;
+ }
+ yych = *m_cursor;
+ if (yych <= '/')
+ {
+ goto basic_json_parser_24;
+ }
+ if (yych <= '9')
+ {
+ goto basic_json_parser_45;
+ }
+ goto basic_json_parser_24;
+basic_json_parser_47:
+ yyaccept = 1;
+ m_marker = ++m_cursor;
+ if ((m_limit - m_cursor) < 3)
+ {
+ yyfill(); // LCOV_EXCL_LINE;
+ }
+ yych = *m_cursor;
+ if (yych <= 'D')
+ {
+ if (yych <= '/')
+ {
+ goto basic_json_parser_24;
+ }
+ if (yych <= '9')
+ {
+ goto basic_json_parser_47;
+ }
+ goto basic_json_parser_24;
+ }
+ else
+ {
+ if (yych <= 'E')
+ {
+ goto basic_json_parser_43;
+ }
+ if (yych == 'e')
+ {
+ goto basic_json_parser_43;
+ }
+ goto basic_json_parser_24;
+ }
+basic_json_parser_49:
+ yyaccept = 1;
+ yych = *(m_marker = ++m_cursor);
+ if (yych <= 'D')
+ {
+ if (yych == '.')
+ {
+ goto basic_json_parser_42;
+ }
+ goto basic_json_parser_24;
+ }
+ else
+ {
+ if (yych <= 'E')
+ {
+ goto basic_json_parser_43;
+ }
+ if (yych == 'e')
+ {
+ goto basic_json_parser_43;
+ }
+ goto basic_json_parser_24;
+ }
+basic_json_parser_50:
+ yych = *++m_cursor;
+ if (yych != 'l')
+ {
+ goto basic_json_parser_32;
+ }
+ yych = *++m_cursor;
+ if (yych != 's')
+ {
+ goto basic_json_parser_32;
+ }
+ yych = *++m_cursor;
+ if (yych != 'e')
+ {
+ goto basic_json_parser_32;
+ }
+ ++m_cursor;
+ {
+ return token_type::literal_false;
+ }
+basic_json_parser_55:
+ yych = *++m_cursor;
+ if (yych != 'u')
+ {
+ goto basic_json_parser_32;
+ }
+ yych = *++m_cursor;
+ if (yych != 'e')
+ {
+ goto basic_json_parser_32;
+ }
+ ++m_cursor;
+ {
+ return token_type::literal_true;
+ }
+basic_json_parser_59:
+ yych = *++m_cursor;
+ if (yych != 'l')
+ {
+ goto basic_json_parser_32;
+ }
+ yych = *++m_cursor;
+ if (yych != 'l')
+ {
+ goto basic_json_parser_32;
+ }
+ ++m_cursor;
+ {
+ return token_type::literal_null;
+ }
+ }
+
+
+ }
+
+ /// append data from the stream to the internal buffer
+ void yyfill() noexcept
+ {
+ if (not m_stream or not * m_stream)
+ {
+ return;
+ }
+
+ const ssize_t offset_start = m_start - m_content;
+ const ssize_t offset_marker = m_marker - m_start;
+ const ssize_t offset_cursor = m_cursor - m_start;
+
+ m_buffer.erase(0, static_cast<size_t>(offset_start));
+ std::string line;
+ std::getline(*m_stream, line);
+ m_buffer += "\n" + line; // add line with newline symbol
+
+ m_content = reinterpret_cast<const lexer_char_t*>(m_buffer.c_str());
+ m_start = m_content;
+ m_marker = m_start + offset_marker;
+ m_cursor = m_start + offset_cursor;
+ m_limit = m_start + m_buffer.size() - 1;
+ }
+
+ /// return string representation of last read token
+ string_t get_token() const noexcept
+ {
+ return string_t(reinterpret_cast<typename string_t::const_pointer>(m_start),
+ static_cast<size_t>(m_cursor - m_start));
+ }
+
+ /*!
+ @brief return string value for string tokens
+
+ The function iterates the characters between the opening and closing
+ quotes of the string value. The complete string is the range
+ [m_start,m_cursor). Consequently, we iterate from m_start+1 to
+ m_cursor-1.
+
+ We differentiate two cases:
+
+ 1. Escaped characters. In this case, a new character is constructed
+ according to the nature of the escape. Some escapes create new
+ characters (e.g., @c "\\n" is replaced by @c "\n"), some are copied
+ as is (e.g., @c "\\\\"). Furthermore, Unicode escapes of the shape
+ @c "\\uxxxx" need special care. In this case, to_unicode takes care
+ of the construction of the values.
+ 2. Unescaped characters are copied as is.
+
+ @return string value of current token without opening and closing quotes
+ @throw std::out_of_range if to_unicode fails
+ */
+ string_t get_string() const
+ {
+ string_t result;
+ result.reserve(static_cast<size_t>(m_cursor - m_start - 2));
+
+ // iterate the result between the quotes
+ for (const lexer_char_t* i = m_start + 1; i < m_cursor - 1; ++i)
+ {
+ // process escaped characters
+ if (*i == '\\')
+ {
+ // read next character
+ ++i;
+
+ switch (*i)
+ {
+ // the default escapes
+ case 't':
+ {
+ result += "\t";
+ break;
+ }
+ case 'b':
+ {
+ result += "\b";
+ break;
+ }
+ case 'f':
+ {
+ result += "\f";
+ break;
+ }
+ case 'n':
+ {
+ result += "\n";
+ break;
+ }
+ case 'r':
+ {
+ result += "\r";
+ break;
+ }
+ case '\\':
+ {
+ result += "\\";
+ break;
+ }
+ case '/':
+ {
+ result += "/";
+ break;
+ }
+ case '"':
+ {
+ result += "\"";
+ break;
+ }
+
+ // unicode
+ case 'u':
+ {
+ // get code xxxx from uxxxx
+ auto codepoint = std::strtoul(std::string(reinterpret_cast<typename string_t::const_pointer>(i + 1),
+ 4).c_str(), nullptr, 16);
+
+ // check if codepoint is a high surrogate
+ if (codepoint >= 0xD800 and codepoint <= 0xDBFF)
+ {
+ // make sure there is a subsequent unicode
+ if ((i + 6 >= m_limit) or * (i + 5) != '\\' or * (i + 6) != 'u')
+ {
+ throw std::invalid_argument("missing low surrogate");
+ }
+
+ // get code yyyy from uxxxx\uyyyy
+ auto codepoint2 = std::strtoul(std::string(reinterpret_cast<typename string_t::const_pointer>
+ (i + 7), 4).c_str(), nullptr, 16);
+ result += to_unicode(codepoint, codepoint2);
+ // skip the next 11 characters (xxxx\uyyyy)
+ i += 11;
+ }
+ else
+ {
+ // add unicode character(s)
+ result += to_unicode(codepoint);
+ // skip the next four characters (xxxx)
+ i += 4;
+ }
+ break;
+ }
+ }
+ }
+ else
+ {
+ // all other characters are just copied to the end of the
+ // string
+ result.append(1, static_cast<typename string_t::value_type>(*i));
+ }
+ }
+
+ return result;
+ }
+
+ /*!
+ @brief return number value for number tokens
+
+ This function translates the last token into a floating point number.
+ The pointer m_start points to the beginning of the parsed number. We
+ pass this pointer to std::strtod which sets endptr to the first
+ character past the converted number. If this pointer is not the same as
+ m_cursor, then either more or less characters have been used during the
+ comparison. This can happen for inputs like "01" which will be treated
+ like number 0 followed by number 1.
+
+ @return the result of the number conversion or NAN if the conversion
+ read past the current token. The latter case needs to be treated by the
+ caller function.
+
+ @throw std::range_error if passed value is out of range
+ */
+ long double get_number() const
+ {
+ // conversion
+ typename string_t::value_type* endptr;
+ const auto float_val = std::strtold(reinterpret_cast<typename string_t::const_pointer>(m_start),
+ &endptr);
+
+ // return float_val if the whole number was translated and NAN
+ // otherwise
+ return (reinterpret_cast<lexer_char_t*>(endptr) == m_cursor) ? float_val : NAN;
+ }
+
+ private:
+ /// optional input stream
+ std::istream* m_stream;
+ /// the buffer
+ string_t m_buffer;
+ /// the buffer pointer
+ const lexer_char_t* m_content = nullptr;
+ /// pointer to the beginning of the current symbol
+ const lexer_char_t* m_start = nullptr;
+ /// pointer for backtracking information
+ const lexer_char_t* m_marker = nullptr;
+ /// pointer to the current symbol
+ const lexer_char_t* m_cursor = nullptr;
+ /// pointer to the end of the buffer
+ const lexer_char_t* m_limit = nullptr;
+ };
+
+ /*!
+ @brief syntax analysis
+ */
+ class parser
+ {
+ public:
+ /// constructor for strings
+ parser(const string_t& s, parser_callback_t cb = nullptr)
+ : callback(cb), m_lexer(s)
+ {
+ // read first token
+ get_token();
+ }
+
+ /// a parser reading from an input stream
+ parser(std::istream& _is, parser_callback_t cb = nullptr)
+ : callback(cb), m_lexer(&_is)
+ {
+ // read first token
+ get_token();
+ }
+
+ /// public parser interface
+ basic_json parse()
+ {
+ basic_json result = parse_internal(true);
+
+ expect(lexer::token_type::end_of_input);
+
+ // return parser result and replace it with null in case the
+ // top-level value was discarded by the callback function
+ return result.is_discarded() ? basic_json() : result;
+ }
+
+ private:
+ /// the actual parser
+ basic_json parse_internal(bool keep)
+ {
+ auto result = basic_json(value_t::discarded);
+
+ switch (last_token)
+ {
+ case (lexer::token_type::begin_object):
+ {
+ if (keep and (not callback or (keep = callback(depth++, parse_event_t::object_start, result))))
+ {
+ // explicitly set result to object to cope with {}
+ result.m_type = value_t::object;
+ result.m_value = json_value(value_t::object);
+ }
+
+ // read next token
+ get_token();
+
+ // closing } -> we are done
+ if (last_token == lexer::token_type::end_object)
+ {
+ get_token();
+ if (keep and callback and not callback(--depth, parse_event_t::object_end, result))
+ {
+ result = basic_json(value_t::discarded);
+ }
+ return result;
+ }
+
+ // no comma is expected here
+ unexpect(lexer::token_type::value_separator);
+
+ // otherwise: parse key-value pairs
+ do
+ {
+ // ugly, but could be fixed with loop reorganization
+ if (last_token == lexer::token_type::value_separator)
+ {
+ get_token();
+ }
+
+ // store key
+ expect(lexer::token_type::value_string);
+ const auto key = m_lexer.get_string();
+
+ bool keep_tag = false;
+ if (keep)
+ {
+ if (callback)
+ {
+ basic_json k(key);
+ keep_tag = callback(depth, parse_event_t::key, k);
+ }
+ else
+ {
+ keep_tag = true;
+ }
+ }
+
+ // parse separator (:)
+ get_token();
+ expect(lexer::token_type::name_separator);
+
+ // parse and add value
+ get_token();
+ auto value = parse_internal(keep);
+ if (keep and keep_tag and not value.is_discarded())
+ {
+ result[key] = std::move(value);
+ }
+ }
+ while (last_token == lexer::token_type::value_separator);
+
+ // closing }
+ expect(lexer::token_type::end_object);
+ get_token();
+ if (keep and callback and not callback(--depth, parse_event_t::object_end, result))
+ {
+ result = basic_json(value_t::discarded);
+ }
+
+ return result;
+ }
+
+ case (lexer::token_type::begin_array):
+ {
+ if (keep and (not callback or (keep = callback(depth++, parse_event_t::array_start, result))))
+ {
+ // explicitly set result to object to cope with []
+ result.m_type = value_t::array;
+ result.m_value = json_value(value_t::array);
+ }
+
+ // read next token
+ get_token();
+
+ // closing ] -> we are done
+ if (last_token == lexer::token_type::end_array)
+ {
+ get_token();
+ if (callback and not callback(--depth, parse_event_t::array_end, result))
+ {
+ result = basic_json(value_t::discarded);
+ }
+ return result;
+ }
+
+ // no comma is expected here
+ unexpect(lexer::token_type::value_separator);
+
+ // otherwise: parse values
+ do
+ {
+ // ugly, but could be fixed with loop reorganization
+ if (last_token == lexer::token_type::value_separator)
+ {
+ get_token();
+ }
+
+ // parse value
+ auto value = parse_internal(keep);
+ if (keep and not value.is_discarded())
+ {
+ result.push_back(std::move(value));
+ }
+ }
+ while (last_token == lexer::token_type::value_separator);
+
+ // closing ]
+ expect(lexer::token_type::end_array);
+ get_token();
+ if (keep and callback and not callback(--depth, parse_event_t::array_end, result))
+ {
+ result = basic_json(value_t::discarded);
+ }
+
+ return result;
+ }
+
+ case (lexer::token_type::literal_null):
+ {
+ get_token();
+ result.m_type = value_t::null;
+ break;
+ }
+
+ case (lexer::token_type::value_string):
+ {
+ const auto s = m_lexer.get_string();
+ get_token();
+ result = basic_json(s);
+ break;
+ }
+
+ case (lexer::token_type::literal_true):
+ {
+ get_token();
+ result.m_type = value_t::boolean;
+ result.m_value = true;
+ break;
+ }
+
+ case (lexer::token_type::literal_false):
+ {
+ get_token();
+ result.m_type = value_t::boolean;
+ result.m_value = false;
+ break;
+ }
+
+ case (lexer::token_type::value_number):
+ {
+ auto float_val = m_lexer.get_number();
+
+ // NAN is returned if token could not be translated
+ // completely
+ if (std::isnan(float_val))
+ {
+ throw std::invalid_argument(std::string("parse error - ") +
+ m_lexer.get_token() + " is not a number");
+ }
+
+ get_token();
+
+ // check if conversion loses precision
+ const auto int_val = static_cast<number_integer_t>(float_val);
+ if (approx(float_val, static_cast<long double>(int_val)))
+ {
+ // we basic_json not lose precision -> return int
+ result.m_type = value_t::number_integer;
+ result.m_value = int_val;
+ }
+ else
+ {
+ // we would lose precision -> returnfloat
+ result.m_type = value_t::number_float;
+ result.m_value = static_cast<number_float_t>(float_val);
+ }
+ break;
+ }
+
+ default:
+ {
+ // the last token was unexpected
+ unexpect(last_token);
+ }
+ }
+
+ if (keep and callback and not callback(depth, parse_event_t::value, result))
+ {
+ result = basic_json(value_t::discarded);
+ }
+ return result;
+ }
+
+ /// get next token from lexer
+ typename lexer::token_type get_token()
+ {
+ last_token = m_lexer.scan();
+ return last_token;
+ }
+
+ void expect(typename lexer::token_type t) const
+ {
+ if (t != last_token)
+ {
+ std::string error_msg = "parse error - unexpected \'";
+ error_msg += m_lexer.get_token();
+ error_msg += "\' (" + lexer::token_type_name(last_token);
+ error_msg += "); expected " + lexer::token_type_name(t);
+ throw std::invalid_argument(error_msg);
+ }
+ }
+
+ void unexpect(typename lexer::token_type t) const
+ {
+ if (t == last_token)
+ {
+ std::string error_msg = "parse error - unexpected \'";
+ error_msg += m_lexer.get_token();
+ error_msg += "\' (";
+ error_msg += lexer::token_type_name(last_token) + ")";
+ throw std::invalid_argument(error_msg);
+ }
+ }
+
+ private:
+ /// current level of recursion
+ int depth = 0;
+ /// callback function
+ parser_callback_t callback;
+ /// the type of the last read token
+ typename lexer::token_type last_token = lexer::token_type::uninitialized;
+ /// the lexer
+ lexer m_lexer;
+ };
+};
+
+
+/////////////
+// presets //
+/////////////
+
+/*!
+ at brief default JSON class
+
+This type is the default specialization of the @ref basic_json class which uses
+the standard template types.
+*/
+using json = basic_json<>;
+}
+
+
+/////////////////////////
+// nonmember functions //
+/////////////////////////
+
+// specialization of std::swap, and std::hash
+namespace std
+{
+/*!
+ at brief exchanges the values of two JSON objects
+*/
+template <>
+inline void swap(nlohmann::json& j1,
+ nlohmann::json& j2) noexcept(
+ is_nothrow_move_constructible<nlohmann::json>::value and
+ is_nothrow_move_assignable<nlohmann::json>::value
+ )
+{
+ j1.swap(j2);
+}
+
+/// hash value for JSON objects
+template <>
+struct hash<nlohmann::json>
+{
+ /// return a hash value for a JSON object
+ std::size_t operator()(const nlohmann::json& j) const
+ {
+ // a naive hashing via the string representation
+ const auto& h = hash<nlohmann::json::string_t>();
+ return h(j.dump());
+ }
+};
+}
+
+/*!
+ at brief user-defined string literal for JSON values
+
+This operator implements a user-defined string literal for JSON objects. It can
+be used by adding \p "_json" to a string literal and returns a JSON object if
+no parse error occurred.
+
+ at param[in] s a string representation of a JSON object
+ at return a JSON object
+*/
+inline nlohmann::json operator "" _json(const char* s, std::size_t)
+{
+ return nlohmann::json::parse(reinterpret_cast<nlohmann::json::string_t::value_type*>
+ (const_cast<char*>(s)));
+}
+
+#endif
diff --git a/tools/pbindex/src/main.cpp b/tools/pbindexdump/src/main.cpp
similarity index 51%
copy from tools/pbindex/src/main.cpp
copy to tools/pbindexdump/src/main.cpp
index 59065fa..a6aefc6 100644
--- a/tools/pbindex/src/main.cpp
+++ b/tools/pbindexdump/src/main.cpp
@@ -35,32 +35,50 @@
// Author: Derek Barnett
-#include "OptionParser.h"
-#include "PbIndex.h"
-#include "PbIndexVersion.h"
+#include "../common/OptionParser.h"
+#include "PbIndexDump.h"
+#include "PbIndexDumpVersion.h"
+#include "Settings.h"
#include <cassert>
#include <iostream>
using namespace std;
static
-pbindex::Settings fromCommandLine(optparse::OptionParser& parser,
- int argc, char* argv[])
+pbindexdump::Settings fromCommandLine(optparse::OptionParser& parser,
+ int argc,
+ char* argv[])
{
const optparse::Values options = parser.parse_args(argc, argv);
- (void)options;
+ pbindexdump::Settings settings;
- pbindex::Settings settings;
-
- // get input filename
+ // input
const vector<string> positionalArgs = parser.args();
const size_t numPositionalArgs = positionalArgs.size();
if (numPositionalArgs == 0)
- settings.errors_.push_back("pbindex requires an input BAM filename");
+ settings.inputPbiFilename_ = "-"; // stdin
else if (numPositionalArgs == 1)
- settings.inputBamFilename_ = parser.args().front();
+ settings.inputPbiFilename_ = parser.args().front();
else {
assert(numPositionalArgs > 1);
- settings.errors_.push_back("pbindex does not support more than one input file per run");
+ settings.errors_.push_back("pbindexdump does not support more than one input file per run");
+ }
+
+ // output format
+ if (options.is_set("format"))
+ settings.format_ = options["format"];
+
+ // JSON options
+ if (settings.format_ == "json") {
+ if (options.is_set("json_indent_level"))
+ settings.jsonIndentLevel_ = options.get("json_indent_level");
+ if (options.is_set("json_raw"))
+ settings.jsonRaw_ = options.get("json_raw");
+ } else {
+ if (options.is_set("json_indent_level") ||
+ options.is_set("json_raw"))
+ {
+ settings.errors_.push_back("JSON formatting options not valid on non-JSON output");
+ }
}
return settings;
@@ -70,12 +88,10 @@ int main(int argc, char* argv[])
{
// setup help & options
optparse::OptionParser parser;
- parser.description("pbindex creates a index file that enables random-access to PacBio-specific data in BAM files. "
- "Generated index filename will be the same as input BAM plus .pbi suffix."
- );
- parser.prog("pbindex");
- parser.usage("pbindex <input>");
- parser.version(pbindex::Version);
+ parser.description("pbindexdump prints a human-readable view of PBI data to stdout.");
+ parser.prog("pbindexdump");
+ parser.usage("pbindexdump [options] [input]");
+ parser.version(pbindexdump::Version);
parser.add_version_option(true);
parser.add_help_option(true);
@@ -83,11 +99,32 @@ int main(int argc, char* argv[])
ioGroup.add_option("")
.dest("input")
.metavar("input")
- .help("Input BAM file");
+ .help("Input PBI file. If not provided, stdin will be used as input.");
+ ioGroup.add_option("--format")
+ .dest("format")
+ .metavar("STRING")
+ .help("Output format, one of:\n"
+ " json, cpp\n\n"
+ "json: pretty-printed JSON [default]\n\n"
+ "cpp: copy/paste-able C++ code that can be used to construct the"
+ " equivalent PacBio::BAM::PbiRawData object");
parser.add_option_group(ioGroup);
+ auto jsonGroup = optparse::OptionGroup(parser, "JSON Formatting");
+ jsonGroup.add_option("--json-indent-level")
+ .dest("json_indent_level")
+ .metavar("INT")
+ .help("JSON indent level [4]");
+ jsonGroup.add_option("--json-raw")
+ .dest("json_raw")
+ .action("store_true")
+ .help("Prints fields in a manner that more closely reflects the PBI"
+ " file format - presenting data as per-field columns, not"
+ " per-record objects.");
+ parser.add_option_group(jsonGroup);
+
// parse command line for settings
- const pbindex::Settings settings = fromCommandLine(parser, argc, argv);
+ const pbindexdump::Settings settings = fromCommandLine(parser, argc, argv);
if (!settings.errors_.empty()) {
cerr << endl;
for (const auto e : settings.errors_)
@@ -98,5 +135,12 @@ int main(int argc, char* argv[])
}
// run tool
- return pbindex::PbIndex::Run(settings);
+ try {
+ pbindexdump::PbIndexDump::Run(settings);
+ return EXIT_SUCCESS;
+ }
+ catch (std::exception& e) {
+ cerr << "ERROR: " << e.what() << endl;
+ return EXIT_FAILURE;
+ }
}
diff --git a/tools/pbmerge/CMakeLists.txt b/tools/pbmerge/CMakeLists.txt
new file mode 100644
index 0000000..c9728d7
--- /dev/null
+++ b/tools/pbmerge/CMakeLists.txt
@@ -0,0 +1,36 @@
+
+set(PbmergeSrcDir ${PacBioBAM_ToolsDir}/pbmerge/src)
+
+# create version header
+set(PbMerge_VERSION ${PacBioBAM_VERSION})
+configure_file(
+ ${PbmergeSrcDir}/PbMergeVersion.h.in PbMergeVersion.h @ONLY
+)
+
+# list source files
+set(PBMERGE_SOURCES
+ ${ToolsCommonDir}/BamFileMerger.h
+ ${ToolsCommonDir}/OptionParser.cpp
+ ${PbmergeSrcDir}/main.cpp
+)
+
+# build pbmerge executable
+include(PbbamTool)
+create_pbbam_tool(
+ TARGET pbmerge
+ SOURCES ${PBMERGE_SOURCES}
+)
+
+# cram tests
+if (PacBioBAM_build_tests)
+ add_test(
+ NAME pbmerge_CramTests
+ WORKING_DIRECTORY ${PacBioBAM_TestsDir}/scripts
+ COMMAND "python" cram.py
+ ${PacBioBAM_CramTestsDir}/pbmerge_pacbio_ordering.t
+ ${PacBioBAM_CramTestsDir}/pbmerge_aligned_ordering.t
+ ${PacBioBAM_CramTestsDir}/pbmerge_mixed_ordering.t
+ ${PacBioBAM_CramTestsDir}/pbmerge_dataset.t
+ ${PacBioBAM_CramTestsDir}/pbmerge_fofn.t
+ )
+endif()
diff --git a/src/Config.cpp b/tools/pbmerge/src/PbMergeVersion.h.in
similarity index 86%
copy from src/Config.cpp
copy to tools/pbmerge/src/PbMergeVersion.h.in
index 677ad08..2bda4f0 100644
--- a/src/Config.cpp
+++ b/tools/pbmerge/src/PbMergeVersion.h.in
@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
@@ -35,14 +35,15 @@
// Author: Derek Barnett
-#include "pbbam/Config.h"
-using namespace PacBio;
-using namespace PacBio::BAM;
+#ifndef PBMERGEVERSION_H
+#define PBMERGEVERSION_H
-namespace PacBio {
-namespace BAM {
+#include <string>
-int HtslibVerbosity = 0;
+namespace pbmerge {
-} // namespace BAM
-} // namespace PacBio
+const std::string Version = std::string("@PbMerge_VERSION@");
+
+} // namespace pbmerge
+
+#endif // PBMERGEVERSION_H
diff --git a/tools/pbmerge/src/main.cpp b/tools/pbmerge/src/main.cpp
new file mode 100644
index 0000000..3056dc1
--- /dev/null
+++ b/tools/pbmerge/src/main.cpp
@@ -0,0 +1,174 @@
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "../common/OptionParser.h"
+#include "../common/BamFileMerger.h"
+#include "PbMergeVersion.h"
+#include <cassert>
+#include <iostream>
+using namespace std;
+
+namespace pbmerge {
+
+class Settings
+{
+public:
+ static Settings FromCommandLine(optparse::OptionParser& parser,
+ int argc, char* argv[])
+ {
+ pbmerge::Settings settings;
+ const optparse::Values options = parser.parse_args(argc, argv);
+
+ // input
+ const vector<string> positionalArgs = parser.args();
+ if (positionalArgs.empty())
+ settings.errors_.push_back("at least input one file must be specified");
+ else
+ settings.inputFilenames_ = positionalArgs;
+
+ // output
+ if (options.is_set("output"))
+ settings.outputFilename_ = options["output"];
+ else
+ settings.outputFilename_ = "-"; // stdout
+
+ // PBI?
+ if (settings.outputFilename_ == "-")
+ settings.createPbi_ = false; // always skip PBI if writing to stdout
+ else {
+ if (options.is_set("no_pbi"))
+ settings.createPbi_ = !options.get("no_pbi"); // user-disabled
+ else
+ settings.createPbi_ = true; // not specified, go ahead and generate by default
+ }
+
+ return settings;
+ }
+
+public:
+ std::vector<std::string> inputFilenames_;
+ std::string outputFilename_;
+ bool createPbi_;
+ std::vector<std::string> errors_;
+
+private:
+ Settings(void) { }
+};
+
+} // namespace pbmerge
+
+int main(int argc, char* argv[])
+{
+ // setup help & options
+ optparse::OptionParser parser;
+ parser.description("pbmerge merges PacBio BAM files. If the input is DataSetXML, "
+ "any filters will be applied. If no output filename is specified, "
+ "new BAM will be written to stdout."
+ );
+ parser.prog("pbmerge");
+ parser.usage("pbmerge [options] [-o <out.bam>] <INPUT>");
+ parser.version(pbmerge::Version);
+ parser.add_version_option(true);
+ parser.add_help_option(true);
+
+ auto ioGroup = optparse::OptionGroup(parser, "Input/Output");
+ ioGroup.add_option("-o")
+ .dest("output")
+ .metavar("output")
+ .help("Output BAM filename. ");
+ ioGroup.add_option("--no-pbi")
+ .dest("no_pbi")
+ .action("store_true")
+ .help("Set this option to skip PBI index file creation. PBI creation is "
+ "automatically skipped if no output filename is provided."
+ );
+ ioGroup.add_option("")
+ .dest("input")
+ .metavar("INPUT")
+ .help("Input may be one of:\n"
+ " DataSetXML, list of BAM files, or FOFN\n\n"
+ " fofn: pbmerge -o merged.bam bams.fofn\n\n"
+ " bams: pbmerge -o merged.bam 1.bam 2.bam 3.bam\n\n"
+ " xml: pbmerge -o merged.bam foo.subreadset.xml\n\n"
+ );
+ parser.add_option_group(ioGroup);
+
+ // parse command line for settings
+ const pbmerge::Settings settings = pbmerge::Settings::FromCommandLine(parser, argc, argv);
+ if (!settings.errors_.empty()) {
+ cerr << endl;
+ for (const auto e : settings.errors_)
+ cerr << "ERROR: " << e << endl;
+ cerr << endl;
+ parser.print_help();
+ return EXIT_FAILURE;
+ }
+
+ // run tool
+ try {
+ // setup our @PG entry to add to header
+ PacBio::BAM::ProgramInfo mergeProgram;
+ mergeProgram.Id(string("pbmerge-")+pbmerge::Version)
+ .Name("pbmerge")
+ .Version(pbmerge::Version);
+
+ PacBio::BAM::DataSet dataset;
+ if (settings.inputFilenames_.size() == 1)
+ dataset = PacBio::BAM::DataSet(settings.inputFilenames_.front());
+ else
+ dataset = PacBio::BAM::DataSet(settings.inputFilenames_);
+
+
+ PacBio::BAM::common::BamFileMerger::Merge(dataset,
+ settings.outputFilename_,
+ mergeProgram,
+ settings.createPbi_);
+
+
+// PacBio::BAM::common::BamFileMerger merger(dataset,
+// settings.outputFilename_,
+// mergeProgram,
+// settings.createPbi_);
+// merger.Merge();
+
+ return EXIT_SUCCESS;
+ }
+ catch (std::exception& e) {
+ cerr << "ERROR: " << e.what() << endl;
+ return EXIT_FAILURE;
+ }
+}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/pbbam.git
More information about the debian-med-commit
mailing list