[med-svn] [Git][med-team/pbbam][master] 6 commits: New upstream version 0.19.0+dfsg

Andreas Tille gitlab at salsa.debian.org
Wed Oct 10 12:22:39 BST 2018


Andreas Tille pushed to branch master at Debian Med / pbbam


Commits:
25ba6fde by Andreas Tille at 2018-10-10T09:47:15Z
New upstream version 0.19.0+dfsg
- - - - -
dd9916ac by Andreas Tille at 2018-10-10T09:47:18Z
Update upstream source from tag 'upstream/0.19.0+dfsg'

Update to upstream version '0.19.0+dfsg'
with Debian dir 656052e8dac14b2e30c45eb0b1696ab0d376c2b4
- - - - -
9d7f5f33 by Andreas Tille at 2018-10-10T09:47:18Z
New upstream version

- - - - -
593fd0e9 by Andreas Tille at 2018-10-10T10:03:04Z
Fix samtools call in tests

- - - - -
a8f11402 by Andreas Tille at 2018-10-10T10:44:53Z
Bump (fake) SOVERSION (=release version)

- - - - -
c0a82780 by Andreas Tille at 2018-10-10T10:55:12Z
Upload to unstable

- - - - -


30 changed files:

- CHANGELOG.md
- CMakeLists.txt
- debian/changelog
- debian/control
- debian/rules
- include/pbbam/BamWriter.h
- include/pbbam/Compare.h
- include/pbbam/DataSet.h
- include/pbbam/DataSetTypes.h
- include/pbbam/PbiFilterTypes.h
- include/pbbam/internal/PbiFilterTypes.inl
- meson.build
- scripts/ci/test.sh
- src/BamRecordImpl.cpp
- src/BamWriter.cpp
- src/ChemistryTable.cpp
- src/Compare.cpp
- src/DataSet.cpp
- src/DataSetTypes.cpp
- src/DataSetXsd.cpp
- src/FileProducer.h
- src/PbiFilter.cpp
- src/PbiFilterTypes.cpp
- + tests/data/long-cigar-1.7.bam
- − tests/data/long-cigar.bam
- tests/meson.build
- tests/src/cram/pbbamify.t.in
- tests/src/test_BamWriter.cpp
- tests/src/test_LongCigar.cpp
- tools/meson.build


Changes:

=====================================
CHANGELOG.md
=====================================
@@ -10,6 +10,11 @@ guarantees will be maintained within each major version series.
 
 ## Active
 
+## [0.19.0] - 2018-09-11
+
+### Added
+ - TranscriptAlignmentSet to XML support
+
 ## [0.17.0] - 2018-03-18
 
 ### Added


=====================================
CMakeLists.txt
=====================================
@@ -3,7 +3,7 @@
 ########################################################################
 
 cmake_policy(SET CMP0048 NEW)  # lets us set version in project()
-project(PacBioBAM VERSION 0.17.0 LANGUAGES CXX C)
+project(PacBioBAM VERSION 0.19.0 LANGUAGES CXX C)
 cmake_minimum_required(VERSION 3.0)
 
 # project name & version


=====================================
debian/changelog
=====================================
@@ -1,3 +1,12 @@
+pbbam (0.19.0+dfsg-1) unstable; urgency=medium
+
+  * Team upload.
+  * New upstream version
+  * Fix samtools call in tests
+  * d/control: Bump (fake) SOVERSION (=release version)
+
+ -- Andreas Tille <tille at debian.org>  Wed, 10 Oct 2018 12:45:02 +0200
+
 pbbam (0.18.0+dfsg-2) unstable; urgency=medium
 
   * Team upload.


=====================================
debian/control
=====================================
@@ -27,7 +27,7 @@ Package: pbbamtools
 Architecture: any
 Depends: ${shlibs:Depends},
          ${misc:Depends},
-         libpbbam0.18.0 (= ${binary:Version})
+         libpbbam0.19.0 (= ${binary:Version})
 Recommends: samtools
 Description: processing Pacific Biosciences binary alignment/map files
  The BAM format is a binary, compressed, record-oriented container format
@@ -42,7 +42,7 @@ Description: processing Pacific Biosciences binary alignment/map files
  This package provides command-line utilities for working with PacBio BAM
  files.
 
-Package: libpbbam0.18.0
+Package: libpbbam0.19.0
 Architecture: any
 Multi-Arch: same
 Section: libs
@@ -66,7 +66,7 @@ Description: Pacific Biosciences binary alignment/map (BAM) library
 Package: libpbbam-dev
 Architecture: any
 Section: libdevel
-Depends: libpbbam0.18.0 (= ${binary:Version}),
+Depends: libpbbam0.19.0 (= ${binary:Version}),
          libhts-dev,
          libssl-dev,
          ${misc:Depends}


=====================================
debian/rules
=====================================
@@ -56,6 +56,7 @@ override_dh_auto_clean:
 	-e 's|@PacBioBAM_VERSION@|$(DEB_VERSION_UPSTREAM)|g' \
 	-e 's|@GeneratedTestDataDir@|$(generated_data_dir)|g' \
 	-e 's|+dfsg||g' -e 's|\(/build/$(DEB_SOURCE)-[0-9.]\+\)/|\1+dfsg/|' \
+	-e 's/$$SAMTOOLS/samtools/g' \
 	$< > $@
 
 override_dh_install:


=====================================
include/pbbam/BamWriter.h
=====================================
@@ -87,14 +87,39 @@ public:
         BinCalculation_OFF
     };
 
+    ///
+    /// \brief The Config struct provides a "parameter object" for BamWriter
+    ///        settings. This allows for writer configuration without having to
+    ///        refer to ordering of parameters, default values, etc.
+    ///
+    struct Config
+    {
+        Config() = default;
+
+        // zlib compression level
+        CompressionLevel compressionLevel = DefaultCompression;
+
+        // The number of threads for compression. If set to 0, BamWriter will
+        // attempt to determine a reasonable estimate. If set to 1, this will
+        // force single-threaded execution. No checks are made against an upper limit.
+        size_t numThreads = 4;
+
+        // If ON, ensures that proper BAI bin numbers are provided for all records.
+        BamWriter::BinCalculationMode binCalculationMode = BamWriter::BinCalculation_ON;
+
+        // If true, write to <filename>.tmp, and rename to <filename> in dtor.
+        // This allows downstream checks to see if BAM file may be truncated
+        // due to early termination (e.g. a thrown exception). If false, write
+        // directly to <filename>.
+        bool useTempFile = true;
+    };
+
 public:
     /// \name Constructors & Related Methods
     /// \{
 
     /// \brief Opens a %BAM file for writing & writes the header information.
     ///
-    /// The error status will be set if either operation fails.
-    ///
     /// \note Set \p filename to "-" for stdout.
     ///
     /// \param[in] filename         path to output %BAM file
@@ -111,13 +136,32 @@ public:
     ///            records written. This extra step may be turned off when bin
     ///            numbers are not needed. Though if in doubt, keep the default.
     ///
+    /// \param[in] useTempFile      If true, write to <filename>.tmp, and rename
+    ///                             to <filename>. This provides for downstream
+    ///                             checks to see if BAM file may be truncated
+    ///                             due to early termination (a thrown exception).
+    ///
     /// \throws std::runtime_error if there was a problem opening the file for
     ///         writing or if an error occurred while writing the header
     ///
     BamWriter(const std::string& filename, const BamHeader& header,
               const BamWriter::CompressionLevel compressionLevel = BamWriter::DefaultCompression,
               const size_t numThreads = 4,
-              const BinCalculationMode binCalculationMode = BamWriter::BinCalculation_ON);
+              const BinCalculationMode binCalculationMode = BamWriter::BinCalculation_ON,
+              const bool useTempFile = true);
+
+    ///
+    /// \brief Opens a %BAM file for writing & writes the header information.
+    ///
+    /// \param[in] filename     path to output %BAM file
+    /// \param[in] header       BamHeader object
+    /// \param[in] config       container for add'l configuration options
+    ///
+    /// \throws std::runtime_error if there was a problem opening the file for
+    ///         writing or if an error occurred while writing the header
+    ///
+    BamWriter(const std::string& filename, const BamHeader& header,
+              const BamWriter::Config& config);
 
     /// Fully flushes all buffered data & closes file.
     ~BamWriter() override;


=====================================
include/pbbam/Compare.h
=====================================
@@ -434,6 +434,28 @@ public:
     };
 
     /// \}
+
+    template <typename T>
+    static inline bool Check(const T& lhs, const T& rhs, const Compare::Type cmp)
+    {
+        switch (cmp) {
+            case Compare::EQUAL:
+                return lhs == rhs;
+            case Compare::LESS_THAN:
+                return lhs < rhs;
+            case Compare::LESS_THAN_EQUAL:
+                return lhs <= rhs;
+            case Compare::GREATER_THAN:
+                return lhs > rhs;
+            case Compare::GREATER_THAN_EQUAL:
+                return lhs >= rhs;
+            case Compare::NOT_EQUAL:
+                return lhs != rhs;
+            default:
+                assert(false);
+                throw std::runtime_error{"unsupported compare type requested"};
+        }
+    }
 };
 
 }  // namespace BAM


=====================================
include/pbbam/DataSet.h
=====================================
@@ -46,7 +46,8 @@ public:
         HDF_SUBREAD,
         REFERENCE,
         SUBREAD,
-        TRANSCRIPT
+        TRANSCRIPT,
+        TRANSCRIPT_ALIGNMENT
     };
 
     /// \brief Converts printable dataset type to type enum.


=====================================
include/pbbam/DataSetTypes.h
=====================================
@@ -861,6 +861,16 @@ public:
     TranscriptSet();
 };
 
+/// \brief The TranscriptAlignmentSet class represents a %TranscriptAlignmentSet
+///        root element in DataSetXML.
+///
+class PBBAM_EXPORT TranscriptAlignmentSet : public DataSetBase
+{
+public:
+    /// \brief Creates an empty TranscriptAlignmentSet dataset.
+    TranscriptAlignmentSet();
+};
+
 }  // namespace BAM
 }  // namespace PacBio
 


=====================================
include/pbbam/PbiFilterTypes.h
=====================================
@@ -35,7 +35,7 @@ public:
 
 protected:
     FilterBase(T value, const Compare::Type cmp);
-    FilterBase(std::vector<T> values);
+    FilterBase(std::vector<T> values, const Compare::Type cmp = Compare::EQUAL);
 
 protected:
     bool CompareHelper(const T& lhs) const;
@@ -54,7 +54,7 @@ struct BarcodeDataFilterBase : public FilterBase<T>
 {
 protected:
     BarcodeDataFilterBase(T value, const Compare::Type cmp);
-    BarcodeDataFilterBase(std::vector<T> values);
+    BarcodeDataFilterBase(std::vector<T> values, const Compare::Type cmp = Compare::EQUAL);
 
 public:
     bool Accepts(const PbiRawData& idx, const size_t row) const;
@@ -69,7 +69,7 @@ struct BasicDataFilterBase : public FilterBase<T>
 {
 protected:
     BasicDataFilterBase(T value, const Compare::Type cmp);
-    BasicDataFilterBase(std::vector<T> values);
+    BasicDataFilterBase(std::vector<T> values, const Compare::Type cmp = Compare::EQUAL);
 
 public:
     bool Accepts(const PbiRawData& idx, const size_t row) const;
@@ -84,7 +84,7 @@ struct MappedDataFilterBase : public FilterBase<T>
 {
 protected:
     MappedDataFilterBase(T value, const Compare::Type cmp);
-    MappedDataFilterBase(std::vector<T> values);
+    MappedDataFilterBase(std::vector<T> values, const Compare::Type cmp = Compare::EQUAL);
 
 public:
     bool Accepts(const PbiRawData& idx, const size_t row) const;
@@ -201,7 +201,7 @@ public:
     ///
     /// \param[in] whitelist  barcode IDs to compare on
     ///
-    PbiBarcodeFilter(std::vector<int16_t> whitelist);
+    PbiBarcodeFilter(std::vector<int16_t> whitelist, const Compare::Type cmp = Compare::EQUAL);
 
 public:
     /// \brief Performs the actual index lookup.
@@ -240,7 +240,8 @@ public:
     ///
     /// \param[in] whitelist  barcode IDs to compare on
     ///
-    PbiBarcodeForwardFilter(std::vector<int16_t> whitelist);
+    PbiBarcodeForwardFilter(std::vector<int16_t> whitelist,
+                            const Compare::Type cmp = Compare::EQUAL);
 };
 
 /// \brief The PbiBarcodeQualityFilter class provides a PbiFilter-compatible
@@ -288,7 +289,8 @@ public:
     ///
     /// \param[in] whitelist  barcode IDs to compare on
     ///
-    PbiBarcodeReverseFilter(std::vector<int16_t> whitelist);
+    PbiBarcodeReverseFilter(std::vector<int16_t> whitelist,
+                            const Compare::Type cmp = Compare::EQUAL);
 };
 
 /// \brief The PbiBarcodesFilter class provides a PbiFilter-compatible filter on
@@ -411,7 +413,7 @@ public:
     /// \note The compare type defaults to Compare::EQUAL (records match the
     ///       movie name exactly); Compare::NOT_EQUAL inverts the match.
     ///
-    PbiMovieNameFilter(const std::string& movieName);
+    PbiMovieNameFilter(const std::string& movieName, const Compare::Type cmp = Compare::EQUAL);
 
     /// \brief Creates a 'whitelisted' movie name filter.
     ///
@@ -421,7 +423,8 @@ public:
     ///
     /// \param[in] whitelist    movie names to compare on
     ///
-    PbiMovieNameFilter(const std::vector<std::string>& whitelist);
+    PbiMovieNameFilter(const std::vector<std::string>& whitelist,
+                       const Compare::Type cmp = Compare::EQUAL);
 
 public:
     /// \brief Performs the actual index lookup.
@@ -432,6 +435,7 @@ public:
 
 private:
     PbiFilter compositeFilter_;
+    Compare::Type cmp_;
 };
 
 /// \brief The PbiNumDeletedBasesFilter class provides a PbiFilter-compatible
@@ -573,7 +577,7 @@ public:
     /// \note There is no compare type parameter here, it is always
     ///       Compare::EQUAL. Records will match query name, exactly.
     ///
-    PbiQueryNameFilter(const std::string& qname);
+    PbiQueryNameFilter(const std::string& qname, const Compare::Type cmp = Compare::EQUAL);
 
     /// \brief Creates a 'whitelisted' query name filter.
     ///
@@ -583,7 +587,8 @@ public:
     ///
     /// \param[in] whitelist    query names to compare on
     ///
-    PbiQueryNameFilter(const std::vector<std::string>& whitelist);
+    PbiQueryNameFilter(const std::vector<std::string>& whitelist,
+                       const Compare::Type cmp = Compare::EQUAL);
 
     PbiQueryNameFilter(const PbiQueryNameFilter& other);
     ~PbiQueryNameFilter();
@@ -686,7 +691,7 @@ public:
     ///
     /// \param[in] whitelist    read group IDs to compare on
     ///
-    PbiReadGroupFilter(std::vector<int32_t> whitelist);
+    PbiReadGroupFilter(std::vector<int32_t> whitelist, const Compare::Type cmp = Compare::EQUAL);
 
     /// \brief Creates a 'whitelisted' filter on read group printable IDs.
     ///
@@ -696,7 +701,8 @@ public:
     ///
     /// \param[in] whitelist    read group ID strings to compare on
     ///
-    PbiReadGroupFilter(const std::vector<std::string>& whitelist);
+    PbiReadGroupFilter(const std::vector<std::string>& whitelist,
+                       const Compare::Type cmp = Compare::EQUAL);
 
     /// \brief Creates a 'whitelisted' filter using read group objects.
     ///
@@ -706,7 +712,8 @@ public:
     ///
     /// \param[in] whitelist    read group objects to compare on
     ///
-    PbiReadGroupFilter(const std::vector<ReadGroupInfo>& whitelist);
+    PbiReadGroupFilter(const std::vector<ReadGroupInfo>& whitelist,
+                       const Compare::Type cmp = Compare::EQUAL);
 };
 
 /// \brief The PbiReferenceEndFilter class provides a PbiFilter-compatible
@@ -754,7 +761,7 @@ public:
     ///
     /// \param[in] whitelist    reference IDs to compare on
     ///
-    PbiReferenceIdFilter(std::vector<int32_t> whitelist);
+    PbiReferenceIdFilter(std::vector<int32_t> whitelist, const Compare::Type cmp = Compare::EQUAL);
 };
 
 /// \brief The PbiReferenceNameFilter class provides a PbiFilter-compatible
@@ -782,7 +789,8 @@ public:
     ///
     /// \param[in] whitelist    reference names to compare on
     ///
-    PbiReferenceNameFilter(std::vector<std::string> whitelist);
+    PbiReferenceNameFilter(std::vector<std::string> whitelist,
+                           const Compare::Type cmp = Compare::EQUAL);
 
 public:
     /// \brief Performs the actual index lookup.
@@ -848,7 +856,39 @@ public:
     ///
     /// \param[in] whitelist    ZMW hole numbers to compare on
     ///
-    PbiZmwFilter(std::vector<int32_t> whitelist);
+    PbiZmwFilter(std::vector<int32_t> whitelist, const Compare::Type cmp = Compare::EQUAL);
+};
+
+// ----------------------------------------------
+// NOTE: modulo filtering only enabled for ZMW.
+//
+// I need to generalize more if we're going to use
+// this on more fields.
+// ----------------------------------------------
+
+enum class FilterHash
+{
+    UNSIGNED_LONG_CAST,
+    BOOST_HASH_COMBINE,
+};
+
+struct PbiZmwModuloFilter
+{
+    PbiZmwModuloFilter(const uint32_t denominator, const uint32_t value,
+                       const FilterHash hashtype = FilterHash::UNSIGNED_LONG_CAST,
+                       const Compare::Type = Compare::EQUAL);
+
+    /// \brief Performs the actual index lookup.
+    ///
+    /// Most client code should not need to use this method directly.
+    ///
+    bool Accepts(const PbiRawData& idx, const size_t row) const;
+
+private:
+    uint32_t denominator_;
+    uint32_t value_;
+    FilterHash hash_;
+    Compare::Type cmp_;
 };
 
 }  // namespace BAM


=====================================
include/pbbam/internal/PbiFilterTypes.inl
=====================================
@@ -8,6 +8,8 @@
 #include <cassert>
 #include <stdexcept>
 
+#include <boost/functional/hash/hash.hpp>
+
 namespace PacBio {
 namespace BAM {
 
@@ -20,8 +22,9 @@ inline FilterBase<T>::FilterBase(T value, const Compare::Type cmp)
 { }
 
 template <typename T>
-inline FilterBase<T>::FilterBase(std::vector<T> values)
+inline FilterBase<T>::FilterBase(std::vector<T> values, const Compare::Type cmp)
     : multiValue_{std::move(values)}
+    , cmp_{cmp}
 { }
 
 template<typename T>
@@ -36,31 +39,38 @@ inline bool FilterBase<T>::CompareHelper(const T& lhs) const
 template<typename T>
 inline bool FilterBase<T>::CompareMultiHelper(const T& lhs) const
 {
-    // check provided value against all filter criteria,
-    // return true on any exact match
-    auto iter = multiValue_.get().cbegin();
-    const auto end  = multiValue_.get().cend();
-    for (; iter != end; ++iter) {
-        if (*iter == lhs)
-            return true;
+    if (cmp_ == Compare::EQUAL)
+    {
+        // check provided value against all filter criteria,
+        // return true on any exact match
+        auto iter = multiValue_.get().cbegin();
+        const auto end  = multiValue_.get().cend();
+        for (; iter != end; ++iter) {
+            if (*iter == lhs)
+                return true;
+        }
+        return false; // no matches
+    }
+    else if (cmp_ == Compare::NOT_EQUAL)
+    {
+        // check provided value against all filter criteria,
+        // return false on any exact match (true only if nothing matches)
+        auto iter = multiValue_.get().cbegin();
+        const auto end  = multiValue_.get().cend();
+        for (; iter != end; ++iter) {
+            if (*iter == lhs)
+                return false;
+        }
+        return true;
     }
-    return false; // no matches
+    else
+        throw std::runtime_error{"unsupported compare type on multivalue filter"};
 }
 
 template<typename T>
 inline bool FilterBase<T>::CompareSingleHelper(const T& lhs) const
 {
-    switch(cmp_) {
-        case Compare::EQUAL:              return lhs == value_;
-        case Compare::LESS_THAN:          return lhs < value_;
-        case Compare::LESS_THAN_EQUAL:    return lhs <= value_;
-        case Compare::GREATER_THAN:       return lhs > value_;
-        case Compare::GREATER_THAN_EQUAL: return lhs >= value_;
-        case Compare::NOT_EQUAL:          return lhs != value_;
-        default:
-            assert(false);
-            throw std::runtime_error{"unsupported compare type requested"};
-    }
+    return Compare::Check(lhs, value_, cmp_);
 }
 
 template<>
@@ -90,8 +100,8 @@ inline BarcodeDataFilterBase<T, field>::BarcodeDataFilterBase(T value, const Com
 { }
 
 template<typename T, PbiFile::BarcodeField field>
-inline BarcodeDataFilterBase<T, field>::BarcodeDataFilterBase(std::vector<T> values)
-    : FilterBase<T>{std::move(values)}
+inline BarcodeDataFilterBase<T, field>::BarcodeDataFilterBase(std::vector<T> values, const Compare::Type cmp)
+    : FilterBase<T>{std::move(values), cmp}
 { }
 
 template<typename T, PbiFile::BarcodeField field>
@@ -117,8 +127,8 @@ inline BasicDataFilterBase<T, field>::BasicDataFilterBase(T value, const Compare
 { }
 
 template<typename T, PbiFile::BasicField field>
-inline BasicDataFilterBase<T, field>::BasicDataFilterBase(std::vector<T> values)
-    : FilterBase<T>{std::move(values)}
+inline BasicDataFilterBase<T, field>::BasicDataFilterBase(std::vector<T> values, const Compare::Type cmp)
+    : FilterBase<T>{std::move(values), cmp}
 { }
 
 template<typename T, PbiFile::BasicField field>
@@ -159,8 +169,8 @@ inline MappedDataFilterBase<T, field>::MappedDataFilterBase(T value, const Compa
 { }
 
 template<typename T, PbiFile::MappedField field>
-inline MappedDataFilterBase<T, field>::MappedDataFilterBase(std::vector<T> values)
-    : FilterBase<T>{std::move(values)}
+inline MappedDataFilterBase<T, field>::MappedDataFilterBase(std::vector<T> values, const Compare::Type cmp)
+    : FilterBase<T>{std::move(values), cmp}
 { }
 
 template<>
@@ -233,9 +243,9 @@ inline PbiBarcodeFilter::PbiBarcodeFilter(const int16_t barcode, const Compare::
                       }
 { }
 
-inline PbiBarcodeFilter::PbiBarcodeFilter(std::vector<int16_t> whitelist)
-    : compositeFilter_{ PbiFilter::Union({ PbiBarcodeForwardFilter{std::move(whitelist)},
-                                           PbiBarcodeReverseFilter{std::move(whitelist)}
+inline PbiBarcodeFilter::PbiBarcodeFilter(std::vector<int16_t> whitelist, const Compare::Type cmp)
+    : compositeFilter_{ PbiFilter::Union({ PbiBarcodeForwardFilter{std::move(whitelist), cmp},
+                                           PbiBarcodeReverseFilter{std::move(whitelist), cmp}
                                          })
                       }
 { }
@@ -249,8 +259,8 @@ inline PbiBarcodeForwardFilter::PbiBarcodeForwardFilter(const int16_t bcFwdId, c
     : internal::BarcodeDataFilterBase<int16_t, PbiFile::BarcodeField::BC_FORWARD>{bcFwdId, cmp}
 { }
 
-inline PbiBarcodeForwardFilter::PbiBarcodeForwardFilter(std::vector<int16_t> whitelist)
-    : internal::BarcodeDataFilterBase<int16_t, PbiFile::BarcodeField::BC_FORWARD>{std::move(whitelist)}
+inline PbiBarcodeForwardFilter::PbiBarcodeForwardFilter(std::vector<int16_t> whitelist, const Compare::Type cmp)
+    : internal::BarcodeDataFilterBase<int16_t, PbiFile::BarcodeField::BC_FORWARD>{std::move(whitelist), cmp}
 { }
 
 // PbiBarcodeQualityFilter
@@ -265,8 +275,8 @@ inline PbiBarcodeReverseFilter::PbiBarcodeReverseFilter(const int16_t bcRevId, c
     : internal::BarcodeDataFilterBase<int16_t, PbiFile::BarcodeField::BC_REVERSE>{bcRevId, cmp}
 { }
 
-inline PbiBarcodeReverseFilter::PbiBarcodeReverseFilter(std::vector<int16_t> whitelist)
-    : internal::BarcodeDataFilterBase<int16_t, PbiFile::BarcodeField::BC_REVERSE>{std::move(whitelist)}
+inline PbiBarcodeReverseFilter::PbiBarcodeReverseFilter(std::vector<int16_t> whitelist, const Compare::Type cmp)
+    : internal::BarcodeDataFilterBase<int16_t, PbiFile::BarcodeField::BC_REVERSE>{std::move(whitelist), cmp}
 { }
 
 // PbiBarcodesFilter
@@ -308,7 +318,12 @@ inline PbiMapQualityFilter::PbiMapQualityFilter(const uint8_t mapQual, const Com
 // PbiMovieNameFilter
 
 inline bool PbiMovieNameFilter::Accepts(const PbiRawData& idx, const size_t row) const
-{ return compositeFilter_.Accepts(idx, row); }
+{
+    const bool found = compositeFilter_.Accepts(idx, row);
+    if (cmp_ == Compare::EQUAL) return found;
+    else if (cmp_ == Compare::NOT_EQUAL) return !found;
+    else throw std::runtime_error{"unsupported compare type on movie name filter"};
+}
 
 // PbiNumDeletedBasesFilter
 
@@ -372,20 +387,20 @@ inline PbiReadGroupFilter::PbiReadGroupFilter(const ReadGroupInfo& rg, const Com
     : PbiReadGroupFilter{rg.Id(), cmp}
 { }
 
-inline PbiReadGroupFilter::PbiReadGroupFilter(std::vector<int32_t> whitelist)
-    : internal::BasicDataFilterBase<int32_t, PbiFile::BasicField::RG_ID>{std::move(whitelist)}
+inline PbiReadGroupFilter::PbiReadGroupFilter(std::vector<int32_t> whitelist, const Compare::Type cmp)
+    : internal::BasicDataFilterBase<int32_t, PbiFile::BasicField::RG_ID>{std::move(whitelist), cmp}
 { }
 
-inline PbiReadGroupFilter::PbiReadGroupFilter(const std::vector<std::string>& whitelist)
-    : internal::BasicDataFilterBase<int32_t, PbiFile::BasicField::RG_ID>{std::vector<int32_t>{}}
+inline PbiReadGroupFilter::PbiReadGroupFilter(const std::vector<std::string>& whitelist, const Compare::Type cmp)
+    : internal::BasicDataFilterBase<int32_t, PbiFile::BasicField::RG_ID>{std::vector<int32_t>{}, cmp}
 {
     multiValue_->reserve(whitelist.size());
     for (const auto& rg : whitelist)
         multiValue_->push_back(ReadGroupInfo::IdToInt(rg));
 }
 
-inline PbiReadGroupFilter::PbiReadGroupFilter(const std::vector<ReadGroupInfo>& whitelist)
-    : internal::BasicDataFilterBase<int32_t, PbiFile::BasicField::RG_ID>{std::vector<int32_t>{}}
+inline PbiReadGroupFilter::PbiReadGroupFilter(const std::vector<ReadGroupInfo>& whitelist, const Compare::Type cmp)
+    : internal::BasicDataFilterBase<int32_t, PbiFile::BasicField::RG_ID>{std::vector<int32_t>{}, cmp}
 {
     multiValue_->reserve(whitelist.size());
     for (const auto& rg : whitelist)
@@ -404,8 +419,8 @@ inline PbiReferenceIdFilter::PbiReferenceIdFilter(const int32_t tId, const Compa
     : internal::MappedDataFilterBase<int32_t, PbiFile::MappedField::T_ID>{tId, cmp}
 { }
 
-inline PbiReferenceIdFilter::PbiReferenceIdFilter(std::vector<int32_t> whitelist)
-    : internal::MappedDataFilterBase<int32_t, PbiFile::MappedField::T_ID>{std::move(whitelist)}
+inline PbiReferenceIdFilter::PbiReferenceIdFilter(std::vector<int32_t> whitelist, const Compare::Type cmp)
+    : internal::MappedDataFilterBase<int32_t, PbiFile::MappedField::T_ID>{std::move(whitelist), cmp}
 { }
 
 // PbiReferenceStartFilter
@@ -420,9 +435,70 @@ inline PbiZmwFilter::PbiZmwFilter(const int32_t zmw, const Compare::Type cmp)
     : internal::BasicDataFilterBase<int32_t, PbiFile::BasicField::ZMW>{zmw, cmp}
 { }
 
-inline PbiZmwFilter::PbiZmwFilter(std::vector<int32_t> whitelist)
-    : internal::BasicDataFilterBase<int32_t, PbiFile::BasicField::ZMW>{std::move(whitelist)}
+inline PbiZmwFilter::PbiZmwFilter(std::vector<int32_t> whitelist, const Compare::Type cmp)
+    : internal::BasicDataFilterBase<int32_t, PbiFile::BasicField::ZMW>{std::move(whitelist), cmp}
+{ }
+
+// PbiZmwModuloFilter
+
+inline PbiZmwModuloFilter::PbiZmwModuloFilter(
+        const uint32_t denominator,
+        const uint32_t value,
+        const FilterHash hashType,
+        const Compare::Type cmp)
+    : denominator_{denominator}
+    , value_{value}
+    , hash_{hashType}
+    , cmp_{cmp}
 { }
 
+inline uint32_t UnsignedLongIntCast(const int32_t zm)
+{
+    return static_cast<uint32_t>(zm);
+}
+
+inline uint32_t BoostHashCombine(const int32_t zm)
+{
+    constexpr static const uint16_t mask = 0xFFFF;
+
+    const uint16_t upper = (zm >> 16) & mask;
+    const uint16_t lower = zm & mask;
+
+    // FIXME: discrepancies with Python API. Will return to nail down.
+
+    size_t seed = 0;
+    boost::hash_combine(seed, upper);
+    boost::hash_combine(seed, lower);
+    return static_cast<uint32_t>(seed);
+}
+
+inline bool PbiZmwModuloFilter::Accepts(const PbiRawData& idx,
+                                        const size_t row) const
+{
+    const auto zm = idx.BasicData().holeNumber_.at(row);
+
+    uint32_t hashValue;
+    switch(hash_)
+    {
+        case FilterHash::UNSIGNED_LONG_CAST :
+        {
+            hashValue = UnsignedLongIntCast(zm);
+            break;
+        }
+
+        case FilterHash::BOOST_HASH_COMBINE :
+        {
+            hashValue = BoostHashCombine(zm);
+            break;
+        }
+
+        default:
+            throw std::runtime_error{"unsupported filter hash type"};
+    }
+
+    const auto modResult = hashValue % denominator_;
+    return Compare::Check(modResult, value_, cmp_);
+}
+
 } // namespace BAM
 } // namespace PacBio


=====================================
meson.build
=====================================
@@ -1,7 +1,7 @@
 project(
   'PacBioBAM',
   'cpp',
-  version : '0.18.0',
+  version : '0.19.0',
   default_options : [
     'buildtype=release',
     'warning_level=3',


=====================================
scripts/ci/test.sh
=====================================
@@ -5,4 +5,26 @@ set -vex
 # TEST #
 ########
 
+type module >& /dev/null || . /mnt/software/Modules/current/init/bash
+
+# Note: htslib v1.7 added native long CIGAR support. pbbam "spoofs" it 
+#       when running <1.7. So we'll always check the default htslib for 
+#       general test success/fail, and then check pre-/post-v1.7 explicitly
+#       to ensure we pass in either context (detectable at runtime).
+
+# default htslib
+ninja -C "${CURRENT_BUILD_DIR:-build}" -v test
+
+# explicit htslib v1.6
+module unload htslib
+module load htslib/1.6
 ninja -C "${CURRENT_BUILD_DIR:-build}" -v test
+
+# explicit htslib v1.7
+module unload htslib
+module load htslib/1.7
+ninja -C "${CURRENT_BUILD_DIR:-build}" -v test\
+
+# restore default
+module unload htslib
+module load htslib


=====================================
src/BamRecordImpl.cpp
=====================================
@@ -12,19 +12,46 @@
 #include <cstdlib>
 #include <cstring>
 #include <iostream>
+#include <tuple>
 #include <utility>
 
 #include <htslib/hts_endian.h>
 
+#include "pbbam/BamTagCodec.h"
+
 #include "BamRecordTags.h"
 #include "MemoryUtils.h"
-#include "pbbam/BamTagCodec.h"
+#include "StringUtils.h"
 
 namespace PacBio {
 namespace BAM {
 
 namespace {
 
+static bool DoesHtslibSupportLongCigar()
+{
+    const std::string htsVersion = hts_version();
+
+    // remove any "-<blah>" for non-release versions
+    const auto versionBase = PacBio::BAM::Split(htsVersion, '-');
+    if (versionBase.empty())
+        throw std::runtime_error{"invalid htslib version format: " + htsVersion};
+
+    // grab major/minor version numbers
+    const auto versionParts = PacBio::BAM::Split(versionBase[0], '.');
+    if (versionParts.size() < 2)
+        throw std::runtime_error{"invalid htslib version format: " + htsVersion};
+
+    // check against v1.7
+    const int versionMajor = std::stoi(versionParts[0]);
+    const int versionMinor = std::stoi(versionParts[1]);
+    static constexpr const int v17_major = 1;
+    static constexpr const int v17_minor = 7;
+    return std::tie(versionMajor, versionMinor) >= std::tie(v17_major, v17_minor);
+}
+
+static const bool has_native_long_cigar_support = DoesHtslibSupportLongCigar();
+
 Cigar FetchRawCigar(const uint32_t* const src, const uint32_t len)
 {
     Cigar result;
@@ -148,27 +175,21 @@ bool BamRecordImpl::AddTagImpl(const std::string& tagName, const Tag& value,
 Cigar BamRecordImpl::CigarData() const
 {
     const auto* b = d_.get();
-    if (HasLongCigar(b)) {
+    if (!has_native_long_cigar_support && HasLongCigar(b)) {
         // fetch long CIGAR from tag
         const auto cigarTag = TagValue("CG");
         const auto cigarTagValue = cigarTag.ToUInt32Array();
         return FetchRawCigar(cigarTagValue.data(), cigarTagValue.size());
     } else {
-        // fetch normal, short CIGAR from the standard location
+        // fetch CIGAR from the standard location
         return FetchRawCigar(bam_get_cigar(b), b->core.n_cigar);
     }
 }
 
 BamRecordImpl& BamRecordImpl::CigarData(const Cigar& cigar)
 {
-    // Set normal, "short" CIGAR and remove CG tag if present.
-    if (cigar.size() < 65536) {
-        SetCigarData(cigar);
-        if (HasTag("CG")) RemoveTag("CG");
-    }
-
-    // Set long CIGAR data
-    else {
+    // if long CIGAR, using htslib version < 1.7, set it "manually"
+    if (!has_native_long_cigar_support && cigar.size() >= 65536) {
         // Add the 'fake' CIGAR in normal place.
         Cigar fake;
         fake.emplace_back(CigarOperationType::SOFT_CLIP, SequenceLength());
@@ -190,6 +211,12 @@ BamRecordImpl& BamRecordImpl::CigarData(const Cigar& cigar)
             AddTag("CG", Tag{cigarData});
     }
 
+    // otherwise (v1.7+ or short CIGAR), use standard APIs
+    else {
+        if (HasTag("CG")) RemoveTag("CG");
+        SetCigarData(cigar);
+    }
+
     return *this;
 }
 


=====================================
src/BamWriter.cpp
=====================================
@@ -25,12 +25,13 @@ namespace PacBio {
 namespace BAM {
 namespace internal {
 
-class BamWriterPrivate : public internal::FileProducer
+class BamWriterPrivate
 {
 public:
     BamWriterPrivate(const std::string& filename, const std::shared_ptr<bam_hdr_t> rawHeader,
                      const BamWriter::CompressionLevel compressionLevel, const size_t numThreads,
-                     const BamWriter::BinCalculationMode binCalculationMode);
+                     const BamWriter::BinCalculationMode binCalculationMode,
+                     const bool useTempFile);
 
 public:
     void Write(const BamRecord& record);
@@ -41,21 +42,23 @@ public:
     bool calculateBins_;
     std::unique_ptr<samFile, internal::HtslibFileDeleter> file_;
     std::shared_ptr<bam_hdr_t> header_;
+    std::unique_ptr<internal::FileProducer> fileProducer_;
 };
 
 BamWriterPrivate::BamWriterPrivate(const std::string& filename,
                                    const std::shared_ptr<bam_hdr_t> rawHeader,
                                    const BamWriter::CompressionLevel compressionLevel,
                                    const size_t numThreads,
-                                   const BamWriter::BinCalculationMode binCalculationMode)
-    : internal::FileProducer{filename}
-    , calculateBins_{binCalculationMode == BamWriter::BinCalculation_ON}
-    , header_{rawHeader}
+                                   const BamWriter::BinCalculationMode binCalculationMode,
+                                   const bool useTempFile)
+    : calculateBins_{binCalculationMode == BamWriter::BinCalculation_ON}, header_{rawHeader}
 {
     if (!header_) throw std::runtime_error{"null header"};
 
+    if (useTempFile) fileProducer_ = std::make_unique<internal::FileProducer>(filename);
+
     // open file
-    const auto usingFilename = TempFilename();
+    const auto usingFilename = (fileProducer_ ? fileProducer_->TempFilename() : filename);
     const auto mode = std::string("wb") + std::to_string(static_cast<int>(compressionLevel));
     file_.reset(sam_open(usingFilename.c_str(), mode.c_str()));
     if (!file_) throw std::runtime_error{"could not open file for writing"};
@@ -123,7 +126,7 @@ inline void BamWriterPrivate::Write(const BamRecordImpl& recordImpl)
 
 BamWriter::BamWriter(const std::string& filename, const BamHeader& header,
                      const BamWriter::CompressionLevel compressionLevel, const size_t numThreads,
-                     const BinCalculationMode binCalculationMode)
+                     const BinCalculationMode binCalculationMode, const bool useTempFile)
     : IRecordWriter()
 {
 #if PBBAM_AUTOVALIDATE
@@ -131,7 +134,18 @@ BamWriter::BamWriter(const std::string& filename, const BamHeader& header,
 #endif
     d_ = std::make_unique<internal::BamWriterPrivate>(
         filename, internal::BamHeaderMemory::MakeRawHeader(header), compressionLevel, numThreads,
-        binCalculationMode);
+        binCalculationMode, useTempFile);
+}
+
+BamWriter::BamWriter(const std::string& filename, const BamHeader& header,
+                     const BamWriter::Config& config)
+    : BamWriter{filename,
+                header,
+                config.compressionLevel,
+                config.numThreads,
+                config.binCalculationMode,
+                config.useTempFile}
+{
 }
 
 BamWriter::~BamWriter()


=====================================
src/ChemistryTable.cpp
=====================================
@@ -16,6 +16,8 @@ namespace PacBio {
 namespace BAM {
 namespace internal {
 
+// clang-format off
+
 extern const ChemistryTable BuiltInChemistryTable = {
 
     // BindingKit, SequencingKit, BasecallerVersion, Chemistry
@@ -72,23 +74,21 @@ extern const ChemistryTable BuiltInChemistryTable = {
     {{"101-365-900", "100-861-800", "5.0", "S/P2-C2/5.0"}},
     {{"101-365-900", "101-093-700", "5.0", "S/P2-C2/5.0"}},
 
-    // 5.0.1 ChemRel ("Sequel® Sequencing Plate Silwet"); S/P2-C2
-    {{"101-365-900", "101-309-500", "5.0", "S/P2-C2/5.0"}},
-    // 5.0.1 ChemRel ("Sequel® Sequencing Plate Silwet (4 rxn)"); S/P2-C2
-    {{"101-365-900", "101-309-400", "5.0", "S/P2-C2/5.0"}},
+    // 5.0.1 ChemRel; Sequel® Binding Kit 2.1; S/P2-C2
+    {{"101-365-900", "101-309-500", "5.0", "S/P2-C2/5.0"}}, // Sequel® Sequencing Plate 2.1 Silwet (8 rxn)
+    {{"101-365-900", "101-309-400", "5.0", "S/P2-C2/5.0"}}, // Sequel® Sequencing Plate 2.1 Silwet (4 rxn)
 
-    // 5.0.1 ChemRel ("Sequel® Sequencing Plate Silwet - prototype parts"); S/P2-C2
-    {{"101-490-800", "101-490-900", "5.0", "S/P2-C2/5.0"}},
-    // 5.0.1 ChemRel ("Sequel® Sequencing Plate Silwet (4 rxn) - prototype parts"); S/P2-C2
-    {{"101-490-800", "101-491-000", "5.0", "S/P2-C2/5.0"}},
-
-    // 5.0.1 ChemRel ("Sequel® Sequencing Plate Silwet - prototype parts"); S/P2-C2
-    {{"101-500-400", "101-490-900", "5.0", "S/P2-C2/5.0"}},
-    // 5.0.1 ChemRel ("Sequel® Sequencing Plate Silwet (4 rxn) - prototype parts"); S/P2-C2
-    {{"101-500-400", "101-491-000", "5.0", "S/P2-C2/5.0"}}
+    // 5.0.1 ChemRel; Sequel® Binding Kit 3.0; S/P3-C3
+    {{"101-500-400", "101-427-500", "5.0", "S/P3-C3/5.0"}}, // Sequel® Sequencing Plate 3.0 (8 rxn)
+    {{"101-500-400", "101-427-800", "5.0", "S/P3-C3/5.0"}}, // Sequel® Sequencing Plate 3.0 (4 rxn)
 
+    // 5.0.1 ChemRel; Sequel® Dev Binding Kit; S/P2-C2
+    {{"101-490-800", "101-490-900", "5.0", "S/P2-C2/5.0"}}, // Sequel® Dev Sequencing Plate (4 rxn)
+    {{"101-490-800", "101-491-000", "5.0", "S/P2-C2/5.0"}}, // Sequel® Dev Sequencing Plate (8 rxn)
 };
 
+// clang-format on
+
 ChemistryTable ChemistryTableFromXml(const std::string& mappingXml)
 {
     if (!FileUtils::Exists(mappingXml))


=====================================
src/Compare.cpp
=====================================
@@ -41,31 +41,33 @@ struct CompareTypeHash
 static const std::unordered_map<std::string, Compare::Type> opToTypeMap =
 {
     // basic operators plus some permissiveness for other representations
-    { "==",    Compare::EQUAL },
-    { "=",     Compare::EQUAL },
-    { "eq",    Compare::EQUAL },
-    { "!=",    Compare::NOT_EQUAL },
-    { "ne",    Compare::NOT_EQUAL },
-    { "<",     Compare::LESS_THAN },
-    { "lt",    Compare::LESS_THAN },
-    { "<",  Compare::LESS_THAN },
-    { "<=",    Compare::LESS_THAN_EQUAL },
-    { "lte",   Compare::LESS_THAN_EQUAL },
-    { "<=", Compare::LESS_THAN_EQUAL },
-    { ">",     Compare::GREATER_THAN },
-    { "gt",    Compare::GREATER_THAN },
-    { ">",  Compare::GREATER_THAN },
-    { ">=",    Compare::GREATER_THAN_EQUAL },
-    { "gte",   Compare::GREATER_THAN_EQUAL },
-    { ">=", Compare::GREATER_THAN_EQUAL },
-    { "&",     Compare::CONTAINS },
-    { "~",     Compare::NOT_CONTAINS }
+    { "==",     Compare::EQUAL },
+    { "=",      Compare::EQUAL },
+    { "eq",     Compare::EQUAL },
+    { "in",     Compare::EQUAL },
+    { "!=",     Compare::NOT_EQUAL },
+    { "ne",     Compare::NOT_EQUAL },
+    { "not_in", Compare::NOT_EQUAL },
+    { "<",      Compare::LESS_THAN },
+    { "lt",     Compare::LESS_THAN },
+    { "<",   Compare::LESS_THAN },
+    { "<=",     Compare::LESS_THAN_EQUAL },
+    { "lte",    Compare::LESS_THAN_EQUAL },
+    { "<=",  Compare::LESS_THAN_EQUAL },
+    { ">",      Compare::GREATER_THAN },
+    { "gt",     Compare::GREATER_THAN },
+    { ">",   Compare::GREATER_THAN },
+    { ">=",     Compare::GREATER_THAN_EQUAL },
+    { "gte",    Compare::GREATER_THAN_EQUAL },
+    { ">=",  Compare::GREATER_THAN_EQUAL },
+    { "&",      Compare::CONTAINS },
+    { "~",      Compare::NOT_CONTAINS }
 };
 
 static const std::unordered_map<Compare::Type, TypeAlias, CompareTypeHash> typeAliases =
 {
-    { Compare::EQUAL,              TypeAlias{ "Compare::EQUAL",              "==", "eq"  } },
-    { Compare::NOT_EQUAL,          TypeAlias{ "Compare::NOT_EQUAL",          "!=", "ne"  } },
+    { Compare::EQUAL,              TypeAlias{ "Compare::EQUAL",              "==", "eq" } },
+    { Compare::NOT_EQUAL,          TypeAlias{ "Compare::NOT_EQUAL",          "!=", "ne" } },
     { Compare::LESS_THAN,          TypeAlias{ "Compare::LESS_THAN",          "<",  "lt"  } },
     { Compare::LESS_THAN_EQUAL,    TypeAlias{ "Compare::LESS_THAN_EQUAL",    "<=", "lte" } },
     { Compare::GREATER_THAN,       TypeAlias{ "Compare::GREATER_THAN",       ">",  "gt"  } },


=====================================
src/DataSet.cpp
=====================================
@@ -93,6 +93,9 @@ DataSet::DataSet(const DataSet::TypeEnum type)
         case DataSet::TRANSCRIPT:
             d_ = std::make_unique<TranscriptSet>();
             break;
+        case DataSet::TRANSCRIPT_ALIGNMENT:
+            d_ = std::make_unique<TranscriptAlignmentSet>();
+            break;
         default:
             throw std::runtime_error{"unsupported dataset type"};
     }
@@ -229,6 +232,7 @@ DataSet::TypeEnum DataSet::NameToType(const std::string& typeName)
         lookup["ReferenceSet"] = DataSet::REFERENCE;
         lookup["SubreadSet"] = DataSet::SUBREAD;
         lookup["TranscriptSet"] = DataSet::TRANSCRIPT;
+        lookup["TranscriptAlignmentSet"] = DataSet::TRANSCRIPT_ALIGNMENT;
     }
     return lookup.at(typeName);  // throws if unknown typename
 }
@@ -293,6 +297,8 @@ std::string DataSet::TypeToName(const DataSet::TypeEnum& type)
             return "SubreadSet";
         case DataSet::TRANSCRIPT:
             return "TranscriptSet";
+        case DataSet::TRANSCRIPT_ALIGNMENT:
+            return "TranscriptAlignmentSet";
         default:
             throw std::runtime_error{"unsupported dataset type"};
     }


=====================================
src/DataSetTypes.cpp
=====================================
@@ -157,6 +157,8 @@ std::shared_ptr<DataSetBase> DataSetBase::Create(const std::string& typeName)
     if (typeName == std::string("HdfSubreadSet")) return std::make_shared<HdfSubreadSet>();
     if (typeName == std::string("ReferenceSet")) return std::make_shared<ReferenceSet>();
     if (typeName == std::string("TranscriptSet")) return std::make_shared<TranscriptSet>();
+    if (typeName == std::string("TranscriptAlignmentSet"))
+        return std::make_shared<TranscriptAlignmentSet>();
 
     // unknown typename
     throw std::runtime_error{"unsupported dataset type"};
@@ -437,5 +439,15 @@ TranscriptSet::TranscriptSet()
 {
 }
 
+// -------------------
+// TranscriptAlignmentSet
+// -------------------
+
+TranscriptAlignmentSet::TranscriptAlignmentSet()
+    : DataSetBase("PacBio.DataSet.TranscriptAlignmentSet", "TranscriptAlignmentSet",
+                  XsdType::DATASETS)
+{
+}
+
 }  // namespace BAM
 }  // namespace PacBio


=====================================
src/DataSetXsd.cpp
=====================================
@@ -103,6 +103,7 @@ static const auto elementRegistry = std::unordered_map<std::string, XsdType>
     { "SummaryStats",          XsdType::DATASETS },
     { "TotalLength",           XsdType::DATASETS },
     { "TranscriptSet",         XsdType::DATASETS },
+    { "TranscriptAlignmentSet",XsdType::DATASETS },
 
     // 'pbmeta' elements
     //


=====================================
src/FileProducer.h
=====================================
@@ -22,7 +22,7 @@ namespace internal {
 class FileProducer
 {
 
-protected:
+public:
     FileProducer() = delete;
 
     // Initializes FileProducer with specified target filename. Temp filename is
@@ -42,9 +42,8 @@ protected:
     //
     ~FileProducer();
 
-protected:
+public:
     const std::string& TargetFilename() const { return targetFilename_; }
-
     const std::string& TempFilename() const { return tempFilename_; }
 
 private:


=====================================
src/PbiFilter.cpp
=====================================
@@ -21,6 +21,7 @@
 #include <boost/algorithm/string/trim.hpp>
 #include <boost/numeric/conversion/cast.hpp>
 
+#include "FileUtils.h"
 #include "StringUtils.h"
 #include "pbbam/PbiFilterTypes.h"
 
@@ -202,8 +203,31 @@ static PbiFilter CreateLocalContextFilter(const std::string& value, const Compar
     return PbiFilter{PbiLocalContextFilter{filterValue, compareType}};
 }
 
-static PbiFilter CreateQueryNamesFilterFromFile(const std::string& value, const DataSet& dataset)
+static PbiFilter CreateMovieNameFilter(std::string value, const Compare::Type compareType)
 {
+    if (value.empty()) throw std::runtime_error{"empty value for movie property"};
+    // accepts a single movie name or a list value; a bracketed value loses its first/last character
+    if (isBracketed(value)) {
+        value.erase(0, 1);
+        value.pop_back();
+    }
+
+    if (isList(value)) {
+        // list values are only accepted with EQUAL / NOT_EQUAL; tokens are split on ','
+        if (compareType != Compare::EQUAL && compareType != Compare::NOT_EQUAL)
+            throw std::runtime_error{"unsupported compare type on movie property"};
+
+        std::vector<std::string> tokens = internal::Split(value, ',');
+        return PbiMovieNameFilter{std::move(tokens), compareType};
+    } else
+        return PbiMovieNameFilter{value, compareType};
+}
+
+static PbiFilter CreateQueryNamesFilterFromFile(const std::string& value, const DataSet& dataset, const Compare::Type compareType)
+{
+    if (compareType != Compare::EQUAL && compareType != Compare::NOT_EQUAL)
+        throw std::runtime_error{"unsupported compare type on query name property"};
+
     // resolve file from dataset, value
     const std::string resolvedFilename = dataset.ResolvePath(value);
     std::vector<std::string> whitelist;
@@ -211,12 +235,102 @@ static PbiFilter CreateQueryNamesFilterFromFile(const std::string& value, const
     std::ifstream in(resolvedFilename);
     while (std::getline(in, fn))
         whitelist.push_back(fn);
-    return PbiQueryNameFilter{whitelist};
+    return PbiQueryNameFilter{whitelist, compareType};
 }
 
-static PbiFilter CreateZmwFilter(std::string value, const Compare::Type compareType)
+static PbiFilter CreateQueryNameFilter(std::string value, const DataSet& dataset, const Compare::Type compareType)
 {
+    if (value.empty()) throw std::runtime_error{"empty value for query name property"};
 
+    // value may name a file of query names: test the dataset-resolved path (the one the loader opens)
+    const std::string resolvedFilename = dataset.ResolvePath(value);
+    if (internal::FileUtils::Exists(resolvedFilename))
+        return CreateQueryNamesFilterFromFile(value, dataset, compareType);
+
+    // otherwise "normal" qname (single, or list)
+    // a bracketed value loses its first and last character before further parsing
+    if (isBracketed(value)) {
+        value.erase(0, 1);
+        value.pop_back();
+    }
+
+    if (isList(value)) {
+        // list values are only accepted with EQUAL / NOT_EQUAL; tokens are split on ','
+        if (compareType != Compare::EQUAL && compareType != Compare::NOT_EQUAL)
+            throw std::runtime_error{"unsupported compare type on query name property"};
+
+        std::vector<std::string> tokens = internal::Split(value, ',');
+        return PbiQueryNameFilter{std::move(tokens), compareType};
+    } else
+        return PbiQueryNameFilter{value, compareType};
+}
+
+static PbiFilter CreateReadGroupFilter(std::string value, const Compare::Type compareType)
+{
+    if (value.empty()) throw std::runtime_error{"empty value for read group property"};
+    // accepts a single read group or a list value; a bracketed value loses its first/last character
+    if (isBracketed(value)) {
+        value.erase(0, 1);
+        value.pop_back();
+    }
+
+    if (isList(value)) {
+        // list values are only accepted with EQUAL / NOT_EQUAL; tokens are split on ','
+        if (compareType != Compare::EQUAL && compareType != Compare::NOT_EQUAL)
+            throw std::runtime_error{"unsupported compare type on read group property"};
+
+        std::vector<std::string> tokens = internal::Split(value, ',');
+        return PbiReadGroupFilter{std::move(tokens), compareType};
+    } else
+        return PbiReadGroupFilter{value, compareType};
+}
+
+static PbiFilter CreateReferenceIdFilter(std::string value, const Compare::Type compareType)
+{
+    if (value.empty()) throw std::runtime_error{"empty value for reference ID property"};
+    // accepts a single reference ID or a list value; a bracketed value loses its first/last character
+    if (isBracketed(value)) {
+        value.erase(0, 1);
+        value.pop_back();
+    }
+
+    if (isList(value)) {
+        // list values are only accepted with EQUAL / NOT_EQUAL; tokens are split on ','
+        if (compareType != Compare::EQUAL && compareType != Compare::NOT_EQUAL)
+            throw std::runtime_error{"unsupported compare type on reference ID property"};
+
+        std::vector<std::string> tokens = internal::Split(value, ',');
+        std::vector<int32_t> ids;
+        ids.reserve(tokens.size());
+        for (const auto& t : tokens)
+            ids.push_back(boost::numeric_cast<int32_t>(std::stoi(t)));
+        return PbiReferenceIdFilter{std::move(ids), compareType};
+    } else
+        return PbiReferenceIdFilter{boost::numeric_cast<int32_t>(std::stoi(value)), compareType};
+}
+
+static PbiFilter CreateReferenceNameFilter(std::string value, const Compare::Type compareType)
+{
+    if (value.empty()) throw std::runtime_error{"empty value for reference name property"};
+    // accepts a single reference name or a list value; a bracketed value loses its first/last character
+    if (isBracketed(value)) {
+        value.erase(0, 1);
+        value.pop_back();
+    }
+
+    if (isList(value)) {
+        // list values are only accepted with EQUAL / NOT_EQUAL; tokens are split on ','
+        if (compareType != Compare::EQUAL && compareType != Compare::NOT_EQUAL)
+            throw std::runtime_error{"unsupported compare type on reference name property"};
+
+        std::vector<std::string> tokens = internal::Split(value, ',');
+        return PbiReferenceNameFilter{std::move(tokens), compareType};
+    } else
+        return PbiReferenceNameFilter{value, compareType};
+}
+
+static PbiFilter CreateZmwFilter(std::string value, const Compare::Type compareType)
+{
     if (value.empty()) throw std::runtime_error{"empty value for ZMW filter property"};
 
     if (isBracketed(value)) {
@@ -235,10 +349,38 @@ static PbiFilter CreateZmwFilter(std::string value, const Compare::Type compareT
         return PbiZmwFilter{boost::numeric_cast<int32_t>(stoi(value)), compareType};
 }
 
+static PbiFilter CreateZmwModuloFilter(const Property& property)
+{
+    // only a "zm" property carrying both "Modulo" and "Hash" attributes is accepted
+    if (!property.HasAttribute("Modulo") || !property.HasAttribute("Hash") ||
+        property.Name() != "zm")
+    {
+        throw std::runtime_error{"Modulo filter not supported on property: " + property.Name()};
+    }
+
+    // the "Hash" attribute is matched case-insensitively; the original spelling is kept for the error text
+    const auto hashType = property.Attribute("Hash");
+    const auto hashKey = boost::algorithm::to_lower_copy(hashType);
+    const FilterHash hash = [&hashType, &hashKey]()
+    {
+        if (hashKey == "uint32cast") return FilterHash::UNSIGNED_LONG_CAST;
+        if (hashKey == "boosthashcombine") return FilterHash::BOOST_HASH_COMBINE;
+        throw std::runtime_error{"unsupported hash type: " + hashType};
+    }();
+
+    const uint32_t denom = static_cast<uint32_t>(std::stoul(property.Attribute("Modulo")));
+    const uint32_t value = static_cast<uint32_t>(std::stoul(property.Value()));
+    return PbiZmwModuloFilter{ denom, value, hash, Compare::EQUAL };  // modulo filters always compare with EQUAL
+}
+
 static PbiFilter FromDataSetProperty(const Property& property, const DataSet& dataset)
 {
     try {
         const std::string& value = property.Value();
+
+        if (property.Name() == "zm" && property.HasAttribute("Modulo"))
+            return CreateZmwModuloFilter(property);
+
         const Compare::Type compareType = Compare::TypeFromOperator(property.Operator());
         const BuiltIn builtInCode =
             builtInLookup.at(boost::algorithm::to_lower_copy(property.Name()));
@@ -252,16 +394,11 @@ static PbiFilter FromDataSetProperty(const Property& property, const DataSet& da
             case BuiltIn::AlignedStartFilter   : return PbiAlignedStartFilter{ static_cast<uint32_t>(std::stoul(value)), compareType };
             case BuiltIn::BarcodeQualityFilter : return PbiBarcodeQualityFilter{ static_cast<uint8_t>(std::stoul(value)), compareType };
             case BuiltIn::IdentityFilter       : return PbiIdentityFilter{ std::stof(value), compareType };
-            case BuiltIn::MovieNameFilter      : return PbiMovieNameFilter{ value };
             case BuiltIn::QueryEndFilter       : return PbiQueryEndFilter{ std::stoi(value), compareType };
             case BuiltIn::QueryLengthFilter    : return PbiQueryLengthFilter{ std::stoi(value), compareType };
-            case BuiltIn::QueryNameFilter      : return PbiQueryNameFilter{ value };
             case BuiltIn::QueryStartFilter     : return PbiQueryStartFilter{ std::stoi(value), compareType };
             case BuiltIn::ReadAccuracyFilter   : return PbiReadAccuracyFilter{ std::stof(value), compareType };
-            case BuiltIn::ReadGroupFilter      : return PbiReadGroupFilter{ value, compareType };
             case BuiltIn::ReferenceEndFilter   : return PbiReferenceEndFilter{ static_cast<uint32_t>(std::stoul(value)), compareType };
-            case BuiltIn::ReferenceIdFilter    : return PbiReferenceIdFilter{ std::stoi(value), compareType };
-            case BuiltIn::ReferenceNameFilter  : return PbiReferenceNameFilter{ value };
             case BuiltIn::ReferenceStartFilter : return PbiReferenceStartFilter{ static_cast<uint32_t>(std::stoul(value)), compareType };
 
             // (maybe) list-value filters
@@ -269,10 +406,15 @@ static PbiFilter FromDataSetProperty(const Property& property, const DataSet& da
             case BuiltIn::BarcodeForwardFilter : return CreateBarcodeForwardFilter(value, compareType);
             case BuiltIn::BarcodeReverseFilter : return CreateBarcodeReverseFilter(value, compareType);
             case BuiltIn::LocalContextFilter   : return CreateLocalContextFilter(value, compareType);
+            case BuiltIn::MovieNameFilter      : return CreateMovieNameFilter(value, compareType);
+            case BuiltIn::QueryNameFilter      : return CreateQueryNameFilter(value, dataset, compareType);
+            case BuiltIn::ReadGroupFilter      : return CreateReadGroupFilter(value, compareType);
+            case BuiltIn::ReferenceIdFilter    : return CreateReferenceIdFilter(value, compareType);
+            case BuiltIn::ReferenceNameFilter  : return CreateReferenceNameFilter(value, compareType);
             case BuiltIn::ZmwFilter            : return CreateZmwFilter(value, compareType);
 
             // other built-ins
-            case BuiltIn::QueryNamesFromFileFilter : return CreateQueryNamesFilterFromFile(value, dataset); // compareType ignored
+            case BuiltIn::QueryNamesFromFileFilter : return CreateQueryNamesFilterFromFile(value, dataset, compareType);
 
             default :
             throw std::runtime_error{""};


=====================================
src/PbiFilterTypes.cpp
=====================================
@@ -127,13 +127,15 @@ bool PbiIdentityFilter::Accepts(const PbiRawData& idx, const size_t row) const
 
 // PbiMovieNameFilter
 
-PbiMovieNameFilter::PbiMovieNameFilter(const std::string& movieName)
+PbiMovieNameFilter::PbiMovieNameFilter(const std::string& movieName, const Compare::Type cmp)
     : compositeFilter_{internal::filterFromMovieName(movieName, true)}  // include CCS
+    , cmp_{cmp}
 {
 }
 
-PbiMovieNameFilter::PbiMovieNameFilter(const std::vector<std::string>& whitelist)
-    : compositeFilter_{PbiFilter::UNION}
+PbiMovieNameFilter::PbiMovieNameFilter(const std::vector<std::string>& whitelist,
+                                       const Compare::Type cmp)
+    : compositeFilter_{PbiFilter::UNION}, cmp_{cmp}
 {
     for (const auto& movieName : whitelist)
         compositeFilter_.Add(internal::filterFromMovieName(movieName, true));  // include CCS
@@ -162,7 +164,9 @@ public:
     using RgIdLookup = std::unordered_map<int32_t, ZmwLookupPtr>;
 
 public:
-    PbiQueryNameFilterPrivate(const std::vector<std::string>& whitelist)
+    PbiQueryNameFilterPrivate(const std::vector<std::string>& whitelist,
+                              const Compare::Type cmp = Compare::EQUAL)
+        : cmp_{cmp}
     {
         for (const auto& queryName : whitelist) {
 
@@ -177,7 +181,10 @@ public:
 
     PbiQueryNameFilterPrivate(const std::unique_ptr<PbiQueryNameFilterPrivate>& other)
     {
-        if (other) lookup_ = other->lookup_;
+        if (other) {
+            lookup_ = other->lookup_;
+            cmp_ = other->cmp_;
+        }
     }
 
     bool Accepts(const PbiRawData& idx, const size_t row) const
@@ -201,7 +208,14 @@ public:
         const auto qStart = basicData.qStart_.at(row);
         const auto qEnd = basicData.qEnd_.at(row);
         const auto queryInterval = std::make_pair(qStart, qEnd);
-        return queryIntervals.find(queryInterval) != queryIntervals.end();
+
+        const bool found = queryIntervals.find(queryInterval) != queryIntervals.end();
+        if (cmp_ == Compare::EQUAL)
+            return found;
+        else if (cmp_ == Compare::NOT_EQUAL)
+            return !found;
+        else
+            throw std::runtime_error{"unsupported compare type on query name filter"};
     }
 
     std::vector<int32_t> CandidateRgIds(const std::string& movieName, const RecordType type)
@@ -295,16 +309,18 @@ public:
 
 private:
     RgIdLookup lookup_;
+    Compare::Type cmp_;
 };
 
-PbiQueryNameFilter::PbiQueryNameFilter(const std::string& qname)
+PbiQueryNameFilter::PbiQueryNameFilter(const std::string& qname, const Compare::Type cmp)
     : d_{std::make_unique<PbiQueryNameFilter::PbiQueryNameFilterPrivate>(
-          std::vector<std::string>{1, qname})}
+          std::vector<std::string>{1, qname}, cmp)}
 {
 }
 
-PbiQueryNameFilter::PbiQueryNameFilter(const std::vector<std::string>& whitelist)
-    : d_{std::make_unique<PbiQueryNameFilter::PbiQueryNameFilterPrivate>(whitelist)}
+PbiQueryNameFilter::PbiQueryNameFilter(const std::vector<std::string>& whitelist,
+                                       const Compare::Type cmp)
+    : d_{std::make_unique<PbiQueryNameFilter::PbiQueryNameFilterPrivate>(whitelist, cmp)}
 {
 }
 
@@ -333,8 +349,9 @@ PbiReferenceNameFilter::PbiReferenceNameFilter(std::string rname, Compare::Type
     }
 }
 
-PbiReferenceNameFilter::PbiReferenceNameFilter(std::vector<std::string> whitelist)
-    : rnameWhitelist_{std::move(whitelist)}, cmp_{Compare::EQUAL}
+PbiReferenceNameFilter::PbiReferenceNameFilter(std::vector<std::string> whitelist,
+                                               const Compare::Type cmp)
+    : rnameWhitelist_{std::move(whitelist)}, cmp_{cmp}
 {
 }
 
@@ -358,9 +375,10 @@ void PbiReferenceNameFilter::Initialize(const PbiRawData& idx) const
 
     // multi-value whitelist
     else {
-        subFilter_ = PbiFilter(PbiFilter::UNION);
+        std::vector<int32_t> ids;
         for (const auto& rname : rnameWhitelist_.get())
-            subFilter_.Add(PbiReferenceIdFilter{bamFile.ReferenceId(rname)});
+            ids.push_back(bamFile.ReferenceId(rname));
+        subFilter_ = PbiReferenceIdFilter{std::move(ids), cmp_};
     }
     initialized_ = true;
 }


=====================================
tests/data/long-cigar-1.7.bam
=====================================
Binary files /dev/null and b/tests/data/long-cigar-1.7.bam differ


=====================================
tests/data/long-cigar.bam deleted
=====================================
Binary files a/tests/data/long-cigar.bam and /dev/null differ


=====================================
tests/meson.build
=====================================
@@ -44,18 +44,27 @@ custom_target('pbbam_generate_data',
   build_by_default : true,
   install : false)
 
+pbbamify_synthetic_dataset = configure_file(
+  input : files('data/pbbamify/synthetic_movie_all.subreadset.xml.in'),
+  output : 'synthetic_movie_all.subreadset.xml',
+  configuration : pbbam_PbbamTestData_h_config)
+
+#########
+# tests #
+#########
+
 test(
-  'pbbam unit tests',
+  'pbbam formatting check',
+  pbbam_clang_formatter,
+  args : [
+    '--all'],
+  workdir : meson.source_root())
+
+test(
+  'pbbam gtest unittests',
   pbbam_test,
   args : [
-    '--gtest_output=xml:' + join_paths(meson.build_root(), 'pbbam-unit-tests.xml')],
+    '--gtest_output=xml:' + join_paths(meson.build_root(), 'pbbam-gtest-unittests.xml')],
   env : [
     'ARGS=-V',
     'VERBOSE=1'])
-
-test('pbbam formatting check', pbbam_clang_formatter, args : ['--all'], workdir : meson.source_root())
-
-pbbamify_synthetic_dataset = configure_file(
-  input : files('data/pbbamify/synthetic_movie_all.subreadset.xml.in'),
-  output : 'synthetic_movie_all.subreadset.xml',
-  configuration : pbbam_PbbamTestData_h_config)


=====================================
tests/src/cram/pbbamify.t.in
=====================================
@@ -8,7 +8,7 @@ Setup:
 
 Forward alignments with and without user specified tags, one alignment with undefined mapq, some alignments with basic CIGAR operations, 2 alignemtns with hard clipping, and several invalid alignments (1 without a seq field and 1 not present in the dataset) which should be skipped:
 
-  $ $PBBAMIFY --input=$DATADIR/pbbamify/input-aligned-1.bam $DATADIR/pbbamify/synthetic-ref-1.fa $DATADIR/pbbamify/synthetic_movie_1.subreads.bam | samtools view -h
+  $ $PBBAMIFY --input=$DATADIR/pbbamify/input-aligned-1.bam $DATADIR/pbbamify/synthetic-ref-1.fa $DATADIR/pbbamify/synthetic_movie_1.subreads.bam | $SAMTOOLS view -h
   [Warning] No records found for query 'synthetic_movie_1/10/0_100'. Skipping.
   [Warning] Sequence 'synthetic_movie_1/1/0_100' (length 90) is not of the same length as the PacBio BAM sequence (length 100)! Skipping.
   [Warning] Found 1 alignments without a seq field which were not converted (most likely secondary alignments).
@@ -34,7 +34,7 @@ Forward alignments with and without user specified tags, one alignment with unde
   synthetic_movie_1/2/0_101\t256\tsynthetic_ref_1\t30\t60\t8S1X21=3I1X3D13=1D1=1X1=1I9=1X32=1I1=6S\t*\t0\t0\tCGCTATTTTTGAAAATTTTCCGGTTTAAGGAAATTCCGTTCTTCTTCTGAATAACTTAATCTTTTTATTTAAAATACCCTCTGAAAAGAAAGGAAAACGAC\t!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\tRG:Z:8d2370c0\tcx:i:3\tip:B:C,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101\tnp:i:1\tpw:B:C,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101\tqe:i:101\tqs:i:0\trq:f:0.8\tsn:B:f,8.34462,15.7107,6.3469,10.3163\tzm:i:2 (esc)
 
 Reverse alignments: 2 primary alignments and 7 secondary, 6 alignments with extended CIGAR and 2 with basic CIGAR strings, 1 alignment with undefined (255) mapq, 2 alignments with hard clipping, 1 alignment with user defined tag. All alignments have a read group assigned which is different than the dataset.
-  $ $PBBAMIFY --input=$DATADIR/pbbamify/input-aligned-2.bam $DATADIR/pbbamify/synthetic-ref-1.fa $DATADIR/pbbamify/synthetic_movie_2.subreads.bam | samtools view -h
+  $ $PBBAMIFY --input=$DATADIR/pbbamify/input-aligned-2.bam $DATADIR/pbbamify/synthetic-ref-1.fa $DATADIR/pbbamify/synthetic_movie_2.subreads.bam | $SAMTOOLS view -h
   [INFO] Done processing 9 alignments in 0 min.
   @HD\tVN:1.5\tSO:unknown\tpb:3.0.3 (esc)
   @SQ\tSN:synthetic_ref_1\tLN:150\tM5:e1e940d621d949c9617566ddf3055922 (esc)
@@ -55,7 +55,7 @@ Reverse alignments: 2 primary alignments and 7 secondary, 6 alignments with exte
 
 CCS read:
 
-  $ $PBBAMIFY --input=$DATADIR/pbbamify/input-aligned-3.bam $DATADIR/pbbamify/synthetic-ref-1.fa $DATADIR/pbbamify/synthetic_movie_3.subreads.bam | samtools view -h
+  $ $PBBAMIFY --input=$DATADIR/pbbamify/input-aligned-3.bam $DATADIR/pbbamify/synthetic-ref-1.fa $DATADIR/pbbamify/synthetic_movie_3.subreads.bam | $SAMTOOLS view -h
   [INFO] Done processing 1 alignments in 0 min.
   @HD\tVN:1.5\tSO:unknown\tpb:3.0.1 (esc)
   @SQ\tSN:synthetic_ref_1\tLN:150\tM5:e1e940d621d949c9617566ddf3055922 (esc)
@@ -66,7 +66,7 @@ CCS read:
 
 No verbose output:
 
-  $ $PBBAMIFY --input=$DATADIR/pbbamify/input-aligned-1.bam --verbose-level=0 $DATADIR/pbbamify/synthetic-ref-1.fa $DATADIR/pbbamify/synthetic_movie_1.subreads.bam | samtools view -h
+  $ $PBBAMIFY --input=$DATADIR/pbbamify/input-aligned-1.bam --verbose-level=0 $DATADIR/pbbamify/synthetic-ref-1.fa $DATADIR/pbbamify/synthetic_movie_1.subreads.bam | $SAMTOOLS view -h
   @HD\tVN:1.5\tSO:unknown\tpb:3.0.3 (esc)
   @SQ\tSN:synthetic_ref_1\tLN:150\tM5:e1e940d621d949c9617566ddf3055922 (esc)
   @RG\tID:8d2370c0\tPL:PACBIO\tDS:READTYPE=SUBREAD;Ipd:CodecV1=ip;PulseWidth:CodecV1=pw;BINDINGKIT=100-862-200;SEQUENCINGKIT=100-861-800;BASECALLERVERSION=5.0.0.5552;FRAMERATEHZ=80.000000\tPU:synthetic_movie_1\tPM:SEQUEL (esc)
@@ -89,7 +89,7 @@ No verbose output:
 
 Test on a dataset, input contains alignments from all subread sets.
 
-  $ $PBBAMIFY --input=$DATADIR/pbbamify/input-aligned-all.bam $DATADIR/pbbamify/synthetic-ref-1.fa $GENERATEDDATADIR/synthetic_movie_all.subreadset.xml | samtools view -h
+  $ $PBBAMIFY --input=$DATADIR/pbbamify/input-aligned-all.bam $DATADIR/pbbamify/synthetic-ref-1.fa $GENERATEDDATADIR/synthetic_movie_all.subreadset.xml | $SAMTOOLS view -h
   [Warning] No records found for query 'synthetic_movie_1/10/0_100'. Skipping.
   [Warning] Sequence 'synthetic_movie_1/1/0_100' (length 90) is not of the same length as the PacBio BAM sequence (length 100)! Skipping.
   [Warning] Found 1 alignments without a seq field which were not converted (most likely secondary alignments).


=====================================
tests/src/test_BamWriter.cpp
=====================================
@@ -15,7 +15,11 @@
 using namespace PacBio;
 using namespace PacBio::BAM;
 
-TEST(BamWriterTest, SingleWrite_UserRecord)
+// clang-format off
+
+namespace BamWriterTests {
+
+void checkSingleRecord(bool useTempFile)
 {
     const std::string fullName = "test/100/0_5";
     const std::string rgId = "6002b307";
@@ -58,7 +62,9 @@ TEST(BamWriterTest, SingleWrite_UserRecord)
     const std::string generatedBamFn =
         PbbamTestsConfig::GeneratedData_Dir + "/bamwriter_generated.bam";
     {
-        BamWriter writer(generatedBamFn, inputHeader);
+        BamWriter::Config config;
+        config.useTempFile = useTempFile;
+        BamWriter writer(generatedBamFn, inputHeader, config);
         writer.Write(bamRecord);
     }
 
@@ -91,3 +97,17 @@ TEST(BamWriterTest, SingleWrite_UserRecord)
     // clean up
     remove(generatedBamFn.c_str());
 }
+
+} // namespace BamWriterTests
+
+TEST(BamWriterTest, SingleWrite_UserRecord_WithTempFile)
+{
+    BamWriterTests::checkSingleRecord(true);
+}
+
+TEST(BamWriterTest, SingleWrite_UserRecord_NoTempFile)
+{
+    BamWriterTests::checkSingleRecord(false);
+}
+
+// clang-format on


=====================================
tests/src/test_LongCigar.cpp
=====================================
@@ -2,6 +2,7 @@
 
 #include <iostream>
 #include <string>
+#include <tuple>
 
 #include <gtest/gtest.h>
 
@@ -10,6 +11,9 @@
 #include <pbbam/BamReader.h>
 #include <pbbam/BamWriter.h>
 
+#include "../../src/MemoryUtils.h"
+#include "../../src/StringUtils.h"
+
 using BamReader = PacBio::BAM::BamReader;
 using BamRecord = PacBio::BAM::BamRecord;
 using BamWriter = PacBio::BAM::BamWriter;
@@ -18,15 +22,42 @@ using CigarOp = PacBio::BAM::CigarOperation;
 using PacBio::BAM::CigarOperationType;
 using Tag = PacBio::BAM::Tag;
 
+// clang-format off
+
 namespace LongCigarTests {
 
+static bool DoesHtslibSupportLongCigar()
+{
+    const std::string htsVersion = hts_version();
+
+    // remove any "-<blah>" for non-release versions
+    const auto versionBase = PacBio::BAM::Split(htsVersion, '-');
+    if (versionBase.empty())
+        throw std::runtime_error{"invalid htslib version format: " + htsVersion};
+
+    // grab major/minor version numbers
+    const auto versionParts = PacBio::BAM::Split(versionBase[0], '.');
+    if (versionParts.size() < 2)
+         throw std::runtime_error{"invalid htslib version format: " + htsVersion};
+
+    // check against v1.7
+    const int versionMajor = std::stoi(versionParts[0]);
+    const int versionMinor = std::stoi(versionParts[1]);
+    static constexpr const int v17_major = 1;
+    static constexpr const int v17_minor = 7;
+    return std::tie(versionMajor, versionMinor) >=
+           std::tie(v17_major, v17_minor);
+}
+
+static const bool has_native_long_cigar_support = DoesHtslibSupportLongCigar();
+
 // BAM record in this file has its CIGAR data in the new "CG" tag
-static const std::string LongCigarBam = PacBio::BAM::PbbamTestsConfig::Data_Dir + "/long-cigar.bam";
+static const std::string LongCigarBam = PacBio::BAM::PbbamTestsConfig::Data_Dir + "/long-cigar-1.7.bam";
 
 static const std::string LongCigarOut =
     PacBio::BAM::PbbamTestsConfig::GeneratedData_Dir + "/long-cigar-generated.bam";
 
-static const size_t numOps = 66000;
+static const size_t numOps = 72091;
 
 static BamRecord ReadLongCigarRecord(const std::string& fn)
 {
@@ -37,90 +68,58 @@ static BamRecord ReadLongCigarRecord(const std::string& fn)
     return b;
 }
 
-static void SetLongCigar(BamRecord* b)
-{
-    Cigar cigar;
-    cigar.resize(numOps);
-    for (size_t i = 0; i < LongCigarTests::numOps; ++i) {
-        const CigarOperationType type =
-            (i % 2 == 0 ? CigarOperationType::SEQUENCE_MATCH : CigarOperationType::INSERTION);
-        cigar.at(i) = CigarOp(type, 1);
-    }
-    b->Impl().CigarData(cigar);
-}
-
-static void CheckLongCigar(const Cigar& cigar)
-{
-    ASSERT_EQ(numOps, cigar.size());
-
-    for (size_t i = 0; i < numOps; ++i) {
-        const CigarOp& op = cigar.at(i);
-        EXPECT_EQ(1, op.Length());
-
-        const CigarOperationType expectedType =
-            (i % 2 == 0 ? CigarOperationType::SEQUENCE_MATCH : CigarOperationType::INSERTION);
-        EXPECT_EQ(expectedType, op.Type());
-    }
-}
-
-static void CheckLongCigarTag(const Tag& cigarTag)
-{
-    ASSERT_TRUE(cigarTag.IsUInt32Array());
-    const auto tagArray = cigarTag.ToUInt32Array();
-    ASSERT_EQ(numOps, tagArray.size());
-
-    for (size_t i = 0; i < numOps; ++i) {
-        const auto op = tagArray.at(i);
-        const auto expectedLength = 1;
-        const auto expectedType = (i % 2 == 0 ? BAM_CEQUAL : BAM_CINS);
-
-        EXPECT_EQ(expectedType, bam_cigar_op(op));
-        EXPECT_EQ(expectedLength, bam_cigar_oplen(op));
-    }
-}
-
 }  // namespace LongCigarTests
 
 TEST(LongCigarTest, ReadAndFetchLongCigar)
 {
     const auto b = LongCigarTests::ReadLongCigarRecord(LongCigarTests::LongCigarBam);
 
-    // public API
-    const auto cigar = b.CigarData();
-    EXPECT_EQ(66000, cigar.size());
-
-    // TODO: come back & check raw data once we have 'private access wrapper'
-    //       but we're looking good
+    EXPECT_EQ(LongCigarTests::numOps, b.CigarData().size());
+    if (LongCigarTests::has_native_long_cigar_support)
+        EXPECT_FALSE(b.Impl().HasTag("CG"));
+    else
+        EXPECT_TRUE(b.Impl().HasTag("CG"));
 }
 
 TEST(LongCigarTest, EditLongCigar)
 {
-    SCOPED_TRACE("EditLongCigar");
-
     auto b = LongCigarTests::ReadLongCigarRecord(LongCigarTests::LongCigarBam);
-    LongCigarTests::SetLongCigar(&b);
+    b.Impl().CigarData(b.CigarData());
 
-    const auto recordCigar = b.CigarData();
-    const auto cigarTag = b.Impl().TagValue("CG");
-    LongCigarTests::CheckLongCigar(recordCigar);
-    LongCigarTests::CheckLongCigarTag(cigarTag);
+    EXPECT_EQ(LongCigarTests::numOps, b.CigarData().size());
+    if (LongCigarTests::has_native_long_cigar_support)
+        EXPECT_FALSE(b.Impl().HasTag("CG"));
+    else
+        EXPECT_TRUE(b.Impl().HasTag("CG"));
 }
 
 TEST(LongCigarTest, WriteLongCigar)
 {
     SCOPED_TRACE("WriteLongCigar");
 
-    {  // write record with our custom long CIGAR
+    {   // edit & write
         auto b = LongCigarTests::ReadLongCigarRecord(LongCigarTests::LongCigarBam);
-        LongCigarTests::SetLongCigar(&b);
+        b.Impl().CigarData(b.CigarData());
+
+        EXPECT_EQ(LongCigarTests::numOps, b.CigarData().size());
+        if (LongCigarTests::has_native_long_cigar_support)
+            EXPECT_FALSE(b.Impl().HasTag("CG"));
+        else
+            EXPECT_TRUE(b.Impl().HasTag("CG"));
+
         BamWriter writer{LongCigarTests::LongCigarOut, b.header_};
         writer.Write(b);
     }
-    {  // read back in to check
+
+    {   // read back in
         auto b = LongCigarTests::ReadLongCigarRecord(LongCigarTests::LongCigarOut);
-        const auto recordCigar = b.CigarData();
-        const auto cigarTag = b.Impl().TagValue("CG");
-        LongCigarTests::CheckLongCigar(recordCigar);
-        LongCigarTests::CheckLongCigarTag(cigarTag);
+
+        EXPECT_EQ(LongCigarTests::numOps, b.CigarData().size());
+        if (LongCigarTests::has_native_long_cigar_support)
+            EXPECT_FALSE(b.Impl().HasTag("CG"));
+        else
+            EXPECT_TRUE(b.Impl().HasTag("CG"));
     }
 }
+
+// clang-format on


=====================================
tools/meson.build
=====================================
@@ -12,10 +12,6 @@ pbbam_Bam2SamVersion_h = configure_file(
   output : 'Bam2SamVersion.h',
   configuration : pbbam_Bam2SamVersion_h_config)
 
-if pbbam_htslib_dep.found()
-  pbbam_htslib_libdir = pbbam_htslib_dep.get_pkgconfig_variable('libdir')
-endif
-
 pbbam_bam2sam_cpp_sources = [pbbam_Bam2SamVersion_h]
 pbbam_bam2sam_cpp_sources += files([
   'common/OptionParser.cpp',
@@ -28,7 +24,6 @@ pbbam_bam2sam = executable(
   dependencies : [pbbam_htslib_dep, pbbam_thread_dep, pbbam_zlib_dep],
   include_directories : [pbbam_include_directories, include_directories('bam2sam')],
   install : get_option('build-tools'),
-  build_rpath : pbbam_htslib_libdir,
   cpp_args : pbbam_warning_flags)
 
 # tests
@@ -42,7 +37,7 @@ if get_option('tests')
     'bam2sam_CramTests',
     pbbam_cram_script,
     args : [
-      '--xunit-file=' + join_paths(meson.build_root(), 'blasr-cram-bam2sam.xml'),
+      '--xunit-file=' + join_paths(meson.build_root(), 'pbbam-cram-bam2sam.xml'),
       '--verbose'] + [
         bam2sam_t],
     timeout : 1800)
@@ -119,7 +114,7 @@ if get_option('tests')
     'pbindexdump_CramTests',
     pbbam_cram_script,
     args : [
-      '--xunit-file=' + join_paths(meson.build_root(), 'blasr-cram-pbindexdump.xml'),
+      '--xunit-file=' + join_paths(meson.build_root(), 'pbbam-cram-pbindexdump.xml'),
       '--verbose'] + [
         pbindexdump_json_t,
         pbindexdump_cpp_t],
@@ -178,7 +173,7 @@ if get_option('tests') and not get_option('auto-validate')
     'pbmerge_CramTests',
     pbbam_cram_script,
     args : [
-      '--xunit-file=' + join_paths(meson.build_root(), 'blasr-cram-pbmerge.xml'),
+      '--xunit-file=' + join_paths(meson.build_root(), 'pbbam-cram-pbmerge.xml'),
       '--verbose'] + [
         pbmerge_pacbio_ordering_t,
         pbmerge_aligned_ordering_t,
@@ -224,6 +219,8 @@ pbbam_pbbamify = executable(
 
 # tests
 if get_option('tests')
+  pbbam_test_samtools = find_program('samtools', required : true)
+
   pbbamify_t = configure_file(
     input : pbbam_cram_pbbamify_t_in,
     output : 'pbbamify.t',
@@ -233,11 +230,10 @@ if get_option('tests')
     'pbbamify_CramTests',
     pbbam_cram_script,
     args : [
-      '--xunit-file=' + join_paths(meson.build_root(), 'blasr-cram-pbbamify.xml'),
+      '--xunit-file=' + join_paths(meson.build_root(), 'pbbam-cram-pbbamify.xml'),
       '--verbose'] + [
         pbbamify_t],
+    env : [
+      'SAMTOOLS=' + pbbam_test_samtools.path()],
     timeout : 1800)
-
 endif
-
-### Tests will be added later.



View it on GitLab: https://salsa.debian.org/med-team/pbbam/compare/a90e83c9fdcbcd2415303185465e50926c78680e...c0a82780f2d3957725dbed2a13f87ba7faed6404

-- 
View it on GitLab: https://salsa.debian.org/med-team/pbbam/compare/a90e83c9fdcbcd2415303185465e50926c78680e...c0a82780f2d3957725dbed2a13f87ba7faed6404
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20181010/d07d8938/attachment-0001.html>


More information about the debian-med-commit mailing list