[med-svn] [Git][med-team/pbbam][upstream] New upstream version 0.19.0+dfsg
Andreas Tille
gitlab at salsa.debian.org
Wed Oct 10 12:22:41 BST 2018
Andreas Tille pushed to branch upstream at Debian Med / pbbam
Commits:
25ba6fde by Andreas Tille at 2018-10-10T09:47:15Z
New upstream version 0.19.0+dfsg
- - - - -
27 changed files:
- CHANGELOG.md
- CMakeLists.txt
- include/pbbam/BamWriter.h
- include/pbbam/Compare.h
- include/pbbam/DataSet.h
- include/pbbam/DataSetTypes.h
- include/pbbam/PbiFilterTypes.h
- include/pbbam/internal/PbiFilterTypes.inl
- meson.build
- scripts/ci/test.sh
- src/BamRecordImpl.cpp
- src/BamWriter.cpp
- src/ChemistryTable.cpp
- src/Compare.cpp
- src/DataSet.cpp
- src/DataSetTypes.cpp
- src/DataSetXsd.cpp
- src/FileProducer.h
- src/PbiFilter.cpp
- src/PbiFilterTypes.cpp
- + tests/data/long-cigar-1.7.bam
- − tests/data/long-cigar.bam
- tests/meson.build
- tests/src/cram/pbbamify.t.in
- tests/src/test_BamWriter.cpp
- tests/src/test_LongCigar.cpp
- tools/meson.build
Changes:
=====================================
CHANGELOG.md
=====================================
@@ -10,6 +10,11 @@ guarantees will be maintained within each major version series.
## Active
+## [0.19.0] - 2018-09-11
+
+### Added
+ - TranscriptAlignmentSet to XML support
+
## [0.17.0] - 2018-03-18
### Added
=====================================
CMakeLists.txt
=====================================
@@ -3,7 +3,7 @@
########################################################################
cmake_policy(SET CMP0048 NEW) # lets us set version in project()
-project(PacBioBAM VERSION 0.17.0 LANGUAGES CXX C)
+project(PacBioBAM VERSION 0.19.0 LANGUAGES CXX C)
cmake_minimum_required(VERSION 3.0)
# project name & version
=====================================
include/pbbam/BamWriter.h
=====================================
@@ -87,14 +87,39 @@ public:
BinCalculation_OFF
};
+ ///
+ /// \brief The Config struct provides a "parameter object" for BamWriter
+ /// settings. This allows for writer configuration without having to
+ /// refer to ordering of parameters, default values, etc.
+ ///
+ struct Config
+ {
+ Config() = default;
+
+ // zlib compression level
+ CompressionLevel compressionLevel = DefaultCompression;
+
+ // The number of threads for compression. If set to 0, BamWriter will
+ // attempt to determine a reasonable estimate. If set to 1, this will
+ // force single-threaded execution. No checks are made against an upper limit.
+ size_t numThreads = 4;
+
+ // If ON, ensures that proper BAI bin numbers are provided for all records.
+ BamWriter::BinCalculationMode binCalculationMode = BamWriter::BinCalculation_ON;
+
+ // If true, write to <filename>.tmp, and rename to <filename> in dtor.
+ // This allows downstream checks to see if BAM file may be truncated
+ // due to early termination (e.g. a thrown exception). If false, write
+ // directly to <filename>.
+ bool useTempFile = true;
+ };
+
public:
/// \name Constructors & Related Methods
/// \{
/// \brief Opens a %BAM file for writing & writes the header information.
///
- /// The error status will be set if either operation fails.
- ///
/// \note Set \p filename to "-" for stdout.
///
/// \param[in] filename path to output %BAM file
@@ -111,13 +136,32 @@ public:
/// records written. This extra step may be turned off when bin
/// numbers are not needed. Though if in doubt, keep the default.
///
+ /// \param[in] useTempFile If true, write to <filename>.tmp, and rename
+ /// to <filename>. This provides for downstream
+ /// checks to see if BAM file may be truncated
+ /// due to early termination (a thrown exception).
+ ///
/// \throws std::runtime_error if there was a problem opening the file for
/// writing or if an error occurred while writing the header
///
BamWriter(const std::string& filename, const BamHeader& header,
const BamWriter::CompressionLevel compressionLevel = BamWriter::DefaultCompression,
const size_t numThreads = 4,
- const BinCalculationMode binCalculationMode = BamWriter::BinCalculation_ON);
+ const BinCalculationMode binCalculationMode = BamWriter::BinCalculation_ON,
+ const bool useTempFile = true);
+
+ ///
+ /// \brief Opens a %BAM file for writing & writes the header information.
+ ///
+ /// \param[in] filename path to output %BAM file
+ /// \param[in] header BamHeader object
+ /// \param[in] config container for add'l configuration options
+ ///
+ /// \throws std::runtime_error if there was a problem opening the file for
+ /// writing or if an error occurred while writing the header
+ ///
+ BamWriter(const std::string& filename, const BamHeader& header,
+ const BamWriter::Config& config);
/// Fully flushes all buffered data & closes file.
~BamWriter() override;
=====================================
include/pbbam/Compare.h
=====================================
@@ -434,6 +434,28 @@ public:
};
/// \}
+
+ template <typename T>
+ static inline bool Check(const T& lhs, const T& rhs, const Compare::Type cmp)
+ {
+ switch (cmp) {
+ case Compare::EQUAL:
+ return lhs == rhs;
+ case Compare::LESS_THAN:
+ return lhs < rhs;
+ case Compare::LESS_THAN_EQUAL:
+ return lhs <= rhs;
+ case Compare::GREATER_THAN:
+ return lhs > rhs;
+ case Compare::GREATER_THAN_EQUAL:
+ return lhs >= rhs;
+ case Compare::NOT_EQUAL:
+ return lhs != rhs;
+ default:
+ assert(false);
+ throw std::runtime_error{"unsupported compare type requested"};
+ }
+ }
};
} // namespace BAM
=====================================
include/pbbam/DataSet.h
=====================================
@@ -46,7 +46,8 @@ public:
HDF_SUBREAD,
REFERENCE,
SUBREAD,
- TRANSCRIPT
+ TRANSCRIPT,
+ TRANSCRIPT_ALIGNMENT
};
/// \brief Converts printable dataset type to type enum.
=====================================
include/pbbam/DataSetTypes.h
=====================================
@@ -861,6 +861,16 @@ public:
TranscriptSet();
};
+/// \brief The TranscriptAlignmentSet class represents a %TranscriptAlignmentSet
+/// root element in DataSetXML.
+///
+class PBBAM_EXPORT TranscriptAlignmentSet : public DataSetBase
+{
+public:
+ /// \brief Creates an empty TranscriptAlignmentSet dataset.
+ TranscriptAlignmentSet();
+};
+
} // namespace BAM
} // namespace PacBio
=====================================
include/pbbam/PbiFilterTypes.h
=====================================
@@ -35,7 +35,7 @@ public:
protected:
FilterBase(T value, const Compare::Type cmp);
- FilterBase(std::vector<T> values);
+ FilterBase(std::vector<T> values, const Compare::Type cmp = Compare::EQUAL);
protected:
bool CompareHelper(const T& lhs) const;
@@ -54,7 +54,7 @@ struct BarcodeDataFilterBase : public FilterBase<T>
{
protected:
BarcodeDataFilterBase(T value, const Compare::Type cmp);
- BarcodeDataFilterBase(std::vector<T> values);
+ BarcodeDataFilterBase(std::vector<T> values, const Compare::Type cmp = Compare::EQUAL);
public:
bool Accepts(const PbiRawData& idx, const size_t row) const;
@@ -69,7 +69,7 @@ struct BasicDataFilterBase : public FilterBase<T>
{
protected:
BasicDataFilterBase(T value, const Compare::Type cmp);
- BasicDataFilterBase(std::vector<T> values);
+ BasicDataFilterBase(std::vector<T> values, const Compare::Type cmp = Compare::EQUAL);
public:
bool Accepts(const PbiRawData& idx, const size_t row) const;
@@ -84,7 +84,7 @@ struct MappedDataFilterBase : public FilterBase<T>
{
protected:
MappedDataFilterBase(T value, const Compare::Type cmp);
- MappedDataFilterBase(std::vector<T> values);
+ MappedDataFilterBase(std::vector<T> values, const Compare::Type cmp = Compare::EQUAL);
public:
bool Accepts(const PbiRawData& idx, const size_t row) const;
@@ -201,7 +201,7 @@ public:
///
/// \param[in] whitelist barcode IDs to compare on
///
- PbiBarcodeFilter(std::vector<int16_t> whitelist);
+ PbiBarcodeFilter(std::vector<int16_t> whitelist, const Compare::Type cmp = Compare::EQUAL);
public:
/// \brief Performs the actual index lookup.
@@ -240,7 +240,8 @@ public:
///
/// \param[in] whitelist barcode IDs to compare on
///
- PbiBarcodeForwardFilter(std::vector<int16_t> whitelist);
+ PbiBarcodeForwardFilter(std::vector<int16_t> whitelist,
+ const Compare::Type cmp = Compare::EQUAL);
};
/// \brief The PbiBarcodeQualityFilter class provides a PbiFilter-compatible
@@ -288,7 +289,8 @@ public:
///
/// \param[in] whitelist barcode IDs to compare on
///
- PbiBarcodeReverseFilter(std::vector<int16_t> whitelist);
+ PbiBarcodeReverseFilter(std::vector<int16_t> whitelist,
+ const Compare::Type cmp = Compare::EQUAL);
};
/// \brief The PbiBarcodesFilter class provides a PbiFilter-compatible filter on
@@ -411,7 +413,7 @@ public:
/// \note There is no compare type parameter here, it is always
/// Compare::EQUAL. Records will match movie name, exactly.
///
- PbiMovieNameFilter(const std::string& movieName);
+ PbiMovieNameFilter(const std::string& movieName, const Compare::Type cmp = Compare::EQUAL);
/// \brief Creates a 'whitelisted' movie name filter.
///
@@ -421,7 +423,8 @@ public:
///
/// \param[in] whitelist movie names to compare on
///
- PbiMovieNameFilter(const std::vector<std::string>& whitelist);
+ PbiMovieNameFilter(const std::vector<std::string>& whitelist,
+ const Compare::Type cmp = Compare::EQUAL);
public:
/// \brief Performs the actual index lookup.
@@ -432,6 +435,7 @@ public:
private:
PbiFilter compositeFilter_;
+ Compare::Type cmp_;
};
/// \brief The PbiNumDeletedBasesFilter class provides a PbiFilter-compatible
@@ -573,7 +577,7 @@ public:
/// \note There is no compare type parameter here, it is always
/// Compare::EQUAL. Records will match query name, exactly.
///
- PbiQueryNameFilter(const std::string& qname);
+ PbiQueryNameFilter(const std::string& qname, const Compare::Type cmp = Compare::EQUAL);
/// \brief Creates a 'whitelisted' query name filter.
///
@@ -583,7 +587,8 @@ public:
///
/// \param[in] whitelist query names to compare on
///
- PbiQueryNameFilter(const std::vector<std::string>& whitelist);
+ PbiQueryNameFilter(const std::vector<std::string>& whitelist,
+ const Compare::Type cmp = Compare::EQUAL);
PbiQueryNameFilter(const PbiQueryNameFilter& other);
~PbiQueryNameFilter();
@@ -686,7 +691,7 @@ public:
///
/// \param[in] whitelist read group IDs to compare on
///
- PbiReadGroupFilter(std::vector<int32_t> whitelist);
+ PbiReadGroupFilter(std::vector<int32_t> whitelist, const Compare::Type cmp = Compare::EQUAL);
/// \brief Creates a 'whitelisted' filter on read group printable IDs.
///
@@ -696,7 +701,8 @@ public:
///
/// \param[in] whitelist read group ID strings to compare on
///
- PbiReadGroupFilter(const std::vector<std::string>& whitelist);
+ PbiReadGroupFilter(const std::vector<std::string>& whitelist,
+ const Compare::Type cmp = Compare::EQUAL);
/// \brief Creates a 'whitelisted' filter using read group objects.
///
@@ -706,7 +712,8 @@ public:
///
/// \param[in] whitelist read group objects to compare on
///
- PbiReadGroupFilter(const std::vector<ReadGroupInfo>& whitelist);
+ PbiReadGroupFilter(const std::vector<ReadGroupInfo>& whitelist,
+ const Compare::Type cmp = Compare::EQUAL);
};
/// \brief The PbiReferenceEndFilter class provides a PbiFilter-compatible
@@ -754,7 +761,7 @@ public:
///
/// \param[in] whitelist reference IDs to compare on
///
- PbiReferenceIdFilter(std::vector<int32_t> whitelist);
+ PbiReferenceIdFilter(std::vector<int32_t> whitelist, const Compare::Type cmp = Compare::EQUAL);
};
/// \brief The PbiReferenceNameFilter class provides a PbiFilter-compatible
@@ -782,7 +789,8 @@ public:
///
/// \param[in] whitelist reference names to compare on
///
- PbiReferenceNameFilter(std::vector<std::string> whitelist);
+ PbiReferenceNameFilter(std::vector<std::string> whitelist,
+ const Compare::Type cmp = Compare::EQUAL);
public:
/// \brief Performs the actual index lookup.
@@ -848,7 +856,39 @@ public:
///
/// \param[in] whitelist ZMW hole numbers to compare on
///
- PbiZmwFilter(std::vector<int32_t> whitelist);
+ PbiZmwFilter(std::vector<int32_t> whitelist, const Compare::Type cmp = Compare::EQUAL);
+};
+
+// ----------------------------------------------
+// NOTE: modulo filtering only enabled for ZMW.
+//
+// I need to generalize more if we're going to use
+// this on more fields.
+// ----------------------------------------------
+
+enum class FilterHash
+{
+ UNSIGNED_LONG_CAST,
+ BOOST_HASH_COMBINE,
+};
+
+struct PbiZmwModuloFilter
+{
+ PbiZmwModuloFilter(const uint32_t denominator, const uint32_t value,
+ const FilterHash hashtype = FilterHash::UNSIGNED_LONG_CAST,
+ const Compare::Type = Compare::EQUAL);
+
+ /// \brief Performs the actual index lookup.
+ ///
+ /// Most client code should not need to use this method directly.
+ ///
+ bool Accepts(const PbiRawData& idx, const size_t row) const;
+
+private:
+ uint32_t denominator_;
+ uint32_t value_;
+ FilterHash hash_;
+ Compare::Type cmp_;
};
} // namespace BAM
=====================================
include/pbbam/internal/PbiFilterTypes.inl
=====================================
@@ -8,6 +8,8 @@
#include <cassert>
#include <stdexcept>
+#include <boost/functional/hash/hash.hpp>
+
namespace PacBio {
namespace BAM {
@@ -20,8 +22,9 @@ inline FilterBase<T>::FilterBase(T value, const Compare::Type cmp)
{ }
template <typename T>
-inline FilterBase<T>::FilterBase(std::vector<T> values)
+inline FilterBase<T>::FilterBase(std::vector<T> values, const Compare::Type cmp)
: multiValue_{std::move(values)}
+ , cmp_{cmp}
{ }
template<typename T>
@@ -36,31 +39,38 @@ inline bool FilterBase<T>::CompareHelper(const T& lhs) const
template<typename T>
inline bool FilterBase<T>::CompareMultiHelper(const T& lhs) const
{
- // check provided value against all filter criteria,
- // return true on any exact match
- auto iter = multiValue_.get().cbegin();
- const auto end = multiValue_.get().cend();
- for (; iter != end; ++iter) {
- if (*iter == lhs)
- return true;
+ if (cmp_ == Compare::EQUAL)
+ {
+ // check provided value against all filter criteria,
+ // return true on any exact match
+ auto iter = multiValue_.get().cbegin();
+ const auto end = multiValue_.get().cend();
+ for (; iter != end; ++iter) {
+ if (*iter == lhs)
+ return true;
+ }
+ return false; // no matches
+ }
+ else if (cmp_ == Compare::NOT_EQUAL)
+ {
+ // check provided value against all filter criteria,
+ // return false on any exact match
+ auto iter = multiValue_.get().cbegin();
+ const auto end = multiValue_.get().cend();
+ for (; iter != end; ++iter) {
+ if (*iter == lhs)
+ return false;
+ }
+ return true;
}
- return false; // no matches
+ else
+ throw std::runtime_error{"unsupported compare type on multivalue filter"};
}
template<typename T>
inline bool FilterBase<T>::CompareSingleHelper(const T& lhs) const
{
- switch(cmp_) {
- case Compare::EQUAL: return lhs == value_;
- case Compare::LESS_THAN: return lhs < value_;
- case Compare::LESS_THAN_EQUAL: return lhs <= value_;
- case Compare::GREATER_THAN: return lhs > value_;
- case Compare::GREATER_THAN_EQUAL: return lhs >= value_;
- case Compare::NOT_EQUAL: return lhs != value_;
- default:
- assert(false);
- throw std::runtime_error{"unsupported compare type requested"};
- }
+ return Compare::Check(lhs, value_, cmp_);
}
template<>
@@ -90,8 +100,8 @@ inline BarcodeDataFilterBase<T, field>::BarcodeDataFilterBase(T value, const Com
{ }
template<typename T, PbiFile::BarcodeField field>
-inline BarcodeDataFilterBase<T, field>::BarcodeDataFilterBase(std::vector<T> values)
- : FilterBase<T>{std::move(values)}
+inline BarcodeDataFilterBase<T, field>::BarcodeDataFilterBase(std::vector<T> values, const Compare::Type cmp)
+ : FilterBase<T>{std::move(values), cmp}
{ }
template<typename T, PbiFile::BarcodeField field>
@@ -117,8 +127,8 @@ inline BasicDataFilterBase<T, field>::BasicDataFilterBase(T value, const Compare
{ }
template<typename T, PbiFile::BasicField field>
-inline BasicDataFilterBase<T, field>::BasicDataFilterBase(std::vector<T> values)
- : FilterBase<T>{std::move(values)}
+inline BasicDataFilterBase<T, field>::BasicDataFilterBase(std::vector<T> values, const Compare::Type cmp)
+ : FilterBase<T>{std::move(values), cmp}
{ }
template<typename T, PbiFile::BasicField field>
@@ -159,8 +169,8 @@ inline MappedDataFilterBase<T, field>::MappedDataFilterBase(T value, const Compa
{ }
template<typename T, PbiFile::MappedField field>
-inline MappedDataFilterBase<T, field>::MappedDataFilterBase(std::vector<T> values)
- : FilterBase<T>{std::move(values)}
+inline MappedDataFilterBase<T, field>::MappedDataFilterBase(std::vector<T> values, const Compare::Type cmp)
+ : FilterBase<T>{std::move(values), cmp}
{ }
template<>
@@ -233,9 +243,9 @@ inline PbiBarcodeFilter::PbiBarcodeFilter(const int16_t barcode, const Compare::
}
{ }
-inline PbiBarcodeFilter::PbiBarcodeFilter(std::vector<int16_t> whitelist)
- : compositeFilter_{ PbiFilter::Union({ PbiBarcodeForwardFilter{std::move(whitelist)},
- PbiBarcodeReverseFilter{std::move(whitelist)}
+inline PbiBarcodeFilter::PbiBarcodeFilter(std::vector<int16_t> whitelist, const Compare::Type cmp)
+ : compositeFilter_{ PbiFilter::Union({ PbiBarcodeForwardFilter{std::move(whitelist), cmp},
+ PbiBarcodeReverseFilter{std::move(whitelist), cmp}
})
}
{ }
@@ -249,8 +259,8 @@ inline PbiBarcodeForwardFilter::PbiBarcodeForwardFilter(const int16_t bcFwdId, c
: internal::BarcodeDataFilterBase<int16_t, PbiFile::BarcodeField::BC_FORWARD>{bcFwdId, cmp}
{ }
-inline PbiBarcodeForwardFilter::PbiBarcodeForwardFilter(std::vector<int16_t> whitelist)
- : internal::BarcodeDataFilterBase<int16_t, PbiFile::BarcodeField::BC_FORWARD>{std::move(whitelist)}
+inline PbiBarcodeForwardFilter::PbiBarcodeForwardFilter(std::vector<int16_t> whitelist, const Compare::Type cmp)
+ : internal::BarcodeDataFilterBase<int16_t, PbiFile::BarcodeField::BC_FORWARD>{std::move(whitelist), cmp}
{ }
// PbiBarcodeQualityFilter
@@ -265,8 +275,8 @@ inline PbiBarcodeReverseFilter::PbiBarcodeReverseFilter(const int16_t bcRevId, c
: internal::BarcodeDataFilterBase<int16_t, PbiFile::BarcodeField::BC_REVERSE>{bcRevId, cmp}
{ }
-inline PbiBarcodeReverseFilter::PbiBarcodeReverseFilter(std::vector<int16_t> whitelist)
- : internal::BarcodeDataFilterBase<int16_t, PbiFile::BarcodeField::BC_REVERSE>{std::move(whitelist)}
+inline PbiBarcodeReverseFilter::PbiBarcodeReverseFilter(std::vector<int16_t> whitelist, const Compare::Type cmp)
+ : internal::BarcodeDataFilterBase<int16_t, PbiFile::BarcodeField::BC_REVERSE>{std::move(whitelist), cmp}
{ }
// PbiBarcodesFilter
@@ -308,7 +318,12 @@ inline PbiMapQualityFilter::PbiMapQualityFilter(const uint8_t mapQual, const Com
// PbiMovieNameFilter
inline bool PbiMovieNameFilter::Accepts(const PbiRawData& idx, const size_t row) const
-{ return compositeFilter_.Accepts(idx, row); }
+{
+ const bool found = compositeFilter_.Accepts(idx, row);
+ if (cmp_ == Compare::EQUAL) return found;
+ else if (cmp_ == Compare::NOT_EQUAL) return !found;
+ else throw std::runtime_error{"unsupported compare type on movie name filter"};
+}
// PbiNumDeletedBasesFilter
@@ -372,20 +387,20 @@ inline PbiReadGroupFilter::PbiReadGroupFilter(const ReadGroupInfo& rg, const Com
: PbiReadGroupFilter{rg.Id(), cmp}
{ }
-inline PbiReadGroupFilter::PbiReadGroupFilter(std::vector<int32_t> whitelist)
- : internal::BasicDataFilterBase<int32_t, PbiFile::BasicField::RG_ID>{std::move(whitelist)}
+inline PbiReadGroupFilter::PbiReadGroupFilter(std::vector<int32_t> whitelist, const Compare::Type cmp)
+ : internal::BasicDataFilterBase<int32_t, PbiFile::BasicField::RG_ID>{std::move(whitelist), cmp}
{ }
-inline PbiReadGroupFilter::PbiReadGroupFilter(const std::vector<std::string>& whitelist)
- : internal::BasicDataFilterBase<int32_t, PbiFile::BasicField::RG_ID>{std::vector<int32_t>{}}
+inline PbiReadGroupFilter::PbiReadGroupFilter(const std::vector<std::string>& whitelist, const Compare::Type cmp)
+ : internal::BasicDataFilterBase<int32_t, PbiFile::BasicField::RG_ID>{std::vector<int32_t>{}, cmp}
{
multiValue_->reserve(whitelist.size());
for (const auto& rg : whitelist)
multiValue_->push_back(ReadGroupInfo::IdToInt(rg));
}
-inline PbiReadGroupFilter::PbiReadGroupFilter(const std::vector<ReadGroupInfo>& whitelist)
- : internal::BasicDataFilterBase<int32_t, PbiFile::BasicField::RG_ID>{std::vector<int32_t>{}}
+inline PbiReadGroupFilter::PbiReadGroupFilter(const std::vector<ReadGroupInfo>& whitelist, const Compare::Type cmp)
+ : internal::BasicDataFilterBase<int32_t, PbiFile::BasicField::RG_ID>{std::vector<int32_t>{}, cmp}
{
multiValue_->reserve(whitelist.size());
for (const auto& rg : whitelist)
@@ -404,8 +419,8 @@ inline PbiReferenceIdFilter::PbiReferenceIdFilter(const int32_t tId, const Compa
: internal::MappedDataFilterBase<int32_t, PbiFile::MappedField::T_ID>{tId, cmp}
{ }
-inline PbiReferenceIdFilter::PbiReferenceIdFilter(std::vector<int32_t> whitelist)
- : internal::MappedDataFilterBase<int32_t, PbiFile::MappedField::T_ID>{std::move(whitelist)}
+inline PbiReferenceIdFilter::PbiReferenceIdFilter(std::vector<int32_t> whitelist, const Compare::Type cmp)
+ : internal::MappedDataFilterBase<int32_t, PbiFile::MappedField::T_ID>{std::move(whitelist), cmp}
{ }
// PbiReferenceStartFilter
@@ -420,9 +435,70 @@ inline PbiZmwFilter::PbiZmwFilter(const int32_t zmw, const Compare::Type cmp)
: internal::BasicDataFilterBase<int32_t, PbiFile::BasicField::ZMW>{zmw, cmp}
{ }
-inline PbiZmwFilter::PbiZmwFilter(std::vector<int32_t> whitelist)
- : internal::BasicDataFilterBase<int32_t, PbiFile::BasicField::ZMW>{std::move(whitelist)}
+inline PbiZmwFilter::PbiZmwFilter(std::vector<int32_t> whitelist, const Compare::Type cmp)
+ : internal::BasicDataFilterBase<int32_t, PbiFile::BasicField::ZMW>{std::move(whitelist), cmp}
+{ }
+
+// PbiZmwModuloFilter
+
+inline PbiZmwModuloFilter::PbiZmwModuloFilter(
+ const uint32_t denominator,
+ const uint32_t value,
+ const FilterHash hashType,
+ const Compare::Type cmp)
+ : denominator_{denominator}
+ , value_{value}
+ , hash_{hashType}
+ , cmp_{cmp}
{ }
+inline uint32_t UnsignedLongIntCast(const int32_t zm)
+{
+ return static_cast<uint32_t>(zm);
+}
+
+inline uint32_t BoostHashCombine(const int32_t zm)
+{
+ constexpr static const uint16_t mask = 0xFFFF;
+
+ const uint16_t upper = (zm >> 16) & mask;
+ const uint16_t lower = zm & mask;
+
+ // FIXME: discrepancies with Python API. Will return to nail down.
+
+ size_t seed = 0;
+ boost::hash_combine(seed, upper);
+ boost::hash_combine(seed, lower);
+ return static_cast<uint32_t>(seed);
+}
+
+inline bool PbiZmwModuloFilter::Accepts(const PbiRawData& idx,
+ const size_t row) const
+{
+ const auto zm = idx.BasicData().holeNumber_.at(row);
+
+ uint32_t hashValue;
+ switch(hash_)
+ {
+ case FilterHash::UNSIGNED_LONG_CAST :
+ {
+ hashValue = UnsignedLongIntCast(zm);
+ break;
+ }
+
+ case FilterHash::BOOST_HASH_COMBINE :
+ {
+ hashValue = BoostHashCombine(zm);
+ break;
+ }
+
+ default:
+ throw std::runtime_error{"unsupported filter hash type"};
+ }
+
+ const auto modResult = hashValue % denominator_;
+ return Compare::Check(modResult, value_, cmp_);
+}
+
} // namespace BAM
} // namespace PacBio
=====================================
meson.build
=====================================
@@ -1,7 +1,7 @@
project(
'PacBioBAM',
'cpp',
- version : '0.18.0',
+ version : '0.19.0',
default_options : [
'buildtype=release',
'warning_level=3',
=====================================
scripts/ci/test.sh
=====================================
@@ -5,4 +5,26 @@ set -vex
# TEST #
########
+type module >& /dev/null || . /mnt/software/Modules/current/init/bash
+
+# Note: htslib v1.7 added native long CIGAR support. pbbam "spoofs" it
+# when running <1.7. So we'll always check the default htslib for
+# general test success/fail, and then check pre-/post-v1.7 explicitly
+# to ensure we pass in either context (detectable at runtime).
+
+# default htslib
+ninja -C "${CURRENT_BUILD_DIR:-build}" -v test
+
+# explicit htslib v1.6
+module unload htslib
+module load htslib/1.6
ninja -C "${CURRENT_BUILD_DIR:-build}" -v test
+
+# explicit htslib v1.7
+module unload htslib
+module load htslib/1.7
+ninja -C "${CURRENT_BUILD_DIR:-build}" -v test\
+
+# restore default
+module unload htslib
+module load htslib
=====================================
src/BamRecordImpl.cpp
=====================================
@@ -12,19 +12,46 @@
#include <cstdlib>
#include <cstring>
#include <iostream>
+#include <tuple>
#include <utility>
#include <htslib/hts_endian.h>
+#include "pbbam/BamTagCodec.h"
+
#include "BamRecordTags.h"
#include "MemoryUtils.h"
-#include "pbbam/BamTagCodec.h"
+#include "StringUtils.h"
namespace PacBio {
namespace BAM {
namespace {
+static bool DoesHtslibSupportLongCigar()
+{
+ const std::string htsVersion = hts_version();
+
+ // remove any "-<blah>" for non-release versions
+ const auto versionBase = PacBio::BAM::Split(htsVersion, '-');
+ if (versionBase.empty())
+ throw std::runtime_error{"invalid htslib version format: " + htsVersion};
+
+ // grab major/minor version numbers
+ const auto versionParts = PacBio::BAM::Split(versionBase[0], '.');
+ if (versionParts.size() < 2)
+ throw std::runtime_error{"invalid htslib version format: " + htsVersion};
+
+ // check against v1.7
+ const int versionMajor = std::stoi(versionParts[0]);
+ const int versionMinor = std::stoi(versionParts[1]);
+ static constexpr const int v17_major = 1;
+ static constexpr const int v17_minor = 7;
+ return std::tie(versionMajor, versionMinor) >= std::tie(v17_major, v17_minor);
+}
+
+static const bool has_native_long_cigar_support = DoesHtslibSupportLongCigar();
+
Cigar FetchRawCigar(const uint32_t* const src, const uint32_t len)
{
Cigar result;
@@ -148,27 +175,21 @@ bool BamRecordImpl::AddTagImpl(const std::string& tagName, const Tag& value,
Cigar BamRecordImpl::CigarData() const
{
const auto* b = d_.get();
- if (HasLongCigar(b)) {
+ if (!has_native_long_cigar_support && HasLongCigar(b)) {
// fetch long CIGAR from tag
const auto cigarTag = TagValue("CG");
const auto cigarTagValue = cigarTag.ToUInt32Array();
return FetchRawCigar(cigarTagValue.data(), cigarTagValue.size());
} else {
- // fetch normal, short CIGAR from the standard location
+ // fetch CIGAR from the standard location
return FetchRawCigar(bam_get_cigar(b), b->core.n_cigar);
}
}
BamRecordImpl& BamRecordImpl::CigarData(const Cigar& cigar)
{
- // Set normal, "short" CIGAR and remove CG tag if present.
- if (cigar.size() < 65536) {
- SetCigarData(cigar);
- if (HasTag("CG")) RemoveTag("CG");
- }
-
- // Set long CIGAR data
- else {
+ // if long CIGAR, using htslib version < 1.7, set it "manually"
+ if (!has_native_long_cigar_support && cigar.size() >= 65536) {
// Add the 'fake' CIGAR in normal place.
Cigar fake;
fake.emplace_back(CigarOperationType::SOFT_CLIP, SequenceLength());
@@ -190,6 +211,12 @@ BamRecordImpl& BamRecordImpl::CigarData(const Cigar& cigar)
AddTag("CG", Tag{cigarData});
}
+ // otherwise (v1.7+ or short CIGAR), use standard APIs
+ else {
+ if (HasTag("CG")) RemoveTag("CG");
+ SetCigarData(cigar);
+ }
+
return *this;
}
=====================================
src/BamWriter.cpp
=====================================
@@ -25,12 +25,13 @@ namespace PacBio {
namespace BAM {
namespace internal {
-class BamWriterPrivate : public internal::FileProducer
+class BamWriterPrivate
{
public:
BamWriterPrivate(const std::string& filename, const std::shared_ptr<bam_hdr_t> rawHeader,
const BamWriter::CompressionLevel compressionLevel, const size_t numThreads,
- const BamWriter::BinCalculationMode binCalculationMode);
+ const BamWriter::BinCalculationMode binCalculationMode,
+ const bool useTempFile);
public:
void Write(const BamRecord& record);
@@ -41,21 +42,23 @@ public:
bool calculateBins_;
std::unique_ptr<samFile, internal::HtslibFileDeleter> file_;
std::shared_ptr<bam_hdr_t> header_;
+ std::unique_ptr<internal::FileProducer> fileProducer_;
};
BamWriterPrivate::BamWriterPrivate(const std::string& filename,
const std::shared_ptr<bam_hdr_t> rawHeader,
const BamWriter::CompressionLevel compressionLevel,
const size_t numThreads,
- const BamWriter::BinCalculationMode binCalculationMode)
- : internal::FileProducer{filename}
- , calculateBins_{binCalculationMode == BamWriter::BinCalculation_ON}
- , header_{rawHeader}
+ const BamWriter::BinCalculationMode binCalculationMode,
+ const bool useTempFile)
+ : calculateBins_{binCalculationMode == BamWriter::BinCalculation_ON}, header_{rawHeader}
{
if (!header_) throw std::runtime_error{"null header"};
+ if (useTempFile) fileProducer_ = std::make_unique<internal::FileProducer>(filename);
+
// open file
- const auto usingFilename = TempFilename();
+ const auto usingFilename = (fileProducer_ ? fileProducer_->TempFilename() : filename);
const auto mode = std::string("wb") + std::to_string(static_cast<int>(compressionLevel));
file_.reset(sam_open(usingFilename.c_str(), mode.c_str()));
if (!file_) throw std::runtime_error{"could not open file for writing"};
@@ -123,7 +126,7 @@ inline void BamWriterPrivate::Write(const BamRecordImpl& recordImpl)
BamWriter::BamWriter(const std::string& filename, const BamHeader& header,
const BamWriter::CompressionLevel compressionLevel, const size_t numThreads,
- const BinCalculationMode binCalculationMode)
+ const BinCalculationMode binCalculationMode, const bool useTempFile)
: IRecordWriter()
{
#if PBBAM_AUTOVALIDATE
@@ -131,7 +134,18 @@ BamWriter::BamWriter(const std::string& filename, const BamHeader& header,
#endif
d_ = std::make_unique<internal::BamWriterPrivate>(
filename, internal::BamHeaderMemory::MakeRawHeader(header), compressionLevel, numThreads,
- binCalculationMode);
+ binCalculationMode, useTempFile);
+}
+
+BamWriter::BamWriter(const std::string& filename, const BamHeader& header,
+ const BamWriter::Config& config)
+ : BamWriter{filename,
+ header,
+ config.compressionLevel,
+ config.numThreads,
+ config.binCalculationMode,
+ config.useTempFile}
+{
}
BamWriter::~BamWriter()
=====================================
src/ChemistryTable.cpp
=====================================
@@ -16,6 +16,8 @@ namespace PacBio {
namespace BAM {
namespace internal {
+// clang-format off
+
extern const ChemistryTable BuiltInChemistryTable = {
// BindingKit, SequencingKit, BasecallerVersion, Chemistry
@@ -72,23 +74,21 @@ extern const ChemistryTable BuiltInChemistryTable = {
{{"101-365-900", "100-861-800", "5.0", "S/P2-C2/5.0"}},
{{"101-365-900", "101-093-700", "5.0", "S/P2-C2/5.0"}},
- // 5.0.1 ChemRel ("Sequel® Sequencing Plate Silwet"); S/P2-C2
- {{"101-365-900", "101-309-500", "5.0", "S/P2-C2/5.0"}},
- // 5.0.1 ChemRel ("Sequel® Sequencing Plate Silwet (4 rxn)"); S/P2-C2
- {{"101-365-900", "101-309-400", "5.0", "S/P2-C2/5.0"}},
+ // 5.0.1 ChemRel; Sequel® Binding Kit 2.1; S/P2-C2
+ {{"101-365-900", "101-309-500", "5.0", "S/P2-C2/5.0"}}, // Sequel® Sequencing Plate 2.1 Silwet (8 rxn)
+ {{"101-365-900", "101-309-400", "5.0", "S/P2-C2/5.0"}}, // Sequel® Sequencing Plate 2.1 Silwet (4 rxn)
- // 5.0.1 ChemRel ("Sequel® Sequencing Plate Silwet - prototype parts"); S/P2-C2
- {{"101-490-800", "101-490-900", "5.0", "S/P2-C2/5.0"}},
- // 5.0.1 ChemRel ("Sequel® Sequencing Plate Silwet (4 rxn) - prototype parts"); S/P2-C2
- {{"101-490-800", "101-491-000", "5.0", "S/P2-C2/5.0"}},
-
- // 5.0.1 ChemRel ("Sequel® Sequencing Plate Silwet - prototype parts"); S/P2-C2
- {{"101-500-400", "101-490-900", "5.0", "S/P2-C2/5.0"}},
- // 5.0.1 ChemRel ("Sequel® Sequencing Plate Silwet (4 rxn) - prototype parts"); S/P2-C2
- {{"101-500-400", "101-491-000", "5.0", "S/P2-C2/5.0"}}
+ // 5.0.1 ChemRel; Sequel® Binding Kit 3.0; S/P3-C3
+ {{"101-500-400", "101-427-500", "5.0", "S/P3-C3/5.0"}}, // Sequel® Sequencing Plate 3.0 (8 rxn)
+ {{"101-500-400", "101-427-800", "5.0", "S/P3-C3/5.0"}}, // Sequel® Sequencing Plate 3.0 (4 rxn)
+ // 5.0.1 ChemRel; Sequel® Dev Binding Kit; S/P2-C2
+ {{"101-490-800", "101-490-900", "5.0", "S/P2-C2/5.0"}}, // Sequel® Dev Sequencing Plate (4 rxn)
+ {{"101-490-800", "101-491-000", "5.0", "S/P2-C2/5.0"}}, // Sequel® Dev Sequencing Plate (8 rxn)
};
+// clang-format on
+
ChemistryTable ChemistryTableFromXml(const std::string& mappingXml)
{
if (!FileUtils::Exists(mappingXml))
=====================================
src/Compare.cpp
=====================================
@@ -41,31 +41,33 @@ struct CompareTypeHash
static const std::unordered_map<std::string, Compare::Type> opToTypeMap =
{
// basic operators plus some permissiveness for other representations
- { "==", Compare::EQUAL },
- { "=", Compare::EQUAL },
- { "eq", Compare::EQUAL },
- { "!=", Compare::NOT_EQUAL },
- { "ne", Compare::NOT_EQUAL },
- { "<", Compare::LESS_THAN },
- { "lt", Compare::LESS_THAN },
- { "&lt;", Compare::LESS_THAN },
- { "<=", Compare::LESS_THAN_EQUAL },
- { "lte", Compare::LESS_THAN_EQUAL },
- { "&lt;=", Compare::LESS_THAN_EQUAL },
- { ">", Compare::GREATER_THAN },
- { "gt", Compare::GREATER_THAN },
- { "&gt;", Compare::GREATER_THAN },
- { ">=", Compare::GREATER_THAN_EQUAL },
- { "gte", Compare::GREATER_THAN_EQUAL },
- { "&gt;=", Compare::GREATER_THAN_EQUAL },
- { "&", Compare::CONTAINS },
- { "~", Compare::NOT_CONTAINS }
+ { "==", Compare::EQUAL },
+ { "=", Compare::EQUAL },
+ { "eq", Compare::EQUAL },
+ { "in", Compare::EQUAL }, // list membership is handled as equality
+ { "!=", Compare::NOT_EQUAL },
+ { "ne", Compare::NOT_EQUAL },
+ { "not_in", Compare::NOT_EQUAL }, // list exclusion is handled as inequality
+ { "<", Compare::LESS_THAN },
+ { "lt", Compare::LESS_THAN },
+ { "&lt;", Compare::LESS_THAN }, // XML-escaped spelling of "<"
+ { "<=", Compare::LESS_THAN_EQUAL },
+ { "lte", Compare::LESS_THAN_EQUAL },
+ { "&lt;=", Compare::LESS_THAN_EQUAL }, // XML-escaped spelling of "<="
+ { ">", Compare::GREATER_THAN },
+ { "gt", Compare::GREATER_THAN },
+ { "&gt;", Compare::GREATER_THAN }, // XML-escaped spelling of ">"
+ { ">=", Compare::GREATER_THAN_EQUAL },
+ { "gte", Compare::GREATER_THAN_EQUAL },
+ { "&gt;=", Compare::GREATER_THAN_EQUAL }, // XML-escaped spelling of ">="
+ { "&", Compare::CONTAINS },
+ { "~", Compare::NOT_CONTAINS }
};
static const std::unordered_map<Compare::Type, TypeAlias, CompareTypeHash> typeAliases =
{
- { Compare::EQUAL, TypeAlias{ "Compare::EQUAL", "==", "eq" } },
- { Compare::NOT_EQUAL, TypeAlias{ "Compare::NOT_EQUAL", "!=", "ne" } },
+ { Compare::EQUAL, TypeAlias{ "Compare::EQUAL", "==", "eq" } },
+ { Compare::NOT_EQUAL, TypeAlias{ "Compare::NOT_EQUAL", "!=", "ne" } },
{ Compare::LESS_THAN, TypeAlias{ "Compare::LESS_THAN", "<", "lt" } },
{ Compare::LESS_THAN_EQUAL, TypeAlias{ "Compare::LESS_THAN_EQUAL", "<=", "lte" } },
{ Compare::GREATER_THAN, TypeAlias{ "Compare::GREATER_THAN", ">", "gt" } },
=====================================
src/DataSet.cpp
=====================================
@@ -93,6 +93,9 @@ DataSet::DataSet(const DataSet::TypeEnum type)
case DataSet::TRANSCRIPT:
d_ = std::make_unique<TranscriptSet>();
break;
+ case DataSet::TRANSCRIPT_ALIGNMENT:
+ d_ = std::make_unique<TranscriptAlignmentSet>();
+ break;
default:
throw std::runtime_error{"unsupported dataset type"};
}
@@ -229,6 +232,7 @@ DataSet::TypeEnum DataSet::NameToType(const std::string& typeName)
lookup["ReferenceSet"] = DataSet::REFERENCE;
lookup["SubreadSet"] = DataSet::SUBREAD;
lookup["TranscriptSet"] = DataSet::TRANSCRIPT;
+ lookup["TranscriptAlignmentSet"] = DataSet::TRANSCRIPT_ALIGNMENT;
}
return lookup.at(typeName); // throws if unknown typename
}
@@ -293,6 +297,8 @@ std::string DataSet::TypeToName(const DataSet::TypeEnum& type)
return "SubreadSet";
case DataSet::TRANSCRIPT:
return "TranscriptSet";
+ case DataSet::TRANSCRIPT_ALIGNMENT:
+ return "TranscriptAlignmentSet";
default:
throw std::runtime_error{"unsupported dataset type"};
}
=====================================
src/DataSetTypes.cpp
=====================================
@@ -157,6 +157,8 @@ std::shared_ptr<DataSetBase> DataSetBase::Create(const std::string& typeName)
if (typeName == std::string("HdfSubreadSet")) return std::make_shared<HdfSubreadSet>();
if (typeName == std::string("ReferenceSet")) return std::make_shared<ReferenceSet>();
if (typeName == std::string("TranscriptSet")) return std::make_shared<TranscriptSet>();
+ if (typeName == std::string("TranscriptAlignmentSet"))
+ return std::make_shared<TranscriptAlignmentSet>();
// unknown typename
throw std::runtime_error{"unsupported dataset type"};
@@ -437,5 +439,15 @@ TranscriptSet::TranscriptSet()
{
}
+// -------------------
+// TranscriptAlignmentSet
+// -------------------
+
+TranscriptAlignmentSet::TranscriptAlignmentSet()  // default constructor; all setup is delegated to DataSetBase
+ : DataSetBase("PacBio.DataSet.TranscriptAlignmentSet", "TranscriptAlignmentSet",
+ XsdType::DATASETS)  // same XsdType that the element registry lists for "TranscriptAlignmentSet"
+{
+}
+
} // namespace BAM
} // namespace PacBio
=====================================
src/DataSetXsd.cpp
=====================================
@@ -103,6 +103,7 @@ static const auto elementRegistry = std::unordered_map<std::string, XsdType>
{ "SummaryStats", XsdType::DATASETS },
{ "TotalLength", XsdType::DATASETS },
{ "TranscriptSet", XsdType::DATASETS },
+ { "TranscriptAlignmentSet",XsdType::DATASETS },
// 'pbmeta' elements
//
=====================================
src/FileProducer.h
=====================================
@@ -22,7 +22,7 @@ namespace internal {
class FileProducer
{
-protected:
+public:
FileProducer() = delete;
// Initializes FileProducer with specified target filename. Temp filename is
@@ -42,9 +42,8 @@ protected:
//
~FileProducer();
-protected:
+public:
const std::string& TargetFilename() const { return targetFilename_; }
-
const std::string& TempFilename() const { return tempFilename_; }
private:
=====================================
src/PbiFilter.cpp
=====================================
@@ -21,6 +21,7 @@
#include <boost/algorithm/string/trim.hpp>
#include <boost/numeric/conversion/cast.hpp>
+#include "FileUtils.h"
#include "StringUtils.h"
#include "pbbam/PbiFilterTypes.h"
@@ -202,8 +203,31 @@ static PbiFilter CreateLocalContextFilter(const std::string& value, const Compar
return PbiFilter{PbiLocalContextFilter{filterValue, compareType}};
}
-static PbiFilter CreateQueryNamesFilterFromFile(const std::string& value, const DataSet& dataset)
+// Builds the movie-name filter for a DataSet property value. The value is
+// either a single movie name or a ','-separated list; if isBracketed() reports
+// enclosing brackets, the first and last characters are dropped first.
+// Throws std::runtime_error for an empty value, or for a list combined with a
+// compare type other than EQUAL / NOT_EQUAL.
+static PbiFilter CreateMovieNameFilter(std::string value, const Compare::Type compareType)
{
+    if (value.empty()) throw std::runtime_error{"empty value for movie property"};
+
+    // strip the enclosing pair of characters
+    if (isBracketed(value)) {
+        value.pop_back();
+        value.erase(0, 1);
+    }
+
+    // single name: pass the value and operator straight through
+    if (!isList(value)) return PbiMovieNameFilter{value, compareType};
+
+    // list of names: only (in)equality is accepted
+    const bool isEqualityOp =
+        (compareType == Compare::EQUAL) || (compareType == Compare::NOT_EQUAL);
+    if (!isEqualityOp) throw std::runtime_error{"unsupported compare type on movie property"};
+
+    std::vector<std::string> movieNames = internal::Split(value, ',');
+    return PbiMovieNameFilter{std::move(movieNames), compareType};
+}
+
+static PbiFilter CreateQueryNamesFilterFromFile(const std::string& value, const DataSet& dataset, const Compare::Type compareType)
+{
+ if (compareType != Compare::EQUAL && compareType != Compare::NOT_EQUAL)
+ throw std::runtime_error{"unsupported compare type on query name property"};
+
// resolve file from dataset, value
const std::string resolvedFilename = dataset.ResolvePath(value);
std::vector<std::string> whitelist;
@@ -211,12 +235,102 @@ static PbiFilter CreateQueryNamesFilterFromFile(const std::string& value, const
std::ifstream in(resolvedFilename);
while (std::getline(in, fn))
whitelist.push_back(fn);
- return PbiQueryNameFilter{whitelist};
+ return PbiQueryNameFilter{whitelist, compareType};
}
-static PbiFilter CreateZmwFilter(std::string value, const Compare::Type compareType)
+// Builds the query-name filter for a DataSet property value. The value may be
+// the name of a file listing query names (resolved through the dataset), a
+// single query name, or a ','-separated list of names.
+// Throws std::runtime_error for an empty value, or for a list combined with a
+// compare type other than EQUAL / NOT_EQUAL.
+static PbiFilter CreateQueryNameFilter(std::string value, const DataSet& dataset, const Compare::Type compareType)
{
+    if (value.empty()) throw std::runtime_error{"empty value for query name property"};
+
+    // Try a possible filename first. Probe the dataset-resolved path, because
+    // that is the path CreateQueryNamesFilterFromFile() goes on to open.
+    const std::string resolvedFilename = dataset.ResolvePath(value);
+    if (internal::FileUtils::Exists(resolvedFilename))
+        return CreateQueryNamesFilterFromFile(value, dataset, compareType);
+
+    // otherwise "normal" qname (single, or list)
+
+    // drop the enclosing pair of characters when isBracketed() reports one
+    if (isBracketed(value)) {
+        value.erase(0, 1);
+        value.pop_back();
+    }
+
+    if (isList(value)) {
+
+        // a list can only be matched for (non-)membership
+        if (compareType != Compare::EQUAL && compareType != Compare::NOT_EQUAL)
+            throw std::runtime_error{"unsupported compare type on query name property"};
+
+        std::vector<std::string> tokens = internal::Split(value, ',');
+        return PbiQueryNameFilter{std::move(tokens), compareType};
+    } else
+        return PbiQueryNameFilter{value, compareType};
+}
+
+// Builds the read-group filter for a DataSet property value: a single read
+// group, or a ','-separated list (the first and last characters are dropped
+// first when isBracketed() reports enclosing brackets).
+// Throws std::runtime_error for an empty value, or for a list combined with a
+// compare type other than EQUAL / NOT_EQUAL.
+static PbiFilter CreateReadGroupFilter(std::string value, const Compare::Type compareType)
+{
+    if (value.empty()) throw std::runtime_error{"empty value for read group property"};
+
+    // strip the enclosing pair of characters
+    if (isBracketed(value)) {
+        value.pop_back();
+        value.erase(0, 1);
+    }
+
+    // single read group: pass the value and operator straight through
+    if (!isList(value)) return PbiReadGroupFilter{value, compareType};
+
+    // list of read groups: only (in)equality is accepted
+    const bool isEqualityOp =
+        (compareType == Compare::EQUAL) || (compareType == Compare::NOT_EQUAL);
+    if (!isEqualityOp) throw std::runtime_error{"unsupported compare type on read group property"};
+
+    std::vector<std::string> readGroups = internal::Split(value, ',');
+    return PbiReadGroupFilter{std::move(readGroups), compareType};
+}
+
+// Builds the reference-ID filter for a DataSet property value: a single
+// integer ID, or a ','-separated list of IDs (the first and last characters
+// are dropped first when isBracketed() reports enclosing brackets).
+// Throws std::runtime_error for an empty value, or for a list combined with a
+// compare type other than EQUAL / NOT_EQUAL. std::stoi throws
+// std::invalid_argument / std::out_of_range for tokens it cannot convert.
+static PbiFilter CreateReferenceIdFilter(std::string value, const Compare::Type compareType)
+{
+    if (value.empty()) throw std::runtime_error{"empty value for reference ID property"};
+
+    if (isBracketed(value)) {
+        value.erase(0, 1);
+        value.pop_back();
+    }
+
+    if (isList(value)) {
+
+        // a list can only be matched for (non-)membership
+        if (compareType != Compare::EQUAL && compareType != Compare::NOT_EQUAL)
+            throw std::runtime_error{"unsupported compare type on reference ID property"};
+
+        // convert every token to a numeric reference ID
+        std::vector<std::string> tokens = internal::Split(value, ',');
+        std::vector<int32_t> ids;
+        ids.reserve(tokens.size());
+        for (const auto& t : tokens)
+            ids.push_back(boost::numeric_cast<int32_t>(std::stoi(t)));
+        return PbiReferenceIdFilter{std::move(ids), compareType};
+    } else
+        return PbiReferenceIdFilter{boost::numeric_cast<int32_t>(std::stoi(value)), compareType};
+}
+
+// Builds the reference-name filter for a DataSet property value: a single
+// reference name, or a ','-separated list (the first and last characters are
+// dropped first when isBracketed() reports enclosing brackets).
+// Throws std::runtime_error for an empty value, or for a list combined with a
+// compare type other than EQUAL / NOT_EQUAL.
+static PbiFilter CreateReferenceNameFilter(std::string value, const Compare::Type compareType)
+{
+    if (value.empty()) throw std::runtime_error{"empty value for reference name property"};
+
+    // strip the enclosing pair of characters
+    if (isBracketed(value)) {
+        value.pop_back();
+        value.erase(0, 1);
+    }
+
+    // single reference name: pass the value and operator straight through
+    if (!isList(value)) return PbiReferenceNameFilter{value, compareType};
+
+    // list of reference names: only (in)equality is accepted
+    const bool isEqualityOp =
+        (compareType == Compare::EQUAL) || (compareType == Compare::NOT_EQUAL);
+    if (!isEqualityOp)
+        throw std::runtime_error{"unsupported compare type on reference name property"};
+
+    std::vector<std::string> referenceNames = internal::Split(value, ',');
+    return PbiReferenceNameFilter{std::move(referenceNames), compareType};
+}
+
+static PbiFilter CreateZmwFilter(std::string value, const Compare::Type compareType)
+{
if (value.empty()) throw std::runtime_error{"empty value for ZMW filter property"};
if (isBracketed(value)) {
@@ -235,10 +349,38 @@ static PbiFilter CreateZmwFilter(std::string value, const Compare::Type compareT
return PbiZmwFilter{boost::numeric_cast<int32_t>(stoi(value)), compareType};
}
+// Builds a ZMW modulo filter from a "zm" property that carries both a "Modulo"
+// and a "Hash" attribute. The "Hash" attribute selects the hash function
+// (matched case-insensitively), "Modulo" supplies the modulus, and the
+// property value supplies the number compared against with Compare::EQUAL.
+// Throws std::runtime_error for any other property, an unknown hash name, or a
+// zero modulus. Numbers that cannot be parsed, or that do not fit in 32 bits,
+// raise the exceptions of std::stoul / boost::numeric_cast.
+static PbiFilter CreateZmwModuloFilter(const Property& property)
+{
+    if (!property.HasAttribute("Modulo") || !property.HasAttribute("Hash") ||
+        property.Name() != "zm")
+    {
+        throw std::runtime_error{"Modulo filter not supported on property: " + property.Name()};
+    }
+
+    // lower-case the hash name once; the original spelling is kept for the error text
+    const auto hashType = property.Attribute("Hash");
+    const auto hashKey = boost::algorithm::to_lower_copy(hashType);
+    const FilterHash hash = [&hashType, &hashKey]()
+    {
+        if (hashKey == "uint32cast")
+            return FilterHash::UNSIGNED_LONG_CAST;
+        if (hashKey == "boosthashcombine")
+            return FilterHash::BOOST_HASH_COMBINE;
+        throw std::runtime_error{"unsupported hash type: " + hashType};
+    }();
+
+    // std::stoul yields unsigned long, which may be wider than 32 bits;
+    // numeric_cast rejects out-of-range input instead of silently truncating it
+    const auto denom = boost::numeric_cast<uint32_t>(std::stoul(property.Attribute("Modulo")));
+    const auto value = boost::numeric_cast<uint32_t>(std::stoul(property.Value()));
+
+    // a remainder by zero is undefined, so refuse it up front
+    if (denom == 0) throw std::runtime_error{"Modulo filter requires a non-zero modulus"};
+
+    return PbiZmwModuloFilter{ denom, value, hash, Compare::EQUAL };
+}
+
static PbiFilter FromDataSetProperty(const Property& property, const DataSet& dataset)
{
try {
const std::string& value = property.Value();
+
+ if (property.Name() == "zm" && property.HasAttribute("Modulo"))
+ return CreateZmwModuloFilter(property);
+
const Compare::Type compareType = Compare::TypeFromOperator(property.Operator());
const BuiltIn builtInCode =
builtInLookup.at(boost::algorithm::to_lower_copy(property.Name()));
@@ -252,16 +394,11 @@ static PbiFilter FromDataSetProperty(const Property& property, const DataSet& da
case BuiltIn::AlignedStartFilter : return PbiAlignedStartFilter{ static_cast<uint32_t>(std::stoul(value)), compareType };
case BuiltIn::BarcodeQualityFilter : return PbiBarcodeQualityFilter{ static_cast<uint8_t>(std::stoul(value)), compareType };
case BuiltIn::IdentityFilter : return PbiIdentityFilter{ std::stof(value), compareType };
- case BuiltIn::MovieNameFilter : return PbiMovieNameFilter{ value };
case BuiltIn::QueryEndFilter : return PbiQueryEndFilter{ std::stoi(value), compareType };
case BuiltIn::QueryLengthFilter : return PbiQueryLengthFilter{ std::stoi(value), compareType };
- case BuiltIn::QueryNameFilter : return PbiQueryNameFilter{ value };
case BuiltIn::QueryStartFilter : return PbiQueryStartFilter{ std::stoi(value), compareType };
case BuiltIn::ReadAccuracyFilter : return PbiReadAccuracyFilter{ std::stof(value), compareType };
- case BuiltIn::ReadGroupFilter : return PbiReadGroupFilter{ value, compareType };
case BuiltIn::ReferenceEndFilter : return PbiReferenceEndFilter{ static_cast<uint32_t>(std::stoul(value)), compareType };
- case BuiltIn::ReferenceIdFilter : return PbiReferenceIdFilter{ std::stoi(value), compareType };
- case BuiltIn::ReferenceNameFilter : return PbiReferenceNameFilter{ value };
case BuiltIn::ReferenceStartFilter : return PbiReferenceStartFilter{ static_cast<uint32_t>(std::stoul(value)), compareType };
// (maybe) list-value filters
@@ -269,10 +406,15 @@ static PbiFilter FromDataSetProperty(const Property& property, const DataSet& da
case BuiltIn::BarcodeForwardFilter : return CreateBarcodeForwardFilter(value, compareType);
case BuiltIn::BarcodeReverseFilter : return CreateBarcodeReverseFilter(value, compareType);
case BuiltIn::LocalContextFilter : return CreateLocalContextFilter(value, compareType);
+ case BuiltIn::MovieNameFilter : return CreateMovieNameFilter(value, compareType);
+ case BuiltIn::QueryNameFilter : return CreateQueryNameFilter(value, dataset, compareType);
+ case BuiltIn::ReadGroupFilter : return CreateReadGroupFilter(value, compareType);
+ case BuiltIn::ReferenceIdFilter : return CreateReferenceIdFilter(value, compareType);
+ case BuiltIn::ReferenceNameFilter : return CreateReferenceNameFilter(value, compareType);
case BuiltIn::ZmwFilter : return CreateZmwFilter(value, compareType);
// other built-ins
- case BuiltIn::QueryNamesFromFileFilter : return CreateQueryNamesFilterFromFile(value, dataset); // compareType ignored
+ case BuiltIn::QueryNamesFromFileFilter : return CreateQueryNamesFilterFromFile(value, dataset, compareType);
default :
throw std::runtime_error{""};
=====================================
src/PbiFilterTypes.cpp
=====================================
@@ -127,13 +127,15 @@ bool PbiIdentityFilter::Accepts(const PbiRawData& idx, const size_t row) const
// PbiMovieNameFilter
-PbiMovieNameFilter::PbiMovieNameFilter(const std::string& movieName)
+PbiMovieNameFilter::PbiMovieNameFilter(const std::string& movieName, const Compare::Type cmp)
: compositeFilter_{internal::filterFromMovieName(movieName, true)} // include CCS
+ , cmp_{cmp}
{
}
-PbiMovieNameFilter::PbiMovieNameFilter(const std::vector<std::string>& whitelist)
- : compositeFilter_{PbiFilter::UNION}
+PbiMovieNameFilter::PbiMovieNameFilter(const std::vector<std::string>& whitelist,
+ const Compare::Type cmp)
+ : compositeFilter_{PbiFilter::UNION}, cmp_{cmp}
{
for (const auto& movieName : whitelist)
compositeFilter_.Add(internal::filterFromMovieName(movieName, true)); // include CCS
@@ -162,7 +164,9 @@ public:
using RgIdLookup = std::unordered_map<int32_t, ZmwLookupPtr>;
public:
- PbiQueryNameFilterPrivate(const std::vector<std::string>& whitelist)
+ PbiQueryNameFilterPrivate(const std::vector<std::string>& whitelist,
+ const Compare::Type cmp = Compare::EQUAL)
+ : cmp_{cmp}
{
for (const auto& queryName : whitelist) {
@@ -177,7 +181,10 @@ public:
PbiQueryNameFilterPrivate(const std::unique_ptr<PbiQueryNameFilterPrivate>& other)
{
- if (other) lookup_ = other->lookup_;
+ if (other) {
+ lookup_ = other->lookup_;
+ cmp_ = other->cmp_;
+ }
}
bool Accepts(const PbiRawData& idx, const size_t row) const
@@ -201,7 +208,14 @@ public:
const auto qStart = basicData.qStart_.at(row);
const auto qEnd = basicData.qEnd_.at(row);
const auto queryInterval = std::make_pair(qStart, qEnd);
- return queryIntervals.find(queryInterval) != queryIntervals.end();
+
+ const bool found = queryIntervals.find(queryInterval) != queryIntervals.end();
+ if (cmp_ == Compare::EQUAL)
+ return found;
+ else if (cmp_ == Compare::NOT_EQUAL)
+ return !found;
+ else
+ throw std::runtime_error{"unsupported compare type on query name filter"};
}
std::vector<int32_t> CandidateRgIds(const std::string& movieName, const RecordType type)
@@ -295,16 +309,18 @@ public:
private:
RgIdLookup lookup_;
+ Compare::Type cmp_;
};
-PbiQueryNameFilter::PbiQueryNameFilter(const std::string& qname)
+PbiQueryNameFilter::PbiQueryNameFilter(const std::string& qname, const Compare::Type cmp)
: d_{std::make_unique<PbiQueryNameFilter::PbiQueryNameFilterPrivate>(
- std::vector<std::string>{1, qname})}
+ std::vector<std::string>{1, qname}, cmp)}
{
}
-PbiQueryNameFilter::PbiQueryNameFilter(const std::vector<std::string>& whitelist)
- : d_{std::make_unique<PbiQueryNameFilter::PbiQueryNameFilterPrivate>(whitelist)}
+PbiQueryNameFilter::PbiQueryNameFilter(const std::vector<std::string>& whitelist,
+ const Compare::Type cmp)
+ : d_{std::make_unique<PbiQueryNameFilter::PbiQueryNameFilterPrivate>(whitelist, cmp)}
{
}
@@ -333,8 +349,9 @@ PbiReferenceNameFilter::PbiReferenceNameFilter(std::string rname, Compare::Type
}
}
-PbiReferenceNameFilter::PbiReferenceNameFilter(std::vector<std::string> whitelist)
- : rnameWhitelist_{std::move(whitelist)}, cmp_{Compare::EQUAL}
+PbiReferenceNameFilter::PbiReferenceNameFilter(std::vector<std::string> whitelist,
+ const Compare::Type cmp)
+ : rnameWhitelist_{std::move(whitelist)}, cmp_{cmp}
{
}
@@ -358,9 +375,10 @@ void PbiReferenceNameFilter::Initialize(const PbiRawData& idx) const
// multi-value whitelist
else {
- subFilter_ = PbiFilter(PbiFilter::UNION);
+ std::vector<int32_t> ids;
for (const auto& rname : rnameWhitelist_.get())
- subFilter_.Add(PbiReferenceIdFilter{bamFile.ReferenceId(rname)});
+ ids.push_back(bamFile.ReferenceId(rname));
+ subFilter_ = PbiReferenceIdFilter{std::move(ids), cmp_};
}
initialized_ = true;
}
=====================================
tests/data/long-cigar-1.7.bam
=====================================
Binary files /dev/null and b/tests/data/long-cigar-1.7.bam differ
=====================================
tests/data/long-cigar.bam deleted
=====================================
Binary files a/tests/data/long-cigar.bam and /dev/null differ
=====================================
tests/meson.build
=====================================
@@ -44,18 +44,27 @@ custom_target('pbbam_generate_data',
build_by_default : true,
install : false)
+pbbamify_synthetic_dataset = configure_file(
+ input : files('data/pbbamify/synthetic_movie_all.subreadset.xml.in'),
+ output : 'synthetic_movie_all.subreadset.xml',
+ configuration : pbbam_PbbamTestData_h_config)
+
+#########
+# tests #
+#########
+
test(
- 'pbbam unit tests',
+ 'pbbam formatting check',
+ pbbam_clang_formatter,
+ args : [
+ '--all'],
+ workdir : meson.source_root())
+
+test(
+ 'pbbam gtest unittests',
pbbam_test,
args : [
- '--gtest_output=xml:' + join_paths(meson.build_root(), 'pbbam-unit-tests.xml')],
+ '--gtest_output=xml:' + join_paths(meson.build_root(), 'pbbam-gtest-unittests.xml')],
env : [
'ARGS=-V',
'VERBOSE=1'])
-
-test('pbbam formatting check', pbbam_clang_formatter, args : ['--all'], workdir : meson.source_root())
-
-pbbamify_synthetic_dataset = configure_file(
- input : files('data/pbbamify/synthetic_movie_all.subreadset.xml.in'),
- output : 'synthetic_movie_all.subreadset.xml',
- configuration : pbbam_PbbamTestData_h_config)
=====================================
tests/src/cram/pbbamify.t.in
=====================================
@@ -8,7 +8,7 @@ Setup:
Forward alignments with and without user specified tags, one alignment with undefined mapq, some alignments with basic CIGAR operations, 2 alignemtns with hard clipping, and several invalid alignments (1 without a seq field and 1 not present in the dataset) which should be skipped:
- $ $PBBAMIFY --input=$DATADIR/pbbamify/input-aligned-1.bam $DATADIR/pbbamify/synthetic-ref-1.fa $DATADIR/pbbamify/synthetic_movie_1.subreads.bam | samtools view -h
+ $ $PBBAMIFY --input=$DATADIR/pbbamify/input-aligned-1.bam $DATADIR/pbbamify/synthetic-ref-1.fa $DATADIR/pbbamify/synthetic_movie_1.subreads.bam | $SAMTOOLS view -h
[Warning] No records found for query 'synthetic_movie_1/10/0_100'. Skipping.
[Warning] Sequence 'synthetic_movie_1/1/0_100' (length 90) is not of the same length as the PacBio BAM sequence (length 100)! Skipping.
[Warning] Found 1 alignments without a seq field which were not converted (most likely secondary alignments).
@@ -34,7 +34,7 @@ Forward alignments with and without user specified tags, one alignment with unde
synthetic_movie_1/2/0_101\t256\tsynthetic_ref_1\t30\t60\t8S1X21=3I1X3D13=1D1=1X1=1I9=1X32=1I1=6S\t*\t0\t0\tCGCTATTTTTGAAAATTTTCCGGTTTAAGGAAATTCCGTTCTTCTTCTGAATAACTTAATCTTTTTATTTAAAATACCCTCTGAAAAGAAAGGAAAACGAC\t!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\tRG:Z:8d2370c0\tcx:i:3\tip:B:C,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101\tnp:i:1\tpw:B:C,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101\tqe:i:101\tqs:i:0\trq:f:0.8\tsn:B:f,8.34462,15.7107,6.3469,10.3163\tzm:i:2 (esc)
Reverse alignments: 2 primary alignments and 7 secondary, 6 alignments with extended CIGAR and 2 with basic CIGAR strings, 1 alignment with undefined (255) mapq, 2 alignments with hard clipping, 1 alignment with user defined tag. All alignments have a read group assigned which is different than the dataset.
- $ $PBBAMIFY --input=$DATADIR/pbbamify/input-aligned-2.bam $DATADIR/pbbamify/synthetic-ref-1.fa $DATADIR/pbbamify/synthetic_movie_2.subreads.bam | samtools view -h
+ $ $PBBAMIFY --input=$DATADIR/pbbamify/input-aligned-2.bam $DATADIR/pbbamify/synthetic-ref-1.fa $DATADIR/pbbamify/synthetic_movie_2.subreads.bam | $SAMTOOLS view -h
[INFO] Done processing 9 alignments in 0 min.
@HD\tVN:1.5\tSO:unknown\tpb:3.0.3 (esc)
@SQ\tSN:synthetic_ref_1\tLN:150\tM5:e1e940d621d949c9617566ddf3055922 (esc)
@@ -55,7 +55,7 @@ Reverse alignments: 2 primary alignments and 7 secondary, 6 alignments with exte
CCS read:
- $ $PBBAMIFY --input=$DATADIR/pbbamify/input-aligned-3.bam $DATADIR/pbbamify/synthetic-ref-1.fa $DATADIR/pbbamify/synthetic_movie_3.subreads.bam | samtools view -h
+ $ $PBBAMIFY --input=$DATADIR/pbbamify/input-aligned-3.bam $DATADIR/pbbamify/synthetic-ref-1.fa $DATADIR/pbbamify/synthetic_movie_3.subreads.bam | $SAMTOOLS view -h
[INFO] Done processing 1 alignments in 0 min.
@HD\tVN:1.5\tSO:unknown\tpb:3.0.1 (esc)
@SQ\tSN:synthetic_ref_1\tLN:150\tM5:e1e940d621d949c9617566ddf3055922 (esc)
@@ -66,7 +66,7 @@ CCS read:
No verbose output:
- $ $PBBAMIFY --input=$DATADIR/pbbamify/input-aligned-1.bam --verbose-level=0 $DATADIR/pbbamify/synthetic-ref-1.fa $DATADIR/pbbamify/synthetic_movie_1.subreads.bam | samtools view -h
+ $ $PBBAMIFY --input=$DATADIR/pbbamify/input-aligned-1.bam --verbose-level=0 $DATADIR/pbbamify/synthetic-ref-1.fa $DATADIR/pbbamify/synthetic_movie_1.subreads.bam | $SAMTOOLS view -h
@HD\tVN:1.5\tSO:unknown\tpb:3.0.3 (esc)
@SQ\tSN:synthetic_ref_1\tLN:150\tM5:e1e940d621d949c9617566ddf3055922 (esc)
@RG\tID:8d2370c0\tPL:PACBIO\tDS:READTYPE=SUBREAD;Ipd:CodecV1=ip;PulseWidth:CodecV1=pw;BINDINGKIT=100-862-200;SEQUENCINGKIT=100-861-800;BASECALLERVERSION=5.0.0.5552;FRAMERATEHZ=80.000000\tPU:synthetic_movie_1\tPM:SEQUEL (esc)
@@ -89,7 +89,7 @@ No verbose output:
Test on a dataset, input contains alignments from all subread sets.
- $ $PBBAMIFY --input=$DATADIR/pbbamify/input-aligned-all.bam $DATADIR/pbbamify/synthetic-ref-1.fa $GENERATEDDATADIR/synthetic_movie_all.subreadset.xml | samtools view -h
+ $ $PBBAMIFY --input=$DATADIR/pbbamify/input-aligned-all.bam $DATADIR/pbbamify/synthetic-ref-1.fa $GENERATEDDATADIR/synthetic_movie_all.subreadset.xml | $SAMTOOLS view -h
[Warning] No records found for query 'synthetic_movie_1/10/0_100'. Skipping.
[Warning] Sequence 'synthetic_movie_1/1/0_100' (length 90) is not of the same length as the PacBio BAM sequence (length 100)! Skipping.
[Warning] Found 1 alignments without a seq field which were not converted (most likely secondary alignments).
=====================================
tests/src/test_BamWriter.cpp
=====================================
@@ -15,7 +15,11 @@
using namespace PacBio;
using namespace PacBio::BAM;
-TEST(BamWriterTest, SingleWrite_UserRecord)
+// clang-format off
+
+namespace BamWriterTests {
+
+void checkSingleRecord(bool useTempFile)
{
const std::string fullName = "test/100/0_5";
const std::string rgId = "6002b307";
@@ -58,7 +62,9 @@ TEST(BamWriterTest, SingleWrite_UserRecord)
const std::string generatedBamFn =
PbbamTestsConfig::GeneratedData_Dir + "/bamwriter_generated.bam";
{
- BamWriter writer(generatedBamFn, inputHeader);
+ BamWriter::Config config;
+ config.useTempFile = useTempFile;
+ BamWriter writer(generatedBamFn, inputHeader, config);
writer.Write(bamRecord);
}
@@ -91,3 +97,17 @@ TEST(BamWriterTest, SingleWrite_UserRecord)
// clean up
remove(generatedBamFn.c_str());
}
+
+} // namespace BamWriterTests
+
+TEST(BamWriterTest, SingleWrite_UserRecord_WithTempFile)  // shared single-record check, temp file enabled
+{
+ BamWriterTests::checkSingleRecord(true);  // true -> BamWriter::Config::useTempFile
+}
+
+TEST(BamWriterTest, SingleWrite_UserRecord_NoTempFile)  // shared single-record check, temp file disabled
+{
+ BamWriterTests::checkSingleRecord(false);  // false -> BamWriter::Config::useTempFile
+}
+
+// clang-format on
=====================================
tests/src/test_LongCigar.cpp
=====================================
@@ -2,6 +2,7 @@
#include <iostream>
#include <string>
+#include <tuple>
#include <gtest/gtest.h>
@@ -10,6 +11,9 @@
#include <pbbam/BamReader.h>
#include <pbbam/BamWriter.h>
+#include "../../src/MemoryUtils.h"
+#include "../../src/StringUtils.h"
+
using BamReader = PacBio::BAM::BamReader;
using BamRecord = PacBio::BAM::BamRecord;
using BamWriter = PacBio::BAM::BamWriter;
@@ -18,15 +22,42 @@ using CigarOp = PacBio::BAM::CigarOperation;
using PacBio::BAM::CigarOperationType;
using Tag = PacBio::BAM::Tag;
+// clang-format off
+
namespace LongCigarTests {
+// Reports whether the htslib version string returned by hts_version() is
+// 1.7 or newer; the result initializes has_native_long_cigar_support.
+// Throws std::runtime_error when the version string has no "<major>.<minor>"
+// part; std::stoi throws if either component is not numeric.
+static bool DoesHtslibSupportLongCigar()
+{
+    const std::string htsVersion = hts_version();
+
+    // keep only the text in front of the first '-' (non-release suffix)
+    const auto releaseAndSuffix = PacBio::BAM::Split(htsVersion, '-');
+    if (releaseAndSuffix.empty())
+        throw std::runtime_error{"invalid htslib version format: " + htsVersion};
+
+    // the release part must contain at least major and minor numbers
+    const auto numbers = PacBio::BAM::Split(releaseAndSuffix[0], '.');
+    if (numbers.size() < 2)
+        throw std::runtime_error{"invalid htslib version format: " + htsVersion};
+
+    const int versionMajor = std::stoi(numbers[0]);
+    const int versionMinor = std::stoi(numbers[1]);
+
+    // lexicographic test for (major, minor) >= (1, 7)
+    if (versionMajor != 1) return versionMajor > 1;
+    return versionMinor >= 7;
+}
+
+static const bool has_native_long_cigar_support = DoesHtslibSupportLongCigar();
+
// BAM record in this file has its CIGAR data in the new "CG" tag
-static const std::string LongCigarBam = PacBio::BAM::PbbamTestsConfig::Data_Dir + "/long-cigar.bam";
+static const std::string LongCigarBam = PacBio::BAM::PbbamTestsConfig::Data_Dir + "/long-cigar-1.7.bam";
static const std::string LongCigarOut =
PacBio::BAM::PbbamTestsConfig::GeneratedData_Dir + "/long-cigar-generated.bam";
-static const size_t numOps = 66000;
+static const size_t numOps = 72091;
static BamRecord ReadLongCigarRecord(const std::string& fn)
{
@@ -37,90 +68,58 @@ static BamRecord ReadLongCigarRecord(const std::string& fn)
return b;
}
-static void SetLongCigar(BamRecord* b)
-{
- Cigar cigar;
- cigar.resize(numOps);
- for (size_t i = 0; i < LongCigarTests::numOps; ++i) {
- const CigarOperationType type =
- (i % 2 == 0 ? CigarOperationType::SEQUENCE_MATCH : CigarOperationType::INSERTION);
- cigar.at(i) = CigarOp(type, 1);
- }
- b->Impl().CigarData(cigar);
-}
-
-static void CheckLongCigar(const Cigar& cigar)
-{
- ASSERT_EQ(numOps, cigar.size());
-
- for (size_t i = 0; i < numOps; ++i) {
- const CigarOp& op = cigar.at(i);
- EXPECT_EQ(1, op.Length());
-
- const CigarOperationType expectedType =
- (i % 2 == 0 ? CigarOperationType::SEQUENCE_MATCH : CigarOperationType::INSERTION);
- EXPECT_EQ(expectedType, op.Type());
- }
-}
-
-static void CheckLongCigarTag(const Tag& cigarTag)
-{
- ASSERT_TRUE(cigarTag.IsUInt32Array());
- const auto tagArray = cigarTag.ToUInt32Array();
- ASSERT_EQ(numOps, tagArray.size());
-
- for (size_t i = 0; i < numOps; ++i) {
- const auto op = tagArray.at(i);
- const auto expectedLength = 1;
- const auto expectedType = (i % 2 == 0 ? BAM_CEQUAL : BAM_CINS);
-
- EXPECT_EQ(expectedType, bam_cigar_op(op));
- EXPECT_EQ(expectedLength, bam_cigar_oplen(op));
- }
-}
-
} // namespace LongCigarTests
TEST(LongCigarTest, ReadAndFetchLongCigar)
{
const auto b = LongCigarTests::ReadLongCigarRecord(LongCigarTests::LongCigarBam);
- // public API
- const auto cigar = b.CigarData();
- EXPECT_EQ(66000, cigar.size());
-
- // TODO: come back & check raw data once we have 'private access wrapper'
- // but we're looking good
+ EXPECT_EQ(LongCigarTests::numOps, b.CigarData().size());
+ if (LongCigarTests::has_native_long_cigar_support)
+ EXPECT_FALSE(b.Impl().HasTag("CG"));
+ else
+ EXPECT_TRUE(b.Impl().HasTag("CG"));
}
TEST(LongCigarTest, EditLongCigar)
{
- SCOPED_TRACE("EditLongCigar");
-
auto b = LongCigarTests::ReadLongCigarRecord(LongCigarTests::LongCigarBam);
- LongCigarTests::SetLongCigar(&b);
+ b.Impl().CigarData(b.CigarData());
- const auto recordCigar = b.CigarData();
- const auto cigarTag = b.Impl().TagValue("CG");
- LongCigarTests::CheckLongCigar(recordCigar);
- LongCigarTests::CheckLongCigarTag(cigarTag);
+ EXPECT_EQ(LongCigarTests::numOps, b.CigarData().size());
+ if (LongCigarTests::has_native_long_cigar_support)
+ EXPECT_FALSE(b.Impl().HasTag("CG"));
+ else
+ EXPECT_TRUE(b.Impl().HasTag("CG"));
}
TEST(LongCigarTest, WriteLongCigar)
{
SCOPED_TRACE("WriteLongCigar");
- { // write record with our custom long CIGAR
+ { // edit & write
auto b = LongCigarTests::ReadLongCigarRecord(LongCigarTests::LongCigarBam);
- LongCigarTests::SetLongCigar(&b);
+ b.Impl().CigarData(b.CigarData());
+
+ EXPECT_EQ(LongCigarTests::numOps, b.CigarData().size());
+ if (LongCigarTests::has_native_long_cigar_support)
+ EXPECT_FALSE(b.Impl().HasTag("CG"));
+ else
+ EXPECT_TRUE(b.Impl().HasTag("CG"));
+
BamWriter writer{LongCigarTests::LongCigarOut, b.header_};
writer.Write(b);
}
- { // read back in to check
+
+ { // read back in
auto b = LongCigarTests::ReadLongCigarRecord(LongCigarTests::LongCigarOut);
- const auto recordCigar = b.CigarData();
- const auto cigarTag = b.Impl().TagValue("CG");
- LongCigarTests::CheckLongCigar(recordCigar);
- LongCigarTests::CheckLongCigarTag(cigarTag);
+
+ EXPECT_EQ(LongCigarTests::numOps, b.CigarData().size());
+ if (LongCigarTests::has_native_long_cigar_support)
+ EXPECT_FALSE(b.Impl().HasTag("CG"));
+ else
+ EXPECT_TRUE(b.Impl().HasTag("CG"));
}
}
+
+// clang-format on
=====================================
tools/meson.build
=====================================
@@ -12,10 +12,6 @@ pbbam_Bam2SamVersion_h = configure_file(
output : 'Bam2SamVersion.h',
configuration : pbbam_Bam2SamVersion_h_config)
-if pbbam_htslib_dep.found()
- pbbam_htslib_libdir = pbbam_htslib_dep.get_pkgconfig_variable('libdir')
-endif
-
pbbam_bam2sam_cpp_sources = [pbbam_Bam2SamVersion_h]
pbbam_bam2sam_cpp_sources += files([
'common/OptionParser.cpp',
@@ -28,7 +24,6 @@ pbbam_bam2sam = executable(
dependencies : [pbbam_htslib_dep, pbbam_thread_dep, pbbam_zlib_dep],
include_directories : [pbbam_include_directories, include_directories('bam2sam')],
install : get_option('build-tools'),
- build_rpath : pbbam_htslib_libdir,
cpp_args : pbbam_warning_flags)
# tests
@@ -42,7 +37,7 @@ if get_option('tests')
'bam2sam_CramTests',
pbbam_cram_script,
args : [
- '--xunit-file=' + join_paths(meson.build_root(), 'blasr-cram-bam2sam.xml'),
+ '--xunit-file=' + join_paths(meson.build_root(), 'pbbam-cram-bam2sam.xml'),
'--verbose'] + [
bam2sam_t],
timeout : 1800)
@@ -119,7 +114,7 @@ if get_option('tests')
'pbindexdump_CramTests',
pbbam_cram_script,
args : [
- '--xunit-file=' + join_paths(meson.build_root(), 'blasr-cram-pbindexdump.xml'),
+ '--xunit-file=' + join_paths(meson.build_root(), 'pbbam-cram-pbindexdump.xml'),
'--verbose'] + [
pbindexdump_json_t,
pbindexdump_cpp_t],
@@ -178,7 +173,7 @@ if get_option('tests') and not get_option('auto-validate')
'pbmerge_CramTests',
pbbam_cram_script,
args : [
- '--xunit-file=' + join_paths(meson.build_root(), 'blasr-cram-pbmerge.xml'),
+ '--xunit-file=' + join_paths(meson.build_root(), 'pbbam-cram-pbmerge.xml'),
'--verbose'] + [
pbmerge_pacbio_ordering_t,
pbmerge_aligned_ordering_t,
@@ -224,6 +219,8 @@ pbbam_pbbamify = executable(
# tests
if get_option('tests')
+ pbbam_test_samtools = find_program('samtools', required : true)
+
pbbamify_t = configure_file(
input : pbbam_cram_pbbamify_t_in,
output : 'pbbamify.t',
@@ -233,11 +230,10 @@ if get_option('tests')
'pbbamify_CramTests',
pbbam_cram_script,
args : [
- '--xunit-file=' + join_paths(meson.build_root(), 'blasr-cram-pbbamify.xml'),
+ '--xunit-file=' + join_paths(meson.build_root(), 'pbbam-cram-pbbamify.xml'),
'--verbose'] + [
pbbamify_t],
+ env : [
+ 'SAMTOOLS=' + pbbam_test_samtools.path()],
timeout : 1800)
-
endif
-
-### Tests will be added later.
View it on GitLab: https://salsa.debian.org/med-team/pbbam/commit/25ba6fde382dd7980c9dbcb76f2fb98e7abc90d0
--
View it on GitLab: https://salsa.debian.org/med-team/pbbam/commit/25ba6fde382dd7980c9dbcb76f2fb98e7abc90d0
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20181010/01362e1c/attachment-0001.html>
More information about the debian-med-commit mailing list