[med-svn] [pbbam] 02/04: Imported Upstream version 0.7.4+ds
Afif Elghraoui
afif at moszumanska.debian.org
Mon Dec 19 05:43:26 UTC 2016
This is an automated email from the git hooks/post-receive script.
afif pushed a commit to branch master
in repository pbbam.
commit a9374dc70989c14b8e6a8093bab28679ed35467a
Author: Afif Elghraoui <afif at debian.org>
Date: Sun Dec 18 21:16:58 2016 -0800
Imported Upstream version 0.7.4+ds
---
CHANGELOG.md | 39 +-
CMakeLists.txt | 165 +--
cmake/PbbamTool.cmake | 6 +-
cmake/pbbam-ccache.cmake | 8 +
cmake/pbbam-compilerflags.cmake | 44 +
cmake/pbbam-dependencies.cmake | 23 +
cmake/pbbam-libtype.cmake | 21 +
docs/source/conf.py | 4 +-
docs/specs/pbbam_structure.png | Bin 0 -> 23521 bytes
docs/specs/pbbam_updated_release3_2.rst | 618 ++++++++++
include/pbbam/Config.h | 37 +
.../ValidationException.h => FastaReader.h} | 86 +-
.../TestData.h.in => include/pbbam/FastaSequence.h | 70 +-
.../InvalidSequencingChemistryException.h | 7 +-
include/pbbam/exception/ValidationException.h | 6 +-
.../pbbam/internal/FastaSequence.inl | 33 +-
include/pbbam/internal/ReadGroupInfo.inl | 1 +
src/BamHeader.cpp | 41 +-
src/BamRecord.cpp | 2 +-
src/CMakeLists.txt | 112 +-
src/ChemistryTable.cpp | 13 +-
src/DataSet.cpp | 42 +-
src/FastaReader.cpp | 155 +++
src/PbiFilter.cpp | 85 +-
src/PbiIndexedBamReader.cpp | 4 +-
src/Pulse2BaseCache.h | 2 +-
src/SequenceUtils.h | 2 +-
src/files.cmake | 4 +
src/swig/CMakeLists.txt | 2 +-
src/swig/WrapCSharp.cmake | 2 +-
src/swig/WrapPython.cmake | 3 +-
tests/CMakeLists.txt | 136 ++-
tests/data/dataset/malformed.xml | 3 +-
tests/data/empty.bam | Bin 0 -> 350 bytes
tests/data/empty.bam.pbi | Bin 0 -> 67 bytes
tests/data/phi29.bam.pbi | Bin 0 -> 1394 bytes
tests/files.cmake | 1 +
tests/scripts/generate_data.py | 24 +
tests/src/CSharp/buildAssembly.sh.in | 8 +-
tests/src/TestData.h.in | 1 +
tests/src/cram/{bam2sam.t => bam2sam.t.in} | 4 +-
.../{pbindexdump_cpp.t => pbindexdump_cpp.t.in} | 4 +-
.../{pbindexdump_json.t => pbindexdump_json.t.in} | 4 +-
...ed_ordering.t => pbmerge_aligned_ordering.t.in} | 16 +-
.../{pbmerge_dataset.t => pbmerge_dataset.t.in} | 14 +-
.../src/cram/{pbmerge_fofn.t => pbmerge_fofn.t.in} | 12 +-
...ixed_ordering.t => pbmerge_mixed_ordering.t.in} | 6 +-
...bio_ordering.t => pbmerge_pacbio_ordering.t.in} | 16 +-
tests/src/test_BamHeader.cpp | 12 +-
tests/src/test_BamRecord.cpp | 1231 ++++++++++++++++++++
tests/src/test_BamWriter.cpp | 2 +-
tests/src/test_DataSetCore.cpp | 1 +
tests/src/test_DataSetIO.cpp | 4 +-
tests/src/test_DataSetQuery.cpp | 2 +-
tests/src/test_EndToEnd.cpp | 4 +-
tests/src/test_Fasta.cpp | 105 ++
tests/src/test_FileUtils.cpp | 7 +-
tests/src/test_PacBioIndex.cpp | 10 +-
tests/src/test_PbiFilterQuery.cpp | 187 ++-
tests/src/test_ReadGroupInfo.cpp | 25 +
tests/src/test_SamWriter.cpp | 4 +-
tools/CMakeLists.txt | 58 +-
tools/bam2sam/CMakeLists.txt | 11 +-
tools/pbindex/CMakeLists.txt | 2 +-
tools/pbindexdump/CMakeLists.txt | 18 +-
tools/pbmerge/CMakeLists.txt | 52 +-
66 files changed, 3148 insertions(+), 473 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6274f4b..7704263 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,41 @@ guarantees will be maintained within each major version series.
## Active
+### Added
+- Default DataSet 'Version' attribute if none already present (currently 4.0.0)
+
+## [0.7.4] - 2016-11-18
+
+### Changed
+- Compatibility for merging BAM files no longer requires exact match of PacBioBAM
+version number (header @HD:pb tag). As long as both files meet the minimum
+supported version number, the merge is allowed.
+
+## [0.7.3] - 2016-11-11
+
+### Added
+- Support for S/P2-C2 chemistry and forthcoming 4.0 basecaller
+
+## [0.7.2] - 2016-11-10
+
+### Removed
+- SAM header version equality check for merging BAM files. PacBioBAM version
+number carries more meaning for PacBio data and thus will be the basis of
+ensuring compatible merging.
+
+## [0.7.1] - 2016-11-09
+
+### Added
+- (Unindexed) FASTA reader & FastaSequence data structure.
+- Missing unit tests for internal BAM tag access.
+- Chemistry data for basecaller v3.3.
+- Missing parsers for filtering barcode quality ("bq"), barcode forward ("bcf"),
+and barcode reverse ("bcr") from DataSetXML.
+- Integrated htslib into project.
+
+### Fixed
+- Reverse complement on padding base.
+
## [0.7.0] - 2016-09-26
### Added
@@ -20,7 +55,9 @@ guarantees will be maintained within each major version series.
- Rolled back default pulse behavior in internal BAM API, to be backward-
compatible with existing client code (for now at least). v0.6.0 introduced
returning basecalled positions ONLY by default, rather than return ALL
-pulses.
+pulses.
+- Fixed crash when attempting to read from empty BAM/PBI files using the
+PbiFilter-enabled APIs.
## [0.6.0] - 2016-09-13
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 284ddba..48eeee8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,7 +3,7 @@
########################################################################
cmake_policy(SET CMP0048 NEW) # lets us set version in project()
-project(PacBioBAM VERSION 0.7.0 LANGUAGES CXX C)
+project(PacBioBAM VERSION 0.7.4 LANGUAGES CXX C)
cmake_minimum_required(VERSION 3.0)
# project name & version
@@ -24,167 +24,52 @@ option(PacBioBAM_use_modbuild "Build PacBioBAM using Modular Build System."
option(PacBioBAM_use_ccache "Build PacBioBAM using ccache, if available." ON)
option(PacBioBAM_auto_validate "Build PacBioBAM with auto-validation enabled." OFF)
-# enable ccache, if available
-if(PacBioBAM_use_ccache)
- find_program(CCACHE_FOUND ccache)
- if(CCACHE_FOUND)
- set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
- set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache)
- endif()
-endif()
-
-# Deprecating the "PacBioBAM_build_pbindex" command line option in favor of more
-# general "PacBioBAM_build_tools", as we're starting to add new utilities.
-#
-# That said, I don't want to break current auto tests/builds, so I'm providing a
-# warning message so devs are aware.
-#
-if(DEFINED PacBioBAM_build_pbindex)
-
- # construct warning message
- set(pbindex_warning "\nDeprecated:\n-DPacBioBAM_build_pbindex\n")
- if (PacBioBAM_build_pbindex)
- set(pbindex_warning "${pbindex_warning} Building as requested,")
- else()
- set(pbindex_warning "${pbindex_warning} Skipping as requested,")
- endif()
- set(pbindex_warning "${pbindex_warning} but support for this option will be removed at some point in the future.\n")
- message(AUTHOR_WARNING "${pbindex_warning} ** Use -DPacBioBAM_build_tools instead. **\n")
-
- # force PacBioBAM_build_tools option
- set(PacBioBAM_build_tools ${PacBioBAM_build_pbindex} CACHE BOOL
- "Build PacBioBAM with add'l utilities (e.g. pbindex, pbindexdump)." FORCE)
-endif()
-
-# enable testing if requested
-if(PacBioBAM_build_tests)
- enable_testing()
-endif()
-
-# determine if we're generating SWIG bindings
-if(PacBioBAM_wrap_csharp OR PacBioBAM_wrap_r OR PacBioBAM_wrap_python)
+if (PacBioBAM_wrap_csharp OR PacBioBAM_wrap_r OR PacBioBAM_wrap_python)
set(wrapping_swig TRUE)
else()
set(wrapping_swig FALSE)
endif()
-# determine if we need a shared lib
-if(PacBioBAM_build_shared OR wrapping_swig)
- set(BUILD_SHARED_LIBS ON)
- set(htslib_build_shared ON CACHE BOOL "force htslibConfig to export proper library name")
- set(PB_LIB_MODE SHARED)
- set(PB_LIB_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX})
-else()
- set(BUILD_SHARED_LIBS OFF)
- set(PB_LIB_MODE STATIC)
- set(PB_LIB_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX})
+if(PacBioBAM_build_tests)
+ enable_testing()
endif()
-
-# main project paths
-set(PacBioBAM_RootDir ${PacBioBAM_SOURCE_DIR})
+
+# project paths
+set(PacBioBAM_RootDir ${CMAKE_CURRENT_LIST_DIR})
set(PacBioBAM_DocsDir ${PacBioBAM_RootDir}/docs)
set(PacBioBAM_IncludeDir ${PacBioBAM_RootDir}/include)
set(PacBioBAM_SourceDir ${PacBioBAM_RootDir}/src)
set(PacBioBAM_SwigSourceDir ${PacBioBAM_RootDir}/src/swig)
set(PacBioBAM_TestsDir ${PacBioBAM_RootDir}/tests)
+set(PacBioBAM_ThirdPartyDir ${PacBioBAM_RootDir}/third-party)
set(PacBioBAM_ToolsDir ${PacBioBAM_RootDir}/tools)
if(NOT PacBioBAM_OutputDir)
- set(PacBioBAM_OutputDir ${PacBioBAM_RootDir})
+ set(PacBioBAM_OutputDir ${CMAKE_CURRENT_BINARY_DIR})
else()
- # if SWIG bindings requested
if(${wrapping_swig})
message(FATAL_ERROR "SWIG bindings not currently supported in modular build.")
endif()
endif()
+set(PacBioBAM_BinDir ${PacBioBAM_OutputDir}/bin)
+set(PacBioBAM_LibDir ${PacBioBAM_OutputDir}/lib)
-set(PacBioBAM_BinDir ${PacBioBAM_OutputDir}/bin)
-set(PacBioBAM_LibDir ${PacBioBAM_OutputDir}/lib)
-set(PacBioBAM_ThirdPartyDir ${PacBioBAM_RootDir}/third-party)
-
+set(GeneratedDir ${CMAKE_BINARY_DIR}/generated)
+set(GeneratedTestDataDir ${GeneratedDir}/data)
file(MAKE_DIRECTORY ${PacBioBAM_BinDir})
file(MAKE_DIRECTORY ${PacBioBAM_LibDir})
+file(MAKE_DIRECTORY ${GeneratedDir})
+file(MAKE_DIRECTORY ${GeneratedTestDataDir})
-# use some custom Find*, Use* cmake modules
-list(APPEND CMAKE_MODULE_PATH "${PacBioBAM_RootDir}/cmake")
+# project configuration (keep this order)
+set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/cmake ${CMAKE_MODULE_PATH})
+include(pbbam-ccache)
+include(pbbam-compilerflags)
+include(pbbam-libtype)
+include(pbbam-dependencies)
-# shared & third-party paths
-if(NOT HTSLIB_INCLUDE_DIRS OR
- NOT HTSLIB_LIBRARIES)
- if(HTSLIB_ROOTDIR)
- find_package(htslib
- PATHS ${HTSLIB_ROOTDIR}/
- REQUIRED)
- else()
- find_package(htslib
- PATHS ${PacBioBAM_SOURCE_DIR}/../htslib/
- REQUIRED)
- endif()
-endif()
-
-if(NOT Boost_INCLUDE_DIRS)
- find_package(Boost REQUIRED)
-endif()
-
-if (NOT ZLIB_INCLUDE_DIRS OR
- NOT ZLIB_LIBRARIES)
- find_package(ZLIB REQUIRED)
-endif()
-
-# shared CXX flags for src & tests
-if (MSVC)
- set(PacBioBAM_CXX_FLAGS "/Wall")
-else()
- set(PacBioBAM_CXX_FLAGS "-std=c++11 -Wall -Wno-sign-compare")
-endif()
-
-# NOTE: -Wno-unused-local-typedefs used to quash clang warnings w/ Boost
-include(CheckCXXCompilerFlag)
-check_cxx_compiler_flag("-Wno-unused-local-typedefs" HAS_NO_UNUSED_LOCAL_TYPEDEFS)
-if(HAS_NO_UNUSED_LOCAL_TYPEDEFS)
- set(PacBioBAM_CXX_FLAGS "${PacBioBAM_CXX_FLAGS} -Wno-unused-local-typedefs")
-endif()
-
-if(PacBioBAM_auto_validate)
- add_definitions("-DPBBAM_AUTOVALIDATE=1")
-endif()
-
-# For now, keep @rpath out of install names on OS X, as it causes SWIG
-# tests to fail.
-if(APPLE)
- set(CMAKE_MACOSX_RPATH OFF)
-endif()
-
-# Turn on windows-style filepath resolution.
-# We need to add this #define early (not just in the C# SWIG wrapper)
-if(WIN32 AND PacBioBAM_wrap_csharp)
- add_definitions(-DPBBAM_WIN_FILEPATHS)
-endif()
-
-# keep this order (src first, at least)
+# project components (keep this order)
add_subdirectory(src)
-
-if(PacBioBAM_build_tools)
- add_subdirectory(tools)
-endif()
-
-if(PacBioBAM_build_docs)
- add_subdirectory(docs)
-endif()
-
-if(PacBioBAM_build_tests)
-
- if (NOT GTEST_SRC_DIR)
- set(PREBUILT_GTEST_SRC ${PacBioBAM_RootDir}/../../../../prebuilt.tmpout/gtest/gtest_1.7.0/)
- if(EXISTS ${PREBUILT_GTEST_SRC})
- set(GTEST_SRC_DIR ${PREBUILT_GTEST_SRC})
- else()
- set(GTEST_SRC_DIR ../gtest) # keep old fallback behavior for external builds, for now at least
- endif()
- endif()
-
- add_subdirectory(${GTEST_SRC_DIR} external/gtest/build)
- add_subdirectory(tests)
-
-endif()
-
+add_subdirectory(tools)
+add_subdirectory(docs)
+add_subdirectory(tests)
diff --git a/cmake/PbbamTool.cmake b/cmake/PbbamTool.cmake
index a1411a7..daed917 100644
--- a/cmake/PbbamTool.cmake
+++ b/cmake/PbbamTool.cmake
@@ -9,9 +9,9 @@ function(create_pbbam_tool)
# create executable
include_directories(
- ${ToolsCommonDir} # shared tool code
- ${CMAKE_CURRENT_BINARY_DIR} # generated version headers
- ${PacBioBAM_INCLUDE_DIRS} # pbbam/htslib includes
+ ${ToolsCommonDir} # shared tool code
+ ${GeneratedDir} # generated version headers
+ ${PacBioBAM_INCLUDE_DIRS} # pbbam/htslib includes
)
add_executable(${create_pbbam_tool_TARGET} ${create_pbbam_tool_SOURCES})
set_target_properties(
diff --git a/cmake/pbbam-ccache.cmake b/cmake/pbbam-ccache.cmake
new file mode 100644
index 0000000..21b8ac5
--- /dev/null
+++ b/cmake/pbbam-ccache.cmake
@@ -0,0 +1,8 @@
+
+if(PacBioBAM_use_ccache) # opt-in via the PacBioBAM_use_ccache option
+ find_program(CCACHE_FOUND ccache)
+ if(CCACHE_FOUND) # only when a ccache executable was located
+ set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
+ set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache)
+ endif()
+endif()
diff --git a/cmake/pbbam-compilerflags.cmake b/cmake/pbbam-compilerflags.cmake
new file mode 100644
index 0000000..fcfd321
--- /dev/null
+++ b/cmake/pbbam-compilerflags.cmake
@@ -0,0 +1,44 @@
+
+include(CheckCXXCompilerFlag)
+
+# Enable C++11. CMake < 3.1 has no CMAKE_CXX_STANDARD, so pass -std=c++11 explicitly.
+if (CMAKE_VERSION VERSION_LESS "3.1")
+ if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -stdlib=libc++") # clang: C++11 + libc++
+ else()
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") # gcc
+ endif()
+else() # 3.1+
+ set(CMAKE_CXX_STANDARD 11)
+ set(CMAKE_CXX_STANDARD_REQUIRED ON)
+endif()
+
+# shared CXX flags for src & tests, accumulated in PacBioBAM_CXX_FLAGS
+if (MSVC)
+ set(PacBioBAM_CXX_FLAGS "/Wall")
+else()
+ set(PacBioBAM_CXX_FLAGS "-Wall")
+endif()
+
+# NOTE: -Wno-unused-local-typedefs used to quash clang warnings w/ Boost (added only if the compiler accepts it)
+check_cxx_compiler_flag("-Wno-unused-local-typedefs" HAS_NO_UNUSED_LOCAL_TYPEDEFS)
+if(HAS_NO_UNUSED_LOCAL_TYPEDEFS)
+ set(PacBioBAM_CXX_FLAGS "${PacBioBAM_CXX_FLAGS} -Wno-unused-local-typedefs")
+endif()
+
+check_cxx_compiler_flag("-Wno-sign-compare" HAS_NO_SIGN_COMPARE) # likewise appended only if accepted
+if(HAS_NO_SIGN_COMPARE)
+ set(PacBioBAM_CXX_FLAGS "${PacBioBAM_CXX_FLAGS} -Wno-sign-compare")
+endif()
+
+# Turn on windows-style filepath resolution (defines PBBAM_WIN_FILEPATHS) for Windows C# wrapper builds.
+# We need to add this #define early (not just in the C# SWIG wrapper)
+if(WIN32 AND PacBioBAM_wrap_csharp)
+ add_definitions(-DPBBAM_WIN_FILEPATHS)
+endif()
+
+# For now, keep @rpath out of install names on OS X, as it causes SWIG
+# tests to fail.
+if(APPLE)
+ set(CMAKE_MACOSX_RPATH OFF)
+endif()
diff --git a/cmake/pbbam-dependencies.cmake b/cmake/pbbam-dependencies.cmake
new file mode 100644
index 0000000..c2e21e6
--- /dev/null
+++ b/cmake/pbbam-dependencies.cmake
@@ -0,0 +1,23 @@
+
+# pthreads (system threading library) - always required
+find_package(Threads REQUIRED)
+
+# boost - searched for only if Boost_INCLUDE_DIRS was not already provided
+if(NOT Boost_INCLUDE_DIRS)
+ find_package(Boost REQUIRED)
+endif()
+
+# Winsock for htslib on Windows
+if(WIN32)
+ set(SOCKET_LIBRARIES "ws2_32")
+endif()
+
+# zlib - searched for only if include dirs & libraries were not both provided
+if(NOT ZLIB_INCLUDE_DIRS OR NOT ZLIB_LIBRARIES)
+ find_package(ZLIB REQUIRED)
+endif()
+
+# htslib - add the third-party/htslib subdirectory unless include dirs & libraries were both provided
+if(NOT HTSLIB_INCLUDE_DIRS OR NOT HTSLIB_LIBRARIES)
+ add_subdirectory(third-party/htslib external/htslib)
+endif()
diff --git a/cmake/pbbam-libtype.cmake b/cmake/pbbam-libtype.cmake
new file mode 100644
index 0000000..4b9c0dd
--- /dev/null
+++ b/cmake/pbbam-libtype.cmake
@@ -0,0 +1,21 @@
+
+# determine if we need a shared lib (requested explicitly, or needed for SWIG wrappers)
+if(PacBioBAM_build_shared OR wrapping_swig) # bare name: an unset wrapping_swig is false, not a syntax error
+ set(BUILD_SHARED_LIBS ON)
+ set(htslib_build_shared ON CACHE BOOL "force htslibConfig to export proper library name")
+ set(PB_LIB_MODE SHARED)
+ set(PB_LIB_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX})
+else()
+ set(BUILD_SHARED_LIBS OFF)
+ set(PB_LIB_MODE STATIC)
+ set(PB_LIB_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX})
+endif()
+
+if(WIN32)
+ # Link the GCC/libstdc++ runtimes statically to limit the number of DLLs we will have to bundle
+ set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -static-libgcc -static-libstdc++")
+ set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -static-libgcc -static-libstdc++")
+endif()
+
+
+
diff --git a/docs/source/conf.py b/docs/source/conf.py
index a34faf0..c1de190 100755
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -103,9 +103,9 @@ author = u'Derek Barnett'
# built documents.
#
# The short X.Y version.
-version = '0.7.0'
+version = '0.7.4'
# The full version, including alpha/beta/rc tags.
-release = '0.7.0'
+release = '0.7.4'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
diff --git a/docs/specs/pbbam_structure.png b/docs/specs/pbbam_structure.png
new file mode 100755
index 0000000..40f50cf
Binary files /dev/null and b/docs/specs/pbbam_structure.png differ
diff --git a/docs/specs/pbbam_updated_release3_2.rst b/docs/specs/pbbam_updated_release3_2.rst
new file mode 100755
index 0000000..72d9b76
--- /dev/null
+++ b/docs/specs/pbbam_updated_release3_2.rst
@@ -0,0 +1,618 @@
+=============================================================
+**Pbbam Core API Software Design & Functional Specification**
+=============================================================
+| *Version 0.2*
+| *Pacific Biosciences Engineering Group*
+| *Oct 17, 2016*
+
+1. Revision History
+===================
+
++-------------+---------------+--------------------+---------------------------------+
+| **Date** | **Revision** | **Author(s)** | **Comments** |
++=============+===============+====================+=================================+
+| 01-29-2016 | 0.1 | Derek Barnett | Initial draft created |
+| | | | |
++-------------+---------------+--------------------+---------------------------------+
+| 10-17-2016 | 0.2 | Derek Barnett | Added behavioral representation |
+| | | | and structural representation |
+| | | | diagram |
++-------------+---------------+--------------------+---------------------------------+
+
+2. Introduction
+===============
+
+2.1. Document Specification Identifier
+--------------------------------------
+
++-----------------------------------+------------------------------------------+
+| **Document Specification Prefix** | **Description** |
++===================================+==========================================+
+| FS\_SA\_PBBAM\_ | Functional spec for pbbam |
++-----------------------------------+------------------------------------------+
+
+2.2. Purpose
+------------
+
+This document is intended to describe the requirements and interface of the pbbam
+library, which provides functionality for creating, querying, and editing PacBio
+BAM files and associated file formats.
+
+2.3. Scope of Document
+----------------------
+
+This document covers the expected usage of the pbbam library, as well as any
+desired or required performance characteristics with respect to quality or speed.
+
+This document does not provide installation instructions or API documentation.
+
+2.4. Glossary of Terms
+----------------------
+
+The table below specifies only terms specific to this document, and skips
+acronyms/terms that are specified in `Pacific Biosciences Software Glossary`_.
+
+.. _Pacific Biosciences Software Glossary: http://smrtanalysis-docs/pb_sw_glossary.html
+
++------------------+-----------------------------------------------------------+
+| **Acronym/Term** | **Description** |
++==================+===========================================================+
+| API | Application Programming Interface - a set of routines, |
+| | protocols, and tools for building software applications. |
+| | In this document, this will consist of one or more |
+| | cooperating libraries that specify data structures, |
+| | methods, etc. for use within a target programming |
+| | language. |
++------------------+-----------------------------------------------------------+
+| Client | An application that uses the library. |
++------------------+-----------------------------------------------------------+
+| I/O | Input/output of data. |
++------------------+-----------------------------------------------------------+
+
+2.5. References
+---------------
+
++-------------+------------------------------+--------------------------------------+
+| **Ref No.** | **Document Name, Link** | **Description** |
++=============+==============================+======================================+
+| (1) | `BAM format`_ | General SAM/BAM specification |
++-------------+------------------------------+--------------------------------------+
+| (2) | `PacBio BAM`_ | PacBio BAM specification |
++-------------+------------------------------+--------------------------------------+
+| (3) | `PacBio BAM index`_ | PacBio BAM index specification |
++-------------+------------------------------+--------------------------------------+
+| (4) | `DataSet XML`_ | PacBio DataSet XML specification |
++-------------+------------------------------+--------------------------------------+
+| (5) | `Software Style Guide`_ | PacBio coding standards |
++-------------+------------------------------+--------------------------------------+
+| (6) | `SMRT Analysis`_ | General SMRT Analysis infrastructure |
++-------------+------------------------------+--------------------------------------+
+
+.. _BAM format: https://samtools.github.io/hts-specs/SAMv1.pdf
+.. _PacBio BAM: http://pacbiofileformats.readthedocs.org/en/3.0/BAM.html
+.. _PacBio BAM index: http://pacbiofileformats.readthedocs.org/en/3.0/PacBioBamIndex.html
+.. _DataSet XML: https://github.com/PacificBiosciences/PacBioFileFormats/blob/3.0/DataSet.rst
+.. _Software Style Guide: http://smrtanalysis-docs/_downloads/PBISoftwareStyleGuide.doc
+.. _SMRT Analysis: http://smrtanalysis-docs/smrt_docs.html
+
+3. Software Overview
+====================
+
+3.1. Software Module Description
+--------------------------------
+
+As of the 3.0 release of SMRT Analysis, PacBio is embracing the industry standard
+`BAM format`_ (1) for (both aligned and unaligned) basecall data files. We have
+also formulated a BAM companion file format (.bam.pbi) enabling fast access to a
+richer set of per-read information as well as compatibility for software built
+around the legacy cmp.h5 format.
+
+The pbbam library provides components to create, query, & transform PacBio BAM
+data: sequence files and their associated indices. This includes a core C++
+library as well as bindings for additional programming languages.
+
+3.2. Software Module Functional Capabilities
+--------------------------------------------
+
+The library must be able to read and write BAM files that conform to the
+`PacBio BAM`_ specification (2). BAM records must be editable e.g. adding
+alignment information. Random access must be supported, whether by genomic
+region or by filtering record features. To this end, the library will be able to
+read, write, and create associated index files - both the standard BAM index
+(.bai) and the `PacBio BAM index`_ (.pbi) (3). In addition to working with
+individual files, datasets of related BAM files will be supported. These are
+described in a `DataSet XML`_ document. (4)
+
+3.3. User Characteristics
+-------------------------
+
++---------------------+--------------------------------------------------------+
+| **User Class/Role** | **User Knowledge and Skill Levels** |
++=====================+========================================================+
+| Developer | Competence in one or more programming languages |
+| | supported (C++, R, Python, C#). No knowledge of |
+| | molecular biology wet lab techniques required. |
++---------------------+--------------------------------------------------------+
+
+3.4. User Operations and Practices
+----------------------------------
+
+Developer users will interact with the software by incorporating the library
+into a client application.
+
+3.5. Operating Environment
+--------------------------
+
+The software is intended to be run in a Linux or OSX environment, with ideally 4
+or more cores.
+
+3.6. General Constraints
+------------------------
+
+Currently there are no constraints outside the operating environment and speed
+requirements. In particular, as the library will be used for writing the BAM
+files coming off a Sequel instrument, it should be able to keep pace.
+
+3.7. Assumptions and Dependencies
+---------------------------------
+
+Input routines for the library will expect to receive files that conform to the
+`PacBio BAM`_ (2) or `DataSet XML`_ (4) specifications.
+
+The pbbam library depends on Boost, zlib, and htslib libraries.
+
+3.8. Other Software
+-------------------
+
+Output PacBio BAMs will be compatible with the `PacBio BAM`_ specification (2)
+and thus compatible with the general `BAM format`_ specification (1). This
+ensures that a wide variety of downstream tools can interact with data files.
+
+The software uses `CMake`_ as its build system.
+
+The core C++ API relies on the following 3rd party components:
+
+* `zlib`_
+* `htslib`_
+* `Boost`_ (header-only modules)
+
+Wrapper APIs for additional languages (Python, R, C#) are generated by `SWIG`_.
+
+API documentation is generated via `Doxygen`_.
+
+.. _CMake: https://cmake.org/
+.. _zlib: http://www.zlib.net/
+.. _htslib: https://github.com/samtools/htslib
+.. _Boost: http://www.boost.org/
+.. _SWIG: http://www.swig.org/
+.. _Doxygen: http://www.stack.nl/~dimitri/doxygen/
+
+4. External Interfaces
+======================
+
+4.1. User Interfaces
+--------------------
+
+N/A
+
+4.2. Software Interfaces
+------------------------
+
+pbbam will require the following software:
+
+* `htslib`_ & `zlib`_ - provides low-level handling of compressed BAM data
+* `Boost`_ - provides utility classes
+
+Incoming data from upstream components will be compliant with
+PacBio BAM format - see `PacBio BAM`_ specification (2) for more detail.
+
+4.3. Hardware Interfaces
+------------------------
+
+N/A
+
+4.4. Communications Interfaces
+------------------------------
+
+N/A
+
+5. Functional Requirements
+==========================
+
+5.1. Query BAM data by genomic region
+-------------------------------------
+
+5.1.1. Description
+~~~~~~~~~~~~~~~~~~
+
+pbbam shall allow client applications to query data, limited to some genomic
+region of interest.
+
+5.1.2. Inputs
+~~~~~~~~~~~~~
+
+* BAM file(s) or DataSet XML
+* a standard index (.bai) for each source BAM file
+* genomic interval (e.g. "chr1:1000-2000")
+
+5.1.3. Processing
+~~~~~~~~~~~~~~~~~
+
+Obtain an `htslib`_ "iterator" object for a given file and region. This will be
+wrapped by pbbam to hide the low-level nature of this type, as well as handling
+memory lifetime.
+
+5.1.4. Outputs
+~~~~~~~~~~~~~~
+
+Iterator providing access to individual BAM records from the input data sources,
+which are aligned to the requested genomic interval.
+
+For example:
+
+.. code:: c++
+
+ GenomicIntervalQuery query(interval, dataset);
+ for (const BamRecord& record : query) {
+ // ... use record data ...
+ }
+
+
+5.2. Query BAM data by filter criteria
+--------------------------------------
+
+5.2.1. Description
+~~~~~~~~~~~~~~~~~~
+
+pbbam shall allow client applications to query data, limited to some filter
+criteria (e.g. only reads from ZMW hole number 200 with a read quality of >0.5).
+
+5.2.2. Inputs
+~~~~~~~~~~~~~
+
+* BAM file(s) or DataSet XML
+* a `PacBio BAM index`_ (.pbi) for each source BAM file
+* filters supported by data contained in the PBI
+
+5.2.3. Processing
+~~~~~~~~~~~~~~~~~
+
+Query PBI file(s) for records that match the provided filter criteria. Merge
+contiguous runs of records into record blocks, to minimize seeks. Advancing the
+iterator either reads the next read from the current block or seeks to the next
+block and fetches the next record.
+
+5.2.4. Outputs
+~~~~~~~~~~~~~~
+
+Iterator providing access to individual BAM records from the input data sources,
+which satisfy the requested filter criteria.
+
+For example:
+
+.. code:: c++
+
+ PbiFilterQuery query(filter, dataset);
+ for (const BamRecord& record : query) {
+ // ... do stuff ...
+ }
+
+
+5.3. Write PacBio BAM data
+--------------------------
+
+5.3.1. Description
+~~~~~~~~~~~~~~~~~~
+
+pbbam shall be able to write `PacBio BAM`_ files conforming to the specification.
+
+5.3.2. Inputs
+~~~~~~~~~~~~~
+
+* filename
+* header information
+* BAM records
+
+5.3.3. Processing
+~~~~~~~~~~~~~~~~~
+
+Create file handle for the provided filename, output initial header information.
+As records are passed in, write to file. Upon completion, flush any buffers and
+close file handle.
+
+Multithreading, provided by `htslib`_, will be utilized where possible to speed
+up the compression process - often the main bottleneck of BAM throughput.
+
+5.3.4. Outputs
+~~~~~~~~~~~~~~
+
+BAM file conforming to the `PacBio BAM`_ specification.
+
+5.4. Create PacBio BAM index file
+---------------------------------
+
+5.4.1. Description
+~~~~~~~~~~~~~~~~~~
+
+Much of PacBio BAM data processing relies on the presence of a `PacBio BAM index`_
+file. pbbam shall be able to generate this file type for a `PacBio BAM`_ file.
+
+5.4.2. Inputs
+~~~~~~~~~~~~~
+
+`PacBio BAM`_ file
+
+5.4.3. Processing
+~~~~~~~~~~~~~~~~~
+
+Read through the input BAM records, storing the values relevant to a PBI index.
+At end of file, write the index contents to a file and close.
+
+5.4.4. Outputs
+~~~~~~~~~~~~~~
+
+`PacBio BAM index`_ file
+
+6. Non-Functional Requirements
+==============================
+
+6.1. Performance Requirements
+-----------------------------
+
+Since pbbam will be used to write all BAM files coming off a Sequel instrument, the
+library must keep pace with data generation requirements.
+
+6.2. Safety Requirements
+------------------------
+
+N/A
+
+6.3. Security Requirements
+--------------------------
+
+N/A
+
+6.4. Quality Attributes
+-----------------------
+
+6.4.1. Availability
+~~~~~~~~~~~~~~~~~~~
+
+The developed software shall meet the overall product availability requirements.
+
+6.4.2. Data Integrity
+~~~~~~~~~~~~~~~~~~~~~
+
+Inputs and outputs shall adhere to the PacBio BAM or DataSet XML specifications.
+Files that do not meet this requirement will raise exceptions and will not be
+accepted.
+
+6.4.3. Interoperability
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Inputs and outputs shall adhere to the PacBio BAM or DataSet XML specifications.
+
+6.4.4. Reliability
+~~~~~~~~~~~~~~~~~~
+
+The developed software shall meet the overall product reliability requirements.
+
+6.4.5. Robustness
+~~~~~~~~~~~~~~~~~
+
+pbbam will raise exceptions upon encountering failure cases, allowing client
+applications to recover or report the error to a UI.
+
+6.4.6. Usability
+~~~~~~~~~~~~~~~~
+
+pbbam shall have comprehensive API documentation, available both on- and offline.
+Further documentation will be provided for installation, API usage tips, etc.
+
+Raised exceptions shall carry as much information as possible so that client
+applications can respond with appropriate actions or display useful messages.
+
+6.4.7. Maintainability
+~~~~~~~~~~~~~~~~~~~~~~
+
+The source code of the software covered in this functional specification shall
+adhere to the PacBio `Software Style Guide`_ (5) work instruction, to guarantee
+high quality of code that facilitates maintainability.
+
+6.4.8. Customizability
+~~~~~~~~~~~~~~~~~~~~~~
+
+N/A
+
+6.4.9. Compatibility
+~~~~~~~~~~~~~~~~~~~~
+
+pbbam shall support backward compatibility of the API and BAM format versions
+in order not to break existing clients.
+
+6.5. Business Rules
+-------------------
+
+N/A
+
+6.6. Compliance Requirements
+----------------------------
+
+N/A
+
+6.7. Alarms and Error Handling
+------------------------------
+
+Raised exceptions shall carry as much information as possible so that client
+applications can respond with appropriate actions or display useful messages.
+
+6.8. Persistence Requirements
+-----------------------------
+
+pbbam software requires no persistent storage outside of availability of input
+and output during analysis.
+
+6.9. Installation and Upgrade
+-----------------------------
+
+Installation and Upgrade of this software will be handled as part of the SMRT
+Analysis subsystem. See `SMRT Analysis`_ (6) specifications for more detail.
+
+Additionally, the library may be built independently, either from internal
+version control (Perforce) or from the public-facing Github repository. In
+either case, `CMake`_ is used to drive the build process.
+
+6.10. Administration and Maintenance
+------------------------------------
+
+N/A
+
+6.11. User Documentation
+------------------------
+
+pbbam shall have comprehensive API documentation, available both on- and offline.
+Further documentation will be provided for installation, API usage tips, etc.
+
+The "offline" API documentation may be built directly from the source code, using
+`Doxygen`_. Online documentation will be generated via a continuous integration
+server, thus ensuring it is always pointing to the current codebase.
+
+7. High Level Design
+====================
+
+7.1. Top Level Context
+----------------------
+
+The pbbam library is intended to be linked in with client applications,
+providing programmatic access to data files.
+
+7.2. Use Cases
+--------------
+
+Primary use cases for pbbam include:
+
+* BAM file creation
+* BAM file query - iterable access to various subsets of data
+
+8. Detailed Design
+==================
+
+8.1. Structural Representation
+------------------------------
+
+.. image:: ./pbbam_structure.png
+
+8.2. Behavioral Representation
+------------------------------
+
+The typical access pattern involves a client query against BAM data, optionally
+described in DataSet XML. The query may involve some number of filter criteria.
+
+pbbam queries the associated index files (``*.pbi``) to pre-determine which records
+pass filtering criteria and where they reside on disk. The client code is given
+an iterable object, such that each iteration of the main access loop returns a
+valid BAM record for analysis, modification, etc.
+
+8.3. Information Storage
+------------------------
+
+pbbam software requires no persistent storage outside of availability of input
+and output during analysis.
+
+8.4. Technology Overview
+------------------------
+
+pbbam is implemented in C++11 and should perform as designed on any UNIX-like
+operating system (Linux distributions, Apple OSX, etc.).
+
+8.5. SOUP Components
+--------------------
+
+pbbam utilizes CMake for its build system. The C++ library uses the following
+3rd-party software components: `Boost`_, `htslib`_, & `zlib`_. Wrappers for additional
+languages are generated using SWIG.
+
+8.6. Deployment and Configuration
+---------------------------------
+
+Please refer to the `SMRT Analysis`_ (6) documentation.
+
+9. Automated Tests
+==================
+
+9.1. Unit Testing
+-----------------
+
+The library shall have unit tests for all classes & components.
+
+9.2. Performance Testing
+------------------------
+
+Unit tests may evaluate performance requirements as desired.
+
+9.3. Regression Testing
+-----------------------
+
+As its role is primarily in data I/O, pbbam has no "scientific quality/validity"
+metrics that would indicate a regression. Instead, passing its unit tests and
+end-to-end tests will indicate that a regression has not been introduced.
+
+These tests will be run after each check-in and nightly.
+
+10. Requirements Traceability Matrices
+======================================
+
+This section provides traces from requirements specified in PRD/DIR documents to the
+requirements covered in this functional specification, and from these
+functional requirements to corresponding Test Cases/Procedures.
+
+10.1. HPQC Functional Specifications
+------------------------------------
+
++-------------+---------------------------+---------------------------------------------------+-------------+------------+-----------+--------------------------------------------------+
+| **PBI_ID** | **Name** | **Description** | **Comment** | **Metric** | **Owner** | **PRD/DIR Path** |
++=============+===========================+===================================================+=============+============+===========+==================================================+
+| 5.1 | Query BAM data by | pbbam shall allow client applications to query | | Yes | dbarnett | \\DIR\\Functionality\\Software\\Common\\APIs\\ |
+| | genomic region | data, limited to some genomic region of interest. | | | | Software shall provide an API to allow 3rd |
+| | | | | | | party software to extract all run information |
+| | | | | | | including summary reports and locations |
++-------------+---------------------------+---------------------------------------------------+-------------+------------+-----------+--------------------------------------------------+
+| 5.2 | Query BAM data by | pbbam shall allow client applications to query | | Yes | dbarnett | \\DIR\\Functionality\\Software\\Common\\APIs\\ |
+| | filter criteria | data, limited to some filter criteria (e.g. only | | | | Software shall provide an API to allow 3rd |
+| | | reads from ZMW hole number 200 with a read | | | | party software to extract all run information |
+| | | quality of >0.5). | | | | including summary reports and locations |
++-------------+---------------------------+---------------------------------------------------+-------------+------------+-----------+--------------------------------------------------+
+| 5.3 | Write PacBio BAM data | pbbam shall be able to write files conforming to | | Yes | dbarnett | \\DIR\\Functionality\\Software\\PostProcessing\\ |
+| | | the `PacBio BAM`_ specification. | | | | Software shall provide base files including |
+| | | | | | | kinetic information in industry standard format |
+| | | | | | | such as SAM/BAM using current specifications |
++-------------+---------------------------+---------------------------------------------------+-------------+------------+-----------+--------------------------------------------------+
+| 5.4 | Create PacBio BAM index | Much of PacBio BAM data processing relies on the | | Yes | dbarnett | \\DIR\\Functionality\\Software\\PostProcessing\\ |
+| | file | presence of a `PacBio BAM index`_ file. pbbam | | | | Software shall provide base files including |
+| | | shall be able to generate this file type for a | | | | kinetic information in industry standard format |
+| | | `PacBio BAM`_ file. | | | | such as SAM/BAM using current specifications |
++-------------+---------------------------+---------------------------------------------------+-------------+------------+-----------+--------------------------------------------------+
+
+10.2. Automated Tests Coverage
+------------------------------
+
++-------------+---------------------------+----------------------------------------------------+------------------------------------------------------------------+
+| **FS Item** | **FS Item Title** | **Use Case Description** | **Test Case Name/ID** |
++=============+===========================+====================================================+==================================================================+
+| 5.1 | Query BAM data by | pbbam shall allow client applications to query | See section 9.1. Unit Testing. |
+| | genomic region | data, limited to some genomic region of interest. | |
++-------------+---------------------------+----------------------------------------------------+------------------------------------------------------------------+
+| 5.2 | Query BAM data by | pbbam shall allow client applications to query | See section 9.1. Unit Testing. |
+| | filter criteria | data, limited to some filter criteria (e.g. only | |
+| | | reads from ZMW hole number 200 with a read | |
+| | | quality of >0.5). | |
++-------------+---------------------------+----------------------------------------------------+------------------------------------------------------------------+
+| 5.3 | Write PacBio BAM data | pbbam shall be able to write files conforming to | See section 9.1. Unit Testing. |
+| | | the `PacBio BAM`_ specification. | |
++-------------+---------------------------+----------------------------------------------------+------------------------------------------------------------------+
+| 5.4 | Create PacBio BAM index | Much of PacBio BAM data processing relies on the | See section 9.1. Unit Testing. |
+| | file | presence of a `PacBio BAM index`_ file. pbbam | |
+| | | shall be able to generate this file type for a | |
+| | | `PacBio BAM`_ file. | |
++-------------+---------------------------+----------------------------------------------------+------------------------------------------------------------------+
+
diff --git a/include/pbbam/Config.h b/include/pbbam/Config.h
index e5a5f3c..2521288 100644
--- a/include/pbbam/Config.h
+++ b/include/pbbam/Config.h
@@ -44,6 +44,43 @@
#include <cstdint>
+#ifndef INT8_MAX
+#define INT8_MAX 127
+#endif
+#ifndef INT16_MAX
+#define INT16_MAX 32767
+#endif
+#ifndef INT32_MAX
+#define INT32_MAX 2147483647
+#endif
+#ifndef INT64_MAX
+#define INT64_MAX 9223372036854775807LL
+#endif
+#ifndef INT8_MIN
+#define INT8_MIN (-128) // parenthesized so the expansion is a single operand, like INT32_MIN/INT64_MIN
+#endif
+#ifndef INT16_MIN
+#define INT16_MIN (-32768) // parenthesized for the same reason
+#endif
+#ifndef INT32_MIN
+#define INT32_MIN (-INT32_MAX-1)
+#endif
+#ifndef INT64_MIN
+#define INT64_MIN (-INT64_MAX-1)
+#endif
+#ifndef UINT8_MAX
+#define UINT8_MAX 255
+#endif
+#ifndef UINT16_MAX
+#define UINT16_MAX 65535
+#endif
+#ifndef UINT32_MAX
+#define UINT32_MAX 4294967295U
+#endif
+#ifndef UINT64_MAX
+#define UINT64_MAX 18446744073709551615ULL
+#endif
+
/// \name Library Import/Export
/// \{
diff --git a/include/pbbam/exception/ValidationException.h b/include/pbbam/FastaReader.h
similarity index 56%
copy from include/pbbam/exception/ValidationException.h
copy to include/pbbam/FastaReader.h
index 58324b9..dc19e53 100644
--- a/include/pbbam/exception/ValidationException.h
+++ b/include/pbbam/FastaReader.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+// Copyright (c) 2016, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
@@ -34,62 +34,80 @@
// SUCH DAMAGE.
//
// File Description
-/// \file ValidationException.h
-/// \brief Defines the ValidationException class.
+/// \file FastaReader.h
+/// \brief Defines the FastaReader class.
//
// Author: Derek Barnett
-#ifndef VALIDATIONEXCEPTION_H
-#define VALIDATIONEXCEPTION_H
+#ifndef FASTAREADER_H
+#define FASTAREADER_H
-#include <map>
-#include <sstream>
-#include <stdexcept>
-#include <string>
+#include "pbbam/FastaSequence.h"
+#include <memory>
#include <vector>
namespace PacBio {
namespace BAM {
-/// \brief The ValidationExecption represents an exception that will be thrown
-/// when any error is encountered using the Validator API. In addition to
-/// a default display message, it provides programmatic access to all
-/// reported error messages.
+namespace internal { struct FastaReaderPrivate; }
+
///
-/// \sa Validator::Validate(const BamRecord& record)
+/// \brief The FastaReader provides sequential access to FASTA records.
///
-class ValidationException : public std::runtime_error
+class FastaReader
{
public:
- typedef std::vector<std::string> ErrorList;
- typedef std::map<std::string, ErrorList> ErrorMap;
+ ///
+ /// \brief Reads all FASTA sequences from a file
+ ///
+ /// \param fn FASTA filename
+ /// \return vector of FastaSequence results
+ ///
+ static std::vector<FastaSequence> ReadAll(const std::string& fn);
public:
- ValidationException(const ErrorMap& fileErrors,
- const ErrorMap& readGroupErrors,
- const ErrorMap& recordErrors);
- ValidationException(ErrorMap&& fileErrors,
- ErrorMap&& readGroupErrors,
- ErrorMap&& recordErrors);
+ /// \name Constructors & Related Methods
+ /// \{
+
+ explicit FastaReader(const std::string& fn);
+ FastaReader(FastaReader&& other);
+ FastaReader& operator=(FastaReader&& other);
+ ~FastaReader(void);
+
+ // copy is disabled
+ FastaReader(const FastaReader&) = delete;
+ FastaReader& operator=(const FastaReader&) = delete;
+
+ /// \}
public:
- const ErrorMap& FileErrors(void) const;
- const ErrorMap& ReadGroupErrors(void) const;
- const ErrorMap& RecordErrors(void) const;
+ /// \name Sequence Access
+ /// \{
- virtual const char* what(void) const noexcept;
+ ///
+ /// \brief GetNext
+ ///
+ /// \code{cpp}
+ ///
+ /// FastaReader reader{ fn };
+ /// FastaSequence f;
+ /// while (reader.GetNext(f)) {
+ /// // do stuff with f
+ /// }
+ /// \endcode
+ ///
+ /// \param[out] record
+ /// \return success/failure
+ ///
+ bool GetNext(FastaSequence& record);
-private:
- ErrorMap fileErrors_;
- ErrorMap readGroupErrors_;
- ErrorMap recordErrors_;
- std::string msg_;
+ /// \}
private:
- void FormatMessage(void);
+ std::unique_ptr<internal::FastaReaderPrivate> d_;
};
} // namespace BAM
} // namespace PacBio
-#endif // VALIDATIONEXCEPTION_H
+#endif // FASTAREADER_H
diff --git a/tests/src/TestData.h.in b/include/pbbam/FastaSequence.h
similarity index 60%
copy from tests/src/TestData.h.in
copy to include/pbbam/FastaSequence.h
index 297601f..7748506 100644
--- a/tests/src/TestData.h.in
+++ b/include/pbbam/FastaSequence.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+// Copyright (c) 2016, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
@@ -32,26 +32,72 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file FastaSequence.h
+/// \brief Defines the FastaSequence class.
+//
// Author: Derek Barnett
-#ifndef TESTDATA_H
-#define TESTDATA_H
+#ifndef FASTASEQUENCE_H
+#define FASTASEQUENCE_H
#include <string>
namespace PacBio {
namespace BAM {
-namespace tests {
-const std::string Source_Dir = std::string("@PacBioBAM_TestsDir@");
-const std::string Bin_Dir = std::string("@CMAKE_CURRENT_BINARY_DIR@");
-const std::string Data_Dir = std::string("@PacBioBAM_TestsDir@/data");
-const std::string GeneratedData_Dir = std::string("@GeneratedTestDataDir@");
-const std::string Bam2Sam = std::string("@PacBioBAM_BinDir@/bam2sam");
+///
+/// \brief The FastaSequence class represents a FASTA record (name & bases)
+///
+class FastaSequence
+{
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ ///
+ /// \brief Constructs a FASTA record from a name and its bases.
+ /// \param name record name (taken by value and stored)
+ /// \param bases record bases (taken by value and stored)
+ ///
+ explicit FastaSequence(std::string name, std::string bases);
+
+ FastaSequence(void) = default;
+ FastaSequence(const FastaSequence&) = default;
+ FastaSequence(FastaSequence&&) = default;
+ FastaSequence& operator=(const FastaSequence&) = default;
+ FastaSequence& operator=(FastaSequence&&) = default;
+ ~FastaSequence(void) = default;
+
+ /// \}
+
+public:
+ /// \name Attributes
+ /// \{
+
+ ///
+ /// \brief Fetches the record's name.
+ /// \return copy of the stored name
+ ///
+ std::string Name(void) const;
+
+ ///
+ /// \brief Fetches the record's bases.
+ /// \return copy of the stored bases
+ ///
+ std::string Bases(void) const;
+
+ /// \}
+
+private:
+ std::string name_;
+ std::string bases_;
+};
-} // namespace tests
} // namespace BAM
} // namespace PacBio
-#endif // TESTDATA_H
+#include "internal/FastaSequence.inl"
+
+#endif // FASTASEQUENCE_H
diff --git a/include/pbbam/exception/InvalidSequencingChemistryException.h b/include/pbbam/exception/InvalidSequencingChemistryException.h
index 9761703..a670bc3 100644
--- a/include/pbbam/exception/InvalidSequencingChemistryException.h
+++ b/include/pbbam/exception/InvalidSequencingChemistryException.h
@@ -71,6 +71,11 @@ public:
what_ = s.str();
}
+ // This is a work around for the Intel PHI compiler (icpc)
+ ~InvalidSequencingChemistryException() throw()
+ {
+
+ }
public:
const std::string& BindingKit(void) const
{ return bindingKit_; }
@@ -82,7 +87,7 @@ public:
{ return basecallerVersion_; }
public:
- virtual const char* what(void) const noexcept
+ const char* what(void) const noexcept override
{ return what_.c_str(); }
protected:
diff --git a/include/pbbam/exception/ValidationException.h b/include/pbbam/exception/ValidationException.h
index 58324b9..92d8f17 100644
--- a/include/pbbam/exception/ValidationException.h
+++ b/include/pbbam/exception/ValidationException.h
@@ -71,13 +71,17 @@ public:
ValidationException(ErrorMap&& fileErrors,
ErrorMap&& readGroupErrors,
ErrorMap&& recordErrors);
+ // This is a work around for the Intel PHI compiler (icpc)
+ ~ValidationException() throw()
+ {
+ }
public:
const ErrorMap& FileErrors(void) const;
const ErrorMap& ReadGroupErrors(void) const;
const ErrorMap& RecordErrors(void) const;
- virtual const char* what(void) const noexcept;
+ const char* what(void) const noexcept override;
private:
ErrorMap fileErrors_;
diff --git a/tests/src/TestData.h.in b/include/pbbam/internal/FastaSequence.inl
similarity index 76%
copy from tests/src/TestData.h.in
copy to include/pbbam/internal/FastaSequence.inl
index 297601f..fe28170 100644
--- a/tests/src/TestData.h.in
+++ b/include/pbbam/internal/FastaSequence.inl
@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+// Copyright (c) 2016, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
@@ -32,26 +32,29 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
-
+//
+// File Description
+/// \file FastaSequence.inl
+/// \brief Inline implementations for the FastaSequence class.
+//
// Author: Derek Barnett
-#ifndef TESTDATA_H
-#define TESTDATA_H
-
-#include <string>
+#include "pbbam/FastaSequence.h"
namespace PacBio {
namespace BAM {
-namespace tests {
-const std::string Source_Dir = std::string("@PacBioBAM_TestsDir@");
-const std::string Bin_Dir = std::string("@CMAKE_CURRENT_BINARY_DIR@");
-const std::string Data_Dir = std::string("@PacBioBAM_TestsDir@/data");
-const std::string GeneratedData_Dir = std::string("@GeneratedTestDataDir@");
-const std::string Bam2Sam = std::string("@PacBioBAM_BinDir@/bam2sam");
+inline FastaSequence::FastaSequence(std::string name,
+ std::string bases)
+ : name_{std::move(name)}
+ , bases_{std::move(bases)}
+{ }
+
+inline std::string FastaSequence::Bases(void) const
+{ return bases_; }
+
+inline std::string FastaSequence::Name(void) const
+{ return name_; }
-} // namespace tests
} // namespace BAM
} // namespace PacBio
-
-#endif // TESTDATA_H
diff --git a/include/pbbam/internal/ReadGroupInfo.inl b/include/pbbam/internal/ReadGroupInfo.inl
index bb2232d..b8a24e0 100644
--- a/include/pbbam/internal/ReadGroupInfo.inl
+++ b/include/pbbam/internal/ReadGroupInfo.inl
@@ -39,6 +39,7 @@
//
// Author: Derek Barnett
+#include <stdexcept>
#include "pbbam/ReadGroupInfo.h"
namespace PacBio {
diff --git a/src/BamHeader.cpp b/src/BamHeader.cpp
index e3b7625..b69f4a3 100644
--- a/src/BamHeader.cpp
+++ b/src/BamHeader.cpp
@@ -64,30 +64,39 @@ static const string token_VN = string("VN");
static const string token_SO = string("SO");
static const string token_pb = string("pb");
+static inline
+bool CheckSortOrder(const string& lhs, const string& rhs)
+{ return lhs == rhs; }
+
+static inline
+bool CheckPbVersion(const string& lhs, const string& rhs)
+{
+ return ( Version{ lhs } >= Version::Minimum &&
+ Version{ rhs } >= Version::Minimum);
+}
+
+static inline
+bool CheckSequences(const string& sortOrder,
+ const vector<SequenceInfo>& lhs,
+ const vector<SequenceInfo>& rhs)
+{
+ return (sortOrder != "coordinate") || (lhs == rhs); // sequence lists are compared only for coordinate-sorted headers
+}
+
static
void EnsureCanMerge(const BamHeader& lhs, const BamHeader& rhs)
{
// check compatibility
- const bool samVersionOk = lhs.Version() == rhs.Version();
- const bool sortOrderOk = lhs.SortOrder() == rhs.SortOrder();
- const bool pbVersionOk = lhs.PacBioBamVersion() == rhs.PacBioBamVersion();
- const bool sequencesOk = ( (lhs.SortOrder() == "coordinate") ? lhs.Sequences() == rhs.Sequences()
- : true);
-
- // if all checks out, return
- if (samVersionOk && sortOrderOk && pbVersionOk && sequencesOk)
+ const bool sortOrderOk = CheckSortOrder(lhs.SortOrder(), rhs.SortOrder());
+ const bool pbVersionOk = CheckPbVersion(lhs.PacBioBamVersion(), rhs.PacBioBamVersion());
+ const bool sequencesOk = CheckSequences(lhs.SortOrder(), lhs.Sequences(), rhs.Sequences());
+ if (sortOrderOk && pbVersionOk && sequencesOk)
return;
- // else, format error message & throw
+ // if any checks failed, format error message & throw
stringstream e;
e << "could not merge BAM headers:" << endl;
- if (!samVersionOk) {
- e << " mismatched SAM versions (@HD:VN) : ("
- << lhs.Version() << ", " << rhs.Version()
- << ")" << endl;
- }
-
if (!sortOrderOk) {
e << " mismatched sort orders (@HD:SO) : ("
<< lhs.SortOrder() << ", " << rhs.SortOrder()
@@ -95,7 +104,7 @@ void EnsureCanMerge(const BamHeader& lhs, const BamHeader& rhs)
}
if (!pbVersionOk) {
- e << " mismatched PacBio BAM versions (@HD:pb) : ("
+ e << " incompatible PacBio BAM versions (@HD:pb) : ("
<< lhs.PacBioBamVersion() << ", " << rhs.PacBioBamVersion()
<< ")" << endl;
}
diff --git a/src/BamRecord.cpp b/src/BamRecord.cpp
index 4251cca..ae5253e 100644
--- a/src/BamRecord.cpp
+++ b/src/BamRecord.cpp
@@ -318,7 +318,7 @@ void ClipAndGapifyBases(const BamRecordImpl& impl,
string* seq)
{
ClipAndGapify<string, char>(impl, aligned, exciseSoftClips,
- seq, '*', '-');
+ seq, char('*'), char('-'));
}
static inline
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index a7dfb2f..4b0b2dc 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,35 +1,3 @@
-find_package(Threads)
-
-if(WIN32)
- # Need winsock on windows
- set(SOCKET_LIBRARIES "ws2_32")
-
- # Limit the number of DLLs we will have to bundle
- set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -static-libgcc -static-libstdc++")
- set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -static-libgcc -static-libstdc++")
-endif()
-
-# define PacBioBAM library dependencies
-set(PacBioBAM_DependencyIncludes
- ${Boost_INCLUDE_DIRS}
- ${HTSLIB_INCLUDE_DIRS}
- ${ZLIB_INCLUDE_DIRS}
-)
-set(PacBioBAM_DependencyLibraries
- ${HTSLIB_LIBRARIES}
- ${ZLIB_LIBRARIES}
- ${CMAKE_THREAD_LIBS_INIT}
- ${SOCKET_LIBRARIES}
-)
-
-# set up library include dirs
-include_directories(SYSTEM
- ${PacBioBAM_DependencyIncludes}
-)
-
-include_directories(
- ${PacBioBAM_IncludeDir}
-)
# grab library source files
include(files.cmake)
@@ -40,44 +8,82 @@ set(SOURCES
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${PacBioBAM_CXX_FLAGS}")
# define actual library
-add_definitions(-DPBBAM_LIBRARY)
-if(PacBioBAM_wrap_r)
- # SWIG R does not support PBBAM_SHARED_PTR, but it does support boost::shared_ptr
- # So force boost if we're wrapping for R.
- add_definitions(-DPBBAM_USE_BOOST_SHARED_PTR)
-endif()
-
add_library(pbbam ${SOURCES})
+
+# library properties
+target_compile_definitions(pbbam
+ PRIVATE "-DPBBAM_LIBRARY"
+)
set_target_properties(pbbam PROPERTIES
ARCHIVE_OUTPUT_DIRECTORY ${PacBioBAM_LibDir}
RUNTIME_OUTPUT_DIRECTORY ${PacBioBAM_LibDir}
LIBRARY_OUTPUT_DIRECTORY ${PacBioBAM_LibDir}
)
-
-# link dependency libs to pbbam
-target_link_libraries(pbbam ${PacBioBAM_DependencyLibraries})
-target_include_directories(pbbam
+
+if(PacBioBAM_wrap_r)
+ # SWIG R does not support std::shared_ptr, but it does support boost::shared_ptr
+ # So force boost if we're wrapping for R.
+ target_compile_definitions(pbbam
+ PUBLIC -DPBBAM_USE_BOOST_SHARED_PTR
+ )
+endif()
+
+if(PacBioBAM_auto_validate)
+ target_compile_definitions(pbbam
+ PUBLIC "-DPBBAM_AUTOVALIDATE=1"
+ )
+endif()
+
+# pbbam includes
+target_include_directories(pbbam
+ PUBLIC
+ ${PacBioBAM_IncludeDir}
+ ${HTSLIB_INCLUDE_DIRS}
+ ${Boost_INCLUDE_DIRS}
+ ${ZLIB_INCLUDE_DIRS}
+)
+
+# set link dependencies
+# if htslib provided externally
+if(HTSLIB_LIBRARIES)
+ set(pbbam_all_dependency_libs
+ ${HTSLIB_LIBRARIES}
+ ${ZLIB_LIBRARIES}
+ ${SOCKET_LIBRARIES}
+ ${CMAKE_THREAD_LIBS_INIT}
+ )
+# otherwise, use the "in-project" htslib target
+else()
+ set(pbbam_all_dependency_libs
+ $<TARGET_FILE:hts>
+ ${ZLIB_LIBRARIES}
+ ${SOCKET_LIBRARIES}
+ ${CMAKE_THREAD_LIBS_INIT}
+ )
+endif()
+
+target_link_libraries(pbbam
PUBLIC
- ${PacBioBAM_IncludeDir}
- ${PacBioBAM_DependencyIncludes}
+ ${pbbam_all_dependency_libs}
)
-# define symbols for projects that use PacBioBAM
+# define include paths for projects that use pbbam
set(PacBioBAM_INCLUDE_DIRS
- ${PacBioBAM_IncludeDir} ${PacBioBAM_DependencyIncludes}
+ ${PacBioBAM_IncludeDir}
+ ${HTSLIB_INCLUDE_DIRS}
+ ${Boost_INCLUDE_DIRS}
+ ${ZLIB_INCLUDE_DIRS}
CACHE INTERNAL
"${PROJECT_NAME}: Include Directories"
FORCE
)
-
set(PacBioBAM_LIBRARIES
- ${PacBioBAM_LibDir}/libpbbam${PB_LIB_SUFFIX} ${PacBioBAM_DependencyLibraries}
+ $<TARGET_FILE:pbbam>
+ ${pbbam_all_dependency_libs}
CACHE INTERNAL
"${PROJECT_NAME}: Libraries"
FORCE
)
-if(${wrapping_swig})
- # add SWIG directory
- add_subdirectory(swig)
-endif() # swig
+# add SWIG directory
+add_subdirectory(swig)
diff --git a/src/ChemistryTable.cpp b/src/ChemistryTable.cpp
index d637a9e..fbc161f 100644
--- a/src/ChemistryTable.cpp
+++ b/src/ChemistryTable.cpp
@@ -62,15 +62,22 @@ extern const std::vector<std::array<std::string, 4>> ChemistryTable = {
// 3.1 ("Echidna"): S/P1-C1.1
{{"100-619-300", "100-867-300", "3.1", "S/P1-C1.1"}},
{{"100-619-300", "100-867-300", "3.2", "S/P1-C1.1"}},
-
+ {{"100-619-300", "100-867-300", "3.3", "S/P1-C1.1"}},
// 3.1.1 ("Flea"): S/P1-C1.2
{{"100-619-300", "100-902-100", "3.1", "S/P1-C1.2"}},
{{"100-619-300", "100-902-100", "3.2", "S/P1-C1.2"}},
-
+ {{"100-619-300", "100-902-100", "3.3", "S/P1-C1.2"}},
+ {{"100-619-300", "100-902-100", "4.0", "S/P1-C1.2"}},
// 3.2 ("Goat"): S/P1-C1.3
- {{"100-619-300", "100-972-200", "3.2", "S/P1-C1.3"}}
+ {{"100-619-300", "100-972-200", "3.2", "S/P1-C1.3"}},
+ {{"100-619-300", "100-972-200", "3.3", "S/P1-C1.3"}},
+ {{"100-619-300", "100-972-200", "4.0", "S/P1-C1.3"}},
+
+ // 4.0 ("Seabiscuit"); S/P2-C2
+ {{"100-862-200", "100-861-800", "4.0", "S/P2-C2"}}
+
};
} // namespace internal
diff --git a/src/DataSet.cpp b/src/DataSet.cpp
index a44780b..af6141c 100644
--- a/src/DataSet.cpp
+++ b/src/DataSet.cpp
@@ -52,9 +52,32 @@ using namespace PacBio::BAM;
using namespace PacBio::BAM::internal;
using namespace std;
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+static const string defaultVersion{ "4.0.0" };
+
+static inline void InitDefaults(DataSet& ds)
+{
+ // provide default 'CreatedAt' & 'Version' attributes if not already present in XML
+
+ if (ds.CreatedAt().empty())
+ ds.CreatedAt(internal::ToIso8601(CurrentTime()));
+
+ if (ds.Version().empty())
+ ds.Version(internal::defaultVersion);
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
DataSet::DataSet(void)
: DataSet(DataSet::GENERIC)
-{ }
+{
+ InitDefaults(*this);
+}
DataSet::DataSet(const DataSet::TypeEnum type)
: d_(nullptr)
@@ -74,14 +97,14 @@ DataSet::DataSet(const DataSet::TypeEnum type)
throw std::runtime_error("unsupported dataset type"); // unknown type
}
- CreatedAt(internal::ToIso8601(CurrentTime()));
+ InitDefaults(*this);
}
DataSet::DataSet(const BamFile& bamFile)
: d_(DataSetIO::FromUri(bamFile.Filename()))
, path_(FileUtils::CurrentWorkingDirectory())
{
- CreatedAt(internal::ToIso8601(CurrentTime()));
+ InitDefaults(*this);
}
DataSet::DataSet(const string& filename)
@@ -98,12 +121,15 @@ DataSet::DataSet(const string& filename)
{
path_ = FileUtils::CurrentWorkingDirectory();
}
+ InitDefaults(*this);
}
DataSet::DataSet(const vector<string>& filenames)
: d_(DataSetIO::FromUris(filenames))
, path_(FileUtils::CurrentWorkingDirectory())
-{ }
+{
+ InitDefaults(*this);
+}
DataSet::DataSet(const DataSet& other)
: path_(other.path_)
@@ -148,19 +174,14 @@ vector<BamFile> DataSet::BamFiles(void) const
{
const PacBio::BAM::ExternalResources& resources = ExternalResources();
-// cerr << "path: " << this->path_ << endl;
-
vector<BamFile> result;
result.reserve(resources.Size());
for(const ExternalResource& ext : resources) {
-// cerr << ext.ResourceId() << std::endl;
-
// only bother resolving file path if this is a BAM file
boost::iterator_range<string::const_iterator> bamFound = boost::algorithm::ifind_first(ext.MetaType(), "bam");
if (!bamFound.empty()) {
const string fn = ResolvePath(ext.ResourceId());
-// const string fn = internal::FileUtils::ResolvedFilePath(ext.ResourceId(), path_);
result.push_back(BamFile(fn));
}
}
@@ -171,8 +192,7 @@ DataSet DataSet::FromXml(const string& xml)
{
DataSet result;
result.d_ = internal::DataSetIO::FromXmlString(xml);
- if (result.CreatedAt().empty())
- result.CreatedAt(internal::ToIso8601(internal::CurrentTime()));
+ InitDefaults(result);
return result;
}
diff --git a/src/FastaReader.cpp b/src/FastaReader.cpp
new file mode 100644
index 0000000..f82d635
--- /dev/null
+++ b/src/FastaReader.cpp
@@ -0,0 +1,155 @@
+// Copyright (c) 2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file FastaReader.cpp
+/// \brief Implements the FastaReader class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/FastaReader.h"
+#include <htslib/faidx.h>
+#include <stdexcept>
+#include <fstream>
+#include <iostream>
+#include <limits>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+struct FastaReaderPrivate
+{
+ ifstream stream_;
+ string name_;
+ string bases_;
+
+ FastaReaderPrivate(const std::string& fn)
+ : stream_(fn)
+ {
+ if (!stream_)
+ throw std::runtime_error("FastaReader - could not open " + fn + " for reading");
+ FetchNext();
+ }
+
+ bool GetNext(FastaSequence& record)
+ {
+ if (name_.empty() && bases_.empty()) // nothing was pre-fetched: report end of input
+ return false;
+ record = FastaSequence { std::move(name_), std::move(bases_) }; // FetchNext() clears both buffers next, so hand them over instead of copying
+ FetchNext();
+ return true;
+ }
+
+private:
+ void FetchNext(void)
+ {
+ name_.clear();
+ bases_.clear();
+
+ SkipNewlines();
+ ReadName();
+ ReadBases();
+ }
+
+ inline void SkipNewlines(void)
+ {
+ // skip every leading blank line, not just one: a leftover '\n' is eaten by ReadName()
+ // in place of '>', which can leave an empty record that GetNext() treats as end of input
+ while (stream_ && stream_.peek() == '\n')
+ stream_.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
+ }
+
+ void ReadName(void) {
+ if (!stream_)
+ return;
+ if (stream_.get() == '>')
+ std::getline(stream_, name_, '\n');
+ }
+
+ void ReadBases(void)
+ {
+ if (!stream_)
+ return;
+ char c = static_cast<char>(stream_.peek());
+ string line;
+ while (c != '>') {
+ if (!stream_)
+ return;
+ std::getline(stream_, line, '\n');
+ bases_ += line;
+ if (!stream_)
+ return;
+ c = static_cast<char>(stream_.peek());
+ }
+ }
+};
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+FastaReader::FastaReader(const std::string& fn)
+ : d_{ new internal::FastaReaderPrivate{ fn } }
+{ }
+
+FastaReader::FastaReader(FastaReader&& other)
+ : d_{ std::move(other.d_) }
+{ }
+
+FastaReader& FastaReader::operator=(FastaReader&& other)
+{
+ d_.swap(other.d_);
+ return *this;
+}
+
+FastaReader::~FastaReader(void) { }
+
+bool FastaReader::GetNext(FastaSequence& record)
+{ return d_->GetNext(record); }
+
+vector<FastaSequence> FastaReader::ReadAll(const string& fn)
+{
+ vector<FastaSequence> result;
+ result.reserve(256);
+ FastaReader reader{ fn };
+ FastaSequence s;
+ while(reader.GetNext(s))
+ result.emplace_back(std::move(s)); // s is reassigned by the next GetNext(), so move it rather than deep-copy the bases
+ return result;
+}
diff --git a/src/PbiFilter.cpp b/src/PbiFilter.cpp
index af3097f..0ce8930 100644
--- a/src/PbiFilter.cpp
+++ b/src/PbiFilter.cpp
@@ -104,6 +104,10 @@ static const unordered_map<string, BuiltIn> builtInLookup =
{ "readstart", BuiltIn::AlignedStartFilter },
{ "bc", BuiltIn::BarcodeFilter },
{ "barcode", BuiltIn::BarcodeFilter },
+ { "bcf", BuiltIn::BarcodeForwardFilter },
+ { "bq", BuiltIn::BarcodeQualityFilter },
+ { "bcq", BuiltIn::BarcodeQualityFilter },
+ { "bcr", BuiltIn::BarcodeReverseFilter },
{ "accuracy", BuiltIn::IdentityFilter },
{ "identity", BuiltIn::IdentityFilter },
{ "cx", BuiltIn::LocalContextFilter },
@@ -138,24 +142,24 @@ static const unordered_map<string, LocalContextFlags> contextFlagNames =
{ "REVERSE_PASS", LocalContextFlags::REVERSE_PASS }
};
+// helper methods (for handling maybe-list strings)
+// Returns true when 'value' starts with one of "[({" and ends with one of
+// "])}". The opening and closing characters are checked independently, so
+// mismatched pairs such as "[1,2)" also count as bracketed.
+// An empty string is reported as not bracketed instead of throwing.
+static inline bool isBracketed(const string& value)
+{
+    // guard: there is no first/last character to inspect
+    if (value.empty())
+        return false;
+
+    static const string openBrackets = "[({";
+    static const string closeBrackets = "])}";
+    return openBrackets.find(value.front()) != string::npos &&
+           closeBrackets.find(value.back()) != string::npos;
+}
+
+// Returns true when 'value' contains at least one comma, i.e. when it should
+// be treated as a list of several entries rather than a single value.
+static inline bool isList(const string& value)
+{
+    const string::size_type commaPos = value.find(',');
+    return commaPos != string::npos;
+}
+
static
PbiFilter CreateBarcodeFilter(string value,
const Compare::Type compareType)
{
- // little helper lambdas (for readability below)
- auto isBracketed = [](const string& value)
- {
- static const string openBrackets = "[({";
- static const string closeBrackets = "])}";
- return openBrackets.find(value.at(0)) != string::npos &&
- closeBrackets.find(value.at(value.length()-1)) != string::npos;
- };
- auto isList = [](const string& value)
- {
- return value.find(',') != string::npos;
- };
-
-
if (value.empty())
throw std::runtime_error("empty value for barcode filter property");
@@ -177,6 +181,52 @@ PbiFilter CreateBarcodeFilter(string value,
}
static
+// Builds a PbiBarcodeForwardFilter from a textual property value.
+//
+// 'value' is either a single integer or a comma-separated list of integers,
+// optionally wrapped in one pair of brackets. Every number is converted with
+// stoi and range-checked into int16_t via boost::numeric_cast.
+// A single value is combined with 'compareType'; for a list, 'compareType'
+// is not passed to the filter.
+// Throws std::runtime_error when 'value' is empty.
+PbiFilter CreateBarcodeForwardFilter(string value,
+                                     const Compare::Type compareType)
+{
+    if (value.empty())
+        throw std::runtime_error("empty value for barcode_forward filter property");
+
+    // strip the enclosing bracket pair, if present
+    if (isBracketed(value)) {
+        value.pop_back();
+        value.erase(0,1);
+    }
+
+    // single value: honour the requested comparison
+    if (!isList(value)) {
+        const int16_t barcode = boost::numeric_cast<int16_t>(stoi(value));
+        return PbiBarcodeForwardFilter{ barcode, compareType };
+    }
+
+    // list of values: convert each token in order
+    const vector<string> tokens = internal::Split(value, ',');
+    vector<int16_t> barcodes;
+    barcodes.reserve(tokens.size());
+    for (const string& token : tokens)
+        barcodes.push_back(boost::numeric_cast<int16_t>(stoi(token)));
+    return PbiBarcodeForwardFilter{ std::move(barcodes) };
+}
+
+// Builds a PbiBarcodeReverseFilter from a textual property value.
+//
+// 'value' is either a single integer or a comma-separated list of integers,
+// optionally wrapped in one pair of brackets. Every number is converted with
+// stoi and range-checked into int16_t via boost::numeric_cast.
+// A single value is combined with 'compareType'; for a list, 'compareType'
+// is not passed to the filter.
+// Throws std::runtime_error when 'value' is empty.
+static
+PbiFilter CreateBarcodeReverseFilter(string value,
+                                     const Compare::Type compareType)
+{
+    if (value.empty())
+        throw std::runtime_error("empty value for barcode_reverse filter property");
+
+    // strip the enclosing bracket pair, if present
+    if (isBracketed(value)) {
+        value.pop_back();
+        value.erase(0,1);
+    }
+
+    // single value: honour the requested comparison
+    if (!isList(value)) {
+        const int16_t barcode = boost::numeric_cast<int16_t>(stoi(value));
+        return PbiBarcodeReverseFilter{ barcode, compareType };
+    }
+
+    // list of values: convert each token in order
+    const vector<string> tokens = internal::Split(value, ',');
+    vector<int16_t> barcodes;
+    barcodes.reserve(tokens.size());
+    for (const string& token : tokens)
+        barcodes.push_back(boost::numeric_cast<int16_t>(stoi(token)));
+    return PbiBarcodeReverseFilter{ std::move(barcodes) };
+}
+
+static
PbiFilter CreateLocalContextFilter(const string& value,
const Compare::Type compareType)
{
@@ -229,6 +279,7 @@ PbiFilter FromDataSetProperty(const Property& property,
case BuiltIn::AlignedEndFilter : return PbiAlignedEndFilter{ static_cast<uint32_t>(stoul(value)), compareType };
case BuiltIn::AlignedLengthFilter : return PbiAlignedLengthFilter{ static_cast<uint32_t>(stoul(value)), compareType };
case BuiltIn::AlignedStartFilter : return PbiAlignedStartFilter{ static_cast<uint32_t>(stoul(value)), compareType };
+ case BuiltIn::BarcodeQualityFilter : return PbiBarcodeQualityFilter{ static_cast<uint8_t>(stoul(value)), compareType };
case BuiltIn::IdentityFilter : return PbiIdentityFilter{ stof(value), compareType };
case BuiltIn::MovieNameFilter : return PbiMovieNameFilter{ value };
case BuiltIn::QueryEndFilter : return PbiQueryEndFilter{ stoi(value), compareType };
@@ -244,8 +295,10 @@ PbiFilter FromDataSetProperty(const Property& property,
case BuiltIn::ZmwFilter : return PbiZmwFilter{ stoi(value), compareType };
// (maybe) list-value filters
- case BuiltIn::BarcodeFilter : return CreateBarcodeFilter(value, compareType);
- case BuiltIn::LocalContextFilter : return CreateLocalContextFilter(value, compareType);
+ case BuiltIn::BarcodeFilter : return CreateBarcodeFilter(value, compareType);
+ case BuiltIn::BarcodeForwardFilter : return CreateBarcodeForwardFilter(value, compareType);
+ case BuiltIn::BarcodeReverseFilter : return CreateBarcodeReverseFilter(value, compareType);
+ case BuiltIn::LocalContextFilter : return CreateLocalContextFilter(value, compareType);
// other built-ins
case BuiltIn::QueryNamesFromFileFilter : return CreateQueryNamesFilterFromFile(value, dataset); // compareType ignored
diff --git a/src/PbiIndexedBamReader.cpp b/src/PbiIndexedBamReader.cpp
index 685d4c0..e9aeeb7 100644
--- a/src/PbiIndexedBamReader.cpp
+++ b/src/PbiIndexedBamReader.cpp
@@ -78,7 +78,9 @@ public:
// find blocks of reads passing filter criteria
const uint32_t numReads = index_.NumReads();
- if (filter_.IsEmpty()) {
+ if (numReads == 0) { // empty PBI - no reads to use
+ return;
+ } else if (filter_.IsEmpty()) { // empty filter - use all reads
blocks_.push_back(IndexResultBlock{0, numReads});
} else {
IndexList indices;
diff --git a/src/Pulse2BaseCache.h b/src/Pulse2BaseCache.h
index cf47237..ae5bb1c 100644
--- a/src/Pulse2BaseCache.h
+++ b/src/Pulse2BaseCache.h
@@ -73,7 +73,7 @@ public:
Pulse2BaseCache(Pulse2BaseCache&& other) = default;
Pulse2BaseCache& operator=(const Pulse2BaseCache&) = default;
Pulse2BaseCache& operator=(Pulse2BaseCache&&) = default;
- ~Pulse2BaseCache(void) noexcept = default;
+ ~Pulse2BaseCache(void) noexcept {}
public:
diff --git a/src/SequenceUtils.h b/src/SequenceUtils.h
index f1ad3c0..14ad898 100644
--- a/src/SequenceUtils.h
+++ b/src/SequenceUtils.h
@@ -113,7 +113,7 @@ inline void ReverseComplementCaseSens(std::string& seq)
int8_t rc_table[128] = {
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 32, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 45, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 42, 4, 4, 45, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 84, 4, 71, 4, 4, 4, 67, 4, 4, 4, 4,
4, 4, 78, 4, 4, 4, 4, 4, 65, 65, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 116, 4, 103, 4, 4, 4, 99, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
diff --git a/src/files.cmake b/src/files.cmake
index e966099..808b585 100644
--- a/src/files.cmake
+++ b/src/files.cmake
@@ -27,6 +27,8 @@ set( PacBioBAM_H
${PacBioBAM_IncludeDir}/pbbam/DataSetTypes.h
${PacBioBAM_IncludeDir}/pbbam/DataSetXsd.h
${PacBioBAM_IncludeDir}/pbbam/EntireFileQuery.h
+ ${PacBioBAM_IncludeDir}/pbbam/FastaReader.h
+ ${PacBioBAM_IncludeDir}/pbbam/FastaSequence.h
${PacBioBAM_IncludeDir}/pbbam/FrameEncodingType.h
${PacBioBAM_IncludeDir}/pbbam/Frames.h
${PacBioBAM_IncludeDir}/pbbam/GenomicInterval.h
@@ -93,6 +95,7 @@ set( PacBioBAM_H
${PacBioBAM_IncludeDir}/pbbam/internal/DataSetListElement.h
${PacBioBAM_IncludeDir}/pbbam/internal/DataSetListElement.inl
${PacBioBAM_IncludeDir}/pbbam/internal/DataSetTypes.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/FastaSequence.inl
${PacBioBAM_IncludeDir}/pbbam/internal/Frames.inl
${PacBioBAM_IncludeDir}/pbbam/internal/GenomicInterval.inl
${PacBioBAM_IncludeDir}/pbbam/internal/Interval.inl
@@ -179,6 +182,7 @@ set( PacBioBAM_CPP
${PacBioBAM_SourceDir}/DataSetTypes.cpp
${PacBioBAM_SourceDir}/DataSetXsd.cpp
${PacBioBAM_SourceDir}/EntireFileQuery.cpp
+ ${PacBioBAM_SourceDir}/FastaReader.cpp
${PacBioBAM_SourceDir}/FileProducer.cpp
${PacBioBAM_SourceDir}/FileUtils.cpp
${PacBioBAM_SourceDir}/FofnReader.cpp
diff --git a/src/swig/CMakeLists.txt b/src/swig/CMakeLists.txt
index 8f52386..a8869c3 100644
--- a/src/swig/CMakeLists.txt
+++ b/src/swig/CMakeLists.txt
@@ -8,7 +8,7 @@ if(${wrapping_swig})
find_package(SWIG 3.0.5 REQUIRED)
include(${SWIG_USE_FILE})
- include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+ include_directories(${PacBioBAM_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR})
#
# quash compiler warnings from SWIG-generated code
diff --git a/src/swig/WrapCSharp.cmake b/src/swig/WrapCSharp.cmake
index a6eee7f..cb44d74 100644
--- a/src/swig/WrapCSharp.cmake
+++ b/src/swig/WrapCSharp.cmake
@@ -41,6 +41,6 @@ configure_file(
add_custom_command(
OUTPUT ${PacBioBAM_CSharpDLL}
DEPENDS ${SWIG_MODULE_PacBioBam_REAL_NAME}
- COMMAND bash ./buildAssembly.sh
+ COMMAND bash ./buildAssembly.sh "${HTSLIB_LIBRARIES}"
)
add_custom_target(CSharpAssembly ALL DEPENDS ${PacBioBAM_CSharpDLL})
diff --git a/src/swig/WrapPython.cmake b/src/swig/WrapPython.cmake
index 839d7a5..719c5c2 100644
--- a/src/swig/WrapPython.cmake
+++ b/src/swig/WrapPython.cmake
@@ -16,7 +16,8 @@ set_target_properties(
PROPERTIES
LIBRARY_OUTPUT_DIRECTORY ${PacBioBAM_PythonLibDir}
)
-add_dependencies(${SWIG_MODULE_PacBioBam_REAL_NAME} pbbam)
+#add_dependencies(${SWIG_MODULE_PacBioBam_REAL_NAME} pbbam ${PacBioBAM_LIBRARIES})
+target_link_libraries(${SWIG_MODULE_PacBioBam_REAL_NAME} pbbam)
# simple "wrapper worked" check
# this is run every build, to check importing from Python, but does NOT run full Python-side unit tests
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 9115b32..bbcc1e5 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -1,72 +1,80 @@
-find_package(Threads REQUIRED)
-# ensure output directories exists
-file(MAKE_DIRECTORY ${PacBioBAM_TestsDir}/bin)
-set(GeneratedTestDataDir ${CMAKE_BINARY_DIR}/generated/data)
-file(MAKE_DIRECTORY ${GeneratedTestDataDir})
+if(PacBioBAM_build_tests)
-# generate paths/values used by for unit tests
-configure_file(
- ${PacBioBAM_TestsDir}/src/TestData.h.in
- ${PacBioBAM_TestsDir}/src/TestData.h
-)
-configure_file(
- ${PacBioBAM_TestsDir}/data/group/group.fofn.in
- ${PacBioBAM_TestsDir}/data/group/group.fofn
-)
+ # setup GoogleTest
+ if (NOT GTEST_SRC_DIR)
+ set(PREBUILT_GTEST_SRC ${PacBioBAM_RootDir}/../../../../prebuilt.tmpout/gtest/gtest_1.7.0/)
+ if(EXISTS ${PREBUILT_GTEST_SRC})
+ set(GTEST_SRC_DIR ${PREBUILT_GTEST_SRC})
+ else()
+ set(GTEST_SRC_DIR ${PacBioBAM_RootDir}/../gtest) # keep old fallback behavior for external builds, for now at least
+ endif()
+ endif()
+ add_subdirectory(${GTEST_SRC_DIR} external/gtest/build)
-# add PacBioBAM & GoogleTest includes
-include_directories(
- ${PacBioBAM_INCLUDE_DIRS}
- ${gtest_SOURCE_DIR}/include
- ${gtest_SOURCE_DIR}
-)
+ # generate paths/values used by unit tests
+ configure_file(
+ ${PacBioBAM_TestsDir}/src/TestData.h.in
+ ${CMAKE_BINARY_DIR}/generated/TestData.h
+ )
+ configure_file(
+ ${PacBioBAM_TestsDir}/data/group/group.fofn.in
+ ${CMAKE_BINARY_DIR}/generated/group.fofn
+ )
-# grab PacBioBAM unit test source files
-include(files.cmake)
-set(SOURCES
- ${PacBioBAMTest_H}
- ${PacBioBAMTest_CPP}
-)
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${PacBioBAM_CXX_FLAGS}")
+ # grab PacBioBAM unit test source files
+ include(files.cmake)
+ set(SOURCES
+ ${PacBioBAMTest_H}
+ ${PacBioBAMTest_CPP}
+ )
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${PacBioBAM_CXX_FLAGS}")
-# define unit test executable
-add_definitions(-DPBBAM_TESTING)
-if(MSVC)
- # VS2012+ pooh-pooh's Derek's "#define private public" trick
- add_definitions(-D_ALLOW_KEYWORD_MACROS)
-endif()
+ # define unit test executable
+ add_definitions(-DPBBAM_TESTING)
+ if(MSVC)
+ # VS2012+ pooh-pooh's Derek's "#define private public" trick
+ add_definitions(-D_ALLOW_KEYWORD_MACROS)
+ endif()
-if(PacBioBAM_wrap_r)
- # SWIG R does not support std::shared_ptr, but it does support boost::shared_ptr
- # So force boost if we're wrapping for R.
- add_definitions(-DPBBAM_USE_BOOST_SHARED_PTR)
-endif()
-add_executable(test_pbbam ${SOURCES})
-set_target_properties(test_pbbam PROPERTIES
- RUNTIME_OUTPUT_DIRECTORY ${PacBioBAM_TestsDir}/bin
-)
+ if(PacBioBAM_wrap_r)
+ # SWIG R does not support std::shared_ptr, but it does support boost::shared_ptr
+ # So force boost if we're wrapping for R.
+ add_definitions(-DPBBAM_USE_BOOST_SHARED_PTR)
+ endif()
-# generate test data
-add_custom_target(
- generate_test_data
- WORKING_DIRECTORY ${PacBioBAM_TestsDir}/scripts
- COMMAND "python" generate_data.py
- ${PacBioBAM_TestsDir}/data/
- ${GeneratedTestDataDir}
- baz
-)
+ add_executable(test_pbbam ${SOURCES})
+ set_target_properties(test_pbbam PROPERTIES
+ RUNTIME_OUTPUT_DIRECTORY ${PacBioBAM_BinDir}
+ )
+ target_include_directories(test_pbbam
+ PUBLIC
+ ${CMAKE_BINARY_DIR}/generated
+ ${PacBioBAM_INCLUDE_DIRS}
+ ${gtest_SOURCE_DIR}/include
+ ${gtest_SOURCE_DIR}
+ )
-# add unit tests to test framework
-add_test(
- NAME UnitTests
- WORKING_DIRECTORY ${PacBioBAM_TestsDir}/bin
- COMMAND test_pbbam
-)
-add_dependencies(test_pbbam pbbam generate_test_data)
-target_link_libraries(test_pbbam
- ${PacBioBAM_LIBRARIES}
- ${CMAKE_THREAD_LIBS_INIT} # quirky pthreads
- gtest
- gtest_main
-)
+ # generate test data
+ add_custom_target(
+ generate_test_data
+ WORKING_DIRECTORY ${PacBioBAM_TestsDir}/scripts
+ COMMAND "python" generate_data.py
+ ${PacBioBAM_TestsDir}/data/
+ ${GeneratedTestDataDir}
+ )
+
+ # add unit tests to test framework
+ add_test(
+ NAME UnitTests
+ WORKING_DIRECTORY ${PacBioBAM_BinDir}
+ COMMAND test_pbbam
+ )
+ add_dependencies(test_pbbam generate_test_data)
+ target_link_libraries(test_pbbam
+ pbbam
+ ${CMAKE_THREAD_LIBS_INIT} # quirky pthreads
+ gtest
+ gtest_main
+ )
+endif() # PacBioBAM_build_tests
diff --git a/tests/data/dataset/malformed.xml b/tests/data/dataset/malformed.xml
index e9000c8..31e0942 100644
--- a/tests/data/dataset/malformed.xml
+++ b/tests/data/dataset/malformed.xml
@@ -1,6 +1,7 @@
<?xml version="1.0" encoding="utf-8"?>
<SubreadSet
- Description="Merged dataset from 1 files using DatasetMerger 0.1.2"
+ CreatedAt="2015-08-19T15:39:36.331"
+ Description="Merged dataset from 1 files using DatasetMerger 0.1.2"
MetaType="PacBio.DataSet.HdfSubreadSet"
Name="Subreads from runr000013_42267_150403"
Tags="pacbio.secondary.instrument=RS"
diff --git a/tests/data/empty.bam b/tests/data/empty.bam
new file mode 100644
index 0000000..1b22456
Binary files /dev/null and b/tests/data/empty.bam differ
diff --git a/tests/data/empty.bam.pbi b/tests/data/empty.bam.pbi
new file mode 100644
index 0000000..e398d79
Binary files /dev/null and b/tests/data/empty.bam.pbi differ
diff --git a/tests/data/phi29.bam.pbi b/tests/data/phi29.bam.pbi
new file mode 100644
index 0000000..5282b94
Binary files /dev/null and b/tests/data/phi29.bam.pbi differ
diff --git a/tests/files.cmake b/tests/files.cmake
index 39ca304..61370ac 100644
--- a/tests/files.cmake
+++ b/tests/files.cmake
@@ -27,6 +27,7 @@ set( PacBioBAMTest_CPP
${PacBioBAM_TestsDir}/src/test_DataSetXsd.cpp
${PacBioBAM_TestsDir}/src/test_EndToEnd.cpp
${PacBioBAM_TestsDir}/src/test_EntireFileQuery.cpp
+ ${PacBioBAM_TestsDir}/src/test_Fasta.cpp
${PacBioBAM_TestsDir}/src/test_FileUtils.cpp
${PacBioBAM_TestsDir}/src/test_Frames.cpp
${PacBioBAM_TestsDir}/src/test_GenomicIntervalQuery.cpp
diff --git a/tests/scripts/generate_data.py b/tests/scripts/generate_data.py
index b1c8def..ac28dbb 100755
--- a/tests/scripts/generate_data.py
+++ b/tests/scripts/generate_data.py
@@ -5,6 +5,15 @@ from __future__ import print_function
import os, shutil, sys
import StringIO
+fastaSeq_1 = """TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAAC
+AACGCAGCTCCGCCCTCGCGGTGCTCTCCGGGTCTGTGCTGAGGAGAACGCAACTCCGCCGGCGCAGGCG"""
+
+fastaSeq_2 = """TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAAC
+AACGCAGCTCCGCCCTCGCGGTGCTCTCCGGGTCTGTGCTGAGGAGAACGCAAC"""
+
+fastaSeq_3 = """TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAAC
+ACCCTAACCCCAACCCCAACCCCAACCCCAACCCCAACCCCAACCCTAACCCCTAACCCTAACCCT"""
+
# file creation decorator
def fileMaker(func):
def inner(*args, **kwargs):
@@ -50,6 +59,7 @@ class TestDataGenerator:
'truncated.bam' : self.makeTruncatedBam,
'chunking_emptyfilters.subreadset.xml' : self.makeChunkingXml,
'chunking_missingfilters.subreadset.xml' : self.makeChunkingXml,
+ 'normal.fa' : self.makeNormalFasta
}
self.outputSymlinks = {
'aligned.bam' : self.makeAlignedBamCopy,
@@ -64,6 +74,7 @@ class TestDataGenerator:
'm150404_101626_42267_c100807920800000001823174110291514_s1_p0.2.subreads.bam.pbi' : self.makeChunkingSymlink,
'm150404_101626_42267_c100807920800000001823174110291514_s1_p0.3.subreads.bam' : self.makeChunkingSymlink,
'm150404_101626_42267_c100807920800000001823174110291514_s1_p0.3.subreads.bam.pbi' : self.makeChunkingSymlink,
+ 'missing_pbi.bam' : self.makeMissingPbiBam,
}
def editChunkingXml(self, outputFn, removeFiltersNode):
@@ -92,6 +103,12 @@ class TestDataGenerator:
source = os.path.join(self.testDataDir,'chunking', outputFn)
dest = os.path.join(self.generatedDataDir, outputFn)
os.symlink(source, dest)
+
+    @fileLinker
+    def makeMissingPbiBam(self, outputFn):
+        """Create a symlink named outputFn in generatedDataDir that points at
+        phi29.bam in testDataDir."""
+        target = os.path.join(self.testDataDir, 'phi29.bam')
+        linkPath = os.path.join(self.generatedDataDir, outputFn)
+        os.symlink(target, linkPath)
@fileMaker
def makeChunkingXml(self, outputFn):
@@ -102,6 +119,13 @@ class TestDataGenerator:
self.editChunkingXml(outputFn, removeFiltersNode)
@fileMaker
+    def makeNormalFasta(self, outputFn):
+        """Write a three-record FASTA file (names 1, 2, 3) to
+        generatedDataDir/outputFn.
+
+        The text is built from the module-level fastaSeq_1..3 strings and is
+        written without a newline after the last sequence line.
+        """
+        content = ">1\n" + fastaSeq_1 + "\n>2\n" + fastaSeq_2 + "\n>3\n" + fastaSeq_3
+        dest = os.path.join(self.generatedDataDir, outputFn)
+        # write to the computed destination, not to the bare file name
+        # (which would land in whatever the current working directory is)
+        with open(dest, 'w') as fasta_out:
+            fasta_out.write(content)
+
+ @fileMaker
def makeTruncatedBam(self, outputFn):
source = os.path.join(self.testDataDir, 'phi29.bam')
dest = os.path.join(self.generatedDataDir, outputFn)
diff --git a/tests/src/CSharp/buildAssembly.sh.in b/tests/src/CSharp/buildAssembly.sh.in
index 30313e2..7e667b3 100644
--- a/tests/src/CSharp/buildAssembly.sh.in
+++ b/tests/src/CSharp/buildAssembly.sh.in
@@ -32,6 +32,10 @@ CSPROJ_ROOT=${PacBioBAM_CSharpLibDir}
CSPROJ=${PacBioBAM_CSharpLibDir}/PacBio.BAM.csproj
ASSEMBLY_ROOT=${PacBioBAM_CSharpLibDir}/bin/Debug
+# get expanded cmake generator expression ( $<TARGET:hts> or externally defined -DHTSLIB_LIBRARIES="")
+# from cmd line
+EXPANDED_HTSLIB_LIBRARIES=("$@")
+
#
# Make the managed DLL
#
@@ -41,7 +45,7 @@ ASSEMBLY_ROOT=${PacBioBAM_CSharpLibDir}/bin/Debug
# Copy the dependency libs
#
cp ${PacBioBAM_LibDir}/libpbbam${CMAKE_SHARED_LIBRARY_SUFFIX} $ASSEMBLY_ROOT
-cp ${HTSLIB_LIBRARIES} $ASSEMBLY_ROOT # Need "libhts*.dylib"
+cp "$EXPANDED_HTSLIB_LIBRARIES" $ASSEMBLY_ROOT # Need "libhts*.dylib"
if [ "$PLATFORM" == "Windows" ]
then
@@ -52,7 +56,7 @@ then
else
# For UNIX this is .so, even Mac. Not sure why.
cp ${PacBioBAM_CSharpLibDir}/libPacBioBam.so $ASSEMBLY_ROOT
- cp ${HTSLIB_LIBRARIES_VERSIONED_LINK} $ASSEMBLY_ROOT # Need "libhts*.dylib"
+# cp ${HTSLIB_LIBRARIES_VERSIONED_LINK} $ASSEMBLY_ROOT # Need "libhts*.dylib"
fi
# Bundle test data
diff --git a/tests/src/TestData.h.in b/tests/src/TestData.h.in
index 297601f..b25d262 100644
--- a/tests/src/TestData.h.in
+++ b/tests/src/TestData.h.in
@@ -47,6 +47,7 @@ namespace tests {
const std::string Source_Dir = std::string("@PacBioBAM_TestsDir@");
const std::string Bin_Dir = std::string("@CMAKE_CURRENT_BINARY_DIR@");
const std::string Data_Dir = std::string("@PacBioBAM_TestsDir@/data");
+const std::string Generated_Dir = std::string("@GeneratedDir@");
const std::string GeneratedData_Dir = std::string("@GeneratedTestDataDir@");
const std::string Bam2Sam = std::string("@PacBioBAM_BinDir@/bam2sam");
diff --git a/tests/src/cram/bam2sam.t b/tests/src/cram/bam2sam.t.in
similarity index 99%
rename from tests/src/cram/bam2sam.t
rename to tests/src/cram/bam2sam.t.in
index d306f23..66645c4 100644
--- a/tests/src/cram/bam2sam.t
+++ b/tests/src/cram/bam2sam.t.in
@@ -1,8 +1,8 @@
Setup:
- $ BAM2SAM="$TESTDIR/../../../bin/bam2sam" && export BAM2SAM
+ $ BAM2SAM="@PacBioBAM_BinDir@/bam2sam" && export BAM2SAM
- $ DATADIR="$TESTDIR/../../data" && export DATADIR
+ $ DATADIR="@PacBioBAM_TestsDir@/data" && export DATADIR
Normal:
diff --git a/tests/src/cram/pbindexdump_cpp.t b/tests/src/cram/pbindexdump_cpp.t.in
similarity index 88%
rename from tests/src/cram/pbindexdump_cpp.t
rename to tests/src/cram/pbindexdump_cpp.t.in
index cf318ee..18a210c 100644
--- a/tests/src/cram/pbindexdump_cpp.t
+++ b/tests/src/cram/pbindexdump_cpp.t.in
@@ -1,8 +1,8 @@
Setup:
- $ PBINDEXDUMP="$TESTDIR/../../../bin/pbindexdump" && export PBINDEXDUMP
+ $ PBINDEXDUMP="@PacBioBAM_BinDir@/pbindexdump" && export PBINDEXDUMP
- $ DATADIR="$TESTDIR/../../data" && export DATADIR
+ $ DATADIR="@PacBioBAM_TestsDir@/data" && export DATADIR
Normal C++:
diff --git a/tests/src/cram/pbindexdump_json.t b/tests/src/cram/pbindexdump_json.t.in
similarity index 92%
rename from tests/src/cram/pbindexdump_json.t
rename to tests/src/cram/pbindexdump_json.t.in
index 676e21a..0c1cbcd 100644
--- a/tests/src/cram/pbindexdump_json.t
+++ b/tests/src/cram/pbindexdump_json.t.in
@@ -1,8 +1,8 @@
Setup:
- $ PBINDEXDUMP="$TESTDIR/../../../bin/pbindexdump" && export PBINDEXDUMP
+ $ PBINDEXDUMP="@PacBioBAM_BinDir@/pbindexdump" && export PBINDEXDUMP
- $ DATADIR="$TESTDIR/../../data" && export DATADIR
+ $ DATADIR="@PacBioBAM_TestsDir@/data" && export DATADIR
Default settings (JSON):
diff --git a/tests/src/cram/pbmerge_aligned_ordering.t b/tests/src/cram/pbmerge_aligned_ordering.t.in
similarity index 96%
rename from tests/src/cram/pbmerge_aligned_ordering.t
rename to tests/src/cram/pbmerge_aligned_ordering.t.in
index b029c18..58171bb 100644
--- a/tests/src/cram/pbmerge_aligned_ordering.t
+++ b/tests/src/cram/pbmerge_aligned_ordering.t.in
@@ -1,15 +1,15 @@
Setup:
- $ TOOLS_BIN="$TESTDIR/../../../bin" && export TOOLS_BIN
+ $ TOOLS_BIN="@PacBioBAM_BinDir@" && export TOOLS_BIN
$ PBMERGE="$TOOLS_BIN/pbmerge" && export PBMERGE
$ BAM2SAM="$TOOLS_BIN/bam2sam" && export BAM2SAM
- $ DATADIR="$TESTDIR/../../data" && export DATADIR
+ $ DATADIR="@PacBioBAM_TestsDir@/data" && export DATADIR
$ INPUT_1="$DATADIR/dataset/bam_mapping_1.bam" && export INPUT_1
$ INPUT_2="$DATADIR/dataset/bam_mapping_2.bam" && export INPUT_2
- $ MERGED_BAM="/tmp/aligned_ordering_merged.bam" && export MERGED_BAM
- $ MERGED_BAM_PBI="/tmp/aligned_ordering_merged.bam.pbi" && export MERGED_BAM_PBI
+ $ MERGED_BAM="@GeneratedTestDataDir@/aligned_ordering_merged.bam" && export MERGED_BAM
+ $ MERGED_BAM_PBI="@GeneratedTestDataDir@/aligned_ordering_merged.bam.pbi" && export MERGED_BAM_PBI
Sanity Check:
@@ -58,7 +58,7 @@ Normal Merge:
@SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
@RG\tID:a9a22406c5\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377\tPM:SEQUEL (esc)
@PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread (esc)
- @PG\tID:pbmerge-0.7.0\tPN:pbmerge\tVN:0.7.0 (esc)
+ @PG\tID:pbmerge-@PacBioBAM_VERSION@\tPN:pbmerge\tVN:@PacBioBAM_VERSION@ (esc)
$ $BAM2SAM --no-header $MERGED_BAM | cut -f 1,3,4 | head -n 20
m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49050/48_1132\tlambda_NEB3011\t1 (esc)
@@ -93,7 +93,7 @@ Shuffle Input:
@SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
@RG\tID:a9a22406c5\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377\tPM:SEQUEL (esc)
@PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread (esc)
- @PG\tID:pbmerge-0.7.0\tPN:pbmerge\tVN:0.7.0 (esc)
+ @PG\tID:pbmerge-@PacBioBAM_VERSION@\tPN:pbmerge\tVN:@PacBioBAM_VERSION@ (esc)
$ $BAM2SAM --no-header $MERGED_BAM | cut -f 1,3,4 | head -n 20
m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/387_1134\tlambda_NEB3011\t303 (esc)
@@ -128,7 +128,7 @@ Explicit Output Filename (also enables PBI):
@SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
@RG\tID:a9a22406c5\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377\tPM:SEQUEL (esc)
@PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread (esc)
- @PG\tID:pbmerge-0.7.0\tPN:pbmerge\tVN:0.7.0 (esc)
+ @PG\tID:pbmerge-@PacBioBAM_VERSION@\tPN:pbmerge\tVN:@PacBioBAM_VERSION@ (esc)
$ $BAM2SAM --no-header $MERGED_BAM | cut -f 1,3,4 | head -n 20
m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49050/48_1132\tlambda_NEB3011\t1 (esc)
@@ -167,7 +167,7 @@ Explicit Output Filename (with disabled PBI):
@SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
@RG\tID:a9a22406c5\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377\tPM:SEQUEL (esc)
@PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread (esc)
- @PG\tID:pbmerge-0.7.0\tPN:pbmerge\tVN:0.7.0 (esc)
+ @PG\tID:pbmerge-@PacBioBAM_VERSION@\tPN:pbmerge\tVN:@PacBioBAM_VERSION@ (esc)
$ $BAM2SAM --no-header $MERGED_BAM | cut -f 1,3,4 | head -n 20
m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49050/48_1132\tlambda_NEB3011\t1 (esc)
diff --git a/tests/src/cram/pbmerge_dataset.t b/tests/src/cram/pbmerge_dataset.t.in
similarity index 91%
rename from tests/src/cram/pbmerge_dataset.t
rename to tests/src/cram/pbmerge_dataset.t.in
index 02ae897..1c7cb7a 100644
--- a/tests/src/cram/pbmerge_dataset.t
+++ b/tests/src/cram/pbmerge_dataset.t.in
@@ -1,16 +1,16 @@
Setup:
- $ TOOLS_BIN="$TESTDIR/../../../bin" && export TOOLS_BIN
+ $ TOOLS_BIN="@PacBioBAM_BinDir@" && export TOOLS_BIN
$ PBMERGE="$TOOLS_BIN/pbmerge" && export PBMERGE
$ BAM2SAM="$TOOLS_BIN/bam2sam" && export BAM2SAM
- $ DATADIR="$TESTDIR/../../data" && export DATADIR
+ $ DATADIR="@PacBioBAM_TestsDir@/data" && export DATADIR
$ INPUT_XML="$DATADIR/polymerase/consolidate.subread.dataset.xml" && export INPUT_XML
$ BAM_1="$DATADIR/polymerase/production.subreads.bam" && export BAM_1
$ BAM_2="$DATADIR/polymerase/production.scraps.bam" && export BAM_2
- $ MERGED_BAM="/tmp/merged.bam" && export MERGED_BAM
- $ MERGED_BAM_PBI="/tmp/merged.bam.pbi" && export MERGED_BAM_PBI
+ $ MERGED_BAM="@GeneratedTestDataDir@/merged.bam" && export MERGED_BAM
+ $ MERGED_BAM_PBI="@GeneratedTestDataDir@/merged.bam.pbi" && export MERGED_BAM_PBI
Sanity Check:
@@ -66,7 +66,7 @@ Normal Merge from XML:
@PG\tID:BAZ_FORMAT\tVN:0.3.0 (esc)
@PG\tID:PPA-BAZ2BAM\tVN:0.1.0 (esc)
@PG\tID:PPA-BAZWRITER\tVN:0.2.0 (esc)
- @PG\tID:pbmerge-0.7.0\tPN:pbmerge\tVN:0.7.0 (esc)
+ @PG\tID:pbmerge-@PacBioBAM_VERSION@\tPN:pbmerge\tVN:@PacBioBAM_VERSION@ (esc)
$ $BAM2SAM --no-header $MERGED_BAM | cut -f 1
ArminsFakeMovie/0/4267_4289
@@ -98,7 +98,7 @@ Normal Merge from XML (disabled PBI):
@PG\tID:BAZ_FORMAT\tVN:0.3.0 (esc)
@PG\tID:PPA-BAZ2BAM\tVN:0.1.0 (esc)
@PG\tID:PPA-BAZWRITER\tVN:0.2.0 (esc)
- @PG\tID:pbmerge-0.7.0\tPN:pbmerge\tVN:0.7.0 (esc)
+ @PG\tID:pbmerge-@PacBioBAM_VERSION@\tPN:pbmerge\tVN:@PacBioBAM_VERSION@ (esc)
$ $BAM2SAM --no-header $MERGED_BAM | cut -f 1
ArminsFakeMovie/0/4267_4289
@@ -129,7 +129,7 @@ Write to stdout:
@PG\tID:BAZ_FORMAT\tVN:0.3.0 (esc)
@PG\tID:PPA-BAZ2BAM\tVN:0.1.0 (esc)
@PG\tID:PPA-BAZWRITER\tVN:0.2.0 (esc)
- @PG\tID:pbmerge-0.7.0\tPN:pbmerge\tVN:0.7.0 (esc)
+ @PG\tID:pbmerge-@PacBioBAM_VERSION@\tPN:pbmerge\tVN:@PacBioBAM_VERSION@ (esc)
$ $BAM2SAM --no-header $MERGED_BAM | cut -f 1
ArminsFakeMovie/0/4267_4289
diff --git a/tests/src/cram/pbmerge_fofn.t b/tests/src/cram/pbmerge_fofn.t.in
similarity index 94%
rename from tests/src/cram/pbmerge_fofn.t
rename to tests/src/cram/pbmerge_fofn.t.in
index 86ca74f..34e9af6 100644
--- a/tests/src/cram/pbmerge_fofn.t
+++ b/tests/src/cram/pbmerge_fofn.t.in
@@ -1,16 +1,16 @@
Setup:
- $ TOOLS_BIN="$TESTDIR/../../../bin" && export TOOLS_BIN
+ $ TOOLS_BIN="@PacBioBAM_BinDir@" && export TOOLS_BIN
$ PBMERGE="$TOOLS_BIN/pbmerge" && export PBMERGE
$ BAM2SAM="$TOOLS_BIN/bam2sam" && export BAM2SAM
- $ DATADIR="$TESTDIR/../../data" && export DATADIR
+ $ DATADIR="@PacBioBAM_TestsDir@/data" && export DATADIR
$ INPUT_FOFN="$DATADIR/merge.fofn" && export INPUT_FOFN
$ INPUT_1="$DATADIR/aligned.bam" && export INPUT_1
$ INPUT_2="$DATADIR/aligned2.bam" && export INPUT_2
- $ MERGED_BAM="/tmp/aligned_ordering_merged.bam" && export MERGED_BAM
- $ MERGED_BAM_PBI="/tmp/aligned_ordering_merged.bam.pbi" && export MERGED_BAM_PBI
+ $ MERGED_BAM="@GeneratedTestDataDir@/aligned_ordering_merged.bam" && export MERGED_BAM
+ $ MERGED_BAM_PBI="@GeneratedTestDataDir@/aligned_ordering_merged.bam.pbi" && export MERGED_BAM_PBI
Sanity Check:
@@ -61,7 +61,7 @@ Normal Merge from FOFN:
@RG\tID:b89a4406\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3;FRAMERATEHZ=100\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377\tPM:SEQUEL (esc)
@PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread (esc)
@PG\tID:bwa\tPN:bwa\tVN:0.7.10-r1017-dirty\tCL:bwa mem lambdaNEB.fa singleInsertion.fasta (esc)
- @PG\tID:pbmerge-0.7.0\tPN:pbmerge\tVN:0.7.0 (esc)
+ @PG\tID:pbmerge-@PacBioBAM_VERSION@\tPN:pbmerge\tVN:@PacBioBAM_VERSION@ (esc)
$ $BAM2SAM --no-header $MERGED_BAM | cut -f 1,3,4 | head -n 20
m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49050/48_1132\tlambda_NEB3011\t1 (esc)
@@ -99,7 +99,7 @@ Normal Merge from FOFN (disabled PBI):
@RG\tID:b89a4406\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3;FRAMERATEHZ=100\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377\tPM:SEQUEL (esc)
@PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread (esc)
@PG\tID:bwa\tPN:bwa\tVN:0.7.10-r1017-dirty\tCL:bwa mem lambdaNEB.fa singleInsertion.fasta (esc)
- @PG\tID:pbmerge-0.7.0\tPN:pbmerge\tVN:0.7.0 (esc)
+ @PG\tID:pbmerge-@PacBioBAM_VERSION@\tPN:pbmerge\tVN:@PacBioBAM_VERSION@ (esc)
$ $BAM2SAM --no-header $MERGED_BAM | cut -f 1,3,4 | head -n 20
m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49050/48_1132\tlambda_NEB3011\t1 (esc)
diff --git a/tests/src/cram/pbmerge_mixed_ordering.t b/tests/src/cram/pbmerge_mixed_ordering.t.in
similarity index 94%
rename from tests/src/cram/pbmerge_mixed_ordering.t
rename to tests/src/cram/pbmerge_mixed_ordering.t.in
index 70cbe74..6f1f3f9 100644
--- a/tests/src/cram/pbmerge_mixed_ordering.t
+++ b/tests/src/cram/pbmerge_mixed_ordering.t.in
@@ -1,14 +1,14 @@
Setup:
- $ TOOLS_BIN="$TESTDIR/../../../bin" && export TOOLS_BIN
+ $ TOOLS_BIN="@PacBioBAM_BinDir@" && export TOOLS_BIN
$ PBMERGE="$TOOLS_BIN/pbmerge" && export PBMERGE
$ BAM2SAM="$TOOLS_BIN/bam2sam" && export BAM2SAM
- $ DATADIR="$TESTDIR/../../data" && export DATADIR
+ $ DATADIR="@PacBioBAM_TestsDir@/data" && export DATADIR
$ UNALIGNED_BAM="$DATADIR/polymerase/internal.hqregions.bam" && export UNALIGNED_BAM
$ ALIGNED_BAM="$DATADIR/dataset/bam_mapping_1.bam" && export ALIGNED_BAM
- $ MERGED_BAM="/tmp/mixed_ordering_merged.bam" && export MERGED_BAM
+ $ MERGED_BAM="@GeneratedTestDataDir@/mixed_ordering_merged.bam" && export MERGED_BAM
Sanity Check:
diff --git a/tests/src/cram/pbmerge_pacbio_ordering.t b/tests/src/cram/pbmerge_pacbio_ordering.t.in
similarity index 96%
rename from tests/src/cram/pbmerge_pacbio_ordering.t
rename to tests/src/cram/pbmerge_pacbio_ordering.t.in
index 78e0755..f52759f 100644
--- a/tests/src/cram/pbmerge_pacbio_ordering.t
+++ b/tests/src/cram/pbmerge_pacbio_ordering.t.in
@@ -1,15 +1,15 @@
Setup:
- $ TOOLS_BIN="$TESTDIR/../../../bin" && export TOOLS_BIN
+ $ TOOLS_BIN="@PacBioBAM_BinDir@" && export TOOLS_BIN
$ PBMERGE="$TOOLS_BIN/pbmerge" && export PBMERGE
$ BAM2SAM="$TOOLS_BIN/bam2sam" && export BAM2SAM
- $ DATADIR="$TESTDIR/../../data" && export DATADIR
+ $ DATADIR="@PacBioBAM_TestsDir@/data" && export DATADIR
$ HQREGION_BAM="$DATADIR/polymerase/internal.hqregions.bam" && export HQREGION_BAM
$ SCRAPS_BAM="$DATADIR/polymerase/internal.scraps.bam" && export SCRAPS_BAM
- $ MERGED_BAM="/tmp/pacbio_ordering_merged.bam" && export MERGED_BAM
- $ MERGED_BAM_PBI="/tmp/pacbio_ordering_merged.bam.pbi" && export MERGED_BAM_PBI
+ $ MERGED_BAM="@GeneratedTestDataDir@/pacbio_ordering_merged.bam" && export MERGED_BAM
+ $ MERGED_BAM_PBI="@GeneratedTestDataDir@/pacbio_ordering_merged.bam.pbi" && export MERGED_BAM_PBI
Sanity Check:
@@ -112,7 +112,7 @@ Normal Merge:
@PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0 (esc)
@PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0 (esc)
@PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0 (esc)
- @PG\tID:pbmerge-0.7.0\tPN:pbmerge\tVN:0.7.0 (esc)
+ @PG\tID:pbmerge-@PacBioBAM_VERSION@\tPN:pbmerge\tVN:@PacBioBAM_VERSION@ (esc)
$ $BAM2SAM --no-header $MERGED_BAM | cut -f 1
ArminsFakeMovie/100000/0_2659
@@ -199,7 +199,7 @@ Shuffle Input:
@PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0 (esc)
@PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0 (esc)
@PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0 (esc)
- @PG\tID:pbmerge-0.7.0\tPN:pbmerge\tVN:0.7.0 (esc)
+ @PG\tID:pbmerge-@PacBioBAM_VERSION@\tPN:pbmerge\tVN:@PacBioBAM_VERSION@ (esc)
$ $BAM2SAM --no-header $MERGED_BAM | cut -f 1
ArminsFakeMovie/100000/0_2659
@@ -286,7 +286,7 @@ Explicit Output Filename (also enables PBI):
@PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0 (esc)
@PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0 (esc)
@PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0 (esc)
- @PG\tID:pbmerge-0.7.0\tPN:pbmerge\tVN:0.7.0 (esc)
+ @PG\tID:pbmerge-@PacBioBAM_VERSION@\tPN:pbmerge\tVN:@PacBioBAM_VERSION@ (esc)
$ $BAM2SAM --no-header $MERGED_BAM | cut -f 1
ArminsFakeMovie/100000/0_2659
@@ -377,7 +377,7 @@ Explicit Output Filename (with disabled PBI):
@PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0 (esc)
@PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0 (esc)
@PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0 (esc)
- @PG\tID:pbmerge-0.7.0\tPN:pbmerge\tVN:0.7.0 (esc)
+ @PG\tID:pbmerge-@PacBioBAM_VERSION@\tPN:pbmerge\tVN:@PacBioBAM_VERSION@ (esc)
$ $BAM2SAM --no-header $MERGED_BAM | cut -f 1
ArminsFakeMovie/100000/0_2659
diff --git a/tests/src/test_BamHeader.cpp b/tests/src/test_BamHeader.cpp
index f1e14ea..4c49b95 100644
--- a/tests/src/test_BamHeader.cpp
+++ b/tests/src/test_BamHeader.cpp
@@ -392,17 +392,17 @@ TEST(BamHeaderTest, MergeHandlesDuplicateReadGroups)
EXPECT_EQ(hdrText, merged.ToSam());
}
-TEST(BamHeaderTest, IncompatibleMergeFails)
+TEST(BamHeaderTest, MergeCompatibilityOk)
{
- { // @HD:VN
+ { // different @HD:VN - this IS allowed (as of SAT-465, pbbam v0.7.2)
const string hdrText1 = { "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n" };
const string hdrText2 = { "@HD\tVN:1.0\tSO:unknown\tpb:3.0.1\n" };
const BamHeader header1(hdrText1);
const BamHeader header2(hdrText2);
- EXPECT_THROW(header1 + header2, std::runtime_error);
+ EXPECT_NO_THROW(header1 + header2);
}
- { // @HD:SO
+ { // different @HD:SO
const string hdrText1 = { "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n" };
const string hdrText2 = { "@HD\tVN:1.1\tSO:coordinate\tpb:3.0.1\n" };
const BamHeader header1(hdrText1);
@@ -410,12 +410,12 @@ TEST(BamHeaderTest, IncompatibleMergeFails)
EXPECT_THROW(header1 + header2, std::runtime_error);
}
- { // @HD:pb
+ { // different @HD:pb - this IS allowed (as of SAT-529, pbbam 0.7.4)
const string hdrText1 = { "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n" };
const string hdrText2 = { "@HD\tVN:1.1\tSO:unknown\tpb:3.0.3\n" };
const BamHeader header1(hdrText1);
const BamHeader header2(hdrText2);
- EXPECT_THROW(header1 + header2, std::runtime_error);
+ EXPECT_NO_THROW(header1 + header2);
}
{ // @SQ list clash
diff --git a/tests/src/test_BamRecord.cpp b/tests/src/test_BamRecord.cpp
index ceb7429..753ab6b 100644
--- a/tests/src/test_BamRecord.cpp
+++ b/tests/src/test_BamRecord.cpp
@@ -172,6 +172,73 @@ BamRecord MakeCigaredQualRecord(const string& quals,
return BamRecord(std::move(impl));
}
+static
+BamRecord MakeCigaredPulseBaseRecord(const string& seqBases,
+ const string& pulseCalls,
+ const string& pulseBases,
+ const string& cigar,
+ const Strand strand)
+{
+ TagCollection tags;
+ tags["pc"] = pulseCalls; // PulseCall
+ tags["pt"] = pulseBases; // AltLabelTag
+
+ BamRecordImpl impl = MakeCigaredImpl(seqBases, cigar, strand);
+ impl.Tags(tags);
+ return BamRecord(std::move(impl));
+}
+
+static
+BamRecord MakeCigaredPulseQualRecord(const string& seqBases,
+ const string& pulseCalls,
+ const string& pulseQuals,
+ const string& cigar,
+ const Strand strand)
+{
+ TagCollection tags;
+ tags["pc"] = pulseCalls;
+ tags["pv"] = pulseQuals; // AltLabelQV
+ tags["pq"] = pulseQuals; // LabelQV
+ tags["pg"] = pulseQuals; // PulseMergeQV
+
+ BamRecordImpl impl = MakeCigaredImpl(seqBases, cigar, strand);
+ impl.Tags(tags);
+ return BamRecord(std::move(impl));
+}
+
+static
+BamRecord MakeCigaredPulseFrameRecord(const string& seqBases,
+ const string& pulseCalls,
+ const vector<uint16_t>& pulseFrames,
+ const string& cigar,
+ const Strand strand)
+{
+ TagCollection tags;
+ tags["pc"] = pulseCalls;
+ tags["pd"] = pulseFrames; // PrePulseFrames
+ tags["px"] = pulseFrames; // PulseCallWidth
+
+ BamRecordImpl impl = MakeCigaredImpl(seqBases, cigar, strand);
+ impl.Tags(tags);
+ return BamRecord(std::move(impl));
+}
+
+static
+BamRecord MakeCigaredPulseUIntRecord(const string& seqBases,
+ const string& pulseCalls,
+ const vector<uint32_t>& pulseUInts,
+ const string& cigar,
+ const Strand strand)
+{
+ TagCollection tags;
+ tags["pc"] = pulseCalls;
+ tags["sf"] = pulseUInts; // StartFrame
+
+ BamRecordImpl impl = MakeCigaredImpl(seqBases, cigar, strand);
+ impl.Tags(tags);
+ return BamRecord(std::move(impl));
+}
+
// ----------------------------------------------------------
// helper structs and methods for checking combinations of:
// aligned strand, orientation requested, alignment, clipping
@@ -233,6 +300,46 @@ void CheckAlignAndClip(const string& cigar,
}
}
+template<typename DataType, typename MakeRecordType, typename FetchDataType>
+void CheckPulseDataAlignAndClip(const string& cigar,
+ const string& seqBases,
+ const string& pulseCalls,
+ const DataType& input,
+ const tests::ExpectedResult<DataType>& allPulses,
+ const tests::ExpectedResult<DataType>& basecallsOnly,
+ const MakeRecordType& makeRecord,
+ const FetchDataType& fetchData)
+{
+ { // map to forward strand
+ const BamRecord b = makeRecord(seqBases, pulseCalls, input, cigar, Strand::FORWARD);
+
+ EXPECT_EQ(allPulses.ForwardGenomic(), fetchData(b, Orientation::GENOMIC, false, false, PulseBehavior::ALL));
+ EXPECT_EQ(allPulses.ForwardNative(), fetchData(b, Orientation::NATIVE, false, false, PulseBehavior::ALL));
+ // no align/clipping operations available on ALL pulses
+
+ EXPECT_EQ(basecallsOnly.ForwardGenomic(), fetchData(b, Orientation::GENOMIC, false, false, PulseBehavior::BASECALLS_ONLY));
+ EXPECT_EQ(basecallsOnly.ForwardNative(), fetchData(b, Orientation::NATIVE, false, false, PulseBehavior::BASECALLS_ONLY));
+ EXPECT_EQ(basecallsOnly.ForwardGenomicAligned(), fetchData(b, Orientation::GENOMIC, true, false, PulseBehavior::BASECALLS_ONLY));
+ EXPECT_EQ(basecallsOnly.ForwardNativeAligned(), fetchData(b, Orientation::NATIVE, true, false, PulseBehavior::BASECALLS_ONLY));
+ EXPECT_EQ(basecallsOnly.ForwardGenomicAlignedClipped(), fetchData(b, Orientation::GENOMIC, true, true, PulseBehavior::BASECALLS_ONLY));
+ EXPECT_EQ(basecallsOnly.ForwardNativeAlignedClipped(), fetchData(b, Orientation::NATIVE, true, true, PulseBehavior::BASECALLS_ONLY));
+ }
+ { // map to reverse strand
+ const BamRecord b = makeRecord(seqBases, pulseCalls, input, cigar, Strand::REVERSE);
+
+ EXPECT_EQ(allPulses.ReverseGenomic(), fetchData(b, Orientation::GENOMIC, false, false, PulseBehavior::ALL));
+ EXPECT_EQ(allPulses.ReverseNative(), fetchData(b, Orientation::NATIVE, false, false, PulseBehavior::ALL));
+ // no align/clipping operations available on ALL pulses
+
+ EXPECT_EQ(basecallsOnly.ReverseGenomic(), fetchData(b, Orientation::GENOMIC, false, false, PulseBehavior::BASECALLS_ONLY));
+ EXPECT_EQ(basecallsOnly.ReverseNative(), fetchData(b, Orientation::NATIVE, false, false, PulseBehavior::BASECALLS_ONLY));
+ EXPECT_EQ(basecallsOnly.ReverseGenomicAligned(), fetchData(b, Orientation::GENOMIC, true, false, PulseBehavior::BASECALLS_ONLY));
+ EXPECT_EQ(basecallsOnly.ReverseNativeAligned(), fetchData(b, Orientation::NATIVE, true, false, PulseBehavior::BASECALLS_ONLY));
+ EXPECT_EQ(basecallsOnly.ReverseGenomicAlignedClipped(), fetchData(b, Orientation::GENOMIC, true, true, PulseBehavior::BASECALLS_ONLY));
+ EXPECT_EQ(basecallsOnly.ReverseNativeAlignedClipped(), fetchData(b, Orientation::NATIVE, true, true, PulseBehavior::BASECALLS_ONLY));
+ }
+}
+
static
void CheckBaseTagsClippedAndAligned(const string& cigar,
const string& input,
@@ -389,6 +496,160 @@ void CheckSequenceClippedAndAligned(const string& cigar,
);
}
+static
+void CheckPulseBaseTags(const string& cigar,
+ const string& seqBases,
+ const string& pulseCalls,
+ const string& pulseBases,
+ const ExpectedResult<string>& allPulses,
+ const ExpectedResult<string>& basecallsOnly)
+{
+ // aligned record + AltLabelTag
+ auto makeRecord = [](const string& seqBases,
+ const string& pulseCalls,
+ const string& pulseBases,
+ const string& cigar,
+ const Strand strand)
+ { return MakeCigaredPulseBaseRecord(seqBases, pulseCalls, pulseBases, cigar, strand); };
+
+ // AltLabelTag
+ CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, pulseBases, allPulses, basecallsOnly, makeRecord,
+ [](const BamRecord& b,
+ Orientation orientation,
+ bool aligned,
+ bool exciseSoftClips,
+ PulseBehavior pulseBehavior)
+ { return b.AltLabelTag(orientation, aligned, exciseSoftClips, pulseBehavior); }
+ );
+ // PulseCall
+ CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, pulseBases, allPulses, basecallsOnly, makeRecord,
+ [](const BamRecord& b,
+ Orientation orientation,
+ bool aligned,
+ bool exciseSoftClips,
+ PulseBehavior pulseBehavior)
+ { return b.PulseCall(orientation, aligned, exciseSoftClips, pulseBehavior); }
+ );
+}
+
+static
+void CheckPulseFrameTags(const string& cigar,
+ const string& seqBases,
+ const string& pulseCalls,
+ const vector<uint16_t>& pulseFrames,
+ const ExpectedResult<vector<uint16_t>>& allPulses,
+ const ExpectedResult<vector<uint16_t>>& basecallsOnly)
+{
+ // aligned record + PrePulseFrames
+ auto makeRecord = [](const string& seqBases,
+ const string& pulseCalls,
+ const vector<uint16_t>& pulseFrames,
+ const string& cigar,
+ const Strand strand)
+ { return MakeCigaredPulseFrameRecord(seqBases, pulseCalls, pulseFrames, cigar, strand); };
+
+ // PrePulseFrame
+ CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, pulseFrames, allPulses, basecallsOnly, makeRecord,
+ [](const BamRecord& b,
+ Orientation orientation,
+ bool aligned,
+ bool exciseSoftClips,
+ PulseBehavior pulseBehavior)
+ { return b.PrePulseFrames(orientation, aligned, exciseSoftClips, pulseBehavior).Data(); }
+ );
+ // PulseCallWidth
+ CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, pulseFrames, allPulses, basecallsOnly, makeRecord,
+ [](const BamRecord& b,
+ Orientation orientation,
+ bool aligned,
+ bool exciseSoftClips,
+ PulseBehavior pulseBehavior)
+ { return b.PulseCallWidth(orientation, aligned, exciseSoftClips, pulseBehavior).Data(); }
+ );
+}
+
+/*
+
+ { BamRecordTag::PKMEAN, {"pa", true} }, photons (vector<float>
+ { BamRecordTag::PKMEAN_2, {"ps", true} }, photons
+ { BamRecordTag::PKMID, {"pm", true} }, photons
+ { BamRecordTag::PKMID_2, {"pi", true} }, photons
+*/
+
+static
+void CheckPulseQualityTags(const string& cigar,
+ const string& seqBases,
+ const string& pulseCalls,
+ const string& pulseQuals,
+ const ExpectedResult<string>& allPulses,
+ const ExpectedResult<string>& basecallsOnly)
+{
+ // aligned record + AltLabelQV
+ auto makeRecord = [](const string& seqBases,
+ const string& pulseCalls,
+ const string& pulseQuals,
+ const string& cigar,
+ const Strand strand)
+ { return MakeCigaredPulseQualRecord(seqBases, pulseCalls, pulseQuals, cigar, strand); };
+
+ // AltLabelQV
+ CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, pulseQuals, allPulses, basecallsOnly, makeRecord,
+ [](const BamRecord& b,
+ Orientation orientation,
+ bool aligned,
+ bool exciseSoftClips,
+ PulseBehavior pulseBehavior)
+ { return b.AltLabelQV(orientation, aligned, exciseSoftClips, pulseBehavior).Fastq(); }
+ );
+ // LabelQV
+ CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, pulseQuals, allPulses, basecallsOnly, makeRecord,
+ [](const BamRecord& b,
+ Orientation orientation,
+ bool aligned,
+ bool exciseSoftClips,
+ PulseBehavior pulseBehavior)
+ { return b.LabelQV(orientation, aligned, exciseSoftClips, pulseBehavior).Fastq(); }
+ );
+ // PulseMergeQV
+ CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, pulseQuals, allPulses, basecallsOnly, makeRecord,
+ [](const BamRecord& b,
+ Orientation orientation,
+ bool aligned,
+ bool exciseSoftClips,
+ PulseBehavior pulseBehavior)
+ { return b.PulseMergeQV(orientation, aligned, exciseSoftClips, pulseBehavior).Fastq(); }
+ );
+}
+
+static
+void CheckPulseUIntTags(const string& cigar,
+ const string& seqBases,
+ const string& pulseCalls,
+ const vector<uint32_t>& startFrames,
+ const ExpectedResult<vector<uint32_t>>& allPulses,
+ const ExpectedResult<vector<uint32_t>>& basecallsOnly)
+{
+ // aligned record + StartFrame
+ auto makeRecord = [](const string& seqBases,
+ const string& pulseCalls,
+ const vector<uint32_t>& startFrames,
+ const string& cigar,
+ const Strand strand)
+ { return MakeCigaredPulseUIntRecord(seqBases, pulseCalls, startFrames, cigar, strand); };
+
+ // StartFrame
+ CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, startFrames, allPulses, basecallsOnly, makeRecord,
+ [](const BamRecord& b,
+ Orientation orientation,
+ bool aligned,
+ bool exciseSoftClips,
+ PulseBehavior pulseBehavior)
+ { return b.StartFrame(orientation, aligned, exciseSoftClips, pulseBehavior); }
+ );
+}
+
+
+
} // namespace tests
TEST(BamRecordTest, DefaultValues)
@@ -1475,3 +1736,973 @@ TEST(BamRecordTest, FrameTagsClippedAndAligned)
);
}
}
+
+TEST(BamRecordTest, PulseBaseTags)
+{
+ {
+ SCOPED_TRACE("CIGAR: 4=3D4=");
+ tests::CheckPulseBaseTags(
+ "4=3D4=", // CIGAR
+ "AACCGTTA", // seqBases
+ "AAaaCCGggTTA", // pulseCalls
+ "AAaaCCGggTTA", // tag data
+
+ { // all pulses
+
+ "AAaaCCGggTTA", // forward strand, genomic
+ "AAaaCCGggTTA", // forward strand, native
+ "", // forward strand, genomic, aligned
+ "", // forward strand, native, aligned
+ "", // forward strand, genomic, aligned, clipped
+ "", // forward strand, native, aligned, clipped
+ "TAAccCGGttTT", // reverse strand, genomic
+ "AAaaCCGggTTA", // reverse strand, native
+ "", // reverse strand, genomic, aligned
+ "", // reverse strand, native, aligned
+ "", // reverse strand, genomic, aligned, clipped
+ "" // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ "AACCGTTA", // forward strand, genomic
+ "AACCGTTA", // forward strand, native
+ "AACC---GTTA", // forward strand, genomic, aligned
+ "AACC---GTTA", // forward strand, native, aligned
+ "AACC---GTTA", // forward strand, genomic, aligned, clipped
+ "AACC---GTTA", // forward strand, native, aligned, clipped
+ "TAACGGTT", // reverse strand, genomic
+ "AACCGTTA", // reverse strand, native
+ "TAAC---GGTT", // reverse strand, genomic, aligned
+ "AACC---GTTA", // reverse strand, native, aligned
+ "TAAC---GGTT", // reverse strand, genomic, aligned, clipped
+ "AACC---GTTA" // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 4=1D2I2D4=");
+ tests::CheckPulseBaseTags(
+ "4=1D2I2D4=", // CIGAR
+ "ATCCTAGGTT", // seqBases
+ "ATttCCTtAGGggTT", // pulseCalls
+ "ATttCCTtAGGggTT", // tag data
+
+ { // all pulses
+
+ "ATttCCTtAGGggTT", // forward strand, genomic
+ "ATttCCTtAGGggTT", // forward strand, native
+ "", // forward strand, genomic, aligned
+ "", // forward strand, native, aligned
+ "", // forward strand, genomic, aligned, clipped
+ "", // forward strand, native, aligned, clipped
+ "AAccCCTaAGGaaAT", // reverse strand, genomic
+ "ATttCCTtAGGggTT", // reverse strand, native
+ "", // reverse strand, genomic, aligned
+ "", // reverse strand, native, aligned
+ "", // reverse strand, genomic, aligned, clipped
+ "" // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ "ATCCTAGGTT", // forward strand, genomic
+ "ATCCTAGGTT", // forward strand, native
+ "ATCC-TA--GGTT", // forward strand, genomic, aligned
+ "ATCC-TA--GGTT", // forward strand, native, aligned
+ "ATCC-TA--GGTT", // forward strand, genomic, aligned, clipped
+ "ATCC-TA--GGTT", // forward strand, native, aligned, clipped
+ "AACCTAGGAT", // reverse strand, genomic
+ "ATCCTAGGTT", // reverse strand, native
+ "AACC-TA--GGAT", // reverse strand, genomic, aligned
+ "ATCC--TA-GGTT", // reverse strand, native, aligned
+ "AACC-TA--GGAT", // reverse strand, genomic, aligned, clipped
+ "ATCC--TA-GGTT" // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 4=1D2P2I2P2D4=");
+ tests::CheckPulseBaseTags(
+ "4=1D2P2I2P2D4=", // CIGAR
+ "ATCCTAGGTT", // seqBases
+ "ATttCCTtAGGggTT", // pulseCalls
+ "ATttCCTtAGGggTT", // tag data
+ {
+ "ATttCCTtAGGggTT", // forward strand, genomic
+ "ATttCCTtAGGggTT", // forward strand, native
+ "", // forward strand, genomic, aligned
+ "", // forward strand, native, aligned
+ "", // forward strand, genomic, aligned, clipped
+ "", // forward strand, native, aligned, clipped
+ "AAccCCTaAGGaaAT", // reverse strand, genomic
+ "ATttCCTtAGGggTT", // reverse strand, native
+ "", // reverse strand, genomic, aligned
+ "", // reverse strand, native, aligned
+ "", // reverse strand, genomic, aligned, clipped
+ "" // reverse strand, native, aligned, clipped
+ },
+ {
+ "ATCCTAGGTT", // forward strand, genomic
+ "ATCCTAGGTT", // forward strand, native
+ "ATCC-**TA**--GGTT", // forward strand, genomic, aligned
+ "ATCC-**TA**--GGTT", // forward strand, native, aligned
+ "ATCC-**TA**--GGTT", // forward strand, genomic, aligned, clipped
+ "ATCC-**TA**--GGTT", // forward strand, native, aligned, clipped
+ "AACCTAGGAT", // reverse strand, genomic
+ "ATCCTAGGTT", // reverse strand, native
+ "AACC-**TA**--GGAT", // reverse strand, genomic, aligned
+ "ATCC--**TA**-GGTT", // reverse strand, native, aligned
+ "AACC-**TA**--GGAT", // reverse strand, genomic, aligned, clipped
+ "ATCC--**TA**-GGTT" // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 3S4=3D4=3S");
+ tests::CheckPulseBaseTags(
+ "3S4=3D4=3S", // CIGAR
+ "TTTAACCGTTACCG", // seqBases
+ "TTTttAACCccGTTAaaCCG", // pulseCalls
+ "TTTttAACCccGTTAaaCCG", // tag data
+
+ { // all pulses
+
+ "TTTttAACCccGTTAaaCCG", // forward strand, genomic
+ "TTTttAACCccGTTAaaCCG", // forward strand, native
+ "", // forward strand, genomic, aligned
+ "", // forward strand, native, aligned
+ "", // forward strand, genomic, aligned, clipped
+ "", // forward strand, native, aligned, clipped
+ "CGGttTAACggGGTTaaAAA", // reverse strand, genomic
+ "TTTttAACCccGTTAaaCCG", // reverse strand, native
+ "", // reverse strand, genomic, aligned
+ "", // reverse strand, native, aligned
+ "", // reverse strand, genomic, aligned, clipped
+ "" // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ "TTTAACCGTTACCG", // forward strand, genomic
+ "TTTAACCGTTACCG", // forward strand, native
+ "TTTAACC---GTTACCG", // forward strand, genomic, aligned
+ "TTTAACC---GTTACCG", // forward strand, native, aligned
+ "AACC---GTTA", // forward strand, genomic, aligned, clipped
+ "AACC---GTTA", // forward strand, native, aligned, clipped
+ "CGGTAACGGTTAAA", // reverse strand, genomic
+ "TTTAACCGTTACCG", // reverse strand, native
+ "CGGTAAC---GGTTAAA", // reverse strand, genomic, aligned
+ "TTTAACC---GTTACCG", // reverse strand, native, aligned
+ "TAAC---GGTT", // reverse strand, genomic, aligned, clipped
+ "AACC---GTTA" // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 2H4=3D4=3H");
+ tests::CheckPulseBaseTags(
+ "2H4=3D4=3H", // CIGAR
+ "AACCGTTA", // seqBases
+ "AAaaCCGggTTA", // pulseCalls
+ "AAaaCCGggTTA", // tag data
+
+ { // all pulses
+
+ "AAaaCCGggTTA", // forward strand, genomic
+ "AAaaCCGggTTA", // forward strand, native
+ "", // forward strand, genomic, aligned
+ "", // forward strand, native, aligned
+ "", // forward strand, genomic, aligned, clipped
+ "", // forward strand, native, aligned, clipped
+ "TAAccCGGttTT", // reverse strand, genomic
+ "AAaaCCGggTTA", // reverse strand, native
+ "", // reverse strand, genomic, aligned
+ "", // reverse strand, native, aligned
+ "", // reverse strand, genomic, aligned, clipped
+ "" // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ "AACCGTTA", // forward strand, genomic
+ "AACCGTTA", // forward strand, native
+ "AACC---GTTA", // forward strand, genomic, aligned
+ "AACC---GTTA", // forward strand, native, aligned
+ "AACC---GTTA", // forward strand, genomic, aligned, clipped
+ "AACC---GTTA", // forward strand, native, aligned, clipped
+ "TAACGGTT", // reverse strand, genomic
+ "AACCGTTA", // reverse strand, native
+ "TAAC---GGTT", // reverse strand, genomic, aligned
+ "AACC---GTTA", // reverse strand, native, aligned
+ "TAAC---GGTT", // reverse strand, genomic, aligned, clipped
+ "AACC---GTTA" // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 2H3S4=3D4=3S3H");
+ tests::CheckPulseBaseTags(
+ "2H3S4=3D4=3S3H", // CIGAR
+ "TTTAACCGTTACCG", // seqBases
+ "TTTttAACCccGTTAaaCCG", // pulseCalls
+ "TTTttAACCccGTTAaaCCG", // tag data
+
+ { // all pulses
+
+ "TTTttAACCccGTTAaaCCG", // forward strand, genomic
+ "TTTttAACCccGTTAaaCCG", // forward strand, native
+ "", // forward strand, genomic, aligned
+ "", // forward strand, native, aligned
+ "", // forward strand, genomic, aligned, clipped
+ "", // forward strand, native, aligned, clipped
+ "CGGttTAACggGGTTaaAAA", // reverse strand, genomic
+ "TTTttAACCccGTTAaaCCG", // reverse strand, native
+ "", // reverse strand, genomic, aligned
+ "", // reverse strand, native, aligned
+ "", // reverse strand, genomic, aligned, clipped
+ "" // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ "TTTAACCGTTACCG", // forward strand, genomic
+ "TTTAACCGTTACCG", // forward strand, native
+ "TTTAACC---GTTACCG", // forward strand, genomic, aligned
+ "TTTAACC---GTTACCG", // forward strand, native, aligned
+ "AACC---GTTA", // forward strand, genomic, aligned, clipped
+ "AACC---GTTA", // forward strand, native, aligned, clipped
+ "CGGTAACGGTTAAA", // reverse strand, genomic
+ "TTTAACCGTTACCG", // reverse strand, native
+ "CGGTAAC---GGTTAAA", // reverse strand, genomic, aligned
+ "TTTAACC---GTTACCG", // reverse strand, native, aligned
+ "TAAC---GGTT", // reverse strand, genomic, aligned, clipped
+ "AACC---GTTA" // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+}
+
+TEST(BamRecordTest, PulseQualityTags)
+{
+ {
+ SCOPED_TRACE("CIGAR: 4=3D4=");
+ tests::CheckPulseQualityTags(
+ "4=3D4=", // CIGAR
+ "AACCGTTA", // seqBases
+ "AAaaCCGggTTA", // pulseCalls
+ "?]!!?]?!!]?@", // tag data
+
+ { // all pulses
+
+ "?]!!?]?!!]?@", // forward strand, genomic
+ "?]!!?]?!!]?@", // forward strand, native
+ "", // forward strand, genomic, aligned
+ "", // forward strand, native, aligned
+ "", // forward strand, genomic, aligned + clipped
+ "", // forward strand, native, aligned + clipped
+ "@?]!!?]?!!]?", // reverse strand, genomic
+ "?]!!?]?!!]?@", // reverse strand, native
+ "", // reverse strand, genomic, aligned
+ "", // reverse strand, native, aligned
+ "", // reverse strand, genomic, aligned + clipped
+ "" // reverse strand, native, aligned + clipped
+ },
+ { // basecalls only
+
+ "?]?]?]?@", // forward strand, genomic
+ "?]?]?]?@", // forward strand, native
+ "?]?]!!!?]?@", // forward strand, genomic, aligned
+ "?]?]!!!?]?@", // forward strand, native, aligned
+ "?]?]!!!?]?@", // forward strand, genomic, aligned + clipped
+ "?]?]!!!?]?@", // forward strand, native, aligned + clipped
+ "@?]?]?]?", // reverse strand, genomic
+ "?]?]?]?@", // reverse strand, native
+ "@?]?!!!]?]?", // reverse strand, genomic, aligned
+ "?]?]!!!?]?@", // reverse strand, native, aligned
+ "@?]?!!!]?]?", // reverse strand, genomic, aligned + clipped
+ "?]?]!!!?]?@" // reverse strand, native, aligned + clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 4=1D2I2D4=");
+ tests::CheckPulseQualityTags(
+ "4=1D2I2D4=", // CIGAR
+ "ATCCTAGGTT", // seqBases
+ "ATttCCTtAGGggTT", // pulseCalls
+ "?]!!?]8!7?]!!?@", // tag data
+
+ { // all pulses
+
+ "?]!!?]8!7?]!!?@", // forward strand, genomic
+ "?]!!?]8!7?]!!?@", // forward strand, native
+ "", // forward strand, genomic, aligned
+ "", // forward strand, native, aligned
+ "", // forward strand, genomic, aligned + clipped
+ "", // forward strand, native, aligned + clipped
+ "@?!!]?7!8]?!!]?", // reverse strand, genomic
+ "?]!!?]8!7?]!!?@", // reverse strand, native
+ "", // reverse strand, genomic, aligned
+ "", // reverse strand, native, aligned
+ "", // reverse strand, genomic, aligned + clipped
+ "" // reverse strand, native, aligned + clipped
+ },
+ { // basecalls only
+
+ "?]?]87?]?@", // forward strand, genomic
+ "?]?]87?]?@", // forward strand, native
+ "?]?]!87!!?]?@", // forward strand, genomic, aligned
+ "?]?]!87!!?]?@", // forward strand, native, aligned
+ "?]?]!87!!?]?@", // forward strand, genomic, aligned + clipped
+ "?]?]!87!!?]?@", // forward strand, native, aligned + clipped
+ "@?]?78]?]?", // reverse strand, genomic
+ "?]?]87?]?@", // reverse strand, native
+ "@?]?!78!!]?]?", // reverse strand, genomic, aligned
+ "?]?]!!87!?]?@", // reverse strand, native, aligned
+ "@?]?!78!!]?]?", // reverse strand, genomic, aligned + clipped
+ "?]?]!!87!?]?@" // reverse strand, native, aligned + clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 4=1D2P2I2P2D4=");
+ tests::CheckPulseQualityTags(
+ "4=1D2P2I2P2D4=", // CIGAR
+ "ATCCTAGGTT", // seqBases
+ "ATttCCTtAGGggTT", // pulseCalls
+ "?]!!?]8!7?]!!?@", // tag data
+ {
+ "?]!!?]8!7?]!!?@", // forward strand, genomic
+ "?]!!?]8!7?]!!?@", // forward strand, native
+ "", // forward strand, genomic, aligned
+ "", // forward strand, native, aligned
+ "", // forward strand, genomic, aligned + clipped
+ "", // forward strand, native, aligned + clipped
+ "@?!!]?7!8]?!!]?", // reverse strand, genomic
+ "?]!!?]8!7?]!!?@", // reverse strand, native
+ "", // reverse strand, genomic, aligned
+ "", // reverse strand, native, aligned
+ "", // reverse strand, genomic, aligned + clipped
+ "" // reverse strand, native, aligned + clipped
+ },
+ {
+ "?]?]87?]?@", // forward strand, genomic
+ "?]?]87?]?@", // forward strand, native
+ "?]?]!!!87!!!!?]?@", // forward strand, genomic, aligned
+ "?]?]!!!87!!!!?]?@", // forward strand, native, aligned
+ "?]?]!!!87!!!!?]?@", // forward strand, genomic, aligned + clipped
+ "?]?]!!!87!!!!?]?@", // forward strand, native, aligned + clipped
+ "@?]?78]?]?", // reverse strand, genomic
+ "?]?]87?]?@", // reverse strand, native
+ "@?]?!!!78!!!!]?]?", // reverse strand, genomic, aligned
+ "?]?]!!!!87!!!?]?@", // reverse strand, native, aligned
+ "@?]?!!!78!!!!]?]?", // reverse strand, genomic, aligned + clipped
+ "?]?]!!!!87!!!?]?@" // reverse strand, native, aligned + clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 3S4=3D4=3S");
+ tests::CheckPulseQualityTags(
+ "3S4=3D4=3S", // CIGAR
+ "TTTAACCGTTACCG", // seqBases
+ "TTTttAACCccGTTAaaCCG", // pulseCalls
+ "vvv!!?]?]!!?]?@!!xxx", // tag data
+
+ { // all pulses
+
+ "vvv!!?]?]!!?]?@!!xxx", // forward strand, genomic
+ "vvv!!?]?]!!?]?@!!xxx", // forward strand, native
+ "", // forward strand, genomic, aligned
+ "", // forward strand, native, aligned
+ "", // forward strand, genomic, aligned, clipped
+ "", // forward strand, native, aligned, clipped
+ "xxx!!@?]?!!]?]?!!vvv", // reverse strand, genomic
+ "vvv!!?]?]!!?]?@!!xxx", // reverse strand, native
+ "", // reverse strand, genomic, aligned
+ "", // reverse strand, native, aligned
+ "", // reverse strand, genomic, aligned, clipped
+ "" // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ "vvv?]?]?]?@xxx", // forward strand, genomic
+ "vvv?]?]?]?@xxx", // forward strand, native
+ "vvv?]?]!!!?]?@xxx", // forward strand, genomic, aligned
+ "vvv?]?]!!!?]?@xxx", // forward strand, native, aligned
+ "?]?]!!!?]?@", // forward strand, genomic, aligned, clipped
+ "?]?]!!!?]?@", // forward strand, native, aligned, clipped
+ "xxx@?]?]?]?vvv", // reverse strand, genomic
+ "vvv?]?]?]?@xxx", // reverse strand, native
+ "xxx@?]?!!!]?]?vvv", // reverse strand, genomic, aligned
+ "vvv?]?]!!!?]?@xxx", // reverse strand, native, aligned
+ "@?]?!!!]?]?", // reverse strand, genomic, aligned, clipped
+ "?]?]!!!?]?@" // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 2H4=3D4=3H");
+ tests::CheckPulseQualityTags(
+ "2H4=3D4=3H", // CIGAR
+ "AACCGTTA", // seqBases
+ "AAaaCCGggTTA", // pulseCalls
+ "?]!!?]?!!]?@", // tag data
+
+ { // all pulses
+
+ "?]!!?]?!!]?@", // forward strand, genomic
+ "?]!!?]?!!]?@", // forward strand, native
+ "", // forward strand, genomic, aligned
+ "", // forward strand, native, aligned
+ "", // forward strand, genomic, aligned, clipped
+ "", // forward strand, native, aligned, clipped
+ "@?]!!?]?!!]?", // reverse strand, genomic
+ "?]!!?]?!!]?@", // reverse strand, native
+ "", // reverse strand, genomic, aligned
+ "", // reverse strand, native, aligned
+ "", // reverse strand, genomic, aligned, clipped
+ "" // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ "?]?]?]?@", // forward strand, genomic
+ "?]?]?]?@", // forward strand, native
+ "?]?]!!!?]?@", // forward strand, genomic, aligned
+ "?]?]!!!?]?@", // forward strand, native, aligned
+ "?]?]!!!?]?@", // forward strand, genomic, aligned, clipped
+ "?]?]!!!?]?@", // forward strand, native, aligned, clipped
+ "@?]?]?]?", // reverse strand, genomic
+ "?]?]?]?@", // reverse strand, native
+ "@?]?!!!]?]?", // reverse strand, genomic, aligned
+ "?]?]!!!?]?@", // reverse strand, native, aligned
+ "@?]?!!!]?]?", // reverse strand, genomic, aligned, clipped
+ "?]?]!!!?]?@" // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 2H3S4=3D4=3S3H");
+ tests::CheckPulseQualityTags(
+ "2H3S4=3D4=3S3H", // CIGAR
+ "TTTAACCGTTACCG", // seqBases
+ "TTTttAACCccGTTAaaCCG", // pulseCalls
+ "vvv!!?]?]!!?]?@!!xxx", // tag data
+
+ { // all pulses
+
+ "vvv!!?]?]!!?]?@!!xxx", // forward strand, genomic
+ "vvv!!?]?]!!?]?@!!xxx", // forward strand, native
+ "", // forward strand, genomic, aligned
+ "", // forward strand, native, aligned
+ "", // forward strand, genomic, aligned, clipped
+ "", // forward strand, native, aligned, clipped
+ "xxx!!@?]?!!]?]?!!vvv", // reverse strand, genomic
+ "vvv!!?]?]!!?]?@!!xxx", // reverse strand, native
+ "", // reverse strand, genomic, aligned
+ "", // reverse strand, native, aligned
+ "", // reverse strand, genomic, aligned, clipped
+ "" // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ "vvv?]?]?]?@xxx", // forward strand, genomic
+ "vvv?]?]?]?@xxx", // forward strand, native
+ "vvv?]?]!!!?]?@xxx", // forward strand, genomic, aligned
+ "vvv?]?]!!!?]?@xxx", // forward strand, native, aligned
+ "?]?]!!!?]?@", // forward strand, genomic, aligned, clipped
+ "?]?]!!!?]?@", // forward strand, native, aligned, clipped
+ "xxx@?]?]?]?vvv", // reverse strand, genomic
+ "vvv?]?]?]?@xxx", // reverse strand, native
+ "xxx@?]?!!!]?]?vvv", // reverse strand, genomic, aligned
+ "vvv?]?]!!!?]?@xxx", // reverse strand, native, aligned
+ "@?]?!!!]?]?", // reverse strand, genomic, aligned, clipped
+ "?]?]!!!?]?@" // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+}
+
+TEST(BamRecordTest, PulseFrameTags)
+{
+ {
+ SCOPED_TRACE("CIGAR: 4=3D4=");
+ tests::CheckPulseFrameTags(
+ "4=3D4=", // CIGAR
+ "AACCGTTA", // seqBases
+ "AAaaCCGggTTA", // pulseCalls
+ { 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10, 30 }, // tag data
+
+ { // all pulses
+
+ { 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10, 30 }, // forward strand, genomic
+ { 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10, 30 }, // forward strand, native
+ { }, // forward strand, genomic, aligned
+ { }, // forward strand, native, aligned
+ { }, // forward strand, genomic, aligned, clipped
+ { }, // forward strand, native, aligned, clipped
+ { 30, 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10, 30 }, // reverse strand, native
+ { }, // reverse strand, genomic, aligned
+ { }, // reverse strand, native, aligned
+ { }, // reverse strand, genomic, aligned, clipped
+ { } // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // forward strand, genomic
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // forward strand, native
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
+ { 30, 10, 20, 10, 20, 10, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // reverse strand, native
+ { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // reverse strand, native, aligned
+ { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 4=1D2I2D4=");
+ tests::CheckPulseFrameTags(
+ "4=1D2I2D4=", // CIGAR
+ "ATCCTAGGTT", // seqBases
+ "ATttCCTtAGGggTT", // pulseCalls
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // tag data
+
+ { // all pulses
+
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // forward strand, genomic
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // forward strand, native
+ { }, // forward strand, genomic, aligned
+ { }, // forward strand, native, aligned
+ { }, // forward strand, genomic, aligned, clipped
+ { }, // forward strand, native, aligned, clipped
+ { 30, 10, 0, 0, 20, 10, 70, 0, 80, 20, 10, 0, 0, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // reverse strand, native
+ { }, // reverse strand, genomic, aligned
+ { }, // reverse strand, native, aligned
+ { }, // reverse strand, genomic, aligned, clipped
+ { } // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // forward strand, genomic
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // forward strand, native
+ { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned
+ { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
+ { 30, 10, 20, 10, 70, 80, 20, 10, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // reverse strand, native
+ { 30, 10, 20, 10, 0, 70, 80, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 80, 70, 0, 10, 20, 10, 30 }, // reverse strand, native, aligned
+ { 30, 10, 20, 10, 0, 70, 80, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 80, 70, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 4=1D2P2I2P2D4=");
+ tests::CheckPulseFrameTags(
+ "4=1D2P2I2P2D4=", // CIGAR
+ "ATCCTAGGTT", // seqBases
+ "ATttCCTtAGGggTT", // pulseCalls
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // tag data
+
+ { // all pulses
+
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // forward strand, genomic
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // forward strand, native
+ { }, // forward strand, genomic, aligned
+ { }, // forward strand, native, aligned
+ { }, // forward strand, genomic, aligned, clipped
+ { }, // forward strand, native, aligned, clipped
+ { 30, 10, 0, 0, 20, 10, 70, 0, 80, 20, 10, 0, 0, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // reverse strand, native
+ { }, // reverse strand, genomic, aligned
+ { }, // reverse strand, native, aligned
+ { }, // reverse strand, genomic, aligned, clipped
+ { } // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // forward strand, genomic
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // forward strand, native
+ { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
+ { 30, 10, 20, 10, 70, 80, 20, 10, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // reverse strand, native
+ { 30, 10, 20, 10, 0, 0, 0, 70, 80, 0, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 0, 80, 70, 0, 0, 0, 10, 20, 10, 30 }, // reverse strand, native, aligned
+ { 30, 10, 20, 10, 0, 0, 0, 70, 80, 0, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 0, 80, 70, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 3S4=3D4=3S");
+ tests::CheckPulseFrameTags(
+ "3S4=3D4=3S", // CIGAR
+ "TTTAACCGTTACCG", // seqBases
+ "TTTttAACCccGTTAaaCCG", // pulseCalls
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // tag data
+
+ { // all pulses
+
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // forward strand, genomic
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // forward strand, native
+ { }, // forward strand, genomic, aligned
+ { }, // forward strand, native, aligned
+ { }, // forward strand, genomic, aligned, clipped
+ { }, // forward strand, native, aligned, clipped
+ { 50, 50, 50, 0, 0, 30, 10, 20, 10, 0, 0, 20, 10, 20, 10, 0, 0, 40, 40, 40 }, // reverse strand, genomic
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // reverse strand, native
+ { }, // reverse strand, genomic, aligned
+ { }, // reverse strand, native, aligned
+ { }, // reverse strand, genomic, aligned, clipped
+ { } // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, genomic
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, native
+ { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, genomic, aligned
+ { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, native, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
+ { 50, 50, 50, 30, 10, 20, 10, 20, 10, 20, 10, 40, 40, 40 }, // reverse strand, genomic
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // reverse strand, native
+ { 50, 50, 50, 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10, 40, 40, 40 }, // reverse strand, genomic, aligned
+ { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // reverse strand, native, aligned
+ { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 2H4=3D4=3H");
+ tests::CheckPulseFrameTags(
+ "2H4=3D4=3H", // CIGAR
+ "AACCGTTA", // seqBases
+ "AAaaCCGggTTA", // pulseCalls
+ { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 }, // tag data
+
+ { // all pulses
+
+ { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 }, // forward strand, genomic
+ { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 }, // forward strand, native
+ { }, // forward strand, genomic, aligned
+ { }, // forward strand, native, aligned
+ { }, // forward strand, genomic, aligned, clipped
+ { }, // forward strand, native, aligned, clipped
+ { 30, 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 }, // reverse strand, native
+ { }, // reverse strand, genomic, aligned
+ { }, // reverse strand, native, aligned
+ { }, // reverse strand, genomic, aligned, clipped
+ { } // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // forward strand, genomic
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // forward strand, native
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
+ { 30, 10, 20, 10, 20, 10, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // reverse strand, native
+ { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // reverse strand, native, aligned
+ { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 2H3S4=3D4=3S3H");
+ tests::CheckPulseFrameTags(
+ "2H3S4=3D4=3S3H", // CIGAR
+ "TTTAACCGTTACCG", // seqBases
+ "TTTttAACCccGTTAaaCCG", // pulseCalls
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // tag data
+
+ { // all pulses
+
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // forward strand, genomic
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // forward strand, native
+ { }, // forward strand, genomic, aligned
+ { }, // forward strand, native, aligned
+ { }, // forward strand, genomic, aligned, clipped
+ { }, // forward strand, native, aligned, clipped
+ { 50, 50, 50, 0, 0, 30, 10, 20, 10, 0, 0, 20, 10, 20, 10, 0, 0, 40, 40, 40 }, // reverse strand, genomic
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // reverse strand, native
+ { }, // reverse strand, genomic, aligned
+ { }, // reverse strand, native, aligned
+ { }, // reverse strand, genomic, aligned, clipped
+ { } // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, genomic
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, native
+ { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, genomic, aligned
+ { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, native, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
+ { 50, 50, 50, 30, 10, 20, 10, 20, 10, 20, 10, 40, 40, 40 }, // reverse strand, genomic
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // reverse strand, native
+ { 50, 50, 50, 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10, 40, 40, 40 }, // reverse strand, genomic, aligned
+ { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // reverse strand, native, aligned
+ { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+}
+
+TEST(BamRecordTest, PulseUIntTags)
+{
+ {
+ SCOPED_TRACE("CIGAR: 4=3D4=");
+ tests::CheckPulseUIntTags(
+ "4=3D4=", // CIGAR
+ "AACCGTTA", // seqBases
+ "AAaaCCGggTTA", // pulseCalls
+ { 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10, 30 }, // tag data
+
+ { // all pulses
+
+ { 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10, 30 }, // forward strand, genomic
+ { 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10, 30 }, // forward strand, native
+ { }, // forward strand, genomic, aligned
+ { }, // forward strand, native, aligned
+ { }, // forward strand, genomic, aligned, clipped
+ { }, // forward strand, native, aligned, clipped
+ { 30, 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10, 30 }, // reverse strand, native
+ { }, // reverse strand, genomic, aligned
+ { }, // reverse strand, native, aligned
+ { }, // reverse strand, genomic, aligned, clipped
+ { } // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // forward strand, genomic
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // forward strand, native
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
+ { 30, 10, 20, 10, 20, 10, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // reverse strand, native
+ { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // reverse strand, native, aligned
+ { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 4=1D2I2D4=");
+ tests::CheckPulseUIntTags(
+ "4=1D2I2D4=", // CIGAR
+ "ATCCTAGGTT", // seqBases
+ "ATttCCTtAGGggTT", // pulseCalls
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // tag data
+
+ { // all pulses
+
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // forward strand, genomic
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // forward strand, native
+ { }, // forward strand, genomic, aligned
+ { }, // forward strand, native, aligned
+ { }, // forward strand, genomic, aligned, clipped
+ { }, // forward strand, native, aligned, clipped
+ { 30, 10, 0, 0, 20, 10, 70, 0, 80, 20, 10, 0, 0, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // reverse strand, native
+ { }, // reverse strand, genomic, aligned
+ { }, // reverse strand, native, aligned
+ { }, // reverse strand, genomic, aligned, clipped
+ { } // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // forward strand, genomic
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // forward strand, native
+ { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned
+ { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
+ { 30, 10, 20, 10, 70, 80, 20, 10, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // reverse strand, native
+ { 30, 10, 20, 10, 0, 70, 80, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 80, 70, 0, 10, 20, 10, 30 }, // reverse strand, native, aligned
+ { 30, 10, 20, 10, 0, 70, 80, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 80, 70, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 4=1D2P2I2P2D4=");
+ tests::CheckPulseUIntTags(
+ "4=1D2P2I2P2D4=", // CIGAR
+ "ATCCTAGGTT", // seqBases
+ "ATttCCTtAGGggTT", // pulseCalls
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // tag data
+
+ { // all pulses
+
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // forward strand, genomic
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // forward strand, native
+ { }, // forward strand, genomic, aligned
+ { }, // forward strand, native, aligned
+ { }, // forward strand, genomic, aligned, clipped
+ { }, // forward strand, native, aligned, clipped
+ { 30, 10, 0, 0, 20, 10, 70, 0, 80, 20, 10, 0, 0, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // reverse strand, native
+ { }, // reverse strand, genomic, aligned
+ { }, // reverse strand, native, aligned
+ { }, // reverse strand, genomic, aligned, clipped
+ { } // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // forward strand, genomic
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // forward strand, native
+ { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
+ { 30, 10, 20, 10, 70, 80, 20, 10, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // reverse strand, native
+ { 30, 10, 20, 10, 0, 0, 0, 70, 80, 0, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 0, 80, 70, 0, 0, 0, 10, 20, 10, 30 }, // reverse strand, native, aligned
+ { 30, 10, 20, 10, 0, 0, 0, 70, 80, 0, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 0, 80, 70, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 3S4=3D4=3S");
+ tests::CheckPulseUIntTags(
+ "3S4=3D4=3S", // CIGAR
+ "TTTAACCGTTACCG", // seqBases
+ "TTTttAACCccGTTAaaCCG", // pulseCalls
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // tag data
+
+ { // all pulses
+
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // forward strand, genomic
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // forward strand, native
+ { }, // forward strand, genomic, aligned
+ { }, // forward strand, native, aligned
+ { }, // forward strand, genomic, aligned, clipped
+ { }, // forward strand, native, aligned, clipped
+ { 50, 50, 50, 0, 0, 30, 10, 20, 10, 0, 0, 20, 10, 20, 10, 0, 0, 40, 40, 40 }, // reverse strand, genomic
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // reverse strand, native
+ { }, // reverse strand, genomic, aligned
+ { }, // reverse strand, native, aligned
+ { }, // reverse strand, genomic, aligned, clipped
+ { } // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, genomic
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, native
+ { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, genomic, aligned
+ { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, native, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
+ { 50, 50, 50, 30, 10, 20, 10, 20, 10, 20, 10, 40, 40, 40 }, // reverse strand, genomic
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // reverse strand, native
+ { 50, 50, 50, 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10, 40, 40, 40 }, // reverse strand, genomic, aligned
+ { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // reverse strand, native, aligned
+ { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 2H4=3D4=3H");
+ tests::CheckPulseUIntTags(
+ "2H4=3D4=3H", // CIGAR
+ "AACCGTTA", // seqBases
+ "AAaaCCGggTTA", // pulseCalls
+ { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 }, // tag data
+
+ { // all pulses
+
+ { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 }, // forward strand, genomic
+ { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 }, // forward strand, native
+ { }, // forward strand, genomic, aligned
+ { }, // forward strand, native, aligned
+ { }, // forward strand, genomic, aligned, clipped
+ { }, // forward strand, native, aligned, clipped
+ { 30, 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 }, // reverse strand, native
+ { }, // reverse strand, genomic, aligned
+ { }, // reverse strand, native, aligned
+ { }, // reverse strand, genomic, aligned, clipped
+ { } // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // forward strand, genomic
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // forward strand, native
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
+ { 30, 10, 20, 10, 20, 10, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // reverse strand, native
+ { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // reverse strand, native, aligned
+ { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 2H3S4=3D4=3S3H");
+ tests::CheckPulseUIntTags(
+ "2H3S4=3D4=3S3H", // CIGAR
+ "TTTAACCGTTACCG", // seqBases
+ "TTTttAACCccGTTAaaCCG", // pulseCalls
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // tag data
+
+ { // all pulses
+
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // forward strand, genomic
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // forward strand, native
+ { }, // forward strand, genomic, aligned
+ { }, // forward strand, native, aligned
+ { }, // forward strand, genomic, aligned, clipped
+ { }, // forward strand, native, aligned, clipped
+ { 50, 50, 50, 0, 0, 30, 10, 20, 10, 0, 0, 20, 10, 20, 10, 0, 0, 40, 40, 40 }, // reverse strand, genomic
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // reverse strand, native
+ { }, // reverse strand, genomic, aligned
+ { }, // reverse strand, native, aligned
+ { }, // reverse strand, genomic, aligned, clipped
+ { } // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, genomic
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, native
+ { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, genomic, aligned
+ { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, native, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
+ { 50, 50, 50, 30, 10, 20, 10, 20, 10, 20, 10, 40, 40, 40 }, // reverse strand, genomic
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // reverse strand, native
+ { 50, 50, 50, 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10, 40, 40, 40 }, // reverse strand, genomic, aligned
+ { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // reverse strand, native, aligned
+ { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+}
diff --git a/tests/src/test_BamWriter.cpp b/tests/src/test_BamWriter.cpp
index aa92541..75dffe5 100644
--- a/tests/src/test_BamWriter.cpp
+++ b/tests/src/test_BamWriter.cpp
@@ -90,7 +90,7 @@ TEST(BamWriterTest, SingleWrite_UserRecord)
bamRecord.impl_.Tags(tags);
// write record to file
- const string generatedBamFn = "/tmp/bamwriter_generated.bam";
+ const string generatedBamFn = tests::GeneratedData_Dir + "/bamwriter_generated.bam";
{
BamWriter writer(generatedBamFn, inputHeader);
writer.Write(bamRecord);
diff --git a/tests/src/test_DataSetCore.cpp b/tests/src/test_DataSetCore.cpp
index d84296b..df9eae2 100644
--- a/tests/src/test_DataSetCore.cpp
+++ b/tests/src/test_DataSetCore.cpp
@@ -84,6 +84,7 @@ TEST(DataSetCoreTest, DefaultsOk)
EXPECT_FALSE(dataset.MetaType().empty());
EXPECT_FALSE(dataset.TimeStampedName().empty());
EXPECT_FALSE(dataset.UniqueId().empty());
+ EXPECT_FALSE(dataset.Version().empty());
EXPECT_EQ(0, dataset.TimeStampedName().find("pacbio_dataset_"));
diff --git a/tests/src/test_DataSetIO.cpp b/tests/src/test_DataSetIO.cpp
index 47f3066..8f2adb5 100644
--- a/tests/src/test_DataSetIO.cpp
+++ b/tests/src/test_DataSetIO.cpp
@@ -55,7 +55,7 @@ using namespace PacBio::BAM;
using namespace std;
const string alignedBamFn = tests::Data_Dir + "/aligned.bam";
-const string bamGroupFofn = tests::Data_Dir + "/group/group.fofn";
+const string bamGroupFofn = tests::Generated_Dir + "/group.fofn";
const string ali1XmlFn = tests::Data_Dir + "/dataset/ali1.xml";
const string ali2XmlFn = tests::Data_Dir + "/dataset/ali2.xml";
@@ -1344,7 +1344,7 @@ TEST(DataSetIOTest, InspectMalformedXml)
const string expected =
"<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
- "<SubreadSet Description=\"Merged dataset from 1 files using DatasetMerger 0.1.2\" "
+ "<SubreadSet CreatedAt=\"2015-08-19T15:39:36.331\" Description=\"Merged dataset from 1 files using DatasetMerger 0.1.2\" "
"MetaType=\"PacBio.DataSet.HdfSubreadSet\" Name=\"Subreads from runr000013_42267_150403\" "
"Tags=\"pacbio.secondary.instrument=RS\" TimeStampedName=\"hdfsubreadset_2015-08-19T15:39:36.331-07:00\" "
"UniqueId=\"b4741521-2a4c-42df-8a13-0a755ca9ed1e\" Version=\"0.5\" "
diff --git a/tests/src/test_DataSetQuery.cpp b/tests/src/test_DataSetQuery.cpp
index 624136c..996fbfe 100644
--- a/tests/src/test_DataSetQuery.cpp
+++ b/tests/src/test_DataSetQuery.cpp
@@ -53,7 +53,7 @@ const string aligned2BamFn = tests::Data_Dir + "/aligned2.bam";
const string alignedCopyBamFn = tests::GeneratedData_Dir + "/aligned.bam";
const string aligned2CopyBamFn = tests::GeneratedData_Dir + "/aligned2.bam";
-const string group_fofn = tests::Data_Dir + "/group/group.fofn";
+const string group_fofn = tests::Generated_Dir + "/group.fofn";
const string group_file1 = tests::Data_Dir + "/group/test1.bam";
const string group_file2 = tests::Data_Dir + "/group/test2.bam";
const string group_file3 = tests::Data_Dir + "/group/test3.bam";
diff --git a/tests/src/test_EndToEnd.cpp b/tests/src/test_EndToEnd.cpp
index ecd21c7..9675914 100644
--- a/tests/src/test_EndToEnd.cpp
+++ b/tests/src/test_EndToEnd.cpp
@@ -90,8 +90,8 @@ struct BamHdrDeleter
const string inputBamFn = tests::Data_Dir + "/aligned.bam";
const string goldStandardSamFn = tests::Data_Dir + "/aligned.sam";
-const string generatedBamFn = "/tmp/generated.bam";
-const string generatedSamFn = "/tmp/generated.sam";
+const string generatedBamFn = tests::GeneratedData_Dir + "/generated.bam";
+const string generatedSamFn = tests::GeneratedData_Dir + "/generated.sam";
const vector<string> generatedFiles = { generatedBamFn, generatedSamFn };
static inline
diff --git a/tests/src/test_Fasta.cpp b/tests/src/test_Fasta.cpp
new file mode 100644
index 0000000..25b0390
--- /dev/null
+++ b/tests/src/test_Fasta.cpp
@@ -0,0 +1,105 @@
+// Copyright (c) 2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include "TestData.h"
+#include <gtest/gtest.h>
+#include <pbbam/FastaReader.h>
+#include <pbbam/FastaSequence.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+static void CheckSequence(const size_t index, const FastaSequence& seq)
+{
+ SCOPED_TRACE("checking FASTA seq:" + std::to_string(index));
+ switch (index) {
+ case 0 :
+ EXPECT_EQ("1", seq.Name());
+ EXPECT_EQ("TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACAACGCAGCTCCGCCCTCGCGGTGCTCTCCGGGTCTGTGCTGAGGAGAACGCAACTCCGCCGGCGCAGGCG", seq.Bases());
+ break;
+
+ case 1 :
+ EXPECT_EQ("2", seq.Name());
+ EXPECT_EQ("TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACAACGCAGCTCCGCCCTCGCGGTGCTCTCCGGGTCTGTGCTGAGGAGAACGCAAC", seq.Bases());
+ break;
+
+ case 2 :
+ EXPECT_EQ("3", seq.Name());
+ EXPECT_EQ("TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACACCCTAACCCCAACCCCAACCCCAACCCCAACCCCAACCCCAACCCTAACCCCTAACCCTAACCCT", seq.Bases());
+ break;
+
+ default:
+ ASSERT_TRUE(false); // invalid index
+ }
+}
+
+TEST(FastaSequenceTest, BasicConstructorOk)
+{
+ FastaSequence seq{ "1", "GATTACA" };
+ EXPECT_EQ("1", seq.Name());
+ EXPECT_EQ("GATTACA", seq.Bases());
+}
+
+TEST(FastaReaderTest, IterableOk)
+{
+ const string fn = tests::GeneratedData_Dir + "/normal.fa";
+ FastaReader reader{ fn };
+
+ size_t count = 0;
+ FastaSequence seq;
+ while (reader.GetNext(seq)) {
+ CheckSequence(count, seq);
+ ++count;
+ }
+ EXPECT_EQ(3, count);
+}
+
+TEST(FastaReaderTest, ReadAllOk)
+{
+ const string fn = tests::GeneratedData_Dir + "/normal.fa";
+
+ size_t count = 0;
+ for (const auto& seq : FastaReader::ReadAll(fn)) {
+ CheckSequence(count, seq);
+ ++count;
+ }
+ EXPECT_EQ(3, count);
+}
diff --git a/tests/src/test_FileUtils.cpp b/tests/src/test_FileUtils.cpp
index 814200f..c1b4beb 100644
--- a/tests/src/test_FileUtils.cpp
+++ b/tests/src/test_FileUtils.cpp
@@ -39,6 +39,7 @@
#define private public
#endif
+#include "TestData.h"
#include <gtest/gtest.h>
#include <pbbam/../../src/FileUtils.h>
#include <pbbam/../../src/TimeUtils.h>
@@ -63,7 +64,7 @@ TEST(FileUtilsTest, ExistsOk)
{
EXPECT_FALSE(FileUtils::Exists("does_not_exist.txt"));
- const string tmp = "/tmp/pbbam_exists_check.tmp";
+ const string tmp = tests::GeneratedData_Dir + "/pbbam_exists_check.tmp";
const string cmd = string("touch ") + tmp;
ASSERT_EQ(0, system(cmd.c_str()));
EXPECT_TRUE(FileUtils::Exists(tmp));
@@ -79,7 +80,7 @@ TEST(FileUtilsTest, LastModifiedOk)
const auto nowDuration = now.time_since_epoch();
const auto nowSeconds = chrono::duration_cast<chrono::seconds>(nowDuration).count();
- const string tmp = "/tmp/pbbam_lastmod_check.tmp";
+ const string tmp = tests::GeneratedData_Dir + "/pbbam_lastmod_check.tmp";
const string rmCmd = string("rm ") + tmp;
const string touchCmd = string("touch ") + tmp;
int ret = system(rmCmd.c_str());
@@ -142,7 +143,7 @@ TEST(FileUtilsTest, ResolvedFilePathOk)
TEST(FileUtilsTest, SizeOk)
{
- const string tmp = "/tmp/pbbam_empty_file.tmp";
+ const string tmp = tests::GeneratedData_Dir + "/pbbam_empty_file.tmp";
const string cmd = string("touch ") + tmp;
ASSERT_EQ(0, system(cmd.c_str()));
EXPECT_EQ(0, FileUtils::Size(tmp));
diff --git a/tests/src/test_PacBioIndex.cpp b/tests/src/test_PacBioIndex.cpp
index 3375647..fa17dc7 100644
--- a/tests/src/test_PacBioIndex.cpp
+++ b/tests/src/test_PacBioIndex.cpp
@@ -258,13 +258,13 @@ bool PbiIndicesEqual(const PbiIndex& lhs, const PbiIndex& rhs)
TEST(PacBioIndexTest, CreateFromExistingBam)
{
// do this in temp directory, so we can ensure write access
- const string tempDir = "/tmp/";
- const string tempBamFn = tempDir + "aligned2.bam";
+ const string tempDir = tests::GeneratedData_Dir + "/";
+ const string tempBamFn = tempDir + "aligned_copy.bam";
const string tempPbiFn = tempBamFn + ".pbi";
string cmd("cp ");
cmd += test2BamFn;
cmd += " ";
- cmd += tempDir;
+ cmd += tempBamFn;
int cmdResult = system(cmd.c_str());
(void)cmdResult;
@@ -296,7 +296,7 @@ TEST(PacBioIndexTest, CreateFromExistingBam)
TEST(PacBioIndexTest, CreateOnTheFly)
{
// do this in temp directory, so we can ensure write access
- const string tempDir = "/tmp/";
+ const string tempDir = tests::GeneratedData_Dir + "/";
const string tempBamFn = tempDir + "temp.bam";
const string tempPbiFn = tempBamFn + ".pbi";
@@ -378,7 +378,7 @@ TEST(PacBioIndexTest, RawLoadFromPbiFile)
TEST(PacBioIndexTest, BasicAndBarodeSectionsOnly)
{
// do this in temp directory, so we can ensure write access
- const string tempDir = "/tmp/";
+ const string tempDir = tests::GeneratedData_Dir + "/";
const string tempBamFn = tempDir + "phi29.bam";
const string tempPbiFn = tempBamFn + ".pbi";
string cmd("cp ");
diff --git a/tests/src/test_PbiFilterQuery.cpp b/tests/src/test_PbiFilterQuery.cpp
index 21bd553..cacd7cd 100644
--- a/tests/src/test_PbiFilterQuery.cpp
+++ b/tests/src/test_PbiFilterQuery.cpp
@@ -219,7 +219,7 @@ TEST(PbiFilterQueryTest, ZmwRangeFromDatasetOk)
TEST(PbiFilterQueryTest, MissingPbiShouldThrow)
{
const PbiFilter filter{ PbiZmwFilter{31883} };
- const string phi29Bam = tests::Data_Dir + "/phi29.bam";
+ const string phi29Bam = tests::GeneratedData_Dir + "/missing_pbi.bam";
const string hasPbiBam = tests::Data_Dir + "/polymerase/production.scraps.bam";
{ // single file, missing PBI
@@ -257,3 +257,188 @@ TEST(PbiFilterQueryTest, QNameWhitelistFile)
EXPECT_EQ(3, count);
}
+TEST(PbiFilterQueryTest, EmptyFiles)
+{
+ const BamFile file{ tests::Data_Dir + "/empty.bam" };
+ PbiFilterQuery query{ PbiFilter{}, file };
+ size_t count = 0;
+ for (const auto& r : query) {
+ (void)r;
+ ++count;
+ }
+ EXPECT_EQ(0, count);
+}
+
+TEST(PbiFilterQueryTest, BarcodeData)
+{
+ const BamFile file{ tests::Data_Dir + "/phi29.bam" };
+
+ // bc_quality == 1
+ {
+ size_t count = 0;
+ PbiFilterQuery query{ PbiBarcodeQualityFilter{1}, file };
+ for (const auto& r : query) {
+ (void)r;
+ ++count;
+ }
+ EXPECT_EQ(120, count);
+ }
+
+ // bc_quality != 1
+ {
+ size_t count = 0;
+ PbiFilterQuery query{ PbiBarcodeQualityFilter{1, Compare::NOT_EQUAL}, file };
+ for (const auto& r : query) {
+ (void)r;
+ ++count;
+ }
+ EXPECT_EQ(0, count);
+ }
+
+ // bc_forward == 0
+ {
+ size_t count = 0;
+ PbiFilterQuery query{ PbiBarcodeForwardFilter{0}, file };
+ for (const auto& r : query) {
+ (void)r;
+ ++count;
+ }
+ EXPECT_EQ(40, count);
+ }
+
+ // bc_forward == [0,2]
+ {
+ size_t count = 0;
+ const auto ids = vector<int16_t>{ 0, 2 };
+ PbiFilterQuery query{ PbiBarcodeForwardFilter{ ids }, file };
+ for (const auto& r : query) {
+ (void)r;
+ ++count;
+ }
+ EXPECT_EQ(80, count);
+ }
+
+ // bc_reverse != 0
+ {
+ size_t count = 0;
+ PbiFilterQuery query{ PbiBarcodeReverseFilter{0, Compare::NOT_EQUAL}, file };
+ for (const auto& r : query) {
+ (void)r;
+ ++count;
+ }
+ EXPECT_EQ(80, count);
+ }
+}
+
+TEST(PbiFilterQueryTest, BarcodeQualityFromXml)
+{
+
+const string xml_all = R"_XML_(
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:SubreadSet
+ xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd"
+ xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd"
+ xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd"
+ xmlns:pbds="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd"
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c"
+ TimeStampedName="subreadset_150304_231155"
+ MetaType="PacBio.DataSet.SubreadSet"
+ Name="DataSet_SubreadSet"
+ Tags=""
+ Version="3.0.0"
+ CreatedAt="2015-01-27T09:00:01">
+<pbbase:ExternalResources>
+ <pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5193"
+ TimeStampedName="subread_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.SubreadBamFile"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5194"
+ TimeStampedName="bam_index_150304_231155"
+ MetaType="PacBio.Index.PacBioIndex"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+</pbbase:ExternalResources>
+<pbds:Filters>
+ <pbds:Filter>
+ <pbbase:Properties>
+ <pbbase:Property Name="bq" Operator="=" Value="1"/>
+ </pbbase:Properties>
+ </pbds:Filter>
+</pbds:Filters>
+</pbds:SubreadSet>
+)_XML_";
+
+const string xml_none = R"_XML_(
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:SubreadSet
+ xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd"
+ xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd"
+ xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd"
+ xmlns:pbds="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd"
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c"
+ TimeStampedName="subreadset_150304_231155"
+ MetaType="PacBio.DataSet.SubreadSet"
+ Name="DataSet_SubreadSet"
+ Tags=""
+ Version="3.0.0"
+ CreatedAt="2015-01-27T09:00:01">
+<pbbase:ExternalResources>
+ <pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5193"
+ TimeStampedName="subread_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.SubreadBamFile"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5194"
+ TimeStampedName="bam_index_150304_231155"
+ MetaType="PacBio.Index.PacBioIndex"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+</pbbase:ExternalResources>
+<pbds:Filters>
+ <pbds:Filter>
+ <pbbase:Properties>
+ <pbbase:Property Name="bq" Operator="!=" Value="1"/>
+ </pbbase:Properties>
+ </pbds:Filter>
+</pbds:Filters>
+</pbds:SubreadSet>
+)_XML_";
+
+ const BamFile file{ tests::Data_Dir + "/phi29.bam" };
+
+ { // filter allows all records
+ const DataSet ds = DataSet::FromXml(xml_all);
+ const PbiFilterQuery query { PbiFilter::FromDataSet(ds), file };
+ size_t count = 0;
+ for (const auto& r : query) {
+ (void)r;
+ ++count;
+ }
+ EXPECT_EQ(120, count);
+ }
+ { // filter allows no records
+ const DataSet ds = DataSet::FromXml(xml_none);
+ const PbiFilterQuery query { PbiFilter::FromDataSet(ds), file };
+ size_t count = 0;
+ for (const auto& r : query) {
+ (void)r;
+ ++count;
+ }
+ EXPECT_EQ(0, count);
+ }
+}
+
+
diff --git a/tests/src/test_ReadGroupInfo.cpp b/tests/src/test_ReadGroupInfo.cpp
index 2dccd5d..8b9f23a 100644
--- a/tests/src/test_ReadGroupInfo.cpp
+++ b/tests/src/test_ReadGroupInfo.cpp
@@ -96,6 +96,8 @@ TEST(ReadGroupInfoTest, SequencingChemistryOk)
{ // S/P1-C1.1 (Echidna)
const string& chem = "S/P1-C1.1";
EXPECT_EQ(chem, ReadGroupInfo::SequencingChemistryFromTriple("100-619-300","100-867-300","3.1"));
+ EXPECT_EQ(chem, ReadGroupInfo::SequencingChemistryFromTriple("100-619-300","100-867-300","3.2"));
+ EXPECT_EQ(chem, ReadGroupInfo::SequencingChemistryFromTriple("100-619-300","100-867-300","3.3"));
ReadGroupInfo rg("dummy");
rg.BindingKit("100-619-300")
@@ -104,6 +106,29 @@ TEST(ReadGroupInfoTest, SequencingChemistryOk)
EXPECT_EQ(chem, rg.SequencingChemistry());
}
+ { // S/P1-C1.2 (Flea)
+ const string& chem = "S/P1-C1.2";
+ EXPECT_EQ(chem, ReadGroupInfo::SequencingChemistryFromTriple("100-619-300","100-902-100","3.1"));
+ EXPECT_EQ(chem, ReadGroupInfo::SequencingChemistryFromTriple("100-619-300","100-902-100","3.2"));
+ EXPECT_EQ(chem, ReadGroupInfo::SequencingChemistryFromTriple("100-619-300","100-902-100","3.3"));
+
+ ReadGroupInfo rg("dummy");
+ rg.BindingKit("100-619-300")
+ .SequencingKit("100-902-100")
+ .BasecallerVersion("3.1");
+ EXPECT_EQ(chem, rg.SequencingChemistry());
+ }
+ { // S/P1-C1.3 (Goat)
+ const string& chem = "S/P1-C1.3";
+ EXPECT_EQ(chem, ReadGroupInfo::SequencingChemistryFromTriple("100-619-300","100-972-200","3.2"));
+ EXPECT_EQ(chem, ReadGroupInfo::SequencingChemistryFromTriple("100-619-300","100-972-200","3.3"));
+
+ ReadGroupInfo rg("dummy");
+ rg.BindingKit("100-619-300")
+ .SequencingKit("100-972-200")
+ .BasecallerVersion("3.3");
+ EXPECT_EQ(chem, rg.SequencingChemistry());
+ }
}
TEST(ReadGroupInfoTest, SequencingChemistryThrowsOnBadTriple)
diff --git a/tests/src/test_SamWriter.cpp b/tests/src/test_SamWriter.cpp
index 4ec2d7f..f13b5df 100644
--- a/tests/src/test_SamWriter.cpp
+++ b/tests/src/test_SamWriter.cpp
@@ -58,7 +58,7 @@ TEST(SamWriterTest, HeaderOk)
EXPECT_NO_THROW(
{
// write header to file
- const string generatedFn = "/tmp/samwriter_hdr_only.sam";
+ const string generatedFn = tests::GeneratedData_Dir + "/samwriter_hdr_only.sam";
{
const BamHeader inputHeader(hdrText);
SamWriter writer(generatedFn, inputHeader);
@@ -124,7 +124,7 @@ TEST(SamWriterTest, SingleRecordOk)
EXPECT_NO_THROW(
{
// write data to file
- const string generatedFn = "/tmp/samwriter_hdr_and_record.sam";
+ const string generatedFn = tests::GeneratedData_Dir + "/samwriter_hdr_and_record.sam";
{
SamWriter writer(generatedFn, inputHeader);
writer.Write(record);
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index dd6757e..5c589c1 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -1,17 +1,47 @@
-set(ToolsCommonDir ${PacBioBAM_ToolsDir}/common)
-set(PacBioBAM_CramTestsDir ${PacBioBAM_TestsDir}/src/cram)
-
-# quash warning with OptionParser
-include(CheckCXXCompilerFlag)
-check_cxx_compiler_flag("-Wno-unused-private-field" HAS_NO_UNUSED_PRIVATE_FIELD)
-if(HAS_NO_UNUSED_PRIVATE_FIELD)
- set(PacBioBAM_CXX_FLAGS "${PacBioBAM_CXX_FLAGS} -Wno-unused-private-field")
+
+if(DEFINED PacBioBAM_build_pbindex)
+
+ # Deprecating the "PacBioBAM_build_pbindex" command line option in favor of more
+ # general "PacBioBAM_build_tools", as we're starting to add new utilities.
+ #
+ # That said, I don't want to break current auto tests/builds, so I'm providing a
+ # warning message so devs are aware.
+ #
+ # construct warning message
+ set(pbindex_warning "\nDeprecated:\n-DPacBioBAM_build_pbindex\n")
+ if (PacBioBAM_build_pbindex)
+ set(pbindex_warning "${pbindex_warning} Building as requested,")
+ else()
+ set(pbindex_warning "${pbindex_warning} Skipping as requested,")
+ endif()
+ set(pbindex_warning "${pbindex_warning} but support for this option will be removed at some point in the future.\n")
+ message(AUTHOR_WARNING "${pbindex_warning} ** Use -DPacBioBAM_build_tools instead. **\n")
+
+ # force PacBioBAM_build_tools option
+ set(PacBioBAM_build_tools
+ ${PacBioBAM_build_pbindex} CACHE BOOL
+ "Build PacBioBAM with add'l utilities (e.g. pbindex, pbindexdump)." FORCE)
endif()
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${PacBioBAM_CXX_FLAGS}")
-# tools
-add_subdirectory(bam2sam)
-add_subdirectory(pbindex)
-add_subdirectory(pbindexdump)
-add_subdirectory(pbmerge)
+if (PacBioBAM_build_tools)
+
+ # tools directory
+ set(ToolsCommonDir ${PacBioBAM_ToolsDir}/common)
+ set(PacBioBAM_CramTestsDir ${PacBioBAM_TestsDir}/src/cram)
+
+ # quash warning with OptionParser
+ include(CheckCXXCompilerFlag)
+ check_cxx_compiler_flag("-Wno-unused-private-field" HAS_NO_UNUSED_PRIVATE_FIELD)
+ if(HAS_NO_UNUSED_PRIVATE_FIELD)
+ set(PacBioBAM_CXX_FLAGS "${PacBioBAM_CXX_FLAGS} -Wno-unused-private-field")
+ endif()
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${PacBioBAM_CXX_FLAGS}")
+
+ # tools
+ add_subdirectory(bam2sam)
+ add_subdirectory(pbindex)
+ add_subdirectory(pbindexdump)
+ add_subdirectory(pbmerge)
+
+endif()
diff --git a/tools/bam2sam/CMakeLists.txt b/tools/bam2sam/CMakeLists.txt
index ef3a919..5554970 100644
--- a/tools/bam2sam/CMakeLists.txt
+++ b/tools/bam2sam/CMakeLists.txt
@@ -4,7 +4,7 @@ set(Bam2SamSrcDir ${PacBioBAM_ToolsDir}/bam2sam/src)
# create version header
set(Bam2Sam_VERSION ${PacBioBAM_VERSION})
configure_file(
- ${Bam2SamSrcDir}/Bam2SamVersion.h.in Bam2SamVersion.h @ONLY
+ ${Bam2SamSrcDir}/Bam2SamVersion.h.in ${GeneratedDir}/Bam2SamVersion.h @ONLY
)
# list source files
@@ -23,10 +23,17 @@ create_pbbam_tool(
# cram tests
if (PacBioBAM_build_tests)
+
+ configure_file(
+ ${PacBioBAM_CramTestsDir}/bam2sam.t.in
+ ${GeneratedDir}/bam2sam.t
+ )
+
add_test(
NAME bam2sam_CramTests
WORKING_DIRECTORY ${PacBioBAM_TestsDir}/scripts
COMMAND "python" cram.py
- ${PacBioBAM_CramTestsDir}/bam2sam.t
+ ${GeneratedDir}/bam2sam.t
)
+
endif()
diff --git a/tools/pbindex/CMakeLists.txt b/tools/pbindex/CMakeLists.txt
index 6ebe5c2..0bfcf33 100644
--- a/tools/pbindex/CMakeLists.txt
+++ b/tools/pbindex/CMakeLists.txt
@@ -4,7 +4,7 @@ set(PbindexSrcDir ${PacBioBAM_ToolsDir}/pbindex/src)
# create version header
set(PbIndex_VERSION ${PacBioBAM_VERSION})
configure_file(
- ${PbindexSrcDir}/PbIndexVersion.h.in PbIndexVersion.h @ONLY
+ ${PbindexSrcDir}/PbIndexVersion.h.in ${GeneratedDir}/PbIndexVersion.h @ONLY
)
# list source files
diff --git a/tools/pbindexdump/CMakeLists.txt b/tools/pbindexdump/CMakeLists.txt
index 26178e3..88c07b9 100644
--- a/tools/pbindexdump/CMakeLists.txt
+++ b/tools/pbindexdump/CMakeLists.txt
@@ -4,7 +4,7 @@ set(PbindexdumpSrcDir ${PacBioBAM_ToolsDir}/pbindexdump/src)
# create version header
set(PbIndexDump_VERSION ${PacBioBAM_VERSION})
configure_file(
- ${PbindexdumpSrcDir}/PbIndexDumpVersion.h.in PbIndexDumpVersion.h @ONLY
+ ${PbindexdumpSrcDir}/PbIndexDumpVersion.h.in ${GeneratedDir}/PbIndexDumpVersion.h @ONLY
)
# list source files
@@ -25,11 +25,23 @@ create_pbbam_tool(
# cram tests
if (PacBioBAM_build_tests)
+
+ configure_file(
+ ${PacBioBAM_CramTestsDir}/pbindexdump_json.t.in
+ ${GeneratedDir}/pbindexdump_json.t
+ )
+
+ configure_file(
+ ${PacBioBAM_CramTestsDir}/pbindexdump_cpp.t.in
+ ${GeneratedDir}/pbindexdump_cpp.t
+ )
+
add_test(
NAME pbindexdump_CramTests
WORKING_DIRECTORY ${PacBioBAM_TestsDir}/scripts
COMMAND "python" cram.py
- ${PacBioBAM_CramTestsDir}/pbindexdump_json.t
- ${PacBioBAM_CramTestsDir}/pbindexdump_cpp.t
+ ${GeneratedDir}/pbindexdump_json.t
+ ${GeneratedDir}/pbindexdump_cpp.t
)
+
endif()
diff --git a/tools/pbmerge/CMakeLists.txt b/tools/pbmerge/CMakeLists.txt
index 700a75b..fa9a906 100644
--- a/tools/pbmerge/CMakeLists.txt
+++ b/tools/pbmerge/CMakeLists.txt
@@ -4,7 +4,7 @@ set(PbmergeSrcDir ${PacBioBAM_ToolsDir}/pbmerge/src)
# create version header
set(PbMerge_VERSION ${PacBioBAM_VERSION})
configure_file(
- ${PbmergeSrcDir}/PbMergeVersion.h.in PbMergeVersion.h @ONLY
+ ${PbmergeSrcDir}/PbMergeVersion.h.in ${GeneratedDir}/PbMergeVersion.h @ONLY
)
# list source files
@@ -26,15 +26,47 @@ if (PacBioBAM_build_tests)
if(PacBioBAM_auto_validate)
# skip for now til we clean up merge tests under autovalidate, too
else()
+
+ configure_file(
+ ${PacBioBAM_CramTestsDir}/pbmerge_pacbio_ordering.t.in
+ ${GeneratedDir}/pbmerge_pacbio_ordering.t
+ @ONLY
+ )
+
+ configure_file(
+ ${PacBioBAM_CramTestsDir}/pbmerge_aligned_ordering.t.in
+ ${GeneratedDir}/pbmerge_aligned_ordering.t
+ @ONLY
+ )
+
+ configure_file(
+ ${PacBioBAM_CramTestsDir}/pbmerge_mixed_ordering.t.in
+ ${GeneratedDir}/pbmerge_mixed_ordering.t
+ @ONLY
+ )
+
+ configure_file(
+ ${PacBioBAM_CramTestsDir}/pbmerge_dataset.t.in
+ ${GeneratedDir}/pbmerge_dataset.t
+ @ONLY
+ )
+
+ configure_file(
+ ${PacBioBAM_CramTestsDir}/pbmerge_fofn.t.in
+ ${GeneratedDir}/pbmerge_fofn.t
+ @ONLY
+ )
+
add_test(
- NAME pbmerge_CramTests
- WORKING_DIRECTORY ${PacBioBAM_TestsDir}/scripts
- COMMAND "python" cram.py
- ${PacBioBAM_CramTestsDir}/pbmerge_pacbio_ordering.t
- ${PacBioBAM_CramTestsDir}/pbmerge_aligned_ordering.t
- ${PacBioBAM_CramTestsDir}/pbmerge_mixed_ordering.t
- ${PacBioBAM_CramTestsDir}/pbmerge_dataset.t
- ${PacBioBAM_CramTestsDir}/pbmerge_fofn.t
- )
+ NAME pbmerge_CramTests
+ WORKING_DIRECTORY ${PacBioBAM_TestsDir}/scripts
+ COMMAND "python" cram.py
+ ${GeneratedDir}/pbmerge_pacbio_ordering.t
+ ${GeneratedDir}/pbmerge_aligned_ordering.t
+ ${GeneratedDir}/pbmerge_mixed_ordering.t
+ ${GeneratedDir}/pbmerge_dataset.t
+ ${GeneratedDir}/pbmerge_fofn.t
+ )
+
endif()
endif()
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/pbbam.git
More information about the debian-med-commit
mailing list