[med-svn] [Git][med-team/racon][upstream] New upstream version 1.4.20
Michael R. Crusoe
gitlab at salsa.debian.org
Tue Jan 19 08:00:39 GMT 2021
Michael R. Crusoe pushed to branch upstream at Debian Med / racon
Commits:
58f83eea by Michael R. Crusoe at 2021-01-19T08:00:10+01:00
New upstream version 1.4.20
- - - - -
18 changed files:
- .gitmodules
- .travis.yml
- CMakeLists.txt
- README.md
- scripts/racon_wrapper.py
- src/cuda/cudaaligner.cpp
- src/cuda/cudaaligner.hpp
- src/cuda/cudabatch.cpp
- src/cuda/cudabatch.hpp
- src/cuda/cudapolisher.cpp
- src/cuda/cudapolisher.hpp
- src/main.cpp
- src/overlap.hpp
- src/polisher.cpp
- src/polisher.hpp
- src/window.cpp
- test/racon_test.cpp
- vendor/meson.build
Changes:
=====================================
.gitmodules
=====================================
@@ -1,6 +1,3 @@
-[submodule "vendor/bioparser"]
- path = vendor/bioparser
- url = https://github.com/rvaser/bioparser
[submodule "vendor/spoa"]
path = vendor/spoa
url = https://github.com/rvaser/spoa
@@ -10,12 +7,10 @@
[submodule "vendor/edlib"]
path = vendor/edlib
url = https://github.com/martinsos/edlib
-[submodule "vendor/googletest"]
- path = vendor/googletest
- url = https://github.com/google/googletest
[submodule "vendor/rampler"]
path = vendor/rampler
url = https://github.com/rvaser/rampler
-[submodule "vendor/ClaraGenomicsAnalysis"]
- path = vendor/ClaraGenomicsAnalysis
- url = https://github.com/clara-genomics/ClaraGenomicsAnalysis
+[submodule "vendor/GenomeWorks"]
+ path = vendor/GenomeWorks
+ url = https://github.com/clara-parabricks/GenomeWorks.git
+ branch = master
=====================================
.travis.yml
=====================================
@@ -2,42 +2,46 @@ dist: trusty
language: cpp
-compiler:
- - clang
- - gcc
+matrix:
+ include:
+ - name: "GCC 4.8 (Linux)" # GCC 4.8.5 & CMake 3.9.2
+ os: linux
+ addons:
+ apt:
+ sources:
+ - ubuntu-toolchain-r-test
+ packages:
+ - g++-4.8
+ - cmake
+ env:
+ - SET_COMPILER="export CC=gcc-4.8 && export CXX=g++-4.8"
+
+ - name: "Clang 3.5 (Linux)" # Clang 3.5.0 & CMake 3.9.2
+ os: linux
+ addons:
+ apt:
+ sources:
+ - llvm-toolchain-trusty-3.5
+ packages:
+ - clang-3.5
+ - cmake
+ env:
+ - SET_COMPILER="export CC=clang-3.5 && export CXX=clang++-3.5"
before_install:
- # cmake 3.2
- - sudo add-apt-repository ppa:george-edison55/cmake-3.x -y
-
- # g++4.8.1
- - if [ "$CXX" == "g++" ]; then sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test; fi
-
- # clang 3.4
- - if [ "$CXX" == "clang++" ]; then sudo add-apt-repository -y ppa:h-rayflood/llvm; fi
-
- - sudo apt-get update -qq
+ - eval "${SET_COMPILER}"
+ - git clone https://github.com/google/googletest && cd googletest && mkdir build && cd build && git checkout 703bd9c
+ - cmake -DCMAKE_CXX_FLAGS="-std=c++11" .. && make && sudo make install
+ - cd ../../
install:
- # cmake 3.2
- - sudo apt-get install cmake cmake-data
-
- # g++4.8.1
- - if [ "$CXX" == "g++" ]; then sudo apt-get install -qq g++-4.8; fi
- - if [ "$CXX" == "g++" ]; then export CXX="g++-4.8"; fi
-
- # clang 3.4
- - if [ "$CXX" == "clang++" ]; then sudo apt-get install --allow-unauthenticated -qq clang-3.4; fi
- - if [ "$CXX" == "clang++" ]; then export CXX="clang++-3.4"; fi
+ - mkdir build && cd build
+ - cmake -Dspoa_build_executable=ON -Dracon_build_tests=ON -Dracon_build_wrapper=ON -DCMAKE_BUILD_TYPE=Release .. && make
script:
- - mkdir build
- - cd build
- - cmake -Dracon_build_tests=ON -DCMAKE_BUILD_TYPE=Release ..
- - make
- - ./bin/racon_test
+ - ./bin/racon --version
+ - ./bin/racon_test
notifications:
- email:
- on_success: change
- on_failure: always
+ email:
+ on_failure: always
=====================================
CMakeLists.txt
=====================================
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.2)
project(racon)
-set(racon_version 1.4.13)
+set(racon_version 1.4.20)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
@@ -48,7 +48,7 @@ endif()
target_compile_definitions(racon PRIVATE RACON_VERSION="v${racon_version}")
if (NOT TARGET bioparser)
- add_subdirectory(vendor/bioparser EXCLUDE_FROM_ALL)
+ add_subdirectory(vendor/spoa/vendor/bioparser EXCLUDE_FROM_ALL)
endif()
if (NOT TARGET spoa)
add_subdirectory(vendor/spoa EXCLUDE_FROM_ALL)
@@ -66,24 +66,24 @@ if (racon_enable_cuda)
find_package(cudaaligner REQUIRED)
elseif (DEFINED CLARAGENOMICSANALYSIS_SRC_PATH)
if (NOT TARGET cudapoa)
- add_subdirectory(${CLARAGENOMICSANALYSIS_SRC_PATH} ${CMAKE_CURRENT_BINARY_DIR}/ClaraGenomicsAnalysis EXCLUDE_FROM_ALL)
+ add_subdirectory(${CLARAGENOMICSANALYSIS_SRC_PATH} ${CMAKE_CURRENT_BINARY_DIR}/GenomeWorks EXCLUDE_FROM_ALL)
endif()
if (NOT TARGET cudaaligner)
- add_subdirectory(${CLARAGENOMICSANALYSIS_SRC_PATH} ${CMAKE_CURRENT_BINARY_DIR}/ClaraGenomicsAnalysis EXCLUDE_FROM_ALL)
+ add_subdirectory(${CLARAGENOMICSANALYSIS_SRC_PATH} ${CMAKE_CURRENT_BINARY_DIR}/GenomeWorks EXCLUDE_FROM_ALL)
endif()
- elseif(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/vendor/ClaraGenomicsAnalysis)
+ elseif(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/vendor/GenomeWorks)
if (NOT TARGET cudapoa)
- add_subdirectory(vendor/ClaraGenomicsAnalysis ${CMAKE_CURRENT_BINARY_DIR}/ClaraGenomicsAnalysis EXCLUDE_FROM_ALL)
+ add_subdirectory(vendor/GenomeWorks ${CMAKE_CURRENT_BINARY_DIR}/GenomeWorks EXCLUDE_FROM_ALL)
endif()
if (NOT TARGET cudaaligner)
- add_subdirectory(vendor/ClaraGenomicsAnalysis ${CMAKE_CURRENT_BINARY_DIR}/ClaraGenomicsAnalysis EXCLUDE_FROM_ALL)
+ add_subdirectory(vendor/GenomeWorks ${CMAKE_CURRENT_BINARY_DIR}/GenomeWorks EXCLUDE_FROM_ALL)
endif()
else()
if (NOT TARGET cudapoa)
- add_subdirectory(../ClaraGenomicsAnalysis ${CMAKE_CURRENT_BINARY_DIR}/ClaraGenomicsAnalysis EXCLUDE_FROM_ALL)
+ add_subdirectory(../GenomeWorks ${CMAKE_CURRENT_BINARY_DIR}/GenomeWorks EXCLUDE_FROM_ALL)
endif()
if (NOT TARGET cudaaligner)
- add_subdirectory(../ClaraGenomicsAnalysis ${CMAKE_CURRENT_BINARY_DIR}/ClaraGenomicsAnalysis EXCLUDE_FROM_ALL)
+ add_subdirectory(../GenomeWorks ${CMAKE_CURRENT_BINARY_DIR}/GenomeWorks EXCLUDE_FROM_ALL)
endif()
endif()
endif()
@@ -96,6 +96,7 @@ endif()
install(TARGETS racon DESTINATION bin)
if (racon_build_tests)
+ find_package(GTest REQUIRED)
set(racon_test_data_path ${PROJECT_SOURCE_DIR}/test/data/)
configure_file("${PROJECT_SOURCE_DIR}/test/racon_test_config.h.in"
"${PROJECT_BINARY_DIR}/config/racon_test_config.h")
@@ -118,11 +119,7 @@ if (racon_build_tests)
add_executable(racon_test ${racon_test_sources})
endif()
- if (NOT TARGET gtest_main)
- add_subdirectory(vendor/googletest/googletest EXCLUDE_FROM_ALL)
- endif()
-
- target_link_libraries(racon_test bioparser spoa thread_pool edlib_static gtest_main)
+ target_link_libraries(racon_test bioparser spoa thread_pool edlib_static GTest::Main)
if (racon_enable_cuda)
target_link_libraries(racon_test cudapoa cudaaligner)
endif()
=====================================
README.md
=====================================
@@ -1,7 +1,7 @@
# Racon
[![Latest GitHub release](https://img.shields.io/github/release/lbcb-sci/racon.svg)](https://github.com/lbcb-sci/racon/releases/latest)
-[![Build status for gcc/clang](https://travis-ci.org/lbcb-sci/racon.svg?branch=master)](https://travis-ci.org/lbcb-sci/racon)
+[![Build status for gcc/clang](https://travis-ci.com/lbcb-sci/racon.svg?branch=master)](https://travis-ci.com/lbcb-sci/racon)
[![Published in Genome Research](https://img.shields.io/badge/published%20in-Genome%20Research-blue.svg)](https://doi.org/10.1101/gr.214270.116)
Consensus module for raw de novo DNA assembly of long uncorrected reads.
@@ -20,11 +20,12 @@ A **wrapper script** is also available to enable easier usage to the end-user fo
## Dependencies
1. gcc 4.8+ or clang 3.4+
2. cmake 3.2+
+3. zlib
### CUDA Support
1. gcc 5.0+
2. cmake 3.10+
-4. CUDA 9.0+
+3. CUDA 9.0+
## Installation
To install Racon run the following commands:
@@ -44,12 +45,12 @@ Optionally, you can run `sudo make install` to install racon executable to your
***Note***: if you omitted `--recursive` from `git clone`, run `git submodule update --init --recursive` before proceeding with compilation.
-To build unit tests add `-Dracon_build_tests=ON` while running `cmake`. After installation, an executable named `racon_test` will be created in `build/bin`.
+To build unit tests add `-Dracon_build_tests=ON` while running `cmake` (Gtest required). After installation, an executable named `racon_test` will be created in `build/bin`.
To build the wrapper script add `-Dracon_build_wrapper=ON` while running `cmake`. After installation, an executable named `racon_wrapper` (python script) will be created in `build/bin`.
### CUDA Support
-Racon makes use of [NVIDIA's ClaraGenomicsAnalysis SDK](https://github.com/clara-genomics/ClaraGenomicsAnalysis) for CUDA accelerated polishing and alignment.
+Racon makes use of [NVIDIA's GenomeWorks SDK](https://github.com/clara-parabricks/GenomeWorks) for CUDA accelerated polishing and alignment.
To build `racon` with CUDA support, add `-Dracon_enable_cuda=ON` while running `cmake`. If CUDA support is unavailable, the `cmake` step will error out.
Note that the CUDA support flag does not produce a new binary target. Instead it augments the existing `racon` binary itself.
@@ -122,12 +123,16 @@ Usage of `racon` is as following:
only available when built with CUDA:
-c, --cudapoa-batches <int>
default: 0
- number of batches for CUDA accelerated polishing
+ number of batches for CUDA accelerated polishing per GPU
-b, --cuda-banded-alignment
use banding approximation for polishing on GPU. Only applicable when -c is used.
--cudaaligner-batches <int>
default: 0
- number of batches for CUDA accelerated alignment
+ number of batches for CUDA accelerated alignment per GPU
+ --cudaaligner-band-width <int>
+ default: 0
+ Band width for cuda alignment. Must be >= 0. Non-zero allows user defined
+ band width, whereas 0 implies auto band width determination.
`racon_test` is run without any parameters.
=====================================
scripts/racon_wrapper.py
=====================================
@@ -35,9 +35,9 @@ class RaconWrapper:
self.mismatch = mismatch
self.gap = gap
self.threads = threads
- self.cudaaligner_batches = cudaaligner_batches
- self.cudapoa_batches = cudapoa_batches
- self.cuda_banded_alignment = cuda_banded_alignment
+ # self.cudaaligner_batches = cudaaligner_batches
+ # self.cudapoa_batches = cudapoa_batches
+ # self.cuda_banded_alignment = cuda_banded_alignment
self.work_directory = os.getcwd() + '/racon_work_directory_' + str(time.time())
def __enter__(self):
@@ -119,7 +119,7 @@ class RaconWrapper:
racon_params = [RaconWrapper.__racon]
if (self.include_unpolished == True): racon_params.append('-u')
if (self.fragment_correction == True): racon_params.append('-f')
- if (self.cuda_banded_alignment == True): racon_params.append('-b')
+ # if (self.cuda_banded_alignment == True): racon_params.append('-b')
racon_params.extend(['-w', str(self.window_length),
'-q', str(self.quality_threshold),
'-e', str(self.error_threshold),
@@ -127,8 +127,8 @@ class RaconWrapper:
'-x', str(self.mismatch),
'-g', str(self.gap),
'-t', str(self.threads),
- '--cudaaligner-batches', str(self.cudaaligner_batches),
- '-c', str(self.cudapoa_batches),
+ # '--cudaaligner-batches', str(self.cudaaligner_batches),
+ # '-c', str(self.cudapoa_batches),
self.subsampled_sequences, self.overlaps, ""])
for target_sequences_part in self.split_target_sequences:
=====================================
src/cuda/cudaaligner.cpp
=====================================
@@ -4,49 +4,48 @@
* @brief CUDABatchAligner class source file
*/
-#include <claragenomics/utils/cudautils.hpp>
+#include <claraparabricks/genomeworks/utils/cudautils.hpp>
#include "cudaaligner.hpp"
namespace racon {
+using namespace claraparabricks::genomeworks::cudaaligner;
+
std::atomic<uint32_t> CUDABatchAligner::batches;
-std::unique_ptr<CUDABatchAligner> createCUDABatchAligner(uint32_t max_query_size,
- uint32_t max_target_size,
- uint32_t max_alignments,
- uint32_t device_id)
+std::unique_ptr<CUDABatchAligner> createCUDABatchAligner(uint32_t max_bandwidth,
+ uint32_t device_id,
+ int64_t max_gpu_memory)
{
- return std::unique_ptr<CUDABatchAligner>(new CUDABatchAligner(max_query_size,
- max_target_size,
- max_alignments,
- device_id));
+ return std::unique_ptr<CUDABatchAligner>(new CUDABatchAligner(max_bandwidth,
+ device_id,
+ max_gpu_memory));
}
-CUDABatchAligner::CUDABatchAligner(uint32_t max_query_size,
- uint32_t max_target_size,
- uint32_t max_alignments,
- uint32_t device_id)
+CUDABatchAligner::CUDABatchAligner(uint32_t max_bandwidth,
+ uint32_t device_id,
+ int64_t max_gpu_memory)
: overlaps_()
, stream_(0)
{
bid_ = CUDABatchAligner::batches++;
- CGA_CU_CHECK_ERR(cudaSetDevice(device_id));
+ GW_CU_CHECK_ERR(cudaSetDevice(device_id));
- CGA_CU_CHECK_ERR(cudaStreamCreate(&stream_));
+ GW_CU_CHECK_ERR(cudaStreamCreate(&stream_));
- aligner_ = claragenomics::cudaaligner::create_aligner(max_query_size,
- max_target_size,
- max_alignments,
- claragenomics::cudaaligner::AlignmentType::global_alignment,
- stream_,
- device_id);
+ aligner_ = create_aligner(AlignmentType::global_alignment,
+ max_bandwidth,
+ stream_,
+ device_id,
+ max_gpu_memory);
}
CUDABatchAligner::~CUDABatchAligner()
{
- CGA_CU_CHECK_ERR(cudaStreamDestroy(stream_));
+ aligner_.reset();
+ GW_CU_CHECK_ERR(cudaStreamDestroy(stream_));
}
bool CUDABatchAligner::addOverlap(Overlap* overlap, std::vector<std::unique_ptr<Sequence>>& sequences)
@@ -59,18 +58,18 @@ bool CUDABatchAligner::addOverlap(Overlap* overlap, std::vector<std::unique_ptr<
// NOTE: The cudaaligner API for adding alignments is the opposite of edlib. Hence, what is
// treated as target in edlib is query in cudaaligner and vice versa.
- claragenomics::cudaaligner::StatusType s = aligner_->add_alignment(t, t_len,
+ StatusType s = aligner_->add_alignment(t, t_len,
q, q_len);
- if (s == claragenomics::cudaaligner::StatusType::exceeded_max_alignments)
+ if (s == StatusType::exceeded_max_alignments)
{
return false;
}
- else if (s == claragenomics::cudaaligner::StatusType::exceeded_max_alignment_difference
- || s == claragenomics::cudaaligner::StatusType::exceeded_max_length)
+ else if (s == StatusType::exceeded_max_alignment_difference
+ || s == StatusType::exceeded_max_length)
{
// Do nothing as this case will be handled by CPU aligner.
}
- else if (s != claragenomics::cudaaligner::StatusType::success)
+ else if (s != StatusType::success)
{
fprintf(stderr, "Unknown error in cuda aligner!\n");
}
@@ -90,7 +89,7 @@ void CUDABatchAligner::generate_cigar_strings()
{
aligner_->sync_alignments();
- const std::vector<std::shared_ptr<claragenomics::cudaaligner::Alignment>>& alignments = aligner_->get_alignments();
+ const std::vector<std::shared_ptr<Alignment>>& alignments = aligner_->get_alignments();
// Number of alignments should be the same as number of overlaps.
if (overlaps_.size() != alignments.size())
{
=====================================
src/cuda/cudaaligner.hpp
=====================================
@@ -3,9 +3,9 @@
*
* @brief CUDA aligner class header file
*/
-#include <claragenomics/cudaaligner/cudaaligner.hpp>
-#include <claragenomics/cudaaligner/aligner.hpp>
-#include <claragenomics/cudaaligner/alignment.hpp>
+#include <claraparabricks/genomeworks/cudaaligner/cudaaligner.hpp>
+#include <claraparabricks/genomeworks/cudaaligner/aligner.hpp>
+#include <claraparabricks/genomeworks/cudaaligner/alignment.hpp>
#include "overlap.hpp"
#include "sequence.hpp"
@@ -16,7 +16,7 @@
namespace racon {
class CUDABatchAligner;
-std::unique_ptr<CUDABatchAligner> createCUDABatchAligner(uint32_t max_query_size, uint32_t max_target_size, uint32_t max_alignments, uint32_t device_id);
+std::unique_ptr<CUDABatchAligner> createCUDABatchAligner(uint32_t max_bandwidth, uint32_t device_id, int64_t max_gpu_memory);
class CUDABatchAligner
{
@@ -68,14 +68,14 @@ class CUDABatchAligner
// Builder function to create a new CUDABatchAligner object.
friend std::unique_ptr<CUDABatchAligner>
- createCUDABatchAligner(uint32_t max_query_size, uint32_t max_target_size, uint32_t max_alignments, uint32_t device_id);
+ createCUDABatchAligner(uint32_t max_bandwidth, uint32_t device_id, int64_t max_gpu_memory);
protected:
- CUDABatchAligner(uint32_t max_query_size, uint32_t max_target_size, uint32_t max_alignments, uint32_t device_id);
+ CUDABatchAligner(uint32_t max_bandwidth, uint32_t device_id, int64_t max_gpu_memory);
CUDABatchAligner(const CUDABatchAligner&) = delete;
const CUDABatchAligner& operator=(const CUDABatchAligner&) = delete;
- std::unique_ptr<claragenomics::cudaaligner::Aligner> aligner_;
+ std::unique_ptr<claraparabricks::genomeworks::cudaaligner::Aligner> aligner_;
std::vector<Overlap*> overlaps_;
=====================================
src/cuda/cudabatch.cpp
=====================================
@@ -13,10 +13,12 @@
#include "cudautils.hpp"
#include "spoa/spoa.hpp"
-#include <claragenomics/utils/cudautils.hpp>
+#include <claraparabricks/genomeworks/utils/cudautils.hpp>
namespace racon {
+using namespace claraparabricks::genomeworks::cudapoa;
+
std::atomic<uint32_t> CUDABatchProcessor::batches;
std::unique_ptr<CUDABatchProcessor> createCUDABatch(uint32_t max_window_depth,
@@ -49,28 +51,32 @@ CUDABatchProcessor::CUDABatchProcessor(uint32_t max_window_depth,
bid_ = CUDABatchProcessor::batches++;
// Create new CUDA stream.
- CGA_CU_CHECK_ERR(cudaStreamCreate(&stream_));
-
- cudapoa_batch_ = claragenomics::cudapoa::create_batch(max_window_depth,
- device,
- stream_,
- avail_mem,
- claragenomics::cudapoa::OutputType::consensus,
- gap,
- mismatch,
- match,
- cuda_banded_alignment);
+ GW_CU_CHECK_ERR(cudaStreamCreate(&stream_));
+
+ BatchConfig batch_config(1023,
+ max_window_depth,
+ 256,
+ cuda_banded_alignment ? BandMode::static_band : BandMode::full_band);
+
+ cudapoa_batch_ = create_batch(device,
+ stream_,
+ avail_mem,
+ OutputType::consensus,
+ batch_config,
+ gap,
+ mismatch,
+ match);
}
CUDABatchProcessor::~CUDABatchProcessor()
{
// Destroy CUDA stream.
- CGA_CU_CHECK_ERR(cudaStreamDestroy(stream_));
+ GW_CU_CHECK_ERR(cudaStreamDestroy(stream_));
}
bool CUDABatchProcessor::addWindow(std::shared_ptr<Window> window)
{
- claragenomics::cudapoa::Group poa_group;
+ Group poa_group;
uint32_t num_seqs = window->sequences_.size();
std::vector<std::vector<int8_t>> all_read_weights(num_seqs, std::vector<int8_t>());
@@ -79,7 +85,7 @@ bool CUDABatchProcessor::addWindow(std::shared_ptr<Window> window)
std::pair<const char*, uint32_t> qualities = window->qualities_.front();
std::vector<int8_t> backbone_weights;
convertPhredQualityToWeights(qualities.first, qualities.second, all_read_weights[0]);
- claragenomics::cudapoa::Entry e = {
+ Entry e = {
seq.first,
all_read_weights[0].data(),
static_cast<int32_t>(seq.second)
@@ -107,7 +113,7 @@ bool CUDABatchProcessor::addWindow(std::shared_ptr<Window> window)
qualities = window->qualities_.at(i);
convertPhredQualityToWeights(qualities.first, qualities.second, all_read_weights[i]);
- claragenomics::cudapoa::Entry p = {
+ Entry p = {
seq.first,
all_read_weights[i].data(),
static_cast<int32_t>(seq.second)
@@ -116,12 +122,11 @@ bool CUDABatchProcessor::addWindow(std::shared_ptr<Window> window)
}
// Add group to CUDAPOA batch object.
- std::vector<claragenomics::cudapoa::StatusType> entry_status;
- claragenomics::cudapoa::StatusType status = cudapoa_batch_->add_poa_group(entry_status,
- poa_group);
+ std::vector<StatusType> entry_status;
+ StatusType status = cudapoa_batch_->add_poa_group(entry_status, poa_group);
// If group was added, then push window in accepted windows list.
- if (status != claragenomics::cudapoa::StatusType::success)
+ if (status != StatusType::success)
{
return false;
}
@@ -135,17 +140,17 @@ bool CUDABatchProcessor::addWindow(std::shared_ptr<Window> window)
int32_t seq_added = 0;
for(uint32_t i = 1; i < entry_status.size(); i++)
{
- if (entry_status[i] == claragenomics::cudapoa::StatusType::exceeded_maximum_sequence_size)
+ if (entry_status[i] == StatusType::exceeded_maximum_sequence_size)
{
long_seq++;
continue;
}
- else if (entry_status[i] == claragenomics::cudapoa::StatusType::exceeded_maximum_sequences_per_poa)
+ else if (entry_status[i] == StatusType::exceeded_maximum_sequences_per_poa)
{
skipped_seq++;
continue;
}
- else if (entry_status[i] != claragenomics::cudapoa::StatusType::success)
+ else if (entry_status[i] != StatusType::success)
{
fprintf(stderr, "Could not add sequence to POA in batch %d.\n",
cudapoa_batch_->batch_id());
@@ -195,13 +200,13 @@ void CUDABatchProcessor::getConsensus()
{
std::vector<std::string> consensuses;
std::vector<std::vector<uint16_t>> coverages;
- std::vector<claragenomics::cudapoa::StatusType> output_status;
+ std::vector<StatusType> output_status;
cudapoa_batch_->get_consensus(consensuses, coverages, output_status);
for(uint32_t i = 0; i < windows_.size(); i++)
{
auto window = windows_.at(i);
- if (output_status.at(i) != claragenomics::cudapoa::StatusType::success)
+ if (output_status.at(i) != StatusType::success)
{
// leave the failure cases to CPU polisher
window_consensus_status_.emplace_back(false);
=====================================
src/cuda/cudabatch.hpp
=====================================
@@ -11,7 +11,7 @@
#include <atomic>
#include "window.hpp"
-#include <claragenomics/cudapoa/batch.hpp>
+#include <claraparabricks/genomeworks/cudapoa/batch.hpp>
namespace spoa {
class AlignmentEngine;
@@ -106,7 +106,7 @@ protected:
uint32_t bid_ = 0;
// CUDA-POA library object that manages POA batch.
- std::unique_ptr<claragenomics::cudapoa::Batch> cudapoa_batch_;
+ std::unique_ptr<claraparabricks::genomeworks::cudapoa::Batch> cudapoa_batch_;
// Stream for running POA batch.
cudaStream_t stream_;
=====================================
src/cuda/cudapolisher.cpp
=====================================
@@ -12,9 +12,10 @@
#include "sequence.hpp"
#include "logger.hpp"
#include "cudapolisher.hpp"
-#include <claragenomics/utils/cudautils.hpp>
+#include <claraparabricks/genomeworks/utils/cudautils.hpp>
+#include <algorithm>
-#include "bioparser/bioparser.hpp"
+#include "bioparser/parser.hpp"
namespace racon {
@@ -29,7 +30,7 @@ CUDAPolisher::CUDAPolisher(std::unique_ptr<bioparser::Parser<Sequence>> sparser,
PolisherType type, uint32_t window_length, double quality_threshold,
double error_threshold, bool trim, int8_t match, int8_t mismatch, int8_t gap,
uint32_t num_threads, uint32_t cudapoa_batches, bool cuda_banded_alignment,
- uint32_t cudaaligner_batches)
+ uint32_t cudaaligner_batches, uint32_t cudaaligner_band_width)
: Polisher(std::move(sparser), std::move(oparser), std::move(tparser),
type, window_length, quality_threshold, error_threshold, trim,
match, mismatch, gap, num_threads)
@@ -39,11 +40,12 @@ CUDAPolisher::CUDAPolisher(std::unique_ptr<bioparser::Parser<Sequence>> sparser,
, mismatch_(mismatch)
, match_(match)
, cuda_banded_alignment_(cuda_banded_alignment)
+ , cudaaligner_band_width_(cudaaligner_band_width)
{
- claragenomics::cudapoa::Init();
- claragenomics::cudaaligner::Init();
+ claraparabricks::genomeworks::cudapoa::Init();
+ claraparabricks::genomeworks::cudaaligner::Init();
- CGA_CU_CHECK_ERR(cudaGetDeviceCount(&num_devices_));
+ GW_CU_CHECK_ERR(cudaGetDeviceCount(&num_devices_));
if (num_devices_ < 1)
{
@@ -56,8 +58,8 @@ CUDAPolisher::CUDAPolisher(std::unique_ptr<bioparser::Parser<Sequence>> sparser,
for(int32_t dev_id = 0; dev_id < num_devices_; dev_id++)
{
std::cerr << "Initialize device " << dev_id << std::endl;
- CGA_CU_CHECK_ERR(cudaSetDevice(dev_id));
- CGA_CU_CHECK_ERR(cudaFree(0));
+ GW_CU_CHECK_ERR(cudaSetDevice(dev_id));
+ GW_CU_CHECK_ERR(cudaFree(0));
}
std::cerr << "[CUDAPolisher] Constructed." << std::endl;
@@ -69,27 +71,10 @@ CUDAPolisher::~CUDAPolisher()
cudaProfilerStop();
}
-std::vector<uint32_t> CUDAPolisher::calculate_batches_per_gpu(uint32_t batches, uint32_t gpus)
-{
- // Bin batches into each GPU.
- std::vector<uint32_t> batches_per_gpu(gpus, batches / gpus);
-
- for(uint32_t i = 0; i < batches % gpus; ++i)
- {
- ++batches_per_gpu[i];
- }
-
- return batches_per_gpu;
-}
-
-
void CUDAPolisher::find_overlap_breaking_points(std::vector<std::unique_ptr<Overlap>>& overlaps)
{
if (cudaaligner_batches_ >= 1)
{
- // TODO: Experimentally this is giving decent perf
- const uint32_t MAX_ALIGNMENTS = 200;
-
logger_->log();
std::mutex mutex_overlaps;
uint32_t next_overlap_index = 0;
@@ -105,7 +90,7 @@ void CUDAPolisher::find_overlap_breaking_points(std::vector<std::unique_ptr<Over
uint32_t count = overlaps.size();
while(next_overlap_index < count)
{
- if (batch->addOverlap(overlaps.at(next_overlap_index).get(), sequences_))
+ if (batch->addOverlap(overlaps[next_overlap_index].get(), sequences_))
{
next_overlap_index++;
}
@@ -159,14 +144,38 @@ void CUDAPolisher::find_overlap_breaking_points(std::vector<std::unique_ptr<Over
}
};
- // Bin batches into each GPU.
- std::vector<uint32_t> batches_per_gpu = calculate_batches_per_gpu(cudaaligner_batches_, num_devices_);
+ // Calculate mean and std deviation of target/query sizes
+ // and use that to calculate cudaaligner batch size.
+
+ // Calculate average length
+ int64_t len_sum = 0;
+ for(uint32_t i = 0; i < overlaps.size(); i++)
+ {
+ len_sum += overlaps[i]->length();
+ }
+ int64_t mean = len_sum / overlaps.size();
+
+ // Calculate band width automatically if set to 0
+ if (cudaaligner_band_width_ == 0)
+ {
+ // Use 10% of max sequence length as band width
+ cudaaligner_band_width_ = static_cast<uint32_t>(mean * 0.1f);
+ }
for(int32_t device = 0; device < num_devices_; device++)
{
- for(uint32_t batch = 0; batch < batches_per_gpu.at(device); batch++)
+ GW_CU_CHECK_ERR(cudaSetDevice(device));
+
+
+ size_t free, total;
+ GW_CU_CHECK_ERR(cudaMemGetInfo(&free, &total));
+ const size_t free_usable_memory = static_cast<float>(free) * 90 / 100; // Using 90% of available memory
+ const int64_t usable_memory_per_aligner = free_usable_memory / cudaaligner_batches_;
+ const int32_t max_bandwidth = cudaaligner_band_width_ & ~0x1; // Band width needs to be even
+ std::cerr << "GPU " << device << ": Aligning with band width " << max_bandwidth << std::endl;
+ for(uint32_t batch = 0; batch < cudaaligner_batches_; batch++)
{
- batch_aligners_.emplace_back(createCUDABatchAligner(15000, 15000, MAX_ALIGNMENTS, device));
+ batch_aligners_.emplace_back(createCUDABatchAligner(max_bandwidth, device, usable_memory_per_aligner));
}
}
@@ -177,7 +186,7 @@ void CUDAPolisher::find_overlap_breaking_points(std::vector<std::unique_ptr<Over
for(auto& aligner : batch_aligners_)
{
thread_futures.emplace_back(
- thread_pool_->submit(
+ thread_pool_->Submit(
process_batch,
aligner.get()
)
@@ -190,6 +199,11 @@ void CUDAPolisher::find_overlap_breaking_points(std::vector<std::unique_ptr<Over
}
batch_aligners_.clear();
+
+ // Determine overlaps missed by GPU which will fall back to CPU.
+ int64_t missing_overlaps = std::count_if(begin(overlaps), end(overlaps),[](std::unique_ptr<Overlap> const& o){ return o->cigar().empty();});
+
+ std::cerr << "Alignment skipped by GPU: " << missing_overlaps << " / " << overlaps.size() << std::endl;
}
// This call runs the breaking point detection code for all alignments.
@@ -211,18 +225,15 @@ void CUDAPolisher::polish(std::vector<std::unique_ptr<Sequence>>& dst,
// Creation and use of batches.
const uint32_t MAX_DEPTH_PER_WINDOW = 200;
- // Bin batches into each GPU.
- std::vector<uint32_t> batches_per_gpu = calculate_batches_per_gpu(cudapoa_batches_, num_devices_);
-
for(int32_t device = 0; device < num_devices_; device++)
{
size_t total = 0, free = 0;
- CGA_CU_CHECK_ERR(cudaSetDevice(device));
- CGA_CU_CHECK_ERR(cudaMemGetInfo(&free, &total));
+ GW_CU_CHECK_ERR(cudaSetDevice(device));
+ GW_CU_CHECK_ERR(cudaMemGetInfo(&free, &total));
// Using 90% of available memory as heuristic since not all available memory can be used
// due to fragmentation.
- size_t mem_per_batch = 0.9 * free/batches_per_gpu.at(device);
- for(uint32_t batch = 0; batch < batches_per_gpu.at(device); batch++)
+ size_t mem_per_batch = 0.9 * free / cudapoa_batches_;
+ for(uint32_t batch = 0; batch < cudapoa_batches_; batch++)
{
batch_processors_.emplace_back(createCUDABatch(MAX_DEPTH_PER_WINDOW, device, mem_per_batch, gap_, mismatch_, match_, cuda_banded_alignment_));
}
@@ -326,7 +337,7 @@ void CUDAPolisher::polish(std::vector<std::unique_ptr<Sequence>>& dst,
for(auto& batch_processor : batch_processors_)
{
thread_futures.emplace_back(
- thread_pool_->submit(
+ thread_pool_->Submit(
process_batch,
batch_processor.get()
)
@@ -347,14 +358,9 @@ void CUDAPolisher::polish(std::vector<std::unique_ptr<Sequence>>& dst,
for (uint64_t i = 0; i < windows_.size(); ++i) {
if (window_consensus_status_.at(i) == false)
{
- thread_failed_windows.emplace_back(thread_pool_->submit(
+ thread_failed_windows.emplace_back(thread_pool_->Submit(
[&](uint64_t j) -> bool {
- auto it = thread_to_id_.find(std::this_thread::get_id());
- if (it == thread_to_id_.end()) {
- fprintf(stderr, "[racon::CUDAPolisher::polish] error: "
- "thread identifier not present!\n");
- exit(1);
- }
+ auto it = thread_pool_->thread_ids().find(std::this_thread::get_id());
return window_consensus_status_.at(j) = windows_[j]->generate_consensus(
alignment_engines_[it->second], trim_);
}, i));
=====================================
src/cuda/cudapolisher.hpp
=====================================
@@ -28,7 +28,7 @@ public:
PolisherType type, uint32_t window_length, double quality_threshold,
double error_threshold, bool trim, int8_t match, int8_t mismatch, int8_t gap,
uint32_t num_threads, uint32_t cudapoa_batches, bool cuda_banded_alignment,
- uint32_t cudaaligner_batches);
+ uint32_t cudaaligner_batches, uint32_t cudaaligner_band_width);
protected:
CUDAPolisher(std::unique_ptr<bioparser::Parser<Sequence>> sparser,
@@ -37,7 +37,7 @@ protected:
PolisherType type, uint32_t window_length, double quality_threshold,
double error_threshold, bool trim, int8_t match, int8_t mismatch, int8_t gap,
uint32_t num_threads, uint32_t cudapoa_batches, bool cuda_banded_alignment,
- uint32_t cudaaligner_batches);
+ uint32_t cudaaligner_batches, uint32_t cudaaligner_band_width);
CUDAPolisher(const CUDAPolisher&) = delete;
const CUDAPolisher& operator=(const CUDAPolisher&) = delete;
virtual void find_overlap_breaking_points(std::vector<std::unique_ptr<Overlap>>& overlaps) override;
@@ -69,6 +69,9 @@ protected:
// Use banded POA alignment
bool cuda_banded_alignment_;
+
+ // Band parameter for pairwise alignment
+ uint32_t cudaaligner_band_width_;
};
}
=====================================
src/main.cpp
=====================================
@@ -18,6 +18,7 @@
static const char* version = RACON_VERSION;
static const int32_t CUDAALIGNER_INPUT_CODE = 10000;
+static const int32_t CUDAALIGNER_BAND_WIDTH_INPUT_CODE = 10001;
static struct option options[] = {
{"include-unpolished", no_argument, 0, 'u'},
@@ -36,6 +37,7 @@ static struct option options[] = {
{"cudapoa-batches", optional_argument, 0, 'c'},
{"cuda-banded-alignment", no_argument, 0, 'b'},
{"cudaaligner-batches", required_argument, 0, CUDAALIGNER_INPUT_CODE},
+ {"cudaaligner-band-width", required_argument, 0, CUDAALIGNER_BAND_WIDTH_INPUT_CODE},
#endif
{0, 0, 0, 0}
};
@@ -61,6 +63,7 @@ int main(int argc, char** argv) {
uint32_t cudapoa_batches = 0;
uint32_t cudaaligner_batches = 0;
+ uint32_t cudaaligner_band_width = 0;
bool cuda_banded_alignment = false;
std::string optstring = "ufw:q:e:m:x:g:t:h";
@@ -127,6 +130,9 @@ int main(int argc, char** argv) {
case CUDAALIGNER_INPUT_CODE: // cudaaligner-batches
cudaaligner_batches = atoi(optarg);
break;
+ case CUDAALIGNER_BAND_WIDTH_INPUT_CODE: // cudaaligner-band-width
+ cudaaligner_band_width = atoi(optarg);
+ break;
#endif
default:
exit(1);
@@ -147,7 +153,8 @@ int main(int argc, char** argv) {
input_paths[2], type == 0 ? racon::PolisherType::kC :
racon::PolisherType::kF, window_length, quality_threshold,
error_threshold, trim, match, mismatch, gap, num_threads,
- cudapoa_batches, cuda_banded_alignment, cudaaligner_batches);
+ cudapoa_batches, cuda_banded_alignment, cudaaligner_batches,
+ cudaaligner_band_width);
polisher->initialize();
@@ -212,12 +219,16 @@ void help() {
#ifdef CUDA_ENABLED
" -c, --cudapoa-batches <int>\n"
" default: 0\n"
- " number of batches for CUDA accelerated polishing\n"
+ " number of batches for CUDA accelerated polishing per GPU\n"
" -b, --cuda-banded-alignment\n"
" use banding approximation for alignment on GPU\n"
" --cudaaligner-batches <int>\n"
" default: 0\n"
- " number of batches for CUDA accelerated alignment\n"
+ " number of batches for CUDA accelerated alignment per GPU\n"
+ " --cudaaligner-band-width <int>\n"
+ " default: 0\n"
+ " Band width for cuda alignment. Must be >= 0. Non-zero allows user defined \n"
+ " band width, whereas 0 implies auto band width determination.\n"
#endif
);
}
=====================================
src/overlap.hpp
=====================================
@@ -61,6 +61,10 @@ public:
return error_;
}
+ const std::string& cigar() const {
+ return cigar_;
+ }
+
const std::vector<std::pair<uint32_t, uint32_t>>& breaking_points() const {
return breaking_points_;
}
=====================================
src/polisher.cpp
=====================================
@@ -17,7 +17,11 @@
#include "cuda/cudapolisher.hpp"
#endif
-#include "bioparser/bioparser.hpp"
+#include "bioparser/fasta_parser.hpp"
+#include "bioparser/fastq_parser.hpp"
+#include "bioparser/mhap_parser.hpp"
+#include "bioparser/paf_parser.hpp"
+#include "bioparser/sam_parser.hpp"
#include "thread_pool/thread_pool.hpp"
#include "spoa/spoa.hpp"
@@ -57,7 +61,7 @@ std::unique_ptr<Polisher> createPolisher(const std::string& sequences_path,
PolisherType type, uint32_t window_length, double quality_threshold,
double error_threshold, bool trim, int8_t match, int8_t mismatch, int8_t gap,
uint32_t num_threads, uint32_t cudapoa_batches, bool cuda_banded_alignment,
- uint32_t cudaaligner_batches) {
+ uint32_t cudaaligner_batches, uint32_t cudaaligner_band_width) {
if (type != PolisherType::kC && type != PolisherType::kF) {
fprintf(stderr, "[racon::createPolisher] error: invalid polisher type!\n");
@@ -83,11 +87,11 @@ std::unique_ptr<Polisher> createPolisher(const std::string& sequences_path,
if (is_suffix(sequences_path, ".fasta") || is_suffix(sequences_path, ".fasta.gz") ||
is_suffix(sequences_path, ".fna") || is_suffix(sequences_path, ".fna.gz") ||
is_suffix(sequences_path, ".fa") || is_suffix(sequences_path, ".fa.gz")) {
- sparser = bioparser::createParser<bioparser::FastaParser, Sequence>(
+ sparser = bioparser::Parser<Sequence>::Create<bioparser::FastaParser>(
sequences_path);
} else if (is_suffix(sequences_path, ".fastq") || is_suffix(sequences_path, ".fastq.gz") ||
is_suffix(sequences_path, ".fq") || is_suffix(sequences_path, ".fq.gz")) {
- sparser = bioparser::createParser<bioparser::FastqParser, Sequence>(
+ sparser = bioparser::Parser<Sequence>::Create<bioparser::FastqParser>(
sequences_path);
} else {
fprintf(stderr, "[racon::createPolisher] error: "
@@ -99,13 +103,13 @@ std::unique_ptr<Polisher> createPolisher(const std::string& sequences_path,
}
if (is_suffix(overlaps_path, ".mhap") || is_suffix(overlaps_path, ".mhap.gz")) {
- oparser = bioparser::createParser<bioparser::MhapParser, Overlap>(
+ oparser = bioparser::Parser<Overlap>::Create<bioparser::MhapParser>(
overlaps_path);
} else if (is_suffix(overlaps_path, ".paf") || is_suffix(overlaps_path, ".paf.gz")) {
- oparser = bioparser::createParser<bioparser::PafParser, Overlap>(
+ oparser = bioparser::Parser<Overlap>::Create<bioparser::PafParser>(
overlaps_path);
} else if (is_suffix(overlaps_path, ".sam") || is_suffix(overlaps_path, ".sam.gz")) {
- oparser = bioparser::createParser<bioparser::SamParser, Overlap>(
+ oparser = bioparser::Parser<Overlap>::Create<bioparser::SamParser>(
overlaps_path);
} else {
fprintf(stderr, "[racon::createPolisher] error: "
@@ -117,11 +121,11 @@ std::unique_ptr<Polisher> createPolisher(const std::string& sequences_path,
if (is_suffix(target_path, ".fasta") || is_suffix(target_path, ".fasta.gz") ||
is_suffix(target_path, ".fna") || is_suffix(target_path, ".fna.gz") ||
is_suffix(target_path, ".fa") || is_suffix(target_path, ".fa.gz")) {
- tparser = bioparser::createParser<bioparser::FastaParser, Sequence>(
+ tparser = bioparser::Parser<Sequence>::Create<bioparser::FastaParser>(
target_path);
} else if (is_suffix(target_path, ".fastq") || is_suffix(target_path, ".fastq.gz") ||
is_suffix(target_path, ".fq") || is_suffix(target_path, ".fq.gz")) {
- tparser = bioparser::createParser<bioparser::FastqParser, Sequence>(
+ tparser = bioparser::Parser<Sequence>::Create<bioparser::FastqParser>(
target_path);
} else {
fprintf(stderr, "[racon::createPolisher] error: "
@@ -139,7 +143,8 @@ std::unique_ptr<Polisher> createPolisher(const std::string& sequences_path,
return std::unique_ptr<Polisher>(new CUDAPolisher(std::move(sparser),
std::move(oparser), std::move(tparser), type, window_length,
quality_threshold, error_threshold, trim, match, mismatch, gap,
- num_threads, cudapoa_batches, cuda_banded_alignment, cudaaligner_batches));
+ num_threads, cudapoa_batches, cuda_banded_alignment, cudaaligner_batches,
+ cudaaligner_band_width));
#else
fprintf(stderr, "[racon::createPolisher] error: "
"Attemping to use CUDA when CUDA support is not available.\n"
@@ -151,6 +156,7 @@ std::unique_ptr<Polisher> createPolisher(const std::string& sequences_path,
else
{
(void) cuda_banded_alignment;
+ (void) cudaaligner_band_width;
return std::unique_ptr<Polisher>(new Polisher(std::move(sparser),
std::move(oparser), std::move(tparser), type, window_length,
quality_threshold, error_threshold, trim, match, mismatch, gap,
@@ -169,18 +175,13 @@ Polisher::Polisher(std::unique_ptr<bioparser::Parser<Sequence>> sparser,
quality_threshold), error_threshold_(error_threshold), trim_(trim),
alignment_engines_(), sequences_(), dummy_quality_(window_length, '!'),
window_length_(window_length), windows_(),
- thread_pool_(thread_pool::createThreadPool(num_threads)),
- thread_to_id_(), logger_(new Logger()) {
-
- uint32_t id = 0;
- for (const auto& it: thread_pool_->thread_identifiers()) {
- thread_to_id_[it] = id++;
- }
+ thread_pool_(std::make_shared<thread_pool::ThreadPool>(num_threads)),
+ logger_(new Logger()) {
for (uint32_t i = 0; i < num_threads; ++i) {
- alignment_engines_.emplace_back(spoa::createAlignmentEngine(
+ alignment_engines_.emplace_back(spoa::AlignmentEngine::Create(
spoa::AlignmentType::kNW, match, mismatch, gap));
- alignment_engines_.back()->prealloc(window_length_, 5);
+ alignment_engines_.back()->Prealloc(window_length_, 5);
}
}
@@ -198,8 +199,8 @@ void Polisher::initialize() {
logger_->log();
- tparser_->reset();
- tparser_->parse(sequences_, -1);
+ tparser_->Reset();
+ sequences_ = tparser_->Parse(-1);
uint64_t targets_size = sequences_.size();
if (targets_size == 0) {
@@ -224,10 +225,17 @@ void Polisher::initialize() {
uint64_t sequences_size = 0, total_sequences_length = 0;
- sparser_->reset();
+ sparser_->Reset();
while (true) {
uint64_t l = sequences_.size();
- auto status = sparser_->parse(sequences_, kChunkSize);
+ auto reads = sparser_->Parse(kChunkSize);
+ if (reads.empty()) {
+ break;
+ }
+ sequences_.insert(
+ sequences_.end(),
+ std::make_move_iterator(reads.begin()),
+ std::make_move_iterator(reads.end()));
uint64_t n = 0;
for (uint64_t i = l; i < sequences_.size(); ++i, ++sequences_size) {
@@ -256,10 +264,6 @@ void Polisher::initialize() {
}
shrinkToFit(sequences_, l);
-
- if (!status) {
- break;
- }
}
if (sequences_size == 0) {
@@ -306,12 +310,19 @@ void Polisher::initialize() {
}
};
- oparser_->reset();
- uint64_t l = 0;
+ oparser_->Reset();
+ uint64_t l = 0, c = 0;
while (true) {
- auto status = oparser_->parse(overlaps, kChunkSize);
+ auto overlaps_chunk = oparser_->Parse(kChunkSize);
+ if (overlaps_chunk.empty()) {
+ break;
+ }
+ overlaps.insert(
+ overlaps.end(),
+ std::make_move_iterator(overlaps_chunk.begin()),
+ std::make_move_iterator(overlaps_chunk.end()));
- uint64_t c = l;
+ c = l;
for (uint64_t i = l; i < overlaps.size(); ++i) {
overlaps[i]->transmute(sequences_, name_to_id, id_to_id);
@@ -328,28 +339,18 @@ void Polisher::initialize() {
c = i;
}
}
- if (!status) {
- remove_invalid_overlaps(c, overlaps.size());
- c = overlaps.size();
- }
-
- for (uint64_t i = l; i < c; ++i) {
- if (overlaps[i] == nullptr) {
- continue;
- }
-
- if (overlaps[i]->strand()) {
- has_reverse_data[overlaps[i]->q_id()] = true;
- } else {
- has_data[overlaps[i]->q_id()] = true;
- }
- }
uint64_t n = shrinkToFit(overlaps, l);
l = c - n;
-
- if (!status) {
- break;
+ }
+ remove_invalid_overlaps(l, overlaps.size());
+ shrinkToFit(overlaps, l);
+
+ for (const auto& it : overlaps) {
+ if (it->strand()) {
+ has_reverse_data[it->q_id()] = true;
+ } else {
+ has_data[it->q_id()] = true;
}
}
@@ -367,7 +368,7 @@ void Polisher::initialize() {
std::vector<std::future<void>> thread_futures;
for (uint64_t i = 0; i < sequences_.size(); ++i) {
- thread_futures.emplace_back(thread_pool_->submit(
+ thread_futures.emplace_back(thread_pool_->Submit(
[&](uint64_t j) -> void {
sequences_[j]->transmute(has_name[j], has_data[j], has_reverse_data[j]);
}, i));
@@ -462,7 +463,7 @@ void Polisher::find_overlap_breaking_points(std::vector<std::unique_ptr<Overlap>
{
std::vector<std::future<void>> thread_futures;
for (uint64_t i = 0; i < overlaps.size(); ++i) {
- thread_futures.emplace_back(thread_pool_->submit(
+ thread_futures.emplace_back(thread_pool_->Submit(
[&](uint64_t j) -> void {
overlaps[j]->find_breaking_points(sequences_, window_length_);
}, i));
@@ -489,14 +490,9 @@ void Polisher::polish(std::vector<std::unique_ptr<Sequence>>& dst,
std::vector<std::future<bool>> thread_futures;
for (uint64_t i = 0; i < windows_.size(); ++i) {
- thread_futures.emplace_back(thread_pool_->submit(
+ thread_futures.emplace_back(thread_pool_->Submit(
[&](uint64_t j) -> bool {
- auto it = thread_to_id_.find(std::this_thread::get_id());
- if (it == thread_to_id_.end()) {
- fprintf(stderr, "[racon::Polisher::polish] error: "
- "thread identifier not present!\n");
- exit(1);
- }
+ auto it = thread_pool_->thread_ids().find(std::this_thread::get_id()); // NOLINT
return windows_[j]->generate_consensus(
alignment_engines_[it->second], trim_);
}, i));
=====================================
src/polisher.hpp
=====================================
@@ -44,7 +44,8 @@ std::unique_ptr<Polisher> createPolisher(const std::string& sequences_path,
PolisherType type, uint32_t window_length, double quality_threshold,
double error_threshold, bool trim, int8_t match, int8_t mismatch, int8_t gap,
uint32_t num_threads, uint32_t cuda_batches = 0,
- bool cuda_banded_alignment = false, uint32_t cudaaligner_batches = 0);
+ bool cuda_banded_alignment = false, uint32_t cudaaligner_batches = 0,
+ uint32_t cudaaligner_band_width = 0);
class Polisher {
public:
@@ -60,7 +61,7 @@ public:
PolisherType type, uint32_t window_length, double quality_threshold,
double error_threshold, bool trim, int8_t match, int8_t mismatch, int8_t gap,
uint32_t num_threads, uint32_t cuda_batches, bool cuda_banded_alignment,
- uint32_t cudaaligner_batches);
+ uint32_t cudaaligner_batches, uint32_t cudaaligner_band_width);
protected:
Polisher(std::unique_ptr<bioparser::Parser<Sequence>> sparser,
@@ -90,8 +91,7 @@ protected:
uint32_t window_length_;
std::vector<std::shared_ptr<Window>> windows_;
- std::unique_ptr<thread_pool::ThreadPool> thread_pool_;
- std::unordered_map<std::thread::id, uint32_t> thread_to_id_;
+ std::shared_ptr<thread_pool::ThreadPool> thread_pool_;
std::unique_ptr<Logger> logger_;
};
=====================================
src/window.cpp
=====================================
@@ -70,10 +70,11 @@ bool Window::generate_consensus(std::shared_ptr<spoa::AlignmentEngine> alignment
return false;
}
- auto graph = spoa::createGraph();
- graph->add_alignment(spoa::Alignment(), sequences_.front().first,
- sequences_.front().second, qualities_.front().first,
- qualities_.front().second);
+ spoa::Graph graph{};
+ graph.AddAlignment(
+ spoa::Alignment(),
+ sequences_.front().first, sequences_.front().second,
+ qualities_.front().first, qualities_.front().second);
std::vector<uint32_t> rank;
rank.reserve(sequences_.size());
@@ -91,29 +92,35 @@ bool Window::generate_consensus(std::shared_ptr<spoa::AlignmentEngine> alignment
spoa::Alignment alignment;
if (positions_[i].first < offset && positions_[i].second >
sequences_.front().second - offset) {
- alignment = alignment_engine->align(sequences_[i].first,
- sequences_[i].second, graph);
+ alignment = alignment_engine->Align(
+ sequences_[i].first, sequences_[i].second,
+ graph);
} else {
- std::vector<int32_t> mapping;
- auto subgraph = graph->subgraph(positions_[i].first,
- positions_[i].second, mapping);
- alignment = alignment_engine->align( sequences_[i].first,
- sequences_[i].second, subgraph);
- subgraph->update_alignment(alignment, mapping);
+ std::vector<const spoa::Graph::Node*> mapping;
+ auto subgraph = graph.Subgraph(
+ positions_[i].first,
+ positions_[i].second,
+ &mapping);
+ alignment = alignment_engine->Align(
+ sequences_[i].first, sequences_[i].second,
+ subgraph);
+ subgraph.UpdateAlignment(mapping, &alignment);
}
if (qualities_[i].first == nullptr) {
- graph->add_alignment(alignment, sequences_[i].first,
- sequences_[i].second);
+ graph.AddAlignment(
+ alignment,
+ sequences_[i].first, sequences_[i].second);
} else {
- graph->add_alignment(alignment, sequences_[i].first,
- sequences_[i].second, qualities_[i].first,
- qualities_[i].second);
+ graph.AddAlignment(
+ alignment,
+ sequences_[i].first, sequences_[i].second,
+ qualities_[i].first, qualities_[i].second);
}
}
std::vector<uint32_t> coverages;
- consensus_ = graph->generate_consensus(coverages);
+ consensus_ = graph.GenerateConsensus(&coverages);
if (type_ == WindowType::kTGS && trim) {
uint32_t average_coverage = (sequences_.size() - 1) / 2;
=====================================
test/racon_test.cpp
=====================================
@@ -10,7 +10,7 @@
#include "polisher.hpp"
#include "edlib.h"
-#include "bioparser/bioparser.hpp"
+#include "bioparser/fasta_parser.hpp"
#include "gtest/gtest.h"
uint32_t calculateEditDistance(const std::string& query, const std::string& target) {
@@ -98,13 +98,14 @@ TEST_F(RaconPolishingTest, ConsensusWithQualities) {
polished_sequences[0]->create_reverse_complement();
- auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
+ auto parser = bioparser::Parser<racon::Sequence>::Create<bioparser::FastaParser>(
racon_test_data_path + "sample_reference.fasta.gz");
- parser->parse(polished_sequences, -1);
- EXPECT_EQ(polished_sequences.size(), 2);
+ auto reference = parser->Parse(-1);
+ EXPECT_EQ(reference.size(), 1);
- EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
- polished_sequences[1]->data()), 1312);
+ EXPECT_EQ(1312, calculateEditDistance(
+ polished_sequences[0]->reverse_complement(),
+ reference[0]->data()));
}
TEST_F(RaconPolishingTest, ConsensusWithoutQualities) {
@@ -120,13 +121,15 @@ TEST_F(RaconPolishingTest, ConsensusWithoutQualities) {
polished_sequences[0]->create_reverse_complement();
- auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
+ auto parser = bioparser::Parser<racon::Sequence>::Create<bioparser::FastaParser>(
racon_test_data_path + "sample_reference.fasta.gz");
- parser->parse(polished_sequences, -1);
- EXPECT_EQ(polished_sequences.size(), 2);
+ auto reference = parser->Parse(-1);
+ EXPECT_EQ(reference.size(), 1);
- EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
- polished_sequences[1]->data()), 1566);
+
+ EXPECT_EQ(1566, calculateEditDistance(
+ polished_sequences[0]->reverse_complement(),
+ reference[0]->data()));
}
TEST_F(RaconPolishingTest, ConsensusWithQualitiesAndAlignments) {
@@ -142,13 +145,14 @@ TEST_F(RaconPolishingTest, ConsensusWithQualitiesAndAlignments) {
polished_sequences[0]->create_reverse_complement();
- auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
+ auto parser = bioparser::Parser<racon::Sequence>::Create<bioparser::FastaParser>(
racon_test_data_path + "sample_reference.fasta.gz");
- parser->parse(polished_sequences, -1);
- EXPECT_EQ(polished_sequences.size(), 2);
+ auto reference = parser->Parse(-1);
+ EXPECT_EQ(reference.size(), 1);
- EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
- polished_sequences[1]->data()), 1317);
+ EXPECT_EQ(1317, calculateEditDistance(
+ polished_sequences[0]->reverse_complement(),
+ reference[0]->data()));
}
TEST_F(RaconPolishingTest, ConsensusWithoutQualitiesAndWithAlignments) {
@@ -164,13 +168,14 @@ TEST_F(RaconPolishingTest, ConsensusWithoutQualitiesAndWithAlignments) {
polished_sequences[0]->create_reverse_complement();
- auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
+ auto parser = bioparser::Parser<racon::Sequence>::Create<bioparser::FastaParser>(
racon_test_data_path + "sample_reference.fasta.gz");
- parser->parse(polished_sequences, -1);
- EXPECT_EQ(polished_sequences.size(), 2);
+ auto reference = parser->Parse(-1);
+ EXPECT_EQ(reference.size(), 1);
- EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
- polished_sequences[1]->data()), 1770);
+ EXPECT_EQ(1770, calculateEditDistance(
+ polished_sequences[0]->reverse_complement(),
+ reference[0]->data()));
}
TEST_F(RaconPolishingTest, ConsensusWithQualitiesLargerWindow) {
@@ -186,13 +191,14 @@ TEST_F(RaconPolishingTest, ConsensusWithQualitiesLargerWindow) {
polished_sequences[0]->create_reverse_complement();
- auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
+ auto parser = bioparser::Parser<racon::Sequence>::Create<bioparser::FastaParser>(
racon_test_data_path + "sample_reference.fasta.gz");
- parser->parse(polished_sequences, -1);
- EXPECT_EQ(polished_sequences.size(), 2);
+ auto reference = parser->Parse(-1);
+ EXPECT_EQ(reference.size(), 1);
- EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
- polished_sequences[1]->data()), 1289);
+ EXPECT_EQ(1289, calculateEditDistance(
+ polished_sequences[0]->reverse_complement(),
+ reference[0]->data()));
}
TEST_F(RaconPolishingTest, ConsensusWithQualitiesEditDistance) {
@@ -208,13 +214,14 @@ TEST_F(RaconPolishingTest, ConsensusWithQualitiesEditDistance) {
polished_sequences[0]->create_reverse_complement();
- auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
+ auto parser = bioparser::Parser<racon::Sequence>::Create<bioparser::FastaParser>(
racon_test_data_path + "sample_reference.fasta.gz");
- parser->parse(polished_sequences, -1);
- EXPECT_EQ(polished_sequences.size(), 2);
+ auto reference = parser->Parse(-1);
+ EXPECT_EQ(reference.size(), 1);
- EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
- polished_sequences[1]->data()), 1321);
+ EXPECT_EQ(1321, calculateEditDistance(
+ polished_sequences[0]->reverse_complement(),
+ reference[0]->data()));
}
TEST_F(RaconPolishingTest, FragmentCorrectionWithQualities) {
@@ -229,7 +236,7 @@ TEST_F(RaconPolishingTest, FragmentCorrectionWithQualities) {
EXPECT_EQ(polished_sequences.size(), 39);
uint32_t total_length = 0;
- for (const auto& it: polished_sequences) {
+ for (const auto& it : polished_sequences) {
total_length += it->data().size();
}
EXPECT_EQ(total_length, 389394);
@@ -247,7 +254,7 @@ TEST_F(RaconPolishingTest, FragmentCorrectionWithQualitiesFull) {
EXPECT_EQ(polished_sequences.size(), 236);
uint32_t total_length = 0;
- for (const auto& it: polished_sequences) {
+ for (const auto& it : polished_sequences) {
total_length += it->data().size();
}
EXPECT_EQ(total_length, 1658216);
@@ -265,7 +272,7 @@ TEST_F(RaconPolishingTest, FragmentCorrectionWithoutQualitiesFull) {
EXPECT_EQ(polished_sequences.size(), 236);
uint32_t total_length = 0;
- for (const auto& it: polished_sequences) {
+ for (const auto& it : polished_sequences) {
total_length += it->data().size();
}
EXPECT_EQ(total_length, 1663982);
@@ -283,7 +290,7 @@ TEST_F(RaconPolishingTest, FragmentCorrectionWithQualitiesFullMhap) {
EXPECT_EQ(polished_sequences.size(), 236);
uint32_t total_length = 0;
- for (const auto& it: polished_sequences) {
+ for (const auto& it : polished_sequences) {
total_length += it->data().size();
}
EXPECT_EQ(total_length, 1658216);
@@ -303,13 +310,14 @@ TEST_F(RaconPolishingTest, ConsensusWithQualitiesCUDA) {
polished_sequences[0]->create_reverse_complement();
- auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
+ auto parser = bioparser::Parser<racon::Sequence>::Create<bioparser::FastaParser>(
racon_test_data_path + "sample_reference.fasta.gz");
- parser->parse(polished_sequences, -1);
- EXPECT_EQ(polished_sequences.size(), 2);
+ auto reference = parser->Parse(-1);
+ EXPECT_EQ(reference.size(), 1);
- EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
- polished_sequences[1]->data()), 1385); // CPU 1312
+ EXPECT_EQ(1385, calculateEditDistance(
+ polished_sequences[0]->reverse_complement(),
+ reference[0]->data())); // CPU 1312
}
TEST_F(RaconPolishingTest, ConsensusWithoutQualitiesCUDA) {
@@ -325,13 +333,14 @@ TEST_F(RaconPolishingTest, ConsensusWithoutQualitiesCUDA) {
polished_sequences[0]->create_reverse_complement();
- auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
+ auto parser = bioparser::Parser<racon::Sequence>::Create<bioparser::FastaParser>(
racon_test_data_path + "sample_reference.fasta.gz");
- parser->parse(polished_sequences, -1);
- EXPECT_EQ(polished_sequences.size(), 2);
+ auto reference = parser->Parse(-1);
+ EXPECT_EQ(reference.size(), 1);
- EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
- polished_sequences[1]->data()), 1607); // CPU 1566
+ EXPECT_EQ(1607, calculateEditDistance(
+ polished_sequences[0]->reverse_complement(),
+ reference[0]->data())); // CPU 1566
}
TEST_F(RaconPolishingTest, ConsensusWithQualitiesAndAlignmentsCUDA) {
@@ -347,13 +356,14 @@ TEST_F(RaconPolishingTest, ConsensusWithQualitiesAndAlignmentsCUDA) {
polished_sequences[0]->create_reverse_complement();
- auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
+ auto parser = bioparser::Parser<racon::Sequence>::Create<bioparser::FastaParser>(
racon_test_data_path + "sample_reference.fasta.gz");
- parser->parse(polished_sequences, -1);
- EXPECT_EQ(polished_sequences.size(), 2);
+ auto reference = parser->Parse(-1);
+ EXPECT_EQ(reference.size(), 1);
- EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
- polished_sequences[1]->data()), 1541); // CPU 1317
+ EXPECT_EQ(1541, calculateEditDistance(
+ polished_sequences[0]->reverse_complement(),
+ reference[0]->data())); // CPU 1317
}
TEST_F(RaconPolishingTest, ConsensusWithoutQualitiesAndWithAlignmentsCUDA) {
@@ -369,13 +379,14 @@ TEST_F(RaconPolishingTest, ConsensusWithoutQualitiesAndWithAlignmentsCUDA) {
polished_sequences[0]->create_reverse_complement();
- auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
+ auto parser = bioparser::Parser<racon::Sequence>::Create<bioparser::FastaParser>(
racon_test_data_path + "sample_reference.fasta.gz");
- parser->parse(polished_sequences, -1);
- EXPECT_EQ(polished_sequences.size(), 2);
+ auto reference = parser->Parse(-1);
+ EXPECT_EQ(reference.size(), 1);
- EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
- polished_sequences[1]->data()), 1661); // CPU 1770
+ EXPECT_EQ(1661, calculateEditDistance(
+ polished_sequences[0]->reverse_complement(),
+ reference[0]->data())); // CPU 1770
}
TEST_F(RaconPolishingTest, ConsensusWithQualitiesLargerWindowCUDA) {
@@ -391,13 +402,14 @@ TEST_F(RaconPolishingTest, ConsensusWithQualitiesLargerWindowCUDA) {
polished_sequences[0]->create_reverse_complement();
- auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
+ auto parser = bioparser::Parser<racon::Sequence>::Create<bioparser::FastaParser>(
racon_test_data_path + "sample_reference.fasta.gz");
- parser->parse(polished_sequences, -1);
- EXPECT_EQ(polished_sequences.size(), 2);
+ auto reference = parser->Parse(-1);
+ EXPECT_EQ(reference.size(), 1);
- EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
- polished_sequences[1]->data()), 4168); // CPU 1289
+ EXPECT_EQ(4168, calculateEditDistance(
+ polished_sequences[0]->reverse_complement(),
+ reference[0]->data())); // CPU 1289
}
TEST_F(RaconPolishingTest, ConsensusWithQualitiesEditDistanceCUDA) {
@@ -413,13 +425,14 @@ TEST_F(RaconPolishingTest, ConsensusWithQualitiesEditDistanceCUDA) {
polished_sequences[0]->create_reverse_complement();
- auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
+ auto parser = bioparser::Parser<racon::Sequence>::Create<bioparser::FastaParser>(
racon_test_data_path + "sample_reference.fasta.gz");
- parser->parse(polished_sequences, -1);
- EXPECT_EQ(polished_sequences.size(), 2);
+ auto reference = parser->Parse(-1);
+ EXPECT_EQ(reference.size(), 1);
- EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
- polished_sequences[1]->data()), 1361); // CPU 1321
+ EXPECT_EQ(1361, calculateEditDistance(
+ polished_sequences[0]->reverse_complement(),
+ reference[0]->data())); // CPU 1321
}
TEST_F(RaconPolishingTest, FragmentCorrectionWithQualitiesCUDA) {
@@ -434,10 +447,10 @@ TEST_F(RaconPolishingTest, FragmentCorrectionWithQualitiesCUDA) {
EXPECT_EQ(polished_sequences.size(), 39);
uint32_t total_length = 0;
- for (const auto& it: polished_sequences) {
+ for (const auto& it : polished_sequences) {
total_length += it->data().size();
}
- EXPECT_EQ(total_length, 385543); // CPU 389394
+ EXPECT_EQ(total_length, 385543); // CPU 389394
}
TEST_F(RaconPolishingTest, FragmentCorrectionWithQualitiesFullCUDA) {
@@ -452,10 +465,10 @@ TEST_F(RaconPolishingTest, FragmentCorrectionWithQualitiesFullCUDA) {
EXPECT_EQ(polished_sequences.size(), 236);
uint32_t total_length = 0;
- for (const auto& it: polished_sequences) {
+ for (const auto& it : polished_sequences) {
total_length += it->data().size();
}
- EXPECT_EQ(total_length, 1655505); // CPU 1658216
+ EXPECT_EQ(total_length, 1655505); // CPU 1658216
}
TEST_F(RaconPolishingTest, FragmentCorrectionWithoutQualitiesFullCUDA) {
@@ -470,10 +483,10 @@ TEST_F(RaconPolishingTest, FragmentCorrectionWithoutQualitiesFullCUDA) {
EXPECT_EQ(polished_sequences.size(), 236);
uint32_t total_length = 0;
- for (const auto& it: polished_sequences) {
+ for (const auto& it : polished_sequences) {
total_length += it->data().size();
}
- EXPECT_EQ(total_length, 1663732); // CPU 1663982
+ EXPECT_EQ(total_length, 1663732); // CPU 1663982
}
TEST_F(RaconPolishingTest, FragmentCorrectionWithQualitiesFullMhapCUDA) {
@@ -488,9 +501,9 @@ TEST_F(RaconPolishingTest, FragmentCorrectionWithQualitiesFullMhapCUDA) {
EXPECT_EQ(polished_sequences.size(), 236);
uint32_t total_length = 0;
- for (const auto& it: polished_sequences) {
+ for (const auto& it : polished_sequences) {
total_length += it->data().size();
}
- EXPECT_EQ(total_length, 1655505); // CPU 1658216
+ EXPECT_EQ(total_length, 1655505); // CPU 1658216
}
#endif
=====================================
vendor/meson.build
=====================================
@@ -1,20 +1,22 @@
vendor_cpp_sources = files([
'edlib/edlib/src/edlib.cpp',
+
'rampler/src/sampler.cpp',
- 'rampler/src/sequence.cpp',
+
'spoa/src/alignment_engine.cpp',
+ 'spoa/src/dispatcher.cpp',
'spoa/src/graph.cpp',
- 'spoa/src/sequence.cpp',
- 'spoa/src/simd_alignment_engine.cpp',
+ 'spoa/src/simd_alignment_engine_dispatch.cpp',
'spoa/src/sisd_alignment_engine.cpp',
- 'thread_pool/src/thread_pool.cpp'
])
vendor_include_directories = [
- include_directories('bioparser/include'),
include_directories('edlib/edlib/include'),
include_directories('rampler/src'),
include_directories('spoa/include'),
+ include_directories('spoa/vendor/cereal/include'),
+ include_directories('spoa/vendor/bioparser/include'),
+ include_directories('spoa/vendor/bioparser/vendor/biosoup/include'),
include_directories('thread_pool/include')
]
View it on GitLab: https://salsa.debian.org/med-team/racon/-/commit/58f83eeafb33ea77b0359dd651d1010e3e5c3c37
--
View it on GitLab: https://salsa.debian.org/med-team/racon/-/commit/58f83eeafb33ea77b0359dd651d1010e3e5c3c37
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210119/bd722f61/attachment-0001.html>
More information about the debian-med-commit
mailing list