[med-svn] [Git][med-team/racon][upstream] New upstream version 1.4.13
Michael R. Crusoe
gitlab at salsa.debian.org
Mon Apr 13 18:24:25 BST 2020
Michael R. Crusoe pushed to branch upstream at Debian Med / racon
b8039ef4 by Michael R. Crusoe at 2020-04-13T18:39:45+02:00
New upstream version 1.4.13
- - - - -
18 changed files:
- .gitignore
- CMakeLists.txt
- + Makefile
- + meson.build
- + meson_options.txt
- + scripts/racon_preprocess.py
- scripts/racon_wrapper.py
- src/cuda/cudaaligner.cpp
- src/cuda/cudaaligner.hpp
- src/cuda/cudapolisher.cpp
- src/main.cpp
- + src/meson.build
- + src/version.hpp.in
- + subprojects/gtest.wrap
- + subprojects/zlib.wrap
- + test/meson.build
- + vendor/meson.build
@@ -1,2 +1,5 @@
# Compiled Object files
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.2)
-set(racon_version 1.4.10)
+set(racon_version 1.4.13)
@@ -0,0 +1,29 @@
+.PHONY: all clean meson cmake debug dist modules
+all: meson
+ rm -rf build build-meson
+meson: modules
+ @echo "[Invoking Meson]"
+ @mkdir -p build-meson && cd build-meson && meson --buildtype=release -Dc_args=-O3 -Dtests=true && ninja
+rebuild: modules
+ @echo "[Running Ninja only]"
+ @ninja -C build-meson
+cmake: modules
+ @echo "[Invoking CMake]"
+ @mkdir -p build && cd build && cmake -DCMAKE_BUILD_TYPE=Release -Dracon_build_tests=ON .. && make
+debug: modules
+ @echo "[Invoking Meson]"
+ @mkdir -p build-debug && cd build-debug && (meson --buildtype=debugoptimized -Db_sanitize=address -Dtests=true) && ninja
+dist: release
+ cd build && ninja-dist
+ @echo "[Fetching submodules]"
+ @git submodule update --init
@@ -74,6 +74,7 @@ Usage of `racon` is as following:
racon [options ...] <sequences> <overlaps> <target sequences>
+ # default output is stdout
input file in FASTA/FASTQ format (can be compressed with gzip)
containing sequences used for correction
@@ -119,13 +120,14 @@ Usage of `racon` is as following:
prints the usage
only available when built with CUDA:
- -c, --cudapoa-batches
- default: 1
+ -c, --cudapoa-batches <int>
+ default: 0
number of batches for CUDA accelerated polishing
-b, --cuda-banded-alignment
use banding approximation for polishing on GPU. Only applicable when -c is used.
- --cudaaligner-batches
- Number of batches for CUDA accelerated alignment
+ --cudaaligner-batches <int>
+ default: 0
+ number of batches for CUDA accelerated alignment
`racon_test` is run without any parameters.
@@ -0,0 +1,131 @@
+ 'Racon',
+ 'cpp',
+ version : '1.4.13',
+ default_options : [
+ 'buildtype=release',
+ 'warning_level=3',
+ 'cpp_std=c++11'],
+ license : 'MIT',
+ meson_version : '>= 0.48')
+cpp = meson.get_compiler('cpp')
+opt_compile_with_tests = get_option('tests')
+racon_warning_flags = []
+racon_cpp_flags = []
+# Dependencies #
+# Threads.
+racon_thread_dep = dependency('threads', required : true)
+# Zlib.
+racon_zlib_dep = dependency('zlib', required: true, version : '>= 1.2.11', fallback : ['zlib', 'zlib_dep'])
+# Google test.
+if (not meson.is_subproject()) and opt_compile_with_tests
+ gtest_dep = dependency('gtest', main : true, required : false)
+ if not gtest_dep.found()
+ gtest_proj = subproject('gtest')
+ gtest_inc = gtest_proj.get_variable('gtest_incdir')
+ gtest_lib = static_library('gtest', gtest_proj.get_variable('gtest_libsources'),
+ gtest_proj.get_variable('gtest_mainsources'),
+ include_directories : gtest_inc)
+ gtest_dep = declare_dependency(include_directories : gtest_inc,
+ link_with : gtest_lib, dependencies: racon_thread_dep)
+ endif
+# Configuring headers #
+racon_version_commit = 'unknown'
+git_command = find_program('git', required: false)
+if git_command.found()
+ git_run = run_command('git', ['log', '-1', '--pretty=%h'])
+ if git_run.returncode() == 0
+ racon_version_commit = git_run.stdout().strip()
+ endif
+racon_version_h_config = configuration_data()
+racon_version = meson.project_version()
+racon_version_split = meson.project_version().split('.')
+racon_version_h_config.set('RACON_VERSION_MAJOR', racon_version_split[0])
+racon_version_h_config.set('RACON_VERSION_MINOR', racon_version_split[1])
+racon_version_h_config.set('RACON_VERSION_PATCH', racon_version_split[2])
+racon_version_h_config.set('RACON_VERSION_COMMIT', racon_version_commit)
+racon_version_h = configure_file(
+ input : files('src/version.hpp.in'),
+ output : 'version.hpp',
+ configuration : racon_version_h_config)
+racon_cpp_flags += ['-DRACON_VERSION="' + meson.project_version() + '-' + racon_version_commit + '"']
+# Headers #
+racon_include_directories = [include_directories('src'), include_directories('test')]
+# Sources + codebase #
+if (not meson.is_subproject()) and opt_compile_with_tests
+ subdir('test')
+all_sources = racon_cpp_sources + vendor_cpp_sources
+# The Racon exe. #
+racon_dep = declare_dependency(
+ include_directories: vendor_include_directories + racon_include_directories,
+ link_with: [racon_lib, vendor_lib],
+ dependencies: [racon_thread_dep, racon_zlib_dep],
+ version: meson.project_version(),
+ compile_args: racon_warning_flags + racon_cpp_flags)
+if not meson.is_subproject()
+ racon_bin = executable(
+ 'racon',
+ ['src/main.cpp'],
+ install : true,
+ dependencies : [racon_thread_dep, racon_zlib_dep],
+ include_directories : vendor_include_directories + racon_include_directories,
+ link_with : [racon_lib],
+ cpp_args : [racon_warning_flags, racon_cpp_flags])
+ ######################
+ # Tests #
+ ######################
+ if opt_compile_with_tests
+ if gtest_dep.found()
+ tests_bin = executable(
+ 'racon_test',
+ racon_test_cpp_sources,
+ dependencies : [racon_thread_dep, racon_zlib_dep, gtest_dep],
+ include_directories : racon_include_directories + vendor_include_directories + racon_test_include_directories,
+ link_with : [racon_lib, vendor_lib],
+ cpp_args : [racon_warning_flags, racon_cpp_flags, racon_test_extra_flags])
+ endif
+ endif
@@ -0,0 +1 @@
+option('tests', type : 'boolean', value : true, description : 'Enable dependencies required for testing')
@@ -0,0 +1,79 @@
+#!/usr/bin/env python
+from __future__ import print_function
+import os, sys, argparse
+def eprint(*args, **kwargs):
+ print(*args, file=sys.stderr, **kwargs)
+def parse_file(file_name, read_set):
+ line_id = 0
+ name = ''
+ data = ''
+ qual = ''
+ valid = False
+ with (open(file_name)) as f:
+ for line in f:
+ if (line_id == 0):
+ if (valid):
+ if (len(name) == 0 or len(data) == 0 or len(data) != len(qual)):
+ eprint('File is not in FASTQ format')
+ sys.exit(1)
+ valid = False
+ if (name in read_set):
+ print(name + '2')
+ else:
+ read_set.add(name)
+ print(name + '1')
+ print(data)
+ print('+')
+ print(qual)
+ name = line.rstrip().split(' ')[0]
+ data = ''
+ qual = ''
+ line_id = 1
+ elif (line_id == 1):
+ if (line[0] == '+'):
+ line_id = 2
+ else:
+ data += line.rstrip()
+ elif (line_id == 2):
+ qual += line.rstrip()
+ if (len(qual) >= len(data)):
+ valid = True
+ line_id = 0
+ if (valid):
+ if (len(name) == 0 or len(data) == 0 or len(data) != len(qual)):
+ eprint(len(name), len(data), len(qual))
+ eprint('File is not in FASTQ format')
+ sys.exit(1)
+ if (name in read_set):
+ print(name + '2')
+ else:
+ read_set.add(name)
+ print(name + '1')
+ print(data)
+ print('+')
+ print(qual)
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='''Script for preprocessing
+ Illumina paired-end reads for usage in Racon. Each read will get unique
+ header up to the first white space to distinguish those forming a pair.''')
+ parser.add_argument('first', help='''File containing the first read of a pair
+ or both.''')
+ parser.add_argument('second', nargs='?', help='''Optional file containing
+ read pairs of the same paired-end sequencing run.''')
+ args = parser.parse_args()
+ read_set = set()
+ parse_file(args.first, read_set)
+ if (args.second is not None):
+ parse_file(args.second, read_set)
@@ -4,7 +4,7 @@ from __future__ import print_function
import os, sys, time, shutil, argparse, subprocess
def eprint(*args, **kwargs):
- print(*args, file=sys.stderr, **kwargs)
+ print(*args, file=sys.stderr, **kwargs, flush=True)
@@ -15,7 +15,8 @@ class RaconWrapper:
def __init__(self, sequences, overlaps, target_sequences, split, subsample,
include_unpolished, fragment_correction, window_length, quality_threshold,
- error_threshold, match, mismatch, gap, threads):
+ error_threshold, match, mismatch, gap, threads,
+ cudaaligner_batches, cudapoa_batches, cuda_banded_alignment):
self.sequences = os.path.abspath(sequences)
self.subsampled_sequences = None
@@ -34,6 +35,9 @@ class RaconWrapper:
self.mismatch = mismatch
self.gap = gap
self.threads = threads
+ self.cudaaligner_batches = cudaaligner_batches
+ self.cudapoa_batches = cudapoa_batches
+ self.cuda_banded_alignment = cuda_banded_alignment
self.work_directory = os.getcwd() + '/racon_work_directory_' + str(time.time())
def __enter__(self):
@@ -104,6 +108,8 @@ class RaconWrapper:
i += 1
+ eprint('[RaconWrapper::run] total number of splits: ' + str(i))
if (len(self.split_target_sequences) == 0):
eprint('[RaconWrapper::run] error: unable to find split target sequences!')
@@ -113,6 +119,7 @@ class RaconWrapper:
racon_params = [RaconWrapper.__racon]
if (self.include_unpolished == True): racon_params.append('-u')
if (self.fragment_correction == True): racon_params.append('-f')
+ if (self.cuda_banded_alignment == True): racon_params.append('-b')
racon_params.extend(['-w', str(self.window_length),
'-q', str(self.quality_threshold),
'-e', str(self.error_threshold),
@@ -120,6 +127,8 @@ class RaconWrapper:
'-x', str(self.mismatch),
'-g', str(self.gap),
'-t', str(self.threads),
+ '--cudaaligner-batches', str(self.cudaaligner_batches),
+ '-c', str(self.cudapoa_batches),
self.subsampled_sequences, self.overlaps, ""])
for target_sequences_part in self.split_target_sequences:
@@ -179,13 +188,17 @@ if __name__ == '__main__':
parser.add_argument('-g', '--gap', default=-8, help='''gap penalty (must be
parser.add_argument('-t', '--threads', default=1, help='''number of threads''')
+ parser.add_argument('--cudaaligner-batches', default=0, help='''number of batches for CUDA accelerated alignment''')
+ parser.add_argument('-c', '--cudapoa-batches', default=0, help='''number of batches for CUDA accelerated polishing''')
+ parser.add_argument('-b', '--cuda-banded-alignment', action='store_true', help='''use banding approximation for polishing on GPU. Only applicable when -c is used.''')
args = parser.parse_args()
racon = RaconWrapper(args.sequences, args.overlaps, args.target_sequences,
args.split, args.subsample, args.include_unpolished,
args.fragment_correction, args.window_length, args.quality_threshold,
- args.error_threshold, args.match, args.mismatch, args.gap, args.threads)
+ args.error_threshold, args.match, args.mismatch, args.gap, args.threads,
+ args.cudaaligner_batches, args.cudapoa_batches, args.cuda_banded_alignment)
with racon:
@@ -39,7 +39,7 @@ CUDABatchAligner::CUDABatchAligner(uint32_t max_query_size,
aligner_ = claragenomics::cudaaligner::create_aligner(max_query_size,
- claragenomics::cudaaligner::AlignmentType::global,
+ claragenomics::cudaaligner::AlignmentType::global_alignment,
@@ -68,9 +68,7 @@ bool CUDABatchAligner::addOverlap(Overlap* overlap, std::vector<std::unique_ptr<
else if (s == claragenomics::cudaaligner::StatusType::exceeded_max_alignment_difference
|| s == claragenomics::cudaaligner::StatusType::exceeded_max_length)
- cpu_overlap_data_.emplace_back(std::make_pair<std::string, std::string>(std::string(q, q + q_len),
- std::string(t, t + t_len)));
- cpu_overlaps_.push_back(overlap);
+ // Do nothing as this case will be handled by CPU aligner.
else if (s != claragenomics::cudaaligner::StatusType::success)
@@ -86,21 +84,9 @@ bool CUDABatchAligner::addOverlap(Overlap* overlap, std::vector<std::unique_ptr<
void CUDABatchAligner::alignAll()
- compute_cpu_overlaps();
-void CUDABatchAligner::compute_cpu_overlaps()
- for(std::size_t a = 0; a < cpu_overlaps_.size(); a++)
- {
- // Run CPU version of overlap.
- Overlap* overlap = cpu_overlaps_[a];
- overlap->align_overlaps(cpu_overlap_data_[a].first.c_str(), cpu_overlap_data_[a].first.length(),
- cpu_overlap_data_[a].second.c_str(), cpu_overlap_data_[a].second.length());
- }
-void CUDABatchAligner::find_breaking_points(uint32_t window_length)
+void CUDABatchAligner::generate_cigar_strings()
@@ -113,19 +99,12 @@ void CUDABatchAligner::find_breaking_points(uint32_t window_length)
for(std::size_t a = 0; a < alignments.size(); a++)
overlaps_[a]->cigar_ = alignments[a]->convert_to_cigar();
- overlaps_[a]->find_breaking_points_from_cigar(window_length);
- }
- for(Overlap* overlap : cpu_overlaps_)
- {
- // Run CPU version of breaking points.
- overlap->find_breaking_points_from_cigar(window_length);
void CUDABatchAligner::reset()
- cpu_overlaps_.clear();
@@ -49,10 +49,11 @@ class CUDABatchAligner
virtual void alignAll();
- * @brief Find breaking points in alignments.
+ * @brief Generate cigar strings for overlaps that were successfully
+ * computed on the GPU.
- virtual void find_breaking_points(uint32_t window_length);
+ virtual void generate_cigar_strings();
* @brief Resets the state of the object, which includes
@@ -74,13 +75,10 @@ class CUDABatchAligner
CUDABatchAligner(const CUDABatchAligner&) = delete;
const CUDABatchAligner& operator=(const CUDABatchAligner&) = delete;
- void compute_cpu_overlaps();
std::unique_ptr<claragenomics::cudaaligner::Aligner> aligner_;
std::vector<Overlap*> overlaps_;
- std::vector<Overlap*> cpu_overlaps_;
std::vector<std::pair<std::string, std::string>> cpu_overlap_data_;
// Static batch count used to generate batch IDs.
@@ -85,12 +85,7 @@ std::vector<uint32_t> CUDAPolisher::calculate_batches_per_gpu(uint32_t batches,
void CUDAPolisher::find_overlap_breaking_points(std::vector<std::unique_ptr<Overlap>>& overlaps)
- if (cudaaligner_batches_ < 1)
- {
- // TODO: Kept CPU overlap alignment right now while GPU is a dummy implmentation.
- Polisher::find_overlap_breaking_points(overlaps);
- }
- else
+ if (cudaaligner_batches_ >= 1)
// TODO: Experimentally this is giving decent perf
const uint32_t MAX_ALIGNMENTS = 200;
@@ -137,7 +132,10 @@ void CUDAPolisher::find_overlap_breaking_points(std::vector<std::unique_ptr<Over
// Launch workload.
- batch->find_breaking_points(window_length_);
+ // Generate CIGAR strings for successful alignments. The actual breaking points
+ // will be calculated by the overlap object.
+ batch->generate_cigar_strings();
// logging bar
@@ -193,6 +191,12 @@ void CUDAPolisher::find_overlap_breaking_points(std::vector<std::unique_ptr<Over
+ // This call runs the breaking point detection code for all alignments.
+ // Any overlaps that couldn't be processed by the GPU are also handled here
+ // by the CPU aligner.
+ logger_->log();
+ Polisher::find_overlap_breaking_points(overlaps);
void CUDAPolisher::polish(std::vector<std::unique_ptr<Sequence>>& dst,
@@ -165,6 +165,7 @@ void help() {
"usage: racon [options ...] <sequences> <overlaps> <target sequences>\n"
+ " #default output is stdout\n"
" <sequences>\n"
" input file in FASTA/FASTQ format (can be compressed with gzip)\n"
" containing sequences used for correction\n"
@@ -209,14 +210,14 @@ void help() {
" -h, --help\n"
" prints the usage\n"
- " -c, --cudapoa-batches\n"
- " default: 1\n"
+ " -c, --cudapoa-batches <int>\n"
+ " default: 0\n"
" number of batches for CUDA accelerated polishing\n"
" -b, --cuda-banded-alignment\n"
" use banding approximation for alignment on GPU\n"
- " --cudaaligner-batches\n"
- " Number of batches for CUDA accelerated alignment\n"
+ " --cudaaligner-batches <int>\n"
+ " default: 0\n"
+ " number of batches for CUDA accelerated alignment\n"
@@ -0,0 +1,22 @@
+racon_cpp_sources = files([
+ 'logger.cpp',
+ 'overlap.cpp',
+ 'polisher.cpp',
+ 'sequence.cpp',
+ 'window.cpp'
+racon_extra_flags = []
+racon_lib_install = (not meson.is_subproject()) or (get_option('default_library') == 'shared')
+racon_lib = library(
+ 'racon',
+ racon_cpp_sources,
+ soversion : 0,
+ version : meson.project_version(),
+ install : racon_lib_install,
+ link_with : vendor_lib,
+ dependencies : [racon_thread_dep, racon_zlib_dep],
+ include_directories : racon_include_directories + vendor_include_directories,
+ cpp_args : [racon_extra_flags, racon_warning_flags, racon_cpp_flags])
@@ -0,0 +1,22 @@
+ * @file version.hpp
+ *
+ * @brief Version information for the entire project.
+ */
+#pragma once
+#include <string>
+static const std::string RACON_VERSION_COMMIT("@RACON_VERSION_COMMIT@");
+static const std::string RACON_VERSION_STRING =
+ std::to_string(RACON_VERSION_MAJOR) + "." +
+ std::to_string(RACON_VERSION_MINOR) + "." +
+ std::to_string(RACON_VERSION_PATCH) + "-" +
+static const std::string COMPILE_DATE = (std::string(__DATE__) + std::string(" at ") + std::string(__TIME__));
@@ -0,0 +1,10 @@
+directory = googletest-release-1.8.0
+source_url = https://github.com/google/googletest/archive/release-1.8.0.zip
+source_filename = gtest-1.8.0.zip
+source_hash = f3ed3b58511efd272eb074a3a6d6fb79d7c2e6a0e374323d1e6bcbcc1ef141bf
+patch_url = https://wrapdb.mesonbuild.com/v1/projects/gtest/1.8.0/5/get_zip
+patch_filename = gtest-1.8.0-5-wrap.zip
+patch_hash = 7eeaede4aa2610a403313b74e04baf91ccfbaef03203d8f56312e22df1834ec5
@@ -0,0 +1,10 @@
+directory = zlib-1.2.11
+source_url = http://zlib.net/fossils/zlib-1.2.11.tar.gz
+source_filename = zlib-1.2.11.tar.gz
+source_hash = c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1
+patch_url = https://wrapdb.mesonbuild.com/v1/projects/zlib/1.2.11/3/get_zip
+patch_filename = zlib-1.2.11-3-wrap.zip
+patch_hash = f07dc491ab3d05daf00632a0591e2ae61b470615b5b73bcf9b3f061fff65cff0
@@ -0,0 +1,14 @@
+racon_test_cpp_sources = files([
+ 'racon_test.cpp'
+racon_test_include_directories = [include_directories('.')]
+racon_test_extra_flags = []
+racon_test_config_h_vars = configuration_data()
+racon_test_config_h_vars.set('racon_test_data_path', meson.source_root() + '/test/data/')
+racon_test_config_h = configure_file(
+ input : files('racon_test_config.h.in'),
+ output : 'racon_test_config.h',
+ configuration : racon_test_config_h_vars)
@@ -0,0 +1,34 @@
+vendor_cpp_sources = files([
+ 'edlib/edlib/src/edlib.cpp',
+ 'rampler/src/sampler.cpp',
+ 'rampler/src/sequence.cpp',
+ 'spoa/src/alignment_engine.cpp',
+ 'spoa/src/graph.cpp',
+ 'spoa/src/sequence.cpp',
+ 'spoa/src/simd_alignment_engine.cpp',
+ 'spoa/src/sisd_alignment_engine.cpp',
+ 'thread_pool/src/thread_pool.cpp'
+vendor_include_directories = [
+ include_directories('bioparser/include'),
+ include_directories('edlib/edlib/include'),
+ include_directories('rampler/src'),
+ include_directories('spoa/include'),
+ include_directories('thread_pool/include')
+ ]
+vendor_extra_flags = []
+vendor_lib_install = (not meson.is_subproject()) or (get_option('default_library') == 'shared')
+vendor_lib = library(
+ 'vendor',
+ vendor_cpp_sources,
+ soversion : 0,
+ version : meson.project_version(),
+ install : vendor_lib_install,
+ link_with : [],
+ dependencies : [racon_thread_dep, racon_zlib_dep],
+ include_directories : vendor_include_directories,
+ cpp_args : [vendor_extra_flags, racon_warning_flags, racon_cpp_flags])
View it on GitLab: https://salsa.debian.org/med-team/racon/-/commit/b8039ef4f01c49ef0aca28a74ac598a9b4916c22
View it on GitLab: https://salsa.debian.org/med-team/racon/-/commit/b8039ef4f01c49ef0aca28a74ac598a9b4916c22
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200413/dded9ea7/attachment-0001.html>
More information about the debian-med-commit
mailing list