[med-svn] [Git][med-team/spoa][upstream] New upstream version 3.4.0
Andreas Tille
gitlab at salsa.debian.org
Wed Aug 5 08:04:54 BST 2020
Andreas Tille pushed to branch upstream at Debian Med / spoa
Commits:
72c8f2f3 by Andreas Tille at 2020-08-04T19:42:06+02:00
New upstream version 3.4.0
- - - - -
12 changed files:
- .gitmodules
- CMakeLists.txt
- README.md
- + include/spoa/architectures.hpp
- include/spoa/graph.hpp
- src/alignment_engine.cpp
- + src/dispatcher.cpp
- src/graph.cpp
- src/main.cpp
- src/simd_alignment_engine.hpp
- + src/simd_alignment_engine_dispatch.cpp
- src/simd_alignment_engine.cpp → src/simd_alignment_engine_impl.hpp
Changes:
=====================================
.gitmodules
=====================================
@@ -4,3 +4,10 @@
[submodule "vendor/googletest"]
path = vendor/googletest
url = https://github.com/google/googletest
+[submodule "vendor/simde"]
+ path = vendor/simde
+ url = https://github.com/nemequ/simde.git
+[submodule "vendor/cpu_features"]
+ path = vendor/cpu_features
+ url = https://github.com/mbrcic/cpu_features.git
+ branch = patch-4
=====================================
CMakeLists.txt
=====================================
@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.2)
-project(spoa LANGUAGES CXX VERSION 3.0.0)
+project(spoa LANGUAGES CXX VERSION 4.0.0)
include(GNUInstallDirs)
@@ -16,31 +16,88 @@ option(spoa_build_executable "Build spoa standalone tool" OFF)
option(spoa_build_tests "Build spoa unit tests" OFF)
option(spoa_optimize_for_native "Build spoa with -march=native" ON)
option(spoa_optimize_for_portability "Build spoa with -msse4.1" OFF)
+option(spoa_use_simde "Use SIMDe library for porting vectorized code" OFF)
+option(spoa_use_simde_nonvec "Use SIMDe library for nonvectorized code" OFF)
+option(spoa_use_simde_openmp "Use SIMDe support for OpenMP SIMD" OFF)
+option(spoa_generate_dispatch "Use SIMDe to generate x86 dispatch" OFF)
+
+# Pick at most one global architecture flag. A dispatch build gets none here:
+# a global -m/-march flag would defeat the purpose of dispatching, since the
+# per-architecture objects receive their own -m<arch> flag.
+if (spoa_generate_dispatch)
+    # intentionally empty: no global optimization flag in the dispatch case
+elseif (spoa_optimize_for_portability)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1")
+elseif (spoa_optimize_for_native)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
+endif()
-if (spoa_optimize_for_portability)
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1")
-elseif (spoa_optimize_for_native)
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
+# Any SIMDe-related option (including dispatch generation) defines USE_SIMDE,
+# which the sources test to include the SIMDe headers instead of immintrin.h.
+if (spoa_use_simde OR spoa_use_simde_nonvec OR spoa_use_simde_openmp OR spoa_generate_dispatch)
+ add_definitions(-DUSE_SIMDE -DSIMDE_ENABLE_NATIVE_ALIASES)
+ # spoa_use_simde_nonvec: "Use SIMDe library for nonvectorized code"
+ if (spoa_use_simde_nonvec)
+ add_definitions(-DSIMDE_NO_NATIVE)
+ endif()
+ # spoa_use_simde_openmp: "Use SIMDe support for OpenMP SIMD"; also adds the
+ # -fopenmp-simd compiler flag globally
+ if (spoa_use_simde_openmp)
+ add_definitions(-DSIMDE_ENABLE_OPENMP)
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp-simd")
+ endif()
+ # GEN_DISPATCH switches src/dispatcher.cpp to its cpu_features-based branch
+ if (spoa_generate_dispatch)
+ add_definitions(-DGEN_DISPATCH)
+ endif()
endif()
# build SPOA as a static library by default
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build all libraries as shared")
+list(APPEND INCLUDES
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+ $<INSTALL_INTERFACE:include>
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/vendor/simde>)
+
+# the library also builds a dispatcher source that handles both the dispatching and the non-dispatching case
+
add_library(spoa
src/alignment_engine.cpp
src/graph.cpp
- src/simd_alignment_engine.cpp
- src/sisd_alignment_engine.cpp)
+ src/sisd_alignment_engine.cpp
+ src/dispatcher.cpp)
target_include_directories(spoa PUBLIC
- $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
- $<INSTALL_INTERFACE:include>)
+ ${INCLUDES}
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/vendor/cpu_features/include>)
set_target_properties(spoa
PROPERTIES
VERSION ${spoa_VERSION}
SOVERSION ${spoa_VERSION})
+# in dispatching case, generate different optimized versions
+
+# Dispatch build: compile src/simd_alignment_engine_dispatch.cpp once per x86
+# instruction set (each with its own -m<arch> flag) and bundle the resulting
+# objects into the spoa library. cpu_features is linked in because
+# src/dispatcher.cpp queries it to choose an engine.
+if (spoa_generate_dispatch)
+
+    if (NOT TARGET cpu_features)
+        add_subdirectory(vendor/cpu_features)
+    endif()
+
+    list(APPEND Archs avx2 sse4.1 sse2)
+
+    foreach(arch IN LISTS Archs)
+        add_library(spoa_${arch} OBJECT src/simd_alignment_engine_dispatch.cpp)
+        target_include_directories(spoa_${arch} PUBLIC ${INCLUDES})
+        set_target_properties(spoa_${arch} PROPERTIES COMPILE_FLAGS "-m${arch}")
+        if (BUILD_SHARED_LIBS)
+            set_property(TARGET spoa_${arch}
+                PROPERTY POSITION_INDEPENDENT_CODE ON)
+        endif()
+        # add_dependencies() would only order the build; the objects have to be
+        # listed as sources of spoa so that the per-architecture template
+        # instantiations actually end up inside the library. Referencing
+        # TARGET_OBJECTS also implies the build-order dependency.
+        target_sources(spoa PRIVATE $<TARGET_OBJECTS:spoa_${arch}>)
+    endforeach()
+
+    target_link_libraries(spoa
+        cpu_features)
+
+endif()
+
install(TARGETS spoa DESTINATION ${CMAKE_INSTALL_LIBDIR})
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/spoa DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
=====================================
README.md
=====================================
@@ -27,7 +27,18 @@ cd build
cmake -DCMAKE_BUILD_TYPE=Release ..
make
```
-a library named `libspoa.a` will appear in the `build/lib` directory. If you want the spoa executable, run the following two commands:
+a library named `libspoa.a` will appear in the `build/lib` directory.
+
+Various options can be enabled while running `cmake`:
+
+- `spoa_optimize_for_native`: builds with `-march=native`
+- `spoa_optimize_for_portability`: builds with `-msse4.1`
+- `spoa_use_simde`: builds with SIMDe for porting vectorized code
+- `spoa_use_simde_nonvec`: uses SIMDe library for nonvectorized code
+- `spoa_use_simde_openmp`: uses SIMDe support for OpenMP SIMD
+- `spoa_generate_dispatch`: uses SIMDe to generate x86 dispatch
+
+If you want the spoa executable, run the following two commands:
```bash
cmake -DCMAKE_BUILD_TYPE=Release -Dspoa_build_executable=ON ..
@@ -85,6 +96,10 @@ spoa [options ...] <sequences>
0 - consensus
1 - multiple sequence alignment
2 - 0 & 1
+ -G, --gfa
+ write GFA on stdout
+ -C, --gfa-with-consensus
+ write GFA with consensus on stdout
-d, --dot <file>
output file for the final POA graph in DOT format
--version
=====================================
include/spoa/architectures.hpp
=====================================
@@ -0,0 +1,10 @@
+/*!
+ * @file architectures.hpp
+ *
+ * @brief Arch enum class header file
+ */
+namespace spoa {
+
+enum class Arch{avx2, sse4_1, sse2, automatic};
+
+}
\ No newline at end of file
=====================================
include/spoa/graph.hpp
=====================================
@@ -83,6 +83,8 @@ public:
void print_dot(const std::string& path) const;
+ void print_gfa(std::ostream& out, const std::vector<std::string>& sequence_names, bool include_consensus = false) const;
+
void clear();
friend std::unique_ptr<Graph> createGraph();
=====================================
src/alignment_engine.cpp
=====================================
@@ -7,6 +7,7 @@
#include <limits>
#include <algorithm>
#include <exception>
+#include <stdexcept>
#include "sisd_alignment_engine.hpp"
#include "simd_alignment_engine.hpp"
=====================================
src/dispatcher.cpp
=====================================
@@ -0,0 +1,62 @@
+/*!
+ * @file dispatcher.cpp
+ *
+ * @brief CPU dispatching mechanism that also covers non-dispatching case
+ */
+
+#include "simd_alignment_engine_impl.hpp"
+
+#ifdef GEN_DISPATCH
+
+#include "cpuinfo_x86.h"
+
+static const cpu_features::X86Features features = cpu_features::GetX86Info().features;
+
+#endif
+
+
+namespace spoa{
+
+#ifndef GEN_DISPATCH
+template class SimdAlignmentEngine<Arch::automatic>;
+
+template
+std::unique_ptr<AlignmentEngine> createSimdAlignmentEngine<Arch::automatic>(AlignmentType type,
+ AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g,
+ std::int8_t e, std::int8_t q, std::int8_t c);
+#endif
+
+
+// Non-template entry point: forwards to the createSimdAlignmentEngine<Arch>
+// instantiation that fits the build. With GEN_DISPATCH the choice is made from
+// the CPU features read at start-up (AVX2 first, then SSE4.1, SSE2 otherwise);
+// without it the single Arch::automatic instantiation is used.
+std::unique_ptr<AlignmentEngine> createSimdAlignmentEngine(AlignmentType type,
+    AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g,
+    std::int8_t e, std::int8_t q, std::int8_t c) {
+
+#ifdef GEN_DISPATCH
+    if (features.avx2) {
+        return createSimdAlignmentEngine<Arch::avx2>(
+            type, subtype, m, n, g, e, q, c);
+    }
+    if (features.sse4_1) {
+        return createSimdAlignmentEngine<Arch::sse4_1>(
+            type, subtype, m, n, g, e, q, c);
+    }
+    return createSimdAlignmentEngine<Arch::sse2>(
+        type, subtype, m, n, g, e, q, c);
+#else
+    return createSimdAlignmentEngine<Arch::automatic>(
+        type, subtype, m, n, g, e, q, c);
+#endif
+}
+
+}
+
=====================================
src/graph.cpp
=====================================
@@ -8,6 +8,7 @@
#include <algorithm>
#include <stack>
#include <fstream>
+#include <stdexcept>
#include "spoa/graph.hpp"
@@ -725,6 +726,57 @@ void Graph::print_dot(const std::string& path) const {
out.close();
}
+// Writes the graph to `out` in GFA 1.0 text form:
+//   - one S line per node (1-based id, decoded base) and one L line per
+//     outgoing edge (zero overlap, weight in the ew:f tag);
+//   - one P line per stored sequence, labelled with sequence_names[i];
+//   - optionally one P line named "Consensus" for the current consensus path.
+// Nodes on the consensus path, and edges joining two consecutive consensus
+// nodes, carry the tag ic:Z:true.
+//
+// sequence_names must hold at least one name per sequence in the graph;
+// std::invalid_argument is thrown otherwise.
+void Graph::print_gfa(std::ostream& out,
+    const std::vector<std::string>& sequence_names,
+    bool include_consensus) const {
+
+    if (sequence_names.size() < num_sequences_) {
+        throw std::invalid_argument("[spoa::Graph::print_gfa] error: "
+            "missing sequence names!");
+    }
+
+    // position of each node on the consensus path, -1 when it is not on it
+    std::vector<std::int32_t> in_consensus(nodes_.size(), -1);
+    std::int32_t rank = 0;
+    for (const auto& id: consensus_) {
+        in_consensus[id] = rank++;
+    }
+
+    out << "H" << "\t" << "VN:Z:1.0" << std::endl;
+
+    for (std::uint32_t i = 0; i < nodes_.size(); ++i) {
+        out << "S" << "\t" << i+1 << "\t" << static_cast<char>(decoder_[nodes_[i]->code_]);
+        if (in_consensus[i] != -1) {
+            out << "\t" << "ic:Z:true";
+        }
+        out << std::endl;
+        for (const auto& edge: nodes_[i]->out_edges_) {
+            out << "L" << "\t" << i+1 << "\t" << "+" << "\t" << edge->end_node_id_+1 << "\t" << "+" << "\t" << "0M" << "\t"
+                << "ew:f:" << edge->total_weight_;
+            // the source node must itself be on the consensus path; without
+            // this check a rank of -1 would match an edge into the first
+            // consensus node (-1 + 1 == 0)
+            if (in_consensus[i] != -1 &&
+                in_consensus[i] + 1 == in_consensus[edge->end_node_id_]) {
+                out << "\t" << "ic:Z:true";
+            }
+            out << std::endl;
+        }
+    }
+
+    for (std::uint32_t i = 0; i < num_sequences_; ++i) {
+        out << "P" << "\t" << sequence_names[i] << "\t";
+        std::uint32_t node_id = sequences_begin_nodes_ids_[i];
+        while (true) {
+            out << node_id+1 << "+";
+            // successor() advances node_id along sequence i; a false result
+            // ends the path
+            if (!nodes_[node_id]->successor(node_id, i)) {
+                break;
+            } else {
+                out << ",";
+            }
+        }
+        out << "\t" << "*" << std::endl;
+    }
+
+    if (include_consensus) {
+        out << "P" << "\t" << "Consensus" << "\t";
+        // path segments are comma separated, same as for the sequence paths
+        bool is_first = true;
+        for (const auto& id: consensus_) {
+            if (!is_first) {
+                out << ",";
+            }
+            out << id+1 << "+";
+            is_first = false;
+        }
+        out << "\t" << "*" << std::endl;
+    }
+}
+
void Graph::clear() {
num_codes_ = 0;
num_sequences_ = 0;
=====================================
src/main.cpp
=====================================
@@ -10,12 +10,14 @@
#include "spoa/spoa.hpp"
#include "bioparser/bioparser.hpp"
-static const std::string version = "v3.0.2";
+static const std::string version = "v3.4.0";
static struct option options[] = {
{"algorithm", required_argument, nullptr, 'l'},
{"result", required_argument, nullptr, 'r'},
{"dot", required_argument, nullptr, 'd'},
+ // -G and -C are plain flags (no ':' after them in the getopt_long optstring),
+ // so their long forms must not consume the following argument either
+ {"gfa", no_argument, nullptr, 'G'},
+ {"gfa-with-consensus", no_argument, nullptr, 'C'},
{"version", no_argument, nullptr, 'v'},
{"help", no_argument, nullptr, 'h'},
{nullptr, 0, nullptr, 0}
@@ -36,9 +38,11 @@ int main(int argc, char** argv) {
std::uint8_t result = 0;
std::string dot_path = "";
+ bool write_gfa = false;
+ bool write_gfa_with_consensus = false;
- char opt;
- while ((opt = getopt_long(argc, argv, "m:n:g:e:q:c:l:r:d:h", options, nullptr)) != -1) {
+ int opt;
+ while ((opt = getopt_long(argc, argv, "m:n:g:e:q:c:l:r:d:GCh", options, nullptr)) != -1) {
switch (opt) {
case 'm': m = atoi(optarg); break;
case 'n': n = atoi(optarg); break;
@@ -49,6 +53,8 @@ int main(int argc, char** argv) {
case 'l': algorithm = atoi(optarg); break;
case 'r': result = atoi(optarg); break;
case 'd': dot_path = optarg; break;
+ case 'G': write_gfa = true; break;
+ case 'C': write_gfa = true; write_gfa_with_consensus = true; break;
case 'v': std::cout << version << std::endl; return 0;
case 'h': help(); return 0;
default: return 1;
@@ -118,18 +124,26 @@ int main(int argc, char** argv) {
}
}
- if (result == 0 || result == 2) {
+ if (write_gfa) {
+ // force consensus generation for graph annotation
std::string consensus = graph->generate_consensus();
- std::cout << "Consensus (" << consensus.size() << ")" << std::endl;
- std::cout << consensus << std::endl;
- }
-
- if (result == 1 || result == 2) {
+ // save sequence names for graph path labeling
+ std::vector<std::string> sequence_names;
+ for (auto& s : sequences) {
+ sequence_names.push_back(s->name());
+ }
+ // write the graph, with consensus as a path if requested
+ graph->print_gfa(std::cout, sequence_names, write_gfa_with_consensus);
+ } else if (result == 0) {
+ std::string consensus = graph->generate_consensus();
+ std::cout << ">Consensus LN:i:" << consensus.size() << std::endl
+ << consensus << std::endl;
+ } else {
std::vector<std::string> msa;
- graph->generate_multiple_sequence_alignment(msa);
- std::cout << "Multiple sequence alignment" << std::endl;
- for (const auto& it: msa) {
- std::cout << it << std::endl;
+ graph->generate_multiple_sequence_alignment(msa, result == 2);
+ for (std::uint32_t i = 0; i < msa.size(); ++i) {
+ std::cout << ">" << (i < sequences.size() ? sequences[i]->name() : "Consensus") << std::endl
+ << msa[i] << std::endl;
}
}
@@ -179,6 +193,10 @@ void help() {
" 0 - consensus\n"
" 1 - multiple sequence alignment\n"
" 2 - 0 & 1\n"
+ " -G, --gfa\n"
+ " write GFA on stdout\n"
+ " -C, --gfa-with-consensus\n"
+ " write GFA with consensus on stdout\n"
" -d, --dot <file>\n"
" output file for the final POA graph in DOT format\n"
" --version\n"
=====================================
src/simd_alignment_engine.hpp
=====================================
@@ -1,7 +1,7 @@
/*!
* @file simd_alignment_engine.hpp
*
- * @brief SimdAlignmentEngine class header file
+ * @brief SimdAlignmentEngine class template definition file
*/
#pragma once
@@ -12,16 +12,29 @@
#include <vector>
#include "spoa/alignment_engine.hpp"
+#include "spoa/architectures.hpp"
namespace spoa {
class Graph;
+template<Arch S>
class SimdAlignmentEngine;
+
std::unique_ptr<AlignmentEngine> createSimdAlignmentEngine(AlignmentType type,
AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g,
std::int8_t e, std::int8_t q, std::int8_t c);
+
+
+template<Arch S>
+std::unique_ptr<AlignmentEngine> createSimdAlignmentEngine(AlignmentType type,
+ AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g,
+ std::int8_t e, std::int8_t q, std::int8_t c);
+
+
+
+template<Arch S>
class SimdAlignmentEngine: public AlignmentEngine {
public:
~SimdAlignmentEngine();
@@ -32,7 +45,7 @@ public:
Alignment align(const char* sequence, std::uint32_t sequence_size,
const std::unique_ptr<Graph>& graph) noexcept override;
- friend std::unique_ptr<AlignmentEngine> createSimdAlignmentEngine(
+ friend std::unique_ptr<AlignmentEngine> createSimdAlignmentEngine<S>(
AlignmentType type, AlignmentSubtype subtype, std::int8_t m,
std::int8_t n, std::int8_t g, std::int8_t e, std::int8_t q,
std::int8_t c);
=====================================
src/simd_alignment_engine_dispatch.cpp
=====================================
@@ -0,0 +1,28 @@
+/*!
+ * @file simd_alignment_engine_dispatch.cpp
+ *
+ * @brief Instantiation of different SIMD engines
+ */
+
+ #include "simd_alignment_engine_impl.hpp"
+
+ // Map the instruction-set macros predefined by the compiler for this
+ // translation unit to the Arch tag to instantiate. The dispatch build
+ // compiles this file several times, each time with a different -m<arch>
+ // flag, so each resulting object provides one instantiation.
+ #if defined(__AVX2__)
+ #define ARCH Arch::avx2
+ #elif defined (__SSE4_1__)
+ #define ARCH Arch::sse4_1
+ #else
+ #define ARCH Arch::sse2
+ #endif
+
+
+namespace spoa{
+
+// explicit instantiation of the engine class for the selected architecture
+template class SimdAlignmentEngine<ARCH>;
+
+// explicit instantiation of the matching factory function template
+template
+std::unique_ptr<AlignmentEngine> createSimdAlignmentEngine<ARCH>(AlignmentType type,
+ AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g,
+ std::int8_t e, std::int8_t q, std::int8_t c);
+
+}
+
=====================================
src/simd_alignment_engine.cpp → src/simd_alignment_engine_impl.hpp
=====================================
@@ -1,7 +1,7 @@
/*!
- * @file simd_alignment_engine.cpp
+ * @file simd_alignment_engine_impl.hpp
*
- * @brief SimdAlignmentEngine class source file
+ * @brief SimdAlignmentEngine class template implementation file
*/
#include <iostream>
@@ -9,7 +9,16 @@
#include <limits>
extern "C" {
+ #ifdef USE_SIMDE
+ #ifdef __AVX2__
+ #include <simde/x86/avx2.h>
+ #else
+ #include <simde/x86/sse4.1.h> // SSE4.1 is covered better
+ #endif
+
+ #elif defined(__AVX2__) || defined(__SSE4_1__)
#include <immintrin.h> // AVX2 and lower
+ #endif
}
#include "spoa/graph.hpp"
@@ -17,6 +26,7 @@ extern "C" {
namespace spoa {
+
// Taken from https://gcc.gnu.org/viewcvs/gcc?view=revision&revision=216149
inline void* align(std::size_t __align, std::size_t __size, void*& __ptr,
std::size_t& __space) noexcept {
@@ -32,7 +42,8 @@ inline void* align(std::size_t __align, std::size_t __size, void*& __ptr,
}
}
-template<typename T>
+// TODO: what to do with this??
+template<Arch S,typename T>
T* allocateAlignedMemory(T** storage, std::uint32_t size, std::uint32_t alignment) {
*storage = new T[size + alignment - 1];
void* ptr = static_cast<void*>(*storage);
@@ -40,7 +51,7 @@ T* allocateAlignedMemory(T** storage, std::uint32_t size, std::uint32_t alignmen
return static_cast<T*>(align(alignment, size * sizeof(T), ptr, storage_size));
}
-template<typename T>
+template<Arch S,typename T>
struct InstructionSet;
#if defined(__AVX2__)
@@ -69,8 +80,8 @@ inline __mxxxi _mmxxx_or_si(const __mxxxi& a, const __mxxxi& b) {
_mm256_srli_si256(_mm256_permute2x128_si256(a, a, \
_MM_SHUFFLE(2, 0, 0, 1)), n - 16)
-template<>
-struct InstructionSet<std::int16_t> {
+template<Arch S>
+struct InstructionSet<S,std::int16_t> {
using type = std::int16_t;
static constexpr std::uint32_t kNumVar = kRegisterSize / (8 * sizeof(type));
static constexpr std::uint32_t kLogNumVar = 4;
@@ -105,8 +116,8 @@ struct InstructionSet<std::int16_t> {
}
};
-template<>
-struct InstructionSet<std::int32_t> {
+template<Arch S>
+struct InstructionSet<S,std::int32_t> {
using type = std::int32_t;
static constexpr std::uint32_t kNumVar = kRegisterSize / (8 * sizeof(type));
static constexpr std::uint32_t kLogNumVar = 3;
@@ -139,7 +150,7 @@ struct InstructionSet<std::int32_t> {
}
};
-#elif defined(__SSE4_1__)
+#elif defined(__SSE4_1__) || defined(USE_SIMDE)
constexpr std::uint32_t kRegisterSize = 128;
using __mxxxi = __m128i;
@@ -162,8 +173,8 @@ inline __mxxxi _mmxxx_or_si(const __mxxxi& a, const __mxxxi& b) {
#define _mmxxx_srli_si(a, n) \
_mm_srli_si128(a, n)
-template<>
-struct InstructionSet<std::int16_t> {
+template<Arch S>
+struct InstructionSet<S,std::int16_t> {
using type = std::int16_t;
static constexpr std::uint32_t kNumVar = kRegisterSize / (8 * sizeof(type));
static constexpr std::uint32_t kLogNumVar = 3;
@@ -196,8 +207,8 @@ struct InstructionSet<std::int16_t> {
}
};
-template<>
-struct InstructionSet<std::int32_t> {
+template<Arch S>
+struct InstructionSet<S,std::int32_t> {
using type = std::int32_t;
static constexpr std::uint32_t kNumVar = kRegisterSize / (8 * sizeof(type));
static constexpr std::uint32_t kLogNumVar = 2;
@@ -230,9 +241,9 @@ struct InstructionSet<std::int32_t> {
#endif
-#if defined(__AVX2__) || defined(__SSE4_1__)
+#if defined(__AVX2__) || defined(__SSE4_1__) || defined(USE_SIMDE)
-template<typename T>
+template<Arch S,typename T>
void _mmxxx_print(const __mxxxi& a) {
__attribute__((aligned(kRegisterSize / 8))) typename T::type
@@ -244,7 +255,7 @@ void _mmxxx_print(const __mxxxi& a) {
}
}
-template<typename T>
+template<Arch S,typename T>
typename T::type _mmxxx_max_value(const __mxxxi& a) {
typename T::type max_score = 0;
@@ -259,7 +270,7 @@ typename T::type _mmxxx_max_value(const __mxxxi& a) {
return max_score;
}
-template<typename T>
+template<Arch S, typename T>
typename T::type _mmxxx_value_at(const __mxxxi& a, std::uint32_t i) {
__attribute__((aligned(kRegisterSize / 8))) typename T::type
@@ -269,7 +280,7 @@ typename T::type _mmxxx_value_at(const __mxxxi& a, std::uint32_t i) {
return unpacked[i];
}
-template<typename T>
+template<Arch S, typename T>
std::int32_t _mmxxx_index_of(const __mxxxi* row, std::uint32_t row_width,
typename T::type value) {
@@ -290,13 +301,14 @@ std::int32_t _mmxxx_index_of(const __mxxxi* row, std::uint32_t row_width,
#endif
+template<Arch S>
std::unique_ptr<AlignmentEngine> createSimdAlignmentEngine(AlignmentType type,
AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g,
std::int8_t e, std::int8_t q, std::int8_t c) {
-#if defined(__AVX2__) || defined(__SSE4_1__)
+#if defined(__AVX2__) || defined(__SSE4_1__) || defined(USE_SIMDE)
- return std::unique_ptr<AlignmentEngine>(new SimdAlignmentEngine(type,
+ return std::unique_ptr<AlignmentEngine>(new SimdAlignmentEngine<S>(type,
subtype, m, n, g, e, q, c));
#else
@@ -306,9 +318,11 @@ std::unique_ptr<AlignmentEngine> createSimdAlignmentEngine(AlignmentType type,
#endif
}
-struct SimdAlignmentEngine::Implementation {
-#if defined(__AVX2__) || defined(__SSE4_1__)
+template<Arch S>
+struct SimdAlignmentEngine<S>::Implementation {
+
+#if defined(__AVX2__) || defined(__SSE4_1__) || defined(USE_SIMDE)
std::vector<std::uint32_t> node_id_to_rank;
@@ -345,42 +359,46 @@ struct SimdAlignmentEngine::Implementation {
#endif
};
-SimdAlignmentEngine::SimdAlignmentEngine(AlignmentType type,
+template<Arch S>
+SimdAlignmentEngine<S>::SimdAlignmentEngine(AlignmentType type,
AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g,
std::int8_t e, std::int8_t q, std::int8_t c)
: AlignmentEngine(type, subtype, m, n, g, e, q, c),
pimpl_(new Implementation()) {
}
-SimdAlignmentEngine::~SimdAlignmentEngine() {
+template<Arch S>
+SimdAlignmentEngine<S>::~SimdAlignmentEngine() {
}
-void SimdAlignmentEngine::prealloc(std::uint32_t max_sequence_size,
+template<Arch S>
+void SimdAlignmentEngine<S>::prealloc(std::uint32_t max_sequence_size,
std::uint32_t alphabet_size) {
-#if defined(__AVX2__) || defined(__SSE4_1__)
+#if defined(__AVX2__) || defined(__SSE4_1__) || defined(USE_SIMDE)
std::uint32_t longest_path = max_sequence_size * (alphabet_size + 1) + 1 +
- InstructionSet<std::int16_t>::kNumVar;
+ InstructionSet<S,std::int16_t>::kNumVar;
std::uint32_t max_penalty = std::max(std::max(abs(m_), abs(n_)),
std::max(abs(g_), abs(q_)));
if (max_penalty * longest_path < std::numeric_limits<std::int16_t>::max()) {
- realloc((max_sequence_size / InstructionSet<std::int16_t>::kNumVar) + 1,
+ realloc((max_sequence_size / InstructionSet<S,std::int16_t>::kNumVar) + 1,
alphabet_size * max_sequence_size, alphabet_size);
} else {
- realloc((max_sequence_size / InstructionSet<std::int32_t>::kNumVar) + 1,
+ realloc((max_sequence_size / InstructionSet<S,std::int32_t>::kNumVar) + 1,
alphabet_size * max_sequence_size, alphabet_size);
}
#endif
}
-void SimdAlignmentEngine::realloc(std::uint32_t matrix_width,
+template<Arch S>
+void SimdAlignmentEngine<S>::realloc(std::uint32_t matrix_width,
std::uint32_t matrix_height, std::uint32_t num_codes) {
-#if defined(__AVX2__) || defined(__SSE4_1__)
+#if defined(__AVX2__) || defined(__SSE4_1__) || defined(USE_SIMDE)
if (pimpl_->node_id_to_rank.size() < matrix_height - 1) {
pimpl_->node_id_to_rank.resize(matrix_height - 1, 0);
@@ -388,7 +406,7 @@ void SimdAlignmentEngine::realloc(std::uint32_t matrix_width,
if (pimpl_->sequence_profile_size < num_codes * matrix_width) {
__mxxxi* storage = nullptr;
pimpl_->sequence_profile_size = num_codes * matrix_width;
- pimpl_->sequence_profile = allocateAlignedMemory(&storage,
+ pimpl_->sequence_profile = allocateAlignedMemory<S>(&storage,
pimpl_->sequence_profile_size, kRegisterSize / 8);
pimpl_->sequence_profile_storage.reset();
pimpl_->sequence_profile_storage = std::unique_ptr<__mxxxi[]>(storage);
@@ -400,7 +418,7 @@ void SimdAlignmentEngine::realloc(std::uint32_t matrix_width,
if (pimpl_->M_size < matrix_height * matrix_width) {
__mxxxi* storage = nullptr;
pimpl_->M_size = matrix_height * matrix_width;
- pimpl_->H = allocateAlignedMemory(&storage, pimpl_->M_size,
+ pimpl_->H = allocateAlignedMemory<S>(&storage, pimpl_->M_size,
kRegisterSize / 8);
pimpl_->M_storage.reset();
pimpl_->M_storage = std::unique_ptr<__mxxxi[]>(storage);
@@ -412,7 +430,7 @@ void SimdAlignmentEngine::realloc(std::uint32_t matrix_width,
if (pimpl_->M_size < 3 * matrix_height * matrix_width) {
__mxxxi* storage = nullptr;
pimpl_->M_size = 3 * matrix_height * matrix_width;
- pimpl_->H = allocateAlignedMemory(&storage, pimpl_->M_size,
+ pimpl_->H = allocateAlignedMemory<S>(&storage, pimpl_->M_size,
kRegisterSize / 8);
pimpl_->F = pimpl_->H + matrix_height * matrix_width;
pimpl_->E = pimpl_->F + matrix_height * matrix_width;
@@ -426,7 +444,7 @@ void SimdAlignmentEngine::realloc(std::uint32_t matrix_width,
if (pimpl_->M_size < 5 * matrix_height * matrix_width) {
__mxxxi* storage = nullptr;
pimpl_->M_size = 5 * matrix_height * matrix_width;
- pimpl_->H = allocateAlignedMemory(&storage, pimpl_->M_size,
+ pimpl_->H = allocateAlignedMemory<S>(&storage, pimpl_->M_size,
kRegisterSize / 8);
pimpl_->F = pimpl_->H + matrix_height * matrix_width;
pimpl_->E = pimpl_->F + matrix_height * matrix_width;
@@ -436,18 +454,18 @@ void SimdAlignmentEngine::realloc(std::uint32_t matrix_width,
pimpl_->M_storage = std::unique_ptr<__mxxxi[]>(storage);
}
}
- if (pimpl_->masks_size < InstructionSet<std::int16_t>::kLogNumVar + 1) {
+ if (pimpl_->masks_size < InstructionSet<S,std::int16_t>::kLogNumVar + 1) {
__mxxxi* storage = nullptr;
- pimpl_->masks_size = InstructionSet<std::int16_t>::kLogNumVar + 1;
- pimpl_->masks = allocateAlignedMemory(&storage,
+ pimpl_->masks_size = InstructionSet<S,std::int16_t>::kLogNumVar + 1;
+ pimpl_->masks = allocateAlignedMemory<S>(&storage,
pimpl_->masks_size, kRegisterSize / 8);
pimpl_->masks_storage.reset();
pimpl_->masks_storage = std::unique_ptr<__mxxxi[]>(storage);
}
- if (pimpl_->penalties_size < 2 * InstructionSet<std::int16_t>::kLogNumVar) {
+ if (pimpl_->penalties_size < 2 * InstructionSet<S,std::int16_t>::kLogNumVar) {
__mxxxi* storage = nullptr;
- pimpl_->penalties_size = 2 * InstructionSet<std::int16_t>::kLogNumVar;
- pimpl_->penalties = allocateAlignedMemory(&storage,
+ pimpl_->penalties_size = 2 * InstructionSet<S,std::int16_t>::kLogNumVar;
+ pimpl_->penalties = allocateAlignedMemory<S>(&storage,
pimpl_->penalties_size, kRegisterSize / 8);
pimpl_->penalties_storage.reset();
pimpl_->penalties_storage = std::unique_ptr<__mxxxi[]>(storage);
@@ -456,12 +474,13 @@ void SimdAlignmentEngine::realloc(std::uint32_t matrix_width,
#endif
}
+template<Arch S>
template<typename T>
-void SimdAlignmentEngine::initialize(const char* sequence,
+void SimdAlignmentEngine<S>::initialize(const char* sequence,
const std::unique_ptr<Graph>& graph, std::uint32_t normal_matrix_width,
std::uint32_t matrix_width, std::uint32_t matrix_height) noexcept {
-#if defined(__AVX2__) || defined(__SSE4_1__)
+#if defined(__AVX2__) || defined(__SSE4_1__) || defined(USE_SIMDE)
std::int32_t padding_penatly = -1 * std::max(std::max(abs(m_), abs(n_)),
std::max(abs(g_), abs(q_)));
@@ -646,35 +665,36 @@ void SimdAlignmentEngine::initialize(const char* sequence,
#endif
}
-Alignment SimdAlignmentEngine::align(const char* sequence,
+template<Arch S>
+Alignment SimdAlignmentEngine<S>::align(const char* sequence,
std::uint32_t sequence_size, const std::unique_ptr<Graph>& graph) noexcept {
if (graph->nodes().empty() || sequence_size == 0) {
return Alignment();
}
-#if defined(__AVX2__) || defined(__SSE4_1__)
+#if defined(__AVX2__) || defined(__SSE4_1__) || defined(USE_SIMDE)
std::uint32_t longest_path = graph->nodes().size() + 1 + sequence_size +
- InstructionSet<std::int16_t>::kNumVar;
+ InstructionSet<S,std::int16_t>::kNumVar;
std::uint32_t max_penalty = std::max(std::max(abs(m_), abs(n_)), abs(g_));
if (max_penalty * longest_path < std::numeric_limits<std::int16_t>::max()) {
if (subtype_ == AlignmentSubtype::kLinear) {
- return linear<InstructionSet<std::int16_t>>(sequence, sequence_size, graph);
+ return linear<InstructionSet<S,std::int16_t>>(sequence, sequence_size, graph);
} else if (subtype_ == AlignmentSubtype::kAffine) {
- return affine<InstructionSet<std::int16_t>>(sequence, sequence_size, graph);
+ return affine<InstructionSet<S,std::int16_t>>(sequence, sequence_size, graph);
} else if (subtype_ == AlignmentSubtype::kConvex) {
- return convex<InstructionSet<std::int16_t>>(sequence, sequence_size, graph);
+ return convex<InstructionSet<S,std::int16_t>>(sequence, sequence_size, graph);
}
} else {
if (subtype_ == AlignmentSubtype::kLinear) {
- return linear<InstructionSet<std::int32_t>>(sequence, sequence_size, graph);
+ return linear<InstructionSet<S,std::int32_t>>(sequence, sequence_size, graph);
} else if (subtype_ == AlignmentSubtype::kAffine) {
- return affine<InstructionSet<std::int32_t>>(sequence, sequence_size, graph);
+ return affine<InstructionSet<S,std::int32_t>>(sequence, sequence_size, graph);
} else if (subtype_ == AlignmentSubtype::kConvex) {
- return convex<InstructionSet<std::int32_t>>(sequence, sequence_size, graph);
+ return convex<InstructionSet<S,std::int32_t>>(sequence, sequence_size, graph);
}
}
@@ -687,11 +707,12 @@ Alignment SimdAlignmentEngine::align(const char* sequence,
#endif
}
+template<Arch S>
template<typename T>
-Alignment SimdAlignmentEngine::linear(const char* sequence,
+Alignment SimdAlignmentEngine<S>::linear(const char* sequence,
std::uint32_t sequence_size, const std::unique_ptr<Graph>& graph) noexcept {
-#if defined(__AVX2__) || defined(__SSE4_1__)
+#if defined(__AVX2__) || defined(__SSE4_1__) || defined(USE_SIMDE)
std::uint32_t normal_matrix_width = sequence_size;
std::uint32_t matrix_width = (sequence_size + (sequence_size % T::kNumVar == 0 ?
@@ -805,7 +826,7 @@ Alignment SimdAlignmentEngine::linear(const char* sequence,
}
if (type_ == AlignmentType::kSW) {
- std::int32_t max_row_score = _mmxxx_max_value<T>(score);
+ std::int32_t max_row_score = _mmxxx_max_value<S,T>(score);
if (max_score < max_row_score) {
max_score = max_row_score;
max_i = i;
@@ -813,7 +834,7 @@ Alignment SimdAlignmentEngine::linear(const char* sequence,
} else if (type_ == AlignmentType::kOV) {
if (node->out_edges().empty()) {
- std::int32_t max_row_score = _mmxxx_max_value<T>(score);
+ std::int32_t max_row_score = _mmxxx_max_value<S,T>(score);
if (max_score < max_row_score) {
max_score = max_row_score;
max_i = i;
@@ -822,7 +843,7 @@ Alignment SimdAlignmentEngine::linear(const char* sequence,
} else if (type_ == AlignmentType::kNW) {
if (node->out_edges().empty()) {
- std::int32_t max_row_score = _mmxxx_value_at<T>(
+ std::int32_t max_row_score = _mmxxx_value_at<S,T>(
H_row[matrix_width - 1], last_column_id);
if (max_score < max_row_score) {
max_score = max_row_score;
@@ -837,12 +858,12 @@ Alignment SimdAlignmentEngine::linear(const char* sequence,
}
if (type_ == AlignmentType::kSW) {
- max_j = _mmxxx_index_of<T>(&(pimpl_->H[max_i * matrix_width]),
+ max_j = _mmxxx_index_of<S,T>(&(pimpl_->H[max_i * matrix_width]),
matrix_width, max_score);
} else if (type_ == AlignmentType::kOV) {
if (graph->nodes()[rank_to_node_id[max_i - 1]]->out_edges().empty()) {
- max_j = _mmxxx_index_of<T>(&(pimpl_->H[max_i * matrix_width]),
+ max_j = _mmxxx_index_of<S,T>(&(pimpl_->H[max_i * matrix_width]),
matrix_width, max_score);
} else {
max_j = normal_matrix_width - 1;
@@ -860,7 +881,7 @@ Alignment SimdAlignmentEngine::linear(const char* sequence,
}
typename T::type* backtrack_storage = nullptr;
- typename T::type* H = allocateAlignedMemory(&backtrack_storage,
+ typename T::type* H = allocateAlignedMemory<S>(&backtrack_storage,
3 * T::kNumVar + 2 * T::kNumVar * max_num_predecessors, kRegisterSize / 8);
typename T::type* H_pred = H + T::kNumVar;
typename T::type* H_diag_pred = H_pred + T::kNumVar * max_num_predecessors;
@@ -1034,11 +1055,12 @@ Alignment SimdAlignmentEngine::linear(const char* sequence,
#endif
}
+template<Arch S>
template<typename T>
-Alignment SimdAlignmentEngine::affine(const char* sequence,
+Alignment SimdAlignmentEngine<S>::affine(const char* sequence,
std::uint32_t sequence_size, const std::unique_ptr<Graph>& graph) noexcept {
-#if defined(__AVX2__) || defined(__SSE4_1__)
+#if defined(__AVX2__) || defined(__SSE4_1__) || defined(USE_SIMDE)
std::uint32_t normal_matrix_width = sequence_size;
std::uint32_t matrix_width = (sequence_size + (sequence_size % T::kNumVar == 0 ?
@@ -1160,7 +1182,7 @@ Alignment SimdAlignmentEngine::affine(const char* sequence,
}
if (type_ == AlignmentType::kSW) {
- std::int32_t max_row_score = _mmxxx_max_value<T>(score);
+ std::int32_t max_row_score = _mmxxx_max_value<S,T>(score);
if (max_score < max_row_score) {
max_score = max_row_score;
max_i = i;
@@ -1168,7 +1190,7 @@ Alignment SimdAlignmentEngine::affine(const char* sequence,
} else if (type_ == AlignmentType::kOV) {
if (node->out_edges().empty()) {
- std::int32_t max_row_score = _mmxxx_max_value<T>(score);
+ std::int32_t max_row_score = _mmxxx_max_value<S,T>(score);
if (max_score < max_row_score) {
max_score = max_row_score;
max_i = i;
@@ -1177,7 +1199,7 @@ Alignment SimdAlignmentEngine::affine(const char* sequence,
} else if (type_ == AlignmentType::kNW) {
if (node->out_edges().empty()) {
- std::int32_t max_row_score = _mmxxx_value_at<T>(
+ std::int32_t max_row_score = _mmxxx_value_at<S,T>(
H_row[matrix_width - 1], last_column_id);
if (max_score < max_row_score) {
max_score = max_row_score;
@@ -1192,12 +1214,12 @@ Alignment SimdAlignmentEngine::affine(const char* sequence,
}
if (type_ == AlignmentType::kSW) {
- max_j = _mmxxx_index_of<T>(&(pimpl_->H[max_i * matrix_width]),
+ max_j = _mmxxx_index_of<S,T>(&(pimpl_->H[max_i * matrix_width]),
matrix_width, max_score);
} else if (type_ == AlignmentType::kOV) {
if (graph->nodes()[rank_to_node_id[max_i - 1]]->out_edges().empty()) {
- max_j = _mmxxx_index_of<T>(&(pimpl_->H[max_i * matrix_width]),
+ max_j = _mmxxx_index_of<S,T>(&(pimpl_->H[max_i * matrix_width]),
matrix_width, max_score);
} else {
max_j = normal_matrix_width - 1;
@@ -1215,7 +1237,7 @@ Alignment SimdAlignmentEngine::affine(const char* sequence,
}
typename T::type* backtrack_storage = nullptr;
- typename T::type* H = allocateAlignedMemory(&backtrack_storage,
+ typename T::type* H = allocateAlignedMemory<S>(&backtrack_storage,
6 * T::kNumVar + 3 * T::kNumVar * max_num_predecessors, kRegisterSize / 8);
typename T::type* H_pred = H + T::kNumVar;
typename T::type* H_diag_pred = H_pred + T::kNumVar * max_num_predecessors;
@@ -1464,11 +1486,12 @@ Alignment SimdAlignmentEngine::affine(const char* sequence,
#endif
}
+template<Arch S>
template<typename T>
-Alignment SimdAlignmentEngine::convex(const char* sequence,
+Alignment SimdAlignmentEngine<S>::convex(const char* sequence,
std::uint32_t sequence_size, const std::unique_ptr<Graph>& graph) noexcept {
-#if defined(__AVX2__) || defined(__SSE4_1__)
+#if defined(__AVX2__) || defined(__SSE4_1__) || defined(USE_SIMDE)
std::uint32_t normal_matrix_width = sequence_size;
std::uint32_t matrix_width = (sequence_size + (sequence_size % T::kNumVar == 0 ?
@@ -1625,7 +1648,7 @@ Alignment SimdAlignmentEngine::convex(const char* sequence,
}
if (type_ == AlignmentType::kSW) {
- std::int32_t max_row_score = _mmxxx_max_value<T>(score);
+ std::int32_t max_row_score = _mmxxx_max_value<S,T>(score);
if (max_score < max_row_score) {
max_score = max_row_score;
max_i = i;
@@ -1633,7 +1656,7 @@ Alignment SimdAlignmentEngine::convex(const char* sequence,
} else if (type_ == AlignmentType::kOV) {
if (node->out_edges().empty()) {
- std::int32_t max_row_score = _mmxxx_max_value<T>(score);
+ std::int32_t max_row_score = _mmxxx_max_value<S,T>(score);
if (max_score < max_row_score) {
max_score = max_row_score;
max_i = i;
@@ -1642,7 +1665,7 @@ Alignment SimdAlignmentEngine::convex(const char* sequence,
} else if (type_ == AlignmentType::kNW) {
if (node->out_edges().empty()) {
- std::int32_t max_row_score = _mmxxx_value_at<T>(
+ std::int32_t max_row_score = _mmxxx_value_at<S,T>(
H_row[matrix_width - 1], last_column_id);
if (max_score < max_row_score) {
max_score = max_row_score;
@@ -1657,12 +1680,12 @@ Alignment SimdAlignmentEngine::convex(const char* sequence,
}
if (type_ == AlignmentType::kSW) {
- max_j = _mmxxx_index_of<T>(&(pimpl_->H[max_i * matrix_width]),
+ max_j = _mmxxx_index_of<S,T>(&(pimpl_->H[max_i * matrix_width]),
matrix_width, max_score);
} else if (type_ == AlignmentType::kOV) {
if (graph->nodes()[rank_to_node_id[max_i - 1]]->out_edges().empty()) {
- max_j = _mmxxx_index_of<T>(&(pimpl_->H[max_i * matrix_width]),
+ max_j = _mmxxx_index_of<S,T>(&(pimpl_->H[max_i * matrix_width]),
matrix_width, max_score);
} else {
max_j = normal_matrix_width - 1;
@@ -1680,7 +1703,7 @@ Alignment SimdAlignmentEngine::convex(const char* sequence,
}
typename T::type* backtrack_storage = nullptr;
- typename T::type* H = allocateAlignedMemory(&backtrack_storage,
+ typename T::type* H = allocateAlignedMemory<S>(&backtrack_storage,
9 * T::kNumVar + 4 * T::kNumVar * max_num_predecessors, kRegisterSize / 8);
typename T::type* H_pred = H + T::kNumVar;
typename T::type* H_diag_pred = H_pred + T::kNumVar * max_num_predecessors;
View it on GitLab: https://salsa.debian.org/med-team/spoa/-/commit/72c8f2f31382264de7a10a9cd9651b7e3ebcc4c4
--
View it on GitLab: https://salsa.debian.org/med-team/spoa/-/commit/72c8f2f31382264de7a10a9cd9651b7e3ebcc4c4
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200805/b8ca3142/attachment-0001.html>
More information about the debian-med-commit mailing list