[med-svn] [Git][med-team/spoa][upstream] New upstream version 3.4.0

Wed Aug 5 08:04:54 BST 2020


Andreas Tille pushed to branch upstream at Debian Med / spoa


Commits:
72c8f2f3 by Andreas Tille at 2020-08-04T19:42:06+02:00
New upstream version 3.4.0
- - - - -


12 changed files:

- .gitmodules
- CMakeLists.txt
- README.md
- + include/spoa/architectures.hpp
- include/spoa/graph.hpp
- src/alignment_engine.cpp
- + src/dispatcher.cpp
- src/graph.cpp
- src/main.cpp
- src/simd_alignment_engine.hpp
- + src/simd_alignment_engine_dispatch.cpp
- src/simd_alignment_engine.cpp → src/simd_alignment_engine_impl.hpp


Changes:

=====================================
.gitmodules
=====================================
@@ -4,3 +4,10 @@
 [submodule "vendor/googletest"]
 	path = vendor/googletest
 	url = https://github.com/google/googletest
+[submodule "vendor/simde"]
+	path = vendor/simde
+	url = https://github.com/nemequ/simde.git
+[submodule "vendor/cpu_features"]
+	path = vendor/cpu_features
+	url = https://github.com/mbrcic/cpu_features.git
+	branch = patch-4


=====================================
CMakeLists.txt
=====================================
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.2)
-project(spoa LANGUAGES CXX VERSION 3.0.0)
+project(spoa LANGUAGES CXX VERSION 4.0.0)
 
 include(GNUInstallDirs)
 
@@ -16,31 +16,88 @@ option(spoa_build_executable "Build spoa standalone tool" OFF)
 option(spoa_build_tests "Build spoa unit tests" OFF)
 option(spoa_optimize_for_native "Build spoa with -march=native" ON)
 option(spoa_optimize_for_portability "Build spoa with -msse4.1" OFF)
+option(spoa_use_simde "Use SIMDe library for porting vectorized code" OFF)
+option(spoa_use_simde_nonvec "Use SIMDe library for nonvectorized code" OFF)
+option(spoa_use_simde_openmp "Use SIMDe support for OpenMP SIMD" OFF)
+option(spoa_generate_dispatch "Use SIMDe to generate x86 dispatch" OFF)
+
+if(NOT spoa_generate_dispatch) # optimization flags defeat the purpose of dispatching
+    if (spoa_optimize_for_portability)
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1")
+    elseif (spoa_optimize_for_native)
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
+    endif()
+endif()
 
-if (spoa_optimize_for_portability)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1")
-elseif (spoa_optimize_for_native)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
+if (spoa_use_simde OR spoa_use_simde_nonvec OR spoa_use_simde_openmp OR spoa_generate_dispatch)
+    add_definitions(-DUSE_SIMDE -DSIMDE_ENABLE_NATIVE_ALIASES)
+    if (spoa_use_simde_nonvec)
+        add_definitions(-DSIMDE_NO_NATIVE)
+    endif()
+    if (spoa_use_simde_openmp)
+        add_definitions(-DSIMDE_ENABLE_OPENMP)
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp-simd")
+    endif()
+    if (spoa_generate_dispatch)
+        add_definitions(-DGEN_DISPATCH)
+    endif()
 endif()
 
 # build SPOA as a static library by default
 set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build all libraries as shared")
 
+list(APPEND INCLUDES
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+    $<INSTALL_INTERFACE:include>
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/vendor/simde>)
+
+# spoa always compiles src/dispatcher.cpp, which covers both the dispatching and the non-dispatching case
+
 add_library(spoa
     src/alignment_engine.cpp
     src/graph.cpp
-    src/simd_alignment_engine.cpp
-    src/sisd_alignment_engine.cpp)
+    src/sisd_alignment_engine.cpp
+    src/dispatcher.cpp)
 
 target_include_directories(spoa PUBLIC
-    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
-    $<INSTALL_INTERFACE:include>)
+    ${INCLUDES}
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/vendor/cpu_features/include>)
 
 set_target_properties(spoa
     PROPERTIES
     VERSION ${spoa_VERSION}
     SOVERSION ${spoa_VERSION})
 
+# in dispatching case, generate different optimized versions
+
+if (spoa_generate_dispatch)
+
+if (NOT TARGET cpu_features)
+    add_subdirectory(vendor/cpu_features)
+endif()
+
+list(APPEND Archs avx2 sse4.1 sse2)
+
+# One OBJECT library per instruction set: the same source is compiled with a
+# different -m<arch> flag each time and its objects are added to spoa itself.
+foreach(arch IN LISTS Archs)
+  add_library(spoa_${arch} OBJECT src/simd_alignment_engine_dispatch.cpp)
+  target_include_directories(spoa_${arch} PUBLIC ${INCLUDES})
+  set_target_properties(spoa_${arch} PROPERTIES COMPILE_FLAGS "-m${arch}")
+  if (BUILD_SHARED_LIBS)
+    set_property(TARGET spoa_${arch}
+      PROPERTY POSITION_INDEPENDENT_CODE ON)
+  endif()
+  # add_dependencies() would only order the build; the objects must be sources
+  target_sources(spoa PRIVATE $<TARGET_OBJECTS:spoa_${arch}>)
+endforeach()
+
+# cpu_features provides the runtime CPU detection used by src/dispatcher.cpp
+target_link_libraries(spoa
+    cpu_features)
+
+endif()
+
 install(TARGETS spoa DESTINATION ${CMAKE_INSTALL_LIBDIR})
 install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/spoa DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
 


=====================================
README.md
=====================================
@@ -27,7 +27,18 @@ cd build
 cmake -DCMAKE_BUILD_TYPE=Release ..
 make
 ```
-a library named `libspoa.a` will appear in the `build/lib` directory. If you want the spoa executable, run the following two commands:
+a library named `libspoa.a` will appear in the `build/lib` directory.
+
+Various options can be enabled while running `cmake`:
+
+- `spoa_optimize_for_native`: builds with `-march=native`
+- `spoa_optimize_for_portability`: builds with `-msse4.1`
+- `spoa_use_simde`: builds with SIMDe for porting vectorized code
+- `spoa_use_simde_nonvec`: uses SIMDe library for nonvectorized code
+- `spoa_use_simde_openmp`: uses SIMDe support for OpenMP SIMD
+- `spoa_generate_dispatch`: uses SIMDe to generate x86 dispatch
+
+If you want the spoa executable, run the following two commands:
 
 ```bash
 cmake -DCMAKE_BUILD_TYPE=Release -Dspoa_build_executable=ON ..
@@ -85,6 +96,10 @@ spoa [options ...] <sequences>
                 0 - consensus
                 1 - multiple sequence alignment
                 2 - 0 & 1
+        -G, --gfa
+            write GFA on stdout
+        -C, --gfa-with-consensus
+            write GFA with consensus on stdout
         -d, --dot <file>
             output file for the final POA graph in DOT format
         --version


=====================================
include/spoa/architectures.hpp
=====================================
@@ -0,0 +1,10 @@
+/*!
+ * @file architectures.hpp
+ *
+ * @brief Arch enum class header file
+ */
+namespace spoa {
+
+enum class Arch{avx2, sse4_1, sse2, automatic};
+
+}
\ No newline at end of file


=====================================
include/spoa/graph.hpp
=====================================
@@ -83,6 +83,8 @@ public:
 
     void print_dot(const std::string& path) const;
 
+    void print_gfa(std::ostream& out, const std::vector<std::string>& sequence_names, bool include_consensus = false) const;
+
     void clear();
 
     friend std::unique_ptr<Graph> createGraph();


=====================================
src/alignment_engine.cpp
=====================================
@@ -7,6 +7,7 @@
 #include <limits>
 #include <algorithm>
 #include <exception>
+#include <stdexcept>
 
 #include "sisd_alignment_engine.hpp"
 #include "simd_alignment_engine.hpp"


=====================================
src/dispatcher.cpp
=====================================
@@ -0,0 +1,62 @@
+/*!
+ * @file dispatcher.cpp
+ *
+ * @brief CPU dispatching mechanism that also covers non-dispatching case
+ */
+
+#include "simd_alignment_engine_impl.hpp"
+
+#ifdef GEN_DISPATCH
+
+#include "cpuinfo_x86.h"
+
+static const cpu_features::X86Features features = cpu_features::GetX86Info().features;
+
+#endif
+
+
+namespace spoa{
+// Without runtime dispatch, only the Arch::automatic engine is instantiated here.
+#ifndef GEN_DISPATCH
+template class SimdAlignmentEngine<Arch::automatic>;
+
+template
+std::unique_ptr<AlignmentEngine> createSimdAlignmentEngine<Arch::automatic>(AlignmentType type,
+    AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g,
+    std::int8_t e, std::int8_t q, std::int8_t c);
+#endif
+
+// Non-template factory: picks one engine instantiation and forwards all arguments to it.
+std::unique_ptr<AlignmentEngine> createSimdAlignmentEngine(AlignmentType type,
+    AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g,
+    std::int8_t e, std::int8_t q, std::int8_t c) {
+
+#ifdef GEN_DISPATCH
+    // `features` is the file-level snapshot of the CPU; prefer AVX2, then SSE4.1, otherwise SSE2.
+    if (features.avx2)
+    {
+        // CPU reports AVX2
+        return createSimdAlignmentEngine<Arch::avx2>(type,
+            subtype, m, n, g, e, q, c);
+    }
+    else if (features.sse4_1){
+
+        // CPU reports SSE4.1 but not AVX2
+        return createSimdAlignmentEngine<Arch::sse4_1>(type,
+            subtype, m, n, g, e, q, c);
+    }
+    else {
+        // neither flag is set: use the SSE2 engine
+        return createSimdAlignmentEngine<Arch::sse2>(type,
+            subtype, m, n, g, e, q, c);
+    }
+#else
+    return createSimdAlignmentEngine<Arch::automatic>(type,
+            subtype, m, n, g, e, q, c);
+#endif
+
+
+}
+
+}
+


=====================================
src/graph.cpp
=====================================
@@ -8,6 +8,7 @@
 #include <algorithm>
 #include <stack>
 #include <fstream>
+#include <stdexcept>
 
 #include "spoa/graph.hpp"
 
@@ -725,6 +726,57 @@ void Graph::print_dot(const std::string& path) const {
     out.close();
 }
 
+// Writes the graph to `out` as GFA 1.0: an S line per node (id + 1), an L line
+// per edge (ew:f: = total weight), a P line per sequence (sequence_names[i] is
+// read for every i < num_sequences_) and, if asked, a "Consensus" P line.
+// Nodes and edges lying on consensus_ are tagged with ic:Z:true.
+void Graph::print_gfa(std::ostream& out,
+                      const std::vector<std::string>& sequence_names,
+                      bool include_consensus) const {
+    // position of each node within consensus_, or -1 if it is not on it
+    std::vector<std::int32_t> in_consensus(nodes_.size(), -1);
+    std::int32_t rank = 0;
+    for (const auto& id: consensus_) {
+        in_consensus[id] = rank++;
+    }
+    out << "H" << "\t" << "VN:Z:1.0" << std::endl;
+    for (std::uint32_t i = 0; i < nodes_.size(); ++i) {
+        out << "S" << "\t" << i+1 << "\t" << static_cast<char>(decoder_[nodes_[i]->code_]);
+        if (in_consensus[i] != -1) {
+            out << "\t" << "ic:Z:true";
+        }
+        out << std::endl;
+        for (const auto& edge: nodes_[i]->out_edges_) {
+            out << "L" << "\t" << i+1 << "\t" << "+" << "\t" << edge->end_node_id_+1 << "\t" << "+" << "\t" << "0M" << "\t"
+                << "ew:f:" << edge->total_weight_;
+            // the tail must be on the consensus too, else -1 + 1 matches rank 0
+            if (in_consensus[i] != -1 && in_consensus[i] + 1 == in_consensus[edge->end_node_id_]) {
+                out << "\t" << "ic:Z:true";
+            }
+            out << std::endl;
+        }
+    }
+
+    for (std::uint32_t i = 0; i < num_sequences_; ++i) {
+        out << "P" << "\t" << sequence_names[i] << "\t";
+        std::uint32_t node_id = sequences_begin_nodes_ids_[i];
+        out << node_id+1 << "+";
+        while (nodes_[node_id]->successor(node_id, i)) {  // expected to advance node_id
+            out << "," << node_id+1 << "+";
+        }
+        out << "\t" << "*" << std::endl;
+    }
+    if (include_consensus) {
+        out << "P" << "\t" << "Consensus" << "\t";
+        const char* separator = "";  // GFA path segments are comma-separated
+        for (const auto& id: consensus_) {
+            out << separator << id+1 << "+";
+            separator = ",";
+        }
+        out << "\t" << "*" << std::endl;
+    }
+}
+
 void Graph::clear() {
     num_codes_ = 0;
     num_sequences_ = 0;


=====================================
src/main.cpp
=====================================
@@ -10,12 +10,14 @@
 #include "spoa/spoa.hpp"
 #include "bioparser/bioparser.hpp"
 
-static const std::string version = "v3.0.2";
+static const std::string version = "v3.4.0";
 
 static struct option options[] = {
     {"algorithm", required_argument, nullptr, 'l'},
     {"result", required_argument, nullptr, 'r'},
     {"dot", required_argument, nullptr, 'd'},
+    {"gfa", no_argument, nullptr, 'G'},  // plain flag: "G" has no ':' in the optstring
+    {"gfa-with-consensus", no_argument, nullptr, 'C'},
     {"version", no_argument, nullptr, 'v'},
     {"help", no_argument, nullptr, 'h'},
     {nullptr, 0, nullptr, 0}
@@ -36,9 +38,11 @@ int main(int argc, char** argv) {
     std::uint8_t result = 0;
 
     std::string dot_path = "";
+    bool write_gfa = false;
+    bool write_gfa_with_consensus = false;
 
-    char opt;
-    while ((opt = getopt_long(argc, argv, "m:n:g:e:q:c:l:r:d:h", options, nullptr)) != -1) {
+    int opt;
+    while ((opt = getopt_long(argc, argv, "m:n:g:e:q:c:l:r:d:GCh", options, nullptr)) != -1) {
         switch (opt) {
             case 'm': m = atoi(optarg); break;
             case 'n': n = atoi(optarg); break;
@@ -49,6 +53,8 @@ int main(int argc, char** argv) {
             case 'l': algorithm = atoi(optarg); break;
             case 'r': result = atoi(optarg); break;
             case 'd': dot_path = optarg; break;
+            case 'G': write_gfa = true; break;
+            case 'C': write_gfa = true; write_gfa_with_consensus = true; break;
             case 'v': std::cout << version << std::endl; return 0;
             case 'h': help(); return 0;
             default: return 1;
@@ -118,18 +124,26 @@ int main(int argc, char** argv) {
         }
     }
 
-    if (result == 0 || result == 2) {
+    if (write_gfa) {
+        // force consensus generation for graph annotation
         std::string consensus = graph->generate_consensus();
-        std::cout << "Consensus (" << consensus.size() << ")" << std::endl;
-        std::cout << consensus << std::endl;
-    }
-
-    if (result == 1 || result == 2) {
+        // save sequence names for graph path labeling
+        std::vector<std::string> sequence_names;
+        for (auto& s : sequences) {
+            sequence_names.push_back(s->name());
+        }
+        // write the graph, with consensus as a path if requested
+        graph->print_gfa(std::cout, sequence_names, write_gfa_with_consensus);
+    } else if (result == 0) {
+        std::string consensus = graph->generate_consensus();
+        std::cout << ">Consensus LN:i:" << consensus.size() << std::endl
+                  << consensus << std::endl;
+    } else {
         std::vector<std::string> msa;
-        graph->generate_multiple_sequence_alignment(msa);
-        std::cout << "Multiple sequence alignment" << std::endl;
-        for (const auto& it: msa) {
-            std::cout << it << std::endl;
+        graph->generate_multiple_sequence_alignment(msa, result == 2);
+        for (std::uint32_t i = 0; i < msa.size(); ++i) {
+            std::cout << ">" << (i < sequences.size() ? sequences[i]->name() : "Consensus") << std::endl
+                      << msa[i] << std::endl;
         }
     }
 
@@ -179,6 +193,10 @@ void help() {
         "                0 - consensus\n"
         "                1 - multiple sequence alignment\n"
         "                2 - 0 & 1\n"
+        "        -G, --gfa\n"
+        "            write GFA on stdout\n"
+        "        -C, --gfa-with-consensus\n"
+        "            write GFA with consensus on stdout\n"
         "        -d, --dot <file>\n"
         "            output file for the final POA graph in DOT format\n"
         "        --version\n"


=====================================
src/simd_alignment_engine.hpp
=====================================
@@ -1,7 +1,7 @@
 /*!
  * @file simd_alignment_engine.hpp
  *
- * @brief SimdAlignmentEngine class header file
+ * @brief SimdAlignmentEngine class template definition file
  */
 
 #pragma once
@@ -12,16 +12,29 @@
 #include <vector>
 
 #include "spoa/alignment_engine.hpp"
+#include "spoa/architectures.hpp"
 
 namespace spoa {
 
 class Graph;
 
+template<Arch S> 
 class SimdAlignmentEngine;
+
 std::unique_ptr<AlignmentEngine> createSimdAlignmentEngine(AlignmentType type,
     AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g,
     std::int8_t e, std::int8_t q, std::int8_t c);
 
+
+
+template<Arch S>
+std::unique_ptr<AlignmentEngine> createSimdAlignmentEngine(AlignmentType type,
+    AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g,
+    std::int8_t e, std::int8_t q, std::int8_t c);
+
+
+
+template<Arch S> 
 class SimdAlignmentEngine: public AlignmentEngine {
 public:
     ~SimdAlignmentEngine();
@@ -32,7 +45,7 @@ public:
     Alignment align(const char* sequence, std::uint32_t sequence_size,
         const std::unique_ptr<Graph>& graph) noexcept override;
 
-    friend std::unique_ptr<AlignmentEngine> createSimdAlignmentEngine(
+    friend std::unique_ptr<AlignmentEngine> createSimdAlignmentEngine<S>(
         AlignmentType type, AlignmentSubtype subtype, std::int8_t m,
         std::int8_t n, std::int8_t g, std::int8_t e, std::int8_t q,
         std::int8_t c);


=====================================
src/simd_alignment_engine_dispatch.cpp
=====================================
@@ -0,0 +1,28 @@
+/*!
+ * @file simd_alignment_engine_dispatch.cpp
+ *
+ * @brief Instantiation of different SIMD engines
+ */
+
+ #include "simd_alignment_engine_impl.hpp"
+
+ #if defined(__AVX2__)
+ #define ARCH Arch::avx2
+ #elif defined (__SSE4_1__)
+ #define ARCH Arch::sse4_1
+ #else
+ #define ARCH Arch::sse2
+ #endif
+
+// ARCH now names the engine variant matching the instruction-set macros tested above.
+namespace spoa{
+// Explicit instantiations: emit the class and its factory for ARCH only.
+template class SimdAlignmentEngine<ARCH>;
+
+template
+std::unique_ptr<AlignmentEngine> createSimdAlignmentEngine<ARCH>(AlignmentType type,
+    AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g,
+    std::int8_t e, std::int8_t q, std::int8_t c);
+
+}
+


=====================================
src/simd_alignment_engine.cpp → src/simd_alignment_engine_impl.hpp
=====================================
@@ -1,7 +1,7 @@
 /*!
- * @file simd_alignment_engine.cpp
+ * @file simd_alignment_engine_impl.hpp
  *
- * @brief SimdAlignmentEngine class source file
+ * @brief SimdAlignmentEngine class template implementation file
  */
 
 #include <iostream>
@@ -9,7 +9,16 @@
 #include <limits>
 
 extern "C" {
+    #ifdef USE_SIMDE
+    #ifdef __AVX2__
+    #include <simde/x86/avx2.h>
+    #else
+    #include <simde/x86/sse4.1.h> // SSE4.1 is covered better
+    #endif
+
+    #elif defined(__AVX2__) || defined(__SSE4_1__)
     #include <immintrin.h> // AVX2 and lower
+    #endif
 }
 
 #include "spoa/graph.hpp"
@@ -17,6 +26,7 @@ extern "C" {
 
 namespace spoa {
 
+
 // Taken from https://gcc.gnu.org/viewcvs/gcc?view=revision&revision=216149
 inline void* align(std::size_t __align, std::size_t __size, void*& __ptr,
     std::size_t& __space) noexcept {
@@ -32,7 +42,8 @@ inline void* align(std::size_t __align, std::size_t __size, void*& __ptr,
     }
 }
 
-template<typename T>
+// TODO: what to do with this??
+template<Arch S,typename T>
 T* allocateAlignedMemory(T** storage, std::uint32_t size, std::uint32_t alignment) {
     *storage = new T[size + alignment - 1];
     void* ptr = static_cast<void*>(*storage);
@@ -40,7 +51,7 @@ T* allocateAlignedMemory(T** storage, std::uint32_t size, std::uint32_t alignmen
     return static_cast<T*>(align(alignment, size * sizeof(T), ptr, storage_size));
 }
 
-template<typename T>
+template<Arch S,typename T>
 struct InstructionSet;
 
 #if defined(__AVX2__)
@@ -69,8 +80,8 @@ inline __mxxxi _mmxxx_or_si(const __mxxxi& a, const __mxxxi& b) {
     _mm256_srli_si256(_mm256_permute2x128_si256(a, a, \
         _MM_SHUFFLE(2, 0, 0, 1)), n - 16)
 
-template<>
-struct InstructionSet<std::int16_t> {
+template<Arch S>
+struct InstructionSet<S,std::int16_t> {
     using type = std::int16_t;
     static constexpr std::uint32_t kNumVar = kRegisterSize / (8 * sizeof(type));
     static constexpr std::uint32_t kLogNumVar = 4;
@@ -105,8 +116,8 @@ struct InstructionSet<std::int16_t> {
     }
 };
 
-template<>
-struct InstructionSet<std::int32_t> {
+template<Arch S>
+struct InstructionSet<S,std::int32_t> {
     using type = std::int32_t;
     static constexpr std::uint32_t kNumVar = kRegisterSize / (8 * sizeof(type));
     static constexpr std::uint32_t kLogNumVar = 3;
@@ -139,7 +150,7 @@ struct InstructionSet<std::int32_t> {
     }
 };
 
-#elif defined(__SSE4_1__)
+#elif defined(__SSE4_1__) || defined(USE_SIMDE)
 
 constexpr std::uint32_t kRegisterSize = 128;
 using __mxxxi = __m128i;
@@ -162,8 +173,8 @@ inline __mxxxi _mmxxx_or_si(const __mxxxi& a, const __mxxxi& b) {
 #define _mmxxx_srli_si(a, n) \
     _mm_srli_si128(a, n)
 
-template<>
-struct InstructionSet<std::int16_t> {
+template<Arch S>
+struct InstructionSet<S,std::int16_t> {
     using type = std::int16_t;
     static constexpr std::uint32_t kNumVar = kRegisterSize / (8 * sizeof(type));
     static constexpr std::uint32_t kLogNumVar = 3;
@@ -196,8 +207,8 @@ struct InstructionSet<std::int16_t> {
     }
 };
 
-template<>
-struct InstructionSet<std::int32_t> {
+template<Arch S>
+struct InstructionSet<S,std::int32_t> {
     using type = std::int32_t;
     static constexpr std::uint32_t kNumVar = kRegisterSize / (8 * sizeof(type));
     static constexpr std::uint32_t kLogNumVar = 2;
@@ -230,9 +241,9 @@ struct InstructionSet<std::int32_t> {
 
 #endif
 
-#if defined(__AVX2__) || defined(__SSE4_1__)
+#if defined(__AVX2__) || defined(__SSE4_1__) || defined(USE_SIMDE)
 
-template<typename T>
+template<Arch S,typename T>
 void _mmxxx_print(const __mxxxi& a) {
 
     __attribute__((aligned(kRegisterSize / 8))) typename T::type
@@ -244,7 +255,7 @@ void _mmxxx_print(const __mxxxi& a) {
     }
 }
 
-template<typename T>
+template<Arch S,typename T>
 typename T::type _mmxxx_max_value(const __mxxxi& a) {
 
     typename T::type max_score = 0;
@@ -259,7 +270,7 @@ typename T::type _mmxxx_max_value(const __mxxxi& a) {
     return max_score;
 }
 
-template<typename T>
+template<Arch S, typename T>
 typename T::type _mmxxx_value_at(const __mxxxi& a, std::uint32_t i) {
 
     __attribute__((aligned(kRegisterSize / 8))) typename T::type
@@ -269,7 +280,7 @@ typename T::type _mmxxx_value_at(const __mxxxi& a, std::uint32_t i) {
     return unpacked[i];
 }
 
-template<typename T>
+template<Arch S, typename T>
 std::int32_t _mmxxx_index_of(const __mxxxi* row, std::uint32_t row_width,
     typename T::type value) {
 
@@ -290,13 +301,14 @@ std::int32_t _mmxxx_index_of(const __mxxxi* row, std::uint32_t row_width,
 
 #endif
 
+template<Arch S>
 std::unique_ptr<AlignmentEngine> createSimdAlignmentEngine(AlignmentType type,
     AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g,
     std::int8_t e, std::int8_t q, std::int8_t c) {
 
-#if defined(__AVX2__) || defined(__SSE4_1__)
+#if defined(__AVX2__) || defined(__SSE4_1__) || defined(USE_SIMDE)
 
-    return std::unique_ptr<AlignmentEngine>(new SimdAlignmentEngine(type,
+    return std::unique_ptr<AlignmentEngine>(new SimdAlignmentEngine<S>(type,
         subtype, m, n, g, e, q, c));
 
 #else
@@ -306,9 +318,11 @@ std::unique_ptr<AlignmentEngine> createSimdAlignmentEngine(AlignmentType type,
 #endif
 }
 
-struct SimdAlignmentEngine::Implementation {
 
-#if defined(__AVX2__) || defined(__SSE4_1__)
+template<Arch S>
+struct SimdAlignmentEngine<S>::Implementation {
+
+#if defined(__AVX2__) || defined(__SSE4_1__) || defined(USE_SIMDE)
 
     std::vector<std::uint32_t> node_id_to_rank;
 
@@ -345,42 +359,46 @@ struct SimdAlignmentEngine::Implementation {
 #endif
 };
 
-SimdAlignmentEngine::SimdAlignmentEngine(AlignmentType type,
+template<Arch S>
+SimdAlignmentEngine<S>::SimdAlignmentEngine(AlignmentType type,
     AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g,
     std::int8_t e, std::int8_t q, std::int8_t c)
         : AlignmentEngine(type, subtype, m, n, g, e, q, c),
         pimpl_(new Implementation()) {
 }
 
-SimdAlignmentEngine::~SimdAlignmentEngine() {
+template<Arch S>
+SimdAlignmentEngine<S>::~SimdAlignmentEngine() {
 }
 
-void SimdAlignmentEngine::prealloc(std::uint32_t max_sequence_size,
+template<Arch S>
+void SimdAlignmentEngine<S>::prealloc(std::uint32_t max_sequence_size,
     std::uint32_t alphabet_size) {
 
-#if defined(__AVX2__) || defined(__SSE4_1__)
+#if defined(__AVX2__) || defined(__SSE4_1__) || defined(USE_SIMDE)
 
     std::uint32_t longest_path = max_sequence_size * (alphabet_size + 1) + 1 +
-        InstructionSet<std::int16_t>::kNumVar;
+        InstructionSet<S,std::int16_t>::kNumVar;
 
     std::uint32_t max_penalty = std::max(std::max(abs(m_), abs(n_)),
         std::max(abs(g_), abs(q_)));
 
     if (max_penalty * longest_path < std::numeric_limits<std::int16_t>::max()) {
-        realloc((max_sequence_size / InstructionSet<std::int16_t>::kNumVar) + 1,
+        realloc((max_sequence_size / InstructionSet<S,std::int16_t>::kNumVar) + 1,
             alphabet_size * max_sequence_size, alphabet_size);
     } else {
-        realloc((max_sequence_size / InstructionSet<std::int32_t>::kNumVar) + 1,
+        realloc((max_sequence_size / InstructionSet<S,std::int32_t>::kNumVar) + 1,
             alphabet_size * max_sequence_size, alphabet_size);
     }
 
 #endif
 }
 
-void SimdAlignmentEngine::realloc(std::uint32_t matrix_width,
+template<Arch S>
+void SimdAlignmentEngine<S>::realloc(std::uint32_t matrix_width,
     std::uint32_t matrix_height, std::uint32_t num_codes) {
 
-#if defined(__AVX2__) || defined(__SSE4_1__)
+#if defined(__AVX2__) || defined(__SSE4_1__) || defined(USE_SIMDE)
 
     if (pimpl_->node_id_to_rank.size() < matrix_height - 1) {
         pimpl_->node_id_to_rank.resize(matrix_height - 1, 0);
@@ -388,7 +406,7 @@ void SimdAlignmentEngine::realloc(std::uint32_t matrix_width,
     if (pimpl_->sequence_profile_size < num_codes * matrix_width) {
         __mxxxi* storage = nullptr;
         pimpl_->sequence_profile_size = num_codes * matrix_width;
-        pimpl_->sequence_profile = allocateAlignedMemory(&storage,
+        pimpl_->sequence_profile = allocateAlignedMemory<S>(&storage,
             pimpl_->sequence_profile_size, kRegisterSize / 8);
         pimpl_->sequence_profile_storage.reset();
         pimpl_->sequence_profile_storage = std::unique_ptr<__mxxxi[]>(storage);
@@ -400,7 +418,7 @@ void SimdAlignmentEngine::realloc(std::uint32_t matrix_width,
         if (pimpl_->M_size < matrix_height * matrix_width) {
             __mxxxi* storage = nullptr;
             pimpl_->M_size = matrix_height * matrix_width;
-            pimpl_->H = allocateAlignedMemory(&storage, pimpl_->M_size,
+            pimpl_->H = allocateAlignedMemory<S>(&storage, pimpl_->M_size,
                 kRegisterSize / 8);
             pimpl_->M_storage.reset();
             pimpl_->M_storage = std::unique_ptr<__mxxxi[]>(storage);
@@ -412,7 +430,7 @@ void SimdAlignmentEngine::realloc(std::uint32_t matrix_width,
         if (pimpl_->M_size < 3 * matrix_height * matrix_width) {
             __mxxxi* storage = nullptr;
             pimpl_->M_size = 3 * matrix_height * matrix_width;
-            pimpl_->H = allocateAlignedMemory(&storage, pimpl_->M_size,
+            pimpl_->H = allocateAlignedMemory<S>(&storage, pimpl_->M_size,
                 kRegisterSize / 8);
             pimpl_->F = pimpl_->H + matrix_height * matrix_width;
             pimpl_->E = pimpl_->F + matrix_height * matrix_width;
@@ -426,7 +444,7 @@ void SimdAlignmentEngine::realloc(std::uint32_t matrix_width,
         if (pimpl_->M_size < 5 * matrix_height * matrix_width) {
             __mxxxi* storage = nullptr;
             pimpl_->M_size = 5 * matrix_height * matrix_width;
-            pimpl_->H = allocateAlignedMemory(&storage, pimpl_->M_size,
+            pimpl_->H = allocateAlignedMemory<S>(&storage, pimpl_->M_size,
                 kRegisterSize / 8);
             pimpl_->F = pimpl_->H + matrix_height * matrix_width;
             pimpl_->E = pimpl_->F + matrix_height * matrix_width;
@@ -436,18 +454,18 @@ void SimdAlignmentEngine::realloc(std::uint32_t matrix_width,
             pimpl_->M_storage = std::unique_ptr<__mxxxi[]>(storage);
         }
     }
-    if (pimpl_->masks_size < InstructionSet<std::int16_t>::kLogNumVar + 1) {
+    if (pimpl_->masks_size < InstructionSet<S,std::int16_t>::kLogNumVar + 1) {
         __mxxxi* storage = nullptr;
-        pimpl_->masks_size = InstructionSet<std::int16_t>::kLogNumVar + 1;
-        pimpl_->masks = allocateAlignedMemory(&storage,
+        pimpl_->masks_size = InstructionSet<S,std::int16_t>::kLogNumVar + 1;
+        pimpl_->masks = allocateAlignedMemory<S>(&storage,
             pimpl_->masks_size, kRegisterSize / 8);
         pimpl_->masks_storage.reset();
         pimpl_->masks_storage = std::unique_ptr<__mxxxi[]>(storage);
     }
-    if (pimpl_->penalties_size < 2 * InstructionSet<std::int16_t>::kLogNumVar) {
+    if (pimpl_->penalties_size < 2 * InstructionSet<S,std::int16_t>::kLogNumVar) {
         __mxxxi* storage = nullptr;
-        pimpl_->penalties_size = 2 * InstructionSet<std::int16_t>::kLogNumVar;
-        pimpl_->penalties = allocateAlignedMemory(&storage,
+        pimpl_->penalties_size = 2 * InstructionSet<S,std::int16_t>::kLogNumVar;
+        pimpl_->penalties = allocateAlignedMemory<S>(&storage,
             pimpl_->penalties_size, kRegisterSize / 8);
         pimpl_->penalties_storage.reset();
         pimpl_->penalties_storage = std::unique_ptr<__mxxxi[]>(storage);
@@ -456,12 +474,13 @@ void SimdAlignmentEngine::realloc(std::uint32_t matrix_width,
 #endif
 }
 
+template<Arch S>
 template<typename T>
-void SimdAlignmentEngine::initialize(const char* sequence,
+void SimdAlignmentEngine<S>::initialize(const char* sequence,
     const std::unique_ptr<Graph>& graph, std::uint32_t normal_matrix_width,
     std::uint32_t matrix_width, std::uint32_t matrix_height) noexcept {
 
-#if defined(__AVX2__) || defined(__SSE4_1__)
+#if defined(__AVX2__) || defined(__SSE4_1__) || defined(USE_SIMDE)
 
     std::int32_t padding_penatly = -1 * std::max(std::max(abs(m_), abs(n_)),
         std::max(abs(g_), abs(q_)));
@@ -646,35 +665,36 @@ void SimdAlignmentEngine::initialize(const char* sequence,
 #endif
 }
 
-Alignment SimdAlignmentEngine::align(const char* sequence,
+template<Arch S>
+Alignment SimdAlignmentEngine<S>::align(const char* sequence,
     std::uint32_t sequence_size, const std::unique_ptr<Graph>& graph) noexcept {
 
     if (graph->nodes().empty() || sequence_size == 0) {
         return Alignment();
     }
 
-#if defined(__AVX2__) || defined(__SSE4_1__)
+#if defined(__AVX2__) || defined(__SSE4_1__) || defined(USE_SIMDE)
 
     std::uint32_t longest_path = graph->nodes().size() + 1 + sequence_size +
-        InstructionSet<std::int16_t>::kNumVar;
+        InstructionSet<S,std::int16_t>::kNumVar;
 
     std::uint32_t max_penalty = std::max(std::max(abs(m_), abs(n_)), abs(g_));
 
     if (max_penalty * longest_path < std::numeric_limits<std::int16_t>::max()) {
         if (subtype_ == AlignmentSubtype::kLinear) {
-            return linear<InstructionSet<std::int16_t>>(sequence, sequence_size, graph);
+            return linear<InstructionSet<S,std::int16_t>>(sequence, sequence_size, graph);
         } else if (subtype_ == AlignmentSubtype::kAffine) {
-            return affine<InstructionSet<std::int16_t>>(sequence, sequence_size, graph);
+            return affine<InstructionSet<S,std::int16_t>>(sequence, sequence_size, graph);
         } else if (subtype_ == AlignmentSubtype::kConvex) {
-            return convex<InstructionSet<std::int16_t>>(sequence, sequence_size, graph);
+            return convex<InstructionSet<S,std::int16_t>>(sequence, sequence_size, graph);
         }
     } else {
         if (subtype_ == AlignmentSubtype::kLinear) {
-            return linear<InstructionSet<std::int32_t>>(sequence, sequence_size, graph);
+            return linear<InstructionSet<S,std::int32_t>>(sequence, sequence_size, graph);
         } else if (subtype_ == AlignmentSubtype::kAffine) {
-            return affine<InstructionSet<std::int32_t>>(sequence, sequence_size, graph);
+            return affine<InstructionSet<S,std::int32_t>>(sequence, sequence_size, graph);
         } else if (subtype_ == AlignmentSubtype::kConvex) {
-            return convex<InstructionSet<std::int32_t>>(sequence, sequence_size, graph);
+            return convex<InstructionSet<S,std::int32_t>>(sequence, sequence_size, graph);
         }
     }
 
@@ -687,11 +707,12 @@ Alignment SimdAlignmentEngine::align(const char* sequence,
 #endif
 }
 
+template<Arch S>
 template<typename T>
-Alignment SimdAlignmentEngine::linear(const char* sequence,
+Alignment SimdAlignmentEngine<S>::linear(const char* sequence,
     std::uint32_t sequence_size, const std::unique_ptr<Graph>& graph) noexcept {
 
-#if defined(__AVX2__) || defined(__SSE4_1__)
+#if defined(__AVX2__) || defined(__SSE4_1__) || defined(USE_SIMDE)
 
     std::uint32_t normal_matrix_width = sequence_size;
     std::uint32_t matrix_width = (sequence_size + (sequence_size % T::kNumVar == 0 ?
@@ -805,7 +826,7 @@ Alignment SimdAlignmentEngine::linear(const char* sequence,
         }
 
         if (type_ == AlignmentType::kSW) {
-            std::int32_t max_row_score = _mmxxx_max_value<T>(score);
+            std::int32_t max_row_score = _mmxxx_max_value<S,T>(score);
             if (max_score < max_row_score) {
                 max_score = max_row_score;
                 max_i = i;
@@ -813,7 +834,7 @@ Alignment SimdAlignmentEngine::linear(const char* sequence,
 
         } else if (type_ == AlignmentType::kOV) {
             if (node->out_edges().empty()) {
-                std::int32_t max_row_score = _mmxxx_max_value<T>(score);
+                std::int32_t max_row_score = _mmxxx_max_value<S,T>(score);
                 if (max_score < max_row_score) {
                     max_score = max_row_score;
                     max_i = i;
@@ -822,7 +843,7 @@ Alignment SimdAlignmentEngine::linear(const char* sequence,
 
         } else if (type_ == AlignmentType::kNW) {
             if (node->out_edges().empty()) {
-                std::int32_t max_row_score = _mmxxx_value_at<T>(
+                std::int32_t max_row_score = _mmxxx_value_at<S,T>(
                     H_row[matrix_width - 1], last_column_id);
                 if (max_score < max_row_score) {
                     max_score = max_row_score;
@@ -837,12 +858,12 @@ Alignment SimdAlignmentEngine::linear(const char* sequence,
     }
 
     if (type_ == AlignmentType::kSW) {
-        max_j = _mmxxx_index_of<T>(&(pimpl_->H[max_i * matrix_width]),
+        max_j = _mmxxx_index_of<S,T>(&(pimpl_->H[max_i * matrix_width]),
             matrix_width, max_score);
 
     } else if (type_ == AlignmentType::kOV) {
         if (graph->nodes()[rank_to_node_id[max_i - 1]]->out_edges().empty()) {
-            max_j = _mmxxx_index_of<T>(&(pimpl_->H[max_i * matrix_width]),
+            max_j = _mmxxx_index_of<S,T>(&(pimpl_->H[max_i * matrix_width]),
                 matrix_width, max_score);
         } else {
             max_j = normal_matrix_width - 1;
@@ -860,7 +881,7 @@ Alignment SimdAlignmentEngine::linear(const char* sequence,
     }
 
     typename T::type* backtrack_storage = nullptr;
-    typename T::type* H = allocateAlignedMemory(&backtrack_storage,
+    typename T::type* H = allocateAlignedMemory<S>(&backtrack_storage,
         3 * T::kNumVar + 2 * T::kNumVar * max_num_predecessors, kRegisterSize / 8);
     typename T::type* H_pred = H + T::kNumVar;
     typename T::type* H_diag_pred = H_pred + T::kNumVar * max_num_predecessors;
@@ -1034,11 +1055,12 @@ Alignment SimdAlignmentEngine::linear(const char* sequence,
 #endif
 }
 
+template<Arch S>
 template<typename T>
-Alignment SimdAlignmentEngine::affine(const char* sequence,
+Alignment SimdAlignmentEngine<S>::affine(const char* sequence,
     std::uint32_t sequence_size, const std::unique_ptr<Graph>& graph) noexcept {
 
-#if defined(__AVX2__) || defined(__SSE4_1__)
+#if defined(__AVX2__) || defined(__SSE4_1__) || defined(USE_SIMDE)
 
     std::uint32_t normal_matrix_width = sequence_size;
     std::uint32_t matrix_width = (sequence_size + (sequence_size % T::kNumVar == 0 ?
@@ -1160,7 +1182,7 @@ Alignment SimdAlignmentEngine::affine(const char* sequence,
         }
 
         if (type_ == AlignmentType::kSW) {
-            std::int32_t max_row_score = _mmxxx_max_value<T>(score);
+            std::int32_t max_row_score = _mmxxx_max_value<S,T>(score);
             if (max_score < max_row_score) {
                 max_score = max_row_score;
                 max_i = i;
@@ -1168,7 +1190,7 @@ Alignment SimdAlignmentEngine::affine(const char* sequence,
 
         } else if (type_ == AlignmentType::kOV) {
             if (node->out_edges().empty()) {
-                std::int32_t max_row_score = _mmxxx_max_value<T>(score);
+                std::int32_t max_row_score = _mmxxx_max_value<S,T>(score);
                 if (max_score < max_row_score) {
                     max_score = max_row_score;
                     max_i = i;
@@ -1177,7 +1199,7 @@ Alignment SimdAlignmentEngine::affine(const char* sequence,
 
         } else if (type_ == AlignmentType::kNW) {
             if (node->out_edges().empty()) {
-                std::int32_t max_row_score = _mmxxx_value_at<T>(
+                std::int32_t max_row_score = _mmxxx_value_at<S,T>(
                     H_row[matrix_width - 1], last_column_id);
                 if (max_score < max_row_score) {
                     max_score = max_row_score;
@@ -1192,12 +1214,12 @@ Alignment SimdAlignmentEngine::affine(const char* sequence,
     }
 
     if (type_ == AlignmentType::kSW) {
-        max_j = _mmxxx_index_of<T>(&(pimpl_->H[max_i * matrix_width]),
+        max_j = _mmxxx_index_of<S,T>(&(pimpl_->H[max_i * matrix_width]),
             matrix_width, max_score);
 
     } else if (type_ == AlignmentType::kOV) {
         if (graph->nodes()[rank_to_node_id[max_i - 1]]->out_edges().empty()) {
-            max_j = _mmxxx_index_of<T>(&(pimpl_->H[max_i * matrix_width]),
+            max_j = _mmxxx_index_of<S,T>(&(pimpl_->H[max_i * matrix_width]),
                 matrix_width, max_score);
         } else {
             max_j = normal_matrix_width - 1;
@@ -1215,7 +1237,7 @@ Alignment SimdAlignmentEngine::affine(const char* sequence,
     }
 
     typename T::type* backtrack_storage = nullptr;
-    typename T::type* H = allocateAlignedMemory(&backtrack_storage,
+    typename T::type* H = allocateAlignedMemory<S>(&backtrack_storage,
         6 * T::kNumVar + 3 * T::kNumVar * max_num_predecessors, kRegisterSize / 8);
     typename T::type* H_pred = H + T::kNumVar;
     typename T::type* H_diag_pred = H_pred + T::kNumVar * max_num_predecessors;
@@ -1464,11 +1486,12 @@ Alignment SimdAlignmentEngine::affine(const char* sequence,
 #endif
 }
 
+template<Arch S>
 template<typename T>
-Alignment SimdAlignmentEngine::convex(const char* sequence,
+Alignment SimdAlignmentEngine<S>::convex(const char* sequence,
     std::uint32_t sequence_size, const std::unique_ptr<Graph>& graph) noexcept {
 
-#if defined(__AVX2__) || defined(__SSE4_1__)
+#if defined(__AVX2__) || defined(__SSE4_1__) || defined(USE_SIMDE)
 
     std::uint32_t normal_matrix_width = sequence_size;
     std::uint32_t matrix_width = (sequence_size + (sequence_size % T::kNumVar == 0 ?
@@ -1625,7 +1648,7 @@ Alignment SimdAlignmentEngine::convex(const char* sequence,
         }
 
         if (type_ == AlignmentType::kSW) {
-            std::int32_t max_row_score = _mmxxx_max_value<T>(score);
+            std::int32_t max_row_score = _mmxxx_max_value<S,T>(score);
             if (max_score < max_row_score) {
                 max_score = max_row_score;
                 max_i = i;
@@ -1633,7 +1656,7 @@ Alignment SimdAlignmentEngine::convex(const char* sequence,
 
         } else if (type_ == AlignmentType::kOV) {
             if (node->out_edges().empty()) {
-                std::int32_t max_row_score = _mmxxx_max_value<T>(score);
+                std::int32_t max_row_score = _mmxxx_max_value<S,T>(score);
                 if (max_score < max_row_score) {
                     max_score = max_row_score;
                     max_i = i;
@@ -1642,7 +1665,7 @@ Alignment SimdAlignmentEngine::convex(const char* sequence,
 
         } else if (type_ == AlignmentType::kNW) {
             if (node->out_edges().empty()) {
-                std::int32_t max_row_score = _mmxxx_value_at<T>(
+                std::int32_t max_row_score = _mmxxx_value_at<S,T>(
                     H_row[matrix_width - 1], last_column_id);
                 if (max_score < max_row_score) {
                     max_score = max_row_score;
@@ -1657,12 +1680,12 @@ Alignment SimdAlignmentEngine::convex(const char* sequence,
     }
 
     if (type_ == AlignmentType::kSW) {
-        max_j = _mmxxx_index_of<T>(&(pimpl_->H[max_i * matrix_width]),
+        max_j = _mmxxx_index_of<S,T>(&(pimpl_->H[max_i * matrix_width]),
             matrix_width, max_score);
 
     } else if (type_ == AlignmentType::kOV) {
         if (graph->nodes()[rank_to_node_id[max_i - 1]]->out_edges().empty()) {
-            max_j = _mmxxx_index_of<T>(&(pimpl_->H[max_i * matrix_width]),
+            max_j = _mmxxx_index_of<S,T>(&(pimpl_->H[max_i * matrix_width]),
                 matrix_width, max_score);
         } else {
             max_j = normal_matrix_width - 1;
@@ -1680,7 +1703,7 @@ Alignment SimdAlignmentEngine::convex(const char* sequence,
     }
 
     typename T::type* backtrack_storage = nullptr;
-    typename T::type* H = allocateAlignedMemory(&backtrack_storage,
+    typename T::type* H = allocateAlignedMemory<S>(&backtrack_storage,
         9 * T::kNumVar + 4 * T::kNumVar * max_num_predecessors, kRegisterSize / 8);
     typename T::type* H_pred = H + T::kNumVar;
     typename T::type* H_diag_pred = H_pred + T::kNumVar * max_num_predecessors;



View it on GitLab: https://salsa.debian.org/med-team/spoa/-/commit/72c8f2f31382264de7a10a9cd9651b7e3ebcc4c4

-- 
View it on GitLab: https://salsa.debian.org/med-team/spoa/-/commit/72c8f2f31382264de7a10a9cd9651b7e3ebcc4c4
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200805/b8ca3142/attachment-0001.html>