[med-svn] [Git][med-team/spoa][master] 8 commits: New upstream version 3.4.0

Andreas Tille gitlab at salsa.debian.org
Wed Aug 5 08:04:48 BST 2020



Andreas Tille pushed to branch master at Debian Med / spoa


Commits:
72c8f2f3 by Andreas Tille at 2020-08-04T19:42:06+02:00
New upstream version 3.4.0
- - - - -
e667d3db by Andreas Tille at 2020-08-04T19:42:06+02:00
routine-update: New upstream version

- - - - -
ded914ed by Andreas Tille at 2020-08-04T19:42:06+02:00
Update upstream source from tag 'upstream/3.4.0'

Update to upstream version '3.4.0'
with Debian dir a42d5773791cefee8c68a32dd46c7a034d09c2a4
- - - - -
7a9d7fb8 by Andreas Tille at 2020-08-04T19:42:07+02:00
routine-update: debhelper-compat 13

- - - - -
9082d356 by Andreas Tille at 2020-08-04T22:55:53+02:00
Adapt patches to new upstream version

- - - - -
76a13d56 by Andreas Tille at 2020-08-05T08:18:40+02:00
New upstream version with new SOVERSION

- - - - -
2c18e828 by Andreas Tille at 2020-08-05T09:01:29+02:00
Adapt symbols file

- - - - -
14428594 by Andreas Tille at 2020-08-05T09:03:46+02:00
Upload to unstable

- - - - -


23 changed files:

- .gitmodules
- CMakeLists.txt
- README.md
- debian/changelog
- debian/control
- debian/libspoa3.symbols.amd64 → debian/libspoa4.0.0.symbols.amd64
- − debian/patches/fix-959880.patch
- debian/patches/fix-ftbfs-gcc-10.patch
- − debian/patches/fix_soversion.patch
- − debian/patches/getopt.patch
- debian/patches/series
- debian/patches/shared_and_static.patch
- − debian/patches/simde
- debian/patches/use_debian_packaged_libs.patch
- + include/spoa/architectures.hpp
- include/spoa/graph.hpp
- src/alignment_engine.cpp
- + src/dispatcher.cpp
- src/graph.cpp
- src/main.cpp
- src/simd_alignment_engine.hpp
- + src/simd_alignment_engine_dispatch.cpp
- src/simd_alignment_engine.cpp → src/simd_alignment_engine_impl.hpp


Changes:

=====================================
.gitmodules
=====================================
@@ -4,3 +4,10 @@
 [submodule "vendor/googletest"]
 	path = vendor/googletest
 	url = https://github.com/google/googletest
+[submodule "vendor/simde"]
+	path = vendor/simde
+	url = https://github.com/nemequ/simde.git
+[submodule "vendor/cpu_features"]
+	path = vendor/cpu_features
+	url = https://github.com/mbrcic/cpu_features.git
+	branch = patch-4


=====================================
CMakeLists.txt
=====================================
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.2)
-project(spoa LANGUAGES CXX VERSION 3.0.0)
+project(spoa LANGUAGES CXX VERSION 4.0.0)
 
 include(GNUInstallDirs)
 
@@ -16,31 +16,88 @@ option(spoa_build_executable "Build spoa standalone tool" OFF)
 option(spoa_build_tests "Build spoa unit tests" OFF)
 option(spoa_optimize_for_native "Build spoa with -march=native" ON)
 option(spoa_optimize_for_portability "Build spoa with -msse4.1" OFF)
+option(spoa_use_simde "Use SIMDe library for porting vectorized code" OFF)
+option(spoa_use_simde_nonvec "Use SIMDe library for nonvectorized code" OFF)
+option(spoa_use_simde_openmp "Use SIMDe support for OpenMP SIMD" OFF)
+option(spoa_generate_dispatch "Use SIMDe to generate x86 dispatch" OFF)
+
+if(NOT spoa_generate_dispatch) # optimization flags defeat the purpose of dispatching
+    if (spoa_optimize_for_portability)
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1")
+    elseif (spoa_optimize_for_native)
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
+    endif()
+endif()
 
-if (spoa_optimize_for_portability)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1")
-elseif (spoa_optimize_for_native)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
+if (spoa_use_simde OR spoa_use_simde_nonvec OR spoa_use_simde_openmp OR spoa_generate_dispatch)
+    add_definitions(-DUSE_SIMDE -DSIMDE_ENABLE_NATIVE_ALIASES)
+    if (spoa_use_simde_nonvec)
+        add_definitions(-DSIMDE_NO_NATIVE)
+    endif()
+    if (spoa_use_simde_openmp)
+        add_definitions(-DSIMDE_ENABLE_OPENMP)
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp-simd")
+    endif()
+    if (spoa_generate_dispatch)
+        add_definitions(-DGEN_DISPATCH)
+    endif()
 endif()
 
 # build SPOA as a static library by default
 set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build all libraries as shared")
 
+list(APPEND INCLUDES
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+    $<INSTALL_INTERFACE:include>
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/vendor/simde>)
+
+# generating in also a dispatcher that handles both dispatching and non-dispatching case
+
 add_library(spoa
     src/alignment_engine.cpp
     src/graph.cpp
-    src/simd_alignment_engine.cpp
-    src/sisd_alignment_engine.cpp)
+    src/sisd_alignment_engine.cpp
+    src/dispatcher.cpp)
 
 target_include_directories(spoa PUBLIC
-    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
-    $<INSTALL_INTERFACE:include>)
+    ${INCLUDES}
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/vendor/cpu_features/include>)
 
 set_target_properties(spoa
     PROPERTIES
     VERSION ${spoa_VERSION}
     SOVERSION ${spoa_VERSION})
 
+# in dispatching case, generate different optimized versions
+
+if (spoa_generate_dispatch)
+
+if (NOT TARGET cpu_features)
+    add_subdirectory(vendor/cpu_features)
+endif()
+
+list(APPEND Archs avx2 sse4.1 sse2)
+
+foreach(arch IN LISTS Archs)
+  add_library(spoa_${arch} OBJECT src/simd_alignment_engine_dispatch.cpp)
+  target_include_directories(spoa_${arch} PUBLIC ${INCLUDES})
+  set_target_properties(spoa_${arch} PROPERTIES COMPILE_FLAGS "-m${arch}")
+  if (BUILD_SHARED_LIBS)
+    set_property(TARGET spoa_${arch}
+      PROPERTY POSITION_INDEPENDENT_CODE ON)
+  endif()
+endforeach()
+
+add_dependencies(spoa
+  spoa_avx2
+  spoa_sse4.1
+  spoa_sse2)
+
+target_link_libraries(spoa
+    cpu_features)
+
+endif()
+
 install(TARGETS spoa DESTINATION ${CMAKE_INSTALL_LIBDIR})
 install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/spoa DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
 


=====================================
README.md
=====================================
@@ -27,7 +27,18 @@ cd build
 cmake -DCMAKE_BUILD_TYPE=Release ..
 make
 ```
-a library named `libspoa.a` will appear in the `build/lib` directory. If you want the spoa executable, run the following two commands:
+a library named `libspoa.a` will appear in the `build/lib` directory.
+
+Various options can be enabled while running `cmake`:
+
+- `spoa_optimize_for_native`: builds with `-march=native`
+- `spoa_optimize_for_portability`: builds with `-msse4.1`
+- `spoa_use_simde`: builds with SIMDe for porting vectorized code
+- `spoa_use_simde_nonvec`: uses SIMDe library for nonvectorized code
+- `spoa_use_simde_openmp`: uses SIMDe support for OpenMP SIMD
+- `spoa_generate_dispatch`: uses SIMDe to generate x86 dispatch
+
+If you want the spoa executable, run the following two commands:
 
 ```bash
 cmake -DCMAKE_BUILD_TYPE=Release -Dspoa_build_executable=ON ..
@@ -85,6 +96,10 @@ spoa [options ...] <sequences>
                 0 - consensus
                 1 - multiple sequence alignment
                 2 - 0 & 1
+        -G, --gfa
+            write GFA on stdout
+        -C, --gfa-with-consensus
+            write GFA with consensus on stdout
         -d, --dot <file>
             output file for the final POA graph in DOT format
         --version


=====================================
debian/changelog
=====================================
@@ -1,3 +1,11 @@
+spoa (3.4.0-1) unstable; urgency=medium
+
+  * New upstream version with new SOVERSION
+    Closes: #966920
+  * debhelper-compat 13 (routine-update)
+
+ -- Andreas Tille <tille at debian.org>  Wed, 05 Aug 2020 09:02:12 +0200
+
 spoa (3.0.2-5) unstable; urgency=medium
 
   * Team upload


=====================================
debian/control
=====================================
@@ -3,14 +3,14 @@ Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.
 Uploaders: Andreas Tille <tille at debian.org>
 Section: science
 Priority: optional
-Build-Depends: debhelper-compat (= 12),
+Build-Depends: debhelper-compat (= 13),
                cmake,
                d-shlibs,
                rename,
                libbioparser-dev (>= 2.0),
                libgtest-dev,
                zlib1g-dev,
-               libsimde-dev (>= 0.0.0.git.20200421)
+               libsimde-dev
 Standards-Version: 4.5.0
 Vcs-Browser: https://salsa.debian.org/med-team/spoa
 Vcs-Git: https://salsa.debian.org/med-team/spoa.git
@@ -29,7 +29,7 @@ Description: SIMD partial order alignment tool
  (Smith-Waterman), global (Needleman-Wunsch) and semi-global alignment
  (overlap).
 
-Package: libspoa3
+Package: libspoa4.0.0
 Architecture: any
 Multi-Arch: same
 Section: libs
@@ -51,7 +51,7 @@ Multi-Arch: same
 Section: libdevel
 Depends: ${shlibs:Depends},
          ${misc:Depends},
-         libspoa3 (= ${binary:Version})
+         libspoa4.0.0 (= ${binary:Version})
 Description: SIMD partial order alignment library (development files)
  Spoa (SIMD POA) is a c++ implementation of the partial order alignment
  (POA) algorithm (as described in 10.1093/bioinformatics/18.3.452) which


=====================================
debian/libspoa3.symbols.amd64 → debian/libspoa4.0.0.symbols.amd64
=====================================
@@ -1,25 +1,17 @@
-libspoa.so.3 libspoa3 #MINVER#
+libspoa.so.4.0.0 libspoa4.0.0 #MINVER#
 * Build-Depends-Package: libspoa-dev
 _ZN4spoa11createGraphEv@Base 1.1.3
 _ZN4spoa15AlignmentEngine5alignERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKSt10unique_ptrINS_5GraphESt14default_deleteISA_EE@Base 3.0.0
 _ZN4spoa15AlignmentEngineC1ENS_13AlignmentTypeENS_16AlignmentSubtypeEaaaaaa@Base 3.0.0
 _ZN4spoa15AlignmentEngineC2ENS_13AlignmentTypeENS_16AlignmentSubtypeEaaaaaa@Base 3.0.0
- _ZN4spoa19SimdAlignmentEngine10initializeINS_14InstructionSetIiEEEEvPKcRKSt10unique_ptrINS_5GraphESt14default_deleteIS7_EEjjj@Base 3.0.2
- _ZN4spoa19SimdAlignmentEngine10initializeINS_14InstructionSetIsEEEEvPKcRKSt10unique_ptrINS_5GraphESt14default_deleteIS7_EEjjj@Base 3.0.2
- _ZN4spoa19SimdAlignmentEngine5alignEPKcjRKSt10unique_ptrINS_5GraphESt14default_deleteIS4_EE@Base 3.0.0
- _ZN4spoa19SimdAlignmentEngine6affineINS_14InstructionSetIiEEEESt6vectorISt4pairIiiESaIS6_EEPKcjRKSt10unique_ptrINS_5GraphESt14default_deleteISC_EE@Base 3.0.2
- _ZN4spoa19SimdAlignmentEngine6affineINS_14InstructionSetIsEEEESt6vectorISt4pairIiiESaIS6_EEPKcjRKSt10unique_ptrINS_5GraphESt14default_deleteISC_EE@Base 3.0.2
- _ZN4spoa19SimdAlignmentEngine6convexINS_14InstructionSetIiEEEESt6vectorISt4pairIiiESaIS6_EEPKcjRKSt10unique_ptrINS_5GraphESt14default_deleteISC_EE@Base 3.0.2
- _ZN4spoa19SimdAlignmentEngine6convexINS_14InstructionSetIsEEEESt6vectorISt4pairIiiESaIS6_EEPKcjRKSt10unique_ptrINS_5GraphESt14default_deleteISC_EE@Base 3.0.2
- _ZN4spoa19SimdAlignmentEngine6linearINS_14InstructionSetIiEEEESt6vectorISt4pairIiiESaIS6_EEPKcjRKSt10unique_ptrINS_5GraphESt14default_deleteISC_EE@Base 3.0.2
- _ZN4spoa19SimdAlignmentEngine6linearINS_14InstructionSetIsEEEESt6vectorISt4pairIiiESaIS6_EEPKcjRKSt10unique_ptrINS_5GraphESt14default_deleteISC_EE@Base 3.0.2
- _ZN4spoa19SimdAlignmentEngine7reallocEjjj@Base 1.1.3
- _ZN4spoa19SimdAlignmentEngine8preallocEjj@Base 1.1.3
- _ZN4spoa19SimdAlignmentEngineC1ENS_13AlignmentTypeENS_16AlignmentSubtypeEaaaaaa@Base 3.0.0
- _ZN4spoa19SimdAlignmentEngineC2ENS_13AlignmentTypeENS_16AlignmentSubtypeEaaaaaa@Base 3.0.0
- _ZN4spoa19SimdAlignmentEngineD0Ev@Base 1.1.3
- _ZN4spoa19SimdAlignmentEngineD1Ev@Base 1.1.3
- _ZN4spoa19SimdAlignmentEngineD2Ev@Base 1.1.3
+ _ZN4spoa19SimdAlignmentEngineILNS_4ArchE3EE5alignEPKcjRKSt10unique_ptrINS_5GraphESt14default_deleteIS6_EE@Base 3.4.0
+ _ZN4spoa19SimdAlignmentEngineILNS_4ArchE3EE7reallocEjjj@Base 3.4.0
+ _ZN4spoa19SimdAlignmentEngineILNS_4ArchE3EE8preallocEjj@Base 3.4.0
+ _ZN4spoa19SimdAlignmentEngineILNS_4ArchE3EEC1ENS_13AlignmentTypeENS_16AlignmentSubtypeEaaaaaa@Base 3.4.0
+ _ZN4spoa19SimdAlignmentEngineILNS_4ArchE3EEC2ENS_13AlignmentTypeENS_16AlignmentSubtypeEaaaaaa@Base 3.4.0
+ _ZN4spoa19SimdAlignmentEngineILNS_4ArchE3EED0Ev@Base 3.4.0
+ _ZN4spoa19SimdAlignmentEngineILNS_4ArchE3EED1Ev@Base 3.4.0
+ _ZN4spoa19SimdAlignmentEngineILNS_4ArchE3EED2Ev@Base 3.4.0
 _ZN4spoa19SisdAlignmentEngine10initializeEPKcjRKSt10unique_ptrINS_5GraphESt14default_deleteIS4_EE@Base 1.1.3
 _ZN4spoa19SisdAlignmentEngine5alignEPKcjRKSt10unique_ptrINS_5GraphESt14default_deleteIS4_EE@Base 1.1.3
 _ZN4spoa19SisdAlignmentEngine6affineEPKcjRKSt10unique_ptrINS_5GraphESt14default_deleteIS4_EE@Base 3.0.0
@@ -36,6 +28,7 @@ libspoa.so.3 libspoa3 #MINVER#
 _ZN4spoa21createAlignmentEngineENS_13AlignmentTypeEaaaa@Base 3.0.0
 _ZN4spoa21createAlignmentEngineENS_13AlignmentTypeEaaaaaa@Base 3.0.0
 _ZN4spoa25createSimdAlignmentEngineENS_13AlignmentTypeENS_16AlignmentSubtypeEaaaaaa@Base 3.0.0
+ _ZN4spoa25createSimdAlignmentEngineILNS_4ArchE3EEESt10unique_ptrINS_15AlignmentEngineESt14default_deleteIS3_EENS_13AlignmentTypeENS_16AlignmentSubtypeEaaaaaa@Base 3.4.0
 _ZN4spoa25createSisdAlignmentEngineENS_13AlignmentTypeENS_16AlignmentSubtypeEaaaaaa@Base 3.0.0
 _ZN4spoa4Edge12add_sequenceEjj@Base 1.1.3
 _ZN4spoa4EdgeC1Ejjjj@Base 1.1.3
@@ -76,6 +69,7 @@ libspoa.so.3 libspoa3 #MINVER#
 _ZNK4spoa5Graph38initialize_multiple_sequence_alignmentERSt6vectorIjSaIjEE@Base 1.1.3
 _ZNK4spoa5Graph8subgraphEjjRSt6vectorIiSaIiEE@Base 1.1.3
 _ZNK4spoa5Graph9print_dotERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@Base 1.1.5
+ _ZNK4spoa5Graph9print_gfaERSoRKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS8_EEb@Base 3.4.0
 _ZNKSt5ctypeIcE8do_widenEc@Base 1.1.5
 _ZNSt10_HashtableIjjSaIjENSt8__detail9_IdentityESt8equal_toIjESt4hashIjENS1_18_Mod_range_hashingENS1_20_Default_ranged_hashENS1_20_Prime_rehash_policyENS1_17_Hashtable_traitsILb0ELb1ELb1EEEE9_M_rehashEmRKm@Base 3.0.0
 _ZNSt11_Deque_baseIjSaIjEE17_M_initialize_mapEm@Base 1.1.3
@@ -90,40 +84,35 @@ libspoa.so.3 libspoa3 #MINVER#
 _ZNSt19_Sp_counted_deleterIPN4spoa4EdgeESt14default_deleteIS1_ESaIvELN9__gnu_cxx12_Lock_policyE2EE14_M_get_deleterERKSt9type_info@Base 1.1.3
 _ZNSt19_Sp_counted_deleterIPN4spoa4EdgeESt14default_deleteIS1_ESaIvELN9__gnu_cxx12_Lock_policyE2EED0Ev@Base 1.1.3
 _ZNSt19_Sp_counted_deleterIPN4spoa4EdgeESt14default_deleteIS1_ESaIvELN9__gnu_cxx12_Lock_policyE2EED1Ev@Base 1.1.3
- _ZNSt19_Sp_counted_deleterIPN4spoa4EdgeESt14default_deleteIS1_ESaIvELN9__gnu_cxx12_Lock_policyE2EED2Ev@Base 1.1.3
 _ZNSt5dequeIjSaIjEE16_M_push_back_auxIJRKjEEEvDpOT_@Base 1.1.3
 _ZNSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS5_EE12emplace_backIJRS5_EEEvDpOT_@Base 1.1.3
 _ZNSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS5_EE17_M_realloc_insertIJRS5_EEEvN9__gnu_cxx17__normal_iteratorIPS5_S7_EEDpOT_@Base 1.1.3
 _ZNSt6vectorISt10shared_ptrIN4spoa4EdgeEESaIS3_EE17_M_realloc_insertIJRS3_EEEvN9__gnu_cxx17__normal_iteratorIPS3_S5_EEDpOT_@Base 1.1.3
 _ZNSt6vectorISt10unique_ptrIN4spoa4NodeESt14default_deleteIS2_EESaIS5_EE17_M_realloc_insertIJS5_EEEvN9__gnu_cxx17__normal_iteratorIPS5_S7_EEDpOT_@Base 1.1.3
 _ZNSt6vectorISt4pairIiiESaIS1_EE12emplace_backIJRKjiEEEvDpOT_@Base 3.0.0
- _ZNSt6vectorISt4pairIiiESaIS1_EE12emplace_backIJiRiEEEvDpOT_@Base 3.0.2
 _ZNSt6vectorISt4pairIiiESaIS1_EE12emplace_backIJijEEEvDpOT_@Base 3.0.0
- _ZNSt6vectorISt4pairIiiESaIS1_EE12emplace_backIJjiEEEvDpOT_@Base 3.0.2
 _ZNSt6vectorISt4pairIiiESaIS1_EE12emplace_backIJjjEEEvDpOT_@Base 3.0.0
 _ZNSt6vectorIbSaIbEE14_M_fill_insertESt13_Bit_iteratormb@Base 1.1.3
 _ZNSt6vectorIiSaIiEE14_M_fill_insertEN9__gnu_cxx17__normal_iteratorIPiS1_EEmRKi@Base 1.1.3
 _ZNSt6vectorIjSaIjEE12emplace_backIJRKiEEEvDpOT_@Base 1.1.3
 _ZNSt6vectorIjSaIjEE12emplace_backIJRiEEEvDpOT_@Base 1.1.3
 _ZNSt6vectorIjSaIjEE12emplace_backIJRjEEEvDpOT_@Base 1.1.3
- _ZNSt6vectorIjSaIjEE12emplace_backIJiEEEvDpOT_@Base 3.0.2
- _ZNSt6vectorIjSaIjEE12emplace_backIJjEEEvDpOT_@Base 3.0.2
 _ZNSt6vectorIjSaIjEE14_M_fill_insertEN9__gnu_cxx17__normal_iteratorIPjS1_EEmRKj@Base 1.1.3
 _ZNSt6vectorIjSaIjEE17_M_realloc_insertIJRKjEEEvN9__gnu_cxx17__normal_iteratorIPjS1_EEDpOT_@Base 1.1.3
 _ZTIN4spoa15AlignmentEngineE@Base 1.1.3
- _ZTIN4spoa19SimdAlignmentEngineE@Base 1.1.3
+ _ZTIN4spoa19SimdAlignmentEngineILNS_4ArchE3EEE@Base 3.4.0
 _ZTIN4spoa19SisdAlignmentEngineE@Base 1.1.3
 _ZTISt11_Mutex_baseILN9__gnu_cxx12_Lock_policyE2EE@Base 1.1.3
 _ZTISt16_Sp_counted_baseILN9__gnu_cxx12_Lock_policyE2EE@Base 1.1.3
 _ZTISt19_Sp_counted_deleterIPN4spoa4EdgeESt14default_deleteIS1_ESaIvELN9__gnu_cxx12_Lock_policyE2EE@Base 1.1.3
 _ZTSN4spoa15AlignmentEngineE@Base 1.1.3
- _ZTSN4spoa19SimdAlignmentEngineE@Base 1.1.3
+ _ZTSN4spoa19SimdAlignmentEngineILNS_4ArchE3EEE@Base 3.4.0
 _ZTSN4spoa19SisdAlignmentEngineE@Base 1.1.3
 _ZTSSt11_Mutex_baseILN9__gnu_cxx12_Lock_policyE2EE@Base 1.1.3
 _ZTSSt14default_deleteIN4spoa4EdgeEE@Base 1.1.3
 _ZTSSt16_Sp_counted_baseILN9__gnu_cxx12_Lock_policyE2EE@Base 1.1.3
 _ZTSSt19_Sp_counted_deleterIPN4spoa4EdgeESt14default_deleteIS1_ESaIvELN9__gnu_cxx12_Lock_policyE2EE@Base 1.1.3
 _ZTVN4spoa15AlignmentEngineE@Base 1.1.3
- _ZTVN4spoa19SimdAlignmentEngineE@Base 1.1.3
+ _ZTVN4spoa19SimdAlignmentEngineILNS_4ArchE3EEE@Base 3.4.0
 _ZTVN4spoa19SisdAlignmentEngineE@Base 1.1.3
 _ZTVSt19_Sp_counted_deleterIPN4spoa4EdgeESt14default_deleteIS1_ESaIvELN9__gnu_cxx12_Lock_policyE2EE@Base 1.1.3


=====================================
debian/patches/fix-959880.patch deleted
=====================================
@@ -1,25 +0,0 @@
-Description: Fix FTBFS #959880
-On ppc64el, altivec.h redefines bool, pixel and vector which can collide with
-c++ types.
-This altivec.h inclusion is done with the introduction of simde.
-As altivec.h explains, it's possible to undefine those for C++ compatibility in
-src/simd_alignment_engine.cpp which let it define the variables that other files
-will use while not impacting simde.
-Author: Frédéric Bonnard <frediz at debian.org>
----
-This patch header follows DEP-3: http://dep.debian.net/deps/dep3/
---- a/src/simd_alignment_engine.cpp
-+++ b/src/simd_alignment_engine.cpp
-@@ -13,6 +13,12 @@
-     #include <simde/x86/avx2.h> // AVX2 and lower
- }
- 
-+#if defined(__VEC__) && defined(__ALTIVEC__) && !defined(__APPLE_ALTIVEC__)
-+#  undef vector
-+#  undef pixel
-+#  undef bool
-+#endif
-+
- #include "spoa/graph.hpp"
- #include "simd_alignment_engine.hpp"
- 


=====================================
debian/patches/fix-ftbfs-gcc-10.patch
=====================================
@@ -6,13 +6,13 @@ Applied-Upstream: https://github.com/rvaser/spoa/pull/28
 Last-Update: 2020-05-02
 ---
 This patch header follows DEP-3: http://dep.debian.net/deps/dep3/
---- spoa.orig/src/alignment_engine.cpp
-+++ spoa/src/alignment_engine.cpp
+--- a/src/alignment_engine.cpp
++++ b/src/alignment_engine.cpp
 @@ -6,6 +6,7 @@
-
+ 
  #include <limits>
  #include <algorithm>
 +#include <stdexcept>
  #include <exception>
+ #include <stdexcept>
  
- #include "sisd_alignment_engine.hpp"


=====================================
debian/patches/fix_soversion.patch deleted
=====================================
@@ -1,15 +0,0 @@
-Note: Patch is not activated to not derive to much from upstream
-Description: Invent SOVERSION different from upstream version
-Author: Andreas Tille <tille at debian.org>
-Last-Update: Mon, 28 Jan 2019 20:04:58 +0100
-Bug-Upstream: https://github.com/rvaser/spoa/issues/14
-
---- spoa.orig/CMakeLists.txt
-+++ spoa/CMakeLists.txt
-@@ -1,5 +1,5 @@
- cmake_minimum_required(VERSION 3.2)
--project(spoa LANGUAGES CXX VERSION 3.0.0)
-+project(spoa LANGUAGES CXX VERSION 3)
- 
- include(GNUInstallDirs)
- 


=====================================
debian/patches/getopt.patch deleted
=====================================
@@ -1,16 +0,0 @@
-Description: Fix getopt for architectures where char is unsigned
-Bug-Debian: https://bugs.debian.org/956809
-From: Adrian Bunk <bunk at debian.org>
-Date: Wed, 15 Apr 2020 15:58:08 +0300
-
---- a/src/main.cpp
-+++ b/src/main.cpp
-@@ -37,7 +37,7 @@ int main(int argc, char** argv) {
- 
-     std::string dot_path = "";
- 
--    char opt;
-+    int opt;
-     while ((opt = getopt_long(argc, argv, "m:n:g:e:q:c:l:r:d:h", options, nullptr)) != -1) {
-         switch (opt) {
-             case 'm': m = atoi(optarg); break;


=====================================
debian/patches/series
=====================================
@@ -1,7 +1,3 @@
 fix-ftbfs-gcc-10.patch
 use_debian_packaged_libs.patch
 shared_and_static.patch
-fix_soversion.patch
-simde
-getopt.patch
-fix-959880.patch


=====================================
debian/patches/shared_and_static.patch
=====================================
@@ -2,29 +2,35 @@ Author: Andreas Tille <tille at debian.org>
 Last-Update:  Fri, 08 Jun 2018 13:20:51 +0200
 Description: Build shared and static lib
 
---- spoa.orig/CMakeLists.txt
-+++ spoa/CMakeLists.txt
-@@ -24,9 +24,15 @@
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -44,7 +44,7 @@ if (spoa_use_simde OR spoa_use_simde_non
  endif()
  
  # build SPOA as a static library by default
 -set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build all libraries as shared")
 +# set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build all libraries as shared")
  
+ list(APPEND INCLUDES
+     $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+@@ -53,7 +53,13 @@ list(APPEND INCLUDES
+ 
+ # generating in also a dispatcher that handles both dispatching and non-dispatching case
+ 
 -add_library(spoa
 +add_library(spoa SHARED
 +    src/alignment_engine.cpp
 +    src/graph.cpp
-+    src/simd_alignment_engine.cpp
-+    src/sisd_alignment_engine.cpp)
++    src/sisd_alignment_engine.cpp
++    src/dispatcher.cpp)
 +
 +add_library(spoa_static STATIC
      src/alignment_engine.cpp
      src/graph.cpp
-     src/simd_alignment_engine.cpp
-@@ -36,12 +42,17 @@
-     $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
-     $<INSTALL_INTERFACE:include>)
+     src/sisd_alignment_engine.cpp
+@@ -63,6 +69,10 @@ target_include_directories(spoa PUBLIC
+     ${INCLUDES}
+     $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/vendor/cpu_features/include>)
  
 +target_include_directories(spoa_static PUBLIC
 +    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
@@ -33,7 +39,8 @@ Description: Build shared and static lib
  set_target_properties(spoa
      PROPERTIES
      VERSION ${spoa_VERSION}
-     SOVERSION ${spoa_VERSION})
+@@ -99,6 +109,7 @@ target_link_libraries(spoa
+ endif()
  
  install(TARGETS spoa DESTINATION ${CMAKE_INSTALL_LIBDIR})
 +install(TARGETS spoa_static DESTINATION ${CMAKE_INSTALL_LIBDIR})


=====================================
debian/patches/simde deleted
=====================================
@@ -1,290 +0,0 @@
-From: Michael R. Crusoe <michael.crusoe at gmail.com>
-Subject: Enable building on non-x86_64
---- spoa.orig/src/simd_alignment_engine.cpp
-+++ spoa/src/simd_alignment_engine.cpp
-@@ -9,7 +9,8 @@
- #include <limits>
- 
- extern "C" {
--    #include <immintrin.h> // AVX2 and lower
-+    #define SIMDE_ENABLE_NATIVE_ALIASES
-+    #include <simde/x86/avx2.h> // AVX2 and lower
- }
- 
- #include "spoa/graph.hpp"
-@@ -43,8 +44,6 @@
- template<typename T>
- struct InstructionSet;
- 
--#if defined(__AVX2__)
--
- constexpr std::uint32_t kRegisterSize = 256;
- using __mxxxi = __m256i;
- 
-@@ -139,99 +138,6 @@
-     }
- };
- 
--#elif defined(__SSE4_1__)
--
--constexpr std::uint32_t kRegisterSize = 128;
--using __mxxxi = __m128i;
--
--inline __mxxxi _mmxxx_load_si(__mxxxi const* mem_addr) {
--    return _mm_load_si128(mem_addr);
--}
--
--inline void _mmxxx_store_si(__mxxxi* mem_addr, const __mxxxi& a) {
--    _mm_store_si128(mem_addr, a);
--}
--
--inline __mxxxi _mmxxx_or_si(const __mxxxi& a, const __mxxxi& b) {
--    return _mm_or_si128(a, b);
--}
--
--#define _mmxxx_slli_si(a, n) \
--    _mm_slli_si128(a, n)
--
--#define _mmxxx_srli_si(a, n) \
--    _mm_srli_si128(a, n)
--
--template<>
--struct InstructionSet<std::int16_t> {
--    using type = std::int16_t;
--    static constexpr std::uint32_t kNumVar = kRegisterSize / (8 * sizeof(type));
--    static constexpr std::uint32_t kLogNumVar = 3;
--    static constexpr std::uint32_t kLSS = 2;
--    static constexpr std::uint32_t kRSS = 14;
--    static inline __mxxxi _mmxxx_add_epi(const __mxxxi& a, const __mxxxi& b) {
--        return _mm_add_epi16(a, b);
--    }
--    static inline __mxxxi _mmxxx_sub_epi(const __mxxxi& a, const __mxxxi& b) {
--        return _mm_sub_epi16(a, b);
--    }
--    static inline __mxxxi _mmxxx_min_epi(const __mxxxi& a, const __mxxxi& b) {
--        return _mm_min_epi16(a, b);
--    }
--    static inline __mxxxi _mmxxx_max_epi(const __mxxxi& a, const __mxxxi& b) {
--        return _mm_max_epi16(a, b);
--    }
--    static inline __mxxxi _mmxxx_set1_epi(type a) {
--        return _mm_set1_epi16(a);
--    }
--    static inline void _mmxxx_prefix_max(__mxxxi& a, const __mxxxi* masks,
--        const __mxxxi* penalties) {
--
--        a = _mmxxx_max_epi(a, _mmxxx_or_si(masks[0], _mmxxx_slli_si(
--            _mmxxx_add_epi(a, penalties[0]), 2)));
--        a = _mmxxx_max_epi(a, _mmxxx_or_si(masks[1], _mmxxx_slli_si(
--            _mmxxx_add_epi(a, penalties[1]), 4)));
--        a = _mmxxx_max_epi(a, _mmxxx_or_si(masks[2], _mmxxx_slli_si(
--            _mmxxx_add_epi(a, penalties[2]), 8)));
--    }
--};
--
--template<>
--struct InstructionSet<std::int32_t> {
--    using type = std::int32_t;
--    static constexpr std::uint32_t kNumVar = kRegisterSize / (8 * sizeof(type));
--    static constexpr std::uint32_t kLogNumVar = 2;
--    static constexpr std::uint32_t kLSS = 4;
--    static constexpr std::uint32_t kRSS = 12;
--    static inline __mxxxi _mmxxx_add_epi(const __mxxxi& a, const __mxxxi& b) {
--        return _mm_add_epi32(a, b);
--    }
--    static inline __mxxxi _mmxxx_sub_epi(const __mxxxi& a, const __mxxxi& b) {
--        return _mm_sub_epi32(a, b);
--    }
--    static inline __mxxxi _mmxxx_min_epi(const __mxxxi& a, const __mxxxi& b) {
--        return _mm_min_epi32(a, b);
--    }
--    static inline __mxxxi _mmxxx_max_epi(const __mxxxi& a, const __mxxxi& b) {
--        return _mm_max_epi32(a, b);
--    }
--    static inline __mxxxi _mmxxx_set1_epi(type a) {
--        return _mm_set1_epi32(a);
--    }
--    static inline void _mmxxx_prefix_max(__mxxxi& a, const __mxxxi* masks,
--        const __mxxxi* penalties) {
--
--        a = _mmxxx_max_epi(a, _mmxxx_or_si(masks[0], _mmxxx_slli_si(
--            _mmxxx_add_epi(a, penalties[0]), 4)));
--        a = _mmxxx_max_epi(a, _mmxxx_or_si(masks[1], _mmxxx_slli_si(
--            _mmxxx_add_epi(a, penalties[1]), 8)));
--    }
--};
--
--#endif
--
--#if defined(__AVX2__) || defined(__SSE4_1__)
--
- template<typename T>
- void _mmxxx_print(const __mxxxi& a) {
- 
-@@ -288,28 +194,16 @@
-     return -1;
- }
- 
--#endif
--
- std::unique_ptr<AlignmentEngine> createSimdAlignmentEngine(AlignmentType type,
-     AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g,
-     std::int8_t e, std::int8_t q, std::int8_t c) {
- 
--#if defined(__AVX2__) || defined(__SSE4_1__)
--
-     return std::unique_ptr<AlignmentEngine>(new SimdAlignmentEngine(type,
-         subtype, m, n, g, e, q, c));
--
--#else
--
--    return nullptr;
--
--#endif
- }
- 
- struct SimdAlignmentEngine::Implementation {
- 
--#if defined(__AVX2__) || defined(__SSE4_1__)
--
-     std::vector<std::uint32_t> node_id_to_rank;
- 
-     std::unique_ptr<__mxxxi[]> sequence_profile_storage;
-@@ -342,7 +236,6 @@
-             penalties(nullptr) {
-     }
- 
--#endif
- };
- 
- SimdAlignmentEngine::SimdAlignmentEngine(AlignmentType type,
-@@ -358,8 +251,6 @@
- void SimdAlignmentEngine::prealloc(std::uint32_t max_sequence_size,
-     std::uint32_t alphabet_size) {
- 
--#if defined(__AVX2__) || defined(__SSE4_1__)
--
-     std::uint32_t longest_path = max_sequence_size * (alphabet_size + 1) + 1 +
-         InstructionSet<std::int16_t>::kNumVar;
- 
-@@ -374,14 +265,11 @@
-             alphabet_size * max_sequence_size, alphabet_size);
-     }
- 
--#endif
- }
- 
- void SimdAlignmentEngine::realloc(std::uint32_t matrix_width,
-     std::uint32_t matrix_height, std::uint32_t num_codes) {
- 
--#if defined(__AVX2__) || defined(__SSE4_1__)
--
-     if (pimpl_->node_id_to_rank.size() < matrix_height - 1) {
-         pimpl_->node_id_to_rank.resize(matrix_height - 1, 0);
-     }
-@@ -453,7 +341,6 @@
-         pimpl_->penalties_storage = std::unique_ptr<__mxxxi[]>(storage);
-     }
- 
--#endif
- }
- 
- template<typename T>
-@@ -461,8 +348,6 @@
-     const std::unique_ptr<Graph>& graph, std::uint32_t normal_matrix_width,
-     std::uint32_t matrix_width, std::uint32_t matrix_height) noexcept {
- 
--#if defined(__AVX2__) || defined(__SSE4_1__)
--
-     std::int32_t padding_penatly = -1 * std::max(std::max(abs(m_), abs(n_)),
-         std::max(abs(g_), abs(q_)));
- 
-@@ -643,7 +528,6 @@
-             break;
-     }
- 
--#endif
- }
- 
- Alignment SimdAlignmentEngine::align(const char* sequence,
-@@ -653,8 +537,6 @@
-         return Alignment();
-     }
- 
--#if defined(__AVX2__) || defined(__SSE4_1__)
--
-     std::uint32_t longest_path = graph->nodes().size() + 1 + sequence_size +
-         InstructionSet<std::int16_t>::kNumVar;
- 
-@@ -680,19 +562,12 @@
- 
-     return Alignment();
- 
--#else
--
--    return Alignment();
--
--#endif
- }
- 
- template<typename T>
- Alignment SimdAlignmentEngine::linear(const char* sequence,
-     std::uint32_t sequence_size, const std::unique_ptr<Graph>& graph) noexcept {
- 
--#if defined(__AVX2__) || defined(__SSE4_1__)
--
-     std::uint32_t normal_matrix_width = sequence_size;
-     std::uint32_t matrix_width = (sequence_size + (sequence_size % T::kNumVar == 0 ?
-         0 : T::kNumVar - sequence_size % T::kNumVar)) / T::kNumVar;
-@@ -1027,19 +902,12 @@
-     std::reverse(alignment.begin(), alignment.end());
-     return alignment;
- 
--#else
--
--    return Alignment();
--
--#endif
- }
- 
- template<typename T>
- Alignment SimdAlignmentEngine::affine(const char* sequence,
-     std::uint32_t sequence_size, const std::unique_ptr<Graph>& graph) noexcept {
- 
--#if defined(__AVX2__) || defined(__SSE4_1__)
--
-     std::uint32_t normal_matrix_width = sequence_size;
-     std::uint32_t matrix_width = (sequence_size + (sequence_size % T::kNumVar == 0 ?
-         0 : T::kNumVar - sequence_size % T::kNumVar)) / T::kNumVar;
-@@ -1457,19 +1325,12 @@
-     std::reverse(alignment.begin(), alignment.end());
-     return alignment;
- 
--#else
--
--    return Alignment();
--
--#endif
- }
- 
- template<typename T>
- Alignment SimdAlignmentEngine::convex(const char* sequence,
-     std::uint32_t sequence_size, const std::unique_ptr<Graph>& graph) noexcept {
- 
--#if defined(__AVX2__) || defined(__SSE4_1__)
--
-     std::uint32_t normal_matrix_width = sequence_size;
-     std::uint32_t matrix_width = (sequence_size + (sequence_size % T::kNumVar == 0 ?
-         0 : T::kNumVar - sequence_size % T::kNumVar)) / T::kNumVar;
-@@ -1962,11 +1823,6 @@
- 
-     std::reverse(alignment.begin(), alignment.end());
-     return alignment;
--#else
--
--    return Alignment();
--
--#endif
- }
- 
- }


=====================================
debian/patches/use_debian_packaged_libs.patch
=====================================
@@ -2,9 +2,9 @@ Author: Andreas Tille <tille at debian.org>
 Last-Update:  Fri, 08 Jun 2018 13:20:51 +0200
 Description: Use Debian packaged libraries
 
---- spoa.orig/CMakeLists.txt
-+++ spoa/CMakeLists.txt
-@@ -53,11 +53,7 @@
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -110,11 +110,7 @@ if (spoa_build_executable)
          src/sequence.cpp
          src/main.cpp)
  
@@ -17,7 +17,7 @@ Description: Use Debian packaged libraries
      set_target_properties(spoa_bin PROPERTIES OUTPUT_NAME spoa)
  
      install(TARGETS spoa_bin DESTINATION ${CMAKE_INSTALL_BINDIR})
-@@ -74,12 +70,5 @@
+@@ -131,12 +127,5 @@ if (spoa_build_tests)
          src/sequence.cpp
          test/spoa_test.cpp)
  


=====================================
include/spoa/architectures.hpp
=====================================
@@ -0,0 +1,18 @@
+/*!
+ * @file architectures.hpp
+ *
+ * @brief Arch enum class header file
+ */
+
+#pragma once
+
+namespace spoa {
+
+/*!
+ * @brief Instruction-set selector for the SIMD alignment engine templates.
+ * avx2, sse4_1 and sse2 name a specific x86 instruction set; automatic is
+ * the value instantiated when no run-time CPU dispatching is generated.
+ */
+enum class Arch{avx2, sse4_1, sse2, automatic};
+
+}


=====================================
include/spoa/graph.hpp
=====================================
@@ -83,6 +83,8 @@ public:
 
     void print_dot(const std::string& path) const;
 
+    void print_gfa(std::ostream& out, const std::vector<std::string>& sequence_names, bool include_consensus = false) const;
+
     void clear();
 
     friend std::unique_ptr<Graph> createGraph();


=====================================
src/alignment_engine.cpp
=====================================
@@ -7,6 +7,7 @@
 #include <limits>
 #include <algorithm>
 #include <exception>
+#include <stdexcept>
 
 #include "sisd_alignment_engine.hpp"
 #include "simd_alignment_engine.hpp"


=====================================
src/dispatcher.cpp
=====================================
@@ -0,0 +1,62 @@
+/*!
+ * @file dispatcher.cpp
+ *
+ * @brief CPU dispatching mechanism that also covers non-dispatching case
+ */
+
+#include "simd_alignment_engine_impl.hpp"
+
+#ifdef GEN_DISPATCH
+
+#include "cpuinfo_x86.h"
+// x86 feature flags, read once when this translation unit is initialized
+static const cpu_features::X86Features features = cpu_features::GetX86Info().features;
+
+#endif
+
+
+namespace spoa{
+// Without GEN_DISPATCH a single engine, Arch::automatic, is instantiated here
+#ifndef GEN_DISPATCH
+template class SimdAlignmentEngine<Arch::automatic>;
+
+template
+std::unique_ptr<AlignmentEngine> createSimdAlignmentEngine<Arch::automatic>(AlignmentType type,
+    AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g,
+    std::int8_t e, std::int8_t q, std::int8_t c);
+#endif
+
+// Non-template entry point: forwards all arguments to the engine chosen below
+std::unique_ptr<AlignmentEngine> createSimdAlignmentEngine(AlignmentType type,
+    AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g,
+    std::int8_t e, std::int8_t q, std::int8_t c) {
+
+#ifdef GEN_DISPATCH
+
+    if (features.avx2)
+    {
+        // AVX2 reported by the CPU: preferred over the SSE variants
+        return createSimdAlignmentEngine<Arch::avx2>(type,
+            subtype, m, n, g, e, q, c);
+    }
+    else if (features.sse4_1){
+
+        // no AVX2, but SSE4.1 is available
+        return createSimdAlignmentEngine<Arch::sse4_1>(type,
+            subtype, m, n, g, e, q, c);
+    }
+    else {
+        // neither AVX2 nor SSE4.1: fall back to the SSE2 instantiation
+        return createSimdAlignmentEngine<Arch::sse2>(type,
+            subtype, m, n, g, e, q, c);
+    }
+#else
+    return createSimdAlignmentEngine<Arch::automatic>(type,
+            subtype, m, n, g, e, q, c);
+#endif
+
+
+}
+
+}
+


=====================================
src/graph.cpp
=====================================
@@ -8,6 +8,7 @@
 #include <algorithm>
 #include <stack>
 #include <fstream>
+#include <stdexcept>
 
 #include "spoa/graph.hpp"
 
@@ -725,6 +726,74 @@ void Graph::print_dot(const std::string& path) const {
     out.close();
 }
 
+// Writes the graph to `out` as GFA 1.0: an H header, one S line per node
+// (named by node id + 1), one L line per outgoing edge with its weight in
+// the ew:f tag, and one P line per sequence labelled with sequence_names[i],
+// so sequence_names must hold at least num_sequences_ entries. Nodes and
+// edges on the path stored in consensus_ are tagged ic:Z:true; when
+// include_consensus is set that path is also written as a P line named
+// "Consensus".
+void Graph::print_gfa(std::ostream& out,
+                      const std::vector<std::string>& sequence_names,
+                      bool include_consensus) const {
+
+    // rank of every node within consensus_, -1 for nodes outside of it
+    std::vector<std::int32_t> in_consensus(nodes_.size(), -1);
+    std::int32_t rank = 0;
+    for (const auto& id: consensus_) {
+        in_consensus[id] = rank++;
+    }
+
+    out << "H" << "\t" << "VN:Z:1.0" << std::endl;
+
+    for (std::uint32_t i = 0; i < nodes_.size(); ++i) {
+        out << "S" << "\t" << i+1 << "\t" << static_cast<char>(decoder_[nodes_[i]->code_]);
+        if (in_consensus[i] != -1) {
+            out << "\t" << "ic:Z:true";
+        }
+        out << std::endl;
+        for (const auto& edge: nodes_[i]->out_edges_) {
+            out << "L" << "\t" << i+1 << "\t" << "+" << "\t" << edge->end_node_id_+1 << "\t" << "+" << "\t" << "0M" << "\t"
+                << "ew:f:" << edge->total_weight_;
+            // an edge is on the consensus only if it joins two consecutive
+            // consensus nodes; the source is checked explicitly because
+            // -1 + 1 would otherwise match the first consensus node (rank 0)
+            if (in_consensus[i] != -1 &&
+                in_consensus[i] + 1 == in_consensus[edge->end_node_id_]) {
+                out << "\t" << "ic:Z:true";
+            }
+            out << std::endl;
+        }
+    }
+
+    for (std::uint32_t i = 0; i < num_sequences_; ++i) {
+        out << "P" << "\t" << sequence_names[i] << "\t";
+        std::uint32_t node_id = sequences_begin_nodes_ids_[i];
+        while (true) {
+            out << node_id+1 << "+";
+            // NOTE(review): successor() presumably advances node_id in place and
+            // returns false at the end of sequence i -- confirm in Node
+            if (!nodes_[node_id]->successor(node_id, i)) {
+                break;
+            } else {
+                out << ",";
+            }
+        }
+        out << "\t" << "*" << std::endl;
+    }
+
+    if (include_consensus) {
+        out << "P" << "\t" << "Consensus" << "\t";
+        // path steps have to be comma-separated, as for the sequence paths
+        const char* separator = "";
+        for (const auto& id: consensus_) {
+            out << separator << id+1 << "+";
+            separator = ",";
+        }
+        out << "\t" << "*" << std::endl;
+    }
+}
+
 void Graph::clear() {
     num_codes_ = 0;
     num_sequences_ = 0;


=====================================
src/main.cpp
=====================================
@@ -10,12 +10,14 @@
 #include "spoa/spoa.hpp"
 #include "bioparser/bioparser.hpp"
 
-static const std::string version = "v3.0.2";
+static const std::string version = "v3.4.0";
 
 static struct option options[] = {
     {"algorithm", required_argument, nullptr, 'l'},
     {"result", required_argument, nullptr, 'r'},
     {"dot", required_argument, nullptr, 'd'},
+    {"gfa", no_argument, nullptr, 'G'},  // flag only, like -G: GFA is written to stdout
+    {"gfa-with-consensus", no_argument, nullptr, 'C'},
     {"version", no_argument, nullptr, 'v'},
     {"help", no_argument, nullptr, 'h'},
     {nullptr, 0, nullptr, 0}
@@ -36,9 +38,11 @@ int main(int argc, char** argv) {
     std::uint8_t result = 0;
 
     std::string dot_path = "";
+    bool write_gfa = false;
+    bool write_gfa_with_consensus = false;
 
-    char opt;
-    while ((opt = getopt_long(argc, argv, "m:n:g:e:q:c:l:r:d:h", options, nullptr)) != -1) {
+    int opt;
+    while ((opt = getopt_long(argc, argv, "m:n:g:e:q:c:l:r:d:GCh", options, nullptr)) != -1) {
         switch (opt) {
             case 'm': m = atoi(optarg); break;
             case 'n': n = atoi(optarg); break;
@@ -49,6 +53,8 @@ int main(int argc, char** argv) {
             case 'l': algorithm = atoi(optarg); break;
             case 'r': result = atoi(optarg); break;
             case 'd': dot_path = optarg; break;
+            case 'G': write_gfa = true; break;
+            case 'C': write_gfa = true; write_gfa_with_consensus = true; break;
             case 'v': std::cout << version << std::endl; return 0;
             case 'h': help(); return 0;
             default: return 1;
@@ -118,18 +124,26 @@ int main(int argc, char** argv) {
         }
     }
 
-    if (result == 0 || result == 2) {
+    if (write_gfa) {
+        // force consensus generation for graph annotation
         std::string consensus = graph->generate_consensus();
-        std::cout << "Consensus (" << consensus.size() << ")" << std::endl;
-        std::cout << consensus << std::endl;
-    }
-
-    if (result == 1 || result == 2) {
+        // save sequence names for graph path labeling
+        std::vector<std::string> sequence_names;
+        for (auto& s : sequences) {
+            sequence_names.push_back(s->name());
+        }
+        // write the graph, with consensus as a path if requested
+        graph->print_gfa(std::cout, sequence_names, write_gfa_with_consensus);
+    } else if (result == 0) {
+        std::string consensus = graph->generate_consensus();
+        std::cout << ">Consensus LN:i:" << consensus.size() << std::endl
+                  << consensus << std::endl;
+    } else {
         std::vector<std::string> msa;
-        graph->generate_multiple_sequence_alignment(msa);
-        std::cout << "Multiple sequence alignment" << std::endl;
-        for (const auto& it: msa) {
-            std::cout << it << std::endl;
+        graph->generate_multiple_sequence_alignment(msa, result == 2);
+        for (std::uint32_t i = 0; i < msa.size(); ++i) {
+            std::cout << ">" << (i < sequences.size() ? sequences[i]->name() : "Consensus") << std::endl
+                      << msa[i] << std::endl;
         }
     }
 
@@ -179,6 +193,10 @@ void help() {
         "                0 - consensus\n"
         "                1 - multiple sequence alignment\n"
         "                2 - 0 & 1\n"
+        "        -G, --gfa\n"
+        "            write GFA on stdout\n"
+        "        -C, --gfa-with-consensus\n"
+        "            write GFA with consensus on stdout\n"
         "        -d, --dot <file>\n"
         "            output file for the final POA graph in DOT format\n"
         "        --version\n"


=====================================
src/simd_alignment_engine.hpp
=====================================
@@ -1,7 +1,7 @@
 /*!
  * @file simd_alignment_engine.hpp
  *
- * @brief SimdAlignmentEngine class header file
+ * @brief SimdAlignmentEngine class template definition file
  */
 
 #pragma once
@@ -12,16 +12,29 @@
 #include <vector>
 
 #include "spoa/alignment_engine.hpp"
+#include "spoa/architectures.hpp"
 
 namespace spoa {
 
 class Graph;
 
+template<Arch S> 
 class SimdAlignmentEngine;
+
 std::unique_ptr<AlignmentEngine> createSimdAlignmentEngine(AlignmentType type,
     AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g,
     std::int8_t e, std::int8_t q, std::int8_t c);
 
+
+
+template<Arch S>
+std::unique_ptr<AlignmentEngine> createSimdAlignmentEngine(AlignmentType type,
+    AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g,
+    std::int8_t e, std::int8_t q, std::int8_t c);
+
+
+
+template<Arch S> 
 class SimdAlignmentEngine: public AlignmentEngine {
 public:
     ~SimdAlignmentEngine();
@@ -32,7 +45,7 @@ public:
     Alignment align(const char* sequence, std::uint32_t sequence_size,
         const std::unique_ptr<Graph>& graph) noexcept override;
 
-    friend std::unique_ptr<AlignmentEngine> createSimdAlignmentEngine(
+    friend std::unique_ptr<AlignmentEngine> createSimdAlignmentEngine<S>(
         AlignmentType type, AlignmentSubtype subtype, std::int8_t m,
         std::int8_t n, std::int8_t g, std::int8_t e, std::int8_t q,
         std::int8_t c);


=====================================
src/simd_alignment_engine_dispatch.cpp
=====================================
@@ -0,0 +1,28 @@
+/*!
+ * @file simd_alignment_engine_dispatch.cpp
+ *
+ * @brief Instantiation of different SIMD engines
+ */
+
+ #include "simd_alignment_engine_impl.hpp"
+// ARCH is picked from the compiler's predefined instruction-set macros
+ #if defined(__AVX2__)
+ #define ARCH Arch::avx2
+ #elif defined (__SSE4_1__)
+ #define ARCH Arch::sse4_1
+ #else
+ #define ARCH Arch::sse2
+ #endif
+
+
+namespace spoa{
+// explicit instantiation of the engine and its factory for the selected ARCH
+template class SimdAlignmentEngine<ARCH>;
+
+template
+std::unique_ptr<AlignmentEngine> createSimdAlignmentEngine<ARCH>(AlignmentType type,
+    AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g,
+    std::int8_t e, std::int8_t q, std::int8_t c);
+
+}
+


=====================================
src/simd_alignment_engine.cpp → src/simd_alignment_engine_impl.hpp
=====================================
@@ -1,7 +1,7 @@
 /*!
- * @file simd_alignment_engine.cpp
+ * @file simd_alignment_engine_impl.hpp
  *
- * @brief SimdAlignmentEngine class source file
+ * @brief SimdAlignmentEngine class template implementation file
  */
 
 #include <iostream>
@@ -9,7 +9,16 @@
 #include <limits>
 
 extern "C" {
+    #ifdef USE_SIMDE
+    #ifdef __AVX2__
+    #include <simde/x86/avx2.h>
+    #else
+    #include <simde/x86/sse4.1.h> // SSE4.1 is covered better
+    #endif
+
+    #elif defined(__AVX2__) || defined(__SSE4_1__)
     #include <immintrin.h> // AVX2 and lower
+    #endif
 }
 
 #include "spoa/graph.hpp"
@@ -17,6 +26,7 @@ extern "C" {
 
 namespace spoa {
 
+
 // Taken from https://gcc.gnu.org/viewcvs/gcc?view=revision&revision=216149
 inline void* align(std::size_t __align, std::size_t __size, void*& __ptr,
     std::size_t& __space) noexcept {
@@ -32,7 +42,8 @@ inline void* align(std::size_t __align, std::size_t __size, void*& __ptr,
     }
 }
 
-template<typename T>
+// TODO: what to do with this??
+template<Arch S,typename T>
 T* allocateAlignedMemory(T** storage, std::uint32_t size, std::uint32_t alignment) {
     *storage = new T[size + alignment - 1];
     void* ptr = static_cast<void*>(*storage);
@@ -40,7 +51,7 @@ T* allocateAlignedMemory(T** storage, std::uint32_t size, std::uint32_t alignmen
     return static_cast<T*>(align(alignment, size * sizeof(T), ptr, storage_size));
 }
 
-template<typename T>
+template<Arch S,typename T>
 struct InstructionSet;
 
 #if defined(__AVX2__)
@@ -69,8 +80,8 @@ inline __mxxxi _mmxxx_or_si(const __mxxxi& a, const __mxxxi& b) {
     _mm256_srli_si256(_mm256_permute2x128_si256(a, a, \
         _MM_SHUFFLE(2, 0, 0, 1)), n - 16)
 
-template<>
-struct InstructionSet<std::int16_t> {
+template<Arch S>
+struct InstructionSet<S,std::int16_t> {
     using type = std::int16_t;
     static constexpr std::uint32_t kNumVar = kRegisterSize / (8 * sizeof(type));
     static constexpr std::uint32_t kLogNumVar = 4;
@@ -105,8 +116,8 @@ struct InstructionSet<std::int16_t> {
     }
 };
 
-template<>
-struct InstructionSet<std::int32_t> {
+template<Arch S>
+struct InstructionSet<S,std::int32_t> {
     using type = std::int32_t;
     static constexpr std::uint32_t kNumVar = kRegisterSize / (8 * sizeof(type));
     static constexpr std::uint32_t kLogNumVar = 3;
@@ -139,7 +150,7 @@ struct InstructionSet<std::int32_t> {
     }
 };
 
-#elif defined(__SSE4_1__)
+#elif defined(__SSE4_1__) || defined(USE_SIMDE)
 
 constexpr std::uint32_t kRegisterSize = 128;
 using __mxxxi = __m128i;
@@ -162,8 +173,8 @@ inline __mxxxi _mmxxx_or_si(const __mxxxi& a, const __mxxxi& b) {
 #define _mmxxx_srli_si(a, n) \
     _mm_srli_si128(a, n)
 
-template<>
-struct InstructionSet<std::int16_t> {
+template<Arch S>
+struct InstructionSet<S,std::int16_t> {
     using type = std::int16_t;
     static constexpr std::uint32_t kNumVar = kRegisterSize / (8 * sizeof(type));
     static constexpr std::uint32_t kLogNumVar = 3;
@@ -196,8 +207,8 @@ struct InstructionSet<std::int16_t> {
     }
 };
 
-template<>
-struct InstructionSet<std::int32_t> {
+template<Arch S>
+struct InstructionSet<S,std::int32_t> {
     using type = std::int32_t;
     static constexpr std::uint32_t kNumVar = kRegisterSize / (8 * sizeof(type));
     static constexpr std::uint32_t kLogNumVar = 2;
@@ -230,9 +241,9 @@ struct InstructionSet<std::int32_t> {
 
 #endif
 
-#if defined(__AVX2__) || defined(__SSE4_1__)
+#if defined(__AVX2__) || defined(__SSE4_1__) || defined(USE_SIMDE)
 
-template<typename T>
+template<Arch S,typename T>
 void _mmxxx_print(const __mxxxi& a) {
 
     __attribute__((aligned(kRegisterSize / 8))) typename T::type
@@ -244,7 +255,7 @@ void _mmxxx_print(const __mxxxi& a) {
     }
 }
 
-template<typename T>
+template<Arch S,typename T>
 typename T::type _mmxxx_max_value(const __mxxxi& a) {
 
     typename T::type max_score = 0;
@@ -259,7 +270,7 @@ typename T::type _mmxxx_max_value(const __mxxxi& a) {
     return max_score;
 }
 
-template<typename T>
+template<Arch S, typename T>
 typename T::type _mmxxx_value_at(const __mxxxi& a, std::uint32_t i) {
 
     __attribute__((aligned(kRegisterSize / 8))) typename T::type
@@ -269,7 +280,7 @@ typename T::type _mmxxx_value_at(const __mxxxi& a, std::uint32_t i) {
     return unpacked[i];
 }
 
-template<typename T>
+template<Arch S, typename T>
 std::int32_t _mmxxx_index_of(const __mxxxi* row, std::uint32_t row_width,
     typename T::type value) {
 
@@ -290,13 +301,14 @@ std::int32_t _mmxxx_index_of(const __mxxxi* row, std::uint32_t row_width,
 
 #endif
 
+template<Arch S>
 std::unique_ptr<AlignmentEngine> createSimdAlignmentEngine(AlignmentType type,
     AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g,
     std::int8_t e, std::int8_t q, std::int8_t c) {
 
-#if defined(__AVX2__) || defined(__SSE4_1__)
+#if defined(__AVX2__) || defined(__SSE4_1__) || defined(USE_SIMDE)
 
-    return std::unique_ptr<AlignmentEngine>(new SimdAlignmentEngine(type,
+    return std::unique_ptr<AlignmentEngine>(new SimdAlignmentEngine<S>(type,
         subtype, m, n, g, e, q, c));
 
 #else
@@ -306,9 +318,11 @@ std::unique_ptr<AlignmentEngine> createSimdAlignmentEngine(AlignmentType type,
 #endif
 }
 
-struct SimdAlignmentEngine::Implementation {
 
-#if defined(__AVX2__) || defined(__SSE4_1__)
+template<Arch S>
+struct SimdAlignmentEngine<S>::Implementation {
+
+#if defined(__AVX2__) || defined(__SSE4_1__) || defined(USE_SIMDE)
 
     std::vector<std::uint32_t> node_id_to_rank;
 
@@ -345,42 +359,46 @@ struct SimdAlignmentEngine::Implementation {
 #endif
 };
 
-SimdAlignmentEngine::SimdAlignmentEngine(AlignmentType type,
+template<Arch S>
+SimdAlignmentEngine<S>::SimdAlignmentEngine(AlignmentType type,
     AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g,
     std::int8_t e, std::int8_t q, std::int8_t c)
         : AlignmentEngine(type, subtype, m, n, g, e, q, c),
         pimpl_(new Implementation()) {
 }
 
-SimdAlignmentEngine::~SimdAlignmentEngine() {
+template<Arch S>
+SimdAlignmentEngine<S>::~SimdAlignmentEngine() {
 }
 
-void SimdAlignmentEngine::prealloc(std::uint32_t max_sequence_size,
+template<Arch S>
+void SimdAlignmentEngine<S>::prealloc(std::uint32_t max_sequence_size,
     std::uint32_t alphabet_size) {
 
-#if defined(__AVX2__) || defined(__SSE4_1__)
+#if defined(__AVX2__) || defined(__SSE4_1__) || defined(USE_SIMDE)
 
     std::uint32_t longest_path = max_sequence_size * (alphabet_size + 1) + 1 +
-        InstructionSet<std::int16_t>::kNumVar;
+        InstructionSet<S,std::int16_t>::kNumVar;
 
     std::uint32_t max_penalty = std::max(std::max(abs(m_), abs(n_)),
         std::max(abs(g_), abs(q_)));
 
     if (max_penalty * longest_path < std::numeric_limits<std::int16_t>::max()) {
-        realloc((max_sequence_size / InstructionSet<std::int16_t>::kNumVar) + 1,
+        realloc((max_sequence_size / InstructionSet<S,std::int16_t>::kNumVar) + 1,
             alphabet_size * max_sequence_size, alphabet_size);
     } else {
-        realloc((max_sequence_size / InstructionSet<std::int32_t>::kNumVar) + 1,
+        realloc((max_sequence_size / InstructionSet<S,std::int32_t>::kNumVar) + 1,
             alphabet_size * max_sequence_size, alphabet_size);
     }
 
 #endif
 }
 
-void SimdAlignmentEngine::realloc(std::uint32_t matrix_width,
+template<Arch S>
+void SimdAlignmentEngine<S>::realloc(std::uint32_t matrix_width,
     std::uint32_t matrix_height, std::uint32_t num_codes) {
 
-#if defined(__AVX2__) || defined(__SSE4_1__)
+#if defined(__AVX2__) || defined(__SSE4_1__) || defined(USE_SIMDE)
 
     if (pimpl_->node_id_to_rank.size() < matrix_height - 1) {
         pimpl_->node_id_to_rank.resize(matrix_height - 1, 0);
@@ -388,7 +406,7 @@ void SimdAlignmentEngine::realloc(std::uint32_t matrix_width,
     if (pimpl_->sequence_profile_size < num_codes * matrix_width) {
         __mxxxi* storage = nullptr;
         pimpl_->sequence_profile_size = num_codes * matrix_width;
-        pimpl_->sequence_profile = allocateAlignedMemory(&storage,
+        pimpl_->sequence_profile = allocateAlignedMemory<S>(&storage,
             pimpl_->sequence_profile_size, kRegisterSize / 8);
         pimpl_->sequence_profile_storage.reset();
         pimpl_->sequence_profile_storage = std::unique_ptr<__mxxxi[]>(storage);
@@ -400,7 +418,7 @@ void SimdAlignmentEngine::realloc(std::uint32_t matrix_width,
         if (pimpl_->M_size < matrix_height * matrix_width) {
             __mxxxi* storage = nullptr;
             pimpl_->M_size = matrix_height * matrix_width;
-            pimpl_->H = allocateAlignedMemory(&storage, pimpl_->M_size,
+            pimpl_->H = allocateAlignedMemory<S>(&storage, pimpl_->M_size,
                 kRegisterSize / 8);
             pimpl_->M_storage.reset();
             pimpl_->M_storage = std::unique_ptr<__mxxxi[]>(storage);
@@ -412,7 +430,7 @@ void SimdAlignmentEngine::realloc(std::uint32_t matrix_width,
         if (pimpl_->M_size < 3 * matrix_height * matrix_width) {
             __mxxxi* storage = nullptr;
             pimpl_->M_size = 3 * matrix_height * matrix_width;
-            pimpl_->H = allocateAlignedMemory(&storage, pimpl_->M_size,
+            pimpl_->H = allocateAlignedMemory<S>(&storage, pimpl_->M_size,
                 kRegisterSize / 8);
             pimpl_->F = pimpl_->H + matrix_height * matrix_width;
             pimpl_->E = pimpl_->F + matrix_height * matrix_width;
@@ -426,7 +444,7 @@ void SimdAlignmentEngine::realloc(std::uint32_t matrix_width,
         if (pimpl_->M_size < 5 * matrix_height * matrix_width) {
             __mxxxi* storage = nullptr;
             pimpl_->M_size = 5 * matrix_height * matrix_width;
-            pimpl_->H = allocateAlignedMemory(&storage, pimpl_->M_size,
+            pimpl_->H = allocateAlignedMemory<S>(&storage, pimpl_->M_size,
                 kRegisterSize / 8);
             pimpl_->F = pimpl_->H + matrix_height * matrix_width;
             pimpl_->E = pimpl_->F + matrix_height * matrix_width;
@@ -436,18 +454,18 @@ void SimdAlignmentEngine::realloc(std::uint32_t matrix_width,
             pimpl_->M_storage = std::unique_ptr<__mxxxi[]>(storage);
         }
     }
-    if (pimpl_->masks_size < InstructionSet<std::int16_t>::kLogNumVar + 1) {
+    if (pimpl_->masks_size < InstructionSet<S,std::int16_t>::kLogNumVar + 1) {
         __mxxxi* storage = nullptr;
-        pimpl_->masks_size = InstructionSet<std::int16_t>::kLogNumVar + 1;
-        pimpl_->masks = allocateAlignedMemory(&storage,
+        pimpl_->masks_size = InstructionSet<S,std::int16_t>::kLogNumVar + 1;
+        pimpl_->masks = allocateAlignedMemory<S>(&storage,
             pimpl_->masks_size, kRegisterSize / 8);
         pimpl_->masks_storage.reset();
         pimpl_->masks_storage = std::unique_ptr<__mxxxi[]>(storage);
     }
-    if (pimpl_->penalties_size < 2 * InstructionSet<std::int16_t>::kLogNumVar) {
+    if (pimpl_->penalties_size < 2 * InstructionSet<S,std::int16_t>::kLogNumVar) {
         __mxxxi* storage = nullptr;
-        pimpl_->penalties_size = 2 * InstructionSet<std::int16_t>::kLogNumVar;
-        pimpl_->penalties = allocateAlignedMemory(&storage,
+        pimpl_->penalties_size = 2 * InstructionSet<S,std::int16_t>::kLogNumVar;
+        pimpl_->penalties = allocateAlignedMemory<S>(&storage,
             pimpl_->penalties_size, kRegisterSize / 8);
         pimpl_->penalties_storage.reset();
         pimpl_->penalties_storage = std::unique_ptr<__mxxxi[]>(storage);
@@ -456,12 +474,13 @@ void SimdAlignmentEngine::realloc(std::uint32_t matrix_width,
 #endif
 }
 
+template<Arch S>
 template<typename T>
-void SimdAlignmentEngine::initialize(const char* sequence,
+void SimdAlignmentEngine<S>::initialize(const char* sequence,
     const std::unique_ptr<Graph>& graph, std::uint32_t normal_matrix_width,
     std::uint32_t matrix_width, std::uint32_t matrix_height) noexcept {
 
-#if defined(__AVX2__) || defined(__SSE4_1__)
+#if defined(__AVX2__) || defined(__SSE4_1__) || defined(USE_SIMDE)
 
     std::int32_t padding_penatly = -1 * std::max(std::max(abs(m_), abs(n_)),
         std::max(abs(g_), abs(q_)));
@@ -646,35 +665,36 @@ void SimdAlignmentEngine::initialize(const char* sequence,
 #endif
 }
 
-Alignment SimdAlignmentEngine::align(const char* sequence,
+template<Arch S>
+Alignment SimdAlignmentEngine<S>::align(const char* sequence,
     std::uint32_t sequence_size, const std::unique_ptr<Graph>& graph) noexcept {
 
     if (graph->nodes().empty() || sequence_size == 0) {
         return Alignment();
     }
 
-#if defined(__AVX2__) || defined(__SSE4_1__)
+#if defined(__AVX2__) || defined(__SSE4_1__) || defined(USE_SIMDE)
 
     std::uint32_t longest_path = graph->nodes().size() + 1 + sequence_size +
-        InstructionSet<std::int16_t>::kNumVar;
+        InstructionSet<S,std::int16_t>::kNumVar;
 
     std::uint32_t max_penalty = std::max(std::max(abs(m_), abs(n_)), abs(g_));
 
     if (max_penalty * longest_path < std::numeric_limits<std::int16_t>::max()) {
         if (subtype_ == AlignmentSubtype::kLinear) {
-            return linear<InstructionSet<std::int16_t>>(sequence, sequence_size, graph);
+            return linear<InstructionSet<S,std::int16_t>>(sequence, sequence_size, graph);
         } else if (subtype_ == AlignmentSubtype::kAffine) {
-            return affine<InstructionSet<std::int16_t>>(sequence, sequence_size, graph);
+            return affine<InstructionSet<S,std::int16_t>>(sequence, sequence_size, graph);
         } else if (subtype_ == AlignmentSubtype::kConvex) {
-            return convex<InstructionSet<std::int16_t>>(sequence, sequence_size, graph);
+            return convex<InstructionSet<S,std::int16_t>>(sequence, sequence_size, graph);
         }
     } else {
         if (subtype_ == AlignmentSubtype::kLinear) {
-            return linear<InstructionSet<std::int32_t>>(sequence, sequence_size, graph);
+            return linear<InstructionSet<S,std::int32_t>>(sequence, sequence_size, graph);
         } else if (subtype_ == AlignmentSubtype::kAffine) {
-            return affine<InstructionSet<std::int32_t>>(sequence, sequence_size, graph);
+            return affine<InstructionSet<S,std::int32_t>>(sequence, sequence_size, graph);
         } else if (subtype_ == AlignmentSubtype::kConvex) {
-            return convex<InstructionSet<std::int32_t>>(sequence, sequence_size, graph);
+            return convex<InstructionSet<S,std::int32_t>>(sequence, sequence_size, graph);
         }
     }
 
@@ -687,11 +707,12 @@ Alignment SimdAlignmentEngine::align(const char* sequence,
 #endif
 }
 
+template<Arch S>
 template<typename T>
-Alignment SimdAlignmentEngine::linear(const char* sequence,
+Alignment SimdAlignmentEngine<S>::linear(const char* sequence,
     std::uint32_t sequence_size, const std::unique_ptr<Graph>& graph) noexcept {
 
-#if defined(__AVX2__) || defined(__SSE4_1__)
+#if defined(__AVX2__) || defined(__SSE4_1__) || defined(USE_SIMDE)
 
     std::uint32_t normal_matrix_width = sequence_size;
     std::uint32_t matrix_width = (sequence_size + (sequence_size % T::kNumVar == 0 ?
@@ -805,7 +826,7 @@ Alignment SimdAlignmentEngine::linear(const char* sequence,
         }
 
         if (type_ == AlignmentType::kSW) {
-            std::int32_t max_row_score = _mmxxx_max_value<T>(score);
+            std::int32_t max_row_score = _mmxxx_max_value<S,T>(score);
             if (max_score < max_row_score) {
                 max_score = max_row_score;
                 max_i = i;
@@ -813,7 +834,7 @@ Alignment SimdAlignmentEngine::linear(const char* sequence,
 
         } else if (type_ == AlignmentType::kOV) {
             if (node->out_edges().empty()) {
-                std::int32_t max_row_score = _mmxxx_max_value<T>(score);
+                std::int32_t max_row_score = _mmxxx_max_value<S,T>(score);
                 if (max_score < max_row_score) {
                     max_score = max_row_score;
                     max_i = i;
@@ -822,7 +843,7 @@ Alignment SimdAlignmentEngine::linear(const char* sequence,
 
         } else if (type_ == AlignmentType::kNW) {
             if (node->out_edges().empty()) {
-                std::int32_t max_row_score = _mmxxx_value_at<T>(
+                std::int32_t max_row_score = _mmxxx_value_at<S,T>(
                     H_row[matrix_width - 1], last_column_id);
                 if (max_score < max_row_score) {
                     max_score = max_row_score;
@@ -837,12 +858,12 @@ Alignment SimdAlignmentEngine::linear(const char* sequence,
     }
 
     if (type_ == AlignmentType::kSW) {
-        max_j = _mmxxx_index_of<T>(&(pimpl_->H[max_i * matrix_width]),
+        max_j = _mmxxx_index_of<S,T>(&(pimpl_->H[max_i * matrix_width]),
             matrix_width, max_score);
 
     } else if (type_ == AlignmentType::kOV) {
         if (graph->nodes()[rank_to_node_id[max_i - 1]]->out_edges().empty()) {
-            max_j = _mmxxx_index_of<T>(&(pimpl_->H[max_i * matrix_width]),
+            max_j = _mmxxx_index_of<S,T>(&(pimpl_->H[max_i * matrix_width]),
                 matrix_width, max_score);
         } else {
             max_j = normal_matrix_width - 1;
@@ -860,7 +881,7 @@ Alignment SimdAlignmentEngine::linear(const char* sequence,
     }
 
     typename T::type* backtrack_storage = nullptr;
-    typename T::type* H = allocateAlignedMemory(&backtrack_storage,
+    typename T::type* H = allocateAlignedMemory<S>(&backtrack_storage,
         3 * T::kNumVar + 2 * T::kNumVar * max_num_predecessors, kRegisterSize / 8);
     typename T::type* H_pred = H + T::kNumVar;
     typename T::type* H_diag_pred = H_pred + T::kNumVar * max_num_predecessors;
@@ -1034,11 +1055,12 @@ Alignment SimdAlignmentEngine::linear(const char* sequence,
 #endif
 }
 
+template<Arch S>
 template<typename T>
-Alignment SimdAlignmentEngine::affine(const char* sequence,
+Alignment SimdAlignmentEngine<S>::affine(const char* sequence,
     std::uint32_t sequence_size, const std::unique_ptr<Graph>& graph) noexcept {
 
-#if defined(__AVX2__) || defined(__SSE4_1__)
+#if defined(__AVX2__) || defined(__SSE4_1__) || defined(USE_SIMDE)
 
     std::uint32_t normal_matrix_width = sequence_size;
     std::uint32_t matrix_width = (sequence_size + (sequence_size % T::kNumVar == 0 ?
@@ -1160,7 +1182,7 @@ Alignment SimdAlignmentEngine::affine(const char* sequence,
         }
 
         if (type_ == AlignmentType::kSW) {
-            std::int32_t max_row_score = _mmxxx_max_value<T>(score);
+            std::int32_t max_row_score = _mmxxx_max_value<S,T>(score);
             if (max_score < max_row_score) {
                 max_score = max_row_score;
                 max_i = i;
@@ -1168,7 +1190,7 @@ Alignment SimdAlignmentEngine::affine(const char* sequence,
 
         } else if (type_ == AlignmentType::kOV) {
             if (node->out_edges().empty()) {
-                std::int32_t max_row_score = _mmxxx_max_value<T>(score);
+                std::int32_t max_row_score = _mmxxx_max_value<S,T>(score);
                 if (max_score < max_row_score) {
                     max_score = max_row_score;
                     max_i = i;
@@ -1177,7 +1199,7 @@ Alignment SimdAlignmentEngine::affine(const char* sequence,
 
         } else if (type_ == AlignmentType::kNW) {
             if (node->out_edges().empty()) {
-                std::int32_t max_row_score = _mmxxx_value_at<T>(
+                std::int32_t max_row_score = _mmxxx_value_at<S,T>(
                     H_row[matrix_width - 1], last_column_id);
                 if (max_score < max_row_score) {
                     max_score = max_row_score;
@@ -1192,12 +1214,12 @@ Alignment SimdAlignmentEngine::affine(const char* sequence,
     }
 
     if (type_ == AlignmentType::kSW) {
-        max_j = _mmxxx_index_of<T>(&(pimpl_->H[max_i * matrix_width]),
+        max_j = _mmxxx_index_of<S,T>(&(pimpl_->H[max_i * matrix_width]),
             matrix_width, max_score);
 
     } else if (type_ == AlignmentType::kOV) {
         if (graph->nodes()[rank_to_node_id[max_i - 1]]->out_edges().empty()) {
-            max_j = _mmxxx_index_of<T>(&(pimpl_->H[max_i * matrix_width]),
+            max_j = _mmxxx_index_of<S,T>(&(pimpl_->H[max_i * matrix_width]),
                 matrix_width, max_score);
         } else {
             max_j = normal_matrix_width - 1;
@@ -1215,7 +1237,7 @@ Alignment SimdAlignmentEngine::affine(const char* sequence,
     }
 
     typename T::type* backtrack_storage = nullptr;
-    typename T::type* H = allocateAlignedMemory(&backtrack_storage,
+    typename T::type* H = allocateAlignedMemory<S>(&backtrack_storage,
         6 * T::kNumVar + 3 * T::kNumVar * max_num_predecessors, kRegisterSize / 8);
     typename T::type* H_pred = H + T::kNumVar;
     typename T::type* H_diag_pred = H_pred + T::kNumVar * max_num_predecessors;
@@ -1464,11 +1486,12 @@ Alignment SimdAlignmentEngine::affine(const char* sequence,
 #endif
 }
 
+template<Arch S>
 template<typename T>
-Alignment SimdAlignmentEngine::convex(const char* sequence,
+Alignment SimdAlignmentEngine<S>::convex(const char* sequence,
     std::uint32_t sequence_size, const std::unique_ptr<Graph>& graph) noexcept {
 
-#if defined(__AVX2__) || defined(__SSE4_1__)
+#if defined(__AVX2__) || defined(__SSE4_1__) || defined(USE_SIMDE)
 
     std::uint32_t normal_matrix_width = sequence_size;
     std::uint32_t matrix_width = (sequence_size + (sequence_size % T::kNumVar == 0 ?
@@ -1625,7 +1648,7 @@ Alignment SimdAlignmentEngine::convex(const char* sequence,
         }
 
         if (type_ == AlignmentType::kSW) {
-            std::int32_t max_row_score = _mmxxx_max_value<T>(score);
+            std::int32_t max_row_score = _mmxxx_max_value<S,T>(score);
             if (max_score < max_row_score) {
                 max_score = max_row_score;
                 max_i = i;
@@ -1633,7 +1656,7 @@ Alignment SimdAlignmentEngine::convex(const char* sequence,
 
         } else if (type_ == AlignmentType::kOV) {
             if (node->out_edges().empty()) {
-                std::int32_t max_row_score = _mmxxx_max_value<T>(score);
+                std::int32_t max_row_score = _mmxxx_max_value<S,T>(score);
                 if (max_score < max_row_score) {
                     max_score = max_row_score;
                     max_i = i;
@@ -1642,7 +1665,7 @@ Alignment SimdAlignmentEngine::convex(const char* sequence,
 
         } else if (type_ == AlignmentType::kNW) {
             if (node->out_edges().empty()) {
-                std::int32_t max_row_score = _mmxxx_value_at<T>(
+                std::int32_t max_row_score = _mmxxx_value_at<S,T>(
                     H_row[matrix_width - 1], last_column_id);
                 if (max_score < max_row_score) {
                     max_score = max_row_score;
@@ -1657,12 +1680,12 @@ Alignment SimdAlignmentEngine::convex(const char* sequence,
     }
 
     if (type_ == AlignmentType::kSW) {
-        max_j = _mmxxx_index_of<T>(&(pimpl_->H[max_i * matrix_width]),
+        max_j = _mmxxx_index_of<S,T>(&(pimpl_->H[max_i * matrix_width]),
             matrix_width, max_score);
 
     } else if (type_ == AlignmentType::kOV) {
         if (graph->nodes()[rank_to_node_id[max_i - 1]]->out_edges().empty()) {
-            max_j = _mmxxx_index_of<T>(&(pimpl_->H[max_i * matrix_width]),
+            max_j = _mmxxx_index_of<S,T>(&(pimpl_->H[max_i * matrix_width]),
                 matrix_width, max_score);
         } else {
             max_j = normal_matrix_width - 1;
@@ -1680,7 +1703,7 @@ Alignment SimdAlignmentEngine::convex(const char* sequence,
     }
 
     typename T::type* backtrack_storage = nullptr;
-    typename T::type* H = allocateAlignedMemory(&backtrack_storage,
+    typename T::type* H = allocateAlignedMemory<S>(&backtrack_storage,
         9 * T::kNumVar + 4 * T::kNumVar * max_num_predecessors, kRegisterSize / 8);
     typename T::type* H_pred = H + T::kNumVar;
     typename T::type* H_diag_pred = H_pred + T::kNumVar * max_num_predecessors;



View it on GitLab: https://salsa.debian.org/med-team/spoa/-/compare/ec6f4fb82326b24b095e91863b883ce6775bb7db...14428594db7dbbd056b3eb43ce208ac113ac7d6a

-- 
View it on GitLab: https://salsa.debian.org/med-team/spoa/-/compare/ec6f4fb82326b24b095e91863b883ce6775bb7db...14428594db7dbbd056b3eb43ce208ac113ac7d6a
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200805/b9c54b01/attachment-0001.html>


More information about the debian-med-commit mailing list