[med-svn] [Git][med-team/rapmap][master] Add non-x86 compatibility with libsimde-dev

Michael R. Crusoe gitlab at salsa.debian.org
Fri Jun 5 10:32:06 BST 2020



Michael R. Crusoe pushed to branch master at Debian Med / rapmap


Commits:
f6be793f by Michael R. Crusoe at 2020-06-05T11:09:20+02:00
Add non-x86 compatibility with libsimde-dev

- - - - -
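
The pattern used throughout this commit: SIMD Everywhere (SIMDe) is a
header-only shim, so defining SIMDE_ENABLE_NATIVE_ALIASES before including a
simde/x86/*.h header lets existing _mm_* intrinsic code compile unchanged on
non-x86 targets. A minimal sketch (illustrative only, not RapMap code):

#define SIMDE_ENABLE_NATIVE_ALIASES
#include "simde/x86/sse2.h"  /* in place of <emmintrin.h> */
#include <stdint.h>
#include <stdio.h>

int main(void) {
    /* On x86 this lowers to real SSE2 instructions; elsewhere SIMDe
       emulates the same _mm_* names (e.g. with NEON on arm64). */
    __m128i a = _mm_set1_epi8(3);
    __m128i b = _mm_set1_epi8(4);
    __m128i sum = _mm_add_epi8(a, b);
    int8_t out[16];
    _mm_storeu_si128((__m128i *)out, sum);
    printf("%d\n", out[0]);  /* prints 7 */
    return 0;
}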


6 changed files:

- debian/changelog
- debian/control
- debian/patches/portable_pause
- debian/patches/series
- + debian/patches/simde
- debian/rules


Changes:

=====================================
debian/changelog
=====================================
@@ -1,3 +1,9 @@
+rapmap (0.15.0+dfsg-2) UNRELEASED; urgency=medium
+
+  * Add non-x86 compatibility with libsimde-dev
+
+ -- Michael R. Crusoe <crusoe at debian.org>  Fri, 05 Jun 2020 09:40:30 +0200
+
 rapmap (0.15.0+dfsg-1) unstable; urgency=medium
 
   * New upstream version


=====================================
debian/control
=====================================
@@ -11,6 +11,7 @@ Build-Depends: debhelper-compat (= 12),
                libcereal-dev,
                libjellyfish-2.0-dev,
                libjemalloc-dev,
+               libsimde-dev,
                libsparsehash-dev,
                libspdlog-dev,
                libtclap-dev,
@@ -26,6 +27,7 @@ Package: rapmap
 Architecture: amd64
 Depends: ${shlibs:Depends},
          ${misc:Depends}
+Built-Using: ${simde:Built-Using}
 Description: rapid sensitive and accurate DNA read mapping via quasi-mapping
  RapMap is a testing ground for ideas in quasi-mapping / (lightweight /
  pseudo) transcriptome alignment. That means that, at this point, it is


=====================================
debian/patches/portable_pause
=====================================
@@ -1,18 +1,40 @@
-From: Michael R. Crusoe <michael.crusoe at gmail.com>
+From: Michael R. Crusoe <crusoe at debian.org>
 Subject: Add portable pauses
---- rapmap.orig/include/FastxParserThreadUtils.hpp
-+++ rapmap/include/FastxParserThreadUtils.hpp
-@@ -18,7 +18,13 @@
+Forwarded: https://github.com/COMBINE-lab/RapMap/pull/49
+--- a/include/FastxParserThreadUtils.hpp
++++ b/include/FastxParserThreadUtils.hpp
+@@ -6,6 +6,9 @@
+ #include <chrono>
+ #include <random>
+ #include <pthread.h>
++#if defined(__SSE2__)
++#include <xmmintrin.h> // _mm_pause
++#endif
+ 
+ // Most of this code is taken directly from https://github.com/geidav/spinlocks-bench/blob/master/os.hpp.
+ // However, things may be renamed, modified, or randomly mangled over time.
+@@ -18,7 +21,23 @@ namespace fastx_parser {
      static const size_t MAX_BACKOFF_ITERS = 1024;
  
      ALWAYS_INLINE static void cpuRelax() {
-+#if defined(__aarch64__) || defined(arm64)
-+      asm volatile("yield" ::: "memory");
-+#elif defined(__PPC64__) || defined(PPC64) || defined(__ppc64__)
-+      asm("ori 0,0,0");
-+#else
-       asm("pause");
-+#endif
+-      asm("pause");
++    #if defined(__SSE2__)  // AMD and Intel
++      _mm_pause();
++    #elif defined(__i386__) || defined(__x86_64__)
++      asm volatile("pause");
++    #elif defined(__aarch64__)
++      asm volatile("wfe");
++    #elif defined(__armel__) || defined(__ARMEL__)
++      asm volatile ("nop" ::: "memory");
++    #elif defined(__arm__) || defined(__aarch64__)
++      __asm__ __volatile__ ("yield" ::: "memory");
++    #elif defined(__ia64__)  // IA64
++      __asm__ __volatile__ ("hint @pause");
++    #elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)
++       __asm__ __volatile__ ("or 27,27,27" ::: "memory");
++    #else  // everything else.
++       asm volatile ("nop" ::: "memory");
++    #endif
      }
  
      ALWAYS_INLINE void yieldSleep() {
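
For context, cpuRelax() above is the body of a spin-wait loop: the pause/yield
hint tells the CPU the thread is busy-waiting, which saves power and reduces
pressure on an SMT sibling. A minimal sketch of how such a primitive is
typically used with exponential backoff (illustrative only; the names and the
exact hints are assumptions, not RapMap's code):

#include <stdatomic.h>
#include <sched.h>
#include <stddef.h>

static inline void cpu_relax(void) {
#if defined(__SSE2__)
    __builtin_ia32_pause();                   /* x86 "pause" */
#elif defined(__aarch64__)
    __asm__ __volatile__("yield" ::: "memory");
#else
    __asm__ __volatile__("" ::: "memory");    /* plain compiler barrier */
#endif
}

void spin_lock(atomic_flag *lock) {
    size_t backoff = 1;
    const size_t max_backoff = 1024;          /* cf. MAX_BACKOFF_ITERS above */
    while (atomic_flag_test_and_set_explicit(lock, memory_order_acquire)) {
        for (size_t i = 0; i < backoff; ++i)
            cpu_relax();                      /* back off while spinning */
        if (backoff < max_backoff)
            backoff *= 2;                     /* exponential backoff */
        else
            sched_yield();                    /* yield the core on long waits */
    }
}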


=====================================
debian/patches/series
=====================================
@@ -4,3 +4,4 @@ avoid_privacy_breach.patch
 spelling
 portable_pause
 no_gomp_needed
+simde


=====================================
debian/patches/simde
=====================================
@@ -0,0 +1,642 @@
+From: Michael R. Crusoe <crusoe at debian.org>
+Subject: Add non-x86 portability using SIMD Everywhere
+--- rapmap.orig/src/metro/metrohash128crc.cpp
++++ rapmap/src/metro/metrohash128crc.cpp
+@@ -24,7 +24,8 @@
+ //
+ 
+ 
+-#include <nmmintrin.h>
++#define SIMDE_ENABLE_NATIVE_ALIASES
++#include "simde/x86/sse4.2.h"
+ #include <string.h>
+ #include "metro/metrohash.h"
+ #include "metro/platform.h"
+--- rapmap.orig/src/ksw2pp/ksw2_extd2_sse.c
++++ rapmap/src/ksw2pp/ksw2_extd2_sse.c
+@@ -3,29 +3,19 @@
+ #include <assert.h>
+ #include "ksw2pp/ksw2.h"
+ 
+-#ifdef __SSE2__
+-#include <emmintrin.h>
++#define SIMDE_ENABLE_NATIVE_ALIASES
++#include "simde/x86/sse4.1.h"
+ 
+-#ifdef KSW_SSE2_ONLY
+-#undef __SSE4_1__
+-#endif
+-
+-#ifdef __SSE4_1__
+-#include <smmintrin.h>
+-#endif
+-
+-#ifdef KSW_CPU_DISPATCH
+ #ifdef __SSE4_1__
+ void ksw_extd2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
+ 				   int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez)
+-#else
++#elif defined(__SSE2__)
+ void ksw_extd2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
+ 				   int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez)
+-#endif
+ #else
+ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
+ 				   int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez)
+-#endif // ~KSW_CPU_DISPATCH
++#endif
+ {
+ #define __dp_code_block1 \
+ 	z = _mm_load_si128(&s[t]); \
+@@ -161,13 +151,8 @@
+ 				st = _mm_loadu_si128((__m128i*)&qrr[t]);
+ 				mask = _mm_or_si128(_mm_cmpeq_epi8(sq, m1_), _mm_cmpeq_epi8(st, m1_));
+ 				tmp = _mm_cmpeq_epi8(sq, st);
+-#ifdef __SSE4_1__
+ 				tmp = _mm_blendv_epi8(sc_mis_, sc_mch_, tmp);
+ 				tmp = _mm_blendv_epi8(tmp,     sc_N_,   mask);
+-#else
+-				tmp = _mm_or_si128(_mm_andnot_si128(tmp,  sc_mis_), _mm_and_si128(tmp,  sc_mch_));
+-				tmp = _mm_or_si128(_mm_andnot_si128(mask, tmp),     _mm_and_si128(mask, sc_N_));
+-#endif
+ 				_mm_storeu_si128((__m128i*)((int8_t*)s + t), tmp);
+ 			}
+ 		} else {
+@@ -184,7 +169,6 @@
+ 			for (t = st_; t <= en_; ++t) {
+ 				__m128i z, a, b, a2, b2, xt1, x2t1, vt1, ut, tmp;
+ 				__dp_code_block1;
+-#ifdef __SSE4_1__
+ 				z = _mm_max_epi8(z, a);
+ 				z = _mm_max_epi8(z, b);
+ 				z = _mm_max_epi8(z, a2);
+@@ -195,27 +179,6 @@
+ 				_mm_store_si128(&y[t],  _mm_sub_epi8(_mm_max_epi8(b,  zero_), qe_));
+ 				_mm_store_si128(&x2[t], _mm_sub_epi8(_mm_max_epi8(a2, zero_), qe2_));
+ 				_mm_store_si128(&y2[t], _mm_sub_epi8(_mm_max_epi8(b2, zero_), qe2_));
+-#else
+-				tmp = _mm_cmpgt_epi8(a,  z);
+-				z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, a));
+-				tmp = _mm_cmpgt_epi8(b,  z);
+-				z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, b));
+-				tmp = _mm_cmpgt_epi8(a2, z);
+-				z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, a2));
+-				tmp = _mm_cmpgt_epi8(b2, z);
+-				z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, b2));
+-				tmp = _mm_cmplt_epi8(sc_mch_, z);
+-				z = _mm_or_si128(_mm_and_si128(tmp, sc_mch_), _mm_andnot_si128(tmp, z));
+-				__dp_code_block2;
+-				tmp = _mm_cmpgt_epi8(a, zero_);
+-				_mm_store_si128(&x[t],  _mm_sub_epi8(_mm_and_si128(tmp, a),  qe_));
+-				tmp = _mm_cmpgt_epi8(b, zero_);
+-				_mm_store_si128(&y[t],  _mm_sub_epi8(_mm_and_si128(tmp, b),  qe_));
+-				tmp = _mm_cmpgt_epi8(a2, zero_);
+-				_mm_store_si128(&x2[t], _mm_sub_epi8(_mm_and_si128(tmp, a2), qe2_));
+-				tmp = _mm_cmpgt_epi8(b2, zero_);
+-				_mm_store_si128(&y2[t], _mm_sub_epi8(_mm_and_si128(tmp, b2), qe2_));
+-#endif
+ 			}
+ 		} else if (!(flag&KSW_EZ_RIGHT)) { // gap left-alignment
+ 			__m128i *pr = p + (size_t)r * n_col_ - st_;
+@@ -223,7 +186,6 @@
+ 			for (t = st_; t <= en_; ++t) {
+ 				__m128i d, z, a, b, a2, b2, xt1, x2t1, vt1, ut, tmp;
+ 				__dp_code_block1;
+-#ifdef __SSE4_1__
+ 				d = _mm_and_si128(_mm_cmpgt_epi8(a, z), _mm_set1_epi8(1));       // d = a  > z? 1 : 0
+ 				z = _mm_max_epi8(z, a);
+ 				d = _mm_blendv_epi8(d, _mm_set1_epi8(2), _mm_cmpgt_epi8(b,  z)); // d = b  > z? 2 : d
+@@ -233,22 +195,6 @@
+ 				d = _mm_blendv_epi8(d, _mm_set1_epi8(4), _mm_cmpgt_epi8(b2, z)); // d = a2 > z? 3 : d
+ 				z = _mm_max_epi8(z, b2);
+ 				z = _mm_min_epi8(z, sc_mch_);
+-#else // we need to emulate SSE4.1 intrinsics _mm_max_epi8() and _mm_blendv_epi8()
+-				tmp = _mm_cmpgt_epi8(a,  z);
+-				d = _mm_and_si128(tmp, _mm_set1_epi8(1));
+-				z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, a));
+-				tmp = _mm_cmpgt_epi8(b,  z);
+-				d = _mm_or_si128(_mm_andnot_si128(tmp, d), _mm_and_si128(tmp, _mm_set1_epi8(2)));
+-				z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, b));
+-				tmp = _mm_cmpgt_epi8(a2, z);
+-				d = _mm_or_si128(_mm_andnot_si128(tmp, d), _mm_and_si128(tmp, _mm_set1_epi8(3)));
+-				z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, a2));
+-				tmp = _mm_cmpgt_epi8(b2, z);
+-				d = _mm_or_si128(_mm_andnot_si128(tmp, d), _mm_and_si128(tmp, _mm_set1_epi8(4)));
+-				z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, b2));
+-				tmp = _mm_cmplt_epi8(sc_mch_, z);
+-				z = _mm_or_si128(_mm_and_si128(tmp, sc_mch_), _mm_andnot_si128(tmp, z));
+-#endif
+ 				__dp_code_block2;
+ 				tmp = _mm_cmpgt_epi8(a, zero_);
+ 				_mm_store_si128(&x[t],  _mm_sub_epi8(_mm_and_si128(tmp, a),  qe_));
+@@ -270,7 +216,6 @@
+ 			for (t = st_; t <= en_; ++t) {
+ 				__m128i d, z, a, b, a2, b2, xt1, x2t1, vt1, ut, tmp;
+ 				__dp_code_block1;
+-#ifdef __SSE4_1__
+ 				d = _mm_andnot_si128(_mm_cmpgt_epi8(z, a), _mm_set1_epi8(1));    // d = z > a?  0 : 1
+ 				z = _mm_max_epi8(z, a);
+ 				d = _mm_blendv_epi8(_mm_set1_epi8(2), d, _mm_cmpgt_epi8(z, b));  // d = z > b?  d : 2
+@@ -280,22 +225,6 @@
+ 				d = _mm_blendv_epi8(_mm_set1_epi8(4), d, _mm_cmpgt_epi8(z, b2)); // d = z > b2? d : 4
+ 				z = _mm_max_epi8(z, b2);
+ 				z = _mm_min_epi8(z, sc_mch_);
+-#else // we need to emulate SSE4.1 intrinsics _mm_max_epi8() and _mm_blendv_epi8()
+-				tmp = _mm_cmpgt_epi8(z, a);
+-				d = _mm_andnot_si128(tmp, _mm_set1_epi8(1));
+-				z = _mm_or_si128(_mm_and_si128(tmp, z), _mm_andnot_si128(tmp, a));
+-				tmp = _mm_cmpgt_epi8(z, b);
+-				d = _mm_or_si128(_mm_and_si128(tmp, d), _mm_andnot_si128(tmp, _mm_set1_epi8(2)));
+-				z = _mm_or_si128(_mm_and_si128(tmp, z), _mm_andnot_si128(tmp, b));
+-				tmp = _mm_cmpgt_epi8(z, a2);
+-				d = _mm_or_si128(_mm_and_si128(tmp, d), _mm_andnot_si128(tmp, _mm_set1_epi8(3)));
+-				z = _mm_or_si128(_mm_and_si128(tmp, z), _mm_andnot_si128(tmp, a2));
+-				tmp = _mm_cmpgt_epi8(z, b2);
+-				d = _mm_or_si128(_mm_and_si128(tmp, d), _mm_andnot_si128(tmp, _mm_set1_epi8(4)));
+-				z = _mm_or_si128(_mm_and_si128(tmp, z), _mm_andnot_si128(tmp, b2));
+-				tmp = _mm_cmplt_epi8(sc_mch_, z);
+-				z = _mm_or_si128(_mm_and_si128(tmp, sc_mch_), _mm_andnot_si128(tmp, z));
+-#endif
+ 				__dp_code_block2;
+ 				tmp = _mm_cmpgt_epi8(zero_, a);
+ 				_mm_store_si128(&x[t],  _mm_sub_epi8(_mm_andnot_si128(tmp, a),  qe_));
+@@ -330,13 +259,8 @@
+ 					_mm_storeu_si128((__m128i*)&H[t], H1);
+ 					t_ = _mm_set1_epi32(t);
+ 					tmp = _mm_cmpgt_epi32(H1, max_H_);
+-#ifdef __SSE4_1__
+ 					max_H_ = _mm_blendv_epi8(max_H_, H1, tmp);
+ 					max_t_ = _mm_blendv_epi8(max_t_, t_, tmp);
+-#else
+-					max_H_ = _mm_or_si128(_mm_and_si128(tmp, H1), _mm_andnot_si128(tmp, max_H_));
+-					max_t_ = _mm_or_si128(_mm_and_si128(tmp, t_), _mm_andnot_si128(tmp, max_t_));
+-#endif
+ 				}
+ 				_mm_storeu_si128((__m128i*)HH, max_H_);
+ 				_mm_storeu_si128((__m128i*)tt, max_t_);
+@@ -391,4 +315,3 @@
+ 		kfree(km, mem2); kfree(km, off);
+ 	}
+ }
+-#endif // __SSE2__
+--- rapmap.orig/src/ksw2pp/ksw2_extf2_sse.c
++++ rapmap/src/ksw2pp/ksw2_extf2_sse.c
+@@ -1,22 +1,16 @@
+ #include <string.h>
+ #include "ksw2pp/ksw2.h"
+ 
+-#ifdef __SSE2__
+-#include <emmintrin.h>
++#define SIMDE_ENABLE_NATIVE_ALIASES
++#include "simde/x86/sse4.1.h"
+ 
+ #ifdef __SSE4_1__
+-#include <smmintrin.h>
+-#endif
+-
+-#ifdef KSW_CPU_DISPATCH
+-#ifdef __SSE4_1__
+ void ksw_extf2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t mch, int8_t mis, int8_t e, int w, int xdrop, ksw_extz_t *ez)
+-#else
+-  void ksw_extf2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t mch, int8_t mis, int8_t e, int w, int xdrop, ksw_extz_t *ez)
+-#endif
++#elif defined(__SSE2__)
++void ksw_extf2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t mch, int8_t mis, int8_t e, int w, int xdrop, ksw_extz_t *ez)
+ #else
+ void ksw_extf2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t mch, int8_t mis, int8_t e, int w, int xdrop, ksw_extz_t *ez)
+-#endif // ~KSW_CPU_DISPATCH
++#endif
+ {
+ 	int32_t r, t, tlen_, qlen_, last_st, last_en, H0 = 0, last_H0_t = 0;
+ 	uint8_t *qr, *sf, *mem;
+@@ -60,11 +54,7 @@
+ 			sq = _mm_loadu_si128((__m128i*)&sf[t]);
+ 			st = _mm_loadu_si128((__m128i*)&qrr[t]);
+ 			tmp = _mm_cmpeq_epi8(sq, st);
+-#ifdef __SSE4_1__
+ 			tmp = _mm_blendv_epi8(sc_mis_, sc_mch_, tmp);
+-#else
+-			tmp = _mm_or_si128(_mm_andnot_si128(tmp, sc_mis_), _mm_and_si128(tmp, sc_mch_));
+-#endif
+ 			_mm_storeu_si128((__m128i*)((uint8_t*)s + t), tmp);
+ 		}
+ 		for (t = st_; t <= en_; ++t) {
+@@ -75,12 +65,7 @@
+ 			vt1 = _mm_or_si128(_mm_slli_si128(vt1, 1), v1_); // vt1 <- v[r-1][t-1..t+14]
+ 			v1_ = tmp;
+ 			ut = _mm_load_si128(&u[t]);                      // ut <- u[t..t+15]
+-#ifdef __SSE4_1__
+ 			z = _mm_max_epi8(z, vt1);                        // z = z > a? z : a (signed)
+-#else
+-			z = _mm_and_si128(z, _mm_cmpgt_epi8(z, _mm_setzero_si128()));  // z = z > 0? z : 0;
+-			z = _mm_max_epu8(z, vt1);                        // z = max(z, a); this works because both are non-negative
+-#endif
+ 			z = _mm_max_epu8(z, ut);                         // z = max(z, b); this works because both are non-negative
+ 			_mm_store_si128(&u[t], _mm_sub_epi8(z, vt1));    // u[r][t..t+15] <- z - v[r-1][t-1..t+14]
+ 			_mm_store_si128(&v[t], _mm_sub_epi8(z, ut));     // v[r][t..t+15] <- z - u[r-1][t..t+15]
+@@ -104,4 +89,3 @@
+ 	else ez->zdropped = 1;
+ 	kfree(km, mem);
+ }
+-#endif // __SSE2__
+--- rapmap.orig/src/ksw2pp/ksw2_exts2_sse.c
++++ rapmap/src/ksw2pp/ksw2_exts2_sse.c
+@@ -3,29 +3,19 @@
+ #include <assert.h>
+ #include "ksw2pp/ksw2.h"
+ 
+-#ifdef __SSE2__
+-#include <emmintrin.h>
++#define SIMDE_ENABLE_NATIVE_ALIASES
++#include "simde/x86/sse4.1.h"
+ 
+-#ifdef KSW_SSE2_ONLY
+-#undef __SSE4_1__
+-#endif
+-
+-#ifdef __SSE4_1__
+-#include <smmintrin.h>
+-#endif
+-
+-#ifdef KSW_CPU_DISPATCH
+ #ifdef __SSE4_1__
+ void ksw_exts2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
+ 				   int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez)
+-#else
++#elif defined(__SSE2__)
+ void ksw_exts2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
+ 				   int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez)
+-#endif
+ #else
+ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
+ 				   int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez)
+-#endif // ~KSW_CPU_DISPATCH
++#endif
+ {
+ #define __dp_code_block1 \
+ 	z = _mm_load_si128(&s[t]); \
+@@ -161,13 +151,8 @@
+ 				st = _mm_loadu_si128((__m128i*)&qrr[t]);
+ 				mask = _mm_or_si128(_mm_cmpeq_epi8(sq, m1_), _mm_cmpeq_epi8(st, m1_));
+ 				tmp = _mm_cmpeq_epi8(sq, st);
+-#ifdef __SSE4_1__
+ 				tmp = _mm_blendv_epi8(sc_mis_, sc_mch_, tmp);
+ 				tmp = _mm_blendv_epi8(tmp,     sc_N_,   mask);
+-#else
+-				tmp = _mm_or_si128(_mm_andnot_si128(tmp,  sc_mis_), _mm_and_si128(tmp,  sc_mch_));
+-				tmp = _mm_or_si128(_mm_andnot_si128(mask, tmp),     _mm_and_si128(mask, sc_N_));
+-#endif
+ 				_mm_storeu_si128((__m128i*)((int8_t*)s + t), tmp);
+ 			}
+ 		} else {
+@@ -184,7 +169,6 @@
+ 			for (t = st_; t <= en_; ++t) {
+ 				__m128i z, a, b, a2, a2a, xt1, x2t1, vt1, ut, tmp;
+ 				__dp_code_block1;
+-#ifdef __SSE4_1__
+ 				z = _mm_max_epi8(z, a);
+ 				z = _mm_max_epi8(z, b);
+ 				z = _mm_max_epi8(z, a2a);
+@@ -193,23 +177,6 @@
+ 				_mm_store_si128(&y[t],  _mm_sub_epi8(_mm_max_epi8(b,  zero_), qe_));
+ 				tmp = _mm_load_si128(&donor[t]);
+ 				_mm_store_si128(&x2[t], _mm_sub_epi8(_mm_max_epi8(a2, tmp), q2_));
+-#else
+-				tmp = _mm_cmpgt_epi8(a,  z);
+-				z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, a));
+-				tmp = _mm_cmpgt_epi8(b,  z);
+-				z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, b));
+-				tmp = _mm_cmpgt_epi8(a2a, z);
+-				z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, a2a));
+-				__dp_code_block2;
+-				tmp = _mm_cmpgt_epi8(a, zero_);
+-				_mm_store_si128(&x[t],  _mm_sub_epi8(_mm_and_si128(tmp, a),  qe_));
+-				tmp = _mm_cmpgt_epi8(b, zero_);
+-				_mm_store_si128(&y[t],  _mm_sub_epi8(_mm_and_si128(tmp, b),  qe_));
+-				tmp = _mm_load_si128(&donor[t]); // TODO: check if this is correct
+-				tmp = _mm_cmpgt_epi8(a2, tmp);
+-				tmp = _mm_or_si128(_mm_andnot_si128(tmp, tmp), _mm_and_si128(tmp, a2));
+-				_mm_store_si128(&x2[t], _mm_sub_epi8(tmp, q2_));
+-#endif
+ 			}
+ 		} else if (!(flag&KSW_EZ_RIGHT)) { // gap left-alignment
+ 			__m128i *pr = p + r * n_col_ - st_;
+@@ -217,24 +184,12 @@
+ 			for (t = st_; t <= en_; ++t) {
+ 				__m128i d, z, a, b, a2, a2a, xt1, x2t1, vt1, ut, tmp, tmp2;
+ 				__dp_code_block1;
+-#ifdef __SSE4_1__
+ 				d = _mm_and_si128(_mm_cmpgt_epi8(a, z), _mm_set1_epi8(1));       // d = a  > z? 1 : 0
+ 				z = _mm_max_epi8(z, a);
+ 				d = _mm_blendv_epi8(d, _mm_set1_epi8(2), _mm_cmpgt_epi8(b,  z)); // d = b  > z? 2 : d
+ 				z = _mm_max_epi8(z, b);
+ 				d = _mm_blendv_epi8(d, _mm_set1_epi8(3), _mm_cmpgt_epi8(a2a, z)); // d = a2 > z? 3 : d
+ 				z = _mm_max_epi8(z, a2a);
+-#else // we need to emulate SSE4.1 intrinsics _mm_max_epi8() and _mm_blendv_epi8()
+-				tmp = _mm_cmpgt_epi8(a,  z);
+-				d = _mm_and_si128(tmp, _mm_set1_epi8(1));
+-				z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, a));
+-				tmp = _mm_cmpgt_epi8(b,  z);
+-				d = _mm_or_si128(_mm_andnot_si128(tmp, d), _mm_and_si128(tmp, _mm_set1_epi8(2)));
+-				z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, b));
+-				tmp = _mm_cmpgt_epi8(a2a, z);
+-				d = _mm_or_si128(_mm_andnot_si128(tmp, d), _mm_and_si128(tmp, _mm_set1_epi8(3)));
+-				z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, a2a));
+-#endif
+ 				__dp_code_block2;
+ 				tmp = _mm_cmpgt_epi8(a, zero_);
+ 				_mm_store_si128(&x[t],  _mm_sub_epi8(_mm_and_si128(tmp, a),  qe_));
+@@ -245,11 +200,7 @@
+ 
+ 				tmp2 = _mm_load_si128(&donor[t]);
+ 				tmp = _mm_cmpgt_epi8(a2, tmp2);
+-#ifdef __SSE4_1__
+ 				tmp2 = _mm_max_epi8(a2, tmp2);
+-#else
+-				tmp2 = _mm_or_si128(_mm_andnot_si128(tmp, tmp2), _mm_and_si128(tmp, a2));
+-#endif
+ 				_mm_store_si128(&x2[t], _mm_sub_epi8(tmp2, q2_));
+ 				d = _mm_or_si128(d, _mm_and_si128(tmp, _mm_set1_epi8(0x20)));
+ 				_mm_store_si128(&pr[t], d);
+@@ -260,24 +211,12 @@
+ 			for (t = st_; t <= en_; ++t) {
+ 				__m128i d, z, a, b, a2, a2a, xt1, x2t1, vt1, ut, tmp, tmp2;
+ 				__dp_code_block1;
+-#ifdef __SSE4_1__
+ 				d = _mm_andnot_si128(_mm_cmpgt_epi8(z, a), _mm_set1_epi8(1));    // d = z > a?  0 : 1
+ 				z = _mm_max_epi8(z, a);
+ 				d = _mm_blendv_epi8(_mm_set1_epi8(2), d, _mm_cmpgt_epi8(z, b));  // d = z > b?  d : 2
+ 				z = _mm_max_epi8(z, b);
+ 				d = _mm_blendv_epi8(_mm_set1_epi8(3), d, _mm_cmpgt_epi8(z, a2a)); // d = z > a2? d : 3
+ 				z = _mm_max_epi8(z, a2a);
+-#else // we need to emulate SSE4.1 intrinsics _mm_max_epi8() and _mm_blendv_epi8()
+-				tmp = _mm_cmpgt_epi8(z, a);
+-				d = _mm_andnot_si128(tmp, _mm_set1_epi8(1));
+-				z = _mm_or_si128(_mm_and_si128(tmp, z), _mm_andnot_si128(tmp, a));
+-				tmp = _mm_cmpgt_epi8(z, b);
+-				d = _mm_or_si128(_mm_and_si128(tmp, d), _mm_andnot_si128(tmp, _mm_set1_epi8(2)));
+-				z = _mm_or_si128(_mm_and_si128(tmp, z), _mm_andnot_si128(tmp, b));
+-				tmp = _mm_cmpgt_epi8(z, a2a);
+-				d = _mm_or_si128(_mm_and_si128(tmp, d), _mm_andnot_si128(tmp, _mm_set1_epi8(3)));
+-				z = _mm_or_si128(_mm_and_si128(tmp, z), _mm_andnot_si128(tmp, a2a));
+-#endif
+ 				__dp_code_block2;
+ 				tmp = _mm_cmpgt_epi8(zero_, a);
+ 				_mm_store_si128(&x[t],  _mm_sub_epi8(_mm_andnot_si128(tmp, a),  qe_));
+@@ -288,11 +227,7 @@
+ 
+ 				tmp2 = _mm_load_si128(&donor[t]);
+ 				tmp = _mm_cmpgt_epi8(tmp2, a2);
+-#ifdef __SSE4_1__
+ 				tmp2 = _mm_max_epi8(tmp2, a2);
+-#else
+-				tmp2 = _mm_or_si128(_mm_andnot_si128(tmp, a2), _mm_and_si128(tmp, tmp2));
+-#endif
+ 				_mm_store_si128(&x2[t], _mm_sub_epi8(tmp2, q2_));
+ 				d = _mm_or_si128(d, _mm_andnot_si128(tmp, _mm_set1_epi8(0x20))); // d = a > 0? 1<<5 : 0
+ 				_mm_store_si128(&pr[t], d);
+@@ -316,13 +251,8 @@
+ 					_mm_storeu_si128((__m128i*)&H[t], H1);
+ 					t_ = _mm_set1_epi32(t);
+ 					tmp = _mm_cmpgt_epi32(H1, max_H_);
+-#ifdef __SSE4_1__
+ 					max_H_ = _mm_blendv_epi8(max_H_, H1, tmp);
+ 					max_t_ = _mm_blendv_epi8(max_t_, t_, tmp);
+-#else
+-					max_H_ = _mm_or_si128(_mm_and_si128(tmp, H1), _mm_andnot_si128(tmp, max_H_));
+-					max_t_ = _mm_or_si128(_mm_and_si128(tmp, t_), _mm_andnot_si128(tmp, max_t_));
+-#endif
+ 				}
+ 				_mm_storeu_si128((__m128i*)HH, max_H_);
+ 				_mm_storeu_si128((__m128i*)tt, max_t_);
+@@ -373,4 +303,3 @@
+ 		kfree(km, mem2); kfree(km, off);
+ 	}
+ }
+-#endif // __SSE2__
+--- rapmap.orig/src/ksw2pp/ksw2_extz2_sse.c
++++ rapmap/src/ksw2pp/ksw2_extz2_sse.c
+@@ -2,26 +2,16 @@
+ #include <assert.h>
+ #include "ksw2pp/ksw2.h"
+ 
+-#ifdef __SSE2__
+-#include <emmintrin.h>
+-
+-#ifdef KSW_SSE2_ONLY
+-#undef __SSE4_1__
+-#endif
++#define SIMDE_ENABLE_NATIVE_ALIASES
++#include "simde/x86/sse4.1.h"
+ 
+ #ifdef __SSE4_1__
+-#include <smmintrin.h>
+-#endif
+-
+-#ifdef KSW_CPU_DISPATCH
+-#ifdef __SSE4_1__
+ void ksw_extz2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez)
+-#else
++#elif defined(__SSE2__)
+ void ksw_extz2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez)
+-#endif
+ #else
+ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez)
+-#endif // ~KSW_CPU_DISPATCH
++#endif
+ {
+ #define __dp_code_block1 \
+ 	z = _mm_add_epi8(_mm_load_si128(&s[t]), qe2_); \
+@@ -129,13 +119,8 @@
+ 				st = _mm_loadu_si128((__m128i*)&qrr[t]);
+ 				mask = _mm_or_si128(_mm_cmpeq_epi8(sq, m1_), _mm_cmpeq_epi8(st, m1_));
+ 				tmp = _mm_cmpeq_epi8(sq, st);
+-#ifdef __SSE4_1__
+ 				tmp = _mm_blendv_epi8(sc_mis_, sc_mch_, tmp);
+ 				tmp = _mm_blendv_epi8(tmp,     sc_N_,   mask);
+-#else
+-				tmp = _mm_or_si128(_mm_andnot_si128(tmp,  sc_mis_), _mm_and_si128(tmp,  sc_mch_));
+-				tmp = _mm_or_si128(_mm_andnot_si128(mask, tmp),     _mm_and_si128(mask, sc_N_));
+-#endif
+ 				_mm_storeu_si128((__m128i*)((uint8_t*)s + t), tmp);
+ 			}
+ 		} else {
+@@ -151,22 +136,10 @@
+ 			for (t = st_; t <= en_; ++t) {
+ 				__m128i z, a, b, xt1, vt1, ut, tmp;
+ 				__dp_code_block1;
+-#ifdef __SSE4_1__
+ 				z = _mm_max_epi8(z, a);                          // z = z > a? z : a (signed)
+-#else // we need to emulate SSE4.1 intrinsics _mm_max_epi8()
+-				z = _mm_and_si128(z, _mm_cmpgt_epi8(z, zero_));  // z = z > 0? z : 0;
+-				z = _mm_max_epu8(z, a);                          // z = max(z, a); this works because both are non-negative
+-#endif
+ 				__dp_code_block2;
+-#ifdef __SSE4_1__
+ 				_mm_store_si128(&x[t], _mm_max_epi8(a, zero_));
+ 				_mm_store_si128(&y[t], _mm_max_epi8(b, zero_));
+-#else
+-				tmp = _mm_cmpgt_epi8(a, zero_);
+-				_mm_store_si128(&x[t], _mm_and_si128(a, tmp));
+-				tmp = _mm_cmpgt_epi8(b, zero_);
+-				_mm_store_si128(&y[t], _mm_and_si128(b, tmp));
+-#endif
+ 			}
+ 		} else if (!(flag&KSW_EZ_RIGHT)) { // gap left-alignment
+ 			__m128i *pr = p + (size_t)r * n_col_ - st_;
+@@ -175,16 +148,9 @@
+ 				__m128i d, z, a, b, xt1, vt1, ut, tmp;
+ 				__dp_code_block1;
+ 				d = _mm_and_si128(_mm_cmpgt_epi8(a, z), flag1_); // d = a > z? 1 : 0
+-#ifdef __SSE4_1__
+ 				z = _mm_max_epi8(z, a);                          // z = z > a? z : a (signed)
+ 				tmp = _mm_cmpgt_epi8(b, z);
+ 				d = _mm_blendv_epi8(d, flag2_, tmp);             // d = b > z? 2 : d
+-#else // we need to emulate SSE4.1 intrinsics _mm_max_epi8() and _mm_blendv_epi8()
+-				z = _mm_and_si128(z, _mm_cmpgt_epi8(z, zero_));  // z = z > 0? z : 0;
+-				z = _mm_max_epu8(z, a);                          // z = max(z, a); this works because both are non-negative
+-				tmp = _mm_cmpgt_epi8(b, z);
+-				d = _mm_or_si128(_mm_andnot_si128(tmp, d), _mm_and_si128(tmp, flag2_)); // d = b > z? 2 : d; emulating blendv
+-#endif
+ 				__dp_code_block2;
+ 				tmp = _mm_cmpgt_epi8(a, zero_);
+ 				_mm_store_si128(&x[t], _mm_and_si128(tmp, a));
+@@ -201,16 +167,9 @@
+ 				__m128i d, z, a, b, xt1, vt1, ut, tmp;
+ 				__dp_code_block1;
+ 				d = _mm_andnot_si128(_mm_cmpgt_epi8(z, a), flag1_); // d = z > a? 0 : 1
+-#ifdef __SSE4_1__
+ 				z = _mm_max_epi8(z, a);                          // z = z > a? z : a (signed)
+ 				tmp = _mm_cmpgt_epi8(z, b);
+ 				d = _mm_blendv_epi8(flag2_, d, tmp);             // d = z > b? d : 2
+-#else // we need to emulate SSE4.1 intrinsics _mm_max_epi8() and _mm_blendv_epi8()
+-				z = _mm_and_si128(z, _mm_cmpgt_epi8(z, zero_));  // z = z > 0? z : 0;
+-				z = _mm_max_epu8(z, a);                          // z = max(z, a); this works because both are non-negative
+-				tmp = _mm_cmpgt_epi8(z, b);
+-				d = _mm_or_si128(_mm_andnot_si128(tmp, flag2_), _mm_and_si128(tmp, d)); // d = z > b? d : 2; emulating blendv
+-#endif
+ 				__dp_code_block2;
+ 				tmp = _mm_cmpgt_epi8(zero_, a);
+ 				_mm_store_si128(&x[t], _mm_andnot_si128(tmp, a));
+@@ -241,13 +200,8 @@
+ 					_mm_storeu_si128((__m128i*)&H[t], H1);
+ 					t_ = _mm_set1_epi32(t);
+ 					tmp = _mm_cmpgt_epi32(H1, max_H_);
+-#ifdef __SSE4_1__
+ 					max_H_ = _mm_blendv_epi8(max_H_, H1, tmp);
+ 					max_t_ = _mm_blendv_epi8(max_t_, t_, tmp);
+-#else
+-					max_H_ = _mm_or_si128(_mm_and_si128(tmp, H1), _mm_andnot_si128(tmp, max_H_));
+-					max_t_ = _mm_or_si128(_mm_and_si128(tmp, t_), _mm_andnot_si128(tmp, max_t_));
+-#endif
+ 				}
+ 				_mm_storeu_si128((__m128i*)HH, max_H_);
+ 				_mm_storeu_si128((__m128i*)tt, max_t_);
+@@ -302,4 +256,3 @@
+ 		kfree(km, mem2); kfree(km, off);
+ 	}
+ }
+-#endif // __SSE2__
+--- rapmap.orig/src/ksw2pp/ksw2_gg2_sse.c
++++ rapmap/src/ksw2pp/ksw2_gg2_sse.c
+@@ -1,12 +1,8 @@
+ #include <stdio.h> // for debugging only
+ #include "ksw2pp/ksw2.h"
+ 
+-#ifdef __SSE2__
+-#include <emmintrin.h>
+-
+-#ifdef __SSE4_1__
+-#include <smmintrin.h>
+-#endif
++#define SIMDE_ENABLE_NATIVE_ALIASES
++#include "simde/x86/sse4.1.h"
+ 
+ int ksw_gg2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int *m_cigar_, int *n_cigar_, uint32_t **cigar_)
+ {
+@@ -86,16 +82,9 @@
+ 			b = _mm_add_epi8(_mm_load_si128(&y[t]), ut); // b <- y[r-1][t..t+15] + u[r-1][t..t+15]
+ 
+ 			d = _mm_and_si128(_mm_cmpgt_epi8(a, z), flag1_); // d = a > z? 1 : 0
+-#ifdef __SSE4_1__
+ 			z = _mm_max_epi8(z, a);                          // z = z > a? z : a (signed)
+ 			tmp = _mm_cmpgt_epi8(b, z);
+ 			d = _mm_blendv_epi8(d, flag2_, tmp);             // d = b > z? 2 : d
+-#else // we need to emulate SSE4.1 intrinsics _mm_max_epi8() and _mm_blendv_epi8()
+-			z = _mm_and_si128(z, _mm_cmpgt_epi8(z, zero_));  // z = z > 0? z : 0;
+-			z = _mm_max_epu8(z, a);                          // z = max(z, a); this works because both are non-negative
+-			tmp = _mm_cmpgt_epi8(b, z);
+-			d = _mm_or_si128(_mm_andnot_si128(tmp, d), _mm_and_si128(tmp, flag2_)); // d = b > z? 2 : d; emulating blendv
+-#endif
+ 			z = _mm_max_epu8(z, b);                          // z = max(z, b); this works because both are non-negative
+ 			_mm_store_si128(&u[t], _mm_sub_epi8(z, vt1)); // u[r][t..t+15] <- z - v[r-1][t-1..t+14]
+ 			_mm_store_si128(&v[t], _mm_sub_epi8(z, ut));  // v[r][t..t+15] <- z - u[r-1][t..t+15]
+@@ -124,4 +113,3 @@
+ 	kfree(km, mem2); kfree(km, off);
+ 	return H0;
+ }
+-#endif // __SSE2__
+--- rapmap.orig/src/ksw2pp/KSW2Aligner.cpp
++++ rapmap/src/ksw2pp/KSW2Aligner.cpp
+@@ -27,10 +27,12 @@
+ 	asm volatile ("cpuid"
+ 			: "=a" (cpuid[0]), "=b" (cpuid[1]), "=c" (cpuid[2]), "=d" (cpuid[3])
+ 			: "0" (func_id), "2" (subfunc_id));
+-#else // on 32bit, ebx can NOT be used as PIC code
++#elif defined(__i386__) // on 32bit, ebx can NOT be used as PIC code
+ 	asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
+ 			: "=a" (cpuid[0]), "=r" (cpuid[1]), "=c" (cpuid[2]), "=d" (cpuid[3])
+ 			: "0" (func_id), "2" (subfunc_id));
++#else
++	cpuid[0] = 0;
+ #endif
+ }
+ #endif
+--- rapmap.orig/src/CMakeLists.txt
++++ rapmap/src/CMakeLists.txt
+@@ -87,20 +87,36 @@
+ check_ipo_supported(RESULT HAS_IPO)
+ 
+ add_library(ksw2pp_sse2 OBJECT ${KSW2PP_ADVANCED_LIB_SRCS})
+-add_library(ksw2pp_sse4 OBJECT ${KSW2PP_ADVANCED_LIB_SRCS})
+ add_library(ksw2pp_basic OBJECT ${KSW2PP_BASIC_LIB_SRCS})
++set_target_properties(ksw2pp_basic PROPERTIES INCLUDE_DIRECTORIES ${GAT_SOURCE_DIR}/include)
+ 
+-set_target_properties(ksw2pp_sse2 PROPERTIES COMPILE_FLAGS "-O3 -msse2 -mno-sse4.1")
+-set_target_properties(ksw2pp_sse2 PROPERTIES COMPILE_DEFINITIONS "KSW_CPU_DISPATCH;KSW_SSE2_ONLY;HAVE_KALLOC")
+-set_target_properties(ksw2pp_sse4 PROPERTIES COMPILE_FLAGS "-O3 -msse4.1")
+-set_target_properties(ksw2pp_sse4 PROPERTIES COMPILE_DEFINITIONS "KSW_CPU_DISPATCH;HAVE_KALLOC")
+-set_target_properties(ksw2pp_basic PROPERTIES COMPILE_DEFINITIONS "KSW_CPU_DISPATCH;HAVE_KALLOC")
++if(NOT DEFINED CMAKE_SYSTEM_PROCESSOR)
++  EXECUTE_PROCESS( COMMAND uname -m COMMAND tr -d '\n' OUTPUT_VARIABLE ARCHITECTURE )
++  set(CMAKE_SYSTEM_PROCESSOR "${ARCHITECTURE}")
++endif()
+ 
+-set_target_properties(ksw2pp_basic PROPERTIES INCLUDE_DIRECTORIES ${GAT_SOURCE_DIR}/include)
+-set_target_properties(ksw2pp_sse4 PROPERTIES INCLUDE_DIRECTORIES ${GAT_SOURCE_DIR}/include)
++if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
++    set(CMAKE_SYSTEM_PROCESSOR "amd64")
++elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "i686")
++    set(CMAKE_SYSTEM_PROCESSOR "i386")
++endif()
++
++message("CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
++
++if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "amd64" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "i386")
++  add_library(ksw2pp_sse4 OBJECT ${KSW2PP_ADVANCED_LIB_SRCS})
++  set_target_properties(ksw2pp_sse2 PROPERTIES COMPILE_FLAGS "-O3 -msse2 -mno-sse4.1")
++  set_target_properties(ksw2pp_sse2 PROPERTIES COMPILE_DEFINITIONS "KSW_CPU_DISPATCH;KSW_SSE2_ONLY;HAVE_KALLOC")
++  set_target_properties(ksw2pp_sse4 PROPERTIES COMPILE_FLAGS "-O3 -msse4.1")
++  set_target_properties(ksw2pp_sse4 PROPERTIES COMPILE_DEFINITIONS "KSW_CPU_DISPATCH;HAVE_KALLOC")
++  set_target_properties(ksw2pp_basic PROPERTIES COMPILE_DEFINITIONS "KSW_CPU_DISPATCH;HAVE_KALLOC")
++  set_target_properties(ksw2pp_sse4 PROPERTIES INCLUDE_DIRECTORIES ${GAT_SOURCE_DIR}/include)
++  add_library(ksw2pp STATIC $<TARGET_OBJECTS:ksw2pp_sse2> $<TARGET_OBJECTS:ksw2pp_sse4> $<TARGET_OBJECTS:ksw2pp_basic>)
++else()
++  add_library(ksw2pp STATIC $<TARGET_OBJECTS:ksw2pp_sse2> $<TARGET_OBJECTS:ksw2pp_basic>)
++endif()
+ 
+ # Build the ksw2pp library
+-add_library(ksw2pp STATIC $<TARGET_OBJECTS:ksw2pp_sse2> $<TARGET_OBJECTS:ksw2pp_sse4> $<TARGET_OBJECTS:ksw2pp_basic>)
+ set_target_properties(ksw2pp PROPERTIES COMPILE_DEFINITIONS "KSW_CPU_DISPATCH;HAVE_KALLOC")
+ if(HAS_IPO)
+   set_property(TARGET ksw2pp PROPERTY INTERPROCEDURAL_OPTIMIZATION True)
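
A note on the ksw2 changes above: the deleted #else branches hand-emulated the
SSE4.1 intrinsics _mm_max_epi8() and _mm_blendv_epi8() with SSE2
and/andnot/or sequences. SIMDe provides the SSE4.1 intrinsics on every
architecture, so the emulation can be dropped wholesale. A small sketch of the
equivalence (illustrative only; assumes libsimde-dev is installed):

#define SIMDE_ENABLE_NATIVE_ALIASES
#include "simde/x86/sse4.1.h"
#include <stdio.h>

int main(void) {
    __m128i a    = _mm_set1_epi8(1);
    __m128i b    = _mm_set1_epi8(2);
    __m128i mask = _mm_cmpgt_epi8(b, a);  /* all-ones lanes, since 2 > 1 */
    /* SSE4.1 blend, supplied by SIMDe even on arm64 or ppc64el */
    __m128i r1 = _mm_blendv_epi8(a, b, mask);
    /* the SSE2-only emulation that the patch deletes */
    __m128i r2 = _mm_or_si128(_mm_andnot_si128(mask, a),
                              _mm_and_si128(mask, b));
    char o1[16], o2[16];
    _mm_storeu_si128((__m128i *)o1, r1);
    _mm_storeu_si128((__m128i *)o2, r2);
    printf("%d %d\n", o1[0], o2[0]);      /* both print 2 */
    return 0;
}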


=====================================
debian/rules
=====================================
@@ -24,3 +24,6 @@ override_dh_auto_build:
 
 override_dh_clean:
 	dh_clean README.html sample_data/sample_quasi_index/ sample_data/sample_quasi_index_ph/ sample_data/sample_quasi_map.sam sample_data/sample_quasi_map_ph.sam
+
+override_dh_gencontrol:
+	dh_gencontrol -- -Vsimde:Built-Using="$(shell dpkg-query -f '$${source:Package} (= $${source:Version}), ' -W "libsimde-dev")"
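
For illustration: the dpkg-query call expands to the source package and
version that libsimde-dev was built from, e.g. "simde (= 0.0.0.20200328-1), "
(version hypothetical). dh_gencontrol substitutes that into the
${simde:Built-Using} variable declared in debian/control, which is appropriate
here because SIMDe is header-only and its code ends up compiled into the
rapmap binary.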



View it on GitLab: https://salsa.debian.org/med-team/rapmap/-/commit/f6be793f5bbef20fabd1b046ad0c520840344334

-- 
You're receiving this email because of your account on salsa.debian.org.

