[med-svn] [Git][med-team/minimap2][master] 4 commits: Refresh patches, dropping simde

Michael R. Crusoe (@crusoe) gitlab at salsa.debian.org
Thu Oct 14 12:49:26 BST 2021



Michael R. Crusoe pushed to branch master at Debian Med / minimap2


Commits:
4e8c3e6f by Michael R. Crusoe at 2021-10-14T12:35:11+02:00
Refresh patches, dropping simde

- - - - -
f59569ae by Michael R. Crusoe at 2021-10-14T12:41:58+02:00
Add setuptools as a build-dep

- - - - -
1ca19dee by Michael R. Crusoe at 2021-10-14T12:47:02+02:00
collapse overlapping dh_auto_clean overrides

- - - - -
dcc5929a by Michael R. Crusoe at 2021-10-14T13:04:33+02:00
routine-update: Ready to upload to unstable

- - - - -


9 changed files:

- debian/changelog
- debian/control
- debian/patches/ar.patch
- debian/patches/hardening.patch
- debian/patches/link_mappy_to_libminimap.patch
- debian/patches/python-sse4-arch.patch
- debian/patches/series
- − debian/patches/simde
- debian/rules


Changes:

=====================================
debian/changelog
=====================================
@@ -1,11 +1,18 @@
-minimap2 (2.22+dfsg-1) UNRELEASED; urgency=medium
+minimap2 (2.22+dfsg-1) unstable; urgency=medium
 
+  * Team upload
+
+  [ Andreas Tille ]
   * Fix watchfile to detect new versions on github
   * New upstream version
   * Standards-Version: 4.6.0 (routine-update)
   * Use sensible file name for uscan downloads
 
- -- Andreas Tille <tille at debian.org>  Thu, 14 Oct 2021 08:51:33 +0200
+  [ Michael R. Crusoe ]
+  * Refresh patches, dropping simde
+  * Add setuptools as a build-dep
+
+ -- Michael R. Crusoe <crusoe at debian.org>  Thu, 14 Oct 2021 12:59:39 +0200
 
 minimap2 (2.17+dfsg-12) unstable; urgency=medium
 


=====================================
debian/control
=====================================
@@ -4,6 +4,7 @@ Uploaders: Andreas Tille <tille at debian.org>
 Section: science
 Priority: optional
 Build-Depends: debhelper-compat (= 13),
+               python3-setuptools,
                dh-python,
                cython3,
                python3-all-dev,


=====================================
debian/patches/ar.patch
=====================================
@@ -2,9 +2,9 @@ Author: Steffen Moeller
 Last-Update: 2020-03-15 04:06:20 +0100
 Description: Fix ar call to support debugging symbols properly
 
---- a/Makefile
-+++ b/Makefile
-@@ -35,7 +35,8 @@ minimap2-lite:example.o libminimap2.a
+--- minimap2.orig/Makefile
++++ minimap2/Makefile
+@@ -51,7 +51,8 @@
  		$(CC) $(CFLAGS) $< -o $@ -L. -lminimap2 $(LIBS) $(LDFLAGS)
  
  libminimap2.a:$(OBJS)


=====================================
debian/patches/hardening.patch
=====================================
@@ -2,17 +2,17 @@ Author: Andreas Tille <tille at debian.org>
 Last-Update: Fri, 17 Aug 2018 11:04:09 +0200
 Description: Propagate hardening options
 
---- a/Makefile
-+++ b/Makefile
+--- minimap2.orig/Makefile
++++ minimap2/Makefile
 @@ -1,5 +1,5 @@
 -CFLAGS=		-g -Wall -O2 -Wc++-compat #-Wextra
 -CPPFLAGS=	-DHAVE_KALLOC
 +CFLAGS+=		-g -Wall -O2 -Wc++-compat #-Wextra
 +CPPFLAGS+=	-DHAVE_KALLOC
  INCLUDES=
- OBJS=		kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o options.o index.o chain.o align.o hit.o map.o format.o pe.o esterr.o splitidx.o ksw2_ll_sse.o
- PROG=		minimap2
-@@ -33,16 +33,16 @@ all:$(PROG)
+ OBJS=		kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o options.o index.o \
+ 			lchain.o align.o hit.o seed.o map.o format.o pe.o esterr.o splitidx.o \
+@@ -45,16 +45,16 @@
  extra:all $(PROG_EXTRA)
  
  minimap2:main.o libminimap2.a


=====================================
debian/patches/link_mappy_to_libminimap.patch
=====================================
@@ -3,9 +3,9 @@ Last-Update: Fri, 15 May 2020 14:00:54 +0200
 Description: Add libminimap to Python library
  FIXME: seems extra_link_args and extra_objects are ignored both
 
---- a/setup.py
-+++ b/setup.py
-@@ -19,6 +19,8 @@ import sys, platform
+--- minimap2.orig/setup.py
++++ minimap2/setup.py
+@@ -9,6 +9,8 @@
  sys.path.append('python')
  
  extra_compile_args = ['-DHAVE_KALLOC']


=====================================
debian/patches/python-sse4-arch.patch
=====================================
@@ -3,9 +3,9 @@ From: Michael Hudson-Doyle <michael.hudson at canonical.com>
 Bug-Debian: https://bugs.debian.org/969596
 Description: Only pass -msse4.1 to the compiler on amd64
 
---- a/setup.py
-+++ b/setup.py
-@@ -26,7 +26,7 @@
+--- minimap2.orig/setup.py
++++ minimap2/setup.py
+@@ -16,7 +16,7 @@
  if platform.machine() in ["aarch64", "arm64"]:
  	include_dirs.append("sse2neon/")
  	extra_compile_args.extend(['-ftree-vectorize', '-DKSW_SSE2_ONLY', '-D__SSE2__'])


=====================================
debian/patches/series
=====================================
@@ -1,6 +1,5 @@
 hardening.patch
 do_not_use_natbib.bst.patch
-simde
 ar.patch
 link_mappy_to_libminimap.patch
 python-sse4-arch.patch


=====================================
debian/patches/simde deleted
=====================================
@@ -1,544 +0,0 @@
-Author: Michael R. Crusoe <michael.crusoe at gmail.com>
-Description: Add support for more architectures
-
-using the SIMD Everywhere library
---- minimap2.orig/ksw2_extd2_sse.c
-+++ minimap2/ksw2_extd2_sse.c
-@@ -3,29 +3,19 @@
- #include <assert.h>
- #include "ksw2.h"
- 
--#ifdef __SSE2__
--#include <emmintrin.h>
-+#define SIMDE_ENABLE_NATIVE_ALIASES
-+#include "simde/x86/sse4.1.h"
- 
--#ifdef KSW_SSE2_ONLY
--#undef __SSE4_1__
--#endif
--
--#ifdef __SSE4_1__
--#include <smmintrin.h>
--#endif
--
--#ifdef KSW_CPU_DISPATCH
--#ifdef __SSE4_1__
-+#if defined(SIMDE_SSE4_1_NATIVE)
- void ksw_extd2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
- 				   int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez)
--#else
-+#elif defined(SIMDE_SSE2_NATIVE)
- void ksw_extd2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
--				   int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez)
--#endif
-+                                  int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez)
- #else
- void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
--				   int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez)
--#endif // ~KSW_CPU_DISPATCH
-+                                  int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez)
-+#endif
- {
- #define __dp_code_block1 \
- 	z = _mm_load_si128(&s[t]); \
-@@ -161,13 +151,8 @@
- 				st = _mm_loadu_si128((__m128i*)&qrr[t]);
- 				mask = _mm_or_si128(_mm_cmpeq_epi8(sq, m1_), _mm_cmpeq_epi8(st, m1_));
- 				tmp = _mm_cmpeq_epi8(sq, st);
--#ifdef __SSE4_1__
- 				tmp = _mm_blendv_epi8(sc_mis_, sc_mch_, tmp);
- 				tmp = _mm_blendv_epi8(tmp,     sc_N_,   mask);
--#else
--				tmp = _mm_or_si128(_mm_andnot_si128(tmp,  sc_mis_), _mm_and_si128(tmp,  sc_mch_));
--				tmp = _mm_or_si128(_mm_andnot_si128(mask, tmp),     _mm_and_si128(mask, sc_N_));
--#endif
- 				_mm_storeu_si128((__m128i*)((int8_t*)s + t), tmp);
- 			}
- 		} else {
-@@ -184,7 +169,6 @@
- 			for (t = st_; t <= en_; ++t) {
- 				__m128i z, a, b, a2, b2, xt1, x2t1, vt1, ut, tmp;
- 				__dp_code_block1;
--#ifdef __SSE4_1__
- 				z = _mm_max_epi8(z, a);
- 				z = _mm_max_epi8(z, b);
- 				z = _mm_max_epi8(z, a2);
-@@ -195,27 +179,6 @@
- 				_mm_store_si128(&y[t],  _mm_sub_epi8(_mm_max_epi8(b,  zero_), qe_));
- 				_mm_store_si128(&x2[t], _mm_sub_epi8(_mm_max_epi8(a2, zero_), qe2_));
- 				_mm_store_si128(&y2[t], _mm_sub_epi8(_mm_max_epi8(b2, zero_), qe2_));
--#else
--				tmp = _mm_cmpgt_epi8(a,  z);
--				z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, a));
--				tmp = _mm_cmpgt_epi8(b,  z);
--				z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, b));
--				tmp = _mm_cmpgt_epi8(a2, z);
--				z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, a2));
--				tmp = _mm_cmpgt_epi8(b2, z);
--				z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, b2));
--				tmp = _mm_cmplt_epi8(sc_mch_, z);
--				z = _mm_or_si128(_mm_and_si128(tmp, sc_mch_), _mm_andnot_si128(tmp, z));
--				__dp_code_block2;
--				tmp = _mm_cmpgt_epi8(a, zero_);
--				_mm_store_si128(&x[t],  _mm_sub_epi8(_mm_and_si128(tmp, a),  qe_));
--				tmp = _mm_cmpgt_epi8(b, zero_);
--				_mm_store_si128(&y[t],  _mm_sub_epi8(_mm_and_si128(tmp, b),  qe_));
--				tmp = _mm_cmpgt_epi8(a2, zero_);
--				_mm_store_si128(&x2[t], _mm_sub_epi8(_mm_and_si128(tmp, a2), qe2_));
--				tmp = _mm_cmpgt_epi8(b2, zero_);
--				_mm_store_si128(&y2[t], _mm_sub_epi8(_mm_and_si128(tmp, b2), qe2_));
--#endif
- 			}
- 		} else if (!(flag&KSW_EZ_RIGHT)) { // gap left-alignment
- 			__m128i *pr = p + (size_t)r * n_col_ - st_;
-@@ -223,7 +186,6 @@
- 			for (t = st_; t <= en_; ++t) {
- 				__m128i d, z, a, b, a2, b2, xt1, x2t1, vt1, ut, tmp;
- 				__dp_code_block1;
--#ifdef __SSE4_1__
- 				d = _mm_and_si128(_mm_cmpgt_epi8(a, z), _mm_set1_epi8(1));       // d = a  > z? 1 : 0
- 				z = _mm_max_epi8(z, a);
- 				d = _mm_blendv_epi8(d, _mm_set1_epi8(2), _mm_cmpgt_epi8(b,  z)); // d = b  > z? 2 : d
-@@ -233,22 +195,6 @@
- 				d = _mm_blendv_epi8(d, _mm_set1_epi8(4), _mm_cmpgt_epi8(b2, z)); // d = a2 > z? 3 : d
- 				z = _mm_max_epi8(z, b2);
- 				z = _mm_min_epi8(z, sc_mch_);
--#else // we need to emulate SSE4.1 intrinsics _mm_max_epi8() and _mm_blendv_epi8()
--				tmp = _mm_cmpgt_epi8(a,  z);
--				d = _mm_and_si128(tmp, _mm_set1_epi8(1));
--				z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, a));
--				tmp = _mm_cmpgt_epi8(b,  z);
--				d = _mm_or_si128(_mm_andnot_si128(tmp, d), _mm_and_si128(tmp, _mm_set1_epi8(2)));
--				z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, b));
--				tmp = _mm_cmpgt_epi8(a2, z);
--				d = _mm_or_si128(_mm_andnot_si128(tmp, d), _mm_and_si128(tmp, _mm_set1_epi8(3)));
--				z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, a2));
--				tmp = _mm_cmpgt_epi8(b2, z);
--				d = _mm_or_si128(_mm_andnot_si128(tmp, d), _mm_and_si128(tmp, _mm_set1_epi8(4)));
--				z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, b2));
--				tmp = _mm_cmplt_epi8(sc_mch_, z);
--				z = _mm_or_si128(_mm_and_si128(tmp, sc_mch_), _mm_andnot_si128(tmp, z));
--#endif
- 				__dp_code_block2;
- 				tmp = _mm_cmpgt_epi8(a, zero_);
- 				_mm_store_si128(&x[t],  _mm_sub_epi8(_mm_and_si128(tmp, a),  qe_));
-@@ -270,7 +216,6 @@
- 			for (t = st_; t <= en_; ++t) {
- 				__m128i d, z, a, b, a2, b2, xt1, x2t1, vt1, ut, tmp;
- 				__dp_code_block1;
--#ifdef __SSE4_1__
- 				d = _mm_andnot_si128(_mm_cmpgt_epi8(z, a), _mm_set1_epi8(1));    // d = z > a?  0 : 1
- 				z = _mm_max_epi8(z, a);
- 				d = _mm_blendv_epi8(_mm_set1_epi8(2), d, _mm_cmpgt_epi8(z, b));  // d = z > b?  d : 2
-@@ -280,22 +225,6 @@
- 				d = _mm_blendv_epi8(_mm_set1_epi8(4), d, _mm_cmpgt_epi8(z, b2)); // d = z > b2? d : 4
- 				z = _mm_max_epi8(z, b2);
- 				z = _mm_min_epi8(z, sc_mch_);
--#else // we need to emulate SSE4.1 intrinsics _mm_max_epi8() and _mm_blendv_epi8()
--				tmp = _mm_cmpgt_epi8(z, a);
--				d = _mm_andnot_si128(tmp, _mm_set1_epi8(1));
--				z = _mm_or_si128(_mm_and_si128(tmp, z), _mm_andnot_si128(tmp, a));
--				tmp = _mm_cmpgt_epi8(z, b);
--				d = _mm_or_si128(_mm_and_si128(tmp, d), _mm_andnot_si128(tmp, _mm_set1_epi8(2)));
--				z = _mm_or_si128(_mm_and_si128(tmp, z), _mm_andnot_si128(tmp, b));
--				tmp = _mm_cmpgt_epi8(z, a2);
--				d = _mm_or_si128(_mm_and_si128(tmp, d), _mm_andnot_si128(tmp, _mm_set1_epi8(3)));
--				z = _mm_or_si128(_mm_and_si128(tmp, z), _mm_andnot_si128(tmp, a2));
--				tmp = _mm_cmpgt_epi8(z, b2);
--				d = _mm_or_si128(_mm_and_si128(tmp, d), _mm_andnot_si128(tmp, _mm_set1_epi8(4)));
--				z = _mm_or_si128(_mm_and_si128(tmp, z), _mm_andnot_si128(tmp, b2));
--				tmp = _mm_cmplt_epi8(sc_mch_, z);
--				z = _mm_or_si128(_mm_and_si128(tmp, sc_mch_), _mm_andnot_si128(tmp, z));
--#endif
- 				__dp_code_block2;
- 				tmp = _mm_cmpgt_epi8(zero_, a);
- 				_mm_store_si128(&x[t],  _mm_sub_epi8(_mm_andnot_si128(tmp, a),  qe_));
-@@ -330,13 +259,8 @@
- 					_mm_storeu_si128((__m128i*)&H[t], H1);
- 					t_ = _mm_set1_epi32(t);
- 					tmp = _mm_cmpgt_epi32(H1, max_H_);
--#ifdef __SSE4_1__
- 					max_H_ = _mm_blendv_epi8(max_H_, H1, tmp);
- 					max_t_ = _mm_blendv_epi8(max_t_, t_, tmp);
--#else
--					max_H_ = _mm_or_si128(_mm_and_si128(tmp, H1), _mm_andnot_si128(tmp, max_H_));
--					max_t_ = _mm_or_si128(_mm_and_si128(tmp, t_), _mm_andnot_si128(tmp, max_t_));
--#endif
- 				}
- 				_mm_storeu_si128((__m128i*)HH, max_H_);
- 				_mm_storeu_si128((__m128i*)tt, max_t_);
-@@ -391,4 +315,3 @@
- 		kfree(km, mem2); kfree(km, off);
- 	}
- }
--#endif // __SSE2__
---- minimap2.orig/ksw2_exts2_sse.c
-+++ minimap2/ksw2_exts2_sse.c
-@@ -3,29 +3,19 @@
- #include <assert.h>
- #include "ksw2.h"
- 
--#ifdef __SSE2__
--#include <emmintrin.h>
-+#define SIMDE_ENABLE_NATIVE_ALIASES
-+#include "simde/x86/sse4.1.h"
- 
--#ifdef KSW_SSE2_ONLY
--#undef __SSE4_1__
--#endif
--
--#ifdef __SSE4_1__
--#include <smmintrin.h>
--#endif
--
--#ifdef KSW_CPU_DISPATCH
--#ifdef __SSE4_1__
-+#if defined(SIMDE_SSE4_1_NATIVE)
- void ksw_exts2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
- 				   int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez)
--#else
-+#elif defined(SIMDE_SSE2_NATIVE)
- void ksw_exts2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
- 				   int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez)
--#endif
- #else
- void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
- 				   int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez)
--#endif // ~KSW_CPU_DISPATCH
-+#endif
- {
- #define __dp_code_block1 \
- 	z = _mm_load_si128(&s[t]); \
-@@ -194,13 +184,8 @@
- 				st = _mm_loadu_si128((__m128i*)&qrr[t]);
- 				mask = _mm_or_si128(_mm_cmpeq_epi8(sq, m1_), _mm_cmpeq_epi8(st, m1_));
- 				tmp = _mm_cmpeq_epi8(sq, st);
--#ifdef __SSE4_1__
- 				tmp = _mm_blendv_epi8(sc_mis_, sc_mch_, tmp);
- 				tmp = _mm_blendv_epi8(tmp,     sc_N_,   mask);
--#else
--				tmp = _mm_or_si128(_mm_andnot_si128(tmp,  sc_mis_), _mm_and_si128(tmp,  sc_mch_));
--				tmp = _mm_or_si128(_mm_andnot_si128(mask, tmp),     _mm_and_si128(mask, sc_N_));
--#endif
- 				_mm_storeu_si128((__m128i*)((int8_t*)s + t), tmp);
- 			}
- 		} else {
-@@ -217,7 +202,6 @@
- 			for (t = st_; t <= en_; ++t) {
- 				__m128i z, a, b, a2, a2a, xt1, x2t1, vt1, ut, tmp;
- 				__dp_code_block1;
--#ifdef __SSE4_1__
- 				z = _mm_max_epi8(z, a);
- 				z = _mm_max_epi8(z, b);
- 				z = _mm_max_epi8(z, a2a);
-@@ -226,23 +210,6 @@
- 				_mm_store_si128(&y[t],  _mm_sub_epi8(_mm_max_epi8(b,  zero_), qe_));
- 				tmp = _mm_load_si128(&donor[t]);
- 				_mm_store_si128(&x2[t], _mm_sub_epi8(_mm_max_epi8(a2, tmp), q2_));
--#else
--				tmp = _mm_cmpgt_epi8(a,  z);
--				z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, a));
--				tmp = _mm_cmpgt_epi8(b,  z);
--				z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, b));
--				tmp = _mm_cmpgt_epi8(a2a, z);
--				z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, a2a));
--				__dp_code_block2;
--				tmp = _mm_cmpgt_epi8(a, zero_);
--				_mm_store_si128(&x[t],  _mm_sub_epi8(_mm_and_si128(tmp, a),  qe_));
--				tmp = _mm_cmpgt_epi8(b, zero_);
--				_mm_store_si128(&y[t],  _mm_sub_epi8(_mm_and_si128(tmp, b),  qe_));
--				tmp = _mm_load_si128(&donor[t]); // TODO: check if this is correct
--				tmp = _mm_cmpgt_epi8(a2, tmp);
--				tmp = _mm_or_si128(_mm_andnot_si128(tmp, tmp), _mm_and_si128(tmp, a2));
--				_mm_store_si128(&x2[t], _mm_sub_epi8(tmp, q2_));
--#endif
- 			}
- 		} else if (!(flag&KSW_EZ_RIGHT)) { // gap left-alignment
- 			__m128i *pr = p + r * n_col_ - st_;
-@@ -250,24 +217,12 @@
- 			for (t = st_; t <= en_; ++t) {
- 				__m128i d, z, a, b, a2, a2a, xt1, x2t1, vt1, ut, tmp, tmp2;
- 				__dp_code_block1;
--#ifdef __SSE4_1__
- 				d = _mm_and_si128(_mm_cmpgt_epi8(a, z), _mm_set1_epi8(1));       // d = a  > z? 1 : 0
- 				z = _mm_max_epi8(z, a);
- 				d = _mm_blendv_epi8(d, _mm_set1_epi8(2), _mm_cmpgt_epi8(b,  z)); // d = b  > z? 2 : d
- 				z = _mm_max_epi8(z, b);
- 				d = _mm_blendv_epi8(d, _mm_set1_epi8(3), _mm_cmpgt_epi8(a2a, z)); // d = a2 > z? 3 : d
- 				z = _mm_max_epi8(z, a2a);
--#else // we need to emulate SSE4.1 intrinsics _mm_max_epi8() and _mm_blendv_epi8()
--				tmp = _mm_cmpgt_epi8(a,  z);
--				d = _mm_and_si128(tmp, _mm_set1_epi8(1));
--				z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, a));
--				tmp = _mm_cmpgt_epi8(b,  z);
--				d = _mm_or_si128(_mm_andnot_si128(tmp, d), _mm_and_si128(tmp, _mm_set1_epi8(2)));
--				z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, b));
--				tmp = _mm_cmpgt_epi8(a2a, z);
--				d = _mm_or_si128(_mm_andnot_si128(tmp, d), _mm_and_si128(tmp, _mm_set1_epi8(3)));
--				z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, a2a));
--#endif
- 				__dp_code_block2;
- 				tmp = _mm_cmpgt_epi8(a, zero_);
- 				_mm_store_si128(&x[t],  _mm_sub_epi8(_mm_and_si128(tmp, a),  qe_));
-@@ -278,11 +233,7 @@
- 
- 				tmp2 = _mm_load_si128(&donor[t]);
- 				tmp = _mm_cmpgt_epi8(a2, tmp2);
--#ifdef __SSE4_1__
- 				tmp2 = _mm_max_epi8(a2, tmp2);
--#else
--				tmp2 = _mm_or_si128(_mm_andnot_si128(tmp, tmp2), _mm_and_si128(tmp, a2));
--#endif
- 				_mm_store_si128(&x2[t], _mm_sub_epi8(tmp2, q2_));
- 				d = _mm_or_si128(d, _mm_and_si128(tmp, _mm_set1_epi8(0x20)));
- 				_mm_store_si128(&pr[t], d);
-@@ -293,24 +244,12 @@
- 			for (t = st_; t <= en_; ++t) {
- 				__m128i d, z, a, b, a2, a2a, xt1, x2t1, vt1, ut, tmp, tmp2;
- 				__dp_code_block1;
--#ifdef __SSE4_1__
- 				d = _mm_andnot_si128(_mm_cmpgt_epi8(z, a), _mm_set1_epi8(1));    // d = z > a?  0 : 1
- 				z = _mm_max_epi8(z, a);
- 				d = _mm_blendv_epi8(_mm_set1_epi8(2), d, _mm_cmpgt_epi8(z, b));  // d = z > b?  d : 2
- 				z = _mm_max_epi8(z, b);
- 				d = _mm_blendv_epi8(_mm_set1_epi8(3), d, _mm_cmpgt_epi8(z, a2a)); // d = z > a2? d : 3
- 				z = _mm_max_epi8(z, a2a);
--#else // we need to emulate SSE4.1 intrinsics _mm_max_epi8() and _mm_blendv_epi8()
--				tmp = _mm_cmpgt_epi8(z, a);
--				d = _mm_andnot_si128(tmp, _mm_set1_epi8(1));
--				z = _mm_or_si128(_mm_and_si128(tmp, z), _mm_andnot_si128(tmp, a));
--				tmp = _mm_cmpgt_epi8(z, b);
--				d = _mm_or_si128(_mm_and_si128(tmp, d), _mm_andnot_si128(tmp, _mm_set1_epi8(2)));
--				z = _mm_or_si128(_mm_and_si128(tmp, z), _mm_andnot_si128(tmp, b));
--				tmp = _mm_cmpgt_epi8(z, a2a);
--				d = _mm_or_si128(_mm_and_si128(tmp, d), _mm_andnot_si128(tmp, _mm_set1_epi8(3)));
--				z = _mm_or_si128(_mm_and_si128(tmp, z), _mm_andnot_si128(tmp, a2a));
--#endif
- 				__dp_code_block2;
- 				tmp = _mm_cmpgt_epi8(zero_, a);
- 				_mm_store_si128(&x[t],  _mm_sub_epi8(_mm_andnot_si128(tmp, a),  qe_));
-@@ -321,11 +260,7 @@
- 
- 				tmp2 = _mm_load_si128(&donor[t]);
- 				tmp = _mm_cmpgt_epi8(tmp2, a2);
--#ifdef __SSE4_1__
- 				tmp2 = _mm_max_epi8(tmp2, a2);
--#else
--				tmp2 = _mm_or_si128(_mm_andnot_si128(tmp, a2), _mm_and_si128(tmp, tmp2));
--#endif
- 				_mm_store_si128(&x2[t], _mm_sub_epi8(tmp2, q2_));
- 				d = _mm_or_si128(d, _mm_andnot_si128(tmp, _mm_set1_epi8(0x20))); // d = a > 0? 1<<5 : 0
- 				_mm_store_si128(&pr[t], d);
-@@ -349,13 +284,8 @@
- 					_mm_storeu_si128((__m128i*)&H[t], H1);
- 					t_ = _mm_set1_epi32(t);
- 					tmp = _mm_cmpgt_epi32(H1, max_H_);
--#ifdef __SSE4_1__
- 					max_H_ = _mm_blendv_epi8(max_H_, H1, tmp);
- 					max_t_ = _mm_blendv_epi8(max_t_, t_, tmp);
--#else
--					max_H_ = _mm_or_si128(_mm_and_si128(tmp, H1), _mm_andnot_si128(tmp, max_H_));
--					max_t_ = _mm_or_si128(_mm_and_si128(tmp, t_), _mm_andnot_si128(tmp, max_t_));
--#endif
- 				}
- 				_mm_storeu_si128((__m128i*)HH, max_H_);
- 				_mm_storeu_si128((__m128i*)tt, max_t_);
-@@ -406,4 +336,3 @@
- 		kfree(km, mem2); kfree(km, off);
- 	}
- }
--#endif // __SSE2__
---- minimap2.orig/ksw2_extz2_sse.c
-+++ minimap2/ksw2_extz2_sse.c
-@@ -2,26 +2,16 @@
- #include <assert.h>
- #include "ksw2.h"
- 
--#ifdef __SSE2__
--#include <emmintrin.h>
-+#define SIMDE_ENABLE_NATIVE_ALIASES
-+#include "simde/x86/sse4.1.h"
- 
--#ifdef KSW_SSE2_ONLY
--#undef __SSE4_1__
--#endif
--
--#ifdef __SSE4_1__
--#include <smmintrin.h>
--#endif
--
--#ifdef KSW_CPU_DISPATCH
--#ifdef __SSE4_1__
-+#if defined(SIMDE_SSE4_1_NATIVE)
- void ksw_extz2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez)
--#else
-+#elif defined(SIMDE_SSE2_NATIVE)
- void ksw_extz2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez)
--#endif
- #else
- void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez)
--#endif // ~KSW_CPU_DISPATCH
-+#endif
- {
- #define __dp_code_block1 \
- 	z = _mm_add_epi8(_mm_load_si128(&s[t]), qe2_); \
-@@ -129,13 +119,8 @@
- 				st = _mm_loadu_si128((__m128i*)&qrr[t]);
- 				mask = _mm_or_si128(_mm_cmpeq_epi8(sq, m1_), _mm_cmpeq_epi8(st, m1_));
- 				tmp = _mm_cmpeq_epi8(sq, st);
--#ifdef __SSE4_1__
- 				tmp = _mm_blendv_epi8(sc_mis_, sc_mch_, tmp);
- 				tmp = _mm_blendv_epi8(tmp,     sc_N_,   mask);
--#else
--				tmp = _mm_or_si128(_mm_andnot_si128(tmp,  sc_mis_), _mm_and_si128(tmp,  sc_mch_));
--				tmp = _mm_or_si128(_mm_andnot_si128(mask, tmp),     _mm_and_si128(mask, sc_N_));
--#endif
- 				_mm_storeu_si128((__m128i*)((uint8_t*)s + t), tmp);
- 			}
- 		} else {
-@@ -151,22 +136,10 @@
- 			for (t = st_; t <= en_; ++t) {
- 				__m128i z, a, b, xt1, vt1, ut, tmp;
- 				__dp_code_block1;
--#ifdef __SSE4_1__
- 				z = _mm_max_epi8(z, a);                          // z = z > a? z : a (signed)
--#else // we need to emulate SSE4.1 intrinsics _mm_max_epi8()
--				z = _mm_and_si128(z, _mm_cmpgt_epi8(z, zero_));  // z = z > 0? z : 0;
--				z = _mm_max_epu8(z, a);                          // z = max(z, a); this works because both are non-negative
--#endif
- 				__dp_code_block2;
--#ifdef __SSE4_1__
- 				_mm_store_si128(&x[t], _mm_max_epi8(a, zero_));
- 				_mm_store_si128(&y[t], _mm_max_epi8(b, zero_));
--#else
--				tmp = _mm_cmpgt_epi8(a, zero_);
--				_mm_store_si128(&x[t], _mm_and_si128(a, tmp));
--				tmp = _mm_cmpgt_epi8(b, zero_);
--				_mm_store_si128(&y[t], _mm_and_si128(b, tmp));
--#endif
- 			}
- 		} else if (!(flag&KSW_EZ_RIGHT)) { // gap left-alignment
- 			__m128i *pr = p + (size_t)r * n_col_ - st_;
-@@ -175,16 +148,9 @@
- 				__m128i d, z, a, b, xt1, vt1, ut, tmp;
- 				__dp_code_block1;
- 				d = _mm_and_si128(_mm_cmpgt_epi8(a, z), flag1_); // d = a > z? 1 : 0
--#ifdef __SSE4_1__
- 				z = _mm_max_epi8(z, a);                          // z = z > a? z : a (signed)
- 				tmp = _mm_cmpgt_epi8(b, z);
- 				d = _mm_blendv_epi8(d, flag2_, tmp);             // d = b > z? 2 : d
--#else // we need to emulate SSE4.1 intrinsics _mm_max_epi8() and _mm_blendv_epi8()
--				z = _mm_and_si128(z, _mm_cmpgt_epi8(z, zero_));  // z = z > 0? z : 0;
--				z = _mm_max_epu8(z, a);                          // z = max(z, a); this works because both are non-negative
--				tmp = _mm_cmpgt_epi8(b, z);
--				d = _mm_or_si128(_mm_andnot_si128(tmp, d), _mm_and_si128(tmp, flag2_)); // d = b > z? 2 : d; emulating blendv
--#endif
- 				__dp_code_block2;
- 				tmp = _mm_cmpgt_epi8(a, zero_);
- 				_mm_store_si128(&x[t], _mm_and_si128(tmp, a));
-@@ -201,16 +167,9 @@
- 				__m128i d, z, a, b, xt1, vt1, ut, tmp;
- 				__dp_code_block1;
- 				d = _mm_andnot_si128(_mm_cmpgt_epi8(z, a), flag1_); // d = z > a? 0 : 1
--#ifdef __SSE4_1__
- 				z = _mm_max_epi8(z, a);                          // z = z > a? z : a (signed)
- 				tmp = _mm_cmpgt_epi8(z, b);
- 				d = _mm_blendv_epi8(flag2_, d, tmp);             // d = z > b? d : 2
--#else // we need to emulate SSE4.1 intrinsics _mm_max_epi8() and _mm_blendv_epi8()
--				z = _mm_and_si128(z, _mm_cmpgt_epi8(z, zero_));  // z = z > 0? z : 0;
--				z = _mm_max_epu8(z, a);                          // z = max(z, a); this works because both are non-negative
--				tmp = _mm_cmpgt_epi8(z, b);
--				d = _mm_or_si128(_mm_andnot_si128(tmp, flag2_), _mm_and_si128(tmp, d)); // d = z > b? d : 2; emulating blendv
--#endif
- 				__dp_code_block2;
- 				tmp = _mm_cmpgt_epi8(zero_, a);
- 				_mm_store_si128(&x[t], _mm_andnot_si128(tmp, a));
-@@ -241,13 +200,8 @@
- 					_mm_storeu_si128((__m128i*)&H[t], H1);
- 					t_ = _mm_set1_epi32(t);
- 					tmp = _mm_cmpgt_epi32(H1, max_H_);
--#ifdef __SSE4_1__
- 					max_H_ = _mm_blendv_epi8(max_H_, H1, tmp);
- 					max_t_ = _mm_blendv_epi8(max_t_, t_, tmp);
--#else
--					max_H_ = _mm_or_si128(_mm_and_si128(tmp, H1), _mm_andnot_si128(tmp, max_H_));
--					max_t_ = _mm_or_si128(_mm_and_si128(tmp, t_), _mm_andnot_si128(tmp, max_t_));
--#endif
- 				}
- 				_mm_storeu_si128((__m128i*)HH, max_H_);
- 				_mm_storeu_si128((__m128i*)tt, max_t_);
-@@ -302,4 +256,3 @@
- 		kfree(km, mem2); kfree(km, off);
- 	}
- }
--#endif // __SSE2__
---- minimap2.orig/ksw2_ll_sse.c
-+++ minimap2/ksw2_ll_sse.c
-@@ -1,7 +1,8 @@
- #include <stdlib.h>
- #include <stdint.h>
- #include <string.h>
--#include <emmintrin.h>
-+#define SIMDE_ENABLE_NATIVE_ALIASES
-+#include "simde/x86/sse2.h"
- #include "ksw2.h"
- 
- #ifdef __GNUC__
---- minimap2.orig/Makefile
-+++ minimap2/Makefile
-@@ -6,21 +6,17 @@
- PROG_EXTRA=	sdust minimap2-lite
- LIBS=		-lm -lz -lpthread
- 
--ifeq ($(arm_neon),) # if arm_neon is not defined
--ifeq ($(sse2only),) # if sse2only is not defined
-+OBJS+=ksw2_extz2_sse.o ksw2_extd2_sse.o ksw2_exts2_sse.o
-+
-+ifneq ($(amd64),)
- 	OBJS+=ksw2_extz2_sse41.o ksw2_extd2_sse41.o ksw2_exts2_sse41.o ksw2_extz2_sse2.o ksw2_extd2_sse2.o ksw2_exts2_sse2.o ksw2_dispatch.o
--else                # if sse2only is defined
--	OBJS+=ksw2_extz2_sse.o ksw2_extd2_sse.o ksw2_exts2_sse.o
-+else ifneq ($(i386),)
-+	OBJS+=ksw2_extz2_sse2.o ksw2_extd2_sse2.o ksw2_exts2_sse2.o ksw2_dispatch.o
- endif
--else				# if arm_neon is defined
--	OBJS+=ksw2_extz2_neon.o ksw2_extd2_neon.o ksw2_exts2_neon.o
--    INCLUDES+=-Isse2neon
--ifeq ($(aarch64),)	#if aarch64 is not defined
--	CFLAGS+=-D_FILE_OFFSET_BITS=64 -mfpu=neon -fsigned-char
--else				#if aarch64 is defined
-+
-+ifneq ($(aarch64),)	#if aarch64 is defined
- 	CFLAGS+=-D_FILE_OFFSET_BITS=64 -fsigned-char
- endif
--endif
- 
- .PHONY:all extra clean depend
- .SUFFIXES:.c .o
-@@ -46,7 +42,7 @@
- 
- # SSE-specific targets on x86/x86_64
- 
--ifeq ($(arm_neon),)   # if arm_neon is defined, compile this target with the default setting (i.e. no -msse2)
-+ifneq ($(amd64),)   # if amd64 is not defined, compile this target with the default setting (i.e. no -msse2)
- ksw2_ll_sse.o:ksw2_ll_sse.c ksw2.h kalloc.h
- 		$(CC) -c $(CFLAGS) -msse2 $(CPPFLAGS) $(INCLUDES) $< -o $@
- endif
-@@ -72,17 +68,6 @@
- ksw2_dispatch.o:ksw2_dispatch.c ksw2.h
- 		$(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
- 
--# NEON-specific targets on ARM
--
--ksw2_extz2_neon.o:ksw2_extz2_sse.c ksw2.h kalloc.h
--		$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_SSE2_ONLY -D__SSE2__ $(INCLUDES) $< -o $@
--
--ksw2_extd2_neon.o:ksw2_extd2_sse.c ksw2.h kalloc.h
--		$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_SSE2_ONLY -D__SSE2__ $(INCLUDES) $< -o $@
--
--ksw2_exts2_neon.o:ksw2_exts2_sse.c ksw2.h kalloc.h
--		$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_SSE2_ONLY -D__SSE2__ $(INCLUDES) $< -o $@
--
- # other non-file targets
- 
- clean:


=====================================
debian/rules
=====================================
@@ -4,7 +4,7 @@ export DEB_BUILD_MAINT_OPTIONS = hardening=+all
 # from minimap 1
 #export DEB_BUILD_MAINT_OPTIONS = hardening=+bindnow
 include /usr/share/dpkg/architecture.mk
-export DEB_CFLAGS_MAINT_APPEND += -fopenmp-simd -O3 -DSIMDE_ENABLE_OPENMP
+export DEB_CFLAGS_MAINT_APPEND += -fopenmp-simd -O3 -DSIMDE_ENABLE_OPENMP -DUSE_SIMDE -DSIMDE_ENABLE_NATIVE_ALIASES
 
 RANLIB ?= ranlib
 
@@ -14,6 +14,9 @@ RANLIB ?= ranlib
 override_dh_auto_clean:
 	dh_auto_clean
 	cd tex && make clean
+	rm -f MT-human.mmi
+	rm -f tex/minimap2.aux tex/minimap2.bbl tex/minimap2.blg tex/minimap2.log tex/minimap2.out tex/minimap2.pdf
+	rm -f libminimap2.a
 
 ifneq (,$(filter $(DEB_HOST_ARCH_CPU),amd64))
 build_vars += amd64=1
@@ -47,11 +50,5 @@ override_dh_strip:
 	dh_strip
 	$(RANLIB) $(CURDIR)/debian/libminimap2-dev/usr/lib/libminimap2.a
 
-override_dh_auto_clean:
-	dh_auto_clean
-	rm -f MT-human.mmi
-	rm -f tex/minimap2.aux tex/minimap2.bbl tex/minimap2.blg tex/minimap2.log tex/minimap2.out tex/minimap2.pdf
-	rm -f libminimap2.a
-
 override_dh_installman:
 	dh_installman --language=C



View it on GitLab: https://salsa.debian.org/med-team/minimap2/-/compare/b58b74db97f2a93f8802202caa21f2da0bd3b590...dcc5929a37635e276c038cff118a12ff388ddd81

-- 
View it on GitLab: https://salsa.debian.org/med-team/minimap2/-/compare/b58b74db97f2a93f8802202caa21f2da0bd3b590...dcc5929a37635e276c038cff118a12ff388ddd81
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20211014/502940ce/attachment-0001.htm>


More information about the debian-med-commit mailing list