[med-svn] [Git][med-team/last-align][master] 4 commits: New upstream version 1045

Michael R. Crusoe gitlab at salsa.debian.org
Mon Dec 30 17:58:16 GMT 2019



Michael R. Crusoe pushed to branch master at Debian Med / last-align


Commits:
73307486 by Michael R. Crusoe at 2019-12-30T17:44:41Z
New upstream version 1045
- - - - -
a73387df by Michael R. Crusoe at 2019-12-30T17:44:41Z
routine-update: New upstream version

- - - - -
71d75d6e by Michael R. Crusoe at 2019-12-30T17:44:42Z
Update upstream source from tag 'upstream/1045'

Update to upstream version '1045'
with Debian dir 99451b1cc24f1b0a8bdfa910c2efb8edcae6efb5
- - - - -
852e4d6a by Michael R. Crusoe at 2019-12-30T17:56:50Z
update simde patches

- - - - -


8 changed files:

- ChangeLog.txt
- debian/changelog
- debian/patches/simde
- src/Alignment.cc
- src/GappedXdropAligner.hh
- src/GappedXdropAlignerDna.cc
- src/mcf_simd.hh
- src/version.hh


Changes:

=====================================
ChangeLog.txt
=====================================
@@ -1,9 +1,16 @@
+2019-12-23  Martin C. Frith  <Martin C. Frith>
+
+	* src/Alignment.cc, src/GappedXdropAligner.hh,
+	src/GappedXdropAlignerDna.cc, src/mcf_simd.hh:
+	Make it usable without SSE/SIMD
+	[4270e27518ca] [tip]
+
 2019-12-20  Martin C. Frith  <Martin C. Frith>
 
 	* src/GappedXdropAligner.hh, src/GappedXdropAlignerDna.cc,
 	src/mcf_simd.hh, test/last-test.out, test/last-test.sh:
 	Maybe make lastal gapped alignment a bit faster
-	[11810fcff80c] [tip]
+	[11810fcff80c]
 
 2019-12-19  Martin C. Frith  <Martin C. Frith>
 


=====================================
debian/changelog
=====================================
@@ -1,3 +1,11 @@
+last-align (1045-1) UNRELEASED; urgency=medium
+
+  * Team upload.
+  * New upstream version
+  * Updated simde patches
+
+ -- Michael R. Crusoe <michael.crusoe at gmail.com>  Mon, 30 Dec 2019 18:44:40 +0100
+
 last-align (1044-2) unstable; urgency=medium
 
   * Team upload.


=====================================
debian/patches/simde
=====================================
@@ -1,10 +1,12 @@
 --- last-align.orig/src/mcf_simd.hh
 +++ last-align/src/mcf_simd.hh
-@@ -4,38 +4,36 @@
+@@ -4,47 +4,43 @@
  #ifndef MCF_SIMD_HH
  #define MCF_SIMD_HH
  
+-#if defined __SSE4_1__
 -#include <immintrin.h>
+-#endif
 +#include "../debian/include/simde/x86/avx2.h"
  
  namespace mcf {
@@ -47,7 +49,6 @@
  }
  
  const int simdLen = 8;
-@@ -43,14 +41,14 @@
  
  static inline SimdInt simdSet(int i7, int i6, int i5, int i4,
  			      int i3, int i2, int i1, int i0) {
@@ -55,16 +56,8 @@
 +  return simde_mm256_set_epi32(i7, i6, i5, i4, i3, i2, i1, i0);
  }
  
- static inline SimdInt simdSet2(short iF, short iE, short iD, short iC,
- 			       short iB, short iA, short i9, short i8,
- 			       short i7, short i6, short i5, short i4,
- 			       short i3, short i2, short i1, short i0) {
--  return _mm256_set_epi16(iF, iE, iD, iC, iB, iA, i9, i8,
-+  return simde_mm256_set_epi16(iF, iE, iD, iC, iB, iA, i9, i8,
- 			  i7, i6, i5, i4, i3, i2, i1, i0);
- }
- 
-@@ -62,265 +60,108 @@
+ static inline SimdInt simdSet1(char jF, char jE, char jD, char jC,
+@@ -55,207 +51,80 @@
  			       char iB, char iA, char i9, char i8,
  			       char i7, char i6, char i5, char i4,
  			       char i3, char i2, char i1, char i0) {
@@ -77,129 +70,69 @@
  
  static inline SimdInt simdFill(int x) {
 -  return _mm256_set1_epi32(x);
-+  return simde_mm256_set1_epi32(x);
- }
- 
- static inline SimdInt simdFill2(short x) {
--  return _mm256_set1_epi16(x);
-+  return simde_mm256_set1_epi16(x);
- }
- 
- static inline SimdInt simdFill1(char x) {
+-}
+-
+-static inline SimdInt simdFill1(char x) {
 -  return _mm256_set1_epi8(x);
-+  return simde_mm256_set1_epi8(x);
- }
- 
- static inline SimdInt simdEq1(SimdInt x, SimdInt y) {
--  return _mm256_cmpeq_epi8(x, y);
-+  return simde_mm256_cmpeq_epi8(x, y);
- }
- 
- static inline SimdInt simdGt(SimdInt x, SimdInt y) {
+-}
+-
+-static inline SimdInt simdGt(SimdInt x, SimdInt y) {
 -  return _mm256_cmpgt_epi32(x, y);
-+  return simde_mm256_cmpgt_epi32(x, y);
- }
- 
- static inline SimdInt simdGt2(SimdInt x, SimdInt y) {
--  return _mm256_cmpgt_epi16(x, y);
-+  return simde_mm256_cmpgt_epi16(x, y);
- }
- 
- static inline SimdInt simdAdd(SimdInt x, SimdInt y) {
+-}
+-
+-static inline SimdInt simdGe1(SimdInt x, SimdInt y) {
+-  return _mm256_cmpeq_epi8(_mm256_min_epu8(x, y), y);
+-}
+-
+-static inline SimdInt simdAdd(SimdInt x, SimdInt y) {
 -  return _mm256_add_epi32(x, y);
-+  return simde_mm256_add_epi32(x, y);
- }
- 
- static inline SimdInt simdAdd2(SimdInt x, SimdInt y) {
--  return _mm256_add_epi16(x, y);
-+  return simde_mm256_add_epi16(x, y);
- }
- 
- static inline SimdInt simdAdd1(SimdInt x, SimdInt y) {
+-}
+-
+-static inline SimdInt simdAdd1(SimdInt x, SimdInt y) {
 -  return _mm256_add_epi8(x, y);
-+  return simde_mm256_add_epi8(x, y);
- }
- 
- static inline SimdInt simdAdds1(SimdInt x, SimdInt y) {
+-}
+-
+-static inline SimdInt simdAdds1(SimdInt x, SimdInt y) {
 -  return _mm256_adds_epu8(x, y);
-+  return simde_mm256_adds_epu8(x, y);
- }
- 
- static inline SimdInt simdSub(SimdInt x, SimdInt y) {
+-}
+-
+-static inline SimdInt simdSub(SimdInt x, SimdInt y) {
 -  return _mm256_sub_epi32(x, y);
-+  return simde_mm256_sub_epi32(x, y);
- }
- 
- static inline SimdInt simdSub2(SimdInt x, SimdInt y) {
--  return _mm256_sub_epi16(x, y);
-+  return simde_mm256_sub_epi16(x, y);
- }
- 
- static inline SimdInt simdSub1(SimdInt x, SimdInt y) {
+-}
+-
+-static inline SimdInt simdSub1(SimdInt x, SimdInt y) {
 -  return _mm256_sub_epi8(x, y);
-+  return simde_mm256_sub_epi8(x, y);
- }
- 
- static inline SimdInt simdLeft(SimdInt x, int bits) {
+-}
+-
+-static inline SimdInt simdLeft(SimdInt x, int bits) {
 -  return _mm256_slli_epi32(x, bits);
-+  return simde_mm256_slli_epi32(x, bits);
- }
- 
- static inline SimdInt simdMax(SimdInt x, SimdInt y) {
+-}
+-
+-static inline SimdInt simdMax(SimdInt x, SimdInt y) {
 -  return _mm256_max_epi32(x, y);
-+  return simde_mm256_max_epi32(x, y);
- }
- 
- static inline SimdInt simdMax2(SimdInt x, SimdInt y) {
--  return _mm256_max_epi16(x, y);
-+  return simde_mm256_max_epi16(x, y);
- }
- 
- static inline SimdInt simdMin1(SimdInt x, SimdInt y) {
+-}
+-
+-static inline SimdInt simdMin1(SimdInt x, SimdInt y) {
 -  return _mm256_min_epu8(x, y);
-+  return simde_mm256_min_epu8(x, y);
- }
- 
- static inline int simdHorizontalMax(SimdInt x) {
+-}
+-
+-static inline int simdHorizontalMax(SimdInt x) {
 -  __m128i z = _mm256_castsi256_si128(x);
 -  z = _mm_max_epi32(z, _mm256_extracti128_si256(x, 1));
 -  z = _mm_max_epi32(z, _mm_shuffle_epi32(z, 0x4E));
 -  z = _mm_max_epi32(z, _mm_shuffle_epi32(z, 0xB1));
 -  return _mm_cvtsi128_si32(z);
-+  simde__m128i z = simde_mm256_castsi256_si128(x);
-+  z = simde_mm_max_epi32(z, simde_mm256_extracti128_si256(x, 1));
-+  z = simde_mm_max_epi32(z, simde_mm_shuffle_epi32(z, 0x4E));
-+  z = simde_mm_max_epi32(z, simde_mm_shuffle_epi32(z, 0xB1));
-+  return simde_mm_cvtsi128_si32(z);
- }
- 
- static inline int simdHorizontalMax2(SimdInt x) {
--  __m128i z = _mm256_castsi256_si128(x);
--  z = _mm_max_epi16(z, _mm256_extracti128_si256(x, 1));
--  z = _mm_sub_epi16(_mm_set1_epi16(32767), z);
--  z = _mm_minpos_epu16(z);
--  return 32767 - _mm_extract_epi16(z, 0);
-+  simde__m128i z = simde_mm256_castsi256_si128(x);
-+  z = simde_mm_max_epi16(z, simde_mm256_extracti128_si256(x, 1));
-+  z = simde_mm_sub_epi16(simde_mm_set1_epi16(32767), z);
-+  z = simde_mm_minpos_epu16(z);
-+  return 32767 - simde_mm_extract_epi16(z, 0);
- }
- 
- static inline int simdHorizontalMin1(SimdInt x) {
+-}
+-
+-static inline int simdHorizontalMin1(SimdInt x) {
 -  __m128i z = _mm256_castsi256_si128(x);
 -  z = _mm_min_epu8(z, _mm256_extracti128_si256(x, 1));
 -  z = _mm_min_epu8(z, _mm_srli_epi16(z, 8));
 -  z = _mm_minpos_epu16(z);
 -  return _mm_extract_epi16(z, 0);
-+  simde__m128i z = simde_mm256_castsi256_si128(x);
-+  z = simde_mm_min_epu8(z, simde_mm256_extracti128_si256(x, 1));
-+  z = simde_mm_min_epu8(z, simde_mm_srli_epi16(z, 8));
-+  z = simde_mm_minpos_epu16(z);
-+  return simde_mm_extract_epi16(z, 0);
- }
- 
- static inline SimdInt simdChoose1(SimdInt items, SimdInt choices) {
+-}
+-
+-static inline SimdInt simdChoose1(SimdInt items, SimdInt choices) {
 -  return _mm256_shuffle_epi8(items, choices);
 -}
 -
@@ -234,16 +167,9 @@
 -}
 -
 -const int simdLen = 4;
--const int simdLen2 = 8;
 -
 -static inline SimdInt simdSet(int i3, int i2, int i1, int i0) {
 -  return _mm_set_epi32(i3, i2, i1, i0);
-+  return simde_mm256_shuffle_epi8(items, choices);
- }
- 
--static inline SimdInt simdSet2(short i7, short i6, short i5, short i4,
--			       short i3, short i2, short i1, short i0) {
--  return _mm_set_epi16(i7, i6, i5, i4, i3, i2, i1, i0);
 -}
 -
 -static inline SimdInt simdSet1(char iF, char iE, char iD, char iC,
@@ -256,98 +182,97 @@
 -
 -static inline SimdInt simdFill(int x) {
 -  return _mm_set1_epi32(x);
--}
--
--static inline SimdInt simdFill2(short x) {
--  return _mm_set1_epi16(x);
--}
--
--static inline SimdInt simdFill1(char x) {
++  return simde_mm256_set1_epi32(x);
+ }
+ 
+ static inline SimdInt simdFill1(char x) {
 -  return _mm_set1_epi8(x);
--}
--
--static inline SimdInt simdEq1(SimdInt x, SimdInt y) {
--  return _mm_cmpeq_epi8(x, y);
--}
--
--static inline SimdInt simdGt(SimdInt x, SimdInt y) {
++  return simde_mm256_set1_epi8(x);
+ }
+ 
+ static inline SimdInt simdGt(SimdInt x, SimdInt y) {
 -  return _mm_cmpgt_epi32(x, y);
--}
--
--static inline SimdInt simdGt2(SimdInt x, SimdInt y) {
--  return _mm_cmpgt_epi16(x, y);
--}
--
--static inline SimdInt simdAdd(SimdInt x, SimdInt y) {
++  return simde_mm256_cmpgt_epi32(x, y);
+ }
+ 
+ static inline SimdInt simdGe1(SimdInt x, SimdInt y) {
+-  return _mm_cmpeq_epi8(_mm_min_epu8(x, y), y);
++  return simde_mm256_cmpeq_epi8(simde_mm256_min_epu8(x, y), y);
+ }
+ 
+ static inline SimdInt simdAdd(SimdInt x, SimdInt y) {
 -  return _mm_add_epi32(x, y);
--}
--
--static inline SimdInt simdAdd2(SimdInt x, SimdInt y) {
--  return _mm_add_epi16(x, y);
--}
--
--static inline SimdInt simdAdd1(SimdInt x, SimdInt y) {
++  return simde_mm256_add_epi32(x, y);
+ }
+ 
+ static inline SimdInt simdAdd1(SimdInt x, SimdInt y) {
 -  return _mm_add_epi8(x, y);
--}
--
--static inline SimdInt simdAdds1(SimdInt x, SimdInt y) {
++  return simde_mm256_add_epi8(x, y);
+ }
+ 
+ static inline SimdInt simdAdds1(SimdInt x, SimdInt y) {
 -  return _mm_adds_epu8(x, y);
--}
--
--static inline SimdInt simdSub(SimdInt x, SimdInt y) {
++  return simde_mm256_adds_epu8(x, y);
+ }
+ 
+ static inline SimdInt simdSub(SimdInt x, SimdInt y) {
 -  return _mm_sub_epi32(x, y);
--}
--
--static inline SimdInt simdSub2(SimdInt x, SimdInt y) {
--  return _mm_sub_epi16(x, y);
--}
--
--static inline SimdInt simdSub1(SimdInt x, SimdInt y) {
++  return simde_mm256_sub_epi32(x, y);
+ }
+ 
+ static inline SimdInt simdSub1(SimdInt x, SimdInt y) {
 -  return _mm_sub_epi8(x, y);
--}
--
--static inline SimdInt simdLeft(SimdInt x, int bits) {
++  return simde_mm256_sub_epi8(x, y);
+ }
+ 
+ static inline SimdInt simdLeft(SimdInt x, int bits) {
 -  return _mm_slli_epi32(x, bits);
--}
--
--static inline SimdInt simdMax(SimdInt x, SimdInt y) {
++  return simde_mm256_slli_epi32(x, bits);
+ }
+ 
+ static inline SimdInt simdMax(SimdInt x, SimdInt y) {
 -  return _mm_max_epi32(x, y);  // SSE4.1
--}
--
--static inline SimdInt simdMax2(SimdInt x, SimdInt y) {
--  return _mm_max_epi16(x, y);
--}
--
--static inline SimdInt simdMin1(SimdInt x, SimdInt y) {
++  return simde_mm256_max_epi32(x, y);
+ }
+ 
+ static inline SimdInt simdMin1(SimdInt x, SimdInt y) {
 -  return _mm_min_epu8(x, y);
--}
--
--static inline int simdHorizontalMax(SimdInt x) {
++  return simde_mm256_min_epu8(x, y);
+ }
+ 
+ static inline int simdHorizontalMax(SimdInt x) {
 -  x = simdMax(x, _mm_shuffle_epi32(x, 0x4E));
 -  x = simdMax(x, _mm_shuffle_epi32(x, 0xB1));
 -  return _mm_cvtsi128_si32(x);
--}
--
--static inline int simdHorizontalMax2(SimdInt x) {
--  x = simdSub2(simdFill2(32767), x);
--  x = _mm_minpos_epu16(x);  // SSE4.1
--  return 32767 - _mm_extract_epi16(x, 0);
--}
--
--static inline int simdHorizontalMin1(SimdInt x) {
++  simde__m128i z = simde_mm256_castsi256_si128(x);
++  z = simde_mm_max_epi32(z, simde_mm256_extracti128_si256(x, 1));
++  z = simde_mm_max_epi32(z, simde_mm_shuffle_epi32(z, 0x4E));
++  z = simde_mm_max_epi32(z, simde_mm_shuffle_epi32(z, 0xB1));
++  return simde_mm_cvtsi128_si32(z);
+ }
+ 
+ static inline int simdHorizontalMin1(SimdInt x) {
 -  x = _mm_min_epu8(x, _mm_srli_epi16(x, 8));
 -  x = _mm_minpos_epu16(x);  // SSE4.1
 -  return _mm_extract_epi16(x, 0);
--}
--
--static inline SimdInt simdChoose1(SimdInt items, SimdInt choices) {
--  return _mm_shuffle_epi8(items, choices);
--}
--
++  simde__m128i z = simde_mm256_castsi256_si128(x);
++  z = simde_mm_min_epu8(z, simde_mm256_extracti128_si256(x, 1));
++  z = simde_mm_min_epu8(z, simde_mm_srli_epi16(z, 8));
++  z = simde_mm_minpos_epu16(z);
++  return simde_mm_extract_epi16(z, 0);
+ }
+ 
+ static inline SimdInt simdChoose1(SimdInt items, SimdInt choices) {
+-  return _mm_shuffle_epi8(items, choices);  // SSSE3
++  return simde_mm256_shuffle_epi8(items, choices);
+ }
+ 
 -#else
 -
 -typedef int SimdInt;
+-const int simdBytes = 1;
 -const int simdLen = 1;
+-static inline int simdZero() { return 0; }
 -static inline int simdSet(int x) { return x; }
 -static inline int simdFill(int x) { return x; }
 -static inline int simdLoad(const int *p) { return *p; }
@@ -493,7 +418,16 @@
  .SUFFIXES:
 --- last-align.orig/src/GappedXdropAlignerDna.cc
 +++ last-align/src/GappedXdropAlignerDna.cc
-@@ -41,12 +41,10 @@
+@@ -1,8 +1,6 @@
+ // Author: Martin C. Frith 2019
+ // SPDX-License-Identifier: GPL-3.0-or-later
+ 
+-#if defined __SSE4_1__
+-
+ #include "GappedXdropAligner.hh"
+ #include "GappedXdropAlignerInl.hh"
+ 
+@@ -43,12 +41,10 @@
  
    const SimdInt scorer4x4 =
      simdSet1(
@@ -506,7 +440,7 @@
  		 scorer[3][3], scorer[3][2], scorer[3][1], scorer[3][0],
  		 scorer[2][3], scorer[2][2], scorer[2][1], scorer[2][0],
  		 scorer[1][3], scorer[1][2], scorer[1][1], scorer[1][0],
-@@ -125,8 +123,6 @@
+@@ -127,8 +123,6 @@
  
        for (int i = 0; i < numCells; i += simdBytes) {
  	SimdInt s = simdSet1(
@@ -515,7 +449,7 @@
  			     scorer[s1[31]][s2[31]],
  			     scorer[s1[30]][s2[30]],
  			     scorer[s1[29]][s2[29]],
-@@ -143,7 +139,6 @@
+@@ -145,7 +139,6 @@
  			     scorer[s1[18]][s2[18]],
  			     scorer[s1[17]][s2[17]],
  			     scorer[s1[16]][s2[16]],
@@ -523,7 +457,7 @@
  			     scorer[s1[15]][s2[15]],
  			     scorer[s1[14]][s2[14]],
  			     scorer[s1[13]][s2[13]],
-@@ -159,7 +154,6 @@
+@@ -161,7 +154,6 @@
  			     scorer[s1[3]][s2[3]],
  			     scorer[s1[2]][s2[2]],
  			     scorer[s1[1]][s2[1]],
@@ -531,3 +465,58 @@
  			     scorer[s1[0]][s2[0]]);
  
  	SimdInt x = simdAdds1(simdLoad(x2+i), mScoreRise12);
+@@ -275,5 +267,3 @@
+ }
+ 
+ }
+-
+-#endif
+--- last-align.orig/src/Alignment.cc
++++ last-align/src/Alignment.cc
+@@ -347,13 +347,11 @@
+ 				  del.openCost, del.growCost,
+ 				  ins.openCost, ins.growCost,
+ 				  gap.pairCost, gap.isAffine, maxDrop, smMax)
+-#if defined __SSE4_1__
+     : isSimdMatrix ? aligner.alignDna(seq1 + start1, seq2 + start2,
+ 				      isForward, sm,
+ 				      del.openCost, del.growCost,
+ 				      ins.openCost, ins.growCost,
+ 				      maxDrop, smMax, alph.numbersToUppercase)
+-#endif
+     :           aligner.align(seq1 + start1, seq2 + start2,
+ 			      isForward, globality, sm,
+ 			      del.openCost, del.growCost,
+@@ -373,14 +371,12 @@
+       while( greedyAligner.getNextChunk( end1, end2, size ) )
+ 	chunks.push_back( SegmentPair( end1 - size, end2 - size, size ) );
+     }
+-#if defined __SSE4_1__
+     else if (isSimdMatrix && !pssm2 && !sm2qual) {
+       while (aligner.getNextChunkDna(end1, end2, size,
+ 				     del.openCost, del.growCost,
+ 				     ins.openCost, ins.growCost))
+ 	chunks.push_back(SegmentPair(end1 - size, end2 - size, size));
+     }
+-#endif
+     else {
+       while( aligner.getNextChunk( end1, end2, size,
+ 				   del.openCost, del.growCost,
+--- last-align.orig/src/GappedXdropAligner.hh
++++ last-align/src/GappedXdropAligner.hh
+@@ -317,7 +317,6 @@
+   void init3();
+ 
+   // Everything below here is for alignDna & getNextChunkDna
+-#if defined __SSE4_1__
+   std::vector<TinyScore> xTinyScores;
+   std::vector<TinyScore> yTinyScores;
+   std::vector<TinyScore> zTinyScores;
+@@ -362,7 +361,6 @@
+     while (*x2 != target) ++x2;
+     bestSeq1position = x2 - x2beg + seq1beg;
+   }
+-#endif
+ };
+ 
+ }


=====================================
src/Alignment.cc
=====================================
@@ -347,11 +347,13 @@ void Alignment::extend( std::vector< SegmentPair >& chunks,
 				  del.openCost, del.growCost,
 				  ins.openCost, ins.growCost,
 				  gap.pairCost, gap.isAffine, maxDrop, smMax)
+#if defined __SSE4_1__
     : isSimdMatrix ? aligner.alignDna(seq1 + start1, seq2 + start2,
 				      isForward, sm,
 				      del.openCost, del.growCost,
 				      ins.openCost, ins.growCost,
 				      maxDrop, smMax, alph.numbersToUppercase)
+#endif
     :           aligner.align(seq1 + start1, seq2 + start2,
 			      isForward, globality, sm,
 			      del.openCost, del.growCost,
@@ -370,12 +372,16 @@ void Alignment::extend( std::vector< SegmentPair >& chunks,
     if( isGreedy ){
       while( greedyAligner.getNextChunk( end1, end2, size ) )
 	chunks.push_back( SegmentPair( end1 - size, end2 - size, size ) );
-    } else if (isSimdMatrix && !pssm2 && !sm2qual) {
+    }
+#if defined __SSE4_1__
+    else if (isSimdMatrix && !pssm2 && !sm2qual) {
       while (aligner.getNextChunkDna(end1, end2, size,
 				     del.openCost, del.growCost,
 				     ins.openCost, ins.growCost))
 	chunks.push_back(SegmentPair(end1 - size, end2 - size, size));
-    }else{
+    }
+#endif
+    else {
       while( aligner.getNextChunk( end1, end2, size,
 				   del.openCost, del.growCost,
 				   ins.openCost, ins.growCost, gap.pairCost ) )


=====================================
src/GappedXdropAligner.hh
=====================================
@@ -317,7 +317,7 @@ class GappedXdropAligner {
   void init3();
 
   // Everything below here is for alignDna & getNextChunkDna
-
+#if defined __SSE4_1__
   std::vector<TinyScore> xTinyScores;
   std::vector<TinyScore> yTinyScores;
   std::vector<TinyScore> zTinyScores;
@@ -362,6 +362,7 @@ class GappedXdropAligner {
     while (*x2 != target) ++x2;
     bestSeq1position = x2 - x2beg + seq1beg;
   }
+#endif
 };
 
 }


=====================================
src/GappedXdropAlignerDna.cc
=====================================
@@ -1,6 +1,8 @@
 // Author: Martin C. Frith 2019
 // SPDX-License-Identifier: GPL-3.0-or-later
 
+#if defined __SSE4_1__
+
 #include "GappedXdropAligner.hh"
 #include "GappedXdropAlignerInl.hh"
 
@@ -109,7 +111,7 @@ int GappedXdropAligner::alignDna(const uchar *seq1,
 	SimdInt y = simdAdds1(simdLoad(y1+i), mDelGrowCost1);
 	SimdInt z = simdAdds1(simdLoad(z1+i), mInsGrowCost1);
 	SimdInt b = simdMin1(simdMin1(x, y), z);
-	SimdInt isDrop = simdEq1(simdMin1(b, mBadScore), mBadScore);
+	SimdInt isDrop = simdGe1(b, mBadScore);
 	mBestScore = simdMin1(b, mBestScore);
 	simdStore(x0+i, simdOr(simdSub1(b, s), isDrop));
 	simdStore(y0+i, simdMin1(simdAdds1(b, mDelOpenCost), y));
@@ -166,7 +168,7 @@ int GappedXdropAligner::alignDna(const uchar *seq1,
 	SimdInt y = simdAdds1(simdLoad(y1+i), mDelGrowCost1);
 	SimdInt z = simdAdds1(simdLoad(z1+i), mInsGrowCost1);
 	SimdInt b = simdMin1(simdMin1(x, y), z);
-	SimdInt isDrop = simdEq1(simdMin1(b, mBadScore), mBadScore);
+	SimdInt isDrop = simdGe1(b, mBadScore);
 	mBestScore = simdMin1(b, mBestScore);
 	simdStore(x0+i, simdOr(simdSub1(b, s), isDrop));
 	simdStore(y0+i, simdMin1(simdAdds1(b, mDelOpenCost), y));
@@ -273,3 +275,5 @@ bool GappedXdropAligner::getNextChunkDna(size_t &end1,
 }
 
 }
+
+#endif


=====================================
src/mcf_simd.hh
=====================================
@@ -4,7 +4,9 @@
 #ifndef MCF_SIMD_HH
 #define MCF_SIMD_HH
 
+#if defined __SSE4_1__
 #include <immintrin.h>
+#endif
 
 namespace mcf {
 
@@ -39,21 +41,12 @@ static inline SimdInt simdBlend(SimdInt x, SimdInt y, SimdInt mask) {
 }
 
 const int simdLen = 8;
-const int simdLen2 = 16;
 
 static inline SimdInt simdSet(int i7, int i6, int i5, int i4,
 			      int i3, int i2, int i1, int i0) {
   return _mm256_set_epi32(i7, i6, i5, i4, i3, i2, i1, i0);
 }
 
-static inline SimdInt simdSet2(short iF, short iE, short iD, short iC,
-			       short iB, short iA, short i9, short i8,
-			       short i7, short i6, short i5, short i4,
-			       short i3, short i2, short i1, short i0) {
-  return _mm256_set_epi16(iF, iE, iD, iC, iB, iA, i9, i8,
-			  i7, i6, i5, i4, i3, i2, i1, i0);
-}
-
 static inline SimdInt simdSet1(char jF, char jE, char jD, char jC,
 			       char jB, char jA, char j9, char j8,
 			       char j7, char j6, char j5, char j4,
@@ -72,34 +65,22 @@ static inline SimdInt simdFill(int x) {
   return _mm256_set1_epi32(x);
 }
 
-static inline SimdInt simdFill2(short x) {
-  return _mm256_set1_epi16(x);
-}
-
 static inline SimdInt simdFill1(char x) {
   return _mm256_set1_epi8(x);
 }
 
-static inline SimdInt simdEq1(SimdInt x, SimdInt y) {
-  return _mm256_cmpeq_epi8(x, y);
-}
-
 static inline SimdInt simdGt(SimdInt x, SimdInt y) {
   return _mm256_cmpgt_epi32(x, y);
 }
 
-static inline SimdInt simdGt2(SimdInt x, SimdInt y) {
-  return _mm256_cmpgt_epi16(x, y);
+static inline SimdInt simdGe1(SimdInt x, SimdInt y) {
+  return _mm256_cmpeq_epi8(_mm256_min_epu8(x, y), y);
 }
 
 static inline SimdInt simdAdd(SimdInt x, SimdInt y) {
   return _mm256_add_epi32(x, y);
 }
 
-static inline SimdInt simdAdd2(SimdInt x, SimdInt y) {
-  return _mm256_add_epi16(x, y);
-}
-
 static inline SimdInt simdAdd1(SimdInt x, SimdInt y) {
   return _mm256_add_epi8(x, y);
 }
@@ -112,10 +93,6 @@ static inline SimdInt simdSub(SimdInt x, SimdInt y) {
   return _mm256_sub_epi32(x, y);
 }
 
-static inline SimdInt simdSub2(SimdInt x, SimdInt y) {
-  return _mm256_sub_epi16(x, y);
-}
-
 static inline SimdInt simdSub1(SimdInt x, SimdInt y) {
   return _mm256_sub_epi8(x, y);
 }
@@ -128,10 +105,6 @@ static inline SimdInt simdMax(SimdInt x, SimdInt y) {
   return _mm256_max_epi32(x, y);
 }
 
-static inline SimdInt simdMax2(SimdInt x, SimdInt y) {
-  return _mm256_max_epi16(x, y);
-}
-
 static inline SimdInt simdMin1(SimdInt x, SimdInt y) {
   return _mm256_min_epu8(x, y);
 }
@@ -144,14 +117,6 @@ static inline int simdHorizontalMax(SimdInt x) {
   return _mm_cvtsi128_si32(z);
 }
 
-static inline int simdHorizontalMax2(SimdInt x) {
-  __m128i z = _mm256_castsi256_si128(x);
-  z = _mm_max_epi16(z, _mm256_extracti128_si256(x, 1));
-  z = _mm_sub_epi16(_mm_set1_epi16(32767), z);
-  z = _mm_minpos_epu16(z);
-  return 32767 - _mm_extract_epi16(z, 0);
-}
-
 static inline int simdHorizontalMin1(SimdInt x) {
   __m128i z = _mm256_castsi256_si128(x);
   z = _mm_min_epu8(z, _mm256_extracti128_si256(x, 1));
@@ -195,17 +160,11 @@ static inline SimdInt simdBlend(SimdInt x, SimdInt y, SimdInt mask) {
 }
 
 const int simdLen = 4;
-const int simdLen2 = 8;
 
 static inline SimdInt simdSet(int i3, int i2, int i1, int i0) {
   return _mm_set_epi32(i3, i2, i1, i0);
 }
 
-static inline SimdInt simdSet2(short i7, short i6, short i5, short i4,
-			       short i3, short i2, short i1, short i0) {
-  return _mm_set_epi16(i7, i6, i5, i4, i3, i2, i1, i0);
-}
-
 static inline SimdInt simdSet1(char iF, char iE, char iD, char iC,
 			       char iB, char iA, char i9, char i8,
 			       char i7, char i6, char i5, char i4,
@@ -218,34 +177,22 @@ static inline SimdInt simdFill(int x) {
   return _mm_set1_epi32(x);
 }
 
-static inline SimdInt simdFill2(short x) {
-  return _mm_set1_epi16(x);
-}
-
 static inline SimdInt simdFill1(char x) {
   return _mm_set1_epi8(x);
 }
 
-static inline SimdInt simdEq1(SimdInt x, SimdInt y) {
-  return _mm_cmpeq_epi8(x, y);
-}
-
 static inline SimdInt simdGt(SimdInt x, SimdInt y) {
   return _mm_cmpgt_epi32(x, y);
 }
 
-static inline SimdInt simdGt2(SimdInt x, SimdInt y) {
-  return _mm_cmpgt_epi16(x, y);
+static inline SimdInt simdGe1(SimdInt x, SimdInt y) {
+  return _mm_cmpeq_epi8(_mm_min_epu8(x, y), y);
 }
 
 static inline SimdInt simdAdd(SimdInt x, SimdInt y) {
   return _mm_add_epi32(x, y);
 }
 
-static inline SimdInt simdAdd2(SimdInt x, SimdInt y) {
-  return _mm_add_epi16(x, y);
-}
-
 static inline SimdInt simdAdd1(SimdInt x, SimdInt y) {
   return _mm_add_epi8(x, y);
 }
@@ -258,10 +205,6 @@ static inline SimdInt simdSub(SimdInt x, SimdInt y) {
   return _mm_sub_epi32(x, y);
 }
 
-static inline SimdInt simdSub2(SimdInt x, SimdInt y) {
-  return _mm_sub_epi16(x, y);
-}
-
 static inline SimdInt simdSub1(SimdInt x, SimdInt y) {
   return _mm_sub_epi8(x, y);
 }
@@ -274,10 +217,6 @@ static inline SimdInt simdMax(SimdInt x, SimdInt y) {
   return _mm_max_epi32(x, y);  // SSE4.1
 }
 
-static inline SimdInt simdMax2(SimdInt x, SimdInt y) {
-  return _mm_max_epi16(x, y);
-}
-
 static inline SimdInt simdMin1(SimdInt x, SimdInt y) {
   return _mm_min_epu8(x, y);
 }
@@ -288,12 +227,6 @@ static inline int simdHorizontalMax(SimdInt x) {
   return _mm_cvtsi128_si32(x);
 }
 
-static inline int simdHorizontalMax2(SimdInt x) {
-  x = simdSub2(simdFill2(32767), x);
-  x = _mm_minpos_epu16(x);  // SSE4.1
-  return 32767 - _mm_extract_epi16(x, 0);
-}
-
 static inline int simdHorizontalMin1(SimdInt x) {
   x = _mm_min_epu8(x, _mm_srli_epi16(x, 8));
   x = _mm_minpos_epu16(x);  // SSE4.1
@@ -301,13 +234,15 @@ static inline int simdHorizontalMin1(SimdInt x) {
 }
 
 static inline SimdInt simdChoose1(SimdInt items, SimdInt choices) {
-  return _mm_shuffle_epi8(items, choices);
+  return _mm_shuffle_epi8(items, choices);  // SSSE3
 }
 
 #else
 
 typedef int SimdInt;
+const int simdBytes = 1;
 const int simdLen = 1;
+static inline int simdZero() { return 0; }
 static inline int simdSet(int x) { return x; }
 static inline int simdFill(int x) { return x; }
 static inline int simdLoad(const int *p) { return *p; }


=====================================
src/version.hh
=====================================
@@ -1 +1 @@
-"1044"
+"1045"



View it on GitLab: https://salsa.debian.org/med-team/last-align/compare/fa87ac29dea20e703ae7a63893d29a740550fcd0...852e4d6aa834df6223ad3ff1ca2acee4fdc61b53

-- 
View it on GitLab: https://salsa.debian.org/med-team/last-align/compare/fa87ac29dea20e703ae7a63893d29a740550fcd0...852e4d6aa834df6223ad3ff1ca2acee4fdc61b53
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20191230/d3916ed1/attachment-0001.html>


More information about the debian-med-commit mailing list