[med-svn] [Git][med-team/simde][upstream] New upstream version 0.0.0.git.20200620
Michael R. Crusoe
gitlab at salsa.debian.org
Sat Jun 20 14:55:58 BST 2020
Michael R. Crusoe pushed to branch upstream at Debian Med / simde
Commits:
37adc71c by Michael R. Crusoe at 2020-06-20T15:35:45+02:00
New upstream version 0.0.0.git.20200620
- - - - -
3 changed files:
- simde/simde-diagnostic.h
- simde/x86/sse2.h
- test/x86/sse2.c
Changes:
=====================================
simde/simde-diagnostic.h
=====================================
@@ -207,7 +207,7 @@
/* Several compilers treat conformant array parameters as VLAs. We
* test to make sure we're in C mode (C++ doesn't support CAPs), and
- * that the version of the standard supports CAPs. We also blacklist
+ * that the version of the standard supports CAPs. We also reject
* some buggy compilers like MSVC (the logic is in Hedley if you want
* to take a look), but with certain warnings enabled some compilers
* still like to emit a diagnostic. */
=====================================
simde/x86/sse2.h
=====================================
@@ -5081,7 +5081,7 @@ simde_mm_slli_epi16 (simde__m128i a, const int imm8)
if ((imm8) <= 0) { \
ret = a; \
} else if ((imm8) > 15) { \
- ret = simde__m128i_from_neon_i32(vdupq_n_s32(0)); \
+ ret = simde_mm_setzero_si128(); \
} else { \
ret = simde__m128i_from_neon_i16( \
vshlq_n_s16(simde__m128i_to_neon_i16(a), (imm8))); \
@@ -5126,21 +5126,33 @@ simde_mm_slli_epi32 (simde__m128i a, const int imm8)
// The above is allowed by gcc/g++ 9 with -march=armv8-a, might work on A32V8 and elsewhere but needs testing
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) // clang can't handle the potential out of range use of imm8 even though that is handled
# define simde_mm_slli_epi32(a, imm8) \
- ({ \
- simde__m128i ret; \
- if ((imm8) <= 0) { \
- ret = a; \
- } else if ((imm8) > 31) { \
- ret = simde__m128i_from_neon_i32(vdupq_n_s32(0)); \
- } else { \
- ret = simde__m128i_from_neon_i32( \
- vshlq_n_s32(simde__m128i_to_neon_i32(a), (imm8))); \
- } \
- ret; \
+ ({ \
+ simde__m128i ret; \
+ if ((imm8) <= 0) { \
+ ret = a; \
+ } else if ((imm8) > 31) { \
+ ret = simde_mm_setzero_si128(); \
+ } else { \
+ ret = simde__m128i_from_neon_i32( \
+ vshlq_n_s32(simde__m128i_to_neon_i32(a), (imm8))); \
+ } \
+ ret; \
})
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
#define simde_mm_slli_epi32(a, imm8) \
- ((imm8 & ~31) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i32(vec_sl(simde__m128i_to_altivec_i32(a), vec_splat_u32(HEDLEY_STATIC_CAST(unsigned int, imm8)))))
+ ({ \
+ simde__m128i ret; \
+ if ((imm8) <= 0) { \
+ ret = a; \
+ } else if ((imm8) > 31) { \
+ ret = simde_mm_setzero_si128(); \
+ } else { \
+ ret = simde__m128i_from_altivec_i32( \
+ vec_sl(simde__m128i_to_altivec_i32(a), \
+ vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8)))); \
+ } \
+ ret; \
+ })
#endif
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
# define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8)
@@ -5181,7 +5193,7 @@ simde_mm_slli_epi64 (simde__m128i a, const int imm8)
if ((imm8) <= 0) { \
ret = a; \
} else if ((imm8) > 63) { \
- ret = simde__m128i_from_neon_i32(vdupq_n_s32(0)); \
+ ret = simde_mm_setzero_si128(); \
} else { \
ret = simde__m128i_from_neon_i64( \
vshlq_n_s64(simde__m128i_to_neon_i64(a), (imm8))); \
@@ -5228,7 +5240,7 @@ simde_mm_srli_epi16 (simde__m128i a, const int imm8)
if ((imm8) <= 0) { \
ret = a; \
} else if ((imm8) > 15) { \
- ret = simde__m128i_from_neon_i32(vdupq_n_s32(0)); \
+ ret = simde_mm_setzero_si128(); \
} else { \
ret = simde__m128i_from_neon_u16( \
vshrq_n_u16(simde__m128i_to_neon_u16(a), (imm8))); \
@@ -5269,7 +5281,7 @@ simde_mm_srli_epi32 (simde__m128i a, const int imm8)
# define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8)
#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(__clang__)
# define simde_mm_srli_epi32(a, imm8) \
- simde__m128i_from_neon_u32(vshrq_n_u32(simde__m128i_to_neon_u32(a), imm8))
+ simde__m128i_from_neon_u32(vshrq_n_u32(simde__m128i_to_neon_u32(a), imm8))
// The above is allowed by gcc/g++ 9 with -march=armv8-a, might work on A32V8 and elsewhere but needs testing
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) // clang can't handle the potential out of range use of imm8 even though that is handled
# define simde_mm_srli_epi32(a, imm8) \
@@ -5278,7 +5290,7 @@ simde_mm_srli_epi32 (simde__m128i a, const int imm8)
if ((imm8) <= 0) { \
ret = a; \
} else if ((imm8) > 31) { \
- ret = simde__m128i_from_neon_i32(vdupq_n_s32(0)); \
+ ret = simde_mm_setzero_si128(); \
} else { \
ret = simde__m128i_from_neon_u32( \
vshrq_n_u32(simde__m128i_to_neon_u32(a), (imm8))); \
@@ -5286,8 +5298,20 @@ simde_mm_srli_epi32 (simde__m128i a, const int imm8)
ret; \
})
#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
- #define simde_mm_srli_epi32(a, imm8) \
- ((imm8 & ~31) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i32(vec_sr(simde__m128i_to_altivec_i32(a), vec_splat_u32(HEDLEY_STATIC_CAST(unsigned int, imm8)))))
+# define simde_mm_srli_epi32(a, imm8) \
+ ({ \
+ simde__m128i ret; \
+ if ((imm8) <= 0) { \
+ ret = a; \
+ } else if ((imm8) > 31) { \
+ ret = simde_mm_setzero_si128(); \
+ } else { \
+ ret = simde__m128i_from_altivec_i32( \
+ vec_sr(simde__m128i_to_altivec_i32(a), \
+ vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8)))); \
+ } \
+ ret; \
+ })
#endif
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
# define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8)
@@ -5332,7 +5356,7 @@ simde_mm_srli_epi64 (simde__m128i a, const int imm8)
if ((imm8) <= 0) { \
ret = a; \
} else if ((imm8) > 63) { \
- ret = simde__m128i_from_neon_i32(vdupq_n_s32(0)); \
+ ret = simde_mm_setzero_si128(); \
} else { \
ret = simde__m128i_from_neon_u64( \
vshrq_n_u64(simde__m128i_to_neon_u64(a), (imm8))); \
=====================================
test/x86/sse2.c
=====================================
@@ -7280,6 +7280,28 @@ test_simde_mm_slli_epi32(SIMDE_MUNIT_TEST_ARGS) {
simde_mm_set_epi32(-1162179200, 348403872, -1645997312, -1766142624) }
};
+ static const struct {
+ const int32_t a[4];
+ const int32_t r[4];
+ } test_vec_18[] = {
+ { { INT32_C( 2018447505), INT32_C( 2072485070), -INT32_C( 1063800373), INT32_C( 1619529499) },
+ { INT32_C( 1111752704), INT32_C( 1933049856), -INT32_C( 953417728), INT32_C( 1013710848) } },
+ { { INT32_C( 1312528525), -INT32_C( 1886008265), INT32_C( 615191858), INT32_C( 1445629892) },
+ { INT32_C( 1647575040), INT32_C( 819724288), INT32_C( 1422393344), INT32_C( 1058013184) } },
+ { { INT32_C( 1842248351), -INT32_C( 504867562), INT32_C( 564232198), INT32_C( 495004047) },
+ { -INT32_C( 360972288), INT32_C( 1415053312), INT32_C( 1572864), -INT32_C( 1506017280) } },
+ { { -INT32_C( 127157055), -INT32_C( 1148780408), -INT32_C( 622906602), INT32_C( 1630538178) },
+ { -INT32_C( 217841664), INT32_C( 35651584), -INT32_C( 866648064), INT32_C( 654835712) } },
+ { { -INT32_C( 1714487421), INT32_C( 1534834260), -INT32_C( 964944842), INT32_C( 132382278) },
+ { -INT32_C( 32768000), -INT32_C( 649068544), INT32_C( 1893203968), -INT32_C( 115867648) } },
+ { { INT32_C( 1124093626), INT32_C( 1711179599), INT32_C( 2084560314), INT32_C( 1792897254) },
+ { INT32_C( 988282880), INT32_C( 490471424), INT32_C( 1994915840), -INT32_C( 1013448704) } },
+ { { -INT32_C( 1023169681), -INT32_C( 1742832030), -INT32_C( 513893477), INT32_C( 1407730073) },
+ { -INT32_C( 1380188160), -INT32_C( 108527616), INT32_C( 1852571648), INT32_C( 107216896) } },
+ { { -INT32_C( 543758192), INT32_C( 709137520), INT32_C( 1487373169), INT32_C( 1656915187) },
+ { -INT32_C( 1572864000), INT32_C( 1371537408), INT32_C( 230948864), INT32_C( 332136448) } },
+ };
+
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
simde__m128i zeros = simde_mm_set1_epi64x(INT64_C(0));
@@ -7296,6 +7318,12 @@ test_simde_mm_slli_epi32(SIMDE_MUNIT_TEST_ARGS) {
simde_assert_m128i_i32(r, ==, zeros);
}
+ for (size_t i = 0 ; i < (sizeof(test_vec_18) / sizeof(test_vec_18[0])) ; i++) {
+ simde__m128i a = simde_x_mm_loadu_epi32(test_vec_18[i].a);
+ simde__m128i r = simde_mm_slli_epi32(a, 18);
+ simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec_18[i].r));
+ }
+
return 0;
}
@@ -7339,6 +7367,34 @@ test_simde_mm_srli_epi32(SIMDE_MUNIT_TEST_ARGS) {
simde_assert_m128i_i32(r, ==, zeros);
}
+ static const struct {
+ const int32_t a[4];
+ const int32_t r[4];
+ } test_vec_18[] = {
+ { { -INT32_C( 1359328745), -INT32_C( 408445706), INT32_C( 239121880), INT32_C( 748205077) },
+ { INT32_C( 11198), INT32_C( 14825), INT32_C( 912), INT32_C( 2854) } },
+ { { -INT32_C( 345859164), INT32_C( 1010393205), INT32_C( 1843309992), -INT32_C( 446698290) },
+ { INT32_C( 15064), INT32_C( 3854), INT32_C( 7031), INT32_C( 14679) } },
+ { { INT32_C( 764631350), -INT32_C( 837534730), INT32_C( 98325744), -INT32_C( 1405979384) },
+ { INT32_C( 2916), INT32_C( 13189), INT32_C( 375), INT32_C( 11020) } },
+ { { -INT32_C( 2053663728), -INT32_C( 1648176907), INT32_C( 1275764862), -INT32_C( 1020106099) },
+ { INT32_C( 8549), INT32_C( 10096), INT32_C( 4866), INT32_C( 12492) } },
+ { { -INT32_C( 1175403069), -INT32_C( 259586816), INT32_C( 1660314713), -INT32_C( 384948007) },
+ { INT32_C( 11900), INT32_C( 15393), INT32_C( 6333), INT32_C( 14915) } },
+ { { -INT32_C( 1318148420), -INT32_C( 196136842), INT32_C( 1581341137), -INT32_C( 2027850813) },
+ { INT32_C( 11355), INT32_C( 15635), INT32_C( 6032), INT32_C( 8648) } },
+ { { INT32_C( 960500280), INT32_C( 1881786391), INT32_C( 97656620), INT32_C( 82764103) },
+ { INT32_C( 3664), INT32_C( 7178), INT32_C( 372), INT32_C( 315) } },
+ { { -INT32_C( 38445945), INT32_C( 1592919181), INT32_C( 565982046), -INT32_C( 559358554) },
+ { INT32_C( 16237), INT32_C( 6076), INT32_C( 2159), INT32_C( 14250) } },
+ };
+
+ for (size_t i = 0 ; i < (sizeof(test_vec_18) / sizeof(test_vec_18[0])) ; i++) {
+ simde__m128i a = simde_x_mm_loadu_epi32(test_vec_18[i].a);
+ simde__m128i r = simde_mm_srli_epi32(a, 18);
+ simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec_18[i].r));
+ }
+
return 0;
}
View it on GitLab: https://salsa.debian.org/med-team/simde/-/commit/37adc71c53e912834fe9e7f3ec415783888d39eb
--
View it on GitLab: https://salsa.debian.org/med-team/simde/-/commit/37adc71c53e912834fe9e7f3ec415783888d39eb
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200620/f9a57e9f/attachment-0001.html>
More information about the debian-med-commit mailing list