[med-svn] [Git][med-team/simde][master] 4 commits: routine-update: New upstream version
Michael R. Crusoe
gitlab at salsa.debian.org
Sun Apr 19 22:09:14 BST 2020
Michael R. Crusoe pushed to branch master at Debian Med / simde
Commits:
1a7b0a1d by Michael R. Crusoe at 2020-04-19T21:34:33+02:00
routine-update: New upstream version
- - - - -
a86b7885 by Michael R. Crusoe at 2020-04-19T21:34:35+02:00
New upstream version 0.0.0.git.20200419
- - - - -
54809ba1 by Michael R. Crusoe at 2020-04-19T21:34:44+02:00
Update upstream source from tag 'upstream/0.0.0.git.20200419'
Update to upstream version '0.0.0.git.20200419'
with Debian dir 0ae472af5fc73a45f779270821faff8c5e4af8a0
- - - - -
4b7e5af1 by Michael R. Crusoe at 2020-04-19T21:48:43+02:00
routine-update: Ready to upload to unstable
- - - - -
15 changed files:
- .github/workflows/ci.yml
- debian/changelog
- meson.build
- simde/x86/avx2.h
- simde/x86/avx512bw.h
- simde/x86/avx512f.h
- + simde/x86/avx512vl.h
- test/CMakeLists.txt
- test/x86/avx2.c
- test/x86/avx512bw.c
- test/x86/avx512f.c
- + test/x86/avx512vl.c
- test/x86/meson.build
- test/x86/test-x86-internal.h
- test/x86/test-x86.c
Changes:
=====================================
.github/workflows/ci.yml
=====================================
@@ -7,7 +7,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
- isax: ["", "-msse3", "-mssse3", "-msse4.1", "-msse4.2", "-mavx", "-mfma", "-mavx2"]
+ isax: ["", "-msse3", "-mssse3", "-msse4.1", "-msse4.2", "-mavx", "-mfma", "-mavx2", "-march=native"]
steps:
- uses: actions/checkout@v2
- name: Install APT Dependencies
=====================================
debian/changelog
=====================================
@@ -1,3 +1,9 @@
+simde (0.0.0.git.20200419-1) unstable; urgency=medium
+
+ * New upstream version
+
+ -- Michael R. Crusoe <michael.crusoe at gmail.com> Sun, 19 Apr 2020 21:34:54 +0200
+
simde (0.0.0.git.20200415-1) unstable; urgency=medium
* Disable clang on ppc64
=====================================
meson.build
=====================================
@@ -18,6 +18,7 @@ install_headers(
'simde/x86/avx2.h',
'simde/x86/avx512bw.h',
+ 'simde/x86/avx512vl.h',
'simde/x86/avx512f.h',
'simde/x86/fma.h',
'simde/x86/mmx.h',
=====================================
simde/x86/avx2.h
=====================================
@@ -625,6 +625,48 @@ simde_mm256_blendv_epi8(simde__m256i a, simde__m256i b, simde__m256i mask) {
# define _mm256_blendv_epi8(a, b, mask) simde_mm256_blendv_epi8(a, b, mask)
#endif
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m128i
+simde_mm_broadcastb_epi8 (simde__m128i a) {
+#if defined(SIMDE_AVX2_NATIVE)
+ return _mm_broadcastb_epi8(a);
+#else
+ simde__m128i_private r_;
+ simde__m128i_private a_= simde__m128i_to_private(a);
+
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
+ r_.i8[i] = a_.i8[0];
+ }
+
+ return simde__m128i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
+# define _mm_broadcastb_epi8(a) simde_mm_broadcastb_epi8(a)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m256i
+simde_mm256_broadcastb_epi8 (simde__m128i a) {
+#if defined(SIMDE_AVX2_NATIVE)
+ return _mm256_broadcastb_epi8(a);
+#else
+ simde__m256i_private r_;
+ simde__m128i_private a_= simde__m128i_to_private(a);
+
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
+ r_.i8[i] = a_.i8[0];
+ }
+
+ return simde__m256i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
+# define _mm256_broadcastb_epi8(a) simde_mm256_broadcastb_epi8(a)
+#endif
+
SIMDE__FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_broadcastsi128_si256 (simde__m128i a) {
@@ -1617,7 +1659,7 @@ simde_mm256_or_si256 (simde__m256i a, simde__m256i b) {
#endif
}
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
-# define _mm256_or_si256(a, b) simde_mm256_or_si128(a, b)
+# define _mm256_or_si256(a, b) simde_mm256_or_si256(a, b)
#endif
SIMDE__FUNCTION_ATTRIBUTES
=====================================
simde/x86/avx512bw.h
=====================================
@@ -59,25 +59,25 @@ SIMDE__BEGIN_DECLS
SIMDE__FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_add_epi8 (simde__m512i a, simde__m512i b) {
-#if defined(SIMDE_AVX512BW_NATIVE)
- return _mm512_add_epi8(a, b);
-#else
- simde__m512i_private
- r_,
- a_ = simde__m512i_to_private(a),
- b_ = simde__m512i_to_private(b);
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i8 = a_.i8 + b_.i8;
-#else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
- r_.m256i[i] = simde_mm256_add_epi8(a_.m256i[i], b_.m256i[i]);
- }
-#endif
-
- return simde__m512i_from_private(r_);
-#endif
+ #if defined(SIMDE_AVX512BW_NATIVE)
+ return _mm512_add_epi8(a, b);
+ #else
+ simde__m512i_private
+ r_,
+ a_ = simde__m512i_to_private(a),
+ b_ = simde__m512i_to_private(b);
+
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
+ r_.i8 = a_.i8 + b_.i8;
+ #else
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
+ r_.m256i[i] = simde_mm256_add_epi8(a_.m256i[i], b_.m256i[i]);
+ }
+ #endif
+
+ return simde__m512i_from_private(r_);
+ #endif
}
#if defined(SIMDE_AVX512BW_ENABLE_NATIVE_ALIASES)
# define _mm512_add_epi8(a, b) simde_mm512_add_epi8(a, b)
@@ -86,25 +86,25 @@ simde_mm512_add_epi8 (simde__m512i a, simde__m512i b) {
SIMDE__FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_add_epi16 (simde__m512i a, simde__m512i b) {
-#if defined(SIMDE_AVX512BW_NATIVE)
- return _mm512_add_epi16(a, b);
-#else
- simde__m512i_private
- r_,
- a_ = simde__m512i_to_private(a),
- b_ = simde__m512i_to_private(b);
-
-#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
- r_.i16 = a_.i16 + b_.i16;
-#else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
- r_.m256i[i] = simde_mm256_add_epi16(a_.m256i[i], b_.m256i[i]);
- }
-#endif
-
- return simde__m512i_from_private(r_);
-#endif
+ #if defined(SIMDE_AVX512BW_NATIVE)
+ return _mm512_add_epi16(a, b);
+ #else
+ simde__m512i_private
+ r_,
+ a_ = simde__m512i_to_private(a),
+ b_ = simde__m512i_to_private(b);
+
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
+ r_.i16 = a_.i16 + b_.i16;
+ #else
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
+ r_.m256i[i] = simde_mm256_add_epi16(a_.m256i[i], b_.m256i[i]);
+ }
+ #endif
+
+ return simde__m512i_from_private(r_);
+ #endif
}
#if defined(SIMDE_AVX512BW_ENABLE_NATIVE_ALIASES)
# define _mm512_add_epi16(a, b) simde_mm512_add_epi16(a, b)
@@ -113,31 +113,31 @@ simde_mm512_add_epi16 (simde__m512i a, simde__m512i b) {
SIMDE__FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_adds_epi8 (simde__m512i a, simde__m512i b) {
-#if defined(SIMDE_AVX512BW_NATIVE)
- return _mm512_adds_epi8(a, b);
-#else
- simde__m512i_private
- r_,
- a_ = simde__m512i_to_private(a),
- b_ = simde__m512i_to_private(b);
-
- #if !defined(HEDLEY_INTEL_VERSION)
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
- r_.m256i[i] = simde_mm256_adds_epi8(a_.m256i[i], b_.m256i[i]);
- }
+ #if defined(SIMDE_AVX512BW_NATIVE)
+ return _mm512_adds_epi8(a, b);
#else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
- const int16_t tmp =
- HEDLEY_STATIC_CAST(int16_t, a_.i8[i]) +
- HEDLEY_STATIC_CAST(int16_t, b_.i8[i]);
- r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, ((tmp < INT8_MAX) ? ((tmp > INT8_MIN) ? tmp : INT8_MIN) : INT8_MAX));
- }
+ simde__m512i_private
+ r_,
+ a_ = simde__m512i_to_private(a),
+ b_ = simde__m512i_to_private(b);
+
+ #if !defined(HEDLEY_INTEL_VERSION)
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
+ r_.m256i[i] = simde_mm256_adds_epi8(a_.m256i[i], b_.m256i[i]);
+ }
+ #else
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
+ const int16_t tmp =
+ HEDLEY_STATIC_CAST(int16_t, a_.i8[i]) +
+ HEDLEY_STATIC_CAST(int16_t, b_.i8[i]);
+ r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, ((tmp < INT8_MAX) ? ((tmp > INT8_MIN) ? tmp : INT8_MIN) : INT8_MAX));
+ }
+ #endif
+
+ return simde__m512i_from_private(r_);
#endif
-
- return simde__m512i_from_private(r_);
-#endif
}
#if defined(SIMDE_AVX512BW_ENABLE_NATIVE_ALIASES)
# define _mm512_adds_epi8(a, b) simde_mm512_adds_epi8(a, b)
@@ -146,31 +146,31 @@ simde_mm512_adds_epi8 (simde__m512i a, simde__m512i b) {
SIMDE__FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_adds_epi16 (simde__m512i a, simde__m512i b) {
-#if defined(SIMDE_AVX512BW_NATIVE)
- return _mm512_adds_epi16(a, b);
-#else
- simde__m512i_private
- r_,
- a_ = simde__m512i_to_private(a),
- b_ = simde__m512i_to_private(b);
-
- #if !defined(HEDLEY_INTEL_VERSION)
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
- r_.m256i[i] = simde_mm256_adds_epi16(a_.m256i[i], b_.m256i[i]);
- }
+ #if defined(SIMDE_AVX512BW_NATIVE)
+ return _mm512_adds_epi16(a, b);
#else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
- const int32_t tmp =
- HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) +
- HEDLEY_STATIC_CAST(int32_t, b_.i16[i]);
- r_.i16[i] = HEDLEY_STATIC_CAST(int32_t, ((tmp < INT16_MAX) ? ((tmp > INT16_MIN) ? tmp : INT16_MIN) : INT16_MAX));
- }
+ simde__m512i_private
+ r_,
+ a_ = simde__m512i_to_private(a),
+ b_ = simde__m512i_to_private(b);
+
+ #if !defined(HEDLEY_INTEL_VERSION)
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
+ r_.m256i[i] = simde_mm256_adds_epi16(a_.m256i[i], b_.m256i[i]);
+ }
+ #else
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
+ const int32_t tmp =
+ HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) +
+ HEDLEY_STATIC_CAST(int32_t, b_.i16[i]);
+ r_.i16[i] = HEDLEY_STATIC_CAST(int32_t, ((tmp < INT16_MAX) ? ((tmp > INT16_MIN) ? tmp : INT16_MIN) : INT16_MAX));
+ }
+ #endif
+
+ return simde__m512i_from_private(r_);
#endif
-
- return simde__m512i_from_private(r_);
-#endif
}
#if defined(SIMDE_AVX512BW_ENABLE_NATIVE_ALIASES)
# define _mm512_adds_epi16(a, b) simde_mm512_adds_epi16(a, b)
@@ -179,28 +179,28 @@ simde_mm512_adds_epi16 (simde__m512i a, simde__m512i b) {
SIMDE__FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_adds_epu8 (simde__m512i a, simde__m512i b) {
-#if defined(SIMDE_AVX512BW_NATIVE)
- return _mm512_adds_epu8(a, b);
-#else
- simde__m512i_private
- r_,
- a_ = simde__m512i_to_private(a),
- b_ = simde__m512i_to_private(b);
-
- #if !defined(HEDLEY_INTEL_VERSION)
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
- r_.m128i[i] = simde_mm_adds_epu8(a_.m128i[i], b_.m128i[i]);
- }
+ #if defined(SIMDE_AVX512BW_NATIVE)
+ return _mm512_adds_epu8(a, b);
#else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
- r_.u8[i] = ((UINT8_MAX - a_.u8[i]) > b_.u8[i]) ? (a_.u8[i] + b_.u8[i]) : UINT8_MAX;
- }
+ simde__m512i_private
+ r_,
+ a_ = simde__m512i_to_private(a),
+ b_ = simde__m512i_to_private(b);
+
+ #if !defined(HEDLEY_INTEL_VERSION)
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
+ r_.m128i[i] = simde_mm_adds_epu8(a_.m128i[i], b_.m128i[i]);
+ }
+ #else
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
+ r_.u8[i] = ((UINT8_MAX - a_.u8[i]) > b_.u8[i]) ? (a_.u8[i] + b_.u8[i]) : UINT8_MAX;
+ }
+ #endif
+
+ return simde__m512i_from_private(r_);
#endif
-
- return simde__m512i_from_private(r_);
-#endif
}
#if defined(SIMDE_AVX512BW_ENABLE_NATIVE_ALIASES)
# define _mm512_adds_epu8(a, b) simde_mm512_adds_epu8(a, b)
@@ -209,28 +209,28 @@ simde_mm512_adds_epu8 (simde__m512i a, simde__m512i b) {
SIMDE__FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_adds_epu16 (simde__m512i a, simde__m512i b) {
-#if defined(SIMDE_AVX512BW_NATIVE)
- return _mm512_adds_epu16(a, b);
-#else
- simde__m512i_private
- r_,
- a_ = simde__m512i_to_private(a),
- b_ = simde__m512i_to_private(b);
-
- #if !defined(HEDLEY_INTEL_VERSION)
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
- r_.m256i[i] = simde_mm256_adds_epu16(a_.m256i[i], b_.m256i[i]);
- }
+ #if defined(SIMDE_AVX512BW_NATIVE)
+ return _mm512_adds_epu16(a, b);
#else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
- r_.u16[i] = ((UINT16_MAX - a_.u16[i]) > b_.u16[i]) ? (a_.u16[i] + b_.u16[i]) : UINT16_MAX;
- }
+ simde__m512i_private
+ r_,
+ a_ = simde__m512i_to_private(a),
+ b_ = simde__m512i_to_private(b);
+
+ #if !defined(HEDLEY_INTEL_VERSION)
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
+ r_.m256i[i] = simde_mm256_adds_epu16(a_.m256i[i], b_.m256i[i]);
+ }
+ #else
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
+ r_.u16[i] = ((UINT16_MAX - a_.u16[i]) > b_.u16[i]) ? (a_.u16[i] + b_.u16[i]) : UINT16_MAX;
+ }
+ #endif
+
+ return simde__m512i_from_private(r_);
#endif
-
- return simde__m512i_from_private(r_);
-#endif
}
#if defined(SIMDE_AVX512BW_ENABLE_NATIVE_ALIASES)
# define _mm512_adds_epu16(a, b) simde_mm512_adds_epu16(a, b)
@@ -239,28 +239,28 @@ simde_mm512_adds_epu16 (simde__m512i a, simde__m512i b) {
SIMDE__FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_shuffle_epi8 (simde__m512i a, simde__m512i b) {
-#if defined(SIMDE_AVX512BW_NATIVE)
- return _mm512_shuffle_epi8(a, b);
-#else
- simde__m512i_private
- r_,
- a_ = simde__m512i_to_private(a),
- b_ = simde__m512i_to_private(b);
-
-#if defined(SIMDE_ARCH_X86_AVX2)
- r_.m256i[0] = simde_mm256_shuffle_epi8(a_.m256i[0], b_.m256i[0]);
- r_.m256i[1] = simde_mm256_shuffle_epi8(a_.m256i[1], b_.m256i[1]);
-#elif defined(SIMDE_ARCH_X86_SSSE3)
- r_.m128i[0] = simde_mm_shuffle_epi8(a_.m128i[0], b_.m128i[0]);
- r_.m128i[1] = simde_mm_shuffle_epi8(a_.m128i[1], b_.m128i[1]);
- r_.m128i[2] = simde_mm_shuffle_epi8(a_.m128i[2], b_.m128i[2]);
- r_.m128i[3] = simde_mm_shuffle_epi8(a_.m128i[3], b_.m128i[3]);
-#else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
- r_.u8[i] = (b_.u8[i] & 0x80) ? 0 : a_.u8[(b_.u8[i] & 0x0f) + (i & 0x30)];
- }
-#endif
+ #if defined(SIMDE_AVX512BW_NATIVE)
+ return _mm512_shuffle_epi8(a, b);
+ #else
+ simde__m512i_private
+ r_,
+ a_ = simde__m512i_to_private(a),
+ b_ = simde__m512i_to_private(b);
+
+ #if defined(SIMDE_ARCH_X86_AVX2)
+ r_.m256i[0] = simde_mm256_shuffle_epi8(a_.m256i[0], b_.m256i[0]);
+ r_.m256i[1] = simde_mm256_shuffle_epi8(a_.m256i[1], b_.m256i[1]);
+ #elif defined(SIMDE_ARCH_X86_SSSE3)
+ r_.m128i[0] = simde_mm_shuffle_epi8(a_.m128i[0], b_.m128i[0]);
+ r_.m128i[1] = simde_mm_shuffle_epi8(a_.m128i[1], b_.m128i[1]);
+ r_.m128i[2] = simde_mm_shuffle_epi8(a_.m128i[2], b_.m128i[2]);
+ r_.m128i[3] = simde_mm_shuffle_epi8(a_.m128i[3], b_.m128i[3]);
+ #else
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
+ r_.u8[i] = (b_.u8[i] & 0x80) ? 0 : a_.u8[(b_.u8[i] & 0x0f) + (i & 0x30)];
+ }
+ #endif
return simde__m512i_from_private(r_);
#endif
@@ -273,34 +273,36 @@ simde_mm512_shuffle_epi8 (simde__m512i a, simde__m512i b) {
SIMDE__FUNCTION_ATTRIBUTES
simde__mmask64
simde_mm512_cmpeq_epi8_mask (simde__m512i a, simde__m512i b) {
-#if defined(SIMDE_AVX512BW_NATIVE)
- return _mm512_cmpeq_epi8_mask(a, b);
-#else
- simde__m512i_private
- a_ = simde__m512i_to_private(a),
- b_ = simde__m512i_to_private(b);
-#if defined(SIMDE_ARCH_X86_AVX2)
- simde__mmask64 r_;
-
- // The second cast is absolutely necessary otherwise if the sign bit is set it will be sign extended to 64 bits
- r_ = (uint32_t) simde_mm256_movemask_epi8(simde_mm256_cmpeq_epi8(a_.m256i[1], b_.m256i[1]));
- r_ = (r_ << 32) | (uint32_t) simde_mm256_movemask_epi8(simde_mm256_cmpeq_epi8(a_.m256i[0], b_.m256i[0]));
-#elif defined(SIMDE_ARCH_X86_SSE2)
- simde__mmask64 r_;
-
- r_ = simde_mm_movemask_epi8(simde_mm_cmpeq_epi8(a_.m128i[3], b_.m128i[3]));
- r_ = (r_ << 16) | simde_mm_movemask_epi8(simde_mm_cmpeq_epi8(a_.m128i[2], b_.m128i[2]));
- r_ = (r_ << 16) | simde_mm_movemask_epi8(simde_mm_cmpeq_epi8(a_.m128i[1], b_.m128i[1]));
- r_ = (r_ << 16) | simde_mm_movemask_epi8(simde_mm_cmpeq_epi8(a_.m128i[0], b_.m128i[0]));
-#else
- simde__mmask64 r_ = 0;
-
- for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) {
- r_ |= (a_.u8[i] == b_.u8[i]) ? (1ULL << i) : 0;
- }
-#endif
- return r_;
-#endif
+ #if defined(SIMDE_AVX512BW_NATIVE)
+ return _mm512_cmpeq_epi8_mask(a, b);
+ #else
+ simde__m512i_private
+ a_ = simde__m512i_to_private(a),
+ b_ = simde__m512i_to_private(b);
+
+ #if defined(SIMDE_ARCH_X86_AVX2)
+ simde__mmask64 r_;
+
+ // The second cast is absolutely necessary otherwise if the sign bit is set it will be sign extended to 64 bits
+ r_ = (uint32_t) simde_mm256_movemask_epi8(simde_mm256_cmpeq_epi8(a_.m256i[1], b_.m256i[1]));
+ r_ = (r_ << 32) | (uint32_t) simde_mm256_movemask_epi8(simde_mm256_cmpeq_epi8(a_.m256i[0], b_.m256i[0]));
+ #elif defined(SIMDE_ARCH_X86_SSE2)
+ simde__mmask64 r_;
+
+ r_ = simde_mm_movemask_epi8(simde_mm_cmpeq_epi8(a_.m128i[3], b_.m128i[3]));
+ r_ = (r_ << 16) | simde_mm_movemask_epi8(simde_mm_cmpeq_epi8(a_.m128i[2], b_.m128i[2]));
+ r_ = (r_ << 16) | simde_mm_movemask_epi8(simde_mm_cmpeq_epi8(a_.m128i[1], b_.m128i[1]));
+ r_ = (r_ << 16) | simde_mm_movemask_epi8(simde_mm_cmpeq_epi8(a_.m128i[0], b_.m128i[0]));
+ #else
+ simde__mmask64 r_ = 0;
+
+ for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) {
+ r_ |= (a_.u8[i] == b_.u8[i]) ? (1ULL << i) : 0;
+ }
+ #endif
+
+ return r_;
+ #endif
}
#if defined(SIMDE_AVX512BW_ENABLE_NATIVE_ALIASES)
# define _mm512_cmpeq_epi8_mask(a, b) simde_mm512_cmpeq_epi8_mask(a, b)
@@ -309,23 +311,23 @@ simde_mm512_cmpeq_epi8_mask (simde__m512i a, simde__m512i b) {
SIMDE__FUNCTION_ATTRIBUTES
simde__m256i
simde_mm512_cvtepi16_epi8 (simde__m512i a) {
-#if defined(SIMDE_AVX512BW_NATIVE)
- return _mm512_cvtepi16_epi8(a);
-#else
- simde__m256i_private r_;
- simde__m512i_private a_ = simde__m512i_to_private(a);
-
-#if defined(SIMDE__CONVERT_VECTOR)
- SIMDE__CONVERT_VECTOR(r_.i8, a_.i16);
-#else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
- r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]);
- }
-#endif
-
- return simde__m256i_from_private(r_);
-#endif
+ #if defined(SIMDE_AVX512BW_NATIVE)
+ return _mm512_cvtepi16_epi8(a);
+ #else
+ simde__m256i_private r_;
+ simde__m512i_private a_ = simde__m512i_to_private(a);
+
+ #if defined(SIMDE__CONVERT_VECTOR)
+ SIMDE__CONVERT_VECTOR(r_.i8, a_.i16);
+ #else
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
+ r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]);
+ }
+ #endif
+
+ return simde__m256i_from_private(r_);
+ #endif
}
#if defined(SIMDE_AVX512BW_ENABLE_NATIVE_ALIASES)
# define _mm512_cvtepi16_epi8(a) simde_mm512_cvtepi16_epi8(a)
@@ -334,28 +336,54 @@ simde_mm512_cvtepi16_epi8 (simde__m512i a) {
SIMDE__FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_cvtepi8_epi16 (simde__m256i a) {
-#if defined(SIMDE_AVX512BW_NATIVE)
- return _mm512_cvtepi8_epi16(a);
-#else
- simde__m512i_private r_;
- simde__m256i_private a_ = simde__m256i_to_private(a);
-
-#if defined(SIMDE__CONVERT_VECTOR)
- SIMDE__CONVERT_VECTOR(r_.i16, a_.i8);
-#else
- SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
- r_.i16[i] = a_.i8[i];
- }
-#endif
-
- return simde__m512i_from_private(r_);
-#endif
+ #if defined(SIMDE_AVX512BW_NATIVE)
+ return _mm512_cvtepi8_epi16(a);
+ #else
+ simde__m512i_private r_;
+ simde__m256i_private a_ = simde__m256i_to_private(a);
+
+ #if defined(SIMDE__CONVERT_VECTOR)
+ SIMDE__CONVERT_VECTOR(r_.i16, a_.i8);
+ #else
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
+ r_.i16[i] = a_.i8[i];
+ }
+ #endif
+
+ return simde__m512i_from_private(r_);
+ #endif
}
#if defined(SIMDE_AVX512BW_ENABLE_NATIVE_ALIASES)
# define _mm512_cvtepi8_epi16(a) simde_mm512_cvtepi8_epi16(a)
#endif
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m256i
+simde_mm512_cvtsepi16_epi8 (simde__m512i a) {
+ #if defined(SIMDE_AVX512VL_NATIVE) && defined(SIMDE_AVX512BW_NATIVE)
+ return _mm512_cvtsepi16_epi8(a);
+ #else
+ simde__m256i_private r_;
+ simde__m512i_private a_ = simde__m512i_to_private(a);
+
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
+ r_.i8[i] =
+ (a_.i16[i] < INT8_MIN)
+ ? (INT8_MIN)
+ : ((a_.i16[i] > INT8_MAX)
+ ? (INT8_MAX)
+ : HEDLEY_STATIC_CAST(int8_t, a_.i16[i]));
+ }
+
+ return simde__m256i_from_private(r_);
+ #endif
+}
+#if defined(SIMDE_AVX512VL_ENABLE_NATIVE_ALIASES)
+ #define _mm512_cvtsepi16_epi8(a) simde_mm512_cvtsepi16_epi8(a)
+#endif
+
SIMDE__END_DECLS
HEDLEY_DIAGNOSTIC_POP
=====================================
simde/x86/avx512f.h
=====================================
@@ -1772,6 +1772,102 @@ simde_mm512_maskz_mov_pd(simde__mmask8 k, simde__m512d a) {
#define _mm512_maskz_mov_pd(k, a) simde_mm512_maskz_mov_pd(k, a)
#endif
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_abs_epi32(simde__m512i a) {
+#if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_abs_epi32(a);
+#else
+ simde__m512i_private
+ r_,
+ a_ = simde__m512i_to_private(a);
+
+ SIMDE__VECTORIZE
+ for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
+ r_.i32[i] = (a_.i32[i] < INT64_C(0)) ? -a_.i32[i] : a_.i32[i];
+ }
+
+ return simde__m512i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+# define _mm512_abs_epi32(a) simde_mm512_abs_epi32(a)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_mask_abs_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a) {
+#if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_mask_abs_epi32(src, k, a);
+#else
+ return simde_mm512_mask_mov_epi32(src, k, simde_mm512_abs_epi32(a));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#define _mm512_mask_abs_epi32(src, k, a) simde_mm512_mask_abs_epi32(src, k, a)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_maskz_abs_epi32(simde__mmask16 k, simde__m512i a) {
+#if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_maskz_abs_epi32(k, a);
+#else
+ return simde_mm512_maskz_mov_epi32(k, simde_mm512_abs_epi32(a));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#define _mm512_maskz_abs_epi32(k, a) simde_mm512_maskz_abs_epi32(k, a)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_abs_epi64(simde__m512i a) {
+#if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_abs_epi64(a);
+#else
+ simde__m512i_private
+ r_,
+ a_ = simde__m512i_to_private(a);
+
+ SIMDE__VECTORIZE
+ for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) {
+ r_.i64[i] = (a_.i64[i] < INT64_C(0)) ? -a_.i64[i] : a_.i64[i];
+ }
+
+ return simde__m512i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+# define _mm512_abs_epi64(a) simde_mm512_abs_epi64(a)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_mask_abs_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a) {
+#if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_mask_abs_epi64(src, k, a);
+#else
+ return simde_mm512_mask_mov_epi64(src, k, simde_mm512_abs_epi64(a));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#define _mm512_mask_abs_epi64(src, k, a) simde_mm512_mask_abs_epi64(src, k, a)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_maskz_abs_epi64(simde__mmask8 k, simde__m512i a) {
+#if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_maskz_abs_epi64(k, a);
+#else
+ return simde_mm512_maskz_mov_epi64(k, simde_mm512_abs_epi64(a));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#define _mm512_maskz_abs_epi64(k, a) simde_mm512_maskz_abs_epi64(k, a)
+#endif
+
SIMDE__FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_add_epi32 (simde__m512i a, simde__m512i b) {
=====================================
simde/x86/avx512vl.h
=====================================
@@ -0,0 +1,112 @@
+/* Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Copyright:
+ * 2020 Evan Nemerson <evan at nemerson.com>
+ */
+
+#if !defined(SIMDE__AVX512VL_H)
+#define SIMDE__AVX512VL_H
+#include "avx512bw.h"
+
+HEDLEY_DIAGNOSTIC_PUSH
+SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
+
+#if defined(SIMDE_AVX512VL_NATIVE)
+ #undef SIMDE_AVX512VL_NATIVE
+#endif
+#if defined(SIMDE_ARCH_X86_AVX512VL) && !defined(SIMDE_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
+ #define SIMDE_AVX512VL_NATIVE
+#elif defined(SIMDE_ARCH_ARM_NEON) && !defined(SIMDE_AVX512VL_NO_NEON) && !defined(SIMDE_NO_NEON)
+ #define SIMDE_AVX512VL_NEON
+#elif defined(SIMDE_ARCH_POWER_ALTIVEC)
+ #define SIMDE_AVX512VL_POWER_ALTIVEC
+#endif
+
+#if defined(SIMDE_AVX512VL_NATIVE)
+ #include <immintrin.h>
+#endif
+
+#if defined(SIMDE_AVX512VL_POWER_ALTIVEC)
+ #include <altivec.h>
+#endif
+
+#if !defined(SIMDE_AVX512VL_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)
+ #define SIMDE_AVX512VL_ENABLE_NATIVE_ALIASES
+#endif
+SIMDE__BEGIN_DECLS
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m128i
+simde_mm_cvtsepi16_epi8 (simde__m128i a) {
+ #if defined(SIMDE_AVX512VL_NATIVE) && defined(SIMDE_AVX512BW_NATIVE)
+ return _mm_cvtsepi16_epi8(a);
+ #else
+ simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128());;
+ simde__m128i_private a_ = simde__m128i_to_private(a);
+
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) {
+ r_.i8[i] =
+ (a_.i16[i] < INT8_MIN)
+ ? (INT8_MIN)
+ : ((a_.i16[i] > INT8_MAX)
+ ? (INT8_MAX)
+ : HEDLEY_STATIC_CAST(int8_t, a_.i16[i]));
+ }
+
+ return simde__m128i_from_private(r_);
+ #endif
+}
+#if defined(SIMDE_AVX512VL_ENABLE_NATIVE_ALIASES)
+ #define _mm_cvtsepi16_epi8(a) simde_mm_cvtsepi16_epi8(a)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m128i
+simde_mm256_cvtsepi16_epi8 (simde__m256i a) {
+ #if defined(SIMDE_AVX512VL_NATIVE) && defined(SIMDE_AVX512BW_NATIVE)
+ return _mm256_cvtsepi16_epi8(a);
+ #else
+ simde__m128i_private r_;
+ simde__m256i_private a_ = simde__m256i_to_private(a);
+
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
+ r_.i8[i] =
+ (a_.i16[i] < INT8_MIN)
+ ? (INT8_MIN)
+ : ((a_.i16[i] > INT8_MAX)
+ ? (INT8_MAX)
+ : HEDLEY_STATIC_CAST(int8_t, a_.i16[i]));
+ }
+
+ return simde__m128i_from_private(r_);
+ #endif
+}
+#if defined(SIMDE_AVX512VL_ENABLE_NATIVE_ALIASES)
+ #define _mm256_cvtsepi16_epi8(a) simde_mm256_cvtsepi16_epi8(a)
+#endif
+
+SIMDE__END_DECLS
+
+HEDLEY_DIAGNOSTIC_POP
+
+#endif /* !defined(SIMDE__AVX512VL_H) */
=====================================
test/CMakeLists.txt
=====================================
@@ -100,6 +100,7 @@ set(TEST_SOURCES_C
x86/avx2.c
x86/avx512f.c
x86/avx512bw.c
+ x86/avx512vl.c
x86/fma.c
x86/svml.c
@@ -158,6 +159,7 @@ foreach(tst
"/x86/avx2"
"/x86/avx512f"
"/x86/avx512bw"
+ "/x86/avx512vl"
"/x86/svml"
)
add_test(NAME "${tst}/${variant}" COMMAND ${CMAKE_CROSSCOMPILING_EMULATOR} $<TARGET_FILE:run-tests> "${tst}")
=====================================
test/x86/avx2.c
=====================================
@@ -4129,6 +4129,204 @@ test_simde_mm256_cmpgt_epi64(const MunitParameter params[], void* data) {
return MUNIT_OK;
}
+static MunitResult
+test_simde_mm_broadcastb_epi8(const MunitParameter params[], void* data) {
+ (void) params; /* not used by this test */
+ (void) data; /* not used by this test */
+ /* Table-driven check of simde_mm_broadcastb_epi8: in every case below, each element of the expected r equals the last argument passed to simde_mm_set_epi8 when building a. */
+ const struct {
+ simde__m128i a;
+ simde__m128i r;
+ } test_vec[8] = {
+ { simde_mm_set_epi8(INT8_C( -20), INT8_C( 103), INT8_C( -20), INT8_C( 116),
+ INT8_C( -9), INT8_C( 73), INT8_C( 44), INT8_C( 79),
+ INT8_C( -20), INT8_C( -81), INT8_C(-114), INT8_C( -81),
+ INT8_C( 10), INT8_C( 63), INT8_C( -41), INT8_C(-117)),
+ simde_mm_set_epi8(INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117),
+ INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117),
+ INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117),
+ INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117)) },
+ { simde_mm_set_epi8(INT8_C( -22), INT8_C(-124), INT8_C( -97), INT8_C( -1),
+ INT8_C( -4), INT8_C( -87), INT8_C( -49), INT8_C(-124),
+ INT8_C( -37), INT8_C( -17), INT8_C( -57), INT8_C( 0),
+ INT8_C(-121), INT8_C( 57), INT8_C( 49), INT8_C(-112)),
+ simde_mm_set_epi8(INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112),
+ INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112),
+ INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112),
+ INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112)) },
+ { simde_mm_set_epi8(INT8_C( -37), INT8_C( -43), INT8_C( -30), INT8_C( 45),
+ INT8_C( 86), INT8_C(-100), INT8_C( -53), INT8_C( -4),
+ INT8_C( 54), INT8_C(-107), INT8_C( 40), INT8_C( -68),
+ INT8_C( 49), INT8_C( 59), INT8_C( -37), INT8_C( 60)),
+ simde_mm_set_epi8(INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60),
+ INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60),
+ INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60),
+ INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60)) },
+ { simde_mm_set_epi8(INT8_C( 35), INT8_C( -98), INT8_C( -73), INT8_C( 22),
+ INT8_C(-127), INT8_C( 78), INT8_C( 125), INT8_C( 84),
+ INT8_C(-115), INT8_C(-124), INT8_C(-103), INT8_C( 59),
+ INT8_C( 15), INT8_C( -58), INT8_C( 81), INT8_C( 4)),
+ simde_mm_set_epi8(INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4),
+ INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4),
+ INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4),
+ INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4)) },
+ { simde_mm_set_epi8(INT8_C( -56), INT8_C( 88), INT8_C( 1), INT8_C( -59),
+ INT8_C( -14), INT8_C( -27), INT8_C( -63), INT8_C( -89),
+ INT8_C( -31), INT8_C( -81), INT8_C( -92), INT8_C( 115),
+ INT8_C( 106), INT8_C( -49), INT8_C( -19), INT8_C( 92)),
+ simde_mm_set_epi8(INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92),
+ INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92),
+ INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92),
+ INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92)) },
+ { simde_mm_set_epi8(INT8_C( 7), INT8_C( 47), INT8_C( 55), INT8_C( 77),
+ INT8_C( 16), INT8_C( -71), INT8_C(-122), INT8_C( -81),
+ INT8_C(-120), INT8_C( -64), INT8_C( -77), INT8_C( 13),
+ INT8_C( 80), INT8_C(-114), INT8_C(-121), INT8_C( 92)),
+ simde_mm_set_epi8(INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92),
+ INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92),
+ INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92),
+ INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92)) },
+ { simde_mm_set_epi8(INT8_C( 14), INT8_C( -85), INT8_C( -8), INT8_C( -80),
+ INT8_C( -58), INT8_C( 84), INT8_C( 52), INT8_C(-106),
+ INT8_C( -46), INT8_C( 118), INT8_C( -96), INT8_C( 88),
+ INT8_C( 35), INT8_C( 50), INT8_C(-112), INT8_C(-112)),
+ simde_mm_set_epi8(INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112),
+ INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112),
+ INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112),
+ INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112)) },
+ { simde_mm_set_epi8(INT8_C( 17), INT8_C( 88), INT8_C( -82), INT8_C(-109),
+ INT8_C( 56), INT8_C( 18), INT8_C( 73), INT8_C( 115),
+ INT8_C( -99), INT8_C( -71), INT8_C( -92), INT8_C( 121),
+ INT8_C( 93), INT8_C( 51), INT8_C( -65), INT8_C( 55)),
+ simde_mm_set_epi8(INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55),
+ INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55),
+ INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55),
+ INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55)) }
+ };
+ /* Run the function on each input and compare the result with the expected vector via simde_assert_m128i_i8. */
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m128i r = simde_mm_broadcastb_epi8(test_vec[i].a);
+ simde_assert_m128i_i8(r, ==, test_vec[i].r);
+ }
+
+ return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm256_broadcastb_epi8(const MunitParameter params[], void* data) {
+ (void) params; /* not used by this test */
+ (void) data; /* not used by this test */
+ /* Table-driven check of simde_mm256_broadcastb_epi8: a is a simde__m128i, and in every case below each element of the expected simde__m256i r equals the last argument passed to simde_mm_set_epi8 when building a. */
+ const struct {
+ simde__m128i a;
+ simde__m256i r;
+ } test_vec[8] = {
+ { simde_mm_set_epi8(INT8_C( -20), INT8_C( 103), INT8_C( -20), INT8_C( 116),
+ INT8_C( -9), INT8_C( 73), INT8_C( 44), INT8_C( 79),
+ INT8_C( -20), INT8_C( -81), INT8_C(-114), INT8_C( -81),
+ INT8_C( 10), INT8_C( 63), INT8_C( -41), INT8_C(-117)),
+ simde_mm256_set_epi8(INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117),
+ INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117),
+ INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117),
+ INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117),
+ INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117),
+ INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117),
+ INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117),
+ INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117)) },
+ { simde_mm_set_epi8(INT8_C( -22), INT8_C(-124), INT8_C( -97), INT8_C( -1),
+ INT8_C( -4), INT8_C( -87), INT8_C( -49), INT8_C(-124),
+ INT8_C( -37), INT8_C( -17), INT8_C( -57), INT8_C( 0),
+ INT8_C(-121), INT8_C( 57), INT8_C( 49), INT8_C(-112)),
+ simde_mm256_set_epi8(INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112),
+ INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112),
+ INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112),
+ INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112),
+ INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112),
+ INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112),
+ INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112),
+ INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112)) },
+ { simde_mm_set_epi8(INT8_C( -37), INT8_C( -43), INT8_C( -30), INT8_C( 45),
+ INT8_C( 86), INT8_C(-100), INT8_C( -53), INT8_C( -4),
+ INT8_C( 54), INT8_C(-107), INT8_C( 40), INT8_C( -68),
+ INT8_C( 49), INT8_C( 59), INT8_C( -37), INT8_C( 60)),
+ simde_mm256_set_epi8(INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60),
+ INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60),
+ INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60),
+ INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60),
+ INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60),
+ INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60),
+ INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60),
+ INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60)) },
+ { simde_mm_set_epi8(INT8_C( 35), INT8_C( -98), INT8_C( -73), INT8_C( 22),
+ INT8_C(-127), INT8_C( 78), INT8_C( 125), INT8_C( 84),
+ INT8_C(-115), INT8_C(-124), INT8_C(-103), INT8_C( 59),
+ INT8_C( 15), INT8_C( -58), INT8_C( 81), INT8_C( 4)),
+ simde_mm256_set_epi8(INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4),
+ INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4),
+ INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4),
+ INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4),
+ INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4),
+ INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4),
+ INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4),
+ INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4)) },
+ { simde_mm_set_epi8(INT8_C( -56), INT8_C( 88), INT8_C( 1), INT8_C( -59),
+ INT8_C( -14), INT8_C( -27), INT8_C( -63), INT8_C( -89),
+ INT8_C( -31), INT8_C( -81), INT8_C( -92), INT8_C( 115),
+ INT8_C( 106), INT8_C( -49), INT8_C( -19), INT8_C( 92)),
+ simde_mm256_set_epi8(INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92),
+ INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92),
+ INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92),
+ INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92),
+ INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92),
+ INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92),
+ INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92),
+ INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92)) },
+ { simde_mm_set_epi8(INT8_C( 7), INT8_C( 47), INT8_C( 55), INT8_C( 77),
+ INT8_C( 16), INT8_C( -71), INT8_C(-122), INT8_C( -81),
+ INT8_C(-120), INT8_C( -64), INT8_C( -77), INT8_C( 13),
+ INT8_C( 80), INT8_C(-114), INT8_C(-121), INT8_C( 92)),
+ simde_mm256_set_epi8(INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92),
+ INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92),
+ INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92),
+ INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92),
+ INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92),
+ INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92),
+ INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92),
+ INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92)) },
+ { simde_mm_set_epi8(INT8_C( 14), INT8_C( -85), INT8_C( -8), INT8_C( -80),
+ INT8_C( -58), INT8_C( 84), INT8_C( 52), INT8_C(-106),
+ INT8_C( -46), INT8_C( 118), INT8_C( -96), INT8_C( 88),
+ INT8_C( 35), INT8_C( 50), INT8_C(-112), INT8_C(-112)),
+ simde_mm256_set_epi8(INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112),
+ INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112),
+ INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112),
+ INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112),
+ INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112),
+ INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112),
+ INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112),
+ INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112)) },
+ { simde_mm_set_epi8(INT8_C( 17), INT8_C( 88), INT8_C( -82), INT8_C(-109),
+ INT8_C( 56), INT8_C( 18), INT8_C( 73), INT8_C( 115),
+ INT8_C( -99), INT8_C( -71), INT8_C( -92), INT8_C( 121),
+ INT8_C( 93), INT8_C( 51), INT8_C( -65), INT8_C( 55)),
+ simde_mm256_set_epi8(INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55),
+ INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55),
+ INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55),
+ INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55),
+ INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55),
+ INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55),
+ INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55),
+ INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55)) }
+ };
+ /* Run the function on each input and compare the result with the expected vector via simde_assert_m256i_i8. */
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m256i r = simde_mm256_broadcastb_epi8(test_vec[i].a);
+ simde_assert_m256i_i8(r, ==, test_vec[i].r);
+ }
+
+ return MUNIT_OK;
+}
+
static MunitResult
test_simde_mm256_broadcastsi128_si256(const MunitParameter params[], void* data) {
(void) params;
@@ -9671,6 +9869,8 @@ static MunitTest test_suite_tests[] = {
SIMDE_TESTS_DEFINE_TEST(mm256_blend_epi32),
SIMDE_TESTS_DEFINE_TEST(mm256_blendv_epi8),
+ SIMDE_TESTS_DEFINE_TEST(mm_broadcastb_epi8),
+ SIMDE_TESTS_DEFINE_TEST(mm256_broadcastb_epi8),
SIMDE_TESTS_DEFINE_TEST(mm256_broadcastsi128_si256),
SIMDE_TESTS_DEFINE_TEST(mm256_cmpeq_epi8),
=====================================
test/x86/avx512bw.c
=====================================
@@ -2473,6 +2473,153 @@ test_simde_mm512_cvtepi8_epi16(const MunitParameter params[], void* data) {
return MUNIT_OK;
}
+static MunitResult
+test_simde_mm512_cvtsepi16_epi8(const MunitParameter params[], void* data) {
+ (void) params; /* not used by this test */
+ (void) data; /* not used by this test */
+ /* Table-driven check of simde_mm512_cvtsepi16_epi8: in every case below, each expected 8-bit value in r is the 16-bit value at the same argument position of a clamped to [-128, 127]. */
+ const struct {
+ simde__m512i a;
+ simde__m256i r;
+ } test_vec[8] = {
+ { simde_mm512_set_epi16(INT16_C( 8002), INT16_C( -42), INT16_C( 317), INT16_C( 3),
+ INT16_C( 2), INT16_C( 22), INT16_C( 2), INT16_C( 102),
+ INT16_C( 0), INT16_C( 130), INT16_C( 297), INT16_C( 4068),
+ INT16_C( -202), INT16_C( 4593), INT16_C( 1545), INT16_C( -2),
+ INT16_C( -4392), INT16_C( -3947), INT16_C( 301), INT16_C(-11526),
+ INT16_C( -1), INT16_C( 1), INT16_C( -437), INT16_C( 4),
+ INT16_C( -139), INT16_C( -4859), INT16_C( -1), INT16_C( 12),
+ INT16_C( -591), INT16_C( -40), INT16_C( -269), INT16_C( -24)),
+ simde_mm256_set_epi8(INT8_C( 127), INT8_C( -42), INT8_C( 127), INT8_C( 3),
+ INT8_C( 2), INT8_C( 22), INT8_C( 2), INT8_C( 102),
+ INT8_C( 0), INT8_C( 127), INT8_C( 127), INT8_C( 127),
+ INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( -2),
+ INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C(-128),
+ INT8_C( -1), INT8_C( 1), INT8_C(-128), INT8_C( 4),
+ INT8_C(-128), INT8_C(-128), INT8_C( -1), INT8_C( 12),
+ INT8_C(-128), INT8_C( -40), INT8_C(-128), INT8_C( -24)) },
+ { simde_mm512_set_epi16(INT16_C( 2), INT16_C( 49), INT16_C( 2), INT16_C( 131),
+ INT16_C( 1), INT16_C(-15743), INT16_C( 1984), INT16_C( 9),
+ INT16_C(-20648), INT16_C( -5458), INT16_C( -1), INT16_C( -1),
+ INT16_C( -3713), INT16_C( -60), INT16_C( -1328), INT16_C( 4),
+ INT16_C( 150), INT16_C( -8), INT16_C( 1864), INT16_C( -14),
+ INT16_C( 20), INT16_C( 194), INT16_C( 0), INT16_C( 5954),
+ INT16_C( -13), INT16_C( -288), INT16_C( 3162), INT16_C( 1),
+ INT16_C( 14), INT16_C( 2628), INT16_C( -1), INT16_C( -8)),
+ simde_mm256_set_epi8(INT8_C( 2), INT8_C( 49), INT8_C( 2), INT8_C( 127),
+ INT8_C( 1), INT8_C(-128), INT8_C( 127), INT8_C( 9),
+ INT8_C(-128), INT8_C(-128), INT8_C( -1), INT8_C( -1),
+ INT8_C(-128), INT8_C( -60), INT8_C(-128), INT8_C( 4),
+ INT8_C( 127), INT8_C( -8), INT8_C( 127), INT8_C( -14),
+ INT8_C( 20), INT8_C( 127), INT8_C( 0), INT8_C( 127),
+ INT8_C( -13), INT8_C(-128), INT8_C( 127), INT8_C( 1),
+ INT8_C( 14), INT8_C( 127), INT8_C( -1), INT8_C( -8)) },
+ { simde_mm512_set_epi16(INT16_C( -562), INT16_C( -4), INT16_C( 14830), INT16_C( 23439),
+ INT16_C( -872), INT16_C( 2), INT16_C( -152), INT16_C( 42),
+ INT16_C( 23882), INT16_C( -7), INT16_C(-27326), INT16_C( 5025),
+ INT16_C( 7663), INT16_C(-14858), INT16_C( 0), INT16_C( -1),
+ INT16_C( 27), INT16_C( 3), INT16_C( -6), INT16_C( 54),
+ INT16_C( -16), INT16_C( -120), INT16_C( -1), INT16_C( 0),
+ INT16_C( 50), INT16_C( -3), INT16_C( 1), INT16_C( -128),
+ INT16_C( -6425), INT16_C( 1), INT16_C( -1), INT16_C( -167)),
+ simde_mm256_set_epi8(INT8_C(-128), INT8_C( -4), INT8_C( 127), INT8_C( 127),
+ INT8_C(-128), INT8_C( 2), INT8_C(-128), INT8_C( 42),
+ INT8_C( 127), INT8_C( -7), INT8_C(-128), INT8_C( 127),
+ INT8_C( 127), INT8_C(-128), INT8_C( 0), INT8_C( -1),
+ INT8_C( 27), INT8_C( 3), INT8_C( -6), INT8_C( 54),
+ INT8_C( -16), INT8_C(-120), INT8_C( -1), INT8_C( 0),
+ INT8_C( 50), INT8_C( -3), INT8_C( 1), INT8_C(-128),
+ INT8_C(-128), INT8_C( 1), INT8_C( -1), INT8_C(-128)) },
+ { simde_mm512_set_epi16(INT16_C( 117), INT16_C( 75), INT16_C( -56), INT16_C( -1),
+ INT16_C( 53), INT16_C( -283), INT16_C( 138), INT16_C( 3),
+ INT16_C( -1), INT16_C( 6705), INT16_C( 138), INT16_C( -3521),
+ INT16_C( 22), INT16_C( 1779), INT16_C( 2888), INT16_C( -1),
+ INT16_C( -189), INT16_C( 17712), INT16_C( -18), INT16_C( 923),
+ INT16_C( 1), INT16_C( 1), INT16_C( 2605), INT16_C( 353),
+ INT16_C( 201), INT16_C( -120), INT16_C(-28865), INT16_C( -13),
+ INT16_C( -701), INT16_C( 945), INT16_C( -2270), INT16_C( 0)),
+ simde_mm256_set_epi8(INT8_C( 117), INT8_C( 75), INT8_C( -56), INT8_C( -1),
+ INT8_C( 53), INT8_C(-128), INT8_C( 127), INT8_C( 3),
+ INT8_C( -1), INT8_C( 127), INT8_C( 127), INT8_C(-128),
+ INT8_C( 22), INT8_C( 127), INT8_C( 127), INT8_C( -1),
+ INT8_C(-128), INT8_C( 127), INT8_C( -18), INT8_C( 127),
+ INT8_C( 1), INT8_C( 1), INT8_C( 127), INT8_C( 127),
+ INT8_C( 127), INT8_C(-120), INT8_C(-128), INT8_C( -13),
+ INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 0)) },
+ { simde_mm512_set_epi16(INT16_C( 127), INT16_C( 406), INT16_C( 74), INT16_C( -2),
+ INT16_C( -12), INT16_C( 196), INT16_C( -4), INT16_C( -4852),
+ INT16_C( 38), INT16_C( -10), INT16_C( -408), INT16_C( 453),
+ INT16_C( -6833), INT16_C( 37), INT16_C( -3464), INT16_C( -2),
+ INT16_C( -488), INT16_C( 0), INT16_C( 643), INT16_C( 6),
+ INT16_C( 51), INT16_C( 0), INT16_C( -4), INT16_C( -1),
+ INT16_C(-14928), INT16_C( -46), INT16_C( 58), INT16_C( -3),
+ INT16_C( 0), INT16_C( -343), INT16_C(-10019), INT16_C( 0)),
+ simde_mm256_set_epi8(INT8_C( 127), INT8_C( 127), INT8_C( 74), INT8_C( -2),
+ INT8_C( -12), INT8_C( 127), INT8_C( -4), INT8_C(-128),
+ INT8_C( 38), INT8_C( -10), INT8_C(-128), INT8_C( 127),
+ INT8_C(-128), INT8_C( 37), INT8_C(-128), INT8_C( -2),
+ INT8_C(-128), INT8_C( 0), INT8_C( 127), INT8_C( 6),
+ INT8_C( 51), INT8_C( 0), INT8_C( -4), INT8_C( -1),
+ INT8_C(-128), INT8_C( -46), INT8_C( 58), INT8_C( -3),
+ INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C( 0)) },
+ { simde_mm512_set_epi16(INT16_C( -1), INT16_C( -378), INT16_C( 3797), INT16_C( 1),
+ INT16_C( -61), INT16_C( -6468), INT16_C( 0), INT16_C(-18745),
+ INT16_C( 3), INT16_C( -5), INT16_C( -1), INT16_C( 4066),
+ INT16_C( -1), INT16_C( -11), INT16_C( -2), INT16_C( 881),
+ INT16_C( -116), INT16_C( 22), INT16_C( 1), INT16_C( -1127),
+ INT16_C( 15), INT16_C( 2276), INT16_C( -149), INT16_C( 7345),
+ INT16_C( 850), INT16_C( -84), INT16_C( -2), INT16_C( 0),
+ INT16_C( -6), INT16_C( -3696), INT16_C( -2), INT16_C( 2136)),
+ simde_mm256_set_epi8(INT8_C( -1), INT8_C(-128), INT8_C( 127), INT8_C( 1),
+ INT8_C( -61), INT8_C(-128), INT8_C( 0), INT8_C(-128),
+ INT8_C( 3), INT8_C( -5), INT8_C( -1), INT8_C( 127),
+ INT8_C( -1), INT8_C( -11), INT8_C( -2), INT8_C( 127),
+ INT8_C(-116), INT8_C( 22), INT8_C( 1), INT8_C(-128),
+ INT8_C( 15), INT8_C( 127), INT8_C(-128), INT8_C( 127),
+ INT8_C( 127), INT8_C( -84), INT8_C( -2), INT8_C( 0),
+ INT8_C( -6), INT8_C(-128), INT8_C( -2), INT8_C( 127)) },
+ { simde_mm512_set_epi16(INT16_C( -368), INT16_C( -1403), INT16_C( -10), INT16_C( 2787),
+ INT16_C( 6145), INT16_C( -228), INT16_C( 2519), INT16_C( -1),
+ INT16_C( -244), INT16_C( 10), INT16_C( 14), INT16_C( 113),
+ INT16_C( 23), INT16_C( 1), INT16_C( 21613), INT16_C( -2),
+ INT16_C( -2), INT16_C( -29), INT16_C( 9866), INT16_C( 356),
+ INT16_C( -254), INT16_C( -21), INT16_C( 9), INT16_C( 9),
+ INT16_C( 249), INT16_C( 3052), INT16_C( -8880), INT16_C( 7),
+ INT16_C( 0), INT16_C( -4602), INT16_C( -8839), INT16_C( 18288)),
+ simde_mm256_set_epi8(INT8_C(-128), INT8_C(-128), INT8_C( -10), INT8_C( 127),
+ INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( -1),
+ INT8_C(-128), INT8_C( 10), INT8_C( 14), INT8_C( 113),
+ INT8_C( 23), INT8_C( 1), INT8_C( 127), INT8_C( -2),
+ INT8_C( -2), INT8_C( -29), INT8_C( 127), INT8_C( 127),
+ INT8_C(-128), INT8_C( -21), INT8_C( 9), INT8_C( 9),
+ INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 7),
+ INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C( 127)) },
+ { simde_mm512_set_epi16(INT16_C( 502), INT16_C( 22), INT16_C( -4), INT16_C( 1),
+ INT16_C( 2), INT16_C( -1148), INT16_C( 1), INT16_C( 8),
+ INT16_C( 25), INT16_C( 8616), INT16_C( -1), INT16_C( 0),
+ INT16_C( 1091), INT16_C( -370), INT16_C( 0), INT16_C( -1),
+ INT16_C( -1549), INT16_C( 12), INT16_C( -2), INT16_C( 4),
+ INT16_C( -3952), INT16_C( 1681), INT16_C( 3), INT16_C( 35),
+ INT16_C( -6), INT16_C( 0), INT16_C( -2), INT16_C( 51),
+ INT16_C( 0), INT16_C( 3760), INT16_C( -1), INT16_C( -1)),
+ simde_mm256_set_epi8(INT8_C( 127), INT8_C( 22), INT8_C( -4), INT8_C( 1),
+ INT8_C( 2), INT8_C(-128), INT8_C( 1), INT8_C( 8),
+ INT8_C( 25), INT8_C( 127), INT8_C( -1), INT8_C( 0),
+ INT8_C( 127), INT8_C(-128), INT8_C( 0), INT8_C( -1),
+ INT8_C(-128), INT8_C( 12), INT8_C( -2), INT8_C( 4),
+ INT8_C(-128), INT8_C( 127), INT8_C( 3), INT8_C( 35),
+ INT8_C( -6), INT8_C( 0), INT8_C( -2), INT8_C( 51),
+ INT8_C( 0), INT8_C( 127), INT8_C( -1), INT8_C( -1)) }
+ };
+ /* Run the function on each input and compare the result with the expected vector via simde_assert_m256i_i8. */
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m256i r = simde_mm512_cvtsepi16_epi8(test_vec[i].a);
+ simde_assert_m256i_i8(r, ==, test_vec[i].r);
+ }
+
+ return MUNIT_OK;
+}
+
#endif /* defined(SIMDE_avx512bw_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) */
HEDLEY_DIAGNOSTIC_PUSH
@@ -2491,9 +2638,10 @@ static MunitTest test_suite_tests[] = {
SIMDE_TESTS_DEFINE_TEST(mm512_shuffle_epi8),
SIMDE_TESTS_DEFINE_TEST(mm512_cmpeq_epi8_mask),
- SIMDE_TESTS_DEFINE_TEST(mm512_cvtepi16_epi8),
-
SIMDE_TESTS_DEFINE_TEST(mm512_cvtepi8_epi16),
+ SIMDE_TESTS_DEFINE_TEST(mm512_cvtepi16_epi8),
+
+ SIMDE_TESTS_DEFINE_TEST(mm512_cvtsepi16_epi8),
#endif /* defined(SIMDE_AVX512bw_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) */
{ NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }
=====================================
test/x86/avx512f.c
=====================================
@@ -27,6 +27,605 @@
#if defined(SIMDE_AVX512F_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS)
+static MunitResult
+test_simde_mm512_abs_epi32(const MunitParameter params[], void* data) {
+ (void) params; /* not used by this test */
+ (void) data; /* not used by this test */
+ /* Table-driven check of simde_mm512_abs_epi32: in every case below, each expected value in r is the absolute value of the 32-bit value at the same argument position of a. */
+ const struct {
+ simde__m512i a;
+ simde__m512i r;
+ } test_vec[8] = {
+ { simde_mm512_set_epi32(INT32_C(-1095158286), INT32_C( -133595553), INT32_C( -941949577), INT32_C(-1117722052),
+ INT32_C(-1053667317), INT32_C( -662420643), INT32_C( 2095193825), INT32_C( -799061081),
+ INT32_C( 347912513), INT32_C( -439299809), INT32_C( 2053030698), INT32_C( -277514113),
+ INT32_C( 1476262970), INT32_C( 1955038119), INT32_C( -77085072), INT32_C( 1014069144)),
+ simde_mm512_set_epi32(INT32_C( 1095158286), INT32_C( 133595553), INT32_C( 941949577), INT32_C( 1117722052),
+ INT32_C( 1053667317), INT32_C( 662420643), INT32_C( 2095193825), INT32_C( 799061081),
+ INT32_C( 347912513), INT32_C( 439299809), INT32_C( 2053030698), INT32_C( 277514113),
+ INT32_C( 1476262970), INT32_C( 1955038119), INT32_C( 77085072), INT32_C( 1014069144)) },
+ { simde_mm512_set_epi32(INT32_C( 1865049709), INT32_C( -408997463), INT32_C( 1771073477), INT32_C( 1463780468),
+ INT32_C( 268019741), INT32_C(-1725054429), INT32_C( 1408597864), INT32_C( 1576985133),
+ INT32_C( 170783936), INT32_C( 836522882), INT32_C( 1364040350), INT32_C( 563663058),
+ INT32_C(-1491438903), INT32_C( -873504608), INT32_C( 1431273511), INT32_C( -164765086)),
+ simde_mm512_set_epi32(INT32_C( 1865049709), INT32_C( 408997463), INT32_C( 1771073477), INT32_C( 1463780468),
+ INT32_C( 268019741), INT32_C( 1725054429), INT32_C( 1408597864), INT32_C( 1576985133),
+ INT32_C( 170783936), INT32_C( 836522882), INT32_C( 1364040350), INT32_C( 563663058),
+ INT32_C( 1491438903), INT32_C( 873504608), INT32_C( 1431273511), INT32_C( 164765086)) },
+ { simde_mm512_set_epi32(INT32_C( 1505063340), INT32_C( -79208486), INT32_C( -115790145), INT32_C( 1137793635),
+ INT32_C( -719063760), INT32_C( -465633360), INT32_C( 1417132608), INT32_C( 1715322300),
+ INT32_C( 1194443989), INT32_C( 1598244723), INT32_C( -360509626), INT32_C( -844528776),
+ INT32_C( -291907566), INT32_C( -980752736), INT32_C( 701363552), INT32_C( 1148036152)),
+ simde_mm512_set_epi32(INT32_C( 1505063340), INT32_C( 79208486), INT32_C( 115790145), INT32_C( 1137793635),
+ INT32_C( 719063760), INT32_C( 465633360), INT32_C( 1417132608), INT32_C( 1715322300),
+ INT32_C( 1194443989), INT32_C( 1598244723), INT32_C( 360509626), INT32_C( 844528776),
+ INT32_C( 291907566), INT32_C( 980752736), INT32_C( 701363552), INT32_C( 1148036152)) },
+ { simde_mm512_set_epi32(INT32_C(-1538804784), INT32_C( -43683957), INT32_C( -70380459), INT32_C( 259050545),
+ INT32_C(-1140217223), INT32_C( -24242506), INT32_C(-1281378925), INT32_C( -426768587),
+ INT32_C(-1825251144), INT32_C( -975195895), INT32_C( 758020113), INT32_C( -3401471),
+ INT32_C( 154668063), INT32_C( -827616009), INT32_C( 793625070), INT32_C( -735990247)),
+ simde_mm512_set_epi32(INT32_C( 1538804784), INT32_C( 43683957), INT32_C( 70380459), INT32_C( 259050545),
+ INT32_C( 1140217223), INT32_C( 24242506), INT32_C( 1281378925), INT32_C( 426768587),
+ INT32_C( 1825251144), INT32_C( 975195895), INT32_C( 758020113), INT32_C( 3401471),
+ INT32_C( 154668063), INT32_C( 827616009), INT32_C( 793625070), INT32_C( 735990247)) },
+ { simde_mm512_set_epi32(INT32_C( -919197120), INT32_C( 1902742720), INT32_C( 576001152), INT32_C( 772608991),
+ INT32_C( 1373611304), INT32_C( 156079462), INT32_C( 392030686), INT32_C( 1159450969),
+ INT32_C( 1376625025), INT32_C( -701917672), INT32_C( 1911493359), INT32_C( -115817480),
+ INT32_C( -875216623), INT32_C( 1333681477), INT32_C(-1067533891), INT32_C( 1671330781)),
+ simde_mm512_set_epi32(INT32_C( 919197120), INT32_C( 1902742720), INT32_C( 576001152), INT32_C( 772608991),
+ INT32_C( 1373611304), INT32_C( 156079462), INT32_C( 392030686), INT32_C( 1159450969),
+ INT32_C( 1376625025), INT32_C( 701917672), INT32_C( 1911493359), INT32_C( 115817480),
+ INT32_C( 875216623), INT32_C( 1333681477), INT32_C( 1067533891), INT32_C( 1671330781)) },
+ { simde_mm512_set_epi32(INT32_C(-1168385947), INT32_C(-1671882855), INT32_C(-1182456995), INT32_C(-1803534861),
+ INT32_C( 443878759), INT32_C( 702169153), INT32_C(-1879742181), INT32_C( 1627978919),
+ INT32_C( 583873330), INT32_C( -857098109), INT32_C( 710347808), INT32_C( 1707849385),
+ INT32_C( 1863512780), INT32_C( -371421167), INT32_C( 1902179408), INT32_C(-1189025654)),
+ simde_mm512_set_epi32(INT32_C( 1168385947), INT32_C( 1671882855), INT32_C( 1182456995), INT32_C( 1803534861),
+ INT32_C( 443878759), INT32_C( 702169153), INT32_C( 1879742181), INT32_C( 1627978919),
+ INT32_C( 583873330), INT32_C( 857098109), INT32_C( 710347808), INT32_C( 1707849385),
+ INT32_C( 1863512780), INT32_C( 371421167), INT32_C( 1902179408), INT32_C( 1189025654)) },
+ { simde_mm512_set_epi32(INT32_C( 7990856), INT32_C(-1991291137), INT32_C( 1404443548), INT32_C(-1023849862),
+ INT32_C( 2054941409), INT32_C(-1604088325), INT32_C( 721271909), INT32_C(-1622295089),
+ INT32_C( 1869222605), INT32_C(-1583998423), INT32_C( -801626928), INT32_C( -940395766),
+ INT32_C( 1108931720), INT32_C( -471669445), INT32_C( 1204289475), INT32_C( -752679106)),
+ simde_mm512_set_epi32(INT32_C( 7990856), INT32_C( 1991291137), INT32_C( 1404443548), INT32_C( 1023849862),
+ INT32_C( 2054941409), INT32_C( 1604088325), INT32_C( 721271909), INT32_C( 1622295089),
+ INT32_C( 1869222605), INT32_C( 1583998423), INT32_C( 801626928), INT32_C( 940395766),
+ INT32_C( 1108931720), INT32_C( 471669445), INT32_C( 1204289475), INT32_C( 752679106)) },
+ { simde_mm512_set_epi32(INT32_C( 1399806844), INT32_C( 1131841699), INT32_C( -346937782), INT32_C( 567816154),
+ INT32_C(-1589012616), INT32_C(-2005496894), INT32_C( 1401681986), INT32_C( 423760716),
+ INT32_C( 431684101), INT32_C( 852583616), INT32_C(-1369299290), INT32_C( -663899319),
+ INT32_C( 1580470265), INT32_C( 298083241), INT32_C( -630373638), INT32_C(-1937828661)),
+ simde_mm512_set_epi32(INT32_C( 1399806844), INT32_C( 1131841699), INT32_C( 346937782), INT32_C( 567816154),
+ INT32_C( 1589012616), INT32_C( 2005496894), INT32_C( 1401681986), INT32_C( 423760716),
+ INT32_C( 431684101), INT32_C( 852583616), INT32_C( 1369299290), INT32_C( 663899319),
+ INT32_C( 1580470265), INT32_C( 298083241), INT32_C( 630373638), INT32_C( 1937828661)) }
+ };
+ /* Run the function on each input and compare the result with the expected vector via simde_assert_m512i_i32. */
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m512i r = simde_mm512_abs_epi32(test_vec[i].a);
+ simde_assert_m512i_i32(r, ==, test_vec[i].r);
+ }
+
+ return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm512_mask_abs_epi32(const MunitParameter params[], void* data) {
+ (void) params;
+ (void) data;
+
+ const struct {
+ simde__m512i src;
+ simde__mmask16 k;
+ simde__m512i a;
+ simde__m512i r;
+ } test_vec[8] = {
+ { simde_mm512_set_epi32(INT32_C( 114710097), INT32_C( 1837246098), INT32_C(-1399577225), INT32_C(-1388127606),
+ INT32_C( 1116027725), INT32_C( -871797325), INT32_C(-1979326643), INT32_C( 1477004857),
+ INT32_C( 1670723749), INT32_C(-1006052339), INT32_C( 1863789116), INT32_C( -690396684),
+ INT32_C( -629773535), INT32_C( 667046523), INT32_C( 1317445565), INT32_C( -732937024)),
+ UINT16_C(28888),
+ simde_mm512_set_epi32(INT32_C(-1877548571), INT32_C( -399920351), INT32_C( 15525797), INT32_C( 18165921),
+ INT32_C( 2085930596), INT32_C( 1662282658), INT32_C(-1842752263), INT32_C( 499820912),
+ INT32_C( 1419797765), INT32_C( -44818966), INT32_C( 1761152620), INT32_C(-1288657930),
+ INT32_C( 894078020), INT32_C(-1369431563), INT32_C( -103362440), INT32_C(-1559726025)),
+ simde_mm512_set_epi32(INT32_C( 114710097), INT32_C( 399920351), INT32_C( 15525797), INT32_C( 18165921),
+ INT32_C( 1116027725), INT32_C( -871797325), INT32_C(-1979326643), INT32_C( 1477004857),
+ INT32_C( 1419797765), INT32_C( 44818966), INT32_C( 1863789116), INT32_C( 1288657930),
+ INT32_C( 894078020), INT32_C( 667046523), INT32_C( 1317445565), INT32_C( -732937024)) },
+ { simde_mm512_set_epi32(INT32_C( 1505436737), INT32_C( 342253548), INT32_C( 1435179252), INT32_C( 1326766533),
+ INT32_C( 156769011), INT32_C( 343760696), INT32_C( 611303965), INT32_C( 1457341409),
+ INT32_C( 165452421), INT32_C(-1824090116), INT32_C( -184738383), INT32_C( -191086464),
+ INT32_C( -117301127), INT32_C(-1526262537), INT32_C( -208138847), INT32_C( 807348777)),
+ UINT16_C(61134),
+ simde_mm512_set_epi32(INT32_C( 1427056174), INT32_C( 2097896620), INT32_C( 1813263538), INT32_C( 1909821993),
+ INT32_C( 1439822042), INT32_C(-1049213292), INT32_C( 1557133349), INT32_C( 1168931268),
+ INT32_C( -810546774), INT32_C(-1283013132), INT32_C( 654302587), INT32_C( 314275905),
+ INT32_C(-1091094079), INT32_C( -114174508), INT32_C( 407580338), INT32_C( 1906809805)),
+ simde_mm512_set_epi32(INT32_C( 1427056174), INT32_C( 2097896620), INT32_C( 1813263538), INT32_C( 1326766533),
+ INT32_C( 1439822042), INT32_C( 1049213292), INT32_C( 1557133349), INT32_C( 1457341409),
+ INT32_C( 810546774), INT32_C( 1283013132), INT32_C( -184738383), INT32_C( -191086464),
+ INT32_C( 1091094079), INT32_C( 114174508), INT32_C( 407580338), INT32_C( 807348777)) },
+ { simde_mm512_set_epi32(INT32_C( 905172649), INT32_C(-1044778809), INT32_C(-1938215986), INT32_C(-1138753169),
+ INT32_C(-1689961651), INT32_C( 890456168), INT32_C( 1382435241), INT32_C( -803845344),
+ INT32_C( 430838507), INT32_C( 1075259040), INT32_C(-1956785379), INT32_C(-1586468297),
+ INT32_C( 622055688), INT32_C(-1127740382), INT32_C( 466514910), INT32_C(-1745879628)),
+ UINT16_C(30570),
+ simde_mm512_set_epi32(INT32_C( -310045086), INT32_C( 560822999), INT32_C( -680371476), INT32_C( 1838395052),
+ INT32_C(-1152635838), INT32_C( -481448106), INT32_C( 871399876), INT32_C( -939960538),
+ INT32_C( -898000986), INT32_C( -641497176), INT32_C( 657638908), INT32_C(-1796735419),
+ INT32_C(-1032150818), INT32_C( 151713087), INT32_C( 1554707006), INT32_C( -318690470)),
+ simde_mm512_set_epi32(INT32_C( 905172649), INT32_C( 560822999), INT32_C( 680371476), INT32_C( 1838395052),
+ INT32_C(-1689961651), INT32_C( 481448106), INT32_C( 871399876), INT32_C( 939960538),
+ INT32_C( 430838507), INT32_C( 641497176), INT32_C( 657638908), INT32_C(-1586468297),
+ INT32_C( 1032150818), INT32_C(-1127740382), INT32_C( 1554707006), INT32_C(-1745879628)) },
+ { simde_mm512_set_epi32(INT32_C(-1675700291), INT32_C( -85412591), INT32_C(-1865493216), INT32_C(-1122257925),
+ INT32_C( 955620837), INT32_C( -725693586), INT32_C( 1056307491), INT32_C( 1924019839),
+ INT32_C(-2012466116), INT32_C(-1808881746), INT32_C( -887453452), INT32_C( 160221724),
+ INT32_C( -886018282), INT32_C( 1222780200), INT32_C( 1877396684), INT32_C( 283360472)),
+ UINT16_C(28339),
+ simde_mm512_set_epi32(INT32_C(-1238615237), INT32_C( 583893938), INT32_C( -594441984), INT32_C( 1561597956),
+ INT32_C( 174377227), INT32_C( 319460903), INT32_C(-1295208114), INT32_C( 659707887),
+ INT32_C( 1117898731), INT32_C( -209622907), INT32_C(-1431480123), INT32_C(-2058827609),
+ INT32_C(-1519596795), INT32_C( 24332922), INT32_C( -338106630), INT32_C(-1565374776)),
+ simde_mm512_set_epi32(INT32_C(-1675700291), INT32_C( 583893938), INT32_C( 594441984), INT32_C(-1122257925),
+ INT32_C( 174377227), INT32_C( 319460903), INT32_C( 1295208114), INT32_C( 1924019839),
+ INT32_C( 1117898731), INT32_C(-1808881746), INT32_C( 1431480123), INT32_C( 2058827609),
+ INT32_C( -886018282), INT32_C( 1222780200), INT32_C( 338106630), INT32_C( 1565374776)) },
+ { simde_mm512_set_epi32(INT32_C( 178377352), INT32_C( -324510384), INT32_C( 446946466), INT32_C(-1323398690),
+ INT32_C( -720979875), INT32_C( -512216094), INT32_C( 1145272930), INT32_C( -706074883),
+ INT32_C(-1863795060), INT32_C( -525595897), INT32_C( 1357119557), INT32_C( 837734387),
+ INT32_C( -607392699), INT32_C( -498581669), INT32_C(-2108693629), INT32_C( -476969927)),
+ UINT16_C(42507),
+ simde_mm512_set_epi32(INT32_C( -5472621), INT32_C( -263868960), INT32_C(-1867831731), INT32_C( 955254216),
+ INT32_C( 1990179011), INT32_C(-1729740457), INT32_C( 1711933869), INT32_C(-1566075058),
+ INT32_C( -550106516), INT32_C(-1087591249), INT32_C( 919917002), INT32_C(-1410389997),
+ INT32_C( -188117230), INT32_C( 1025569327), INT32_C(-1456210246), INT32_C( -254945819)),
+ simde_mm512_set_epi32(INT32_C( 5472621), INT32_C( -324510384), INT32_C( 1867831731), INT32_C(-1323398690),
+ INT32_C( -720979875), INT32_C( 1729740457), INT32_C( 1711933869), INT32_C( -706074883),
+ INT32_C(-1863795060), INT32_C( -525595897), INT32_C( 1357119557), INT32_C( 837734387),
+ INT32_C( 188117230), INT32_C( -498581669), INT32_C( 1456210246), INT32_C( 254945819)) },
+ { simde_mm512_set_epi32(INT32_C(-1007934437), INT32_C( 201253136), INT32_C( 2123754123), INT32_C( 1034305262),
+ INT32_C( 2139323878), INT32_C( -545410429), INT32_C(-1549231865), INT32_C( 1779895500),
+ INT32_C( 1932853973), INT32_C( 2135732954), INT32_C( 1232725518), INT32_C( 339564914),
+ INT32_C( -113030707), INT32_C(-1715459937), INT32_C( -492435091), INT32_C(-1720946495)),
+ UINT16_C(49758),
+ simde_mm512_set_epi32(INT32_C( 348473993), INT32_C(-1624874318), INT32_C( 361690252), INT32_C( 165927413),
+ INT32_C(-1864332117), INT32_C( -524477604), INT32_C( 481484649), INT32_C(-1499715490),
+ INT32_C(-1683117466), INT32_C(-2055457330), INT32_C( -850617531), INT32_C(-2081246973),
+ INT32_C( 1276057415), INT32_C( 1619064589), INT32_C(-1536816688), INT32_C( 2060578085)),
+ simde_mm512_set_epi32(INT32_C( 348473993), INT32_C( 1624874318), INT32_C( 2123754123), INT32_C( 1034305262),
+ INT32_C( 2139323878), INT32_C( -545410429), INT32_C( 481484649), INT32_C( 1779895500),
+ INT32_C( 1932853973), INT32_C( 2055457330), INT32_C( 1232725518), INT32_C( 2081246973),
+ INT32_C( 1276057415), INT32_C( 1619064589), INT32_C( 1536816688), INT32_C(-1720946495)) },
+ { simde_mm512_set_epi32(INT32_C( 860828042), INT32_C( 1459856596), INT32_C(-1901530659), INT32_C( 1296141157),
+ INT32_C( 778663095), INT32_C(-1872048536), INT32_C(-1115787645), INT32_C(-1142406643),
+ INT32_C( 1518955242), INT32_C( -174688543), INT32_C( 1537062129), INT32_C( -974095643),
+ INT32_C( 125816377), INT32_C(-1032428044), INT32_C( -374455538), INT32_C( -648832583)),
+ UINT16_C(41340),
+ simde_mm512_set_epi32(INT32_C( 1553986008), INT32_C( -808715903), INT32_C(-2114331727), INT32_C( 878797396),
+ INT32_C( 1547560130), INT32_C( -931453209), INT32_C( 639671594), INT32_C( 734358771),
+ INT32_C(-1802430748), INT32_C( 38083245), INT32_C( 636500349), INT32_C( 2020438947),
+ INT32_C( 89083218), INT32_C( 2041918986), INT32_C(-2068453500), INT32_C( 1772569863)),
+ simde_mm512_set_epi32(INT32_C( 1553986008), INT32_C( 1459856596), INT32_C( 2114331727), INT32_C( 1296141157),
+ INT32_C( 778663095), INT32_C(-1872048536), INT32_C(-1115787645), INT32_C( 734358771),
+ INT32_C( 1518955242), INT32_C( 38083245), INT32_C( 636500349), INT32_C( 2020438947),
+ INT32_C( 89083218), INT32_C( 2041918986), INT32_C( -374455538), INT32_C( -648832583)) },
+ { simde_mm512_set_epi32(INT32_C(-1208548961), INT32_C( 1705109710), INT32_C( -159097588), INT32_C( -879037423),
+ INT32_C( 2121552533), INT32_C( 595529007), INT32_C( -405863552), INT32_C( 1431630584),
+ INT32_C( -616000216), INT32_C( 444327364), INT32_C( 613413664), INT32_C(-2128463203),
+ INT32_C( 939927077), INT32_C(-1255659348), INT32_C(-1631544337), INT32_C(-1727626838)),
+ UINT16_C(49163),
+ simde_mm512_set_epi32(INT32_C( 895846723), INT32_C( 449272422), INT32_C( 1127330699), INT32_C(-1084895433),
+ INT32_C( -399265722), INT32_C( 697840482), INT32_C( -598276089), INT32_C( -50403840),
+ INT32_C( 1970006978), INT32_C( 1602141812), INT32_C(-1773480652), INT32_C( 740913018),
+ INT32_C( 1668822994), INT32_C( 698152405), INT32_C( 1772335922), INT32_C( 847772835)),
+ simde_mm512_set_epi32(INT32_C( 895846723), INT32_C( 449272422), INT32_C( -159097588), INT32_C( -879037423),
+ INT32_C( 2121552533), INT32_C( 595529007), INT32_C( -405863552), INT32_C( 1431630584),
+ INT32_C( -616000216), INT32_C( 444327364), INT32_C( 613413664), INT32_C(-2128463203),
+ INT32_C( 1668822994), INT32_C(-1255659348), INT32_C( 1772335922), INT32_C( 847772835)) }
+ };
+
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m512i r = simde_mm512_mask_abs_epi32(test_vec[i].src, test_vec[i].k, test_vec[i].a);
+ simde_assert_m512i_i32(r, ==, test_vec[i].r);
+ }
+
+ return MUNIT_OK;
+}
+
+static MunitResult /* Table-driven test: runs simde_mm512_maskz_abs_epi32 on 8 stored cases and compares each result with a stored vector. */
+test_simde_mm512_maskz_abs_epi32(const MunitParameter params[], void* data) {
+ (void) params; /* params and data are not otherwise referenced in this test */
+ (void) data;
+
+ const struct {
+ simde__mmask16 k; /* mask argument; the bit positions set in each value are noted beside it below */
+ simde__m512i a; /* input vector */
+ simde__m512i r; /* expected result: in these vectors, |a| where the matching bit of k is set and 0 elsewhere */
+ } test_vec[8] = {
+ { UINT16_C(17600), /* bits 14, 10, 7, 6; in this table bit 0 lines up with the LAST simde_mm512_set_epi32 argument */
+ simde_mm512_set_epi32(INT32_C( 393115914), INT32_C( -9604904), INT32_C( 114710097), INT32_C( 1837246098),
+ INT32_C(-1399577225), INT32_C(-1388127606), INT32_C( 1116027725), INT32_C( -871797325),
+ INT32_C(-1979326643), INT32_C( 1477004857), INT32_C( 1670723749), INT32_C(-1006052339),
+ INT32_C( 1863789116), INT32_C( -690396684), INT32_C( -629773535), INT32_C( 667046523)),
+ simde_mm512_set_epi32(INT32_C( 0), INT32_C( 9604904), INT32_C( 0), INT32_C( 0),
+ INT32_C( 0), INT32_C( 1388127606), INT32_C( 0), INT32_C( 0),
+ INT32_C( 1979326643), INT32_C( 1477004857), INT32_C( 0), INT32_C( 0),
+ INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) },
+ { UINT16_C(30775), /* bits 14, 13, 12, 11, 5, 4, 2, 1, 0 */
+ simde_mm512_set_epi32(INT32_C( -208138847), INT32_C( 807348777), INT32_C(-1877548571), INT32_C( -399920351),
+ INT32_C( 15525797), INT32_C( 18165921), INT32_C( 2085930596), INT32_C( 1662282658),
+ INT32_C(-1842752263), INT32_C( 499820912), INT32_C( 1419797765), INT32_C( -44818966),
+ INT32_C( 1761152620), INT32_C(-1288657930), INT32_C( 894078020), INT32_C(-1369431563)),
+ simde_mm512_set_epi32(INT32_C( 0), INT32_C( 807348777), INT32_C( 1877548571), INT32_C( 399920351),
+ INT32_C( 15525797), INT32_C( 0), INT32_C( 0), INT32_C( 0),
+ INT32_C( 0), INT32_C( 0), INT32_C( 1419797765), INT32_C( 44818966),
+ INT32_C( 0), INT32_C( 1288657930), INT32_C( 894078020), INT32_C( 1369431563)) },
+ { UINT16_C( 5367), /* bits 12, 10, 7, 6, 5, 4, 2, 1, 0 */
+ simde_mm512_set_epi32(INT32_C( 407580338), INT32_C( 1906809805), INT32_C( -849801752), INT32_C(-1965822258),
+ INT32_C( 1505436737), INT32_C( 342253548), INT32_C( 1435179252), INT32_C( 1326766533),
+ INT32_C( 156769011), INT32_C( 343760696), INT32_C( 611303965), INT32_C( 1457341409),
+ INT32_C( 165452421), INT32_C(-1824090116), INT32_C( -184738383), INT32_C( -191086464)),
+ simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1965822258),
+ INT32_C( 0), INT32_C( 342253548), INT32_C( 0), INT32_C( 0),
+ INT32_C( 156769011), INT32_C( 343760696), INT32_C( 611303965), INT32_C( 1457341409),
+ INT32_C( 0), INT32_C( 1824090116), INT32_C( 184738383), INT32_C( 191086464)) },
+ { UINT16_C(54740), /* bits 15, 14, 12, 10, 8, 7, 6, 4, 2 */
+ simde_mm512_set_epi32(INT32_C( 622055688), INT32_C(-1127740382), INT32_C( 466514910), INT32_C(-1745879628),
+ INT32_C( 1427056174), INT32_C( 2097896620), INT32_C( 1813263538), INT32_C( 1909821993),
+ INT32_C( 1439822042), INT32_C(-1049213292), INT32_C( 1557133349), INT32_C( 1168931268),
+ INT32_C( -810546774), INT32_C(-1283013132), INT32_C( 654302587), INT32_C( 314275905)),
+ simde_mm512_set_epi32(INT32_C( 622055688), INT32_C( 1127740382), INT32_C( 0), INT32_C( 1745879628),
+ INT32_C( 0), INT32_C( 2097896620), INT32_C( 0), INT32_C( 1909821993),
+ INT32_C( 1439822042), INT32_C( 1049213292), INT32_C( 0), INT32_C( 1168931268),
+ INT32_C( 0), INT32_C( 1283013132), INT32_C( 0), INT32_C( 0)) },
+ { UINT16_C(27191), /* bits 14, 13, 11, 9, 5, 4, 2, 1, 0 */
+ simde_mm512_set_epi32(INT32_C(-1032150818), INT32_C( 151713087), INT32_C( 1554707006), INT32_C( -318690470),
+ INT32_C( 788893537), INT32_C( -230394006), INT32_C( 905172649), INT32_C(-1044778809),
+ INT32_C(-1938215986), INT32_C(-1138753169), INT32_C(-1689961651), INT32_C( 890456168),
+ INT32_C( 1382435241), INT32_C( -803845344), INT32_C( 430838507), INT32_C( 1075259040)),
+ simde_mm512_set_epi32(INT32_C( 0), INT32_C( 151713087), INT32_C( 1554707006), INT32_C( 0),
+ INT32_C( 788893537), INT32_C( 0), INT32_C( 905172649), INT32_C( 0),
+ INT32_C( 0), INT32_C( 0), INT32_C( 1689961651), INT32_C( 890456168),
+ INT32_C( 0), INT32_C( 803845344), INT32_C( 430838507), INT32_C( 1075259040)) },
+ { UINT16_C(65093), /* bits 15, 14, 13, 12, 11, 10, 9, 6, 2, 0 */
+ simde_mm512_set_epi32(INT32_C( -887453452), INT32_C( 160221724), INT32_C( -886018282), INT32_C( 1222780200),
+ INT32_C( 1877396684), INT32_C( 283360472), INT32_C( -310045086), INT32_C( 560822999),
+ INT32_C( -680371476), INT32_C( 1838395052), INT32_C(-1152635838), INT32_C( -481448106),
+ INT32_C( 871399876), INT32_C( -939960538), INT32_C( -898000986), INT32_C( -641497176)),
+ simde_mm512_set_epi32(INT32_C( 887453452), INT32_C( 160221724), INT32_C( 886018282), INT32_C( 1222780200),
+ INT32_C( 1877396684), INT32_C( 283360472), INT32_C( 310045086), INT32_C( 0),
+ INT32_C( 0), INT32_C( 1838395052), INT32_C( 0), INT32_C( 0),
+ INT32_C( 0), INT32_C( 939960538), INT32_C( 0), INT32_C( 641497176)) },
+ { UINT16_C(42926), /* bits 15, 13, 10, 9, 8, 7, 5, 3, 2, 1 */
+ simde_mm512_set_epi32(INT32_C(-1431480123), INT32_C(-2058827609), INT32_C(-1519596795), INT32_C( 24332922),
+ INT32_C( -338106630), INT32_C(-1565374776), INT32_C(-1426452996), INT32_C( -680300877),
+ INT32_C(-1675700291), INT32_C( -85412591), INT32_C(-1865493216), INT32_C(-1122257925),
+ INT32_C( 955620837), INT32_C( -725693586), INT32_C( 1056307491), INT32_C( 1924019839)),
+ simde_mm512_set_epi32(INT32_C( 1431480123), INT32_C( 0), INT32_C( 1519596795), INT32_C( 0),
+ INT32_C( 0), INT32_C( 1565374776), INT32_C( 1426452996), INT32_C( 680300877),
+ INT32_C( 1675700291), INT32_C( 0), INT32_C( 1865493216), INT32_C( 0),
+ INT32_C( 955620837), INT32_C( 725693586), INT32_C( 1056307491), INT32_C( 0)) },
+ { UINT16_C(26757), /* bits 14, 13, 11, 7, 2, 0 */
+ simde_mm512_set_epi32(INT32_C(-1863795060), INT32_C( -525595897), INT32_C( 1357119557), INT32_C( 837734387),
+ INT32_C( -607392699), INT32_C( -498581669), INT32_C(-2108693629), INT32_C( -476969927),
+ INT32_C(-1238615237), INT32_C( 583893938), INT32_C( -594441984), INT32_C( 1561597956),
+ INT32_C( 174377227), INT32_C( 319460903), INT32_C(-1295208114), INT32_C( 659707887)),
+ simde_mm512_set_epi32(INT32_C( 0), INT32_C( 525595897), INT32_C( 1357119557), INT32_C( 0),
+ INT32_C( 607392699), INT32_C( 0), INT32_C( 0), INT32_C( 0),
+ INT32_C( 1238615237), INT32_C( 0), INT32_C( 0), INT32_C( 0),
+ INT32_C( 0), INT32_C( 319460903), INT32_C( 0), INT32_C( 659707887)) }
+ };
+
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { /* visit every entry of test_vec */
+ simde__m512i r = simde_mm512_maskz_abs_epi32(test_vec[i].k, test_vec[i].a);
+ simde_assert_m512i_i32(r, ==, test_vec[i].r); /* macro is defined outside this chunk; presumably compares the 32-bit lanes - confirm */
+ }
+
+ return MUNIT_OK;
+}
+
+static MunitResult /* Table-driven test: runs simde_mm512_abs_epi64 on 8 stored inputs and compares each result with a stored vector. */
+test_simde_mm512_abs_epi64(const MunitParameter params[], void* data) {
+ (void) params; /* params and data are not otherwise referenced in this test */
+ (void) data;
+
+ const struct {
+ simde__m512i a; /* input vector */
+ simde__m512i r; /* expected result: in these vectors every element equals |a|; no input is INT64_MIN */
+ } test_vec[8] = {
+ { simde_mm512_set_epi64(INT64_C(-4703669018152042913), INT64_C(-4045642624518788548),
+ INT64_C(-4525466663746518179), INT64_C( 8998788960652053415),
+ INT64_C( 1494272869059842335), INT64_C( 8817699709611505791),
+ INT64_C( 6340501178400867239), INT64_C( -331077862235736168)),
+ simde_mm512_set_epi64(INT64_C( 4703669018152042913), INT64_C( 4045642624518788548),
+ INT64_C( 4525466663746518179), INT64_C( 8998788960652053415),
+ INT64_C( 1494272869059842335), INT64_C( 8817699709611505791),
+ INT64_C( 6340501178400867239), INT64_C( 331077862235736168)) },
+ { simde_mm512_set_epi64(INT64_C( 8010327509455286697), INT64_C( 7606702663991788660),
+ INT64_C( 1151136024847303203), INT64_C( 6049881760672440877),
+ INT64_C( 733511420638679938), INT64_C( 5858508694238056658),
+ INT64_C(-6405681308945653600), INT64_C( 6147272925506298466)),
+ simde_mm512_set_epi64(INT64_C( 8010327509455286697), INT64_C( 7606702663991788660),
+ INT64_C( 1151136024847303203), INT64_C( 6049881760672440877),
+ INT64_C( 733511420638679938), INT64_C( 5858508694238056658),
+ INT64_C( 6405681308945653600), INT64_C( 6147272925506298466)) },
+ { simde_mm512_set_epi64(INT64_C( 6464197827924287450), INT64_C( -497314884836304285),
+ INT64_C(-3088355329109459024), INT64_C( 6086538207170510268),
+ INT64_C( 5130097871257028467), INT64_C(-1548377050112752776),
+ INT64_C(-1253733446110746976), INT64_C( 3012333519594431544)),
+ simde_mm512_set_epi64(INT64_C( 6464197827924287450), INT64_C( 497314884836304285),
+ INT64_C( 3088355329109459024), INT64_C( 6086538207170510268),
+ INT64_C( 5130097871257028467), INT64_C( 1548377050112752776),
+ INT64_C( 1253733446110746976), INT64_C( 3012333519594431544)) },
+ { simde_mm512_set_epi64(INT64_C(-6609116217957060725), INT64_C( -302281769423418319),
+ INT64_C(-4897195678850214218), INT64_C(-5503480572790438091),
+ INT64_C(-7839393967146815223), INT64_C( 3255671599336790273),
+ INT64_C( 664294275788018935), INT64_C( 3408593724494687769)),
+ simde_mm512_set_epi64(INT64_C( 6609116217957060725), INT64_C( 302281769423418319),
+ INT64_C( 4897195678850214218), INT64_C( 5503480572790438091),
+ INT64_C( 7839393967146815223), INT64_C( 3255671599336790273),
+ INT64_C( 664294275788018935), INT64_C( 3408593724494687769)) },
+ { simde_mm512_set_epi64(INT64_C(-3947921567074644800), INT64_C( 2473906111070933983),
+ INT64_C( 5899615628251993446), INT64_C( 1683758976557896025),
+ INT64_C( 5912559464823232024), INT64_C( 8209801467605337080),
+ INT64_C(-3759026771366879931), INT64_C(-4585023147545297955)),
+ simde_mm512_set_epi64(INT64_C( 3947921567074644800), INT64_C( 2473906111070933983),
+ INT64_C( 5899615628251993446), INT64_C( 1683758976557896025),
+ INT64_C( 5912559464823232024), INT64_C( 8209801467605337080),
+ INT64_C( 3759026771366879931), INT64_C( 4585023147545297955)) },
+ { simde_mm512_set_epi64(INT64_C(-5018179428847904871), INT64_C(-5078614119960003085),
+ INT64_C( 1906444753996234817), INT64_C(-8073431190678733657),
+ INT64_C( 2507716860794484867), INT64_C( 3050920605853136553),
+ INT64_C( 8003726449701589009), INT64_C( 8169798351590582410)),
+ simde_mm512_set_epi64(INT64_C( 5018179428847904871), INT64_C( 5078614119960003085),
+ INT64_C( 1906444753996234817), INT64_C( 8073431190678733657),
+ INT64_C( 2507716860794484867), INT64_C( 3050920605853136553),
+ INT64_C( 8003726449701589009), INT64_C( 8169798351590582410)) },
+ { simde_mm512_set_epi64(INT64_C( 34320467490721535), INT64_C( 6032039111009323642),
+ INT64_C( 8825906149542039035), INT64_C( 3097839263351160271),
+ INT64_C( 8028249960129894953), INT64_C(-3442961435998375158),
+ INT64_C( 4762825474720326971), INT64_C( 5172383913584297790)),
+ simde_mm512_set_epi64(INT64_C( 34320467490721535), INT64_C( 6032039111009323642),
+ INT64_C( 8825906149542039035), INT64_C( 3097839263351160271),
+ INT64_C( 8028249960129894953), INT64_C( 3442961435998375158),
+ INT64_C( 4762825474720326971), INT64_C( 5172383913584297790)) },
+ { simde_mm512_set_epi64(INT64_C( 6012124616828815523), INT64_C(-1490086426868961318),
+ INT64_C(-6824757216361935934), INT64_C( 6020178289686090572),
+ INT64_C( 1854069096850744512), INT64_C(-5881095665354951863),
+ INT64_C( 6788068100773536681), INT64_C(-2707434157113404213)),
+ simde_mm512_set_epi64(INT64_C( 6012124616828815523), INT64_C( 1490086426868961318),
+ INT64_C( 6824757216361935934), INT64_C( 6020178289686090572),
+ INT64_C( 1854069096850744512), INT64_C( 5881095665354951863),
+ INT64_C( 6788068100773536681), INT64_C( 2707434157113404213)) }
+ };
+
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { /* visit every entry of test_vec */
+ simde__m512i r = simde_mm512_abs_epi64(test_vec[i].a);
+ simde_assert_m512i_i64(r, ==, test_vec[i].r); /* macro is defined outside this chunk; presumably compares the 64-bit lanes - confirm */
+ }
+
+ return MUNIT_OK;
+}
+
+static MunitResult /* Table-driven test: runs simde_mm512_mask_abs_epi64 on 8 stored cases and compares each result with a stored vector. */
+test_simde_mm512_mask_abs_epi64(const MunitParameter params[], void* data) {
+ (void) params; /* params and data are not otherwise referenced in this test */
+ (void) data;
+
+ const struct {
+ simde__m512i src; /* first argument; its elements appear unchanged in r wherever the matching bit of k is clear */
+ simde__mmask8 k; /* mask argument; the bit positions set in each value are noted beside it below */
+ simde__m512i a; /* input vector */
+ simde__m512i r; /* expected result: in these vectors, |a| where the matching bit of k is set, src elsewhere */
+ } test_vec[8] = {
+ { simde_mm512_set_epi64(INT64_C( 492676116973233810), INT64_C(-6011138406694593910),
+ INT64_C( 4793302583727451571), INT64_C(-8501143198309462471),
+ INT64_C( 7175703865894427661), INT64_C( 8004913303465320948),
+ INT64_C(-2704856736044264837), INT64_C( 5658385619497272512)),
+ UINT8_C(216), /* bits 7, 6, 4, 3; in this table bit 0 lines up with the LAST simde_mm512_set_epi64 argument */
+ simde_mm512_set_epi64(INT64_C(-1717644828534315099), INT64_C( 78022038682650212),
+ INT64_C( 7139449655270167801), INT64_C( 2146714472316691717),
+ INT64_C( -192495991449383316), INT64_C(-5534743664186979260),
+ INT64_C(-5881663773003558792), INT64_C(-6698972267701962486)),
+ simde_mm512_set_epi64(INT64_C( 1717644828534315099), INT64_C( 78022038682650212),
+ INT64_C( 4793302583727451571), INT64_C( 2146714472316691717),
+ INT64_C( 192495991449383316), INT64_C( 8004913303465320948),
+ INT64_C(-2704856736044264837), INT64_C( 5658385619497272512)) },
+ { simde_mm512_set_epi64(INT64_C( 1469967797035145460), INT64_C( 5698418868819073779),
+ INT64_C( 1476440947581501981), INT64_C( 6259233690927012485),
+ INT64_C(-7834407389066617423), INT64_C( -820710109410615175),
+ INT64_C(-6555247677438161503), INT64_C( 3467536596098015717)),
+ UINT8_C( 65), /* bits 6, 0 */
+ simde_mm512_set_epi64(INT64_C( 7787907596649075241), INT64_C( 6183988585695692436),
+ INT64_C( 6687836810634885572), INT64_C(-3481271883196348940),
+ INT64_C( 2810208213167470657), INT64_C(-4686213381983447596),
+ INT64_C( 1750544224109435853), INT64_C(-3649870730594357554)),
+ simde_mm512_set_epi64(INT64_C( 1469967797035145460), INT64_C( 6183988585695692436),
+ INT64_C( 1476440947581501981), INT64_C( 6259233690927012485),
+ INT64_C(-7834407389066617423), INT64_C( -820710109410615175),
+ INT64_C(-6555247677438161503), INT64_C( 3649870730594357554)) },
+ { simde_mm512_set_epi64(INT64_C(-8324574269298179729), INT64_C(-7258330021648709528),
+ INT64_C( 5937514152424000288), INT64_C( 1850437298497726112),
+ INT64_C(-8404329205387466185), INT64_C( 2671708839418006562),
+ INT64_C( 2003666284095471028), INT64_C( 6129159598982782124)),
+ UINT8_C(199), /* bits 7, 6, 2, 1, 0 */
+ simde_mm512_set_epi64(INT64_C( 7895846628610550850), INT64_C(-2067803869119741500),
+ INT64_C(-4037099766843598938), INT64_C(-2755209390738717188),
+ INT64_C(-7716919860907040546), INT64_C( 651602748594909758),
+ INT64_C(-1368765145407975583), INT64_C( -989534720059255127)),
+ simde_mm512_set_epi64(INT64_C( 7895846628610550850), INT64_C( 2067803869119741500),
+ INT64_C( 5937514152424000288), INT64_C( 1850437298497726112),
+ INT64_C(-8404329205387466185), INT64_C( 651602748594909758),
+ INT64_C( 1368765145407975583), INT64_C( 989534720059255127)) },
+ { simde_mm512_set_epi64(INT64_C(-4820061084596199963), INT64_C(-3116830217730655965),
+ INT64_C( 8263602287642686524), INT64_C(-7769087937993864972),
+ INT64_C( 688147068097687318), INT64_C( 5251800971073735884),
+ INT64_C( 1217023964204045922), INT64_C( 2408716443164236524)),
+ UINT8_C( 32), /* bit 5 */
+ simde_mm512_set_epi64(INT64_C( 748944487451629095), INT64_C(-5562876490484131857),
+ INT64_C( 4801338493970245765), INT64_C(-6148160310922917721),
+ INT64_C(-6526618537607083398), INT64_C(-1452156915681179960),
+ INT64_C(-6126568963486552397), INT64_C(-7197077943533128431)),
+ simde_mm512_set_epi64(INT64_C(-4820061084596199963), INT64_C(-3116830217730655965),
+ INT64_C( 4801338493970245765), INT64_C(-7769087937993864972),
+ INT64_C( 688147068097687318), INT64_C( 5251800971073735884),
+ INT64_C( 1217023964204045922), INT64_C( 2408716443164236524)) },
+ { simde_mm512_set_epi64(INT64_C(-3096584980416416798), INT64_C( 4918909782932989693),
+ INT64_C(-8004938825376986361), INT64_C( 5828784114914742259),
+ INT64_C(-2608731774237786277), INT64_C(-9056770170020559815),
+ INT64_C(-5319811934658395214), INT64_C(-2553108879087757308)),
+ UINT8_C(222), /* bits 7, 6, 4, 3, 2, 1 */
+ simde_mm512_set_epi64(INT64_C(-7429178691671160403), INT64_C(-6726241153446442388),
+ INT64_C(-4671168844950875702), INT64_C(-6057578907613688046),
+ INT64_C( 4404786722084486842), INT64_C(-1094983952222664046),
+ INT64_C( 5287971478839612040), INT64_C(-1393761486045455198)),
+ simde_mm512_set_epi64(INT64_C( 7429178691671160403), INT64_C( 6726241153446442388),
+ INT64_C(-8004938825376986361), INT64_C( 6057578907613688046),
+ INT64_C( 4404786722084486842), INT64_C( 1094983952222664046),
+ INT64_C( 5287971478839612040), INT64_C(-2553108879087757308)) },
+ { simde_mm512_set_epi64(INT64_C(-2342519952706594553), INT64_C( 7644592964730421973),
+ INT64_C( 9172903191652197902), INT64_C( 1458420204680989133),
+ INT64_C(-7367844323210688147), INT64_C(-7391408909901332845),
+ INT64_C(-1133308551202396595), INT64_C( 4102785619076298947)),
+ UINT8_C(230), /* bits 7, 6, 5, 2, 1 */
+ simde_mm512_set_epi64(INT64_C( 2067960823776290910), INT64_C(-7228934469556881970),
+ INT64_C(-3653374474835545853), INT64_C( 5480624866862364429),
+ INT64_C(-6600577412846457563), INT64_C( 2529415530022027870),
+ INT64_C(-4329045443225919216), INT64_C( 9121454504064466670)),
+ simde_mm512_set_epi64(INT64_C( 2067960823776290910), INT64_C( 7228934469556881970),
+ INT64_C( 3653374474835545853), INT64_C( 1458420204680989133),
+ INT64_C(-7367844323210688147), INT64_C( 2529415530022027870),
+ INT64_C( 4329045443225919216), INT64_C( 4102785619076298947)) },
+ { simde_mm512_set_epi64(INT64_C(-4792271441403297267), INT64_C( 6523863092598044385),
+ INT64_C( 6601631579296004837), INT64_C( 540377227778745844),
+ INT64_C(-1608274285869950535), INT64_C( 1496684406111625906),
+ INT64_C( 1553447803787926005), INT64_C(-8007245467626955940)),
+ UINT8_C(104), /* bits 6, 5, 3 */
+ simde_mm512_set_epi64(INT64_C( 3154046907468289764), INT64_C( 163566292437055869),
+ INT64_C( 8677719201018760530), INT64_C( 8769975268177995652),
+ INT64_C( 7613129594859420923), INT64_C(-1998576254813523574),
+ INT64_C( 6270036339063321053), INT64_C( 5566883881093264567)),
+ simde_mm512_set_epi64(INT64_C(-4792271441403297267), INT64_C( 163566292437055869),
+ INT64_C( 8677719201018760530), INT64_C( 540377227778745844),
+ INT64_C( 7613129594859420923), INT64_C( 1496684406111625906),
+ INT64_C( 1553447803787926005), INT64_C(-8007245467626955940)) },
+ { simde_mm512_set_epi64(INT64_C( 6148806541912347944), INT64_C( 1908371497711301408),
+ INT64_C(-9141679846684482011), INT64_C(-5393015831913260049),
+ INT64_C(-7420100767347904040), INT64_C(-3473408352959472719),
+ INT64_C( 3774406077177521346), INT64_C(-4000561069769581270)),
+ UINT8_C(128), /* bit 7 */
+ simde_mm512_set_epi64(INT64_C( 8461115545003933300), INT64_C(-7617041399687843974),
+ INT64_C( 7167540182740956629), INT64_C( 7612124823363779747),
+ INT64_C(-1891905030773424117), INT64_C(-5190678261404669746),
+ INT64_C( -683318933916552175), INT64_C( 9111998746576489775)),
+ simde_mm512_set_epi64(INT64_C( 8461115545003933300), INT64_C( 1908371497711301408),
+ INT64_C(-9141679846684482011), INT64_C(-5393015831913260049),
+ INT64_C(-7420100767347904040), INT64_C(-3473408352959472719),
+ INT64_C( 3774406077177521346), INT64_C(-4000561069769581270)) }
+ };
+
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { /* visit every entry of test_vec */
+ simde__m512i r = simde_mm512_mask_abs_epi64(test_vec[i].src, test_vec[i].k, test_vec[i].a);
+ simde_assert_m512i_i64(r, ==, test_vec[i].r); /* macro is defined outside this chunk; presumably compares the 64-bit lanes - confirm */
+ }
+
+ return MUNIT_OK;
+}
+
+static MunitResult /* Table-driven test: runs simde_mm512_maskz_abs_epi64 on 8 stored cases and compares each result with a stored vector. */
+test_simde_mm512_maskz_abs_epi64(const MunitParameter params[], void* data) {
+ (void) params; /* params and data are not otherwise referenced in this test */
+ (void) data;
+
+ const struct {
+ simde__mmask8 k; /* mask argument; the bit positions set in each value are noted beside it below */
+ simde__m512i a; /* input vector */
+ simde__m512i r; /* expected result: in these vectors, |a| where the matching bit of k is set and 0 elsewhere */
+ } test_vec[8] = {
+ { UINT8_C(192), /* bits 7, 6; in this table bit 0 lines up with the LAST simde_mm512_set_epi64 argument */
+ simde_mm512_set_epi64(INT64_C( -41252748446509487), INT64_C( 7890911908509001079),
+ INT64_C(-5961962669328745651), INT64_C(-3744340997299642547),
+ INT64_C( 6343687558518880421), INT64_C(-4320961892205516228),
+ INT64_C(-2965231175381652703), INT64_C( 2864943002512957373)),
+ simde_mm512_set_epi64(INT64_C( 41252748446509487), INT64_C( 7890911908509001079),
+ INT64_C( 0), INT64_C( 0),
+ INT64_C( 0), INT64_C( 0),
+ INT64_C( 0), INT64_C( 0)) },
+ { UINT8_C( 10), /* bits 3, 1 */
+ simde_mm512_set_epi64(INT64_C(-8064009705201487071), INT64_C( 66682790377500833),
+ INT64_C( 8959003693208071074), INT64_C(-7914560703715169936),
+ INT64_C( 6097984971859041770), INT64_C( 7564092909171024886),
+ INT64_C( 3840035858897969653), INT64_C( -443938296699520969)),
+ simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0),
+ INT64_C( 0), INT64_C( 0),
+ INT64_C( 6097984971859041770), INT64_C( 0),
+ INT64_C( 3840035858897969653), INT64_C( 0)) },
+ { UINT8_C( 41), /* bits 5, 3, 0 */
+ simde_mm512_set_epi64(INT64_C(-8443142306353437631), INT64_C( 1469967797035145460),
+ INT64_C( 5698418868819073779), INT64_C( 1476440947581501981),
+ INT64_C( 6259233690927012485), INT64_C(-7834407389066617423),
+ INT64_C( -820710109410615175), INT64_C(-6555247677438161503)),
+ simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0),
+ INT64_C( 5698418868819073779), INT64_C( 0),
+ INT64_C( 6259233690927012485), INT64_C( 0),
+ INT64_C( 0), INT64_C( 6555247677438161503)) },
+ { UINT8_C(232), /* bits 7, 6, 5, 3 */
+ simde_mm512_set_epi64(INT64_C( 6129159598982782124), INT64_C( 7787907596649075241),
+ INT64_C( 6183988585695692436), INT64_C( 6687836810634885572),
+ INT64_C(-3481271883196348940), INT64_C( 2810208213167470657),
+ INT64_C(-4686213381983447596), INT64_C( 1750544224109435853)),
+ simde_mm512_set_epi64(INT64_C( 6129159598982782124), INT64_C( 7787907596649075241),
+ INT64_C( 6183988585695692436), INT64_C( 0),
+ INT64_C( 3481271883196348940), INT64_C( 0),
+ INT64_C( 0), INT64_C( 0)) },
+ { UINT8_C(180), /* bits 7, 5, 4, 2 */
+ simde_mm512_set_epi64(INT64_C( -989534720059255127), INT64_C(-4487290813852079154),
+ INT64_C(-4890907616466355379), INT64_C( 3824480121463916969),
+ INT64_C(-3452489463091031317), INT64_C( 4618202413866537757),
+ INT64_C(-6813829451133759224), INT64_C(-4843608058602032162)),
+ simde_mm512_set_epi64(INT64_C( 989534720059255127), INT64_C( 0),
+ INT64_C( 4890907616466355379), INT64_C( 3824480121463916969),
+ INT64_C( 0), INT64_C( 4618202413866537757),
+ INT64_C( 0), INT64_C( 0)) },
+ { UINT8_C( 97), /* bits 6, 5, 0 */
+ simde_mm512_set_epi64(INT64_C(-1331633504094684457), INT64_C(-2922173236712853844),
+ INT64_C(-4950533224594034858), INT64_C( 3742633972513462054),
+ INT64_C(-3856884862992283736), INT64_C( 2824537604935384645),
+ INT64_C(-4433054007697935041), INT64_C( 6677415749608352602)),
+ simde_mm512_set_epi64(INT64_C( 0), INT64_C( 2922173236712853844),
+ INT64_C( 4950533224594034858), INT64_C( 0),
+ INT64_C( 0), INT64_C( 0),
+ INT64_C( 0), INT64_C( 6677415749608352602)) },
+ { UINT8_C(216), /* bits 7, 6, 4, 3 */
+ simde_mm512_set_epi64(INT64_C(-2921870015535851587), INT64_C( -366844282582149856),
+ INT64_C(-4820061084596199963), INT64_C(-3116830217730655965),
+ INT64_C( 8263602287642686524), INT64_C(-7769087937993864972),
+ INT64_C( 688147068097687318), INT64_C( 5251800971073735884)),
+ simde_mm512_set_epi64(INT64_C( 2921870015535851587), INT64_C( 366844282582149856),
+ INT64_C( 0), INT64_C( 3116830217730655965),
+ INT64_C( 8263602287642686524), INT64_C( 0),
+ INT64_C( 0), INT64_C( 0)) },
+ { UINT8_C(252), /* bits 7, 6, 5, 4, 3, 2 */
+ simde_mm512_set_epi64(INT64_C(-5319811934658395214), INT64_C(-2553108879087757308),
+ INT64_C( 748944487451629095), INT64_C(-5562876490484131857),
+ INT64_C( 4801338493970245765), INT64_C(-6148160310922917721),
+ INT64_C(-6526618537607083398), INT64_C(-1452156915681179960)),
+ simde_mm512_set_epi64(INT64_C( 5319811934658395214), INT64_C( 2553108879087757308),
+ INT64_C( 748944487451629095), INT64_C( 5562876490484131857),
+ INT64_C( 4801338493970245765), INT64_C( 6148160310922917721),
+ INT64_C( 0), INT64_C( 0)) }
+ };
+
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { /* visit every entry of test_vec */
+ simde__m512i r = simde_mm512_maskz_abs_epi64(test_vec[i].k, test_vec[i].a);
+ simde_assert_m512i_i64(r, ==, test_vec[i].r); /* macro is defined outside this chunk; presumably compares the 64-bit lanes - confirm */
+ }
+
+ return MUNIT_OK;
+}
static MunitResult
test_simde_mm512_add_epi32(const MunitParameter params[], void* data) {
@@ -9681,6 +10280,13 @@ HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL
static MunitTest test_suite_tests[] = {
#if defined(SIMDE_AVX512F_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS)
+ SIMDE_TESTS_DEFINE_TEST(mm512_abs_epi32),
+ SIMDE_TESTS_DEFINE_TEST(mm512_mask_abs_epi32),
+ SIMDE_TESTS_DEFINE_TEST(mm512_maskz_abs_epi32),
+ SIMDE_TESTS_DEFINE_TEST(mm512_abs_epi64),
+ SIMDE_TESTS_DEFINE_TEST(mm512_mask_abs_epi64),
+ SIMDE_TESTS_DEFINE_TEST(mm512_maskz_abs_epi64),
+
SIMDE_TESTS_DEFINE_TEST(mm512_add_epi32),
SIMDE_TESTS_DEFINE_TEST(mm512_mask_add_epi32),
SIMDE_TESTS_DEFINE_TEST(mm512_maskz_add_epi32),
=====================================
test/x86/avx512vl.c
=====================================
@@ -0,0 +1,210 @@
+/* Copyright (c) 2018, 2019 Evan Nemerson <evan at nemerson.com>
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define SIMDE_TESTS_CURRENT_ISAX avx512vl
+#include <simde/x86/avx512vl.h>
+#include <test/x86/test-avx512.h>
+
+#if defined(SIMDE_AVX512VL_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS)
+
+static MunitResult
+test_simde_mm_cvtsepi16_epi8(const MunitParameter params[], void* data) {
+ (void) params; /* not used by this test */
+ (void) data; /* not used by this test */
+ /* Table-driven test for simde_mm_cvtsepi16_epi8: each entry pairs an input vector `a` with the expected result `r`. */
+ const struct {
+ simde__m128i a; /* input, built from eight signed 16-bit values */
+ simde__m128i r; /* expected output, built from sixteen signed 8-bit values */
+ } test_vec[8] = { /* in every entry below, r's upper eight bytes are 0 and its lower eight bytes are a's 16-bit values clamped to [-128, 127] */
+ { simde_mm_set_epi16(INT16_C( -385), INT16_C(-14682), INT16_C( -6), INT16_C( 418),
+ INT16_C( 0), INT16_C(-24263), INT16_C(-21423), INT16_C( -13)),
+ simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C(-128), INT8_C(-128), INT8_C( -6), INT8_C( 127),
+ INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C( -13)) },
+ { simde_mm_set_epi16(INT16_C( 12), INT16_C( -1449), INT16_C( -1), INT16_C( -8),
+ INT16_C( 151), INT16_C( 68), INT16_C( -857), INT16_C( -1)),
+ simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 12), INT8_C(-128), INT8_C( -1), INT8_C( -8),
+ INT8_C( 127), INT8_C( 68), INT8_C(-128), INT8_C( -1)) },
+ { simde_mm_set_epi16(INT16_C( 2343), INT16_C(-10678), INT16_C( -7895), INT16_C(-27557),
+ INT16_C( 4), INT16_C( 6853), INT16_C( -1), INT16_C( -4386)),
+ simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C(-128),
+ INT8_C( 4), INT8_C( 127), INT8_C( -1), INT8_C(-128)) },
+ { simde_mm_set_epi16(INT16_C( 2603), INT16_C(-10075), INT16_C( 1), INT16_C( -1),
+ INT16_C( -267), INT16_C( 0), INT16_C( 0), INT16_C( -3)),
+ simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 127), INT8_C(-128), INT8_C( 1), INT8_C( -1),
+ INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( -3)) },
+ { simde_mm_set_epi16(INT16_C( 318), INT16_C( 609), INT16_C( 127), INT16_C( 2),
+ INT16_C( 326), INT16_C( 20), INT16_C( -1), INT16_C( -7)),
+ simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 2),
+ INT8_C( 127), INT8_C( 20), INT8_C( -1), INT8_C( -7)) },
+ { simde_mm_set_epi16(INT16_C( -57), INT16_C( 2093), INT16_C( 3059), INT16_C( 12),
+ INT16_C( 10), INT16_C( 274), INT16_C( 50), INT16_C( -7)),
+ simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( -57), INT8_C( 127), INT8_C( 127), INT8_C( 12),
+ INT8_C( 10), INT8_C( 127), INT8_C( 50), INT8_C( -7)) },
+ { simde_mm_set_epi16(INT16_C( 0), INT16_C( -3570), INT16_C( 1), INT16_C( 0),
+ INT16_C( -36), INT16_C( 0), INT16_C( 54), INT16_C( -5)),
+ simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C(-128), INT8_C( 1), INT8_C( 0),
+ INT8_C( -36), INT8_C( 0), INT8_C( 54), INT8_C( -5)) },
+ { simde_mm_set_epi16(INT16_C( 54), INT16_C( 92), INT16_C( 2), INT16_C( 185),
+ INT16_C( 4), INT16_C( 1983), INT16_C( 2567), INT16_C( 136)),
+ simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 54), INT8_C( 92), INT8_C( 2), INT8_C( 127),
+ INT8_C( 4), INT8_C( 127), INT8_C( 127), INT8_C( 127)) }
+ };
+ /* Run each entry through the function under test and assert the result equals the expected vector. */
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m128i r = simde_mm_cvtsepi16_epi8(test_vec[i].a);
+ simde_assert_m128i_i8(r, ==, test_vec[i].r); /* project assertion macro defined outside this file; presumably compares as 8-bit lanes */
+ }
+
+ return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm256_cvtsepi16_epi8(const MunitParameter params[], void* data) {
+ (void) params; /* not used by this test */
+ (void) data; /* not used by this test */
+ /* Table-driven test for simde_mm256_cvtsepi16_epi8: each entry pairs a 256-bit input `a` with the expected 128-bit result `r`. */
+ const struct {
+ simde__m256i a; /* input, built from sixteen signed 16-bit values */
+ simde__m128i r; /* expected output, built from sixteen signed 8-bit values */
+ } test_vec[8] = { /* in every entry below, each byte of r is the value at the same argument position of a, clamped to [-128, 127] */
+ { simde_mm256_set_epi16(INT16_C( 447), INT16_C( -3887), INT16_C( 9), INT16_C( 6277),
+ INT16_C( 2), INT16_C( -314), INT16_C( 1617), INT16_C( 64),
+ INT16_C( 0), INT16_C( 1725), INT16_C( 801), INT16_C( -2),
+ INT16_C( -782), INT16_C( -381), INT16_C( 0), INT16_C( -1)),
+ simde_mm_set_epi8(INT8_C( 127), INT8_C(-128), INT8_C( 9), INT8_C( 127),
+ INT8_C( 2), INT8_C(-128), INT8_C( 127), INT8_C( 64),
+ INT8_C( 0), INT8_C( 127), INT8_C( 127), INT8_C( -2),
+ INT8_C(-128), INT8_C(-128), INT8_C( 0), INT8_C( -1)) },
+ { simde_mm256_set_epi16(INT16_C( -1378), INT16_C( 1), INT16_C( 2482), INT16_C( 0),
+ INT16_C( 0), INT16_C( -2), INT16_C( -107), INT16_C( 7074),
+ INT16_C( -117), INT16_C( 3648), INT16_C( -25), INT16_C( -225),
+ INT16_C( 587), INT16_C( 15), INT16_C( 116), INT16_C( -1)),
+ simde_mm_set_epi8(INT8_C(-128), INT8_C( 1), INT8_C( 127), INT8_C( 0),
+ INT8_C( 0), INT8_C( -2), INT8_C(-107), INT8_C( 127),
+ INT8_C(-117), INT8_C( 127), INT8_C( -25), INT8_C(-128),
+ INT8_C( 127), INT8_C( 15), INT8_C( 116), INT8_C( -1)) },
+ { simde_mm256_set_epi16(INT16_C( -602), INT16_C( 22836), INT16_C( -36), INT16_C( -417),
+ INT16_C( 7314), INT16_C( -135), INT16_C( 1), INT16_C( -1693),
+ INT16_C( -121), INT16_C( -342), INT16_C( 7), INT16_C( 3079),
+ INT16_C( 14), INT16_C( 56), INT16_C( -16), INT16_C( -10)),
+ simde_mm_set_epi8(INT8_C(-128), INT8_C( 127), INT8_C( -36), INT8_C(-128),
+ INT8_C( 127), INT8_C(-128), INT8_C( 1), INT8_C(-128),
+ INT8_C(-121), INT8_C(-128), INT8_C( 7), INT8_C( 127),
+ INT8_C( 14), INT8_C( 56), INT8_C( -16), INT8_C( -10)) },
+ { simde_mm256_set_epi16(INT16_C( -171), INT16_C( 138), INT16_C( 235), INT16_C( 33),
+ INT16_C( 102), INT16_C( -4), INT16_C( 2), INT16_C( 461),
+ INT16_C( -30), INT16_C( -120), INT16_C( 34), INT16_C( 1),
+ INT16_C( 1637), INT16_C( 2), INT16_C( 2), INT16_C( -2)),
+ simde_mm_set_epi8(INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 33),
+ INT8_C( 102), INT8_C( -4), INT8_C( 2), INT8_C( 127),
+ INT8_C( -30), INT8_C(-120), INT8_C( 34), INT8_C( 1),
+ INT8_C( 127), INT8_C( 2), INT8_C( 2), INT8_C( -2)) },
+ { simde_mm256_set_epi16(INT16_C( -3), INT16_C( 0), INT16_C(-16994), INT16_C( 475),
+ INT16_C( 1), INT16_C( 5629), INT16_C( -14), INT16_C( 3),
+ INT16_C( -1612), INT16_C( 2680), INT16_C( -183), INT16_C( -202),
+ INT16_C( 0), INT16_C( -114), INT16_C( 11175), INT16_C( -3)),
+ simde_mm_set_epi8(INT8_C( -3), INT8_C( 0), INT8_C(-128), INT8_C( 127),
+ INT8_C( 1), INT8_C( 127), INT8_C( -14), INT8_C( 3),
+ INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C(-128),
+ INT8_C( 0), INT8_C(-114), INT8_C( 127), INT8_C( -3)) },
+ { simde_mm256_set_epi16(INT16_C( -62), INT16_C( -29), INT16_C( -13), INT16_C( 0),
+ INT16_C( -4), INT16_C( 6), INT16_C( 21), INT16_C( -2),
+ INT16_C( -55), INT16_C( 5), INT16_C( -7030), INT16_C(-31314),
+ INT16_C( 65), INT16_C( -7656), INT16_C( -53), INT16_C( -12)),
+ simde_mm_set_epi8(INT8_C( -62), INT8_C( -29), INT8_C( -13), INT8_C( 0),
+ INT8_C( -4), INT8_C( 6), INT8_C( 21), INT8_C( -2),
+ INT8_C( -55), INT8_C( 5), INT8_C(-128), INT8_C(-128),
+ INT8_C( 65), INT8_C(-128), INT8_C( -53), INT8_C( -12)) },
+ { simde_mm256_set_epi16(INT16_C( -52), INT16_C( 250), INT16_C( -4), INT16_C( 163),
+ INT16_C( -1), INT16_C( -72), INT16_C( -689), INT16_C( -98),
+ INT16_C( -1), INT16_C( 27), INT16_C(-29046), INT16_C( 504),
+ INT16_C( 1), INT16_C( -668), INT16_C( 6), INT16_C( -130)),
+ simde_mm_set_epi8(INT8_C( -52), INT8_C( 127), INT8_C( -4), INT8_C( 127),
+ INT8_C( -1), INT8_C( -72), INT8_C(-128), INT8_C( -98),
+ INT8_C( -1), INT8_C( 27), INT8_C(-128), INT8_C( 127),
+ INT8_C( 1), INT8_C(-128), INT8_C( 6), INT8_C(-128)) },
+ { simde_mm256_set_epi16(INT16_C( 3869), INT16_C( -3), INT16_C( 3307), INT16_C( -5),
+ INT16_C( -61), INT16_C( -5), INT16_C( -43), INT16_C( -7512),
+ INT16_C( 226), INT16_C( 75), INT16_C( 0), INT16_C( -1),
+ INT16_C( 1923), INT16_C( -25), INT16_C( 4919), INT16_C( -1)),
+ simde_mm_set_epi8(INT8_C( 127), INT8_C( -3), INT8_C( 127), INT8_C( -5),
+ INT8_C( -61), INT8_C( -5), INT8_C( -43), INT8_C(-128),
+ INT8_C( 127), INT8_C( 75), INT8_C( 0), INT8_C( -1),
+ INT8_C( 127), INT8_C( -25), INT8_C( 127), INT8_C( -1)) }
+ };
+ /* Run each entry through the function under test and assert the result equals the expected vector. */
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m128i r = simde_mm256_cvtsepi16_epi8(test_vec[i].a);
+ simde_assert_m128i_i8(r, ==, test_vec[i].r); /* project assertion macro defined outside this file; presumably compares as 8-bit lanes */
+ }
+
+ return MUNIT_OK;
+}
+
+#endif /* defined(SIMDE_AVX512VL_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) */
+
+HEDLEY_DIAGNOSTIC_PUSH
+HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL
+
+static MunitTest test_suite_tests[] = {
+#if defined(SIMDE_AVX512VL_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS)
+ /* Entries are registered under the same preprocessor condition that guards the test function definitions in this file. */
+ SIMDE_TESTS_DEFINE_TEST(mm_cvtsepi16_epi8),
+
+ SIMDE_TESTS_DEFINE_TEST(mm256_cvtsepi16_epi8),
+
+#endif /* defined(SIMDE_AVX512VL_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) */
+ { NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL } /* all-NULL final entry, present even when the #if is false; presumably the end-of-array marker munit expects -- confirm against munit docs */
+};
+
+HEDLEY_C_DECL MunitSuite* SIMDE_TESTS_GENERATE_SYMBOL(suite)(void) { /* returns this file's suite; the function's symbol name is produced by SIMDE_TESTS_GENERATE_SYMBOL, defined outside this file */
+ static MunitSuite suite = { (char*) "/" HEDLEY_STRINGIFY(SIMDE_TESTS_CURRENT_ISAX), test_suite_tests, NULL, 1, MUNIT_SUITE_OPTION_NONE }; /* first field is "/" joined with the stringified SIMDE_TESTS_CURRENT_ISAX (defined as avx512vl in this file); the (char*) cast on a string literal is presumably why cast-qual diagnostics are disabled around this code */
+ /* `suite` has static storage duration, so the pointer returned below stays valid after the call. */
+ return &suite;
+}
+
+#if defined(SIMDE_TESTS_SINGLE_ISAX)
+int main(int argc, char* argv[HEDLEY_ARRAY_PARAM(argc + 1)]) { /* standalone entry point, compiled only when SIMDE_TESTS_SINGLE_ISAX is defined */
+ static MunitSuite suite = { "", test_suite_tests, NULL, 1, MUNIT_SUITE_OPTION_NONE }; /* same test array as the suite getter, but with an empty string as the first field */
+ /* Pass the command line through to munit; its return value becomes the process exit status. */
+ return munit_suite_main(&suite, NULL, argc, argv);
+}
+#endif /* defined(SIMDE_TESTS_SINGLE_ISAX) */
+
+HEDLEY_DIAGNOSTIC_POP
=====================================
test/x86/meson.build
=====================================
@@ -11,6 +11,7 @@ simde_test_x86_sources = [
'fma.c',
'avx512f.c',
'avx512bw.c',
+ 'avx512vl.c',
'svml.c'
]
=====================================
test/x86/test-x86-internal.h
=====================================
@@ -188,6 +188,7 @@ SIMDE_TESTS_GENERATE_SUITE_GETTERS(fma);
SIMDE_TESTS_GENERATE_SUITE_GETTERS(avx2);
SIMDE_TESTS_GENERATE_SUITE_GETTERS(avx512f);
SIMDE_TESTS_GENERATE_SUITE_GETTERS(avx512bw);
+SIMDE_TESTS_GENERATE_SUITE_GETTERS(avx512vl);
SIMDE_TESTS_GENERATE_SUITE_GETTERS(svml);
HEDLEY_END_C_DECLS
=====================================
test/x86/test-x86.c
=====================================
@@ -19,7 +19,7 @@
MunitSuite*
simde_tests_x86_get_suite(void) {
- static MunitSuite children[(13 * SUITES_PER_ISAX) + 1];
+ static MunitSuite children[(14 * SUITES_PER_ISAX) + 1];
static MunitSuite suite = { "/x86", NULL, children, 1, MUNIT_SUITE_OPTION_NONE };
static const MunitSuite empty = { NULL, NULL, NULL, 1, MUNIT_SUITE_OPTION_NONE };
@@ -37,6 +37,7 @@ simde_tests_x86_get_suite(void) {
SET_CHILDREN_FOR_ARCH(avx2);
SET_CHILDREN_FOR_ARCH(avx512f);
SET_CHILDREN_FOR_ARCH(avx512bw);
+ SET_CHILDREN_FOR_ARCH(avx512vl);
SET_CHILDREN_FOR_ARCH(svml);
children[i++] = empty;
View it on GitLab: https://salsa.debian.org/med-team/simde/-/compare/25c845adaacaaec417ef96bb51c6090626774992...4b7e5af134b912f78cbfbf197ff97a856e674195
--
View it on GitLab: https://salsa.debian.org/med-team/simde/-/compare/25c845adaacaaec417ef96bb51c6090626774992...4b7e5af134b912f78cbfbf197ff97a856e674195
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200419/492922ac/attachment-0001.html>
More information about the debian-med-commit
mailing list