[med-svn] [Git][med-team/simde][upstream] New upstream version 0.0.0.git.20200403
Michael R. Crusoe
gitlab at salsa.debian.org
Fri Apr 3 10:45:15 BST 2020
Michael R. Crusoe pushed to branch upstream at Debian Med / simde
Commits:
91b3e715 by Michael R. Crusoe at 2020-04-03T08:39:41+02:00
New upstream version 0.0.0.git.20200403
- - - - -
11 changed files:
- simde/x86/avx.h
- simde/x86/avx2.h
- simde/x86/avx512bw.h
- simde/x86/avx512f.h
- simde/x86/sse.h
- simde/x86/sse2.h
- test/x86/avx2.c
- test/x86/avx512bw.c
- test/x86/avx512f.c
- test/x86/skel.c
- test/x86/svml.c
Changes:
=====================================
simde/x86/avx.h
=====================================
@@ -1488,7 +1488,7 @@ simde_mm256_broadcast_ps (simde__m128 const * mem_addr) {
#endif
}
#if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-# define _mm256_broadcast_ps(mem_addr) simde_mm256_broadcast_ps(mem_addr)
+# define _mm256_broadcast_ps(mem_addr) simde_mm256_broadcast_ps(HEDLEY_REINTERPRET_CAST(simde__m128 const*, mem_addr)) /* cast to the callee's parameter type (simde__m128 const*), not float const* */
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -1508,7 +1508,7 @@ simde_mm256_broadcast_sd (simde_float64 const * a) {
#endif
}
#if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-# define _mm256_broadcast_sd(mem_addr) simde_mm256_broadcast_sd(mem_addr)
+# define _mm256_broadcast_sd(mem_addr) simde_mm256_broadcast_sd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr))
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -3358,7 +3358,7 @@ simde_mm_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde
#endif
}
#if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-# define _mm_maskload_pd(mem_addr, mask) simde_mm_maskload_pd(mem_addr, mask)
+# define _mm_maskload_pd(mem_addr, mask) simde_mm_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -3379,7 +3379,7 @@ simde_mm256_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], si
#endif
}
#if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-# define _mm256_maskload_pd(mem_addr, mask) simde_mm256_maskload_pd(mem_addr, mask)
+# define _mm256_maskload_pd(mem_addr, mask) simde_mm256_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -3400,7 +3400,7 @@ simde_mm_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde
#endif
}
#if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-# define _mm_maskload_ps(mem_addr, mask) simde_mm_maskload_ps(mem_addr, mask)
+# define _mm_maskload_ps(mem_addr, mask) simde_mm_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -3421,7 +3421,7 @@ simde_mm256_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], si
#endif
}
#if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-# define _mm256_maskload_ps(mem_addr, mask) simde_mm256_maskload_ps(mem_addr, mask)
+# define _mm256_maskload_ps(mem_addr, mask) simde_mm256_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -3441,7 +3441,7 @@ simde_mm_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m12
#endif
}
#if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-# define _mm_maskstore_pd(mem_addr, mask, a) simde_mm_maskstore_pd(mem_addr, mask, a)
+# define _mm_maskstore_pd(mem_addr, mask, a) simde_mm_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -3461,7 +3461,7 @@ simde_mm256_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__
#endif
}
#if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-# define _mm256_maskstore_pd(mem_addr, mask, a) simde_mm256_maskstore_pd(mem_addr, mask, a)
+# define _mm256_maskstore_pd(mem_addr, mask, a) simde_mm256_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -3481,7 +3481,7 @@ simde_mm_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m12
#endif
}
#if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-# define _mm_maskstore_ps(mem_addr, mask, a) simde_mm_maskstore_ps(mem_addr, mask, a)
+# define _mm_maskstore_ps(mem_addr, mask, a) simde_mm_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -3501,7 +3501,7 @@ simde_mm256_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__
#endif
}
#if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-# define _mm256_maskstore_ps(mem_addr, mask, a) simde_mm256_maskstore_ps(mem_addr, mask, a)
+# define _mm256_maskstore_ps(mem_addr, mask, a) simde_mm256_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -4486,7 +4486,7 @@ simde_mm256_store_ps (simde_float32 mem_addr[8], simde__m256 a) {
#endif
}
#if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-# define _mm256_store_ps(mem_addr, a) simde_mm256_store_ps(mem_addr, a)
+# define _mm256_store_ps(mem_addr, a) simde_mm256_store_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -4501,7 +4501,7 @@ simde_mm256_store_pd (simde_float64 mem_addr[4], simde__m256d a) {
#endif
}
#if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-# define _mm256_store_pd(mem_addr, a) simde_mm256_store_pd(mem_addr, a)
+# define _mm256_store_pd(mem_addr, a) simde_mm256_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -4529,7 +4529,7 @@ simde_mm256_storeu_ps (simde_float32 mem_addr[8], simde__m256 a) {
#endif
}
#if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-# define _mm256_storeu_ps(mem_addr, a) simde_mm256_storeu_ps(mem_addr, a)
+# define _mm256_storeu_ps(mem_addr, a) simde_mm256_storeu_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -4542,7 +4542,7 @@ simde_mm256_storeu_pd (simde_float64 mem_addr[4], simde__m256d a) {
#endif
}
#if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-# define _mm256_storeu_pd(mem_addr, a) simde_mm256_storeu_pd(mem_addr, a)
+# define _mm256_storeu_pd(mem_addr, a) simde_mm256_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -4600,7 +4600,7 @@ simde_mm256_stream_ps (simde_float32 mem_addr[8], simde__m256 a) {
#endif
}
#if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-# define _mm256_stream_ps(mem_addr, a) simde_mm256_stream_ps(mem_addr, a)
+# define _mm256_stream_ps(mem_addr, a) simde_mm256_stream_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -4615,7 +4615,7 @@ simde_mm256_stream_pd (simde_float64 mem_addr[4], simde__m256d a) {
#endif
}
#if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-# define _mm256_stream_pd(mem_addr, a) simde_mm256_stream_pd(mem_addr, a)
+# define _mm256_stream_pd(mem_addr, a) simde_mm256_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)
#endif
SIMDE__FUNCTION_ATTRIBUTES
=====================================
simde/x86/avx2.h
=====================================
@@ -813,7 +813,7 @@ simde_mm256_cvtepi8_epi32 (simde__m128i a) {
#endif
}
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
-# define _mm256_cvtepi8_epi32(a) simde_mm256_cvtepi8_epi16(a)
+# define _mm256_cvtepi8_epi32(a) simde_mm256_cvtepi8_epi32(a)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -2028,7 +2028,7 @@ simde_mm256_unpackhi_epi16 (simde__m256i a, simde__m256i b) {
b_ = simde__m256i_to_private(b);
#if defined(SIMDE__SHUFFLE_VECTOR)
- r_.i16 =SIMDE__SHUFFLE_VECTOR(16, 32, a_.i16, b_.i16,
+ r_.i16 = SIMDE__SHUFFLE_VECTOR(16, 32, a_.i16, b_.i16,
4, 20, 5, 21, 6, 22, 7, 23,
12, 28, 13, 29, 14, 30, 15, 31);
#else
@@ -2043,6 +2043,57 @@ simde_mm256_unpackhi_epi16 (simde__m256i a, simde__m256i b) {
# define _mm256_unpackhi_epi16(a, b) simde_mm256_unpackhi_epi16(a, b)
#endif
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m256i
+simde_mm256_unpackhi_epi32 (simde__m256i a, simde__m256i b) { /* 32-bit unpackhi of a and b, done separately for each 128-bit half */
+#if defined(SIMDE_AVX2_NATIVE)
+ return _mm256_unpackhi_epi32(a, b);
+#else
+ simde__m256i_private
+ r_,
+ a_ = simde__m256i_to_private(a),
+ b_ = simde__m256i_to_private(b);
+ /* Shuffle indices presumably address a_.i32 as 0-7 and b_.i32 as 8-15 -- TODO confirm against SIMDE__SHUFFLE_VECTOR. */
+#if defined(SIMDE__SHUFFLE_VECTOR)
+ r_.i32 = SIMDE__SHUFFLE_VECTOR(32, 32, a_.i32, b_.i32,
+ 2, 10, 3, 11, 6, 14, 7, 15);
+#else /* fallback: run the 128-bit unpackhi on each half independently */
+ r_.m128i[0] = simde_mm_unpackhi_epi32(a_.m128i[0], b_.m128i[0]);
+ r_.m128i[1] = simde_mm_unpackhi_epi32(a_.m128i[1], b_.m128i[1]);
+#endif
+
+ return simde__m256i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
+# define _mm256_unpackhi_epi32(a, b) simde_mm256_unpackhi_epi32(a, b)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m256i
+simde_mm256_unpackhi_epi64 (simde__m256i a, simde__m256i b) { /* 64-bit unpackhi of a and b, done separately for each 128-bit half */
+#if defined(SIMDE_AVX2_NATIVE)
+ return _mm256_unpackhi_epi64(a, b);
+#else
+ simde__m256i_private
+ r_,
+ a_ = simde__m256i_to_private(a),
+ b_ = simde__m256i_to_private(b);
+ /* Shuffle indices presumably address a_.i64 as 0-3 and b_.i64 as 4-7 -- TODO confirm against SIMDE__SHUFFLE_VECTOR. */
+#if defined(SIMDE__SHUFFLE_VECTOR)
+ r_.i64 = SIMDE__SHUFFLE_VECTOR(64, 32, a_.i64, b_.i64, 1, 5, 3, 7);
+#else /* fallback: run the 128-bit unpackhi on each half independently */
+ r_.m128i[0] = simde_mm_unpackhi_epi64(a_.m128i[0], b_.m128i[0]);
+ r_.m128i[1] = simde_mm_unpackhi_epi64(a_.m128i[1], b_.m128i[1]);
+#endif
+
+ return simde__m256i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
+# define _mm256_unpackhi_epi64(a, b) simde_mm256_unpackhi_epi64(a, b)
+#endif
+
SIMDE__FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_xor_si256 (simde__m256i a, simde__m256i b) {
=====================================
simde/x86/avx512bw.h
=====================================
@@ -236,6 +236,76 @@ simde_mm512_adds_epu16 (simde__m512i a, simde__m512i b) {
# define _mm512_adds_epu16(a, b) simde_mm512_adds_epu16(a, b)
#endif
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_shuffle_epi8 (simde__m512i a, simde__m512i b) { /* byte shuffle of a, controlled per byte by b */
+#if defined(SIMDE_AVX512BW_NATIVE)
+ return _mm512_shuffle_epi8(a, b);
+#else
+ simde__m512i_private
+ r_,
+ a_ = simde__m512i_to_private(a),
+ b_ = simde__m512i_to_private(b);
+ /* Three strategies follow: two 256-bit shuffles, four 128-bit shuffles, or a byte-by-byte loop. */
+#if defined(SIMDE_ARCH_X86_AVX2)
+ r_.m256i[0] = simde_mm256_shuffle_epi8(a_.m256i[0], b_.m256i[0]);
+ r_.m256i[1] = simde_mm256_shuffle_epi8(a_.m256i[1], b_.m256i[1]);
+#elif defined(SIMDE_ARCH_X86_SSSE3)
+ r_.m128i[0] = simde_mm_shuffle_epi8(a_.m128i[0], b_.m128i[0]);
+ r_.m128i[1] = simde_mm_shuffle_epi8(a_.m128i[1], b_.m128i[1]);
+ r_.m128i[2] = simde_mm_shuffle_epi8(a_.m128i[2], b_.m128i[2]);
+ r_.m128i[3] = simde_mm_shuffle_epi8(a_.m128i[3], b_.m128i[3]);
+#else /* scalar: bit 7 of b's byte zeroes the output; otherwise b's low 4 bits index a within i's 16-byte group (i & 0x30) */
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
+ r_.u8[i] = (b_.u8[i] & 0x80) ? 0 : a_.u8[(b_.u8[i] & 0x0f) + (i & 0x30)];
+ }
+#endif
+
+ return simde__m512i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX512BW_ENABLE_NATIVE_ALIASES)
+# define _mm512_shuffle_epi8(a, b) simde_mm512_shuffle_epi8(a, b)
+#endif
+
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__mmask64
+simde_mm512_cmpeq_epi8_mask (simde__m512i a, simde__m512i b) { /* byte-wise equality of a and b, returned as a bit mask */
+#if defined(SIMDE_AVX512BW_NATIVE)
+ return _mm512_cmpeq_epi8_mask(a, b);
+#else
+ simde__m512i_private
+ a_ = simde__m512i_to_private(a),
+ b_ = simde__m512i_to_private(b);
+#if defined(SIMDE_ARCH_X86_AVX2)
+ simde__mmask64 r_;
+ /* The upper 256-bit half is computed first, then shifted up by 32 so the lower half can be ORed in. */
+ // The second cast is absolutely necessary otherwise if the sign bit is set it will be sign extended to 64 bits
+ r_ = (uint32_t) simde_mm256_movemask_epi8(simde_mm256_cmpeq_epi8(a_.m256i[1], b_.m256i[1]));
+ r_ = (r_ << 32) | (uint32_t) simde_mm256_movemask_epi8(simde_mm256_cmpeq_epi8(a_.m256i[0], b_.m256i[0]));
+#elif defined(SIMDE_ARCH_X86_SSE2)
+ simde__mmask64 r_;
+ /* Four 128-bit lanes, highest first; the running mask is shifted up by 16 before each lower lane is ORed in. */
+ r_ = simde_mm_movemask_epi8(simde_mm_cmpeq_epi8(a_.m128i[3], b_.m128i[3]));
+ r_ = (r_ << 16) | simde_mm_movemask_epi8(simde_mm_cmpeq_epi8(a_.m128i[2], b_.m128i[2]));
+ r_ = (r_ << 16) | simde_mm_movemask_epi8(simde_mm_cmpeq_epi8(a_.m128i[1], b_.m128i[1]));
+ r_ = (r_ << 16) | simde_mm_movemask_epi8(simde_mm_cmpeq_epi8(a_.m128i[0], b_.m128i[0]));
+#else
+ simde__mmask64 r_ = 0;
+ /* Scalar path: set bit i of the mask when byte i of a equals byte i of b. */
+ for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) {
+ r_ |= (a_.u8[i] == b_.u8[i]) ? (1ULL << i) : 0;
+ }
+#endif
+ return r_;
+#endif
+}
+#if defined(SIMDE_AVX512BW_ENABLE_NATIVE_ALIASES)
+# define _mm512_cmpeq_epi8_mask(a, b) simde_mm512_cmpeq_epi8_mask(a, b)
+#endif
+
SIMDE__END_DECLS
HEDLEY_DIAGNOSTIC_POP
=====================================
simde/x86/avx512f.h
=====================================
@@ -1669,6 +1669,106 @@ simde_mm512_mask_cmpeq_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m51
# define _mm512_mask_cmpeq_epi32_mask(k1, a, b) simde_mm512_mask_cmpeq_epi32_mask(k1, a, b)
#endif
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_cvtepi8_epi32 (simde__m128i a) { /* converts the i8 elements of a into the i32 elements of the result */
+#if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_cvtepi8_epi32(a);
+#else
+ simde__m512i_private r_;
+ simde__m128i_private a_ = simde__m128i_to_private(a);
+ /* Either one whole-vector conversion, or an element-wise loop with one a_.i8 element per r_.i32 element. */
+#if defined(SIMDE__CONVERT_VECTOR)
+ SIMDE__CONVERT_VECTOR(r_.i32, a_.i8);
+#else
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
+ r_.i32[i] = a_.i8[i];
+ }
+#endif
+
+ return simde__m512i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+# define _mm512_cvtepi8_epi32(a) simde_mm512_cvtepi8_epi32(a)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_cvtepi8_epi64 (simde__m128i a) { /* converts leading i8 elements of a into the i64 elements of the result */
+#if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_cvtepi8_epi64(a);
+#else
+ simde__m512i_private r_;
+ simde__m128i_private a_ = simde__m128i_to_private(a);
+ /* Only part of a is consumed: the vector path reads a_.m64_private[0].i8, the loop reads one a_.i8 per r_.i64 element. */
+#if defined(SIMDE__CONVERT_VECTOR)
+ SIMDE__CONVERT_VECTOR(r_.i64, a_.m64_private[0].i8);
+#else
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
+ r_.i64[i] = a_.i8[i];
+ }
+#endif
+
+ return simde__m512i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+# define _mm512_cvtepi8_epi64(a) simde_mm512_cvtepi8_epi64(a)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m128i
+simde_mm512_cvtepi32_epi8 (simde__m512i a) { /* narrows each i32 element of a to an i8 element of the result */
+#if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_cvtepi32_epi8(a);
+#else
+ simde__m128i_private r_;
+ simde__m512i_private a_ = simde__m512i_to_private(a);
+ /* Either one whole-vector conversion, or an element-wise loop that casts each a_.i32 element to int8_t. */
+#if defined(SIMDE__CONVERT_VECTOR)
+ SIMDE__CONVERT_VECTOR(r_.i8, a_.i32);
+#else
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
+ r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i32[i]);
+ }
+#endif
+
+ return simde__m128i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+# define _mm512_cvtepi32_epi8(a) simde_mm512_cvtepi32_epi8(a)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m128i
+simde_mm512_cvtepi64_epi8 (simde__m512i a) { /* narrows each i64 element of a to an i8 element at the start of the result */
+#if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_cvtepi64_epi8(a);
+#else
+ simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128());
+ simde__m512i_private a_ = simde__m512i_to_private(a);
+ /* r_ is pre-initialised (presumably all zero) because only r_.m64_private[0].i8 / the first r_.i8 entries are written below. */
+#if defined(SIMDE__CONVERT_VECTOR)
+ SIMDE__CONVERT_VECTOR(r_.m64_private[0].i8, a_.i64);
+#else
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
+ r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i64[i]);
+ }
+#endif
+
+ return simde__m128i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+# define _mm512_cvtepi64_epi8(a) simde_mm512_cvtepi64_epi8(a)
+#endif
+
SIMDE__END_DECLS
HEDLEY_DIAGNOSTIC_POP
=====================================
simde/x86/sse.h
=====================================
@@ -2984,7 +2984,7 @@ simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) {
#endif
}
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
-# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(mem_addr, (a))
+# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -3004,7 +3004,7 @@ simde_mm_store_ps1 (simde_float32 mem_addr[4], simde__m128 a) {
#endif
}
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
-# define _mm_store_ps1(mem_addr, a) simde_mm_store_ps1(mem_addr, (a))
+# define _mm_store_ps1(mem_addr, a) simde_mm_store_ps1(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -3023,7 +3023,7 @@ simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) {
#endif
}
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
-# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(mem_addr, (a))
+# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -3038,7 +3038,7 @@ simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) {
#endif
}
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
-# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(mem_addr, (a))
+# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -3097,7 +3097,7 @@ simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) {
#endif
}
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
-# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(mem_addr, (a))
+# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -3116,7 +3116,7 @@ simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) {
#endif
}
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
-# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(mem_addr, (a))
+# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -3491,7 +3491,7 @@ simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) {
#endif
}
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
-# define _mm_stream_ps(mem_addr, a) simde_mm_stream_ps(mem_addr, (a))
+# define _mm_stream_ps(mem_addr, a) simde_mm_stream_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
#endif
SIMDE__FUNCTION_ATTRIBUTES
=====================================
simde/x86/sse2.h
=====================================
@@ -244,7 +244,7 @@ typedef union {
typedef __m128i simde__m128i;
typedef __m128d simde__m128d;
#elif defined(SIMDE_SSE2_NEON)
- typedef float32x4_t simde__m128i;
+ typedef int64x2_t simde__m128i;
# if defined(SIMDE_ARCH_AARCH64)
typedef float64x2_t simde__m128d;
# elif defined(SIMDE_VECTOR_SUBSCRIPT)
@@ -4834,7 +4834,7 @@ simde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a
#endif
}
#if defined(SIMDE_SSE2_ENABLE_NATIVE_ALIASES)
-# define _mm_store_pd(mem_addr, a) simde_mm_store_pd(mem_addr, a)
+# define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -4851,10 +4851,10 @@ simde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d
mem_addr[1] = a_.f64[0];
#endif
}
-#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(mem_addr, a)
+#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)
#if defined(SIMDE_SSE2_ENABLE_NATIVE_ALIASES)
-# define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(mem_addr, a)
-# define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(mem_addr, a)
+# define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)
+# define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -4875,7 +4875,7 @@ simde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) {
#endif
}
#if defined(SIMDE_SSE2_ENABLE_NATIVE_ALIASES)
-# define _mm_store_sd(mem_addr, a) simde_mm_store_sd(mem_addr, a)
+# define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -4914,7 +4914,7 @@ simde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) {
#endif
}
#if defined(SIMDE_SSE2_ENABLE_NATIVE_ALIASES)
-# define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(mem_addr, a)
+# define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -4948,7 +4948,7 @@ simde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) {
#endif
}
#if defined(SIMDE_SSE2_ENABLE_NATIVE_ALIASES)
-# define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(mem_addr, a)
+# define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -4966,7 +4966,7 @@ simde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) {
#endif
}
#if defined(SIMDE_SSE2_ENABLE_NATIVE_ALIASES)
-# define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(mem_addr, a)
+# define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -4979,7 +4979,7 @@ simde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) {
#endif
}
#if defined(SIMDE_SSE2_ENABLE_NATIVE_ALIASES)
-# define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(mem_addr, a)
+# define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -5013,7 +5013,7 @@ simde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d
#endif
}
#if defined(SIMDE_SSE2_ENABLE_NATIVE_ALIASES)
-# define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(mem_addr, a)
+# define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -5969,6 +5969,11 @@ simde_x_mm_not_si128 (simde__m128i a) {
return simde__m128i_from_private(r_);
}
+#define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y)) /* combines two selectors into one value: x shifted left by one, ORed with y */
+#if defined(SIMDE_SSE2_ENABLE_NATIVE_ALIASES)
+# define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y) /* native-name alias, only when aliases are enabled */
+#endif
+
SIMDE__END_DECLS
HEDLEY_DIAGNOSTIC_POP
=====================================
test/x86/avx2.c
=====================================
@@ -5558,54 +5558,54 @@ test_simde_mm256_min_epu32(const MunitParameter params[], void* data) {
simde__m256i b;
simde__m256i r;
} test_vec[8] = {
- { simde_x_mm256_set_epu32(UINT32_C( 1440846368), UINT32_C( -63978166), UINT32_C( 1399113445), UINT32_C(-1455222788),
- UINT32_C( -469490798), UINT32_C( 1991020107), UINT32_C(-1863870898), UINT32_C( 1328594239)),
- simde_x_mm256_set_epu32(UINT32_C( 102513809), UINT32_C( -329455244), UINT32_C( 307870409), UINT32_C( 295630559),
- UINT32_C(-1234945944), UINT32_C( -774625773), UINT32_C( -390354839), UINT32_C(-1495847433)),
- simde_x_mm256_set_epu32(UINT32_C( 102513809), UINT32_C( -329455244), UINT32_C( 307870409), UINT32_C( 295630559),
- UINT32_C(-1234945944), UINT32_C( 1991020107), UINT32_C(-1863870898), UINT32_C( 1328594239)) },
- { simde_x_mm256_set_epu32(UINT32_C( 1307771138), UINT32_C( 491465332), UINT32_C( 393240962), UINT32_C( 1290764124),
- UINT32_C( 1683625215), UINT32_C( 490731907), UINT32_C( -455852593), UINT32_C( -956210525)),
- simde_x_mm256_set_epu32(UINT32_C(-1635890822), UINT32_C( 910213294), UINT32_C( -85983471), UINT32_C( -240357962),
- UINT32_C(-1606645062), UINT32_C( -435549149), UINT32_C( 512067539), UINT32_C(-2046648155)),
- simde_x_mm256_set_epu32(UINT32_C( 1307771138), UINT32_C( 491465332), UINT32_C( 393240962), UINT32_C( 1290764124),
- UINT32_C( 1683625215), UINT32_C( 490731907), UINT32_C( 512067539), UINT32_C(-2046648155)) },
- { simde_x_mm256_set_epu32(UINT32_C( -714376287), UINT32_C(-1856787219), UINT32_C(-1948205449), UINT32_C( 1985002804),
- UINT32_C(-1632505450), UINT32_C( 967094020), UINT32_C( 258843795), UINT32_C(-1730834268)),
- simde_x_mm256_set_epu32(UINT32_C( -497194011), UINT32_C( 2046480498), UINT32_C( 1801500737), UINT32_C( 2033606287),
- UINT32_C( 289880638), UINT32_C( 2013746580), UINT32_C(-1003047754), UINT32_C( 1692706041)),
- simde_x_mm256_set_epu32(UINT32_C( -714376287), UINT32_C( 2046480498), UINT32_C( 1801500737), UINT32_C( 1985002804),
- UINT32_C( 289880638), UINT32_C( 967094020), UINT32_C( 258843795), UINT32_C( 1692706041)) },
- { simde_x_mm256_set_epu32(UINT32_C( 1320709158), UINT32_C( 816999148), UINT32_C(-1366813775), UINT32_C( -28642953),
- UINT32_C(-1227971629), UINT32_C(-1379423721), UINT32_C(-1139745869), UINT32_C( -293312984)),
- simde_x_mm256_set_epu32(UINT32_C( 332902085), UINT32_C( 1709201405), UINT32_C( 1874659226), UINT32_C( -372944690),
- UINT32_C(-2106372518), UINT32_C( 2058700648), UINT32_C( -582719177), UINT32_C( 1001999947)),
- simde_x_mm256_set_epu32(UINT32_C( 332902085), UINT32_C( 816999148), UINT32_C( 1874659226), UINT32_C( -372944690),
- UINT32_C(-2106372518), UINT32_C( 2058700648), UINT32_C(-1139745869), UINT32_C( 1001999947)) },
- { simde_x_mm256_set_epu32(UINT32_C( -964274667), UINT32_C(-1648595768), UINT32_C( 1471545107), UINT32_C( 27318365),
- UINT32_C( 260329589), UINT32_C( 227849259), UINT32_C( 1920321444), UINT32_C(-1936907575)),
- simde_x_mm256_set_epu32(UINT32_C( 1696458631), UINT32_C( 1060012303), UINT32_C( -667172636), UINT32_C(-1886502820),
- UINT32_C( 1497838086), UINT32_C( 207594118), UINT32_C( -532680463), UINT32_C( 1866090536)),
- simde_x_mm256_set_epu32(UINT32_C( 1696458631), UINT32_C( 1060012303), UINT32_C( 1471545107), UINT32_C( 27318365),
- UINT32_C( 260329589), UINT32_C( 207594118), UINT32_C( 1920321444), UINT32_C( 1866090536)) },
- { simde_x_mm256_set_epu32(UINT32_C(-1777625938), UINT32_C( -154135345), UINT32_C( 174494711), UINT32_C(-1775989674),
- UINT32_C(-1837023557), UINT32_C( 1967600763), UINT32_C( -631660232), UINT32_C(-1996836188)),
- simde_x_mm256_set_epu32(UINT32_C( 281157684), UINT32_C( 1154116325), UINT32_C(-1602584536), UINT32_C( 765551471),
- UINT32_C( 391237042), UINT32_C( -467359652), UINT32_C( 1535761889), UINT32_C(-1668896011)),
- simde_x_mm256_set_epu32(UINT32_C( 281157684), UINT32_C( 1154116325), UINT32_C( 174494711), UINT32_C( 765551471),
- UINT32_C( 391237042), UINT32_C( 1967600763), UINT32_C( 1535761889), UINT32_C(-1996836188)) },
- { simde_x_mm256_set_epu32(UINT32_C( 256903792), UINT32_C( 8208695), UINT32_C( 591537249), UINT32_C(-1897294295),
- UINT32_C( -958845995), UINT32_C( 1843022267), UINT32_C( 1972328098), UINT32_C( -907457076)),
- simde_x_mm256_set_epu32(UINT32_C( -469518946), UINT32_C( -182686980), UINT32_C( -65053560), UINT32_C(-1520653503),
- UINT32_C( 1810925047), UINT32_C( 1432780821), UINT32_C(-1505705572), UINT32_C( 709984577)),
- simde_x_mm256_set_epu32(UINT32_C( 256903792), UINT32_C( 8208695), UINT32_C( 591537249), UINT32_C(-1897294295),
- UINT32_C( 1810925047), UINT32_C( 1432780821), UINT32_C( 1972328098), UINT32_C( 709984577)) },
- { simde_x_mm256_set_epu32(UINT32_C( 2107372218), UINT32_C(-1339950000), UINT32_C( -517566965), UINT32_C( 1104805878),
- UINT32_C( 321684264), UINT32_C( -268829241), UINT32_C( 2019330239), UINT32_C( 204124644)),
- simde_x_mm256_set_epu32(UINT32_C(-1246161689), UINT32_C( 1484088388), UINT32_C( 936889694), UINT32_C( 628920619),
- UINT32_C( 1829277398), UINT32_C(-2046097299), UINT32_C( -938854488), UINT32_C( 2001330537)),
- simde_x_mm256_set_epu32(UINT32_C( 2107372218), UINT32_C( 1484088388), UINT32_C( 936889694), UINT32_C( 628920619),
- UINT32_C( 321684264), UINT32_C(-2046097299), UINT32_C( 2019330239), UINT32_C( 204124644)) }
+ { simde_x_mm256_set_epu32(UINT32_C(1967336836), UINT32_C(4229175532), UINT32_C(3383228965), UINT32_C( 979444700),
+ UINT32_C( 138906106), UINT32_C( 146006389), UINT32_C(2494756173), UINT32_C(2105904126)),
+ simde_x_mm256_set_epu32(UINT32_C(2893387611), UINT32_C(2460478173), UINT32_C(3299027518), UINT32_C( 896184310),
+ UINT32_C(1699227452), UINT32_C(1922730015), UINT32_C(1220777705), UINT32_C( 483108197)),
+ simde_x_mm256_set_epu32(UINT32_C(1967336836), UINT32_C(2460478173), UINT32_C(3299027518), UINT32_C( 896184310),
+ UINT32_C( 138906106), UINT32_C( 146006389), UINT32_C(1220777705), UINT32_C( 483108197)) },
+ { simde_x_mm256_set_epu32(UINT32_C( 596030925), UINT32_C(1623933296), UINT32_C(4116080130), UINT32_C(2140384796),
+ UINT32_C(1701047302), UINT32_C( 386212074), UINT32_C( 328829151), UINT32_C(3670208831)),
+ simde_x_mm256_set_epu32(UINT32_C(3740465827), UINT32_C(1627250042), UINT32_C(2355376593), UINT32_C(1538421544),
+ UINT32_C( 533181547), UINT32_C(1474624420), UINT32_C( 464742120), UINT32_C(1469792713)),
+ simde_x_mm256_set_epu32(UINT32_C( 596030925), UINT32_C(1623933296), UINT32_C(2355376593), UINT32_C(1538421544),
+ UINT32_C( 533181547), UINT32_C( 386212074), UINT32_C( 328829151), UINT32_C(1469792713)) },
+ { simde_x_mm256_set_epu32(UINT32_C(3969449230), UINT32_C(1656808571), UINT32_C(3512723294), UINT32_C(1894827634),
+ UINT32_C( 143242580), UINT32_C(1411474427), UINT32_C(1474375050), UINT32_C(2576321811)),
+ simde_x_mm256_set_epu32(UINT32_C(3817774721), UINT32_C(1995677222), UINT32_C(2801037071), UINT32_C(1160265207),
+ UINT32_C( 536799050), UINT32_C(3040213718), UINT32_C(2960203135), UINT32_C( 387289056)),
+ simde_x_mm256_set_epu32(UINT32_C(3817774721), UINT32_C(1656808571), UINT32_C(2801037071), UINT32_C(1160265207),
+ UINT32_C( 143242580), UINT32_C(1411474427), UINT32_C(1474375050), UINT32_C( 387289056)) },
+ { simde_x_mm256_set_epu32(UINT32_C(4117731720), UINT32_C(2896203570), UINT32_C(2130869721), UINT32_C( 624464130),
+ UINT32_C(3920434556), UINT32_C(3882005287), UINT32_C(2912704980), UINT32_C(3353740323)),
+ simde_x_mm256_set_epu32(UINT32_C(1011878374), UINT32_C( 876977997), UINT32_C( 633144937), UINT32_C(2591224872),
+ UINT32_C(1689064732), UINT32_C(1345088039), UINT32_C(3928457299), UINT32_C(1317789172)),
+ simde_x_mm256_set_epu32(UINT32_C(1011878374), UINT32_C( 876977997), UINT32_C( 633144937), UINT32_C( 624464130),
+ UINT32_C(1689064732), UINT32_C(1345088039), UINT32_C(2912704980), UINT32_C(1317789172)) },
+ { simde_x_mm256_set_epu32(UINT32_C(1278890315), UINT32_C(3068059236), UINT32_C( 937423722), UINT32_C( 545836753),
+ UINT32_C(3944086739), UINT32_C(2155957693), UINT32_C( 750306742), UINT32_C( 995938818)),
+ simde_x_mm256_set_epu32(UINT32_C(2300290567), UINT32_C(3884383026), UINT32_C( 682756216), UINT32_C( 434660596),
+ UINT32_C(3895444851), UINT32_C(3229085704), UINT32_C(3028266335), UINT32_C(2490258842)),
+ simde_x_mm256_set_epu32(UINT32_C(1278890315), UINT32_C(3068059236), UINT32_C( 682756216), UINT32_C( 434660596),
+ UINT32_C(3895444851), UINT32_C(2155957693), UINT32_C( 750306742), UINT32_C( 995938818)) },
+ { simde_x_mm256_set_epu32(UINT32_C( 953347239), UINT32_C(3233201384), UINT32_C( 883460426), UINT32_C( 630153716),
+ UINT32_C(2626834474), UINT32_C(4260188706), UINT32_C(4276291548), UINT32_C(1697478493)),
+ simde_x_mm256_set_epu32(UINT32_C(3841797977), UINT32_C( 519016629), UINT32_C( 54139722), UINT32_C(4160085404),
+ UINT32_C(2354740665), UINT32_C( 224670449), UINT32_C(2606748626), UINT32_C(2604287898)),
+ simde_x_mm256_set_epu32(UINT32_C( 953347239), UINT32_C( 519016629), UINT32_C( 54139722), UINT32_C( 630153716),
+ UINT32_C(2354740665), UINT32_C( 224670449), UINT32_C(2606748626), UINT32_C(1697478493)) },
+ { simde_x_mm256_set_epu32(UINT32_C(4276795094), UINT32_C(3517498069), UINT32_C(3286132221), UINT32_C(1640896057),
+ UINT32_C(1497672480), UINT32_C( 40644986), UINT32_C(1824934232), UINT32_C(1194285849)),
+ simde_x_mm256_set_epu32(UINT32_C(1850977199), UINT32_C(1916865152), UINT32_C(2772610612), UINT32_C(2574813520),
+ UINT32_C(1514631464), UINT32_C(2960447777), UINT32_C(2276426609), UINT32_C(1419491712)),
+ simde_x_mm256_set_epu32(UINT32_C(1850977199), UINT32_C(1916865152), UINT32_C(2772610612), UINT32_C(1640896057),
+ UINT32_C(1497672480), UINT32_C( 40644986), UINT32_C(1824934232), UINT32_C(1194285849)) },
+ { simde_x_mm256_set_epu32(UINT32_C( 990577222), UINT32_C(1025245358), UINT32_C(2406551265), UINT32_C(4071927667),
+ UINT32_C(3189139328), UINT32_C(1739830541), UINT32_C(1739044254), UINT32_C( 584922997)),
+ simde_x_mm256_set_epu32(UINT32_C( 72076484), UINT32_C(2950382985), UINT32_C( 915753842), UINT32_C( 355749909),
+ UINT32_C(3000181234), UINT32_C(3918623971), UINT32_C(3711879869), UINT32_C(3103913192)),
+ simde_x_mm256_set_epu32(UINT32_C( 72076484), UINT32_C(1025245358), UINT32_C( 915753842), UINT32_C( 355749909),
+ UINT32_C(3000181234), UINT32_C(1739830541), UINT32_C(1739044254), UINT32_C( 584922997)) }
};
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
@@ -8039,6 +8039,142 @@ test_simde_mm256_unpackhi_epi16(const MunitParameter params[], void* data) {
return MUNIT_OK;
}
+static MunitResult /* munit test: checks simde_mm256_unpackhi_epi32() against 8 precomputed vectors and returns MUNIT_OK. */
+test_simde_mm256_unpackhi_epi32(const MunitParameter params[], void* data) {
+ (void) params; /* neither munit argument is read by this test */
+ (void) data;
+ /* Each entry holds two inputs (a, b) and the expected result (r). Counting the set_epi32 arguments from 0 as written, every r below is (b[0], a[0], b[1], a[1], b[4], a[4], b[5], a[5]). */
+ const struct {
+ simde__m256i a;
+ simde__m256i b;
+ simde__m256i r;
+ } test_vec[8] = {
+ { simde_mm256_set_epi32(INT32_C( 1912850859), INT32_C( 967654585), INT32_C( 1199101495), INT32_C( 1020867807),
+ INT32_C(-1113017403), INT32_C( 1207205853), INT32_C(-1283015323), INT32_C( -865603422)),
+ simde_mm256_set_epi32(INT32_C( 439671122), INT32_C( -834176430), INT32_C( 1316719462), INT32_C( 794894521),
+ INT32_C( -364012057), INT32_C(-1283491930), INT32_C( -229222523), INT32_C( -489560867)),
+ simde_mm256_set_epi32(INT32_C( 439671122), INT32_C( 1912850859), INT32_C( -834176430), INT32_C( 967654585),
+ INT32_C( -364012057), INT32_C(-1113017403), INT32_C(-1283491930), INT32_C( 1207205853)) },
+ { simde_mm256_set_epi32(INT32_C(-1043075301), INT32_C(-1205379203), INT32_C( -623218356), INT32_C(-1709643548),
+ INT32_C( -53386540), INT32_C( 1999540953), INT32_C( 2146270924), INT32_C( 1577977486)),
+ simde_mm256_set_epi32(INT32_C( 1416708497), INT32_C( 1977290590), INT32_C( -799631345), INT32_C( -699557932),
+ INT32_C( -779666453), INT32_C(-1556766589), INT32_C( 1459438848), INT32_C( 777616978)),
+ simde_mm256_set_epi32(INT32_C( 1416708497), INT32_C(-1043075301), INT32_C( 1977290590), INT32_C(-1205379203),
+ INT32_C( -779666453), INT32_C( -53386540), INT32_C(-1556766589), INT32_C( 1999540953)) },
+ { simde_mm256_set_epi32(INT32_C(-1262008529), INT32_C( 995722237), INT32_C( 1921619936), INT32_C(-2112826366),
+ INT32_C( -393450028), INT32_C( 1344594991), INT32_C( 2093675145), INT32_C( 760241232)),
+ simde_mm256_set_epi32(INT32_C( 1015857134), INT32_C( 1981903036), INT32_C( 1927114874), INT32_C( 1164834454),
+ INT32_C( -157704373), INT32_C(-1887449946), INT32_C(-1985956729), INT32_C(-2099334634)),
+ simde_mm256_set_epi32(INT32_C( 1015857134), INT32_C(-1262008529), INT32_C( 1981903036), INT32_C( 995722237),
+ INT32_C( -157704373), INT32_C( -393450028), INT32_C(-1887449946), INT32_C( 1344594991)) },
+ { simde_mm256_set_epi32(INT32_C( 212170692), INT32_C( 815869922), INT32_C( 20080222), INT32_C( -75417640),
+ INT32_C( -503678651), INT32_C( -834592925), INT32_C( 1707747009), INT32_C( -254760969)),
+ simde_mm256_set_epi32(INT32_C( -574627621), INT32_C( -890978529), INT32_C( 2077265887), INT32_C( 842919754),
+ INT32_C( -486905662), INT32_C( 988851226), INT32_C(-1518229684), INT32_C( 1816570667)),
+ simde_mm256_set_epi32(INT32_C( -574627621), INT32_C( 212170692), INT32_C( -890978529), INT32_C( 815869922),
+ INT32_C( -486905662), INT32_C( -503678651), INT32_C( 988851226), INT32_C( -834592925)) },
+ { simde_mm256_set_epi32(INT32_C( 1459349320), INT32_C( -943790006), INT32_C( 424384832), INT32_C( 113065932),
+ INT32_C( -80059372), INT32_C( 814075306), INT32_C( 1255708904), INT32_C( 894835823)),
+ simde_mm256_set_epi32(INT32_C( 1159658953), INT32_C( 513900351), INT32_C( 1274799760), INT32_C( 661217108),
+ INT32_C(-1116902016), INT32_C( 1264134407), INT32_C( 1996134185), INT32_C( -620124201)),
+ simde_mm256_set_epi32(INT32_C( 1159658953), INT32_C( 1459349320), INT32_C( 513900351), INT32_C( -943790006),
+ INT32_C(-1116902016), INT32_C( -80059372), INT32_C( 1264134407), INT32_C( 814075306)) },
+ { simde_mm256_set_epi32(INT32_C( 1669821560), INT32_C( -524933447), INT32_C(-1923407638), INT32_C( 1748809176),
+ INT32_C( -67073492), INT32_C(-1589224355), INT32_C(-1890395480), INT32_C( 650020033)),
+ simde_mm256_set_epi32(INT32_C( 1449046752), INT32_C( 11693105), INT32_C( 225889559), INT32_C( 1195957602),
+ INT32_C( -790861669), INT32_C( 894225381), INT32_C( 1788797029), INT32_C( 1410983650)),
+ simde_mm256_set_epi32(INT32_C( 1449046752), INT32_C( 1669821560), INT32_C( 11693105), INT32_C( -524933447),
+ INT32_C( -790861669), INT32_C( -67073492), INT32_C( 894225381), INT32_C(-1589224355)) },
+ { simde_mm256_set_epi32(INT32_C( 166836505), INT32_C( 35920603), INT32_C( 209405330), INT32_C(-1870274444),
+ INT32_C(-2065187438), INT32_C(-1749203354), INT32_C(-1834771489), INT32_C( 1795755804)),
+ simde_mm256_set_epi32(INT32_C( 863607209), INT32_C( 671761907), INT32_C(-1319646828), INT32_C( 236938511),
+ INT32_C( 277292323), INT32_C(-1226502074), INT32_C( 1319858080), INT32_C( 1103529470)),
+ simde_mm256_set_epi32(INT32_C( 863607209), INT32_C( 166836505), INT32_C( 671761907), INT32_C( 35920603),
+ INT32_C( 277292323), INT32_C(-2065187438), INT32_C(-1226502074), INT32_C(-1749203354)) },
+ { simde_mm256_set_epi32(INT32_C( -91786742), INT32_C(-1505313832), INT32_C( 1530067112), INT32_C(-1201437931),
+ INT32_C(-1252606163), INT32_C( 1723954910), INT32_C( 348258249), INT32_C( -864462904)),
+ simde_mm256_set_epi32(INT32_C( 926816633), INT32_C( -705859720), INT32_C( 1728076763), INT32_C(-1714964607),
+ INT32_C( -194394697), INT32_C( -494196608), INT32_C( 1804204829), INT32_C(-1267214668)),
+ simde_mm256_set_epi32(INT32_C( 926816633), INT32_C( -91786742), INT32_C( -705859720), INT32_C(-1505313832),
+ INT32_C( -194394697), INT32_C(-1252606163), INT32_C( -494196608), INT32_C( 1723954910)) }
+ };
+ /* Feed every (a, b) pair through the function under test and compare the result with r using the i32 assertion helper. */
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { /* sizeof ratio = number of entries in test_vec */
+ simde__m256i r = simde_mm256_unpackhi_epi32(test_vec[i].a, test_vec[i].b);
+ simde_assert_m256i_i32(r, ==, test_vec[i].r);
+ }
+
+ return MUNIT_OK;
+}
+
+static MunitResult /* munit test: checks simde_mm256_unpackhi_epi64() against 8 precomputed vectors and returns MUNIT_OK. */
+test_simde_mm256_unpackhi_epi64(const MunitParameter params[], void* data) {
+ (void) params; /* neither munit argument is read by this test */
+ (void) data;
+ /* Each entry holds two inputs (a, b) and the expected result (r). Counting the set_epi64x arguments from 0 as written, every r below is (b[0], a[0], b[2], a[2]). */
+ const struct {
+ simde__m256i a;
+ simde__m256i b;
+ simde__m256i r;
+ } test_vec[8] = {
+ { simde_mm256_set_epi64x(INT64_C( 8215631882498161849), INT64_C( 5150101706630575327),
+ INT64_C(-4780373344556646435), INT64_C(-5510508849122512734)),
+ simde_mm256_set_epi64x(INT64_C( 1888373093446416978), INT64_C( 5655267028091609273),
+ INT64_C(-1563419877153212506), INT64_C( -984503235986201379)),
+ simde_mm256_set_epi64x(INT64_C( 1888373093446416978), INT64_C( 8215631882498161849),
+ INT64_C(-1563419877153212506), INT64_C(-4780373344556646435)) },
+ { simde_mm256_set_epi64x(INT64_C(-4479974301970768003), INT64_C(-2676702454701561628),
+ INT64_C( -229293441347054887), INT64_C( 9218163428513678990)),
+ simde_mm256_set_epi64x(INT64_C( 6084716664557604702), INT64_C(-3434390472036083756),
+ INT64_C(-3348641914685120381), INT64_C( 6268242123449531986)),
+ simde_mm256_set_epi64x(INT64_C( 6084716664557604702), INT64_C(-4479974301970768003),
+ INT64_C(-3348641914685120381), INT64_C( -229293441347054887)) },
+ { simde_mm256_set_epi64x(INT64_C(-5420285358332345347), INT64_C( 8253294782643753986),
+ INT64_C(-1689855001525689297), INT64_C( 8992266276983299152)),
+ simde_mm256_set_epi64x(INT64_C( 4363073169920192700), INT64_C( 8276895360629995158),
+ INT64_C( -677335122063668058), INT64_C(-8529619200130502122)),
+ simde_mm256_set_epi64x(INT64_C( 4363073169920192700), INT64_C(-5420285358332345347),
+ INT64_C( -677335122063668058), INT64_C(-1689855001525689297)) },
+ { simde_mm256_set_epi64x(INT64_C( 911266184125558754), INT64_C( 86243901005969368),
+ INT64_C(-2163283330278023325), INT64_C( 7334717557537023991)),
+ simde_mm256_set_epi64x(INT64_C(-2468006836169294049), INT64_C( 8921789050604351306),
+ INT64_C(-2091243893538378726), INT64_C(-6520746838779843797)),
+ simde_mm256_set_epi64x(INT64_C(-2468006836169294049), INT64_C( 911266184125558754),
+ INT64_C(-2091243893538378726), INT64_C(-2163283330278023325)) },
+ { simde_mm256_set_epi64x(INT64_C( 6267857606191016010), INT64_C( 1822718974471520204),
+ INT64_C( -343852383664222806), INT64_C( 5393228676870839407)),
+ simde_mm256_set_epi64x(INT64_C( 4980697278162501439), INT64_C( 5475223278809866068),
+ INT64_C(-4797057630292334329), INT64_C( 8573331046677456855)),
+ simde_mm256_set_epi64x(INT64_C( 4980697278162501439), INT64_C( 6267857606191016010),
+ INT64_C(-4797057630292334329), INT64_C( -343852383664222806)) },
+ { simde_mm256_set_epi64x(INT64_C( 7171828994125735609), INT64_C(-8260972900337797672),
+ INT64_C( -288078451862774691), INT64_C(-8119186762456202047)),
+ simde_mm256_set_epi64x(INT64_C( 6223608410226715697), INT64_C( 970188269608820066),
+ INT64_C(-3396725003120751643), INT64_C( 7682824740147947234)),
+ simde_mm256_set_epi64x(INT64_C( 6223608410226715697), INT64_C( 7171828994125735609),
+ INT64_C(-3396725003120751643), INT64_C( -288078451862774691)) },
+ { simde_mm256_set_epi64x(INT64_C( 716557332789861083), INT64_C( 899389046382780532),
+ INT64_C(-8869912503774263706), INT64_C(-7880283539092467940)),
+ simde_mm256_set_epi64x(INT64_C( 3709164719916598771), INT64_C(-5667839968293198577),
+ INT64_C( 1190961461785333830), INT64_C( 5668747290064881150)),
+ simde_mm256_set_epi64x(INT64_C( 3709164719916598771), INT64_C( 716557332789861083),
+ INT64_C( 1190961461785333830), INT64_C(-8869912503774263706)) },
+ { simde_mm256_set_epi64x(INT64_C( -394221052306736168), INT64_C( 6571588209818698517),
+ INT64_C(-5379902503129090338), INT64_C( 1495757793447729096)),
+ simde_mm256_set_epi64x(INT64_C( 3980647131712941944), INT64_C( 7422033184642545537),
+ INT64_C( -834918862330058624), INT64_C( 7749000738868025012)),
+ simde_mm256_set_epi64x(INT64_C( 3980647131712941944), INT64_C( -394221052306736168),
+ INT64_C( -834918862330058624), INT64_C(-5379902503129090338)) }
+ };
+ /* Feed every (a, b) pair through the function under test and compare the result with r using the i64 assertion helper. */
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { /* sizeof ratio = number of entries in test_vec */
+ simde__m256i r = simde_mm256_unpackhi_epi64(test_vec[i].a, test_vec[i].b);
+ simde_assert_m256i_i64(r, ==, test_vec[i].r);
+ }
+
+ return MUNIT_OK;
+}
+
static MunitResult
test_simde_mm256_xor_si256(const MunitParameter params[], void* data) {
(void) params;
@@ -8302,54 +8438,54 @@ test_simde_mm256_max_epu32(const MunitParameter params[], void* data) {
simde__m256i b;
simde__m256i r;
} test_vec[8] = {
- { simde_x_mm256_set_epu32(UINT32_C( 1789296413), UINT32_C(-1030321700), UINT32_C( -395124404), UINT32_C( 1616539383),
- UINT32_C( -652488309), UINT32_C( 449218617), UINT32_C(-1851432383), UINT32_C(-1484019778)),
- simde_x_mm256_set_epu32(UINT32_C(-1866543367), UINT32_C(-1970790635), UINT32_C( 815561745), UINT32_C( 2005519330),
- UINT32_C( -11134399), UINT32_C(-1100361913), UINT32_C( 1735044051), UINT32_C( -32078364)),
- simde_x_mm256_set_epu32(UINT32_C(-1866543367), UINT32_C(-1030321700), UINT32_C( -395124404), UINT32_C( 2005519330),
- UINT32_C( -11134399), UINT32_C(-1100361913), UINT32_C(-1851432383), UINT32_C( -32078364)) },
- { simde_x_mm256_set_epu32(UINT32_C( 2144707340), UINT32_C( 252486079), UINT32_C( 1391688801), UINT32_C( -448135501),
- UINT32_C( -148645134), UINT32_C( -445330965), UINT32_C( 1851297716), UINT32_C( -217759707)),
- simde_x_mm256_set_epu32(UINT32_C( 1523413770), UINT32_C( 1207768640), UINT32_C( 306982396), UINT32_C( -481827394),
- UINT32_C( 1401479563), UINT32_C(-1470970496), UINT32_C( 235708602), UINT32_C( 1837412708)),
- simde_x_mm256_set_epu32(UINT32_C( 2144707340), UINT32_C( 1207768640), UINT32_C( 1391688801), UINT32_C( -448135501),
- UINT32_C( -148645134), UINT32_C( -445330965), UINT32_C( 1851297716), UINT32_C( -217759707)) },
- { simde_x_mm256_set_epu32(UINT32_C(-1081195710), UINT32_C( 556196449), UINT32_C( 904499224), UINT32_C( -311476862),
- UINT32_C(-1068051344), UINT32_C( 316495777), UINT32_C( 1987852209), UINT32_C( -82353218)),
- simde_x_mm256_set_epu32(UINT32_C( 1251434410), UINT32_C( 1491058763), UINT32_C( 32741970), UINT32_C( 2096924550),
- UINT32_C( 992045009), UINT32_C( 951482693), UINT32_C( 1326403571), UINT32_C( -785668634)),
- simde_x_mm256_set_epu32(UINT32_C(-1081195710), UINT32_C( 1491058763), UINT32_C( 904499224), UINT32_C( -311476862),
- UINT32_C(-1068051344), UINT32_C( 951482693), UINT32_C( 1987852209), UINT32_C( -82353218)) },
- { simde_x_mm256_set_epu32(UINT32_C( 85161761), UINT32_C( -831060163), UINT32_C( -47466501), UINT32_C(-1193967773),
- UINT32_C( -873716901), UINT32_C( 868999768), UINT32_C( -212766777), UINT32_C( -774920647)),
- simde_x_mm256_set_epu32(UINT32_C( 488094589), UINT32_C(-1161944830), UINT32_C( -324067336), UINT32_C( 795722459),
- UINT32_C( 285945408), UINT32_C( -219645108), UINT32_C( 1108477887), UINT32_C( 1206196184)),
- simde_x_mm256_set_epu32(UINT32_C( 488094589), UINT32_C( -831060163), UINT32_C( -47466501), UINT32_C(-1193967773),
- UINT32_C( -873716901), UINT32_C( -219645108), UINT32_C( -212766777), UINT32_C( -774920647)) },
- { simde_x_mm256_set_epu32(UINT32_C( -145586798), UINT32_C( 2043772759), UINT32_C( 282954497), UINT32_C(-1391199635),
- UINT32_C(-1974699849), UINT32_C( 376785085), UINT32_C( -495950106), UINT32_C( 1285787501)),
- simde_x_mm256_set_epu32(UINT32_C( 1298223933), UINT32_C( -246257442), UINT32_C(-1589240302), UINT32_C(-2123797480),
- UINT32_C(-2103929130), UINT32_C(-1905518436), UINT32_C( 1471816273), UINT32_C(-1959021648)),
- simde_x_mm256_set_epu32(UINT32_C( -145586798), UINT32_C( -246257442), UINT32_C(-1589240302), UINT32_C(-1391199635),
- UINT32_C(-1974699849), UINT32_C(-1905518436), UINT32_C( -495950106), UINT32_C(-1959021648)) },
- { simde_x_mm256_set_epu32(UINT32_C(-1857605381), UINT32_C(-1139820821), UINT32_C(-2120556261), UINT32_C( 945707022),
- UINT32_C(-1819541735), UINT32_C( -865122624), UINT32_C(-1456304785), UINT32_C( 2025802961)),
- simde_x_mm256_set_epu32(UINT32_C( -506483570), UINT32_C(-2083330411), UINT32_C( 1782301120), UINT32_C( -298692339),
- UINT32_C(-1283750394), UINT32_C( 1929029045), UINT32_C( -611912448), UINT32_C( 487591866)),
- simde_x_mm256_set_epu32(UINT32_C( -506483570), UINT32_C(-1139820821), UINT32_C(-2120556261), UINT32_C( -298692339),
- UINT32_C(-1283750394), UINT32_C( -865122624), UINT32_C( -611912448), UINT32_C( 2025802961)) },
- { simde_x_mm256_set_epu32(UINT32_C(-1676495265), UINT32_C(-1239554047), UINT32_C( -227736186), UINT32_C( 441497328),
- UINT32_C( -308281524), UINT32_C(-1923017863), UINT32_C( 1763763374), UINT32_C( -249830824)),
- simde_x_mm256_set_epu32(UINT32_C( 1186988143), UINT32_C( -274704652), UINT32_C( -136552265), UINT32_C( -512830229),
- UINT32_C( 1698748935), UINT32_C( 532239246), UINT32_C( -341231509), UINT32_C( 222942642)),
- simde_x_mm256_set_epu32(UINT32_C(-1676495265), UINT32_C( -274704652), UINT32_C( -136552265), UINT32_C( -512830229),
- UINT32_C( -308281524), UINT32_C(-1923017863), UINT32_C( -341231509), UINT32_C( -249830824)) },
- { simde_x_mm256_set_epu32(UINT32_C(-1693096720), UINT32_C(-1850078774), UINT32_C( 1042415884), UINT32_C( -995300065),
- UINT32_C( -726650347), UINT32_C( 1364370576), UINT32_C( 1007565517), UINT32_C( 23772290)),
- simde_x_mm256_set_epu32(UINT32_C(-1218092357), UINT32_C(-1601849960), UINT32_C( 1609800037), UINT32_C( 731809179),
- UINT32_C(-1388796435), UINT32_C( -545596065), UINT32_C(-1231479917), UINT32_C(-1348660048)),
- simde_x_mm256_set_epu32(UINT32_C(-1218092357), UINT32_C(-1601849960), UINT32_C( 1609800037), UINT32_C( -995300065),
- UINT32_C( -726650347), UINT32_C( -545596065), UINT32_C(-1231479917), UINT32_C(-1348660048)) }
+ { simde_x_mm256_set_epu32(UINT32_C(3051572045), UINT32_C(3545123096), UINT32_C( 539532434), UINT32_C(2726067579),
+ UINT32_C(3419329411), UINT32_C(3056421163), UINT32_C(2937475413), UINT32_C( 891304178)),
+ simde_x_mm256_set_epu32(UINT32_C(3684521838), UINT32_C(3664092042), UINT32_C( 648541541), UINT32_C( 272930365),
+ UINT32_C( 675916127), UINT32_C(1720002587), UINT32_C(1082929595), UINT32_C( 984837149)),
+ simde_x_mm256_set_epu32(UINT32_C(3684521838), UINT32_C(3664092042), UINT32_C( 648541541), UINT32_C(2726067579),
+ UINT32_C(3419329411), UINT32_C(3056421163), UINT32_C(2937475413), UINT32_C( 984837149)) },
+ { simde_x_mm256_set_epu32(UINT32_C(2114544969), UINT32_C( 863431180), UINT32_C(2929790297), UINT32_C(1508406092),
+ UINT32_C(1766188399), UINT32_C(3527468216), UINT32_C(1207097510), UINT32_C(3902158588)),
+ simde_x_mm256_set_epu32(UINT32_C(4246012734), UINT32_C(1929345650), UINT32_C(3025365238), UINT32_C(3355971563),
+ UINT32_C(3310140776), UINT32_C(2715842442), UINT32_C(2891355465), UINT32_C(4115361740)),
+ simde_x_mm256_set_epu32(UINT32_C(4246012734), UINT32_C(1929345650), UINT32_C(3025365238), UINT32_C(3355971563),
+ UINT32_C(3310140776), UINT32_C(3527468216), UINT32_C(2891355465), UINT32_C(4115361740)) },
+ { simde_x_mm256_set_epu32(UINT32_C(1001663617), UINT32_C(3733133836), UINT32_C(2215256808), UINT32_C(2636746621),
+ UINT32_C(1004506231), UINT32_C(4292057274), UINT32_C(1794485537), UINT32_C(2483395991)),
+ simde_x_mm256_set_epu32(UINT32_C(3187837407), UINT32_C( 282146620), UINT32_C(3384289392), UINT32_C(3287894196),
+ UINT32_C(1516921107), UINT32_C( 874312086), UINT32_C(1365725481), UINT32_C(2010925515)),
+ simde_x_mm256_set_epu32(UINT32_C(3187837407), UINT32_C(3733133836), UINT32_C(3384289392), UINT32_C(3287894196),
+ UINT32_C(1516921107), UINT32_C(4292057274), UINT32_C(1794485537), UINT32_C(2483395991)) },
+ { simde_x_mm256_set_epu32(UINT32_C(1825864990), UINT32_C(3651632677), UINT32_C(1848482644), UINT32_C(4217806782),
+ UINT32_C(3557654096), UINT32_C( 849332445), UINT32_C( 114697269), UINT32_C( 674786807)),
+ simde_x_mm256_set_epu32(UINT32_C(1779504954), UINT32_C(1699927403), UINT32_C(2934040606), UINT32_C(4177282687),
+ UINT32_C(3450483742), UINT32_C(3231995683), UINT32_C(2093833738), UINT32_C( 336221368)),
+ simde_x_mm256_set_epu32(UINT32_C(1825864990), UINT32_C(3651632677), UINT32_C(2934040606), UINT32_C(4217806782),
+ UINT32_C(3557654096), UINT32_C(3231995683), UINT32_C(2093833738), UINT32_C( 674786807)) },
+ { simde_x_mm256_set_epu32(UINT32_C( 837260657), UINT32_C(3220613586), UINT32_C(2642631733), UINT32_C(2252900450),
+ UINT32_C(3061609531), UINT32_C(2252923032), UINT32_C(1030296341), UINT32_C(2720361240)),
+ simde_x_mm256_set_epu32(UINT32_C( 273034038), UINT32_C(2374878315), UINT32_C( 248014486), UINT32_C( 363234795),
+ UINT32_C(2126760034), UINT32_C(4029819680), UINT32_C(2144543040), UINT32_C(4227450764)),
+ simde_x_mm256_set_epu32(UINT32_C( 837260657), UINT32_C(3220613586), UINT32_C(2642631733), UINT32_C(2252900450),
+ UINT32_C(3061609531), UINT32_C(4029819680), UINT32_C(2144543040), UINT32_C(4227450764)) },
+ { simde_x_mm256_set_epu32(UINT32_C(1832345572), UINT32_C(3528531140), UINT32_C(1174695155), UINT32_C( 287919562),
+ UINT32_C( 792094828), UINT32_C(2183085761), UINT32_C(4262203031), UINT32_C(3585219150)),
+ simde_x_mm256_set_epu32(UINT32_C( 890341915), UINT32_C(2131853812), UINT32_C(2693768197), UINT32_C(1107526035),
+ UINT32_C(3439504205), UINT32_C( 751425977), UINT32_C(3946340711), UINT32_C(2999018213)),
+ simde_x_mm256_set_epu32(UINT32_C(1832345572), UINT32_C(3528531140), UINT32_C(2693768197), UINT32_C(1107526035),
+ UINT32_C(3439504205), UINT32_C(2183085761), UINT32_C(4262203031), UINT32_C(3585219150)) },
+ { simde_x_mm256_set_epu32(UINT32_C(3937957510), UINT32_C(1450021822), UINT32_C(1539281783), UINT32_C( 551303701),
+ UINT32_C(1703578262), UINT32_C(2645712321), UINT32_C(2567475981), UINT32_C(3368049591)),
+ simde_x_mm256_set_epu32(UINT32_C(3778291576), UINT32_C(3382324216), UINT32_C(1328766962), UINT32_C(4275285368),
+ UINT32_C( 709773804), UINT32_C(1207236213), UINT32_C(1754387913), UINT32_C(3060418664)),
+ simde_x_mm256_set_epu32(UINT32_C(3937957510), UINT32_C(3382324216), UINT32_C(1539281783), UINT32_C(4275285368),
+ UINT32_C(1703578262), UINT32_C(2645712321), UINT32_C(2567475981), UINT32_C(3368049591)) },
+ { simde_x_mm256_set_epu32(UINT32_C(1036418017), UINT32_C(1863476858), UINT32_C(1083883567), UINT32_C(3943520867),
+ UINT32_C( 70129580), UINT32_C(1560905984), UINT32_C(2897296070), UINT32_C(2575520666)),
+ simde_x_mm256_set_epu32(UINT32_C(2683682856), UINT32_C( 255193154), UINT32_C( 693351345), UINT32_C(1449340919),
+ UINT32_C(2448587623), UINT32_C(2723008994), UINT32_C(2741828276), UINT32_C(3254255853)),
+ simde_x_mm256_set_epu32(UINT32_C(2683682856), UINT32_C(1863476858), UINT32_C(1083883567), UINT32_C(3943520867),
+ UINT32_C(2448587623), UINT32_C(2723008994), UINT32_C(2897296070), UINT32_C(3254255853)) }
};
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
@@ -8512,6 +8648,8 @@ static MunitTest test_suite_tests[] = {
SIMDE_TESTS_DEFINE_TEST(mm256_unpackhi_epi8),
SIMDE_TESTS_DEFINE_TEST(mm256_unpackhi_epi16),
+ SIMDE_TESTS_DEFINE_TEST(mm256_unpackhi_epi32),
+ SIMDE_TESTS_DEFINE_TEST(mm256_unpackhi_epi64),
SIMDE_TESTS_DEFINE_TEST(mm256_xor_si256),
#endif /* defined(SIMDE_AVX2_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) */
=====================================
test/x86/avx512bw.c
=====================================
@@ -1491,6 +1491,694 @@ test_simde_mm512_adds_epu16(const MunitParameter params[], void* data) {
return MUNIT_OK;
}
+static MunitResult
+test_simde_mm512_shuffle_epi8(const MunitParameter params[], void* data) {
+ (void) params;
+ (void) data;
+
+ const struct {
+ simde__m512i a;
+ simde__m512i b;
+ simde__m512i r;
+ } test_vec[8] = {
+ { simde_mm512_set_epi8(INT8_C( 56), INT8_C( -94), INT8_C( -41), INT8_C( -59),
+ INT8_C( 40), INT8_C( 78), INT8_C( 93), INT8_C( 107),
+ INT8_C( -47), INT8_C( 66), INT8_C( -8), INT8_C( -52),
+ INT8_C( 50), INT8_C( 78), INT8_C( 13), INT8_C( 26),
+ INT8_C( -33), INT8_C( 120), INT8_C( 67), INT8_C( -12),
+ INT8_C( 80), INT8_C( 88), INT8_C( -65), INT8_C( 49),
+ INT8_C( 28), INT8_C( 96), INT8_C( -99), INT8_C(-119),
+ INT8_C( 126), INT8_C( 24), INT8_C( 35), INT8_C( 11),
+ INT8_C( -43), INT8_C( -24), INT8_C( 1), INT8_C( -61),
+ INT8_C( 101), INT8_C( 5), INT8_C( 67), INT8_C( -42),
+ INT8_C( -46), INT8_C(-115), INT8_C(-105), INT8_C( -92),
+ INT8_C( -3), INT8_C( -44), INT8_C( 13), INT8_C( -36),
+ INT8_C( 110), INT8_C( -38), INT8_C( -48), INT8_C( 36),
+ INT8_C( 117), INT8_C( -59), INT8_C( 109), INT8_C( -27),
+ INT8_C( -62), INT8_C( 98), INT8_C(-121), INT8_C(-108),
+ INT8_C( -7), INT8_C(-112), INT8_C( -52), INT8_C( -84)),
+ simde_mm512_set_epi8(INT8_C( 34), INT8_C( 34), INT8_C( -71), INT8_C( -63),
+ INT8_C( -34), INT8_C(-128), INT8_C( 113), INT8_C( -72),
+ INT8_C( 86), INT8_C( -73), INT8_C( -43), INT8_C( 95),
+ INT8_C( 3), INT8_C( 13), INT8_C( 19), INT8_C( -1),
+ INT8_C( -54), INT8_C( 114), INT8_C( 27), INT8_C( -30),
+ INT8_C( 52), INT8_C( -54), INT8_C( 78), INT8_C( 23),
+ INT8_C( 71), INT8_C( 87), INT8_C( 107), INT8_C( -27),
+ INT8_C( -35), INT8_C(-122), INT8_C( 40), INT8_C( 55),
+ INT8_C( -2), INT8_C( 40), INT8_C( -63), INT8_C( 85),
+ INT8_C( -25), INT8_C( -93), INT8_C( 25), INT8_C( 70),
+ INT8_C( 117), INT8_C( 66), INT8_C( -79), INT8_C( 57),
+ INT8_C(-115), INT8_C( -27), INT8_C( 59), INT8_C( 25),
+ INT8_C( -6), INT8_C( 42), INT8_C( -27), INT8_C( -34),
+ INT8_C( -16), INT8_C( 5), INT8_C( -55), INT8_C( 74),
+ INT8_C( -29), INT8_C( 77), INT8_C( -17), INT8_C( 16),
+ INT8_C( -98), INT8_C( -38), INT8_C( 116), INT8_C( -56)),
+ simde_mm512_set_epi8(INT8_C( 78), INT8_C( 78), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 13), INT8_C( 0),
+ INT8_C( 66), INT8_C( 0), INT8_C( 0), INT8_C( 56),
+ INT8_C( 50), INT8_C( -41), INT8_C( 50), INT8_C( 0),
+ INT8_C( 0), INT8_C( 24), INT8_C( 80), INT8_C( 0),
+ INT8_C(-119), INT8_C( 0), INT8_C( 120), INT8_C( 28),
+ INT8_C( 28), INT8_C( 28), INT8_C( 80), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 49), INT8_C( 28),
+ INT8_C( 0), INT8_C( -42), INT8_C( 0), INT8_C(-105),
+ INT8_C( 0), INT8_C( 0), INT8_C( 67), INT8_C(-115),
+ INT8_C(-105), INT8_C( -44), INT8_C( 0), INT8_C( 67),
+ INT8_C( 0), INT8_C( 0), INT8_C( 101), INT8_C( 67),
+ INT8_C( 0), INT8_C( -59), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C(-121), INT8_C( 0), INT8_C( -59),
+ INT8_C( 0), INT8_C( -48), INT8_C( 0), INT8_C( -84),
+ INT8_C( 0), INT8_C( 0), INT8_C(-108), INT8_C( 0)) },
+ { simde_mm512_set_epi8(INT8_C( -75), INT8_C(-123), INT8_C( -44), INT8_C( -97),
+ INT8_C( 61), INT8_C(-112), INT8_C( 109), INT8_C( 27),
+ INT8_C( -9), INT8_C( -31), INT8_C(-111), INT8_C( -59),
+ INT8_C( 94), INT8_C( 110), INT8_C( 31), INT8_C( 50),
+ INT8_C( -86), INT8_C( 24), INT8_C( 91), INT8_C(-105),
+ INT8_C( 39), INT8_C( 25), INT8_C( -65), INT8_C( 69),
+ INT8_C( -59), INT8_C( -57), INT8_C( -98), INT8_C( 26),
+ INT8_C( -35), INT8_C( -2), INT8_C( 114), INT8_C( -51),
+ INT8_C( -16), INT8_C( 101), INT8_C( -92), INT8_C( 65),
+ INT8_C( -88), INT8_C( 19), INT8_C(-102), INT8_C( -51),
+ INT8_C( -94), INT8_C( -30), INT8_C( 60), INT8_C( -9),
+ INT8_C( 4), INT8_C( -93), INT8_C( 110), INT8_C(-102),
+ INT8_C( 109), INT8_C( 1), INT8_C( 113), INT8_C( 114),
+ INT8_C( 100), INT8_C( 80), INT8_C( 18), INT8_C( -7),
+ INT8_C( -87), INT8_C( 0), INT8_C( -55), INT8_C( 37),
+ INT8_C( 13), INT8_C( 86), INT8_C(-114), INT8_C(-125)),
+ simde_mm512_set_epi8(INT8_C( 18), INT8_C( 11), INT8_C( 73), INT8_C( 57),
+ INT8_C( -54), INT8_C( 9), INT8_C( 69), INT8_C( -45),
+ INT8_C( 71), INT8_C( -6), INT8_C( -68), INT8_C( -8),
+ INT8_C( 14), INT8_C( -27), INT8_C( 84), INT8_C( -90),
+ INT8_C( -10), INT8_C( -81), INT8_C( 77), INT8_C(-113),
+ INT8_C( 32), INT8_C( 17), INT8_C( 62), INT8_C( -18),
+ INT8_C( 124), INT8_C( -42), INT8_C( -52), INT8_C(-109),
+ INT8_C( -19), INT8_C( 30), INT8_C( -51), INT8_C(-111),
+ INT8_C( 100), INT8_C( 63), INT8_C( -80), INT8_C(-126),
+ INT8_C( 3), INT8_C( 119), INT8_C( 38), INT8_C( -44),
+ INT8_C( -48), INT8_C( -43), INT8_C( 6), INT8_C(-113),
+ INT8_C( -5), INT8_C( -18), INT8_C( 58), INT8_C( 77),
+ INT8_C( -49), INT8_C(-107), INT8_C( -77), INT8_C( 119),
+ INT8_C( -6), INT8_C( 92), INT8_C(-122), INT8_C( 43),
+ INT8_C( 85), INT8_C(-111), INT8_C( -15), INT8_C( 90),
+ INT8_C(-104), INT8_C( 31), INT8_C( -4), INT8_C( 57)),
+ simde_mm512_set_epi8(INT8_C( 110), INT8_C( 61), INT8_C( 109), INT8_C( 109),
+ INT8_C( 0), INT8_C( 109), INT8_C(-111), INT8_C( 0),
+ INT8_C( -9), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C(-123), INT8_C( 0), INT8_C( -59), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 91), INT8_C( 0),
+ INT8_C( -51), INT8_C( 114), INT8_C( 24), INT8_C( 0),
+ INT8_C(-105), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 24), INT8_C( 0), INT8_C( 0),
+ INT8_C( -9), INT8_C( -16), INT8_C( 0), INT8_C( 0),
+ INT8_C( 4), INT8_C( -94), INT8_C( -30), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( -30), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 19), INT8_C( -92),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -87),
+ INT8_C( 0), INT8_C( 114), INT8_C( 0), INT8_C( 100),
+ INT8_C( -55), INT8_C( 0), INT8_C( 0), INT8_C( 80),
+ INT8_C( 0), INT8_C( 109), INT8_C( 0), INT8_C( 18)) },
+ { simde_mm512_set_epi8(INT8_C( -54), INT8_C( 53), INT8_C( 35), INT8_C( 51),
+ INT8_C( 24), INT8_C( 100), INT8_C( 80), INT8_C( 7),
+ INT8_C( 54), INT8_C( 114), INT8_C( -42), INT8_C( -2),
+ INT8_C( 18), INT8_C( 100), INT8_C( 26), INT8_C( -68),
+ INT8_C( 82), INT8_C( -30), INT8_C( -4), INT8_C( 110),
+ INT8_C( 24), INT8_C( 88), INT8_C(-100), INT8_C(-120),
+ INT8_C( -17), INT8_C(-109), INT8_C( 92), INT8_C(-105),
+ INT8_C( -20), INT8_C( 34), INT8_C( 98), INT8_C( -17),
+ INT8_C( 2), INT8_C( 6), INT8_C( 37), INT8_C( -85),
+ INT8_C( 118), INT8_C( 28), INT8_C( 34), INT8_C( 117),
+ INT8_C( 77), INT8_C( -32), INT8_C( 25), INT8_C( -74),
+ INT8_C( 93), INT8_C(-108), INT8_C( 3), INT8_C( -90),
+ INT8_C( 13), INT8_C( -29), INT8_C( -88), INT8_C( 116),
+ INT8_C( 14), INT8_C( -29), INT8_C( 30), INT8_C( -26),
+ INT8_C(-115), INT8_C( -36), INT8_C( 94), INT8_C(-125),
+ INT8_C( 118), INT8_C( 125), INT8_C( -91), INT8_C( 31)),
+ simde_mm512_set_epi8(INT8_C( -74), INT8_C( -13), INT8_C( -87), INT8_C( -10),
+ INT8_C( 21), INT8_C( 49), INT8_C( -64), INT8_C( 52),
+ INT8_C( -97), INT8_C(-103), INT8_C(-127), INT8_C( 53),
+ INT8_C( 99), INT8_C( 80), INT8_C( 55), INT8_C( 111),
+ INT8_C( 61), INT8_C( -93), INT8_C( -87), INT8_C( 81),
+ INT8_C( 108), INT8_C( 89), INT8_C( 63), INT8_C(-109),
+ INT8_C( -47), INT8_C( -32), INT8_C( 105), INT8_C( 91),
+ INT8_C( 88), INT8_C( 85), INT8_C( -96), INT8_C( 88),
+ INT8_C( -85), INT8_C( 3), INT8_C( 124), INT8_C( -33),
+ INT8_C( -21), INT8_C( -7), INT8_C( -31), INT8_C(-126),
+ INT8_C( 3), INT8_C( -17), INT8_C( 40), INT8_C( 5),
+ INT8_C( 126), INT8_C( -60), INT8_C( -91), INT8_C(-112),
+ INT8_C( 88), INT8_C( 11), INT8_C( 100), INT8_C( 114),
+ INT8_C( 112), INT8_C( -53), INT8_C( 89), INT8_C( 78),
+ INT8_C( 115), INT8_C( 117), INT8_C( -33), INT8_C( 14),
+ INT8_C( -42), INT8_C( -15), INT8_C( 80), INT8_C( -46)),
+ simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( -42), INT8_C( 26), INT8_C( 0), INT8_C( -2),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -42),
+ INT8_C( 18), INT8_C( -68), INT8_C( 54), INT8_C( -54),
+ INT8_C( -4), INT8_C( 0), INT8_C( 0), INT8_C( 98),
+ INT8_C( 110), INT8_C(-100), INT8_C( 82), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C(-100), INT8_C( 24),
+ INT8_C(-120), INT8_C( 92), INT8_C( 0), INT8_C(-120),
+ INT8_C( 0), INT8_C( 93), INT8_C( -85), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 93), INT8_C( 0), INT8_C( 117), INT8_C( 25),
+ INT8_C( 6), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( -26), INT8_C( 14), INT8_C(-125), INT8_C( 125),
+ INT8_C( 31), INT8_C( 0), INT8_C( 30), INT8_C( -29),
+ INT8_C( 118), INT8_C( 94), INT8_C( 0), INT8_C( -29),
+ INT8_C( 0), INT8_C( 0), INT8_C( 31), INT8_C( 0)) },
+ { simde_mm512_set_epi8(INT8_C(-109), INT8_C( 14), INT8_C( -91), INT8_C( -44),
+ INT8_C( 79), INT8_C( -4), INT8_C( 88), INT8_C( 49),
+ INT8_C( -95), INT8_C( -92), INT8_C(-116), INT8_C( 8),
+ INT8_C( 25), INT8_C( 30), INT8_C( -55), INT8_C(-120),
+ INT8_C( -58), INT8_C( -84), INT8_C( 99), INT8_C( -92),
+ INT8_C( -29), INT8_C( -47), INT8_C( 42), INT8_C( -56),
+ INT8_C( 109), INT8_C( -71), INT8_C( -73), INT8_C( -30),
+ INT8_C( -15), INT8_C(-106), INT8_C( -91), INT8_C( -57),
+ INT8_C( 51), INT8_C( -8), INT8_C( 55), INT8_C( 36),
+ INT8_C( -55), INT8_C(-103), INT8_C( 51), INT8_C( -28),
+ INT8_C( 70), INT8_C( 114), INT8_C( -3), INT8_C( 49),
+ INT8_C( 23), INT8_C( -28), INT8_C( -87), INT8_C( 57),
+ INT8_C( 28), INT8_C( -12), INT8_C( -51), INT8_C( 37),
+ INT8_C( 29), INT8_C( -58), INT8_C( -69), INT8_C( 30),
+ INT8_C( 43), INT8_C( -65), INT8_C( -2), INT8_C( -83),
+ INT8_C( -27), INT8_C(-122), INT8_C( 77), INT8_C( 47)),
+ simde_mm512_set_epi8(INT8_C(-116), INT8_C(-127), INT8_C( -95), INT8_C( -97),
+ INT8_C( -78), INT8_C( -53), INT8_C( -36), INT8_C( 62),
+ INT8_C( 63), INT8_C( 28), INT8_C( -37), INT8_C( 40),
+ INT8_C( -14), INT8_C( 2), INT8_C( -65), INT8_C( -67),
+ INT8_C( 79), INT8_C( 51), INT8_C( -51), INT8_C( -64),
+ INT8_C( -40), INT8_C(-102), INT8_C(-107), INT8_C( 82),
+ INT8_C( 20), INT8_C( 54), INT8_C( 66), INT8_C( 7),
+ INT8_C( 33), INT8_C( 108), INT8_C( -88), INT8_C( 69),
+ INT8_C( 98), INT8_C( 43), INT8_C( 25), INT8_C( 58),
+ INT8_C( 117), INT8_C( -34), INT8_C( -70), INT8_C( 3),
+ INT8_C( 1), INT8_C( 100), INT8_C( 3), INT8_C( 14),
+ INT8_C( 117), INT8_C( -91), INT8_C( -89), INT8_C( 66),
+ INT8_C( 32), INT8_C( -35), INT8_C( 104), INT8_C( 65),
+ INT8_C(-100), INT8_C( 27), INT8_C( 42), INT8_C( 91),
+ INT8_C( -47), INT8_C( 39), INT8_C( -65), INT8_C(-124),
+ INT8_C( -84), INT8_C( -95), INT8_C( -46), INT8_C(-113)),
+ simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 14),
+ INT8_C(-109), INT8_C( -44), INT8_C( 0), INT8_C( 49),
+ INT8_C( 0), INT8_C( 30), INT8_C( 0), INT8_C( 0),
+ INT8_C( -58), INT8_C( -15), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-106),
+ INT8_C( -30), INT8_C( -71), INT8_C(-106), INT8_C( 109),
+ INT8_C( -91), INT8_C( -92), INT8_C( 0), INT8_C( -73),
+ INT8_C( -28), INT8_C( -55), INT8_C( 51), INT8_C(-103),
+ INT8_C( -3), INT8_C( 0), INT8_C( 0), INT8_C( 23),
+ INT8_C( -87), INT8_C( 49), INT8_C( 23), INT8_C( -8),
+ INT8_C( -3), INT8_C( 0), INT8_C( 0), INT8_C( -28),
+ INT8_C( 47), INT8_C( 0), INT8_C( 30), INT8_C( 77),
+ INT8_C( 0), INT8_C( 29), INT8_C( -58), INT8_C( 29),
+ INT8_C( 0), INT8_C( 43), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) },
+ { simde_mm512_set_epi8(INT8_C(-119), INT8_C( 55), INT8_C( -13), INT8_C(-118),
+ INT8_C( 5), INT8_C( 14), INT8_C(-103), INT8_C( 58),
+ INT8_C( 109), INT8_C(-113), INT8_C( -44), INT8_C( 126),
+ INT8_C( 70), INT8_C(-126), INT8_C( 24), INT8_C(-117),
+ INT8_C( 80), INT8_C( 25), INT8_C( 64), INT8_C( -89),
+ INT8_C( 60), INT8_C( 5), INT8_C(-119), INT8_C( -29),
+ INT8_C( -95), INT8_C( -26), INT8_C( 25), INT8_C( -65),
+ INT8_C( -81), INT8_C( -84), INT8_C( 46), INT8_C( 62),
+ INT8_C( -25), INT8_C(-116), INT8_C(-126), INT8_C(-103),
+ INT8_C( 126), INT8_C( -39), INT8_C( 111), INT8_C( -10),
+ INT8_C( -48), INT8_C( 122), INT8_C( -86), INT8_C( -25),
+ INT8_C( 35), INT8_C( -4), INT8_C( -7), INT8_C( 76),
+ INT8_C(-116), INT8_C( 1), INT8_C( 126), INT8_C( -30),
+ INT8_C( -11), INT8_C( 82), INT8_C( 81), INT8_C( 27),
+ INT8_C( -15), INT8_C( -14), INT8_C( 56), INT8_C( 60),
+ INT8_C( -29), INT8_C( -65), INT8_C( -35), INT8_C( 21)),
+ simde_mm512_set_epi8(INT8_C( 125), INT8_C( -43), INT8_C( -42), INT8_C( -51),
+ INT8_C( -36), INT8_C( 38), INT8_C( 42), INT8_C( 18),
+ INT8_C( 24), INT8_C( -25), INT8_C( 19), INT8_C( 15),
+ INT8_C(-119), INT8_C(-124), INT8_C( 104), INT8_C( -93),
+ INT8_C( 22), INT8_C( -16), INT8_C( 31), INT8_C( 116),
+ INT8_C( -99), INT8_C( 45), INT8_C( -54), INT8_C( 50),
+ INT8_C( -72), INT8_C(-107), INT8_C( 48), INT8_C( 118),
+ INT8_C( -68), INT8_C( -15), INT8_C(-108), INT8_C( -29),
+ INT8_C( 118), INT8_C( 96), INT8_C( -51), INT8_C( 107),
+ INT8_C( -23), INT8_C( -97), INT8_C( -6), INT8_C( 91),
+ INT8_C( -57), INT8_C( -67), INT8_C(-105), INT8_C( 68),
+ INT8_C( -50), INT8_C( 16), INT8_C( -86), INT8_C( 6),
+ INT8_C( 55), INT8_C(-118), INT8_C( 112), INT8_C( 125),
+ INT8_C( 92), INT8_C( 91), INT8_C( -27), INT8_C( 53),
+ INT8_C( 28), INT8_C( -66), INT8_C( -69), INT8_C(-117),
+ INT8_C( -30), INT8_C(-106), INT8_C( 1), INT8_C( 95)),
+ simde_mm512_set_epi8(INT8_C( -13), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C(-113), INT8_C( 14), INT8_C(-126),
+ INT8_C( 58), INT8_C( 0), INT8_C( 70), INT8_C(-119),
+ INT8_C( 0), INT8_C( 0), INT8_C( 58), INT8_C( 0),
+ INT8_C( -26), INT8_C( 0), INT8_C( 80), INT8_C( -65),
+ INT8_C( 0), INT8_C( 64), INT8_C( 0), INT8_C( -84),
+ INT8_C( 0), INT8_C( 0), INT8_C( 62), INT8_C( -26),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 122), INT8_C( 76), INT8_C( 0), INT8_C( 126),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 126),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -25),
+ INT8_C( 0), INT8_C( 76), INT8_C( 0), INT8_C( 122),
+ INT8_C( -15), INT8_C( 0), INT8_C( 21), INT8_C( 126),
+ INT8_C( -30), INT8_C( -11), INT8_C( 0), INT8_C( 56),
+ INT8_C( -30), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( -35), INT8_C(-116)) },
+ { simde_mm512_set_epi8(INT8_C( 18), INT8_C(-124), INT8_C( -6), INT8_C( -41),
+ INT8_C(-109), INT8_C( 19), INT8_C( 47), INT8_C( 73),
+ INT8_C( -58), INT8_C( -50), INT8_C( 73), INT8_C( 110),
+ INT8_C( 56), INT8_C( -30), INT8_C( -79), INT8_C( 123),
+ INT8_C( 77), INT8_C( 62), INT8_C( -28), INT8_C( 45),
+ INT8_C( -95), INT8_C( 12), INT8_C( 53), INT8_C( 75),
+ INT8_C( 66), INT8_C( -89), INT8_C( 46), INT8_C( -47),
+ INT8_C( 6), INT8_C( 4), INT8_C( 112), INT8_C( -34),
+ INT8_C( 121), INT8_C( -3), INT8_C(-102), INT8_C(-115),
+ INT8_C( -12), INT8_C( 80), INT8_C( 3), INT8_C( 56),
+ INT8_C( 85), INT8_C( -43), INT8_C( -93), INT8_C( 34),
+ INT8_C( -58), INT8_C(-101), INT8_C( 51), INT8_C( 1),
+ INT8_C( -18), INT8_C( -90), INT8_C( -71), INT8_C( 74),
+ INT8_C( -5), INT8_C(-113), INT8_C( 71), INT8_C( 61),
+ INT8_C( 92), INT8_C( 73), INT8_C( 116), INT8_C( 110),
+ INT8_C(-117), INT8_C( 96), INT8_C( -77), INT8_C( 45)),
+ simde_mm512_set_epi8(INT8_C( 52), INT8_C( -84), INT8_C( 103), INT8_C(-105),
+ INT8_C( -57), INT8_C( 17), INT8_C(-110), INT8_C(-113),
+ INT8_C( 56), INT8_C( 125), INT8_C( 56), INT8_C( -88),
+ INT8_C( -31), INT8_C( 119), INT8_C( -86), INT8_C( 114),
+ INT8_C( -85), INT8_C( 14), INT8_C( 60), INT8_C( 84),
+ INT8_C( -61), INT8_C(-111), INT8_C( 74), INT8_C( 113),
+ INT8_C( 13), INT8_C( -89), INT8_C(-114), INT8_C( -79),
+ INT8_C( -34), INT8_C( -73), INT8_C( -9), INT8_C(-108),
+ INT8_C( 93), INT8_C( -78), INT8_C( -1), INT8_C(-102),
+ INT8_C( 12), INT8_C( 117), INT8_C( 80), INT8_C( 44),
+ INT8_C( 29), INT8_C( 18), INT8_C( 91), INT8_C( -23),
+ INT8_C( 110), INT8_C(-126), INT8_C( -71), INT8_C( 65),
+ INT8_C( -76), INT8_C( 85), INT8_C( 93), INT8_C( -86),
+ INT8_C(-123), INT8_C(-115), INT8_C(-105), INT8_C( 75),
+ INT8_C( 45), INT8_C( 22), INT8_C( 73), INT8_C( 74),
+ INT8_C( 107), INT8_C( -60), INT8_C( -28), INT8_C( -57)),
+ simde_mm512_set_epi8(INT8_C( 110), INT8_C( 0), INT8_C( -58), INT8_C( 0),
+ INT8_C( 0), INT8_C( -79), INT8_C( 0), INT8_C( 0),
+ INT8_C( 73), INT8_C( -6), INT8_C( 73), INT8_C( 0),
+ INT8_C( 0), INT8_C( -58), INT8_C( 0), INT8_C( -30),
+ INT8_C( 0), INT8_C( 62), INT8_C( 45), INT8_C( -47),
+ INT8_C( 0), INT8_C( 0), INT8_C( 12), INT8_C( 112),
+ INT8_C( -28), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C(-102), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C(-115), INT8_C( -93), INT8_C( 1), INT8_C(-115),
+ INT8_C(-102), INT8_C(-101), INT8_C( -12), INT8_C( 0),
+ INT8_C( -3), INT8_C( 0), INT8_C( 0), INT8_C( 51),
+ INT8_C( 0), INT8_C( 116), INT8_C( -71), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -5),
+ INT8_C( -71), INT8_C( 73), INT8_C( 71), INT8_C(-113),
+ INT8_C( -5), INT8_C( 0), INT8_C( 0), INT8_C( 0)) },
+ { simde_mm512_set_epi8(INT8_C( 4), INT8_C( -30), INT8_C( -68), INT8_C( 57),
+ INT8_C( 73), INT8_C( 25), INT8_C( -45), INT8_C( 112),
+ INT8_C( 41), INT8_C( 41), INT8_C( -56), INT8_C( -93),
+ INT8_C( 29), INT8_C( 99), INT8_C( 15), INT8_C( 122),
+ INT8_C( 74), INT8_C( 0), INT8_C( 89), INT8_C( 61),
+ INT8_C( 124), INT8_C( 36), INT8_C( -87), INT8_C( -25),
+ INT8_C( 42), INT8_C(-125), INT8_C( -87), INT8_C( 69),
+ INT8_C( 81), INT8_C( 4), INT8_C( 55), INT8_C( 107),
+ INT8_C( 0), INT8_C( -7), INT8_C( 37), INT8_C( -15),
+ INT8_C( 117), INT8_C( 9), INT8_C( -27), INT8_C( 11),
+ INT8_C( 16), INT8_C( 87), INT8_C( 119), INT8_C(-107),
+ INT8_C( -22), INT8_C( 27), INT8_C( -86), INT8_C( -63),
+ INT8_C( 90), INT8_C( -44), INT8_C(-113), INT8_C(-126),
+ INT8_C( -75), INT8_C( 67), INT8_C( 88), INT8_C( 105),
+ INT8_C( 80), INT8_C( 7), INT8_C( 66), INT8_C( 126),
+ INT8_C( 17), INT8_C( -20), INT8_C( -91), INT8_C( -99)),
+ simde_mm512_set_epi8(INT8_C( 123), INT8_C( 117), INT8_C( 18), INT8_C( 2),
+ INT8_C(-105), INT8_C( -51), INT8_C( -9), INT8_C( 72),
+ INT8_C( 69), INT8_C(-116), INT8_C( 95), INT8_C(-110),
+ INT8_C( -32), INT8_C( 110), INT8_C( -8), INT8_C( 85),
+ INT8_C( -41), INT8_C( 80), INT8_C( -44), INT8_C( 110),
+ INT8_C( -28), INT8_C( 116), INT8_C( 41), INT8_C( -31),
+ INT8_C( 12), INT8_C( 40), INT8_C( 5), INT8_C( 1),
+ INT8_C( -14), INT8_C( 66), INT8_C( -95), INT8_C( 106),
+ INT8_C( 66), INT8_C( -91), INT8_C( -20), INT8_C(-109),
+ INT8_C( -46), INT8_C( 41), INT8_C( 82), INT8_C( -61),
+ INT8_C( 2), INT8_C( 3), INT8_C( 62), INT8_C( -90),
+ INT8_C( 99), INT8_C( -54), INT8_C( -28), INT8_C( -21),
+ INT8_C( 126), INT8_C( 115), INT8_C( -46), INT8_C( 76),
+ INT8_C( -43), INT8_C( 46), INT8_C( 107), INT8_C( 75),
+ INT8_C( -93), INT8_C( 67), INT8_C( -37), INT8_C( 4),
+ INT8_C(-127), INT8_C( -50), INT8_C( -26), INT8_C( 99)),
+ simde_mm512_set_epi8(INT8_C( 73), INT8_C( -56), INT8_C( 99), INT8_C( 99),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 112),
+ INT8_C( -56), INT8_C( 0), INT8_C( 4), INT8_C( 0),
+ INT8_C( 0), INT8_C( -30), INT8_C( 0), INT8_C( -56),
+ INT8_C( 0), INT8_C( 107), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 69), INT8_C( -87), INT8_C( 0),
+ INT8_C( 61), INT8_C( -25), INT8_C( -87), INT8_C( 55),
+ INT8_C( 0), INT8_C( 4), INT8_C( 0), INT8_C( 36),
+ INT8_C( 27), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( -27), INT8_C( 27), INT8_C( 0),
+ INT8_C( 27), INT8_C( -22), INT8_C( -7), INT8_C( 0),
+ INT8_C( -22), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( -44), INT8_C( 17), INT8_C( 0), INT8_C(-126),
+ INT8_C( 0), INT8_C( -44), INT8_C( -75), INT8_C( -75),
+ INT8_C( 0), INT8_C( 17), INT8_C( 0), INT8_C( 126),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 17)) },
+ { simde_mm512_set_epi8(INT8_C( 56), INT8_C( 24), INT8_C( -33), INT8_C( 82),
+ INT8_C( 72), INT8_C( 63), INT8_C(-114), INT8_C( 14),
+ INT8_C( 2), INT8_C( 71), INT8_C( -31), INT8_C( -46),
+ INT8_C( 64), INT8_C( -11), INT8_C( 35), INT8_C( -7),
+ INT8_C( -78), INT8_C( 125), INT8_C( -28), INT8_C( -59),
+ INT8_C( 6), INT8_C(-127), INT8_C( 34), INT8_C( 64),
+ INT8_C( 88), INT8_C( -88), INT8_C( 70), INT8_C( -90),
+ INT8_C( 47), INT8_C( -21), INT8_C(-104), INT8_C( -27),
+ INT8_C( -16), INT8_C( -29), INT8_C( 51), INT8_C(-116),
+ INT8_C( -87), INT8_C( 91), INT8_C(-106), INT8_C( -3),
+ INT8_C( -55), INT8_C( -15), INT8_C(-109), INT8_C(-108),
+ INT8_C( -41), INT8_C( 79), INT8_C( 54), INT8_C( 68),
+ INT8_C( -51), INT8_C(-108), INT8_C( -83), INT8_C( 104),
+ INT8_C( 18), INT8_C(-128), INT8_C( -75), INT8_C( -12),
+ INT8_C( 56), INT8_C(-104), INT8_C( 56), INT8_C( 47),
+ INT8_C( -30), INT8_C( -15), INT8_C(-115), INT8_C( 88)),
+ simde_mm512_set_epi8(INT8_C( 107), INT8_C( 125), INT8_C(-111), INT8_C( -89),
+ INT8_C( -9), INT8_C( 122), INT8_C( -2), INT8_C( -26),
+ INT8_C(-100), INT8_C( 80), INT8_C( 89), INT8_C( 38),
+ INT8_C( 95), INT8_C( -82), INT8_C( -66), INT8_C( 108),
+ INT8_C( 104), INT8_C( -20), INT8_C( 72), INT8_C( -82),
+ INT8_C( 23), INT8_C( -45), INT8_C( -20), INT8_C( 8),
+ INT8_C( -2), INT8_C( -77), INT8_C(-112), INT8_C( 102),
+ INT8_C( 83), INT8_C( 80), INT8_C( 56), INT8_C( -53),
+ INT8_C( -83), INT8_C( -86), INT8_C( 15), INT8_C( 30),
+ INT8_C( -92), INT8_C( -2), INT8_C( 42), INT8_C( -79),
+ INT8_C( 108), INT8_C( 44), INT8_C( 17), INT8_C( 83),
+ INT8_C( -49), INT8_C( -44), INT8_C( -42), INT8_C( -54),
+ INT8_C(-127), INT8_C( 12), INT8_C( 88), INT8_C(-118),
+ INT8_C( -23), INT8_C( -69), INT8_C( -23), INT8_C( -36),
+ INT8_C( -23), INT8_C( -80), INT8_C( 33), INT8_C( 82),
+ INT8_C( -43), INT8_C( -91), INT8_C( -36), INT8_C( -56)),
+ simde_mm512_set_epi8(INT8_C( 72), INT8_C( -33), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 63), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( -7), INT8_C(-114), INT8_C( 71),
+ INT8_C( 56), INT8_C( 0), INT8_C( 0), INT8_C( 82),
+ INT8_C( 64), INT8_C( 0), INT8_C( 64), INT8_C( 0),
+ INT8_C( 88), INT8_C( 0), INT8_C( 0), INT8_C( 64),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -88),
+ INT8_C( 47), INT8_C( -27), INT8_C( 64), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( -16), INT8_C( -29),
+ INT8_C( 0), INT8_C( 0), INT8_C( 91), INT8_C( 0),
+ INT8_C(-116), INT8_C(-116), INT8_C( 54), INT8_C( -41),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 104), INT8_C( -12), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C(-115), INT8_C( -15),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) },
+ };
+
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m512i r = simde_mm512_shuffle_epi8(test_vec[i].a, test_vec[i].b);
+ simde_assert_m512i_i8(r, ==, test_vec[i].r);
+ }
+
+ return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm512_cmpeq_epi8_mask(const MunitParameter params[], void* data) {
+ (void) params; /* not used by this test */
+ (void) data; /* not used by this test */
+ /* Table-driven check: each entry pairs two 512-bit inputs (a, b) with the 64-bit mask expected from simde_mm512_cmpeq_epi8_mask(a, b). */
+ const struct {
+ simde__m512i a; /* first operand passed to the function under test */
+ simde__m512i b; /* second operand passed to the function under test */
+ simde__mmask64 r; /* expected mask; in this data a bit is 1 where the bytes of a and b match, and the first listed byte pair lines up with the most significant bit */
+ } test_vec[8] = {
+ { simde_mm512_set_epi8(INT8_C( 73), INT8_C( 68), INT8_C( -71), INT8_C( -32),
+ INT8_C( 100), INT8_C( 125), INT8_C( 89), INT8_C( 95),
+ INT8_C( -23), INT8_C( 76), INT8_C( 84), INT8_C( -43),
+ INT8_C( 86), INT8_C( 29), INT8_C( 64), INT8_C( 30),
+ INT8_C( -17), INT8_C( -61), INT8_C( 115), INT8_C( -53),
+ INT8_C(-100), INT8_C( 104), INT8_C( 111), INT8_C( -59),
+ INT8_C( -31), INT8_C( 20), INT8_C( 31), INT8_C(-121),
+ INT8_C( 90), INT8_C( 18), INT8_C( 6), INT8_C( 39),
+ INT8_C( 15), INT8_C( 62), INT8_C( 39), INT8_C( -25),
+ INT8_C( 60), INT8_C( 110), INT8_C( 45), INT8_C( 113),
+ INT8_C( 81), INT8_C( 78), INT8_C( -53), INT8_C( 11),
+ INT8_C( -27), INT8_C(-113), INT8_C( -3), INT8_C( 14),
+ INT8_C( 109), INT8_C( 43), INT8_C( -54), INT8_C( 111),
+ INT8_C( -91), INT8_C( -21), INT8_C( 102), INT8_C( 8),
+ INT8_C( -41), INT8_C( -47), INT8_C( -90), INT8_C( 3),
+ INT8_C( 18), INT8_C( 32), INT8_C( 89), INT8_C( -62)),
+ simde_mm512_set_epi8(INT8_C( 73), INT8_C( 68), INT8_C( -71), INT8_C( -46),
+ INT8_C( 72), INT8_C( 42), INT8_C( 65), INT8_C( -27),
+ INT8_C( -37), INT8_C( 76), INT8_C( 84), INT8_C( -95),
+ INT8_C( -29), INT8_C( 29), INT8_C( 64), INT8_C( 75),
+ INT8_C( -17), INT8_C( -61), INT8_C( 76), INT8_C( 88),
+ INT8_C(-100), INT8_C( -91), INT8_C( 111), INT8_C( -59),
+ INT8_C( -31), INT8_C( -17), INT8_C( 31), INT8_C( 68),
+ INT8_C( 90), INT8_C( 18), INT8_C(-110), INT8_C( 39),
+ INT8_C( 15), INT8_C( 62), INT8_C( 24), INT8_C( -25),
+ INT8_C( 80), INT8_C( 122), INT8_C( 103), INT8_C(-115),
+ INT8_C( 81), INT8_C( 24), INT8_C( 32), INT8_C( 82),
+ INT8_C( -27), INT8_C( 105), INT8_C( -3), INT8_C( 14),
+ INT8_C( 109), INT8_C( 43), INT8_C( -54), INT8_C(-121),
+ INT8_C( -91), INT8_C(-124), INT8_C( 102), INT8_C( 46),
+ INT8_C( -41), INT8_C( 101), INT8_C( 51), INT8_C( 3),
+ INT8_C( 18), INT8_C( 32), INT8_C( -12), INT8_C( -62)),
+ UINT64_C(0xe066cbadd08bea9d) }, /* leading 0xe = 1110: the first three byte pairs match, the fourth (-32 vs -46) does not */
+ { simde_mm512_set_epi8(INT8_C( -84), INT8_C(-108), INT8_C( 13), INT8_C( -97),
+ INT8_C( -34), INT8_C( 27), INT8_C( 124), INT8_C(-120),
+ INT8_C( 3), INT8_C( 26), INT8_C( 43), INT8_C( -96),
+ INT8_C( -63), INT8_C( 49), INT8_C( 127), INT8_C( 0),
+ INT8_C( -20), INT8_C( -31), INT8_C( 125), INT8_C( 14),
+ INT8_C( -53), INT8_C( -87), INT8_C( 115), INT8_C( -20),
+ INT8_C( -93), INT8_C( 70), INT8_C( 29), INT8_C( -90),
+ INT8_C( 105), INT8_C( -54), INT8_C( 96), INT8_C(-106),
+ INT8_C( 94), INT8_C( -52), INT8_C( -50), INT8_C( -79),
+ INT8_C( -54), INT8_C( 78), INT8_C( 17), INT8_C( 81),
+ INT8_C( 35), INT8_C( 120), INT8_C( 47), INT8_C( -25),
+ INT8_C( 110), INT8_C( 55), INT8_C( 40), INT8_C( -31),
+ INT8_C( 93), INT8_C( -23), INT8_C( 4), INT8_C( 45),
+ INT8_C( 59), INT8_C( -33), INT8_C( 124), INT8_C( -52),
+ INT8_C( 42), INT8_C( -14), INT8_C(-119), INT8_C( 88),
+ INT8_C( -38), INT8_C( -31), INT8_C(-113), INT8_C( 33)),
+ simde_mm512_set_epi8(INT8_C( 25), INT8_C(-108), INT8_C( 13), INT8_C( -97),
+ INT8_C( -34), INT8_C( 119), INT8_C( 122), INT8_C( 82),
+ INT8_C( 3), INT8_C(-111), INT8_C( 60), INT8_C( -96),
+ INT8_C( 26), INT8_C( 3), INT8_C( -7), INT8_C( -8),
+ INT8_C( 94), INT8_C( -31), INT8_C( -71), INT8_C( 14),
+ INT8_C(-105), INT8_C( -87), INT8_C( 115), INT8_C( -68),
+ INT8_C( -93), INT8_C( 70), INT8_C( -47), INT8_C(-106),
+ INT8_C( 105), INT8_C( -54), INT8_C( 96), INT8_C( 105),
+ INT8_C( 94), INT8_C( 84), INT8_C( -50), INT8_C( -79),
+ INT8_C( -54), INT8_C( 78), INT8_C( 17), INT8_C( 39),
+ INT8_C( 35), INT8_C( -87), INT8_C( -83), INT8_C( -25),
+ INT8_C( 110), INT8_C( 2), INT8_C( -90), INT8_C( -31),
+ INT8_C( 19), INT8_C( -23), INT8_C( 4), INT8_C( 16),
+ INT8_C( 59), INT8_C( -33), INT8_C( 124), INT8_C( 127),
+ INT8_C( -60), INT8_C( -14), INT8_C(-119), INT8_C( 88),
+ INT8_C( -38), INT8_C( 109), INT8_C(-113), INT8_C( 25)),
+ UINT64_C(0x789056cebe996e7a) },
+ { simde_mm512_set_epi8(INT8_C( 93), INT8_C( 110), INT8_C( 120), INT8_C( 25),
+ INT8_C( -37), INT8_C( -25), INT8_C( -34), INT8_C(-108),
+ INT8_C( -77), INT8_C(-114), INT8_C( 79), INT8_C( -50),
+ INT8_C( -94), INT8_C( 22), INT8_C( -28), INT8_C(-105),
+ INT8_C( 110), INT8_C( 29), INT8_C( -9), INT8_C( -13),
+ INT8_C( -71), INT8_C( 107), INT8_C(-115), INT8_C( 86),
+ INT8_C(-127), INT8_C(-100), INT8_C( 1), INT8_C( 21),
+ INT8_C( -55), INT8_C( -85), INT8_C( -55), INT8_C( -81),
+ INT8_C( -41), INT8_C( 39), INT8_C( 18), INT8_C( -92),
+ INT8_C( 11), INT8_C( -32), INT8_C( -53), INT8_C( 38),
+ INT8_C( -49), INT8_C(-118), INT8_C( 20), INT8_C( 66),
+ INT8_C(-106), INT8_C(-109), INT8_C( 45), INT8_C( -24),
+ INT8_C( -47), INT8_C( 95), INT8_C( 50), INT8_C( 105),
+ INT8_C( 58), INT8_C( 25), INT8_C( -53), INT8_C( -61),
+ INT8_C( -90), INT8_C( 92), INT8_C( 83), INT8_C( 120),
+ INT8_C( 107), INT8_C( -72), INT8_C( 3), INT8_C( -1)),
+ simde_mm512_set_epi8(INT8_C( 93), INT8_C( 110), INT8_C( 120), INT8_C( -75),
+ INT8_C( -37), INT8_C( -25), INT8_C( 14), INT8_C(-108),
+ INT8_C( 5), INT8_C(-114), INT8_C( 79), INT8_C( 57),
+ INT8_C( -94), INT8_C( 22), INT8_C( 4), INT8_C( 3),
+ INT8_C( 66), INT8_C( 29), INT8_C( -9), INT8_C( 20),
+ INT8_C( 93), INT8_C( 107), INT8_C( 6), INT8_C( 86),
+ INT8_C( 84), INT8_C(-100), INT8_C( 81), INT8_C( 21),
+ INT8_C( 117), INT8_C( 22), INT8_C( -55), INT8_C( -81),
+ INT8_C( -41), INT8_C( -17), INT8_C( 19), INT8_C( 106),
+ INT8_C( 114), INT8_C( -32), INT8_C( 98), INT8_C( -16),
+ INT8_C( -49), INT8_C(-118), INT8_C( 20), INT8_C( 66),
+ INT8_C(-106), INT8_C(-109), INT8_C( 97), INT8_C( 29),
+ INT8_C( -47), INT8_C( -47), INT8_C( 50), INT8_C(-111),
+ INT8_C( 58), INT8_C( 115), INT8_C( -53), INT8_C( 93),
+ INT8_C( -90), INT8_C( -27), INT8_C( 41), INT8_C( 120),
+ INT8_C( 119), INT8_C( 86), INT8_C( -36), INT8_C( -1)),
+ UINT64_C(0xed6c655384fcaa91) },
+ { simde_mm512_set_epi8(INT8_C( 10), INT8_C( 75), INT8_C( 91), INT8_C( -99),
+ INT8_C( -88), INT8_C( 99), INT8_C( -86), INT8_C( 96),
+ INT8_C( 14), INT8_C( -1), INT8_C( 14), INT8_C( 100),
+ INT8_C(-114), INT8_C( 63), INT8_C( 68), INT8_C(-113),
+ INT8_C( -59), INT8_C( -42), INT8_C( -14), INT8_C(-111),
+ INT8_C( 6), INT8_C( 68), INT8_C( 11), INT8_C(-108),
+ INT8_C( -62), INT8_C( 87), INT8_C( -72), INT8_C( -23),
+ INT8_C( 78), INT8_C( -18), INT8_C( -36), INT8_C( -6),
+ INT8_C( -68), INT8_C(-115), INT8_C( -24), INT8_C( 127),
+ INT8_C( -36), INT8_C( 21), INT8_C( 38), INT8_C(-106),
+ INT8_C( 33), INT8_C( -66), INT8_C(-121), INT8_C( 36),
+ INT8_C( 24), INT8_C( 61), INT8_C( 66), INT8_C( 20),
+ INT8_C( 63), INT8_C( -18), INT8_C( 11), INT8_C(-103),
+ INT8_C( -19), INT8_C( -42), INT8_C( -69), INT8_C( 53),
+ INT8_C( -40), INT8_C( 112), INT8_C( 8), INT8_C( -69),
+ INT8_C(-102), INT8_C( 62), INT8_C( 85), INT8_C( 62)),
+ simde_mm512_set_epi8(INT8_C( 10), INT8_C( 75), INT8_C( -74), INT8_C( -47),
+ INT8_C( -88), INT8_C( 99), INT8_C( -86), INT8_C(-128),
+ INT8_C( 94), INT8_C( -1), INT8_C( 99), INT8_C( 100),
+ INT8_C( -25), INT8_C( 7), INT8_C( 59), INT8_C(-113),
+ INT8_C( 119), INT8_C( -42), INT8_C( -14), INT8_C( 79),
+ INT8_C( 4), INT8_C(-111), INT8_C( 11), INT8_C( 80),
+ INT8_C( -78), INT8_C( 87), INT8_C( -72), INT8_C(-111),
+ INT8_C( -95), INT8_C( -18), INT8_C( -36), INT8_C( -40),
+ INT8_C( -68), INT8_C(-115), INT8_C( -24), INT8_C( -50),
+ INT8_C( -36), INT8_C( 10), INT8_C( 47), INT8_C( 62),
+ INT8_C( -15), INT8_C( -66), INT8_C(-122), INT8_C( 36),
+ INT8_C( -22), INT8_C( 61), INT8_C( -11), INT8_C( 20),
+ INT8_C( 63), INT8_C( 82), INT8_C(-113), INT8_C(-103),
+ INT8_C( -19), INT8_C( -42), INT8_C( -69), INT8_C( 53),
+ INT8_C( -40), INT8_C( 112), INT8_C( 8), INT8_C( -69),
+ INT8_C(-102), INT8_C( 23), INT8_C( 85), INT8_C( 62)),
+ UINT64_C(0xce516266e8559ffb) },
+ { simde_mm512_set_epi8(INT8_C( -7), INT8_C( 2), INT8_C(-111), INT8_C( 64),
+ INT8_C(-100), INT8_C( 87), INT8_C( 100), INT8_C( -30),
+ INT8_C( -39), INT8_C( -38), INT8_C( 121), INT8_C( 55),
+ INT8_C( -64), INT8_C( 81), INT8_C( -3), INT8_C( 79),
+ INT8_C( -41), INT8_C( 118), INT8_C( -37), INT8_C( -34),
+ INT8_C( -13), INT8_C( 63), INT8_C( 26), INT8_C( -81),
+ INT8_C( 90), INT8_C( 43), INT8_C( -31), INT8_C( -17),
+ INT8_C(-100), INT8_C( -71), INT8_C(-104), INT8_C( -66),
+ INT8_C( -94), INT8_C( -89), INT8_C( 100), INT8_C( 36),
+ INT8_C( 17), INT8_C( 116), INT8_C( -30), INT8_C( 16),
+ INT8_C( 110), INT8_C( 98), INT8_C( 11), INT8_C( -42),
+ INT8_C( -78), INT8_C( -68), INT8_C( -26), INT8_C( -35),
+ INT8_C( 12), INT8_C( -40), INT8_C( -27), INT8_C( -40),
+ INT8_C(-102), INT8_C(-109), INT8_C( 39), INT8_C( 29),
+ INT8_C( 21), INT8_C( 9), INT8_C( 49), INT8_C( -13),
+ INT8_C( -49), INT8_C( 7), INT8_C( 91), INT8_C( 15)),
+ simde_mm512_set_epi8(INT8_C( 78), INT8_C( 2), INT8_C( -91), INT8_C( 64),
+ INT8_C(-100), INT8_C( 41), INT8_C( -34), INT8_C( -46),
+ INT8_C( -39), INT8_C( 31), INT8_C( 13), INT8_C( 55),
+ INT8_C( -42), INT8_C( 33), INT8_C( -3), INT8_C( 79),
+ INT8_C( -41), INT8_C( 118), INT8_C( -37), INT8_C( 90),
+ INT8_C( -13), INT8_C( 63), INT8_C( 51), INT8_C( -81),
+ INT8_C( 90), INT8_C( 43), INT8_C( -31), INT8_C(-112),
+ INT8_C(-100), INT8_C( 41), INT8_C(-104), INT8_C( -66),
+ INT8_C( -94), INT8_C( -89), INT8_C( -85), INT8_C(-109),
+ INT8_C( 113), INT8_C( 116), INT8_C( 100), INT8_C( 16),
+ INT8_C( 5), INT8_C( -50), INT8_C( -51), INT8_C( -42),
+ INT8_C( -95), INT8_C( -68), INT8_C( -26), INT8_C( -35),
+ INT8_C( -73), INT8_C( 71), INT8_C( 65), INT8_C( -40),
+ INT8_C(-102), INT8_C( 7), INT8_C( 94), INT8_C( 29),
+ INT8_C( 65), INT8_C( 9), INT8_C( 49), INT8_C( -13),
+ INT8_C( -33), INT8_C( 7), INT8_C(-101), INT8_C( 15)),
+ UINT64_C(0x5893edebc5171975) },
+ { simde_mm512_set_epi8(INT8_C( -34), INT8_C( -12), INT8_C( 105), INT8_C(-124),
+ INT8_C( -33), INT8_C( -79), INT8_C( -6), INT8_C( 54),
+ INT8_C( 81), INT8_C( -11), INT8_C( 67), INT8_C( 63),
+ INT8_C( 103), INT8_C( 119), INT8_C( -89), INT8_C( 40),
+ INT8_C( 8), INT8_C( -38), INT8_C( 71), INT8_C( 66),
+ INT8_C(-106), INT8_C( -45), INT8_C( 18), INT8_C( 100),
+ INT8_C( 122), INT8_C( 93), INT8_C( -42), INT8_C( 5),
+ INT8_C( -39), INT8_C( 37), INT8_C( -70), INT8_C( 13),
+ INT8_C( 99), INT8_C( -57), INT8_C( -88), INT8_C( -36),
+ INT8_C(-103), INT8_C( 25), INT8_C( 94), INT8_C(-107),
+ INT8_C( -32), INT8_C( -12), INT8_C( -14), INT8_C( 32),
+ INT8_C( -38), INT8_C( 10), INT8_C( 89), INT8_C( -69),
+ INT8_C( -8), INT8_C( 69), INT8_C( -20), INT8_C(-122),
+ INT8_C( -75), INT8_C( -71), INT8_C( 3), INT8_C( 102),
+ INT8_C( 119), INT8_C( -58), INT8_C( -49), INT8_C( 80),
+ INT8_C( -15), INT8_C( -97), INT8_C( 45), INT8_C( 96)),
+ simde_mm512_set_epi8(INT8_C( -34), INT8_C( -12), INT8_C( 81), INT8_C(-115),
+ INT8_C( -33), INT8_C( -79), INT8_C(-117), INT8_C( -34),
+ INT8_C( 81), INT8_C( -11), INT8_C( -63), INT8_C( -61),
+ INT8_C( 53), INT8_C( 119), INT8_C( 26), INT8_C( 40),
+ INT8_C( 8), INT8_C( -38), INT8_C( 25), INT8_C( -23),
+ INT8_C( -16), INT8_C( -45), INT8_C( -64), INT8_C( 100),
+ INT8_C( 91), INT8_C( 93), INT8_C( -42), INT8_C( 5),
+ INT8_C( 81), INT8_C( -76), INT8_C( -70), INT8_C( 13),
+ INT8_C( 26), INT8_C( -57), INT8_C( -88), INT8_C( -64),
+ INT8_C( -68), INT8_C( -91), INT8_C(-123), INT8_C( 38),
+ INT8_C( -32), INT8_C( 29), INT8_C( 82), INT8_C( 54),
+ INT8_C(-107), INT8_C( 10), INT8_C( 89), INT8_C( 28),
+ INT8_C( -27), INT8_C( 41), INT8_C( -20), INT8_C(-122),
+ INT8_C( -75), INT8_C( -71), INT8_C( 3), INT8_C( -30),
+ INT8_C( 97), INT8_C( 18), INT8_C( -90), INT8_C( 107),
+ INT8_C( 99), INT8_C( 10), INT8_C( 45), INT8_C( 96)),
+ UINT64_C(0xccc5c57360863e03) },
+ { simde_mm512_set_epi8(INT8_C( 48), INT8_C( 94), INT8_C( 112), INT8_C(-107),
+ INT8_C( -34), INT8_C( -86), INT8_C( 65), INT8_C( 92),
+ INT8_C( 97), INT8_C( -99), INT8_C( 28), INT8_C( 47),
+ INT8_C(-117), INT8_C( -22), INT8_C(-111), INT8_C( -67),
+ INT8_C( 113), INT8_C(-107), INT8_C( -23), INT8_C( 77),
+ INT8_C( 60), INT8_C( 104), INT8_C(-116), INT8_C( -86),
+ INT8_C(-113), INT8_C( -79), INT8_C( -64), INT8_C( -15),
+ INT8_C(-123), INT8_C( 99), INT8_C( 25), INT8_C( 27),
+ INT8_C( -40), INT8_C( 126), INT8_C( -66), INT8_C( -45),
+ INT8_C( 57), INT8_C( -30), INT8_C( -12), INT8_C( 16),
+ INT8_C( 122), INT8_C( 124), INT8_C( -75), INT8_C( 50),
+ INT8_C( -6), INT8_C( 41), INT8_C( -47), INT8_C( -3),
+ INT8_C( 29), INT8_C( -20), INT8_C( -45), INT8_C( -46),
+ INT8_C( -45), INT8_C( -14), INT8_C( 99), INT8_C( 84),
+ INT8_C( -62), INT8_C( -99), INT8_C(-104), INT8_C( -78),
+ INT8_C( 106), INT8_C(-117), INT8_C( -94), INT8_C( 3)),
+ simde_mm512_set_epi8(INT8_C( -88), INT8_C( -74), INT8_C( 2), INT8_C( -83),
+ INT8_C(-112), INT8_C( -91), INT8_C( 65), INT8_C( 92),
+ INT8_C( 97), INT8_C( 52), INT8_C( 28), INT8_C( -65),
+ INT8_C(-117), INT8_C( -90), INT8_C(-111), INT8_C( -67),
+ INT8_C( 113), INT8_C( 16), INT8_C(-124), INT8_C( 77),
+ INT8_C( 60), INT8_C( -64), INT8_C(-116), INT8_C( -89),
+ INT8_C( -7), INT8_C( -79), INT8_C( 46), INT8_C( 114),
+ INT8_C(-107), INT8_C( 99), INT8_C( -79), INT8_C( 80),
+ INT8_C( -40), INT8_C( -81), INT8_C( -66), INT8_C( -45),
+ INT8_C( 57), INT8_C( -30), INT8_C( 66), INT8_C( 71),
+ INT8_C( 122), INT8_C( 95), INT8_C( -43), INT8_C( 50),
+ INT8_C( -6), INT8_C( 41), INT8_C( -47), INT8_C( 95),
+ INT8_C( 5), INT8_C( -20), INT8_C( -45), INT8_C(-118),
+ INT8_C( 70), INT8_C( 81), INT8_C( 0), INT8_C( 84),
+ INT8_C( -62), INT8_C(-101), INT8_C( 13), INT8_C( 33),
+ INT8_C(-104), INT8_C(-117), INT8_C( -11), INT8_C( 20)),
+ UINT64_C(0x3ab9a44bc9e6184) }, /* only 15 hex digits: the top nibble is 0 because the first four byte pairs all differ */
+ { simde_mm512_set_epi8(INT8_C( 67), INT8_C( 34), INT8_C( -33), INT8_C( 31),
+ INT8_C(-128), INT8_C( 55), INT8_C( 93), INT8_C( 58),
+ INT8_C( 57), INT8_C( 104), INT8_C(-110), INT8_C( 59),
+ INT8_C( 55), INT8_C( 33), INT8_C(-122), INT8_C( 69),
+ INT8_C( 57), INT8_C( 30), INT8_C( -13), INT8_C( -65),
+ INT8_C( -22), INT8_C(-100), INT8_C( 18), INT8_C( -65),
+ INT8_C( -60), INT8_C(-105), INT8_C( 27), INT8_C( -71),
+ INT8_C( 52), INT8_C( 12), INT8_C( -4), INT8_C( 64),
+ INT8_C( 20), INT8_C( 51), INT8_C( 87), INT8_C( 43),
+ INT8_C( 26), INT8_C( 6), INT8_C( -66), INT8_C( -40),
+ INT8_C( 87), INT8_C( 1), INT8_C( -26), INT8_C( 92),
+ INT8_C( -33), INT8_C( 8), INT8_C( 42), INT8_C( -93),
+ INT8_C( 44), INT8_C( -55), INT8_C(-113), INT8_C( -43),
+ INT8_C( 32), INT8_C( 105), INT8_C( -27), INT8_C( 96),
+ INT8_C( 72), INT8_C( 48), INT8_C( -46), INT8_C( 24),
+ INT8_C( -10), INT8_C( -98), INT8_C( -56), INT8_C( -41)),
+ simde_mm512_set_epi8(INT8_C( 67), INT8_C( -63), INT8_C( -33), INT8_C( 31),
+ INT8_C(-128), INT8_C( 55), INT8_C( 93), INT8_C( -8),
+ INT8_C( 82), INT8_C( 104), INT8_C(-110), INT8_C( 59),
+ INT8_C( 55), INT8_C( 0), INT8_C( -25), INT8_C( 69),
+ INT8_C( 27), INT8_C( 30), INT8_C( -13), INT8_C( -65),
+ INT8_C( -7), INT8_C( -28), INT8_C( 18), INT8_C( -65),
+ INT8_C( 67), INT8_C( -3), INT8_C( 57), INT8_C( -68),
+ INT8_C( 52), INT8_C( 12), INT8_C( -4), INT8_C(-128),
+ INT8_C( 20), INT8_C( 37), INT8_C( 9), INT8_C( 80),
+ INT8_C( 26), INT8_C( 6), INT8_C( -66), INT8_C( 9),
+ INT8_C( -98), INT8_C( 1), INT8_C( -26), INT8_C( 92),
+ INT8_C( -33), INT8_C( 8), INT8_C( -81), INT8_C( -93),
+ INT8_C( 116), INT8_C( -55), INT8_C(-113), INT8_C( -43),
+ INT8_C( 32), INT8_C( 105), INT8_C( -27), INT8_C( 37),
+ INT8_C( 72), INT8_C( -73), INT8_C( -19), INT8_C( 96),
+ INT8_C( 52), INT8_C( -98), INT8_C( -45), INT8_C( -41)),
+ UINT64_C(0xbe79730e8e7d7e85) },
+ };
+ /* Run every table entry through the function under test and require the returned mask to equal the stored one exactly. */
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__mmask64 r = simde_mm512_cmpeq_epi8_mask(test_vec[i].a, test_vec[i].b);
+ simde_assert_uint64(r, ==, test_vec[i].r);
+ }
+
+ return MUNIT_OK;
+}
+
#endif /* defined(SIMDE_avx512bw_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) */
HEDLEY_DIAGNOSTIC_PUSH
@@ -1506,6 +2194,9 @@ static MunitTest test_suite_tests[] = {
SIMDE_TESTS_DEFINE_TEST(mm512_adds_epu8),
SIMDE_TESTS_DEFINE_TEST(mm512_adds_epu16),
+ SIMDE_TESTS_DEFINE_TEST(mm512_shuffle_epi8),
+ SIMDE_TESTS_DEFINE_TEST(mm512_cmpeq_epi8_mask),
+
#endif /* defined(SIMDE_AVX512bw_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) */
{ NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }
};
=====================================
test/x86/avx512f.c
=====================================
@@ -2207,6 +2207,338 @@ test_simde_mm512_mask_cmpeq_epi32_mask(const MunitParameter params[], void* data
return MUNIT_OK;
}
+static MunitResult
+test_simde_mm512_cvtepi8_epi32(const MunitParameter params[], void* data) {
+ (void) params; /* explicitly discarded: this test never reads its arguments */
+ (void) data;
+ /* Test for simde_mm512_cvtepi8_epi32: runs eight fixed inputs through the function and compares each result with a stored expectation. */
+ const struct {
+ simde__m128i a; /* input handed to simde_mm512_cvtepi8_epi32 */
+ simde__m512i r; /* result the call is expected to produce */
+ } test_vec[8] = { /* in every row, r repeats the sixteen values of a, in the same order, as INT32_C constants (value-preserving widening) */
+ { simde_mm_set_epi8(INT8_C( 6), INT8_C( 33), INT8_C( 124), INT8_C(-128),
+ INT8_C( 38), INT8_C( 59), INT8_C( -37), INT8_C( 121),
+ INT8_C( 67), INT8_C( 112), INT8_C( 62), INT8_C(-113),
+ INT8_C(-121), INT8_C( -89), INT8_C( 63), INT8_C( -72)),
+ simde_mm512_set_epi32(INT32_C( 6), INT32_C( 33), INT32_C( 124), INT32_C( -128),
+ INT32_C( 38), INT32_C( 59), INT32_C( -37), INT32_C( 121),
+ INT32_C( 67), INT32_C( 112), INT32_C( 62), INT32_C( -113),
+ INT32_C( -121), INT32_C( -89), INT32_C( 63), INT32_C( -72)) },
+ { simde_mm_set_epi8(INT8_C( -94), INT8_C( 125), INT8_C( 105), INT8_C( -92),
+ INT8_C(-107), INT8_C( -69), INT8_C( 25), INT8_C(-125),
+ INT8_C( -11), INT8_C( -44), INT8_C( 60), INT8_C( -96),
+ INT8_C( 119), INT8_C( -40), INT8_C( 29), INT8_C( 9)),
+ simde_mm512_set_epi32(INT32_C( -94), INT32_C( 125), INT32_C( 105), INT32_C( -92),
+ INT32_C( -107), INT32_C( -69), INT32_C( 25), INT32_C( -125),
+ INT32_C( -11), INT32_C( -44), INT32_C( 60), INT32_C( -96),
+ INT32_C( 119), INT32_C( -40), INT32_C( 29), INT32_C( 9)) },
+ { simde_mm_set_epi8(INT8_C( 47), INT8_C( -9), INT8_C(-127), INT8_C( 66),
+ INT8_C( 126), INT8_C( -95), INT8_C( 99), INT8_C( -25),
+ INT8_C( -19), INT8_C( 84), INT8_C( -96), INT8_C( -77),
+ INT8_C( -34), INT8_C(-108), INT8_C( -33), INT8_C( 62)),
+ simde_mm512_set_epi32(INT32_C( 47), INT32_C( -9), INT32_C( -127), INT32_C( 66),
+ INT32_C( 126), INT32_C( -95), INT32_C( 99), INT32_C( -25),
+ INT32_C( -19), INT32_C( 84), INT32_C( -96), INT32_C( -77),
+ INT32_C( -34), INT32_C( -108), INT32_C( -33), INT32_C( 62)) },
+ { simde_mm_set_epi8(INT8_C( 93), INT8_C(-106), INT8_C( 61), INT8_C( 64),
+ INT8_C( -98), INT8_C( 31), INT8_C( -75), INT8_C( -23),
+ INT8_C( 113), INT8_C( -94), INT8_C( -70), INT8_C( -73),
+ INT8_C( 66), INT8_C( -84), INT8_C( 92), INT8_C(-113)),
+ simde_mm512_set_epi32(INT32_C( 93), INT32_C( -106), INT32_C( 61), INT32_C( 64),
+ INT32_C( -98), INT32_C( 31), INT32_C( -75), INT32_C( -23),
+ INT32_C( 113), INT32_C( -94), INT32_C( -70), INT32_C( -73),
+ INT32_C( 66), INT32_C( -84), INT32_C( 92), INT32_C( -113)) },
+ { simde_mm_set_epi8(INT8_C( -92), INT8_C( -17), INT8_C( -42), INT8_C( 103),
+ INT8_C( 111), INT8_C( -23), INT8_C( 14), INT8_C(-122),
+ INT8_C( -61), INT8_C( 1), INT8_C( 16), INT8_C(-124),
+ INT8_C( -62), INT8_C( 59), INT8_C( 106), INT8_C(-105)),
+ simde_mm512_set_epi32(INT32_C( -92), INT32_C( -17), INT32_C( -42), INT32_C( 103),
+ INT32_C( 111), INT32_C( -23), INT32_C( 14), INT32_C( -122),
+ INT32_C( -61), INT32_C( 1), INT32_C( 16), INT32_C( -124),
+ INT32_C( -62), INT32_C( 59), INT32_C( 106), INT32_C( -105)) },
+ { simde_mm_set_epi8(INT8_C( 71), INT8_C( 82), INT8_C( -83), INT8_C( 118),
+ INT8_C( 127), INT8_C( -5), INT8_C( 43), INT8_C( -48),
+ INT8_C( 67), INT8_C(-117), INT8_C( -15), INT8_C( 105),
+ INT8_C( -88), INT8_C( 127), INT8_C( 85), INT8_C(-110)),
+ simde_mm512_set_epi32(INT32_C( 71), INT32_C( 82), INT32_C( -83), INT32_C( 118),
+ INT32_C( 127), INT32_C( -5), INT32_C( 43), INT32_C( -48),
+ INT32_C( 67), INT32_C( -117), INT32_C( -15), INT32_C( 105),
+ INT32_C( -88), INT32_C( 127), INT32_C( 85), INT32_C( -110)) },
+ { simde_mm_set_epi8(INT8_C( 8), INT8_C( 35), INT8_C( -44), INT8_C( 18),
+ INT8_C( 3), INT8_C( -7), INT8_C( 34), INT8_C( 98),
+ INT8_C( 43), INT8_C( 97), INT8_C(-127), INT8_C( 121),
+ INT8_C( -47), INT8_C( -94), INT8_C( -74), INT8_C( -59)),
+ simde_mm512_set_epi32(INT32_C( 8), INT32_C( 35), INT32_C( -44), INT32_C( 18),
+ INT32_C( 3), INT32_C( -7), INT32_C( 34), INT32_C( 98),
+ INT32_C( 43), INT32_C( 97), INT32_C( -127), INT32_C( 121),
+ INT32_C( -47), INT32_C( -94), INT32_C( -74), INT32_C( -59)) },
+ { simde_mm_set_epi8(INT8_C( 79), INT8_C( 98), INT8_C( 70), INT8_C( -29),
+ INT8_C( 61), INT8_C( 100), INT8_C( -92), INT8_C( 10),
+ INT8_C(-107), INT8_C( -13), INT8_C( 83), INT8_C(-102),
+ INT8_C( -54), INT8_C( 80), INT8_C( 43), INT8_C( 56)),
+ simde_mm512_set_epi32(INT32_C( 79), INT32_C( 98), INT32_C( 70), INT32_C( -29),
+ INT32_C( 61), INT32_C( 100), INT32_C( -92), INT32_C( 10),
+ INT32_C( -107), INT32_C( -13), INT32_C( 83), INT32_C( -102),
+ INT32_C( -54), INT32_C( 80), INT32_C( 43), INT32_C( 56)) }
+ };
+ /* Visit every row; the bound is the element count of test_vec, derived with sizeof. */
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m512i r = simde_mm512_cvtepi8_epi32(test_vec[i].a);
+ simde_assert_m512i_i32(r, ==, test_vec[i].r); /* NOTE(review): presumably a per-lane 32-bit comparison - confirm in the test helpers */
+ }
+
+ return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm512_cvtepi8_epi64(const MunitParameter params[], void* data) {
+ (void) params; /* explicitly discarded: this test never reads its arguments */
+ (void) data;
+ /* Test for simde_mm512_cvtepi8_epi64: runs eight fixed inputs through the function and compares each result with a stored expectation. */
+ const struct {
+ simde__m128i a; /* input handed to simde_mm512_cvtepi8_epi64 */
+ simde__m512i r; /* result the call is expected to produce */
+ } test_vec[8] = { /* in every row, r repeats only the LAST eight set_epi8 arguments of a, in order, as INT64_C constants; presumably those are the low eight bytes - confirm against set_epi8's argument order */
+ { simde_mm_set_epi8(INT8_C( 40), INT8_C( -85), INT8_C( 94), INT8_C( 35),
+ INT8_C( -54), INT8_C( -71), INT8_C(-106), INT8_C(-127),
+ INT8_C( 11), INT8_C( 105), INT8_C( 37), INT8_C(-105),
+ INT8_C( 6), INT8_C( -65), INT8_C( 17), INT8_C( 29)),
+ simde_mm512_set_epi64(INT64_C( 11), INT64_C( 105),
+ INT64_C( 37), INT64_C( -105),
+ INT64_C( 6), INT64_C( -65),
+ INT64_C( 17), INT64_C( 29)) },
+ { simde_mm_set_epi8(INT8_C( 65), INT8_C( -98), INT8_C( -80), INT8_C( -71),
+ INT8_C( -32), INT8_C( 61), INT8_C( -91), INT8_C( 2),
+ INT8_C( 62), INT8_C( 86), INT8_C( -39), INT8_C( -20),
+ INT8_C( 65), INT8_C( -87), INT8_C( 116), INT8_C(-112)),
+ simde_mm512_set_epi64(INT64_C( 62), INT64_C( 86),
+ INT64_C( -39), INT64_C( -20),
+ INT64_C( 65), INT64_C( -87),
+ INT64_C( 116), INT64_C( -112)) },
+ { simde_mm_set_epi8(INT8_C( -35), INT8_C( 91), INT8_C( 95), INT8_C( -91),
+ INT8_C(-120), INT8_C( -72), INT8_C( 6), INT8_C( 127),
+ INT8_C( -64), INT8_C( 43), INT8_C( -93), INT8_C( 8),
+ INT8_C( 105), INT8_C( -16), INT8_C( 39), INT8_C( 125)),
+ simde_mm512_set_epi64(INT64_C( -64), INT64_C( 43),
+ INT64_C( -93), INT64_C( 8),
+ INT64_C( 105), INT64_C( -16),
+ INT64_C( 39), INT64_C( 125)) },
+ { simde_mm_set_epi8(INT8_C( 108), INT8_C( 105), INT8_C( 98), INT8_C( -57),
+ INT8_C( -42), INT8_C( -18), INT8_C( -55), INT8_C( -1),
+ INT8_C( -97), INT8_C( -26), INT8_C( -21), INT8_C(-119),
+ INT8_C( 95), INT8_C( 83), INT8_C( -95), INT8_C( 86)),
+ simde_mm512_set_epi64(INT64_C( -97), INT64_C( -26),
+ INT64_C( -21), INT64_C( -119),
+ INT64_C( 95), INT64_C( 83),
+ INT64_C( -95), INT64_C( 86)) },
+ { simde_mm_set_epi8(INT8_C( 63), INT8_C(-119), INT8_C( 65), INT8_C( 108),
+ INT8_C( -17), INT8_C( -16), INT8_C( -4), INT8_C( 16),
+ INT8_C(-117), INT8_C( -62), INT8_C( -96), INT8_C( 5),
+ INT8_C( 116), INT8_C( -58), INT8_C( -56), INT8_C(-121)),
+ simde_mm512_set_epi64(INT64_C( -117), INT64_C( -62),
+ INT64_C( -96), INT64_C( 5),
+ INT64_C( 116), INT64_C( -58),
+ INT64_C( -56), INT64_C( -121)) },
+ { simde_mm_set_epi8(INT8_C( -69), INT8_C( 98), INT8_C( 52), INT8_C( -73),
+ INT8_C( 71), INT8_C( 44), INT8_C( -92), INT8_C( 54),
+ INT8_C( 126), INT8_C( 88), INT8_C( 32), INT8_C(-122),
+ INT8_C( -69), INT8_C( -55), INT8_C(-123), INT8_C( 79)),
+ simde_mm512_set_epi64(INT64_C( 126), INT64_C( 88),
+ INT64_C( 32), INT64_C( -122),
+ INT64_C( -69), INT64_C( -55),
+ INT64_C( -123), INT64_C( 79)) },
+ { simde_mm_set_epi8(INT8_C(-110), INT8_C( -17), INT8_C( -48), INT8_C( -64),
+ INT8_C( 9), INT8_C( 96), INT8_C( 113), INT8_C( -55),
+ INT8_C( -45), INT8_C( -9), INT8_C( 104), INT8_C( -61),
+ INT8_C( 127), INT8_C( 121), INT8_C( 70), INT8_C( -22)),
+ simde_mm512_set_epi64(INT64_C( -45), INT64_C( -9),
+ INT64_C( 104), INT64_C( -61),
+ INT64_C( 127), INT64_C( 121),
+ INT64_C( 70), INT64_C( -22)) },
+ { simde_mm_set_epi8(INT8_C( 68), INT8_C( -24), INT8_C( 90), INT8_C( -28),
+ INT8_C( 55), INT8_C( -48), INT8_C( 13), INT8_C( 95),
+ INT8_C( 10), INT8_C( -72), INT8_C( 109), INT8_C( -27),
+ INT8_C( 94), INT8_C( 121), INT8_C( 33), INT8_C( 52)),
+ simde_mm512_set_epi64(INT64_C( 10), INT64_C( -72),
+ INT64_C( 109), INT64_C( -27),
+ INT64_C( 94), INT64_C( 121),
+ INT64_C( 33), INT64_C( 52)) }
+ };
+ /* Visit every row; the bound is the element count of test_vec, derived with sizeof. */
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m512i r = simde_mm512_cvtepi8_epi64(test_vec[i].a);
+ simde_assert_m512i_i64(r, ==, test_vec[i].r); /* NOTE(review): presumably a per-lane 64-bit comparison - confirm in the test helpers */
+ }
+
+ return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm512_cvtepi32_epi8(const MunitParameter params[], void* data) {
+ (void) params; /* explicitly discarded: this test never reads its arguments */
+ (void) data;
+ /* Test for simde_mm512_cvtepi32_epi8: runs eight fixed inputs through the function and compares each result with a stored expectation. */
+ const struct {
+ simde__m512i a; /* input handed to simde_mm512_cvtepi32_epi8 */
+ simde__m128i r; /* result the call is expected to produce */
+ } test_vec[8] = { /* expected bytes are the low 8 bits of the matching 32-bit input read as signed, i.e. wraparound rather than saturation (e.g. 14509 -> -83, -120451969 -> 127) */
+ { simde_mm512_set_epi32(INT32_C( -2), INT32_C( -4), INT32_C( -120451969), INT32_C( 14509),
+ INT32_C( -1510), INT32_C( 98804), INT32_C( 1802), INT32_C( -32352),
+ INT32_C( 14540), INT32_C( -2), INT32_C( 222), INT32_C( 152),
+ INT32_C( -48720), INT32_C( 250746), INT32_C( 15), INT32_C( -2)),
+ simde_mm_set_epi8(INT8_C( -2), INT8_C( -4), INT8_C( 127), INT8_C( -83),
+ INT8_C( 26), INT8_C( -12), INT8_C( 10), INT8_C( -96),
+ INT8_C( -52), INT8_C( -2), INT8_C( -34), INT8_C(-104),
+ INT8_C( -80), INT8_C( 122), INT8_C( 15), INT8_C( -2)) },
+ { simde_mm512_set_epi32(INT32_C( -2537924), INT32_C( 0), INT32_C( 3842), INT32_C( -439330),
+ INT32_C( 39001), INT32_C( -1), INT32_C( 32480192), INT32_C( 0),
+ INT32_C( 4), INT32_C( -11661865), INT32_C( 0), INT32_C( 2),
+ INT32_C( 63322), INT32_C( -33), INT32_C( 14448), INT32_C( 2)),
+ simde_mm_set_epi8(INT8_C( 60), INT8_C( 0), INT8_C( 2), INT8_C( -34),
+ INT8_C( 89), INT8_C( -1), INT8_C( -64), INT8_C( 0),
+ INT8_C( 4), INT8_C( -41), INT8_C( 0), INT8_C( 2),
+ INT8_C( 90), INT8_C( -33), INT8_C( 112), INT8_C( 2)) },
+ { simde_mm512_set_epi32(INT32_C( -18), INT32_C( -2011647), INT32_C( -7768922), INT32_C( -921783558),
+ INT32_C( -1941821), INT32_C( 0), INT32_C( -647690), INT32_C( -5119),
+ INT32_C( -1), INT32_C( 343), INT32_C( 113610714), INT32_C( 3),
+ INT32_C( 38353), INT32_C( 246), INT32_C( -10559231), INT32_C( 8543)),
+ simde_mm_set_epi8(INT8_C( -18), INT8_C( 1), INT8_C( -90), INT8_C( -6),
+ INT8_C( -61), INT8_C( 0), INT8_C( -10), INT8_C( 1),
+ INT8_C( -1), INT8_C( 87), INT8_C( -38), INT8_C( 3),
+ INT8_C( -47), INT8_C( -10), INT8_C( 1), INT8_C( 95)) },
+ { simde_mm512_set_epi32(INT32_C( -177879544), INT32_C( -4), INT32_C( -1), INT32_C( -1874),
+ INT32_C( -4), INT32_C( -469598096), INT32_C( -1647), INT32_C( 129553),
+ INT32_C( 9710669), INT32_C( 3), INT32_C( 1473856), INT32_C( 134714256),
+ INT32_C( 70527996), INT32_C( 2339642), INT32_C( 148218), INT32_C( -32767248)),
+ simde_mm_set_epi8(INT8_C( 8), INT8_C( -4), INT8_C( -1), INT8_C( -82),
+ INT8_C( -4), INT8_C( 112), INT8_C(-111), INT8_C( 17),
+ INT8_C( 77), INT8_C( 3), INT8_C( 64), INT8_C(-112),
+ INT8_C( -4), INT8_C( 58), INT8_C( -6), INT8_C( -16)) },
+ { simde_mm512_set_epi32(INT32_C( -1418204), INT32_C( -122943), INT32_C( 799821), INT32_C( -51),
+ INT32_C( 207931), INT32_C( -11778782), INT32_C( 66993), INT32_C( -15812),
+ INT32_C( 55345677), INT32_C( -194873886), INT32_C( -3955), INT32_C( -22),
+ INT32_C( 1761), INT32_C( 751), INT32_C( 19), INT32_C( -4)),
+ simde_mm_set_epi8(INT8_C( 36), INT8_C( -63), INT8_C( 77), INT8_C( -51),
+ INT8_C( 59), INT8_C( 34), INT8_C( -79), INT8_C( 60),
+ INT8_C( 13), INT8_C( -30), INT8_C(-115), INT8_C( -22),
+ INT8_C( -31), INT8_C( -17), INT8_C( 19), INT8_C( -4)) },
+ { simde_mm512_set_epi32(INT32_C( -17), INT32_C( -26), INT32_C( 854285), INT32_C( 51),
+ INT32_C( -60746537), INT32_C( 3687234), INT32_C( 52848365), INT32_C( 26958727),
+ INT32_C( 2), INT32_C( 104), INT32_C( 4725058), INT32_C( -56297),
+ INT32_C( 5336), INT32_C( 443041), INT32_C( -35), INT32_C( 229612)),
+ simde_mm_set_epi8(INT8_C( -17), INT8_C( -26), INT8_C( 13), INT8_C( 51),
+ INT8_C( -41), INT8_C( 66), INT8_C( -19), INT8_C(-121),
+ INT8_C( 2), INT8_C( 104), INT8_C( 66), INT8_C( 23),
+ INT8_C( -40), INT8_C( -95), INT8_C( -35), INT8_C( -20)) },
+ { simde_mm512_set_epi32(INT32_C( -27), INT32_C( 127397292), INT32_C( 29325489), INT32_C( 691),
+ INT32_C( -978), INT32_C( -559751), INT32_C( -3037707), INT32_C( 189833),
+ INT32_C( 11), INT32_C( -4085970), INT32_C( -3499), INT32_C( -16323),
+ INT32_C( 732682), INT32_C( 108115), INT32_C( 29565452), INT32_C( -145574324)),
+ simde_mm_set_epi8(INT8_C( -27), INT8_C( -84), INT8_C( -79), INT8_C( -77),
+ INT8_C( 46), INT8_C( 121), INT8_C( -11), INT8_C(-119),
+ INT8_C( 11), INT8_C( 46), INT8_C( 85), INT8_C( 61),
+ INT8_C( 10), INT8_C( 83), INT8_C( 12), INT8_C( 76)) },
+ { simde_mm512_set_epi32(INT32_C( -14), INT32_C( 6208981), INT32_C( 133763173), INT32_C( -30227251),
+ INT32_C( -17898651), INT32_C( -197203605), INT32_C( -4), INT32_C( 13),
+ INT32_C( -1312564), INT32_C( -3), INT32_C( 5632807), INT32_C( 2549),
+ INT32_C( -3), INT32_C( -2772), INT32_C( -1504), INT32_C( 1)),
+ simde_mm_set_epi8(INT8_C( -14), INT8_C( -43), INT8_C( 101), INT8_C( -51),
+ INT8_C( 101), INT8_C( 107), INT8_C( -4), INT8_C( 13),
+ INT8_C( -52), INT8_C( -3), INT8_C( 39), INT8_C( -11),
+ INT8_C( -3), INT8_C( 44), INT8_C( 32), INT8_C( 1)) }
+ };
+ /* Visit every row; the bound is the element count of test_vec, derived with sizeof. */
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m128i r = simde_mm512_cvtepi32_epi8(test_vec[i].a);
+ simde_assert_m128i_i8(r, ==, test_vec[i].r); /* NOTE(review): presumably a per-byte comparison - confirm in the test helpers */
+ }
+
+ return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm512_cvtepi64_epi8(const MunitParameter params[], void* data) {
+ (void) params; /* explicitly discarded: this test never reads its arguments */
+ (void) data;
+ /* Test for simde_mm512_cvtepi64_epi8: runs eight fixed inputs through the function and compares each result with a stored expectation. */
+ const struct {
+ simde__m512i a; /* input handed to simde_mm512_cvtepi64_epi8 */
+ simde__m128i r; /* result the call is expected to produce */
+ } test_vec[8] = { /* in every row the first eight expected bytes are 0 and the last eight are the low 8 bits of the eight 64-bit inputs read as signed (e.g. 273955 -> 35, -4013 -> 83); presumably the zeros are the upper half of r - confirm against set_epi8's argument order */
+ { simde_mm512_set_epi64(INT64_C( 273955), INT64_C( -4013),
+ INT64_C( -7033556), INT64_C( -1383025729160),
+ INT64_C( -218214744), INT64_C( -9402863842296753),
+ INT64_C( 0), INT64_C( -240066712)),
+ simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 35), INT8_C( 83), INT8_C( 44), INT8_C( 120),
+ INT8_C( -88), INT8_C( 79), INT8_C( 0), INT8_C( 104)) },
+ { simde_mm512_set_epi64(INT64_C( 50833), INT64_C( 484208564),
+ INT64_C( -124), INT64_C( -8907018595),
+ INT64_C( -1053135968), INT64_C( 2128258677497261),
+ INT64_C( 3), INT64_C( 263107913893504060)),
+ simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C(-111), INT8_C( -76), INT8_C(-124), INT8_C( -99),
+ INT8_C( -96), INT8_C( -83), INT8_C( 3), INT8_C( 60)) },
+ { simde_mm512_set_epi64(INT64_C( 6119961081599912), INT64_C( 949),
+ INT64_C( 761714638418543261), INT64_C( -7281666562),
+ INT64_C( -3399190417), INT64_C( 231),
+ INT64_C(-1217801394263696454), INT64_C( 43)),
+ simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( -88), INT8_C( -75), INT8_C( -99), INT8_C( -2),
+ INT8_C( 111), INT8_C( -25), INT8_C( -70), INT8_C( 43)) },
+ { simde_mm512_set_epi64(INT64_C( -74999030828832), INT64_C( 3805264232880),
+ INT64_C( 321542), INT64_C( -18067967511),
+ INT64_C( 21602392348127), INT64_C( -482358781),
+ INT64_C( -12189), INT64_C( 16807900958735709)),
+ simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( -32), INT8_C( -80), INT8_C( 6), INT8_C( -23),
+ INT8_C( -33), INT8_C( 3), INT8_C( 99), INT8_C( 93)) },
+ { simde_mm512_set_epi64(INT64_C( 11997), INT64_C( 192),
+ INT64_C( -32039837063), INT64_C( 39788),
+ INT64_C( 684970521), INT64_C( 361409660761858),
+ INT64_C( 27655177518327113), INT64_C( -7050752136)),
+ simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( -35), INT8_C( -64), INT8_C( 121), INT8_C( 108),
+ INT8_C( 25), INT8_C( 2), INT8_C( 73), INT8_C( 120)) },
+ { simde_mm512_set_epi64(INT64_C( -131967536383739), INT64_C( -32),
+ INT64_C( -54), INT64_C( -245),
+ INT64_C( -42658), INT64_C( 3),
+ INT64_C( 26934708458), INT64_C( -6255)),
+ simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 5), INT8_C( -32), INT8_C( -54), INT8_C( 11),
+ INT8_C( 94), INT8_C( 3), INT8_C( -22), INT8_C(-111)) },
+ { simde_mm512_set_epi64(INT64_C( 0), INT64_C(-4069220464223700234),
+ INT64_C( 0), INT64_C(-8071151262900075656),
+ INT64_C( -1), INT64_C( 4132460747),
+ INT64_C( -27), INT64_C( -7116923)),
+ simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( -10), INT8_C( 0), INT8_C( 120),
+ INT8_C( -1), INT8_C( -53), INT8_C( -27), INT8_C(-123)) },
+ { simde_mm512_set_epi64(INT64_C( 54634445793), INT64_C( 63569905620),
+ INT64_C( 20981054), INT64_C( 2614918),
+ INT64_C( 11917164823), INT64_C( 15401551242937960),
+ INT64_C( -1), INT64_C( 366397165244)),
+ simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( -31), INT8_C( -44), INT8_C( 62), INT8_C(-122),
+ INT8_C( 23), INT8_C( 104), INT8_C( -1), INT8_C( -68)) }
+ };
+ /* Visit every row; the bound is the element count of test_vec, derived with sizeof. */
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m128i r = simde_mm512_cvtepi64_epi8(test_vec[i].a);
+ simde_assert_m128i_i8(r, ==, test_vec[i].r); /* NOTE(review): presumably a per-byte comparison - confirm in the test helpers */
+ }
+
+ return MUNIT_OK;
+}
+
#endif /* defined(SIMDE_avx512f_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) */
HEDLEY_DIAGNOSTIC_PUSH
@@ -2246,6 +2578,13 @@ static MunitTest test_suite_tests[] = {
SIMDE_TESTS_DEFINE_TEST(mm512_mask_cmpeq_epi32_mask),
+ SIMDE_TESTS_DEFINE_TEST(mm512_cvtepi8_epi32),
+ SIMDE_TESTS_DEFINE_TEST(mm512_cvtepi8_epi64),
+
+ SIMDE_TESTS_DEFINE_TEST(mm512_cvtepi32_epi8),
+
+ SIMDE_TESTS_DEFINE_TEST(mm512_cvtepi64_epi8),
+
#endif /* defined(SIMDE_AVX512f_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) */
{ NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }
};
=====================================
test/x86/skel.c
=====================================
@@ -1204,14 +1204,14 @@ test_simde_mm256_xxx_epu32(const MunitParameter params[], void* data) {
r = simde__m256i_to_private(simde_mm256_xxx_epu32(simde__m256i_from_private(a), simde__m256i_from_private(b)));
- printf(" { simde_x_mm256_set_epu32(UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "),\n"
- " UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 ")),\n",
+ printf(" { simde_x_mm256_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")),\n",
a.u32[7], a.u32[6], a.u32[5], a.u32[4], a.u32[3], a.u32[2], a.u32[1], a.u32[0]);
- printf(" simde_x_mm256_set_epu32(UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "),\n"
- " UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 ")),\n",
+ printf(" simde_x_mm256_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")),\n",
b.u32[7], b.u32[6], b.u32[5], b.u32[4], b.u32[3], b.u32[2], b.u32[1], b.u32[0]);
- printf(" simde_x_mm256_set_epu32(UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "),\n"
- " UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 ")) },\n",
+ printf(" simde_x_mm256_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")) },\n",
r.u32[7], r.u32[6], r.u32[5], r.u32[4], r.u32[3], r.u32[2], r.u32[1], r.u32[0]);
}
return MUNIT_FAIL;
@@ -1934,7 +1934,7 @@ test_simde_mm512_xxx_epi64(const MunitParameter params[], void* data) {
munit_rand_memory(sizeof(a), (uint8_t*) &a);
munit_rand_memory(sizeof(b), (uint8_t*) &b);
- r = simde__m512i_to_private(simde_mm512_add_epi64(simde__m512i_from_private(a), simde__m512i_from_private(b)));
+ r = simde__m512i_to_private(simde_mm512_xxx_epi64(simde__m512i_from_private(a), simde__m512i_from_private(b)));
printf(" { simde_mm512_set_epi64(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
" INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
@@ -2161,22 +2161,22 @@ test_simde_mm512_xxx_epu32(const MunitParameter params[], void* data) {
r = simde__m512i_to_private(simde_mm512_xxx_epu32(simde__m512i_from_private(a), simde__m512i_from_private(b)));
- printf(" { simde_x_mm512_set_epu32(UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "),\n"
- " UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "),\n"
- " UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "),\n"
- " UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 ")),\n",
+ printf(" { simde_x_mm512_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")),\n",
a.i32[15], a.i32[14], a.i32[13], a.i32[12], a.i32[11], a.i32[10], a.i32[ 9], a.i32[ 8],
a.i32[ 7], a.i32[ 6], a.i32[ 5], a.i32[ 4], a.i32[ 3], a.i32[ 2], a.i32[ 1], a.i32[ 0]);
- printf(" simde_x_mm512_set_epu32(UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "),\n"
- " UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "),\n"
- " UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "),\n"
- " UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 ")),\n",
+ printf(" simde_x_mm512_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")),\n",
b.u32[15], b.u32[14], b.u32[13], b.u32[12], b.u32[11], b.u32[10], b.u32[ 9], b.u32[ 8],
b.u32[ 7], b.u32[ 6], b.u32[ 5], b.u32[ 4], b.u32[ 3], b.u32[ 2], b.u32[ 1], b.u32[ 0]);
- printf(" simde_x_mm512_set_epu32(UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "),\n"
- " UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "),\n"
- " UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "),\n"
- " UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 ")) },\n",
+ printf(" simde_x_mm512_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")) },\n",
r.u32[15], r.u32[14], r.u32[13], r.u32[12], r.u32[11], r.u32[10], r.u32[ 9], r.u32[ 8],
r.u32[ 7], r.u32[ 6], r.u32[ 5], r.u32[ 4], r.u32[ 3], r.u32[ 2], r.u32[ 1], r.u32[ 0]);
}
=====================================
test/x86/svml.c
=====================================
@@ -1374,54 +1374,54 @@ test_simde_mm256_div_epu32(const MunitParameter params[], void* data) {
simde__m256i b;
simde__m256i r;
} test_vec[8] = {
- { simde_x_mm256_set_epu32(UINT32_C(-1508907154), UINT32_C( -957483036), UINT32_C( 212354336), UINT32_C( -857281839),
- UINT32_C(-1973841665), UINT32_C( -629468237), UINT32_C( 1276969649), UINT32_C(-1275754054)),
- simde_x_mm256_set_epu32(UINT32_C( 451417), UINT32_C( 462783328), UINT32_C( 8576290), UINT32_C( 64270331),
- UINT32_C( 37146), UINT32_C( 620145), UINT32_C( 21213860), UINT32_C( 1)),
- simde_x_mm256_set_epu32(UINT32_C( 6171), UINT32_C( 7), UINT32_C( 24), UINT32_C( 53),
- UINT32_C( 62486), UINT32_C( 5910), UINT32_C( 60), UINT32_C(-1275754054)) },
- { simde_x_mm256_set_epu32(UINT32_C( 2104697030), UINT32_C( 1350155206), UINT32_C( -106916491), UINT32_C( 1455998635),
- UINT32_C( 2131081108), UINT32_C( 1971786408), UINT32_C( -939494414), UINT32_C(-1613886259)),
- simde_x_mm256_set_epu32(UINT32_C( 231), UINT32_C( 73705552), UINT32_C( 130241), UINT32_C( 7832),
- UINT32_C( 16162638), UINT32_C( 109685672), UINT32_C( 8), UINT32_C( 334915670)),
- simde_x_mm256_set_epu32(UINT32_C( 9111242), UINT32_C( 18), UINT32_C( 32156), UINT32_C( 185903),
- UINT32_C( 131), UINT32_C( 17), UINT32_C( 419434110), UINT32_C( 8)) },
- { simde_x_mm256_set_epu32(UINT32_C( 1264216498), UINT32_C( 1299581643), UINT32_C( 1889525960), UINT32_C(-2008114472),
- UINT32_C( 423719032), UINT32_C( 1818267804), UINT32_C( 762708763), UINT32_C( 1501733683)),
- simde_x_mm256_set_epu32(UINT32_C( 77), UINT32_C( 824), UINT32_C( 60), UINT32_C( 3129809),
- UINT32_C( 5), UINT32_C( 468), UINT32_C( 236), UINT32_C( 7848050)),
- simde_x_mm256_set_epu32(UINT32_C( 16418396), UINT32_C( 1577162), UINT32_C( 31492099), UINT32_C( 730),
- UINT32_C( 84743806), UINT32_C( 3885187), UINT32_C( 3231816), UINT32_C( 191)) },
- { simde_x_mm256_set_epu32(UINT32_C(-1361590415), UINT32_C( 542028757), UINT32_C( 1611666138), UINT32_C( -924139483),
- UINT32_C( 1692903269), UINT32_C( 733100015), UINT32_C( 366410692), UINT32_C(-1939074859)),
- simde_x_mm256_set_epu32(UINT32_C( 24336788), UINT32_C( 1642338), UINT32_C( 666), UINT32_C( 2067),
- UINT32_C( 85), UINT32_C( 1531095), UINT32_C( 40675), UINT32_C( 263833438)),
- simde_x_mm256_set_epu32(UINT32_C( 120), UINT32_C( 330), UINT32_C( 2419919), UINT32_C( 1630782),
- UINT32_C( 19916509), UINT32_C( 478), UINT32_C( 9008), UINT32_C( 8)) },
- { simde_x_mm256_set_epu32(UINT32_C( 1625411893), UINT32_C( 116526), UINT32_C( -479225158), UINT32_C( 1269098691),
- UINT32_C( 138365868), UINT32_C(-1760846141), UINT32_C(-1763002020), UINT32_C( 350477953)),
- simde_x_mm256_set_epu32(UINT32_C( 121261), UINT32_C( 100533765), UINT32_C( 239), UINT32_C( 273093756),
- UINT32_C( 2), UINT32_C( 123647188), UINT32_C( 137667), UINT32_C( 690)),
- simde_x_mm256_set_epu32(UINT32_C( 13404), UINT32_C( 0), UINT32_C( 15965448), UINT32_C( 4),
- UINT32_C( 69182934), UINT32_C( 20), UINT32_C( 18391), UINT32_C( 507939)) },
- { simde_x_mm256_set_epu32(UINT32_C(-1111689783), UINT32_C( 410237776), UINT32_C( 6985125), UINT32_C(-2092161126),
- UINT32_C(-1515805182), UINT32_C( 269424680), UINT32_C( -261060991), UINT32_C( -409004229)),
- simde_x_mm256_set_epu32(UINT32_C( 180), UINT32_C( 114), UINT32_C( 14804), UINT32_C( 11),
- UINT32_C( 178), UINT32_C( 1), UINT32_C( 8684605), UINT32_C( 1201514)),
- simde_x_mm256_set_epu32(UINT32_C( 17684875), UINT32_C( 3598576), UINT32_C( 471), UINT32_C( 200255106),
- UINT32_C( 15613270), UINT32_C( 269424680), UINT32_C( 464), UINT32_C( 3234)) },
- { simde_x_mm256_set_epu32(UINT32_C( -856773668), UINT32_C( 524035790), UINT32_C( 1177944500), UINT32_C( -122549241),
- UINT32_C( 860434944), UINT32_C( 1217684690), UINT32_C( -195086485), UINT32_C(-1324434909)),
- simde_x_mm256_set_epu32(UINT32_C( 1), UINT32_C( 5), UINT32_C( 7), UINT32_C( 10503049),
- UINT32_C( 837677), UINT32_C( 272262), UINT32_C( 72522), UINT32_C( 54731225)),
- simde_x_mm256_set_epu32(UINT32_C( -856773668), UINT32_C( 104807158), UINT32_C( 168277785), UINT32_C( 397),
- UINT32_C( 1027), UINT32_C( 4472), UINT32_C( 56532), UINT32_C( 54)) },
- { simde_x_mm256_set_epu32(UINT32_C(-1829840745), UINT32_C( -322582834), UINT32_C( 1754306261), UINT32_C( -847843135),
- UINT32_C( -659439141), UINT32_C(-1948366208), UINT32_C( -120901755), UINT32_C( -755292688)),
- simde_x_mm256_set_epu32(UINT32_C( 24846326), UINT32_C( 572777), UINT32_C( 39081742), UINT32_C( 3635908),
- UINT32_C( 1180), UINT32_C( 359797), UINT32_C( 107711383), UINT32_C( 2)),
- simde_x_mm256_set_epu32(UINT32_C( 99), UINT32_C( 6935), UINT32_C( 44), UINT32_C( 948),
- UINT32_C( 3080956), UINT32_C( 6522), UINT32_C( 38), UINT32_C( 1769837304)) }
+ { simde_x_mm256_set_epu32(UINT32_C( 621216267), UINT32_C(2973447507), UINT32_C(1814279233), UINT32_C(3673557536),
+ UINT32_C(4015780858), UINT32_C(1070914538), UINT32_C(2707640519), UINT32_C(3041291274)),
+ simde_x_mm256_set_epu32(UINT32_C( 122731), UINT32_C( 51630147), UINT32_C( 152670), UINT32_C( 7731229),
+ UINT32_C( 711400), UINT32_C( 1744981), UINT32_C( 164943127), UINT32_C( 169494)),
+ simde_x_mm256_set_epu32(UINT32_C( 5061), UINT32_C( 57), UINT32_C( 11883), UINT32_C( 475),
+ UINT32_C( 5644), UINT32_C( 613), UINT32_C( 16), UINT32_C( 17943)) },
+ { simde_x_mm256_set_epu32(UINT32_C(1084014678), UINT32_C(1666523830), UINT32_C(3454667769), UINT32_C(4029614313),
+ UINT32_C(3425016021), UINT32_C(2449839571), UINT32_C(1601532569), UINT32_C(1519388398)),
+ simde_x_mm256_set_epu32(UINT32_C( 130157), UINT32_C( 5585515), UINT32_C( 62691231), UINT32_C( 37123),
+ UINT32_C( 2515600), UINT32_C( 106484982), UINT32_C(4168501606), UINT32_C( 2781814)),
+ simde_x_mm256_set_epu32(UINT32_C( 8328), UINT32_C( 298), UINT32_C( 55), UINT32_C( 108547),
+ UINT32_C( 1361), UINT32_C( 23), UINT32_C( 0), UINT32_C( 546)) },
+ { simde_x_mm256_set_epu32(UINT32_C(2187853776), UINT32_C( 131263503), UINT32_C( 20338031), UINT32_C(3062800456),
+ UINT32_C(1802896354), UINT32_C( 22231847), UINT32_C(3438214155), UINT32_C(1776513196)),
+ simde_x_mm256_set_epu32(UINT32_C( 28353115), UINT32_C( 92496104), UINT32_C( 15335526), UINT32_C( 99105532),
+ UINT32_C( 5905009), UINT32_C( 27824), UINT32_C( 28986), UINT32_C( 12459911)),
+ simde_x_mm256_set_epu32(UINT32_C( 77), UINT32_C( 1), UINT32_C( 1), UINT32_C( 30),
+ UINT32_C( 305), UINT32_C( 799), UINT32_C( 118616), UINT32_C( 142)) },
+ { simde_x_mm256_set_epu32(UINT32_C( 524596333), UINT32_C(3965897825), UINT32_C(1593754725), UINT32_C( 694203496),
+ UINT32_C(1917650066), UINT32_C(2692610113), UINT32_C(1620259645), UINT32_C( 607116294)),
+ simde_x_mm256_set_epu32(UINT32_C( 29757558), UINT32_C( 80117), UINT32_C( 412054571), UINT32_C( 878110),
+ UINT32_C(4124070325), UINT32_C( 8250706), UINT32_C( 7930575), UINT32_C( 51813)),
+ simde_x_mm256_set_epu32(UINT32_C( 17), UINT32_C( 49501), UINT32_C( 3), UINT32_C( 790),
+ UINT32_C( 0), UINT32_C( 326), UINT32_C( 204), UINT32_C( 11717)) },
+ { simde_x_mm256_set_epu32(UINT32_C( 625862951), UINT32_C( 793130310), UINT32_C(2489185635), UINT32_C(2468815203),
+ UINT32_C(3079066921), UINT32_C( 802958712), UINT32_C(1537818066), UINT32_C(1678295724)),
+ simde_x_mm256_set_epu32(UINT32_C( 8259237), UINT32_C( 229091), UINT32_C( 7899398), UINT32_C( 41009690),
+ UINT32_C( 26030333), UINT32_C( 228627), UINT32_C(1200021710), UINT32_C( 186204)),
+ simde_x_mm256_set_epu32(UINT32_C( 75), UINT32_C( 3462), UINT32_C( 315), UINT32_C( 60),
+ UINT32_C( 118), UINT32_C( 3512), UINT32_C( 1), UINT32_C( 9013)) },
+ { simde_x_mm256_set_epu32(UINT32_C(3334078645), UINT32_C(2226952893), UINT32_C(1901933944), UINT32_C(3456551705),
+ UINT32_C(3394846076), UINT32_C(2592342753), UINT32_C(1822000161), UINT32_C(3060682219)),
+ simde_x_mm256_set_epu32(UINT32_C( 55529), UINT32_C( 95077), UINT32_C( 61849330), UINT32_C( 77269),
+ UINT32_C( 181901), UINT32_C( 66287), UINT32_C( 46407), UINT32_C( 1962)),
+ simde_x_mm256_set_epu32(UINT32_C( 60042), UINT32_C( 23422), UINT32_C( 30), UINT32_C( 44734),
+ UINT32_C( 18663), UINT32_C( 39107), UINT32_C( 39261), UINT32_C( 1559980)) },
+ { simde_x_mm256_set_epu32(UINT32_C(2418478797), UINT32_C(3856569345), UINT32_C(2562700829), UINT32_C(2670510577),
+ UINT32_C(3958231909), UINT32_C(3386864730), UINT32_C(2249491002), UINT32_C( 367242130)),
+ simde_x_mm256_set_epu32(UINT32_C( 106591767), UINT32_C( 591565864), UINT32_C( 241208), UINT32_C( 384474),
+ UINT32_C( 63569588), UINT32_C(1007016971), UINT32_C( 701090048), UINT32_C( 4482965)),
+ simde_x_mm256_set_epu32(UINT32_C( 22), UINT32_C( 6), UINT32_C( 10624), UINT32_C( 6945),
+ UINT32_C( 62), UINT32_C( 3), UINT32_C( 3), UINT32_C( 81)) },
+ { simde_x_mm256_set_epu32(UINT32_C(3497551851), UINT32_C(3538232808), UINT32_C(3581222707), UINT32_C(2092274030),
+ UINT32_C(1202922035), UINT32_C(3381143079), UINT32_C(1645890362), UINT32_C(2497764821)),
+ simde_x_mm256_set_epu32(UINT32_C( 7255461), UINT32_C( 387871), UINT32_C( 216379987), UINT32_C( 1108325),
+ UINT32_C( 9779926), UINT32_C( 265173482), UINT32_C( 305369), UINT32_C(1628979148)),
+ simde_x_mm256_set_epu32(UINT32_C( 482), UINT32_C( 9122), UINT32_C( 16), UINT32_C( 1887),
+ UINT32_C( 122), UINT32_C( 12), UINT32_C( 5389), UINT32_C( 1)) }
};
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
View it on GitLab: https://salsa.debian.org/med-team/simde/-/commit/91b3e7158b82a5197616a3ff3b3dca8766494758
--
View it on GitLab: https://salsa.debian.org/med-team/simde/-/commit/91b3e7158b82a5197616a3ff3b3dca8766494758
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200403/f18e8096/attachment-0001.html>
More information about the debian-med-commit mailing list