[med-svn] [Git][med-team/simde][upstream] New upstream version 0.0.0.git.20200403

Michael R. Crusoe gitlab at salsa.debian.org
Fri Apr 3 10:45:15 BST 2020



Michael R. Crusoe pushed to branch upstream at Debian Med / simde


Commits:
91b3e715 by Michael R. Crusoe at 2020-04-03T08:39:41+02:00
New upstream version 0.0.0.git.20200403
- - - - -


11 changed files:

- simde/x86/avx.h
- simde/x86/avx2.h
- simde/x86/avx512bw.h
- simde/x86/avx512f.h
- simde/x86/sse.h
- simde/x86/sse2.h
- test/x86/avx2.c
- test/x86/avx512bw.c
- test/x86/avx512f.c
- test/x86/skel.c
- test/x86/svml.c


Changes:

=====================================
simde/x86/avx.h
=====================================
@@ -1488,7 +1488,7 @@ simde_mm256_broadcast_ps (simde__m128 const * mem_addr) {
 #endif
 }
 #if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-#  define _mm256_broadcast_ps(mem_addr) simde_mm256_broadcast_ps(mem_addr)
+#  define _mm256_broadcast_ps(mem_addr) simde_mm256_broadcast_ps(HEDLEY_REINTERPRET_CAST(simde__m128 const*, mem_addr)) /* cast must match the simde__m128 const* parameter, not float const* */
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -1508,7 +1508,7 @@ simde_mm256_broadcast_sd (simde_float64 const * a) {
 #endif
 }
 #if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-#  define _mm256_broadcast_sd(mem_addr) simde_mm256_broadcast_sd(mem_addr)
+#  define _mm256_broadcast_sd(mem_addr) simde_mm256_broadcast_sd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr))
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -3358,7 +3358,7 @@ simde_mm_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde
 #endif
 }
 #if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-#  define _mm_maskload_pd(mem_addr, mask) simde_mm_maskload_pd(mem_addr, mask)
+#  define _mm_maskload_pd(mem_addr, mask) simde_mm_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -3379,7 +3379,7 @@ simde_mm256_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], si
 #endif
 }
 #if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-#  define _mm256_maskload_pd(mem_addr, mask) simde_mm256_maskload_pd(mem_addr, mask)
+#  define _mm256_maskload_pd(mem_addr, mask) simde_mm256_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -3400,7 +3400,7 @@ simde_mm_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde
 #endif
 }
 #if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-#  define _mm_maskload_ps(mem_addr, mask) simde_mm_maskload_ps(mem_addr, mask)
+#  define _mm_maskload_ps(mem_addr, mask) simde_mm_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -3421,7 +3421,7 @@ simde_mm256_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], si
 #endif
 }
 #if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-#  define _mm256_maskload_ps(mem_addr, mask) simde_mm256_maskload_ps(mem_addr, mask)
+#  define _mm256_maskload_ps(mem_addr, mask) simde_mm256_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -3441,7 +3441,7 @@ simde_mm_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m12
 #endif
 }
 #if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-#  define _mm_maskstore_pd(mem_addr, mask, a) simde_mm_maskstore_pd(mem_addr, mask, a)
+#  define _mm_maskstore_pd(mem_addr, mask, a) simde_mm_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -3461,7 +3461,7 @@ simde_mm256_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__
 #endif
 }
 #if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-#  define _mm256_maskstore_pd(mem_addr, mask, a) simde_mm256_maskstore_pd(mem_addr, mask, a)
+#  define _mm256_maskstore_pd(mem_addr, mask, a) simde_mm256_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -3481,7 +3481,7 @@ simde_mm_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m12
 #endif
 }
 #if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-#  define _mm_maskstore_ps(mem_addr, mask, a) simde_mm_maskstore_ps(mem_addr, mask, a)
+#  define _mm_maskstore_ps(mem_addr, mask, a) simde_mm_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -3501,7 +3501,7 @@ simde_mm256_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__
 #endif
 }
 #if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-#  define _mm256_maskstore_ps(mem_addr, mask, a) simde_mm256_maskstore_ps(mem_addr, mask, a)
+#  define _mm256_maskstore_ps(mem_addr, mask, a) simde_mm256_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -4486,7 +4486,7 @@ simde_mm256_store_ps (simde_float32 mem_addr[8], simde__m256 a) {
 #endif
 }
 #if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-#  define _mm256_store_ps(mem_addr, a) simde_mm256_store_ps(mem_addr, a)
+#  define _mm256_store_ps(mem_addr, a) simde_mm256_store_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -4501,7 +4501,7 @@ simde_mm256_store_pd (simde_float64 mem_addr[4], simde__m256d a) {
 #endif
 }
 #if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-#  define _mm256_store_pd(mem_addr, a) simde_mm256_store_pd(mem_addr, a)
+#  define _mm256_store_pd(mem_addr, a) simde_mm256_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -4529,7 +4529,7 @@ simde_mm256_storeu_ps (simde_float32 mem_addr[8], simde__m256 a) {
 #endif
 }
 #if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-#  define _mm256_storeu_ps(mem_addr, a) simde_mm256_storeu_ps(mem_addr, a)
+#  define _mm256_storeu_ps(mem_addr, a) simde_mm256_storeu_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -4542,7 +4542,7 @@ simde_mm256_storeu_pd (simde_float64 mem_addr[4], simde__m256d a) {
 #endif
 }
 #if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-#  define _mm256_storeu_pd(mem_addr, a) simde_mm256_storeu_pd(mem_addr, a)
+#  define _mm256_storeu_pd(mem_addr, a) simde_mm256_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -4600,7 +4600,7 @@ simde_mm256_stream_ps (simde_float32 mem_addr[8], simde__m256 a) {
 #endif
 }
 #if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-#  define _mm256_stream_ps(mem_addr, a) simde_mm256_stream_ps(mem_addr, a)
+#  define _mm256_stream_ps(mem_addr, a) simde_mm256_stream_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -4615,7 +4615,7 @@ simde_mm256_stream_pd (simde_float64 mem_addr[4], simde__m256d a) {
 #endif
 }
 #if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-#  define _mm256_stream_pd(mem_addr, a) simde_mm256_stream_pd(mem_addr, a)
+#  define _mm256_stream_pd(mem_addr, a) simde_mm256_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES


=====================================
simde/x86/avx2.h
=====================================
@@ -813,7 +813,7 @@ simde_mm256_cvtepi8_epi32 (simde__m128i a) {
 #endif
 }
 #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
-#  define _mm256_cvtepi8_epi32(a) simde_mm256_cvtepi8_epi16(a)
+#  define _mm256_cvtepi8_epi32(a) simde_mm256_cvtepi8_epi32(a)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -2028,7 +2028,7 @@ simde_mm256_unpackhi_epi16 (simde__m256i a, simde__m256i b) {
       b_ = simde__m256i_to_private(b);
 
     #if defined(SIMDE__SHUFFLE_VECTOR)
-      r_.i16 =SIMDE__SHUFFLE_VECTOR(16, 32, a_.i16, b_.i16,
+      r_.i16 = SIMDE__SHUFFLE_VECTOR(16, 32, a_.i16, b_.i16,
          4, 20,  5, 21,  6, 22,  7, 23,
         12, 28, 13, 29, 14, 30, 15, 31);
     #else
@@ -2043,6 +2043,57 @@ simde_mm256_unpackhi_epi16 (simde__m256i a, simde__m256i b) {
 #  define _mm256_unpackhi_epi16(a, b) simde_mm256_unpackhi_epi16(a, b)
 #endif
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m256i
+simde_mm256_unpackhi_epi32 (simde__m256i a, simde__m256i b) { /* per 128-bit half: interleave the two upper 32-bit elements of a and b */
+#if defined(SIMDE_AVX2_NATIVE)
+  return _mm256_unpackhi_epi32(a, b);
+#else
+  simde__m256i_private
+    r_,
+    a_ = simde__m256i_to_private(a),
+    b_ = simde__m256i_to_private(b);
+  /* Portable path: one vector shuffle when available, otherwise the 128-bit unpackhi applied to each half. */
+#if defined(SIMDE__SHUFFLE_VECTOR)
+  r_.i32 = SIMDE__SHUFFLE_VECTOR(32, 32, a_.i32, b_.i32,
+                                 2, 10, 3, 11, 6, 14, 7, 15);
+#else
+  r_.m128i[0] = simde_mm_unpackhi_epi32(a_.m128i[0], b_.m128i[0]);
+  r_.m128i[1] = simde_mm_unpackhi_epi32(a_.m128i[1], b_.m128i[1]);
+#endif
+
+  return simde__m256i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES) /* expose the function under the native intrinsic name */
+#  define _mm256_unpackhi_epi32(a, b) simde_mm256_unpackhi_epi32(a, b)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m256i
+simde_mm256_unpackhi_epi64 (simde__m256i a, simde__m256i b) { /* per 128-bit half: pair the upper 64-bit element of a with the upper 64-bit element of b */
+#if defined(SIMDE_AVX2_NATIVE)
+  return _mm256_unpackhi_epi64(a, b);
+#else
+  simde__m256i_private
+    r_,
+    a_ = simde__m256i_to_private(a),
+    b_ = simde__m256i_to_private(b);
+  /* Portable path: one vector shuffle when available, otherwise the 128-bit unpackhi applied to each half. */
+#if defined(SIMDE__SHUFFLE_VECTOR)
+  r_.i64 = SIMDE__SHUFFLE_VECTOR(64, 32, a_.i64, b_.i64, 1, 5, 3, 7);
+#else
+  r_.m128i[0] = simde_mm_unpackhi_epi64(a_.m128i[0], b_.m128i[0]);
+  r_.m128i[1] = simde_mm_unpackhi_epi64(a_.m128i[1], b_.m128i[1]);
+#endif
+
+  return simde__m256i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES) /* expose the function under the native intrinsic name */
+#  define _mm256_unpackhi_epi64(a, b) simde_mm256_unpackhi_epi64(a, b)
+#endif
+
 SIMDE__FUNCTION_ATTRIBUTES
 simde__m256i
 simde_mm256_xor_si256 (simde__m256i a, simde__m256i b) {


=====================================
simde/x86/avx512bw.h
=====================================
@@ -236,6 +236,76 @@ simde_mm512_adds_epu16 (simde__m512i a, simde__m512i b) {
 #  define _mm512_adds_epu16(a, b) simde_mm512_adds_epu16(a, b)
 #endif
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_shuffle_epi8 (simde__m512i a, simde__m512i b) { /* byte shuffle of a controlled by the bytes of b, confined to each 16-byte lane */
+#if defined(SIMDE_AVX512BW_NATIVE)
+  return _mm512_shuffle_epi8(a, b);
+#else
+  simde__m512i_private
+    r_,
+    a_ = simde__m512i_to_private(a),
+    b_ = simde__m512i_to_private(b);
+  /* Use the widest narrower shuffle the target offers before falling back to the scalar loop. */
+#if defined(SIMDE_ARCH_X86_AVX2)
+  r_.m256i[0] = simde_mm256_shuffle_epi8(a_.m256i[0], b_.m256i[0]);
+  r_.m256i[1] = simde_mm256_shuffle_epi8(a_.m256i[1], b_.m256i[1]);
+#elif defined(SIMDE_ARCH_X86_SSSE3)
+  r_.m128i[0] = simde_mm_shuffle_epi8(a_.m128i[0], b_.m128i[0]);
+  r_.m128i[1] = simde_mm_shuffle_epi8(a_.m128i[1], b_.m128i[1]);
+  r_.m128i[2] = simde_mm_shuffle_epi8(a_.m128i[2], b_.m128i[2]);
+  r_.m128i[3] = simde_mm_shuffle_epi8(a_.m128i[3], b_.m128i[3]);
+#else
+ SIMDE__VECTORIZE
+  for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
+    r_.u8[i] = (b_.u8[i] & 0x80) ? 0 : a_.u8[(b_.u8[i] & 0x0f) + (i & 0x30)]; /* bit 7 of the control byte zeroes the output; low 4 bits index within the lane, (i & 0x30) is the lane base */
+  }
+#endif
+
+  return simde__m512i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX512BW_ENABLE_NATIVE_ALIASES) /* expose the function under the native intrinsic name */
+#  define _mm512_shuffle_epi8(a, b) simde_mm512_shuffle_epi8(a, b)
+#endif
+
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__mmask64
+simde_mm512_cmpeq_epi8_mask (simde__m512i a, simde__m512i b) { /* returns a mask whose bit i is set when byte i of a equals byte i of b */
+#if defined(SIMDE_AVX512BW_NATIVE)
+  return _mm512_cmpeq_epi8_mask(a, b);
+#else
+  simde__m512i_private
+    a_ = simde__m512i_to_private(a),
+    b_ = simde__m512i_to_private(b);
+#if defined(SIMDE_ARCH_X86_AVX2)
+  simde__mmask64 r_;
+  /* Build the mask from the top down: upper 256-bit half first, then shift it up and OR in the lower half. */
+  // The cast on the second line is essential: without it a set sign bit (bit 31) would be sign-extended into the upper 32 bits when widened to 64 bits, clobbering the high half already held in r_.
+  r_ =              (uint32_t) simde_mm256_movemask_epi8(simde_mm256_cmpeq_epi8(a_.m256i[1], b_.m256i[1]));
+  r_ = (r_ << 32) | (uint32_t) simde_mm256_movemask_epi8(simde_mm256_cmpeq_epi8(a_.m256i[0], b_.m256i[0]));
+#elif defined(SIMDE_ARCH_X86_SSE2)
+  simde__mmask64 r_;
+  /* Same top-down construction, 16 mask bits per 128-bit quarter. */
+  r_ =              simde_mm_movemask_epi8(simde_mm_cmpeq_epi8(a_.m128i[3], b_.m128i[3]));
+  r_ = (r_ << 16) | simde_mm_movemask_epi8(simde_mm_cmpeq_epi8(a_.m128i[2], b_.m128i[2]));
+  r_ = (r_ << 16) | simde_mm_movemask_epi8(simde_mm_cmpeq_epi8(a_.m128i[1], b_.m128i[1]));
+  r_ = (r_ << 16) | simde_mm_movemask_epi8(simde_mm_cmpeq_epi8(a_.m128i[0], b_.m128i[0]));
+#else
+  simde__mmask64 r_ = 0;
+  /* Scalar fallback: compare byte by byte and set the matching mask bit. */
+  for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) {
+    r_ |= (a_.u8[i] == b_.u8[i]) ? (1ULL << i) : 0;
+  }
+#endif
+  return r_;
+#endif
+}
+#if defined(SIMDE_AVX512BW_ENABLE_NATIVE_ALIASES) /* expose the function under the native intrinsic name */
+#  define _mm512_cmpeq_epi8_mask(a, b) simde_mm512_cmpeq_epi8_mask(a, b)
+#endif
+
 SIMDE__END_DECLS
 
 HEDLEY_DIAGNOSTIC_POP


=====================================
simde/x86/avx512f.h
=====================================
@@ -1669,6 +1669,106 @@ simde_mm512_mask_cmpeq_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m51
 #  define _mm512_mask_cmpeq_epi32_mask(k1, a, b) simde_mm512_mask_cmpeq_epi32_mask(k1, a, b)
 #endif
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_cvtepi8_epi32 (simde__m128i a) { /* widens the i8 elements of a into the i32 elements of a 512-bit result, index for index */
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_cvtepi8_epi32(a);
+#else
+  simde__m512i_private r_;
+  simde__m128i_private a_ = simde__m128i_to_private(a);
+  /* Portable path: whole-vector conversion when available, otherwise an element-wise loop. */
+#if defined(SIMDE__CONVERT_VECTOR)
+  SIMDE__CONVERT_VECTOR(r_.i32, a_.i8);
+#else
+  SIMDE__VECTORIZE
+  for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
+    r_.i32[i] = a_.i8[i];
+  }
+#endif
+
+  return simde__m512i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES) /* expose the function under the native intrinsic name */
+#  define _mm512_cvtepi8_epi32(a) simde_mm512_cvtepi8_epi32(a)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_cvtepi8_epi64 (simde__m128i a) { /* widens the leading i8 elements of a into the i64 elements of a 512-bit result, index for index */
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_cvtepi8_epi64(a);
+#else
+  simde__m512i_private r_;
+  simde__m128i_private a_ = simde__m128i_to_private(a);
+  /* Only as many source bytes as r_ has i64 elements are read; the vector path takes them from the first 64-bit part of a_. */
+#if defined(SIMDE__CONVERT_VECTOR)
+  SIMDE__CONVERT_VECTOR(r_.i64, a_.m64_private[0].i8);
+#else
+  SIMDE__VECTORIZE
+  for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
+    r_.i64[i] = a_.i8[i];
+  }
+#endif
+
+  return simde__m512i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES) /* expose the function under the native intrinsic name */
+#  define _mm512_cvtepi8_epi64(a) simde_mm512_cvtepi8_epi64(a)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m128i
+simde_mm512_cvtepi32_epi8 (simde__m512i a) { /* narrows each i32 element of a to the i8 element at the same index of a 128-bit result */
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_cvtepi32_epi8(a);
+#else
+  simde__m128i_private r_;
+  simde__m512i_private a_ = simde__m512i_to_private(a);
+  /* Portable path: whole-vector conversion when available, otherwise an element-wise cast to int8_t. */
+#if defined(SIMDE__CONVERT_VECTOR)
+  SIMDE__CONVERT_VECTOR(r_.i8, a_.i32);
+#else
+  SIMDE__VECTORIZE
+  for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
+    r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i32[i]);
+  }
+#endif
+
+  return simde__m128i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES) /* expose the function under the native intrinsic name */
+#  define _mm512_cvtepi32_epi8(a) simde_mm512_cvtepi32_epi8(a)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m128i
+simde_mm512_cvtepi64_epi8 (simde__m512i a) { /* narrows each i64 element of a to the i8 element at the same index; the rest of the result stays zero */
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_cvtepi64_epi8(a);
+#else
+  simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); /* zero first: only the leading bytes are written below */
+  simde__m512i_private a_ = simde__m512i_to_private(a);
+  /* The vector path writes the first 64-bit part of r_; the loop writes one byte per i64 element of a_. */
+#if defined(SIMDE__CONVERT_VECTOR)
+  SIMDE__CONVERT_VECTOR(r_.m64_private[0].i8, a_.i64);
+#else
+  SIMDE__VECTORIZE
+  for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
+    r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i64[i]);
+  }
+#endif
+
+  return simde__m128i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES) /* expose the function under the native intrinsic name */
+#  define _mm512_cvtepi64_epi8(a) simde_mm512_cvtepi64_epi8(a)
+#endif
+
 SIMDE__END_DECLS
 
 HEDLEY_DIAGNOSTIC_POP


=====================================
simde/x86/sse.h
=====================================
@@ -2984,7 +2984,7 @@ simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) {
 #endif
 }
 #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
-#  define _mm_store_ps(mem_addr, a) simde_mm_store_ps(mem_addr, (a))
+#  define _mm_store_ps(mem_addr, a) simde_mm_store_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -3004,7 +3004,7 @@ simde_mm_store_ps1 (simde_float32 mem_addr[4], simde__m128 a) {
 #endif
 }
 #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
-#  define _mm_store_ps1(mem_addr, a) simde_mm_store_ps1(mem_addr, (a))
+#  define _mm_store_ps1(mem_addr, a) simde_mm_store_ps1(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -3023,7 +3023,7 @@ simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) {
 #endif
 }
 #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
-#  define _mm_store_ss(mem_addr, a) simde_mm_store_ss(mem_addr, (a))
+#  define _mm_store_ss(mem_addr, a) simde_mm_store_ss(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -3038,7 +3038,7 @@ simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) {
 #endif
 }
 #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
-#  define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(mem_addr, (a))
+#  define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -3097,7 +3097,7 @@ simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) {
 #endif
 }
 #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
-#  define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(mem_addr, (a))
+#  define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -3116,7 +3116,7 @@ simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) {
 #endif
 }
 #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
-#  define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(mem_addr, (a))
+#  define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -3491,7 +3491,7 @@ simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) {
 #endif
 }
 #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
-#  define _mm_stream_ps(mem_addr, a) simde_mm_stream_ps(mem_addr, (a))
+#  define _mm_stream_ps(mem_addr, a) simde_mm_stream_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES


=====================================
simde/x86/sse2.h
=====================================
@@ -244,7 +244,7 @@ typedef union {
   typedef __m128i simde__m128i;
   typedef __m128d simde__m128d;
 #elif defined(SIMDE_SSE2_NEON)
-   typedef float32x4_t simde__m128i;
+   typedef int64x2_t simde__m128i;
 #  if defined(SIMDE_ARCH_AARCH64)
      typedef float64x2_t simde__m128d;
 #  elif defined(SIMDE_VECTOR_SUBSCRIPT)
@@ -4834,7 +4834,7 @@ simde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a
 #endif
 }
 #if defined(SIMDE_SSE2_ENABLE_NATIVE_ALIASES)
-#  define _mm_store_pd(mem_addr, a) simde_mm_store_pd(mem_addr, a)
+#  define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -4851,10 +4851,10 @@ simde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d
   mem_addr[1] = a_.f64[0];
 #endif
 }
-#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(mem_addr, a)
+#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)
 #if defined(SIMDE_SSE2_ENABLE_NATIVE_ALIASES)
-#  define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(mem_addr, a)
-#  define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(mem_addr, a)
+#  define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)
+#  define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -4875,7 +4875,7 @@ simde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) {
 #endif
 }
 #if defined(SIMDE_SSE2_ENABLE_NATIVE_ALIASES)
-#  define _mm_store_sd(mem_addr, a) simde_mm_store_sd(mem_addr, a)
+#  define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -4914,7 +4914,7 @@ simde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) {
 #endif
 }
 #if defined(SIMDE_SSE2_ENABLE_NATIVE_ALIASES)
-#  define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(mem_addr, a)
+#  define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -4948,7 +4948,7 @@ simde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) {
 #endif
 }
 #if defined(SIMDE_SSE2_ENABLE_NATIVE_ALIASES)
-#  define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(mem_addr, a)
+#  define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -4966,7 +4966,7 @@ simde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) {
 #endif
 }
 #if defined(SIMDE_SSE2_ENABLE_NATIVE_ALIASES)
-#  define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(mem_addr, a)
+#  define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -4979,7 +4979,7 @@ simde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) {
 #endif
 }
 #if defined(SIMDE_SSE2_ENABLE_NATIVE_ALIASES)
-#  define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(mem_addr, a)
+#  define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -5013,7 +5013,7 @@ simde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d
 #endif
 }
 #if defined(SIMDE_SSE2_ENABLE_NATIVE_ALIASES)
-#  define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(mem_addr, a)
+#  define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -5969,6 +5969,11 @@ simde_x_mm_not_si128 (simde__m128i a) {
   return simde__m128i_from_private(r_);
 }
 
+#define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y)) /* packs two selectors into one value: x in bit 1, y in bit 0 */
+#if defined(SIMDE_SSE2_ENABLE_NATIVE_ALIASES) /* expose the macro under the native name */
+#  define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y)
+#endif
+
 SIMDE__END_DECLS
 
 HEDLEY_DIAGNOSTIC_POP


=====================================
test/x86/avx2.c
=====================================
@@ -5558,54 +5558,54 @@ test_simde_mm256_min_epu32(const MunitParameter params[], void* data) {
     simde__m256i b;
     simde__m256i r;
   } test_vec[8] = {
-    { simde_x_mm256_set_epu32(UINT32_C( 1440846368), UINT32_C(  -63978166), UINT32_C( 1399113445), UINT32_C(-1455222788),
-                              UINT32_C( -469490798), UINT32_C( 1991020107), UINT32_C(-1863870898), UINT32_C( 1328594239)),
-      simde_x_mm256_set_epu32(UINT32_C(  102513809), UINT32_C( -329455244), UINT32_C(  307870409), UINT32_C(  295630559),
-                              UINT32_C(-1234945944), UINT32_C( -774625773), UINT32_C( -390354839), UINT32_C(-1495847433)),
-      simde_x_mm256_set_epu32(UINT32_C(  102513809), UINT32_C( -329455244), UINT32_C(  307870409), UINT32_C(  295630559),
-                              UINT32_C(-1234945944), UINT32_C( 1991020107), UINT32_C(-1863870898), UINT32_C( 1328594239)) },
-    { simde_x_mm256_set_epu32(UINT32_C( 1307771138), UINT32_C(  491465332), UINT32_C(  393240962), UINT32_C( 1290764124),
-                              UINT32_C( 1683625215), UINT32_C(  490731907), UINT32_C( -455852593), UINT32_C( -956210525)),
-      simde_x_mm256_set_epu32(UINT32_C(-1635890822), UINT32_C(  910213294), UINT32_C(  -85983471), UINT32_C( -240357962),
-                              UINT32_C(-1606645062), UINT32_C( -435549149), UINT32_C(  512067539), UINT32_C(-2046648155)),
-      simde_x_mm256_set_epu32(UINT32_C( 1307771138), UINT32_C(  491465332), UINT32_C(  393240962), UINT32_C( 1290764124),
-                              UINT32_C( 1683625215), UINT32_C(  490731907), UINT32_C(  512067539), UINT32_C(-2046648155)) },
-    { simde_x_mm256_set_epu32(UINT32_C( -714376287), UINT32_C(-1856787219), UINT32_C(-1948205449), UINT32_C( 1985002804),
-                              UINT32_C(-1632505450), UINT32_C(  967094020), UINT32_C(  258843795), UINT32_C(-1730834268)),
-      simde_x_mm256_set_epu32(UINT32_C( -497194011), UINT32_C( 2046480498), UINT32_C( 1801500737), UINT32_C( 2033606287),
-                              UINT32_C(  289880638), UINT32_C( 2013746580), UINT32_C(-1003047754), UINT32_C( 1692706041)),
-      simde_x_mm256_set_epu32(UINT32_C( -714376287), UINT32_C( 2046480498), UINT32_C( 1801500737), UINT32_C( 1985002804),
-                              UINT32_C(  289880638), UINT32_C(  967094020), UINT32_C(  258843795), UINT32_C( 1692706041)) },
-    { simde_x_mm256_set_epu32(UINT32_C( 1320709158), UINT32_C(  816999148), UINT32_C(-1366813775), UINT32_C(  -28642953),
-                              UINT32_C(-1227971629), UINT32_C(-1379423721), UINT32_C(-1139745869), UINT32_C( -293312984)),
-      simde_x_mm256_set_epu32(UINT32_C(  332902085), UINT32_C( 1709201405), UINT32_C( 1874659226), UINT32_C( -372944690),
-                              UINT32_C(-2106372518), UINT32_C( 2058700648), UINT32_C( -582719177), UINT32_C( 1001999947)),
-      simde_x_mm256_set_epu32(UINT32_C(  332902085), UINT32_C(  816999148), UINT32_C( 1874659226), UINT32_C( -372944690),
-                              UINT32_C(-2106372518), UINT32_C( 2058700648), UINT32_C(-1139745869), UINT32_C( 1001999947)) },
-    { simde_x_mm256_set_epu32(UINT32_C( -964274667), UINT32_C(-1648595768), UINT32_C( 1471545107), UINT32_C(   27318365),
-                              UINT32_C(  260329589), UINT32_C(  227849259), UINT32_C( 1920321444), UINT32_C(-1936907575)),
-      simde_x_mm256_set_epu32(UINT32_C( 1696458631), UINT32_C( 1060012303), UINT32_C( -667172636), UINT32_C(-1886502820),
-                              UINT32_C( 1497838086), UINT32_C(  207594118), UINT32_C( -532680463), UINT32_C( 1866090536)),
-      simde_x_mm256_set_epu32(UINT32_C( 1696458631), UINT32_C( 1060012303), UINT32_C( 1471545107), UINT32_C(   27318365),
-                              UINT32_C(  260329589), UINT32_C(  207594118), UINT32_C( 1920321444), UINT32_C( 1866090536)) },
-    { simde_x_mm256_set_epu32(UINT32_C(-1777625938), UINT32_C( -154135345), UINT32_C(  174494711), UINT32_C(-1775989674),
-                              UINT32_C(-1837023557), UINT32_C( 1967600763), UINT32_C( -631660232), UINT32_C(-1996836188)),
-      simde_x_mm256_set_epu32(UINT32_C(  281157684), UINT32_C( 1154116325), UINT32_C(-1602584536), UINT32_C(  765551471),
-                              UINT32_C(  391237042), UINT32_C( -467359652), UINT32_C( 1535761889), UINT32_C(-1668896011)),
-      simde_x_mm256_set_epu32(UINT32_C(  281157684), UINT32_C( 1154116325), UINT32_C(  174494711), UINT32_C(  765551471),
-                              UINT32_C(  391237042), UINT32_C( 1967600763), UINT32_C( 1535761889), UINT32_C(-1996836188)) },
-    { simde_x_mm256_set_epu32(UINT32_C(  256903792), UINT32_C(    8208695), UINT32_C(  591537249), UINT32_C(-1897294295),
-                              UINT32_C( -958845995), UINT32_C( 1843022267), UINT32_C( 1972328098), UINT32_C( -907457076)),
-      simde_x_mm256_set_epu32(UINT32_C( -469518946), UINT32_C( -182686980), UINT32_C(  -65053560), UINT32_C(-1520653503),
-                              UINT32_C( 1810925047), UINT32_C( 1432780821), UINT32_C(-1505705572), UINT32_C(  709984577)),
-      simde_x_mm256_set_epu32(UINT32_C(  256903792), UINT32_C(    8208695), UINT32_C(  591537249), UINT32_C(-1897294295),
-                              UINT32_C( 1810925047), UINT32_C( 1432780821), UINT32_C( 1972328098), UINT32_C(  709984577)) },
-    { simde_x_mm256_set_epu32(UINT32_C( 2107372218), UINT32_C(-1339950000), UINT32_C( -517566965), UINT32_C( 1104805878),
-                              UINT32_C(  321684264), UINT32_C( -268829241), UINT32_C( 2019330239), UINT32_C(  204124644)),
-      simde_x_mm256_set_epu32(UINT32_C(-1246161689), UINT32_C( 1484088388), UINT32_C(  936889694), UINT32_C(  628920619),
-                              UINT32_C( 1829277398), UINT32_C(-2046097299), UINT32_C( -938854488), UINT32_C( 2001330537)),
-      simde_x_mm256_set_epu32(UINT32_C( 2107372218), UINT32_C( 1484088388), UINT32_C(  936889694), UINT32_C(  628920619),
-                              UINT32_C(  321684264), UINT32_C(-2046097299), UINT32_C( 2019330239), UINT32_C(  204124644)) }
+    { simde_x_mm256_set_epu32(UINT32_C(1967336836), UINT32_C(4229175532), UINT32_C(3383228965), UINT32_C( 979444700),
+                              UINT32_C( 138906106), UINT32_C( 146006389), UINT32_C(2494756173), UINT32_C(2105904126)),
+      simde_x_mm256_set_epu32(UINT32_C(2893387611), UINT32_C(2460478173), UINT32_C(3299027518), UINT32_C( 896184310),
+                              UINT32_C(1699227452), UINT32_C(1922730015), UINT32_C(1220777705), UINT32_C( 483108197)),
+      simde_x_mm256_set_epu32(UINT32_C(1967336836), UINT32_C(2460478173), UINT32_C(3299027518), UINT32_C( 896184310),
+                              UINT32_C( 138906106), UINT32_C( 146006389), UINT32_C(1220777705), UINT32_C( 483108197)) },
+    { simde_x_mm256_set_epu32(UINT32_C( 596030925), UINT32_C(1623933296), UINT32_C(4116080130), UINT32_C(2140384796),
+                              UINT32_C(1701047302), UINT32_C( 386212074), UINT32_C( 328829151), UINT32_C(3670208831)),
+      simde_x_mm256_set_epu32(UINT32_C(3740465827), UINT32_C(1627250042), UINT32_C(2355376593), UINT32_C(1538421544),
+                              UINT32_C( 533181547), UINT32_C(1474624420), UINT32_C( 464742120), UINT32_C(1469792713)),
+      simde_x_mm256_set_epu32(UINT32_C( 596030925), UINT32_C(1623933296), UINT32_C(2355376593), UINT32_C(1538421544),
+                              UINT32_C( 533181547), UINT32_C( 386212074), UINT32_C( 328829151), UINT32_C(1469792713)) },
+    { simde_x_mm256_set_epu32(UINT32_C(3969449230), UINT32_C(1656808571), UINT32_C(3512723294), UINT32_C(1894827634),
+                              UINT32_C( 143242580), UINT32_C(1411474427), UINT32_C(1474375050), UINT32_C(2576321811)),
+      simde_x_mm256_set_epu32(UINT32_C(3817774721), UINT32_C(1995677222), UINT32_C(2801037071), UINT32_C(1160265207),
+                              UINT32_C( 536799050), UINT32_C(3040213718), UINT32_C(2960203135), UINT32_C( 387289056)),
+      simde_x_mm256_set_epu32(UINT32_C(3817774721), UINT32_C(1656808571), UINT32_C(2801037071), UINT32_C(1160265207),
+                              UINT32_C( 143242580), UINT32_C(1411474427), UINT32_C(1474375050), UINT32_C( 387289056)) },
+    { simde_x_mm256_set_epu32(UINT32_C(4117731720), UINT32_C(2896203570), UINT32_C(2130869721), UINT32_C( 624464130),
+                              UINT32_C(3920434556), UINT32_C(3882005287), UINT32_C(2912704980), UINT32_C(3353740323)),
+      simde_x_mm256_set_epu32(UINT32_C(1011878374), UINT32_C( 876977997), UINT32_C( 633144937), UINT32_C(2591224872),
+                              UINT32_C(1689064732), UINT32_C(1345088039), UINT32_C(3928457299), UINT32_C(1317789172)),
+      simde_x_mm256_set_epu32(UINT32_C(1011878374), UINT32_C( 876977997), UINT32_C( 633144937), UINT32_C( 624464130),
+                              UINT32_C(1689064732), UINT32_C(1345088039), UINT32_C(2912704980), UINT32_C(1317789172)) },
+    { simde_x_mm256_set_epu32(UINT32_C(1278890315), UINT32_C(3068059236), UINT32_C( 937423722), UINT32_C( 545836753),
+                              UINT32_C(3944086739), UINT32_C(2155957693), UINT32_C( 750306742), UINT32_C( 995938818)),
+      simde_x_mm256_set_epu32(UINT32_C(2300290567), UINT32_C(3884383026), UINT32_C( 682756216), UINT32_C( 434660596),
+                              UINT32_C(3895444851), UINT32_C(3229085704), UINT32_C(3028266335), UINT32_C(2490258842)),
+      simde_x_mm256_set_epu32(UINT32_C(1278890315), UINT32_C(3068059236), UINT32_C( 682756216), UINT32_C( 434660596),
+                              UINT32_C(3895444851), UINT32_C(2155957693), UINT32_C( 750306742), UINT32_C( 995938818)) },
+    { simde_x_mm256_set_epu32(UINT32_C( 953347239), UINT32_C(3233201384), UINT32_C( 883460426), UINT32_C( 630153716),
+                              UINT32_C(2626834474), UINT32_C(4260188706), UINT32_C(4276291548), UINT32_C(1697478493)),
+      simde_x_mm256_set_epu32(UINT32_C(3841797977), UINT32_C( 519016629), UINT32_C(  54139722), UINT32_C(4160085404),
+                              UINT32_C(2354740665), UINT32_C( 224670449), UINT32_C(2606748626), UINT32_C(2604287898)),
+      simde_x_mm256_set_epu32(UINT32_C( 953347239), UINT32_C( 519016629), UINT32_C(  54139722), UINT32_C( 630153716),
+                              UINT32_C(2354740665), UINT32_C( 224670449), UINT32_C(2606748626), UINT32_C(1697478493)) },
+    { simde_x_mm256_set_epu32(UINT32_C(4276795094), UINT32_C(3517498069), UINT32_C(3286132221), UINT32_C(1640896057),
+                              UINT32_C(1497672480), UINT32_C(  40644986), UINT32_C(1824934232), UINT32_C(1194285849)),
+      simde_x_mm256_set_epu32(UINT32_C(1850977199), UINT32_C(1916865152), UINT32_C(2772610612), UINT32_C(2574813520),
+                              UINT32_C(1514631464), UINT32_C(2960447777), UINT32_C(2276426609), UINT32_C(1419491712)),
+      simde_x_mm256_set_epu32(UINT32_C(1850977199), UINT32_C(1916865152), UINT32_C(2772610612), UINT32_C(1640896057),
+                              UINT32_C(1497672480), UINT32_C(  40644986), UINT32_C(1824934232), UINT32_C(1194285849)) },
+    { simde_x_mm256_set_epu32(UINT32_C( 990577222), UINT32_C(1025245358), UINT32_C(2406551265), UINT32_C(4071927667),
+                              UINT32_C(3189139328), UINT32_C(1739830541), UINT32_C(1739044254), UINT32_C( 584922997)),
+      simde_x_mm256_set_epu32(UINT32_C(  72076484), UINT32_C(2950382985), UINT32_C( 915753842), UINT32_C( 355749909),
+                              UINT32_C(3000181234), UINT32_C(3918623971), UINT32_C(3711879869), UINT32_C(3103913192)),
+      simde_x_mm256_set_epu32(UINT32_C(  72076484), UINT32_C(1025245358), UINT32_C( 915753842), UINT32_C( 355749909),
+                              UINT32_C(3000181234), UINT32_C(1739830541), UINT32_C(1739044254), UINT32_C( 584922997)) }
   };
 
   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
@@ -8039,6 +8039,142 @@ test_simde_mm256_unpackhi_epi16(const MunitParameter params[], void* data) {
   return MUNIT_OK;
 }
 
+static MunitResult
+test_simde_mm256_unpackhi_epi32(const MunitParameter params[], void* data) { /* Runs simde_mm256_unpackhi_epi32 over 8 fixed (a, b, r) vectors and checks each result against r. */
+  (void) params; /* unused */
+  (void) data; /* unused */
+
+  const struct {
+    simde__m256i a; /* first argument passed to the function under test */
+    simde__m256i b; /* second argument passed to the function under test */
+    simde__m256i r; /* expected result */
+  } test_vec[8] = { /* In every entry, each four-argument row of r is (b0, a0, b1, a1), where x0/x1 are the first two arguments of the same row of x; presumably these are the upper two 32-bit elements of each 128-bit half -- confirm against simde_mm256_set_epi32's argument order. */
+    { simde_mm256_set_epi32(INT32_C( 1912850859), INT32_C(  967654585), INT32_C( 1199101495), INT32_C( 1020867807),
+                            INT32_C(-1113017403), INT32_C( 1207205853), INT32_C(-1283015323), INT32_C( -865603422)),
+      simde_mm256_set_epi32(INT32_C(  439671122), INT32_C( -834176430), INT32_C( 1316719462), INT32_C(  794894521),
+                            INT32_C( -364012057), INT32_C(-1283491930), INT32_C( -229222523), INT32_C( -489560867)),
+      simde_mm256_set_epi32(INT32_C(  439671122), INT32_C( 1912850859), INT32_C( -834176430), INT32_C(  967654585),
+                            INT32_C( -364012057), INT32_C(-1113017403), INT32_C(-1283491930), INT32_C( 1207205853)) },
+    { simde_mm256_set_epi32(INT32_C(-1043075301), INT32_C(-1205379203), INT32_C( -623218356), INT32_C(-1709643548),
+                            INT32_C(  -53386540), INT32_C( 1999540953), INT32_C( 2146270924), INT32_C( 1577977486)),
+      simde_mm256_set_epi32(INT32_C( 1416708497), INT32_C( 1977290590), INT32_C( -799631345), INT32_C( -699557932),
+                            INT32_C( -779666453), INT32_C(-1556766589), INT32_C( 1459438848), INT32_C(  777616978)),
+      simde_mm256_set_epi32(INT32_C( 1416708497), INT32_C(-1043075301), INT32_C( 1977290590), INT32_C(-1205379203),
+                            INT32_C( -779666453), INT32_C(  -53386540), INT32_C(-1556766589), INT32_C( 1999540953)) },
+    { simde_mm256_set_epi32(INT32_C(-1262008529), INT32_C(  995722237), INT32_C( 1921619936), INT32_C(-2112826366),
+                            INT32_C( -393450028), INT32_C( 1344594991), INT32_C( 2093675145), INT32_C(  760241232)),
+      simde_mm256_set_epi32(INT32_C( 1015857134), INT32_C( 1981903036), INT32_C( 1927114874), INT32_C( 1164834454),
+                            INT32_C( -157704373), INT32_C(-1887449946), INT32_C(-1985956729), INT32_C(-2099334634)),
+      simde_mm256_set_epi32(INT32_C( 1015857134), INT32_C(-1262008529), INT32_C( 1981903036), INT32_C(  995722237),
+                            INT32_C( -157704373), INT32_C( -393450028), INT32_C(-1887449946), INT32_C( 1344594991)) },
+    { simde_mm256_set_epi32(INT32_C(  212170692), INT32_C(  815869922), INT32_C(   20080222), INT32_C(  -75417640),
+                            INT32_C( -503678651), INT32_C( -834592925), INT32_C( 1707747009), INT32_C( -254760969)),
+      simde_mm256_set_epi32(INT32_C( -574627621), INT32_C( -890978529), INT32_C( 2077265887), INT32_C(  842919754),
+                            INT32_C( -486905662), INT32_C(  988851226), INT32_C(-1518229684), INT32_C( 1816570667)),
+      simde_mm256_set_epi32(INT32_C( -574627621), INT32_C(  212170692), INT32_C( -890978529), INT32_C(  815869922),
+                            INT32_C( -486905662), INT32_C( -503678651), INT32_C(  988851226), INT32_C( -834592925)) },
+    { simde_mm256_set_epi32(INT32_C( 1459349320), INT32_C( -943790006), INT32_C(  424384832), INT32_C(  113065932),
+                            INT32_C(  -80059372), INT32_C(  814075306), INT32_C( 1255708904), INT32_C(  894835823)),
+      simde_mm256_set_epi32(INT32_C( 1159658953), INT32_C(  513900351), INT32_C( 1274799760), INT32_C(  661217108),
+                            INT32_C(-1116902016), INT32_C( 1264134407), INT32_C( 1996134185), INT32_C( -620124201)),
+      simde_mm256_set_epi32(INT32_C( 1159658953), INT32_C( 1459349320), INT32_C(  513900351), INT32_C( -943790006),
+                            INT32_C(-1116902016), INT32_C(  -80059372), INT32_C( 1264134407), INT32_C(  814075306)) },
+    { simde_mm256_set_epi32(INT32_C( 1669821560), INT32_C( -524933447), INT32_C(-1923407638), INT32_C( 1748809176),
+                            INT32_C(  -67073492), INT32_C(-1589224355), INT32_C(-1890395480), INT32_C(  650020033)),
+      simde_mm256_set_epi32(INT32_C( 1449046752), INT32_C(   11693105), INT32_C(  225889559), INT32_C( 1195957602),
+                            INT32_C( -790861669), INT32_C(  894225381), INT32_C( 1788797029), INT32_C( 1410983650)),
+      simde_mm256_set_epi32(INT32_C( 1449046752), INT32_C( 1669821560), INT32_C(   11693105), INT32_C( -524933447),
+                            INT32_C( -790861669), INT32_C(  -67073492), INT32_C(  894225381), INT32_C(-1589224355)) },
+    { simde_mm256_set_epi32(INT32_C(  166836505), INT32_C(   35920603), INT32_C(  209405330), INT32_C(-1870274444),
+                            INT32_C(-2065187438), INT32_C(-1749203354), INT32_C(-1834771489), INT32_C( 1795755804)),
+      simde_mm256_set_epi32(INT32_C(  863607209), INT32_C(  671761907), INT32_C(-1319646828), INT32_C(  236938511),
+                            INT32_C(  277292323), INT32_C(-1226502074), INT32_C( 1319858080), INT32_C( 1103529470)),
+      simde_mm256_set_epi32(INT32_C(  863607209), INT32_C(  166836505), INT32_C(  671761907), INT32_C(   35920603),
+                            INT32_C(  277292323), INT32_C(-2065187438), INT32_C(-1226502074), INT32_C(-1749203354)) },
+    { simde_mm256_set_epi32(INT32_C(  -91786742), INT32_C(-1505313832), INT32_C( 1530067112), INT32_C(-1201437931),
+                            INT32_C(-1252606163), INT32_C( 1723954910), INT32_C(  348258249), INT32_C( -864462904)),
+      simde_mm256_set_epi32(INT32_C(  926816633), INT32_C( -705859720), INT32_C( 1728076763), INT32_C(-1714964607),
+                            INT32_C( -194394697), INT32_C( -494196608), INT32_C( 1804204829), INT32_C(-1267214668)),
+      simde_mm256_set_epi32(INT32_C(  926816633), INT32_C(  -91786742), INT32_C( -705859720), INT32_C(-1505313832),
+                            INT32_C( -194394697), INT32_C(-1252606163), INT32_C( -494196608), INT32_C( 1723954910)) }
+  };
+
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { /* bound is derived from the array itself, so every entry is visited */
+    simde__m256i r = simde_mm256_unpackhi_epi32(test_vec[i].a, test_vec[i].b); /* function under test */
+    simde_assert_m256i_i32(r, ==, test_vec[i].r); /* compare with the expected vector; presumably as 32-bit elements, judging by the macro name -- confirm */
+  }
+
+  return MUNIT_OK; /* reached once the loop has completed */
+}
+
+static MunitResult
+test_simde_mm256_unpackhi_epi64(const MunitParameter params[], void* data) { /* Runs simde_mm256_unpackhi_epi64 over 8 fixed (a, b, r) vectors and checks each result against r. */
+  (void) params; /* unused */
+  (void) data; /* unused */
+
+  const struct {
+    simde__m256i a; /* first argument passed to the function under test */
+    simde__m256i b; /* second argument passed to the function under test */
+    simde__m256i r; /* expected result */
+  } test_vec[8] = { /* In every entry, each two-argument row of r is (b0, a0), where x0 is the first argument of the same row of x; presumably that is the upper 64-bit element of each 128-bit half -- confirm against simde_mm256_set_epi64x's argument order. */
+    { simde_mm256_set_epi64x(INT64_C( 8215631882498161849), INT64_C( 5150101706630575327),
+                             INT64_C(-4780373344556646435), INT64_C(-5510508849122512734)),
+      simde_mm256_set_epi64x(INT64_C( 1888373093446416978), INT64_C( 5655267028091609273),
+                             INT64_C(-1563419877153212506), INT64_C( -984503235986201379)),
+      simde_mm256_set_epi64x(INT64_C( 1888373093446416978), INT64_C( 8215631882498161849),
+                             INT64_C(-1563419877153212506), INT64_C(-4780373344556646435)) },
+    { simde_mm256_set_epi64x(INT64_C(-4479974301970768003), INT64_C(-2676702454701561628),
+                             INT64_C( -229293441347054887), INT64_C( 9218163428513678990)),
+      simde_mm256_set_epi64x(INT64_C( 6084716664557604702), INT64_C(-3434390472036083756),
+                             INT64_C(-3348641914685120381), INT64_C( 6268242123449531986)),
+      simde_mm256_set_epi64x(INT64_C( 6084716664557604702), INT64_C(-4479974301970768003),
+                             INT64_C(-3348641914685120381), INT64_C( -229293441347054887)) },
+    { simde_mm256_set_epi64x(INT64_C(-5420285358332345347), INT64_C( 8253294782643753986),
+                             INT64_C(-1689855001525689297), INT64_C( 8992266276983299152)),
+      simde_mm256_set_epi64x(INT64_C( 4363073169920192700), INT64_C( 8276895360629995158),
+                             INT64_C( -677335122063668058), INT64_C(-8529619200130502122)),
+      simde_mm256_set_epi64x(INT64_C( 4363073169920192700), INT64_C(-5420285358332345347),
+                             INT64_C( -677335122063668058), INT64_C(-1689855001525689297)) },
+    { simde_mm256_set_epi64x(INT64_C(  911266184125558754), INT64_C(   86243901005969368),
+                             INT64_C(-2163283330278023325), INT64_C( 7334717557537023991)),
+      simde_mm256_set_epi64x(INT64_C(-2468006836169294049), INT64_C( 8921789050604351306),
+                             INT64_C(-2091243893538378726), INT64_C(-6520746838779843797)),
+      simde_mm256_set_epi64x(INT64_C(-2468006836169294049), INT64_C(  911266184125558754),
+                             INT64_C(-2091243893538378726), INT64_C(-2163283330278023325)) },
+    { simde_mm256_set_epi64x(INT64_C( 6267857606191016010), INT64_C( 1822718974471520204),
+                             INT64_C( -343852383664222806), INT64_C( 5393228676870839407)),
+      simde_mm256_set_epi64x(INT64_C( 4980697278162501439), INT64_C( 5475223278809866068),
+                             INT64_C(-4797057630292334329), INT64_C( 8573331046677456855)),
+      simde_mm256_set_epi64x(INT64_C( 4980697278162501439), INT64_C( 6267857606191016010),
+                             INT64_C(-4797057630292334329), INT64_C( -343852383664222806)) },
+    { simde_mm256_set_epi64x(INT64_C( 7171828994125735609), INT64_C(-8260972900337797672),
+                             INT64_C( -288078451862774691), INT64_C(-8119186762456202047)),
+      simde_mm256_set_epi64x(INT64_C( 6223608410226715697), INT64_C(  970188269608820066),
+                             INT64_C(-3396725003120751643), INT64_C( 7682824740147947234)),
+      simde_mm256_set_epi64x(INT64_C( 6223608410226715697), INT64_C( 7171828994125735609),
+                             INT64_C(-3396725003120751643), INT64_C( -288078451862774691)) },
+    { simde_mm256_set_epi64x(INT64_C(  716557332789861083), INT64_C(  899389046382780532),
+                             INT64_C(-8869912503774263706), INT64_C(-7880283539092467940)),
+      simde_mm256_set_epi64x(INT64_C( 3709164719916598771), INT64_C(-5667839968293198577),
+                             INT64_C( 1190961461785333830), INT64_C( 5668747290064881150)),
+      simde_mm256_set_epi64x(INT64_C( 3709164719916598771), INT64_C(  716557332789861083),
+                             INT64_C( 1190961461785333830), INT64_C(-8869912503774263706)) },
+    { simde_mm256_set_epi64x(INT64_C( -394221052306736168), INT64_C( 6571588209818698517),
+                             INT64_C(-5379902503129090338), INT64_C( 1495757793447729096)),
+      simde_mm256_set_epi64x(INT64_C( 3980647131712941944), INT64_C( 7422033184642545537),
+                             INT64_C( -834918862330058624), INT64_C( 7749000738868025012)),
+      simde_mm256_set_epi64x(INT64_C( 3980647131712941944), INT64_C( -394221052306736168),
+                             INT64_C( -834918862330058624), INT64_C(-5379902503129090338)) }
+  };
+
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { /* bound is derived from the array itself, so every entry is visited */
+    simde__m256i r = simde_mm256_unpackhi_epi64(test_vec[i].a, test_vec[i].b); /* function under test */
+    simde_assert_m256i_i64(r, ==, test_vec[i].r); /* compare with the expected vector; presumably as 64-bit elements, judging by the macro name -- confirm */
+  }
+
+  return MUNIT_OK; /* reached once the loop has completed */
+}
+
 static MunitResult
 test_simde_mm256_xor_si256(const MunitParameter params[], void* data) {
   (void) params;
@@ -8302,54 +8438,54 @@ test_simde_mm256_max_epu32(const MunitParameter params[], void* data) {
     simde__m256i b;
     simde__m256i r;
   } test_vec[8] = {
-    { simde_x_mm256_set_epu32(UINT32_C( 1789296413), UINT32_C(-1030321700), UINT32_C( -395124404), UINT32_C( 1616539383),
-                              UINT32_C( -652488309), UINT32_C(  449218617), UINT32_C(-1851432383), UINT32_C(-1484019778)),
-      simde_x_mm256_set_epu32(UINT32_C(-1866543367), UINT32_C(-1970790635), UINT32_C(  815561745), UINT32_C( 2005519330),
-                              UINT32_C(  -11134399), UINT32_C(-1100361913), UINT32_C( 1735044051), UINT32_C(  -32078364)),
-      simde_x_mm256_set_epu32(UINT32_C(-1866543367), UINT32_C(-1030321700), UINT32_C( -395124404), UINT32_C( 2005519330),
-                              UINT32_C(  -11134399), UINT32_C(-1100361913), UINT32_C(-1851432383), UINT32_C(  -32078364)) },
-    { simde_x_mm256_set_epu32(UINT32_C( 2144707340), UINT32_C(  252486079), UINT32_C( 1391688801), UINT32_C( -448135501),
-                              UINT32_C( -148645134), UINT32_C( -445330965), UINT32_C( 1851297716), UINT32_C( -217759707)),
-      simde_x_mm256_set_epu32(UINT32_C( 1523413770), UINT32_C( 1207768640), UINT32_C(  306982396), UINT32_C( -481827394),
-                              UINT32_C( 1401479563), UINT32_C(-1470970496), UINT32_C(  235708602), UINT32_C( 1837412708)),
-      simde_x_mm256_set_epu32(UINT32_C( 2144707340), UINT32_C( 1207768640), UINT32_C( 1391688801), UINT32_C( -448135501),
-                              UINT32_C( -148645134), UINT32_C( -445330965), UINT32_C( 1851297716), UINT32_C( -217759707)) },
-    { simde_x_mm256_set_epu32(UINT32_C(-1081195710), UINT32_C(  556196449), UINT32_C(  904499224), UINT32_C( -311476862),
-                              UINT32_C(-1068051344), UINT32_C(  316495777), UINT32_C( 1987852209), UINT32_C(  -82353218)),
-      simde_x_mm256_set_epu32(UINT32_C( 1251434410), UINT32_C( 1491058763), UINT32_C(   32741970), UINT32_C( 2096924550),
-                              UINT32_C(  992045009), UINT32_C(  951482693), UINT32_C( 1326403571), UINT32_C( -785668634)),
-      simde_x_mm256_set_epu32(UINT32_C(-1081195710), UINT32_C( 1491058763), UINT32_C(  904499224), UINT32_C( -311476862),
-                              UINT32_C(-1068051344), UINT32_C(  951482693), UINT32_C( 1987852209), UINT32_C(  -82353218)) },
-    { simde_x_mm256_set_epu32(UINT32_C(   85161761), UINT32_C( -831060163), UINT32_C(  -47466501), UINT32_C(-1193967773),
-                              UINT32_C( -873716901), UINT32_C(  868999768), UINT32_C( -212766777), UINT32_C( -774920647)),
-      simde_x_mm256_set_epu32(UINT32_C(  488094589), UINT32_C(-1161944830), UINT32_C( -324067336), UINT32_C(  795722459),
-                              UINT32_C(  285945408), UINT32_C( -219645108), UINT32_C( 1108477887), UINT32_C( 1206196184)),
-      simde_x_mm256_set_epu32(UINT32_C(  488094589), UINT32_C( -831060163), UINT32_C(  -47466501), UINT32_C(-1193967773),
-                              UINT32_C( -873716901), UINT32_C( -219645108), UINT32_C( -212766777), UINT32_C( -774920647)) },
-    { simde_x_mm256_set_epu32(UINT32_C( -145586798), UINT32_C( 2043772759), UINT32_C(  282954497), UINT32_C(-1391199635),
-                              UINT32_C(-1974699849), UINT32_C(  376785085), UINT32_C( -495950106), UINT32_C( 1285787501)),
-      simde_x_mm256_set_epu32(UINT32_C( 1298223933), UINT32_C( -246257442), UINT32_C(-1589240302), UINT32_C(-2123797480),
-                              UINT32_C(-2103929130), UINT32_C(-1905518436), UINT32_C( 1471816273), UINT32_C(-1959021648)),
-      simde_x_mm256_set_epu32(UINT32_C( -145586798), UINT32_C( -246257442), UINT32_C(-1589240302), UINT32_C(-1391199635),
-                              UINT32_C(-1974699849), UINT32_C(-1905518436), UINT32_C( -495950106), UINT32_C(-1959021648)) },
-    { simde_x_mm256_set_epu32(UINT32_C(-1857605381), UINT32_C(-1139820821), UINT32_C(-2120556261), UINT32_C(  945707022),
-                              UINT32_C(-1819541735), UINT32_C( -865122624), UINT32_C(-1456304785), UINT32_C( 2025802961)),
-      simde_x_mm256_set_epu32(UINT32_C( -506483570), UINT32_C(-2083330411), UINT32_C( 1782301120), UINT32_C( -298692339),
-                              UINT32_C(-1283750394), UINT32_C( 1929029045), UINT32_C( -611912448), UINT32_C(  487591866)),
-      simde_x_mm256_set_epu32(UINT32_C( -506483570), UINT32_C(-1139820821), UINT32_C(-2120556261), UINT32_C( -298692339),
-                              UINT32_C(-1283750394), UINT32_C( -865122624), UINT32_C( -611912448), UINT32_C( 2025802961)) },
-    { simde_x_mm256_set_epu32(UINT32_C(-1676495265), UINT32_C(-1239554047), UINT32_C( -227736186), UINT32_C(  441497328),
-                              UINT32_C( -308281524), UINT32_C(-1923017863), UINT32_C( 1763763374), UINT32_C( -249830824)),
-      simde_x_mm256_set_epu32(UINT32_C( 1186988143), UINT32_C( -274704652), UINT32_C( -136552265), UINT32_C( -512830229),
-                              UINT32_C( 1698748935), UINT32_C(  532239246), UINT32_C( -341231509), UINT32_C(  222942642)),
-      simde_x_mm256_set_epu32(UINT32_C(-1676495265), UINT32_C( -274704652), UINT32_C( -136552265), UINT32_C( -512830229),
-                              UINT32_C( -308281524), UINT32_C(-1923017863), UINT32_C( -341231509), UINT32_C( -249830824)) },
-    { simde_x_mm256_set_epu32(UINT32_C(-1693096720), UINT32_C(-1850078774), UINT32_C( 1042415884), UINT32_C( -995300065),
-                              UINT32_C( -726650347), UINT32_C( 1364370576), UINT32_C( 1007565517), UINT32_C(   23772290)),
-      simde_x_mm256_set_epu32(UINT32_C(-1218092357), UINT32_C(-1601849960), UINT32_C( 1609800037), UINT32_C(  731809179),
-                              UINT32_C(-1388796435), UINT32_C( -545596065), UINT32_C(-1231479917), UINT32_C(-1348660048)),
-      simde_x_mm256_set_epu32(UINT32_C(-1218092357), UINT32_C(-1601849960), UINT32_C( 1609800037), UINT32_C( -995300065),
-                              UINT32_C( -726650347), UINT32_C( -545596065), UINT32_C(-1231479917), UINT32_C(-1348660048)) }
+    { simde_x_mm256_set_epu32(UINT32_C(3051572045), UINT32_C(3545123096), UINT32_C( 539532434), UINT32_C(2726067579),
+                              UINT32_C(3419329411), UINT32_C(3056421163), UINT32_C(2937475413), UINT32_C( 891304178)),
+      simde_x_mm256_set_epu32(UINT32_C(3684521838), UINT32_C(3664092042), UINT32_C( 648541541), UINT32_C( 272930365),
+                              UINT32_C( 675916127), UINT32_C(1720002587), UINT32_C(1082929595), UINT32_C( 984837149)),
+      simde_x_mm256_set_epu32(UINT32_C(3684521838), UINT32_C(3664092042), UINT32_C( 648541541), UINT32_C(2726067579),
+                              UINT32_C(3419329411), UINT32_C(3056421163), UINT32_C(2937475413), UINT32_C( 984837149)) },
+    { simde_x_mm256_set_epu32(UINT32_C(2114544969), UINT32_C( 863431180), UINT32_C(2929790297), UINT32_C(1508406092),
+                              UINT32_C(1766188399), UINT32_C(3527468216), UINT32_C(1207097510), UINT32_C(3902158588)),
+      simde_x_mm256_set_epu32(UINT32_C(4246012734), UINT32_C(1929345650), UINT32_C(3025365238), UINT32_C(3355971563),
+                              UINT32_C(3310140776), UINT32_C(2715842442), UINT32_C(2891355465), UINT32_C(4115361740)),
+      simde_x_mm256_set_epu32(UINT32_C(4246012734), UINT32_C(1929345650), UINT32_C(3025365238), UINT32_C(3355971563),
+                              UINT32_C(3310140776), UINT32_C(3527468216), UINT32_C(2891355465), UINT32_C(4115361740)) },
+    { simde_x_mm256_set_epu32(UINT32_C(1001663617), UINT32_C(3733133836), UINT32_C(2215256808), UINT32_C(2636746621),
+                              UINT32_C(1004506231), UINT32_C(4292057274), UINT32_C(1794485537), UINT32_C(2483395991)),
+      simde_x_mm256_set_epu32(UINT32_C(3187837407), UINT32_C( 282146620), UINT32_C(3384289392), UINT32_C(3287894196),
+                              UINT32_C(1516921107), UINT32_C( 874312086), UINT32_C(1365725481), UINT32_C(2010925515)),
+      simde_x_mm256_set_epu32(UINT32_C(3187837407), UINT32_C(3733133836), UINT32_C(3384289392), UINT32_C(3287894196),
+                              UINT32_C(1516921107), UINT32_C(4292057274), UINT32_C(1794485537), UINT32_C(2483395991)) },
+    { simde_x_mm256_set_epu32(UINT32_C(1825864990), UINT32_C(3651632677), UINT32_C(1848482644), UINT32_C(4217806782),
+                              UINT32_C(3557654096), UINT32_C( 849332445), UINT32_C( 114697269), UINT32_C( 674786807)),
+      simde_x_mm256_set_epu32(UINT32_C(1779504954), UINT32_C(1699927403), UINT32_C(2934040606), UINT32_C(4177282687),
+                              UINT32_C(3450483742), UINT32_C(3231995683), UINT32_C(2093833738), UINT32_C( 336221368)),
+      simde_x_mm256_set_epu32(UINT32_C(1825864990), UINT32_C(3651632677), UINT32_C(2934040606), UINT32_C(4217806782),
+                              UINT32_C(3557654096), UINT32_C(3231995683), UINT32_C(2093833738), UINT32_C( 674786807)) },
+    { simde_x_mm256_set_epu32(UINT32_C( 837260657), UINT32_C(3220613586), UINT32_C(2642631733), UINT32_C(2252900450),
+                              UINT32_C(3061609531), UINT32_C(2252923032), UINT32_C(1030296341), UINT32_C(2720361240)),
+      simde_x_mm256_set_epu32(UINT32_C( 273034038), UINT32_C(2374878315), UINT32_C( 248014486), UINT32_C( 363234795),
+                              UINT32_C(2126760034), UINT32_C(4029819680), UINT32_C(2144543040), UINT32_C(4227450764)),
+      simde_x_mm256_set_epu32(UINT32_C( 837260657), UINT32_C(3220613586), UINT32_C(2642631733), UINT32_C(2252900450),
+                              UINT32_C(3061609531), UINT32_C(4029819680), UINT32_C(2144543040), UINT32_C(4227450764)) },
+    { simde_x_mm256_set_epu32(UINT32_C(1832345572), UINT32_C(3528531140), UINT32_C(1174695155), UINT32_C( 287919562),
+                              UINT32_C( 792094828), UINT32_C(2183085761), UINT32_C(4262203031), UINT32_C(3585219150)),
+      simde_x_mm256_set_epu32(UINT32_C( 890341915), UINT32_C(2131853812), UINT32_C(2693768197), UINT32_C(1107526035),
+                              UINT32_C(3439504205), UINT32_C( 751425977), UINT32_C(3946340711), UINT32_C(2999018213)),
+      simde_x_mm256_set_epu32(UINT32_C(1832345572), UINT32_C(3528531140), UINT32_C(2693768197), UINT32_C(1107526035),
+                              UINT32_C(3439504205), UINT32_C(2183085761), UINT32_C(4262203031), UINT32_C(3585219150)) },
+    { simde_x_mm256_set_epu32(UINT32_C(3937957510), UINT32_C(1450021822), UINT32_C(1539281783), UINT32_C( 551303701),
+                              UINT32_C(1703578262), UINT32_C(2645712321), UINT32_C(2567475981), UINT32_C(3368049591)),
+      simde_x_mm256_set_epu32(UINT32_C(3778291576), UINT32_C(3382324216), UINT32_C(1328766962), UINT32_C(4275285368),
+                              UINT32_C( 709773804), UINT32_C(1207236213), UINT32_C(1754387913), UINT32_C(3060418664)),
+      simde_x_mm256_set_epu32(UINT32_C(3937957510), UINT32_C(3382324216), UINT32_C(1539281783), UINT32_C(4275285368),
+                              UINT32_C(1703578262), UINT32_C(2645712321), UINT32_C(2567475981), UINT32_C(3368049591)) },
+    { simde_x_mm256_set_epu32(UINT32_C(1036418017), UINT32_C(1863476858), UINT32_C(1083883567), UINT32_C(3943520867),
+                              UINT32_C(  70129580), UINT32_C(1560905984), UINT32_C(2897296070), UINT32_C(2575520666)),
+      simde_x_mm256_set_epu32(UINT32_C(2683682856), UINT32_C( 255193154), UINT32_C( 693351345), UINT32_C(1449340919),
+                              UINT32_C(2448587623), UINT32_C(2723008994), UINT32_C(2741828276), UINT32_C(3254255853)),
+      simde_x_mm256_set_epu32(UINT32_C(2683682856), UINT32_C(1863476858), UINT32_C(1083883567), UINT32_C(3943520867),
+                              UINT32_C(2448587623), UINT32_C(2723008994), UINT32_C(2897296070), UINT32_C(3254255853)) }
   };
 
   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
@@ -8512,6 +8648,8 @@ static MunitTest test_suite_tests[] = {
 
   SIMDE_TESTS_DEFINE_TEST(mm256_unpackhi_epi8),
   SIMDE_TESTS_DEFINE_TEST(mm256_unpackhi_epi16),
+  SIMDE_TESTS_DEFINE_TEST(mm256_unpackhi_epi32),
+  SIMDE_TESTS_DEFINE_TEST(mm256_unpackhi_epi64),
 
   SIMDE_TESTS_DEFINE_TEST(mm256_xor_si256),
 #endif /* defined(SIMDE_AVX2_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) */


=====================================
test/x86/avx512bw.c
=====================================
@@ -1491,6 +1491,694 @@ test_simde_mm512_adds_epu16(const MunitParameter params[], void* data) {
   return MUNIT_OK;
 }
 
+static MunitResult
+test_simde_mm512_shuffle_epi8(const MunitParameter params[], void* data) {
+  (void) params;
+  (void) data;
+
+  const struct {
+    simde__m512i a;
+    simde__m512i b;
+    simde__m512i r;
+  } test_vec[8] = {
+    { simde_mm512_set_epi8(INT8_C(  56), INT8_C( -94), INT8_C( -41), INT8_C( -59),
+                           INT8_C(  40), INT8_C(  78), INT8_C(  93), INT8_C( 107),
+                           INT8_C( -47), INT8_C(  66), INT8_C(  -8), INT8_C( -52),
+                           INT8_C(  50), INT8_C(  78), INT8_C(  13), INT8_C(  26),
+                           INT8_C( -33), INT8_C( 120), INT8_C(  67), INT8_C( -12),
+                           INT8_C(  80), INT8_C(  88), INT8_C( -65), INT8_C(  49),
+                           INT8_C(  28), INT8_C(  96), INT8_C( -99), INT8_C(-119),
+                           INT8_C( 126), INT8_C(  24), INT8_C(  35), INT8_C(  11),
+                           INT8_C( -43), INT8_C( -24), INT8_C(   1), INT8_C( -61),
+                           INT8_C( 101), INT8_C(   5), INT8_C(  67), INT8_C( -42),
+                           INT8_C( -46), INT8_C(-115), INT8_C(-105), INT8_C( -92),
+                           INT8_C(  -3), INT8_C( -44), INT8_C(  13), INT8_C( -36),
+                           INT8_C( 110), INT8_C( -38), INT8_C( -48), INT8_C(  36),
+                           INT8_C( 117), INT8_C( -59), INT8_C( 109), INT8_C( -27),
+                           INT8_C( -62), INT8_C(  98), INT8_C(-121), INT8_C(-108),
+                           INT8_C(  -7), INT8_C(-112), INT8_C( -52), INT8_C( -84)),
+      simde_mm512_set_epi8(INT8_C(  34), INT8_C(  34), INT8_C( -71), INT8_C( -63),
+                           INT8_C( -34), INT8_C(-128), INT8_C( 113), INT8_C( -72),
+                           INT8_C(  86), INT8_C( -73), INT8_C( -43), INT8_C(  95),
+                           INT8_C(   3), INT8_C(  13), INT8_C(  19), INT8_C(  -1),
+                           INT8_C( -54), INT8_C( 114), INT8_C(  27), INT8_C( -30),
+                           INT8_C(  52), INT8_C( -54), INT8_C(  78), INT8_C(  23),
+                           INT8_C(  71), INT8_C(  87), INT8_C( 107), INT8_C( -27),
+                           INT8_C( -35), INT8_C(-122), INT8_C(  40), INT8_C(  55),
+                           INT8_C(  -2), INT8_C(  40), INT8_C( -63), INT8_C(  85),
+                           INT8_C( -25), INT8_C( -93), INT8_C(  25), INT8_C(  70),
+                           INT8_C( 117), INT8_C(  66), INT8_C( -79), INT8_C(  57),
+                           INT8_C(-115), INT8_C( -27), INT8_C(  59), INT8_C(  25),
+                           INT8_C(  -6), INT8_C(  42), INT8_C( -27), INT8_C( -34),
+                           INT8_C( -16), INT8_C(   5), INT8_C( -55), INT8_C(  74),
+                           INT8_C( -29), INT8_C(  77), INT8_C( -17), INT8_C(  16),
+                           INT8_C( -98), INT8_C( -38), INT8_C( 116), INT8_C( -56)),
+      simde_mm512_set_epi8(INT8_C(  78), INT8_C(  78), INT8_C(   0), INT8_C(   0),
+                           INT8_C(   0), INT8_C(   0), INT8_C(  13), INT8_C(   0),
+                           INT8_C(  66), INT8_C(   0), INT8_C(   0), INT8_C(  56),
+                           INT8_C(  50), INT8_C( -41), INT8_C(  50), INT8_C(   0),
+                           INT8_C(   0), INT8_C(  24), INT8_C(  80), INT8_C(   0),
+                           INT8_C(-119), INT8_C(   0), INT8_C( 120), INT8_C(  28),
+                           INT8_C(  28), INT8_C(  28), INT8_C(  80), INT8_C(   0),
+                           INT8_C(   0), INT8_C(   0), INT8_C(  49), INT8_C(  28),
+                           INT8_C(   0), INT8_C( -42), INT8_C(   0), INT8_C(-105),
+                           INT8_C(   0), INT8_C(   0), INT8_C(  67), INT8_C(-115),
+                           INT8_C(-105), INT8_C( -44), INT8_C(   0), INT8_C(  67),
+                           INT8_C(   0), INT8_C(   0), INT8_C( 101), INT8_C(  67),
+                           INT8_C(   0), INT8_C( -59), INT8_C(   0), INT8_C(   0),
+                           INT8_C(   0), INT8_C(-121), INT8_C(   0), INT8_C( -59),
+                           INT8_C(   0), INT8_C( -48), INT8_C(   0), INT8_C( -84),
+                           INT8_C(   0), INT8_C(   0), INT8_C(-108), INT8_C(   0)) },
+    { simde_mm512_set_epi8(INT8_C( -75), INT8_C(-123), INT8_C( -44), INT8_C( -97),
+                           INT8_C(  61), INT8_C(-112), INT8_C( 109), INT8_C(  27),
+                           INT8_C(  -9), INT8_C( -31), INT8_C(-111), INT8_C( -59),
+                           INT8_C(  94), INT8_C( 110), INT8_C(  31), INT8_C(  50),
+                           INT8_C( -86), INT8_C(  24), INT8_C(  91), INT8_C(-105),
+                           INT8_C(  39), INT8_C(  25), INT8_C( -65), INT8_C(  69),
+                           INT8_C( -59), INT8_C( -57), INT8_C( -98), INT8_C(  26),
+                           INT8_C( -35), INT8_C(  -2), INT8_C( 114), INT8_C( -51),
+                           INT8_C( -16), INT8_C( 101), INT8_C( -92), INT8_C(  65),
+                           INT8_C( -88), INT8_C(  19), INT8_C(-102), INT8_C( -51),
+                           INT8_C( -94), INT8_C( -30), INT8_C(  60), INT8_C(  -9),
+                           INT8_C(   4), INT8_C( -93), INT8_C( 110), INT8_C(-102),
+                           INT8_C( 109), INT8_C(   1), INT8_C( 113), INT8_C( 114),
+                           INT8_C( 100), INT8_C(  80), INT8_C(  18), INT8_C(  -7),
+                           INT8_C( -87), INT8_C(   0), INT8_C( -55), INT8_C(  37),
+                           INT8_C(  13), INT8_C(  86), INT8_C(-114), INT8_C(-125)),
+      simde_mm512_set_epi8(INT8_C(  18), INT8_C(  11), INT8_C(  73), INT8_C(  57),
+                           INT8_C( -54), INT8_C(   9), INT8_C(  69), INT8_C( -45),
+                           INT8_C(  71), INT8_C(  -6), INT8_C( -68), INT8_C(  -8),
+                           INT8_C(  14), INT8_C( -27), INT8_C(  84), INT8_C( -90),
+                           INT8_C( -10), INT8_C( -81), INT8_C(  77), INT8_C(-113),
+                           INT8_C(  32), INT8_C(  17), INT8_C(  62), INT8_C( -18),
+                           INT8_C( 124), INT8_C( -42), INT8_C( -52), INT8_C(-109),
+                           INT8_C( -19), INT8_C(  30), INT8_C( -51), INT8_C(-111),
+                           INT8_C( 100), INT8_C(  63), INT8_C( -80), INT8_C(-126),
+                           INT8_C(   3), INT8_C( 119), INT8_C(  38), INT8_C( -44),
+                           INT8_C( -48), INT8_C( -43), INT8_C(   6), INT8_C(-113),
+                           INT8_C(  -5), INT8_C( -18), INT8_C(  58), INT8_C(  77),
+                           INT8_C( -49), INT8_C(-107), INT8_C( -77), INT8_C( 119),
+                           INT8_C(  -6), INT8_C(  92), INT8_C(-122), INT8_C(  43),
+                           INT8_C(  85), INT8_C(-111), INT8_C( -15), INT8_C(  90),
+                           INT8_C(-104), INT8_C(  31), INT8_C(  -4), INT8_C(  57)),
+      simde_mm512_set_epi8(INT8_C( 110), INT8_C(  61), INT8_C( 109), INT8_C( 109),
+                           INT8_C(   0), INT8_C( 109), INT8_C(-111), INT8_C(   0),
+                           INT8_C(  -9), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                           INT8_C(-123), INT8_C(   0), INT8_C( -59), INT8_C(   0),
+                           INT8_C(   0), INT8_C(   0), INT8_C(  91), INT8_C(   0),
+                           INT8_C( -51), INT8_C( 114), INT8_C(  24), INT8_C(   0),
+                           INT8_C(-105), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                           INT8_C(   0), INT8_C(  24), INT8_C(   0), INT8_C(   0),
+                           INT8_C(  -9), INT8_C( -16), INT8_C(   0), INT8_C(   0),
+                           INT8_C(   4), INT8_C( -94), INT8_C( -30), INT8_C(   0),
+                           INT8_C(   0), INT8_C(   0), INT8_C( -30), INT8_C(   0),
+                           INT8_C(   0), INT8_C(   0), INT8_C(  19), INT8_C( -92),
+                           INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C( -87),
+                           INT8_C(   0), INT8_C( 114), INT8_C(   0), INT8_C( 100),
+                           INT8_C( -55), INT8_C(   0), INT8_C(   0), INT8_C(  80),
+                           INT8_C(   0), INT8_C( 109), INT8_C(   0), INT8_C(  18)) },
+    { simde_mm512_set_epi8(INT8_C( -54), INT8_C(  53), INT8_C(  35), INT8_C(  51),
+                           INT8_C(  24), INT8_C( 100), INT8_C(  80), INT8_C(   7),
+                           INT8_C(  54), INT8_C( 114), INT8_C( -42), INT8_C(  -2),
+                           INT8_C(  18), INT8_C( 100), INT8_C(  26), INT8_C( -68),
+                           INT8_C(  82), INT8_C( -30), INT8_C(  -4), INT8_C( 110),
+                           INT8_C(  24), INT8_C(  88), INT8_C(-100), INT8_C(-120),
+                           INT8_C( -17), INT8_C(-109), INT8_C(  92), INT8_C(-105),
+                           INT8_C( -20), INT8_C(  34), INT8_C(  98), INT8_C( -17),
+                           INT8_C(   2), INT8_C(   6), INT8_C(  37), INT8_C( -85),
+                           INT8_C( 118), INT8_C(  28), INT8_C(  34), INT8_C( 117),
+                           INT8_C(  77), INT8_C( -32), INT8_C(  25), INT8_C( -74),
+                           INT8_C(  93), INT8_C(-108), INT8_C(   3), INT8_C( -90),
+                           INT8_C(  13), INT8_C( -29), INT8_C( -88), INT8_C( 116),
+                           INT8_C(  14), INT8_C( -29), INT8_C(  30), INT8_C( -26),
+                           INT8_C(-115), INT8_C( -36), INT8_C(  94), INT8_C(-125),
+                           INT8_C( 118), INT8_C( 125), INT8_C( -91), INT8_C(  31)),
+      simde_mm512_set_epi8(INT8_C( -74), INT8_C( -13), INT8_C( -87), INT8_C( -10),
+                           INT8_C(  21), INT8_C(  49), INT8_C( -64), INT8_C(  52),
+                           INT8_C( -97), INT8_C(-103), INT8_C(-127), INT8_C(  53),
+                           INT8_C(  99), INT8_C(  80), INT8_C(  55), INT8_C( 111),
+                           INT8_C(  61), INT8_C( -93), INT8_C( -87), INT8_C(  81),
+                           INT8_C( 108), INT8_C(  89), INT8_C(  63), INT8_C(-109),
+                           INT8_C( -47), INT8_C( -32), INT8_C( 105), INT8_C(  91),
+                           INT8_C(  88), INT8_C(  85), INT8_C( -96), INT8_C(  88),
+                           INT8_C( -85), INT8_C(   3), INT8_C( 124), INT8_C( -33),
+                           INT8_C( -21), INT8_C(  -7), INT8_C( -31), INT8_C(-126),
+                           INT8_C(   3), INT8_C( -17), INT8_C(  40), INT8_C(   5),
+                           INT8_C( 126), INT8_C( -60), INT8_C( -91), INT8_C(-112),
+                           INT8_C(  88), INT8_C(  11), INT8_C( 100), INT8_C( 114),
+                           INT8_C( 112), INT8_C( -53), INT8_C(  89), INT8_C(  78),
+                           INT8_C( 115), INT8_C( 117), INT8_C( -33), INT8_C(  14),
+                           INT8_C( -42), INT8_C( -15), INT8_C(  80), INT8_C( -46)),
+      simde_mm512_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                           INT8_C( -42), INT8_C(  26), INT8_C(   0), INT8_C(  -2),
+                           INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C( -42),
+                           INT8_C(  18), INT8_C( -68), INT8_C(  54), INT8_C( -54),
+                           INT8_C(  -4), INT8_C(   0), INT8_C(   0), INT8_C(  98),
+                           INT8_C( 110), INT8_C(-100), INT8_C(  82), INT8_C(   0),
+                           INT8_C(   0), INT8_C(   0), INT8_C(-100), INT8_C(  24),
+                           INT8_C(-120), INT8_C(  92), INT8_C(   0), INT8_C(-120),
+                           INT8_C(   0), INT8_C(  93), INT8_C( -85), INT8_C(   0),
+                           INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                           INT8_C(  93), INT8_C(   0), INT8_C( 117), INT8_C(  25),
+                           INT8_C(   6), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                           INT8_C( -26), INT8_C(  14), INT8_C(-125), INT8_C( 125),
+                           INT8_C(  31), INT8_C(   0), INT8_C(  30), INT8_C( -29),
+                           INT8_C( 118), INT8_C(  94), INT8_C(   0), INT8_C( -29),
+                           INT8_C(   0), INT8_C(   0), INT8_C(  31), INT8_C(   0)) },
+    { simde_mm512_set_epi8(INT8_C(-109), INT8_C(  14), INT8_C( -91), INT8_C( -44),
+                           INT8_C(  79), INT8_C(  -4), INT8_C(  88), INT8_C(  49),
+                           INT8_C( -95), INT8_C( -92), INT8_C(-116), INT8_C(   8),
+                           INT8_C(  25), INT8_C(  30), INT8_C( -55), INT8_C(-120),
+                           INT8_C( -58), INT8_C( -84), INT8_C(  99), INT8_C( -92),
+                           INT8_C( -29), INT8_C( -47), INT8_C(  42), INT8_C( -56),
+                           INT8_C( 109), INT8_C( -71), INT8_C( -73), INT8_C( -30),
+                           INT8_C( -15), INT8_C(-106), INT8_C( -91), INT8_C( -57),
+                           INT8_C(  51), INT8_C(  -8), INT8_C(  55), INT8_C(  36),
+                           INT8_C( -55), INT8_C(-103), INT8_C(  51), INT8_C( -28),
+                           INT8_C(  70), INT8_C( 114), INT8_C(  -3), INT8_C(  49),
+                           INT8_C(  23), INT8_C( -28), INT8_C( -87), INT8_C(  57),
+                           INT8_C(  28), INT8_C( -12), INT8_C( -51), INT8_C(  37),
+                           INT8_C(  29), INT8_C( -58), INT8_C( -69), INT8_C(  30),
+                           INT8_C(  43), INT8_C( -65), INT8_C(  -2), INT8_C( -83),
+                           INT8_C( -27), INT8_C(-122), INT8_C(  77), INT8_C(  47)),
+      simde_mm512_set_epi8(INT8_C(-116), INT8_C(-127), INT8_C( -95), INT8_C( -97),
+                           INT8_C( -78), INT8_C( -53), INT8_C( -36), INT8_C(  62),
+                           INT8_C(  63), INT8_C(  28), INT8_C( -37), INT8_C(  40),
+                           INT8_C( -14), INT8_C(   2), INT8_C( -65), INT8_C( -67),
+                           INT8_C(  79), INT8_C(  51), INT8_C( -51), INT8_C( -64),
+                           INT8_C( -40), INT8_C(-102), INT8_C(-107), INT8_C(  82),
+                           INT8_C(  20), INT8_C(  54), INT8_C(  66), INT8_C(   7),
+                           INT8_C(  33), INT8_C( 108), INT8_C( -88), INT8_C(  69),
+                           INT8_C(  98), INT8_C(  43), INT8_C(  25), INT8_C(  58),
+                           INT8_C( 117), INT8_C( -34), INT8_C( -70), INT8_C(   3),
+                           INT8_C(   1), INT8_C( 100), INT8_C(   3), INT8_C(  14),
+                           INT8_C( 117), INT8_C( -91), INT8_C( -89), INT8_C(  66),
+                           INT8_C(  32), INT8_C( -35), INT8_C( 104), INT8_C(  65),
+                           INT8_C(-100), INT8_C(  27), INT8_C(  42), INT8_C(  91),
+                           INT8_C( -47), INT8_C(  39), INT8_C( -65), INT8_C(-124),
+                           INT8_C( -84), INT8_C( -95), INT8_C( -46), INT8_C(-113)),
+      simde_mm512_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                           INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(  14),
+                           INT8_C(-109), INT8_C( -44), INT8_C(   0), INT8_C(  49),
+                           INT8_C(   0), INT8_C(  30), INT8_C(   0), INT8_C(   0),
+                           INT8_C( -58), INT8_C( -15), INT8_C(   0), INT8_C(   0),
+                           INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(-106),
+                           INT8_C( -30), INT8_C( -71), INT8_C(-106), INT8_C( 109),
+                           INT8_C( -91), INT8_C( -92), INT8_C(   0), INT8_C( -73),
+                           INT8_C( -28), INT8_C( -55), INT8_C(  51), INT8_C(-103),
+                           INT8_C(  -3), INT8_C(   0), INT8_C(   0), INT8_C(  23),
+                           INT8_C( -87), INT8_C(  49), INT8_C(  23), INT8_C(  -8),
+                           INT8_C(  -3), INT8_C(   0), INT8_C(   0), INT8_C( -28),
+                           INT8_C(  47), INT8_C(   0), INT8_C(  30), INT8_C(  77),
+                           INT8_C(   0), INT8_C(  29), INT8_C( -58), INT8_C(  29),
+                           INT8_C(   0), INT8_C(  43), INT8_C(   0), INT8_C(   0),
+                           INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0)) },
+    { simde_mm512_set_epi8(INT8_C(-119), INT8_C(  55), INT8_C( -13), INT8_C(-118),
+                           INT8_C(   5), INT8_C(  14), INT8_C(-103), INT8_C(  58),
+                           INT8_C( 109), INT8_C(-113), INT8_C( -44), INT8_C( 126),
+                           INT8_C(  70), INT8_C(-126), INT8_C(  24), INT8_C(-117),
+                           INT8_C(  80), INT8_C(  25), INT8_C(  64), INT8_C( -89),
+                           INT8_C(  60), INT8_C(   5), INT8_C(-119), INT8_C( -29),
+                           INT8_C( -95), INT8_C( -26), INT8_C(  25), INT8_C( -65),
+                           INT8_C( -81), INT8_C( -84), INT8_C(  46), INT8_C(  62),
+                           INT8_C( -25), INT8_C(-116), INT8_C(-126), INT8_C(-103),
+                           INT8_C( 126), INT8_C( -39), INT8_C( 111), INT8_C( -10),
+                           INT8_C( -48), INT8_C( 122), INT8_C( -86), INT8_C( -25),
+                           INT8_C(  35), INT8_C(  -4), INT8_C(  -7), INT8_C(  76),
+                           INT8_C(-116), INT8_C(   1), INT8_C( 126), INT8_C( -30),
+                           INT8_C( -11), INT8_C(  82), INT8_C(  81), INT8_C(  27),
+                           INT8_C( -15), INT8_C( -14), INT8_C(  56), INT8_C(  60),
+                           INT8_C( -29), INT8_C( -65), INT8_C( -35), INT8_C(  21)),
+      simde_mm512_set_epi8(INT8_C( 125), INT8_C( -43), INT8_C( -42), INT8_C( -51),
+                           INT8_C( -36), INT8_C(  38), INT8_C(  42), INT8_C(  18),
+                           INT8_C(  24), INT8_C( -25), INT8_C(  19), INT8_C(  15),
+                           INT8_C(-119), INT8_C(-124), INT8_C( 104), INT8_C( -93),
+                           INT8_C(  22), INT8_C( -16), INT8_C(  31), INT8_C( 116),
+                           INT8_C( -99), INT8_C(  45), INT8_C( -54), INT8_C(  50),
+                           INT8_C( -72), INT8_C(-107), INT8_C(  48), INT8_C( 118),
+                           INT8_C( -68), INT8_C( -15), INT8_C(-108), INT8_C( -29),
+                           INT8_C( 118), INT8_C(  96), INT8_C( -51), INT8_C( 107),
+                           INT8_C( -23), INT8_C( -97), INT8_C(  -6), INT8_C(  91),
+                           INT8_C( -57), INT8_C( -67), INT8_C(-105), INT8_C(  68),
+                           INT8_C( -50), INT8_C(  16), INT8_C( -86), INT8_C(   6),
+                           INT8_C(  55), INT8_C(-118), INT8_C( 112), INT8_C( 125),
+                           INT8_C(  92), INT8_C(  91), INT8_C( -27), INT8_C(  53),
+                           INT8_C(  28), INT8_C( -66), INT8_C( -69), INT8_C(-117),
+                           INT8_C( -30), INT8_C(-106), INT8_C(   1), INT8_C(  95)),
+      simde_mm512_set_epi8(INT8_C( -13), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                           INT8_C(   0), INT8_C(-113), INT8_C(  14), INT8_C(-126),
+                           INT8_C(  58), INT8_C(   0), INT8_C(  70), INT8_C(-119),
+                           INT8_C(   0), INT8_C(   0), INT8_C(  58), INT8_C(   0),
+                           INT8_C( -26), INT8_C(   0), INT8_C(  80), INT8_C( -65),
+                           INT8_C(   0), INT8_C(  64), INT8_C(   0), INT8_C( -84),
+                           INT8_C(   0), INT8_C(   0), INT8_C(  62), INT8_C( -26),
+                           INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                           INT8_C( 122), INT8_C(  76), INT8_C(   0), INT8_C( 126),
+                           INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C( 126),
+                           INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C( -25),
+                           INT8_C(   0), INT8_C(  76), INT8_C(   0), INT8_C( 122),
+                           INT8_C( -15), INT8_C(   0), INT8_C(  21), INT8_C( 126),
+                           INT8_C( -30), INT8_C( -11), INT8_C(   0), INT8_C(  56),
+                           INT8_C( -30), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                           INT8_C(   0), INT8_C(   0), INT8_C( -35), INT8_C(-116)) },
+    { simde_mm512_set_epi8(INT8_C(  18), INT8_C(-124), INT8_C(  -6), INT8_C( -41),
+                           INT8_C(-109), INT8_C(  19), INT8_C(  47), INT8_C(  73),
+                           INT8_C( -58), INT8_C( -50), INT8_C(  73), INT8_C( 110),
+                           INT8_C(  56), INT8_C( -30), INT8_C( -79), INT8_C( 123),
+                           INT8_C(  77), INT8_C(  62), INT8_C( -28), INT8_C(  45),
+                           INT8_C( -95), INT8_C(  12), INT8_C(  53), INT8_C(  75),
+                           INT8_C(  66), INT8_C( -89), INT8_C(  46), INT8_C( -47),
+                           INT8_C(   6), INT8_C(   4), INT8_C( 112), INT8_C( -34),
+                           INT8_C( 121), INT8_C(  -3), INT8_C(-102), INT8_C(-115),
+                           INT8_C( -12), INT8_C(  80), INT8_C(   3), INT8_C(  56),
+                           INT8_C(  85), INT8_C( -43), INT8_C( -93), INT8_C(  34),
+                           INT8_C( -58), INT8_C(-101), INT8_C(  51), INT8_C(   1),
+                           INT8_C( -18), INT8_C( -90), INT8_C( -71), INT8_C(  74),
+                           INT8_C(  -5), INT8_C(-113), INT8_C(  71), INT8_C(  61),
+                           INT8_C(  92), INT8_C(  73), INT8_C( 116), INT8_C( 110),
+                           INT8_C(-117), INT8_C(  96), INT8_C( -77), INT8_C(  45)),
+      simde_mm512_set_epi8(INT8_C(  52), INT8_C( -84), INT8_C( 103), INT8_C(-105),
+                           INT8_C( -57), INT8_C(  17), INT8_C(-110), INT8_C(-113),
+                           INT8_C(  56), INT8_C( 125), INT8_C(  56), INT8_C( -88),
+                           INT8_C( -31), INT8_C( 119), INT8_C( -86), INT8_C( 114),
+                           INT8_C( -85), INT8_C(  14), INT8_C(  60), INT8_C(  84),
+                           INT8_C( -61), INT8_C(-111), INT8_C(  74), INT8_C( 113),
+                           INT8_C(  13), INT8_C( -89), INT8_C(-114), INT8_C( -79),
+                           INT8_C( -34), INT8_C( -73), INT8_C(  -9), INT8_C(-108),
+                           INT8_C(  93), INT8_C( -78), INT8_C(  -1), INT8_C(-102),
+                           INT8_C(  12), INT8_C( 117), INT8_C(  80), INT8_C(  44),
+                           INT8_C(  29), INT8_C(  18), INT8_C(  91), INT8_C( -23),
+                           INT8_C( 110), INT8_C(-126), INT8_C( -71), INT8_C(  65),
+                           INT8_C( -76), INT8_C(  85), INT8_C(  93), INT8_C( -86),
+                           INT8_C(-123), INT8_C(-115), INT8_C(-105), INT8_C(  75),
+                           INT8_C(  45), INT8_C(  22), INT8_C(  73), INT8_C(  74),
+                           INT8_C( 107), INT8_C( -60), INT8_C( -28), INT8_C( -57)),
+      simde_mm512_set_epi8(INT8_C( 110), INT8_C(   0), INT8_C( -58), INT8_C(   0),
+                           INT8_C(   0), INT8_C( -79), INT8_C(   0), INT8_C(   0),
+                           INT8_C(  73), INT8_C(  -6), INT8_C(  73), INT8_C(   0),
+                           INT8_C(   0), INT8_C( -58), INT8_C(   0), INT8_C( -30),
+                           INT8_C(   0), INT8_C(  62), INT8_C(  45), INT8_C( -47),
+                           INT8_C(   0), INT8_C(   0), INT8_C(  12), INT8_C( 112),
+                           INT8_C( -28), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                           INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                           INT8_C(-102), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                           INT8_C(-115), INT8_C( -93), INT8_C(   1), INT8_C(-115),
+                           INT8_C(-102), INT8_C(-101), INT8_C( -12), INT8_C(   0),
+                           INT8_C(  -3), INT8_C(   0), INT8_C(   0), INT8_C(  51),
+                           INT8_C(   0), INT8_C( 116), INT8_C( -71), INT8_C(   0),
+                           INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(  -5),
+                           INT8_C( -71), INT8_C(  73), INT8_C(  71), INT8_C(-113),
+                           INT8_C(  -5), INT8_C(   0), INT8_C(   0), INT8_C(   0)) },
+    { simde_mm512_set_epi8(INT8_C(   4), INT8_C( -30), INT8_C( -68), INT8_C(  57),
+                           INT8_C(  73), INT8_C(  25), INT8_C( -45), INT8_C( 112),
+                           INT8_C(  41), INT8_C(  41), INT8_C( -56), INT8_C( -93),
+                           INT8_C(  29), INT8_C(  99), INT8_C(  15), INT8_C( 122),
+                           INT8_C(  74), INT8_C(   0), INT8_C(  89), INT8_C(  61),
+                           INT8_C( 124), INT8_C(  36), INT8_C( -87), INT8_C( -25),
+                           INT8_C(  42), INT8_C(-125), INT8_C( -87), INT8_C(  69),
+                           INT8_C(  81), INT8_C(   4), INT8_C(  55), INT8_C( 107),
+                           INT8_C(   0), INT8_C(  -7), INT8_C(  37), INT8_C( -15),
+                           INT8_C( 117), INT8_C(   9), INT8_C( -27), INT8_C(  11),
+                           INT8_C(  16), INT8_C(  87), INT8_C( 119), INT8_C(-107),
+                           INT8_C( -22), INT8_C(  27), INT8_C( -86), INT8_C( -63),
+                           INT8_C(  90), INT8_C( -44), INT8_C(-113), INT8_C(-126),
+                           INT8_C( -75), INT8_C(  67), INT8_C(  88), INT8_C( 105),
+                           INT8_C(  80), INT8_C(   7), INT8_C(  66), INT8_C( 126),
+                           INT8_C(  17), INT8_C( -20), INT8_C( -91), INT8_C( -99)),
+      simde_mm512_set_epi8(INT8_C( 123), INT8_C( 117), INT8_C(  18), INT8_C(   2),
+                           INT8_C(-105), INT8_C( -51), INT8_C(  -9), INT8_C(  72),
+                           INT8_C(  69), INT8_C(-116), INT8_C(  95), INT8_C(-110),
+                           INT8_C( -32), INT8_C( 110), INT8_C(  -8), INT8_C(  85),
+                           INT8_C( -41), INT8_C(  80), INT8_C( -44), INT8_C( 110),
+                           INT8_C( -28), INT8_C( 116), INT8_C(  41), INT8_C( -31),
+                           INT8_C(  12), INT8_C(  40), INT8_C(   5), INT8_C(   1),
+                           INT8_C( -14), INT8_C(  66), INT8_C( -95), INT8_C( 106),
+                           INT8_C(  66), INT8_C( -91), INT8_C( -20), INT8_C(-109),
+                           INT8_C( -46), INT8_C(  41), INT8_C(  82), INT8_C( -61),
+                           INT8_C(   2), INT8_C(   3), INT8_C(  62), INT8_C( -90),
+                           INT8_C(  99), INT8_C( -54), INT8_C( -28), INT8_C( -21),
+                           INT8_C( 126), INT8_C( 115), INT8_C( -46), INT8_C(  76),
+                           INT8_C( -43), INT8_C(  46), INT8_C( 107), INT8_C(  75),
+                           INT8_C( -93), INT8_C(  67), INT8_C( -37), INT8_C(   4),
+                           INT8_C(-127), INT8_C( -50), INT8_C( -26), INT8_C(  99)),
+      simde_mm512_set_epi8(INT8_C(  73), INT8_C( -56), INT8_C(  99), INT8_C(  99),
+                           INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C( 112),
+                           INT8_C( -56), INT8_C(   0), INT8_C(   4), INT8_C(   0),
+                           INT8_C(   0), INT8_C( -30), INT8_C(   0), INT8_C( -56),
+                           INT8_C(   0), INT8_C( 107), INT8_C(   0), INT8_C(   0),
+                           INT8_C(   0), INT8_C(  69), INT8_C( -87), INT8_C(   0),
+                           INT8_C(  61), INT8_C( -25), INT8_C( -87), INT8_C(  55),
+                           INT8_C(   0), INT8_C(   4), INT8_C(   0), INT8_C(  36),
+                           INT8_C(  27), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                           INT8_C(   0), INT8_C( -27), INT8_C(  27), INT8_C(   0),
+                           INT8_C(  27), INT8_C( -22), INT8_C(  -7), INT8_C(   0),
+                           INT8_C( -22), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                           INT8_C( -44), INT8_C(  17), INT8_C(   0), INT8_C(-126),
+                           INT8_C(   0), INT8_C( -44), INT8_C( -75), INT8_C( -75),
+                           INT8_C(   0), INT8_C(  17), INT8_C(   0), INT8_C( 126),
+                           INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(  17)) },
+    { simde_mm512_set_epi8(INT8_C(  56), INT8_C(  24), INT8_C( -33), INT8_C(  82),
+                           INT8_C(  72), INT8_C(  63), INT8_C(-114), INT8_C(  14),
+                           INT8_C(   2), INT8_C(  71), INT8_C( -31), INT8_C( -46),
+                           INT8_C(  64), INT8_C( -11), INT8_C(  35), INT8_C(  -7),
+                           INT8_C( -78), INT8_C( 125), INT8_C( -28), INT8_C( -59),
+                           INT8_C(   6), INT8_C(-127), INT8_C(  34), INT8_C(  64),
+                           INT8_C(  88), INT8_C( -88), INT8_C(  70), INT8_C( -90),
+                           INT8_C(  47), INT8_C( -21), INT8_C(-104), INT8_C( -27),
+                           INT8_C( -16), INT8_C( -29), INT8_C(  51), INT8_C(-116),
+                           INT8_C( -87), INT8_C(  91), INT8_C(-106), INT8_C(  -3),
+                           INT8_C( -55), INT8_C( -15), INT8_C(-109), INT8_C(-108),
+                           INT8_C( -41), INT8_C(  79), INT8_C(  54), INT8_C(  68),
+                           INT8_C( -51), INT8_C(-108), INT8_C( -83), INT8_C( 104),
+                           INT8_C(  18), INT8_C(-128), INT8_C( -75), INT8_C( -12),
+                           INT8_C(  56), INT8_C(-104), INT8_C(  56), INT8_C(  47),
+                           INT8_C( -30), INT8_C( -15), INT8_C(-115), INT8_C(  88)),
+      simde_mm512_set_epi8(INT8_C( 107), INT8_C( 125), INT8_C(-111), INT8_C( -89),
+                           INT8_C(  -9), INT8_C( 122), INT8_C(  -2), INT8_C( -26),
+                           INT8_C(-100), INT8_C(  80), INT8_C(  89), INT8_C(  38),
+                           INT8_C(  95), INT8_C( -82), INT8_C( -66), INT8_C( 108),
+                           INT8_C( 104), INT8_C( -20), INT8_C(  72), INT8_C( -82),
+                           INT8_C(  23), INT8_C( -45), INT8_C( -20), INT8_C(   8),
+                           INT8_C(  -2), INT8_C( -77), INT8_C(-112), INT8_C( 102),
+                           INT8_C(  83), INT8_C(  80), INT8_C(  56), INT8_C( -53),
+                           INT8_C( -83), INT8_C( -86), INT8_C(  15), INT8_C(  30),
+                           INT8_C( -92), INT8_C(  -2), INT8_C(  42), INT8_C( -79),
+                           INT8_C( 108), INT8_C(  44), INT8_C(  17), INT8_C(  83),
+                           INT8_C( -49), INT8_C( -44), INT8_C( -42), INT8_C( -54),
+                           INT8_C(-127), INT8_C(  12), INT8_C(  88), INT8_C(-118),
+                           INT8_C( -23), INT8_C( -69), INT8_C( -23), INT8_C( -36),
+                           INT8_C( -23), INT8_C( -80), INT8_C(  33), INT8_C(  82),
+                           INT8_C( -43), INT8_C( -91), INT8_C( -36), INT8_C( -56)),
+      simde_mm512_set_epi8(INT8_C(  72), INT8_C( -33), INT8_C(   0), INT8_C(   0),
+                           INT8_C(   0), INT8_C(  63), INT8_C(   0), INT8_C(   0),
+                           INT8_C(   0), INT8_C(  -7), INT8_C(-114), INT8_C(  71),
+                           INT8_C(  56), INT8_C(   0), INT8_C(   0), INT8_C(  82),
+                           INT8_C(  64), INT8_C(   0), INT8_C(  64), INT8_C(   0),
+                           INT8_C(  88), INT8_C(   0), INT8_C(   0), INT8_C(  64),
+                           INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C( -88),
+                           INT8_C(  47), INT8_C( -27), INT8_C(  64), INT8_C(   0),
+                           INT8_C(   0), INT8_C(   0), INT8_C( -16), INT8_C( -29),
+                           INT8_C(   0), INT8_C(   0), INT8_C(  91), INT8_C(   0),
+                           INT8_C(-116), INT8_C(-116), INT8_C(  54), INT8_C( -41),
+                           INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                           INT8_C(   0), INT8_C( 104), INT8_C( -12), INT8_C(   0),
+                           INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                           INT8_C(   0), INT8_C(   0), INT8_C(-115), INT8_C( -15),
+                           INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0)) },
+  };
+
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+    simde__m512i r = simde_mm512_shuffle_epi8(test_vec[i].a, test_vec[i].b);
+    simde_assert_m512i_i8(r, ==, test_vec[i].r);
+  }
+
+  return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm512_cmpeq_epi8_mask(const MunitParameter params[], void* data) {
+  (void) params;
+  (void) data;
+
+  const struct {
+    simde__m512i   a;
+    simde__m512i   b;
+    simde__mmask64 r;
+  } test_vec[8] = {
+    { simde_mm512_set_epi8(INT8_C(  73), INT8_C(  68), INT8_C( -71), INT8_C( -32),
+                           INT8_C( 100), INT8_C( 125), INT8_C(  89), INT8_C(  95),
+                           INT8_C( -23), INT8_C(  76), INT8_C(  84), INT8_C( -43),
+                           INT8_C(  86), INT8_C(  29), INT8_C(  64), INT8_C(  30),
+                           INT8_C( -17), INT8_C( -61), INT8_C( 115), INT8_C( -53),
+                           INT8_C(-100), INT8_C( 104), INT8_C( 111), INT8_C( -59),
+                           INT8_C( -31), INT8_C(  20), INT8_C(  31), INT8_C(-121),
+                           INT8_C(  90), INT8_C(  18), INT8_C(   6), INT8_C(  39),
+                           INT8_C(  15), INT8_C(  62), INT8_C(  39), INT8_C( -25),
+                           INT8_C(  60), INT8_C( 110), INT8_C(  45), INT8_C( 113),
+                           INT8_C(  81), INT8_C(  78), INT8_C( -53), INT8_C(  11),
+                           INT8_C( -27), INT8_C(-113), INT8_C(  -3), INT8_C(  14),
+                           INT8_C( 109), INT8_C(  43), INT8_C( -54), INT8_C( 111),
+                           INT8_C( -91), INT8_C( -21), INT8_C( 102), INT8_C(   8),
+                           INT8_C( -41), INT8_C( -47), INT8_C( -90), INT8_C(   3),
+                           INT8_C(  18), INT8_C(  32), INT8_C(  89), INT8_C( -62)),
+      simde_mm512_set_epi8(INT8_C(  73), INT8_C(  68), INT8_C( -71), INT8_C( -46),
+                           INT8_C(  72), INT8_C(  42), INT8_C(  65), INT8_C( -27),
+                           INT8_C( -37), INT8_C(  76), INT8_C(  84), INT8_C( -95),
+                           INT8_C( -29), INT8_C(  29), INT8_C(  64), INT8_C(  75),
+                           INT8_C( -17), INT8_C( -61), INT8_C(  76), INT8_C(  88),
+                           INT8_C(-100), INT8_C( -91), INT8_C( 111), INT8_C( -59),
+                           INT8_C( -31), INT8_C( -17), INT8_C(  31), INT8_C(  68),
+                           INT8_C(  90), INT8_C(  18), INT8_C(-110), INT8_C(  39),
+                           INT8_C(  15), INT8_C(  62), INT8_C(  24), INT8_C( -25),
+                           INT8_C(  80), INT8_C( 122), INT8_C( 103), INT8_C(-115),
+                           INT8_C(  81), INT8_C(  24), INT8_C(  32), INT8_C(  82),
+                           INT8_C( -27), INT8_C( 105), INT8_C(  -3), INT8_C(  14),
+                           INT8_C( 109), INT8_C(  43), INT8_C( -54), INT8_C(-121),
+                           INT8_C( -91), INT8_C(-124), INT8_C( 102), INT8_C(  46),
+                           INT8_C( -41), INT8_C( 101), INT8_C(  51), INT8_C(   3),
+                           INT8_C(  18), INT8_C(  32), INT8_C( -12), INT8_C( -62)),
+      UINT64_C(0xe066cbadd08bea9d) },
+    { simde_mm512_set_epi8(INT8_C( -84), INT8_C(-108), INT8_C(  13), INT8_C( -97),
+                           INT8_C( -34), INT8_C(  27), INT8_C( 124), INT8_C(-120),
+                           INT8_C(   3), INT8_C(  26), INT8_C(  43), INT8_C( -96),
+                           INT8_C( -63), INT8_C(  49), INT8_C( 127), INT8_C(   0),
+                           INT8_C( -20), INT8_C( -31), INT8_C( 125), INT8_C(  14),
+                           INT8_C( -53), INT8_C( -87), INT8_C( 115), INT8_C( -20),
+                           INT8_C( -93), INT8_C(  70), INT8_C(  29), INT8_C( -90),
+                           INT8_C( 105), INT8_C( -54), INT8_C(  96), INT8_C(-106),
+                           INT8_C(  94), INT8_C( -52), INT8_C( -50), INT8_C( -79),
+                           INT8_C( -54), INT8_C(  78), INT8_C(  17), INT8_C(  81),
+                           INT8_C(  35), INT8_C( 120), INT8_C(  47), INT8_C( -25),
+                           INT8_C( 110), INT8_C(  55), INT8_C(  40), INT8_C( -31),
+                           INT8_C(  93), INT8_C( -23), INT8_C(   4), INT8_C(  45),
+                           INT8_C(  59), INT8_C( -33), INT8_C( 124), INT8_C( -52),
+                           INT8_C(  42), INT8_C( -14), INT8_C(-119), INT8_C(  88),
+                           INT8_C( -38), INT8_C( -31), INT8_C(-113), INT8_C(  33)),
+      simde_mm512_set_epi8(INT8_C(  25), INT8_C(-108), INT8_C(  13), INT8_C( -97),
+                           INT8_C( -34), INT8_C( 119), INT8_C( 122), INT8_C(  82),
+                           INT8_C(   3), INT8_C(-111), INT8_C(  60), INT8_C( -96),
+                           INT8_C(  26), INT8_C(   3), INT8_C(  -7), INT8_C(  -8),
+                           INT8_C(  94), INT8_C( -31), INT8_C( -71), INT8_C(  14),
+                           INT8_C(-105), INT8_C( -87), INT8_C( 115), INT8_C( -68),
+                           INT8_C( -93), INT8_C(  70), INT8_C( -47), INT8_C(-106),
+                           INT8_C( 105), INT8_C( -54), INT8_C(  96), INT8_C( 105),
+                           INT8_C(  94), INT8_C(  84), INT8_C( -50), INT8_C( -79),
+                           INT8_C( -54), INT8_C(  78), INT8_C(  17), INT8_C(  39),
+                           INT8_C(  35), INT8_C( -87), INT8_C( -83), INT8_C( -25),
+                           INT8_C( 110), INT8_C(   2), INT8_C( -90), INT8_C( -31),
+                           INT8_C(  19), INT8_C( -23), INT8_C(   4), INT8_C(  16),
+                           INT8_C(  59), INT8_C( -33), INT8_C( 124), INT8_C( 127),
+                           INT8_C( -60), INT8_C( -14), INT8_C(-119), INT8_C(  88),
+                           INT8_C( -38), INT8_C( 109), INT8_C(-113), INT8_C(  25)),
+      UINT64_C(0x789056cebe996e7a) },
+    { simde_mm512_set_epi8(INT8_C(  93), INT8_C( 110), INT8_C( 120), INT8_C(  25),
+                           INT8_C( -37), INT8_C( -25), INT8_C( -34), INT8_C(-108),
+                           INT8_C( -77), INT8_C(-114), INT8_C(  79), INT8_C( -50),
+                           INT8_C( -94), INT8_C(  22), INT8_C( -28), INT8_C(-105),
+                           INT8_C( 110), INT8_C(  29), INT8_C(  -9), INT8_C( -13),
+                           INT8_C( -71), INT8_C( 107), INT8_C(-115), INT8_C(  86),
+                           INT8_C(-127), INT8_C(-100), INT8_C(   1), INT8_C(  21),
+                           INT8_C( -55), INT8_C( -85), INT8_C( -55), INT8_C( -81),
+                           INT8_C( -41), INT8_C(  39), INT8_C(  18), INT8_C( -92),
+                           INT8_C(  11), INT8_C( -32), INT8_C( -53), INT8_C(  38),
+                           INT8_C( -49), INT8_C(-118), INT8_C(  20), INT8_C(  66),
+                           INT8_C(-106), INT8_C(-109), INT8_C(  45), INT8_C( -24),
+                           INT8_C( -47), INT8_C(  95), INT8_C(  50), INT8_C( 105),
+                           INT8_C(  58), INT8_C(  25), INT8_C( -53), INT8_C( -61),
+                           INT8_C( -90), INT8_C(  92), INT8_C(  83), INT8_C( 120),
+                           INT8_C( 107), INT8_C( -72), INT8_C(   3), INT8_C(  -1)),
+      simde_mm512_set_epi8(INT8_C(  93), INT8_C( 110), INT8_C( 120), INT8_C( -75),
+                           INT8_C( -37), INT8_C( -25), INT8_C(  14), INT8_C(-108),
+                           INT8_C(   5), INT8_C(-114), INT8_C(  79), INT8_C(  57),
+                           INT8_C( -94), INT8_C(  22), INT8_C(   4), INT8_C(   3),
+                           INT8_C(  66), INT8_C(  29), INT8_C(  -9), INT8_C(  20),
+                           INT8_C(  93), INT8_C( 107), INT8_C(   6), INT8_C(  86),
+                           INT8_C(  84), INT8_C(-100), INT8_C(  81), INT8_C(  21),
+                           INT8_C( 117), INT8_C(  22), INT8_C( -55), INT8_C( -81),
+                           INT8_C( -41), INT8_C( -17), INT8_C(  19), INT8_C( 106),
+                           INT8_C( 114), INT8_C( -32), INT8_C(  98), INT8_C( -16),
+                           INT8_C( -49), INT8_C(-118), INT8_C(  20), INT8_C(  66),
+                           INT8_C(-106), INT8_C(-109), INT8_C(  97), INT8_C(  29),
+                           INT8_C( -47), INT8_C( -47), INT8_C(  50), INT8_C(-111),
+                           INT8_C(  58), INT8_C( 115), INT8_C( -53), INT8_C(  93),
+                           INT8_C( -90), INT8_C( -27), INT8_C(  41), INT8_C( 120),
+                           INT8_C( 119), INT8_C(  86), INT8_C( -36), INT8_C(  -1)),
+      UINT64_C(0xed6c655384fcaa91) },
+    { simde_mm512_set_epi8(INT8_C(  10), INT8_C(  75), INT8_C(  91), INT8_C( -99),
+                           INT8_C( -88), INT8_C(  99), INT8_C( -86), INT8_C(  96),
+                           INT8_C(  14), INT8_C(  -1), INT8_C(  14), INT8_C( 100),
+                           INT8_C(-114), INT8_C(  63), INT8_C(  68), INT8_C(-113),
+                           INT8_C( -59), INT8_C( -42), INT8_C( -14), INT8_C(-111),
+                           INT8_C(   6), INT8_C(  68), INT8_C(  11), INT8_C(-108),
+                           INT8_C( -62), INT8_C(  87), INT8_C( -72), INT8_C( -23),
+                           INT8_C(  78), INT8_C( -18), INT8_C( -36), INT8_C(  -6),
+                           INT8_C( -68), INT8_C(-115), INT8_C( -24), INT8_C( 127),
+                           INT8_C( -36), INT8_C(  21), INT8_C(  38), INT8_C(-106),
+                           INT8_C(  33), INT8_C( -66), INT8_C(-121), INT8_C(  36),
+                           INT8_C(  24), INT8_C(  61), INT8_C(  66), INT8_C(  20),
+                           INT8_C(  63), INT8_C( -18), INT8_C(  11), INT8_C(-103),
+                           INT8_C( -19), INT8_C( -42), INT8_C( -69), INT8_C(  53),
+                           INT8_C( -40), INT8_C( 112), INT8_C(   8), INT8_C( -69),
+                           INT8_C(-102), INT8_C(  62), INT8_C(  85), INT8_C(  62)),
+      simde_mm512_set_epi8(INT8_C(  10), INT8_C(  75), INT8_C( -74), INT8_C( -47),
+                           INT8_C( -88), INT8_C(  99), INT8_C( -86), INT8_C(-128),
+                           INT8_C(  94), INT8_C(  -1), INT8_C(  99), INT8_C( 100),
+                           INT8_C( -25), INT8_C(   7), INT8_C(  59), INT8_C(-113),
+                           INT8_C( 119), INT8_C( -42), INT8_C( -14), INT8_C(  79),
+                           INT8_C(   4), INT8_C(-111), INT8_C(  11), INT8_C(  80),
+                           INT8_C( -78), INT8_C(  87), INT8_C( -72), INT8_C(-111),
+                           INT8_C( -95), INT8_C( -18), INT8_C( -36), INT8_C( -40),
+                           INT8_C( -68), INT8_C(-115), INT8_C( -24), INT8_C( -50),
+                           INT8_C( -36), INT8_C(  10), INT8_C(  47), INT8_C(  62),
+                           INT8_C( -15), INT8_C( -66), INT8_C(-122), INT8_C(  36),
+                           INT8_C( -22), INT8_C(  61), INT8_C( -11), INT8_C(  20),
+                           INT8_C(  63), INT8_C(  82), INT8_C(-113), INT8_C(-103),
+                           INT8_C( -19), INT8_C( -42), INT8_C( -69), INT8_C(  53),
+                           INT8_C( -40), INT8_C( 112), INT8_C(   8), INT8_C( -69),
+                           INT8_C(-102), INT8_C(  23), INT8_C(  85), INT8_C(  62)),
+      UINT64_C(0xce516266e8559ffb) },
+    { simde_mm512_set_epi8(INT8_C(  -7), INT8_C(   2), INT8_C(-111), INT8_C(  64),
+                           INT8_C(-100), INT8_C(  87), INT8_C( 100), INT8_C( -30),
+                           INT8_C( -39), INT8_C( -38), INT8_C( 121), INT8_C(  55),
+                           INT8_C( -64), INT8_C(  81), INT8_C(  -3), INT8_C(  79),
+                           INT8_C( -41), INT8_C( 118), INT8_C( -37), INT8_C( -34),
+                           INT8_C( -13), INT8_C(  63), INT8_C(  26), INT8_C( -81),
+                           INT8_C(  90), INT8_C(  43), INT8_C( -31), INT8_C( -17),
+                           INT8_C(-100), INT8_C( -71), INT8_C(-104), INT8_C( -66),
+                           INT8_C( -94), INT8_C( -89), INT8_C( 100), INT8_C(  36),
+                           INT8_C(  17), INT8_C( 116), INT8_C( -30), INT8_C(  16),
+                           INT8_C( 110), INT8_C(  98), INT8_C(  11), INT8_C( -42),
+                           INT8_C( -78), INT8_C( -68), INT8_C( -26), INT8_C( -35),
+                           INT8_C(  12), INT8_C( -40), INT8_C( -27), INT8_C( -40),
+                           INT8_C(-102), INT8_C(-109), INT8_C(  39), INT8_C(  29),
+                           INT8_C(  21), INT8_C(   9), INT8_C(  49), INT8_C( -13),
+                           INT8_C( -49), INT8_C(   7), INT8_C(  91), INT8_C(  15)),
+      simde_mm512_set_epi8(INT8_C(  78), INT8_C(   2), INT8_C( -91), INT8_C(  64),
+                           INT8_C(-100), INT8_C(  41), INT8_C( -34), INT8_C( -46),
+                           INT8_C( -39), INT8_C(  31), INT8_C(  13), INT8_C(  55),
+                           INT8_C( -42), INT8_C(  33), INT8_C(  -3), INT8_C(  79),
+                           INT8_C( -41), INT8_C( 118), INT8_C( -37), INT8_C(  90),
+                           INT8_C( -13), INT8_C(  63), INT8_C(  51), INT8_C( -81),
+                           INT8_C(  90), INT8_C(  43), INT8_C( -31), INT8_C(-112),
+                           INT8_C(-100), INT8_C(  41), INT8_C(-104), INT8_C( -66),
+                           INT8_C( -94), INT8_C( -89), INT8_C( -85), INT8_C(-109),
+                           INT8_C( 113), INT8_C( 116), INT8_C( 100), INT8_C(  16),
+                           INT8_C(   5), INT8_C( -50), INT8_C( -51), INT8_C( -42),
+                           INT8_C( -95), INT8_C( -68), INT8_C( -26), INT8_C( -35),
+                           INT8_C( -73), INT8_C(  71), INT8_C(  65), INT8_C( -40),
+                           INT8_C(-102), INT8_C(   7), INT8_C(  94), INT8_C(  29),
+                           INT8_C(  65), INT8_C(   9), INT8_C(  49), INT8_C( -13),
+                           INT8_C( -33), INT8_C(   7), INT8_C(-101), INT8_C(  15)),
+      UINT64_C(0x5893edebc5171975) },
+    { simde_mm512_set_epi8(INT8_C( -34), INT8_C( -12), INT8_C( 105), INT8_C(-124),
+                           INT8_C( -33), INT8_C( -79), INT8_C(  -6), INT8_C(  54),
+                           INT8_C(  81), INT8_C( -11), INT8_C(  67), INT8_C(  63),
+                           INT8_C( 103), INT8_C( 119), INT8_C( -89), INT8_C(  40),
+                           INT8_C(   8), INT8_C( -38), INT8_C(  71), INT8_C(  66),
+                           INT8_C(-106), INT8_C( -45), INT8_C(  18), INT8_C( 100),
+                           INT8_C( 122), INT8_C(  93), INT8_C( -42), INT8_C(   5),
+                           INT8_C( -39), INT8_C(  37), INT8_C( -70), INT8_C(  13),
+                           INT8_C(  99), INT8_C( -57), INT8_C( -88), INT8_C( -36),
+                           INT8_C(-103), INT8_C(  25), INT8_C(  94), INT8_C(-107),
+                           INT8_C( -32), INT8_C( -12), INT8_C( -14), INT8_C(  32),
+                           INT8_C( -38), INT8_C(  10), INT8_C(  89), INT8_C( -69),
+                           INT8_C(  -8), INT8_C(  69), INT8_C( -20), INT8_C(-122),
+                           INT8_C( -75), INT8_C( -71), INT8_C(   3), INT8_C( 102),
+                           INT8_C( 119), INT8_C( -58), INT8_C( -49), INT8_C(  80),
+                           INT8_C( -15), INT8_C( -97), INT8_C(  45), INT8_C(  96)),
+      simde_mm512_set_epi8(INT8_C( -34), INT8_C( -12), INT8_C(  81), INT8_C(-115),
+                           INT8_C( -33), INT8_C( -79), INT8_C(-117), INT8_C( -34),
+                           INT8_C(  81), INT8_C( -11), INT8_C( -63), INT8_C( -61),
+                           INT8_C(  53), INT8_C( 119), INT8_C(  26), INT8_C(  40),
+                           INT8_C(   8), INT8_C( -38), INT8_C(  25), INT8_C( -23),
+                           INT8_C( -16), INT8_C( -45), INT8_C( -64), INT8_C( 100),
+                           INT8_C(  91), INT8_C(  93), INT8_C( -42), INT8_C(   5),
+                           INT8_C(  81), INT8_C( -76), INT8_C( -70), INT8_C(  13),
+                           INT8_C(  26), INT8_C( -57), INT8_C( -88), INT8_C( -64),
+                           INT8_C( -68), INT8_C( -91), INT8_C(-123), INT8_C(  38),
+                           INT8_C( -32), INT8_C(  29), INT8_C(  82), INT8_C(  54),
+                           INT8_C(-107), INT8_C(  10), INT8_C(  89), INT8_C(  28),
+                           INT8_C( -27), INT8_C(  41), INT8_C( -20), INT8_C(-122),
+                           INT8_C( -75), INT8_C( -71), INT8_C(   3), INT8_C( -30),
+                           INT8_C(  97), INT8_C(  18), INT8_C( -90), INT8_C( 107),
+                           INT8_C(  99), INT8_C(  10), INT8_C(  45), INT8_C(  96)),
+      UINT64_C(0xccc5c57360863e03) },
+    { simde_mm512_set_epi8(INT8_C(  48), INT8_C(  94), INT8_C( 112), INT8_C(-107),
+                           INT8_C( -34), INT8_C( -86), INT8_C(  65), INT8_C(  92),
+                           INT8_C(  97), INT8_C( -99), INT8_C(  28), INT8_C(  47),
+                           INT8_C(-117), INT8_C( -22), INT8_C(-111), INT8_C( -67),
+                           INT8_C( 113), INT8_C(-107), INT8_C( -23), INT8_C(  77),
+                           INT8_C(  60), INT8_C( 104), INT8_C(-116), INT8_C( -86),
+                           INT8_C(-113), INT8_C( -79), INT8_C( -64), INT8_C( -15),
+                           INT8_C(-123), INT8_C(  99), INT8_C(  25), INT8_C(  27),
+                           INT8_C( -40), INT8_C( 126), INT8_C( -66), INT8_C( -45),
+                           INT8_C(  57), INT8_C( -30), INT8_C( -12), INT8_C(  16),
+                           INT8_C( 122), INT8_C( 124), INT8_C( -75), INT8_C(  50),
+                           INT8_C(  -6), INT8_C(  41), INT8_C( -47), INT8_C(  -3),
+                           INT8_C(  29), INT8_C( -20), INT8_C( -45), INT8_C( -46),
+                           INT8_C( -45), INT8_C( -14), INT8_C(  99), INT8_C(  84),
+                           INT8_C( -62), INT8_C( -99), INT8_C(-104), INT8_C( -78),
+                           INT8_C( 106), INT8_C(-117), INT8_C( -94), INT8_C(   3)),
+      simde_mm512_set_epi8(INT8_C( -88), INT8_C( -74), INT8_C(   2), INT8_C( -83),
+                           INT8_C(-112), INT8_C( -91), INT8_C(  65), INT8_C(  92),
+                           INT8_C(  97), INT8_C(  52), INT8_C(  28), INT8_C( -65),
+                           INT8_C(-117), INT8_C( -90), INT8_C(-111), INT8_C( -67),
+                           INT8_C( 113), INT8_C(  16), INT8_C(-124), INT8_C(  77),
+                           INT8_C(  60), INT8_C( -64), INT8_C(-116), INT8_C( -89),
+                           INT8_C(  -7), INT8_C( -79), INT8_C(  46), INT8_C( 114),
+                           INT8_C(-107), INT8_C(  99), INT8_C( -79), INT8_C(  80),
+                           INT8_C( -40), INT8_C( -81), INT8_C( -66), INT8_C( -45),
+                           INT8_C(  57), INT8_C( -30), INT8_C(  66), INT8_C(  71),
+                           INT8_C( 122), INT8_C(  95), INT8_C( -43), INT8_C(  50),
+                           INT8_C(  -6), INT8_C(  41), INT8_C( -47), INT8_C(  95),
+                           INT8_C(   5), INT8_C( -20), INT8_C( -45), INT8_C(-118),
+                           INT8_C(  70), INT8_C(  81), INT8_C(   0), INT8_C(  84),
+                           INT8_C( -62), INT8_C(-101), INT8_C(  13), INT8_C(  33),
+                           INT8_C(-104), INT8_C(-117), INT8_C( -11), INT8_C(  20)),
+      UINT64_C(0x3ab9a44bc9e6184) },
+    { simde_mm512_set_epi8(INT8_C(  67), INT8_C(  34), INT8_C( -33), INT8_C(  31),
+                           INT8_C(-128), INT8_C(  55), INT8_C(  93), INT8_C(  58),
+                           INT8_C(  57), INT8_C( 104), INT8_C(-110), INT8_C(  59),
+                           INT8_C(  55), INT8_C(  33), INT8_C(-122), INT8_C(  69),
+                           INT8_C(  57), INT8_C(  30), INT8_C( -13), INT8_C( -65),
+                           INT8_C( -22), INT8_C(-100), INT8_C(  18), INT8_C( -65),
+                           INT8_C( -60), INT8_C(-105), INT8_C(  27), INT8_C( -71),
+                           INT8_C(  52), INT8_C(  12), INT8_C(  -4), INT8_C(  64),
+                           INT8_C(  20), INT8_C(  51), INT8_C(  87), INT8_C(  43),
+                           INT8_C(  26), INT8_C(   6), INT8_C( -66), INT8_C( -40),
+                           INT8_C(  87), INT8_C(   1), INT8_C( -26), INT8_C(  92),
+                           INT8_C( -33), INT8_C(   8), INT8_C(  42), INT8_C( -93),
+                           INT8_C(  44), INT8_C( -55), INT8_C(-113), INT8_C( -43),
+                           INT8_C(  32), INT8_C( 105), INT8_C( -27), INT8_C(  96),
+                           INT8_C(  72), INT8_C(  48), INT8_C( -46), INT8_C(  24),
+                           INT8_C( -10), INT8_C( -98), INT8_C( -56), INT8_C( -41)),
+      simde_mm512_set_epi8(INT8_C(  67), INT8_C( -63), INT8_C( -33), INT8_C(  31),
+                           INT8_C(-128), INT8_C(  55), INT8_C(  93), INT8_C(  -8),
+                           INT8_C(  82), INT8_C( 104), INT8_C(-110), INT8_C(  59),
+                           INT8_C(  55), INT8_C(   0), INT8_C( -25), INT8_C(  69),
+                           INT8_C(  27), INT8_C(  30), INT8_C( -13), INT8_C( -65),
+                           INT8_C(  -7), INT8_C( -28), INT8_C(  18), INT8_C( -65),
+                           INT8_C(  67), INT8_C(  -3), INT8_C(  57), INT8_C( -68),
+                           INT8_C(  52), INT8_C(  12), INT8_C(  -4), INT8_C(-128),
+                           INT8_C(  20), INT8_C(  37), INT8_C(   9), INT8_C(  80),
+                           INT8_C(  26), INT8_C(   6), INT8_C( -66), INT8_C(   9),
+                           INT8_C( -98), INT8_C(   1), INT8_C( -26), INT8_C(  92),
+                           INT8_C( -33), INT8_C(   8), INT8_C( -81), INT8_C( -93),
+                           INT8_C( 116), INT8_C( -55), INT8_C(-113), INT8_C( -43),
+                           INT8_C(  32), INT8_C( 105), INT8_C( -27), INT8_C(  37),
+                           INT8_C(  72), INT8_C( -73), INT8_C( -19), INT8_C(  96),
+                           INT8_C(  52), INT8_C( -98), INT8_C( -45), INT8_C( -41)),
+      UINT64_C(0xbe79730e8e7d7e85) },
+  };
+
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+    simde__mmask64 r = simde_mm512_cmpeq_epi8_mask(test_vec[i].a, test_vec[i].b);
+    simde_assert_uint64(r, ==, test_vec[i].r);
+   }
+
+  return MUNIT_OK;
+}
+
 #endif /* defined(SIMDE_avx512bw_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) */
 
 HEDLEY_DIAGNOSTIC_PUSH
@@ -1506,6 +2194,9 @@ static MunitTest test_suite_tests[] = {
   SIMDE_TESTS_DEFINE_TEST(mm512_adds_epu8),
   SIMDE_TESTS_DEFINE_TEST(mm512_adds_epu16),
 
+  SIMDE_TESTS_DEFINE_TEST(mm512_shuffle_epi8),
+  SIMDE_TESTS_DEFINE_TEST(mm512_cmpeq_epi8_mask),
+
 #endif /* defined(SIMDE_AVX512bw_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) */
   { NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }
 };


=====================================
test/x86/avx512f.c
=====================================
@@ -2207,6 +2207,338 @@ test_simde_mm512_mask_cmpeq_epi32_mask(const MunitParameter params[], void* data
   return MUNIT_OK;
 }
 
+static MunitResult
+test_simde_mm512_cvtepi8_epi32(const MunitParameter params[], void* data) {
+  (void) params; /* not used by this test */
+  (void) data;   /* not used by this test */
+  /* Fixed cases for simde_mm512_cvtepi8_epi32: r repeats a's 16 values as 32-bit lanes (negatives stay negative). */
+  const struct {
+    simde__m128i a; /* input: 16 signed 8-bit values */
+    simde__m512i r; /* expected result: the same 16 values as signed 32-bit lanes */
+  } test_vec[8] = {
+    { simde_mm_set_epi8(INT8_C(   6), INT8_C(  33), INT8_C( 124), INT8_C(-128),
+                        INT8_C(  38), INT8_C(  59), INT8_C( -37), INT8_C( 121),
+                        INT8_C(  67), INT8_C( 112), INT8_C(  62), INT8_C(-113),
+                        INT8_C(-121), INT8_C( -89), INT8_C(  63), INT8_C( -72)),
+      simde_mm512_set_epi32(INT32_C(          6), INT32_C(         33), INT32_C(        124), INT32_C(       -128),
+                            INT32_C(         38), INT32_C(         59), INT32_C(        -37), INT32_C(        121),
+                            INT32_C(         67), INT32_C(        112), INT32_C(         62), INT32_C(       -113),
+                            INT32_C(       -121), INT32_C(        -89), INT32_C(         63), INT32_C(        -72)) },
+    { simde_mm_set_epi8(INT8_C( -94), INT8_C( 125), INT8_C( 105), INT8_C( -92),
+                        INT8_C(-107), INT8_C( -69), INT8_C(  25), INT8_C(-125),
+                        INT8_C( -11), INT8_C( -44), INT8_C(  60), INT8_C( -96),
+                        INT8_C( 119), INT8_C( -40), INT8_C(  29), INT8_C(   9)),
+      simde_mm512_set_epi32(INT32_C(        -94), INT32_C(        125), INT32_C(        105), INT32_C(        -92),
+                            INT32_C(       -107), INT32_C(        -69), INT32_C(         25), INT32_C(       -125),
+                            INT32_C(        -11), INT32_C(        -44), INT32_C(         60), INT32_C(        -96),
+                            INT32_C(        119), INT32_C(        -40), INT32_C(         29), INT32_C(          9)) },
+    { simde_mm_set_epi8(INT8_C(  47), INT8_C(  -9), INT8_C(-127), INT8_C(  66),
+                        INT8_C( 126), INT8_C( -95), INT8_C(  99), INT8_C( -25),
+                        INT8_C( -19), INT8_C(  84), INT8_C( -96), INT8_C( -77),
+                        INT8_C( -34), INT8_C(-108), INT8_C( -33), INT8_C(  62)),
+      simde_mm512_set_epi32(INT32_C(         47), INT32_C(         -9), INT32_C(       -127), INT32_C(         66),
+                            INT32_C(        126), INT32_C(        -95), INT32_C(         99), INT32_C(        -25),
+                            INT32_C(        -19), INT32_C(         84), INT32_C(        -96), INT32_C(        -77),
+                            INT32_C(        -34), INT32_C(       -108), INT32_C(        -33), INT32_C(         62)) },
+    { simde_mm_set_epi8(INT8_C(  93), INT8_C(-106), INT8_C(  61), INT8_C(  64),
+                        INT8_C( -98), INT8_C(  31), INT8_C( -75), INT8_C( -23),
+                        INT8_C( 113), INT8_C( -94), INT8_C( -70), INT8_C( -73),
+                        INT8_C(  66), INT8_C( -84), INT8_C(  92), INT8_C(-113)),
+      simde_mm512_set_epi32(INT32_C(         93), INT32_C(       -106), INT32_C(         61), INT32_C(         64),
+                            INT32_C(        -98), INT32_C(         31), INT32_C(        -75), INT32_C(        -23),
+                            INT32_C(        113), INT32_C(        -94), INT32_C(        -70), INT32_C(        -73),
+                            INT32_C(         66), INT32_C(        -84), INT32_C(         92), INT32_C(       -113)) },
+    { simde_mm_set_epi8(INT8_C( -92), INT8_C( -17), INT8_C( -42), INT8_C( 103),
+                        INT8_C( 111), INT8_C( -23), INT8_C(  14), INT8_C(-122),
+                        INT8_C( -61), INT8_C(   1), INT8_C(  16), INT8_C(-124),
+                        INT8_C( -62), INT8_C(  59), INT8_C( 106), INT8_C(-105)),
+      simde_mm512_set_epi32(INT32_C(        -92), INT32_C(        -17), INT32_C(        -42), INT32_C(        103),
+                            INT32_C(        111), INT32_C(        -23), INT32_C(         14), INT32_C(       -122),
+                            INT32_C(        -61), INT32_C(          1), INT32_C(         16), INT32_C(       -124),
+                            INT32_C(        -62), INT32_C(         59), INT32_C(        106), INT32_C(       -105)) },
+    { simde_mm_set_epi8(INT8_C(  71), INT8_C(  82), INT8_C( -83), INT8_C( 118),
+                        INT8_C( 127), INT8_C(  -5), INT8_C(  43), INT8_C( -48),
+                        INT8_C(  67), INT8_C(-117), INT8_C( -15), INT8_C( 105),
+                        INT8_C( -88), INT8_C( 127), INT8_C(  85), INT8_C(-110)),
+      simde_mm512_set_epi32(INT32_C(         71), INT32_C(         82), INT32_C(        -83), INT32_C(        118),
+                            INT32_C(        127), INT32_C(         -5), INT32_C(         43), INT32_C(        -48),
+                            INT32_C(         67), INT32_C(       -117), INT32_C(        -15), INT32_C(        105),
+                            INT32_C(        -88), INT32_C(        127), INT32_C(         85), INT32_C(       -110)) },
+    { simde_mm_set_epi8(INT8_C(   8), INT8_C(  35), INT8_C( -44), INT8_C(  18),
+                        INT8_C(   3), INT8_C(  -7), INT8_C(  34), INT8_C(  98),
+                        INT8_C(  43), INT8_C(  97), INT8_C(-127), INT8_C( 121),
+                        INT8_C( -47), INT8_C( -94), INT8_C( -74), INT8_C( -59)),
+      simde_mm512_set_epi32(INT32_C(          8), INT32_C(         35), INT32_C(        -44), INT32_C(         18),
+                            INT32_C(          3), INT32_C(         -7), INT32_C(         34), INT32_C(         98),
+                            INT32_C(         43), INT32_C(         97), INT32_C(       -127), INT32_C(        121),
+                            INT32_C(        -47), INT32_C(        -94), INT32_C(        -74), INT32_C(        -59)) },
+    { simde_mm_set_epi8(INT8_C(  79), INT8_C(  98), INT8_C(  70), INT8_C( -29),
+                        INT8_C(  61), INT8_C( 100), INT8_C( -92), INT8_C(  10),
+                        INT8_C(-107), INT8_C( -13), INT8_C(  83), INT8_C(-102),
+                        INT8_C( -54), INT8_C(  80), INT8_C(  43), INT8_C(  56)),
+      simde_mm512_set_epi32(INT32_C(         79), INT32_C(         98), INT32_C(         70), INT32_C(        -29),
+                            INT32_C(         61), INT32_C(        100), INT32_C(        -92), INT32_C(         10),
+                            INT32_C(       -107), INT32_C(        -13), INT32_C(         83), INT32_C(       -102),
+                            INT32_C(        -54), INT32_C(         80), INT32_C(         43), INT32_C(         56)) }
+  };
+
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { /* visit every table entry */
+    simde__m512i r = simde_mm512_cvtepi8_epi32(test_vec[i].a); /* function under test */
+    simde_assert_m512i_i32(r, ==, test_vec[i].r); /* actual vs. expected for this entry */
+  }
+
+  return MUNIT_OK; /* reached after the loop has gone through all entries */
+}
+
+static MunitResult
+test_simde_mm512_cvtepi8_epi64(const MunitParameter params[], void* data) {
+  (void) params; /* not used by this test */
+  (void) data;   /* not used by this test */
+  /* Fixed cases for simde_mm512_cvtepi8_epi64: r repeats the last eight simde_mm_set_epi8 arguments of a as 64-bit values. */
+  const struct {
+    simde__m128i a; /* input: 16 signed 8-bit values; the first eight arguments never appear in r */
+    simde__m512i r; /* expected result: eight signed 64-bit values (negatives stay negative) */
+  } test_vec[8] = {
+    { simde_mm_set_epi8(INT8_C(  40), INT8_C( -85), INT8_C(  94), INT8_C(  35),
+                        INT8_C( -54), INT8_C( -71), INT8_C(-106), INT8_C(-127),
+                        INT8_C(  11), INT8_C( 105), INT8_C(  37), INT8_C(-105),
+                        INT8_C(   6), INT8_C( -65), INT8_C(  17), INT8_C(  29)),
+      simde_mm512_set_epi64(INT64_C(                  11), INT64_C(                 105),
+                            INT64_C(                  37), INT64_C(                -105),
+                            INT64_C(                   6), INT64_C(                 -65),
+                            INT64_C(                  17), INT64_C(                  29)) },
+    { simde_mm_set_epi8(INT8_C(  65), INT8_C( -98), INT8_C( -80), INT8_C( -71),
+                        INT8_C( -32), INT8_C(  61), INT8_C( -91), INT8_C(   2),
+                        INT8_C(  62), INT8_C(  86), INT8_C( -39), INT8_C( -20),
+                        INT8_C(  65), INT8_C( -87), INT8_C( 116), INT8_C(-112)),
+      simde_mm512_set_epi64(INT64_C(                  62), INT64_C(                  86),
+                            INT64_C(                 -39), INT64_C(                 -20),
+                            INT64_C(                  65), INT64_C(                 -87),
+                            INT64_C(                 116), INT64_C(                -112)) },
+    { simde_mm_set_epi8(INT8_C( -35), INT8_C(  91), INT8_C(  95), INT8_C( -91),
+                        INT8_C(-120), INT8_C( -72), INT8_C(   6), INT8_C( 127),
+                        INT8_C( -64), INT8_C(  43), INT8_C( -93), INT8_C(   8),
+                        INT8_C( 105), INT8_C( -16), INT8_C(  39), INT8_C( 125)),
+      simde_mm512_set_epi64(INT64_C(                 -64), INT64_C(                  43),
+                            INT64_C(                 -93), INT64_C(                   8),
+                            INT64_C(                 105), INT64_C(                 -16),
+                            INT64_C(                  39), INT64_C(                 125)) },
+    { simde_mm_set_epi8(INT8_C( 108), INT8_C( 105), INT8_C(  98), INT8_C( -57),
+                        INT8_C( -42), INT8_C( -18), INT8_C( -55), INT8_C(  -1),
+                        INT8_C( -97), INT8_C( -26), INT8_C( -21), INT8_C(-119),
+                        INT8_C(  95), INT8_C(  83), INT8_C( -95), INT8_C(  86)),
+      simde_mm512_set_epi64(INT64_C(                 -97), INT64_C(                 -26),
+                            INT64_C(                 -21), INT64_C(                -119),
+                            INT64_C(                  95), INT64_C(                  83),
+                            INT64_C(                 -95), INT64_C(                  86)) },
+    { simde_mm_set_epi8(INT8_C(  63), INT8_C(-119), INT8_C(  65), INT8_C( 108),
+                        INT8_C( -17), INT8_C( -16), INT8_C(  -4), INT8_C(  16),
+                        INT8_C(-117), INT8_C( -62), INT8_C( -96), INT8_C(   5),
+                        INT8_C( 116), INT8_C( -58), INT8_C( -56), INT8_C(-121)),
+      simde_mm512_set_epi64(INT64_C(                -117), INT64_C(                 -62),
+                            INT64_C(                 -96), INT64_C(                   5),
+                            INT64_C(                 116), INT64_C(                 -58),
+                            INT64_C(                 -56), INT64_C(                -121)) },
+    { simde_mm_set_epi8(INT8_C( -69), INT8_C(  98), INT8_C(  52), INT8_C( -73),
+                        INT8_C(  71), INT8_C(  44), INT8_C( -92), INT8_C(  54),
+                        INT8_C( 126), INT8_C(  88), INT8_C(  32), INT8_C(-122),
+                        INT8_C( -69), INT8_C( -55), INT8_C(-123), INT8_C(  79)),
+      simde_mm512_set_epi64(INT64_C(                 126), INT64_C(                  88),
+                            INT64_C(                  32), INT64_C(                -122),
+                            INT64_C(                 -69), INT64_C(                 -55),
+                            INT64_C(                -123), INT64_C(                  79)) },
+    { simde_mm_set_epi8(INT8_C(-110), INT8_C( -17), INT8_C( -48), INT8_C( -64),
+                        INT8_C(   9), INT8_C(  96), INT8_C( 113), INT8_C( -55),
+                        INT8_C( -45), INT8_C(  -9), INT8_C( 104), INT8_C( -61),
+                        INT8_C( 127), INT8_C( 121), INT8_C(  70), INT8_C( -22)),
+      simde_mm512_set_epi64(INT64_C(                 -45), INT64_C(                  -9),
+                            INT64_C(                 104), INT64_C(                 -61),
+                            INT64_C(                 127), INT64_C(                 121),
+                            INT64_C(                  70), INT64_C(                 -22)) },
+    { simde_mm_set_epi8(INT8_C(  68), INT8_C( -24), INT8_C(  90), INT8_C( -28),
+                        INT8_C(  55), INT8_C( -48), INT8_C(  13), INT8_C(  95),
+                        INT8_C(  10), INT8_C( -72), INT8_C( 109), INT8_C( -27),
+                        INT8_C(  94), INT8_C( 121), INT8_C(  33), INT8_C(  52)),
+      simde_mm512_set_epi64(INT64_C(                  10), INT64_C(                 -72),
+                            INT64_C(                 109), INT64_C(                 -27),
+                            INT64_C(                  94), INT64_C(                 121),
+                            INT64_C(                  33), INT64_C(                  52)) }
+  };
+
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { /* visit every table entry */
+    simde__m512i r = simde_mm512_cvtepi8_epi64(test_vec[i].a); /* function under test */
+    simde_assert_m512i_i64(r, ==, test_vec[i].r); /* actual vs. expected for this entry */
+  }
+
+  return MUNIT_OK; /* reached after the loop has gone through all entries */
+}
+
+static MunitResult
+test_simde_mm512_cvtepi32_epi8(const MunitParameter params[], void* data) {
+  (void) params;
+  (void) data;
+
+  const struct {
+    simde__m512i a;
+    simde__m128i r;
+  } test_vec[8] = {
+    { simde_mm512_set_epi32(INT32_C(         -2), INT32_C(         -4), INT32_C( -120451969), INT32_C(      14509),
+                            INT32_C(      -1510), INT32_C(      98804), INT32_C(       1802), INT32_C(     -32352),
+                            INT32_C(      14540), INT32_C(         -2), INT32_C(        222), INT32_C(        152),
+                            INT32_C(     -48720), INT32_C(     250746), INT32_C(         15), INT32_C(         -2)),
+      simde_mm_set_epi8(INT8_C(  -2), INT8_C(  -4), INT8_C( 127), INT8_C( -83),
+                        INT8_C(  26), INT8_C( -12), INT8_C(  10), INT8_C( -96),
+                        INT8_C( -52), INT8_C(  -2), INT8_C( -34), INT8_C(-104),
+                        INT8_C( -80), INT8_C( 122), INT8_C(  15), INT8_C(  -2)) },
+    { simde_mm512_set_epi32(INT32_C(   -2537924), INT32_C(          0), INT32_C(       3842), INT32_C(    -439330),
+                            INT32_C(      39001), INT32_C(         -1), INT32_C(   32480192), INT32_C(          0),
+                            INT32_C(          4), INT32_C(  -11661865), INT32_C(          0), INT32_C(          2),
+                            INT32_C(      63322), INT32_C(        -33), INT32_C(      14448), INT32_C(          2)),
+      simde_mm_set_epi8(INT8_C(  60), INT8_C(   0), INT8_C(   2), INT8_C( -34),
+                        INT8_C(  89), INT8_C(  -1), INT8_C( -64), INT8_C(   0),
+                        INT8_C(   4), INT8_C( -41), INT8_C(   0), INT8_C(   2),
+                        INT8_C(  90), INT8_C( -33), INT8_C( 112), INT8_C(   2)) },
+    { simde_mm512_set_epi32(INT32_C(        -18), INT32_C(   -2011647), INT32_C(   -7768922), INT32_C( -921783558),
+                            INT32_C(   -1941821), INT32_C(          0), INT32_C(    -647690), INT32_C(      -5119),
+                            INT32_C(         -1), INT32_C(        343), INT32_C(  113610714), INT32_C(          3),
+                            INT32_C(      38353), INT32_C(        246), INT32_C(  -10559231), INT32_C(       8543)),
+      simde_mm_set_epi8(INT8_C( -18), INT8_C(   1), INT8_C( -90), INT8_C(  -6),
+                        INT8_C( -61), INT8_C(   0), INT8_C( -10), INT8_C(   1),
+                        INT8_C(  -1), INT8_C(  87), INT8_C( -38), INT8_C(   3),
+                        INT8_C( -47), INT8_C( -10), INT8_C(   1), INT8_C(  95)) },
+    { simde_mm512_set_epi32(INT32_C( -177879544), INT32_C(         -4), INT32_C(         -1), INT32_C(      -1874),
+                            INT32_C(         -4), INT32_C( -469598096), INT32_C(      -1647), INT32_C(     129553),
+                            INT32_C(    9710669), INT32_C(          3), INT32_C(    1473856), INT32_C(  134714256),
+                            INT32_C(   70527996), INT32_C(    2339642), INT32_C(     148218), INT32_C(  -32767248)),
+      simde_mm_set_epi8(INT8_C(   8), INT8_C(  -4), INT8_C(  -1), INT8_C( -82),
+                        INT8_C(  -4), INT8_C( 112), INT8_C(-111), INT8_C(  17),
+                        INT8_C(  77), INT8_C(   3), INT8_C(  64), INT8_C(-112),
+                        INT8_C(  -4), INT8_C(  58), INT8_C(  -6), INT8_C( -16)) },
+    { simde_mm512_set_epi32(INT32_C(   -1418204), INT32_C(    -122943), INT32_C(     799821), INT32_C(        -51),
+                            INT32_C(     207931), INT32_C(  -11778782), INT32_C(      66993), INT32_C(     -15812),
+                            INT32_C(   55345677), INT32_C( -194873886), INT32_C(      -3955), INT32_C(        -22),
+                            INT32_C(       1761), INT32_C(        751), INT32_C(         19), INT32_C(         -4)),
+      simde_mm_set_epi8(INT8_C(  36), INT8_C( -63), INT8_C(  77), INT8_C( -51),
+                        INT8_C(  59), INT8_C(  34), INT8_C( -79), INT8_C(  60),
+                        INT8_C(  13), INT8_C( -30), INT8_C(-115), INT8_C( -22),
+                        INT8_C( -31), INT8_C( -17), INT8_C(  19), INT8_C(  -4)) },
+    { simde_mm512_set_epi32(INT32_C(        -17), INT32_C(        -26), INT32_C(     854285), INT32_C(         51),
+                            INT32_C(  -60746537), INT32_C(    3687234), INT32_C(   52848365), INT32_C(   26958727),
+                            INT32_C(          2), INT32_C(        104), INT32_C(    4725058), INT32_C(     -56297),
+                            INT32_C(       5336), INT32_C(     443041), INT32_C(        -35), INT32_C(     229612)),
+      simde_mm_set_epi8(INT8_C( -17), INT8_C( -26), INT8_C(  13), INT8_C(  51),
+                        INT8_C( -41), INT8_C(  66), INT8_C( -19), INT8_C(-121),
+                        INT8_C(   2), INT8_C( 104), INT8_C(  66), INT8_C(  23),
+                        INT8_C( -40), INT8_C( -95), INT8_C( -35), INT8_C( -20)) },
+    { simde_mm512_set_epi32(INT32_C(        -27), INT32_C(  127397292), INT32_C(   29325489), INT32_C(        691),
+                            INT32_C(       -978), INT32_C(    -559751), INT32_C(   -3037707), INT32_C(     189833),
+                            INT32_C(         11), INT32_C(   -4085970), INT32_C(      -3499), INT32_C(     -16323),
+                            INT32_C(     732682), INT32_C(     108115), INT32_C(   29565452), INT32_C( -145574324)),
+      simde_mm_set_epi8(INT8_C( -27), INT8_C( -84), INT8_C( -79), INT8_C( -77),
+                        INT8_C(  46), INT8_C( 121), INT8_C( -11), INT8_C(-119),
+                        INT8_C(  11), INT8_C(  46), INT8_C(  85), INT8_C(  61),
+                        INT8_C(  10), INT8_C(  83), INT8_C(  12), INT8_C(  76)) },
+    { simde_mm512_set_epi32(INT32_C(        -14), INT32_C(    6208981), INT32_C(  133763173), INT32_C(  -30227251),
+                            INT32_C(  -17898651), INT32_C( -197203605), INT32_C(         -4), INT32_C(         13),
+                            INT32_C(   -1312564), INT32_C(         -3), INT32_C(    5632807), INT32_C(       2549),
+                            INT32_C(         -3), INT32_C(      -2772), INT32_C(      -1504), INT32_C(          1)),
+      simde_mm_set_epi8(INT8_C( -14), INT8_C( -43), INT8_C( 101), INT8_C( -51),
+                        INT8_C( 101), INT8_C( 107), INT8_C(  -4), INT8_C(  13),
+                        INT8_C( -52), INT8_C(  -3), INT8_C(  39), INT8_C( -11),
+                        INT8_C(  -3), INT8_C(  44), INT8_C(  32), INT8_C(   1)) }
+  };
+
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+    simde__m128i r = simde_mm512_cvtepi32_epi8(test_vec[i].a);
+    simde_assert_m128i_i8(r, ==, test_vec[i].r);
+  }
+
+  return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm512_cvtepi64_epi8(const MunitParameter params[], void* data) {
+  (void) params;
+  (void) data;
+  /* Table of 8 cases: `a` is the 512-bit input, `r` the expected 128-bit result. In every entry the first eight simde_mm_set_epi8 arguments are 0 and the last eight are, in order, the low bytes of the eight simde_mm512_set_epi64 arguments (e.g. 273955 -> 35, -4013 -> 83). */
+  const struct {
+    simde__m512i a;
+    simde__m128i r;
+  } test_vec[8] = {
+    { simde_mm512_set_epi64(INT64_C(              273955), INT64_C(               -4013),
+                            INT64_C(            -7033556), INT64_C(      -1383025729160),
+                            INT64_C(          -218214744), INT64_C(   -9402863842296753),
+                            INT64_C(                   0), INT64_C(          -240066712)),
+      simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(  35), INT8_C(  83), INT8_C(  44), INT8_C( 120),
+                        INT8_C( -88), INT8_C(  79), INT8_C(   0), INT8_C( 104)) },
+    { simde_mm512_set_epi64(INT64_C(               50833), INT64_C(           484208564),
+                            INT64_C(                -124), INT64_C(         -8907018595),
+                            INT64_C(         -1053135968), INT64_C(    2128258677497261),
+                            INT64_C(                   3), INT64_C(  263107913893504060)),
+      simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(-111), INT8_C( -76), INT8_C(-124), INT8_C( -99),
+                        INT8_C( -96), INT8_C( -83), INT8_C(   3), INT8_C(  60)) },
+    { simde_mm512_set_epi64(INT64_C(    6119961081599912), INT64_C(                 949),
+                            INT64_C(  761714638418543261), INT64_C(         -7281666562),
+                            INT64_C(         -3399190417), INT64_C(                 231),
+                            INT64_C(-1217801394263696454), INT64_C(                  43)),
+      simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C( -88), INT8_C( -75), INT8_C( -99), INT8_C(  -2),
+                        INT8_C( 111), INT8_C( -25), INT8_C( -70), INT8_C(  43)) },
+    { simde_mm512_set_epi64(INT64_C(     -74999030828832), INT64_C(       3805264232880),
+                            INT64_C(              321542), INT64_C(        -18067967511),
+                            INT64_C(      21602392348127), INT64_C(          -482358781),
+                            INT64_C(              -12189), INT64_C(   16807900958735709)),
+      simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C( -32), INT8_C( -80), INT8_C(   6), INT8_C( -23),
+                        INT8_C( -33), INT8_C(   3), INT8_C(  99), INT8_C(  93)) },
+    { simde_mm512_set_epi64(INT64_C(               11997), INT64_C(                 192),
+                            INT64_C(        -32039837063), INT64_C(               39788),
+                            INT64_C(           684970521), INT64_C(     361409660761858),
+                            INT64_C(   27655177518327113), INT64_C(         -7050752136)),
+      simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C( -35), INT8_C( -64), INT8_C( 121), INT8_C( 108),
+                        INT8_C(  25), INT8_C(   2), INT8_C(  73), INT8_C( 120)) },
+    { simde_mm512_set_epi64(INT64_C(    -131967536383739), INT64_C(                 -32),
+                            INT64_C(                 -54), INT64_C(                -245),
+                            INT64_C(              -42658), INT64_C(                   3),
+                            INT64_C(         26934708458), INT64_C(               -6255)),
+      simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(   5), INT8_C( -32), INT8_C( -54), INT8_C(  11),
+                        INT8_C(  94), INT8_C(   3), INT8_C( -22), INT8_C(-111)) },
+    { simde_mm512_set_epi64(INT64_C(                   0), INT64_C(-4069220464223700234),
+                            INT64_C(                   0), INT64_C(-8071151262900075656),
+                            INT64_C(                  -1), INT64_C(          4132460747),
+                            INT64_C(                 -27), INT64_C(            -7116923)),
+      simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(   0), INT8_C( -10), INT8_C(   0), INT8_C( 120),
+                        INT8_C(  -1), INT8_C( -53), INT8_C( -27), INT8_C(-123)) },
+    { simde_mm512_set_epi64(INT64_C(         54634445793), INT64_C(         63569905620),
+                            INT64_C(            20981054), INT64_C(             2614918),
+                            INT64_C(         11917164823), INT64_C(   15401551242937960),
+                            INT64_C(                  -1), INT64_C(        366397165244)),
+      simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C( -31), INT8_C( -44), INT8_C(  62), INT8_C(-122),
+                        INT8_C(  23), INT8_C( 104), INT8_C(  -1), INT8_C( -68)) }
+  };
+  /* Run simde_mm512_cvtepi64_epi8 on every table entry and require, via simde_assert_m128i_i8, that the result equals the stored expectation. */
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+    simde__m128i r = simde_mm512_cvtepi64_epi8(test_vec[i].a);
+    simde_assert_m128i_i8(r, ==, test_vec[i].r);
+  }
+
+  return MUNIT_OK;
+}
+
 #endif /* defined(SIMDE_avx512f_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) */
 
 HEDLEY_DIAGNOSTIC_PUSH
@@ -2246,6 +2578,13 @@ static MunitTest test_suite_tests[] = {
 
   SIMDE_TESTS_DEFINE_TEST(mm512_mask_cmpeq_epi32_mask),
 
+  SIMDE_TESTS_DEFINE_TEST(mm512_cvtepi8_epi32),
+  SIMDE_TESTS_DEFINE_TEST(mm512_cvtepi8_epi64),
+
+  SIMDE_TESTS_DEFINE_TEST(mm512_cvtepi32_epi8),
+
+  SIMDE_TESTS_DEFINE_TEST(mm512_cvtepi64_epi8),
+
 #endif /* defined(SIMDE_AVX512f_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) */
   { NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }
 };


=====================================
test/x86/skel.c
=====================================
@@ -1204,14 +1204,14 @@ test_simde_mm256_xxx_epu32(const MunitParameter params[], void* data) {
 
     r = simde__m256i_to_private(simde_mm256_xxx_epu32(simde__m256i_from_private(a), simde__m256i_from_private(b)));
 
-    printf("    { simde_x_mm256_set_epu32(UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "),\n"
-           "                              UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 ")),\n",
+    printf("    { simde_x_mm256_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+           "                              UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")),\n",
            a.u32[7], a.u32[6], a.u32[5], a.u32[4], a.u32[3], a.u32[2], a.u32[1], a.u32[0]);
-    printf("      simde_x_mm256_set_epu32(UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "),\n"
-           "                              UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 ")),\n",
+    printf("      simde_x_mm256_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+           "                              UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")),\n",
            b.u32[7], b.u32[6], b.u32[5], b.u32[4], b.u32[3], b.u32[2], b.u32[1], b.u32[0]);
-    printf("      simde_x_mm256_set_epu32(UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "),\n"
-           "                              UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 ")) },\n",
+    printf("      simde_x_mm256_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+           "                              UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")) },\n",
            r.u32[7], r.u32[6], r.u32[5], r.u32[4], r.u32[3], r.u32[2], r.u32[1], r.u32[0]);
   }
   return MUNIT_FAIL;
@@ -1934,7 +1934,7 @@ test_simde_mm512_xxx_epi64(const MunitParameter params[], void* data) {
     munit_rand_memory(sizeof(a), (uint8_t*) &a);
     munit_rand_memory(sizeof(b), (uint8_t*) &b);
 
-    r = simde__m512i_to_private(simde_mm512_add_epi64(simde__m512i_from_private(a), simde__m512i_from_private(b)));
+    r = simde__m512i_to_private(simde_mm512_xxx_epi64(simde__m512i_from_private(a), simde__m512i_from_private(b)));
 
     printf("    { simde_mm512_set_epi64(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
            "                            INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
@@ -2161,22 +2161,22 @@ test_simde_mm512_xxx_epu32(const MunitParameter params[], void* data) {
 
     r = simde__m512i_to_private(simde_mm512_xxx_epu32(simde__m512i_from_private(a), simde__m512i_from_private(b)));
 
-    printf("    { simde_x_mm512_set_epu32(UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "),\n"
-           "                              UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "),\n"
-           "                              UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "),\n"
-           "                              UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 ")),\n",
+    printf("    { simde_x_mm512_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+           "                              UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+           "                              UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+           "                              UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")),\n",
            a.i32[15], a.i32[14], a.i32[13], a.i32[12], a.i32[11], a.i32[10], a.i32[ 9], a.i32[ 8],
            a.i32[ 7], a.i32[ 6], a.i32[ 5], a.i32[ 4], a.i32[ 3], a.i32[ 2], a.i32[ 1], a.i32[ 0]);
-    printf("      simde_x_mm512_set_epu32(UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "),\n"
-           "                              UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "),\n"
-           "                              UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "),\n"
-           "                              UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 ")),\n",
+    printf("      simde_x_mm512_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+           "                              UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+           "                              UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+           "                              UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")),\n",
            b.u32[15], b.u32[14], b.u32[13], b.u32[12], b.u32[11], b.u32[10], b.u32[ 9], b.u32[ 8],
            b.u32[ 7], b.u32[ 6], b.u32[ 5], b.u32[ 4], b.u32[ 3], b.u32[ 2], b.u32[ 1], b.u32[ 0]);
-    printf("      simde_x_mm512_set_epu32(UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "),\n"
-           "                              UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "),\n"
-           "                              UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "),\n"
-           "                              UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 "), UINT32_C(%11" PRId32 ")) },\n",
+    printf("      simde_x_mm512_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+           "                              UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+           "                              UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+           "                              UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")) },\n",
            r.u32[15], r.u32[14], r.u32[13], r.u32[12], r.u32[11], r.u32[10], r.u32[ 9], r.u32[ 8],
            r.u32[ 7], r.u32[ 6], r.u32[ 5], r.u32[ 4], r.u32[ 3], r.u32[ 2], r.u32[ 1], r.u32[ 0]);
   }


=====================================
test/x86/svml.c
=====================================
@@ -1374,54 +1374,54 @@ test_simde_mm256_div_epu32(const MunitParameter params[], void* data) {
     simde__m256i b;
     simde__m256i r;
   } test_vec[8] = {
-    { simde_x_mm256_set_epu32(UINT32_C(-1508907154), UINT32_C( -957483036), UINT32_C(  212354336), UINT32_C( -857281839),
-                              UINT32_C(-1973841665), UINT32_C( -629468237), UINT32_C( 1276969649), UINT32_C(-1275754054)),
-      simde_x_mm256_set_epu32(UINT32_C(     451417), UINT32_C(  462783328), UINT32_C(    8576290), UINT32_C(   64270331),
-                              UINT32_C(      37146), UINT32_C(     620145), UINT32_C(   21213860), UINT32_C(          1)),
-      simde_x_mm256_set_epu32(UINT32_C(       6171), UINT32_C(          7), UINT32_C(         24), UINT32_C(         53),
-                              UINT32_C(      62486), UINT32_C(       5910), UINT32_C(         60), UINT32_C(-1275754054)) },
-    { simde_x_mm256_set_epu32(UINT32_C( 2104697030), UINT32_C( 1350155206), UINT32_C( -106916491), UINT32_C( 1455998635),
-                              UINT32_C( 2131081108), UINT32_C( 1971786408), UINT32_C( -939494414), UINT32_C(-1613886259)),
-      simde_x_mm256_set_epu32(UINT32_C(        231), UINT32_C(   73705552), UINT32_C(     130241), UINT32_C(       7832),
-                              UINT32_C(   16162638), UINT32_C(  109685672), UINT32_C(          8), UINT32_C(  334915670)),
-      simde_x_mm256_set_epu32(UINT32_C(    9111242), UINT32_C(         18), UINT32_C(      32156), UINT32_C(     185903),
-                              UINT32_C(        131), UINT32_C(         17), UINT32_C(  419434110), UINT32_C(          8)) },
-    { simde_x_mm256_set_epu32(UINT32_C( 1264216498), UINT32_C( 1299581643), UINT32_C( 1889525960), UINT32_C(-2008114472),
-                              UINT32_C(  423719032), UINT32_C( 1818267804), UINT32_C(  762708763), UINT32_C( 1501733683)),
-      simde_x_mm256_set_epu32(UINT32_C(         77), UINT32_C(        824), UINT32_C(         60), UINT32_C(    3129809),
-                              UINT32_C(          5), UINT32_C(        468), UINT32_C(        236), UINT32_C(    7848050)),
-      simde_x_mm256_set_epu32(UINT32_C(   16418396), UINT32_C(    1577162), UINT32_C(   31492099), UINT32_C(        730),
-                              UINT32_C(   84743806), UINT32_C(    3885187), UINT32_C(    3231816), UINT32_C(        191)) },
-    { simde_x_mm256_set_epu32(UINT32_C(-1361590415), UINT32_C(  542028757), UINT32_C( 1611666138), UINT32_C( -924139483),
-                              UINT32_C( 1692903269), UINT32_C(  733100015), UINT32_C(  366410692), UINT32_C(-1939074859)),
-      simde_x_mm256_set_epu32(UINT32_C(   24336788), UINT32_C(    1642338), UINT32_C(        666), UINT32_C(       2067),
-                              UINT32_C(         85), UINT32_C(    1531095), UINT32_C(      40675), UINT32_C(  263833438)),
-      simde_x_mm256_set_epu32(UINT32_C(        120), UINT32_C(        330), UINT32_C(    2419919), UINT32_C(    1630782),
-                              UINT32_C(   19916509), UINT32_C(        478), UINT32_C(       9008), UINT32_C(          8)) },
-    { simde_x_mm256_set_epu32(UINT32_C( 1625411893), UINT32_C(     116526), UINT32_C( -479225158), UINT32_C( 1269098691),
-                              UINT32_C(  138365868), UINT32_C(-1760846141), UINT32_C(-1763002020), UINT32_C(  350477953)),
-      simde_x_mm256_set_epu32(UINT32_C(     121261), UINT32_C(  100533765), UINT32_C(        239), UINT32_C(  273093756),
-                              UINT32_C(          2), UINT32_C(  123647188), UINT32_C(     137667), UINT32_C(        690)),
-      simde_x_mm256_set_epu32(UINT32_C(      13404), UINT32_C(          0), UINT32_C(   15965448), UINT32_C(          4),
-                              UINT32_C(   69182934), UINT32_C(         20), UINT32_C(      18391), UINT32_C(     507939)) },
-    { simde_x_mm256_set_epu32(UINT32_C(-1111689783), UINT32_C(  410237776), UINT32_C(    6985125), UINT32_C(-2092161126),
-                              UINT32_C(-1515805182), UINT32_C(  269424680), UINT32_C( -261060991), UINT32_C( -409004229)),
-      simde_x_mm256_set_epu32(UINT32_C(        180), UINT32_C(        114), UINT32_C(      14804), UINT32_C(         11),
-                              UINT32_C(        178), UINT32_C(          1), UINT32_C(    8684605), UINT32_C(    1201514)),
-      simde_x_mm256_set_epu32(UINT32_C(   17684875), UINT32_C(    3598576), UINT32_C(        471), UINT32_C(  200255106),
-                              UINT32_C(   15613270), UINT32_C(  269424680), UINT32_C(        464), UINT32_C(       3234)) },
-    { simde_x_mm256_set_epu32(UINT32_C( -856773668), UINT32_C(  524035790), UINT32_C( 1177944500), UINT32_C( -122549241),
-                              UINT32_C(  860434944), UINT32_C( 1217684690), UINT32_C( -195086485), UINT32_C(-1324434909)),
-      simde_x_mm256_set_epu32(UINT32_C(          1), UINT32_C(          5), UINT32_C(          7), UINT32_C(   10503049),
-                              UINT32_C(     837677), UINT32_C(     272262), UINT32_C(      72522), UINT32_C(   54731225)),
-      simde_x_mm256_set_epu32(UINT32_C( -856773668), UINT32_C(  104807158), UINT32_C(  168277785), UINT32_C(        397),
-                              UINT32_C(       1027), UINT32_C(       4472), UINT32_C(      56532), UINT32_C(         54)) },
-    { simde_x_mm256_set_epu32(UINT32_C(-1829840745), UINT32_C( -322582834), UINT32_C( 1754306261), UINT32_C( -847843135),
-                              UINT32_C( -659439141), UINT32_C(-1948366208), UINT32_C( -120901755), UINT32_C( -755292688)),
-      simde_x_mm256_set_epu32(UINT32_C(   24846326), UINT32_C(     572777), UINT32_C(   39081742), UINT32_C(    3635908),
-                              UINT32_C(       1180), UINT32_C(     359797), UINT32_C(  107711383), UINT32_C(          2)),
-      simde_x_mm256_set_epu32(UINT32_C(         99), UINT32_C(       6935), UINT32_C(         44), UINT32_C(        948),
-                              UINT32_C(    3080956), UINT32_C(       6522), UINT32_C(         38), UINT32_C( 1769837304)) }
+    { simde_x_mm256_set_epu32(UINT32_C( 621216267), UINT32_C(2973447507), UINT32_C(1814279233), UINT32_C(3673557536),
+                              UINT32_C(4015780858), UINT32_C(1070914538), UINT32_C(2707640519), UINT32_C(3041291274)),
+      simde_x_mm256_set_epu32(UINT32_C(    122731), UINT32_C(  51630147), UINT32_C(    152670), UINT32_C(   7731229),
+                              UINT32_C(    711400), UINT32_C(   1744981), UINT32_C( 164943127), UINT32_C(    169494)),
+      simde_x_mm256_set_epu32(UINT32_C(      5061), UINT32_C(        57), UINT32_C(     11883), UINT32_C(       475),
+                              UINT32_C(      5644), UINT32_C(       613), UINT32_C(        16), UINT32_C(     17943)) },
+    { simde_x_mm256_set_epu32(UINT32_C(1084014678), UINT32_C(1666523830), UINT32_C(3454667769), UINT32_C(4029614313),
+                              UINT32_C(3425016021), UINT32_C(2449839571), UINT32_C(1601532569), UINT32_C(1519388398)),
+      simde_x_mm256_set_epu32(UINT32_C(    130157), UINT32_C(   5585515), UINT32_C(  62691231), UINT32_C(     37123),
+                              UINT32_C(   2515600), UINT32_C( 106484982), UINT32_C(4168501606), UINT32_C(   2781814)),
+      simde_x_mm256_set_epu32(UINT32_C(      8328), UINT32_C(       298), UINT32_C(        55), UINT32_C(    108547),
+                              UINT32_C(      1361), UINT32_C(        23), UINT32_C(         0), UINT32_C(       546)) },
+    { simde_x_mm256_set_epu32(UINT32_C(2187853776), UINT32_C( 131263503), UINT32_C(  20338031), UINT32_C(3062800456),
+                              UINT32_C(1802896354), UINT32_C(  22231847), UINT32_C(3438214155), UINT32_C(1776513196)),
+      simde_x_mm256_set_epu32(UINT32_C(  28353115), UINT32_C(  92496104), UINT32_C(  15335526), UINT32_C(  99105532),
+                              UINT32_C(   5905009), UINT32_C(     27824), UINT32_C(     28986), UINT32_C(  12459911)),
+      simde_x_mm256_set_epu32(UINT32_C(        77), UINT32_C(         1), UINT32_C(         1), UINT32_C(        30),
+                              UINT32_C(       305), UINT32_C(       799), UINT32_C(    118616), UINT32_C(       142)) },
+    { simde_x_mm256_set_epu32(UINT32_C( 524596333), UINT32_C(3965897825), UINT32_C(1593754725), UINT32_C( 694203496),
+                              UINT32_C(1917650066), UINT32_C(2692610113), UINT32_C(1620259645), UINT32_C( 607116294)),
+      simde_x_mm256_set_epu32(UINT32_C(  29757558), UINT32_C(     80117), UINT32_C( 412054571), UINT32_C(    878110),
+                              UINT32_C(4124070325), UINT32_C(   8250706), UINT32_C(   7930575), UINT32_C(     51813)),
+      simde_x_mm256_set_epu32(UINT32_C(        17), UINT32_C(     49501), UINT32_C(         3), UINT32_C(       790),
+                              UINT32_C(         0), UINT32_C(       326), UINT32_C(       204), UINT32_C(     11717)) },
+    { simde_x_mm256_set_epu32(UINT32_C( 625862951), UINT32_C( 793130310), UINT32_C(2489185635), UINT32_C(2468815203),
+                              UINT32_C(3079066921), UINT32_C( 802958712), UINT32_C(1537818066), UINT32_C(1678295724)),
+      simde_x_mm256_set_epu32(UINT32_C(   8259237), UINT32_C(    229091), UINT32_C(   7899398), UINT32_C(  41009690),
+                              UINT32_C(  26030333), UINT32_C(    228627), UINT32_C(1200021710), UINT32_C(    186204)),
+      simde_x_mm256_set_epu32(UINT32_C(        75), UINT32_C(      3462), UINT32_C(       315), UINT32_C(        60),
+                              UINT32_C(       118), UINT32_C(      3512), UINT32_C(         1), UINT32_C(      9013)) },
+    { simde_x_mm256_set_epu32(UINT32_C(3334078645), UINT32_C(2226952893), UINT32_C(1901933944), UINT32_C(3456551705),
+                              UINT32_C(3394846076), UINT32_C(2592342753), UINT32_C(1822000161), UINT32_C(3060682219)),
+      simde_x_mm256_set_epu32(UINT32_C(     55529), UINT32_C(     95077), UINT32_C(  61849330), UINT32_C(     77269),
+                              UINT32_C(    181901), UINT32_C(     66287), UINT32_C(     46407), UINT32_C(      1962)),
+      simde_x_mm256_set_epu32(UINT32_C(     60042), UINT32_C(     23422), UINT32_C(        30), UINT32_C(     44734),
+                              UINT32_C(     18663), UINT32_C(     39107), UINT32_C(     39261), UINT32_C(   1559980)) },
+    { simde_x_mm256_set_epu32(UINT32_C(2418478797), UINT32_C(3856569345), UINT32_C(2562700829), UINT32_C(2670510577),
+                              UINT32_C(3958231909), UINT32_C(3386864730), UINT32_C(2249491002), UINT32_C( 367242130)),
+      simde_x_mm256_set_epu32(UINT32_C( 106591767), UINT32_C( 591565864), UINT32_C(    241208), UINT32_C(    384474),
+                              UINT32_C(  63569588), UINT32_C(1007016971), UINT32_C( 701090048), UINT32_C(   4482965)),
+      simde_x_mm256_set_epu32(UINT32_C(        22), UINT32_C(         6), UINT32_C(     10624), UINT32_C(      6945),
+                              UINT32_C(        62), UINT32_C(         3), UINT32_C(         3), UINT32_C(        81)) },
+    { simde_x_mm256_set_epu32(UINT32_C(3497551851), UINT32_C(3538232808), UINT32_C(3581222707), UINT32_C(2092274030),
+                              UINT32_C(1202922035), UINT32_C(3381143079), UINT32_C(1645890362), UINT32_C(2497764821)),
+      simde_x_mm256_set_epu32(UINT32_C(   7255461), UINT32_C(    387871), UINT32_C( 216379987), UINT32_C(   1108325),
+                              UINT32_C(   9779926), UINT32_C( 265173482), UINT32_C(    305369), UINT32_C(1628979148)),
+      simde_x_mm256_set_epu32(UINT32_C(       482), UINT32_C(      9122), UINT32_C(        16), UINT32_C(      1887),
+                              UINT32_C(       122), UINT32_C(        12), UINT32_C(      5389), UINT32_C(         1)) }
   };
 
   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {



View it on GitLab: https://salsa.debian.org/med-team/simde/-/commit/91b3e7158b82a5197616a3ff3b3dca8766494758

-- 
View it on GitLab: https://salsa.debian.org/med-team/simde/-/commit/91b3e7158b82a5197616a3ff3b3dca8766494758
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200403/f18e8096/attachment-0001.html>


More information about the debian-med-commit mailing list