[med-svn] [Git][med-team/simde][upstream] New upstream version 0.0.0.git.20200421

Michael R. Crusoe gitlab at salsa.debian.org
Tue Apr 21 17:32:49 BST 2020



Michael R. Crusoe pushed to branch upstream at Debian Med / simde


Commits:
54ff0a0f by Michael R. Crusoe at 2020-04-21T18:26:07+02:00
New upstream version 0.0.0.git.20200421
- - - - -


13 changed files:

- .drone.star
- simde/simde-common.h
- simde/x86/avx.h
- simde/x86/avx2.h
- simde/x86/avx512bw.h
- simde/x86/avx512f.h
- simde/x86/avx512vl.h
- simde/x86/fma.h
- simde/x86/sse.h
- simde/x86/sse2.h
- test/x86/avx.c
- test/x86/avx512f.c
- test/x86/avx512vl.c


Changes:

=====================================
.drone.star
=====================================
@@ -6,8 +6,8 @@ def get_test_commands():
   return [
     "mkdir -p build",
     "cd build",
-    'CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson ..',
-    "ninja -v",
+    'CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson .. || (cat meson-logs/meson-log.txt; false)',
+    '"$(command -v ninja || command -v ninja-build)" -v',
     "./test/run-tests",
   ]
 
@@ -24,6 +24,12 @@ def get_dnf_install_commands(extra_pkgs = []):
     "pip3 install meson",
   ]
 
+def get_yum_install_commands(extra_pkgs = []):
+  return [
+    "yum install -y epel-release",
+    "yum install -y %s meson ninja-build git-core" % " ".join(extra_pkgs),
+  ]
+
 def get_default_job():
   return {
     "kind": "pipeline",
@@ -250,6 +256,24 @@ def get_jobs():
     }
   }
 
+  job_centos7_clang3 = {
+    "name": "centos7 clang3",
+    "steps": [
+      {
+        "image": "centos:7",
+        "environment": {
+          "CC": "clang",
+          "CXX": "clang++",
+        },
+        "failure": "ignore"
+      }
+    ],
+    "custom": {
+      # gcc, gcc-c++ are necessary to build on clang.
+      "install": get_yum_install_commands(["clang", "gcc", "gcc-c++"])
+    }
+  }
+
   return [
     job_clang9_armv7,
     job_clang9_armv8,
@@ -261,6 +285,7 @@ def get_jobs():
     job_gcc7_armv8,
     # job_fedora,
     job_fedora_clang_arm64_flags,
+    job_centos7_clang3,
   ]
 
 def main(ctx):


=====================================
simde/simde-common.h
=====================================
@@ -360,6 +360,32 @@ HEDLEY_STATIC_ASSERT(sizeof(simde_float64) == 8, "Unable to find 64-bit floating
 #  define SIMDE_CONVERT_FTOI(T,v) ((T) (v))
 #endif
 
+/* This behaves like reinterpret_cast<to>(value), except that it will
+   attempt te verify that value is of type "from" or "to". */
+#if defined(__cplusplus)
+  template <typename To, typename From> class SIMDeCheckedReinterpretCastImpl {
+    public:
+      static To convert (To value) { return value; };
+      static To convert (From value) { return reinterpret_cast<To>(value); };
+  };
+  #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) (SIMDeCheckedReinterpretCastImpl<to, from>::convert(value))
+#elif \
+    HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \
+    HEDLEY_GCC_VERSION_CHECK(3,4,0) || \
+    HEDLEY_ARM_VERSION_CHECK(5,0,4) || \
+    HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \
+    HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
+    HEDLEY_IBM_VERSION_CHECK(16,1,0)
+  #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) \
+    (__extension__({ \
+      HEDLEY_STATIC_ASSERT(__builtin_types_compatible_p(from, __typeof__(value)) || \
+		    __builtin_types_compatible_p(to, __typeof__(value)), \
+		    "Type of `" #value "` must be either `" #to "` or `" #from "`"); \
+      HEDLEY_REINTERPRET_CAST(to, value); \
+    }))
+#else
+  #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value)
+#endif
 
 #if HEDLEY_HAS_WARNING("-Wfloat-equal")
 #  define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"")


=====================================
simde/x86/avx.h
=====================================
@@ -1490,7 +1490,7 @@ simde_mm256_broadcast_ps (simde__m128 const * mem_addr) {
 #endif
 }
 #if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-#  define _mm256_broadcast_ps(mem_addr) simde_mm256_broadcast_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr))
+#  define _mm256_broadcast_ps(mem_addr) simde_mm256_broadcast_ps(HEDLEY_REINTERPRET_CAST(simde__m128 const*, mem_addr))
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -4315,7 +4315,7 @@ simde_mm256_setr_epi32 (
 
 SIMDE__FUNCTION_ATTRIBUTES
 simde__m256i
-simde_mm256_setr_epi64 (int64_t  e3, int64_t  e2, int64_t  e1, int64_t  e0) {
+simde_mm256_setr_epi64x (int64_t  e3, int64_t  e2, int64_t  e1, int64_t  e0) {
 #if defined(SIMDE_AVX_NATIVE)
   return _mm256_setr_epi64x(e3, e2, e1, e0);
 #else
@@ -4323,8 +4323,8 @@ simde_mm256_setr_epi64 (int64_t  e3, int64_t  e2, int64_t  e1, int64_t  e0) {
 #endif
 }
 #if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-#  define _mm256_setr_epi64(e3, e2, e1, e0) \
-    simde_mm256_setr_epi64(e3, e2, e1, e0)
+#  define _mm256_setr_epi64x(e3, e2, e1, e0) \
+    simde_mm256_setr_epi64x(e3, e2, e1, e0)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES


=====================================
simde/x86/avx2.h
=====================================
@@ -1266,7 +1266,7 @@ simde_mm256_madd_epi16 (simde__m256i a, simde__m256i b) {
 #endif
 }
 #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
-#  define _mm256_add_epi16(a, b) simde_mm256_add_epi16(a, b)
+#  define _mm256_madd_epi16(a, b) simde_mm256_madd_epi16(a, b)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -2119,7 +2119,7 @@ simde_mm256_srli_si256 (simde__m256i a, const int imm8) {
        simde_mm_bsrli_si128(simde_mm256_extracti128_si256(a, 0), (imm8)))
 #endif
 #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
-#  define _mm256_srli_si256(a, imm8) simde_mm_srli_si256(a, imm8)
+#  define _mm256_srli_si256(a, imm8) simde_mm256_srli_si256(a, imm8)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES


=====================================
simde/x86/avx512bw.h
=====================================
@@ -380,7 +380,7 @@ simde_mm512_cvtsepi16_epi8 (simde__m512i a) {
     return simde__m256i_from_private(r_);
   #endif
 }
-#if defined(SIMDE_AVX512VL_ENABLE_NATIVE_ALIASES)
+#if defined(SIMDE_AVX512BW_ENABLE_NATIVE_ALIASES)
   #define _mm512_cvtsepi16_epi8(a) simde_mm512_cvtsepi16_epi8(a)
 #endif
 


=====================================
simde/x86/avx512f.h
=====================================
@@ -2140,6 +2140,58 @@ simde_mm512_andnot_si512 (simde__m512i a, simde__m512i b) {
   #define _mm512_andnot_si512(a, b) simde_mm512_andnot_si512(a, b)
 #endif
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_mask_blend_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_mask_blend_epi32(k, a, b);
+#else
+  return simde_mm512_mask_mov_epi32(a, k, b);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#define _mm512_mask_blend_epi32(k, a, b) simde_mm512_mask_blend_epi32(k, a, b)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_mask_blend_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_mask_blend_epi64(k, a, b);
+#else
+  return simde_mm512_mask_mov_epi64(a, k, b);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#define _mm512_mask_blend_epi64(k, a, b) simde_mm512_mask_blend_epi64(k, a, b)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512
+simde_mm512_mask_blend_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_mask_blend_ps(k, a, b);
+#else
+  return simde_mm512_mask_mov_ps(a, k, b);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#define _mm512_mask_blend_ps(k, a, b) simde_mm512_mask_blend_ps(k, a, b)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512d
+simde_mm512_mask_blend_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_mask_blend_pd(k, a, b);
+#else
+  return simde_mm512_mask_mov_pd(a, k, b);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#define _mm512_mask_blend_pd(k, a, b) simde_mm512_mask_blend_pd(k, a, b)
+#endif
+
 SIMDE__FUNCTION_ATTRIBUTES
 simde__m512i
 simde_mm512_broadcast_i32x4 (simde__m128i a) {
@@ -2166,6 +2218,48 @@ simde_mm512_broadcast_i32x4 (simde__m128i a) {
   #define _mm512_broadcast_i32x4(a) simde_mm512_broadcast_i32x4(a)
 #endif
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_broadcastd_epi32 (simde__m128i a) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_broadcastd_epi32(a);
+#else
+  simde__m512i_private r_;
+  simde__m128i_private a_= simde__m128i_to_private(a);
+
+  SIMDE__VECTORIZE
+  for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
+    r_.i32[i] = a_.i32[0];
+  }
+
+  return simde__m512i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_broadcastd_epi32(a) simde_mm512_broadcastd_epi32(a)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_broadcastq_epi64 (simde__m128i a) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_broadcastq_epi64(a);
+#else
+  simde__m512i_private r_;
+  simde__m128i_private a_= simde__m128i_to_private(a);
+
+  SIMDE__VECTORIZE
+  for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
+    r_.i64[i] = a_.i64[0];
+  }
+
+  return simde__m512i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_broadcastq_epi64(a) simde_mm512_broadcastq_epi64(a)
+#endif
+
 SIMDE__FUNCTION_ATTRIBUTES
 simde__mmask16
 simde_mm512_cmpeq_epi32_mask (simde__m512i a, simde__m512i b) {
@@ -3321,7 +3415,7 @@ simde_mm512_mask_test_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512
   #endif
 }
 #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
-  #define _mm512_mask_test_epi32_mask(a, b) simde_mm512_mask_test_epi32_mask(a, b)
+  #define _mm512_mask_test_epi32_mask(k1, a, b) simde_mm512_mask_test_epi32_mask(k1, a, b)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -3344,7 +3438,7 @@ simde_mm512_mask_test_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i
   #endif
 }
 #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
-  #define _mm512_mask_test_epi64_mask(a, b) simde_mm512_mask_test_epi64_mask(a, b)
+  #define _mm512_mask_test_epi64_mask(k1, a, b) simde_mm512_mask_test_epi64_mask(k1, a, b)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES


=====================================
simde/x86/avx512vl.h
=====================================
@@ -53,6 +53,98 @@ SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
 #endif
 SIMDE__BEGIN_DECLS
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m128i
+simde_mm_mask_mov_epi8(simde__m128i src, simde__mmask16 k, simde__m128i a) {
+  #if defined(SIMDE_AVX512VL_NATIVE)
+    return _mm_mask_mov_epi8(src, k, a);
+  #else
+    simde__m128i_private
+      src_ = simde__m128i_to_private(src),
+      a_ = simde__m128i_to_private(a),
+      r_;
+
+    SIMDE__VECTORIZE
+    for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
+      r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : src_.i8[i];
+    }
+
+    return simde__m128i_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_AVX512VL_ENABLE_NATIVE_ALIASES)
+  #define _mm_mask_mov_epi8(src, k, a) simde_mm_mask_mov_epi8(src, k, a)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m128i
+simde_mm_mask_mov_epi16(simde__m128i src, simde__mmask8 k, simde__m128i a) {
+  #if defined(SIMDE_AVX512VL_NATIVE)
+    return _mm_mask_mov_epi16(src, k, a);
+  #else
+    simde__m128i_private
+      src_ = simde__m128i_to_private(src),
+      a_ = simde__m128i_to_private(a),
+      r_;
+
+    SIMDE__VECTORIZE
+    for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
+      r_.i16[i] = ((k >> i) & 1) ? a_.i16[i] : src_.i16[i];
+    }
+
+    return simde__m128i_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_AVX512VL_ENABLE_NATIVE_ALIASES)
+  #define _mm_mask_mov_epi16(src, k, a) simde_mm_mask_mov_epi16(src, k, a)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m128i
+simde_mm_mask_mov_epi32(simde__m128i src, simde__mmask8 k, simde__m128i a) {
+  #if defined(SIMDE_AVX512VL_NATIVE)
+    return _mm_mask_mov_epi32(src, k, a);
+  #else
+    simde__m128i_private
+      src_ = simde__m128i_to_private(src),
+      a_ = simde__m128i_to_private(a),
+      r_;
+
+    SIMDE__VECTORIZE
+    for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
+      r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : src_.i32[i];
+    }
+
+    return simde__m128i_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_AVX512VL_ENABLE_NATIVE_ALIASES)
+  #define _mm_mask_mov_epi32(src, k, a) simde_mm_mask_mov_epi32(src, k, a)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m128i
+simde_mm_mask_mov_epi64(simde__m128i src, simde__mmask8 k, simde__m128i a) {
+  #if defined(SIMDE_AVX512VL_NATIVE)
+    return _mm_mask_mov_epi64(src, k, a);
+  #else
+    simde__m128i_private
+      src_ = simde__m128i_to_private(src),
+      a_ = simde__m128i_to_private(a),
+      r_;
+
+    SIMDE__VECTORIZE
+    for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
+      r_.i64[i] = ((k >> i) & 1) ? a_.i64[i] : src_.i64[i];
+    }
+
+    return simde__m128i_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_AVX512VL_ENABLE_NATIVE_ALIASES)
+  #define _mm_mask_mov_epi64(src, k, a) simde_mm_mask_mov_epi64(src, k, a)
+#endif
+
 SIMDE__FUNCTION_ATTRIBUTES
 simde__m128i
 simde_mm_cvtsepi16_epi8 (simde__m128i a) {
@@ -105,6 +197,58 @@ simde_mm256_cvtsepi16_epi8 (simde__m256i a) {
   #define _mm256_cvtsepi16_epi8(a) simde_mm256_cvtsepi16_epi8(a)
 #endif
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m128i
+simde_mm_cvtsepi32_epi8 (simde__m128i a) {
+  #if defined(SIMDE_AVX512VL_NATIVE) && defined(SIMDE_AVX512BW_NATIVE)
+    return _mm_cvtsepi32_epi8(a);
+  #else
+    simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128());;
+    simde__m128i_private a_ = simde__m128i_to_private(a);
+
+    SIMDE__VECTORIZE
+    for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
+      r_.i8[i] =
+          (a_.i32[i] < INT8_MIN)
+            ? (INT8_MIN)
+            : ((a_.i32[i] > INT8_MAX)
+              ? (INT8_MAX)
+              : HEDLEY_STATIC_CAST(int8_t, a_.i32[i]));
+    }
+
+    return simde__m128i_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_AVX512VL_ENABLE_NATIVE_ALIASES)
+  #define _mm_cvtsepi32_epi8(a) simde_mm_cvtsepi32_epi8(a)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m128i
+simde_mm_cvtsepi32_epi16 (simde__m128i a) {
+  #if defined(SIMDE_AVX512VL_NATIVE) && defined(SIMDE_AVX512BW_NATIVE)
+    return _mm_cvtsepi32_epi16(a);
+  #else
+    simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128());;
+    simde__m128i_private a_ = simde__m128i_to_private(a);
+
+    SIMDE__VECTORIZE
+    for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
+      r_.i16[i] =
+          (a_.i32[i] < INT16_MIN)
+            ? (INT16_MIN)
+            : ((a_.i32[i] > INT16_MAX)
+              ? (INT16_MAX)
+              : HEDLEY_STATIC_CAST(int16_t, a_.i32[i]));
+    }
+
+    return simde__m128i_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_AVX512VL_ENABLE_NATIVE_ALIASES)
+  #define _mm_cvtsepi32_epi16(a) simde_mm_cvtsepi32_epi16(a)
+#endif
+
 SIMDE__END_DECLS
 
 HEDLEY_DIAGNOSTIC_POP


=====================================
simde/x86/fma.h
=====================================
@@ -87,7 +87,7 @@ simde_mm256_fmadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) {
 #endif
 }
 #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
-#  define _mm_fmadd_pd(a, b, c) simde_mm_fmadd_pd(a, b, c)
+#  define _mm256_fmadd_pd(a, b, c) simde_mm256_fmadd_pd(a, b, c)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -113,7 +113,7 @@ simde_mm256_fmadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) {
 #endif
 }
 #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
-#  define _mm_fmadd_ps(a, b, c) simde_mm_fmadd_ps(a, b, c)
+#  define _mm256_fmadd_ps(a, b, c) simde_mm256_fmadd_ps(a, b, c)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -165,7 +165,7 @@ simde_mm256_fmaddsub_pd (simde__m256d a, simde__m256d b, simde__m256d c) {
 #endif
 }
 #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
-#  define _mm_fmaddsub_pd(a, b, c) simde_mm_fmaddsub_pd(a, b, c)
+#  define _mm256_fmaddsub_pd(a, b, c) simde_mm256_fmaddsub_pd(a, b, c)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -191,7 +191,7 @@ simde_mm256_fmaddsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) {
 #endif
 }
 #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
-#  define _mm_fmaddsub_ps(a, b, c) simde_mm_fmaddsub_ps(a, b, c)
+#  define _mm256_fmaddsub_ps(a, b, c) simde_mm256_fmaddsub_ps(a, b, c)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -217,7 +217,7 @@ simde_mm256_fmsub_pd (simde__m256d a, simde__m256d b, simde__m256d c) {
 #endif
 }
 #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
-#  define _mm_fmsub_pd(a, b, c) simde_mm_fmsub_pd(a, b, c)
+#  define _mm256_fmsub_pd(a, b, c) simde_mm256_fmsub_pd(a, b, c)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -243,7 +243,7 @@ simde_mm256_fmsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) {
 #endif
 }
 #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
-#  define _mm_fmsub_ps(a, b, c) simde_mm_fmsub_ps(a, b, c)
+#  define _mm256_fmsub_ps(a, b, c) simde_mm256_fmsub_ps(a, b, c)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -319,7 +319,7 @@ simde_mm256_fmsubadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) {
 #endif
 }
 #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
-#  define _mm_fmsubadd_pd(a, b, c) simde_mm_fmsubadd_pd(a, b, c)
+#  define _mm256_fmsubadd_pd(a, b, c) simde_mm256_fmsubadd_pd(a, b, c)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -369,7 +369,7 @@ simde_mm256_fmsubadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) {
 #endif
 }
 #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
-#  define _mm_fmsubadd_ps(a, b, c) simde_mm_fmsubadd_ps(a, b, c)
+#  define _mm256_fmsubadd_ps(a, b, c) simde_mm256_fmsubadd_ps(a, b, c)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -417,7 +417,7 @@ simde_mm256_fnmadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) {
 #endif
 }
 #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
-#  define _mm_fnmadd_pd(a, b, c) simde_mm_fnmadd_pd(a, b, c)
+#  define _mm256_fnmadd_pd(a, b, c) simde_mm256_fnmadd_pd(a, b, c)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -465,7 +465,7 @@ simde_mm256_fnmadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) {
 #endif
 }
 #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
-#  define _mm_fnmadd_ps(a, b, c) simde_mm_fnmadd_ps(a, b, c)
+#  define _mm256_fnmadd_ps(a, b, c) simde_mm256_fnmadd_ps(a, b, c)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -557,7 +557,7 @@ simde_mm256_fnmsub_pd (simde__m256d a, simde__m256d b, simde__m256d c) {
 #endif
 }
 #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
-#  define _mm_fnmsub_pd(a, b, c) simde_mm_fnmsub_pd(a, b, c)
+#  define _mm256_fnmsub_pd(a, b, c) simde_mm256_fnmsub_pd(a, b, c)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -605,7 +605,7 @@ simde_mm256_fnmsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) {
 #endif
 }
 #if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
-#  define _mm_fnmsub_ps(a, b, c) simde_mm_fnmsub_ps(a, b, c)
+#  define _mm256_fnmsub_ps(a, b, c) simde_mm256_fnmsub_ps(a, b, c)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES


=====================================
simde/x86/sse.h
=====================================
@@ -2080,7 +2080,7 @@ simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) {
 }
 #define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr)
 #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
-#  define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64(a, (mask), mem_addr)
+#  define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, mask), (mem_addr))
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -2991,7 +2991,7 @@ simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) {
 #endif
 }
 #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
-#  define _mm_store_ps(mem_addr, a) simde_mm_store_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
+#  define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -3011,7 +3011,7 @@ simde_mm_store_ps1 (simde_float32 mem_addr[4], simde__m128 a) {
 #endif
 }
 #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
-#  define _mm_store_ps1(mem_addr, a) simde_mm_store_ps1(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
+#  define _mm_store_ps1(mem_addr, a) simde_mm_store_ps1(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -3030,7 +3030,7 @@ simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) {
 #endif
 }
 #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
-#  define _mm_store_ss(mem_addr, a) simde_mm_store_ss(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
+#  define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -3045,7 +3045,7 @@ simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) {
 #endif
 }
 #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
-#  define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
+#  define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -3104,7 +3104,7 @@ simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) {
 #endif
 }
 #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
-#  define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
+#  define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -3123,7 +3123,7 @@ simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) {
 #endif
 }
 #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
-#  define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
+#  define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -3498,7 +3498,7 @@ simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) {
 #endif
 }
 #if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
-#  define _mm_stream_ps(mem_addr, a) simde_mm_stream_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
+#  define _mm_stream_ps(mem_addr, a) simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES


=====================================
simde/x86/sse2.h
=====================================
@@ -2902,7 +2902,7 @@ simde_mm_maskmoveu_si128 (simde__m128i a, simde__m128i mask, int8_t mem_addr[HED
 #endif
 }
 #if defined(SIMDE_SSE2_ENABLE_NATIVE_ALIASES)
-#  define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128(a, (mask), HEDLEY_REINTERPRET_CAST(int8_t*, mem_addr))
+#  define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128(a, (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, mem_addr))
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES
@@ -5060,7 +5060,7 @@ simde_mm_stream_si64 (int64_t* mem_addr, int64_t a) {
   *mem_addr = a;
 }
 #if defined(SIMDE_SSE2_ENABLE_NATIVE_ALIASES)
-#  define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(mem_addr, a)
+#  define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a)
 #endif
 
 SIMDE__FUNCTION_ATTRIBUTES


=====================================
test/x86/avx.c
=====================================
@@ -10318,7 +10318,7 @@ test_simde_mm256_setr_epi32(const MunitParameter params[], void* data) {
 }
 
 static MunitResult
-test_simde_mm256_setr_epi64(const MunitParameter params[], void* data) {
+test_simde_mm256_setr_epi64x(const MunitParameter params[], void* data) {
   (void) params;
   (void) data;
 
@@ -10361,7 +10361,7 @@ test_simde_mm256_setr_epi64(const MunitParameter params[], void* data) {
   };
 
   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
-    simde__m256i r = simde_mm256_setr_epi64(
+    simde__m256i r = simde_mm256_setr_epi64x(
         test_vec[i].a[ 0], test_vec[i].a[ 1], test_vec[i].a[ 2], test_vec[i].a[ 3]);
     simde_assert_m256i_i64(r, ==, test_vec[i].r);
   }
@@ -13669,7 +13669,7 @@ static MunitTest test_suite_tests[] = {
   SIMDE_TESTS_DEFINE_TEST(mm256_setr_epi8),
   SIMDE_TESTS_DEFINE_TEST(mm256_setr_epi16),
   SIMDE_TESTS_DEFINE_TEST(mm256_setr_epi32),
-  SIMDE_TESTS_DEFINE_TEST(mm256_setr_epi64),
+  SIMDE_TESTS_DEFINE_TEST(mm256_setr_epi64x),
   SIMDE_TESTS_DEFINE_TEST(mm256_setr_ps),
   SIMDE_TESTS_DEFINE_TEST(mm256_setr_pd),
   SIMDE_TESTS_DEFINE_TEST(mm256_setr_m128),


=====================================
test/x86/avx512f.c
=====================================
@@ -2455,6 +2455,506 @@ test_simde_mm512_andnot_si512(const MunitParameter params[], void* data) {
   return MUNIT_OK;
 }
 
+static MunitResult
+test_simde_mm512_mask_blend_epi32(const MunitParameter params[], void* data) {
+  (void) params;
+  (void) data;
+
+  const struct {
+    simde__mmask16 k;
+    simde__m512i a;
+    simde__m512i b;
+    simde__m512i r;
+  } test_vec[8] = {
+    { UINT16_C(12684),
+      simde_mm512_set_epi32(INT32_C( 2139597246), INT32_C(-2035467821), INT32_C(-1381016544), INT32_C( -293624181),
+                            INT32_C( 1610331725), INT32_C(  134146865), INT32_C(  837546022), INT32_C(-1561535917),
+                            INT32_C(  522767958), INT32_C(  350987310), INT32_C( 1200416302), INT32_C( 1035712103),
+                            INT32_C( -697441354), INT32_C(-1209277994), INT32_C( 1905768221), INT32_C(  941422574)),
+      simde_mm512_set_epi32(INT32_C( -854245863), INT32_C(   77819890), INT32_C( -597706880), INT32_C(  -28687979),
+                            INT32_C( 1384494246), INT32_C(-1885694903), INT32_C(  479641666), INT32_C(  436747778),
+                            INT32_C( 1142933685), INT32_C(  -36150185), INT32_C(  234764144), INT32_C( -925516387),
+                            INT32_C( 1528722995), INT32_C( 1957265461), INT32_C( -558613563), INT32_C(  535737103)),
+      simde_mm512_set_epi32(INT32_C( 2139597246), INT32_C(-2035467821), INT32_C( -597706880), INT32_C(  -28687979),
+                            INT32_C( 1610331725), INT32_C(  134146865), INT32_C(  837546022), INT32_C(  436747778),
+                            INT32_C( 1142933685), INT32_C(  350987310), INT32_C( 1200416302), INT32_C( 1035712103),
+                            INT32_C( 1528722995), INT32_C( 1957265461), INT32_C( 1905768221), INT32_C(  941422574)) },
+    { UINT16_C(12889),
+      simde_mm512_set_epi32(INT32_C(-1009854213), INT32_C(   19207470), INT32_C( 2053019824), INT32_C( 1679893080),
+                            INT32_C( -761309092), INT32_C(-1797634461), INT32_C( 1499461014), INT32_C( -349931656),
+                            INT32_C( 1308377490), INT32_C(-1862232386), INT32_C( -706282442), INT32_C( 1752887042),
+                            INT32_C( 1045610342), INT32_C(   88096217), INT32_C(-1144289151), INT32_C( 1410502196)),
+      simde_mm512_set_epi32(INT32_C( 1437842356), INT32_C(-1817562257), INT32_C(  808924311), INT32_C( 1765692072),
+                            INT32_C(-1346910557), INT32_C(  -92284700), INT32_C( 1533217965), INT32_C( 1732689820),
+                            INT32_C(-1401128233), INT32_C( -762168473), INT32_C(   97276971), INT32_C( 2145432631),
+                            INT32_C(-1561525899), INT32_C(-2005427238), INT32_C( -455460474), INT32_C( -933959435)),
+      simde_mm512_set_epi32(INT32_C(-1009854213), INT32_C(   19207470), INT32_C(  808924311), INT32_C( 1765692072),
+                            INT32_C( -761309092), INT32_C(-1797634461), INT32_C( 1533217965), INT32_C( -349931656),
+                            INT32_C( 1308377490), INT32_C( -762168473), INT32_C( -706282442), INT32_C( 2145432631),
+                            INT32_C(-1561525899), INT32_C(   88096217), INT32_C(-1144289151), INT32_C( -933959435)) },
+    { UINT16_C(18390),
+      simde_mm512_set_epi32(INT32_C(  191788932), INT32_C(  410937469), INT32_C(  218604234), INT32_C( -632545043),
+                            INT32_C(  246082482), INT32_C( 2029197195), INT32_C( 1188965621), INT32_C( -844747875),
+                            INT32_C(  989502056), INT32_C( 2119540790), INT32_C(-1766179858), INT32_C(-1109416221),
+                            INT32_C(-1963025204), INT32_C( -932958949), INT32_C(   47867627), INT32_C( -567270366)),
+      simde_mm512_set_epi32(INT32_C(  558500028), INT32_C( 1400907983), INT32_C(-1581715774), INT32_C(  -29022872),
+                            INT32_C( 1773849857), INT32_C( -311975417), INT32_C( 1183760637), INT32_C(-1160252785),
+                            INT32_C( 2107838031), INT32_C( 1909470743), INT32_C(-2018375211), INT32_C(  267812095),
+                            INT32_C( -225335539), INT32_C( -871226308), INT32_C( -872412082), INT32_C( 1435481672)),
+      simde_mm512_set_epi32(INT32_C(  191788932), INT32_C( 1400907983), INT32_C(  218604234), INT32_C( -632545043),
+                            INT32_C(  246082482), INT32_C( -311975417), INT32_C( 1183760637), INT32_C(-1160252785),
+                            INT32_C( 2107838031), INT32_C( 1909470743), INT32_C(-1766179858), INT32_C(  267812095),
+                            INT32_C(-1963025204), INT32_C( -871226308), INT32_C( -872412082), INT32_C( -567270366)) },
+    { UINT16_C(46409),
+      simde_mm512_set_epi32(INT32_C( -239336968), INT32_C( 1154172094), INT32_C( 1382102779), INT32_C(-1946237060),
+                            INT32_C( -995869857), INT32_C( 1050338310), INT32_C(-2047829467), INT32_C( -881484106),
+                            INT32_C(  -67227529), INT32_C( -510303256), INT32_C( -387177060), INT32_C( 2007995362),
+                            INT32_C( 1596393504), INT32_C( 1716443052), INT32_C( 1056333857), INT32_C( -879795312)),
+      simde_mm512_set_epi32(INT32_C(-1321156942), INT32_C(-1763902924), INT32_C( -145319736), INT32_C(  356975558),
+                            INT32_C(  109934631), INT32_C( 1326272066), INT32_C(  723198088), INT32_C(-1812908400),
+                            INT32_C( 1665386649), INT32_C( 1770577849), INT32_C(-2015166919), INT32_C(-1565649496),
+                            INT32_C( 1045296779), INT32_C( 1401153164), INT32_C( -294475079), INT32_C(  378377774)),
+      simde_mm512_set_epi32(INT32_C(-1321156942), INT32_C( 1154172094), INT32_C( -145319736), INT32_C(  356975558),
+                            INT32_C( -995869857), INT32_C( 1326272066), INT32_C(-2047829467), INT32_C(-1812908400),
+                            INT32_C(  -67227529), INT32_C( 1770577849), INT32_C( -387177060), INT32_C( 2007995362),
+                            INT32_C( 1045296779), INT32_C( 1716443052), INT32_C( 1056333857), INT32_C(  378377774)) },
+    { UINT16_C(35033),
+      simde_mm512_set_epi32(INT32_C(  576121858), INT32_C(  -83274089), INT32_C( 1081604364), INT32_C( 1853977291),
+                            INT32_C(-1408149319), INT32_C(-1793071292), INT32_C( -580417531), INT32_C( 1708989591),
+                            INT32_C(-1803428364), INT32_C(-1884594628), INT32_C(-1049896819), INT32_C( 1351777033),
+                            INT32_C( -543435799), INT32_C(   45073785), INT32_C(  310971883), INT32_C(  586295496)),
+      simde_mm512_set_epi32(INT32_C(  274706498), INT32_C( 1339140311), INT32_C(  773365916), INT32_C( -407443831),
+                            INT32_C(  -44487881), INT32_C( -363465063), INT32_C( -778555208), INT32_C( -640038352),
+                            INT32_C( -428291654), INT32_C(-1080717955), INT32_C(  906661653), INT32_C( 1353353955),
+                            INT32_C(-1311936279), INT32_C( 1168846380), INT32_C(  -71806717), INT32_C(  617275327)),
+      simde_mm512_set_epi32(INT32_C(  274706498), INT32_C(  -83274089), INT32_C( 1081604364), INT32_C( 1853977291),
+                            INT32_C(  -44487881), INT32_C(-1793071292), INT32_C( -580417531), INT32_C( 1708989591),
+                            INT32_C( -428291654), INT32_C(-1080717955), INT32_C(-1049896819), INT32_C( 1353353955),
+                            INT32_C(-1311936279), INT32_C(   45073785), INT32_C(  310971883), INT32_C(  617275327)) },
+    { UINT16_C(62826),
+      simde_mm512_set_epi32(INT32_C( -943712419), INT32_C( -981833223), INT32_C( 2020022414), INT32_C(  630972788),
+                            INT32_C( 1615502534), INT32_C(  991949979), INT32_C(  601817641), INT32_C(-2063962607),
+                            INT32_C(-1545145030), INT32_C( 1626575612), INT32_C(-1511315708), INT32_C( 1422623346),
+                            INT32_C( 1496301111), INT32_C(-1751918881), INT32_C(  333195983), INT32_C( 1655699275)),
+      simde_mm512_set_epi32(INT32_C(-1770653828), INT32_C( -674401292), INT32_C(-2023667251), INT32_C( 1038799540),
+                            INT32_C(-1877506849), INT32_C(  791301479), INT32_C(-2115975814), INT32_C( 1430860109),
+                            INT32_C(-2123570597), INT32_C(-2010985064), INT32_C( 1367050649), INT32_C( -268988786),
+                            INT32_C( 1975120887), INT32_C(   83320183), INT32_C( 2120549505), INT32_C(  163195572)),
+      simde_mm512_set_epi32(INT32_C(-1770653828), INT32_C( -674401292), INT32_C(-2023667251), INT32_C( 1038799540),
+                            INT32_C( 1615502534), INT32_C(  791301479), INT32_C(  601817641), INT32_C( 1430860109),
+                            INT32_C(-1545145030), INT32_C(-2010985064), INT32_C( 1367050649), INT32_C( 1422623346),
+                            INT32_C( 1975120887), INT32_C(-1751918881), INT32_C( 2120549505), INT32_C( 1655699275)) },
+    { UINT16_C( 2185),
+      simde_mm512_set_epi32(INT32_C( 1990735603), INT32_C( -226564964), INT32_C( 2030923034), INT32_C( 1100474296),
+                            INT32_C( 1370205128), INT32_C( 1609607637), INT32_C( -586788969), INT32_C( 1072075481),
+                            INT32_C( -723316478), INT32_C(-1331909036), INT32_C(-1524508879), INT32_C( -832646284),
+                            INT32_C(-1636241911), INT32_C(  215718784), INT32_C(  478814832), INT32_C( 1231303010)),
+      simde_mm512_set_epi32(INT32_C(   96871414), INT32_C(  245175863), INT32_C( 1584772139), INT32_C( 1604795180),
+                            INT32_C(-1972440261), INT32_C(-1471456371), INT32_C( 1108051273), INT32_C(   -4141330),
+                            INT32_C(-1064630270), INT32_C( -911670021), INT32_C(-1808660435), INT32_C(-2035294308),
+                            INT32_C(-2014140232), INT32_C(-1194657062), INT32_C(-1732550793), INT32_C(-1643523135)),
+      simde_mm512_set_epi32(INT32_C( 1990735603), INT32_C( -226564964), INT32_C( 2030923034), INT32_C( 1100474296),
+                            INT32_C(-1972440261), INT32_C( 1609607637), INT32_C( -586788969), INT32_C( 1072075481),
+                            INT32_C(-1064630270), INT32_C(-1331909036), INT32_C(-1524508879), INT32_C( -832646284),
+                            INT32_C(-2014140232), INT32_C(  215718784), INT32_C(  478814832), INT32_C(-1643523135)) },
+    { UINT16_C(11244),
+      simde_mm512_set_epi32(INT32_C( 1605800253), INT32_C( 1825375434), INT32_C( -280209407), INT32_C( 1616462798),
+                            INT32_C( 1380939760), INT32_C( 1670822362), INT32_C(-1872387325), INT32_C(  451486273),
+                            INT32_C( -978012107), INT32_C( -718128180), INT32_C(-1625787118), INT32_C( -879101117),
+                            INT32_C(  173600397), INT32_C( 1426384314), INT32_C( -517748272), INT32_C(   76760759)),
+      simde_mm512_set_epi32(INT32_C(  805228357), INT32_C(  980137697), INT32_C( -555168446), INT32_C(-2016549382),
+                            INT32_C(  927860791), INT32_C(-1515288559), INT32_C( -918296563), INT32_C(-1858382028),
+                            INT32_C( 1365882699), INT32_C(  616589376), INT32_C(-1573056329), INT32_C( 1014781400),
+                            INT32_C( -260360112), INT32_C( -953114112), INT32_C( 1191757764), INT32_C( -706360509)),
+      simde_mm512_set_epi32(INT32_C( 1605800253), INT32_C( 1825375434), INT32_C( -555168446), INT32_C( 1616462798),
+                            INT32_C(  927860791), INT32_C( 1670822362), INT32_C( -918296563), INT32_C(-1858382028),
+                            INT32_C( 1365882699), INT32_C(  616589376), INT32_C(-1573056329), INT32_C( -879101117),
+                            INT32_C( -260360112), INT32_C( -953114112), INT32_C( -517748272), INT32_C(   76760759)) }
+  };
+
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+    simde__m512i r = simde_mm512_mask_blend_epi32(test_vec[i].k, test_vec[i].a, test_vec[i].b);
+    simde_assert_m512i_i32(r, ==, test_vec[i].r);
+  }
+
+  return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm512_mask_blend_epi64(const MunitParameter params[], void* data) {
+  (void) params;
+  (void) data;
+
+  const struct {
+    simde__mmask8 k;
+    simde__m512i a;
+    simde__m512i b;
+    simde__m512i r;
+  } test_vec[8] = {
+    { UINT8_C(140),
+      simde_mm512_set_epi64(INT64_C(-8742267720341431264), INT64_C(-1261106253099452851),
+                            INT64_C(  576156398873473062), INT64_C(-6706745694521602474),
+                            INT64_C( 1507479018961430062), INT64_C( 4448349614053909430),
+                            INT64_C(-5193809434096716003), INT64_C( 4043379170809950035)),
+      simde_mm512_set_epi64(INT64_C(  334233886225577856), INT64_C( -123213930208840538),
+                            INT64_C(-8098997938139250622), INT64_C( 1875817424253601973),
+                            INT64_C( -155263862084585616), INT64_C(-3975062612548356557),
+                            INT64_C( 8406391148321717189), INT64_C( 2300973338778380734)),
+      simde_mm512_set_epi64(INT64_C(  334233886225577856), INT64_C(-1261106253099452851),
+                            INT64_C(  576156398873473062), INT64_C(-6706745694521602474),
+                            INT64_C( -155263862084585616), INT64_C(-3975062612548356557),
+                            INT64_C(-5193809434096716003), INT64_C( 4043379170809950035)) },
+    { UINT8_C( 25),
+      simde_mm512_set_epi64(INT64_C( 8817653003799568984), INT64_C(-3269797649790122397),
+                            INT64_C( 6440136020702033784), INT64_C( 5619438532805301950),
+                            INT64_C(-3033459988376129790), INT64_C( 4490862223337471449),
+                            INT64_C(-4914684479302103500), INT64_C(  357175739365339737)),
+      simde_mm512_set_epi64(INT64_C( 3474303462450025128), INT64_C(-5784936788749461276),
+                            INT64_C( 6585121019047362460), INT64_C(-6017799934704469145),
+                            INT64_C(  417801411244373047), INT64_C(-6706702665772459046),
+                            INT64_C(-1956187837089650443), INT64_C(-4337290818543610578)),
+      simde_mm512_set_epi64(INT64_C( 8817653003799568984), INT64_C(-3269797649790122397),
+                            INT64_C( 6440136020702033784), INT64_C(-6017799934704469145),
+                            INT64_C(  417801411244373047), INT64_C( 4490862223337471449),
+                            INT64_C(-4914684479302103500), INT64_C(-4337290818543610578)) },
+    { UINT8_C(111),
+      simde_mm512_set_epi64(INT64_C(-2716760272685831246), INT64_C( 8715335590848900341),
+                            INT64_C(-3628164495500993944), INT64_C( 9103358378116791278),
+                            INT64_C(-4764906384514966324), INT64_C(-4007028174417664277),
+                            INT64_C(-2436407666547579589), INT64_C( 1334270375494925236)),
+      simde_mm512_set_epi64(INT64_C( -124652284302144255), INT64_C(-1339924211987201795),
+                            INT64_C(-4983247764560081329), INT64_C( 8201114396130413013),
+                            INT64_C( 1150244193567876877), INT64_C(-3741888496852267954),
+                            INT64_C( 6165346835439187844), INT64_C( 1764962990274618058)),
+      simde_mm512_set_epi64(INT64_C(-2716760272685831246), INT64_C(-1339924211987201795),
+                            INT64_C(-4983247764560081329), INT64_C( 9103358378116791278),
+                            INT64_C( 1150244193567876877), INT64_C(-3741888496852267954),
+                            INT64_C( 6165346835439187844), INT64_C( 1764962990274618058)) },
+    { UINT8_C(194),
+      simde_mm512_set_epi64(INT64_C(-4277228465836858362), INT64_C(-8795360585136628042),
+                            INT64_C( -288740034661227544), INT64_C(-1662912808453434398),
+                            INT64_C( 6856457892943288236), INT64_C( 4536919372887712656),
+                            INT64_C(-1781208167188155063), INT64_C( 2398739356475992271)),
+      simde_mm512_set_epi64(INT64_C(  472165646169099842), INT64_C( 3106112138971788944),
+                            INT64_C( 7152781194420608953), INT64_C(-8655076010356763224),
+                            INT64_C( 4489515481820292748), INT64_C(-1264760833413638610),
+                            INT64_C(-1027944449129626434), INT64_C( 5936086237864445820)),
+      simde_mm512_set_epi64(INT64_C(  472165646169099842), INT64_C( 3106112138971788944),
+                            INT64_C( -288740034661227544), INT64_C(-1662912808453434398),
+                            INT64_C( 6856457892943288236), INT64_C( 4536919372887712656),
+                            INT64_C(-1027944449129626434), INT64_C( 2398739356475992271)) },
+    { UINT8_C(198),
+      simde_mm512_set_epi64(INT64_C(-7701182554821916667), INT64_C( 7340054405040954868),
+                            INT64_C(-8094272290232215411), INT64_C( 5805838151970444265),
+                            INT64_C(  193590432792907243), INT64_C( 2518119983696523684),
+                            INT64_C(-7781788212556415310), INT64_C(-7575905367749125944)),
+      simde_mm512_set_epi64(INT64_C(-1561070555307167560), INT64_C(-2748943786159060550),
+                            INT64_C(-4641648272018338027), INT64_C( 5812610979620286697),
+                            INT64_C( 5020156980371149059), INT64_C( 2651177342668827650),
+                            INT64_C( -357659487777588980), INT64_C( 7962771835258493113)),
+      simde_mm512_set_epi64(INT64_C(-1561070555307167560), INT64_C(-2748943786159060550),
+                            INT64_C(-8094272290232215411), INT64_C( 5805838151970444265),
+                            INT64_C(  193590432792907243), INT64_C( 2651177342668827650),
+                            INT64_C( -357659487777588980), INT64_C(-7575905367749125944)) },
+    { UINT8_C( 55),
+      simde_mm512_set_epi64(INT64_C( 2584787088481873425), INT64_C(-6636347369800363268),
+                            INT64_C(-6491051538368462222), INT64_C( 6426564339256514271),
+                            INT64_C( 1431065851799271243), INT64_C(-5052971989011532438),
+                            INT64_C( 1179855426247829719), INT64_C( 3321581320948606601)),
+      simde_mm512_set_epi64(INT64_C(-9088046918826118835), INT64_C(-9120666262578213480),
+                            INT64_C( 5871437833456553614), INT64_C( 8483079615394831735),
+                            INT64_C( 9107690773687184052), INT64_C(-4053213973120914951),
+                            INT64_C( 8675930205947945332), INT64_C( 6938530551127078043)),
+      simde_mm512_set_epi64(INT64_C( 2584787088481873425), INT64_C(-6636347369800363268),
+                            INT64_C( 5871437833456553614), INT64_C( 8483079615394831735),
+                            INT64_C( 1431065851799271243), INT64_C(-4053213973120914951),
+                            INT64_C( 8675930205947945332), INT64_C( 6938530551127078043)) },
+    { UINT8_C(103),
+      simde_mm512_set_epi64(INT64_C( 4604529133310120194), INT64_C(-5720505748096428239),
+                            INT64_C(-3576188556257202679), INT64_C(  926505122891702896),
+                            INT64_C( 5288406162053320871), INT64_C(-5908713324082235524),
+                            INT64_C(-2896531491248846387), INT64_C( 4461610053817304287)),
+      simde_mm512_set_epi64(INT64_C(  -17786873681606654), INT64_C(-3915592922452326355),
+                            INT64_C(-8741522488314124104), INT64_C(-5131013008663027849),
+                            INT64_C(-7058878113053657357), INT64_C( -973089108768494310),
+                            INT64_C( 4726501112778828744), INT64_C( 6913212164015017879)),
+      simde_mm512_set_epi64(INT64_C( 4604529133310120194), INT64_C(-3915592922452326355),
+                            INT64_C(-8741522488314124104), INT64_C(  926505122891702896),
+                            INT64_C( 5288406162053320871), INT64_C( -973089108768494310),
+                            INT64_C( 4726501112778828744), INT64_C( 6913212164015017879)) },
+    { UINT8_C( 73),
+      simde_mm512_set_epi64(INT64_C(-4200530011080213556), INT64_C(-6982702498652226749),
+                            INT64_C(  745608029114000826), INT64_C(-2223711895723751753),
+                            INT64_C( 4918324162995104748), INT64_C(  416059555292452407),
+                            INT64_C( 6806544510221761324), INT64_C(-8471566411485193331)),
+      simde_mm512_set_epi64(INT64_C( 5866421522993801280), INT64_C(-6756225486806034984),
+                            INT64_C(-1118238162881043968), INT64_C( 5118560624722692931),
+                            INT64_C( 6896859572368901322), INT64_C(-1203490237480090674),
+                            INT64_C( 5931091108616911322), INT64_C(-8041842325868436927)),
+      simde_mm512_set_epi64(INT64_C(-4200530011080213556), INT64_C(-6756225486806034984),
+                            INT64_C(  745608029114000826), INT64_C(-2223711895723751753),
+                            INT64_C( 6896859572368901322), INT64_C(  416059555292452407),
+                            INT64_C( 6806544510221761324), INT64_C(-8041842325868436927)) }
+  };
+
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+    simde__m512i r = simde_mm512_mask_blend_epi64(test_vec[i].k, test_vec[i].a, test_vec[i].b);
+    simde_assert_m512i_i64(r, ==, test_vec[i].r);
+  }
+
+  return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm512_mask_blend_ps(const MunitParameter params[], void* data) {
+  (void) params;
+  (void) data;
+
+  const struct {
+    simde__mmask16 k;
+    simde__m512 a;
+    simde__m512 b;
+    simde__m512 r;
+  } test_vec[8] = {
+    { UINT16_C(28658),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(   986.64), SIMDE_FLOAT32_C(   121.90), SIMDE_FLOAT32_C(  -796.62), SIMDE_FLOAT32_C(   983.17),
+                         SIMDE_FLOAT32_C(   569.02), SIMDE_FLOAT32_C(   -88.58), SIMDE_FLOAT32_C(  -750.53), SIMDE_FLOAT32_C(    52.16),
+                         SIMDE_FLOAT32_C(   863.27), SIMDE_FLOAT32_C(  -937.53), SIMDE_FLOAT32_C(   272.85), SIMDE_FLOAT32_C(  -836.56),
+                         SIMDE_FLOAT32_C(  -517.71), SIMDE_FLOAT32_C(   436.89), SIMDE_FLOAT32_C(  -561.62), SIMDE_FLOAT32_C(  -796.29)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(   721.67), SIMDE_FLOAT32_C(  -355.29), SIMDE_FLOAT32_C(  -776.65), SIMDE_FLOAT32_C(  -467.78),
+                         SIMDE_FLOAT32_C(  -890.68), SIMDE_FLOAT32_C(  -288.13), SIMDE_FLOAT32_C(   739.88), SIMDE_FLOAT32_C(    -3.67),
+                         SIMDE_FLOAT32_C(   356.91), SIMDE_FLOAT32_C(  -250.13), SIMDE_FLOAT32_C(  -609.99), SIMDE_FLOAT32_C(  -756.57),
+                         SIMDE_FLOAT32_C(  -441.01), SIMDE_FLOAT32_C(   675.23), SIMDE_FLOAT32_C(  -112.56), SIMDE_FLOAT32_C(   752.66)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(   986.64), SIMDE_FLOAT32_C(  -355.29), SIMDE_FLOAT32_C(  -776.65), SIMDE_FLOAT32_C(   983.17),
+                         SIMDE_FLOAT32_C(  -890.68), SIMDE_FLOAT32_C(  -288.13), SIMDE_FLOAT32_C(   739.88), SIMDE_FLOAT32_C(    -3.67),
+                         SIMDE_FLOAT32_C(   356.91), SIMDE_FLOAT32_C(  -250.13), SIMDE_FLOAT32_C(  -609.99), SIMDE_FLOAT32_C(  -756.57),
+                         SIMDE_FLOAT32_C(  -517.71), SIMDE_FLOAT32_C(   436.89), SIMDE_FLOAT32_C(  -112.56), SIMDE_FLOAT32_C(  -796.29)) },
+    { UINT16_C(13167),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(  -177.79), SIMDE_FLOAT32_C(   957.03), SIMDE_FLOAT32_C(  -193.15), SIMDE_FLOAT32_C(   645.09),
+                         SIMDE_FLOAT32_C(    -0.96), SIMDE_FLOAT32_C(    66.15), SIMDE_FLOAT32_C(   565.09), SIMDE_FLOAT32_C(  -991.06),
+                         SIMDE_FLOAT32_C(  -217.74), SIMDE_FLOAT32_C(   162.91), SIMDE_FLOAT32_C(   837.05), SIMDE_FLOAT32_C(   132.83),
+                         SIMDE_FLOAT32_C(  -183.75), SIMDE_FLOAT32_C(  -958.98), SIMDE_FLOAT32_C(  -343.18), SIMDE_FLOAT32_C(  -412.04)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(  -623.32), SIMDE_FLOAT32_C(   372.80), SIMDE_FLOAT32_C(  -286.04), SIMDE_FLOAT32_C(   347.55),
+                         SIMDE_FLOAT32_C(  -954.70), SIMDE_FLOAT32_C(   272.86), SIMDE_FLOAT32_C(   787.91), SIMDE_FLOAT32_C(   529.75),
+                         SIMDE_FLOAT32_C(   -43.99), SIMDE_FLOAT32_C(   645.49), SIMDE_FLOAT32_C(  -301.76), SIMDE_FLOAT32_C(  -390.74),
+                         SIMDE_FLOAT32_C(   671.11), SIMDE_FLOAT32_C(  -513.10), SIMDE_FLOAT32_C(   467.15), SIMDE_FLOAT32_C(  -961.27)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(  -177.79), SIMDE_FLOAT32_C(   957.03), SIMDE_FLOAT32_C(  -286.04), SIMDE_FLOAT32_C(   347.55),
+                         SIMDE_FLOAT32_C(    -0.96), SIMDE_FLOAT32_C(    66.15), SIMDE_FLOAT32_C(   787.91), SIMDE_FLOAT32_C(   529.75),
+                         SIMDE_FLOAT32_C(  -217.74), SIMDE_FLOAT32_C(   645.49), SIMDE_FLOAT32_C(  -301.76), SIMDE_FLOAT32_C(   132.83),
+                         SIMDE_FLOAT32_C(   671.11), SIMDE_FLOAT32_C(  -513.10), SIMDE_FLOAT32_C(   467.15), SIMDE_FLOAT32_C(  -961.27)) },
+    { UINT16_C(10447),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(   986.49), SIMDE_FLOAT32_C(   854.73), SIMDE_FLOAT32_C(   459.72), SIMDE_FLOAT32_C(  -110.83),
+                         SIMDE_FLOAT32_C(  -875.29), SIMDE_FLOAT32_C(   594.30), SIMDE_FLOAT32_C(  -331.55), SIMDE_FLOAT32_C(  -808.64),
+                         SIMDE_FLOAT32_C(   705.45), SIMDE_FLOAT32_C(   -55.08), SIMDE_FLOAT32_C(   606.63), SIMDE_FLOAT32_C(   -13.01),
+                         SIMDE_FLOAT32_C(   483.39), SIMDE_FLOAT32_C(   565.56), SIMDE_FLOAT32_C(   735.84), SIMDE_FLOAT32_C(  -855.34)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(   263.46), SIMDE_FLOAT32_C(  -173.99), SIMDE_FLOAT32_C(  -448.77), SIMDE_FLOAT32_C(   -18.46),
+                         SIMDE_FLOAT32_C(    60.12), SIMDE_FLOAT32_C(   895.07), SIMDE_FLOAT32_C(   593.75), SIMDE_FLOAT32_C(  -910.69),
+                         SIMDE_FLOAT32_C(  -898.20), SIMDE_FLOAT32_C(  -885.41), SIMDE_FLOAT32_C(  -446.34), SIMDE_FLOAT32_C(  -539.23),
+                         SIMDE_FLOAT32_C(   177.56), SIMDE_FLOAT32_C(    85.90), SIMDE_FLOAT32_C(  -977.71), SIMDE_FLOAT32_C(   589.01)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(   986.49), SIMDE_FLOAT32_C(   854.73), SIMDE_FLOAT32_C(  -448.77), SIMDE_FLOAT32_C(  -110.83),
+                         SIMDE_FLOAT32_C(    60.12), SIMDE_FLOAT32_C(   594.30), SIMDE_FLOAT32_C(  -331.55), SIMDE_FLOAT32_C(  -808.64),
+                         SIMDE_FLOAT32_C(  -898.20), SIMDE_FLOAT32_C(  -885.41), SIMDE_FLOAT32_C(   606.63), SIMDE_FLOAT32_C(   -13.01),
+                         SIMDE_FLOAT32_C(   177.56), SIMDE_FLOAT32_C(    85.90), SIMDE_FLOAT32_C(  -977.71), SIMDE_FLOAT32_C(   589.01)) },
+    { UINT16_C(64052),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(  -833.77), SIMDE_FLOAT32_C(  -382.41), SIMDE_FLOAT32_C(   155.80), SIMDE_FLOAT32_C(  -175.51),
+                         SIMDE_FLOAT32_C(   270.94), SIMDE_FLOAT32_C(  -347.54), SIMDE_FLOAT32_C(  -823.80), SIMDE_FLOAT32_C(  -462.55),
+                         SIMDE_FLOAT32_C(    93.71), SIMDE_FLOAT32_C(  -510.90), SIMDE_FLOAT32_C(   589.53), SIMDE_FLOAT32_C(   762.37),
+                         SIMDE_FLOAT32_C(   -64.95), SIMDE_FLOAT32_C(  -200.72), SIMDE_FLOAT32_C(   590.31), SIMDE_FLOAT32_C(   904.10)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(   932.33), SIMDE_FLOAT32_C(  -948.81), SIMDE_FLOAT32_C(  -663.23), SIMDE_FLOAT32_C(  -224.49),
+                         SIMDE_FLOAT32_C(    61.61), SIMDE_FLOAT32_C(  -513.25), SIMDE_FLOAT32_C(   862.87), SIMDE_FLOAT32_C(   888.55),
+                         SIMDE_FLOAT32_C(  -356.41), SIMDE_FLOAT32_C(   536.26), SIMDE_FLOAT32_C(    46.41), SIMDE_FLOAT32_C(   968.69),
+                         SIMDE_FLOAT32_C(   819.71), SIMDE_FLOAT32_C(  -256.62), SIMDE_FLOAT32_C(  -508.11), SIMDE_FLOAT32_C(   806.88)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(   932.33), SIMDE_FLOAT32_C(  -948.81), SIMDE_FLOAT32_C(  -663.23), SIMDE_FLOAT32_C(  -224.49),
+                         SIMDE_FLOAT32_C(    61.61), SIMDE_FLOAT32_C(  -347.54), SIMDE_FLOAT32_C(   862.87), SIMDE_FLOAT32_C(  -462.55),
+                         SIMDE_FLOAT32_C(    93.71), SIMDE_FLOAT32_C(  -510.90), SIMDE_FLOAT32_C(    46.41), SIMDE_FLOAT32_C(   968.69),
+                         SIMDE_FLOAT32_C(   -64.95), SIMDE_FLOAT32_C(  -256.62), SIMDE_FLOAT32_C(   590.31), SIMDE_FLOAT32_C(   904.10)) },
+    { UINT16_C(43223),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(   810.27), SIMDE_FLOAT32_C(   830.75), SIMDE_FLOAT32_C(   701.96), SIMDE_FLOAT32_C(   496.75),
+                         SIMDE_FLOAT32_C(  -369.80), SIMDE_FLOAT32_C(  -455.71), SIMDE_FLOAT32_C(  -712.56), SIMDE_FLOAT32_C(   961.22),
+                         SIMDE_FLOAT32_C(  -136.67), SIMDE_FLOAT32_C(   165.04), SIMDE_FLOAT32_C(  -204.19), SIMDE_FLOAT32_C(   122.42),
+                         SIMDE_FLOAT32_C(  -370.53), SIMDE_FLOAT32_C(  -979.01), SIMDE_FLOAT32_C(  -726.98), SIMDE_FLOAT32_C(   156.30)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(  -639.87), SIMDE_FLOAT32_C(   979.28), SIMDE_FLOAT32_C(   637.46), SIMDE_FLOAT32_C(   800.56),
+                         SIMDE_FLOAT32_C(  -577.80), SIMDE_FLOAT32_C(   389.08), SIMDE_FLOAT32_C(   966.56), SIMDE_FLOAT32_C(  -731.72),
+                         SIMDE_FLOAT32_C(  -496.34), SIMDE_FLOAT32_C(   344.28), SIMDE_FLOAT32_C(   729.72), SIMDE_FLOAT32_C(   160.21),
+                         SIMDE_FLOAT32_C(   511.10), SIMDE_FLOAT32_C(   746.94), SIMDE_FLOAT32_C(  -855.19), SIMDE_FLOAT32_C(   203.47)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(  -639.87), SIMDE_FLOAT32_C(   830.75), SIMDE_FLOAT32_C(   637.46), SIMDE_FLOAT32_C(   496.75),
+                         SIMDE_FLOAT32_C(  -577.80), SIMDE_FLOAT32_C(  -455.71), SIMDE_FLOAT32_C(  -712.56), SIMDE_FLOAT32_C(   961.22),
+                         SIMDE_FLOAT32_C(  -496.34), SIMDE_FLOAT32_C(   344.28), SIMDE_FLOAT32_C(  -204.19), SIMDE_FLOAT32_C(   160.21),
+                         SIMDE_FLOAT32_C(  -370.53), SIMDE_FLOAT32_C(   746.94), SIMDE_FLOAT32_C(  -855.19), SIMDE_FLOAT32_C(   203.47)) },
+    { UINT16_C(29684),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(  -516.27), SIMDE_FLOAT32_C(  -631.52), SIMDE_FLOAT32_C(  -333.70), SIMDE_FLOAT32_C(    63.56),
+                         SIMDE_FLOAT32_C(   874.74), SIMDE_FLOAT32_C(  -961.20), SIMDE_FLOAT32_C(  -924.01), SIMDE_FLOAT32_C(   542.80),
+                         SIMDE_FLOAT32_C(  -706.18), SIMDE_FLOAT32_C(  -538.09), SIMDE_FLOAT32_C(    38.89), SIMDE_FLOAT32_C(  -242.57),
+                         SIMDE_FLOAT32_C(  -337.54), SIMDE_FLOAT32_C(   184.20), SIMDE_FLOAT32_C(  -229.00), SIMDE_FLOAT32_C(  -133.06)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(    57.66), SIMDE_FLOAT32_C(   125.72), SIMDE_FLOAT32_C(    14.67), SIMDE_FLOAT32_C(    11.14),
+                         SIMDE_FLOAT32_C(  -363.42), SIMDE_FLOAT32_C(   -80.26), SIMDE_FLOAT32_C(   -12.54), SIMDE_FLOAT32_C(   560.55),
+                         SIMDE_FLOAT32_C(   -59.35), SIMDE_FLOAT32_C(  -247.72), SIMDE_FLOAT32_C(  -719.76), SIMDE_FLOAT32_C(   280.49),
+                         SIMDE_FLOAT32_C(   296.24), SIMDE_FLOAT32_C(  -303.23), SIMDE_FLOAT32_C(  -844.84), SIMDE_FLOAT32_C(   452.16)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(  -516.27), SIMDE_FLOAT32_C(   125.72), SIMDE_FLOAT32_C(    14.67), SIMDE_FLOAT32_C(    11.14),
+                         SIMDE_FLOAT32_C(   874.74), SIMDE_FLOAT32_C(  -961.20), SIMDE_FLOAT32_C(   -12.54), SIMDE_FLOAT32_C(   560.55),
+                         SIMDE_FLOAT32_C(   -59.35), SIMDE_FLOAT32_C(  -247.72), SIMDE_FLOAT32_C(  -719.76), SIMDE_FLOAT32_C(   280.49),
+                         SIMDE_FLOAT32_C(  -337.54), SIMDE_FLOAT32_C(  -303.23), SIMDE_FLOAT32_C(  -229.00), SIMDE_FLOAT32_C(  -133.06)) },
+    { UINT16_C( 5687),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(  -252.71), SIMDE_FLOAT32_C(   314.80), SIMDE_FLOAT32_C(   998.07), SIMDE_FLOAT32_C(   575.47),
+                         SIMDE_FLOAT32_C(    52.24), SIMDE_FLOAT32_C(   443.69), SIMDE_FLOAT32_C(   234.67), SIMDE_FLOAT32_C(   894.50),
+                         SIMDE_FLOAT32_C(  -487.55), SIMDE_FLOAT32_C(  -250.47), SIMDE_FLOAT32_C(  -500.78), SIMDE_FLOAT32_C(   379.78),
+                         SIMDE_FLOAT32_C(   612.27), SIMDE_FLOAT32_C(  -899.55), SIMDE_FLOAT32_C(  -426.63), SIMDE_FLOAT32_C(   359.38)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(  -262.03), SIMDE_FLOAT32_C(    81.51), SIMDE_FLOAT32_C(  -484.02), SIMDE_FLOAT32_C(   504.24),
+                         SIMDE_FLOAT32_C(   157.78), SIMDE_FLOAT32_C(    62.09), SIMDE_FLOAT32_C(   193.22), SIMDE_FLOAT32_C(   -72.99),
+                         SIMDE_FLOAT32_C(   -54.28), SIMDE_FLOAT32_C(  -361.95), SIMDE_FLOAT32_C(   726.76), SIMDE_FLOAT32_C(   663.18),
+                         SIMDE_FLOAT32_C(   290.10), SIMDE_FLOAT32_C(   238.07), SIMDE_FLOAT32_C(  -777.03), SIMDE_FLOAT32_C(   227.93)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(  -252.71), SIMDE_FLOAT32_C(   314.80), SIMDE_FLOAT32_C(   998.07), SIMDE_FLOAT32_C(   504.24),
+                         SIMDE_FLOAT32_C(    52.24), SIMDE_FLOAT32_C(    62.09), SIMDE_FLOAT32_C(   193.22), SIMDE_FLOAT32_C(   894.50),
+                         SIMDE_FLOAT32_C(  -487.55), SIMDE_FLOAT32_C(  -250.47), SIMDE_FLOAT32_C(   726.76), SIMDE_FLOAT32_C(   663.18),
+                         SIMDE_FLOAT32_C(   612.27), SIMDE_FLOAT32_C(   238.07), SIMDE_FLOAT32_C(  -777.03), SIMDE_FLOAT32_C(   227.93)) },
+    { UINT16_C(46817),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(    60.97), SIMDE_FLOAT32_C(   294.39), SIMDE_FLOAT32_C(   134.62), SIMDE_FLOAT32_C(  -712.88),
+                         SIMDE_FLOAT32_C(  -527.46), SIMDE_FLOAT32_C(   556.17), SIMDE_FLOAT32_C(   671.08), SIMDE_FLOAT32_C(  -149.99),
+                         SIMDE_FLOAT32_C(  -247.28), SIMDE_FLOAT32_C(  -221.96), SIMDE_FLOAT32_C(  -789.76), SIMDE_FLOAT32_C(   665.60),
+                         SIMDE_FLOAT32_C(   590.64), SIMDE_FLOAT32_C(  -335.79), SIMDE_FLOAT32_C(  -964.26), SIMDE_FLOAT32_C(   137.06)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(   741.48), SIMDE_FLOAT32_C(  -567.93), SIMDE_FLOAT32_C(   572.38), SIMDE_FLOAT32_C(  -363.96),
+                         SIMDE_FLOAT32_C(   267.49), SIMDE_FLOAT32_C(   878.76), SIMDE_FLOAT32_C(  -445.04), SIMDE_FLOAT32_C(  -252.24),
+                         SIMDE_FLOAT32_C(   869.52), SIMDE_FLOAT32_C(  -356.95), SIMDE_FLOAT32_C(   128.10), SIMDE_FLOAT32_C(   544.58),
+                         SIMDE_FLOAT32_C(   242.93), SIMDE_FLOAT32_C(  -919.16), SIMDE_FLOAT32_C(   758.90), SIMDE_FLOAT32_C(  -466.75)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(   741.48), SIMDE_FLOAT32_C(   294.39), SIMDE_FLOAT32_C(   572.38), SIMDE_FLOAT32_C(  -363.96),
+                         SIMDE_FLOAT32_C(  -527.46), SIMDE_FLOAT32_C(   878.76), SIMDE_FLOAT32_C(  -445.04), SIMDE_FLOAT32_C(  -149.99),
+                         SIMDE_FLOAT32_C(   869.52), SIMDE_FLOAT32_C(  -356.95), SIMDE_FLOAT32_C(   128.10), SIMDE_FLOAT32_C(   665.60),
+                         SIMDE_FLOAT32_C(   590.64), SIMDE_FLOAT32_C(  -335.79), SIMDE_FLOAT32_C(  -964.26), SIMDE_FLOAT32_C(  -466.75)) }
+  };
+
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+    simde__m512 r = simde_mm512_mask_blend_ps(test_vec[i].k, test_vec[i].a, test_vec[i].b);
+    simde_assert_m512_close(r, test_vec[i].r, 1);
+  }
+
+  return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm512_mask_blend_pd(const MunitParameter params[], void* data) {
+  (void) params;
+  (void) data;
+
+  const struct {
+    simde__mmask8 k;
+    simde__m512d a;
+    simde__m512d b;
+    simde__m512d r;
+  } test_vec[8] = {
+    { UINT8_C(211),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(  863.27), SIMDE_FLOAT64_C( -937.53),
+                         SIMDE_FLOAT64_C(  272.85), SIMDE_FLOAT64_C( -836.56),
+                         SIMDE_FLOAT64_C( -517.71), SIMDE_FLOAT64_C(  436.89),
+                         SIMDE_FLOAT64_C( -561.62), SIMDE_FLOAT64_C( -796.29)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(  356.91), SIMDE_FLOAT64_C( -250.13),
+                         SIMDE_FLOAT64_C( -609.99), SIMDE_FLOAT64_C( -756.57),
+                         SIMDE_FLOAT64_C( -441.01), SIMDE_FLOAT64_C(  675.23),
+                         SIMDE_FLOAT64_C( -112.56), SIMDE_FLOAT64_C(  752.66)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(  356.91), SIMDE_FLOAT64_C( -250.13),
+                         SIMDE_FLOAT64_C(  272.85), SIMDE_FLOAT64_C( -756.57),
+                         SIMDE_FLOAT64_C( -517.71), SIMDE_FLOAT64_C(  436.89),
+                         SIMDE_FLOAT64_C( -112.56), SIMDE_FLOAT64_C(  752.66)) },
+    { UINT8_C( 25),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(  721.67), SIMDE_FLOAT64_C( -355.29),
+                         SIMDE_FLOAT64_C( -776.65), SIMDE_FLOAT64_C( -467.78),
+                         SIMDE_FLOAT64_C( -890.68), SIMDE_FLOAT64_C( -288.13),
+                         SIMDE_FLOAT64_C(  739.88), SIMDE_FLOAT64_C(   -3.67)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C( -963.76), SIMDE_FLOAT64_C(  986.64),
+                         SIMDE_FLOAT64_C(  121.90), SIMDE_FLOAT64_C( -796.62),
+                         SIMDE_FLOAT64_C(  983.17), SIMDE_FLOAT64_C(  569.02),
+                         SIMDE_FLOAT64_C(  -88.58), SIMDE_FLOAT64_C( -750.53)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(  721.67), SIMDE_FLOAT64_C( -355.29),
+                         SIMDE_FLOAT64_C( -776.65), SIMDE_FLOAT64_C( -796.62),
+                         SIMDE_FLOAT64_C(  983.17), SIMDE_FLOAT64_C( -288.13),
+                         SIMDE_FLOAT64_C(  739.88), SIMDE_FLOAT64_C( -750.53)) },
+    { UINT8_C( 46),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C( -217.74), SIMDE_FLOAT64_C(  162.91),
+                         SIMDE_FLOAT64_C(  837.05), SIMDE_FLOAT64_C(  132.83),
+                         SIMDE_FLOAT64_C( -183.75), SIMDE_FLOAT64_C( -958.98),
+                         SIMDE_FLOAT64_C( -343.18), SIMDE_FLOAT64_C( -412.04)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(  -43.99), SIMDE_FLOAT64_C(  645.49),
+                         SIMDE_FLOAT64_C( -301.76), SIMDE_FLOAT64_C( -390.74),
+                         SIMDE_FLOAT64_C(  671.11), SIMDE_FLOAT64_C( -513.10),
+                         SIMDE_FLOAT64_C(  467.15), SIMDE_FLOAT64_C( -961.27)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C( -217.74), SIMDE_FLOAT64_C(  162.91),
+                         SIMDE_FLOAT64_C( -301.76), SIMDE_FLOAT64_C(  132.83),
+                         SIMDE_FLOAT64_C(  671.11), SIMDE_FLOAT64_C( -513.10),
+                         SIMDE_FLOAT64_C(  467.15), SIMDE_FLOAT64_C( -412.04)) },
+    { UINT8_C(180),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C( -623.32), SIMDE_FLOAT64_C(  372.80),
+                         SIMDE_FLOAT64_C( -286.04), SIMDE_FLOAT64_C(  347.55),
+                         SIMDE_FLOAT64_C( -954.70), SIMDE_FLOAT64_C(  272.86),
+                         SIMDE_FLOAT64_C(  787.91), SIMDE_FLOAT64_C(  529.75)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(  153.63), SIMDE_FLOAT64_C( -177.79),
+                         SIMDE_FLOAT64_C(  957.03), SIMDE_FLOAT64_C( -193.15),
+                         SIMDE_FLOAT64_C(  645.09), SIMDE_FLOAT64_C(   -0.96),
+                         SIMDE_FLOAT64_C(   66.15), SIMDE_FLOAT64_C(  565.09)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(  153.63), SIMDE_FLOAT64_C(  372.80),
+                         SIMDE_FLOAT64_C(  957.03), SIMDE_FLOAT64_C( -193.15),
+                         SIMDE_FLOAT64_C( -954.70), SIMDE_FLOAT64_C(   -0.96),
+                         SIMDE_FLOAT64_C(  787.91), SIMDE_FLOAT64_C(  529.75)) },
+    { UINT8_C(125),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(  705.45), SIMDE_FLOAT64_C(  -55.08),
+                         SIMDE_FLOAT64_C(  606.63), SIMDE_FLOAT64_C(  -13.01),
+                         SIMDE_FLOAT64_C(  483.39), SIMDE_FLOAT64_C(  565.56),
+                         SIMDE_FLOAT64_C(  735.84), SIMDE_FLOAT64_C( -855.34)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C( -898.20), SIMDE_FLOAT64_C( -885.41),
+                         SIMDE_FLOAT64_C( -446.34), SIMDE_FLOAT64_C( -539.23),
+                         SIMDE_FLOAT64_C(  177.56), SIMDE_FLOAT64_C(   85.90),
+                         SIMDE_FLOAT64_C( -977.71), SIMDE_FLOAT64_C(  589.01)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(  705.45), SIMDE_FLOAT64_C( -885.41),
+                         SIMDE_FLOAT64_C( -446.34), SIMDE_FLOAT64_C( -539.23),
+                         SIMDE_FLOAT64_C(  177.56), SIMDE_FLOAT64_C(   85.90),
+                         SIMDE_FLOAT64_C(  735.84), SIMDE_FLOAT64_C(  589.01)) },
+    { UINT8_C(188),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(  263.46), SIMDE_FLOAT64_C( -173.99),
+                         SIMDE_FLOAT64_C( -448.77), SIMDE_FLOAT64_C(  -18.46),
+                         SIMDE_FLOAT64_C(   60.12), SIMDE_FLOAT64_C(  895.07),
+                         SIMDE_FLOAT64_C(  593.75), SIMDE_FLOAT64_C( -910.69)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C( -347.65), SIMDE_FLOAT64_C(  986.49),
+                         SIMDE_FLOAT64_C(  854.73), SIMDE_FLOAT64_C(  459.72),
+                         SIMDE_FLOAT64_C( -110.83), SIMDE_FLOAT64_C( -875.29),
+                         SIMDE_FLOAT64_C(  594.30), SIMDE_FLOAT64_C( -331.55)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C( -347.65), SIMDE_FLOAT64_C( -173.99),
+                         SIMDE_FLOAT64_C(  854.73), SIMDE_FLOAT64_C(  459.72),
+                         SIMDE_FLOAT64_C( -110.83), SIMDE_FLOAT64_C( -875.29),
+                         SIMDE_FLOAT64_C(  593.75), SIMDE_FLOAT64_C( -910.69)) },
+    { UINT8_C(190),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(   93.71), SIMDE_FLOAT64_C( -510.90),
+                         SIMDE_FLOAT64_C(  589.53), SIMDE_FLOAT64_C(  762.37),
+                         SIMDE_FLOAT64_C(  -64.95), SIMDE_FLOAT64_C( -200.72),
+                         SIMDE_FLOAT64_C(  590.31), SIMDE_FLOAT64_C(  904.10)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C( -356.41), SIMDE_FLOAT64_C(  536.26),
+                         SIMDE_FLOAT64_C(   46.41), SIMDE_FLOAT64_C(  968.69),
+                         SIMDE_FLOAT64_C(  819.71), SIMDE_FLOAT64_C( -256.62),
+                         SIMDE_FLOAT64_C( -508.11), SIMDE_FLOAT64_C(  806.88)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C( -356.41), SIMDE_FLOAT64_C( -510.90),
+                         SIMDE_FLOAT64_C(   46.41), SIMDE_FLOAT64_C(  968.69),
+                         SIMDE_FLOAT64_C(  819.71), SIMDE_FLOAT64_C( -256.62),
+                         SIMDE_FLOAT64_C( -508.11), SIMDE_FLOAT64_C(  904.10)) },
+    { UINT8_C(178),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(  932.33), SIMDE_FLOAT64_C( -948.81),
+                         SIMDE_FLOAT64_C( -663.23), SIMDE_FLOAT64_C( -224.49),
+                         SIMDE_FLOAT64_C(   61.61), SIMDE_FLOAT64_C( -513.25),
+                         SIMDE_FLOAT64_C(  862.87), SIMDE_FLOAT64_C(  888.55)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(  178.62), SIMDE_FLOAT64_C( -833.77),
+                         SIMDE_FLOAT64_C( -382.41), SIMDE_FLOAT64_C(  155.80),
+                         SIMDE_FLOAT64_C( -175.51), SIMDE_FLOAT64_C(  270.94),
+                         SIMDE_FLOAT64_C( -347.54), SIMDE_FLOAT64_C( -823.80)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(  178.62), SIMDE_FLOAT64_C( -948.81),
+                         SIMDE_FLOAT64_C( -382.41), SIMDE_FLOAT64_C(  155.80),
+                         SIMDE_FLOAT64_C(   61.61), SIMDE_FLOAT64_C( -513.25),
+                         SIMDE_FLOAT64_C( -347.54), SIMDE_FLOAT64_C(  888.55)) }
+  };
+
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+    simde__m512d r = simde_mm512_mask_blend_pd(test_vec[i].k, test_vec[i].a, test_vec[i].b);
+    simde_assert_m512d_close(r, test_vec[i].r, 1);
+  }
+
+  return MUNIT_OK;
+}
+
 static MunitResult
 test_simde_mm512_broadcast_i32x4(const MunitParameter params[], void* data) {
   (void) params;
@@ -2514,6 +3014,124 @@ test_simde_mm512_broadcast_i32x4(const MunitParameter params[], void* data) {
   return MUNIT_OK;
 }
 
+static MunitResult
+test_simde_mm512_broadcastd_epi32(const MunitParameter params[], void* data) {
+  (void) params;
+  (void) data;
+
+  const struct {
+    simde__m128i a;
+    simde__m512i r;
+  } test_vec[8] = {
+    { simde_mm_set_epi32(INT32_C(-1051270324), INT32_C(-1977183446), INT32_C( -548195640), INT32_C(-1363461466)),
+      simde_mm512_set_epi32(INT32_C(-1363461466), INT32_C(-1363461466), INT32_C(-1363461466), INT32_C(-1363461466),
+                            INT32_C(-1363461466), INT32_C(-1363461466), INT32_C(-1363461466), INT32_C(-1363461466),
+                            INT32_C(-1363461466), INT32_C(-1363461466), INT32_C(-1363461466), INT32_C(-1363461466),
+                            INT32_C(-1363461466), INT32_C(-1363461466), INT32_C(-1363461466), INT32_C(-1363461466)) },
+    { simde_mm_set_epi32(INT32_C(  979094891), INT32_C(  416506319), INT32_C( 2123490297), INT32_C(  200388421)),
+      simde_mm512_set_epi32(INT32_C(  200388421), INT32_C(  200388421), INT32_C(  200388421), INT32_C(  200388421),
+                            INT32_C(  200388421), INT32_C(  200388421), INT32_C(  200388421), INT32_C(  200388421),
+                            INT32_C(  200388421), INT32_C(  200388421), INT32_C(  200388421), INT32_C(  200388421),
+                            INT32_C(  200388421), INT32_C(  200388421), INT32_C(  200388421), INT32_C(  200388421)) },
+    { simde_mm_set_epi32(INT32_C( 1927260635), INT32_C( 1201458882), INT32_C(-1448742498), INT32_C(-1111904220)),
+      simde_mm512_set_epi32(INT32_C(-1111904220), INT32_C(-1111904220), INT32_C(-1111904220), INT32_C(-1111904220),
+                            INT32_C(-1111904220), INT32_C(-1111904220), INT32_C(-1111904220), INT32_C(-1111904220),
+                            INT32_C(-1111904220), INT32_C(-1111904220), INT32_C(-1111904220), INT32_C(-1111904220),
+                            INT32_C(-1111904220), INT32_C(-1111904220), INT32_C(-1111904220), INT32_C(-1111904220)) },
+    { simde_mm_set_epi32(INT32_C( -976455818), INT32_C(  542613123), INT32_C(  -15911923), INT32_C( -562895064)),
+      simde_mm512_set_epi32(INT32_C( -562895064), INT32_C( -562895064), INT32_C( -562895064), INT32_C( -562895064),
+                            INT32_C( -562895064), INT32_C( -562895064), INT32_C( -562895064), INT32_C( -562895064),
+                            INT32_C( -562895064), INT32_C( -562895064), INT32_C( -562895064), INT32_C( -562895064),
+                            INT32_C( -562895064), INT32_C( -562895064), INT32_C( -562895064), INT32_C( -562895064)) },
+    { simde_mm_set_epi32(INT32_C(  836747087), INT32_C(-1431045412), INT32_C(-1356396683), INT32_C( 1489138473)),
+      simde_mm512_set_epi32(INT32_C( 1489138473), INT32_C( 1489138473), INT32_C( 1489138473), INT32_C( 1489138473),
+                            INT32_C( 1489138473), INT32_C( 1489138473), INT32_C( 1489138473), INT32_C( 1489138473),
+                            INT32_C( 1489138473), INT32_C( 1489138473), INT32_C( 1489138473), INT32_C( 1489138473),
+                            INT32_C( 1489138473), INT32_C( 1489138473), INT32_C( 1489138473), INT32_C( 1489138473)) },
+    { simde_mm_set_epi32(INT32_C(-1783426961), INT32_C( -263517415), INT32_C(-1697630001), INT32_C( 2025142863)),
+      simde_mm512_set_epi32(INT32_C( 2025142863), INT32_C( 2025142863), INT32_C( 2025142863), INT32_C( 2025142863),
+                            INT32_C( 2025142863), INT32_C( 2025142863), INT32_C( 2025142863), INT32_C( 2025142863),
+                            INT32_C( 2025142863), INT32_C( 2025142863), INT32_C( 2025142863), INT32_C( 2025142863),
+                            INT32_C( 2025142863), INT32_C( 2025142863), INT32_C( 2025142863), INT32_C( 2025142863)) },
+    { simde_mm_set_epi32(INT32_C(  300619496), INT32_C( -659754204), INT32_C(-1019736463), INT32_C( 1022872166)),
+      simde_mm512_set_epi32(INT32_C( 1022872166), INT32_C( 1022872166), INT32_C( 1022872166), INT32_C( 1022872166),
+                            INT32_C( 1022872166), INT32_C( 1022872166), INT32_C( 1022872166), INT32_C( 1022872166),
+                            INT32_C( 1022872166), INT32_C( 1022872166), INT32_C( 1022872166), INT32_C( 1022872166),
+                            INT32_C( 1022872166), INT32_C( 1022872166), INT32_C( 1022872166), INT32_C( 1022872166)) },
+    { simde_mm_set_epi32(INT32_C( -274893610), INT32_C(  171227717), INT32_C( 1187872667), INT32_C( -590903223)),
+      simde_mm512_set_epi32(INT32_C( -590903223), INT32_C( -590903223), INT32_C( -590903223), INT32_C( -590903223),
+                            INT32_C( -590903223), INT32_C( -590903223), INT32_C( -590903223), INT32_C( -590903223),
+                            INT32_C( -590903223), INT32_C( -590903223), INT32_C( -590903223), INT32_C( -590903223),
+                            INT32_C( -590903223), INT32_C( -590903223), INT32_C( -590903223), INT32_C( -590903223)) }
+  };
+
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+    simde__m512i r = simde_mm512_broadcastd_epi32(test_vec[i].a);
+    simde_assert_m512i_i32(r, ==, test_vec[i].r);
+  }
+
+  return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm512_broadcastq_epi64(const MunitParameter params[], void* data) {
+  (void) params;
+  (void) data;
+
+  const struct {
+    simde__m128i a;
+    simde__m512i r;
+  } test_vec[8] = {
+    { simde_mm_set_epi64x(INT64_C(-4515171658517540054), INT64_C(-2354482342678283610)),
+      simde_mm512_set_epi64(INT64_C(-2354482342678283610), INT64_C(-2354482342678283610),
+                            INT64_C(-2354482342678283610), INT64_C(-2354482342678283610),
+                            INT64_C(-2354482342678283610), INT64_C(-2354482342678283610),
+                            INT64_C(-2354482342678283610), INT64_C(-2354482342678283610)) },
+    { simde_mm_set_epi64x(INT64_C( 4205180536942191055), INT64_C( 9120321379188715333)),
+      simde_mm512_set_epi64(INT64_C( 9120321379188715333), INT64_C( 9120321379188715333),
+                            INT64_C( 9120321379188715333), INT64_C( 9120321379188715333),
+                            INT64_C( 9120321379188715333), INT64_C( 9120321379188715333),
+                            INT64_C( 9120321379188715333), INT64_C( 9120321379188715333)) },
+    { simde_mm_set_epi64x(INT64_C( 8277521399394651842), INT64_C(-6222301646052282332)),
+      simde_mm512_set_epi64(INT64_C(-6222301646052282332), INT64_C(-6222301646052282332),
+                            INT64_C(-6222301646052282332), INT64_C(-6222301646052282332),
+                            INT64_C(-6222301646052282332), INT64_C(-6222301646052282332),
+                            INT64_C(-6222301646052282332), INT64_C(-6222301646052282332)) },
+    { simde_mm_set_epi64x(INT64_C(-4193845803756315005), INT64_C(  -68341185169397976)),
+      simde_mm512_set_epi64(INT64_C(  -68341185169397976), INT64_C(  -68341185169397976),
+                            INT64_C(  -68341185169397976), INT64_C(  -68341185169397976),
+                            INT64_C(  -68341185169397976), INT64_C(  -68341185169397976),
+                            INT64_C(  -68341185169397976), INT64_C(  -68341185169397976)) },
+    { simde_mm_set_epi64x(INT64_C( 3593801376552188636), INT64_C(-5825679392398740695)),
+      simde_mm512_set_epi64(INT64_C(-5825679392398740695), INT64_C(-5825679392398740695),
+                            INT64_C(-5825679392398740695), INT64_C(-5825679392398740695),
+                            INT64_C(-5825679392398740695), INT64_C(-5825679392398740695),
+                            INT64_C(-5825679392398740695), INT64_C(-5825679392398740695)) },
+    { simde_mm_set_epi64x(INT64_C(-7659760468268217575), INT64_C(-7291265332978304433)),
+      simde_mm512_set_epi64(INT64_C(-7291265332978304433), INT64_C(-7291265332978304433),
+                            INT64_C(-7291265332978304433), INT64_C(-7291265332978304433),
+                            INT64_C(-7291265332978304433), INT64_C(-7291265332978304433),
+                            INT64_C(-7291265332978304433), INT64_C(-7291265332978304433)) },
+    { simde_mm_set_epi64x(INT64_C( 1291150907495215908), INT64_C(-4379734758100841882)),
+      simde_mm512_set_epi64(INT64_C(-4379734758100841882), INT64_C(-4379734758100841882),
+                            INT64_C(-4379734758100841882), INT64_C(-4379734758100841882),
+                            INT64_C(-4379734758100841882), INT64_C(-4379734758100841882),
+                            INT64_C(-4379734758100841882), INT64_C(-4379734758100841882)) },
+    { simde_mm_set_epi64x(INT64_C(-1180659064658150843), INT64_C( 5101874260281362505)),
+      simde_mm512_set_epi64(INT64_C( 5101874260281362505), INT64_C( 5101874260281362505),
+                            INT64_C( 5101874260281362505), INT64_C( 5101874260281362505),
+                            INT64_C( 5101874260281362505), INT64_C( 5101874260281362505),
+                            INT64_C( 5101874260281362505), INT64_C( 5101874260281362505)) }
+  };
+
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+    simde__m512i r = simde_mm512_broadcastq_epi64(test_vec[i].a);
+    simde_assert_m512i_i64(r, ==, test_vec[i].r);
+  }
+
+  return MUNIT_OK;
+}
+
 static MunitResult
 test_simde_mm512_castpd512_pd128(const MunitParameter params[], void* data) {
   (void) params;
@@ -10304,7 +10922,14 @@ static MunitTest test_suite_tests[] = {
 
   SIMDE_TESTS_DEFINE_TEST(mm512_andnot_si512),
 
+  SIMDE_TESTS_DEFINE_TEST(mm512_mask_blend_epi32),
+  SIMDE_TESTS_DEFINE_TEST(mm512_mask_blend_epi64),
+  SIMDE_TESTS_DEFINE_TEST(mm512_mask_blend_ps),
+  SIMDE_TESTS_DEFINE_TEST(mm512_mask_blend_pd),
+
   SIMDE_TESTS_DEFINE_TEST(mm512_broadcast_i32x4),
+  SIMDE_TESTS_DEFINE_TEST(mm512_broadcastd_epi32),
+  SIMDE_TESTS_DEFINE_TEST(mm512_broadcastq_epi64),
 
   SIMDE_TESTS_DEFINE_TEST(mm512_castpd512_pd128),
   SIMDE_TESTS_DEFINE_TEST(mm512_castpd512_pd256),


=====================================
test/x86/avx512vl.c
=====================================
@@ -177,6 +177,416 @@ test_simde_mm256_cvtsepi16_epi8(const MunitParameter params[], void* data) {
   return MUNIT_OK;
 }
 
+static MunitResult
+test_simde_mm_cvtsepi32_epi8(const MunitParameter params[], void* data) {
+  (void) params;
+  (void) data;
+
+  const struct {
+    simde__m128i a;
+    simde__m128i r;
+  } test_vec[8] = {
+        { simde_mm_set_epi32(INT32_C(-2145190814), INT32_C(  369095719), INT32_C(   35558368), INT32_C( -760875473)),
+      simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C(-128)) },
+    { simde_mm_set_epi32(INT32_C( -891201105), INT32_C( 2065808871), INT32_C(  421929391), INT32_C(  587313056)),
+      simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 127)) },
+    { simde_mm_set_epi32(INT32_C(-1908802801), INT32_C( -442332083), INT32_C(-1878740578), INT32_C(-1559213492)),
+      simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C(-128)) },
+    { simde_mm_set_epi32(INT32_C(  615110155), INT32_C( -809405494), INT32_C( 1459512749), INT32_C( -889064834)),
+      simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-128)) },
+    { simde_mm_set_epi32(INT32_C( -125398563), INT32_C( 1544839586), INT32_C(  657472508), INT32_C( -763651133)),
+      simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C(-128)) },
+    { simde_mm_set_epi32(INT32_C( 1244639853), INT32_C(-1576601619), INT32_C(  458581376), INT32_C(-1764676112)),
+      simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-128)) },
+    { simde_mm_set_epi32(INT32_C(-1516069112), INT32_C( 1985219066), INT32_C(  436268231), INT32_C( -342699987)),
+      simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C(-128)) },
+    { simde_mm_set_epi32(INT32_C( -672406129), INT32_C( 1062677939), INT32_C(   54896059), INT32_C( -667800710)),
+      simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
+                        INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C(-128)) }
+  };
+
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+    simde__m128i r = simde_mm_cvtsepi32_epi8(test_vec[i].a);
+    simde_assert_m128i_i8(r, ==, test_vec[i].r);
+  }
+
+  return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm_cvtsepi32_epi16(const MunitParameter params[], void* data) {
+  (void) params;
+  (void) data;
+
+  const struct {
+    simde__m128i a;
+    simde__m128i r;
+  } test_vec[8] = {
+       { simde_mm_set_epi32(INT32_C(  115673074), INT32_C(-1486060937), INT32_C( -104167420), INT32_C(   40162333)),
+      simde_mm_set_epi16(INT16_C(     0), INT16_C(     0), INT16_C(     0), INT16_C(     0),
+                         INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767)) },
+    { simde_mm_set_epi32(INT32_C(-2128159397), INT32_C(-1560280329), INT32_C(  394598915), INT32_C(  425183512)),
+      simde_mm_set_epi16(INT16_C(     0), INT16_C(     0), INT16_C(     0), INT16_C(     0),
+                         INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767)) },
+    { simde_mm_set_epi32(INT32_C( -746969032), INT32_C( -718350441), INT32_C( -917354043), INT32_C( -740787295)),
+      simde_mm_set_epi16(INT16_C(     0), INT16_C(     0), INT16_C(     0), INT16_C(     0),
+                         INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768)) },
+    { simde_mm_set_epi32(INT32_C( 1489645447), INT32_C(  206085999), INT32_C(-1252698771), INT32_C( 1551612893)),
+      simde_mm_set_epi16(INT16_C(     0), INT16_C(     0), INT16_C(     0), INT16_C(     0),
+                         INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767)) },
+    { simde_mm_set_epi32(INT32_C( -503631451), INT32_C( -997616848), INT32_C( -691694514), INT32_C( -383740168)),
+      simde_mm_set_epi16(INT16_C(     0), INT16_C(     0), INT16_C(     0), INT16_C(     0),
+                         INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768)) },
+    { simde_mm_set_epi32(INT32_C(-1905774960), INT32_C(   -8801329), INT32_C( 1184144481), INT32_C( 1001484550)),
+      simde_mm_set_epi16(INT16_C(     0), INT16_C(     0), INT16_C(     0), INT16_C(     0),
+                         INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767)) },
+    { simde_mm_set_epi32(INT32_C( 1461694678), INT32_C( 2088398452), INT32_C(-1522574509), INT32_C(-1848551844)),
+      simde_mm_set_epi16(INT16_C(     0), INT16_C(     0), INT16_C(     0), INT16_C(     0),
+                         INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768)) },
+    { simde_mm_set_epi32(INT32_C(  344147543), INT32_C( 1208328320), INT32_C(  846887925), INT32_C(-1224326570)),
+      simde_mm_set_epi16(INT16_C(     0), INT16_C(     0), INT16_C(     0), INT16_C(     0),
+                         INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768)) }
+  };
+
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+    simde__m128i r = simde_mm_cvtsepi32_epi16(test_vec[i].a);
+    simde_assert_m128i_i16(r, ==, test_vec[i].r);
+  }
+
+  return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm_mask_mov_epi8(const MunitParameter params[], void* data) {
+  (void) params;
+  (void) data;
+
+  const struct {
+    simde__m128i src;
+    simde__mmask16 k;
+    simde__m128i a;
+    simde__m128i r;
+  } test_vec[8] = {
+    { simde_mm_set_epi8(INT8_C( -98), INT8_C( -63), INT8_C( -58), INT8_C(  25),
+                        INT8_C(   9), INT8_C(  49), INT8_C( -12), INT8_C( -31),
+                        INT8_C( -48), INT8_C(   0), INT8_C( -99), INT8_C( -81),
+                        INT8_C( -77), INT8_C(  27), INT8_C( -33), INT8_C(-124)),
+      UINT16_C(64699),
+     simde_mm_set_epi8(INT8_C(  79), INT8_C( 100), INT8_C(-123), INT8_C(  95),
+                        INT8_C( -79), INT8_C(  48), INT8_C( 112), INT8_C(   8),
+                        INT8_C(  69), INT8_C( -78), INT8_C(  54), INT8_C( -48),
+                        INT8_C(-119), INT8_C(  -5), INT8_C( -97), INT8_C( -44)),
+      simde_mm_set_epi8(INT8_C(  79), INT8_C( 100), INT8_C(-123), INT8_C(  95),
+                        INT8_C( -79), INT8_C(  48), INT8_C( -12), INT8_C( -31),
+                        INT8_C(  69), INT8_C(   0), INT8_C(  54), INT8_C( -48),
+                        INT8_C(-119), INT8_C(  27), INT8_C( -97), INT8_C( -44)) },
+    { simde_mm_set_epi8(INT8_C(  47), INT8_C(  36), INT8_C(  45), INT8_C( -44),
+                        INT8_C(  27), INT8_C( -15), INT8_C( 105), INT8_C( -69),
+                        INT8_C( -10), INT8_C(   1), INT8_C(  12), INT8_C( -44),
+                        INT8_C( -32), INT8_C( 113), INT8_C( 105), INT8_C( -92)),
+      UINT16_C(33046),
+     simde_mm_set_epi8(INT8_C(-118), INT8_C( -61), INT8_C( 108), INT8_C(   4),
+                        INT8_C(  56), INT8_C(  96), INT8_C( -73), INT8_C( -39),
+                        INT8_C(-112), INT8_C(-115), INT8_C(-113), INT8_C( -74),
+                        INT8_C( -79), INT8_C(-116), INT8_C( 117), INT8_C(  -3)),
+      simde_mm_set_epi8(INT8_C(-118), INT8_C(  36), INT8_C(  45), INT8_C( -44),
+                        INT8_C(  27), INT8_C( -15), INT8_C( 105), INT8_C( -39),
+                        INT8_C( -10), INT8_C(   1), INT8_C(  12), INT8_C( -74),
+                        INT8_C( -32), INT8_C(-116), INT8_C( 117), INT8_C( -92)) },
+    { simde_mm_set_epi8(INT8_C(  41), INT8_C(-106), INT8_C( -67), INT8_C(-116),
+                        INT8_C( -34), INT8_C(  21), INT8_C(  64), INT8_C(  44),
+                        INT8_C(  97), INT8_C( -46), INT8_C( 122), INT8_C(  42),
+                        INT8_C( -54), INT8_C( -79), INT8_C(  21), INT8_C(  59)),
+      UINT16_C(27487),
+     simde_mm_set_epi8(INT8_C(   6), INT8_C(-124), INT8_C(-111), INT8_C( -39),
+                        INT8_C(  55), INT8_C( -55), INT8_C( -72), INT8_C(  77),
+                        INT8_C(  51), INT8_C(-103), INT8_C( -80), INT8_C(  75),
+                        INT8_C( -87), INT8_C(-120), INT8_C( -14), INT8_C(  99)),
+      simde_mm_set_epi8(INT8_C(  41), INT8_C(-124), INT8_C(-111), INT8_C(-116),
+                        INT8_C(  55), INT8_C(  21), INT8_C( -72), INT8_C(  77),
+                        INT8_C(  97), INT8_C(-103), INT8_C( 122), INT8_C(  75),
+                        INT8_C( -87), INT8_C(-120), INT8_C( -14), INT8_C(  99)) },
+    { simde_mm_set_epi8(INT8_C(  31), INT8_C( -90), INT8_C(-127), INT8_C( 105),
+                        INT8_C( -89), INT8_C(-121), INT8_C(-110), INT8_C( -58),
+                        INT8_C( -95), INT8_C(-101), INT8_C( -56), INT8_C(  22),
+                        INT8_C(  18), INT8_C(   2), INT8_C(  46), INT8_C(-125)),
+      UINT16_C(48165),
+     simde_mm_set_epi8(INT8_C( 103), INT8_C(  26), INT8_C( 108), INT8_C(   4),
+                        INT8_C( -49), INT8_C( -62), INT8_C(-103), INT8_C( -42),
+                        INT8_C( 103), INT8_C( 115), INT8_C( 126), INT8_C(-112),
+                        INT8_C( -81), INT8_C( -35), INT8_C(-106), INT8_C(  45)),
+      simde_mm_set_epi8(INT8_C( 103), INT8_C( -90), INT8_C( 108), INT8_C(   4),
+                        INT8_C( -49), INT8_C( -62), INT8_C(-110), INT8_C( -58),
+                        INT8_C( -95), INT8_C(-101), INT8_C( 126), INT8_C(  22),
+                        INT8_C(  18), INT8_C( -35), INT8_C(  46), INT8_C(  45)) },
+    { simde_mm_set_epi8(INT8_C( 106), INT8_C(  23), INT8_C( -78), INT8_C( -57),
+                        INT8_C(  24), INT8_C(  56), INT8_C( -46), INT8_C( -15),
+                        INT8_C( -33), INT8_C(  28), INT8_C( -40), INT8_C(-116),
+                        INT8_C( -34), INT8_C(  92), INT8_C( 109), INT8_C(  33)),
+      UINT16_C(14870),
+     simde_mm_set_epi8(INT8_C( -75), INT8_C(  55), INT8_C(-127), INT8_C(  70),
+                        INT8_C(  78), INT8_C( 126), INT8_C( -96), INT8_C( 119),
+                        INT8_C( 108), INT8_C(  50), INT8_C(  17), INT8_C( -71),
+                        INT8_C( 127), INT8_C(  91), INT8_C( 110), INT8_C( -90)),
+      simde_mm_set_epi8(INT8_C( 106), INT8_C(  23), INT8_C(-127), INT8_C(  70),
+                        INT8_C(  78), INT8_C(  56), INT8_C( -96), INT8_C( -15),
+                        INT8_C( -33), INT8_C(  28), INT8_C( -40), INT8_C( -71),
+                        INT8_C( -34), INT8_C(  91), INT8_C( 110), INT8_C(  33)) },
+    { simde_mm_set_epi8(INT8_C( -21), INT8_C(-122), INT8_C(-127), INT8_C(  95),
+                        INT8_C( -34), INT8_C( -51), INT8_C( 107), INT8_C(  75),
+                        INT8_C(  63), INT8_C(-117), INT8_C(-118), INT8_C(  52),
+                        INT8_C(  15), INT8_C( 123), INT8_C( -76), INT8_C(-117)),
+      UINT16_C(54314),
+     simde_mm_set_epi8(INT8_C( 124), INT8_C( -12), INT8_C(   0), INT8_C( -14),
+                        INT8_C( -54), INT8_C(  92), INT8_C(  73), INT8_C(  69),
+                        INT8_C( -47), INT8_C( -62), INT8_C( 113), INT8_C( 100),
+                        INT8_C(  31), INT8_C( -98), INT8_C( -86), INT8_C(  19)),
+      simde_mm_set_epi8(INT8_C( 124), INT8_C( -12), INT8_C(-127), INT8_C( -14),
+                        INT8_C( -34), INT8_C(  92), INT8_C( 107), INT8_C(  75),
+                        INT8_C(  63), INT8_C(-117), INT8_C( 113), INT8_C(  52),
+                        INT8_C(  31), INT8_C( 123), INT8_C( -86), INT8_C(-117)) },
+    { simde_mm_set_epi8(INT8_C(  -9), INT8_C( -43), INT8_C(  83), INT8_C(  21),
+                        INT8_C(  88), INT8_C( -52), INT8_C(-115), INT8_C(  63),
+                        INT8_C(  92), INT8_C( -15), INT8_C( -24), INT8_C( -84),
+                        INT8_C(-120), INT8_C( -96), INT8_C(  46), INT8_C( -78)),
+      UINT16_C(44998),
+     simde_mm_set_epi8(INT8_C( -10), INT8_C(  79), INT8_C(-113), INT8_C( -93),
+                        INT8_C(  24), INT8_C(  78), INT8_C(  40), INT8_C(  22),
+                        INT8_C(  31), INT8_C( -15), INT8_C(  -8), INT8_C(  60),
+                        INT8_C( 114), INT8_C( -85), INT8_C(-105), INT8_C( -47)),
+      simde_mm_set_epi8(INT8_C( -10), INT8_C( -43), INT8_C(-113), INT8_C(  21),
+                        INT8_C(  24), INT8_C(  78), INT8_C(  40), INT8_C(  22),
+                        INT8_C(  31), INT8_C( -15), INT8_C( -24), INT8_C( -84),
+                        INT8_C(-120), INT8_C( -85), INT8_C(-105), INT8_C( -78)) },
+    { simde_mm_set_epi8(INT8_C( -62), INT8_C( 117), INT8_C(-114), INT8_C(   7),
+                        INT8_C(  17), INT8_C( 123), INT8_C(  -2), INT8_C( -15),
+                        INT8_C(-120), INT8_C(  77), INT8_C(  81), INT8_C( -39),
+                        INT8_C(-114), INT8_C( -52), INT8_C(-119), INT8_C(  82)),
+      UINT16_C(48425),
+     simde_mm_set_epi8(INT8_C(  68), INT8_C( -65), INT8_C(  13), INT8_C( -27),
+                        INT8_C(  55), INT8_C(   2), INT8_C( -43), INT8_C(   9),
+                        INT8_C( -57), INT8_C(  65), INT8_C(-111), INT8_C( -60),
+                        INT8_C(  75), INT8_C(  74), INT8_C(  16), INT8_C(  19)),
+      simde_mm_set_epi8(INT8_C(  68), INT8_C( 117), INT8_C(  13), INT8_C( -27),
+                        INT8_C(  55), INT8_C(   2), INT8_C(  -2), INT8_C(   9),
+                        INT8_C(-120), INT8_C(  77), INT8_C(-111), INT8_C( -39),
+                        INT8_C(  75), INT8_C( -52), INT8_C(-119), INT8_C(  19)) }
+  };
+
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+    simde__m128i r = simde_mm_mask_mov_epi8(test_vec[i].src, test_vec[i].k, test_vec[i].a);
+    simde_assert_m128i_i8(r, ==, test_vec[i].r);
+  }
+
+  return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm_mask_mov_epi16(const MunitParameter params[], void* data) {
+  (void) params;
+  (void) data;
+
+  const struct {
+    simde__m128i src;
+    simde__mmask8 k;
+    simde__m128i a;
+    simde__m128i r;
+  } test_vec[8] = {
+    { simde_mm_set_epi16(INT16_C(-14576), INT16_C( 14205), INT16_C( -2433), INT16_C(-27972),
+                         INT16_C(  6192), INT16_C(-29093), INT16_C( 24144), INT16_C(-19045)),
+      UINT8_C(231),
+      simde_mm_set_epi16(INT16_C(-16025), INT16_C( -5226), INT16_C( -6596), INT16_C(-11796),
+                         INT16_C(-24692), INT16_C( 20335), INT16_C( 26237), INT16_C( 23499)),
+      simde_mm_set_epi16(INT16_C(-16025), INT16_C( -5226), INT16_C( -6596), INT16_C(-27972),
+                         INT16_C(  6192), INT16_C( 20335), INT16_C( 26237), INT16_C( 23499)) },
+    { simde_mm_set_epi16(INT16_C(  -839), INT16_C(-10951), INT16_C(  1310), INT16_C( -6285),
+                         INT16_C(-21252), INT16_C( -7582), INT16_C(-12381), INT16_C( 24902)),
+      UINT8_C(  7),
+      simde_mm_set_epi16(INT16_C( -3233), INT16_C( 25022), INT16_C(-12043), INT16_C( 17022),
+                         INT16_C(-25543), INT16_C(-17145), INT16_C(  8881), INT16_C( 28844)),
+      simde_mm_set_epi16(INT16_C(  -839), INT16_C(-10951), INT16_C(  1310), INT16_C( -6285),
+                         INT16_C(-21252), INT16_C(-17145), INT16_C(  8881), INT16_C( 28844)) },
+    { simde_mm_set_epi16(INT16_C( 30807), INT16_C( 12936), INT16_C(-14387), INT16_C(-15179),
+                         INT16_C( 23907), INT16_C(-17160), INT16_C( 23916), INT16_C( 14132)),
+      UINT8_C(139),
+      simde_mm_set_epi16(INT16_C( -1315), INT16_C(-31661), INT16_C(-10075), INT16_C(-22609),
+                         INT16_C(  9167), INT16_C(  6456), INT16_C( -7329), INT16_C( -8326)),
+      simde_mm_set_epi16(INT16_C( -1315), INT16_C( 12936), INT16_C(-14387), INT16_C(-15179),
+                         INT16_C(  9167), INT16_C(-17160), INT16_C( -7329), INT16_C( -8326)) },
+    { simde_mm_set_epi16(INT16_C( 26421), INT16_C(-12708), INT16_C( 22525), INT16_C(-31426),
+                         INT16_C( 15010), INT16_C(-27490), INT16_C(-12766), INT16_C(-25791)),
+      UINT8_C( 65),
+      simde_mm_set_epi16(INT16_C( -1553), INT16_C(-19304), INT16_C( 20094), INT16_C( -2808),
+                         INT16_C(-12327), INT16_C( 15252), INT16_C( 25789), INT16_C(-23968)),
+      simde_mm_set_epi16(INT16_C( 26421), INT16_C(-19304), INT16_C( 22525), INT16_C(-31426),
+                         INT16_C( 15010), INT16_C(-27490), INT16_C(-12766), INT16_C(-23968)) },
+    { simde_mm_set_epi16(INT16_C(  7823), INT16_C( 19443), INT16_C( 13219), INT16_C( 17015),
+                         INT16_C(-11739), INT16_C(-13030), INT16_C(-14482), INT16_C(-27926)),
+      UINT8_C(249),
+      simde_mm_set_epi16(INT16_C(-25131), INT16_C( 30189), INT16_C(-22900), INT16_C( 28700),
+                         INT16_C(  1116), INT16_C( 30184), INT16_C(-12164), INT16_C( -7443)),
+      simde_mm_set_epi16(INT16_C(-25131), INT16_C( 30189), INT16_C(-22900), INT16_C( 28700),
+                         INT16_C(  1116), INT16_C(-13030), INT16_C(-14482), INT16_C( -7443)) },
+    { simde_mm_set_epi16(INT16_C(-26628), INT16_C( 25963), INT16_C(-26322), INT16_C( -8077),
+                         INT16_C(-22868), INT16_C( 28633), INT16_C( -4168), INT16_C( 28595)),
+      UINT8_C(112),
+      simde_mm_set_epi16(INT16_C( 14185), INT16_C( -5351), INT16_C( -8435), INT16_C(-11233),
+                         INT16_C( -8273), INT16_C(-29718), INT16_C( -8221), INT16_C( 18236)),
+      simde_mm_set_epi16(INT16_C(-26628), INT16_C( -5351), INT16_C( -8435), INT16_C(-11233),
+                         INT16_C(-22868), INT16_C( 28633), INT16_C( -4168), INT16_C( 28595)) },
+    { simde_mm_set_epi16(INT16_C(-14557), INT16_C(-28064), INT16_C( 11696), INT16_C(-19213),
+                         INT16_C( 15613), INT16_C( 26380), INT16_C( 30063), INT16_C( 26293)),
+      UINT8_C( 24),
+      simde_mm_set_epi16(INT16_C( 23790), INT16_C( 10772), INT16_C( -8418), INT16_C(-27527),
+                         INT16_C(  -163), INT16_C( 10898), INT16_C(-12995), INT16_C(   287)),
+      simde_mm_set_epi16(INT16_C(-14557), INT16_C(-28064), INT16_C( 11696), INT16_C(-27527),
+                         INT16_C(  -163), INT16_C( 26380), INT16_C( 30063), INT16_C( 26293)) },
+    { simde_mm_set_epi16(INT16_C(-14768), INT16_C(-23816), INT16_C(-22775), INT16_C( -4812),
+                         INT16_C(-19595), INT16_C(-14349), INT16_C( 11039), INT16_C( 15081)),
+      UINT8_C( 22),
+      simde_mm_set_epi16(INT16_C( 27063), INT16_C(  8226), INT16_C(-13582), INT16_C( 14344),
+                         INT16_C(-27643), INT16_C( -1125), INT16_C(-27147), INT16_C( -4132)),
+      simde_mm_set_epi16(INT16_C(-14768), INT16_C(-23816), INT16_C(-22775), INT16_C( 14344),
+                         INT16_C(-19595), INT16_C( -1125), INT16_C(-27147), INT16_C( 15081)) }
+  };
+
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+    simde__m128i r = simde_mm_mask_mov_epi16(test_vec[i].src, test_vec[i].k, test_vec[i].a);
+    simde_assert_m128i_i16(r, ==, test_vec[i].r);
+  }
+
+  return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm_mask_mov_epi32(const MunitParameter params[], void* data) {
+  (void) params;
+  (void) data;
+
+  const struct {
+    simde__m128i src;
+    simde__mmask8 k;
+    simde__m128i a;
+    simde__m128i r;
+  } test_vec[8] = {
+       { simde_mm_set_epi32(INT32_C(-1311777535), INT32_C(  871351059), INT32_C(-1795529748), INT32_C(-1018886524)),
+      UINT8_C(193),
+      simde_mm_set_epi32(INT32_C(-1402384713), INT32_C(  349677639), INT32_C(-2062419968), INT32_C(-2110667873)),
+      simde_mm_set_epi32(INT32_C(-1311777535), INT32_C(  871351059), INT32_C(-1795529748), INT32_C(-2110667873)) },
+    { simde_mm_set_epi32(INT32_C(  738895957), INT32_C(-2052149671), INT32_C( 1275190943), INT32_C(-1073987906)),
+      UINT8_C(211),
+      simde_mm_set_epi32(INT32_C(  899624021), INT32_C(-1740875066), INT32_C(  196568235), INT32_C(  146964985)),
+      simde_mm_set_epi32(INT32_C(  738895957), INT32_C(-2052149671), INT32_C(  196568235), INT32_C(  146964985)) },
+    { simde_mm_set_epi32(INT32_C(  692992965), INT32_C(  836600954), INT32_C(-1461227321), INT32_C( -625910795)),
+      UINT8_C(122),
+      simde_mm_set_epi32(INT32_C(-1617549669), INT32_C( 1989374100), INT32_C(-1502577107), INT32_C(-1017994073)),
+      simde_mm_set_epi32(INT32_C(-1617549669), INT32_C(  836600954), INT32_C(-1502577107), INT32_C( -625910795)) },
+    { simde_mm_set_epi32(INT32_C( 1143677167), INT32_C(  846204550), INT32_C( -804913221), INT32_C( 1445583278)),
+      UINT8_C(231),
+      simde_mm_set_epi32(INT32_C(-1730413187), INT32_C(-1695584840), INT32_C( -227526716), INT32_C(   -3425875)),
+      simde_mm_set_epi32(INT32_C( 1143677167), INT32_C(-1695584840), INT32_C( -227526716), INT32_C(   -3425875)) },
+    { simde_mm_set_epi32(INT32_C(  645689114), INT32_C(-2084714818), INT32_C( 1764055823), INT32_C(   52635923)),
+      UINT8_C( 92),
+      simde_mm_set_epi32(INT32_C(-1571852402), INT32_C(  630152776), INT32_C( -128726906), INT32_C( 1269444726)),
+      simde_mm_set_epi32(INT32_C(-1571852402), INT32_C(  630152776), INT32_C( 1764055823), INT32_C(   52635923)) },
+    { simde_mm_set_epi32(INT32_C(    1563221), INT32_C( -134802286), INT32_C(  714712077), INT32_C(-1827172967)),
+      UINT8_C( 81),
+      simde_mm_set_epi32(INT32_C( 1929131576), INT32_C(-1816110300), INT32_C( 1278219947), INT32_C( 1799312980)),
+      simde_mm_set_epi32(INT32_C(    1563221), INT32_C( -134802286), INT32_C(  714712077), INT32_C( 1799312980)) },
+    { simde_mm_set_epi32(INT32_C(  398082434), INT32_C(-1574168894), INT32_C(  -78364073), INT32_C(-1210427726)),
+      UINT8_C( 81),
+      simde_mm_set_epi32(INT32_C( -743499294), INT32_C(-2007549651), INT32_C(  404949426), INT32_C(-1228263526)),
+      simde_mm_set_epi32(INT32_C(  398082434), INT32_C(-1574168894), INT32_C(  -78364073), INT32_C(-1228263526)) },
+    { simde_mm_set_epi32(INT32_C( -588057094), INT32_C(-1885829296), INT32_C( 1969228625), INT32_C( 1326338893)),
+      UINT8_C(219),
+      simde_mm_set_epi32(INT32_C( 1932026039), INT32_C(-1013786585), INT32_C( 1485053584), INT32_C( 1979373999)),
+      simde_mm_set_epi32(INT32_C( 1932026039), INT32_C(-1885829296), INT32_C( 1485053584), INT32_C( 1979373999)) }
+  };
+
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+    simde__m128i r = simde_mm_mask_mov_epi32(test_vec[i].src, test_vec[i].k, test_vec[i].a);
+    simde_assert_m128i_i32(r, ==, test_vec[i].r);
+  }
+
+  return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm_mask_mov_epi64(const MunitParameter params[], void* data) {
+  (void) params;
+  (void) data;
+
+  const struct {
+    simde__m128i src;
+    simde__mmask8 k;
+    simde__m128i a;
+    simde__m128i r;
+  } test_vec[8] = {
+       { simde_mm_set_epi64x(INT64_C( 2277107027088284737), INT64_C( -794576880036979785)),
+      UINT8_C(133),
+      simde_mm_set_epi64x(INT64_C( 8097713530582561529), INT64_C( 1585963766693842069)),
+      simde_mm_set_epi64x(INT64_C( 2277107027088284737), INT64_C( 1585963766693842069)) },
+    { simde_mm_set_epi64x(INT64_C(  386114209698075166), INT64_C( 5207265957388900927)),
+      UINT8_C(158),
+      simde_mm_set_epi64x(INT64_C( 8803705323655107871), INT64_C(-8422781366242531322)),
+      simde_mm_set_epi64x(INT64_C( 8803705323655107871), INT64_C( 5207265957388900927)) },
+    { simde_mm_set_epi64x(INT64_C(-2685854854617637911), INT64_C( 5000183764696508529)),
+      UINT8_C(188),
+      simde_mm_set_epi64x(INT64_C( 3366037084418714211), INT64_C(-4379786006937181803)),
+      simde_mm_set_epi64x(INT64_C(-2685854854617637911), INT64_C( 5000183764696508529)) },
+    { simde_mm_set_epi64x(INT64_C( 5087362917606608352), INT64_C( 7748994405920281726)),
+      UINT8_C( 72),
+      simde_mm_set_epi64x(INT64_C(-3993157906773187111), INT64_C( 5848124444216740966)),
+      simde_mm_set_epi64x(INT64_C( 5087362917606608352), INT64_C( 7748994405920281726)) },
+    { simde_mm_set_epi64x(INT64_C(-6262495515547444433), INT64_C( 3943684472219148405)),
+      UINT8_C( 56),
+      simde_mm_set_epi64x(INT64_C( 6021985363878171356), INT64_C(-9003751561505293092)),
+      simde_mm_set_epi64x(INT64_C(-6262495515547444433), INT64_C( 3943684472219148405)) },
+    { simde_mm_set_epi64x(INT64_C( 7378184861631570903), INT64_C( 5065745925883054243)),
+      UINT8_C(107),
+      simde_mm_set_epi64x(INT64_C( 3940656342452910480), INT64_C( 3350136105944417294)),
+      simde_mm_set_epi64x(INT64_C( 3940656342452910480), INT64_C( 3350136105944417294)) },
+    { simde_mm_set_epi64x(INT64_C( 4422823463426654219), INT64_C( 1827699444722609855)),
+      UINT8_C( 23),
+      simde_mm_set_epi64x(INT64_C(-2966751886069965026), INT64_C(-8494473672325004777)),
+      simde_mm_set_epi64x(INT64_C(-2966751886069965026), INT64_C(-8494473672325004777)) },
+    { simde_mm_set_epi64x(INT64_C(-8917676865649705108), INT64_C( 6229148348133862992)),
+      UINT8_C( 48),
+      simde_mm_set_epi64x(INT64_C(-7968457113297908477), INT64_C(-6793891334661924961)),
+      simde_mm_set_epi64x(INT64_C(-8917676865649705108), INT64_C( 6229148348133862992)) }
+  };
+
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+    simde__m128i r = simde_mm_mask_mov_epi64(test_vec[i].src, test_vec[i].k, test_vec[i].a);
+    simde_assert_m128i_i64(r, ==, test_vec[i].r);
+  }
+
+  return MUNIT_OK;
+}
+
 #endif /* defined(SIMDE_avx512vl_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) */
 
 HEDLEY_DIAGNOSTIC_PUSH
@@ -184,10 +594,15 @@ HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL
 
 static MunitTest test_suite_tests[] = {
 #if defined(SIMDE_AVX512VL_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS)
-
   SIMDE_TESTS_DEFINE_TEST(mm_cvtsepi16_epi8),
-
   SIMDE_TESTS_DEFINE_TEST(mm256_cvtsepi16_epi8),
+  SIMDE_TESTS_DEFINE_TEST(mm_cvtsepi32_epi8),
+  SIMDE_TESTS_DEFINE_TEST(mm_cvtsepi32_epi16),
+
+  SIMDE_TESTS_DEFINE_TEST(mm_mask_mov_epi8),
+  SIMDE_TESTS_DEFINE_TEST(mm_mask_mov_epi16),
+  SIMDE_TESTS_DEFINE_TEST(mm_mask_mov_epi32),
+  SIMDE_TESTS_DEFINE_TEST(mm_mask_mov_epi64),
 
 #endif /* defined(SIMDE_AVX512vl_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) */
   { NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }



View it on GitLab: https://salsa.debian.org/med-team/simde/-/commit/54ff0a0f2c27cc4914cc18e170cedabe9546b016

-- 
View it on GitLab: https://salsa.debian.org/med-team/simde/-/commit/54ff0a0f2c27cc4914cc18e170cedabe9546b016
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200421/f23df131/attachment-0001.html>


More information about the debian-med-commit mailing list