[med-svn] [Git][med-team/simde][upstream] New upstream version 0.0.0.git.20200421
Michael R. Crusoe
gitlab at salsa.debian.org
Tue Apr 21 17:32:49 BST 2020
Michael R. Crusoe pushed to branch upstream at Debian Med / simde
Commits:
54ff0a0f by Michael R. Crusoe at 2020-04-21T18:26:07+02:00
New upstream version 0.0.0.git.20200421
- - - - -
13 changed files:
- .drone.star
- simde/simde-common.h
- simde/x86/avx.h
- simde/x86/avx2.h
- simde/x86/avx512bw.h
- simde/x86/avx512f.h
- simde/x86/avx512vl.h
- simde/x86/fma.h
- simde/x86/sse.h
- simde/x86/sse2.h
- test/x86/avx.c
- test/x86/avx512f.c
- test/x86/avx512vl.c
Changes:
=====================================
.drone.star
=====================================
@@ -6,8 +6,8 @@ def get_test_commands():
return [
"mkdir -p build",
"cd build",
- 'CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson ..',
- "ninja -v",
+ 'CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson .. || (cat meson-logs/meson-log.txt; false)',
+ '"$(command -v ninja || command -v ninja-build)" -v',
"./test/run-tests",
]
@@ -24,6 +24,12 @@ def get_dnf_install_commands(extra_pkgs = []):
"pip3 install meson",
]
+def get_yum_install_commands(extra_pkgs = []):
+ return [
+ "yum install -y epel-release",
+ "yum install -y %s meson ninja-build git-core" % " ".join(extra_pkgs),
+ ]
+
def get_default_job():
return {
"kind": "pipeline",
@@ -250,6 +256,24 @@ def get_jobs():
}
}
+ job_centos7_clang3 = {
+ "name": "centos7 clang3",
+ "steps": [
+ {
+ "image": "centos:7",
+ "environment": {
+ "CC": "clang",
+ "CXX": "clang++",
+ },
+ "failure": "ignore"
+ }
+ ],
+ "custom": {
+ # gcc, gcc-c++ are necessary to build on clang.
+ "install": get_yum_install_commands(["clang", "gcc", "gcc-c++"])
+ }
+ }
+
return [
job_clang9_armv7,
job_clang9_armv8,
@@ -261,6 +285,7 @@ def get_jobs():
job_gcc7_armv8,
# job_fedora,
job_fedora_clang_arm64_flags,
+ job_centos7_clang3,
]
def main(ctx):
=====================================
simde/simde-common.h
=====================================
@@ -360,6 +360,32 @@ HEDLEY_STATIC_ASSERT(sizeof(simde_float64) == 8, "Unable to find 64-bit floating
# define SIMDE_CONVERT_FTOI(T,v) ((T) (v))
#endif
+/* This behaves like reinterpret_cast<to>(value), except that it will
+ attempt to verify that value is of type "from" or "to". */
+#if defined(__cplusplus)
+ /* Helper behind SIMDE_CHECKED_REINTERPRET_CAST for C++.  `value` may be
+ passed either as `To` (returned unchanged) or as `From` (converted with
+ reinterpret_cast); a value convertible to neither overload's parameter
+ type fails to compile. */
+ template <typename To, typename From> class SIMDeCheckedReinterpretCastImpl {
+ public:
+ static To convert (To value) { return value; }
+ static To convert (From value) { return reinterpret_cast<To>(value); }
+ };
+ /* When `to` and `from` name the same type (e.g. if simde_float32 is a
+ typedef for float) the two overloads above would have identical
+ signatures, which is ill-formed.  This partial specialization keeps a
+ single pass-through overload for that case. */
+ template <typename T> class SIMDeCheckedReinterpretCastImpl<T, T> {
+ public:
+ static T convert (T value) { return value; }
+ };
+ #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) (SIMDeCheckedReinterpretCastImpl<to, from>::convert(value))
+#elif \
+ HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \
+ HEDLEY_GCC_VERSION_CHECK(3,4,0) || \
+ HEDLEY_ARM_VERSION_CHECK(5,0,4) || \
+ HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \
+ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
+ HEDLEY_IBM_VERSION_CHECK(16,1,0)
+ #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) \
+ (__extension__({ \
+ HEDLEY_STATIC_ASSERT(__builtin_types_compatible_p(from, __typeof__(value)) || \
+ __builtin_types_compatible_p(to, __typeof__(value)), \
+ "Type of `" #value "` must be either `" #to "` or `" #from "`"); \
+ HEDLEY_REINTERPRET_CAST(to, value); \
+ }))
+#else
+ #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value)
+#endif
#if HEDLEY_HAS_WARNING("-Wfloat-equal")
# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"")
=====================================
simde/x86/avx.h
=====================================
@@ -1490,7 +1490,7 @@ simde_mm256_broadcast_ps (simde__m128 const * mem_addr) {
#endif
}
#if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-# define _mm256_broadcast_ps(mem_addr) simde_mm256_broadcast_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr))
+# define _mm256_broadcast_ps(mem_addr) simde_mm256_broadcast_ps(HEDLEY_REINTERPRET_CAST(simde__m128 const*, mem_addr))
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -4315,7 +4315,7 @@ simde_mm256_setr_epi32 (
SIMDE__FUNCTION_ATTRIBUTES
simde__m256i
-simde_mm256_setr_epi64 (int64_t e3, int64_t e2, int64_t e1, int64_t e0) {
+simde_mm256_setr_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) {
#if defined(SIMDE_AVX_NATIVE)
return _mm256_setr_epi64x(e3, e2, e1, e0);
#else
@@ -4323,8 +4323,8 @@ simde_mm256_setr_epi64 (int64_t e3, int64_t e2, int64_t e1, int64_t e0) {
#endif
}
#if defined(SIMDE_AVX_ENABLE_NATIVE_ALIASES)
-# define _mm256_setr_epi64(e3, e2, e1, e0) \
- simde_mm256_setr_epi64(e3, e2, e1, e0)
+# define _mm256_setr_epi64x(e3, e2, e1, e0) \
+ simde_mm256_setr_epi64x(e3, e2, e1, e0)
#endif
SIMDE__FUNCTION_ATTRIBUTES
=====================================
simde/x86/avx2.h
=====================================
@@ -1266,7 +1266,7 @@ simde_mm256_madd_epi16 (simde__m256i a, simde__m256i b) {
#endif
}
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
-# define _mm256_add_epi16(a, b) simde_mm256_add_epi16(a, b)
+# define _mm256_madd_epi16(a, b) simde_mm256_madd_epi16(a, b)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -2119,7 +2119,7 @@ simde_mm256_srli_si256 (simde__m256i a, const int imm8) {
simde_mm_bsrli_si128(simde_mm256_extracti128_si256(a, 0), (imm8)))
#endif
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
-# define _mm256_srli_si256(a, imm8) simde_mm_srli_si256(a, imm8)
+# define _mm256_srli_si256(a, imm8) simde_mm256_srli_si256(a, imm8)
#endif
SIMDE__FUNCTION_ATTRIBUTES
=====================================
simde/x86/avx512bw.h
=====================================
@@ -380,7 +380,7 @@ simde_mm512_cvtsepi16_epi8 (simde__m512i a) {
return simde__m256i_from_private(r_);
#endif
}
-#if defined(SIMDE_AVX512VL_ENABLE_NATIVE_ALIASES)
+#if defined(SIMDE_AVX512BW_ENABLE_NATIVE_ALIASES)
#define _mm512_cvtsepi16_epi8(a) simde_mm512_cvtsepi16_epi8(a)
#endif
=====================================
simde/x86/avx512f.h
=====================================
@@ -2140,6 +2140,58 @@ simde_mm512_andnot_si512 (simde__m512i a, simde__m512i b) {
#define _mm512_andnot_si512(a, b) simde_mm512_andnot_si512(a, b)
#endif
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_mask_blend_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_mask_blend_epi32(k, a, b);
+#else
+ return simde_mm512_mask_mov_epi32(a, k, b);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#define _mm512_mask_blend_epi32(k, a, b) simde_mm512_mask_blend_epi32(k, a, b)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_mask_blend_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_mask_blend_epi64(k, a, b);
+#else
+ return simde_mm512_mask_mov_epi64(a, k, b);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#define _mm512_mask_blend_epi64(k, a, b) simde_mm512_mask_blend_epi64(k, a, b)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512
+simde_mm512_mask_blend_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_mask_blend_ps(k, a, b);
+#else
+ return simde_mm512_mask_mov_ps(a, k, b);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#define _mm512_mask_blend_ps(k, a, b) simde_mm512_mask_blend_ps(k, a, b)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512d
+simde_mm512_mask_blend_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_mask_blend_pd(k, a, b);
+#else
+ return simde_mm512_mask_mov_pd(a, k, b);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#define _mm512_mask_blend_pd(k, a, b) simde_mm512_mask_blend_pd(k, a, b)
+#endif
+
SIMDE__FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_broadcast_i32x4 (simde__m128i a) {
@@ -2166,6 +2218,48 @@ simde_mm512_broadcast_i32x4 (simde__m128i a) {
#define _mm512_broadcast_i32x4(a) simde_mm512_broadcast_i32x4(a)
#endif
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_broadcastd_epi32 (simde__m128i a) {
+#if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_broadcastd_epi32(a);
+#else
+ simde__m512i_private r_;
+ simde__m128i_private a_= simde__m128i_to_private(a);
+
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
+ r_.i32[i] = a_.i32[0];
+ }
+
+ return simde__m512i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+# define _mm512_broadcastd_epi32(a) simde_mm512_broadcastd_epi32(a)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_broadcastq_epi64 (simde__m128i a) {
+#if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_broadcastq_epi64(a);
+#else
+ simde__m512i_private r_;
+ simde__m128i_private a_= simde__m128i_to_private(a);
+
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
+ r_.i64[i] = a_.i64[0];
+ }
+
+ return simde__m512i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+# define _mm512_broadcastq_epi64(a) simde_mm512_broadcastq_epi64(a)
+#endif
+
SIMDE__FUNCTION_ATTRIBUTES
simde__mmask16
simde_mm512_cmpeq_epi32_mask (simde__m512i a, simde__m512i b) {
@@ -3321,7 +3415,7 @@ simde_mm512_mask_test_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512
#endif
}
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
- #define _mm512_mask_test_epi32_mask(a, b) simde_mm512_mask_test_epi32_mask(a, b)
+ #define _mm512_mask_test_epi32_mask(k1, a, b) simde_mm512_mask_test_epi32_mask(k1, a, b)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -3344,7 +3438,7 @@ simde_mm512_mask_test_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i
#endif
}
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
- #define _mm512_mask_test_epi64_mask(a, b) simde_mm512_mask_test_epi64_mask(a, b)
+ #define _mm512_mask_test_epi64_mask(k1, a, b) simde_mm512_mask_test_epi64_mask(k1, a, b)
#endif
SIMDE__FUNCTION_ATTRIBUTES
=====================================
simde/x86/avx512vl.h
=====================================
@@ -53,6 +53,98 @@ SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
#endif
SIMDE__BEGIN_DECLS
+/* Masked move of 8-bit elements: element i of the result is taken from `a`
+ * when bit i of `k` is set and from `src` otherwise.  The native intrinsic
+ * is only used when both AVX-512VL and AVX-512BW are available, because
+ * _mm_mask_mov_epi8 requires both extensions; otherwise the portable loop
+ * below is used. */
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m128i
+simde_mm_mask_mov_epi8(simde__m128i src, simde__mmask16 k, simde__m128i a) {
+ #if defined(SIMDE_AVX512VL_NATIVE) && defined(SIMDE_AVX512BW_NATIVE)
+ return _mm_mask_mov_epi8(src, k, a);
+ #else
+ simde__m128i_private
+ src_ = simde__m128i_to_private(src),
+ a_ = simde__m128i_to_private(a),
+ r_;
+
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
+ r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : src_.i8[i];
+ }
+
+ return simde__m128i_from_private(r_);
+ #endif
+}
+#if defined(SIMDE_AVX512VL_ENABLE_NATIVE_ALIASES)
+ #define _mm_mask_mov_epi8(src, k, a) simde_mm_mask_mov_epi8(src, k, a)
+#endif
+
+/* Masked move of 16-bit elements: element i of the result is taken from `a`
+ * when bit i of `k` is set and from `src` otherwise.  The native intrinsic
+ * is only used when both AVX-512VL and AVX-512BW are available, because
+ * _mm_mask_mov_epi16 requires both extensions; otherwise the portable loop
+ * below is used. */
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m128i
+simde_mm_mask_mov_epi16(simde__m128i src, simde__mmask8 k, simde__m128i a) {
+ #if defined(SIMDE_AVX512VL_NATIVE) && defined(SIMDE_AVX512BW_NATIVE)
+ return _mm_mask_mov_epi16(src, k, a);
+ #else
+ simde__m128i_private
+ src_ = simde__m128i_to_private(src),
+ a_ = simde__m128i_to_private(a),
+ r_;
+
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
+ r_.i16[i] = ((k >> i) & 1) ? a_.i16[i] : src_.i16[i];
+ }
+
+ return simde__m128i_from_private(r_);
+ #endif
+}
+#if defined(SIMDE_AVX512VL_ENABLE_NATIVE_ALIASES)
+ #define _mm_mask_mov_epi16(src, k, a) simde_mm_mask_mov_epi16(src, k, a)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m128i
+simde_mm_mask_mov_epi32(simde__m128i src, simde__mmask8 k, simde__m128i a) {
+ #if defined(SIMDE_AVX512VL_NATIVE)
+ return _mm_mask_mov_epi32(src, k, a);
+ #else
+ simde__m128i_private
+ src_ = simde__m128i_to_private(src),
+ a_ = simde__m128i_to_private(a),
+ r_;
+
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
+ r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : src_.i32[i];
+ }
+
+ return simde__m128i_from_private(r_);
+ #endif
+}
+#if defined(SIMDE_AVX512VL_ENABLE_NATIVE_ALIASES)
+ #define _mm_mask_mov_epi32(src, k, a) simde_mm_mask_mov_epi32(src, k, a)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m128i
+simde_mm_mask_mov_epi64(simde__m128i src, simde__mmask8 k, simde__m128i a) {
+ #if defined(SIMDE_AVX512VL_NATIVE)
+ return _mm_mask_mov_epi64(src, k, a);
+ #else
+ simde__m128i_private
+ src_ = simde__m128i_to_private(src),
+ a_ = simde__m128i_to_private(a),
+ r_;
+
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
+ r_.i64[i] = ((k >> i) & 1) ? a_.i64[i] : src_.i64[i];
+ }
+
+ return simde__m128i_from_private(r_);
+ #endif
+}
+#if defined(SIMDE_AVX512VL_ENABLE_NATIVE_ALIASES)
+ #define _mm_mask_mov_epi64(src, k, a) simde_mm_mask_mov_epi64(src, k, a)
+#endif
+
SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_cvtsepi16_epi8 (simde__m128i a) {
@@ -105,6 +197,58 @@ simde_mm256_cvtsepi16_epi8 (simde__m256i a) {
#define _mm256_cvtsepi16_epi8(a) simde_mm256_cvtsepi16_epi8(a)
#endif
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m128i
+simde_mm_cvtsepi32_epi8 (simde__m128i a) {
+ #if defined(SIMDE_AVX512VL_NATIVE) && defined(SIMDE_AVX512BW_NATIVE)
+ return _mm_cvtsepi32_epi8(a);
+ #else
+ simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128());;
+ simde__m128i_private a_ = simde__m128i_to_private(a);
+
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
+ r_.i8[i] =
+ (a_.i32[i] < INT8_MIN)
+ ? (INT8_MIN)
+ : ((a_.i32[i] > INT8_MAX)
+ ? (INT8_MAX)
+ : HEDLEY_STATIC_CAST(int8_t, a_.i32[i]));
+ }
+
+ return simde__m128i_from_private(r_);
+ #endif
+}
+#if defined(SIMDE_AVX512VL_ENABLE_NATIVE_ALIASES)
+ #define _mm_cvtsepi32_epi8(a) simde_mm_cvtsepi32_epi8(a)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m128i
+simde_mm_cvtsepi32_epi16 (simde__m128i a) {
+ #if defined(SIMDE_AVX512VL_NATIVE) && defined(SIMDE_AVX512BW_NATIVE)
+ return _mm_cvtsepi32_epi16(a);
+ #else
+ simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128());;
+ simde__m128i_private a_ = simde__m128i_to_private(a);
+
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
+ r_.i16[i] =
+ (a_.i32[i] < INT16_MIN)
+ ? (INT16_MIN)
+ : ((a_.i32[i] > INT16_MAX)
+ ? (INT16_MAX)
+ : HEDLEY_STATIC_CAST(int16_t, a_.i32[i]));
+ }
+
+ return simde__m128i_from_private(r_);
+ #endif
+}
+#if defined(SIMDE_AVX512VL_ENABLE_NATIVE_ALIASES)
+ #define _mm_cvtsepi32_epi16(a) simde_mm_cvtsepi32_epi16(a)
+#endif
+
SIMDE__END_DECLS
HEDLEY_DIAGNOSTIC_POP
=====================================
simde/x86/fma.h
=====================================
@@ -87,7 +87,7 @@ simde_mm256_fmadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) {
#endif
}
#if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
-# define _mm_fmadd_pd(a, b, c) simde_mm_fmadd_pd(a, b, c)
+# define _mm256_fmadd_pd(a, b, c) simde_mm256_fmadd_pd(a, b, c)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -113,7 +113,7 @@ simde_mm256_fmadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) {
#endif
}
#if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
-# define _mm_fmadd_ps(a, b, c) simde_mm_fmadd_ps(a, b, c)
+# define _mm256_fmadd_ps(a, b, c) simde_mm256_fmadd_ps(a, b, c)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -165,7 +165,7 @@ simde_mm256_fmaddsub_pd (simde__m256d a, simde__m256d b, simde__m256d c) {
#endif
}
#if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
-# define _mm_fmaddsub_pd(a, b, c) simde_mm_fmaddsub_pd(a, b, c)
+# define _mm256_fmaddsub_pd(a, b, c) simde_mm256_fmaddsub_pd(a, b, c)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -191,7 +191,7 @@ simde_mm256_fmaddsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) {
#endif
}
#if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
-# define _mm_fmaddsub_ps(a, b, c) simde_mm_fmaddsub_ps(a, b, c)
+# define _mm256_fmaddsub_ps(a, b, c) simde_mm256_fmaddsub_ps(a, b, c)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -217,7 +217,7 @@ simde_mm256_fmsub_pd (simde__m256d a, simde__m256d b, simde__m256d c) {
#endif
}
#if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
-# define _mm_fmsub_pd(a, b, c) simde_mm_fmsub_pd(a, b, c)
+# define _mm256_fmsub_pd(a, b, c) simde_mm256_fmsub_pd(a, b, c)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -243,7 +243,7 @@ simde_mm256_fmsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) {
#endif
}
#if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
-# define _mm_fmsub_ps(a, b, c) simde_mm_fmsub_ps(a, b, c)
+# define _mm256_fmsub_ps(a, b, c) simde_mm256_fmsub_ps(a, b, c)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -319,7 +319,7 @@ simde_mm256_fmsubadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) {
#endif
}
#if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
-# define _mm_fmsubadd_pd(a, b, c) simde_mm_fmsubadd_pd(a, b, c)
+# define _mm256_fmsubadd_pd(a, b, c) simde_mm256_fmsubadd_pd(a, b, c)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -369,7 +369,7 @@ simde_mm256_fmsubadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) {
#endif
}
#if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
-# define _mm_fmsubadd_ps(a, b, c) simde_mm_fmsubadd_ps(a, b, c)
+# define _mm256_fmsubadd_ps(a, b, c) simde_mm256_fmsubadd_ps(a, b, c)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -417,7 +417,7 @@ simde_mm256_fnmadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) {
#endif
}
#if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
-# define _mm_fnmadd_pd(a, b, c) simde_mm_fnmadd_pd(a, b, c)
+# define _mm256_fnmadd_pd(a, b, c) simde_mm256_fnmadd_pd(a, b, c)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -465,7 +465,7 @@ simde_mm256_fnmadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) {
#endif
}
#if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
-# define _mm_fnmadd_ps(a, b, c) simde_mm_fnmadd_ps(a, b, c)
+# define _mm256_fnmadd_ps(a, b, c) simde_mm256_fnmadd_ps(a, b, c)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -557,7 +557,7 @@ simde_mm256_fnmsub_pd (simde__m256d a, simde__m256d b, simde__m256d c) {
#endif
}
#if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
-# define _mm_fnmsub_pd(a, b, c) simde_mm_fnmsub_pd(a, b, c)
+# define _mm256_fnmsub_pd(a, b, c) simde_mm256_fnmsub_pd(a, b, c)
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -605,7 +605,7 @@ simde_mm256_fnmsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) {
#endif
}
#if defined(SIMDE_FMA_ENABLE_NATIVE_ALIASES)
-# define _mm_fnmsub_ps(a, b, c) simde_mm_fnmsub_ps(a, b, c)
+# define _mm256_fnmsub_ps(a, b, c) simde_mm256_fnmsub_ps(a, b, c)
#endif
SIMDE__FUNCTION_ATTRIBUTES
=====================================
simde/x86/sse.h
=====================================
@@ -2080,7 +2080,7 @@ simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) {
}
#define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr)
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
-# define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64(a, (mask), mem_addr)
+/* Native alias: only the destination pointer is converted (callers may pass char* or int8_t*); a and mask are forwarded unchanged. */
+# define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr)))
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -2991,7 +2991,7 @@ simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) {
#endif
}
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
-# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
+# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -3011,7 +3011,7 @@ simde_mm_store_ps1 (simde_float32 mem_addr[4], simde__m128 a) {
#endif
}
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
-# define _mm_store_ps1(mem_addr, a) simde_mm_store_ps1(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
+# define _mm_store_ps1(mem_addr, a) simde_mm_store_ps1(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -3030,7 +3030,7 @@ simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) {
#endif
}
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
-# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
+# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -3045,7 +3045,7 @@ simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) {
#endif
}
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
-# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
+# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -3104,7 +3104,7 @@ simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) {
#endif
}
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
-# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
+# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -3123,7 +3123,7 @@ simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) {
#endif
}
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
-# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
+# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -3498,7 +3498,7 @@ simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) {
#endif
}
#if defined(SIMDE_SSE_ENABLE_NATIVE_ALIASES)
-# define _mm_stream_ps(mem_addr, a) simde_mm_stream_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), (a))
+# define _mm_stream_ps(mem_addr, a) simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a))
#endif
SIMDE__FUNCTION_ATTRIBUTES
=====================================
simde/x86/sse2.h
=====================================
@@ -2902,7 +2902,7 @@ simde_mm_maskmoveu_si128 (simde__m128i a, simde__m128i mask, int8_t mem_addr[HED
#endif
}
#if defined(SIMDE_SSE2_ENABLE_NATIVE_ALIASES)
-# define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128(a, (mask), HEDLEY_REINTERPRET_CAST(int8_t*, mem_addr))
+# define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128(a, (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, mem_addr))
#endif
SIMDE__FUNCTION_ATTRIBUTES
@@ -5060,7 +5060,7 @@ simde_mm_stream_si64 (int64_t* mem_addr, int64_t a) {
*mem_addr = a;
}
#if defined(SIMDE_SSE2_ENABLE_NATIVE_ALIASES)
-# define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(mem_addr, a)
+# define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a)
#endif
SIMDE__FUNCTION_ATTRIBUTES
=====================================
test/x86/avx.c
=====================================
@@ -10318,7 +10318,7 @@ test_simde_mm256_setr_epi32(const MunitParameter params[], void* data) {
}
static MunitResult
-test_simde_mm256_setr_epi64(const MunitParameter params[], void* data) {
+test_simde_mm256_setr_epi64x(const MunitParameter params[], void* data) {
(void) params;
(void) data;
@@ -10361,7 +10361,7 @@ test_simde_mm256_setr_epi64(const MunitParameter params[], void* data) {
};
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
- simde__m256i r = simde_mm256_setr_epi64(
+ simde__m256i r = simde_mm256_setr_epi64x(
test_vec[i].a[ 0], test_vec[i].a[ 1], test_vec[i].a[ 2], test_vec[i].a[ 3]);
simde_assert_m256i_i64(r, ==, test_vec[i].r);
}
@@ -13669,7 +13669,7 @@ static MunitTest test_suite_tests[] = {
SIMDE_TESTS_DEFINE_TEST(mm256_setr_epi8),
SIMDE_TESTS_DEFINE_TEST(mm256_setr_epi16),
SIMDE_TESTS_DEFINE_TEST(mm256_setr_epi32),
- SIMDE_TESTS_DEFINE_TEST(mm256_setr_epi64),
+ SIMDE_TESTS_DEFINE_TEST(mm256_setr_epi64x),
SIMDE_TESTS_DEFINE_TEST(mm256_setr_ps),
SIMDE_TESTS_DEFINE_TEST(mm256_setr_pd),
SIMDE_TESTS_DEFINE_TEST(mm256_setr_m128),
=====================================
test/x86/avx512f.c
=====================================
@@ -2455,6 +2455,506 @@ test_simde_mm512_andnot_si512(const MunitParameter params[], void* data) {
return MUNIT_OK;
}
+static MunitResult
+test_simde_mm512_mask_blend_epi32(const MunitParameter params[], void* data) {
+ (void) params;
+ (void) data;
+
+ const struct {
+ simde__mmask16 k;
+ simde__m512i a;
+ simde__m512i b;
+ simde__m512i r;
+ } test_vec[8] = {
+ { UINT16_C(12684),
+ simde_mm512_set_epi32(INT32_C( 2139597246), INT32_C(-2035467821), INT32_C(-1381016544), INT32_C( -293624181),
+ INT32_C( 1610331725), INT32_C( 134146865), INT32_C( 837546022), INT32_C(-1561535917),
+ INT32_C( 522767958), INT32_C( 350987310), INT32_C( 1200416302), INT32_C( 1035712103),
+ INT32_C( -697441354), INT32_C(-1209277994), INT32_C( 1905768221), INT32_C( 941422574)),
+ simde_mm512_set_epi32(INT32_C( -854245863), INT32_C( 77819890), INT32_C( -597706880), INT32_C( -28687979),
+ INT32_C( 1384494246), INT32_C(-1885694903), INT32_C( 479641666), INT32_C( 436747778),
+ INT32_C( 1142933685), INT32_C( -36150185), INT32_C( 234764144), INT32_C( -925516387),
+ INT32_C( 1528722995), INT32_C( 1957265461), INT32_C( -558613563), INT32_C( 535737103)),
+ simde_mm512_set_epi32(INT32_C( 2139597246), INT32_C(-2035467821), INT32_C( -597706880), INT32_C( -28687979),
+ INT32_C( 1610331725), INT32_C( 134146865), INT32_C( 837546022), INT32_C( 436747778),
+ INT32_C( 1142933685), INT32_C( 350987310), INT32_C( 1200416302), INT32_C( 1035712103),
+ INT32_C( 1528722995), INT32_C( 1957265461), INT32_C( 1905768221), INT32_C( 941422574)) },
+ { UINT16_C(12889),
+ simde_mm512_set_epi32(INT32_C(-1009854213), INT32_C( 19207470), INT32_C( 2053019824), INT32_C( 1679893080),
+ INT32_C( -761309092), INT32_C(-1797634461), INT32_C( 1499461014), INT32_C( -349931656),
+ INT32_C( 1308377490), INT32_C(-1862232386), INT32_C( -706282442), INT32_C( 1752887042),
+ INT32_C( 1045610342), INT32_C( 88096217), INT32_C(-1144289151), INT32_C( 1410502196)),
+ simde_mm512_set_epi32(INT32_C( 1437842356), INT32_C(-1817562257), INT32_C( 808924311), INT32_C( 1765692072),
+ INT32_C(-1346910557), INT32_C( -92284700), INT32_C( 1533217965), INT32_C( 1732689820),
+ INT32_C(-1401128233), INT32_C( -762168473), INT32_C( 97276971), INT32_C( 2145432631),
+ INT32_C(-1561525899), INT32_C(-2005427238), INT32_C( -455460474), INT32_C( -933959435)),
+ simde_mm512_set_epi32(INT32_C(-1009854213), INT32_C( 19207470), INT32_C( 808924311), INT32_C( 1765692072),
+ INT32_C( -761309092), INT32_C(-1797634461), INT32_C( 1533217965), INT32_C( -349931656),
+ INT32_C( 1308377490), INT32_C( -762168473), INT32_C( -706282442), INT32_C( 2145432631),
+ INT32_C(-1561525899), INT32_C( 88096217), INT32_C(-1144289151), INT32_C( -933959435)) },
+ { UINT16_C(18390),
+ simde_mm512_set_epi32(INT32_C( 191788932), INT32_C( 410937469), INT32_C( 218604234), INT32_C( -632545043),
+ INT32_C( 246082482), INT32_C( 2029197195), INT32_C( 1188965621), INT32_C( -844747875),
+ INT32_C( 989502056), INT32_C( 2119540790), INT32_C(-1766179858), INT32_C(-1109416221),
+ INT32_C(-1963025204), INT32_C( -932958949), INT32_C( 47867627), INT32_C( -567270366)),
+ simde_mm512_set_epi32(INT32_C( 558500028), INT32_C( 1400907983), INT32_C(-1581715774), INT32_C( -29022872),
+ INT32_C( 1773849857), INT32_C( -311975417), INT32_C( 1183760637), INT32_C(-1160252785),
+ INT32_C( 2107838031), INT32_C( 1909470743), INT32_C(-2018375211), INT32_C( 267812095),
+ INT32_C( -225335539), INT32_C( -871226308), INT32_C( -872412082), INT32_C( 1435481672)),
+ simde_mm512_set_epi32(INT32_C( 191788932), INT32_C( 1400907983), INT32_C( 218604234), INT32_C( -632545043),
+ INT32_C( 246082482), INT32_C( -311975417), INT32_C( 1183760637), INT32_C(-1160252785),
+ INT32_C( 2107838031), INT32_C( 1909470743), INT32_C(-1766179858), INT32_C( 267812095),
+ INT32_C(-1963025204), INT32_C( -871226308), INT32_C( -872412082), INT32_C( -567270366)) },
+ { UINT16_C(46409),
+ simde_mm512_set_epi32(INT32_C( -239336968), INT32_C( 1154172094), INT32_C( 1382102779), INT32_C(-1946237060),
+ INT32_C( -995869857), INT32_C( 1050338310), INT32_C(-2047829467), INT32_C( -881484106),
+ INT32_C( -67227529), INT32_C( -510303256), INT32_C( -387177060), INT32_C( 2007995362),
+ INT32_C( 1596393504), INT32_C( 1716443052), INT32_C( 1056333857), INT32_C( -879795312)),
+ simde_mm512_set_epi32(INT32_C(-1321156942), INT32_C(-1763902924), INT32_C( -145319736), INT32_C( 356975558),
+ INT32_C( 109934631), INT32_C( 1326272066), INT32_C( 723198088), INT32_C(-1812908400),
+ INT32_C( 1665386649), INT32_C( 1770577849), INT32_C(-2015166919), INT32_C(-1565649496),
+ INT32_C( 1045296779), INT32_C( 1401153164), INT32_C( -294475079), INT32_C( 378377774)),
+ simde_mm512_set_epi32(INT32_C(-1321156942), INT32_C( 1154172094), INT32_C( -145319736), INT32_C( 356975558),
+ INT32_C( -995869857), INT32_C( 1326272066), INT32_C(-2047829467), INT32_C(-1812908400),
+ INT32_C( -67227529), INT32_C( 1770577849), INT32_C( -387177060), INT32_C( 2007995362),
+ INT32_C( 1045296779), INT32_C( 1716443052), INT32_C( 1056333857), INT32_C( 378377774)) },
+ { UINT16_C(35033),
+ simde_mm512_set_epi32(INT32_C( 576121858), INT32_C( -83274089), INT32_C( 1081604364), INT32_C( 1853977291),
+ INT32_C(-1408149319), INT32_C(-1793071292), INT32_C( -580417531), INT32_C( 1708989591),
+ INT32_C(-1803428364), INT32_C(-1884594628), INT32_C(-1049896819), INT32_C( 1351777033),
+ INT32_C( -543435799), INT32_C( 45073785), INT32_C( 310971883), INT32_C( 586295496)),
+ simde_mm512_set_epi32(INT32_C( 274706498), INT32_C( 1339140311), INT32_C( 773365916), INT32_C( -407443831),
+ INT32_C( -44487881), INT32_C( -363465063), INT32_C( -778555208), INT32_C( -640038352),
+ INT32_C( -428291654), INT32_C(-1080717955), INT32_C( 906661653), INT32_C( 1353353955),
+ INT32_C(-1311936279), INT32_C( 1168846380), INT32_C( -71806717), INT32_C( 617275327)),
+ simde_mm512_set_epi32(INT32_C( 274706498), INT32_C( -83274089), INT32_C( 1081604364), INT32_C( 1853977291),
+ INT32_C( -44487881), INT32_C(-1793071292), INT32_C( -580417531), INT32_C( 1708989591),
+ INT32_C( -428291654), INT32_C(-1080717955), INT32_C(-1049896819), INT32_C( 1353353955),
+ INT32_C(-1311936279), INT32_C( 45073785), INT32_C( 310971883), INT32_C( 617275327)) },
+ { UINT16_C(62826),
+ simde_mm512_set_epi32(INT32_C( -943712419), INT32_C( -981833223), INT32_C( 2020022414), INT32_C( 630972788),
+ INT32_C( 1615502534), INT32_C( 991949979), INT32_C( 601817641), INT32_C(-2063962607),
+ INT32_C(-1545145030), INT32_C( 1626575612), INT32_C(-1511315708), INT32_C( 1422623346),
+ INT32_C( 1496301111), INT32_C(-1751918881), INT32_C( 333195983), INT32_C( 1655699275)),
+ simde_mm512_set_epi32(INT32_C(-1770653828), INT32_C( -674401292), INT32_C(-2023667251), INT32_C( 1038799540),
+ INT32_C(-1877506849), INT32_C( 791301479), INT32_C(-2115975814), INT32_C( 1430860109),
+ INT32_C(-2123570597), INT32_C(-2010985064), INT32_C( 1367050649), INT32_C( -268988786),
+ INT32_C( 1975120887), INT32_C( 83320183), INT32_C( 2120549505), INT32_C( 163195572)),
+ simde_mm512_set_epi32(INT32_C(-1770653828), INT32_C( -674401292), INT32_C(-2023667251), INT32_C( 1038799540),
+ INT32_C( 1615502534), INT32_C( 791301479), INT32_C( 601817641), INT32_C( 1430860109),
+ INT32_C(-1545145030), INT32_C(-2010985064), INT32_C( 1367050649), INT32_C( 1422623346),
+ INT32_C( 1975120887), INT32_C(-1751918881), INT32_C( 2120549505), INT32_C( 1655699275)) },
+ { UINT16_C( 2185),
+ simde_mm512_set_epi32(INT32_C( 1990735603), INT32_C( -226564964), INT32_C( 2030923034), INT32_C( 1100474296),
+ INT32_C( 1370205128), INT32_C( 1609607637), INT32_C( -586788969), INT32_C( 1072075481),
+ INT32_C( -723316478), INT32_C(-1331909036), INT32_C(-1524508879), INT32_C( -832646284),
+ INT32_C(-1636241911), INT32_C( 215718784), INT32_C( 478814832), INT32_C( 1231303010)),
+ simde_mm512_set_epi32(INT32_C( 96871414), INT32_C( 245175863), INT32_C( 1584772139), INT32_C( 1604795180),
+ INT32_C(-1972440261), INT32_C(-1471456371), INT32_C( 1108051273), INT32_C( -4141330),
+ INT32_C(-1064630270), INT32_C( -911670021), INT32_C(-1808660435), INT32_C(-2035294308),
+ INT32_C(-2014140232), INT32_C(-1194657062), INT32_C(-1732550793), INT32_C(-1643523135)),
+ simde_mm512_set_epi32(INT32_C( 1990735603), INT32_C( -226564964), INT32_C( 2030923034), INT32_C( 1100474296),
+ INT32_C(-1972440261), INT32_C( 1609607637), INT32_C( -586788969), INT32_C( 1072075481),
+ INT32_C(-1064630270), INT32_C(-1331909036), INT32_C(-1524508879), INT32_C( -832646284),
+ INT32_C(-2014140232), INT32_C( 215718784), INT32_C( 478814832), INT32_C(-1643523135)) },
+ { UINT16_C(11244),
+ simde_mm512_set_epi32(INT32_C( 1605800253), INT32_C( 1825375434), INT32_C( -280209407), INT32_C( 1616462798),
+ INT32_C( 1380939760), INT32_C( 1670822362), INT32_C(-1872387325), INT32_C( 451486273),
+ INT32_C( -978012107), INT32_C( -718128180), INT32_C(-1625787118), INT32_C( -879101117),
+ INT32_C( 173600397), INT32_C( 1426384314), INT32_C( -517748272), INT32_C( 76760759)),
+ simde_mm512_set_epi32(INT32_C( 805228357), INT32_C( 980137697), INT32_C( -555168446), INT32_C(-2016549382),
+ INT32_C( 927860791), INT32_C(-1515288559), INT32_C( -918296563), INT32_C(-1858382028),
+ INT32_C( 1365882699), INT32_C( 616589376), INT32_C(-1573056329), INT32_C( 1014781400),
+ INT32_C( -260360112), INT32_C( -953114112), INT32_C( 1191757764), INT32_C( -706360509)),
+ simde_mm512_set_epi32(INT32_C( 1605800253), INT32_C( 1825375434), INT32_C( -555168446), INT32_C( 1616462798),
+ INT32_C( 927860791), INT32_C( 1670822362), INT32_C( -918296563), INT32_C(-1858382028),
+ INT32_C( 1365882699), INT32_C( 616589376), INT32_C(-1573056329), INT32_C( -879101117),
+ INT32_C( -260360112), INT32_C( -953114112), INT32_C( -517748272), INT32_C( 76760759)) }
+ };
+
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m512i r = simde_mm512_mask_blend_epi32(test_vec[i].k, test_vec[i].a, test_vec[i].b);
+ simde_assert_m512i_i32(r, ==, test_vec[i].r);
+ }
+
+ return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm512_mask_blend_epi64(const MunitParameter params[], void* data) {
+ (void) params;
+ (void) data;
+
+ const struct {
+ simde__mmask8 k;
+ simde__m512i a;
+ simde__m512i b;
+ simde__m512i r;
+ } test_vec[8] = {
+ { UINT8_C(140),
+ simde_mm512_set_epi64(INT64_C(-8742267720341431264), INT64_C(-1261106253099452851),
+ INT64_C( 576156398873473062), INT64_C(-6706745694521602474),
+ INT64_C( 1507479018961430062), INT64_C( 4448349614053909430),
+ INT64_C(-5193809434096716003), INT64_C( 4043379170809950035)),
+ simde_mm512_set_epi64(INT64_C( 334233886225577856), INT64_C( -123213930208840538),
+ INT64_C(-8098997938139250622), INT64_C( 1875817424253601973),
+ INT64_C( -155263862084585616), INT64_C(-3975062612548356557),
+ INT64_C( 8406391148321717189), INT64_C( 2300973338778380734)),
+ simde_mm512_set_epi64(INT64_C( 334233886225577856), INT64_C(-1261106253099452851),
+ INT64_C( 576156398873473062), INT64_C(-6706745694521602474),
+ INT64_C( -155263862084585616), INT64_C(-3975062612548356557),
+ INT64_C(-5193809434096716003), INT64_C( 4043379170809950035)) },
+ { UINT8_C( 25),
+ simde_mm512_set_epi64(INT64_C( 8817653003799568984), INT64_C(-3269797649790122397),
+ INT64_C( 6440136020702033784), INT64_C( 5619438532805301950),
+ INT64_C(-3033459988376129790), INT64_C( 4490862223337471449),
+ INT64_C(-4914684479302103500), INT64_C( 357175739365339737)),
+ simde_mm512_set_epi64(INT64_C( 3474303462450025128), INT64_C(-5784936788749461276),
+ INT64_C( 6585121019047362460), INT64_C(-6017799934704469145),
+ INT64_C( 417801411244373047), INT64_C(-6706702665772459046),
+ INT64_C(-1956187837089650443), INT64_C(-4337290818543610578)),
+ simde_mm512_set_epi64(INT64_C( 8817653003799568984), INT64_C(-3269797649790122397),
+ INT64_C( 6440136020702033784), INT64_C(-6017799934704469145),
+ INT64_C( 417801411244373047), INT64_C( 4490862223337471449),
+ INT64_C(-4914684479302103500), INT64_C(-4337290818543610578)) },
+ { UINT8_C(111),
+ simde_mm512_set_epi64(INT64_C(-2716760272685831246), INT64_C( 8715335590848900341),
+ INT64_C(-3628164495500993944), INT64_C( 9103358378116791278),
+ INT64_C(-4764906384514966324), INT64_C(-4007028174417664277),
+ INT64_C(-2436407666547579589), INT64_C( 1334270375494925236)),
+ simde_mm512_set_epi64(INT64_C( -124652284302144255), INT64_C(-1339924211987201795),
+ INT64_C(-4983247764560081329), INT64_C( 8201114396130413013),
+ INT64_C( 1150244193567876877), INT64_C(-3741888496852267954),
+ INT64_C( 6165346835439187844), INT64_C( 1764962990274618058)),
+ simde_mm512_set_epi64(INT64_C(-2716760272685831246), INT64_C(-1339924211987201795),
+ INT64_C(-4983247764560081329), INT64_C( 9103358378116791278),
+ INT64_C( 1150244193567876877), INT64_C(-3741888496852267954),
+ INT64_C( 6165346835439187844), INT64_C( 1764962990274618058)) },
+ { UINT8_C(194),
+ simde_mm512_set_epi64(INT64_C(-4277228465836858362), INT64_C(-8795360585136628042),
+ INT64_C( -288740034661227544), INT64_C(-1662912808453434398),
+ INT64_C( 6856457892943288236), INT64_C( 4536919372887712656),
+ INT64_C(-1781208167188155063), INT64_C( 2398739356475992271)),
+ simde_mm512_set_epi64(INT64_C( 472165646169099842), INT64_C( 3106112138971788944),
+ INT64_C( 7152781194420608953), INT64_C(-8655076010356763224),
+ INT64_C( 4489515481820292748), INT64_C(-1264760833413638610),
+ INT64_C(-1027944449129626434), INT64_C( 5936086237864445820)),
+ simde_mm512_set_epi64(INT64_C( 472165646169099842), INT64_C( 3106112138971788944),
+ INT64_C( -288740034661227544), INT64_C(-1662912808453434398),
+ INT64_C( 6856457892943288236), INT64_C( 4536919372887712656),
+ INT64_C(-1027944449129626434), INT64_C( 2398739356475992271)) },
+ { UINT8_C(198),
+ simde_mm512_set_epi64(INT64_C(-7701182554821916667), INT64_C( 7340054405040954868),
+ INT64_C(-8094272290232215411), INT64_C( 5805838151970444265),
+ INT64_C( 193590432792907243), INT64_C( 2518119983696523684),
+ INT64_C(-7781788212556415310), INT64_C(-7575905367749125944)),
+ simde_mm512_set_epi64(INT64_C(-1561070555307167560), INT64_C(-2748943786159060550),
+ INT64_C(-4641648272018338027), INT64_C( 5812610979620286697),
+ INT64_C( 5020156980371149059), INT64_C( 2651177342668827650),
+ INT64_C( -357659487777588980), INT64_C( 7962771835258493113)),
+ simde_mm512_set_epi64(INT64_C(-1561070555307167560), INT64_C(-2748943786159060550),
+ INT64_C(-8094272290232215411), INT64_C( 5805838151970444265),
+ INT64_C( 193590432792907243), INT64_C( 2651177342668827650),
+ INT64_C( -357659487777588980), INT64_C(-7575905367749125944)) },
+ { UINT8_C( 55),
+ simde_mm512_set_epi64(INT64_C( 2584787088481873425), INT64_C(-6636347369800363268),
+ INT64_C(-6491051538368462222), INT64_C( 6426564339256514271),
+ INT64_C( 1431065851799271243), INT64_C(-5052971989011532438),
+ INT64_C( 1179855426247829719), INT64_C( 3321581320948606601)),
+ simde_mm512_set_epi64(INT64_C(-9088046918826118835), INT64_C(-9120666262578213480),
+ INT64_C( 5871437833456553614), INT64_C( 8483079615394831735),
+ INT64_C( 9107690773687184052), INT64_C(-4053213973120914951),
+ INT64_C( 8675930205947945332), INT64_C( 6938530551127078043)),
+ simde_mm512_set_epi64(INT64_C( 2584787088481873425), INT64_C(-6636347369800363268),
+ INT64_C( 5871437833456553614), INT64_C( 8483079615394831735),
+ INT64_C( 1431065851799271243), INT64_C(-4053213973120914951),
+ INT64_C( 8675930205947945332), INT64_C( 6938530551127078043)) },
+ { UINT8_C(103),
+ simde_mm512_set_epi64(INT64_C( 4604529133310120194), INT64_C(-5720505748096428239),
+ INT64_C(-3576188556257202679), INT64_C( 926505122891702896),
+ INT64_C( 5288406162053320871), INT64_C(-5908713324082235524),
+ INT64_C(-2896531491248846387), INT64_C( 4461610053817304287)),
+ simde_mm512_set_epi64(INT64_C( -17786873681606654), INT64_C(-3915592922452326355),
+ INT64_C(-8741522488314124104), INT64_C(-5131013008663027849),
+ INT64_C(-7058878113053657357), INT64_C( -973089108768494310),
+ INT64_C( 4726501112778828744), INT64_C( 6913212164015017879)),
+ simde_mm512_set_epi64(INT64_C( 4604529133310120194), INT64_C(-3915592922452326355),
+ INT64_C(-8741522488314124104), INT64_C( 926505122891702896),
+ INT64_C( 5288406162053320871), INT64_C( -973089108768494310),
+ INT64_C( 4726501112778828744), INT64_C( 6913212164015017879)) },
+ { UINT8_C( 73),
+ simde_mm512_set_epi64(INT64_C(-4200530011080213556), INT64_C(-6982702498652226749),
+ INT64_C( 745608029114000826), INT64_C(-2223711895723751753),
+ INT64_C( 4918324162995104748), INT64_C( 416059555292452407),
+ INT64_C( 6806544510221761324), INT64_C(-8471566411485193331)),
+ simde_mm512_set_epi64(INT64_C( 5866421522993801280), INT64_C(-6756225486806034984),
+ INT64_C(-1118238162881043968), INT64_C( 5118560624722692931),
+ INT64_C( 6896859572368901322), INT64_C(-1203490237480090674),
+ INT64_C( 5931091108616911322), INT64_C(-8041842325868436927)),
+ simde_mm512_set_epi64(INT64_C(-4200530011080213556), INT64_C(-6756225486806034984),
+ INT64_C( 745608029114000826), INT64_C(-2223711895723751753),
+ INT64_C( 6896859572368901322), INT64_C( 416059555292452407),
+ INT64_C( 6806544510221761324), INT64_C(-8041842325868436927)) }
+ };
+
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m512i r = simde_mm512_mask_blend_epi64(test_vec[i].k, test_vec[i].a, test_vec[i].b);
+ simde_assert_m512i_i64(r, ==, test_vec[i].r);
+ }
+
+ return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm512_mask_blend_ps(const MunitParameter params[], void* data) {
+ (void) params;
+ (void) data;
+
+ const struct {
+ simde__mmask16 k;
+ simde__m512 a;
+ simde__m512 b;
+ simde__m512 r;
+ } test_vec[8] = {
+ { UINT16_C(28658),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( 986.64), SIMDE_FLOAT32_C( 121.90), SIMDE_FLOAT32_C( -796.62), SIMDE_FLOAT32_C( 983.17),
+ SIMDE_FLOAT32_C( 569.02), SIMDE_FLOAT32_C( -88.58), SIMDE_FLOAT32_C( -750.53), SIMDE_FLOAT32_C( 52.16),
+ SIMDE_FLOAT32_C( 863.27), SIMDE_FLOAT32_C( -937.53), SIMDE_FLOAT32_C( 272.85), SIMDE_FLOAT32_C( -836.56),
+ SIMDE_FLOAT32_C( -517.71), SIMDE_FLOAT32_C( 436.89), SIMDE_FLOAT32_C( -561.62), SIMDE_FLOAT32_C( -796.29)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( 721.67), SIMDE_FLOAT32_C( -355.29), SIMDE_FLOAT32_C( -776.65), SIMDE_FLOAT32_C( -467.78),
+ SIMDE_FLOAT32_C( -890.68), SIMDE_FLOAT32_C( -288.13), SIMDE_FLOAT32_C( 739.88), SIMDE_FLOAT32_C( -3.67),
+ SIMDE_FLOAT32_C( 356.91), SIMDE_FLOAT32_C( -250.13), SIMDE_FLOAT32_C( -609.99), SIMDE_FLOAT32_C( -756.57),
+ SIMDE_FLOAT32_C( -441.01), SIMDE_FLOAT32_C( 675.23), SIMDE_FLOAT32_C( -112.56), SIMDE_FLOAT32_C( 752.66)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( 986.64), SIMDE_FLOAT32_C( -355.29), SIMDE_FLOAT32_C( -776.65), SIMDE_FLOAT32_C( 983.17),
+ SIMDE_FLOAT32_C( -890.68), SIMDE_FLOAT32_C( -288.13), SIMDE_FLOAT32_C( 739.88), SIMDE_FLOAT32_C( -3.67),
+ SIMDE_FLOAT32_C( 356.91), SIMDE_FLOAT32_C( -250.13), SIMDE_FLOAT32_C( -609.99), SIMDE_FLOAT32_C( -756.57),
+ SIMDE_FLOAT32_C( -517.71), SIMDE_FLOAT32_C( 436.89), SIMDE_FLOAT32_C( -112.56), SIMDE_FLOAT32_C( -796.29)) },
+ { UINT16_C(13167),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( -177.79), SIMDE_FLOAT32_C( 957.03), SIMDE_FLOAT32_C( -193.15), SIMDE_FLOAT32_C( 645.09),
+ SIMDE_FLOAT32_C( -0.96), SIMDE_FLOAT32_C( 66.15), SIMDE_FLOAT32_C( 565.09), SIMDE_FLOAT32_C( -991.06),
+ SIMDE_FLOAT32_C( -217.74), SIMDE_FLOAT32_C( 162.91), SIMDE_FLOAT32_C( 837.05), SIMDE_FLOAT32_C( 132.83),
+ SIMDE_FLOAT32_C( -183.75), SIMDE_FLOAT32_C( -958.98), SIMDE_FLOAT32_C( -343.18), SIMDE_FLOAT32_C( -412.04)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( -623.32), SIMDE_FLOAT32_C( 372.80), SIMDE_FLOAT32_C( -286.04), SIMDE_FLOAT32_C( 347.55),
+ SIMDE_FLOAT32_C( -954.70), SIMDE_FLOAT32_C( 272.86), SIMDE_FLOAT32_C( 787.91), SIMDE_FLOAT32_C( 529.75),
+ SIMDE_FLOAT32_C( -43.99), SIMDE_FLOAT32_C( 645.49), SIMDE_FLOAT32_C( -301.76), SIMDE_FLOAT32_C( -390.74),
+ SIMDE_FLOAT32_C( 671.11), SIMDE_FLOAT32_C( -513.10), SIMDE_FLOAT32_C( 467.15), SIMDE_FLOAT32_C( -961.27)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( -177.79), SIMDE_FLOAT32_C( 957.03), SIMDE_FLOAT32_C( -286.04), SIMDE_FLOAT32_C( 347.55),
+ SIMDE_FLOAT32_C( -0.96), SIMDE_FLOAT32_C( 66.15), SIMDE_FLOAT32_C( 787.91), SIMDE_FLOAT32_C( 529.75),
+ SIMDE_FLOAT32_C( -217.74), SIMDE_FLOAT32_C( 645.49), SIMDE_FLOAT32_C( -301.76), SIMDE_FLOAT32_C( 132.83),
+ SIMDE_FLOAT32_C( 671.11), SIMDE_FLOAT32_C( -513.10), SIMDE_FLOAT32_C( 467.15), SIMDE_FLOAT32_C( -961.27)) },
+ { UINT16_C(10447),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( 986.49), SIMDE_FLOAT32_C( 854.73), SIMDE_FLOAT32_C( 459.72), SIMDE_FLOAT32_C( -110.83),
+ SIMDE_FLOAT32_C( -875.29), SIMDE_FLOAT32_C( 594.30), SIMDE_FLOAT32_C( -331.55), SIMDE_FLOAT32_C( -808.64),
+ SIMDE_FLOAT32_C( 705.45), SIMDE_FLOAT32_C( -55.08), SIMDE_FLOAT32_C( 606.63), SIMDE_FLOAT32_C( -13.01),
+ SIMDE_FLOAT32_C( 483.39), SIMDE_FLOAT32_C( 565.56), SIMDE_FLOAT32_C( 735.84), SIMDE_FLOAT32_C( -855.34)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( 263.46), SIMDE_FLOAT32_C( -173.99), SIMDE_FLOAT32_C( -448.77), SIMDE_FLOAT32_C( -18.46),
+ SIMDE_FLOAT32_C( 60.12), SIMDE_FLOAT32_C( 895.07), SIMDE_FLOAT32_C( 593.75), SIMDE_FLOAT32_C( -910.69),
+ SIMDE_FLOAT32_C( -898.20), SIMDE_FLOAT32_C( -885.41), SIMDE_FLOAT32_C( -446.34), SIMDE_FLOAT32_C( -539.23),
+ SIMDE_FLOAT32_C( 177.56), SIMDE_FLOAT32_C( 85.90), SIMDE_FLOAT32_C( -977.71), SIMDE_FLOAT32_C( 589.01)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( 986.49), SIMDE_FLOAT32_C( 854.73), SIMDE_FLOAT32_C( -448.77), SIMDE_FLOAT32_C( -110.83),
+ SIMDE_FLOAT32_C( 60.12), SIMDE_FLOAT32_C( 594.30), SIMDE_FLOAT32_C( -331.55), SIMDE_FLOAT32_C( -808.64),
+ SIMDE_FLOAT32_C( -898.20), SIMDE_FLOAT32_C( -885.41), SIMDE_FLOAT32_C( 606.63), SIMDE_FLOAT32_C( -13.01),
+ SIMDE_FLOAT32_C( 177.56), SIMDE_FLOAT32_C( 85.90), SIMDE_FLOAT32_C( -977.71), SIMDE_FLOAT32_C( 589.01)) },
+ { UINT16_C(64052),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( -833.77), SIMDE_FLOAT32_C( -382.41), SIMDE_FLOAT32_C( 155.80), SIMDE_FLOAT32_C( -175.51),
+ SIMDE_FLOAT32_C( 270.94), SIMDE_FLOAT32_C( -347.54), SIMDE_FLOAT32_C( -823.80), SIMDE_FLOAT32_C( -462.55),
+ SIMDE_FLOAT32_C( 93.71), SIMDE_FLOAT32_C( -510.90), SIMDE_FLOAT32_C( 589.53), SIMDE_FLOAT32_C( 762.37),
+ SIMDE_FLOAT32_C( -64.95), SIMDE_FLOAT32_C( -200.72), SIMDE_FLOAT32_C( 590.31), SIMDE_FLOAT32_C( 904.10)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( 932.33), SIMDE_FLOAT32_C( -948.81), SIMDE_FLOAT32_C( -663.23), SIMDE_FLOAT32_C( -224.49),
+ SIMDE_FLOAT32_C( 61.61), SIMDE_FLOAT32_C( -513.25), SIMDE_FLOAT32_C( 862.87), SIMDE_FLOAT32_C( 888.55),
+ SIMDE_FLOAT32_C( -356.41), SIMDE_FLOAT32_C( 536.26), SIMDE_FLOAT32_C( 46.41), SIMDE_FLOAT32_C( 968.69),
+ SIMDE_FLOAT32_C( 819.71), SIMDE_FLOAT32_C( -256.62), SIMDE_FLOAT32_C( -508.11), SIMDE_FLOAT32_C( 806.88)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( 932.33), SIMDE_FLOAT32_C( -948.81), SIMDE_FLOAT32_C( -663.23), SIMDE_FLOAT32_C( -224.49),
+ SIMDE_FLOAT32_C( 61.61), SIMDE_FLOAT32_C( -347.54), SIMDE_FLOAT32_C( 862.87), SIMDE_FLOAT32_C( -462.55),
+ SIMDE_FLOAT32_C( 93.71), SIMDE_FLOAT32_C( -510.90), SIMDE_FLOAT32_C( 46.41), SIMDE_FLOAT32_C( 968.69),
+ SIMDE_FLOAT32_C( -64.95), SIMDE_FLOAT32_C( -256.62), SIMDE_FLOAT32_C( 590.31), SIMDE_FLOAT32_C( 904.10)) },
+ { UINT16_C(43223),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( 810.27), SIMDE_FLOAT32_C( 830.75), SIMDE_FLOAT32_C( 701.96), SIMDE_FLOAT32_C( 496.75),
+ SIMDE_FLOAT32_C( -369.80), SIMDE_FLOAT32_C( -455.71), SIMDE_FLOAT32_C( -712.56), SIMDE_FLOAT32_C( 961.22),
+ SIMDE_FLOAT32_C( -136.67), SIMDE_FLOAT32_C( 165.04), SIMDE_FLOAT32_C( -204.19), SIMDE_FLOAT32_C( 122.42),
+ SIMDE_FLOAT32_C( -370.53), SIMDE_FLOAT32_C( -979.01), SIMDE_FLOAT32_C( -726.98), SIMDE_FLOAT32_C( 156.30)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( -639.87), SIMDE_FLOAT32_C( 979.28), SIMDE_FLOAT32_C( 637.46), SIMDE_FLOAT32_C( 800.56),
+ SIMDE_FLOAT32_C( -577.80), SIMDE_FLOAT32_C( 389.08), SIMDE_FLOAT32_C( 966.56), SIMDE_FLOAT32_C( -731.72),
+ SIMDE_FLOAT32_C( -496.34), SIMDE_FLOAT32_C( 344.28), SIMDE_FLOAT32_C( 729.72), SIMDE_FLOAT32_C( 160.21),
+ SIMDE_FLOAT32_C( 511.10), SIMDE_FLOAT32_C( 746.94), SIMDE_FLOAT32_C( -855.19), SIMDE_FLOAT32_C( 203.47)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( -639.87), SIMDE_FLOAT32_C( 830.75), SIMDE_FLOAT32_C( 637.46), SIMDE_FLOAT32_C( 496.75),
+ SIMDE_FLOAT32_C( -577.80), SIMDE_FLOAT32_C( -455.71), SIMDE_FLOAT32_C( -712.56), SIMDE_FLOAT32_C( 961.22),
+ SIMDE_FLOAT32_C( -496.34), SIMDE_FLOAT32_C( 344.28), SIMDE_FLOAT32_C( -204.19), SIMDE_FLOAT32_C( 160.21),
+ SIMDE_FLOAT32_C( -370.53), SIMDE_FLOAT32_C( 746.94), SIMDE_FLOAT32_C( -855.19), SIMDE_FLOAT32_C( 203.47)) },
+ { UINT16_C(29684),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( -516.27), SIMDE_FLOAT32_C( -631.52), SIMDE_FLOAT32_C( -333.70), SIMDE_FLOAT32_C( 63.56),
+ SIMDE_FLOAT32_C( 874.74), SIMDE_FLOAT32_C( -961.20), SIMDE_FLOAT32_C( -924.01), SIMDE_FLOAT32_C( 542.80),
+ SIMDE_FLOAT32_C( -706.18), SIMDE_FLOAT32_C( -538.09), SIMDE_FLOAT32_C( 38.89), SIMDE_FLOAT32_C( -242.57),
+ SIMDE_FLOAT32_C( -337.54), SIMDE_FLOAT32_C( 184.20), SIMDE_FLOAT32_C( -229.00), SIMDE_FLOAT32_C( -133.06)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( 57.66), SIMDE_FLOAT32_C( 125.72), SIMDE_FLOAT32_C( 14.67), SIMDE_FLOAT32_C( 11.14),
+ SIMDE_FLOAT32_C( -363.42), SIMDE_FLOAT32_C( -80.26), SIMDE_FLOAT32_C( -12.54), SIMDE_FLOAT32_C( 560.55),
+ SIMDE_FLOAT32_C( -59.35), SIMDE_FLOAT32_C( -247.72), SIMDE_FLOAT32_C( -719.76), SIMDE_FLOAT32_C( 280.49),
+ SIMDE_FLOAT32_C( 296.24), SIMDE_FLOAT32_C( -303.23), SIMDE_FLOAT32_C( -844.84), SIMDE_FLOAT32_C( 452.16)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( -516.27), SIMDE_FLOAT32_C( 125.72), SIMDE_FLOAT32_C( 14.67), SIMDE_FLOAT32_C( 11.14),
+ SIMDE_FLOAT32_C( 874.74), SIMDE_FLOAT32_C( -961.20), SIMDE_FLOAT32_C( -12.54), SIMDE_FLOAT32_C( 560.55),
+ SIMDE_FLOAT32_C( -59.35), SIMDE_FLOAT32_C( -247.72), SIMDE_FLOAT32_C( -719.76), SIMDE_FLOAT32_C( 280.49),
+ SIMDE_FLOAT32_C( -337.54), SIMDE_FLOAT32_C( -303.23), SIMDE_FLOAT32_C( -229.00), SIMDE_FLOAT32_C( -133.06)) },
+ { UINT16_C( 5687),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( -252.71), SIMDE_FLOAT32_C( 314.80), SIMDE_FLOAT32_C( 998.07), SIMDE_FLOAT32_C( 575.47),
+ SIMDE_FLOAT32_C( 52.24), SIMDE_FLOAT32_C( 443.69), SIMDE_FLOAT32_C( 234.67), SIMDE_FLOAT32_C( 894.50),
+ SIMDE_FLOAT32_C( -487.55), SIMDE_FLOAT32_C( -250.47), SIMDE_FLOAT32_C( -500.78), SIMDE_FLOAT32_C( 379.78),
+ SIMDE_FLOAT32_C( 612.27), SIMDE_FLOAT32_C( -899.55), SIMDE_FLOAT32_C( -426.63), SIMDE_FLOAT32_C( 359.38)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( -262.03), SIMDE_FLOAT32_C( 81.51), SIMDE_FLOAT32_C( -484.02), SIMDE_FLOAT32_C( 504.24),
+ SIMDE_FLOAT32_C( 157.78), SIMDE_FLOAT32_C( 62.09), SIMDE_FLOAT32_C( 193.22), SIMDE_FLOAT32_C( -72.99),
+ SIMDE_FLOAT32_C( -54.28), SIMDE_FLOAT32_C( -361.95), SIMDE_FLOAT32_C( 726.76), SIMDE_FLOAT32_C( 663.18),
+ SIMDE_FLOAT32_C( 290.10), SIMDE_FLOAT32_C( 238.07), SIMDE_FLOAT32_C( -777.03), SIMDE_FLOAT32_C( 227.93)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( -252.71), SIMDE_FLOAT32_C( 314.80), SIMDE_FLOAT32_C( 998.07), SIMDE_FLOAT32_C( 504.24),
+ SIMDE_FLOAT32_C( 52.24), SIMDE_FLOAT32_C( 62.09), SIMDE_FLOAT32_C( 193.22), SIMDE_FLOAT32_C( 894.50),
+ SIMDE_FLOAT32_C( -487.55), SIMDE_FLOAT32_C( -250.47), SIMDE_FLOAT32_C( 726.76), SIMDE_FLOAT32_C( 663.18),
+ SIMDE_FLOAT32_C( 612.27), SIMDE_FLOAT32_C( 238.07), SIMDE_FLOAT32_C( -777.03), SIMDE_FLOAT32_C( 227.93)) },
+ { UINT16_C(46817),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( 60.97), SIMDE_FLOAT32_C( 294.39), SIMDE_FLOAT32_C( 134.62), SIMDE_FLOAT32_C( -712.88),
+ SIMDE_FLOAT32_C( -527.46), SIMDE_FLOAT32_C( 556.17), SIMDE_FLOAT32_C( 671.08), SIMDE_FLOAT32_C( -149.99),
+ SIMDE_FLOAT32_C( -247.28), SIMDE_FLOAT32_C( -221.96), SIMDE_FLOAT32_C( -789.76), SIMDE_FLOAT32_C( 665.60),
+ SIMDE_FLOAT32_C( 590.64), SIMDE_FLOAT32_C( -335.79), SIMDE_FLOAT32_C( -964.26), SIMDE_FLOAT32_C( 137.06)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( 741.48), SIMDE_FLOAT32_C( -567.93), SIMDE_FLOAT32_C( 572.38), SIMDE_FLOAT32_C( -363.96),
+ SIMDE_FLOAT32_C( 267.49), SIMDE_FLOAT32_C( 878.76), SIMDE_FLOAT32_C( -445.04), SIMDE_FLOAT32_C( -252.24),
+ SIMDE_FLOAT32_C( 869.52), SIMDE_FLOAT32_C( -356.95), SIMDE_FLOAT32_C( 128.10), SIMDE_FLOAT32_C( 544.58),
+ SIMDE_FLOAT32_C( 242.93), SIMDE_FLOAT32_C( -919.16), SIMDE_FLOAT32_C( 758.90), SIMDE_FLOAT32_C( -466.75)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( 741.48), SIMDE_FLOAT32_C( 294.39), SIMDE_FLOAT32_C( 572.38), SIMDE_FLOAT32_C( -363.96),
+ SIMDE_FLOAT32_C( -527.46), SIMDE_FLOAT32_C( 878.76), SIMDE_FLOAT32_C( -445.04), SIMDE_FLOAT32_C( -149.99),
+ SIMDE_FLOAT32_C( 869.52), SIMDE_FLOAT32_C( -356.95), SIMDE_FLOAT32_C( 128.10), SIMDE_FLOAT32_C( 665.60),
+ SIMDE_FLOAT32_C( 590.64), SIMDE_FLOAT32_C( -335.79), SIMDE_FLOAT32_C( -964.26), SIMDE_FLOAT32_C( -466.75)) }
+ };
+
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m512 r = simde_mm512_mask_blend_ps(test_vec[i].k, test_vec[i].a, test_vec[i].b);
+ simde_assert_m512_close(r, test_vec[i].r, 1);
+ }
+
+ return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm512_mask_blend_pd(const MunitParameter params[], void* data) {
+ (void) params;
+ (void) data;
+
+ const struct {
+ simde__mmask8 k;
+ simde__m512d a;
+ simde__m512d b;
+ simde__m512d r;
+ } test_vec[8] = {
+ { UINT8_C(211),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( 863.27), SIMDE_FLOAT64_C( -937.53),
+ SIMDE_FLOAT64_C( 272.85), SIMDE_FLOAT64_C( -836.56),
+ SIMDE_FLOAT64_C( -517.71), SIMDE_FLOAT64_C( 436.89),
+ SIMDE_FLOAT64_C( -561.62), SIMDE_FLOAT64_C( -796.29)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( 356.91), SIMDE_FLOAT64_C( -250.13),
+ SIMDE_FLOAT64_C( -609.99), SIMDE_FLOAT64_C( -756.57),
+ SIMDE_FLOAT64_C( -441.01), SIMDE_FLOAT64_C( 675.23),
+ SIMDE_FLOAT64_C( -112.56), SIMDE_FLOAT64_C( 752.66)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( 356.91), SIMDE_FLOAT64_C( -250.13),
+ SIMDE_FLOAT64_C( 272.85), SIMDE_FLOAT64_C( -756.57),
+ SIMDE_FLOAT64_C( -517.71), SIMDE_FLOAT64_C( 436.89),
+ SIMDE_FLOAT64_C( -112.56), SIMDE_FLOAT64_C( 752.66)) },
+ { UINT8_C( 25),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( 721.67), SIMDE_FLOAT64_C( -355.29),
+ SIMDE_FLOAT64_C( -776.65), SIMDE_FLOAT64_C( -467.78),
+ SIMDE_FLOAT64_C( -890.68), SIMDE_FLOAT64_C( -288.13),
+ SIMDE_FLOAT64_C( 739.88), SIMDE_FLOAT64_C( -3.67)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( -963.76), SIMDE_FLOAT64_C( 986.64),
+ SIMDE_FLOAT64_C( 121.90), SIMDE_FLOAT64_C( -796.62),
+ SIMDE_FLOAT64_C( 983.17), SIMDE_FLOAT64_C( 569.02),
+ SIMDE_FLOAT64_C( -88.58), SIMDE_FLOAT64_C( -750.53)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( 721.67), SIMDE_FLOAT64_C( -355.29),
+ SIMDE_FLOAT64_C( -776.65), SIMDE_FLOAT64_C( -796.62),
+ SIMDE_FLOAT64_C( 983.17), SIMDE_FLOAT64_C( -288.13),
+ SIMDE_FLOAT64_C( 739.88), SIMDE_FLOAT64_C( -750.53)) },
+ { UINT8_C( 46),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( -217.74), SIMDE_FLOAT64_C( 162.91),
+ SIMDE_FLOAT64_C( 837.05), SIMDE_FLOAT64_C( 132.83),
+ SIMDE_FLOAT64_C( -183.75), SIMDE_FLOAT64_C( -958.98),
+ SIMDE_FLOAT64_C( -343.18), SIMDE_FLOAT64_C( -412.04)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( -43.99), SIMDE_FLOAT64_C( 645.49),
+ SIMDE_FLOAT64_C( -301.76), SIMDE_FLOAT64_C( -390.74),
+ SIMDE_FLOAT64_C( 671.11), SIMDE_FLOAT64_C( -513.10),
+ SIMDE_FLOAT64_C( 467.15), SIMDE_FLOAT64_C( -961.27)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( -217.74), SIMDE_FLOAT64_C( 162.91),
+ SIMDE_FLOAT64_C( -301.76), SIMDE_FLOAT64_C( 132.83),
+ SIMDE_FLOAT64_C( 671.11), SIMDE_FLOAT64_C( -513.10),
+ SIMDE_FLOAT64_C( 467.15), SIMDE_FLOAT64_C( -412.04)) },
+ { UINT8_C(180),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( -623.32), SIMDE_FLOAT64_C( 372.80),
+ SIMDE_FLOAT64_C( -286.04), SIMDE_FLOAT64_C( 347.55),
+ SIMDE_FLOAT64_C( -954.70), SIMDE_FLOAT64_C( 272.86),
+ SIMDE_FLOAT64_C( 787.91), SIMDE_FLOAT64_C( 529.75)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( 153.63), SIMDE_FLOAT64_C( -177.79),
+ SIMDE_FLOAT64_C( 957.03), SIMDE_FLOAT64_C( -193.15),
+ SIMDE_FLOAT64_C( 645.09), SIMDE_FLOAT64_C( -0.96),
+ SIMDE_FLOAT64_C( 66.15), SIMDE_FLOAT64_C( 565.09)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( 153.63), SIMDE_FLOAT64_C( 372.80),
+ SIMDE_FLOAT64_C( 957.03), SIMDE_FLOAT64_C( -193.15),
+ SIMDE_FLOAT64_C( -954.70), SIMDE_FLOAT64_C( -0.96),
+ SIMDE_FLOAT64_C( 787.91), SIMDE_FLOAT64_C( 529.75)) },
+ { UINT8_C(125),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( 705.45), SIMDE_FLOAT64_C( -55.08),
+ SIMDE_FLOAT64_C( 606.63), SIMDE_FLOAT64_C( -13.01),
+ SIMDE_FLOAT64_C( 483.39), SIMDE_FLOAT64_C( 565.56),
+ SIMDE_FLOAT64_C( 735.84), SIMDE_FLOAT64_C( -855.34)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( -898.20), SIMDE_FLOAT64_C( -885.41),
+ SIMDE_FLOAT64_C( -446.34), SIMDE_FLOAT64_C( -539.23),
+ SIMDE_FLOAT64_C( 177.56), SIMDE_FLOAT64_C( 85.90),
+ SIMDE_FLOAT64_C( -977.71), SIMDE_FLOAT64_C( 589.01)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( 705.45), SIMDE_FLOAT64_C( -885.41),
+ SIMDE_FLOAT64_C( -446.34), SIMDE_FLOAT64_C( -539.23),
+ SIMDE_FLOAT64_C( 177.56), SIMDE_FLOAT64_C( 85.90),
+ SIMDE_FLOAT64_C( 735.84), SIMDE_FLOAT64_C( 589.01)) },
+ { UINT8_C(188),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( 263.46), SIMDE_FLOAT64_C( -173.99),
+ SIMDE_FLOAT64_C( -448.77), SIMDE_FLOAT64_C( -18.46),
+ SIMDE_FLOAT64_C( 60.12), SIMDE_FLOAT64_C( 895.07),
+ SIMDE_FLOAT64_C( 593.75), SIMDE_FLOAT64_C( -910.69)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( -347.65), SIMDE_FLOAT64_C( 986.49),
+ SIMDE_FLOAT64_C( 854.73), SIMDE_FLOAT64_C( 459.72),
+ SIMDE_FLOAT64_C( -110.83), SIMDE_FLOAT64_C( -875.29),
+ SIMDE_FLOAT64_C( 594.30), SIMDE_FLOAT64_C( -331.55)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( -347.65), SIMDE_FLOAT64_C( -173.99),
+ SIMDE_FLOAT64_C( 854.73), SIMDE_FLOAT64_C( 459.72),
+ SIMDE_FLOAT64_C( -110.83), SIMDE_FLOAT64_C( -875.29),
+ SIMDE_FLOAT64_C( 593.75), SIMDE_FLOAT64_C( -910.69)) },
+ { UINT8_C(190),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( 93.71), SIMDE_FLOAT64_C( -510.90),
+ SIMDE_FLOAT64_C( 589.53), SIMDE_FLOAT64_C( 762.37),
+ SIMDE_FLOAT64_C( -64.95), SIMDE_FLOAT64_C( -200.72),
+ SIMDE_FLOAT64_C( 590.31), SIMDE_FLOAT64_C( 904.10)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( -356.41), SIMDE_FLOAT64_C( 536.26),
+ SIMDE_FLOAT64_C( 46.41), SIMDE_FLOAT64_C( 968.69),
+ SIMDE_FLOAT64_C( 819.71), SIMDE_FLOAT64_C( -256.62),
+ SIMDE_FLOAT64_C( -508.11), SIMDE_FLOAT64_C( 806.88)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( -356.41), SIMDE_FLOAT64_C( -510.90),
+ SIMDE_FLOAT64_C( 46.41), SIMDE_FLOAT64_C( 968.69),
+ SIMDE_FLOAT64_C( 819.71), SIMDE_FLOAT64_C( -256.62),
+ SIMDE_FLOAT64_C( -508.11), SIMDE_FLOAT64_C( 904.10)) },
+ { UINT8_C(178),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( 932.33), SIMDE_FLOAT64_C( -948.81),
+ SIMDE_FLOAT64_C( -663.23), SIMDE_FLOAT64_C( -224.49),
+ SIMDE_FLOAT64_C( 61.61), SIMDE_FLOAT64_C( -513.25),
+ SIMDE_FLOAT64_C( 862.87), SIMDE_FLOAT64_C( 888.55)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( 178.62), SIMDE_FLOAT64_C( -833.77),
+ SIMDE_FLOAT64_C( -382.41), SIMDE_FLOAT64_C( 155.80),
+ SIMDE_FLOAT64_C( -175.51), SIMDE_FLOAT64_C( 270.94),
+ SIMDE_FLOAT64_C( -347.54), SIMDE_FLOAT64_C( -823.80)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( 178.62), SIMDE_FLOAT64_C( -948.81),
+ SIMDE_FLOAT64_C( -382.41), SIMDE_FLOAT64_C( 155.80),
+ SIMDE_FLOAT64_C( 61.61), SIMDE_FLOAT64_C( -513.25),
+ SIMDE_FLOAT64_C( -347.54), SIMDE_FLOAT64_C( 888.55)) }
+ };
+
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m512d r = simde_mm512_mask_blend_pd(test_vec[i].k, test_vec[i].a, test_vec[i].b);
+ simde_assert_m512d_close(r, test_vec[i].r, 1);
+ }
+
+ return MUNIT_OK;
+}
+
static MunitResult
test_simde_mm512_broadcast_i32x4(const MunitParameter params[], void* data) {
(void) params;
@@ -2514,6 +3014,124 @@ test_simde_mm512_broadcast_i32x4(const MunitParameter params[], void* data) {
return MUNIT_OK;
}
+static MunitResult
+test_simde_mm512_broadcastd_epi32(const MunitParameter params[], void* data) {
+ (void) params; /* neither munit argument is used by this test */
+ (void) data;
+ /* Table-driven check of simde_mm512_broadcastd_epi32 against eight fixed vectors. */
+ const struct {
+ simde__m128i a; /* input handed to the function under test */
+ simde__m512i r; /* expected output: the last simde_mm_set_epi32 argument of `a` (presumably the lowest lane) in all 16 positions */
+ } test_vec[8] = {
+ { simde_mm_set_epi32(INT32_C(-1051270324), INT32_C(-1977183446), INT32_C( -548195640), INT32_C(-1363461466)),
+ simde_mm512_set_epi32(INT32_C(-1363461466), INT32_C(-1363461466), INT32_C(-1363461466), INT32_C(-1363461466),
+ INT32_C(-1363461466), INT32_C(-1363461466), INT32_C(-1363461466), INT32_C(-1363461466),
+ INT32_C(-1363461466), INT32_C(-1363461466), INT32_C(-1363461466), INT32_C(-1363461466),
+ INT32_C(-1363461466), INT32_C(-1363461466), INT32_C(-1363461466), INT32_C(-1363461466)) },
+ { simde_mm_set_epi32(INT32_C( 979094891), INT32_C( 416506319), INT32_C( 2123490297), INT32_C( 200388421)),
+ simde_mm512_set_epi32(INT32_C( 200388421), INT32_C( 200388421), INT32_C( 200388421), INT32_C( 200388421),
+ INT32_C( 200388421), INT32_C( 200388421), INT32_C( 200388421), INT32_C( 200388421),
+ INT32_C( 200388421), INT32_C( 200388421), INT32_C( 200388421), INT32_C( 200388421),
+ INT32_C( 200388421), INT32_C( 200388421), INT32_C( 200388421), INT32_C( 200388421)) },
+ { simde_mm_set_epi32(INT32_C( 1927260635), INT32_C( 1201458882), INT32_C(-1448742498), INT32_C(-1111904220)),
+ simde_mm512_set_epi32(INT32_C(-1111904220), INT32_C(-1111904220), INT32_C(-1111904220), INT32_C(-1111904220),
+ INT32_C(-1111904220), INT32_C(-1111904220), INT32_C(-1111904220), INT32_C(-1111904220),
+ INT32_C(-1111904220), INT32_C(-1111904220), INT32_C(-1111904220), INT32_C(-1111904220),
+ INT32_C(-1111904220), INT32_C(-1111904220), INT32_C(-1111904220), INT32_C(-1111904220)) },
+ { simde_mm_set_epi32(INT32_C( -976455818), INT32_C( 542613123), INT32_C( -15911923), INT32_C( -562895064)),
+ simde_mm512_set_epi32(INT32_C( -562895064), INT32_C( -562895064), INT32_C( -562895064), INT32_C( -562895064),
+ INT32_C( -562895064), INT32_C( -562895064), INT32_C( -562895064), INT32_C( -562895064),
+ INT32_C( -562895064), INT32_C( -562895064), INT32_C( -562895064), INT32_C( -562895064),
+ INT32_C( -562895064), INT32_C( -562895064), INT32_C( -562895064), INT32_C( -562895064)) },
+ { simde_mm_set_epi32(INT32_C( 836747087), INT32_C(-1431045412), INT32_C(-1356396683), INT32_C( 1489138473)),
+ simde_mm512_set_epi32(INT32_C( 1489138473), INT32_C( 1489138473), INT32_C( 1489138473), INT32_C( 1489138473),
+ INT32_C( 1489138473), INT32_C( 1489138473), INT32_C( 1489138473), INT32_C( 1489138473),
+ INT32_C( 1489138473), INT32_C( 1489138473), INT32_C( 1489138473), INT32_C( 1489138473),
+ INT32_C( 1489138473), INT32_C( 1489138473), INT32_C( 1489138473), INT32_C( 1489138473)) },
+ { simde_mm_set_epi32(INT32_C(-1783426961), INT32_C( -263517415), INT32_C(-1697630001), INT32_C( 2025142863)),
+ simde_mm512_set_epi32(INT32_C( 2025142863), INT32_C( 2025142863), INT32_C( 2025142863), INT32_C( 2025142863),
+ INT32_C( 2025142863), INT32_C( 2025142863), INT32_C( 2025142863), INT32_C( 2025142863),
+ INT32_C( 2025142863), INT32_C( 2025142863), INT32_C( 2025142863), INT32_C( 2025142863),
+ INT32_C( 2025142863), INT32_C( 2025142863), INT32_C( 2025142863), INT32_C( 2025142863)) },
+ { simde_mm_set_epi32(INT32_C( 300619496), INT32_C( -659754204), INT32_C(-1019736463), INT32_C( 1022872166)),
+ simde_mm512_set_epi32(INT32_C( 1022872166), INT32_C( 1022872166), INT32_C( 1022872166), INT32_C( 1022872166),
+ INT32_C( 1022872166), INT32_C( 1022872166), INT32_C( 1022872166), INT32_C( 1022872166),
+ INT32_C( 1022872166), INT32_C( 1022872166), INT32_C( 1022872166), INT32_C( 1022872166),
+ INT32_C( 1022872166), INT32_C( 1022872166), INT32_C( 1022872166), INT32_C( 1022872166)) },
+ { simde_mm_set_epi32(INT32_C( -274893610), INT32_C( 171227717), INT32_C( 1187872667), INT32_C( -590903223)),
+ simde_mm512_set_epi32(INT32_C( -590903223), INT32_C( -590903223), INT32_C( -590903223), INT32_C( -590903223),
+ INT32_C( -590903223), INT32_C( -590903223), INT32_C( -590903223), INT32_C( -590903223),
+ INT32_C( -590903223), INT32_C( -590903223), INT32_C( -590903223), INT32_C( -590903223),
+ INT32_C( -590903223), INT32_C( -590903223), INT32_C( -590903223), INT32_C( -590903223)) }
+ };
+ /* Run every vector; the computed value must compare == to the expected one. */
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m512i r = simde_mm512_broadcastd_epi32(test_vec[i].a);
+ simde_assert_m512i_i32(r, ==, test_vec[i].r);
+ }
+
+ return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm512_broadcastq_epi64(const MunitParameter params[], void* data) {
+ (void) params; /* neither munit argument is used by this test */
+ (void) data;
+ /* Table-driven check of simde_mm512_broadcastq_epi64 against eight fixed vectors. */
+ const struct {
+ simde__m128i a; /* input handed to the function under test */
+ simde__m512i r; /* expected output: the second simde_mm_set_epi64x argument of `a` (presumably the low lane) in all 8 positions */
+ } test_vec[8] = {
+ { simde_mm_set_epi64x(INT64_C(-4515171658517540054), INT64_C(-2354482342678283610)),
+ simde_mm512_set_epi64(INT64_C(-2354482342678283610), INT64_C(-2354482342678283610),
+ INT64_C(-2354482342678283610), INT64_C(-2354482342678283610),
+ INT64_C(-2354482342678283610), INT64_C(-2354482342678283610),
+ INT64_C(-2354482342678283610), INT64_C(-2354482342678283610)) },
+ { simde_mm_set_epi64x(INT64_C( 4205180536942191055), INT64_C( 9120321379188715333)),
+ simde_mm512_set_epi64(INT64_C( 9120321379188715333), INT64_C( 9120321379188715333),
+ INT64_C( 9120321379188715333), INT64_C( 9120321379188715333),
+ INT64_C( 9120321379188715333), INT64_C( 9120321379188715333),
+ INT64_C( 9120321379188715333), INT64_C( 9120321379188715333)) },
+ { simde_mm_set_epi64x(INT64_C( 8277521399394651842), INT64_C(-6222301646052282332)),
+ simde_mm512_set_epi64(INT64_C(-6222301646052282332), INT64_C(-6222301646052282332),
+ INT64_C(-6222301646052282332), INT64_C(-6222301646052282332),
+ INT64_C(-6222301646052282332), INT64_C(-6222301646052282332),
+ INT64_C(-6222301646052282332), INT64_C(-6222301646052282332)) },
+ { simde_mm_set_epi64x(INT64_C(-4193845803756315005), INT64_C( -68341185169397976)),
+ simde_mm512_set_epi64(INT64_C( -68341185169397976), INT64_C( -68341185169397976),
+ INT64_C( -68341185169397976), INT64_C( -68341185169397976),
+ INT64_C( -68341185169397976), INT64_C( -68341185169397976),
+ INT64_C( -68341185169397976), INT64_C( -68341185169397976)) },
+ { simde_mm_set_epi64x(INT64_C( 3593801376552188636), INT64_C(-5825679392398740695)),
+ simde_mm512_set_epi64(INT64_C(-5825679392398740695), INT64_C(-5825679392398740695),
+ INT64_C(-5825679392398740695), INT64_C(-5825679392398740695),
+ INT64_C(-5825679392398740695), INT64_C(-5825679392398740695),
+ INT64_C(-5825679392398740695), INT64_C(-5825679392398740695)) },
+ { simde_mm_set_epi64x(INT64_C(-7659760468268217575), INT64_C(-7291265332978304433)),
+ simde_mm512_set_epi64(INT64_C(-7291265332978304433), INT64_C(-7291265332978304433),
+ INT64_C(-7291265332978304433), INT64_C(-7291265332978304433),
+ INT64_C(-7291265332978304433), INT64_C(-7291265332978304433),
+ INT64_C(-7291265332978304433), INT64_C(-7291265332978304433)) },
+ { simde_mm_set_epi64x(INT64_C( 1291150907495215908), INT64_C(-4379734758100841882)),
+ simde_mm512_set_epi64(INT64_C(-4379734758100841882), INT64_C(-4379734758100841882),
+ INT64_C(-4379734758100841882), INT64_C(-4379734758100841882),
+ INT64_C(-4379734758100841882), INT64_C(-4379734758100841882),
+ INT64_C(-4379734758100841882), INT64_C(-4379734758100841882)) },
+ { simde_mm_set_epi64x(INT64_C(-1180659064658150843), INT64_C( 5101874260281362505)),
+ simde_mm512_set_epi64(INT64_C( 5101874260281362505), INT64_C( 5101874260281362505),
+ INT64_C( 5101874260281362505), INT64_C( 5101874260281362505),
+ INT64_C( 5101874260281362505), INT64_C( 5101874260281362505),
+ INT64_C( 5101874260281362505), INT64_C( 5101874260281362505)) }
+ };
+ /* Run every vector; the computed value must compare == to the expected one. */
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m512i r = simde_mm512_broadcastq_epi64(test_vec[i].a);
+ simde_assert_m512i_i64(r, ==, test_vec[i].r);
+ }
+
+ return MUNIT_OK;
+}
+
static MunitResult
test_simde_mm512_castpd512_pd128(const MunitParameter params[], void* data) {
(void) params;
@@ -10304,7 +10922,14 @@ static MunitTest test_suite_tests[] = {
SIMDE_TESTS_DEFINE_TEST(mm512_andnot_si512),
+ SIMDE_TESTS_DEFINE_TEST(mm512_mask_blend_epi32),
+ SIMDE_TESTS_DEFINE_TEST(mm512_mask_blend_epi64),
+ SIMDE_TESTS_DEFINE_TEST(mm512_mask_blend_ps),
+ SIMDE_TESTS_DEFINE_TEST(mm512_mask_blend_pd),
+
SIMDE_TESTS_DEFINE_TEST(mm512_broadcast_i32x4),
+ SIMDE_TESTS_DEFINE_TEST(mm512_broadcastd_epi32),
+ SIMDE_TESTS_DEFINE_TEST(mm512_broadcastq_epi64),
SIMDE_TESTS_DEFINE_TEST(mm512_castpd512_pd128),
SIMDE_TESTS_DEFINE_TEST(mm512_castpd512_pd256),
=====================================
test/x86/avx512vl.c
=====================================
@@ -177,6 +177,416 @@ test_simde_mm256_cvtsepi16_epi8(const MunitParameter params[], void* data) {
return MUNIT_OK;
}
+static MunitResult
+test_simde_mm_cvtsepi32_epi8(const MunitParameter params[], void* data) {
+ (void) params; /* neither munit argument is used by this test */
+ (void) data;
+ /* Table-driven check of simde_mm_cvtsepi32_epi8; every input below lies outside [-128, 127]. */
+ const struct {
+ simde__m128i a; /* four 32-bit inputs */
+ simde__m128i r; /* expected: twelve zero bytes, then 127 per positive input and -128 per negative input, in the same argument order */
+ } test_vec[8] = {
+ { simde_mm_set_epi32(INT32_C(-2145190814), INT32_C( 369095719), INT32_C( 35558368), INT32_C( -760875473)),
+ simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C(-128)) },
+ { simde_mm_set_epi32(INT32_C( -891201105), INT32_C( 2065808871), INT32_C( 421929391), INT32_C( 587313056)),
+ simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 127)) },
+ { simde_mm_set_epi32(INT32_C(-1908802801), INT32_C( -442332083), INT32_C(-1878740578), INT32_C(-1559213492)),
+ simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C(-128)) },
+ { simde_mm_set_epi32(INT32_C( 615110155), INT32_C( -809405494), INT32_C( 1459512749), INT32_C( -889064834)),
+ simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-128)) },
+ { simde_mm_set_epi32(INT32_C( -125398563), INT32_C( 1544839586), INT32_C( 657472508), INT32_C( -763651133)),
+ simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C(-128)) },
+ { simde_mm_set_epi32(INT32_C( 1244639853), INT32_C(-1576601619), INT32_C( 458581376), INT32_C(-1764676112)),
+ simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-128)) },
+ { simde_mm_set_epi32(INT32_C(-1516069112), INT32_C( 1985219066), INT32_C( 436268231), INT32_C( -342699987)),
+ simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C(-128)) },
+ { simde_mm_set_epi32(INT32_C( -672406129), INT32_C( 1062677939), INT32_C( 54896059), INT32_C( -667800710)),
+ simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
+ INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C(-128)) }
+ };
+ /* Run every vector; the computed value must compare == to the expected one. */
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m128i r = simde_mm_cvtsepi32_epi8(test_vec[i].a);
+ simde_assert_m128i_i8(r, ==, test_vec[i].r);
+ }
+
+ return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm_cvtsepi32_epi16(const MunitParameter params[], void* data) {
+ (void) params; /* neither munit argument is used by this test */
+ (void) data;
+ /* Table-driven check of simde_mm_cvtsepi32_epi16; every input below lies outside [-32768, 32767]. */
+ const struct {
+ simde__m128i a; /* four 32-bit inputs */
+ simde__m128i r; /* expected: four zero words, then 32767 per positive input and -32768 per negative input, in the same argument order */
+ } test_vec[8] = {
+ { simde_mm_set_epi32(INT32_C( 115673074), INT32_C(-1486060937), INT32_C( -104167420), INT32_C( 40162333)),
+ simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0),
+ INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767)) },
+ { simde_mm_set_epi32(INT32_C(-2128159397), INT32_C(-1560280329), INT32_C( 394598915), INT32_C( 425183512)),
+ simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0),
+ INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767)) },
+ { simde_mm_set_epi32(INT32_C( -746969032), INT32_C( -718350441), INT32_C( -917354043), INT32_C( -740787295)),
+ simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0),
+ INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768)) },
+ { simde_mm_set_epi32(INT32_C( 1489645447), INT32_C( 206085999), INT32_C(-1252698771), INT32_C( 1551612893)),
+ simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0),
+ INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767)) },
+ { simde_mm_set_epi32(INT32_C( -503631451), INT32_C( -997616848), INT32_C( -691694514), INT32_C( -383740168)),
+ simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0),
+ INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768)) },
+ { simde_mm_set_epi32(INT32_C(-1905774960), INT32_C( -8801329), INT32_C( 1184144481), INT32_C( 1001484550)),
+ simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0),
+ INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767)) },
+ { simde_mm_set_epi32(INT32_C( 1461694678), INT32_C( 2088398452), INT32_C(-1522574509), INT32_C(-1848551844)),
+ simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0),
+ INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768)) },
+ { simde_mm_set_epi32(INT32_C( 344147543), INT32_C( 1208328320), INT32_C( 846887925), INT32_C(-1224326570)),
+ simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0),
+ INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768)) }
+ };
+ /* Run every vector; the computed value must compare == to the expected one. */
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m128i r = simde_mm_cvtsepi32_epi16(test_vec[i].a);
+ simde_assert_m128i_i16(r, ==, test_vec[i].r);
+ }
+
+ return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm_mask_mov_epi8(const MunitParameter params[], void* data) {
+ (void) params; /* neither munit argument is used by this test */
+ (void) data;
+ /* Table-driven check of simde_mm_mask_mov_epi8: bit i of k chooses between a and src for byte i (bit 0 pairs with the last simde_mm_set_epi8 argument). */
+ const struct {
+ simde__m128i src; /* bytes expected in r where the mask bit is clear */
+ simde__mmask16 k; /* one selector bit per byte */
+ simde__m128i a; /* bytes expected in r where the mask bit is set */
+ simde__m128i r; /* expected result */
+ } test_vec[8] = {
+ { simde_mm_set_epi8(INT8_C( -98), INT8_C( -63), INT8_C( -58), INT8_C( 25),
+ INT8_C( 9), INT8_C( 49), INT8_C( -12), INT8_C( -31),
+ INT8_C( -48), INT8_C( 0), INT8_C( -99), INT8_C( -81),
+ INT8_C( -77), INT8_C( 27), INT8_C( -33), INT8_C(-124)),
+ UINT16_C(64699),
+ simde_mm_set_epi8(INT8_C( 79), INT8_C( 100), INT8_C(-123), INT8_C( 95),
+ INT8_C( -79), INT8_C( 48), INT8_C( 112), INT8_C( 8),
+ INT8_C( 69), INT8_C( -78), INT8_C( 54), INT8_C( -48),
+ INT8_C(-119), INT8_C( -5), INT8_C( -97), INT8_C( -44)),
+ simde_mm_set_epi8(INT8_C( 79), INT8_C( 100), INT8_C(-123), INT8_C( 95),
+ INT8_C( -79), INT8_C( 48), INT8_C( -12), INT8_C( -31),
+ INT8_C( 69), INT8_C( 0), INT8_C( 54), INT8_C( -48),
+ INT8_C(-119), INT8_C( 27), INT8_C( -97), INT8_C( -44)) },
+ { simde_mm_set_epi8(INT8_C( 47), INT8_C( 36), INT8_C( 45), INT8_C( -44),
+ INT8_C( 27), INT8_C( -15), INT8_C( 105), INT8_C( -69),
+ INT8_C( -10), INT8_C( 1), INT8_C( 12), INT8_C( -44),
+ INT8_C( -32), INT8_C( 113), INT8_C( 105), INT8_C( -92)),
+ UINT16_C(33046),
+ simde_mm_set_epi8(INT8_C(-118), INT8_C( -61), INT8_C( 108), INT8_C( 4),
+ INT8_C( 56), INT8_C( 96), INT8_C( -73), INT8_C( -39),
+ INT8_C(-112), INT8_C(-115), INT8_C(-113), INT8_C( -74),
+ INT8_C( -79), INT8_C(-116), INT8_C( 117), INT8_C( -3)),
+ simde_mm_set_epi8(INT8_C(-118), INT8_C( 36), INT8_C( 45), INT8_C( -44),
+ INT8_C( 27), INT8_C( -15), INT8_C( 105), INT8_C( -39),
+ INT8_C( -10), INT8_C( 1), INT8_C( 12), INT8_C( -74),
+ INT8_C( -32), INT8_C(-116), INT8_C( 117), INT8_C( -92)) },
+ { simde_mm_set_epi8(INT8_C( 41), INT8_C(-106), INT8_C( -67), INT8_C(-116),
+ INT8_C( -34), INT8_C( 21), INT8_C( 64), INT8_C( 44),
+ INT8_C( 97), INT8_C( -46), INT8_C( 122), INT8_C( 42),
+ INT8_C( -54), INT8_C( -79), INT8_C( 21), INT8_C( 59)),
+ UINT16_C(27487),
+ simde_mm_set_epi8(INT8_C( 6), INT8_C(-124), INT8_C(-111), INT8_C( -39),
+ INT8_C( 55), INT8_C( -55), INT8_C( -72), INT8_C( 77),
+ INT8_C( 51), INT8_C(-103), INT8_C( -80), INT8_C( 75),
+ INT8_C( -87), INT8_C(-120), INT8_C( -14), INT8_C( 99)),
+ simde_mm_set_epi8(INT8_C( 41), INT8_C(-124), INT8_C(-111), INT8_C(-116),
+ INT8_C( 55), INT8_C( 21), INT8_C( -72), INT8_C( 77),
+ INT8_C( 97), INT8_C(-103), INT8_C( 122), INT8_C( 75),
+ INT8_C( -87), INT8_C(-120), INT8_C( -14), INT8_C( 99)) },
+ { simde_mm_set_epi8(INT8_C( 31), INT8_C( -90), INT8_C(-127), INT8_C( 105),
+ INT8_C( -89), INT8_C(-121), INT8_C(-110), INT8_C( -58),
+ INT8_C( -95), INT8_C(-101), INT8_C( -56), INT8_C( 22),
+ INT8_C( 18), INT8_C( 2), INT8_C( 46), INT8_C(-125)),
+ UINT16_C(48165),
+ simde_mm_set_epi8(INT8_C( 103), INT8_C( 26), INT8_C( 108), INT8_C( 4),
+ INT8_C( -49), INT8_C( -62), INT8_C(-103), INT8_C( -42),
+ INT8_C( 103), INT8_C( 115), INT8_C( 126), INT8_C(-112),
+ INT8_C( -81), INT8_C( -35), INT8_C(-106), INT8_C( 45)),
+ simde_mm_set_epi8(INT8_C( 103), INT8_C( -90), INT8_C( 108), INT8_C( 4),
+ INT8_C( -49), INT8_C( -62), INT8_C(-110), INT8_C( -58),
+ INT8_C( -95), INT8_C(-101), INT8_C( 126), INT8_C( 22),
+ INT8_C( 18), INT8_C( -35), INT8_C( 46), INT8_C( 45)) },
+ { simde_mm_set_epi8(INT8_C( 106), INT8_C( 23), INT8_C( -78), INT8_C( -57),
+ INT8_C( 24), INT8_C( 56), INT8_C( -46), INT8_C( -15),
+ INT8_C( -33), INT8_C( 28), INT8_C( -40), INT8_C(-116),
+ INT8_C( -34), INT8_C( 92), INT8_C( 109), INT8_C( 33)),
+ UINT16_C(14870),
+ simde_mm_set_epi8(INT8_C( -75), INT8_C( 55), INT8_C(-127), INT8_C( 70),
+ INT8_C( 78), INT8_C( 126), INT8_C( -96), INT8_C( 119),
+ INT8_C( 108), INT8_C( 50), INT8_C( 17), INT8_C( -71),
+ INT8_C( 127), INT8_C( 91), INT8_C( 110), INT8_C( -90)),
+ simde_mm_set_epi8(INT8_C( 106), INT8_C( 23), INT8_C(-127), INT8_C( 70),
+ INT8_C( 78), INT8_C( 56), INT8_C( -96), INT8_C( -15),
+ INT8_C( -33), INT8_C( 28), INT8_C( -40), INT8_C( -71),
+ INT8_C( -34), INT8_C( 91), INT8_C( 110), INT8_C( 33)) },
+ { simde_mm_set_epi8(INT8_C( -21), INT8_C(-122), INT8_C(-127), INT8_C( 95),
+ INT8_C( -34), INT8_C( -51), INT8_C( 107), INT8_C( 75),
+ INT8_C( 63), INT8_C(-117), INT8_C(-118), INT8_C( 52),
+ INT8_C( 15), INT8_C( 123), INT8_C( -76), INT8_C(-117)),
+ UINT16_C(54314),
+ simde_mm_set_epi8(INT8_C( 124), INT8_C( -12), INT8_C( 0), INT8_C( -14),
+ INT8_C( -54), INT8_C( 92), INT8_C( 73), INT8_C( 69),
+ INT8_C( -47), INT8_C( -62), INT8_C( 113), INT8_C( 100),
+ INT8_C( 31), INT8_C( -98), INT8_C( -86), INT8_C( 19)),
+ simde_mm_set_epi8(INT8_C( 124), INT8_C( -12), INT8_C(-127), INT8_C( -14),
+ INT8_C( -34), INT8_C( 92), INT8_C( 107), INT8_C( 75),
+ INT8_C( 63), INT8_C(-117), INT8_C( 113), INT8_C( 52),
+ INT8_C( 31), INT8_C( 123), INT8_C( -86), INT8_C(-117)) },
+ { simde_mm_set_epi8(INT8_C( -9), INT8_C( -43), INT8_C( 83), INT8_C( 21),
+ INT8_C( 88), INT8_C( -52), INT8_C(-115), INT8_C( 63),
+ INT8_C( 92), INT8_C( -15), INT8_C( -24), INT8_C( -84),
+ INT8_C(-120), INT8_C( -96), INT8_C( 46), INT8_C( -78)),
+ UINT16_C(44998),
+ simde_mm_set_epi8(INT8_C( -10), INT8_C( 79), INT8_C(-113), INT8_C( -93),
+ INT8_C( 24), INT8_C( 78), INT8_C( 40), INT8_C( 22),
+ INT8_C( 31), INT8_C( -15), INT8_C( -8), INT8_C( 60),
+ INT8_C( 114), INT8_C( -85), INT8_C(-105), INT8_C( -47)),
+ simde_mm_set_epi8(INT8_C( -10), INT8_C( -43), INT8_C(-113), INT8_C( 21),
+ INT8_C( 24), INT8_C( 78), INT8_C( 40), INT8_C( 22),
+ INT8_C( 31), INT8_C( -15), INT8_C( -24), INT8_C( -84),
+ INT8_C(-120), INT8_C( -85), INT8_C(-105), INT8_C( -78)) },
+ { simde_mm_set_epi8(INT8_C( -62), INT8_C( 117), INT8_C(-114), INT8_C( 7),
+ INT8_C( 17), INT8_C( 123), INT8_C( -2), INT8_C( -15),
+ INT8_C(-120), INT8_C( 77), INT8_C( 81), INT8_C( -39),
+ INT8_C(-114), INT8_C( -52), INT8_C(-119), INT8_C( 82)),
+ UINT16_C(48425),
+ simde_mm_set_epi8(INT8_C( 68), INT8_C( -65), INT8_C( 13), INT8_C( -27),
+ INT8_C( 55), INT8_C( 2), INT8_C( -43), INT8_C( 9),
+ INT8_C( -57), INT8_C( 65), INT8_C(-111), INT8_C( -60),
+ INT8_C( 75), INT8_C( 74), INT8_C( 16), INT8_C( 19)),
+ simde_mm_set_epi8(INT8_C( 68), INT8_C( 117), INT8_C( 13), INT8_C( -27),
+ INT8_C( 55), INT8_C( 2), INT8_C( -2), INT8_C( 9),
+ INT8_C(-120), INT8_C( 77), INT8_C(-111), INT8_C( -39),
+ INT8_C( 75), INT8_C( -52), INT8_C(-119), INT8_C( 19)) }
+ };
+ /* Run every vector; the computed value must compare == to the expected one. */
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m128i r = simde_mm_mask_mov_epi8(test_vec[i].src, test_vec[i].k, test_vec[i].a);
+ simde_assert_m128i_i8(r, ==, test_vec[i].r);
+ }
+
+ return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm_mask_mov_epi16(const MunitParameter params[], void* data) {
+ (void) params; /* neither munit argument is used by this test */
+ (void) data;
+ /* Table-driven check of simde_mm_mask_mov_epi16: bit i of k chooses between a and src for word i (bit 0 pairs with the last simde_mm_set_epi16 argument). */
+ const struct {
+ simde__m128i src; /* words expected in r where the mask bit is clear */
+ simde__mmask8 k; /* one selector bit per 16-bit word */
+ simde__m128i a; /* words expected in r where the mask bit is set */
+ simde__m128i r; /* expected result */
+ } test_vec[8] = {
+ { simde_mm_set_epi16(INT16_C(-14576), INT16_C( 14205), INT16_C( -2433), INT16_C(-27972),
+ INT16_C( 6192), INT16_C(-29093), INT16_C( 24144), INT16_C(-19045)),
+ UINT8_C(231),
+ simde_mm_set_epi16(INT16_C(-16025), INT16_C( -5226), INT16_C( -6596), INT16_C(-11796),
+ INT16_C(-24692), INT16_C( 20335), INT16_C( 26237), INT16_C( 23499)),
+ simde_mm_set_epi16(INT16_C(-16025), INT16_C( -5226), INT16_C( -6596), INT16_C(-27972),
+ INT16_C( 6192), INT16_C( 20335), INT16_C( 26237), INT16_C( 23499)) },
+ { simde_mm_set_epi16(INT16_C( -839), INT16_C(-10951), INT16_C( 1310), INT16_C( -6285),
+ INT16_C(-21252), INT16_C( -7582), INT16_C(-12381), INT16_C( 24902)),
+ UINT8_C( 7),
+ simde_mm_set_epi16(INT16_C( -3233), INT16_C( 25022), INT16_C(-12043), INT16_C( 17022),
+ INT16_C(-25543), INT16_C(-17145), INT16_C( 8881), INT16_C( 28844)),
+ simde_mm_set_epi16(INT16_C( -839), INT16_C(-10951), INT16_C( 1310), INT16_C( -6285),
+ INT16_C(-21252), INT16_C(-17145), INT16_C( 8881), INT16_C( 28844)) },
+ { simde_mm_set_epi16(INT16_C( 30807), INT16_C( 12936), INT16_C(-14387), INT16_C(-15179),
+ INT16_C( 23907), INT16_C(-17160), INT16_C( 23916), INT16_C( 14132)),
+ UINT8_C(139),
+ simde_mm_set_epi16(INT16_C( -1315), INT16_C(-31661), INT16_C(-10075), INT16_C(-22609),
+ INT16_C( 9167), INT16_C( 6456), INT16_C( -7329), INT16_C( -8326)),
+ simde_mm_set_epi16(INT16_C( -1315), INT16_C( 12936), INT16_C(-14387), INT16_C(-15179),
+ INT16_C( 9167), INT16_C(-17160), INT16_C( -7329), INT16_C( -8326)) },
+ { simde_mm_set_epi16(INT16_C( 26421), INT16_C(-12708), INT16_C( 22525), INT16_C(-31426),
+ INT16_C( 15010), INT16_C(-27490), INT16_C(-12766), INT16_C(-25791)),
+ UINT8_C( 65),
+ simde_mm_set_epi16(INT16_C( -1553), INT16_C(-19304), INT16_C( 20094), INT16_C( -2808),
+ INT16_C(-12327), INT16_C( 15252), INT16_C( 25789), INT16_C(-23968)),
+ simde_mm_set_epi16(INT16_C( 26421), INT16_C(-19304), INT16_C( 22525), INT16_C(-31426),
+ INT16_C( 15010), INT16_C(-27490), INT16_C(-12766), INT16_C(-23968)) },
+ { simde_mm_set_epi16(INT16_C( 7823), INT16_C( 19443), INT16_C( 13219), INT16_C( 17015),
+ INT16_C(-11739), INT16_C(-13030), INT16_C(-14482), INT16_C(-27926)),
+ UINT8_C(249),
+ simde_mm_set_epi16(INT16_C(-25131), INT16_C( 30189), INT16_C(-22900), INT16_C( 28700),
+ INT16_C( 1116), INT16_C( 30184), INT16_C(-12164), INT16_C( -7443)),
+ simde_mm_set_epi16(INT16_C(-25131), INT16_C( 30189), INT16_C(-22900), INT16_C( 28700),
+ INT16_C( 1116), INT16_C(-13030), INT16_C(-14482), INT16_C( -7443)) },
+ { simde_mm_set_epi16(INT16_C(-26628), INT16_C( 25963), INT16_C(-26322), INT16_C( -8077),
+ INT16_C(-22868), INT16_C( 28633), INT16_C( -4168), INT16_C( 28595)),
+ UINT8_C(112),
+ simde_mm_set_epi16(INT16_C( 14185), INT16_C( -5351), INT16_C( -8435), INT16_C(-11233),
+ INT16_C( -8273), INT16_C(-29718), INT16_C( -8221), INT16_C( 18236)),
+ simde_mm_set_epi16(INT16_C(-26628), INT16_C( -5351), INT16_C( -8435), INT16_C(-11233),
+ INT16_C(-22868), INT16_C( 28633), INT16_C( -4168), INT16_C( 28595)) },
+ { simde_mm_set_epi16(INT16_C(-14557), INT16_C(-28064), INT16_C( 11696), INT16_C(-19213),
+ INT16_C( 15613), INT16_C( 26380), INT16_C( 30063), INT16_C( 26293)),
+ UINT8_C( 24),
+ simde_mm_set_epi16(INT16_C( 23790), INT16_C( 10772), INT16_C( -8418), INT16_C(-27527),
+ INT16_C( -163), INT16_C( 10898), INT16_C(-12995), INT16_C( 287)),
+ simde_mm_set_epi16(INT16_C(-14557), INT16_C(-28064), INT16_C( 11696), INT16_C(-27527),
+ INT16_C( -163), INT16_C( 26380), INT16_C( 30063), INT16_C( 26293)) },
+ { simde_mm_set_epi16(INT16_C(-14768), INT16_C(-23816), INT16_C(-22775), INT16_C( -4812),
+ INT16_C(-19595), INT16_C(-14349), INT16_C( 11039), INT16_C( 15081)),
+ UINT8_C( 22),
+ simde_mm_set_epi16(INT16_C( 27063), INT16_C( 8226), INT16_C(-13582), INT16_C( 14344),
+ INT16_C(-27643), INT16_C( -1125), INT16_C(-27147), INT16_C( -4132)),
+ simde_mm_set_epi16(INT16_C(-14768), INT16_C(-23816), INT16_C(-22775), INT16_C( 14344),
+ INT16_C(-19595), INT16_C( -1125), INT16_C(-27147), INT16_C( 15081)) }
+ };
+ /* Run every vector; the computed value must compare == to the expected one. */
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m128i r = simde_mm_mask_mov_epi16(test_vec[i].src, test_vec[i].k, test_vec[i].a);
+ simde_assert_m128i_i16(r, ==, test_vec[i].r);
+ }
+
+ return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm_mask_mov_epi32(const MunitParameter params[], void* data) {
+ (void) params; /* neither munit argument is used by this test */
+ (void) data;
+ /* Table-driven check of simde_mm_mask_mov_epi32: bit i of k chooses between a and src for element i (bit 0 pairs with the last simde_mm_set_epi32 argument). */
+ const struct {
+ simde__m128i src; /* elements expected in r where the mask bit is clear */
+ simde__mmask8 k; /* selector bits; in these vectors only the low four bits affect the expected r */
+ simde__m128i a; /* elements expected in r where the mask bit is set */
+ simde__m128i r; /* expected result */
+ } test_vec[8] = {
+ { simde_mm_set_epi32(INT32_C(-1311777535), INT32_C( 871351059), INT32_C(-1795529748), INT32_C(-1018886524)),
+ UINT8_C(193),
+ simde_mm_set_epi32(INT32_C(-1402384713), INT32_C( 349677639), INT32_C(-2062419968), INT32_C(-2110667873)),
+ simde_mm_set_epi32(INT32_C(-1311777535), INT32_C( 871351059), INT32_C(-1795529748), INT32_C(-2110667873)) },
+ { simde_mm_set_epi32(INT32_C( 738895957), INT32_C(-2052149671), INT32_C( 1275190943), INT32_C(-1073987906)),
+ UINT8_C(211),
+ simde_mm_set_epi32(INT32_C( 899624021), INT32_C(-1740875066), INT32_C( 196568235), INT32_C( 146964985)),
+ simde_mm_set_epi32(INT32_C( 738895957), INT32_C(-2052149671), INT32_C( 196568235), INT32_C( 146964985)) },
+ { simde_mm_set_epi32(INT32_C( 692992965), INT32_C( 836600954), INT32_C(-1461227321), INT32_C( -625910795)),
+ UINT8_C(122),
+ simde_mm_set_epi32(INT32_C(-1617549669), INT32_C( 1989374100), INT32_C(-1502577107), INT32_C(-1017994073)),
+ simde_mm_set_epi32(INT32_C(-1617549669), INT32_C( 836600954), INT32_C(-1502577107), INT32_C( -625910795)) },
+ { simde_mm_set_epi32(INT32_C( 1143677167), INT32_C( 846204550), INT32_C( -804913221), INT32_C( 1445583278)),
+ UINT8_C(231),
+ simde_mm_set_epi32(INT32_C(-1730413187), INT32_C(-1695584840), INT32_C( -227526716), INT32_C( -3425875)),
+ simde_mm_set_epi32(INT32_C( 1143677167), INT32_C(-1695584840), INT32_C( -227526716), INT32_C( -3425875)) },
+ { simde_mm_set_epi32(INT32_C( 645689114), INT32_C(-2084714818), INT32_C( 1764055823), INT32_C( 52635923)),
+ UINT8_C( 92),
+ simde_mm_set_epi32(INT32_C(-1571852402), INT32_C( 630152776), INT32_C( -128726906), INT32_C( 1269444726)),
+ simde_mm_set_epi32(INT32_C(-1571852402), INT32_C( 630152776), INT32_C( 1764055823), INT32_C( 52635923)) },
+ { simde_mm_set_epi32(INT32_C( 1563221), INT32_C( -134802286), INT32_C( 714712077), INT32_C(-1827172967)),
+ UINT8_C( 81),
+ simde_mm_set_epi32(INT32_C( 1929131576), INT32_C(-1816110300), INT32_C( 1278219947), INT32_C( 1799312980)),
+ simde_mm_set_epi32(INT32_C( 1563221), INT32_C( -134802286), INT32_C( 714712077), INT32_C( 1799312980)) },
+ { simde_mm_set_epi32(INT32_C( 398082434), INT32_C(-1574168894), INT32_C( -78364073), INT32_C(-1210427726)),
+ UINT8_C( 81),
+ simde_mm_set_epi32(INT32_C( -743499294), INT32_C(-2007549651), INT32_C( 404949426), INT32_C(-1228263526)),
+ simde_mm_set_epi32(INT32_C( 398082434), INT32_C(-1574168894), INT32_C( -78364073), INT32_C(-1228263526)) },
+ { simde_mm_set_epi32(INT32_C( -588057094), INT32_C(-1885829296), INT32_C( 1969228625), INT32_C( 1326338893)),
+ UINT8_C(219),
+ simde_mm_set_epi32(INT32_C( 1932026039), INT32_C(-1013786585), INT32_C( 1485053584), INT32_C( 1979373999)),
+ simde_mm_set_epi32(INT32_C( 1932026039), INT32_C(-1885829296), INT32_C( 1485053584), INT32_C( 1979373999)) }
+ };
+ /* Run every vector; the computed value must compare == to the expected one. */
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m128i r = simde_mm_mask_mov_epi32(test_vec[i].src, test_vec[i].k, test_vec[i].a);
+ simde_assert_m128i_i32(r, ==, test_vec[i].r);
+ }
+
+ return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm_mask_mov_epi64(const MunitParameter params[], void* data) {
+ (void) params; /* neither munit argument is used by this test */
+ (void) data;
+ /* Table-driven check of simde_mm_mask_mov_epi64: bit i of k chooses between a and src for element i (bit 0 pairs with the second simde_mm_set_epi64x argument). */
+ const struct {
+ simde__m128i src; /* elements expected in r where the mask bit is clear */
+ simde__mmask8 k; /* selector bits; in these vectors only the low two bits affect the expected r */
+ simde__m128i a; /* elements expected in r where the mask bit is set */
+ simde__m128i r; /* expected result */
+ } test_vec[8] = {
+ { simde_mm_set_epi64x(INT64_C( 2277107027088284737), INT64_C( -794576880036979785)),
+ UINT8_C(133),
+ simde_mm_set_epi64x(INT64_C( 8097713530582561529), INT64_C( 1585963766693842069)),
+ simde_mm_set_epi64x(INT64_C( 2277107027088284737), INT64_C( 1585963766693842069)) },
+ { simde_mm_set_epi64x(INT64_C( 386114209698075166), INT64_C( 5207265957388900927)),
+ UINT8_C(158),
+ simde_mm_set_epi64x(INT64_C( 8803705323655107871), INT64_C(-8422781366242531322)),
+ simde_mm_set_epi64x(INT64_C( 8803705323655107871), INT64_C( 5207265957388900927)) },
+ { simde_mm_set_epi64x(INT64_C(-2685854854617637911), INT64_C( 5000183764696508529)),
+ UINT8_C(188),
+ simde_mm_set_epi64x(INT64_C( 3366037084418714211), INT64_C(-4379786006937181803)),
+ simde_mm_set_epi64x(INT64_C(-2685854854617637911), INT64_C( 5000183764696508529)) },
+ { simde_mm_set_epi64x(INT64_C( 5087362917606608352), INT64_C( 7748994405920281726)),
+ UINT8_C( 72),
+ simde_mm_set_epi64x(INT64_C(-3993157906773187111), INT64_C( 5848124444216740966)),
+ simde_mm_set_epi64x(INT64_C( 5087362917606608352), INT64_C( 7748994405920281726)) },
+ { simde_mm_set_epi64x(INT64_C(-6262495515547444433), INT64_C( 3943684472219148405)),
+ UINT8_C( 56),
+ simde_mm_set_epi64x(INT64_C( 6021985363878171356), INT64_C(-9003751561505293092)),
+ simde_mm_set_epi64x(INT64_C(-6262495515547444433), INT64_C( 3943684472219148405)) },
+ { simde_mm_set_epi64x(INT64_C( 7378184861631570903), INT64_C( 5065745925883054243)),
+ UINT8_C(107),
+ simde_mm_set_epi64x(INT64_C( 3940656342452910480), INT64_C( 3350136105944417294)),
+ simde_mm_set_epi64x(INT64_C( 3940656342452910480), INT64_C( 3350136105944417294)) },
+ { simde_mm_set_epi64x(INT64_C( 4422823463426654219), INT64_C( 1827699444722609855)),
+ UINT8_C( 23),
+ simde_mm_set_epi64x(INT64_C(-2966751886069965026), INT64_C(-8494473672325004777)),
+ simde_mm_set_epi64x(INT64_C(-2966751886069965026), INT64_C(-8494473672325004777)) },
+ { simde_mm_set_epi64x(INT64_C(-8917676865649705108), INT64_C( 6229148348133862992)),
+ UINT8_C( 48),
+ simde_mm_set_epi64x(INT64_C(-7968457113297908477), INT64_C(-6793891334661924961)),
+ simde_mm_set_epi64x(INT64_C(-8917676865649705108), INT64_C( 6229148348133862992)) }
+ };
+ /* Run every vector; the computed value must compare == to the expected one. */
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m128i r = simde_mm_mask_mov_epi64(test_vec[i].src, test_vec[i].k, test_vec[i].a);
+ simde_assert_m128i_i64(r, ==, test_vec[i].r);
+ }
+
+ return MUNIT_OK;
+}
+
#endif /* defined(SIMDE_AVX512VL_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) */
HEDLEY_DIAGNOSTIC_PUSH
@@ -184,10 +594,15 @@ HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL
static MunitTest test_suite_tests[] = {
#if defined(SIMDE_AVX512VL_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS)
-
SIMDE_TESTS_DEFINE_TEST(mm_cvtsepi16_epi8),
-
SIMDE_TESTS_DEFINE_TEST(mm256_cvtsepi16_epi8),
+ SIMDE_TESTS_DEFINE_TEST(mm_cvtsepi32_epi8),
+ SIMDE_TESTS_DEFINE_TEST(mm_cvtsepi32_epi16),
+
+ SIMDE_TESTS_DEFINE_TEST(mm_mask_mov_epi8),
+ SIMDE_TESTS_DEFINE_TEST(mm_mask_mov_epi16),
+ SIMDE_TESTS_DEFINE_TEST(mm_mask_mov_epi32),
+ SIMDE_TESTS_DEFINE_TEST(mm_mask_mov_epi64),
#endif /* defined(SIMDE_AVX512VL_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) */
{ NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }
View it on GitLab: https://salsa.debian.org/med-team/simde/-/commit/54ff0a0f2c27cc4914cc18e170cedabe9546b016
--
View it on GitLab: https://salsa.debian.org/med-team/simde/-/commit/54ff0a0f2c27cc4914cc18e170cedabe9546b016
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200421/f23df131/attachment-0001.html>
More information about the debian-med-commit mailing list