[med-svn] [Git][med-team/simde][master] 5 commits: Fix clang skip logic
Michael R. Crusoe
gitlab at salsa.debian.org
Tue Apr 14 10:57:40 BST 2020
Michael R. Crusoe pushed to branch master at Debian Med / simde
Commits:
5033fb8a by Michael R. Crusoe at 2020-04-13T11:39:17+02:00
Fix clang skip logic
- - - - -
c2f16f4c by Michael R. Crusoe at 2020-04-14T11:51:22+02:00
New upstream version 0.0.0.git.20200414
- - - - -
325aa50f by Michael R. Crusoe at 2020-04-14T11:51:22+02:00
routine-update: New upstream version
- - - - -
89c851f5 by Michael R. Crusoe at 2020-04-14T11:51:24+02:00
Update upstream source from tag 'upstream/0.0.0.git.20200414'
Update to upstream version '0.0.0.git.20200414'
with Debian dir 59cd5058a2fc66c58defb6d18f52a74dfb2fc40d
- - - - -
9f2751b0 by Michael R. Crusoe at 2020-04-14T11:53:02+02:00
routine-update: Ready to upload to unstable
- - - - -
7 changed files:
- .drone.star
- debian/changelog
- debian/rules
- simde/x86/avx2.h
- simde/x86/avx512f.h
- test/x86/avx2.c
- test/x86/avx512f.c
Changes:
=====================================
.drone.star
=====================================
@@ -2,6 +2,28 @@
# Drone CI Starlark configuration file.
# https://docs.drone.io/pipeline/scripting/starlark/
# Run `drone starlark convert --stdout` to verify `.drone.star`.
+def get_test_commands():
+ return [
+ "mkdir -p build",
+ "cd build",
+ 'CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson ..',
+ "ninja -v",
+ "./test/run-tests",
+ ]
+
+def get_apt_install_commands(extra_pkgs = []):
+ return [
+ "apt-get -yq update",
+ "apt-get -yq install %s ninja-build git-core python3-pip" % " ".join(extra_pkgs),
+ "pip3 install meson",
+ ]
+
+def get_dnf_install_commands(extra_pkgs = []):
+ return [
+ "dnf install -y %s ninja-build git-core python3-pip" % " ".join(extra_pkgs),
+ "pip3 install meson",
+ ]
+
def get_default_job():
return {
"kind": "pipeline",
@@ -24,16 +46,11 @@ def get_default_job():
"uname -m",
"cat /proc/cpuinfo",
],
+ "install": [],
"before_script": [
"git submodule --quiet update --init --recursive",
],
- "script": [
- "mkdir -p build",
- "cd build",
- 'CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson ..',
- "ninja -v",
- "./test/run-tests",
- ]
+ "script": get_test_commands()
}
}
@@ -53,11 +70,7 @@ def get_jobs():
}
],
"custom": {
- "install": [
- "apt-get -yq update",
- "apt-get -yq install clang-9 ninja-build git-core python3-pip",
- "pip3 install meson",
- ]
+ "install": get_apt_install_commands(["clang-9"])
}
}
@@ -76,11 +89,7 @@ def get_jobs():
}
],
"custom": {
- "install": [
- "apt-get -yq update",
- "apt-get -yq install clang-9 ninja-build git-core python3-pip",
- "pip3 install meson",
- ]
+ "install": get_apt_install_commands(["clang-9"])
}
}
@@ -99,11 +108,7 @@ def get_jobs():
}
],
"custom": {
- "install": [
- "apt-get -yq update",
- "apt-get -yq install gcc-8 g++-8 ninja-build git-core python3-pip",
- "pip3 install meson",
- ]
+ "install": get_apt_install_commands(["gcc-8", "g++-8"])
}
}
@@ -122,11 +127,7 @@ def get_jobs():
}
],
"custom": {
- "install": [
- "apt-get -yq update",
- "apt-get -yq install gcc-8 g++-8 ninja-build git-core python3-pip",
- "pip3 install meson",
- ]
+ "install": get_apt_install_commands(["gcc-8", "g++-8"])
}
}
@@ -145,11 +146,7 @@ def get_jobs():
}
],
"custom": {
- "install": [
- "apt-get -yq update",
- "apt-get -yq install clang-7 ninja-build git-core python3-pip",
- "pip3 install meson",
- ]
+ "install": get_apt_install_commands(["clang-7"])
}
}
@@ -168,11 +165,7 @@ def get_jobs():
}
],
"custom": {
- "install": [
- "apt-get -yq update",
- "apt-get -yq install clang-7 ninja-build git-core python3-pip",
- "pip3 install meson",
- ]
+ "install": get_apt_install_commands(["clang-7"])
}
}
@@ -191,11 +184,7 @@ def get_jobs():
}
],
"custom": {
- "install": [
- "apt-get -yq update",
- "apt-get -yq install gcc-7 g++-7 ninja-build git-core python3-pip",
- "pip3 install meson",
- ]
+ "install": get_apt_install_commands(["gcc-7", "g++-7"])
}
}
@@ -214,11 +203,7 @@ def get_jobs():
}
],
"custom": {
- "install": [
- "apt-get -yq update",
- "apt-get -yq install gcc-7 g++-7 ninja-build git-core python3-pip",
- "pip3 install meson",
- ]
+ "install": get_apt_install_commands(["gcc-7", "g++-7"])
}
}
@@ -235,10 +220,7 @@ def get_jobs():
}
],
"custom": {
- "install": [
- "dnf install -y gcc gcc-c++ ninja-build git-core python3-pip",
- "pip3 install meson",
- ]
+ "install": get_dnf_install_commands(["gcc", "gcc-c++"])
}
}
@@ -258,21 +240,13 @@ def get_jobs():
}
],
"custom": {
- "install": [
- "dnf install -y clang ninja-build git-core python3-pip",
- "pip3 install meson",
- ],
+ "install": get_dnf_install_commands(["clang"]),
"script": [
- "mkdir -p build",
- "cd build",
# optflags RPM macro works with gcc.
# Some flags and specs are not available with clang.
# https://lists.fedoraproject.org/archives/list/packaging@lists.fedoraproject.org/message/W5UFLUADNB4VF3OBUBSNAPOQL6XBCP74/
"ARCH_FLAGS=$(rpm -E '%{optflags}' | sed -e 's| -fstack-clash-protection||' -e 's| -specs=[^ ]*||g')",
- 'CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson ..',
- "ninja -v",
- "./test/run-tests",
- ]
+ ] + get_test_commands()
}
}
@@ -305,10 +279,8 @@ def main(ctx):
out[key] = value
# Create commands list from custom elements.
- out["steps"][0]["commands"].extend(out["custom"]["before_install"])
- out["steps"][0]["commands"].extend(out["custom"]["install"])
- out["steps"][0]["commands"].extend(out["custom"]["before_script"])
- out["steps"][0]["commands"].extend(out["custom"]["script"])
+ for element in ["before_install", "install", "before_script", "script"]:
+ out["steps"][0]["commands"].extend(out["custom"][element])
# Remove unused custom element.
out.pop("custom", None)
=====================================
debian/changelog
=====================================
@@ -1,3 +1,10 @@
+simde (0.0.0.git.20200414-1) unstable; urgency=medium
+
+ * Fix clang skip logic
+ * New upstream version
+
+ -- Michael R. Crusoe <michael.crusoe at gmail.com> Tue, 14 Apr 2020 11:51:26 +0200
+
simde (0.0.0.git.20200412-1) unstable; urgency=medium
* New upstream version
=====================================
debian/rules
=====================================
@@ -18,7 +18,7 @@ endif
override_dh_auto_configure:
ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS)))
CC=gcc CXX=g++ dh_auto_configure --builddirectory=gcc_test --sourcedirectory=test
- ifneq (,$(filter $(DEB_HOST_ARCH),$(CLANG_SKIP)))
+ ifeq (,$(filter $(DEB_HOST_ARCH),$(CLANG_SKIP)))
CC=clang CXX=clang++ dh_auto_configure --builddirectory=clang_test --sourcedirectory=test
endif
endif
@@ -26,7 +26,7 @@ endif
override_dh_auto_build:
ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS)))
dh_auto_build --builddirectory=gcc_test --sourcedirectory=test
- ifneq (,$(filter $(DEB_HOST_ARCH),$(CLANG_SKIP)))
+ ifeq (,$(filter $(DEB_HOST_ARCH),$(CLANG_SKIP)))
dh_auto_build --builddirectory=clang_test --sourcedirectory=test
endif
endif
@@ -34,13 +34,13 @@ endif
override_dh_auto_test:
ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS)))
dh_auto_test --builddirectory=gcc_test --sourcedirectory=test
- ifneq (,$(filter $(DEB_HOST_ARCH),$(CLANG_SKIP)))
+ ifeq (,$(filter $(DEB_HOST_ARCH),$(CLANG_SKIP)))
dh_auto_test --builddirectory=clang_test --sourcedirectory=test
endif
endif
override_dh_auto_clean:
dh_auto_clean --builddirectory=gcc_test --sourcedirectory=test
-ifneq (,$(filter $(DEB_HOST_ARCH),$(CLANG_SKIP)))
+ifeq (,$(filter $(DEB_HOST_ARCH),$(CLANG_SKIP)))
dh_auto_clean --builddirectory=clang_test --sourcedirectory=test
endif
=====================================
simde/x86/avx2.h
=====================================
@@ -472,6 +472,52 @@ for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
# define _mm256_adds_epu16(a, b) simde_mm256_adds_epu16(a, b)
#endif
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m256i
+simde_mm256_avg_epu8 (simde__m256i a, simde__m256i b) {
+#if defined(SIMDE_AVX2_NATIVE)
+ return _mm256_avg_epu8(a, b);
+#else
+ simde__m256i_private
+ r_,
+ a_ = simde__m256i_to_private(a),
+ b_ = simde__m256i_to_private(b);
+
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
+ r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1;
+ }
+
+ return simde__m256i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
+# define _mm256_avg_epu8(a, b) simde_mm256_avg_epu8(a, b)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m256i
+simde_mm256_avg_epu16 (simde__m256i a, simde__m256i b) {
+#if defined(SIMDE_AVX2_NATIVE)
+ return _mm256_avg_epu16(a, b);
+#else
+ simde__m256i_private
+ r_,
+ a_ = simde__m256i_to_private(a),
+ b_ = simde__m256i_to_private(b);
+
+ SIMDE__VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
+ r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1;
+ }
+
+ return simde__m256i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
+# define _mm256_avg_epu16(a, b) simde_mm256_avg_epu16(a, b)
+#endif
+
SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_blend_epi32(simde__m128i a, simde__m128i b, const int imm8)
@@ -1619,7 +1665,7 @@ simde_mm256_permute2x128_si256 (simde__m256i a, simde__m256i b, const int imm8)
return simde__m256i_from_private(r_);
}
#if defined(SIMDE_AVX2_NATIVE)
-# define simde_mm256_permute2x128_si128(a, b, imm8) _mm256_permute2x128_si128(a, b, imm8)
+# define simde_mm256_permute2x128_si256(a, b, imm8) _mm256_permute2x128_si256(a, b, imm8)
#endif
#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
# define _mm256_permute2x128_si256(a, b, imm8) simde_mm256_permute2x128_si256(a, b, imm8)
=====================================
simde/x86/avx512f.h
=====================================
@@ -2309,6 +2309,19 @@ simde_mm512_mask_sub_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a,
# define _mm512_mask_sub_epi32(src, k, a, b) simde_mm512_mask_sub_epi32(src, k, a, b)
#endif
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_maskz_sub_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_maskz_sub_epi32(k, a, b);
+#else
+ return simde_mm512_maskz_mov_epi32(k, simde_mm512_sub_epi32(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+# define _mm512_maskz_sub_epi32(k, a, b) simde_mm512_maskz_sub_epi32(k, a, b)
+#endif
+
SIMDE__FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_sub_epi64 (simde__m512i a, simde__m512i b) {
@@ -2349,6 +2362,19 @@ simde_mm512_mask_sub_epi64 (simde__m512i src, simde__mmask8 k, simde__m512i a, s
# define _mm512_mask_sub_epi64(src, k, a, b) simde_mm512_mask_sub_epi64(src, k, a, b)
#endif
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_maskz_sub_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_maskz_sub_epi64(k, a, b);
+#else
+ return simde_mm512_maskz_mov_epi64(k, simde_mm512_sub_epi64(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+# define _mm512_maskz_sub_epi64(k, a, b) simde_mm512_maskz_sub_epi64(k, a, b)
+#endif
+
SIMDE__FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_sub_ps (simde__m512 a, simde__m512 b) {
@@ -2389,6 +2415,19 @@ simde_mm512_mask_sub_ps (simde__m512 src, simde__mmask16 k, simde__m512 a, simde
# define _mm512_mask_sub_ps(src, k, a, b) simde_mm512_mask_sub_ps(src, k, a, b)
#endif
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512
+simde_mm512_maskz_sub_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_maskz_sub_ps(k, a, b);
+#else
+ return simde_mm512_maskz_mov_ps(k, simde_mm512_sub_ps(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+# define _mm512_maskz_sub_ps(k, a, b) simde_mm512_maskz_sub_ps(k, a, b)
+#endif
+
SIMDE__FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_sub_pd (simde__m512d a, simde__m512d b) {
@@ -2429,6 +2468,19 @@ simde_mm512_mask_sub_pd (simde__m512d src, simde__mmask8 k, simde__m512d a, simd
# define _mm512_mask_sub_pd(src, k, a, b) simde_mm512_mask_sub_pd(src, k, a, b)
#endif
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512d
+simde_mm512_maskz_sub_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_maskz_sub_pd(k, a, b);
+#else
+ return simde_mm512_maskz_mov_pd(k, simde_mm512_sub_pd(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+# define _mm512_maskz_sub_pd(k, a, b) simde_mm512_maskz_sub_pd(k, a, b)
+#endif
+
SIMDE__FUNCTION_ATTRIBUTES
simde__mmask16
simde_mm512_cmpeq_epi32_mask (simde__m512i a, simde__m512i b) {
@@ -2746,6 +2798,19 @@ simde_mm512_div_ps (simde__m512 a, simde__m512 b) {
# define _mm512_div_ps(a, b) simde_mm512_div_ps(a, b)
#endif
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512
+simde_mm512_mask_div_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_mask_div_ps(src, k, a, b);
+#else
+ return simde_mm512_mask_mov_ps(src, k, simde_mm512_div_ps(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+# define _mm512_mask_div_ps(src, k, a, b) simde_mm512_mask_div_ps(src, k, a, b)
+#endif
+
SIMDE__FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_div_pd (simde__m512d a, simde__m512d b) {
@@ -2773,6 +2838,19 @@ simde_mm512_div_pd (simde__m512d a, simde__m512d b) {
# define _mm512_div_pd(a, b) simde_mm512_div_pd(a, b)
#endif
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512d
+simde_mm512_mask_div_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_mask_div_pd(src, k, a, b);
+#else
+ return simde_mm512_mask_mov_pd(src, k, simde_mm512_div_pd(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+# define _mm512_mask_div_pd(src, k, a, b) simde_mm512_mask_div_pd(src, k, a, b)
+#endif
+
SIMDE__FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mul_ps (simde__m512 a, simde__m512 b) {
@@ -2813,6 +2891,19 @@ simde_mm512_mask_mul_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde_
# define _mm512_mask_mul_ps(src, k, a, b) simde_mm512_mask_mul_ps(src, k, a, b)
#endif
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512
+simde_mm512_maskz_mul_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_maskz_mul_ps(k, a, b);
+#else
+ return simde_mm512_maskz_mov_ps(k, simde_mm512_mul_ps(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+# define _mm512_maskz_mul_ps(k, a, b) simde_mm512_maskz_mul_ps(k, a, b)
+#endif
+
SIMDE__FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mul_pd (simde__m512d a, simde__m512d b) {
@@ -2853,6 +2944,19 @@ simde_mm512_mask_mul_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde
# define _mm512_mask_mul_pd(src, k, a, b) simde_mm512_mask_mul_pd(src, k, a, b)
#endif
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512d
+simde_mm512_maskz_mul_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_maskz_mul_pd(k, a, b);
+#else
+ return simde_mm512_maskz_mov_pd(k, simde_mm512_mul_pd(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+# define _mm512_maskz_mul_pd(k, a, b) simde_mm512_maskz_mul_pd(k, a, b)
+#endif
+
SIMDE__FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_mul_epi32 (simde__m512i a, simde__m512i b) {
@@ -2899,6 +3003,19 @@ simde_mm512_mask_mul_epi32(simde__m512i src, simde__mmask8 k, simde__m512i a, si
# define _mm512_mask_mul_epi32(src, k, a, b) simde_mm512_mask_mul_epi32(src, k, a, b)
#endif
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_maskz_mul_epi32(simde__mmask8 k, simde__m512i a, simde__m512i b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_maskz_mul_epi32(k, a, b);
+#else
+ return simde_mm512_maskz_mov_epi64(k, simde_mm512_mul_epi32(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+# define _mm512_maskz_mul_epi32(k, a, b) simde_mm512_maskz_mul_epi32(k, a, b)
+#endif
+
SIMDE__FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_mul_epu32 (simde__m512i a, simde__m512i b) {
@@ -2946,6 +3063,19 @@ simde_mm512_mask_mul_epu32(simde__m512i src, simde__mmask8 k, simde__m512i a, si
# define _mm512_mask_mul_epu32(src, k, a, b) simde_mm512_mask_mul_epu32(src, k, a, b)
#endif
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_maskz_mul_epu32(simde__mmask8 k, simde__m512i a, simde__m512i b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_maskz_mul_epu32(k, a, b);
+#else
+ return simde_mm512_maskz_mov_epi64(k, simde_mm512_mul_epu32(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+# define _mm512_maskz_mul_epu32(k, a, b) simde_mm512_maskz_mul_epu32(k, a, b)
+#endif
+
SIMDE__FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_or_si512 (simde__m512i a, simde__m512i b) {
=====================================
test/x86/avx2.c
=====================================
@@ -2412,6 +2412,334 @@ test_simde_mm256_adds_epu16(const MunitParameter params[], void* data) {
return MUNIT_OK;
}
+static MunitResult
+test_simde_mm256_avg_epu8(const MunitParameter params[], void* data) {
+ (void) params;
+ (void) data;
+
+ const struct {
+ simde__m256i a;
+ simde__m256i b;
+ simde__m256i r;
+ } test_vec[8] = {
+ { simde_x_mm256_set_epu8(UINT8_C(132), UINT8_C(185), UINT8_C( 15), UINT8_C(235),
+ UINT8_C(102), UINT8_C( 8), UINT8_C(239), UINT8_C(181),
+ UINT8_C( 81), UINT8_C(155), UINT8_C(236), UINT8_C(191),
+ UINT8_C(133), UINT8_C( 65), UINT8_C( 73), UINT8_C( 40),
+ UINT8_C(181), UINT8_C( 86), UINT8_C( 73), UINT8_C(213),
+ UINT8_C( 85), UINT8_C( 28), UINT8_C( 53), UINT8_C(225),
+ UINT8_C(217), UINT8_C(129), UINT8_C( 68), UINT8_C(183),
+ UINT8_C(232), UINT8_C( 91), UINT8_C( 4), UINT8_C(129)),
+ simde_x_mm256_set_epu8(UINT8_C(199), UINT8_C(146), UINT8_C( 73), UINT8_C(129),
+ UINT8_C( 53), UINT8_C( 30), UINT8_C(178), UINT8_C(252),
+ UINT8_C(125), UINT8_C( 44), UINT8_C( 67), UINT8_C( 83),
+ UINT8_C( 52), UINT8_C( 79), UINT8_C(239), UINT8_C(118),
+ UINT8_C(100), UINT8_C( 25), UINT8_C( 74), UINT8_C( 78),
+ UINT8_C( 90), UINT8_C(145), UINT8_C(118), UINT8_C(211),
+ UINT8_C( 29), UINT8_C( 31), UINT8_C(128), UINT8_C( 53),
+ UINT8_C( 19), UINT8_C(239), UINT8_C(181), UINT8_C(108)),
+ simde_x_mm256_set_epu8(UINT8_C(166), UINT8_C(166), UINT8_C( 44), UINT8_C(182),
+ UINT8_C( 78), UINT8_C( 19), UINT8_C(209), UINT8_C(217),
+ UINT8_C(103), UINT8_C(100), UINT8_C(152), UINT8_C(137),
+ UINT8_C( 93), UINT8_C( 72), UINT8_C(156), UINT8_C( 79),
+ UINT8_C(141), UINT8_C( 56), UINT8_C( 74), UINT8_C(146),
+ UINT8_C( 88), UINT8_C( 87), UINT8_C( 86), UINT8_C(218),
+ UINT8_C(123), UINT8_C( 80), UINT8_C( 98), UINT8_C(118),
+ UINT8_C(126), UINT8_C(165), UINT8_C( 93), UINT8_C(119)) },
+ { simde_x_mm256_set_epu8(UINT8_C(209), UINT8_C(137), UINT8_C(159), UINT8_C(201),
+ UINT8_C(142), UINT8_C(123), UINT8_C(233), UINT8_C(210),
+ UINT8_C(180), UINT8_C( 1), UINT8_C( 19), UINT8_C( 5),
+ UINT8_C( 35), UINT8_C(203), UINT8_C(189), UINT8_C( 26),
+ UINT8_C(153), UINT8_C(140), UINT8_C( 62), UINT8_C(144),
+ UINT8_C( 20), UINT8_C(125), UINT8_C(185), UINT8_C(133),
+ UINT8_C( 90), UINT8_C(243), UINT8_C( 18), UINT8_C(117),
+ UINT8_C(102), UINT8_C(222), UINT8_C( 27), UINT8_C( 12)),
+ simde_x_mm256_set_epu8(UINT8_C(201), UINT8_C(154), UINT8_C( 75), UINT8_C(146),
+ UINT8_C( 84), UINT8_C(138), UINT8_C(110), UINT8_C( 18),
+ UINT8_C(142), UINT8_C(147), UINT8_C(192), UINT8_C(131),
+ UINT8_C(151), UINT8_C(232), UINT8_C(176), UINT8_C( 8),
+ UINT8_C(133), UINT8_C( 27), UINT8_C( 52), UINT8_C( 35),
+ UINT8_C(114), UINT8_C(148), UINT8_C(237), UINT8_C(121),
+ UINT8_C(209), UINT8_C( 97), UINT8_C(242), UINT8_C( 75),
+ UINT8_C(194), UINT8_C( 62), UINT8_C(242), UINT8_C(214)),
+ simde_x_mm256_set_epu8(UINT8_C(205), UINT8_C(146), UINT8_C(117), UINT8_C(174),
+ UINT8_C(113), UINT8_C(131), UINT8_C(172), UINT8_C(114),
+ UINT8_C(161), UINT8_C( 74), UINT8_C(106), UINT8_C( 68),
+ UINT8_C( 93), UINT8_C(218), UINT8_C(183), UINT8_C( 17),
+ UINT8_C(143), UINT8_C( 84), UINT8_C( 57), UINT8_C( 90),
+ UINT8_C( 67), UINT8_C(137), UINT8_C(211), UINT8_C(127),
+ UINT8_C(150), UINT8_C(170), UINT8_C(130), UINT8_C( 96),
+ UINT8_C(148), UINT8_C(142), UINT8_C(135), UINT8_C(113)) },
+ { simde_x_mm256_set_epu8(UINT8_C(223), UINT8_C( 80), UINT8_C( 95), UINT8_C( 57),
+ UINT8_C(173), UINT8_C( 14), UINT8_C( 75), UINT8_C( 79),
+ UINT8_C(206), UINT8_C( 37), UINT8_C(125), UINT8_C(225),
+ UINT8_C(172), UINT8_C(121), UINT8_C( 43), UINT8_C(249),
+ UINT8_C(122), UINT8_C( 3), UINT8_C( 25), UINT8_C(204),
+ UINT8_C(153), UINT8_C( 32), UINT8_C(245), UINT8_C( 58),
+ UINT8_C(211), UINT8_C(116), UINT8_C( 87), UINT8_C(228),
+ UINT8_C(139), UINT8_C(203), UINT8_C(106), UINT8_C(216)),
+ simde_x_mm256_set_epu8(UINT8_C( 18), UINT8_C(169), UINT8_C( 46), UINT8_C(100),
+ UINT8_C(246), UINT8_C( 6), UINT8_C(208), UINT8_C(224),
+ UINT8_C(134), UINT8_C(248), UINT8_C( 90), UINT8_C(243),
+ UINT8_C( 95), UINT8_C(216), UINT8_C(232), UINT8_C(245),
+ UINT8_C( 35), UINT8_C(118), UINT8_C( 23), UINT8_C(111),
+ UINT8_C(137), UINT8_C( 1), UINT8_C( 43), UINT8_C(212),
+ UINT8_C(143), UINT8_C(143), UINT8_C(106), UINT8_C(242),
+ UINT8_C(188), UINT8_C( 78), UINT8_C( 94), UINT8_C( 49)),
+ simde_x_mm256_set_epu8(UINT8_C(121), UINT8_C(125), UINT8_C( 71), UINT8_C( 79),
+ UINT8_C(210), UINT8_C( 10), UINT8_C(142), UINT8_C(152),
+ UINT8_C(170), UINT8_C(143), UINT8_C(108), UINT8_C(234),
+ UINT8_C(134), UINT8_C(169), UINT8_C(138), UINT8_C(247),
+ UINT8_C( 79), UINT8_C( 61), UINT8_C( 24), UINT8_C(158),
+ UINT8_C(145), UINT8_C( 17), UINT8_C(144), UINT8_C(135),
+ UINT8_C(177), UINT8_C(130), UINT8_C( 97), UINT8_C(235),
+ UINT8_C(164), UINT8_C(141), UINT8_C(100), UINT8_C(133)) },
+ { simde_x_mm256_set_epu8(UINT8_C(186), UINT8_C( 51), UINT8_C(166), UINT8_C(159),
+ UINT8_C( 61), UINT8_C(189), UINT8_C(148), UINT8_C(156),
+ UINT8_C(199), UINT8_C( 59), UINT8_C(214), UINT8_C( 21),
+ UINT8_C( 92), UINT8_C( 24), UINT8_C( 35), UINT8_C( 33),
+ UINT8_C( 27), UINT8_C(133), UINT8_C( 9), UINT8_C(114),
+ UINT8_C(170), UINT8_C( 78), UINT8_C(149), UINT8_C(203),
+ UINT8_C(212), UINT8_C(108), UINT8_C(116), UINT8_C(217),
+ UINT8_C(102), UINT8_C(192), UINT8_C(223), UINT8_C( 98)),
+ simde_x_mm256_set_epu8(UINT8_C( 51), UINT8_C(193), UINT8_C(129), UINT8_C(222),
+ UINT8_C(147), UINT8_C( 49), UINT8_C(210), UINT8_C(198),
+ UINT8_C(192), UINT8_C(158), UINT8_C( 49), UINT8_C(217),
+ UINT8_C( 20), UINT8_C(183), UINT8_C(213), UINT8_C( 71),
+ UINT8_C(164), UINT8_C( 92), UINT8_C(118), UINT8_C( 17),
+ UINT8_C(236), UINT8_C( 27), UINT8_C(162), UINT8_C( 98),
+ UINT8_C(196), UINT8_C(135), UINT8_C( 7), UINT8_C(172),
+ UINT8_C(233), UINT8_C( 47), UINT8_C(151), UINT8_C(128)),
+ simde_x_mm256_set_epu8(UINT8_C(119), UINT8_C(122), UINT8_C(148), UINT8_C(191),
+ UINT8_C(104), UINT8_C(119), UINT8_C(179), UINT8_C(177),
+ UINT8_C(196), UINT8_C(109), UINT8_C(132), UINT8_C(119),
+ UINT8_C( 56), UINT8_C(104), UINT8_C(124), UINT8_C( 52),
+ UINT8_C( 96), UINT8_C(113), UINT8_C( 64), UINT8_C( 66),
+ UINT8_C(203), UINT8_C( 53), UINT8_C(156), UINT8_C(151),
+ UINT8_C(204), UINT8_C(122), UINT8_C( 62), UINT8_C(195),
+ UINT8_C(168), UINT8_C(120), UINT8_C(187), UINT8_C(113)) },
+ { simde_x_mm256_set_epu8(UINT8_C(182), UINT8_C(141), UINT8_C( 93), UINT8_C( 91),
+ UINT8_C( 0), UINT8_C(189), UINT8_C(215), UINT8_C(221),
+ UINT8_C(105), UINT8_C(231), UINT8_C( 61), UINT8_C(224),
+ UINT8_C( 68), UINT8_C( 84), UINT8_C(247), UINT8_C(215),
+ UINT8_C(125), UINT8_C(197), UINT8_C( 69), UINT8_C(102),
+ UINT8_C(218), UINT8_C(120), UINT8_C(113), UINT8_C(175),
+ UINT8_C(134), UINT8_C( 33), UINT8_C(106), UINT8_C(117),
+ UINT8_C(129), UINT8_C(249), UINT8_C(194), UINT8_C( 70)),
+ simde_x_mm256_set_epu8(UINT8_C(137), UINT8_C( 27), UINT8_C( 17), UINT8_C( 94),
+ UINT8_C(244), UINT8_C(142), UINT8_C(142), UINT8_C( 48),
+ UINT8_C( 54), UINT8_C(217), UINT8_C(209), UINT8_C(122),
+ UINT8_C( 1), UINT8_C(190), UINT8_C( 59), UINT8_C(250),
+ UINT8_C(179), UINT8_C(176), UINT8_C(167), UINT8_C( 57),
+ UINT8_C( 90), UINT8_C( 15), UINT8_C( 31), UINT8_C(140),
+ UINT8_C(209), UINT8_C(104), UINT8_C(139), UINT8_C(154),
+ UINT8_C( 57), UINT8_C(248), UINT8_C(225), UINT8_C( 65)),
+ simde_x_mm256_set_epu8(UINT8_C(160), UINT8_C( 84), UINT8_C( 55), UINT8_C( 93),
+ UINT8_C(122), UINT8_C(166), UINT8_C(179), UINT8_C(135),
+ UINT8_C( 80), UINT8_C(224), UINT8_C(135), UINT8_C(173),
+ UINT8_C( 35), UINT8_C(137), UINT8_C(153), UINT8_C(233),
+ UINT8_C(152), UINT8_C(187), UINT8_C(118), UINT8_C( 80),
+ UINT8_C(154), UINT8_C( 68), UINT8_C( 72), UINT8_C(158),
+ UINT8_C(172), UINT8_C( 69), UINT8_C(123), UINT8_C(136),
+ UINT8_C( 93), UINT8_C(249), UINT8_C(210), UINT8_C( 68)) },
+ { simde_x_mm256_set_epu8(UINT8_C(125), UINT8_C(242), UINT8_C( 34), UINT8_C(120),
+ UINT8_C(106), UINT8_C(202), UINT8_C(100), UINT8_C( 61),
+ UINT8_C(105), UINT8_C(145), UINT8_C( 46), UINT8_C(129),
+ UINT8_C(208), UINT8_C( 57), UINT8_C( 82), UINT8_C( 21),
+ UINT8_C( 59), UINT8_C( 97), UINT8_C(206), UINT8_C( 4),
+ UINT8_C(182), UINT8_C( 81), UINT8_C(203), UINT8_C(252),
+ UINT8_C(111), UINT8_C( 28), UINT8_C(210), UINT8_C( 57),
+ UINT8_C(214), UINT8_C(124), UINT8_C(137), UINT8_C(114)),
+ simde_x_mm256_set_epu8(UINT8_C(208), UINT8_C( 60), UINT8_C( 51), UINT8_C( 61),
+ UINT8_C(249), UINT8_C(203), UINT8_C( 69), UINT8_C(195),
+ UINT8_C( 16), UINT8_C( 67), UINT8_C(241), UINT8_C(244),
+ UINT8_C(217), UINT8_C(201), UINT8_C(104), UINT8_C( 80),
+ UINT8_C( 30), UINT8_C( 40), UINT8_C( 69), UINT8_C( 88),
+ UINT8_C( 83), UINT8_C(141), UINT8_C(221), UINT8_C(174),
+ UINT8_C(165), UINT8_C(114), UINT8_C(107), UINT8_C( 42),
+ UINT8_C( 83), UINT8_C( 1), UINT8_C( 95), UINT8_C( 89)),
+ simde_x_mm256_set_epu8(UINT8_C(167), UINT8_C(151), UINT8_C( 43), UINT8_C( 91),
+ UINT8_C(178), UINT8_C(203), UINT8_C( 85), UINT8_C(128),
+ UINT8_C( 61), UINT8_C(106), UINT8_C(144), UINT8_C(187),
+ UINT8_C(213), UINT8_C(129), UINT8_C( 93), UINT8_C( 51),
+ UINT8_C( 45), UINT8_C( 69), UINT8_C(138), UINT8_C( 46),
+ UINT8_C(133), UINT8_C(111), UINT8_C(212), UINT8_C(213),
+ UINT8_C(138), UINT8_C( 71), UINT8_C(159), UINT8_C( 50),
+ UINT8_C(149), UINT8_C( 63), UINT8_C(116), UINT8_C(102)) },
+ { simde_x_mm256_set_epu8(UINT8_C( 59), UINT8_C(202), UINT8_C( 28), UINT8_C( 65),
+ UINT8_C( 60), UINT8_C( 92), UINT8_C(112), UINT8_C(105),
+ UINT8_C(229), UINT8_C(116), UINT8_C(242), UINT8_C(217),
+ UINT8_C(203), UINT8_C( 71), UINT8_C( 15), UINT8_C(143),
+ UINT8_C( 58), UINT8_C(228), UINT8_C( 36), UINT8_C(154),
+ UINT8_C( 96), UINT8_C( 2), UINT8_C( 86), UINT8_C( 36),
+ UINT8_C( 93), UINT8_C( 29), UINT8_C( 70), UINT8_C( 20),
+ UINT8_C(130), UINT8_C(172), UINT8_C(152), UINT8_C(189)),
+ simde_x_mm256_set_epu8(UINT8_C(100), UINT8_C( 42), UINT8_C( 77), UINT8_C( 21),
+ UINT8_C(144), UINT8_C(197), UINT8_C(242), UINT8_C(243),
+ UINT8_C(205), UINT8_C(204), UINT8_C( 75), UINT8_C(102),
+ UINT8_C( 21), UINT8_C(148), UINT8_C( 70), UINT8_C(128),
+ UINT8_C( 95), UINT8_C(147), UINT8_C( 39), UINT8_C(190),
+ UINT8_C( 20), UINT8_C(128), UINT8_C(196), UINT8_C(160),
+ UINT8_C( 8), UINT8_C(206), UINT8_C( 13), UINT8_C(197),
+ UINT8_C( 93), UINT8_C(253), UINT8_C( 16), UINT8_C( 27)),
+ simde_x_mm256_set_epu8(UINT8_C( 80), UINT8_C(122), UINT8_C( 53), UINT8_C( 43),
+ UINT8_C(102), UINT8_C(145), UINT8_C(177), UINT8_C(174),
+ UINT8_C(217), UINT8_C(160), UINT8_C(159), UINT8_C(160),
+ UINT8_C(112), UINT8_C(110), UINT8_C( 43), UINT8_C(136),
+ UINT8_C( 77), UINT8_C(188), UINT8_C( 38), UINT8_C(172),
+ UINT8_C( 58), UINT8_C( 65), UINT8_C(141), UINT8_C( 98),
+ UINT8_C( 51), UINT8_C(118), UINT8_C( 42), UINT8_C(109),
+ UINT8_C(112), UINT8_C(213), UINT8_C( 84), UINT8_C(108)) },
+ { simde_x_mm256_set_epu8(UINT8_C( 75), UINT8_C( 17), UINT8_C(162), UINT8_C( 64),
+ UINT8_C(129), UINT8_C(250), UINT8_C(112), UINT8_C(166),
+ UINT8_C( 98), UINT8_C(126), UINT8_C(129), UINT8_C(211),
+ UINT8_C( 27), UINT8_C( 12), UINT8_C(183), UINT8_C(140),
+ UINT8_C(106), UINT8_C(255), UINT8_C(252), UINT8_C(224),
+ UINT8_C(116), UINT8_C(208), UINT8_C( 69), UINT8_C( 4),
+ UINT8_C(193), UINT8_C( 46), UINT8_C(111), UINT8_C( 96),
+ UINT8_C(101), UINT8_C(183), UINT8_C( 99), UINT8_C( 60)),
+ simde_x_mm256_set_epu8(UINT8_C( 48), UINT8_C( 27), UINT8_C(253), UINT8_C(118),
+ UINT8_C(225), UINT8_C(134), UINT8_C(250), UINT8_C(133),
+ UINT8_C( 52), UINT8_C( 47), UINT8_C( 27), UINT8_C(213),
+ UINT8_C( 28), UINT8_C(208), UINT8_C( 73), UINT8_C( 89),
+ UINT8_C( 76), UINT8_C(160), UINT8_C( 57), UINT8_C(191),
+ UINT8_C( 34), UINT8_C(121), UINT8_C(194), UINT8_C(205),
+ UINT8_C(102), UINT8_C(106), UINT8_C(175), UINT8_C(219),
+ UINT8_C(174), UINT8_C(128), UINT8_C(137), UINT8_C(235)),
+ simde_x_mm256_set_epu8(UINT8_C( 62), UINT8_C( 22), UINT8_C(208), UINT8_C( 91),
+ UINT8_C(177), UINT8_C(192), UINT8_C(181), UINT8_C(150),
+ UINT8_C( 75), UINT8_C( 87), UINT8_C( 78), UINT8_C(212),
+ UINT8_C( 28), UINT8_C(110), UINT8_C(128), UINT8_C(115),
+ UINT8_C( 91), UINT8_C(208), UINT8_C(155), UINT8_C(208),
+ UINT8_C( 75), UINT8_C(165), UINT8_C(132), UINT8_C(105),
+ UINT8_C(148), UINT8_C( 76), UINT8_C(143), UINT8_C(158),
+ UINT8_C(138), UINT8_C(156), UINT8_C(118), UINT8_C(148)) }
+ };
+
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m256i r = simde_mm256_avg_epu8(test_vec[i].a, test_vec[i].b);
+ simde_assert_m256i_u8(r, ==, test_vec[i].r);
+ }
+
+ return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm256_avg_epu16(const MunitParameter params[], void* data) {
+ (void) params;
+ (void) data;
+
+ const struct {
+ simde__m256i a;
+ simde__m256i b;
+ simde__m256i r;
+ } test_vec[8] = {
+ { simde_x_mm256_set_epu16(UINT16_C( 33977), UINT16_C( 4075), UINT16_C( 26120), UINT16_C( 61365),
+ UINT16_C( 20891), UINT16_C( 60607), UINT16_C( 34113), UINT16_C( 18728),
+ UINT16_C( 46422), UINT16_C( 18901), UINT16_C( 21788), UINT16_C( 13793),
+ UINT16_C( 55681), UINT16_C( 17591), UINT16_C( 59483), UINT16_C( 1153)),
+ simde_x_mm256_set_epu16(UINT16_C( 51090), UINT16_C( 18817), UINT16_C( 13598), UINT16_C( 45820),
+ UINT16_C( 32044), UINT16_C( 17235), UINT16_C( 13391), UINT16_C( 61302),
+ UINT16_C( 25625), UINT16_C( 19022), UINT16_C( 23185), UINT16_C( 30419),
+ UINT16_C( 7455), UINT16_C( 32821), UINT16_C( 5103), UINT16_C( 46444)),
+ simde_x_mm256_set_epu16(UINT16_C( 42534), UINT16_C( 11446), UINT16_C( 19859), UINT16_C( 53593),
+ UINT16_C( 26468), UINT16_C( 38921), UINT16_C( 23752), UINT16_C( 40015),
+ UINT16_C( 36024), UINT16_C( 18962), UINT16_C( 22487), UINT16_C( 22106),
+ UINT16_C( 31568), UINT16_C( 25206), UINT16_C( 32293), UINT16_C( 23799)) },
+ { simde_x_mm256_set_epu16(UINT16_C( 53641), UINT16_C( 40905), UINT16_C( 36475), UINT16_C( 59858),
+ UINT16_C( 46081), UINT16_C( 4869), UINT16_C( 9163), UINT16_C( 48410),
+ UINT16_C( 39308), UINT16_C( 16016), UINT16_C( 5245), UINT16_C( 47493),
+ UINT16_C( 23283), UINT16_C( 4725), UINT16_C( 26334), UINT16_C( 6924)),
+ simde_x_mm256_set_epu16(UINT16_C( 51610), UINT16_C( 19346), UINT16_C( 21642), UINT16_C( 28178),
+ UINT16_C( 36499), UINT16_C( 49283), UINT16_C( 38888), UINT16_C( 45064),
+ UINT16_C( 34075), UINT16_C( 13347), UINT16_C( 29332), UINT16_C( 60793),
+ UINT16_C( 53601), UINT16_C( 62027), UINT16_C( 49726), UINT16_C( 62166)),
+ simde_x_mm256_set_epu16(UINT16_C( 52626), UINT16_C( 30126), UINT16_C( 29059), UINT16_C( 44018),
+ UINT16_C( 41290), UINT16_C( 27076), UINT16_C( 24026), UINT16_C( 46737),
+ UINT16_C( 36692), UINT16_C( 14682), UINT16_C( 17289), UINT16_C( 54143),
+ UINT16_C( 38442), UINT16_C( 33376), UINT16_C( 38030), UINT16_C( 34545)) },
+ { simde_x_mm256_set_epu16(UINT16_C( 57168), UINT16_C( 24377), UINT16_C( 44302), UINT16_C( 19279),
+ UINT16_C( 52773), UINT16_C( 32225), UINT16_C( 44153), UINT16_C( 11257),
+ UINT16_C( 31235), UINT16_C( 6604), UINT16_C( 39200), UINT16_C( 62778),
+ UINT16_C( 54132), UINT16_C( 22500), UINT16_C( 35787), UINT16_C( 27352)),
+ simde_x_mm256_set_epu16(UINT16_C( 4777), UINT16_C( 11876), UINT16_C( 62982), UINT16_C( 53472),
+ UINT16_C( 34552), UINT16_C( 23283), UINT16_C( 24536), UINT16_C( 59637),
+ UINT16_C( 9078), UINT16_C( 5999), UINT16_C( 35073), UINT16_C( 11220),
+ UINT16_C( 36751), UINT16_C( 27378), UINT16_C( 48206), UINT16_C( 24113)),
+ simde_x_mm256_set_epu16(UINT16_C( 30973), UINT16_C( 18127), UINT16_C( 53642), UINT16_C( 36376),
+ UINT16_C( 43663), UINT16_C( 27754), UINT16_C( 34345), UINT16_C( 35447),
+ UINT16_C( 20157), UINT16_C( 6302), UINT16_C( 37137), UINT16_C( 36999),
+ UINT16_C( 45442), UINT16_C( 24939), UINT16_C( 41997), UINT16_C( 25733)) },
+ { simde_x_mm256_set_epu16(UINT16_C( 47667), UINT16_C( 42655), UINT16_C( 15805), UINT16_C( 38044),
+ UINT16_C( 51003), UINT16_C( 54805), UINT16_C( 23576), UINT16_C( 8993),
+ UINT16_C( 7045), UINT16_C( 2418), UINT16_C( 43598), UINT16_C( 38347),
+ UINT16_C( 54380), UINT16_C( 29913), UINT16_C( 26304), UINT16_C( 57186)),
+ simde_x_mm256_set_epu16(UINT16_C( 13249), UINT16_C( 33246), UINT16_C( 37681), UINT16_C( 53958),
+ UINT16_C( 49310), UINT16_C( 12761), UINT16_C( 5303), UINT16_C( 54599),
+ UINT16_C( 42076), UINT16_C( 30225), UINT16_C( 60443), UINT16_C( 41570),
+ UINT16_C( 50311), UINT16_C( 1964), UINT16_C( 59695), UINT16_C( 38784)),
+ simde_x_mm256_set_epu16(UINT16_C( 30458), UINT16_C( 37951), UINT16_C( 26743), UINT16_C( 46001),
+ UINT16_C( 50157), UINT16_C( 33783), UINT16_C( 14440), UINT16_C( 31796),
+ UINT16_C( 24561), UINT16_C( 16322), UINT16_C( 52021), UINT16_C( 39959),
+ UINT16_C( 52346), UINT16_C( 15939), UINT16_C( 43000), UINT16_C( 47985)) },
+ { simde_x_mm256_set_epu16(UINT16_C( 46733), UINT16_C( 23899), UINT16_C( 189), UINT16_C( 55261),
+ UINT16_C( 27111), UINT16_C( 15840), UINT16_C( 17492), UINT16_C( 63447),
+ UINT16_C( 32197), UINT16_C( 17766), UINT16_C( 55928), UINT16_C( 29103),
+ UINT16_C( 34337), UINT16_C( 27253), UINT16_C( 33273), UINT16_C( 49734)),
+ simde_x_mm256_set_epu16(UINT16_C( 35099), UINT16_C( 4446), UINT16_C( 62606), UINT16_C( 36400),
+ UINT16_C( 14041), UINT16_C( 53626), UINT16_C( 446), UINT16_C( 15354),
+ UINT16_C( 46000), UINT16_C( 42809), UINT16_C( 23055), UINT16_C( 8076),
+ UINT16_C( 53608), UINT16_C( 35738), UINT16_C( 14840), UINT16_C( 57665)),
+ simde_x_mm256_set_epu16(UINT16_C( 40916), UINT16_C( 14173), UINT16_C( 31398), UINT16_C( 45831),
+ UINT16_C( 20576), UINT16_C( 34733), UINT16_C( 8969), UINT16_C( 39401),
+ UINT16_C( 39099), UINT16_C( 30288), UINT16_C( 39492), UINT16_C( 18590),
+ UINT16_C( 43973), UINT16_C( 31496), UINT16_C( 24057), UINT16_C( 53700)) },
+ { simde_x_mm256_set_epu16(UINT16_C( 32242), UINT16_C( 8824), UINT16_C( 27338), UINT16_C( 25661),
+ UINT16_C( 27025), UINT16_C( 11905), UINT16_C( 53305), UINT16_C( 21013),
+ UINT16_C( 15201), UINT16_C( 52740), UINT16_C( 46673), UINT16_C( 52220),
+ UINT16_C( 28444), UINT16_C( 53817), UINT16_C( 54908), UINT16_C( 35186)),
+ simde_x_mm256_set_epu16(UINT16_C( 53308), UINT16_C( 13117), UINT16_C( 63947), UINT16_C( 17859),
+ UINT16_C( 4163), UINT16_C( 61940), UINT16_C( 55753), UINT16_C( 26704),
+ UINT16_C( 7720), UINT16_C( 17752), UINT16_C( 21389), UINT16_C( 56750),
+ UINT16_C( 42354), UINT16_C( 27434), UINT16_C( 21249), UINT16_C( 24409)),
+ simde_x_mm256_set_epu16(UINT16_C( 42775), UINT16_C( 10971), UINT16_C( 45643), UINT16_C( 21760),
+ UINT16_C( 15594), UINT16_C( 36923), UINT16_C( 54529), UINT16_C( 23859),
+ UINT16_C( 11461), UINT16_C( 35246), UINT16_C( 34031), UINT16_C( 54485),
+ UINT16_C( 35399), UINT16_C( 40626), UINT16_C( 38079), UINT16_C( 29798)) },
+ { simde_x_mm256_set_epu16(UINT16_C( 15306), UINT16_C( 7233), UINT16_C( 15452), UINT16_C( 28777),
+ UINT16_C( 58740), UINT16_C( 62169), UINT16_C( 52039), UINT16_C( 3983),
+ UINT16_C( 15076), UINT16_C( 9370), UINT16_C( 24578), UINT16_C( 22052),
+ UINT16_C( 23837), UINT16_C( 17940), UINT16_C( 33452), UINT16_C( 39101)),
+ simde_x_mm256_set_epu16(UINT16_C( 25642), UINT16_C( 19733), UINT16_C( 37061), UINT16_C( 62195),
+ UINT16_C( 52684), UINT16_C( 19302), UINT16_C( 5524), UINT16_C( 18048),
+ UINT16_C( 24467), UINT16_C( 10174), UINT16_C( 5248), UINT16_C( 50336),
+ UINT16_C( 2254), UINT16_C( 3525), UINT16_C( 24061), UINT16_C( 4123)),
+ simde_x_mm256_set_epu16(UINT16_C( 20474), UINT16_C( 13483), UINT16_C( 26257), UINT16_C( 45486),
+ UINT16_C( 55712), UINT16_C( 40736), UINT16_C( 28782), UINT16_C( 11016),
+ UINT16_C( 19772), UINT16_C( 9772), UINT16_C( 14913), UINT16_C( 36194),
+ UINT16_C( 13046), UINT16_C( 10733), UINT16_C( 28757), UINT16_C( 21612)) },
+ { simde_x_mm256_set_epu16(UINT16_C( 19217), UINT16_C( 41536), UINT16_C( 33274), UINT16_C( 28838),
+ UINT16_C( 25214), UINT16_C( 33235), UINT16_C( 6924), UINT16_C( 46988),
+ UINT16_C( 27391), UINT16_C( 64736), UINT16_C( 29904), UINT16_C( 17668),
+ UINT16_C( 49454), UINT16_C( 28512), UINT16_C( 26039), UINT16_C( 25404)),
+ simde_x_mm256_set_epu16(UINT16_C( 12315), UINT16_C( 64886), UINT16_C( 57734), UINT16_C( 64133),
+ UINT16_C( 13359), UINT16_C( 7125), UINT16_C( 7376), UINT16_C( 18777),
+ UINT16_C( 19616), UINT16_C( 14783), UINT16_C( 8825), UINT16_C( 49869),
+ UINT16_C( 26218), UINT16_C( 45019), UINT16_C( 44672), UINT16_C( 35307)),
+ simde_x_mm256_set_epu16(UINT16_C( 15766), UINT16_C( 53211), UINT16_C( 45504), UINT16_C( 46486),
+ UINT16_C( 19287), UINT16_C( 20180), UINT16_C( 7150), UINT16_C( 32883),
+ UINT16_C( 23504), UINT16_C( 39760), UINT16_C( 19365), UINT16_C( 33769),
+ UINT16_C( 37836), UINT16_C( 36766), UINT16_C( 35356), UINT16_C( 30356)) }
+ };
+
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m256i r = simde_mm256_avg_epu16(test_vec[i].a, test_vec[i].b);
+ simde_assert_m256i_u16(r, ==, test_vec[i].r);
+ }
+
+ return MUNIT_OK;
+}
+
static MunitResult
test_simde_mm256_blend_epi16(const MunitParameter params[], void* data) {
(void) params;
@@ -6191,6 +6519,95 @@ test_simde_mm256_permute4x64_epi64(const MunitParameter params[], void* data) {
return MUNIT_OK;
}
+static MunitResult
+test_simde_mm256_permute2x128_si256(const MunitParameter params[], void* data) {
+ (void) params; /* not used by this test */
+ (void) data; /* not used by this test */
+ /* Table-driven check of simde_mm256_permute2x128_si256(a, b, 23): each row holds inputs a, b and the expected result r. */
+ const struct {
+ simde__m256i a;
+ simde__m256i b;
+ simde__m256i r;
+ } test_vec[8] = { /* in every row, r's first two arguments repeat a's first two and r's last two repeat b's first two */
+ { simde_mm256_set_epi64x(INT64_C( 9096692030846176105), INT64_C( 644260392039444522),
+ INT64_C(-4583540275174352405), INT64_C(-6816753880857675259)),
+ simde_mm256_set_epi64x(INT64_C(-7886827988827131690), INT64_C(-2107575233125845054),
+ INT64_C(-8398644678734943287), INT64_C( 7172114359254607016)),
+ simde_mm256_set_epi64x(INT64_C( 9096692030846176105), INT64_C( 644260392039444522),
+ INT64_C(-7886827988827131690), INT64_C(-2107575233125845054)) },
+ { simde_mm256_set_epi64x(INT64_C(-6314946612387904819), INT64_C(-1883921203594067636),
+ INT64_C(-5030251871897832411), INT64_C( 8348493077761215789)),
+ simde_mm256_set_epi64x(INT64_C( 2582913386835954622), INT64_C( 153655168481379701),
+ INT64_C(-1087064137492042031), INT64_C( 1783808851867973139)),
+ simde_mm256_set_epi64x(INT64_C(-6314946612387904819), INT64_C(-1883921203594067636),
+ INT64_C( 2582913386835954622), INT64_C( 153655168481379701)) },
+ { simde_mm256_set_epi64x(INT64_C(-3208478614025680333), INT64_C(-3409066786741744502),
+ INT64_C(-6957643043766269700), INT64_C( 3219428559958296960)),
+ simde_mm256_set_epi64x(INT64_C(-1736924574103250292), INT64_C(-2810347538827243748),
+ INT64_C( -297965813118371045), INT64_C(-7618358042182251122)),
+ simde_mm256_set_epi64x(INT64_C(-3208478614025680333), INT64_C(-3409066786741744502),
+ INT64_C(-1736924574103250292), INT64_C(-2810347538827243748)) },
+ { simde_mm256_set_epi64x(INT64_C( 796366024780064289), INT64_C(-1489690745108457074),
+ INT64_C(-7990282097237082056), INT64_C(-7545130296515735090)),
+ simde_mm256_set_epi64x(INT64_C( 951803776889232332), INT64_C(-6640461449591045668),
+ INT64_C( 5271740244822761531), INT64_C( 3149915688837762175)),
+ simde_mm256_set_epi64x(INT64_C( 796366024780064289), INT64_C(-1489690745108457074),
+ INT64_C( 951803776889232332), INT64_C(-6640461449591045668)) },
+ { simde_mm256_set_epi64x(INT64_C( -95120238103258498), INT64_C(-1762353908339260045),
+ INT64_C( 6992845328844002662), INT64_C(-5939283762406250642)),
+ simde_mm256_set_epi64x(INT64_C(-5885001620821736092), INT64_C(-6745062192544323367),
+ INT64_C( 7803931770148523943), INT64_C(-8993062880293478576)),
+ simde_mm256_set_epi64x(INT64_C( -95120238103258498), INT64_C(-1762353908339260045),
+ INT64_C(-5885001620821736092), INT64_C(-6745062192544323367)) },
+ { simde_mm256_set_epi64x(INT64_C(-3708437875152674849), INT64_C( 8243162546537572005),
+ INT64_C(-1103721052327437925), INT64_C(-2925489198757650175)),
+ simde_mm256_set_epi64x(INT64_C( 1440085788748654982), INT64_C( 1725906984156202179),
+ INT64_C( 5845599904819452784), INT64_C( 7162548421658470679)),
+ simde_mm256_set_epi64x(INT64_C(-3708437875152674849), INT64_C( 8243162546537572005),
+ INT64_C( 1440085788748654982), INT64_C( 1725906984156202179)) },
+ { simde_mm256_set_epi64x(INT64_C( 6834943649491098623), INT64_C( 2759372331225584008),
+ INT64_C(-1659900994892419246), INT64_C(-2119655686628377164)),
+ simde_mm256_set_epi64x(INT64_C( 1954610004667753515), INT64_C( 5688482191974230934),
+ INT64_C(-3937849964004809456), INT64_C(-8694088207381845200)),
+ simde_mm256_set_epi64x(INT64_C( 6834943649491098623), INT64_C( 2759372331225584008),
+ INT64_C( 1954610004667753515), INT64_C( 5688482191974230934)) },
+ { simde_mm256_set_epi64x(INT64_C( 5051547726856501651), INT64_C( 7333908238294102632),
+ INT64_C( 7118133466490521985), INT64_C( 6243950982549416292)),
+ simde_mm256_set_epi64x(INT64_C(-6805527145604381785), INT64_C(-1282569833996306134),
+ INT64_C(-1497859500202369050), INT64_C( 1581543684384159070)),
+ simde_mm256_set_epi64x(INT64_C( 5051547726856501651), INT64_C( 7333908238294102632),
+ INT64_C(-6805527145604381785), INT64_C(-1282569833996306134)) },
+ };
+ /* Disabled generator kept for reference: fills a and b with munit_rand_memory, computes r with the function under test, prints the three values as a test_vec row, then returns MUNIT_FAIL. */
+ //printf("\n");
+ //for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ // simde__m256i_private a, b, r;
+
+ // munit_rand_memory(sizeof(a), (uint8_t*) &a);
+ // munit_rand_memory(sizeof(b), (uint8_t*) &b);
+
+ // r = simde__m256i_to_private(simde_mm256_permute2x128_si256(simde__m256i_from_private(a), simde__m256i_from_private(b), 23));
+
+ // printf(" { simde_mm256_set_epi64x(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
+ // " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")),\n",
+ // a.i64[3], a.i64[2], a.i64[1], a.i64[0]);
+ // printf(" simde_mm256_set_epi64x(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
+ // " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")),\n",
+ // b.i64[3], b.i64[2], b.i64[1], b.i64[0]);
+ // printf(" simde_mm256_set_epi64x(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
+ // " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")) },\n",
+ // r.i64[3], r.i64[2], r.i64[1], r.i64[0]);
+ //}
+ //return MUNIT_FAIL;
+ /* Run every row through the function with control value 23 and compare the result with the row's r using simde_assert_m256i_i64 and ==. */
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { /* bound is the element count of test_vec */
+ simde__m256i r = simde_mm256_permute2x128_si256(test_vec[i].a, test_vec[i].b, 23); /* 23 is the same constant the disabled generator uses */
+ simde_assert_m256i_i64(r, ==, test_vec[i].r);
+ }
+
+ return MUNIT_OK;
+}
+
static MunitResult
test_simde_mm256_shuffle_epi8(const MunitParameter params[], void* data) {
(void) params;
@@ -9246,6 +9663,9 @@ static MunitTest test_suite_tests[] = {
SIMDE_TESTS_DEFINE_TEST(mm256_and_si256),
SIMDE_TESTS_DEFINE_TEST(mm256_andnot_si256),
+ SIMDE_TESTS_DEFINE_TEST(mm256_avg_epu8),
+ SIMDE_TESTS_DEFINE_TEST(mm256_avg_epu16),
+
SIMDE_TESTS_DEFINE_TEST(mm_blend_epi32),
SIMDE_TESTS_DEFINE_TEST(mm256_blend_epi16),
SIMDE_TESTS_DEFINE_TEST(mm256_blend_epi32),
@@ -9304,6 +9724,7 @@ static MunitTest test_suite_tests[] = {
SIMDE_TESTS_DEFINE_TEST(mm256_packs_epi32),
SIMDE_TESTS_DEFINE_TEST(mm256_permute4x64_epi64),
+ SIMDE_TESTS_DEFINE_TEST(mm256_permute2x128_si256),
SIMDE_TESTS_DEFINE_TEST(mm256_shuffle_epi8),
SIMDE_TESTS_DEFINE_TEST(mm256_shuffle_epi32),
=====================================
test/x86/avx512f.c
=====================================
The diff for this file was not included because it is too large.
View it on GitLab: https://salsa.debian.org/med-team/simde/-/compare/2c04b61d108c62a966b73a225afcb84611d9a281...9f2751b0830361124d6e4f1b1d982e67cafe867f
--
View it on GitLab: https://salsa.debian.org/med-team/simde/-/compare/2c04b61d108c62a966b73a225afcb84611d9a281...9f2751b0830361124d6e4f1b1d982e67cafe867f
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200414/284f0469/attachment-0001.html>
More information about the debian-med-commit
mailing list