[med-svn] [Git][med-team/simde][upstream] New upstream version 0.0.0.git.20200412
Michael R. Crusoe
gitlab at salsa.debian.org
Mon Apr 13 09:36:30 BST 2020
Michael R. Crusoe pushed to branch upstream at Debian Med / simde
Commits:
683bff07 by Michael R. Crusoe at 2020-04-13T09:34:54+02:00
New upstream version 0.0.0.git.20200412
- - - - -
9 changed files:
- + .drone.star
- − .drone.yml
- README.md
- simde/x86/avx2.h
- simde/x86/avx512f.h
- simde/x86/sse.h
- test/x86/avx2.c
- test/x86/avx512f.c
- test/x86/skel.c
Changes:
=====================================
.drone.star
=====================================
@@ -0,0 +1,318 @@
+# -*- Python -*-
+# Drone CI Starlark configuration file.
+# https://docs.drone.io/pipeline/scripting/starlark/
+# Run `drone starlark convert --stdout` to verify `.drone.star`.
+def get_default_job():
+    """Return a freshly built pipeline template shared by every job.
+
+    The "custom" entry holds the command phases ("before_install",
+    "before_script", "script"); it defines no "install" phase.
+    """
+    test_step = {
+        "name": "test",
+        "image": "ubuntu:bionic",
+        "environment": {},
+        "commands": [],
+    }
+
+    # Command phases, kept apart from the step's "commands" list.
+    custom_phases = {
+        "before_install": [
+            "uname -m",
+            "cat /proc/cpuinfo",
+        ],
+        "before_script": [
+            "git submodule --quiet update --init --recursive",
+        ],
+        "script": [
+            "mkdir -p build",
+            "cd build",
+            'CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson ..',
+            "ninja -v",
+            "./test/run-tests",
+        ],
+    }
+
+    return {
+        "kind": "pipeline",
+        "type": "docker",
+        "name": "",
+        "platform": {
+            "os": "linux",
+        },
+        "steps": [test_step],
+        "custom": custom_phases,
+    }
+
+def _apt_arm_job(cc, cxx, arch, march, packages):
+    """Build the override dict for one job on the "arm" platform.
+
+    Args:
+      cc: C compiler command, also used as the first word of the job name.
+      cxx: C++ compiler command.
+      arch: architecture label appended to the job name (e.g. "armv7").
+      march: value placed after "-march=" in ARCH_FLAGS.
+      packages: compiler package(s) passed to "apt-get install".
+    """
+    return {
+        "name": cc + " " + arch,
+        "platform": {
+            "arch": "arm",
+        },
+        "steps": [
+            {
+                "environment": {
+                    "CC": cc,
+                    "CXX": cxx,
+                    "ARCH_FLAGS": "-march=" + march + " -mfpu=neon",
+                },
+            },
+        ],
+        "custom": {
+            "install": [
+                "apt-get -yq update",
+                "apt-get -yq install " + packages + " ninja-build git-core python3-pip",
+                "pip3 install meson",
+            ],
+        },
+    }
+
+def get_jobs():
+    """Return the list of per-job override dicts, in pipeline order."""
+
+    # The clang jobs spell the architecture "armv7a"/"armv8a", the gcc jobs
+    # "armv7-a"/"armv8-a"; the gcc jobs also install the matching g++ package.
+    job_clang9_armv7 = _apt_arm_job("clang-9", "clang++-9", "armv7", "armv7a", "clang-9")
+    job_clang9_armv8 = _apt_arm_job("clang-9", "clang++-9", "armv8", "armv8a", "clang-9")
+    job_gcc8_armv7 = _apt_arm_job("gcc-8", "g++-8", "armv7", "armv7-a", "gcc-8 g++-8")
+    job_gcc8_armv8 = _apt_arm_job("gcc-8", "g++-8", "armv8", "armv8-a", "gcc-8 g++-8")
+    job_clang7_armv7 = _apt_arm_job("clang-7", "clang++-7", "armv7", "armv7a", "clang-7")
+    job_clang7_armv8 = _apt_arm_job("clang-7", "clang++-7", "armv8", "armv8a", "clang-7")
+    job_gcc7_armv7 = _apt_arm_job("gcc-7", "g++-7", "armv7", "armv7-a", "gcc-7 g++-7")
+    job_gcc7_armv8 = _apt_arm_job("gcc-7", "g++-7", "armv8", "armv8-a", "gcc-7 g++-7")
+
+    # Defined but currently left out of the returned list (see below).
+    job_fedora = {
+        "name": "fedora",
+        "steps": [
+            {
+                "image": "fedora:latest",
+                "environment": {
+                    "CC": "gcc",
+                    "CXX": "g++",
+                    "ARCH_FLAGS": "-march=native",
+                },
+            },
+        ],
+        "custom": {
+            "install": [
+                "dnf install -y gcc gcc-c++ ninja-build git-core python3-pip",
+                "pip3 install meson",
+            ],
+        },
+    }
+
+    # This job replaces the default "script" phase so that ARCH_FLAGS can be
+    # derived from the RPM optflags macro; its step is marked "failure": "ignore".
+    job_fedora_clang_arm64_flags = {
+        "name": "fedora clang arm64 flags",
+        "platform": {
+            "arch": "arm64",
+        },
+        "steps": [
+            {
+                "image": "fedora:rawhide",
+                "environment": {
+                    "CC": "clang",
+                    "CXX": "clang++",
+                },
+                "failure": "ignore",
+            },
+        ],
+        "custom": {
+            "install": [
+                "dnf install -y clang ninja-build git-core python3-pip",
+                "pip3 install meson",
+            ],
+            "script": [
+                "mkdir -p build",
+                "cd build",
+                # optflags RPM macro works with gcc.
+                # Some flags and specs are not available with clang.
+                # https://lists.fedoraproject.org/archives/list/packaging@lists.fedoraproject.org/message/W5UFLUADNB4VF3OBUBSNAPOQL6XBCP74/
+                "ARCH_FLAGS=$(rpm -E '%{optflags}' | sed -e 's| -fstack-clash-protection||' -e 's| -specs=[^ ]*||g')",
+                'CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson ..',
+                "ninja -v",
+                "./test/run-tests",
+            ],
+        },
+    }
+
+    return [
+        job_clang9_armv7,
+        job_clang9_armv8,
+        job_gcc8_armv7,
+        job_gcc8_armv8,
+        job_clang7_armv7,
+        job_clang7_armv8,
+        job_gcc7_armv7,
+        job_gcc7_armv8,
+        # job_fedora,
+        job_fedora_clang_arm64_flags,
+    ]
+
+def main(ctx):
+    """Build the list of pipelines from get_jobs() and get_default_job().
+
+    Every job is overlaid on a fresh default job. The command phases stored
+    under "custom" are then appended, in a fixed order, to the "commands" list
+    of the first step, and the "custom" entry is removed from the result.
+
+    Args:
+      ctx: argument supplied by the caller; not used here.
+
+    Returns:
+      A list with one merged pipeline dict per job.
+    """
+    merged_jobs = []
+    for job in get_jobs():
+        out = get_default_job()
+
+        # Merge each element of the job into the default job.
+        for key, value in job.items():
+            if type(value) == "list":
+                # Lists are merged element by element into the default entries.
+                for index, item in enumerate(value):
+                    out[key][index].update(item)
+            elif type(value) == "dict":
+                out[key].update(value)
+            else:
+                out[key] = value
+
+        # Create the commands list from the custom phases. The default job
+        # defines no "install" phase, so a job that omits it (or any other
+        # phase) contributes no commands instead of failing on a missing key.
+        commands = out["steps"][0]["commands"]
+        for phase in ["before_install", "install", "before_script", "script"]:
+            commands.extend(out["custom"].get(phase, []))
+
+        # Remove the custom element, which is no longer needed.
+        out.pop("custom", None)
+
+        merged_jobs.append(out)
+
+    return merged_jobs
=====================================
.drone.yml deleted
=====================================
@@ -1,238 +0,0 @@
----
-kind: pipeline
-type: docker
-name: "clang-9 armv7"
-platform:
- os: linux
- arch: arm
-steps:
-- name: test
- image: ubuntu:bionic
- environment:
- CC: clang-9
- CXX: clang++-9
- ARCH_FLAGS: -march=armv7a -mfpu=neon
- commands:
- - uname -m
- - cat /proc/cpuinfo
- - apt-get -yq update
- - apt-get -yq install clang-9 ninja-build git-core python3-pip
- - pip3 install meson
- - git submodule update --init --recursive
- - mkdir -p build
- - cd build
- - CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson ..
- - ninja -v
- - ./test/run-tests
-
----
-kind: pipeline
-type: docker
-name: "clang-9 armv8"
-platform:
- os: linux
- arch: arm
-steps:
-- name: test
- image: ubuntu:bionic
- environment:
- CC: clang-9
- CXX: clang++-9
- ARCH_FLAGS: -march=armv8a -mfpu=neon
- commands:
- - uname -m
- - cat /proc/cpuinfo
- - apt-get -yq update
- - apt-get -yq install clang-9 ninja-build git-core python3-pip
- - pip3 install meson
- - git submodule update --init --recursive
- - mkdir -p build
- - cd build
- - CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson ..
- - ninja -v
- - ./test/run-tests
-
----
-kind: pipeline
-type: docker
-name: "gcc-8 armv7"
-platform:
- os: linux
- arch: arm
-steps:
-- name: test
- image: ubuntu:bionic
- environment:
- CC: gcc-8
- CXX: g++-8
- ARCH_FLAGS: -march=armv7-a -mfpu=neon
- commands:
- - uname -m
- - cat /proc/cpuinfo
- - apt-get -yq update
- - apt-get -yq install gcc-8 g++-8 ninja-build git-core python3-pip
- - pip3 install meson
- - git submodule update --init --recursive
- - mkdir -p build
- - cd build
- - CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson ..
- - ninja -v
- - ./test/run-tests
-
----
-kind: pipeline
-type: docker
-name: "gcc-8 armv8"
-platform:
- os: linux
- arch: arm
-steps:
-- name: test
- image: ubuntu:bionic
- environment:
- CC: gcc-8
- CXX: g++-8
- ARCH_FLAGS: -march=armv8-a -mfpu=neon
- commands:
- - uname -m
- - cat /proc/cpuinfo
- - apt-get -yq update
- - apt-get -yq install gcc-8 g++-8 ninja-build git-core python3-pip
- - pip3 install meson
- - git submodule update --init --recursive
- - mkdir -p build
- - cd build
- - CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson ..
- - ninja -v
- - ./test/run-tests
-
----
-kind: pipeline
-type: docker
-name: "clang-7 armv7"
-platform:
- os: linux
- arch: arm
-steps:
-- name: test
- image: ubuntu:bionic
- environment:
- CC: clang-7
- CXX: clang++-7
- ARCH_FLAGS: -march=armv7a -mfpu=neon
- commands:
- - uname -m
- - cat /proc/cpuinfo
- - apt-get -yq update
- - apt-get -yq install clang-7 ninja-build git-core python3-pip
- - pip3 install meson
- - git submodule update --init --recursive
- - mkdir -p build
- - cd build
- - CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson ..
- - ninja -v
- - ./test/run-tests
-
----
-kind: pipeline
-type: docker
-name: "clang-7 armv8"
-platform:
- os: linux
- arch: arm
-steps:
-- name: test
- image: ubuntu:bionic
- environment:
- CC: clang-7
- CXX: clang++-7
- ARCH_FLAGS: -march=armv8a -mfpu=neon
- commands:
- - uname -m
- - cat /proc/cpuinfo
- - apt-get -yq update
- - apt-get -yq install clang-7 ninja-build git-core python3-pip
- - pip3 install meson
- - git submodule update --init --recursive
- - mkdir -p build
- - cd build
- - CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson ..
- - ninja -v
- - ./test/run-tests
-
----
-kind: pipeline
-type: docker
-name: "gcc-7 armv7"
-platform:
- os: linux
- arch: arm
-steps:
-- name: test
- image: ubuntu:bionic
- environment:
- CC: gcc-7
- CXX: g++-7
- ARCH_FLAGS: -march=armv7-a -mfpu=neon
- commands:
- - uname -m
- - cat /proc/cpuinfo
- - apt-get -yq update
- - apt-get -yq install gcc-7 g++-7 ninja-build git-core python3-pip
- - pip3 install meson
- - git submodule update --init --recursive
- - mkdir -p build
- - cd build
- - CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson ..
- - ninja -v
- - ./test/run-tests
-
----
-kind: pipeline
-type: docker
-name: "gcc-7 armv8"
-platform:
- os: linux
- arch: arm
-steps:
-- name: test
- image: ubuntu:bionic
- environment:
- CC: gcc-7
- CXX: g++-7
- ARCH_FLAGS: -march=armv8-a -mfpu=neon
- commands:
- - uname -m
- - cat /proc/cpuinfo
- - apt-get -yq update
- - apt-get -yq install gcc-7 g++-7 ninja-build git-core python3-pip
- - pip3 install meson
- - git submodule update --init --recursive
- - mkdir -p build
- - cd build
- - CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson ..
- - ninja -v
- - ./test/run-tests
-
-# ---
-# kind: pipeline
-# type: docker
-# name: "fedora"
-# steps:
-# - name: test
-# image: fedora:latest
-# environment:
-# CC: gcc
-# CXX: g++
-# ARCH_FLAGS: -march=native
-# commands:
-# - uname -m
-# - cat /proc/cpuinfo
-# - dnf install -y gcc gcc-c++ ninja-build git-core python3-pip
-# - pip3 install meson
-# - git submodule update --init --recursive
-# - mkdir -p build
-# - cd build
-# - CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson ..
-# - ninja -v
-# - ./test/run-tests
=====================================
README.md
=====================================
@@ -1,4 +1,5 @@
# SIMD Everywhere
+[![Gitter chat](https://badges.gitter.im/gitterHQ/gitter.png)](https://gitter.im/simd-everywhere/community)
The SIMDe header-only library provides fast, portable implementations of
[SIMD intrinsics](https://en.wikipedia.org/wiki/SIMD) on hardware which
=====================================
simde/x86/avx2.h
=====================================
@@ -67,6 +67,72 @@ SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE__BEGIN_DECLS
+/* Per-lane absolute value of the signed 8-bit elements of `a`.
+ * Defers to _mm256_abs_epi8 when SIMDE_AVX2_NATIVE is defined; otherwise
+ * every i8 lane is processed by a portable loop. */
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m256i
+simde_mm256_abs_epi8 (simde__m256i a) {
+#if !defined(SIMDE_AVX2_NATIVE)
+  simde__m256i_private
+    r_,
+    a_ = simde__m256i_to_private(a);
+
+  SIMDE__VECTORIZE
+  for (size_t lane = 0 ; lane < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; lane++) {
+    /* Non-negative lanes are copied, negative lanes are negated. */
+    r_.i8[lane] = (a_.i8[lane] >= INT32_C(0)) ? a_.i8[lane] : -a_.i8[lane];
+  }
+
+  return simde__m256i_from_private(r_);
+#else
+  return _mm256_abs_epi8(a);
+#endif
+}
+#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
+#  define _mm256_abs_epi8(a) simde_mm256_abs_epi8(a)
+#endif
+
+/* Per-lane absolute value of the signed 16-bit elements of `a`.
+ * Defers to _mm256_abs_epi16 when SIMDE_AVX2_NATIVE is defined; otherwise
+ * every i16 lane is processed by a portable loop. */
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m256i
+simde_mm256_abs_epi16 (simde__m256i a) {
+#if !defined(SIMDE_AVX2_NATIVE)
+  simde__m256i_private
+    r_,
+    a_ = simde__m256i_to_private(a);
+
+  SIMDE__VECTORIZE
+  for (size_t lane = 0 ; lane < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; lane++) {
+    /* Non-negative lanes are copied, negative lanes are negated. */
+    r_.i16[lane] = (a_.i16[lane] >= INT32_C(0)) ? a_.i16[lane] : -a_.i16[lane];
+  }
+
+  return simde__m256i_from_private(r_);
+#else
+  return _mm256_abs_epi16(a);
+#endif
+}
+#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
+#  define _mm256_abs_epi16(a) simde_mm256_abs_epi16(a)
+#endif
+
+/* Per-lane absolute value of the signed 32-bit elements of `a`.
+ *
+ * Uses _mm256_abs_epi32 when SIMDE_AVX2_NATIVE is defined. The portable
+ * path negates negative lanes through uint32_t: unlike the 8- and 16-bit
+ * variants, an int32_t operand is not promoted to a wider type, so a plain
+ * `-x` would be signed overflow (undefined behavior) for INT32_MIN. */
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m256i
+simde_mm256_abs_epi32(simde__m256i a) {
+#if defined(SIMDE_AVX2_NATIVE)
+  return _mm256_abs_epi32(a);
+#else
+  simde__m256i_private
+    r_,
+    a_ = simde__m256i_to_private(a);
+
+  SIMDE__VECTORIZE
+  for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
+    /* Unsigned subtraction wraps, so the negation itself is always defined;
+     * only the conversion back to int32_t is implementation-defined. */
+    r_.i32[i] = (a_.i32[i] < INT32_C(0))
+      ? HEDLEY_STATIC_CAST(int32_t, UINT32_C(0) - HEDLEY_STATIC_CAST(uint32_t, a_.i32[i]))
+      : a_.i32[i];
+  }
+
+  return simde__m256i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
+#  define _mm256_abs_epi32(a) simde_mm256_abs_epi32(a)
+#endif
+
SIMDE__FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_add_epi8 (simde__m256i a, simde__m256i b) {
@@ -1283,6 +1349,90 @@ simde_mm256_max_epi32 (simde__m256i a, simde__m256i b) {
# define _mm256_max_epi32(a, b) simde_mm256_max_epi32(a, b)
#endif
+/* Per-lane minimum of the signed 8-bit elements of `a` and `b`.
+ * The native intrinsic is used when SIMDE_AVX2_NATIVE is defined and the
+ * compiler is not PGI; otherwise the two 128-bit halves go through
+ * simde_mm_min_epi8 (when SIMDE_ARCH_X86_SSE4_1 is defined) or a portable
+ * per-lane loop. */
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m256i
+simde_mm256_min_epi8 (simde__m256i a, simde__m256i b) {
+#if defined(SIMDE_AVX2_NATIVE) && !defined(__PGI)
+  return _mm256_min_epi8(a, b);
+#else
+  simde__m256i_private
+    r_,
+    a_ = simde__m256i_to_private(a),
+    b_ = simde__m256i_to_private(b);
+
+#if !defined(SIMDE_ARCH_X86_SSE4_1)
+  SIMDE__VECTORIZE
+  for (size_t lane = 0 ; lane < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; lane++) {
+    r_.i8[lane] = (b_.i8[lane] > a_.i8[lane]) ? a_.i8[lane] : b_.i8[lane];
+  }
+#else
+  for (size_t half = 0 ; half < 2 ; half++) {
+    r_.m128i[half] = simde_mm_min_epi8(a_.m128i[half], b_.m128i[half]);
+  }
+#endif
+
+  return simde__m256i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
+#  define _mm256_min_epi8(a, b) simde_mm256_min_epi8(a, b)
+#endif
+
+/* Per-lane minimum of the signed 16-bit elements of `a` and `b`.
+ * The native intrinsic is used when SIMDE_AVX2_NATIVE is defined; otherwise
+ * the two 128-bit halves go through simde_mm_min_epi16 (when
+ * SIMDE_ARCH_X86_SSE2 is defined) or a portable per-lane loop. */
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m256i
+simde_mm256_min_epi16 (simde__m256i a, simde__m256i b) {
+#if defined(SIMDE_AVX2_NATIVE)
+  return _mm256_min_epi16(a, b);
+#else
+  simde__m256i_private
+    r_,
+    a_ = simde__m256i_to_private(a),
+    b_ = simde__m256i_to_private(b);
+
+#if !defined(SIMDE_ARCH_X86_SSE2)
+  SIMDE__VECTORIZE
+  for (size_t lane = 0 ; lane < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; lane++) {
+    r_.i16[lane] = (b_.i16[lane] > a_.i16[lane]) ? a_.i16[lane] : b_.i16[lane];
+  }
+#else
+  for (size_t half = 0 ; half < 2 ; half++) {
+    r_.m128i[half] = simde_mm_min_epi16(a_.m128i[half], b_.m128i[half]);
+  }
+#endif
+
+  return simde__m256i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
+#  define _mm256_min_epi16(a, b) simde_mm256_min_epi16(a, b)
+#endif
+
+/* Per-lane minimum of the signed 32-bit elements of `a` and `b`.
+ * The native intrinsic is used when SIMDE_AVX2_NATIVE is defined; otherwise
+ * the two 128-bit halves go through simde_mm_min_epi32 (when
+ * SIMDE_ARCH_X86_SSE4_1 is defined) or a portable per-lane loop. */
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m256i
+simde_mm256_min_epi32 (simde__m256i a, simde__m256i b) {
+#if defined(SIMDE_AVX2_NATIVE)
+  return _mm256_min_epi32(a, b);
+#else
+  simde__m256i_private
+    r_,
+    a_ = simde__m256i_to_private(a),
+    b_ = simde__m256i_to_private(b);
+
+#if !defined(SIMDE_ARCH_X86_SSE4_1)
+  SIMDE__VECTORIZE
+  for (size_t lane = 0 ; lane < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; lane++) {
+    r_.i32[lane] = (b_.i32[lane] > a_.i32[lane]) ? a_.i32[lane] : b_.i32[lane];
+  }
+#else
+  for (size_t half = 0 ; half < 2 ; half++) {
+    r_.m128i[half] = simde_mm_min_epi32(a_.m128i[half], b_.m128i[half]);
+  }
+#endif
+
+  return simde__m256i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
+#  define _mm256_min_epi32(a, b) simde_mm256_min_epi32(a, b)
+#endif
+
SIMDE__FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_min_epu8 (simde__m256i a, simde__m256i b) {
=====================================
simde/x86/avx512f.h
=====================================
@@ -1595,6 +1595,46 @@ simde_mm512_setone_pd(void) {
return simde_mm512_castsi512_pd(simde_mm512_setone_si512());
}
+/* Right shift of every unsigned 32-bit lane of `a` by `imm8` bits.
+ * Order of preference: the native AVX-512F intrinsic, two 256-bit shifts
+ * (SIMDE_ARCH_X86_AVX2), four 128-bit shifts (SIMDE_ARCH_X86_SSE2), and
+ * finally a portable path in which a count above 31 yields an all-zero
+ * result. */
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_srli_epi32 (simde__m512i a, unsigned int imm8) {
+  #if defined(SIMDE_AVX512F_NATIVE)
+    return _mm512_srli_epi32(a, imm8);
+  #else
+    simde__m512i_private
+      r_,
+      a_ = simde__m512i_to_private(a);
+
+    #if defined(SIMDE_ARCH_X86_AVX2)
+      r_.m256i[0] = simde_mm256_srli_epi32(a_.m256i[0], imm8);
+      r_.m256i[1] = simde_mm256_srli_epi32(a_.m256i[1], imm8);
+    #elif defined(SIMDE_ARCH_X86_SSE2)
+      r_.m128i[0] = simde_mm_srli_epi32(a_.m128i[0], imm8);
+      r_.m128i[1] = simde_mm_srli_epi32(a_.m128i[1], imm8);
+      r_.m128i[2] = simde_mm_srli_epi32(a_.m128i[2], imm8);
+      r_.m128i[3] = simde_mm_srli_epi32(a_.m128i[3], imm8);
+    #else
+      if (imm8 <= 31) {
+        #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
+          r_.u32 = a_.u32 >> imm8;
+        #else
+          SIMDE__VECTORIZE
+          for (size_t lane = 0 ; lane < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; lane++) {
+            r_.u32[lane] = a_.u32[lane] >> imm8;
+          }
+        #endif
+      } else {
+        /* Shifting a 32-bit value by 32 or more is not defined in C, so
+         * the result is cleared explicitly. */
+        simde_memset(&r_, 0, sizeof(r_));
+      }
+    #endif
+
+    return simde__m512i_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_srli_epi32(a, imm8) simde_mm512_srli_epi32(a, imm8)
+#endif
+
SIMDE__FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_srli_epi64 (simde__m512i a, unsigned int imm8) {
@@ -1876,6 +1916,29 @@ simde_mm512_mask_test_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512
# define _mm512_mask_test_epi32_mask(a, b) simde_mm512_mask_test_epi32_mask(a, b)
#endif
+/* Tests the 64-bit lanes of `a` and `b` under mask `k1`.
+ *
+ * Portable path: bit i of the result is set when (a & b) is non-zero in
+ * 64-bit lane i; the collected bits are then ANDed with `k1`. The native
+ * intrinsic is used when SIMDE_AVX512F_NATIVE is defined. */
+SIMDE__FUNCTION_ATTRIBUTES
+simde__mmask8
+simde_mm512_mask_test_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) {
+  #if defined(SIMDE_AVX512F_NATIVE)
+    return _mm512_mask_test_epi64_mask(k1, a, b);
+  #else
+    simde__m512i_private
+      a_ = simde__m512i_to_private(a),
+      b_ = simde__m512i_to_private(b);
+    simde__mmask8 r = 0;
+
+    SIMDE__VECTORIZE_REDUCTION(|:r)
+    for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
+      /* !! collapses the lane to 0 or 1 before it is moved to bit i. */
+      r |= (!!(a_.i64[i] & b_.i64[i])) << i;
+    }
+
+    return r & k1;
+  #endif
+}
+/* The alias takes the same three arguments as the function it forwards to;
+ * a two-parameter macro would reject every (k1, a, b) call. */
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_mask_test_epi64_mask(k1, a, b) simde_mm512_mask_test_epi64_mask(k1, a, b)
+#endif
+
SIMDE__FUNCTION_ATTRIBUTES
simde__m512i
simde__m512i_from_mmask16 (simde__mmask16 k) {
@@ -2018,6 +2081,32 @@ simde_mm512_add_epi32 (simde__m512i a, simde__m512i b) {
# define _mm512_add_epi32(a, b) simde_mm512_add_epi32(a, b)
#endif
+/* Masked 32-bit integer addition. Portable path: the result of
+ * simde_mm512_add_epi32(a, b) is handed, together with `src` and `k`, to
+ * simde_mm512_mask_mov_epi32. */
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_mask_add_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) {
+#if !defined(SIMDE_AVX512F_NATIVE)
+  const simde__m512i sum = simde_mm512_add_epi32(a, b);
+  return simde_mm512_mask_mov_epi32(src, k, sum);
+#else
+  return _mm512_mask_add_epi32(src, k, a, b);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_mask_add_epi32(src, k, a, b) simde_mm512_mask_add_epi32(src, k, a, b)
+#endif
+
+/* Zero-masked 32-bit integer addition. Portable path: the result of
+ * simde_mm512_add_epi32(a, b) is handed, together with `k`, to
+ * simde_mm512_maskz_mov_epi32. */
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_maskz_add_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) {
+#if !defined(SIMDE_AVX512F_NATIVE)
+  const simde__m512i sum = simde_mm512_add_epi32(a, b);
+  return simde_mm512_maskz_mov_epi32(k, sum);
+#else
+  return _mm512_maskz_add_epi32(k, a, b);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_maskz_add_epi32(k, a, b) simde_mm512_maskz_add_epi32(k, a, b)
+#endif
+
SIMDE__FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_add_epi64 (simde__m512i a, simde__m512i b) {
@@ -2045,6 +2134,33 @@ simde_mm512_add_epi64 (simde__m512i a, simde__m512i b) {
# define _mm512_add_epi64(a, b) simde_mm512_add_epi64(a, b)
#endif
+/* Masked 64-bit integer addition. Portable path: the result of
+ * simde_mm512_add_epi64(a, b) is handed, together with `src` and `k`, to
+ * simde_mm512_mask_mov_epi64. */
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_mask_add_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) {
+#if !defined(SIMDE_AVX512F_NATIVE)
+  const simde__m512i sum = simde_mm512_add_epi64(a, b);
+  return simde_mm512_mask_mov_epi64(src, k, sum);
+#else
+  return _mm512_mask_add_epi64(src, k, a, b);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_mask_add_epi64(src, k, a, b) simde_mm512_mask_add_epi64(src, k, a, b)
+#endif
+
+/* Zero-masked 64-bit integer addition. Portable path: the result of
+ * simde_mm512_add_epi64(a, b) is handed, together with `k`, to
+ * simde_mm512_maskz_mov_epi64. */
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_maskz_add_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) {
+#if !defined(SIMDE_AVX512F_NATIVE)
+  const simde__m512i sum = simde_mm512_add_epi64(a, b);
+  return simde_mm512_maskz_mov_epi64(k, sum);
+#else
+  return _mm512_maskz_add_epi64(k, a, b);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_maskz_add_epi64(k, a, b) simde_mm512_maskz_add_epi64(k, a, b)
+#endif
+
+
SIMDE__FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_add_ps (simde__m512 a, simde__m512 b) {
@@ -2072,6 +2188,33 @@ simde_mm512_add_ps (simde__m512 a, simde__m512 b) {
# define _mm512_add_ps(a, b) simde_mm512_add_ps(a, b)
#endif
+/* Masked single-precision addition. Portable path: the result of
+ * simde_mm512_add_ps(a, b) is handed, together with `src` and `k`, to
+ * simde_mm512_mask_mov_ps. */
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512
+simde_mm512_mask_add_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
+#if !defined(SIMDE_AVX512F_NATIVE)
+  const simde__m512 sum = simde_mm512_add_ps(a, b);
+  return simde_mm512_mask_mov_ps(src, k, sum);
+#else
+  return _mm512_mask_add_ps(src, k, a, b);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_mask_add_ps(src, k, a, b) simde_mm512_mask_add_ps(src, k, a, b)
+#endif
+
+/* Zero-masked single-precision addition. Portable path: the result of
+ * simde_mm512_add_ps(a, b) is handed, together with `k`, to
+ * simde_mm512_maskz_mov_ps. */
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512
+simde_mm512_maskz_add_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) {
+#if !defined(SIMDE_AVX512F_NATIVE)
+  const simde__m512 sum = simde_mm512_add_ps(a, b);
+  return simde_mm512_maskz_mov_ps(k, sum);
+#else
+  return _mm512_maskz_add_ps(k, a, b);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_maskz_add_ps(k, a, b) simde_mm512_maskz_add_ps(k, a, b)
+#endif
+
+
SIMDE__FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_add_pd (simde__m512d a, simde__m512d b) {
@@ -2099,6 +2242,33 @@ simde_mm512_add_pd (simde__m512d a, simde__m512d b) {
# define _mm512_add_pd(a, b) simde_mm512_add_pd(a, b)
#endif
+/* Masked double-precision addition. Portable path: the result of
+ * simde_mm512_add_pd(a, b) is handed, together with `src` and `k`, to
+ * simde_mm512_mask_mov_pd. */
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512d
+simde_mm512_mask_add_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
+#if !defined(SIMDE_AVX512F_NATIVE)
+  const simde__m512d sum = simde_mm512_add_pd(a, b);
+  return simde_mm512_mask_mov_pd(src, k, sum);
+#else
+  return _mm512_mask_add_pd(src, k, a, b);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_mask_add_pd(src, k, a, b) simde_mm512_mask_add_pd(src, k, a, b)
+#endif
+
+/* Zero-masked double-precision addition. Portable path: the result of
+ * simde_mm512_add_pd(a, b) is handed, together with `k`, to
+ * simde_mm512_maskz_mov_pd. */
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512d
+simde_mm512_maskz_add_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) {
+#if !defined(SIMDE_AVX512F_NATIVE)
+  const simde__m512d sum = simde_mm512_add_pd(a, b);
+  return simde_mm512_maskz_mov_pd(k, sum);
+#else
+  return _mm512_maskz_add_pd(k, a, b);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_maskz_add_pd(k, a, b) simde_mm512_maskz_add_pd(k, a, b)
+#endif
+
+
SIMDE__FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_sub_epi32 (simde__m512i a, simde__m512i b) {
@@ -2331,9 +2501,9 @@ simde_mm512_mask_cmpeq_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512
SIMDE__FUNCTION_ATTRIBUTES
simde__mmask16
-simde_mm512_mask_cmpgt_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) {
+simde_mm512_cmpgt_epi32_mask (simde__m512i a, simde__m512i b) {
#if defined(SIMDE_AVX512F_NATIVE)
- return _mm512_mask_cmpgt_epi32_mask(k1, a, b);
+ return _mm512_cmpgt_epi32_mask(a, b);
#else
simde__m512i_private
r_,
@@ -2344,7 +2514,20 @@ simde_mm512_mask_cmpgt_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m51
r_.m256i[i] = simde_mm256_cmpgt_epi32(a_.m256i[i], b_.m256i[i]);
}
- return simde__m512i_private_to_mmask16(r_) & k1;
+ return simde__m512i_private_to_mmask16(r_);
+ #endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+# define _mm512_cmpgt_epi32_mask(a, b) simde_mm512_cmpgt_epi32_mask(a, b)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__mmask16
+simde_mm512_mask_cmpgt_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) {
+ #if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_mask_cmpgt_epi32_mask(k1, a, b);
+ #else
+ return simde_mm512_cmpgt_epi32_mask(a, b) & k1;
#endif
}
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
@@ -2353,9 +2536,9 @@ simde_mm512_mask_cmpgt_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m51
SIMDE__FUNCTION_ATTRIBUTES
simde__mmask8
-simde_mm512_mask_cmpgt_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) {
+simde_mm512_cmpgt_epi64_mask (simde__m512i a, simde__m512i b) {
#if defined(SIMDE_AVX512F_NATIVE)
- return _mm512_mask_cmpgt_epi64_mask(k1, a, b);
+ return _mm512_cmpgt_epi64_mask(a, b);
#else
simde__m512i_private
r_,
@@ -2366,7 +2549,20 @@ simde_mm512_mask_cmpgt_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512
r_.m256i[i] = simde_mm256_cmpgt_epi64(a_.m256i[i], b_.m256i[i]);
}
- return simde__m512i_private_to_mmask8(r_) & k1;
+ return simde__m512i_private_to_mmask8(r_);
+ #endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+# define _mm512_cmpgt_epi64_mask(a, b) simde_mm512_cmpgt_epi64_mask(a, b)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__mmask8
+simde_mm512_mask_cmpgt_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) {
+ #if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_mask_cmpgt_epi64_mask(k1, a, b);
+ #else
+ return simde_mm512_cmpgt_epi64_mask(a, b) & k1;
#endif
}
#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
@@ -2604,6 +2800,19 @@ simde_mm512_mul_ps (simde__m512 a, simde__m512 b) {
# define _mm512_mul_ps(a, b) simde_mm512_mul_ps(a, b)
#endif
+/* Masked single-precision multiplication. Portable path: the result of
+ * simde_mm512_mul_ps(a, b) is handed, together with `src` and `k`, to
+ * simde_mm512_mask_mov_ps. */
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512
+simde_mm512_mask_mul_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
+#if !defined(SIMDE_AVX512F_NATIVE)
+  const simde__m512 product = simde_mm512_mul_ps(a, b);
+  return simde_mm512_mask_mov_ps(src, k, product);
+#else
+  return _mm512_mask_mul_ps(src, k, a, b);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_mask_mul_ps(src, k, a, b) simde_mm512_mask_mul_ps(src, k, a, b)
+#endif
+
SIMDE__FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mul_pd (simde__m512d a, simde__m512d b) {
@@ -2631,6 +2840,112 @@ simde_mm512_mul_pd (simde__m512d a, simde__m512d b) {
# define _mm512_mul_pd(a, b) simde_mm512_mul_pd(a, b)
#endif
+/* Masked double-precision multiplication. Portable path: the result of
+ * simde_mm512_mul_pd(a, b) is handed, together with `src` and `k`, to
+ * simde_mm512_mask_mov_pd. */
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512d
+simde_mm512_mask_mul_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
+#if !defined(SIMDE_AVX512F_NATIVE)
+  const simde__m512d product = simde_mm512_mul_pd(a, b);
+  return simde_mm512_mask_mov_pd(src, k, product);
+#else
+  return _mm512_mask_mul_pd(src, k, a, b);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_mask_mul_pd(src, k, a, b) simde_mm512_mask_mul_pd(src, k, a, b)
+#endif
+
+/* Multiplies the even-numbered signed 32-bit elements of `a` and `b`,
+ * producing one signed 64-bit product per 64-bit lane of the result.
+ * Uses the native intrinsic when SIMDE_AVX512F_NATIVE is defined. */
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_mul_epi32 (simde__m512i a, simde__m512i b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_mul_epi32(a, b);
+#else
+  simde__m512i_private
+    r_,
+    a_ = simde__m512i_to_private(a),
+    b_ = simde__m512i_to_private(b);
+#if defined(SIMDE__CONVERT_VECTOR) && defined(SIMDE__SHUFFLE_VECTOR)
+  simde__m512i_private evens;
+  __typeof__(r_.i64) wide_a, wide_b;
+
+  /* Gather the even-numbered 32-bit values; given the conversions below,
+   * presumably those of a_ land in the first 256-bit half of `evens` and
+   * those of b_ in the second. */
+  evens.i32 = SIMDE__SHUFFLE_VECTOR(32, 64, a_.i32, b_.i32, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
+  /* Widen each half to 64 bits before multiplying. */
+  SIMDE__CONVERT_VECTOR(wide_a, evens.m256i_private[0].i32);
+  SIMDE__CONVERT_VECTOR(wide_b, evens.m256i_private[1].i32);
+  r_.i64 = wide_a * wide_b;
+#else
+  SIMDE__VECTORIZE
+  for (size_t lane = 0 ; lane < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; lane++) {
+    /* Element 2 * lane is the even-numbered 32-bit value of this lane. */
+    r_.i64[lane] = HEDLEY_STATIC_CAST(int64_t, a_.i32[2 * lane]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[2 * lane]);
+  }
+#endif
+  return simde__m512i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_mul_epi32(a, b) simde_mm512_mul_epi32(a, b)
+#endif
+
+/* Masked variant of simde_mm512_mul_epi32. Portable path: the products are
+ * handed, together with `src` and `k`, to simde_mm512_mask_mov_epi64 (the
+ * products are 64-bit, hence the 8-bit mask). */
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_mask_mul_epi32(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) {
+#if !defined(SIMDE_AVX512F_NATIVE)
+  const simde__m512i product = simde_mm512_mul_epi32(a, b);
+  return simde_mm512_mask_mov_epi64(src, k, product);
+#else
+  return _mm512_mask_mul_epi32(src, k, a, b);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_mask_mul_epi32(src, k, a, b) simde_mm512_mask_mul_epi32(src, k, a, b)
+#endif
+
+/* Multiplies the even-numbered unsigned 32-bit elements of `a` and `b`,
+ * producing one unsigned 64-bit product per 64-bit lane of the result.
+ * Uses the native intrinsic when SIMDE_AVX512F_NATIVE is defined. */
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_mul_epu32 (simde__m512i a, simde__m512i b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_mul_epu32(a, b);
+#else
+  simde__m512i_private
+    r_,
+    a_ = simde__m512i_to_private(a),
+    b_ = simde__m512i_to_private(b);
+
+#if defined(SIMDE__CONVERT_VECTOR) && defined(SIMDE__SHUFFLE_VECTOR)
+  simde__m512i_private evens;
+  __typeof__(r_.u64) wide_a, wide_b;
+
+  /* Gather the even-numbered 32-bit values, then widen each 256-bit half
+   * of `evens` to 64 bits before multiplying. */
+  evens.u32 = SIMDE__SHUFFLE_VECTOR(32, 64, a_.u32, b_.u32, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
+  SIMDE__CONVERT_VECTOR(wide_a, evens.m256i_private[0].u32);
+  SIMDE__CONVERT_VECTOR(wide_b, evens.m256i_private[1].u32);
+  r_.u64 = wide_a * wide_b;
+#else
+  SIMDE__VECTORIZE
+  for (size_t lane = 0 ; lane < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; lane++) {
+    /* Element 2 * lane is the even-numbered 32-bit value of this lane. */
+    r_.u64[lane] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[2 * lane]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[2 * lane]);
+  }
+#endif
+
+  return simde__m512i_from_private(r_);
+
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_mul_epu32(a, b) simde_mm512_mul_epu32(a, b)
+#endif
+
+/* Masked variant of simde_mm512_mul_epu32. Portable path: the products are
+ * handed, together with `src` and `k`, to simde_mm512_mask_mov_epi64 (the
+ * products are 64-bit, hence the 8-bit mask). */
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_mask_mul_epu32(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) {
+#if !defined(SIMDE_AVX512F_NATIVE)
+  const simde__m512i product = simde_mm512_mul_epu32(a, b);
+  return simde_mm512_mask_mov_epi64(src, k, product);
+#else
+  return _mm512_mask_mul_epu32(src, k, a, b);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_mask_mul_epu32(src, k, a, b) simde_mm512_mask_mul_epu32(src, k, a, b)
+#endif
+
SIMDE__FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_or_si512 (simde__m512i a, simde__m512i b) {
=====================================
simde/x86/sse.h
=====================================
@@ -776,6 +776,12 @@ simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) {
r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32));
#elif defined(SIMDE_SSE_WASM_SIMD128)
r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128);
+ #elif defined(SIMDE_SSE_POWER_ALTIVEC) && (SIMDE_ARCH_POWER >= 900) && !defined(HEDLEY_IBM_VERSION)
+ /* vec_cmpne(vector float, vector float) is missing from XL C/C++ v16.1.1,
+ though the documentation (table 89 on page 432 of the IBM XL C/C++ for
+ Linux Compiler Reference, Version 16.1.1) shows that it should be
+ present. Both GCC and clang support it. */
+ r_.altivec_f32 = (vector float) vec_cmpne(a_.altivec_f32, b_.altivec_f32);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
r_.i32 = (__typeof__(r_.i32)) (a_.f32 != b_.f32);
#else
=====================================
test/x86/avx2.c
=====================================
@@ -27,6 +27,287 @@
#if defined(SIMDE_AVX2_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS)
+static MunitResult
+test_simde_mm256_abs_epi8(const MunitParameter params[], void* data) {
+ (void) params; /* not used by this test */
+ (void) data; /* not used by this test */
+ /* Checks simde_mm256_abs_epi8 against eight precomputed input/expected pairs. */
+ const struct {
+ simde__m256i a; /* input vector */
+ simde__m256i r; /* expected result for a */
+ } test_vec[8] = {
+ { simde_mm256_set_epi8(INT8_C( -27), INT8_C( 88), INT8_C(-122), INT8_C( -6),
+ INT8_C( -23), INT8_C( 108), INT8_C(-103), INT8_C( 32),
+ INT8_C( 43), INT8_C( 116), INT8_C( -6), INT8_C( -98),
+ INT8_C( -62), INT8_C( -87), INT8_C( 90), INT8_C( 82),
+ INT8_C( 86), INT8_C( 8), INT8_C(-126), INT8_C( -22),
+ INT8_C( -80), INT8_C(-125), INT8_C( -5), INT8_C(-101),
+ INT8_C( 36), INT8_C( 114), INT8_C( -51), INT8_C( 59),
+ INT8_C( -97), INT8_C( 124), INT8_C( 25), INT8_C( 90)),
+ simde_mm256_set_epi8(INT8_C( 27), INT8_C( 88), INT8_C( 122), INT8_C( 6),
+ INT8_C( 23), INT8_C( 108), INT8_C( 103), INT8_C( 32),
+ INT8_C( 43), INT8_C( 116), INT8_C( 6), INT8_C( 98),
+ INT8_C( 62), INT8_C( 87), INT8_C( 90), INT8_C( 82),
+ INT8_C( 86), INT8_C( 8), INT8_C( 126), INT8_C( 22),
+ INT8_C( 80), INT8_C( 125), INT8_C( 5), INT8_C( 101),
+ INT8_C( 36), INT8_C( 114), INT8_C( 51), INT8_C( 59),
+ INT8_C( 97), INT8_C( 124), INT8_C( 25), INT8_C( 90)) },
+ { simde_mm256_set_epi8(INT8_C( 111), INT8_C( 46), INT8_C( -44), INT8_C( 36),
+ INT8_C( -79), INT8_C( 101), INT8_C( 0), INT8_C( 2),
+ INT8_C( -69), INT8_C( 31), INT8_C( -68), INT8_C( -82),
+ INT8_C( -45), INT8_C( 120), INT8_C( 39), INT8_C( 46),
+ INT8_C( 66), INT8_C( 30), INT8_C(-106), INT8_C( 118),
+ INT8_C( 61), INT8_C( 98), INT8_C( -61), INT8_C( 98),
+ INT8_C( 49), INT8_C( -12), INT8_C(-117), INT8_C(-115),
+ INT8_C( 63), INT8_C( -92), INT8_C(-102), INT8_C(-110)),
+ simde_mm256_set_epi8(INT8_C( 111), INT8_C( 46), INT8_C( 44), INT8_C( 36),
+ INT8_C( 79), INT8_C( 101), INT8_C( 0), INT8_C( 2),
+ INT8_C( 69), INT8_C( 31), INT8_C( 68), INT8_C( 82),
+ INT8_C( 45), INT8_C( 120), INT8_C( 39), INT8_C( 46),
+ INT8_C( 66), INT8_C( 30), INT8_C( 106), INT8_C( 118),
+ INT8_C( 61), INT8_C( 98), INT8_C( 61), INT8_C( 98),
+ INT8_C( 49), INT8_C( 12), INT8_C( 117), INT8_C( 115),
+ INT8_C( 63), INT8_C( 92), INT8_C( 102), INT8_C( 110)) },
+ { simde_mm256_set_epi8(INT8_C( 64), INT8_C( -84), INT8_C( 54), INT8_C(-102),
+ INT8_C( -69), INT8_C( 12), INT8_C(-119), INT8_C( -19),
+ INT8_C( 19), INT8_C( -55), INT8_C( -11), INT8_C(-117),
+ INT8_C( -68), INT8_C( -51), INT8_C( 26), INT8_C( 72),
+ INT8_C( -15), INT8_C( 108), INT8_C( -66), INT8_C( -24),
+ INT8_C( -97), INT8_C( -48), INT8_C( 75), INT8_C( 35),
+ INT8_C( 48), INT8_C( -25), INT8_C( -43), INT8_C( 2),
+ INT8_C( -75), INT8_C( 28), INT8_C(-108), INT8_C( -43)),
+ simde_mm256_set_epi8(INT8_C( 64), INT8_C( 84), INT8_C( 54), INT8_C( 102),
+ INT8_C( 69), INT8_C( 12), INT8_C( 119), INT8_C( 19),
+ INT8_C( 19), INT8_C( 55), INT8_C( 11), INT8_C( 117),
+ INT8_C( 68), INT8_C( 51), INT8_C( 26), INT8_C( 72),
+ INT8_C( 15), INT8_C( 108), INT8_C( 66), INT8_C( 24),
+ INT8_C( 97), INT8_C( 48), INT8_C( 75), INT8_C( 35),
+ INT8_C( 48), INT8_C( 25), INT8_C( 43), INT8_C( 2),
+ INT8_C( 75), INT8_C( 28), INT8_C( 108), INT8_C( 43)) },
+ { simde_mm256_set_epi8(INT8_C( 8), INT8_C( -54), INT8_C( -1), INT8_C(-128),
+ INT8_C( 118), INT8_C( -15), INT8_C( 125), INT8_C( 76),
+ INT8_C( 47), INT8_C( 33), INT8_C( 69), INT8_C( 21),
+ INT8_C(-116), INT8_C( 34), INT8_C( 36), INT8_C( 31),
+ INT8_C( -32), INT8_C( -84), INT8_C( 23), INT8_C( -76),
+ INT8_C( 82), INT8_C(-115), INT8_C( 74), INT8_C(-110),
+ INT8_C( -46), INT8_C( 125), INT8_C( -52), INT8_C( -99),
+ INT8_C( 30), INT8_C(-106), INT8_C( 66), INT8_C( 5)),
+ simde_mm256_set_epi8(INT8_C( 8), INT8_C( 54), INT8_C( 1), INT8_C(-128), /* a -128 input is expected to stay -128 */
+ INT8_C( 118), INT8_C( 15), INT8_C( 125), INT8_C( 76),
+ INT8_C( 47), INT8_C( 33), INT8_C( 69), INT8_C( 21),
+ INT8_C( 116), INT8_C( 34), INT8_C( 36), INT8_C( 31),
+ INT8_C( 32), INT8_C( 84), INT8_C( 23), INT8_C( 76),
+ INT8_C( 82), INT8_C( 115), INT8_C( 74), INT8_C( 110),
+ INT8_C( 46), INT8_C( 125), INT8_C( 52), INT8_C( 99),
+ INT8_C( 30), INT8_C( 106), INT8_C( 66), INT8_C( 5)) },
+ { simde_mm256_set_epi8(INT8_C( 122), INT8_C( 42), INT8_C(-121), INT8_C(-106),
+ INT8_C( 122), INT8_C( -8), INT8_C( 81), INT8_C(-109),
+ INT8_C( 124), INT8_C( 32), INT8_C( 63), INT8_C( -21),
+ INT8_C( -51), INT8_C( -42), INT8_C( 1), INT8_C( -78),
+ INT8_C( 74), INT8_C( 8), INT8_C( 25), INT8_C( 10),
+ INT8_C( 113), INT8_C( -75), INT8_C( -32), INT8_C( 126),
+ INT8_C( -87), INT8_C( 67), INT8_C( 78), INT8_C( -64),
+ INT8_C( 7), INT8_C( -40), INT8_C( -46), INT8_C( -59)),
+ simde_mm256_set_epi8(INT8_C( 122), INT8_C( 42), INT8_C( 121), INT8_C( 106),
+ INT8_C( 122), INT8_C( 8), INT8_C( 81), INT8_C( 109),
+ INT8_C( 124), INT8_C( 32), INT8_C( 63), INT8_C( 21),
+ INT8_C( 51), INT8_C( 42), INT8_C( 1), INT8_C( 78),
+ INT8_C( 74), INT8_C( 8), INT8_C( 25), INT8_C( 10),
+ INT8_C( 113), INT8_C( 75), INT8_C( 32), INT8_C( 126),
+ INT8_C( 87), INT8_C( 67), INT8_C( 78), INT8_C( 64),
+ INT8_C( 7), INT8_C( 40), INT8_C( 46), INT8_C( 59)) },
+ { simde_mm256_set_epi8(INT8_C( 10), INT8_C( 120), INT8_C( 81), INT8_C(-105),
+ INT8_C( 73), INT8_C( -95), INT8_C( 79), INT8_C( -86),
+ INT8_C( -93), INT8_C( -54), INT8_C( -43), INT8_C( -88),
+ INT8_C( 59), INT8_C( -27), INT8_C( 12), INT8_C( 10),
+ INT8_C( 73), INT8_C( -48), INT8_C( 112), INT8_C( 27),
+ INT8_C(-113), INT8_C( -31), INT8_C( -56), INT8_C( -96),
+ INT8_C( 48), INT8_C( -94), INT8_C(-111), INT8_C( 60),
+ INT8_C(-116), INT8_C( -77), INT8_C( -70), INT8_C( 17)),
+ simde_mm256_set_epi8(INT8_C( 10), INT8_C( 120), INT8_C( 81), INT8_C( 105),
+ INT8_C( 73), INT8_C( 95), INT8_C( 79), INT8_C( 86),
+ INT8_C( 93), INT8_C( 54), INT8_C( 43), INT8_C( 88),
+ INT8_C( 59), INT8_C( 27), INT8_C( 12), INT8_C( 10),
+ INT8_C( 73), INT8_C( 48), INT8_C( 112), INT8_C( 27),
+ INT8_C( 113), INT8_C( 31), INT8_C( 56), INT8_C( 96),
+ INT8_C( 48), INT8_C( 94), INT8_C( 111), INT8_C( 60),
+ INT8_C( 116), INT8_C( 77), INT8_C( 70), INT8_C( 17)) },
+ { simde_mm256_set_epi8(INT8_C( 61), INT8_C( -57), INT8_C( -99), INT8_C( 0),
+ INT8_C( 98), INT8_C(-121), INT8_C( 67), INT8_C( -20),
+ INT8_C( 44), INT8_C( 53), INT8_C(-128), INT8_C( 44),
+ INT8_C( 127), INT8_C( 53), INT8_C(-127), INT8_C( 58),
+ INT8_C( 35), INT8_C( 83), INT8_C( -56), INT8_C( 22),
+ INT8_C( -4), INT8_C( -6), INT8_C( -7), INT8_C( 121),
+ INT8_C( -22), INT8_C( -32), INT8_C( -52), INT8_C( 124),
+ INT8_C( -93), INT8_C( 55), INT8_C( -23), INT8_C( -62)),
+ simde_mm256_set_epi8(INT8_C( 61), INT8_C( 57), INT8_C( 99), INT8_C( 0),
+ INT8_C( 98), INT8_C( 121), INT8_C( 67), INT8_C( 20),
+ INT8_C( 44), INT8_C( 53), INT8_C(-128), INT8_C( 44), /* a -128 input is expected to stay -128 */
+ INT8_C( 127), INT8_C( 53), INT8_C( 127), INT8_C( 58),
+ INT8_C( 35), INT8_C( 83), INT8_C( 56), INT8_C( 22),
+ INT8_C( 4), INT8_C( 6), INT8_C( 7), INT8_C( 121),
+ INT8_C( 22), INT8_C( 32), INT8_C( 52), INT8_C( 124),
+ INT8_C( 93), INT8_C( 55), INT8_C( 23), INT8_C( 62)) },
+ { simde_mm256_set_epi8(INT8_C( 71), INT8_C( -58), INT8_C( 24), INT8_C( 117),
+ INT8_C( 2), INT8_C( -31), INT8_C( -86), INT8_C( 101),
+ INT8_C( 3), INT8_C( 63), INT8_C( 2), INT8_C( -30),
+ INT8_C( -33), INT8_C( 51), INT8_C( 60), INT8_C( 81),
+ INT8_C( -91), INT8_C( -73), INT8_C( 66), INT8_C( 67),
+ INT8_C( 72), INT8_C( -7), INT8_C( 44), INT8_C( -32),
+ INT8_C( -80), INT8_C( 101), INT8_C( -98), INT8_C( 89),
+ INT8_C( 89), INT8_C( 94), INT8_C( 109), INT8_C(-109)),
+ simde_mm256_set_epi8(INT8_C( 71), INT8_C( 58), INT8_C( 24), INT8_C( 117),
+ INT8_C( 2), INT8_C( 31), INT8_C( 86), INT8_C( 101),
+ INT8_C( 3), INT8_C( 63), INT8_C( 2), INT8_C( 30),
+ INT8_C( 33), INT8_C( 51), INT8_C( 60), INT8_C( 81),
+ INT8_C( 91), INT8_C( 73), INT8_C( 66), INT8_C( 67),
+ INT8_C( 72), INT8_C( 7), INT8_C( 44), INT8_C( 32),
+ INT8_C( 80), INT8_C( 101), INT8_C( 98), INT8_C( 89),
+ INT8_C( 89), INT8_C( 94), INT8_C( 109), INT8_C( 109)) }
+ };
+ /* Run the function on every input and assert equality with the stored result. */
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m256i r = simde_mm256_abs_epi8(test_vec[i].a);
+ simde_assert_m256i_i8(r, ==, test_vec[i].r);
+ }
+
+ return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm256_abs_epi16(const MunitParameter params[], void* data) {
+ (void) params; /* not used by this test */
+ (void) data; /* not used by this test */
+ /* Checks simde_mm256_abs_epi16 against eight precomputed input/expected pairs. */
+ const struct {
+ simde__m256i a; /* input vector */
+ simde__m256i r; /* expected result for a */
+ } test_vec[8] = {
+ { simde_mm256_set_epi16(INT16_C( 9101), INT16_C( 13664), INT16_C( 14007), INT16_C( 17440),
+ INT16_C( 21201), INT16_C(-16892), INT16_C(-22702), INT16_C(-11875),
+ INT16_C( 9352), INT16_C( 21001), INT16_C( 15464), INT16_C( 27994),
+ INT16_C( 12104), INT16_C(-22404), INT16_C(-21433), INT16_C( -4031)),
+ simde_mm256_set_epi16(INT16_C( 9101), INT16_C( 13664), INT16_C( 14007), INT16_C( 17440),
+ INT16_C( 21201), INT16_C( 16892), INT16_C( 22702), INT16_C( 11875),
+ INT16_C( 9352), INT16_C( 21001), INT16_C( 15464), INT16_C( 27994),
+ INT16_C( 12104), INT16_C( 22404), INT16_C( 21433), INT16_C( 4031)) },
+ { simde_mm256_set_epi16(INT16_C( 20289), INT16_C( -8788), INT16_C( 5917), INT16_C(-28916),
+ INT16_C(-21540), INT16_C( -8179), INT16_C(-25392), INT16_C( 2609),
+ INT16_C( 12609), INT16_C(-11367), INT16_C( -70), INT16_C( 28633),
+ INT16_C(-21576), INT16_C(-23753), INT16_C(-11797), INT16_C(-17346)),
+ simde_mm256_set_epi16(INT16_C( 20289), INT16_C( 8788), INT16_C( 5917), INT16_C( 28916),
+ INT16_C( 21540), INT16_C( 8179), INT16_C( 25392), INT16_C( 2609),
+ INT16_C( 12609), INT16_C( 11367), INT16_C( 70), INT16_C( 28633),
+ INT16_C( 21576), INT16_C( 23753), INT16_C( 11797), INT16_C( 17346)) },
+ { simde_mm256_set_epi16(INT16_C( 11563), INT16_C(-31585), INT16_C( 24583), INT16_C( 2918),
+ INT16_C( 5705), INT16_C( 31274), INT16_C(-12388), INT16_C( 31454),
+ INT16_C( 5008), INT16_C( 10123), INT16_C( 28874), INT16_C(-27636),
+ INT16_C( 1380), INT16_C(-32687), INT16_C( 24141), INT16_C( 11570)),
+ simde_mm256_set_epi16(INT16_C( 11563), INT16_C( 31585), INT16_C( 24583), INT16_C( 2918),
+ INT16_C( 5705), INT16_C( 31274), INT16_C( 12388), INT16_C( 31454),
+ INT16_C( 5008), INT16_C( 10123), INT16_C( 28874), INT16_C( 27636),
+ INT16_C( 1380), INT16_C( 32687), INT16_C( 24141), INT16_C( 11570)) },
+ { simde_mm256_set_epi16(INT16_C(-28981), INT16_C(-21254), INT16_C( 12206), INT16_C( 17751),
+ INT16_C( 4887), INT16_C( 27025), INT16_C( 20436), INT16_C( -3143),
+ INT16_C( 5806), INT16_C( 19398), INT16_C( 23890), INT16_C( -1841),
+ INT16_C( -1212), INT16_C( -418), INT16_C( 2804), INT16_C(-24086)),
+ simde_mm256_set_epi16(INT16_C( 28981), INT16_C( 21254), INT16_C( 12206), INT16_C( 17751),
+ INT16_C( 4887), INT16_C( 27025), INT16_C( 20436), INT16_C( 3143),
+ INT16_C( 5806), INT16_C( 19398), INT16_C( 23890), INT16_C( 1841),
+ INT16_C( 1212), INT16_C( 418), INT16_C( 2804), INT16_C( 24086)) },
+ { simde_mm256_set_epi16(INT16_C(-32227), INT16_C( 26559), INT16_C( 32468), INT16_C( 9282),
+ INT16_C( 10212), INT16_C( 7157), INT16_C(-18109), INT16_C(-13716),
+ INT16_C( 3356), INT16_C( -6654), INT16_C( 3548), INT16_C(-31612),
+ INT16_C( -3226), INT16_C(-30156), INT16_C(-15323), INT16_C( 8689)),
+ simde_mm256_set_epi16(INT16_C( 32227), INT16_C( 26559), INT16_C( 32468), INT16_C( 9282),
+ INT16_C( 10212), INT16_C( 7157), INT16_C( 18109), INT16_C( 13716),
+ INT16_C( 3356), INT16_C( 6654), INT16_C( 3548), INT16_C( 31612),
+ INT16_C( 3226), INT16_C( 30156), INT16_C( 15323), INT16_C( 8689)) },
+ { simde_mm256_set_epi16(INT16_C( 14337), INT16_C(-20237), INT16_C( 7001), INT16_C( 29027),
+ INT16_C( -3029), INT16_C( 12894), INT16_C(-24482), INT16_C( -8195),
+ INT16_C( -7637), INT16_C(-26436), INT16_C( 15950), INT16_C( 5319),
+ INT16_C( 22977), INT16_C( -593), INT16_C(-29639), INT16_C( 23312)),
+ simde_mm256_set_epi16(INT16_C( 14337), INT16_C( 20237), INT16_C( 7001), INT16_C( 29027),
+ INT16_C( 3029), INT16_C( 12894), INT16_C( 24482), INT16_C( 8195),
+ INT16_C( 7637), INT16_C( 26436), INT16_C( 15950), INT16_C( 5319),
+ INT16_C( 22977), INT16_C( 593), INT16_C( 29639), INT16_C( 23312)) },
+ { simde_mm256_set_epi16(INT16_C( 4249), INT16_C( -3888), INT16_C( 15630), INT16_C(-11095),
+ INT16_C(-21648), INT16_C(-10947), INT16_C( -1651), INT16_C( 5821),
+ INT16_C( 25032), INT16_C( 26383), INT16_C(-18726), INT16_C(-14746),
+ INT16_C( 9694), INT16_C(-29231), INT16_C( 18526), INT16_C(-12816)),
+ simde_mm256_set_epi16(INT16_C( 4249), INT16_C( 3888), INT16_C( 15630), INT16_C( 11095),
+ INT16_C( 21648), INT16_C( 10947), INT16_C( 1651), INT16_C( 5821),
+ INT16_C( 25032), INT16_C( 26383), INT16_C( 18726), INT16_C( 14746),
+ INT16_C( 9694), INT16_C( 29231), INT16_C( 18526), INT16_C( 12816)) },
+ { simde_mm256_set_epi16(INT16_C( 6410), INT16_C( 4746), INT16_C( 16873), INT16_C(-29607),
+ INT16_C( 21314), INT16_C(-32512), INT16_C(-23052), INT16_C( 20594),
+ INT16_C( -1613), INT16_C( 26993), INT16_C( 28325), INT16_C( 406),
+ INT16_C(-19031), INT16_C( 6060), INT16_C(-29650), INT16_C( 8164)),
+ simde_mm256_set_epi16(INT16_C( 6410), INT16_C( 4746), INT16_C( 16873), INT16_C( 29607),
+ INT16_C( 21314), INT16_C( 32512), INT16_C( 23052), INT16_C( 20594),
+ INT16_C( 1613), INT16_C( 26993), INT16_C( 28325), INT16_C( 406),
+ INT16_C( 19031), INT16_C( 6060), INT16_C( 29650), INT16_C( 8164)) }
+ };
+ /* Run the function on every input and assert equality with the stored result. */
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m256i r = simde_mm256_abs_epi16(test_vec[i].a);
+ simde_assert_m256i_i16(r, ==, test_vec[i].r);
+ }
+
+ return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm256_abs_epi32(const MunitParameter params[], void* data) {
+ (void) params; /* not used by this test */
+ (void) data; /* not used by this test */
+ /* Checks simde_mm256_abs_epi32 against eight precomputed input/expected pairs. */
+ const struct {
+ simde__m256i a; /* input vector */
+ simde__m256i r; /* expected result for a */
+ } test_vec[8] = {
+ { simde_mm256_set_epi32(INT32_C( 596456800), INT32_C( 917980192), INT32_C( 1389477380), INT32_C(-1487744611),
+ INT32_C( 612913673), INT32_C( 1013476698), INT32_C( 793290876), INT32_C(-1404571583)),
+ simde_mm256_set_epi32(INT32_C( 596456800), INT32_C( 917980192), INT32_C( 1389477380), INT32_C( 1487744611),
+ INT32_C( 612913673), INT32_C( 1013476698), INT32_C( 793290876), INT32_C( 1404571583)) },
+ { simde_mm256_set_epi32(INT32_C( 1329716652), INT32_C( 387813132), INT32_C(-1411588083), INT32_C(-1664087503),
+ INT32_C( 826397593), INT32_C( -4558887), INT32_C(-1413962953), INT32_C( -773080002)),
+ simde_mm256_set_epi32(INT32_C( 1329716652), INT32_C( 387813132), INT32_C( 1411588083), INT32_C( 1664087503),
+ INT32_C( 826397593), INT32_C( 4558887), INT32_C( 1413962953), INT32_C( 773080002)) },
+ { simde_mm256_set_epi32(INT32_C( 757826719), INT32_C( 1611074406), INT32_C( 373914154), INT32_C( -811828514),
+ INT32_C( 328214411), INT32_C( 1892324364), INT32_C( 90472529), INT32_C( 1582116146)),
+ simde_mm256_set_epi32(INT32_C( 757826719), INT32_C( 1611074406), INT32_C( 373914154), INT32_C( 811828514),
+ INT32_C( 328214411), INT32_C( 1892324364), INT32_C( 90472529), INT32_C( 1582116146)) },
+ { simde_mm256_set_epi32(INT32_C(-1899254534), INT32_C( 799950167), INT32_C( 320301457), INT32_C( 1339356089),
+ INT32_C( 380521414), INT32_C( 1565718735), INT32_C( -79364514), INT32_C( 183804394)),
+ simde_mm256_set_epi32(INT32_C( 1899254534), INT32_C( 799950167), INT32_C( 320301457), INT32_C( 1339356089),
+ INT32_C( 380521414), INT32_C( 1565718735), INT32_C( 79364514), INT32_C( 183804394)) },
+ { simde_mm256_set_epi32(INT32_C(-2112002113), INT32_C( 2127832130), INT32_C( 669260789), INT32_C(-1186739604),
+ INT32_C( 219997698), INT32_C( 232555652), INT32_C( -211383756), INT32_C(-1004199439)),
+ simde_mm256_set_epi32(INT32_C( 2112002113), INT32_C( 2127832130), INT32_C( 669260789), INT32_C( 1186739604),
+ INT32_C( 219997698), INT32_C( 232555652), INT32_C( 211383756), INT32_C( 1004199439)) },
+ { simde_mm256_set_epi32(INT32_C( 939634931), INT32_C( 458846563), INT32_C( -198495650), INT32_C(-1604395011),
+ INT32_C( -500459332), INT32_C( 1045304519), INT32_C( 1505885615), INT32_C(-1942398192)),
+ simde_mm256_set_epi32(INT32_C( 939634931), INT32_C( 458846563), INT32_C( 198495650), INT32_C( 1604395011),
+ INT32_C( 500459332), INT32_C( 1045304519), INT32_C( 1505885615), INT32_C( 1942398192)) },
+ { simde_mm256_set_epi32(INT32_C( 278524112), INT32_C( 1024382121), INT32_C(-1418668739), INT32_C( -108194115),
+ INT32_C( 1640523535), INT32_C(-1227176346), INT32_C( 635342289), INT32_C( 1214172656)),
+ simde_mm256_set_epi32(INT32_C( 278524112), INT32_C( 1024382121), INT32_C( 1418668739), INT32_C( 108194115),
+ INT32_C( 1640523535), INT32_C( 1227176346), INT32_C( 635342289), INT32_C( 1214172656)) },
+ { simde_mm256_set_epi32(INT32_C( 420090506), INT32_C( 1105824857), INT32_C( 1396867328), INT32_C(-1510715278),
+ INT32_C( -105682575), INT32_C( 1856307606), INT32_C(-1247209556), INT32_C(-1943134236)),
+ simde_mm256_set_epi32(INT32_C( 420090506), INT32_C( 1105824857), INT32_C( 1396867328), INT32_C( 1510715278),
+ INT32_C( 105682575), INT32_C( 1856307606), INT32_C( 1247209556), INT32_C( 1943134236)) }
+ };
+ /* Run the function on every input and assert equality with the stored result. */
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m256i r = simde_mm256_abs_epi32(test_vec[i].a);
+ simde_assert_m256i_i32(r, ==, test_vec[i].r);
+ }
+
+ return MUNIT_OK;
+}
+
static MunitResult
test_simde_mm256_add_epi8(const MunitParameter params[], void* data) {
(void) params;
@@ -8540,6 +8821,401 @@ test_simde_mm_blend_epi32(const MunitParameter params[], void* data) {
return MUNIT_OK;
}
+static MunitResult
+test_simde_mm256_min_epi8(const MunitParameter params[], void* data) {
+ (void) params; /* not used by this test */
+ (void) data; /* not used by this test */
+ /* Checks simde_mm256_min_epi8 against eight precomputed (a, b) -> r triples. */
+ const struct {
+ simde__m256i a; /* first operand */
+ simde__m256i b; /* second operand */
+ simde__m256i r; /* expected result for (a, b) */
+ } test_vec[8] = {
+ { simde_mm256_set_epi8(INT8_C(-113), INT8_C( 124), INT8_C( 15), INT8_C(-126),
+ INT8_C( -14), INT8_C( -39), INT8_C( -2), INT8_C( -33),
+ INT8_C( 77), INT8_C(-107), INT8_C( -95), INT8_C( -10),
+ INT8_C( 24), INT8_C( -89), INT8_C( 80), INT8_C( 69),
+ INT8_C( -99), INT8_C( 54), INT8_C( 5), INT8_C( 103),
+ INT8_C( 71), INT8_C( 11), INT8_C( -41), INT8_C( 122),
+ INT8_C( 57), INT8_C( -91), INT8_C(-106), INT8_C( 27),
+ INT8_C( 57), INT8_C( -8), INT8_C( -10), INT8_C( 38)),
+ simde_mm256_set_epi8(INT8_C( 58), INT8_C(-107), INT8_C( 43), INT8_C(-106),
+ INT8_C(-127), INT8_C( 87), INT8_C( 107), INT8_C( 115),
+ INT8_C( 91), INT8_C( 104), INT8_C( -95), INT8_C( -69),
+ INT8_C( -37), INT8_C( 126), INT8_C( 40), INT8_C( 53),
+ INT8_C(-103), INT8_C( 84), INT8_C( 99), INT8_C( 38),
+ INT8_C( 126), INT8_C( -61), INT8_C( 117), INT8_C( 48),
+ INT8_C( 65), INT8_C( 73), INT8_C( 55), INT8_C( 73),
+ INT8_C( -94), INT8_C( -78), INT8_C( -6), INT8_C( 49)),
+ simde_mm256_set_epi8(INT8_C(-113), INT8_C(-107), INT8_C( 15), INT8_C(-126),
+ INT8_C(-127), INT8_C( -39), INT8_C( -2), INT8_C( -33),
+ INT8_C( 77), INT8_C(-107), INT8_C( -95), INT8_C( -69),
+ INT8_C( -37), INT8_C( -89), INT8_C( 40), INT8_C( 53),
+ INT8_C(-103), INT8_C( 54), INT8_C( 5), INT8_C( 38),
+ INT8_C( 71), INT8_C( -61), INT8_C( -41), INT8_C( 48),
+ INT8_C( 57), INT8_C( -91), INT8_C(-106), INT8_C( 27),
+ INT8_C( -94), INT8_C( -78), INT8_C( -10), INT8_C( 38)) },
+ { simde_mm256_set_epi8(INT8_C( 53), INT8_C( 21), INT8_C( 98), INT8_C( 120),
+ INT8_C(-113), INT8_C( 39), INT8_C( -37), INT8_C( 99),
+ INT8_C(-124), INT8_C( -15), INT8_C( -78), INT8_C( 107),
+ INT8_C( 24), INT8_C( 124), INT8_C( -54), INT8_C( 81),
+ INT8_C( 115), INT8_C( -24), INT8_C( 66), INT8_C( 27),
+ INT8_C( 15), INT8_C( 61), INT8_C( 57), INT8_C(-118),
+ INT8_C(-113), INT8_C( -12), INT8_C( -8), INT8_C( -69),
+ INT8_C( -18), INT8_C( 21), INT8_C( 83), INT8_C( 113)),
+ simde_mm256_set_epi8(INT8_C(-103), INT8_C(-115), INT8_C( -82), INT8_C( 99),
+ INT8_C( 114), INT8_C( 96), INT8_C( 122), INT8_C(-112),
+ INT8_C( 16), INT8_C( 31), INT8_C( 14), INT8_C( -56),
+ INT8_C( -48), INT8_C( 2), INT8_C( -47), INT8_C( 23),
+ INT8_C( 105), INT8_C( -82), INT8_C( 84), INT8_C( 120),
+ INT8_C( 10), INT8_C( 6), INT8_C( -73), INT8_C( -69),
+ INT8_C( -15), INT8_C( -30), INT8_C( -84), INT8_C( 4),
+ INT8_C( 118), INT8_C( 78), INT8_C(-102), INT8_C(-121)),
+ simde_mm256_set_epi8(INT8_C(-103), INT8_C(-115), INT8_C( -82), INT8_C( 99),
+ INT8_C(-113), INT8_C( 39), INT8_C( -37), INT8_C(-112),
+ INT8_C(-124), INT8_C( -15), INT8_C( -78), INT8_C( -56),
+ INT8_C( -48), INT8_C( 2), INT8_C( -54), INT8_C( 23),
+ INT8_C( 105), INT8_C( -82), INT8_C( 66), INT8_C( 27),
+ INT8_C( 10), INT8_C( 6), INT8_C( -73), INT8_C(-118),
+ INT8_C(-113), INT8_C( -30), INT8_C( -84), INT8_C( -69),
+ INT8_C( -18), INT8_C( 21), INT8_C(-102), INT8_C(-121)) },
+ { simde_mm256_set_epi8(INT8_C( 99), INT8_C( 36), INT8_C( -33), INT8_C(-101),
+ INT8_C(-112), INT8_C(-121), INT8_C( 47), INT8_C( -29),
+ INT8_C( 104), INT8_C( 110), INT8_C( -84), INT8_C( 72),
+ INT8_C( -54), INT8_C( -40), INT8_C( 19), INT8_C(-103),
+ INT8_C( -69), INT8_C( -50), INT8_C( 26), INT8_C( -97),
+ INT8_C( -51), INT8_C( -78), INT8_C( 41), INT8_C( -85),
+ INT8_C( 36), INT8_C( 76), INT8_C( 111), INT8_C( 49),
+ INT8_C( 97), INT8_C( -3), INT8_C( 36), INT8_C( 67)),
+ simde_mm256_set_epi8(INT8_C( 66), INT8_C( -8), INT8_C( 56), INT8_C( 7),
+ INT8_C( 58), INT8_C(-107), INT8_C( 110), INT8_C( -95),
+ INT8_C( 31), INT8_C( -56), INT8_C( 42), INT8_C( 8),
+ INT8_C( -46), INT8_C( -16), INT8_C( 21), INT8_C( 126),
+ INT8_C( -12), INT8_C( -94), INT8_C( 109), INT8_C( 67),
+ INT8_C( 47), INT8_C( 103), INT8_C( 21), INT8_C( 104),
+ INT8_C( 99), INT8_C( 106), INT8_C( -91), INT8_C( 87),
+ INT8_C( -83), INT8_C( 2), INT8_C( 105), INT8_C(-105)),
+ simde_mm256_set_epi8(INT8_C( 66), INT8_C( -8), INT8_C( -33), INT8_C(-101),
+ INT8_C(-112), INT8_C(-121), INT8_C( 47), INT8_C( -95),
+ INT8_C( 31), INT8_C( -56), INT8_C( -84), INT8_C( 8),
+ INT8_C( -54), INT8_C( -40), INT8_C( 19), INT8_C(-103),
+ INT8_C( -69), INT8_C( -94), INT8_C( 26), INT8_C( -97),
+ INT8_C( -51), INT8_C( -78), INT8_C( 21), INT8_C( -85),
+ INT8_C( 36), INT8_C( 76), INT8_C( -91), INT8_C( 49),
+ INT8_C( -83), INT8_C( -3), INT8_C( 36), INT8_C(-105)) },
+ { simde_mm256_set_epi8(INT8_C( -79), INT8_C( -25), INT8_C( 15), INT8_C( 2),
+ INT8_C( -84), INT8_C( 9), INT8_C( 54), INT8_C( 99),
+ INT8_C( -69), INT8_C( 117), INT8_C( -20), INT8_C( 47),
+ INT8_C( 21), INT8_C( 44), INT8_C( 113), INT8_C( 59),
+ INT8_C( 9), INT8_C( 65), INT8_C( -7), INT8_C( -76),
+ INT8_C(-124), INT8_C( -64), INT8_C( 22), INT8_C( -79),
+ INT8_C( -58), INT8_C( 29), INT8_C( 40), INT8_C( 64),
+ INT8_C( -9), INT8_C(-123), INT8_C( -13), INT8_C( -32)),
+ simde_mm256_set_epi8(INT8_C(-114), INT8_C( -7), INT8_C( 9), INT8_C( -63),
+ INT8_C( 0), INT8_C( -82), INT8_C( -19), INT8_C( 75),
+ INT8_C( 69), INT8_C( 48), INT8_C( 42), INT8_C( -52),
+ INT8_C( 119), INT8_C( -20), INT8_C( 26), INT8_C( -57),
+ INT8_C( 88), INT8_C( 87), INT8_C( 99), INT8_C(-127),
+ INT8_C( 48), INT8_C( 19), INT8_C( 54), INT8_C( -35),
+ INT8_C( 5), INT8_C( 23), INT8_C( -21), INT8_C( 88),
+ INT8_C( 87), INT8_C(-109), INT8_C( -46), INT8_C(-127)),
+ simde_mm256_set_epi8(INT8_C(-114), INT8_C( -25), INT8_C( 9), INT8_C( -63),
+ INT8_C( -84), INT8_C( -82), INT8_C( -19), INT8_C( 75),
+ INT8_C( -69), INT8_C( 48), INT8_C( -20), INT8_C( -52),
+ INT8_C( 21), INT8_C( -20), INT8_C( 26), INT8_C( -57),
+ INT8_C( 9), INT8_C( 65), INT8_C( -7), INT8_C(-127),
+ INT8_C(-124), INT8_C( -64), INT8_C( 22), INT8_C( -79),
+ INT8_C( -58), INT8_C( 23), INT8_C( -21), INT8_C( 64),
+ INT8_C( -9), INT8_C(-123), INT8_C( -46), INT8_C(-127)) },
+ { simde_mm256_set_epi8(INT8_C( 40), INT8_C(-101), INT8_C(-116), INT8_C( -63),
+ INT8_C( -76), INT8_C( 83), INT8_C( -41), INT8_C( -2),
+ INT8_C( 74), INT8_C( 48), INT8_C(-104), INT8_C( -61),
+ INT8_C( 95), INT8_C( 50), INT8_C( 86), INT8_C(-110),
+ INT8_C( -24), INT8_C( 99), INT8_C( -15), INT8_C( 126),
+ INT8_C( -73), INT8_C( -17), INT8_C(-122), INT8_C( -7),
+ INT8_C( 95), INT8_C( -91), INT8_C( 66), INT8_C( -2),
+ INT8_C( 88), INT8_C( -54), INT8_C( 117), INT8_C( -86)),
+ simde_mm256_set_epi8(INT8_C( -45), INT8_C( -41), INT8_C( 87), INT8_C( -75),
+ INT8_C( 104), INT8_C( -1), INT8_C( 11), INT8_C( 125),
+ INT8_C(-123), INT8_C( 86), INT8_C( 26), INT8_C( 46),
+ INT8_C( 6), INT8_C( 54), INT8_C( -77), INT8_C( 63),
+ INT8_C( 79), INT8_C(-127), INT8_C(-118), INT8_C( 107),
+ INT8_C(-122), INT8_C( -36), INT8_C( -2), INT8_C( -64),
+ INT8_C( 65), INT8_C( -89), INT8_C( 86), INT8_C( 99),
+ INT8_C( -49), INT8_C( 59), INT8_C( -15), INT8_C( 103)),
+ simde_mm256_set_epi8(INT8_C( -45), INT8_C(-101), INT8_C(-116), INT8_C( -75),
+ INT8_C( -76), INT8_C( -1), INT8_C( -41), INT8_C( -2),
+ INT8_C(-123), INT8_C( 48), INT8_C(-104), INT8_C( -61),
+ INT8_C( 6), INT8_C( 50), INT8_C( -77), INT8_C(-110),
+ INT8_C( -24), INT8_C(-127), INT8_C(-118), INT8_C( 107),
+ INT8_C(-122), INT8_C( -36), INT8_C(-122), INT8_C( -64),
+ INT8_C( 65), INT8_C( -91), INT8_C( 66), INT8_C( -2),
+ INT8_C( -49), INT8_C( -54), INT8_C( -15), INT8_C( -86)) },
+ { simde_mm256_set_epi8(INT8_C( -6), INT8_C( 22), INT8_C( -98), INT8_C(-111),
+ INT8_C( -27), INT8_C( -45), INT8_C( -60), INT8_C( -64),
+ INT8_C( -14), INT8_C( -15), INT8_C( -20), INT8_C( 52),
+ INT8_C(-110), INT8_C( 52), INT8_C( 120), INT8_C( 119),
+ INT8_C( -35), INT8_C( 3), INT8_C( 42), INT8_C( 109),
+ INT8_C( 16), INT8_C( -18), INT8_C( -49), INT8_C( 102),
+ INT8_C( 87), INT8_C( -69), INT8_C( 10), INT8_C( 0),
+ INT8_C( 1), INT8_C( 21), INT8_C( 6), INT8_C( -56)),
+ simde_mm256_set_epi8(INT8_C( -60), INT8_C( 58), INT8_C( 68), INT8_C(-110),
+ INT8_C( 25), INT8_C( 53), INT8_C(-112), INT8_C( 1),
+ INT8_C( 104), INT8_C( 22), INT8_C( -20), INT8_C(-119),
+ INT8_C( -66), INT8_C( -43), INT8_C( 109), INT8_C( -61),
+ INT8_C( -86), INT8_C( -59), INT8_C( -31), INT8_C( -86),
+ INT8_C( -11), INT8_C( 72), INT8_C( 122), INT8_C(-127),
+ INT8_C( 21), INT8_C( 32), INT8_C( 109), INT8_C( 110),
+ INT8_C( 112), INT8_C( -23),  INT8_C( 44), INT8_C( 69)),
+ simde_mm256_set_epi8(INT8_C( -60), INT8_C( 22), INT8_C( -98), INT8_C(-111),
+ INT8_C( -27), INT8_C( -45), INT8_C(-112), INT8_C( -64),
+ INT8_C( -14), INT8_C( -15), INT8_C( -20), INT8_C(-119),
+ INT8_C(-110), INT8_C( -43), INT8_C( 109), INT8_C( -61),
+ INT8_C( -86), INT8_C( -59), INT8_C( -31), INT8_C( -86),
+ INT8_C( -11), INT8_C( -18), INT8_C( -49), INT8_C(-127),
+ INT8_C( 21), INT8_C( -69), INT8_C( 10), INT8_C( 0),
+ INT8_C( 1), INT8_C( -23), INT8_C( 6), INT8_C( -56)) },
+ { simde_mm256_set_epi8(INT8_C( -81), INT8_C( 40), INT8_C( 34), INT8_C(-101),
+ INT8_C( 85), INT8_C( 102), INT8_C( -21), INT8_C( 6),
+ INT8_C( 23), INT8_C( -23), INT8_C( -36), INT8_C( 44),
+ INT8_C( -12), INT8_C( 110), INT8_C( -68), INT8_C( 47),
+ INT8_C( 45), INT8_C( -59), INT8_C(-112), INT8_C( 86),
+ INT8_C( -95), INT8_C( -9), INT8_C(-101), INT8_C( 83),
+ INT8_C( 37), INT8_C( 85), INT8_C( 31), INT8_C( -4),
+ INT8_C( 98), INT8_C( -87), INT8_C( -65), INT8_C( -71)),
+ simde_mm256_set_epi8(INT8_C( -58), INT8_C(-110), INT8_C( 4), INT8_C( 10),
+ INT8_C( -39), INT8_C( -8), INT8_C( 126), INT8_C( 76),
+ INT8_C( 85), INT8_C( -11), INT8_C(-106), INT8_C( 42),
+ INT8_C( 125), INT8_C( 46), INT8_C( 63), INT8_C( 35),
+ INT8_C( -24), INT8_C( -99), INT8_C( 71), INT8_C(-108),
+ INT8_C( 52), INT8_C(-105), INT8_C( -88), INT8_C( -28),
+ INT8_C( 12), INT8_C( -41), INT8_C( 81), INT8_C( 38),
+ INT8_C( -13), INT8_C( 29), INT8_C( -44), INT8_C( 59)),
+ simde_mm256_set_epi8(INT8_C( -81), INT8_C(-110), INT8_C( 4), INT8_C(-101),
+ INT8_C( -39), INT8_C( -8), INT8_C( -21), INT8_C( 6),
+ INT8_C( 23), INT8_C( -23), INT8_C(-106), INT8_C( 42),
+ INT8_C( -12), INT8_C( 46), INT8_C( -68), INT8_C( 35),
+ INT8_C( -24), INT8_C( -99), INT8_C(-112), INT8_C(-108),
+ INT8_C( -95), INT8_C(-105), INT8_C(-101), INT8_C( -28),
+ INT8_C( 12), INT8_C( -41), INT8_C( 31), INT8_C( -4),
+ INT8_C( -13), INT8_C( -87), INT8_C( -65), INT8_C( -71)) },
+ { simde_mm256_set_epi8(INT8_C( -36), INT8_C( -94), INT8_C( -61), INT8_C( 117),
+ INT8_C( 0), INT8_C( -1), INT8_C( 118), INT8_C( 99),
+ INT8_C( -54), INT8_C(-123), INT8_C( 18), INT8_C( 111),
+ INT8_C( 8), INT8_C( -59), INT8_C( 107), INT8_C( 77),
+ INT8_C( 7), INT8_C( 91), INT8_C( 25), INT8_C( 70),
+ INT8_C( 80), INT8_C( 81), INT8_C( 11), INT8_C( 29),
+ INT8_C( 109), INT8_C( 125), INT8_C( -51), INT8_C( 58),
+ INT8_C( 66), INT8_C( 126), INT8_C(-122), INT8_C( 96)),
+ simde_mm256_set_epi8(INT8_C( -91), INT8_C( -28), INT8_C( 59), INT8_C( 45),
+ INT8_C( 99), INT8_C( 67), INT8_C( -54), INT8_C( 122),
+ INT8_C( -4), INT8_C( -6), INT8_C( -77), INT8_C( 96),
+ INT8_C( 16), INT8_C( 37), INT8_C(-127), INT8_C( 76),
+ INT8_C( 56), INT8_C( -93), INT8_C( 7), INT8_C( -50),
+ INT8_C( 56), INT8_C(-112), INT8_C( 6), INT8_C( -49),
+ INT8_C( 107), INT8_C( -15), INT8_C( -42), INT8_C(-121),
+ INT8_C( -81), INT8_C( 97), INT8_C( -56), INT8_C( 97)),
+ simde_mm256_set_epi8(INT8_C( -91), INT8_C( -94), INT8_C( -61), INT8_C( 45),
+ INT8_C( 0), INT8_C( -1), INT8_C( -54), INT8_C( 99),
+ INT8_C( -54), INT8_C(-123), INT8_C( -77), INT8_C( 96),
+ INT8_C( 8), INT8_C( -59), INT8_C(-127), INT8_C( 76),
+ INT8_C( 7), INT8_C( -93), INT8_C( 7), INT8_C( -50),
+ INT8_C( 56), INT8_C(-112), INT8_C( 6), INT8_C( -49),
+ INT8_C( 107), INT8_C( -15), INT8_C( -51), INT8_C(-121),
+ INT8_C( -81), INT8_C( 97), INT8_C(-122), INT8_C( 96)) }
+ };
+ /* Run the function on every operand pair and assert equality with the stored result. */
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m256i r = simde_mm256_min_epi8(test_vec[i].a, test_vec[i].b);
+ simde_assert_m256i_i8(r, ==, test_vec[i].r);
+ }
+
+ return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm256_min_epi16(const MunitParameter params[], void* data) {
+ (void) params;
+ (void) data;
+
+ const struct {
+ simde__m256i a;
+ simde__m256i b;
+ simde__m256i r;
+ } test_vec[8] = {
+ { simde_mm256_set_epi16(INT16_C(-28804), INT16_C( 3970), INT16_C( -3367), INT16_C( -289),
+ INT16_C( 19861), INT16_C(-24074), INT16_C( 6311), INT16_C( 20549),
+ INT16_C(-25290), INT16_C( 1383), INT16_C( 18187), INT16_C(-10374),
+ INT16_C( 14757), INT16_C(-27109), INT16_C( 14840), INT16_C( -2522)),
+ simde_mm256_set_epi16(INT16_C( 14997), INT16_C( 11158), INT16_C(-32425), INT16_C( 27507),
+ INT16_C( 23400), INT16_C(-24133), INT16_C( -9346), INT16_C( 10293),
+ INT16_C(-26284), INT16_C( 25382), INT16_C( 32451), INT16_C( 30000),
+ INT16_C( 16713), INT16_C( 14153), INT16_C(-23886), INT16_C( -1487)),
+ simde_mm256_set_epi16(INT16_C(-28804), INT16_C( 3970), INT16_C(-32425), INT16_C( -289),
+ INT16_C( 19861), INT16_C(-24133), INT16_C( -9346), INT16_C( 10293),
+ INT16_C(-26284), INT16_C( 1383), INT16_C( 18187), INT16_C(-10374),
+ INT16_C( 14757), INT16_C(-27109), INT16_C(-23886), INT16_C( -2522)) },
+ { simde_mm256_set_epi16(INT16_C( 13589), INT16_C( 25208), INT16_C(-28889), INT16_C( -9373),
+ INT16_C(-31503), INT16_C(-19861), INT16_C( 6268), INT16_C(-13743),
+ INT16_C( 29672), INT16_C( 16923), INT16_C( 3901), INT16_C( 14730),
+ INT16_C(-28684), INT16_C( -1861), INT16_C( -4587), INT16_C( 21361)),
+ simde_mm256_set_epi16(INT16_C(-26227), INT16_C(-20893), INT16_C( 29280), INT16_C( 31376),
+ INT16_C( 4127), INT16_C( 3784), INT16_C(-12286), INT16_C(-12009),
+ INT16_C( 27054), INT16_C( 21624), INT16_C( 2566), INT16_C(-18501),
+ INT16_C( -3614), INT16_C(-21500), INT16_C( 30286), INT16_C(-25977)),
+ simde_mm256_set_epi16(INT16_C(-26227), INT16_C(-20893), INT16_C(-28889), INT16_C( -9373),
+ INT16_C(-31503), INT16_C(-19861), INT16_C(-12286), INT16_C(-13743),
+ INT16_C( 27054), INT16_C( 16923), INT16_C( 2566), INT16_C(-18501),
+ INT16_C(-28684), INT16_C(-21500), INT16_C( -4587), INT16_C(-25977)) },
+ { simde_mm256_set_epi16(INT16_C( 25380), INT16_C( -8293), INT16_C(-28537), INT16_C( 12259),
+ INT16_C( 26734), INT16_C(-21432), INT16_C(-13608), INT16_C( 5017),
+ INT16_C(-17458), INT16_C( 6815), INT16_C(-12878), INT16_C( 10667),
+ INT16_C( 9292), INT16_C( 28465), INT16_C( 25085), INT16_C( 9283)),
+ simde_mm256_set_epi16(INT16_C( 17144), INT16_C( 14343), INT16_C( 14997), INT16_C( 28321),
+ INT16_C( 8136), INT16_C( 10760), INT16_C(-11536), INT16_C( 5502),
+ INT16_C( -2910), INT16_C( 27971), INT16_C( 12135), INT16_C( 5480),
+ INT16_C( 25450), INT16_C(-23209), INT16_C(-21246), INT16_C( 27031)),
+ simde_mm256_set_epi16(INT16_C( 17144), INT16_C( -8293), INT16_C(-28537), INT16_C( 12259),
+ INT16_C( 8136), INT16_C(-21432), INT16_C(-13608), INT16_C( 5017),
+ INT16_C(-17458), INT16_C( 6815), INT16_C(-12878), INT16_C( 5480),
+ INT16_C( 9292), INT16_C(-23209), INT16_C(-21246), INT16_C( 9283)) },
+ { simde_mm256_set_epi16(INT16_C(-19993), INT16_C( 3842), INT16_C(-21495), INT16_C( 13923),
+ INT16_C(-17547), INT16_C( -5073), INT16_C( 5420), INT16_C( 28987),
+ INT16_C( 2369), INT16_C( -1612), INT16_C(-31552), INT16_C( 5809),
+ INT16_C(-14819), INT16_C( 10304), INT16_C( -2171), INT16_C( -3104)),
+ simde_mm256_set_epi16(INT16_C(-28935), INT16_C( 2497), INT16_C( 174), INT16_C( -4789),
+ INT16_C( 17712), INT16_C( 10956), INT16_C( 30700), INT16_C( 6855),
+ INT16_C( 22615), INT16_C( 25473), INT16_C( 12307), INT16_C( 14045),
+ INT16_C( 1303), INT16_C( -5288), INT16_C( 22419), INT16_C(-11647)),
+ simde_mm256_set_epi16(INT16_C(-28935), INT16_C( 2497), INT16_C(-21495), INT16_C( -4789),
+ INT16_C(-17547), INT16_C( -5073), INT16_C( 5420), INT16_C( 6855),
+ INT16_C( 2369), INT16_C( -1612), INT16_C(-31552), INT16_C( 5809),
+ INT16_C(-14819), INT16_C( -5288), INT16_C( -2171), INT16_C(-11647)) },
+ { simde_mm256_set_epi16(INT16_C( 10395), INT16_C(-29503), INT16_C(-19373), INT16_C(-10242),
+ INT16_C( 18992), INT16_C(-26429), INT16_C( 24370), INT16_C( 22162),
+ INT16_C( -6045), INT16_C( -3714), INT16_C(-18449), INT16_C(-30983),
+ INT16_C( 24485), INT16_C( 17150), INT16_C( 22730), INT16_C( 30122)),
+ simde_mm256_set_epi16(INT16_C(-11305), INT16_C( 22453), INT16_C( 26879), INT16_C( 2941),
+ INT16_C(-31402), INT16_C( 6702), INT16_C( 1590), INT16_C(-19649),
+ INT16_C( 20353), INT16_C(-30101), INT16_C(-31012), INT16_C( -320),
+ INT16_C( 16807), INT16_C( 22115), INT16_C(-12485), INT16_C( -3737)),
+ simde_mm256_set_epi16(INT16_C(-11305), INT16_C(-29503), INT16_C(-19373), INT16_C(-10242),
+ INT16_C(-31402), INT16_C(-26429), INT16_C( 1590), INT16_C(-19649),
+ INT16_C( -6045), INT16_C(-30101), INT16_C(-31012), INT16_C(-30983),
+ INT16_C( 16807), INT16_C( 17150), INT16_C(-12485), INT16_C( -3737)) },
+ { simde_mm256_set_epi16(INT16_C( -1514), INT16_C(-24943), INT16_C( -6701), INT16_C(-15168),
+ INT16_C( -3343), INT16_C( -5068), INT16_C(-28108), INT16_C( 30839),
+ INT16_C( -8957), INT16_C( 10861), INT16_C( 4334), INT16_C(-12442),
+ INT16_C( 22459), INT16_C( 2560), INT16_C( 277), INT16_C( 1736)),
+ simde_mm256_set_epi16(INT16_C(-15302), INT16_C( 17554), INT16_C( 6453), INT16_C(-28671),
+ INT16_C( 26646), INT16_C( -4983), INT16_C(-16683), INT16_C( 28099),
+ INT16_C(-21819), INT16_C( -7766), INT16_C( -2744), INT16_C( 31361),
+ INT16_C( 5408), INT16_C( 28014), INT16_C( 28905), INT16_C( 11333)),
+ simde_mm256_set_epi16(INT16_C(-15302), INT16_C(-24943), INT16_C( -6701), INT16_C(-28671),
+ INT16_C( -3343), INT16_C( -5068), INT16_C(-28108), INT16_C( 28099),
+ INT16_C(-21819), INT16_C( -7766), INT16_C( -2744), INT16_C(-12442),
+ INT16_C( 5408), INT16_C( 2560), INT16_C( 277), INT16_C( 1736)) },
+ { simde_mm256_set_epi16(INT16_C(-20696), INT16_C( 8859), INT16_C( 21862), INT16_C( -5370),
+ INT16_C( 6121), INT16_C( -9172), INT16_C( -2962), INT16_C(-17361),
+ INT16_C( 11717), INT16_C(-28586), INT16_C(-24073), INT16_C(-25773),
+ INT16_C( 9557), INT16_C( 8188), INT16_C( 25257), INT16_C(-16455)),
+ simde_mm256_set_epi16(INT16_C(-14702), INT16_C( 1034), INT16_C( -9736), INT16_C( 32332),
+ INT16_C( 22005), INT16_C(-27094), INT16_C( 32046), INT16_C( 16163),
+ INT16_C( -5987), INT16_C( 18324), INT16_C( 13463), INT16_C(-22300),
+ INT16_C( 3287), INT16_C( 20774), INT16_C( -3299), INT16_C(-11205)),
+ simde_mm256_set_epi16(INT16_C(-20696), INT16_C( 1034), INT16_C( -9736), INT16_C( -5370),
+ INT16_C( 6121), INT16_C(-27094), INT16_C( -2962), INT16_C(-17361),
+ INT16_C( -5987), INT16_C(-28586), INT16_C(-24073), INT16_C(-25773),
+ INT16_C( 3287), INT16_C( 8188), INT16_C( -3299), INT16_C(-16455)) },
+ { simde_mm256_set_epi16(INT16_C( -9054), INT16_C(-15499), INT16_C( 255), INT16_C( 30307),
+ INT16_C(-13691), INT16_C( 4719), INT16_C( 2245), INT16_C( 27469),
+ INT16_C( 1883), INT16_C( 6470), INT16_C( 20561), INT16_C( 2845),
+ INT16_C( 28029), INT16_C(-12998), INT16_C( 17022), INT16_C(-31136)),
+ simde_mm256_set_epi16(INT16_C(-23068), INT16_C( 15149), INT16_C( 25411), INT16_C(-13702),
+ INT16_C( -774), INT16_C(-19616), INT16_C( 4133), INT16_C(-32436),
+ INT16_C( 14499), INT16_C( 1998), INT16_C( 14480), INT16_C( 1743),
+ INT16_C( 27633), INT16_C(-10617), INT16_C(-20639), INT16_C(-14239)),
+ simde_mm256_set_epi16(INT16_C(-23068), INT16_C(-15499), INT16_C( 255), INT16_C(-13702),
+ INT16_C(-13691), INT16_C(-19616), INT16_C( 2245), INT16_C(-32436),
+ INT16_C( 1883), INT16_C( 1998), INT16_C( 14480), INT16_C( 1743),
+ INT16_C( 27633), INT16_C(-12998), INT16_C(-20639), INT16_C(-31136)) }
+ };
+
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m256i r = simde_mm256_min_epi16(test_vec[i].a, test_vec[i].b);
+ simde_assert_m256i_i16(r, ==, test_vec[i].r);
+ }
+
+ return MUNIT_OK;
+}
+
+static MunitResult /* Table-driven munit test for simde_mm256_min_epi32 over 8 fixed operand pairs. */
+test_simde_mm256_min_epi32(const MunitParameter params[], void* data) {
+ (void) params; /* not referenced anywhere else in this function */
+ (void) data; /* not referenced anywhere else in this function */
+ /* Each entry holds the two operands (a, b) and r, the vector the call must produce. */
+ const struct {
+ simde__m256i a;
+ simde__m256i b;
+ simde__m256i r;
+ } test_vec[8] = { /* in the first entry every r value is the smaller (signed) of the matching a/b values */
+ { simde_mm256_set_epi32(INT32_C(-1887694974), INT32_C( -220594465), INT32_C( 1301651958), INT32_C( 413618245),
+ INT32_C(-1657404057), INT32_C( 1191958394), INT32_C( 967153179), INT32_C( 972617254)),
+ simde_mm256_set_epi32(INT32_C( 982854550), INT32_C(-2124977293), INT32_C( 1533583803), INT32_C( -612489163),
+ INT32_C(-1722522842), INT32_C( 2126738736), INT32_C( 1095317321), INT32_C(-1565328847)),
+ simde_mm256_set_epi32(INT32_C(-1887694974), INT32_C(-2124977293), INT32_C( 1301651958), INT32_C( -612489163),
+ INT32_C(-1722522842), INT32_C( 1191958394), INT32_C( 967153179), INT32_C(-1565328847)) },
+ { simde_mm256_set_epi32(INT32_C( 890593912), INT32_C(-1893213341), INT32_C(-2064534933), INT32_C( 410831441),
+ INT32_C( 1944601115), INT32_C( 255670666), INT32_C(-1879770949), INT32_C( -300592271)),
+ simde_mm256_set_epi32(INT32_C(-1718768029), INT32_C( 1918925456), INT32_C( 270470856), INT32_C( -805121769),
+ INT32_C( 1773032568), INT32_C( 168212411), INT32_C( -236803068), INT32_C( 1984862855)),
+ simde_mm256_set_epi32(INT32_C(-1718768029), INT32_C(-1893213341), INT32_C(-2064534933), INT32_C( -805121769),
+ INT32_C( 1773032568), INT32_C( 168212411), INT32_C(-1879770949), INT32_C( -300592271)) },
+ { simde_mm256_set_epi32(INT32_C( 1663360923), INT32_C(-1870188573), INT32_C( 1752083528), INT32_C( -891808871),
+ INT32_C(-1144120673), INT32_C( -843961941), INT32_C( 608988977), INT32_C( 1643979843)),
+ simde_mm256_set_epi32(INT32_C( 1123563527), INT32_C( 982871713), INT32_C( 533211656), INT32_C( -756017794),
+ INT32_C( -190681789), INT32_C( 795284840), INT32_C( 1667933527), INT32_C(-1392350825)),
+ simde_mm256_set_epi32(INT32_C( 1123563527), INT32_C(-1870188573), INT32_C( 533211656), INT32_C( -891808871),
+ INT32_C(-1144120673), INT32_C( -843961941), INT32_C( 608988977), INT32_C(-1392350825)) },
+ { simde_mm256_set_epi32(INT32_C(-1310257406), INT32_C(-1408682397), INT32_C(-1149899729), INT32_C( 355234107),
+ INT32_C( 155318708), INT32_C(-2067786063), INT32_C( -971167680), INT32_C( -142216224)),
+ simde_mm256_set_epi32(INT32_C(-1896281663), INT32_C( 11464011), INT32_C( 1160784588), INT32_C( 2011962055),
+ INT32_C( 1482122113), INT32_C( 806565597), INT32_C( 85453656), INT32_C( 1469305473)),
+ simde_mm256_set_epi32(INT32_C(-1896281663), INT32_C(-1408682397), INT32_C(-1149899729), INT32_C( 355234107),
+ INT32_C( 155318708), INT32_C(-2067786063), INT32_C( -971167680), INT32_C( -142216224)) },
+ { simde_mm256_set_epi32(INT32_C( 681282753), INT32_C(-1269573634), INT32_C( 1244698819), INT32_C( 1597134482),
+ INT32_C( -396103298), INT32_C(-1209039111), INT32_C( 1604666110), INT32_C( 1489663402)),
+ simde_mm256_set_epi32(INT32_C( -740862027), INT32_C( 1761545085), INT32_C(-2057954770), INT32_C( 104248127),
+ INT32_C( 1333889643), INT32_C(-2032337216), INT32_C( 1101485667), INT32_C( -818155161)),
+ simde_mm256_set_epi32(INT32_C( -740862027), INT32_C(-1269573634), INT32_C(-2057954770), INT32_C( 104248127),
+ INT32_C( -396103298), INT32_C(-2032337216), INT32_C( 1101485667), INT32_C( -818155161)) },
+ { simde_mm256_set_epi32(INT32_C( -99180911), INT32_C( -439106368), INT32_C( -219026380), INT32_C(-1842055049),
+ INT32_C( -586995091), INT32_C( 284086118), INT32_C( 1471875584), INT32_C( 18155208)),
+ simde_mm256_set_epi32(INT32_C(-1002814318), INT32_C( 422940673), INT32_C( 1746332809), INT32_C(-1093308989),
+ INT32_C(-1429872214), INT32_C( -179799423), INT32_C( 354446702), INT32_C( 1894329413)),
+ simde_mm256_set_epi32(INT32_C(-1002814318), INT32_C( -439106368), INT32_C( -219026380), INT32_C(-1842055049),
+ INT32_C(-1429872214), INT32_C( -179799423), INT32_C( 354446702), INT32_C( 18155208)) },
+ { simde_mm256_set_epi32(INT32_C(-1356324197), INT32_C( 1432808198), INT32_C( 401202220), INT32_C( -194069457),
+ INT32_C( 767922262), INT32_C(-1577608365), INT32_C( 626335740), INT32_C( 1655291833)),
+ simde_mm256_set_epi32(INT32_C( -963509238), INT32_C( -638026164), INT32_C( 1442158122), INT32_C( 2100182819),
+ INT32_C( -392345708), INT32_C( 882354404), INT32_C( 215437606), INT32_C( -216148933)),
+ simde_mm256_set_epi32(INT32_C(-1356324197), INT32_C( -638026164), INT32_C( 401202220), INT32_C( -194069457),
+ INT32_C( -392345708), INT32_C(-1577608365), INT32_C( 215437606), INT32_C( -216148933)) },
+ { simde_mm256_set_epi32(INT32_C( -593312907), INT32_C( 16741987), INT32_C( -897248657), INT32_C( 147155789),
+ INT32_C( 123410758), INT32_C( 1347488541), INT32_C( 1836961082), INT32_C( 1115588192)),
+ simde_mm256_set_epi32(INT32_C(-1511769299), INT32_C( 1665387130), INT32_C( -50678944), INT32_C( 270893388),
+ INT32_C( 950208462), INT32_C( 948963023), INT32_C( 1811011207), INT32_C(-1352546207)),
+ simde_mm256_set_epi32(INT32_C(-1511769299), INT32_C( 16741987), INT32_C( -897248657), INT32_C( 147155789),
+ INT32_C( 123410758), INT32_C( 948963023), INT32_C( 1811011207), INT32_C(-1352546207)) }
+ };
+ /* Apply simde_mm256_min_epi32 to every entry and assert the result equals the stored r. */
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m256i r = simde_mm256_min_epi32(test_vec[i].a, test_vec[i].b);
+ simde_assert_m256i_i32(r, ==, test_vec[i].r); /* presumably compares the vectors as int32 lanes -- confirm against the macro definition */
+ }
+ /* Reached once the loop over all entries has completed. */
+ return MUNIT_OK;
+}
#endif /* defined(SIMDE_AVX2_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) */
@@ -8548,6 +9224,10 @@ HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL
static MunitTest test_suite_tests[] = {
#if defined(SIMDE_AVX2_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS)
+ SIMDE_TESTS_DEFINE_TEST(mm256_abs_epi8),
+ SIMDE_TESTS_DEFINE_TEST(mm256_abs_epi16),
+ SIMDE_TESTS_DEFINE_TEST(mm256_abs_epi32),
+
SIMDE_TESTS_DEFINE_TEST(mm256_add_epi8),
SIMDE_TESTS_DEFINE_TEST(mm256_add_epi16),
SIMDE_TESTS_DEFINE_TEST(mm256_add_epi32),
@@ -8610,6 +9290,9 @@ static MunitTest test_suite_tests[] = {
SIMDE_TESTS_DEFINE_TEST(mm256_max_epu16),
SIMDE_TESTS_DEFINE_TEST(mm256_max_epu32),
+ SIMDE_TESTS_DEFINE_TEST(mm256_min_epi8),
+ SIMDE_TESTS_DEFINE_TEST(mm256_min_epi16),
+ SIMDE_TESTS_DEFINE_TEST(mm256_min_epi32),
SIMDE_TESTS_DEFINE_TEST(mm256_min_epu8),
SIMDE_TESTS_DEFINE_TEST(mm256_min_epu16),
SIMDE_TESTS_DEFINE_TEST(mm256_min_epu32),
=====================================
test/x86/avx512f.c
=====================================
The diff for this file was not included because it is too large.
=====================================
test/x86/skel.c
=====================================
@@ -2165,8 +2165,8 @@ test_simde_mm512_xxx_epu32(const MunitParameter params[], void* data) {
" UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
" UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
" UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")),\n",
- a.i32[15], a.i32[14], a.i32[13], a.i32[12], a.i32[11], a.i32[10], a.i32[ 9], a.i32[ 8],
- a.i32[ 7], a.i32[ 6], a.i32[ 5], a.i32[ 4], a.i32[ 3], a.i32[ 2], a.i32[ 1], a.i32[ 0]);
+ a.u32[15], a.u32[14], a.u32[13], a.u32[12], a.u32[11], a.u32[10], a.u32[ 9], a.u32[ 8],
+ a.u32[ 7], a.u32[ 6], a.u32[ 5], a.u32[ 4], a.u32[ 3], a.u32[ 2], a.u32[ 1], a.u32[ 0]);
printf(" simde_x_mm512_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
" UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
" UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
@@ -2241,6 +2241,70 @@ test_simde_mm512_xxx_epu64(const MunitParameter params[], void* data) {
return MUNIT_OK;
}
+static MunitResult /* Skeleton test for a masked epu32 operation: prints generated test-vector rows, then returns MUNIT_FAIL. */
+test_simde_mm512_mask_xxx_epu32(const MunitParameter params[], void* data) { /* NOTE(review): "xxx" looks like a placeholder for a real intrinsic name -- confirm */
+ (void) params;
+ (void) data;
+ /* One row per case: pass-through source, 16-bit mask, two operands, and the expected result. */
+ const struct {
+ simde__m512i src;
+ simde__mmask16 k;
+ simde__m512i a;
+ simde__m512i b;
+ simde__m512i r;
+ } test_vec[8] = {
+ /* left empty in this skeleton; presumably the rows printed below get pasted here -- confirm */
+ };
+ /* Generator pass: one iteration per test_vec slot, each printing one initializer row to stdout. */
+ printf("\n");
+ for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
+ simde__m512i_private src, a, b, r;
+ simde__mmask16 k;
+ /* Fill the inputs with random bytes. */
+ munit_rand_memory(sizeof(src), (uint8_t*) &src);
+ munit_rand_memory(sizeof(k), (uint8_t*) &k);
+ munit_rand_memory(sizeof(a), (uint8_t*) &a);
+ munit_rand_memory(sizeof(a), (uint8_t*) &b); /* NOTE(review): sizeof(a) is used for b; a and b are declared with the same type above, so the sizes agree */
+ k &= UINT16_C(0xffff); /* keeps only the low 16 bits of the mask; NOTE(review): likely a no-op if simde__mmask16 is 16 bits wide -- confirm */
+ /* Convert the private structs to simde__m512i for the call, and the result back so its u32 lanes can be printed. */
+ r = simde__m512i_to_private(simde_mm512_mask_xxx_epu32(simde__m512i_from_private(src), k, simde__m512i_from_private(a), simde__m512i_from_private(b)));
+ /* Emit src, k, a, b and r as C initializer text, u32 lanes listed from index 15 down to 0. */
+ printf(" { simde_x_mm512_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")),\n",
+ src.u32[15], src.u32[14], src.u32[13], src.u32[12], src.u32[11], src.u32[10], src.u32[ 9], src.u32[ 8],
+ src.u32[ 7], src.u32[ 6], src.u32[ 5], src.u32[ 4], src.u32[ 3], src.u32[ 2], src.u32[ 1], src.u32[ 0]);
+ printf(" UINT16_C(%5" PRIu16 "),\n", HEDLEY_STATIC_CAST(uint16_t, k));
+ printf(" simde_x_mm512_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")),\n",
+ a.u32[15], a.u32[14], a.u32[13], a.u32[12], a.u32[11], a.u32[10], a.u32[ 9], a.u32[ 8],
+ a.u32[ 7], a.u32[ 6], a.u32[ 5], a.u32[ 4], a.u32[ 3], a.u32[ 2], a.u32[ 1], a.u32[ 0]);
+ printf(" simde_x_mm512_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")),\n",
+ b.u32[15], b.u32[14], b.u32[13], b.u32[12], b.u32[11], b.u32[10], b.u32[ 9], b.u32[ 8],
+ b.u32[ 7], b.u32[ 6], b.u32[ 5], b.u32[ 4], b.u32[ 3], b.u32[ 2], b.u32[ 1], b.u32[ 0]);
+ printf(" simde_x_mm512_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+ " UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")) },\n",
+ r.u32[15], r.u32[14], r.u32[13], r.u32[12], r.u32[11], r.u32[10], r.u32[ 9], r.u32[ 8],
+ r.u32[ 7], r.u32[ 6], r.u32[ 5], r.u32[ 4], r.u32[ 3], r.u32[ 2], r.u32[ 1], r.u32[ 0]);
+ }
+ return MUNIT_FAIL; /* unconditional, so the verification loop below is unreachable as written */
+ /* Verification pass: run the operation on each stored row and compare the result with r. */
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m512i r = simde_mm512_mask_xxx_epu32(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b);
+ simde_assert_m512i_u32(r, ==, test_vec[i].r);
+ }
+
+ return MUNIT_OK;
+}
+
static MunitResult
test_simde_mm512_xxx_ps(const MunitParameter params[], void* data) {
(void) params;
View it on GitLab: https://salsa.debian.org/med-team/simde/-/commit/683bff07021df346a67704b68711fce76d9fa442
--
View it on GitLab: https://salsa.debian.org/med-team/simde/-/commit/683bff07021df346a67704b68711fce76d9fa442
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200413/bd660252/attachment-0001.html>
More information about the debian-med-commit
mailing list