[med-svn] [Git][med-team/simde][upstream] New upstream version 0.0.0.git.20200412

Michael R. Crusoe gitlab at salsa.debian.org
Mon Apr 13 09:36:30 BST 2020



Michael R. Crusoe pushed to branch upstream at Debian Med / simde


Commits:
683bff07 by Michael R. Crusoe at 2020-04-13T09:34:54+02:00
New upstream version 0.0.0.git.20200412
- - - - -


9 changed files:

- + .drone.star
- − .drone.yml
- README.md
- simde/x86/avx2.h
- simde/x86/avx512f.h
- simde/x86/sse.h
- test/x86/avx2.c
- test/x86/avx512f.c
- test/x86/skel.c


Changes:

=====================================
.drone.star
=====================================
@@ -0,0 +1,318 @@
+# -*- Python -*-
+# Drone CI Starlark configuration file.
+# https://docs.drone.io/pipeline/scripting/starlark/
+# Run `drone starlark convert --stdout` to verify `.drone.star`.
+def get_default_job():
+  return {
+    "kind": "pipeline",
+    "type": "docker",
+    "name": "",
+    "platform": {
+      "os": "linux",
+    },
+    "steps": [
+      {
+        "name": "test",
+        "image": "ubuntu:bionic",
+        "environment": {},
+        "commands": [
+        ]
+      }
+    ],
+    "custom": {
+      "before_install": [
+        "uname -m",
+        "cat /proc/cpuinfo",
+      ],
+      "before_script": [
+        "git submodule --quiet update --init --recursive",
+      ],
+      "script": [
+        "mkdir -p build",
+        "cd build",
+        'CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson ..',
+        "ninja -v",
+        "./test/run-tests",
+      ]
+    }
+  }
+
+def get_jobs():
+  job_clang9_armv7 = {
+    "name": "clang-9 armv7",
+    "platform": {
+      "arch": "arm",
+    },
+    "steps": [
+      {
+        "environment": {
+          "CC": "clang-9",
+          "CXX": "clang++-9",
+          "ARCH_FLAGS": "-march=armv7a -mfpu=neon",
+        }
+      }
+    ],
+    "custom": {
+      "install": [
+        "apt-get -yq update",
+        "apt-get -yq install clang-9 ninja-build git-core python3-pip",
+        "pip3 install meson",
+      ]
+    }
+  }
+
+  job_clang9_armv8 = {
+    "name": "clang-9 armv8",
+    "platform": {
+      "arch": "arm",
+    },
+    "steps": [
+      {
+        "environment": {
+          "CC": "clang-9",
+          "CXX": "clang++-9",
+          "ARCH_FLAGS": "-march=armv8a -mfpu=neon",
+        }
+      }
+    ],
+    "custom": {
+      "install": [
+        "apt-get -yq update",
+        "apt-get -yq install clang-9 ninja-build git-core python3-pip",
+        "pip3 install meson",
+      ]
+    }
+  }
+
+  job_gcc8_armv7 = {
+    "name": "gcc-8 armv7",
+    "platform": {
+      "arch": "arm",
+    },
+    "steps": [
+      {
+        "environment": {
+          "CC": "gcc-8",
+          "CXX": "g++-8",
+          "ARCH_FLAGS": "-march=armv7-a -mfpu=neon",
+        }
+      }
+    ],
+    "custom": {
+      "install": [
+        "apt-get -yq update",
+        "apt-get -yq install gcc-8 g++-8 ninja-build git-core python3-pip",
+        "pip3 install meson",
+      ]
+    }
+  }
+
+  job_gcc8_armv8 = {
+    "name": "gcc-8 armv8",
+    "platform": {
+      "arch": "arm",
+    },
+    "steps": [
+      {
+        "environment": {
+          "CC": "gcc-8",
+          "CXX": "g++-8",
+          "ARCH_FLAGS": "-march=armv8-a -mfpu=neon",
+        }
+      }
+    ],
+    "custom": {
+      "install": [
+        "apt-get -yq update",
+        "apt-get -yq install gcc-8 g++-8 ninja-build git-core python3-pip",
+        "pip3 install meson",
+      ]
+    }
+  }
+
+  job_clang7_armv7 = {
+    "name": "clang-7 armv7",
+    "platform": {
+      "arch": "arm",
+    },
+    "steps": [
+      {
+        "environment": {
+          "CC": "clang-7",
+          "CXX": "clang++-7",
+          "ARCH_FLAGS": "-march=armv7a -mfpu=neon",
+        }
+      }
+    ],
+    "custom": {
+      "install": [
+        "apt-get -yq update",
+        "apt-get -yq install clang-7 ninja-build git-core python3-pip",
+        "pip3 install meson",
+      ]
+    }
+  }
+
+  job_clang7_armv8 = {
+    "name": "clang-7 armv8",
+    "platform": {
+      "arch": "arm",
+    },
+    "steps": [
+      {
+        "environment": {
+          "CC": "clang-7",
+          "CXX": "clang++-7",
+          "ARCH_FLAGS": "-march=armv8a -mfpu=neon",
+        }
+      }
+    ],
+    "custom": {
+      "install": [
+        "apt-get -yq update",
+        "apt-get -yq install clang-7 ninja-build git-core python3-pip",
+        "pip3 install meson",
+      ]
+    }
+  }
+
+  job_gcc7_armv7 = {
+    "name": "gcc-7 armv7",
+    "platform": {
+      "arch": "arm",
+    },
+    "steps": [
+      {
+        "environment": {
+          "CC": "gcc-7",
+          "CXX": "g++-7",
+          "ARCH_FLAGS": "-march=armv7-a -mfpu=neon",
+        }
+      }
+    ],
+    "custom": {
+      "install": [
+        "apt-get -yq update",
+        "apt-get -yq install gcc-7 g++-7 ninja-build git-core python3-pip",
+        "pip3 install meson",
+      ]
+    }
+  }
+
+  job_gcc7_armv8 = {
+    "name": "gcc-7 armv8",
+    "platform": {
+      "arch": "arm",
+    },
+    "steps": [
+      {
+        "environment": {
+          "CC": "gcc-7",
+          "CXX": "g++-7",
+          "ARCH_FLAGS": "-march=armv8-a -mfpu=neon",
+        }
+      }
+    ],
+    "custom": {
+      "install": [
+        "apt-get -yq update",
+        "apt-get -yq install gcc-7 g++-7 ninja-build git-core python3-pip",
+        "pip3 install meson",
+      ]
+    }
+  }
+
+  job_fedora = {
+    "name": "fedora",
+    "steps": [
+      {
+        "image": "fedora:latest",
+        "environment": {
+          "CC": "gcc",
+          "CXX": "g++",
+          "ARCH_FLAGS": "-march=native",
+        }
+      }
+    ],
+    "custom": {
+      "install": [
+        "dnf install -y gcc gcc-c++ ninja-build git-core python3-pip",
+        "pip3 install meson",
+      ]
+    }
+  }
+
+  job_fedora_clang_arm64_flags = {
+    "name": "fedora clang arm64 flags",
+    "platform": {
+      "arch": "arm64",
+    },
+    "steps": [
+      {
+        "image": "fedora:rawhide",
+        "environment": {
+          "CC": "clang",
+          "CXX": "clang++",
+        },
+        "failure": "ignore"
+      }
+    ],
+    "custom": {
+      "install": [
+        "dnf install -y clang ninja-build git-core python3-pip",
+        "pip3 install meson",
+      ],
+      "script": [
+        "mkdir -p build",
+        "cd build",
+        # optflags RPM macro works with gcc.
+        # Some flags and specs are not available with clang.
+        # https://lists.fedoraproject.org/archives/list/packaging@lists.fedoraproject.org/message/W5UFLUADNB4VF3OBUBSNAPOQL6XBCP74/
+        "ARCH_FLAGS=$(rpm -E '%{optflags}' | sed -e 's| -fstack-clash-protection||' -e 's| -specs=[^ ]*||g')",
+        'CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson ..',
+        "ninja -v",
+        "./test/run-tests",
+      ]
+    }
+  }
+
+  return [
+    job_clang9_armv7,
+    job_clang9_armv8,
+    job_gcc8_armv7,
+    job_gcc8_armv8,
+    job_clang7_armv7,
+    job_clang7_armv8,
+    job_gcc7_armv7,
+    job_gcc7_armv8,
+    # job_fedora,
+    job_fedora_clang_arm64_flags,
+  ]
+
+def main(ctx):  # Returns a list with one merged pipeline dict per entry of get_jobs().
+  merged_jobs = []
+  for job in get_jobs():
+    out = get_default_job()
+    # get_default_job() builds a new dict on every call, so jobs do not share state.
+    # Merge each element of the job dict over the defaults.
+    for key, value in job.items():
+      if type(value) == "list":
+        for index, item in enumerate(value):
+          out[key][index].update(item)
+      elif type(value) == "dict":
+        out[key].update(value)
+      else:
+        out[key] = value
+
+    # Create commands list from custom elements ("install" is job-supplied, so it may be absent).
+    out["steps"][0]["commands"].extend(out["custom"]["before_install"])
+    out["steps"][0]["commands"].extend(out["custom"].get("install", []))
+    out["steps"][0]["commands"].extend(out["custom"]["before_script"])
+    out["steps"][0]["commands"].extend(out["custom"]["script"])
+
+    # Remove unused custom element.
+    out.pop("custom", None)
+
+    merged_jobs.append(out)
+
+  return merged_jobs


=====================================
.drone.yml deleted
=====================================
@@ -1,238 +0,0 @@
----
-kind: pipeline
-type: docker
-name: "clang-9 armv7"
-platform:
-  os: linux
-  arch: arm
-steps:
-- name: test
-  image: ubuntu:bionic
-  environment:
-    CC: clang-9
-    CXX: clang++-9
-    ARCH_FLAGS: -march=armv7a -mfpu=neon
-  commands:
-  - uname -m
-  - cat /proc/cpuinfo
-  - apt-get -yq update
-  - apt-get -yq install clang-9 ninja-build git-core python3-pip
-  - pip3 install meson
-  - git submodule update --init --recursive
-  - mkdir -p build
-  - cd build
-  - CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson ..
-  - ninja -v
-  - ./test/run-tests
-  
----
-kind: pipeline
-type: docker
-name: "clang-9 armv8"
-platform:
-  os: linux
-  arch: arm
-steps:
-- name: test
-  image: ubuntu:bionic
-  environment:
-    CC: clang-9
-    CXX: clang++-9
-    ARCH_FLAGS: -march=armv8a -mfpu=neon
-  commands:
-  - uname -m
-  - cat /proc/cpuinfo
-  - apt-get -yq update
-  - apt-get -yq install clang-9 ninja-build git-core python3-pip
-  - pip3 install meson
-  - git submodule update --init --recursive
-  - mkdir -p build
-  - cd build
-  - CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson ..
-  - ninja -v
-  - ./test/run-tests
-
----
-kind: pipeline
-type: docker
-name: "gcc-8 armv7"
-platform:
-  os: linux
-  arch: arm
-steps:
-- name: test
-  image: ubuntu:bionic
-  environment:
-    CC: gcc-8
-    CXX: g++-8
-    ARCH_FLAGS: -march=armv7-a -mfpu=neon
-  commands:
-  - uname -m
-  - cat /proc/cpuinfo
-  - apt-get -yq update
-  - apt-get -yq install gcc-8 g++-8 ninja-build git-core python3-pip
-  - pip3 install meson
-  - git submodule update --init --recursive
-  - mkdir -p build
-  - cd build
-  - CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson ..
-  - ninja -v
-  - ./test/run-tests
-
----
-kind: pipeline
-type: docker
-name: "gcc-8 armv8"
-platform:
-  os: linux
-  arch: arm
-steps:
-- name: test
-  image: ubuntu:bionic
-  environment:
-    CC: gcc-8
-    CXX: g++-8
-    ARCH_FLAGS: -march=armv8-a -mfpu=neon
-  commands:
-  - uname -m
-  - cat /proc/cpuinfo
-  - apt-get -yq update
-  - apt-get -yq install gcc-8 g++-8 ninja-build git-core python3-pip
-  - pip3 install meson
-  - git submodule update --init --recursive
-  - mkdir -p build
-  - cd build
-  - CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson ..
-  - ninja -v
-  - ./test/run-tests
-
----
-kind: pipeline
-type: docker
-name: "clang-7 armv7"
-platform:
-  os: linux
-  arch: arm
-steps:
-- name: test
-  image: ubuntu:bionic
-  environment:
-    CC: clang-7
-    CXX: clang++-7
-    ARCH_FLAGS: -march=armv7a -mfpu=neon
-  commands:
-  - uname -m
-  - cat /proc/cpuinfo
-  - apt-get -yq update
-  - apt-get -yq install clang-7 ninja-build git-core python3-pip
-  - pip3 install meson
-  - git submodule update --init --recursive
-  - mkdir -p build
-  - cd build
-  - CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson ..
-  - ninja -v
-  - ./test/run-tests
-  
----
-kind: pipeline
-type: docker
-name: "clang-7 armv8"
-platform:
-  os: linux
-  arch: arm
-steps:
-- name: test
-  image: ubuntu:bionic
-  environment:
-    CC: clang-7
-    CXX: clang++-7
-    ARCH_FLAGS: -march=armv8a -mfpu=neon
-  commands:
-  - uname -m
-  - cat /proc/cpuinfo
-  - apt-get -yq update
-  - apt-get -yq install clang-7 ninja-build git-core python3-pip
-  - pip3 install meson
-  - git submodule update --init --recursive
-  - mkdir -p build
-  - cd build
-  - CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson ..
-  - ninja -v
-  - ./test/run-tests
-
----
-kind: pipeline
-type: docker
-name: "gcc-7 armv7"
-platform:
-  os: linux
-  arch: arm
-steps:
-- name: test
-  image: ubuntu:bionic
-  environment:
-    CC: gcc-7
-    CXX: g++-7
-    ARCH_FLAGS: -march=armv7-a -mfpu=neon
-  commands:
-  - uname -m
-  - cat /proc/cpuinfo
-  - apt-get -yq update
-  - apt-get -yq install gcc-7 g++-7 ninja-build git-core python3-pip
-  - pip3 install meson
-  - git submodule update --init --recursive
-  - mkdir -p build
-  - cd build
-  - CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson ..
-  - ninja -v
-  - ./test/run-tests
-
----
-kind: pipeline
-type: docker
-name: "gcc-7 armv8"
-platform:
-  os: linux
-  arch: arm
-steps:
-- name: test
-  image: ubuntu:bionic
-  environment:
-    CC: gcc-7
-    CXX: g++-7
-    ARCH_FLAGS: -march=armv8-a -mfpu=neon
-  commands:
-  - uname -m
-  - cat /proc/cpuinfo
-  - apt-get -yq update
-  - apt-get -yq install gcc-7 g++-7 ninja-build git-core python3-pip
-  - pip3 install meson
-  - git submodule update --init --recursive
-  - mkdir -p build
-  - cd build
-  - CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson ..
-  - ninja -v
-  - ./test/run-tests
-
-# ---
-# kind: pipeline
-# type: docker
-# name: "fedora"
-# steps:
-# - name: test
-#   image: fedora:latest
-#   environment:
-#     CC: gcc
-#     CXX: g++
-#     ARCH_FLAGS: -march=native
-#   commands:
-#   - uname -m
-#   - cat /proc/cpuinfo
-#   - dnf install -y gcc gcc-c++ ninja-build git-core python3-pip
-#   - pip3 install meson
-#   - git submodule update --init --recursive
-#   - mkdir -p build
-#   - cd build
-#   - CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson ..
-#   - ninja -v
-#   - ./test/run-tests


=====================================
README.md
=====================================
@@ -1,4 +1,5 @@
 # SIMD Everywhere
+[![Gitter chat](https://badges.gitter.im/gitterHQ/gitter.png)](https://gitter.im/simd-everywhere/community)
 
 The SIMDe header-only library provides fast, portable implementations of 
 [SIMD intrinsics](https://en.wikipedia.org/wiki/SIMD) on hardware which


=====================================
simde/x86/avx2.h
=====================================
@@ -67,6 +67,72 @@ SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
 
 SIMDE__BEGIN_DECLS
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m256i
+simde_mm256_abs_epi8 (simde__m256i a) {
+#if defined(SIMDE_AVX2_NATIVE)
+  return _mm256_abs_epi8(a);
+#else
+  simde__m256i_private
+    r_,
+    a_ = simde__m256i_to_private(a);
+
+  SIMDE__VECTORIZE
+  for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
+    r_.i8[i] = (a_.i8[i] < INT32_C(0)) ? -a_.i8[i] : a_.i8[i];
+  }
+
+  return simde__m256i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
+#  define _mm256_abs_epi8(a) simde_mm256_abs_epi8(a)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m256i
+simde_mm256_abs_epi16 (simde__m256i a) {
+#if defined(SIMDE_AVX2_NATIVE)
+  return _mm256_abs_epi16(a);
+#else
+  simde__m256i_private
+    r_,
+    a_ = simde__m256i_to_private(a);
+
+  SIMDE__VECTORIZE
+  for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
+    r_.i16[i] = (a_.i16[i] < INT32_C(0)) ? -a_.i16[i] : a_.i16[i];
+  }
+
+  return simde__m256i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
+#  define _mm256_abs_epi16(a) simde_mm256_abs_epi16(a)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m256i
+simde_mm256_abs_epi32(simde__m256i a) { /* per-lane absolute value of the 32-bit signed lanes of a */
+#if defined(SIMDE_AVX2_NATIVE)
+  return _mm256_abs_epi32(a);
+#else
+  simde__m256i_private
+    r_,
+    a_ = simde__m256i_to_private(a);
+  /* Negate in unsigned arithmetic: -INT32_MIN would be signed overflow (undefined behaviour). */
+  SIMDE__VECTORIZE
+  for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
+    r_.i32[i] = (a_.i32[i] < INT32_C(0)) ? HEDLEY_STATIC_CAST(int32_t, UINT32_C(0) - HEDLEY_STATIC_CAST(uint32_t, a_.i32[i])) : a_.i32[i];
+  }
+
+  return simde__m256i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
+#  define _mm256_abs_epi32(a) simde_mm256_abs_epi32(a)
+#endif
+
 SIMDE__FUNCTION_ATTRIBUTES
 simde__m256i
 simde_mm256_add_epi8 (simde__m256i a, simde__m256i b) {
@@ -1283,6 +1349,90 @@ simde_mm256_max_epi32 (simde__m256i a, simde__m256i b) {
 #  define _mm256_max_epi32(a, b) simde_mm256_max_epi32(a, b)
 #endif
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m256i
+simde_mm256_min_epi8 (simde__m256i a, simde__m256i b) {
+#if defined(SIMDE_AVX2_NATIVE) && !defined(__PGI)
+  return _mm256_min_epi8(a, b);
+#else
+  simde__m256i_private
+    r_,
+    a_ = simde__m256i_to_private(a),
+    b_ = simde__m256i_to_private(b);
+
+#if defined(SIMDE_ARCH_X86_SSE4_1)
+  r_.m128i[0] = simde_mm_min_epi8(a_.m128i[0], b_.m128i[0]);
+  r_.m128i[1] = simde_mm_min_epi8(a_.m128i[1], b_.m128i[1]);
+#else
+  SIMDE__VECTORIZE
+  for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
+    r_.i8[i] = a_.i8[i] < b_.i8[i] ? a_.i8[i] : b_.i8[i];
+  }
+#endif
+
+  return simde__m256i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
+#  define _mm256_min_epi8(a, b) simde_mm256_min_epi8(a, b)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m256i
+simde_mm256_min_epi16 (simde__m256i a, simde__m256i b) {
+#if defined(SIMDE_AVX2_NATIVE)
+  return _mm256_min_epi16(a, b);
+#else
+  simde__m256i_private
+    r_,
+    a_ = simde__m256i_to_private(a),
+    b_ = simde__m256i_to_private(b);
+
+#if defined(SIMDE_ARCH_X86_SSE2)
+  r_.m128i[0] = simde_mm_min_epi16(a_.m128i[0], b_.m128i[0]);
+  r_.m128i[1] = simde_mm_min_epi16(a_.m128i[1], b_.m128i[1]);
+#else
+  SIMDE__VECTORIZE
+  for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
+    r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i];
+  }
+#endif
+
+  return simde__m256i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
+#  define _mm256_min_epi16(a, b) simde_mm256_min_epi16(a, b)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m256i
+simde_mm256_min_epi32 (simde__m256i a, simde__m256i b) {
+#if defined(SIMDE_AVX2_NATIVE)
+  return _mm256_min_epi32(a, b);
+#else
+  simde__m256i_private
+    r_,
+    a_ = simde__m256i_to_private(a),
+    b_ = simde__m256i_to_private(b);
+
+#if defined(SIMDE_ARCH_X86_SSE4_1)
+  r_.m128i[0] = simde_mm_min_epi32(a_.m128i[0], b_.m128i[0]);
+  r_.m128i[1] = simde_mm_min_epi32(a_.m128i[1], b_.m128i[1]);
+#else
+  SIMDE__VECTORIZE
+  for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
+    r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i];
+  }
+#endif
+
+  return simde__m256i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
+#  define _mm256_min_epi32(a, b) simde_mm256_min_epi32(a, b)
+#endif
+
 SIMDE__FUNCTION_ATTRIBUTES
 simde__m256i
 simde_mm256_min_epu8 (simde__m256i a, simde__m256i b) {


=====================================
simde/x86/avx512f.h
=====================================
@@ -1595,6 +1595,46 @@ simde_mm512_setone_pd(void) {
   return simde_mm512_castsi512_pd(simde_mm512_setone_si512());
 }
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_srli_epi32 (simde__m512i a, unsigned int imm8) {
+  #if defined(SIMDE_AVX512F_NATIVE)
+    return _mm512_srli_epi32(a, imm8);
+  #else
+    simde__m512i_private
+      r_,
+      a_ = simde__m512i_to_private(a);
+
+    #if defined(SIMDE_ARCH_X86_AVX2)
+      r_.m256i[0] = simde_mm256_srli_epi32(a_.m256i[0], imm8);
+      r_.m256i[1] = simde_mm256_srli_epi32(a_.m256i[1], imm8);
+    #elif defined(SIMDE_ARCH_X86_SSE2)
+      r_.m128i[0] = simde_mm_srli_epi32(a_.m128i[0], imm8);
+      r_.m128i[1] = simde_mm_srli_epi32(a_.m128i[1], imm8);
+      r_.m128i[2] = simde_mm_srli_epi32(a_.m128i[2], imm8);
+      r_.m128i[3] = simde_mm_srli_epi32(a_.m128i[3], imm8);
+    #else
+      if (imm8 > 31) {
+        simde_memset(&r_, 0, sizeof(r_));
+      } else {
+        #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
+          r_.u32 = a_.u32 >> imm8;
+        #else
+          SIMDE__VECTORIZE
+          for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
+            r_.u32[i] = a_.u32[i] >> imm8;
+          }
+        #endif
+      }
+    #endif
+
+    return simde__m512i_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_srli_epi32(a, imm8) simde_mm512_srli_epi32(a, imm8)
+#endif
+
 SIMDE__FUNCTION_ATTRIBUTES
 simde__m512i
 simde_mm512_srli_epi64 (simde__m512i a, unsigned int imm8) {
@@ -1876,6 +1916,29 @@ simde_mm512_mask_test_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512
 #  define _mm512_mask_test_epi32_mask(a, b) simde_mm512_mask_test_epi32_mask(a, b)
 #endif
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__mmask8
+simde_mm512_mask_test_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) {
+  #if defined(SIMDE_AVX512F_NATIVE)
+    return _mm512_mask_test_epi64_mask(k1, a, b);
+  #else
+    simde__m512i_private
+      a_ = simde__m512i_to_private(a),
+      b_ = simde__m512i_to_private(b);
+    simde__mmask8 r = 0;
+    /* Bit i of r is set when 64-bit lane i of (a & b) is non-zero. */
+    SIMDE__VECTORIZE_REDUCTION(|:r)
+    for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
+      r |= !!(a_.i64[i] & b_.i64[i]) << i;
+    }
+    /* Bits that are clear in k1 are cleared in the result. */
+    return r & k1;
+  #endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_mask_test_epi64_mask(k1, a, b) simde_mm512_mask_test_epi64_mask(k1, a, b)
+#endif
+
 SIMDE__FUNCTION_ATTRIBUTES
 simde__m512i
 simde__m512i_from_mmask16 (simde__mmask16 k) {
@@ -2018,6 +2081,32 @@ simde_mm512_add_epi32 (simde__m512i a, simde__m512i b) {
 #  define _mm512_add_epi32(a, b) simde_mm512_add_epi32(a, b)
 #endif
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_mask_add_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_mask_add_epi32(src, k, a, b);
+#else
+  return simde_mm512_mask_mov_epi32(src, k, simde_mm512_add_epi32(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_mask_add_epi32(src, k, a, b) simde_mm512_mask_add_epi32(src, k, a, b)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_maskz_add_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_maskz_add_epi32(k, a, b);
+#else
+  return simde_mm512_maskz_mov_epi32(k, simde_mm512_add_epi32(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_maskz_add_epi32(k, a, b) simde_mm512_maskz_add_epi32(k, a, b)
+#endif
+
 SIMDE__FUNCTION_ATTRIBUTES
 simde__m512i
 simde_mm512_add_epi64 (simde__m512i a, simde__m512i b) {
@@ -2045,6 +2134,33 @@ simde_mm512_add_epi64 (simde__m512i a, simde__m512i b) {
 #  define _mm512_add_epi64(a, b) simde_mm512_add_epi64(a, b)
 #endif
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_mask_add_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_mask_add_epi64(src, k, a, b);
+#else
+  return simde_mm512_mask_mov_epi64(src, k, simde_mm512_add_epi64(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_mask_add_epi64(src, k, a, b) simde_mm512_mask_add_epi64(src, k, a, b)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_maskz_add_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_maskz_add_epi64(k, a, b);
+#else
+  return simde_mm512_maskz_mov_epi64(k, simde_mm512_add_epi64(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_maskz_add_epi64(k, a, b) simde_mm512_maskz_add_epi64(k, a, b)
+#endif
+
+
 SIMDE__FUNCTION_ATTRIBUTES
 simde__m512
 simde_mm512_add_ps (simde__m512 a, simde__m512 b) {
@@ -2072,6 +2188,33 @@ simde_mm512_add_ps (simde__m512 a, simde__m512 b) {
 #  define _mm512_add_ps(a, b) simde_mm512_add_ps(a, b)
 #endif
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512
+simde_mm512_mask_add_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_mask_add_ps(src, k, a, b);
+#else
+  return simde_mm512_mask_mov_ps(src, k, simde_mm512_add_ps(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_mask_add_ps(src, k, a, b) simde_mm512_mask_add_ps(src, k, a, b)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512
+simde_mm512_maskz_add_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_maskz_add_ps(k, a, b);
+#else
+  return simde_mm512_maskz_mov_ps(k, simde_mm512_add_ps(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_maskz_add_ps(k, a, b) simde_mm512_maskz_add_ps(k, a, b)
+#endif
+
+
 SIMDE__FUNCTION_ATTRIBUTES
 simde__m512d
 simde_mm512_add_pd (simde__m512d a, simde__m512d b) {
@@ -2099,6 +2242,33 @@ simde_mm512_add_pd (simde__m512d a, simde__m512d b) {
 #  define _mm512_add_pd(a, b) simde_mm512_add_pd(a, b)
 #endif
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512d
+simde_mm512_mask_add_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_mask_add_pd(src, k, a, b);
+#else
+  return simde_mm512_mask_mov_pd(src, k, simde_mm512_add_pd(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_mask_add_pd(src, k, a, b) simde_mm512_mask_add_pd(src, k, a, b)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512d
+simde_mm512_maskz_add_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_maskz_add_pd(k, a, b);
+#else
+  return simde_mm512_maskz_mov_pd(k, simde_mm512_add_pd(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_maskz_add_pd(k, a, b) simde_mm512_maskz_add_pd(k, a, b)
+#endif
+
+
 SIMDE__FUNCTION_ATTRIBUTES
 simde__m512i
 simde_mm512_sub_epi32 (simde__m512i a, simde__m512i b) {
@@ -2331,9 +2501,9 @@ simde_mm512_mask_cmpeq_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512
 
 SIMDE__FUNCTION_ATTRIBUTES
 simde__mmask16
-simde_mm512_mask_cmpgt_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) {
+simde_mm512_cmpgt_epi32_mask (simde__m512i a, simde__m512i b) {
   #if defined(SIMDE_AVX512F_NATIVE)
-    return _mm512_mask_cmpgt_epi32_mask(k1, a, b);
+    return _mm512_cmpgt_epi32_mask(a, b);
   #else
     simde__m512i_private
       r_,
@@ -2344,7 +2514,20 @@ simde_mm512_mask_cmpgt_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m51
       r_.m256i[i] = simde_mm256_cmpgt_epi32(a_.m256i[i], b_.m256i[i]);
     }
 
-    return simde__m512i_private_to_mmask16(r_) & k1;
+    return simde__m512i_private_to_mmask16(r_);
+  #endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_cmpgt_epi32_mask(a, b) simde_mm512_cmpgt_epi32_mask(a, b)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__mmask16
+simde_mm512_mask_cmpgt_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) {
+  #if defined(SIMDE_AVX512F_NATIVE)
+    return _mm512_mask_cmpgt_epi32_mask(k1, a, b);
+  #else
+    return simde_mm512_cmpgt_epi32_mask(a, b) & k1;
   #endif
 }
 #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
@@ -2353,9 +2536,9 @@ simde_mm512_mask_cmpgt_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m51
 
 SIMDE__FUNCTION_ATTRIBUTES
 simde__mmask8
-simde_mm512_mask_cmpgt_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) {
+simde_mm512_cmpgt_epi64_mask (simde__m512i a, simde__m512i b) {
   #if defined(SIMDE_AVX512F_NATIVE)
-    return _mm512_mask_cmpgt_epi64_mask(k1, a, b);
+    return _mm512_cmpgt_epi64_mask(a, b);
   #else
     simde__m512i_private
       r_,
@@ -2366,7 +2549,20 @@ simde_mm512_mask_cmpgt_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512
       r_.m256i[i] = simde_mm256_cmpgt_epi64(a_.m256i[i], b_.m256i[i]);
     }
 
-    return simde__m512i_private_to_mmask8(r_) & k1;
+    return simde__m512i_private_to_mmask8(r_);
+  #endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_cmpgt_epi64_mask(a, b) simde_mm512_cmpgt_epi64_mask(a, b)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__mmask8
+simde_mm512_mask_cmpgt_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) {
+  #if defined(SIMDE_AVX512F_NATIVE)
+    return _mm512_mask_cmpgt_epi64_mask(k1, a, b);
+  #else
+    return simde_mm512_cmpgt_epi64_mask(a, b) & k1;
   #endif
 }
 #if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
@@ -2604,6 +2800,19 @@ simde_mm512_mul_ps (simde__m512 a, simde__m512 b) {
 #  define _mm512_mul_ps(a, b) simde_mm512_mul_ps(a, b)
 #endif
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512
+simde_mm512_mask_mul_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_mask_mul_ps(src, k, a, b);
+#else
+  return simde_mm512_mask_mov_ps(src, k, simde_mm512_mul_ps(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_mask_mul_ps(src, k, a, b) simde_mm512_mask_mul_ps(src, k, a, b)
+#endif
+
 SIMDE__FUNCTION_ATTRIBUTES
 simde__m512d
 simde_mm512_mul_pd (simde__m512d a, simde__m512d b) {
@@ -2631,6 +2840,112 @@ simde_mm512_mul_pd (simde__m512d a, simde__m512d b) {
 #  define _mm512_mul_pd(a, b) simde_mm512_mul_pd(a, b)
 #endif
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512d
+simde_mm512_mask_mul_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_mask_mul_pd(src, k, a, b);
+#else
+  return simde_mm512_mask_mov_pd(src, k, simde_mm512_mul_pd(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_mask_mul_pd(src, k, a, b) simde_mm512_mask_mul_pd(src, k, a, b)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_mul_epi32 (simde__m512i a, simde__m512i b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_mul_epi32(a, b);
+#else
+  simde__m512i_private
+    r_,
+    a_ = simde__m512i_to_private(a),
+    b_ = simde__m512i_to_private(b);
+#if defined(SIMDE__CONVERT_VECTOR) && defined(SIMDE__SHUFFLE_VECTOR)
+  simde__m512i_private x;
+  __typeof__(r_.i64) ta, tb;
+
+  /* Get even numbered 32-bit values */
+  x.i32 = SIMDE__SHUFFLE_VECTOR(32, 64, a_.i32, b_.i32, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
+  /* Cast to 64 bits */
+  SIMDE__CONVERT_VECTOR(ta, x.m256i_private[0].i32);
+  SIMDE__CONVERT_VECTOR(tb, x.m256i_private[1].i32);
+  r_.i64 = ta * tb;
+#else
+  SIMDE__VECTORIZE
+  for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
+    r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i32[i << 1]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i << 1]);
+  }
+#endif
+  return simde__m512i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_mul_epi32(a, b) simde_mm512_mul_epi32(a, b)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_mask_mul_epi32(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_mask_mul_epi32(src, k, a, b);
+#else
+  return simde_mm512_mask_mov_epi64(src, k, simde_mm512_mul_epi32(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_mask_mul_epi32(src, k, a, b) simde_mm512_mask_mul_epi32(src, k, a, b)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_mul_epu32 (simde__m512i a, simde__m512i b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_mul_epu32(a, b);
+#else
+  simde__m512i_private
+    r_,
+    a_ = simde__m512i_to_private(a),
+    b_ = simde__m512i_to_private(b);
+
+#if defined(SIMDE__CONVERT_VECTOR) && defined(SIMDE__SHUFFLE_VECTOR)
+  simde__m512i_private x;
+  __typeof__(r_.u64) ta, tb;
+
+  x.u32 = SIMDE__SHUFFLE_VECTOR(32, 64, a_.u32, b_.u32, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
+  SIMDE__CONVERT_VECTOR(ta, x.m256i_private[0].u32);
+  SIMDE__CONVERT_VECTOR(tb, x.m256i_private[1].u32);
+  r_.u64 = ta * tb;
+#else
+  SIMDE__VECTORIZE
+  for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
+    r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i << 1]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i << 1]);
+  }
+#endif
+
+  return simde__m512i_from_private(r_);
+
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_mul_epu32(a, b) simde_mm512_mul_epu32(a, b)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_mask_mul_epu32(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_mask_mul_epu32(src, k, a, b);
+#else
+  return simde_mm512_mask_mov_epi64(src, k, simde_mm512_mul_epu32(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_mask_mul_epu32(src, k, a, b) simde_mm512_mask_mul_epu32(src, k, a, b)
+#endif
+
 SIMDE__FUNCTION_ATTRIBUTES
 simde__m512i
 simde_mm512_or_si512 (simde__m512i a, simde__m512i b) {


=====================================
simde/x86/sse.h
=====================================
@@ -776,6 +776,12 @@ simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) {
     r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32));
   #elif defined(SIMDE_SSE_WASM_SIMD128)
     r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128);
+  #elif defined(SIMDE_SSE_POWER_ALTIVEC) && (SIMDE_ARCH_POWER >= 900) && !defined(HEDLEY_IBM_VERSION)
+    /* vec_cmpne(vector float, vector float) is missing from XL C/C++ v16.1.1,
+       though the documentation (table 89 on page 432 of the IBM XL C/C++ for
+       Linux Compiler Reference, Version 16.1.1) shows that it should be
+       present.  Both GCC and clang support it. */
+    r_.altivec_f32 = (vector float) vec_cmpne(a_.altivec_f32, b_.altivec_f32);
   #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
     r_.i32 = (__typeof__(r_.i32)) (a_.f32 != b_.f32);
   #else


=====================================
test/x86/avx2.c
=====================================
@@ -27,6 +27,287 @@
 
 #if defined(SIMDE_AVX2_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS)
 
+static MunitResult
+test_simde_mm256_abs_epi8(const MunitParameter params[], void* data) {
+  (void) params;
+  (void) data;
+  /* Eight fixed cases: a is the input vector, r the expected simde_mm256_abs_epi8(a). */
+  const struct {
+    simde__m256i a;
+    simde__m256i r;
+  } test_vec[8] = {
+    { simde_mm256_set_epi8(INT8_C( -27), INT8_C(  88), INT8_C(-122), INT8_C(  -6),
+                           INT8_C( -23), INT8_C( 108), INT8_C(-103), INT8_C(  32),
+                           INT8_C(  43), INT8_C( 116), INT8_C(  -6), INT8_C( -98),
+                           INT8_C( -62), INT8_C( -87), INT8_C(  90), INT8_C(  82),
+                           INT8_C(  86), INT8_C(   8), INT8_C(-126), INT8_C( -22),
+                           INT8_C( -80), INT8_C(-125), INT8_C(  -5), INT8_C(-101),
+                           INT8_C(  36), INT8_C( 114), INT8_C( -51), INT8_C(  59),
+                           INT8_C( -97), INT8_C( 124), INT8_C(  25), INT8_C(  90)),
+      simde_mm256_set_epi8(INT8_C(  27), INT8_C(  88), INT8_C( 122), INT8_C(   6),
+                           INT8_C(  23), INT8_C( 108), INT8_C( 103), INT8_C(  32),
+                           INT8_C(  43), INT8_C( 116), INT8_C(   6), INT8_C(  98),
+                           INT8_C(  62), INT8_C(  87), INT8_C(  90), INT8_C(  82),
+                           INT8_C(  86), INT8_C(   8), INT8_C( 126), INT8_C(  22),
+                           INT8_C(  80), INT8_C( 125), INT8_C(   5), INT8_C( 101),
+                           INT8_C(  36), INT8_C( 114), INT8_C(  51), INT8_C(  59),
+                           INT8_C(  97), INT8_C( 124), INT8_C(  25), INT8_C(  90)) },
+    { simde_mm256_set_epi8(INT8_C( 111), INT8_C(  46), INT8_C( -44), INT8_C(  36),
+                           INT8_C( -79), INT8_C( 101), INT8_C(   0), INT8_C(   2),
+                           INT8_C( -69), INT8_C(  31), INT8_C( -68), INT8_C( -82),
+                           INT8_C( -45), INT8_C( 120), INT8_C(  39), INT8_C(  46),
+                           INT8_C(  66), INT8_C(  30), INT8_C(-106), INT8_C( 118),
+                           INT8_C(  61), INT8_C(  98), INT8_C( -61), INT8_C(  98),
+                           INT8_C(  49), INT8_C( -12), INT8_C(-117), INT8_C(-115),
+                           INT8_C(  63), INT8_C( -92), INT8_C(-102), INT8_C(-110)),
+      simde_mm256_set_epi8(INT8_C( 111), INT8_C(  46), INT8_C(  44), INT8_C(  36),
+                           INT8_C(  79), INT8_C( 101), INT8_C(   0), INT8_C(   2),
+                           INT8_C(  69), INT8_C(  31), INT8_C(  68), INT8_C(  82),
+                           INT8_C(  45), INT8_C( 120), INT8_C(  39), INT8_C(  46),
+                           INT8_C(  66), INT8_C(  30), INT8_C( 106), INT8_C( 118),
+                           INT8_C(  61), INT8_C(  98), INT8_C(  61), INT8_C(  98),
+                           INT8_C(  49), INT8_C(  12), INT8_C( 117), INT8_C( 115),
+                           INT8_C(  63), INT8_C(  92), INT8_C( 102), INT8_C( 110)) },
+    { simde_mm256_set_epi8(INT8_C(  64), INT8_C( -84), INT8_C(  54), INT8_C(-102),
+                           INT8_C( -69), INT8_C(  12), INT8_C(-119), INT8_C( -19),
+                           INT8_C(  19), INT8_C( -55), INT8_C( -11), INT8_C(-117),
+                           INT8_C( -68), INT8_C( -51), INT8_C(  26), INT8_C(  72),
+                           INT8_C( -15), INT8_C( 108), INT8_C( -66), INT8_C( -24),
+                           INT8_C( -97), INT8_C( -48), INT8_C(  75), INT8_C(  35),
+                           INT8_C(  48), INT8_C( -25), INT8_C( -43), INT8_C(   2),
+                           INT8_C( -75), INT8_C(  28), INT8_C(-108), INT8_C( -43)),
+      simde_mm256_set_epi8(INT8_C(  64), INT8_C(  84), INT8_C(  54), INT8_C( 102),
+                           INT8_C(  69), INT8_C(  12), INT8_C( 119), INT8_C(  19),
+                           INT8_C(  19), INT8_C(  55), INT8_C(  11), INT8_C( 117),
+                           INT8_C(  68), INT8_C(  51), INT8_C(  26), INT8_C(  72),
+                           INT8_C(  15), INT8_C( 108), INT8_C(  66), INT8_C(  24),
+                           INT8_C(  97), INT8_C(  48), INT8_C(  75), INT8_C(  35),
+                           INT8_C(  48), INT8_C(  25), INT8_C(  43), INT8_C(   2),
+                           INT8_C(  75), INT8_C(  28), INT8_C( 108), INT8_C(  43)) },
+    { simde_mm256_set_epi8(INT8_C(   8), INT8_C( -54), INT8_C(  -1), INT8_C(-128),
+                           INT8_C( 118), INT8_C( -15), INT8_C( 125), INT8_C(  76),
+                           INT8_C(  47), INT8_C(  33), INT8_C(  69), INT8_C(  21),
+                           INT8_C(-116), INT8_C(  34), INT8_C(  36), INT8_C(  31),
+                           INT8_C( -32), INT8_C( -84), INT8_C(  23), INT8_C( -76),
+                           INT8_C(  82), INT8_C(-115), INT8_C(  74), INT8_C(-110),
+                           INT8_C( -46), INT8_C( 125), INT8_C( -52), INT8_C( -99),
+                           INT8_C(  30), INT8_C(-106), INT8_C(  66), INT8_C(   5)),
+      simde_mm256_set_epi8(INT8_C(   8), INT8_C(  54), INT8_C(   1), INT8_C(-128), /* input -128 stays -128 in the expected result */
+                           INT8_C( 118), INT8_C(  15), INT8_C( 125), INT8_C(  76),
+                           INT8_C(  47), INT8_C(  33), INT8_C(  69), INT8_C(  21),
+                           INT8_C( 116), INT8_C(  34), INT8_C(  36), INT8_C(  31),
+                           INT8_C(  32), INT8_C(  84), INT8_C(  23), INT8_C(  76),
+                           INT8_C(  82), INT8_C( 115), INT8_C(  74), INT8_C( 110),
+                           INT8_C(  46), INT8_C( 125), INT8_C(  52), INT8_C(  99),
+                           INT8_C(  30), INT8_C( 106), INT8_C(  66), INT8_C(   5)) },
+    { simde_mm256_set_epi8(INT8_C( 122), INT8_C(  42), INT8_C(-121), INT8_C(-106),
+                           INT8_C( 122), INT8_C(  -8), INT8_C(  81), INT8_C(-109),
+                           INT8_C( 124), INT8_C(  32), INT8_C(  63), INT8_C( -21),
+                           INT8_C( -51), INT8_C( -42), INT8_C(   1), INT8_C( -78),
+                           INT8_C(  74), INT8_C(   8), INT8_C(  25), INT8_C(  10),
+                           INT8_C( 113), INT8_C( -75), INT8_C( -32), INT8_C( 126),
+                           INT8_C( -87), INT8_C(  67), INT8_C(  78), INT8_C( -64),
+                           INT8_C(   7), INT8_C( -40), INT8_C( -46), INT8_C( -59)),
+      simde_mm256_set_epi8(INT8_C( 122), INT8_C(  42), INT8_C( 121), INT8_C( 106),
+                           INT8_C( 122), INT8_C(   8), INT8_C(  81), INT8_C( 109),
+                           INT8_C( 124), INT8_C(  32), INT8_C(  63), INT8_C(  21),
+                           INT8_C(  51), INT8_C(  42), INT8_C(   1), INT8_C(  78),
+                           INT8_C(  74), INT8_C(   8), INT8_C(  25), INT8_C(  10),
+                           INT8_C( 113), INT8_C(  75), INT8_C(  32), INT8_C( 126),
+                           INT8_C(  87), INT8_C(  67), INT8_C(  78), INT8_C(  64),
+                           INT8_C(   7), INT8_C(  40), INT8_C(  46), INT8_C(  59)) },
+    { simde_mm256_set_epi8(INT8_C(  10), INT8_C( 120), INT8_C(  81), INT8_C(-105),
+                           INT8_C(  73), INT8_C( -95), INT8_C(  79), INT8_C( -86),
+                           INT8_C( -93), INT8_C( -54), INT8_C( -43), INT8_C( -88),
+                           INT8_C(  59), INT8_C( -27), INT8_C(  12), INT8_C(  10),
+                           INT8_C(  73), INT8_C( -48), INT8_C( 112), INT8_C(  27),
+                           INT8_C(-113), INT8_C( -31), INT8_C( -56), INT8_C( -96),
+                           INT8_C(  48), INT8_C( -94), INT8_C(-111), INT8_C(  60),
+                           INT8_C(-116), INT8_C( -77), INT8_C( -70), INT8_C(  17)),
+      simde_mm256_set_epi8(INT8_C(  10), INT8_C( 120), INT8_C(  81), INT8_C( 105),
+                           INT8_C(  73), INT8_C(  95), INT8_C(  79), INT8_C(  86),
+                           INT8_C(  93), INT8_C(  54), INT8_C(  43), INT8_C(  88),
+                           INT8_C(  59), INT8_C(  27), INT8_C(  12), INT8_C(  10),
+                           INT8_C(  73), INT8_C(  48), INT8_C( 112), INT8_C(  27),
+                           INT8_C( 113), INT8_C(  31), INT8_C(  56), INT8_C(  96),
+                           INT8_C(  48), INT8_C(  94), INT8_C( 111), INT8_C(  60),
+                           INT8_C( 116), INT8_C(  77), INT8_C(  70), INT8_C(  17)) },
+    { simde_mm256_set_epi8(INT8_C(  61), INT8_C( -57), INT8_C( -99), INT8_C(   0),
+                           INT8_C(  98), INT8_C(-121), INT8_C(  67), INT8_C( -20),
+                           INT8_C(  44), INT8_C(  53), INT8_C(-128), INT8_C(  44),
+                           INT8_C( 127), INT8_C(  53), INT8_C(-127), INT8_C(  58),
+                           INT8_C(  35), INT8_C(  83), INT8_C( -56), INT8_C(  22),
+                           INT8_C(  -4), INT8_C(  -6), INT8_C(  -7), INT8_C( 121),
+                           INT8_C( -22), INT8_C( -32), INT8_C( -52), INT8_C( 124),
+                           INT8_C( -93), INT8_C(  55), INT8_C( -23), INT8_C( -62)),
+      simde_mm256_set_epi8(INT8_C(  61), INT8_C(  57), INT8_C(  99), INT8_C(   0),
+                           INT8_C(  98), INT8_C( 121), INT8_C(  67), INT8_C(  20),
+                           INT8_C(  44), INT8_C(  53), INT8_C(-128), INT8_C(  44), /* input -128 stays -128 in the expected result */
+                           INT8_C( 127), INT8_C(  53), INT8_C( 127), INT8_C(  58),
+                           INT8_C(  35), INT8_C(  83), INT8_C(  56), INT8_C(  22),
+                           INT8_C(   4), INT8_C(   6), INT8_C(   7), INT8_C( 121),
+                           INT8_C(  22), INT8_C(  32), INT8_C(  52), INT8_C( 124),
+                           INT8_C(  93), INT8_C(  55), INT8_C(  23), INT8_C(  62)) },
+    { simde_mm256_set_epi8(INT8_C(  71), INT8_C( -58), INT8_C(  24), INT8_C( 117),
+                           INT8_C(   2), INT8_C( -31), INT8_C( -86), INT8_C( 101),
+                           INT8_C(   3), INT8_C(  63), INT8_C(   2), INT8_C( -30),
+                           INT8_C( -33), INT8_C(  51), INT8_C(  60), INT8_C(  81),
+                           INT8_C( -91), INT8_C( -73), INT8_C(  66), INT8_C(  67),
+                           INT8_C(  72), INT8_C(  -7), INT8_C(  44), INT8_C( -32),
+                           INT8_C( -80), INT8_C( 101), INT8_C( -98), INT8_C(  89),
+                           INT8_C(  89), INT8_C(  94), INT8_C( 109), INT8_C(-109)),
+      simde_mm256_set_epi8(INT8_C(  71), INT8_C(  58), INT8_C(  24), INT8_C( 117),
+                           INT8_C(   2), INT8_C(  31), INT8_C(  86), INT8_C( 101),
+                           INT8_C(   3), INT8_C(  63), INT8_C(   2), INT8_C(  30),
+                           INT8_C(  33), INT8_C(  51), INT8_C(  60), INT8_C(  81),
+                           INT8_C(  91), INT8_C(  73), INT8_C(  66), INT8_C(  67),
+                           INT8_C(  72), INT8_C(   7), INT8_C(  44), INT8_C(  32),
+                           INT8_C(  80), INT8_C( 101), INT8_C(  98), INT8_C(  89),
+                           INT8_C(  89), INT8_C(  94), INT8_C( 109), INT8_C( 109)) }
+  };
+  /* Evaluate every case and compare the result with r via simde_assert_m256i_i8. */
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+    simde__m256i r = simde_mm256_abs_epi8(test_vec[i].a);
+    simde_assert_m256i_i8(r, ==, test_vec[i].r);
+  }
+
+  return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm256_abs_epi16(const MunitParameter params[], void* data) {
+  (void) params;
+  (void) data;
+  /* Eight fixed cases: a is the input vector, r the expected simde_mm256_abs_epi16(a). */
+  const struct {
+    simde__m256i a;
+    simde__m256i r;
+  } test_vec[8] = {
+    { simde_mm256_set_epi16(INT16_C(  9101), INT16_C( 13664), INT16_C( 14007), INT16_C( 17440),
+                            INT16_C( 21201), INT16_C(-16892), INT16_C(-22702), INT16_C(-11875),
+                            INT16_C(  9352), INT16_C( 21001), INT16_C( 15464), INT16_C( 27994),
+                            INT16_C( 12104), INT16_C(-22404), INT16_C(-21433), INT16_C( -4031)),
+      simde_mm256_set_epi16(INT16_C(  9101), INT16_C( 13664), INT16_C( 14007), INT16_C( 17440),
+                            INT16_C( 21201), INT16_C( 16892), INT16_C( 22702), INT16_C( 11875),
+                            INT16_C(  9352), INT16_C( 21001), INT16_C( 15464), INT16_C( 27994),
+                            INT16_C( 12104), INT16_C( 22404), INT16_C( 21433), INT16_C(  4031)) },
+    { simde_mm256_set_epi16(INT16_C( 20289), INT16_C( -8788), INT16_C(  5917), INT16_C(-28916),
+                            INT16_C(-21540), INT16_C( -8179), INT16_C(-25392), INT16_C(  2609),
+                            INT16_C( 12609), INT16_C(-11367), INT16_C(   -70), INT16_C( 28633),
+                            INT16_C(-21576), INT16_C(-23753), INT16_C(-11797), INT16_C(-17346)),
+      simde_mm256_set_epi16(INT16_C( 20289), INT16_C(  8788), INT16_C(  5917), INT16_C( 28916),
+                            INT16_C( 21540), INT16_C(  8179), INT16_C( 25392), INT16_C(  2609),
+                            INT16_C( 12609), INT16_C( 11367), INT16_C(    70), INT16_C( 28633),
+                            INT16_C( 21576), INT16_C( 23753), INT16_C( 11797), INT16_C( 17346)) },
+    { simde_mm256_set_epi16(INT16_C( 11563), INT16_C(-31585), INT16_C( 24583), INT16_C(  2918),
+                            INT16_C(  5705), INT16_C( 31274), INT16_C(-12388), INT16_C( 31454),
+                            INT16_C(  5008), INT16_C( 10123), INT16_C( 28874), INT16_C(-27636),
+                            INT16_C(  1380), INT16_C(-32687), INT16_C( 24141), INT16_C( 11570)),
+      simde_mm256_set_epi16(INT16_C( 11563), INT16_C( 31585), INT16_C( 24583), INT16_C(  2918),
+                            INT16_C(  5705), INT16_C( 31274), INT16_C( 12388), INT16_C( 31454),
+                            INT16_C(  5008), INT16_C( 10123), INT16_C( 28874), INT16_C( 27636),
+                            INT16_C(  1380), INT16_C( 32687), INT16_C( 24141), INT16_C( 11570)) },
+    { simde_mm256_set_epi16(INT16_C(-28981), INT16_C(-21254), INT16_C( 12206), INT16_C( 17751),
+                            INT16_C(  4887), INT16_C( 27025), INT16_C( 20436), INT16_C( -3143),
+                            INT16_C(  5806), INT16_C( 19398), INT16_C( 23890), INT16_C( -1841),
+                            INT16_C( -1212), INT16_C(  -418), INT16_C(  2804), INT16_C(-24086)),
+      simde_mm256_set_epi16(INT16_C( 28981), INT16_C( 21254), INT16_C( 12206), INT16_C( 17751),
+                            INT16_C(  4887), INT16_C( 27025), INT16_C( 20436), INT16_C(  3143),
+                            INT16_C(  5806), INT16_C( 19398), INT16_C( 23890), INT16_C(  1841),
+                            INT16_C(  1212), INT16_C(   418), INT16_C(  2804), INT16_C( 24086)) },
+    { simde_mm256_set_epi16(INT16_C(-32227), INT16_C( 26559), INT16_C( 32468), INT16_C(  9282),
+                            INT16_C( 10212), INT16_C(  7157), INT16_C(-18109), INT16_C(-13716),
+                            INT16_C(  3356), INT16_C( -6654), INT16_C(  3548), INT16_C(-31612),
+                            INT16_C( -3226), INT16_C(-30156), INT16_C(-15323), INT16_C(  8689)),
+      simde_mm256_set_epi16(INT16_C( 32227), INT16_C( 26559), INT16_C( 32468), INT16_C(  9282),
+                            INT16_C( 10212), INT16_C(  7157), INT16_C( 18109), INT16_C( 13716),
+                            INT16_C(  3356), INT16_C(  6654), INT16_C(  3548), INT16_C( 31612),
+                            INT16_C(  3226), INT16_C( 30156), INT16_C( 15323), INT16_C(  8689)) },
+    { simde_mm256_set_epi16(INT16_C( 14337), INT16_C(-20237), INT16_C(  7001), INT16_C( 29027),
+                            INT16_C( -3029), INT16_C( 12894), INT16_C(-24482), INT16_C( -8195),
+                            INT16_C( -7637), INT16_C(-26436), INT16_C( 15950), INT16_C(  5319),
+                            INT16_C( 22977), INT16_C(  -593), INT16_C(-29639), INT16_C( 23312)),
+      simde_mm256_set_epi16(INT16_C( 14337), INT16_C( 20237), INT16_C(  7001), INT16_C( 29027),
+                            INT16_C(  3029), INT16_C( 12894), INT16_C( 24482), INT16_C(  8195),
+                            INT16_C(  7637), INT16_C( 26436), INT16_C( 15950), INT16_C(  5319),
+                            INT16_C( 22977), INT16_C(   593), INT16_C( 29639), INT16_C( 23312)) },
+    { simde_mm256_set_epi16(INT16_C(  4249), INT16_C( -3888), INT16_C( 15630), INT16_C(-11095),
+                            INT16_C(-21648), INT16_C(-10947), INT16_C( -1651), INT16_C(  5821),
+                            INT16_C( 25032), INT16_C( 26383), INT16_C(-18726), INT16_C(-14746),
+                            INT16_C(  9694), INT16_C(-29231), INT16_C( 18526), INT16_C(-12816)),
+      simde_mm256_set_epi16(INT16_C(  4249), INT16_C(  3888), INT16_C( 15630), INT16_C( 11095),
+                            INT16_C( 21648), INT16_C( 10947), INT16_C(  1651), INT16_C(  5821),
+                            INT16_C( 25032), INT16_C( 26383), INT16_C( 18726), INT16_C( 14746),
+                            INT16_C(  9694), INT16_C( 29231), INT16_C( 18526), INT16_C( 12816)) },
+    { simde_mm256_set_epi16(INT16_C(  6410), INT16_C(  4746), INT16_C( 16873), INT16_C(-29607),
+                            INT16_C( 21314), INT16_C(-32512), INT16_C(-23052), INT16_C( 20594),
+                            INT16_C( -1613), INT16_C( 26993), INT16_C( 28325), INT16_C(   406),
+                            INT16_C(-19031), INT16_C(  6060), INT16_C(-29650), INT16_C(  8164)),
+      simde_mm256_set_epi16(INT16_C(  6410), INT16_C(  4746), INT16_C( 16873), INT16_C( 29607),
+                            INT16_C( 21314), INT16_C( 32512), INT16_C( 23052), INT16_C( 20594),
+                            INT16_C(  1613), INT16_C( 26993), INT16_C( 28325), INT16_C(   406),
+                            INT16_C( 19031), INT16_C(  6060), INT16_C( 29650), INT16_C(  8164)) }
+  };
+  /* Evaluate every case and compare the result with r via simde_assert_m256i_i16. */
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+    simde__m256i r = simde_mm256_abs_epi16(test_vec[i].a);
+    simde_assert_m256i_i16(r, ==, test_vec[i].r);
+  }
+
+  return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm256_abs_epi32(const MunitParameter params[], void* data) {
+  (void) params;
+  (void) data;
+  /* Eight fixed cases: a is the input vector, r the expected simde_mm256_abs_epi32(a). */
+  const struct {
+    simde__m256i a;
+    simde__m256i r;
+  } test_vec[8] = {
+    { simde_mm256_set_epi32(INT32_C(  596456800), INT32_C(  917980192), INT32_C( 1389477380), INT32_C(-1487744611),
+                            INT32_C(  612913673), INT32_C( 1013476698), INT32_C(  793290876), INT32_C(-1404571583)),
+      simde_mm256_set_epi32(INT32_C(  596456800), INT32_C(  917980192), INT32_C( 1389477380), INT32_C( 1487744611),
+                            INT32_C(  612913673), INT32_C( 1013476698), INT32_C(  793290876), INT32_C( 1404571583)) },
+    { simde_mm256_set_epi32(INT32_C( 1329716652), INT32_C(  387813132), INT32_C(-1411588083), INT32_C(-1664087503),
+                            INT32_C(  826397593), INT32_C(   -4558887), INT32_C(-1413962953), INT32_C( -773080002)),
+      simde_mm256_set_epi32(INT32_C( 1329716652), INT32_C(  387813132), INT32_C( 1411588083), INT32_C( 1664087503),
+                            INT32_C(  826397593), INT32_C(    4558887), INT32_C( 1413962953), INT32_C(  773080002)) },
+    { simde_mm256_set_epi32(INT32_C(  757826719), INT32_C( 1611074406), INT32_C(  373914154), INT32_C( -811828514),
+                            INT32_C(  328214411), INT32_C( 1892324364), INT32_C(   90472529), INT32_C( 1582116146)),
+      simde_mm256_set_epi32(INT32_C(  757826719), INT32_C( 1611074406), INT32_C(  373914154), INT32_C(  811828514),
+                            INT32_C(  328214411), INT32_C( 1892324364), INT32_C(   90472529), INT32_C( 1582116146)) },
+    { simde_mm256_set_epi32(INT32_C(-1899254534), INT32_C(  799950167), INT32_C(  320301457), INT32_C( 1339356089),
+                            INT32_C(  380521414), INT32_C( 1565718735), INT32_C(  -79364514), INT32_C(  183804394)),
+      simde_mm256_set_epi32(INT32_C( 1899254534), INT32_C(  799950167), INT32_C(  320301457), INT32_C( 1339356089),
+                            INT32_C(  380521414), INT32_C( 1565718735), INT32_C(   79364514), INT32_C(  183804394)) },
+    { simde_mm256_set_epi32(INT32_C(-2112002113), INT32_C( 2127832130), INT32_C(  669260789), INT32_C(-1186739604),
+                            INT32_C(  219997698), INT32_C(  232555652), INT32_C( -211383756), INT32_C(-1004199439)),
+      simde_mm256_set_epi32(INT32_C( 2112002113), INT32_C( 2127832130), INT32_C(  669260789), INT32_C( 1186739604),
+                            INT32_C(  219997698), INT32_C(  232555652), INT32_C(  211383756), INT32_C( 1004199439)) },
+    { simde_mm256_set_epi32(INT32_C(  939634931), INT32_C(  458846563), INT32_C( -198495650), INT32_C(-1604395011),
+                            INT32_C( -500459332), INT32_C( 1045304519), INT32_C( 1505885615), INT32_C(-1942398192)),
+      simde_mm256_set_epi32(INT32_C(  939634931), INT32_C(  458846563), INT32_C(  198495650), INT32_C( 1604395011),
+                            INT32_C(  500459332), INT32_C( 1045304519), INT32_C( 1505885615), INT32_C( 1942398192)) },
+    { simde_mm256_set_epi32(INT32_C(  278524112), INT32_C( 1024382121), INT32_C(-1418668739), INT32_C( -108194115),
+                            INT32_C( 1640523535), INT32_C(-1227176346), INT32_C(  635342289), INT32_C( 1214172656)),
+      simde_mm256_set_epi32(INT32_C(  278524112), INT32_C( 1024382121), INT32_C( 1418668739), INT32_C(  108194115),
+                            INT32_C( 1640523535), INT32_C( 1227176346), INT32_C(  635342289), INT32_C( 1214172656)) },
+    { simde_mm256_set_epi32(INT32_C(  420090506), INT32_C( 1105824857), INT32_C( 1396867328), INT32_C(-1510715278),
+                            INT32_C( -105682575), INT32_C( 1856307606), INT32_C(-1247209556), INT32_C(-1943134236)),
+      simde_mm256_set_epi32(INT32_C(  420090506), INT32_C( 1105824857), INT32_C( 1396867328), INT32_C( 1510715278),
+                            INT32_C(  105682575), INT32_C( 1856307606), INT32_C( 1247209556), INT32_C( 1943134236)) }
+  };
+  /* Evaluate every case and compare the result with r via simde_assert_m256i_i32. */
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+    simde__m256i r = simde_mm256_abs_epi32(test_vec[i].a);
+    simde_assert_m256i_i32(r, ==, test_vec[i].r);
+  }
+
+  return MUNIT_OK;
+}
+
 static MunitResult
 test_simde_mm256_add_epi8(const MunitParameter params[], void* data) {
   (void) params;
@@ -8540,6 +8821,401 @@ test_simde_mm_blend_epi32(const MunitParameter params[], void* data) {
   return MUNIT_OK;
 }
 
+static MunitResult
+test_simde_mm256_min_epi8(const MunitParameter params[], void* data) {
+  (void) params; /* not used by this test */
+  (void) data; /* not used by this test */
+  /* Runs simde_mm256_min_epi8 on 8 fixed (a, b) pairs and asserts each result equals the stored r. */
+  const struct {
+    simde__m256i a; /* first argument passed to simde_mm256_min_epi8 */
+    simde__m256i b; /* second argument passed to simde_mm256_min_epi8 */
+    simde__m256i r; /* value the computed result is asserted equal to */
+  } test_vec[8] = {
+    { simde_mm256_set_epi8(INT8_C(-113), INT8_C( 124), INT8_C(  15), INT8_C(-126),
+                           INT8_C( -14), INT8_C( -39), INT8_C(  -2), INT8_C( -33),
+                           INT8_C(  77), INT8_C(-107), INT8_C( -95), INT8_C( -10),
+                           INT8_C(  24), INT8_C( -89), INT8_C(  80), INT8_C(  69),
+                           INT8_C( -99), INT8_C(  54), INT8_C(   5), INT8_C( 103),
+                           INT8_C(  71), INT8_C(  11), INT8_C( -41), INT8_C( 122),
+                           INT8_C(  57), INT8_C( -91), INT8_C(-106), INT8_C(  27),
+                           INT8_C(  57), INT8_C(  -8), INT8_C( -10), INT8_C(  38)),
+      simde_mm256_set_epi8(INT8_C(  58), INT8_C(-107), INT8_C(  43), INT8_C(-106),
+                           INT8_C(-127), INT8_C(  87), INT8_C( 107), INT8_C( 115),
+                           INT8_C(  91), INT8_C( 104), INT8_C( -95), INT8_C( -69),
+                           INT8_C( -37), INT8_C( 126), INT8_C(  40), INT8_C(  53),
+                           INT8_C(-103), INT8_C(  84), INT8_C(  99), INT8_C(  38),
+                           INT8_C( 126), INT8_C( -61), INT8_C( 117), INT8_C(  48),
+                           INT8_C(  65), INT8_C(  73), INT8_C(  55), INT8_C(  73),
+                           INT8_C( -94), INT8_C( -78), INT8_C(  -6), INT8_C(  49)),
+      simde_mm256_set_epi8(INT8_C(-113), INT8_C(-107), INT8_C(  15), INT8_C(-126),
+                           INT8_C(-127), INT8_C( -39), INT8_C(  -2), INT8_C( -33),
+                           INT8_C(  77), INT8_C(-107), INT8_C( -95), INT8_C( -69),
+                           INT8_C( -37), INT8_C( -89), INT8_C(  40), INT8_C(  53),
+                           INT8_C(-103), INT8_C(  54), INT8_C(   5), INT8_C(  38),
+                           INT8_C(  71), INT8_C( -61), INT8_C( -41), INT8_C(  48),
+                           INT8_C(  57), INT8_C( -91), INT8_C(-106), INT8_C(  27),
+                           INT8_C( -94), INT8_C( -78), INT8_C( -10), INT8_C(  38)) },
+    { simde_mm256_set_epi8(INT8_C(  53), INT8_C(  21), INT8_C(  98), INT8_C( 120),
+                           INT8_C(-113), INT8_C(  39), INT8_C( -37), INT8_C(  99),
+                           INT8_C(-124), INT8_C( -15), INT8_C( -78), INT8_C( 107),
+                           INT8_C(  24), INT8_C( 124), INT8_C( -54), INT8_C(  81),
+                           INT8_C( 115), INT8_C( -24), INT8_C(  66), INT8_C(  27),
+                           INT8_C(  15), INT8_C(  61), INT8_C(  57), INT8_C(-118),
+                           INT8_C(-113), INT8_C( -12), INT8_C(  -8), INT8_C( -69),
+                           INT8_C( -18), INT8_C(  21), INT8_C(  83), INT8_C( 113)),
+      simde_mm256_set_epi8(INT8_C(-103), INT8_C(-115), INT8_C( -82), INT8_C(  99),
+                           INT8_C( 114), INT8_C(  96), INT8_C( 122), INT8_C(-112),
+                           INT8_C(  16), INT8_C(  31), INT8_C(  14), INT8_C( -56),
+                           INT8_C( -48), INT8_C(   2), INT8_C( -47), INT8_C(  23),
+                           INT8_C( 105), INT8_C( -82), INT8_C(  84), INT8_C( 120),
+                           INT8_C(  10), INT8_C(   6), INT8_C( -73), INT8_C( -69),
+                           INT8_C( -15), INT8_C( -30), INT8_C( -84), INT8_C(   4),
+                           INT8_C( 118), INT8_C(  78), INT8_C(-102), INT8_C(-121)),
+      simde_mm256_set_epi8(INT8_C(-103), INT8_C(-115), INT8_C( -82), INT8_C(  99),
+                           INT8_C(-113), INT8_C(  39), INT8_C( -37), INT8_C(-112),
+                           INT8_C(-124), INT8_C( -15), INT8_C( -78), INT8_C( -56),
+                           INT8_C( -48), INT8_C(   2), INT8_C( -54), INT8_C(  23),
+                           INT8_C( 105), INT8_C( -82), INT8_C(  66), INT8_C(  27),
+                           INT8_C(  10), INT8_C(   6), INT8_C( -73), INT8_C(-118),
+                           INT8_C(-113), INT8_C( -30), INT8_C( -84), INT8_C( -69),
+                           INT8_C( -18), INT8_C(  21), INT8_C(-102), INT8_C(-121)) },
+    { simde_mm256_set_epi8(INT8_C(  99), INT8_C(  36), INT8_C( -33), INT8_C(-101),
+                           INT8_C(-112), INT8_C(-121), INT8_C(  47), INT8_C( -29),
+                           INT8_C( 104), INT8_C( 110), INT8_C( -84), INT8_C(  72),
+                           INT8_C( -54), INT8_C( -40), INT8_C(  19), INT8_C(-103),
+                           INT8_C( -69), INT8_C( -50), INT8_C(  26), INT8_C( -97),
+                           INT8_C( -51), INT8_C( -78), INT8_C(  41), INT8_C( -85),
+                           INT8_C(  36), INT8_C(  76), INT8_C( 111), INT8_C(  49),
+                           INT8_C(  97), INT8_C(  -3), INT8_C(  36), INT8_C(  67)),
+      simde_mm256_set_epi8(INT8_C(  66), INT8_C(  -8), INT8_C(  56), INT8_C(   7),
+                           INT8_C(  58), INT8_C(-107), INT8_C( 110), INT8_C( -95),
+                           INT8_C(  31), INT8_C( -56), INT8_C(  42), INT8_C(   8),
+                           INT8_C( -46), INT8_C( -16), INT8_C(  21), INT8_C( 126),
+                           INT8_C( -12), INT8_C( -94), INT8_C( 109), INT8_C(  67),
+                           INT8_C(  47), INT8_C( 103), INT8_C(  21), INT8_C( 104),
+                           INT8_C(  99), INT8_C( 106), INT8_C( -91), INT8_C(  87),
+                           INT8_C( -83), INT8_C(   2), INT8_C( 105), INT8_C(-105)),
+      simde_mm256_set_epi8(INT8_C(  66), INT8_C(  -8), INT8_C( -33), INT8_C(-101),
+                           INT8_C(-112), INT8_C(-121), INT8_C(  47), INT8_C( -95),
+                           INT8_C(  31), INT8_C( -56), INT8_C( -84), INT8_C(   8),
+                           INT8_C( -54), INT8_C( -40), INT8_C(  19), INT8_C(-103),
+                           INT8_C( -69), INT8_C( -94), INT8_C(  26), INT8_C( -97),
+                           INT8_C( -51), INT8_C( -78), INT8_C(  21), INT8_C( -85),
+                           INT8_C(  36), INT8_C(  76), INT8_C( -91), INT8_C(  49),
+                           INT8_C( -83), INT8_C(  -3), INT8_C(  36), INT8_C(-105)) },
+    { simde_mm256_set_epi8(INT8_C( -79), INT8_C( -25), INT8_C(  15), INT8_C(   2),
+                           INT8_C( -84), INT8_C(   9), INT8_C(  54), INT8_C(  99),
+                           INT8_C( -69), INT8_C( 117), INT8_C( -20), INT8_C(  47),
+                           INT8_C(  21), INT8_C(  44), INT8_C( 113), INT8_C(  59),
+                           INT8_C(   9), INT8_C(  65), INT8_C(  -7), INT8_C( -76),
+                           INT8_C(-124), INT8_C( -64), INT8_C(  22), INT8_C( -79),
+                           INT8_C( -58), INT8_C(  29), INT8_C(  40), INT8_C(  64),
+                           INT8_C(  -9), INT8_C(-123), INT8_C( -13), INT8_C( -32)),
+      simde_mm256_set_epi8(INT8_C(-114), INT8_C(  -7), INT8_C(   9), INT8_C( -63),
+                           INT8_C(   0), INT8_C( -82), INT8_C( -19), INT8_C(  75),
+                           INT8_C(  69), INT8_C(  48), INT8_C(  42), INT8_C( -52),
+                           INT8_C( 119), INT8_C( -20), INT8_C(  26), INT8_C( -57),
+                           INT8_C(  88), INT8_C(  87), INT8_C(  99), INT8_C(-127),
+                           INT8_C(  48), INT8_C(  19), INT8_C(  54), INT8_C( -35),
+                           INT8_C(   5), INT8_C(  23), INT8_C( -21), INT8_C(  88),
+                           INT8_C(  87), INT8_C(-109), INT8_C( -46), INT8_C(-127)),
+      simde_mm256_set_epi8(INT8_C(-114), INT8_C( -25), INT8_C(   9), INT8_C( -63),
+                           INT8_C( -84), INT8_C( -82), INT8_C( -19), INT8_C(  75),
+                           INT8_C( -69), INT8_C(  48), INT8_C( -20), INT8_C( -52),
+                           INT8_C(  21), INT8_C( -20), INT8_C(  26), INT8_C( -57),
+                           INT8_C(   9), INT8_C(  65), INT8_C(  -7), INT8_C(-127),
+                           INT8_C(-124), INT8_C( -64), INT8_C(  22), INT8_C( -79),
+                           INT8_C( -58), INT8_C(  23), INT8_C( -21), INT8_C(  64),
+                           INT8_C(  -9), INT8_C(-123), INT8_C( -46), INT8_C(-127)) },
+    { simde_mm256_set_epi8(INT8_C(  40), INT8_C(-101), INT8_C(-116), INT8_C( -63),
+                           INT8_C( -76), INT8_C(  83), INT8_C( -41), INT8_C(  -2),
+                           INT8_C(  74), INT8_C(  48), INT8_C(-104), INT8_C( -61),
+                           INT8_C(  95), INT8_C(  50), INT8_C(  86), INT8_C(-110),
+                           INT8_C( -24), INT8_C(  99), INT8_C( -15), INT8_C( 126),
+                           INT8_C( -73), INT8_C( -17), INT8_C(-122), INT8_C(  -7),
+                           INT8_C(  95), INT8_C( -91), INT8_C(  66), INT8_C(  -2),
+                           INT8_C(  88), INT8_C( -54), INT8_C( 117), INT8_C( -86)),
+      simde_mm256_set_epi8(INT8_C( -45), INT8_C( -41), INT8_C(  87), INT8_C( -75),
+                           INT8_C( 104), INT8_C(  -1), INT8_C(  11), INT8_C( 125),
+                           INT8_C(-123), INT8_C(  86), INT8_C(  26), INT8_C(  46),
+                           INT8_C(   6), INT8_C(  54), INT8_C( -77), INT8_C(  63),
+                           INT8_C(  79), INT8_C(-127), INT8_C(-118), INT8_C( 107),
+                           INT8_C(-122), INT8_C( -36), INT8_C(  -2), INT8_C( -64),
+                           INT8_C(  65), INT8_C( -89), INT8_C(  86), INT8_C(  99),
+                           INT8_C( -49), INT8_C(  59), INT8_C( -15), INT8_C( 103)),
+      simde_mm256_set_epi8(INT8_C( -45), INT8_C(-101), INT8_C(-116), INT8_C( -75),
+                           INT8_C( -76), INT8_C(  -1), INT8_C( -41), INT8_C(  -2),
+                           INT8_C(-123), INT8_C(  48), INT8_C(-104), INT8_C( -61),
+                           INT8_C(   6), INT8_C(  50), INT8_C( -77), INT8_C(-110),
+                           INT8_C( -24), INT8_C(-127), INT8_C(-118), INT8_C( 107),
+                           INT8_C(-122), INT8_C( -36), INT8_C(-122), INT8_C( -64),
+                           INT8_C(  65), INT8_C( -91), INT8_C(  66), INT8_C(  -2),
+                           INT8_C( -49), INT8_C( -54), INT8_C( -15), INT8_C( -86)) },
+    { simde_mm256_set_epi8(INT8_C(  -6), INT8_C(  22), INT8_C( -98), INT8_C(-111),
+                           INT8_C( -27), INT8_C( -45), INT8_C( -60), INT8_C( -64),
+                           INT8_C( -14), INT8_C( -15), INT8_C( -20), INT8_C(  52),
+                           INT8_C(-110), INT8_C(  52), INT8_C( 120), INT8_C( 119),
+                           INT8_C( -35), INT8_C(   3), INT8_C(  42), INT8_C( 109),
+                           INT8_C(  16), INT8_C( -18), INT8_C( -49), INT8_C( 102),
+                           INT8_C(  87), INT8_C( -69), INT8_C(  10), INT8_C(   0),
+                           INT8_C(   1), INT8_C(  21), INT8_C(   6), INT8_C( -56)),
+      simde_mm256_set_epi8(INT8_C( -60), INT8_C(  58), INT8_C(  68), INT8_C(-110),
+                           INT8_C(  25), INT8_C(  53), INT8_C(-112), INT8_C(   1),
+                           INT8_C( 104), INT8_C(  22), INT8_C( -20), INT8_C(-119),
+                           INT8_C( -66), INT8_C( -43), INT8_C( 109), INT8_C( -61),
+                           INT8_C( -86), INT8_C( -59), INT8_C( -31), INT8_C( -86),
+                           INT8_C( -11), INT8_C(  72), INT8_C( 122), INT8_C(-127),
+                           INT8_C(  21), INT8_C(  32), INT8_C( 109), INT8_C( 110),
+                           INT8_C( 112), INT8_C( -23), INT8_C(  44), INT8_C(  69)),
+      simde_mm256_set_epi8(INT8_C( -60), INT8_C(  22), INT8_C( -98), INT8_C(-111),
+                           INT8_C( -27), INT8_C( -45), INT8_C(-112), INT8_C( -64),
+                           INT8_C( -14), INT8_C( -15), INT8_C( -20), INT8_C(-119),
+                           INT8_C(-110), INT8_C( -43), INT8_C( 109), INT8_C( -61),
+                           INT8_C( -86), INT8_C( -59), INT8_C( -31), INT8_C( -86),
+                           INT8_C( -11), INT8_C( -18), INT8_C( -49), INT8_C(-127),
+                           INT8_C(  21), INT8_C( -69), INT8_C(  10), INT8_C(   0),
+                           INT8_C(   1), INT8_C( -23), INT8_C(   6), INT8_C( -56)) },
+    { simde_mm256_set_epi8(INT8_C( -81), INT8_C(  40), INT8_C(  34), INT8_C(-101),
+                           INT8_C(  85), INT8_C( 102), INT8_C( -21), INT8_C(   6),
+                           INT8_C(  23), INT8_C( -23), INT8_C( -36), INT8_C(  44),
+                           INT8_C( -12), INT8_C( 110), INT8_C( -68), INT8_C(  47),
+                           INT8_C(  45), INT8_C( -59), INT8_C(-112), INT8_C(  86),
+                           INT8_C( -95), INT8_C(  -9), INT8_C(-101), INT8_C(  83),
+                           INT8_C(  37), INT8_C(  85), INT8_C(  31), INT8_C(  -4),
+                           INT8_C(  98), INT8_C( -87), INT8_C( -65), INT8_C( -71)),
+      simde_mm256_set_epi8(INT8_C( -58), INT8_C(-110), INT8_C(   4), INT8_C(  10),
+                           INT8_C( -39), INT8_C(  -8), INT8_C( 126), INT8_C(  76),
+                           INT8_C(  85), INT8_C( -11), INT8_C(-106), INT8_C(  42),
+                           INT8_C( 125), INT8_C(  46), INT8_C(  63), INT8_C(  35),
+                           INT8_C( -24), INT8_C( -99), INT8_C(  71), INT8_C(-108),
+                           INT8_C(  52), INT8_C(-105), INT8_C( -88), INT8_C( -28),
+                           INT8_C(  12), INT8_C( -41), INT8_C(  81), INT8_C(  38),
+                           INT8_C( -13), INT8_C(  29), INT8_C( -44), INT8_C(  59)),
+      simde_mm256_set_epi8(INT8_C( -81), INT8_C(-110), INT8_C(   4), INT8_C(-101),
+                           INT8_C( -39), INT8_C(  -8), INT8_C( -21), INT8_C(   6),
+                           INT8_C(  23), INT8_C( -23), INT8_C(-106), INT8_C(  42),
+                           INT8_C( -12), INT8_C(  46), INT8_C( -68), INT8_C(  35),
+                           INT8_C( -24), INT8_C( -99), INT8_C(-112), INT8_C(-108),
+                           INT8_C( -95), INT8_C(-105), INT8_C(-101), INT8_C( -28),
+                           INT8_C(  12), INT8_C( -41), INT8_C(  31), INT8_C(  -4),
+                           INT8_C( -13), INT8_C( -87), INT8_C( -65), INT8_C( -71)) },
+    { simde_mm256_set_epi8(INT8_C( -36), INT8_C( -94), INT8_C( -61), INT8_C( 117),
+                           INT8_C(   0), INT8_C(  -1), INT8_C( 118), INT8_C(  99),
+                           INT8_C( -54), INT8_C(-123), INT8_C(  18), INT8_C( 111),
+                           INT8_C(   8), INT8_C( -59), INT8_C( 107), INT8_C(  77),
+                           INT8_C(   7), INT8_C(  91), INT8_C(  25), INT8_C(  70),
+                           INT8_C(  80), INT8_C(  81), INT8_C(  11), INT8_C(  29),
+                           INT8_C( 109), INT8_C( 125), INT8_C( -51), INT8_C(  58),
+                           INT8_C(  66), INT8_C( 126), INT8_C(-122), INT8_C(  96)),
+      simde_mm256_set_epi8(INT8_C( -91), INT8_C( -28), INT8_C(  59), INT8_C(  45),
+                           INT8_C(  99), INT8_C(  67), INT8_C( -54), INT8_C( 122),
+                           INT8_C(  -4), INT8_C(  -6), INT8_C( -77), INT8_C(  96),
+                           INT8_C(  16), INT8_C(  37), INT8_C(-127), INT8_C(  76),
+                           INT8_C(  56), INT8_C( -93), INT8_C(   7), INT8_C( -50),
+                           INT8_C(  56), INT8_C(-112), INT8_C(   6), INT8_C( -49),
+                           INT8_C( 107), INT8_C( -15), INT8_C( -42), INT8_C(-121),
+                           INT8_C( -81), INT8_C(  97), INT8_C( -56), INT8_C(  97)),
+      simde_mm256_set_epi8(INT8_C( -91), INT8_C( -94), INT8_C( -61), INT8_C(  45),
+                           INT8_C(   0), INT8_C(  -1), INT8_C( -54), INT8_C(  99),
+                           INT8_C( -54), INT8_C(-123), INT8_C( -77), INT8_C(  96),
+                           INT8_C(   8), INT8_C( -59), INT8_C(-127), INT8_C(  76),
+                           INT8_C(   7), INT8_C( -93), INT8_C(   7), INT8_C( -50),
+                           INT8_C(  56), INT8_C(-112), INT8_C(   6), INT8_C( -49),
+                           INT8_C( 107), INT8_C( -15), INT8_C( -51), INT8_C(-121),
+                           INT8_C( -81), INT8_C(  97), INT8_C(-122), INT8_C(  96)) }
+  };
+  /* sizeof(test_vec) / sizeof(test_vec[0]) is the element count, so every table entry is visited. */
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+    simde__m256i r = simde_mm256_min_epi8(test_vec[i].a, test_vec[i].b);
+    simde_assert_m256i_i8(r, ==, test_vec[i].r); /* NOTE(review): macro defined elsewhere; presumably compares as int8 lanes */
+  }
+
+  return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm256_min_epi16(const MunitParameter params[], void* data) {
+  (void) params; /* not used by this test */
+  (void) data; /* not used by this test */
+  /* Runs simde_mm256_min_epi16 on 8 fixed (a, b) pairs and asserts each result equals the stored r. */
+  const struct {
+    simde__m256i a; /* first argument passed to simde_mm256_min_epi16 */
+    simde__m256i b; /* second argument passed to simde_mm256_min_epi16 */
+    simde__m256i r; /* value the computed result is asserted equal to */
+  } test_vec[8] = {
+    { simde_mm256_set_epi16(INT16_C(-28804), INT16_C(  3970), INT16_C( -3367), INT16_C(  -289),
+                            INT16_C( 19861), INT16_C(-24074), INT16_C(  6311), INT16_C( 20549),
+                            INT16_C(-25290), INT16_C(  1383), INT16_C( 18187), INT16_C(-10374),
+                            INT16_C( 14757), INT16_C(-27109), INT16_C( 14840), INT16_C( -2522)),
+      simde_mm256_set_epi16(INT16_C( 14997), INT16_C( 11158), INT16_C(-32425), INT16_C( 27507),
+                            INT16_C( 23400), INT16_C(-24133), INT16_C( -9346), INT16_C( 10293),
+                            INT16_C(-26284), INT16_C( 25382), INT16_C( 32451), INT16_C( 30000),
+                            INT16_C( 16713), INT16_C( 14153), INT16_C(-23886), INT16_C( -1487)),
+      simde_mm256_set_epi16(INT16_C(-28804), INT16_C(  3970), INT16_C(-32425), INT16_C(  -289),
+                            INT16_C( 19861), INT16_C(-24133), INT16_C( -9346), INT16_C( 10293),
+                            INT16_C(-26284), INT16_C(  1383), INT16_C( 18187), INT16_C(-10374),
+                            INT16_C( 14757), INT16_C(-27109), INT16_C(-23886), INT16_C( -2522)) },
+    { simde_mm256_set_epi16(INT16_C( 13589), INT16_C( 25208), INT16_C(-28889), INT16_C( -9373),
+                            INT16_C(-31503), INT16_C(-19861), INT16_C(  6268), INT16_C(-13743),
+                            INT16_C( 29672), INT16_C( 16923), INT16_C(  3901), INT16_C( 14730),
+                            INT16_C(-28684), INT16_C( -1861), INT16_C( -4587), INT16_C( 21361)),
+      simde_mm256_set_epi16(INT16_C(-26227), INT16_C(-20893), INT16_C( 29280), INT16_C( 31376),
+                            INT16_C(  4127), INT16_C(  3784), INT16_C(-12286), INT16_C(-12009),
+                            INT16_C( 27054), INT16_C( 21624), INT16_C(  2566), INT16_C(-18501),
+                            INT16_C( -3614), INT16_C(-21500), INT16_C( 30286), INT16_C(-25977)),
+      simde_mm256_set_epi16(INT16_C(-26227), INT16_C(-20893), INT16_C(-28889), INT16_C( -9373),
+                            INT16_C(-31503), INT16_C(-19861), INT16_C(-12286), INT16_C(-13743),
+                            INT16_C( 27054), INT16_C( 16923), INT16_C(  2566), INT16_C(-18501),
+                            INT16_C(-28684), INT16_C(-21500), INT16_C( -4587), INT16_C(-25977)) },
+    { simde_mm256_set_epi16(INT16_C( 25380), INT16_C( -8293), INT16_C(-28537), INT16_C( 12259),
+                            INT16_C( 26734), INT16_C(-21432), INT16_C(-13608), INT16_C(  5017),
+                            INT16_C(-17458), INT16_C(  6815), INT16_C(-12878), INT16_C( 10667),
+                            INT16_C(  9292), INT16_C( 28465), INT16_C( 25085), INT16_C(  9283)),
+      simde_mm256_set_epi16(INT16_C( 17144), INT16_C( 14343), INT16_C( 14997), INT16_C( 28321),
+                            INT16_C(  8136), INT16_C( 10760), INT16_C(-11536), INT16_C(  5502),
+                            INT16_C( -2910), INT16_C( 27971), INT16_C( 12135), INT16_C(  5480),
+                            INT16_C( 25450), INT16_C(-23209), INT16_C(-21246), INT16_C( 27031)),
+      simde_mm256_set_epi16(INT16_C( 17144), INT16_C( -8293), INT16_C(-28537), INT16_C( 12259),
+                            INT16_C(  8136), INT16_C(-21432), INT16_C(-13608), INT16_C(  5017),
+                            INT16_C(-17458), INT16_C(  6815), INT16_C(-12878), INT16_C(  5480),
+                            INT16_C(  9292), INT16_C(-23209), INT16_C(-21246), INT16_C(  9283)) },
+    { simde_mm256_set_epi16(INT16_C(-19993), INT16_C(  3842), INT16_C(-21495), INT16_C( 13923),
+                            INT16_C(-17547), INT16_C( -5073), INT16_C(  5420), INT16_C( 28987),
+                            INT16_C(  2369), INT16_C( -1612), INT16_C(-31552), INT16_C(  5809),
+                            INT16_C(-14819), INT16_C( 10304), INT16_C( -2171), INT16_C( -3104)),
+      simde_mm256_set_epi16(INT16_C(-28935), INT16_C(  2497), INT16_C(   174), INT16_C( -4789),
+                            INT16_C( 17712), INT16_C( 10956), INT16_C( 30700), INT16_C(  6855),
+                            INT16_C( 22615), INT16_C( 25473), INT16_C( 12307), INT16_C( 14045),
+                            INT16_C(  1303), INT16_C( -5288), INT16_C( 22419), INT16_C(-11647)),
+      simde_mm256_set_epi16(INT16_C(-28935), INT16_C(  2497), INT16_C(-21495), INT16_C( -4789),
+                            INT16_C(-17547), INT16_C( -5073), INT16_C(  5420), INT16_C(  6855),
+                            INT16_C(  2369), INT16_C( -1612), INT16_C(-31552), INT16_C(  5809),
+                            INT16_C(-14819), INT16_C( -5288), INT16_C( -2171), INT16_C(-11647)) },
+    { simde_mm256_set_epi16(INT16_C( 10395), INT16_C(-29503), INT16_C(-19373), INT16_C(-10242),
+                            INT16_C( 18992), INT16_C(-26429), INT16_C( 24370), INT16_C( 22162),
+                            INT16_C( -6045), INT16_C( -3714), INT16_C(-18449), INT16_C(-30983),
+                            INT16_C( 24485), INT16_C( 17150), INT16_C( 22730), INT16_C( 30122)),
+      simde_mm256_set_epi16(INT16_C(-11305), INT16_C( 22453), INT16_C( 26879), INT16_C(  2941),
+                            INT16_C(-31402), INT16_C(  6702), INT16_C(  1590), INT16_C(-19649),
+                            INT16_C( 20353), INT16_C(-30101), INT16_C(-31012), INT16_C(  -320),
+                            INT16_C( 16807), INT16_C( 22115), INT16_C(-12485), INT16_C( -3737)),
+      simde_mm256_set_epi16(INT16_C(-11305), INT16_C(-29503), INT16_C(-19373), INT16_C(-10242),
+                            INT16_C(-31402), INT16_C(-26429), INT16_C(  1590), INT16_C(-19649),
+                            INT16_C( -6045), INT16_C(-30101), INT16_C(-31012), INT16_C(-30983),
+                            INT16_C( 16807), INT16_C( 17150), INT16_C(-12485), INT16_C( -3737)) },
+    { simde_mm256_set_epi16(INT16_C( -1514), INT16_C(-24943), INT16_C( -6701), INT16_C(-15168),
+                            INT16_C( -3343), INT16_C( -5068), INT16_C(-28108), INT16_C( 30839),
+                            INT16_C( -8957), INT16_C( 10861), INT16_C(  4334), INT16_C(-12442),
+                            INT16_C( 22459), INT16_C(  2560), INT16_C(   277), INT16_C(  1736)),
+      simde_mm256_set_epi16(INT16_C(-15302), INT16_C( 17554), INT16_C(  6453), INT16_C(-28671),
+                            INT16_C( 26646), INT16_C( -4983), INT16_C(-16683), INT16_C( 28099),
+                            INT16_C(-21819), INT16_C( -7766), INT16_C( -2744), INT16_C( 31361),
+                            INT16_C(  5408), INT16_C( 28014), INT16_C( 28905), INT16_C( 11333)),
+      simde_mm256_set_epi16(INT16_C(-15302), INT16_C(-24943), INT16_C( -6701), INT16_C(-28671),
+                            INT16_C( -3343), INT16_C( -5068), INT16_C(-28108), INT16_C( 28099),
+                            INT16_C(-21819), INT16_C( -7766), INT16_C( -2744), INT16_C(-12442),
+                            INT16_C(  5408), INT16_C(  2560), INT16_C(   277), INT16_C(  1736)) },
+    { simde_mm256_set_epi16(INT16_C(-20696), INT16_C(  8859), INT16_C( 21862), INT16_C( -5370),
+                            INT16_C(  6121), INT16_C( -9172), INT16_C( -2962), INT16_C(-17361),
+                            INT16_C( 11717), INT16_C(-28586), INT16_C(-24073), INT16_C(-25773),
+                            INT16_C(  9557), INT16_C(  8188), INT16_C( 25257), INT16_C(-16455)),
+      simde_mm256_set_epi16(INT16_C(-14702), INT16_C(  1034), INT16_C( -9736), INT16_C( 32332),
+                            INT16_C( 22005), INT16_C(-27094), INT16_C( 32046), INT16_C( 16163),
+                            INT16_C( -5987), INT16_C( 18324), INT16_C( 13463), INT16_C(-22300),
+                            INT16_C(  3287), INT16_C( 20774), INT16_C( -3299), INT16_C(-11205)),
+      simde_mm256_set_epi16(INT16_C(-20696), INT16_C(  1034), INT16_C( -9736), INT16_C( -5370),
+                            INT16_C(  6121), INT16_C(-27094), INT16_C( -2962), INT16_C(-17361),
+                            INT16_C( -5987), INT16_C(-28586), INT16_C(-24073), INT16_C(-25773),
+                            INT16_C(  3287), INT16_C(  8188), INT16_C( -3299), INT16_C(-16455)) },
+    { simde_mm256_set_epi16(INT16_C( -9054), INT16_C(-15499), INT16_C(   255), INT16_C( 30307),
+                            INT16_C(-13691), INT16_C(  4719), INT16_C(  2245), INT16_C( 27469),
+                            INT16_C(  1883), INT16_C(  6470), INT16_C( 20561), INT16_C(  2845),
+                            INT16_C( 28029), INT16_C(-12998), INT16_C( 17022), INT16_C(-31136)),
+      simde_mm256_set_epi16(INT16_C(-23068), INT16_C( 15149), INT16_C( 25411), INT16_C(-13702),
+                            INT16_C(  -774), INT16_C(-19616), INT16_C(  4133), INT16_C(-32436),
+                            INT16_C( 14499), INT16_C(  1998), INT16_C( 14480), INT16_C(  1743),
+                            INT16_C( 27633), INT16_C(-10617), INT16_C(-20639), INT16_C(-14239)),
+      simde_mm256_set_epi16(INT16_C(-23068), INT16_C(-15499), INT16_C(   255), INT16_C(-13702),
+                            INT16_C(-13691), INT16_C(-19616), INT16_C(  2245), INT16_C(-32436),
+                            INT16_C(  1883), INT16_C(  1998), INT16_C( 14480), INT16_C(  1743),
+                            INT16_C( 27633), INT16_C(-12998), INT16_C(-20639), INT16_C(-31136)) }
+  };
+  /* sizeof(test_vec) / sizeof(test_vec[0]) is the element count, so every table entry is visited. */
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+    simde__m256i r = simde_mm256_min_epi16(test_vec[i].a, test_vec[i].b);
+    simde_assert_m256i_i16(r, ==, test_vec[i].r); /* NOTE(review): macro defined elsewhere; presumably compares as int16 lanes */
+  }
+
+  return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm256_min_epi32(const MunitParameter params[], void* data) {
+  (void) params; /* not used by this test */
+  (void) data; /* not used by this test */
+  /* Runs simde_mm256_min_epi32 on 8 fixed (a, b) pairs and asserts each result equals the stored r. */
+  const struct {
+    simde__m256i a; /* first argument passed to simde_mm256_min_epi32 */
+    simde__m256i b; /* second argument passed to simde_mm256_min_epi32 */
+    simde__m256i r; /* value the computed result is asserted equal to */
+  } test_vec[8] = {
+    { simde_mm256_set_epi32(INT32_C(-1887694974), INT32_C( -220594465), INT32_C( 1301651958), INT32_C(  413618245),
+                            INT32_C(-1657404057), INT32_C( 1191958394), INT32_C(  967153179), INT32_C(  972617254)),
+      simde_mm256_set_epi32(INT32_C(  982854550), INT32_C(-2124977293), INT32_C( 1533583803), INT32_C( -612489163),
+                            INT32_C(-1722522842), INT32_C( 2126738736), INT32_C( 1095317321), INT32_C(-1565328847)),
+      simde_mm256_set_epi32(INT32_C(-1887694974), INT32_C(-2124977293), INT32_C( 1301651958), INT32_C( -612489163),
+                            INT32_C(-1722522842), INT32_C( 1191958394), INT32_C(  967153179), INT32_C(-1565328847)) },
+    { simde_mm256_set_epi32(INT32_C(  890593912), INT32_C(-1893213341), INT32_C(-2064534933), INT32_C(  410831441),
+                            INT32_C( 1944601115), INT32_C(  255670666), INT32_C(-1879770949), INT32_C( -300592271)),
+      simde_mm256_set_epi32(INT32_C(-1718768029), INT32_C( 1918925456), INT32_C(  270470856), INT32_C( -805121769),
+                            INT32_C( 1773032568), INT32_C(  168212411), INT32_C( -236803068), INT32_C( 1984862855)),
+      simde_mm256_set_epi32(INT32_C(-1718768029), INT32_C(-1893213341), INT32_C(-2064534933), INT32_C( -805121769),
+                            INT32_C( 1773032568), INT32_C(  168212411), INT32_C(-1879770949), INT32_C( -300592271)) },
+    { simde_mm256_set_epi32(INT32_C( 1663360923), INT32_C(-1870188573), INT32_C( 1752083528), INT32_C( -891808871),
+                            INT32_C(-1144120673), INT32_C( -843961941), INT32_C(  608988977), INT32_C( 1643979843)),
+      simde_mm256_set_epi32(INT32_C( 1123563527), INT32_C(  982871713), INT32_C(  533211656), INT32_C( -756017794),
+                            INT32_C( -190681789), INT32_C(  795284840), INT32_C( 1667933527), INT32_C(-1392350825)),
+      simde_mm256_set_epi32(INT32_C( 1123563527), INT32_C(-1870188573), INT32_C(  533211656), INT32_C( -891808871),
+                            INT32_C(-1144120673), INT32_C( -843961941), INT32_C(  608988977), INT32_C(-1392350825)) },
+    { simde_mm256_set_epi32(INT32_C(-1310257406), INT32_C(-1408682397), INT32_C(-1149899729), INT32_C(  355234107),
+                            INT32_C(  155318708), INT32_C(-2067786063), INT32_C( -971167680), INT32_C( -142216224)),
+      simde_mm256_set_epi32(INT32_C(-1896281663), INT32_C(   11464011), INT32_C( 1160784588), INT32_C( 2011962055),
+                            INT32_C( 1482122113), INT32_C(  806565597), INT32_C(   85453656), INT32_C( 1469305473)),
+      simde_mm256_set_epi32(INT32_C(-1896281663), INT32_C(-1408682397), INT32_C(-1149899729), INT32_C(  355234107),
+                            INT32_C(  155318708), INT32_C(-2067786063), INT32_C( -971167680), INT32_C( -142216224)) },
+    { simde_mm256_set_epi32(INT32_C(  681282753), INT32_C(-1269573634), INT32_C( 1244698819), INT32_C( 1597134482),
+                            INT32_C( -396103298), INT32_C(-1209039111), INT32_C( 1604666110), INT32_C( 1489663402)),
+      simde_mm256_set_epi32(INT32_C( -740862027), INT32_C( 1761545085), INT32_C(-2057954770), INT32_C(  104248127),
+                            INT32_C( 1333889643), INT32_C(-2032337216), INT32_C( 1101485667), INT32_C( -818155161)),
+      simde_mm256_set_epi32(INT32_C( -740862027), INT32_C(-1269573634), INT32_C(-2057954770), INT32_C(  104248127),
+                            INT32_C( -396103298), INT32_C(-2032337216), INT32_C( 1101485667), INT32_C( -818155161)) },
+    { simde_mm256_set_epi32(INT32_C(  -99180911), INT32_C( -439106368), INT32_C( -219026380), INT32_C(-1842055049),
+                            INT32_C( -586995091), INT32_C(  284086118), INT32_C( 1471875584), INT32_C(   18155208)),
+      simde_mm256_set_epi32(INT32_C(-1002814318), INT32_C(  422940673), INT32_C( 1746332809), INT32_C(-1093308989),
+                            INT32_C(-1429872214), INT32_C( -179799423), INT32_C(  354446702), INT32_C( 1894329413)),
+      simde_mm256_set_epi32(INT32_C(-1002814318), INT32_C( -439106368), INT32_C( -219026380), INT32_C(-1842055049),
+                            INT32_C(-1429872214), INT32_C( -179799423), INT32_C(  354446702), INT32_C(   18155208)) },
+    { simde_mm256_set_epi32(INT32_C(-1356324197), INT32_C( 1432808198), INT32_C(  401202220), INT32_C( -194069457),
+                            INT32_C(  767922262), INT32_C(-1577608365), INT32_C(  626335740), INT32_C( 1655291833)),
+      simde_mm256_set_epi32(INT32_C( -963509238), INT32_C( -638026164), INT32_C( 1442158122), INT32_C( 2100182819),
+                            INT32_C( -392345708), INT32_C(  882354404), INT32_C(  215437606), INT32_C( -216148933)),
+      simde_mm256_set_epi32(INT32_C(-1356324197), INT32_C( -638026164), INT32_C(  401202220), INT32_C( -194069457),
+                            INT32_C( -392345708), INT32_C(-1577608365), INT32_C(  215437606), INT32_C( -216148933)) },
+    { simde_mm256_set_epi32(INT32_C( -593312907), INT32_C(   16741987), INT32_C( -897248657), INT32_C(  147155789),
+                            INT32_C(  123410758), INT32_C( 1347488541), INT32_C( 1836961082), INT32_C( 1115588192)),
+      simde_mm256_set_epi32(INT32_C(-1511769299), INT32_C( 1665387130), INT32_C(  -50678944), INT32_C(  270893388),
+                            INT32_C(  950208462), INT32_C(  948963023), INT32_C( 1811011207), INT32_C(-1352546207)),
+      simde_mm256_set_epi32(INT32_C(-1511769299), INT32_C(   16741987), INT32_C( -897248657), INT32_C(  147155789),
+                            INT32_C(  123410758), INT32_C(  948963023), INT32_C( 1811011207), INT32_C(-1352546207)) }
+  };
+  /* sizeof(test_vec) / sizeof(test_vec[0]) is the element count, so every table entry is visited. */
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+    simde__m256i r = simde_mm256_min_epi32(test_vec[i].a, test_vec[i].b);
+    simde_assert_m256i_i32(r, ==, test_vec[i].r); /* NOTE(review): macro defined elsewhere; presumably compares as int32 lanes */
+  }
+
+  return MUNIT_OK;
+}
 
 #endif /* defined(SIMDE_AVX2_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS) */
 
@@ -8548,6 +9224,10 @@ HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL
 
 static MunitTest test_suite_tests[] = {
 #if defined(SIMDE_AVX2_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS)
+  SIMDE_TESTS_DEFINE_TEST(mm256_abs_epi8),
+  SIMDE_TESTS_DEFINE_TEST(mm256_abs_epi16),
+  SIMDE_TESTS_DEFINE_TEST(mm256_abs_epi32),
+
   SIMDE_TESTS_DEFINE_TEST(mm256_add_epi8),
   SIMDE_TESTS_DEFINE_TEST(mm256_add_epi16),
   SIMDE_TESTS_DEFINE_TEST(mm256_add_epi32),
@@ -8610,6 +9290,9 @@ static MunitTest test_suite_tests[] = {
   SIMDE_TESTS_DEFINE_TEST(mm256_max_epu16),
   SIMDE_TESTS_DEFINE_TEST(mm256_max_epu32),
 
+  SIMDE_TESTS_DEFINE_TEST(mm256_min_epi8),
+  SIMDE_TESTS_DEFINE_TEST(mm256_min_epi16),
+  SIMDE_TESTS_DEFINE_TEST(mm256_min_epi32),
   SIMDE_TESTS_DEFINE_TEST(mm256_min_epu8),
   SIMDE_TESTS_DEFINE_TEST(mm256_min_epu16),
   SIMDE_TESTS_DEFINE_TEST(mm256_min_epu32),


=====================================
test/x86/avx512f.c
=====================================
The diff for this file was not included because it is too large.

=====================================
test/x86/skel.c
=====================================
@@ -2165,8 +2165,8 @@ test_simde_mm512_xxx_epu32(const MunitParameter params[], void* data) {
            "                              UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
            "                              UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
            "                              UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")),\n",
-           a.i32[15], a.i32[14], a.i32[13], a.i32[12], a.i32[11], a.i32[10], a.i32[ 9], a.i32[ 8],
-           a.i32[ 7], a.i32[ 6], a.i32[ 5], a.i32[ 4], a.i32[ 3], a.i32[ 2], a.i32[ 1], a.i32[ 0]);
+           a.u32[15], a.u32[14], a.u32[13], a.u32[12], a.u32[11], a.u32[10], a.u32[ 9], a.u32[ 8],
+           a.u32[ 7], a.u32[ 6], a.u32[ 5], a.u32[ 4], a.u32[ 3], a.u32[ 2], a.u32[ 1], a.u32[ 0]);
     printf("      simde_x_mm512_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
            "                              UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
            "                              UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
@@ -2241,6 +2241,70 @@ test_simde_mm512_xxx_epu64(const MunitParameter params[], void* data) {
   return MUNIT_OK;
 }
 
+/* Skeleton for testing a masked 512-bit operation on unsigned 32-bit lanes.
+ * "xxx" in the function names is a placeholder.
+ *
+ * As written, the function runs in "generate" mode: for each of the 8 slots
+ * in test_vec it fills src, k, a and b with random bytes, calls
+ * simde_mm512_mask_xxx_epu32 on them, and prints the inputs and the result
+ * formatted as a C initializer for one test_vec entry.  It then returns
+ * MUNIT_FAIL, so the verification loop at the bottom is unreachable until
+ * the generation section is removed.
+ * NOTE(review): presumably the printed output is meant to be pasted into
+ * the (currently empty) test_vec initializer -- confirm against the
+ * project's test-writing workflow.
+ *
+ * params and data are unused.
+ */
+static MunitResult
+test_simde_mm512_mask_xxx_epu32(const MunitParameter params[], void* data) {
+  (void) params;
+  (void) data;
+
+  const struct {
+    simde__m512i src;
+    simde__mmask16 k;
+    simde__m512i a;
+    simde__m512i b;
+    simde__m512i r;
+  } test_vec[8] = {
+
+  };
+
+  /* Generation section: emit one initializer per test_vec slot. */
+  printf("\n");
+  for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
+    simde__m512i_private src, a, b, r;
+    simde__mmask16 k;
+
+    /* Each operand is sized by its own object so the fills stay correct if
+     * the operand types are changed independently. */
+    munit_rand_memory(sizeof(src), (uint8_t*) &src);
+    munit_rand_memory(sizeof(k), (uint8_t*) &k);
+    munit_rand_memory(sizeof(a), (uint8_t*) &a);
+    munit_rand_memory(sizeof(b), (uint8_t*) &b);
+    /* Keep the low 16 bits of k, one per 32-bit lane printed below. */
+    k &= UINT16_C(0xffff);
+
+    r = simde__m512i_to_private(simde_mm512_mask_xxx_epu32(simde__m512i_from_private(src), k, simde__m512i_from_private(a), simde__m512i_from_private(b)));
+
+    /* Lanes are printed from index 15 down to 0. */
+    printf("    { simde_x_mm512_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+           "                              UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+           "                              UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+           "                              UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")),\n",
+           src.u32[15], src.u32[14], src.u32[13], src.u32[12], src.u32[11], src.u32[10], src.u32[ 9], src.u32[ 8],
+           src.u32[ 7], src.u32[ 6], src.u32[ 5], src.u32[ 4], src.u32[ 3], src.u32[ 2], src.u32[ 1], src.u32[ 0]);
+    printf("      UINT16_C(%5" PRIu16 "),\n", HEDLEY_STATIC_CAST(uint16_t, k));
+    printf("      simde_x_mm512_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+           "                              UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+           "                              UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+           "                              UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")),\n",
+           a.u32[15], a.u32[14], a.u32[13], a.u32[12], a.u32[11], a.u32[10], a.u32[ 9], a.u32[ 8],
+           a.u32[ 7], a.u32[ 6], a.u32[ 5], a.u32[ 4], a.u32[ 3], a.u32[ 2], a.u32[ 1], a.u32[ 0]);
+    printf("      simde_x_mm512_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+           "                              UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+           "                              UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+           "                              UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")),\n",
+           b.u32[15], b.u32[14], b.u32[13], b.u32[12], b.u32[11], b.u32[10], b.u32[ 9], b.u32[ 8],
+           b.u32[ 7], b.u32[ 6], b.u32[ 5], b.u32[ 4], b.u32[ 3], b.u32[ 2], b.u32[ 1], b.u32[ 0]);
+    printf("      simde_x_mm512_set_epu32(UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+           "                              UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+           "                              UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "),\n"
+           "                              UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 "), UINT32_C(%10" PRIu32 ")) },\n",
+           r.u32[15], r.u32[14], r.u32[13], r.u32[12], r.u32[11], r.u32[10], r.u32[ 9], r.u32[ 8],
+           r.u32[ 7], r.u32[ 6], r.u32[ 5], r.u32[ 4], r.u32[ 3], r.u32[ 2], r.u32[ 1], r.u32[ 0]);
+  }
+  /* Unconditional failure: everything below is unreachable as written. */
+  return MUNIT_FAIL;
+
+  /* Verification section: compare the operation's result on each stored
+   * vector against the stored expected result. */
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+    simde__m512i r = simde_mm512_mask_xxx_epu32(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b);
+    simde_assert_m512i_u32(r, ==, test_vec[i].r);
+  }
+
+  return MUNIT_OK;
+}
+
 static MunitResult
 test_simde_mm512_xxx_ps(const MunitParameter params[], void* data) {
   (void) params;



View it on GitLab: https://salsa.debian.org/med-team/simde/-/commit/683bff07021df346a67704b68711fce76d9fa442

-- 
View it on GitLab: https://salsa.debian.org/med-team/simde/-/commit/683bff07021df346a67704b68711fce76d9fa442
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200413/bd660252/attachment-0001.html>


More information about the debian-med-commit mailing list