[med-svn] [Git][med-team/simde][upstream] New upstream version 0.0.0.git.20200414

Michael R. Crusoe gitlab at salsa.debian.org
Tue Apr 14 10:57:53 BST 2020



Michael R. Crusoe pushed to branch upstream at Debian Med / simde


Commits:
c2f16f4c by Michael R. Crusoe at 2020-04-14T11:51:22+02:00
New upstream version 0.0.0.git.20200414
- - - - -


5 changed files:

- .drone.star
- simde/x86/avx2.h
- simde/x86/avx512f.h
- test/x86/avx2.c
- test/x86/avx512f.c


Changes:

=====================================
.drone.star
=====================================
@@ -2,6 +2,28 @@
 # Drone CI Starlark configuration file.
 # https://docs.drone.io/pipeline/scripting/starlark/
 # Run `drone starlark convert --stdout` to verify `.drone.star`.
+def get_test_commands():
+  return [
+    "mkdir -p build",
+    "cd build",
+    'CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson ..',
+    "ninja -v",
+    "./test/run-tests",
+  ]
+
+def get_apt_install_commands(extra_pkgs = []):
+  return [
+    "apt-get -yq update",
+    "apt-get -yq install %s ninja-build git-core python3-pip" % " ".join(extra_pkgs),
+    "pip3 install meson",
+  ]
+
+def get_dnf_install_commands(extra_pkgs = []):
+  return [
+    "dnf install -y %s ninja-build git-core python3-pip" % " ".join(extra_pkgs),
+    "pip3 install meson",
+  ]
+
 def get_default_job():
   return {
     "kind": "pipeline",
@@ -24,16 +46,11 @@ def get_default_job():
         "uname -m",
         "cat /proc/cpuinfo",
       ],
+      "install": [],
       "before_script": [
         "git submodule --quiet update --init --recursive",
       ],
-      "script": [
-        "mkdir -p build",
-        "cd build",
-        'CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson ..',
-        "ninja -v",
-        "./test/run-tests",
-      ]
+      "script": get_test_commands()
     }
   }
 
@@ -53,11 +70,7 @@ def get_jobs():
       }
     ],
     "custom": {
-      "install": [
-        "apt-get -yq update",
-        "apt-get -yq install clang-9 ninja-build git-core python3-pip",
-        "pip3 install meson",
-      ]
+      "install": get_apt_install_commands(["clang-9"])
     }
   }
 
@@ -76,11 +89,7 @@ def get_jobs():
       }
     ],
     "custom": {
-      "install": [
-        "apt-get -yq update",
-        "apt-get -yq install clang-9 ninja-build git-core python3-pip",
-        "pip3 install meson",
-      ]
+      "install": get_apt_install_commands(["clang-9"])
     }
   }
 
@@ -99,11 +108,7 @@ def get_jobs():
       }
     ],
     "custom": {
-      "install": [
-        "apt-get -yq update",
-        "apt-get -yq install gcc-8 g++-8 ninja-build git-core python3-pip",
-        "pip3 install meson",
-      ]
+      "install": get_apt_install_commands(["gcc-8", "g++-8"])
     }
   }
 
@@ -122,11 +127,7 @@ def get_jobs():
       }
     ],
     "custom": {
-      "install": [
-        "apt-get -yq update",
-        "apt-get -yq install gcc-8 g++-8 ninja-build git-core python3-pip",
-        "pip3 install meson",
-      ]
+      "install": get_apt_install_commands(["gcc-8", "g++-8"])
     }
   }
 
@@ -145,11 +146,7 @@ def get_jobs():
       }
     ],
     "custom": {
-      "install": [
-        "apt-get -yq update",
-        "apt-get -yq install clang-7 ninja-build git-core python3-pip",
-        "pip3 install meson",
-      ]
+      "install": get_apt_install_commands(["clang-7"])
     }
   }
 
@@ -168,11 +165,7 @@ def get_jobs():
       }
     ],
     "custom": {
-      "install": [
-        "apt-get -yq update",
-        "apt-get -yq install clang-7 ninja-build git-core python3-pip",
-        "pip3 install meson",
-      ]
+      "install": get_apt_install_commands(["clang-7"])
     }
   }
 
@@ -191,11 +184,7 @@ def get_jobs():
       }
     ],
     "custom": {
-      "install": [
-        "apt-get -yq update",
-        "apt-get -yq install gcc-7 g++-7 ninja-build git-core python3-pip",
-        "pip3 install meson",
-      ]
+      "install": get_apt_install_commands(["gcc-7", "g++-7"])
     }
   }
 
@@ -214,11 +203,7 @@ def get_jobs():
       }
     ],
     "custom": {
-      "install": [
-        "apt-get -yq update",
-        "apt-get -yq install gcc-7 g++-7 ninja-build git-core python3-pip",
-        "pip3 install meson",
-      ]
+      "install": get_apt_install_commands(["gcc-7", "g++-7"])
     }
   }
 
@@ -235,10 +220,7 @@ def get_jobs():
       }
     ],
     "custom": {
-      "install": [
-        "dnf install -y gcc gcc-c++ ninja-build git-core python3-pip",
-        "pip3 install meson",
-      ]
+      "install": get_dnf_install_commands(["gcc", "gcc-c++"])
     }
   }
 
@@ -258,21 +240,13 @@ def get_jobs():
       }
     ],
     "custom": {
-      "install": [
-        "dnf install -y clang ninja-build git-core python3-pip",
-        "pip3 install meson",
-      ],
+      "install": get_dnf_install_commands(["clang"]),
       "script": [
-        "mkdir -p build",
-        "cd build",
         # optflags RPM macro works with gcc.
         # Some flags and specs are not available with clang.
         # https://lists.fedoraproject.org/archives/list/packaging@lists.fedoraproject.org/message/W5UFLUADNB4VF3OBUBSNAPOQL6XBCP74/
         "ARCH_FLAGS=$(rpm -E '%{optflags}' | sed -e 's| -fstack-clash-protection||' -e 's| -specs=[^ ]*||g')",
-        'CFLAGS="$ARCH_FLAGS" CXXFLAGS="$ARCH_FLAGS" meson ..',
-        "ninja -v",
-        "./test/run-tests",
-      ]
+      ] + get_test_commands()
     }
   }
 
@@ -305,10 +279,8 @@ def main(ctx):
         out[key] = value
 
     # Create commands list from custom elements.
-    out["steps"][0]["commands"].extend(out["custom"]["before_install"])
-    out["steps"][0]["commands"].extend(out["custom"]["install"])
-    out["steps"][0]["commands"].extend(out["custom"]["before_script"])
-    out["steps"][0]["commands"].extend(out["custom"]["script"])
+    for element in ["before_install", "install", "before_script", "script"]:
+      out["steps"][0]["commands"].extend(out["custom"][element])
 
     # Remove unused custom element.
     out.pop("custom", None)


=====================================
simde/x86/avx2.h
=====================================
@@ -472,6 +472,52 @@ for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
 #  define _mm256_adds_epu16(a, b) simde_mm256_adds_epu16(a, b)
 #endif
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m256i
+simde_mm256_avg_epu8 (simde__m256i a, simde__m256i b) {
+#if defined(SIMDE_AVX2_NATIVE)
+  return _mm256_avg_epu8(a, b);
+#else
+  simde__m256i_private
+    r_,
+    a_ = simde__m256i_to_private(a),
+    b_ = simde__m256i_to_private(b);
+
+  SIMDE__VECTORIZE
+  for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
+    r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1;
+  }
+
+  return simde__m256i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
+#  define _mm256_avg_epu8(a, b) simde_mm256_avg_epu8(a, b)
+#endif
+
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m256i
+simde_mm256_avg_epu16 (simde__m256i a, simde__m256i b) {
+#if defined(SIMDE_AVX2_NATIVE)
+  return _mm256_avg_epu16(a, b);
+#else
+  simde__m256i_private
+    r_,
+    a_ = simde__m256i_to_private(a),
+    b_ = simde__m256i_to_private(b);
+
+  SIMDE__VECTORIZE
+  for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
+    r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1;
+  }
+
+  return simde__m256i_from_private(r_);
+#endif
+}
+#if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
+#  define _mm256_avg_epu16(a, b) simde_mm256_avg_epu16(a, b)
+#endif
+
 SIMDE__FUNCTION_ATTRIBUTES
 simde__m128i
 simde_mm_blend_epi32(simde__m128i a, simde__m128i b, const int imm8)
@@ -1619,7 +1665,7 @@ simde_mm256_permute2x128_si256 (simde__m256i a, simde__m256i b, const int imm8)
   return simde__m256i_from_private(r_);
 }
 #if defined(SIMDE_AVX2_NATIVE)
-#  define simde_mm256_permute2x128_si128(a, b, imm8) _mm256_permute2x128_si128(a, b, imm8)
+#  define simde_mm256_permute2x128_si256(a, b, imm8) _mm256_permute2x128_si256(a, b, imm8)
 #endif
 #if defined(SIMDE_AVX2_ENABLE_NATIVE_ALIASES)
 #  define _mm256_permute2x128_si256(a, b, imm8) simde_mm256_permute2x128_si256(a, b, imm8)


=====================================
simde/x86/avx512f.h
=====================================
@@ -2309,6 +2309,19 @@ simde_mm512_mask_sub_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a,
 #  define _mm512_mask_sub_epi32(src, k, a, b) simde_mm512_mask_sub_epi32(src, k, a, b)
 #endif
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_maskz_sub_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_maskz_sub_epi32(k, a, b);
+#else
+  return simde_mm512_maskz_mov_epi32(k, simde_mm512_sub_epi32(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_maskz_sub_epi32(k, a, b) simde_mm512_maskz_sub_epi32(k, a, b)
+#endif
+
 SIMDE__FUNCTION_ATTRIBUTES
 simde__m512i
 simde_mm512_sub_epi64 (simde__m512i a, simde__m512i b) {
@@ -2349,6 +2362,19 @@ simde_mm512_mask_sub_epi64 (simde__m512i src, simde__mmask8 k, simde__m512i a, s
 #  define _mm512_mask_sub_epi64(src, k, a, b) simde_mm512_mask_sub_epi64(src, k, a, b)
 #endif
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_maskz_sub_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_maskz_sub_epi64(k, a, b);
+#else
+  return simde_mm512_maskz_mov_epi64(k, simde_mm512_sub_epi64(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_maskz_sub_epi64(k, a, b) simde_mm512_maskz_sub_epi64(k, a, b)
+#endif
+
 SIMDE__FUNCTION_ATTRIBUTES
 simde__m512
 simde_mm512_sub_ps (simde__m512 a, simde__m512 b) {
@@ -2389,6 +2415,19 @@ simde_mm512_mask_sub_ps (simde__m512 src, simde__mmask16 k, simde__m512 a, simde
 #  define _mm512_mask_sub_ps(src, k, a, b) simde_mm512_mask_sub_ps(src, k, a, b)
 #endif
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512
+simde_mm512_maskz_sub_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_maskz_sub_ps(k, a, b);
+#else
+  return simde_mm512_maskz_mov_ps(k, simde_mm512_sub_ps(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_maskz_sub_ps(k, a, b) simde_mm512_maskz_sub_ps(k, a, b)
+#endif
+
 SIMDE__FUNCTION_ATTRIBUTES
 simde__m512d
 simde_mm512_sub_pd (simde__m512d a, simde__m512d b) {
@@ -2429,6 +2468,19 @@ simde_mm512_mask_sub_pd (simde__m512d src, simde__mmask8 k, simde__m512d a, simd
 #  define _mm512_mask_sub_pd(src, k, a, b) simde_mm512_mask_sub_pd(src, k, a, b)
 #endif
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512d
+simde_mm512_maskz_sub_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_maskz_sub_pd(k, a, b);
+#else
+  return simde_mm512_maskz_mov_pd(k, simde_mm512_sub_pd(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_maskz_sub_pd(k, a, b) simde_mm512_maskz_sub_pd(k, a, b)
+#endif
+
 SIMDE__FUNCTION_ATTRIBUTES
 simde__mmask16
 simde_mm512_cmpeq_epi32_mask (simde__m512i a, simde__m512i b) {
@@ -2746,6 +2798,19 @@ simde_mm512_div_ps (simde__m512 a, simde__m512 b) {
 #  define _mm512_div_ps(a, b) simde_mm512_div_ps(a, b)
 #endif
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512
+simde_mm512_mask_div_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_mask_div_ps(src, k, a, b);
+#else
+  return simde_mm512_mask_mov_ps(src, k, simde_mm512_div_ps(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_mask_div_ps(src, k, a, b) simde_mm512_mask_div_ps(src, k, a, b)
+#endif
+
 SIMDE__FUNCTION_ATTRIBUTES
 simde__m512d
 simde_mm512_div_pd (simde__m512d a, simde__m512d b) {
@@ -2773,6 +2838,19 @@ simde_mm512_div_pd (simde__m512d a, simde__m512d b) {
 #  define _mm512_div_pd(a, b) simde_mm512_div_pd(a, b)
 #endif
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512d
+simde_mm512_mask_div_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_mask_div_pd(src, k, a, b);
+#else
+  return simde_mm512_mask_mov_pd(src, k, simde_mm512_div_pd(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_mask_div_pd(src, k, a, b) simde_mm512_mask_div_pd(src, k, a, b)
+#endif
+
 SIMDE__FUNCTION_ATTRIBUTES
 simde__m512
 simde_mm512_mul_ps (simde__m512 a, simde__m512 b) {
@@ -2813,6 +2891,19 @@ simde_mm512_mask_mul_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde_
 #  define _mm512_mask_mul_ps(src, k, a, b) simde_mm512_mask_mul_ps(src, k, a, b)
 #endif
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512
+simde_mm512_maskz_mul_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_maskz_mul_ps(k, a, b);
+#else
+  return simde_mm512_maskz_mov_ps(k, simde_mm512_mul_ps(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_maskz_mul_ps(k, a, b) simde_mm512_maskz_mul_ps(k, a, b)
+#endif
+
 SIMDE__FUNCTION_ATTRIBUTES
 simde__m512d
 simde_mm512_mul_pd (simde__m512d a, simde__m512d b) {
@@ -2853,6 +2944,19 @@ simde_mm512_mask_mul_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde
 #  define _mm512_mask_mul_pd(src, k, a, b) simde_mm512_mask_mul_pd(src, k, a, b)
 #endif
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512d
+simde_mm512_maskz_mul_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_maskz_mul_pd(k, a, b);
+#else
+  return simde_mm512_maskz_mov_pd(k, simde_mm512_mul_pd(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_maskz_mul_pd(k, a, b) simde_mm512_maskz_mul_pd(k, a, b)
+#endif
+
 SIMDE__FUNCTION_ATTRIBUTES
 simde__m512i
 simde_mm512_mul_epi32 (simde__m512i a, simde__m512i b) {
@@ -2899,6 +3003,19 @@ simde_mm512_mask_mul_epi32(simde__m512i src, simde__mmask8 k, simde__m512i a, si
 #  define _mm512_mask_mul_epi32(src, k, a, b) simde_mm512_mask_mul_epi32(src, k, a, b)
 #endif
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_maskz_mul_epi32(simde__mmask8 k, simde__m512i a, simde__m512i b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_maskz_mul_epi32(k, a, b);
+#else
+  return simde_mm512_maskz_mov_epi64(k, simde_mm512_mul_epi32(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_maskz_mul_epi32(k, a, b) simde_mm512_maskz_mul_epi32(k, a, b)
+#endif
+
 SIMDE__FUNCTION_ATTRIBUTES
 simde__m512i
 simde_mm512_mul_epu32 (simde__m512i a, simde__m512i b) {
@@ -2946,6 +3063,19 @@ simde_mm512_mask_mul_epu32(simde__m512i src, simde__mmask8 k, simde__m512i a, si
 #  define _mm512_mask_mul_epu32(src, k, a, b) simde_mm512_mask_mul_epu32(src, k, a, b)
 #endif
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_maskz_mul_epu32(simde__mmask8 k, simde__m512i a, simde__m512i b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_maskz_mul_epu32(k, a, b);
+#else
+  return simde_mm512_maskz_mov_epi64(k, simde_mm512_mul_epu32(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_maskz_mul_epu32(k, a, b) simde_mm512_maskz_mul_epu32(k, a, b)
+#endif
+
 SIMDE__FUNCTION_ATTRIBUTES
 simde__m512i
 simde_mm512_or_si512 (simde__m512i a, simde__m512i b) {


=====================================
test/x86/avx2.c
=====================================
@@ -2412,6 +2412,334 @@ test_simde_mm256_adds_epu16(const MunitParameter params[], void* data) {
   return MUNIT_OK;
 }
 
+static MunitResult
+test_simde_mm256_avg_epu8(const MunitParameter params[], void* data) {
+  (void) params;
+  (void) data;
+
+  const struct {
+    simde__m256i a;
+    simde__m256i b;
+    simde__m256i r;
+  } test_vec[8] = {
+    { simde_x_mm256_set_epu8(UINT8_C(132), UINT8_C(185), UINT8_C( 15), UINT8_C(235),
+                             UINT8_C(102), UINT8_C(  8), UINT8_C(239), UINT8_C(181),
+                             UINT8_C( 81), UINT8_C(155), UINT8_C(236), UINT8_C(191),
+                             UINT8_C(133), UINT8_C( 65), UINT8_C( 73), UINT8_C( 40),
+                             UINT8_C(181), UINT8_C( 86), UINT8_C( 73), UINT8_C(213),
+                             UINT8_C( 85), UINT8_C( 28), UINT8_C( 53), UINT8_C(225),
+                             UINT8_C(217), UINT8_C(129), UINT8_C( 68), UINT8_C(183),
+                             UINT8_C(232), UINT8_C( 91), UINT8_C(  4), UINT8_C(129)),
+      simde_x_mm256_set_epu8(UINT8_C(199), UINT8_C(146), UINT8_C( 73), UINT8_C(129),
+                             UINT8_C( 53), UINT8_C( 30), UINT8_C(178), UINT8_C(252),
+                             UINT8_C(125), UINT8_C( 44), UINT8_C( 67), UINT8_C( 83),
+                             UINT8_C( 52), UINT8_C( 79), UINT8_C(239), UINT8_C(118),
+                             UINT8_C(100), UINT8_C( 25), UINT8_C( 74), UINT8_C( 78),
+                             UINT8_C( 90), UINT8_C(145), UINT8_C(118), UINT8_C(211),
+                             UINT8_C( 29), UINT8_C( 31), UINT8_C(128), UINT8_C( 53),
+                             UINT8_C( 19), UINT8_C(239), UINT8_C(181), UINT8_C(108)),
+      simde_x_mm256_set_epu8(UINT8_C(166), UINT8_C(166), UINT8_C( 44), UINT8_C(182),
+                             UINT8_C( 78), UINT8_C( 19), UINT8_C(209), UINT8_C(217),
+                             UINT8_C(103), UINT8_C(100), UINT8_C(152), UINT8_C(137),
+                             UINT8_C( 93), UINT8_C( 72), UINT8_C(156), UINT8_C( 79),
+                             UINT8_C(141), UINT8_C( 56), UINT8_C( 74), UINT8_C(146),
+                             UINT8_C( 88), UINT8_C( 87), UINT8_C( 86), UINT8_C(218),
+                             UINT8_C(123), UINT8_C( 80), UINT8_C( 98), UINT8_C(118),
+                             UINT8_C(126), UINT8_C(165), UINT8_C( 93), UINT8_C(119)) },
+    { simde_x_mm256_set_epu8(UINT8_C(209), UINT8_C(137), UINT8_C(159), UINT8_C(201),
+                             UINT8_C(142), UINT8_C(123), UINT8_C(233), UINT8_C(210),
+                             UINT8_C(180), UINT8_C(  1), UINT8_C( 19), UINT8_C(  5),
+                             UINT8_C( 35), UINT8_C(203), UINT8_C(189), UINT8_C( 26),
+                             UINT8_C(153), UINT8_C(140), UINT8_C( 62), UINT8_C(144),
+                             UINT8_C( 20), UINT8_C(125), UINT8_C(185), UINT8_C(133),
+                             UINT8_C( 90), UINT8_C(243), UINT8_C( 18), UINT8_C(117),
+                             UINT8_C(102), UINT8_C(222), UINT8_C( 27), UINT8_C( 12)),
+      simde_x_mm256_set_epu8(UINT8_C(201), UINT8_C(154), UINT8_C( 75), UINT8_C(146),
+                             UINT8_C( 84), UINT8_C(138), UINT8_C(110), UINT8_C( 18),
+                             UINT8_C(142), UINT8_C(147), UINT8_C(192), UINT8_C(131),
+                             UINT8_C(151), UINT8_C(232), UINT8_C(176), UINT8_C(  8),
+                             UINT8_C(133), UINT8_C( 27), UINT8_C( 52), UINT8_C( 35),
+                             UINT8_C(114), UINT8_C(148), UINT8_C(237), UINT8_C(121),
+                             UINT8_C(209), UINT8_C( 97), UINT8_C(242), UINT8_C( 75),
+                             UINT8_C(194), UINT8_C( 62), UINT8_C(242), UINT8_C(214)),
+      simde_x_mm256_set_epu8(UINT8_C(205), UINT8_C(146), UINT8_C(117), UINT8_C(174),
+                             UINT8_C(113), UINT8_C(131), UINT8_C(172), UINT8_C(114),
+                             UINT8_C(161), UINT8_C( 74), UINT8_C(106), UINT8_C( 68),
+                             UINT8_C( 93), UINT8_C(218), UINT8_C(183), UINT8_C( 17),
+                             UINT8_C(143), UINT8_C( 84), UINT8_C( 57), UINT8_C( 90),
+                             UINT8_C( 67), UINT8_C(137), UINT8_C(211), UINT8_C(127),
+                             UINT8_C(150), UINT8_C(170), UINT8_C(130), UINT8_C( 96),
+                             UINT8_C(148), UINT8_C(142), UINT8_C(135), UINT8_C(113)) },
+    { simde_x_mm256_set_epu8(UINT8_C(223), UINT8_C( 80), UINT8_C( 95), UINT8_C( 57),
+                             UINT8_C(173), UINT8_C( 14), UINT8_C( 75), UINT8_C( 79),
+                             UINT8_C(206), UINT8_C( 37), UINT8_C(125), UINT8_C(225),
+                             UINT8_C(172), UINT8_C(121), UINT8_C( 43), UINT8_C(249),
+                             UINT8_C(122), UINT8_C(  3), UINT8_C( 25), UINT8_C(204),
+                             UINT8_C(153), UINT8_C( 32), UINT8_C(245), UINT8_C( 58),
+                             UINT8_C(211), UINT8_C(116), UINT8_C( 87), UINT8_C(228),
+                             UINT8_C(139), UINT8_C(203), UINT8_C(106), UINT8_C(216)),
+      simde_x_mm256_set_epu8(UINT8_C( 18), UINT8_C(169), UINT8_C( 46), UINT8_C(100),
+                             UINT8_C(246), UINT8_C(  6), UINT8_C(208), UINT8_C(224),
+                             UINT8_C(134), UINT8_C(248), UINT8_C( 90), UINT8_C(243),
+                             UINT8_C( 95), UINT8_C(216), UINT8_C(232), UINT8_C(245),
+                             UINT8_C( 35), UINT8_C(118), UINT8_C( 23), UINT8_C(111),
+                             UINT8_C(137), UINT8_C(  1), UINT8_C( 43), UINT8_C(212),
+                             UINT8_C(143), UINT8_C(143), UINT8_C(106), UINT8_C(242),
+                             UINT8_C(188), UINT8_C( 78), UINT8_C( 94), UINT8_C( 49)),
+      simde_x_mm256_set_epu8(UINT8_C(121), UINT8_C(125), UINT8_C( 71), UINT8_C( 79),
+                             UINT8_C(210), UINT8_C( 10), UINT8_C(142), UINT8_C(152),
+                             UINT8_C(170), UINT8_C(143), UINT8_C(108), UINT8_C(234),
+                             UINT8_C(134), UINT8_C(169), UINT8_C(138), UINT8_C(247),
+                             UINT8_C( 79), UINT8_C( 61), UINT8_C( 24), UINT8_C(158),
+                             UINT8_C(145), UINT8_C( 17), UINT8_C(144), UINT8_C(135),
+                             UINT8_C(177), UINT8_C(130), UINT8_C( 97), UINT8_C(235),
+                             UINT8_C(164), UINT8_C(141), UINT8_C(100), UINT8_C(133)) },
+    { simde_x_mm256_set_epu8(UINT8_C(186), UINT8_C( 51), UINT8_C(166), UINT8_C(159),
+                             UINT8_C( 61), UINT8_C(189), UINT8_C(148), UINT8_C(156),
+                             UINT8_C(199), UINT8_C( 59), UINT8_C(214), UINT8_C( 21),
+                             UINT8_C( 92), UINT8_C( 24), UINT8_C( 35), UINT8_C( 33),
+                             UINT8_C( 27), UINT8_C(133), UINT8_C(  9), UINT8_C(114),
+                             UINT8_C(170), UINT8_C( 78), UINT8_C(149), UINT8_C(203),
+                             UINT8_C(212), UINT8_C(108), UINT8_C(116), UINT8_C(217),
+                             UINT8_C(102), UINT8_C(192), UINT8_C(223), UINT8_C( 98)),
+      simde_x_mm256_set_epu8(UINT8_C( 51), UINT8_C(193), UINT8_C(129), UINT8_C(222),
+                             UINT8_C(147), UINT8_C( 49), UINT8_C(210), UINT8_C(198),
+                             UINT8_C(192), UINT8_C(158), UINT8_C( 49), UINT8_C(217),
+                             UINT8_C( 20), UINT8_C(183), UINT8_C(213), UINT8_C( 71),
+                             UINT8_C(164), UINT8_C( 92), UINT8_C(118), UINT8_C( 17),
+                             UINT8_C(236), UINT8_C( 27), UINT8_C(162), UINT8_C( 98),
+                             UINT8_C(196), UINT8_C(135), UINT8_C(  7), UINT8_C(172),
+                             UINT8_C(233), UINT8_C( 47), UINT8_C(151), UINT8_C(128)),
+      simde_x_mm256_set_epu8(UINT8_C(119), UINT8_C(122), UINT8_C(148), UINT8_C(191),
+                             UINT8_C(104), UINT8_C(119), UINT8_C(179), UINT8_C(177),
+                             UINT8_C(196), UINT8_C(109), UINT8_C(132), UINT8_C(119),
+                             UINT8_C( 56), UINT8_C(104), UINT8_C(124), UINT8_C( 52),
+                             UINT8_C( 96), UINT8_C(113), UINT8_C( 64), UINT8_C( 66),
+                             UINT8_C(203), UINT8_C( 53), UINT8_C(156), UINT8_C(151),
+                             UINT8_C(204), UINT8_C(122), UINT8_C( 62), UINT8_C(195),
+                             UINT8_C(168), UINT8_C(120), UINT8_C(187), UINT8_C(113)) },
+    { simde_x_mm256_set_epu8(UINT8_C(182), UINT8_C(141), UINT8_C( 93), UINT8_C( 91),
+                             UINT8_C(  0), UINT8_C(189), UINT8_C(215), UINT8_C(221),
+                             UINT8_C(105), UINT8_C(231), UINT8_C( 61), UINT8_C(224),
+                             UINT8_C( 68), UINT8_C( 84), UINT8_C(247), UINT8_C(215),
+                             UINT8_C(125), UINT8_C(197), UINT8_C( 69), UINT8_C(102),
+                             UINT8_C(218), UINT8_C(120), UINT8_C(113), UINT8_C(175),
+                             UINT8_C(134), UINT8_C( 33), UINT8_C(106), UINT8_C(117),
+                             UINT8_C(129), UINT8_C(249), UINT8_C(194), UINT8_C( 70)),
+      simde_x_mm256_set_epu8(UINT8_C(137), UINT8_C( 27), UINT8_C( 17), UINT8_C( 94),
+                             UINT8_C(244), UINT8_C(142), UINT8_C(142), UINT8_C( 48),
+                             UINT8_C( 54), UINT8_C(217), UINT8_C(209), UINT8_C(122),
+                             UINT8_C(  1), UINT8_C(190), UINT8_C( 59), UINT8_C(250),
+                             UINT8_C(179), UINT8_C(176), UINT8_C(167), UINT8_C( 57),
+                             UINT8_C( 90), UINT8_C( 15), UINT8_C( 31), UINT8_C(140),
+                             UINT8_C(209), UINT8_C(104), UINT8_C(139), UINT8_C(154),
+                             UINT8_C( 57), UINT8_C(248), UINT8_C(225), UINT8_C( 65)),
+      simde_x_mm256_set_epu8(UINT8_C(160), UINT8_C( 84), UINT8_C( 55), UINT8_C( 93),
+                             UINT8_C(122), UINT8_C(166), UINT8_C(179), UINT8_C(135),
+                             UINT8_C( 80), UINT8_C(224), UINT8_C(135), UINT8_C(173),
+                             UINT8_C( 35), UINT8_C(137), UINT8_C(153), UINT8_C(233),
+                             UINT8_C(152), UINT8_C(187), UINT8_C(118), UINT8_C( 80),
+                             UINT8_C(154), UINT8_C( 68), UINT8_C( 72), UINT8_C(158),
+                             UINT8_C(172), UINT8_C( 69), UINT8_C(123), UINT8_C(136),
+                             UINT8_C( 93), UINT8_C(249), UINT8_C(210), UINT8_C( 68)) },
+    { simde_x_mm256_set_epu8(UINT8_C(125), UINT8_C(242), UINT8_C( 34), UINT8_C(120),
+                             UINT8_C(106), UINT8_C(202), UINT8_C(100), UINT8_C( 61),
+                             UINT8_C(105), UINT8_C(145), UINT8_C( 46), UINT8_C(129),
+                             UINT8_C(208), UINT8_C( 57), UINT8_C( 82), UINT8_C( 21),
+                             UINT8_C( 59), UINT8_C( 97), UINT8_C(206), UINT8_C(  4),
+                             UINT8_C(182), UINT8_C( 81), UINT8_C(203), UINT8_C(252),
+                             UINT8_C(111), UINT8_C( 28), UINT8_C(210), UINT8_C( 57),
+                             UINT8_C(214), UINT8_C(124), UINT8_C(137), UINT8_C(114)),
+      simde_x_mm256_set_epu8(UINT8_C(208), UINT8_C( 60), UINT8_C( 51), UINT8_C( 61),
+                             UINT8_C(249), UINT8_C(203), UINT8_C( 69), UINT8_C(195),
+                             UINT8_C( 16), UINT8_C( 67), UINT8_C(241), UINT8_C(244),
+                             UINT8_C(217), UINT8_C(201), UINT8_C(104), UINT8_C( 80),
+                             UINT8_C( 30), UINT8_C( 40), UINT8_C( 69), UINT8_C( 88),
+                             UINT8_C( 83), UINT8_C(141), UINT8_C(221), UINT8_C(174),
+                             UINT8_C(165), UINT8_C(114), UINT8_C(107), UINT8_C( 42),
+                             UINT8_C( 83), UINT8_C(  1), UINT8_C( 95), UINT8_C( 89)),
+      simde_x_mm256_set_epu8(UINT8_C(167), UINT8_C(151), UINT8_C( 43), UINT8_C( 91),
+                             UINT8_C(178), UINT8_C(203), UINT8_C( 85), UINT8_C(128),
+                             UINT8_C( 61), UINT8_C(106), UINT8_C(144), UINT8_C(187),
+                             UINT8_C(213), UINT8_C(129), UINT8_C( 93), UINT8_C( 51),
+                             UINT8_C( 45), UINT8_C( 69), UINT8_C(138), UINT8_C( 46),
+                             UINT8_C(133), UINT8_C(111), UINT8_C(212), UINT8_C(213),
+                             UINT8_C(138), UINT8_C( 71), UINT8_C(159), UINT8_C( 50),
+                             UINT8_C(149), UINT8_C( 63), UINT8_C(116), UINT8_C(102)) },
+    { simde_x_mm256_set_epu8(UINT8_C( 59), UINT8_C(202), UINT8_C( 28), UINT8_C( 65),
+                             UINT8_C( 60), UINT8_C( 92), UINT8_C(112), UINT8_C(105),
+                             UINT8_C(229), UINT8_C(116), UINT8_C(242), UINT8_C(217),
+                             UINT8_C(203), UINT8_C( 71), UINT8_C( 15), UINT8_C(143),
+                             UINT8_C( 58), UINT8_C(228), UINT8_C( 36), UINT8_C(154),
+                             UINT8_C( 96), UINT8_C(  2), UINT8_C( 86), UINT8_C( 36),
+                             UINT8_C( 93), UINT8_C( 29), UINT8_C( 70), UINT8_C( 20),
+                             UINT8_C(130), UINT8_C(172), UINT8_C(152), UINT8_C(189)),
+      simde_x_mm256_set_epu8(UINT8_C(100), UINT8_C( 42), UINT8_C( 77), UINT8_C( 21),
+                             UINT8_C(144), UINT8_C(197), UINT8_C(242), UINT8_C(243),
+                             UINT8_C(205), UINT8_C(204), UINT8_C( 75), UINT8_C(102),
+                             UINT8_C( 21), UINT8_C(148), UINT8_C( 70), UINT8_C(128),
+                             UINT8_C( 95), UINT8_C(147), UINT8_C( 39), UINT8_C(190),
+                             UINT8_C( 20), UINT8_C(128), UINT8_C(196), UINT8_C(160),
+                             UINT8_C(  8), UINT8_C(206), UINT8_C( 13), UINT8_C(197),
+                             UINT8_C( 93), UINT8_C(253), UINT8_C( 16), UINT8_C( 27)),
+      simde_x_mm256_set_epu8(UINT8_C( 80), UINT8_C(122), UINT8_C( 53), UINT8_C( 43),
+                             UINT8_C(102), UINT8_C(145), UINT8_C(177), UINT8_C(174),
+                             UINT8_C(217), UINT8_C(160), UINT8_C(159), UINT8_C(160),
+                             UINT8_C(112), UINT8_C(110), UINT8_C( 43), UINT8_C(136),
+                             UINT8_C( 77), UINT8_C(188), UINT8_C( 38), UINT8_C(172),
+                             UINT8_C( 58), UINT8_C( 65), UINT8_C(141), UINT8_C( 98),
+                             UINT8_C( 51), UINT8_C(118), UINT8_C( 42), UINT8_C(109),
+                             UINT8_C(112), UINT8_C(213), UINT8_C( 84), UINT8_C(108)) },
+    { simde_x_mm256_set_epu8(UINT8_C( 75), UINT8_C( 17), UINT8_C(162), UINT8_C( 64),
+                             UINT8_C(129), UINT8_C(250), UINT8_C(112), UINT8_C(166),
+                             UINT8_C( 98), UINT8_C(126), UINT8_C(129), UINT8_C(211),
+                             UINT8_C( 27), UINT8_C( 12), UINT8_C(183), UINT8_C(140),
+                             UINT8_C(106), UINT8_C(255), UINT8_C(252), UINT8_C(224),
+                             UINT8_C(116), UINT8_C(208), UINT8_C( 69), UINT8_C(  4),
+                             UINT8_C(193), UINT8_C( 46), UINT8_C(111), UINT8_C( 96),
+                             UINT8_C(101), UINT8_C(183), UINT8_C( 99), UINT8_C( 60)),
+      simde_x_mm256_set_epu8(UINT8_C( 48), UINT8_C( 27), UINT8_C(253), UINT8_C(118),
+                             UINT8_C(225), UINT8_C(134), UINT8_C(250), UINT8_C(133),
+                             UINT8_C( 52), UINT8_C( 47), UINT8_C( 27), UINT8_C(213),
+                             UINT8_C( 28), UINT8_C(208), UINT8_C( 73), UINT8_C( 89),
+                             UINT8_C( 76), UINT8_C(160), UINT8_C( 57), UINT8_C(191),
+                             UINT8_C( 34), UINT8_C(121), UINT8_C(194), UINT8_C(205),
+                             UINT8_C(102), UINT8_C(106), UINT8_C(175), UINT8_C(219),
+                             UINT8_C(174), UINT8_C(128), UINT8_C(137), UINT8_C(235)),
+      simde_x_mm256_set_epu8(UINT8_C( 62), UINT8_C( 22), UINT8_C(208), UINT8_C( 91),
+                             UINT8_C(177), UINT8_C(192), UINT8_C(181), UINT8_C(150),
+                             UINT8_C( 75), UINT8_C( 87), UINT8_C( 78), UINT8_C(212),
+                             UINT8_C( 28), UINT8_C(110), UINT8_C(128), UINT8_C(115),
+                             UINT8_C( 91), UINT8_C(208), UINT8_C(155), UINT8_C(208),
+                             UINT8_C( 75), UINT8_C(165), UINT8_C(132), UINT8_C(105),
+                             UINT8_C(148), UINT8_C( 76), UINT8_C(143), UINT8_C(158),
+                             UINT8_C(138), UINT8_C(156), UINT8_C(118), UINT8_C(148)) }
+  };
+
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+    simde__m256i r = simde_mm256_avg_epu8(test_vec[i].a, test_vec[i].b);
+    simde_assert_m256i_u8(r, ==, test_vec[i].r);
+  }
+
+  return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm256_avg_epu16(const MunitParameter params[], void* data) {
+  (void) params; /* not used by this test */
+  (void) data; /* not used by this test */
+  /* Table-driven test: feeds eight fixed (a, b) pairs to simde_mm256_avg_epu16 and compares each result with the stored r. */
+  const struct {
+    simde__m256i a; /* first operand */
+    simde__m256i b; /* second operand */
+    simde__m256i r; /* expected result of simde_mm256_avg_epu16(a, b) */
+  } test_vec[8] = { /* spot check, first entry of vector 0: (33977 + 51090 + 1) >> 1 == 42534 */
+    { simde_x_mm256_set_epu16(UINT16_C( 33977), UINT16_C(  4075), UINT16_C( 26120), UINT16_C( 61365),
+                              UINT16_C( 20891), UINT16_C( 60607), UINT16_C( 34113), UINT16_C( 18728),
+                              UINT16_C( 46422), UINT16_C( 18901), UINT16_C( 21788), UINT16_C( 13793),
+                              UINT16_C( 55681), UINT16_C( 17591), UINT16_C( 59483), UINT16_C(  1153)),
+      simde_x_mm256_set_epu16(UINT16_C( 51090), UINT16_C( 18817), UINT16_C( 13598), UINT16_C( 45820),
+                              UINT16_C( 32044), UINT16_C( 17235), UINT16_C( 13391), UINT16_C( 61302),
+                              UINT16_C( 25625), UINT16_C( 19022), UINT16_C( 23185), UINT16_C( 30419),
+                              UINT16_C(  7455), UINT16_C( 32821), UINT16_C(  5103), UINT16_C( 46444)),
+      simde_x_mm256_set_epu16(UINT16_C( 42534), UINT16_C( 11446), UINT16_C( 19859), UINT16_C( 53593),
+                              UINT16_C( 26468), UINT16_C( 38921), UINT16_C( 23752), UINT16_C( 40015),
+                              UINT16_C( 36024), UINT16_C( 18962), UINT16_C( 22487), UINT16_C( 22106),
+                              UINT16_C( 31568), UINT16_C( 25206), UINT16_C( 32293), UINT16_C( 23799)) },
+    { simde_x_mm256_set_epu16(UINT16_C( 53641), UINT16_C( 40905), UINT16_C( 36475), UINT16_C( 59858),
+                              UINT16_C( 46081), UINT16_C(  4869), UINT16_C(  9163), UINT16_C( 48410),
+                              UINT16_C( 39308), UINT16_C( 16016), UINT16_C(  5245), UINT16_C( 47493),
+                              UINT16_C( 23283), UINT16_C(  4725), UINT16_C( 26334), UINT16_C(  6924)),
+      simde_x_mm256_set_epu16(UINT16_C( 51610), UINT16_C( 19346), UINT16_C( 21642), UINT16_C( 28178),
+                              UINT16_C( 36499), UINT16_C( 49283), UINT16_C( 38888), UINT16_C( 45064),
+                              UINT16_C( 34075), UINT16_C( 13347), UINT16_C( 29332), UINT16_C( 60793),
+                              UINT16_C( 53601), UINT16_C( 62027), UINT16_C( 49726), UINT16_C( 62166)),
+      simde_x_mm256_set_epu16(UINT16_C( 52626), UINT16_C( 30126), UINT16_C( 29059), UINT16_C( 44018),
+                              UINT16_C( 41290), UINT16_C( 27076), UINT16_C( 24026), UINT16_C( 46737),
+                              UINT16_C( 36692), UINT16_C( 14682), UINT16_C( 17289), UINT16_C( 54143),
+                              UINT16_C( 38442), UINT16_C( 33376), UINT16_C( 38030), UINT16_C( 34545)) },
+    { simde_x_mm256_set_epu16(UINT16_C( 57168), UINT16_C( 24377), UINT16_C( 44302), UINT16_C( 19279),
+                              UINT16_C( 52773), UINT16_C( 32225), UINT16_C( 44153), UINT16_C( 11257),
+                              UINT16_C( 31235), UINT16_C(  6604), UINT16_C( 39200), UINT16_C( 62778),
+                              UINT16_C( 54132), UINT16_C( 22500), UINT16_C( 35787), UINT16_C( 27352)),
+      simde_x_mm256_set_epu16(UINT16_C(  4777), UINT16_C( 11876), UINT16_C( 62982), UINT16_C( 53472),
+                              UINT16_C( 34552), UINT16_C( 23283), UINT16_C( 24536), UINT16_C( 59637),
+                              UINT16_C(  9078), UINT16_C(  5999), UINT16_C( 35073), UINT16_C( 11220),
+                              UINT16_C( 36751), UINT16_C( 27378), UINT16_C( 48206), UINT16_C( 24113)),
+      simde_x_mm256_set_epu16(UINT16_C( 30973), UINT16_C( 18127), UINT16_C( 53642), UINT16_C( 36376),
+                              UINT16_C( 43663), UINT16_C( 27754), UINT16_C( 34345), UINT16_C( 35447),
+                              UINT16_C( 20157), UINT16_C(  6302), UINT16_C( 37137), UINT16_C( 36999),
+                              UINT16_C( 45442), UINT16_C( 24939), UINT16_C( 41997), UINT16_C( 25733)) },
+    { simde_x_mm256_set_epu16(UINT16_C( 47667), UINT16_C( 42655), UINT16_C( 15805), UINT16_C( 38044),
+                              UINT16_C( 51003), UINT16_C( 54805), UINT16_C( 23576), UINT16_C(  8993),
+                              UINT16_C(  7045), UINT16_C(  2418), UINT16_C( 43598), UINT16_C( 38347),
+                              UINT16_C( 54380), UINT16_C( 29913), UINT16_C( 26304), UINT16_C( 57186)),
+      simde_x_mm256_set_epu16(UINT16_C( 13249), UINT16_C( 33246), UINT16_C( 37681), UINT16_C( 53958),
+                              UINT16_C( 49310), UINT16_C( 12761), UINT16_C(  5303), UINT16_C( 54599),
+                              UINT16_C( 42076), UINT16_C( 30225), UINT16_C( 60443), UINT16_C( 41570),
+                              UINT16_C( 50311), UINT16_C(  1964), UINT16_C( 59695), UINT16_C( 38784)),
+      simde_x_mm256_set_epu16(UINT16_C( 30458), UINT16_C( 37951), UINT16_C( 26743), UINT16_C( 46001),
+                              UINT16_C( 50157), UINT16_C( 33783), UINT16_C( 14440), UINT16_C( 31796),
+                              UINT16_C( 24561), UINT16_C( 16322), UINT16_C( 52021), UINT16_C( 39959),
+                              UINT16_C( 52346), UINT16_C( 15939), UINT16_C( 43000), UINT16_C( 47985)) },
+    { simde_x_mm256_set_epu16(UINT16_C( 46733), UINT16_C( 23899), UINT16_C(   189), UINT16_C( 55261),
+                              UINT16_C( 27111), UINT16_C( 15840), UINT16_C( 17492), UINT16_C( 63447),
+                              UINT16_C( 32197), UINT16_C( 17766), UINT16_C( 55928), UINT16_C( 29103),
+                              UINT16_C( 34337), UINT16_C( 27253), UINT16_C( 33273), UINT16_C( 49734)),
+      simde_x_mm256_set_epu16(UINT16_C( 35099), UINT16_C(  4446), UINT16_C( 62606), UINT16_C( 36400),
+                              UINT16_C( 14041), UINT16_C( 53626), UINT16_C(   446), UINT16_C( 15354),
+                              UINT16_C( 46000), UINT16_C( 42809), UINT16_C( 23055), UINT16_C(  8076),
+                              UINT16_C( 53608), UINT16_C( 35738), UINT16_C( 14840), UINT16_C( 57665)),
+      simde_x_mm256_set_epu16(UINT16_C( 40916), UINT16_C( 14173), UINT16_C( 31398), UINT16_C( 45831),
+                              UINT16_C( 20576), UINT16_C( 34733), UINT16_C(  8969), UINT16_C( 39401),
+                              UINT16_C( 39099), UINT16_C( 30288), UINT16_C( 39492), UINT16_C( 18590),
+                              UINT16_C( 43973), UINT16_C( 31496), UINT16_C( 24057), UINT16_C( 53700)) },
+    { simde_x_mm256_set_epu16(UINT16_C( 32242), UINT16_C(  8824), UINT16_C( 27338), UINT16_C( 25661),
+                              UINT16_C( 27025), UINT16_C( 11905), UINT16_C( 53305), UINT16_C( 21013),
+                              UINT16_C( 15201), UINT16_C( 52740), UINT16_C( 46673), UINT16_C( 52220),
+                              UINT16_C( 28444), UINT16_C( 53817), UINT16_C( 54908), UINT16_C( 35186)),
+      simde_x_mm256_set_epu16(UINT16_C( 53308), UINT16_C( 13117), UINT16_C( 63947), UINT16_C( 17859),
+                              UINT16_C(  4163), UINT16_C( 61940), UINT16_C( 55753), UINT16_C( 26704),
+                              UINT16_C(  7720), UINT16_C( 17752), UINT16_C( 21389), UINT16_C( 56750),
+                              UINT16_C( 42354), UINT16_C( 27434), UINT16_C( 21249), UINT16_C( 24409)),
+      simde_x_mm256_set_epu16(UINT16_C( 42775), UINT16_C( 10971), UINT16_C( 45643), UINT16_C( 21760),
+                              UINT16_C( 15594), UINT16_C( 36923), UINT16_C( 54529), UINT16_C( 23859),
+                              UINT16_C( 11461), UINT16_C( 35246), UINT16_C( 34031), UINT16_C( 54485),
+                              UINT16_C( 35399), UINT16_C( 40626), UINT16_C( 38079), UINT16_C( 29798)) },
+    { simde_x_mm256_set_epu16(UINT16_C( 15306), UINT16_C(  7233), UINT16_C( 15452), UINT16_C( 28777),
+                              UINT16_C( 58740), UINT16_C( 62169), UINT16_C( 52039), UINT16_C(  3983),
+                              UINT16_C( 15076), UINT16_C(  9370), UINT16_C( 24578), UINT16_C( 22052),
+                              UINT16_C( 23837), UINT16_C( 17940), UINT16_C( 33452), UINT16_C( 39101)),
+      simde_x_mm256_set_epu16(UINT16_C( 25642), UINT16_C( 19733), UINT16_C( 37061), UINT16_C( 62195),
+                              UINT16_C( 52684), UINT16_C( 19302), UINT16_C(  5524), UINT16_C( 18048),
+                              UINT16_C( 24467), UINT16_C( 10174), UINT16_C(  5248), UINT16_C( 50336),
+                              UINT16_C(  2254), UINT16_C(  3525), UINT16_C( 24061), UINT16_C(  4123)),
+      simde_x_mm256_set_epu16(UINT16_C( 20474), UINT16_C( 13483), UINT16_C( 26257), UINT16_C( 45486),
+                              UINT16_C( 55712), UINT16_C( 40736), UINT16_C( 28782), UINT16_C( 11016),
+                              UINT16_C( 19772), UINT16_C(  9772), UINT16_C( 14913), UINT16_C( 36194),
+                              UINT16_C( 13046), UINT16_C( 10733), UINT16_C( 28757), UINT16_C( 21612)) },
+    { simde_x_mm256_set_epu16(UINT16_C( 19217), UINT16_C( 41536), UINT16_C( 33274), UINT16_C( 28838),
+                              UINT16_C( 25214), UINT16_C( 33235), UINT16_C(  6924), UINT16_C( 46988),
+                              UINT16_C( 27391), UINT16_C( 64736), UINT16_C( 29904), UINT16_C( 17668),
+                              UINT16_C( 49454), UINT16_C( 28512), UINT16_C( 26039), UINT16_C( 25404)),
+      simde_x_mm256_set_epu16(UINT16_C( 12315), UINT16_C( 64886), UINT16_C( 57734), UINT16_C( 64133),
+                              UINT16_C( 13359), UINT16_C(  7125), UINT16_C(  7376), UINT16_C( 18777),
+                              UINT16_C( 19616), UINT16_C( 14783), UINT16_C(  8825), UINT16_C( 49869),
+                              UINT16_C( 26218), UINT16_C( 45019), UINT16_C( 44672), UINT16_C( 35307)),
+      simde_x_mm256_set_epu16(UINT16_C( 15766), UINT16_C( 53211), UINT16_C( 45504), UINT16_C( 46486),
+                              UINT16_C( 19287), UINT16_C( 20180), UINT16_C(  7150), UINT16_C( 32883),
+                              UINT16_C( 23504), UINT16_C( 39760), UINT16_C( 19365), UINT16_C( 33769),
+                              UINT16_C( 37836), UINT16_C( 36766), UINT16_C( 35356), UINT16_C( 30356)) }
+  };
+  /* Run every vector; simde_assert_m256i_u16 is asked for equality (==) with the stored expectation. */
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+    simde__m256i r = simde_mm256_avg_epu16(test_vec[i].a, test_vec[i].b);
+    simde_assert_m256i_u16(r, ==, test_vec[i].r);
+  }
+
+  return MUNIT_OK;
+}
+
 static MunitResult
 test_simde_mm256_blend_epi16(const MunitParameter params[], void* data) {
   (void) params;
@@ -6191,6 +6519,95 @@ test_simde_mm256_permute4x64_epi64(const MunitParameter params[], void* data) {
   return MUNIT_OK;
 }
 
+static MunitResult
+test_simde_mm256_permute2x128_si256(const MunitParameter params[], void* data) {
+  (void) params; /* not used by this test */
+  (void) data; /* not used by this test */
+  /* Table-driven test: runs simde_mm256_permute2x128_si256(a, b, 23) on eight fixed pairs and compares with the stored r. */
+  const struct {
+    simde__m256i a; /* first operand */
+    simde__m256i b; /* second operand */
+    simde__m256i r; /* expected result; in every row below it is a's first two set_epi64x arguments followed by b's first two */
+  } test_vec[8] = {
+    { simde_mm256_set_epi64x(INT64_C( 9096692030846176105), INT64_C(  644260392039444522),
+                             INT64_C(-4583540275174352405), INT64_C(-6816753880857675259)),
+      simde_mm256_set_epi64x(INT64_C(-7886827988827131690), INT64_C(-2107575233125845054),
+                             INT64_C(-8398644678734943287), INT64_C( 7172114359254607016)),
+      simde_mm256_set_epi64x(INT64_C( 9096692030846176105), INT64_C(  644260392039444522),
+                             INT64_C(-7886827988827131690), INT64_C(-2107575233125845054)) },
+    { simde_mm256_set_epi64x(INT64_C(-6314946612387904819), INT64_C(-1883921203594067636),
+                             INT64_C(-5030251871897832411), INT64_C( 8348493077761215789)),
+      simde_mm256_set_epi64x(INT64_C( 2582913386835954622), INT64_C(  153655168481379701),
+                             INT64_C(-1087064137492042031), INT64_C( 1783808851867973139)),
+      simde_mm256_set_epi64x(INT64_C(-6314946612387904819), INT64_C(-1883921203594067636),
+                             INT64_C( 2582913386835954622), INT64_C(  153655168481379701)) },
+    { simde_mm256_set_epi64x(INT64_C(-3208478614025680333), INT64_C(-3409066786741744502),
+                             INT64_C(-6957643043766269700), INT64_C( 3219428559958296960)),
+      simde_mm256_set_epi64x(INT64_C(-1736924574103250292), INT64_C(-2810347538827243748),
+                             INT64_C( -297965813118371045), INT64_C(-7618358042182251122)),
+      simde_mm256_set_epi64x(INT64_C(-3208478614025680333), INT64_C(-3409066786741744502),
+                             INT64_C(-1736924574103250292), INT64_C(-2810347538827243748)) },
+    { simde_mm256_set_epi64x(INT64_C(  796366024780064289), INT64_C(-1489690745108457074),
+                             INT64_C(-7990282097237082056), INT64_C(-7545130296515735090)),
+      simde_mm256_set_epi64x(INT64_C(  951803776889232332), INT64_C(-6640461449591045668),
+                             INT64_C( 5271740244822761531), INT64_C( 3149915688837762175)),
+      simde_mm256_set_epi64x(INT64_C(  796366024780064289), INT64_C(-1489690745108457074),
+                             INT64_C(  951803776889232332), INT64_C(-6640461449591045668)) },
+    { simde_mm256_set_epi64x(INT64_C(  -95120238103258498), INT64_C(-1762353908339260045),
+                             INT64_C( 6992845328844002662), INT64_C(-5939283762406250642)),
+      simde_mm256_set_epi64x(INT64_C(-5885001620821736092), INT64_C(-6745062192544323367),
+                             INT64_C( 7803931770148523943), INT64_C(-8993062880293478576)),
+      simde_mm256_set_epi64x(INT64_C(  -95120238103258498), INT64_C(-1762353908339260045),
+                             INT64_C(-5885001620821736092), INT64_C(-6745062192544323367)) },
+    { simde_mm256_set_epi64x(INT64_C(-3708437875152674849), INT64_C( 8243162546537572005),
+                             INT64_C(-1103721052327437925), INT64_C(-2925489198757650175)),
+      simde_mm256_set_epi64x(INT64_C( 1440085788748654982), INT64_C( 1725906984156202179),
+                             INT64_C( 5845599904819452784), INT64_C( 7162548421658470679)),
+      simde_mm256_set_epi64x(INT64_C(-3708437875152674849), INT64_C( 8243162546537572005),
+                             INT64_C( 1440085788748654982), INT64_C( 1725906984156202179)) },
+    { simde_mm256_set_epi64x(INT64_C( 6834943649491098623), INT64_C( 2759372331225584008),
+                             INT64_C(-1659900994892419246), INT64_C(-2119655686628377164)),
+      simde_mm256_set_epi64x(INT64_C( 1954610004667753515), INT64_C( 5688482191974230934),
+                             INT64_C(-3937849964004809456), INT64_C(-8694088207381845200)),
+      simde_mm256_set_epi64x(INT64_C( 6834943649491098623), INT64_C( 2759372331225584008),
+                             INT64_C( 1954610004667753515), INT64_C( 5688482191974230934)) },
+    { simde_mm256_set_epi64x(INT64_C( 5051547726856501651), INT64_C( 7333908238294102632),
+                             INT64_C( 7118133466490521985), INT64_C( 6243950982549416292)),
+      simde_mm256_set_epi64x(INT64_C(-6805527145604381785), INT64_C(-1282569833996306134),
+                             INT64_C(-1497859500202369050), INT64_C( 1581543684384159070)),
+      simde_mm256_set_epi64x(INT64_C( 5051547726856501651), INT64_C( 7333908238294102632),
+                             INT64_C(-6805527145604381785), INT64_C(-1282569833996306134)) },
+  };
+  /* Disabled generator (kept for reference): fills a and b with random bytes, prints rows in the table format above, then returns MUNIT_FAIL; presumably how the table was produced. */
+  //printf("\n");
+  //for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+  //  simde__m256i_private a, b, r;

+  //  munit_rand_memory(sizeof(a), (uint8_t*) &a);
+  //  munit_rand_memory(sizeof(b), (uint8_t*) &b);

+  //  r = simde__m256i_to_private(simde_mm256_permute2x128_si256(simde__m256i_from_private(a), simde__m256i_from_private(b), 23));
+
+  //  printf("    { simde_mm256_set_epi64x(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
+  //         "                             INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")),\n",
+  //         a.i64[3], a.i64[2], a.i64[1], a.i64[0]);
+  //  printf("      simde_mm256_set_epi64x(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
+  //         "                             INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")),\n",
+  //         b.i64[3], b.i64[2], b.i64[1], b.i64[0]);
+  //  printf("      simde_mm256_set_epi64x(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
+  //         "                             INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")) },\n",
+  //         r.i64[3], r.i64[2], r.i64[1], r.i64[0]);
+  //}
+  //return MUNIT_FAIL;
+  /* Run every vector with the same control value (23) that the generator above used. */
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+    simde__m256i r = simde_mm256_permute2x128_si256(test_vec[i].a, test_vec[i].b, 23);
+    simde_assert_m256i_i64(r, ==, test_vec[i].r);
+  }
+
+  return MUNIT_OK;
+}
+
 static MunitResult
 test_simde_mm256_shuffle_epi8(const MunitParameter params[], void* data) {
   (void) params;
@@ -9246,6 +9663,9 @@ static MunitTest test_suite_tests[] = {
   SIMDE_TESTS_DEFINE_TEST(mm256_and_si256),
   SIMDE_TESTS_DEFINE_TEST(mm256_andnot_si256),
 
+  SIMDE_TESTS_DEFINE_TEST(mm256_avg_epu8),
+  SIMDE_TESTS_DEFINE_TEST(mm256_avg_epu16),
+
   SIMDE_TESTS_DEFINE_TEST(mm_blend_epi32),
   SIMDE_TESTS_DEFINE_TEST(mm256_blend_epi16),
   SIMDE_TESTS_DEFINE_TEST(mm256_blend_epi32),
@@ -9304,6 +9724,7 @@ static MunitTest test_suite_tests[] = {
   SIMDE_TESTS_DEFINE_TEST(mm256_packs_epi32),
 
   SIMDE_TESTS_DEFINE_TEST(mm256_permute4x64_epi64),
+  SIMDE_TESTS_DEFINE_TEST(mm256_permute2x128_si256),
 
   SIMDE_TESTS_DEFINE_TEST(mm256_shuffle_epi8),
   SIMDE_TESTS_DEFINE_TEST(mm256_shuffle_epi32),


=====================================
test/x86/avx512f.c
=====================================
The diff for this file was not included because it is too large.


View it on GitLab: https://salsa.debian.org/med-team/simde/-/commit/c2f16f4c4fd1b38c52e656b1cc4282bfa8253ebd

-- 
View it on GitLab: https://salsa.debian.org/med-team/simde/-/commit/c2f16f4c4fd1b38c52e656b1cc4282bfa8253ebd
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200414/40727b90/attachment-0001.html>


More information about the debian-med-commit mailing list