[med-svn] [Git][med-team/simde][upstream] New upstream version 0.0.0.git.20200408

Michael R. Crusoe gitlab at salsa.debian.org
Wed Apr 8 14:44:20 BST 2020



Michael R. Crusoe pushed to branch upstream at Debian Med / simde


Commits:
181091ed by Michael R. Crusoe at 2020-04-08T15:20:59+02:00
New upstream version 0.0.0.git.20200408
- - - - -


8 changed files:

- .azure-pipelines.yml
- − .codecov.yml
- .github/workflows/ci.yml
- .travis.yml
- simde/x86/avx512f.h
- test/x86/avx512f.c
- test/x86/skel.c
- test/x86/sse.c


Changes:

=====================================
.azure-pipelines.yml
=====================================
@@ -13,8 +13,6 @@ jobs:
     displayName: Build
   - script: ./run-tests
     displayName: Tests
-  - script: bash <(curl -s https://codecov.io/bash) -y ./.codecov.yml
-    displayName: 'Upload to codecov.io'
 
 - job: sse4_1
   steps:
@@ -26,8 +24,6 @@ jobs:
     displayName: Build
   - script: ./run-tests
     displayName: Tests
-  - script: bash <(curl -s https://codecov.io/bash) -y ./.codecov.yml
-    displayName: 'Upload to codecov.io'
 
 - job: mipsel
   steps:
@@ -44,5 +40,3 @@ jobs:
     displayName: Build
   - script: ./dockcross-linux-mipsel ./run-tests
     displayName: Tests
-  - script: bash <(curl -s https://codecov.io/bash) -y ./.codecov.yml
-    displayName: 'Upload to codecov.io'


=====================================
.codecov.yml deleted
=====================================
@@ -1,9 +0,0 @@
-coverage:
-  status:
-    project:
-      default:
-        if_no_uploads: success
-
-ignore:
-- test/**/*
-- test/*


=====================================
.github/workflows/ci.yml
=====================================
@@ -18,11 +18,6 @@ jobs:
       run: cmake --build .
     - name: Test
       run: ./run-tests
-    - uses: codecov/codecov-action@v1
-      with:
-        token: ${{ secrets.CODECOV_TOKEN }}
-        yml: ./.codecov.yml
-        fail_ci_if_error: false
 
   # emscripten:
   #   runs-on: ubuntu-latest
@@ -40,6 +35,4 @@ jobs:
   #   - name: Build
   #     run: emsdk/upstream/emscripten/emmake ninja
   #   - name: Test
-  #     run: node ./run-tests
-  #   - name: Upload Coverage Data
-  #     run: bash <(curl -s https://codecov.io/bash) -y ./.codecov.yml
+  #     run: node ./run-tests=


=====================================
.travis.yml
=====================================
@@ -292,11 +292,5 @@ script:
     fi
   fi
 
-after_success:
-- |
-  if [ "${BUILD_TYPE}" = "Coverage" ]; then
-    bash <(curl -s https://codecov.io/bash) -y "${TRAVIS_BUILD_DIR}"/.codecov.yml ${GCOV}
-  fi
-
 notifications:
   email: false


=====================================
simde/x86/avx512f.h
=====================================
@@ -1661,7 +1661,7 @@ simde_mm512_xor_si512 (simde__m512i a, simde__m512i b) {
     r_.i32f = a_.i32f ^ b_.i32f;
   #else
     SIMDE__VECTORIZE
-    for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
+    for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
       r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i];
     }
   #endif
@@ -2126,6 +2126,19 @@ simde_mm512_sub_epi32 (simde__m512i a, simde__m512i b) {
 #  define _mm512_sub_epi32(a, b) simde_mm512_sub_epi32(a, b)
 #endif
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_mask_sub_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_mask_sub_epi32(src, k, a, b);
+#else
+  return simde_mm512_mask_mov_epi32(src, k, simde_mm512_sub_epi32(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_mask_sub_epi32(src, k, a, b) simde_mm512_mask_sub_epi32(src, k, a, b)
+#endif
+
 SIMDE__FUNCTION_ATTRIBUTES
 simde__m512i
 simde_mm512_sub_epi64 (simde__m512i a, simde__m512i b) {
@@ -2153,6 +2166,19 @@ simde_mm512_sub_epi64 (simde__m512i a, simde__m512i b) {
 #  define _mm512_sub_epi64(a, b) simde_mm512_sub_epi64(a, b)
 #endif
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_mask_sub_epi64 (simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_mask_sub_epi64(src, k, a, b);
+#else
+  return simde_mm512_mask_mov_epi64(src, k, simde_mm512_sub_epi64(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_mask_sub_epi64(src, k, a, b) simde_mm512_mask_sub_epi64(src, k, a, b)
+#endif
+
 SIMDE__FUNCTION_ATTRIBUTES
 simde__m512
 simde_mm512_sub_ps (simde__m512 a, simde__m512 b) {
@@ -2180,6 +2206,19 @@ simde_mm512_sub_ps (simde__m512 a, simde__m512 b) {
 #  define _mm512_sub_ps(a, b) simde_mm512_sub_ps(a, b)
 #endif
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512
+simde_mm512_mask_sub_ps (simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_mask_sub_ps(src, k, a, b);
+#else
+  return simde_mm512_mask_mov_ps(src, k, simde_mm512_sub_ps(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_mask_sub_ps(src, k, a, b) simde_mm512_mask_sub_ps(src, k, a, b)
+#endif
+
 SIMDE__FUNCTION_ATTRIBUTES
 simde__m512d
 simde_mm512_sub_pd (simde__m512d a, simde__m512d b) {
@@ -2207,6 +2246,19 @@ simde_mm512_sub_pd (simde__m512d a, simde__m512d b) {
 #  define _mm512_sub_pd(a, b) simde_mm512_sub_pd(a, b)
 #endif
 
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512d
+simde_mm512_mask_sub_pd (simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+  return _mm512_mask_sub_pd(src, k, a, b);
+#else
+  return simde_mm512_mask_mov_pd(src, k, simde_mm512_sub_pd(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_mask_sub_pd(src, k, a, b) simde_mm512_mask_sub_pd(src, k, a, b)
+#endif
+
 SIMDE__FUNCTION_ATTRIBUTES
 simde__mmask16
 simde_mm512_cmpeq_epi32_mask (simde__m512i a, simde__m512i b) {


=====================================
test/x86/avx512f.c
=====================================
@@ -2771,6 +2771,638 @@ test_simde_mm512_sub_pd(const MunitParameter params[], void* data) {
   return MUNIT_OK;
 }
 
+static MunitResult
+test_simde_mm512_mask_sub_epi32(const MunitParameter params[], void* data) {
+  (void) params;
+  (void) data;
+
+  const struct {
+    simde__m512i src;
+    simde__mmask16 k;
+    simde__m512i a;
+    simde__m512i b;
+    simde__m512i r;
+  } test_vec[8] = {
+    { simde_mm512_set_epi32(INT32_C( -957186609), INT32_C(-1524765283), INT32_C( 1290068568), INT32_C( 1887468775),
+                            INT32_C( -904096999), INT32_C(-1189693212), INT32_C(  221355870), INT32_C(-1952779315),
+                            INT32_C( 1347985035), INT32_C(-2063939133), INT32_C(-1602582649), INT32_C(-2096850611),
+                            INT32_C(-2084994527), INT32_C(  -75386963), INT32_C( 1835417512), INT32_C(-2072964471)),
+      UINT16_C(35396),
+      simde_mm512_set_epi32(INT32_C(  136551409), INT32_C( 1192962314), INT32_C( 2058621765), INT32_C(-2039270859),
+                            INT32_C(  -26254502), INT32_C(  733381108), INT32_C( -187934344), INT32_C(  989979336),
+                            INT32_C(-1964919382), INT32_C(  126554293), INT32_C(  254011928), INT32_C( 1490517506),
+                            INT32_C(-1065486850), INT32_C(   45941921), INT32_C(-1082899768), INT32_C( -219628031)),
+      simde_mm512_set_epi32(INT32_C( -680185335), INT32_C(  111102276), INT32_C( 1222454066), INT32_C( -422241261),
+                            INT32_C(  -78061198), INT32_C(-2084414007), INT32_C( 1367041146), INT32_C(-1471398421),
+                            INT32_C( -348147705), INT32_C(  673564238), INT32_C(-1457376577), INT32_C(  613875036),
+                            INT32_C( -859069431), INT32_C( -733638834), INT32_C(-1673403701), INT32_C(  842474288)),
+      simde_mm512_set_epi32(INT32_C(  816736744), INT32_C(-1524765283), INT32_C( 1290068568), INT32_C( 1887468775),
+                            INT32_C(   51806696), INT32_C(-1189693212), INT32_C(-1554975490), INT32_C(-1952779315),
+                            INT32_C( 1347985035), INT32_C( -547009945), INT32_C(-1602582649), INT32_C(-2096850611),
+                            INT32_C(-2084994527), INT32_C(  779580755), INT32_C( 1835417512), INT32_C(-2072964471)) },
+    { simde_mm512_set_epi32(INT32_C( 2077489237), INT32_C(-2066152618), INT32_C( -825076901), INT32_C(-1372574642),
+                            INT32_C( -889460158), INT32_C(  498921453), INT32_C(  943332338), INT32_C(-1383811831),
+                            INT32_C( 1827152592), INT32_C( 1728034912), INT32_C( 1350913629), INT32_C(  868578809),
+                            INT32_C( 1368636899), INT32_C( -389235219), INT32_C(  602990700), INT32_C( -400551366)),
+      UINT16_C(47779),
+      simde_mm512_set_epi32(INT32_C( 1704357216), INT32_C( -538157327), INT32_C( 1370875608), INT32_C( 1508504457),
+                            INT32_C(  -68294915), INT32_C(-1479685367), INT32_C( -615597542), INT32_C(-1638887359),
+                            INT32_C(-1417912572), INT32_C( 1479002949), INT32_C( -647118153), INT32_C( 1670566025),
+                            INT32_C(-1880268561), INT32_C(-1083232065), INT32_C( 2092339698), INT32_C(-1021873283)),
+      simde_mm512_set_epi32(INT32_C( -839277498), INT32_C(  551588590), INT32_C( 1834572496), INT32_C( 1613035598),
+                            INT32_C(-1678404828), INT32_C(-1769391216), INT32_C(-1638931514), INT32_C(  156804649),
+                            INT32_C( 1764158657), INT32_C( -132604621), INT32_C(  446542816), INT32_C( 2037189710),
+                            INT32_C(  109296986), INT32_C(  257019297), INT32_C(  473079611), INT32_C( 1127076998)),
+      simde_mm512_set_epi32(INT32_C(-1751332582), INT32_C(-2066152618), INT32_C( -463696888), INT32_C( -104531141),
+                            INT32_C( 1610109913), INT32_C(  498921453), INT32_C( 1023333972), INT32_C(-1383811831),
+                            INT32_C( 1112896067), INT32_C( 1728034912), INT32_C(-1093660969), INT32_C(  868578809),
+                            INT32_C( 1368636899), INT32_C( -389235219), INT32_C( 1619260087), INT32_C( 2146017015)) },
+    { simde_mm512_set_epi32(INT32_C(  307630641), INT32_C(-1560148595), INT32_C(  376284729), INT32_C(  278591183),
+                            INT32_C( -277186219), INT32_C( 1940926671), INT32_C(  662058232), INT32_C( 1091202812),
+                            INT32_C( -701136301), INT32_C( -504607320), INT32_C( -251380880), INT32_C( 1860616049),
+                            INT32_C(-1752161866), INT32_C(-1199997313), INT32_C(-1668691262), INT32_C( 1717921298)),
+      UINT16_C( 2459),
+      simde_mm512_set_epi32(INT32_C( 2079917891), INT32_C(-1199015072), INT32_C(  -98602729), INT32_C( -930567988),
+                            INT32_C(-1256209763), INT32_C( 1068967165), INT32_C( 1289079409), INT32_C( 1251085533),
+                            INT32_C( -727360546), INT32_C(-1724797341), INT32_C( 2093813635), INT32_C( 1051617285),
+                            INT32_C( 1264716001), INT32_C(  940727836), INT32_C( 1722577424), INT32_C(-1275657732)),
+      simde_mm512_set_epi32(INT32_C(  671797033), INT32_C(-1012795446), INT32_C( 2106088193), INT32_C( -458612579),
+                            INT32_C( -261772865), INT32_C( -550994046), INT32_C( 2105186719), INT32_C( 1074097751),
+                            INT32_C(-1251411324), INT32_C(   65867416), INT32_C(-1495248139), INT32_C(  315553116),
+                            INT32_C(-1869712369), INT32_C(-1246794510), INT32_C( 1218370652), INT32_C( -240388126)),
+      simde_mm512_set_epi32(INT32_C(  307630641), INT32_C(-1560148595), INT32_C(  376284729), INT32_C(  278591183),
+                            INT32_C( -994436898), INT32_C( 1940926671), INT32_C(  662058232), INT32_C(  176987782),
+                            INT32_C(  524050778), INT32_C( -504607320), INT32_C( -251380880), INT32_C(  736064169),
+                            INT32_C(-1160538926), INT32_C(-1199997313), INT32_C(  504206772), INT32_C(-1035269606)) },
+    { simde_mm512_set_epi32(INT32_C( -789716549), INT32_C(-1932674309), INT32_C(  548470804), INT32_C( -318652401),
+                            INT32_C(-2041118423), INT32_C(-2107945718), INT32_C( -715661009), INT32_C( 1609073505),
+                            INT32_C( 1214609500), INT32_C(  283085327), INT32_C(-1633515677), INT32_C( 1697029857),
+                            INT32_C( 1976447422), INT32_C(  904412076), INT32_C( 1198927422), INT32_C(-1498026761)),
+      UINT16_C(54315),
+      simde_mm512_set_epi32(INT32_C( 1385182319), INT32_C(  795273310), INT32_C( 1955628796), INT32_C( -526907127),
+                            INT32_C(-2141025282), INT32_C( -931446405), INT32_C(-1422139726), INT32_C(-1101084337),
+                            INT32_C( -254080461), INT32_C( -595291883), INT32_C( 1292692652), INT32_C(-1849951866),
+                            INT32_C( -815091127), INT32_C(  370112774), INT32_C( -520479179), INT32_C( 1681391452)),
+      simde_mm512_set_epi32(INT32_C(-1825216267), INT32_C( 1555513845), INT32_C(-2081576252), INT32_C(-1972081268),
+                            INT32_C( -563427058), INT32_C( 1922040193), INT32_C(-2102270715), INT32_C(-1257264155),
+                            INT32_C( -894851768), INT32_C( 1793334666), INT32_C( 1049305530), INT32_C(-1935379009),
+                            INT32_C(   -8279361), INT32_C(-1567490719), INT32_C(-2014130513), INT32_C(-1826154506)),
+      simde_mm512_set_epi32(INT32_C(-1084568710), INT32_C( -760240535), INT32_C(  548470804), INT32_C( 1445174141),
+                            INT32_C(-2041118423), INT32_C( 1441480698), INT32_C( -715661009), INT32_C( 1609073505),
+                            INT32_C( 1214609500), INT32_C(  283085327), INT32_C(  243387122), INT32_C( 1697029857),
+                            INT32_C( -806811766), INT32_C(  904412076), INT32_C( 1493651334), INT32_C( -787421338)) },
+    { simde_mm512_set_epi32(INT32_C(  997407681), INT32_C(  -83308341), INT32_C( 1430458288), INT32_C( -655910274),
+                            INT32_C(   17159218), INT32_C(  197891822), INT32_C(  -82165524), INT32_C(   98130061),
+                            INT32_C( -696255503), INT32_C(  616388941), INT32_C( 1383637516), INT32_C(  255219509),
+                            INT32_C(-1280964183), INT32_C(-1753221031), INT32_C(  480974923), INT32_C(-1444611560)),
+      UINT16_C(47568),
+      simde_mm512_set_epi32(INT32_C(-1796791424), INT32_C(  919413682), INT32_C(  907613991), INT32_C(-1471064632),
+                            INT32_C(-2017464794), INT32_C(  -67778959), INT32_C(-1033884668), INT32_C( -839095279),
+                            INT32_C( -881742684), INT32_C( 1193890045), INT32_C( -817450648), INT32_C( -450889209),
+                            INT32_C(-1829442769), INT32_C( -254239276), INT32_C( 1531184539), INT32_C(  204100550)),
+      simde_mm512_set_epi32(INT32_C(-1574624316), INT32_C( 1965632168), INT32_C( -507137262), INT32_C(  868285762),
+                            INT32_C( -287712967), INT32_C(-1275855491), INT32_C(-1948986373), INT32_C(  378189270),
+                            INT32_C( 2028975029), INT32_C( -983819985), INT32_C(-1530834794), INT32_C( -267906659),
+                            INT32_C( 2013371063), INT32_C( -972550977), INT32_C(-1345658151), INT32_C(-2001069348)),
+      simde_mm512_set_epi32(INT32_C( -222167108), INT32_C(  -83308341), INT32_C( 1414751253), INT32_C( 1955616902),
+                            INT32_C(-1729751827), INT32_C(  197891822), INT32_C(  -82165524), INT32_C(-1217284549),
+                            INT32_C( 1384249583), INT32_C(-2117257266), INT32_C( 1383637516), INT32_C( -182982550),
+                            INT32_C(-1280964183), INT32_C(-1753221031), INT32_C(  480974923), INT32_C(-1444611560)) },
+    { simde_mm512_set_epi32(INT32_C( 1875288432), INT32_C( 1158027251), INT32_C( -303056299), INT32_C( -939396673),
+                            INT32_C( 1585003262), INT32_C( 1365783459), INT32_C(  111845672), INT32_C(-1286713478),
+                            INT32_C(  674624782), INT32_C( 2020528740), INT32_C(  497192398), INT32_C( 1112540789),
+                            INT32_C(-1764167278), INT32_C(-1540772359), INT32_C(  395629026), INT32_C(  984304916)),
+      UINT16_C(16877),
+      simde_mm512_set_epi32(INT32_C( -344292944), INT32_C( 1968428151), INT32_C( 2086978939), INT32_C( 1501910543),
+                            INT32_C(-1262393002), INT32_C( 2081469023), INT32_C( 2016768793), INT32_C( 1922434397),
+                            INT32_C( -253304624), INT32_C(  515280842), INT32_C(-1708348294), INT32_C( 2107558843),
+                            INT32_C( 1919035054), INT32_C( 1742835915), INT32_C(  989439209), INT32_C( 2080310116)),
+      simde_mm512_set_epi32(INT32_C( 1560352883), INT32_C( -937050525), INT32_C(   15000953), INT32_C(  298895006),
+                            INT32_C( -255287325), INT32_C( -851082971), INT32_C( -981170631), INT32_C(   30364523),
+                            INT32_C( -626854551), INT32_C( 1776719697), INT32_C(-1286673883), INT32_C( 2134458392),
+                            INT32_C(-1884377437), INT32_C(-2042525337), INT32_C( 2143156805), INT32_C(-1045267304)),
+      simde_mm512_set_epi32(INT32_C( 1875288432), INT32_C(-1389488620), INT32_C( -303056299), INT32_C( -939396673),
+                            INT32_C( 1585003262), INT32_C( 1365783459), INT32_C(  111845672), INT32_C( 1892069874),
+                            INT32_C(  373549927), INT32_C(-1261438855), INT32_C( -421674411), INT32_C( 1112540789),
+                            INT32_C( -491554805), INT32_C( -509606044), INT32_C(  395629026), INT32_C(-1169389876)) },
+    { simde_mm512_set_epi32(INT32_C(  726531409), INT32_C( -606374582), INT32_C(-1057918709), INT32_C( -811736744),
+                            INT32_C(-1460245574), INT32_C( -627872087), INT32_C( 1799586442), INT32_C(-1105519928),
+                            INT32_C(-1288829692), INT32_C(-2144392739), INT32_C( 1110910857), INT32_C( -282270116),
+                            INT32_C(-1420141426), INT32_C( 1682561587), INT32_C( 1308021682), INT32_C(  712875579)),
+      UINT16_C(17567),
+      simde_mm512_set_epi32(INT32_C(-1065890522), INT32_C( 1362887862), INT32_C(-1905482051), INT32_C(  174767211),
+                            INT32_C( 1968089357), INT32_C(-1207243832), INT32_C( -701927204), INT32_C(-1701909648),
+                            INT32_C(-1822821880), INT32_C(-1418686446), INT32_C( 2002979046), INT32_C( -531029674),
+                            INT32_C( -233545704), INT32_C( 1270923539), INT32_C( -515398077), INT32_C(  870828526)),
+      simde_mm512_set_epi32(INT32_C(-1161246521), INT32_C(-1263382687), INT32_C( -761171059), INT32_C( 1052537110),
+                            INT32_C(-1225204820), INT32_C( 1299827393), INT32_C(  477328169), INT32_C( 2043159101),
+                            INT32_C(  984199920), INT32_C( 1963689737), INT32_C(-1149812166), INT32_C( -500241318),
+                            INT32_C( -953270640), INT32_C( 1180984926), INT32_C( -645305643), INT32_C( 1026486800)),
+      simde_mm512_set_epi32(INT32_C(  726531409), INT32_C(-1668696747), INT32_C(-1057918709), INT32_C( -811736744),
+                            INT32_C(-1460245574), INT32_C( 1787896071), INT32_C( 1799586442), INT32_C(-1105519928),
+                            INT32_C( 1487945496), INT32_C(-2144392739), INT32_C( 1110910857), INT32_C(  -30788356),
+                            INT32_C(  719724936), INT32_C(   89938613), INT32_C(  129907566), INT32_C( -155658274)) },
+    { simde_mm512_set_epi32(INT32_C( 1723004290), INT32_C(  721161302), INT32_C( 1077400739), INT32_C(  861837752),
+                            INT32_C(-1943224858), INT32_C( 2112602876), INT32_C(-1445821889), INT32_C(-2100432693),
+                            INT32_C(-1175934343), INT32_C(  805502143), INT32_C( 1163969458), INT32_C(  873642413),
+                            INT32_C( 2052720739), INT32_C(-1010971457), INT32_C(  199344228), INT32_C(  251460647)),
+      UINT16_C(59134),
+      simde_mm512_set_epi32(INT32_C(-1391704351), INT32_C( -847303025), INT32_C(-1711491580), INT32_C( -147993971),
+                            INT32_C(-1140349230), INT32_C(  172650828), INT32_C(-2090294261), INT32_C( -216506888),
+                            INT32_C(-1813744120), INT32_C( 1589656338), INT32_C( 1010967585), INT32_C(-2076714127),
+                            INT32_C( 1156626662), INT32_C( -264321123), INT32_C(-1099385436), INT32_C( -148901794)),
+      simde_mm512_set_epi32(INT32_C( 1003282629), INT32_C( 1250297288), INT32_C(   26548422), INT32_C(-1100962758),
+                            INT32_C( 1934048830), INT32_C( -886200980), INT32_C( -228926178), INT32_C(   21722717),
+                            INT32_C(-1321187708), INT32_C(  904822803), INT32_C( -875700432), INT32_C(-1302414558),
+                            INT32_C(  962131440), INT32_C( -729214075), INT32_C(-1094266114), INT32_C( 1122895720)),
+      simde_mm512_set_epi32(INT32_C( 1899980316), INT32_C(-2097600313), INT32_C(-1738040002), INT32_C(  861837752),
+                            INT32_C(-1943224858), INT32_C( 1058851808), INT32_C(-1861368083), INT32_C(-2100432693),
+                            INT32_C( -492556412), INT32_C(  684833535), INT32_C( 1886668017), INT32_C( -774299569),
+                            INT32_C(  194495222), INT32_C(  464892952), INT32_C(   -5119322), INT32_C(  251460647)) }
+  };
+
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+    simde__m512i r = simde_mm512_mask_sub_epi32(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b);
+    simde_assert_m512i_i32(r, ==, test_vec[i].r);
+  }
+
+  return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm512_mask_sub_epi64(const MunitParameter params[], void* data) {
+  (void) params;
+  (void) data;
+
+  const struct {
+    simde__m512i src;
+    simde__mmask8 k;
+    simde__m512i a;
+    simde__m512i b;
+    simde__m512i r;
+  } test_vec[8] = {
+    { simde_mm512_set_epi64(INT64_C( 8894478799917719473), INT64_C(-7614529333518044459),
+                            INT64_C( 8458392650500739529), INT64_C( 7085639313865748967),
+                            INT64_C(-7547504459018552290), INT64_C(-8310189466716392279),
+                            INT64_C(-1750715323825344235), INT64_C(-2532781790488219528)),
+      UINT8_C(106),
+      simde_mm512_set_epi64(INT64_C(-7192427816606966254), INT64_C(-1619523557840103557),
+                            INT64_C( 7616061596213068646), INT64_C( -560841280842371832),
+                            INT64_C( -806373115982863580), INT64_C( -816793021936842074),
+                            INT64_C( -317565234288882547), INT64_C(-7290553309909260368)),
+      simde_mm512_set_epi64(INT64_C(-9084839040863053259), INT64_C(  332697972184433101),
+                            INT64_C(-8959492887484217950), INT64_C( 7617292932467329680),
+                            INT64_C(-2740045277871922718), INT64_C(-3634413508032825567),
+                            INT64_C( -448440935066054877), INT64_C(-6805574594168851327)),
+      simde_mm512_set_epi64(INT64_C( 8894478799917719473), INT64_C(-1952221530024536658),
+                            INT64_C(-1871189590012265020), INT64_C( 7085639313865748967),
+                            INT64_C( 1933672161889059138), INT64_C(-8310189466716392279),
+                            INT64_C(  130875700777172330), INT64_C(-2532781790488219528)) },
+    { simde_mm512_set_epi64(INT64_C(-3459089877760882917), INT64_C( 1753327656617706405),
+                            INT64_C( 3932187030396497555), INT64_C(-4341921971190139713),
+                            INT64_C(-7354864635860030437), INT64_C(-7512931671900842140),
+                            INT64_C( 7677521206664265888), INT64_C(-8008068901606036732)),
+      UINT8_C(  1),
+      simde_mm512_set_epi64(INT64_C(-9084086707853197365), INT64_C( 5962789269656503800),
+                            INT64_C( 6806616562165680967), INT64_C( 8724516399523474076),
+                            INT64_C( -924171789017863248), INT64_C(-2255835938032964673),
+                            INT64_C(-4560088794132063361), INT64_C(-5517329800302195238)),
+      simde_mm512_set_epi64(INT64_C(-2849655299932577704), INT64_C( 2712991932590941674),
+                            INT64_C( 2564329750539599066), INT64_C(-4536455326234991583),
+                            INT64_C(-6477728239233614839), INT64_C(-5729565646249538826),
+                            INT64_C( 3092410715614407585), INT64_C( 7984397770129184299)),
+      simde_mm512_set_epi64(INT64_C(-3459089877760882917), INT64_C( 1753327656617706405),
+                            INT64_C( 3932187030396497555), INT64_C(-4341921971190139713),
+                            INT64_C(-7354864635860030437), INT64_C(-7512931671900842140),
+                            INT64_C( 7677521206664265888), INT64_C( 4945016503278172079)) },
+    { simde_mm512_set_epi64(INT64_C( -240340334077349403), INT64_C( 5647038489743797240),
+                            INT64_C( 5171415873092064400), INT64_C(-1851380595205120917),
+                            INT64_C( -836370148956202078), INT64_C( 8425549504970400810),
+                            INT64_C( 2808549870315159479), INT64_C( 3545474415643732634)),
+      UINT8_C(194),
+      simde_mm512_set_epi64(INT64_C(-5877702108931305293), INT64_C(-5372639016544358566),
+                            INT64_C(-4535660820549680684), INT64_C(-6747544612783901147),
+                            INT64_C( 6705850594648382655), INT64_C(-1906321743942105225),
+                            INT64_C( -281981608123407868), INT64_C(-5990711758326206044)),
+      simde_mm512_set_epi64(INT64_C( 8110080903340414341), INT64_C(-3598578875674169061),
+                            INT64_C( 4977285870543484474), INT64_C( 6776152673642620958),
+                            INT64_C( 4245929756722282054), INT64_C( 3649495924615361625),
+                            INT64_C( -638056186877872345), INT64_C(-8828385988165140326)),
+      simde_mm512_set_epi64(INT64_C( 4458961061437831982), INT64_C(-1774060140870189505),
+                            INT64_C( 5171415873092064400), INT64_C(-1851380595205120917),
+                            INT64_C( -836370148956202078), INT64_C( 8425549504970400810),
+                            INT64_C(  356074578754464477), INT64_C( 3545474415643732634)) },
+    { simde_mm512_set_epi64(INT64_C(-6385979888474332285), INT64_C( 3716758445629922885),
+                            INT64_C( 7861010731589253148), INT64_C(-6334773111204875550),
+                            INT64_C(-5054960975820633825), INT64_C( 8639514840721539279),
+                            INT64_C(-1027366943904624518), INT64_C(-4721195859159142702)),
+      UINT8_C(222),
+      simde_mm512_set_epi64(INT64_C(-7001132877809342173), INT64_C( 6512733899690414848),
+                            INT64_C(  988878120815000883), INT64_C(-5994563704199492012),
+                            INT64_C( 1587634372980811194), INT64_C( -914749563856678715),
+                            INT64_C( 7495962388934953888), INT64_C(-7831181051188885332)),
+      simde_mm512_set_epi64(INT64_C( 4229507402435677476), INT64_C( 2501842736425447642),
+                            INT64_C( 8009397189160901283), INT64_C( 3833558633773719409),
+                            INT64_C( 2852442819818074174), INT64_C(-8638015813272823849),
+                            INT64_C( 8579593880416924807), INT64_C( 2713766728753976690)),
+      simde_mm512_set_epi64(INT64_C( 7216103793464531967), INT64_C( 4010891163264967206),
+                            INT64_C( 7861010731589253148), INT64_C( 8618621735736340195),
+                            INT64_C(-1264808446837262980), INT64_C( 7723266249416145134),
+                            INT64_C(-1083631491481970919), INT64_C(-4721195859159142702)) },
+    { simde_mm512_set_epi64(INT64_C( 4051614369896270101), INT64_C( 6703896128856670897),
+                            INT64_C(-5750389130785475983), INT64_C(-7878547924784098469),
+                            INT64_C( 5491867996743881624), INT64_C(-2189602113514909499),
+                            INT64_C( -887220462507309287), INT64_C(-5733898489940979010)),
+      UINT8_C( 26),
+      simde_mm512_set_epi64(INT64_C(  -99656633840764240), INT64_C(-3479731851565468885),
+                            INT64_C(-7074577238264434881), INT64_C(-3836339826871533273),
+                            INT64_C( 4198283975631841849), INT64_C(-3829622956767240841),
+                            INT64_C( 5960966148924368684), INT64_C( -504125670847055963)),
+      simde_mm512_set_epi64(INT64_C(-8344319212574510912), INT64_C(-3371415321000668561),
+                            INT64_C(-8338525176508042897), INT64_C( 5173420397567361383),
+                            INT64_C(-6751809518396836721), INT64_C(-8388491552134432960),
+                            INT64_C(-9161028627110906680), INT64_C( 7472048750700349549)),
+      simde_mm512_set_epi64(INT64_C( 4051614369896270101), INT64_C( 6703896128856670897),
+                            INT64_C(-5750389130785475983), INT64_C(-9009760224438894656),
+                            INT64_C(-7496650579680873046), INT64_C(-2189602113514909499),
+                            INT64_C(-3324749297674276252), INT64_C(-5733898489940979010)) },
+    { simde_mm512_set_epi64(INT64_C(-6378393891104748170), INT64_C(-8478287659785501826),
+                            INT64_C(-2127236125072242134), INT64_C( 8702738982982040445),
+                            INT64_C(  645844328650761785), INT64_C(-4561773442934600720),
+                            INT64_C(-5793568656482259588), INT64_C( -379681413311801170)),
+      UINT8_C(230),
+      simde_mm512_set_epi64(INT64_C( -848706848545220792), INT64_C(-1124075123789220737),
+                            INT64_C(-2005439629632543252), INT64_C( 8274388146286059619),
+                            INT64_C( -261550962782015927), INT64_C(-8761037216848109215),
+                            INT64_C(-3016365966836321630), INT64_C( 2543055264688040393)),
+      simde_mm512_set_epi64(INT64_C( 1583638370136684317), INT64_C(-1184919915070849427),
+                            INT64_C( 6948286910398693964), INT64_C( 2437457976149582578),
+                            INT64_C( 3426542754873284897), INT64_C(-7983270512780038531),
+                            INT64_C( 1779296328975282374), INT64_C(-5362999871220584978)),
+      simde_mm512_set_epi64(INT64_C(-2432345218681905109), INT64_C(   60844791281628690),
+                            INT64_C(-8953726540031237216), INT64_C( 8702738982982040445),
+                            INT64_C(  645844328650761785), INT64_C( -777766704068070684),
+                            INT64_C(-4795662295811604004), INT64_C( -379681413311801170)) },
+    { simde_mm512_set_epi64(INT64_C(-2563692560784467599), INT64_C(-2764729313181954331),
+                            INT64_C( 7449793955604076666), INT64_C(-6302011830015535814),
+                            INT64_C(-5919077484698028869), INT64_C(-6127059769393124093),
+                            INT64_C( 2958642729945465911), INT64_C( 2772140786646472311)),
+      UINT8_C(198),
+      simde_mm512_set_epi64(INT64_C(-3934991658845807023), INT64_C( 7561755153516237296),
+                            INT64_C(-1521478373140770922), INT64_C( 6956443634033398294),
+                            INT64_C(-5307063963483146371), INT64_C( 6556039892370535969),
+                            INT64_C(-6645788521893978945), INT64_C(-6307512051127595595)),
+      simde_mm512_set_epi64(INT64_C(-7270561721689602230), INT64_C( 8935792808270452615),
+                            INT64_C( 1984489943341614372), INT64_C( 6860868624136070257),
+                            INT64_C(-2243581398369652256), INT64_C(-6592818671779181804),
+                            INT64_C( -308663241436655846), INT64_C(-8935526257161672911)),
+      simde_mm512_set_epi64(INT64_C( 3335570062843795207), INT64_C(-1374037654754215319),
+                            INT64_C( 7449793955604076666), INT64_C(-6302011830015535814),
+                            INT64_C(-5919077484698028869), INT64_C(-5297885509559833843),
+                            INT64_C(-6337125280457323099), INT64_C( 2772140786646472311)) },
+    { simde_mm512_set_epi64(INT64_C(-7511866029206584895), INT64_C( 6685003933657692663),
+                            INT64_C(  112057327023275278), INT64_C( 2785131907782223781),
+                            INT64_C( -403719025987547254), INT64_C(-5974279397850363938),
+                            INT64_C(-6601571580489345254), INT64_C( 1896379997419403836)),
+      UINT8_C( 70),
+      simde_mm512_set_epi64(INT64_C(-6334367433946281110), INT64_C(-5840485098030444461),
+                            INT64_C(-6383956557021185117), INT64_C(-7600398675722821668),
+                            INT64_C(-2279362749413199885), INT64_C(-8009539466982888201),
+                            INT64_C(  340327559398526723), INT64_C(-2438629088141247826)),
+      simde_mm512_set_epi64(INT64_C( 3758222621544461478), INT64_C( 8264387002851618510),
+                            INT64_C( 5256515298231032169), INT64_C( 4555501816451377355),
+                            INT64_C(-9184304616258229288), INT64_C( 5115688705834988612),
+                            INT64_C(-3795492187184599084), INT64_C(-3221204559120447653)),
+      simde_mm512_set_epi64(INT64_C(-7511866029206584895), INT64_C( 4341871972827488645),
+                            INT64_C(  112057327023275278), INT64_C( 2785131907782223781),
+                            INT64_C( -403719025987547254), INT64_C( 5321515900891674803),
+                            INT64_C( 4135819746583125807), INT64_C( 1896379997419403836)) }
+  };
+
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+    simde__m512i r = simde_mm512_mask_sub_epi64(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b);
+    simde_assert_m512i_i64(r, ==, test_vec[i].r);
+  }
+
+  return MUNIT_OK;
+}
+
+static MunitResult /* Test: runs simde_mm512_mask_sub_ps over 8 precomputed vectors and compares with the stored results. */
+test_simde_mm512_mask_sub_ps(const MunitParameter params[], void* data) {
+  (void) params; /* unused */
+  (void) data; /* unused */
+
+  const struct { /* one test case: the four call arguments plus the expected result */
+    simde__m512 src; /* 1st argument; in every vector below r equals src in lanes whose mask bit is 0 */
+    simde__mmask16 k; /* 2nd argument: 16-bit lane mask */
+    simde__m512 a; /* 3rd argument (minuend in the stored results) */
+    simde__m512 b; /* 4th argument (subtrahend in the stored results) */
+    simde__m512 r; /* expected result: a - b where the mask bit is 1, src otherwise (reading the first set_ps argument as lane 15) */
+  } test_vec[8] = {
+    { simde_mm512_set_ps(SIMDE_FLOAT32_C(  -417.79), SIMDE_FLOAT32_C(  -912.83), SIMDE_FLOAT32_C(   111.29), SIMDE_FLOAT32_C(  -470.87), /* src */
+                         SIMDE_FLOAT32_C(   685.45), SIMDE_FLOAT32_C(   -92.85), SIMDE_FLOAT32_C(   704.55), SIMDE_FLOAT32_C(   450.79),
+                         SIMDE_FLOAT32_C(  -761.01), SIMDE_FLOAT32_C(  -759.35), SIMDE_FLOAT32_C(   646.77), SIMDE_FLOAT32_C(   616.33),
+                         SIMDE_FLOAT32_C(   922.76), SIMDE_FLOAT32_C(   721.94), SIMDE_FLOAT32_C(   721.78), SIMDE_FLOAT32_C(   651.66)),
+      UINT16_C(55049), /* k = 0xD709 */
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(   492.15), SIMDE_FLOAT32_C(   363.86), SIMDE_FLOAT32_C(  -906.93), SIMDE_FLOAT32_C(   -51.88), /* a */
+                         SIMDE_FLOAT32_C(   976.36), SIMDE_FLOAT32_C(   844.84), SIMDE_FLOAT32_C(   525.57), SIMDE_FLOAT32_C(   575.43),
+                         SIMDE_FLOAT32_C(  -719.61), SIMDE_FLOAT32_C(   570.91), SIMDE_FLOAT32_C(  -748.06), SIMDE_FLOAT32_C(   823.89),
+                         SIMDE_FLOAT32_C(  -708.11), SIMDE_FLOAT32_C(  -805.87), SIMDE_FLOAT32_C(   626.28), SIMDE_FLOAT32_C(   344.43)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(  -814.48), SIMDE_FLOAT32_C(   843.19), SIMDE_FLOAT32_C(  -866.28), SIMDE_FLOAT32_C(  -230.51), /* b */
+                         SIMDE_FLOAT32_C(  -264.51), SIMDE_FLOAT32_C(   935.39), SIMDE_FLOAT32_C(   479.68), SIMDE_FLOAT32_C(  -375.52),
+                         SIMDE_FLOAT32_C(  -928.92), SIMDE_FLOAT32_C(  -243.75), SIMDE_FLOAT32_C(   771.60), SIMDE_FLOAT32_C(   150.31),
+                         SIMDE_FLOAT32_C(  -627.83), SIMDE_FLOAT32_C(  -720.61), SIMDE_FLOAT32_C(   345.13), SIMDE_FLOAT32_C(   203.00)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(  1306.63), SIMDE_FLOAT32_C(  -479.33), SIMDE_FLOAT32_C(   111.29), SIMDE_FLOAT32_C(   178.63), /* r (expected) */
+                         SIMDE_FLOAT32_C(   685.45), SIMDE_FLOAT32_C(   -90.55), SIMDE_FLOAT32_C(    45.89), SIMDE_FLOAT32_C(   950.95),
+                         SIMDE_FLOAT32_C(  -761.01), SIMDE_FLOAT32_C(  -759.35), SIMDE_FLOAT32_C(   646.77), SIMDE_FLOAT32_C(   616.33),
+                         SIMDE_FLOAT32_C(   -80.28), SIMDE_FLOAT32_C(   721.94), SIMDE_FLOAT32_C(   721.78), SIMDE_FLOAT32_C(   141.43)) },
+    { simde_mm512_set_ps(SIMDE_FLOAT32_C(  -594.79), SIMDE_FLOAT32_C(   -68.26), SIMDE_FLOAT32_C(   772.68), SIMDE_FLOAT32_C(  -615.12),
+                         SIMDE_FLOAT32_C(   489.20), SIMDE_FLOAT32_C(  -609.74), SIMDE_FLOAT32_C(  -297.42), SIMDE_FLOAT32_C(  -701.58),
+                         SIMDE_FLOAT32_C(    71.34), SIMDE_FLOAT32_C(  -811.20), SIMDE_FLOAT32_C(   -44.61), SIMDE_FLOAT32_C(   172.32),
+                         SIMDE_FLOAT32_C(  -336.24), SIMDE_FLOAT32_C(  -959.77), SIMDE_FLOAT32_C(   896.40), SIMDE_FLOAT32_C(   321.28)),
+      UINT16_C( 2266), /* k = 0x08DA */
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(   136.73), SIMDE_FLOAT32_C(   408.70), SIMDE_FLOAT32_C(   907.04), SIMDE_FLOAT32_C(   175.32),
+                         SIMDE_FLOAT32_C(   125.78), SIMDE_FLOAT32_C(  -176.42), SIMDE_FLOAT32_C(  -192.20), SIMDE_FLOAT32_C(   636.29),
+                         SIMDE_FLOAT32_C(  -812.72), SIMDE_FLOAT32_C(  -295.02), SIMDE_FLOAT32_C(   426.00), SIMDE_FLOAT32_C(   348.29),
+                         SIMDE_FLOAT32_C(   859.20), SIMDE_FLOAT32_C(   -28.95), SIMDE_FLOAT32_C(  -637.06), SIMDE_FLOAT32_C(  -450.15)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(  -929.87), SIMDE_FLOAT32_C(  -208.53), SIMDE_FLOAT32_C(   561.71), SIMDE_FLOAT32_C(   -74.05),
+                         SIMDE_FLOAT32_C(   477.79), SIMDE_FLOAT32_C(   772.49), SIMDE_FLOAT32_C(   648.48), SIMDE_FLOAT32_C(   -58.61),
+                         SIMDE_FLOAT32_C(   835.38), SIMDE_FLOAT32_C(  -689.00), SIMDE_FLOAT32_C(   607.03), SIMDE_FLOAT32_C(   421.78),
+                         SIMDE_FLOAT32_C(  -574.15), SIMDE_FLOAT32_C(   302.76), SIMDE_FLOAT32_C(   178.11), SIMDE_FLOAT32_C(  -298.57)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(  -594.79), SIMDE_FLOAT32_C(   -68.26), SIMDE_FLOAT32_C(   772.68), SIMDE_FLOAT32_C(  -615.12),
+                         SIMDE_FLOAT32_C(  -352.01), SIMDE_FLOAT32_C(  -609.74), SIMDE_FLOAT32_C(  -297.42), SIMDE_FLOAT32_C(  -701.58),
+                         SIMDE_FLOAT32_C( -1648.10), SIMDE_FLOAT32_C(   393.98), SIMDE_FLOAT32_C(   -44.61), SIMDE_FLOAT32_C(   -73.49),
+                         SIMDE_FLOAT32_C(  1433.35), SIMDE_FLOAT32_C(  -959.77), SIMDE_FLOAT32_C(  -815.17), SIMDE_FLOAT32_C(   321.28)) },
+    { simde_mm512_set_ps(SIMDE_FLOAT32_C(  -914.76), SIMDE_FLOAT32_C(   285.68), SIMDE_FLOAT32_C(   695.03), SIMDE_FLOAT32_C(  -235.78),
+                         SIMDE_FLOAT32_C(    90.17), SIMDE_FLOAT32_C(   891.02), SIMDE_FLOAT32_C(  -456.46), SIMDE_FLOAT32_C(   952.55),
+                         SIMDE_FLOAT32_C(  -153.33), SIMDE_FLOAT32_C(  -533.35), SIMDE_FLOAT32_C(  -130.02), SIMDE_FLOAT32_C(  -580.21),
+                         SIMDE_FLOAT32_C(  -857.73), SIMDE_FLOAT32_C(  -362.64), SIMDE_FLOAT32_C(   808.25), SIMDE_FLOAT32_C(   908.95)),
+      UINT16_C(53407), /* k = 0xD09F */
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(   415.38), SIMDE_FLOAT32_C(   622.33), SIMDE_FLOAT32_C(   849.49), SIMDE_FLOAT32_C(  -552.97),
+                         SIMDE_FLOAT32_C(   837.01), SIMDE_FLOAT32_C(  -753.98), SIMDE_FLOAT32_C(   167.51), SIMDE_FLOAT32_C(   898.60),
+                         SIMDE_FLOAT32_C(   -36.68), SIMDE_FLOAT32_C(  -931.19), SIMDE_FLOAT32_C(   230.22), SIMDE_FLOAT32_C(  -885.80),
+                         SIMDE_FLOAT32_C(  -894.49), SIMDE_FLOAT32_C(  -402.23), SIMDE_FLOAT32_C(   -68.60), SIMDE_FLOAT32_C(  -153.88)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(   247.18), SIMDE_FLOAT32_C(   507.40), SIMDE_FLOAT32_C(  -715.17), SIMDE_FLOAT32_C(   785.48),
+                         SIMDE_FLOAT32_C(  -543.41), SIMDE_FLOAT32_C(   761.08), SIMDE_FLOAT32_C(   479.07), SIMDE_FLOAT32_C(  -938.93),
+                         SIMDE_FLOAT32_C(  -655.56), SIMDE_FLOAT32_C(   618.55), SIMDE_FLOAT32_C(   224.83), SIMDE_FLOAT32_C(  -983.99),
+                         SIMDE_FLOAT32_C(   -18.22), SIMDE_FLOAT32_C(  -142.62), SIMDE_FLOAT32_C(   120.01), SIMDE_FLOAT32_C(   186.92)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(   168.20), SIMDE_FLOAT32_C(   114.93), SIMDE_FLOAT32_C(   695.03), SIMDE_FLOAT32_C( -1338.45),
+                         SIMDE_FLOAT32_C(    90.17), SIMDE_FLOAT32_C(   891.02), SIMDE_FLOAT32_C(  -456.46), SIMDE_FLOAT32_C(   952.55),
+                         SIMDE_FLOAT32_C(   618.88), SIMDE_FLOAT32_C(  -533.35), SIMDE_FLOAT32_C(  -130.02), SIMDE_FLOAT32_C(    98.19),
+                         SIMDE_FLOAT32_C(  -876.27), SIMDE_FLOAT32_C(  -259.61), SIMDE_FLOAT32_C(  -188.61), SIMDE_FLOAT32_C(  -340.80)) },
+    { simde_mm512_set_ps(SIMDE_FLOAT32_C(  -586.97), SIMDE_FLOAT32_C(  -706.71), SIMDE_FLOAT32_C(   862.31), SIMDE_FLOAT32_C(   901.76),
+                         SIMDE_FLOAT32_C(  -777.23), SIMDE_FLOAT32_C(  -615.23), SIMDE_FLOAT32_C(   540.06), SIMDE_FLOAT32_C(  -837.05),
+                         SIMDE_FLOAT32_C(   896.68), SIMDE_FLOAT32_C(  -818.79), SIMDE_FLOAT32_C(  -146.21), SIMDE_FLOAT32_C(  -751.20),
+                         SIMDE_FLOAT32_C(  -724.86), SIMDE_FLOAT32_C(  -446.10), SIMDE_FLOAT32_C(   747.21), SIMDE_FLOAT32_C(  -830.22)),
+      UINT16_C(24145), /* k = 0x5E51 */
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(   809.72), SIMDE_FLOAT32_C(  -191.45), SIMDE_FLOAT32_C(  -687.88), SIMDE_FLOAT32_C(  -561.69),
+                         SIMDE_FLOAT32_C(   623.06), SIMDE_FLOAT32_C(  -685.16), SIMDE_FLOAT32_C(   155.59), SIMDE_FLOAT32_C(   -91.67),
+                         SIMDE_FLOAT32_C(  -292.32), SIMDE_FLOAT32_C(   436.29), SIMDE_FLOAT32_C(   682.53), SIMDE_FLOAT32_C(  -427.71),
+                         SIMDE_FLOAT32_C(  -252.26), SIMDE_FLOAT32_C(  -814.33), SIMDE_FLOAT32_C(  -116.78), SIMDE_FLOAT32_C(  -176.18)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(  -476.63), SIMDE_FLOAT32_C(  -403.49), SIMDE_FLOAT32_C(  -129.06), SIMDE_FLOAT32_C(  -540.32),
+                         SIMDE_FLOAT32_C(  -296.84), SIMDE_FLOAT32_C(   354.93), SIMDE_FLOAT32_C(   301.70), SIMDE_FLOAT32_C(   818.26),
+                         SIMDE_FLOAT32_C(   152.41), SIMDE_FLOAT32_C(    -7.33), SIMDE_FLOAT32_C(   901.12), SIMDE_FLOAT32_C(   276.49),
+                         SIMDE_FLOAT32_C(  -421.45), SIMDE_FLOAT32_C(   -19.17), SIMDE_FLOAT32_C(   559.47), SIMDE_FLOAT32_C(   -62.60)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(  -586.97), SIMDE_FLOAT32_C(   212.04), SIMDE_FLOAT32_C(   862.31), SIMDE_FLOAT32_C(   -21.37),
+                         SIMDE_FLOAT32_C(   919.90), SIMDE_FLOAT32_C( -1040.09), SIMDE_FLOAT32_C(  -146.11), SIMDE_FLOAT32_C(  -837.05),
+                         SIMDE_FLOAT32_C(   896.68), SIMDE_FLOAT32_C(   443.62), SIMDE_FLOAT32_C(  -146.21), SIMDE_FLOAT32_C(  -704.20),
+                         SIMDE_FLOAT32_C(  -724.86), SIMDE_FLOAT32_C(  -446.10), SIMDE_FLOAT32_C(   747.21), SIMDE_FLOAT32_C(  -113.58)) },
+    { simde_mm512_set_ps(SIMDE_FLOAT32_C(   853.44), SIMDE_FLOAT32_C(   804.93), SIMDE_FLOAT32_C(   753.54), SIMDE_FLOAT32_C(   129.42),
+                         SIMDE_FLOAT32_C(  -911.24), SIMDE_FLOAT32_C(  -795.01), SIMDE_FLOAT32_C(  -264.21), SIMDE_FLOAT32_C(   110.23),
+                         SIMDE_FLOAT32_C(   779.42), SIMDE_FLOAT32_C(   756.19), SIMDE_FLOAT32_C(   -61.94), SIMDE_FLOAT32_C(  -845.71),
+                         SIMDE_FLOAT32_C(   522.75), SIMDE_FLOAT32_C(   703.06), SIMDE_FLOAT32_C(   989.80), SIMDE_FLOAT32_C(   594.14)),
+      UINT16_C(58122), /* k = 0xE30A */
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(   774.43), SIMDE_FLOAT32_C(   251.56), SIMDE_FLOAT32_C(  -915.66), SIMDE_FLOAT32_C(  -492.31),
+                         SIMDE_FLOAT32_C(   722.32), SIMDE_FLOAT32_C(   853.19), SIMDE_FLOAT32_C(   466.28), SIMDE_FLOAT32_C(   573.97),
+                         SIMDE_FLOAT32_C(  -516.73), SIMDE_FLOAT32_C(  -267.27), SIMDE_FLOAT32_C(   110.95), SIMDE_FLOAT32_C(   -68.16),
+                         SIMDE_FLOAT32_C(  -400.30), SIMDE_FLOAT32_C(   327.53), SIMDE_FLOAT32_C(  -638.51), SIMDE_FLOAT32_C(   -96.92)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(   101.96), SIMDE_FLOAT32_C(  -734.61), SIMDE_FLOAT32_C(   219.43), SIMDE_FLOAT32_C(  -507.66),
+                         SIMDE_FLOAT32_C(  -747.54), SIMDE_FLOAT32_C(   794.68), SIMDE_FLOAT32_C(  -663.99), SIMDE_FLOAT32_C(  -123.94),
+                         SIMDE_FLOAT32_C(  -793.12), SIMDE_FLOAT32_C(   673.57), SIMDE_FLOAT32_C(  -777.14), SIMDE_FLOAT32_C(   175.88),
+                         SIMDE_FLOAT32_C(  -792.24), SIMDE_FLOAT32_C(  -246.51), SIMDE_FLOAT32_C(   848.21), SIMDE_FLOAT32_C(  -124.15)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(   672.47), SIMDE_FLOAT32_C(   986.17), SIMDE_FLOAT32_C( -1135.09), SIMDE_FLOAT32_C(   129.42),
+                         SIMDE_FLOAT32_C(  -911.24), SIMDE_FLOAT32_C(  -795.01), SIMDE_FLOAT32_C(  1130.27), SIMDE_FLOAT32_C(   697.91),
+                         SIMDE_FLOAT32_C(   779.42), SIMDE_FLOAT32_C(   756.19), SIMDE_FLOAT32_C(   -61.94), SIMDE_FLOAT32_C(  -845.71),
+                         SIMDE_FLOAT32_C(   391.94), SIMDE_FLOAT32_C(   703.06), SIMDE_FLOAT32_C( -1486.72), SIMDE_FLOAT32_C(   594.14)) },
+    { simde_mm512_set_ps(SIMDE_FLOAT32_C(  -670.29), SIMDE_FLOAT32_C(   821.01), SIMDE_FLOAT32_C(  -293.06), SIMDE_FLOAT32_C(   -56.42),
+                         SIMDE_FLOAT32_C(  -163.64), SIMDE_FLOAT32_C(  -919.47), SIMDE_FLOAT32_C(   636.75), SIMDE_FLOAT32_C(   555.64),
+                         SIMDE_FLOAT32_C(   630.28), SIMDE_FLOAT32_C(   798.33), SIMDE_FLOAT32_C(  -536.88), SIMDE_FLOAT32_C(   256.29),
+                         SIMDE_FLOAT32_C(   834.99), SIMDE_FLOAT32_C(  -678.50), SIMDE_FLOAT32_C(  -716.28), SIMDE_FLOAT32_C(  -235.17)),
+      UINT16_C( 7968), /* k = 0x1F20 */
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(   575.18), SIMDE_FLOAT32_C(  -655.63), SIMDE_FLOAT32_C(   986.91), SIMDE_FLOAT32_C(   710.96),
+                         SIMDE_FLOAT32_C(   921.30), SIMDE_FLOAT32_C(   -96.00), SIMDE_FLOAT32_C(   -68.75), SIMDE_FLOAT32_C(  -119.17),
+                         SIMDE_FLOAT32_C(  -795.52), SIMDE_FLOAT32_C(  -851.06), SIMDE_FLOAT32_C(   982.58), SIMDE_FLOAT32_C(   432.45),
+                         SIMDE_FLOAT32_C(   834.71), SIMDE_FLOAT32_C(  -931.48), SIMDE_FLOAT32_C(   421.86), SIMDE_FLOAT32_C(   549.54)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(   699.42), SIMDE_FLOAT32_C(  -430.21), SIMDE_FLOAT32_C(  -842.83), SIMDE_FLOAT32_C(  -375.32),
+                         SIMDE_FLOAT32_C(  -889.13), SIMDE_FLOAT32_C(    77.46), SIMDE_FLOAT32_C(  -426.32), SIMDE_FLOAT32_C(  -319.52),
+                         SIMDE_FLOAT32_C(   633.46), SIMDE_FLOAT32_C(  -484.05), SIMDE_FLOAT32_C(   991.09), SIMDE_FLOAT32_C(   894.84),
+                         SIMDE_FLOAT32_C(   148.17), SIMDE_FLOAT32_C(  -167.11), SIMDE_FLOAT32_C(  -811.87), SIMDE_FLOAT32_C(  -574.29)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(  -670.29), SIMDE_FLOAT32_C(   821.01), SIMDE_FLOAT32_C(  -293.06), SIMDE_FLOAT32_C(  1086.28),
+                         SIMDE_FLOAT32_C(  1810.43), SIMDE_FLOAT32_C(  -173.46), SIMDE_FLOAT32_C(   357.57), SIMDE_FLOAT32_C(   200.35),
+                         SIMDE_FLOAT32_C(   630.28), SIMDE_FLOAT32_C(   798.33), SIMDE_FLOAT32_C(    -8.51), SIMDE_FLOAT32_C(   256.29),
+                         SIMDE_FLOAT32_C(   834.99), SIMDE_FLOAT32_C(  -678.50), SIMDE_FLOAT32_C(  -716.28), SIMDE_FLOAT32_C(  -235.17)) },
+    { simde_mm512_set_ps(SIMDE_FLOAT32_C(   640.00), SIMDE_FLOAT32_C(   440.55), SIMDE_FLOAT32_C(   793.44), SIMDE_FLOAT32_C(   554.05),
+                         SIMDE_FLOAT32_C(   245.74), SIMDE_FLOAT32_C(  -388.16), SIMDE_FLOAT32_C(   -27.32), SIMDE_FLOAT32_C(  -923.44),
+                         SIMDE_FLOAT32_C(   109.81), SIMDE_FLOAT32_C(   855.67), SIMDE_FLOAT32_C(  -513.53), SIMDE_FLOAT32_C(  -921.47),
+                         SIMDE_FLOAT32_C(  -410.90), SIMDE_FLOAT32_C(  -404.15), SIMDE_FLOAT32_C(  -502.43), SIMDE_FLOAT32_C(  -674.13)),
+      UINT16_C(34235), /* k = 0x85BB */
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(   455.94), SIMDE_FLOAT32_C(   822.75), SIMDE_FLOAT32_C(   672.52), SIMDE_FLOAT32_C(   418.16),
+                         SIMDE_FLOAT32_C(   993.17), SIMDE_FLOAT32_C(  -581.12), SIMDE_FLOAT32_C(   737.02), SIMDE_FLOAT32_C(   -48.12),
+                         SIMDE_FLOAT32_C(   169.53), SIMDE_FLOAT32_C(   875.02), SIMDE_FLOAT32_C(   325.94), SIMDE_FLOAT32_C(  -197.05),
+                         SIMDE_FLOAT32_C(   209.80), SIMDE_FLOAT32_C(   679.16), SIMDE_FLOAT32_C(  -743.34), SIMDE_FLOAT32_C(   192.93)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(  -400.32), SIMDE_FLOAT32_C(   747.89), SIMDE_FLOAT32_C(  -417.14), SIMDE_FLOAT32_C(  -149.76),
+                         SIMDE_FLOAT32_C(  -769.13), SIMDE_FLOAT32_C(   952.70), SIMDE_FLOAT32_C(    55.59), SIMDE_FLOAT32_C(  -118.59),
+                         SIMDE_FLOAT32_C(  -651.36), SIMDE_FLOAT32_C(   213.50), SIMDE_FLOAT32_C(   998.39), SIMDE_FLOAT32_C(   155.85),
+                         SIMDE_FLOAT32_C(   985.22), SIMDE_FLOAT32_C(  -399.37), SIMDE_FLOAT32_C(  -660.54), SIMDE_FLOAT32_C(  -918.87)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(   856.26), SIMDE_FLOAT32_C(   440.55), SIMDE_FLOAT32_C(   793.44), SIMDE_FLOAT32_C(   554.05),
+                         SIMDE_FLOAT32_C(   245.74), SIMDE_FLOAT32_C( -1533.82), SIMDE_FLOAT32_C(   -27.32), SIMDE_FLOAT32_C(    70.47),
+                         SIMDE_FLOAT32_C(   820.89), SIMDE_FLOAT32_C(   855.67), SIMDE_FLOAT32_C(  -672.45), SIMDE_FLOAT32_C(  -352.90),
+                         SIMDE_FLOAT32_C(  -775.42), SIMDE_FLOAT32_C(  -404.15), SIMDE_FLOAT32_C(   -82.80), SIMDE_FLOAT32_C(  1111.80)) },
+    { simde_mm512_set_ps(SIMDE_FLOAT32_C(  -717.43), SIMDE_FLOAT32_C(   307.65), SIMDE_FLOAT32_C(  -776.64), SIMDE_FLOAT32_C(   883.24),
+                         SIMDE_FLOAT32_C(   462.38), SIMDE_FLOAT32_C(   941.52), SIMDE_FLOAT32_C(   465.21), SIMDE_FLOAT32_C(   772.92),
+                         SIMDE_FLOAT32_C(  -448.96), SIMDE_FLOAT32_C(   167.95), SIMDE_FLOAT32_C(  -770.79), SIMDE_FLOAT32_C(   607.02),
+                         SIMDE_FLOAT32_C(   588.25), SIMDE_FLOAT32_C(  -430.65), SIMDE_FLOAT32_C(  -379.22), SIMDE_FLOAT32_C(    62.66)),
+      UINT16_C(21184), /* k = 0x52C0 */
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(   745.31), SIMDE_FLOAT32_C(   528.47), SIMDE_FLOAT32_C(   260.56), SIMDE_FLOAT32_C(   756.92),
+                         SIMDE_FLOAT32_C(  -237.78), SIMDE_FLOAT32_C(   890.33), SIMDE_FLOAT32_C(  -276.66), SIMDE_FLOAT32_C(  -845.25),
+                         SIMDE_FLOAT32_C(    73.01), SIMDE_FLOAT32_C(  -169.10), SIMDE_FLOAT32_C(  -390.26), SIMDE_FLOAT32_C(    55.87),
+                         SIMDE_FLOAT32_C(   461.32), SIMDE_FLOAT32_C(  -911.03), SIMDE_FLOAT32_C(   362.01), SIMDE_FLOAT32_C(   998.06)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(   177.96), SIMDE_FLOAT32_C(  -105.40), SIMDE_FLOAT32_C(  -516.55), SIMDE_FLOAT32_C(   -62.31),
+                         SIMDE_FLOAT32_C(  -757.68), SIMDE_FLOAT32_C(   665.34), SIMDE_FLOAT32_C(   689.63), SIMDE_FLOAT32_C(   938.32),
+                         SIMDE_FLOAT32_C(  -408.00), SIMDE_FLOAT32_C(   998.26), SIMDE_FLOAT32_C(  -263.70), SIMDE_FLOAT32_C(   807.54),
+                         SIMDE_FLOAT32_C(   485.72), SIMDE_FLOAT32_C(   -74.68), SIMDE_FLOAT32_C(   725.36), SIMDE_FLOAT32_C(   301.00)),
+      simde_mm512_set_ps(SIMDE_FLOAT32_C(  -717.43), SIMDE_FLOAT32_C(   633.87), SIMDE_FLOAT32_C(  -776.64), SIMDE_FLOAT32_C(   819.23),
+                         SIMDE_FLOAT32_C(   462.38), SIMDE_FLOAT32_C(   941.52), SIMDE_FLOAT32_C(  -966.29), SIMDE_FLOAT32_C(   772.92),
+                         SIMDE_FLOAT32_C(   481.01), SIMDE_FLOAT32_C( -1167.36), SIMDE_FLOAT32_C(  -770.79), SIMDE_FLOAT32_C(   607.02),
+                         SIMDE_FLOAT32_C(   588.25), SIMDE_FLOAT32_C(  -430.65), SIMDE_FLOAT32_C(  -379.22), SIMDE_FLOAT32_C(    62.66)) }
+  };
+
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { /* visit every entry of test_vec */
+    simde__m512 r = simde_mm512_mask_sub_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); /* function under test */
+    simde_assert_m512_close(r, test_vec[i].r, 1); /* approximate compare; NOTE(review): the 1 is presumably a decimal-place precision -- confirm against the macro definition */
+  }
+
+  return MUNIT_OK; /* reached only when no assertion aborted the test */
+}
+
+static MunitResult /* Test: runs simde_mm512_mask_sub_pd over 8 precomputed vectors and compares with the stored results. */
+test_simde_mm512_mask_sub_pd(const MunitParameter params[], void* data) {
+  (void) params; /* unused */
+  (void) data; /* unused */
+
+  const struct { /* one test case: the four call arguments plus the expected result */
+    simde__m512d src; /* 1st argument; in every vector below r equals src in lanes whose mask bit is 0 */
+    simde__mmask8 k; /* 2nd argument: 8-bit lane mask */
+    simde__m512d a; /* 3rd argument (minuend in the stored results) */
+    simde__m512d b; /* 4th argument (subtrahend in the stored results) */
+    simde__m512d r; /* expected result: a - b where the mask bit is 1, src otherwise (reading the first set_pd argument as lane 7) */
+  } test_vec[8] = {
+    { simde_mm512_set_pd(SIMDE_FLOAT64_C( -621.09), SIMDE_FLOAT64_C(  350.18), /* src */
+                         SIMDE_FLOAT64_C(  873.40), SIMDE_FLOAT64_C( -136.67),
+                         SIMDE_FLOAT64_C( -484.90), SIMDE_FLOAT64_C(  672.37),
+                         SIMDE_FLOAT64_C( -983.97), SIMDE_FLOAT64_C( -747.18)),
+      UINT8_C(213), /* k = 0xD5 */
+      simde_mm512_set_pd(SIMDE_FLOAT64_C( -615.22), SIMDE_FLOAT64_C(  861.93), /* a */
+                         SIMDE_FLOAT64_C(  -99.63), SIMDE_FLOAT64_C( -760.72),
+                         SIMDE_FLOAT64_C(  803.54), SIMDE_FLOAT64_C( -811.65),
+                         SIMDE_FLOAT64_C( -888.48), SIMDE_FLOAT64_C(  353.19)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(  451.54), SIMDE_FLOAT64_C(  490.96), /* b */
+                         SIMDE_FLOAT64_C( -563.07), SIMDE_FLOAT64_C( -968.95),
+                         SIMDE_FLOAT64_C( -964.80), SIMDE_FLOAT64_C( -259.48),
+                         SIMDE_FLOAT64_C(  -97.31), SIMDE_FLOAT64_C(  696.26)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(-1066.76), SIMDE_FLOAT64_C(  370.97), /* r (expected) */
+                         SIMDE_FLOAT64_C(  873.40), SIMDE_FLOAT64_C(  208.23),
+                         SIMDE_FLOAT64_C( -484.90), SIMDE_FLOAT64_C( -552.17),
+                         SIMDE_FLOAT64_C( -983.97), SIMDE_FLOAT64_C( -343.07)) },
+    { simde_mm512_set_pd(SIMDE_FLOAT64_C(  956.74), SIMDE_FLOAT64_C(  507.70),
+                         SIMDE_FLOAT64_C(  525.25), SIMDE_FLOAT64_C( -653.24),
+                         SIMDE_FLOAT64_C( -748.66), SIMDE_FLOAT64_C(  738.72),
+                         SIMDE_FLOAT64_C(  584.29), SIMDE_FLOAT64_C( -344.89)),
+      UINT8_C(200), /* k = 0xC8 */
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(  -70.99), SIMDE_FLOAT64_C( -712.48),
+                         SIMDE_FLOAT64_C(  721.37), SIMDE_FLOAT64_C(  290.11),
+                         SIMDE_FLOAT64_C(  739.65), SIMDE_FLOAT64_C(  378.13),
+                         SIMDE_FLOAT64_C(  523.23), SIMDE_FLOAT64_C(  338.41)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C( -243.21), SIMDE_FLOAT64_C(   71.87),
+                         SIMDE_FLOAT64_C(   81.06), SIMDE_FLOAT64_C(  409.05),
+                         SIMDE_FLOAT64_C( -595.58), SIMDE_FLOAT64_C(  278.33),
+                         SIMDE_FLOAT64_C( -484.02), SIMDE_FLOAT64_C( -861.59)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(  172.22), SIMDE_FLOAT64_C( -784.35),
+                         SIMDE_FLOAT64_C(  525.25), SIMDE_FLOAT64_C( -653.24),
+                         SIMDE_FLOAT64_C( 1335.23), SIMDE_FLOAT64_C(  738.72),
+                         SIMDE_FLOAT64_C(  584.29), SIMDE_FLOAT64_C( -344.89)) },
+    { simde_mm512_set_pd(SIMDE_FLOAT64_C(  475.39), SIMDE_FLOAT64_C(  345.93),
+                         SIMDE_FLOAT64_C(  233.76), SIMDE_FLOAT64_C( -401.11),
+                         SIMDE_FLOAT64_C( -964.57), SIMDE_FLOAT64_C(  939.13),
+                         SIMDE_FLOAT64_C( -392.63), SIMDE_FLOAT64_C( -585.02)),
+      UINT8_C( 75), /* k = 0x4B */
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(  496.11), SIMDE_FLOAT64_C( -235.94),
+                         SIMDE_FLOAT64_C( -715.35), SIMDE_FLOAT64_C(  338.71),
+                         SIMDE_FLOAT64_C( -776.11), SIMDE_FLOAT64_C(  941.96),
+                         SIMDE_FLOAT64_C(   76.10), SIMDE_FLOAT64_C( -188.31)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(  824.70), SIMDE_FLOAT64_C( -886.45),
+                         SIMDE_FLOAT64_C(  497.17), SIMDE_FLOAT64_C( -965.13),
+                         SIMDE_FLOAT64_C( -601.99), SIMDE_FLOAT64_C( -657.07),
+                         SIMDE_FLOAT64_C(  201.36), SIMDE_FLOAT64_C( -807.98)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(  475.39), SIMDE_FLOAT64_C(  650.51),
+                         SIMDE_FLOAT64_C(  233.76), SIMDE_FLOAT64_C( -401.11),
+                         SIMDE_FLOAT64_C( -174.12), SIMDE_FLOAT64_C(  939.13),
+                         SIMDE_FLOAT64_C( -125.26), SIMDE_FLOAT64_C(  619.67)) },
+    { simde_mm512_set_pd(SIMDE_FLOAT64_C( -246.72), SIMDE_FLOAT64_C( -493.17),
+                         SIMDE_FLOAT64_C( -501.93), SIMDE_FLOAT64_C(  -95.50),
+                         SIMDE_FLOAT64_C(  754.55), SIMDE_FLOAT64_C( -990.48),
+                         SIMDE_FLOAT64_C( -396.36), SIMDE_FLOAT64_C( -466.97)),
+      UINT8_C( 69), /* k = 0x45 */
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(  601.28), SIMDE_FLOAT64_C( -873.85),
+                         SIMDE_FLOAT64_C( -689.96), SIMDE_FLOAT64_C(   31.77),
+                         SIMDE_FLOAT64_C(  -97.11), SIMDE_FLOAT64_C(  971.94),
+                         SIMDE_FLOAT64_C(  389.02), SIMDE_FLOAT64_C( -650.79)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(  136.61), SIMDE_FLOAT64_C(  436.94),
+                         SIMDE_FLOAT64_C( -777.02), SIMDE_FLOAT64_C(  166.29),
+                         SIMDE_FLOAT64_C( -377.75), SIMDE_FLOAT64_C(   71.16),
+                         SIMDE_FLOAT64_C(  481.01), SIMDE_FLOAT64_C( -926.81)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C( -246.72), SIMDE_FLOAT64_C(-1310.79),
+                         SIMDE_FLOAT64_C( -501.93), SIMDE_FLOAT64_C(  -95.50),
+                         SIMDE_FLOAT64_C(  754.55), SIMDE_FLOAT64_C(  900.78),
+                         SIMDE_FLOAT64_C( -396.36), SIMDE_FLOAT64_C(  276.02)) },
+    { simde_mm512_set_pd(SIMDE_FLOAT64_C( -389.46), SIMDE_FLOAT64_C(   -8.03),
+                         SIMDE_FLOAT64_C( -523.51), SIMDE_FLOAT64_C(  466.89),
+                         SIMDE_FLOAT64_C(  698.90), SIMDE_FLOAT64_C( -346.04),
+                         SIMDE_FLOAT64_C( -734.67), SIMDE_FLOAT64_C(  404.34)),
+      UINT8_C(100), /* k = 0x64 */
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(  989.13), SIMDE_FLOAT64_C(  228.14),
+                         SIMDE_FLOAT64_C(  840.94), SIMDE_FLOAT64_C( -718.83),
+                         SIMDE_FLOAT64_C(  274.95), SIMDE_FLOAT64_C(  -99.21),
+                         SIMDE_FLOAT64_C(   84.76), SIMDE_FLOAT64_C( -295.84)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C( -956.22), SIMDE_FLOAT64_C(  564.94),
+                         SIMDE_FLOAT64_C(  -97.16), SIMDE_FLOAT64_C( -407.99),
+                         SIMDE_FLOAT64_C(  352.62), SIMDE_FLOAT64_C(  244.25),
+                         SIMDE_FLOAT64_C(   43.92), SIMDE_FLOAT64_C(  624.69)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C( -389.46), SIMDE_FLOAT64_C( -336.80),
+                         SIMDE_FLOAT64_C(  938.10), SIMDE_FLOAT64_C(  466.89),
+                         SIMDE_FLOAT64_C(  698.90), SIMDE_FLOAT64_C( -343.46),
+                         SIMDE_FLOAT64_C( -734.67), SIMDE_FLOAT64_C(  404.34)) },
+    { simde_mm512_set_pd(SIMDE_FLOAT64_C( -571.96), SIMDE_FLOAT64_C(   40.27),
+                         SIMDE_FLOAT64_C(  676.69), SIMDE_FLOAT64_C( -150.37),
+                         SIMDE_FLOAT64_C(  945.34), SIMDE_FLOAT64_C(   75.83),
+                         SIMDE_FLOAT64_C(   64.75), SIMDE_FLOAT64_C(  239.06)),
+      UINT8_C(209), /* k = 0xD1 */
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(  792.47), SIMDE_FLOAT64_C( -265.19),
+                         SIMDE_FLOAT64_C( -768.95), SIMDE_FLOAT64_C(  515.15),
+                         SIMDE_FLOAT64_C(  350.59), SIMDE_FLOAT64_C(  422.68),
+                         SIMDE_FLOAT64_C(  582.99), SIMDE_FLOAT64_C( -985.50)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(   12.38), SIMDE_FLOAT64_C(  -71.80),
+                         SIMDE_FLOAT64_C(  363.01), SIMDE_FLOAT64_C( -195.65),
+                         SIMDE_FLOAT64_C(  967.47), SIMDE_FLOAT64_C(   -4.13),
+                         SIMDE_FLOAT64_C( -478.81), SIMDE_FLOAT64_C(  909.10)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(  780.09), SIMDE_FLOAT64_C( -193.39),
+                         SIMDE_FLOAT64_C(  676.69), SIMDE_FLOAT64_C(  710.80),
+                         SIMDE_FLOAT64_C(  945.34), SIMDE_FLOAT64_C(   75.83),
+                         SIMDE_FLOAT64_C(   64.75), SIMDE_FLOAT64_C(-1894.60)) },
+    { simde_mm512_set_pd(SIMDE_FLOAT64_C( -879.88), SIMDE_FLOAT64_C( -687.95),
+                         SIMDE_FLOAT64_C( -892.89), SIMDE_FLOAT64_C( -642.85),
+                         SIMDE_FLOAT64_C(  533.08), SIMDE_FLOAT64_C(  898.29),
+                         SIMDE_FLOAT64_C(  -29.99), SIMDE_FLOAT64_C(    5.58)),
+      UINT8_C(186), /* k = 0xBA */
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(  649.80), SIMDE_FLOAT64_C( -257.91),
+                         SIMDE_FLOAT64_C(  356.56), SIMDE_FLOAT64_C(  567.70),
+                         SIMDE_FLOAT64_C(  -80.43), SIMDE_FLOAT64_C( -499.15),
+                         SIMDE_FLOAT64_C( -866.12), SIMDE_FLOAT64_C(  639.40)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(  702.45), SIMDE_FLOAT64_C(  464.79),
+                         SIMDE_FLOAT64_C(  387.80), SIMDE_FLOAT64_C( -528.10),
+                         SIMDE_FLOAT64_C( -409.82), SIMDE_FLOAT64_C( -696.40),
+                         SIMDE_FLOAT64_C(  455.43), SIMDE_FLOAT64_C(  856.81)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(  -52.65), SIMDE_FLOAT64_C( -687.95),
+                         SIMDE_FLOAT64_C(  -31.24), SIMDE_FLOAT64_C( 1095.80),
+                         SIMDE_FLOAT64_C(  329.39), SIMDE_FLOAT64_C(  898.29),
+                         SIMDE_FLOAT64_C(-1321.55), SIMDE_FLOAT64_C(    5.58)) },
+    { simde_mm512_set_pd(SIMDE_FLOAT64_C( -750.95), SIMDE_FLOAT64_C(  203.46),
+                         SIMDE_FLOAT64_C(  194.87), SIMDE_FLOAT64_C(  667.81),
+                         SIMDE_FLOAT64_C( -258.76), SIMDE_FLOAT64_C(  897.89),
+                         SIMDE_FLOAT64_C(  571.10), SIMDE_FLOAT64_C( -320.96)),
+      UINT8_C( 56), /* k = 0x38 */
+      simde_mm512_set_pd(SIMDE_FLOAT64_C( -938.69), SIMDE_FLOAT64_C(   74.05),
+                         SIMDE_FLOAT64_C( -981.48), SIMDE_FLOAT64_C( -656.78),
+                         SIMDE_FLOAT64_C( -794.37), SIMDE_FLOAT64_C(  177.36),
+                         SIMDE_FLOAT64_C(  380.50), SIMDE_FLOAT64_C(  812.91)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C(  -10.37), SIMDE_FLOAT64_C( -894.99),
+                         SIMDE_FLOAT64_C( -148.09), SIMDE_FLOAT64_C(  314.75),
+                         SIMDE_FLOAT64_C( -740.28), SIMDE_FLOAT64_C( -372.00),
+                         SIMDE_FLOAT64_C( -357.36), SIMDE_FLOAT64_C( -791.79)),
+      simde_mm512_set_pd(SIMDE_FLOAT64_C( -750.95), SIMDE_FLOAT64_C(  203.46),
+                         SIMDE_FLOAT64_C( -833.39), SIMDE_FLOAT64_C( -971.53),
+                         SIMDE_FLOAT64_C(  -54.09), SIMDE_FLOAT64_C(  897.89),
+                         SIMDE_FLOAT64_C(  571.10), SIMDE_FLOAT64_C( -320.96)) }
+  };
+
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { /* visit every entry of test_vec */
+    simde__m512d r = simde_mm512_mask_sub_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); /* function under test */
+    simde_assert_m512d_close(r, test_vec[i].r, 1); /* approximate compare; NOTE(review): the 1 is presumably a decimal-place precision -- confirm against the macro definition */
+  }
+
+  return MUNIT_OK; /* reached only when no assertion aborted the test */
+}
+
 static MunitResult
 test_simde_mm512_and_si512(const MunitParameter params[], void* data) {
   (void) params;
@@ -4706,6 +5338,10 @@ static MunitTest test_suite_tests[] = {
   SIMDE_TESTS_DEFINE_TEST(mm512_sub_epi64),
   SIMDE_TESTS_DEFINE_TEST(mm512_sub_ps),
   SIMDE_TESTS_DEFINE_TEST(mm512_sub_pd),
+  SIMDE_TESTS_DEFINE_TEST(mm512_mask_sub_epi32),
+  SIMDE_TESTS_DEFINE_TEST(mm512_mask_sub_epi64),
+  SIMDE_TESTS_DEFINE_TEST(mm512_mask_sub_ps),
+  SIMDE_TESTS_DEFINE_TEST(mm512_mask_sub_pd),
 
   SIMDE_TESTS_DEFINE_TEST(mm512_and_si512),
   SIMDE_TESTS_DEFINE_TEST(mm512_andnot_si512),


=====================================
test/x86/skel.c
=====================================
@@ -2362,6 +2362,7 @@ test_simde_mm512_mask_xxx_epi32(const MunitParameter params[], void* data) {
     simde__m512i src;
     simde__mmask16 k;
     simde__m512i a;
+    simde__m512i b;
     simde__m512i r;
   } test_vec[8] = {
 
@@ -2369,14 +2370,16 @@ test_simde_mm512_mask_xxx_epi32(const MunitParameter params[], void* data) {
 
   printf("\n");
   for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
-    simde__m512i_private src, a, r;
+    simde__m512i_private src, a, b, r;
     simde__mmask16 k;
 
     munit_rand_memory(sizeof(src), (uint8_t*) &src);
     munit_rand_memory(sizeof(k), (uint8_t*) &k);
     munit_rand_memory(sizeof(a), (uint8_t*) &a);
+    munit_rand_memory(sizeof(a), (uint8_t*) &b);
+    k &= UINT16_C(0xffff);
 
-    r = simde__m512i_to_private(simde_mm512_mask_xxx_epi32(simde__m512i_from_private(src), k, simde__m512i_from_private(a)));
+    r = simde__m512i_to_private(simde_mm512_mask_xxx_epi32(simde__m512i_from_private(src), k, simde__m512i_from_private(a), simde__m512i_from_private(b)));
 
     printf("    { simde_mm512_set_epi32(INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
            "                            INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
@@ -2384,13 +2387,19 @@ test_simde_mm512_mask_xxx_epi32(const MunitParameter params[], void* data) {
            "                            INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 ")),\n",
            src.i32[15], src.i32[14], src.i32[13], src.i32[12], src.i32[11], src.i32[10], src.i32[ 9], src.i32[ 8],
            src.i32[ 7], src.i32[ 6], src.i32[ 5], src.i32[ 4], src.i32[ 3], src.i32[ 2], src.i32[ 1], src.i32[ 0]);
-    printf("      UINT16_C(%5" PRIu16 "),\n", k);
+    printf("      UINT16_C(%5" PRIu16 "),\n", HEDLEY_STATIC_CAST(uint16_t, k));
     printf("      simde_mm512_set_epi32(INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
            "                            INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
            "                            INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
            "                            INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 ")),\n",
            a.i32[15], a.i32[14], a.i32[13], a.i32[12], a.i32[11], a.i32[10], a.i32[ 9], a.i32[ 8],
            a.i32[ 7], a.i32[ 6], a.i32[ 5], a.i32[ 4], a.i32[ 3], a.i32[ 2], a.i32[ 1], a.i32[ 0]);
+    printf("      simde_mm512_set_epi32(INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
+           "                            INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
+           "                            INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
+           "                            INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 ")),\n",
+           b.i32[15], b.i32[14], b.i32[13], b.i32[12], b.i32[11], b.i32[10], b.i32[ 9], b.i32[ 8],
+           b.i32[ 7], b.i32[ 6], b.i32[ 5], b.i32[ 4], b.i32[ 3], b.i32[ 2], b.i32[ 1], b.i32[ 0]);
     printf("      simde_mm512_set_epi32(INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
            "                            INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
            "                            INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
@@ -2401,7 +2410,7 @@ test_simde_mm512_mask_xxx_epi32(const MunitParameter params[], void* data) {
   return MUNIT_FAIL;
 
   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
-    simde__m512i r = simde_mm512_mask_xxx_epi32(test_vec[i].src, test_vec[i].k, test_vec[i].a);
+    simde__m512i r = simde_mm512_mask_xxx_epi32(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b);
     simde_assert_m512i_i32(r, ==, test_vec[i].r);
   }
 
@@ -2417,6 +2426,7 @@ test_simde_mm512_mask_xxx_epi64(const MunitParameter params[], void* data) {
     simde__m512i src;
     simde__mmask8 k;
     simde__m512i a;
+    simde__m512i b;
     simde__m512i r;
   } test_vec[8] = {
 
@@ -2424,14 +2434,16 @@ test_simde_mm512_mask_xxx_epi64(const MunitParameter params[], void* data) {
 
   printf("\n");
   for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
-    simde__m512i_private src, a, r;
+    simde__m512i_private src, a, b, r;
     simde__mmask8 k;
 
     munit_rand_memory(sizeof(src), (uint8_t*) &src);
     munit_rand_memory(sizeof(k), (uint8_t*) &k);
     munit_rand_memory(sizeof(a), (uint8_t*) &a);
+    munit_rand_memory(sizeof(a), (uint8_t*) &b);
+    k &= UINT8_C(0xff);
 
-    r = simde__m512i_to_private(simde_mm512_mask_xxx_epi64(simde__m512i_from_private(src), k, simde__m512i_from_private(a)));
+    r = simde__m512i_to_private(simde_mm512_mask_xxx_epi64(simde__m512i_from_private(src), k, simde__m512i_from_private(a), simde__m512i_from_private(b)));
 
     printf("    { simde_mm512_set_epi64(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
            "                            INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
@@ -2439,13 +2451,19 @@ test_simde_mm512_mask_xxx_epi64(const MunitParameter params[], void* data) {
            "                            INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")),\n",
            src.i64[7], src.i64[6], src.i64[5], src.i64[4],
            src.i64[3], src.i64[2], src.i64[1], src.i64[0]);
-    printf("      UINT8_C(%3" PRIu8 "),\n", k);
+    printf("      UINT8_C(%3" PRIu8 "),\n", HEDLEY_STATIC_CAST(uint8_t, k));
     printf("      simde_mm512_set_epi64(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
            "                            INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
            "                            INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
            "                            INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")),\n",
            a.i64[7], a.i64[6], a.i64[5], a.i64[4],
            a.i64[3], a.i64[2], a.i64[1], a.i64[0]);
+    printf("      simde_mm512_set_epi64(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
+           "                            INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
+           "                            INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
+           "                            INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")),\n",
+           b.i64[7], b.i64[6], b.i64[5], b.i64[4],
+           b.i64[3], b.i64[2], b.i64[1], b.i64[0]);
     printf("      simde_mm512_set_epi64(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
            "                            INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
            "                            INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
@@ -2456,7 +2474,7 @@ test_simde_mm512_mask_xxx_epi64(const MunitParameter params[], void* data) {
   return MUNIT_FAIL;
 
   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
-    simde__m512i r = simde_mm512_mask_xxx_epi64(test_vec[i].src, test_vec[i].k, test_vec[i].a);
+    simde__m512i r = simde_mm512_mask_xxx_epi64(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b);
     simde_assert_m512i_i64(r, ==, test_vec[i].r);
   }
 
@@ -2472,6 +2490,7 @@ test_simde_mm512_mask_xxx_ps(const MunitParameter params[], void* data) {
     simde__m512 src;
     simde__mmask16 k;
     simde__m512 a;
+    simde__m512 b;
     simde__m512 r;
   } test_vec[8] = {
 
@@ -2479,16 +2498,18 @@ test_simde_mm512_mask_xxx_ps(const MunitParameter params[], void* data) {
 
   printf("\n");
   for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
-    simde__m512_private src, a, r;
+    simde__m512_private src, a, b, r;
     simde__mmask16 k;
 
     for (size_t j = 0 ; j < sizeof(simde__m512) / sizeof(simde_float32) ; j++) {
       src.f32[j] = (simde_float32) (round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0);
       a.f32[j] = (simde_float32) (round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0);
+      b.f32[j] = (simde_float32) (round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0);
     }
     munit_rand_memory(sizeof(k), (uint8_t*) &k);
+    k &= UINT16_C(0xffff);
 
-    r = simde__m512_to_private(simde_mm512_mask_xxx_ps(k, simde__m512_from_private(a)));
+    r = simde__m512_to_private(simde_mm512_mask_xxx_ps(simde__m512_from_private(src), k, simde__m512_from_private(a), simde__m512_from_private(b)));
 
     printf("    { simde_mm512_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
            "                         SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
@@ -2498,7 +2519,7 @@ test_simde_mm512_mask_xxx_ps(const MunitParameter params[], void* data) {
 	         9, src.f32[11], 9, src.f32[10], 9, src.f32[ 9], 9, src.f32[ 8],
 	         9, src.f32[ 7], 9, src.f32[ 6], 9, src.f32[ 5], 9, src.f32[ 4],
 	         9, src.f32[ 3], 9, src.f32[ 2], 9, src.f32[ 1], 9, src.f32[ 0]);
-    printf("      UINT16_C(%5" PRIu16 "),\n", k);
+    printf("      UINT16_C(%5" PRIu16 "),\n", HEDLEY_STATIC_CAST(uint16_t, k));
     printf("      simde_mm512_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
            "                         SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
            "                         SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
@@ -2507,6 +2528,14 @@ test_simde_mm512_mask_xxx_ps(const MunitParameter params[], void* data) {
 	         9, a.f32[11], 9, a.f32[10], 9, a.f32[ 9], 9, a.f32[ 8],
 	         9, a.f32[ 7], 9, a.f32[ 6], 9, a.f32[ 5], 9, a.f32[ 4],
 	         9, a.f32[ 3], 9, a.f32[ 2], 9, a.f32[ 1], 9, a.f32[ 0]);
+    printf("      simde_mm512_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
+           "                         SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
+           "                         SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
+           "                         SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f)),\n",
+	         9, b.f32[15], 9, b.f32[14], 9, b.f32[13], 9, b.f32[12],
+	         9, b.f32[11], 9, b.f32[10], 9, b.f32[ 9], 9, b.f32[ 8],
+	         9, b.f32[ 7], 9, b.f32[ 6], 9, b.f32[ 5], 9, b.f32[ 4],
+	         9, b.f32[ 3], 9, b.f32[ 2], 9, b.f32[ 1], 9, b.f32[ 0]);
     printf("      simde_mm512_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
            "                         SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
            "                         SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
@@ -2519,7 +2548,7 @@ test_simde_mm512_mask_xxx_ps(const MunitParameter params[], void* data) {
   return MUNIT_FAIL;
 
   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
-    simde__m512 r = simde_mm512_mask_xxx_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a);
+    simde__m512 r = simde_mm512_mask_xxx_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b);
     simde_assert_m512_close(r, test_vec[i].r, 1);
   }
 
@@ -2535,6 +2564,7 @@ test_simde_mm512_mask_xxx_pd(const MunitParameter params[], void* data) {
     simde__m512d src;
     simde__mmask8 k;
     simde__m512d a;
+    simde__m512d b;
     simde__m512d r;
   } test_vec[8] = {
 
@@ -2542,16 +2572,18 @@ test_simde_mm512_mask_xxx_pd(const MunitParameter params[], void* data) {
 
   printf("\n");
   for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
-    simde__m512d_private src, a, r;
+    simde__m512d_private src, a, b, r;
     simde__mmask8 k;
 
     for (size_t j = 0 ; j < sizeof(simde__m512d) / sizeof(simde_float64) ; j++) {
       src.f64[j] = round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0;
       a.f64[j] = round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0;
+      b.f64[j] = round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0;
     }
     munit_rand_memory(sizeof(k), (uint8_t*) &k);
+    k &= UINT8_C(0xff);
 
-    r = simde__m512d_to_private(simde_mm512_mask_xxx_pd(simde__m512d_from_private(src), k, simde__m512d_from_private(a)));
+    r = simde__m512d_to_private(simde_mm512_mask_xxx_pd(simde__m512d_from_private(src), k, simde__m512d_from_private(a), simde__m512d_from_private(b)));
 
     printf("    { simde_mm512_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
            "                         SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
@@ -2559,13 +2591,19 @@ test_simde_mm512_mask_xxx_pd(const MunitParameter params[], void* data) {
            "                         SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)),\n",
            8, src.f64[7], 8, src.f64[6], 8, src.f64[5], 8, src.f64[4],
            8, src.f64[3], 8, src.f64[2], 8, src.f64[1], 8, src.f64[0]);
-    printf("      UINT8_C(%3" PRIu8 "),\n", k);
+    printf("      UINT8_C(%3" PRIu8 "),\n", HEDLEY_STATIC_CAST(uint8_t, k));
     printf("      simde_mm512_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
            "                         SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
            "                         SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
            "                         SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)),\n",
            8, a.f64[7], 8, a.f64[6], 8, a.f64[5], 8, a.f64[4],
            8, a.f64[3], 8, a.f64[2], 8, a.f64[1], 8, a.f64[0]);
+    printf("      simde_mm512_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
+           "                         SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
+           "                         SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
+           "                         SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)),\n",
+           8, b.f64[7], 8, b.f64[6], 8, b.f64[5], 8, b.f64[4],
+           8, b.f64[3], 8, b.f64[2], 8, b.f64[1], 8, b.f64[0]);
     printf("      simde_mm512_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
            "                         SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
            "                         SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
@@ -2576,7 +2614,7 @@ test_simde_mm512_mask_xxx_pd(const MunitParameter params[], void* data) {
   return MUNIT_FAIL;
 
   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
-    simde__m512d r = simde_mm512_mask_xxx_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a);
+    simde__m512d r = simde_mm512_mask_xxx_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b);
     simde_assert_m512d_close(r, test_vec[i].r, 1);
   }
 


=====================================
test/x86/sse.c
=====================================
@@ -1987,26 +1987,42 @@ test_simde_mm_cvtpi32_ps(const MunitParameter params[], void* data) {
   (void) params;
   (void) data;
 
-  for (size_t i = 0 ; i < TEST_PREFERRED_ITERATIONS ; i++) {
-    simde__m128_private a, r;
-    simde__m64_private b;
-
-    munit_rand_memory(sizeof(a), HEDLEY_REINTERPRET_CAST(uint8_t*, &a));
-    for (size_t j = 0 ; j < 2 ; j++) {
-      a.i32[j] = HEDLEY_STATIC_CAST(int32_t, munit_rand_int_range(-65536, 65535));
-      b.i32[j] = HEDLEY_STATIC_CAST(int32_t, munit_rand_int_range(-65536, 65535));
-    }
-
-    r = simde__m128_to_private(simde_mm_cvtpi32_ps(simde__m128_from_private(a), simde__m64_from_private(b)));
+  const struct {
+    simde__m128 a;
+    simde__m64 b;
+    simde__m128 r;
+  } test_vec[8] = {
+    { simde_mm_set_ps(SIMDE_FLOAT32_C(   143.12), SIMDE_FLOAT32_C(   382.05), SIMDE_FLOAT32_C(  -756.03), SIMDE_FLOAT32_C(   501.27)),
+      simde_mm_set_pi32(INT32_C(        747), INT32_C(       -200)),
+      simde_mm_set_ps(SIMDE_FLOAT32_C(   143.12), SIMDE_FLOAT32_C(   382.05), SIMDE_FLOAT32_C(   747.00), SIMDE_FLOAT32_C(  -200.00)) },
+    { simde_mm_set_ps(SIMDE_FLOAT32_C(   972.78), SIMDE_FLOAT32_C(  -402.40), SIMDE_FLOAT32_C(   516.01), SIMDE_FLOAT32_C(   710.18)),
+      simde_mm_set_pi32(INT32_C(        533), INT32_C(       -843)),
+      simde_mm_set_ps(SIMDE_FLOAT32_C(   972.78), SIMDE_FLOAT32_C(  -402.40), SIMDE_FLOAT32_C(   533.00), SIMDE_FLOAT32_C(  -843.00)) },
+    { simde_mm_set_ps(SIMDE_FLOAT32_C(   472.16), SIMDE_FLOAT32_C(  -696.64), SIMDE_FLOAT32_C(  -295.14), SIMDE_FLOAT32_C(   252.19)),
+      simde_mm_set_pi32(INT32_C(       -428), INT32_C(        182)),
+      simde_mm_set_ps(SIMDE_FLOAT32_C(   472.16), SIMDE_FLOAT32_C(  -696.64), SIMDE_FLOAT32_C(  -428.00), SIMDE_FLOAT32_C(   182.00)) },
+    { simde_mm_set_ps(SIMDE_FLOAT32_C(  -566.65), SIMDE_FLOAT32_C(   623.99), SIMDE_FLOAT32_C(   879.56), SIMDE_FLOAT32_C(   610.97)),
+      simde_mm_set_pi32(INT32_C(        176), INT32_C(        681)),
+      simde_mm_set_ps(SIMDE_FLOAT32_C(  -566.65), SIMDE_FLOAT32_C(   623.99), SIMDE_FLOAT32_C(   176.00), SIMDE_FLOAT32_C(   681.00)) },
+    { simde_mm_set_ps(SIMDE_FLOAT32_C(   572.11), SIMDE_FLOAT32_C(  -357.92), SIMDE_FLOAT32_C(   212.83), SIMDE_FLOAT32_C(   936.07)),
+      simde_mm_set_pi32(INT32_C(       -310), INT32_C(        515)),
+      simde_mm_set_ps(SIMDE_FLOAT32_C(   572.11), SIMDE_FLOAT32_C(  -357.92), SIMDE_FLOAT32_C(  -310.00), SIMDE_FLOAT32_C(   515.00)) },
+    { simde_mm_set_ps(SIMDE_FLOAT32_C(   696.01), SIMDE_FLOAT32_C(  -960.55), SIMDE_FLOAT32_C(  -478.31), SIMDE_FLOAT32_C(  -831.25)),
+      simde_mm_set_pi32(INT32_C(        324), INT32_C(        -65)),
+      simde_mm_set_ps(SIMDE_FLOAT32_C(   696.01), SIMDE_FLOAT32_C(  -960.55), SIMDE_FLOAT32_C(   324.00), SIMDE_FLOAT32_C(   -65.00)) },
+    { simde_mm_set_ps(SIMDE_FLOAT32_C(  -973.73), SIMDE_FLOAT32_C(   578.94), SIMDE_FLOAT32_C(   658.26), SIMDE_FLOAT32_C(   635.66)),
+      simde_mm_set_pi32(INT32_C(       -268), INT32_C(        691)),
+      simde_mm_set_ps(SIMDE_FLOAT32_C(  -973.73), SIMDE_FLOAT32_C(   578.94), SIMDE_FLOAT32_C(  -268.00), SIMDE_FLOAT32_C(   691.00)) },
+    { simde_mm_set_ps(SIMDE_FLOAT32_C(   654.54), SIMDE_FLOAT32_C(  -615.74), SIMDE_FLOAT32_C(  -430.24), SIMDE_FLOAT32_C(   224.63)),
+      simde_mm_set_pi32(INT32_C(        370), INT32_C(       -505)),
+      simde_mm_set_ps(SIMDE_FLOAT32_C(   654.54), SIMDE_FLOAT32_C(  -615.74), SIMDE_FLOAT32_C(   370.00), SIMDE_FLOAT32_C(  -505.00)) }
+  };
 
-    simde_assert_int32_close(b.i32[0], HEDLEY_STATIC_CAST(int32_t, r.f32[0]));
-    simde_assert_int32_close(b.i32[1], HEDLEY_STATIC_CAST(int32_t, r.f32[1]));
-    munit_assert_int32(a.i32[2], ==, r.i32[2]);
-    munit_assert_int32(a.i32[3], ==, r.i32[3]);
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+    simde__m128 r = simde_mm_cvtpi32_ps(test_vec[i].a, test_vec[i].b);
+    simde_assert_m128_close(r, test_vec[i].r, 1);
   }
 
-  simde_mm_empty();
-
   return MUNIT_OK;
 }
 
@@ -2248,19 +2264,40 @@ test_simde_mm_cvtsi32_ss(const MunitParameter params[], void* data) {
   (void) params;
   (void) data;
 
-  for (size_t i = 0 ; i < TEST_PREFERRED_ITERATIONS ; i++) {
-    simde__m128_private a, r;
+  const struct {
+    simde__m128 a;
     int32_t b;
+    simde__m128 r;
+  } test_vec[8] = {
+    { simde_mm_set_ps(SIMDE_FLOAT32_C(  -368.26), SIMDE_FLOAT32_C(  -772.15), SIMDE_FLOAT32_C(   700.78), SIMDE_FLOAT32_C(  -416.87)),
+      INT32_C(     -93207),
+      simde_mm_set_ps(SIMDE_FLOAT32_C(  -368.26), SIMDE_FLOAT32_C(  -772.15), SIMDE_FLOAT32_C(   700.78), SIMDE_FLOAT32_C(-93207.00)) },
+    { simde_mm_set_ps(SIMDE_FLOAT32_C(   774.49), SIMDE_FLOAT32_C(   920.32), SIMDE_FLOAT32_C(   159.83), SIMDE_FLOAT32_C(  -900.78)),
+      INT32_C(     -99810),
+      simde_mm_set_ps(SIMDE_FLOAT32_C(   774.49), SIMDE_FLOAT32_C(   920.32), SIMDE_FLOAT32_C(   159.83), SIMDE_FLOAT32_C(-99810.00)) },
+    { simde_mm_set_ps(SIMDE_FLOAT32_C(  -606.72), SIMDE_FLOAT32_C(  -127.65), SIMDE_FLOAT32_C(  -336.22), SIMDE_FLOAT32_C(  -528.09)),
+      INT32_C(     -24917),
+      simde_mm_set_ps(SIMDE_FLOAT32_C(  -606.72), SIMDE_FLOAT32_C(  -127.65), SIMDE_FLOAT32_C(  -336.22), SIMDE_FLOAT32_C(-24917.00)) },
+    { simde_mm_set_ps(SIMDE_FLOAT32_C(   534.13), SIMDE_FLOAT32_C(  -401.63), SIMDE_FLOAT32_C(  -949.41), SIMDE_FLOAT32_C(   -38.28)),
+      INT32_C(     -25377),
+      simde_mm_set_ps(SIMDE_FLOAT32_C(   534.13), SIMDE_FLOAT32_C(  -401.63), SIMDE_FLOAT32_C(  -949.41), SIMDE_FLOAT32_C(-25377.00)) },
+    { simde_mm_set_ps(SIMDE_FLOAT32_C(   704.87), SIMDE_FLOAT32_C(   236.14), SIMDE_FLOAT32_C(   -91.25), SIMDE_FLOAT32_C(  -708.13)),
+      INT32_C(      83867),
+      simde_mm_set_ps(SIMDE_FLOAT32_C(   704.87), SIMDE_FLOAT32_C(   236.14), SIMDE_FLOAT32_C(   -91.25), SIMDE_FLOAT32_C( 83867.00)) },
+    { simde_mm_set_ps(SIMDE_FLOAT32_C(   570.72), SIMDE_FLOAT32_C(  -412.30), SIMDE_FLOAT32_C(  -578.88), SIMDE_FLOAT32_C(   196.41)),
+      INT32_C(      72066),
+      simde_mm_set_ps(SIMDE_FLOAT32_C(   570.72), SIMDE_FLOAT32_C(  -412.30), SIMDE_FLOAT32_C(  -578.88), SIMDE_FLOAT32_C( 72066.00)) },
+    { simde_mm_set_ps(SIMDE_FLOAT32_C(   891.77), SIMDE_FLOAT32_C(  -473.67), SIMDE_FLOAT32_C(   332.65), SIMDE_FLOAT32_C(  -615.45)),
+      INT32_C(      12054),
+      simde_mm_set_ps(SIMDE_FLOAT32_C(   891.77), SIMDE_FLOAT32_C(  -473.67), SIMDE_FLOAT32_C(   332.65), SIMDE_FLOAT32_C( 12054.00)) },
+    { simde_mm_set_ps(SIMDE_FLOAT32_C(  -552.37), SIMDE_FLOAT32_C(  -873.85), SIMDE_FLOAT32_C(   968.75), SIMDE_FLOAT32_C(  -669.38)),
+      INT32_C(      88818),
+      simde_mm_set_ps(SIMDE_FLOAT32_C(  -552.37), SIMDE_FLOAT32_C(  -873.85), SIMDE_FLOAT32_C(   968.75), SIMDE_FLOAT32_C( 88818.00)) }
+  };
 
-    munit_rand_memory(sizeof(a), HEDLEY_REINTERPRET_CAST(uint8_t*, &a));
-    b = HEDLEY_STATIC_CAST(int32_t, munit_rand_int_range(INT16_MIN, INT16_MAX));
-
-    r = simde__m128_to_private(simde_mm_cvtsi32_ss(simde__m128_from_private(a), b));
-
-    simde_assert_int32_close(HEDLEY_STATIC_CAST(int, r.f32[0]), b);
-    munit_assert_int32(r.i32[1], ==, a.i32[1]);
-    munit_assert_int32(r.i32[2], ==, a.i32[2]);
-    munit_assert_int32(r.i32[3], ==, a.i32[3]);
+  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+    simde__m128 r = simde_mm_cvtsi32_ss(test_vec[i].a, test_vec[i].b);
+    simde_assert_m128_close(r, test_vec[i].r, 1);
   }
 
   return MUNIT_OK;



View it on GitLab: https://salsa.debian.org/med-team/simde/-/commit/181091ed108825cc071c2422f908c103547a4029

-- 
View it on GitLab: https://salsa.debian.org/med-team/simde/-/commit/181091ed108825cc071c2422f908c103547a4029
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200408/16344763/attachment-0001.html>


More information about the debian-med-commit mailing list