[med-svn] [Git][med-team/simde][upstream] New upstream version 0.0.0.git.20200408
Michael R. Crusoe
gitlab at salsa.debian.org
Wed Apr 8 14:44:20 BST 2020
Michael R. Crusoe pushed to branch upstream at Debian Med / simde
Commits:
181091ed by Michael R. Crusoe at 2020-04-08T15:20:59+02:00
New upstream version 0.0.0.git.20200408
- - - - -
8 changed files:
- .azure-pipelines.yml
- − .codecov.yml
- .github/workflows/ci.yml
- .travis.yml
- simde/x86/avx512f.h
- test/x86/avx512f.c
- test/x86/skel.c
- test/x86/sse.c
Changes:
=====================================
.azure-pipelines.yml
=====================================
@@ -13,8 +13,6 @@ jobs:
displayName: Build
- script: ./run-tests
displayName: Tests
- - script: bash <(curl -s https://codecov.io/bash) -y ./.codecov.yml
- displayName: 'Upload to codecov.io'
- job: sse4_1
steps:
@@ -26,8 +24,6 @@ jobs:
displayName: Build
- script: ./run-tests
displayName: Tests
- - script: bash <(curl -s https://codecov.io/bash) -y ./.codecov.yml
- displayName: 'Upload to codecov.io'
- job: mipsel
steps:
@@ -44,5 +40,3 @@ jobs:
displayName: Build
- script: ./dockcross-linux-mipsel ./run-tests
displayName: Tests
- - script: bash <(curl -s https://codecov.io/bash) -y ./.codecov.yml
- displayName: 'Upload to codecov.io'
=====================================
.codecov.yml deleted
=====================================
@@ -1,9 +0,0 @@
-coverage:
- status:
- project:
- default:
- if_no_uploads: success
-
-ignore:
-- test/**/*
-- test/*
=====================================
.github/workflows/ci.yml
=====================================
@@ -18,11 +18,6 @@ jobs:
run: cmake --build .
- name: Test
run: ./run-tests
- - uses: codecov/codecov-action@v1
- with:
- token: ${{ secrets.CODECOV_TOKEN }}
- yml: ./.codecov.yml
- fail_ci_if_error: false
# emscripten:
# runs-on: ubuntu-latest
@@ -40,6 +35,4 @@ jobs:
# - name: Build
# run: emsdk/upstream/emscripten/emmake ninja
# - name: Test
- # run: node ./run-tests
- # - name: Upload Coverage Data
- # run: bash <(curl -s https://codecov.io/bash) -y ./.codecov.yml
+ # run: node ./run-tests
=====================================
.travis.yml
=====================================
@@ -292,11 +292,5 @@ script:
fi
fi
-after_success:
-- |
- if [ "${BUILD_TYPE}" = "Coverage" ]; then
- bash <(curl -s https://codecov.io/bash) -y "${TRAVIS_BUILD_DIR}"/.codecov.yml ${GCOV}
- fi
-
notifications:
email: false
=====================================
simde/x86/avx512f.h
=====================================
@@ -1661,7 +1661,7 @@ simde_mm512_xor_si512 (simde__m512i a, simde__m512i b) {
r_.i32f = a_.i32f ^ b_.i32f;
#else
SIMDE__VECTORIZE
- for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
+ for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i];
}
#endif
@@ -2126,6 +2126,19 @@ simde_mm512_sub_epi32 (simde__m512i a, simde__m512i b) {
# define _mm512_sub_epi32(a, b) simde_mm512_sub_epi32(a, b)
#endif
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_mask_sub_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_mask_sub_epi32(src, k, a, b);
+#else
+ return simde_mm512_mask_mov_epi32(src, k, simde_mm512_sub_epi32(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+# define _mm512_mask_sub_epi32(src, k, a, b) simde_mm512_mask_sub_epi32(src, k, a, b)
+#endif
+
SIMDE__FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_sub_epi64 (simde__m512i a, simde__m512i b) {
@@ -2153,6 +2166,19 @@ simde_mm512_sub_epi64 (simde__m512i a, simde__m512i b) {
# define _mm512_sub_epi64(a, b) simde_mm512_sub_epi64(a, b)
#endif
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512i
+simde_mm512_mask_sub_epi64 (simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_mask_sub_epi64(src, k, a, b);
+#else
+ return simde_mm512_mask_mov_epi64(src, k, simde_mm512_sub_epi64(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+# define _mm512_mask_sub_epi64(src, k, a, b) simde_mm512_mask_sub_epi64(src, k, a, b)
+#endif
+
SIMDE__FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_sub_ps (simde__m512 a, simde__m512 b) {
@@ -2180,6 +2206,19 @@ simde_mm512_sub_ps (simde__m512 a, simde__m512 b) {
# define _mm512_sub_ps(a, b) simde_mm512_sub_ps(a, b)
#endif
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512
+simde_mm512_mask_sub_ps (simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_mask_sub_ps(src, k, a, b);
+#else
+ return simde_mm512_mask_mov_ps(src, k, simde_mm512_sub_ps(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+# define _mm512_mask_sub_ps(src, k, a, b) simde_mm512_mask_sub_ps(src, k, a, b)
+#endif
+
SIMDE__FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_sub_pd (simde__m512d a, simde__m512d b) {
@@ -2207,6 +2246,19 @@ simde_mm512_sub_pd (simde__m512d a, simde__m512d b) {
# define _mm512_sub_pd(a, b) simde_mm512_sub_pd(a, b)
#endif
+SIMDE__FUNCTION_ATTRIBUTES
+simde__m512d
+simde_mm512_mask_sub_pd (simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
+#if defined(SIMDE_AVX512F_NATIVE)
+ return _mm512_mask_sub_pd(src, k, a, b);
+#else
+ return simde_mm512_mask_mov_pd(src, k, simde_mm512_sub_pd(a, b));
+#endif
+}
+#if defined(SIMDE_AVX512F_ENABLE_NATIVE_ALIASES)
+# define _mm512_mask_sub_pd(src, k, a, b) simde_mm512_mask_sub_pd(src, k, a, b)
+#endif
+
SIMDE__FUNCTION_ATTRIBUTES
simde__mmask16
simde_mm512_cmpeq_epi32_mask (simde__m512i a, simde__m512i b) {
=====================================
test/x86/avx512f.c
=====================================
@@ -2771,6 +2771,638 @@ test_simde_mm512_sub_pd(const MunitParameter params[], void* data) {
return MUNIT_OK;
}
+static MunitResult
+test_simde_mm512_mask_sub_epi32(const MunitParameter params[], void* data) {
+ (void) params;
+ (void) data;
+
+ const struct {
+ simde__m512i src;
+ simde__mmask16 k;
+ simde__m512i a;
+ simde__m512i b;
+ simde__m512i r;
+ } test_vec[8] = {
+ { simde_mm512_set_epi32(INT32_C( -957186609), INT32_C(-1524765283), INT32_C( 1290068568), INT32_C( 1887468775),
+ INT32_C( -904096999), INT32_C(-1189693212), INT32_C( 221355870), INT32_C(-1952779315),
+ INT32_C( 1347985035), INT32_C(-2063939133), INT32_C(-1602582649), INT32_C(-2096850611),
+ INT32_C(-2084994527), INT32_C( -75386963), INT32_C( 1835417512), INT32_C(-2072964471)),
+ UINT16_C(35396),
+ simde_mm512_set_epi32(INT32_C( 136551409), INT32_C( 1192962314), INT32_C( 2058621765), INT32_C(-2039270859),
+ INT32_C( -26254502), INT32_C( 733381108), INT32_C( -187934344), INT32_C( 989979336),
+ INT32_C(-1964919382), INT32_C( 126554293), INT32_C( 254011928), INT32_C( 1490517506),
+ INT32_C(-1065486850), INT32_C( 45941921), INT32_C(-1082899768), INT32_C( -219628031)),
+ simde_mm512_set_epi32(INT32_C( -680185335), INT32_C( 111102276), INT32_C( 1222454066), INT32_C( -422241261),
+ INT32_C( -78061198), INT32_C(-2084414007), INT32_C( 1367041146), INT32_C(-1471398421),
+ INT32_C( -348147705), INT32_C( 673564238), INT32_C(-1457376577), INT32_C( 613875036),
+ INT32_C( -859069431), INT32_C( -733638834), INT32_C(-1673403701), INT32_C( 842474288)),
+ simde_mm512_set_epi32(INT32_C( 816736744), INT32_C(-1524765283), INT32_C( 1290068568), INT32_C( 1887468775),
+ INT32_C( 51806696), INT32_C(-1189693212), INT32_C(-1554975490), INT32_C(-1952779315),
+ INT32_C( 1347985035), INT32_C( -547009945), INT32_C(-1602582649), INT32_C(-2096850611),
+ INT32_C(-2084994527), INT32_C( 779580755), INT32_C( 1835417512), INT32_C(-2072964471)) },
+ { simde_mm512_set_epi32(INT32_C( 2077489237), INT32_C(-2066152618), INT32_C( -825076901), INT32_C(-1372574642),
+ INT32_C( -889460158), INT32_C( 498921453), INT32_C( 943332338), INT32_C(-1383811831),
+ INT32_C( 1827152592), INT32_C( 1728034912), INT32_C( 1350913629), INT32_C( 868578809),
+ INT32_C( 1368636899), INT32_C( -389235219), INT32_C( 602990700), INT32_C( -400551366)),
+ UINT16_C(47779),
+ simde_mm512_set_epi32(INT32_C( 1704357216), INT32_C( -538157327), INT32_C( 1370875608), INT32_C( 1508504457),
+ INT32_C( -68294915), INT32_C(-1479685367), INT32_C( -615597542), INT32_C(-1638887359),
+ INT32_C(-1417912572), INT32_C( 1479002949), INT32_C( -647118153), INT32_C( 1670566025),
+ INT32_C(-1880268561), INT32_C(-1083232065), INT32_C( 2092339698), INT32_C(-1021873283)),
+ simde_mm512_set_epi32(INT32_C( -839277498), INT32_C( 551588590), INT32_C( 1834572496), INT32_C( 1613035598),
+ INT32_C(-1678404828), INT32_C(-1769391216), INT32_C(-1638931514), INT32_C( 156804649),
+ INT32_C( 1764158657), INT32_C( -132604621), INT32_C( 446542816), INT32_C( 2037189710),
+ INT32_C( 109296986), INT32_C( 257019297), INT32_C( 473079611), INT32_C( 1127076998)),
+ simde_mm512_set_epi32(INT32_C(-1751332582), INT32_C(-2066152618), INT32_C( -463696888), INT32_C( -104531141),
+ INT32_C( 1610109913), INT32_C( 498921453), INT32_C( 1023333972), INT32_C(-1383811831),
+ INT32_C( 1112896067), INT32_C( 1728034912), INT32_C(-1093660969), INT32_C( 868578809),
+ INT32_C( 1368636899), INT32_C( -389235219), INT32_C( 1619260087), INT32_C( 2146017015)) },
+ { simde_mm512_set_epi32(INT32_C( 307630641), INT32_C(-1560148595), INT32_C( 376284729), INT32_C( 278591183),
+ INT32_C( -277186219), INT32_C( 1940926671), INT32_C( 662058232), INT32_C( 1091202812),
+ INT32_C( -701136301), INT32_C( -504607320), INT32_C( -251380880), INT32_C( 1860616049),
+ INT32_C(-1752161866), INT32_C(-1199997313), INT32_C(-1668691262), INT32_C( 1717921298)),
+ UINT16_C( 2459),
+ simde_mm512_set_epi32(INT32_C( 2079917891), INT32_C(-1199015072), INT32_C( -98602729), INT32_C( -930567988),
+ INT32_C(-1256209763), INT32_C( 1068967165), INT32_C( 1289079409), INT32_C( 1251085533),
+ INT32_C( -727360546), INT32_C(-1724797341), INT32_C( 2093813635), INT32_C( 1051617285),
+ INT32_C( 1264716001), INT32_C( 940727836), INT32_C( 1722577424), INT32_C(-1275657732)),
+ simde_mm512_set_epi32(INT32_C( 671797033), INT32_C(-1012795446), INT32_C( 2106088193), INT32_C( -458612579),
+ INT32_C( -261772865), INT32_C( -550994046), INT32_C( 2105186719), INT32_C( 1074097751),
+ INT32_C(-1251411324), INT32_C( 65867416), INT32_C(-1495248139), INT32_C( 315553116),
+ INT32_C(-1869712369), INT32_C(-1246794510), INT32_C( 1218370652), INT32_C( -240388126)),
+ simde_mm512_set_epi32(INT32_C( 307630641), INT32_C(-1560148595), INT32_C( 376284729), INT32_C( 278591183),
+ INT32_C( -994436898), INT32_C( 1940926671), INT32_C( 662058232), INT32_C( 176987782),
+ INT32_C( 524050778), INT32_C( -504607320), INT32_C( -251380880), INT32_C( 736064169),
+ INT32_C(-1160538926), INT32_C(-1199997313), INT32_C( 504206772), INT32_C(-1035269606)) },
+ { simde_mm512_set_epi32(INT32_C( -789716549), INT32_C(-1932674309), INT32_C( 548470804), INT32_C( -318652401),
+ INT32_C(-2041118423), INT32_C(-2107945718), INT32_C( -715661009), INT32_C( 1609073505),
+ INT32_C( 1214609500), INT32_C( 283085327), INT32_C(-1633515677), INT32_C( 1697029857),
+ INT32_C( 1976447422), INT32_C( 904412076), INT32_C( 1198927422), INT32_C(-1498026761)),
+ UINT16_C(54315),
+ simde_mm512_set_epi32(INT32_C( 1385182319), INT32_C( 795273310), INT32_C( 1955628796), INT32_C( -526907127),
+ INT32_C(-2141025282), INT32_C( -931446405), INT32_C(-1422139726), INT32_C(-1101084337),
+ INT32_C( -254080461), INT32_C( -595291883), INT32_C( 1292692652), INT32_C(-1849951866),
+ INT32_C( -815091127), INT32_C( 370112774), INT32_C( -520479179), INT32_C( 1681391452)),
+ simde_mm512_set_epi32(INT32_C(-1825216267), INT32_C( 1555513845), INT32_C(-2081576252), INT32_C(-1972081268),
+ INT32_C( -563427058), INT32_C( 1922040193), INT32_C(-2102270715), INT32_C(-1257264155),
+ INT32_C( -894851768), INT32_C( 1793334666), INT32_C( 1049305530), INT32_C(-1935379009),
+ INT32_C( -8279361), INT32_C(-1567490719), INT32_C(-2014130513), INT32_C(-1826154506)),
+ simde_mm512_set_epi32(INT32_C(-1084568710), INT32_C( -760240535), INT32_C( 548470804), INT32_C( 1445174141),
+ INT32_C(-2041118423), INT32_C( 1441480698), INT32_C( -715661009), INT32_C( 1609073505),
+ INT32_C( 1214609500), INT32_C( 283085327), INT32_C( 243387122), INT32_C( 1697029857),
+ INT32_C( -806811766), INT32_C( 904412076), INT32_C( 1493651334), INT32_C( -787421338)) },
+ { simde_mm512_set_epi32(INT32_C( 997407681), INT32_C( -83308341), INT32_C( 1430458288), INT32_C( -655910274),
+ INT32_C( 17159218), INT32_C( 197891822), INT32_C( -82165524), INT32_C( 98130061),
+ INT32_C( -696255503), INT32_C( 616388941), INT32_C( 1383637516), INT32_C( 255219509),
+ INT32_C(-1280964183), INT32_C(-1753221031), INT32_C( 480974923), INT32_C(-1444611560)),
+ UINT16_C(47568),
+ simde_mm512_set_epi32(INT32_C(-1796791424), INT32_C( 919413682), INT32_C( 907613991), INT32_C(-1471064632),
+ INT32_C(-2017464794), INT32_C( -67778959), INT32_C(-1033884668), INT32_C( -839095279),
+ INT32_C( -881742684), INT32_C( 1193890045), INT32_C( -817450648), INT32_C( -450889209),
+ INT32_C(-1829442769), INT32_C( -254239276), INT32_C( 1531184539), INT32_C( 204100550)),
+ simde_mm512_set_epi32(INT32_C(-1574624316), INT32_C( 1965632168), INT32_C( -507137262), INT32_C( 868285762),
+ INT32_C( -287712967), INT32_C(-1275855491), INT32_C(-1948986373), INT32_C( 378189270),
+ INT32_C( 2028975029), INT32_C( -983819985), INT32_C(-1530834794), INT32_C( -267906659),
+ INT32_C( 2013371063), INT32_C( -972550977), INT32_C(-1345658151), INT32_C(-2001069348)),
+ simde_mm512_set_epi32(INT32_C( -222167108), INT32_C( -83308341), INT32_C( 1414751253), INT32_C( 1955616902),
+ INT32_C(-1729751827), INT32_C( 197891822), INT32_C( -82165524), INT32_C(-1217284549),
+ INT32_C( 1384249583), INT32_C(-2117257266), INT32_C( 1383637516), INT32_C( -182982550),
+ INT32_C(-1280964183), INT32_C(-1753221031), INT32_C( 480974923), INT32_C(-1444611560)) },
+ { simde_mm512_set_epi32(INT32_C( 1875288432), INT32_C( 1158027251), INT32_C( -303056299), INT32_C( -939396673),
+ INT32_C( 1585003262), INT32_C( 1365783459), INT32_C( 111845672), INT32_C(-1286713478),
+ INT32_C( 674624782), INT32_C( 2020528740), INT32_C( 497192398), INT32_C( 1112540789),
+ INT32_C(-1764167278), INT32_C(-1540772359), INT32_C( 395629026), INT32_C( 984304916)),
+ UINT16_C(16877),
+ simde_mm512_set_epi32(INT32_C( -344292944), INT32_C( 1968428151), INT32_C( 2086978939), INT32_C( 1501910543),
+ INT32_C(-1262393002), INT32_C( 2081469023), INT32_C( 2016768793), INT32_C( 1922434397),
+ INT32_C( -253304624), INT32_C( 515280842), INT32_C(-1708348294), INT32_C( 2107558843),
+ INT32_C( 1919035054), INT32_C( 1742835915), INT32_C( 989439209), INT32_C( 2080310116)),
+ simde_mm512_set_epi32(INT32_C( 1560352883), INT32_C( -937050525), INT32_C( 15000953), INT32_C( 298895006),
+ INT32_C( -255287325), INT32_C( -851082971), INT32_C( -981170631), INT32_C( 30364523),
+ INT32_C( -626854551), INT32_C( 1776719697), INT32_C(-1286673883), INT32_C( 2134458392),
+ INT32_C(-1884377437), INT32_C(-2042525337), INT32_C( 2143156805), INT32_C(-1045267304)),
+ simde_mm512_set_epi32(INT32_C( 1875288432), INT32_C(-1389488620), INT32_C( -303056299), INT32_C( -939396673),
+ INT32_C( 1585003262), INT32_C( 1365783459), INT32_C( 111845672), INT32_C( 1892069874),
+ INT32_C( 373549927), INT32_C(-1261438855), INT32_C( -421674411), INT32_C( 1112540789),
+ INT32_C( -491554805), INT32_C( -509606044), INT32_C( 395629026), INT32_C(-1169389876)) },
+ { simde_mm512_set_epi32(INT32_C( 726531409), INT32_C( -606374582), INT32_C(-1057918709), INT32_C( -811736744),
+ INT32_C(-1460245574), INT32_C( -627872087), INT32_C( 1799586442), INT32_C(-1105519928),
+ INT32_C(-1288829692), INT32_C(-2144392739), INT32_C( 1110910857), INT32_C( -282270116),
+ INT32_C(-1420141426), INT32_C( 1682561587), INT32_C( 1308021682), INT32_C( 712875579)),
+ UINT16_C(17567),
+ simde_mm512_set_epi32(INT32_C(-1065890522), INT32_C( 1362887862), INT32_C(-1905482051), INT32_C( 174767211),
+ INT32_C( 1968089357), INT32_C(-1207243832), INT32_C( -701927204), INT32_C(-1701909648),
+ INT32_C(-1822821880), INT32_C(-1418686446), INT32_C( 2002979046), INT32_C( -531029674),
+ INT32_C( -233545704), INT32_C( 1270923539), INT32_C( -515398077), INT32_C( 870828526)),
+ simde_mm512_set_epi32(INT32_C(-1161246521), INT32_C(-1263382687), INT32_C( -761171059), INT32_C( 1052537110),
+ INT32_C(-1225204820), INT32_C( 1299827393), INT32_C( 477328169), INT32_C( 2043159101),
+ INT32_C( 984199920), INT32_C( 1963689737), INT32_C(-1149812166), INT32_C( -500241318),
+ INT32_C( -953270640), INT32_C( 1180984926), INT32_C( -645305643), INT32_C( 1026486800)),
+ simde_mm512_set_epi32(INT32_C( 726531409), INT32_C(-1668696747), INT32_C(-1057918709), INT32_C( -811736744),
+ INT32_C(-1460245574), INT32_C( 1787896071), INT32_C( 1799586442), INT32_C(-1105519928),
+ INT32_C( 1487945496), INT32_C(-2144392739), INT32_C( 1110910857), INT32_C( -30788356),
+ INT32_C( 719724936), INT32_C( 89938613), INT32_C( 129907566), INT32_C( -155658274)) },
+ { simde_mm512_set_epi32(INT32_C( 1723004290), INT32_C( 721161302), INT32_C( 1077400739), INT32_C( 861837752),
+ INT32_C(-1943224858), INT32_C( 2112602876), INT32_C(-1445821889), INT32_C(-2100432693),
+ INT32_C(-1175934343), INT32_C( 805502143), INT32_C( 1163969458), INT32_C( 873642413),
+ INT32_C( 2052720739), INT32_C(-1010971457), INT32_C( 199344228), INT32_C( 251460647)),
+ UINT16_C(59134),
+ simde_mm512_set_epi32(INT32_C(-1391704351), INT32_C( -847303025), INT32_C(-1711491580), INT32_C( -147993971),
+ INT32_C(-1140349230), INT32_C( 172650828), INT32_C(-2090294261), INT32_C( -216506888),
+ INT32_C(-1813744120), INT32_C( 1589656338), INT32_C( 1010967585), INT32_C(-2076714127),
+ INT32_C( 1156626662), INT32_C( -264321123), INT32_C(-1099385436), INT32_C( -148901794)),
+ simde_mm512_set_epi32(INT32_C( 1003282629), INT32_C( 1250297288), INT32_C( 26548422), INT32_C(-1100962758),
+ INT32_C( 1934048830), INT32_C( -886200980), INT32_C( -228926178), INT32_C( 21722717),
+ INT32_C(-1321187708), INT32_C( 904822803), INT32_C( -875700432), INT32_C(-1302414558),
+ INT32_C( 962131440), INT32_C( -729214075), INT32_C(-1094266114), INT32_C( 1122895720)),
+ simde_mm512_set_epi32(INT32_C( 1899980316), INT32_C(-2097600313), INT32_C(-1738040002), INT32_C( 861837752),
+ INT32_C(-1943224858), INT32_C( 1058851808), INT32_C(-1861368083), INT32_C(-2100432693),
+ INT32_C( -492556412), INT32_C( 684833535), INT32_C( 1886668017), INT32_C( -774299569),
+ INT32_C( 194495222), INT32_C( 464892952), INT32_C( -5119322), INT32_C( 251460647)) }
+ };
+
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m512i r = simde_mm512_mask_sub_epi32(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b);
+ simde_assert_m512i_i32(r, ==, test_vec[i].r);
+ }
+
+ return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm512_mask_sub_epi64(const MunitParameter params[], void* data) {
+ (void) params;
+ (void) data;
+
+ const struct {
+ simde__m512i src;
+ simde__mmask8 k;
+ simde__m512i a;
+ simde__m512i b;
+ simde__m512i r;
+ } test_vec[8] = {
+ { simde_mm512_set_epi64(INT64_C( 8894478799917719473), INT64_C(-7614529333518044459),
+ INT64_C( 8458392650500739529), INT64_C( 7085639313865748967),
+ INT64_C(-7547504459018552290), INT64_C(-8310189466716392279),
+ INT64_C(-1750715323825344235), INT64_C(-2532781790488219528)),
+ UINT8_C(106),
+ simde_mm512_set_epi64(INT64_C(-7192427816606966254), INT64_C(-1619523557840103557),
+ INT64_C( 7616061596213068646), INT64_C( -560841280842371832),
+ INT64_C( -806373115982863580), INT64_C( -816793021936842074),
+ INT64_C( -317565234288882547), INT64_C(-7290553309909260368)),
+ simde_mm512_set_epi64(INT64_C(-9084839040863053259), INT64_C( 332697972184433101),
+ INT64_C(-8959492887484217950), INT64_C( 7617292932467329680),
+ INT64_C(-2740045277871922718), INT64_C(-3634413508032825567),
+ INT64_C( -448440935066054877), INT64_C(-6805574594168851327)),
+ simde_mm512_set_epi64(INT64_C( 8894478799917719473), INT64_C(-1952221530024536658),
+ INT64_C(-1871189590012265020), INT64_C( 7085639313865748967),
+ INT64_C( 1933672161889059138), INT64_C(-8310189466716392279),
+ INT64_C( 130875700777172330), INT64_C(-2532781790488219528)) },
+ { simde_mm512_set_epi64(INT64_C(-3459089877760882917), INT64_C( 1753327656617706405),
+ INT64_C( 3932187030396497555), INT64_C(-4341921971190139713),
+ INT64_C(-7354864635860030437), INT64_C(-7512931671900842140),
+ INT64_C( 7677521206664265888), INT64_C(-8008068901606036732)),
+ UINT8_C( 1),
+ simde_mm512_set_epi64(INT64_C(-9084086707853197365), INT64_C( 5962789269656503800),
+ INT64_C( 6806616562165680967), INT64_C( 8724516399523474076),
+ INT64_C( -924171789017863248), INT64_C(-2255835938032964673),
+ INT64_C(-4560088794132063361), INT64_C(-5517329800302195238)),
+ simde_mm512_set_epi64(INT64_C(-2849655299932577704), INT64_C( 2712991932590941674),
+ INT64_C( 2564329750539599066), INT64_C(-4536455326234991583),
+ INT64_C(-6477728239233614839), INT64_C(-5729565646249538826),
+ INT64_C( 3092410715614407585), INT64_C( 7984397770129184299)),
+ simde_mm512_set_epi64(INT64_C(-3459089877760882917), INT64_C( 1753327656617706405),
+ INT64_C( 3932187030396497555), INT64_C(-4341921971190139713),
+ INT64_C(-7354864635860030437), INT64_C(-7512931671900842140),
+ INT64_C( 7677521206664265888), INT64_C( 4945016503278172079)) },
+ { simde_mm512_set_epi64(INT64_C( -240340334077349403), INT64_C( 5647038489743797240),
+ INT64_C( 5171415873092064400), INT64_C(-1851380595205120917),
+ INT64_C( -836370148956202078), INT64_C( 8425549504970400810),
+ INT64_C( 2808549870315159479), INT64_C( 3545474415643732634)),
+ UINT8_C(194),
+ simde_mm512_set_epi64(INT64_C(-5877702108931305293), INT64_C(-5372639016544358566),
+ INT64_C(-4535660820549680684), INT64_C(-6747544612783901147),
+ INT64_C( 6705850594648382655), INT64_C(-1906321743942105225),
+ INT64_C( -281981608123407868), INT64_C(-5990711758326206044)),
+ simde_mm512_set_epi64(INT64_C( 8110080903340414341), INT64_C(-3598578875674169061),
+ INT64_C( 4977285870543484474), INT64_C( 6776152673642620958),
+ INT64_C( 4245929756722282054), INT64_C( 3649495924615361625),
+ INT64_C( -638056186877872345), INT64_C(-8828385988165140326)),
+ simde_mm512_set_epi64(INT64_C( 4458961061437831982), INT64_C(-1774060140870189505),
+ INT64_C( 5171415873092064400), INT64_C(-1851380595205120917),
+ INT64_C( -836370148956202078), INT64_C( 8425549504970400810),
+ INT64_C( 356074578754464477), INT64_C( 3545474415643732634)) },
+ { simde_mm512_set_epi64(INT64_C(-6385979888474332285), INT64_C( 3716758445629922885),
+ INT64_C( 7861010731589253148), INT64_C(-6334773111204875550),
+ INT64_C(-5054960975820633825), INT64_C( 8639514840721539279),
+ INT64_C(-1027366943904624518), INT64_C(-4721195859159142702)),
+ UINT8_C(222),
+ simde_mm512_set_epi64(INT64_C(-7001132877809342173), INT64_C( 6512733899690414848),
+ INT64_C( 988878120815000883), INT64_C(-5994563704199492012),
+ INT64_C( 1587634372980811194), INT64_C( -914749563856678715),
+ INT64_C( 7495962388934953888), INT64_C(-7831181051188885332)),
+ simde_mm512_set_epi64(INT64_C( 4229507402435677476), INT64_C( 2501842736425447642),
+ INT64_C( 8009397189160901283), INT64_C( 3833558633773719409),
+ INT64_C( 2852442819818074174), INT64_C(-8638015813272823849),
+ INT64_C( 8579593880416924807), INT64_C( 2713766728753976690)),
+ simde_mm512_set_epi64(INT64_C( 7216103793464531967), INT64_C( 4010891163264967206),
+ INT64_C( 7861010731589253148), INT64_C( 8618621735736340195),
+ INT64_C(-1264808446837262980), INT64_C( 7723266249416145134),
+ INT64_C(-1083631491481970919), INT64_C(-4721195859159142702)) },
+ { simde_mm512_set_epi64(INT64_C( 4051614369896270101), INT64_C( 6703896128856670897),
+ INT64_C(-5750389130785475983), INT64_C(-7878547924784098469),
+ INT64_C( 5491867996743881624), INT64_C(-2189602113514909499),
+ INT64_C( -887220462507309287), INT64_C(-5733898489940979010)),
+ UINT8_C( 26),
+ simde_mm512_set_epi64(INT64_C( -99656633840764240), INT64_C(-3479731851565468885),
+ INT64_C(-7074577238264434881), INT64_C(-3836339826871533273),
+ INT64_C( 4198283975631841849), INT64_C(-3829622956767240841),
+ INT64_C( 5960966148924368684), INT64_C( -504125670847055963)),
+ simde_mm512_set_epi64(INT64_C(-8344319212574510912), INT64_C(-3371415321000668561),
+ INT64_C(-8338525176508042897), INT64_C( 5173420397567361383),
+ INT64_C(-6751809518396836721), INT64_C(-8388491552134432960),
+ INT64_C(-9161028627110906680), INT64_C( 7472048750700349549)),
+ simde_mm512_set_epi64(INT64_C( 4051614369896270101), INT64_C( 6703896128856670897),
+ INT64_C(-5750389130785475983), INT64_C(-9009760224438894656),
+ INT64_C(-7496650579680873046), INT64_C(-2189602113514909499),
+ INT64_C(-3324749297674276252), INT64_C(-5733898489940979010)) },
+ { simde_mm512_set_epi64(INT64_C(-6378393891104748170), INT64_C(-8478287659785501826),
+ INT64_C(-2127236125072242134), INT64_C( 8702738982982040445),
+ INT64_C( 645844328650761785), INT64_C(-4561773442934600720),
+ INT64_C(-5793568656482259588), INT64_C( -379681413311801170)),
+ UINT8_C(230),
+ simde_mm512_set_epi64(INT64_C( -848706848545220792), INT64_C(-1124075123789220737),
+ INT64_C(-2005439629632543252), INT64_C( 8274388146286059619),
+ INT64_C( -261550962782015927), INT64_C(-8761037216848109215),
+ INT64_C(-3016365966836321630), INT64_C( 2543055264688040393)),
+ simde_mm512_set_epi64(INT64_C( 1583638370136684317), INT64_C(-1184919915070849427),
+ INT64_C( 6948286910398693964), INT64_C( 2437457976149582578),
+ INT64_C( 3426542754873284897), INT64_C(-7983270512780038531),
+ INT64_C( 1779296328975282374), INT64_C(-5362999871220584978)),
+ simde_mm512_set_epi64(INT64_C(-2432345218681905109), INT64_C( 60844791281628690),
+ INT64_C(-8953726540031237216), INT64_C( 8702738982982040445),
+ INT64_C( 645844328650761785), INT64_C( -777766704068070684),
+ INT64_C(-4795662295811604004), INT64_C( -379681413311801170)) },
+ { simde_mm512_set_epi64(INT64_C(-2563692560784467599), INT64_C(-2764729313181954331),
+ INT64_C( 7449793955604076666), INT64_C(-6302011830015535814),
+ INT64_C(-5919077484698028869), INT64_C(-6127059769393124093),
+ INT64_C( 2958642729945465911), INT64_C( 2772140786646472311)),
+ UINT8_C(198),
+ simde_mm512_set_epi64(INT64_C(-3934991658845807023), INT64_C( 7561755153516237296),
+ INT64_C(-1521478373140770922), INT64_C( 6956443634033398294),
+ INT64_C(-5307063963483146371), INT64_C( 6556039892370535969),
+ INT64_C(-6645788521893978945), INT64_C(-6307512051127595595)),
+ simde_mm512_set_epi64(INT64_C(-7270561721689602230), INT64_C( 8935792808270452615),
+ INT64_C( 1984489943341614372), INT64_C( 6860868624136070257),
+ INT64_C(-2243581398369652256), INT64_C(-6592818671779181804),
+ INT64_C( -308663241436655846), INT64_C(-8935526257161672911)),
+ simde_mm512_set_epi64(INT64_C( 3335570062843795207), INT64_C(-1374037654754215319),
+ INT64_C( 7449793955604076666), INT64_C(-6302011830015535814),
+ INT64_C(-5919077484698028869), INT64_C(-5297885509559833843),
+ INT64_C(-6337125280457323099), INT64_C( 2772140786646472311)) },
+ { simde_mm512_set_epi64(INT64_C(-7511866029206584895), INT64_C( 6685003933657692663),
+ INT64_C( 112057327023275278), INT64_C( 2785131907782223781),
+ INT64_C( -403719025987547254), INT64_C(-5974279397850363938),
+ INT64_C(-6601571580489345254), INT64_C( 1896379997419403836)),
+ UINT8_C( 70),
+ simde_mm512_set_epi64(INT64_C(-6334367433946281110), INT64_C(-5840485098030444461),
+ INT64_C(-6383956557021185117), INT64_C(-7600398675722821668),
+ INT64_C(-2279362749413199885), INT64_C(-8009539466982888201),
+ INT64_C( 340327559398526723), INT64_C(-2438629088141247826)),
+ simde_mm512_set_epi64(INT64_C( 3758222621544461478), INT64_C( 8264387002851618510),
+ INT64_C( 5256515298231032169), INT64_C( 4555501816451377355),
+ INT64_C(-9184304616258229288), INT64_C( 5115688705834988612),
+ INT64_C(-3795492187184599084), INT64_C(-3221204559120447653)),
+ simde_mm512_set_epi64(INT64_C(-7511866029206584895), INT64_C( 4341871972827488645),
+ INT64_C( 112057327023275278), INT64_C( 2785131907782223781),
+ INT64_C( -403719025987547254), INT64_C( 5321515900891674803),
+ INT64_C( 4135819746583125807), INT64_C( 1896379997419403836)) }
+ };
+
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m512i r = simde_mm512_mask_sub_epi64(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b);
+ simde_assert_m512i_i64(r, ==, test_vec[i].r);
+ }
+
+ return MUNIT_OK;
+}
+
+static MunitResult
+test_simde_mm512_mask_sub_ps(const MunitParameter params[], void* data) {
+ (void) params;
+ (void) data;
+
+ const struct {
+ simde__m512 src;
+ simde__mmask16 k;
+ simde__m512 a;
+ simde__m512 b;
+ simde__m512 r;
+ } test_vec[8] = {
+ { simde_mm512_set_ps(SIMDE_FLOAT32_C( -417.79), SIMDE_FLOAT32_C( -912.83), SIMDE_FLOAT32_C( 111.29), SIMDE_FLOAT32_C( -470.87),
+ SIMDE_FLOAT32_C( 685.45), SIMDE_FLOAT32_C( -92.85), SIMDE_FLOAT32_C( 704.55), SIMDE_FLOAT32_C( 450.79),
+ SIMDE_FLOAT32_C( -761.01), SIMDE_FLOAT32_C( -759.35), SIMDE_FLOAT32_C( 646.77), SIMDE_FLOAT32_C( 616.33),
+ SIMDE_FLOAT32_C( 922.76), SIMDE_FLOAT32_C( 721.94), SIMDE_FLOAT32_C( 721.78), SIMDE_FLOAT32_C( 651.66)),
+ UINT16_C(55049),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( 492.15), SIMDE_FLOAT32_C( 363.86), SIMDE_FLOAT32_C( -906.93), SIMDE_FLOAT32_C( -51.88),
+ SIMDE_FLOAT32_C( 976.36), SIMDE_FLOAT32_C( 844.84), SIMDE_FLOAT32_C( 525.57), SIMDE_FLOAT32_C( 575.43),
+ SIMDE_FLOAT32_C( -719.61), SIMDE_FLOAT32_C( 570.91), SIMDE_FLOAT32_C( -748.06), SIMDE_FLOAT32_C( 823.89),
+ SIMDE_FLOAT32_C( -708.11), SIMDE_FLOAT32_C( -805.87), SIMDE_FLOAT32_C( 626.28), SIMDE_FLOAT32_C( 344.43)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( -814.48), SIMDE_FLOAT32_C( 843.19), SIMDE_FLOAT32_C( -866.28), SIMDE_FLOAT32_C( -230.51),
+ SIMDE_FLOAT32_C( -264.51), SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 479.68), SIMDE_FLOAT32_C( -375.52),
+ SIMDE_FLOAT32_C( -928.92), SIMDE_FLOAT32_C( -243.75), SIMDE_FLOAT32_C( 771.60), SIMDE_FLOAT32_C( 150.31),
+ SIMDE_FLOAT32_C( -627.83), SIMDE_FLOAT32_C( -720.61), SIMDE_FLOAT32_C( 345.13), SIMDE_FLOAT32_C( 203.00)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( 1306.63), SIMDE_FLOAT32_C( -479.33), SIMDE_FLOAT32_C( 111.29), SIMDE_FLOAT32_C( 178.63),
+ SIMDE_FLOAT32_C( 685.45), SIMDE_FLOAT32_C( -90.55), SIMDE_FLOAT32_C( 45.89), SIMDE_FLOAT32_C( 950.95),
+ SIMDE_FLOAT32_C( -761.01), SIMDE_FLOAT32_C( -759.35), SIMDE_FLOAT32_C( 646.77), SIMDE_FLOAT32_C( 616.33),
+ SIMDE_FLOAT32_C( -80.28), SIMDE_FLOAT32_C( 721.94), SIMDE_FLOAT32_C( 721.78), SIMDE_FLOAT32_C( 141.43)) },
+ { simde_mm512_set_ps(SIMDE_FLOAT32_C( -594.79), SIMDE_FLOAT32_C( -68.26), SIMDE_FLOAT32_C( 772.68), SIMDE_FLOAT32_C( -615.12),
+ SIMDE_FLOAT32_C( 489.20), SIMDE_FLOAT32_C( -609.74), SIMDE_FLOAT32_C( -297.42), SIMDE_FLOAT32_C( -701.58),
+ SIMDE_FLOAT32_C( 71.34), SIMDE_FLOAT32_C( -811.20), SIMDE_FLOAT32_C( -44.61), SIMDE_FLOAT32_C( 172.32),
+ SIMDE_FLOAT32_C( -336.24), SIMDE_FLOAT32_C( -959.77), SIMDE_FLOAT32_C( 896.40), SIMDE_FLOAT32_C( 321.28)),
+ UINT16_C( 2266),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( 136.73), SIMDE_FLOAT32_C( 408.70), SIMDE_FLOAT32_C( 907.04), SIMDE_FLOAT32_C( 175.32),
+ SIMDE_FLOAT32_C( 125.78), SIMDE_FLOAT32_C( -176.42), SIMDE_FLOAT32_C( -192.20), SIMDE_FLOAT32_C( 636.29),
+ SIMDE_FLOAT32_C( -812.72), SIMDE_FLOAT32_C( -295.02), SIMDE_FLOAT32_C( 426.00), SIMDE_FLOAT32_C( 348.29),
+ SIMDE_FLOAT32_C( 859.20), SIMDE_FLOAT32_C( -28.95), SIMDE_FLOAT32_C( -637.06), SIMDE_FLOAT32_C( -450.15)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( -929.87), SIMDE_FLOAT32_C( -208.53), SIMDE_FLOAT32_C( 561.71), SIMDE_FLOAT32_C( -74.05),
+ SIMDE_FLOAT32_C( 477.79), SIMDE_FLOAT32_C( 772.49), SIMDE_FLOAT32_C( 648.48), SIMDE_FLOAT32_C( -58.61),
+ SIMDE_FLOAT32_C( 835.38), SIMDE_FLOAT32_C( -689.00), SIMDE_FLOAT32_C( 607.03), SIMDE_FLOAT32_C( 421.78),
+ SIMDE_FLOAT32_C( -574.15), SIMDE_FLOAT32_C( 302.76), SIMDE_FLOAT32_C( 178.11), SIMDE_FLOAT32_C( -298.57)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( -594.79), SIMDE_FLOAT32_C( -68.26), SIMDE_FLOAT32_C( 772.68), SIMDE_FLOAT32_C( -615.12),
+ SIMDE_FLOAT32_C( -352.01), SIMDE_FLOAT32_C( -609.74), SIMDE_FLOAT32_C( -297.42), SIMDE_FLOAT32_C( -701.58),
+ SIMDE_FLOAT32_C( -1648.10), SIMDE_FLOAT32_C( 393.98), SIMDE_FLOAT32_C( -44.61), SIMDE_FLOAT32_C( -73.49),
+ SIMDE_FLOAT32_C( 1433.35), SIMDE_FLOAT32_C( -959.77), SIMDE_FLOAT32_C( -815.17), SIMDE_FLOAT32_C( 321.28)) },
+ { simde_mm512_set_ps(SIMDE_FLOAT32_C( -914.76), SIMDE_FLOAT32_C( 285.68), SIMDE_FLOAT32_C( 695.03), SIMDE_FLOAT32_C( -235.78),
+ SIMDE_FLOAT32_C( 90.17), SIMDE_FLOAT32_C( 891.02), SIMDE_FLOAT32_C( -456.46), SIMDE_FLOAT32_C( 952.55),
+ SIMDE_FLOAT32_C( -153.33), SIMDE_FLOAT32_C( -533.35), SIMDE_FLOAT32_C( -130.02), SIMDE_FLOAT32_C( -580.21),
+ SIMDE_FLOAT32_C( -857.73), SIMDE_FLOAT32_C( -362.64), SIMDE_FLOAT32_C( 808.25), SIMDE_FLOAT32_C( 908.95)),
+ UINT16_C(53407),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( 415.38), SIMDE_FLOAT32_C( 622.33), SIMDE_FLOAT32_C( 849.49), SIMDE_FLOAT32_C( -552.97),
+ SIMDE_FLOAT32_C( 837.01), SIMDE_FLOAT32_C( -753.98), SIMDE_FLOAT32_C( 167.51), SIMDE_FLOAT32_C( 898.60),
+ SIMDE_FLOAT32_C( -36.68), SIMDE_FLOAT32_C( -931.19), SIMDE_FLOAT32_C( 230.22), SIMDE_FLOAT32_C( -885.80),
+ SIMDE_FLOAT32_C( -894.49), SIMDE_FLOAT32_C( -402.23), SIMDE_FLOAT32_C( -68.60), SIMDE_FLOAT32_C( -153.88)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( 247.18), SIMDE_FLOAT32_C( 507.40), SIMDE_FLOAT32_C( -715.17), SIMDE_FLOAT32_C( 785.48),
+ SIMDE_FLOAT32_C( -543.41), SIMDE_FLOAT32_C( 761.08), SIMDE_FLOAT32_C( 479.07), SIMDE_FLOAT32_C( -938.93),
+ SIMDE_FLOAT32_C( -655.56), SIMDE_FLOAT32_C( 618.55), SIMDE_FLOAT32_C( 224.83), SIMDE_FLOAT32_C( -983.99),
+ SIMDE_FLOAT32_C( -18.22), SIMDE_FLOAT32_C( -142.62), SIMDE_FLOAT32_C( 120.01), SIMDE_FLOAT32_C( 186.92)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( 168.20), SIMDE_FLOAT32_C( 114.93), SIMDE_FLOAT32_C( 695.03), SIMDE_FLOAT32_C( -1338.45),
+ SIMDE_FLOAT32_C( 90.17), SIMDE_FLOAT32_C( 891.02), SIMDE_FLOAT32_C( -456.46), SIMDE_FLOAT32_C( 952.55),
+ SIMDE_FLOAT32_C( 618.88), SIMDE_FLOAT32_C( -533.35), SIMDE_FLOAT32_C( -130.02), SIMDE_FLOAT32_C( 98.19),
+ SIMDE_FLOAT32_C( -876.27), SIMDE_FLOAT32_C( -259.61), SIMDE_FLOAT32_C( -188.61), SIMDE_FLOAT32_C( -340.80)) },
+ { simde_mm512_set_ps(SIMDE_FLOAT32_C( -586.97), SIMDE_FLOAT32_C( -706.71), SIMDE_FLOAT32_C( 862.31), SIMDE_FLOAT32_C( 901.76),
+ SIMDE_FLOAT32_C( -777.23), SIMDE_FLOAT32_C( -615.23), SIMDE_FLOAT32_C( 540.06), SIMDE_FLOAT32_C( -837.05),
+ SIMDE_FLOAT32_C( 896.68), SIMDE_FLOAT32_C( -818.79), SIMDE_FLOAT32_C( -146.21), SIMDE_FLOAT32_C( -751.20),
+ SIMDE_FLOAT32_C( -724.86), SIMDE_FLOAT32_C( -446.10), SIMDE_FLOAT32_C( 747.21), SIMDE_FLOAT32_C( -830.22)),
+ UINT16_C(24145),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( 809.72), SIMDE_FLOAT32_C( -191.45), SIMDE_FLOAT32_C( -687.88), SIMDE_FLOAT32_C( -561.69),
+ SIMDE_FLOAT32_C( 623.06), SIMDE_FLOAT32_C( -685.16), SIMDE_FLOAT32_C( 155.59), SIMDE_FLOAT32_C( -91.67),
+ SIMDE_FLOAT32_C( -292.32), SIMDE_FLOAT32_C( 436.29), SIMDE_FLOAT32_C( 682.53), SIMDE_FLOAT32_C( -427.71),
+ SIMDE_FLOAT32_C( -252.26), SIMDE_FLOAT32_C( -814.33), SIMDE_FLOAT32_C( -116.78), SIMDE_FLOAT32_C( -176.18)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( -476.63), SIMDE_FLOAT32_C( -403.49), SIMDE_FLOAT32_C( -129.06), SIMDE_FLOAT32_C( -540.32),
+ SIMDE_FLOAT32_C( -296.84), SIMDE_FLOAT32_C( 354.93), SIMDE_FLOAT32_C( 301.70), SIMDE_FLOAT32_C( 818.26),
+ SIMDE_FLOAT32_C( 152.41), SIMDE_FLOAT32_C( -7.33), SIMDE_FLOAT32_C( 901.12), SIMDE_FLOAT32_C( 276.49),
+ SIMDE_FLOAT32_C( -421.45), SIMDE_FLOAT32_C( -19.17), SIMDE_FLOAT32_C( 559.47), SIMDE_FLOAT32_C( -62.60)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( -586.97), SIMDE_FLOAT32_C( 212.04), SIMDE_FLOAT32_C( 862.31), SIMDE_FLOAT32_C( -21.37),
+ SIMDE_FLOAT32_C( 919.90), SIMDE_FLOAT32_C( -1040.09), SIMDE_FLOAT32_C( -146.11), SIMDE_FLOAT32_C( -837.05),
+ SIMDE_FLOAT32_C( 896.68), SIMDE_FLOAT32_C( 443.62), SIMDE_FLOAT32_C( -146.21), SIMDE_FLOAT32_C( -704.20),
+ SIMDE_FLOAT32_C( -724.86), SIMDE_FLOAT32_C( -446.10), SIMDE_FLOAT32_C( 747.21), SIMDE_FLOAT32_C( -113.58)) },
+ { simde_mm512_set_ps(SIMDE_FLOAT32_C( 853.44), SIMDE_FLOAT32_C( 804.93), SIMDE_FLOAT32_C( 753.54), SIMDE_FLOAT32_C( 129.42),
+ SIMDE_FLOAT32_C( -911.24), SIMDE_FLOAT32_C( -795.01), SIMDE_FLOAT32_C( -264.21), SIMDE_FLOAT32_C( 110.23),
+ SIMDE_FLOAT32_C( 779.42), SIMDE_FLOAT32_C( 756.19), SIMDE_FLOAT32_C( -61.94), SIMDE_FLOAT32_C( -845.71),
+ SIMDE_FLOAT32_C( 522.75), SIMDE_FLOAT32_C( 703.06), SIMDE_FLOAT32_C( 989.80), SIMDE_FLOAT32_C( 594.14)),
+ UINT16_C(58122),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( 774.43), SIMDE_FLOAT32_C( 251.56), SIMDE_FLOAT32_C( -915.66), SIMDE_FLOAT32_C( -492.31),
+ SIMDE_FLOAT32_C( 722.32), SIMDE_FLOAT32_C( 853.19), SIMDE_FLOAT32_C( 466.28), SIMDE_FLOAT32_C( 573.97),
+ SIMDE_FLOAT32_C( -516.73), SIMDE_FLOAT32_C( -267.27), SIMDE_FLOAT32_C( 110.95), SIMDE_FLOAT32_C( -68.16),
+ SIMDE_FLOAT32_C( -400.30), SIMDE_FLOAT32_C( 327.53), SIMDE_FLOAT32_C( -638.51), SIMDE_FLOAT32_C( -96.92)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( 101.96), SIMDE_FLOAT32_C( -734.61), SIMDE_FLOAT32_C( 219.43), SIMDE_FLOAT32_C( -507.66),
+ SIMDE_FLOAT32_C( -747.54), SIMDE_FLOAT32_C( 794.68), SIMDE_FLOAT32_C( -663.99), SIMDE_FLOAT32_C( -123.94),
+ SIMDE_FLOAT32_C( -793.12), SIMDE_FLOAT32_C( 673.57), SIMDE_FLOAT32_C( -777.14), SIMDE_FLOAT32_C( 175.88),
+ SIMDE_FLOAT32_C( -792.24), SIMDE_FLOAT32_C( -246.51), SIMDE_FLOAT32_C( 848.21), SIMDE_FLOAT32_C( -124.15)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( 672.47), SIMDE_FLOAT32_C( 986.17), SIMDE_FLOAT32_C( -1135.09), SIMDE_FLOAT32_C( 129.42),
+ SIMDE_FLOAT32_C( -911.24), SIMDE_FLOAT32_C( -795.01), SIMDE_FLOAT32_C( 1130.27), SIMDE_FLOAT32_C( 697.91),
+ SIMDE_FLOAT32_C( 779.42), SIMDE_FLOAT32_C( 756.19), SIMDE_FLOAT32_C( -61.94), SIMDE_FLOAT32_C( -845.71),
+ SIMDE_FLOAT32_C( 391.94), SIMDE_FLOAT32_C( 703.06), SIMDE_FLOAT32_C( -1486.72), SIMDE_FLOAT32_C( 594.14)) },
+ { simde_mm512_set_ps(SIMDE_FLOAT32_C( -670.29), SIMDE_FLOAT32_C( 821.01), SIMDE_FLOAT32_C( -293.06), SIMDE_FLOAT32_C( -56.42),
+ SIMDE_FLOAT32_C( -163.64), SIMDE_FLOAT32_C( -919.47), SIMDE_FLOAT32_C( 636.75), SIMDE_FLOAT32_C( 555.64),
+ SIMDE_FLOAT32_C( 630.28), SIMDE_FLOAT32_C( 798.33), SIMDE_FLOAT32_C( -536.88), SIMDE_FLOAT32_C( 256.29),
+ SIMDE_FLOAT32_C( 834.99), SIMDE_FLOAT32_C( -678.50), SIMDE_FLOAT32_C( -716.28), SIMDE_FLOAT32_C( -235.17)),
+ UINT16_C( 7968),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( 575.18), SIMDE_FLOAT32_C( -655.63), SIMDE_FLOAT32_C( 986.91), SIMDE_FLOAT32_C( 710.96),
+ SIMDE_FLOAT32_C( 921.30), SIMDE_FLOAT32_C( -96.00), SIMDE_FLOAT32_C( -68.75), SIMDE_FLOAT32_C( -119.17),
+ SIMDE_FLOAT32_C( -795.52), SIMDE_FLOAT32_C( -851.06), SIMDE_FLOAT32_C( 982.58), SIMDE_FLOAT32_C( 432.45),
+ SIMDE_FLOAT32_C( 834.71), SIMDE_FLOAT32_C( -931.48), SIMDE_FLOAT32_C( 421.86), SIMDE_FLOAT32_C( 549.54)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( 699.42), SIMDE_FLOAT32_C( -430.21), SIMDE_FLOAT32_C( -842.83), SIMDE_FLOAT32_C( -375.32),
+ SIMDE_FLOAT32_C( -889.13), SIMDE_FLOAT32_C( 77.46), SIMDE_FLOAT32_C( -426.32), SIMDE_FLOAT32_C( -319.52),
+ SIMDE_FLOAT32_C( 633.46), SIMDE_FLOAT32_C( -484.05), SIMDE_FLOAT32_C( 991.09), SIMDE_FLOAT32_C( 894.84),
+ SIMDE_FLOAT32_C( 148.17), SIMDE_FLOAT32_C( -167.11), SIMDE_FLOAT32_C( -811.87), SIMDE_FLOAT32_C( -574.29)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( -670.29), SIMDE_FLOAT32_C( 821.01), SIMDE_FLOAT32_C( -293.06), SIMDE_FLOAT32_C( 1086.28),
+ SIMDE_FLOAT32_C( 1810.43), SIMDE_FLOAT32_C( -173.46), SIMDE_FLOAT32_C( 357.57), SIMDE_FLOAT32_C( 200.35),
+ SIMDE_FLOAT32_C( 630.28), SIMDE_FLOAT32_C( 798.33), SIMDE_FLOAT32_C( -8.51), SIMDE_FLOAT32_C( 256.29),
+ SIMDE_FLOAT32_C( 834.99), SIMDE_FLOAT32_C( -678.50), SIMDE_FLOAT32_C( -716.28), SIMDE_FLOAT32_C( -235.17)) },
+ { simde_mm512_set_ps(SIMDE_FLOAT32_C( 640.00), SIMDE_FLOAT32_C( 440.55), SIMDE_FLOAT32_C( 793.44), SIMDE_FLOAT32_C( 554.05),
+ SIMDE_FLOAT32_C( 245.74), SIMDE_FLOAT32_C( -388.16), SIMDE_FLOAT32_C( -27.32), SIMDE_FLOAT32_C( -923.44),
+ SIMDE_FLOAT32_C( 109.81), SIMDE_FLOAT32_C( 855.67), SIMDE_FLOAT32_C( -513.53), SIMDE_FLOAT32_C( -921.47),
+ SIMDE_FLOAT32_C( -410.90), SIMDE_FLOAT32_C( -404.15), SIMDE_FLOAT32_C( -502.43), SIMDE_FLOAT32_C( -674.13)),
+ UINT16_C(34235),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( 455.94), SIMDE_FLOAT32_C( 822.75), SIMDE_FLOAT32_C( 672.52), SIMDE_FLOAT32_C( 418.16),
+ SIMDE_FLOAT32_C( 993.17), SIMDE_FLOAT32_C( -581.12), SIMDE_FLOAT32_C( 737.02), SIMDE_FLOAT32_C( -48.12),
+ SIMDE_FLOAT32_C( 169.53), SIMDE_FLOAT32_C( 875.02), SIMDE_FLOAT32_C( 325.94), SIMDE_FLOAT32_C( -197.05),
+ SIMDE_FLOAT32_C( 209.80), SIMDE_FLOAT32_C( 679.16), SIMDE_FLOAT32_C( -743.34), SIMDE_FLOAT32_C( 192.93)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( -400.32), SIMDE_FLOAT32_C( 747.89), SIMDE_FLOAT32_C( -417.14), SIMDE_FLOAT32_C( -149.76),
+ SIMDE_FLOAT32_C( -769.13), SIMDE_FLOAT32_C( 952.70), SIMDE_FLOAT32_C( 55.59), SIMDE_FLOAT32_C( -118.59),
+ SIMDE_FLOAT32_C( -651.36), SIMDE_FLOAT32_C( 213.50), SIMDE_FLOAT32_C( 998.39), SIMDE_FLOAT32_C( 155.85),
+ SIMDE_FLOAT32_C( 985.22), SIMDE_FLOAT32_C( -399.37), SIMDE_FLOAT32_C( -660.54), SIMDE_FLOAT32_C( -918.87)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( 856.26), SIMDE_FLOAT32_C( 440.55), SIMDE_FLOAT32_C( 793.44), SIMDE_FLOAT32_C( 554.05),
+ SIMDE_FLOAT32_C( 245.74), SIMDE_FLOAT32_C( -1533.82), SIMDE_FLOAT32_C( -27.32), SIMDE_FLOAT32_C( 70.47),
+ SIMDE_FLOAT32_C( 820.89), SIMDE_FLOAT32_C( 855.67), SIMDE_FLOAT32_C( -672.45), SIMDE_FLOAT32_C( -352.90),
+ SIMDE_FLOAT32_C( -775.42), SIMDE_FLOAT32_C( -404.15), SIMDE_FLOAT32_C( -82.80), SIMDE_FLOAT32_C( 1111.80)) },
+ { simde_mm512_set_ps(SIMDE_FLOAT32_C( -717.43), SIMDE_FLOAT32_C( 307.65), SIMDE_FLOAT32_C( -776.64), SIMDE_FLOAT32_C( 883.24),
+ SIMDE_FLOAT32_C( 462.38), SIMDE_FLOAT32_C( 941.52), SIMDE_FLOAT32_C( 465.21), SIMDE_FLOAT32_C( 772.92),
+ SIMDE_FLOAT32_C( -448.96), SIMDE_FLOAT32_C( 167.95), SIMDE_FLOAT32_C( -770.79), SIMDE_FLOAT32_C( 607.02),
+ SIMDE_FLOAT32_C( 588.25), SIMDE_FLOAT32_C( -430.65), SIMDE_FLOAT32_C( -379.22), SIMDE_FLOAT32_C( 62.66)),
+ UINT16_C(21184),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( 745.31), SIMDE_FLOAT32_C( 528.47), SIMDE_FLOAT32_C( 260.56), SIMDE_FLOAT32_C( 756.92),
+ SIMDE_FLOAT32_C( -237.78), SIMDE_FLOAT32_C( 890.33), SIMDE_FLOAT32_C( -276.66), SIMDE_FLOAT32_C( -845.25),
+ SIMDE_FLOAT32_C( 73.01), SIMDE_FLOAT32_C( -169.10), SIMDE_FLOAT32_C( -390.26), SIMDE_FLOAT32_C( 55.87),
+ SIMDE_FLOAT32_C( 461.32), SIMDE_FLOAT32_C( -911.03), SIMDE_FLOAT32_C( 362.01), SIMDE_FLOAT32_C( 998.06)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( 177.96), SIMDE_FLOAT32_C( -105.40), SIMDE_FLOAT32_C( -516.55), SIMDE_FLOAT32_C( -62.31),
+ SIMDE_FLOAT32_C( -757.68), SIMDE_FLOAT32_C( 665.34), SIMDE_FLOAT32_C( 689.63), SIMDE_FLOAT32_C( 938.32),
+ SIMDE_FLOAT32_C( -408.00), SIMDE_FLOAT32_C( 998.26), SIMDE_FLOAT32_C( -263.70), SIMDE_FLOAT32_C( 807.54),
+ SIMDE_FLOAT32_C( 485.72), SIMDE_FLOAT32_C( -74.68), SIMDE_FLOAT32_C( 725.36), SIMDE_FLOAT32_C( 301.00)),
+ simde_mm512_set_ps(SIMDE_FLOAT32_C( -717.43), SIMDE_FLOAT32_C( 633.87), SIMDE_FLOAT32_C( -776.64), SIMDE_FLOAT32_C( 819.23),
+ SIMDE_FLOAT32_C( 462.38), SIMDE_FLOAT32_C( 941.52), SIMDE_FLOAT32_C( -966.29), SIMDE_FLOAT32_C( 772.92),
+ SIMDE_FLOAT32_C( 481.01), SIMDE_FLOAT32_C( -1167.36), SIMDE_FLOAT32_C( -770.79), SIMDE_FLOAT32_C( 607.02),
+ SIMDE_FLOAT32_C( 588.25), SIMDE_FLOAT32_C( -430.65), SIMDE_FLOAT32_C( -379.22), SIMDE_FLOAT32_C( 62.66)) }
+ };
+
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m512 r = simde_mm512_mask_sub_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b);
+ simde_assert_m512_close(r, test_vec[i].r, 1);
+ }
+
+ return MUNIT_OK;
+}
+
+static MunitResult /* Test: runs simde_mm512_mask_sub_pd over 8 fixed vectors and checks each result against a precomputed expectation. */
+test_simde_mm512_mask_sub_pd(const MunitParameter params[], void* data) {
+ (void) params; /* unused by this test */
+ (void) data; /* unused by this test */
+
+ const struct {
+ simde__m512d src; /* in the vectors below, r equals src in every lane whose mask bit is clear */
+ simde__mmask8 k; /* lane mask; in the vectors below bit i maps to lane i, where lane 0 is the LAST simde_mm512_set_pd argument */
+ simde__m512d a; /* minuend */
+ simde__m512d b; /* subtrahend */
+ simde__m512d r; /* expected result: a - b where the mask bit is set, src otherwise */
+ } test_vec[8] = {
+ { simde_mm512_set_pd(SIMDE_FLOAT64_C( -621.09), SIMDE_FLOAT64_C( 350.18),
+ SIMDE_FLOAT64_C( 873.40), SIMDE_FLOAT64_C( -136.67),
+ SIMDE_FLOAT64_C( -484.90), SIMDE_FLOAT64_C( 672.37),
+ SIMDE_FLOAT64_C( -983.97), SIMDE_FLOAT64_C( -747.18)),
+ UINT8_C(213),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( -615.22), SIMDE_FLOAT64_C( 861.93),
+ SIMDE_FLOAT64_C( -99.63), SIMDE_FLOAT64_C( -760.72),
+ SIMDE_FLOAT64_C( 803.54), SIMDE_FLOAT64_C( -811.65),
+ SIMDE_FLOAT64_C( -888.48), SIMDE_FLOAT64_C( 353.19)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( 451.54), SIMDE_FLOAT64_C( 490.96),
+ SIMDE_FLOAT64_C( -563.07), SIMDE_FLOAT64_C( -968.95),
+ SIMDE_FLOAT64_C( -964.80), SIMDE_FLOAT64_C( -259.48),
+ SIMDE_FLOAT64_C( -97.31), SIMDE_FLOAT64_C( 696.26)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C(-1066.76), SIMDE_FLOAT64_C( 370.97),
+ SIMDE_FLOAT64_C( 873.40), SIMDE_FLOAT64_C( 208.23),
+ SIMDE_FLOAT64_C( -484.90), SIMDE_FLOAT64_C( -552.17),
+ SIMDE_FLOAT64_C( -983.97), SIMDE_FLOAT64_C( -343.07)) },
+ { simde_mm512_set_pd(SIMDE_FLOAT64_C( 956.74), SIMDE_FLOAT64_C( 507.70),
+ SIMDE_FLOAT64_C( 525.25), SIMDE_FLOAT64_C( -653.24),
+ SIMDE_FLOAT64_C( -748.66), SIMDE_FLOAT64_C( 738.72),
+ SIMDE_FLOAT64_C( 584.29), SIMDE_FLOAT64_C( -344.89)),
+ UINT8_C(200),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( -70.99), SIMDE_FLOAT64_C( -712.48),
+ SIMDE_FLOAT64_C( 721.37), SIMDE_FLOAT64_C( 290.11),
+ SIMDE_FLOAT64_C( 739.65), SIMDE_FLOAT64_C( 378.13),
+ SIMDE_FLOAT64_C( 523.23), SIMDE_FLOAT64_C( 338.41)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( -243.21), SIMDE_FLOAT64_C( 71.87),
+ SIMDE_FLOAT64_C( 81.06), SIMDE_FLOAT64_C( 409.05),
+ SIMDE_FLOAT64_C( -595.58), SIMDE_FLOAT64_C( 278.33),
+ SIMDE_FLOAT64_C( -484.02), SIMDE_FLOAT64_C( -861.59)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( 172.22), SIMDE_FLOAT64_C( -784.35),
+ SIMDE_FLOAT64_C( 525.25), SIMDE_FLOAT64_C( -653.24),
+ SIMDE_FLOAT64_C( 1335.23), SIMDE_FLOAT64_C( 738.72),
+ SIMDE_FLOAT64_C( 584.29), SIMDE_FLOAT64_C( -344.89)) },
+ { simde_mm512_set_pd(SIMDE_FLOAT64_C( 475.39), SIMDE_FLOAT64_C( 345.93),
+ SIMDE_FLOAT64_C( 233.76), SIMDE_FLOAT64_C( -401.11),
+ SIMDE_FLOAT64_C( -964.57), SIMDE_FLOAT64_C( 939.13),
+ SIMDE_FLOAT64_C( -392.63), SIMDE_FLOAT64_C( -585.02)),
+ UINT8_C( 75),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( 496.11), SIMDE_FLOAT64_C( -235.94),
+ SIMDE_FLOAT64_C( -715.35), SIMDE_FLOAT64_C( 338.71),
+ SIMDE_FLOAT64_C( -776.11), SIMDE_FLOAT64_C( 941.96),
+ SIMDE_FLOAT64_C( 76.10), SIMDE_FLOAT64_C( -188.31)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( 824.70), SIMDE_FLOAT64_C( -886.45),
+ SIMDE_FLOAT64_C( 497.17), SIMDE_FLOAT64_C( -965.13),
+ SIMDE_FLOAT64_C( -601.99), SIMDE_FLOAT64_C( -657.07),
+ SIMDE_FLOAT64_C( 201.36), SIMDE_FLOAT64_C( -807.98)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( 475.39), SIMDE_FLOAT64_C( 650.51),
+ SIMDE_FLOAT64_C( 233.76), SIMDE_FLOAT64_C( -401.11),
+ SIMDE_FLOAT64_C( -174.12), SIMDE_FLOAT64_C( 939.13),
+ SIMDE_FLOAT64_C( -125.26), SIMDE_FLOAT64_C( 619.67)) },
+ { simde_mm512_set_pd(SIMDE_FLOAT64_C( -246.72), SIMDE_FLOAT64_C( -493.17),
+ SIMDE_FLOAT64_C( -501.93), SIMDE_FLOAT64_C( -95.50),
+ SIMDE_FLOAT64_C( 754.55), SIMDE_FLOAT64_C( -990.48),
+ SIMDE_FLOAT64_C( -396.36), SIMDE_FLOAT64_C( -466.97)),
+ UINT8_C( 69),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( 601.28), SIMDE_FLOAT64_C( -873.85),
+ SIMDE_FLOAT64_C( -689.96), SIMDE_FLOAT64_C( 31.77),
+ SIMDE_FLOAT64_C( -97.11), SIMDE_FLOAT64_C( 971.94),
+ SIMDE_FLOAT64_C( 389.02), SIMDE_FLOAT64_C( -650.79)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( 136.61), SIMDE_FLOAT64_C( 436.94),
+ SIMDE_FLOAT64_C( -777.02), SIMDE_FLOAT64_C( 166.29),
+ SIMDE_FLOAT64_C( -377.75), SIMDE_FLOAT64_C( 71.16),
+ SIMDE_FLOAT64_C( 481.01), SIMDE_FLOAT64_C( -926.81)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( -246.72), SIMDE_FLOAT64_C(-1310.79),
+ SIMDE_FLOAT64_C( -501.93), SIMDE_FLOAT64_C( -95.50),
+ SIMDE_FLOAT64_C( 754.55), SIMDE_FLOAT64_C( 900.78),
+ SIMDE_FLOAT64_C( -396.36), SIMDE_FLOAT64_C( 276.02)) },
+ { simde_mm512_set_pd(SIMDE_FLOAT64_C( -389.46), SIMDE_FLOAT64_C( -8.03),
+ SIMDE_FLOAT64_C( -523.51), SIMDE_FLOAT64_C( 466.89),
+ SIMDE_FLOAT64_C( 698.90), SIMDE_FLOAT64_C( -346.04),
+ SIMDE_FLOAT64_C( -734.67), SIMDE_FLOAT64_C( 404.34)),
+ UINT8_C(100),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( 989.13), SIMDE_FLOAT64_C( 228.14),
+ SIMDE_FLOAT64_C( 840.94), SIMDE_FLOAT64_C( -718.83),
+ SIMDE_FLOAT64_C( 274.95), SIMDE_FLOAT64_C( -99.21),
+ SIMDE_FLOAT64_C( 84.76), SIMDE_FLOAT64_C( -295.84)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( -956.22), SIMDE_FLOAT64_C( 564.94),
+ SIMDE_FLOAT64_C( -97.16), SIMDE_FLOAT64_C( -407.99),
+ SIMDE_FLOAT64_C( 352.62), SIMDE_FLOAT64_C( 244.25),
+ SIMDE_FLOAT64_C( 43.92), SIMDE_FLOAT64_C( 624.69)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( -389.46), SIMDE_FLOAT64_C( -336.80),
+ SIMDE_FLOAT64_C( 938.10), SIMDE_FLOAT64_C( 466.89),
+ SIMDE_FLOAT64_C( 698.90), SIMDE_FLOAT64_C( -343.46),
+ SIMDE_FLOAT64_C( -734.67), SIMDE_FLOAT64_C( 404.34)) },
+ { simde_mm512_set_pd(SIMDE_FLOAT64_C( -571.96), SIMDE_FLOAT64_C( 40.27),
+ SIMDE_FLOAT64_C( 676.69), SIMDE_FLOAT64_C( -150.37),
+ SIMDE_FLOAT64_C( 945.34), SIMDE_FLOAT64_C( 75.83),
+ SIMDE_FLOAT64_C( 64.75), SIMDE_FLOAT64_C( 239.06)),
+ UINT8_C(209),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( 792.47), SIMDE_FLOAT64_C( -265.19),
+ SIMDE_FLOAT64_C( -768.95), SIMDE_FLOAT64_C( 515.15),
+ SIMDE_FLOAT64_C( 350.59), SIMDE_FLOAT64_C( 422.68),
+ SIMDE_FLOAT64_C( 582.99), SIMDE_FLOAT64_C( -985.50)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( 12.38), SIMDE_FLOAT64_C( -71.80),
+ SIMDE_FLOAT64_C( 363.01), SIMDE_FLOAT64_C( -195.65),
+ SIMDE_FLOAT64_C( 967.47), SIMDE_FLOAT64_C( -4.13),
+ SIMDE_FLOAT64_C( -478.81), SIMDE_FLOAT64_C( 909.10)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( 780.09), SIMDE_FLOAT64_C( -193.39),
+ SIMDE_FLOAT64_C( 676.69), SIMDE_FLOAT64_C( 710.80),
+ SIMDE_FLOAT64_C( 945.34), SIMDE_FLOAT64_C( 75.83),
+ SIMDE_FLOAT64_C( 64.75), SIMDE_FLOAT64_C(-1894.60)) },
+ { simde_mm512_set_pd(SIMDE_FLOAT64_C( -879.88), SIMDE_FLOAT64_C( -687.95),
+ SIMDE_FLOAT64_C( -892.89), SIMDE_FLOAT64_C( -642.85),
+ SIMDE_FLOAT64_C( 533.08), SIMDE_FLOAT64_C( 898.29),
+ SIMDE_FLOAT64_C( -29.99), SIMDE_FLOAT64_C( 5.58)),
+ UINT8_C(186),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( 649.80), SIMDE_FLOAT64_C( -257.91),
+ SIMDE_FLOAT64_C( 356.56), SIMDE_FLOAT64_C( 567.70),
+ SIMDE_FLOAT64_C( -80.43), SIMDE_FLOAT64_C( -499.15),
+ SIMDE_FLOAT64_C( -866.12), SIMDE_FLOAT64_C( 639.40)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( 702.45), SIMDE_FLOAT64_C( 464.79),
+ SIMDE_FLOAT64_C( 387.80), SIMDE_FLOAT64_C( -528.10),
+ SIMDE_FLOAT64_C( -409.82), SIMDE_FLOAT64_C( -696.40),
+ SIMDE_FLOAT64_C( 455.43), SIMDE_FLOAT64_C( 856.81)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( -52.65), SIMDE_FLOAT64_C( -687.95),
+ SIMDE_FLOAT64_C( -31.24), SIMDE_FLOAT64_C( 1095.80),
+ SIMDE_FLOAT64_C( 329.39), SIMDE_FLOAT64_C( 898.29),
+ SIMDE_FLOAT64_C(-1321.55), SIMDE_FLOAT64_C( 5.58)) },
+ { simde_mm512_set_pd(SIMDE_FLOAT64_C( -750.95), SIMDE_FLOAT64_C( 203.46),
+ SIMDE_FLOAT64_C( 194.87), SIMDE_FLOAT64_C( 667.81),
+ SIMDE_FLOAT64_C( -258.76), SIMDE_FLOAT64_C( 897.89),
+ SIMDE_FLOAT64_C( 571.10), SIMDE_FLOAT64_C( -320.96)),
+ UINT8_C( 56),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( -938.69), SIMDE_FLOAT64_C( 74.05),
+ SIMDE_FLOAT64_C( -981.48), SIMDE_FLOAT64_C( -656.78),
+ SIMDE_FLOAT64_C( -794.37), SIMDE_FLOAT64_C( 177.36),
+ SIMDE_FLOAT64_C( 380.50), SIMDE_FLOAT64_C( 812.91)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( -10.37), SIMDE_FLOAT64_C( -894.99),
+ SIMDE_FLOAT64_C( -148.09), SIMDE_FLOAT64_C( 314.75),
+ SIMDE_FLOAT64_C( -740.28), SIMDE_FLOAT64_C( -372.00),
+ SIMDE_FLOAT64_C( -357.36), SIMDE_FLOAT64_C( -791.79)),
+ simde_mm512_set_pd(SIMDE_FLOAT64_C( -750.95), SIMDE_FLOAT64_C( 203.46),
+ SIMDE_FLOAT64_C( -833.39), SIMDE_FLOAT64_C( -971.53),
+ SIMDE_FLOAT64_C( -54.09), SIMDE_FLOAT64_C( 897.89),
+ SIMDE_FLOAT64_C( 571.10), SIMDE_FLOAT64_C( -320.96)) }
+ };
+
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { /* visit every entry of test_vec */
+ simde__m512d r = simde_mm512_mask_sub_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b);
+ simde_assert_m512d_close(r, test_vec[i].r, 1); /* NOTE(review): the third argument presumably sets the comparison precision; its exact meaning is defined by simde_assert_m512d_close, which is not visible here */
+ }
+
+ return MUNIT_OK; /* reached only if no assertion above aborted the test */
+}
+
static MunitResult
test_simde_mm512_and_si512(const MunitParameter params[], void* data) {
(void) params;
@@ -4706,6 +5338,10 @@ static MunitTest test_suite_tests[] = {
SIMDE_TESTS_DEFINE_TEST(mm512_sub_epi64),
SIMDE_TESTS_DEFINE_TEST(mm512_sub_ps),
SIMDE_TESTS_DEFINE_TEST(mm512_sub_pd),
+ SIMDE_TESTS_DEFINE_TEST(mm512_mask_sub_epi32),
+ SIMDE_TESTS_DEFINE_TEST(mm512_mask_sub_epi64),
+ SIMDE_TESTS_DEFINE_TEST(mm512_mask_sub_ps),
+ SIMDE_TESTS_DEFINE_TEST(mm512_mask_sub_pd),
SIMDE_TESTS_DEFINE_TEST(mm512_and_si512),
SIMDE_TESTS_DEFINE_TEST(mm512_andnot_si512),
=====================================
test/x86/skel.c
=====================================
@@ -2362,6 +2362,7 @@ test_simde_mm512_mask_xxx_epi32(const MunitParameter params[], void* data) {
simde__m512i src;
simde__mmask16 k;
simde__m512i a;
+ simde__m512i b;
simde__m512i r;
} test_vec[8] = {
@@ -2369,14 +2370,16 @@ test_simde_mm512_mask_xxx_epi32(const MunitParameter params[], void* data) {
printf("\n");
for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
- simde__m512i_private src, a, r;
+ simde__m512i_private src, a, b, r;
simde__mmask16 k;
munit_rand_memory(sizeof(src), (uint8_t*) &src);
munit_rand_memory(sizeof(k), (uint8_t*) &k);
munit_rand_memory(sizeof(a), (uint8_t*) &a);
+ munit_rand_memory(sizeof(a), (uint8_t*) &b);
+ k &= UINT16_C(0xffff);
- r = simde__m512i_to_private(simde_mm512_mask_xxx_epi32(simde__m512i_from_private(src), k, simde__m512i_from_private(a)));
+ r = simde__m512i_to_private(simde_mm512_mask_xxx_epi32(simde__m512i_from_private(src), k, simde__m512i_from_private(a), simde__m512i_from_private(b)));
printf(" { simde_mm512_set_epi32(INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
" INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
@@ -2384,13 +2387,19 @@ test_simde_mm512_mask_xxx_epi32(const MunitParameter params[], void* data) {
" INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 ")),\n",
src.i32[15], src.i32[14], src.i32[13], src.i32[12], src.i32[11], src.i32[10], src.i32[ 9], src.i32[ 8],
src.i32[ 7], src.i32[ 6], src.i32[ 5], src.i32[ 4], src.i32[ 3], src.i32[ 2], src.i32[ 1], src.i32[ 0]);
- printf(" UINT16_C(%5" PRIu16 "),\n", k);
+ printf(" UINT16_C(%5" PRIu16 "),\n", HEDLEY_STATIC_CAST(uint16_t, k));
printf(" simde_mm512_set_epi32(INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
" INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
" INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
" INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 ")),\n",
a.i32[15], a.i32[14], a.i32[13], a.i32[12], a.i32[11], a.i32[10], a.i32[ 9], a.i32[ 8],
a.i32[ 7], a.i32[ 6], a.i32[ 5], a.i32[ 4], a.i32[ 3], a.i32[ 2], a.i32[ 1], a.i32[ 0]);
+ printf(" simde_mm512_set_epi32(INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
+ " INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 ")),\n",
+ b.i32[15], b.i32[14], b.i32[13], b.i32[12], b.i32[11], b.i32[10], b.i32[ 9], b.i32[ 8],
+ b.i32[ 7], b.i32[ 6], b.i32[ 5], b.i32[ 4], b.i32[ 3], b.i32[ 2], b.i32[ 1], b.i32[ 0]);
printf(" simde_mm512_set_epi32(INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
" INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
" INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "),\n"
@@ -2401,7 +2410,7 @@ test_simde_mm512_mask_xxx_epi32(const MunitParameter params[], void* data) {
return MUNIT_FAIL;
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
- simde__m512i r = simde_mm512_mask_xxx_epi32(test_vec[i].src, test_vec[i].k, test_vec[i].a);
+ simde__m512i r = simde_mm512_mask_xxx_epi32(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b);
simde_assert_m512i_i32(r, ==, test_vec[i].r);
}
@@ -2417,6 +2426,7 @@ test_simde_mm512_mask_xxx_epi64(const MunitParameter params[], void* data) {
simde__m512i src;
simde__mmask8 k;
simde__m512i a;
+ simde__m512i b;
simde__m512i r;
} test_vec[8] = {
@@ -2424,14 +2434,16 @@ test_simde_mm512_mask_xxx_epi64(const MunitParameter params[], void* data) {
printf("\n");
for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
- simde__m512i_private src, a, r;
+ simde__m512i_private src, a, b, r;
simde__mmask8 k;
munit_rand_memory(sizeof(src), (uint8_t*) &src);
munit_rand_memory(sizeof(k), (uint8_t*) &k);
munit_rand_memory(sizeof(a), (uint8_t*) &a);
+ munit_rand_memory(sizeof(a), (uint8_t*) &b);
+ k &= UINT8_C(0xff);
- r = simde__m512i_to_private(simde_mm512_mask_xxx_epi64(simde__m512i_from_private(src), k, simde__m512i_from_private(a)));
+ r = simde__m512i_to_private(simde_mm512_mask_xxx_epi64(simde__m512i_from_private(src), k, simde__m512i_from_private(a), simde__m512i_from_private(b)));
printf(" { simde_mm512_set_epi64(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
" INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
@@ -2439,13 +2451,19 @@ test_simde_mm512_mask_xxx_epi64(const MunitParameter params[], void* data) {
" INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")),\n",
src.i64[7], src.i64[6], src.i64[5], src.i64[4],
src.i64[3], src.i64[2], src.i64[1], src.i64[0]);
- printf(" UINT8_C(%3" PRIu8 "),\n", k);
+ printf(" UINT8_C(%3" PRIu8 "),\n", HEDLEY_STATIC_CAST(uint8_t, k));
printf(" simde_mm512_set_epi64(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
" INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
" INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
" INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")),\n",
a.i64[7], a.i64[6], a.i64[5], a.i64[4],
a.i64[3], a.i64[2], a.i64[1], a.i64[0]);
+ printf(" simde_mm512_set_epi64(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
+ " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")),\n",
+ b.i64[7], b.i64[6], b.i64[5], b.i64[4],
+ b.i64[3], b.i64[2], b.i64[1], b.i64[0]);
printf(" simde_mm512_set_epi64(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
" INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
" INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n"
@@ -2456,7 +2474,7 @@ test_simde_mm512_mask_xxx_epi64(const MunitParameter params[], void* data) {
return MUNIT_FAIL;
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
- simde__m512i r = simde_mm512_mask_xxx_epi64(test_vec[i].src, test_vec[i].k, test_vec[i].a);
+ simde__m512i r = simde_mm512_mask_xxx_epi64(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b);
simde_assert_m512i_i64(r, ==, test_vec[i].r);
}
@@ -2472,6 +2490,7 @@ test_simde_mm512_mask_xxx_ps(const MunitParameter params[], void* data) {
simde__m512 src;
simde__mmask16 k;
simde__m512 a;
+ simde__m512 b;
simde__m512 r;
} test_vec[8] = {
@@ -2479,16 +2498,18 @@ test_simde_mm512_mask_xxx_ps(const MunitParameter params[], void* data) {
printf("\n");
for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
- simde__m512_private src, a, r;
+ simde__m512_private src, a, b, r;
simde__mmask16 k;
for (size_t j = 0 ; j < sizeof(simde__m512) / sizeof(simde_float32) ; j++) {
src.f32[j] = (simde_float32) (round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0);
a.f32[j] = (simde_float32) (round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0);
+ b.f32[j] = (simde_float32) (round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0);
}
munit_rand_memory(sizeof(k), (uint8_t*) &k);
+ k &= UINT16_C(0xffff);
- r = simde__m512_to_private(simde_mm512_mask_xxx_ps(k, simde__m512_from_private(a)));
+ r = simde__m512_to_private(simde_mm512_mask_xxx_ps(simde__m512_from_private(src), k, simde__m512_from_private(a), simde__m512_from_private(b)));
printf(" { simde_mm512_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
" SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
@@ -2498,7 +2519,7 @@ test_simde_mm512_mask_xxx_ps(const MunitParameter params[], void* data) {
9, src.f32[11], 9, src.f32[10], 9, src.f32[ 9], 9, src.f32[ 8],
9, src.f32[ 7], 9, src.f32[ 6], 9, src.f32[ 5], 9, src.f32[ 4],
9, src.f32[ 3], 9, src.f32[ 2], 9, src.f32[ 1], 9, src.f32[ 0]);
- printf(" UINT16_C(%5" PRIu16 "),\n", k);
+ printf(" UINT16_C(%5" PRIu16 "),\n", HEDLEY_STATIC_CAST(uint16_t, k));
printf(" simde_mm512_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
" SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
" SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
@@ -2507,6 +2528,14 @@ test_simde_mm512_mask_xxx_ps(const MunitParameter params[], void* data) {
9, a.f32[11], 9, a.f32[10], 9, a.f32[ 9], 9, a.f32[ 8],
9, a.f32[ 7], 9, a.f32[ 6], 9, a.f32[ 5], 9, a.f32[ 4],
9, a.f32[ 3], 9, a.f32[ 2], 9, a.f32[ 1], 9, a.f32[ 0]);
+ printf(" simde_mm512_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
+ " SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f)),\n",
+ 9, b.f32[15], 9, b.f32[14], 9, b.f32[13], 9, b.f32[12],
+ 9, b.f32[11], 9, b.f32[10], 9, b.f32[ 9], 9, b.f32[ 8],
+ 9, b.f32[ 7], 9, b.f32[ 6], 9, b.f32[ 5], 9, b.f32[ 4],
+ 9, b.f32[ 3], 9, b.f32[ 2], 9, b.f32[ 1], 9, b.f32[ 0]);
printf(" simde_mm512_set_ps(SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
" SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
" SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f), SIMDE_FLOAT32_C(%*.2f),\n"
@@ -2519,7 +2548,7 @@ test_simde_mm512_mask_xxx_ps(const MunitParameter params[], void* data) {
return MUNIT_FAIL;
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
- simde__m512 r = simde_mm512_mask_xxx_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a);
+ simde__m512 r = simde_mm512_mask_xxx_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b);
simde_assert_m512_close(r, test_vec[i].r, 1);
}
@@ -2535,6 +2564,7 @@ test_simde_mm512_mask_xxx_pd(const MunitParameter params[], void* data) {
simde__m512d src;
simde__mmask8 k;
simde__m512d a;
+ simde__m512d b;
simde__m512d r;
} test_vec[8] = {
@@ -2542,16 +2572,18 @@ test_simde_mm512_mask_xxx_pd(const MunitParameter params[], void* data) {
printf("\n");
for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
- simde__m512d_private src, a, r;
+ simde__m512d_private src, a, b, r;
simde__mmask8 k;
for (size_t j = 0 ; j < sizeof(simde__m512d) / sizeof(simde_float64) ; j++) {
src.f64[j] = round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0;
a.f64[j] = round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0;
+ b.f64[j] = round(random_f64_range(-1000.0, 1000.0) * 100.0) / 100.0;
}
munit_rand_memory(sizeof(k), (uint8_t*) &k);
+ k &= UINT8_C(0xff);
- r = simde__m512d_to_private(simde_mm512_mask_xxx_pd(simde__m512d_from_private(src), k, simde__m512d_from_private(a)));
+ r = simde__m512d_to_private(simde_mm512_mask_xxx_pd(simde__m512d_from_private(src), k, simde__m512d_from_private(a), simde__m512d_from_private(b)));
printf(" { simde_mm512_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
" SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
@@ -2559,13 +2591,19 @@ test_simde_mm512_mask_xxx_pd(const MunitParameter params[], void* data) {
" SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)),\n",
8, src.f64[7], 8, src.f64[6], 8, src.f64[5], 8, src.f64[4],
8, src.f64[3], 8, src.f64[2], 8, src.f64[1], 8, src.f64[0]);
- printf(" UINT8_C(%3" PRIu8 "),\n", k);
+ printf(" UINT8_C(%3" PRIu8 "),\n", HEDLEY_STATIC_CAST(uint8_t, k));
printf(" simde_mm512_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
" SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
" SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
" SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)),\n",
8, a.f64[7], 8, a.f64[6], 8, a.f64[5], 8, a.f64[4],
8, a.f64[3], 8, a.f64[2], 8, a.f64[1], 8, a.f64[0]);
+ printf(" simde_mm512_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
+ " SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)),\n",
+ 8, b.f64[7], 8, b.f64[6], 8, b.f64[5], 8, b.f64[4],
+ 8, b.f64[3], 8, b.f64[2], 8, b.f64[1], 8, b.f64[0]);
printf(" simde_mm512_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
" SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
" SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f),\n"
@@ -2576,7 +2614,7 @@ test_simde_mm512_mask_xxx_pd(const MunitParameter params[], void* data) {
return MUNIT_FAIL;
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
- simde__m512d r = simde_mm512_mask_xxx_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a);
+ simde__m512d r = simde_mm512_mask_xxx_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b);
simde_assert_m512d_close(r, test_vec[i].r, 1);
}
=====================================
test/x86/sse.c
=====================================
@@ -1987,26 +1987,42 @@ test_simde_mm_cvtpi32_ps(const MunitParameter params[], void* data) {
(void) params;
(void) data;
- for (size_t i = 0 ; i < TEST_PREFERRED_ITERATIONS ; i++) {
- simde__m128_private a, r;
- simde__m64_private b;
-
- munit_rand_memory(sizeof(a), HEDLEY_REINTERPRET_CAST(uint8_t*, &a));
- for (size_t j = 0 ; j < 2 ; j++) {
- a.i32[j] = HEDLEY_STATIC_CAST(int32_t, munit_rand_int_range(-65536, 65535));
- b.i32[j] = HEDLEY_STATIC_CAST(int32_t, munit_rand_int_range(-65536, 65535));
- }
-
- r = simde__m128_to_private(simde_mm_cvtpi32_ps(simde__m128_from_private(a), simde__m64_from_private(b)));
+ const struct {
+ simde__m128 a;
+ simde__m64 b;
+ simde__m128 r;
+ } test_vec[8] = {
+ { simde_mm_set_ps(SIMDE_FLOAT32_C( 143.12), SIMDE_FLOAT32_C( 382.05), SIMDE_FLOAT32_C( -756.03), SIMDE_FLOAT32_C( 501.27)),
+ simde_mm_set_pi32(INT32_C( 747), INT32_C( -200)),
+ simde_mm_set_ps(SIMDE_FLOAT32_C( 143.12), SIMDE_FLOAT32_C( 382.05), SIMDE_FLOAT32_C( 747.00), SIMDE_FLOAT32_C( -200.00)) },
+ { simde_mm_set_ps(SIMDE_FLOAT32_C( 972.78), SIMDE_FLOAT32_C( -402.40), SIMDE_FLOAT32_C( 516.01), SIMDE_FLOAT32_C( 710.18)),
+ simde_mm_set_pi32(INT32_C( 533), INT32_C( -843)),
+ simde_mm_set_ps(SIMDE_FLOAT32_C( 972.78), SIMDE_FLOAT32_C( -402.40), SIMDE_FLOAT32_C( 533.00), SIMDE_FLOAT32_C( -843.00)) },
+ { simde_mm_set_ps(SIMDE_FLOAT32_C( 472.16), SIMDE_FLOAT32_C( -696.64), SIMDE_FLOAT32_C( -295.14), SIMDE_FLOAT32_C( 252.19)),
+ simde_mm_set_pi32(INT32_C( -428), INT32_C( 182)),
+ simde_mm_set_ps(SIMDE_FLOAT32_C( 472.16), SIMDE_FLOAT32_C( -696.64), SIMDE_FLOAT32_C( -428.00), SIMDE_FLOAT32_C( 182.00)) },
+ { simde_mm_set_ps(SIMDE_FLOAT32_C( -566.65), SIMDE_FLOAT32_C( 623.99), SIMDE_FLOAT32_C( 879.56), SIMDE_FLOAT32_C( 610.97)),
+ simde_mm_set_pi32(INT32_C( 176), INT32_C( 681)),
+ simde_mm_set_ps(SIMDE_FLOAT32_C( -566.65), SIMDE_FLOAT32_C( 623.99), SIMDE_FLOAT32_C( 176.00), SIMDE_FLOAT32_C( 681.00)) },
+ { simde_mm_set_ps(SIMDE_FLOAT32_C( 572.11), SIMDE_FLOAT32_C( -357.92), SIMDE_FLOAT32_C( 212.83), SIMDE_FLOAT32_C( 936.07)),
+ simde_mm_set_pi32(INT32_C( -310), INT32_C( 515)),
+ simde_mm_set_ps(SIMDE_FLOAT32_C( 572.11), SIMDE_FLOAT32_C( -357.92), SIMDE_FLOAT32_C( -310.00), SIMDE_FLOAT32_C( 515.00)) },
+ { simde_mm_set_ps(SIMDE_FLOAT32_C( 696.01), SIMDE_FLOAT32_C( -960.55), SIMDE_FLOAT32_C( -478.31), SIMDE_FLOAT32_C( -831.25)),
+ simde_mm_set_pi32(INT32_C( 324), INT32_C( -65)),
+ simde_mm_set_ps(SIMDE_FLOAT32_C( 696.01), SIMDE_FLOAT32_C( -960.55), SIMDE_FLOAT32_C( 324.00), SIMDE_FLOAT32_C( -65.00)) },
+ { simde_mm_set_ps(SIMDE_FLOAT32_C( -973.73), SIMDE_FLOAT32_C( 578.94), SIMDE_FLOAT32_C( 658.26), SIMDE_FLOAT32_C( 635.66)),
+ simde_mm_set_pi32(INT32_C( -268), INT32_C( 691)),
+ simde_mm_set_ps(SIMDE_FLOAT32_C( -973.73), SIMDE_FLOAT32_C( 578.94), SIMDE_FLOAT32_C( -268.00), SIMDE_FLOAT32_C( 691.00)) },
+ { simde_mm_set_ps(SIMDE_FLOAT32_C( 654.54), SIMDE_FLOAT32_C( -615.74), SIMDE_FLOAT32_C( -430.24), SIMDE_FLOAT32_C( 224.63)),
+ simde_mm_set_pi32(INT32_C( 370), INT32_C( -505)),
+ simde_mm_set_ps(SIMDE_FLOAT32_C( 654.54), SIMDE_FLOAT32_C( -615.74), SIMDE_FLOAT32_C( 370.00), SIMDE_FLOAT32_C( -505.00)) }
+ };
- simde_assert_int32_close(b.i32[0], HEDLEY_STATIC_CAST(int32_t, r.f32[0]));
- simde_assert_int32_close(b.i32[1], HEDLEY_STATIC_CAST(int32_t, r.f32[1]));
- munit_assert_int32(a.i32[2], ==, r.i32[2]);
- munit_assert_int32(a.i32[3], ==, r.i32[3]);
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m128 r = simde_mm_cvtpi32_ps(test_vec[i].a, test_vec[i].b);
+ simde_assert_m128_close(r, test_vec[i].r, 1);
}
- simde_mm_empty();
-
return MUNIT_OK;
}
@@ -2248,19 +2264,40 @@ test_simde_mm_cvtsi32_ss(const MunitParameter params[], void* data) {
(void) params;
(void) data;
- for (size_t i = 0 ; i < TEST_PREFERRED_ITERATIONS ; i++) {
- simde__m128_private a, r;
+ const struct {
+ simde__m128 a;
int32_t b;
+ simde__m128 r;
+ } test_vec[8] = {
+ { simde_mm_set_ps(SIMDE_FLOAT32_C( -368.26), SIMDE_FLOAT32_C( -772.15), SIMDE_FLOAT32_C( 700.78), SIMDE_FLOAT32_C( -416.87)),
+ INT32_C( -93207),
+ simde_mm_set_ps(SIMDE_FLOAT32_C( -368.26), SIMDE_FLOAT32_C( -772.15), SIMDE_FLOAT32_C( 700.78), SIMDE_FLOAT32_C(-93207.00)) },
+ { simde_mm_set_ps(SIMDE_FLOAT32_C( 774.49), SIMDE_FLOAT32_C( 920.32), SIMDE_FLOAT32_C( 159.83), SIMDE_FLOAT32_C( -900.78)),
+ INT32_C( -99810),
+ simde_mm_set_ps(SIMDE_FLOAT32_C( 774.49), SIMDE_FLOAT32_C( 920.32), SIMDE_FLOAT32_C( 159.83), SIMDE_FLOAT32_C(-99810.00)) },
+ { simde_mm_set_ps(SIMDE_FLOAT32_C( -606.72), SIMDE_FLOAT32_C( -127.65), SIMDE_FLOAT32_C( -336.22), SIMDE_FLOAT32_C( -528.09)),
+ INT32_C( -24917),
+ simde_mm_set_ps(SIMDE_FLOAT32_C( -606.72), SIMDE_FLOAT32_C( -127.65), SIMDE_FLOAT32_C( -336.22), SIMDE_FLOAT32_C(-24917.00)) },
+ { simde_mm_set_ps(SIMDE_FLOAT32_C( 534.13), SIMDE_FLOAT32_C( -401.63), SIMDE_FLOAT32_C( -949.41), SIMDE_FLOAT32_C( -38.28)),
+ INT32_C( -25377),
+ simde_mm_set_ps(SIMDE_FLOAT32_C( 534.13), SIMDE_FLOAT32_C( -401.63), SIMDE_FLOAT32_C( -949.41), SIMDE_FLOAT32_C(-25377.00)) },
+ { simde_mm_set_ps(SIMDE_FLOAT32_C( 704.87), SIMDE_FLOAT32_C( 236.14), SIMDE_FLOAT32_C( -91.25), SIMDE_FLOAT32_C( -708.13)),
+ INT32_C( 83867),
+ simde_mm_set_ps(SIMDE_FLOAT32_C( 704.87), SIMDE_FLOAT32_C( 236.14), SIMDE_FLOAT32_C( -91.25), SIMDE_FLOAT32_C( 83867.00)) },
+ { simde_mm_set_ps(SIMDE_FLOAT32_C( 570.72), SIMDE_FLOAT32_C( -412.30), SIMDE_FLOAT32_C( -578.88), SIMDE_FLOAT32_C( 196.41)),
+ INT32_C( 72066),
+ simde_mm_set_ps(SIMDE_FLOAT32_C( 570.72), SIMDE_FLOAT32_C( -412.30), SIMDE_FLOAT32_C( -578.88), SIMDE_FLOAT32_C( 72066.00)) },
+ { simde_mm_set_ps(SIMDE_FLOAT32_C( 891.77), SIMDE_FLOAT32_C( -473.67), SIMDE_FLOAT32_C( 332.65), SIMDE_FLOAT32_C( -615.45)),
+ INT32_C( 12054),
+ simde_mm_set_ps(SIMDE_FLOAT32_C( 891.77), SIMDE_FLOAT32_C( -473.67), SIMDE_FLOAT32_C( 332.65), SIMDE_FLOAT32_C( 12054.00)) },
+ { simde_mm_set_ps(SIMDE_FLOAT32_C( -552.37), SIMDE_FLOAT32_C( -873.85), SIMDE_FLOAT32_C( 968.75), SIMDE_FLOAT32_C( -669.38)),
+ INT32_C( 88818),
+ simde_mm_set_ps(SIMDE_FLOAT32_C( -552.37), SIMDE_FLOAT32_C( -873.85), SIMDE_FLOAT32_C( 968.75), SIMDE_FLOAT32_C( 88818.00)) }
+ };
- munit_rand_memory(sizeof(a), HEDLEY_REINTERPRET_CAST(uint8_t*, &a));
- b = HEDLEY_STATIC_CAST(int32_t, munit_rand_int_range(INT16_MIN, INT16_MAX));
-
- r = simde__m128_to_private(simde_mm_cvtsi32_ss(simde__m128_from_private(a), b));
-
- simde_assert_int32_close(HEDLEY_STATIC_CAST(int, r.f32[0]), b);
- munit_assert_int32(r.i32[1], ==, a.i32[1]);
- munit_assert_int32(r.i32[2], ==, a.i32[2]);
- munit_assert_int32(r.i32[3], ==, a.i32[3]);
+ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
+ simde__m128 r = simde_mm_cvtsi32_ss(test_vec[i].a, test_vec[i].b);
+ simde_assert_m128_close(r, test_vec[i].r, 1);
}
return MUNIT_OK;
View it on GitLab: https://salsa.debian.org/med-team/simde/-/commit/181091ed108825cc071c2422f908c103547a4029
--
View it on GitLab: https://salsa.debian.org/med-team/simde/-/commit/181091ed108825cc071c2422f908c103547a4029
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200408/16344763/attachment-0001.html>
More information about the debian-med-commit mailing list