[med-svn] [Git][med-team/simde][master] 2 commits: Add two patches for 32bit x86 from upstream.
Michael R. Crusoe
gitlab at salsa.debian.org
Wed Sep 2 11:15:16 BST 2020
Michael R. Crusoe pushed to branch master at Debian Med / simde
Commits:
c375992e by Michael R. Crusoe at 2020-09-02T11:39:07+02:00
Add two patches for 32bit x86 from upstream.
- - - - -
8633237c by Michael R. Crusoe at 2020-09-02T11:56:42+02:00
routine-update: Ready to upload to unstable
- - - - -
5 changed files:
- debian/changelog
- + debian/patches/32bit_1.patch
- + debian/patches/32bit_2.patch
- debian/patches/munit
- debian/patches/series
Changes:
=====================================
debian/changelog
=====================================
@@ -1,3 +1,9 @@
+simde (0.6.0-2) unstable; urgency=medium
+
+ * Add two patches for 32bit x86 from upstream.
+
+ -- Michael R. Crusoe <crusoe at debian.org> Wed, 02 Sep 2020 11:39:33 +0200
+
simde (0.6.0-1) unstable; urgency=medium
* New upstream version
=====================================
debian/patches/32bit_1.patch
=====================================
@@ -0,0 +1,222 @@
+Author: Evan Nemerson <evan at nemerson.com>
+Date: Tue Sep 1 22:26:26 2020 -0700
+Origin: upstream, https://github.com/simd-everywhere/simde/commit/25b5fbc06ec89ded06df5a7d7c5c62690b93d182
+Forwarded: not-needed
+Subject: Fix compilation failures when targeting 32-bit x86 with >= SSE2
+
+ There are two problems here. The primary issue is that several
+ functions are not available on 32-bit x86, and it isn't obvious from
+ the Intel Intrinsics Guide which ones. Adding -march=native to our x86
+ Travis build should help us pick up on those; unfortunately, although
+ the code builds with this patch, some tests fail.
+
+ The second issue is GCC bug #53784.
+
+--- simde.orig/simde/arm/neon/mla_n.h
++++ simde/simde/arm/neon/mla_n.h
+@@ -48,7 +48,7 @@
+ a_ = simde_float32x2_to_private(a),
+ b_ = simde_float32x2_to_private(b);
+
+- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
++ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)
+ r_.values = (b_.values * c) + a_.values;
+ #else
+ SIMDE_VECTORIZE
+@@ -76,7 +76,7 @@
+ a_ = simde_int16x4_to_private(a),
+ b_ = simde_int16x4_to_private(b);
+
+- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
++ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)
+ r_.values = (b_.values * c) + a_.values;
+ #else
+ SIMDE_VECTORIZE
+@@ -190,7 +190,7 @@
+ a_ = simde_float32x4_to_private(a),
+ b_ = simde_float32x4_to_private(b);
+
+- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
++ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)
+ r_.values = (b_.values * c) + a_.values;
+ #else
+ SIMDE_VECTORIZE
+@@ -220,7 +220,7 @@
+ a_ = simde_int16x8_to_private(a),
+ b_ = simde_int16x8_to_private(b);
+
+- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
++ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)
+ r_.values = (b_.values * c) + a_.values;
+ #else
+ SIMDE_VECTORIZE
+--- simde.orig/simde/arm/neon/mul_n.h
++++ simde/simde/arm/neon/mul_n.h
+@@ -44,7 +44,7 @@
+ r_,
+ a_ = simde_float32x2_to_private(a);
+
+- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
++ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)
+ r_.values = a_.values * b;
+ #else
+ SIMDE_VECTORIZE
+@@ -71,7 +71,7 @@
+ r_,
+ a_ = simde_float64x1_to_private(a);
+
+- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
++ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)
+ r_.values = a_.values * b;
+ #else
+ SIMDE_VECTORIZE
+@@ -213,7 +213,7 @@
+ r_,
+ a_ = simde_float32x4_to_private(a);
+
+- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
++ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)
+ r_.values = a_.values * b;
+ #else
+ SIMDE_VECTORIZE
+@@ -244,7 +244,7 @@
+ r_,
+ a_ = simde_float64x2_to_private(a);
+
+- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
++ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)
+ r_.values = a_.values * b;
+ #else
+ SIMDE_VECTORIZE
+--- simde.orig/simde/arm/neon/rshl.h
++++ simde/simde/arm/neon/rshl.h
+@@ -491,7 +491,7 @@
+ return _mm_blendv_epi8(_mm_sllv_epi16(a, b_),
+ _mm_srai_epi16(_mm_sub_epi16(a_shr, ff), 1),
+ _mm_cmpgt_epi16(zero, b_));
+- #elif defined(SIMDE_X86_AVX2_NATIVE)
++ #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_ARCH_AMD64)
+ const __m256i zero = _mm256_setzero_si256();
+ const __m256i ff = _mm256_cmpeq_epi32(zero, zero);
+ __m256i a256 = _mm256_cvtepi16_epi32(a);
+@@ -727,7 +727,7 @@
+ return _mm_blendv_epi8(_mm_sllv_epi16(a, b_),
+ _mm_srli_epi16(_mm_sub_epi16(a_shr, ff), 1),
+ _mm_cmpgt_epi16(zero, b_));
+- #elif defined(SIMDE_X86_AVX2_NATIVE)
++ #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_ARCH_AMD64)
+ const __m256i zero = _mm256_setzero_si256();
+ const __m256i ff = _mm256_cmpeq_epi32(zero, zero);
+ __m256i a256 = _mm256_cvtepu16_epi32(a);
+--- simde.orig/simde/arm/neon/shl.h
++++ simde/simde/arm/neon/shl.h
+@@ -451,7 +451,7 @@
+ return _mm_blendv_epi8(_mm_sllv_epi16(a, b_),
+ _mm_srav_epi16(a, _mm_abs_epi16(b_)),
+ _mm_cmpgt_epi16(_mm_setzero_si128(), b_));
+- #elif defined(SIMDE_X86_AVX2_NATIVE)
++ #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_ARCH_AMD64)
+ __m256i a256 = _mm256_cvtepi16_epi32(a);
+ __m256i b256 = _mm256_cvtepi16_epi32(b);
+ b256 = _mm256_srai_epi32(_mm256_slli_epi32(b256, 24), 24);
+@@ -659,7 +659,7 @@
+ return _mm_blendv_epi8(_mm_sllv_epi16(a, b_),
+ _mm_srlv_epi16(a, _mm_abs_epi16(b_)),
+ _mm_cmpgt_epi16(_mm_setzero_si128(), b_));
+- #elif defined(SIMDE_X86_AVX2_NATIVE)
++ #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_ARCH_AMD64)
+ __m256i a256 = _mm256_cvtepu16_epi32(a);
+ __m256i b256 = _mm256_cvtepi16_epi32(b);
+ b256 = _mm256_srai_epi32(_mm256_slli_epi32(b256, 24), 24);
+--- simde.orig/simde/simde-common.h
++++ simde/simde/simde-common.h
+@@ -771,6 +771,7 @@
+ # define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR
+ # endif
+ # if defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)
++# define SIMDE_BUG_GCC_53784
+ # define SIMDE_BUG_GCC_94482
+ # endif
+ # if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64)
+--- simde.orig/simde/x86/avx.h
++++ simde/simde/x86/avx.h
+@@ -3772,8 +3772,8 @@
+ simde__m256i_private a_ = simde__m256i_to_private(a);
+ return a_.i64[index];
+ }
+-#if defined(SIMDE_X86_AVX_NATIVE)
+- #if !defined(HEDLEY_MSVC_VERSION) || (HEDLEY_MSVC_VERSION_CHECK(19,20,0) && defined(_M_X64))
++#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64)
++ #if !defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)
+ #define simde_mm256_extract_epi64(a, index) _mm256_extract_epi64(a, index)
+ #endif
+ #endif
+--- simde.orig/simde/x86/sse2.h
++++ simde/simde/x86/sse2.h
+@@ -5968,7 +5968,7 @@
+ simde_mm_stream_si128 (simde__m128i* mem_addr, simde__m128i a) {
+ simde_assert_aligned(16, mem_addr);
+
+- #if defined(SIMDE_X86_SSE2_NATIVE)
++ #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64)
+ _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a);
+ #else
+ simde_memcpy(mem_addr, &a, sizeof(a));
+@@ -5994,7 +5994,7 @@
+ SIMDE_FUNCTION_ATTRIBUTES
+ void
+ simde_mm_stream_si64 (int64_t* mem_addr, int64_t a) {
+- #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(HEDLEY_MSVC_VERSION)
++ #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(HEDLEY_MSVC_VERSION)
+ _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int*, int64_t*, mem_addr), a);
+ #else
+ *mem_addr = a;
+--- simde.orig/simde/x86/sse4.2.h
++++ simde/simde/x86/sse4.2.h
+@@ -341,7 +341,7 @@
+ SIMDE_FUNCTION_ATTRIBUTES
+ uint64_t
+ simde_mm_crc32_u64(uint64_t prevcrc, uint64_t v) {
+- #if defined(SIMDE_X86_SSE4_2_NATIVE)
++ #if defined(SIMDE_X86_SSE4_2_NATIVE) && defined(SIMDE_ARCH_AMD64)
+ return _mm_crc32_u64(prevcrc, v);
+ #else
+ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32)
+--- simde.orig/simde/x86/svml.h
++++ simde/simde/x86/svml.h
+@@ -2056,10 +2056,14 @@
+ r_,
+ a_ = simde__m128_to_private(a);
+
+- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
+- r_.f32 = a_.f32 * (SIMDE_MATH_PIF / 180.0f);
+- #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
++ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+ r_.neon_f32 = vmulq_n_f32(a_.neon_i32, SIMDE_MATH_PIF / 180.0f);
++ #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)
++ r_.f32 = a_.f32 * (SIMDE_MATH_PIF / 180.0f);
++ #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
++ const simde_float32 tmps = SIMDE_MATH_PIF / 180.0f;
++ const __typeof__(r_.f32) tmpv = { tmps, tmps, tmps, tmps };
++ r_.f32 = a_.f32 * tmpv;
+ #else
+ SIMDE_VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
+@@ -2081,10 +2085,14 @@
+ r_,
+ a_ = simde__m128d_to_private(a);
+
+- #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
+- r_.f64 = a_.f64 * (SIMDE_MATH_PI / 180.0);
+- #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
++ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+ r_.neon_f64 = vmulq_n_f64(a_.neon_i64, SIMDE_MATH_PI / 180.0);
++ #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)
++ r_.f64 = a_.f64 * (SIMDE_MATH_PI / 180.0);
++ #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
++ const simde_float64 tmps = SIMDE_MATH_PI / 180.0;
++ const __typeof__(r_.f64) tmpv = { tmps, tmps };
++ r_.f64 = a_.f64 * tmpv;
+ #else
+ SIMDE_VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
=====================================
debian/patches/32bit_2.patch
=====================================
@@ -0,0 +1,24 @@
+Author: Evan Nemerson <evan at nemerson.com>
+Date: Wed Sep 2 00:44:48 2020 -0700
+Subject: avx: require x86_64 for _mm256_insert_epi64
+Origin: upstream, https://github.com/simd-everywhere/simde/commit/82d00658ff5bf91feca1266eee94767e8f616d28
+Forwarded: not-needed
+
+ GCC allows it on 32-bit, but clang doesn't. Since it's not backed up
+ by an instruction anyway, it shouldn't hurt to use the fallback code.
+
+diff --git a/simde/x86/avx.h b/simde/x86/avx.h
+index b8259d5..c429bbd 100644
+--- a/simde/x86/avx.h
++++ b/simde/x86/avx.h
+@@ -3682,8 +3682,8 @@ simde_mm256_insert_epi64 (simde__m256i a, int64_t i, const int index)
+
+ return simde__m256i_from_private(a_);
+ }
+-#if defined(SIMDE_X86_AVX_NATIVE) && \
+- (!defined(HEDLEY_MSVC_VERSION) || (HEDLEY_MSVC_VERSION_CHECK(19,20,0) && defined(_M_X64))) && \
++#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) && \
++ (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) && \
+ SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0)
+ #define simde_mm256_insert_epi64(a, i, index) _mm256_insert_epi64(a, i, index)
+ #endif
=====================================
debian/patches/munit
=====================================
@@ -1,5 +1,6 @@
Author: Evan Nemerson <evan at nemerson.com>
Description: Upstream dependency
+Forwarded: not-needed
Not worth separately packaging yet: rather small and not used anywhere else
=====================================
debian/patches/series
=====================================
@@ -1 +1,3 @@
+32bit_1.patch
+32bit_2.patch
munit
View it on GitLab: https://salsa.debian.org/med-team/simde/-/compare/67dc23ea078c66c110e73c15b55e2cfe0198153b...8633237c81d29782d0ee2588c56e17665187b986
--
View it on GitLab: https://salsa.debian.org/med-team/simde/-/compare/67dc23ea078c66c110e73c15b55e2cfe0198153b...8633237c81d29782d0ee2588c56e17665187b986
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200902/9fcfbd0d/attachment-0001.html>
More information about the debian-med-commit mailing list