[med-svn] [Git][deflate-team/libdeflate][debian/latest] 2 commits: new upstream version 1.22

nick black (@nickblack) gitlab at salsa.debian.org
Mon Oct 7 09:27:14 BST 2024



nick black pushed to branch debian/latest at deflate team / libdeflate


Commits:
36de73a1 by nick black at 2024-10-07T02:45:32-04:00
new upstream version 1.22

update checkout in tree, update changelog

- - - - -
aadbde33 by nick black at 2024-10-07T02:50:42-04:00
merge master

- - - - -


9 changed files:

- CMakeLists.txt
- NEWS.md
- README.md
- debian/changelog
- lib/arm/adler32_impl.h
- lib/arm/crc32_impl.h
- lib/x86/adler32_impl.h
- lib/x86/crc32_impl.h
- libdeflate.h


Changes:

=====================================
CMakeLists.txt
=====================================
@@ -90,6 +90,55 @@ if(LIBDEFLATE_FREESTANDING)
     add_definitions(-DFREESTANDING)
 endif()
 
+# Check for cases where the compiler supports an instruction set extension but
+# the assembler does not, and in those cases print a warning and add an
+# appropriate -DLIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_* flag.  libdeflate's C
+# source files already check the compiler version before using the corresponding
+# intrinsics, but in the rare case of gcc being paired with a binutils much
+# older than itself those checks are insufficient.  There is no way to check the
+# assembler version from C.  The proper fix for too-old binutils is for the user
+# to upgrade binutils.  Unfortunately, as libdeflate has started using newer
+# instructions, binutils incompatibilities have started being seen more
+# frequently.  Hence these checks for assembler support here in CMakeLists.txt
+# to provide a fallback for users who may be unable to fix their toolchain.
+# These don't solve the problem for users not using CMake, though such users can
+# add specific -DLIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_* flags they need.
+function(check_assembler_support  feature assembly_code)
+    execute_process(COMMAND echo "${assembly_code}"
+                    COMMAND ${CMAKE_C_COMPILER} -c -x assembler -o /dev/null -
+                    RESULT_VARIABLE result
+                    ERROR_QUIET)
+    if(NOT ${result} EQUAL 0)
+        add_definitions(-DLIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_${feature})
+        message(STATUS "Your gcc supports ${feature} instructions but it is paired with an assembler that does not.  Upgrading binutils is recommended.")
+    endif()
+endfunction()
+if(UNIX AND CMAKE_C_COMPILER_ID STREQUAL "GNU")
+    execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpmachine
+                    OUTPUT_VARIABLE machine)
+    if(${machine} MATCHES "^(x86_64|i[3-6]86)")
+        if(${CMAKE_C_COMPILER_VERSION} VERSION_GREATER_EQUAL 8.1)
+            # Set LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX512VNNI if needed.
+            check_assembler_support(AVX512VNNI "vpdpbusd %zmm0, %zmm0, %zmm0")
+            # Set LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_VPCLMULQDQ if needed.
+            check_assembler_support(VPCLMULQDQ "vpclmulqdq $0, %zmm0, %zmm0, %zmm0")
+        endif()
+        if(${CMAKE_C_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.1)
+            # Set LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX_VNNI if needed.
+            check_assembler_support(AVX_VNNI "{vex} vpdpbusd %ymm0, %ymm0, %ymm0")
+        endif()
+    elseif(${machine} MATCHES "^aarch64")
+        if(${CMAKE_C_COMPILER_VERSION} VERSION_GREATER_EQUAL 8.1)
+            # Set LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_DOTPROD if needed.
+            check_assembler_support(DOTPROD ".arch armv8.2-a+dotprod\nudot v0.4s, v0.16b, v0.16b")
+        endif()
+        if(${CMAKE_C_COMPILER_VERSION} VERSION_GREATER_EQUAL 9.1)
+            # Set LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_SHA3 if needed.
+            check_assembler_support(SHA3 ".arch armv8.2-a+sha3\neor3 v0.16b, v0.16b, v0.16b, v0.16b")
+        endif()
+    endif()
+endif()
+
 # Determine the list of source files and the list of compiler options that will
 # be used for both the static library and the shared library.
 


=====================================
NEWS.md
=====================================
@@ -1,5 +1,10 @@
 # libdeflate release notes
 
+## Version 1.22
+
+* The CMake-based build system now implements a workaround for gcc being paired
+  with a too-old binutils version.  This can prevent build errors.
+
 ## Version 1.21
 
 * Fixed build error on x86 with gcc 8.1 and gcc 8.2.


=====================================
README.md
=====================================
@@ -37,6 +37,7 @@ For the release notes, see the [NEWS file](NEWS.md).
 - [Building](#building)
   - [Using CMake](#using-cmake)
   - [Directly integrating the library sources](#directly-integrating-the-library-sources)
+  - [Supported compilers](#supported-compilers)
 - [API](#api)
 - [Bindings for other programming languages](#bindings-for-other-programming-languages)
 - [DEFLATE vs. zlib vs. gzip](#deflate-vs-zlib-vs-gzip)
@@ -98,6 +99,19 @@ for release builds.  `-O3` is fine too, but often `-O2` actually gives better
 results.  It's unnecessary to add flags such as `-mavx2` or `/arch:AVX2`, though
 you can do so if you want to.  Most of the relevant optimized functions are
 built regardless of such flags, and appropriate ones are selected at runtime.
+For the same reason, flags like `-mno-avx2` do *not* cause all code using the
+corresponding instruction set extension to be omitted from the binary; this is
+working as intended due to the use of runtime CPU feature detection.
+
+If using gcc, your gcc should always be paired with a binutils version that is
+not much older than itself, to avoid problems where the compiler generates
+instructions the assembler cannot assemble.  Usually systems have their gcc and
+binutils paired properly, but rarely a mismatch can arise in cases such as the
+user installing a newer gcc version without a proper binutils alongside it.
+Since libdeflate v1.22, the CMake-based build system will detect incompatible
+binutils versions and disable some optimized code accordingly.  In older
+versions of libdeflate, or if CMake is not being used, a too-old binutils can
+cause build errors like "no such instruction" from the assembler.
 
 # API
 


=====================================
debian/changelog
=====================================
@@ -1,3 +1,9 @@
+libdeflate (1.22-1) unstable; urgency=medium
+
+  * New upstream 1.22.
+
+ -- nick black <dankamongmen at gmail.com>  Mon, 07 Oct 2024 02:45:06 -0400
+
 libdeflate (1.21-1) unstable; urgency=medium
 
   [ Michael R. Crusoe ]


=====================================
lib/arm/adler32_impl.h
=====================================
@@ -209,18 +209,25 @@ adler32_arm_neon(u32 adler, const u8 *p, size_t len)
 #endif /* Regular NEON implementation */
 
 /* NEON+dotprod implementation */
-#if HAVE_DOTPROD_INTRIN && CPU_IS_LITTLE_ENDIAN()
+#if HAVE_DOTPROD_INTRIN && CPU_IS_LITTLE_ENDIAN() && \
+	!defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_DOTPROD)
 #  define adler32_arm_neon_dotprod	adler32_arm_neon_dotprod
 #  ifdef __clang__
 #    define ATTRIBUTES	_target_attribute("dotprod")
    /*
-    * With gcc 13.1 and earlier (before gcc commit 73d3bc348190 or 9aac37ab8a7b,
-    * "aarch64: Remove architecture dependencies from intrinsics"),
-    * arch=armv8.2-a is needed for the dotprod intrinsics, unless the default
-    * target is armv8.3-a or later in which case it must be omitted.  armv8.3-a
-    * or later can be detected by checking for __ARM_FEATURE_JCVT.
+    * Both gcc and binutils originally considered dotprod to depend on
+    * arch=armv8.2-a or later.  This was fixed in gcc 13.2 by commit
+    * 9aac37ab8a7b ("aarch64: Remove architecture dependencies from intrinsics")
+    * and in binutils 2.41 by commit 205e4380c800 ("aarch64: Remove version
+    * dependencies from features").  Unfortunately, always using arch=armv8.2-a
+    * causes build errors with some compiler options because it may reduce the
+    * arch rather than increase it.  Therefore we try to omit the arch whenever
+    * possible.  If gcc is 14 or later, then both gcc and binutils are probably
+    * fixed, so we omit the arch.  We also omit the arch if a feature that
+    * depends on armv8.2-a or later (in gcc 13.1 and earlier) is present.
     */
-#  elif GCC_PREREQ(13, 2) || defined(__ARM_FEATURE_JCVT)
+#  elif GCC_PREREQ(14, 0) || defined(__ARM_FEATURE_JCVT) \
+			  || defined(__ARM_FEATURE_DOTPROD)
 #    define ATTRIBUTES	_target_attribute("+dotprod")
 #  else
 #    define ATTRIBUTES	_target_attribute("arch=armv8.2-a+dotprod")


=====================================
lib/arm/crc32_impl.h
=====================================
@@ -545,19 +545,26 @@ crc32_arm_pmullx4(u32 crc, const u8 *p, size_t len)
  * This is like crc32_arm_pmullx12_crc(), but it adds the eor3 instruction (from
  * the sha3 extension) for even better performance.
  */
-#if HAVE_PMULL_INTRIN && HAVE_CRC32_INTRIN && HAVE_SHA3_INTRIN
+#if HAVE_PMULL_INTRIN && HAVE_CRC32_INTRIN && HAVE_SHA3_INTRIN && \
+	!defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_SHA3)
 #  define crc32_arm_pmullx12_crc_eor3	crc32_arm_pmullx12_crc_eor3
 #  define SUFFIX				 _pmullx12_crc_eor3
 #  ifdef __clang__
 #    define ATTRIBUTES	_target_attribute("aes,crc,sha3")
    /*
-    * With gcc 13.1 and earlier (before gcc commit 73d3bc348190 or 9aac37ab8a7b,
-    * "aarch64: Remove architecture dependencies from intrinsics"),
-    * arch=armv8.2-a is needed for the sha3 intrinsics, unless the default
-    * target is armv8.3-a or later in which case it must be omitted.  armv8.3-a
-    * or later can be detected by checking for __ARM_FEATURE_JCVT.
+    * Both gcc and binutils originally considered sha3 to depend on
+    * arch=armv8.2-a or later.  This was fixed in gcc 13.2 by commit
+    * 9aac37ab8a7b ("aarch64: Remove architecture dependencies from intrinsics")
+    * and in binutils 2.41 by commit 205e4380c800 ("aarch64: Remove version
+    * dependencies from features").  Unfortunately, always using arch=armv8.2-a
+    * causes build errors with some compiler options because it may reduce the
+    * arch rather than increase it.  Therefore we try to omit the arch whenever
+    * possible.  If gcc is 14 or later, then both gcc and binutils are probably
+    * fixed, so we omit the arch.  We also omit the arch if a feature that
+    * depends on armv8.2-a or later (in gcc 13.1 and earlier) is present.
     */
-#  elif GCC_PREREQ(13, 2) || defined(__ARM_FEATURE_JCVT)
+#  elif GCC_PREREQ(14, 0) || defined(__ARM_FEATURE_JCVT) \
+			  || defined(__ARM_FEATURE_DOTPROD)
 #    define ATTRIBUTES	_target_attribute("+crypto,+crc,+sha3")
 #  else
 #    define ATTRIBUTES	_target_attribute("arch=armv8.2-a+crypto+crc+sha3")


=====================================
lib/x86/adler32_impl.h
=====================================
@@ -60,7 +60,8 @@
  * instead of gcc 11.  (libdeflate supports direct compilation without a
  * configure step, so checking the binutils version is not always an option.)
  */
-#if GCC_PREREQ(12, 1) || CLANG_PREREQ(12, 0, 13000000) || MSVC_PREREQ(1930)
+#if (GCC_PREREQ(12, 1) || CLANG_PREREQ(12, 0, 13000000) || MSVC_PREREQ(1930)) && \
+	!defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX_VNNI)
 #  define adler32_x86_avx2_vnni	adler32_x86_avx2_vnni
 #  define SUFFIX			   _avx2_vnni
 #  define ATTRIBUTES		_target_attribute("avx2,avxvnni")
@@ -70,7 +71,8 @@
 #  include "adler32_template.h"
 #endif
 
-#if GCC_PREREQ(8, 1) || CLANG_PREREQ(6, 0, 10000000) || MSVC_PREREQ(1920)
+#if (GCC_PREREQ(8, 1) || CLANG_PREREQ(6, 0, 10000000) || MSVC_PREREQ(1920)) && \
+	!defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX512VNNI)
 /*
  * AVX512VNNI implementation using 256-bit vectors.  This is very similar to the
  * AVX-VNNI implementation but takes advantage of masking and more registers.


=====================================
lib/x86/crc32_impl.h
=====================================
@@ -84,7 +84,8 @@ static const u8 MAYBE_UNUSED shift_tab[48] = {
  * gcc 8.1 and 8.2 had a similar bug where they assumed that
  * _mm256_clmulepi64_epi128() always needed AVX512.  It's fixed in gcc 8.3.
  */
-#if GCC_PREREQ(8, 3) || CLANG_PREREQ(6, 0, 10000000)
+#if (GCC_PREREQ(8, 3) || CLANG_PREREQ(6, 0, 10000000)) && \
+	!defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_VPCLMULQDQ)
 #  define crc32_x86_vpclmulqdq_avx2	crc32_x86_vpclmulqdq_avx2
 #  define SUFFIX				 _vpclmulqdq_avx2
 #  define ATTRIBUTES		_target_attribute("vpclmulqdq,pclmul,avx2")
@@ -94,7 +95,8 @@ static const u8 MAYBE_UNUSED shift_tab[48] = {
 #  include "crc32_pclmul_template.h"
 #endif
 
-#if GCC_PREREQ(8, 1) || CLANG_PREREQ(6, 0, 10000000) || MSVC_PREREQ(1920)
+#if (GCC_PREREQ(8, 1) || CLANG_PREREQ(6, 0, 10000000) || MSVC_PREREQ(1920)) && \
+	!defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_VPCLMULQDQ)
 /*
  * VPCLMULQDQ/AVX512 implementation using 256-bit vectors.  This is very similar
  * to the VPCLMULQDQ/AVX2 implementation but takes advantage of the vpternlog


=====================================
libdeflate.h
=====================================
@@ -13,8 +13,8 @@ extern "C" {
 #endif
 
 #define LIBDEFLATE_VERSION_MAJOR	1
-#define LIBDEFLATE_VERSION_MINOR	21
-#define LIBDEFLATE_VERSION_STRING	"1.21"
+#define LIBDEFLATE_VERSION_MINOR	22
+#define LIBDEFLATE_VERSION_STRING	"1.22"
 
 /*
  * Users of libdeflate.dll on Windows can define LIBDEFLATE_DLL to cause



View it on GitLab: https://salsa.debian.org/deflate-team/libdeflate/-/compare/b370b326f09c0a4d54e3583e8304cf2319b9947a...aadbde3370db9278c86c07d23b54ad768a2b74f2

-- 
View it on GitLab: https://salsa.debian.org/deflate-team/libdeflate/-/compare/b370b326f09c0a4d54e3583e8304cf2319b9947a...aadbde3370db9278c86c07d23b54ad768a2b74f2
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20241007/987b4d37/attachment-0001.htm>


More information about the debian-med-commit mailing list