[med-svn] [Git][deflate-team/libdeflate][debian/latest] 2 commits: new upstream version 1.22
nick black (@nickblack)
gitlab at salsa.debian.org
Mon Oct 7 09:27:14 BST 2024
nick black pushed to branch debian/latest at deflate team / libdeflate
Commits:
36de73a1 by nick black at 2024-10-07T02:45:32-04:00
new upstream version 1.22
update checkout in tree, update changelog
- - - - -
aadbde33 by nick black at 2024-10-07T02:50:42-04:00
merge master
- - - - -
9 changed files:
- CMakeLists.txt
- NEWS.md
- README.md
- debian/changelog
- lib/arm/adler32_impl.h
- lib/arm/crc32_impl.h
- lib/x86/adler32_impl.h
- lib/x86/crc32_impl.h
- libdeflate.h
Changes:
=====================================
CMakeLists.txt
=====================================
@@ -90,6 +90,55 @@ if(LIBDEFLATE_FREESTANDING)
add_definitions(-DFREESTANDING)
endif()
+# Check for cases where the compiler supports an instruction set extension but
+# the assembler does not, and in those cases print a warning and add an
+# appropriate -DLIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_* flag.  libdeflate's C
+# source files already check the compiler version before using the corresponding
+# intrinsics, but in the rare case of gcc being paired with a binutils much
+# older than itself those checks are insufficient.  There is no way to check the
+# assembler version from C.  The proper fix for too-old binutils is for the user
+# to upgrade binutils.  Unfortunately, as libdeflate has started using newer
+# instructions, binutils incompatibilities have started being seen more
+# frequently.  Hence these checks for assembler support here in CMakeLists.txt
+# to provide a fallback for users who may be unable to fix their toolchain.
+# These don't solve the problem for users not using CMake, though such users can
+# add specific -DLIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_* flags they need.
+#
+# Arguments:
+#   feature        Suffix of the -DLIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_* flag
+#                  that is added when the probe fails (e.g. AVX512VNNI).
+#   assembly_code  Assembly source fed to the compiler driver on stdin; it
+#                  should contain an instruction from the extension under test.
+function(check_assembler_support feature assembly_code)
+    # Pipe the snippet into the compiler driver, assemble it, and throw the
+    # object file away.  RESULT_VARIABLE receives the status of the last
+    # command in the pipeline, i.e. the compiler driver.
+    execute_process(COMMAND echo "${assembly_code}"
+                    COMMAND ${CMAKE_C_COMPILER} -c -x assembler -o /dev/null -
+                    RESULT_VARIABLE result
+                    ERROR_QUIET)
+    # Name the variable instead of expanding ${result}: when a process cannot
+    # be started, result holds an error string containing spaces, and an
+    # unquoted expansion would hand if() extra arguments and abort the
+    # configure step.  EQUAL is false for a non-numeric value, so that case is
+    # treated as "not supported".
+    if(NOT result EQUAL 0)
+        add_definitions(-DLIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_${feature})
+        message(STATUS "Your gcc supports ${feature} instructions but it is paired with an assembler that does not.  Upgrading binutils is recommended.")
+    endif()
+endfunction()
+# Run the assembler probes.  They only apply to gcc on Unix-like hosts, and each
+# probe is additionally gated on the gcc version given in its enclosing if().
+if(UNIX AND CMAKE_C_COMPILER_ID STREQUAL "GNU")
+    # Ask gcc which target machine it compiles for, to pick the matching probes.
+    execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpmachine
+                    OUTPUT_VARIABLE machine)
+    # The conditions below name their variables instead of expanding them with
+    # ${...}, so that an empty value (e.g. -dumpmachine printed nothing) cannot
+    # leave if() with a missing operand and abort the configure step.
+    if(machine MATCHES "^(x86_64|i[3-6]86)")
+        if(CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 8.1)
+            # Set LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX512VNNI if needed.
+            check_assembler_support(AVX512VNNI "vpdpbusd %zmm0, %zmm0, %zmm0")
+            # Set LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_VPCLMULQDQ if needed.
+            check_assembler_support(VPCLMULQDQ "vpclmulqdq $0, %zmm0, %zmm0, %zmm0")
+        endif()
+        if(CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 12.1)
+            # Set LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX_VNNI if needed.
+            check_assembler_support(AVX_VNNI "{vex} vpdpbusd %ymm0, %ymm0, %ymm0")
+        endif()
+    elseif(machine MATCHES "^aarch64")
+        if(CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 8.1)
+            # Set LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_DOTPROD if needed.
+            check_assembler_support(DOTPROD ".arch armv8.2-a+dotprod\nudot v0.4s, v0.16b, v0.16b")
+        endif()
+        if(CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 9.1)
+            # Set LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_SHA3 if needed.
+            check_assembler_support(SHA3 ".arch armv8.2-a+sha3\neor3 v0.16b, v0.16b, v0.16b, v0.16b")
+        endif()
+    endif()
+endif()
+
# Determine the list of source files and the list of compiler options that will
# be used for both the static library and the shared library.
=====================================
NEWS.md
=====================================
@@ -1,5 +1,10 @@
# libdeflate release notes
+## Version 1.22
+
+* The CMake-based build system now implements a workaround for gcc being paired
+ with a too-old binutils version. This can prevent build errors.
+
## Version 1.21
* Fixed build error on x86 with gcc 8.1 and gcc 8.2.
=====================================
README.md
=====================================
@@ -37,6 +37,7 @@ For the release notes, see the [NEWS file](NEWS.md).
- [Building](#building)
- [Using CMake](#using-cmake)
- [Directly integrating the library sources](#directly-integrating-the-library-sources)
+ - [Supported compilers](#supported-compilers)
- [API](#api)
- [Bindings for other programming languages](#bindings-for-other-programming-languages)
- [DEFLATE vs. zlib vs. gzip](#deflate-vs-zlib-vs-gzip)
@@ -98,6 +99,19 @@ for release builds. `-O3` is fine too, but often `-O2` actually gives better
results. It's unnecessary to add flags such as `-mavx2` or `/arch:AVX2`, though
you can do so if you want to. Most of the relevant optimized functions are
built regardless of such flags, and appropriate ones are selected at runtime.
+For the same reason, flags like `-mno-avx2` do *not* cause all code using the
+corresponding instruction set extension to be omitted from the binary; this is
+working as intended due to the use of runtime CPU feature detection.
+
+If using gcc, your gcc should always be paired with a binutils version that is
+not much older than itself, to avoid problems where the compiler generates
+instructions the assembler cannot assemble. Usually systems have their gcc and
+binutils paired properly, but rarely a mismatch can arise in cases such as the
+user installing a newer gcc version without a proper binutils alongside it.
+Since libdeflate v1.22, the CMake-based build system will detect incompatible
+binutils versions and disable some optimized code accordingly. In older
+versions of libdeflate, or if CMake is not being used, a too-old binutils can
+cause build errors like "no such instruction" from the assembler.
# API
=====================================
debian/changelog
=====================================
@@ -1,3 +1,9 @@
+libdeflate (1.22-1) unstable; urgency=medium
+
+ * New upstream 1.22.
+
+ -- nick black <dankamongmen at gmail.com> Mon, 07 Oct 2024 02:45:06 -0400
+
libdeflate (1.21-1) unstable; urgency=medium
[ Michael R. Crusoe ]
=====================================
lib/arm/adler32_impl.h
=====================================
@@ -209,18 +209,25 @@ adler32_arm_neon(u32 adler, const u8 *p, size_t len)
#endif /* Regular NEON implementation */
/* NEON+dotprod implementation */
-#if HAVE_DOTPROD_INTRIN && CPU_IS_LITTLE_ENDIAN()
+#if HAVE_DOTPROD_INTRIN && CPU_IS_LITTLE_ENDIAN() && \
+ !defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_DOTPROD)
# define adler32_arm_neon_dotprod adler32_arm_neon_dotprod
# ifdef __clang__
# define ATTRIBUTES _target_attribute("dotprod")
/*
- * With gcc 13.1 and earlier (before gcc commit 73d3bc348190 or 9aac37ab8a7b,
- * "aarch64: Remove architecture dependencies from intrinsics"),
- * arch=armv8.2-a is needed for the dotprod intrinsics, unless the default
- * target is armv8.3-a or later in which case it must be omitted. armv8.3-a
- * or later can be detected by checking for __ARM_FEATURE_JCVT.
+ * Both gcc and binutils originally considered dotprod to depend on
+ * arch=armv8.2-a or later. This was fixed in gcc 13.2 by commit
+ * 9aac37ab8a7b ("aarch64: Remove architecture dependencies from intrinsics")
+ * and in binutils 2.41 by commit 205e4380c800 ("aarch64: Remove version
+ * dependencies from features"). Unfortunately, always using arch=armv8.2-a
+ * causes build errors with some compiler options because it may reduce the
+ * arch rather than increase it. Therefore we try to omit the arch whenever
+ * possible. If gcc is 14 or later, then both gcc and binutils are probably
+ * fixed, so we omit the arch. We also omit the arch if a feature that
+ * depends on armv8.2-a or later (in gcc 13.1 and earlier) is present.
*/
-# elif GCC_PREREQ(13, 2) || defined(__ARM_FEATURE_JCVT)
+# elif GCC_PREREQ(14, 0) || defined(__ARM_FEATURE_JCVT) \
+ || defined(__ARM_FEATURE_DOTPROD)
# define ATTRIBUTES _target_attribute("+dotprod")
# else
# define ATTRIBUTES _target_attribute("arch=armv8.2-a+dotprod")
=====================================
lib/arm/crc32_impl.h
=====================================
@@ -545,19 +545,26 @@ crc32_arm_pmullx4(u32 crc, const u8 *p, size_t len)
* This is like crc32_arm_pmullx12_crc(), but it adds the eor3 instruction (from
* the sha3 extension) for even better performance.
*/
-#if HAVE_PMULL_INTRIN && HAVE_CRC32_INTRIN && HAVE_SHA3_INTRIN
+#if HAVE_PMULL_INTRIN && HAVE_CRC32_INTRIN && HAVE_SHA3_INTRIN && \
+ !defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_SHA3)
# define crc32_arm_pmullx12_crc_eor3 crc32_arm_pmullx12_crc_eor3
# define SUFFIX _pmullx12_crc_eor3
# ifdef __clang__
# define ATTRIBUTES _target_attribute("aes,crc,sha3")
/*
- * With gcc 13.1 and earlier (before gcc commit 73d3bc348190 or 9aac37ab8a7b,
- * "aarch64: Remove architecture dependencies from intrinsics"),
- * arch=armv8.2-a is needed for the sha3 intrinsics, unless the default
- * target is armv8.3-a or later in which case it must be omitted. armv8.3-a
- * or later can be detected by checking for __ARM_FEATURE_JCVT.
+ * Both gcc and binutils originally considered sha3 to depend on
+ * arch=armv8.2-a or later. This was fixed in gcc 13.2 by commit
+ * 9aac37ab8a7b ("aarch64: Remove architecture dependencies from intrinsics")
+ * and in binutils 2.41 by commit 205e4380c800 ("aarch64: Remove version
+ * dependencies from features"). Unfortunately, always using arch=armv8.2-a
+ * causes build errors with some compiler options because it may reduce the
+ * arch rather than increase it. Therefore we try to omit the arch whenever
+ * possible. If gcc is 14 or later, then both gcc and binutils are probably
+ * fixed, so we omit the arch. We also omit the arch if a feature that
+ * depends on armv8.2-a or later (in gcc 13.1 and earlier) is present.
*/
-# elif GCC_PREREQ(13, 2) || defined(__ARM_FEATURE_JCVT)
+# elif GCC_PREREQ(14, 0) || defined(__ARM_FEATURE_JCVT) \
+ || defined(__ARM_FEATURE_DOTPROD)
# define ATTRIBUTES _target_attribute("+crypto,+crc,+sha3")
# else
# define ATTRIBUTES _target_attribute("arch=armv8.2-a+crypto+crc+sha3")
=====================================
lib/x86/adler32_impl.h
=====================================
@@ -60,7 +60,8 @@
* instead of gcc 11. (libdeflate supports direct compilation without a
* configure step, so checking the binutils version is not always an option.)
*/
-#if GCC_PREREQ(12, 1) || CLANG_PREREQ(12, 0, 13000000) || MSVC_PREREQ(1930)
+#if (GCC_PREREQ(12, 1) || CLANG_PREREQ(12, 0, 13000000) || MSVC_PREREQ(1930)) && \
+ !defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX_VNNI)
# define adler32_x86_avx2_vnni adler32_x86_avx2_vnni
# define SUFFIX _avx2_vnni
# define ATTRIBUTES _target_attribute("avx2,avxvnni")
@@ -70,7 +71,8 @@
# include "adler32_template.h"
#endif
-#if GCC_PREREQ(8, 1) || CLANG_PREREQ(6, 0, 10000000) || MSVC_PREREQ(1920)
+#if (GCC_PREREQ(8, 1) || CLANG_PREREQ(6, 0, 10000000) || MSVC_PREREQ(1920)) && \
+ !defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX512VNNI)
/*
* AVX512VNNI implementation using 256-bit vectors. This is very similar to the
* AVX-VNNI implementation but takes advantage of masking and more registers.
=====================================
lib/x86/crc32_impl.h
=====================================
@@ -84,7 +84,8 @@ static const u8 MAYBE_UNUSED shift_tab[48] = {
* gcc 8.1 and 8.2 had a similar bug where they assumed that
* _mm256_clmulepi64_epi128() always needed AVX512. It's fixed in gcc 8.3.
*/
-#if GCC_PREREQ(8, 3) || CLANG_PREREQ(6, 0, 10000000)
+#if (GCC_PREREQ(8, 3) || CLANG_PREREQ(6, 0, 10000000)) && \
+ !defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_VPCLMULQDQ)
# define crc32_x86_vpclmulqdq_avx2 crc32_x86_vpclmulqdq_avx2
# define SUFFIX _vpclmulqdq_avx2
# define ATTRIBUTES _target_attribute("vpclmulqdq,pclmul,avx2")
@@ -94,7 +95,8 @@ static const u8 MAYBE_UNUSED shift_tab[48] = {
# include "crc32_pclmul_template.h"
#endif
-#if GCC_PREREQ(8, 1) || CLANG_PREREQ(6, 0, 10000000) || MSVC_PREREQ(1920)
+#if (GCC_PREREQ(8, 1) || CLANG_PREREQ(6, 0, 10000000) || MSVC_PREREQ(1920)) && \
+ !defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_VPCLMULQDQ)
/*
* VPCLMULQDQ/AVX512 implementation using 256-bit vectors. This is very similar
* to the VPCLMULQDQ/AVX2 implementation but takes advantage of the vpternlog
=====================================
libdeflate.h
=====================================
@@ -13,8 +13,8 @@ extern "C" {
#endif
#define LIBDEFLATE_VERSION_MAJOR 1
-#define LIBDEFLATE_VERSION_MINOR 21
-#define LIBDEFLATE_VERSION_STRING "1.21"
+#define LIBDEFLATE_VERSION_MINOR 22
+#define LIBDEFLATE_VERSION_STRING "1.22"
/*
* Users of libdeflate.dll on Windows can define LIBDEFLATE_DLL to cause
View it on GitLab: https://salsa.debian.org/deflate-team/libdeflate/-/compare/b370b326f09c0a4d54e3583e8304cf2319b9947a...aadbde3370db9278c86c07d23b54ad768a2b74f2
--
View it on GitLab: https://salsa.debian.org/deflate-team/libdeflate/-/compare/b370b326f09c0a4d54e3583e8304cf2319b9947a...aadbde3370db9278c86c07d23b54ad768a2b74f2
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20241007/987b4d37/attachment-0001.htm>
More information about the debian-med-commit
mailing list