[med-svn] [Git][deflate-team/libdeflate][debian/latest] 6 commits: d/watch: delete watch, rely on upstream metadata instead

Andrea Pappacoda (@tachi) gitlab at salsa.debian.org
Sat May 30 09:23:53 BST 2026



Andrea Pappacoda pushed to branch debian/latest at deflate team / libdeflate


Commits:
67b1a954 by Andrea Pappacoda at 2026-05-30T10:08:37+02:00
d/watch: delete watch, rely on upstream metadata instead

- - - - -
ccd23afd by Andrea Pappacoda at 2026-05-30T10:10:37+02:00
New upstream version 1.25
- - - - -
b5339039 by Andrea Pappacoda at 2026-05-30T10:10:57+02:00
Update upstream source from tag 'upstream/1.25'

Update to upstream version '1.25'
with Debian dir da4dc95e2a360f3d08fbfff217564a5026ae2971
- - - - -
8ca915a7 by Andrea Pappacoda at 2026-05-30T10:22:39+02:00
d: small updates

Gbp-Dch: Ignore

- - - - -
fe9fc441 by Andrea Pappacoda at 2026-05-30T10:22:39+02:00
d/tests: simplify sample-programs test

Gbp-Dch: Ignore

- - - - -
9f86b99f by Andrea Pappacoda at 2026-05-30T10:22:39+02:00
Update changelog for 1.25-1 release

- - - - -


22 changed files:

- .cirrus.yml
- .github/workflows/ci.yml
- CMakeLists.txt
- NEWS.md
- README.md
- common_defs.h
- debian/changelog
- debian/control
- debian/copyright
- debian/tests/sample-programs
- debian/upstream/metadata
- − debian/watch
- lib/arm/crc32_impl.h
- lib/deflate_compress.c
- lib/matchfinder_common.h
- lib/x86/adler32_impl.h
- lib/x86/cpu_features.c
- lib/x86/cpu_features.h
- lib/x86/crc32_impl.h
- lib/x86/crc32_pclmul_template.h
- libdeflate.h
- scripts/deflate_benchmarks.sh


Changes:

=====================================
.cirrus.yml
=====================================
@@ -1,8 +1,7 @@
 task:
   freebsd_instance:
     matrix:
-      - image_family: freebsd-13-3
-      - image_family: freebsd-14-0
+      - image_family: freebsd-14-2
   install_script: pkg install -y cmake
   script:
     - cmake -B build -DLIBDEFLATE_BUILD_TESTS=1


=====================================
.github/workflows/ci.yml
=====================================
@@ -6,7 +6,7 @@ jobs:
     name: Build and test (x86_64, ${{ matrix.os }}, ${{ matrix.compiler }})
     strategy:
       matrix:
-        os: [ubuntu-22.04, ubuntu-20.04]
+        os: [ubuntu-24.04, ubuntu-22.04]
         compiler: [gcc, clang]
     runs-on: ${{ matrix.os }}
     env:
@@ -17,7 +17,6 @@ jobs:
       run: |
         sudo apt-get update
         sudo apt-get install -y clang llvm libz-dev valgrind
-    - run: sudo sysctl kernel.randomize_va_space=0 # https://bugs.launchpad.net/ubuntu/+source/llvm-toolchain-14/+bug/2048768
     - run: scripts/run_tests.sh
     - name: Direct compilation without official build system
       run: $CC -O2 -Wall -Werror lib/*{,/*}.c programs/{gzip,prog_util,tgetopt}.c -o libdeflate-gzip
@@ -27,22 +26,22 @@ jobs:
     strategy:
       matrix:
         include:
-        - { arch: armv6, distro: bullseye, compiler: gcc }
-        - { arch: armv6, distro: bullseye, compiler: clang }
-        - { arch: armv7, distro: bullseye, compiler: gcc }
-        - { arch: armv7, distro: bullseye, compiler: clang }
-        - { arch: aarch64, distro: bullseye, compiler: gcc }
-        - { arch: aarch64, distro: bullseye, compiler: clang }
-        - { arch: s390x, distro: bullseye, compiler: gcc }
-        - { arch: s390x, distro: bullseye, compiler: clang }
-        - { arch: ppc64le, distro: bullseye, compiler: gcc }
-        - { arch: ppc64le, distro: bullseye, compiler: clang }
+        # - { arch: armv6, distro: bookworm, compiler: gcc }
+        # - { arch: armv6, distro: bookworm, compiler: clang }
+        - { arch: armv7, distro: bookworm, compiler: gcc }
+        - { arch: armv7, distro: bookworm, compiler: clang }
+        - { arch: aarch64, distro: bookworm, compiler: gcc }
+        - { arch: aarch64, distro: bookworm, compiler: clang }
+        - { arch: s390x, distro: bookworm, compiler: gcc }
+        - { arch: s390x, distro: bookworm, compiler: clang }
+        - { arch: ppc64le, distro: bookworm, compiler: gcc }
+        - { arch: ppc64le, distro: bookworm, compiler: clang }
         - { arch: riscv64, distro: ubuntu_latest, compiler: gcc }
         - { arch: riscv64, distro: ubuntu_latest, compiler: clang }
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
-      - uses: uraimo/run-on-arch-action@v2.8.1
+      - uses: uraimo/run-on-arch-action@v3.0.0
         with:
           arch: ${{ matrix.arch }}
           distro: ${{ matrix.distro }}
@@ -109,12 +108,10 @@ jobs:
     strategy:
       matrix:
         include:
-        - {os: windows-2022, gen: "Visual Studio 17 2022", toolset: v143,    vs: x64,   vcpkg: x64-windows}
-        - {os: windows-2022, gen: "Visual Studio 17 2022", toolset: ClangCL, vs: x64,   vcpkg: x64-windows}
-        - {os: windows-2022, gen: "Visual Studio 17 2022", toolset: v143,    vs: Win32, vcpkg: x86-windows}
-        - {os: windows-2022, gen: "Visual Studio 17 2022", toolset: ClangCL, vs: Win32, vcpkg: x86-windows}
-        - {os: windows-2019, gen: "Visual Studio 16 2019", toolset: v142,    vs: x64,   vcpkg: x64-windows}
-        - {os: windows-2019, gen: "Visual Studio 16 2019", toolset: v142,    vs: Win32, vcpkg: x86-windows}
+        - {os: windows-latest, gen: "Visual Studio 17 2022", toolset: v143,    vs: x64,   vcpkg: x64-windows}
+        - {os: windows-latest, gen: "Visual Studio 17 2022", toolset: ClangCL, vs: x64,   vcpkg: x64-windows}
+        - {os: windows-latest, gen: "Visual Studio 17 2022", toolset: v143,    vs: Win32, vcpkg: x86-windows}
+        - {os: windows-latest, gen: "Visual Studio 17 2022", toolset: ClangCL, vs: Win32, vcpkg: x86-windows}
     runs-on: ${{matrix.os}}
     steps:
     - uses: actions/checkout@v4
@@ -139,8 +136,11 @@ jobs:
     name: Build (Windows, Visual Studio ${{matrix.toolset}}, ${{matrix.platform}})
     strategy:
       matrix:
-        platform: [ARM64]
+        platform: [ARM64, ARM64EC]
         toolset: [v143, ClangCL]
+        exclude: # Exclude unsupported combinations
+        - platform: ARM64EC
+          toolset: ClangCL
     runs-on: windows-latest
     steps:
     - uses: actions/checkout@v4
@@ -149,7 +149,7 @@ jobs:
     - run: >
         cmake -B build -G "Visual Studio 17 2022" -T ${{matrix.toolset}}
         -A ${{matrix.platform}} -DCMAKE_C_FLAGS="/W4 /WX"
-        -DCMAKE_INSTALL_PREFIX=build\install
+        -DCMAKE_INSTALL_PREFIX=build\install -DCMAKE_SYSTEM_VERSION=10
     - run: cmake --build build --verbose --config Release
     - run: cmake --install build --verbose --config Release
 
@@ -285,6 +285,5 @@ jobs:
         sudo apt-get install -y clang llvm
     - name: Fuzz
       run: |
-        sudo sysctl kernel.randomize_va_space=0 # https://bugs.launchpad.net/ubuntu/+source/llvm-toolchain-14/+bug/2048768
         scripts/libFuzzer/fuzz.sh --time=120 ${{matrix.sanitizer}} \
             ${{matrix.target}}


=====================================
CMakeLists.txt
=====================================
@@ -44,6 +44,9 @@ option(LIBDEFLATE_BUILD_TESTS "Build the test programs" OFF)
 option(LIBDEFLATE_USE_SHARED_LIB
        "Link the libdeflate-gzip and test programs to the shared library instead
        of the static library" OFF)
+if(APPLE)
+    option(LIBDEFLATE_APPLE_FRAMEWORK "Build as Apple Framework" OFF)
+endif()
 
 if(LIBDEFLATE_BUILD_TESTS)
     enable_testing()
@@ -90,6 +93,31 @@ if(LIBDEFLATE_FREESTANDING)
     add_definitions(-DFREESTANDING)
 endif()
 
+function(configure_framework libtype)
+    if(LIBDEFLATE_APPLE_FRAMEWORK)
+        set_target_properties(${libtype} PROPERTIES
+            FRAMEWORK TRUE
+            FRAMEWORK_VERSION "${VERSION_STRING}"
+            PRODUCT_BUNDLE_IDENTIFIER "github.com/ebiggers/libdeflate/${libtype}"
+            XCODE_ATTRIBUTE_INSTALL_PATH "@rpath"
+            XCODE_ATTRIBUTE_CODE_SIGN_IDENTITY ""
+            XCODE_ATTRIBUTE_CODE_SIGNING_ALLOWED "NO"
+            XCODE_ATTRIBUTE_CODE_SIGNING_REQUIRED "NO"
+            MACOSX_FRAMEWORK_IDENTIFIER "github.com/ebiggers/libdeflate/${libtype}"
+            MACOSX_FRAMEWORK_BUNDLE_VERSION "${VERSION_STRING}"
+            MACOSX_FRAMEWORK_SHORT_VERSION_STRING "${PROJECT_VERSION}"
+            MACOSX_RPATH TRUE
+        )
+
+        set(RES_DEST_DIR "$<TARGET_FILE_DIR:${libtype}>/Resources")
+        add_custom_command(TARGET ${libtype} POST_BUILD
+          COMMAND ${CMAKE_COMMAND} -E make_directory "${RES_DEST_DIR}"
+          COMMAND ${CMAKE_COMMAND} -E copy_if_different ${LIBDEFLATE_RESOURCES} "${RES_DEST_DIR}/"
+          COMMENT "Copying resource files to ${libtype}.framework/Resources"
+        )
+    endif()
+endfunction()
+
 # Check for cases where the compiler supports an instruction set extension but
 # the assembler does not, and in those cases print a warning and add an
 # appropriate -DLIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_* flag.  libdeflate's C
@@ -216,7 +244,18 @@ endif()
 
 set(LIB_INCLUDE_DIRS
     $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
-    $<INSTALL_INTERFACE:${CMAKE_INSTALL_FULL_INCLUDEDIR}>)
+)
+
+if(LIBDEFLATE_APPLE_FRAMEWORK)
+# Define resource files for Apple Framework
+    set(LIBDEFLATE_RESOURCES
+        "${CMAKE_CURRENT_SOURCE_DIR}/README.md"
+        "${CMAKE_CURRENT_SOURCE_DIR}/COPYING"
+        "${CMAKE_CURRENT_BINARY_DIR}/libdeflate-config.cmake"
+        "${CMAKE_CURRENT_BINARY_DIR}/libdeflate-targets.cmake"
+    )
+    message(STATUS "Resource files: ${LIBDEFLATE_RESOURCES}")
+endif()
 
 # Build the static library.
 if(LIBDEFLATE_BUILD_STATIC_LIB)
@@ -234,6 +273,7 @@ if(LIBDEFLATE_BUILD_STATIC_LIB)
     set_target_properties(libdeflate_static PROPERTIES
                           OUTPUT_NAME ${STATIC_LIB_NAME}
                           PUBLIC_HEADER libdeflate.h)
+    configure_framework(libdeflate_static)
     target_include_directories(libdeflate_static PUBLIC ${LIB_INCLUDE_DIRS})
     target_compile_definitions(libdeflate_static PRIVATE ${LIB_COMPILE_DEFINITIONS})
     target_compile_options(libdeflate_static PRIVATE ${LIB_COMPILE_OPTIONS})
@@ -253,6 +293,7 @@ if(LIBDEFLATE_BUILD_SHARED_LIB)
                           PUBLIC_HEADER libdeflate.h
                           C_VISIBILITY_PRESET hidden
                           SOVERSION 0)
+    configure_framework(libdeflate_shared)
     target_include_directories(libdeflate_shared PUBLIC ${LIB_INCLUDE_DIRS})
     target_compile_definitions(libdeflate_shared PUBLIC LIBDEFLATE_DLL)
     target_compile_definitions(libdeflate_shared PRIVATE ${LIB_COMPILE_DEFINITIONS})
@@ -267,8 +308,17 @@ install(TARGETS ${LIB_TARGETS}
         LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
         ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
         RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+        FRAMEWORK DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT runtime OPTIONAL
+        INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
         PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
 
+# Install resource files to Resources/ directory in the framework
+if(LIBDEFLATE_APPLE_FRAMEWORK)
+    install(FILES ${LIBDEFLATE_RESOURCES}
+        DESTINATION "${CMAKE_INSTALL_LIBDIR}/libdeflate_static.framework/Resources"
+    )
+endif()
+
 # Generate and install the pkg-config file.  (Don't confuse this with the CMake
 # package config file, which is CMake-specific.)  Take care to define the
 # include and lib directories in terms of the ${prefix} and ${exec_prefix}
@@ -316,4 +366,4 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libdeflate-config.cmake
 # Build the programs subdirectory if needed.
 if(LIBDEFLATE_BUILD_GZIP OR LIBDEFLATE_BUILD_TESTS)
     add_subdirectory(programs)
-endif()
+endif()
\ No newline at end of file


=====================================
NEWS.md
=====================================
@@ -1,5 +1,21 @@
 # libdeflate release notes
 
+## Version 1.25
+
+* Fixed a build error with gcc 16.
+
+* The libdeflate CMake package is now relocatable.
+
+## Version 1.24
+
+* The CMake-based build system now supports Apple Framework builds.
+
+* libdeflate now builds for Windows ARM64EC.
+
+* Made another small optimization to the x86 and ARM CRC32 code.
+
+* Fixed a compiler warning on certain platforms (issue #416).
+
 ## Version 1.23
 
 * Fixed bug introduced in 1.20 where incorrect checksums could be calculated if


=====================================
README.md
=====================================
@@ -154,6 +154,7 @@ following bindings:
 * Python: [deflate](https://github.com/dcwatson/deflate)
 * Ruby: [libdeflate-ruby](https://github.com/kaorimatz/libdeflate-ruby)
 * Rust: [libdeflater](https://github.com/adamkewley/libdeflater)
+* Swift: [SwiftDeflate](https://github.com/dcwatson/SwiftDeflate)
 
 Note: these are third-party projects which haven't necessarily been vetted by
 the authors of libdeflate.  Please direct all questions, bugs, and improvements


=====================================
common_defs.h
=====================================
@@ -67,7 +67,8 @@
 #undef ARCH_ARM32
 #undef ARCH_RISCV
 #ifdef _MSC_VER
-#  if defined(_M_X64)
+   /* Way too many things are broken in ARM64EC to pretend that it is x86_64. */
+#  if defined(_M_X64) && !defined(_M_ARM64EC)
 #    define ARCH_X86_64
 #  elif defined(_M_IX86)
 #    define ARCH_X86_32


=====================================
debian/changelog
=====================================
@@ -1,3 +1,10 @@
+libdeflate (1.25-1) unstable; urgency=low
+
+  * New upstream version 1.25
+  * d/watch: delete watch, rely on upstream metadata instead
+
+ -- Andrea Pappacoda <tachi at debian.org>  Sat, 30 May 2026 10:17:44 +0200
+
 libdeflate (1.23-2) unstable; urgency=low
 
   * d/control: use my @debian.org email address


=====================================
debian/control
=====================================
@@ -3,7 +3,6 @@ Maintainer: Deflate Team <libdeflate at packages.debian.org>
 Uploaders: Nick Black <dankamongmen at gmail.com>,
            Andrea Pappacoda <tachi at debian.org>
 Section: libs
-Priority: optional
 Build-Depends: debhelper-compat (= 13),
                dpkg-build-api (= 1),
                cmake,
@@ -11,7 +10,7 @@ Build-Depends: debhelper-compat (= 13),
                python3 <!nocheck>,
                xxd <!nocheck>,
                help2man <!nodoc> <!cross>
-Standards-Version: 4.7.2
+Standards-Version: 4.7.4
 Vcs-Browser: https://salsa.debian.org/med-team/libdeflate
 Vcs-Git: https://salsa.debian.org/med-team/libdeflate.git
 Homepage: https://github.com/ebiggers/libdeflate


=====================================
debian/copyright
=====================================
@@ -15,7 +15,7 @@ Files: debian/*
 Copyright: 2018-2021, 2023-2024 Michael R. Crusoe <crusoe at debian.org>
            2023 Lance Lin <lq27267 at gmail.com>
            2024-2025 Nick Black <dankamongmen at gmail.com>
-           2024-2025 Andrea Pappacoda <tachi at debian.org>
+           2024-2026 Andrea Pappacoda <tachi at debian.org>
 License: Expat
 
 License: Expat


=====================================
debian/tests/sample-programs
=====================================
@@ -1,10 +1,9 @@
-#!/bin/bash -e
-pkg=libdeflate
-if [ "$AUTOPKGTEST_TMP" = "" ] ; then
-  AUTOPKGTEST_TMP=$(mktemp -d /tmp/${pkg}-test.XXXXXX)
-fi
+#!/bin/sh
+
+set -euo pipefail
+
 cp README.md "${AUTOPKGTEST_TMP}/"
-cd "${AUTOPKGTEST_TMP}" || exit 1
+cd "${AUTOPKGTEST_TMP}"
 cp -a README.md README.md.save
 libdeflate-gzip README.md
 libdeflate-gunzip README.md.gz


=====================================
debian/upstream/metadata
=====================================
@@ -1,4 +1,5 @@
 ---
+Archive: GitHub
 Bug-Database: https://github.com/ebiggers/libdeflate/issues
 Bug-Submit: https://github.com/ebiggers/libdeflate/issues/new
 Repository: https://github.com/ebiggers/libdeflate.git


=====================================
debian/watch deleted
=====================================
@@ -1,4 +0,0 @@
-version=4
-
-opts="filenamemangle=s%(?:.*?)?v?(\d[\d.]*)\.tar\.gz%@PACKAGE@-$1.tar.gz%" \
-  https://github.com/ebiggers/libdeflate/tags .*/v?@ANY_VERSION@@ARCHIVE_EXT@


=====================================
lib/arm/crc32_impl.h
=====================================
@@ -434,12 +434,10 @@ crc32_arm_pmullx4(u32 crc, const u8 *p, size_t len)
 		{ CRC32_X543_MODG, CRC32_X479_MODG }, /* 4 vecs */
 		{ CRC32_X287_MODG, CRC32_X223_MODG }, /* 2 vecs */
 	};
-	static const u64 _aligned_attribute(16) barrett_consts[2][2] = {
-		{ CRC32_BARRETT_CONSTANT_1, CRC32_BARRETT_CONSTANT_1 },
-		{ CRC32_BARRETT_CONSTANT_2, CRC32_BARRETT_CONSTANT_2 },
-	};
-	static const u32 _aligned_attribute(16) mask32[4] = {
-		0, 0, 0xffffffff, 0
+	static const u64 _aligned_attribute(16) barrett_consts[3][2] = {
+		{ CRC32_X95_MODG, },
+		{ CRC32_BARRETT_CONSTANT_1, },
+		{ CRC32_BARRETT_CONSTANT_2, },
 	};
 	const poly64x2_t multipliers_1 = load_multipliers(mults[0]);
 	uint8x16_t v0, v1, v2, v3;
@@ -498,11 +496,11 @@ crc32_arm_pmullx4(u32 crc, const u8 *p, size_t len)
 		v0 = fold_partial_vec(v0, p, len, multipliers_1);
 
 	/* Reduce to 32 bits, following lib/x86/crc32_pclmul_template.h */
-	v1 = clmul_low(v0, load_multipliers(barrett_consts[0]));
-	v1 = clmul_low(v1, load_multipliers(barrett_consts[1]));
-	v0 = veorq_u8(v0, vandq_u8(v1, vreinterpretq_u8_u32(vld1q_u32(mask32))));
-	v0 = clmul_high(v0, load_multipliers(barrett_consts[0]));
-	v0 = clmul_low(v0, load_multipliers(barrett_consts[1]));
+	v0 = veorq_u8(clmul_low(v0, load_multipliers(barrett_consts[0])),
+		      vextq_u8(v0, vdupq_n_u8(0), 8));
+	v1 = clmul_low(v0, load_multipliers(barrett_consts[1]));
+	v1 = clmul_low(v1, load_multipliers(barrett_consts[2]));
+	v0 = veorq_u8(v0, v1);
 	return vgetq_lane_u32(vreinterpretq_u32_u8(v0), 2);
 }
 #undef SUFFIX


=====================================
lib/deflate_compress.c
=====================================
@@ -234,7 +234,7 @@ check_buildtime_parameters(void)
 /******************************************************************************/
 
 /* Table: length slot => length slot base value */
-static const unsigned deflate_length_slot_base[] = {
+static const u32 deflate_length_slot_base[] = {
 	3,    4,    5,    6,    7,    8,    9,    10,
 	11,   13,   15,   17,   19,   23,   27,   31,
 	35,   43,   51,   59,   67,   83,   99,   115,
@@ -250,7 +250,7 @@ static const u8 deflate_extra_length_bits[] = {
 };
 
 /* Table: offset slot => offset slot base value */
-static const unsigned deflate_offset_slot_base[] = {
+static const u32 deflate_offset_slot_base[] = {
 	1,     2,     3,     4,     5,     7,     9,     13,
 	17,    25,    33,    49,    65,    97,    129,   193,
 	257,   385,   513,   769,   1025,  1537,  2049,  3073,
@@ -470,13 +470,13 @@ struct libdeflate_compressor {
 	 * The maximum search depth: consider at most this many potential
 	 * matches at each position
 	 */
-	unsigned max_search_depth;
+	u32 max_search_depth;
 
 	/*
 	 * The "nice" match length: if a match of this length is found, choose
 	 * it immediately without further consideration
 	 */
-	unsigned nice_match_length;
+	u32 nice_match_length;
 
 	/* Frequency counters for the current block */
 	struct deflate_freqs freqs;
@@ -626,7 +626,7 @@ struct libdeflate_compressor {
 			 * early, before max_optim_passes has been reached.
 			 * Smaller values = more compression.
 			 */
-			unsigned min_improvement_to_continue;
+			u32 min_improvement_to_continue;
 
 			/*
 			 * The minimum number of bits that would need to be
@@ -636,7 +636,7 @@ struct libdeflate_compressor {
 			 * optimization pass actually increased the cost.
 			 * Smaller values = more compression.
 			 */
-			unsigned min_bits_to_use_nonfinal_path;
+			u32 min_bits_to_use_nonfinal_path;
 
 			/*
 			 * The maximum block length, in uncompressed bytes, at
@@ -653,7 +653,7 @@ struct libdeflate_compressor {
 			 * match/literal list as the optimized dynamic block
 			 * happens to be cheaper than the dynamic block itself.
 			 */
-			unsigned max_len_to_optimize_static_block;
+			u32 max_len_to_optimize_static_block;
 
 		} n; /* (n)ear-optimal */
 	#endif /* SUPPORT_NEAR_OPTIMAL_PARSING */
@@ -1639,7 +1639,7 @@ static void
 deflate_compute_full_len_codewords(struct libdeflate_compressor *c,
 				   const struct deflate_codes *codes)
 {
-	unsigned len;
+	u32 len;
 
 	STATIC_ASSERT(MAX_LITLEN_CODEWORD_LEN +
 		      DEFLATE_MAX_EXTRA_LENGTH_BITS <= 32);
@@ -1662,8 +1662,8 @@ deflate_compute_full_len_codewords(struct libdeflate_compressor *c,
 do {									\
 	const struct libdeflate_compressor *c__ = (c_);			\
 	const struct deflate_codes *codes__ = (codes_);			\
-	unsigned length__ = (length_);					\
-	unsigned offset__ = (offset_);					\
+	u32 length__ = (length_);					\
+	u32 offset__ = (offset_);					\
 	unsigned offset_slot__ = (offset_slot_);			\
 									\
 	/* Litlen symbol and extra length bits */			\
@@ -1936,9 +1936,9 @@ deflate_flush_block(struct libdeflate_compressor *c,
 		struct deflate_optimum_node * const end_node =
 			&c->p.n.optimum_nodes[block_length];
 		do {
-			unsigned length = cur_node->item & OPTIMUM_LEN_MASK;
-			unsigned offset = cur_node->item >>
-					  OPTIMUM_OFFSET_SHIFT;
+			u32 length = cur_node->item & OPTIMUM_LEN_MASK;
+			u32 offset = cur_node->item >> OPTIMUM_OFFSET_SHIFT;
+
 			if (length == 1) {
 				/* Literal */
 				ADD_BITS(codes->codewords.litlen[offset],
@@ -1960,8 +1960,8 @@ deflate_flush_block(struct libdeflate_compressor *c,
 		for (seq = sequences; ; seq++) {
 			u32 litrunlen = seq->litrunlen_and_length &
 					SEQ_LITRUNLEN_MASK;
-			unsigned length = seq->litrunlen_and_length >>
-					  SEQ_LENGTH_SHIFT;
+			u32 length = seq->litrunlen_and_length >>
+				     SEQ_LENGTH_SHIFT;
 			unsigned lit;
 
 			/* Output a run of literals. */
@@ -2118,7 +2118,7 @@ observe_literal(struct block_split_stats *stats, u8 lit)
  * one observation type for "long match".
  */
 static forceinline void
-observe_match(struct block_split_stats *stats, unsigned length)
+observe_match(struct block_split_stats *stats, u32 length)
 {
 	stats->new_observations[NUM_LITERAL_OBSERVATION_TYPES +
 				(length >= 9)]++;
@@ -2242,7 +2242,7 @@ deflate_choose_literal(struct libdeflate_compressor *c, unsigned literal,
 
 static forceinline void
 deflate_choose_match(struct libdeflate_compressor *c,
-		     unsigned length, unsigned offset, bool gather_split_stats,
+		     u32 length, u32 offset, bool gather_split_stats,
 		     struct deflate_sequence **seq_p)
 {
 	struct deflate_sequence *seq = *seq_p;
@@ -2254,7 +2254,7 @@ deflate_choose_match(struct libdeflate_compressor *c,
 	if (gather_split_stats)
 		observe_match(&c->split_stats, length);
 
-	seq->litrunlen_and_length |= (u32)length << SEQ_LENGTH_SHIFT;
+	seq->litrunlen_and_length |= length << SEQ_LENGTH_SHIFT;
 	seq->offset = offset;
 	seq->offset_slot = offset_slot;
 
@@ -2268,7 +2268,7 @@ deflate_choose_match(struct libdeflate_compressor *c,
  * the input buffer.
  */
 static forceinline void
-adjust_max_and_nice_len(unsigned *max_len, unsigned *nice_len, size_t remaining)
+adjust_max_and_nice_len(u32 *max_len, u32 *nice_len, size_t remaining)
 {
 	if (unlikely(remaining < DEFLATE_MAX_MATCH_LEN)) {
 		*max_len = remaining;
@@ -2292,8 +2292,8 @@ adjust_max_and_nice_len(unsigned *max_len, unsigned *nice_len, size_t remaining)
  * probably be worthwhile.  Conversely, if not many literals are used, then
  * probably literals will be cheap and short matches won't be worthwhile.
  */
-static unsigned
-choose_min_match_len(unsigned num_used_literals, unsigned max_search_depth)
+static u32
+choose_min_match_len(u32 num_used_literals, u32 max_search_depth)
 {
 	/* map from num_used_literals to min_len */
 	static const u8 min_lens[] = {
@@ -2304,7 +2304,7 @@ choose_min_match_len(unsigned num_used_literals, unsigned max_search_depth)
 		4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 		/* The rest is implicitly 3. */
 	};
-	unsigned min_len;
+	u32 min_len;
 
 	STATIC_ASSERT(DEFLATE_MIN_MATCH_LEN <= 3);
 	STATIC_ASSERT(ARRAY_LEN(min_lens) <= DEFLATE_NUM_LITERALS + 1);
@@ -2326,12 +2326,11 @@ choose_min_match_len(unsigned num_used_literals, unsigned max_search_depth)
 	return min_len;
 }
 
-static unsigned
-calculate_min_match_len(const u8 *data, size_t data_len,
-			unsigned max_search_depth)
+static u32
+calculate_min_match_len(const u8 *data, size_t data_len, u32 max_search_depth)
 {
 	u8 used[256] = { 0 };
-	unsigned num_used_literals = 0;
+	u32 num_used_literals = 0;
 	size_t i;
 
 	/*
@@ -2357,13 +2356,13 @@ calculate_min_match_len(const u8 *data, size_t data_len,
  * Recalculate the minimum match length for a block, now that we know the
  * distribution of literals that are actually being used (freqs->litlen).
  */
-static unsigned
+static u32
 recalculate_min_match_len(const struct deflate_freqs *freqs,
-			  unsigned max_search_depth)
+			  u32 max_search_depth)
 {
 	u32 literal_freq = 0;
 	u32 cutoff;
-	unsigned num_used_literals = 0;
+	u32 num_used_literals = 0;
 	int i;
 
 	for (i = 0; i < DEFLATE_NUM_LITERALS; i++)
@@ -2457,8 +2456,8 @@ deflate_compress_fastest(struct libdeflate_compressor * restrict c,
 	const u8 *in_next = in;
 	const u8 *in_end = in_next + in_nbytes;
 	const u8 *in_cur_base = in_next;
-	unsigned max_len = DEFLATE_MAX_MATCH_LEN;
-	unsigned nice_len = MIN(c->nice_match_length, max_len);
+	u32 max_len = DEFLATE_MAX_MATCH_LEN;
+	u32 nice_len = MIN(c->nice_match_length, max_len);
 	u32 next_hash = 0;
 
 	ht_matchfinder_init(&c->p.f.ht_mf);
@@ -2534,8 +2533,8 @@ deflate_compress_greedy(struct libdeflate_compressor * restrict c,
 	const u8 *in_next = in;
 	const u8 *in_end = in_next + in_nbytes;
 	const u8 *in_cur_base = in_next;
-	unsigned max_len = DEFLATE_MAX_MATCH_LEN;
-	unsigned nice_len = MIN(c->nice_match_length, max_len);
+	u32 max_len = DEFLATE_MAX_MATCH_LEN;
+	u32 nice_len = MIN(c->nice_match_length, max_len);
 	u32 next_hashes[2] = {0, 0};
 
 	hc_matchfinder_init(&c->p.g.hc_mf);
@@ -2547,7 +2546,7 @@ deflate_compress_greedy(struct libdeflate_compressor * restrict c,
 		const u8 * const in_max_block_end = choose_max_block_end(
 				in_next, in_end, SOFT_MAX_BLOCK_LENGTH);
 		struct deflate_sequence *seq = c->p.g.sequences;
-		unsigned min_len;
+		u32 min_len;
 
 		init_block_split_stats(&c->split_stats);
 		deflate_begin_sequences(c, seq);
@@ -2610,8 +2609,8 @@ deflate_compress_lazy_generic(struct libdeflate_compressor * restrict c,
 	const u8 *in_next = in;
 	const u8 *in_end = in_next + in_nbytes;
 	const u8 *in_cur_base = in_next;
-	unsigned max_len = DEFLATE_MAX_MATCH_LEN;
-	unsigned nice_len = MIN(c->nice_match_length, max_len);
+	u32 max_len = DEFLATE_MAX_MATCH_LEN;
+	u32 nice_len = MIN(c->nice_match_length, max_len);
 	u32 next_hashes[2] = {0, 0};
 
 	hc_matchfinder_init(&c->p.g.hc_mf);
@@ -2625,7 +2624,7 @@ deflate_compress_lazy_generic(struct libdeflate_compressor * restrict c,
 		const u8 *next_recalc_min_len =
 			in_next + MIN(in_end - in_next, 10000);
 		struct deflate_sequence *seq = c->p.g.sequences;
-		unsigned min_len;
+		u32 min_len;
 
 		init_block_split_stats(&c->split_stats);
 		deflate_begin_sequences(c, seq);
@@ -2633,10 +2632,10 @@ deflate_compress_lazy_generic(struct libdeflate_compressor * restrict c,
 						  in_max_block_end - in_next,
 						  c->max_search_depth);
 		do {
-			unsigned cur_len;
-			unsigned cur_offset;
-			unsigned next_len;
-			unsigned next_offset;
+			u32 cur_len;
+			u32 cur_offset;
+			u32 next_len;
+			u32 next_offset;
 
 			/*
 			 * Recalculate the minimum match length if it hasn't
@@ -2849,8 +2848,8 @@ deflate_tally_item_list(struct libdeflate_compressor *c, u32 block_length)
 		&c->p.n.optimum_nodes[block_length];
 
 	do {
-		unsigned length = cur_node->item & OPTIMUM_LEN_MASK;
-		unsigned offset = cur_node->item >> OPTIMUM_OFFSET_SHIFT;
+		u32 length = cur_node->item & OPTIMUM_LEN_MASK;
+		u32 offset = cur_node->item >> OPTIMUM_OFFSET_SHIFT;
 
 		if (length == 1) {
 			/* Literal */
@@ -3111,7 +3110,7 @@ deflate_choose_default_litlen_costs(struct libdeflate_compressor *c,
 				    const u8 *block_begin, u32 block_length,
 				    u32 *lit_cost, u32 *len_sym_cost)
 {
-	unsigned num_used_literals = 0;
+	u32 num_used_literals = 0;
 	u32 literal_freq = block_length;
 	u32 match_freq = 0;
 	u32 cutoff;
@@ -3161,7 +3160,7 @@ deflate_choose_default_litlen_costs(struct libdeflate_compressor *c,
 }
 
 static forceinline u32
-deflate_default_length_cost(unsigned len, u32 len_sym_cost)
+deflate_default_length_cost(u32 len, u32 len_sym_cost)
 {
 	unsigned slot = deflate_length_slot[len];
 	u32 num_extra_bits = deflate_extra_length_bits[slot];
@@ -3188,7 +3187,7 @@ static void
 deflate_set_default_costs(struct libdeflate_compressor *c,
 			  u32 lit_cost, u32 len_sym_cost)
 {
-	unsigned i;
+	u32 i;
 
 	/* Literals */
 	for (i = 0; i < DEFLATE_NUM_LITERALS; i++)
@@ -3224,7 +3223,7 @@ static forceinline void
 deflate_adjust_costs_impl(struct libdeflate_compressor *c,
 			  u32 lit_cost, u32 len_sym_cost, int change_amount)
 {
-	unsigned i;
+	u32 i;
 
 	/* Literals */
 	for (i = 0; i < DEFLATE_NUM_LITERALS; i++)
@@ -3337,7 +3336,7 @@ deflate_find_min_cost_path(struct libdeflate_compressor *c,
 	cur_node->cost_to_end = 0;
 	do {
 		unsigned num_matches;
-		unsigned literal;
+		u32 literal;
 		u32 best_cost_to_end;
 
 		cur_node--;
@@ -3349,14 +3348,14 @@ deflate_find_min_cost_path(struct libdeflate_compressor *c,
 		/* It's always possible to choose a literal. */
 		best_cost_to_end = c->p.n.costs.literal[literal] +
 				   (cur_node + 1)->cost_to_end;
-		cur_node->item = ((u32)literal << OPTIMUM_OFFSET_SHIFT) | 1;
+		cur_node->item = (literal << OPTIMUM_OFFSET_SHIFT) | 1;
 
 		/* Also consider matches if there are any. */
 		if (num_matches) {
 			const struct lz_match *match;
-			unsigned len;
-			unsigned offset;
-			unsigned offset_slot;
+			u32 len;
+			u32 offset;
+			u32 offset_slot;
 			u32 offset_cost;
 			u32 cost_to_end;
 
@@ -3384,7 +3383,7 @@ deflate_find_min_cost_path(struct libdeflate_compressor *c,
 					if (cost_to_end < best_cost_to_end) {
 						best_cost_to_end = cost_to_end;
 						cur_node->item = len |
-							((u32)offset <<
+							(offset <<
 							 OPTIMUM_OFFSET_SHIFT);
 					}
 				} while (++len <= match->length);
@@ -3601,8 +3600,8 @@ deflate_compress_near_optimal(struct libdeflate_compressor * restrict c,
 	const u8 *in_cur_base = in_next;
 	const u8 *in_next_slide =
 		in_next + MIN(in_end - in_next, MATCHFINDER_WINDOW_SIZE);
-	unsigned max_len = DEFLATE_MAX_MATCH_LEN;
-	unsigned nice_len = MIN(c->nice_match_length, max_len);
+	u32 max_len = DEFLATE_MAX_MATCH_LEN;
+	u32 nice_len = MIN(c->nice_match_length, max_len);
 	struct lz_match *cache_ptr = c->p.n.match_cache;
 	u32 next_hashes[2] = {0, 0};
 	bool prev_block_used_only_literals = false;
@@ -3617,7 +3616,7 @@ deflate_compress_near_optimal(struct libdeflate_compressor * restrict c,
 		const u8 *prev_end_block_check = NULL;
 		bool change_detected = false;
 		const u8 *next_observation = in_next;
-		unsigned min_len;
+		u32 min_len;
 
 		/*
 		 * Use the minimum match length heuristic to improve the
@@ -3648,7 +3647,7 @@ deflate_compress_near_optimal(struct libdeflate_compressor * restrict c,
 		 */
 		for (;;) {
 			struct lz_match *matches;
-			unsigned best_len;
+			u32 best_len;
 			size_t remaining = in_end - in_next;
 
 			/* Slide the window forward if needed. */
@@ -3853,9 +3852,9 @@ deflate_compress_near_optimal(struct libdeflate_compressor * restrict c,
 static void
 deflate_init_offset_slot_full(struct libdeflate_compressor *c)
 {
-	unsigned offset_slot;
-	unsigned offset;
-	unsigned offset_end;
+	u32 offset_slot;
+	u32 offset;
+	u32 offset_end;
 
 	for (offset_slot = 0; offset_slot < ARRAY_LEN(deflate_offset_slot_base);
 	     offset_slot++) {


=====================================
lib/matchfinder_common.h
=====================================
@@ -175,11 +175,11 @@ lz_hash(u32 seq, unsigned num_bits)
  * Return the number of bytes at @matchptr that match the bytes at @strptr, up
  * to a maximum of @max_len.  Initially, @start_len bytes are matched.
  */
-static forceinline unsigned
+static forceinline u32
 lz_extend(const u8 * const strptr, const u8 * const matchptr,
-	  const unsigned start_len, const unsigned max_len)
+	  const u32 start_len, const u32 max_len)
 {
-	unsigned len = start_len;
+	u32 len = start_len;
 	machine_word_t v_word;
 
 	if (UNALIGNED_ACCESS_IS_FAST) {


=====================================
lib/x86/adler32_impl.h
=====================================
@@ -76,13 +76,13 @@
 /*
  * AVX512VNNI implementation using 256-bit vectors.  This is very similar to the
  * AVX-VNNI implementation but takes advantage of masking and more registers.
- * This is used on CPUs that support AVX-512 but where using 512-bit vectors
- * causes downclocking.  This should also be the optimal implementation on CPUs
- * that support AVX10/256 but not AVX10/512.
+ * This is used on certain older Intel CPUs, specifically Ice Lake and Tiger
+ * Lake, which support AVX512VNNI but downclock a bit too eagerly when ZMM
+ * registers are used.
  */
 #  define adler32_x86_avx512_vl256_vnni	adler32_x86_avx512_vl256_vnni
 #  define SUFFIX				   _avx512_vl256_vnni
-#  define ATTRIBUTES		_target_attribute("avx512bw,avx512vl,avx512vnni" NO_EVEX512)
+#  define ATTRIBUTES		_target_attribute("avx512bw,avx512vl,avx512vnni")
 #  define VL			32
 #  define USE_VNNI		1
 #  define USE_AVX512		1
@@ -90,12 +90,11 @@
 
 /*
  * AVX512VNNI implementation using 512-bit vectors.  This is used on CPUs that
- * have a good AVX-512 implementation including AVX512VNNI.  This should also be
- * the optimal implementation on CPUs that support AVX10/512.
+ * have a good AVX-512 implementation including AVX512VNNI.
  */
 #  define adler32_x86_avx512_vl512_vnni	adler32_x86_avx512_vl512_vnni
 #  define SUFFIX				   _avx512_vl512_vnni
-#  define ATTRIBUTES		_target_attribute("avx512bw,avx512vnni" EVEX512)
+#  define ATTRIBUTES		_target_attribute("avx512bw,avx512vnni")
 #  define VL			64
 #  define USE_VNNI		1
 #  define USE_AVX512		1


=====================================
lib/x86/cpu_features.c
=====================================
@@ -110,8 +110,9 @@ os_supports_avx512(u64 xcr0)
 }
 
 /*
- * Don't use 512-bit vectors on Intel CPUs before Rocket Lake and Sapphire
- * Rapids, due to the downclocking penalty.
+ * Don't use 512-bit vectors (ZMM registers) on Intel CPUs before Rocket Lake
+ * and Sapphire Rapids, due to the overly-eager downclocking which can reduce
+ * the performance of workloads that use ZMM registers only occasionally.
  */
 static inline bool
 allow_512bit_vectors(const u32 manufacturer[3], u32 family, u32 model)


=====================================
lib/x86/cpu_features.h
=====================================
@@ -165,15 +165,6 @@ static inline u32 get_x86_cpu_features(void) { return 0; }
 #  define HAVE_AVXVNNI(features)	((features) & X86_CPU_FEATURE_AVXVNNI)
 #endif
 
-#if (GCC_PREREQ(14, 0) || CLANG_PREREQ(18, 0, 18000000)) \
-	&& !defined(__EVEX512__) /* avoid subtracting the evex512 feature */
-#  define EVEX512	",evex512"	/* needed to override potential -mno-evex512 */
-#  define NO_EVEX512	",no-evex512"	/* needed for AVX10/256 compatibility */
-#else
-#  define EVEX512	""
-#  define NO_EVEX512	""
-#endif
-
 #endif /* ARCH_X86_32 || ARCH_X86_64 */
 
 #endif /* LIB_X86_CPU_FEATURES_H */


=====================================
lib/x86/crc32_impl.h
=====================================
@@ -96,29 +96,28 @@ static const u8 MAYBE_UNUSED shift_tab[48] = {
 /*
  * VPCLMULQDQ/AVX512 implementation using 256-bit vectors.  This is very similar
  * to the VPCLMULQDQ/AVX2 implementation but takes advantage of the vpternlog
- * instruction and more registers.  This is used on CPUs that support AVX-512
- * but where using 512-bit vectors causes downclocking.  This should also be the
- * optimal implementation on CPUs that support AVX10/256 but not AVX10/512.
+ * instruction and more registers.  This is used on certain older Intel CPUs,
+ * specifically Ice Lake and Tiger Lake, which support VPCLMULQDQ and AVX512 but
+ * downclock a bit too eagerly when ZMM registers are used.
  *
  * _mm256_zextsi128_si256() requires gcc 10.
  */
 #  define crc32_x86_vpclmulqdq_avx512_vl256  crc32_x86_vpclmulqdq_avx512_vl256
 #  define SUFFIX				      _vpclmulqdq_avx512_vl256
-#  define ATTRIBUTES		_target_attribute("vpclmulqdq,pclmul,avx512bw,avx512vl" NO_EVEX512)
+#  define ATTRIBUTES		_target_attribute("vpclmulqdq,pclmul,avx512bw,avx512vl")
 #  define VL			32
 #  define USE_AVX512		1
 #  include "crc32_pclmul_template.h"
 
 /*
  * VPCLMULQDQ/AVX512 implementation using 512-bit vectors.  This is used on CPUs
- * that have a good AVX-512 implementation including VPCLMULQDQ.  This should
- * also be the optimal implementation on CPUs that support AVX10/512.
+ * that have a good AVX-512 implementation including VPCLMULQDQ.
  *
  * _mm512_zextsi128_si512() requires gcc 10.
  */
 #  define crc32_x86_vpclmulqdq_avx512_vl512  crc32_x86_vpclmulqdq_avx512_vl512
 #  define SUFFIX				      _vpclmulqdq_avx512_vl512
-#  define ATTRIBUTES		_target_attribute("vpclmulqdq,pclmul,avx512bw,avx512vl" EVEX512)
+#  define ATTRIBUTES		_target_attribute("vpclmulqdq,pclmul,avx512bw,avx512vl")
 #  define VL			64
 #  define USE_AVX512		1
 #  include "crc32_pclmul_template.h"


=====================================
lib/x86/crc32_pclmul_template.h
=====================================
@@ -198,7 +198,6 @@ ADD_SUFFIX(crc32_x86)(u32 crc, const u8 *p, size_t len)
 	const __m128i mults_128b = _mm_set_epi64x(CRC32_X95_MODG, CRC32_X159_MODG);
 	const __m128i barrett_reduction_constants =
 		_mm_set_epi64x(CRC32_BARRETT_CONSTANT_2, CRC32_BARRETT_CONSTANT_1);
-	const __m128i mask32 = _mm_set_epi32(0, 0xFFFFFFFF, 0, 0);
 	vec_t v0, v1, v2, v3, v4, v5, v6, v7;
 	__m128i x0 = _mm_cvtsi32_si128(crc);
 	__m128i x1;
@@ -392,61 +391,19 @@ less_than_16_remaining:
 reduce_x0:
 #endif
 	/*
-	 * Generate the final n-bit CRC from the 128-bit x0 = A as follows:
+	 * Multiply the remaining 128-bit message polynomial 'x0' by x^32, then
+	 * reduce it modulo the generator polynomial G.  This gives the CRC.
 	 *
-	 *	crc = x^n * A mod G
-	 *	    = x^n * (x^64*A_H + A_L) mod G
-	 *	    = x^n * (x^(64-n)*(x^n*A_H mod G) + A_L) mod G
-	 *
-	 * I.e.:
-	 *	crc := 0
-	 *	crc := x^n * (x^(64-n)*crc + A_H) mod G
-	 *	crc := x^n * (x^(64-n)*crc + A_L) mod G
-	 *
-	 * A_H and A_L denote the high and low 64 polynomial coefficients in A.
-	 *
-	 * Using Barrett reduction to do the 'mod G', this becomes:
-	 *
-	 *	crc := floor((A_H * floor(x^(m+n) / G)) / x^m) * G mod x^n
-	 *	A_L := x^(64-n)*crc + A_L
-	 *	crc := floor((A_L * floor(x^(m+n) / G)) / x^m) * G mod x^n
-	 *
-	 * For the gzip crc, n = 32 and the bit order is LSB (least significant
-	 * bit) first.  'm' must be an integer >= 63 (the max degree of A_L and
-	 * A_H) for sufficient precision to be carried through the calculation.
-	 * As the gzip crc is LSB-first we use m == 63, which results in
-	 * floor(x^(m+n) / G) being 64-bit which is the most pclmulqdq can
-	 * accept.  The multiplication with floor(x^(63+n) / G) then produces a
-	 * 127-bit product, and the floored division by x^63 just takes the
-	 * first qword.
+	 * This implementation matches that used in crc-pclmul-template.S from
+	 * https://lore.kernel.org/r/20250210174540.161705-4-ebiggers@kernel.org/
+	 * with the parameters n=32 and LSB_CRC=1 (what the gzip CRC uses).  See
+	 * there for a detailed explanation of the math used here.
 	 */
-
-	/* tmp := floor((A_H * floor(x^(63+n) / G)) / x^63) */
+	x0 = _mm_xor_si128(_mm_clmulepi64_si128(x0, mults_128b, 0x10),
+			   _mm_bsrli_si128(x0, 8));
 	x1 = _mm_clmulepi64_si128(x0, barrett_reduction_constants, 0x00);
-	/* tmp is in bits [0:64) of x1. */
-
-	/* crc := tmp * G mod x^n */
 	x1 = _mm_clmulepi64_si128(x1, barrett_reduction_constants, 0x10);
-	/* crc is in bits [64:64+n) of x1. */
-
-	/*
-	 * A_L := x^(64-n)*crc + A_L
-	 * crc is already aligned to add (XOR) it directly to A_L, after
-	 * selecting it using a mask.
-	 */
-#if USE_AVX512
-	x0 = _mm_ternarylogic_epi32(x0, x1, mask32, 0x78);
-#else
-	x0 = _mm_xor_si128(x0, _mm_and_si128(x1, mask32));
-#endif
-	/*
-	 * crc := floor((A_L * floor(x^(m+n) / G)) / x^m) * G mod x^n
-	 * Same as previous but uses the low-order 64 coefficients of A.
-	 */
-	x0 = _mm_clmulepi64_si128(x0, barrett_reduction_constants, 0x01);
-	x0 = _mm_clmulepi64_si128(x0, barrett_reduction_constants, 0x10);
-
-	/* Extract the CRC from bits [64:64+n) of x0. */
+	x0 = _mm_xor_si128(x0, x1);
 	return _mm_extract_epi32(x0, 2);
 }
 


=====================================
libdeflate.h
=====================================
@@ -13,8 +13,8 @@ extern "C" {
 #endif
 
 #define LIBDEFLATE_VERSION_MAJOR	1
-#define LIBDEFLATE_VERSION_MINOR	23
-#define LIBDEFLATE_VERSION_STRING	"1.23"
+#define LIBDEFLATE_VERSION_MINOR	25
+#define LIBDEFLATE_VERSION_STRING	"1.25"
 
 /*
  * Users of libdeflate.dll on Windows can define LIBDEFLATE_DLL to cause


=====================================
scripts/deflate_benchmarks.sh
=====================================
@@ -72,7 +72,7 @@ single_file()
 	local include_old=false
 
 	usize=$(stat -c "%s" "$file")
-	: ${NUM_ITERATIONS:=3}
+	: "${NUM_ITERATIONS:=3}"
 
 	if [ -e "$topdir/benchmark-old" ]; then
 		include_old=true



View it on GitLab: https://salsa.debian.org/deflate-team/libdeflate/-/compare/6557216e9749671349e5ca38e6c4e8f15c63c433...9f86b99fdfd1146be4ec71b80697c08a99f24b6c

-- 
View it on GitLab: https://salsa.debian.org/deflate-team/libdeflate/-/compare/6557216e9749671349e5ca38e6c4e8f15c63c433...9f86b99fdfd1146be4ec71b80697c08a99f24b6c
You're receiving this email because of your account on salsa.debian.org. Manage all notifications: https://salsa.debian.org/-/profile/notifications | Help: https://salsa.debian.org/help


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20260530/683af2e3/attachment-0001.htm>


More information about the debian-med-commit mailing list