[med-svn] [Git][deflate-team/libdeflate][debian/latest] update with 1.21 contents
nick black (@nickblack)
gitlab at salsa.debian.org
Tue Aug 13 15:53:20 BST 2024
nick black pushed to branch debian/latest at deflate team / libdeflate
Commits:
b370b326 by nick black at 2024-08-13T10:52:46-04:00
update with 1.21 contents
- - - - -
13 changed files:
- COPYING
- NEWS.md
- README.md
- lib/arm/adler32_impl.h
- lib/arm/cpu_features.c
- lib/arm/cpu_features.h
- lib/arm/crc32_impl.h
- lib/x86/adler32_impl.h
- lib/x86/crc32_impl.h
- libdeflate.h
- scripts/libFuzzer/deflate_compress/fuzz.c
- scripts/libFuzzer/deflate_decompress/fuzz.c
- scripts/libFuzzer/fuzz.sh
Changes:
=====================================
COPYING
=====================================
@@ -1,4 +1,5 @@
Copyright 2016 Eric Biggers
+Copyright 2024 Google LLC
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation files
=====================================
NEWS.md
=====================================
@@ -1,5 +1,18 @@
# libdeflate release notes
+## Version 1.21
+
+* Fixed build error on x86 with gcc 8.1 and gcc 8.2.
+
+* Fixed build error on x86 when gcc 11 is paired with a binutils version that
+ doesn't support AVX-VNNI, e.g. as it is on RHEL 9.
+
+* Fixed build error on arm64 with gcc 6.
+
+* Fixed build error on arm64 with gcc 13.1 and later with some -mcpu options.
+
+* Enabled detection of dotprod support in Windows ARM64 builds.
+
## Version 1.20
* Improved CRC-32 performance on recent x86 CPUs by adding
=====================================
README.md
=====================================
@@ -10,7 +10,7 @@ The supported formats are:
- gzip (a.k.a. DEFLATE with a gzip wrapper)
libdeflate is heavily optimized. It is significantly faster than the zlib
-library, both for compression and decompression, and especially on x86
+library, both for compression and decompression, and especially on x86 and ARM
processors. In addition, libdeflate provides optional high compression modes
that provide a better compression ratio than the zlib's "level 9".
=====================================
lib/arm/adler32_impl.h
=====================================
@@ -214,11 +214,13 @@ adler32_arm_neon(u32 adler, const u8 *p, size_t len)
# ifdef __clang__
# define ATTRIBUTES _target_attribute("dotprod")
/*
- * With gcc, arch=armv8.2-a is needed for dotprod intrinsics, unless the
- * default target is armv8.3-a or later in which case it must be omitted.
- * armv8.3-a or later can be detected by checking for __ARM_FEATURE_JCVT.
+ * With gcc 13.1 and earlier (before gcc commit 73d3bc348190 or 9aac37ab8a7b,
+ * "aarch64: Remove architecture dependencies from intrinsics"),
+ * arch=armv8.2-a is needed for the dotprod intrinsics, unless the default
+ * target is armv8.3-a or later in which case it must be omitted. armv8.3-a
+ * or later can be detected by checking for __ARM_FEATURE_JCVT.
*/
-# elif defined(__ARM_FEATURE_JCVT)
+# elif GCC_PREREQ(13, 2) || defined(__ARM_FEATURE_JCVT)
# define ATTRIBUTES _target_attribute("+dotprod")
# else
# define ATTRIBUTES _target_attribute("arch=armv8.2-a+dotprod")
=====================================
lib/arm/cpu_features.c
=====================================
@@ -170,6 +170,10 @@ static u32 query_arm_cpu_features(void)
#include <windows.h>
+#ifndef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE /* added in Windows SDK 20348 */
+# define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE 43
+#endif
+
static u32 query_arm_cpu_features(void)
{
u32 features = ARM_CPU_FEATURE_NEON;
@@ -178,8 +182,10 @@ static u32 query_arm_cpu_features(void)
features |= ARM_CPU_FEATURE_PMULL;
if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
features |= ARM_CPU_FEATURE_CRC32;
+ if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE))
+ features |= ARM_CPU_FEATURE_DOTPROD;
- /* FIXME: detect SHA3 and DOTPROD support too. */
+ /* FIXME: detect SHA3 support too. */
return features;
}
=====================================
lib/arm/cpu_features.h
=====================================
@@ -93,7 +93,7 @@ static inline u32 get_arm_cpu_features(void) { return 0; }
# define HAVE_PMULL(features) ((features) & ARM_CPU_FEATURE_PMULL)
#endif
#if defined(ARCH_ARM64) && HAVE_NEON_INTRIN && \
- (GCC_PREREQ(6, 1) || defined(__clang__) || defined(_MSC_VER)) && \
+ (GCC_PREREQ(7, 1) || defined(__clang__) || defined(_MSC_VER)) && \
CPU_IS_LITTLE_ENDIAN() /* untested on big endian */
# define HAVE_PMULL_INTRIN 1
/* Work around MSVC's vmull_p64() taking poly64x1_t instead of poly64_t */
=====================================
lib/arm/crc32_impl.h
=====================================
@@ -551,11 +551,13 @@ crc32_arm_pmullx4(u32 crc, const u8 *p, size_t len)
# ifdef __clang__
# define ATTRIBUTES _target_attribute("aes,crc,sha3")
/*
- * With gcc, arch=armv8.2-a is needed for the sha3 intrinsics, unless the
- * default target is armv8.3-a or later in which case it must be omitted.
- * armv8.3-a or later can be detected by checking for __ARM_FEATURE_JCVT.
+ * With gcc 13.1 and earlier (before gcc commit 73d3bc348190 or 9aac37ab8a7b,
+ * "aarch64: Remove architecture dependencies from intrinsics"),
+ * arch=armv8.2-a is needed for the sha3 intrinsics, unless the default
+ * target is armv8.3-a or later in which case it must be omitted. armv8.3-a
+ * or later can be detected by checking for __ARM_FEATURE_JCVT.
*/
-# elif defined(__ARM_FEATURE_JCVT)
+# elif GCC_PREREQ(13, 2) || defined(__ARM_FEATURE_JCVT)
# define ATTRIBUTES _target_attribute("+crypto,+crc,+sha3")
# else
# define ATTRIBUTES _target_attribute("arch=armv8.2-a+crypto+crc+sha3")
=====================================
lib/x86/adler32_impl.h
=====================================
@@ -52,8 +52,15 @@
/*
* AVX-VNNI implementation. This is used on CPUs that have AVX2 and AVX-VNNI
* but don't have AVX-512, for example Intel Alder Lake.
+ *
+ * Unusually for a new CPU feature, gcc added support for the AVX-VNNI
+ * intrinsics (in gcc 11.1) slightly before binutils added support for
+ * assembling AVX-VNNI instructions (in binutils 2.36). Distros can reasonably
+ * have gcc 11 with binutils 2.35. Because of this issue, we check for gcc 12
+ * instead of gcc 11. (libdeflate supports direct compilation without a
+ * configure step, so checking the binutils version is not always an option.)
*/
-#if GCC_PREREQ(11, 1) || CLANG_PREREQ(12, 0, 13000000) || MSVC_PREREQ(1930)
+#if GCC_PREREQ(12, 1) || CLANG_PREREQ(12, 0, 13000000) || MSVC_PREREQ(1930)
# define adler32_x86_avx2_vnni adler32_x86_avx2_vnni
# define SUFFIX _avx2_vnni
# define ATTRIBUTES _target_attribute("avx2,avxvnni")
=====================================
lib/x86/crc32_impl.h
=====================================
@@ -80,8 +80,11 @@ static const u8 MAYBE_UNUSED shift_tab[48] = {
* Currently this can't be enabled with MSVC because MSVC has a bug where it
* incorrectly assumes that VPCLMULQDQ implies AVX-512:
* https://developercommunity.visualstudio.com/t/Compiler-incorrectly-assumes-VAES-and-VP/10578785
+ *
+ * gcc 8.1 and 8.2 had a similar bug where they assumed that
+ * _mm256_clmulepi64_epi128() always needed AVX512. It's fixed in gcc 8.3.
*/
-#if GCC_PREREQ(8, 1) || CLANG_PREREQ(6, 0, 10000000)
+#if GCC_PREREQ(8, 3) || CLANG_PREREQ(6, 0, 10000000)
# define crc32_x86_vpclmulqdq_avx2 crc32_x86_vpclmulqdq_avx2
# define SUFFIX _vpclmulqdq_avx2
# define ATTRIBUTES _target_attribute("vpclmulqdq,pclmul,avx2")
=====================================
libdeflate.h
=====================================
@@ -13,8 +13,8 @@ extern "C" {
#endif
#define LIBDEFLATE_VERSION_MAJOR 1
-#define LIBDEFLATE_VERSION_MINOR 20
-#define LIBDEFLATE_VERSION_STRING "1.20"
+#define LIBDEFLATE_VERSION_MINOR 21
+#define LIBDEFLATE_VERSION_STRING "1.21"
/*
* Users of libdeflate.dll on Windows can define LIBDEFLATE_DLL to cause
=====================================
scripts/libFuzzer/deflate_compress/fuzz.c
=====================================
@@ -4,6 +4,36 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+static void
+alloc_guarded_buffer(size_t size, uint8_t **start_ret, uint8_t **end_ret)
+{
+ const size_t pagesize = sysconf(_SC_PAGESIZE);
+ const size_t nr_pages = (size + pagesize - 1) / pagesize;
+ uint8_t *base_addr, *start, *end;
+
+ /* Allocate buffer and guard pages. */
+ base_addr = mmap(NULL, (nr_pages + 2) * pagesize, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ assert(base_addr != (uint8_t *)MAP_FAILED);
+ start = base_addr + pagesize;
+ end = start + (nr_pages * pagesize);
+
+ /* Unmap the guard pages. */
+ munmap(base_addr, pagesize);
+ munmap(end, pagesize);
+
+ *start_ret = start;
+ *end_ret = end;
+}
+
+static void
+free_guarded_buffer(uint8_t *start, uint8_t *end)
+{
+ munmap(start, end - start);
+}
/* Fuzz the DEFLATE compression and decompression round trip. */
int LLVMFuzzerTestOneInput(const uint8_t *in, size_t insize)
@@ -13,8 +43,9 @@ int LLVMFuzzerTestOneInput(const uint8_t *in, size_t insize)
struct libdeflate_compressor *c;
struct libdeflate_decompressor *d;
size_t csize_avail;
- uint8_t *cbuf;
- uint8_t *decompressed;
+ uint8_t *ubuf_start, *ubuf_end, *ubuf;
+ uint8_t *cbuf_start, *cbuf_end, *cbuf;
+ uint8_t *dbuf_start, *dbuf_end, *dbuf;
size_t csize;
enum libdeflate_result res;
@@ -29,24 +60,36 @@ int LLVMFuzzerTestOneInput(const uint8_t *in, size_t insize)
c = libdeflate_alloc_compressor(level);
d = libdeflate_alloc_decompressor();
+ /* Use guard pages to make all input/output buffer overflows segfault */
+
+ alloc_guarded_buffer(insize, &ubuf_start, &ubuf_end);
+ ubuf = ubuf_end - insize;
+ memcpy(ubuf, in, insize);
+
csize_avail = use_bound ? libdeflate_deflate_compress_bound(c, insize) :
insize;
- cbuf = malloc(csize_avail);
- decompressed = malloc(insize);
+ alloc_guarded_buffer(csize_avail, &cbuf_start, &cbuf_end);
+ cbuf = cbuf_end - csize_avail;
+
+ alloc_guarded_buffer(insize, &dbuf_start, &dbuf_end);
+ dbuf = dbuf_end - insize;
- csize = libdeflate_deflate_compress(c, in, insize, cbuf, csize_avail);
+ csize = libdeflate_deflate_compress(c, ubuf, insize, cbuf, csize_avail);
if (csize != 0) {
- res = libdeflate_deflate_decompress(d, cbuf, csize, decompressed,
- insize, NULL);
+ assert(csize <= csize_avail);
+ memmove(cbuf_end - csize, cbuf, csize);
+ res = libdeflate_deflate_decompress(d, cbuf_end - csize, csize,
+ dbuf, insize, NULL);
assert(res == LIBDEFLATE_SUCCESS);
- assert(memcmp(in, decompressed, insize) == 0);
+ assert(memcmp(in, dbuf, insize) == 0);
} else {
assert(!use_bound);
}
libdeflate_free_compressor(c);
libdeflate_free_decompressor(d);
- free(cbuf);
- free(decompressed);
+ free_guarded_buffer(ubuf_start, ubuf_end);
+ free_guarded_buffer(cbuf_start, cbuf_end);
+ free_guarded_buffer(dbuf_start, dbuf_end);
return 0;
}
=====================================
scripts/libFuzzer/deflate_decompress/fuzz.c
=====================================
@@ -1,19 +1,62 @@
+#include <assert.h>
#include <libdeflate.h>
+#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+static void
+alloc_guarded_buffer(size_t size, uint8_t **start_ret, uint8_t **end_ret)
+{
+ const size_t pagesize = sysconf(_SC_PAGESIZE);
+ const size_t nr_pages = (size + pagesize - 1) / pagesize;
+ uint8_t *base_addr, *start, *end;
+
+ /* Allocate buffer and guard pages. */
+ base_addr = mmap(NULL, (nr_pages + 2) * pagesize, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ assert(base_addr != (uint8_t *)MAP_FAILED);
+ start = base_addr + pagesize;
+ end = start + (nr_pages * pagesize);
+
+ /* Unmap the guard pages. */
+ munmap(base_addr, pagesize);
+ munmap(end, pagesize);
+
+ *start_ret = start;
+ *end_ret = end;
+}
+
+static void
+free_guarded_buffer(uint8_t *start, uint8_t *end)
+{
+ munmap(start, end - start);
+}
/* Fuzz DEFLATE decompression. */
int LLVMFuzzerTestOneInput(const uint8_t *in, size_t insize)
{
size_t outsize_avail = 3 * insize;
- uint8_t *out;
+ uint8_t *cbuf_start, *cbuf_end, *cbuf;
+ uint8_t *dbuf_start, *dbuf_end, *dbuf;
struct libdeflate_decompressor *d;
- out = malloc(outsize_avail);
+ /* Use guard pages to make all input/output buffer overflows segfault */
+
+ alloc_guarded_buffer(insize, &cbuf_start, &cbuf_end);
+ cbuf = cbuf_end - insize;
+ memcpy(cbuf, in, insize);
+
+ alloc_guarded_buffer(outsize_avail, &dbuf_start, &dbuf_end);
+ dbuf = dbuf_end - outsize_avail;
d = libdeflate_alloc_decompressor();
- libdeflate_deflate_decompress(d, in, insize, out, outsize_avail, NULL);
+ libdeflate_deflate_decompress(d, cbuf, insize, dbuf, outsize_avail,
+ NULL);
libdeflate_free_decompressor(d);
- free(out);
+ free_guarded_buffer(cbuf_start, cbuf_end);
+ free_guarded_buffer(dbuf_start, dbuf_end);
return 0;
}
=====================================
scripts/libFuzzer/fuzz.sh
=====================================
@@ -103,6 +103,6 @@ if [ ! -e "$TARGET/fuzz.c" ]; then
exit 1
fi
run_cmd clang -g -O1 -fsanitize=fuzzer$EXTRA_SANITIZERS \
- -Wall -Werror -DLIBDEFLATE_ENABLE_ASSERTIONS=1 \
+ -Wall -Werror -DLIBDEFLATE_ENABLE_ASSERTIONS=1 -I ../../ \
../../lib/*{,/*}.c "$TARGET/fuzz.c" -o "$TARGET/fuzz"
run_cmd "$TARGET/fuzz" "${EXTRA_FUZZER_ARGS[@]}" "$TARGET/corpus"
View it on GitLab: https://salsa.debian.org/deflate-team/libdeflate/-/commit/b370b326f09c0a4d54e3583e8304cf2319b9947a
--
View it on GitLab: https://salsa.debian.org/deflate-team/libdeflate/-/commit/b370b326f09c0a4d54e3583e8304cf2319b9947a
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20240813/c6d7a020/attachment-0001.htm>
More information about the debian-med-commit mailing list