[med-svn] [Git][deflate-team/libdeflate][debian/latest] update with 1.21 contents

Tue Aug 13 15:53:20 BST 2024


nick black pushed to branch debian/latest at deflate team / libdeflate


Commits:
b370b326 by nick black at 2024-08-13T10:52:46-04:00
update with 1.21 contents

- - - - -


13 changed files:

- COPYING
- NEWS.md
- README.md
- lib/arm/adler32_impl.h
- lib/arm/cpu_features.c
- lib/arm/cpu_features.h
- lib/arm/crc32_impl.h
- lib/x86/adler32_impl.h
- lib/x86/crc32_impl.h
- libdeflate.h
- scripts/libFuzzer/deflate_compress/fuzz.c
- scripts/libFuzzer/deflate_decompress/fuzz.c
- scripts/libFuzzer/fuzz.sh


Changes:

=====================================
COPYING
=====================================
@@ -1,4 +1,5 @@
 Copyright 2016 Eric Biggers
+Copyright 2024 Google LLC
 
 Permission is hereby granted, free of charge, to any person
 obtaining a copy of this software and associated documentation files


=====================================
NEWS.md
=====================================
@@ -1,5 +1,18 @@
 # libdeflate release notes
 
+## Version 1.21
+
+* Fixed build error on x86 with gcc 8.1 and gcc 8.2.
+
+* Fixed build error on x86 when gcc 11 is paired with a binutils version that
+  doesn't support AVX-VNNI, e.g. as it is on RHEL 9.
+
+* Fixed build error on arm64 with gcc 6.
+
+* Fixed build error on arm64 with gcc 13.1 and later with some -mcpu options.
+
+* Enabled detection of dotprod support in Windows ARM64 builds.
+
 ## Version 1.20
 
 * Improved CRC-32 performance on recent x86 CPUs by adding


=====================================
README.md
=====================================
@@ -10,7 +10,7 @@ The supported formats are:
 - gzip (a.k.a. DEFLATE with a gzip wrapper)
 
 libdeflate is heavily optimized.  It is significantly faster than the zlib
-library, both for compression and decompression, and especially on x86
+library, both for compression and decompression, and especially on x86 and ARM
 processors.  In addition, libdeflate provides optional high compression modes
 that provide a better compression ratio than the zlib's "level 9".
 


=====================================
lib/arm/adler32_impl.h
=====================================
@@ -214,11 +214,13 @@ adler32_arm_neon(u32 adler, const u8 *p, size_t len)
 #  ifdef __clang__
 #    define ATTRIBUTES	_target_attribute("dotprod")
    /*
-    * With gcc, arch=armv8.2-a is needed for dotprod intrinsics, unless the
-    * default target is armv8.3-a or later in which case it must be omitted.
-    * armv8.3-a or later can be detected by checking for __ARM_FEATURE_JCVT.
+    * With gcc 13.1 and earlier (before gcc commit 73d3bc348190 or 9aac37ab8a7b,
+    * "aarch64: Remove architecture dependencies from intrinsics"),
+    * arch=armv8.2-a is needed for the dotprod intrinsics, unless the default
+    * target is armv8.3-a or later in which case it must be omitted.  armv8.3-a
+    * or later can be detected by checking for __ARM_FEATURE_JCVT.
     */
-#  elif defined(__ARM_FEATURE_JCVT)
+#  elif GCC_PREREQ(13, 2) || defined(__ARM_FEATURE_JCVT)
 #    define ATTRIBUTES	_target_attribute("+dotprod")
 #  else
 #    define ATTRIBUTES	_target_attribute("arch=armv8.2-a+dotprod")


=====================================
lib/arm/cpu_features.c
=====================================
@@ -170,6 +170,10 @@ static u32 query_arm_cpu_features(void)
 
 #include <windows.h>
 
+#ifndef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE /* added in Windows SDK 20348 */
+#  define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE 43
+#endif
+
 static u32 query_arm_cpu_features(void)
 {
 	u32 features = ARM_CPU_FEATURE_NEON;
@@ -178,8 +182,10 @@ static u32 query_arm_cpu_features(void)
 		features |= ARM_CPU_FEATURE_PMULL;
 	if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
 		features |= ARM_CPU_FEATURE_CRC32;
+	if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE))
+		features |= ARM_CPU_FEATURE_DOTPROD;
 
-	/* FIXME: detect SHA3 and DOTPROD support too. */
+	/* FIXME: detect SHA3 support too. */
 
 	return features;
 }


=====================================
lib/arm/cpu_features.h
=====================================
@@ -93,7 +93,7 @@ static inline u32 get_arm_cpu_features(void) { return 0; }
 #  define HAVE_PMULL(features)	((features) & ARM_CPU_FEATURE_PMULL)
 #endif
 #if defined(ARCH_ARM64) && HAVE_NEON_INTRIN && \
-	(GCC_PREREQ(6, 1) || defined(__clang__) || defined(_MSC_VER)) && \
+	(GCC_PREREQ(7, 1) || defined(__clang__) || defined(_MSC_VER)) && \
 	CPU_IS_LITTLE_ENDIAN() /* untested on big endian */
 #  define HAVE_PMULL_INTRIN	1
    /* Work around MSVC's vmull_p64() taking poly64x1_t instead of poly64_t */


=====================================
lib/arm/crc32_impl.h
=====================================
@@ -551,11 +551,13 @@ crc32_arm_pmullx4(u32 crc, const u8 *p, size_t len)
 #  ifdef __clang__
 #    define ATTRIBUTES	_target_attribute("aes,crc,sha3")
    /*
-    * With gcc, arch=armv8.2-a is needed for the sha3 intrinsics, unless the
-    * default target is armv8.3-a or later in which case it must be omitted.
-    * armv8.3-a or later can be detected by checking for __ARM_FEATURE_JCVT.
+    * With gcc 13.1 and earlier (before gcc commit 73d3bc348190 or 9aac37ab8a7b,
+    * "aarch64: Remove architecture dependencies from intrinsics"),
+    * arch=armv8.2-a is needed for the sha3 intrinsics, unless the default
+    * target is armv8.3-a or later in which case it must be omitted.  armv8.3-a
+    * or later can be detected by checking for __ARM_FEATURE_JCVT.
     */
-#  elif defined(__ARM_FEATURE_JCVT)
+#  elif GCC_PREREQ(13, 2) || defined(__ARM_FEATURE_JCVT)
 #    define ATTRIBUTES	_target_attribute("+crypto,+crc,+sha3")
 #  else
 #    define ATTRIBUTES	_target_attribute("arch=armv8.2-a+crypto+crc+sha3")


=====================================
lib/x86/adler32_impl.h
=====================================
@@ -52,8 +52,15 @@
 /*
  * AVX-VNNI implementation.  This is used on CPUs that have AVX2 and AVX-VNNI
  * but don't have AVX-512, for example Intel Alder Lake.
+ *
+ * Unusually for a new CPU feature, gcc added support for the AVX-VNNI
+ * intrinsics (in gcc 11.1) slightly before binutils added support for
+ * assembling AVX-VNNI instructions (in binutils 2.36).  Distros can reasonably
+ * have gcc 11 with binutils 2.35.  Because of this issue, we check for gcc 12
+ * instead of gcc 11.  (libdeflate supports direct compilation without a
+ * configure step, so checking the binutils version is not always an option.)
  */
-#if GCC_PREREQ(11, 1) || CLANG_PREREQ(12, 0, 13000000) || MSVC_PREREQ(1930)
+#if GCC_PREREQ(12, 1) || CLANG_PREREQ(12, 0, 13000000) || MSVC_PREREQ(1930)
 #  define adler32_x86_avx2_vnni	adler32_x86_avx2_vnni
 #  define SUFFIX			   _avx2_vnni
 #  define ATTRIBUTES		_target_attribute("avx2,avxvnni")


=====================================
lib/x86/crc32_impl.h
=====================================
@@ -80,8 +80,11 @@ static const u8 MAYBE_UNUSED shift_tab[48] = {
  * Currently this can't be enabled with MSVC because MSVC has a bug where it
  * incorrectly assumes that VPCLMULQDQ implies AVX-512:
  * https://developercommunity.visualstudio.com/t/Compiler-incorrectly-assumes-VAES-and-VP/10578785
+ *
+ * gcc 8.1 and 8.2 had a similar bug where they assumed that
+ * _mm256_clmulepi64_epi128() always needed AVX512.  It's fixed in gcc 8.3.
  */
-#if GCC_PREREQ(8, 1) || CLANG_PREREQ(6, 0, 10000000)
+#if GCC_PREREQ(8, 3) || CLANG_PREREQ(6, 0, 10000000)
 #  define crc32_x86_vpclmulqdq_avx2	crc32_x86_vpclmulqdq_avx2
 #  define SUFFIX				 _vpclmulqdq_avx2
 #  define ATTRIBUTES		_target_attribute("vpclmulqdq,pclmul,avx2")


=====================================
libdeflate.h
=====================================
@@ -13,8 +13,8 @@ extern "C" {
 #endif
 
 #define LIBDEFLATE_VERSION_MAJOR	1
-#define LIBDEFLATE_VERSION_MINOR	20
-#define LIBDEFLATE_VERSION_STRING	"1.20"
+#define LIBDEFLATE_VERSION_MINOR	21
+#define LIBDEFLATE_VERSION_STRING	"1.21"
 
 /*
  * Users of libdeflate.dll on Windows can define LIBDEFLATE_DLL to cause


=====================================
scripts/libFuzzer/deflate_compress/fuzz.c
=====================================
@@ -4,6 +4,36 @@
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+static void
+alloc_guarded_buffer(size_t size, uint8_t **start_ret, uint8_t **end_ret)
+{
+	const size_t pagesize = sysconf(_SC_PAGESIZE);
+	const size_t nr_pages = (size + pagesize - 1) / pagesize;
+	uint8_t *base_addr, *start, *end;
+
+	/* Allocate buffer and guard pages. */
+	base_addr = mmap(NULL, (nr_pages + 2) * pagesize, PROT_READ|PROT_WRITE,
+			 MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+	assert(base_addr != (uint8_t *)MAP_FAILED);
+	start = base_addr + pagesize;
+	end = start + (nr_pages * pagesize);
+
+	/* Unmap the guard pages. */
+	munmap(base_addr, pagesize);
+	munmap(end, pagesize);
+
+	*start_ret = start;
+	*end_ret = end;
+}
+
+static void
+free_guarded_buffer(uint8_t *start, uint8_t *end)
+{
+	munmap(start, end - start);
+}
 
 /* Fuzz the DEFLATE compression and decompression round trip. */
 int LLVMFuzzerTestOneInput(const uint8_t *in, size_t insize)
@@ -13,8 +43,9 @@ int LLVMFuzzerTestOneInput(const uint8_t *in, size_t insize)
 	struct libdeflate_compressor *c;
 	struct libdeflate_decompressor *d;
 	size_t csize_avail;
-	uint8_t *cbuf;
-	uint8_t *decompressed;
+	uint8_t *ubuf_start, *ubuf_end, *ubuf;
+	uint8_t *cbuf_start, *cbuf_end, *cbuf;
+	uint8_t *dbuf_start, *dbuf_end, *dbuf;
 	size_t csize;
 	enum libdeflate_result res;
 
@@ -29,24 +60,36 @@ int LLVMFuzzerTestOneInput(const uint8_t *in, size_t insize)
 	c = libdeflate_alloc_compressor(level);
 	d = libdeflate_alloc_decompressor();
 
+	/* Use guard pages to make all input/output buffer overflows segfault */
+
+	alloc_guarded_buffer(insize, &ubuf_start, &ubuf_end);
+	ubuf = ubuf_end - insize;
+	memcpy(ubuf, in, insize);
+
 	csize_avail = use_bound ? libdeflate_deflate_compress_bound(c, insize) :
 				  insize;
-	cbuf = malloc(csize_avail);
-	decompressed = malloc(insize);
+	alloc_guarded_buffer(csize_avail, &cbuf_start, &cbuf_end);
+	cbuf = cbuf_end - csize_avail;
+
+	alloc_guarded_buffer(insize, &dbuf_start, &dbuf_end);
+	dbuf = dbuf_end - insize;
 
-	csize = libdeflate_deflate_compress(c, in, insize, cbuf, csize_avail);
+	csize = libdeflate_deflate_compress(c, ubuf, insize, cbuf, csize_avail);
 	if (csize != 0) {
-		res = libdeflate_deflate_decompress(d, cbuf, csize, decompressed,
-						    insize, NULL);
+		assert(csize <= csize_avail);
+		memmove(cbuf_end - csize, cbuf, csize);
+		res = libdeflate_deflate_decompress(d, cbuf_end - csize, csize,
+						    dbuf, insize, NULL);
 		assert(res == LIBDEFLATE_SUCCESS);
-		assert(memcmp(in, decompressed, insize) == 0);
+		assert(memcmp(in, dbuf, insize) == 0);
 	} else {
 		assert(!use_bound);
 	}
 
 	libdeflate_free_compressor(c);
 	libdeflate_free_decompressor(d);
-	free(cbuf);
-	free(decompressed);
+	free_guarded_buffer(ubuf_start, ubuf_end);
+	free_guarded_buffer(cbuf_start, cbuf_end);
+	free_guarded_buffer(dbuf_start, dbuf_end);
 	return 0;
 }


=====================================
scripts/libFuzzer/deflate_decompress/fuzz.c
=====================================
@@ -1,19 +1,62 @@
+#include <assert.h>
 #include <libdeflate.h>
+#include <stdbool.h>
 #include <stdint.h>
 #include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+static void
+alloc_guarded_buffer(size_t size, uint8_t **start_ret, uint8_t **end_ret)
+{
+	const size_t pagesize = sysconf(_SC_PAGESIZE);
+	const size_t nr_pages = (size + pagesize - 1) / pagesize;
+	uint8_t *base_addr, *start, *end;
+
+	/* Allocate buffer and guard pages. */
+	base_addr = mmap(NULL, (nr_pages + 2) * pagesize, PROT_READ|PROT_WRITE,
+			 MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+	assert(base_addr != (uint8_t *)MAP_FAILED);
+	start = base_addr + pagesize;
+	end = start + (nr_pages * pagesize);
+
+	/* Unmap the guard pages. */
+	munmap(base_addr, pagesize);
+	munmap(end, pagesize);
+
+	*start_ret = start;
+	*end_ret = end;
+}
+
+static void
+free_guarded_buffer(uint8_t *start, uint8_t *end)
+{
+	munmap(start, end - start);
+}
 
 /* Fuzz DEFLATE decompression. */
 int LLVMFuzzerTestOneInput(const uint8_t *in, size_t insize)
 {
 	size_t outsize_avail = 3 * insize;
-	uint8_t *out;
+	uint8_t *cbuf_start, *cbuf_end, *cbuf;
+	uint8_t *dbuf_start, *dbuf_end, *dbuf;
 	struct libdeflate_decompressor *d;
 
-	out = malloc(outsize_avail);
+	/* Use guard pages to make all input/output buffer overflows segfault */
+
+	alloc_guarded_buffer(insize, &cbuf_start, &cbuf_end);
+	cbuf = cbuf_end - insize;
+	memcpy(cbuf, in, insize);
+
+	alloc_guarded_buffer(outsize_avail, &dbuf_start, &dbuf_end);
+	dbuf = dbuf_end - outsize_avail;
 
 	d = libdeflate_alloc_decompressor();
-	libdeflate_deflate_decompress(d, in, insize, out, outsize_avail, NULL);
+	libdeflate_deflate_decompress(d, cbuf, insize, dbuf, outsize_avail,
+				      NULL);
 	libdeflate_free_decompressor(d);
-	free(out);
+	free_guarded_buffer(cbuf_start, cbuf_end);
+	free_guarded_buffer(dbuf_start, dbuf_end);
 	return 0;
 }


=====================================
scripts/libFuzzer/fuzz.sh
=====================================
@@ -103,6 +103,6 @@ if [ ! -e "$TARGET/fuzz.c" ]; then
 	exit 1
 fi
 run_cmd clang -g -O1 -fsanitize=fuzzer$EXTRA_SANITIZERS \
-	-Wall -Werror -DLIBDEFLATE_ENABLE_ASSERTIONS=1 \
+	-Wall -Werror -DLIBDEFLATE_ENABLE_ASSERTIONS=1 -I ../../ \
 	../../lib/*{,/*}.c "$TARGET/fuzz.c" -o "$TARGET/fuzz"
 run_cmd "$TARGET/fuzz" "${EXTRA_FUZZER_ARGS[@]}" "$TARGET/corpus"



View it on GitLab: https://salsa.debian.org/deflate-team/libdeflate/-/commit/b370b326f09c0a4d54e3583e8304cf2319b9947a

-- 
View it on GitLab: https://salsa.debian.org/deflate-team/libdeflate/-/commit/b370b326f09c0a4d54e3583e8304cf2319b9947a
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20240813/c6d7a020/attachment-0001.htm>