[med-svn] [Git][med-team/libdeflate][master] 5 commits: routine-update: New upstream version
Lance Lin (@linqigang)
gitlab@salsa.debian.org
Mon Jan 29 14:42:47 GMT 2024
Lance Lin pushed to branch master at Debian Med / libdeflate
Commits:
d8b24342 by Lance Lin at 2024-01-29T20:42:16+07:00
routine-update: New upstream version
- - - - -
17c54f33 by Lance Lin at 2024-01-29T20:42:31+07:00
New upstream version 1.19
- - - - -
121dc007 by Lance Lin at 2024-01-29T20:42:32+07:00
Update upstream source from tag 'upstream/1.19'
Update to upstream version '1.19'
with Debian dir d9612fe16f2b1f058eaad660fa761d37df89237a
- - - - -
312114db by Lance Lin at 2024-01-29T20:59:28+07:00
d/rules: Reduce compiler optimization on alpha (Closes: #1028624)
- - - - -
35cb04ce by Lance Lin at 2024-01-29T21:36:24+07:00
d/libdeflate0.symbols: Add new symbols (libdeflate_alloc_compressor_ex, libdeflate_alloc_decompressor_ex)
- - - - -
17 changed files:
- .cirrus.yml
- .github/workflows/ci.yml
- CMakeLists.txt
- NEWS.md
- README.md
- debian/changelog
- debian/libdeflate0.symbols
- debian/rules
- lib/deflate_compress.c
- lib/deflate_decompress.c
- lib/lib_common.h
- lib/utils.c
- lib/x86/cpu_features.c
- lib/x86/cpu_features.h
- libdeflate.h
- programs/test_custom_malloc.c
- scripts/gen_offset_slot_map.py
Changes:
=====================================
.cirrus.yml
=====================================
@@ -2,7 +2,7 @@ task:
freebsd_instance:
matrix:
- image_family: freebsd-12-3
- - image_family: freebsd-13-0
+ - image_family: freebsd-13-2
install_script: pkg install -y cmake
script:
- cmake -B build -DLIBDEFLATE_BUILD_TESTS=1
=====================================
.github/workflows/ci.yml
=====================================
@@ -93,26 +93,30 @@ jobs:
run: cc -O2 -Wall -Werror -municode lib/*{,/*}.c programs/{gzip,prog_util,tgetopt}.c -o libdeflate-gzip.exe
windows-visualstudio-build-and-test:
- name: Build and test (Windows, Visual Studio ${{matrix.toolset}}, ${{matrix.platform.vs}})
+ name: Build and test (Windows, ${{matrix.gen}}, ${{matrix.toolset}}, ${{matrix.vs}})
strategy:
matrix:
- platform: [ {vs: x64, vcpkg: x64-windows},
- {vs: Win32, vcpkg: x86-windows} ]
- toolset: [v143, ClangCL]
- runs-on: windows-latest
+ include:
+ - {os: windows-2022, gen: "Visual Studio 17 2022", toolset: v143, vs: x64, vcpkg: x64-windows}
+ - {os: windows-2022, gen: "Visual Studio 17 2022", toolset: ClangCL, vs: x64, vcpkg: x64-windows}
+ - {os: windows-2022, gen: "Visual Studio 17 2022", toolset: v143, vs: Win32, vcpkg: x86-windows}
+ - {os: windows-2022, gen: "Visual Studio 17 2022", toolset: ClangCL, vs: Win32, vcpkg: x86-windows}
+ - {os: windows-2019, gen: "Visual Studio 16 2019", toolset: v142, vs: x64, vcpkg: x64-windows}
+ - {os: windows-2019, gen: "Visual Studio 16 2019", toolset: v142, vs: Win32, vcpkg: x86-windows}
+ runs-on: ${{matrix.os}}
steps:
- uses: actions/checkout@v3
- uses: microsoft/setup-msbuild@v1.1
- - run: vcpkg install zlib:${{matrix.platform.vcpkg}}
+ - run: vcpkg install zlib:${{matrix.vcpkg}}
- run: >
- echo C:\vcpkg\packages\zlib_${{matrix.platform.vcpkg}}\bin
+ echo C:\vcpkg\packages\zlib_${{matrix.vcpkg}}\bin
| Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
# Note: as per the CMake documentation, DESTDIR is unsupported on Windows.
- run: >
- cmake -B build -G "Visual Studio 17 2022" -T ${{matrix.toolset}}
- -A ${{matrix.platform.vs}} -DLIBDEFLATE_BUILD_TESTS=1
- -DCMAKE_C_FLAGS="/W4 /WX /DLIBDEFLATE_ENABLE_ASSERTIONS /IC:\vcpkg\packages\zlib_${{matrix.platform.vcpkg}}\include"
- -DZLIB_LIBRARY=C:\vcpkg\packages\zlib_${{matrix.platform.vcpkg}}\lib\zlib.lib
+ cmake -B build -G "${{matrix.gen}}" -T ${{matrix.toolset}}
+ -A ${{matrix.vs}} -DLIBDEFLATE_BUILD_TESTS=1
+ -DCMAKE_C_FLAGS="/W4 /WX /DLIBDEFLATE_ENABLE_ASSERTIONS /IC:\vcpkg\packages\zlib_${{matrix.vcpkg}}\include"
+ -DZLIB_LIBRARY=C:\vcpkg\packages\zlib_${{matrix.vcpkg}}\lib\zlib.lib
-DCMAKE_INSTALL_PREFIX=build\install
- run: cmake --build build --verbose --config Debug
- run: cmake --install build --verbose --config Debug
=====================================
CMakeLists.txt
=====================================
@@ -38,7 +38,7 @@ option(LIBDEFLATE_GZIP_SUPPORT "Support the gzip format" ON)
option(LIBDEFLATE_FREESTANDING
"Build a freestanding library, i.e. a library that doesn't link to any
libc functions like malloc(), free(), and memcpy(). Library users will
- need to call libdeflate_set_memory_allocator()." OFF)
+ need to provide a custom memory allocator." OFF)
option(LIBDEFLATE_BUILD_GZIP "Build the libdeflate-gzip program" ON)
option(LIBDEFLATE_BUILD_TESTS "Build the test programs" OFF)
option(LIBDEFLATE_USE_SHARED_LIB
=====================================
NEWS.md
=====================================
@@ -1,5 +1,26 @@
# libdeflate release notes
+## Version 1.19
+
+* Added new functions `libdeflate_alloc_compressor_ex()` and
+ `libdeflate_alloc_decompressor_ex()`. These functions allow specifying a
+ custom memory allocator on a per-compressor basis.
+
+* libdeflate now always generates Huffman codes with at least 2 codewords. This
+ fixes a compatibility issue where Windows Explorer's ZIP unpacker could not
+ decompress DEFLATE streams created by libdeflate. libdeflate's behavior was
+ allowed by the DEFLATE RFC, but not all software was okay with it. In rare
+ cases, compression ratios can be slightly reduced by this change.
+
+* Disabled the use of some compiler intrinsics on MSVC versions where they don't
+ work correctly.
+
+* libdeflate can now compress up to the exact size of the output buffer.
+
+* Slightly improved compression performance at levels 1-9.
+
+* Improved the compression ratio of very short inputs.
+
## Version 1.18
* Fixed a bug where the build type didn't default to "Release" when using
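Aside: a minimal usage sketch of the new per-(de)compressor allocator API described above (not part of the patch; the counting allocator and its names are hypothetical, just to show the hook points):

#include <stdio.h>
#include <stdlib.h>
#include <libdeflate.h>

static size_t nr_allocs; /* hypothetical instrumentation */

static void *counting_malloc(size_t size)
{
    nr_allocs++;
    return malloc(size);
}

static void counting_free(void *ptr)
{
    free(ptr);
}

int main(void)
{
    struct libdeflate_options options = {
        .sizeof_options = sizeof(options),
        .malloc_func = counting_malloc,
        .free_func = counting_free,
    };
    struct libdeflate_compressor *c =
        libdeflate_alloc_compressor_ex(6, &options);
    struct libdeflate_decompressor *d =
        libdeflate_alloc_decompressor_ex(&options);

    if (!c || !d)
        return 1;
    printf("allocations so far: %zu\n", nr_allocs);
    libdeflate_free_compressor(c);  /* freed via counting_free */
    libdeflate_free_decompressor(d);
    return 0;
}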
=====================================
README.md
=====================================
@@ -118,6 +118,7 @@ following bindings:
* Julia: [LibDeflate.jl](https://github.com/jakobnissen/LibDeflate.jl)
* Nim: [libdeflate-nim](https://github.com/gemesa/libdeflate-nim)
* Perl: [Gzip::Libdeflate](https://github.com/benkasminbullock/gzip-libdeflate)
+* PHP: [ext-libdeflate](https://github.com/pmmp/ext-libdeflate)
* Python: [deflate](https://github.com/dcwatson/deflate)
* Ruby: [libdeflate-ruby](https://github.com/kaorimatz/libdeflate-ruby)
* Rust: [libdeflater](https://github.com/adamkewley/libdeflater)
@@ -126,6 +127,11 @@ Note: these are third-party projects which haven't necessarily been vetted by
the authors of libdeflate. Please direct all questions, bugs, and improvements
for these bindings to their authors.
+Also, unfortunately many of these bindings bundle or pin an old version of
+libdeflate. To avoid known issues in old versions and to improve performance,
+before using any of these bindings please ensure that the bundled or pinned
+version of libdeflate has been upgraded to the latest release.
+
# DEFLATE vs. zlib vs. gzip
The DEFLATE format ([rfc1951](https://www.ietf.org/rfc/rfc1951.txt)), the zlib
=====================================
debian/changelog
=====================================
@@ -1,3 +1,13 @@
+libdeflate (1.19-1) UNRELEASED; urgency=medium
+
+ * Team upload.
+ * New upstream version
+ * d/rules: Reduce compiler optimization on alpha (Closes: #1028624)
+ * d/libdeflate0.symbols: Add new symbols (libdeflate_alloc_compressor_ex,
+ libdeflate_alloc_decompressor_ex)
+
-- Lance Lin <lq27267@gmail.com>  Mon, 29 Jan 2024 20:42:16 +0700
+
libdeflate (1.18-1) unstable; urgency=medium
[ Lance Lin ]
=====================================
debian/libdeflate0.symbols
=====================================
@@ -2,7 +2,9 @@ libdeflate.so.0 libdeflate0 #MINVER#
* Build-Depends-Package: libdeflate-dev
libdeflate_adler32@Base 1.0
libdeflate_alloc_compressor@Base 1.0
+ libdeflate_alloc_compressor_ex@Base 1.19
libdeflate_alloc_decompressor@Base 1.0
+ libdeflate_alloc_decompressor_ex@Base 1.19
libdeflate_crc32@Base 1.0
libdeflate_deflate_compress@Base 1.0
libdeflate_deflate_compress_bound@Base 1.0
=====================================
debian/rules
=====================================
@@ -4,6 +4,10 @@ export LC_ALL=C.UTF-8
export DEB_BUILD_MAINT_OPTIONS=hardening=+all
+ifeq (alpha,$(DEB_HOST_ARCH))
+ export DEB_CFLAGS_MAINT_APPEND=-O1
+endif
+
%:
dh $@
=====================================
lib/deflate_compress.c
=====================================
@@ -285,46 +285,26 @@ static const u8 deflate_length_slot[DEFLATE_MAX_MATCH_LEN + 1] = {
};
/*
- * A condensed table which maps offset => offset slot as follows:
- *
- * offset <= 256: deflate_offset_slot[offset]
- * offset > 256: deflate_offset_slot[256 + ((offset - 1) >> 7)]
- *
- * This table was generated by scripts/gen_offset_slot_map.py.
+ * Table: 'offset - 1 => offset_slot' for offset <= 256.
+ * This was generated by scripts/gen_offset_slot_map.py.
*/
-static const u8 deflate_offset_slot[512] = {
- 0, 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7,
- 7, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9,
- 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
- 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
- 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+static const u8 deflate_offset_slot[256] = {
+ 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
+ 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
- 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
- 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
- 15, 0, 16, 17, 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21,
- 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
- 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
- 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
- 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
- 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
- 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
- 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
- 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
- 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
- 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
- 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
- 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
- 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
- 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
};
/* The order in which precode codeword lengths are stored */
@@ -477,6 +457,9 @@ struct libdeflate_compressor {
void (*impl)(struct libdeflate_compressor *restrict c, const u8 *in,
size_t in_nbytes, struct deflate_output_bitstream *os);
+ /* The free() function for this struct, chosen at allocation time */
+ free_func_t free_func;
+
/* The compression level with which this compressor was created */
unsigned compression_level;
@@ -603,7 +586,8 @@ struct libdeflate_compressor {
/* The current cost model being used */
struct deflate_costs costs;
- struct deflate_costs costs_producing_best_true_cost;
+ /* Saved cost model */
+ struct deflate_costs costs_saved;
/*
* A table that maps match offset to offset slot. This
@@ -654,6 +638,23 @@ struct libdeflate_compressor {
*/
unsigned min_bits_to_use_nonfinal_path;
+ /*
+ * The maximum block length, in uncompressed bytes, at
+ * which to find and consider the optimal match/literal
+ * list for the static Huffman codes. This strategy
+ * improves the compression ratio produced by static
+ * Huffman blocks and can discover more cases in which
+ * static blocks are worthwhile. This helps mostly with
+ * small blocks, which is why this parameter is a max_len.
+ *
+ * Above this block length, static Huffman blocks are
+ * only used opportunistically. I.e. a static Huffman
+ * block is only used if a static block using the same
+ * match/literal list as the optimized dynamic block
+ * happens to be cheaper than the dynamic block itself.
+ */
+ unsigned max_len_to_optimize_static_block;
+
} n; /* (n)ear-optimal */
#endif /* SUPPORT_NEAR_OPTIMAL_PARSING */
@@ -701,24 +702,12 @@ struct deflate_output_bitstream {
*/
u8 *next;
- /*
- * Pointer to near the end of the output buffer. 'next' will never
- * exceed this. There are OUTPUT_END_PADDING bytes reserved after this
- * to allow branchlessly writing a whole word at this location.
- */
+ /* Pointer to the end of the output buffer */
u8 *end;
-};
-/*
- * OUTPUT_END_PADDING is the size, in bytes, of the extra space that must be
- * present following os->end, in order to not overrun the buffer when generating
- * output. When UNALIGNED_ACCESS_IS_FAST, we need at least sizeof(bitbuf_t)
- * bytes for put_unaligned_leword(). Otherwise we need only 1 byte. However,
- * to make the compression algorithm produce the same result on all CPU
- * architectures (which is sometimes desirable), we have to unconditionally use
- * the maximum for any CPU, which is sizeof(bitbuf_t) == 8.
- */
-#define OUTPUT_END_PADDING 8
+ /* true if the output buffer ran out of space */
+ bool overflow;
+};
/*
* Add some bits to the bitbuffer variable of the output bitstream. The caller
@@ -732,21 +721,29 @@ do { \
ASSERT(bitcount <= BITBUF_NBITS); \
} while (0)
-/* Flush bits from the bitbuffer variable to the output buffer. */
+/*
+ * Flush bits from the bitbuffer variable to the output buffer. After this, the
+ * bitbuffer will contain at most 7 bits (a partial byte).
+ *
+ * Since deflate_flush_block() verified ahead of time that there is enough space
+ * remaining before actually writing the block, it's guaranteed that out_next
+ * won't exceed os->end. However, there might not be enough space remaining to
+ * flush a whole word, even though that's fastest. Therefore, flush a whole
+ * word if there is space for it, otherwise flush a byte at a time.
+ */
#define FLUSH_BITS() \
do { \
- if (UNALIGNED_ACCESS_IS_FAST) { \
+ if (UNALIGNED_ACCESS_IS_FAST && likely(out_next < out_fast_end)) { \
/* Flush a whole word (branchlessly). */ \
put_unaligned_leword(bitbuf, out_next); \
bitbuf >>= bitcount & ~7; \
- out_next += MIN(out_end - out_next, bitcount >> 3); \
+ out_next += bitcount >> 3; \
bitcount &= 7; \
} else { \
/* Flush a byte at a time. */ \
while (bitcount >= 8) { \
- *out_next = bitbuf; \
- if (out_next != out_end) \
- out_next++; \
+ ASSERT(out_next < os->end); \
+ *out_next++ = bitbuf; \
bitcount -= 8; \
bitbuf >>= 8; \
} \
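Aside: the fast/slow flush split introduced above can be pictured with this standalone sketch (not part of the patch; put_unaligned_leword is modeled with memcpy, and a little-endian host is assumed):

#include <stdint.h>
#include <string.h>

typedef uint64_t bitbuf_t;

static inline void flush_bits(bitbuf_t *bitbuf, unsigned *bitcount,
                              uint8_t **out_next, const uint8_t *out_fast_end)
{
    if (*out_next < out_fast_end) {
        /* Fast path: branchlessly store a whole word, then advance
         * past the complete bytes the bitbuffer contained. */
        memcpy(*out_next, bitbuf, sizeof(*bitbuf)); /* LE host */
        *bitbuf >>= *bitcount & ~7;
        *out_next += *bitcount >> 3;
        *bitcount &= 7;
    } else {
        /* Slow path near the end of the buffer: byte at a time. */
        while (*bitcount >= 8) {
            *(*out_next)++ = (uint8_t)*bitbuf;
            *bitcount -= 8;
            *bitbuf >>= 8;
        }
    }
}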
@@ -1335,7 +1332,6 @@ deflate_make_huffman_code(unsigned num_syms, unsigned max_codeword_len,
* eventually return the codewords.
*/
num_used_syms = sort_symbols(num_syms, freqs, lens, A);
-
/*
* 'num_used_syms' is the number of symbols with nonzero frequency.
* This may be less than @num_syms. 'num_used_syms' is also the number
@@ -1344,30 +1340,34 @@ deflate_make_huffman_code(unsigned num_syms, unsigned max_codeword_len,
*/
/*
- * Handle special cases where only 0 or 1 symbols were used (had nonzero
- * frequency).
+ * A complete Huffman code must contain at least 2 codewords. Yet, it's
+ * possible that fewer than 2 symbols were used. When this happens,
+ * it's usually for the offset code (0-1 symbols used). But it's also
+ * theoretically possible for the litlen and pre codes (1 symbol used).
+ *
+ * The DEFLATE RFC explicitly allows the offset code to contain just 1
+ * codeword, or even be completely empty. But it's silent about the
+ * other codes. It also doesn't say whether, in the 1-codeword case,
+ * the codeword (which it says must be 1 bit) is '0' or '1'.
+ *
+ * In any case, some DEFLATE decompressors reject these cases. zlib
+ * generally allows them, but it does reject precodes that have just 1
+ * codeword. More problematically, zlib v1.2.1 and earlier rejected
+ * empty offset codes, and this behavior can also be seen in Windows
+ * Explorer's ZIP unpacker (supposedly even still in Windows 11).
+ *
+ * Other DEFLATE compressors, including zlib, always send at least 2
+ * codewords in order to make a complete Huffman code. Therefore, this
+ * is a case where practice does not entirely match the specification.
+ * We follow practice by generating 2 codewords of length 1: codeword
+ * '0' for symbol 0, and codeword '1' for another symbol -- the used
+ * symbol if it exists and is not symbol 0, otherwise symbol 1. This
+ * does worsen the compression ratio by having to send an unnecessary
+ * offset codeword length. But this only affects rare cases such as
+ * blocks containing all literals, and it only makes a tiny difference.
*/
-
- if (unlikely(num_used_syms == 0)) {
- /*
- * Code is empty. sort_symbols() already set all lengths to 0,
- * so there is nothing more to do.
- */
- return;
- }
-
- if (unlikely(num_used_syms == 1)) {
- /*
- * Only one symbol was used, so we only need one codeword. But
- * two codewords are needed to form the smallest complete
- * Huffman code, which uses codewords 0 and 1. Therefore, we
- * choose another symbol to which to assign a codeword. We use
- * 0 (if the used symbol is not 0) or 1 (if the used symbol is
- * 0). In either case, the lesser-valued symbol must be
- * assigned codeword 0 so that the resulting code is canonical.
- */
-
- unsigned sym = A[0] & SYMBOL_MASK;
+ if (unlikely(num_used_syms < 2)) {
+ unsigned sym = num_used_syms ? (A[0] & SYMBOL_MASK) : 0;
unsigned nonzero_idx = sym ? sym : 1;
codewords[0] = 0;
@@ -1451,20 +1451,30 @@ deflate_init_static_codes(struct libdeflate_compressor *c)
/* Return the offset slot for the given match offset, using the small map. */
static forceinline unsigned
-deflate_get_offset_slot(unsigned offset)
+deflate_get_offset_slot(u32 offset)
{
-#if 1
- if (offset <= 256)
- return deflate_offset_slot[offset];
- else
- return deflate_offset_slot[256 + ((offset - 1) >> 7)];
-#else /* Branchless version */
- u32 i1 = offset;
- u32 i2 = 256 + ((offset - 1) >> 7);
- u32 is_small = (s32)(offset - 257) >> 31;
+ /*
+ * 1 <= offset <= 32768 here. For 1 <= offset <= 256,
+ * deflate_offset_slot[offset - 1] gives the slot.
+ *
+ * For 257 <= offset <= 32768, we take advantage of the fact that 257 is
+ * the beginning of slot 16, and each slot [16..30) is exactly 1 << 7 ==
+ * 128 times larger than each slot [2..16) (since the number of extra
+ * bits increases by 1 every 2 slots). Thus, the slot is:
+ *
+ * deflate_offset_slot[2 + ((offset - 257) >> 7)] + (16 - 2)
+ * == deflate_offset_slot[((offset - 1) >> 7)] + 14
+ *
+ * Define 'n = (offset <= 256) ? 0 : 7'. Then any offset is handled by:
+ *
+ * deflate_offset_slot[(offset - 1) >> n] + (n << 1)
+ *
+ * For better performance, replace 'n = (offset <= 256) ? 0 : 7' with
+ * the equivalent (for offset <= 536871168) 'n = (256 - offset) >> 29'.
+ */
+ unsigned n = (256 - offset) >> 29;
- return deflate_offset_slot[(i1 & is_small) ^ (i2 & ~is_small)];
-#endif
+ return deflate_offset_slot[(offset - 1) >> n] + (n << 1);
}
static unsigned
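Aside: the identity derived in the comment above can be checked exhaustively; a throwaway test sketch (not part of the patch; the base table is the standard RFC 1951 distance table):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* DEFLATE offset (distance) slot base values, per RFC 1951. */
static const uint32_t offset_base[30] = {
    1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
    257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
    8193, 12289, 16385, 24577,
};

static uint8_t slot_map[256]; /* like deflate_offset_slot[] */

static unsigned slot_bruteforce(uint32_t offset)
{
    unsigned slot = 29;

    while (offset_base[slot] > offset)
        slot--;
    return slot;
}

int main(void)
{
    uint32_t offset;

    for (offset = 1; offset <= 256; offset++)
        slot_map[offset - 1] = slot_bruteforce(offset);

    for (offset = 1; offset <= 32768; offset++) {
        unsigned n = (256 - offset) >> 29; /* 0 or 7 */
        unsigned slot = slot_map[(offset - 1) >> n] + (n << 1);

        assert(slot == slot_bruteforce(offset));
    }
    printf("formula matches brute force for offsets 1..32768\n");
    return 0;
}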
@@ -1711,20 +1721,26 @@ deflate_flush_block(struct libdeflate_compressor *c,
bitbuf_t bitbuf = os->bitbuf;
unsigned bitcount = os->bitcount;
u8 *out_next = os->next;
- u8 * const out_end = os->end;
- /* The cost for each block type, in bits */
- u32 dynamic_cost = 0;
- u32 static_cost = 0;
- u32 uncompressed_cost = 0;
+ u8 * const out_fast_end =
+ os->end - MIN(WORDBYTES - 1, os->end - out_next);
+ /*
+ * The cost for each block type, in bits. Start with the cost of the
+ * block header which is 3 bits.
+ */
+ u32 dynamic_cost = 3;
+ u32 static_cost = 3;
+ u32 uncompressed_cost = 3;
u32 best_cost;
struct deflate_codes *codes;
unsigned sym;
- ASSERT(block_length >= MIN_BLOCK_LENGTH || is_final_block);
+ ASSERT(block_length >= MIN_BLOCK_LENGTH ||
+ (is_final_block && block_length > 0));
ASSERT(block_length <= MAX_BLOCK_LENGTH);
ASSERT(bitcount <= 7);
ASSERT((bitbuf & ~(((bitbuf_t)1 << bitcount) - 1)) == 0);
- ASSERT(out_next <= out_end);
+ ASSERT(out_next <= os->end);
+ ASSERT(!os->overflow);
/* Precompute the precode items and build the precode. */
deflate_precompute_huffman_header(c);
@@ -1782,9 +1798,71 @@ deflate_flush_block(struct libdeflate_compressor *c,
UINT16_MAX) - 1)) +
(8 * block_length);
- /* Choose and output the cheapest type of block. */
- best_cost = MIN(static_cost, uncompressed_cost);
- if (dynamic_cost < best_cost) {
+ /*
+ * Choose and output the cheapest type of block. If there is a tie,
+ * prefer uncompressed, then static, then dynamic.
+ */
+
+ best_cost = MIN(dynamic_cost, MIN(static_cost, uncompressed_cost));
+
+ /* If the block isn't going to fit, then stop early. */
+ if (DIV_ROUND_UP(bitcount + best_cost, 8) > os->end - out_next) {
+ os->overflow = true;
+ return;
+ }
+ /*
+ * Else, now we know that the block fits, so no further bounds checks on
+ * the output buffer are required until the next block.
+ */
+
+ if (best_cost == uncompressed_cost) {
+ /*
+ * Uncompressed block(s). DEFLATE limits the length of
+ * uncompressed blocks to UINT16_MAX bytes, so if the length of
+ * the "block" we're flushing is over UINT16_MAX, we actually
+ * output multiple blocks.
+ */
+ do {
+ u8 bfinal = 0;
+ size_t len = UINT16_MAX;
+
+ if (in_end - in_next <= UINT16_MAX) {
+ bfinal = is_final_block;
+ len = in_end - in_next;
+ }
+ /* It was already checked that there is enough space. */
+ ASSERT(os->end - out_next >=
+ DIV_ROUND_UP(bitcount + 3, 8) + 4 + len);
+ /*
+ * Output BFINAL (1 bit) and BTYPE (2 bits), then align
+ * to a byte boundary.
+ */
+ STATIC_ASSERT(DEFLATE_BLOCKTYPE_UNCOMPRESSED == 0);
+ *out_next++ = (bfinal << bitcount) | bitbuf;
+ if (bitcount > 5)
+ *out_next++ = 0;
+ bitbuf = 0;
+ bitcount = 0;
+ /* Output LEN and NLEN, then the data itself. */
+ put_unaligned_le16(len, out_next);
+ out_next += 2;
+ put_unaligned_le16(~len, out_next);
+ out_next += 2;
+ memcpy(out_next, in_next, len);
+ out_next += len;
+ in_next += len;
+ } while (in_next != in_end);
+ /* Done outputting uncompressed block(s) */
+ goto out;
+ }
+
+ if (best_cost == static_cost) {
+ /* Static Huffman block */
+ codes = &c->static_codes;
+ ADD_BITS(is_final_block, 1);
+ ADD_BITS(DEFLATE_BLOCKTYPE_STATIC_HUFFMAN, 2);
+ FLUSH_BITS();
+ } else {
const unsigned num_explicit_lens = c->o.precode.num_explicit_lens;
const unsigned num_precode_items = c->o.precode.num_items;
unsigned precode_sym, precode_item;
@@ -1792,7 +1870,6 @@ deflate_flush_block(struct libdeflate_compressor *c,
/* Dynamic Huffman block */
- best_cost = dynamic_cost;
codes = &c->codes;
STATIC_ASSERT(CAN_BUFFER(1 + 2 + 5 + 5 + 4 + 3));
ADD_BITS(is_final_block, 1);
@@ -1844,54 +1921,6 @@ deflate_flush_block(struct libdeflate_compressor *c,
deflate_extra_precode_bits[precode_sym]);
FLUSH_BITS();
} while (++i < num_precode_items);
- } else if (static_cost < uncompressed_cost) {
- /* Static Huffman block */
- codes = &c->static_codes;
- ADD_BITS(is_final_block, 1);
- ADD_BITS(DEFLATE_BLOCKTYPE_STATIC_HUFFMAN, 2);
- FLUSH_BITS();
- } else {
- /*
- * Uncompressed block(s). DEFLATE limits the length of
- * uncompressed blocks to UINT16_MAX bytes, so if the length of
- * the "block" we're flushing is over UINT16_MAX, we actually
- * output multiple blocks.
- */
- do {
- u8 bfinal = 0;
- size_t len = UINT16_MAX;
-
- if (in_end - in_next <= UINT16_MAX) {
- bfinal = is_final_block;
- len = in_end - in_next;
- }
- if (out_end - out_next <
- (bitcount + 3 + 7) / 8 + 4 + len) {
- /* Not enough output space remaining. */
- out_next = out_end;
- goto out;
- }
- /*
- * Output BFINAL (1 bit) and BTYPE (2 bits), then align
- * to a byte boundary.
- */
- STATIC_ASSERT(DEFLATE_BLOCKTYPE_UNCOMPRESSED == 0);
- *out_next++ = (bfinal << bitcount) | bitbuf;
- if (bitcount > 5)
- *out_next++ = 0;
- bitbuf = 0;
- bitcount = 0;
- /* Output LEN and NLEN, then the data itself. */
- put_unaligned_le16(len, out_next);
- out_next += 2;
- put_unaligned_le16(~len, out_next);
- out_next += 2;
- memcpy(out_next, in_next, len);
- out_next += len;
- in_next += len;
- } while (in_next != in_end);
- /* Done outputting uncompressed block(s) */
- goto out;
}
/* Output the literals and matches for a dynamic or static block. */
@@ -1995,13 +2024,12 @@ deflate_flush_block(struct libdeflate_compressor *c,
out:
ASSERT(bitcount <= 7);
/*
- * Assert that the block cost was computed correctly, as
+ * Assert that the block cost was computed correctly. This is relied on
+ * above for the bounds check on the output buffer. Also,
* libdeflate_deflate_compress_bound() relies on this via the assumption
- * that uncompressed blocks will always be used when cheaper.
+ * that uncompressed blocks will always be used when cheapest.
*/
- ASSERT(8 * (out_next - os->next) + bitcount - os->bitcount ==
- 3 + best_cost || out_next == out_end);
-
+ ASSERT(8 * (out_next - os->next) + bitcount - os->bitcount == best_cost);
os->bitbuf = bitbuf;
os->bitcount = bitcount;
os->next = out_next;
@@ -2304,6 +2332,13 @@ calculate_min_match_len(const u8 *data, size_t data_len,
unsigned num_used_literals = 0;
size_t i;
+ /*
+ * For very short inputs, the static Huffman code has a good chance of
+ * being best, in which case there is no reason to avoid short matches.
+ */
+ if (data_len < 512)
+ return DEFLATE_MIN_MATCH_LEN;
+
/*
* For an initial approximation, scan the first 4 KiB of data. The
* caller may use recalculate_min_match_len() to update min_len later.
@@ -2483,7 +2518,7 @@ deflate_compress_fastest(struct libdeflate_compressor * restrict c,
deflate_finish_block(c, os, in_block_begin,
in_next - in_block_begin,
c->p.f.sequences, in_next == in_end);
- } while (in_next != in_end);
+ } while (in_next != in_end && !os->overflow);
}
/*
@@ -2562,7 +2597,7 @@ deflate_compress_greedy(struct libdeflate_compressor * restrict c,
deflate_finish_block(c, os, in_block_begin,
in_next - in_block_begin,
c->p.g.sequences, in_next == in_end);
- } while (in_next != in_end);
+ } while (in_next != in_end && !os->overflow);
}
static forceinline void
@@ -2768,7 +2803,7 @@ have_cur_match:
deflate_finish_block(c, os, in_block_begin,
in_next - in_block_begin,
c->p.g.sequences, in_next == in_end);
- } while (in_next != in_end);
+ } while (in_next != in_end && !os->overflow);
}
/*
@@ -3389,6 +3424,7 @@ deflate_optimize_and_flush_block(struct libdeflate_compressor *c,
u32 best_true_cost = UINT32_MAX;
u32 true_cost;
u32 only_lits_cost;
+ u32 static_cost = UINT32_MAX;
struct deflate_sequence seq_;
struct deflate_sequence *seq = NULL;
u32 i;
@@ -3410,6 +3446,24 @@ deflate_optimize_and_flush_block(struct libdeflate_compressor *c,
ARRAY_LEN(c->p.n.optimum_nodes) - 1); i++)
c->p.n.optimum_nodes[i].cost_to_end = 0x80000000;
+ /*
+ * Sometimes a static Huffman block ends up being cheapest, particularly
+ * if the block is small. So, if the block is sufficiently small, find
+ * the optimal static block solution and remember its cost.
+ */
+ if (block_length <= c->p.n.max_len_to_optimize_static_block) {
+ /* Save c->p.n.costs temporarily. */
+ c->p.n.costs_saved = c->p.n.costs;
+
+ deflate_set_costs_from_codes(c, &c->static_codes.lens);
+ deflate_find_min_cost_path(c, block_length, cache_ptr);
+ static_cost = c->p.n.optimum_nodes[0].cost_to_end / BIT_COST;
+ static_cost += 7; /* for the end-of-block symbol */
+
+ /* Restore c->p.n.costs. */
+ c->p.n.costs = c->p.n.costs_saved;
+ }
+
/* Initialize c->p.n.costs with default costs. */
deflate_set_initial_costs(c, block_begin, block_length, is_first_block);
@@ -3437,7 +3491,9 @@ deflate_optimize_and_flush_block(struct libdeflate_compressor *c,
break;
best_true_cost = true_cost;
- c->p.n.costs_producing_best_true_cost = c->p.n.costs;
+
+ /* Save the cost model that gave 'best_true_cost'. */
+ c->p.n.costs_saved = c->p.n.costs;
/* Update the cost model from the Huffman codes. */
deflate_set_costs_from_codes(c, &c->codes.lens);
@@ -3445,20 +3501,26 @@ deflate_optimize_and_flush_block(struct libdeflate_compressor *c,
} while (--num_passes_remaining);
*used_only_literals = false;
- if (only_lits_cost < best_true_cost) {
- /* Using only literals ended up being best! */
- deflate_choose_all_literals(c, block_begin, block_length);
- deflate_set_costs_from_codes(c, &c->codes.lens);
- seq_.litrunlen_and_length = block_length;
- seq = &seq_;
- *used_only_literals = true;
+ if (MIN(only_lits_cost, static_cost) < best_true_cost) {
+ if (only_lits_cost < static_cost) {
+ /* Using only literals ended up being best! */
+ deflate_choose_all_literals(c, block_begin, block_length);
+ deflate_set_costs_from_codes(c, &c->codes.lens);
+ seq_.litrunlen_and_length = block_length;
+ seq = &seq_;
+ *used_only_literals = true;
+ } else {
+ /* Static block ended up being best! */
+ deflate_set_costs_from_codes(c, &c->static_codes.lens);
+ deflate_find_min_cost_path(c, block_length, cache_ptr);
+ }
} else if (true_cost >=
best_true_cost + c->p.n.min_bits_to_use_nonfinal_path) {
/*
* The best solution was actually from a non-final optimization
* pass, so recover and use the min-cost path from that pass.
*/
- c->p.n.costs = c->p.n.costs_producing_best_true_cost;
+ c->p.n.costs = c->p.n.costs_saved;
deflate_find_min_cost_path(c, block_length, cache_ptr);
deflate_set_costs_from_codes(c, &c->codes.lens);
}
@@ -3782,7 +3844,7 @@ deflate_compress_near_optimal(struct libdeflate_compressor * restrict c,
deflate_near_optimal_init_stats(c);
in_block_begin = in_next;
}
- } while (in_next != in_end);
+ } while (in_next != in_end && !os->overflow);
}
/* Initialize c->p.n.offset_slot_full. */
@@ -3807,13 +3869,21 @@ deflate_init_offset_slot_full(struct libdeflate_compressor *c)
#endif /* SUPPORT_NEAR_OPTIMAL_PARSING */
LIBDEFLATEAPI struct libdeflate_compressor *
-libdeflate_alloc_compressor(int compression_level)
+libdeflate_alloc_compressor_ex(int compression_level,
+ const struct libdeflate_options *options)
{
struct libdeflate_compressor *c;
size_t size = offsetof(struct libdeflate_compressor, p);
check_buildtime_parameters();
+ /*
+ * Note: if more fields are added to libdeflate_options, this code will
+ * need to be updated to support both the old and new structs.
+ */
+ if (options->sizeof_options != sizeof(*options))
+ return NULL;
+
if (compression_level < 0 || compression_level > 12)
return NULL;
@@ -3829,9 +3899,14 @@ libdeflate_alloc_compressor(int compression_level)
size += sizeof(c->p.f);
}
- c = libdeflate_aligned_malloc(MATCHFINDER_MEM_ALIGNMENT, size);
+ c = libdeflate_aligned_malloc(options->malloc_func ?
+ options->malloc_func :
+ libdeflate_default_malloc_func,
+ MATCHFINDER_MEM_ALIGNMENT, size);
if (!c)
return NULL;
+ c->free_func = options->free_func ?
+ options->free_func : libdeflate_default_free_func;
c->compression_level = compression_level;
@@ -3902,6 +3977,7 @@ libdeflate_alloc_compressor(int compression_level)
c->p.n.max_optim_passes = 2;
c->p.n.min_improvement_to_continue = 32;
c->p.n.min_bits_to_use_nonfinal_path = 32;
+ c->p.n.max_len_to_optimize_static_block = 0;
deflate_init_offset_slot_full(c);
break;
case 11:
@@ -3911,6 +3987,7 @@ libdeflate_alloc_compressor(int compression_level)
c->p.n.max_optim_passes = 4;
c->p.n.min_improvement_to_continue = 16;
c->p.n.min_bits_to_use_nonfinal_path = 16;
+ c->p.n.max_len_to_optimize_static_block = 1000;
deflate_init_offset_slot_full(c);
break;
case 12:
@@ -3921,6 +3998,7 @@ libdeflate_alloc_compressor(int compression_level)
c->p.n.max_optim_passes = 10;
c->p.n.min_improvement_to_continue = 1;
c->p.n.min_bits_to_use_nonfinal_path = 1;
+ c->p.n.max_len_to_optimize_static_block = 10000;
deflate_init_offset_slot_full(c);
break;
#endif /* SUPPORT_NEAR_OPTIMAL_PARSING */
@@ -3931,6 +4009,16 @@ libdeflate_alloc_compressor(int compression_level)
return c;
}
+
+LIBDEFLATEAPI struct libdeflate_compressor *
+libdeflate_alloc_compressor(int compression_level)
+{
+ static const struct libdeflate_options defaults = {
+ .sizeof_options = sizeof(defaults),
+ };
+ return libdeflate_alloc_compressor_ex(compression_level, &defaults);
+}
+
LIBDEFLATEAPI size_t
libdeflate_deflate_compress(struct libdeflate_compressor *c,
const void *in, size_t in_nbytes,
@@ -3946,38 +4034,40 @@ libdeflate_deflate_compress(struct libdeflate_compressor *c,
return deflate_compress_none(in, in_nbytes,
out, out_nbytes_avail);
- /*
- * Initialize the output bitstream structure.
- *
- * The end is set to OUTPUT_END_PADDING below the true end, so that
- * FLUSH_BITS() can be more efficient.
- */
- if (unlikely(out_nbytes_avail <= OUTPUT_END_PADDING))
- return 0;
+ /* Initialize the output bitstream structure. */
os.bitbuf = 0;
os.bitcount = 0;
os.next = out;
- os.end = os.next + out_nbytes_avail - OUTPUT_END_PADDING;
+ os.end = os.next + out_nbytes_avail;
+ os.overflow = false;
+
+ /* Call the actual compression function. */
(*c->impl)(c, in, in_nbytes, &os);
+
+ /* Return 0 if the output buffer is too small. */
+ if (os.overflow)
+ return 0;
+
/*
- * If 'os.next' reached 'os.end', then either there was not enough space
- * in the output buffer, or the compressed size would have been within
- * OUTPUT_END_PADDING of the true end. For performance reasons we don't
- * distinguish between these cases; we just make sure to return some
- * extra space from libdeflate_deflate_compress_bound().
+ * Write the final byte if needed. This can't overflow the output
+ * buffer because deflate_flush_block() would have set the overflow flag
+ * if there wasn't enough space remaining for the full final block.
*/
- if (os.next >= os.end)
- return 0;
ASSERT(os.bitcount <= 7);
- if (os.bitcount)
+ if (os.bitcount) {
+ ASSERT(os.next < os.end);
*os.next++ = os.bitbuf;
+ }
+
+ /* Return the compressed size in bytes. */
return os.next - (u8 *)out;
}
LIBDEFLATEAPI void
libdeflate_free_compressor(struct libdeflate_compressor *c)
{
- libdeflate_aligned_free(c);
+ if (c)
+ libdeflate_aligned_free(c->free_func, c);
}
unsigned int
@@ -3990,7 +4080,6 @@ LIBDEFLATEAPI size_t
libdeflate_deflate_compress_bound(struct libdeflate_compressor *c,
size_t in_nbytes)
{
- size_t bound = 0;
size_t max_blocks;
/*
@@ -4007,10 +4096,12 @@ libdeflate_deflate_compress_bound(struct libdeflate_compressor *c,
*/
/*
- * The minimum length that is passed to deflate_flush_block() is
- * MIN_BLOCK_LENGTH bytes, except for the final block if needed.
+ * Calculate the maximum number of uncompressed blocks that the
+ * compressor can use for 'in_nbytes' of data.
*
- * If deflate_flush_block() decides to use an uncompressed block, it
+ * The minimum length that is passed to deflate_flush_block() is
+ * MIN_BLOCK_LENGTH bytes, except for the final block if needed. If
+ * deflate_flush_block() decides to use an uncompressed block, it
* actually will (in general) output a series of uncompressed blocks in
* order to stay within the UINT16_MAX limit of DEFLATE. But this can
* be disregarded here as long as '2 * MIN_BLOCK_LENGTH <= UINT16_MAX',
@@ -4029,20 +4120,8 @@ libdeflate_deflate_compress_bound(struct libdeflate_compressor *c,
* BTYPE, LEN, and NLEN fields. (For the reason explained earlier, the
* alignment bits at the very start of the block can be disregarded;
* they would otherwise increase the overhead to 6 bytes per block.)
+ * Therefore, the maximum number of overhead bytes is '5 * max_blocks'.
+ * To get the final bound, add the number of uncompressed bytes.
*/
- bound += 5 * max_blocks;
-
- /* Account for the data itself, stored uncompressed. */
- bound += in_nbytes;
-
- /*
- * Add 1 + OUTPUT_END_PADDING because for performance reasons, the
- * compressor doesn't distinguish between cases where there wasn't
- * enough space and cases where the compressed size would have been
- * 'out_nbytes_avail - OUTPUT_END_PADDING' or greater. Adding
- * 1 + OUTPUT_END_PADDING to the bound ensures the needed wiggle room.
- */
- bound += 1 + OUTPUT_END_PADDING;
-
- return bound;
+ return (5 * max_blocks) + in_nbytes;
}
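Aside: with the slack-space requirement gone, compressing into an exact-sized buffer now works; a sketch (not part of the patch) of the round-trip that the removed documentation note used to warn against:

#include <assert.h>
#include <stdlib.h>
#include <libdeflate.h>

int main(void)
{
    static const char in[] = "example input, example input, example input";
    struct libdeflate_compressor *c = libdeflate_alloc_compressor(6);
    size_t bound, n1, n2;
    void *out;

    assert(c != NULL);
    bound = libdeflate_deflate_compress_bound(c, sizeof(in));
    out = malloc(bound);
    assert(out != NULL);

    n1 = libdeflate_deflate_compress(c, in, sizeof(in), out, bound);
    assert(n1 != 0);

    /* As of v1.19 this succeeds even though the buffer is exact-sized;
     * previously the compressor demanded extra slack and returned 0. */
    n2 = libdeflate_deflate_compress(c, in, sizeof(in), out, n1);
    assert(n2 != 0 && n2 <= n1);

    free(out);
    libdeflate_free_compressor(c);
    return 0;
}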
=====================================
lib/deflate_decompress.c
=====================================
@@ -671,6 +671,9 @@ struct libdeflate_decompressor {
bool static_codes_loaded;
unsigned litlen_tablebits;
+
+ /* The free() function for this struct, chosen at allocation time */
+ free_func_t free_func;
};
/*
@@ -802,38 +805,48 @@ build_decode_table(u32 decode_table[],
u32 entry;
unsigned i;
+ /*
+ * The DEFLATE RFC explicitly allows the offset code to be
+ * incomplete in two cases: a code containing just 1 codeword,
+ * if that codeword has length 1; and a code containing no
+ * codewords. Note: the list of offset codeword lengths is
+ * always nonempty, but lengths of 0 don't count as codewords.
+ *
+ * The RFC doesn't say whether the same cases are allowed for
+ * the litlen and pre codes. It's actually impossible for no
+ * symbols to be used from these codes; however, it's
+ * technically possible for only one symbol to be used. zlib
+ * allows 1 codeword for the litlen code, but not the precode.
+ * The RFC also doesn't say whether, when there is 1 codeword,
+ * that codeword is '0' or '1'. zlib uses '0'.
+ *
+ * We accept what zlib accepts, plus a bit more. First, we
+ * don't treat the precode more strictly than the litlen and
+ * offset codes. There's no convincing reason to add a special
+ * case for the precode here.
+ *
+ * Second, we just map each allowed incomplete code to a complete
+ * code with only real symbols. To do this, we choose a symbol,
+ * either the used symbol (for codes with 1 codeword) or an
+ * arbitrary symbol (for empty codes), and give it both
+ * codewords '0' and '1'. zlib instead uses a special ERROR
+ * symbol in the part of the codespace the code doesn't use.
+ * However, having an ERROR symbol reduces the performance of
+ * the Huffman decoder, for no real benefit. Our approach also
+ * avoids having to decide whether '0' or '1' is correct.
+ *
+ * Like zlib, we still reject all incomplete codes that contain
+ * more than 1 codeword or a codeword length greater than 1.
+ */
if (codespace_used == 0) {
- /*
- * An empty code is allowed. This can happen for the
- * offset code in DEFLATE, since a dynamic Huffman block
- * need not contain any matches.
- */
-
- /* sym=0, len=1 (arbitrary) */
- entry = make_decode_table_entry(decode_results, 0, 1);
+ sym = 0; /* arbitrary */
} else {
- /*
- * Allow codes with a single used symbol, with codeword
- * length 1. The DEFLATE RFC is unclear regarding this
- * case. What zlib's decompressor does is permit this
- * for the litlen and offset codes and assume the
- * codeword is '0' rather than '1'. We do the same
- * except we allow this for precodes too, since there's
- * no convincing reason to treat the codes differently.
- * We also assign both codewords '0' and '1' to the
- * symbol to avoid having to handle '1' specially.
- */
if (codespace_used != (1U << (max_codeword_len - 1)) ||
len_counts[1] != 1)
return false;
- entry = make_decode_table_entry(decode_results,
- *sorted_syms, 1);
+ sym = sorted_syms[0];
}
- /*
- * Note: the decode table still must be fully initialized, in
- * case the stream is malformed and contains bits from the part
- * of the codespace the incomplete code doesn't use.
- */
+ entry = make_decode_table_entry(decode_results, sym, 1);
for (i = 0; i < (1U << table_bits); i++)
decode_table[i] = entry;
return true;
@@ -1140,8 +1153,21 @@ libdeflate_deflate_decompress(struct libdeflate_decompressor *d,
}
LIBDEFLATEAPI struct libdeflate_decompressor *
-libdeflate_alloc_decompressor(void)
+libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options)
{
+ struct libdeflate_decompressor *d;
+
+ /*
+ * Note: if more fields are added to libdeflate_options, this code will
+ * need to be updated to support both the old and new structs.
+ */
+ if (options->sizeof_options != sizeof(*options))
+ return NULL;
+
+ d = (options->malloc_func ? options->malloc_func :
+ libdeflate_default_malloc_func)(sizeof(*d));
+ if (d == NULL)
+ return NULL;
/*
* Note that only certain parts of the decompressor actually must be
* initialized here:
@@ -1155,18 +1181,28 @@ libdeflate_alloc_decompressor(void)
* valgrind, since build_decode_table() is guaranteed to initialize
* all entries eventually anyway.)
*
+ * - 'free_func' must be set.
+ *
* But for simplicity, we currently just zero the whole decompressor.
*/
- struct libdeflate_decompressor *d = libdeflate_malloc(sizeof(*d));
-
- if (d == NULL)
- return NULL;
memset(d, 0, sizeof(*d));
+ d->free_func = options->free_func ?
+ options->free_func : libdeflate_default_free_func;
return d;
}
+LIBDEFLATEAPI struct libdeflate_decompressor *
+libdeflate_alloc_decompressor(void)
+{
+ static const struct libdeflate_options defaults = {
+ .sizeof_options = sizeof(defaults),
+ };
+ return libdeflate_alloc_decompressor_ex(&defaults);
+}
+
LIBDEFLATEAPI void
libdeflate_free_decompressor(struct libdeflate_decompressor *d)
{
- libdeflate_free(d);
+ if (d)
+ d->free_func(d);
}
=====================================
lib/lib_common.h
=====================================
@@ -39,11 +39,15 @@
#include "../common_defs.h"
-void *libdeflate_malloc(size_t size);
-void libdeflate_free(void *ptr);
+typedef void *(*malloc_func_t)(size_t);
+typedef void (*free_func_t)(void *);
-void *libdeflate_aligned_malloc(size_t alignment, size_t size);
-void libdeflate_aligned_free(void *ptr);
+extern malloc_func_t libdeflate_default_malloc_func;
+extern free_func_t libdeflate_default_free_func;
+
+void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
+ size_t alignment, size_t size);
+void libdeflate_aligned_free(free_func_t free_func, void *ptr);
#ifdef FREESTANDING
/*
=====================================
lib/utils.c
=====================================
@@ -34,27 +34,18 @@
# include <stdlib.h>
#endif
-static void *(*libdeflate_malloc_func)(size_t) = malloc;
-static void (*libdeflate_free_func)(void *) = free;
+malloc_func_t libdeflate_default_malloc_func = malloc;
+free_func_t libdeflate_default_free_func = free;
void *
-libdeflate_malloc(size_t size)
+libdeflate_aligned_malloc(malloc_func_t malloc_func,
+ size_t alignment, size_t size)
{
- return (*libdeflate_malloc_func)(size);
-}
-
-void
-libdeflate_free(void *ptr)
-{
- (*libdeflate_free_func)(ptr);
-}
+ void *ptr = (*malloc_func)(sizeof(void *) + alignment - 1 + size);
-void *
-libdeflate_aligned_malloc(size_t alignment, size_t size)
-{
- void *ptr = libdeflate_malloc(sizeof(void *) + alignment - 1 + size);
if (ptr) {
void *orig_ptr = ptr;
+
ptr = (void *)ALIGN((uintptr_t)ptr + sizeof(void *), alignment);
((void **)ptr)[-1] = orig_ptr;
}
@@ -62,18 +53,17 @@ libdeflate_aligned_malloc(size_t alignment, size_t size)
}
void
-libdeflate_aligned_free(void *ptr)
+libdeflate_aligned_free(free_func_t free_func, void *ptr)
{
- if (ptr)
- libdeflate_free(((void **)ptr)[-1]);
+ (*free_func)(((void **)ptr)[-1]);
}
LIBDEFLATEAPI void
-libdeflate_set_memory_allocator(void *(*malloc_func)(size_t),
- void (*free_func)(void *))
+libdeflate_set_memory_allocator(malloc_func_t malloc_func,
+ free_func_t free_func)
{
- libdeflate_malloc_func = malloc_func;
- libdeflate_free_func = free_func;
+ libdeflate_default_malloc_func = malloc_func;
+ libdeflate_default_free_func = free_func;
}
/*
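Aside: the over-allocate-and-stash trick that libdeflate_aligned_malloc() uses above, as a self-contained sketch (not part of the patch; ALIGN_UP is a stand-in for the library's ALIGN macro and assumes a power-of-2 alignment):

#include <stdint.h>
#include <stdlib.h>

#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((uintptr_t)(a) - 1))

static void *aligned_malloc_sketch(size_t alignment, size_t size)
{
    /* Over-allocate: room for the original pointer + worst-case pad. */
    void *orig = malloc(sizeof(void *) + alignment - 1 + size);
    void *ptr;

    if (!orig)
        return NULL;
    ptr = (void *)ALIGN_UP((uintptr_t)orig + sizeof(void *), alignment);
    ((void **)ptr)[-1] = orig; /* stash what free() really needs */
    return ptr;
}

static void aligned_free_sketch(void *ptr)
{
    free(((void **)ptr)[-1]);
}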
=====================================
lib/x86/cpu_features.c
=====================================
@@ -30,15 +30,17 @@
#if HAVE_DYNAMIC_X86_CPU_FEATURES
-/* With old GCC versions we have to manually save and restore the x86_32 PIC
- * register (ebx). See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47602 */
+/*
+ * With old GCC versions we have to manually save and restore the x86_32 PIC
+ * register (ebx). See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47602
+ */
#if defined(ARCH_X86_32) && defined(__PIC__)
# define EBX_CONSTRAINT "=&r"
#else
# define EBX_CONSTRAINT "=b"
#endif
-/* Execute the CPUID instruction. */
+/* Execute the CPUID instruction. */
static inline void
cpuid(u32 leaf, u32 subleaf, u32 *a, u32 *b, u32 *c, u32 *d)
{
@@ -51,40 +53,37 @@ cpuid(u32 leaf, u32 subleaf, u32 *a, u32 *b, u32 *c, u32 *d)
*c = result[2];
*d = result[3];
#else
- __asm__(".ifnc %%ebx, %1; mov %%ebx, %1; .endif\n"
- "cpuid \n"
- ".ifnc %%ebx, %1; xchg %%ebx, %1; .endif\n"
- : "=a" (*a), EBX_CONSTRAINT (*b), "=c" (*c), "=d" (*d)
- : "a" (leaf), "c" (subleaf));
+ __asm__ volatile(".ifnc %%ebx, %1; mov %%ebx, %1; .endif\n"
+ "cpuid \n"
+ ".ifnc %%ebx, %1; xchg %%ebx, %1; .endif\n"
+ : "=a" (*a), EBX_CONSTRAINT (*b), "=c" (*c), "=d" (*d)
+ : "a" (leaf), "c" (subleaf));
#endif
}
-/* Read an extended control register. */
+/* Read an extended control register. */
static inline u64
read_xcr(u32 index)
{
#ifdef _MSC_VER
return _xgetbv(index);
#else
- u32 edx, eax;
-
- /* Execute the "xgetbv" instruction. Old versions of binutils do not
- * recognize this instruction, so list the raw bytes instead. */
- __asm__ (".byte 0x0f, 0x01, 0xd0" : "=d" (edx), "=a" (eax) : "c" (index));
-
- return ((u64)edx << 32) | eax;
+ u32 d, a;
+
+ /*
+ * Execute the "xgetbv" instruction. Old versions of binutils do not
+ * recognize this instruction, so list the raw bytes instead.
+ *
+ * This must be 'volatile' to prevent this code from being moved out
+ * from under the check for OSXSAVE.
+ */
+ __asm__ volatile(".byte 0x0f, 0x01, 0xd0" :
+ "=d" (d), "=a" (a) : "c" (index));
+
+ return ((u64)d << 32) | a;
#endif
}
-#undef BIT
-#define BIT(nr) (1UL << (nr))
-
-#define XCR0_BIT_SSE BIT(1)
-#define XCR0_BIT_AVX BIT(2)
-
-#define IS_SET(reg, nr) ((reg) & BIT(nr))
-#define IS_ALL_SET(reg, mask) (((reg) & (mask)) == (mask))
-
static const struct cpu_feature x86_cpu_feature_table[] = {
{X86_CPU_FEATURE_SSE2, "sse2"},
{X86_CPU_FEATURE_PCLMUL, "pclmul"},
@@ -98,47 +97,34 @@ volatile u32 libdeflate_x86_cpu_features = 0;
/* Initialize libdeflate_x86_cpu_features. */
void libdeflate_init_x86_cpu_features(void)
{
+ u32 max_leaf, a, b, c, d;
+ u64 xcr0 = 0;
u32 features = 0;
- u32 dummy1, dummy2, dummy3, dummy4;
- u32 max_function;
- u32 features_1, features_2, features_3, features_4;
- bool os_avx_support = false;
-
- /* Get maximum supported function */
- cpuid(0, 0, &max_function, &dummy2, &dummy3, &dummy4);
- if (max_function < 1)
- goto out;
- /* Standard feature flags */
- cpuid(1, 0, &dummy1, &dummy2, &features_2, &features_1);
+ /* EAX=0: Highest Function Parameter and Manufacturer ID */
+ cpuid(0, 0, &max_leaf, &b, &c, &d);
+ if (max_leaf < 1)
+ goto out;
- if (IS_SET(features_1, 26))
+ /* EAX=1: Processor Info and Feature Bits */
+ cpuid(1, 0, &a, &b, &c, &d);
+ if (d & (1 << 26))
features |= X86_CPU_FEATURE_SSE2;
-
- if (IS_SET(features_2, 1))
+ if (c & (1 << 1))
features |= X86_CPU_FEATURE_PCLMUL;
-
- if (IS_SET(features_2, 27)) { /* OSXSAVE set? */
- u64 xcr0 = read_xcr(0);
-
- os_avx_support = IS_ALL_SET(xcr0,
- XCR0_BIT_SSE |
- XCR0_BIT_AVX);
- }
-
- if (os_avx_support && IS_SET(features_2, 28))
+ if (c & (1 << 27))
+ xcr0 = read_xcr(0);
+ if ((c & (1 << 28)) && ((xcr0 & 0x6) == 0x6))
features |= X86_CPU_FEATURE_AVX;
- if (max_function < 7)
+ if (max_leaf < 7)
goto out;
- /* Extended feature flags */
- cpuid(7, 0, &dummy1, &features_3, &features_4, &dummy4);
-
- if (os_avx_support && IS_SET(features_3, 5))
+ /* EAX=7, ECX=0: Extended Features */
+ cpuid(7, 0, &a, &b, &c, &d);
+ if ((b & (1 << 5)) && ((xcr0 & 0x6) == 0x6))
features |= X86_CPU_FEATURE_AVX2;
-
- if (IS_SET(features_3, 8))
+ if (b & (1 << 8))
features |= X86_CPU_FEATURE_BMI2;
out:
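Aside: the same leaf-1/leaf-7 probing, reduced to a standalone sketch for x86 with GCC/Clang (not part of the patch; bit positions match the code above, and the raw xgetbv bytes match those used upstream):

#include <cpuid.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    unsigned a, b, c, d;
    uint64_t xcr0 = 0;

    if (!__get_cpuid_count(1, 0, &a, &b, &c, &d))
        return 1;
    if (c & (1 << 27)) { /* OSXSAVE: xgetbv may be executed */
        uint32_t lo, hi;

        __asm__ volatile(".byte 0x0f, 0x01, 0xd0" /* xgetbv */
                         : "=a" (lo), "=d" (hi) : "c" (0));
        xcr0 = ((uint64_t)hi << 32) | lo;
    }
    /* AVX needs the CPUID bit plus OS-enabled XMM|YMM state (0x6). */
    if ((c & (1U << 28)) && (xcr0 & 0x6) == 0x6)
        printf("AVX usable\n");
    if (__get_cpuid_count(7, 0, &a, &b, &c, &d) && (b & (1 << 8)))
        printf("BMI2 present\n");
    return 0;
}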
=====================================
lib/x86/cpu_features.h
=====================================
@@ -145,6 +145,16 @@ static inline u32 get_x86_cpu_features(void) { return 0; }
#else
# define HAVE_BMI2_INTRIN 0
#endif
+/*
+ * MSVC from VS2017 (toolset v141) apparently miscompiles the _bzhi_*()
+ * intrinsics. It seems to be fixed in VS2022.
+ */
+#if defined(_MSC_VER) && _MSC_VER < 1930 /* older than VS2022 (toolset v143) */
+# undef HAVE_BMI2_NATIVE
+# undef HAVE_BMI2_INTRIN
+# define HAVE_BMI2_NATIVE 0
+# define HAVE_BMI2_INTRIN 0
+#endif
#endif /* ARCH_X86_32 || ARCH_X86_64 */
=====================================
libdeflate.h
=====================================
@@ -13,8 +13,8 @@ extern "C" {
#endif
#define LIBDEFLATE_VERSION_MAJOR 1
-#define LIBDEFLATE_VERSION_MINOR 18
-#define LIBDEFLATE_VERSION_STRING "1.18"
+#define LIBDEFLATE_VERSION_MINOR 19
+#define LIBDEFLATE_VERSION_STRING "1.19"
/*
* Users of libdeflate.dll on Windows can define LIBDEFLATE_DLL to cause
@@ -35,6 +35,7 @@ extern "C" {
/* ========================================================================== */
struct libdeflate_compressor;
+struct libdeflate_options;
/*
* libdeflate_alloc_compressor() allocates a new compressor that supports
@@ -57,12 +58,19 @@ struct libdeflate_compressor;
LIBDEFLATEAPI struct libdeflate_compressor *
libdeflate_alloc_compressor(int compression_level);
+/*
+ * Like libdeflate_alloc_compressor(), but adds the 'options' argument.
+ */
+LIBDEFLATEAPI struct libdeflate_compressor *
+libdeflate_alloc_compressor_ex(int compression_level,
+ const struct libdeflate_options *options);
+
/*
* libdeflate_deflate_compress() performs raw DEFLATE compression on a buffer of
* data. It attempts to compress 'in_nbytes' bytes of data located at 'in' and
* write the result to 'out', which has space for 'out_nbytes_avail' bytes. The
* return value is the compressed size in bytes, or 0 if the data could not be
- * compressed to 'out_nbytes_avail' bytes or fewer (but see note below).
+ * compressed to 'out_nbytes_avail' bytes or fewer.
*
* If compression is successful, then the output data is guaranteed to be a
* valid DEFLATE stream that decompresses to the input data. No other
@@ -72,22 +80,6 @@ libdeflate_alloc_compressor(int compression_level);
* writing tests that compare compressed data to a golden output, as this can
* break when libdeflate is updated. (This property isn't specific to
* libdeflate; the same is true for zlib and other compression libraries too.)
- *
- * Note: due to a performance optimization, libdeflate_deflate_compress()
- * currently needs a small amount of slack space at the end of the output
- * buffer. As a result, it can't actually report compressed sizes very close to
- * 'out_nbytes_avail'. This doesn't matter in real-world use cases, and
- * libdeflate_deflate_compress_bound() already includes the slack space.
- * However, it does mean that testing code that redundantly compresses data
- * using an exact-sized output buffer won't work as might be expected:
- *
- * out_nbytes = libdeflate_deflate_compress(c, in, in_nbytes, out,
- * libdeflate_deflate_compress_bound(in_nbytes));
- * // The following assertion will fail.
- * assert(libdeflate_deflate_compress(c, in, in_nbytes, out, out_nbytes) != 0);
- *
- * To avoid this, either don't write tests like the above, or make sure to
- * include at least 9 bytes of slack space in 'out_nbytes_avail'.
*/
LIBDEFLATEAPI size_t
libdeflate_deflate_compress(struct libdeflate_compressor *compressor,
@@ -171,6 +163,7 @@ libdeflate_free_compressor(struct libdeflate_compressor *compressor);
/* ========================================================================== */
struct libdeflate_decompressor;
+struct libdeflate_options;
/*
* libdeflate_alloc_decompressor() allocates a new decompressor that can be used
@@ -187,6 +180,12 @@ struct libdeflate_decompressor;
LIBDEFLATEAPI struct libdeflate_decompressor *
libdeflate_alloc_decompressor(void);
+/*
+ * Like libdeflate_alloc_decompressor(), but adds the 'options' argument.
+ */
+LIBDEFLATEAPI struct libdeflate_decompressor *
+libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options);
+
/*
* Result of a call to libdeflate_deflate_decompress(),
* libdeflate_zlib_decompress(), or libdeflate_gzip_decompress().
@@ -351,16 +350,60 @@ libdeflate_crc32(uint32_t crc, const void *buffer, size_t len);
/*
* Install a custom memory allocator which libdeflate will use for all memory
- * allocations. 'malloc_func' is a function that must behave like malloc(), and
- * 'free_func' is a function that must behave like free().
+ * allocations by default. 'malloc_func' is a function that must behave like
+ * malloc(), and 'free_func' is a function that must behave like free().
+ *
+ * The per-(de)compressor custom memory allocator that can be specified in
+ * 'struct libdeflate_options' takes priority over this.
*
- * There must not be any libdeflate_compressor or libdeflate_decompressor
- * structures in existence when calling this function.
+ * This doesn't affect the free() function that will be used to free
+ * (de)compressors that were already in existence when this is called.
*/
LIBDEFLATEAPI void
libdeflate_set_memory_allocator(void *(*malloc_func)(size_t),
void (*free_func)(void *));
+/*
+ * Advanced options. This is the options structure that
+ * libdeflate_alloc_compressor_ex() and libdeflate_alloc_decompressor_ex()
+ * require. Most users won't need this and should just use the non-"_ex"
+ * functions instead. If you do need this, it should be initialized like this:
+ *
+ * struct libdeflate_options options;
+ *
+ * memset(&options, 0, sizeof(options));
+ * options.sizeof_options = sizeof(options);
+ * // Then set the fields that you need to override the defaults for.
+ */
+struct libdeflate_options {
+
+ /*
+ * This field must be set to the struct size. This field exists for
+ * extensibility, so that fields can be appended to this struct in
+ * future versions of libdeflate while still supporting old binaries.
+ */
+ size_t sizeof_options;
+
+ /*
+ * An optional custom memory allocator to use for this (de)compressor.
+ * 'malloc_func' must be a function that behaves like malloc(), and
+ * 'free_func' must be a function that behaves like free().
+ *
+ * This is useful in cases where a process might have multiple users of
+ * libdeflate who want to use different memory allocators. For example,
+ * a library might want to use libdeflate with a custom memory allocator
+ * without interfering with user code that might use libdeflate too.
+ *
+ * This takes priority over the "global" memory allocator (which by
+ * default is malloc() and free(), but can be changed by
+ * libdeflate_set_memory_allocator()). Moreover, libdeflate will never
+ * call the "global" memory allocator if a per-(de)compressor custom
+ * allocator is always given.
+ */
+ void *(*malloc_func)(size_t);
+ void (*free_func)(void *);
+};
+
#ifdef __cplusplus
}
#endif
=====================================
programs/test_custom_malloc.c
=====================================
@@ -1,7 +1,7 @@
/*
* test_custom_malloc.c
*
- * Test libdeflate_set_memory_allocator().
+ * Test the support for custom memory allocators.
* Also test injecting allocation failures.
*/
@@ -28,24 +28,34 @@ static void do_free(void *ptr)
free(ptr);
}
-int
-tmain(int argc, tchar *argv[])
+static void reset_state(void)
+{
+ libdeflate_set_memory_allocator(malloc, free);
+ malloc_count = 0;
+ free_count = 0;
+}
+
+/* Test that the custom allocator is actually used when requested. */
+static void do_custom_memalloc_test(bool global)
{
+ static const struct libdeflate_options options = {
+ .sizeof_options = sizeof(options),
+ .malloc_func = do_malloc,
+ .free_func = do_free,
+ };
int level;
struct libdeflate_compressor *c;
struct libdeflate_decompressor *d;
- begin_program(argv);
-
- /* Test that the custom allocator is actually used when requested. */
-
- libdeflate_set_memory_allocator(do_malloc, do_free);
- ASSERT(malloc_count == 0);
- ASSERT(free_count == 0);
+ if (global)
+ libdeflate_set_memory_allocator(do_malloc, do_free);
for (level = 0; level <= 12; level++) {
malloc_count = free_count = 0;
- c = libdeflate_alloc_compressor(level);
+ if (global)
+ c = libdeflate_alloc_compressor(level);
+ else
+ c = libdeflate_alloc_compressor_ex(level, &options);
ASSERT(c != NULL);
ASSERT(malloc_count == 1);
ASSERT(free_count == 0);
@@ -55,7 +65,10 @@ tmain(int argc, tchar *argv[])
}
malloc_count = free_count = 0;
- d = libdeflate_alloc_decompressor();
+ if (global)
+ d = libdeflate_alloc_decompressor();
+ else
+ d = libdeflate_alloc_decompressor_ex(&options);
ASSERT(d != NULL);
ASSERT(malloc_count == 1);
ASSERT(free_count == 0);
@@ -63,7 +76,52 @@ tmain(int argc, tchar *argv[])
ASSERT(malloc_count == 1);
ASSERT(free_count == 1);
- /* As long as we're here, also test injecting allocation failures. */
+ reset_state();
+}
+
+#define offsetofend(type, field) \
+ (offsetof(type, field) + sizeof(((type *)NULL)->field))
+
+/* Test some edge cases involving libdeflate_options. */
+static void do_options_test(void)
+{
+ struct libdeflate_options options = { 0 };
+ struct libdeflate_compressor *c;
+ struct libdeflate_decompressor *d;
+ /* Size in libdeflate v1.19 */
+ size_t min_size = offsetofend(struct libdeflate_options, free_func);
+
+ /* sizeof_options must be at least the minimum size. */
+ for (; options.sizeof_options < min_size;
+ options.sizeof_options++) {
+ c = libdeflate_alloc_compressor_ex(6, &options);
+ ASSERT(c == NULL);
+ d = libdeflate_alloc_decompressor_ex(&options);
+ ASSERT(d == NULL);
+ }
+
+ /* NULL malloc_func and free_func means "use the global allocator". */
+ options.sizeof_options = min_size;
+ malloc_count = free_count = 0;
+ libdeflate_set_memory_allocator(do_malloc, do_free);
+ c = libdeflate_alloc_compressor_ex(6, &options);
+ libdeflate_free_compressor(c);
+ ASSERT(malloc_count == 1);
+ ASSERT(free_count == 1);
+ d = libdeflate_alloc_decompressor_ex(&options);
+ libdeflate_free_decompressor(d);
+ ASSERT(malloc_count == 2);
+ ASSERT(free_count == 2);
+
+ reset_state();
+}
+
+/* Test injecting memory allocation failures. */
+static void do_fault_injection_test(void)
+{
+ int level;
+ struct libdeflate_compressor *c;
+ struct libdeflate_decompressor *d;
libdeflate_set_memory_allocator(do_fail_malloc, do_free);
@@ -81,5 +139,17 @@ tmain(int argc, tchar *argv[])
ASSERT(malloc_count == 1);
ASSERT(free_count == 0);
+ reset_state();
+}
+
+int
+tmain(int argc, tchar *argv[])
+{
+ begin_program(argv);
+
+ do_custom_memalloc_test(true);
+ do_custom_memalloc_test(false);
+ do_options_test();
+ do_fault_injection_test();
return 0;
}
=====================================
scripts/gen_offset_slot_map.py
=====================================
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
#
-# This script generates the deflate_offset_slot[] array, which is a condensed
-# map from offsets to offset slots.
+# This script generates the deflate_offset_slot[] array, which maps
+# 'offset - 1 => offset_slot' for offset <= 256.
DEFLATE_OFFSET_SLOT_BASE = [
1 , 2 , 3 , 4 , 5 , 7 , 9 , 13 ,
@@ -10,26 +10,14 @@ DEFLATE_OFFSET_SLOT_BASE = [
4097 , 6145 , 8193 , 12289 , 16385 , 24577 ,
]
-DEFLATE_EXTRA_OFFSET_BITS = [
- 0 , 0 , 0 , 0 , 1 , 1 , 2 , 2 ,
- 3 , 3 , 4 , 4 , 5 , 5 , 6 , 6 ,
- 7 , 7 , 8 , 8 , 9 , 9 , 10 , 10 ,
- 11 , 11 , 12 , 12 , 13 , 13 ,
-]
-
-offset_slot_map = [0] * 512
-
-for offset_slot, offset_base in enumerate(DEFLATE_OFFSET_SLOT_BASE):
- num_extra_bits = DEFLATE_EXTRA_OFFSET_BITS[offset_slot]
- offset_end = offset_base + (1 << num_extra_bits)
- if offset_base <= 256:
- for offset in range(offset_base, offset_end):
- offset_slot_map[offset] = offset_slot
- else:
- for offset in range(offset_base, offset_end, 128):
- offset_slot_map[256 + ((offset - 1) >> 7)] = offset_slot
+offset_slot_map = [0] * 256
+offset_slot = -1
+for offset in range(1, len(offset_slot_map) + 1):
+ if offset >= DEFLATE_OFFSET_SLOT_BASE[offset_slot + 1]:
+ offset_slot += 1
+ offset_slot_map[offset - 1] = offset_slot
-print('static const u8 deflate_offset_slot_map[512] = {')
+print(f'static const u8 deflate_offset_slot[{len(offset_slot_map)}] = {{')
for i in range(0, len(offset_slot_map), 16):
print('\t', end='')
for j, v in enumerate(offset_slot_map[i:i+16]):
View it on GitLab: https://salsa.debian.org/med-team/libdeflate/-/compare/e4e6f5dc36e41bf5be5d7c44939adf05dcd910c5...35cb04cecf134709f30d7f1fc93c77d3416db62e