[med-svn] [Git][med-team/libdeflate][upstream] New upstream version 1.10
Andreas Tille (@tille)
gitlab at salsa.debian.org
Sun Feb 20 07:34:11 GMT 2022
Andreas Tille pushed to branch upstream at Debian Med / libdeflate
Commits:
fcefb483 by Andreas Tille at 2022-02-20T08:30:47+01:00
New upstream version 1.10
- - - - -
7 changed files:
- NEWS.md
- lib/decompress_template.h
- lib/deflate_compress.c
- lib/deflate_decompress.c
- libdeflate.h
- programs/gzip.c
- + programs/test_overread.c
Changes:
=====================================
NEWS.md
=====================================
@@ -1,5 +1,19 @@
# libdeflate release notes
+## Version 1.10
+
+* Added an additional check to the decompressor to make it quickly detect
+ certain bad inputs and not try to generate an unbounded amount of output.
+
+ Note: this was only a problem when decompressing with an unknown output size,
+ which isn't the recommended use case of libdeflate. However,
+ `libdeflate-gunzip` has to do this, and it would run out of memory as it would
+ keep trying to allocate a larger output buffer.
+
+* Fixed a build error on Solaris.
+
+* Cleaned up a few things in the compression code.
+
## Version 1.9
* Made many improvements to the compression algorithms, and rebalanced the
@@ -9,8 +23,8 @@
ratio on data where short matches aren't useful, such as DNA sequencing
data. This applies to all compression levels, but primarily to levels 1-9.
- * Levels 1 was made much faster, though it often compresses slightly worse
- than before (but still better than zlib).
+ * Level 1 was made much faster, though it often compresses slightly worse than
+ before (but still better than zlib).
* Levels 8-9 were also made faster, though they often compress slightly worse
than before (but still better than zlib). On some data, levels 8-9 are much
=====================================
lib/decompress_template.h
=====================================
@@ -43,7 +43,7 @@ FUNCNAME(struct libdeflate_decompressor * restrict d,
const u8 * const in_end = in_next + in_nbytes;
bitbuf_t bitbuf = 0;
unsigned bitsleft = 0;
- size_t overrun_count = 0;
+ size_t overread_count = 0;
unsigned i;
unsigned is_final_block;
unsigned block_type;
=====================================
lib/deflate_compress.c
=====================================
@@ -52,10 +52,10 @@
/*
* This is the minimum block length that the compressor will use, in
- * uncompressed bytes. It is also the amount by which the final block is
- * allowed to grow past the soft maximum length in order to avoid using a very
- * short block at the end. This should be a value below which using shorter
- * blocks is unlikely to be worthwhile, due to the per-block overhead.
+ * uncompressed bytes. It is also approximately the amount by which the final
+ * block is allowed to grow past the soft maximum length in order to avoid using
+ * a very short block at the end. This should be a value below which using
+ * shorter blocks is unlikely to be worthwhile, due to the per-block overhead.
*
* Defining a fixed minimum block length is needed in order to guarantee a
* reasonable upper bound on the compressed size. It's also needed because our
@@ -94,8 +94,8 @@
* For deflate_compress_fastest(): This is the soft maximum block length.
* deflate_compress_fastest() doesn't use the regular block splitting algorithm;
* it only ends blocks when they reach FAST_SOFT_MAX_BLOCK_LENGTH bytes or
- * FAST_SEQ_STORE_LENGTH - 1 matches. Therefore, this value should be lower
- * than the regular SOFT_MAX_BLOCK_LENGTH.
+ * FAST_SEQ_STORE_LENGTH matches. Therefore, this value should be lower than
+ * the regular SOFT_MAX_BLOCK_LENGTH.
*/
#define FAST_SOFT_MAX_BLOCK_LENGTH 65535
@@ -490,7 +490,7 @@ struct libdeflate_compressor {
/* Frequency counters for the current block */
struct deflate_freqs freqs;
- /* Block split statistics for the currently pending block */
+ /* Block split statistics for the current block */
struct block_split_stats split_stats;
/* Dynamic Huffman codes for the current block */
@@ -648,7 +648,7 @@ struct deflate_output_bitstream {
*/
u8 *next;
- /* Pointer just past the end of the output buffer */
+ /* Pointer to just past the end of the output buffer */
u8 *end;
};
@@ -664,8 +664,7 @@ struct deflate_output_bitstream {
#define OUTPUT_END_PADDING 8
/*
- * Initialize the output bitstream. 'size' is assumed to be at least
- * OUTPUT_END_PADDING.
+ * Initialize the output bitstream. 'size' must be at least OUTPUT_END_PADDING.
*/
static void
deflate_init_output(struct deflate_output_bitstream *os,
@@ -680,11 +679,12 @@ deflate_init_output(struct deflate_output_bitstream *os,
/*
* Add some bits to the bitbuffer variable of the output bitstream. The caller
- * must make sure there is enough room.
+ * must ensure that os->bitcount + num_bits <= BITBUF_NBITS, by calling
+ * deflate_flush_bits() frequently enough.
*/
static forceinline void
deflate_add_bits(struct deflate_output_bitstream *os,
- const bitbuf_t bits, const unsigned num_bits)
+ bitbuf_t bits, unsigned num_bits)
{
os->bitbuf |= bits << os->bitcount;
os->bitcount += num_bits;
@@ -712,6 +712,18 @@ deflate_flush_bits(struct deflate_output_bitstream *os)
}
}
+/*
+ * Add bits, then flush right away. Only use this where it is difficult to
+ * batch up calls to deflate_add_bits().
+ */
+static forceinline void
+deflate_write_bits(struct deflate_output_bitstream *os,
+ bitbuf_t bits, unsigned num_bits)
+{
+ deflate_add_bits(os, bits, num_bits);
+ deflate_flush_bits(os);
+}
+
/* Align the bitstream on a byte boundary. */
static forceinline void
deflate_align_bitstream(struct deflate_output_bitstream *os)
@@ -722,7 +734,8 @@ deflate_align_bitstream(struct deflate_output_bitstream *os)
/*
* Flush any remaining bits to the output buffer if needed. Return the total
- * number of bytes written to the output buffer, or 0 if an overflow occurred.
+ * number of bytes that have been written to the output buffer since
+ * deflate_init_output(), or 0 if an overflow occurred.
*/
static size_t
deflate_flush_output(struct deflate_output_bitstream *os)
@@ -743,7 +756,7 @@ deflate_flush_output(struct deflate_output_bitstream *os)
* Given the binary tree node A[subtree_idx] whose children already satisfy the
* maxheap property, swap the node with its greater child until it is greater
* than or equal to both of its children, so that the maxheap property is
- * satisfied in the subtree rooted at A[subtree_idx].
+ * satisfied in the subtree rooted at A[subtree_idx]. 'A' uses 1-based indices.
*/
static void
heapify_subtree(u32 A[], unsigned length, unsigned subtree_idx)
@@ -805,7 +818,7 @@ heap_sort(u32 A[], unsigned length)
#define NUM_SYMBOL_BITS 10
#define SYMBOL_MASK ((1 << NUM_SYMBOL_BITS) - 1)
-#define GET_NUM_COUNTERS(num_syms) ((((num_syms) + 3 / 4) + 3) & ~3)
+#define GET_NUM_COUNTERS(num_syms) (num_syms)
/*
* Sort the symbols primarily by frequency and secondarily by symbol value.
@@ -843,26 +856,13 @@ sort_symbols(unsigned num_syms, const u32 freqs[restrict],
unsigned counters[GET_NUM_COUNTERS(DEFLATE_MAX_NUM_SYMS)];
/*
- * We rely on heapsort, but with an added optimization. Since it's
- * common for most symbol frequencies to be low, we first do a count
- * sort using a limited number of counters. High frequencies will be
- * counted in the last counter, and only they will be sorted with
- * heapsort.
+ * We use heapsort, but with an added optimization. Since often most
+ * symbol frequencies are low, we first do a count sort using a limited
+ * number of counters. High frequencies are counted in the last
+ * counter, and only they will be sorted with heapsort.
*
* Note: with more symbols, it is generally beneficial to have more
- * counters. About 1 counter per 4 symbols seems fast.
- *
- * Note: I also tested radix sort, but even for large symbol counts (>
- * 255) and frequencies bounded at 16 bits (enabling radix sort by just
- * two base-256 digits), it didn't seem any faster than the method
- * implemented here.
- *
- * Note: I tested the optimized quicksort implementation from glibc
- * (with indirection overhead removed), but it was only marginally
- * faster than the simple heapsort implemented here.
- *
- * Tests were done with building the codes for LZX. Results may vary
- * for different compression algorithms...!
+ * counters. About 1 counter per symbol seems fastest.
*/
num_counters = GET_NUM_COUNTERS(num_syms);
@@ -909,7 +909,7 @@ sort_symbols(unsigned num_syms, const u32 freqs[restrict],
}
/*
- * Build the Huffman tree.
+ * Build a Huffman tree.
*
* This is an optimized implementation that
* (a) takes advantage of the frequencies being already sorted;
@@ -1103,6 +1103,33 @@ compute_length_counts(u32 A[restrict], unsigned root_idx,
}
}
+/* Reverse the Huffman codeword 'codeword', which is 'len' bits in length. */
+static u32
+reverse_codeword(u32 codeword, u8 len)
+{
+ /*
+ * The following branchless algorithm is faster than going bit by bit.
+ * Note: since no codewords are longer than 16 bits, we only need to
+ * reverse the low 16 bits of the 'u32'.
+ */
+ STATIC_ASSERT(DEFLATE_MAX_CODEWORD_LEN <= 16);
+
+ /* Flip adjacent 1-bit fields. */
+ codeword = ((codeword & 0x5555) << 1) | ((codeword & 0xAAAA) >> 1);
+
+ /* Flip adjacent 2-bit fields. */
+ codeword = ((codeword & 0x3333) << 2) | ((codeword & 0xCCCC) >> 2);
+
+ /* Flip adjacent 4-bit fields. */
+ codeword = ((codeword & 0x0F0F) << 4) | ((codeword & 0xF0F0) >> 4);
+
+ /* Flip adjacent 8-bit fields. */
+ codeword = ((codeword & 0x00FF) << 8) | ((codeword & 0xFF00) >> 8);
+
+ /* Return the high 'len' bits of the bit-reversed 16 bit value. */
+ return codeword >> (16 - len);
+}
+
/*
* Generate the codewords for a canonical Huffman code.
*
@@ -1161,13 +1188,18 @@ gen_codewords(u32 A[restrict], u8 lens[restrict],
next_codewords[len] =
(next_codewords[len - 1] + len_counts[len - 1]) << 1;
- for (sym = 0; sym < num_syms; sym++)
- A[sym] = next_codewords[lens[sym]]++;
+ for (sym = 0; sym < num_syms; sym++) {
+ u8 len = lens[sym];
+ u32 codeword = next_codewords[len]++;
+
+ /* DEFLATE requires bit-reversed codewords. */
+ A[sym] = reverse_codeword(codeword, len);
+ }
}
/*
* ---------------------------------------------------------------------
- * make_canonical_huffman_code()
+ * deflate_make_huffman_code()
* ---------------------------------------------------------------------
*
* Given an alphabet and the frequency of each symbol in it, construct a
@@ -1266,9 +1298,9 @@ gen_codewords(u32 A[restrict], u8 lens[restrict],
* file: C/HuffEnc.c), which was placed in the public domain by Igor Pavlov.
*/
static void
-make_canonical_huffman_code(unsigned num_syms, unsigned max_codeword_len,
- const u32 freqs[restrict],
- u8 lens[restrict], u32 codewords[restrict])
+deflate_make_huffman_code(unsigned num_syms, unsigned max_codeword_len,
+ const u32 freqs[restrict],
+ u8 lens[restrict], u32 codewords[restrict])
{
u32 *A = codewords;
unsigned num_used_syms;
@@ -1352,53 +1384,11 @@ deflate_reset_symbol_frequencies(struct libdeflate_compressor *c)
memset(&c->freqs, 0, sizeof(c->freqs));
}
-/* Reverse the Huffman codeword 'codeword', which is 'len' bits in length. */
-static u32
-deflate_reverse_codeword(u32 codeword, u8 len)
-{
- /*
- * The following branchless algorithm is faster than going bit by bit.
- * Note: since no codewords are longer than 16 bits, we only need to
- * reverse the low 16 bits of the 'u32'.
- */
- STATIC_ASSERT(DEFLATE_MAX_CODEWORD_LEN <= 16);
-
- /* Flip adjacent 1-bit fields. */
- codeword = ((codeword & 0x5555) << 1) | ((codeword & 0xAAAA) >> 1);
-
- /* Flip adjacent 2-bit fields. */
- codeword = ((codeword & 0x3333) << 2) | ((codeword & 0xCCCC) >> 2);
-
- /* Flip adjacent 4-bit fields. */
- codeword = ((codeword & 0x0F0F) << 4) | ((codeword & 0xF0F0) >> 4);
-
- /* Flip adjacent 8-bit fields. */
- codeword = ((codeword & 0x00FF) << 8) | ((codeword & 0xFF00) >> 8);
-
- /* Return the high 'len' bits of the bit-reversed 16 bit value. */
- return codeword >> (16 - len);
-}
-
-/* Make a canonical Huffman code with bit-reversed codewords. */
-static void
-deflate_make_huffman_code(unsigned num_syms, unsigned max_codeword_len,
- const u32 freqs[], u8 lens[], u32 codewords[])
-{
- unsigned sym;
-
- make_canonical_huffman_code(num_syms, max_codeword_len,
- freqs, lens, codewords);
-
- for (sym = 0; sym < num_syms; sym++)
- codewords[sym] = deflate_reverse_codeword(codewords[sym],
- lens[sym]);
-}
-
/*
* Build the literal/length and offset Huffman codes for a DEFLATE block.
*
- * This takes as input the frequency tables for each code and produces as output
- * a set of tables that map symbols to codewords and codeword lengths.
+ * This takes as input the frequency tables for each alphabet and produces as
+ * output a set of tables that map symbols to codewords and codeword lengths.
*/
static void
deflate_make_huffman_codes(const struct deflate_freqs *freqs,
@@ -1633,9 +1623,8 @@ deflate_write_huffman_header(struct libdeflate_compressor *c,
/* Output the lengths of the codewords in the precode. */
for (i = 0; i < c->num_explicit_lens; i++) {
- deflate_add_bits(os, c->precode_lens[
+ deflate_write_bits(os, c->precode_lens[
deflate_precode_lens_permutation[i]], 3);
- deflate_flush_bits(os);
}
/* Output the encoded lengths of the codewords in the larger code. */
@@ -1719,13 +1708,51 @@ deflate_write_literal_run(struct deflate_output_bitstream *os,
do {
unsigned lit = *in_next++;
- deflate_add_bits(os, codes->codewords.litlen[lit],
- codes->lens.litlen[lit]);
- deflate_flush_bits(os);
+ deflate_write_bits(os, codes->codewords.litlen[lit],
+ codes->lens.litlen[lit]);
} while (--litrunlen);
#endif
}
+static forceinline void
+deflate_write_match(struct deflate_output_bitstream * restrict os,
+ unsigned length, unsigned length_slot,
+ unsigned offset, unsigned offset_symbol,
+ const struct deflate_codes * restrict codes)
+{
+ unsigned litlen_symbol = DEFLATE_FIRST_LEN_SYM + length_slot;
+
+ /* Litlen symbol */
+ deflate_add_bits(os, codes->codewords.litlen[litlen_symbol],
+ codes->lens.litlen[litlen_symbol]);
+
+ /* Extra length bits */
+ STATIC_ASSERT(CAN_BUFFER(MAX_LITLEN_CODEWORD_LEN +
+ DEFLATE_MAX_EXTRA_LENGTH_BITS));
+ deflate_add_bits(os, length - deflate_length_slot_base[length_slot],
+ deflate_extra_length_bits[length_slot]);
+
+ if (!CAN_BUFFER(MAX_LITLEN_CODEWORD_LEN +
+ DEFLATE_MAX_EXTRA_LENGTH_BITS +
+ MAX_OFFSET_CODEWORD_LEN +
+ DEFLATE_MAX_EXTRA_OFFSET_BITS))
+ deflate_flush_bits(os);
+
+ /* Offset symbol */
+ deflate_add_bits(os, codes->codewords.offset[offset_symbol],
+ codes->lens.offset[offset_symbol]);
+
+ if (!CAN_BUFFER(MAX_OFFSET_CODEWORD_LEN +
+ DEFLATE_MAX_EXTRA_OFFSET_BITS))
+ deflate_flush_bits(os);
+
+ /* Extra offset bits */
+ deflate_add_bits(os, offset - deflate_offset_slot_base[offset_symbol],
+ deflate_extra_offset_bits[offset_symbol]);
+
+ deflate_flush_bits(os);
+}
+
static void
deflate_write_sequences(struct deflate_output_bitstream * restrict os,
const struct deflate_codes * restrict codes,
@@ -1737,9 +1764,6 @@ deflate_write_sequences(struct deflate_output_bitstream * restrict os,
for (;;) {
u32 litrunlen = seq->litrunlen_and_length & SEQ_LITRUNLEN_MASK;
unsigned length = seq->litrunlen_and_length >> SEQ_LENGTH_SHIFT;
- unsigned length_slot;
- unsigned litlen_symbol;
- unsigned offset_symbol;
if (litrunlen) {
deflate_write_literal_run(os, in_next, litrunlen,
@@ -1750,44 +1774,10 @@ deflate_write_sequences(struct deflate_output_bitstream * restrict os,
if (length == 0)
return;
- in_next += length;
-
- length_slot = seq->length_slot;
- litlen_symbol = DEFLATE_FIRST_LEN_SYM + length_slot;
-
- /* Litlen symbol */
- deflate_add_bits(os, codes->codewords.litlen[litlen_symbol],
- codes->lens.litlen[litlen_symbol]);
-
- /* Extra length bits */
- STATIC_ASSERT(CAN_BUFFER(MAX_LITLEN_CODEWORD_LEN +
- DEFLATE_MAX_EXTRA_LENGTH_BITS));
- deflate_add_bits(os,
- length - deflate_length_slot_base[length_slot],
- deflate_extra_length_bits[length_slot]);
-
- if (!CAN_BUFFER(MAX_LITLEN_CODEWORD_LEN +
- DEFLATE_MAX_EXTRA_LENGTH_BITS +
- MAX_OFFSET_CODEWORD_LEN +
- DEFLATE_MAX_EXTRA_OFFSET_BITS))
- deflate_flush_bits(os);
-
- /* Offset symbol */
- offset_symbol = seq->offset_symbol;
- deflate_add_bits(os, codes->codewords.offset[offset_symbol],
- codes->lens.offset[offset_symbol]);
-
- if (!CAN_BUFFER(MAX_OFFSET_CODEWORD_LEN +
- DEFLATE_MAX_EXTRA_OFFSET_BITS))
- deflate_flush_bits(os);
-
- /* Extra offset bits */
- deflate_add_bits(os, seq->offset -
- deflate_offset_slot_base[offset_symbol],
- deflate_extra_offset_bits[offset_symbol]);
-
- deflate_flush_bits(os);
+ deflate_write_match(os, length, seq->length_slot,
+ seq->offset, seq->offset_symbol, codes);
+ in_next += length;
seq++;
}
}
@@ -1797,10 +1787,6 @@ deflate_write_sequences(struct deflate_output_bitstream * restrict os,
* Follow the minimum-cost path in the graph of possible match/literal choices
* for the current block and write out the matches/literals using the specified
* Huffman codes.
- *
- * Note: this is slightly duplicated with deflate_write_sequences(), the reason
- * being that we don't want to waste time translating between intermediate
- * match/literal representations.
*/
static void
deflate_write_item_list(struct deflate_output_bitstream *os,
@@ -1814,51 +1800,18 @@ deflate_write_item_list(struct deflate_output_bitstream *os,
do {
unsigned length = cur_node->item & OPTIMUM_LEN_MASK;
unsigned offset = cur_node->item >> OPTIMUM_OFFSET_SHIFT;
- unsigned litlen_symbol;
- unsigned length_slot;
- unsigned offset_slot;
if (length == 1) {
/* Literal */
- litlen_symbol = offset;
- deflate_add_bits(os,
- codes->codewords.litlen[litlen_symbol],
- codes->lens.litlen[litlen_symbol]);
- deflate_flush_bits(os);
+ deflate_write_bits(os, codes->codewords.litlen[offset],
+ codes->lens.litlen[offset]);
} else {
- /* Match length */
- length_slot = deflate_length_slot[length];
- litlen_symbol = DEFLATE_FIRST_LEN_SYM + length_slot;
- deflate_add_bits(os,
- codes->codewords.litlen[litlen_symbol],
- codes->lens.litlen[litlen_symbol]);
-
- deflate_add_bits(os,
- length - deflate_length_slot_base[length_slot],
- deflate_extra_length_bits[length_slot]);
-
- if (!CAN_BUFFER(MAX_LITLEN_CODEWORD_LEN +
- DEFLATE_MAX_EXTRA_LENGTH_BITS +
- MAX_OFFSET_CODEWORD_LEN +
- DEFLATE_MAX_EXTRA_OFFSET_BITS))
- deflate_flush_bits(os);
-
-
- /* Match offset */
- offset_slot = c->p.n.offset_slot_full[offset];
- deflate_add_bits(os,
- codes->codewords.offset[offset_slot],
- codes->lens.offset[offset_slot]);
-
- if (!CAN_BUFFER(MAX_OFFSET_CODEWORD_LEN +
- DEFLATE_MAX_EXTRA_OFFSET_BITS))
- deflate_flush_bits(os);
-
- deflate_add_bits(os,
- offset - deflate_offset_slot_base[offset_slot],
- deflate_extra_offset_bits[offset_slot]);
-
- deflate_flush_bits(os);
+ /* Match */
+ deflate_write_match(os, length,
+ deflate_length_slot[length],
+ offset,
+ c->p.n.offset_slot_full[offset],
+ codes);
}
cur_node += length;
} while (cur_node != end_node);
@@ -1870,9 +1823,8 @@ static void
deflate_write_end_of_block(struct deflate_output_bitstream *os,
const struct deflate_codes *codes)
{
- deflate_add_bits(os, codes->codewords.litlen[DEFLATE_END_OF_BLOCK],
- codes->lens.litlen[DEFLATE_END_OF_BLOCK]);
- deflate_flush_bits(os);
+ deflate_write_bits(os, codes->codewords.litlen[DEFLATE_END_OF_BLOCK],
+ codes->lens.litlen[DEFLATE_END_OF_BLOCK]);
}
static void
@@ -2054,19 +2006,19 @@ deflate_flush_block(struct libdeflate_compressor * restrict c,
* literals we only look at the high bits and low bits, and for matches we only
* look at whether the match is long or not. The assumption is that for typical
* "real" data, places that are good block boundaries will tend to be noticeable
- * based only on changes in these aggregate frequencies, without looking for
+ * based only on changes in these aggregate probabilities, without looking for
* subtle differences in individual symbols. For example, a change from ASCII
* bytes to non-ASCII bytes, or from few matches (generally less compressible)
* to many matches (generally more compressible), would be easily noticed based
* on the aggregates.
*
- * For determining whether the frequency distributions are "different enough" to
- * start a new block, the simply heuristic of splitting when the sum of absolute
- * differences exceeds a constant seems to be good enough. We also add a number
- * proportional to the block length so that the algorithm is more likely to end
- * long blocks than short blocks. This reflects the general expectation that it
- * will become increasingly beneficial to start a new block as the current
- * block grows longer.
+ * For determining whether the probability distributions are "different enough"
+ * to start a new block, the simple heuristic of splitting when the sum of
+ * absolute differences exceeds a constant seems to be good enough. We also add
+ * a number proportional to the block length so that the algorithm is more
+ * likely to end long blocks than short blocks. This reflects the general
+ * expectation that it will become increasingly beneficial to start a new block
+ * as the current block grows longer.
*
* Finally, for an approximation, it is not strictly necessary that the exact
* symbols being used are considered. With "near-optimal parsing", for example,
@@ -2130,9 +2082,14 @@ do_end_block_check(struct block_split_stats *stats, u32 block_length)
{
if (stats->num_observations > 0) {
/*
- * Note: to avoid slow divisions, we do not divide by
- * 'num_observations', but rather do all math with the numbers
- * multiplied by 'num_observations'.
+ * Compute the sum of absolute differences of probabilities. To
+ * avoid needing to use floating point arithmetic or do slow
+ * divisions, we do all arithmetic with the probabilities
+ * multiplied by num_observations * num_new_observations. E.g.,
+ * for the "old" observations the probabilities would be
+ * (double)observations[i] / num_observations, but since we
+ * multiply by both num_observations and num_new_observations we
+ * really do observations[i] * num_new_observations.
*/
u32 total_delta = 0;
u32 num_items;
@@ -2152,6 +2109,12 @@ do_end_block_check(struct block_split_stats *stats, u32 block_length)
num_items = stats->num_observations +
stats->num_new_observations;
+ /*
+ * Heuristic: the cutoff is when the sum of absolute differences
+ * of probabilities becomes at least 200/512. As above, the
+ * probability is multiplied by both num_new_observations and
+ * num_observations. Be careful to avoid integer overflow.
+ */
cutoff = stats->num_new_observations * 200 / 512 *
stats->num_observations;
/*
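
[Editor's aside, not part of the patch] The do_end_block_check() comments above describe a division-free comparison of two probability distributions. A minimal standalone sketch of that idea is below; the names and the number of observation bins are hypothetical, but the scaling matches the hunk above: both probabilities are multiplied by num_old * num_new so only integer arithmetic is needed, and the 200/512 cutoff is scaled the same way.

#include <stdint.h>

#define NUM_OBSERVATION_TYPES 10   /* hypothetical number of bins */

/* Return nonzero if the "new" observations differ enough from the "old"
 * ones that ending the block here looks worthwhile. */
static int
should_end_block(const uint32_t old_obs[NUM_OBSERVATION_TYPES],
                 uint32_t num_old,
                 const uint32_t new_obs[NUM_OBSERVATION_TYPES],
                 uint32_t num_new)
{
        uint32_t total_delta = 0;
        uint32_t cutoff;
        int i;

        for (i = 0; i < NUM_OBSERVATION_TYPES; i++) {
                /* |p_old - p_new| with both probabilities scaled by
                 * num_old * num_new, so no division is needed:
                 *   p_old * num_old * num_new == old_obs[i] * num_new
                 *   p_new * num_old * num_new == new_obs[i] * num_old */
                uint32_t expected = old_obs[i] * num_new;
                uint32_t actual = new_obs[i] * num_old;

                total_delta += (expected > actual) ? expected - actual
                                                   : actual - expected;
        }

        /* Split when the scaled sum of |delta p| reaches 200/512. */
        cutoff = num_new * 200 / 512 * num_old;
        return total_delta >= cutoff;
}
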
=====================================
lib/deflate_decompress.c
=====================================
@@ -98,11 +98,6 @@
#define LITLEN_ENOUGH 1334 /* enough 288 10 15 */
#define OFFSET_ENOUGH 402 /* enough 32 8 15 */
-/*
- * Type for codeword lengths.
- */
-typedef u8 len_t;
-
/*
* The main DEFLATE decompressor structure. Since this implementation only
* supports full buffer decompression, this structure does not store the entire
@@ -121,12 +116,12 @@ struct libdeflate_decompressor {
*/
union {
- len_t precode_lens[DEFLATE_NUM_PRECODE_SYMS];
+ u8 precode_lens[DEFLATE_NUM_PRECODE_SYMS];
struct {
- len_t lens[DEFLATE_NUM_LITLEN_SYMS +
- DEFLATE_NUM_OFFSET_SYMS +
- DEFLATE_MAX_LENS_OVERRUN];
+ u8 lens[DEFLATE_NUM_LITLEN_SYMS +
+ DEFLATE_NUM_OFFSET_SYMS +
+ DEFLATE_MAX_LENS_OVERRUN];
u32 precode_decode_table[PRECODE_ENOUGH];
} l;
@@ -204,25 +199,27 @@ typedef machine_word_t bitbuf_t;
*
* If we would overread the input buffer, we just don't read anything, leaving
* the bits zeroed but marking them filled. This simplifies the decompressor
- * because it removes the need to distinguish between real overreads and
- * overreads that occur only because of the decompressor's own lookahead.
- *
- * The disadvantage is that real overreads are not detected immediately.
- * However, this is safe because the decompressor is still guaranteed to make
- * forward progress when presented never-ending 0 bits. In an existing block
- * output will be getting generated, whereas new blocks can only be uncompressed
- * (since the type code for uncompressed blocks is 0), for which we check for
- * previous overread. But even if we didn't check, uncompressed blocks would
- * fail to validate because LEN would not equal ~NLEN. So the decompressor will
- * eventually either detect that the output buffer is full, or detect invalid
- * input, or finish the final block.
+ * because it removes the need to always be able to distinguish between real
+ * overreads and overreads caused only by the decompressor's own lookahead.
+ *
+ * We do still keep track of the number of bytes that have been overread, for
+ * two reasons. First, it allows us to determine the exact number of bytes that
+ * were consumed once the stream ends or an uncompressed block is reached.
+ * Second, it allows us to stop early if the overread amount gets so large (more
+ * than sizeof bitbuf) that it can only be caused by a real overread. (The
+ * second part is arguably unneeded, since libdeflate is buffer-based; given
+ * infinite zeroes, it will eventually either completely fill the output buffer
+ * or return an error. However, we do it to be slightly more friendly to the
+ * not-recommended use case of decompressing with an unknown output size.)
*/
#define FILL_BITS_BYTEWISE() \
do { \
- if (likely(in_next != in_end)) \
+ if (likely(in_next != in_end)) { \
bitbuf |= (bitbuf_t)*in_next++ << bitsleft; \
- else \
- overrun_count++; \
+ } else { \
+ overread_count++; \
+ SAFETY_CHECK(overread_count <= sizeof(bitbuf)); \
+ } \
bitsleft += 8; \
} while (bitsleft <= BITBUF_NBITS - 8)
@@ -307,16 +304,16 @@ if (!HAVE_BITS(n)) { \
*/
#define ALIGN_INPUT() \
do { \
- SAFETY_CHECK(overrun_count <= (bitsleft >> 3)); \
- in_next -= (bitsleft >> 3) - overrun_count; \
- overrun_count = 0; \
+ SAFETY_CHECK(overread_count <= (bitsleft >> 3)); \
+ in_next -= (bitsleft >> 3) - overread_count; \
+ overread_count = 0; \
bitbuf = 0; \
bitsleft = 0; \
} while(0)
/*
* Read a 16-bit value from the input. This must have been preceded by a call
- * to ALIGN_INPUT(), and the caller must have already checked for overrun.
+ * to ALIGN_INPUT(), and the caller must have already checked for overread.
*/
#define READ_U16() (tmp16 = get_unaligned_le16(in_next), in_next += 2, tmp16)
@@ -554,7 +551,7 @@ static const u32 offset_decode_results[DEFLATE_NUM_OFFSET_SYMS] = {
*/
static bool
build_decode_table(u32 decode_table[],
- const len_t lens[],
+ const u8 lens[],
const unsigned num_syms,
const u32 decode_results[],
const unsigned table_bits,
=====================================
libdeflate.h
=====================================
@@ -10,8 +10,8 @@ extern "C" {
#endif
#define LIBDEFLATE_VERSION_MAJOR 1
-#define LIBDEFLATE_VERSION_MINOR 9
-#define LIBDEFLATE_VERSION_STRING "1.9"
+#define LIBDEFLATE_VERSION_MINOR 10
+#define LIBDEFLATE_VERSION_STRING "1.10"
#include <stddef.h>
#include <stdint.h>
=====================================
programs/gzip.c
=====================================
@@ -192,6 +192,7 @@ do_decompress(struct libdeflate_decompressor *decompressor,
size_t compressed_size = in->mmap_size;
void *uncompressed_data = NULL;
size_t uncompressed_size;
+ size_t max_uncompressed_size;
size_t actual_in_nbytes;
size_t actual_out_nbytes;
enum libdeflate_result result;
@@ -214,8 +215,23 @@ do_decompress(struct libdeflate_decompressor *decompressor,
if (uncompressed_size == 0)
uncompressed_size = 1;
+ /*
+ * DEFLATE cannot expand data more than 1032x, so there's no need to
+ * ever allocate a buffer more than 1032 times larger than the
+ * compressed data. This is a fail-safe, albeit not a very good one, if
+ * ISIZE becomes corrupted on a small file. (The 1032x number comes
+ * from each 2 bits generating a 258-byte match. This is a hard upper
+ * bound; the real upper bound is slightly smaller due to overhead.)
+ */
+ if (compressed_size <= SIZE_MAX / 1032)
+ max_uncompressed_size = compressed_size * 1032;
+ else
+ max_uncompressed_size = SIZE_MAX;
+
do {
if (uncompressed_data == NULL) {
+ uncompressed_size = MIN(uncompressed_size,
+ max_uncompressed_size);
uncompressed_data = xmalloc(uncompressed_size);
if (uncompressed_data == NULL) {
msg("%"TS": file is probably too large to be "
@@ -234,6 +250,11 @@ do_decompress(struct libdeflate_decompressor *decompressor,
&actual_out_nbytes);
if (result == LIBDEFLATE_INSUFFICIENT_SPACE) {
+ if (uncompressed_size >= max_uncompressed_size) {
+ msg("Bug in libdeflate_gzip_decompress_ex(): data expanded too much!");
+ ret = -1;
+ goto out;
+ }
if (uncompressed_size * 2 <= uncompressed_size) {
msg("%"TS": file corrupt or too large to be "
"processed by this program", in->name);
@@ -256,7 +277,7 @@ do_decompress(struct libdeflate_decompressor *decompressor,
if (actual_in_nbytes == 0 ||
actual_in_nbytes > compressed_size ||
actual_out_nbytes > uncompressed_size) {
- msg("Bug in libdeflate_gzip_decompress_ex()!");
+ msg("Bug in libdeflate_gzip_decompress_ex(): impossible actual_nbytes value!");
ret = -1;
goto out;
}
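
[Editor's aside, not part of the patch] The 1032x figure in the new gzip.c comment follows from DEFLATE's 258-byte maximum match length and the degenerate minimum of 2 bits per match. This tiny standalone program only illustrates that arithmetic:

#include <assert.h>

int main(void)
{
        const unsigned max_match_len = 258;      /* DEFLATE maximum match length */
        const unsigned min_bits_per_match = 2;   /* degenerate lower bound */
        const unsigned bits_per_byte = 8;

        /* At most (8 / 2) = 4 matches per compressed byte, 258 bytes each. */
        unsigned max_expansion =
                max_match_len * (bits_per_byte / min_bits_per_match);

        assert(max_expansion == 1032);
        return 0;
}
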
=====================================
programs/test_overread.c
=====================================
@@ -0,0 +1,95 @@
+/*
+ * test_overread.c
+ *
+ * Test that the decompressor doesn't produce an unbounded amount of output if
+ * it runs out of input, even when implicit zeroes appended to the input would
+ * continue producing output (as is the case when the input ends during a
+ * DYNAMIC_HUFFMAN block where a literal has an all-zeroes codeword).
+ *
+ * This is a regression test for commit 3f21ec9d6121 ("deflate_decompress: error
+ * out if overread count gets too large").
+ */
+
+#include "test_util.h"
+
+static void
+generate_test_input(struct output_bitstream *os)
+{
+ int i;
+
+ put_bits(os, 0, 1); /* BFINAL: 0 */
+ put_bits(os, 2, 2); /* BTYPE: DYNAMIC_HUFFMAN */
+
+ /*
+ * Write the Huffman codes.
+ *
+ * Litlen code:
+ * litlensym_0 (0) len=1 codeword=0
+ * litlensym_256 (end-of-block) len=1 codeword=1
+ * Offset code:
+ * offsetsym_0 (unused) len=1 codeword=0
+ *
+ * Litlen and offset codeword lengths:
+ * [0] = 1 presym_1
+ * [1..255] = 0 presym_{18,18}
+ * [256] = 1 presym_1
+ * [257] = 1 presym_1
+ *
+ * Precode:
+ * presym_1 len=1 codeword=0
+ * presym_18 len=1 codeword=1
+ */
+ put_bits(os, 0, 5); /* num_litlen_syms: 0 + 257 */
+ put_bits(os, 0, 5); /* num_offset_syms: 0 + 1 */
+ put_bits(os, 14, 4); /* num_explicit_precode_lens: 14 + 4 */
+ /*
+ * Precode codeword lengths: order is
+ * [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15]
+ */
+ put_bits(os, 0, 3); /* presym_16: len=0 */
+ put_bits(os, 0, 3); /* presym_17: len=0 */
+ put_bits(os, 1, 3); /* presym_18: len=1 */
+ for (i = 0; i < 14; i++) /* presym_{0,...,14}: len=0 */
+ put_bits(os, 0, 3);
+ put_bits(os, 1, 3); /* presym_1: len=1 */
+
+ /* Litlen and offset codeword lengths */
+ put_bits(os, 0, 1); /* presym_1 */
+ put_bits(os, 1, 1); /* presym_18 ... */
+ put_bits(os, 117, 7); /* ... 11 + 117 zeroes */
+ put_bits(os, 1, 1); /* presym_18 ... */
+ put_bits(os, 116, 7); /* ... 11 + 116 zeroes */
+ put_bits(os, 0, 1); /* presym_1 */
+ put_bits(os, 0, 1); /* presym_1 */
+
+ /* Implicit zeroes would generate endless literals from here. */
+
+ ASSERT(flush_bits(os));
+}
+
+int
+tmain(int argc, tchar *argv[])
+{
+ u8 cdata[16];
+ u8 udata[256];
+ struct output_bitstream os =
+ { .next = cdata, .end = cdata + sizeof(cdata) };
+ struct libdeflate_decompressor *d;
+ enum libdeflate_result res;
+ size_t actual_out_nbytes;
+
+ begin_program(argv);
+
+ generate_test_input(&os);
+ d = libdeflate_alloc_decompressor();
+ ASSERT(d != NULL);
+
+ res = libdeflate_deflate_decompress(d, cdata, os.next - cdata,
+ udata, sizeof(udata),
+ &actual_out_nbytes);
+ /* Before the fix, the result was LIBDEFLATE_INSUFFICIENT_SPACE here. */
+ ASSERT(res == LIBDEFLATE_BAD_DATA);
+
+ libdeflate_free_decompressor(d);
+ return 0;
+}
View it on GitLab: https://salsa.debian.org/med-team/libdeflate/-/commit/fcefb483d54f51568185f994d61771cef9a43a76