[med-svn] [Git][med-team/libdeflate][upstream] New upstream version 1.10

Andreas Tille (@tille) gitlab at salsa.debian.org
Sun Feb 20 07:34:11 GMT 2022



Andreas Tille pushed to branch upstream at Debian Med / libdeflate


Commits:
fcefb483 by Andreas Tille at 2022-02-20T08:30:47+01:00
New upstream version 1.10
- - - - -


7 changed files:

- NEWS.md
- lib/decompress_template.h
- lib/deflate_compress.c
- lib/deflate_decompress.c
- libdeflate.h
- programs/gzip.c
- + programs/test_overread.c


Changes:

=====================================
NEWS.md
=====================================
@@ -1,5 +1,19 @@
 # libdeflate release notes
 
+## Version 1.10
+
+* Added an additional check to the decompressor to make it quickly detect
+  certain bad inputs and not try to generate an unbounded amount of output.
+
+  Note: this was only a problem when decompressing with an unknown output size,
+  which isn't the recommended use case of libdeflate.  However,
+  `libdeflate-gunzip` has to do this, and it would run out of memory as it would
+  keep trying to allocate a larger output buffer.
+
+* Fixed a build error on Solaris.
+
+* Cleaned up a few things in the compression code.
+
 ## Version 1.9
 
 * Made many improvements to the compression algorithms, and rebalanced the
@@ -9,8 +23,8 @@
     ratio on data where short matches aren't useful, such as DNA sequencing
     data.  This applies to all compression levels, but primarily to levels 1-9.
 
-  * Levels 1 was made much faster, though it often compresses slightly worse
-    than before (but still better than zlib).
+  * Level 1 was made much faster, though it often compresses slightly worse than
+    before (but still better than zlib).
 
   * Levels 8-9 were also made faster, though they often compress slightly worse
     than before (but still better than zlib).  On some data, levels 8-9 are much
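
A note on the first 1.10 item above: decompressing with a known output size is the recommended libdeflate pattern, since the buffer-based API can then verify the size instead of repeatedly growing a buffer. A minimal sketch of that pattern, using only the public API declared in libdeflate.h (error handling trimmed; decompress_known_size is a hypothetical helper name):

#include "libdeflate.h"

/*
 * Decompress a raw DEFLATE stream whose uncompressed size is already
 * known, e.g. stored alongside the compressed data.  No retry loop or
 * buffer growth is needed, unlike the unknown-size case that
 * libdeflate-gunzip has to handle.
 */
static int
decompress_known_size(struct libdeflate_decompressor *d,
		      const void *in, size_t in_nbytes,
		      void *out, size_t out_nbytes)
{
	/*
	 * Passing NULL for actual_out_nbytes_ret asks libdeflate to
	 * require that the output is exactly out_nbytes long, which also
	 * catches truncated or overlong streams.
	 */
	return libdeflate_deflate_decompress(d, in, in_nbytes,
					     out, out_nbytes, NULL) ==
	       LIBDEFLATE_SUCCESS ? 0 : -1;
}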


=====================================
lib/decompress_template.h
=====================================
@@ -43,7 +43,7 @@ FUNCNAME(struct libdeflate_decompressor * restrict d,
 	const u8 * const in_end = in_next + in_nbytes;
 	bitbuf_t bitbuf = 0;
 	unsigned bitsleft = 0;
-	size_t overrun_count = 0;
+	size_t overread_count = 0;
 	unsigned i;
 	unsigned is_final_block;
 	unsigned block_type;


=====================================
lib/deflate_compress.c
=====================================
@@ -52,10 +52,10 @@
 
 /*
  * This is the minimum block length that the compressor will use, in
- * uncompressed bytes.  It is also the amount by which the final block is
- * allowed to grow past the soft maximum length in order to avoid using a very
- * short block at the end.  This should be a value below which using shorter
- * blocks is unlikely to be worthwhile, due to the per-block overhead.
+ * uncompressed bytes.  It is also approximately the amount by which the final
+ * block is allowed to grow past the soft maximum length in order to avoid using
+ * a very short block at the end.  This should be a value below which using
+ * shorter blocks is unlikely to be worthwhile, due to the per-block overhead.
  *
  * Defining a fixed minimum block length is needed in order to guarantee a
  * reasonable upper bound on the compressed size.  It's also needed because our
@@ -94,8 +94,8 @@
  * For deflate_compress_fastest(): This is the soft maximum block length.
  * deflate_compress_fastest() doesn't use the regular block splitting algorithm;
  * it only ends blocks when they reach FAST_SOFT_MAX_BLOCK_LENGTH bytes or
- * FAST_SEQ_STORE_LENGTH - 1 matches.  Therefore, this value should be lower
- * than the regular SOFT_MAX_BLOCK_LENGTH.
+ * FAST_SEQ_STORE_LENGTH matches.  Therefore, this value should be lower than
+ * the regular SOFT_MAX_BLOCK_LENGTH.
  */
 #define FAST_SOFT_MAX_BLOCK_LENGTH	65535
 
@@ -490,7 +490,7 @@ struct libdeflate_compressor {
 	/* Frequency counters for the current block */
 	struct deflate_freqs freqs;
 
-	/* Block split statistics for the currently pending block */
+	/* Block split statistics for the current block */
 	struct block_split_stats split_stats;
 
 	/* Dynamic Huffman codes for the current block */
@@ -648,7 +648,7 @@ struct deflate_output_bitstream {
 	 */
 	u8 *next;
 
-	/* Pointer just past the end of the output buffer */
+	/* Pointer to just past the end of the output buffer */
 	u8 *end;
 };
 
@@ -664,8 +664,7 @@ struct deflate_output_bitstream {
 #define OUTPUT_END_PADDING	8
 
 /*
- * Initialize the output bitstream.  'size' is assumed to be at least
- * OUTPUT_END_PADDING.
+ * Initialize the output bitstream.  'size' must be at least OUTPUT_END_PADDING.
  */
 static void
 deflate_init_output(struct deflate_output_bitstream *os,
@@ -680,11 +679,12 @@ deflate_init_output(struct deflate_output_bitstream *os,
 
 /*
  * Add some bits to the bitbuffer variable of the output bitstream.  The caller
- * must make sure there is enough room.
+ * must ensure that os->bitcount + num_bits <= BITBUF_NBITS, by calling
+ * deflate_flush_bits() frequently enough.
  */
 static forceinline void
 deflate_add_bits(struct deflate_output_bitstream *os,
-		 const bitbuf_t bits, const unsigned num_bits)
+		 bitbuf_t bits, unsigned num_bits)
 {
 	os->bitbuf |= bits << os->bitcount;
 	os->bitcount += num_bits;
@@ -712,6 +712,18 @@ deflate_flush_bits(struct deflate_output_bitstream *os)
 	}
 }
 
+/*
+ * Add bits, then flush right away.  Only use this where it is difficult to
+ * batch up calls to deflate_add_bits().
+ */
+static forceinline void
+deflate_write_bits(struct deflate_output_bitstream *os,
+		   bitbuf_t bits, unsigned num_bits)
+{
+	deflate_add_bits(os, bits, num_bits);
+	deflate_flush_bits(os);
+}
+
 /* Align the bitstream on a byte boundary. */
 static forceinline void
 deflate_align_bitstream(struct deflate_output_bitstream *os)
@@ -722,7 +734,8 @@ deflate_align_bitstream(struct deflate_output_bitstream *os)
 
 /*
  * Flush any remaining bits to the output buffer if needed.  Return the total
- * number of bytes written to the output buffer, or 0 if an overflow occurred.
+ * number of bytes that have been written to the output buffer since
+ * deflate_init_output(), or 0 if an overflow occurred.
  */
 static size_t
 deflate_flush_output(struct deflate_output_bitstream *os)
@@ -743,7 +756,7 @@ deflate_flush_output(struct deflate_output_bitstream *os)
  * Given the binary tree node A[subtree_idx] whose children already satisfy the
  * maxheap property, swap the node with its greater child until it is greater
  * than or equal to both of its children, so that the maxheap property is
- * satisfied in the subtree rooted at A[subtree_idx].
+ * satisfied in the subtree rooted at A[subtree_idx].  'A' uses 1-based indices.
  */
 static void
 heapify_subtree(u32 A[], unsigned length, unsigned subtree_idx)
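
For readers unfamiliar with the 1-based convention noted in this hunk: the children of A[i] are A[2*i] and A[2*i + 1], which is why index 1, not 0, is the root. A generic sift-down along the lines the comment describes (a sketch, not the library's exact code):

/*
 * Sift A[subtree_idx] down until the maxheap property holds in its
 * subtree.  'A' uses 1-based indices, so the children of A[i] are
 * A[2*i] and A[2*i + 1].
 */
static void
heapify_subtree_sketch(unsigned A[], unsigned length, unsigned subtree_idx)
{
	unsigned parent_idx = subtree_idx;
	unsigned v = A[subtree_idx];
	unsigned child_idx;

	while ((child_idx = parent_idx * 2) <= length) {
		/* Use the greater of the two children. */
		if (child_idx + 1 <= length && A[child_idx + 1] > A[child_idx])
			child_idx++;
		if (v >= A[child_idx])
			break;
		A[parent_idx] = A[child_idx];
		parent_idx = child_idx;
	}
	A[parent_idx] = v;
}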
@@ -805,7 +818,7 @@ heap_sort(u32 A[], unsigned length)
 #define NUM_SYMBOL_BITS 10
 #define SYMBOL_MASK ((1 << NUM_SYMBOL_BITS) - 1)
 
-#define GET_NUM_COUNTERS(num_syms)	((((num_syms) + 3 / 4) + 3) & ~3)
+#define GET_NUM_COUNTERS(num_syms)	(num_syms)
 
 /*
  * Sort the symbols primarily by frequency and secondarily by symbol value.
@@ -843,26 +856,13 @@ sort_symbols(unsigned num_syms, const u32 freqs[restrict],
 	unsigned counters[GET_NUM_COUNTERS(DEFLATE_MAX_NUM_SYMS)];
 
 	/*
-	 * We rely on heapsort, but with an added optimization.  Since it's
-	 * common for most symbol frequencies to be low, we first do a count
-	 * sort using a limited number of counters.  High frequencies will be
-	 * counted in the last counter, and only they will be sorted with
-	 * heapsort.
+	 * We use heapsort, but with an added optimization.  Since often most
+	 * symbol frequencies are low, we first do a count sort using a limited
+	 * number of counters.  High frequencies are counted in the last
+	 * counter, and only they will be sorted with heapsort.
 	 *
 	 * Note: with more symbols, it is generally beneficial to have more
-	 * counters.  About 1 counter per 4 symbols seems fast.
-	 *
-	 * Note: I also tested radix sort, but even for large symbol counts (>
-	 * 255) and frequencies bounded at 16 bits (enabling radix sort by just
-	 * two base-256 digits), it didn't seem any faster than the method
-	 * implemented here.
-	 *
-	 * Note: I tested the optimized quicksort implementation from glibc
-	 * (with indirection overhead removed), but it was only marginally
-	 * faster than the simple heapsort implemented here.
-	 *
-	 * Tests were done with building the codes for LZX.  Results may vary
-	 * for different compression algorithms...!
+	 * counters.  About 1 counter per symbol seems fastest.
 	 */
 
 	num_counters = GET_NUM_COUNTERS(num_syms);
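
Worth noting about this hunk: in the old GET_NUM_COUNTERS, C operator precedence makes 3 / 4 evaluate to 0, so the macro already yielded roughly num_syms counters rather than the "1 counter per 4 symbols" its old comment claimed; the new definition makes that behavior explicit and fixes the comment to match. A rough sketch of the count-sort phase being described (simplified: the real code also packs frequency and symbol value into each sorted entry):

#define MIN(a, b)		((a) < (b) ? (a) : (b))
#define DEFLATE_MAX_NUM_SYMS	288

/*
 * Count-sort symbols by frequency, with every frequency >=
 * num_counters - 1 clamped into the last counter.  Afterwards, only the
 * symbols that landed in the last counter still need heapsort.
 */
static void
count_sort_sketch(unsigned num_syms, const unsigned freqs[],
		  unsigned sorted_syms[])
{
	unsigned counters[DEFLATE_MAX_NUM_SYMS] = {0};
	unsigned offsets[DEFLATE_MAX_NUM_SYMS];
	unsigned num_counters = num_syms;	/* GET_NUM_COUNTERS(num_syms) */
	unsigned sym;

	for (sym = 0; sym < num_syms; sym++)
		counters[MIN(freqs[sym], num_counters - 1)]++;

	/* Prefix sums turn the counts into starting output positions. */
	offsets[0] = 0;
	for (sym = 1; sym < num_counters; sym++)
		offsets[sym] = offsets[sym - 1] + counters[sym - 1];

	for (sym = 0; sym < num_syms; sym++)
		sorted_syms[offsets[MIN(freqs[sym], num_counters - 1)]++] = sym;
}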
@@ -909,7 +909,7 @@ sort_symbols(unsigned num_syms, const u32 freqs[restrict],
 }
 
 /*
- * Build the Huffman tree.
+ * Build a Huffman tree.
  *
  * This is an optimized implementation that
  *	(a) takes advantage of the frequencies being already sorted;
@@ -1103,6 +1103,33 @@ compute_length_counts(u32 A[restrict], unsigned root_idx,
 	}
 }
 
+/* Reverse the Huffman codeword 'codeword', which is 'len' bits in length. */
+static u32
+reverse_codeword(u32 codeword, u8 len)
+{
+	/*
+	 * The following branchless algorithm is faster than going bit by bit.
+	 * Note: since no codewords are longer than 16 bits, we only need to
+	 * reverse the low 16 bits of the 'u32'.
+	 */
+	STATIC_ASSERT(DEFLATE_MAX_CODEWORD_LEN <= 16);
+
+	/* Flip adjacent 1-bit fields. */
+	codeword = ((codeword & 0x5555) << 1) | ((codeword & 0xAAAA) >> 1);
+
+	/* Flip adjacent 2-bit fields. */
+	codeword = ((codeword & 0x3333) << 2) | ((codeword & 0xCCCC) >> 2);
+
+	/* Flip adjacent 4-bit fields. */
+	codeword = ((codeword & 0x0F0F) << 4) | ((codeword & 0xF0F0) >> 4);
+
+	/* Flip adjacent 8-bit fields. */
+	codeword = ((codeword & 0x00FF) << 8) | ((codeword & 0xFF00) >> 8);
+
+	/* Return the high 'len' bits of the bit-reversed 16 bit value. */
+	return codeword >> (16 - len);
+}
+
 /*
  * Generate the codewords for a canonical Huffman code.
  *
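
The branchless reversal moved into reverse_codeword() above can be checked by hand: reversing the 3-bit codeword 011 gives 110. A minimal standalone test, with the function body copied verbatim from the hunk:

#include <assert.h>
#include <stdint.h>

typedef uint32_t u32;
typedef uint8_t u8;

static u32
reverse_codeword(u32 codeword, u8 len)
{
	codeword = ((codeword & 0x5555) << 1) | ((codeword & 0xAAAA) >> 1);
	codeword = ((codeword & 0x3333) << 2) | ((codeword & 0xCCCC) >> 2);
	codeword = ((codeword & 0x0F0F) << 4) | ((codeword & 0xF0F0) >> 4);
	codeword = ((codeword & 0x00FF) << 8) | ((codeword & 0xFF00) >> 8);
	return codeword >> (16 - len);
}

int
main(void)
{
	/* 0b011 (3 bits) reversed is 0b110. */
	assert(reverse_codeword(0x3, 3) == 0x6);
	/* A palindrome such as 0b1001 is its own reversal. */
	assert(reverse_codeword(0x9, 4) == 0x9);
	return 0;
}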
@@ -1161,13 +1188,18 @@ gen_codewords(u32 A[restrict], u8 lens[restrict],
 		next_codewords[len] =
 			(next_codewords[len - 1] + len_counts[len - 1]) << 1;
 
-	for (sym = 0; sym < num_syms; sym++)
-		A[sym] = next_codewords[lens[sym]]++;
+	for (sym = 0; sym < num_syms; sym++) {
+		u8 len = lens[sym];
+		u32 codeword = next_codewords[len]++;
+
+		/* DEFLATE requires bit-reversed codewords. */
+		A[sym] = reverse_codeword(codeword, len);
+	}
 }
 
 /*
  * ---------------------------------------------------------------------
- *			make_canonical_huffman_code()
+ *			deflate_make_huffman_code()
  * ---------------------------------------------------------------------
  *
  * Given an alphabet and the frequency of each symbol in it, construct a
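
A worked example of the gen_codewords() recurrence from the hunk above: with four symbols whose lens are {2, 1, 3, 3}, the length counts are len_counts[1] = 1, len_counts[2] = 1, len_counts[3] = 2. The recurrence next_codewords[len] = (next_codewords[len - 1] + len_counts[len - 1]) << 1 then gives next_codewords[1] = 0, next_codewords[2] = (0 + 1) << 1 = 2, next_codewords[3] = (2 + 1) << 1 = 6. Assigning in symbol order: sym0 (len 2) gets 10, sym1 (len 1) gets 0, sym2 (len 3) gets 110, and sym3 (len 3) gets 111, which is a canonical prefix code. Each codeword is then bit-reversed because DEFLATE writes codewords LSB-first.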
@@ -1266,9 +1298,9 @@ gen_codewords(u32 A[restrict], u8 lens[restrict],
  * file: C/HuffEnc.c), which was placed in the public domain by Igor Pavlov.
  */
 static void
-make_canonical_huffman_code(unsigned num_syms, unsigned max_codeword_len,
-			    const u32 freqs[restrict],
-			    u8 lens[restrict], u32 codewords[restrict])
+deflate_make_huffman_code(unsigned num_syms, unsigned max_codeword_len,
+			  const u32 freqs[restrict],
+			  u8 lens[restrict], u32 codewords[restrict])
 {
 	u32 *A = codewords;
 	unsigned num_used_syms;
@@ -1352,53 +1384,11 @@ deflate_reset_symbol_frequencies(struct libdeflate_compressor *c)
 	memset(&c->freqs, 0, sizeof(c->freqs));
 }
 
-/* Reverse the Huffman codeword 'codeword', which is 'len' bits in length. */
-static u32
-deflate_reverse_codeword(u32 codeword, u8 len)
-{
-	/*
-	 * The following branchless algorithm is faster than going bit by bit.
-	 * Note: since no codewords are longer than 16 bits, we only need to
-	 * reverse the low 16 bits of the 'u32'.
-	 */
-	STATIC_ASSERT(DEFLATE_MAX_CODEWORD_LEN <= 16);
-
-	/* Flip adjacent 1-bit fields. */
-	codeword = ((codeword & 0x5555) << 1) | ((codeword & 0xAAAA) >> 1);
-
-	/* Flip adjacent 2-bit fields. */
-	codeword = ((codeword & 0x3333) << 2) | ((codeword & 0xCCCC) >> 2);
-
-	/* Flip adjacent 4-bit fields. */
-	codeword = ((codeword & 0x0F0F) << 4) | ((codeword & 0xF0F0) >> 4);
-
-	/* Flip adjacent 8-bit fields. */
-	codeword = ((codeword & 0x00FF) << 8) | ((codeword & 0xFF00) >> 8);
-
-	/* Return the high 'len' bits of the bit-reversed 16 bit value. */
-	return codeword >> (16 - len);
-}
-
-/* Make a canonical Huffman code with bit-reversed codewords. */
-static void
-deflate_make_huffman_code(unsigned num_syms, unsigned max_codeword_len,
-			  const u32 freqs[], u8 lens[], u32 codewords[])
-{
-	unsigned sym;
-
-	make_canonical_huffman_code(num_syms, max_codeword_len,
-				    freqs, lens, codewords);
-
-	for (sym = 0; sym < num_syms; sym++)
-		codewords[sym] = deflate_reverse_codeword(codewords[sym],
-							  lens[sym]);
-}
-
 /*
  * Build the literal/length and offset Huffman codes for a DEFLATE block.
  *
- * This takes as input the frequency tables for each code and produces as output
- * a set of tables that map symbols to codewords and codeword lengths.
+ * This takes as input the frequency tables for each alphabet and produces as
+ * output a set of tables that map symbols to codewords and codeword lengths.
  */
 static void
 deflate_make_huffman_codes(const struct deflate_freqs *freqs,
@@ -1633,9 +1623,8 @@ deflate_write_huffman_header(struct libdeflate_compressor *c,
 
 	/* Output the lengths of the codewords in the precode. */
 	for (i = 0; i < c->num_explicit_lens; i++) {
-		deflate_add_bits(os, c->precode_lens[
+		deflate_write_bits(os, c->precode_lens[
 				       deflate_precode_lens_permutation[i]], 3);
-		deflate_flush_bits(os);
 	}
 
 	/* Output the encoded lengths of the codewords in the larger code. */
@@ -1719,13 +1708,51 @@ deflate_write_literal_run(struct deflate_output_bitstream *os,
 	do {
 		unsigned lit = *in_next++;
 
-		deflate_add_bits(os, codes->codewords.litlen[lit],
-				 codes->lens.litlen[lit]);
-		deflate_flush_bits(os);
+		deflate_write_bits(os, codes->codewords.litlen[lit],
+				   codes->lens.litlen[lit]);
 	} while (--litrunlen);
 #endif
 }
 
+static forceinline void
+deflate_write_match(struct deflate_output_bitstream * restrict os,
+		    unsigned length, unsigned length_slot,
+		    unsigned offset, unsigned offset_symbol,
+		    const struct deflate_codes * restrict codes)
+{
+	unsigned litlen_symbol = DEFLATE_FIRST_LEN_SYM + length_slot;
+
+	/* Litlen symbol */
+	deflate_add_bits(os, codes->codewords.litlen[litlen_symbol],
+			 codes->lens.litlen[litlen_symbol]);
+
+	/* Extra length bits */
+	STATIC_ASSERT(CAN_BUFFER(MAX_LITLEN_CODEWORD_LEN +
+				 DEFLATE_MAX_EXTRA_LENGTH_BITS));
+	deflate_add_bits(os, length - deflate_length_slot_base[length_slot],
+			 deflate_extra_length_bits[length_slot]);
+
+	if (!CAN_BUFFER(MAX_LITLEN_CODEWORD_LEN +
+			DEFLATE_MAX_EXTRA_LENGTH_BITS +
+			MAX_OFFSET_CODEWORD_LEN +
+			DEFLATE_MAX_EXTRA_OFFSET_BITS))
+		deflate_flush_bits(os);
+
+	/* Offset symbol */
+	deflate_add_bits(os, codes->codewords.offset[offset_symbol],
+			 codes->lens.offset[offset_symbol]);
+
+	if (!CAN_BUFFER(MAX_OFFSET_CODEWORD_LEN +
+			DEFLATE_MAX_EXTRA_OFFSET_BITS))
+		deflate_flush_bits(os);
+
+	/* Extra offset bits */
+	deflate_add_bits(os, offset - deflate_offset_slot_base[offset_symbol],
+			 deflate_extra_offset_bits[offset_symbol]);
+
+	deflate_flush_bits(os);
+}
+
 static void
 deflate_write_sequences(struct deflate_output_bitstream * restrict os,
 			const struct deflate_codes * restrict codes,
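
A note on the CAN_BUFFER pattern in deflate_write_match() above, assuming CAN_BUFFER(n) is a compile-time check that n bits always fit in the bit buffer between flushes: the worst case for a whole match is the litlen codeword plus up to DEFLATE_MAX_EXTRA_LENGTH_BITS = 5 extra length bits, plus the offset codeword and up to DEFLATE_MAX_EXTRA_OFFSET_BITS = 13 extra offset bits. On typical 64-bit builds that total fits, both conditional flushes compile away to nothing, and a match costs a single deflate_flush_bits() at the end; on 32-bit builds the relevant intermediate flushes remain compiled in. The STATIC_ASSERT documents the minimum the code relies on: a litlen codeword and its extra length bits must always be bufferable together.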
@@ -1737,9 +1764,6 @@ deflate_write_sequences(struct deflate_output_bitstream * restrict os,
 	for (;;) {
 		u32 litrunlen = seq->litrunlen_and_length & SEQ_LITRUNLEN_MASK;
 		unsigned length = seq->litrunlen_and_length >> SEQ_LENGTH_SHIFT;
-		unsigned length_slot;
-		unsigned litlen_symbol;
-		unsigned offset_symbol;
 
 		if (litrunlen) {
 			deflate_write_literal_run(os, in_next, litrunlen,
@@ -1750,44 +1774,10 @@ deflate_write_sequences(struct deflate_output_bitstream * restrict os,
 		if (length == 0)
 			return;
 
-		in_next += length;
-
-		length_slot = seq->length_slot;
-		litlen_symbol = DEFLATE_FIRST_LEN_SYM + length_slot;
-
-		/* Litlen symbol */
-		deflate_add_bits(os, codes->codewords.litlen[litlen_symbol],
-				 codes->lens.litlen[litlen_symbol]);
-
-		/* Extra length bits */
-		STATIC_ASSERT(CAN_BUFFER(MAX_LITLEN_CODEWORD_LEN +
-					 DEFLATE_MAX_EXTRA_LENGTH_BITS));
-		deflate_add_bits(os,
-				 length - deflate_length_slot_base[length_slot],
-				 deflate_extra_length_bits[length_slot]);
-
-		if (!CAN_BUFFER(MAX_LITLEN_CODEWORD_LEN +
-				DEFLATE_MAX_EXTRA_LENGTH_BITS +
-				MAX_OFFSET_CODEWORD_LEN +
-				DEFLATE_MAX_EXTRA_OFFSET_BITS))
-			deflate_flush_bits(os);
-
-		/* Offset symbol */
-		offset_symbol = seq->offset_symbol;
-		deflate_add_bits(os, codes->codewords.offset[offset_symbol],
-				 codes->lens.offset[offset_symbol]);
-
-		if (!CAN_BUFFER(MAX_OFFSET_CODEWORD_LEN +
-				DEFLATE_MAX_EXTRA_OFFSET_BITS))
-			deflate_flush_bits(os);
-
-		/* Extra offset bits */
-		deflate_add_bits(os, seq->offset -
-				 deflate_offset_slot_base[offset_symbol],
-				 deflate_extra_offset_bits[offset_symbol]);
-
-		deflate_flush_bits(os);
+		deflate_write_match(os, length, seq->length_slot,
+				    seq->offset, seq->offset_symbol, codes);
 
+		in_next += length;
 		seq++;
 	}
 }
@@ -1797,10 +1787,6 @@ deflate_write_sequences(struct deflate_output_bitstream * restrict os,
  * Follow the minimum-cost path in the graph of possible match/literal choices
  * for the current block and write out the matches/literals using the specified
  * Huffman codes.
- *
- * Note: this is slightly duplicated with deflate_write_sequences(), the reason
- * being that we don't want to waste time translating between intermediate
- * match/literal representations.
  */
 static void
 deflate_write_item_list(struct deflate_output_bitstream *os,
@@ -1814,51 +1800,18 @@ deflate_write_item_list(struct deflate_output_bitstream *os,
 	do {
 		unsigned length = cur_node->item & OPTIMUM_LEN_MASK;
 		unsigned offset = cur_node->item >> OPTIMUM_OFFSET_SHIFT;
-		unsigned litlen_symbol;
-		unsigned length_slot;
-		unsigned offset_slot;
 
 		if (length == 1) {
 			/* Literal */
-			litlen_symbol = offset;
-			deflate_add_bits(os,
-					 codes->codewords.litlen[litlen_symbol],
-					 codes->lens.litlen[litlen_symbol]);
-			deflate_flush_bits(os);
+			deflate_write_bits(os, codes->codewords.litlen[offset],
+					   codes->lens.litlen[offset]);
 		} else {
-			/* Match length */
-			length_slot = deflate_length_slot[length];
-			litlen_symbol = DEFLATE_FIRST_LEN_SYM + length_slot;
-			deflate_add_bits(os,
-				codes->codewords.litlen[litlen_symbol],
-				codes->lens.litlen[litlen_symbol]);
-
-			deflate_add_bits(os,
-				length - deflate_length_slot_base[length_slot],
-				deflate_extra_length_bits[length_slot]);
-
-			if (!CAN_BUFFER(MAX_LITLEN_CODEWORD_LEN +
-					DEFLATE_MAX_EXTRA_LENGTH_BITS +
-					MAX_OFFSET_CODEWORD_LEN +
-					DEFLATE_MAX_EXTRA_OFFSET_BITS))
-				deflate_flush_bits(os);
-
-
-			/* Match offset */
-			offset_slot = c->p.n.offset_slot_full[offset];
-			deflate_add_bits(os,
-				codes->codewords.offset[offset_slot],
-				codes->lens.offset[offset_slot]);
-
-			if (!CAN_BUFFER(MAX_OFFSET_CODEWORD_LEN +
-					DEFLATE_MAX_EXTRA_OFFSET_BITS))
-				deflate_flush_bits(os);
-
-			deflate_add_bits(os,
-				offset - deflate_offset_slot_base[offset_slot],
-				deflate_extra_offset_bits[offset_slot]);
-
-			deflate_flush_bits(os);
+			/* Match */
+			deflate_write_match(os, length,
+					    deflate_length_slot[length],
+					    offset,
+					    c->p.n.offset_slot_full[offset],
+					    codes);
 		}
 		cur_node += length;
 	} while (cur_node != end_node);
@@ -1870,9 +1823,8 @@ static void
 deflate_write_end_of_block(struct deflate_output_bitstream *os,
 			   const struct deflate_codes *codes)
 {
-	deflate_add_bits(os, codes->codewords.litlen[DEFLATE_END_OF_BLOCK],
-			 codes->lens.litlen[DEFLATE_END_OF_BLOCK]);
-	deflate_flush_bits(os);
+	deflate_write_bits(os, codes->codewords.litlen[DEFLATE_END_OF_BLOCK],
+			   codes->lens.litlen[DEFLATE_END_OF_BLOCK]);
 }
 
 static void
@@ -2054,19 +2006,19 @@ deflate_flush_block(struct libdeflate_compressor * restrict c,
  * literals we only look at the high bits and low bits, and for matches we only
  * look at whether the match is long or not.  The assumption is that for typical
  * "real" data, places that are good block boundaries will tend to be noticeable
- * based only on changes in these aggregate frequencies, without looking for
+ * based only on changes in these aggregate probabilities, without looking for
  * subtle differences in individual symbols.  For example, a change from ASCII
  * bytes to non-ASCII bytes, or from few matches (generally less compressible)
  * to many matches (generally more compressible), would be easily noticed based
  * on the aggregates.
  *
- * For determining whether the frequency distributions are "different enough" to
- * start a new block, the simply heuristic of splitting when the sum of absolute
- * differences exceeds a constant seems to be good enough.  We also add a number
- * proportional to the block length so that the algorithm is more likely to end
- * long blocks than short blocks.  This reflects the general expectation that it
- * will become increasingly beneficial to start a new block as the current
- * block grows longer.
+ * For determining whether the probability distributions are "different enough"
+ * to start a new block, the simple heuristic of splitting when the sum of
+ * absolute differences exceeds a constant seems to be good enough.  We also add
+ * a number proportional to the block length so that the algorithm is more
+ * likely to end long blocks than short blocks.  This reflects the general
+ * expectation that it will become increasingly beneficial to start a new block
+ * as the current block grows longer.
  *
  * Finally, for an approximation, it is not strictly necessary that the exact
  * symbols being used are considered.  With "near-optimal parsing", for example,
@@ -2130,9 +2082,14 @@ do_end_block_check(struct block_split_stats *stats, u32 block_length)
 {
 	if (stats->num_observations > 0) {
 		/*
-		 * Note: to avoid slow divisions, we do not divide by
-		 * 'num_observations', but rather do all math with the numbers
-		 * multiplied by 'num_observations'.
+		 * Compute the sum of absolute differences of probabilities.  To
+		 * avoid needing to use floating point arithmetic or do slow
+		 * divisions, we do all arithmetic with the probabilities
+		 * multiplied by num_observations * num_new_observations.  E.g.,
+		 * for the "old" observations the probabilities would be
+		 * (double)observations[i] / num_observations, but since we
+		 * multiply by both num_observations and num_new_observations we
+		 * really do observations[i] * num_new_observations.
 		 */
 		u32 total_delta = 0;
 		u32 num_items;
@@ -2152,6 +2109,12 @@ do_end_block_check(struct block_split_stats *stats, u32 block_length)
 
 		num_items = stats->num_observations +
 			    stats->num_new_observations;
+		/*
+		 * Heuristic: the cutoff is when the sum of absolute differences
+		 * of probabilities becomes at least 200/512.  As above, the
+		 * probability is multiplied by both num_new_observations and
+		 * num_observations.  Be careful to avoid integer overflow.
+		 */
 		cutoff = stats->num_new_observations * 200 / 512 *
 			 stats->num_observations;
 		/*
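
Putting this hunk's pieces together, the cross-multiplied comparison can be distilled into a small standalone sketch (simplified: NUM_TYPES is a stand-in for the real number of observation types, and the block-length-proportional bias mentioned in the surrounding comments is omitted):

#include <stdbool.h>
#include <stdint.h>

#define NUM_TYPES 10	/* stand-in for the real number of observation types */

/*
 * The "probability" of observation type i is observations[i] /
 * num_observations in the old window and new_observations[i] /
 * num_new_observations in the new one.  Multiplying everything by
 * num_observations * num_new_observations avoids divisions entirely.
 */
static bool
should_end_block_sketch(const uint32_t observations[NUM_TYPES],
			uint32_t num_observations,
			const uint32_t new_observations[NUM_TYPES],
			uint32_t num_new_observations)
{
	uint32_t total_delta = 0;
	uint32_t cutoff;
	int i;

	for (i = 0; i < NUM_TYPES; i++) {
		uint32_t expected = observations[i] * num_new_observations;
		uint32_t actual = new_observations[i] * num_observations;

		total_delta += (actual > expected) ? actual - expected
						   : expected - actual;
	}

	/*
	 * Split once the summed probability delta reaches 200/512, scaled
	 * by the same factor as total_delta.  Dividing by 512 before the
	 * final multiply helps keep the product within 32 bits.
	 */
	cutoff = num_new_observations * 200 / 512 * num_observations;
	return total_delta >= cutoff;
}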


=====================================
lib/deflate_decompress.c
=====================================
@@ -98,11 +98,6 @@
 #define LITLEN_ENOUGH		1334	/* enough 288 10 15	*/
 #define OFFSET_ENOUGH		402	/* enough 32 8 15	*/
 
-/*
- * Type for codeword lengths.
- */
-typedef u8 len_t;
-
 /*
  * The main DEFLATE decompressor structure.  Since this implementation only
  * supports full buffer decompression, this structure does not store the entire
@@ -121,12 +116,12 @@ struct libdeflate_decompressor {
 	 */
 
 	union {
-		len_t precode_lens[DEFLATE_NUM_PRECODE_SYMS];
+		u8 precode_lens[DEFLATE_NUM_PRECODE_SYMS];
 
 		struct {
-			len_t lens[DEFLATE_NUM_LITLEN_SYMS +
-				   DEFLATE_NUM_OFFSET_SYMS +
-				   DEFLATE_MAX_LENS_OVERRUN];
+			u8 lens[DEFLATE_NUM_LITLEN_SYMS +
+				DEFLATE_NUM_OFFSET_SYMS +
+				DEFLATE_MAX_LENS_OVERRUN];
 
 			u32 precode_decode_table[PRECODE_ENOUGH];
 		} l;
@@ -204,25 +199,27 @@ typedef machine_word_t bitbuf_t;
  *
  * If we would overread the input buffer, we just don't read anything, leaving
  * the bits zeroed but marking them filled.  This simplifies the decompressor
- * because it removes the need to distinguish between real overreads and
- * overreads that occur only because of the decompressor's own lookahead.
- *
- * The disadvantage is that real overreads are not detected immediately.
- * However, this is safe because the decompressor is still guaranteed to make
- * forward progress when presented never-ending 0 bits.  In an existing block
- * output will be getting generated, whereas new blocks can only be uncompressed
- * (since the type code for uncompressed blocks is 0), for which we check for
- * previous overread.  But even if we didn't check, uncompressed blocks would
- * fail to validate because LEN would not equal ~NLEN.  So the decompressor will
- * eventually either detect that the output buffer is full, or detect invalid
- * input, or finish the final block.
+ * because it removes the need to always be able to distinguish between real
+ * overreads and overreads caused only by the decompressor's own lookahead.
+ *
+ * We do still keep track of the number of bytes that have been overread, for
+ * two reasons.  First, it allows us to determine the exact number of bytes that
+ * were consumed once the stream ends or an uncompressed block is reached.
+ * Second, it allows us to stop early if the overread amount gets so large (more
+ * than sizeof bitbuf) that it can only be caused by a real overread.  (The
+ * second part is arguably unneeded, since libdeflate is buffer-based; given
+ * infinite zeroes, it will eventually either completely fill the output buffer
+ * or return an error.  However, we do it to be slightly more friendly to the
+ * not-recommended use case of decompressing with an unknown output size.)
  */
 #define FILL_BITS_BYTEWISE()					\
 do {								\
-	if (likely(in_next != in_end))				\
+	if (likely(in_next != in_end)) {			\
 		bitbuf |= (bitbuf_t)*in_next++ << bitsleft;	\
-	else							\
-		overrun_count++;				\
+	} else {						\
+		overread_count++;				\
+		SAFETY_CHECK(overread_count <= sizeof(bitbuf));	\
+	}							\
 	bitsleft += 8;						\
 } while (bitsleft <= BITBUF_NBITS - 8)
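
The sizeof(bitbuf) bound is the key to the new check: bitsleft never exceeds BITBUF_NBITS, so the decompressor's own lookahead can account for at most BITBUF_NBITS / 8 = sizeof(bitbuf) phantom bytes. Any overread_count beyond that can only mean the input really ended prematurely, and the SAFETY_CHECK fails the stream with LIBDEFLATE_BAD_DATA instead of letting implicit zeroes keep producing output. That is exactly the behavior the new programs/test_overread.c below verifies.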
 
@@ -307,16 +304,16 @@ if (!HAVE_BITS(n)) {						\
  */
 #define ALIGN_INPUT()							\
 do {									\
-	SAFETY_CHECK(overrun_count <= (bitsleft >> 3));			\
-	in_next -= (bitsleft >> 3) - overrun_count;			\
-	overrun_count = 0;						\
+	SAFETY_CHECK(overread_count <= (bitsleft >> 3));		\
+	in_next -= (bitsleft >> 3) - overread_count;			\
+	overread_count = 0;						\
 	bitbuf = 0;							\
 	bitsleft = 0;							\
 } while(0)
 
 /*
  * Read a 16-bit value from the input.  This must have been preceded by a call
- * to ALIGN_INPUT(), and the caller must have already checked for overrun.
+ * to ALIGN_INPUT(), and the caller must have already checked for overread.
  */
 #define READ_U16() (tmp16 = get_unaligned_le16(in_next), in_next += 2, tmp16)
 
@@ -554,7 +551,7 @@ static const u32 offset_decode_results[DEFLATE_NUM_OFFSET_SYMS] = {
  */
 static bool
 build_decode_table(u32 decode_table[],
-		   const len_t lens[],
+		   const u8 lens[],
 		   const unsigned num_syms,
 		   const u32 decode_results[],
 		   const unsigned table_bits,


=====================================
libdeflate.h
=====================================
@@ -10,8 +10,8 @@ extern "C" {
 #endif
 
 #define LIBDEFLATE_VERSION_MAJOR	1
-#define LIBDEFLATE_VERSION_MINOR	9
-#define LIBDEFLATE_VERSION_STRING	"1.9"
+#define LIBDEFLATE_VERSION_MINOR	10
+#define LIBDEFLATE_VERSION_STRING	"1.10"
 
 #include <stddef.h>
 #include <stdint.h>


=====================================
programs/gzip.c
=====================================
@@ -192,6 +192,7 @@ do_decompress(struct libdeflate_decompressor *decompressor,
 	size_t compressed_size = in->mmap_size;
 	void *uncompressed_data = NULL;
 	size_t uncompressed_size;
+	size_t max_uncompressed_size;
 	size_t actual_in_nbytes;
 	size_t actual_out_nbytes;
 	enum libdeflate_result result;
@@ -214,8 +215,23 @@ do_decompress(struct libdeflate_decompressor *decompressor,
 	if (uncompressed_size == 0)
 		uncompressed_size = 1;
 
+	/*
+	 * DEFLATE cannot expand data more than 1032x, so there's no need to
+	 * ever allocate a buffer more than 1032 times larger than the
+	 * compressed data.  This is a fail-safe, albeit not a very good one, if
+	 * ISIZE becomes corrupted on a small file.  (The 1032x number comes
+	 * from each 2 bits generating a 258-byte match.  This is a hard upper
+	 * bound; the real upper bound is slightly smaller due to overhead.)
+	 */
+	if (compressed_size <= SIZE_MAX / 1032)
+		max_uncompressed_size = compressed_size * 1032;
+	else
+		max_uncompressed_size = SIZE_MAX;
+
 	do {
 		if (uncompressed_data == NULL) {
+			uncompressed_size = MIN(uncompressed_size,
+						max_uncompressed_size);
 			uncompressed_data = xmalloc(uncompressed_size);
 			if (uncompressed_data == NULL) {
 				msg("%"TS": file is probably too large to be "
@@ -234,6 +250,11 @@ do_decompress(struct libdeflate_decompressor *decompressor,
 						       &actual_out_nbytes);
 
 		if (result == LIBDEFLATE_INSUFFICIENT_SPACE) {
+			if (uncompressed_size >= max_uncompressed_size) {
+				msg("Bug in libdeflate_gzip_decompress_ex(): data expanded too much!");
+				ret = -1;
+				goto out;
+			}
 			if (uncompressed_size * 2 <= uncompressed_size) {
 				msg("%"TS": file corrupt or too large to be "
 				    "processed by this program", in->name);
@@ -256,7 +277,7 @@ do_decompress(struct libdeflate_decompressor *decompressor,
 		if (actual_in_nbytes == 0 ||
 		    actual_in_nbytes > compressed_size ||
 		    actual_out_nbytes > uncompressed_size) {
-			msg("Bug in libdeflate_gzip_decompress_ex()!");
+			msg("Bug in libdeflate_gzip_decompress_ex(): impossible actual_nbytes value!");
 			ret = -1;
 			goto out;
 		}


=====================================
programs/test_overread.c
=====================================
@@ -0,0 +1,95 @@
+/*
+ * test_overread.c
+ *
+ * Test that the decompressor doesn't produce an unbounded amount of output if
+ * it runs out of input, even when implicit zeroes appended to the input would
+ * continue producing output (as is the case when the input ends during a
+ * DYNAMIC_HUFFMAN block where a literal has an all-zeroes codeword).
+ *
+ * This is a regression test for commit 3f21ec9d6121 ("deflate_decompress: error
+ * out if overread count gets too large").
+ */
+
+#include "test_util.h"
+
+static void
+generate_test_input(struct output_bitstream *os)
+{
+	int i;
+
+	put_bits(os, 0, 1);	/* BFINAL: 0 */
+	put_bits(os, 2, 2);	/* BTYPE: DYNAMIC_HUFFMAN */
+
+	/*
+	 * Write the Huffman codes.
+	 *
+	 * Litlen code:
+	 *	litlensym_0   (0)		len=1 codeword=0
+	 *	litlensym_256 (end-of-block)	len=1 codeword=1
+	 * Offset code:
+	 *	offsetsym_0 (unused)		len=1 codeword=0
+	 *
+	 * Litlen and offset codeword lengths:
+	 *	[0]	 = 1	presym_1
+	 *	[1..255] = 0	presym_{18,18}
+	 *	[256]	 = 1	presym_1
+	 *	[257]	 = 1	presym_1
+	 *
+	 * Precode:
+	 *	presym_1	len=1 codeword=0
+	 *	presym_18	len=1 codeword=1
+	 */
+	put_bits(os, 0, 5);	/* num_litlen_syms: 0 + 257 */
+	put_bits(os, 0, 5);	/* num_offset_syms: 0 + 1 */
+	put_bits(os, 14, 4);	/* num_explicit_precode_lens: 14 + 4 */
+	/*
+	 * Precode codeword lengths: order is
+	 * [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15]
+	 */
+	put_bits(os, 0, 3);		/* presym_16: len=0 */
+	put_bits(os, 0, 3);		/* presym_17: len=0 */
+	put_bits(os, 1, 3);		/* presym_18: len=1 */
+	for (i = 0; i < 14; i++)	/* presym_{0,...,14}: len=0 */
+		put_bits(os, 0, 3);
+	put_bits(os, 1, 3);		/* presym_1: len=1 */
+
+	/* Litlen and offset codeword lengths */
+	put_bits(os, 0, 1);		/* presym_1 */
+	put_bits(os, 1, 1);		/* presym_18 ... */
+	put_bits(os, 117, 7);		/* ... 11 + 117 zeroes */
+	put_bits(os, 1, 1);		/* presym_18 ... */
+	put_bits(os, 116, 7);		/* ... 11 + 116 zeroes */
+	put_bits(os, 0, 1);		/* presym_1 */
+	put_bits(os, 0, 1);		/* presym_1 */
+
+	/* Implicit zeroes would generate endless literals from here. */
+
+	ASSERT(flush_bits(os));
+}
+
+int
+tmain(int argc, tchar *argv[])
+{
+	u8 cdata[16];
+	u8 udata[256];
+	struct output_bitstream os =
+		{ .next = cdata, .end = cdata + sizeof(cdata) };
+	struct libdeflate_decompressor *d;
+	enum libdeflate_result res;
+	size_t actual_out_nbytes;
+
+	begin_program(argv);
+
+	generate_test_input(&os);
+	d = libdeflate_alloc_decompressor();
+	ASSERT(d != NULL);
+
+	res = libdeflate_deflate_decompress(d, cdata, os.next - cdata,
+					    udata, sizeof(udata),
+					    &actual_out_nbytes);
+	/* Before the fix, the result was LIBDEFLATE_INSUFFICIENT_SPACE here. */
+	ASSERT(res == LIBDEFLATE_BAD_DATA);
+
+	libdeflate_free_decompressor(d);
+	return 0;
+}



View it on GitLab: https://salsa.debian.org/med-team/libdeflate/-/commit/fcefb483d54f51568185f994d61771cef9a43a76
