[Reproducible-builds] [strip-nondeterminism] 01/01: Write a proper gzip handler

Andrew Ayer agwa-guest at moszumanska.debian.org
Fri Aug 29 18:54:15 UTC 2014


This is an automated email from the git hooks/post-receive script.

agwa-guest pushed a commit to branch master
in repository strip-nondeterminism.

commit 4ca8b362a0446de012834e8d12bd4a93b8a3593f
Author: Andrew Ayer <agwa at andrewayer.name>
Date:   Fri Aug 29 11:50:08 2014 -0700

    Write a proper gzip handler
    
    Unfortunately it doesn't support concatenated gzip files (only the first
    file is normalized; subsequent files are copied through with timestamps,
    etc.) because that would require understanding the DEFLATE format,
    which is non-trivial.  Hopefully this will not be an issue in practice.
---
 handlers/gzip | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 102 insertions(+), 3 deletions(-)

diff --git a/handlers/gzip b/handlers/gzip
index 37980fc..79134b1 100755
--- a/handlers/gzip
+++ b/handlers/gzip
@@ -1,5 +1,104 @@
-#!/bin/sh
+#!/usr/bin/perl
 
-# TODO: an intelligent gzip handler
+# Copyright 2014 Andrew Ayer
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-gunzip | gzip -n
+use strict;
+use warnings;
+
+use constant {
+	FTEXT    => 1 << 0,
+	FHCRC    => 1 << 1,
+	FEXTRA   => 1 << 2,
+	FNAME    => 1 << 3,
+	FCOMMENT => 1 << 4,
+};
+
+# Filter a gzip stream from STDIN to STDOUT, dropping the mtime, file name and
+# header CRC from the first member's header (see RFC 1952).  The data is
+# binary, so make sure no CRLF or UTF-8 I/O layer rewrites the bytes.
+binmode(STDIN);
+binmode(STDOUT);
+
+# Return exactly $len bytes from STDIN; die with $msg on EOF or read error.
+sub read_exact {
+	my ($len, $msg) = @_;
+	my $buf = '';
+	my $got = read(*STDIN, $buf, $len);
+	die $msg unless defined($got) and $got == $len;
+	return $buf;
+}
+
+# 0   1   2   3   4   5   6   7   8   9   10
+# +---+---+---+---+---+---+---+---+---+---+
+# |ID1|ID2|CM |FLG|     MTIME     |XFL|OS |
+# +---+---+---+---+---+---+---+---+---+---+
+
+# Read the current header
+my $hdr = read_exact(10, "Not a gzip file");
+my ($id1, $id2, $cm, $flg, $mtime, $xfl, $os) = unpack('CCCCl<CC', $hdr);
+die "Not a gzip file" unless $id1 == 31 and $id2 == 139;
+
+my $new_flg = $flg;
+$new_flg &= ~FNAME;	# Don't include filename
+$new_flg &= ~FHCRC;	# Don't include header CRC (not all implementations support it)
+$mtime = 0;		# Zero out mtime (this is what `gzip -n` does)
+# TODO: question: normalize some of the other fields, such as OS?
+
+# Write a new header
+print pack('CCCCl<CC', $id1, $id2, $cm, $new_flg, $mtime, $xfl, $os);
+
+if ($flg & FEXTRA) {	# Copy through
+	# 0   1   2
+	# +---+---+=================================+
+	# | XLEN  |...XLEN bytes of "extra field"...|
+	# +---+---+=================================+
+	my $xlen_raw = read_exact(2, "Malformed gzip file");
+	my $extra = read_exact(unpack('v', $xlen_raw), "Malformed gzip file");
+	print $xlen_raw, $extra;	# raw bytes, exactly as they were read
+}
+if ($flg & FNAME) {	# Read but do not copy through
+	# 0
+	# +=========================================+
+	# |...original file name, zero-terminated...|
+	# +=========================================+
+	1 while ord(read_exact(1, "Malformed gzip file")) != 0;
+}
+if ($flg & FCOMMENT) {	# Copy through
+	# 0
+	# +===================================+
+	# |...file comment, zero-terminated...|
+	# +===================================+
+	my $byte;
+	do {
+		$byte = read_exact(1, "Malformed gzip file");
+		print $byte;	# the terminating zero byte is copied too
+	} until ord($byte) == 0;
+}
+if ($flg & FHCRC) {	# Read but do not copy through
+	# 0   1   2
+	# +---+---+
+	# | CRC16 |
+	# +---+---+
+	read_exact(2, "Malformed gzip file");
+}
+
+# Copy through the rest of the file.
+# TODO: support concatenated gzip files.  This will require reading and understanding
+# each DEFLATE block (see RFC 1951), since gzip doesn't include lengths anywhere.
+my ($chunk, $bytes_read);
+print $chunk while ($bytes_read = read(*STDIN, $chunk, 4096));
+defined($bytes_read) or die "read failed: $!";
+close(STDOUT) or die "write failed: $!";	# buffered write errors surface at close

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/strip-nondeterminism.git



More information about the Reproducible-builds mailing list