[Reproducible-builds] [strip-nondeterminism] 01/01: Add dh_strip_nondeterminism

Andrew Ayer agwa at andrewayer.name
Sun Aug 31 04:38:48 UTC 2014


This is an automated email from the git hooks/post-receive script.

agwa-guest pushed a commit to branch master
in repository strip-nondeterminism.

commit 6c3dc25733df8a4fa10c040768d2f9ebdb1c7dbe
Author: Andrew Ayer <agwa at andrewayer.name>
Date:   Sat Aug 30 21:35:23 2014 -0700

    Add dh_strip_nondeterminism
    
    This is a self-contained Perl script that should hopefully be suitable
    for inclusion in debhelper itself.  I'm putting it in this repository
    temporarily but ultimately it should be added to the reproducible
    debhelper repo and a patch should be submitted to the BTS for debhelper.
    
    For now supports gzip only.
---
 dh_strip_nondeterminism | 195 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 195 insertions(+)

diff --git a/dh_strip_nondeterminism b/dh_strip_nondeterminism
new file mode 100755
index 0000000..1e9ea8c
--- /dev/null
+++ b/dh_strip_nondeterminism
@@ -0,0 +1,195 @@
+#!/usr/bin/perl -w
+
+=head1 NAME
+
+dh_strip_nondeterminism - strip uninteresting, non-deterministic information from files
+
+=cut
+
+use strict;
+use File::Find;
+use Debian::Debhelper::Dh_Lib;
+
+=head1 SYNOPSIS
+
+B<dh_strip_nondeterminism> [S<I<debhelper options>>] [B<-X>I<item>]
+
+=head1 DESCRIPTION
+
+B<dh_strip_nondeterminism> is a debhelper program that is responsible
+for stripping uninteresting, non-deterministic information, such as
+timestamps, from compiled files so that the build is reproducible.
+
+This program examines your package build directories and works out what to
+strip on its own. It is meant to use L<file(1)> and filenames to decide which
+files to process; for now only filenames are consulted, and only gzip files
+(F<*.gz>) are handled.  In general it seems to make very good guesses, and
+will do the right thing in almost all cases.
+
+=head1 OPTIONS
+
+=over 4
+
+=item B<-X>I<item>, B<--exclude=>I<item>
+
+Exclude files that contain I<item> anywhere in their filename from being
+stripped. You may use this option multiple times to build up a list of
+things to exclude.
+
+=back
+
+=cut
+
+init();
+
+# Return the first line printed by file(1) for $file (undef if it printed nothing).
+sub get_file_type {
+	my $file=shift;
+	# List-form pipe open runs file(1) without a shell, so every filename is
+	# safe, and a failed fork is reported rather than exec'ing in the parent.
+	open(my $fh, '-|', 'file', $file) or die "can't exec file: $!";
+	my $type=<$fh>;
+	close $fh;
+	return $type;
+}
+
+my @nondeterministic_files;	# [path, handler] pairs collected by testfile()
+sub testfile {	# File::Find callback; $_ is the entry's name in $File::Find::dir
+	my $name=$_;
+	return if -l $name or -d $name; # Never touch symlinks or directories.
+
+	# Honour -X/--exclude.  Each item is matched as a literal substring of
+	# the complete path (directory included), not just of the basename.
+	my $fn="$File::Find::dir/$name";
+	foreach my $f (@{$dh{EXCLUDE}}) {
+		return if ($fn=~m/\Q$f\E/);
+	}
+
+	# gzip: queue the file together with the handler that normalizes it.
+	push @nondeterministic_files, [$fn, \&handlers::gzip::normalize]
+		if $name=~m/\.gz$/;
+}
+
+foreach my $package (@{$dh{DOPACKAGES}}) {
+	# Reset the shared list, then let testfile() fill it for this package.
+	@nondeterministic_files=();
+	find(\&testfile, tmpdir($package));
+
+	# Handlers run only after find() has finished walking the tree.
+	foreach my $entry (@nondeterministic_files) {
+		my ($path, $normalize) = @$entry;
+		$normalize->($path);
+	}
+}
+
+package handlers::gzip;
+
+use Debian::Debhelper::Dh_Lib;
+use File::Temp qw/tempfile/;
+
+use constant {
+	FTEXT    => 1 << 0,
+	FHCRC    => 1 << 1,
+	FEXTRA   => 1 << 2,
+	FNAME    => 1 << 3,
+	FCOMMENT => 1 << 4,
+};
+
+# Rewrite the gzip file $filename in place with MTIME zeroed and the FNAME and
+# FHCRC header fields dropped.  Files without a gzip header are left alone;
+# malformed headers and I/O failures are fatal (reported through error()).
+sub normalize {
+	my ($filename) = @_;
+
+	open(my $fh, '<', $filename) or error "Unable to open $filename for reading: $!";
+	binmode($fh);	# gzip data is binary; keep I/O layers from translating it
+
+	# See RFC 1952
+	# 0   1   2   3   4   5   6   7   8   9   10
+	# +---+---+---+---+---+---+---+---+---+---+
+	# |ID1|ID2|CM |FLG|     MTIME     |XFL|OS |
+	# +---+---+---+---+---+---+---+---+---+---+
+
+	# Read the current header (MTIME is an unsigned little-endian field: 'V')
+	my $bytes_read = read($fh, my $hdr, 10);
+	defined($bytes_read) or error "$filename: read failed: $!";
+	return unless $bytes_read == 10;
+	my ($id1, $id2, $cm, $flg, $mtime, $xfl, $os) = unpack('CCCCVCC', $hdr);
+	return unless $id1 == 31 and $id2 == 139;
+
+	# Create the temporary file only now, so skipped files never get one.
+	my ($out_fh, $out_filename) = tempfile(DIR => dirname($filename), UNLINK => 1);
+	binmode($out_fh);
+
+	my $new_flg = $flg;
+	$new_flg &= ~FNAME;	# Don't include filename
+	$new_flg &= ~FHCRC;	# Don't include header CRC (not all implementations support it)
+	$mtime = 0;		# Zero out mtime (this is what `gzip -n` does)
+	# TODO: question: normalize some of the other fields, such as OS?
+	# Write a new header
+	print $out_fh pack('CCCCVCC', $id1, $id2, $cm, $new_flg, $mtime, $xfl, $os);
+
+	if ($flg & FEXTRA) {	# Copy through
+		# 0   1   2
+		# +---+---+=================================+
+		# | XLEN  |...XLEN bytes of "extra field"...|
+		# +---+---+=================================+
+		my $buf;
+		read($fh, $buf, 2) == 2 or error "$filename: Malformed gzip file";
+		my ($xlen) = unpack('v', $buf);
+		read($fh, $buf, $xlen) == $xlen or error "$filename: Malformed gzip file";
+		print $out_fh pack('vA*', $xlen, $buf);
+	}
+	if ($flg & FNAME) {	# Read but do not copy through
+		# 0
+		# +=========================================+
+		# |...original file name, zero-terminated...|
+		# +=========================================+
+		my $buf;
+		do {
+			read($fh, $buf, 1) == 1 or error "$filename: Malformed gzip file";
+		} until ord($buf) == 0;
+	}
+	if ($flg & FCOMMENT) {	# Copy through
+		# 0
+		# +===================================+
+		# |...file comment, zero-terminated...|
+		# +===================================+
+		my $buf;
+		do {
+			read($fh, $buf, 1) == 1 or error "$filename: Malformed gzip file";
+			print $out_fh $buf;
+		} until ord($buf) == 0;
+	}
+	if ($flg & FHCRC) {	# Read but do not copy through
+		# 0   1   2
+		# +---+---+
+		# | CRC16 |
+		# +---+---+
+		read($fh, my $buf, 2) == 2 or error "$filename: Malformed gzip file";
+	}
+
+	# Copy through the rest of the file.
+	# TODO: also normalize concatenated gzip files.  This will require reading and understanding
+	# each DEFLATE block (see RFC 1951), since gzip doesn't include lengths anywhere.
+	my ($buf, $len);
+	print $out_fh $buf while ($len = read($fh, $buf, 4096));
+	defined($len) or error "$filename: read failed: $!";
+
+	# Close before renaming: buffered write errors (e.g. disk full) surface here.
+	close($out_fh) or error "$filename: write failed: $!";
+	chmod((stat($fh))[2] & 07777, $out_filename) or error "$filename: unable to chmod: $!";
+	rename($out_filename, $filename) or error "$filename: unable to overwrite: rename: $!";
+}
+
+=head1 SEE ALSO
+
+L<debhelper(7)>
+
+This program is a part of debhelper.
+
+=head1 AUTHOR
+
+Andrew Ayer <agwa at andrewayer.name>
+
+=cut

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/strip-nondeterminism.git



More information about the Reproducible-builds mailing list