[Reproducible-builds] [strip-nondeterminism] 01/01: Completely reorganize and overhaul
Andrew Ayer
agwa at andrewayer.name
Mon Sep 8 01:15:56 UTC 2014
This is an automated email from the git hooks/post-receive script.
agwa-guest pushed a commit to branch master
in repository strip-nondeterminism.
commit ad9d61af15035e719d7a0223afcaa5fa25f0c815
Author: Andrew Ayer <agwa at andrewayer.name>
Date: Sun Sep 7 17:40:37 2014 -0700
Completely reorganize and overhaul
strip-nondeterminism is now a standalone command, and handlers are in
Perl modules. debhelper will depend on strip-nondeterminism.
---
.gitignore | 10 ++
MANIFEST | 11 ++
MANIFEST.SKIP | 21 +++
Makefile.PL | 20 +++
TODO | 6 +-
bin/dh_strip_nondeterminism | 85 +++++++++++
bin/strip-nondeterminism | 110 ++++++++++++++
dh_strip_nondeterminism | 248 -------------------------------
handlers/README | 3 -
handlers/gzip | 104 -------------
handlers/zip | 12 --
lib/StripNondeterminism.pm | 76 ++++++++++
lib/StripNondeterminism/handlers/ar.pm | 86 +++++++++++
lib/StripNondeterminism/handlers/gzip.pm | 122 +++++++++++++++
lib/StripNondeterminism/handlers/jar.pm | 43 ++++++
lib/StripNondeterminism/handlers/zip.pm | 45 ++++++
16 files changed, 632 insertions(+), 370 deletions(-)
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..61bb845
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,10 @@
+# Makemaker generated files and dirs.
+/Makefile
+/blib
+/MakeMaker-*
+/pm_to_blib
+/MYMETA.yml
+
+# Temp and old files.
+*~
+*.old
diff --git a/MANIFEST b/MANIFEST
new file mode 100644
index 0000000..c0c361b
--- /dev/null
+++ b/MANIFEST
@@ -0,0 +1,11 @@
+COPYING
+MANIFEST This list of files
+Makefile.PL
+README
+bin/dh_strip_nondeterminism
+bin/strip-nondeterminism
+lib/StripNondeterminism.pm
+lib/StripNondeterminism/handlers/ar.pm
+lib/StripNondeterminism/handlers/gzip.pm
+lib/StripNondeterminism/handlers/jar.pm
+lib/StripNondeterminism/handlers/zip.pm
diff --git a/MANIFEST.SKIP b/MANIFEST.SKIP
new file mode 100644
index 0000000..93dfd07
--- /dev/null
+++ b/MANIFEST.SKIP
@@ -0,0 +1,21 @@
+# Version control files and dirs.
+\B\.git\b
+
+# Makemaker generated files and dirs.
+^MANIFEST\.
+^Makefile$
+^Makefile.old$
+^blib/
+^MakeMaker-\d
+^pm_to_blib
+
+# Temp, old and emacs backup files.
+~$
+\.old$
+\.swp$
+^#.*#$
+^\.#
+
+# Odds and ends
+\bLEFTOFF$
+\bTODO$
diff --git a/Makefile.PL b/Makefile.PL
new file mode 100644
index 0000000..affb564
--- /dev/null
+++ b/Makefile.PL
@@ -0,0 +1,20 @@
+use strict;
+use warnings;
+use ExtUtils::MakeMaker;
+
+my $MMVER = eval $ExtUtils::MakeMaker::VERSION;
+
+WriteMakefile(
+ NAME => 'strip-nondeterminism',
+ AUTHOR => 'Andrew Ayer <agwa at andrewayer.name>',
+ VERSION_FROM => 'lib/StripNondeterminism.pm',
+ ABSTRACT => 'tool for stripping non-determinism from files',
+ EXE_FILES => [qw(bin/dh_strip_nondeterminism bin/strip-nondeterminism)],
+ PREREQ_PM => {
+ 'Archive::Zip' => 0,
+ 'Getopt::Long' => 0,
+ },
+ LICENSE => "gpl",
+ dist => { COMPRESS => 'gzip -9nf', SUFFIX => 'gz', },
+ clean => { FILES => 'StripNondeterminism-*' },
+);
diff --git a/TODO b/TODO
index 275cb94..8020fc4 100644
--- a/TODO
+++ b/TODO
@@ -1,10 +1,10 @@
Write handlers for:
- gzip
+ [DONE] gzip
javadocs
- ar
+ [DONE] ar
...
-Write main strip-nondeterminism command
+[DONE] Write main strip-nondeterminism command
It would just parse command line arguments and then execute the correct handler
Write dh_strip_nondeterminism
diff --git a/bin/dh_strip_nondeterminism b/bin/dh_strip_nondeterminism
new file mode 100755
index 0000000..4adbf93
--- /dev/null
+++ b/bin/dh_strip_nondeterminism
@@ -0,0 +1,85 @@
+#!/usr/bin/perl -w
+
+=head1 NAME
+
+dh_strip_nondeterminism - strip uninteresting, non-deterministic information from files
+
+=cut
+
+use strict;
+use File::Find;
+use Debian::Debhelper::Dh_Lib;
+use StripNondeterminism;
+
+=head1 SYNOPSIS
+
+B<dh_strip_nondeterminism> [S<I<debhelper options>>] [B<-X>I<item>]
+
+=head1 DESCRIPTION
+
+B<dh_strip_nondeterminism> is a debhelper program that is responsible
+for stripping uninteresting, non-deterministic information, such as
+timestamps, from compiled files so that the build is reproducible.
+
+This program examines your package build directories and works out what
+to strip on its own. It uses L<file(1)> and filenames to figure out what
+files should have non-determinism stripped from them. In general it
+seems to make very good guesses, and will do the right thing in almost
+all cases.
+
+=head1 OPTIONS
+
+=over 4
+
+=item B<-X>I<item>, B<--exclude=>I<item>
+
+Exclude files that contain I<item> anywhere in their filename from being
+stripped. You may use this option multiple times to build up a list of
+things to exclude.
+
+=back
+
+=cut
+
+init();
+
+my @nondeterministic_files;
+sub testfile {
+ return if -l $_ or -d $_; # Skip directories and symlinks always.
+
+ # See if we were asked to exclude this file.
+ # Note that we have to test on the full filename, including directory.
+ my $fn="$File::Find::dir/$_";
+ foreach my $f (@{$dh{EXCLUDE}}) {
+ return if ($fn=~m/\Q$f\E/);
+ }
+
+ my $normalizer = StripNondeterminism::get_normalizer_for_file($_);
+ if ($normalizer) {
+ push @nondeterministic_files, [$fn, $normalizer];
+ }
+}
+
+foreach my $package (@{$dh{DOPACKAGES}}) {
+ my $tmp=tmpdir($package);
+
+ @nondeterministic_files=();
+ find(\&testfile,$tmp);
+
+ foreach (@nondeterministic_files) {
+ my ($path, $normalize) = @$_;
+ $normalize->($path);
+ }
+}
+
+=head1 SEE ALSO
+
+L<debhelper(7)>
+
+This program is a part of debhelper.
+
+=head1 AUTHOR
+
+Andrew Ayer <agwa at andrewayer.name>
+
+=cut
diff --git a/bin/strip-nondeterminism b/bin/strip-nondeterminism
new file mode 100755
index 0000000..38c68ca
--- /dev/null
+++ b/bin/strip-nondeterminism
@@ -0,0 +1,110 @@
+#!/usr/bin/perl
+
+#
+# Copyright 2014 Andrew Ayer
+#
+# This file is part of strip-nondeterminism.
+#
+# strip-nondeterminism is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# strip-nondeterminism is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with strip-nondeterminism. If not, see <http://www.gnu.org/licenses/>.
+#
+
+use strict;
+use warnings;
+
+use StripNondeterminism;
+use Getopt::Long;
+Getopt::Long::Configure(qw(no_ignore_case permute bundling));
+
+my $cmd = $0;
+$cmd =~ s/.*\///;
+my $usage = "Usage: $cmd [-t|--type FILETYPE] FILENAME\n";
+
+my @opspec = (
+ 'type|t=s', 'help|h', 'version|V',
+);
+my $glop = {};
+GetOptions($glop, @opspec) || die $usage;
+
+if ($glop->{help}) {
+ print $usage;
+ exit 0;
+}
+
+if ($glop->{version}) {
+ print "$cmd version $StripNondeterminism::VERSION\n";
+ exit 0;
+}
+
+die $usage if @ARGV == 0;
+
+for my $filename (@ARGV) {
+ my $normalizer;
+ if ($glop->{type}) {
+ $normalizer = StripNondeterminism::get_normalizer_by_name($glop->{type});
+ die $glop->{type} . ": Unknown file type\n" unless $normalizer;
+ } else {
+ $normalizer = StripNondeterminism::get_normalizer_for_file($filename);
+ next unless $normalizer;
+ }
+
+ $normalizer->($filename);
+}
+
+__END__
+
+=head1 NAME
+
+strip-nondeterminism - strip non-deterministic information from files
+
+=head1 SYNOPSIS
+
+ strip-nondeterminism [-t filetype] filename ...
+
+=head1 DESCRIPTION
+
+B<strip-nondeterminism> is a tool to strip bits of non-deterministic
+information, such as timestamps, from files. It can be used as
+a post-processing step to make a build reproducible, when the build
+process itself cannot be made deterministic.
+
+=head1 OPTIONS
+
+=over 4
+
+=item B<-t> I<filetype>, B<--type> I<filetype>
+
+Use the normalizer for the given file type (ar, gzip, jar, zip). If this
+option is not specified, the file type is detected automatically based on
+the file name extension.
+
+=item B<-h>, B<--help>
+
+Display this help message.
+
+=item B<-V>, B<--version>
+
+Print only the version string and then quit.
+
+=back
+
+=head1 AUTHOR
+
+Andrew Ayer
+
+=head1 COPYRIGHT
+
+strip-nondeterminism is free software. You can redistribute it and/or
+modify it under the terms of the GNU General Public License, version 3.
+
+=cut
diff --git a/dh_strip_nondeterminism b/dh_strip_nondeterminism
deleted file mode 100755
index 501b1a4..0000000
--- a/dh_strip_nondeterminism
+++ /dev/null
@@ -1,248 +0,0 @@
-#!/usr/bin/perl -w
-
-=head1 NAME
-
-dh_strip_nondeterminism - strip uninteresting, non-deterministic information from files
-
-=cut
-
-use strict;
-use File::Find;
-use Debian::Debhelper::Dh_Lib;
-
-=head1 SYNOPSIS
-
-B<dh_strip_nondeterminism> [S<I<debhelper options>>] [B<-X>I<item>]
-
-=head1 DESCRIPTION
-
-B<dh_strip_nondeterminism> is a debhelper program that is responsible
-for stripping uninteresting, non-deterministic information, such as
-timestamps, from compiled files so that the build is reproducible.
-
-This program examines your package build directories and works out what
-to strip on its own. It uses L<file(1)> and filenames to figure out what
-files should have non-determinism stripped from them. In general it
-seems to make very good guesses, and will do the right thing in almost
-all cases.
-
-=head1 OPTIONS
-
-=over 4
-
-=item B<-X>I<item>, B<--exclude=>I<item>
-
-Exclude files that contain I<item> anywhere in their filename from being
-stripped. You may use this option multiple times to build up a list of
-things to exclude.
-
-=back
-
-=cut
-
-init();
-
-# I could just use `file $_[0]`, but this is safer
-sub get_file_type {
- my $file=shift;
- open (FILE, '-|') # handle all filenames safely
- || exec('file', $file)
- || die "can't exec file: $!";
- my $type=<FILE>;
- close FILE;
- return $type;
-}
-
-my @nondeterministic_files;
-sub testfile {
- return if -l $_ or -d $_; # Skip directories and symlinks always.
-
- # See if we were asked to exclude this file.
- # Note that we have to test on the full filename, including directory.
- my $fn="$File::Find::dir/$_";
- foreach my $f (@{$dh{EXCLUDE}}) {
- return if ($fn=~m/\Q$f\E/);
- }
-
- # gzip
- if (m/\.gz$/) {
- # No need for get_file_type b/c the gzip normalizer
- # silently ignores non-gzip files
- push @nondeterministic_files, [$fn, \&handlers::gzip::normalize];
- }
- # zip
- if (m/\.zip$/ && get_file_type($_) =~ m/Zip archive data/) {
- push @nondeterministic_files, [$fn, \&handlers::zip::normalize];
- }
- # jar
- if (m/\.jar$/ && get_file_type($_) =~ m/Zip archive data/) {
- push @nondeterministic_files, [$fn, \&handlers::jar::normalize];
- }
-}
-
-foreach my $package (@{$dh{DOPACKAGES}}) {
- my $tmp=tmpdir($package);
-
- @nondeterministic_files=();
- find(\&testfile,$tmp);
-
- foreach (@nondeterministic_files) {
- my ($path, $normalize) = @$_;
- $normalize->($path);
- }
-}
-
-package handlers::gzip;
-
-use Debian::Debhelper::Dh_Lib;
-use File::Temp qw/tempfile/;
-
-use constant {
- FTEXT => 1 << 0,
- FHCRC => 1 << 1,
- FEXTRA => 1 << 2,
- FNAME => 1 << 3,
- FCOMMENT => 1 << 4,
-};
-
-sub normalize {
- my ($filename) = @_;
-
- open(my $fh, '<', $filename) or error "Unable to open $filename for reading: $!";
- my ($out_fh, $out_filename) = tempfile(DIR => dirname($filename), UNLINK => 1);
-
- # See RFC 1952
-
- # 0 1 2 3 4 5 6 7 8 9 10
- # +---+---+---+---+---+---+---+---+---+---+
- # |ID1|ID2|CM |FLG| MTIME |XFL|OS |
- # +---+---+---+---+---+---+---+---+---+---+
-
- # Read the current header
- my $hdr;
- my $bytes_read = read($fh, $hdr, 10);
- return unless $bytes_read == 10;
- my ($id1, $id2, $cm, $flg, $mtime, $xfl, $os) = unpack('CCCCl<CC', $hdr);
- return unless $id1 == 31 and $id2 == 139;
-
- my $new_flg = $flg;
- $new_flg &= ~FNAME; # Don't include filename
- $new_flg &= ~FHCRC; # Don't include header CRC (not all implementations support it)
- $mtime = 0; # Zero out mtime (this is what `gzip -n` does)
- # TODO: question: normalize some of the other fields, such as OS?
-
- # Write a new header
- print $out_fh pack('CCCCl<CC', $id1, $id2, $cm, $new_flg, $mtime, $xfl, $os);
-
- if ($flg & FEXTRA) { # Copy through
- # 0 1 2
- # +---+---+=================================+
- # | XLEN |...XLEN bytes of "extra field"...|
- # +---+---+=================================+
- my $buf;
- read($fh, $buf, 2) == 2 or error "$filename: Malformed gzip file";
- my ($xlen) = unpack('v', $buf);
- read($fh, $buf, $xlen) == $xlen or error "$filename: Malformed gzip file";
- print $out_fh pack('vA*', $xlen, $buf);
- }
- if ($flg & FNAME) { # Read but do not copy through
- # 0
- # +=========================================+
- # |...original file name, zero-terminated...|
- # +=========================================+
- while (1) {
- my $buf;
- read($fh, $buf, 1) == 1 or error "$filename: Malformed gzip file";
- last if ord($buf) == 0;
- }
- }
- if ($flg & FCOMMENT) { # Copy through
- # 0
- # +===================================+
- # |...file comment, zero-terminated...|
- # +===================================+
- while (1) {
- my $buf;
- read($fh, $buf, 1) == 1 or error "$filename: Malformed gzip file";
- print $out_fh $buf;
- last if ord($buf) == 0;
- }
- }
- if ($flg & FHCRC) { # Read but do not copy through
- # 0 1 2
- # +---+---+
- # | CRC16 |
- # +---+---+
- my $buf;
- read($fh, $buf, 2) == 2 or error "$filename: Malformed gzip file";
- }
-
- # Copy through the rest of the file.
- # TODO: also normalize concatenated gzip files. This will require reading and understanding
- # each DEFLATE block (see RFC 1951), since gzip doesn't include lengths anywhere.
- while (1) {
- my $buf;
- my $bytes_read = read($fh, $buf, 4096);
- defined($bytes_read) or error "$filename: read failed: $!";
- print $out_fh $buf;
- last if $bytes_read == 0;
- }
-
- chmod((stat($fh))[2] & 07777, $out_filename);
- rename($out_filename, $filename) or error "$filename: unable to overwrite: rename: $!";
-}
-
-package handlers::zip;
-
-use Debian::Debhelper::Dh_Lib;
-use Archive::Zip;
-
-Archive::Zip::setErrorHandler(\&error);
-
-# A magic number from Archive::Zip for the earliest timestamp that
-# can be represented by a Zip file. From the Archive::Zip source:
-# "Note, this isn't exactly UTC 1980, it's 1980 + 12 hours and 1
-# minute so that nothing timezoney can muck us up."
-use constant SAFE_EPOCH => 315576060;
-
-sub normalize {
- my ($zip_filename, $filename_cmp) = @_;
- $filename_cmp ||= sub { $a cmp $b };
- my $zip = Archive::Zip->new($zip_filename);
- my @filenames = sort $filename_cmp $zip->memberNames();
- for my $filename (@filenames) {
- my $member = $zip->removeMember($filename);
- $zip->addMember($member);
- $member->setLastModFileDateTimeFromUnix(SAFE_EPOCH);
- }
- $zip->overwrite();
-}
-
-package handlers::jar;
-
-sub _jar_filename_cmp {
- # META-INF/ and META-INF/MANIFEST.MF are expected to be the first entries in the Zip archive.
- return 0 if $a eq $b;
- for (qw{META-INF/ META-INF/MANIFEST.MF}) {
- return -1 if $a eq $_;
- return 1 if $b eq $_;
- }
- return $a cmp $b;
-}
-
-sub normalize {
- my ($jar_filename) = @_;
- handlers::zip::normalize($jar_filename, \&_jar_filename_cmp);
-}
-
-=head1 SEE ALSO
-
-L<debhelper(7)>
-
-This program is a part of debhelper.
-
-=head1 AUTHOR
-
-Andrew Ayer <agwa at andrewayer.name>
-
-=cut
diff --git a/handlers/README b/handlers/README
deleted file mode 100644
index be83133..0000000
--- a/handlers/README
+++ /dev/null
@@ -1,3 +0,0 @@
-A handler reads a file of a particular format from stdin, strips the
-non-determinism from it, and writes the result to stdout. A handler
-can be written in any language.
diff --git a/handlers/gzip b/handlers/gzip
deleted file mode 100755
index 79134b1..0000000
--- a/handlers/gzip
+++ /dev/null
@@ -1,104 +0,0 @@
-#!/usr/bin/perl
-
-# Copyright 2014 Andrew Ayer
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-use strict;
-use warnings;
-
-use constant {
- FTEXT => 1 << 0,
- FHCRC => 1 << 1,
- FEXTRA => 1 << 2,
- FNAME => 1 << 3,
- FCOMMENT => 1 << 4,
-};
-
-# See RFC 1952
-
-# 0 1 2 3 4 5 6 7 8 9 10
-# +---+---+---+---+---+---+---+---+---+---+
-# |ID1|ID2|CM |FLG| MTIME |XFL|OS |
-# +---+---+---+---+---+---+---+---+---+---+
-
-# Read the current header
-my $hdr;
-my $bytes_read = read(*STDIN, $hdr, 10);
-die "Not a gzip file" unless $bytes_read == 10;
-my ($id1, $id2, $cm, $flg, $mtime, $xfl, $os) = unpack('CCCCl<CC', $hdr);
-die "Not a gzip file" unless $id1 == 31 and $id2 == 139;
-
-my $new_flg = $flg;
-$new_flg &= ~FNAME; # Don't include filename
-$new_flg &= ~FHCRC; # Don't include header CRC (not all implementations support it)
-$mtime = 0; # Zero out mtime (this is what `gzip -n` does)
-# TODO: question: normalize some of the other fields, such as OS?
-
-# Write a new header
-print pack('CCCCl<CC', $id1, $id2, $cm, $new_flg, $mtime, $xfl, $os);
-
-if ($flg & FEXTRA) { # Copy through
- # 0 1 2
- # +---+---+=================================+
- # | XLEN |...XLEN bytes of "extra field"...|
- # +---+---+=================================+
- my $buf;
- read(*STDIN, $buf, 2) == 2 or die "Malformed gzip file";
- my ($xlen) = unpack('v', $buf);
- read(*STDIN, $buf, $xlen) == $xlen or die "Malformed gzip file";
- print pack('vA*', $xlen, $buf);
-}
-if ($flg & FNAME) { # Read but do not copy through
- # 0
- # +=========================================+
- # |...original file name, zero-terminated...|
- # +=========================================+
- while (1) {
- my $buf;
- read(*STDIN, $buf, 1) == 1 or die "Malformed gzip file";
- last if ord($buf) == 0;
- }
-}
-if ($flg & FCOMMENT) { # Copy through
- # 0
- # +===================================+
- # |...file comment, zero-terminated...|
- # +===================================+
- while (1) {
- my $buf;
- read(*STDIN, $buf, 1) == 1 or die "Malformed gzip file";
- print $buf;
- last if ord($buf) == 0;
- }
-}
-if ($flg & FHCRC) { # Read but do not copy through
- # 0 1 2
- # +---+---+
- # | CRC16 |
- # +---+---+
- my $buf;
- read(*STDIN, $buf, 2) == 2 or die "Not a gzip file";
-}
-
-# Copy through the rest of the file.
-# TODO: support concatenated gzip files. This will require reading and understanding
-# each DEFLATE block (see RFC 1951), since gzip doesn't include lengths anywhere.
-while (1) {
- my $buf;
- my $bytes_read = read(*STDIN, $buf, 4096);
- defined($bytes_read) or die "read failed: $!";
- print $buf;
- last if $bytes_read == 0;
-}
diff --git a/handlers/zip b/handlers/zip
deleted file mode 100755
index 7a1dba0..0000000
--- a/handlers/zip
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/python
-
-from zipfile import ZipFile
-import sys
-
-with ZipFile(sys.stdout, 'w') as outzip:
- with ZipFile(sys.stdin, 'r') as inzip:
- for info in sorted(inzip.infolist(), key=lambda info: info.filename):
- # 1980-01-01 is the earliest date that the zip format supports
- info.date_time = (1980, 1, 1, 0, 0, 0)
- content = inzip.read(info.filename)
- outzip.writestr(info, content)
diff --git a/lib/StripNondeterminism.pm b/lib/StripNondeterminism.pm
new file mode 100644
index 0000000..b01d673
--- /dev/null
+++ b/lib/StripNondeterminism.pm
@@ -0,0 +1,76 @@
+#
+# Copyright 2014 Andrew Ayer
+#
+# This file is part of strip-nondeterminism.
+#
+# strip-nondeterminism is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# strip-nondeterminism is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with strip-nondeterminism. If not, see <http://www.gnu.org/licenses/>.
+#
+package StripNondeterminism;
+
+use strict;
+use warnings;
+
+use StripNondeterminism::handlers::ar;
+use StripNondeterminism::handlers::gzip;
+use StripNondeterminism::handlers::jar;
+use StripNondeterminism::handlers::zip;
+
+our($VERSION);
+
+$VERSION = '0.001'; # 0.001
+
+sub _get_file_type {
+ my $file=shift;
+ open (FILE, '-|') # handle all filenames safely
+ || exec('file', $file)
+ || die "can't exec file: $!";
+ my $type=<FILE>;
+ close FILE;
+ return $type;
+}
+
+sub get_normalizer_for_file {
+ $_ = shift;
+
+ return undef if -d $_; # Skip directories
+
+ # ar
+ if (m/\.a$/ && _get_file_type($_) =~ m/ar archive/) {
+ return \&StripNondeterminism::handlers::ar::normalize;
+ }
+ # gzip
+ if (m/\.gz$/ && _get_file_type($_) =~ m/gzip compressed data/) {
+ return \&StripNondeterminism::handlers::gzip::normalize;
+ }
+ # jar
+ if (m/\.jar$/ && _get_file_type($_) =~ m/Zip archive data/) {
+ return \&StripNondeterminism::handlers::jar::normalize;
+ }
+ # zip
+ if (m/\.zip$/ && _get_file_type($_) =~ m/Zip archive data/) {
+ return \&StripNondeterminism::handlers::zip::normalize;
+ }
+ return undef;
+}
+
+sub get_normalizer_by_name {
+ $_ = shift;
+ return \&StripNondeterminism::handlers::ar::normalize if $_ eq 'ar';
+ return \&StripNondeterminism::handlers::gzip::normalize if $_ eq 'gzip';
+ return \&StripNondeterminism::handlers::jar::normalize if $_ eq 'jar';
+ return \&StripNondeterminism::handlers::zip::normalize if $_ eq 'zip';
+ return undef;
+}
+
+1;
diff --git a/lib/StripNondeterminism/handlers/ar.pm b/lib/StripNondeterminism/handlers/ar.pm
new file mode 100644
index 0000000..02d1b99
--- /dev/null
+++ b/lib/StripNondeterminism/handlers/ar.pm
@@ -0,0 +1,86 @@
+# Copyright © 2014 Jérémy Bobbio <lunar at debian.org>
+# Copyright © 2014 Niko Tyni <ntyni at debian.org>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# Some code borrowed from ArFile
+# Copyright (C) 2007 Stefano Zacchiroli <zack at debian.org>
+# Copyright (C) 2007 Filippo Giunchedi <filippo at debian.org>
+
+package StripNondeterminism::handlers::ar;
+
+use strict;
+use warnings;
+
+use Fcntl q/SEEK_SET/;
+
+sub normalize {
+ my ($file) = @_;
+
+ my $GLOBAL_HEADER = "!<arch>\n";
+ my $GLOBAL_HEADER_LENGTH = length $GLOBAL_HEADER;
+
+ my $FILE_HEADER_LENGTH = 60;
+ my $FILE_MAGIC = "`\n";
+
+ my $buf;
+
+ open(my $fh, '+<', $file)
+ or die("failed to open $file for read+write: $!");
+
+ read $fh, $buf, $GLOBAL_HEADER_LENGTH;
+ return if $buf ne $GLOBAL_HEADER;
+
+ while (1) {
+ my $file_header_start = tell $fh;
+ my $count = read $fh, $buf, $FILE_HEADER_LENGTH;
+ die "reading $file failed: $!" if !defined $count;
+ last if $count == 0;
+
+ # http://en.wikipedia.org/wiki/Ar_(Unix)
+ #from to Name Format
+ #0 15 File name ASCII
+ #16 27 File modification date Decimal
+ #28 33 Owner ID Decimal
+ #34 39 Group ID Decimal
+ #40 47 File mode Octal
+ #48 57 File size in bytes Decimal
+ #58 59 File magic \140\012
+
+ # FIXME: is this correct?
+ last if $count == 1 and eof($fh) and $buf eq "\n";
+
+ die "Incorrect header length"
+ if length $buf != $FILE_HEADER_LENGTH;
+ die "Incorrect file magic"
+ if substr($buf, 58, length($FILE_MAGIC)) ne $FILE_MAGIC;
+
+ my $file_size = substr($buf, 48, 10);
+ seek $fh, $file_header_start + 16, SEEK_SET;
+
+ # mtime
+ syswrite $fh, sprintf("%-12d", 0);
+ # owner
+ syswrite $fh, sprintf("%-6d", 0);
+ # group
+ syswrite $fh, sprintf("%-6d", 0);
+ # file mode
+ syswrite $fh, sprintf("%-8o", 0644);
+
+ # move to next member
+ seek $fh, $file_header_start + $FILE_HEADER_LENGTH + $file_size, SEEK_SET;
+ }
+}
+
+1;
diff --git a/lib/StripNondeterminism/handlers/gzip.pm b/lib/StripNondeterminism/handlers/gzip.pm
new file mode 100644
index 0000000..daa82d2
--- /dev/null
+++ b/lib/StripNondeterminism/handlers/gzip.pm
@@ -0,0 +1,122 @@
+#
+# Copyright 2014 Andrew Ayer
+#
+# This file is part of strip-nondeterminism.
+#
+# strip-nondeterminism is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# strip-nondeterminism is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with strip-nondeterminism. If not, see <http://www.gnu.org/licenses/>.
+#
+package StripNondeterminism::handlers::gzip;
+
+use strict;
+use warnings;
+
+use File::Temp qw/tempfile/;
+use File::Basename;
+
+use constant {
+ FTEXT => 1 << 0,
+ FHCRC => 1 << 1,
+ FEXTRA => 1 << 2,
+ FNAME => 1 << 3,
+ FCOMMENT => 1 << 4,
+};
+
+sub normalize {
+ my ($filename) = @_;
+
+ open(my $fh, '<', $filename) or die "Unable to open $filename for reading: $!";
+ my ($out_fh, $out_filename) = tempfile(DIR => dirname($filename), UNLINK => 1);
+
+ # See RFC 1952
+
+ # 0 1 2 3 4 5 6 7 8 9 10
+ # +---+---+---+---+---+---+---+---+---+---+
+ # |ID1|ID2|CM |FLG| MTIME |XFL|OS |
+ # +---+---+---+---+---+---+---+---+---+---+
+
+ # Read the current header
+ my $hdr;
+ my $bytes_read = read($fh, $hdr, 10);
+ return unless $bytes_read == 10;
+ my ($id1, $id2, $cm, $flg, $mtime, $xfl, $os) = unpack('CCCCl<CC', $hdr);
+ return unless $id1 == 31 and $id2 == 139;
+
+ my $new_flg = $flg;
+ $new_flg &= ~FNAME; # Don't include filename
+ $new_flg &= ~FHCRC; # Don't include header CRC (not all implementations support it)
+ $mtime = 0; # Zero out mtime (this is what `gzip -n` does)
+ # TODO: question: normalize some of the other fields, such as OS?
+
+ # Write a new header
+ print $out_fh pack('CCCCl<CC', $id1, $id2, $cm, $new_flg, $mtime, $xfl, $os);
+
+ if ($flg & FEXTRA) { # Copy through
+ # 0 1 2
+ # +---+---+=================================+
+ # | XLEN |...XLEN bytes of "extra field"...|
+ # +---+---+=================================+
+ my $buf;
+ read($fh, $buf, 2) == 2 or die "$filename: Malformed gzip file";
+ my ($xlen) = unpack('v', $buf);
+ read($fh, $buf, $xlen) == $xlen or die "$filename: Malformed gzip file";
+ print $out_fh pack('vA*', $xlen, $buf);
+ }
+ if ($flg & FNAME) { # Read but do not copy through
+ # 0
+ # +=========================================+
+ # |...original file name, zero-terminated...|
+ # +=========================================+
+ while (1) {
+ my $buf;
+ read($fh, $buf, 1) == 1 or die "$filename: Malformed gzip file";
+ last if ord($buf) == 0;
+ }
+ }
+ if ($flg & FCOMMENT) { # Copy through
+ # 0
+ # +===================================+
+ # |...file comment, zero-terminated...|
+ # +===================================+
+ while (1) {
+ my $buf;
+ read($fh, $buf, 1) == 1 or die "$filename: Malformed gzip file";
+ print $out_fh $buf;
+ last if ord($buf) == 0;
+ }
+ }
+ if ($flg & FHCRC) { # Read but do not copy through
+ # 0 1 2
+ # +---+---+
+ # | CRC16 |
+ # +---+---+
+ my $buf;
+ read($fh, $buf, 2) == 2 or die "$filename: Malformed gzip file";
+ }
+
+ # Copy through the rest of the file.
+ # TODO: also normalize concatenated gzip files. This will require reading and understanding
+ # each DEFLATE block (see RFC 1951), since gzip doesn't include lengths anywhere.
+ while (1) {
+ my $buf;
+ my $bytes_read = read($fh, $buf, 4096);
+ defined($bytes_read) or die "$filename: read failed: $!";
+ print $out_fh $buf;
+ last if $bytes_read == 0;
+ }
+
+ chmod((stat($fh))[2] & 07777, $out_filename);
+ rename($out_filename, $filename) or die "$filename: unable to overwrite: rename: $!";
+}
+
+1;
diff --git a/lib/StripNondeterminism/handlers/jar.pm b/lib/StripNondeterminism/handlers/jar.pm
new file mode 100644
index 0000000..6be7071
--- /dev/null
+++ b/lib/StripNondeterminism/handlers/jar.pm
@@ -0,0 +1,43 @@
+#
+# Copyright 2014 Andrew Ayer
+#
+# This file is part of strip-nondeterminism.
+#
+# strip-nondeterminism is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# strip-nondeterminism is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with strip-nondeterminism. If not, see <http://www.gnu.org/licenses/>.
+#
+package StripNondeterminism::handlers::jar;
+
+use strict;
+use warnings;
+
+use Archive::Zip;
+use StripNondeterminism::handlers::zip;
+
+sub _jar_filename_cmp ($$) {
+ my ($a, $b) = @_;
+ # META-INF/ and META-INF/MANIFEST.MF are expected to be the first entries in the Zip archive.
+ return 0 if $a eq $b;
+ for (qw{META-INF/ META-INF/MANIFEST.MF}) {
+ return -1 if $a eq $_;
+ return 1 if $b eq $_;
+ }
+ return $a cmp $b;
+}
+
+sub normalize {
+ my ($jar_filename) = @_;
+ StripNondeterminism::handlers::zip::normalize($jar_filename, \&_jar_filename_cmp);
+}
+
+1;
diff --git a/lib/StripNondeterminism/handlers/zip.pm b/lib/StripNondeterminism/handlers/zip.pm
new file mode 100644
index 0000000..e19c7fc
--- /dev/null
+++ b/lib/StripNondeterminism/handlers/zip.pm
@@ -0,0 +1,45 @@
+#
+# Copyright 2014 Andrew Ayer
+#
+# This file is part of strip-nondeterminism.
+#
+# strip-nondeterminism is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# strip-nondeterminism is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with strip-nondeterminism. If not, see <http://www.gnu.org/licenses/>.
+#
+package StripNondeterminism::handlers::zip;
+
+use strict;
+use warnings;
+
+use Archive::Zip;
+
+# A magic number from Archive::Zip for the earliest timestamp that
+# can be represented by a Zip file. From the Archive::Zip source:
+# "Note, this isn't exactly UTC 1980, it's 1980 + 12 hours and 1
+# minute so that nothing timezoney can muck us up."
+use constant SAFE_EPOCH => 315576060;
+
+sub normalize {
+ my ($zip_filename, $filename_cmp) = @_;
+ $filename_cmp ||= sub { $a cmp $b };
+ my $zip = Archive::Zip->new($zip_filename);
+ my @filenames = sort $filename_cmp $zip->memberNames();
+ for my $filename (@filenames) {
+ my $member = $zip->removeMember($filename);
+ $zip->addMember($member);
+ $member->setLastModFileDateTimeFromUnix(SAFE_EPOCH);
+ }
+ $zip->overwrite();
+}
+
+1;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/strip-nondeterminism.git
More information about the Reproducible-builds
mailing list