[Reproducible-builds] Bug#808002: Bug#808002: diffoscope: Add support for Mozilla-optimized ZIPs
Mike Hommey
mh at glandium.org
Wed Dec 16 07:40:14 UTC 2015
On Tue, Dec 15, 2015 at 04:31:46PM +0100, Jérémy Bobbio wrote:
> Mike Hommey:
> > It would be useful for diffoscope to output differences in omni.ja files as
> > for other Zip files, instead of ending up with a diff of a hexdump.
> >
> > The attached patch implements minimal support for this. However, it doesn't
> > look at the difference in the `preload` value.
>
> Great! I think it's fine to just skip the preload value. It will show up
> in the fallback binary comparison if that's the only remaining difference.
>
> I was going to merge this, but actually I have to ask: would you be kind
> enough to amend the test suite as well?
Attached.
Mike
-------------- next part --------------
diff --git a/diffoscope/comparators/__init__.py b/diffoscope/comparators/__init__.py
index ed24f63..d308b6e 100644
--- a/diffoscope/comparators/__init__.py
+++ b/diffoscope/comparators/__init__.py
@@ -72,7 +72,7 @@ from diffoscope.comparators.symlink import Symlink
from diffoscope.comparators.text import TextFile
from diffoscope.comparators.tar import TarFile
from diffoscope.comparators.xz import XzFile
-from diffoscope.comparators.zip import ZipFile
+from diffoscope.comparators.zip import ZipFile, MozillaZipFile
def bail_if_non_existing(*paths):
@@ -153,6 +153,7 @@ FILE_CLASSES = (
TarFile,
XzFile,
ZipFile,
+ MozillaZipFile,
ImageFile,
CbfsFile,
)
diff --git a/diffoscope/comparators/zip.py b/diffoscope/comparators/zip.py
index ecdc77b..42c9a9f 100644
--- a/diffoscope/comparators/zip.py
+++ b/diffoscope/comparators/zip.py
@@ -111,3 +111,54 @@ class ZipFile(File):
zipinfo_difference = Difference.from_command(Zipinfo, self.path, other.path) or \
Difference.from_command(ZipinfoVerbose, self.path, other.path)
return [zipinfo_difference]
+
+
+class MozillaZipCommandMixin(object):
+ def wait(self):
+ # zipinfo emits an error when reading Mozilla-optimized ZIPs,
+ # which is fine to ignore.
+ super(Zipinfo, self).wait()
+ return 0
+
+
+class MozillaZipinfo(MozillaZipCommandMixin, Zipinfo): pass
+
+
+class MozillaZipinfoVerbose(MozillaZipCommandMixin, ZipinfoVerbose): pass
+
+
+class MozillaZipContainer(ZipContainer):
+ def open_archive(self):
+ # This is gross: Monkeypatch zipfile._EndRecData to work with
+ # Mozilla-optimized ZIPs
+ _orig_EndRecData = zipfile._EndRecData
+ def _EndRecData(fh):
+ endrec = _orig_EndRecData(fh)
+ if endrec:
+ endrec[zipfile._ECD_LOCATION] = (endrec[zipfile._ECD_OFFSET] +
+ endrec[zipfile._ECD_SIZE])
+ return endrec
+ zipfile._EndRecData = _EndRecData
+ result = super(MozillaZipContainer, self).open_archive()
+ zipfile._EndRecData = _orig_EndRecData
+ return result
+
+
+class MozillaZipFile(File):
+ CONTAINER_CLASS = MozillaZipContainer
+
+ @staticmethod
+ def recognizes(file):
+ # Mozilla-optimized ZIPs start with a 32-bit little endian integer
+ # indicating the amount of data to preload, followed by the ZIP
+ # central directory (with a PK\x01\x02 signature)
+ with open(file.path, 'rb') as f:
+ preload = f.read(4)
+ if len(preload) == 4:
+ signature = f.read(4)
+ return signature == b'PK\x01\x02'
+
+ def compare_details(self, other, source=None):
+ zipinfo_difference = Difference.from_command(MozillaZipinfo, self.path, other.path) or \
+ Difference.from_command(MozillaZipinfoVerbose, self.path, other.path)
+ return [zipinfo_difference]
diff --git a/tests/comparators/test_zip.py b/tests/comparators/test_zip.py
index d921b79..57255bc 100644
--- a/tests/comparators/test_zip.py
+++ b/tests/comparators/test_zip.py
@@ -21,7 +21,7 @@ import os.path
import pytest
from diffoscope.comparators import specialize
from diffoscope.comparators.binary import FilesystemFile, NonExistingFile
-from diffoscope.comparators.zip import ZipFile
+from diffoscope.comparators.zip import ZipFile, MozillaZipFile
from diffoscope.config import Config
from conftest import tool_missing
@@ -65,3 +65,46 @@ def test_compare_non_existing(monkeypatch, zip1):
difference = zip1.compare(NonExistingFile('/nonexisting', zip1))
assert difference.source2 == '/nonexisting'
assert difference.details[-1].source2 == '/dev/null'
+
+TEST_MOZZIP1_PATH = os.path.join(os.path.dirname(__file__), '../data/test1.mozzip')
+TEST_MOZZIP2_PATH = os.path.join(os.path.dirname(__file__), '../data/test2.mozzip')
+
+@pytest.fixture
+def mozzip1():
+ return specialize(FilesystemFile(TEST_MOZZIP1_PATH))
+
+@pytest.fixture
+def mozzip2():
+ return specialize(FilesystemFile(TEST_MOZZIP2_PATH))
+
+def test_mozzip_identification(mozzip1):
+ assert isinstance(mozzip1, MozillaZipFile)
+
+def test_mozzip_no_differences(mozzip1):
+ difference = mozzip1.compare(mozzip1)
+ assert difference is None
+
+@pytest.fixture
+def mozzip_differences(mozzip1, mozzip2):
+ return mozzip1.compare(mozzip2).details
+
+@pytest.mark.skipif(tool_missing('zipinfo'), reason='missing zip')
+def test_mozzip_metadata(mozzip_differences):
+ expected_diff = open(os.path.join(os.path.dirname(__file__), '../data/mozzip_zipinfo_expected_diff')).read()
+ diff = mozzip_differences[0].unified_diff
+ assert (diff.replace(TEST_MOZZIP1_PATH, 'test1.mozzip')
+ .replace(TEST_MOZZIP2_PATH, 'test2.mozzip')) == expected_diff
+
+@pytest.mark.skipif(tool_missing('zipinfo'), reason='missing zip')
+def test_mozzip_compressed_files(mozzip_differences):
+ assert mozzip_differences[1].source1 == 'dir/text'
+ assert mozzip_differences[1].source2 == 'dir/text'
+ expected_diff = open(os.path.join(os.path.dirname(__file__), '../data/text_ascii_expected_diff')).read()
+ assert mozzip_differences[1].unified_diff == expected_diff
+
+@pytest.mark.skipif(tool_missing('zipinfo'), reason='missing zip')
+def test_mozzip_compare_non_existing(monkeypatch, mozzip1):
+ monkeypatch.setattr(Config.general, 'new_file', True)
+ difference = mozzip1.compare(NonExistingFile('/nonexisting', mozzip1))
+ assert difference.source2 == '/nonexisting'
+ assert difference.details[-1].source2 == '/dev/null'
diff --git a/tests/data/mozzip_zipinfo_expected_diff b/tests/data/mozzip_zipinfo_expected_diff
new file mode 100644
index 0000000..9cc8134
--- /dev/null
+++ b/tests/data/mozzip_zipinfo_expected_diff
@@ -0,0 +1,15 @@
+@@ -1,8 +1,8 @@
+-Zip file size: 409 bytes, number of entries: 1
+-warning [test1.mozzip]: 329 extra bytes at beginning or within zipfile
++Zip file size: 552 bytes, number of entries: 1
++warning [test2.mozzip]: 472 extra bytes at beginning or within zipfile
+ (attempting to process anyway)
+-error [test1.mozzip]: reported length of central directory is
+- -329 bytes too long (Atari STZip zipfile? J.H.Holm ZIPSPLIT 1.1
++error [test2.mozzip]: reported length of central directory is
++ -472 bytes too long (Atari STZip zipfile? J.H.Holm ZIPSPLIT 1.1
+ zipfile?). Compensating...
+--rw-r--r-- 2.0 unx 446 b- defX 10-Jan-01 00:00 dir/text
+-1 file, 446 bytes uncompressed, 269 bytes compressed: 39.7%
++-rw-r--r-- 2.0 unx 671 b- defX 10-Jan-01 00:00 dir/text
++1 file, 671 bytes uncompressed, 412 bytes compressed: 38.6%
diff --git a/tests/data/test1.mozzip b/tests/data/test1.mozzip
new file mode 100644
index 0000000000000000000000000000000000000000..d43bb74faacf04cc23be9de208935bc54bad0a49
GIT binary patch
literal 409
zcmZQzU|<OFW@Hj!7GYpw-~jRzZC<E7b>?McVAu!5AQ2c?(ii|_reqfBm!wvd1bDNu
zfn*qg&<sekfH*+2m|1X}1k-Ce;cTI^AxF#m^<BCL9&1gCUZ>gMapR`s-$<tZ)BLBr
z^PCQB`d#~FrKrql?IQi!q>BonZ=0^%zLb(>r#Q1&d*j`OtrufvB{O=(Z%v<Z?c(!;
z(`L-vD|sj2iT0C)WnS8=?#y=JlGh8bk=nAc%}ne2W67mgxaH23ZrPVp)wZuhZI(oJ
z^K>QCufa39d%J&MTCd{H<E+|q{lk^>427rG9gp^Kx+5}2&hOlsoEcWj>atH;DOc3k
zRDTqFZt}(@=tWf5k)VH^t_t>fb;~7gnYAbybIlLmvwsp_@O_D9M}JR~qcNWj>TEMt
xIyz%+<n3iUJU7aoJUwBq^xv}NMGCJk?cBX}dakpX0$bnL<5ypQ{lSPmAOW4Vm!bdw
literal 0
HcmV?d00001
diff --git a/tests/data/test2.mozzip b/tests/data/test2.mozzip
new file mode 100644
index 0000000000000000000000000000000000000000..b0b88fb85ea11b17fd9e90a3a46cac4149d1ccd3
GIT binary patch
literal 552
zcmZQzU|<OFW@Hj!7GYpw-~jRzZ2~ek#m!-4V3^Ou02ToXg29r;03e-`S)^Z*T2T_<
z&B_K6W&}brAk6~e0L@}%!EF*uukWO@c_M}aE${2Pg7g};ND3A`7E{&M^C)N#esm%G
zdrs7+^GDvs>E`=ai!ZwKX5;ERU$5FIYzvAydaKGYdSdi*rkhu|tb#9{SFEZ1tZTZT
zaVn#hO_|>MQ-vQk@Of?ObX)0OAZ8!pr^nEpnaSCF`dZZlDOJx+Rtd~A`SSSX!n55L
zCaVTa6JN|~&%VQ}FkfV5_vHMQZ$2a~T4*MFSLnwX+fxe^<&G9D6S{phQY19wnYXz2
z%|Avn*6pm_;muH=rStHluA`a6rL*cg)26;s*mmxGjBq*sJ9$6d+t1E#?0@Wea8-|n
z<$v`jjXG<}M5I;}E96|MO|CuoPo`2jM|iPuhM|!Ae(~JNF3DOpN3YFRSvUESimIv0
zY3Z|f?p@Xj<mjCG+eTOSykQ2D(D8-;cSJ=x?Wnwbzjt~;nfcK(MW5=f-Q!AY?ei<#
zey29(o&DW=_s-9M7^w99o8QMH5p(|?G?9s#yKnObr=Oc@{;y54G5%)ya!PIHS@96z
tZ|Wy^H3u)T*%7XPqVey+_3h4^KU)7OxLW=?I=C)$AD6w(Lw)S=3IK1S*~tI^
literal 0
HcmV?d00001
--
2.6.1
More information about the Reproducible-builds
mailing list