[Reproducible-builds] Bug#808002: Bug#808002: diffoscope: Add support for Mozilla-optimized ZIPs

Mike Hommey mh at glandium.org
Wed Dec 16 07:40:14 UTC 2015


On Tue, Dec 15, 2015 at 04:31:46PM +0100, Jérémy Bobbio wrote:
> Mike Hommey:
> > It would be useful for diffoscope to output differences in omni.ja files as
> > for other Zip files, instead of ending up with a diff of a hexdump.
> > 
> > The attached patch implements a minimal support for this. It however doesn't
> > look at the difference in the `preload` value.
> 
> Great! I think it's fine to just skip the preload value. It will show up
> in the fallback binary comparison if that's the only remaining difference.
> 
> I was going to merge this, but actually I have to ask: would you be kind
> enough to amend the test suite as well?

Attached.

Mike
-------------- next part --------------
diff --git a/diffoscope/comparators/__init__.py b/diffoscope/comparators/__init__.py
index ed24f63..d308b6e 100644
--- a/diffoscope/comparators/__init__.py
+++ b/diffoscope/comparators/__init__.py
@@ -72,7 +72,7 @@ from diffoscope.comparators.symlink import Symlink
 from diffoscope.comparators.text import TextFile
 from diffoscope.comparators.tar import TarFile
 from diffoscope.comparators.xz import XzFile
-from diffoscope.comparators.zip import ZipFile
+from diffoscope.comparators.zip import ZipFile, MozillaZipFile
 
 
 def bail_if_non_existing(*paths):
@@ -153,6 +153,7 @@ FILE_CLASSES = (
     TarFile,
     XzFile,
     ZipFile,
+    MozillaZipFile,
     ImageFile,
     CbfsFile,
     )
diff --git a/diffoscope/comparators/zip.py b/diffoscope/comparators/zip.py
index ecdc77b..42c9a9f 100644
--- a/diffoscope/comparators/zip.py
+++ b/diffoscope/comparators/zip.py
@@ -111,3 +111,54 @@ class ZipFile(File):
         zipinfo_difference = Difference.from_command(Zipinfo, self.path, other.path) or \
                              Difference.from_command(ZipinfoVerbose, self.path, other.path)
         return [zipinfo_difference]
+
+
+class MozillaZipCommandMixin(object):
+    def wait(self):
+        # zipinfo emits an error when reading Mozilla-optimized ZIPs,
+        # which is fine to ignore.
+        super(Zipinfo, self).wait()
+        return 0
+
+
+class MozillaZipinfo(MozillaZipCommandMixin, Zipinfo): pass
+
+
+class MozillaZipinfoVerbose(MozillaZipCommandMixin, ZipinfoVerbose): pass
+
+
+class MozillaZipContainer(ZipContainer):
+    def open_archive(self):
+        # This is gross: Monkeypatch zipfile._EndRecData to work with
+        # Mozilla-optimized ZIPs
+        _orig_EndRecData = zipfile._EndRecData
+        def _EndRecData(fh):
+            endrec = _orig_EndRecData(fh)
+            if endrec:
+                endrec[zipfile._ECD_LOCATION] = (endrec[zipfile._ECD_OFFSET] +
+                                                 endrec[zipfile._ECD_SIZE])
+            return endrec
+        zipfile._EndRecData = _EndRecData
+        result = super(MozillaZipContainer, self).open_archive()
+        zipfile._EndRecData = _orig_EndRecData
+        return result
+
+
+class MozillaZipFile(File):
+    CONTAINER_CLASS = MozillaZipContainer
+
+    @staticmethod
+    def recognizes(file):
+        # Mozilla-optimized ZIPs start with a 32-bit little endian integer
+        # indicating the amount of data to preload, followed by the ZIP
+        # central directory (with a PK\x01\x02 signature)
+        with open(file.path, 'rb') as f:
+            preload = f.read(4)
+            if len(preload) == 4:
+                signature = f.read(4)
+                return signature == b'PK\x01\x02'
+
+    def compare_details(self, other, source=None):
+        zipinfo_difference = Difference.from_command(MozillaZipinfo, self.path, other.path) or \
+                             Difference.from_command(MozillaZipinfoVerbose, self.path, other.path)
+        return [zipinfo_difference]
diff --git a/tests/comparators/test_zip.py b/tests/comparators/test_zip.py
index d921b79..57255bc 100644
--- a/tests/comparators/test_zip.py
+++ b/tests/comparators/test_zip.py
@@ -21,7 +21,7 @@ import os.path
 import pytest
 from diffoscope.comparators import specialize
 from diffoscope.comparators.binary import FilesystemFile, NonExistingFile
-from diffoscope.comparators.zip import ZipFile
+from diffoscope.comparators.zip import ZipFile, MozillaZipFile
 from diffoscope.config import Config
 from conftest import tool_missing
 
@@ -65,3 +65,46 @@ def test_compare_non_existing(monkeypatch, zip1):
     difference = zip1.compare(NonExistingFile('/nonexisting', zip1))
     assert difference.source2 == '/nonexisting'
     assert difference.details[-1].source2 == '/dev/null'
+
+TEST_MOZZIP1_PATH = os.path.join(os.path.dirname(__file__), '../data/test1.mozzip')
+TEST_MOZZIP2_PATH = os.path.join(os.path.dirname(__file__), '../data/test2.mozzip')
+
+@pytest.fixture
+def mozzip1():
+    return specialize(FilesystemFile(TEST_MOZZIP1_PATH))
+
+@pytest.fixture
+def mozzip2():
+    return specialize(FilesystemFile(TEST_MOZZIP2_PATH))
+
+def test_mozzip_identification(mozzip1):
+    assert isinstance(mozzip1, MozillaZipFile)
+
+def test_mozzip_no_differences(mozzip1):
+    difference = mozzip1.compare(mozzip1)
+    assert difference is None
+
+@pytest.fixture
+def mozzip_differences(mozzip1, mozzip2):
+    return mozzip1.compare(mozzip2).details
+
+@pytest.mark.skipif(tool_missing('zipinfo'), reason='missing zip')
+def test_mozzip_metadata(mozzip_differences):
+    expected_diff = open(os.path.join(os.path.dirname(__file__), '../data/mozzip_zipinfo_expected_diff')).read()
+    diff = mozzip_differences[0].unified_diff
+    assert (diff.replace(TEST_MOZZIP1_PATH, 'test1.mozzip')
+                .replace(TEST_MOZZIP2_PATH, 'test2.mozzip')) == expected_diff
+
+@pytest.mark.skipif(tool_missing('zipinfo'), reason='missing zip')
+def test_mozzip_compressed_files(mozzip_differences):
+    assert mozzip_differences[1].source1 == 'dir/text'
+    assert mozzip_differences[1].source2 == 'dir/text'
+    expected_diff = open(os.path.join(os.path.dirname(__file__), '../data/text_ascii_expected_diff')).read()
+    assert mozzip_differences[1].unified_diff == expected_diff
+
+@pytest.mark.skipif(tool_missing('zipinfo'), reason='missing zip')
+def test_mozzip_compare_non_existing(monkeypatch, mozzip1):
+    monkeypatch.setattr(Config.general, 'new_file', True)
+    difference = mozzip1.compare(NonExistingFile('/nonexisting', mozzip1))
+    assert difference.source2 == '/nonexisting'
+    assert difference.details[-1].source2 == '/dev/null'
diff --git a/tests/data/mozzip_zipinfo_expected_diff b/tests/data/mozzip_zipinfo_expected_diff
new file mode 100644
index 0000000..9cc8134
--- /dev/null
+++ b/tests/data/mozzip_zipinfo_expected_diff
@@ -0,0 +1,15 @@
+@@ -1,8 +1,8 @@
+-Zip file size: 409 bytes, number of entries: 1
+-warning [test1.mozzip]:  329 extra bytes at beginning or within zipfile
++Zip file size: 552 bytes, number of entries: 1
++warning [test2.mozzip]:  472 extra bytes at beginning or within zipfile
+   (attempting to process anyway)
+-error [test1.mozzip]:  reported length of central directory is
+-  -329 bytes too long (Atari STZip zipfile?  J.H.Holm ZIPSPLIT 1.1
++error [test2.mozzip]:  reported length of central directory is
++  -472 bytes too long (Atari STZip zipfile?  J.H.Holm ZIPSPLIT 1.1
+   zipfile?).  Compensating...
+--rw-r--r--  2.0 unx      446 b- defX 10-Jan-01 00:00 dir/text
+-1 file, 446 bytes uncompressed, 269 bytes compressed:  39.7%
++-rw-r--r--  2.0 unx      671 b- defX 10-Jan-01 00:00 dir/text
++1 file, 671 bytes uncompressed, 412 bytes compressed:  38.6%
diff --git a/tests/data/test1.mozzip b/tests/data/test1.mozzip
new file mode 100644
index 0000000000000000000000000000000000000000..d43bb74faacf04cc23be9de208935bc54bad0a49
GIT binary patch
literal 409
zcmZQzU|<OFW@Hj!7GYpw-~jRzZC<E7b>?McVAu!5AQ2c?(ii|_reqfBm!wvd1bDNu
zfn*qg&<sekfH*+2m|1X}1k-Ce;cTI^AxF#m^<BCL9&1gCUZ>gMapR`s-$<tZ)BLBr
z^PCQB`d#~FrKrql?IQi!q>BonZ=0^%zLb(>r#Q1&d*j`OtrufvB{O=(Z%v<Z?c(!;
z(`L-vD|sj2iT0C)WnS8=?#y=JlGh8bk=nAc%}ne2W67mgxaH23ZrPVp)wZuhZI(oJ
z^K>QCufa39d%J&MTCd{H<E+|q{lk^>427rG9gp^Kx+5}2&hOlsoEcWj>atH;DOc3k
zRDTqFZt}(@=tWf5k)VH^t_t>fb;~7gnYAbybIlLmvwsp_@O_D9M}JR~qcNWj>TEMt
xIyz%+<n3iUJU7aoJUwBq^xv}NMGCJk?cBX}dakpX0$bnL<5ypQ{lSPmAOW4Vm!bdw

literal 0
HcmV?d00001

diff --git a/tests/data/test2.mozzip b/tests/data/test2.mozzip
new file mode 100644
index 0000000000000000000000000000000000000000..b0b88fb85ea11b17fd9e90a3a46cac4149d1ccd3
GIT binary patch
literal 552
zcmZQzU|<OFW@Hj!7GYpw-~jRzZ2~ek#m!-4V3^Ou02ToXg29r;03e-`S)^Z*T2T_<
z&B_K6W&}brAk6~e0L@}%!EF*uukWO@c_M}aE${2Pg7g};ND3A`7E{&M^C)N#esm%G
zdrs7+^GDvs>E`=ai!ZwKX5;ERU$5FIYzvAydaKGYdSdi*rkhu|tb#9{SFEZ1tZTZT
zaVn#hO_|>MQ-vQk@Of?ObX)0OAZ8!pr^nEpnaSCF`dZZlDOJx+Rtd~A`SSSX!n55L
zCaVTa6JN|~&%VQ}FkfV5_vHMQZ$2a~T4*MFSLnwX+fxe^<&G9D6S{phQY19wnYXz2
z%|Avn*6pm_;muH=rStHluA`a6rL*cg)26;s*mmxGjBq*sJ9$6d+t1E#?0@Wea8-|n
z<$v`jjXG<}M5I;}E96|MO|CuoPo`2jM|iPuhM|!Ae(~JNF3DOpN3YFRSvUESimIv0
zY3Z|f?p@Xj<mjCG+eTOSykQ2D(D8-;cSJ=x?Wnwbzjt~;nfcK(MW5=f-Q!AY?ei<#
zey29(o&DW=_s-9M7^w99o8QMH5p(|?G?9s#yKnObr=Oc@{;y54G5%)ya!PIHS@96z
tZ|Wy^H3u)T*%7XPqVey+_3h4^KU)7OxLW=?I=C)$AD6w(Lw)S=3IK1S*~tI^

literal 0
HcmV?d00001

-- 
2.6.1



More information about the Reproducible-builds mailing list