Bug#1009196: texlive-binaries: Reproducible content of .fmt files

Roland Clobus rclobus at rclobus.nl
Fri Apr 8 17:57:56 BST 2022


Package: texlive-binaries
Version: 2021.20210626.59705-1
Severity: wishlist
Tags: patch
User: reproducible-builds at lists.alioth.debian.org
Usertags: randomness
X-Debbugs-Cc: reproducible-bugs at lists.alioth.debian.org

Hello maintainers of texlive-binaries,

While working on the “reproducible builds” effort [1], I have noticed that the
live image for Cinnamon in bookworm is no longer reproducible [2].

The attached patch ensures that the output of the function 'exception_strings'
always uses the same order of the hyphenation exceptions.
I've written the solution in C; perhaps someone more versed in Lua could
rewrite it more elegantly.
(The Lua manual says of the 'next' function: 'The order in which the indices
are enumerated is not specified' [3].)

With the attached patch applied, I'm able (with the help of FORCE_SOURCE_DATE=1
and SOURCE_DATE_EPOCH) to reproducibly rebuild the .fmt files, as created by
'fmtutil --sys --all'.

Small test case to reproduce:
export FORCE_SOURCE_DATE=1
export SOURCE_DATE_EPOCH=$(date +%s)
for i in `seq 1 10`; do luahbtex -ini -jobname=luahbtex -progname=luahbtex luatex.ini > /dev/null; md5sum luahbtex.*; done

With kind regards,
Roland Clobus

 [1]: https://wiki.debian.org/ReproducibleBuilds
 [2]: https://jenkins.debian.net/view/live/job/reproducible_debian_live_build_cinnamon_bookworm/
 [3]: http://www.lua.org/manual/5.4/manual.html#pdf-next


-- System Information:
Debian Release: bookworm/sid
  APT prefers testing
  APT policy: (990, 'testing'), (500, 'testing-debug'), (50, 'unstable')
Architecture: amd64 (x86_64)
Foreign Architectures: i386

Kernel: Linux 5.16.0-5-amd64 (SMP w/8 CPU threads; PREEMPT)
Locale: LANG=en_GB.UTF-8, LC_CTYPE=en_GB.UTF-8 (charmap=UTF-8), LANGUAGE=en_GB:en
Shell: /bin/sh linked to /bin/dash
Init: systemd (via /run/systemd/system)
LSM: AppArmor: enabled

Versions of packages texlive-binaries depends on:
ii  dpkg            1.21.7
ii  install-info    6.8-4+b1
ii  libc6           2.33-7
ii  libcairo2       1.16.0-5
ii  libfontconfig1  2.13.1-4.4
ii  libfreetype6    2.11.1+dfsg-1
ii  libgcc-s1       12-20220319-1
ii  libgraphite2-3  1.3.14-1
ii  libharfbuzz0b   2.7.4-1
ii  libicu67        67.1-7
ii  libkpathsea6    2021.20210626.59705-1
ii  libmpfr6        4.1.0-3
ii  libpaper1       1.1.28+b1
ii  libpixman-1-0   0.40.0-1
ii  libpng16-16     1.6.37-3
ii  libptexenc1     2021.20210626.59705-1
ii  libstdc++6      12-20220319-1
ii  libsynctex2     2021.20210626.59705-1
ii  libteckit0      2.5.11+ds1-1
ii  libtexlua53     2021.20210626.59705-1
ii  libtexluajit2   2021.20210626.59705-1
ii  libx11-6        2:1.7.2-2+b1
ii  libxaw7         2:1.0.14-1
ii  libxi6          2:1.8-1
ii  libxmu6         2:1.1.3-3
ii  libxpm4         1:3.5.12-1
ii  libxt6          1:1.2.1-1
ii  libzzip-0-13    0.13.72+dfsg.1-1.1
ii  perl            5.34.0-3
ii  t1utils         1.41-4
ii  tex-common      6.17
ii  zlib1g          1:1.2.11.dfsg-4

Versions of packages texlive-binaries recommends:
ii  dvisvgm       2.13.3-1
ii  texlive-base  2021.20220204-1

texlive-binaries suggests no packages.

-- no debconf information
-------------- next part --------------
diff --git a/texk/web2c/luatexdir/lang/texlang.c b/texk/web2c/luatexdir/lang/texlang.c
index ba7614ff..ccc0ec90 100644
--- a/texk/web2c/luatexdir/lang/texlang.c
+++ b/texk/web2c/luatexdir/lang/texlang.c
@@ -498,10 +498,20 @@ static char *hyphenation_exception(int exceptions, char *w)
     return ret;
 }
 
+char *unsorted_buffer = NULL;
+size_t *indexes = NULL;
+
+static int sort_func(const void *a, const void *b) {
+    size_t ia = *(size_t*)a;
+    size_t ib = *(size_t*)b;
+    return strcmp(&unsorted_buffer[ia], &unsorted_buffer[ib]);
+}
+
 char *exception_strings(struct tex_language *lang)
 {
     const char *value;
     size_t size = 0, current = 0;
+    size_t num_bytes = 0;
     size_t l = 0;
     char *ret = NULL;
     if (lang->exceptions == 0)
@@ -509,19 +519,42 @@ char *exception_strings(struct tex_language *lang)
     lua_checkstack(Luas, 2);
     lua_rawgeti(Luas, LUA_REGISTRYINDEX, lang->exceptions);
     if (lua_istable(Luas, -1)) {
-        /*tex Iterate and join. */
+        /*tex Determine required memory. */
         lua_pushnil(Luas);
         while (lua_next(Luas, -2) != 0) {
             value = lua_tolstring(Luas, -1, &l);
-            if (current + 2 + l > size) {
-                ret = xrealloc(ret, (unsigned) ((size + size / 5) + current + l + 1024));
-                size = (size + size / 5) + current + l + 1024;
-            }
-            *(ret + current) = ' ';
-            strcpy(ret + current + 1, value);
+            num_bytes += l + 1;
+            size++;
+            lua_pop(Luas, 1);
+        }
+        unsorted_buffer = xmalloc(num_bytes);
+        indexes = xmalloc(sizeof(size_t)*size);
+
+        /*tex Fetch values. */
+        current = 0;
+        size = 0;
+        lua_pushnil(Luas);
+        while (lua_next(Luas, -2) != 0) {
+            value = lua_tolstring(Luas, -1, &l);
+            strcpy(unsorted_buffer + current, value);
+            indexes[size++] = current;
             current += l + 1;
             lua_pop(Luas, 1);
         }
+        /*tex Sort and join. */
+        qsort(indexes, size, sizeof(size_t), sort_func);
+        ret = xmalloc(num_bytes);
+        current = 0;
+        for (l = 0; l < size; l++) {
+           strcpy(ret + current, &unsorted_buffer[indexes[l]]);
+           current += strlen(&unsorted_buffer[indexes[l]]);
+           ret[current] = ' ';
+           current += 1;
+        }
+        ret[current - 1] = '\0';
+
+        free(unsorted_buffer);
+        free(indexes);
     }
     return ret;
 }


More information about the Reproducible-bugs mailing list