[Git][qa/jenkins.debian.net][master] 2 commits: rdn: switch to argparse and separate output()

Holger Levsen (@holger) gitlab at salsa.debian.org
Tue Jul 1 13:24:51 BST 2025



Holger Levsen pushed to branch master at Debian QA / jenkins.debian.net


Commits:
52af44ce by Jochen Sprickerhof at 2025-07-01T14:24:33+02:00
rdn: switch to argparse and separate output()

(cherry picked from commit 9dc03146a0141e3f74552df2855816a687737cb8)
Signed-off-by: Holger Levsen <holger at layer-acht.org>

- - - - -
c8b1bac2 by Jochen Sprickerhof at 2025-07-01T14:24:39+02:00
rdn stats: add a page sorted by diffoscope size

(cherry picked from commit 5589afc2064a68c756c1c8d466b91dfb0dc0556a)
Signed-off-by: Holger Levsen <holger at layer-acht.org>

- - - - -


2 changed files:

- bin/rebuilderd_stats.py
- hosts/osuosl5-amd64/etc/cron.d/rebuilderd


Changes:

=====================================
bin/rebuilderd_stats.py
=====================================
@@ -7,6 +7,7 @@
 #
 # Licensed under GPL-2
 
+from argparse import ArgumentParser, FileType
 from datetime import datetime
 from hashlib import file_digest
 from os import path
@@ -23,22 +24,44 @@ from zstandard import ZstdDecompressor
 
 
 def main() -> None:
-    arch = argv[1]
-    db = argv[2]
-    cx = connect(db)
+    parser = ArgumentParser(description="Analyze rebuilderd DB failed packages")
+    parser.add_argument(
+        "arch",
+        help="Architecture",
+    )
+    parser.add_argument(
+        "db",
+        help="rebuilderd DB file",
+    )
+    parser.add_argument(
+        "--cache",
+        type=Path,
+        default=Path(),
+        help="Cache file (will be ignored when this script changes).",
+    )
+    parser.add_argument(
+        "datefile",
+        type=Path,
+        help="Output HTML sorted by build date",
+    )
+    parser.add_argument(
+        "sizefile",
+        type=Path,
+        help="Output HTML sorted by diffoscope size",
+    )
+    args = parser.parse_args()
+    cx = connect(args.db)
     cx.create_function("regexp", 2, lambda x, y: 1 if search(x, y) else 0)
     cu = cx.cursor()
 
-    cache_filename = Path(argv[3]) if len(argv) > 3 else Path()
-
     hash_key = None
     with open(argv[0], "rb") as fp:
         hash_key = file_digest(fp, "blake2b").hexdigest()
 
     cache = {}
     cache_new = {}
-    if cache_filename.is_file():
-        with open(cache_filename, "rb") as fp:
+    if args.cache.is_file():
+        with args.cache.open("rb") as fp:
             hash_key_old, cache_old = load(fp)
         if hash_key == hash_key_old:
             cache = cache_old
@@ -105,7 +128,7 @@ def main() -> None:
         "dpkg-buildpackage failed: dh-r (#1089197)": log_has_re(r"(Source|Binary): r-(cran|bioc|other)-.*dpkg-buildpackage died"),
         "dpkg-buildpackage failed": log_has("E: Build failure (dpkg-buildpackage died"),
         "failed to reproduce: diffoscope timeout (not fatal)": diff_has("TRUNCATED DUE TO TIMEOUT: 600 seconds"),
-        "failed to reproduce: dh-buildinfo (#1068809)": diff_has_any([f"buildinfo_{arch}.gz", "buildinfo_all.gz"]),
+        "failed to reproduce: dh-buildinfo (#1068809)": diff_has_any([f"buildinfo_{args.arch}.gz", "buildinfo_all.gz"]),
         one_second_string: diff_has_re(r" -([drwx-]{10} [0-9a-z() ]{,60} [0-9]+ [0-9-]{10} [012][0-9]:).* \+\1"),
         "failed to reproduce: dh-r (#1089197)": log_has_re(r"(Source|Binary): r-(cran|bioc|other)-"),
         "failed to reproduce": lambda log, diff: diff,
@@ -113,7 +136,7 @@ def main() -> None:
         "other errors": lambda *_: True,
     }
 
-    if arch == "all":
+    if args.arch == "all":
         del error_messages[one_second_string]
 
     unzstd = ZstdDecompressor()
@@ -129,25 +152,37 @@ def main() -> None:
 
         if build_id in cache:
             cache_new[build_id] = cache[build_id]
-            messages_packages[cache[build_id]].append({"name": name, "src": src_name, "id": build_id, "diff": diffoscope is not None})
+            message, diffsize = cache[build_id]
+            messages_packages[message].append({"name": name, "src": src_name, "id": build_id, "diff": diff_size})
             continue
 
         build_log = unzstd.stream_reader(build_log).read().decode("utf-8")
         if diffoscope:
             diffoscope = unzstd.stream_reader(diffoscope).read().decode("utf-8")
+        else:
+            diffoscope = ""
 
         for message, error_match_fn in error_messages.items():
             if error_match_fn(build_log, diffoscope):
-                cache_new[build_id] = message
-                messages_packages[message].append({"name": name, "src": src_name, "id": build_id, "diff": diffoscope is not None})
+                difflen = len(diffoscope)
+                cache_new[build_id] = (message, difflen)
+                messages_packages[message].append({"name": name, "src": src_name, "id": build_id, "diff": difflen})
                 break
 
     messages_packages = {k: v for k, v in messages_packages.items() if v}  # Remove empty categories
 
-    if cache_filename.name:
-        with open(cache_filename, "wb") as fb:
+    if args.cache.name:
+        with open(args.cache, "wb") as fb:
             dump((hash_key, cache_new), fb)
 
+    db_size = path.getsize(args.db)
+    with args.datefile.open("w") as outfile:
+        output(outfile, args.arch, db_size, total, messages_packages, False)
+    with args.sizefile.open("w") as outfile:
+        output(outfile, args.arch, db_size, total, messages_packages, True)
+
+
+def output(outfile, arch, db_size, total, messages_packages, size_sort):
     with urlopen("https://salsa.debian.org/reproducible-builds/reproducible-notes/-/raw/master/packages.yml?ref_type=heads") as rn:
         rn_bugs = safe_load(rn)
 
@@ -157,12 +192,13 @@ def main() -> None:
         f"<title>https://reproduce.debian.net/{arch}/stats</title>"
         '<meta name="viewport" content="width=device-width, initial-scale=1">'
         "</head><body>"
-        f'<header><h1>https://<a href="/">reproduce</a>.debian.net/<a href="/{arch}">{arch}</a>/stats</h1></header> <main>'
+        f'<header><h1>https://<a href="/">reproduce</a>.debian.net/<a href="/{arch}">{arch}</a>/stats</h1></header> <main>',
+        file=outfile
     )
-    print(f"Database size: {bytes2human(path.getsize(db))}<br/>")
-    print(f"Last changed: {datetime.now().replace(microsecond=0)} - updated every 3h.")
+    print(f"Database size: {bytes2human(db_size)}<br/>", file=outfile)
+    print(f"Last changed: {datetime.now().replace(microsecond=0)} - updated every 3h.", file=outfile)
 
-    print("<table> <tr> <th>error</th> <th colspan=2>number of affected<br/>bad binary packages</th><th>number of affected<br/>bad source packages</th> </tr>")
+    print("<table> <tr> <th>error</th> <th colspan=2>number of affected<br/>bad binary packages</th><th>number of affected<br/>bad source packages</th> </tr>", file=outfile)
     bad_packages=0
     bad_sources=0
     for message, packages in messages_packages.items():
@@ -170,18 +206,30 @@ def main() -> None:
         print(
             f'<tr><td><a href="#{anchor}">{message}</a></td>'
             f'<td style="text-align:right">{len(packages)}</td><td style="text-align:right">({len(packages)/total*100:.2f}%)</td>'
-            f'<td style="text-align:right">{len(set([pkg['src'] for pkg in packages]))}</td></tr>'
+            f'<td style="text-align:right">{len(set([pkg['src'] for pkg in packages]))}</td></tr>',
+            file=outfile
         )
         bad_packages += len(packages)
         bad_sources += len(set([pkg['src'] for pkg in packages]))
     print(
         f'<tr><td><b>total amount of unreproduced packages</b></td>'
         f'<td style="text-align:right"><b>{bad_packages}</b></td>'
-        f'<td style="text-align:right" colspan="2"><b>{bad_sources}</b></td></tr>'
+        f'<td style="text-align:right" colspan="2"><b>{bad_sources}</b></td></tr>',
+        file=outfile
     )
-    print("</table>")
+    print("</table>", file=outfile)
+
+    if size_sort:
+        print('<p>Packages are grouped by having a bug in <a href="https://salsa.debian.org/reproducible-builds/reproducible-notes">reproducible-notes</a> and sorted by diffoscope size (bigger logs later) inside the group.</p>', file=outfile)
+    else:
+        print('<p>Packages are grouped by having a bug in <a href="https://salsa.debian.org/reproducible-builds/reproducible-notes">reproducible-notes</a> and sorted by build time (newer logs later) inside the group.</p>', file=outfile)
 
-    print('<p>Packages are grouped by having a bug in <a href="https://salsa.debian.org/reproducible-builds/reproducible-notes">reproducible-notes</a> and sorted by build time (newer logs later) inside the group.</p>')
+    def sorter(pkg):
+        if pkg["src"] in rn_bugs and "bugs" in rn_bugs[pkg["src"]]:
+            return -1
+        if size_sort:
+            return pkg["diff"]
+        return 1
 
     if arch not in ['amd64', 'arm64', 'armhf', 'i386']:
         ci_arch = 'amd64'
@@ -191,31 +239,31 @@ def main() -> None:
         anchor = message.replace(" ", "-")
         message = sub(r'(#)([0-9]*)', r'https://bugs.debian.org/\2', message)
         message = sub(r'(https://[a-z.A-Z_0-9/]*)', r'<a href="\1">\1</a>', message)
-        print(f'\n<h2 id="{anchor}">{message}</h2>')
+        print(f'\n<h2 id="{anchor}">{message}</h2>', file=outfile)
         span = 0
         bug_break = False
-        print("<p><span>")
-        for pkg in sorted(packages, key=lambda pkg: pkg["src"] in rn_bugs and "bugs" in rn_bugs[pkg["src"]], reverse=True):
+        print("<p><span>", file=outfile)
+        for pkg in sorted(packages, key=sorter):
             if not bug_break and not (pkg["src"] in rn_bugs and "bugs" in rn_bugs[pkg["src"]]):
                 bug_break = True
-                print("</span></p><p><span>")
-            print(f'<a href="https://reproduce.debian.net/{arch}/api/v0/builds/{pkg["id"]}/log">{pkg["name"]}</a> ', end='')
+                print("</span></p><p><span>", file=outfile)
+            print(f'<a href="https://reproduce.debian.net/{arch}/api/v0/builds/{pkg["id"]}/log">{pkg["name"]}</a> ', end='', file=outfile)
             if pkg["diff"]:
-                print(f'<a href="https://reproduce.debian.net/{arch}/api/v0/builds/{pkg["id"]}/diffoscope">💠</a>', end='')
-            print(f'<a href="https://tracker.debian.org/pkg/{pkg["src"]}">🍥</a>', end='')
-            print(f'<a href="https://tests.reproducible-builds.org/debian/rb-pkg/trixie/{ci_arch}/{pkg["src"]}.html">🔬</a>', end='')
+                print(f'<a href="https://reproduce.debian.net/{arch}/api/v0/builds/{pkg["id"]}/diffoscope">💠</a>', end='', file=outfile)
+            print(f'<a href="https://tracker.debian.org/pkg/{pkg["src"]}">🍥</a>', end='', file=outfile)
+            print(f'<a href="https://tests.reproducible-builds.org/debian/rb-pkg/trixie/{ci_arch}/{pkg["src"]}.html">🔬</a>', end='', file=outfile)
             if pkg["src"] in rn_bugs and "bugs" in rn_bugs[pkg["src"]]:
                 for bug in rn_bugs[pkg["src"]]["bugs"]:
-                    print(f'<a style="color: #FF0000;" href="https://bugs.debian.org/cgi-bin/bugreport.cgi?bug={bug}">#</a>', end='')
-            print(" ")
+                    print(f'<a style="color: #FF0000;" href="https://bugs.debian.org/cgi-bin/bugreport.cgi?bug={bug}">#</a>', end='', file=outfile)
+            print(" ", file=outfile)
             span += 1
             if span == 256:
                 span = 0
-                print("</span><span>")
-        print("</span> ")
-        print("</p>")
+                print("</span><span>", file=outfile)
+        print("</span> ", file=outfile)
+        print("</p>", file=outfile)
 
-    print('<br/><br/><hr/><a href="https://salsa.debian.org/qa/jenkins.debian.net/-/blob/master/bin/rebuilderd_stats.py">rebuilderd_stats.py<a/> - patches welcome.</main></body></html>')
+    print('<br/><br/><hr/><a href="https://salsa.debian.org/qa/jenkins.debian.net/-/blob/master/bin/rebuilderd_stats.py">rebuilderd_stats.py<a/> - patches welcome.</main></body></html>', file=outfile)
 
 
 if __name__ == "__main__":


=====================================
hosts/osuosl5-amd64/etc/cron.d/rebuilderd
=====================================
@@ -1,4 +1,4 @@
 MAILTO=root
-42 */3 * * * rebuilderd for arch in all amd64 arm64 armel armhf i386 ppc64el riscv64 ; do mkdir -p /srv/rebuilderd/$arch/stats/ && /srv/jenkins/bin/rebuilderd_stats.py $arch /srv/rebuilderd/$arch/rebuilderd.db /srv/rebuilderd/$arch/stats/cache.pickle > /srv/rebuilderd/$arch/stats/index.html.tmp && mv /srv/rebuilderd/$arch/stats/index.html.tmp /srv/rebuilderd/$arch/stats/index.html ; done
+42 */3 * * * rebuilderd for arch in all amd64 arm64 armel armhf i386 ppc64el riscv64 ; do mkdir -p /srv/rebuilderd/$arch/stats/ && /srv/jenkins/bin/rebuilderd_stats.py $arch /srv/rebuilderd/$arch/rebuilderd.db --cache /srv/rebuilderd/$arch/stats/cache.pickle /srv/rebuilderd/$arch/stats/index.html.tmp /srv/rebuilderd/$arch/stats/index_date.html.tmp && mv /srv/rebuilderd/$arch/stats/index.html.tmp /srv/rebuilderd/$arch/stats/index.html && mv /srv/rebuilderd/$arch/stats/index_date.html.tmp /srv/rebuilderd/$arch/stats/index_date.html ; done
 23 0,6,12,18 * * * rebuilderd for arch in all amd64 arm64 armel armhf i386 ppc64el riscv64 ; do /srv/jenkins/bin/rebuilderd_graph.sh $arch ; done
 



View it on GitLab: https://salsa.debian.org/qa/jenkins.debian.net/-/compare/ec7825b9819c4d839c9a69ca549cd9c439bdb8f1...c8b1bac248ab940328e7a45e85fd0d38bd99f81c

-- 
View it on GitLab: https://salsa.debian.org/qa/jenkins.debian.net/-/compare/ec7825b9819c4d839c9a69ca549cd9c439bdb8f1...c8b1bac248ab940328e7a45e85fd0d38bd99f81c
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/qa-jenkins-scm/attachments/20250701/7e3b1b0e/attachment-0001.htm>


More information about the Qa-jenkins-scm mailing list