[Git][qa/jenkins.debian.net][master] reproduce.d.n/stats: Rework and simplify

Holger Levsen (@holger) gitlab at salsa.debian.org
Mon Jan 27 11:50:44 GMT 2025



Holger Levsen pushed to branch master at Debian QA / jenkins.debian.net


Commits:
29312960 by Jochen Sprickerhof at 2025-01-27T12:50:34+01:00
reproduce.d.n/stats: Rework and simplify

Signed-off-by: Holger Levsen <holger at layer-acht.org>

- - - - -


1 changed file:

- bin/rebuilderd_stats.py


Changes:

=====================================
bin/rebuilderd_stats.py
=====================================
@@ -1,12 +1,12 @@
 #!/usr/bin/python3
 
-from collections import defaultdict
 from datetime import datetime
-from re import search, sub, DOTALL
+from os import path
+from re import DOTALL, search, sub
 from sqlite3 import connect
 from sys import argv
+
 from psutil._common import bytes2human
-from os import path
 
 
 def main() -> None:
@@ -16,9 +16,6 @@ def main() -> None:
     cx.create_function("regexp", 2, lambda x, y: 1 if search(x, y) else 0)
     cu = cx.cursor()
 
-    log = "CAST(b.build_log AS TEXT)"
-    r_packages = "p.name like 'r-cran-%' or p.name like 'r-bioc-%' or p.name like 'r-other-%'"
-
     def log_has(s):
         return lambda log, _: s in log
 
@@ -75,34 +72,22 @@ def main() -> None:
         "failed to reproduce: 1-second offset (#1089088)": diff_has_re(r" -([drwx-]{10} [0-9a-z() ]{,60} [0-9]+ [0-9-]{10} [012][0-9]:).* \+\1"),
         "failed to reproduce": lambda log, diff: diff,
         "size limit (not fatal)": log_has("TRUNCATED DUE TO SIZE LIMIT: "),
+        "other errors": lambda *_: True,
     }
 
-    messages_packages = {k: [] for k in error_messages.keys()} # Preserve keys order
+    messages_packages = {k: [] for k in error_messages.keys()}  # Preserve keys order
+    total = 0
     for row in cu.execute(
-        "SELECT p.name, CAST(b.build_log AS TEXT), SUBSTR(b.diffoscope, 1, 100000) FROM packages p LEFT JOIN builds b"
-        " ON b.id = p.build_id WHERE p.status = 'BAD'"
+        "SELECT p.name, s.name, p.build_id, p.has_diffoscope, CAST(b.build_log AS TEXT), SUBSTR(b.diffoscope, 1, 100000)"
+        " FROM packages p JOIN pkgbases s ON s.id = p.pkgbase_id LEFT JOIN builds b ON b.id = p.build_id"
+        " WHERE p.status ='BAD' ORDER BY p.build_id"
     ):
+        total += 1
         for message, error_match_fn in error_messages.items():
-            if error_match_fn(row[1], row[2]):
-                messages_packages[message].append(row[0])
+            if error_match_fn(row[4], row[5]):
+                messages_packages[message].append({"name": row[0], "src": row[1], "id": row[2], "diff": bool(row[3])})
                 break
-    messages_packages = {k: v for k, v in messages_packages.items() if v} # Remove empty categories
-
-    package_logs = dict()
-    bad_packages = set()
-    has_diffoscope = set()
-    src_package = dict()
-    for row in cu.execute("SELECT p.name, s.name, p.build_id, p.has_diffoscope FROM packages p JOIN pkgbases s ON s.id = p.pkgbase_id WHERE p.status ='BAD'"):
-        bad_packages.add(row[0])
-        package_logs[row[0]] = int(row[2])
-        src_package[row[0]] = row[1]
-        if row[3]:
-            has_diffoscope.add(row[0])
-    found_packages = {pkg for lst in messages_packages.values() for pkg in lst}
-
-    other_errors = list(bad_packages.difference(found_packages))
-    if other_errors:
-        messages_packages["other errors"] = other_errors
+    messages_packages = {k: v for k, v in messages_packages.items() if v}  # Remove empty categories
 
     print(
         '<!DOCTYPE html><html lang="en"><head>'
@@ -110,7 +95,7 @@ def main() -> None:
         f"<title>https://{arch}.reproduce.debian.net/ stats</title>"
         '<meta name="viewport" content="width=device-width, initial-scale=1">'
         "</head><body>"
-        f"<header><h1>https://<a href=\"/\">{arch}</a>.reproduce.debian.net/ stats</h1></header> <main>"
+        f'<header><h1>https://<a href="/">{arch}</a>.reproduce.debian.net/ stats</h1></header> <main>'
     )
     print(f"Database size: {bytes2human(path.getsize(db))}<br/>")
     print(f"Last changed: {datetime.now().replace(microsecond=0)} - updated every 3h.")
@@ -120,7 +105,7 @@ def main() -> None:
         anchor = message.replace(" ", "-")
         print(
             f'<tr><td><a href="#{anchor}">{message}</a></td>'
-            f'<td style="text-align:right">{len(packages)}</td><td style="text-align:right">({len(packages)/len(bad_packages)*100:.2f}%)</td></tr>'
+            f'<td style="text-align:right">{len(packages)}</td><td style="text-align:right">({len(packages)/total*100:.2f}%)</td></tr>'
         )
     print("</table>")
 
@@ -129,16 +114,15 @@ def main() -> None:
         message = sub(r'(#)([0-9]*)', r'https://bugs.debian.org/\2', message)
         message = sub(r'(https://[a-z.A-Z_0-9/]*)', r'<a href="\1">\1</a>', message)
         print(f'\n<h2 id="{anchor}">{message}</h2>')
-        packages = sorted(packages, key=lambda pkg: package_logs.get(pkg, 0))
         for pkg in packages:
-            print(f'<a href="https://{arch}.reproduce.debian.net/api/v0/builds/{package_logs.get(pkg, 0)}/log">{pkg}</a>', end='')
-            if pkg in has_diffoscope:
-                print(f'<a href="https://{arch}.reproduce.debian.net/api/v0/builds/{package_logs.get(pkg, 0)}/diffoscope">💠</a>', end='')
-            print(f'<a href="https://tracker.debian.org/pkg/{src_package[pkg]}">🍥</a>', end='')
-            print(f'<a href="https://tests.reproducible-builds.org/debian/rb-pkg/trixie/{arch}/{src_package[pkg]}.html">♻</a>', end='')
+            print(f'<a href="https://{arch}.reproduce.debian.net/api/v0/builds/{pkg["id"]}/log">{pkg["name"]}</a>', end='')
+            if pkg["diff"]:
+                print(f'<a href="https://{arch}.reproduce.debian.net/api/v0/builds/{pkg["id"]}/diffoscope">💠</a>', end='')
+            print(f'<a href="https://tracker.debian.org/pkg/{pkg["src"]}">🍥</a>', end='')
+            print(f'<a href="https://tests.reproducible-builds.org/debian/rb-pkg/trixie/{arch}/{pkg["src"]}.html">♻</a>', end='')
             print(" ")
 
-    print("<br/><br/><hr/><a href=\"https://salsa.debian.org/qa/jenkins.debian.net/-/blob/master/bin/rebuilderd_stats.py\">rebuilderd_stats.py<a/> - patches welcome.</main></body></html>")
+    print('<br/><br/><hr/><a href="https://salsa.debian.org/qa/jenkins.debian.net/-/blob/master/bin/rebuilderd_stats.py">rebuilderd_stats.py<a/> - patches welcome.</main></body></html>')
 
 
 if __name__ == "__main__":



View it on GitLab: https://salsa.debian.org/qa/jenkins.debian.net/-/commit/29312960059e61440798f4de37727099380513fc

-- 
View it on GitLab: https://salsa.debian.org/qa/jenkins.debian.net/-/commit/29312960059e61440798f4de37727099380513fc
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/qa-jenkins-scm/attachments/20250127/b955ce92/attachment-0001.htm>


More information about the Qa-jenkins-scm mailing list