[Git][haskell-team/tools][master] Add script to help with transitions

Ilias Tsitsimpis (@iliastsi) gitlab at salsa.debian.org
Sun Oct 6 17:03:57 BST 2024



Ilias Tsitsimpis pushed to branch master at Debian Haskell Group / tools


Commits:
457f634c by Ilias Tsitsimpis at 2024-10-06T19:02:00+03:00
Add script to help with transitions

Add a script that examines the current state of all Haskell packages in
the Debian archive and suggests actions that will help resolve
issues/update packages needed to complete a transition.

- - - - -


1 changed file:

- + transition.py


Changes:

=====================================
transition.py
=====================================
@@ -0,0 +1,651 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2024 Ilias Tsitsimpis <iliastsi at debian.org>
+
+"""Suggest actions to complete a Haskell transition.
+
+This program examines the current state of all Haskell packages in the Debian
+archive and suggests actions that will help resolve issues/update packages
+needed to complete a transition.
+"""
+
+import os
+import re
+import git
+import sys
+import lzma
+import yaml
+import apt_pkg
+import logging
+import argparse
+import requests
+from subprocess import check_output
+try:
+    from functools import cached_property
+except Exception:
+    from cached_property import cached_property
+from debian.deb822 import Sources, Packages
+
+log = logging.getLogger(__name__)
+
+
+# Archive locations.  The "%s" placeholders are filled in with the
+# distribution name and, for the binary index, the architecture.
+DEBIAN_MIRROR = "http://deb.debian.org/debian/"
+DEBIAN_SRC_URL = DEBIAN_MIRROR + "dists/%s/main/source/Sources.xz"
+DEBIAN_PKG_URL = DEBIAN_MIRROR + "dists/%s/main/binary-%s/Packages.xz"
+
+# Source packages that are tracked even when "ghc" does not show up in
+# their Build-Depends field.
+# NOTE(review): "haskell-devscripts" and "haskell-platform" are also listed
+# in DHG_SOURCES_IGNORE, which add_source() consults first.
+DHG_SOURCES_EXTRA = [
+    "haskell-devscripts",
+    "haskell-diagrams",
+    "haskell-platform",
+]
+
+# Ignore packages that depend on GHC but are not Haskell libraries.
+DHG_SOURCES_IGNORE = [
+    "diffoscope",
+    "hkl",
+    "haskell-devscripts",
+    "haskell-mode",
+    "haskell-platform",
+    "nginx-confgen",
+    "dh-runit",
+    "pkg-haskell-tools",
+    "pandoc",
+]
+
+# Ignore these binary packages. This may be because:
+# - They provide modules that are now available in GHC.
+# - They are causing other problems, and not managed by us.
+DHG_BINARIES_IGNORE = [
+    "ganeti-htools",
+    "libghc-exceptions-dev",
+    "libghc-exceptions-doc",
+    "libghc-exceptions-prof",
+]
+
+# Parse these packages (e.g., retrieve their dependencies etc)
+# but don't perform any actions on them.
+DHG_SOURCES_NO_ACTION = [
+    "ghc",
+    "ganeti",
+]
+
+# These binary packages don't have a proper Provides/Ghc-Package field,
+# so no warning is logged when the field is absent.
+DHG_BINARIES_PROVIDES_IGNORE = [
+    "libghc-diagrams-dev",
+    "libghc-diagrams-prof",
+    "haskell-platform-prof",
+]
+
+
+class DHGPackages(object):
+    def __init__(self, srcf, pkgf, excf, repos):
+        """Create an empty package database.
+
+        srcf, pkgf and excf are the paths of the Sources, Packages and
+        britney excuses files that parse() reads.  repos is the directory
+        that holds the "package-plan" and "DHG_packages" checkouts; a
+        leading "~" is expanded.
+        """
+        apt_pkg.init_system()
+
+        # Input files, read later by parse()
+        self.srcf = srcf
+        self.pkgf = pkgf
+        self.excf = excf
+
+        # Lookup tables, all filled in by parse()
+        self.sources = {}
+        self.binaries = {}
+        self.excuses = {}
+        self.pplan = {}
+        self.haddock_interface = None
+
+        repo_root = os.path.expanduser(repos)
+        self.planrepo = os.path.join(repo_root, "package-plan")
+        self.dhgrepo = git.Repo(os.path.join(repo_root, "DHG_packages"))
+
+    def parse(self):
+        """Load every input file and populate the lookup tables.
+
+        The steps run in a fixed order because each one relies on the
+        previous: Sources, Packages, the haddock interface provided by the
+        "ghc" binary, package-plan's packages.txt, and finally the britney
+        excuses.  All files are read as UTF-8 regardless of the locale.
+
+        Raises RuntimeError if "ghc" does not provide exactly one
+        "haddock-interface-*" virtual package.
+        """
+        # Parse Sources
+        log.info("Parsing Sources file...")
+        with open(self.srcf, "r", encoding="utf-8") as f:
+            for s in Sources.iter_paragraphs(f):
+                self.add_source(s)
+
+        # Parse Binaries
+        log.info("Parsing Packages file...")
+        with open(self.pkgf, "r", encoding="utf-8") as f:
+            for p in Packages.iter_paragraphs(f):
+                self.add_binary(p)
+
+        # Compute current haddock interface
+        interfaces = [p for p in self.binaries["ghc"].provides
+                      if p.startswith("haddock-interface-")]
+        if len(interfaces) != 1:
+            # An explicit check (rather than assert) so that it still runs
+            # under "python -O".
+            raise RuntimeError("Expected exactly one haddock interface"
+                               " provided by 'ghc', found: %s" % interfaces)
+        self.haddock_interface = interfaces[0]
+
+        # Parse package-plan
+        log.info("Parsing package-plan file...")
+        with open(os.path.join(self.planrepo, "packages.txt"), "r",
+                  encoding="utf-8") as f:
+            for p in f:
+                # Drop the trailing comment, then split into words:
+                # <name> <version> [options...]
+                p = p.partition("#")[0].split()
+                if (not p) or ("avoid" in p):
+                    continue
+                ignore = ("ignore" in p) or ("obsolete" in p)
+                name = p[0]
+                if name in self.pplan:
+                    # Entry already created by DHGBinary from a Ghc-Package
+                    # field; complete it with the planned version.
+                    self.pplan[name]["version"] = p[1]
+                    self.pplan[name]["ignore"] = ignore
+                    continue
+                # Otherwise look for a "binary" or "binary=<pkg>" option
+                # that names the Debian binary package.
+                bname = None
+                for opt in p[2:]:
+                    if not opt.startswith("binary"):
+                        continue
+                    bname = name
+                    if "=" in opt:
+                        bname = opt.split("=")[1]
+                    break
+                if bname in self.binaries:
+                    self.pplan[name] = {
+                        "version": p[1],
+                        "ignore": ignore,
+                        "debian_version": self.binaries[bname].source.version,
+                    }
+                    self.binaries[bname].source.pplan = self.pplan[name]
+                    continue
+                log.warning("Haskell package '%s' is missing from Debian",
+                            name)
+
+        # Parse britney excuses
+        log.info("Parsing britney excuses...")
+        with open(self.excf, "r", encoding="utf-8") as f:
+            excuses = yaml.safe_load(f)
+        # Every known source gets an (initially empty) entry so that later
+        # lookups never miss.
+        for src in self.sources.keys():
+            self.excuses[src] = {"reason": [], "excuses": []}
+        for src in excuses["sources"]:
+            if src["source"] in self.sources:
+                exc = self.excuses.get(src["source"])
+                exc["excuses"].extend(src["excuses"])
+                exc["reason"].extend(src["reason"])
+                self.excuses[src["source"]] = exc
+
+        # Add ghc to pplan
+        self.pplan["ghc"] = {"version": self.sources["ghc"].version,
+                             "debian_version": self.sources["ghc"].version,
+                             "ignore": False}
+        self.sources["ghc"].pplan = self.pplan["ghc"]
+
+    def add_source(self, srcdict):
+        """Register one Sources paragraph, if it is a package we track.
+
+        A source is tracked when it is not listed in DHG_SOURCES_IGNORE and
+        either its Build-Depends field mentions "ghc" or it is listed in
+        DHG_SOURCES_EXTRA.  When the same source name appears more than
+        once, the entry with the highest version wins.
+        """
+        pkgname = srcdict["Package"]
+        if pkgname in DHG_SOURCES_IGNORE:
+            # Ignore non-haskell packages
+            return
+        tracked = ("ghc" in srcdict.get("Build-Depends", [])
+                   or pkgname in DHG_SOURCES_EXTRA)
+        if not tracked:
+            return
+
+        candidate = DHGSource(self, srcdict)
+
+        if candidate.name not in self.sources:
+            self.sources[candidate.name] = candidate
+            return
+
+        log.debug("Double entry for '%s' source package", candidate.name)
+        current = self.sources[candidate.name]
+        if apt_pkg.version_compare(candidate.version, current.version) > 0:
+            self.sources[candidate.name] = candidate
+
+    def add_binary(self, bindict):
+        """Register one Packages paragraph, if it is a package we track.
+
+        Binaries built from an ignored source, or listed in
+        DHG_BINARIES_IGNORE, are skipped.  Of the rest, a binary is kept
+        when its name or Depends field mentions "ghc", or when its source
+        package is already registered.  The binary is indexed under its
+        own name and under every name it provides; a name clash trips an
+        assertion.
+        """
+        pkgname = bindict["Package"]
+        # The Source field may carry a version in parentheses; keep only
+        # the first word.
+        srcname = bindict.get("Source", pkgname).split()[0]
+        if srcname in DHG_SOURCES_IGNORE:
+            return
+        if pkgname in DHG_BINARIES_IGNORE:
+            # These packages are also provided by others
+            return
+        relevant = ("ghc" in pkgname
+                    or "ghc" in bindict.get("Depends", [])
+                    or srcname in self.sources)
+        if not relevant:
+            return
+
+        binary = DHGBinary(self, bindict)
+
+        assert binary.name not in self.binaries, binary.name
+        self.binaries[binary.name] = binary
+        for virtual in binary.provides:
+            assert virtual not in self.binaries, virtual
+            self.binaries[virtual] = binary
+
+    def actions(self, args):
+        """Evaluate every source package and report what can be done.
+
+        Each source's action() is always called (it prints its own
+        suggestion).  If none of them suggested anything, the error
+        recorded on each source is logged instead.
+        """
+        log.info("Evaluating available actions...")
+        suggested = False
+        for src in self.sources.values():
+            # No short-circuit: every package must be examined.
+            if src.action(args):
+                suggested = True
+        if suggested:
+            return
+        for src in self.sources.values():
+            if src.error:
+                log.error(src.error)
+
+    @cached_property
+    def ignored_bugs(self):
+        """Return the set of bug numbers that should not be reported.
+
+        Runs "bts select" once per tag (ftbfs-source-after-build and
+        ftbfs-binary-after-build) for bugs of the Haskell maintainers list
+        and collects the whitespace-separated bug numbers it prints.  The
+        result is cached for the lifetime of the object.
+        """
+        # The addresses must contain a literal "@"; the " at " spelling
+        # produced by mailing-list archivers is not a valid address.
+        maintainer = ("maintainer:"
+                      "pkg-haskell-maintainers@lists.alioth.debian.org")
+        user = "user:lucas@debian.org"
+        bugs = set()
+        for tag in ("ftbfs-source-after-build", "ftbfs-binary-after-build"):
+            cmd = ["bts", "select", maintainer, user, "tag:%s" % tag]
+            bugs.update(check_output(cmd, text=True).split())
+        return bugs
+
+
+class DHGSource(object):
+    def __init__(self, packages, srcdict):
+        """Build a source package from one Sources paragraph.
+
+        packages is the owning DHGPackages database.  Only the
+        build-dependencies whose entry mentions "ghc" are recorded (by
+        name, without version constraints or alternatives after the first
+        word); "ghc:native" in Build-Depends is skipped.
+        """
+        self.packages = packages
+        self.name = srcdict["Package"]
+        self.version = srcdict["Version"]
+        self.vcs_git = srcdict.get("Vcs-Git")
+        self._binaries = [b.strip() for b in srcdict["Binary"].split(",")]
+        self._builddeps = []
+        if "Build-Depends" in srcdict:
+            names = [b.strip().split()[0]
+                     for b in srcdict["Build-Depends"].split(",")
+                     if "ghc" in b]
+            # Compare the stripped name: entries after the first still
+            # carry the whitespace that followed the comma, so testing the
+            # raw entry only matched when "ghc:native" came first.
+            self._builddeps.extend(n for n in names if n != "ghc:native")
+        if "Build-Depends-Indep" in srcdict:
+            self._builddeps.extend(
+                [b.strip().split()[0]
+                 for b in srcdict["Build-Depends-Indep"].split(",")
+                 if "ghc" in b])
+        # Filled in lazily by the builddeps property
+        self._missing_builddeps = []
+        # Last reason why no action could be suggested for this package
+        self.error = None
+        # package-plan entry, attached by DHGPackages/DHGBinary
+        self.pplan = None
+
+    def __eq__(self, other):
+        """Sources are equal when both their name and version match."""
+        if self.name != other.name:
+            return False
+        return self.version == other.version
+
+    def __hash__(self):
+        """Hash on the same fields that __eq__ compares."""
+        key = "%s-%s" % (self.name, self.version)
+        return hash(key)
+
+    @cached_property
+    def binaries(self):
+        """Binary package objects built from this source.
+
+        Names from the Binary field that are not in the database are
+        logged and left out.
+        """
+        known = self.packages.binaries
+        found = []
+        for bname in self._binaries:
+            if bname not in known:
+                log.warning("Missing binary package '%s' for source package"
+                            " '%s'", bname, self)
+                continue
+            found.append(known[bname])
+        return found
+
+    @cached_property
+    def builddeps(self):
+        """Binary package objects this source build-depends on.
+
+        Names that are not in the database are logged and collected in
+        self._missing_builddeps as a side effect.
+        """
+        known = self.packages.binaries
+        resolved = []
+        for depname in self._builddeps:
+            if depname not in known:
+                log.warning("Missing build-dependency '%s' for source"
+                            " package '%s'", depname, self)
+                self._missing_builddeps.append(depname)
+                continue
+            resolved.append(known[depname])
+        return resolved
+
+    def __str__(self):
+        """Render as "<name>_<version>"."""
+        parts = (self.name, self.version)
+        return "%s_%s" % parts
+
+    @cached_property
+    def missing_builddeps(self):
+        """Names of build-dependencies that are not in the database."""
+        # Evaluated only for its side effect: resolving the
+        # build-dependencies is what fills self._missing_builddeps.
+        self.builddeps  # noqa: B018
+        return self._missing_builddeps
+
+    @cached_property
+    def uninstallable_builddeps(self):
+        """Resolved build-dependencies that are not installable."""
+        return [bd for bd in self.builddeps if not bd.is_installable]
+
+    @property
+    def is_buildable(self):
+        """Tell whether all recorded build-dependencies can be satisfied.
+
+        False when any build-dependency is missing or uninstallable.  If an
+        uninstallable build-dependency is an ignored package and this
+        source is not outdated, the reason is also stored in self.error
+        and logged as a warning.
+        """
+        for dep in self.uninstallable_builddeps:
+            if not dep.is_ignored or self.is_old:
+                continue
+            self.error = ("Source package '%s' depends on ignored"
+                          " package '%s'" % (self, dep.source))
+            log.warning(self.error)
+            return False
+        if self.uninstallable_builddeps:
+            return False
+        return not self.missing_builddeps
+
+    def has_cyclic_dependencies(self):
+        """Whether the package has cyclic dependencies.
+
+        Return True if one of the package's own binaries is reachable by
+        following build-dependencies transitively (the ghc, ghc-prof and
+        ghc-doc binaries are not followed).  Each source package is
+        expanded at most once, so the walk terminates even when some
+        *other* packages build-depend on each other.
+        """
+        toolchain = ("ghc", "ghc-prof", "ghc-doc")
+        own = set(b.name for b in self.binaries)
+        expanded = set()  # names of sources whose build-deps were followed
+        frontier = set(b for b in self.builddeps
+                       if b.name not in toolchain)
+        while frontier:
+            if not own.isdisjoint(b.name for b in frontier):
+                return True
+            following = set()
+            for src in [b.source for b in frontier]:
+                if src.name in expanded:
+                    continue
+                expanded.add(src.name)
+                following.update(b for b in src.builddeps
+                                 if b.name not in toolchain)
+            frontier = following
+        return False
+
+    @property
+    def pplan_version(self):
+        """Version recorded for this package in the package plan.
+
+        Asserts that a plan entry is attached.
+        """
+        plan = self.pplan
+        assert plan, self
+        return plan["version"]
+
+    @cached_property
+    def is_old(self):
+        """Whether the planned version is newer than the archive version.
+
+        The comparison ignores the epoch of the archive version.
+        """
+        # Keep only what follows the first ":" (the whole string when
+        # there is no epoch).
+        without_epoch = self.version.split(":", 1)[-1]
+        newer = apt_pkg.version_compare(self.pplan_version, without_epoch)
+        return newer > 0
+
+    @cached_property
+    def is_dhg_package(self):
+        """Truthy when Vcs-Git points into the DHG_packages repository.
+
+        A missing or empty Vcs-Git value is returned unchanged.
+        """
+        vcs = self.vcs_git
+        if not vcs:
+            return vcs
+        return "DHG_packages" in vcs
+
+    def non_dhg_package(self, args):
+        """Return True when this package must be skipped as non-DHG.
+
+        That is only the case when --ignore-non-dhg-packages was given and
+        the package is not kept in the DHG_packages repo; the reason is
+        then stored in self.error.
+        """
+        if not args.ignore_non_dhg_packages or self.is_dhg_package:
+            return False
+        self.error = ("Ignoring source package '%s' which is not"
+                      " part of the DHG_packages repo" % self)
+        log.debug(self.error)
+        return True
+
+    def get_bugs(self):
+        """Return the bug numbers "bts select" prints for this source.
+
+        Bugs listed in the database's ignored_bugs set are removed.
+        """
+        query = ["bts", "select", "source:%s" % self.name]
+        reported = set(check_output(query, text=True).split())
+        return reported - self.packages.ignored_bugs
+
+    def can_upload(self):
+        """Check whether an upload of this package can be suggested.
+
+        The checks run in order: the package is on the plan, is not marked
+        as ignored, has no cyclic dependencies, is buildable, and the
+        DHG_packages repo holds no tag newer than the archive version.
+        On the first failing check the reason is stored in self.error,
+        logged, and False is returned.
+        """
+        def refuse(reason, emit):
+            # Record and log why the upload is not possible.
+            self.error = reason
+            emit(self.error)
+            return False
+
+        if self.pplan is None:
+            return refuse("Source package '%s' not on our plan" % self,
+                          log.debug)
+        assert "version" in self.pplan, self
+        if self.pplan["ignore"]:
+            return refuse("Package '%s' is marked as ignored" % self,
+                          log.debug)
+        if self.has_cyclic_dependencies():
+            return refuse("Package '%s' has cyclic dependencies" % self,
+                          log.warning)
+        if not self.is_buildable:
+            return refuse("Package '%s' is unbuildable" % self, log.debug)
+
+        # See if a newer version has been tagged on our repo.  Tags are
+        # named "<source>_v<version>".
+        prefix = self.name + "_v"
+        has_epoch = ':' in self.version
+        tags = self.packages.dhgrepo.git.tag(l="%s*" % prefix)  # noqa
+        for tag in tags.split():
+            tagged = tag[len(prefix):]
+            if tagged.startswith("darcs"):
+                # errata
+                continue
+            if has_epoch:
+                # The tag spells the epoch separator as "_"
+                tagged = tagged.replace("_", ":", 1)
+            if apt_pkg.version_compare(tagged, self.version) > 0:
+                return refuse("A newer version for source package '%s'"
+                              " exists on repo but not on archive" % self,
+                              log.warning)
+        return True
+
+    def stale_haddock_interface(self):
+        """Whether a binary depends on an outdated haddock interface.
+
+        For every binary only the first "haddock-interface-*" dependency
+        is compared with the interface currently provided by ghc.
+        """
+        current = self.packages.haddock_interface
+        for pkg in self.binaries:
+            first = next((d for d in pkg._deps
+                          if d.startswith("haddock-interface-")), None)
+            if first is not None and first != current:
+                return True
+        return False
+
+    def stale_binaries(self):
+        """Package has been uploaded, but not built yet.
+
+        True when some binary's version, with any trailing "+bN" binNMU
+        suffix removed, differs from the source version.
+        """
+        for pkg in self.binaries:
+            built = pkg.version
+            binnmu = re.search(r"\+b\d+$", built)
+            if binnmu is not None:
+                built = built[:binnmu.start()]
+            if built == self.version:
+                continue
+            self.error = "Stale binaries for source package '%s'" % self
+            log.debug(self.error)
+            return True
+        return False
+
+    def depends_on_old(self):
+        """Package has dependencies with newer versions available.
+
+        True as soon as the source of one build-dependency is outdated.
+        """
+        for dep_source in (bd.source for bd in self.builddeps):
+            if not dep_source.is_old:
+                continue
+            log.debug("Package '%s' depends on '%s', which needs update",
+                      self, dep_source)
+            return True
+        return False
+
+    def post_new_upload(self):
+        """Package needs a post-NEW sourceful upload.
+
+        Decided by searching the package's britney excuses text.
+        """
+        entry = self.packages.excuses[self.name]
+        text = "".join(entry["excuses"])
+        return "a new source-only upload is needed" in text
+
+    def missing_build(self):
+        """Package is missing a build in one of the core architectures.
+
+        Decided by the "missingbuild" reason in the britney excuses.
+        """
+        reasons = self.packages.excuses[self.name]["reason"]
+        return "missingbuild" in reasons
+
+    def print_action(self, action):
+        """Print an action for the current package.
+
+        The line also lists the package's open bugs, if any, in a stable
+        order so that repeated runs produce identical output.
+        """
+        bugs = self.get_bugs()
+        if bugs:
+            # get_bugs() returns a set; sort shorter numbers first so that
+            # digit strings come out in numeric order.
+            ordered = sorted(bugs, key=lambda b: (len(b), b))
+            bugs = " (Open bugs: %s)" % ", ".join(ordered)
+        else:
+            bugs = ""
+
+        print("Action for '%s': %s%s" % (self, action, bugs))
+
+    def action(self, args):
+        """Print the suggested action for this package, if there is one.
+
+        Returns True when an action was printed and False otherwise.  The
+        checks are ordered: packages that cannot be uploaded are skipped,
+        stale binaries are only reported when a build is missing, and a
+        package whose build-dependencies need an update has to wait.
+        """
+        if self.name in DHG_SOURCES_NO_ACTION:
+            # Nothing to do for these packages
+            return False
+
+        log.debug("Examining package '%s'...", self)
+
+        if self.non_dhg_package(args) or not self.can_upload():
+            return False
+
+        if self.stale_binaries():
+            if not self.missing_build():
+                return False
+            self.print_action("Missing build")
+            return True
+
+        if self.depends_on_old():
+            return False
+
+        # First matching condition decides the suggestion
+        suggestion = None
+        if self.is_old:
+            suggestion = "Update to '%s'" % self.pplan_version
+        elif self.post_new_upload():
+            suggestion = "Sourceful upload (post-NEW)"
+        elif self.stale_haddock_interface():
+            suggestion = "Sourceful upload (stale haddock interface)"
+
+        if suggestion is None:
+            log.debug("Nothing to do for package '%s'", self)
+            return False
+        self.print_action(suggestion)
+        return True
+
+
+class DHGBinary(object):
+    def __init__(self, packages, bindict):
+        """Build a binary package from one Packages paragraph.
+
+        packages is the owning DHGPackages database.  Besides recording
+        names, provided names and "ghc"/"haddock" dependencies, a
+        Ghc-Package field creates the package-plan entry for the Haskell
+        package and attaches it to the source package.
+        """
+        self.packages = packages
+        self.name = bindict["Package"]
+        self.version = bindict["Version"]
+        # The Source field may carry a version; keep the first word only
+        self._source = bindict.get("Source", self.name).split()[0].strip()
+        # Packages known to lack Provides/Ghc-Package: don't warn on them
+        unchecked = self.name in DHG_BINARIES_PROVIDES_IGNORE
+
+        if "Provides" in bindict:
+            self.provides = [entry.strip().split()[0]
+                             for entry in bindict["Provides"].split(",")]
+        else:
+            self.provides = []
+            if not unchecked and self.name.endswith(("-dev", "-prof")):
+                log.warning("Binary package '%s' doesn't have a Provides"
+                            " field", self.name)
+
+        if "Depends" in bindict:
+            self._deps = [entry.strip().split()[0]
+                          for entry in bindict["Depends"].split(",")
+                          if "ghc" in entry or "haddock" in entry]
+        else:
+            self._deps = []
+        # Filled in lazily by the deps property
+        self._missing_deps = []
+
+        # Populate package-plan properties
+        if "Ghc-Package" in bindict:
+            # The value is split on its last two dashes; the first two
+            # parts are used as Haskell package name and version.
+            pname, pver, _ = bindict["Ghc-Package"].rsplit("-", 2)
+            assert pname not in self.packages.pplan, pname
+            self.packages.pplan[pname] = {"debian_version": pver}
+            self.source.pplan = self.packages.pplan[pname]
+        elif not unchecked and self.name.endswith("-dev"):
+            log.warning("Binary package '%s' doesn't have a Ghc-Package"
+                        " field", self.name)
+
+    def __eq__(self, other):
+        """Binaries are equal when both their name and version match."""
+        if self.name != other.name:
+            return False
+        return self.version == other.version
+
+    def __hash__(self):
+        """Hash on the same fields that __eq__ compares."""
+        key = "%s-%s" % (self.name, self.version)
+        return hash(key)
+
+    @cached_property
+    def source(self):
+        """The source package object this binary is built from.
+
+        Raises RuntimeError when that source is not in the database.
+        """
+        registry = self.packages.sources
+        if self._source in registry:
+            return registry[self._source]
+        raise RuntimeError("Missing source package '%s' for binary package"
+                           " '%s'" % (self._source, self))
+
+    @cached_property
+    def deps(self):
+        """Binary package objects this binary depends on.
+
+        Names that are not in the database are collected in
+        self._missing_deps as a side effect (no warning is logged).
+        """
+        known = self.packages.binaries
+        resolved = []
+        for depname in self._deps:
+            if depname not in known:
+                self._missing_deps.append(depname)
+                continue
+            resolved.append(known[depname])
+        return resolved
+
+    @cached_property
+    def missing_deps(self):
+        """Names of dependencies that are not in the database."""
+        # Evaluated only for its side effect: resolving the dependencies
+        # is what fills self._missing_deps.
+        self.deps  # noqa: B018
+        return self._missing_deps
+
+    @cached_property
+    def uninstallable_deps(self):
+        """Resolved dependencies that are not installable themselves."""
+        return [dep for dep in self.deps if not dep.is_installable]
+
+    def __str__(self):
+        """Render as "<name>_<version>"."""
+        parts = (self.name, self.version)
+        return "%s_%s" % parts
+
+    @property
+    def is_installable(self):
+        """True when no dependency is missing or itself uninstallable."""
+        if self.missing_deps:
+            return False
+        return not self.uninstallable_deps
+
+    @property
+    def is_ignored(self):
+        """Whether this binary is ignored.
+
+        True for names in DHG_BINARIES_IGNORE; otherwise the "ignore" flag
+        of the source package's plan entry.
+        """
+        return (self.name in DHG_BINARIES_IGNORE
+                or self.source.pplan["ignore"])
+
+
+def parse_args():
+    """Parse the command line (sys.argv) and return the namespace."""
+    parser = argparse.ArgumentParser(description="DHG Packages Overview")
+    option = parser.add_argument
+
+    # What to examine
+    option("--distribution", metavar="DIST", dest="dist",
+           default="unstable", help="Examine distribution DIST")
+    option("--arch", metavar="ARCH", default="amd64",
+           help="Examine architecture ARCH")
+
+    # How to run
+    option("--offline", action="store_true", default=False,
+           help="Do not download files (cached files must"
+                " be available)")
+    option("-v", "--verbose", action="store_true", default=False,
+           help="Turn on debug printing")
+
+    # Where things live
+    option("-C", "--cache-dir", metavar="DIR", default="~/.cache/dhg/",
+           help="Use DIR to save cached data")
+    option("-R", "--repo-dir", metavar="DIR", default="./",
+           help="Locate DHG repos under given DIR")
+
+    option("--ignore-non-dhg-packages", action="store_true", default=False,
+           help="Ignore packages that are outside the"
+                " DHG_packages repo")
+
+    return parser.parse_args()
+
+
+def _download(url, dest, what, decompress=False, timeout=60):
+    """Fetch url into the file dest; what names the payload in the log.
+
+    With decompress=True the body is LZMA-decompressed before it is
+    written.  Raises requests.HTTPError on a 4xx/5xx response so that an
+    error page is never decoded or stored in the cache.
+    """
+    log.info("Downloading %s from %s", what, url)
+    r = requests.get(url, allow_redirects=True, timeout=timeout)
+    r.raise_for_status()
+    data = lzma.decompress(r.content) if decompress else r.content
+    with open(dest, "wb") as f:
+        f.write(data)
+
+
+def get_archives(args):
+    """Return the paths of the (Sources, Packages, excuses) cache files.
+
+    Unless args.offline is set, the three files are downloaded afresh
+    into args.cache_dir first (the two archive indices are stored
+    decompressed).  In offline mode the cached files must already exist,
+    otherwise RuntimeError is raised.
+    """
+    # Get location for cached files
+    cache_dir = os.path.expanduser(args.cache_dir)
+    srcf = os.path.join(cache_dir, "Sources_%s" % args.dist)
+    pkgf = os.path.join(cache_dir,
+                        "Packages_%s_%s" % (args.dist, args.arch))
+    excf = os.path.join(cache_dir, "Excuses.yaml")
+
+    if args.offline:
+        if not (os.path.isfile(srcf) and os.path.isfile(pkgf)
+                and os.path.isfile(excf)):
+            raise RuntimeError("Cannot run in offline mode: Cached files"
+                               " do not exist")
+        log.debug("Using cached files found at '%s'", args.cache_dir)
+        return (srcf, pkgf, excf)
+
+    os.makedirs(os.path.dirname(srcf), exist_ok=True)
+
+    _download(DEBIAN_SRC_URL % args.dist, srcf, "Sources",
+              decompress=True)
+    _download(DEBIAN_PKG_URL % (args.dist, args.arch), pkgf, "Packages",
+              decompress=True)
+    _download("https://release.debian.org/britney/excuses.yaml", excf,
+              "britney excuses")
+
+    return (srcf, pkgf, excf)
+
+
+# p = DHGPackages(sources, packages)
+# p.parse()
+
+def main():
+    """Entry point: fetch the inputs, parse them and print the actions."""
+    args = parse_args()
+
+    logging.basicConfig(
+        level=logging.DEBUG if args.verbose else logging.INFO,
+        format="%(levelname)s: %(message)s",
+    )
+
+    srcf, pkgf, excf = get_archives(args)
+    database = DHGPackages(srcf, pkgf, excf, args.repo_dir)
+    database.parse()
+    database.actions(args)
+
+
+if __name__ == "__main__":
+    sys.exit(main())



View it on GitLab: https://salsa.debian.org/haskell-team/tools/-/commit/457f634ca479785648019c0ed74899cdca14955e

-- 
View it on GitLab: https://salsa.debian.org/haskell-team/tools/-/commit/457f634ca479785648019c0ed74899cdca14955e
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/pkg-haskell-commits/attachments/20241006/78b0caa8/attachment-0001.htm>


More information about the Pkg-haskell-commits mailing list