[Git][haskell-team/tools][master] Add script to help with transitions
Ilias Tsitsimpis (@iliastsi)
gitlab at salsa.debian.org
Sun Oct 6 17:03:57 BST 2024
Ilias Tsitsimpis pushed to branch master at Debian Haskell Group / tools
Commits:
457f634c by Ilias Tsitsimpis at 2024-10-06T19:02:00+03:00
Add script to help with transitions
Add a script that examines the current state of all Haskell packages in
the Debian archive and suggests actions that will help resolve
issues/update packages needed to complete a transition.
- - - - -
1 changed file:
- + transition.py
Changes:
=====================================
transition.py
=====================================
@@ -0,0 +1,651 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2024 Ilias Tsitsimpis <iliastsi at debian.org>
+
+"""Suggest actions to complete a Haskell transition.
+
+This program examines the current state of all Haskell packages in the Debian
+archive and suggests actions that will help resolve issues/update packages
+needed to complete a transition.
+"""
+
+import os
+import re
+import git
+import sys
+import lzma
+import yaml
+import apt_pkg
+import logging
+import argparse
+import requests
+from subprocess import check_output
+try:
+ from functools import cached_property
+except Exception:
+ from cached_property import cached_property
+from debian.deb822 import Sources, Packages
+
# Logger shared by every class and function in this module.
log = logging.getLogger(__name__)


# Archive index locations.  The "%s" placeholders take the distribution
# name and, for the Packages index, the architecture.
DEBIAN_MIRROR = "http://deb.debian.org/debian/"
DEBIAN_SRC_URL = DEBIAN_MIRROR + "dists/%s/main/source/Sources.xz"
DEBIAN_PKG_URL = DEBIAN_MIRROR + "dists/%s/main/binary-%s/Packages.xz"

# Source packages that are registered even when "ghc" does not appear in
# their Build-Depends field.
# NOTE(review): "haskell-devscripts" and "haskell-platform" are also listed
# in DHG_SOURCES_IGNORE, and add_source() consults the ignore list first, so
# only "haskell-diagrams" currently takes effect here -- confirm intent.
DHG_SOURCES_EXTRA = [
    "haskell-devscripts",
    "haskell-diagrams",
    "haskell-platform",
]

# Source packages that depend on GHC but are not Haskell libraries; they
# (and the binaries built from them) are skipped entirely.
DHG_SOURCES_IGNORE = [
    "diffoscope",
    "hkl",
    "haskell-devscripts",
    "haskell-mode",
    "haskell-platform",
    "nginx-confgen",
    "dh-runit",
    "pkg-haskell-tools",
    "pandoc",
]

# Binary packages that are skipped.  This may be because:
# - They provide modules that are now available in GHC.
# - They are causing other problems, and are not managed by us.
DHG_BINARIES_IGNORE = [
    "ganeti-htools",
    "libghc-exceptions-dev",
    "libghc-exceptions-doc",
    "libghc-exceptions-prof",
]

# Source packages that are parsed (e.g., to retrieve their dependencies)
# but for which no action is ever suggested.
DHG_SOURCES_NO_ACTION = [
    "ghc",
    "ganeti",
]

# Binary packages known to lack a proper Provides/Ghc-Package field; no
# warning is emitted for them.
DHG_BINARIES_PROVIDES_IGNORE = [
    "libghc-diagrams-dev",
    "libghc-diagrams-prof",
    "haskell-platform-prof",
]
+
+
class DHGPackages(object):
    """Registry of the Haskell source and binary packages in the archive.

    Attributes:
        sources: maps a source package name to its DHGSource.
        binaries: maps a binary package name, and every name that binary
            provides, to its DHGBinary.
        excuses: maps a source package name to a dict with the "reason"
            and "excuses" lists collected from the britney excuses file.
        pplan: maps a package-plan name to a dict that may hold the keys
            "version", "ignore" and "debian_version".
        haddock_interface: the "haddock-interface-*" name provided by the
            "ghc" binary package; None until parse() has run.
    """

    def __init__(self, srcf, pkgf, excf, repos):
        """Remember the input locations and open the DHG_packages repo.

        Args:
            srcf: path of the uncompressed Sources file.
            pkgf: path of the uncompressed Packages file.
            excf: path of the britney excuses YAML file.
            repos: directory holding the "package-plan" and "DHG_packages"
                checkouts; a leading "~" is expanded.
        """
        apt_pkg.init_system()

        self.sources = {}
        self.binaries = {}
        self.excuses = {}
        self.pplan = {}
        self.haddock_interface = None

        self.srcf = srcf
        self.pkgf = pkgf
        self.excf = excf
        repos = os.path.expanduser(repos)
        self.planrepo = os.path.join(repos, "package-plan")
        self.dhgrepo = git.Repo(os.path.join(repos, "DHG_packages"))

    def parse(self):
        """Load Sources, Packages, the package plan and britney excuses.

        Raises:
            RuntimeError: if the "ghc" binary package does not provide
                exactly one "haddock-interface-*" name.
        """
        # The input files are opened explicitly as UTF-8 so that parsing
        # does not depend on the locale of the calling environment.
        log.info("Parsing Sources file...")
        with open(self.srcf, "r", encoding="utf-8") as f:
            for s in Sources.iter_paragraphs(f):
                self.add_source(s)

        log.info("Parsing Packages file...")
        with open(self.pkgf, "r", encoding="utf-8") as f:
            for p in Packages.iter_paragraphs(f):
                self.add_binary(p)

        # Compute current haddock interface
        interfaces = [p for p in self.binaries["ghc"].provides
                      if p.startswith("haddock-interface-")]
        if len(interfaces) != 1:
            raise RuntimeError("Expected exactly one haddock interface"
                               " provided by 'ghc', found: %s" % interfaces)
        self.haddock_interface = interfaces[0]

        log.info("Parsing package-plan file...")
        plan = os.path.join(self.planrepo, "packages.txt")
        with open(plan, "r", encoding="utf-8") as f:
            self._parse_package_plan(f)

        log.info("Parsing britney excuses...")
        with open(self.excf, "r", encoding="utf-8") as f:
            excuses = yaml.safe_load(f)
        self._record_excuses(excuses)

        # Add ghc to pplan: it is always considered up to date.
        self.pplan["ghc"] = {"version": self.sources["ghc"].version,
                             "debian_version": self.sources["ghc"].version,
                             "ignore": False}
        self.sources["ghc"].pplan = self.pplan["ghc"]

    def _parse_package_plan(self, lines):
        """Merge package-plan entries read from *lines* into self.pplan.

        Each entry is "name version [flags...]"; text after "#" is a
        comment.  Entries flagged "avoid" are skipped, entries flagged
        "ignore" or "obsolete" are recorded as ignored.
        """
        for line in lines:
            fields = line.partition("#")[0].split()
            if (not fields) or ("avoid" in fields):
                continue
            if len(fields) < 2:
                # A name without a version cannot be planned for.
                log.warning("Ignoring package-plan entry '%s' without a"
                            " version", fields[0])
                continue
            name, version = fields[0], fields[1]
            ignore = ("ignore" in fields) or ("obsolete" in fields)

            if name in self.pplan:
                # Entry was created from a Ghc-Package field; the dict is
                # shared with the source package, so update it in place.
                self.pplan[name]["version"] = version
                self.pplan[name]["ignore"] = ignore
                continue

            # Otherwise only entries carrying a "binary" flag can be
            # matched: "binary" means the binary package is called like
            # the entry, "binary=NAME" names it explicitly.
            bname = None
            for opt in fields[2:]:
                if not opt.startswith("binary"):
                    continue
                bname = name
                if "=" in opt:
                    bname = opt.split("=")[1]
                break

            if bname in self.binaries:
                source = self.binaries[bname].source
                self.pplan[name] = {
                    "version": version,
                    "ignore": ignore,
                    "debian_version": source.version,
                }
                source.pplan = self.pplan[name]
                continue
            log.warning("Haskell package '%s' is missing from Debian",
                        name)

    def _record_excuses(self, excuses):
        """Store the excuses that concern registered source packages."""
        for name in self.sources:
            self.excuses[name] = {"reason": [], "excuses": []}
        for item in excuses["sources"]:
            entry = self.excuses.get(item["source"])
            if entry is None:
                continue
            entry["excuses"].extend(item.get("excuses", []))
            entry["reason"].extend(item.get("reason", []))

    def add_source(self, srcdict):
        """Register a Sources paragraph if it is a Haskell package.

        A package qualifies when "ghc" occurs in its Build-Depends or it
        is listed in DHG_SOURCES_EXTRA, unless DHG_SOURCES_IGNORE names
        it.  When a name occurs twice, the highest version is kept.
        """
        name = srcdict["Package"]
        if name in DHG_SOURCES_IGNORE:
            # Ignore non-haskell packages
            return
        if ("ghc" not in srcdict.get("Build-Depends", "")
                and name not in DHG_SOURCES_EXTRA):
            return

        source = DHGSource(self, srcdict)

        if source.name not in self.sources:
            self.sources[source.name] = source
            return
        log.debug("Double entry for '%s' source package", source.name)
        old_source = self.sources[source.name]
        if apt_pkg.version_compare(source.version, old_source.version) > 0:
            self.sources[source.name] = source

    def add_binary(self, bindict):
        """Register a Packages paragraph if it is a Haskell package.

        The binary is indexed under its own name and under every name it
        provides; a name that is already taken trips an assertion.
        """
        name = bindict["Package"]
        srcname = bindict.get("Source", name).split()[0]
        if srcname in DHG_SOURCES_IGNORE:
            return
        if name in DHG_BINARIES_IGNORE:
            # These packages are also provided by others
            return
        if not ("ghc" in name
                or "ghc" in bindict.get("Depends", "")
                or srcname in self.sources):
            return

        binary = DHGBinary(self, bindict)

        assert binary.name not in self.binaries, binary.name
        self.binaries[binary.name] = binary
        for p in binary.provides:
            assert p not in self.binaries, p
            self.binaries[p] = binary

    def actions(self, args):
        """Evaluate every source package and print the suggested actions.

        When no package yields an action, the error recorded on each
        source package (if any) is logged instead.
        """
        log.info("Evaluating available actions...")
        res = [s.action(args) for s in self.sources.values()]
        if not any(res):
            for s in self.sources.values():
                if s.error:
                    log.error(s.error)

    @cached_property
    def ignored_bugs(self):
        """Set of bug numbers that are left out of per-package bug lists.

        These are the bugs of the Haskell maintainers' list carrying one
        of the "ftbfs-*-after-build" usertags, as returned by `bts`.
        """
        bugs = set()
        for tag in ("ftbfs-source-after-build", "ftbfs-binary-after-build"):
            cmd = ["bts", "select",
                   "maintainer:pkg-haskell-maintainers@lists.alioth.debian.org",
                   "user:lucas@debian.org", "tag:%s" % tag]
            bugs.update(check_output(cmd, text=True).split())
        return bugs
+
+
class DHGSource(object):
    """A source package, its GHC-related build-dependencies and plan entry.

    Attributes:
        packages: the owning DHGPackages registry.
        name, version: the Package and Version fields.
        vcs_git: the Vcs-Git field, or None when absent.
        error: last reason why no action could be suggested, or None.
        pplan: the package-plan dict for this package, or None.
    """

    # Build-dependencies on the compiler itself are not followed when
    # looking for dependency cycles.
    _COMPILER_BINARIES = ("ghc", "ghc-prof", "ghc-doc")

    def __init__(self, packages, srcdict):
        """Build the object from a Sources paragraph (mapping-like)."""
        self.packages = packages
        self.name = srcdict["Package"]
        self.version = srcdict["Version"]
        self.vcs_git = srcdict.get("Vcs-Git")
        self._binaries = [b.strip() for b in srcdict["Binary"].split(",")]
        self._builddeps = []
        for field in ("Build-Depends", "Build-Depends-Indep"):
            if field in srcdict:
                self._builddeps.extend(self._ghc_relations(srcdict[field]))
        self._missing_builddeps = []
        self.error = None
        self.pplan = None

    @staticmethod
    def _ghc_relations(field):
        """Return the package names in *field* that mention "ghc".

        Only the first word of each comma-separated relation is kept, so
        version constraints and restrictions are dropped.  "ghc:native"
        is excluded; the name is compared after stripping, so the
        exclusion also works for items that follow a ", " separator.
        """
        names = []
        for relation in field.split(","):
            if "ghc" not in relation:
                continue
            name = relation.split()[0]
            if name != "ghc:native":
                names.append(name)
        return names

    def __eq__(self, other):
        if not isinstance(other, DHGSource):
            return NotImplemented
        return (self.name == other.name and self.version == other.version)

    def __hash__(self):
        return hash("%s-%s" % (self.name, self.version))

    @cached_property
    def binaries(self):
        """Registered DHGBinary objects built from this source."""
        bs = []
        for b in self._binaries:
            if b in self.packages.binaries:
                bs.append(self.packages.binaries[b])
            else:
                log.warning("Missing binary package '%s' for source package"
                            " '%s'", b, self)
        return bs

    @cached_property
    def builddeps(self):
        """Registered DHGBinary objects this source build-depends on.

        Names that are not registered are collected in
        self._missing_builddeps as a side effect.
        """
        bds = []
        for b in self._builddeps:
            if b in self.packages.binaries:
                bds.append(self.packages.binaries[b])
            else:
                log.warning("Missing build-dependency '%s' for source"
                            " package '%s'", b, self)
                self._missing_builddeps.append(b)
        return bds

    def __str__(self):
        return "%s_%s" % (self.name, self.version)

    @cached_property
    def missing_builddeps(self):
        """Names of build-dependencies that are not registered."""
        _ = self.builddeps  # noqa: F841  (fills _missing_builddeps)
        return self._missing_builddeps

    @cached_property
    def uninstallable_builddeps(self):
        """Build-dependencies that are registered but not installable."""
        return [bd for bd in self.builddeps if not bd.is_installable]

    @property
    def is_buildable(self):
        """Whether the package is buildable.

        Return False if the build dependencies of the package are missing
        or uninstallable.  When an uninstallable build-dependency is
        ignored and this package is not old, self.error is set as well.
        """
        for ub in self.uninstallable_builddeps:
            if ub.is_ignored and not self.is_old:
                self.error = ("Source package '%s' depends on ignored"
                              " package '%s'" % (self, ub.source))
                log.warning(self.error)
                return False
        return not (self.uninstallable_builddeps or self.missing_builddeps)

    def has_cyclic_dependencies(self):
        """Whether the package has cyclic dependencies.

        Return True if one of the package's own binaries is reachable by
        following build-dependencies (the compiler packages excepted).
        """
        own = set(b.name for b in self.binaries)
        compiler = self._COMPILER_BINARIES
        # Sources whose build-dependencies were already expanded.  Without
        # this, a cycle among the dependencies that does not involve this
        # package would keep the loop running forever.
        seen = set()
        frontier = set(b for b in self.builddeps if b.name not in compiler)
        while frontier:
            if any(b.name in own for b in frontier):
                return True
            sources = set(b.source for b in frontier) - seen
            seen |= sources
            frontier = set(b for s in sources for b in s.builddeps
                           if b.name not in compiler)
        return False

    @property
    def pplan_version(self):
        """The version the package plan asks for."""
        assert self.pplan, self
        return self.pplan["version"]

    @cached_property
    def is_old(self):
        """Whether the planned version is newer than the archive version."""
        # Remove epoch, if any
        version = self.version.split(":", 1)[-1]
        return (apt_pkg.version_compare(self.pplan_version, version) > 0)

    @cached_property
    def is_dhg_package(self):
        """Whether Vcs-Git points into the DHG_packages repo."""
        return bool(self.vcs_git and "DHG_packages" in self.vcs_git)

    def non_dhg_package(self, args):
        """Return True when the package must be skipped as non-DHG.

        This only happens when args.ignore_non_dhg_packages is set and
        the package is not maintained inside the DHG_packages repo.
        """
        if args.ignore_non_dhg_packages and not self.is_dhg_package:
            self.error = ("Ignoring source package '%s' which is not"
                          " part of the DHG_packages repo" % self)
            log.debug(self.error)
            return True
        return False

    def get_bugs(self):
        """Bug numbers `bts` reports for this source, minus ignored ones."""
        cmd = ["bts", "select", "source:%s" % self.name]
        bugs = set(check_output(cmd, text=True).split())
        return bugs - self.packages.ignored_bugs

    def can_upload(self):
        """Whether an upload of this package can be considered at all.

        Returns False, with the reason in self.error, when the package
        is not on the plan, is ignored, has cyclic dependencies, is
        unbuildable, or a newer version is already tagged in the repo.
        """
        if self.pplan is None:
            self.error = "Source package '%s' not on our plan" % self
            log.debug(self.error)
            return False
        assert "version" in self.pplan, self
        if self.pplan["ignore"]:
            self.error = "Package '%s' is marked as ignored" % self
            log.debug(self.error)
            return False
        if self.has_cyclic_dependencies():
            self.error = "Package '%s' has cyclic dependencies" % self
            log.warning(self.error)
            return False
        if not self.is_buildable:
            self.error = "Package '%s' is unbuildable" % self
            log.debug(self.error)
            return False
        # See if a newer version has been tagged on our repo
        tname = self.name + "_v"
        tags = self.packages.dhgrepo.git.tag(l="%s*" % tname).split()  # noqa
        for t in tags:
            tver = t[len(tname):]
            if tver.startswith("darcs"):
                # errata
                continue
            # Handle epoch: the tag uses "_" where the version has ":".
            if ':' in self.version:
                tver = tver.replace("_", ":", 1)
            if apt_pkg.version_compare(tver, self.version) > 0:
                self.error = ("A newer version for source package '%s'"
                              " exists on repo but not on archive" % self)
                log.warning(self.error)
                return False
        return True

    def stale_haddock_interface(self):
        """Whether a binary depends on a non-current haddock interface.

        Only the first "haddock-interface-*" dependency of each binary
        is inspected.
        """
        for p in self.binaries:
            for d in p._deps:
                if d.startswith("haddock-interface-"):
                    if d != self.packages.haddock_interface:
                        return True
                    break
        return False

    def stale_binaries(self):
        """Package has been uploaded, but not built yet."""
        for b in self.binaries:
            # Drop a trailing binNMU suffix ("+bN") before comparing.
            binver = re.sub(r"\+b\d+$", "", b.version)
            if binver != self.version:
                self.error = "Stale binaries for source package '%s'" % self
                log.debug(self.error)
                return True
        return False

    def depends_on_old(self):
        """Package has dependencies with newer versions available."""
        for bd in self.builddeps:
            if bd.source.is_old:
                log.debug("Package '%s' depends on '%s', which needs update",
                          self, bd.source)
                return True
        return False

    def post_new_upload(self):
        """Package needs a post-NEW sourceful upload."""
        excuses = "".join(self.packages.excuses[self.name]["excuses"])
        return ("a new source-only upload is needed" in excuses)

    def missing_build(self):
        """Package is missing a build in one of the core architectures."""
        excuses = self.packages.excuses[self.name]
        return ("missingbuild" in excuses["reason"])

    def print_action(self, action):
        """Print an action for the current package."""
        bugs = self.get_bugs()
        if bugs:
            # Sorted so that the output is stable between runs.
            bugs = " (Open bugs: %s)" % ", ".join(sorted(bugs))
        else:
            bugs = ""

        print("Action for '%s': %s%s" % (self, action, bugs))

    def action(self, args):
        """Print the suggested action for this package, if there is one.

        Returns True when an action was printed, False otherwise.
        """
        if self.name in DHG_SOURCES_NO_ACTION:
            # Nothing to do for these packages
            return False

        log.debug("Examining package '%s'...", self)

        if self.non_dhg_package(args):
            return False

        if not self.can_upload():
            return False

        if self.stale_binaries():
            if self.missing_build():
                self.print_action("Missing build")
                return True
            return False

        if self.depends_on_old():
            return False

        if self.is_old:
            self.print_action("Update to '%s'" % self.pplan_version)
            return True

        if self.post_new_upload():
            self.print_action("Sourceful upload (post-NEW)")
            return True

        if self.stale_haddock_interface():
            self.print_action("Sourceful upload (stale haddock interface)")
            return True

        log.debug("Nothing to do for package '%s'", self)
        return False
+
+
class DHGBinary(object):
    """A binary package, the names it provides and its GHC-related deps.

    Attributes:
        packages: the owning DHGPackages registry.
        name, version: the Package and Version fields.
        provides: names from the Provides field (versions dropped).
    """

    def __init__(self, packages, bindict):
        """Build the object from a Packages paragraph (mapping-like).

        When the paragraph has a Ghc-Package field, a package-plan entry
        holding the Debian version is created in packages.pplan and
        attached to the source package.

        Raises:
            RuntimeError: if a Ghc-Package field is present but the
                source package is not registered.
        """
        self.packages = packages
        self.name = bindict["Package"]
        self.version = bindict["Version"]
        # "Source" may read "name (version)"; keep only the name.
        self._source = bindict.get("Source", self.name).split()[0].strip()
        self.provides = []
        if "Provides" in bindict:
            # Empty items (e.g. after a trailing comma) are skipped.
            self.provides = [p.split()[0]
                             for p in bindict["Provides"].split(",")
                             if p.strip()]
        elif self.name in DHG_BINARIES_PROVIDES_IGNORE:
            pass
        elif self.name.endswith("-dev") or self.name.endswith("-prof"):
            log.warning("Binary package '%s' doesn't have a Provides field",
                        self.name)

        self._deps = []
        if "Depends" in bindict:
            self._deps = [d.split()[0]
                          for d in bindict["Depends"].split(",")
                          if "ghc" in d or "haddock" in d]
        self._missing_deps = []

        # Populate package-plan properties
        if "Ghc-Package" in bindict:
            # The field reads "<name>-<version>-<id>"; the name itself may
            # contain dashes, hence the split from the right.
            pname, pver, _ = bindict["Ghc-Package"].rsplit("-", 2)
            assert pname not in self.packages.pplan, pname
            self.packages.pplan[pname] = {"debian_version": pver}
            self.source.pplan = self.packages.pplan[pname]
        elif self.name in DHG_BINARIES_PROVIDES_IGNORE:
            pass
        elif self.name.endswith("-dev"):
            log.warning("Binary package '%s' doesn't have a Ghc-Package field",
                        self.name)

    def __eq__(self, other):
        if not isinstance(other, DHGBinary):
            return NotImplemented
        return (self.name == other.name and self.version == other.version)

    def __hash__(self):
        return hash("%s-%s" % (self.name, self.version))

    @cached_property
    def source(self):
        """The DHGSource this binary is built from.

        Raises:
            RuntimeError: if that source package is not registered.
        """
        if self._source not in self.packages.sources:
            raise RuntimeError("Missing source package '%s' for binary package"
                               " '%s'" % (self._source, self))
        return self.packages.sources[self._source]

    @cached_property
    def deps(self):
        """Registered DHGBinary objects this binary depends on.

        Names that are not registered are collected in
        self._missing_deps as a side effect.
        """
        ds = []
        for d in self._deps:
            if d in self.packages.binaries:
                ds.append(self.packages.binaries[d])
            else:
                self._missing_deps.append(d)
        return ds

    @cached_property
    def missing_deps(self):
        """Names of dependencies that are not registered."""
        _ = self.deps  # noqa: F841  (fills _missing_deps)
        return self._missing_deps

    @cached_property
    def uninstallable_deps(self):
        """Dependencies that are registered but not installable."""
        return [d for d in self.deps if not d.is_installable]

    def __str__(self):
        return "%s_%s" % (self.name, self.version)

    @property
    def is_installable(self):
        """True when no dependency is missing or uninstallable."""
        return not (self.missing_deps or self.uninstallable_deps)

    @property
    def is_ignored(self):
        """Whether the binary, or its source's plan entry, is ignored.

        A source without a plan entry, or with an entry that has no
        "ignore" key (one created from a Ghc-Package field only), counts
        as not ignored.
        """
        if self.name in DHG_BINARIES_IGNORE:
            return True
        pplan = self.source.pplan
        if not pplan:
            return False
        return pplan.get("ignore", False)
+
+
def parse_args(argv=None):
    """Parse the command line of the transition helper.

    Args:
        argv: list of argument strings to parse.  When None (the
            default), argparse reads the arguments from sys.argv.

    Returns:
        The argparse namespace with the attributes dist, arch, offline,
        verbose, cache_dir, repo_dir and ignore_non_dhg_packages.
    """
    parser = argparse.ArgumentParser(description="DHG Packages Overview")
    parser.add_argument("--distribution", metavar="DIST",
                        dest="dist", default="unstable",
                        help="Examine distribution DIST")
    parser.add_argument("--arch", metavar="ARCH", default="amd64",
                        help="Examine architecture ARCH")
    parser.add_argument("--offline", action="store_true", default=False,
                        help="Do not download files (cached files must"
                             " be available)")
    parser.add_argument("-v", "--verbose", action="store_true",
                        default=False,
                        help="Turn on debug printing")
    parser.add_argument("-C", "--cache-dir", metavar="DIR",
                        default="~/.cache/dhg/",
                        help="Use DIR to save cached data")
    parser.add_argument("-R", "--repo-dir", metavar="DIR", default="./",
                        help="Locate DHG repos under given DIR")
    parser.add_argument("--ignore-non-dhg-packages", action="store_true",
                        default=False,
                        help="Ignore packages that are outside the"
                             " DHG_packages repo")

    return parser.parse_args(argv)
+
+
def _download(url, dest, compressed, timeout=60):
    """Fetch *url* and store its (optionally xz-decompressed) body in *dest*.

    The body is fully fetched, checked and decompressed before *dest* is
    opened, so a failed download does not truncate an existing cached file.
    """
    r = requests.get(url, allow_redirects=True, timeout=timeout)
    # Fail here on an HTTP error instead of caching the error page.
    r.raise_for_status()
    data = lzma.decompress(r.content) if compressed else r.content
    with open(dest, "wb") as f:
        f.write(data)


def get_archives(args):
    """Return the paths of the Sources, Packages and excuses files.

    Unless args.offline is set, the three files are downloaded into
    args.cache_dir first (the directory is created when missing).

    Args:
        args: namespace providing cache_dir, dist, arch and offline.

    Returns:
        A (sources_path, packages_path, excuses_path) tuple.

    Raises:
        RuntimeError: in offline mode, if a cached file is missing.
    """
    # Get location for cached files
    cache_dir = os.path.expanduser(args.cache_dir)
    srcf = os.path.join(cache_dir, "Sources_%s" % args.dist)
    pkgf = os.path.join(cache_dir,
                        "Packages_%s_%s" % (args.dist, args.arch))
    excf = os.path.join(cache_dir, "Excuses.yaml")

    if args.offline:
        if not all(os.path.isfile(path) for path in (srcf, pkgf, excf)):
            raise RuntimeError("Cannot run in offline mode: Cached files"
                               " do not exist")
        log.debug("Using cached files found at '%s'", args.cache_dir)
        return (srcf, pkgf, excf)

    os.makedirs(os.path.dirname(srcf), exist_ok=True)

    url = DEBIAN_SRC_URL % args.dist
    log.info("Downloading Sources from %s", url)
    _download(url, srcf, compressed=True)

    url = DEBIAN_PKG_URL % (args.dist, args.arch)
    log.info("Downloading Packages from %s", url)
    _download(url, pkgf, compressed=True)

    url = "https://release.debian.org/britney/excuses.yaml"
    log.info("Downloading britney excuses from %s", url)
    _download(url, excf, compressed=False)

    return (srcf, pkgf, excf)
+
+
def main():
    """Run the tool: parse arguments, load the archive data, print actions.

    Logging goes through the root logger at DEBUG level when --verbose
    was given and at INFO level otherwise.
    """
    args = parse_args()

    if args.verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO
    logging.basicConfig(format="%(levelname)s: %(message)s", level=level)

    sources_path, packages_path, excuses_path = get_archives(args)
    registry = DHGPackages(sources_path, packages_path, excuses_path,
                           args.repo_dir)
    registry.parse()
    registry.actions(args)


if __name__ == "__main__":
    sys.exit(main())
View it on GitLab: https://salsa.debian.org/haskell-team/tools/-/commit/457f634ca479785648019c0ed74899cdca14955e
--
View it on GitLab: https://salsa.debian.org/haskell-team/tools/-/commit/457f634ca479785648019c0ed74899cdca14955e
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/pkg-haskell-commits/attachments/20241006/78b0caa8/attachment-0001.htm>
More information about the Pkg-haskell-commits
mailing list