[Piuparts-devel] Bug#705977: Faster reverse dependency calculation, and factor rdep chain length into package selection

Dave Steele dsteele at gmail.com
Tue Apr 23 03:14:54 UTC 2013


Package: piuparts
Version: 0.50
Severity: wishlist
Tags: patch
thanks

This is a placekeeper for merging the faster-rdep-calc(10) and
rdep-chain-len(2) branches. The first attempts to speed master by only
calculating reverse dependency metrics on demand, by package. The
second builds on that by adding a reverse dependency chain length
metric, and uses that to weight the packages returned by reserve().


https://github.com/davesteele/piuparts/commits/faster-rdep-calc
https://github.com/davesteele/piuparts/commits/rdep-chain-len

I've chosen not to restore the dependency on the current working
directory in detect_well_known_errors. That feature is localized to
one commit.


>From 9b5eba21462bc9e1e32df39c14f6559ebf42e271 Mon Sep 17 00:00:00 2001
From: David Steele <dsteele at gmail.com>
Date: Wed, 27 Feb 2013 19:07:09 -0500
Subject: [PATCH] Rework rdep metric calculations for faster operation.

---
 debian/changelog                    |    6 ++
 master-bin/detect_well_known_errors |   24 ++---
 piuparts-report.py                  |   34 +++----
 piupartslib/packagesdb.py           |  166 ++++++++++++++++++-----------------
 4 files changed, 115 insertions(+), 115 deletions(-)

diff --git a/debian/changelog b/debian/changelog
index b4d1c4d..7cfff9e 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -70,6 +70,12 @@ piuparts (0.51) UNRELEASED; urgency=low
     - Validate field names, and only use valid known problem conf files.
     - Minor HTML syntax fix.
     - Minor template integration.
+  * pupartslib/packagesdb.py
+    - Reworked rdep metrics (block_count, rrdep_count, waiting_count) to
+      calculate on-demand. Access methods are moved from Package to
+      PackagesDB.
+    - Add a metric for the worst-case reverse-dependency chain length.
+    - Weight the reserved packages returned by this additional factor.

   [ Holger Levsen ]
   * piuparts.py:
diff --git a/master-bin/detect_well_known_errors b/master-bin/detect_well_known_
index 6810a26..2ca1f4e 100755
--- a/master-bin/detect_well_known_errors
+++ b/master-bin/detect_well_known_errors
@@ -225,8 +225,9 @@ class FailureManager():
         def keyfunc( x, pkgsdb=self.pkgsdb, logdict=self.logdict):
             try:
                 pkg_name = get_pkg(x.pkgspec)
-                rdeps = pkgsdb.get_package(pkg_name).rrdep_count()
-            except KeyError:
+                rdeps = pkgsdb.rrdep_count(
+                               pkgsdb.get_package(pkg_name))
+            except (KeyError, AttributeError):
                 rdeps = 0

             is_failed = get_where(logdict[x.pkgspec]) == "fail"
@@ -374,8 +375,8 @@ def update_tpl( basedir, section, problem, failures, logdict
         bin_pkg = get_pkg(pkgspec)
         try:
             src_pkg = source_pkg(pkgspec, pkgsdb)
-            rdep_cnt = pkgsdb.get_package(bin_pkg).rrdep_count()
-        except KeyError:
+            rdep_cnt = pkgsdb.rrdep_count(pkgsdb.get_package(bin_pkg))
+        except (KeyError, AttributeError):
             src_pkg = bin_pkg
             rdep_cnt = 0

@@ -428,8 +429,8 @@ def update_html( section, logdict, problem_list, failures, c
     def keyfunc( x, pkgsdb=pkgsdb, logdict=logdict):
         try:
             pkg_name = get_pkg(x.pkgspec)
-            rdeps = pkgsdb.get_package(pkg_name).rrdep_count()
-        except KeyError:
+            rdeps = pkgsdb.rrdep_count(pkgsdb.get_package(pkg_name))
+        except (KeyError, AttributeError):
             rdeps = 0

         is_failed = get_where(logdict[x.pkgspec]) == "fail"
@@ -469,13 +470,11 @@ def process_section( section, config, problem_list,
     add_cnt = make_kprs( logdict, kprdict, problem_list )

     if not pkgsdb:
-        oldcwd = os.getcwd()
-        os.chdir(config['master-directory'])
-
         distro_config = piupartslib.conf.DistroConfig(
                         DISTRO_CONFIG_FILE, section_config["mirror"])

-        pkgsdb = piupartslib.packagesdb.PackagesDB(prefix=section)
+        sectiondir = os.path.join(config['master-directory'], section)
+        pkgsdb = piupartslib.packagesdb.PackagesDB(prefix=sectiondir)

         pkgs_url = distro_config.get_packages_url(
                    section_config.get_distro(),
@@ -485,11 +484,6 @@ def process_section( section, config, problem_list,
         pkgsdb.read_packages_file(pkg_fl)
         pkg_fl.close()

-        pkgsdb.compute_package_states()
-        pkgsdb.calc_rrdep_counts()
-
-        os.chdir(oldcwd)
-
     failures = FailureManager( logdict )
     failures.sort_by_bugged_and_rdeps(pkgsdb)

diff --git a/piuparts-report.py b/piuparts-report.py
index 996ca2a..48aaa6a 100644
--- a/piuparts-report.py
+++ b/piuparts-report.py
@@ -593,15 +593,10 @@ class Section:
         self._doc_root = doc_root

         logging.debug("Loading and parsing Packages file")
-        oldcwd = os.getcwd()
-        os.chdir(master_directory)
         self._packagedb_cache = packagedb_cache
         self._package_databases = {}
-        self._load_package_database(section)
+        self._load_package_database(section, master_directory)
         self._binary_db = self._package_databases[section]
-        self._binary_db.compute_package_states()
-        self._binary_db.calc_rrdep_counts()
-        os.chdir(oldcwd)

         sources_url = self._distro_config.get_sources_url(
                 self._config.get_distro(), self._config.get_area())
@@ -613,7 +608,7 @@ class Section:

         self._log_name_cache = {}

-    def _load_package_database(self, section):
+    def _load_package_database(self, section, master_directory):
         if section in self._package_databases:
             return
         elif section in self._packagedb_cache:
@@ -626,12 +621,13 @@ class Section:
             # this is a base database eligible for caching
             # only cache the most recent base database
             self._packagedb_cache.clear()
-        db = piupartslib.packagesdb.PackagesDB(prefix=section)
+        sectiondir = os.path.join(master_directory, section)
+        db = piupartslib.packagesdb.PackagesDB(prefix=sectiondir)
         self._package_databases[section] = db
         if config["depends-sections"]:
             deps = config["depends-sections"].split()
             for dep in deps:
-                self._load_package_database(dep)
+                self._load_package_database(dep, master_directory)
             db.set_dependency_databases([self._package_databases[dep] for dep i
         else:
             # only cache the big base databases that don't have additional depe
@@ -1217,31 +1213,27 @@ class Section:
     def write_state_pages(self):
         for state in self._binary_db.get_active_states():
             logging.debug("Writing page for %s" % state)
-            with_counts = False
-            aside = ""
             vlist = ""
             if state in self._binary_db.get_error_states():
                 with_counts = True
                 aside = " (reverse deps, blocked pkgs)"
-
-            def cmp_func(a, b):
-                """Sort by block count first"""
-                rrdep_cmp = cmp( a.block_count(), b.block_count())
-                if rrdep_cmp != 0:
-                    return -rrdep_cmp
-                else:
-                    return cmp( a["Package"], b["Package"] )
+                sort_key = lambda x: (-self._binary_db.block_count(x),x["Packag
+            else:
+                with_counts = False
+                aside = ""
+                sort_key = lambda x: x["Package"]

             names = self._binary_db.get_pkg_names_in_state(state)
             packages = [self._binary_db.get_package(name) for name in names]
-            packages.sort( cmp_func )
+            packages.sort( key = sort_key )

             for package in packages:
                 vlist += "<li id=\"%s\">%s" % (
                                          package["Package"],
                                          self.link_to_source_summary(package["P
                 if with_counts:
-                    vlist += " (%d, %d)" % (package.rrdep_count(), package.bloc
+                    vlist += " (%d, %d)" % (self._binary_db.rrdep_count(package
+                                            self._binary_db.block_count(package
                 vlist += " (%s)" % html_protect(package["Maintainer"])
                 all_deps = package.all_dependencies()
                 if all_deps:
diff --git a/piupartslib/packagesdb.py b/piupartslib/packagesdb.py
index 13e0dfb..d3aa1da 100644
--- a/piupartslib/packagesdb.py
+++ b/piupartslib/packagesdb.py
@@ -62,9 +62,10 @@ class Package(UserDict.UserDict):
             self[name.strip()] = value.strip()
         self._parsed_deps = {}
         self._parsed_alt_deps = {}
-        self._rrdep_count = None
-        self._block_count = None
-        self._waiting_count = None
+        self.rrdep_cnt = None
+        self.block_cnt = None
+        self.waiting_cnt = None
+        self.rdep_chain_len = None

     def _parse_dependencies(self, header_name):
         if header_name in self._parsed_deps:
@@ -123,33 +124,6 @@ class Package(UserDict.UserDict):
         """Are we testable at all? Required aren't."""
         return self.get("Priority", "") != "required"

-    def rrdep_count(self):
-        """Get the recursive dependency count, if it has been calculated"""
-        if self._rrdep_count == None:
-            raise Exception('Reverse dependency count has not been calculated')
-        return(self._rrdep_count)
-
-    def set_rrdep_count(self, val):
-        self._rrdep_count = val
-
-    def block_count(self):
-        """Get the number of packages blocked by this package"""
-        if self._block_count == None:
-            raise Exception('Block count has not been calculated')
-        return(self._block_count)
-
-    def set_block_count(self, val):
-        self._block_count = val
-
-    def waiting_count(self):
-        """Get the number of packages waiting for this package"""
-        if self._waiting_count == None:
-            raise Exception('Waiting count has not been calculated')
-        return(self._waiting_count)
-
-    def set_waiting_count(self, val):
-        self._waiting_count = val
-
     def dump(self, output_file):
         output_file.write("".join(self.headers))

@@ -313,6 +287,7 @@ class PackagesDB:
         self._dependency_databases = []
         self._recycle_mode = False
         self._candidates_for_testing = None
+        self._rdeps = None
         self.set_subdirs(ok="pass", fail="fail", evil="untestable",
                          reserved="reserved", morefail=["bugged", "affected"],
                          recycle="recycle")
@@ -682,8 +657,8 @@ class PackagesDB:
                     if not self._logdb.log_exists(p, [self._reserved]) or \
                             self._logdb.log_exists(p, [self._recycle])]
             if len(self._candidates_for_testing) > 1:
-                self.calc_rrdep_counts()
-                tuples = [(p.waiting_count(), random.random(), p)
+                tuples = [(self.waiting_count(p) * self.rdep_chain_len(p),
+                           random.random(), p)
                         for p in self._candidates_for_testing]
                 self._candidates_for_testing = [x[2]
                         for x in sorted(tuples, reverse = True)]
@@ -749,59 +724,92 @@ class PackagesDB:
         else:
             raise LogfileExists(self._evil, package, version)

-    def calc_rrdep_counts(self):
-        """Calculate recursive reverse dependency counts for Packages"""
+    def _get_rdep_dict(self):
+        """Return dict of one-level reverse dependencies by package"""
+
+        if self._rdeps is None:
+
+            self._rdeps = {}

-        self._find_all_packages()       # populate _packages
+            for pkg in self.get_all_package_names():
+                pkg_obj = self.get_package(pkg)
+
+                for dep in pkg_obj.dependencies():
+                    dep_pkg = self.get_package(dep, resolve_virtual=True)
+
+                    if dep_pkg is not None:
+                        dep = dep_pkg["Package"]
+
+                    if not dep in self._rdeps:
+                        self._rdeps[dep] = set()
+                    self._rdeps[dep].add(pkg)
+
+        return( self._rdeps )
+
+    def _calc_rrdep_pkg_counts(self, pkg):
+
+        pkg_name = pkg['Package']
         self._compute_package_states()  # populate _package_state
+
+        # calc full recursive reverse dependency package set
+        rrdep_set = set()
+        rdeps = self._get_rdep_dict()
+        next_level = set([pkg_name])
+        chain_len = 0
+
+        while next_level:
+            chain_len += 1
+            rrdep_set |= next_level
+            new_pkgs = next_level
+            next_level = set([y for x in new_pkgs if x in rdeps for y in rdeps[
+            next_level -= rrdep_set
+
+        rrdep_set.remove(pkg_name)
+
+        # calculate and set the metrics
+        pkg.rrdep_cnt = len(rrdep_set)
+
         error_states = self.get_error_states()
+        if self._package_state[pkg_name] in error_states:
+            block_list = [x for x in rrdep_set
+                          if self._package_state[x] in error_states]
+            pkg.block_cnt = len(block_list)
+        else:
+            pkg.block_cnt = 0
+
         waiting_states = self.get_waiting_states()
+        if self._package_state[pkg_name] in waiting_states:
+            waiting_list = [x for x in rrdep_set
+                            if self._package_state[x] in waiting_states]
+            pkg.waiting_cnt = len(waiting_list)
+        else:
+            pkg.waiting_cnt = 0

-        # create a reverse dependency dictionary.
-        # entries consist of a one-level list of reverse dependency package nam
-        # by package name
-        rdeps = {}
-        for pkg_name in self._packages.keys():
-            # use the Packages dependencies() method for a conservative count
-            for dep in self._packages[pkg_name].dependencies():
-                if dep in rdeps:
-                    rdeps[dep].append( pkg_name )
-                else:
-                    rdeps[dep] = [pkg_name]
-
-        def recurse_rdeps( pkg_name, rdeps, rrdep_dict ):
-            """ Recurse through the reverse dep arrays to determine the recursi
-                dependency count for a package. rrdep_dict.keys() contains the
-                accumulation of rdeps encountered"""
-
-            # are there any rdeps for this package?
-            if pkg_name in rdeps:
-                for rdep in rdeps[pkg_name]:
-                    # break circular dependency loops
-                    if not rdep in rrdep_dict:
-                        rrdep_dict[rdep] = 1
-                        rrdep_dict = recurse_rdeps( rdep, rdeps, rrdep_dict )
-
-            return rrdep_dict
-
-        # calculate all of the rrdeps and block counts
-        for pkg_name in self._packages.keys():
-            rrdep_list = recurse_rdeps( pkg_name, rdeps, {} ).keys()
-            self._packages[pkg_name].set_rrdep_count( len(rrdep_list) )
-
-            if self._package_state[pkg_name] in error_states:
-                block_list = [x for x in rrdep_list
-                              if self._package_state[x] in error_states]
-            else:
-                block_list = []
-            self._packages[pkg_name].set_block_count( len(block_list) )
+        pkg.rdep_chain_len = chain_len

-            if self._package_state[pkg_name] in waiting_states:
-                waiting_list = [x for x in rrdep_list
-                              if self._package_state[x] in waiting_states]
-            else:
-                waiting_list = []
-            self._packages[pkg_name].set_waiting_count(len(waiting_list))
+    def block_count(self, pkg):
+        if pkg.block_cnt is None:
+            self._calc_rrdep_pkg_counts(pkg)
+
+        return pkg.block_cnt
+
+    def rrdep_count(self, pkg):
+        if pkg.rrdep_cnt is None:
+            self._calc_rrdep_pkg_counts(pkg)
+
+        return pkg.rrdep_cnt
+
+    def waiting_count(self, pkg):
+        if pkg.waiting_cnt is None:
+            self._calc_rrdep_pkg_counts(pkg)
+
+        return pkg.waiting_cnt
+
+    def rdep_chain_len(self, pkg):
+        if pkg.rdep_chain_len is None:
+            self.calc_rrdep_pkg_counts(pkg)
+
+        return pkg.rdep_chain_len


 # vi:set et ts=4 sw=4 :
--
1.7.10.4

(END)



More information about the Piuparts-devel mailing list