[Piuparts-devel] Bug#705977: Faster reverse dependency calculation, and factor rdep chain length into package selection
Dave Steele
dsteele at gmail.com
Tue Apr 23 03:14:54 UTC 2013
Package: piuparts
Version: 0.50
Severity: wishlist
Tags: patch
thanks
This is a placekeeper for merging the faster-rdep-calc(10) and
rdep-chain-len(2) branches. The first attempts to speed master by only
calculating reverse dependency metrics on demand, by package. The
second builds on that by adding a reverse dependency chain length
metric, and uses that to weight the packages returned by reserve().
https://github.com/davesteele/piuparts/commits/faster-rdep-calc
https://github.com/davesteele/piuparts/commits/rdep-chain-len
I've chosen not to restore the dependency on the current working
directory in detect_well_known_errors. That feature is localized to
one commit.
>From 9b5eba21462bc9e1e32df39c14f6559ebf42e271 Mon Sep 17 00:00:00 2001
From: David Steele <dsteele at gmail.com>
Date: Wed, 27 Feb 2013 19:07:09 -0500
Subject: [PATCH] Rework rdep metric calculations for faster operation.
---
debian/changelog | 6 ++
master-bin/detect_well_known_errors | 24 ++---
piuparts-report.py | 34 +++----
piupartslib/packagesdb.py | 166 ++++++++++++++++++-----------------
4 files changed, 115 insertions(+), 115 deletions(-)
diff --git a/debian/changelog b/debian/changelog
index b4d1c4d..7cfff9e 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -70,6 +70,12 @@ piuparts (0.51) UNRELEASED; urgency=low
- Validate field names, and only use valid known problem conf files.
- Minor HTML syntax fix.
- Minor template integration.
+ * pupartslib/packagesdb.py
+ - Reworked rdep metrics (block_count, rrdep_count, waiting_count) to
+ calculate on-demand. Access methods are moved from Package to
+ PackagesDB.
+ - Add a metric for the worst-case reverse-dependency chain length.
+ - Weight the reserved packages returned by this additional factor.
[ Holger Levsen ]
* piuparts.py:
diff --git a/master-bin/detect_well_known_errors b/master-bin/detect_well_known_
index 6810a26..2ca1f4e 100755
--- a/master-bin/detect_well_known_errors
+++ b/master-bin/detect_well_known_errors
@@ -225,8 +225,9 @@ class FailureManager():
def keyfunc( x, pkgsdb=self.pkgsdb, logdict=self.logdict):
try:
pkg_name = get_pkg(x.pkgspec)
- rdeps = pkgsdb.get_package(pkg_name).rrdep_count()
- except KeyError:
+ rdeps = pkgsdb.rrdep_count(
+ pkgsdb.get_package(pkg_name))
+ except (KeyError, AttributeError):
rdeps = 0
is_failed = get_where(logdict[x.pkgspec]) == "fail"
@@ -374,8 +375,8 @@ def update_tpl( basedir, section, problem, failures, logdict
bin_pkg = get_pkg(pkgspec)
try:
src_pkg = source_pkg(pkgspec, pkgsdb)
- rdep_cnt = pkgsdb.get_package(bin_pkg).rrdep_count()
- except KeyError:
+ rdep_cnt = pkgsdb.rrdep_count(pkgsdb.get_package(bin_pkg))
+ except (KeyError, AttributeError):
src_pkg = bin_pkg
rdep_cnt = 0
@@ -428,8 +429,8 @@ def update_html( section, logdict, problem_list, failures, c
def keyfunc( x, pkgsdb=pkgsdb, logdict=logdict):
try:
pkg_name = get_pkg(x.pkgspec)
- rdeps = pkgsdb.get_package(pkg_name).rrdep_count()
- except KeyError:
+ rdeps = pkgsdb.rrdep_count(pkgsdb.get_package(pkg_name))
+ except (KeyError, AttributeError):
rdeps = 0
is_failed = get_where(logdict[x.pkgspec]) == "fail"
@@ -469,13 +470,11 @@ def process_section( section, config, problem_list,
add_cnt = make_kprs( logdict, kprdict, problem_list )
if not pkgsdb:
- oldcwd = os.getcwd()
- os.chdir(config['master-directory'])
-
distro_config = piupartslib.conf.DistroConfig(
DISTRO_CONFIG_FILE, section_config["mirror"])
- pkgsdb = piupartslib.packagesdb.PackagesDB(prefix=section)
+ sectiondir = os.path.join(config['master-directory'], section)
+ pkgsdb = piupartslib.packagesdb.PackagesDB(prefix=sectiondir)
pkgs_url = distro_config.get_packages_url(
section_config.get_distro(),
@@ -485,11 +484,6 @@ def process_section( section, config, problem_list,
pkgsdb.read_packages_file(pkg_fl)
pkg_fl.close()
- pkgsdb.compute_package_states()
- pkgsdb.calc_rrdep_counts()
-
- os.chdir(oldcwd)
-
failures = FailureManager( logdict )
failures.sort_by_bugged_and_rdeps(pkgsdb)
diff --git a/piuparts-report.py b/piuparts-report.py
index 996ca2a..48aaa6a 100644
--- a/piuparts-report.py
+++ b/piuparts-report.py
@@ -593,15 +593,10 @@ class Section:
self._doc_root = doc_root
logging.debug("Loading and parsing Packages file")
- oldcwd = os.getcwd()
- os.chdir(master_directory)
self._packagedb_cache = packagedb_cache
self._package_databases = {}
- self._load_package_database(section)
+ self._load_package_database(section, master_directory)
self._binary_db = self._package_databases[section]
- self._binary_db.compute_package_states()
- self._binary_db.calc_rrdep_counts()
- os.chdir(oldcwd)
sources_url = self._distro_config.get_sources_url(
self._config.get_distro(), self._config.get_area())
@@ -613,7 +608,7 @@ class Section:
self._log_name_cache = {}
- def _load_package_database(self, section):
+ def _load_package_database(self, section, master_directory):
if section in self._package_databases:
return
elif section in self._packagedb_cache:
@@ -626,12 +621,13 @@ class Section:
# this is a base database eligible for caching
# only cache the most recent base database
self._packagedb_cache.clear()
- db = piupartslib.packagesdb.PackagesDB(prefix=section)
+ sectiondir = os.path.join(master_directory, section)
+ db = piupartslib.packagesdb.PackagesDB(prefix=sectiondir)
self._package_databases[section] = db
if config["depends-sections"]:
deps = config["depends-sections"].split()
for dep in deps:
- self._load_package_database(dep)
+ self._load_package_database(dep, master_directory)
db.set_dependency_databases([self._package_databases[dep] for dep i
else:
# only cache the big base databases that don't have additional depe
@@ -1217,31 +1213,27 @@ class Section:
def write_state_pages(self):
for state in self._binary_db.get_active_states():
logging.debug("Writing page for %s" % state)
- with_counts = False
- aside = ""
vlist = ""
if state in self._binary_db.get_error_states():
with_counts = True
aside = " (reverse deps, blocked pkgs)"
-
- def cmp_func(a, b):
- """Sort by block count first"""
- rrdep_cmp = cmp( a.block_count(), b.block_count())
- if rrdep_cmp != 0:
- return -rrdep_cmp
- else:
- return cmp( a["Package"], b["Package"] )
+ sort_key = lambda x: (-self._binary_db.block_count(x),x["Packag
+ else:
+ with_counts = False
+ aside = ""
+ sort_key = lambda x: x["Package"]
names = self._binary_db.get_pkg_names_in_state(state)
packages = [self._binary_db.get_package(name) for name in names]
- packages.sort( cmp_func )
+ packages.sort( key = sort_key )
for package in packages:
vlist += "<li id=\"%s\">%s" % (
package["Package"],
self.link_to_source_summary(package["P
if with_counts:
- vlist += " (%d, %d)" % (package.rrdep_count(), package.bloc
+ vlist += " (%d, %d)" % (self._binary_db.rrdep_count(package
+ self._binary_db.block_count(package
vlist += " (%s)" % html_protect(package["Maintainer"])
all_deps = package.all_dependencies()
if all_deps:
diff --git a/piupartslib/packagesdb.py b/piupartslib/packagesdb.py
index 13e0dfb..d3aa1da 100644
--- a/piupartslib/packagesdb.py
+++ b/piupartslib/packagesdb.py
@@ -62,9 +62,10 @@ class Package(UserDict.UserDict):
self[name.strip()] = value.strip()
self._parsed_deps = {}
self._parsed_alt_deps = {}
- self._rrdep_count = None
- self._block_count = None
- self._waiting_count = None
+ self.rrdep_cnt = None
+ self.block_cnt = None
+ self.waiting_cnt = None
+ self.rdep_chain_len = None
def _parse_dependencies(self, header_name):
if header_name in self._parsed_deps:
@@ -123,33 +124,6 @@ class Package(UserDict.UserDict):
"""Are we testable at all? Required aren't."""
return self.get("Priority", "") != "required"
- def rrdep_count(self):
- """Get the recursive dependency count, if it has been calculated"""
- if self._rrdep_count == None:
- raise Exception('Reverse dependency count has not been calculated')
- return(self._rrdep_count)
-
- def set_rrdep_count(self, val):
- self._rrdep_count = val
-
- def block_count(self):
- """Get the number of packages blocked by this package"""
- if self._block_count == None:
- raise Exception('Block count has not been calculated')
- return(self._block_count)
-
- def set_block_count(self, val):
- self._block_count = val
-
- def waiting_count(self):
- """Get the number of packages waiting for this package"""
- if self._waiting_count == None:
- raise Exception('Waiting count has not been calculated')
- return(self._waiting_count)
-
- def set_waiting_count(self, val):
- self._waiting_count = val
-
def dump(self, output_file):
output_file.write("".join(self.headers))
@@ -313,6 +287,7 @@ class PackagesDB:
self._dependency_databases = []
self._recycle_mode = False
self._candidates_for_testing = None
+ self._rdeps = None
self.set_subdirs(ok="pass", fail="fail", evil="untestable",
reserved="reserved", morefail=["bugged", "affected"],
recycle="recycle")
@@ -682,8 +657,8 @@ class PackagesDB:
if not self._logdb.log_exists(p, [self._reserved]) or \
self._logdb.log_exists(p, [self._recycle])]
if len(self._candidates_for_testing) > 1:
- self.calc_rrdep_counts()
- tuples = [(p.waiting_count(), random.random(), p)
+ tuples = [(self.waiting_count(p) * self.rdep_chain_len(p),
+ random.random(), p)
for p in self._candidates_for_testing]
self._candidates_for_testing = [x[2]
for x in sorted(tuples, reverse = True)]
@@ -749,59 +724,92 @@ class PackagesDB:
else:
raise LogfileExists(self._evil, package, version)
- def calc_rrdep_counts(self):
- """Calculate recursive reverse dependency counts for Packages"""
+ def _get_rdep_dict(self):
+ """Return dict of one-level reverse dependencies by package"""
+
+ if self._rdeps is None:
+
+ self._rdeps = {}
- self._find_all_packages() # populate _packages
+ for pkg in self.get_all_package_names():
+ pkg_obj = self.get_package(pkg)
+
+ for dep in pkg_obj.dependencies():
+ dep_pkg = self.get_package(dep, resolve_virtual=True)
+
+ if dep_pkg is not None:
+ dep = dep_pkg["Package"]
+
+ if not dep in self._rdeps:
+ self._rdeps[dep] = set()
+ self._rdeps[dep].add(pkg)
+
+ return( self._rdeps )
+
+ def _calc_rrdep_pkg_counts(self, pkg):
+
+ pkg_name = pkg['Package']
self._compute_package_states() # populate _package_state
+
+ # calc full recursive reverse dependency package set
+ rrdep_set = set()
+ rdeps = self._get_rdep_dict()
+ next_level = set([pkg_name])
+ chain_len = 0
+
+ while next_level:
+ chain_len += 1
+ rrdep_set |= next_level
+ new_pkgs = next_level
+ next_level = set([y for x in new_pkgs if x in rdeps for y in rdeps[
+ next_level -= rrdep_set
+
+ rrdep_set.remove(pkg_name)
+
+ # calculate and set the metrics
+ pkg.rrdep_cnt = len(rrdep_set)
+
error_states = self.get_error_states()
+ if self._package_state[pkg_name] in error_states:
+ block_list = [x for x in rrdep_set
+ if self._package_state[x] in error_states]
+ pkg.block_cnt = len(block_list)
+ else:
+ pkg.block_cnt = 0
+
waiting_states = self.get_waiting_states()
+ if self._package_state[pkg_name] in waiting_states:
+ waiting_list = [x for x in rrdep_set
+ if self._package_state[x] in waiting_states]
+ pkg.waiting_cnt = len(waiting_list)
+ else:
+ pkg.waiting_cnt = 0
- # create a reverse dependency dictionary.
- # entries consist of a one-level list of reverse dependency package nam
- # by package name
- rdeps = {}
- for pkg_name in self._packages.keys():
- # use the Packages dependencies() method for a conservative count
- for dep in self._packages[pkg_name].dependencies():
- if dep in rdeps:
- rdeps[dep].append( pkg_name )
- else:
- rdeps[dep] = [pkg_name]
-
- def recurse_rdeps( pkg_name, rdeps, rrdep_dict ):
- """ Recurse through the reverse dep arrays to determine the recursi
- dependency count for a package. rrdep_dict.keys() contains the
- accumulation of rdeps encountered"""
-
- # are there any rdeps for this package?
- if pkg_name in rdeps:
- for rdep in rdeps[pkg_name]:
- # break circular dependency loops
- if not rdep in rrdep_dict:
- rrdep_dict[rdep] = 1
- rrdep_dict = recurse_rdeps( rdep, rdeps, rrdep_dict )
-
- return rrdep_dict
-
- # calculate all of the rrdeps and block counts
- for pkg_name in self._packages.keys():
- rrdep_list = recurse_rdeps( pkg_name, rdeps, {} ).keys()
- self._packages[pkg_name].set_rrdep_count( len(rrdep_list) )
-
- if self._package_state[pkg_name] in error_states:
- block_list = [x for x in rrdep_list
- if self._package_state[x] in error_states]
- else:
- block_list = []
- self._packages[pkg_name].set_block_count( len(block_list) )
+ pkg.rdep_chain_len = chain_len
- if self._package_state[pkg_name] in waiting_states:
- waiting_list = [x for x in rrdep_list
- if self._package_state[x] in waiting_states]
- else:
- waiting_list = []
- self._packages[pkg_name].set_waiting_count(len(waiting_list))
+ def block_count(self, pkg):
+ if pkg.block_cnt is None:
+ self._calc_rrdep_pkg_counts(pkg)
+
+ return pkg.block_cnt
+
+ def rrdep_count(self, pkg):
+ if pkg.rrdep_cnt is None:
+ self._calc_rrdep_pkg_counts(pkg)
+
+ return pkg.rrdep_cnt
+
+ def waiting_count(self, pkg):
+ if pkg.waiting_cnt is None:
+ self._calc_rrdep_pkg_counts(pkg)
+
+ return pkg.waiting_cnt
+
+ def rdep_chain_len(self, pkg):
+ if pkg.rdep_chain_len is None:
+ self.calc_rrdep_pkg_counts(pkg)
+
+ return pkg.rdep_chain_len
# vi:set et ts=4 sw=4 :
--
1.7.10.4
(END)
More information about the Piuparts-devel
mailing list