[Qa-jenkins-scm] [Git][qa/jenkins.debian.net][master] reproducible Archlinux scheduler: refactoring for huge speed gain
Holger Levsen
gitlab at salsa.debian.org
Sat Sep 22 23:32:29 BST 2018
Holger Levsen pushed to branch master at Debian QA / jenkins.debian.net
Commits:
26377e6b by Holger Levsen at 2018-09-22T22:31:38Z
reproducible Archlinux scheduler: refactoring for huge speed gain
Signed-off-by: Holger Levsen <holger at layer-acht.org>
- - - - -
2 changed files:
- TODO
- bin/reproducible_archlinux_scheduler.sh
Changes:
=====================================
TODO
=====================================
@@ -309,8 +309,6 @@ See link:https://jenkins.debian.net/userContent/about.html["about jenkins.debian
==== reproducible Arch Linux
-* use pacman's error code, see FIXME in _html_.sh
-
* setup_archlinux_schroot job:
** needs to be made idempotent (currently it removes the schroot at the beginning of the job, instead of creating it elsewhere and replacing it on success at the job end…)
** use schroot tarballs (gzipped), moves are atomic then
@@ -320,21 +318,17 @@ See link:https://jenkins.debian.net/userContent/about.html["about jenkins.debian
** check for archlinux schroot sessions which should not be there and delete them. complain if that fails.
* use db
-** problem: we currently have more (detailed) stati in archlinux
** extend scheduler.sh:
-*** stop checking making 8000 queries instead of 1...
-*** comparing versions is also unneeded: if its not empty it must be higher (due to repo constraints)
-*** actually schedule old packages once queue is empty (except blacklisted packages)
+*** comparing versions is probably needed: if it's not empty it must be higher (due to repo constraints), but it can be even higher than in the repo, because we build trunk
**** at first reschedule packages which have never been built (according to the db)
-**** once there are no more left, reschedule based on last build_date
*** also delete unknown packages from db: sources and schedule, later results as well
*** check/make sure that packages which are newer in trunk than repo are only scheduled once
+*** fix irc notification
** make build.sh
*** write db
-**** write temp script to populate db with known date: pkg.state, pkg.build_date. stop builders and html job when running this
-**** save state details on fs until we know better / the answer to the problem above
+**** write temp script to populate results table with known data: pkg.state, pkg.build_date. stop builders and html job when running this
*** do not share /var/log/jenkins/reproducible-race-conditions.log with debian
-*** make build respect pacman exit code
+*** make build respect pacman exit code, see FIXME in _html_.sh
** html
*** disable all current html creation
*** leave all files, delete them (much) later
=====================================
bin/reproducible_archlinux_scheduler.sh
=====================================
@@ -20,7 +20,7 @@ update_archlinux_repositories() {
#
UPDATED=$(mktemp -t archlinuxrb-scheduler-XXXXXXXX)
NEW=$(mktemp -t archlinuxrb-scheduler-XXXXXXXX)
- OLDER=$(mktemp -t archlinuxrb-scheduler-XXXXXXXX)
+ KNOWN=$(mktemp -t archlinuxrb-scheduler-XXXXXXXX)
local SESSION="archlinux-scheduler-$RANDOM"
schroot --begin-session --session-name=$SESSION -c jenkins-reproducible-archlinux
schroot --run-session -c $SESSION --directory /var/tmp -- sudo pacman -Syu --noconfirm
@@ -72,21 +72,19 @@ update_archlinux_repositories() {
#
# schedule packages
#
+ query_db "select suite, name, version FROM sources WHERE architecture='$ARCH';" > $KNOWN
+
for REPO in $ARCHLINUX_REPOS ; do
TMPPKGLIST=$(mktemp -t archlinuxrb-scheduler-XXXXXXXX)
echo "$(date -u ) - updating list of available packages in repository '$REPO'."
DATE="$(date -u +'%Y-%m-%d %H:%M')"
grep "^$REPO" "$ARCHLINUX_PKGS"_full_pkgbase_list | \
while read repo pkgbase version; do
- #
- # db based scheduler
- #
PKG=$pkgbase
SUITE="archlinux_$repo"
- ARCH="x86_64"
- # FIXME: doing the next line 8000 times is grossly inefficient and should be replaced by one single query
- VERSION=$(query_db "SELECT version FROM sources WHERE name='$PKG' AND suite='$SUITE' AND architecture='$ARCH';" || query_db "SELECT version FROM sources WHERE name='$PKG' AND suite='$SUITE' AND architecture='$ARCH';")
- if [ -z "$VERSION" ] ; then
+ PKG_IN_DB=$(grep "^archlinux_$repo|$pkgbase|" $KNOWN | head -1) # FIXME: why oh why is head -1 needed here?
+ VERSION=$(echo ${PKG_IN_DB} | cut -d "|" -f3)
+ if [ -z "${PKG_IN_DB}" ] ; then
# new package, add to db and schedule
echo "new package found: $repo/$pkgbase $version "
query_db "INSERT into sources (name, version, suite, architecture) VALUES ('$PKG', '$version', '$SUITE', '$ARCH');"
@@ -98,37 +96,38 @@ update_archlinux_repositories() {
# known package with new version, so update db and schedule
echo $REPO/$pkgbase >> $UPDATED
echo "$REPO/$pkgbase $VERSION is known in the database, but repo has $version which is newer, so rescheduling... "
- echo " UPDATE sources SET version = '$version' WHERE name = '$PKG' AND suite = '$SUITE' AND architecture='$ARCH';"
query_db "UPDATE sources SET version = '$version' WHERE name = '$PKG' AND suite = '$SUITE' AND architecture='$ARCH';"
if [ -z $(echo $PKG | egrep -v "$BLACKLIST") ] ; then
echo "$PKG is blacklisted, so not scheduling it."
else
PKGID=$(query_db "SELECT id FROM sources WHERE name='$PKG' AND suite='$SUITE' AND architecture='$ARCH';")
- echo " INSERT INTO schedule (package_id, date_scheduled) VALUES ('$PKGID', '$DATE');"
- query_db "INSERT INTO schedule (package_id, date_scheduled) VALUES ('$PKGID', '$DATE');"
+ echo " SELECT FROM schedule WHERE package_id = '$PKGID';"
+ SCHEDULED=$(query_db "SELECT FROM schedule WHERE package_id = '$PKGID';")
+ if [ -z "$SCHEDULED" ] ; then
+ echo " INSERT INTO schedule (package_id, date_scheduled) VALUES ('$PKGID', '$DATE');"
+ query_db "INSERT INTO schedule (package_id, date_scheduled) VALUES ('$PKGID', '$DATE');" ||true
+ else
+ " $PKG (package_id: $PKG_ID) already scheduled, not scheduling again."
+ fi
fi
elif [ "$VERCMP" = "-1" ] ; then
# our version is higher than what's in the repo because we build trunk
echo "$REPO/$pkgbase $VERSION in db is higher than $version in repo because we build trunk."
- echo "$REPO/$pkgbase $VERSION > $version" >> $OLDER
else
echo " Boom boom boom boom boom."
- echo " This should never happen: we know about $pkgbase $VERSION, but repo has $version. \$VERCMP=$VERCMP"
+ echo " This should never happen: we know about $pkgbase with $VERSION, but repo has $version. VERCMP=$VERCMP"
+ echo " PKG_IN_DB=${PKG_IN_DB}"
fi
fi
printf '%s %s\n' "$pkgbase" "$version" >> $TMPPKGLIST
done
mv $TMPPKGLIST "$ARCHLINUX_PKGS"_"$REPO"
- #FIXME: echo "$(date -u ) - $(cat ${ARCHLINUX_PKGS}_$REPO | wc -l) packages in repository '$REPO' are known to us."
- new=$(grep -c ^$REPO $NEW || true)
- updated=$(grep -c ^$REPO $UPDATED || true)
+ #new=$(grep -c ^$REPO $NEW || true)
+ #updated=$(grep -c ^$REPO $UPDATED || true)
#FIXME echo "$(date -u ) - scheduled $new/$updated packages in repository '$REPO'."
done
schroot --end-session -c $SESSION
- echo "$(date -u) - the following packages are known to us with higher versions than the repo because we build trunk:"
- cat $OLDER
- echo
#
# schedule up to $MAX packages we already know about
@@ -161,15 +160,16 @@ update_archlinux_repositories() {
old=", plus$old"
fi
MESSAGE="${message}$old, for $total scheduled out of $TOTAL."
- irc_message archlinux-reproducible "$MESSAGE"
- echo "$(date -u ) - $MESSAGE"
- else
- echo "$(date -u ) - didn't schedule any packages."
+ #FIXME irc_message archlinux-reproducible "$MESSAGE"
+ #echo "$(date -u ) - $MESSAGE"
+ #else
+ #echo "$(date -u ) - didn't schedule any packages."
fi
- rm $NEW $UPDATED > /dev/null
+ rm $NEW $UPDATED $KNOWN > /dev/null
echo "$(date -u) - Done updating Arch Linux repositories, currently $TOTAL packages known."
}
+ARCH="x86_64"
update_archlinux_repositories
# vim: set sw=0 noet :
View it on GitLab: https://salsa.debian.org/qa/jenkins.debian.net/commit/26377e6b234fa5f927d952050e1b9aa83dd6b7ea
--
View it on GitLab: https://salsa.debian.org/qa/jenkins.debian.net/commit/26377e6b234fa5f927d952050e1b9aa83dd6b7ea
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/qa-jenkins-scm/attachments/20180922/9af9619a/attachment-0001.html>
More information about the Qa-jenkins-scm mailing list