[Qa-jenkins-scm] [Git][qa/jenkins.debian.net][master] reproducible Archlinux scheduler: refactoring for huge speed gain

Holger Levsen gitlab at salsa.debian.org
Sat Sep 22 23:32:29 BST 2018


Holger Levsen pushed to branch master at Debian QA / jenkins.debian.net


Commits:
26377e6b by Holger Levsen at 2018-09-22T22:31:38Z
reproducible Archlinux scheduler: refactoring for huge speed gain

Signed-off-by: Holger Levsen <holger at layer-acht.org>

- - - - -


2 changed files:

- TODO
- bin/reproducible_archlinux_scheduler.sh


Changes:

=====================================
TODO
=====================================
@@ -309,8 +309,6 @@ See link:https://jenkins.debian.net/userContent/about.html["about jenkins.debian
 
 ==== reproducible Arch Linux
 
-* use pacman's error code, see FIXME in _html_.sh
-
 * setup_archlinux_schroot job:
 ** needs to be made idempotent (currently it removes the schroot at the beginning of the job, instead of creating it elsewhere and replacing it on success at the job end…)
 ** use schroot tarballs (gzipped), moves are atomic then
@@ -320,21 +318,17 @@ See link:https://jenkins.debian.net/userContent/about.html["about jenkins.debian
 ** check for archlinux schroot sessions which should not be there and delete them. complain if that fails.
 
 * use db
-** problem: we currently have more (detailed) stati in archlinux
 ** extend scheduler.sh:
-*** stop checking making 8000 queries instead of 1...
-*** comparing versions is also unneeded: if its not empty it must be higher (due to repo constraints)
-*** actually schedule old packages once queue is empty (except blacklisted packages)
+*** comparing versions is probably needed: if it's not empty it must be higher (due to repo constraints), but it can be even higher than in the repo, because we build trunk
 **** at first reschedule packages which have never been built (according to the db)
-**** once there are no more left, reschedule based on last build_date
 *** also delete unknown packages from db: sources and schedule, later results as well
 *** check/make sure that packages which are newer in trunk than repo are only scheduled once
+*** fix irc notification
 ** make build.sh
 *** write db
-**** write temp script to populate db with known date: pkg.state, pkg.build_date. stop builders and html job when running this
-**** save state details on fs until we know better / the answer to the problem above
+**** write temp script to populate results table with known data: pkg.state, pkg.build_date. stop builders and html job when running this
 *** do not share /var/log/jenkins/reproducible-race-conditions.log with debian
-*** make build respect pacman exit code
+*** make build respect pacman exit code, see FIXME in _html_.sh
 ** html
 *** disable all current html creation
 *** leave all files, delete them (much) later


=====================================
bin/reproducible_archlinux_scheduler.sh
=====================================
@@ -20,7 +20,7 @@ update_archlinux_repositories() {
 	#
 	UPDATED=$(mktemp -t archlinuxrb-scheduler-XXXXXXXX)
 	NEW=$(mktemp -t archlinuxrb-scheduler-XXXXXXXX)
-	OLDER=$(mktemp -t archlinuxrb-scheduler-XXXXXXXX)
+	KNOWN=$(mktemp -t archlinuxrb-scheduler-XXXXXXXX)
 	local SESSION="archlinux-scheduler-$RANDOM"
 	schroot --begin-session --session-name=$SESSION -c jenkins-reproducible-archlinux
 	schroot --run-session -c $SESSION --directory /var/tmp -- sudo pacman -Syu --noconfirm
@@ -72,21 +72,19 @@ update_archlinux_repositories() {
 	#
 	# schedule packages
 	#
+	query_db "select suite, name, version FROM sources WHERE architecture='$ARCH';" > $KNOWN
+
 	for REPO in $ARCHLINUX_REPOS ; do
 		TMPPKGLIST=$(mktemp -t archlinuxrb-scheduler-XXXXXXXX)
 		echo "$(date -u ) - updating list of available packages in repository '$REPO'."
 		DATE="$(date -u +'%Y-%m-%d %H:%M')"
 		grep "^$REPO" "$ARCHLINUX_PKGS"_full_pkgbase_list | \
 			while read repo pkgbase version; do
-				#
-				# db based scheduler
-				#
 				PKG=$pkgbase
 				SUITE="archlinux_$repo"
-				ARCH="x86_64"
-				# FIXME: doing the next line 8000 times is grossly inefficient and should be replaced by one single query
-				VERSION=$(query_db "SELECT version FROM sources WHERE name='$PKG' AND suite='$SUITE' AND architecture='$ARCH';" || query_db "SELECT version FROM sources WHERE name='$PKG' AND suite='$SUITE' AND architecture='$ARCH';")
-				if [ -z "$VERSION" ] ; then
+				PKG_IN_DB=$(grep "^archlinux_$repo|$pkgbase|" $KNOWN | head -1) # FIXME: why oh why is head -1 needed here?
+				VERSION=$(echo ${PKG_IN_DB} | cut -d "|" -f3)
+			        if [ -z "${PKG_IN_DB}" ] ; then
 					# new package, add to db and schedule
 					echo "new package found: $repo/$pkgbase $version "
 					query_db "INSERT into sources (name, version, suite, architecture) VALUES ('$PKG', '$version', '$SUITE', '$ARCH');"
@@ -98,37 +96,38 @@ update_archlinux_repositories() {
 						# known package with new version, so update db and schedule
 						echo $REPO/$pkgbase >> $UPDATED
 						echo "$REPO/$pkgbase $VERSION is known in the database, but repo has $version which is newer, so rescheduling... "
-						echo " UPDATE sources SET version = '$version' WHERE name = '$PKG' AND suite = '$SUITE' AND architecture='$ARCH';"
 						query_db "UPDATE sources SET version = '$version' WHERE name = '$PKG' AND suite = '$SUITE' AND architecture='$ARCH';"
 						if [ -z $(echo $PKG | egrep -v "$BLACKLIST") ] ; then
 							echo "$PKG is blacklisted, so not scheduling it."
 						else
 							PKGID=$(query_db "SELECT id FROM sources WHERE name='$PKG' AND suite='$SUITE' AND architecture='$ARCH';")
-							echo " INSERT INTO schedule (package_id, date_scheduled) VALUES ('$PKGID', '$DATE');"
-							query_db "INSERT INTO schedule (package_id, date_scheduled) VALUES ('$PKGID', '$DATE');"
+							echo " SELECT FROM schedule WHERE package_id = '$PKGID';"
+							SCHEDULED=$(query_db "SELECT FROM schedule WHERE package_id = '$PKGID';")
+							if [ -z "$SCHEDULED" ] ; then
+								echo " INSERT INTO schedule (package_id, date_scheduled) VALUES ('$PKGID', '$DATE');"
+								query_db "INSERT INTO schedule (package_id, date_scheduled) VALUES ('$PKGID', '$DATE');" ||true
+							else
+								" $PKG (package_id: $PKG_ID) already scheduled, not scheduling again."
+							fi
 						fi
 					elif [ "$VERCMP" = "-1" ] ; then
 						# our version is higher than what's in the repo because we build trunk
 						echo "$REPO/$pkgbase $VERSION in db is higher than $version in repo because we build trunk."
-						echo "$REPO/$pkgbase $VERSION > $version" >> $OLDER
 					else
 						echo " Boom boom boom boom boom."
-						echo " This should never happen: we know about $pkgbase $VERSION, but repo has $version. \$VERCMP=$VERCMP"
+						echo " This should never happen: we know about $pkgbase with $VERSION, but repo has $version. VERCMP=$VERCMP"
+						echo " PKG_IN_DB=${PKG_IN_DB}"
 					fi
 				fi
 
 				printf '%s %s\n' "$pkgbase" "$version" >> $TMPPKGLIST
 			done
 		mv $TMPPKGLIST "$ARCHLINUX_PKGS"_"$REPO"
-		#FIXME: echo "$(date -u ) - $(cat ${ARCHLINUX_PKGS}_$REPO | wc -l) packages in repository '$REPO' are known to us."
-		new=$(grep -c ^$REPO $NEW || true)
-		updated=$(grep -c ^$REPO $UPDATED || true)
+		#new=$(grep -c ^$REPO $NEW || true)
+		#updated=$(grep -c ^$REPO $UPDATED || true)
 		#FIXME echo "$(date -u ) - scheduled $new/$updated packages in repository '$REPO'."
 	done
 	schroot --end-session -c $SESSION
-	echo "$(date -u) - the following packages are known to us with higher versions than the repo because we build trunk:"
-	cat $OLDER
-	echo
 
 	#
 	# schedule up to $MAX packages we already know about
@@ -161,15 +160,16 @@ update_archlinux_repositories() {
 			old=", plus$old"
 		fi
 		MESSAGE="${message}$old, for $total scheduled out of $TOTAL."
-		irc_message archlinux-reproducible "$MESSAGE"
-		echo "$(date -u ) - $MESSAGE"
-	else
-		echo "$(date -u ) - didn't schedule any packages."
+		#FIXME irc_message archlinux-reproducible "$MESSAGE"
+		#echo "$(date -u ) - $MESSAGE"
+	#else
+		#echo "$(date -u ) - didn't schedule any packages."
 	fi
-	rm $NEW $UPDATED > /dev/null
+	rm $NEW $UPDATED $KNOWN > /dev/null
 	echo "$(date -u) - Done updating Arch Linux repositories, currently $TOTAL packages known."
 }
 
+ARCH="x86_64"
 update_archlinux_repositories
 
 # vim: set sw=0 noet :



View it on GitLab: https://salsa.debian.org/qa/jenkins.debian.net/commit/26377e6b234fa5f927d952050e1b9aa83dd6b7ea

-- 
View it on GitLab: https://salsa.debian.org/qa/jenkins.debian.net/commit/26377e6b234fa5f927d952050e1b9aa83dd6b7ea
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/qa-jenkins-scm/attachments/20180922/9af9619a/attachment-0001.html>


More information about the Qa-jenkins-scm mailing list