[Qa-jenkins-scm] [Git][qa/jenkins.debian.net][master] reproducible: automatically mark nodes with health problems for more than 4h as offline

Holger Levsen gitlab at salsa.debian.org
Sat Sep 22 15:33:16 BST 2018


Holger Levsen pushed to branch master at Debian QA / jenkins.debian.net


Commits:
0c85f950 by Holger Levsen at 2018-09-22T14:31:53Z
reproducible: automatically mark nodes with health problems for more than 4h as offline

Signed-off-by: Holger Levsen <holger at layer-acht.org>

- - - - -


1 changed file:

- bin/reproducible_maintenance.sh


Changes:

=====================================
bin/reproducible_maintenance.sh
=====================================
@@ -111,6 +111,52 @@ if [ $? -ne 0 ] ; then
 	exit 1
 fi
 
+if [ "$HOSTNAME" = "$MAINNODE" ] ; then
+	#
+	# find nodes with problems and temporarily turn them offline
+	#
+	echo "$(date -u) - Looking for unhealthy nodes."
+	cd ~/jobs
+	for i in reproducible_node_health_check_* ; do
+		NODE_ALIAS=$(echo $i | cut -d '_' -f6)
+		NODE_ARCH=$(echo $i | cut -d '_' -f5)
+		case $NODE_ARCH in
+			amd64)	NODE="profitbricks-build${NODE_ALIAS#profitbricks}-amd64.debian.net" ;;
+			i386)	NODE="profitbricks-build${NODE_ALIAS#profitbricks}-i386.debian.net" ;;
+			arm64)	NODE="codethink-sled${NODE_ALIAS#codethink}-arm64.debian.net" ;;
+			armhf)	NODE="${NODE_ALIAS}-armhf-rb.debian.net" ;;
+		esac
+		if [ "$NODE" == "jenkins" ] ; then
+			echo 'Skipping jenkins...'
+			continue
+		fi
+		cd $i/builds
+		LAST=$(ls -rt1 | tail -1)
+		GOOD=$(basename $(readlink -f lastStableBuild))
+		if [ "$LAST" == "$GOOD" ] ; then
+			DIFF=0
+		else
+			let DIFF=$LAST-$GOOD || DIFF=-1
+		fi
+		if [ $DIFF -eq -1 ] ; then
+			echo "Problems analysing $i build logs, ignoring $NODE."
+		elif [ $DIFF -gt 16 ] ; then
+			echo -n "$i jobs has issues since more than 4h"
+			if grep -q $NODE ~/offline_nodes >/dev/null 2>&1 ; then
+				echo " and $NODE already marked as offline, good."
+			else
+				echo $NODE >> ~/offline_nodes
+				echo " so $NODE has (temporarily) been marked as offline now."
+				irc_message reproducible-builds "$NODE has health problems and has temporarily been marked as offline. To make this permanent, edit jenkins-home/offline_nodes in git."
+
+			fi
+		else
+			echo "$NODE is doing fine, good."
+		fi
+		cd ../..
+	done
+fi
+
 echo "$(date -u) - updating the schroots and pbuilder now..."
 # use host architecture (only)
 ARCH=$(dpkg --print-architecture)



View it on GitLab: https://salsa.debian.org/qa/jenkins.debian.net/commit/0c85f9507e10c22256b2bb86048d87316cbe20ea

-- 
View it on GitLab: https://salsa.debian.org/qa/jenkins.debian.net/commit/0c85f9507e10c22256b2bb86048d87316cbe20ea
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/qa-jenkins-scm/attachments/20180922/a939e3f6/attachment-0001.html>


More information about the Qa-jenkins-scm mailing list