[Qa-jenkins-scm] [Git][qa/jenkins.debian.net][master] 2 commits: reproducible trbo system health check: clarify wording, less is more

Holger Levsen gitlab at salsa.debian.org
Wed Sep 2 20:04:18 BST 2020



Holger Levsen pushed to branch master at Debian QA / jenkins.debian.net


Commits:
6e6bae6a by Holger Levsen at 2020-09-02T21:03:38+02:00
reproducible trbo system health check: clarify wording, less is more

Signed-off-by: Holger Levsen <holger at layer-acht.org>

- - - - -
d099a852 by Holger Levsen at 2020-09-02T21:04:07+02:00
reproducible trbo system health check: color highlight important bad conditions

Signed-off-by: Holger Levsen <holger at layer-acht.org>

- - - - -


2 changed files:

- bin/jenkins-shell-monitor.sh
- bin/reproducible_system_health.sh


Changes:

=====================================
bin/jenkins-shell-monitor.sh
=====================================
@@ -30,6 +30,11 @@ main_loop() {
 	ps fax > $PSFAX
 	LSOF=$(lsof -n | wc -l)
 	SCHROOT_SESSIONS=$(find /var/lib/schroot/session/ | wc -l)
+	if [ $SCHROOT_SESSIONS -gt 30000 ] ; then
+		SCHROOT_SESSIONS="\033[91m$SCHROOT_SESSIONS\033[0m"
+	elif [ $SCHROOT_SESSIONS -gt 15000 ] ; then
+		SCHROOT_SESSIONS="\033[93m$SCHROOT_SESSIONS\033[0m"
+	fi
 	SCHROOT_MOUNTS=$(mount | grep /run/schroot| wc -l)
 	REPRO_JOBS=$(ls ~jenkins/jobs/reproducible_* -1d | wc -l)
 	JOBS_RUNNING_TOTAL=$(grep '_ /bin/bash /srv/jenkins/bin/' $PSFAX | egrep -v 'reproducible_worker.sh|reproducible_build.sh|jenkins-shell-monitor.sh' | wc -l)
@@ -58,12 +63,17 @@ main_loop() {
 		echo "logged in user sessions:                      $(uptime | rev |cut -d ',' -f1-4 | rev | cut -d ',' -f1 | sed "s#users##" | xargs echo)"
 		echo "logged in users:                              $(w -h | awk '{print $1}' | sort -u | xargs echo)"
 		echo "number of open files:                         $LSOF"
-		echo "schroot: (sessions / mounts)                  $SCHROOT_SESSIONS / $SCHROOT_MOUNTS"
+		echo -e "schroot: (sessions / mounts)                  $SCHROOT_SESSIONS / $SCHROOT_MOUNTS"
 		echo "configured r-b jobs:                          $REPRO_JOBS"
 		echo "running jenkins jobs: (total/local/remote)    $JOBS_RUNNING_TOTAL / $JOBS_RUNNING_LOCAL / $JOBS_RUNNING_REMOTE"
 		echo "running debian r-b workers:                   $REPRO_WORKERS"
 		echo "running jenkings agents/nodes:                $JENKINS_AGENTS"
-		echo "nodes: (total/auto-offline/offline in git)    $NODES_TOTAL / $(grep -v ^# ~jenkins/offline_nodes | grep -c debian) / $(grep -v ^# ~jenkins-adm/jenkins.debian.net/jenkins-home/offline_nodes | grep -c debian)"
+		NODES_OFFLINE="$(grep -v ^# ~jenkins/offline_nodes | grep -c debian)"
+		NODES_GITOFFLINE="$(grep -v ^# ~jenkins-adm/jenkins.debian.net/jenkins-home/offline_nodes | grep -c debian)"
+		if [ "$NODES_OFFLINE" != "$NODES_GITOFFLINE" ] ; then
+			NODES_OFFLINE="\033[93m$NODES_OFFLINE\033[0m"
+		fi
+		echo -e "nodes: (total/auto-offline/offline in git)    $NODES_TOTAL / $NODES_OFFLINE / $NODES_GITOFFLINE"
 		echo
 		df -h  /dev/vda1 /var/lib/jenkins/userContent/reproducible /srv/workspace /tmp | awk '{printf "%-45s %-6s %-6s %s\n", $6, $2, $4, $5}'
 		free -h | cut -b1-47


=====================================
bin/reproducible_system_health.sh
=====================================
@@ -363,7 +363,7 @@ conditional_paragraph ${FAILED_SUSPECTS} "Failed jobs on nodes automatically mar
 conditional_paragraph ${UNSTABLE_SUSPECTS} "Unstable jobs on nodes automatically marked down by jenkins (with modifier > 1)"
 write2healthfile "<hr><p>A stable jobs adds 3 to the score, an unstable job adds 1 and a failed job substracts something between 1 and 500 (indicated in brackets after the job name if not equal 1), depending on the importance of the job for the setup. Ignored jobs are not counted at all. If the final score is below zero it will be set to zero. Finally status is calculated by diving the score by three times the number of considered jobs and this gets multiplied with 255 to get a status between 0 and 255.</p><hr>"
 conditional_paragraph ${IGNORED_JOBS} "Ignored jobs, because the nodes these are running on are <a href=\"https://salsa.debian.org/qa/jenkins.debian.net/-/blob/master/jenkins-home/offline_nodes\">documented</a> to be offline"
-conditional_paragraph ${KNOWN_BAD_JOBS} "Ignored jobs, because they are known to be disabled as non-funtional"
+conditional_paragraph ${KNOWN_BAD_JOBS} "Ignored jobs, because they are known to be disabled"
 write2healthfile "<hr><p><small>This page was last updated on $(date -u) by the <a href=\"https://jenkins.debian.net/job/reproducible_system_health/\">reproducible_system_health</a> job.</small></p>"
 write2healthfile "</body></html>"
 echo "$(date -u) - $(basename $HEALTH_FILE).html updated, visible at $REPRODUCIBLE_URL/$(basename $HEALTH_FILE).html."



View it on GitLab: https://salsa.debian.org/qa/jenkins.debian.net/-/compare/13db97547efcd6b0001afd6ebaa0ca77ec8232d5...d099a852bd994f1ff7e34dae6176f25f46d8ed13

-- 
View it on GitLab: https://salsa.debian.org/qa/jenkins.debian.net/-/compare/13db97547efcd6b0001afd6ebaa0ca77ec8232d5...d099a852bd994f1ff7e34dae6176f25f46d8ed13
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/qa-jenkins-scm/attachments/20200902/92df390f/attachment-0001.html>


More information about the Qa-jenkins-scm mailing list