[Qa-jenkins-scm] [Git][qa/jenkins.debian.net][master] reproducible trbo system health check: deal with zipped job logfiles
Holger Levsen
gitlab at salsa.debian.org
Sun Jul 26 18:02:40 BST 2020
Holger Levsen pushed to branch master at Debian QA / jenkins.debian.net
Commits:
ced06afd by Holger Levsen at 2020-07-26T19:02:30+02:00
reproducible trbo system health check: deal with zipped job logfiles
Signed-off-by: Holger Levsen <holger at layer-acht.org>
- - - - -
1 changed file:
- bin/reproducible_system_health.sh
Changes:
=====================================
bin/reproducible_system_health.sh
=====================================
@@ -35,11 +35,22 @@ IGNORED_JOBS=$(mktemp --tmpdir=$TMPDIR trbo-status-XXXXXXX)
BAD_JOBS=$(mktemp --tmpdir=$TMPDIR trbo-status-XXXXXXX)
FAILED_SUSPECTS=$(mktemp --tmpdir=$TMPDIR trbo-status-XXXXXXX)
UNSTABLE_SUSPECTS=$(mktemp --tmpdir=$TMPDIR trbo-status-XXXXXXX)
+LOG=$(mktemp --tmpdir=$TMPDIR trbo-status-XXXXXXX)
small_note() {
	# Wrap the given text in <small> tags and store it in the global NOTE.
	local text="$1"
	NOTE="<small>${text}</small>"
}
+define_log(){	# fill $LOG with the plain-text log of build $LAST of $JOB
+ if [ -f "$JOB/builds/$LAST/log" ] ; then
+ cat "$JOB/builds/$LAST/log" > "$LOG"	# copy, don't link: 'ln -sf $LOG .../log' replaced the real log with a symlink
+ elif [ -f "$JOB/builds/$LAST/log.gz" ] ; then
+ zcat "$JOB/builds/$LAST/log.gz" > "$LOG"
+ else
+ echo > "$LOG"	# neither log nor log.gz: leave a blank file so the later grep/tail calls still have input
+ fi
+}
+
# gather data
echo "$(date -u) - starting up."
cd /var/lib/jenkins/jobs/
@@ -118,20 +129,21 @@ for JOB in reproducible_* ; do
elif [ "$LAST" = "$UNSTABLE" ] ; then
echo "unstable job: $JOB"
let SCORE+=1 || SCORE=0
+ define_log
# only show the most severe problem, don't aggregate them
- if $(grep -q "failed Squid Web Proxy Server" $JOB/builds/$LAST/log) ; then
+ if $(grep -q "failed Squid Web Proxy Server" $LOG) ; then
small_note " (squid.service failed)"
- elif $(grep -q "Kernel needs upgrade" $JOB/builds/$LAST/log) ; then
+ elif $(grep -q "Kernel needs upgrade" $LOG) ; then
small_note " (reboot needed for kernel upgrade)"
- elif $(grep -q "Warning, more than one kernel in /boot" $JOB/builds/$LAST/log) ; then
+ elif $(grep -q "Warning, more than one kernel in /boot" $LOG) ; then
small_note " (more than one kernel installed)"
- elif $(grep -q "failed failed /etc/rc.local Compatibility" $JOB/builds/$LAST/log) ; then
+ elif $(grep -q "failed failed /etc/rc.local Compatibility" $LOG) ; then
small_note " (rc-local.service failed)"
- elif $(egrep -q "failed Session [0-9]+ of user jenkins" $JOB/builds/$LAST/log) ; then
+ elif $(grep -E -q "failed Session [0-9]+ of user jenkins" $LOG) ; then
small_note " (session failed for user jenkins)"
- elif $(egrep -q "failed failed pbuilder_build" $JOB/builds/$LAST/log) ; then
+ elif $(grep -q "failed failed pbuilder_build" $LOG) ; then
small_note " (pbuilder build scope failed)"
- elif $(egrep -q "failed failed Rotate log files" $JOB/builds/$LAST/log) ; then
+ elif $(grep -q "failed failed Rotate log files" $LOG) ; then
small_note " (logrotate failed)"
fi
if ! $SUSPECT ; then
@@ -140,14 +152,15 @@ for JOB in reproducible_* ; do
echo " <li><a href=\"https://jenkins.debian.net/job/$JOB/\">$JOB</a>$NOTE</li>" >> ${UNSTABLE_SUSPECTS}
fi
else
+ define_log
# only show the most severe problem, don't aggregate them
- if $(egrep -q "Failed to connect to [.0-9]+ port 3128: Connection refused" $JOB/builds/$LAST/log) ; then
+ if $(grep -E -q "Failed to connect to [.0-9]+ port 3128: Connection refused" $LOG) ; then
small_note " (failed to connect to https-proxy)"
- elif $(grep -q "seems to be down, sleeping" $JOB/builds/$LAST/log) ; then
+ elif $(grep -q "seems to be down, sleeping" $LOG) ; then
small_note " (node seemed down)"
- elif $(tail -1 $JOB/builds/$LAST/log | grep -q "Finished: ABORTED") ; then
+ elif $(tail -1 $LOG | grep -q "Finished: ABORTED") ; then
small_note " (job was aborted)"
- elif $(grep -q "^make -r world: build failed. Please re-run" $JOB/builds/$LAST/log) ; then
+ elif $(grep -q "^make -r world: build failed. Please re-run" $LOG) ; then
small_note " (make world failed)"
fi
case $JOB in
@@ -255,4 +268,4 @@ write2healthfile "<p><small>last updated: $(date -u) by the <a href=\"https://je
write2healthfile "</body></html>"
echo "$(date -u) - $(basename $HEALTH_FILE).html updated, visible at $REPRODUCIBLE_URL/$(basename $HEALTH_FILE).html."
echo "$(date -u) - the end."
-rm -f ${FAILED_JOBS} ${UNSTABLE_JOBS} ${IGNORED_JOBS} ${BAD_JOBS} ${FAILED_SUSPECTS} ${UNSTABLE_SUSPECTS}
+rm -f ${FAILED_JOBS} ${UNSTABLE_JOBS} ${IGNORED_JOBS} ${BAD_JOBS} ${FAILED_SUSPECTS} ${UNSTABLE_SUSPECTS} $LOG
View it on GitLab: https://salsa.debian.org/qa/jenkins.debian.net/-/commit/ced06afd506c003dc5b7a73f185a62f2866ddc8d
--
View it on GitLab: https://salsa.debian.org/qa/jenkins.debian.net/-/commit/ced06afd506c003dc5b7a73f185a62f2866ddc8d
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/qa-jenkins-scm/attachments/20200726/39c273ec/attachment-0001.html>
More information about the Qa-jenkins-scm
mailing list