[Qa-jenkins-scm] [Git][qa/jenkins.debian.net][master] djm: automatically shrink job run logparser cache and save backup of old data

Holger Levsen (@holger) gitlab at salsa.debian.org
Thu Jun 1 21:23:06 BST 2023



Holger Levsen pushed to branch master at Debian QA / jenkins.debian.net


Commits:
340e50e4 by Holger Levsen at 2023-06-01T22:22:50+02:00
djm: automatically shrink job run logparser cache and save backup of old data

Signed-off-by: Holger Levsen <holger at layer-acht.org>

- - - - -


3 changed files:

- TODO
- bin/djm
- bin/djm-jenkins-parser


Changes:

=====================================
TODO
=====================================
@@ -32,10 +32,7 @@ See link:https://jenkins.debian.net/userContent/about.html["about jenkins.debian
 ** maybe: rm /tmp/mmdebstrap.* older than 3 days
 * split TODO in TODO and TODO.legacy?
 * djm:
-** djm-jenkins-parser needs to split out old data too, maybe run parser with triggering option after having done this locally?
-** rename .djm-jenkins-parser.log to something with .raw
-** rename .djm-jenkins-ui.log to reflect its from .raw (but now has dates)
-** cleanup parser-log too, eg if its too big. then also figlet a warning...
+** "total jobs run" is buggy, see FIXME
 ** new feature: --show-month 04
 ** --report: include hours with manual jobs triggered
 ** action: rk / remove-oldest-kernel


=====================================
bin/djm
=====================================
@@ -28,7 +28,7 @@ REASON=
 COMMAND=
 LOCAL_LOGFILE=~/.djm.log
 UI_LOGFILE=~/.djm-jenkins-ui.log
-PARSER_LOGFILE=~/.djm-jenkins-parser.log
+PARSER_CACHE=~/.djm-jenkins-parser.cache
 LOGMONTH="$(date -u '+%Y-%m')"
 JOBS=~/.djm-jobs.txt
 if [ -z "$DJM_USER" ] ; then
@@ -271,7 +271,7 @@ djm_fetch() {
 		ssh $DJM_USER@jenkins.debian.net "DJM_USER=$DJM_USER /srv/jenkins/bin/djm-jenkins-parser"
 		# fetch
 		scp $DJM_USER@jenkins.debian.net:$(basename $UI_LOGFILE) $UI_LOGFILE
-		scp $DJM_USER@jenkins.debian.net:$(basename $PARSER_LOGFILE) $PARSER_LOGFILE
+		scp $DJM_USER@jenkins.debian.net:$(basename $PARSER_CACHE) $PARSER_CACHE
 		ssh $DJM_USER@jenkins.debian.net "cd ~jenkins/jobs ; ls -1d reproducible_* |wc -l" > $JOBS
 		# split old data out
 		if grep -q -v ^$LOGMONTH $LOGFILE || grep -q -v ^$LOGMONTH $UI_LOGFILE ; then
@@ -290,6 +290,8 @@ djm_fetch() {
 					rm $tmpfile
 				done
 			done
+			# copy back cleaned logfile to jenkins server
+			scp $UI_LOGFILE $DJM_USER@jenkins.debian.net:$(basename $UI_LOGFILE)
 			rm -f $tmpfile
 		fi
 		rm $LOCK
@@ -372,14 +374,14 @@ djm_report() {
 
 	if $VERBOSE ; then
 		seperator
-		HEADING="$(cat $PARSER_LOGFILE | wc -l) total jobs run:"
+		HEADING="$(cat $PARSER_CACHE | wc -l) total jobs run:" # FIXME: this misses job runs not in cache...
 		printf_heading "$HEADING"
 		(
 			for i in $PATTERNS  ; do
-				printf_if_not_zero "$TWO_C" $(grep -c reproducible_$i $PARSER_LOGFILE) "reproducible_${i}_.*"
+				printf_if_not_zero "$TWO_C" $(grep -c reproducible_$i $PARSER_CACHE) "reproducible_${i}_.*"
 			done
-			for i in $(cut -d '/' -f2 $PARSER_LOGFILE | grep -v -E $PIPE_PATTERNS|sort -u) ; do
-				printf_if_not_zero "$TWO_C" $(grep -c $i $PARSER_LOGFILE) "$i"
+			for i in $(cut -d '/' -f2 $PARSER_CACHE | grep -v -E $PIPE_PATTERNS|sort -u) ; do
+				printf_if_not_zero "$TWO_C" $(grep -c $i $PARSER_CACHE) "$i"
 			done
 		) | sort -n -r
 	fi


=====================================
bin/djm-jenkins-parser
=====================================
@@ -23,25 +23,33 @@ fi
 cd ~jenkins/jobs
 
 export TZ="/usr/share/zoneinfo/UTC"
-MYLOG=~/.djm-jenkins-parser.log
-MYRESULTS=~/.djm-jenkins-ui.log
+MY_CACHE=~/.djm-jenkins-parser.cache
+MY_RESULTS=~/.djm-jenkins-ui.log
+LOGS=""
+ZLOGS=""
+
+find_all_logs() {
+	echo "Note: freshly initialized run detected, this will take a few minutes."
+	LOGS=$(find ./reproducible_*/builds/*/log 2>/dev/null || true)
+	ZLOGS=$(find ./reproducible_*/builds/*/log.gz 2>/dev/null || true)
+	touch $MY_CACHE
+}
 
 #
 # main
 #
 
 # find recent / unparsed logfiles
-if [ ! -f $MYLOG ] ; then
-	LOGS=$(find ./reproducible_*/builds/*/log 2>/dev/null || true)
-	ZLOGS=$(find ./reproducible_*/builds/*/log.gz 2>/dev/null || true)
-	echo "Note: initial run detected, this will take a few minutes."
-	touch $MYLOG
+if [ ! -f $MY_CACHE ] ; then
+	find_all_logs
+elif [ $(cat $MY_CACHE | wc -l) -gt 150000 ] ; then
+	figlet "to note:"
+	echo "$MY_CACHE has become very big, moving it away."
+	mv -v $MY_CACHE ${MY_CACHE}.$(date -u '+%Y-%m-%d')
+	find_all_logs
 else
-	if [ $(cat $MYLOG | wc -l) -gt 150000 ] ; then
-		echo "Note: $MYLOG has become very big, maybe time to backup $MYLOG and $MYRESULTS?"
-	fi
-	LOGS=$(find ./reproducible_*/builds/*/log -newer $MYLOG 2>/dev/null || true)
-	ZLOGS=$(find ./reproducible_*/builds/*/log.gz -newer $MYLOG 2>/dev/null || true)
+	LOGS=$(find ./reproducible_*/builds/*/log -newer $MY_CACHE 2>/dev/null || true)
+	ZLOGS=$(find ./reproducible_*/builds/*/log.gz -newer $MY_CACHE 2>/dev/null || true)
 fi
 echo "Parsing $(echo $LOGS $ZLOGS | sed 's# #\n#g' | wc -l) logfiles now."
 
@@ -49,10 +57,10 @@ echo "Parsing $(echo $LOGS $ZLOGS | sed 's# #\n#g' | wc -l) logfiles now."
 (
 for i in $LOGS $ZLOGS ; do
 	DIRNAME="$(dirname $i)"
-	if [ ! -f $i ] || grep -q "$DIRNAME/log" $MYLOG ; then
+	if [ ! -f $i ] || grep -q "$DIRNAME/log" $MY_CACHE ; then
 		: # echo $i already processed, continue.
 	else
-		echo "$DIRNAME/log" >> $MYLOG
+		echo "$DIRNAME/log" >> $MY_CACHE
 		if [ "$(basename $i)" == "log" ] ; then
 			RESULT=$(head -1 $i | grep -E -i "Started by user.*$DJM_USER" || true)
 		else
@@ -66,4 +74,4 @@ for i in $LOGS $ZLOGS ; do
 		fi
 	fi
 done
- ) | sort | tee -a $MYRESULTS
+ ) | sort | tee -a $MY_RESULTS



View it on GitLab: https://salsa.debian.org/qa/jenkins.debian.net/-/commit/340e50e4236bbba73f05135805dcf1d888fc7682

-- 
View it on GitLab: https://salsa.debian.org/qa/jenkins.debian.net/-/commit/340e50e4236bbba73f05135805dcf1d888fc7682
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/qa-jenkins-scm/attachments/20230601/98a07041/attachment-0001.htm>


More information about the Qa-jenkins-scm mailing list