[Pkg-nagios-changes] [pkg-mod-gearman] 23/48: improved dead worker detection

Stig Sandbeck Mathisen ssm at debian.org
Sun Nov 24 22:38:10 UTC 2013


This is an automated email from the git hooks/post-receive script.

ssm pushed a commit to branch master
in repository pkg-mod-gearman.

commit e1bd2c00556ce40cb2ff8a871eb21a480b2f7f66
Author: Sven Nierlein <sven at nierlein.de>
Date:   Wed Oct 30 15:02:08 2013 +0100

    improved dead worker detection
---
 Changes         |  1 +
 common/utils.c  | 16 ++++++++++++++--
 worker/worker.c | 15 ++++++++-------
 3 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/Changes b/Changes
index 75a87de..d15044e 100644
--- a/Changes
+++ b/Changes
@@ -2,6 +2,7 @@ This file documents the revision history for mod_gearman.
 
 next:
           - fixed send_gearman to send duplicate result to normal server (Brian Christiansen)
+          - improved dead worker detection
 
 1.4.10 Mon Aug  5 11:38:53 CEST 2013
           - fixed segfault on empty arguments (Michael Friedrich)
diff --git a/common/utils.c b/common/utils.c
index 8b30b0c..a4c88ed 100644
--- a/common/utils.c
+++ b/common/utils.c
@@ -1215,14 +1215,26 @@ int free_job(gm_job_t *job) {
 
 /* verify if a pid is alive */
 int pid_alive(int pid) {
+    int status;
+
     if(pid < 0) { pid = -pid; }
 
     /* 1/-1 are undefined pids in our case */
     if(pid == 1)
-        return TRUE;
+        return FALSE;
 
-    /* send kill 0 to verify the proc is alive */
+    /* send kill 0 to verify the process still exists */
     if(kill(pid, 0) == 0) {
+        if(waitpid(pid, &status, WNOHANG) == -1) {
+            perror("waitpid");
+        }
+        if(WIFEXITED(status)) {
+            return FALSE;
+        }
+        if(WIFSIGNALED(status) && (WTERMSIG(status) == SIGINT || WTERMSIG(status) == SIGQUIT)) {
+            return FALSE;
+        }
+
         return TRUE;
     }
 
diff --git a/worker/worker.c b/worker/worker.c
index d93524c..e817de8 100644
--- a/worker/worker.c
+++ b/worker/worker.c
@@ -174,17 +174,13 @@ int main (int argc, char **argv) {
 
 /* main loop for checking worker */
 void monitor_loop() {
-    int status;
 
     /* maintain the population */
     while (1) {
         /* check number of workers every second */
         sleep(GM_DEFAULT_WORKER_LOOP_SLEEP);
 
-        /* collect finished workers */
-        while(waitpid(-1, &status, WNOHANG) > 0)
-            gm_log( GM_LOG_TRACE, "waitpid() worker exited with: %d\n", status);
-
+        /* make sure our workers are running */
         check_worker_population();
     }
     return;
@@ -245,10 +241,15 @@ void count_current_worker(int restart) {
 
 /* start new worker if needed */
 void check_worker_population() {
-    int x, now, target_number_of_workers;
+    int x, now, status, target_number_of_workers;
 
     gm_log( GM_LOG_TRACE3, "check_worker_population()\n");
 
+    /* collect finished workers */
+    while(waitpid(-1, &status, WNOHANG) > 0)
+        gm_log( GM_LOG_TRACE, "waitpid() worker exited with: %d\n", status);
+
+
     /* set current worker number */
     count_current_worker(GM_ENABLED);
 
@@ -263,7 +264,7 @@ void check_worker_population() {
         current_number_of_workers++;
     }
 
-    /* check every second */
+    /* check every second if we need to increase worker population */
     now = (int)time(NULL);
     if(last_time_increased >= now)
         return;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-nagios/pkg-mod-gearman



More information about the Pkg-nagios-changes mailing list