[Pkg-nagios-changes] [SCM] UNNAMED PROJECT branch, debian/master, updated. 810edbdd3feedbfe37f4a65bee50b57b2f60fa2a

Gerhard Lausser gerhard.lausser at consol.de
Tue Feb 28 22:17:19 UTC 2012


The following commit has been merged in the debian/master branch:
commit e76dd2293472bdfcfcc0e3ab4f2d0137228f5b6d
Author: Gerhard Lausser <gerhard.lausser at consol.de>
Date:   Sun Jan 29 18:46:02 2012 +0100

    Make the logstore_mongodb more robust against database outages

diff --git a/shinken/modules/logstore_mongodb.py b/shinken/modules/logstore_mongodb.py
index 4d0f760..1339072 100644
--- a/shinken/modules/logstore_mongodb.py
+++ b/shinken/modules/logstore_mongodb.py
@@ -10,6 +10,7 @@ import os
 import time
 import datetime
 import re
+import sys
 import pymongo
 from shinken.objects.service import Service
 from livestatus_broker.livestatus_stack import LiveStatusStack
@@ -114,7 +115,12 @@ class LiveStatusLogStoreMongoDB(BaseModule):
             print "LiveStatusLogStoreMongoDB.AutoReconnect", exp
             # The mongodb is hopefully available until this module is restarted
             raise LiveStatusLogStoreError
-            pass
+        except Exception, exp:
+            # If there is a replica_set, but the host is a simple standalone one
+            # we get a "No suitable hosts found" here.
+            # But other reasons are possible too. 
+            print "Could not open the database", exp
+            raise LiveStatusLogStoreError
 
     def close(self):
         self.conn.disconnect()
@@ -150,43 +156,44 @@ class LiveStatusLogStoreMongoDB(BaseModule):
     def manage_log_brok(self, b):
         data = b.data
         line = data['log']
-        try:
-            logline = Logline(line=line)
-            values = logline.as_dict()
-        except Exception, exp:
-            print "Unexpected error:", exp
-        try:
-            if logline.logclass != LOGCLASS_INVALID:
+        logline = Logline(line=line)
+        values = logline.as_dict()
+        if logline.logclass != LOGCLASS_INVALID:
+            try:
                 self.db[self.collection].insert(values, safe=True)
                 self.is_connected = CONNECTED
                 # If we have a backlog from an outage, we flush these lines
-                for oldvalues in self.backlog[:]:
+                # First we make a copy, so we can delete elements from
+                # the original self.backlog
+                backloglines = [bl for bl in self.backlog]
+                for backlogline in backloglines:
                     try:
-                        self.db[self.collection].insert(oldvalues, safe=True)
-                        self.backlog.delete(oldvalues)
+                        self.db[self.collection].insert(backlogline, safe=True)
+                        self.backlog.remove(backlogline)
                     except Autoreconnect, exp:
                         self.is_connected = SWITCHING
-                        pass
-                        # increment some counter
-                        # sleep
                     except Exception, exp:
-                        pass
-                    
-            else:
-                print "This line is invalid", line
-
-        except AutoReconnect, exp:
-            self.backlog.append(values)
-            self.is_connected = SWITCHING
-            time.sleep(5)
-            # At this point we must save the logline for a later attempt
-            # After 5 seconds we either have a successful write
-            # or another exception which means, we are disconnected
-        except Exception, exp:
-            self.is_connected = DISCONNECTED
-            print "An error occurred:", exp
-            print "DATABASE ERROR!!!!!!!!!!!!!!!!!"
-        #FIXME need access to this#self.livestatus.count_event('log_message')
+                        print "Got an exception inserting the backlog", str(exp)
+            except AutoReconnect, exp:
+                if self.is_connected != SWITCHING:
+                    self.is_connected = SWITCHING
+                    time.sleep(5)
+                    # Under normal circumstances after these 5 seconds
+                    # we should have a new primary node
+                else:
+                    # Not yet? Wait, but try harder.
+                    time.sleep(0.1)
+                # At this point we must save the logline for a later attempt
+                # After 5 seconds we either have a successful write
+                # or another exception which means, we are disconnected
+                self.backlog.append(values)
+            except Exception, exp:
+                self.is_connected = DISCONNECTED
+                print "An error occurred:", exp
+                print "DATABASE ERROR!!!!!!!!!!!!!!!!!"
+            #FIXME need access to this#self.livestatus.count_event('log_message')
+        else:
+            print "This line is invalid", line
 
     def add_filter(self, operator, attribute, reference):
 	if attribute == 'time':
@@ -230,7 +237,6 @@ class LiveStatusLogStoreMongoDB(BaseModule):
             print "sorry, not connected"
         else:
             dbresult = [Logline([(c, ) for c in columns], [x[col] for col in columns]) for x in self.db[self.collection].find(filter_element).sort([(u'time', pymongo.ASCENDING), (u'lineno', pymongo.ASCENDING)])]
-        print "i count", self.db[self.collection].find(filter_element).count()
         return dbresult
 
     def make_mongo_filter(self, operator, attribute, reference):
diff --git a/test/test_livestatus_mongodb.py b/test/test_livestatus_mongodb.py
index 3975c2a..14608a5 100755
--- a/test/test_livestatus_mongodb.py
+++ b/test/test_livestatus_mongodb.py
@@ -293,9 +293,9 @@ class TestConfigBig(TestConfig):
         dbmodconf = Module({'module_name' : 'LogStore',
             'module_type' : 'logstore_mongodb',
             'database' : 'bigbigbig',
-            #'mongodb_uri' : "mongodb://127.0.0.1:27017",
-            'mongodb_uri' : "mongodb://10.0.12.50:27017",
-            'replica_set' : 'livestatus',
+            'mongodb_uri' : "mongodb://127.0.0.1:27017",
+            #'mongodb_uri' : "mongodb://10.0.12.50:27017,10.0.12.51:27017",
+        #    'replica_set' : 'livestatus',
             'max_logs_age' : '14',
         })
         modconf.modules = [dbmodconf]
@@ -528,8 +528,11 @@ OutputFormat: json"""
             print "day -%02d %d..%d - %d" % (day, one_day_earlier, now, numlogs)
             now = one_day_earlier
         numlogs = self.livestatus_broker.db.conn.bigbigbig.logs.find().count()
-        print numlogs, sum(daycount[:14]), daycount[:14]
-        self.assert_(numlogs == sum(daycount[:14]))
+        # simply an estimation. the cleanup-routine in the mongodb logstore
+        # cuts off the old data at midnight, but here in the test we have
+        # only accuracy of a day.
+        self.assert_(numlogs >= sum(daycount[:14]))
+        self.assert_(numlogs <= sum(daycount[:15]))
 
 
 if __name__ == '__main__':

-- 
UNNAMED PROJECT



More information about the Pkg-nagios-changes mailing list