[med-svn] [Git][med-team/biomaj3][master] 3 commits: New upstream version 3.1.6

Olivier Sallou gitlab at salsa.debian.org
Thu Oct 25 10:22:25 BST 2018


Olivier Sallou pushed to branch master at Debian Med / biomaj3


Commits:
f8ef7eba by Olivier Sallou at 2018-10-25T09:17:55Z
New upstream version 3.1.6
- - - - -
8c773d7c by Olivier Sallou at 2018-10-25T09:17:58Z
Updated version 3.1.6 from 'upstream/3.1.6'

with Debian dir 0f9b6dbeb059aac198a793757bb8ab7fb01dbcb0
- - - - -
ee45a1ac by Olivier Sallou at 2018-10-25T09:18:57Z
new upstream release

- - - - -


9 changed files:

- CHANGES.txt
- README.md
- biomaj/bank.py
- biomaj/mongo_connector.py
- biomaj/process/metaprocess.py
- biomaj/workflow.py
- debian/changelog
- requirements.txt
- setup.py


Changes:

=====================================
CHANGES.txt
=====================================
@@ -1,3 +1,13 @@
+3.1.6:
+  Fix #100 Catch error and log error if biomaj fails to connect to InfluxDB
+  Add history to update/remove operations
+  Add log in case of file deletion error during bank removal
+  check lock file exists when removing it
+  Update protobuf to work with biomaj.download 3.0.18
+
+3.1.5:
+  Fix #97 Wrong offline dir checks
+
 3.1.4:
   Fix #88 Unset 'last_update_session' when found in pending sessions using --remove-pending
   Add formats in bank info request


=====================================
README.md
=====================================
@@ -1,7 +1,7 @@
 BioMAJ3
 =====
 
-This project is a complete rewrite of BioMAJ (http://biomaj.genouest.org).
+This project is a complete rewrite of BioMAJ; documentation is available at http://biomaj.genouest.org.
 
 BioMAJ (BIOlogie Mise A Jour) is a workflow engine dedicated to data
 synchronization and processing. The Software automates the update cycle and the
@@ -176,7 +176,7 @@ Execute unit tests but disable ones needing network access
 Monitoring
 ==========
 
-InfluxDB can be used to monitor biomaj. Following series are available:
+InfluxDB (optional) can be used to monitor biomaj. The following series are available:
 
 * biomaj.banks.quantity (number of banks)
 * biomaj.production.size.total (size of all production directories)
@@ -185,6 +185,8 @@ InfluxDB can be used to monitor biomaj. Following series are available:
 * biomaj.bank.update.downloaded_files (number of downloaded files)
 * biomaj.bank.update.new (track updates)
 
+*WARNING* The InfluxDB database must be created beforehand; biomaj does not create it (see https://docs.influxdata.com/influxdb/v1.6/query_language/database_management/#create-database)
+
 License
 =======
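
Note: the warning above means the database has to exist before the first update run. A minimal sketch of creating it with the influxdb Python client; the host, port and database name are deployment-specific assumptions:

    # create the InfluxDB database biomaj expects; biomaj never creates it itself
    from influxdb import InfluxDBClient

    client = InfluxDBClient(host='localhost', port=8086)  # assumed local default endpoint
    client.create_database('biomaj')                      # assumed database name

The same can be done from the shell with: influx -execute 'CREATE DATABASE biomaj'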
 


=====================================
biomaj/bank.py
=====================================
@@ -74,6 +74,7 @@ class Bank(object):
                            BiomajConfig.global_config.get('GENERAL', 'db.name'))
 
         self.banks = MongoConnector.banks
+        self.history = MongoConnector.history
         self.bank = self.banks.find_one({'name': self.name})
 
         if self.bank is None:
@@ -107,6 +108,22 @@ class Bank(object):
         else:
             return False
 
+    @staticmethod
+    def get_history(limit=100):
+        """
+        Get list of bank update/remove operations
+        """
+        if MongoConnector.db is None:
+            MongoConnector(BiomajConfig.global_config.get('GENERAL', 'db.url'),
+                           BiomajConfig.global_config.get('GENERAL', 'db.name'))
+
+        hist_list = []
+        hist = MongoConnector.history.find({}).sort("start", -1).limit(limit)
+        for h in hist:
+            del h['_id']
+            hist_list.append(h)
+        return hist_list
+
     @staticmethod
     def get_banks_disk_usage():
         """
@@ -561,10 +578,18 @@ class Bank(object):
                           'freeze': False}
             self.bank['production'].append(production)
             self.banks.update({'name': self.name},
-                              {'$push': {'production': production},
-                               '$pull': {'pending': {'release': self.session.get('release'),
-                                                     'id': self.session._session['id']}}
-                               })
+                              {
+                              '$push': {'production': production}
+                              })
+            self.banks.update({'name': self.name},
+                              {
+                              '$pull': {
+                                  'pending': {
+                                      'release': self.session.get('release'),
+                                      'id': self.session._session['id']
+                                  }
+                              }
+                              })
 
         self.bank = self.banks.find_one({'name': self.name})
 
@@ -917,6 +942,8 @@ class Bank(object):
         :type force: bool
         :return: bool
         """
+        start_time = datetime.now()
+        start_time = time.mktime(start_time.timetuple())
         if not force:
             has_freeze = False
             for prod in self.bank['production']:
@@ -932,13 +959,35 @@ class Bank(object):
         bank_data_dir = self.get_data_dir()
         logging.warn('DELETE ' + bank_data_dir)
         if os.path.exists(bank_data_dir):
-            shutil.rmtree(bank_data_dir)
+            try:
+                shutil.rmtree(bank_data_dir)
+            except Exception:
+                logging.exception('Failed to delete bank directory: ' + bank_data_dir)
+                logging.error('Bank will be deleted but some files/dirs may remain on the system; you can safely delete them manually')
         bank_offline_dir = os.path.join(self.config.get('data.dir'), self.config.get('offline.dir.name'))
         if os.path.exists(bank_offline_dir):
-            shutil.rmtree(bank_offline_dir)
+            try:
+                shutil.rmtree(bank_offline_dir)
+            except Exception:
+                logging.exception('Failed to delete bank offline directory: ' + bank_offline_dir)
+                logging.error('Bank will be deleted but some files/dirs may remain on the system; you can safely delete them manually')
         bank_log_dir = os.path.join(self.config.get('log.dir'), self.name)
         if os.path.exists(bank_log_dir) and self.no_log:
-            shutil.rmtree(bank_log_dir)
+            try:
+                shutil.rmtree(bank_log_dir)
+            except Exception:
+                logging.exception('Failed to delete bank log directory: ' + bank_log_dir)
+                logging.error('Bank will be deleted but some files/dirs may remain on the system; you can safely delete them manually')
+        end_time = datetime.now()
+        end_time = time.mktime(end_time.timetuple())
+        self.history.insert({
+            'bank': self.name,
+            'error': False,
+            'start': start_time,
+            'end': end_time,
+            'action': 'remove',
+            'updated': None
+        })
         return True
 
     def get_status(self):
@@ -1012,6 +1061,8 @@ class Bank(object):
         """
         release = str(release)
         logging.warning('Bank:' + self.name + ':Remove')
+        start_time = datetime.now()
+        start_time = time.mktime(start_time.timetuple())
 
         if not self.is_owner():
             logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner'])
@@ -1047,8 +1098,17 @@ class Bank(object):
         # Reset status, we take an update session
         res = self.start_remove(session)
         self.session.set('workflow_status', res)
-
         self.save_session()
+        end_time = datetime.now()
+        end_time = time.mktime(end_time.timetuple())
+        self.history.insert({
+            'bank': self.name,
+            'error': not res,
+            'start': start_time,
+            'end': end_time,
+            'action': 'remove',
+            'updated': None
+        })
 
         return res
 
@@ -1061,6 +1121,8 @@ class Bank(object):
         :return: bool
         """
         logging.warning('Bank:' + self.name + ':Update')
+        start_time = datetime.now()
+        start_time = time.mktime(start_time.timetuple())
 
         if not self.is_owner():
             logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner'])
@@ -1108,7 +1170,20 @@ class Bank(object):
         res = self.start_update()
         self.session.set('workflow_status', res)
         self.save_session()
-        self.__stats()
+        try:
+            self.__stats()
+        except Exception:
+            logging.exception('Failed to send stats')
+        end_time = datetime.now()
+        end_time = time.mktime(end_time.timetuple())
+        self.history.insert({
+            'bank': self.name,
+            'error': not res,
+            'start': start_time,
+            'end': end_time,
+            'action': 'update',
+            'updated': self.session.get('update')
+        })
         return res
 
     def __stats(self):
@@ -1139,7 +1214,8 @@ class Bank(object):
             else:
                 influxdb = InfluxDBClient(host=db_host, port=db_port, database=db_name)
         except Exception as e:
-            logging.error('InfluxDB connection error: ' + str(e))
+            logging.error('Failed to connect to InfluxDB, database may not be created, skipping the record of statistics')
+            logging.exception('InfluxDB connection error: ' + str(e))
             return
         metrics = []
 
@@ -1230,7 +1306,14 @@ class Bank(object):
             }
             metrics.append(influx_metric)
 
-        res = influxdb.write_points(metrics, time_precision="s")
+        res = None
+        try:
+            res = influxdb.write_points(metrics, time_precision="s")
+        except Exception as e:
+            logging.error('Failed to connect to InfluxDB, database may not be created, skipping the record of statistics')
+            logging.exception('InfluxDB connection error: ' + str(e))
+            return
+
         if not res:
             logging.error('Failed to send metrics to database')
 


=====================================
biomaj/mongo_connector.py
=====================================
@@ -18,3 +18,4 @@ class MongoConnector(object):
         MongoConnector.banks = MongoConnector.db.banks
         MongoConnector.users = MongoConnector.db.users
         MongoConnector.db_schema = MongoConnector.db.db_schema
+        MongoConnector.history = MongoConnector.db.history
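
With the history collection now exposed on MongoConnector, the records written by bank.py can be read back through the new Bank.get_history(); a minimal usage sketch, assuming a global configuration file has already been set up (its path here is an assumption):

    from biomaj_core.config import BiomajConfig
    from biomaj.bank import Bank

    BiomajConfig.load_config('/etc/biomaj/global.properties')  # assumed config location
    # last 10 update/remove operations, newest first (sorted on 'start' descending)
    for entry in Bank.get_history(limit=10):
        print(entry['bank'], entry['action'], entry['error'])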


=====================================
biomaj/process/metaprocess.py
=====================================
@@ -58,7 +58,7 @@ class MetaProcess(threading.Thread):
             self.bmaj_env[conf] = self.bank.config.config_bank.get('GENERAL', conf)
             if self.bmaj_env[conf] is None:
                 self.bmaj_env[conf] = ''
-                self.bmaj_only_env[conf] = self.bmaj_env[conf]
+            self.bmaj_only_env[conf] = self.bmaj_env[conf]
 
         self.bmaj_env['dbname'] = self.bank.name
         self.bmaj_only_env['dbname'] = self.bmaj_env['dbname']
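
The one-level dedent above changes behaviour: bmaj_only_env[conf] is now filled for every configuration key instead of only for keys whose value was None. A self-contained reduction of the fixed loop (the dictionary contents are hypothetical):

    bank_conf = {'db.fullname': 'Test bank', 'db.formats': None}  # hypothetical config values
    bmaj_env, bmaj_only_env = {}, {}
    for conf in bank_conf:
        bmaj_env[conf] = bank_conf[conf]
        if bmaj_env[conf] is None:
            bmaj_env[conf] = ''
        bmaj_only_env[conf] = bmaj_env[conf]  # fixed: mirrored for every key, not only empty ones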


=====================================
biomaj/workflow.py
=====================================
@@ -14,7 +14,7 @@ import hashlib
 
 from biomaj_core.utils import Utils
 from biomaj_download.downloadclient import DownloadClient
-from biomaj_download.message import message_pb2
+from biomaj_download.message import downmessage_pb2
 from biomaj_download.download.http import HTTPParse
 from biomaj_download.download.localcopy import LocalDownload
 
@@ -251,7 +251,8 @@ class Workflow(object):
         data_dir = self.session.config.get('data.dir')
         lock_dir = self.session.config.get('lock.dir', default=data_dir)
         lock_file = os.path.join(lock_dir, self.name + '.lock')
-        os.remove(lock_file)
+        if os.path.exists(lock_file):
+            os.remove(lock_file)
         return True
 
 
@@ -287,7 +288,12 @@ class RemoveWorkflow(Workflow):
             return False
 
         if os.path.exists(self.session.get_full_release_directory()):
-            shutil.rmtree(self.session.get_full_release_directory())
+            logging.info('Workflow:wf_remove:delete:' + self.session.get_full_release_directory())
+            try:
+                shutil.rmtree(self.session.get_full_release_directory())
+            except Exception:
+                logging.exception('Failed to delete bank release directory: ' + self.session.get_full_release_directory())
+                logging.error('Bank will be deleted but some files/dirs may remain on the system; you can safely delete them manually')
         return self.bank.remove_session(self.session.get('update_session_id'))
 
     def wf_removeprocess(self):
@@ -1082,6 +1088,7 @@ class UpdateWorkflow(Workflow):
             credentials = cf.get('server.credentials')
 
             remote_dir = cf.get('remote.dir')
+
             if protocol == 'directhttp' or protocol == 'directhttps' or protocol == 'directftp':
                 keys = cf.get('url.params')
                 if keys is not None:
@@ -1240,12 +1247,18 @@ class UpdateWorkflow(Workflow):
             keep_files = []
             nb_expected_files += len(downloader.files_to_download)
             if os.path.exists(offline_dir):
+                logging.debug('Workflow:wf_download:offline_check_dir:' + offline_dir)
                 for file_to_download in downloader.files_to_download:
                     # If file is in offline dir and has same date and size, do not download again
-                    if os.path.exists(offline_dir + '/' + file_to_download['name']):
+                    offline_file = file_to_download['name']
+                    if file_to_download.get('save_as', None):
+                        offline_file = file_to_download['save_as']
+                    logging.debug('Workflow:wf_download:offline_check_file:' + offline_file)
+                    if os.path.exists(offline_dir + '/' + offline_file):
+                        logging.debug('Workflow:wf_download:offline_check_file_identical:' + offline_file)
                         try:
-                            file_stat = os.stat(offline_dir + '/' + file_to_download['name'])
-                            f_stat = datetime.datetime.fromtimestamp(os.path.getmtime(offline_dir + '/' + file_to_download['name']))
+                            file_stat = os.stat(offline_dir + '/' + offline_file)
+                            f_stat = datetime.datetime.fromtimestamp(os.path.getmtime(offline_dir + '/' + offline_file))
                             year = str(f_stat.year)
                             month = str(f_stat.month)
                             day = str(f_stat.day)
@@ -1253,16 +1266,16 @@ class UpdateWorkflow(Workflow):
                                str(year) != str(file_to_download['year']) or \
                                str(month) != str(file_to_download['month']) or \
                                str(day) != str(file_to_download['day']):
-                                logging.debug('Workflow:wf_download:different_from_offline:' + file_to_download['name'])
+                                logging.debug('Workflow:wf_download:different_from_offline:' + offline_file)
                                 keep_files.append(file_to_download)
                             else:
-                                logging.debug('Workflow:wf_download:offline:' + file_to_download['name'])
+                                logging.debug('Workflow:wf_download:same_as_offline:' + offline_file)
                                 files_in_offline += 1
                                 copied_files.append(file_to_download)
                         except Exception as e:
                             # Could not get stats on file
                             logging.debug('Workflow:wf_download:offline:failed to stat file: ' + str(e))
-                            os.remove(offline_dir + '/' + file_to_download['name'])
+                            os.remove(offline_dir + '/' + offline_file)
                             keep_files.append(file_to_download)
                     else:
                         keep_files.append(file_to_download)
@@ -1352,15 +1365,15 @@ class UpdateWorkflow(Workflow):
 
         for downloader in downloaders:
             for file_to_download in downloader.files_to_download:
-                operation = message_pb2.Operation()
+                operation = downmessage_pb2.Operation()
                 operation.type = 1
-                message = message_pb2.DownloadFile()
+                message = downmessage_pb2.DownloadFile()
                 message.bank = self.name
                 message.session = session
                 message.local_dir = offline_dir
-                remote_file = message_pb2.DownloadFile.RemoteFile()
+                remote_file = downmessage_pb2.DownloadFile.RemoteFile()
                 protocol = downloader.protocol
-                remote_file.protocol = message_pb2.DownloadFile.Protocol.Value(protocol.upper())
+                remote_file.protocol = downmessage_pb2.DownloadFile.Protocol.Value(protocol.upper())
 
                 if downloader.credentials:
                     remote_file.credentials = downloader.credentials
@@ -1372,7 +1385,7 @@ class UpdateWorkflow(Workflow):
                     remote_file.remote_dir = ''
 
                 if http_parse:
-                    msg_http_parse = message_pb2.DownloadFile.HttpParse()
+                    msg_http_parse = downmessage_pb2.DownloadFile.HttpParse()
                     msg_http_parse.dir_line = http_parse.dir_line
                     msg_http_parse.file_line = http_parse.file_line
                     msg_http_parse.dir_name = http_parse.dir_name
@@ -1412,7 +1425,7 @@ class UpdateWorkflow(Workflow):
                 if 'md5' in file_to_download and file_to_download['md5']:
                     biomaj_file.metadata.md5 = file_to_download['md5']
 
-                message.http_method = message_pb2.DownloadFile.HTTP_METHOD.Value(downloader.method.upper())
+                message.http_method = downmessage_pb2.DownloadFile.HTTP_METHOD.Value(downloader.method.upper())
 
                 timeout_download = cf.get('timeout.download', default=None)
                 if timeout_download:
@@ -1422,7 +1435,7 @@ class UpdateWorkflow(Workflow):
                         logging.error('Wrong timeout type for timeout.download: ' + str(e))
 
                 if self.span:
-                    trace = message_pb2.Operation.Trace()
+                    trace = downmessage_pb2.Operation.Trace()
                     trace.trace_id = self.span.get_trace_id()
                     trace.span_id = self.span.get_span_id()
                     operation.trace.MergeFrom(trace)
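
The offline checks earlier in this file now resolve the on-disk name through the optional 'save_as' key before comparing date and size. A self-contained reduction of that resolution (the directory and file entry are hypothetical):

    import os

    offline_dir = '/var/lib/biomaj/offline'  # assumed location; comes from bank config
    file_to_download = {'name': 'pub/db.dat.gz', 'save_as': 'db.dat.gz'}  # hypothetical entry

    offline_file = file_to_download['name']
    if file_to_download.get('save_as', None):
        offline_file = file_to_download['save_as']
    if os.path.exists(os.path.join(offline_dir, offline_file)):
        pass  # compare size and date before deciding to skip the download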


=====================================
debian/changelog
=====================================
@@ -1,8 +1,12 @@
-biomaj3 (3.1.4-2) UNRELEASED; urgency=medium
+biomaj3 (3.1.6-1) unstable; urgency=medium
 
+  [ Jelmer Vernooij ]
   * Use secure copyright file specification URI.
 
- -- Jelmer Vernooij <jelmer at debian.org>  Sat, 20 Oct 2018 13:14:48 +0000
+  [ Olivier Sallou ]
+  * New upstream release 
+
+ -- Olivier Sallou <osallou at debian.org>  Thu, 25 Oct 2018 09:18:06 +0000
 
 biomaj3 (3.1.4-1) unstable; urgency=medium
 


=====================================
requirements.txt
=====================================
@@ -1,6 +1,6 @@
 biomaj_core
 biomaj_user
-biomaj_download>=3.0.17
+biomaj_download>=3.0.18
 biomaj_process
 biomaj_cli
 mock
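
The bump to biomaj_download>=3.0.18 matches the protobuf module rename in workflow.py (message_pb2 becomes downmessage_pb2). A minimal compatibility sketch for code that must import against either generation of the library (the local alias is a choice, not library API):

    try:
        # biomaj_download >= 3.0.18
        from biomaj_download.message import downmessage_pb2 as message_pb2
    except ImportError:
        # older biomaj_download releases
        from biomaj_download.message import message_pb2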


=====================================
setup.py
=====================================
@@ -35,7 +35,7 @@ config = {
     'url': 'http://biomaj.genouest.org',
     'download_url': 'http://biomaj.genouest.org',
     'author_email': 'olivier.sallou at irisa.fr',
-    'version': '3.1.4',
+    'version': '3.1.6',
      'classifiers': [
         # How mature is this project? Common values are
         #   3 - Alpha



View it on GitLab: https://salsa.debian.org/med-team/biomaj3/compare/791c6301a57da87423799bffdec6a12c56ad2ce9...ee45a1ac7ad767d9ff2f49b29e990ba4b029c950
