[med-svn] [Git][med-team/biomaj3-download][upstream] New upstream version 3.0.19

Wed Jan 30 09:43:22 GMT 2019

Andreas Tille pushed to branch upstream at Debian Med / biomaj3-download


Commits:
511284eb by Andreas Tille at 2019-01-30T09:36:54Z
New upstream version 3.0.19
- - - - -


9 changed files:

- CHANGES.txt
- biomaj_download/download/direct.py
- biomaj_download/download/ftp.py
- biomaj_download/download/http.py
- biomaj_download/download/protocolirods.py
- biomaj_download/download/rsync.py
- requirements.txt
- setup.py
- tests/biomaj_tests.py


Changes:

=====================================
CHANGES.txt
=====================================
@@ -1,3 +1,6 @@
+3.0.19:
+  Check archives after download
+  Fix python regexps syntax (deprecation)
 3.0.18:
   Rename protobuf and use specific package to avoid conflicts
 3.0.17:


=====================================
biomaj_download/download/direct.py
=====================================
@@ -238,7 +238,7 @@ class DirectHttpDownload(DirectFTPDownload):
             encoding = None
             if 'content-type' in self.headers:
                 content_type = self.headers['content-type'].lower()
-                match = re.search('charset=(\S+)', content_type)
+                match = re.search(r'charset=(\S+)', content_type)
                 if match:
                     encoding = match.group(1)
             if encoding is None:
@@ -257,7 +257,7 @@ class DirectHttpDownload(DirectFTPDownload):
                     rfile['size'] = int(parts[1].strip())
                 if parts[0].strip() == 'Last-Modified':
                     # Sun, 06 Nov 1994
-                    res = re.match('(\w+),\s+(\d+)\s+(\w+)\s+(\d+)', parts[1].strip())
+                    res = re.match(r'(\w+),\s+(\d+)\s+(\w+)\s+(\d+)', parts[1].strip())
                     if res:
                         rfile['hash'] = hashlib.md5(str(res.group(0)).encode('utf-8')).hexdigest()
                         rfile['day'] = int(res.group(2))
@@ -265,7 +265,7 @@ class DirectHttpDownload(DirectFTPDownload):
                         rfile['year'] = int(res.group(4))
                         continue
                     # Sunday, 06-Nov-94
-                    res = re.match('(\w+),\s+(\d+)-(\w+)-(\d+)', parts[1].strip())
+                    res = re.match(r'(\w+),\s+(\d+)-(\w+)-(\d+)', parts[1].strip())
                     if res:
                         rfile['hash'] = hashlib.md5(str(res.group(0)).encode('utf-8')).hexdigest()
                         rfile['day'] = int(res.group(2))
@@ -273,7 +273,7 @@ class DirectHttpDownload(DirectFTPDownload):
                         rfile['year'] = 2000 + int(res.group(4))
                         continue
                     # Sun Nov  6 08:49:37 1994
-                    res = re.match('(\w+)\s+(\w+)\s+(\d+)\s+\d{2}:\d{2}:\d{2}\s+(\d+)', parts[1].strip())
+                    res = re.match(r'(\w+)\s+(\w+)\s+(\d+)\s+\d{2}:\d{2}:\d{2}\s+(\d+)', parts[1].strip())
                     if res:
                         rfile['hash'] = hashlib.md5(str(res.group(0)).encode('utf-8')).hexdigest()
                         rfile['day'] = int(res.group(3))


=====================================
biomaj_download/download/ftp.py
=====================================
@@ -129,9 +129,18 @@ class FTPDownload(DownloadInterface):
                     error = False
             except Exception as e:
                 self.logger.error('Could not get errcode:' + str(e))
+
             nbtry += 1
             curl.close()
             fp.close()
+            skip_check_uncompress = os.environ.get('UNCOMPRESS_SKIP_CHECK', None)
+            if not error and skip_check_uncompress is None:
+                archive_status = Utils.archive_check(file_path)
+                if not archive_status:
+                    self.logger.error('Archive is invalid or corrupted, deleting file and retrying download')
+                    error = True
+                    if os.path.exists(file_path):
+                        os.remove(file_path)
         return error
 
     def download(self, local_dir, keep_dirs=True):
@@ -253,7 +262,7 @@ class FTPDownload(DownloadInterface):
         encoding = None
         if 'content-type' in self.headers:
             content_type = self.headers['content-type'].lower()
-            match = re.search('charset=(\S+)', content_type)
+            match = re.search(r'charset=(\S+)', content_type)
             if match:
                 encoding = match.group(1)
         if encoding is None:
@@ -288,7 +297,7 @@ class FTPDownload(DownloadInterface):
             rfile['hash'] = hashlib.md5(line.encode('utf-8')).hexdigest()
             try:
                 rfile['year'] = int(parts[7])
-            except Exception as e:
+            except Exception:
                 # specific ftp case issues at getting date info
                 curdate = datetime.now()
                 rfile['year'] = curdate.year


=====================================
biomaj_download/download/http.py
=====================================
@@ -17,7 +17,7 @@ except ImportError:
 class HTTPParse(object):
 
     def __init__(self, dir_line, file_line, dir_name=1, dir_date=2, file_name=1, file_date=2, file_date_format=None, file_size=3):
-        """
+        r'''
         http.parse.dir.line: <img[\s]+src="[\S]+"[\s]+alt="\[DIR\]"[\s]*/?>[\s]*<a[\s]+href="([\S]+)/"[\s]*>.*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})
         http.parse.file.line: <img[\s]+src="[\S]+"[\s]+alt="\[[\s]+\]"[\s]*/?>[\s]<a[\s]+href="([\S]+)".*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})[\s]+([\d\.]+[MKG]{0,1})
         http.group.dir.name: 1
@@ -25,7 +25,7 @@ class HTTPParse(object):
         http.group.file.name: 1
         http.group.file.date: 2
         http.group.file.size: 3
-        """
+        '''
         self.dir_line = dir_line
         self.file_line = file_line
         self.dir_name = dir_name
@@ -85,7 +85,7 @@ class HTTPDownload(FTPDownload):
         encoding = None
         if 'content-type' in self.headers:
             content_type = self.headers['content-type'].lower()
-            match = re.search('charset=(\S+)', content_type)
+            match = re.search(r'charset=(\S+)', content_type)
             if match:
                 encoding = match.group(1)
         if encoding is None:
@@ -96,14 +96,14 @@ class HTTPDownload(FTPDownload):
 
         # lets get the output in a string
         result = output.getvalue().decode(encoding)
-        '''
-        'http.parse.dir.line': r'<a[\s]+href="([\S]+)/".*alt="\[DIR\]">.*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})',
-        'http.parse.file.line': r'<a[\s]+href="([\S]+)".*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})[\s]+([\d\.]+[MKG]{0,1})',
-        'http.group.dir.name': 1,
-        'http.group.dir.date': 2,
-        'http.group.file.name': 1,
-        'http.group.file.date': 2,
-        'http.group.file.size': 3,
+        r'''
+        http.parse.dir.line': r'<a[\s]+href="([\S]+)/".*alt="\[DIR\]">.*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})',
+        http.parse.file.line': r'<a[\s]+href="([\S]+)".*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})[\s]+([\d\.]+[MKG]{0,1})',
+        http.group.dir.name': 1,
+        http.group.dir.date': 2,
+        http.group.file.name': 1,
+        http.group.file.date': 2,
+        http.group.file.size': 3,
         '''
 
         rfiles = []


=====================================
biomaj_download/download/protocolirods.py
=====================================
@@ -3,6 +3,7 @@ import os
 from datetime import datetime
 import time
 
+from biomaj_core.utils import Utils
 from biomaj_download.download.interface import DownloadInterface
 from irods.session import iRODSSession
 from irods.models import Collection, DataObject, User
@@ -96,6 +97,15 @@ class IRODSDownload(DownloadInterface):
                 rfile['download_time'] = 0
                 rfile['error'] = True
                 raise Exception("IRODS:Download:Error:" + rfile['root'] + '/' + rfile['name'])
+            else:
+                archive_status = Utils.archive_check(file_path)
+                if not archive_status:
+                    self.logger.error('Archive is invalid or corrupted, deleting file')
+                    rfile['error'] = True
+                    if os.path.exists(file_path):
+                        os.remove(file_path)
+                    raise Exception("IRODS:Download:Error:" + rfile['root'] + '/' + rfile['name'])
+
             end_time = datetime.now()
             end_time = time.mktime(end_time.timetuple())
             rfile['download_time'] = end_time - start_time


=====================================
biomaj_download/download/rsync.py
=====================================
@@ -119,7 +119,7 @@ class RSYNCDownload(DownloadInterface):
                 rfile['save_as'] = rfile['name']
             if keep_dirs:
                 file_dir = local_dir + '/' + os.path.dirname(rfile['save_as'])
-            if re.match('\S*\/$', file_dir):
+            if re.match(r'\S*\/$', file_dir):
                 file_path = file_dir + '/' + os.path.basename(rfile['save_as'])
             else:
                 file_path = file_dir + os.path.basename(rfile['save_as'])


=====================================
requirements.txt
=====================================
@@ -2,7 +2,7 @@ mock
 nose
 pycurl
 py-bcrypt
-pika
+pika==0.11.2
 redis
 PyYAML
 protobuf


=====================================
setup.py
=====================================
@@ -21,7 +21,7 @@ config = {
     'url': 'http://biomaj.genouest.org',
     'download_url': 'http://biomaj.genouest.org',
     'author_email': 'olivier.sallou at irisa.fr',
-    'version': '3.0.18',
+    'version': '3.0.19',
      'classifiers': [
         # How mature is this project? Common values are
         #   3 - Alpha
@@ -46,7 +46,7 @@ config = {
                          'biomaj_zipkin',
                          'pycurl',
                          'py-bcrypt',
-                         'pika',
+                         'pika==0.11.2',
                          'redis',
                          'PyYAML',
                          'flask',


=====================================
tests/biomaj_tests.py
=====================================
@@ -443,9 +443,26 @@ class TestBiomajFTPDownload(unittest.TestCase):
     (file_list, dir_list) = ftpd.list()
     # ftpd.match([r'^alu.*\.gz$'], file_list, dir_list)
     ftpd.match([r'^1.*KB\.zip$'], file_list, dir_list)
+    try:
+        ftpd.download(self.utils.data_dir)
+    except Exception:
+        self.assertTrue(1==1)
+    else:
+        self.assertTrue(1==0)
+    ftpd.close()
+    # self.assertTrue(len(ftpd.files_to_download) == 2)
+
+  def test_download_skip_uncompress_checks(self):
+    # ftpd = FTPDownload('ftp', 'ftp.ncbi.nih.gov', '/blast/db/FASTA/')
+    os.environ['UNCOMPRESS_SKIP_CHECK'] = "1"
+    ftpd = FTPDownload('ftp', 'speedtest.tele2.net', '/')
+    (file_list, dir_list) = ftpd.list()
+    # ftpd.match([r'^alu.*\.gz$'], file_list, dir_list)
+    ftpd.match([r'^1.*KB\.zip$'], file_list, dir_list)
     ftpd.download(self.utils.data_dir)
     ftpd.close()
     self.assertTrue(len(ftpd.files_to_download) == 2)
+    del os.environ['UNCOMPRESS_SKIP_CHECK']
 
   def test_download_in_subdir(self):
     ftpd = FTPDownload('ftp', 'ftp.ncbi.nih.gov', '/blast/')
@@ -577,7 +594,7 @@ class iRodsResult(object):
         elif "COLL_NAME" in str(index):
             return self.Collname
         elif "D_OWNER_NAME" in str(index):
-            return self.Dataowner_name    
+            return self.Dataowner_name
 
 
 class MockiRODSSession(object):
@@ -601,19 +618,19 @@ class MockiRODSSession(object):
             return self.Collid
         if "COLL_NAME" in str(index):
             return self.Collname
-    
+
     def configure(self):
         return MockiRODSSession()
 
     def query(self,Collname, Dataname, Datasize, Dataowner_name, Datamodify_time):
         return self
-    
+
     def all(self):
         return self
 
     def one(self):
         return self
-    
+
     def filter(self,boo):
         return self
 
@@ -640,7 +657,7 @@ class TestBiomajIRODSDownload(unittest.TestCase):
         self.curdir = os.path.dirname(os.path.realpath(__file__))
         self.examples = os.path.join(self.curdir,'bank') + '/'
         BiomajConfig.load_config(self.utils.global_properties, allow_user_config=False)
-        
+
     def tearDown(self):
         self.utils.clean()
 
@@ -657,4 +674,3 @@ class TestBiomajIRODSDownload(unittest.TestCase):
         irodsd.set_offline_dir(self.utils.data_dir)
         (files_list, dir_list) = irodsd.list()
         self.assertTrue(len(files_list) != 0)
-



View it on GitLab: https://salsa.debian.org/med-team/biomaj3-download/commit/511284eb9c68d85c1be4103b68366eacfdb178ed

-- 
View it on GitLab: https://salsa.debian.org/med-team/biomaj3-download/commit/511284eb9c68d85c1be4103b68366eacfdb178ed
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20190130/54556d78/attachment-0001.html>