[med-svn] [Git][med-team/biomaj3-download][upstream] New upstream version 3.0.19
Andreas Tille
gitlab at salsa.debian.org
Wed Jan 30 09:43:22 GMT 2019
Andreas Tille pushed to branch upstream at Debian Med / biomaj3-download
Commits:
511284eb by Andreas Tille at 2019-01-30T09:36:54Z
New upstream version 3.0.19
- - - - -
9 changed files:
- CHANGES.txt
- biomaj_download/download/direct.py
- biomaj_download/download/ftp.py
- biomaj_download/download/http.py
- biomaj_download/download/protocolirods.py
- biomaj_download/download/rsync.py
- requirements.txt
- setup.py
- tests/biomaj_tests.py
Changes:
=====================================
CHANGES.txt
=====================================
@@ -1,3 +1,6 @@
+3.0.19:
+ Check archives after download
+ Fix python regexps syntax (deprecation)
3.0.18:
Rename protobuf and use specific package to avoid conflicts
3.0.17:
=====================================
biomaj_download/download/direct.py
=====================================
@@ -238,7 +238,7 @@ class DirectHttpDownload(DirectFTPDownload):
encoding = None
if 'content-type' in self.headers:
content_type = self.headers['content-type'].lower()
- match = re.search('charset=(\S+)', content_type)
+ match = re.search(r'charset=(\S+)', content_type)
if match:
encoding = match.group(1)
if encoding is None:
@@ -257,7 +257,7 @@ class DirectHttpDownload(DirectFTPDownload):
rfile['size'] = int(parts[1].strip())
if parts[0].strip() == 'Last-Modified':
# Sun, 06 Nov 1994
- res = re.match('(\w+),\s+(\d+)\s+(\w+)\s+(\d+)', parts[1].strip())
+ res = re.match(r'(\w+),\s+(\d+)\s+(\w+)\s+(\d+)', parts[1].strip())
if res:
rfile['hash'] = hashlib.md5(str(res.group(0)).encode('utf-8')).hexdigest()
rfile['day'] = int(res.group(2))
@@ -265,7 +265,7 @@ class DirectHttpDownload(DirectFTPDownload):
rfile['year'] = int(res.group(4))
continue
# Sunday, 06-Nov-94
- res = re.match('(\w+),\s+(\d+)-(\w+)-(\d+)', parts[1].strip())
+ res = re.match(r'(\w+),\s+(\d+)-(\w+)-(\d+)', parts[1].strip())
if res:
rfile['hash'] = hashlib.md5(str(res.group(0)).encode('utf-8')).hexdigest()
rfile['day'] = int(res.group(2))
@@ -273,7 +273,7 @@ class DirectHttpDownload(DirectFTPDownload):
rfile['year'] = 2000 + int(res.group(4))
continue
# Sun Nov 6 08:49:37 1994
- res = re.match('(\w+)\s+(\w+)\s+(\d+)\s+\d{2}:\d{2}:\d{2}\s+(\d+)', parts[1].strip())
+ res = re.match(r'(\w+)\s+(\w+)\s+(\d+)\s+\d{2}:\d{2}:\d{2}\s+(\d+)', parts[1].strip())
if res:
rfile['hash'] = hashlib.md5(str(res.group(0)).encode('utf-8')).hexdigest()
rfile['day'] = int(res.group(3))
=====================================
biomaj_download/download/ftp.py
=====================================
@@ -129,9 +129,18 @@ class FTPDownload(DownloadInterface):
error = False
except Exception as e:
self.logger.error('Could not get errcode:' + str(e))
+
nbtry += 1
curl.close()
fp.close()
+ skip_check_uncompress = os.environ.get('UNCOMPRESS_SKIP_CHECK', None)
+ if not error and skip_check_uncompress is None:
+ archive_status = Utils.archive_check(file_path)
+ if not archive_status:
+ self.logger.error('Archive is invalid or corrupted, deleting file and retrying download')
+ error = True
+ if os.path.exists(file_path):
+ os.remove(file_path)
return error
def download(self, local_dir, keep_dirs=True):
@@ -253,7 +262,7 @@ class FTPDownload(DownloadInterface):
encoding = None
if 'content-type' in self.headers:
content_type = self.headers['content-type'].lower()
- match = re.search('charset=(\S+)', content_type)
+ match = re.search(r'charset=(\S+)', content_type)
if match:
encoding = match.group(1)
if encoding is None:
@@ -288,7 +297,7 @@ class FTPDownload(DownloadInterface):
rfile['hash'] = hashlib.md5(line.encode('utf-8')).hexdigest()
try:
rfile['year'] = int(parts[7])
- except Exception as e:
+ except Exception:
# specific ftp case issues at getting date info
curdate = datetime.now()
rfile['year'] = curdate.year
=====================================
biomaj_download/download/http.py
=====================================
@@ -17,7 +17,7 @@ except ImportError:
class HTTPParse(object):
def __init__(self, dir_line, file_line, dir_name=1, dir_date=2, file_name=1, file_date=2, file_date_format=None, file_size=3):
- """
+ r'''
http.parse.dir.line: <img[\s]+src="[\S]+"[\s]+alt="\[DIR\]"[\s]*/?>[\s]*<a[\s]+href="([\S]+)/"[\s]*>.*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})
http.parse.file.line: <img[\s]+src="[\S]+"[\s]+alt="\[[\s]+\]"[\s]*/?>[\s]<a[\s]+href="([\S]+)".*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})[\s]+([\d\.]+[MKG]{0,1})
http.group.dir.name: 1
@@ -25,7 +25,7 @@ class HTTPParse(object):
http.group.file.name: 1
http.group.file.date: 2
http.group.file.size: 3
- """
+ '''
self.dir_line = dir_line
self.file_line = file_line
self.dir_name = dir_name
@@ -85,7 +85,7 @@ class HTTPDownload(FTPDownload):
encoding = None
if 'content-type' in self.headers:
content_type = self.headers['content-type'].lower()
- match = re.search('charset=(\S+)', content_type)
+ match = re.search(r'charset=(\S+)', content_type)
if match:
encoding = match.group(1)
if encoding is None:
@@ -96,14 +96,14 @@ class HTTPDownload(FTPDownload):
# lets get the output in a string
result = output.getvalue().decode(encoding)
- '''
- 'http.parse.dir.line': r'<a[\s]+href="([\S]+)/".*alt="\[DIR\]">.*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})',
- 'http.parse.file.line': r'<a[\s]+href="([\S]+)".*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})[\s]+([\d\.]+[MKG]{0,1})',
- 'http.group.dir.name': 1,
- 'http.group.dir.date': 2,
- 'http.group.file.name': 1,
- 'http.group.file.date': 2,
- 'http.group.file.size': 3,
+ r'''
+ http.parse.dir.line': r'<a[\s]+href="([\S]+)/".*alt="\[DIR\]">.*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})',
+ http.parse.file.line': r'<a[\s]+href="([\S]+)".*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})[\s]+([\d\.]+[MKG]{0,1})',
+ http.group.dir.name': 1,
+ http.group.dir.date': 2,
+ http.group.file.name': 1,
+ http.group.file.date': 2,
+ http.group.file.size': 3,
'''
rfiles = []
=====================================
biomaj_download/download/protocolirods.py
=====================================
@@ -3,6 +3,7 @@ import os
from datetime import datetime
import time
+from biomaj_core.utils import Utils
from biomaj_download.download.interface import DownloadInterface
from irods.session import iRODSSession
from irods.models import Collection, DataObject, User
@@ -96,6 +97,15 @@ class IRODSDownload(DownloadInterface):
rfile['download_time'] = 0
rfile['error'] = True
raise Exception("IRODS:Download:Error:" + rfile['root'] + '/' + rfile['name'])
+ else:
+ archive_status = Utils.archive_check(file_path)
+ if not archive_status:
+ self.logger.error('Archive is invalid or corrupted, deleting file')
+ rfile['error'] = True
+ if os.path.exists(file_path):
+ os.remove(file_path)
+ raise Exception("IRODS:Download:Error:" + rfile['root'] + '/' + rfile['name'])
+
end_time = datetime.now()
end_time = time.mktime(end_time.timetuple())
rfile['download_time'] = end_time - start_time
=====================================
biomaj_download/download/rsync.py
=====================================
@@ -119,7 +119,7 @@ class RSYNCDownload(DownloadInterface):
rfile['save_as'] = rfile['name']
if keep_dirs:
file_dir = local_dir + '/' + os.path.dirname(rfile['save_as'])
- if re.match('\S*\/$', file_dir):
+ if re.match(r'\S*\/$', file_dir):
file_path = file_dir + '/' + os.path.basename(rfile['save_as'])
else:
file_path = file_dir + os.path.basename(rfile['save_as'])
=====================================
requirements.txt
=====================================
@@ -2,7 +2,7 @@ mock
nose
pycurl
py-bcrypt
-pika
+pika==0.11.2
redis
PyYAML
protobuf
=====================================
setup.py
=====================================
@@ -21,7 +21,7 @@ config = {
'url': 'http://biomaj.genouest.org',
'download_url': 'http://biomaj.genouest.org',
'author_email': 'olivier.sallou at irisa.fr',
- 'version': '3.0.18',
+ 'version': '3.0.19',
'classifiers': [
# How mature is this project? Common values are
# 3 - Alpha
@@ -46,7 +46,7 @@ config = {
'biomaj_zipkin',
'pycurl',
'py-bcrypt',
- 'pika',
+ 'pika==0.11.2',
'redis',
'PyYAML',
'flask',
=====================================
tests/biomaj_tests.py
=====================================
@@ -443,9 +443,26 @@ class TestBiomajFTPDownload(unittest.TestCase):
(file_list, dir_list) = ftpd.list()
# ftpd.match([r'^alu.*\.gz$'], file_list, dir_list)
ftpd.match([r'^1.*KB\.zip$'], file_list, dir_list)
+ try:
+ ftpd.download(self.utils.data_dir)
+ except Exception:
+ self.assertTrue(1==1)
+ else:
+ self.assertTrue(1==0)
+ ftpd.close()
+ # self.assertTrue(len(ftpd.files_to_download) == 2)
+
+ def test_download_skip_uncompress_checks(self):
+ # ftpd = FTPDownload('ftp', 'ftp.ncbi.nih.gov', '/blast/db/FASTA/')
+ os.environ['UNCOMPRESS_SKIP_CHECK'] = "1"
+ ftpd = FTPDownload('ftp', 'speedtest.tele2.net', '/')
+ (file_list, dir_list) = ftpd.list()
+ # ftpd.match([r'^alu.*\.gz$'], file_list, dir_list)
+ ftpd.match([r'^1.*KB\.zip$'], file_list, dir_list)
ftpd.download(self.utils.data_dir)
ftpd.close()
self.assertTrue(len(ftpd.files_to_download) == 2)
+ del os.environ['UNCOMPRESS_SKIP_CHECK']
def test_download_in_subdir(self):
ftpd = FTPDownload('ftp', 'ftp.ncbi.nih.gov', '/blast/')
@@ -577,7 +594,7 @@ class iRodsResult(object):
elif "COLL_NAME" in str(index):
return self.Collname
elif "D_OWNER_NAME" in str(index):
- return self.Dataowner_name
+ return self.Dataowner_name
class MockiRODSSession(object):
@@ -601,19 +618,19 @@ class MockiRODSSession(object):
return self.Collid
if "COLL_NAME" in str(index):
return self.Collname
-
+
def configure(self):
return MockiRODSSession()
def query(self,Collname, Dataname, Datasize, Dataowner_name, Datamodify_time):
return self
-
+
def all(self):
return self
def one(self):
return self
-
+
def filter(self,boo):
return self
@@ -640,7 +657,7 @@ class TestBiomajIRODSDownload(unittest.TestCase):
self.curdir = os.path.dirname(os.path.realpath(__file__))
self.examples = os.path.join(self.curdir,'bank') + '/'
BiomajConfig.load_config(self.utils.global_properties, allow_user_config=False)
-
+
def tearDown(self):
self.utils.clean()
@@ -657,4 +674,3 @@ class TestBiomajIRODSDownload(unittest.TestCase):
irodsd.set_offline_dir(self.utils.data_dir)
(files_list, dir_list) = irodsd.list()
self.assertTrue(len(files_list) != 0)
-
View it on GitLab: https://salsa.debian.org/med-team/biomaj3-download/commit/511284eb9c68d85c1be4103b68366eacfdb178ed
--
View it on GitLab: https://salsa.debian.org/med-team/biomaj3-download/commit/511284eb9c68d85c1be4103b68366eacfdb178ed
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20190130/54556d78/attachment-0001.html>
More information about the debian-med-commit mailing list