[med-svn] [Git][med-team/biomaj3-download][master] 5 commits: python3-ftputil dep is in repo now

Olivier Sallou gitlab at salsa.debian.org
Tue Nov 12 10:52:49 GMT 2019



Olivier Sallou pushed to branch master at Debian Med / biomaj3-download


Commits:
37eaef82 by Olivier Sallou at 2019-11-12T10:18:03Z
python3-ftputil dep is in repo now

- - - - -
d6208921 by Olivier Sallou at 2019-11-12T10:18:07Z
New upstream version 3.1.0
- - - - -
62d4073e by Olivier Sallou at 2019-11-12T10:18:07Z
Update upstream source from tag 'upstream/3.1.0'

Update to upstream version '3.1.0'
with Debian dir 4763fa4a4c2159c5f59675313757544599708aef
- - - - -
88fe24ee by Olivier Sallou at 2019-11-12T10:18:39Z
upstream release 3.1.0

- - - - -
9c723776 by Olivier Sallou at 2019-11-12T10:24:50Z
update deps

- - - - -


15 changed files:

- .travis.yml
- CHANGES.txt
- README.md
- biomaj_download/download/ftp.py → biomaj_download/download/curl.py
- biomaj_download/download/direct.py
- − biomaj_download/download/http.py
- biomaj_download/download/interface.py
- biomaj_download/download/localcopy.py
- biomaj_download/download/protocolirods.py
- biomaj_download/download/rsync.py
- biomaj_download/downloadservice.py
- debian/changelog
- debian/control
- setup.py
- tests/biomaj_tests.py


Changes:

=====================================
.travis.yml
=====================================
@@ -20,7 +20,7 @@ install:
 - pip install python-coveralls
 - python setup.py -q install
 script:
-- nosetests -a '!network'
+- nosetests -a '!network,!local_irods'
 - flake8 --ignore E501 biomaj_download/*.py biomaj_download/download
 deploy:
   provider: pypi


=====================================
CHANGES.txt
=====================================
@@ -1,3 +1,6 @@
+3.1.0:
+  #16 Don't change name after download in DirectHTTPDownloader
+  PR #7 Refactor downloaders (*WARNING* breaks API)
 3.0.27:
   Fix previous release broken with a bug in direct protocols
 3.0.26:


=====================================
README.md
=====================================
@@ -17,6 +17,19 @@ To compile protobuf, in biomaj_download/message:
 
     flake8  biomaj_download/\*.py biomaj_download/download
 
+# Test
+
+To run the test suite, use:
+
+    nosetests -a '!local_irods' tests/biomaj_tests.py
+
+This command skips the test that needs a local iRODS server.
+
+Some tests might fail due to network issues. You can skip them with:
+
+    nosetests -a '!network' tests/biomaj_tests.py
+
+(To skip the local iRODS test and the network tests, use `-a '!network,!local_irods'`).
 
 # Run
 
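For reference, the `network` and `local_irods` attributes used in the new Test
section are attached to tests with nose's attrib plugin. A minimal sketch (the
test names and bodies are hypothetical, not taken from this repository):

    from nose.plugins.attrib import attr

    @attr('network')
    def test_remote_listing():
        # Deselected by: nosetests -a '!network'
        assert True

    @attr('local_irods')
    def test_irods_download():
        # Deselected by: nosetests -a '!local_irods'
        assert True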


=====================================
biomaj_download/download/ftp.py → biomaj_download/download/curl.py
=====================================
@@ -1,15 +1,24 @@
-import pycurl
-import re
+import sys
 import os
-import time
+import re
 from datetime import datetime
-import stat
 import hashlib
+import time
+import stat
+
+import pycurl
 import ftputil
 
+import humanfriendly
+
 from biomaj_core.utils import Utils
 from biomaj_download.download.interface import DownloadInterface
 
+if sys.version_info[0] < 3:
+    from urllib import urlencode
+else:
+    from urllib.parse import urlencode
+
 try:
     from io import BytesIO
 except ImportError:
@@ -58,9 +67,31 @@ if 'filemode' not in stat.__dict__:
     stat.filemode = _filemode
 
 
-class FTPDownload(DownloadInterface):
+class HTTPParse(object):
+
+    def __init__(self, dir_line, file_line, dir_name=1, dir_date=2, file_name=1, file_date=2, file_date_format=None, file_size=3):
+        r'''
+        http.parse.dir.line: <img[\s]+src="[\S]+"[\s]+alt="\[DIR\]"[\s]*/?>[\s]*<a[\s]+href="([\S]+)/"[\s]*>.*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})
+        http.parse.file.line: <img[\s]+src="[\S]+"[\s]+alt="\[[\s]+\]"[\s]*/?>[\s]<a[\s]+href="([\S]+)".*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})[\s]+([\d\.]+[MKG]{0,1})
+        http.group.dir.name: 1
+        http.group.dir.date: 2
+        http.group.file.name: 1
+        http.group.file.date: 2
+        http.group.file.size: 3
+        '''
+        self.dir_line = dir_line
+        self.file_line = file_line
+        self.dir_name = dir_name
+        self.dir_date = dir_date
+        self.file_name = file_name
+        self.file_date = file_date
+        self.file_size = file_size
+        self.file_date_format = file_date_format
+
+
+class CurlDownload(DownloadInterface):
     '''
-    Base class to download files from FTP
+    Base class to download files from FTP(S), HTTP(S) and SFTP.
 
     protocol=ftp
     server=ftp.ncbi.nih.gov
@@ -69,6 +100,12 @@ class FTPDownload(DownloadInterface):
     remote.files=^alu.*\\.gz$
 
     '''
+
+    FTP_PROTOCOL_FAMILY = ["ftp", "ftps"]
+    HTTP_PROTOCOL_FAMILY = ["http", "https"]
+    SFTP_PROTOCOL_FAMILY = ["sftp"]
+    ALL_PROTOCOLS = FTP_PROTOCOL_FAMILY + HTTP_PROTOCOL_FAMILY + SFTP_PROTOCOL_FAMILY
+
     # Utilities to parse ftp listings: UnixParser is the more common hence we
     # put it first
     ftp_listing_parsers = [
@@ -76,14 +113,55 @@ class FTPDownload(DownloadInterface):
         ftputil.stat.MSParser(),
     ]
 
-    def __init__(self, protocol, host, rootdir):
+    def __init__(self, curl_protocol, host, rootdir, http_parse=None):
+        """
+        Initialize a CurlDownloader.
+
+        :param curl_protocol: (real) protocol to use
+        :type curl_protocol: str (see :py:var:~CurlDownload.ALL_PROTOCOLS)
+
+        :param host: server name
+        :type host: str
+
+        :param rootdir: base directory
+        :type rootdir: str
+
+        :param http_parse: object used to extract file information from HTML pages
+        :type http_parse: :py:class:`HTTPParse`
+        """
         DownloadInterface.__init__(self)
         self.logger.debug('Download')
-        self.crl = pycurl.Curl()
-        url = protocol + '://' + host
+        # Initialize curl_protocol.
+        # Note that we don't change that field in set_protocol since this
+        # method uses the protocol from the configuration file. It's not clear
+        # what to do in this case.
+        curl_protocol = curl_protocol.lower()
+        if curl_protocol not in self.ALL_PROTOCOLS:
+            raise ValueError("curl_protocol must be one of %s (case insensitive). Got %s." % (self.ALL_PROTOCOLS, curl_protocol))
+        self.curl_protocol = curl_protocol
+        # Initialize protocol specific constants
+        if self.curl_protocol in self.FTP_PROTOCOL_FAMILY:
+            self.protocol_family = "ftp"
+            self._parse_result = self._ftp_parse_result
+            self.ERRCODE_OK = 226
+        elif self.curl_protocol in self.HTTP_PROTOCOL_FAMILY:
+            self.protocol_family = "http"
+            self._parse_result = self._http_parse_result
+            self.ERRCODE_OK = 200
+        elif self.curl_protocol in self.SFTP_PROTOCOL_FAMILY:
+            self.protocol_family = "sftp"
+            self._parse_result = self._ftp_parse_result
+            self.ERRCODE_OK = 0
+        else:  # Should not happen since we check before
+            raise ValueError("Unknown protocol")
         self.rootdir = rootdir
-        self.url = url
+        self.set_server(host)
         self.headers = {}
+        self.http_parse = http_parse
+        # Create the cURL object
+        # This object is shared by all operations so cURL can reuse cached connections.
+        # Before using it, call method:`_basic_curl_configuration`.
+        self.crl = pycurl.Curl()
         # Initialize options
         # Should we skip SSL verification (cURL -k/--insecure option)
         self.ssl_verifyhost = True
@@ -93,8 +171,85 @@ class FTPDownload(DownloadInterface):
         # Keep alive
         self.tcp_keepalive = 0
 
+    def _basic_curl_configuration(self):
+        """
+        Perform basic configuration (i.e. that doesn't depend on the
+        operation: _download or list). This method should be called before any
+        operation.
+        """
+        # Reset cURL options before setting them
+        self.crl.reset()
+
+        if self.proxy is not None:
+            self.crl.setopt(pycurl.PROXY, self.proxy)
+            if self.proxy_auth is not None:
+                self.crl.setopt(pycurl.PROXYUSERPWD, self.proxy_auth)
+
+        if self.credentials is not None:
+            self.crl.setopt(pycurl.USERPWD, self.credentials)
+
+        # Configure TCP keepalive
+        if self.tcp_keepalive:
+            self.crl.setopt(pycurl.TCP_KEEPALIVE, True)
+            self.crl.setopt(pycurl.TCP_KEEPIDLE, self.tcp_keepalive * 2)
+            self.crl.setopt(pycurl.TCP_KEEPINTVL, self.tcp_keepalive)
+
+        # Configure SSL verification (on some platforms, disabling
+        # SSL_VERIFYPEER implies disabling SSL_VERIFYHOST so we set
+        # SSL_VERIFYPEER after)
+        self.crl.setopt(pycurl.SSL_VERIFYHOST, 2 if self.ssl_verifyhost else 0)
+        self.crl.setopt(pycurl.SSL_VERIFYPEER, 1 if self.ssl_verifypeer else 0)
+        if self.ssl_server_cert:
+            # cacert is the name of the option for the curl command. The
+            # corresponding cURL option is CURLOPT_CAINFO.
+            # See https://curl.haxx.se/libcurl/c/CURLOPT_CAINFO.html
+            # This is inspired by that https://curl.haxx.se/docs/sslcerts.html
+            # (section "Certificate Verification", option 2) but the option
+            # CURLOPT_CAPATH is for a directory of certificates.
+            self.crl.setopt(pycurl.CAINFO, self.ssl_server_cert)
+
+        # Configure timeouts
+        self.crl.setopt(pycurl.CONNECTTIMEOUT, 300)
+        self.crl.setopt(pycurl.TIMEOUT, self.timeout)
+        self.crl.setopt(pycurl.NOSIGNAL, 1)
+
+        # Header function
+        self.crl.setopt(pycurl.HEADERFUNCTION, self._header_function)
+
+    def _header_function(self, header_line):
+        # HTTP standard specifies that headers are encoded in iso-8859-1.
+        # On Python 2, decoding step can be skipped.
+        # On Python 3, decoding step is required.
+        header_line = header_line.decode('iso-8859-1')
+
+        # Header lines include the first status line (HTTP/1.x ...).
+        # We are going to ignore all lines that don't have a colon in them.
+        # This will botch headers that are split on multiple lines...
+        if ':' not in header_line:
+            return
+
+        # Break the header line into header name and value.
+        name, value = header_line.split(':', 1)
+
+        # Remove whitespace that may be present.
+        # Header lines include the trailing newline, and there may be whitespace
+        # around the colon.
+        name = name.strip()
+        value = value.strip()
+
+        # Header names are case insensitive.
+        # Lowercase name here.
+        name = name.lower()
+
+        # Now we can actually record the header name and value.
+        self.headers[name] = value
+
+    def set_server(self, server):
+        super(CurlDownload, self).set_server(server)
+        self.url = self.curl_protocol + '://' + self.server
+
     def set_options(self, protocol_options):
-        super(FTPDownload, self).set_options(protocol_options)
+        super(CurlDownload, self).set_options(protocol_options)
         if "ssl_verifyhost" in protocol_options:
             self.ssl_verifyhost = Utils.to_bool(protocol_options["ssl_verifyhost"])
         if "ssl_verifypeer" in protocol_options:
@@ -104,127 +259,65 @@ class FTPDownload(DownloadInterface):
         if "tcp_keepalive" in protocol_options:
             self.tcp_keepalive = Utils.to_int(protocol_options["tcp_keepalive"])
 
-    def match(self, patterns, file_list, dir_list=None, prefix='', submatch=False):
-        '''
-        Find files matching patterns. Sets instance variable files_to_download.
-
-        :param patterns: regexps to match
-        :type patterns: list
-        :param file_list: list of files to match
-        :type file_list: list
-        :param dir_list: sub directories in current dir
-        :type dir_list: list
-        :param prefix: directory prefix
-        :type prefix: str
-        :param submatch: first call to match, or called from match
-        :type submatch: bool
-        '''
-        self.logger.debug('Download:File:RegExp:' + str(patterns))
-        if dir_list is None:
-            dir_list = []
-        if not submatch:
-            self.files_to_download = []
-        for pattern in patterns:
-            subdirs_pattern = pattern.split('/')
-            if len(subdirs_pattern) > 1:
-                # Pattern contains sub directories
-                subdir = subdirs_pattern[0]
-                if subdir == '^':
-                    subdirs_pattern = subdirs_pattern[1:]
-                    subdir = subdirs_pattern[0]
-                # If getting all, get all files
-                if pattern == '**/*':
-                    for rfile in file_list:
-                        rfile['root'] = self.rootdir
-                        if prefix != '':
-                            rfile['name'] = prefix + '/' + rfile['name']
-                        self.files_to_download.append(rfile)
-                        self.logger.debug('Download:File:MatchRegExp:' + rfile['name'])
-                for direlt in dir_list:
-                    subdir = direlt['name']
-                    self.logger.debug('Download:File:Subdir:Check:' + subdir)
-                    if pattern == '**/*':
-                        (subfile_list, subdirs_list) = self.list(prefix + '/' + subdir + '/')
-                        self.match([pattern], subfile_list, subdirs_list, prefix + '/' + subdir, True)
-
-                    else:
-                        if re.match(subdirs_pattern[0], subdir):
-                            self.logger.debug('Download:File:Subdir:Match:' + subdir)
-                            # subdir match the beginning of the pattern
-                            # check match in subdir
-                            (subfile_list, subdirs_list) = self.list(prefix + '/' + subdir + '/')
-                            self.match(['/'.join(subdirs_pattern[1:])], subfile_list, subdirs_list, prefix + '/' + subdir, True)
-
-            else:
-                for rfile in file_list:
-                    if re.match(pattern, rfile['name']):
-                        rfile['root'] = self.rootdir
-                        if prefix != '':
-                            rfile['name'] = prefix + '/' + rfile['name']
-                        self.files_to_download.append(rfile)
-                        self.logger.debug('Download:File:MatchRegExp:' + rfile['name'])
-        if not submatch and len(self.files_to_download) == 0:
-            raise Exception('no file found matching expressions')
-
-    def curl_download(self, file_path, file_to_download):
+    def _append_file_to_download(self, rfile):
+        # Add url and root to the file if needed (for safety)
+        if 'url' not in rfile or not rfile['url']:
+            rfile['url'] = self.url
+        if 'root' not in rfile or not rfile['root']:
+            rfile['root'] = self.rootdir
+        super(CurlDownload, self)._append_file_to_download(rfile)
+
+    def _file_url(self, rfile):
+        # rfile['root'] is set to self.rootdir if needed but may be different.
+        # We don't use os.path.join because rfile['name'] may start with /
+        return self.url + '/' + rfile['root'] + rfile['name']
+
+    def _download(self, file_path, rfile):
+        """
+        This method is designed to work for FTP(S), HTTP(S) and SFTP.
+        """
         error = True
         nbtry = 1
+        # Build the URL of the remote file
+        file_url = self._file_url(rfile)
         while(error is True and nbtry < 3):
-            fp = open(file_path, "wb")
-            curl = pycurl.Curl()
-
-            # Configure TCP keepalive
-            if self.tcp_keepalive:
-                curl.setopt(pycurl.TCP_KEEPALIVE, True)
-                curl.setopt(pycurl.TCP_KEEPIDLE, self.tcp_keepalive * 2)
-                curl.setopt(pycurl.TCP_KEEPINTVL, self.tcp_keepalive)
-
-            # Configure SSL verification (on some platforms, disabling
-            # SSL_VERIFYPEER implies disabling SSL_VERIFYHOST so we set
-            # SSL_VERIFYPEER after)
-            curl.setopt(pycurl.SSL_VERIFYHOST, 2 if self.ssl_verifyhost else 0)
-            curl.setopt(pycurl.SSL_VERIFYPEER, 1 if self.ssl_verifypeer else 0)
-            if self.ssl_server_cert:
-                # cacert is the name of the option for the curl command. The
-                # corresponding cURL option is CURLOPT_CAINFO.
-                # See https://curl.haxx.se/libcurl/c/CURLOPT_CAINFO.html
-                # This is inspired by that https://curl.haxx.se/docs/sslcerts.html
-                # (section "Certificate Verification", option 2) but the option
-                # CURLOPT_CAPATH is for a directory of certificates.
-                curl.setopt(pycurl.CAINFO, self.ssl_server_cert)
+
+            self._basic_curl_configuration()
 
             try:
-                curl.setopt(pycurl.URL, file_to_download)
+                self.crl.setopt(pycurl.URL, file_url)
             except Exception:
-                curl.setopt(pycurl.URL, file_to_download.encode('ascii', 'ignore'))
-            if self.proxy is not None:
-                curl.setopt(pycurl.PROXY, self.proxy)
-                if self.proxy_auth is not None:
-                    curl.setopt(pycurl.PROXYUSERPWD, self.proxy_auth)
-
-            if self.credentials is not None:
-                curl.setopt(pycurl.USERPWD, self.credentials)
-
-            curl.setopt(pycurl.CONNECTTIMEOUT, 300)
-            # Download should not take more than 5minutes
-            curl.setopt(pycurl.TIMEOUT, self.timeout)
-            curl.setopt(pycurl.NOSIGNAL, 1)
-            curl.setopt(pycurl.WRITEDATA, fp)
+                self.crl.setopt(pycurl.URL, file_url.encode('ascii', 'ignore'))
 
+            # Create file and assign it to the pycurl object
+            fp = open(file_path, "wb")
+            self.crl.setopt(pycurl.WRITEFUNCTION, fp.write)
+
+            # This is specific to HTTP
+            if self.method == 'POST':
+                # Form data must be provided already urlencoded.
+                postfields = urlencode(self.param)
+                # Sets request method to POST,
+                # Content-Type header to application/x-www-form-urlencoded
+                # and data to send in request body.
+                self.crl.setopt(pycurl.POSTFIELDS, postfields)
+
+            # Try download
             try:
-                curl.perform()
-                errcode = curl.getinfo(pycurl.HTTP_CODE)
-                if int(errcode) != 226 and int(errcode) != 200:
+                self.crl.perform()
+                errcode = self.crl.getinfo(pycurl.RESPONSE_CODE)
+                if int(errcode) != self.ERRCODE_OK:
                     error = True
-                    self.logger.error('Error while downloading ' + file_to_download + ' - ' + str(errcode))
+                    self.logger.error('Error while downloading ' + file_url + ' - ' + str(errcode))
                 else:
                     error = False
             except Exception as e:
                 self.logger.error('Could not get errcode:' + str(e))
 
-            nbtry += 1
-            curl.close()
+            # Close file
             fp.close()
+
+            # Check that the archive is correct
             if not error and not self.skip_check_uncompress:
                 archive_status = Utils.archive_check(file_path)
                 if not archive_status:
@@ -232,130 +325,36 @@ class FTPDownload(DownloadInterface):
                     error = True
                     if os.path.exists(file_path):
                         os.remove(file_path)
-        return error
-
-    def download(self, local_dir, keep_dirs=True):
-        '''
-        Download remote files to local_dir
-
-        :param local_dir: Directory where files should be downloaded
-        :type local_dir: str
-        :param keep_dirs: keep file name directory structure or copy file in local_dir directly
-        :param keep_dirs: bool
-        :return: list of downloaded files
-        '''
-        self.logger.debug('FTP:Download')
-
-        nb_files = len(self.files_to_download)
-        cur_files = 1
-
-        for rfile in self.files_to_download:
-            if self.kill_received:
-                raise Exception('Kill request received, exiting')
-            file_dir = local_dir
-            if 'save_as' not in rfile or not rfile['save_as']:
-                rfile['save_as'] = rfile['name']
-            if keep_dirs:
-                file_dir = local_dir + '/' + os.path.dirname(rfile['save_as'])
-            file_path = file_dir + '/' + os.path.basename(rfile['save_as'])
-
-            # For unit tests only, workflow will take in charge directory creation before to avoid thread multi access
-            if not os.path.exists(file_dir):
-                os.makedirs(file_dir)
-
-            self.logger.debug('FTP:Download:Progress:' + str(cur_files) + '/' + str(nb_files) + ' downloading file ' + rfile['name'])
-            self.logger.debug('FTP:Download:Progress:' + str(cur_files) + '/' + str(nb_files) + ' save as ' + rfile['save_as'])
-            cur_files += 1
-            if 'url' not in rfile or not rfile['url']:
-                rfile['url'] = self.url
-            if 'root' not in rfile or not rfile['root']:
-                rfile['root'] = self.rootdir
-            start_time = datetime.now()
-            start_time = time.mktime(start_time.timetuple())
-            error = self.curl_download(file_path, rfile['url'] + rfile['root'] + '/' + rfile['name'])
-            if error:
-                rfile['download_time'] = 0
-                rfile['error'] = True
-                raise Exception("FTP:Download:Error:" + rfile['url'] + rfile['root'] + '/' + rfile['name'])
-            else:
-                end_time = datetime.now()
-                end_time = time.mktime(end_time.timetuple())
-                rfile['download_time'] = end_time - start_time
-
-            self.set_permissions(file_path, rfile)
-
-        return self.files_to_download
-
-    def header_function(self, header_line):
-        # HTTP standard specifies that headers are encoded in iso-8859-1.
-        # On Python 2, decoding step can be skipped.
-        # On Python 3, decoding step is required.
-        header_line = header_line.decode('iso-8859-1')
-
-        # Header lines include the first status line (HTTP/1.x ...).
-        # We are going to ignore all lines that don't have a colon in them.
-        # This will botch headers that are split on multiple lines...
-        if ':' not in header_line:
-            return
-
-        # Break the header line into header name and value.
-        name, value = header_line.split(':', 1)
-
-        # Remove whitespace that may be present.
-        # Header lines include the trailing newline, and there may be whitespace
-        # around the colon.
-        name = name.strip()
-        value = value.strip()
 
-        # Header names are case insensitive.
-        # Lowercase name here.
-        name = name.lower()
+            # Increment retry counter
+            nbtry += 1
 
-        # Now we can actually record the header name and value.
-        self.headers[name] = value
+        return error
 
     def list(self, directory=''):
         '''
-        List FTP directory
+        List remote directory
 
         :return: tuple of file and dirs in current directory with details
-        '''
-        self.logger.debug('Download:List:' + self.url + self.rootdir + directory)
 
-        # Configure TCP keepalive
-        if self.tcp_keepalive:
-            self.crl.setopt(pycurl.TCP_KEEPALIVE, True)
-            self.crl.setopt(pycurl.TCP_KEEPIDLE, self.tcp_keepalive * 2)
-            self.crl.setopt(pycurl.TCP_KEEPINTVL, self.tcp_keepalive)
+        This is a generic method for HTTP and FTP. The protocol-specific parts
+        are done in _<protocol>_parse_result.
+        '''
+        dir_url = self.url + self.rootdir + directory
+        self.logger.debug('Download:List:' + dir_url)
 
-        # See the corresponding lines in method:`curl_download`
-        self.crl.setopt(pycurl.SSL_VERIFYHOST, 2 if self.ssl_verifyhost else 0)
-        self.crl.setopt(pycurl.SSL_VERIFYPEER, 1 if self.ssl_verifypeer else 0)
-        if self.ssl_server_cert:
-            self.crl.setopt(pycurl.CAINFO, self.ssl_server_cert)
+        self._basic_curl_configuration()
 
         try:
-            self.crl.setopt(pycurl.URL, self.url + self.rootdir + directory)
+            self.crl.setopt(pycurl.URL, dir_url)
         except Exception:
-            self.crl.setopt(pycurl.URL, (self.url + self.rootdir + directory).encode('ascii', 'ignore'))
+            self.crl.setopt(pycurl.URL, dir_url.encode('ascii', 'ignore'))
 
-        if self.proxy is not None:
-            self.crl.setopt(pycurl.PROXY, self.proxy)
-            if self.proxy_auth is not None:
-                self.crl.setopt(pycurl.PROXYUSERPWD, self.proxy_auth)
-
-        if self.credentials is not None:
-            self.crl.setopt(pycurl.USERPWD, self.credentials)
+        # Create buffer and assign it to the pycurl object
         output = BytesIO()
-        # lets assign this buffer to pycurl object
         self.crl.setopt(pycurl.WRITEFUNCTION, output.write)
-        self.crl.setopt(pycurl.HEADERFUNCTION, self.header_function)
-
-        self.crl.setopt(pycurl.CONNECTTIMEOUT, 300)
-        # Download should not take more than 5minutes
-        self.crl.setopt(pycurl.TIMEOUT, self.timeout)
-        self.crl.setopt(pycurl.NOSIGNAL, 1)
 
+        # Try to list
         try:
             self.crl.perform()
         except Exception as e:
@@ -378,6 +377,9 @@ class FTPDownload(DownloadInterface):
         # lets get the output in a string
         result = output.getvalue().decode(encoding)
 
+        return self._parse_result(result)
+
+    def _ftp_parse_result(self, result):
         # FTP LIST output is separated by \r\n
         # lets split the output in lines
         lines = re.split(r'[\n\r]+', result)
@@ -428,8 +430,65 @@ class FTPDownload(DownloadInterface):
                     rdirs.append(rfile)
         return (rfiles, rdirs)
 
-    def chroot(self, cwd):
-        self.logger.debug('Download: change dir ' + cwd)
+    def _http_parse_result(self, result):
+        rfiles = []
+        rdirs = []
+
+        dirs = re.findall(self.http_parse.dir_line, result)
+        if dirs is not None and len(dirs) > 0:
+            for founddir in dirs:
+                rfile = {}
+                rfile['permissions'] = ''
+                rfile['group'] = ''
+                rfile['user'] = ''
+                rfile['size'] = 0
+                date = founddir[self.http_parse.dir_date - 1]
+                dirdate = date.split()
+                parts = dirdate[0].split('-')
+                # 19-Jul-2014 13:02
+                rfile['month'] = Utils.month_to_num(parts[1])
+                rfile['day'] = int(parts[0])
+                rfile['year'] = int(parts[2])
+                rfile['name'] = founddir[self.http_parse.dir_name - 1]
+                rdirs.append(rfile)
+
+        files = re.findall(self.http_parse.file_line, result)
+
+        if files is not None and len(files) > 0:
+            for foundfile in files:
+                rfile = {}
+                rfile['permissions'] = ''
+                rfile['group'] = ''
+                rfile['user'] = ''
+                if self.http_parse.file_size != -1:
+                    rfile['size'] = humanfriendly.parse_size(foundfile[self.http_parse.file_size - 1])
+                else:
+                    rfile['size'] = 0
+                if self.http_parse.file_date != -1:
+                    date = foundfile[self.http_parse.file_date - 1]
+                    if self.http_parse.file_date_format:
+                        date_object = datetime.strptime(date, self.http_parse.file_date_format.replace('%%', '%'))
+                        rfile['month'] = date_object.month
+                        rfile['day'] = date_object.day
+                        rfile['year'] = date_object.year
+                    else:
+                        dirdate = date.split()
+                        parts = dirdate[0].split('-')
+                        # 19-Jul-2014 13:02
+                        rfile['month'] = Utils.month_to_num(parts[1])
+                        rfile['day'] = int(parts[0])
+                        rfile['year'] = int(parts[2])
+                else:
+                    today = datetime.now()
+                    date = '%s-%s-%s' % (today.year, today.month, today.day)
+                    rfile['month'] = today.month
+                    rfile['day'] = today.day
+                    rfile['year'] = today.year
+                rfile['name'] = foundfile[self.http_parse.file_name - 1]
+                filehash = (rfile['name'] + str(date) + str(rfile['size'])).encode('utf-8')
+                rfile['hash'] = hashlib.md5(filehash).hexdigest()
+                rfiles.append(rfile)
+        return (rfiles, rdirs)
 
     def close(self):
         if self.crl is not None:

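The renamed CurlDownload class now handles FTP(S), HTTP(S) and SFTP through a
single cURL handle. A minimal usage sketch based on the signatures introduced
in this patch; the server, root directory and file pattern are illustrative
assumptions, and the regexps simply reuse the examples from the HTTPParse
docstring:

    from biomaj_download.download.curl import CurlDownload, HTTPParse

    # Regexps for an Apache-style index page; group numbers keep their
    # defaults (name=1, date=2, size=3).
    http_parse = HTTPParse(
        dir_line=r'<img[\s]+src="[\S]+"[\s]+alt="\[DIR\]"[\s]*/?>[\s]*<a[\s]+href="([\S]+)/"[\s]*>.*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})',
        file_line=r'<img[\s]+src="[\S]+"[\s]+alt="\[[\s]+\]"[\s]*/?>[\s]<a[\s]+href="([\S]+)".*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})[\s]+([\d\.]+[MKG]{0,1})',
    )

    downloader = CurlDownload('https', 'example.org', '/data/', http_parse=http_parse)
    (files, dirs) = downloader.list()             # parsed by _http_parse_result
    downloader.match([r'^.*\.gz$'], files, dirs)  # fills files_to_download
    downloader.download('/tmp/offline')           # generic loop, see interface.py
    downloader.close()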

=====================================
biomaj_download/download/direct.py
=====================================
@@ -1,12 +1,29 @@
+"""
+Subclasses for direct download (i.e. downloading without regexp). The usage is
+a bit different: instead of calling method:`list` and method:`match`, client
+code explicitely calls method:`set_files_to_download` (passing a list
+containing only the file name). method:`list` is used to get more information
+about the file (if possile). method:`match` matches everything.
+Also client code can use method:`set_save_as` to indicate the name of the file
+to save.
+
+The trick for the implementation is to override
+method:`_append_file_to_download` to initialize the rfile with the file name
+and dummy values. Note that we use a list of rfile even if it contains only one
+file.
+method:`list` will modify directly the files_to_download.
+method:``match` don't call method:`_append_file_to_download` (since the list of
+files to download is already set up).
+We also override method:`set_files_to_download` to check that we pass only one
+file.
+"""
 import datetime
-import time
 import pycurl
-import os
 import re
 import hashlib
 import sys
 
-from biomaj_download.download.ftp import FTPDownload
+from biomaj_download.download.curl import CurlDownload
 from biomaj_core.utils import Utils
 
 if sys.version_info[0] < 3:
@@ -20,217 +37,98 @@ except ImportError:
     from StringIO import StringIO as BytesIO
 
 
-class DirectFTPDownload(FTPDownload):
+class DirectFTPDownload(CurlDownload):
     '''
     download a list of files from FTP, no regexp
     '''
 
-    def __init__(self, protocol, host, rootdir=''):
-        '''
+    ALL_PROTOCOLS = ["ftp", "ftps"]
 
+    def _append_file_to_download(self, filename):
+        '''
         Initialize the files in list with today as last-modification date.
-        Size is also preset to zero, size will be set after download
-
+        Size is also preset to zero.
         '''
-        FTPDownload.__init__(self, protocol, host, rootdir)
-        self.save_as = None
-        self.headers = {}
-
-    def set_files_to_download(self, files):
         today = datetime.date.today()
-        self.files_to_download = []
-        for file_to_download in files:
-            rfile = {}
-            rfile['root'] = ''
-            rfile['permissions'] = ''
-            rfile['group'] = ''
-            rfile['user'] = ''
-            rfile['size'] = 0
-            rfile['month'] = today.month
-            rfile['day'] = today.day
-            rfile['year'] = today.year
-            if file_to_download.endswith('/'):
-                rfile['name'] = file_to_download[:-1]
-            else:
-                rfile['name'] = file_to_download
-            rfile['hash'] = None
-            if self.param:
-                if 'param' not in file_to_download or not file_to_download['param']:
-                    rfile['param'] = self.param
-            self.files_to_download.append(rfile)
+        rfile = {}
+        rfile['root'] = self.rootdir
+        rfile['permissions'] = ''
+        rfile['group'] = ''
+        rfile['user'] = ''
+        rfile['size'] = 0
+        rfile['month'] = today.month
+        rfile['day'] = today.day
+        rfile['year'] = today.year
+        if filename.endswith('/'):
+            rfile['name'] = filename[:-1]
+        else:
+            rfile['name'] = filename
+        rfile['hash'] = None
+        # Use self.save_as even if we use it in list(). This is important.
+        rfile['save_as'] = self.save_as
+        super(DirectFTPDownload, self)._append_file_to_download(rfile)
+
+    def set_files_to_download(self, files_to_download):
+        if len(files_to_download) > 1:
+            self.files_to_download = []
+            msg = self.__class__.__name__ + ' accepts only 1 file'
+            self.logger.error(msg)
+            raise ValueError(msg)
+        return super(DirectFTPDownload, self).set_files_to_download(files_to_download)
 
     def list(self, directory=''):
         '''
         FTP protocol does not give us the possibility to get file date from remote url
         '''
-        for rfile in self.files_to_download:
-            if self.save_as is None:
-                self.save_as = rfile['name']
-            rfile['save_as'] = self.save_as
+        # TODO: are we sure about this implementation?
         return (self.files_to_download, [])
 
     def match(self, patterns, file_list, dir_list=None, prefix='', submatch=False):
         '''
         All files to download match, no pattern
         '''
-        if dir_list is None:
-            dir_list = []
-        self.files_to_download = file_list
+        pass
 
 
-class DirectHttpDownload(DirectFTPDownload):
+class DirectHTTPDownload(DirectFTPDownload):
 
-    def __init__(self, protocol, host, rootdir=''):
-        '''
-        :param file_list: list of files to download on server
-        :type file_list: list
-        '''
-        DirectFTPDownload.__init__(self, protocol, host, rootdir)
-        self.save_as = None
+    ALL_PROTOCOLS = ["http", "https"]
+
+    def __init__(self, curl_protocol, host, rootdir=''):
+        DirectFTPDownload.__init__(self, curl_protocol, host, rootdir)
         self.method = 'GET'
         self.param = {}
 
-    def download(self, local_dir, keep_dirs=True):
-        '''
-        Download remote files to local_dir
-
-        :param local_dir: Directory where files should be downloaded
-        :type local_dir: str
-        :param keep_dirs: keep file name directory structure or copy file in local_dir directly
-        :param keep_dirs: bool
-        :return: list of downloaded files
-        '''
-        self.logger.debug('DirectHTTP:Download')
-        nb_files = len(self.files_to_download)
-
-        if nb_files > 1:
-            self.files_to_download = []
-            self.logger.error('DirectHTTP accepts only 1 file')
-
-        cur_files = 1
-
-        for rfile in self.files_to_download:
-            if self.kill_received:
-                raise Exception('Kill request received, exiting')
-
-            if not self.save_as:
-                self.save_as = rfile['name']
-            else:
-                rfile['save_as'] = self.save_as
-            file_dir = local_dir
-            if keep_dirs:
-                file_dir = local_dir + os.path.dirname(self.save_as)
-            file_path = file_dir + '/' + os.path.basename(self.save_as)
-
-            # For unit tests only, workflow will take in charge directory creation before to avoid thread multi access
-            if not os.path.exists(file_dir):
-                os.makedirs(file_dir)
-            self.logger.debug('DirectHTTP:Download:Progress' + str(cur_files) + '/' + str(nb_files) + ' downloading file ' + rfile['name'] + ', save as ' + self.save_as)
-            cur_files += 1
-            if 'url' not in rfile:
-                rfile['url'] = self.url
-            fp = open(file_path, "wb")
-            curl = pycurl.Curl()
-
-            if self.proxy is not None:
-                curl.setopt(pycurl.PROXY, self.proxy)
-                if self.proxy_auth is not None:
-                    curl.setopt(pycurl.PROXYUSERPWD, self.proxy_auth)
-
-            if self.method == 'POST':
-                # Form data must be provided already urlencoded.
-                postfields = urlencode(self.param)
-                # Sets request method to POST,
-                # Content-Type header to application/x-www-form-urlencoded
-                # and data to send in request body.
-                if self.credentials is not None:
-                    curl.setopt(pycurl.USERPWD, self.credentials)
-
-                curl.setopt(pycurl.POSTFIELDS, postfields)
-                try:
-                    curl.setopt(pycurl.URL, rfile['url'] + rfile['root'] + '/' + rfile['name'])
-                except Exception:
-                    curl.setopt(pycurl.URL, (rfile['url'] + rfile['root'] + '/' + rfile['name']).encode('ascii', 'ignore'))
-
-            else:
-                url = rfile['url'] + rfile['root'] + '/' + rfile['name'] + '?' + urlencode(self.param)
-                try:
-                    curl.setopt(pycurl.URL, url)
-                except Exception:
-                    curl.setopt(pycurl.URL, url.encode('ascii', 'ignore'))
-
-            curl.setopt(pycurl.WRITEDATA, fp)
-            start_time = datetime.datetime.now()
-            start_time = time.mktime(start_time.timetuple())
-            curl.perform()
-            end_time = datetime.datetime.now()
-            end_time = time.mktime(end_time.timetuple())
-            rfile['download_time'] = end_time - start_time
-
-            curl.close()
-            fp.close()
-            self.logger.debug('downloaded!')
-            rfile['name'] = self.save_as
-            self.set_permissions(file_path, rfile)
-        return self.files_to_download
-
-    def header_function(self, header_line):
-        # HTTP standard specifies that headers are encoded in iso-8859-1.
-        # On Python 2, decoding step can be skipped.
-        # On Python 3, decoding step is required.
-        header_line = header_line.decode('iso-8859-1')
-
-        # Header lines include the first status line (HTTP/1.x ...).
-        # We are going to ignore all lines that don't have a colon in them.
-        # This will botch headers that are split on multiple lines...
-        if ':' not in header_line:
-            return
-
-        # Break the header line into header name and value.
-        name, value = header_line.split(':', 1)
-
-        # Remove whitespace that may be present.
-        # Header lines include the trailing newline, and there may be whitespace
-        # around the colon.
-        name = name.strip()
-        value = value.strip()
-
-        # Header names are case insensitive.
-        # Lowercase name here.
-        name = name.lower()
-
-        # Now we can actually record the header name and value.
-        self.headers[name] = value
+    def _file_url(self, file_to_download):
+        url = super(DirectHTTPDownload, self)._file_url(file_to_download)
+        if self.method == "GET":
+            url += '?' + urlencode(self.param)
+        return url
 
     def list(self, directory=''):
         '''
         Try to get file headers to get last_modification and size
         '''
+        self._basic_curl_configuration()
+        # Specific configuration
+        self.crl.setopt(pycurl.HEADER, True)
+        self.crl.setopt(pycurl.NOBODY, True)
         for rfile in self.files_to_download:
             if self.save_as is None:
                 self.save_as = rfile['name']
 
             rfile['save_as'] = self.save_as
 
-            self.crl.setopt(pycurl.HEADER, True)
-            if self.credentials is not None:
-                self.crl.setopt(pycurl.USERPWD, self.credentials)
-
-            if self.proxy is not None:
-                self.crl.setopt(pycurl.PROXY, self.proxy)
-                if self.proxy_auth is not None:
-                    self.crl.setopt(pycurl.PROXYUSERPWD, self.proxy_auth)
-
-            self.crl.setopt(pycurl.NOBODY, True)
+            file_url = self._file_url(rfile)
             try:
-                self.crl.setopt(pycurl.URL, self.url + self.rootdir + rfile['name'])
+                self.crl.setopt(pycurl.URL, file_url)
             except Exception:
-                self.crl.setopt(pycurl.URL, (self.url + self.rootdir + rfile['name']).encode('ascii', 'ignore'))
+                self.crl.setopt(pycurl.URL, file_url.encode('ascii', 'ignore'))
 
+            # Create a buffer and assign it to the pycurl object
             output = BytesIO()
-            # lets assign this buffer to pycurl object
             self.crl.setopt(pycurl.WRITEFUNCTION, output.write)
-            self.crl.setopt(pycurl.HEADERFUNCTION, self.header_function)
+
             self.crl.perform()
 
             # Figure out what encoding was sent with the response, if any.

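As the new module docstring explains, direct downloaders are driven without
list/match: client code sets the single file to download explicitly. A hedged
sketch of that flow (host, path and file name are illustrative):

    from biomaj_download.download.direct import DirectHTTPDownload

    downloader = DirectHTTPDownload('https', 'example.org', '/releases')
    # Exactly one file; passing more raises ValueError.
    downloader.set_files_to_download(['latest.tar.gz'])
    # list() performs a body-less request to read size/date headers.
    (files, dirs) = downloader.list()
    downloader.download('/tmp/offline')
    downloader.close()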

=====================================
biomaj_download/download/http.py deleted
=====================================
@@ -1,166 +0,0 @@
-import pycurl
-import re
-import hashlib
-import datetime
-
-import humanfriendly
-
-from biomaj_core.utils import Utils
-from biomaj_download.download.ftp import FTPDownload
-
-try:
-    from io import BytesIO
-except ImportError:
-    from StringIO import StringIO as BytesIO
-
-
-class HTTPParse(object):
-
-    def __init__(self, dir_line, file_line, dir_name=1, dir_date=2, file_name=1, file_date=2, file_date_format=None, file_size=3):
-        r'''
-        http.parse.dir.line: <img[\s]+src="[\S]+"[\s]+alt="\[DIR\]"[\s]*/?>[\s]*<a[\s]+href="([\S]+)/"[\s]*>.*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})
-        http.parse.file.line: <img[\s]+src="[\S]+"[\s]+alt="\[[\s]+\]"[\s]*/?>[\s]<a[\s]+href="([\S]+)".*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})[\s]+([\d\.]+[MKG]{0,1})
-        http.group.dir.name: 1
-        http.group.dir.date: 2
-        http.group.file.name: 1
-        http.group.file.date: 2
-        http.group.file.size: 3
-        '''
-        self.dir_line = dir_line
-        self.file_line = file_line
-        self.dir_name = dir_name
-        self.dir_date = dir_date
-        self.file_name = file_name
-        self.file_date = file_date
-        self.file_size = file_size
-        self.file_date_format = file_date_format
-
-
-class HTTPDownload(FTPDownload):
-    '''
-    Base class to download files from HTTP
-
-    Makes use of http.parse.dir.line etc.. regexps to extract page information
-
-    protocol=http
-    server=ftp.ncbi.nih.gov
-    remote.dir=/blast/db/FASTA/
-
-    remote.files=^alu.*\\.gz$
-
-    '''
-
-    def __init__(self, protocol, host, rootdir, http_parse=None):
-        FTPDownload.__init__(self, protocol, host, rootdir)
-        self.http_parse = http_parse
-
-    def list(self, directory=''):
-        '''
-        List FTP directory
-
-        :return: tuple of file and dirs in current directory with details
-        '''
-        self.logger.debug('Download:List:' + self.url + self.rootdir + directory)
-
-        try:
-            self.crl.setopt(pycurl.URL, self.url + self.rootdir + directory)
-        except Exception:
-            self.crl.setopt(pycurl.URL, (self.url + self.rootdir + directory).encode('ascii', 'ignore'))
-
-        if self.proxy is not None:
-            self.crl.setopt(pycurl.PROXY, self.proxy)
-            if self.proxy_auth is not None:
-                self.crl.setopt(pycurl.PROXYUSERPWD, self.proxy_auth)
-
-        if self.credentials is not None:
-            self.crl.setopt(pycurl.USERPWD, self.credentials)
-
-        output = BytesIO()
-        # lets assign this buffer to pycurl object
-        self.crl.setopt(pycurl.WRITEFUNCTION, output.write)
-        self.crl.setopt(pycurl.HEADERFUNCTION, self.header_function)
-        self.crl.perform()
-        # Figure out what encoding was sent with the response, if any.
-        # Check against lowercased header name.
-        encoding = None
-        if 'content-type' in self.headers:
-            content_type = self.headers['content-type'].lower()
-            match = re.search(r'charset=(\S+)', content_type)
-            if match:
-                encoding = match.group(1)
-        if encoding is None:
-            # Default encoding for HTML is iso-8859-1.
-            # Other content types may have different default encoding,
-            # or in case of binary data, may have no encoding at all.
-            encoding = 'iso-8859-1'
-
-        # lets get the output in a string
-        result = output.getvalue().decode(encoding)
-        r'''
-        http.parse.dir.line': r'<a[\s]+href="([\S]+)/".*alt="\[DIR\]">.*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})',
-        http.parse.file.line': r'<a[\s]+href="([\S]+)".*([\d]{2}-[\w\d]{2,5}-[\d]{4}\s[\d]{2}:[\d]{2})[\s]+([\d\.]+[MKG]{0,1})',
-        http.group.dir.name': 1,
-        http.group.dir.date': 2,
-        http.group.file.name': 1,
-        http.group.file.date': 2,
-        http.group.file.size': 3,
-        '''
-
-        rfiles = []
-        rdirs = []
-
-        dirs = re.findall(self.http_parse.dir_line, result)
-        if dirs is not None and len(dirs) > 0:
-            for founddir in dirs:
-                rfile = {}
-                rfile['permissions'] = ''
-                rfile['group'] = ''
-                rfile['user'] = ''
-                rfile['size'] = 0
-                date = founddir[self.http_parse.dir_date - 1]
-                dirdate = date.split()
-                parts = dirdate[0].split('-')
-                # 19-Jul-2014 13:02
-                rfile['month'] = Utils.month_to_num(parts[1])
-                rfile['day'] = int(parts[0])
-                rfile['year'] = int(parts[2])
-                rfile['name'] = founddir[self.http_parse.dir_name - 1]
-                rdirs.append(rfile)
-
-        files = re.findall(self.http_parse.file_line, result)
-        if files is not None and len(files) > 0:
-            for foundfile in files:
-                rfile = {}
-                rfile['permissions'] = ''
-                rfile['group'] = ''
-                rfile['user'] = ''
-                if self.http_parse.file_size != -1:
-                    rfile['size'] = humanfriendly.parse_size(foundfile[self.http_parse.file_size - 1])
-                else:
-                    rfile['size'] = 0
-                if self.http_parse.file_date != -1:
-                    date = foundfile[self.http_parse.file_date - 1]
-                    if self.http_parse.file_date_format:
-                        date_object = datetime.datetime.strptime(date, self.http_parse.file_date_format.replace('%%', '%'))
-                        rfile['month'] = date_object.month
-                        rfile['day'] = date_object.day
-                        rfile['year'] = date_object.year
-                    else:
-                        dirdate = date.split()
-                        parts = dirdate[0].split('-')
-                        # 19-Jul-2014 13:02
-                        rfile['month'] = Utils.month_to_num(parts[1])
-                        rfile['day'] = int(parts[0])
-                        rfile['year'] = int(parts[2])
-                else:
-                    today = datetime.datetime.now()
-                    date = '%s-%s-%s' % (today.year, today.month, today.day)
-                    rfile['month'] = today.month
-                    rfile['day'] = today.day
-                    rfile['year'] = today.year
-                rfile['name'] = foundfile[self.http_parse.file_name - 1]
-                filehash = (rfile['name'] + str(date) + str(rfile['size'])).encode('utf-8')
-                rfile['hash'] = hashlib.md5(filehash).hexdigest()
-                rfiles.append(rfile)
-
-        return (rfiles, rdirs)


=====================================
biomaj_download/download/interface.py
=====================================
@@ -24,12 +24,30 @@ class _FakeLock(object):
 
 class DownloadInterface(object):
     '''
-    Main interface that all downloaders must extend
+    Main interface that all downloaders must extend.
+
+    The methods are divided into 2 broad categories:
+      - setters which act on properties of the downloader; those methods are
+        important in microservice mode
+      - file operations which are used to list and match remote files, download
+        them, etc.
+
+    Usually, it is enough to overload list, _append_file_to_download and
+    _download.
+
+    TODO:
+      - the purpose of some setters (set_server, set_protocol) is not clear
+        since a subclass cannot always change those parameters arbitrarily
+      - chroot is not used in BioMaJ
     '''
 
     files_num_threads = 4
 
     def __init__(self):
+        # This variable defines the protocol as passed by the config file (i.e.
+        # this is directftp for DirectFTPDownload). It is used by the workflow
+        # to send the download message so it must be set.
+        self.protocol = None
         self.config = None
         self.files_to_download = []
         self.files_to_copy = []
@@ -47,13 +65,16 @@ class DownloadInterface(object):
         self.logger = logging.getLogger('biomaj')
         self.param = None
         self.method = None
-        self.protocol = None
         self.server = None
         self.offline_dir = None
         # Options
         self.protocol_options = {}
         self.skip_check_uncompress = False
 
+    #
+    # Setters for downloader
+    #
+
     def set_offline_dir(self, offline_dir):
         self.offline_dir = offline_dir
 
@@ -61,15 +82,13 @@ class DownloadInterface(object):
         self.server = server
 
     def set_protocol(self, protocol):
+        """
+        Method used by DownloadService to set the protocol. This value is
+        passed from the config file so is not always a real protocol (for
+        instance it can be "directhttp" for a direct downloader).
+        """
         self.protocol = protocol
 
-    def set_files_to_download(self, files):
-        self.files_to_download = files
-        for file_to_download in self.files_to_download:
-            if self.param:
-                if 'param' not in file_to_download or not file_to_download['param']:
-                    file_to_download['param'] = self.param
-
     def set_param(self, param):
         self.param = param
 
@@ -100,6 +119,54 @@ class DownloadInterface(object):
     def set_method(self, method):
         self.method = method
 
+    def set_credentials(self, userpwd):
+        '''
+        Set credentials in format user:pwd
+
+        :param userpwd: credentials
+        :type userpwd: str
+        '''
+        self.credentials = userpwd
+
+    def set_options(self, protocol_options):
+        """
+        Set protocol specific options.
+
+        Subclasses that override this method must call the
+        parent implementation.
+        """
+        self.protocol_options = protocol_options
+        if "skip_check_uncompress" in protocol_options:
+            self.skip_check_uncompress = Utils.to_bool(protocol_options["skip_check_uncompress"])
+
+    #
+    # File operations (match, list, download) and associated hook methods
+    #
+
+    def _append_file_to_download(self, rfile):
+        """
+        Add a file to the download list and check its properties (this method
+        is called in `match` and `set_files_to_download`).
+
+        Downloaders can override this to add some properties to the file (for
+        instance, most of them will add "root").
+        """
+        # Add properties to the file if needed (for safety)
+        if 'save_as' not in rfile or rfile['save_as'] is None:
+            rfile['save_as'] = rfile['name']
+        if self.param:
+            if 'param' not in rfile or not rfile['param']:
+                rfile['param'] = self.param
+        self.files_to_download.append(rfile)
+
+    def set_files_to_download(self, files):
+        """
+        Convenience method to set the list of files to download.
+        """
+        self.files_to_download = []
+        for file_to_download in files:
+            self._append_file_to_download(file_to_download)
+
     def match(self, patterns, file_list, dir_list=None, prefix='', submatch=False):
         '''
         Find files matching patterns. Sets instance variable files_to_download.
@@ -130,13 +197,12 @@ class DownloadInterface(object):
                 if subdir == '^':
                     subdirs_pattern = subdirs_pattern[1:]
                     subdir = subdirs_pattern[0]
-                if not dir_list and pattern == '**/*':
-                    # Take all and no more dirs, take all files
+                # If getting all, get all files
+                if pattern == '**/*':
                     for rfile in file_list:
-                        rfile['root'] = self.rootdir
                         if prefix != '':
                             rfile['name'] = prefix + '/' + rfile['name']
-                        self.files_to_download.append(rfile)
+                        self._append_file_to_download(rfile)
                         self.logger.debug('Download:File:MatchRegExp:' + rfile['name'])
                     return
                 for direlt in dir_list:
@@ -147,10 +213,9 @@ class DownloadInterface(object):
                         self.match([pattern], subfile_list, subdirs_list, prefix + '/' + subdir, True)
                         for rfile in file_list:
                             if pattern == '**/*' or re.match(pattern, rfile['name']):
-                                rfile['root'] = self.rootdir
                                 if prefix != '':
                                     rfile['name'] = prefix + '/' + rfile['name']
-                                self.files_to_download.append(rfile)
+                                self._append_file_to_download(rfile)
                                 self.logger.debug('Download:File:MatchRegExp:' + rfile['name'])
                     else:
                         if re.match(subdirs_pattern[0], subdir):
@@ -163,10 +228,9 @@ class DownloadInterface(object):
             else:
                 for rfile in file_list:
                     if re.match(pattern, rfile['name']):
-                        rfile['root'] = self.rootdir
                         if prefix != '':
                             rfile['name'] = prefix + '/' + rfile['name']
-                        self.files_to_download.append(rfile)
+                        self._append_file_to_download(rfile)
                         self.logger.debug('Download:File:MatchRegExp:' + rfile['name'])
         if not submatch and len(self.files_to_download) == 0:
             raise Exception('no file found matching expressions')
@@ -226,7 +290,6 @@ class DownloadInterface(object):
                         self.files_to_copy.append(dfile)
                     else:
                         new_files_to_download.append(dfile)
-
         else:
             # Copy everything
             for dfile in self.files_to_download:
@@ -236,17 +299,66 @@ class DownloadInterface(object):
                 else:
                     new_files_to_download.append(dfile)
 
-        self.files_to_download = new_files_to_download
+        self.set_files_to_download(new_files_to_download)
 
-    def download(self, local_dir):
+    def _download(self, file_path, rfile):
+        '''
+        Download one file and return False in case of success and True
+        otherwise. This must be implemented in subclasses.
+        '''
+        raise NotImplementedError()
+
+    def download(self, local_dir, keep_dirs=True):
         '''
         Download remote files to local_dir
 
         :param local_dir: Directory where files should be downloaded
         :type local_dir: str
+        :param keep_dirs: keep file name directory structure or copy file in local_dir directly
+        :type keep_dirs: bool
         :return: list of downloaded files
         '''
-        pass
+        self.logger.debug(self.__class__.__name__ + ':Download')
+        nb_files = len(self.files_to_download)
+        cur_files = 1
+        self.offline_dir = local_dir
+        for rfile in self.files_to_download:
+            if self.kill_received:
+                raise Exception('Kill request received, exiting')
+            # Determine where to store file (directory and name)
+            file_dir = local_dir
+            if keep_dirs:
+                file_dir = local_dir + '/' + os.path.dirname(rfile['save_as'])
+            if file_dir[-1] == "/":
+                file_path = file_dir + os.path.basename(rfile['save_as'])
+            else:
+                file_path = file_dir + '/' + os.path.basename(rfile['save_as'])
+
+            # For unit tests only; in production the workflow creates
+            # directories beforehand to avoid concurrent access from threads
+            if not os.path.exists(file_dir):
+                os.makedirs(file_dir)
+
+            msg = self.__class__.__name__ + ':Download:Progress:'
+            msg += str(cur_files) + '/' + str(nb_files)
+            msg += ' downloading file ' + rfile['name'] + ' save as ' + rfile['save_as']
+            self.logger.debug(msg)
+            cur_files += 1
+            start_time = datetime.datetime.now()
+            start_time = time.mktime(start_time.timetuple())
+            error = self._download(file_path, rfile)
+            if error:
+                rfile['download_time'] = 0
+                rfile['error'] = True
+                raise Exception(self.__class__.__name__ + ":Download:Error:" + rfile["name"])
+            else:
+                end_time = datetime.datetime.now()
+                end_time = time.mktime(end_time.timetuple())
+                rfile['download_time'] = end_time - start_time
+            # Set permissions
+            self.set_permissions(file_path, rfile)
+
+        return self.files_to_download
 
     def list(self):
         '''
@@ -262,26 +374,6 @@ class DownloadInterface(object):
         '''
         pass
 
-    def set_credentials(self, userpwd):
-        '''
-        Set credentials in format user:pwd
-
-        :param userpwd: credentials
-        :type userpwd: str
-        '''
-        self.credentials = userpwd
-
-    def set_options(self, protocol_options):
-        """
-        Set protocol specific options.
-
-        Subclasses that override this method must call the
-        parent implementation.
-        """
-        self.protocol_options = protocol_options
-        if "skip_check_uncompress" in protocol_options:
-            self.skip_check_uncompress = Utils.to_bool(protocol_options["skip_check_uncompress"])
-
     def close(self):
         '''
         Close connection


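With this refactor, download() becomes a template method in DownloadInterface: it handles progress logging, timing, permissions and error propagation, while concrete downloaders only implement _download(file_path, rfile) and return True on error. A minimal sketch of a conforming subclass, with illustrative names only (not part of the patch):

    import shutil

    from biomaj_download.download.interface import DownloadInterface

    class CopyDownload(DownloadInterface):
        '''Toy downloader that "downloads" by copying a local file.'''

        def _download(self, file_path, rfile):
            # Interface convention: return False on success, True on error.
            try:
                shutil.copyfile(rfile['root'] + '/' + rfile['name'], file_path)
            except OSError:
                return True
            return False
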
=====================================
biomaj_download/download/localcopy.py
=====================================
@@ -15,7 +15,6 @@ class LocalDownload(DownloadInterface):
     remote.dir=/blast/db/FASTA/
 
     remote.files=^alu.*\\.gz$
-
     '''
 
     def __init__(self, rootdir, use_hardlinks=False):
@@ -24,6 +23,11 @@ class LocalDownload(DownloadInterface):
         self.rootdir = rootdir
         self.use_hardlinks = use_hardlinks
 
+    def _append_file_to_download(self, rfile):
+        if 'root' not in rfile or not rfile['root']:
+            rfile['root'] = self.rootdir
+        super(LocalDownload, self)._append_file_to_download(rfile)
+
     def download(self, local_dir):
         '''
         Copy local files to local_dir


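The _append_file_to_download override above is the hook each downloader now uses to default rfile['root'] to its own root directory before the base class records the file. A hedged usage sketch (the rootdir and target paths are illustrative):

    from biomaj_download.download.localcopy import LocalDownload

    ld = LocalDownload('/db/blast/FASTA')
    (files, dirs) = ld.list()
    # Each matched rfile gets rfile['root'] set via the hook above.
    ld.match([r'^alu.*\.gz$'], files, dirs)
    ld.download('/tmp/offline')
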
=====================================
biomaj_download/download/protocolirods.py
=====================================
@@ -1,34 +1,37 @@
-import logging
-import os
-from datetime import datetime
-import time
-
-from biomaj_core.utils import Utils
 from biomaj_download.download.interface import DownloadInterface
 from irods.session import iRODSSession
-from irods.models import Collection, DataObject, User
+from irods.models import DataObject, User
 
 
 class IRODSDownload(DownloadInterface):
-    # To connect to irods session : sess = iRODSSession(host='localhost', port=1247, user='rods', password='rods', zone='tempZone')
-    # password : self.credentials
-    def __init__(self, protocol, server, remote_dir):
+
+    # This is used only for messages
+    real_protocol = "irods"
+
+    def __init__(self, server, remote_dir):
         DownloadInterface.__init__(self)
-        self.port = None
-        self.remote_dir = remote_dir  # directory on the remote server : zone
+        self.port = 1247
+        self.remote_dir = remote_dir  # directory on the remote server including zone
         self.rootdir = remote_dir
         self.user = None
         self.password = None
         self.server = server
-        self.zone = None
+        self.zone = remote_dir.split("/")[0]
+
+    def _append_file_to_download(self, rfile):
+        if 'root' not in rfile or not rfile['root']:
+            rfile['root'] = self.rootdir
+        super(IRODSDownload, self)._append_file_to_download(rfile)
 
     def set_param(self, param):
-        # self.param is a dictionnary which has the following form :{'password': u'biomaj', 'protocol': u'iget', 'user': u'biomaj', 'port': u'port'}
+        # param is a dictionary of the form:
+        # {'password': u'biomaj', 'user': u'biomaj', 'port': u'port'}
+        # 'port' is optional
         self.param = param
-        self.port = int(param['port'])
         self.user = str(param['user'])
         self.password = str(param['password'])
-        self.zone = str(param['zone'])
+        if 'port' in param:
+            self.port = int(param['port'])
 
     def list(self, directory=''):
         session = iRODSSession(host=self.server, port=self.port, user=self.user, password=self.password, zone=self.zone)
@@ -36,10 +39,13 @@ class IRODSDownload(DownloadInterface):
         rdirs = []
         rfile = {}
         date = None
-        for result in session.query(Collection.name, DataObject.name, DataObject.size, DataObject.owner_name, DataObject.modify_time).filter(User.name == self.user).get_results():
-            # if the user is biomaj : he will have access to all the irods data (biomaj ressource) : drwxr-xr-x
+        query = session.query(DataObject.name, DataObject.size,
+                              DataObject.owner_name, DataObject.modify_time)
+        results = query.filter(User.name == self.user).get_results()
+        for result in results:
             # Avoid duplication
-            if rfile != {} and rfile['name'] == str(result[DataObject.name]) and date == str(result[DataObject.modify_time]).split(" ")[0].split('-'):
+            if rfile != {} and rfile['name'] == str(result[DataObject.name]) \
+               and date == str(result[DataObject.modify_time]).split(" ")[0].split('-'):
                 continue
             rfile = {}
             date = str(result[DataObject.modify_time]).split(" ")[0].split('-')
@@ -49,81 +55,28 @@ class IRODSDownload(DownloadInterface):
             rfile['day'] = int(date[2])
             rfile['year'] = int(date[0])
             rfile['name'] = str(result[DataObject.name])
-            rfile['download_path'] = str(result[Collection.name])
             rfiles.append(rfile)
         session.cleanup()
         return (rfiles, rdirs)
 
-    def download(self, local_dir, keep_dirs=True):
-        '''
-        Download remote files to local_dir
-
-        :param local_dir: Directory where files should be downloaded
-        :type local_dir: str
-        :param keep_dirs: keep file name directory structure or copy file in local_dir directly
-        :param keep_dirs: bool
-        :return: list of downloaded files
-        '''
-        logging.debug('IRODS:Download')
-        try:
-            os.chdir(local_dir)
-        except TypeError:
-            logging.error("IRODS:list:Could not find offline_dir")
-        nb_files = len(self.files_to_download)
-        cur_files = 1
-        # give a working directory to copy the file from irods
-        remote_dir = self.remote_dir
-        for rfile in self.files_to_download:
-            if self.kill_received:
-                raise Exception('Kill request received, exiting')
-            file_dir = local_dir
-            if 'save_as' not in rfile or rfile['save_as'] is None:
-                rfile['save_as'] = rfile['name']
-            if keep_dirs:
-                file_dir = local_dir + os.path.dirname(rfile['save_as'])
-            file_path = file_dir + '/' + os.path.basename(rfile['save_as'])
-            # For unit tests only, workflow will take in charge directory creation before to avoid thread multi access
-            if not os.path.exists(file_dir):
-                os.makedirs(file_dir)
-
-            logging.debug('IRODS:Download:Progress:' + str(cur_files) + '/' + str(nb_files) + ' downloading file ' + rfile['name'])
-            logging.debug('IRODS:Download:Progress:' + str(cur_files) + '/' + str(nb_files) + ' save as ' + rfile['save_as'])
-            cur_files += 1
-            start_time = datetime.now()
-            start_time = time.mktime(start_time.timetuple())
-            self.remote_dir = rfile['root']
-            error = self.irods_download(file_dir, str(self.remote_dir), str(rfile['name']))
-            if error:
-                rfile['download_time'] = 0
-                rfile['error'] = True
-                raise Exception("IRODS:Download:Error:" + rfile['root'] + '/' + rfile['name'])
-            else:
-                archive_status = Utils.archive_check(file_path)
-                if not archive_status:
-                    self.logger.error('Archive is invalid or corrupted, deleting file')
-                    rfile['error'] = True
-                    if os.path.exists(file_path):
-                        os.remove(file_path)
-                    raise Exception("IRODS:Download:Error:" + rfile['root'] + '/' + rfile['name'])
-
-            end_time = datetime.now()
-            end_time = time.mktime(end_time.timetuple())
-            rfile['download_time'] = end_time - start_time
-            self.set_permissions(file_path, rfile)
-        self.remote_dir = remote_dir
-        return(self.files_to_download)
-
-    def irods_download(self, file_dir, file_path, file_to_download):
+    def _download(self, file_dir, rfile):
         error = False
-        logging.debug('IRODS:IRODS DOWNLOAD')
-        session = iRODSSession(host=self.server, port=self.port, user=self.user, password=self.password, zone=self.zone)
+        self.logger.debug('IRODS:IRODS DOWNLOAD')
+        session = iRODSSession(host=self.server, port=self.port,
+                               user=self.user, password=self.password,
+                               zone=self.zone)
         try:
-            file_to_get = str(file_path) + str(file_to_download)
-            # Write the file to download in the wanted file_dir : with the python-irods iget
+            # iRODS doesn't like multiple "/"
+            if rfile['root'][-1] == "/":
+                file_to_get = rfile['root'] + rfile['name']
+            else:
+                file_to_get = rfile['root'] + "/" + rfile['name']
+            # Fetch the file into the wanted destination with the
+            # python-irodsclient (equivalent of iget)
             obj = session.data_objects.get(file_to_get, file_dir)
         except ExceptionIRODS as e:
-            logging.error("RsyncError:" + str(e))
-            logging.error("RsyncError: irods object" + str(obj))
+            self.logger.error(self.__class__.__name__ + ":Download:Error:Can't get irods object " + str(obj))
+            self.logger.error(self.__class__.__name__ + ":Download:Error:" + str(e))
         session.cleanup()
         return(error)
 


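With the constructor change above, the zone is derived from the first component of remote_dir and the port defaults to 1247, so set_param only needs user and password. A hedged usage sketch (host, path and pattern are illustrative; note that remote_dir.split("/")[0] yields the zone only when remote_dir has no leading slash):

    from biomaj_download.download.protocolirods import IRODSDownload

    irodsd = IRODSDownload('irods.example.org', 'tempZone/home/biomaj')
    irodsd.set_param({'user': 'biomaj', 'password': 'secret'})  # 'port' is optional
    (files, dirs) = irodsd.list()
    irodsd.match([r'^test.*\.gz$'], files, dirs)
    irodsd.download('/tmp/offline')
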
=====================================
biomaj_download/download/rsync.py
=====================================
@@ -1,19 +1,16 @@
 # from future import standard_library
 # standard_library.install_aliases()
 # from builtins import str
-import logging
 import re
 import os
 import subprocess
-from datetime import datetime
-import time
 
 from biomaj_download.download.interface import DownloadInterface
 
 
 class RSYNCDownload(DownloadInterface):
     '''
-    Base class to download files from rsyncc
+    Base class to download files from rsync
     protocol = rsync
     server =
     remote.dir =
@@ -21,18 +18,76 @@ class RSYNCDownload(DownloadInterface):
     remote.files =
     '''
 
-    def __init__(self, protocol, server, remote_dir):
+    # This is used to build the rsync command
+    real_protocol = "rsync"
+
+    def __init__(self, server, rootdir):
         DownloadInterface.__init__(self)
-        logging.debug('Download')
-        self.rootdir = remote_dir
-        self.protocol = protocol
-        if server and remote_dir:
+        self.logger.debug('Download')
+        # If rootdir is not given, we are in local mode. In this case, server
+        # is interpreted as rootdir
+        self.local_mode = not rootdir
+        if not self.local_mode:
             self.server = server  # name of the remote server
-            self.remote_dir = remote_dir  # directory on the remote server
+            self.rootdir = rootdir  # directory on the remote server
+        else:
+            self.server = None
+            self.rootdir = server
+        # give a working directory to run rsync
+        if self.local_mode:
+            try:
+                os.chdir(self.rootdir)
+            except TypeError:
+                self.logger.error("RSYNC:Could not find local dir " + self.rootdir)
+
+    def _append_file_to_download(self, rfile):
+        if 'root' not in rfile or not rfile['root']:
+            rfile['root'] = self.rootdir
+        super(RSYNCDownload, self)._append_file_to_download(rfile)
+
+    def _remote_file_name(self, rfile):
+        # rfile['root'] is set to self.rootdir. We don't use os.path.join
+        # because rfile['name'] may start with /
+        url = rfile['root'] + "/" + rfile['name']
+        if not self.local_mode:
+            url = self.server + ":" + url
+        return url
+
+    def _download(self, file_path, rfile):
+        error = False
+        err_code = ''
+        url = self._remote_file_name(rfile)
+        # Create the rsync command
+        if self.credentials:
+            cmd = str(self.real_protocol) + " " + str(self.credentials) + "@" + url + " " + str(file_path)
         else:
-            if server:
-                self.server = server
-                self.remote_dir = ""
+            cmd = str(self.real_protocol) + " " + url + " " + str(file_path)
+        self.logger.debug('RSYNC:RSYNC DOWNLOAD:' + cmd)
+        # Launch the command (we are in offline_dir)
+        try:
+            p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
+            stdout, stderr = p.communicate()
+            err_code = p.returncode
+            self.test_stderr_rsync_message(stderr)
+            self.test_stderr_rsync_error(stderr)
+        except ExceptionRsync as e:
+            self.logger.error(str(self.real_protocol) + " error:" + str(e))
+        if err_code != 0:
+            self.logger.error('Error while downloading ' + rfile["name"] + ' - ' + str(err_code))
+            error = True
+        return(error)
+
+    def test_stderr_rsync_error(self, stderr):
+        stderr = str(stderr.decode('utf-8'))
+        if "rsync error" in str(stderr):
+            reason = stderr.split(str(self.real_protocol) + " error:")[1].split("\n")[0]
+            raise ExceptionRsync(reason)
+
+    def test_stderr_rsync_message(self, stderr):
+        stderr = str(stderr.decode('utf-8'))
+        if "rsync:" in str(stderr):
+            reason = stderr.split(str(self.real_protocol) + ":")[1].split("\n")[0]
+            raise ExceptionRsync(reason)
 
     def list(self, directory=''):
         '''
@@ -43,18 +98,14 @@ class RSYNCDownload(DownloadInterface):
         err_code = None
         rfiles = []
         rdirs = []
-        logging.debug('RSYNC:List')
-        # give a working directory to run rsync
-        try:
-            os.chdir(self.offline_dir)
-        except TypeError:
-            logging.error("RSYNC:list:Could not find offline_dir")
-        if self.remote_dir and self.credentials:
-            cmd = str(self.protocol) + " --list-only " + str(self.credentials) + "@" + str(self.server) + ":" + str(self.remote_dir) + str(directory)
-        elif (self.remote_dir and not self.credentials):
-            cmd = str(self.protocol) + " --list-only " + str(self.server) + ":" + str(self.remote_dir) + str(directory)
-        else:  # Local rsync for unitest
-            cmd = str(self.protocol) + " --list-only " + str(self.server) + str(directory)
+        self.logger.debug('RSYNC:List')
+        if self.local_mode:
+            remote = str(self.rootdir) + str(directory)
+        else:
+            remote = str(self.server) + ":" + str(self.rootdir) + str(directory)
+        if self.credentials:
+            remote = str(self.credentials) + "@" + remote
+        cmd = str(self.real_protocol) + " --list-only " + remote
         try:
             p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
             list_rsync, err = p.communicate()
@@ -62,9 +113,9 @@ class RSYNCDownload(DownloadInterface):
             self.test_stderr_rsync_error(err)
             err_code = p.returncode
         except ExceptionRsync as e:
-            logging.error("RsyncError:" + str(e))
+            self.logger.error("RsyncError:" + str(e))
         if err_code != 0:
-            logging.error('Error while listing ' + str(err_code))
+            self.logger.error('Error while listing ' + str(err_code))
             return(rfiles, rdirs)
         list_rsync = str(list_rsync.decode('utf-8'))
         lines = list_rsync.rstrip().split("\n")
@@ -92,97 +143,6 @@ class RSYNCDownload(DownloadInterface):
 
         return (rfiles, rdirs)
 
-    def download(self, local_dir, keep_dirs=True):
-        '''
-        Download remote files to local_dir
-
-        :param local_dir: Directory where files should be downloaded
-        :type local_dir: str
-        :param keep_dirs: keep file name directory structure or copy file in local_dir directly
-        :param keep_dirs: bool
-        :return: list of downloaded files
-        '''
-
-        logging.debug('RSYNC:Download')
-        nb_files = len(self.files_to_download)
-        cur_files = 1
-        # give a working directory to run rsync
-        try:
-            os.chdir(self.offline_dir)
-        except TypeError:
-            logging.error("RSYNC:list:Could not find offline_dir")
-        for rfile in self.files_to_download:
-            if self.kill_received:
-                raise Exception('Kill request received, exiting')
-            file_dir = local_dir
-            if 'save_as' not in rfile or rfile['save_as'] is None:
-                rfile['save_as'] = rfile['name']
-            if keep_dirs:
-                file_dir = local_dir + '/' + os.path.dirname(rfile['save_as'])
-            if re.match(r'\S*\/$', file_dir):
-                file_path = file_dir + '/' + os.path.basename(rfile['save_as'])
-            else:
-                file_path = file_dir + os.path.basename(rfile['save_as'])
-            # For unit tests only, workflow will take in charge directory creation before to avoid thread multi access
-            if not os.path.exists(file_dir):
-                os.makedirs(file_dir)
-
-            logging.debug('RSYNC:Download:Progress:' + str(cur_files) + '/' + str(nb_files) + ' downloading file ' + rfile['name'])
-            logging.debug('RSYNC:Download:Progress:' + str(cur_files) + '/' + str(nb_files) + ' save as ' + rfile['save_as'])
-            cur_files += 1
-            start_time = datetime.now()
-            start_time = time.mktime(start_time.timetuple())
-            error = self.rsync_download(file_path, rfile['name'])
-            if error:
-                rfile['download_time'] = 0
-                rfile['error'] = True
-                raise Exception("RSYNC:Download:Error:" + rfile['root'] + '/' + rfile['name'])
-            end_time = datetime.now()
-            end_time = time.mktime(end_time.timetuple())
-            rfile['download_time'] = end_time - start_time
-            self.set_permissions(file_path, rfile)
-        return(self.files_to_download)
-
-    def rsync_download(self, file_path, file_to_download):
-        error = False
-        err_code = ''
-        logging.debug('RSYNC:RSYNC DOwNLOAD')
-        # give a working directory to run rsync
-        try:
-            os.chdir(self.offline_dir)
-        except TypeError:
-            logging.error("RSYNC:list:Could not find offline_dir")
-        try:
-            if self.remote_dir and self.credentials:  # download on server
-                cmd = str(self.protocol) + " " + str(self.credentials) + "@" + str(self.server) + ":" + str(self.remote_dir) + str(file_to_download) + " " + str(file_path)
-            elif self.remote_dir and not self.credentials:
-                cmd = str(self.protocol) + " " + str(self.server) + ":" + str(self.remote_dir) + str(file_to_download) + " " + str(file_path)
-            else:  # Local rsync for unitest
-                cmd = str(self.protocol) + " " + str(self.server) + str(file_to_download) + " " + str(file_path)
-            p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
-            stdout, stderr = p.communicate()
-            err_code = p.returncode
-            self.test_stderr_rsync_message(stderr)
-            self.test_stderr_rsync_error(stderr)
-        except ExceptionRsync as e:
-            logging.error("RsyncError:" + str(e))
-        if err_code != 0:
-            logging.error('Error while downloading ' + file_to_download + ' - ' + str(err_code))
-            error = True
-        return(error)
-
-    def test_stderr_rsync_error(self, stderr):
-        stderr = str(stderr.decode('utf-8'))
-        if "rsync error" in str(stderr):
-            reason = stderr.split(str(self.protocol) + " error:")[1].split("\n")[0]
-            raise ExceptionRsync(reason)
-
-    def test_stderr_rsync_message(self, stderr):
-        stderr = str(stderr.decode('utf-8'))
-        if "rsync:" in str(stderr):
-            reason = stderr.split(str(self.protocol) + ":")[1].split("\n")[0]
-            raise ExceptionRsync(reason)
-
 
 class ExceptionRsync(Exception):
     def __init__(self, exception_reason):


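The rsync refactor introduces a local mode: when rootdir is empty, the first constructor argument is treated as a local directory and no "server:" prefix is added to the command. A sketch mirroring the updated tests (the bank directory is illustrative):

    import tempfile

    from biomaj_download.download.rsync import RSYNCDownload

    # Local mode: empty rootdir, first argument is the local root.
    rsyncd = RSYNCDownload('/tmp/bank/', '')
    (files, dirs) = rsyncd.list()
    rsyncd.match([r'^test.*\.gz$'], files, dirs)
    rsyncd.download(tempfile.mkdtemp())
    # Remote mode, RSYNCDownload('server.example.org', '/pub/db'), builds
    # commands like: rsync server.example.org:/pub/db/<file> <destination>
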
=====================================
biomaj_download/downloadservice.py
=====================================
@@ -13,10 +13,9 @@ import pika
 from flask import Flask
 from flask import jsonify
 
-from biomaj_download.download.ftp import FTPDownload
-from biomaj_download.download.http import HTTPDownload
+from biomaj_download.download.curl import CurlDownload
 from biomaj_download.download.direct import DirectFTPDownload
-from biomaj_download.download.direct import DirectHttpDownload
+from biomaj_download.download.direct import DirectHTTPDownload
 from biomaj_download.download.localcopy import LocalDownload
 from biomaj_download.message import downmessage_pb2
 from biomaj_download.download.rsync import RSYNCDownload
@@ -134,24 +133,24 @@ class DownloadService(object):
                     protocol_options={}):
         protocol = downmessage_pb2.DownloadFile.Protocol.Value(protocol_name.upper())
         downloader = None
-        if protocol in [0, 1]:
-            downloader = FTPDownload(protocol_name, server, remote_dir)
-        if protocol in [2, 3]:
-            downloader = HTTPDownload(protocol_name, server, remote_dir, http_parse)
-        if protocol == 7:
-            downloader = LocalDownload(remote_dir)
-        if protocol == 4:
-            downloader = DirectFTPDownload('ftp', server, '/')
-        if protocol == 10:
+        if protocol in [0, 1]:  # FTP, SFTP
+            downloader = CurlDownload(protocol_name, server, remote_dir)
+        if protocol in [2, 3]:  # HTTP, HTTPS (could be factored with previous case)
+            downloader = CurlDownload(protocol_name, server, remote_dir, http_parse)
+        if protocol == 4:  # DirectFTP
+            downloader = DirectFTPDownload("ftp", server, '/')
+        if protocol == 5:  # DirectHTTP
+            downloader = DirectHTTPDownload("http", server, '/')
+        if protocol == 6:  # DirectHTTPS
+            downloader = DirectHTTPDownload("https", server, '/')
+        if protocol == 10:  # DirectFTPS
             downloader = DirectFTPDownload('ftps', server, '/')
-        if protocol == 5:
-            downloader = DirectHttpDownload('http', server, '/')
-        if protocol == 6:
-            downloader = DirectHttpDownload('https', server, '/')
-        if protocol == 8:
-            downloader = RSYNCDownload('rsync', server, remote_dir)
-        if protocol == 9:
-            downloader = IRODSDownload('irods', server, remote_dir)
+        if protocol == 7:  # Local
+            downloader = LocalDownload(remote_dir)
+        if protocol == 8:  # RSYNC
+            downloader = RSYNCDownload(server, remote_dir)
+        if protocol == 9:  # iRods
+            downloader = IRODSDownload(server, remote_dir)
         if downloader is None:
             return None
 
@@ -182,11 +181,13 @@ class DownloadService(object):
 
         if save_as:
             downloader.set_save_as(save_as)
+
         if param:
             downloader.set_param(param)
 
         downloader.set_server(server)
 
+        # Set the name of the BioMAJ protocol to which we respond.
         downloader.set_protocol(protocol_name)
 
         if protocol_options is not None:


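For reference, the numeric protocol codes in the dispatch above are resolved from the protobuf enum by name, as the first line of the method does; e.g. (value per the comments added in this hunk):

    from biomaj_download.message import downmessage_pb2

    code = downmessage_pb2.DownloadFile.Protocol.Value('RSYNC')
    # -> 8, handled above by RSYNCDownload(server, remote_dir)
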
=====================================
debian/changelog
=====================================
@@ -1,12 +1,9 @@
-biomaj3-download (3.0.27-1) UNRELEASED; urgency=medium
-
-  [ PENDING ]
-  Needs python3-ftputil, in NEW queue
+biomaj3-download (3.1.0-1) unstable; urgency=medium
 
   [ Olivier Sallou ]
   * New upstream release  
 
- -- Olivier Sallou <osallou@debian.org>  Wed, 16 Oct 2019 13:17:33 +0000
+ -- Olivier Sallou <osallou@debian.org>  Tue, 12 Nov 2019 10:18:15 +0000
 
 biomaj3-download (3.0.21-1) unstable; urgency=medium
 


=====================================
debian/control
=====================================
@@ -12,6 +12,7 @@ Build-Depends: debhelper (>= 12~),
                python3-consul,
                python3-flask,
                python3-humanfriendly,
+               python3-irodsclient,
                python3-mock,
                python3-nose,
                python3-pika,
@@ -25,7 +26,8 @@ Build-Depends: debhelper (>= 12~),
                python3-yaml,
                python3-biomaj3-core (>= 3.0.19),
                python3-biomaj3-zipkin,
-               python3-ftputil
+               python3-ftputil,
+               rsync
 Standards-Version: 4.3.0
 Vcs-Browser: https://salsa.debian.org/med-team/biomaj3-download
 Vcs-Git: https://salsa.debian.org/med-team/biomaj3-download.git


=====================================
setup.py
=====================================
@@ -22,7 +22,7 @@ config = {
     'url': 'http://biomaj.genouest.org',
     'download_url': 'http://biomaj.genouest.org',
     'author_email': 'olivier.sallou at irisa.fr',
-    'version': '3.0.27',
+    'version': '3.1.0',
      'classifiers': [
         # How mature is this project? Common values are
         #   3 - Alpha


=====================================
tests/biomaj_tests.py
=====================================
@@ -1,33 +1,25 @@
-from nose.tools import *
+"""
+Note that tests tagged with the attributes 'network' and 'local_irods' are skipped in CI.
+"""
 from nose.plugins.attrib import attr
 
 import json
 import shutil
 import os
-import sys
 import tempfile
 import logging
-import copy
 import stat
-import time
 
 from mock import patch
 
-from optparse import OptionParser
-
-
 from biomaj_core.config import BiomajConfig
 from biomaj_core.utils import Utils
-from biomaj_download.download.ftp import FTPDownload
-from biomaj_download.download.direct import DirectFTPDownload, DirectHttpDownload
-from biomaj_download.download.http import HTTPDownload, HTTPParse
+from biomaj_download.download.curl import CurlDownload, HTTPParse
+from biomaj_download.download.direct import DirectFTPDownload, DirectHTTPDownload
 from biomaj_download.download.localcopy  import LocalDownload
-from biomaj_download.download.downloadthreads import DownloadThread
 from biomaj_download.download.rsync import RSYNCDownload
 from biomaj_download.download.protocolirods import IRODSDownload
 
-import pprint
-
 import unittest
 
 class UtilsForTest():
@@ -263,7 +255,7 @@ class TestBiomajHTTPDownload(unittest.TestCase):
     self.utils.clean()
 
   def test_http_list(self):
-    httpd = HTTPDownload('http', 'ftp2.fr.debian.org', '/debian/dists/', self.http_parse)
+    httpd = CurlDownload('http', 'ftp2.fr.debian.org', '/debian/dists/', self.http_parse)
     (file_list, dir_list) = httpd.list()
     httpd.close()
     self.assertTrue(len(file_list) == 1)
@@ -271,7 +263,7 @@ class TestBiomajHTTPDownload(unittest.TestCase):
   def test_http_list_dateregexp(self):
     #self.http_parse.file_date_format = "%%d-%%b-%%Y %%H:%%M"
     self.http_parse.file_date_format = "%%Y-%%m-%%d %%H:%%M"
-    httpd = HTTPDownload('http', 'ftp2.fr.debian.org', '/debian/dists/', self.http_parse)
+    httpd = CurlDownload('http', 'ftp2.fr.debian.org', '/debian/dists/', self.http_parse)
     (file_list, dir_list) = httpd.list()
     httpd.close()
     self.assertTrue(len(file_list) == 1)
@@ -287,7 +279,7 @@ class TestBiomajHTTPDownload(unittest.TestCase):
         -1
     )
     self.http_parse.file_date_format = "%%Y-%%m-%%d %%H:%%M"
-    httpd = HTTPDownload('http', 'ftp2.fr.debian.org', '/debian/dists/', self.http_parse)
+    httpd = CurlDownload('http', 'ftp2.fr.debian.org', '/debian/dists/', self.http_parse)
     (file_list, dir_list) = httpd.list()
     httpd.match([r'^README$'], file_list, dir_list)
     httpd.download(self.utils.data_dir)
@@ -304,7 +296,7 @@ class TestBiomajHTTPDownload(unittest.TestCase):
         self.config.get('http.group.file.date_format', None),
         int(self.config.get('http.group.file.size'))
     )
-    httpd = HTTPDownload('http', 'ftp2.fr.debian.org', '/debian/dists/', self.http_parse)
+    httpd = CurlDownload('http', 'ftp2.fr.debian.org', '/debian/dists/', self.http_parse)
     (file_list, dir_list) = httpd.list()
     httpd.match([r'^README$'], file_list, dir_list)
     httpd.download(self.utils.data_dir)
@@ -313,7 +305,7 @@ class TestBiomajHTTPDownload(unittest.TestCase):
 
   def test_http_download(self):
     self.http_parse.file_date_format = "%%Y-%%m-%%d %%H:%%M"
-    httpd = HTTPDownload('http', 'ftp2.fr.debian.org', '/debian/dists/', self.http_parse)
+    httpd = CurlDownload('http', 'ftp2.fr.debian.org', '/debian/dists/', self.http_parse)
     (file_list, dir_list) = httpd.list()
     print(str(file_list))
     httpd.match([r'^README$'], file_list, dir_list)
@@ -323,7 +315,7 @@ class TestBiomajHTTPDownload(unittest.TestCase):
 
   def test_http_download_in_subdir(self):
     self.http_parse.file_date_format = "%%Y-%%m-%%d %%H:%%M"
-    httpd = HTTPDownload('http', 'ftp2.fr.debian.org', '/debian/', self.http_parse)
+    httpd = CurlDownload('http', 'ftp2.fr.debian.org', '/debian/', self.http_parse)
     (file_list, dir_list) = httpd.list()
     httpd.match([r'^dists/README$'], file_list, dir_list)
     httpd.download(self.utils.data_dir)
@@ -331,6 +323,65 @@ class TestBiomajHTTPDownload(unittest.TestCase):
     self.assertTrue(len(httpd.files_to_download) == 1)
 
 
+@attr('network')
+@attr('https')
+class TestBiomajHTTPSDownload(unittest.TestCase):
+  """
+  Test HTTPS downloader
+  """
+
+  def setUp(self):
+    self.utils = UtilsForTest()
+
+  def tearDown(self):
+    self.utils.clean()
+
+  def test_download(self):
+    self.utils = UtilsForTest()
+    self.http_parse = HTTPParse(
+        "<a[\s]+href=\"([\w\-\.]+\">[\w\-\.]+.tar.gz)<\/a>[\s]+([0-9]{2}-[A-Za-z]{3}-[0-9]{4}[\s][0-9]{2}:[0-9]{2})[\s]+([0-9]+[A-Za-z])",
+        "<a[\s]+href=\"[\w\-\.]+\">([\w\-\.]+.tar.gz)<\/a>[\s]+([0-9]{2}-[A-Za-z]{3}-[0-9]{4}[\s][0-9]{2}:[0-9]{2})[\s]+([0-9]+[A-Za-z])",
+        1,
+        2,
+        1,
+        2,
+        None,
+        3
+    )
+    self.http_parse.file_date_format = "%%d-%%b-%%Y %%H:%%M"
+    httpd = CurlDownload('https', 'mirrors.edge.kernel.org', '/pub/software/scm/git/debian/', self.http_parse)
+    (file_list, dir_list) = httpd.list()
+    httpd.match([r'^git-core-0.99.6.tar.gz$'], file_list, dir_list)
+    httpd.download(self.utils.data_dir)
+    httpd.close()
+    self.assertTrue(len(httpd.files_to_download) == 1)
+
+
+@attr('network')
+@attr('sftp')
+class TestBiomajSFTPDownload(unittest.TestCase):
+  """
+  Test SFTP downloader
+  """
+
+  PROTOCOL = "ftps"
+  
+  def setUp(self):
+    self.utils = UtilsForTest()
+
+  def tearDown(self):
+    self.utils.clean()
+
+  def test_download(self):
+    sftpd = CurlDownload(self.PROTOCOL, "test.rebex.net", "/")
+    sftpd.set_credentials("demo:password")
+    (file_list, dir_list) = sftpd.list()
+    sftpd.match([r'^readme.txt$'], file_list, dir_list)
+    sftpd.download(self.utils.data_dir)
+    sftpd.close()
+    self.assertTrue(len(sftpd.files_to_download) == 1)
+
+
 @attr('directftp')
 @attr('network')
 class TestBiomajDirectFTPDownload(unittest.TestCase):
@@ -411,7 +462,7 @@ class TestBiomajDirectHTTPDownload(unittest.TestCase):
 
   def test_http_list(self):
     file_list = ['/debian/README.html']
-    ftpd = DirectHttpDownload('http', 'ftp2.fr.debian.org', '')
+    ftpd = DirectHTTPDownload('http', 'ftp2.fr.debian.org', '')
     ftpd.set_files_to_download(file_list)
     fday = ftpd.files_to_download[0]['day']
     fmonth = ftpd.files_to_download[0]['month']
@@ -424,7 +475,7 @@ class TestBiomajDirectHTTPDownload(unittest.TestCase):
 
   def test_download(self):
     file_list = ['/debian/README.html']
-    ftpd = DirectHttpDownload('http', 'ftp2.fr.debian.org', '')
+    ftpd = DirectHTTPDownload('http', 'ftp2.fr.debian.org', '')
     ftpd.set_files_to_download(file_list)
     (file_list, dir_list) = ftpd.list()
     ftpd.download(self.utils.data_dir, False)
@@ -433,7 +484,7 @@ class TestBiomajDirectHTTPDownload(unittest.TestCase):
 
   def test_download_get_params_save_as(self):
     file_list = ['/get']
-    ftpd = DirectHttpDownload('http', 'httpbin.org', '')
+    ftpd = DirectHTTPDownload('http', 'httpbin.org', '')
     ftpd.set_files_to_download(file_list)
     ftpd.param = { 'key1': 'value1', 'key2': 'value2'}
     ftpd.save_as = 'test.json'
@@ -449,7 +500,7 @@ class TestBiomajDirectHTTPDownload(unittest.TestCase):
   @attr('test')
   def test_download_save_as(self):
     file_list = ['/debian/README.html']
-    ftpd = DirectHttpDownload('http', 'ftp2.fr.debian.org', '')
+    ftpd = DirectHTTPDownload('http', 'ftp2.fr.debian.org', '')
     ftpd.set_files_to_download(file_list)
     ftpd.save_as = 'test.html'
     (file_list, dir_list) = ftpd.list()
@@ -460,7 +511,7 @@ class TestBiomajDirectHTTPDownload(unittest.TestCase):
   def test_download_post_params(self):
     #file_list = ['/debian/README.html']
     file_list = ['/post']
-    ftpd = DirectHttpDownload('http', 'httpbin.org', '')
+    ftpd = DirectHTTPDownload('http', 'httpbin.org', '')
     ftpd.set_files_to_download(file_list)
     ftpd.param = { 'key1': 'value1', 'key2': 'value2'}
     ftpd.save_as = 'test.json'
@@ -489,19 +540,19 @@ class TestBiomajFTPDownload(unittest.TestCase):
     self.utils.clean()
 
   def test_ftp_list(self):
-    ftpd = FTPDownload('ftp', 'speedtest.tele2.net', '/')
+    ftpd = CurlDownload('ftp', 'speedtest.tele2.net', '/')
     (file_list, dir_list) = ftpd.list()
     ftpd.close()
     self.assertTrue(len(file_list) > 1)
 
   @attr('test')
   def test_download(self):
-    ftpd = FTPDownload('ftp', 'speedtest.tele2.net', '/')
+    ftpd = CurlDownload('ftp', 'speedtest.tele2.net', '/')
     (file_list, dir_list) = ftpd.list()
     ftpd.match([r'^1.*KB\.zip$'], file_list, dir_list)
     # This test fails because the zip file is fake. We intercept the failure
     # and continue.
-    # See test_download_skip_uncompress_checks
+    # See test_download_skip_check_uncompress
     try:
         ftpd.download(self.utils.data_dir)
     except Exception:
@@ -511,9 +562,9 @@ class TestBiomajFTPDownload(unittest.TestCase):
         self.assertTrue(len(ftpd.files_to_download) == 2)
     ftpd.close()
 
-  def test_download_skip_checks_uncompress(self):
+  def test_download_skip_check_uncompress(self):
     # This test is similar to test_download but we skip test of zip file.
-    ftpd = FTPDownload('ftp', 'speedtest.tele2.net', '/')
+    ftpd = CurlDownload('ftp', 'speedtest.tele2.net', '/')
     ftpd.set_options(dict(skip_check_uncompress=True))
     (file_list, dir_list) = ftpd.list()
     ftpd.match([r'^1.*KB\.zip$'], file_list, dir_list)
@@ -522,7 +573,7 @@ class TestBiomajFTPDownload(unittest.TestCase):
     self.assertTrue(len(ftpd.files_to_download) == 2)
 
   def test_download_in_subdir(self):
-    ftpd = FTPDownload('ftp', 'ftp.fr.debian.org', '/debian/')
+    ftpd = CurlDownload('ftp', 'ftp.fr.debian.org', '/debian/')
     (file_list, dir_list) = ftpd.list()
     try:
         ftpd.match([r'^doc/mailing-lists.txt$'], file_list, dir_list)
@@ -534,20 +585,20 @@ class TestBiomajFTPDownload(unittest.TestCase):
     self.assertTrue(len(ftpd.files_to_download) == 1)
 
   def test_download_or_copy(self):
-    ftpd = FTPDownload('ftp', 'ftp.fr.debian.org', '/debian/')
+    ftpd = CurlDownload('ftp', 'ftp.fr.debian.org', '/debian/')
     ftpd.files_to_download = [
           {'name':'/test1', 'year': '2013', 'month': '11', 'day': '10', 'size': 10},
           {'name':'/test2', 'year': '2013', 'month': '11', 'day': '10', 'size': 10},
           {'name':'/test/test1', 'year': '2013', 'month': '11', 'day': '10', 'size': 10},
           {'name':'/test/test11', 'year': '2013', 'month': '11', 'day': '10', 'size': 10}
-          ]
+    ]
     available_files = [
           {'name':'/test1', 'year': '2013', 'month': '11', 'day': '10', 'size': 10},
           {'name':'/test12', 'year': '2013', 'month': '11', 'day': '10', 'size': 10},
           {'name':'/test3', 'year': '2013', 'month': '11', 'day': '10', 'size': 10},
           {'name':'/test/test1', 'year': '2013', 'month': '11', 'day': '10', 'size': 20},
           {'name':'/test/test11', 'year': '2013', 'month': '11', 'day': '10', 'size': 10}
-          ]
+    ]
     ftpd.download_or_copy(available_files, '/biomaj', False)
     ftpd.close()
     self.assertTrue(len(ftpd.files_to_download)==2)
@@ -566,7 +617,7 @@ class TestBiomajFTPDownload(unittest.TestCase):
     self.assertTrue(release['day']=='12')
 
   def test_ms_server(self):
-      ftpd = FTPDownload("ftp", "test.rebex.net", "/")
+      ftpd = CurlDownload("ftp", "test.rebex.net", "/")
       ftpd.set_credentials("demo:password")
       (file_list, dir_list) = ftpd.list()
       ftpd.match(["^readme.txt$"], file_list, dir_list)
@@ -579,7 +630,7 @@ class TestBiomajFTPDownload(unittest.TestCase):
       Test setting tcp_keepalive (it probably doesn't change anything here but
       we test that there is no obvious mistake in the code).
       """
-      ftpd = FTPDownload("ftp", "test.rebex.net", "/")
+      ftpd = CurlDownload("ftp", "test.rebex.net", "/")
       ftpd.set_options(dict(tcp_keepalive=10))
       ftpd.set_credentials("demo:password")
       (file_list, dir_list) = ftpd.list()
@@ -604,14 +655,14 @@ class TestBiomajFTPSDownload(unittest.TestCase):
     self.utils.clean()
 
   def test_ftps_list(self):
-    ftpd = FTPDownload(self.PROTOCOL, "test.rebex.net", "/")
+    ftpd = CurlDownload(self.PROTOCOL, "test.rebex.net", "/")
     ftpd.set_credentials("demo:password")
     (file_list, dir_list) = ftpd.list()
     ftpd.close()
     self.assertTrue(len(file_list) == 1)
 
   def test_download(self):
-    ftpd = FTPDownload(self.PROTOCOL, "test.rebex.net", "/")
+    ftpd = CurlDownload(self.PROTOCOL, "test.rebex.net", "/")
     ftpd.set_credentials("demo:password")
     (file_list, dir_list) = ftpd.list()
     ftpd.match([r'^readme.txt$'], file_list, dir_list)
@@ -624,7 +675,7 @@ class TestBiomajFTPSDownload(unittest.TestCase):
     SERVER = "demo.wftpserver.com"
     DIRECTORY = "/download/"
     CREDENTIALS = "demo-user:demo-user"
-    ftpd = FTPDownload(self.PROTOCOL, SERVER, DIRECTORY)
+    ftpd = CurlDownload(self.PROTOCOL, SERVER, DIRECTORY)
     ftpd.set_options(dict(ssl_verifyhost="False", ssl_verifypeer="False"))
     ftpd.set_credentials(CREDENTIALS)
     (file_list, dir_list) = ftpd.list()
@@ -636,7 +687,7 @@ class TestBiomajFTPSDownload(unittest.TestCase):
     SERVER = "demo.wftpserver.com"
     DIRECTORY = "/download/"
     CREDENTIALS = "demo-user:demo-user"
-    ftpd = FTPDownload(self.PROTOCOL, SERVER, DIRECTORY)
+    ftpd = CurlDownload(self.PROTOCOL, SERVER, DIRECTORY)
     ftpd.set_options(dict(ssl_verifyhost="False", ssl_verifypeer="False"))
     ftpd.set_credentials(CREDENTIALS)
     (file_list, dir_list) = ftpd.list()
@@ -651,7 +702,7 @@ class TestBiomajFTPSDownload(unittest.TestCase):
     SERVER = "demo.wftpserver.com"
     DIRECTORY = "/download/"
     CREDENTIALS = "demo-user:demo-user"
-    ftpd = FTPDownload(self.PROTOCOL, SERVER, DIRECTORY)
+    ftpd = CurlDownload(self.PROTOCOL, SERVER, DIRECTORY)
     curdir = os.path.dirname(os.path.realpath(__file__))
     cert_file = os.path.join(curdir, "caert.demo.wftpserver.com.pem")
     ftpd.set_options(dict(ssl_verifyhost="False", ssl_server_cert=cert_file))
@@ -672,7 +723,7 @@ class TestBiomajRSYNCDownload(unittest.TestCase):
     def setUp(self):
         self.utils = UtilsForTest()
 
-        self.curdir = os.path.dirname(os.path.realpath(__file__))
+        self.curdir = os.path.dirname(os.path.realpath(__file__)) + '/'
         self.examples = os.path.join(self.curdir,'bank') + '/'
         BiomajConfig.load_config(self.utils.global_properties, allow_user_config=False)
 
@@ -680,40 +731,34 @@ class TestBiomajRSYNCDownload(unittest.TestCase):
         self.utils.clean()
 
     def test_rsync_list(self):
-        rsyncd =  RSYNCDownload('rsync', self.examples, "")
-        rsyncd.set_credentials(None)
-        rsyncd.set_offline_dir(self.utils.data_dir)
+        rsyncd = RSYNCDownload(self.examples, "")
         (files_list, dir_list) = rsyncd.list()
         self.assertTrue(len(files_list) != 0)
 
     def test_rsync_match(self):
-        rsyncd =  RSYNCDownload('rsync', self.examples, "")
-        rsyncd.set_credentials(None)
-        rsyncd.set_offline_dir(self.utils.data_dir)
+        rsyncd = RSYNCDownload(self.examples, "")
         (files_list, dir_list) = rsyncd.list()
         rsyncd.match([r'^test.*\.gz$'], files_list, dir_list, prefix='', submatch=False)
         self.assertTrue(len(rsyncd.files_to_download) != 0)
 
     def test_rsync_download(self):
-        rsyncd = RSYNCDownload('rsync', self.examples, "")
-        rsyncd.set_credentials(None)
-        rsyncd.set_offline_dir(self.utils.data_dir)
-        error = rsyncd.rsync_download(self.utils.data_dir, "test2.fasta")
-        self.assertTrue(error == 0)
-
+        rsyncd = RSYNCDownload(self.examples, "")
+        rfile = {
+            "name": "test2.fasta",
+            "root": self.examples
+        }
+        error = rsyncd._download(self.utils.data_dir, rfile)
+        self.assertFalse(error)
 
     def test_rsync_general_download(self):
-        rsyncd =  RSYNCDownload('rsync', self.examples, "")
-        rsyncd.set_credentials(None)
-        rsyncd.set_offline_dir(self.utils.data_dir)
+        rsyncd = RSYNCDownload(self.examples, "")
         (files_list, dir_list) = rsyncd.list()
         rsyncd.match([r'^test.*\.gz$'],files_list,dir_list, prefix='')
         download_files=rsyncd.download(self.curdir)
         self.assertTrue(len(download_files)==1)
 
     def test_rsync_download_or_copy(self):
-        rsyncd =  RSYNCDownload('rsync', self.examples, "")
-        rsyncd.set_offline_dir(self.utils.data_dir)
+        rsyncd = RSYNCDownload(self.examples, "")
         (file_list, dir_list) = rsyncd.list()
         rsyncd.match([r'^test.*\.gz$'], file_list, dir_list, prefix='')
         files_to_download_prev = rsyncd.files_to_download
@@ -721,8 +766,7 @@ class TestBiomajRSYNCDownload(unittest.TestCase):
         self.assertTrue(files_to_download_prev != rsyncd.files_to_download)
 
     def test_rsync_download_in_subdir(self):
-        rsyncd = RSYNCDownload('rsync', self.curdir+'/', "")
-        rsyncd.set_offline_dir(self.curdir+'/')
+        rsyncd = RSYNCDownload(self.curdir, "")
         (file_list, dir_list) = rsyncd.list()
         rsyncd.match([r'^/bank/test*'], file_list, dir_list, prefix='')
         rsyncd.download(self.utils.data_dir)
@@ -732,7 +776,7 @@ class TestBiomajRSYNCDownload(unittest.TestCase):
 class iRodsResult(object):
 
     def __init__(self, collname, dataname, datasize, owner, modify):
-        self.Collname =  'tests/'
+        self.Collname = 'tests/'
         self.Dataname = 'test.fasta.gz'
         self.Datasize = 45
         self.Dataowner_name = 'biomaj'
@@ -824,8 +868,22 @@ class TestBiomajIRODSDownload(unittest.TestCase):
         initialize_mock.return_value=mock_session.configure()
         query_mock.return_value = mock_session.query(None,None,None,None,None)
         cleanup_mock.return_value = mock_session.cleanup()
-        irodsd =  IRODSDownload('irods', self.examples, "")
-        irodsd.set_credentials(None)
-        irodsd.set_offline_dir(self.utils.data_dir)
+        irodsd = IRODSDownload(self.examples, "")
         (files_list, dir_list) = irodsd.list()
         self.assertTrue(len(files_list) != 0)
+
+    @attr('local_irods')
+    def test_irods_download(self):
+        # To run this test, you need an iRODS server on localhost (default
+        # port, user 'rods', password 'rods'), and populate the collection
+        # /tempZone/home/rods with a file that matches r'^test.*\.gz$' (for
+        # instance, by copying tests/bank/test/test.fasta.gz).
+        irodsd = IRODSDownload("localhost", "/tempZone/home/rods")
+        irodsd.set_param(dict(
+            user='rods',
+            password='rods',
+        ))
+        (file_list, dir_list) = irodsd.list()
+        irodsd.match([r'^test.*\.gz$'], file_list, dir_list, prefix='')
+        irodsd.download(self.utils.data_dir)
+        self.assertTrue(len(irodsd.files_to_download) == 1)



View it on GitLab: https://salsa.debian.org/med-team/biomaj3-download/compare/02c3fd52d59228019b106c6ab120f141b14d0c64...9c72377612716f7f62a95b24203c19963d4f595b
