[med-svn] [Git][med-team/busco][upstream] New upstream version 5.2.0

Nilesh Patra (@nilesh) gitlab at salsa.debian.org
Thu Jul 1 22:04:25 BST 2021



Nilesh Patra pushed to branch upstream at Debian Med / busco


Commits:
de037af5 by Nilesh Patra at 2021-07-02T00:32:49+05:30
New upstream version 5.2.0
- - - - -


11 changed files:

- CHANGELOG
- README.md
- src/busco/Actions.py
- src/busco/BuscoConfig.py
- src/busco/BuscoDownloadManager.py
- src/busco/BuscoLogger.py
- src/busco/BuscoRunner.py
- src/busco/_version.py
- src/busco/busco_tools/Toolset.py
- src/busco/run_BUSCO.py
- tests/unittests/run_BUSCO_unittests.py


Changes:

=====================================
CHANGELOG
=====================================
@@ -1,3 +1,13 @@
+5.2.0
+- Issue #224 fixed
+- Issue #232 fixed
+- Issue #266 fixed
+- Issue #270 fixed
+- Issue #352 fixed
+- Issue #368 fixed
+- Issue #386 fixed
+- Introduce direct download option (Issues #252, #288)
+
 5.1.3
 - Issue #408 fixed
 - Bug fixes


=====================================
README.md
=====================================
@@ -32,20 +32,16 @@ Report problems on the BUSCO issue board at https://gitlab.com/ezlab/busco/issue
 ***
 ### How to cite BUSCO
 
-*BUSCO: Assessing Genome Assembly and Annotation Completeness.*
-Mathieu Seppey, Mosè Manni, Evgeny M. Zdobnov
-In: Kollmar M. (eds) Gene Prediction. Methods in Molecular Biology, vol 1962. Humana, New York, NY. 2019
-doi.org/10.1007/978-1-4939-9173-0_14
-
-*BUSCO applications from quality assessments to gene prediction and phylogenomics.*
-Robert M. Waterhouse, Mathieu Seppey, Felipe A. Simão, Mosè Manni, Panagiotis Ioannidis, Guennadi Klioutchnikov, Evgenia V. Kriventseva, and Evgeny M. Zdobnov
-*Mol Biol Evol*, published online Dec 6, 2017 
-doi: 10.1093/molbev/msx319 
-
-*BUSCO: assessing genome assembly and annotation completeness with single-copy orthologs.*
-Felipe A. Simão, Robert M. Waterhouse, Panagiotis Ioannidis, Evgenia V. Kriventseva, and Evgeny M. Zdobnov
-*Bioinformatics*, published online June 9, 2015 
-doi: 10.1093/bioinformatics/btv351
+If you have used BUSCO v4 or V5 in your analyses, please cite:
+
+Manni M., Berkeley M.R., Seppey M., Simao F.A., Zdobnov E.M. 2021. BUSCO update: novel and streamlined workflows along with broader and deeper phylogenetic coverage for scoring of eukaryotic, prokaryotic, and viral genomes. arXiv:2106.11799 [q-bio] [Internet]. Available from: http://arxiv.org/abs/2106.11799
+
+Previous publications:
+BUSCO: Assessing Genome Assembly and Annotation Completeness. Mathieu Seppey, Mosè Manni, Evgeny M. Zdobnov In: Kollmar M. (eds) Gene Prediction. Methods in Molecular Biology, vol 1962. Humana, New York, NY. 2019 doi.org/10.1007/978-1-4939-9173-0_14
+
+BUSCO applications from quality assessments to gene prediction and phylogenomics. Robert M. Waterhouse, Mathieu Seppey, Felipe A. Simão, Mosè Manni, Panagiotis Ioannidis, Guennadi Klioutchnikov, Evgenia V. Kriventseva, and Evgeny M. Zdobnov Mol Biol Evol, published online Dec 6, 2017 doi: 10.1093/molbev/msx319
+
+BUSCO: assessing genome assembly and annotation completeness with single-copy orthologs. Felipe A. Simão, Robert M. Waterhouse, Panagiotis Ioannidis, Evgenia V. Kriventseva, and Evgeny M. Zdobnov Bioinformatics, published online June 9, 2015 doi: 10.1093/bioinformatics/btv351
 
 Copyright (c) 2016-2021, Evgeny Zdobnov (ez at ezlab.org)
 Licensed under the MIT license. See LICENSE.md file.


=====================================
src/busco/Actions.py
=====================================
@@ -2,6 +2,7 @@ import argparse
 from busco.BuscoConfig import PseudoConfig
 from busco.ConfigManager import BuscoConfigManager
 from busco.BuscoLogger import BuscoLogger
+from busco.BuscoDownloadManager import BuscoDownloadManager
 import os
 import sys
 
@@ -15,19 +16,22 @@ class ListLineagesAction(argparse.Action):
 
     def __call__(self, parser, namespace, values, option_string=None):
         try:
-            self.config_manager = BuscoConfigManager({})
+            self.config_manager = BuscoConfigManager(namespace.__dict__)
         except SystemExit as se:
             type(self).logger.error(se)
             raise SystemExit(1)
 
-        self.config = PseudoConfig(self.config_manager.config_file)
+        self.config = PseudoConfig(
+            self.config_manager.config_file, self.config_manager.params
+        )
         try:
             self.config.load()
             self.print_lineages()
         except SystemExit as se:
             type(self).logger.error(se)
+            raise SystemExit(1)
         finally:
-            os.remove("busco_{}.log".format(BuscoLogger.random_id))
+            os.remove("busco_{}.log".format(BuscoLogger.pid))
             parser.exit()
 
     def print_lineages(self):
@@ -45,6 +49,76 @@ class ListLineagesAction(argparse.Action):
         return lineages_list_file
 
 
+class DirectDownload(argparse.Action):
+    logger = BuscoLogger.get_logger(__name__)
+
+    def __init__(
+        self, option_strings, dest, nargs="*", default="==SUPPRESS==", **kwargs
+    ):
+        super().__init__(option_strings, dest, nargs=nargs, default=default, **kwargs)
+
+    def __call__(self, parser, namespace, values, option_string=None):
+        try:
+            self.config_manager = BuscoConfigManager(namespace.__dict__)
+        except SystemExit as se:
+            type(self).logger.error(se)
+            raise SystemExit(1)
+
+        self.config = PseudoConfig(
+            self.config_manager.config_file, self.config_manager.params
+        )
+        try:
+            self.config.load()
+            bdm = BuscoDownloadManager(self.config)
+            bdm.update_data = True
+            self.download_datasets(bdm, values)
+
+        except SystemExit as se:
+            type(self).logger.error(se)
+            raise SystemExit(1)
+        finally:
+            os.remove("busco_{}.log".format(BuscoLogger.random_id))
+            parser.exit()
+
+    def download_datasets(self, bdm, values):
+        for item in values:
+            self.get(bdm, item)
+
+    def get(self, bdm, item):
+        if item == "all":
+            files_to_get = bdm.version_files.index
+        elif item == "prokaryota":
+            files_to_get = [
+                item
+                for item in bdm.version_files.index
+                if bdm.version_files.loc[item]["domain"] == "Prokaryota"
+            ]
+        elif item == "eukaryota":
+            files_to_get = [
+                item
+                for item in bdm.version_files.index
+                if bdm.version_files.loc[item]["domain"] == "Eukaryota"
+            ]
+        elif item == "virus":
+            files_to_get = [
+                item
+                for item in bdm.version_files.index
+                if bdm.version_files.loc[item]["domain"] == "Virus"
+            ]
+        else:
+            try:
+                if isinstance(item, str) and item in bdm.version_files.index:
+                    files_to_get = [item]
+                else:
+                    raise KeyError
+            except KeyError:
+                "{} is not a recognized option".format(item)
+
+        filetypes = [bdm.version_files.loc[f]["type"] for f in files_to_get]
+        for f, filename in enumerate(files_to_get):
+            bdm.get(filename, filetypes[f])
+
+
 class CleanHelpAction(argparse.Action):
     def __init__(self, option_strings, dest, nargs=0, default="==SUPPRESS==", **kwargs):
         super().__init__(option_strings, dest, nargs=nargs, default=default, **kwargs)
@@ -52,7 +126,7 @@ class CleanHelpAction(argparse.Action):
     def __call__(self, parser, namespace, values, option_string=None):
         parser.print_help()
         try:
-            os.remove("busco_{}.log".format(BuscoLogger.random_id))
+            os.remove("busco_{}.log".format(BuscoLogger.pid))
         except OSError:
             pass
         parser.exit()
@@ -79,7 +153,7 @@ class CleanVersionAction(argparse.Action):
         formatter.add_text(version)
         parser._print_message(formatter.format_help(), sys.stdout)
         try:
-            os.remove("busco_{}.log".format(BuscoLogger.random_id))
+            os.remove("busco_{}.log".format(BuscoLogger.pid))
         except OSError:
             pass
         parser.exit()


=====================================
src/busco/BuscoConfig.py
=====================================
@@ -52,6 +52,35 @@ class BaseConfig(ConfigParser):
         "hmmsearch",
     }
 
+    PERMITTED_OPTIONS = [
+        "in",
+        "out",
+        "out_path",
+        "mode",
+        "auto-lineage",
+        "auto-lineage-prok",
+        "auto-lineage-euk",
+        "cpu",
+        "force",
+        "restart",
+        "download_path",
+        "datasets_version",
+        "quiet",
+        "offline",
+        "long",
+        "augustus_parameters",
+        "augustus_species",
+        "download_base_url",
+        "lineage_dataset",
+        "update-data",
+        "metaeuk_parameters",
+        "metaeuk_rerun_parameters",
+        "evalue",
+        "limit",
+        "use_augustus",
+        "batch_mode",
+    ]
+
     def __init__(self):
         super().__init__()
         config_dict = {"busco_run": type(self).DEFAULT_ARGS_VALUES}
@@ -95,21 +124,36 @@ class BaseConfig(ConfigParser):
         self.downloader = BuscoDownloadManager(self)
         return
 
-    # @log("Setting value in config")
-    # def set(self, *args, **kwargs):
-    #     super().set(*args, **kwargs)
+    def _update_config_with_args(self, args):
+        """
+        Include command line arguments in config. Overwrite any values given in the config file.
+        :param args: Dictionary of parsed command line arguments. To see full list, inspect run_BUSCO_unittests.py or
+        type busco -h
+        :type args: dict
+        :return:
+        """
+        for key, val in args.items():
+            if key in type(self).PERMITTED_OPTIONS:
+                if val is not None and type(val) is not bool:
+                    self.set("busco_run", key, str(val))
+                elif val:  # if True
+                    self.set("busco_run", key, "True")
+        return
 
 
 class PseudoConfig(BaseConfig):
-    def __init__(self, conf_file):
+    def __init__(self, conf_file, params):
         super().__init__()
         self.conf_file = conf_file
+        self.params = params
+        self.params = params
 
     def load(self):
         if self.conf_file != "local environment":
             self._load_config_file()
         else:
             self.set("busco_run", "update-data", "True")
+        self._update_config_with_args(self.params)
         self._fill_default_values()
         self._init_downloader()
 
@@ -329,35 +373,6 @@ class BuscoConfigMain(BuscoConfig, BaseConfig):
 
     MANDATORY_USER_PROVIDED_PARAMS = ["in", "out", "mode"]
 
-    PERMITTED_OPTIONS = [
-        "in",
-        "out",
-        "out_path",
-        "mode",
-        "auto-lineage",
-        "auto-lineage-prok",
-        "auto-lineage-euk",
-        "cpu",
-        "force",
-        "restart",
-        "download_path",
-        "datasets_version",
-        "quiet",
-        "offline",
-        "long",
-        "augustus_parameters",
-        "augustus_species",
-        "download_base_url",
-        "lineage_dataset",
-        "update-data",
-        "metaeuk_parameters",
-        "metaeuk_rerun_parameters",
-        "evalue",
-        "limit",
-        "use_augustus",
-        "batch_mode",
-    ]
-
     def __init__(self, conf_file, params, **kwargs):
         """
         :param conf_file: a path to a config.ini file
@@ -693,22 +708,6 @@ class BuscoConfigMain(BuscoConfig, BaseConfig):
 
         return
 
-    def _update_config_with_args(self, args):
-        """
-        Include command line arguments in config. Overwrite any values given in the config file.
-        :param args: Dictionary of parsed command line arguments. To see full list, inspect run_BUSCO_unittests.py or
-        type busco -h
-        :type args: dict
-        :return:
-        """
-        for key, val in args.items():
-            if key in type(self).PERMITTED_OPTIONS:
-                if val is not None and type(val) is not bool:
-                    self.set("busco_run", key, str(val))
-                elif val:  # if True
-                    self.set("busco_run", key, "True")
-        return
-
 
 # Code taken from https://dave.dkjones.org/posts/2013/pretty-print-log-python/
 class PrettyLog:


=====================================
src/busco/BuscoDownloadManager.py
=====================================
@@ -19,6 +19,7 @@ import hashlib
 import urllib.request
 from urllib.error import URLError
 import gzip
+import pandas as pd
 
 from busco.BuscoLogger import BuscoLogger
 from busco.BuscoLogger import LogDecorator as log
@@ -35,7 +36,7 @@ class BuscoDownloadManager:
     Else, a warning is produced.
     """
 
-    version_files = {}
+    version_files = None
 
     def __init__(self, config):
         """
@@ -47,7 +48,7 @@ class BuscoDownloadManager:
         self.download_base_url = config.get("busco_run", "download_base_url")
         self.local_download_path = config.get("busco_run", "download_path")
         self._create_main_download_dir()
-        if not type(self).version_files and not self.offline:
+        if not type(self).version_files is not None and not self.offline:
             self._load_versions()
 
     def _create_main_download_dir(self):
@@ -58,15 +59,12 @@ class BuscoDownloadManager:
     def _load_versions(self):
         try:
             versions_file = self._obtain_versions_file()
-            with open(versions_file, "r") as v_file:
-                for line in v_file:
-                    line = line.strip().split("\t")
-                    dataset_name = line[0]
-                    dataset_date = line[1]
-                    dataset_hash = line[2]
-                    type(self).version_files.update(
-                        {dataset_name: (dataset_date, dataset_hash)}
-                    )
+            type(self).version_files = pd.read_csv(
+                versions_file,
+                sep="\t",
+                names=["dataset", "date", "hash", "domain", "type"],
+                index_col="dataset",
+            )
         except URLError as e:
             if self.offline:
                 logger.warning(
@@ -118,7 +116,7 @@ class BuscoDownloadManager:
 
     def _check_existing_version(self, local_filepath, category, data_basename):
         try:
-            latest_update = type(self).version_files[data_basename][0]
+            latest_update = type(self).version_files.loc[data_basename]["date"]
         except KeyError:
             raise BuscoError(
                 "{} is not a valid option for '{}'".format(data_basename, category)
@@ -153,7 +151,7 @@ class BuscoDownloadManager:
                 > 0
             )
 
-        hash = type(self).version_files[data_basename][1]
+        hash = type(self).version_files.loc[data_basename]["hash"]
 
         return present, up_to_date, latest_version, local_filepath, hash
 


=====================================
src/busco/BuscoLogger.py
=====================================
@@ -18,10 +18,6 @@ import logging.handlers
 import sys
 import io
 import os
-import select
-import time
-import threading
-import random
 from busco.Exceptions import BatchFatalError, BuscoError
 
 from configparser import NoOptionError
@@ -138,103 +134,6 @@ class LogDecorator:
         return
 
 
-class ToolLogger(logging.getLoggerClass()):
-
-    _level = logging.DEBUG
-
-    def __init__(self, filename):
-        super().__init__(filename)
-        self.setLevel(type(self)._level)
-        self._external_formatter = logging.Formatter("%(message)s")
-        self._file_hdlr = logging.FileHandler(filename, mode="a", encoding="UTF-8")
-        self._file_hdlr.setFormatter(self._external_formatter)
-        self.addHandler(self._file_hdlr)
-
-
-# The following code was created by combining code based on a SO answer here:
-# https://stackoverflow.com/questions/4713932/decorate-delegate-a-file-object-to-add-functionality/4838875#4838875
-# with code from the Python docs here:
-# https://docs.python.org/3.7/howto/logging-cookbook.html#network-logging
-# and is used to log multiple processes to the same file by way of a SocketHandler and separate the streams of
-# external tools into stdout and stderr.
-
-
-class StreamLogger(io.IOBase):
-    def __init__(self, level, logger, augustus_out=False):
-        self.logger = logger
-        self.level = level
-        self.augustus_out = augustus_out
-        self._run = None
-        self.pipe = os.pipe()
-        if self.augustus_out:
-            self.gene_found = False
-            self.output_complete = False
-            self.thread = threading.Thread(target=self._flusher_augustus_out)
-        else:
-            self.thread = threading.Thread(target=self._flusher)
-        self.thread.start()
-
-    def _flusher_augustus_out(self):
-        self._run = True
-        buf = b""
-        timeout = 10
-        read_only = select.POLLIN | select.POLLPRI | select.POLLHUP | select.POLLERR
-        # Switched from select.select() to select.poll() using examples at https://pymotw.com/2/select/
-        # This is necessary to handle greater than 1024 file descriptors, but makes BUSCO incompatible with Windows.
-        poller = select.poll()
-        server = self.pipe[0]
-        poller.register(server, read_only)
-        while self._run or (self.gene_found and not self.output_complete):
-            events = poller.poll(timeout)
-            for fd, flag in events:
-                if flag & (select.POLLIN | select.POLLPRI):
-                    buf += os.read(fd, 4096)
-                    while b"\n" in buf:
-                        if b"start gene" in buf:
-                            self.gene_found = True
-                        if b"command line" in buf:
-                            self.output_complete = True
-                        data, buf = buf.split(b"\n", 1)
-                        self.write(data.decode())
-
-        self._run = None
-
-    def _flusher(self):
-        self._run = True
-        buf = b""
-        timeout = 10
-        read_only = select.POLLIN | select.POLLPRI | select.POLLHUP | select.POLLERR
-        # Switched from select.select() to select.poll() using examples at https://pymotw.com/2/select/
-        # This is necessary to handle greater than 1024 file descriptors, but makes BUSCO incompatible with Windows.
-        poller = select.poll()
-        server = self.pipe[0]
-        poller.register(server, read_only)
-        while self._run:
-            events = poller.poll(timeout)
-            for fd, flag in events:
-                if flag & (select.POLLIN | select.POLLPRI):
-                    buf += os.read(fd, 4096)
-                    while b"\n" in buf:
-                        data, buf = buf.split(b"\n", 1)
-                        self.write(data.decode())
-
-        self._run = None
-
-    def write(self, data):
-        return self.logger.log(self.level, data)
-
-    def fileno(self):
-        return self.pipe[1]
-
-    def close(self):
-        if self._run:
-            self._run = False
-            while self._run is not None:
-                time.sleep(0.01)
-            os.close(self.pipe[0])
-            os.close(self.pipe[1])
-
-
 class BuscoLogger(logging.getLoggerClass()):
     """
     This class customizes the _logger class
@@ -243,7 +142,8 @@ class BuscoLogger(logging.getLoggerClass()):
     _level = logging.DEBUG
     _has_warning = False
     warn_output = io.StringIO()
-    random_id = str(random.getrandbits(32))
+    ppid = str(os.getppid())
+    pid = str(os.getpid())
 
     def __init__(self, name):
         """
@@ -252,9 +152,12 @@ class BuscoLogger(logging.getLoggerClass()):
         """
         super(BuscoLogger, self).__init__(name)
         self.setLevel(BuscoLogger._level)
-        self._normal_formatter = logging.Formatter("%(levelname)s:\t%(message)s")
+        self._normal_formatter = logging.Formatter(
+            "%(asctime)s %(levelname)s:\t%(message)s", datefmt="%Y-%m-%d %H:%M:%S"
+        )
         self._verbose_formatter = logging.Formatter(
-            "%(levelname)s:%(name)s\t%(message)s"
+            "%(asctime)s %(levelname)s:%(name)s\t%(message)s",
+            datefmt="%Y-%m-%d %H:%M:%S",
         )
         self._external_formatter = logging.Formatter("%(message)s")
 
@@ -270,10 +173,13 @@ class BuscoLogger(logging.getLoggerClass()):
         self.addHandler(self._err_hdlr)
 
         try:
-            # Random id used in filename to avoid complications for parallel BUSCO runs.
-            self._file_hdlr = logging.FileHandler(
-                "busco_{}.log".format(type(self).random_id), mode="a"
-            )
+            # Identify main log file with process ID and have all spawned processes log to the correct file
+            log_filename = "busco_{}.log".format(type(self).ppid)
+            if not os.path.exists(log_filename):
+                log_filename = "busco_{}.log".format(type(self).pid)
+
+            # Process id used in filename to avoid complications for parallel BUSCO runs.
+            self._file_hdlr = logging.FileHandler(log_filename, mode="a")
         except IOError as e:
             errStr = (
                 "No permission to write in the current directory: {}".format(
@@ -296,6 +202,13 @@ class BuscoLogger(logging.getLoggerClass()):
     def __call__(self):
         pass
 
+    @classmethod
+    def reset(cls):
+        cls._level = logging.DEBUG
+        cls._has_warning = False
+        cls.warn_output = io.StringIO()
+        return
+
     @staticmethod
     def get_logger(name, config=None):
         """


=====================================
src/busco/BuscoRunner.py
=====================================
@@ -188,6 +188,7 @@ class BatchRunner:
                 type(self).batch_results.append(run_summary)
 
                 AnalysisRunner.reset()
+                BuscoLogger.reset()
 
             except NoOptionError as noe:
                 raise BatchFatalError(noe)
@@ -195,7 +196,17 @@ class BatchRunner:
             except BatchFatalError:
                 raise
 
-            except BuscoError:
+            except BuscoError as be:
+                if "did not recognize any genes" in be.value:
+                    type(self).batch_results.append(
+                        "{}\tNo genes found\n".format(os.path.basename(input_file))
+                    )
+                else:
+                    type(self).batch_results.append(
+                        "{}\tRun failed; check logs\n".format(
+                            os.path.basename(input_file)
+                        )
+                    )
                 continue
 
         try:
@@ -579,13 +590,13 @@ class AnalysisRunner:
             if not os.path.exists(log_folder):
                 os.makedirs(log_folder)
             shutil.move(
-                "busco_{}.log".format(BuscoLogger.random_id),
+                "busco_{}.log".format(BuscoLogger.pid),
                 os.path.join(log_folder, "busco.log"),
             )
         except OSError:
             logger.warning(
                 "Unable to move 'busco_{}.log' to the 'logs' folder.".format(
-                    BuscoLogger.random_id
+                    BuscoLogger.pid
                 )
             )
         return
@@ -616,6 +627,9 @@ class AnalysisRunner:
             "For assistance with interpreting the results, please consult the userguide: "
             "https://busco.ezlab.org/busco_userguide.html\n"
         )
+        logger.info(
+            "Visit this page https://gitlab.com/ezlab/busco#how-to-cite-busco to see how to cite BUSCO"
+        )
 
 
 class SmartBox:


=====================================
src/busco/_version.py
=====================================
@@ -6,4 +6,4 @@ Copyright (c) 2016-2021, Evgeny Zdobnov (ez at ezlab.org)
 Licensed under the MIT license. See LICENSE.md file.
 
 """
-__version__ = "5.1.3"
+__version__ = "5.2.0"


=====================================
src/busco/busco_tools/Toolset.py
=====================================
@@ -14,14 +14,12 @@ Licensed under the MIT license. See LICENSE.md file.
 import os
 import subprocess
 from subprocess import TimeoutExpired
-from multiprocessing import Process, Pool, Value, Lock
+from multiprocessing import Process, Pool, Value, set_start_method
 import time
 from abc import ABCMeta, abstractmethod
-from busco.BuscoLogger import BuscoLogger, ToolLogger
+from busco.BuscoLogger import BuscoLogger
 from busco.BuscoLogger import LogDecorator as log
-from busco.BuscoLogger import StreamLogger
 from busco.Exceptions import BatchFatalError
-import logging
 
 logger = BuscoLogger.get_logger(__name__)
 
@@ -65,26 +63,25 @@ class Job(Process):
         Start external process and block the current thread's execution
         till the process' run is over
         """
-        with StreamLogger(logging.DEBUG, self.job_outlogger, **self.kwargs) as out:
-            with StreamLogger(logging.ERROR, self.job_errlogger) as err:
+
+        with open(self.job_outlogger, "wb") as f_out:
+            with open(self.job_errlogger, "wb") as f_err:
                 try:
-                    # Stick with Popen(), communicate() and wait() instead of just run() to ensure compatibility with
-                    # Python versions < 3.5.
-                    p = subprocess.Popen(
-                        self.cmd_line, shell=False, stdout=out, stderr=err, cwd=self.cwd
+                    process = subprocess.run(
+                        self.cmd_line,
+                        capture_output=True,  # stdout and stderr streams are stored and written to file after job completion
+                        cwd=self.cwd,
+                        shell=False,
+                        timeout=self.timeout,
                     )
-                    p.wait(self.timeout)
                 except TimeoutExpired:
-                    p.kill()
                     logger.warning(
                         "The following job was killed as it was taking too long (>1hr) to "
                         "complete.\n{}".format(" ".join(self.cmd_line))
                     )
+                f_out.write(process.stdout)
+                f_err.write(process.stderr)
 
-        self.job_outlogger._file_hdlr.close()
-        self.job_outlogger.removeHandler(self.job_outlogger._file_hdlr)
-        self.job_errlogger._file_hdlr.close()
-        self.job_errlogger.removeHandler(self.job_errlogger._file_hdlr)
         with cnt.get_lock():
             cnt.value += 1
 
@@ -106,6 +103,8 @@ class Tool(metaclass=ABCMeta):
     Collection of utility methods used by all tools
     """
 
+    set_start_method("spawn", force=True)
+
     def __init__(self):
         """
         Initialize job list for a tool
@@ -116,7 +115,7 @@ class Tool(metaclass=ABCMeta):
         """
         if self.name == "augustus":
             self.kwargs = {"augustus_out": True}
-            self.timeout = 3600
+            self.timeout = 3600  # Possibly no longer necessary from 5.2.0 with the new logging system in place, but no harm to leave it here
         else:
             self.kwargs = {}
             self.timeout = None
@@ -149,20 +148,16 @@ class Tool(metaclass=ABCMeta):
         """
         Create one work item
         """
-        self.tool_outlogger = ToolLogger(self.logfile_path_out)
-        self.tool_errlogger = ToolLogger(self.logfile_path_err)
         job = Job(
             self.name,
             self.cmd[:],
-            self.tool_outlogger,
-            self.tool_errlogger,
+            self.logfile_path_out,
+            self.logfile_path_err,
             self.timeout,
             self.cwd,
             **self.kwargs
         )
         self.jobs_to_run.append(job)
-        # if self.count_jobs_created:
-        #     self.total += 1
         return job
 
     def remove_job(self, job):


=====================================
src/busco/run_BUSCO.py
=====================================
@@ -27,7 +27,12 @@ from busco.Exceptions import BatchFatalError, BuscoError
 from busco.BuscoLogger import BuscoLogger
 from busco.BuscoLogger import LogDecorator as log
 from busco.ConfigManager import BuscoConfigManager
-from busco.Actions import ListLineagesAction, CleanHelpAction, CleanVersionAction
+from busco.Actions import (
+    ListLineagesAction,
+    CleanHelpAction,
+    CleanVersionAction,
+    DirectDownload,
+)
 from busco.ConfigManager import BuscoConfigMain
 
 # from busco.busco_tools.Toolset import ToolException
@@ -130,7 +135,9 @@ def _parse_args():
     parser = argparse.ArgumentParser(
         description="Welcome to BUSCO {}: the Benchmarking Universal Single-Copy Ortholog assessment tool.\n"
         "For more detailed usage information, please review the README file provided with "
-        "this distribution and the BUSCO user guide.".format(busco.__version__),
+        "this distribution and the BUSCO user guide. Visit this page https://gitlab.com/ezlab/busco#how-to-cite-busco to see how to cite BUSCO".format(
+            busco.__version__
+        ),
         usage="busco -i [SEQUENCE_FILE] -l [LINEAGE] -o [OUTPUT_NAME] -m [MODE] [OTHER OPTIONS]",
         formatter_class=RawTextHelpFormatter,
         add_help=False,
@@ -248,6 +255,16 @@ def _parse_args():
         help="Specify the version of BUSCO datasets, e.g. odb10",
     )
 
+    optional.add_argument(
+        "--download",
+        dest="download",
+        required=False,
+        type=str,
+        metavar="dataset",
+        action=DirectDownload,
+        help='Download dataset. Possible values are a specific dataset name, "all", "prokaryota", "eukaryota", or "virus". If used together with other command line arguments, make sure to place this last.',
+    )
+
     optional.add_argument(
         "--download_base_url",
         dest="download_base_url",


=====================================
tests/unittests/run_BUSCO_unittests.py
=====================================
@@ -86,6 +86,7 @@ class TestParams(unittest.TestCase):
             "augustus_species": None,
             "long": False,
             "datasets_version": None,
+            "download": "==SUPPRESS==",
             "download_base_url": None,
             "download_path": None,
             "update-data": False,
@@ -142,6 +143,7 @@ class TestParams(unittest.TestCase):
             "augustus_species": None,
             "long": False,
             "datasets_version": None,
+            "download": "==SUPPRESS==",
             "download_base_url": None,
             "download_path": None,
             "update-data": False,
@@ -209,6 +211,7 @@ class TestParams(unittest.TestCase):
             "metaeuk_parameters": metaeuk_parameters,
             "metaeuk_rerun_parameters": metaeuk_rerun_parameters,
             "datasets_version": datasets_version,
+            "download": "==SUPPRESS==",
             "download_base_url": download_base_url,
             "download_path": download_path,
             "auto-lineage": True,



View it on GitLab: https://salsa.debian.org/med-team/busco/-/commit/de037af5f0c0046066bc51e3d27ca54c5cd0092b

-- 
View it on GitLab: https://salsa.debian.org/med-team/busco/-/commit/de037af5f0c0046066bc51e3d27ca54c5cd0092b
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210701/2f9a6ba1/attachment-0001.htm>


More information about the debian-med-commit mailing list