[med-svn] [Git][med-team/busco][upstream] New upstream version 5.1.2

Nilesh Patra gitlab at salsa.debian.org
Tue Apr 20 12:32:43 BST 2021



Nilesh Patra pushed to branch upstream at Debian Med / busco


Commits:
eb1970a6 by Nilesh Patra at 2021-04-20T16:59:41+05:30
New upstream version 5.1.2
- - - - -


29 changed files:

- CHANGELOG
- README.md
- src/busco/Actions.py
- src/busco/AutoLineage.py
- src/busco/BuscoConfig.py
- src/busco/BuscoDownloadManager.py
- src/busco/BuscoLogger.py
- src/busco/BuscoPlacer.py
- src/busco/BuscoRunner.py
- src/busco/ConfigManager.py
- + src/busco/Exceptions.py
- src/busco/_version.py
- src/busco/analysis/Analysis.py
- src/busco/analysis/BuscoAnalysis.py
- src/busco/analysis/GenomeAnalysis.py
- src/busco/busco_tools/Toolset.py
- src/busco/busco_tools/augustus.py
- src/busco/busco_tools/base.py
- src/busco/busco_tools/blast.py
- src/busco/busco_tools/hmmer.py
- src/busco/busco_tools/metaeuk.py
- src/busco/busco_tools/prodigal.py
- src/busco/run_BUSCO.py
- tests/unittest_runner.py
- tests/unittests/AutoLineage_unittests.py
- tests/unittests/BuscoConfig_unittests.py
- + tests/unittests/BuscoRunner_unittests.py
- tests/unittests/ConfigManager_unittests.py
- tests/unittests/run_BUSCO_unittests.py


Changes:

=====================================
CHANGELOG
=====================================
@@ -1,3 +1,18 @@
+5.1.2
+- Fix bug in batch mode that affects Augustus runs
+
+5.1.1
+- Fix bug in batch mode
+
+5.1.0
+- Implement batch mode
+- Issue #221 fixed
+- Issue #328 fixed
+- Issue #360 fixed
+- Issue #388 fixed
+- Issue #406 fixed
+- Issue #415 fixed
+
 5.0.0
 - Implement metaeuk exon overlap filter
 - Use metaeuk for eukaryote transcriptomes
@@ -18,14 +33,20 @@
 - Issue #373 fixed
 - Issue #385 fixed
 
-
-
 5.beta.1
 - Provide option to run augustus instead of metaeuk
 
 5.beta
 - Replace Augustus with Metaeuk
 
+4.1.4
+- Fix Augustus parsing bug
+
+4.1.3
+- Issue #296 fixed
+- Issue #305 fixed
+- Augustus parser improved
+
 4.1.2
 - Issue #295 fixed
 


=====================================
README.md
=====================================
@@ -5,6 +5,7 @@ For full documentation please consult the user guide: https://busco.ezlab.org/bu
 Main changes in v5:
 
 - Metaeuk is used as default gene predictor for eukaryote pipeline. Augustus is maintained and can be used optionally instead of Metaeuk.
+- Introduction of batch mode: input argument can be a folder containing input files
 - The folder structure has changed, so if doing a manual installation, make sure to completely remove any previous versions of BUSCO before installing v5.
 
 ***


=====================================
src/busco/Actions.py
=====================================
@@ -18,7 +18,7 @@ class ListLineagesAction(argparse.Action):
             self.config_manager = BuscoConfigManager({})
         except SystemExit as se:
             type(self).logger.error(se)
-            raise SystemExit()
+            raise SystemExit(1)
 
         self.config = PseudoConfig(self.config_manager.config_file)
         try:


=====================================
src/busco/AutoLineage.py
=====================================
@@ -1,15 +1,15 @@
 import os
-from busco.BuscoConfig import BuscoConfigAuto
 from busco.BuscoPlacer import BuscoPlacer
 from busco.BuscoLogger import BuscoLogger
 from busco.BuscoLogger import LogDecorator as log
-from busco.BuscoRunner import BuscoRunner
+from busco.BuscoRunner import AnalysisRunner
+from busco.Exceptions import BuscoError
 import numpy as np
 
 logger = BuscoLogger.get_logger(__name__)
 
 
-class NoGenesError(SystemExit):
+class NoGenesError(BuscoError):
 
     def __init__(self):
         super().__init__("No genes were recognized by BUSCO. Please check the content of your input file.")
@@ -34,8 +34,9 @@ class AutoSelectLineage:
          "Because of overlapping markers/spurious matches among domains, busco matches in another domain do not "
          "necessarily mean that your genome/proteome contains sequences from this domain. "
          "However, a high busco score in multiple domains might help you identify possible contaminations.", logger)
-    def __init__(self, config):
-        self.config = config
+    def __init__(self, config_manager):
+        self.config_manager = config_manager
+        self.config = self.config_manager.config_main
         if self.config.getboolean("busco_run", "auto-lineage-prok"):
             self.all_lineages = ["archaea", "bacteria"]
         elif self.config.getboolean("busco_run", "auto-lineage-euk"):
@@ -71,6 +72,10 @@ class AutoSelectLineage:
         self.f_percents.append(f_percent)
         return
 
+    @classmethod
+    def reset(cls):
+        cls.runners = []
+
     @log("Running auto selector", logger, debug=True)
     def run_auto_selector(self):
         """
@@ -91,8 +96,6 @@ class AutoSelectLineage:
 
         logger.info("{} selected\n".format(os.path.basename(self.best_match_lineage_dataset)))
         self.config.set("busco_run", "domain_run_name", os.path.basename(self.best_match_lineage_dataset))
-        BuscoRunner.final_results.append(self.selected_runner.analysis.hmmer_runner.hmmer_results_lines)
-        BuscoRunner.results_datasets.append(os.path.basename(self.best_match_lineage_dataset))
         return
 
     def virus_check(self):
@@ -114,8 +117,8 @@ class AutoSelectLineage:
         root_runners = []
         for l in lineages_list:
             self.current_lineage = "{}_{}".format(l, self.dataset_version)
-            autoconfig = BuscoConfigAuto(self.config, self.current_lineage)
-            busco_run = BuscoRunner(autoconfig)
+            autoconfig = self.config_manager.load_busco_config_auto(self.current_lineage)
+            busco_run = AnalysisRunner(autoconfig)
             busco_run.run_analysis(callback=self.callback)
             root_runners.append(busco_run)
             type(self).runners.append(busco_run)  # Save all root runs so they can be recalled if chosen
@@ -203,8 +206,7 @@ class AutoSelectLineage:
             else:
                 logger.info("Mollicutes dataset is a better match for your data. Testing subclades...")
                 self._run_3_datasets(self.selected_runner)
-                BuscoRunner.final_results.append(self.selected_runner.analysis.hmmer_runner.hmmer_results_lines)
-                BuscoRunner.results_datasets.append(os.path.basename(self.best_match_lineage_dataset))
+
         elif ("geno" in self.selected_runner.mode
               and self.selected_runner.analysis.prodigal_runner.current_gc == "4"
               and os.path.basename(
@@ -212,14 +214,15 @@ class AutoSelectLineage:
             logger.info("The results from the Prodigal gene predictor indicate that your data belongs to the "
                         "mollicutes clade. Testing subclades...")
             self._run_3_datasets()
-            BuscoRunner.final_results.append(self.selected_runner.analysis.hmmer_runner.hmmer_results_lines)
-            BuscoRunner.results_datasets.append(os.path.basename(self.best_match_lineage_dataset))
         elif self.selected_runner.domain == "viruses":
             pass
         else:
             self.run_busco_placer()
         return
 
+    def set_best_match_lineage(self):
+        AnalysisRunner.selected_dataset = os.path.basename(self.best_match_lineage_dataset)
+
     def check_mollicutes(self):
         runners = self.run_lineages_list(["mollicutes"])
         runners.append(self.selected_runner)
@@ -252,9 +255,7 @@ class AutoSelectLineage:
         self.config.placement_files = placement_file_versions  # Necessary to pass these filenames to the final run to be recorded.
         lineage, supporting_markers, placed_markers = dataset_details
         lineage = "{}_{}".format(lineage, self.config.get("busco_run", "datasets_version"))  # todo: this should probably be done in buscoplacer
-        self.best_match_lineage_dataset = os.path.join(self.config.get("busco_run", "download_path"),
-                                                       "lineages",
-                                                       os.path.basename(lineage))
+        self.best_match_lineage_dataset = lineage  # basename
         return
 
     def _run_3_datasets(self, mollicutes_runner=None):


=====================================
src/busco/BuscoConfig.py
=====================================
@@ -4,6 +4,7 @@ from configparser import ParsingError
 from configparser import DuplicateOptionError
 from busco.BuscoLogger import BuscoLogger
 from busco.BuscoLogger import LogDecorator as log
+from busco.Exceptions import BatchFatalError
 from abc import ABCMeta, abstractmethod
 from busco.BuscoDownloadManager import BuscoDownloadManager
 import os
@@ -34,6 +35,7 @@ class BaseConfig(ConfigParser):
         "limit": 3,
         "use_augustus": False,
         "long": False,
+        "batch_mode": False,
     }
 
     DEPENDENCY_SECTIONS = {
@@ -70,13 +72,15 @@ class BaseConfig(ConfigParser):
             with open(self.conf_file) as cfg_file:
                 self.read_file(cfg_file)
         except IOError:
-            raise SystemExit("Config file {} cannot be found".format(self.conf_file))
+            raise BatchFatalError(
+                "Config file {} cannot be found".format(self.conf_file)
+            )
         except ParsingError:
-            raise SystemExit(
+            raise BatchFatalError(
                 "Unable to parse the contents of config file {}".format(self.conf_file)
             )
         except DuplicateOptionError:
-            raise SystemExit(
+            raise BatchFatalError(
                 "Duplicated entry in config file {}. Unable to load configuration.".format(
                     self.conf_file
                 )
@@ -148,9 +152,8 @@ class PseudoConfig(BaseConfig):
                         "you have set 'update-data=True' in your config file."
                     )
                 else:
-                    raise SystemExit(
-                        "Unable to download list of datasets. Please make sure you have set "
-                        "update-data=True in your config file."
+                    raise BatchFatalError(
+                        "Unable to download list of datasets. Please use the --update-data option to use the latest available datasets."
                     )
 
         except NoOptionError:
@@ -224,6 +227,13 @@ class BuscoConfig(ConfigParser, metaclass=ABCMeta):
         self.set("busco_run", "lineage_dataset", local_lineage_filepath)
         return
 
+    def load_dataset(self, lineage):
+        self.set_results_dirname(lineage)  # function always only uses basename
+        self.download_lineage_file(
+            lineage
+        )  # full path will return, base name will attempt download
+        self.load_dataset_config()
+
     def load_dataset_config(self):
         """
         Load BUSCO dataset config file.
@@ -289,11 +299,8 @@ class BuscoConfigAuto(BuscoConfig):
     def __init__(self, config, lineage, **kwargs):
         super().__init__(**kwargs)
         self._propagate_config(config)
-        self.set_results_dirname(lineage)
-
+        self.load_dataset(lineage)
         self._create_required_paths()
-        self.download_lineage_file(lineage)
-        self.load_dataset_config()
 
     def _create_required_paths(self):
         """
@@ -348,6 +355,7 @@ class BuscoConfigMain(BuscoConfig, BaseConfig):
         "evalue",
         "limit",
         "use_augustus",
+        "batch_mode",
     ]
 
     def __init__(self, conf_file, params, **kwargs):
@@ -361,6 +369,7 @@ class BuscoConfigMain(BuscoConfig, BaseConfig):
         self.conf_file = conf_file
         self.params = params
         self.main_out = None
+        self._input_filepath = None
 
     def configure(self):
         if self.conf_file != "local environment":
@@ -378,11 +387,12 @@ class BuscoConfigMain(BuscoConfig, BaseConfig):
 
     def validate(self):
         self._check_mandatory_keys_exist()
+        self._cleanup_config()
         self._check_no_previous_run()
         self._check_allowed_keys()
         self._create_required_paths()
-        self._cleanup_config()
         self._check_required_input_exists()
+        self._check_batch_mode()
 
         self._init_downloader()
 
@@ -418,16 +428,25 @@ class BuscoConfigMain(BuscoConfig, BaseConfig):
 
             datasets_version = self.get("busco_run", "datasets_version")
             if datasets_version != "odb10":
-                raise SystemExit(
-                    "BUSCO v4 only works with datasets from OrthoDB v10 (with the suffix '_odb10'). "
+                raise BatchFatalError(
+                    "BUSCO v5 only works with datasets from OrthoDB v10 (with the suffix '_odb10'). "
                     "For a full list of available datasets, enter 'busco --list-datasets'. "
-                    "You can also run BUSCO using auto-lineage, to allow BUSCO to automatically select "
+                    "You can also run BUSCO using --auto-lineage, to allow BUSCO to automatically select "
                     "the best dataset for your input data."
                 )
             return True
-        except NoOptionError:  # todo: need a helpful error message if datasets_version is not set but lineage_dataset is.
+        except NoOptionError:
             return False
 
+    def _check_batch_mode(self):
+        if os.path.isdir(self._input_filepath):
+            self.set("busco_run", "batch_mode", "True")
+        elif not os.path.isfile(self._input_filepath):
+            raise BatchFatalError(
+                "Unrecognized input type. Please use either a single file or a directory name (for batch mode)"
+            )
+        return
+
     def _check_evalue(self):
         """
         Warn the user if the config contains a non-standard e-value cutoff.
@@ -436,20 +455,18 @@ class BuscoConfigMain(BuscoConfig, BaseConfig):
         if (
             self.getfloat("busco_run", "evalue")
             != type(self).DEFAULT_ARGS_VALUES["evalue"]
-        ):  # todo: introduce systemexit if not float
+        ):
             logger.warning("You are using a custom e-value cutoff")
         return
 
     def _check_limit_value(self):
         """
-        Check the value of limit. Ensure it is between 1 and 20, otherwise raise SystemExit.
+        Check the value of limit. Ensure it is between 1 and 20, otherwise raise BatchFatalError.
         :return:
         """
-        limit_val = self.getint(
-            "busco_run", "limit"
-        )  # todo: introduce systemexit if not int.
+        limit_val = self.getint("busco_run", "limit")
         if limit_val <= 0 or limit_val > 20:
-            raise SystemExit(
+            raise BatchFatalError(
                 "Limit must be an integer between 1 and 20 (you have used: {}). Note that this parameter "
                 "is not needed by the protein mode.".format(limit_val)
             )
@@ -502,7 +519,7 @@ class BuscoConfigMain(BuscoConfig, BaseConfig):
                         + synonyms["transcriptome"]
                         + synonyms["proteins"]
                     ):
-                        raise SystemExit(
+                        raise BatchFatalError(
                             "Unknown mode {}.\n'Mode' parameter must be one of "
                             "['genome', 'transcriptome', 'proteins']".format(value)
                         )
@@ -517,12 +534,12 @@ class BuscoConfigMain(BuscoConfig, BaseConfig):
 
                 if param == "out":
                     if value == "":
-                        raise SystemExit(
+                        raise BatchFatalError(
                             "Please specify an output name for the BUSCO run. "
                             "This can be done using the -o flag or in the config file"
                         )
             except NoOptionError:
-                raise SystemExit(
+                raise BatchFatalError(
                     'The parameter "{} (--{})" was not provided. '
                     "Please add it in the config file or provide it "
                     "through the command line".format(param, param)
@@ -542,7 +559,7 @@ class BuscoConfigMain(BuscoConfig, BaseConfig):
             try:
                 for option in self.options(section_name):
                     if option not in options:
-                        raise SystemExit(
+                        raise BatchFatalError(
                             "Unrecognized option '{}' in config file".format(option)
                         )
             except NoSectionError:
@@ -555,19 +572,12 @@ class BuscoConfigMain(BuscoConfig, BaseConfig):
         :return:
         """
         if "/" in self.get("busco_run", "out"):
-            raise SystemExit(
+            raise BatchFatalError(
                 "Please do not provide a full path in --out parameter, no slash. "
                 "Use out_path in the config.ini file to specify the full path."
             )
         return
 
-    @log(
-        "Input file is {}",
-        logger,
-        attr_name="_input_filepath",
-        on_func_exit=True,
-        log_once=True,
-    )
     def _check_required_input_exists(self):
         """
         Test for existence of input file.
@@ -575,8 +585,8 @@ class BuscoConfigMain(BuscoConfig, BaseConfig):
         """
         self._input_filepath = self.get("busco_run", "in")
         if not os.path.exists(self._input_filepath):
-            raise SystemExit(
-                "Input file {} does not exist".format(self._input_filepath)
+            raise BatchFatalError(
+                "Input {} does not exist".format(self._input_filepath)
             )
         return
 
@@ -594,7 +604,7 @@ class BuscoConfigMain(BuscoConfig, BaseConfig):
                     )
                 )
             else:
-                raise SystemExit(
+                raise BatchFatalError(
                     "A run with the name {} already exists...\n"
                     "\tIf you are sure you wish to overwrite existing files, "
                     "please use the -f (force) option".format(self.main_out)
@@ -650,7 +660,7 @@ class BuscoConfigMain(BuscoConfig, BaseConfig):
                 if item[0].endswith("_path") or item[0] == "path" or item[0] == "in":
                     if item[1].startswith("~"):
                         self.set(key, item[0], os.path.expanduser(item[1]))
-                    elif item[1].startswith(".") or (item[1] and "/" not in item[1]):
+                    elif item[1]:
                         self.set(key, item[0], os.path.abspath(item[1]))
 
         return


=====================================
src/busco/BuscoDownloadManager.py
=====================================
@@ -22,6 +22,7 @@ import gzip
 
 from busco.BuscoLogger import BuscoLogger
 from busco.BuscoLogger import LogDecorator as log
+from busco.Exceptions import BatchFatalError, BuscoError
 
 logger = BuscoLogger.get_logger(__name__)
 
@@ -72,7 +73,14 @@ class BuscoDownloadManager:
                     "Unable to verify BUSCO datasets because of offline mode"
                 )
             else:
-                raise SystemExit(e)
+                raise BatchFatalError(e)
+        except FileNotFoundError:
+            logger.warning(
+                "Unable to find file_versions.tsv. This may be due to an internet access problem. "
+                "Attempting to continue in offline mode."
+            )
+            self.offline = True
+
         return
 
     @log("Downloading information on latest versions of BUSCO data...", logger)
@@ -82,7 +90,7 @@ class BuscoDownloadManager:
         try:
             urllib.request.urlretrieve(remote_filepath, local_filepath)
         except URLError:
-            SystemExit("Cannot reach {}".format(remote_filepath))
+            raise BatchFatalError("Cannot reach {}".format(remote_filepath))
         return local_filepath
 
     def _create_category_dir(self, category):
@@ -103,7 +111,7 @@ class BuscoDownloadManager:
                     dataset_date = line[1]
                     break
         if not dataset_date:
-            raise SystemExit(
+            raise BuscoError(
                 "Creation date could not be extracted from dataset.cfg file."
             )
         return dataset_date
@@ -112,7 +120,7 @@ class BuscoDownloadManager:
         try:
             latest_update = type(self).version_files[data_basename][0]
         except KeyError:
-            raise SystemExit(
+            raise BuscoError(
                 "{} is not a valid option for '{}'".format(data_basename, category)
             )
         path_basename, extension = os.path.splitext(data_basename)
@@ -154,7 +162,7 @@ class BuscoDownloadManager:
             logger.info("Using local {} directory {}".format(category, data_name))
             return data_name
         elif "/" in data_name:
-            raise SystemExit("{} does not exist".format(data_name))
+            raise BuscoError("{} does not exist".format(data_name))
         if self.offline:
             if category == "lineages":
                 local_dataset = os.path.join(
@@ -163,7 +171,7 @@ class BuscoDownloadManager:
                 if os.path.exists(local_dataset):
                     return local_dataset
                 else:
-                    raise SystemExit(
+                    raise BuscoError(
                         "Unable to run BUSCO in offline mode. Dataset {} does not "
                         "exist.".format(local_dataset)
                     )
@@ -180,9 +188,8 @@ class BuscoDownloadManager:
                 )
                 if len(placement_files) > 0:
                     return placement_files[-1]
-                    # todo: for offline mode, log which files are being used (in case of more than one glob match)
                 else:
-                    raise SystemExit(
+                    raise BuscoError(
                         "Unable to run BUSCO placer in offline mode. Cannot find necessary placement "
                         "files in {}".format(self.local_download_path)
                     )
@@ -263,7 +270,7 @@ class BuscoDownloadManager:
                 )
                 logger.info("deleting corrupted file {}".format(local_filepath))
                 os.remove(local_filepath)
-                raise SystemExit(
+                raise BuscoError(
                     "BUSCO was unable to download or update all necessary files"
                 )
             else:
@@ -290,14 +297,24 @@ class BuscoDownloadManager:
                 with open(unzipped_filename, "wb") as decompressed_file:
                     for line in compressed_file:
                         decompressed_file.write(line)
-            os.remove(local_filepath)
+            try:
+                os.remove(local_filepath)
+            except FileNotFoundError:
+                logger.warning(
+                    "Downloaded gzip file was removed before this BUSCO run could remove it."
+                )
             local_filepath = unzipped_filename
 
         if os.path.splitext(local_filepath)[1] == ".tar":
             untarred_filename = local_filepath.replace(".tar", "")
             with tarfile.open(local_filepath) as tar_file:
                 tar_file.extractall(os.path.dirname(local_filepath))
-            os.remove(local_filepath)
+            try:
+                os.remove(local_filepath)
+            except FileNotFoundError:
+                logger.warning(
+                    "Downloaded tarball was removed before this BUSCO run could remove it."
+                )
             local_filepath = untarred_filename
 
         return local_filepath


=====================================
src/busco/BuscoLogger.py
=====================================
@@ -22,6 +22,7 @@ import select
 import time
 import threading
 import random
+from busco.Exceptions import BatchFatalError, BuscoError
 
 from configparser import NoOptionError
 from configparser import NoSectionError
@@ -64,7 +65,7 @@ class LogDecorator:
                     self.format_string(*args)
                     self.retval = func(*args, **kwargs)
                 return self.retval
-            except SystemExit:
+            except (BuscoError, BatchFatalError):
                 raise
 
         return wrapped_func
@@ -281,7 +282,7 @@ class BuscoLogger(logging.getLoggerClass()):
                 if e.errno == 13
                 else "IO error({0}): {1}".format(e.errno, e.strerror)
             )
-            raise SystemExit(errStr)
+            raise BatchFatalError(errStr)
 
         self._file_hdlr.setLevel(logging.DEBUG)
         self._file_hdlr.setFormatter(self._verbose_formatter)


=====================================
src/busco/BuscoPlacer.py
=====================================
@@ -16,6 +16,7 @@ import json
 import os
 from busco.BuscoLogger import BuscoLogger
 from busco.BuscoLogger import LogDecorator as log
+from busco.Exceptions import BuscoError
 from Bio import SeqIO
 from busco.busco_tools.sepp import SEPPRunner
 
@@ -176,7 +177,7 @@ class BuscoPlacer:
             tree = data["tree"]
             placements = data["placements"]
         except FileNotFoundError:
-            raise SystemExit(
+            raise BuscoError(
                 "Placements failed. Try to rerun increasing the memory or select a lineage manually."
             )
 


=====================================
src/busco/BuscoRunner.py
=====================================
@@ -1,3 +1,4 @@
+from busco.Exceptions import BatchFatalError, BuscoError
 from busco.analysis.BuscoAnalysis import BuscoAnalysis
 from busco.analysis.GenomeAnalysis import (
     GenomeAnalysisEukaryotesAugustus,
@@ -12,14 +13,219 @@ from busco.analysis.GenomeAnalysis import GenomeAnalysisProkaryotes
 from busco.BuscoLogger import BuscoLogger
 from busco.BuscoConfig import BuscoConfigMain
 from busco.busco_tools.base import NoGenesError, BaseRunner
+from busco.BuscoLogger import LogDecorator as log
 from configparser import NoOptionError
+from busco.busco_tools.Toolset import ToolException
 import os
+import sys
 import shutil
+import time
+import traceback
+from collections import defaultdict
+
 
 logger = BuscoLogger.get_logger(__name__)
 
 
-class BuscoRunner:
+class SingleRunner:
+    def __init__(self, config_manager):
+        self.start_time = time.time()
+        self.config_manager = config_manager
+        self.config = self.config_manager.config_main
+        self.input_file = self.config.get("busco_run", "in")
+        self.lineage_dataset = None
+        self.runner = None
+
+    def get_lineage(self):
+        if self.config.getboolean("busco_run", "auto-lineage"):
+            lineage_dataset_fullpath, runner = self.auto_select_lineage()  # full path
+            if not runner.config.get("busco_run", "domain") == "viruses":
+                self.config.set(
+                    "busco_run",
+                    "parent_dataset",
+                    runner.config.get("busco_run", "lineage_dataset"),
+                )
+            self.config.set("busco_run", "lineage_dataset", lineage_dataset_fullpath)
+            self.runner = runner
+
+        self.lineage_dataset = self.config.get(
+            "busco_run", "lineage_dataset"
+        )  # full path
+        return
+
+    @log("No lineage specified. Running lineage auto selector.\n", logger)
+    def auto_select_lineage(self):
+        from busco.AutoLineage import (
+            AutoSelectLineage,
+        )  # Import statement inside method to avoid circular imports
+
+        asl = AutoSelectLineage(self.config_manager)
+        asl.run_auto_selector()
+        asl.get_lineage_dataset()
+        asl.set_best_match_lineage()
+        lineage_dataset = asl.best_match_lineage_dataset
+        runner = asl.selected_runner
+        asl.reset()
+        return lineage_dataset, runner
+
+    @staticmethod
+    def log_error(err):
+        logger.error(err)
+        logger.debug(err, exc_info=True)
+        logger.error("BUSCO analysis failed !")
+        logger.error(
+            "Check the logs, read the user guide (https://busco.ezlab.org/busco_userguide.html), "
+            "and check the BUSCO issue board on https://gitlab.com/ezlab/busco/issues\n"
+        )
+
+    @log("Input file is {}", logger, attr_name="input_file")
+    def run(self):
+        try:
+            self.get_lineage()
+            self.config.load_dataset(self.lineage_dataset)
+
+            lineage_basename = os.path.basename(
+                self.config.get("busco_run", "lineage_dataset")
+            )
+            main_out_folder = self.config.get("busco_run", "main_out")
+            lineage_results_folder = os.path.join(
+                main_out_folder,
+                "auto_lineage",
+                self.config.get("busco_run", "lineage_results_dir"),
+            )
+
+            if not (
+                self.config.getboolean("busco_run", "auto-lineage")
+                and (
+                    lineage_basename.startswith(("bacteria", "archaea", "eukaryota"))
+                    or (
+                        lineage_basename.startswith(
+                            ("mollicutes", "mycoplasmatales", "entomoplasmatales")
+                        )
+                        and os.path.exists(lineage_results_folder)
+                    )
+                )
+            ):
+                # It is possible that the lineages ("mollicutes", "mycoplasmatales", "entomoplasmatales") were arrived at either by the Prodigal genetic code shortcut
+                # or by BuscoPlacer. If the former, the run will have already been completed. If the latter it still needs
+                # to be done.
+
+                self.runner = AnalysisRunner(self.config)
+
+            if os.path.exists(lineage_results_folder):
+                new_dest = os.path.join(
+                    main_out_folder, self.config.get("busco_run", "lineage_results_dir")
+                )
+                if not os.path.exists(new_dest):
+                    os.symlink(lineage_results_folder, new_dest)
+            else:
+                self.runner.run_analysis()
+                AnalysisRunner.selected_dataset = lineage_basename
+            self.runner.finish(time.time() - self.start_time)
+
+        except BuscoError as e:
+            self.log_error(e)
+            raise
+
+        except ToolException as e:
+            logger.error(e)
+            raise BatchFatalError(e)
+
+        except KeyboardInterrupt:
+            logger.exception(
+                "A signal was sent to kill the process. \nBUSCO analysis failed !"
+            )
+            raise BatchFatalError
+
+        except BaseException:
+            exc_type, exc_value, exc_traceback = sys.exc_info()
+            logger.critical(
+                "Unhandled exception occurred:\n{}\n".format(
+                    "".join(
+                        traceback.format_exception(exc_type, exc_value, exc_traceback)
+                    )
+                )
+            )
+            raise BatchFatalError
+
+
+class BatchRunner:
+
+    batch_results = []
+
+    @log(
+        "Running in batch mode. {} input files found in {}",
+        logger,
+        attr_name=["num_inputs", "input_dir"],
+        on_func_exit=True,
+    )
+    def __init__(self, config_manager):
+        self.config_manager = config_manager
+        self.config = self.config_manager.config_main
+        self.input_dir = self.config.get("busco_run", "in")
+        self.input_files = [
+            os.path.join(self.input_dir, f) for f in os.listdir(self.input_dir)
+        ]
+        self.num_inputs = len(self.input_files)
+
+    def run(self):
+        for i, input_file in enumerate(self.input_files):
+            try:
+                input_id = os.path.basename(input_file)
+                self.config.set("busco_run", "in", input_file)
+                self.config.set(
+                    "busco_run",
+                    "main_out",
+                    os.path.join(
+                        self.config.get("busco_run", "out_path"),
+                        self.config.get("busco_run", "out"),
+                        input_id,
+                    ),
+                )
+                single_run = SingleRunner(self.config_manager)
+                single_run.run()
+
+                run_summary = single_run.runner.format_run_summary()
+                type(self).batch_results.append(run_summary)
+
+                AnalysisRunner.reset()
+
+            except NoOptionError as noe:
+                raise BatchFatalError(noe)
+
+            except BatchFatalError:
+                raise
+
+            except BuscoError:
+                continue
+
+        try:
+            self.write_batch_summary()
+        except NoOptionError as noe:
+            raise BatchFatalError(noe)
+
+    def write_batch_summary(self):
+        summary_file = os.path.join(
+            self.config.get("busco_run", "out_path"),
+            self.config.get("busco_run", "out"),
+            "batch_summary.txt",
+        )
+        # for inp, res in type(self).batch_results.items():
+        #     if
+        with open(summary_file, "w") as f:
+            if self.config.getboolean("busco_run", "auto-lineage"):
+                f.write(
+                    "Input_file\tDataset\tComplete\tSingle\tDuplicated\tFragmented\tMissing\tn_markers\tScores_archaea_odb10\tScores_bacteria_odb10\tScores_eukaryota_odb10\n"
+                )
+            else:
+                f.write(
+                    "Input_file\tDataset\tComplete\tSingle\tDuplicated\tFragmented\tMissing\tn_markers\n"
+                )
+            for line in type(self).batch_results:
+                f.write(line)
+
+
+class AnalysisRunner:
 
     mode_dict = {
         "euk_genome_met": GenomeAnalysisEukaryotesMetaeuk,
@@ -30,8 +236,10 @@ class BuscoRunner:
         "proteins": GeneSetAnalysis,
     }
 
-    final_results = []
+    final_results = {}
     results_datasets = []
+    all_results = defaultdict(dict)
+    selected_dataset = None
 
     def __init__(self, config):
 
@@ -51,7 +259,7 @@ class BuscoRunner:
                 else:
                     self.mode = "euk_genome_met"
             else:
-                raise SystemExit("Unrecognized mode {}".format(self.mode))
+                raise BatchFatalError("Unrecognized mode {}".format(self.mode))
 
         elif self.mode == "transcriptome":
             if self.domain == "prokaryota":
@@ -59,7 +267,7 @@ class BuscoRunner:
             elif self.domain == "eukaryota":
                 self.mode = "euk_tran"
             else:
-                raise SystemExit("Unrecognized mode {}".format(self.mode))
+                raise BatchFatalError("Unrecognized mode {}".format(self.mode))
         analysis_type = type(self).mode_dict[self.mode]
         self.analysis = analysis_type()
         self.prok_fail_count = (
@@ -67,6 +275,50 @@ class BuscoRunner:
         )
         self.cleaned_up = False
 
+    @classmethod
+    def reset(cls):
+        cls.final_results = {}
+        cls.results_datasets = []
+        cls.all_results = defaultdict(dict)
+
+    def save_results(self):
+        lineage_basename = os.path.basename(
+            self.config.get("busco_run", "lineage_dataset")
+        )
+        if not self.config.getboolean("busco_run", "auto-lineage"):
+            type(self).selected_dataset = lineage_basename
+        self.final_results[
+            lineage_basename
+        ] = self.analysis.hmmer_runner.hmmer_results_lines
+        self.all_results[lineage_basename][
+            "one_line_summary"
+        ] = self.analysis.hmmer_runner.one_line_summary_raw
+        self.all_results[lineage_basename][
+            "Complete"
+        ] = self.analysis.hmmer_runner.complete_percent
+        self.all_results[lineage_basename][
+            "Single copy"
+        ] = self.analysis.hmmer_runner.s_percent
+        self.all_results[lineage_basename][
+            "Multi copy"
+        ] = self.analysis.hmmer_runner.d_percent
+        self.all_results[lineage_basename][
+            "Fragmented"
+        ] = self.analysis.hmmer_runner.f_percent
+        self.all_results[lineage_basename][
+            "Missing"
+        ] = self.analysis.hmmer_runner.missing_percent
+        self.all_results[lineage_basename][
+            "n_markers"
+        ] = self.analysis.hmmer_runner.total_buscos
+        self.all_results[lineage_basename]["domain"] = self.analysis.domain
+        try:
+            parent_dataset = self.config.get("busco_run", "parent_dataset")
+            self.all_results[lineage_basename]["parent_dataset"] = parent_dataset
+        except NoOptionError:
+            pass
+        self.results_datasets.append(os.path.basename(lineage_basename))
+
     def run_analysis(self, callback=(lambda *args: None)):
         try:
             self.analysis.run_analysis()
@@ -83,7 +335,7 @@ class BuscoRunner:
                 "{0} did not recognize any genes matching the dataset {1} in the input file. "
                 "If this is unexpected, check your input file and your "
                 "installation of {0}\n".format(
-                    nge.gene_predictor, self.analysis._lineage_name
+                    nge.gene_predictor, self.analysis.lineage_name
                 )
             )
             fatal = (
@@ -99,17 +351,19 @@ class BuscoRunner:
                 and self.prok_fail_count == 1
             )
             if fatal:
-                raise SystemExit(no_genes_msg)
+                raise BuscoError(no_genes_msg)
             else:
                 logger.warning(no_genes_msg)
                 s_buscos = d_buscos = f_buscos = s_percent = d_percent = f_percent = 0.0
                 if self.mode == "prok_genome":
                     self.prok_fail_count += 1
 
-        except SystemExit as se:
+        except (BuscoError, BatchFatalError):
             self.cleanup()
+            raise
 
-            raise se
+        finally:
+            self.save_results()
         return callback(s_buscos, d_buscos, f_buscos, s_percent, d_percent, f_percent)
 
     def cleanup(self):
@@ -118,23 +372,33 @@ class BuscoRunner:
 
     def format_results(self):
         framed_output = []
-        if len(type(self).results_datasets) == 1:
-            header1 = "Results from dataset {}\n".format(type(self).results_datasets[0])
+        final_dataset = type(self).selected_dataset
+        domain = type(self).all_results[final_dataset]["domain"]
+        try:
+            parent_dataset = os.path.basename(
+                type(self).all_results[final_dataset]["parent_dataset"]
+            )
+        except KeyError:
+            parent_dataset = None
+        if parent_dataset is None or domain == "viruses":
+            header1 = "Results from dataset {}\n".format(final_dataset)
+            final_output_results1 = "".join(
+                self._check_parasitic(type(self).final_results[final_dataset][1:])
+            )
         else:
-            header1 = "Results from generic domain {}\n".format(
-                type(self).results_datasets[0]
+            header1 = "Results from generic domain {}\n".format(parent_dataset)
+            final_output_results1 = "".join(
+                self._check_parasitic(type(self).final_results[parent_dataset][1:])
             )
-        final_output_results1 = "".join(
-            self._check_parasitic(type(self).final_results[0][1:])
-        )
+
         sb1 = SmartBox()
         framed_lines1 = sb1.create_results_box(header1, final_output_results1)
         framed_output.append(framed_lines1)
 
-        if len(type(self).final_results) == 2:
-            header2 = "Results from dataset {}\n".format(type(self).results_datasets[1])
+        if domain != "viruses" and parent_dataset and parent_dataset != final_dataset:
+            header2 = "Results from dataset {}\n".format(final_dataset)
             final_output_results2 = "".join(
-                self._check_parasitic(type(self).final_results[1][1:])
+                self._check_parasitic(type(self).final_results[final_dataset][1:])
             )
             sb2 = SmartBox()
             framed_lines2 = sb2.create_results_box(header2, final_output_results2)
@@ -142,10 +406,60 @@ class BuscoRunner:
 
         return "".join(framed_output)
 
+    def format_run_summary(self):
+        input_file = os.path.basename(self.config.get("busco_run", "in"))
+        dataset = type(self).selected_dataset
+        complete = type(self).all_results[dataset]["Complete"]
+        single = type(self).all_results[dataset]["Single copy"]
+        duplicated = type(self).all_results[dataset]["Multi copy"]
+        fragmented = type(self).all_results[dataset]["Fragmented"]
+        missing = type(self).all_results[dataset]["Missing"]
+        n_markers = type(self).all_results[dataset]["n_markers"]
+
+        if self.config.getboolean("busco_run", "auto-lineage"):
+            try:
+                scores_arch = (
+                    type(self).all_results["archaea_odb10"]["one_line_summary"].strip()
+                )
+            except KeyError:
+                scores_arch = "Not run"
+            try:
+                scores_bact = (
+                    type(self).all_results["bacteria_odb10"]["one_line_summary"].strip()
+                )
+            except KeyError:
+                scores_bact = "Not run"
+            try:
+                scores_euk = (
+                    type(self)
+                    .all_results["eukaryota_odb10"]["one_line_summary"]
+                    .strip()
+                )
+            except KeyError:
+                scores_euk = "Not run"
+        else:
+            scores_arch = ""
+            scores_bact = ""
+            scores_euk = ""
+        summary_line = "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format(
+            input_file,
+            dataset,
+            complete,
+            single,
+            duplicated,
+            fragmented,
+            missing,
+            n_markers,
+            scores_arch,
+            scores_bact,
+            scores_euk,
+        )
+        return summary_line
+
     def _check_parasitic(self, final_output_results):
         try:
             with open(
-                os.path.join(self.analysis._lineage_dataset, "missing_in_parasitic.txt")
+                os.path.join(self.analysis.lineage_dataset, "missing_in_parasitic.txt")
             ) as parasitic_file:
                 missing_in_parasitic_buscos = [
                     entry.strip() for entry in parasitic_file.readlines()
@@ -219,8 +533,8 @@ class BuscoRunner:
             root_domain_output_folder_final = os.path.join(
                 main_out_folder, "run_{}".format(domain_results_folder)
             )
-            os.rename(root_domain_output_folder, root_domain_output_folder_final)
-            os.symlink(root_domain_output_folder_final, root_domain_output_folder)
+            print(root_domain_output_folder, root_domain_output_folder_final)
+            os.symlink(root_domain_output_folder, root_domain_output_folder_final)
             shutil.copyfile(
                 os.path.join(root_domain_output_folder_final, "short_summary.txt"),
                 os.path.join(
@@ -258,7 +572,11 @@ class BuscoRunner:
         # This is deliberately a staticmethod so it can be called from run_BUSCO() even if BuscoRunner has not yet
         # been initialized.
         try:
-            log_folder = os.path.join(config.get("busco_run", "main_out"), "logs")
+            log_folder = os.path.join(
+                config.get("busco_run", "out_path"),
+                config.get("busco_run", "out"),
+                "logs",
+            )
             if not os.path.exists(log_folder):
                 os.makedirs(log_folder)
             shutil.move(
@@ -300,8 +618,6 @@ class BuscoRunner:
             "https://busco.ezlab.org/busco_userguide.html\n"
         )
 
-        self.move_log_file(self.config)
-
 
 class SmartBox:
     def __init__(self):
@@ -367,7 +683,7 @@ class SmartBox:
 
     def create_results_box(self, header_text, body_text):
         header = self.wrap_header(header_text)  # Called first to define width
-        box_lines = ["\n"]
+        box_lines = list(["\n"])
         box_lines.append("\t{}".format(self.add_horizontal()))
         framed_header = self.add_vertical(header)
         for line in framed_header:


=====================================
src/busco/ConfigManager.py
=====================================
@@ -11,8 +11,7 @@ Licensed under the MIT license. See LICENSE.md file.
 
 """
 
-from busco.AutoLineage import AutoSelectLineage
-from busco.BuscoConfig import BuscoConfigMain
+from busco.BuscoConfig import BuscoConfigMain, BuscoConfigAuto
 from busco.BuscoLogger import BuscoLogger
 from busco.BuscoLogger import LogDecorator as log
 import os
@@ -24,7 +23,7 @@ class BuscoConfigManager:
     def __init__(self, params):
         self.params = params
         self.config_file = None
-        self.config = None
+        self.config_main = None
         self.get_config_file()
         self.runner = None
 
@@ -51,58 +50,12 @@ class BuscoConfigManager:
         return self.config_file
 
     @log("Configuring BUSCO with {}", logger, attr_name="config_file")
-    def load_busco_config(self, *args):
-        self.config = BuscoConfigMain(self.config_file, self.params)
-        self.config.configure()
-        self.config.validate()
-        if not self.config.check_lineage_present():
-            if not self.config.getboolean(
-                "busco_run", "auto-lineage"
-            ) and not self.config.getboolean(
-                "busco_run", "auto-lineage-prok"
-            ):  # and not self.config.getboolean("busco_run", "auto-lineage-euk"):
-                logger.warning(
-                    "Running Auto Lineage Selector as no lineage dataset was specified. This will take a "
-                    "little longer than normal. If you know what lineage dataset you want to use, please "
-                    "specify this in the config file or using the -l (--lineage-dataset) flag in the "
-                    "command line."
-                )
-            self.config.set("busco_run", "auto-lineage", "True")
-            lineage_dataset_fullpath = self.auto_select_lineage()  # full path
-            self.config.set("busco_run", "lineage_dataset", lineage_dataset_fullpath)
-            lineage_dataset = os.path.basename(lineage_dataset_fullpath)  # base name
-        else:
-            if self.config.getboolean(
-                "busco_run", "auto-lineage"
-            ) or self.config.getboolean(
-                "busco_run", "auto-lineage-prok"
-            ):  # or self.config.getboolean("busco_run", "auto-lineage-euk"):
-                logger.warning(
-                    "You have selected auto-lineage but you have also provided a lineage dataset. "
-                    "BUSCO will proceed with the specified dataset. "
-                    "To run auto-lineage do not specify a dataset."
-                )
-            self.config.set("busco_run", "auto-lineage", "False")
-            self.config.set("busco_run", "auto-lineage-prok", "False")
-            self.config.set("busco_run", "auto-lineage-euk", "False")
-            lineage_dataset = self.config.get(
-                "busco_run", "lineage_dataset"
-            )  # full path
-
-        self.config.set_results_dirname(
-            lineage_dataset
-        )  # function always only uses basename
-        self.config.download_lineage_file(
-            lineage_dataset
-        )  # full path will return, base name will attempt download
-        self.config.load_dataset_config()
+    def load_busco_config_main(self, *args):
+        self.config_main = BuscoConfigMain(self.config_file, self.params)
+        self.config_main.configure()
+        self.config_main.validate()
         return
 
-    @log("No lineage specified. Running lineage auto selector.\n", logger)
-    def auto_select_lineage(self):
-        asl = AutoSelectLineage(self.config)
-        asl.run_auto_selector()
-        asl.get_lineage_dataset()
-        lineage_dataset = asl.best_match_lineage_dataset
-        self.runner = asl.selected_runner
-        return lineage_dataset
+    def load_busco_config_auto(self, lineage):
+        autoconfig = BuscoConfigAuto(self.config_main, lineage)
+        return autoconfig


=====================================
src/busco/Exceptions.py
=====================================
@@ -0,0 +1,22 @@
+class BatchFatalError(Exception):
+    """
+    Error that prevents batch mode from running.
+    """
+
+    def __init__(self, value):
+        self.value = value
+
+    def __str__(self):
+        return self.value
+
+
+class BuscoError(Exception):
+    """
+    Module-specific exception
+    """
+
+    def __init__(self, value):
+        self.value = value
+
+    def __str__(self):
+        return self.value


=====================================
src/busco/_version.py
=====================================
@@ -6,4 +6,4 @@ Copyright (c) 2016-2021, Evgeny Zdobnov (ez at ezlab.org)
 Licensed under the MIT license. See LICENSE.md file.
 
 """
-__version__ = "5.0.0"
+__version__ = "5.1.2"


=====================================
src/busco/analysis/Analysis.py
=====================================
@@ -3,6 +3,7 @@ from busco.BuscoLogger import BuscoLogger
 from abc import ABCMeta
 import os
 from busco.busco_tools.blast import TBLASTNRunner, MKBLASTRunner
+from busco.Exceptions import BuscoError
 
 logger = BuscoLogger.get_logger(__name__)
 
@@ -18,7 +19,7 @@ class NucleotideAnalysis(metaclass=ABCMeta):
 
         super().__init__()  # Initialize BuscoAnalysis
         if not self.check_nucleotide_file(self.input_file):
-            raise SystemExit("The input file does not contain nucleotide sequences.")
+            raise BuscoError("The input file does not contain nucleotide sequences.")
 
     def check_nucleotide_file(self, filename):
         i = 0
@@ -77,7 +78,7 @@ class ProteinAnalysis:
     def __init__(self):
         super().__init__()
         if not self.check_protein_file(self.input_file):
-            raise SystemExit("Please provide a protein file as input")
+            raise BuscoError("Please provide a protein file as input")
 
     def check_protein_file(self, filename):
 
@@ -125,7 +126,7 @@ class BLASTAnalysis(metaclass=ABCMeta):
             self.config.set("busco_run", "restart", str(self.restart))
             self.mkblast_runner.run()
         if len(os.listdir(os.path.split(self.mkblast_runner.output_db)[0])) == 0:
-            raise SystemExit(
+            raise BuscoError(
                 "makeblastdb failed to create a BLAST DB at {}".format(
                     self.mkblast_runner.output_db
                 )


=====================================
src/busco/analysis/BuscoAnalysis.py
=====================================
@@ -16,6 +16,7 @@ from busco.busco_tools.hmmer import HMMERRunner
 import os
 from busco.BuscoLogger import BuscoLogger
 from busco.BuscoLogger import LogDecorator as log
+from busco.Exceptions import BatchFatalError, BuscoError
 
 logger = BuscoLogger.get_logger(__name__)
 
@@ -50,11 +51,14 @@ class BuscoAnalysis(metaclass=ABCMeta):
 
         # Get other useful variables
         self.input_file = self.config.get("busco_run", "in")
-        self._lineage_dataset = self.config.get("busco_run", "lineage_dataset")
-        self._lineage_name = os.path.basename(self._lineage_dataset)
-        self._domain = self.config.get("busco_run", "domain")
+        self.lineage_dataset = self.config.get("busco_run", "lineage_dataset")
+        self.lineage_name = os.path.basename(self.lineage_dataset)
+        self.domain = self.config.get("busco_run", "domain")
         self._has_variants_file = os.path.exists(
-            os.path.join(self._lineage_dataset, "ancestral_variants")
+            os.path.join(self.lineage_dataset, "ancestral_variants")
+        )
+        self.has_dataset_config_file = os.path.exists(
+            os.path.join(self.lineage_dataset, "dataset.cfg")
         )
         self._dataset_creation_date = self.config.get("busco_run", "creation_date")
         self.restart = self.config.getboolean("busco_run", "restart")
@@ -89,7 +93,7 @@ class BuscoAnalysis(metaclass=ABCMeta):
     @log(
         "Running BUSCO using lineage dataset {0} ({1}, {2})",
         logger,
-        attr_name=["_lineage_name", "_domain", "_dataset_creation_date"],
+        attr_name=["lineage_name", "domain", "_dataset_creation_date"],
         on_func_exit=True,
     )
     def run_analysis(self):
@@ -106,7 +110,7 @@ class BuscoAnalysis(metaclass=ABCMeta):
         This function runs hmmsearch.
         """
         if not busco_ids:
-            files = sorted(os.listdir(os.path.join(self._lineage_dataset, "hmms")))
+            files = sorted(os.listdir(os.path.join(self.lineage_dataset, "hmms")))
             busco_ids = [
                 os.path.splitext(f)[0] for f in files
             ]  # Each Busco ID has a HMM file of the form "<busco_id>.hmm"
@@ -116,7 +120,7 @@ class BuscoAnalysis(metaclass=ABCMeta):
         if self.restart and self.hmmer_runner.check_previous_completed_run():
             logger.info("Skipping HMMER run as output already processed")
         elif len(os.listdir(self.hmmer_runner.results_dir)) > 0:
-            raise SystemExit(
+            raise BuscoError(
                 "HMMER results directory not empty. If you are running in restart mode, make sure you are "
                 "using the same eukaryotic gene predictor (metaeuk/augustus) as before."
             )
@@ -146,27 +150,27 @@ class BuscoAnalysis(metaclass=ABCMeta):
         Note: dataset.cfg file is not mandatory for offline mode
         # todo: implement a check for dataset.cfg file if not using offline mode
 
-        :raises SystemExit: if the dataset is missing files or folders
+        :raises BuscoError: if the dataset is missing files or folders
         """
 
         # Check hmm files exist
-        files = os.listdir(os.path.join(self._lineage_dataset, "hmms"))
+        files = os.listdir(os.path.join(self.lineage_dataset, "hmms"))
         if not files:
-            raise SystemExit(
+            raise BuscoError(
                 "The dataset you provided lacks hmm profiles in {}".format(
-                    os.path.join(self._lineage_dataset, "hmms")
+                    os.path.join(self.lineage_dataset, "hmms")
                 )
             )
 
-        if self._domain == "eukaryota":
+        if self.domain == "eukaryota":
             # Check prfl folder exists and contains profiles
             for dirpath, dirnames, files in os.walk(
-                os.path.join(self._lineage_dataset, "prfl")
+                os.path.join(self.lineage_dataset, "prfl")
             ):
                 if not files:
-                    raise SystemExit(
+                    raise BuscoError(
                         "The dataset you provided lacks elements in {}".format(
-                            os.path.join(self._lineage_dataset, "prfl")
+                            os.path.join(self.lineage_dataset, "prfl")
                         )
                     )
 
@@ -175,13 +179,17 @@ class BuscoAnalysis(metaclass=ABCMeta):
                 "The dataset you provided does not contain the file ancestral_variants, likely because it "
                 'is an old version. All blast steps will use the file "ancestral" instead'
             )
+        if not self.has_dataset_config_file:
+            raise BuscoError(
+                "The dataset you provided is missing the dataset.cfg file and is therefore corrupted."
+            )
 
         return
 
     def _check_data_integrity(self):
         self._check_dataset_integrity()
         if not os.stat(self.input_file).st_size > 0:
-            raise SystemExit("Input file is empty.")
+            raise BuscoError("Input file is empty.")
         with open(self.input_file) as f:
             for line in f:
                 if line.startswith(">"):
@@ -192,7 +200,7 @@ class BuscoAnalysis(metaclass=ABCMeta):
     def _check_seq_uniqueness(self, line):
         seq_id = line.split(" ")[0]
         if seq_id in self.headers:
-            raise SystemExit("Duplicate of sequence {} in input file".format(seq_id))
+            raise BuscoError("Duplicate of sequence {} in input file".format(seq_id))
         self.headers.add(seq_id)
         return
 
@@ -203,11 +211,11 @@ class BuscoAnalysis(metaclass=ABCMeta):
         and warns the user and stops the execution
         :param header: a fasta header to check
         :type header: str
-        :raises SystemExit: if a problematic character is found
+        :raises BuscoError: if a problematic character is found
         """
         for char in BuscoConfig.FORBIDDEN_HEADER_CHARS:
             if char in header:
-                raise SystemExit(
+                raise BuscoError(
                     'The character "%s" is present in the fasta header %s, '
                     "which will crash BUSCO. Please clean the header of your "
                     "input file." % (char, header.strip())
@@ -215,14 +223,14 @@ class BuscoAnalysis(metaclass=ABCMeta):
 
         for char in BuscoConfig.FORBIDDEN_HEADER_CHARS_BEFORE_SPLIT:
             if char in header.split()[0]:
-                raise SystemExit(
+                raise BuscoError(
                     'The character "%s" is present in the fasta header %s, '
                     "which will crash Reader. Please clean the header of your"
                     " input file." % (char, header.split()[0].strip())
                 )
 
         if header.split()[0] == ">":
-            raise SystemExit(
+            raise BuscoError(
                 "A space is present in the fasta header %s, directly after "
                 '">" which will crash Reader. Please clean the header of '
                 "your input file." % (header.strip())
@@ -249,18 +257,18 @@ class BuscoAnalysis(metaclass=ABCMeta):
     def _create_main_dir(self):
         """
         This function creates the run (main) directory
-        :raises SystemExit: if write permissions are not available to the specified location
+        :raises BatchFatalError: if write permissions are not available to the specified location
         """
         try:
             os.makedirs(self.run_folder)
         except FileExistsError:
             if not self.restart:
-                raise SystemExit(
+                raise BatchFatalError(
                     "Something went wrong. BUSCO stopped before overwriting run folder "
                     "{}".format(self.run_folder)
                 )
         except PermissionError:
-            raise SystemExit(
+            raise BatchFatalError(
                 "Cannot write to the output directory, please make sure "
                 "you have write permissions to {}".format(self.run_folder)
             )


=====================================
src/busco/analysis/GenomeAnalysis.py
=====================================
@@ -31,6 +31,7 @@ import os
 import pandas as pd
 from collections import defaultdict
 import subprocess
+from busco.Exceptions import BuscoError
 
 logger = BuscoLogger.get_logger(__name__)
 
@@ -187,9 +188,12 @@ class GenomeAnalysisEukaryotesAugustus(BLASTAnalysis, GenomeAnalysisEukaryotes):
         super().__init__()
         self._long = self.config.getboolean("busco_run", "long")
         try:
-            self._target_species = self.config.get("busco_run", "augustus_species")
+            self._target_species_initial = self.config.get(
+                "busco_run", "augustus_species"
+            )
+            self._target_species = self._target_species_initial
         except KeyError:
-            raise SystemExit(
+            raise BuscoError(
                 "Something went wrong. Eukaryota datasets should specify an augustus species."
             )
         try:
@@ -220,6 +224,9 @@ class GenomeAnalysisEukaryotesAugustus(BLASTAnalysis, GenomeAnalysisEukaryotes):
         try:
             if self._target_species.startswith("BUSCO"):
                 self.augustus_runner.move_retraining_parameters()
+                self.config.set(
+                    "busco_run", "augustus_species", self._target_species_initial
+                )  # Reset parameter for batch mode
         except OSError:
             pass
         super().cleanup()
@@ -398,7 +405,7 @@ class GenomeAnalysisEukaryotesMetaeuk(GenomeAnalysisEukaryotes):
                 if i == 1:
                     logger.info("Metaeuk rerun did not find any genes")
                 else:
-                    raise SystemExit(
+                    raise BuscoError(
                         "Metaeuk did not find any genes in the input file."
                     )
 
@@ -406,7 +413,7 @@ class GenomeAnalysisEukaryotesMetaeuk(GenomeAnalysisEukaryotes):
             self.metaeuk_runner.combine_run_results()
         except FileNotFoundError:
             # This exception should only happen if the rerun file does not exist. If the initial run file was
-            # missing there would have been a SystemExit call above. The index 0 sets the "combined" file to the
+            # missing there would have been a BatchFatalError call above. The index 0 sets the "combined" file to the
             # output of the initial run.
             self.metaeuk_runner.combined_pred_protein_seqs = (
                 self.metaeuk_runner.pred_protein_mod_files[0]
@@ -744,7 +751,7 @@ class GenomeAnalysisEukaryotesMetaeuk(GenomeAnalysisEukaryotes):
             exon_matched = False
             exon_size_nt = int(entry["Stop"]) - int(entry["Start"]) + 1
             if not exon_size_nt % 3 == 0:
-                raise SystemExit(
+                raise BuscoError(
                     "The exon coordinates contain fractional reading frames and are ambiguous."
                 )
             exon_size_aa = exon_size_nt / 3


=====================================
src/busco/busco_tools/Toolset.py
=====================================
@@ -20,6 +20,7 @@ from abc import ABCMeta, abstractmethod
 from busco.BuscoLogger import BuscoLogger, ToolLogger
 from busco.BuscoLogger import LogDecorator as log
 from busco.BuscoLogger import StreamLogger
+from busco.Exceptions import BatchFatalError
 import logging
 
 logger = BuscoLogger.get_logger(__name__)
@@ -191,10 +192,10 @@ class Tool(metaclass=ABCMeta):
             self.log_no_jobs()
             return
 
-        if (
-            self.cpus is None
-        ):  # todo: need a different way to ensure self.cpus is nonzero number.
-            raise SystemExit("Number of CPUs not specified.")
+        if self.cpus is None:
+            raise BatchFatalError("Number of CPUs not specified.")
+        elif self.cpus == 0:
+            raise BatchFatalError("Number of CPUs must be greater than 0.")
 
         with Pool(
             self.cpus, initializer=type(self).init_globals, initargs=(Value("i", 0),)


=====================================
src/busco/busco_tools/augustus.py
=====================================
@@ -11,6 +11,7 @@ import shutil
 import numpy as np
 from configparser import NoOptionError
 import subprocess
+from busco.Exceptions import BuscoError
 
 logger = BuscoLogger.get_logger(__name__)
 
@@ -61,8 +62,16 @@ class AugustusRunner(BaseRunner):
 
     def __init__(self):
         self.gene_details = None
-        self._augustus_config_path = os.environ.get("AUGUSTUS_CONFIG_PATH")
-        self.config.set("busco_run", "augustus_config_path", self._augustus_config_path)
+        try:
+            self._augustus_config_path = os.environ.get("AUGUSTUS_CONFIG_PATH")
+            self.config.set(
+                "busco_run", "augustus_config_path", self._augustus_config_path
+            )
+        except TypeError:
+            raise BuscoError(
+                "AUGUSTUS_CONFIG_PATH environment variable has not been set"
+            )
+
         self._target_species = self.config.get("busco_run", "augustus_species")
         super().__init__()
         self._output_folder = os.path.join(self.run_folder, "augustus_output")
@@ -129,22 +138,22 @@ class AugustusRunner(BaseRunner):
         """
         check dependencies on files and folders
         properly configured.
-        :raises SystemExit: if Augustus config path is not writable or
+        :raises BuscoError: if Augustus config path is not writable or
         not set at all
-        :raises SystemExit: if Augustus config path does not contain
+        :raises BuscoError: if Augustus config path does not contain
         the needed species
         present
         """
         try:
             augustus_species_dir = os.path.join(self._augustus_config_path, "species")
             if not os.access(augustus_species_dir, os.W_OK):
-                raise SystemExit(
+                raise BuscoError(
                     "Cannot write to Augustus species folder, please make sure you have write "
                     "permissions to {}".format(augustus_species_dir)
                 )
 
         except TypeError:
-            raise SystemExit("The environment variable AUGUSTUS_CONFIG_PATH is not set")
+            raise BuscoError("The environment variable AUGUSTUS_CONFIG_PATH is not set")
 
         if not os.path.exists(os.path.join(augustus_species_dir, self._target_species)):
             # Exclude the case where this is a restarted run and the retraining parameters have already been moved.
@@ -161,7 +170,7 @@ class AugustusRunner(BaseRunner):
             ):
                 pass
             else:
-                raise SystemExit(
+                raise BuscoError(
                     'Impossible to locate the species "{0}" in Augustus species folder'
                     " ({1}), check that AUGUSTUS_CONFIG_PATH is properly set"
                     " and contains this species. \n\t\tSee the help if you want "
@@ -547,7 +556,7 @@ class AugustusRunner(BaseRunner):
                 if gene_found:
                     break
             if not gene_found and self.run_number == 1:
-                raise SystemExit(
+                raise BuscoError(
                     "Unable to find single copy BUSCO gene in Augustus output."
                 )
 
@@ -745,7 +754,7 @@ class ETrainingRunner(BaseRunner):
         ):
             return
         else:
-            raise SystemExit(
+            raise BuscoError(
                 "Retraining did not complete correctly. Check your Augustus config path environment variable."
             )
 


=====================================
src/busco/busco_tools/base.py
=====================================
@@ -4,6 +4,7 @@ from busco.busco_tools.Toolset import Tool
 from shutil import which
 from abc import ABCMeta, abstractmethod
 from busco.BuscoConfig import BuscoConfigAuto
+from busco.Exceptions import BuscoError
 import time
 
 logger = BuscoLogger.get_logger(__name__)
@@ -24,6 +25,7 @@ class ToolException(Exception):
 class BaseRunner(Tool, metaclass=ABCMeta):
 
     config = None
+    tool_versions = {}
 
     def __init__(self):
         super().__init__()
@@ -49,6 +51,7 @@ class BaseRunner(Tool, metaclass=ABCMeta):
                 "path!".format(self.name)
             )
         self.version = self.get_version()
+        type(self).tool_versions[self.name] = self.version
         self.check_tool_dependencies()
 
         self.checkpoint_file = None
@@ -102,7 +105,7 @@ class BaseRunner(Tool, metaclass=ABCMeta):
                         elif int(tool_run_numbers[tool_ind]) < int(self.run_number):
                             start_search = tool_ind + 1
                         else:
-                            raise SystemExit(
+                            raise BuscoError(
                                 "Something went wrong. Information for {} run {} missing but "
                                 "information for run {} found.".format(
                                     self.name,


=====================================
src/busco/busco_tools/blast.py
=====================================
@@ -3,6 +3,7 @@ import os
 from collections import defaultdict
 from busco.BuscoLogger import BuscoLogger
 from busco.BuscoLogger import LogDecorator as log
+from busco.Exceptions import BuscoError
 from Bio import SeqIO
 import subprocess
 
@@ -149,7 +150,7 @@ class TBLASTNRunner(BaseRunner):
             # Ensure value is between 5000 and MAX_FLANK
             flank = min(max(flank, 5000), type(self).MAX_FLANK)
         except IOError:  # Input data is only validated during run_analysis. This will catch any IO issues before that.
-            raise SystemExit(
+            raise BuscoError(
                 "Impossible to read the fasta file {}".format(self.input_file)
             )
 
@@ -192,13 +193,13 @@ class TBLASTNRunner(BaseRunner):
     def _check_output(self):
         # check that blast worked
         if not os.path.exists(self.blast_filename):
-            raise SystemExit("tblastn failed!")
+            raise BuscoError("tblastn failed!")
 
         # check that the file is not truncated
         with open(self.blast_filename, "r") as f:
             try:
                 if "processed" not in f.readlines()[-1]:
-                    raise SystemExit(
+                    raise BuscoError(
                         "tblastn has ended prematurely (the result file lacks the expected final line), "
                         "which will produce incomplete results in the next steps ! This problem likely "
                         "appeared in blast+ 2.4 and seems not fully fixed in 2.6. It happens only when "


=====================================
src/busco/busco_tools/hmmer.py
=====================================
@@ -9,6 +9,7 @@ from Bio import SeqIO
 import csv
 import subprocess
 from busco.BuscoConfig import BuscoConfigAuto
+from busco.Exceptions import BatchFatalError, BuscoError
 
 logger = BuscoLogger.get_logger(__name__)
 
@@ -44,6 +45,7 @@ class HMMERRunner(BaseRunner):
         self.extra_columns = False
         self.log_count = 0  # Dummy variable used to skip logging for intermediate eukaryote pipeline results.
         self.one_line_summary = None
+        self.one_line_summary_raw = None
 
         # to be initialized before run time
         self.input_sequences = None
@@ -85,6 +87,8 @@ class HMMERRunner(BaseRunner):
         self.s_percent = 0
         self.d_percent = 0
         self.f_percent = 0
+        self.complete_percent = 0
+        self.missing_percent = 0
 
         self.hmmer_results_lines = None
 
@@ -199,7 +203,6 @@ class HMMERRunner(BaseRunner):
     def get_version(self):
         """
         check the Tool has the correct version
-        :raises SystemExit: if the version is not correct
         """
         hmmer_version = subprocess.check_output(
             [self.cmd, "-h"], stderr=subprocess.STDOUT, shell=False
@@ -218,11 +221,11 @@ class HMMERRunner(BaseRunner):
     def check_tool_dependencies(self):
         """
         check dependencies on tools
-        :raises SystemExit: if a Tool version is not supported
+        :raises BatchFatalError: if a Tool version is not supported
         """
         # check hmm version
         if not self.version >= BuscoConfig.HMMER_VERSION:
-            raise SystemExit(
+            raise BatchFatalError(
                 "HMMer version detected is not supported, please use HMMer v.{} +".format(
                     BuscoConfig.HMMER_VERSION
                 )
@@ -313,7 +316,7 @@ class HMMERRunner(BaseRunner):
                         records[gene_id]["env_coords"].append((env_start, env_end))
 
                     except IndexError as e:
-                        raise SystemExit(
+                        raise BuscoError(
                             e, "Cannot parse HMMER output file {}".format(filename)
                         )
         return records
@@ -444,7 +447,7 @@ class HMMERRunner(BaseRunner):
             )
             matched_genes = self.matched_genes_fragment
         else:
-            raise SystemExit("Unrecognized dictionary of BUSCOs.")
+            raise BuscoError("Unrecognized dictionary of BUSCOs.")
 
         for busco_id in list(busco_dict.keys()):
             matches = busco_dict[busco_id]
@@ -507,7 +510,7 @@ class HMMERRunner(BaseRunner):
         elif busco_dict == self.is_fragment:
             matched_genes = self.matched_genes_fragment
         else:
-            raise SystemExit("Unrecognized dictionary of BUSCOs.")
+            raise BuscoError("Unrecognized dictionary of BUSCOs.")
 
         busco_matches_to_remove = []
         # Keep the best scoring gene if gene is matched by more than one busco with the same match rank
@@ -778,7 +781,7 @@ class HMMERRunner(BaseRunner):
     def _load_length(self):
         """
         This function loads the length cutoffs file
-        :raises SystemExit: if the lengths_cutoff file cannot be read
+        :raises BuscoError: if the lengths_cutoff file cannot be read
         """
         lengths_cutoff_file = os.path.join(self.lineage_dataset, "lengths_cutoff")
         try:
@@ -797,9 +800,9 @@ class HMMERRunner(BaseRunner):
                             self.cutoff_dict[taxid]["sigma"] = 1
                         self.cutoff_dict[taxid]["length"] = length
                     except IndexError as e:
-                        raise SystemExit(e, "Error parsing the lengths_cutoff file.")
+                        raise BuscoError(e, "Error parsing the lengths_cutoff file.")
         except IOError:
-            raise SystemExit(
+            raise BuscoError(
                 "Impossible to read the lengths in {}".format(
                     os.path.join(lengths_cutoff_file)
                 )
@@ -809,7 +812,7 @@ class HMMERRunner(BaseRunner):
     def _load_score(self):
         """
         This function loads the score cutoffs file
-        :raises SystemExit: if the scores_cutoff file cannot be read
+        :raises BuscoError: if the scores_cutoff file cannot be read
         """
         scores_cutoff_file = os.path.join(self.lineage_dataset, "scores_cutoff")
         try:
@@ -822,9 +825,9 @@ class HMMERRunner(BaseRunner):
                         score = float(line[1])
                         self.cutoff_dict[taxid]["score"] = score
                     except IndexError as e:
-                        raise SystemExit(e, "Error parsing the scores_cutoff file.")
+                        raise BuscoError(e, "Error parsing the scores_cutoff file.")
         except IOError:
-            raise SystemExit(
+            raise BuscoError(
                 "Impossible to read the scores in {}".format(scores_cutoff_file)
             )
         return
@@ -900,52 +903,58 @@ class HMMERRunner(BaseRunner):
         return sorted_lines
 
     def produce_hmmer_summary(self):
-        (
-            single_copy,
-            multi_copy,
-            only_fragments,
-            total_buscos,
-        ) = self._get_busco_percentages()
+        self._get_busco_percentages()
 
         self.hmmer_results_lines.append("***** Results: *****\n\n")
-        self.one_line_summary = "C:{}%[S:{}%,D:{}%],F:{}%,M:{}%,n:{}\t{}\n".format(
-            round(self.s_percent + self.d_percent, 1),
+        self.one_line_summary_raw = "C:{}%[S:{}%,D:{}%],F:{}%,M:{}%,n:{}\t{}\n".format(
+            self.complete_percent,
             self.s_percent,
             self.d_percent,
             self.f_percent,
-            abs(round(100 - self.s_percent - self.d_percent - self.f_percent, 1)),
-            total_buscos,
+            self.missing_percent,
+            self.total_buscos,
             "   ",
         )
-        self.hmmer_results_lines.append(self.one_line_summary)
+        self.one_line_summary = "Results:\t{}".format(self.one_line_summary_raw)
+        self.hmmer_results_lines.append(self.one_line_summary_raw)
         self.hmmer_results_lines.append(
-            "{}\tComplete BUSCOs (C)\t\t\t{}\n".format(single_copy + multi_copy, "   ")
+            "{}\tComplete BUSCOs (C)\t\t\t{}\n".format(
+                self.single_copy + self.multi_copy, "   "
+            )
         )
         self.hmmer_results_lines.append(
-            "{}\tComplete and single-copy BUSCOs (S)\t{}\n".format(single_copy, "   ")
+            "{}\tComplete and single-copy BUSCOs (S)\t{}\n".format(
+                self.single_copy, "   "
+            )
         )
         self.hmmer_results_lines.append(
-            "{}\tComplete and duplicated BUSCOs (D)\t{}\n".format(multi_copy, "   ")
+            "{}\tComplete and duplicated BUSCOs (D)\t{}\n".format(
+                self.multi_copy, "   "
+            )
         )
         self.hmmer_results_lines.append(
-            "{}\tFragmented BUSCOs (F)\t\t\t{}\n".format(only_fragments, "   ")
+            "{}\tFragmented BUSCOs (F)\t\t\t{}\n".format(self.only_fragments, "   ")
         )
         self.hmmer_results_lines.append(
             "{}\tMissing BUSCOs (M)\t\t\t{}\n".format(
-                total_buscos - single_copy - multi_copy - only_fragments, "   "
+                self.total_buscos
+                - self.single_copy
+                - self.multi_copy
+                - self.only_fragments,
+                "   ",
             )
         )
         self.hmmer_results_lines.append(
-            "{}\tTotal BUSCO groups searched\t\t{}\n".format(total_buscos, "   ")
+            "{}\tTotal BUSCO groups searched\t\t{}\n".format(self.total_buscos, "   ")
         )
 
         if isinstance(self.config, BuscoConfigAuto):
-            self._one_line_hmmer_summary()
+            self._log_one_line_hmmer_summary()
         elif self.domain == "eukaryota" and self.log_count == 0:
             self.log_count += 1
             self._produce_full_hmmer_summary_debug()
         else:
-            self._one_line_hmmer_summary()
+            self._log_one_line_hmmer_summary()
 
         with open(self.short_summary_file, "w") as summary_file:
 
@@ -967,6 +976,10 @@ class HMMERRunner(BaseRunner):
             for line in self.hmmer_results_lines:
                 summary_file.write("\t{}".format(line))
 
+            tool_versions_lines = self.report_tool_versions()
+            for line in tool_versions_lines:
+                summary_file.write(line + "\n")
+
             if (
                 self.config.getboolean("busco_run", "auto-lineage")
                 and isinstance(self.config, BuscoConfigMain)
@@ -974,10 +987,17 @@ class HMMERRunner(BaseRunner):
             ):
                 summary_file.write("\nPlacement file versions:\n")
                 for placement_file in self.config.placement_files:
-                    summary_file.write("{}\n".format(placement_file))
+                    summary_file.write("\t{}\n".format(placement_file))
 
         return
 
+    def report_tool_versions(self):
+        lines = []
+        lines.append("\nDependencies and versions:")
+        for key, value in type(self).tool_versions.items():
+            lines.append("\t{}: {}".format(key, value))
+        return lines
+
     @log("{}", logger, attr_name="hmmer_results_lines", apply="join", on_func_exit=True)
     def _produce_full_hmmer_summary(self):
         return
@@ -993,9 +1013,8 @@ class HMMERRunner(BaseRunner):
     def _produce_full_hmmer_summary_debug(self):
         return
 
-    @log("{}", logger, attr_name="one_line_summary", on_func_exit=True)
-    def _one_line_hmmer_summary(self):
-        self.one_line_summary = "Results:\t{}".format(self.one_line_summary)
+    @log("{}", logger, attr_name="one_line_summary")
+    def _log_one_line_hmmer_summary(self):
         return
 
     def _write_output_header(
@@ -1009,7 +1028,7 @@ class HMMERRunner(BaseRunner):
         """
         file_object.write(
             "# BUSCO version is: {} \n"
-            "# The lineage dataset is: {} (Creation date: {}, number of species: {}, number of BUSCOs: {}"
+            "# The lineage dataset is: {} (Creation date: {}, number of genomes: {}, number of BUSCOs: {}"
             ")\n".format(
                 busco.__version__,
                 os.path.basename(self.lineage_dataset),
@@ -1055,5 +1074,9 @@ class HMMERRunner(BaseRunner):
         self.s_percent = abs(round((self.single_copy / self.total_buscos) * 100, 1))
         self.d_percent = abs(round((self.multi_copy / self.total_buscos) * 100, 1))
         self.f_percent = abs(round((self.only_fragments / self.total_buscos) * 100, 1))
+        self.complete_percent = round(self.s_percent + self.d_percent, 1)
+        self.missing_percent = abs(
+            round(100 - self.s_percent - self.d_percent - self.f_percent, 1)
+        )
 
-        return self.single_copy, self.multi_copy, self.only_fragments, self.total_buscos
+        return


=====================================
src/busco/busco_tools/metaeuk.py
=====================================
@@ -10,6 +10,7 @@ import gzip
 import pandas as pd
 import numpy as np
 import re
+from busco.Exceptions import BuscoError
 
 logger = BuscoLogger.get_logger(__name__)
 
@@ -143,9 +144,14 @@ class MetaeukRunner(BaseRunner):
         self.ancestral_variants_file = os.path.join(
             self.lineage_dataset, "ancestral_variants"
         )
-
-        self.max_intron = self.config.get("busco_run", "max_intron")
-        self.max_seq_len = self.config.get("busco_run", "max_seq_len")
+        try:
+            self.max_intron = self.config.get("busco_run", "max_intron")
+            self.max_seq_len = self.config.get("busco_run", "max_seq_len")
+        except NoOptionError:
+            raise BuscoError(
+                "{} is an old dataset version and is not compatible with Metaeuk. Please update by using "
+                "the '--update-data' command line option".format(self.lineage_dataset)
+            )
         self.overlap = 1
         self.s_set = False
 
@@ -187,8 +193,6 @@ class MetaeukRunner(BaseRunner):
             self.refseq_db_rerun = os.path.join(
                 self._output_folder, "refseq_db_rerun.faa"
             )
-            self._extract_incomplete_buscos_ancestral()
-            self.refseq_db = self.refseq_db_rerun
             self.min_exon_aa = 5
             self.max_overlap = 5
             self.min_intron = 1
@@ -219,8 +223,8 @@ class MetaeukRunner(BaseRunner):
         self.codon_file = "{}.codon.fas".format(self._output_basename)
         self.pred_protein_seqs = "{}.fas".format(self._output_basename)
         self.pred_protein_files.append(self.pred_protein_seqs)
-        self.pred_protein_seqs_modified = self.pred_protein_seqs.replace(
-            ".fas", ".modified.fas"
+        self.pred_protein_seqs_modified = ".modified.fas".join(
+            self.pred_protein_seqs.rsplit(".fas", 1)
         )
         self.pred_protein_mod_files.append(self.pred_protein_seqs_modified)
 
@@ -376,6 +380,9 @@ class MetaeukRunner(BaseRunner):
 
     def run(self):
         super().run()
+        if self.run_number > 1:
+            self._extract_incomplete_buscos_ancestral()
+            self.refseq_db = self.refseq_db_rerun
         if self.extra_params:
             logger.info(
                 "Additional parameters for Metaeuk are {}: ".format(self.extra_params)
@@ -637,6 +644,6 @@ class MetaeukRunner(BaseRunner):
                     )
 
             except KeyError:
-                raise SystemExit("*headersMap.tsv file could not be parsed.")
+                raise BuscoError("*headersMap.tsv file could not be parsed.")
         except FileNotFoundError:
             raise NoRerunFile


=====================================
src/busco/busco_tools/prodigal.py
=====================================
@@ -9,6 +9,7 @@ import shutil
 import numpy as np
 from configparser import NoOptionError
 import subprocess
+from busco.Exceptions import BuscoError
 
 logger = BuscoLogger.get_logger(__name__)
 
@@ -45,7 +46,7 @@ class ProdigalRunner(BaseRunner):
                 else 0
             )
         except NoOptionError:
-            raise SystemExit(
+            raise BuscoError(
                 "Dataset config file does not contain required information. Please upgrade datasets."
             )
 


=====================================
src/busco/run_BUSCO.py
=====================================
@@ -4,7 +4,7 @@
 .. module:: run_BUSCO
    :synopsis:
 .. versionadded:: 3.0.0
-.. versionchanged:: 5.0.0
+.. versionchanged:: 5.1.0
 
 BUSCO - Benchmarking Universal Single-Copy Orthologs.
 This is the BUSCO main script.
@@ -18,25 +18,108 @@ Licensed under the MIT license. See LICENSE.md file.
 
 """
 
-import time
-import traceback
-import sys
+
 import argparse
-import os
-import shutil
 from argparse import RawTextHelpFormatter
 import busco
+from busco.BuscoRunner import AnalysisRunner, BatchRunner, SingleRunner
+from busco.Exceptions import BatchFatalError, BuscoError
 from busco.BuscoLogger import BuscoLogger
 from busco.BuscoLogger import LogDecorator as log
 from busco.ConfigManager import BuscoConfigManager
-from busco.busco_tools.Toolset import ToolException
-from busco.BuscoRunner import BuscoRunner
 from busco.Actions import ListLineagesAction, CleanHelpAction, CleanVersionAction
 from busco.ConfigManager import BuscoConfigMain
 
+# from busco.busco_tools.Toolset import ToolException
+import sys
+import time
+
+
 logger = BuscoLogger.get_logger(__name__)
 
 
+@log(
+    "***** Start a BUSCO v{} analysis, current time: {} *****".format(
+        busco.__version__, time.strftime("%m/%d/%Y %H:%M:%S")
+    ),
+    logger,
+)
+class BuscoMaster:
+    def __init__(self, params):
+        self.params = params
+        self.config_manager = BuscoConfigManager(self.params)
+        self.config = self.config_manager.config_main
+
+    def harmonize_auto_lineage_settings(self):
+        if not self.config.check_lineage_present():
+            if (
+                not self.config.getboolean("busco_run", "auto-lineage")
+                and not self.config.getboolean("busco_run", "auto-lineage-prok")
+                and not self.config.getboolean("busco_run", "auto-lineage-euk")
+            ):
+                logger.warning(
+                    "Running Auto Lineage Selector as no lineage dataset was specified. This will take a "
+                    "little longer than normal. If you know what lineage dataset you want to use, please "
+                    "specify this in the config file or using the -l (--lineage-dataset) flag in the "
+                    "command line."
+                )
+            self.config.set("busco_run", "auto-lineage", "True")
+
+        else:
+            if (
+                self.config.getboolean("busco_run", "auto-lineage")
+                or self.config.getboolean("busco_run", "auto-lineage-prok")
+                or self.config.getboolean("busco_run", "auto-lineage-euk")
+            ):
+                logger.warning(
+                    "You have selected auto-lineage but you have also provided a lineage dataset. "
+                    "BUSCO will proceed with the specified dataset. "
+                    "To run auto-lineage do not specify a dataset."
+                )
+            self.config.set("busco_run", "auto-lineage", "False")
+            self.config.set("busco_run", "auto-lineage-prok", "False")
+            self.config.set("busco_run", "auto-lineage-euk", "False")
+        return
+
+    def load_config(self):
+        """
+        Load a busco config file that will figure out all the params from all sources
+        i.e. provided config file, dataset cfg, and user args
+        """
+        self.config_manager.load_busco_config_main(sys.argv)
+        self.config = self.config_manager.config_main
+
+    def check_batch_mode(self):
+        return self.config.getboolean("busco_run", "batch_mode")
+
+    def run(self):
+        # Need to add try/except blocks, distinguishing between run fatal and batch fatal
+        try:
+            self.load_config()
+            self.harmonize_auto_lineage_settings()
+            runner = (
+                BatchRunner(self.config_manager)
+                if self.check_batch_mode()
+                else SingleRunner(self.config_manager)
+            )
+            runner.run()
+
+        except BuscoError as be:
+            SingleRunner.log_error(be)
+            raise SystemExit(1)
+
+        except BatchFatalError as bfe:
+            SingleRunner.log_error(bfe)
+            raise SystemExit(1)
+
+        finally:
+            try:
+                AnalysisRunner.move_log_file(self.config)
+            except:
+                pass
+
+
+@log("Command line: {}".format(" ".join(sys.argv[:])), logger, debug=True)
 def _parse_args():
     """
     This function parses the arguments provided by the user
@@ -45,9 +128,9 @@ def _parse_args():
     """
 
     parser = argparse.ArgumentParser(
-        description="Welcome to BUSCO %s: the Benchmarking Universal Single-Copy Ortholog assessment tool.\n"
+        description="Welcome to BUSCO {}: the Benchmarking Universal Single-Copy Ortholog assessment tool.\n"
         "For more detailed usage information, please review the README file provided with "
-        "this distribution and the BUSCO user guide." % busco.__version__,
+        "this distribution and the BUSCO user guide.".format(busco.__version__),
         usage="busco -i [SEQUENCE_FILE] -l [LINEAGE] -o [OUTPUT_NAME] -m [MODE] [OTHER OPTIONS]",
         formatter_class=RawTextHelpFormatter,
         add_help=False,
@@ -60,7 +143,7 @@ def _parse_args():
         "--in",
         dest="in",
         required=False,
-        metavar="FASTA FILE",
+        metavar="SEQUENCE_FILE",
         help="Input sequence file in FASTA format. "
         "Can be an assembled genome or transcriptome (DNA), or protein sequences from an annotated gene set.",
     )
@@ -97,19 +180,35 @@ def _parse_args():
     )
 
     optional.add_argument(
-        "--auto-lineage",
-        dest="auto-lineage",
+        "--augustus",
+        dest="use_augustus",
         action="store_true",
         required=False,
-        help="Run auto-lineage to find optimum lineage path",
+        help="Use augustus gene predictor for eukaryote runs",
     )
 
     optional.add_argument(
-        "--auto-lineage-prok",
-        dest="auto-lineage-prok",
+        "--augustus_parameters",
+        dest="augustus_parameters",
+        metavar='"--PARAM1=VALUE1,--PARAM2=VALUE2"',
+        required=False,
+        help="Pass additional arguments to Augustus. All arguments should be contained within a "
+        "single pair of quotation marks, separated by commas.",
+    )
+
+    optional.add_argument(
+        "--augustus_species",
+        dest="augustus_species",
+        required=False,
+        help="Specify a species for Augustus training.",
+    )
+
+    optional.add_argument(
+        "--auto-lineage",
+        dest="auto-lineage",
         action="store_true",
         required=False,
-        help="Run auto-lineage just on non-eukaryote trees to find optimum lineage path",
+        help="Run auto-lineage to find optimum lineage path",
     )
 
     optional.add_argument(
@@ -120,50 +219,40 @@ def _parse_args():
         help="Run auto-placement just on eukaryote tree to find optimum lineage path",
     )
 
+    optional.add_argument(
+        "--auto-lineage-prok",
+        dest="auto-lineage-prok",
+        action="store_true",
+        required=False,
+        help="Run auto-lineage just on non-eukaryote trees to find optimum lineage path",
+    )
+
     optional.add_argument(
         "-c",
         "--cpu",
         dest="cpu",
+        type=int,
         required=False,
         metavar="N",
         help="Specify the number (N=integer) " "of threads/cores to use.",
     )
 
     optional.add_argument(
-        "-f",
-        "--force",
-        action="store_true",
-        required=False,
-        dest="force",
-        help="Force rewriting of existing files. "
-        "Must be used when output files with the provided name already exist.",
-    )
-
-    optional.add_argument(
-        "-r",
-        "--restart",
-        action="store_true",
-        required=False,
-        dest="restart",
-        help="Continue a run that had already partially completed.",
+        "--config", dest="config_file", required=False, help="Provide a config file"
     )
 
     optional.add_argument(
-        "-q",
-        "--quiet",
-        dest="quiet",
+        "--datasets_version",
+        dest="datasets_version",
         required=False,
-        help="Disable the info logs, displays only errors",
-        action="store_true",
+        help="Specify the version of BUSCO datasets, e.g. odb10",
     )
 
     optional.add_argument(
-        "--out_path",
-        dest="out_path",
+        "--download_base_url",
+        dest="download_base_url",
         required=False,
-        metavar="OUTPUT_PATH",
-        help="Optional location for results folder, excluding results folder name. "
-        "Default is current working directory.",
+        help="Set the url to the remote BUSCO dataset location",
     )
 
     optional.add_argument(
@@ -174,110 +263,118 @@ def _parse_args():
     )
 
     optional.add_argument(
-        "--datasets_version",
-        dest="datasets_version",
+        "-e",
+        "--evalue",
+        dest="evalue",
         required=False,
-        help="Specify the version of BUSCO datasets, e.g. odb10",
+        metavar="N",
+        type=float,
+        help="E-value cutoff for BLAST searches. "
+        "Allowed formats, 0.001 or 1e-03 (Default: {:.0e})".format(
+            BuscoConfigMain.DEFAULT_ARGS_VALUES["evalue"]
+        ),
     )
 
     optional.add_argument(
-        "--download_base_url",
-        dest="download_base_url",
+        "-f",
+        "--force",
+        action="store_true",
         required=False,
-        help="Set the url to the remote BUSCO dataset location",
+        dest="force",
+        help="Force rewriting of existing files. "
+        "Must be used when output files with the provided name already exist.",
     )
 
     optional.add_argument(
-        "--update-data",
-        dest="update-data",
-        action="store_true",
+        "-h", "--help", action=CleanHelpAction, help="Show this help message and exit"
+    )
+
+    optional.add_argument(
+        "--limit",
+        dest="limit",
+        metavar="N",
         required=False,
-        help="Download and replace with last versions all lineages datasets and files necessary"
-        " to their automated selection",
+        type=int,
+        help="How many candidate regions (contig or transcript) to consider per BUSCO (default: {})".format(
+            str(BuscoConfigMain.DEFAULT_ARGS_VALUES["limit"])
+        ),
     )
 
     optional.add_argument(
-        "--offline",
-        dest="offline",
+        "--list-datasets",
+        action=ListLineagesAction,
+        help="Print the list of available BUSCO datasets",
+    )
+
+    optional.add_argument(
+        "--long",
         action="store_true",
         required=False,
-        help="To indicate that BUSCO cannot attempt to download files",
+        dest="long",
+        help="Optimization Augustus self-training mode (Default: Off); adds considerably to the run "
+        "time, but can improve results for some non-model organisms",
     )
 
     optional.add_argument(
         "--metaeuk_parameters",
         dest="metaeuk_parameters",
+        metavar='"--PARAM1=VALUE1,--PARAM2=VALUE2"',
         required=False,
         help="Pass additional arguments to Metaeuk for the first run. All arguments should be "
-        "contained within a single pair of quotation marks, separated by commas. "
-        'E.g. "--param1=1,--param2=2"',
+        "contained within a single pair of quotation marks, separated by commas. ",
     )
 
     optional.add_argument(
         "--metaeuk_rerun_parameters",
         dest="metaeuk_rerun_parameters",
+        metavar='"--PARAM1=VALUE1,--PARAM2=VALUE2"',
         required=False,
         help="Pass additional arguments to Metaeuk for the second run. All arguments should be "
-        "contained within a single pair of quotation marks, separated by commas. "
-        'E.g. "--param1=1,--param2=2"',
+        "contained within a single pair of quotation marks, separated by commas. ",
     )
 
     optional.add_argument(
-        "-e",
-        "--evalue",
-        dest="evalue",
-        required=False,
-        metavar="N",
-        type=float,
-        help="E-value cutoff for BLAST searches. "
-        "Allowed formats, 0.001 or 1e-03 (Default: %.0e)"
-        % BuscoConfigMain.DEFAULT_ARGS_VALUES["evalue"],
-    )
-
-    optional.add_argument(
-        "--limit",
-        dest="limit",
-        metavar="REGION_LIMIT",
-        required=False,
-        type=int,
-        help="How many candidate regions (contig or transcript) to consider per BUSCO (default: %s)"
-        % str(BuscoConfigMain.DEFAULT_ARGS_VALUES["limit"]),
-    )
-
-    optional.add_argument(
-        "--augustus",
-        dest="use_augustus",
+        "--offline",
+        dest="offline",
         action="store_true",
         required=False,
-        help="Use augustus gene predictor for eukaryote runs",
+        help="To indicate that BUSCO cannot attempt to download files",
     )
 
     optional.add_argument(
-        "--augustus_parameters",
-        dest="augustus_parameters",
+        "--out_path",
+        dest="out_path",
         required=False,
-        help="Pass additional arguments to Augustus. All arguments should be contained within a "
-        'single pair of quotation marks, separated by commas. E.g. "--param1=1,--param2=2"',
+        metavar="OUTPUT_PATH",
+        help="Optional location for results folder, excluding results folder name. "
+        "Default is current working directory.",
     )
 
     optional.add_argument(
-        "--augustus_species",
-        dest="augustus_species",
+        "-q",
+        "--quiet",
+        dest="quiet",
         required=False,
-        help="Specify a species for Augustus training.",
+        help="Disable the info logs, displays only errors",
+        action="store_true",
     )
 
     optional.add_argument(
-        "--long",
+        "-r",
+        "--restart",
         action="store_true",
         required=False,
-        dest="long",
-        help="Optimization Augustus self-training mode (Default: Off); adds considerably to the run "
-        "time, but can improve results for some non-model organisms",
+        dest="restart",
+        help="Continue a run that had already partially completed.",
     )
 
     optional.add_argument(
-        "--config", dest="config_file", required=False, help="Provide a config file"
+        "--update-data",
+        dest="update-data",
+        action="store_true",
+        required=False,
+        help="Download and replace with last versions all lineages datasets and files necessary"
+        " to their automated selection",
     )
 
     optional.add_argument(
@@ -285,17 +382,7 @@ def _parse_args():
         "--version",
         action=CleanVersionAction,
         help="Show this version and exit",
-        version="BUSCO %s" % busco.__version__,
-    )
-
-    optional.add_argument(
-        "-h", "--help", action=CleanHelpAction, help="Show this help message and exit"
-    )
-
-    optional.add_argument(
-        "--list-datasets",
-        action=ListLineagesAction,
-        help="Print the list of available BUSCO datasets",
+        version="BUSCO {}".format(busco.__version__),
     )
 
     return vars(parser.parse_args())
@@ -308,105 +395,9 @@ def main():
     ``busco -h``
     :raises SystemExit: if any errors occur
     """
-
     params = _parse_args()
-    run_BUSCO(params)
-
-
-@log(
-    "***** Start a BUSCO v{} analysis, current time: {} *****".format(
-        busco.__version__, time.strftime("%m/%d/%Y %H:%M:%S")
-    ),
-    logger,
-)
-def run_BUSCO(params):
-    start_time = time.time()
-
-    try:
-        # Load a busco config file that will figure out all the params from all sources
-        # i.e. provided config file, dataset cfg, and user args
-        config_manager = BuscoConfigManager(params)
-        config_manager.load_busco_config(sys.argv)
-        config = config_manager.config
-
-        lineage_basename = os.path.basename(config.get("busco_run", "lineage_dataset"))
-        main_out_folder = config.get("busco_run", "main_out")
-        lineage_results_folder = os.path.join(
-            main_out_folder,
-            "auto_lineage",
-            config.get("busco_run", "lineage_results_dir"),
-        )
-
-        if config.getboolean("busco_run", "auto-lineage"):
-            if lineage_basename.startswith(("bacteria", "archaea", "eukaryota")):
-                busco_run = config_manager.runner
-
-            # It is possible that the following lineages were arrived at either by the Prodigal genetic code shortcut
-            # or by BuscoPlacer. If the former, the run will have already been completed. If the latter it still needs
-            # to be done.
-            elif lineage_basename.startswith(
-                ("mollicutes", "mycoplasmatales", "entomoplasmatales")
-            ) and os.path.exists(lineage_results_folder):
-                busco_run = config_manager.runner
-            else:
-                busco_run = BuscoRunner(config)
-        else:
-            busco_run = BuscoRunner(config)
-
-        if os.path.exists(lineage_results_folder):
-            new_dest = os.path.join(
-                main_out_folder, config.get("busco_run", "lineage_results_dir")
-            )
-            if os.path.exists(
-                new_dest
-            ):  # New dest would only exist if this is a rerun of a previously completed run
-                shutil.rmtree(new_dest)
-            os.rename(lineage_results_folder, new_dest)
-        else:
-            busco_run.run_analysis()
-            BuscoRunner.final_results.append(
-                busco_run.analysis.hmmer_runner.hmmer_results_lines
-            )
-            BuscoRunner.results_datasets.append(lineage_basename)
-        busco_run.finish(time.time() - start_time)
-
-    except ToolException as e:
-        logger.error(e)
-        raise SystemExit(1)
-
-    except SystemExit as se:
-        logger.error(se)
-        logger.debug(se, exc_info=True)
-        logger.error("BUSCO analysis failed !")
-        logger.error(
-            "Check the logs, read the user guide (https://busco.ezlab.org/busco_userguide.html), "
-            "and check the BUSCO issue board on https://gitlab.com/ezlab/busco/issues"
-        )
-        try:
-            BuscoRunner.move_log_file(config)
-        except NameError:
-            try:
-                BuscoRunner.move_log_file(config_manager.config)
-            except:
-                pass
-        except:
-            pass
-        raise SystemExit(1)
-
-    except KeyboardInterrupt:
-        logger.exception(
-            "A signal was sent to kill the process. \nBUSCO analysis failed !"
-        )
-        raise SystemExit(1)
-
-    except BaseException:
-        exc_type, exc_value, exc_traceback = sys.exc_info()
-        logger.critical(
-            "Unhandled exception occurred:\n{}\n".format(
-                "".join(traceback.format_exception(exc_type, exc_value, exc_traceback))
-            )
-        )
-        raise SystemExit(1)
+    busco_run = BuscoMaster(params)
+    busco_run.run()
 
 
 # Entry point


=====================================
tests/unittest_runner.py
=====================================
@@ -4,6 +4,7 @@ from tests.unittests import ConfigManager_unittests
 from tests.unittests import BuscoConfig_unittests
 from tests.unittests import AutoLineage_unittests
 from tests.unittests import GenomeAnalysis_unittests
+from tests.unittests import BuscoRunner_unittests
 import sys
 
 loader = unittest.TestLoader()
@@ -14,6 +15,7 @@ suite.addTests(loader.loadTestsFromModule(ConfigManager_unittests))
 suite.addTests(loader.loadTestsFromModule(BuscoConfig_unittests))
 suite.addTests(loader.loadTestsFromModule(AutoLineage_unittests))
 suite.addTests(loader.loadTestsFromModule(GenomeAnalysis_unittests))
+suite.addTests(loader.loadTestsFromModule(BuscoRunner_unittests))
 
 runner = unittest.TextTestRunner(verbosity=3)
 result = runner.run(suite)


=====================================
tests/unittests/AutoLineage_unittests.py
=====================================
@@ -1,33 +1,34 @@
 import unittest
 from unittest.mock import patch, call, Mock
 from busco import AutoLineage, BuscoRunner
+from busco.ConfigManager import BuscoConfigManager
 
 
 class TestAutoLineage(unittest.TestCase):
     def setUp(self):
         pass
 
-    @patch("busco.BuscoConfig.BuscoConfigMain", autospec=True)
-    def test_init_autolineage(self, mock_config_main):
+    @patch("busco.ConfigManager.BuscoConfigManager")
+    def test_init_autolineage(self, mock_config_manager):
         with self.assertLogs(AutoLineage.logger, 20):
-            AutoLineage.AutoSelectLineage(mock_config_main)
+            AutoLineage.AutoSelectLineage(mock_config_manager)
 
     @patch("busco.AutoLineage.AutoSelectLineage.virus_check", return_value=False)
-    @patch(
-        "busco.BuscoConfig.BuscoConfigMain.getboolean",
-        side_effect=[True, False, True, False, False],
-    )
     @patch("busco.AutoLineage.logger.info")
     @patch("busco.AutoLineage.os")
-    @patch("busco.AutoLineage.BuscoRunner")
+    @patch("busco.AutoLineage.AnalysisRunner")
     @patch("busco.AutoLineage.AutoSelectLineage.get_best_match_lineage")
     @patch("busco.AutoLineage.AutoSelectLineage.run_lineages_list")
-    @patch("busco.BuscoConfig.BuscoConfigMain", autospec=True)
     def test_run_auto_selector_lineage_lists_no_virus(
-        self, mock_config_main, mock_run_lineages_list, *args
+        self, mock_run_lineages_list, *args
     ):
+        config_manager = BuscoConfigManager({})
+        config_manager.config_main = Mock()
+        config_manager.config_main.getboolean = Mock(
+            side_effect=[True, False, True, False, False]
+        )
         for _ in range(3):
-            asl = AutoLineage.AutoSelectLineage(mock_config_main)
+            asl = AutoLineage.AutoSelectLineage(config_manager)
             asl.selected_runner = Mock()
             asl.selected_runner.analysis.hmmer_runner.single_copy_buscos = [
                 0
@@ -44,38 +45,41 @@ class TestAutoLineage(unittest.TestCase):
         mock_run_lineages_list.assert_has_calls(calls, any_order=True)
 
     @patch("busco.AutoLineage.logger.info")
-    @patch("__main__.AutoLineage_unittests.AutoLineage.BuscoRunner")
-    @patch("busco.AutoLineage.BuscoConfigAuto", autospec=True)
-    @patch("busco.BuscoConfig.BuscoConfigMain")
-    def test_run_lineages_initializes_BuscoConfigAuto(
-        self, mock_config_main, mock_config_auto, *args
-    ):
-        asl = AutoLineage.AutoSelectLineage(mock_config_main)
+    @patch("__main__.AutoLineage_unittests.AutoLineage.AnalysisRunner")
+    @patch("busco.ConfigManager.BuscoConfigAuto", autospec=True)
+    def test_run_lineages_initializes_BuscoConfigAuto(self, mock_config_auto, *args):
+        config_manager = BuscoConfigManager({})
+        config_manager.config_main = Mock()
+        asl = AutoLineage.AutoSelectLineage(config_manager)
         test_lineages = ["a", "b", "c"]
         test_dataset_version = "<dataset_version>"
         asl.dataset_version = test_dataset_version
         asl.run_lineages_list(test_lineages)
         calls = [
             call(
-                mock_config_main, "{}_{}".format(test_lineages[0], test_dataset_version)
+                config_manager.config_main,
+                "{}_{}".format(test_lineages[0], test_dataset_version),
             ),
             call(
-                mock_config_main, "{}_{}".format(test_lineages[1], test_dataset_version)
+                config_manager.config_main,
+                "{}_{}".format(test_lineages[1], test_dataset_version),
             ),
             call(
-                mock_config_main, "{}_{}".format(test_lineages[2], test_dataset_version)
+                config_manager.config_main,
+                "{}_{}".format(test_lineages[2], test_dataset_version),
             ),
         ]
         mock_config_auto.assert_has_calls(calls, any_order=True)
 
     @patch("busco.AutoLineage.logger.info")
-    @patch("busco.AutoLineage.BuscoConfigAuto", autospec=True)
-    @patch("busco.BuscoConfig.BuscoConfigMain")
-    @patch("__main__.AutoLineage_unittests.AutoLineage.BuscoRunner")
+    @patch("__main__.AutoLineage_unittests.BuscoConfigManager.load_busco_config_auto")
+    @patch("__main__.AutoLineage_unittests.AutoLineage.AnalysisRunner")
     def test_run_lineages_initializes_BuscoRunner(
-        self, mock_runner, mock_config_main, mock_config_auto, *args
+        self, mock_runner, mock_config_auto, *args
     ):
-        asl = AutoLineage.AutoSelectLineage(mock_config_main)
+        config_manager = BuscoConfigManager({})
+        config_manager.config_main = Mock()
+        asl = AutoLineage.AutoSelectLineage(config_manager)
         test_lineages = ["a", "b", "c"]
         test_dataset_version = "<dataset_version>"
         asl.dataset_version = test_dataset_version
@@ -83,11 +87,11 @@ class TestAutoLineage(unittest.TestCase):
         mock_runner.assert_called_with(mock_config_auto.return_value)
 
     @patch("busco.AutoLineage.logger.info")
-    @patch("busco.AutoLineage.BuscoConfigAuto", autospec=True)
-    @patch("busco.BuscoConfig.BuscoConfigMain")
-    @patch("__main__.AutoLineage_unittests.AutoLineage.BuscoRunner")
-    def test_run_lineages_runs_analysis(self, mock_runner, mock_config_main, *args):
-        asl = AutoLineage.AutoSelectLineage(mock_config_main)
+    @patch("busco.BuscoConfig.BuscoConfigAuto", autospec=True)
+    @patch("busco.ConfigManager.BuscoConfigManager")
+    @patch("__main__.AutoLineage_unittests.AutoLineage.AnalysisRunner")
+    def test_run_lineages_runs_analysis(self, mock_runner, mock_config_manager, *args):
+        asl = AutoLineage.AutoSelectLineage(mock_config_manager.config_main)
         test_lineages = ["a", "b", "c"]
         test_dataset_version = "<dataset_version>"
         asl.dataset_version = test_dataset_version
@@ -97,11 +101,13 @@ class TestAutoLineage(unittest.TestCase):
         )
 
     @patch("busco.AutoLineage.logger.info")
-    @patch("busco.AutoLineage.BuscoConfigAuto", autospec=True)
-    @patch("busco.BuscoConfig.BuscoConfigMain")
-    @patch("__main__.AutoLineage_unittests.AutoLineage.BuscoRunner")
-    def test_run_lineages_returns_runners(self, mock_runner, mock_config_main, *args):
-        asl = AutoLineage.AutoSelectLineage(mock_config_main)
+    @patch("busco.BuscoConfig.BuscoConfigAuto", autospec=True)
+    @patch("busco.ConfigManager.BuscoConfigManager")
+    @patch("__main__.AutoLineage_unittests.AutoLineage.AnalysisRunner")
+    def test_run_lineages_returns_runners(
+        self, mock_runner, mock_config_manager, *args
+    ):
+        asl = AutoLineage.AutoSelectLineage(mock_config_manager.config_main)
         test_lineages = ["a", "b", "c"]
         test_dataset_version = "<dataset_version>"
         asl.dataset_version = test_dataset_version
@@ -116,11 +122,11 @@ class TestAutoLineage(unittest.TestCase):
         )
 
     @patch("busco.AutoLineage.logger.info")
-    @patch("busco.AutoLineage.BuscoConfigAuto", autospec=True)
-    @patch("__main__.AutoLineage_unittests.AutoLineage.BuscoRunner")
-    @patch("busco.BuscoConfig.BuscoConfigMain")
-    def test_record_results_first_run(self, mock_config_main, *args):
-        asl = AutoLineage.AutoSelectLineage(mock_config_main)
+    @patch("busco.BuscoConfig.BuscoConfigAuto", autospec=True)
+    @patch("__main__.AutoLineage_unittests.AutoLineage.AnalysisRunner")
+    @patch("busco.ConfigManager.BuscoConfigManager")
+    def test_record_results_first_run(self, mock_config_manager, *args):
+        asl = AutoLineage.AutoSelectLineage(mock_config_manager.config_main)
         asl.record_results(0, 1, 2, 0, 1, 2)
         self.assertGreater(len(asl.s_buscos), 0)
         self.assertGreater(len(asl.d_buscos), 0)
@@ -130,11 +136,11 @@ class TestAutoLineage(unittest.TestCase):
         self.assertGreater(len(asl.f_percents), 0)
 
     @patch("busco.AutoLineage.logger.info")
-    @patch("busco.AutoLineage.BuscoConfigAuto", autospec=True)
-    @patch("__main__.AutoLineage_unittests.AutoLineage.BuscoRunner")
-    @patch("busco.BuscoConfig.BuscoConfigMain")
-    def test_record_results_multiple_runs(self, mock_config_main, *args):
-        asl = AutoLineage.AutoSelectLineage(mock_config_main)
+    @patch("busco.BuscoConfig.BuscoConfigAuto", autospec=True)
+    @patch("__main__.AutoLineage_unittests.AutoLineage.AnalysisRunner")
+    @patch("busco.ConfigManager.BuscoConfigManager")
+    def test_record_results_multiple_runs(self, mock_config_manager, *args):
+        asl = AutoLineage.AutoSelectLineage(mock_config_manager.config_main)
         asl.record_results(0, 1, 2, 0, 1, 2)
         asl.record_results(0, 1, 2, 0, 1, 2)
         self.assertGreater(len(asl.s_buscos), 1)
@@ -145,19 +151,19 @@ class TestAutoLineage(unittest.TestCase):
         self.assertGreater(len(asl.f_percents), 1)
 
     @patch("busco.AutoLineage.logger.info")
-    @patch("busco.BuscoConfig.BuscoConfigMain")
-    def test_evaluate_single_runner(self, mock_config_main, *args):
+    @patch("busco.ConfigManager.BuscoConfigManager")
+    def test_evaluate_single_runner(self, mock_config_manager, *args):
         runner1 = Mock()
         runner1.analysis.hmmer_runner.single_copy = 1
         runner1.analysis.hmmer_runner.multi_copy = 1
         runner1.analysis.hmmer_runner.only_fragments = 1
-        asl = AutoLineage.AutoSelectLineage(mock_config_main)
+        asl = AutoLineage.AutoSelectLineage(mock_config_manager.config_main)
         max_ind = asl.evaluate([runner1])
         self.assertEqual(max_ind, 0)
 
     @patch("busco.AutoLineage.logger.info")
-    @patch("busco.BuscoConfig.BuscoConfigMain")
-    def test_evaluate_multiple_runners(self, mock_config_main, *args):
+    @patch("busco.ConfigManager.BuscoConfigManager")
+    def test_evaluate_multiple_runners(self, mock_config_manager, *args):
         runner1 = Mock()
         runner2 = Mock()
         runner3 = Mock()
@@ -167,7 +173,7 @@ class TestAutoLineage(unittest.TestCase):
         runner2.analysis.hmmer_runner.multi_copy = 5
         runner3.analysis.hmmer_runner.single_copy = 12
         runner3.analysis.hmmer_runner.multi_copy = 5
-        asl = AutoLineage.AutoSelectLineage(mock_config_main)
+        asl = AutoLineage.AutoSelectLineage(mock_config_manager.config_main)
         max_ind = asl.evaluate([runner1, runner2, runner3])
         self.assertEqual(max_ind, 1)
 
@@ -179,8 +185,8 @@ class TestAutoLineage(unittest.TestCase):
         self.assertEqual(max_ind, 2)
 
     @patch("busco.AutoLineage.logger.info")
-    @patch("busco.BuscoConfig.BuscoConfigMain")
-    def test_evaluate_first_order_tiebreak(self, mock_config_main, *args):
+    @patch("busco.ConfigManager.BuscoConfigManager")
+    def test_evaluate_first_order_tiebreak(self, mock_config_manager, *args):
         runner1 = Mock()
         runner2 = Mock()
         runner3 = Mock()
@@ -193,13 +199,13 @@ class TestAutoLineage(unittest.TestCase):
         runner3.analysis.hmmer_runner.single_copy = 12
         runner3.analysis.hmmer_runner.multi_copy = 0
         runner3.analysis.hmmer_runner.only_fragments = 3
-        asl = AutoLineage.AutoSelectLineage(mock_config_main)
+        asl = AutoLineage.AutoSelectLineage(mock_config_manager.config_main)
         max_ind = asl.evaluate([runner1, runner2, runner3])
         self.assertEqual(max_ind, 1)
 
     @patch("busco.AutoLineage.logger.info")
-    @patch("busco.BuscoConfig.BuscoConfigMain")
-    def test_evaluate_second_order_tiebreak(self, mock_config_main, *args):
+    @patch("busco.ConfigManager.BuscoConfigManager")
+    def test_evaluate_second_order_tiebreak(self, mock_config_manager, *args):
         runner1 = Mock()
         runner2 = Mock()
         runner3 = Mock()
@@ -220,13 +226,13 @@ class TestAutoLineage(unittest.TestCase):
         runner4.analysis.hmmer_runner.multi_copy = 1
         runner4.analysis.hmmer_runner.only_fragments = 2
         runner4.analysis.hmmer_runner.s_percent = 80
-        asl = AutoLineage.AutoSelectLineage(mock_config_main)
+        asl = AutoLineage.AutoSelectLineage(mock_config_manager.config_main)
         max_ind = asl.evaluate([runner1, runner2, runner3, runner4])
         self.assertEqual(max_ind, 3)
 
     @patch("busco.AutoLineage.logger.info")
-    @patch("busco.BuscoConfig.BuscoConfigMain")
-    def test_evaluate_third_order_tiebreak(self, mock_config_main, *args):
+    @patch("busco.ConfigManager.BuscoConfigManager")
+    def test_evaluate_third_order_tiebreak(self, mock_config_manager, *args):
         runner1 = Mock()
         runner2 = Mock()
         runner3 = Mock()
@@ -247,19 +253,19 @@ class TestAutoLineage(unittest.TestCase):
         runner4.analysis.hmmer_runner.multi_copy = 1
         runner4.analysis.hmmer_runner.only_fragments = 2
         runner4.analysis.hmmer_runner.s_percent = 80
-        asl = AutoLineage.AutoSelectLineage(mock_config_main)
+        asl = AutoLineage.AutoSelectLineage(mock_config_manager.config_main)
         with self.assertLogs(AutoLineage.logger, "WARNING"):
             max_ind = asl.evaluate([runner1, runner2, runner3, runner4])
         self.assertEqual(max_ind, 1)
 
     @patch("busco.AutoLineage.logger.info")
     @patch("busco.AutoLineage.AutoSelectLineage.cleanup_disused_runs")
-    @patch("__main__.AutoLineage_unittests.BuscoRunner.BuscoRunner.mode_dict")
-    @patch("busco.BuscoConfig.BuscoConfigMain", autospec=True)
+    @patch("__main__.AutoLineage_unittests.BuscoRunner.AnalysisRunner.mode_dict")
+    @patch("busco.ConfigManager.BuscoConfigManager")
     def test_get_best_match_lineage(
-        self, mock_config_main, fake_modedict, mock_cleanup, *args
+        self, mock_config_manager, fake_modedict, mock_cleanup, *args
     ):
-        mock_config_main.get.side_effect = [None]
+        mock_config_manager.config_main.get.side_effect = [None]
 
         mock_config1 = Mock()
         mock_config2 = Mock()
@@ -284,20 +290,20 @@ class TestAutoLineage(unittest.TestCase):
             mock_analysis3,
         ]
 
-        runner1 = BuscoRunner.BuscoRunner(mock_config1)
-        runner2 = BuscoRunner.BuscoRunner(mock_config2)
-        runner3 = BuscoRunner.BuscoRunner(mock_config3)
+        runner1 = BuscoRunner.AnalysisRunner(mock_config1)
+        runner2 = BuscoRunner.AnalysisRunner(mock_config2)
+        runner3 = BuscoRunner.AnalysisRunner(mock_config3)
 
-        asl = AutoLineage.AutoSelectLineage(mock_config_main)
+        asl = AutoLineage.AutoSelectLineage(mock_config_manager.config_main)
         asl.get_best_match_lineage([runner1, runner2, runner3])
         self.assertEqual(asl.best_match_lineage_dataset, "test2")
         self.assertEqual(asl.selected_runner, runner2)
         mock_cleanup.assert_called_with([runner1, runner3])
 
     @patch("busco.AutoLineage.logger.info")
-    @patch("busco.BuscoConfig.BuscoConfigMain", autospec=True)
-    def test_cleanup_disused_runs(self, mock_config_main, *args):
-        asl = AutoLineage.AutoSelectLineage(mock_config_main)
+    @patch("busco.ConfigManager.BuscoConfigManager")
+    def test_cleanup_disused_runs(self, mock_config_manager, *args):
+        asl = AutoLineage.AutoSelectLineage(mock_config_manager)
         mock_runner1 = Mock()
         mock_runner2 = Mock()
         mock_runner1.cleaned_up = False


=====================================
tests/unittests/BuscoConfig_unittests.py
=====================================
@@ -3,8 +3,7 @@ from busco import BuscoConfig
 import shutil
 import os
 from unittest.mock import Mock
-from unittest.mock import patch
-from pathlib import Path
+from unittest.mock import patch, call
 
 
 class TestBuscoConfig(unittest.TestCase):
@@ -80,6 +79,7 @@ class TestBuscoConfig(unittest.TestCase):
                 "evalue",
                 "limit",
                 "use_augustus",
+                "batch_mode",
             ],
             "etraining": ["path", "command"],
             "gff2gbSmallDNA.pl": ["path", "command"],
@@ -100,7 +100,7 @@ class TestBuscoConfig(unittest.TestCase):
         self.assertIn("busco_run", config.sections())
 
     def test_read_config_file_ioerror(self):
-        with self.assertRaises(SystemExit):
+        with self.assertRaises(BuscoConfig.BatchFatalError):
             config = BuscoConfig.BaseConfig()
             config.conf_file = "/path/not/found"
             config._load_config_file()
@@ -111,7 +111,7 @@ class TestBuscoConfig(unittest.TestCase):
         with open(config_path, "w") as f:
             f.write(test_config_contents)
 
-        with self.assertRaises(SystemExit):
+        with self.assertRaises(BuscoConfig.BatchFatalError):
             config = BuscoConfig.BaseConfig()
             config.conf_file = config_path
             config._load_config_file()
@@ -123,7 +123,7 @@ class TestBuscoConfig(unittest.TestCase):
         with open(config_path, "w") as f:
             f.write(test_config_contents)
 
-        with self.assertRaises(SystemExit):
+        with self.assertRaises(BuscoConfig.BatchFatalError):
             config = BuscoConfig.BaseConfig()
             config.conf_file = config_path
             config._load_config_file()
@@ -218,21 +218,21 @@ class TestBuscoConfig(unittest.TestCase):
             config._check_mandatory_keys_exist()
 
     def test_mandatory_keys_check_missing_param_in(self):
-        with self.assertRaises(SystemExit):
+        with self.assertRaises(BuscoConfig.BatchFatalError):
             params_test = {"out": "output_name", "mode": "genome"}
             config = BuscoConfig.BuscoConfigMain(self.base_config, params_test)
             config.configure()
             config._check_mandatory_keys_exist()
 
     def test_mandatory_keys_check_missing_param_mode(self):
-        with self.assertRaises(SystemExit):
+        with self.assertRaises(BuscoConfig.BatchFatalError):
             params_test = {"in": "input_file", "out": "output_name"}
             config = BuscoConfig.BuscoConfigMain(self.base_config, params_test)
             config.configure()
             config._check_mandatory_keys_exist()
 
     def test_mandatory_keys_check_missing_param_out(self):
-        with self.assertRaises(SystemExit):
+        with self.assertRaises(BuscoConfig.BatchFatalError):
             params_test = {"in": "input_file", "mode": "genome"}
             config = BuscoConfig.BuscoConfigMain(self.base_config, params_test)
             config.configure()
@@ -251,7 +251,7 @@ class TestBuscoConfig(unittest.TestCase):
         os.makedirs(previous_run_name, exist_ok=True)
         self.test_params["out"] = previous_run_name
         self.test_params["force"] = "False"
-        with self.assertRaises(SystemExit):
+        with self.assertRaises(BuscoConfig.BatchFatalError):
             config = BuscoConfig.BuscoConfigMain(self.base_config, self.test_params)
             config.configure()
             config._check_no_previous_run()
@@ -312,7 +312,7 @@ class TestBuscoConfig(unittest.TestCase):
 
     def test_catch_disallowed_keys(self):
         for section_name in self.config_structure:
-            with self.assertRaises(SystemExit):
+            with self.assertRaises(BuscoConfig.BatchFatalError):
                 config = BuscoConfig.BuscoConfigMain(self.base_config, self.test_params)
                 config.configure()
                 config.set(section_name, "forbidden_option", "forbidden_value")
@@ -321,7 +321,7 @@ class TestBuscoConfig(unittest.TestCase):
     def test_out_value_check_invalid(self):
         for str_format in ["/path/to/output", "output/"]:
             self.test_params["out"] = str_format
-            with self.assertRaises(SystemExit):
+            with self.assertRaises(BuscoConfig.BatchFatalError):
                 config = BuscoConfig.BuscoConfigMain(self.base_config, self.test_params)
                 config.configure()
                 config._check_out_value()
@@ -334,7 +334,7 @@ class TestBuscoConfig(unittest.TestCase):
     def test_limit_value_out_of_range(self):
         for lim_val in [-1, 0, 25]:
             self.test_params["limit"] = lim_val
-            with self.assertRaises(SystemExit):
+            with self.assertRaises(BuscoConfig.BatchFatalError):
                 config = BuscoConfig.BuscoConfigMain(self.base_config, self.test_params)
                 config.configure()
                 config._check_limit_value()
@@ -398,15 +398,38 @@ class TestBuscoConfig(unittest.TestCase):
             config.get("hmmsearch", "path"), os.path.expanduser("~/test_hmmsearch_path")
         )
 
-    def test_required_input_exists_true(self):
-        input_filename = "test_input_file"
-        Path(input_filename).touch()
-        self.test_params["in"] = input_filename
+    @patch(
+        "__main__.BuscoConfig_unittests.BuscoConfig.os.path.isdir", return_value=True
+    )
+    def test_batch_mode_true(self, *args):
         config = BuscoConfig.BuscoConfigMain(self.base_config, self.test_params)
-        config.configure()
-        with self.assertLogs(BuscoConfig.logger, level="INFO"):
-            config._check_required_input_exists()
-        os.remove(input_filename)
+        config.set = Mock()
+        config._check_batch_mode()
+        calls = [call("busco_run", "batch_mode", "True")]
+        config.set.assert_has_calls(calls)
+
+    @patch(
+        "__main__.BuscoConfig_unittests.BuscoConfig.os.path.isdir", return_value=False
+    )
+    @patch(
+        "__main__.BuscoConfig_unittests.BuscoConfig.os.path.isfile", return_value=True
+    )
+    def test_batch_mode_false_with_file(self, *args):
+        config = BuscoConfig.BuscoConfigMain(self.base_config, self.test_params)
+        config.set = Mock()
+        config._check_batch_mode()
+
+    @patch(
+        "__main__.BuscoConfig_unittests.BuscoConfig.os.path.isdir", return_value=False
+    )
+    @patch(
+        "__main__.BuscoConfig_unittests.BuscoConfig.os.path.isfile", return_value=False
+    )
+    def test_batch_mode_false_with_error(self, *args):
+        config = BuscoConfig.BuscoConfigMain(self.base_config, self.test_params)
+        config.set = Mock()
+        with self.assertRaises(BuscoConfig.BatchFatalError):
+            config._check_batch_mode()
 
     def test_required_input_exists_false(self):
         input_filename = "test_input_file"
@@ -415,7 +438,7 @@ class TestBuscoConfig(unittest.TestCase):
         self.test_params["in"] = input_filename
         config = BuscoConfig.BuscoConfigMain(self.base_config, self.test_params)
         config.configure()
-        with self.assertRaises(SystemExit):
+        with self.assertRaises(BuscoConfig.BatchFatalError):
             config._check_required_input_exists()
 
     @patch("__main__.BuscoConfig_unittests.BuscoConfig.BuscoDownloadManager")
@@ -435,6 +458,7 @@ class TestBuscoConfig(unittest.TestCase):
 
     @patch.object(BuscoConfig.BuscoConfigMain, "log_config")
     @patch.object(BuscoConfig.BuscoConfigMain, "_init_downloader")
+    @patch.object(BuscoConfig.BuscoConfigMain, "_check_batch_mode")
     @patch.object(BuscoConfig.BuscoConfigMain, "_check_required_input_exists")
     @patch.object(BuscoConfig.BuscoConfigMain, "_expand_all_paths")
     @patch.object(BuscoConfig.BuscoConfigMain, "_check_evalue")
@@ -455,6 +479,7 @@ class TestBuscoConfig(unittest.TestCase):
         mock_check_evalue,
         mock_expand_all_paths,
         mock_check_input,
+        mock_check_batch,
         mock_init_downloader,
         mock_log_config,
     ):
@@ -470,6 +495,7 @@ class TestBuscoConfig(unittest.TestCase):
         mock_check_evalue.assert_called()
         mock_expand_all_paths.assert_called()
         mock_check_input.assert_called()
+        mock_check_batch.assert_called()
         mock_init_downloader.assert_called()
         mock_log_config.assert_called()
 
@@ -524,7 +550,7 @@ class TestBuscoConfig(unittest.TestCase):
         self.test_params["datasets_version"] = "odb11"
         config = BuscoConfig.BuscoConfigMain(self.base_config, self.test_params)
         config.configure()
-        with self.assertRaises(SystemExit):
+        with self.assertRaises(BuscoConfig.BatchFatalError):
             config.check_lineage_present()
 
     def test_set_results_dirname(self):
@@ -559,11 +585,9 @@ class TestBuscoConfig(unittest.TestCase):
         BuscoConfig.BuscoConfigAuto(None, "lineage")
         mock_set_dirname.assert_called_with("lineage")
 
-    @patch("busco.BuscoConfig.BuscoConfigAuto.load_dataset_config")
-    @patch("busco.BuscoConfig.BuscoConfigAuto.download_lineage_file")
+    @patch("busco.BuscoConfig.BuscoConfigAuto.load_dataset")
     @patch("busco.BuscoConfig.BuscoConfig")
     @patch("busco.BuscoConfig.BuscoConfigAuto._propagate_config")
-    @patch("busco.BuscoConfig.BuscoConfigAuto.set_results_dirname")
     @patch("busco.BuscoConfig.BuscoConfigAuto._create_required_paths")
     def test_autoconfig_init_creates_paths(self, mock_create_paths, *args):
         BuscoConfig.BuscoConfigAuto(None, None)
@@ -590,10 +614,8 @@ class TestBuscoConfig(unittest.TestCase):
         mock_load_dataset.assert_called()
 
     @patch("busco.BuscoConfig.BuscoConfigAuto._propagate_config")
-    @patch("busco.BuscoConfig.BuscoConfigAuto.set_results_dirname")
     @patch("busco.BuscoConfig.BuscoConfigAuto._create_required_paths")
-    @patch("busco.BuscoConfig.BuscoConfigAuto.download_lineage_file")
-    @patch("busco.BuscoConfig.BuscoConfigAuto.load_dataset_config")
+    @patch("busco.BuscoConfig.BuscoConfigAuto.load_dataset")
     @patch("busco.BuscoConfig.BuscoConfig.__init__")
     def test_autoconfig_init_calls_super(self, mock_config_parent, *args):
         BuscoConfig.BuscoConfigAuto(None, None)


=====================================
tests/unittests/BuscoRunner_unittests.py
=====================================
@@ -0,0 +1,113 @@
+import unittest
+from unittest.mock import patch, call, Mock
+from busco import BuscoRunner, ConfigManager
+
+
+class TestAutoLineage(unittest.TestCase):
+    def setUp(self):
+        pass
+
+    @patch("busco.ConfigManager.BuscoConfigManager")
+    def test_config_updated_if_lineage_missing(self, mock_config_manager, *args):
+        mock_config_manager.config_main.check_lineage_present = lambda: False
+        runner = BuscoRunner.SingleRunner(mock_config_manager)
+        analysis_runner = Mock()
+        test_dataset_path = "/path/to/lineage_dataset"
+        with patch.object(
+            runner, "auto_select_lineage", lambda: (test_dataset_path, analysis_runner)
+        ):
+            runner.get_lineage()
+        calls = [
+            call("busco_run", "lineage_dataset", test_dataset_path),
+        ]
+        mock_config_manager.config_main.set.assert_has_calls(calls, any_order=True)
+
+    @patch("busco.BuscoRunner.logger.info")
+    @patch(
+        "busco.AutoLineage.AutoSelectLineage",
+        autospec=True,
+    )
+    def test_auto_select_lineage_call_function_initializes_asl(self, mock_asl, *args):
+        config = Mock()
+        mock_asl.return_value.best_match_lineage_dataset = Mock()
+        mock_asl.return_value.selected_runner = Mock()
+        runner = BuscoRunner.SingleRunner(config)
+        runner.auto_select_lineage()
+        mock_asl.assert_called()
+
+    @patch("busco.BuscoRunner.logger.info")
+    @patch(
+        "busco.AutoLineage.AutoSelectLineage",
+        autospec=True,
+    )
+    def test_auto_select_lineage_call_function_runs_asl(self, mock_asl, *args):
+        config = Mock()
+        mock_asl.return_value.best_match_lineage_dataset = Mock()
+        mock_asl.return_value.selected_runner = Mock()
+        runner = BuscoRunner.SingleRunner(config)
+        runner.auto_select_lineage()
+        mock_asl.return_value.run_auto_selector.assert_called()
+
+    @patch("busco.BuscoRunner.logger.info")
+    @patch(
+        "busco.AutoLineage.AutoSelectLineage",
+        autospec=True,
+    )
+    def test_auto_select_lineage_call_function_gets_lineage_dataset(
+        self, mock_asl, *args
+    ):
+        config = Mock()
+        mock_asl.return_value.best_match_lineage_dataset = Mock()
+        mock_asl.return_value.selected_runner = Mock()
+        runner = BuscoRunner.SingleRunner(config)
+        runner.auto_select_lineage()
+        mock_asl.return_value.get_lineage_dataset.assert_called()
+
+    @patch("busco.BuscoRunner.logger.info")
+    @patch(
+        "busco.AutoLineage.AutoSelectLineage",
+        autospec=True,
+    )
+    def test_auto_select_lineage_call_function_returns_lineage_dataset(
+        self, mock_asl, *args
+    ):
+        config = Mock()
+        lineage_dataset = "best_match_dataset"
+        mock_asl.return_value.best_match_lineage_dataset = lineage_dataset
+        mock_asl.return_value.selected_runner = Mock()
+        runner = BuscoRunner.SingleRunner(config)
+        retval_dataset, retval_runner = runner.auto_select_lineage()
+        self.assertEqual(retval_dataset, lineage_dataset)
+
+    @patch("busco.BuscoRunner.logger.info")
+    @patch(
+        "busco.AutoLineage.AutoSelectLineage",
+        autospec=True,
+    )
+    def test_auto_select_lineage_call_function_selects_runner(self, mock_asl, *args):
+        config = Mock()
+        lineage_dataset = "best_match_dataset"
+        mock_asl.return_value.best_match_lineage_dataset = lineage_dataset
+        mock_asl.return_value.selected_runner = Mock()
+        runner = BuscoRunner.SingleRunner(config)
+        retval_dataset, retval_runner = runner.auto_select_lineage()
+        self.assertEqual(retval_runner, mock_asl.return_value.selected_runner)
+
+    @patch("busco.ConfigManager.BuscoConfigManager")
+    def test_config_set_parent_dataset_if_not_virus(self, mock_config_manager, *args):
+        test_dataset_path = "/path/to/lineage_dataset"
+        test_parent_dataset = "/path/to/parent_dataset"
+        runner = BuscoRunner.SingleRunner(mock_config_manager)
+        analysis_runner = Mock()
+        analysis_runner.config.get.side_effect = ["bacteria", test_parent_dataset]
+        with patch.object(
+            runner, "auto_select_lineage", lambda: (test_dataset_path, analysis_runner)
+        ):
+            runner.get_lineage()
+        calls = [
+            call("busco_run", "parent_dataset", test_parent_dataset),
+        ]
+        mock_config_manager.config_main.set.assert_has_calls(calls, any_order=True)
+
+    def tearDown(self):
+        pass


=====================================
tests/unittests/ConfigManager_unittests.py
=====================================
@@ -1,5 +1,5 @@
 import unittest
-from unittest.mock import patch, call, Mock
+from unittest.mock import patch
 from busco import ConfigManager
 import os
 import importlib
@@ -50,220 +50,22 @@ class TestConfigManager(unittest.TestCase):
             )
 
     @patch("__main__.ConfigManager_unittests.ConfigManager.logger", autospec=True)
-    def test_config_validated(self, *args):
-        config_manager = ConfigManager.BuscoConfigManager(self.params)
-        with patch(
-            "__main__.ConfigManager_unittests.ConfigManager.BuscoConfigMain",
-            autospec=True,
-        ):
-            config_manager.load_busco_config()
-            config_manager.config.validate.assert_called()
-
-    @patch(
-        "__main__.ConfigManager_unittests.ConfigManager.BuscoConfigMain", autospec=True
-    )
-    def test_log_warning_if_neither_lineage_nor_autolineage_specified(
-        self, mock_config_main, *args
-    ):
-        mock_config_main.return_value.check_lineage_present = lambda: False
-        mock_config_main.return_value.getboolean.side_effect = [
-            False,
-            False,
-            True,
-            False,
-            True,
-        ]  # These values comprise the three distinct logical paths into this part
-        # of the code: run1: False, False; run2: True, (shortcircuit); run3: False, True.
-        config_manager = ConfigManager.BuscoConfigManager(self.params)
-        test_dataset_path = "/path/to/lineage_dataset"
-        with patch.object(
-            config_manager, "auto_select_lineage", lambda: test_dataset_path
-        ):
-            with self.assertLogs(ConfigManager.logger, "WARNING"):
-                config_manager.load_busco_config()
-            with patch("busco.ConfigManager.logger.warning") as mock_logger:
-                for _ in range(2):
-                    config_manager.load_busco_config()
-                    self.assertFalse(mock_logger.called)
-
-    @patch(
-        "__main__.ConfigManager_unittests.ConfigManager.BuscoConfigMain", autospec=True
-    )
-    def test_log_warning_if_both_lineage_and_autolineage_specified(
-        self, mock_config_main, *args
-    ):
-        mock_config_main.return_value.check_lineage_present = lambda: True
-        mock_config_main.return_value.getboolean.side_effect = [
-            True,
-            False,
-            True,
-            False,
-            False,
-        ]
-        config_manager = ConfigManager.BuscoConfigManager(self.params)
-        test_dataset_path = "/path/to/lineage_dataset"
-        with patch.object(
-            config_manager, "auto_select_lineage", lambda: test_dataset_path
-        ):
-            for _ in range(2):
-                with self.assertLogs(ConfigManager.logger, "WARNING"):
-                    config_manager.load_busco_config()
-            with patch("busco.ConfigManager.logger.warning") as mock_logger:
-                config_manager.load_busco_config()
-                self.assertFalse(mock_logger.called)
-
-    patch("busco.ConfigManager.logger.warning")
-
-    @patch(
-        "__main__.ConfigManager_unittests.ConfigManager.BuscoConfigMain", autospec=True
-    )
-    def test_config_updated_if_lineage_missing(self, mock_config_main, *args):
-        mock_config_main.return_value.check_lineage_present = lambda: False
-        config_manager = ConfigManager.BuscoConfigManager(self.params)
-        test_dataset_path = "/path/to/lineage_dataset"
-        with patch.object(
-            config_manager, "auto_select_lineage", lambda: test_dataset_path
-        ):
-            config_manager.load_busco_config()
-        calls = [
-            call("busco_run", "auto-lineage", "True"),
-            call("busco_run", "lineage_dataset", test_dataset_path),
-        ]
-        mock_config_main.return_value.set.assert_has_calls(calls, any_order=True)
-
-    @patch("busco.ConfigManager.logger.warning")
-    @patch(
-        "__main__.ConfigManager_unittests.ConfigManager.BuscoConfigMain", autospec=True
-    )
-    def test_config_updated_if_lineage_present(self, mock_config_main, *args):
-        mock_config_main.return_value.check_lineage_present = lambda: True
-        config_manager = ConfigManager.BuscoConfigManager(self.params)
-        test_dataset_path = "/path/to/lineage_dataset"
-        with patch.object(
-            config_manager, "auto_select_lineage", lambda: test_dataset_path
-        ):
-            config_manager.load_busco_config()
-        calls = [
-            call("busco_run", "auto-lineage", "False"),
-            call("busco_run", "auto-lineage-prok", "False"),
-            call("busco_run", "auto-lineage-euk", "False"),
-        ]
-        mock_config_main.return_value.set.assert_has_calls(calls, any_order=True)
-
-    @patch("busco.ConfigManager.logger.warning")
-    @patch(
-        "__main__.ConfigManager_unittests.ConfigManager.BuscoConfigMain", autospec=True
-    )
-    def test_update_dirname(self, mock_config_main, *args):
-        config_manager = ConfigManager.BuscoConfigManager(self.params)
-        test_dataset_path = "/path/to/lineage_dataset"
-        mock_config_main.return_value.get = lambda *args: test_dataset_path
-        with patch.object(
-            config_manager, "auto_select_lineage", lambda: test_dataset_path
-        ):
-            config_manager.load_busco_config()
-        mock_config_main.return_value.set_results_dirname.assert_called_with(
-            test_dataset_path
-        )
-
-    @patch("busco.ConfigManager.logger.warning")
     @patch(
         "__main__.ConfigManager_unittests.ConfigManager.BuscoConfigMain", autospec=True
     )
-    def test_lineage_downloaded(self, mock_config_main, *args):
+    def test_config_main_configured(self, mock_config, *args):
         config_manager = ConfigManager.BuscoConfigManager(self.params)
-        test_dataset_path = "/path/to/lineage_dataset"
-        mock_config_main.return_value.get = lambda *args: test_dataset_path
-        with patch.object(
-            config_manager, "auto_select_lineage", lambda: test_dataset_path
-        ):
-            config_manager.load_busco_config()
-        mock_config_main.return_value.download_lineage_file.assert_called_with(
-            test_dataset_path
-        )
+        config_manager.load_busco_config_main()
+        mock_config.return_value.configure.assert_called()
 
-    @patch("busco.ConfigManager.logger.warning")
-    @patch(
-        "__main__.ConfigManager_unittests.ConfigManager.BuscoConfigMain", autospec=True
-    )
-    def test_lineage_dataset_config_loaded(self, mock_config_main, *args):
-        config_manager = ConfigManager.BuscoConfigManager(self.params)
-        test_dataset_path = "/path/to/lineage_dataset"
-        with patch.object(
-            config_manager, "auto_select_lineage", lambda: test_dataset_path
-        ):
-            config_manager.load_busco_config()
-        mock_config_main.return_value.load_dataset_config.assert_called()
-
-    @patch("busco.ConfigManager.logger.warning")
+    @patch("__main__.ConfigManager_unittests.ConfigManager.logger", autospec=True)
     @patch(
         "__main__.ConfigManager_unittests.ConfigManager.BuscoConfigMain", autospec=True
     )
-    def test_run_auto_select_if_no_lineage(self, mock_config_main, *args):
-        config_manager = ConfigManager.BuscoConfigManager(self.params)
-        mock_config_main.return_value.check_lineage_present = lambda: False
-        test_dataset_path = "/path/to/lineage_dataset"
-        with patch.object(
-            config_manager, "auto_select_lineage", return_value=test_dataset_path
-        ):
-            config_manager.load_busco_config()
-            config_manager.auto_select_lineage.assert_called()
-
-    @patch(
-        "__main__.ConfigManager_unittests.ConfigManager.AutoSelectLineage",
-        autospec=True,
-    )
-    def test_auto_select_lineage_call_function_initializes_asl(self, mock_asl):
-        mock_asl.return_value.best_match_lineage_dataset = Mock()
-        mock_asl.return_value.selected_runner = Mock()
-        config_manager = ConfigManager.BuscoConfigManager(self.params)
-        config_manager.auto_select_lineage()
-        mock_asl.assert_called()
-
-    @patch(
-        "__main__.ConfigManager_unittests.ConfigManager.AutoSelectLineage",
-        autospec=True,
-    )
-    def test_auto_select_lineage_call_function_runs_asl(self, mock_asl):
-        mock_asl.return_value.best_match_lineage_dataset = Mock()
-        mock_asl.return_value.selected_runner = Mock()
-        config_manager = ConfigManager.BuscoConfigManager(self.params)
-        config_manager.auto_select_lineage()
-        mock_asl.return_value.run_auto_selector.assert_called()
-
-    @patch(
-        "__main__.ConfigManager_unittests.ConfigManager.AutoSelectLineage",
-        autospec=True,
-    )
-    def test_auto_select_lineage_call_function_gets_lineage_dataset(self, mock_asl):
-        mock_asl.return_value.best_match_lineage_dataset = Mock()
-        mock_asl.return_value.selected_runner = Mock()
-        config_manager = ConfigManager.BuscoConfigManager(self.params)
-        config_manager.auto_select_lineage()
-        mock_asl.return_value.get_lineage_dataset.assert_called()
-
-    @patch(
-        "__main__.ConfigManager_unittests.ConfigManager.AutoSelectLineage",
-        autospec=True,
-    )
-    def test_auto_select_lineage_call_function_returns_lineage_dataset(self, mock_asl):
-        config_manager = ConfigManager.BuscoConfigManager(self.params)
-        lineage_dataset = "best_match_dataset"
-        mock_asl.return_value.best_match_lineage_dataset = lineage_dataset
-        mock_asl.return_value.selected_runner = Mock()
-        retval = config_manager.auto_select_lineage()
-        self.assertEqual(retval, lineage_dataset)
-
-    @patch(
-        "__main__.ConfigManager_unittests.ConfigManager.AutoSelectLineage",
-        autospec=True,
-    )
-    def test_auto_select_lineage_call_function_selects_runner(self, mock_asl):
+    def test_config_main_validated(self, mock_config, *args):
         config_manager = ConfigManager.BuscoConfigManager(self.params)
-        mock_asl.return_value.best_match_lineage_dataset = Mock()
-        mock_asl.return_value.selected_runner = "test"
-        config_manager.auto_select_lineage()
-        self.assertEqual("test", config_manager.runner)
+        config_manager.load_busco_config_main()
+        mock_config.return_value.validate.assert_called()
 
     def tearDown(self):
         pass


=====================================
tests/unittests/run_BUSCO_unittests.py
=====================================
@@ -2,6 +2,7 @@ import unittest
 from busco import run_BUSCO
 import sys
 import io
+from unittest.mock import Mock, patch, call
 
 
 class TestParams(unittest.TestCase):
@@ -97,7 +98,7 @@ class TestParams(unittest.TestCase):
         output_file = "output_file"
         mode = "mode"
         lineage_dataset = "lineage_dataset"
-        cpus = "cpus"
+        cpus = 10
         evalue = 0.1
 
         arg_values = {
@@ -153,7 +154,7 @@ class TestParams(unittest.TestCase):
         output_file = "output_file"
         mode = "mode"
         lineage_dataset = "lineage_dataset"
-        cpus = "cpus"
+        cpus = 10
         evalue = 0.1
         limit = 1
         augustus_parameters = "augustus_parameters"
@@ -235,5 +236,97 @@ class TestParams(unittest.TestCase):
         }
         self.assertDictEqual(params, correct_parse)
 
+    def test_command_line_cpu_type(self):
+        bad_args_cpu = ["cpus", "1.5", None]
+
+        for arg in bad_args_cpu:
+            sys.argv[1:] = ["--cpu", arg]
+            with self.assertRaises(SystemExit) as cm:
+                captured_output = io.StringIO()
+                sys.stderr = captured_output
+                try:
+                    run_BUSCO._parse_args()
+                finally:
+                    sys.stderr = sys.__stderr__
+            self.assertEqual(cm.exception.code, 2)
+
+    def test_command_line_evalue_type(self):
+        bad_args_evalue = ["evalue", None]
+
+        for arg in bad_args_evalue:
+            sys.argv[1:] = ["--evalue", arg]
+            with self.assertRaises(SystemExit) as cm:
+                captured_output = io.StringIO()
+                sys.stderr = captured_output
+                try:
+                    run_BUSCO._parse_args()
+                finally:
+                    sys.stderr = sys.__stderr__
+            self.assertEqual(cm.exception.code, 2)
+
+    def test_command_line_limit_type(self):
+        bad_args_limit = ["limit", "1.5", None]
+        for arg in bad_args_limit:
+            sys.argv[1:] = ["--limit", arg]
+            with self.assertRaises(SystemExit) as cm:
+                captured_output = io.StringIO()
+                sys.stderr = captured_output
+                try:
+                    run_BUSCO._parse_args()
+                finally:
+                    sys.stderr = sys.__stderr__
+            self.assertEqual(cm.exception.code, 2)
+
     def tearDown(self):
         sys.argv = [sys.argv[0]]
+
+
+class TestMaster(unittest.TestCase):
+    def setUp(self):
+        self.maxDiff = None
+        self.params = {}
+        pass
+
+    @patch("busco.run_BUSCO.logger.info")
+    def test_log_warning_if_neither_lineage_nor_autolineage_specified(self, *args):
+        bm = run_BUSCO.BuscoMaster(self.params)
+        bm.config = Mock()
+        bm.config.check_lineage_present.return_value = False
+        bm.config.getboolean.side_effect = [False, False, False]
+        with self.assertLogs(run_BUSCO.logger, "WARNING"):
+            bm.harmonize_auto_lineage_settings()
+
+    @patch("busco.run_BUSCO.logger.info")
+    def test_config_updated_if_no_lineage(self, *args):
+        bm = run_BUSCO.BuscoMaster(self.params)
+        bm.config = Mock()
+        bm.config.check_lineage_present.return_value = False
+        calls = [call("busco_run", "auto-lineage", "True")]
+        bm.harmonize_auto_lineage_settings()
+        bm.config.set.assert_has_calls(calls, any_order=True)
+
+    @patch("busco.run_BUSCO.logger.info")
+    def test_config_updated_if_lineage_present(self, *args):
+        bm = run_BUSCO.BuscoMaster(self.params)
+        bm.config = Mock()
+        bm.config.check_lineage_present.return_value = True
+        bm.config.getboolean.side_effect = [False, False, False]
+        calls = [
+            call("busco_run", "auto-lineage", "False"),
+            call("busco_run", "auto-lineage-prok", "False"),
+            call("busco_run", "auto-lineage-euk", "False"),
+        ]
+        bm.harmonize_auto_lineage_settings()
+        bm.config.set.assert_has_calls(calls, any_order=True)
+
+    @patch("busco.run_BUSCO.logger.info")
+    def test_log_warning_if_both_lineage_and_autolineage_specified(self, *args):
+        bm = run_BUSCO.BuscoMaster(self.params)
+        bm.config = Mock()
+        bm.config.check_lineage_present.return_value = True
+        bm.config.getboolean.return_value = True
+        with self.assertLogs(run_BUSCO.logger, "WARNING"):
+            bm.harmonize_auto_lineage_settings()
+
+    def tearDown(self):
+        pass



View it on GitLab: https://salsa.debian.org/med-team/busco/-/commit/eb1970a6ab1bc65bdac8324e311e4d1232e8c9cc

-- 
View it on GitLab: https://salsa.debian.org/med-team/busco/-/commit/eb1970a6ab1bc65bdac8324e311e4d1232e8c9cc
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210420/c862acf3/attachment-0001.htm>


More information about the debian-med-commit mailing list