[med-svn] [Git][med-team/busco][upstream] New upstream version 5.1.2
Nilesh Patra
gitlab at salsa.debian.org
Tue Apr 20 12:32:43 BST 2021
Nilesh Patra pushed to branch upstream at Debian Med / busco
Commits:
eb1970a6 by Nilesh Patra at 2021-04-20T16:59:41+05:30
New upstream version 5.1.2
- - - - -
29 changed files:
- CHANGELOG
- README.md
- src/busco/Actions.py
- src/busco/AutoLineage.py
- src/busco/BuscoConfig.py
- src/busco/BuscoDownloadManager.py
- src/busco/BuscoLogger.py
- src/busco/BuscoPlacer.py
- src/busco/BuscoRunner.py
- src/busco/ConfigManager.py
- + src/busco/Exceptions.py
- src/busco/_version.py
- src/busco/analysis/Analysis.py
- src/busco/analysis/BuscoAnalysis.py
- src/busco/analysis/GenomeAnalysis.py
- src/busco/busco_tools/Toolset.py
- src/busco/busco_tools/augustus.py
- src/busco/busco_tools/base.py
- src/busco/busco_tools/blast.py
- src/busco/busco_tools/hmmer.py
- src/busco/busco_tools/metaeuk.py
- src/busco/busco_tools/prodigal.py
- src/busco/run_BUSCO.py
- tests/unittest_runner.py
- tests/unittests/AutoLineage_unittests.py
- tests/unittests/BuscoConfig_unittests.py
- + tests/unittests/BuscoRunner_unittests.py
- tests/unittests/ConfigManager_unittests.py
- tests/unittests/run_BUSCO_unittests.py
Changes:
=====================================
CHANGELOG
=====================================
@@ -1,3 +1,18 @@
+5.1.2
+- Fix bug in batch mode that affects Augustus runs
+
+5.1.1
+- Fix bug in batch mode
+
+5.1.0
+- Implement batch mode
+- Issue #221 fixed
+- Issue #328 fixed
+- Issue #360 fixed
+- Issue #388 fixed
+- Issue #406 fixed
+- Issue #415 fixed
+
5.0.0
- Implement metaeuk exon overlap filter
- Use metaeuk for eukaryote transcriptomes
@@ -18,14 +33,20 @@
- Issue #373 fixed
- Issue #385 fixed
-
-
5.beta.1
- Provide option to run augustus instead of metaeuk
5.beta
- Replace Augustus with Metaeuk
+4.1.4
+- Fix Augustus parsing bug
+
+4.1.3
+- Issue #296 fixed
+- Issue #305 fixed
+- Augustus parser improved
+
4.1.2
- Issue #295 fixed
=====================================
README.md
=====================================
@@ -5,6 +5,7 @@ For full documentation please consult the user guide: https://busco.ezlab.org/bu
Main changes in v5:
- Metaeuk is used as default gene predictor for eukaryote pipeline. Augustus is maintained and can be used optionally instead of Metaeuk.
+- Introduction of batch mode: input argument can be a folder containing input files
- The folder structure has changed, so if doing a manual installation, make sure to completely remove any previous versions of BUSCO before installing v5.
***
=====================================
src/busco/Actions.py
=====================================
@@ -18,7 +18,7 @@ class ListLineagesAction(argparse.Action):
self.config_manager = BuscoConfigManager({})
except SystemExit as se:
type(self).logger.error(se)
- raise SystemExit()
+ raise SystemExit(1)
self.config = PseudoConfig(self.config_manager.config_file)
try:
=====================================
src/busco/AutoLineage.py
=====================================
@@ -1,15 +1,15 @@
import os
-from busco.BuscoConfig import BuscoConfigAuto
from busco.BuscoPlacer import BuscoPlacer
from busco.BuscoLogger import BuscoLogger
from busco.BuscoLogger import LogDecorator as log
-from busco.BuscoRunner import BuscoRunner
+from busco.BuscoRunner import AnalysisRunner
+from busco.Exceptions import BuscoError
import numpy as np
logger = BuscoLogger.get_logger(__name__)
-class NoGenesError(SystemExit):
+class NoGenesError(BuscoError):
def __init__(self):
super().__init__("No genes were recognized by BUSCO. Please check the content of your input file.")
@@ -34,8 +34,9 @@ class AutoSelectLineage:
"Because of overlapping markers/spurious matches among domains, busco matches in another domain do not "
"necessarily mean that your genome/proteome contains sequences from this domain. "
"However, a high busco score in multiple domains might help you identify possible contaminations.", logger)
- def __init__(self, config):
- self.config = config
+ def __init__(self, config_manager):
+ self.config_manager = config_manager
+ self.config = self.config_manager.config_main
if self.config.getboolean("busco_run", "auto-lineage-prok"):
self.all_lineages = ["archaea", "bacteria"]
elif self.config.getboolean("busco_run", "auto-lineage-euk"):
@@ -71,6 +72,10 @@ class AutoSelectLineage:
self.f_percents.append(f_percent)
return
+ @classmethod
+ def reset(cls):
+ cls.runners = []
+
@log("Running auto selector", logger, debug=True)
def run_auto_selector(self):
"""
@@ -91,8 +96,6 @@ class AutoSelectLineage:
logger.info("{} selected\n".format(os.path.basename(self.best_match_lineage_dataset)))
self.config.set("busco_run", "domain_run_name", os.path.basename(self.best_match_lineage_dataset))
- BuscoRunner.final_results.append(self.selected_runner.analysis.hmmer_runner.hmmer_results_lines)
- BuscoRunner.results_datasets.append(os.path.basename(self.best_match_lineage_dataset))
return
def virus_check(self):
@@ -114,8 +117,8 @@ class AutoSelectLineage:
root_runners = []
for l in lineages_list:
self.current_lineage = "{}_{}".format(l, self.dataset_version)
- autoconfig = BuscoConfigAuto(self.config, self.current_lineage)
- busco_run = BuscoRunner(autoconfig)
+ autoconfig = self.config_manager.load_busco_config_auto(self.current_lineage)
+ busco_run = AnalysisRunner(autoconfig)
busco_run.run_analysis(callback=self.callback)
root_runners.append(busco_run)
type(self).runners.append(busco_run) # Save all root runs so they can be recalled if chosen
@@ -203,8 +206,7 @@ class AutoSelectLineage:
else:
logger.info("Mollicutes dataset is a better match for your data. Testing subclades...")
self._run_3_datasets(self.selected_runner)
- BuscoRunner.final_results.append(self.selected_runner.analysis.hmmer_runner.hmmer_results_lines)
- BuscoRunner.results_datasets.append(os.path.basename(self.best_match_lineage_dataset))
+
elif ("geno" in self.selected_runner.mode
and self.selected_runner.analysis.prodigal_runner.current_gc == "4"
and os.path.basename(
@@ -212,14 +214,15 @@ class AutoSelectLineage:
logger.info("The results from the Prodigal gene predictor indicate that your data belongs to the "
"mollicutes clade. Testing subclades...")
self._run_3_datasets()
- BuscoRunner.final_results.append(self.selected_runner.analysis.hmmer_runner.hmmer_results_lines)
- BuscoRunner.results_datasets.append(os.path.basename(self.best_match_lineage_dataset))
elif self.selected_runner.domain == "viruses":
pass
else:
self.run_busco_placer()
return
+ def set_best_match_lineage(self):
+ AnalysisRunner.selected_dataset = os.path.basename(self.best_match_lineage_dataset)
+
def check_mollicutes(self):
runners = self.run_lineages_list(["mollicutes"])
runners.append(self.selected_runner)
@@ -252,9 +255,7 @@ class AutoSelectLineage:
self.config.placement_files = placement_file_versions # Necessary to pass these filenames to the final run to be recorded.
lineage, supporting_markers, placed_markers = dataset_details
lineage = "{}_{}".format(lineage, self.config.get("busco_run", "datasets_version")) # todo: this should probably be done in buscoplacer
- self.best_match_lineage_dataset = os.path.join(self.config.get("busco_run", "download_path"),
- "lineages",
- os.path.basename(lineage))
+ self.best_match_lineage_dataset = lineage # basename
return
def _run_3_datasets(self, mollicutes_runner=None):
=====================================
src/busco/BuscoConfig.py
=====================================
@@ -4,6 +4,7 @@ from configparser import ParsingError
from configparser import DuplicateOptionError
from busco.BuscoLogger import BuscoLogger
from busco.BuscoLogger import LogDecorator as log
+from busco.Exceptions import BatchFatalError
from abc import ABCMeta, abstractmethod
from busco.BuscoDownloadManager import BuscoDownloadManager
import os
@@ -34,6 +35,7 @@ class BaseConfig(ConfigParser):
"limit": 3,
"use_augustus": False,
"long": False,
+ "batch_mode": False,
}
DEPENDENCY_SECTIONS = {
@@ -70,13 +72,15 @@ class BaseConfig(ConfigParser):
with open(self.conf_file) as cfg_file:
self.read_file(cfg_file)
except IOError:
- raise SystemExit("Config file {} cannot be found".format(self.conf_file))
+ raise BatchFatalError(
+ "Config file {} cannot be found".format(self.conf_file)
+ )
except ParsingError:
- raise SystemExit(
+ raise BatchFatalError(
"Unable to parse the contents of config file {}".format(self.conf_file)
)
except DuplicateOptionError:
- raise SystemExit(
+ raise BatchFatalError(
"Duplicated entry in config file {}. Unable to load configuration.".format(
self.conf_file
)
@@ -148,9 +152,8 @@ class PseudoConfig(BaseConfig):
"you have set 'update-data=True' in your config file."
)
else:
- raise SystemExit(
- "Unable to download list of datasets. Please make sure you have set "
- "update-data=True in your config file."
+ raise BatchFatalError(
+ "Unable to download list of datasets. Please use the --update-data option to use the latest available datasets."
)
except NoOptionError:
@@ -224,6 +227,13 @@ class BuscoConfig(ConfigParser, metaclass=ABCMeta):
self.set("busco_run", "lineage_dataset", local_lineage_filepath)
return
+ def load_dataset(self, lineage):
+ self.set_results_dirname(lineage) # function always only uses basename
+ self.download_lineage_file(
+ lineage
+ ) # full path will return, base name will attempt download
+ self.load_dataset_config()
+
def load_dataset_config(self):
"""
Load BUSCO dataset config file.
@@ -289,11 +299,8 @@ class BuscoConfigAuto(BuscoConfig):
def __init__(self, config, lineage, **kwargs):
super().__init__(**kwargs)
self._propagate_config(config)
- self.set_results_dirname(lineage)
-
+ self.load_dataset(lineage)
self._create_required_paths()
- self.download_lineage_file(lineage)
- self.load_dataset_config()
def _create_required_paths(self):
"""
@@ -348,6 +355,7 @@ class BuscoConfigMain(BuscoConfig, BaseConfig):
"evalue",
"limit",
"use_augustus",
+ "batch_mode",
]
def __init__(self, conf_file, params, **kwargs):
@@ -361,6 +369,7 @@ class BuscoConfigMain(BuscoConfig, BaseConfig):
self.conf_file = conf_file
self.params = params
self.main_out = None
+ self._input_filepath = None
def configure(self):
if self.conf_file != "local environment":
@@ -378,11 +387,12 @@ class BuscoConfigMain(BuscoConfig, BaseConfig):
def validate(self):
self._check_mandatory_keys_exist()
+ self._cleanup_config()
self._check_no_previous_run()
self._check_allowed_keys()
self._create_required_paths()
- self._cleanup_config()
self._check_required_input_exists()
+ self._check_batch_mode()
self._init_downloader()
@@ -418,16 +428,25 @@ class BuscoConfigMain(BuscoConfig, BaseConfig):
datasets_version = self.get("busco_run", "datasets_version")
if datasets_version != "odb10":
- raise SystemExit(
- "BUSCO v4 only works with datasets from OrthoDB v10 (with the suffix '_odb10'). "
+ raise BatchFatalError(
+ "BUSCO v5 only works with datasets from OrthoDB v10 (with the suffix '_odb10'). "
"For a full list of available datasets, enter 'busco --list-datasets'. "
- "You can also run BUSCO using auto-lineage, to allow BUSCO to automatically select "
+ "You can also run BUSCO using --auto-lineage, to allow BUSCO to automatically select "
"the best dataset for your input data."
)
return True
- except NoOptionError: # todo: need a helpful error message if datasets_version is not set but lineage_dataset is.
+ except NoOptionError:
return False
+ def _check_batch_mode(self):
+ if os.path.isdir(self._input_filepath):
+ self.set("busco_run", "batch_mode", "True")
+ elif not os.path.isfile(self._input_filepath):
+ raise BatchFatalError(
+ "Unrecognized input type. Please use either a single file or a directory name (for batch mode)"
+ )
+ return
+
def _check_evalue(self):
"""
Warn the user if the config contains a non-standard e-value cutoff.
@@ -436,20 +455,18 @@ class BuscoConfigMain(BuscoConfig, BaseConfig):
if (
self.getfloat("busco_run", "evalue")
!= type(self).DEFAULT_ARGS_VALUES["evalue"]
- ): # todo: introduce systemexit if not float
+ ):
logger.warning("You are using a custom e-value cutoff")
return
def _check_limit_value(self):
"""
- Check the value of limit. Ensure it is between 1 and 20, otherwise raise SystemExit.
+ Check the value of limit. Ensure it is between 1 and 20, otherwise raise BatchFatalError.
:return:
"""
- limit_val = self.getint(
- "busco_run", "limit"
- ) # todo: introduce systemexit if not int.
+ limit_val = self.getint("busco_run", "limit")
if limit_val <= 0 or limit_val > 20:
- raise SystemExit(
+ raise BatchFatalError(
"Limit must be an integer between 1 and 20 (you have used: {}). Note that this parameter "
"is not needed by the protein mode.".format(limit_val)
)
@@ -502,7 +519,7 @@ class BuscoConfigMain(BuscoConfig, BaseConfig):
+ synonyms["transcriptome"]
+ synonyms["proteins"]
):
- raise SystemExit(
+ raise BatchFatalError(
"Unknown mode {}.\n'Mode' parameter must be one of "
"['genome', 'transcriptome', 'proteins']".format(value)
)
@@ -517,12 +534,12 @@ class BuscoConfigMain(BuscoConfig, BaseConfig):
if param == "out":
if value == "":
- raise SystemExit(
+ raise BatchFatalError(
"Please specify an output name for the BUSCO run. "
"This can be done using the -o flag or in the config file"
)
except NoOptionError:
- raise SystemExit(
+ raise BatchFatalError(
'The parameter "{} (--{})" was not provided. '
"Please add it in the config file or provide it "
"through the command line".format(param, param)
@@ -542,7 +559,7 @@ class BuscoConfigMain(BuscoConfig, BaseConfig):
try:
for option in self.options(section_name):
if option not in options:
- raise SystemExit(
+ raise BatchFatalError(
"Unrecognized option '{}' in config file".format(option)
)
except NoSectionError:
@@ -555,19 +572,12 @@ class BuscoConfigMain(BuscoConfig, BaseConfig):
:return:
"""
if "/" in self.get("busco_run", "out"):
- raise SystemExit(
+ raise BatchFatalError(
"Please do not provide a full path in --out parameter, no slash. "
"Use out_path in the config.ini file to specify the full path."
)
return
- @log(
- "Input file is {}",
- logger,
- attr_name="_input_filepath",
- on_func_exit=True,
- log_once=True,
- )
def _check_required_input_exists(self):
"""
Test for existence of input file.
@@ -575,8 +585,8 @@ class BuscoConfigMain(BuscoConfig, BaseConfig):
"""
self._input_filepath = self.get("busco_run", "in")
if not os.path.exists(self._input_filepath):
- raise SystemExit(
- "Input file {} does not exist".format(self._input_filepath)
+ raise BatchFatalError(
+ "Input {} does not exist".format(self._input_filepath)
)
return
@@ -594,7 +604,7 @@ class BuscoConfigMain(BuscoConfig, BaseConfig):
)
)
else:
- raise SystemExit(
+ raise BatchFatalError(
"A run with the name {} already exists...\n"
"\tIf you are sure you wish to overwrite existing files, "
"please use the -f (force) option".format(self.main_out)
@@ -650,7 +660,7 @@ class BuscoConfigMain(BuscoConfig, BaseConfig):
if item[0].endswith("_path") or item[0] == "path" or item[0] == "in":
if item[1].startswith("~"):
self.set(key, item[0], os.path.expanduser(item[1]))
- elif item[1].startswith(".") or (item[1] and "/" not in item[1]):
+ elif item[1]:
self.set(key, item[0], os.path.abspath(item[1]))
return
=====================================
src/busco/BuscoDownloadManager.py
=====================================
@@ -22,6 +22,7 @@ import gzip
from busco.BuscoLogger import BuscoLogger
from busco.BuscoLogger import LogDecorator as log
+from busco.Exceptions import BatchFatalError, BuscoError
logger = BuscoLogger.get_logger(__name__)
@@ -72,7 +73,14 @@ class BuscoDownloadManager:
"Unable to verify BUSCO datasets because of offline mode"
)
else:
- raise SystemExit(e)
+ raise BatchFatalError(e)
+ except FileNotFoundError:
+ logger.warning(
+ "Unable to find file_versions.tsv. This may be due to an internet access problem. "
+ "Attempting to continue in offline mode."
+ )
+ self.offline = True
+
return
@log("Downloading information on latest versions of BUSCO data...", logger)
@@ -82,7 +90,7 @@ class BuscoDownloadManager:
try:
urllib.request.urlretrieve(remote_filepath, local_filepath)
except URLError:
- SystemExit("Cannot reach {}".format(remote_filepath))
+ raise BatchFatalError("Cannot reach {}".format(remote_filepath))
return local_filepath
def _create_category_dir(self, category):
@@ -103,7 +111,7 @@ class BuscoDownloadManager:
dataset_date = line[1]
break
if not dataset_date:
- raise SystemExit(
+ raise BuscoError(
"Creation date could not be extracted from dataset.cfg file."
)
return dataset_date
@@ -112,7 +120,7 @@ class BuscoDownloadManager:
try:
latest_update = type(self).version_files[data_basename][0]
except KeyError:
- raise SystemExit(
+ raise BuscoError(
"{} is not a valid option for '{}'".format(data_basename, category)
)
path_basename, extension = os.path.splitext(data_basename)
@@ -154,7 +162,7 @@ class BuscoDownloadManager:
logger.info("Using local {} directory {}".format(category, data_name))
return data_name
elif "/" in data_name:
- raise SystemExit("{} does not exist".format(data_name))
+ raise BuscoError("{} does not exist".format(data_name))
if self.offline:
if category == "lineages":
local_dataset = os.path.join(
@@ -163,7 +171,7 @@ class BuscoDownloadManager:
if os.path.exists(local_dataset):
return local_dataset
else:
- raise SystemExit(
+ raise BuscoError(
"Unable to run BUSCO in offline mode. Dataset {} does not "
"exist.".format(local_dataset)
)
@@ -180,9 +188,8 @@ class BuscoDownloadManager:
)
if len(placement_files) > 0:
return placement_files[-1]
- # todo: for offline mode, log which files are being used (in case of more than one glob match)
else:
- raise SystemExit(
+ raise BuscoError(
"Unable to run BUSCO placer in offline mode. Cannot find necessary placement "
"files in {}".format(self.local_download_path)
)
@@ -263,7 +270,7 @@ class BuscoDownloadManager:
)
logger.info("deleting corrupted file {}".format(local_filepath))
os.remove(local_filepath)
- raise SystemExit(
+ raise BuscoError(
"BUSCO was unable to download or update all necessary files"
)
else:
@@ -290,14 +297,24 @@ class BuscoDownloadManager:
with open(unzipped_filename, "wb") as decompressed_file:
for line in compressed_file:
decompressed_file.write(line)
- os.remove(local_filepath)
+ try:
+ os.remove(local_filepath)
+ except FileNotFoundError:
+ logger.warning(
+ "Downloaded gzip file was removed before this BUSCO run could remove it."
+ )
local_filepath = unzipped_filename
if os.path.splitext(local_filepath)[1] == ".tar":
untarred_filename = local_filepath.replace(".tar", "")
with tarfile.open(local_filepath) as tar_file:
tar_file.extractall(os.path.dirname(local_filepath))
- os.remove(local_filepath)
+ try:
+ os.remove(local_filepath)
+ except FileNotFoundError:
+ logger.warning(
+ "Downloaded tarball was removed before this BUSCO run could remove it."
+ )
local_filepath = untarred_filename
return local_filepath
=====================================
src/busco/BuscoLogger.py
=====================================
@@ -22,6 +22,7 @@ import select
import time
import threading
import random
+from busco.Exceptions import BatchFatalError, BuscoError
from configparser import NoOptionError
from configparser import NoSectionError
@@ -64,7 +65,7 @@ class LogDecorator:
self.format_string(*args)
self.retval = func(*args, **kwargs)
return self.retval
- except SystemExit:
+ except (BuscoError, BatchFatalError):
raise
return wrapped_func
@@ -281,7 +282,7 @@ class BuscoLogger(logging.getLoggerClass()):
if e.errno == 13
else "IO error({0}): {1}".format(e.errno, e.strerror)
)
- raise SystemExit(errStr)
+ raise BatchFatalError(errStr)
self._file_hdlr.setLevel(logging.DEBUG)
self._file_hdlr.setFormatter(self._verbose_formatter)
=====================================
src/busco/BuscoPlacer.py
=====================================
@@ -16,6 +16,7 @@ import json
import os
from busco.BuscoLogger import BuscoLogger
from busco.BuscoLogger import LogDecorator as log
+from busco.Exceptions import BuscoError
from Bio import SeqIO
from busco.busco_tools.sepp import SEPPRunner
@@ -176,7 +177,7 @@ class BuscoPlacer:
tree = data["tree"]
placements = data["placements"]
except FileNotFoundError:
- raise SystemExit(
+ raise BuscoError(
"Placements failed. Try to rerun increasing the memory or select a lineage manually."
)
=====================================
src/busco/BuscoRunner.py
=====================================
@@ -1,3 +1,4 @@
+from busco.Exceptions import BatchFatalError, BuscoError
from busco.analysis.BuscoAnalysis import BuscoAnalysis
from busco.analysis.GenomeAnalysis import (
GenomeAnalysisEukaryotesAugustus,
@@ -12,14 +13,219 @@ from busco.analysis.GenomeAnalysis import GenomeAnalysisProkaryotes
from busco.BuscoLogger import BuscoLogger
from busco.BuscoConfig import BuscoConfigMain
from busco.busco_tools.base import NoGenesError, BaseRunner
+from busco.BuscoLogger import LogDecorator as log
from configparser import NoOptionError
+from busco.busco_tools.Toolset import ToolException
import os
+import sys
import shutil
+import time
+import traceback
+from collections import defaultdict
+
logger = BuscoLogger.get_logger(__name__)
-class BuscoRunner:
+class SingleRunner:
+ def __init__(self, config_manager):
+ self.start_time = time.time()
+ self.config_manager = config_manager
+ self.config = self.config_manager.config_main
+ self.input_file = self.config.get("busco_run", "in")
+ self.lineage_dataset = None
+ self.runner = None
+
+ def get_lineage(self):
+ if self.config.getboolean("busco_run", "auto-lineage"):
+ lineage_dataset_fullpath, runner = self.auto_select_lineage() # full path
+ if not runner.config.get("busco_run", "domain") == "viruses":
+ self.config.set(
+ "busco_run",
+ "parent_dataset",
+ runner.config.get("busco_run", "lineage_dataset"),
+ )
+ self.config.set("busco_run", "lineage_dataset", lineage_dataset_fullpath)
+ self.runner = runner
+
+ self.lineage_dataset = self.config.get(
+ "busco_run", "lineage_dataset"
+ ) # full path
+ return
+
+ @log("No lineage specified. Running lineage auto selector.\n", logger)
+ def auto_select_lineage(self):
+ from busco.AutoLineage import (
+ AutoSelectLineage,
+ ) # Import statement inside method to avoid circular imports
+
+ asl = AutoSelectLineage(self.config_manager)
+ asl.run_auto_selector()
+ asl.get_lineage_dataset()
+ asl.set_best_match_lineage()
+ lineage_dataset = asl.best_match_lineage_dataset
+ runner = asl.selected_runner
+ asl.reset()
+ return lineage_dataset, runner
+
+ @staticmethod
+ def log_error(err):
+ logger.error(err)
+ logger.debug(err, exc_info=True)
+ logger.error("BUSCO analysis failed !")
+ logger.error(
+ "Check the logs, read the user guide (https://busco.ezlab.org/busco_userguide.html), "
+ "and check the BUSCO issue board on https://gitlab.com/ezlab/busco/issues\n"
+ )
+
+ @log("Input file is {}", logger, attr_name="input_file")
+ def run(self):
+ try:
+ self.get_lineage()
+ self.config.load_dataset(self.lineage_dataset)
+
+ lineage_basename = os.path.basename(
+ self.config.get("busco_run", "lineage_dataset")
+ )
+ main_out_folder = self.config.get("busco_run", "main_out")
+ lineage_results_folder = os.path.join(
+ main_out_folder,
+ "auto_lineage",
+ self.config.get("busco_run", "lineage_results_dir"),
+ )
+
+ if not (
+ self.config.getboolean("busco_run", "auto-lineage")
+ and (
+ lineage_basename.startswith(("bacteria", "archaea", "eukaryota"))
+ or (
+ lineage_basename.startswith(
+ ("mollicutes", "mycoplasmatales", "entomoplasmatales")
+ )
+ and os.path.exists(lineage_results_folder)
+ )
+ )
+ ):
+ # It is possible that the lineages ("mollicutes", "mycoplasmatales", "entomoplasmatales") were arrived at either by the Prodigal genetic code shortcut
+ # or by BuscoPlacer. If the former, the run will have already been completed. If the latter it still needs
+ # to be done.
+
+ self.runner = AnalysisRunner(self.config)
+
+ if os.path.exists(lineage_results_folder):
+ new_dest = os.path.join(
+ main_out_folder, self.config.get("busco_run", "lineage_results_dir")
+ )
+ if not os.path.exists(new_dest):
+ os.symlink(lineage_results_folder, new_dest)
+ else:
+ self.runner.run_analysis()
+ AnalysisRunner.selected_dataset = lineage_basename
+ self.runner.finish(time.time() - self.start_time)
+
+ except BuscoError as e:
+ self.log_error(e)
+ raise
+
+ except ToolException as e:
+ logger.error(e)
+ raise BatchFatalError(e)
+
+ except KeyboardInterrupt:
+ logger.exception(
+ "A signal was sent to kill the process. \nBUSCO analysis failed !"
+ )
+ raise BatchFatalError
+
+ except BaseException:
+ exc_type, exc_value, exc_traceback = sys.exc_info()
+ logger.critical(
+ "Unhandled exception occurred:\n{}\n".format(
+ "".join(
+ traceback.format_exception(exc_type, exc_value, exc_traceback)
+ )
+ )
+ )
+ raise BatchFatalError
+
+
+class BatchRunner:
+
+ batch_results = []
+
+ @log(
+ "Running in batch mode. {} input files found in {}",
+ logger,
+ attr_name=["num_inputs", "input_dir"],
+ on_func_exit=True,
+ )
+ def __init__(self, config_manager):
+ self.config_manager = config_manager
+ self.config = self.config_manager.config_main
+ self.input_dir = self.config.get("busco_run", "in")
+ self.input_files = [
+ os.path.join(self.input_dir, f) for f in os.listdir(self.input_dir)
+ ]
+ self.num_inputs = len(self.input_files)
+
+ def run(self):
+ for i, input_file in enumerate(self.input_files):
+ try:
+ input_id = os.path.basename(input_file)
+ self.config.set("busco_run", "in", input_file)
+ self.config.set(
+ "busco_run",
+ "main_out",
+ os.path.join(
+ self.config.get("busco_run", "out_path"),
+ self.config.get("busco_run", "out"),
+ input_id,
+ ),
+ )
+ single_run = SingleRunner(self.config_manager)
+ single_run.run()
+
+ run_summary = single_run.runner.format_run_summary()
+ type(self).batch_results.append(run_summary)
+
+ AnalysisRunner.reset()
+
+ except NoOptionError as noe:
+ raise BatchFatalError(noe)
+
+ except BatchFatalError:
+ raise
+
+ except BuscoError:
+ continue
+
+ try:
+ self.write_batch_summary()
+ except NoOptionError as noe:
+ raise BatchFatalError(noe)
+
+ def write_batch_summary(self):
+ summary_file = os.path.join(
+ self.config.get("busco_run", "out_path"),
+ self.config.get("busco_run", "out"),
+ "batch_summary.txt",
+ )
+ # for inp, res in type(self).batch_results.items():
+ # if
+ with open(summary_file, "w") as f:
+ if self.config.getboolean("busco_run", "auto-lineage"):
+ f.write(
+ "Input_file\tDataset\tComplete\tSingle\tDuplicated\tFragmented\tMissing\tn_markers\tScores_archaea_odb10\tScores_bacteria_odb10\tScores_eukaryota_odb10\n"
+ )
+ else:
+ f.write(
+ "Input_file\tDataset\tComplete\tSingle\tDuplicated\tFragmented\tMissing\tn_markers\n"
+ )
+ for line in type(self).batch_results:
+ f.write(line)
+
+
+class AnalysisRunner:
mode_dict = {
"euk_genome_met": GenomeAnalysisEukaryotesMetaeuk,
@@ -30,8 +236,10 @@ class BuscoRunner:
"proteins": GeneSetAnalysis,
}
- final_results = []
+ final_results = {}
results_datasets = []
+ all_results = defaultdict(dict)
+ selected_dataset = None
def __init__(self, config):
@@ -51,7 +259,7 @@ class BuscoRunner:
else:
self.mode = "euk_genome_met"
else:
- raise SystemExit("Unrecognized mode {}".format(self.mode))
+ raise BatchFatalError("Unrecognized mode {}".format(self.mode))
elif self.mode == "transcriptome":
if self.domain == "prokaryota":
@@ -59,7 +267,7 @@ class BuscoRunner:
elif self.domain == "eukaryota":
self.mode = "euk_tran"
else:
- raise SystemExit("Unrecognized mode {}".format(self.mode))
+ raise BatchFatalError("Unrecognized mode {}".format(self.mode))
analysis_type = type(self).mode_dict[self.mode]
self.analysis = analysis_type()
self.prok_fail_count = (
@@ -67,6 +275,50 @@ class BuscoRunner:
)
self.cleaned_up = False
+ @classmethod
+ def reset(cls):
+ cls.final_results = {}
+ cls.results_datasets = []
+ cls.all_results = defaultdict(dict)
+
+ def save_results(self):
+ lineage_basename = os.path.basename(
+ self.config.get("busco_run", "lineage_dataset")
+ )
+ if not self.config.getboolean("busco_run", "auto-lineage"):
+ type(self).selected_dataset = lineage_basename
+ self.final_results[
+ lineage_basename
+ ] = self.analysis.hmmer_runner.hmmer_results_lines
+ self.all_results[lineage_basename][
+ "one_line_summary"
+ ] = self.analysis.hmmer_runner.one_line_summary_raw
+ self.all_results[lineage_basename][
+ "Complete"
+ ] = self.analysis.hmmer_runner.complete_percent
+ self.all_results[lineage_basename][
+ "Single copy"
+ ] = self.analysis.hmmer_runner.s_percent
+ self.all_results[lineage_basename][
+ "Multi copy"
+ ] = self.analysis.hmmer_runner.d_percent
+ self.all_results[lineage_basename][
+ "Fragmented"
+ ] = self.analysis.hmmer_runner.f_percent
+ self.all_results[lineage_basename][
+ "Missing"
+ ] = self.analysis.hmmer_runner.missing_percent
+ self.all_results[lineage_basename][
+ "n_markers"
+ ] = self.analysis.hmmer_runner.total_buscos
+ self.all_results[lineage_basename]["domain"] = self.analysis.domain
+ try:
+ parent_dataset = self.config.get("busco_run", "parent_dataset")
+ self.all_results[lineage_basename]["parent_dataset"] = parent_dataset
+ except NoOptionError:
+ pass
+ self.results_datasets.append(os.path.basename(lineage_basename))
+
def run_analysis(self, callback=(lambda *args: None)):
try:
self.analysis.run_analysis()
@@ -83,7 +335,7 @@ class BuscoRunner:
"{0} did not recognize any genes matching the dataset {1} in the input file. "
"If this is unexpected, check your input file and your "
"installation of {0}\n".format(
- nge.gene_predictor, self.analysis._lineage_name
+ nge.gene_predictor, self.analysis.lineage_name
)
)
fatal = (
@@ -99,17 +351,19 @@ class BuscoRunner:
and self.prok_fail_count == 1
)
if fatal:
- raise SystemExit(no_genes_msg)
+ raise BuscoError(no_genes_msg)
else:
logger.warning(no_genes_msg)
s_buscos = d_buscos = f_buscos = s_percent = d_percent = f_percent = 0.0
if self.mode == "prok_genome":
self.prok_fail_count += 1
- except SystemExit as se:
+ except (BuscoError, BatchFatalError):
self.cleanup()
+ raise
- raise se
+ finally:
+ self.save_results()
return callback(s_buscos, d_buscos, f_buscos, s_percent, d_percent, f_percent)
def cleanup(self):
@@ -118,23 +372,33 @@ class BuscoRunner:
def format_results(self):
framed_output = []
- if len(type(self).results_datasets) == 1:
- header1 = "Results from dataset {}\n".format(type(self).results_datasets[0])
+ final_dataset = type(self).selected_dataset
+ domain = type(self).all_results[final_dataset]["domain"]
+ try:
+ parent_dataset = os.path.basename(
+ type(self).all_results[final_dataset]["parent_dataset"]
+ )
+ except KeyError:
+ parent_dataset = None
+ if parent_dataset is None or domain == "viruses":
+ header1 = "Results from dataset {}\n".format(final_dataset)
+ final_output_results1 = "".join(
+ self._check_parasitic(type(self).final_results[final_dataset][1:])
+ )
else:
- header1 = "Results from generic domain {}\n".format(
- type(self).results_datasets[0]
+ header1 = "Results from generic domain {}\n".format(parent_dataset)
+ final_output_results1 = "".join(
+ self._check_parasitic(type(self).final_results[parent_dataset][1:])
)
- final_output_results1 = "".join(
- self._check_parasitic(type(self).final_results[0][1:])
- )
+
sb1 = SmartBox()
framed_lines1 = sb1.create_results_box(header1, final_output_results1)
framed_output.append(framed_lines1)
- if len(type(self).final_results) == 2:
- header2 = "Results from dataset {}\n".format(type(self).results_datasets[1])
+ if domain != "viruses" and parent_dataset and parent_dataset != final_dataset:
+ header2 = "Results from dataset {}\n".format(final_dataset)
final_output_results2 = "".join(
- self._check_parasitic(type(self).final_results[1][1:])
+ self._check_parasitic(type(self).final_results[final_dataset][1:])
)
sb2 = SmartBox()
framed_lines2 = sb2.create_results_box(header2, final_output_results2)
@@ -142,10 +406,60 @@ class BuscoRunner:
return "".join(framed_output)
+ def format_run_summary(self):
+ input_file = os.path.basename(self.config.get("busco_run", "in"))
+ dataset = type(self).selected_dataset
+ complete = type(self).all_results[dataset]["Complete"]
+ single = type(self).all_results[dataset]["Single copy"]
+ duplicated = type(self).all_results[dataset]["Multi copy"]
+ fragmented = type(self).all_results[dataset]["Fragmented"]
+ missing = type(self).all_results[dataset]["Missing"]
+ n_markers = type(self).all_results[dataset]["n_markers"]
+
+ if self.config.getboolean("busco_run", "auto-lineage"):
+ try:
+ scores_arch = (
+ type(self).all_results["archaea_odb10"]["one_line_summary"].strip()
+ )
+ except KeyError:
+ scores_arch = "Not run"
+ try:
+ scores_bact = (
+ type(self).all_results["bacteria_odb10"]["one_line_summary"].strip()
+ )
+ except KeyError:
+ scores_bact = "Not run"
+ try:
+ scores_euk = (
+ type(self)
+ .all_results["eukaryota_odb10"]["one_line_summary"]
+ .strip()
+ )
+ except KeyError:
+ scores_euk = "Not run"
+ else:
+ scores_arch = ""
+ scores_bact = ""
+ scores_euk = ""
+ summary_line = "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format(
+ input_file,
+ dataset,
+ complete,
+ single,
+ duplicated,
+ fragmented,
+ missing,
+ n_markers,
+ scores_arch,
+ scores_bact,
+ scores_euk,
+ )
+ return summary_line
+
def _check_parasitic(self, final_output_results):
try:
with open(
- os.path.join(self.analysis._lineage_dataset, "missing_in_parasitic.txt")
+ os.path.join(self.analysis.lineage_dataset, "missing_in_parasitic.txt")
) as parasitic_file:
missing_in_parasitic_buscos = [
entry.strip() for entry in parasitic_file.readlines()
@@ -219,8 +533,8 @@ class BuscoRunner:
root_domain_output_folder_final = os.path.join(
main_out_folder, "run_{}".format(domain_results_folder)
)
- os.rename(root_domain_output_folder, root_domain_output_folder_final)
- os.symlink(root_domain_output_folder_final, root_domain_output_folder)
+ print(root_domain_output_folder, root_domain_output_folder_final)
+ os.symlink(root_domain_output_folder, root_domain_output_folder_final)
shutil.copyfile(
os.path.join(root_domain_output_folder_final, "short_summary.txt"),
os.path.join(
@@ -258,7 +572,11 @@ class BuscoRunner:
# This is deliberately a staticmethod so it can be called from run_BUSCO() even if BuscoRunner has not yet
# been initialized.
try:
- log_folder = os.path.join(config.get("busco_run", "main_out"), "logs")
+ log_folder = os.path.join(
+ config.get("busco_run", "out_path"),
+ config.get("busco_run", "out"),
+ "logs",
+ )
if not os.path.exists(log_folder):
os.makedirs(log_folder)
shutil.move(
@@ -300,8 +618,6 @@ class BuscoRunner:
"https://busco.ezlab.org/busco_userguide.html\n"
)
- self.move_log_file(self.config)
-
class SmartBox:
def __init__(self):
@@ -367,7 +683,7 @@ class SmartBox:
def create_results_box(self, header_text, body_text):
header = self.wrap_header(header_text) # Called first to define width
- box_lines = ["\n"]
+ box_lines = list(["\n"])
box_lines.append("\t{}".format(self.add_horizontal()))
framed_header = self.add_vertical(header)
for line in framed_header:
=====================================
src/busco/ConfigManager.py
=====================================
@@ -11,8 +11,7 @@ Licensed under the MIT license. See LICENSE.md file.
"""
-from busco.AutoLineage import AutoSelectLineage
-from busco.BuscoConfig import BuscoConfigMain
+from busco.BuscoConfig import BuscoConfigMain, BuscoConfigAuto
from busco.BuscoLogger import BuscoLogger
from busco.BuscoLogger import LogDecorator as log
import os
@@ -24,7 +23,7 @@ class BuscoConfigManager:
def __init__(self, params):
self.params = params
self.config_file = None
- self.config = None
+ self.config_main = None
self.get_config_file()
self.runner = None
@@ -51,58 +50,12 @@ class BuscoConfigManager:
return self.config_file
@log("Configuring BUSCO with {}", logger, attr_name="config_file")
- def load_busco_config(self, *args):
- self.config = BuscoConfigMain(self.config_file, self.params)
- self.config.configure()
- self.config.validate()
- if not self.config.check_lineage_present():
- if not self.config.getboolean(
- "busco_run", "auto-lineage"
- ) and not self.config.getboolean(
- "busco_run", "auto-lineage-prok"
- ): # and not self.config.getboolean("busco_run", "auto-lineage-euk"):
- logger.warning(
- "Running Auto Lineage Selector as no lineage dataset was specified. This will take a "
- "little longer than normal. If you know what lineage dataset you want to use, please "
- "specify this in the config file or using the -l (--lineage-dataset) flag in the "
- "command line."
- )
- self.config.set("busco_run", "auto-lineage", "True")
- lineage_dataset_fullpath = self.auto_select_lineage() # full path
- self.config.set("busco_run", "lineage_dataset", lineage_dataset_fullpath)
- lineage_dataset = os.path.basename(lineage_dataset_fullpath) # base name
- else:
- if self.config.getboolean(
- "busco_run", "auto-lineage"
- ) or self.config.getboolean(
- "busco_run", "auto-lineage-prok"
- ): # or self.config.getboolean("busco_run", "auto-lineage-euk"):
- logger.warning(
- "You have selected auto-lineage but you have also provided a lineage dataset. "
- "BUSCO will proceed with the specified dataset. "
- "To run auto-lineage do not specify a dataset."
- )
- self.config.set("busco_run", "auto-lineage", "False")
- self.config.set("busco_run", "auto-lineage-prok", "False")
- self.config.set("busco_run", "auto-lineage-euk", "False")
- lineage_dataset = self.config.get(
- "busco_run", "lineage_dataset"
- ) # full path
-
- self.config.set_results_dirname(
- lineage_dataset
- ) # function always only uses basename
- self.config.download_lineage_file(
- lineage_dataset
- ) # full path will return, base name will attempt download
- self.config.load_dataset_config()
+ def load_busco_config_main(self, *args):
+ self.config_main = BuscoConfigMain(self.config_file, self.params)
+ self.config_main.configure()
+ self.config_main.validate()
return
- @log("No lineage specified. Running lineage auto selector.\n", logger)
- def auto_select_lineage(self):
- asl = AutoSelectLineage(self.config)
- asl.run_auto_selector()
- asl.get_lineage_dataset()
- lineage_dataset = asl.best_match_lineage_dataset
- self.runner = asl.selected_runner
- return lineage_dataset
+ def load_busco_config_auto(self, lineage):
+ autoconfig = BuscoConfigAuto(self.config_main, lineage)
+ return autoconfig
=====================================
src/busco/Exceptions.py
=====================================
@@ -0,0 +1,22 @@
+class BatchFatalError(Exception):
+ """
+ Error that prevents batch mode from running.
+ """
+
+ def __init__(self, value):
+ self.value = value
+
+ def __str__(self):
+ return self.value
+
+
+class BuscoError(Exception):
+ """
+ Module-specific exception
+ """
+
+ def __init__(self, value):
+ self.value = value
+
+ def __str__(self):
+ return self.value
=====================================
src/busco/_version.py
=====================================
@@ -6,4 +6,4 @@ Copyright (c) 2016-2021, Evgeny Zdobnov (ez at ezlab.org)
Licensed under the MIT license. See LICENSE.md file.
"""
-__version__ = "5.0.0"
+__version__ = "5.1.2"
=====================================
src/busco/analysis/Analysis.py
=====================================
@@ -3,6 +3,7 @@ from busco.BuscoLogger import BuscoLogger
from abc import ABCMeta
import os
from busco.busco_tools.blast import TBLASTNRunner, MKBLASTRunner
+from busco.Exceptions import BuscoError
logger = BuscoLogger.get_logger(__name__)
@@ -18,7 +19,7 @@ class NucleotideAnalysis(metaclass=ABCMeta):
super().__init__() # Initialize BuscoAnalysis
if not self.check_nucleotide_file(self.input_file):
- raise SystemExit("The input file does not contain nucleotide sequences.")
+ raise BuscoError("The input file does not contain nucleotide sequences.")
def check_nucleotide_file(self, filename):
i = 0
@@ -77,7 +78,7 @@ class ProteinAnalysis:
def __init__(self):
super().__init__()
if not self.check_protein_file(self.input_file):
- raise SystemExit("Please provide a protein file as input")
+ raise BuscoError("Please provide a protein file as input")
def check_protein_file(self, filename):
@@ -125,7 +126,7 @@ class BLASTAnalysis(metaclass=ABCMeta):
self.config.set("busco_run", "restart", str(self.restart))
self.mkblast_runner.run()
if len(os.listdir(os.path.split(self.mkblast_runner.output_db)[0])) == 0:
- raise SystemExit(
+ raise BuscoError(
"makeblastdb failed to create a BLAST DB at {}".format(
self.mkblast_runner.output_db
)
=====================================
src/busco/analysis/BuscoAnalysis.py
=====================================
@@ -16,6 +16,7 @@ from busco.busco_tools.hmmer import HMMERRunner
import os
from busco.BuscoLogger import BuscoLogger
from busco.BuscoLogger import LogDecorator as log
+from busco.Exceptions import BatchFatalError, BuscoError
logger = BuscoLogger.get_logger(__name__)
@@ -50,11 +51,14 @@ class BuscoAnalysis(metaclass=ABCMeta):
# Get other useful variables
self.input_file = self.config.get("busco_run", "in")
- self._lineage_dataset = self.config.get("busco_run", "lineage_dataset")
- self._lineage_name = os.path.basename(self._lineage_dataset)
- self._domain = self.config.get("busco_run", "domain")
+ self.lineage_dataset = self.config.get("busco_run", "lineage_dataset")
+ self.lineage_name = os.path.basename(self.lineage_dataset)
+ self.domain = self.config.get("busco_run", "domain")
self._has_variants_file = os.path.exists(
- os.path.join(self._lineage_dataset, "ancestral_variants")
+ os.path.join(self.lineage_dataset, "ancestral_variants")
+ )
+ self.has_dataset_config_file = os.path.exists(
+ os.path.join(self.lineage_dataset, "dataset.cfg")
)
self._dataset_creation_date = self.config.get("busco_run", "creation_date")
self.restart = self.config.getboolean("busco_run", "restart")
@@ -89,7 +93,7 @@ class BuscoAnalysis(metaclass=ABCMeta):
@log(
"Running BUSCO using lineage dataset {0} ({1}, {2})",
logger,
- attr_name=["_lineage_name", "_domain", "_dataset_creation_date"],
+ attr_name=["lineage_name", "domain", "_dataset_creation_date"],
on_func_exit=True,
)
def run_analysis(self):
@@ -106,7 +110,7 @@ class BuscoAnalysis(metaclass=ABCMeta):
This function runs hmmsearch.
"""
if not busco_ids:
- files = sorted(os.listdir(os.path.join(self._lineage_dataset, "hmms")))
+ files = sorted(os.listdir(os.path.join(self.lineage_dataset, "hmms")))
busco_ids = [
os.path.splitext(f)[0] for f in files
] # Each Busco ID has a HMM file of the form "<busco_id>.hmm"
@@ -116,7 +120,7 @@ class BuscoAnalysis(metaclass=ABCMeta):
if self.restart and self.hmmer_runner.check_previous_completed_run():
logger.info("Skipping HMMER run as output already processed")
elif len(os.listdir(self.hmmer_runner.results_dir)) > 0:
- raise SystemExit(
+ raise BuscoError(
"HMMER results directory not empty. If you are running in restart mode, make sure you are "
"using the same eukaryotic gene predictor (metaeuk/augustus) as before."
)
@@ -146,27 +150,27 @@ class BuscoAnalysis(metaclass=ABCMeta):
Note: dataset.cfg file is not mandatory for offline mode
# todo: implement a check for dataset.cfg file if not using offline mode
- :raises SystemExit: if the dataset is missing files or folders
+ :raises BuscoError: if the dataset is missing files or folders
"""
# Check hmm files exist
- files = os.listdir(os.path.join(self._lineage_dataset, "hmms"))
+ files = os.listdir(os.path.join(self.lineage_dataset, "hmms"))
if not files:
- raise SystemExit(
+ raise BuscoError(
"The dataset you provided lacks hmm profiles in {}".format(
- os.path.join(self._lineage_dataset, "hmms")
+ os.path.join(self.lineage_dataset, "hmms")
)
)
- if self._domain == "eukaryota":
+ if self.domain == "eukaryota":
# Check prfl folder exists and contains profiles
for dirpath, dirnames, files in os.walk(
- os.path.join(self._lineage_dataset, "prfl")
+ os.path.join(self.lineage_dataset, "prfl")
):
if not files:
- raise SystemExit(
+ raise BuscoError(
"The dataset you provided lacks elements in {}".format(
- os.path.join(self._lineage_dataset, "prfl")
+ os.path.join(self.lineage_dataset, "prfl")
)
)
@@ -175,13 +179,17 @@ class BuscoAnalysis(metaclass=ABCMeta):
"The dataset you provided does not contain the file ancestral_variants, likely because it "
'is an old version. All blast steps will use the file "ancestral" instead'
)
+ if not self.has_dataset_config_file:
+ raise BuscoError(
+ "The dataset you provided is missing the dataset.cfg file and is therefore corrupted."
+ )
return
def _check_data_integrity(self):
self._check_dataset_integrity()
if not os.stat(self.input_file).st_size > 0:
- raise SystemExit("Input file is empty.")
+ raise BuscoError("Input file is empty.")
with open(self.input_file) as f:
for line in f:
if line.startswith(">"):
@@ -192,7 +200,7 @@ class BuscoAnalysis(metaclass=ABCMeta):
def _check_seq_uniqueness(self, line):
seq_id = line.split(" ")[0]
if seq_id in self.headers:
- raise SystemExit("Duplicate of sequence {} in input file".format(seq_id))
+ raise BuscoError("Duplicate of sequence {} in input file".format(seq_id))
self.headers.add(seq_id)
return
@@ -203,11 +211,11 @@ class BuscoAnalysis(metaclass=ABCMeta):
and warns the user and stops the execution
:param header: a fasta header to check
:type header: str
- :raises SystemExit: if a problematic character is found
+ :raises BuscoError: if a problematic character is found
"""
for char in BuscoConfig.FORBIDDEN_HEADER_CHARS:
if char in header:
- raise SystemExit(
+ raise BuscoError(
'The character "%s" is present in the fasta header %s, '
"which will crash BUSCO. Please clean the header of your "
"input file." % (char, header.strip())
@@ -215,14 +223,14 @@ class BuscoAnalysis(metaclass=ABCMeta):
for char in BuscoConfig.FORBIDDEN_HEADER_CHARS_BEFORE_SPLIT:
if char in header.split()[0]:
- raise SystemExit(
+ raise BuscoError(
'The character "%s" is present in the fasta header %s, '
"which will crash Reader. Please clean the header of your"
" input file." % (char, header.split()[0].strip())
)
if header.split()[0] == ">":
- raise SystemExit(
+ raise BuscoError(
"A space is present in the fasta header %s, directly after "
'">" which will crash Reader. Please clean the header of '
"your input file." % (header.strip())
@@ -249,18 +257,18 @@ class BuscoAnalysis(metaclass=ABCMeta):
def _create_main_dir(self):
"""
This function creates the run (main) directory
- :raises SystemExit: if write permissions are not available to the specified location
+ :raises BatchFatalError: if write permissions are not available to the specified location
"""
try:
os.makedirs(self.run_folder)
except FileExistsError:
if not self.restart:
- raise SystemExit(
+ raise BatchFatalError(
"Something went wrong. BUSCO stopped before overwriting run folder "
"{}".format(self.run_folder)
)
except PermissionError:
- raise SystemExit(
+ raise BatchFatalError(
"Cannot write to the output directory, please make sure "
"you have write permissions to {}".format(self.run_folder)
)
=====================================
src/busco/analysis/GenomeAnalysis.py
=====================================
@@ -31,6 +31,7 @@ import os
import pandas as pd
from collections import defaultdict
import subprocess
+from busco.Exceptions import BuscoError
logger = BuscoLogger.get_logger(__name__)
@@ -187,9 +188,12 @@ class GenomeAnalysisEukaryotesAugustus(BLASTAnalysis, GenomeAnalysisEukaryotes):
super().__init__()
self._long = self.config.getboolean("busco_run", "long")
try:
- self._target_species = self.config.get("busco_run", "augustus_species")
+ self._target_species_initial = self.config.get(
+ "busco_run", "augustus_species"
+ )
+ self._target_species = self._target_species_initial
except KeyError:
- raise SystemExit(
+ raise BuscoError(
"Something went wrong. Eukaryota datasets should specify an augustus species."
)
try:
@@ -220,6 +224,9 @@ class GenomeAnalysisEukaryotesAugustus(BLASTAnalysis, GenomeAnalysisEukaryotes):
try:
if self._target_species.startswith("BUSCO"):
self.augustus_runner.move_retraining_parameters()
+ self.config.set(
+ "busco_run", "augustus_species", self._target_species_initial
+ ) # Reset parameter for batch mode
except OSError:
pass
super().cleanup()
@@ -398,7 +405,7 @@ class GenomeAnalysisEukaryotesMetaeuk(GenomeAnalysisEukaryotes):
if i == 1:
logger.info("Metaeuk rerun did not find any genes")
else:
- raise SystemExit(
+ raise BuscoError(
"Metaeuk did not find any genes in the input file."
)
@@ -406,7 +413,7 @@ class GenomeAnalysisEukaryotesMetaeuk(GenomeAnalysisEukaryotes):
self.metaeuk_runner.combine_run_results()
except FileNotFoundError:
# This exception should only happen if the rerun file does not exist. If the initial run file was
- # missing there would have been a SystemExit call above. The index 0 sets the "combined" file to the
+ # missing there would have been a BuscoError raised above. The index 0 sets the "combined" file to the
# output of the initial run.
self.metaeuk_runner.combined_pred_protein_seqs = (
self.metaeuk_runner.pred_protein_mod_files[0]
@@ -744,7 +751,7 @@ class GenomeAnalysisEukaryotesMetaeuk(GenomeAnalysisEukaryotes):
exon_matched = False
exon_size_nt = int(entry["Stop"]) - int(entry["Start"]) + 1
if not exon_size_nt % 3 == 0:
- raise SystemExit(
+ raise BuscoError(
"The exon coordinates contain fractional reading frames and are ambiguous."
)
exon_size_aa = exon_size_nt / 3
=====================================
src/busco/busco_tools/Toolset.py
=====================================
@@ -20,6 +20,7 @@ from abc import ABCMeta, abstractmethod
from busco.BuscoLogger import BuscoLogger, ToolLogger
from busco.BuscoLogger import LogDecorator as log
from busco.BuscoLogger import StreamLogger
+from busco.Exceptions import BatchFatalError
import logging
logger = BuscoLogger.get_logger(__name__)
@@ -191,10 +192,10 @@ class Tool(metaclass=ABCMeta):
self.log_no_jobs()
return
- if (
- self.cpus is None
- ): # todo: need a different way to ensure self.cpus is nonzero number.
- raise SystemExit("Number of CPUs not specified.")
+ if self.cpus is None:
+ raise BatchFatalError("Number of CPUs not specified.")
+ elif self.cpus == 0:
+ raise BatchFatalError("Number of CPUs must be greater than 0.")
with Pool(
self.cpus, initializer=type(self).init_globals, initargs=(Value("i", 0),)
=====================================
src/busco/busco_tools/augustus.py
=====================================
@@ -11,6 +11,7 @@ import shutil
import numpy as np
from configparser import NoOptionError
import subprocess
+from busco.Exceptions import BuscoError
logger = BuscoLogger.get_logger(__name__)
@@ -61,8 +62,16 @@ class AugustusRunner(BaseRunner):
def __init__(self):
self.gene_details = None
- self._augustus_config_path = os.environ.get("AUGUSTUS_CONFIG_PATH")
- self.config.set("busco_run", "augustus_config_path", self._augustus_config_path)
+ try:
+ self._augustus_config_path = os.environ.get("AUGUSTUS_CONFIG_PATH")
+ self.config.set(
+ "busco_run", "augustus_config_path", self._augustus_config_path
+ )
+ except TypeError:
+ raise BuscoError(
+ "AUGUSTUS_CONFIG_PATH environment variable has not been set"
+ )
+
self._target_species = self.config.get("busco_run", "augustus_species")
super().__init__()
self._output_folder = os.path.join(self.run_folder, "augustus_output")
@@ -129,22 +138,22 @@ class AugustusRunner(BaseRunner):
"""
check dependencies on files and folders
properly configured.
- :raises SystemExit: if Augustus config path is not writable or
+ :raises BuscoError: if Augustus config path is not writable or
not set at all
- :raises SystemExit: if Augustus config path does not contain
+ :raises BuscoError: if Augustus config path does not contain
the needed species
present
"""
try:
augustus_species_dir = os.path.join(self._augustus_config_path, "species")
if not os.access(augustus_species_dir, os.W_OK):
- raise SystemExit(
+ raise BuscoError(
"Cannot write to Augustus species folder, please make sure you have write "
"permissions to {}".format(augustus_species_dir)
)
except TypeError:
- raise SystemExit("The environment variable AUGUSTUS_CONFIG_PATH is not set")
+ raise BuscoError("The environment variable AUGUSTUS_CONFIG_PATH is not set")
if not os.path.exists(os.path.join(augustus_species_dir, self._target_species)):
# Exclude the case where this is a restarted run and the retraining parameters have already been moved.
@@ -161,7 +170,7 @@ class AugustusRunner(BaseRunner):
):
pass
else:
- raise SystemExit(
+ raise BuscoError(
'Impossible to locate the species "{0}" in Augustus species folder'
" ({1}), check that AUGUSTUS_CONFIG_PATH is properly set"
" and contains this species. \n\t\tSee the help if you want "
@@ -547,7 +556,7 @@ class AugustusRunner(BaseRunner):
if gene_found:
break
if not gene_found and self.run_number == 1:
- raise SystemExit(
+ raise BuscoError(
"Unable to find single copy BUSCO gene in Augustus output."
)
@@ -745,7 +754,7 @@ class ETrainingRunner(BaseRunner):
):
return
else:
- raise SystemExit(
+ raise BuscoError(
"Retraining did not complete correctly. Check your Augustus config path environment variable."
)
=====================================
src/busco/busco_tools/base.py
=====================================
@@ -4,6 +4,7 @@ from busco.busco_tools.Toolset import Tool
from shutil import which
from abc import ABCMeta, abstractmethod
from busco.BuscoConfig import BuscoConfigAuto
+from busco.Exceptions import BuscoError
import time
logger = BuscoLogger.get_logger(__name__)
@@ -24,6 +25,7 @@ class ToolException(Exception):
class BaseRunner(Tool, metaclass=ABCMeta):
config = None
+ tool_versions = {}
def __init__(self):
super().__init__()
@@ -49,6 +51,7 @@ class BaseRunner(Tool, metaclass=ABCMeta):
"path!".format(self.name)
)
self.version = self.get_version()
+ type(self).tool_versions[self.name] = self.version
self.check_tool_dependencies()
self.checkpoint_file = None
@@ -102,7 +105,7 @@ class BaseRunner(Tool, metaclass=ABCMeta):
elif int(tool_run_numbers[tool_ind]) < int(self.run_number):
start_search = tool_ind + 1
else:
- raise SystemExit(
+ raise BuscoError(
"Something went wrong. Information for {} run {} missing but "
"information for run {} found.".format(
self.name,
=====================================
src/busco/busco_tools/blast.py
=====================================
@@ -3,6 +3,7 @@ import os
from collections import defaultdict
from busco.BuscoLogger import BuscoLogger
from busco.BuscoLogger import LogDecorator as log
+from busco.Exceptions import BuscoError
from Bio import SeqIO
import subprocess
@@ -149,7 +150,7 @@ class TBLASTNRunner(BaseRunner):
# Ensure value is between 5000 and MAX_FLANK
flank = min(max(flank, 5000), type(self).MAX_FLANK)
except IOError: # Input data is only validated during run_analysis. This will catch any IO issues before that.
- raise SystemExit(
+ raise BuscoError(
"Impossible to read the fasta file {}".format(self.input_file)
)
@@ -192,13 +193,13 @@ class TBLASTNRunner(BaseRunner):
def _check_output(self):
# check that blast worked
if not os.path.exists(self.blast_filename):
- raise SystemExit("tblastn failed!")
+ raise BuscoError("tblastn failed!")
# check that the file is not truncated
with open(self.blast_filename, "r") as f:
try:
if "processed" not in f.readlines()[-1]:
- raise SystemExit(
+ raise BuscoError(
"tblastn has ended prematurely (the result file lacks the expected final line), "
"which will produce incomplete results in the next steps ! This problem likely "
"appeared in blast+ 2.4 and seems not fully fixed in 2.6. It happens only when "
=====================================
src/busco/busco_tools/hmmer.py
=====================================
@@ -9,6 +9,7 @@ from Bio import SeqIO
import csv
import subprocess
from busco.BuscoConfig import BuscoConfigAuto
+from busco.Exceptions import BatchFatalError, BuscoError
logger = BuscoLogger.get_logger(__name__)
@@ -44,6 +45,7 @@ class HMMERRunner(BaseRunner):
self.extra_columns = False
self.log_count = 0 # Dummy variable used to skip logging for intermediate eukaryote pipeline results.
self.one_line_summary = None
+ self.one_line_summary_raw = None
# to be initialized before run time
self.input_sequences = None
@@ -85,6 +87,8 @@ class HMMERRunner(BaseRunner):
self.s_percent = 0
self.d_percent = 0
self.f_percent = 0
+ self.complete_percent = 0
+ self.missing_percent = 0
self.hmmer_results_lines = None
@@ -199,7 +203,6 @@ class HMMERRunner(BaseRunner):
def get_version(self):
"""
check the Tool has the correct version
- :raises SystemExit: if the version is not correct
"""
hmmer_version = subprocess.check_output(
[self.cmd, "-h"], stderr=subprocess.STDOUT, shell=False
@@ -218,11 +221,11 @@ class HMMERRunner(BaseRunner):
def check_tool_dependencies(self):
"""
check dependencies on tools
- :raises SystemExit: if a Tool version is not supported
+ :raises BatchFatalError: if a Tool version is not supported
"""
# check hmm version
if not self.version >= BuscoConfig.HMMER_VERSION:
- raise SystemExit(
+ raise BatchFatalError(
"HMMer version detected is not supported, please use HMMer v.{} +".format(
BuscoConfig.HMMER_VERSION
)
@@ -313,7 +316,7 @@ class HMMERRunner(BaseRunner):
records[gene_id]["env_coords"].append((env_start, env_end))
except IndexError as e:
- raise SystemExit(
+ raise BuscoError(
e, "Cannot parse HMMER output file {}".format(filename)
)
return records
@@ -444,7 +447,7 @@ class HMMERRunner(BaseRunner):
)
matched_genes = self.matched_genes_fragment
else:
- raise SystemExit("Unrecognized dictionary of BUSCOs.")
+ raise BuscoError("Unrecognized dictionary of BUSCOs.")
for busco_id in list(busco_dict.keys()):
matches = busco_dict[busco_id]
@@ -507,7 +510,7 @@ class HMMERRunner(BaseRunner):
elif busco_dict == self.is_fragment:
matched_genes = self.matched_genes_fragment
else:
- raise SystemExit("Unrecognized dictionary of BUSCOs.")
+ raise BuscoError("Unrecognized dictionary of BUSCOs.")
busco_matches_to_remove = []
# Keep the best scoring gene if gene is matched by more than one busco with the same match rank
@@ -778,7 +781,7 @@ class HMMERRunner(BaseRunner):
def _load_length(self):
"""
This function loads the length cutoffs file
- :raises SystemExit: if the lengths_cutoff file cannot be read
+ :raises BuscoError: if the lengths_cutoff file cannot be read
"""
lengths_cutoff_file = os.path.join(self.lineage_dataset, "lengths_cutoff")
try:
@@ -797,9 +800,9 @@ class HMMERRunner(BaseRunner):
self.cutoff_dict[taxid]["sigma"] = 1
self.cutoff_dict[taxid]["length"] = length
except IndexError as e:
- raise SystemExit(e, "Error parsing the lengths_cutoff file.")
+ raise BuscoError(e, "Error parsing the lengths_cutoff file.")
except IOError:
- raise SystemExit(
+ raise BuscoError(
"Impossible to read the lengths in {}".format(
os.path.join(lengths_cutoff_file)
)
@@ -809,7 +812,7 @@ class HMMERRunner(BaseRunner):
def _load_score(self):
"""
This function loads the score cutoffs file
- :raises SystemExit: if the scores_cutoff file cannot be read
+ :raises BuscoError: if the scores_cutoff file cannot be read
"""
scores_cutoff_file = os.path.join(self.lineage_dataset, "scores_cutoff")
try:
@@ -822,9 +825,9 @@ class HMMERRunner(BaseRunner):
score = float(line[1])
self.cutoff_dict[taxid]["score"] = score
except IndexError as e:
- raise SystemExit(e, "Error parsing the scores_cutoff file.")
+ raise BuscoError(e, "Error parsing the scores_cutoff file.")
except IOError:
- raise SystemExit(
+ raise BuscoError(
"Impossible to read the scores in {}".format(scores_cutoff_file)
)
return
@@ -900,52 +903,58 @@ class HMMERRunner(BaseRunner):
return sorted_lines
def produce_hmmer_summary(self):
- (
- single_copy,
- multi_copy,
- only_fragments,
- total_buscos,
- ) = self._get_busco_percentages()
+ self._get_busco_percentages()
self.hmmer_results_lines.append("***** Results: *****\n\n")
- self.one_line_summary = "C:{}%[S:{}%,D:{}%],F:{}%,M:{}%,n:{}\t{}\n".format(
- round(self.s_percent + self.d_percent, 1),
+ self.one_line_summary_raw = "C:{}%[S:{}%,D:{}%],F:{}%,M:{}%,n:{}\t{}\n".format(
+ self.complete_percent,
self.s_percent,
self.d_percent,
self.f_percent,
- abs(round(100 - self.s_percent - self.d_percent - self.f_percent, 1)),
- total_buscos,
+ self.missing_percent,
+ self.total_buscos,
" ",
)
- self.hmmer_results_lines.append(self.one_line_summary)
+ self.one_line_summary = "Results:\t{}".format(self.one_line_summary_raw)
+ self.hmmer_results_lines.append(self.one_line_summary_raw)
self.hmmer_results_lines.append(
- "{}\tComplete BUSCOs (C)\t\t\t{}\n".format(single_copy + multi_copy, " ")
+ "{}\tComplete BUSCOs (C)\t\t\t{}\n".format(
+ self.single_copy + self.multi_copy, " "
+ )
)
self.hmmer_results_lines.append(
- "{}\tComplete and single-copy BUSCOs (S)\t{}\n".format(single_copy, " ")
+ "{}\tComplete and single-copy BUSCOs (S)\t{}\n".format(
+ self.single_copy, " "
+ )
)
self.hmmer_results_lines.append(
- "{}\tComplete and duplicated BUSCOs (D)\t{}\n".format(multi_copy, " ")
+ "{}\tComplete and duplicated BUSCOs (D)\t{}\n".format(
+ self.multi_copy, " "
+ )
)
self.hmmer_results_lines.append(
- "{}\tFragmented BUSCOs (F)\t\t\t{}\n".format(only_fragments, " ")
+ "{}\tFragmented BUSCOs (F)\t\t\t{}\n".format(self.only_fragments, " ")
)
self.hmmer_results_lines.append(
"{}\tMissing BUSCOs (M)\t\t\t{}\n".format(
- total_buscos - single_copy - multi_copy - only_fragments, " "
+ self.total_buscos
+ - self.single_copy
+ - self.multi_copy
+ - self.only_fragments,
+ " ",
)
)
self.hmmer_results_lines.append(
- "{}\tTotal BUSCO groups searched\t\t{}\n".format(total_buscos, " ")
+ "{}\tTotal BUSCO groups searched\t\t{}\n".format(self.total_buscos, " ")
)
if isinstance(self.config, BuscoConfigAuto):
- self._one_line_hmmer_summary()
+ self._log_one_line_hmmer_summary()
elif self.domain == "eukaryota" and self.log_count == 0:
self.log_count += 1
self._produce_full_hmmer_summary_debug()
else:
- self._one_line_hmmer_summary()
+ self._log_one_line_hmmer_summary()
with open(self.short_summary_file, "w") as summary_file:
@@ -967,6 +976,10 @@ class HMMERRunner(BaseRunner):
for line in self.hmmer_results_lines:
summary_file.write("\t{}".format(line))
+ tool_versions_lines = self.report_tool_versions()
+ for line in tool_versions_lines:
+ summary_file.write(line + "\n")
+
if (
self.config.getboolean("busco_run", "auto-lineage")
and isinstance(self.config, BuscoConfigMain)
@@ -974,10 +987,17 @@ class HMMERRunner(BaseRunner):
):
summary_file.write("\nPlacement file versions:\n")
for placement_file in self.config.placement_files:
- summary_file.write("{}\n".format(placement_file))
+ summary_file.write("\t{}\n".format(placement_file))
return
+ def report_tool_versions(self):
+ lines = []
+ lines.append("\nDependencies and versions:")
+ for key, value in type(self).tool_versions.items():
+ lines.append("\t{}: {}".format(key, value))
+ return lines
+
@log("{}", logger, attr_name="hmmer_results_lines", apply="join", on_func_exit=True)
def _produce_full_hmmer_summary(self):
return
@@ -993,9 +1013,8 @@ class HMMERRunner(BaseRunner):
def _produce_full_hmmer_summary_debug(self):
return
- @log("{}", logger, attr_name="one_line_summary", on_func_exit=True)
- def _one_line_hmmer_summary(self):
- self.one_line_summary = "Results:\t{}".format(self.one_line_summary)
+ @log("{}", logger, attr_name="one_line_summary")
+ def _log_one_line_hmmer_summary(self):
return
def _write_output_header(
@@ -1009,7 +1028,7 @@ class HMMERRunner(BaseRunner):
"""
file_object.write(
"# BUSCO version is: {} \n"
- "# The lineage dataset is: {} (Creation date: {}, number of species: {}, number of BUSCOs: {}"
+ "# The lineage dataset is: {} (Creation date: {}, number of genomes: {}, number of BUSCOs: {}"
")\n".format(
busco.__version__,
os.path.basename(self.lineage_dataset),
@@ -1055,5 +1074,9 @@ class HMMERRunner(BaseRunner):
self.s_percent = abs(round((self.single_copy / self.total_buscos) * 100, 1))
self.d_percent = abs(round((self.multi_copy / self.total_buscos) * 100, 1))
self.f_percent = abs(round((self.only_fragments / self.total_buscos) * 100, 1))
+ self.complete_percent = round(self.s_percent + self.d_percent, 1)
+ self.missing_percent = abs(
+ round(100 - self.s_percent - self.d_percent - self.f_percent, 1)
+ )
- return self.single_copy, self.multi_copy, self.only_fragments, self.total_buscos
+ return
=====================================
src/busco/busco_tools/metaeuk.py
=====================================
@@ -10,6 +10,7 @@ import gzip
import pandas as pd
import numpy as np
import re
+from busco.Exceptions import BuscoError
logger = BuscoLogger.get_logger(__name__)
@@ -143,9 +144,14 @@ class MetaeukRunner(BaseRunner):
self.ancestral_variants_file = os.path.join(
self.lineage_dataset, "ancestral_variants"
)
-
- self.max_intron = self.config.get("busco_run", "max_intron")
- self.max_seq_len = self.config.get("busco_run", "max_seq_len")
+ try:
+ self.max_intron = self.config.get("busco_run", "max_intron")
+ self.max_seq_len = self.config.get("busco_run", "max_seq_len")
+ except NoOptionError:
+ raise BuscoError(
+ "{} is an old dataset version and is not compatible with Metaeuk. Please update by using "
+ "the '--update-data' command line option".format(self.lineage_dataset)
+ )
self.overlap = 1
self.s_set = False
@@ -187,8 +193,6 @@ class MetaeukRunner(BaseRunner):
self.refseq_db_rerun = os.path.join(
self._output_folder, "refseq_db_rerun.faa"
)
- self._extract_incomplete_buscos_ancestral()
- self.refseq_db = self.refseq_db_rerun
self.min_exon_aa = 5
self.max_overlap = 5
self.min_intron = 1
@@ -219,8 +223,8 @@ class MetaeukRunner(BaseRunner):
self.codon_file = "{}.codon.fas".format(self._output_basename)
self.pred_protein_seqs = "{}.fas".format(self._output_basename)
self.pred_protein_files.append(self.pred_protein_seqs)
- self.pred_protein_seqs_modified = self.pred_protein_seqs.replace(
- ".fas", ".modified.fas"
+ self.pred_protein_seqs_modified = ".modified.fas".join(
+ self.pred_protein_seqs.rsplit(".fas", 1)
)
self.pred_protein_mod_files.append(self.pred_protein_seqs_modified)
@@ -376,6 +380,9 @@ class MetaeukRunner(BaseRunner):
def run(self):
super().run()
+ if self.run_number > 1:
+ self._extract_incomplete_buscos_ancestral()
+ self.refseq_db = self.refseq_db_rerun
if self.extra_params:
logger.info(
"Additional parameters for Metaeuk are {}: ".format(self.extra_params)
@@ -637,6 +644,6 @@ class MetaeukRunner(BaseRunner):
)
except KeyError:
- raise SystemExit("*headersMap.tsv file could not be parsed.")
+ raise BuscoError("*headersMap.tsv file could not be parsed.")
except FileNotFoundError:
raise NoRerunFile
=====================================
src/busco/busco_tools/prodigal.py
=====================================
@@ -9,6 +9,7 @@ import shutil
import numpy as np
from configparser import NoOptionError
import subprocess
+from busco.Exceptions import BuscoError
logger = BuscoLogger.get_logger(__name__)
@@ -45,7 +46,7 @@ class ProdigalRunner(BaseRunner):
else 0
)
except NoOptionError:
- raise SystemExit(
+ raise BuscoError(
"Dataset config file does not contain required information. Please upgrade datasets."
)
=====================================
src/busco/run_BUSCO.py
=====================================
@@ -4,7 +4,7 @@
.. module:: run_BUSCO
:synopsis:
.. versionadded:: 3.0.0
-.. versionchanged:: 5.0.0
+.. versionchanged:: 5.1.0
BUSCO - Benchmarking Universal Single-Copy Orthologs.
This is the BUSCO main script.
@@ -18,25 +18,108 @@ Licensed under the MIT license. See LICENSE.md file.
"""
-import time
-import traceback
-import sys
+
import argparse
-import os
-import shutil
from argparse import RawTextHelpFormatter
import busco
+from busco.BuscoRunner import AnalysisRunner, BatchRunner, SingleRunner
+from busco.Exceptions import BatchFatalError, BuscoError
from busco.BuscoLogger import BuscoLogger
from busco.BuscoLogger import LogDecorator as log
from busco.ConfigManager import BuscoConfigManager
-from busco.busco_tools.Toolset import ToolException
-from busco.BuscoRunner import BuscoRunner
from busco.Actions import ListLineagesAction, CleanHelpAction, CleanVersionAction
from busco.ConfigManager import BuscoConfigMain
+# from busco.busco_tools.Toolset import ToolException
+import sys
+import time
+
+
logger = BuscoLogger.get_logger(__name__)
+@log(
+ "***** Start a BUSCO v{} analysis, current time: {} *****".format(
+ busco.__version__, time.strftime("%m/%d/%Y %H:%M:%S")
+ ),
+ logger,
+)
+class BuscoMaster:
+ def __init__(self, params):
+ self.params = params
+ self.config_manager = BuscoConfigManager(self.params)
+ self.config = self.config_manager.config_main
+
+ def harmonize_auto_lineage_settings(self):
+ if not self.config.check_lineage_present():
+ if (
+ not self.config.getboolean("busco_run", "auto-lineage")
+ and not self.config.getboolean("busco_run", "auto-lineage-prok")
+ and not self.config.getboolean("busco_run", "auto-lineage-euk")
+ ):
+ logger.warning(
+ "Running Auto Lineage Selector as no lineage dataset was specified. This will take a "
+ "little longer than normal. If you know what lineage dataset you want to use, please "
+ "specify this in the config file or using the -l (--lineage-dataset) flag in the "
+ "command line."
+ )
+ self.config.set("busco_run", "auto-lineage", "True")
+
+ else:
+ if (
+ self.config.getboolean("busco_run", "auto-lineage")
+ or self.config.getboolean("busco_run", "auto-lineage-prok")
+ or self.config.getboolean("busco_run", "auto-lineage-euk")
+ ):
+ logger.warning(
+ "You have selected auto-lineage but you have also provided a lineage dataset. "
+ "BUSCO will proceed with the specified dataset. "
+ "To run auto-lineage do not specify a dataset."
+ )
+ self.config.set("busco_run", "auto-lineage", "False")
+ self.config.set("busco_run", "auto-lineage-prok", "False")
+ self.config.set("busco_run", "auto-lineage-euk", "False")
+ return
+
+ def load_config(self):
+ """
+ Load a busco config file that will figure out all the params from all sources
+ i.e. provided config file, dataset cfg, and user args
+ """
+ self.config_manager.load_busco_config_main(sys.argv)
+ self.config = self.config_manager.config_main
+
+ def check_batch_mode(self):
+ return self.config.getboolean("busco_run", "batch_mode")
+
+ def run(self):
+ # Need to add try/except blocks, distinguishing between run fatal and batch fatal
+ try:
+ self.load_config()
+ self.harmonize_auto_lineage_settings()
+ runner = (
+ BatchRunner(self.config_manager)
+ if self.check_batch_mode()
+ else SingleRunner(self.config_manager)
+ )
+ runner.run()
+
+ except BuscoError as be:
+ SingleRunner.log_error(be)
+ raise SystemExit(1)
+
+ except BatchFatalError as bfe:
+ SingleRunner.log_error(bfe)
+ raise SystemExit(1)
+
+ finally:
+ try:
+ AnalysisRunner.move_log_file(self.config)
+ except:
+ pass
+
+
+@log("Command line: {}".format(" ".join(sys.argv[:])), logger, debug=True)
def _parse_args():
"""
This function parses the arguments provided by the user
@@ -45,9 +128,9 @@ def _parse_args():
"""
parser = argparse.ArgumentParser(
- description="Welcome to BUSCO %s: the Benchmarking Universal Single-Copy Ortholog assessment tool.\n"
+ description="Welcome to BUSCO {}: the Benchmarking Universal Single-Copy Ortholog assessment tool.\n"
"For more detailed usage information, please review the README file provided with "
- "this distribution and the BUSCO user guide." % busco.__version__,
+ "this distribution and the BUSCO user guide.".format(busco.__version__),
usage="busco -i [SEQUENCE_FILE] -l [LINEAGE] -o [OUTPUT_NAME] -m [MODE] [OTHER OPTIONS]",
formatter_class=RawTextHelpFormatter,
add_help=False,
@@ -60,7 +143,7 @@ def _parse_args():
"--in",
dest="in",
required=False,
- metavar="FASTA FILE",
+ metavar="SEQUENCE_FILE",
help="Input sequence file in FASTA format. "
"Can be an assembled genome or transcriptome (DNA), or protein sequences from an annotated gene set.",
)
@@ -97,19 +180,35 @@ def _parse_args():
)
optional.add_argument(
- "--auto-lineage",
- dest="auto-lineage",
+ "--augustus",
+ dest="use_augustus",
action="store_true",
required=False,
- help="Run auto-lineage to find optimum lineage path",
+ help="Use augustus gene predictor for eukaryote runs",
)
optional.add_argument(
- "--auto-lineage-prok",
- dest="auto-lineage-prok",
+ "--augustus_parameters",
+ dest="augustus_parameters",
+ metavar='"--PARAM1=VALUE1,--PARAM2=VALUE2"',
+ required=False,
+ help="Pass additional arguments to Augustus. All arguments should be contained within a "
+ "single pair of quotation marks, separated by commas.",
+ )
+
+ optional.add_argument(
+ "--augustus_species",
+ dest="augustus_species",
+ required=False,
+ help="Specify a species for Augustus training.",
+ )
+
+ optional.add_argument(
+ "--auto-lineage",
+ dest="auto-lineage",
action="store_true",
required=False,
- help="Run auto-lineage just on non-eukaryote trees to find optimum lineage path",
+ help="Run auto-lineage to find optimum lineage path",
)
optional.add_argument(
@@ -120,50 +219,40 @@ def _parse_args():
help="Run auto-placement just on eukaryote tree to find optimum lineage path",
)
+ optional.add_argument(
+ "--auto-lineage-prok",
+ dest="auto-lineage-prok",
+ action="store_true",
+ required=False,
+ help="Run auto-lineage just on non-eukaryote trees to find optimum lineage path",
+ )
+
optional.add_argument(
"-c",
"--cpu",
dest="cpu",
+ type=int,
required=False,
metavar="N",
help="Specify the number (N=integer) " "of threads/cores to use.",
)
optional.add_argument(
- "-f",
- "--force",
- action="store_true",
- required=False,
- dest="force",
- help="Force rewriting of existing files. "
- "Must be used when output files with the provided name already exist.",
- )
-
- optional.add_argument(
- "-r",
- "--restart",
- action="store_true",
- required=False,
- dest="restart",
- help="Continue a run that had already partially completed.",
+ "--config", dest="config_file", required=False, help="Provide a config file"
)
optional.add_argument(
- "-q",
- "--quiet",
- dest="quiet",
+ "--datasets_version",
+ dest="datasets_version",
required=False,
- help="Disable the info logs, displays only errors",
- action="store_true",
+ help="Specify the version of BUSCO datasets, e.g. odb10",
)
optional.add_argument(
- "--out_path",
- dest="out_path",
+ "--download_base_url",
+ dest="download_base_url",
required=False,
- metavar="OUTPUT_PATH",
- help="Optional location for results folder, excluding results folder name. "
- "Default is current working directory.",
+ help="Set the url to the remote BUSCO dataset location",
)
optional.add_argument(
@@ -174,110 +263,118 @@ def _parse_args():
)
optional.add_argument(
- "--datasets_version",
- dest="datasets_version",
+ "-e",
+ "--evalue",
+ dest="evalue",
required=False,
- help="Specify the version of BUSCO datasets, e.g. odb10",
+ metavar="N",
+ type=float,
+ help="E-value cutoff for BLAST searches. "
+ "Allowed formats, 0.001 or 1e-03 (Default: {:.0e})".format(
+ BuscoConfigMain.DEFAULT_ARGS_VALUES["evalue"]
+ ),
)
optional.add_argument(
- "--download_base_url",
- dest="download_base_url",
+ "-f",
+ "--force",
+ action="store_true",
required=False,
- help="Set the url to the remote BUSCO dataset location",
+ dest="force",
+ help="Force rewriting of existing files. "
+ "Must be used when output files with the provided name already exist.",
)
optional.add_argument(
- "--update-data",
- dest="update-data",
- action="store_true",
+ "-h", "--help", action=CleanHelpAction, help="Show this help message and exit"
+ )
+
+ optional.add_argument(
+ "--limit",
+ dest="limit",
+ metavar="N",
required=False,
- help="Download and replace with last versions all lineages datasets and files necessary"
- " to their automated selection",
+ type=int,
+ help="How many candidate regions (contig or transcript) to consider per BUSCO (default: {})".format(
+ str(BuscoConfigMain.DEFAULT_ARGS_VALUES["limit"])
+ ),
)
optional.add_argument(
- "--offline",
- dest="offline",
+ "--list-datasets",
+ action=ListLineagesAction,
+ help="Print the list of available BUSCO datasets",
+ )
+
+ optional.add_argument(
+ "--long",
action="store_true",
required=False,
- help="To indicate that BUSCO cannot attempt to download files",
+ dest="long",
+ help="Optimization Augustus self-training mode (Default: Off); adds considerably to the run "
+ "time, but can improve results for some non-model organisms",
)
optional.add_argument(
"--metaeuk_parameters",
dest="metaeuk_parameters",
+ metavar='"--PARAM1=VALUE1,--PARAM2=VALUE2"',
required=False,
help="Pass additional arguments to Metaeuk for the first run. All arguments should be "
- "contained within a single pair of quotation marks, separated by commas. "
- 'E.g. "--param1=1,--param2=2"',
+ "contained within a single pair of quotation marks, separated by commas. ",
)
optional.add_argument(
"--metaeuk_rerun_parameters",
dest="metaeuk_rerun_parameters",
+ metavar='"--PARAM1=VALUE1,--PARAM2=VALUE2"',
required=False,
help="Pass additional arguments to Metaeuk for the second run. All arguments should be "
- "contained within a single pair of quotation marks, separated by commas. "
- 'E.g. "--param1=1,--param2=2"',
+ "contained within a single pair of quotation marks, separated by commas. ",
)
optional.add_argument(
- "-e",
- "--evalue",
- dest="evalue",
- required=False,
- metavar="N",
- type=float,
- help="E-value cutoff for BLAST searches. "
- "Allowed formats, 0.001 or 1e-03 (Default: %.0e)"
- % BuscoConfigMain.DEFAULT_ARGS_VALUES["evalue"],
- )
-
- optional.add_argument(
- "--limit",
- dest="limit",
- metavar="REGION_LIMIT",
- required=False,
- type=int,
- help="How many candidate regions (contig or transcript) to consider per BUSCO (default: %s)"
- % str(BuscoConfigMain.DEFAULT_ARGS_VALUES["limit"]),
- )
-
- optional.add_argument(
- "--augustus",
- dest="use_augustus",
+ "--offline",
+ dest="offline",
action="store_true",
required=False,
- help="Use augustus gene predictor for eukaryote runs",
+ help="To indicate that BUSCO cannot attempt to download files",
)
optional.add_argument(
- "--augustus_parameters",
- dest="augustus_parameters",
+ "--out_path",
+ dest="out_path",
required=False,
- help="Pass additional arguments to Augustus. All arguments should be contained within a "
- 'single pair of quotation marks, separated by commas. E.g. "--param1=1,--param2=2"',
+ metavar="OUTPUT_PATH",
+ help="Optional location for results folder, excluding results folder name. "
+ "Default is current working directory.",
)
optional.add_argument(
- "--augustus_species",
- dest="augustus_species",
+ "-q",
+ "--quiet",
+ dest="quiet",
required=False,
- help="Specify a species for Augustus training.",
+ help="Disable the info logs, displays only errors",
+ action="store_true",
)
optional.add_argument(
- "--long",
+ "-r",
+ "--restart",
action="store_true",
required=False,
- dest="long",
- help="Optimization Augustus self-training mode (Default: Off); adds considerably to the run "
- "time, but can improve results for some non-model organisms",
+ dest="restart",
+ help="Continue a run that had already partially completed.",
)
optional.add_argument(
- "--config", dest="config_file", required=False, help="Provide a config file"
+ "--update-data",
+ dest="update-data",
+ action="store_true",
+ required=False,
+ help="Download and replace with last versions all lineages datasets and files necessary"
+ " to their automated selection",
)
optional.add_argument(
@@ -285,17 +382,7 @@ def _parse_args():
"--version",
action=CleanVersionAction,
help="Show this version and exit",
- version="BUSCO %s" % busco.__version__,
- )
-
- optional.add_argument(
- "-h", "--help", action=CleanHelpAction, help="Show this help message and exit"
- )
-
- optional.add_argument(
- "--list-datasets",
- action=ListLineagesAction,
- help="Print the list of available BUSCO datasets",
+ version="BUSCO {}".format(busco.__version__),
)
return vars(parser.parse_args())
@@ -308,105 +395,9 @@ def main():
``busco -h``
:raises SystemExit: if any errors occur
"""
-
params = _parse_args()
- run_BUSCO(params)
-
-
-@log(
- "***** Start a BUSCO v{} analysis, current time: {} *****".format(
- busco.__version__, time.strftime("%m/%d/%Y %H:%M:%S")
- ),
- logger,
-)
-def run_BUSCO(params):
- start_time = time.time()
-
- try:
- # Load a busco config file that will figure out all the params from all sources
- # i.e. provided config file, dataset cfg, and user args
- config_manager = BuscoConfigManager(params)
- config_manager.load_busco_config(sys.argv)
- config = config_manager.config
-
- lineage_basename = os.path.basename(config.get("busco_run", "lineage_dataset"))
- main_out_folder = config.get("busco_run", "main_out")
- lineage_results_folder = os.path.join(
- main_out_folder,
- "auto_lineage",
- config.get("busco_run", "lineage_results_dir"),
- )
-
- if config.getboolean("busco_run", "auto-lineage"):
- if lineage_basename.startswith(("bacteria", "archaea", "eukaryota")):
- busco_run = config_manager.runner
-
- # It is possible that the following lineages were arrived at either by the Prodigal genetic code shortcut
- # or by BuscoPlacer. If the former, the run will have already been completed. If the latter it still needs
- # to be done.
- elif lineage_basename.startswith(
- ("mollicutes", "mycoplasmatales", "entomoplasmatales")
- ) and os.path.exists(lineage_results_folder):
- busco_run = config_manager.runner
- else:
- busco_run = BuscoRunner(config)
- else:
- busco_run = BuscoRunner(config)
-
- if os.path.exists(lineage_results_folder):
- new_dest = os.path.join(
- main_out_folder, config.get("busco_run", "lineage_results_dir")
- )
- if os.path.exists(
- new_dest
- ): # New dest would only exist if this is a rerun of a previously completed run
- shutil.rmtree(new_dest)
- os.rename(lineage_results_folder, new_dest)
- else:
- busco_run.run_analysis()
- BuscoRunner.final_results.append(
- busco_run.analysis.hmmer_runner.hmmer_results_lines
- )
- BuscoRunner.results_datasets.append(lineage_basename)
- busco_run.finish(time.time() - start_time)
-
- except ToolException as e:
- logger.error(e)
- raise SystemExit(1)
-
- except SystemExit as se:
- logger.error(se)
- logger.debug(se, exc_info=True)
- logger.error("BUSCO analysis failed !")
- logger.error(
- "Check the logs, read the user guide (https://busco.ezlab.org/busco_userguide.html), "
- "and check the BUSCO issue board on https://gitlab.com/ezlab/busco/issues"
- )
- try:
- BuscoRunner.move_log_file(config)
- except NameError:
- try:
- BuscoRunner.move_log_file(config_manager.config)
- except:
- pass
- except:
- pass
- raise SystemExit(1)
-
- except KeyboardInterrupt:
- logger.exception(
- "A signal was sent to kill the process. \nBUSCO analysis failed !"
- )
- raise SystemExit(1)
-
- except BaseException:
- exc_type, exc_value, exc_traceback = sys.exc_info()
- logger.critical(
- "Unhandled exception occurred:\n{}\n".format(
- "".join(traceback.format_exception(exc_type, exc_value, exc_traceback))
- )
- )
- raise SystemExit(1)
+ busco_run = BuscoMaster(params)
+ busco_run.run()
# Entry point
=====================================
tests/unittest_runner.py
=====================================
@@ -4,6 +4,7 @@ from tests.unittests import ConfigManager_unittests
from tests.unittests import BuscoConfig_unittests
from tests.unittests import AutoLineage_unittests
from tests.unittests import GenomeAnalysis_unittests
+from tests.unittests import BuscoRunner_unittests
import sys
loader = unittest.TestLoader()
@@ -14,6 +15,7 @@ suite.addTests(loader.loadTestsFromModule(ConfigManager_unittests))
suite.addTests(loader.loadTestsFromModule(BuscoConfig_unittests))
suite.addTests(loader.loadTestsFromModule(AutoLineage_unittests))
suite.addTests(loader.loadTestsFromModule(GenomeAnalysis_unittests))
+suite.addTests(loader.loadTestsFromModule(BuscoRunner_unittests))
runner = unittest.TextTestRunner(verbosity=3)
result = runner.run(suite)
=====================================
tests/unittests/AutoLineage_unittests.py
=====================================
@@ -1,33 +1,34 @@
import unittest
from unittest.mock import patch, call, Mock
from busco import AutoLineage, BuscoRunner
+from busco.ConfigManager import BuscoConfigManager
class TestAutoLineage(unittest.TestCase):
def setUp(self):
pass
- @patch("busco.BuscoConfig.BuscoConfigMain", autospec=True)
- def test_init_autolineage(self, mock_config_main):
+ @patch("busco.ConfigManager.BuscoConfigManager")
+ def test_init_autolineage(self, mock_config_manager):
with self.assertLogs(AutoLineage.logger, 20):
- AutoLineage.AutoSelectLineage(mock_config_main)
+ AutoLineage.AutoSelectLineage(mock_config_manager)
@patch("busco.AutoLineage.AutoSelectLineage.virus_check", return_value=False)
- @patch(
- "busco.BuscoConfig.BuscoConfigMain.getboolean",
- side_effect=[True, False, True, False, False],
- )
@patch("busco.AutoLineage.logger.info")
@patch("busco.AutoLineage.os")
- @patch("busco.AutoLineage.BuscoRunner")
+ @patch("busco.AutoLineage.AnalysisRunner")
@patch("busco.AutoLineage.AutoSelectLineage.get_best_match_lineage")
@patch("busco.AutoLineage.AutoSelectLineage.run_lineages_list")
- @patch("busco.BuscoConfig.BuscoConfigMain", autospec=True)
def test_run_auto_selector_lineage_lists_no_virus(
- self, mock_config_main, mock_run_lineages_list, *args
+ self, mock_run_lineages_list, *args
):
+ config_manager = BuscoConfigManager({})
+ config_manager.config_main = Mock()
+ config_manager.config_main.getboolean = Mock(
+ side_effect=[True, False, True, False, False]
+ )
for _ in range(3):
- asl = AutoLineage.AutoSelectLineage(mock_config_main)
+ asl = AutoLineage.AutoSelectLineage(config_manager)
asl.selected_runner = Mock()
asl.selected_runner.analysis.hmmer_runner.single_copy_buscos = [
0
@@ -44,38 +45,41 @@ class TestAutoLineage(unittest.TestCase):
mock_run_lineages_list.assert_has_calls(calls, any_order=True)
@patch("busco.AutoLineage.logger.info")
- @patch("__main__.AutoLineage_unittests.AutoLineage.BuscoRunner")
- @patch("busco.AutoLineage.BuscoConfigAuto", autospec=True)
- @patch("busco.BuscoConfig.BuscoConfigMain")
- def test_run_lineages_initializes_BuscoConfigAuto(
- self, mock_config_main, mock_config_auto, *args
- ):
- asl = AutoLineage.AutoSelectLineage(mock_config_main)
+ @patch("__main__.AutoLineage_unittests.AutoLineage.AnalysisRunner")
+ @patch("busco.ConfigManager.BuscoConfigAuto", autospec=True)
+ def test_run_lineages_initializes_BuscoConfigAuto(self, mock_config_auto, *args):
+ config_manager = BuscoConfigManager({})
+ config_manager.config_main = Mock()
+ asl = AutoLineage.AutoSelectLineage(config_manager)
test_lineages = ["a", "b", "c"]
test_dataset_version = "<dataset_version>"
asl.dataset_version = test_dataset_version
asl.run_lineages_list(test_lineages)
calls = [
call(
- mock_config_main, "{}_{}".format(test_lineages[0], test_dataset_version)
+ config_manager.config_main,
+ "{}_{}".format(test_lineages[0], test_dataset_version),
),
call(
- mock_config_main, "{}_{}".format(test_lineages[1], test_dataset_version)
+ config_manager.config_main,
+ "{}_{}".format(test_lineages[1], test_dataset_version),
),
call(
- mock_config_main, "{}_{}".format(test_lineages[2], test_dataset_version)
+ config_manager.config_main,
+ "{}_{}".format(test_lineages[2], test_dataset_version),
),
]
mock_config_auto.assert_has_calls(calls, any_order=True)
@patch("busco.AutoLineage.logger.info")
- @patch("busco.AutoLineage.BuscoConfigAuto", autospec=True)
- @patch("busco.BuscoConfig.BuscoConfigMain")
- @patch("__main__.AutoLineage_unittests.AutoLineage.BuscoRunner")
+ @patch("__main__.AutoLineage_unittests.BuscoConfigManager.load_busco_config_auto")
+ @patch("__main__.AutoLineage_unittests.AutoLineage.AnalysisRunner")
def test_run_lineages_initializes_BuscoRunner(
- self, mock_runner, mock_config_main, mock_config_auto, *args
+ self, mock_runner, mock_config_auto, *args
):
- asl = AutoLineage.AutoSelectLineage(mock_config_main)
+ config_manager = BuscoConfigManager({})
+ config_manager.config_main = Mock()
+ asl = AutoLineage.AutoSelectLineage(config_manager)
test_lineages = ["a", "b", "c"]
test_dataset_version = "<dataset_version>"
asl.dataset_version = test_dataset_version
@@ -83,11 +87,11 @@ class TestAutoLineage(unittest.TestCase):
mock_runner.assert_called_with(mock_config_auto.return_value)
@patch("busco.AutoLineage.logger.info")
- @patch("busco.AutoLineage.BuscoConfigAuto", autospec=True)
- @patch("busco.BuscoConfig.BuscoConfigMain")
- @patch("__main__.AutoLineage_unittests.AutoLineage.BuscoRunner")
- def test_run_lineages_runs_analysis(self, mock_runner, mock_config_main, *args):
- asl = AutoLineage.AutoSelectLineage(mock_config_main)
+ @patch("busco.BuscoConfig.BuscoConfigAuto", autospec=True)
+ @patch("busco.ConfigManager.BuscoConfigManager")
+ @patch("__main__.AutoLineage_unittests.AutoLineage.AnalysisRunner")
+ def test_run_lineages_runs_analysis(self, mock_runner, mock_config_manager, *args):
+ asl = AutoLineage.AutoSelectLineage(mock_config_manager.config_main)
test_lineages = ["a", "b", "c"]
test_dataset_version = "<dataset_version>"
asl.dataset_version = test_dataset_version
@@ -97,11 +101,13 @@ class TestAutoLineage(unittest.TestCase):
)
@patch("busco.AutoLineage.logger.info")
- @patch("busco.AutoLineage.BuscoConfigAuto", autospec=True)
- @patch("busco.BuscoConfig.BuscoConfigMain")
- @patch("__main__.AutoLineage_unittests.AutoLineage.BuscoRunner")
- def test_run_lineages_returns_runners(self, mock_runner, mock_config_main, *args):
- asl = AutoLineage.AutoSelectLineage(mock_config_main)
+ @patch("busco.BuscoConfig.BuscoConfigAuto", autospec=True)
+ @patch("busco.ConfigManager.BuscoConfigManager")
+ @patch("__main__.AutoLineage_unittests.AutoLineage.AnalysisRunner")
+ def test_run_lineages_returns_runners(
+ self, mock_runner, mock_config_manager, *args
+ ):
+ asl = AutoLineage.AutoSelectLineage(mock_config_manager.config_main)
test_lineages = ["a", "b", "c"]
test_dataset_version = "<dataset_version>"
asl.dataset_version = test_dataset_version
@@ -116,11 +122,11 @@ class TestAutoLineage(unittest.TestCase):
)
@patch("busco.AutoLineage.logger.info")
- @patch("busco.AutoLineage.BuscoConfigAuto", autospec=True)
- @patch("__main__.AutoLineage_unittests.AutoLineage.BuscoRunner")
- @patch("busco.BuscoConfig.BuscoConfigMain")
- def test_record_results_first_run(self, mock_config_main, *args):
- asl = AutoLineage.AutoSelectLineage(mock_config_main)
+ @patch("busco.BuscoConfig.BuscoConfigAuto", autospec=True)
+ @patch("__main__.AutoLineage_unittests.AutoLineage.AnalysisRunner")
+ @patch("busco.ConfigManager.BuscoConfigManager")
+ def test_record_results_first_run(self, mock_config_manager, *args):
+ asl = AutoLineage.AutoSelectLineage(mock_config_manager.config_main)
asl.record_results(0, 1, 2, 0, 1, 2)
self.assertGreater(len(asl.s_buscos), 0)
self.assertGreater(len(asl.d_buscos), 0)
@@ -130,11 +136,11 @@ class TestAutoLineage(unittest.TestCase):
self.assertGreater(len(asl.f_percents), 0)
@patch("busco.AutoLineage.logger.info")
- @patch("busco.AutoLineage.BuscoConfigAuto", autospec=True)
- @patch("__main__.AutoLineage_unittests.AutoLineage.BuscoRunner")
- @patch("busco.BuscoConfig.BuscoConfigMain")
- def test_record_results_multiple_runs(self, mock_config_main, *args):
- asl = AutoLineage.AutoSelectLineage(mock_config_main)
+ @patch("busco.BuscoConfig.BuscoConfigAuto", autospec=True)
+ @patch("__main__.AutoLineage_unittests.AutoLineage.AnalysisRunner")
+ @patch("busco.ConfigManager.BuscoConfigManager")
+ def test_record_results_multiple_runs(self, mock_config_manager, *args):
+ asl = AutoLineage.AutoSelectLineage(mock_config_manager.config_main)
asl.record_results(0, 1, 2, 0, 1, 2)
asl.record_results(0, 1, 2, 0, 1, 2)
self.assertGreater(len(asl.s_buscos), 1)
@@ -145,19 +151,19 @@ class TestAutoLineage(unittest.TestCase):
self.assertGreater(len(asl.f_percents), 1)
@patch("busco.AutoLineage.logger.info")
- @patch("busco.BuscoConfig.BuscoConfigMain")
- def test_evaluate_single_runner(self, mock_config_main, *args):
+ @patch("busco.ConfigManager.BuscoConfigManager")
+ def test_evaluate_single_runner(self, mock_config_manager, *args):
runner1 = Mock()
runner1.analysis.hmmer_runner.single_copy = 1
runner1.analysis.hmmer_runner.multi_copy = 1
runner1.analysis.hmmer_runner.only_fragments = 1
- asl = AutoLineage.AutoSelectLineage(mock_config_main)
+ asl = AutoLineage.AutoSelectLineage(mock_config_manager.config_main)
max_ind = asl.evaluate([runner1])
self.assertEqual(max_ind, 0)
@patch("busco.AutoLineage.logger.info")
- @patch("busco.BuscoConfig.BuscoConfigMain")
- def test_evaluate_multiple_runners(self, mock_config_main, *args):
+ @patch("busco.ConfigManager.BuscoConfigManager")
+ def test_evaluate_multiple_runners(self, mock_config_manager, *args):
runner1 = Mock()
runner2 = Mock()
runner3 = Mock()
@@ -167,7 +173,7 @@ class TestAutoLineage(unittest.TestCase):
runner2.analysis.hmmer_runner.multi_copy = 5
runner3.analysis.hmmer_runner.single_copy = 12
runner3.analysis.hmmer_runner.multi_copy = 5
- asl = AutoLineage.AutoSelectLineage(mock_config_main)
+ asl = AutoLineage.AutoSelectLineage(mock_config_manager.config_main)
max_ind = asl.evaluate([runner1, runner2, runner3])
self.assertEqual(max_ind, 1)
@@ -179,8 +185,8 @@ class TestAutoLineage(unittest.TestCase):
self.assertEqual(max_ind, 2)
@patch("busco.AutoLineage.logger.info")
- @patch("busco.BuscoConfig.BuscoConfigMain")
- def test_evaluate_first_order_tiebreak(self, mock_config_main, *args):
+ @patch("busco.ConfigManager.BuscoConfigManager")
+ def test_evaluate_first_order_tiebreak(self, mock_config_manager, *args):
runner1 = Mock()
runner2 = Mock()
runner3 = Mock()
@@ -193,13 +199,13 @@ class TestAutoLineage(unittest.TestCase):
runner3.analysis.hmmer_runner.single_copy = 12
runner3.analysis.hmmer_runner.multi_copy = 0
runner3.analysis.hmmer_runner.only_fragments = 3
- asl = AutoLineage.AutoSelectLineage(mock_config_main)
+ asl = AutoLineage.AutoSelectLineage(mock_config_manager.config_main)
max_ind = asl.evaluate([runner1, runner2, runner3])
self.assertEqual(max_ind, 1)
@patch("busco.AutoLineage.logger.info")
- @patch("busco.BuscoConfig.BuscoConfigMain")
- def test_evaluate_second_order_tiebreak(self, mock_config_main, *args):
+ @patch("busco.ConfigManager.BuscoConfigManager")
+ def test_evaluate_second_order_tiebreak(self, mock_config_manager, *args):
runner1 = Mock()
runner2 = Mock()
runner3 = Mock()
@@ -220,13 +226,13 @@ class TestAutoLineage(unittest.TestCase):
runner4.analysis.hmmer_runner.multi_copy = 1
runner4.analysis.hmmer_runner.only_fragments = 2
runner4.analysis.hmmer_runner.s_percent = 80
- asl = AutoLineage.AutoSelectLineage(mock_config_main)
+ asl = AutoLineage.AutoSelectLineage(mock_config_manager.config_main)
max_ind = asl.evaluate([runner1, runner2, runner3, runner4])
self.assertEqual(max_ind, 3)
@patch("busco.AutoLineage.logger.info")
- @patch("busco.BuscoConfig.BuscoConfigMain")
- def test_evaluate_third_order_tiebreak(self, mock_config_main, *args):
+ @patch("busco.ConfigManager.BuscoConfigManager")
+ def test_evaluate_third_order_tiebreak(self, mock_config_manager, *args):
runner1 = Mock()
runner2 = Mock()
runner3 = Mock()
@@ -247,19 +253,19 @@ class TestAutoLineage(unittest.TestCase):
runner4.analysis.hmmer_runner.multi_copy = 1
runner4.analysis.hmmer_runner.only_fragments = 2
runner4.analysis.hmmer_runner.s_percent = 80
- asl = AutoLineage.AutoSelectLineage(mock_config_main)
+ asl = AutoLineage.AutoSelectLineage(mock_config_manager.config_main)
with self.assertLogs(AutoLineage.logger, "WARNING"):
max_ind = asl.evaluate([runner1, runner2, runner3, runner4])
self.assertEqual(max_ind, 1)
@patch("busco.AutoLineage.logger.info")
@patch("busco.AutoLineage.AutoSelectLineage.cleanup_disused_runs")
- @patch("__main__.AutoLineage_unittests.BuscoRunner.BuscoRunner.mode_dict")
- @patch("busco.BuscoConfig.BuscoConfigMain", autospec=True)
+ @patch("__main__.AutoLineage_unittests.BuscoRunner.AnalysisRunner.mode_dict")
+ @patch("busco.ConfigManager.BuscoConfigManager")
def test_get_best_match_lineage(
- self, mock_config_main, fake_modedict, mock_cleanup, *args
+ self, mock_config_manager, fake_modedict, mock_cleanup, *args
):
- mock_config_main.get.side_effect = [None]
+ mock_config_manager.config_main.get.side_effect = [None]
mock_config1 = Mock()
mock_config2 = Mock()
@@ -284,20 +290,20 @@ class TestAutoLineage(unittest.TestCase):
mock_analysis3,
]
- runner1 = BuscoRunner.BuscoRunner(mock_config1)
- runner2 = BuscoRunner.BuscoRunner(mock_config2)
- runner3 = BuscoRunner.BuscoRunner(mock_config3)
+ runner1 = BuscoRunner.AnalysisRunner(mock_config1)
+ runner2 = BuscoRunner.AnalysisRunner(mock_config2)
+ runner3 = BuscoRunner.AnalysisRunner(mock_config3)
- asl = AutoLineage.AutoSelectLineage(mock_config_main)
+ asl = AutoLineage.AutoSelectLineage(mock_config_manager.config_main)
asl.get_best_match_lineage([runner1, runner2, runner3])
self.assertEqual(asl.best_match_lineage_dataset, "test2")
self.assertEqual(asl.selected_runner, runner2)
mock_cleanup.assert_called_with([runner1, runner3])
@patch("busco.AutoLineage.logger.info")
- @patch("busco.BuscoConfig.BuscoConfigMain", autospec=True)
- def test_cleanup_disused_runs(self, mock_config_main, *args):
- asl = AutoLineage.AutoSelectLineage(mock_config_main)
+ @patch("busco.ConfigManager.BuscoConfigManager")
+ def test_cleanup_disused_runs(self, mock_config_manager, *args):
+ asl = AutoLineage.AutoSelectLineage(mock_config_manager)
mock_runner1 = Mock()
mock_runner2 = Mock()
mock_runner1.cleaned_up = False
=====================================
tests/unittests/BuscoConfig_unittests.py
=====================================
@@ -3,8 +3,7 @@ from busco import BuscoConfig
import shutil
import os
from unittest.mock import Mock
-from unittest.mock import patch
-from pathlib import Path
+from unittest.mock import patch, call
class TestBuscoConfig(unittest.TestCase):
@@ -80,6 +79,7 @@ class TestBuscoConfig(unittest.TestCase):
"evalue",
"limit",
"use_augustus",
+ "batch_mode",
],
"etraining": ["path", "command"],
"gff2gbSmallDNA.pl": ["path", "command"],
@@ -100,7 +100,7 @@ class TestBuscoConfig(unittest.TestCase):
self.assertIn("busco_run", config.sections())
def test_read_config_file_ioerror(self):
- with self.assertRaises(SystemExit):
+ with self.assertRaises(BuscoConfig.BatchFatalError):
config = BuscoConfig.BaseConfig()
config.conf_file = "/path/not/found"
config._load_config_file()
@@ -111,7 +111,7 @@ class TestBuscoConfig(unittest.TestCase):
with open(config_path, "w") as f:
f.write(test_config_contents)
- with self.assertRaises(SystemExit):
+ with self.assertRaises(BuscoConfig.BatchFatalError):
config = BuscoConfig.BaseConfig()
config.conf_file = config_path
config._load_config_file()
@@ -123,7 +123,7 @@ class TestBuscoConfig(unittest.TestCase):
with open(config_path, "w") as f:
f.write(test_config_contents)
- with self.assertRaises(SystemExit):
+ with self.assertRaises(BuscoConfig.BatchFatalError):
config = BuscoConfig.BaseConfig()
config.conf_file = config_path
config._load_config_file()
@@ -218,21 +218,21 @@ class TestBuscoConfig(unittest.TestCase):
config._check_mandatory_keys_exist()
def test_mandatory_keys_check_missing_param_in(self):
- with self.assertRaises(SystemExit):
+ with self.assertRaises(BuscoConfig.BatchFatalError):
params_test = {"out": "output_name", "mode": "genome"}
config = BuscoConfig.BuscoConfigMain(self.base_config, params_test)
config.configure()
config._check_mandatory_keys_exist()
def test_mandatory_keys_check_missing_param_mode(self):
- with self.assertRaises(SystemExit):
+ with self.assertRaises(BuscoConfig.BatchFatalError):
params_test = {"in": "input_file", "out": "output_name"}
config = BuscoConfig.BuscoConfigMain(self.base_config, params_test)
config.configure()
config._check_mandatory_keys_exist()
def test_mandatory_keys_check_missing_param_out(self):
- with self.assertRaises(SystemExit):
+ with self.assertRaises(BuscoConfig.BatchFatalError):
params_test = {"in": "input_file", "mode": "genome"}
config = BuscoConfig.BuscoConfigMain(self.base_config, params_test)
config.configure()
@@ -251,7 +251,7 @@ class TestBuscoConfig(unittest.TestCase):
os.makedirs(previous_run_name, exist_ok=True)
self.test_params["out"] = previous_run_name
self.test_params["force"] = "False"
- with self.assertRaises(SystemExit):
+ with self.assertRaises(BuscoConfig.BatchFatalError):
config = BuscoConfig.BuscoConfigMain(self.base_config, self.test_params)
config.configure()
config._check_no_previous_run()
@@ -312,7 +312,7 @@ class TestBuscoConfig(unittest.TestCase):
def test_catch_disallowed_keys(self):
for section_name in self.config_structure:
- with self.assertRaises(SystemExit):
+ with self.assertRaises(BuscoConfig.BatchFatalError):
config = BuscoConfig.BuscoConfigMain(self.base_config, self.test_params)
config.configure()
config.set(section_name, "forbidden_option", "forbidden_value")
@@ -321,7 +321,7 @@ class TestBuscoConfig(unittest.TestCase):
def test_out_value_check_invalid(self):
for str_format in ["/path/to/output", "output/"]:
self.test_params["out"] = str_format
- with self.assertRaises(SystemExit):
+ with self.assertRaises(BuscoConfig.BatchFatalError):
config = BuscoConfig.BuscoConfigMain(self.base_config, self.test_params)
config.configure()
config._check_out_value()
@@ -334,7 +334,7 @@ class TestBuscoConfig(unittest.TestCase):
def test_limit_value_out_of_range(self):
for lim_val in [-1, 0, 25]:
self.test_params["limit"] = lim_val
- with self.assertRaises(SystemExit):
+ with self.assertRaises(BuscoConfig.BatchFatalError):
config = BuscoConfig.BuscoConfigMain(self.base_config, self.test_params)
config.configure()
config._check_limit_value()
@@ -398,15 +398,38 @@ class TestBuscoConfig(unittest.TestCase):
config.get("hmmsearch", "path"), os.path.expanduser("~/test_hmmsearch_path")
)
- def test_required_input_exists_true(self):
- input_filename = "test_input_file"
- Path(input_filename).touch()
- self.test_params["in"] = input_filename
+ @patch(
+ "__main__.BuscoConfig_unittests.BuscoConfig.os.path.isdir", return_value=True
+ )
+ def test_batch_mode_true(self, *args):
config = BuscoConfig.BuscoConfigMain(self.base_config, self.test_params)
- config.configure()
- with self.assertLogs(BuscoConfig.logger, level="INFO"):
- config._check_required_input_exists()
- os.remove(input_filename)
+ config.set = Mock()
+ config._check_batch_mode()
+ calls = [call("busco_run", "batch_mode", "True")]
+ config.set.assert_has_calls(calls)
+
+ @patch(
+ "__main__.BuscoConfig_unittests.BuscoConfig.os.path.isdir", return_value=False
+ )
+ @patch(
+ "__main__.BuscoConfig_unittests.BuscoConfig.os.path.isfile", return_value=True
+ )
+ def test_batch_mode_false_with_file(self, *args):
+ config = BuscoConfig.BuscoConfigMain(self.base_config, self.test_params)
+ config.set = Mock()
+ config._check_batch_mode()
+
+ @patch(
+ "__main__.BuscoConfig_unittests.BuscoConfig.os.path.isdir", return_value=False
+ )
+ @patch(
+ "__main__.BuscoConfig_unittests.BuscoConfig.os.path.isfile", return_value=False
+ )
+ def test_batch_mode_false_with_error(self, *args):
+ config = BuscoConfig.BuscoConfigMain(self.base_config, self.test_params)
+ config.set = Mock()
+ with self.assertRaises(BuscoConfig.BatchFatalError):
+ config._check_batch_mode()
def test_required_input_exists_false(self):
input_filename = "test_input_file"
@@ -415,7 +438,7 @@ class TestBuscoConfig(unittest.TestCase):
self.test_params["in"] = input_filename
config = BuscoConfig.BuscoConfigMain(self.base_config, self.test_params)
config.configure()
- with self.assertRaises(SystemExit):
+ with self.assertRaises(BuscoConfig.BatchFatalError):
config._check_required_input_exists()
@patch("__main__.BuscoConfig_unittests.BuscoConfig.BuscoDownloadManager")
@@ -435,6 +458,7 @@ class TestBuscoConfig(unittest.TestCase):
@patch.object(BuscoConfig.BuscoConfigMain, "log_config")
@patch.object(BuscoConfig.BuscoConfigMain, "_init_downloader")
+ @patch.object(BuscoConfig.BuscoConfigMain, "_check_batch_mode")
@patch.object(BuscoConfig.BuscoConfigMain, "_check_required_input_exists")
@patch.object(BuscoConfig.BuscoConfigMain, "_expand_all_paths")
@patch.object(BuscoConfig.BuscoConfigMain, "_check_evalue")
@@ -455,6 +479,7 @@ class TestBuscoConfig(unittest.TestCase):
mock_check_evalue,
mock_expand_all_paths,
mock_check_input,
+ mock_check_batch,
mock_init_downloader,
mock_log_config,
):
@@ -470,6 +495,7 @@ class TestBuscoConfig(unittest.TestCase):
mock_check_evalue.assert_called()
mock_expand_all_paths.assert_called()
mock_check_input.assert_called()
+ mock_check_batch.assert_called()
mock_init_downloader.assert_called()
mock_log_config.assert_called()
@@ -524,7 +550,7 @@ class TestBuscoConfig(unittest.TestCase):
self.test_params["datasets_version"] = "odb11"
config = BuscoConfig.BuscoConfigMain(self.base_config, self.test_params)
config.configure()
- with self.assertRaises(SystemExit):
+ with self.assertRaises(BuscoConfig.BatchFatalError):
config.check_lineage_present()
def test_set_results_dirname(self):
@@ -559,11 +585,9 @@ class TestBuscoConfig(unittest.TestCase):
BuscoConfig.BuscoConfigAuto(None, "lineage")
mock_set_dirname.assert_called_with("lineage")
- @patch("busco.BuscoConfig.BuscoConfigAuto.load_dataset_config")
- @patch("busco.BuscoConfig.BuscoConfigAuto.download_lineage_file")
+ @patch("busco.BuscoConfig.BuscoConfigAuto.load_dataset")
@patch("busco.BuscoConfig.BuscoConfig")
@patch("busco.BuscoConfig.BuscoConfigAuto._propagate_config")
- @patch("busco.BuscoConfig.BuscoConfigAuto.set_results_dirname")
@patch("busco.BuscoConfig.BuscoConfigAuto._create_required_paths")
def test_autoconfig_init_creates_paths(self, mock_create_paths, *args):
BuscoConfig.BuscoConfigAuto(None, None)
@@ -590,10 +614,8 @@ class TestBuscoConfig(unittest.TestCase):
mock_load_dataset.assert_called()
@patch("busco.BuscoConfig.BuscoConfigAuto._propagate_config")
- @patch("busco.BuscoConfig.BuscoConfigAuto.set_results_dirname")
@patch("busco.BuscoConfig.BuscoConfigAuto._create_required_paths")
- @patch("busco.BuscoConfig.BuscoConfigAuto.download_lineage_file")
- @patch("busco.BuscoConfig.BuscoConfigAuto.load_dataset_config")
+ @patch("busco.BuscoConfig.BuscoConfigAuto.load_dataset")
@patch("busco.BuscoConfig.BuscoConfig.__init__")
def test_autoconfig_init_calls_super(self, mock_config_parent, *args):
BuscoConfig.BuscoConfigAuto(None, None)
=====================================
tests/unittests/BuscoRunner_unittests.py
=====================================
@@ -0,0 +1,113 @@
+import unittest
+from unittest.mock import patch, call, Mock
+from busco import BuscoRunner, ConfigManager
+
+
+class TestAutoLineage(unittest.TestCase):
+ def setUp(self):
+ pass
+
+ @patch("busco.ConfigManager.BuscoConfigManager")
+ def test_config_updated_if_lineage_missing(self, mock_config_manager, *args):
+ mock_config_manager.config_main.check_lineage_present = lambda: False
+ runner = BuscoRunner.SingleRunner(mock_config_manager)
+ analysis_runner = Mock()
+ test_dataset_path = "/path/to/lineage_dataset"
+ with patch.object(
+ runner, "auto_select_lineage", lambda: (test_dataset_path, analysis_runner)
+ ):
+ runner.get_lineage()
+ calls = [
+ call("busco_run", "lineage_dataset", test_dataset_path),
+ ]
+ mock_config_manager.config_main.set.assert_has_calls(calls, any_order=True)
+
+ @patch("busco.BuscoRunner.logger.info")
+ @patch(
+ "busco.AutoLineage.AutoSelectLineage",
+ autospec=True,
+ )
+ def test_auto_select_lineage_call_function_initializes_asl(self, mock_asl, *args):
+ config = Mock()
+ mock_asl.return_value.best_match_lineage_dataset = Mock()
+ mock_asl.return_value.selected_runner = Mock()
+ runner = BuscoRunner.SingleRunner(config)
+ runner.auto_select_lineage()
+ mock_asl.assert_called()
+
+ @patch("busco.BuscoRunner.logger.info")
+ @patch(
+ "busco.AutoLineage.AutoSelectLineage",
+ autospec=True,
+ )
+ def test_auto_select_lineage_call_function_runs_asl(self, mock_asl, *args):
+ config = Mock()
+ mock_asl.return_value.best_match_lineage_dataset = Mock()
+ mock_asl.return_value.selected_runner = Mock()
+ runner = BuscoRunner.SingleRunner(config)
+ runner.auto_select_lineage()
+ mock_asl.return_value.run_auto_selector.assert_called()
+
+ @patch("busco.BuscoRunner.logger.info")
+ @patch(
+ "busco.AutoLineage.AutoSelectLineage",
+ autospec=True,
+ )
+ def test_auto_select_lineage_call_function_gets_lineage_dataset(
+ self, mock_asl, *args
+ ):
+ config = Mock()
+ mock_asl.return_value.best_match_lineage_dataset = Mock()
+ mock_asl.return_value.selected_runner = Mock()
+ runner = BuscoRunner.SingleRunner(config)
+ runner.auto_select_lineage()
+ mock_asl.return_value.get_lineage_dataset.assert_called()
+
+ @patch("busco.BuscoRunner.logger.info")
+ @patch(
+ "busco.AutoLineage.AutoSelectLineage",
+ autospec=True,
+ )
+ def test_auto_select_lineage_call_function_returns_lineage_dataset(
+ self, mock_asl, *args
+ ):
+ config = Mock()
+ lineage_dataset = "best_match_dataset"
+ mock_asl.return_value.best_match_lineage_dataset = lineage_dataset
+ mock_asl.return_value.selected_runner = Mock()
+ runner = BuscoRunner.SingleRunner(config)
+ retval_dataset, retval_runner = runner.auto_select_lineage()
+ self.assertEqual(retval_dataset, lineage_dataset)
+
+ @patch("busco.BuscoRunner.logger.info")
+ @patch(
+ "busco.AutoLineage.AutoSelectLineage",
+ autospec=True,
+ )
+ def test_auto_select_lineage_call_function_selects_runner(self, mock_asl, *args):
+ config = Mock()
+ lineage_dataset = "best_match_dataset"
+ mock_asl.return_value.best_match_lineage_dataset = lineage_dataset
+ mock_asl.return_value.selected_runner = Mock()
+ runner = BuscoRunner.SingleRunner(config)
+ retval_dataset, retval_runner = runner.auto_select_lineage()
+ self.assertEqual(retval_runner, mock_asl.return_value.selected_runner)
+
+ @patch("busco.ConfigManager.BuscoConfigManager")
+ def test_config_set_parent_dataset_if_not_virus(self, mock_config_manager, *args):
+ test_dataset_path = "/path/to/lineage_dataset"
+ test_parent_dataset = "/path/to/parent_dataset"
+ runner = BuscoRunner.SingleRunner(mock_config_manager)
+ analysis_runner = Mock()
+ analysis_runner.config.get.side_effect = ["bacteria", test_parent_dataset]
+ with patch.object(
+ runner, "auto_select_lineage", lambda: (test_dataset_path, analysis_runner)
+ ):
+ runner.get_lineage()
+ calls = [
+ call("busco_run", "parent_dataset", test_parent_dataset),
+ ]
+ mock_config_manager.config_main.set.assert_has_calls(calls, any_order=True)
+
+ def tearDown(self):
+ pass
=====================================
tests/unittests/ConfigManager_unittests.py
=====================================
@@ -1,5 +1,5 @@
import unittest
-from unittest.mock import patch, call, Mock
+from unittest.mock import patch
from busco import ConfigManager
import os
import importlib
@@ -50,220 +50,22 @@ class TestConfigManager(unittest.TestCase):
)
@patch("__main__.ConfigManager_unittests.ConfigManager.logger", autospec=True)
- def test_config_validated(self, *args):
- config_manager = ConfigManager.BuscoConfigManager(self.params)
- with patch(
- "__main__.ConfigManager_unittests.ConfigManager.BuscoConfigMain",
- autospec=True,
- ):
- config_manager.load_busco_config()
- config_manager.config.validate.assert_called()
-
- @patch(
- "__main__.ConfigManager_unittests.ConfigManager.BuscoConfigMain", autospec=True
- )
- def test_log_warning_if_neither_lineage_nor_autolineage_specified(
- self, mock_config_main, *args
- ):
- mock_config_main.return_value.check_lineage_present = lambda: False
- mock_config_main.return_value.getboolean.side_effect = [
- False,
- False,
- True,
- False,
- True,
- ] # These values comprise the three distinct logical paths into this part
- # of the code: run1: False, False; run2: True, (shortcircuit); run3: False, True.
- config_manager = ConfigManager.BuscoConfigManager(self.params)
- test_dataset_path = "/path/to/lineage_dataset"
- with patch.object(
- config_manager, "auto_select_lineage", lambda: test_dataset_path
- ):
- with self.assertLogs(ConfigManager.logger, "WARNING"):
- config_manager.load_busco_config()
- with patch("busco.ConfigManager.logger.warning") as mock_logger:
- for _ in range(2):
- config_manager.load_busco_config()
- self.assertFalse(mock_logger.called)
-
- @patch(
- "__main__.ConfigManager_unittests.ConfigManager.BuscoConfigMain", autospec=True
- )
- def test_log_warning_if_both_lineage_and_autolineage_specified(
- self, mock_config_main, *args
- ):
- mock_config_main.return_value.check_lineage_present = lambda: True
- mock_config_main.return_value.getboolean.side_effect = [
- True,
- False,
- True,
- False,
- False,
- ]
- config_manager = ConfigManager.BuscoConfigManager(self.params)
- test_dataset_path = "/path/to/lineage_dataset"
- with patch.object(
- config_manager, "auto_select_lineage", lambda: test_dataset_path
- ):
- for _ in range(2):
- with self.assertLogs(ConfigManager.logger, "WARNING"):
- config_manager.load_busco_config()
- with patch("busco.ConfigManager.logger.warning") as mock_logger:
- config_manager.load_busco_config()
- self.assertFalse(mock_logger.called)
-
- patch("busco.ConfigManager.logger.warning")
-
- @patch(
- "__main__.ConfigManager_unittests.ConfigManager.BuscoConfigMain", autospec=True
- )
- def test_config_updated_if_lineage_missing(self, mock_config_main, *args):
- mock_config_main.return_value.check_lineage_present = lambda: False
- config_manager = ConfigManager.BuscoConfigManager(self.params)
- test_dataset_path = "/path/to/lineage_dataset"
- with patch.object(
- config_manager, "auto_select_lineage", lambda: test_dataset_path
- ):
- config_manager.load_busco_config()
- calls = [
- call("busco_run", "auto-lineage", "True"),
- call("busco_run", "lineage_dataset", test_dataset_path),
- ]
- mock_config_main.return_value.set.assert_has_calls(calls, any_order=True)
-
- @patch("busco.ConfigManager.logger.warning")
- @patch(
- "__main__.ConfigManager_unittests.ConfigManager.BuscoConfigMain", autospec=True
- )
- def test_config_updated_if_lineage_present(self, mock_config_main, *args):
- mock_config_main.return_value.check_lineage_present = lambda: True
- config_manager = ConfigManager.BuscoConfigManager(self.params)
- test_dataset_path = "/path/to/lineage_dataset"
- with patch.object(
- config_manager, "auto_select_lineage", lambda: test_dataset_path
- ):
- config_manager.load_busco_config()
- calls = [
- call("busco_run", "auto-lineage", "False"),
- call("busco_run", "auto-lineage-prok", "False"),
- call("busco_run", "auto-lineage-euk", "False"),
- ]
- mock_config_main.return_value.set.assert_has_calls(calls, any_order=True)
-
- @patch("busco.ConfigManager.logger.warning")
- @patch(
- "__main__.ConfigManager_unittests.ConfigManager.BuscoConfigMain", autospec=True
- )
- def test_update_dirname(self, mock_config_main, *args):
- config_manager = ConfigManager.BuscoConfigManager(self.params)
- test_dataset_path = "/path/to/lineage_dataset"
- mock_config_main.return_value.get = lambda *args: test_dataset_path
- with patch.object(
- config_manager, "auto_select_lineage", lambda: test_dataset_path
- ):
- config_manager.load_busco_config()
- mock_config_main.return_value.set_results_dirname.assert_called_with(
- test_dataset_path
- )
-
- @patch("busco.ConfigManager.logger.warning")
@patch(
"__main__.ConfigManager_unittests.ConfigManager.BuscoConfigMain", autospec=True
)
- def test_lineage_downloaded(self, mock_config_main, *args):
+ def test_config_main_configured(self, mock_config, *args):
config_manager = ConfigManager.BuscoConfigManager(self.params)
- test_dataset_path = "/path/to/lineage_dataset"
- mock_config_main.return_value.get = lambda *args: test_dataset_path
- with patch.object(
- config_manager, "auto_select_lineage", lambda: test_dataset_path
- ):
- config_manager.load_busco_config()
- mock_config_main.return_value.download_lineage_file.assert_called_with(
- test_dataset_path
- )
+ config_manager.load_busco_config_main()
+ mock_config.return_value.configure.assert_called()
- @patch("busco.ConfigManager.logger.warning")
- @patch(
- "__main__.ConfigManager_unittests.ConfigManager.BuscoConfigMain", autospec=True
- )
- def test_lineage_dataset_config_loaded(self, mock_config_main, *args):
- config_manager = ConfigManager.BuscoConfigManager(self.params)
- test_dataset_path = "/path/to/lineage_dataset"
- with patch.object(
- config_manager, "auto_select_lineage", lambda: test_dataset_path
- ):
- config_manager.load_busco_config()
- mock_config_main.return_value.load_dataset_config.assert_called()
-
- @patch("busco.ConfigManager.logger.warning")
+ @patch("__main__.ConfigManager_unittests.ConfigManager.logger", autospec=True)
@patch(
"__main__.ConfigManager_unittests.ConfigManager.BuscoConfigMain", autospec=True
)
- def test_run_auto_select_if_no_lineage(self, mock_config_main, *args):
- config_manager = ConfigManager.BuscoConfigManager(self.params)
- mock_config_main.return_value.check_lineage_present = lambda: False
- test_dataset_path = "/path/to/lineage_dataset"
- with patch.object(
- config_manager, "auto_select_lineage", return_value=test_dataset_path
- ):
- config_manager.load_busco_config()
- config_manager.auto_select_lineage.assert_called()
-
- @patch(
- "__main__.ConfigManager_unittests.ConfigManager.AutoSelectLineage",
- autospec=True,
- )
- def test_auto_select_lineage_call_function_initializes_asl(self, mock_asl):
- mock_asl.return_value.best_match_lineage_dataset = Mock()
- mock_asl.return_value.selected_runner = Mock()
- config_manager = ConfigManager.BuscoConfigManager(self.params)
- config_manager.auto_select_lineage()
- mock_asl.assert_called()
-
- @patch(
- "__main__.ConfigManager_unittests.ConfigManager.AutoSelectLineage",
- autospec=True,
- )
- def test_auto_select_lineage_call_function_runs_asl(self, mock_asl):
- mock_asl.return_value.best_match_lineage_dataset = Mock()
- mock_asl.return_value.selected_runner = Mock()
- config_manager = ConfigManager.BuscoConfigManager(self.params)
- config_manager.auto_select_lineage()
- mock_asl.return_value.run_auto_selector.assert_called()
-
- @patch(
- "__main__.ConfigManager_unittests.ConfigManager.AutoSelectLineage",
- autospec=True,
- )
- def test_auto_select_lineage_call_function_gets_lineage_dataset(self, mock_asl):
- mock_asl.return_value.best_match_lineage_dataset = Mock()
- mock_asl.return_value.selected_runner = Mock()
- config_manager = ConfigManager.BuscoConfigManager(self.params)
- config_manager.auto_select_lineage()
- mock_asl.return_value.get_lineage_dataset.assert_called()
-
- @patch(
- "__main__.ConfigManager_unittests.ConfigManager.AutoSelectLineage",
- autospec=True,
- )
- def test_auto_select_lineage_call_function_returns_lineage_dataset(self, mock_asl):
- config_manager = ConfigManager.BuscoConfigManager(self.params)
- lineage_dataset = "best_match_dataset"
- mock_asl.return_value.best_match_lineage_dataset = lineage_dataset
- mock_asl.return_value.selected_runner = Mock()
- retval = config_manager.auto_select_lineage()
- self.assertEqual(retval, lineage_dataset)
-
- @patch(
- "__main__.ConfigManager_unittests.ConfigManager.AutoSelectLineage",
- autospec=True,
- )
- def test_auto_select_lineage_call_function_selects_runner(self, mock_asl):
+ def test_config_main_validated(self, mock_config, *args):
config_manager = ConfigManager.BuscoConfigManager(self.params)
- mock_asl.return_value.best_match_lineage_dataset = Mock()
- mock_asl.return_value.selected_runner = "test"
- config_manager.auto_select_lineage()
- self.assertEqual("test", config_manager.runner)
+ config_manager.load_busco_config_main()
+ mock_config.return_value.validate.assert_called()
def tearDown(self):
pass
=====================================
tests/unittests/run_BUSCO_unittests.py
=====================================
@@ -2,6 +2,7 @@ import unittest
from busco import run_BUSCO
import sys
import io
+from unittest.mock import Mock, patch, call
class TestParams(unittest.TestCase):
@@ -97,7 +98,7 @@ class TestParams(unittest.TestCase):
output_file = "output_file"
mode = "mode"
lineage_dataset = "lineage_dataset"
- cpus = "cpus"
+ cpus = 10
evalue = 0.1
arg_values = {
@@ -153,7 +154,7 @@ class TestParams(unittest.TestCase):
output_file = "output_file"
mode = "mode"
lineage_dataset = "lineage_dataset"
- cpus = "cpus"
+ cpus = 10
evalue = 0.1
limit = 1
augustus_parameters = "augustus_parameters"
@@ -235,5 +236,97 @@ class TestParams(unittest.TestCase):
}
self.assertDictEqual(params, correct_parse)
+ def test_command_line_cpu_type(self):
+ bad_args_cpu = ["cpus", "1.5", None]
+
+ for arg in bad_args_cpu:
+ sys.argv[1:] = ["--cpu", arg]
+ with self.assertRaises(SystemExit) as cm:
+ captured_output = io.StringIO()
+ sys.stderr = captured_output
+ try:
+ run_BUSCO._parse_args()
+ finally:
+ sys.stderr = sys.__stderr__
+ self.assertEqual(cm.exception.code, 2)
+
+ def test_command_line_evalue_type(self):
+ bad_args_evalue = ["evalue", None]
+
+ for arg in bad_args_evalue:
+ sys.argv[1:] = ["--evalue", arg]
+ with self.assertRaises(SystemExit) as cm:
+ captured_output = io.StringIO()
+ sys.stderr = captured_output
+ try:
+ run_BUSCO._parse_args()
+ finally:
+ sys.stderr = sys.__stderr__
+ self.assertEqual(cm.exception.code, 2)
+
+ def test_command_line_limit_type(self):
+ bad_args_limit = ["limit", "1.5", None]
+ for arg in bad_args_limit:
+ sys.argv[1:] = ["--limit", arg]
+ with self.assertRaises(SystemExit) as cm:
+ captured_output = io.StringIO()
+ sys.stderr = captured_output
+ try:
+ run_BUSCO._parse_args()
+ finally:
+ sys.stderr = sys.__stderr__
+ self.assertEqual(cm.exception.code, 2)
+
def tearDown(self):
sys.argv = [sys.argv[0]]
+
+
+class TestMaster(unittest.TestCase):
+ def setUp(self):
+ self.maxDiff = None
+ self.params = {}
+ pass
+
+ @patch("busco.run_BUSCO.logger.info")
+ def test_log_warning_if_neither_lineage_nor_autolineage_specified(self, *args):
+ bm = run_BUSCO.BuscoMaster(self.params)
+ bm.config = Mock()
+ bm.config.check_lineage_present.return_value = False
+ bm.config.getboolean.side_effect = [False, False, False]
+ with self.assertLogs(run_BUSCO.logger, "WARNING"):
+ bm.harmonize_auto_lineage_settings()
+
+ @patch("busco.run_BUSCO.logger.info")
+ def test_config_updated_if_no_lineage(self, *args):
+ bm = run_BUSCO.BuscoMaster(self.params)
+ bm.config = Mock()
+ bm.config.check_lineage_present.return_value = False
+ calls = [call("busco_run", "auto-lineage", "True")]
+ bm.harmonize_auto_lineage_settings()
+ bm.config.set.assert_has_calls(calls, any_order=True)
+
+ @patch("busco.run_BUSCO.logger.info")
+ def test_config_updated_if_lineage_present(self, *args):
+ bm = run_BUSCO.BuscoMaster(self.params)
+ bm.config = Mock()
+ bm.config.check_lineage_present.return_value = True
+ bm.config.getboolean.side_effect = [False, False, False]
+ calls = [
+ call("busco_run", "auto-lineage", "False"),
+ call("busco_run", "auto-lineage-prok", "False"),
+ call("busco_run", "auto-lineage-euk", "False"),
+ ]
+ bm.harmonize_auto_lineage_settings()
+ bm.config.set.assert_has_calls(calls, any_order=True)
+
+ @patch("busco.run_BUSCO.logger.info")
+ def test_log_warning_if_both_lineage_and_autolineage_specified(self, *args):
+ bm = run_BUSCO.BuscoMaster(self.params)
+ bm.config = Mock()
+ bm.config.check_lineage_present.return_value = True
+ bm.config.getboolean.return_value = True
+ with self.assertLogs(run_BUSCO.logger, "WARNING"):
+ bm.harmonize_auto_lineage_settings()
+
+ def tearDown(self):
+ pass
View it on GitLab: https://salsa.debian.org/med-team/busco/-/commit/eb1970a6ab1bc65bdac8324e311e4d1232e8c9cc
--
View it on GitLab: https://salsa.debian.org/med-team/busco/-/commit/eb1970a6ab1bc65bdac8324e311e4d1232e8c9cc
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210420/c862acf3/attachment-0001.htm>
More information about the debian-med-commit mailing list