[med-svn] [Git][med-team/busco][upstream] New upstream version 5.1.3
Nilesh Patra (@nilesh)
gitlab at salsa.debian.org
Thu May 27 23:16:44 BST 2021
Nilesh Patra pushed to branch upstream at Debian Med / busco
Commits:
bf339030 by Nilesh Patra at 2021-05-28T03:44:21+05:30
New upstream version 5.1.3
- - - - -
8 changed files:
- CHANGELOG
- bin/busco
- src/busco/BuscoRunner.py
- src/busco/_version.py
- src/busco/analysis/GenomeAnalysis.py
- src/busco/busco_tools/augustus.py
- src/busco/busco_tools/hmmer.py
- src/busco/busco_tools/metaeuk.py
Changes:
=====================================
CHANGELOG
=====================================
@@ -1,3 +1,7 @@
+5.1.3
+- Issue #408 fixed
+- Bug fixes
+
5.1.2
- Fix bug in batch mode that affects Augustus runs
=====================================
bin/busco
=====================================
@@ -7,24 +7,35 @@ if __name__ == "__main__":
except ImportError as err:
try:
import re
- pattern_search = re.search("cannot import name '(?P<module_name>[\w]+)", err.msg)
+
+ pattern_search = re.search(
+ "cannot import name '(?P<module_name>[\w]+)", err.msg
+ )
missing_module = pattern_search.group("module_name")
if missing_module == "run_BUSCO":
- print("BUSCO must be installed before it is run. Please enter 'python setup.py install (--user)'. "
- "See the user guide for more information.")
+ print(
+ "BUSCO must be installed before it is run. Please enter 'python setup.py install (--user)'. "
+ "See the user guide for more information."
+ )
elif missing_module == "Bio":
- print("Please install BioPython (https://biopython.org/) before running BUSCO.")
+ print(
+ "Please install BioPython (https://biopython.org/) before running BUSCO."
+ )
elif missing_module == "numpy":
print("Please install NumPy before running BUSCO.")
else:
- print("Unable to find module {}. Please make sure it is installed. See the user guide and the GitLab issue "
- "board (https://gitlab.com/ezlab/busco/issues) if you need further assistance."
- "".format(missing_module))
+ print(
+ "Unable to find module {}. Please make sure it is installed. See the user guide and the GitLab issue "
+ "board (https://gitlab.com/ezlab/busco/issues) if you need further assistance."
+ "".format(missing_module)
+ )
except:
print(err.msg)
- print("There was a problem installing BUSCO or importing one of its dependencies. See the user guide and the "
- "GitLab issue board (https://gitlab.com/ezlab/busco/issues) if you need further assistance.")
+ print(
+ "There was a problem installing BUSCO or importing one of its dependencies. See the user guide and the "
+ "GitLab issue board (https://gitlab.com/ezlab/busco/issues) if you need further assistance."
+ )
raise SystemExit(1)
run_BUSCO.main()
=====================================
src/busco/BuscoRunner.py
=====================================
@@ -132,12 +132,11 @@ class SingleRunner:
raise BatchFatalError(e)
except KeyboardInterrupt:
- logger.exception(
+ raise BatchFatalError(
"A signal was sent to kill the process. \nBUSCO analysis failed !"
)
- raise BatchFatalError
- except BaseException:
+ except BaseException as e:
exc_type, exc_value, exc_traceback = sys.exc_info()
logger.critical(
"Unhandled exception occurred:\n{}\n".format(
@@ -146,7 +145,7 @@ class SingleRunner:
)
)
)
- raise BatchFatalError
+ raise BatchFatalError(e)
class BatchRunner:
=====================================
src/busco/_version.py
=====================================
@@ -6,4 +6,4 @@ Copyright (c) 2016-2021, Evgeny Zdobnov (ez at ezlab.org)
Licensed under the MIT license. See LICENSE.md file.
"""
-__version__ = "5.1.2"
+__version__ = "5.1.3"
=====================================
src/busco/analysis/GenomeAnalysis.py
=====================================
@@ -222,11 +222,10 @@ class GenomeAnalysisEukaryotesAugustus(BLASTAnalysis, GenomeAnalysisEukaryotes):
def cleanup(self):
try:
- if self._target_species.startswith("BUSCO"):
- self.augustus_runner.move_retraining_parameters()
- self.config.set(
- "busco_run", "augustus_species", self._target_species_initial
- ) # Reset parameter for batch mode
+ self.augustus_runner.move_retraining_parameters()
+ self.config.set(
+ "busco_run", "augustus_species", self._target_species_initial
+ ) # Reset parameter for batch mode
except OSError:
pass
super().cleanup()
@@ -437,11 +436,8 @@ class GenomeAnalysisEukaryotesMetaeuk(GenomeAnalysisEukaryotes):
def validate_output(self):
if len(self.metaeuk_runner.headers_files) < 2:
return
- hmmer_results = {
- **self.hmmer_runner.is_complete,
- **self.hmmer_runner.is_very_large,
- **self.hmmer_runner.is_fragment,
- }
+ hmmer_results = self.hmmer_runner.merge_dicts()
+
if len(hmmer_results) > 0:
exon_records = self.get_exon_records(hmmer_results)
df = self.exons_to_df(exon_records)
@@ -542,6 +538,7 @@ class GenomeAnalysisEukaryotesMetaeuk(GenomeAnalysisEukaryotes):
strand,
score,
run_found,
+ gene_id,
)
exon_records.append(record)
return exon_records
@@ -555,10 +552,12 @@ class GenomeAnalysisEukaryotesMetaeuk(GenomeAnalysisEukaryotes):
busco_group = busco_groups.get_group(busco_id)
except KeyError: # if busco was removed during overlap filtering
continue
- busco_score_groups = busco_group.groupby(["Score"])
- for _, busco_score_group in busco_score_groups:
+ busco_gene_groups = busco_group.groupby("Orig gene ID")
+ for gene_match, busco_gene_group in busco_gene_groups:
+ if gene_match not in matches:
+ continue
min_coord = None
- for idx, row in busco_score_group.iterrows():
+ for idx, row in busco_gene_group.iterrows():
low_coord = row["Start"]
high_coord = row["Stop"]
score = row["Score"]
@@ -569,19 +568,22 @@ class GenomeAnalysisEukaryotesMetaeuk(GenomeAnalysisEukaryotes):
else:
min_coord = low_coord
max_coord = high_coord
- for gene_match, details in matches.items():
- if details[0]["bitscore"] == score:
- new_gene_match = "{}:{}-{}".format(seq, min_coord, max_coord)
- hmmer_result_dict_new[busco_id].update(
- {new_gene_match: details}
- )
- matched_genes_new[new_gene_match].append(busco_id)
- self.gene_details[new_gene_match] = self.gene_details[
- gene_match
- ]
- self.sequences_aa[
- new_gene_match
- ] = self.metaeuk_runner.sequences_aa[gene_match]
+
+ details = matches[gene_match]
+ df_strand = busco_gene_group["Strand"].iloc[0]
+ new_gene_match = "{}:{}-{}".format(seq, min_coord, max_coord)
+ hmmer_result_dict_new[busco_id].update({new_gene_match: details})
+ matched_genes_new[new_gene_match].append(busco_id)
+ self.gene_details[new_gene_match] = [
+ {
+ "gene_start": min_coord,
+ "gene_end": max_coord,
+ "strand": df_strand,
+ }
+ ]
+ self.sequences_aa[new_gene_match] = self.metaeuk_runner.sequences_aa[
+ gene_match
+ ]
return hmmer_result_dict_new, matched_genes_new
@log("Validating exons and removing overlapping matches", logger)
=====================================
src/busco/busco_tools/augustus.py
=====================================
@@ -72,7 +72,13 @@ class AugustusRunner(BaseRunner):
"AUGUSTUS_CONFIG_PATH environment variable has not been set"
)
- self._target_species = self.config.get("busco_run", "augustus_species")
+ try:
+ self._target_species = self.config.get("busco_run", "augustus_species")
+ except KeyError:
+ raise SystemExit(
+ "Something went wrong. Eukaryota datasets should specify an augustus species."
+ )
+
super().__init__()
self._output_folder = os.path.join(self.run_folder, "augustus_output")
self.tmp_dir = os.path.join(self._output_folder, "tmp")
@@ -591,22 +597,23 @@ class AugustusRunner(BaseRunner):
This function moves retraining parameters from augustus species folder
to the run folder
"""
- augustus_species_path = os.path.join(
- self._augustus_config_path, "species", self._target_species
- )
- if os.path.exists(augustus_species_path):
- new_path = os.path.join(
- self._output_folder, "retraining_parameters", self._target_species
- )
- shutil.move(augustus_species_path, new_path)
- elif self.config.getboolean("busco_run", "restart") and os.path.exists(
- os.path.join(
- self._output_folder, "retraining_parameters", self._target_species
+ if self._target_species.startswith("BUSCO"):
+ augustus_species_path = os.path.join(
+ self._augustus_config_path, "species", self._target_species
)
- ):
- pass
- else:
- logger.warning("Augustus did not produce a retrained species folder.")
+ if os.path.exists(augustus_species_path):
+ new_path = os.path.join(
+ self._output_folder, "retraining_parameters", self._target_species
+ )
+ shutil.move(augustus_species_path, new_path)
+ elif self.config.getboolean("busco_run", "restart") and os.path.exists(
+ os.path.join(
+ self._output_folder, "retraining_parameters", self._target_species
+ )
+ ):
+ pass
+ else:
+ logger.warning("Augustus did not produce a retrained species folder.")
return
=====================================
src/busco/busco_tools/hmmer.py
=====================================
@@ -268,6 +268,13 @@ class HMMERRunner(BaseRunner):
total_len[entry] += hit[1] - hit[0]
return total_len
+ def merge_dicts(self):
+ merged_dict = defaultdict(lambda: defaultdict(list))
+ for hmmer_dict in [self.is_complete, self.is_very_large, self.is_fragment]:
+ for busco_id, busco_matches in hmmer_dict.items():
+ merged_dict[busco_id].update(busco_matches)
+ return merged_dict
+
def parse_hmmer_output(self, filename, busco_query):
"""
Read and parse HMMER output file.
@@ -701,11 +708,14 @@ class HMMERRunner(BaseRunner):
)
elif self.mode == "genome":
scaffold = self.gene_details[gene_id][m]
- location_pattern = ":{}-{}".format(
- scaffold["gene_start"], scaffold["gene_end"]
- )
- if gene_id.endswith(location_pattern):
- gene_id = gene_id.replace(location_pattern, "")
+ if self.domain == "eukaryota":
+ location_pattern = ":{}-{}".format(
+ scaffold["gene_start"], scaffold["gene_end"]
+ )
+ if gene_id.endswith(location_pattern):
+ gene_id = gene_id.replace(location_pattern, "")
+ else: # Remove suffix assigned by Prodigal
+ gene_id = gene_id.rsplit("_", 1)[0]
try:
desc = links_info[busco]["description"]
link = links_info[busco]["link"]
=====================================
src/busco/busco_tools/metaeuk.py
=====================================
@@ -264,6 +264,7 @@ class MetaeukRunner(BaseRunner):
"Strand",
"Score",
"Run found",
+ "Orig gene ID",
],
index=np.arange(len(records)),
)
@@ -594,6 +595,7 @@ class MetaeukRunner(BaseRunner):
header_details["S"],
header_details["bitscore"],
None,
+ None,
)
all_headers.append(header_df_info)
View it on GitLab: https://salsa.debian.org/med-team/busco/-/commit/bf33903040218db1489ef06ac664b0808476da27
--
View it on GitLab: https://salsa.debian.org/med-team/busco/-/commit/bf33903040218db1489ef06ac664b0808476da27
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210527/e1ce2b8b/attachment-0001.htm>
More information about the debian-med-commit mailing list