[med-svn] [Git][med-team/busco][upstream] New upstream version 5.1.3

Nilesh Patra (@nilesh) gitlab at salsa.debian.org
Thu May 27 23:16:44 BST 2021



Nilesh Patra pushed to branch upstream at Debian Med / busco


Commits:
bf339030 by Nilesh Patra at 2021-05-28T03:44:21+05:30
New upstream version 5.1.3
- - - - -


8 changed files:

- CHANGELOG
- bin/busco
- src/busco/BuscoRunner.py
- src/busco/_version.py
- src/busco/analysis/GenomeAnalysis.py
- src/busco/busco_tools/augustus.py
- src/busco/busco_tools/hmmer.py
- src/busco/busco_tools/metaeuk.py


Changes:

=====================================
CHANGELOG
=====================================
@@ -1,3 +1,7 @@
+5.1.3
+- Issue #408 fixed
+- Bug fixes
+
 5.1.2
 - Fix bug in batch  mode that affects Augustus runs
 


=====================================
bin/busco
=====================================
@@ -7,24 +7,35 @@ if __name__ == "__main__":
     except ImportError as err:
         try:
             import re
-            pattern_search = re.search("cannot import name '(?P<module_name>[\w]+)", err.msg)
+
+            pattern_search = re.search(
+                "cannot import name '(?P<module_name>[\w]+)", err.msg
+            )
             missing_module = pattern_search.group("module_name")
             if missing_module == "run_BUSCO":
-                print("BUSCO must be installed before it is run. Please enter 'python setup.py install (--user)'. "
-                      "See the user guide for more information.")
+                print(
+                    "BUSCO must be installed before it is run. Please enter 'python setup.py install (--user)'. "
+                    "See the user guide for more information."
+                )
             elif missing_module == "Bio":
-                print("Please install BioPython (https://biopython.org/) before running BUSCO.")
+                print(
+                    "Please install BioPython (https://biopython.org/) before running BUSCO."
+                )
             elif missing_module == "numpy":
                 print("Please install NumPy before running BUSCO.")
             else:
-                print("Unable to find module {}. Please make sure it is installed. See the user guide and the GitLab issue "
-                      "board (https://gitlab.com/ezlab/busco/issues) if you need further assistance."
-                      "".format(missing_module))
+                print(
+                    "Unable to find module {}. Please make sure it is installed. See the user guide and the GitLab issue "
+                    "board (https://gitlab.com/ezlab/busco/issues) if you need further assistance."
+                    "".format(missing_module)
+                )
 
         except:
             print(err.msg)
-            print("There was a problem installing BUSCO or importing one of its dependencies. See the user guide and the "
-                  "GitLab issue board (https://gitlab.com/ezlab/busco/issues) if you need further assistance.")
+            print(
+                "There was a problem installing BUSCO or importing one of its dependencies. See the user guide and the "
+                "GitLab issue board (https://gitlab.com/ezlab/busco/issues) if you need further assistance."
+            )
         raise SystemExit(1)
 
     run_BUSCO.main()


=====================================
src/busco/BuscoRunner.py
=====================================
@@ -132,12 +132,11 @@ class SingleRunner:
             raise BatchFatalError(e)
 
         except KeyboardInterrupt:
-            logger.exception(
+            raise BatchFatalError(
                 "A signal was sent to kill the process. \nBUSCO analysis failed !"
             )
-            raise BatchFatalError
 
-        except BaseException:
+        except BaseException as e:
             exc_type, exc_value, exc_traceback = sys.exc_info()
             logger.critical(
                 "Unhandled exception occurred:\n{}\n".format(
@@ -146,7 +145,7 @@ class SingleRunner:
                     )
                 )
             )
-            raise BatchFatalError
+            raise BatchFatalError(e)
 
 
 class BatchRunner:


=====================================
src/busco/_version.py
=====================================
@@ -6,4 +6,4 @@ Copyright (c) 2016-2021, Evgeny Zdobnov (ez at ezlab.org)
 Licensed under the MIT license. See LICENSE.md file.
 
 """
-__version__ = "5.1.2"
+__version__ = "5.1.3"


=====================================
src/busco/analysis/GenomeAnalysis.py
=====================================
@@ -222,11 +222,10 @@ class GenomeAnalysisEukaryotesAugustus(BLASTAnalysis, GenomeAnalysisEukaryotes):
 
     def cleanup(self):
         try:
-            if self._target_species.startswith("BUSCO"):
-                self.augustus_runner.move_retraining_parameters()
-                self.config.set(
-                    "busco_run", "augustus_species", self._target_species_initial
-                )  # Reset parameter for batch mode
+            self.augustus_runner.move_retraining_parameters()
+            self.config.set(
+                "busco_run", "augustus_species", self._target_species_initial
+            )  # Reset parameter for batch mode
         except OSError:
             pass
         super().cleanup()
@@ -437,11 +436,8 @@ class GenomeAnalysisEukaryotesMetaeuk(GenomeAnalysisEukaryotes):
     def validate_output(self):
         if len(self.metaeuk_runner.headers_files) < 2:
             return
-        hmmer_results = {
-            **self.hmmer_runner.is_complete,
-            **self.hmmer_runner.is_very_large,
-            **self.hmmer_runner.is_fragment,
-        }
+        hmmer_results = self.hmmer_runner.merge_dicts()
+
         if len(hmmer_results) > 0:
             exon_records = self.get_exon_records(hmmer_results)
             df = self.exons_to_df(exon_records)
@@ -542,6 +538,7 @@ class GenomeAnalysisEukaryotesMetaeuk(GenomeAnalysisEukaryotes):
                             strand,
                             score,
                             run_found,
+                            gene_id,
                         )
                         exon_records.append(record)
         return exon_records
@@ -555,10 +552,12 @@ class GenomeAnalysisEukaryotesMetaeuk(GenomeAnalysisEukaryotes):
                 busco_group = busco_groups.get_group(busco_id)
             except KeyError:  # if busco was removed during overlap filtering
                 continue
-            busco_score_groups = busco_group.groupby(["Score"])
-            for _, busco_score_group in busco_score_groups:
+            busco_gene_groups = busco_group.groupby("Orig gene ID")
+            for gene_match, busco_gene_group in busco_gene_groups:
+                if gene_match not in matches:
+                    continue
                 min_coord = None
-                for idx, row in busco_score_group.iterrows():
+                for idx, row in busco_gene_group.iterrows():
                     low_coord = row["Start"]
                     high_coord = row["Stop"]
                     score = row["Score"]
@@ -569,19 +568,22 @@ class GenomeAnalysisEukaryotesMetaeuk(GenomeAnalysisEukaryotes):
                     else:
                         min_coord = low_coord
                         max_coord = high_coord
-                for gene_match, details in matches.items():
-                    if details[0]["bitscore"] == score:
-                        new_gene_match = "{}:{}-{}".format(seq, min_coord, max_coord)
-                        hmmer_result_dict_new[busco_id].update(
-                            {new_gene_match: details}
-                        )
-                        matched_genes_new[new_gene_match].append(busco_id)
-                        self.gene_details[new_gene_match] = self.gene_details[
-                            gene_match
-                        ]
-                        self.sequences_aa[
-                            new_gene_match
-                        ] = self.metaeuk_runner.sequences_aa[gene_match]
+
+                details = matches[gene_match]
+                df_strand = busco_gene_group["Strand"].iloc[0]
+                new_gene_match = "{}:{}-{}".format(seq, min_coord, max_coord)
+                hmmer_result_dict_new[busco_id].update({new_gene_match: details})
+                matched_genes_new[new_gene_match].append(busco_id)
+                self.gene_details[new_gene_match] = [
+                    {
+                        "gene_start": min_coord,
+                        "gene_end": max_coord,
+                        "strand": df_strand,
+                    }
+                ]
+                self.sequences_aa[new_gene_match] = self.metaeuk_runner.sequences_aa[
+                    gene_match
+                ]
         return hmmer_result_dict_new, matched_genes_new
 
     @log("Validating exons and removing overlapping matches", logger)


=====================================
src/busco/busco_tools/augustus.py
=====================================
@@ -72,7 +72,13 @@ class AugustusRunner(BaseRunner):
                 "AUGUSTUS_CONFIG_PATH environment variable has not been set"
             )
 
-        self._target_species = self.config.get("busco_run", "augustus_species")
+        try:
+            self._target_species = self.config.get("busco_run", "augustus_species")
+        except KeyError:
+            raise SystemExit(
+                "Something went wrong. Eukaryota datasets should specify an augustus species."
+            )
+
         super().__init__()
         self._output_folder = os.path.join(self.run_folder, "augustus_output")
         self.tmp_dir = os.path.join(self._output_folder, "tmp")
@@ -591,22 +597,23 @@ class AugustusRunner(BaseRunner):
         This function moves retraining parameters from augustus species folder
         to the run folder
         """
-        augustus_species_path = os.path.join(
-            self._augustus_config_path, "species", self._target_species
-        )
-        if os.path.exists(augustus_species_path):
-            new_path = os.path.join(
-                self._output_folder, "retraining_parameters", self._target_species
-            )
-            shutil.move(augustus_species_path, new_path)
-        elif self.config.getboolean("busco_run", "restart") and os.path.exists(
-            os.path.join(
-                self._output_folder, "retraining_parameters", self._target_species
+        if self._target_species.startswith("BUSCO"):
+            augustus_species_path = os.path.join(
+                self._augustus_config_path, "species", self._target_species
             )
-        ):
-            pass
-        else:
-            logger.warning("Augustus did not produce a retrained species folder.")
+            if os.path.exists(augustus_species_path):
+                new_path = os.path.join(
+                    self._output_folder, "retraining_parameters", self._target_species
+                )
+                shutil.move(augustus_species_path, new_path)
+            elif self.config.getboolean("busco_run", "restart") and os.path.exists(
+                os.path.join(
+                    self._output_folder, "retraining_parameters", self._target_species
+                )
+            ):
+                pass
+            else:
+                logger.warning("Augustus did not produce a retrained species folder.")
         return
 
 


=====================================
src/busco/busco_tools/hmmer.py
=====================================
@@ -268,6 +268,13 @@ class HMMERRunner(BaseRunner):
                 total_len[entry] += hit[1] - hit[0]
         return total_len
 
+    def merge_dicts(self):
+        merged_dict = defaultdict(lambda: defaultdict(list))
+        for hmmer_dict in [self.is_complete, self.is_very_large, self.is_fragment]:
+            for busco_id, busco_matches in hmmer_dict.items():
+                merged_dict[busco_id].update(busco_matches)
+        return merged_dict
+
     def parse_hmmer_output(self, filename, busco_query):
         """
         Read and parse HMMER output file.
@@ -701,11 +708,14 @@ class HMMERRunner(BaseRunner):
                             )
                     elif self.mode == "genome":
                         scaffold = self.gene_details[gene_id][m]
-                        location_pattern = ":{}-{}".format(
-                            scaffold["gene_start"], scaffold["gene_end"]
-                        )
-                        if gene_id.endswith(location_pattern):
-                            gene_id = gene_id.replace(location_pattern, "")
+                        if self.domain == "eukaryota":
+                            location_pattern = ":{}-{}".format(
+                                scaffold["gene_start"], scaffold["gene_end"]
+                            )
+                            if gene_id.endswith(location_pattern):
+                                gene_id = gene_id.replace(location_pattern, "")
+                        else:  # Remove suffix assigned by Prodigal
+                            gene_id = gene_id.rsplit("_", 1)[0]
                         try:
                             desc = links_info[busco]["description"]
                             link = links_info[busco]["link"]


=====================================
src/busco/busco_tools/metaeuk.py
=====================================
@@ -264,6 +264,7 @@ class MetaeukRunner(BaseRunner):
                 "Strand",
                 "Score",
                 "Run found",
+                "Orig gene ID",
             ],
             index=np.arange(len(records)),
         )
@@ -594,6 +595,7 @@ class MetaeukRunner(BaseRunner):
                         header_details["S"],
                         header_details["bitscore"],
                         None,
+                        None,
                     )
                     all_headers.append(header_df_info)
 



View it on GitLab: https://salsa.debian.org/med-team/busco/-/commit/bf33903040218db1489ef06ac664b0808476da27

-- 
View it on GitLab: https://salsa.debian.org/med-team/busco/-/commit/bf33903040218db1489ef06ac664b0808476da27
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210527/e1ce2b8b/attachment-0001.htm>


More information about the debian-med-commit mailing list