[med-svn] [Git][med-team/flye][upstream] New upstream version 2.9.4+dfsg

Alexandre Detiste (@detiste-guest) gitlab at salsa.debian.org
Wed May 22 14:32:34 BST 2024



Alexandre Detiste pushed to branch upstream at Debian Med / flye


Commits:
ebaf557e by Alexandre Detiste at 2024-05-22T14:18:47+02:00
New upstream version 2.9.4+dfsg
- - - - -


13 changed files:

- README.md
- docs/NEWS.md
- docs/USAGE.md
- flye/__build__.py
- flye/__version__.py
- flye/main.py
- flye/polishing/bubbles.py
- flye/polishing/polish.py
- flye/utils/sam_parser.py
- src/polishing/bubble_processor.cpp
- src/polishing/bubble_processor.h
- src/polishing/subs_matrix.cpp
- src/polishing/subs_matrix.h


Changes:

=====================================
README.md
=====================================
@@ -3,7 +3,7 @@ Flye assembler
 
 [![BioConda Install](https://img.shields.io/conda/dn/bioconda/flye.svg?style=flag&label=BioConda%20install)](https://anaconda.org/bioconda/flye)
 
-### Version: 2.9.3
+### Version: 2.9.4
 
 Flye is a de novo assembler for single-molecule sequencing reads,
 such as those produced by PacBio and Oxford Nanopore Technologies.
@@ -178,7 +178,7 @@ Publications
 Mikhail Kolmogorov, Derek M. Bickhart, Bahar Behsaz, Alexey Gurevich, Mikhail Rayko, Sung Bong
 Shin, Kristen Kuhn, Jeffrey Yuan, Evgeny Polevikov, Timothy P. L. Smith and Pavel A. Pevzner
 "metaFlye: scalable long-read metagenome assembly using repeat graphs", Nature Methods, 2020
-[doi:s41592-020-00971-x](https://doi.org/10.1038/s41592-020-00971-x)
+[doi:10.1038/s41592-020-00971-x](https://doi.org/10.1038/s41592-020-00971-x)
 
 Mikhail Kolmogorov, Jeffrey Yuan, Yu Lin and Pavel Pevzner, 
 "Assembly of Long Error-Prone Reads Using Repeat Graphs", Nature Biotechnology, 2019


=====================================
docs/NEWS.md
=====================================
@@ -1,3 +1,7 @@
+Flye 2.9.4 release (14 May 2024)
+===============================
+* Minor technical changes
+
 Flye 2.9.3 release (28 November 2023)
 ====================================
 * Disjointig step speedup for `--nano-hq` mode


=====================================
docs/USAGE.md
=====================================
@@ -316,7 +316,7 @@ Scaffold gaps are marked with `??` symbols, and `*` symbol denotes a
 terminal graph node.
 
 Alternative contigs (representing alternative haplotypes) will have the same
-alt. group ID. Primary contigs are marked by `*`. Note that the ouptut of
+alt. group ID. Primary contigs are marked by `*`. Note that the output of
 alternative contigs could be disabled via the `--no-alt-contigs` option.
 
 ## <a name="graph"></a> Repeat graph


=====================================
flye/__build__.py
=====================================
@@ -1 +1 @@
-__build__ = 1797
+__build__ = 1799


=====================================
flye/__version__.py
=====================================
@@ -1 +1 @@
-__version__ = "2.9.3"
+__version__ = "2.9.4"


=====================================
flye/main.py
=====================================
@@ -406,12 +406,13 @@ def _set_genome_size(args):
         args.genome_size = human2bytes(args.genome_size.upper())
 
 
-def _run_polisher_only(args):
+def _run_polisher_only(args, output_progress=True):
     """
     Runs standalone polisher
     """
-    logger.info("Running Flye polisher")
-    logger.debug("Cmd: %s", " ".join(sys.argv))
+    if output_progress:
+        logger.info("Running Flye polisher")
+        logger.debug("Cmd: %s", " ".join(sys.argv))
     bam_input = False
 
     for read_file in args.reads:
@@ -434,8 +435,9 @@ def _run_polisher_only(args):
 
     pol.polish(args.polish_target, args.reads, args.out_dir,
                args.num_iters, args.threads, args.platform,
-               args.read_type, output_progress=True)
-    logger.info("Done!")
+               args.read_type, output_progress)
+    if output_progress:
+        logger.info("Done!")
 
 
 def _run(args):


=====================================
flye/polishing/bubbles.py
=====================================
@@ -12,6 +12,7 @@ import logging
 from bisect import bisect
 from flye.six.moves import range
 from collections import defaultdict
+from queue import Queue
 
 import multiprocessing
 import traceback
@@ -93,11 +94,16 @@ def _thread_worker(aln_reader, chunk_feeder, contigs_info, err_mode,
             for b in ctg_bubbles:
                 b.position += ctg_region.start
 
-            with bubbles_file_lock:
-                _output_bubbles(ctg_bubbles, open(bubbles_file, "a"))
+            if bubbles_file_lock:
+                bubbles_file_lock.acquire()
+
+            _output_bubbles(ctg_bubbles, open(bubbles_file, "a"))
             results_queue.put((ctg_id, len(ctg_bubbles), num_long_bubbles,
                                num_empty, num_long_branch, aln_errors,
                                mean_cov))
+            
+            if bubbles_file_lock:
+                bubbles_file_lock.release()
 
             del profile
             del ctg_bubbles
@@ -116,20 +122,26 @@ def make_bubbles(alignment_path, contigs_info, contigs_path,
     CHUNK_SIZE = 1000000
 
     contigs_fasta = fp.read_sequence_dict(contigs_path)
-    manager = multiprocessing.Manager()
+    manager = None if num_proc == 1 else multiprocessing.Manager()
     aln_reader = SynchronizedSamReader(alignment_path, contigs_fasta, manager,
                                        cfg.vals["max_read_coverage"], use_secondary=True)
     chunk_feeder = SynchonizedChunkManager(contigs_fasta, manager, chunk_size=CHUNK_SIZE)
 
-    results_queue = manager.Queue()
-    error_queue = manager.Queue()
-    bubbles_out_lock = multiprocessing.Lock()
-    #bubbles_out_handle = open(bubbles_out, "w")
+    if manager:
+        results_queue = manager.Queue()
+        error_queue = manager.Queue()
+        bubbles_out_lock = multiprocessing.Lock()
 
-    process_in_parallel(_thread_worker, (aln_reader, chunk_feeder, contigs_info, err_mode,
+        process_in_parallel(_thread_worker, (aln_reader, chunk_feeder, contigs_info, err_mode,
                          results_queue, error_queue, bubbles_out, bubbles_out_lock), num_proc)
-    #_thread_worker(aln_reader, chunk_feeder, contigs_info, err_mode,
-    #               results_queue, error_queue, bubbles_out, bubbles_out_lock)
+    else:
+        results_queue = Queue()
+        error_queue = Queue()
+        bubbles_out_lock = None
+
+        _thread_worker(aln_reader, chunk_feeder, contigs_info, err_mode,
+                results_queue, error_queue, bubbles_out, bubbles_out_lock)
+        
     if not error_queue.empty():
         raise error_queue.get()
 


=====================================
flye/polishing/polish.py
=====================================
@@ -104,6 +104,7 @@ def polish(contig_seqs, read_seqs, work_dir, num_iters, num_threads, read_platfo
                 logger.disabled = logger_state
             open(stats_file, "w").write("#seq_name\tlength\tcoverage\n")
             open(polished_file, "w")
+            gzip.open(bed_coverage, "wt")
             return polished_file, stats_file
 
         #####


=====================================
flye/utils/sam_parser.py
=====================================
@@ -137,8 +137,12 @@ class SynchonizedChunkManager(object):
         #will be shared between processes
         #self.shared_manager = multiprocessing.Manager()
         self.shared_num_jobs = multiprocessing.Value(ctypes.c_int, 0)
-        self.shared_lock = multiproc_manager.Lock()
-        self.shared_eof = multiprocessing.Value(ctypes.c_bool, False)
+        if multiproc_manager:
+            self.shared_lock = multiproc_manager.Lock()
+            self.shared_eof = multiprocessing.Value(ctypes.c_bool, False)
+        else:
+            self.shared_lock = None
+            self.shared_eof = ctypes.c_bool(False)
 
 
         for ctg_id in reference_fasta:
@@ -161,15 +165,22 @@ class SynchonizedChunkManager(object):
     def get_chunk(self):
         job_id = None
         while True:
-            with self.shared_lock:
-                if self.shared_eof.value:
-                    return None
-
-                job_id = self.shared_num_jobs.value
-                self.shared_num_jobs.value = self.shared_num_jobs.value + 1
-                if self.shared_num_jobs.value == len(self.fetch_list):
-                    self.shared_eof.value = True
-                break
+            if self.shared_lock:
+                self.shared_lock.acquire()
+
+            if self.shared_eof.value:
+                if self.shared_lock:
+                    self.shared_lock.release()
+                return None
+            
+            job_id = self.shared_num_jobs.value
+            self.shared_num_jobs.value = self.shared_num_jobs.value + 1
+            if self.shared_num_jobs.value == len(self.fetch_list):
+                self.shared_eof.value = True
+
+            if self.shared_lock:
+                self.shared_lock.release()
+            break
 
             time.sleep(0.01)
 
@@ -197,7 +208,7 @@ class SynchronizedSamReader(object):
         self.cigar_parser = re.compile(b"[0-9]+[MIDNSHP=X]")
 
         #self.shared_manager = multiprocessing.Manager()
-        self.ref_fasta = multiproc_manager.dict()
+        self.ref_fasta = dict() if multiproc_manager == None else multiproc_manager.dict()
         for (h, s) in iteritems(reference_fasta):
             self.ref_fasta[_BYTES(h)] = _BYTES(s)
 


=====================================
src/polishing/bubble_processor.cpp
=====================================
@@ -21,14 +21,14 @@ namespace
 BubbleProcessor::BubbleProcessor(const std::string& subsMatPath,
 								 const std::string& hopoMatrixPath,
 								 bool showProgress, bool hopoEnabled):
+	_hopoEnabled(hopoEnabled),
 	_subsMatrix(subsMatPath),
-	_hopoMatrix(hopoMatrixPath),
+	_hopoMatrix(hopoMatrixPath, _hopoEnabled),
 	_generalPolisher(_subsMatrix),
 	_homoPolisher(_subsMatrix, _hopoMatrix),
 	_dinucFixer(_subsMatrix),
 	_verbose(false),
-	_showProgress(showProgress),
-	_hopoEnabled(hopoEnabled)
+	_showProgress(showProgress)
 {
 }
 


=====================================
src/polishing/bubble_processor.h
=====================================
@@ -37,6 +37,10 @@ private:
 
 	const int BUBBLES_CACHE = 100;
 
+	bool					  _verbose;
+	bool 					  _showProgress;
+	bool					  _hopoEnabled;
+
 	const SubstitutionMatrix  _subsMatrix;
 	const HopoMatrix 		  _hopoMatrix;
 	const GeneralPolisher 	  _generalPolisher;
@@ -50,7 +54,4 @@ private:
 	std::ifstream			  _bubblesFile;
 	std::ofstream			  _consensusFile;
 	std::ofstream			  _logFile;
-	bool					  _verbose;
-	bool 					  _showProgress;
-	bool					  _hopoEnabled;
 };


=====================================
src/polishing/subs_matrix.cpp
=====================================
@@ -215,8 +215,12 @@ std::string HopoMatrix::obsToStr(HopoMatrix::Observation obs)
 	return result;
 }*/
 
-HopoMatrix::HopoMatrix(const std::string& fileName)
+HopoMatrix::HopoMatrix(const std::string& fileName, bool hopoEnabled = true)
 {
+	if (!hopoEnabled)
+	{
+		return;
+	}
 	for (size_t i = 0; i < NUM_HOPO_STATES; ++i)
 	{
 		_observationProbs.emplace_back(NUM_HOPO_OBS, probToScore(MIN_HOPO_PROB));
@@ -256,7 +260,7 @@ void HopoMatrix::loadMatrix(const std::string& fileName)
 	{
 		observationsFreq.push_back(std::vector<size_t>(NUM_HOPO_OBS, 0));
 	}
-
+	
 	while (std::getline(fin, buffer))
 	{
 		if (buffer.empty()) continue;


=====================================
src/polishing/subs_matrix.h
=====================================
@@ -68,7 +68,7 @@ public:
 	};
 	typedef std::vector<Observation> ObsVector;
 
-	HopoMatrix(const std::string& fileName);
+	HopoMatrix(const std::string& fileName, bool hopoEnabled);
 	AlnScoreType getObsProb(State state, Observation observ) const
 		{return _observationProbs[state.id][observ.id];}
 	AlnScoreType getGenomeProb(State state) const



View it on GitLab: https://salsa.debian.org/med-team/flye/-/commit/ebaf557e1e14db889cb34f08356125ce6895d1aa

-- 
This project does not include diff previews in email notifications.
View it on GitLab: https://salsa.debian.org/med-team/flye/-/commit/ebaf557e1e14db889cb34f08356125ce6895d1aa
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20240522/cbfe1a99/attachment-0001.htm>


More information about the debian-med-commit mailing list