[med-svn] [Git][med-team/augur][master] 4 commits: routine-update: New upstream version

Mon Nov 9 16:13:35 GMT 2020


Nilesh Patra pushed to branch master at Debian Med / augur


Commits:
27276569 by Nilesh Patra at 2020-11-09T21:36:33+05:30
routine-update: New upstream version

- - - - -
cdaaf804 by Nilesh Patra at 2020-11-09T21:36:34+05:30
New upstream version 10.0.4
- - - - -
2a334376 by Nilesh Patra at 2020-11-09T21:37:48+05:30
Update upstream source from tag 'upstream/10.0.4'

Update to upstream version '10.0.4'
with Debian dir 6195cab8fb73e919524e4676797389fec9ed4a75
- - - - -
b59a1a7a by Nilesh Patra at 2020-11-09T21:38:01+05:30
routine-update: Ready to upload to unstable

- - - - -


7 changed files:

- CHANGES.md
- augur/__version__.py
- augur/filter.py
- augur/tree.py
- debian/changelog
- + devel/test
- setup.py


Changes:

=====================================
CHANGES.md
=====================================
@@ -3,6 +3,16 @@
 ## __NEXT__
 
 
+## 10.0.4 (6 November 2020)
+
+### Bug Fixes
+
+* tree: Use a more generic approach to escape special characters from alignment sequence names prior to running IQ-TREE [#625][]
+* filter: Reduce memory usage by not reading sequences into memory [#627][]
+
+[#625]: https://github.com/nextstrain/augur/pull/625
+[#627]: https://github.com/nextstrain/augur/pull/627
+
 ## 10.0.3 (23 October 2020)
 
 ### Bug Fixes


=====================================
augur/__version__.py
=====================================
@@ -1,4 +1,4 @@
-__version__ = '10.0.3'
+__version__ = '10.0.4'
 
 
 def is_augur_version_compatible(version):


=====================================
augur/filter.py
=====================================
@@ -139,7 +139,7 @@ def run(args):
     #if Fasta, read in file to get sequence names and sequences
     else:
         try:
-            seqs = SeqIO.to_dict(SeqIO.parse(args.sequences, 'fasta'))
+            seqs = SeqIO.index(args.sequences, 'fasta')
         except ValueError as error:
             print("ERROR: Problem reading in {}:".format(args.sequences))
             print(error)
@@ -394,11 +394,24 @@ def run(args):
         write_vcf(args.sequences, args.output, dropped_samps)
 
     else:
-        seq_to_keep = [seq for id,seq in seqs.items() if id in seq_keep]
-        if len(seq_to_keep) == 0:
+        # It is possible to have ids in the list of sequences to keep that do
+        # not exist in the original input sequences. Find the intersection of
+        # these two lists of ids to determine if all samples were dropped or
+        # not. This final list of ids is in the same order as the input
+        # sequences such that output sequences are always in the same order for
+        # a given set of filters.
+        seq_to_write = [seq_id for seq_id in seqs if seq_id in seq_keep]
+
+        if len(seq_to_write) == 0:
             print("ERROR: All samples have been dropped! Check filter rules and metadata file format.")
             return 1
-        SeqIO.write(seq_to_keep, args.output, 'fasta')
+
+        # Write out sequences that passed all filters using an iterator to
+        # ensure that sequences are streamed to disk without being read into
+        # memory first.
+        seq_to_keep = (seqs[seq_id] for seq_id in seq_to_write)
+        sequences_written = SeqIO.write(seq_to_keep, args.output, 'fasta')
+        seqs.close()
 
     print("\n%i sequences were dropped during filtering" % (len(all_seq) - len(seq_keep),))
     if args.exclude:


=====================================
augur/tree.py
=====================================
@@ -133,18 +133,29 @@ def build_iqtree(aln_file, out_file, substitution_model="GTR", clean_up=True, nt
         aln_file    file name of input alignment
         out_file    file name to write tree to
     '''
-    with open(aln_file, encoding='utf-8') as ifile:
-        tmp_seqs = ifile.readlines()
+    # create a dictionary for characters that IQ-tree changes.
+    # we remove those prior to tree-building and reinstantiate later
+    def random_string(n):
+        from string import ascii_uppercase as letters
+        return "".join([letters[i] for i in np.random.randint(len(letters), size=n)])
+    prefix = "DELIM"
+    escape_dict = {c:f'_{prefix}-{random_string(20)}_' for c in '/|()*'}
+    reverse_escape_dict = {v:k for k,v in escape_dict.items()}
+
 
     # IQ-tree messes with taxon names. Hence remove offending characters, reinstantiate later
     tmp_aln_file = aln_file.replace(".fasta", "-delim.fasta")
     log_file = tmp_aln_file.replace(".fasta", ".iqtree.log")
     num_seqs = 0
-    with open(tmp_aln_file, 'w', encoding='utf-8') as ofile:
-        for line in tmp_seqs:
+    with open(tmp_aln_file, 'w', encoding='utf-8') as ofile, open(aln_file, encoding='utf-8') as ifile:
+        for line in ifile:
+            tmp_line = line
             if line.startswith(">"):
                 num_seqs += 1
-            ofile.write(line.replace('/', '_X_X_').replace('|','_Y_Y_').replace("(","_X_Y_").replace(")","_Y_X_"))
+                for c,v in escape_dict.items():
+                    tmp_line = tmp_line.replace(c,v)
+
+            ofile.write(tmp_line)
 
     # For compat with older versions of iqtree, we avoid the newish -fast
     # option alias and instead spell out its component parts:
@@ -195,7 +206,10 @@ def build_iqtree(aln_file, out_file, substitution_model="GTR", clean_up=True, nt
         T = Phylo.read(tmp_aln_file+".treefile", 'newick')
         shutil.copyfile(tmp_aln_file+".treefile", out_file)
         for n in T.find_clades(terminal=True):
-            n.name = n.name.replace('_X_X_','/').replace('_Y_Y_','|').replace("_X_Y_","(").replace("_Y_X_",")")
+            tmp_name = n.name
+            for v,c in reverse_escape_dict.items():
+                tmp_name = tmp_name.replace(v,c)
+            n.name = tmp_name
         #this allows the user to check intermediate output, as tree.nwk will be
         if clean_up:
             #allow user to see chosen model if modeltest was run


=====================================
debian/changelog
=====================================
@@ -1,3 +1,10 @@
+augur (10.0.4-1) unstable; urgency=medium
+
+  * Team upload.
+  * New upstream version
+
+ -- Nilesh Patra <npatra974 at gmail.com>  Mon, 09 Nov 2020 21:38:01 +0530
+
 augur (10.0.3-2) unstable; urgency=medium
 
   * Augur is installable only for Architecture: amd64 i386 due to its


=====================================
devel/test
=====================================
@@ -0,0 +1,25 @@
+#!/bin/bash
+#
+# Runs tests in a sandboxed environment that loosely replicates the Travis CI
+# environment.
+set -euo pipefail
+
+# Conda functions are not exported into a new bash environment by default, so we
+# need to source the conda profile script from the current conda installation.
+# This script expects PS1 to be defined, so we set it to an empty value.
+export PS1=
+conda_dir=$(conda info --base)
+source "${conda_dir}/etc/profile.d/conda.sh"
+
+# Install Augur from the current directory into a new conda environment.
+conda env create -f environment.yml
+conda activate augur
+python3 -m pip install -e .[dev]
+
+# Run unit and functional tests.
+./run_tests.sh
+bash tests/builds/runner.sh
+
+# Clean up the temporary environment.
+conda deactivate
+conda env remove -n augur


=====================================
setup.py
=====================================
@@ -71,7 +71,7 @@ setuptools.setup(
             "pytest-cov >=2.8.1, ==2.8.*",
             "pytest-mock >= 2.0.0, ==2.0.*",
             "recommonmark >=0.5.0, ==0.*",
-            "snakemake >=5.4.0, ==5.*",
+            "snakemake >=5.4.0, <5.27",
             "Sphinx >=2.0.1, ==2.*",
             "sphinx-argparse >=0.2.5, ==0.*",
             "sphinx-markdown-tables >= 0.0.9",



View it on GitLab: https://salsa.debian.org/med-team/augur/-/compare/76c71d0ebbf0065f663f8e5c24af7becbbf5a395...b59a1a7a9dcc7b11889c92a0c2605a20f37262b7

-- 
View it on GitLab: https://salsa.debian.org/med-team/augur/-/compare/76c71d0ebbf0065f663f8e5c24af7becbbf5a395...b59a1a7a9dcc7b11889c92a0c2605a20f37262b7
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20201109/78ea64b5/attachment-0001.html>