[med-svn] [Git][med-team/augur][master] 4 commits: routine-update: New upstream version
Nilesh Patra
gitlab at salsa.debian.org
Mon Nov 9 16:13:35 GMT 2020
Nilesh Patra pushed to branch master at Debian Med / augur
Commits:
27276569 by Nilesh Patra at 2020-11-09T21:36:33+05:30
routine-update: New upstream version
- - - - -
cdaaf804 by Nilesh Patra at 2020-11-09T21:36:34+05:30
New upstream version 10.0.4
- - - - -
2a334376 by Nilesh Patra at 2020-11-09T21:37:48+05:30
Update upstream source from tag 'upstream/10.0.4'
Update to upstream version '10.0.4'
with Debian dir 6195cab8fb73e919524e4676797389fec9ed4a75
- - - - -
b59a1a7a by Nilesh Patra at 2020-11-09T21:38:01+05:30
routine-update: Ready to upload to unstable
- - - - -
7 changed files:
- CHANGES.md
- augur/__version__.py
- augur/filter.py
- augur/tree.py
- debian/changelog
- + devel/test
- setup.py
Changes:
=====================================
CHANGES.md
=====================================
@@ -3,6 +3,16 @@
## __NEXT__
+## 10.0.4 (6 November 2020)
+
+### Bug Fixes
+
+* tree: Use a more generic approach to escape special characters from alignment sequence names prior to running IQ-TREE [#625][]
+* filter: Reduce memory usage by not reading sequences into memory [#627][]
+
+[#625]: https://github.com/nextstrain/augur/pull/625
+[#627]: https://github.com/nextstrain/augur/pull/627
+
## 10.0.3 (23 October 2020)
### Bug Fixes
=====================================
augur/__version__.py
=====================================
@@ -1,4 +1,4 @@
-__version__ = '10.0.3'
+__version__ = '10.0.4'
def is_augur_version_compatible(version):
=====================================
augur/filter.py
=====================================
@@ -139,7 +139,7 @@ def run(args):
#if Fasta, read in file to get sequence names and sequences
else:
try:
- seqs = SeqIO.to_dict(SeqIO.parse(args.sequences, 'fasta'))
+ seqs = SeqIO.index(args.sequences, 'fasta')
except ValueError as error:
print("ERROR: Problem reading in {}:".format(args.sequences))
print(error)
@@ -394,11 +394,24 @@ def run(args):
write_vcf(args.sequences, args.output, dropped_samps)
else:
- seq_to_keep = [seq for id,seq in seqs.items() if id in seq_keep]
- if len(seq_to_keep) == 0:
+ # It is possible to have ids in the list of sequences to keep that do
+ # not exist in the original input sequences. Find the intersection of
+ # these two lists of ids to determine if all samples were dropped or
+ # not. This final list of ids is in the same order as the input
+ # sequences such that output sequences are always in the same order for
+ # a given set of filters.
+ seq_to_write = [seq_id for seq_id in seqs if seq_id in seq_keep]
+
+ if len(seq_to_write) == 0:
print("ERROR: All samples have been dropped! Check filter rules and metadata file format.")
return 1
- SeqIO.write(seq_to_keep, args.output, 'fasta')
+
+ # Write out sequences that passed all filters using an iterator to
+ # ensure that sequences are streamed to disk without being read into
+ # memory first.
+ seq_to_keep = (seqs[seq_id] for seq_id in seq_to_write)
+ sequences_written = SeqIO.write(seq_to_keep, args.output, 'fasta')
+ seqs.close()
print("\n%i sequences were dropped during filtering" % (len(all_seq) - len(seq_keep),))
if args.exclude:
=====================================
augur/tree.py
=====================================
@@ -133,18 +133,29 @@ def build_iqtree(aln_file, out_file, substitution_model="GTR", clean_up=True, nt
aln_file file name of input alignment
out_file file name to write tree to
'''
- with open(aln_file, encoding='utf-8') as ifile:
- tmp_seqs = ifile.readlines()
+ # create a dictionary for characters that IQ-tree changes.
+ # we remove those prior to tree-building and reinstantiate later
+ def random_string(n):
+ from string import ascii_uppercase as letters
+ return "".join([letters[i] for i in np.random.randint(len(letters), size=n)])
+ prefix = "DELIM"
+ escape_dict = {c:f'_{prefix}-{random_string(20)}_' for c in '/|()*'}
+ reverse_escape_dict = {v:k for k,v in escape_dict.items()}
+
# IQ-tree messes with taxon names. Hence remove offending characters, reinstantiate later
tmp_aln_file = aln_file.replace(".fasta", "-delim.fasta")
log_file = tmp_aln_file.replace(".fasta", ".iqtree.log")
num_seqs = 0
- with open(tmp_aln_file, 'w', encoding='utf-8') as ofile:
- for line in tmp_seqs:
+ with open(tmp_aln_file, 'w', encoding='utf-8') as ofile, open(aln_file, encoding='utf-8') as ifile:
+ for line in ifile:
+ tmp_line = line
if line.startswith(">"):
num_seqs += 1
- ofile.write(line.replace('/', '_X_X_').replace('|','_Y_Y_').replace("(","_X_Y_").replace(")","_Y_X_"))
+ for c,v in escape_dict.items():
+ tmp_line = tmp_line.replace(c,v)
+
+ ofile.write(tmp_line)
# For compat with older versions of iqtree, we avoid the newish -fast
# option alias and instead spell out its component parts:
@@ -195,7 +206,10 @@ def build_iqtree(aln_file, out_file, substitution_model="GTR", clean_up=True, nt
T = Phylo.read(tmp_aln_file+".treefile", 'newick')
shutil.copyfile(tmp_aln_file+".treefile", out_file)
for n in T.find_clades(terminal=True):
- n.name = n.name.replace('_X_X_','/').replace('_Y_Y_','|').replace("_X_Y_","(").replace("_Y_X_",")")
+ tmp_name = n.name
+ for v,c in reverse_escape_dict.items():
+ tmp_name = tmp_name.replace(v,c)
+ n.name = tmp_name
#this allows the user to check intermediate output, as tree.nwk will be
if clean_up:
#allow user to see chosen model if modeltest was run
=====================================
debian/changelog
=====================================
@@ -1,3 +1,10 @@
+augur (10.0.4-1) unstable; urgency=medium
+
+ * Team upload.
+ * New upstream version
+
+ -- Nilesh Patra <npatra974 at gmail.com> Mon, 09 Nov 2020 21:38:01 +0530
+
augur (10.0.3-2) unstable; urgency=medium
* Augur is installable only for Architecture: amd64 i386 due to its
=====================================
devel/test
=====================================
@@ -0,0 +1,25 @@
+#!/bin/bash
+#
+# Runs tests in a sandboxed environment that loosely replicates the Travis CI
+# environment.
+set -euo pipefail
+
+# Conda functions are not exported into a new bash environment by default, so we
+# need to source the conda profile script from the current conda installation.
+# This script expects PS1 to be defined, so we set it to an empty value.
+export PS1=
+conda_dir=$(conda info --base)
+source "${conda_dir}/etc/profile.d/conda.sh"
+
+# Install Augur from the current directory into a new conda environment.
+conda env create -f environment.yml
+conda activate augur
+python3 -m pip install -e .[dev]
+
+# Run unit and functional tests.
+./run_tests.sh
+bash tests/builds/runner.sh
+
+# Clean up the temporary environment.
+conda deactivate
+conda env remove -n augur
=====================================
setup.py
=====================================
@@ -71,7 +71,7 @@ setuptools.setup(
"pytest-cov >=2.8.1, ==2.8.*",
"pytest-mock >= 2.0.0, ==2.0.*",
"recommonmark >=0.5.0, ==0.*",
- "snakemake >=5.4.0, ==5.*",
+ "snakemake >=5.4.0, <5.27",
"Sphinx >=2.0.1, ==2.*",
"sphinx-argparse >=0.2.5, ==0.*",
"sphinx-markdown-tables >= 0.0.9",
View it on GitLab: https://salsa.debian.org/med-team/augur/-/compare/76c71d0ebbf0065f663f8e5c24af7becbbf5a395...b59a1a7a9dcc7b11889c92a0c2605a20f37262b7
--
View it on GitLab: https://salsa.debian.org/med-team/augur/-/compare/76c71d0ebbf0065f663f8e5c24af7becbbf5a395...b59a1a7a9dcc7b11889c92a0c2605a20f37262b7
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20201109/78ea64b5/attachment-0001.html>
More information about the debian-med-commit
mailing list