[med-svn] [Git][med-team/augur][master] 4 commits: routine-update: New upstream version
Nilesh Patra
gitlab at salsa.debian.org
Sun Oct 25 09:24:00 GMT 2020
Nilesh Patra pushed to branch master at Debian Med / augur
Commits:
4ff29e81 by Nilesh Patra at 2020-10-25T14:44:03+05:30
routine-update: New upstream version
- - - - -
68d6dee1 by Nilesh Patra at 2020-10-25T14:44:04+05:30
New upstream version 10.0.3
- - - - -
2551f730 by Nilesh Patra at 2020-10-25T14:45:17+05:30
Update upstream source from tag 'upstream/10.0.3'
Update to upstream version '10.0.3'
with Debian dir b7661a49cee5982542e0ce90574a7d2957a0f15f
- - - - -
98187557 by Nilesh Patra at 2020-10-25T14:45:39+05:30
routine-update: Ready to upload to unstable
- - - - -
15 changed files:
- CHANGES.md
- augur/__version__.py
- augur/data/schema-export-v2.json
- augur/refine.py
- augur/validate_export.py
- debian/changelog
- + tests/functional/refine.t
- + tests/functional/refine/aligned.fasta
- + tests/functional/refine/integer_branch_lengths.json
- + tests/functional/refine/metadata.tsv
- + tests/functional/refine/mutations_per_site_branch_lengths.json
- + tests/functional/refine/not_time_tree.nwk
- + tests/functional/refine/tree.nwk
- + tests/functional/refine/tree_raw.nwk
- + tests/test_validate_export.py
Changes:
=====================================
CHANGES.md
=====================================
@@ -3,6 +3,16 @@
## __NEXT__
+## 10.0.3 (23 October 2020)
+
+### Bug Fixes
+
+* refine: Report divergence by number of mutations as an integer instead of a floating point value [#618][]
+* validate: Allow internal nodes with a single child and do not allow duplicate node names [#621][]
+
+[#618]: https://github.com/nextstrain/augur/pull/618
+[#621]: https://github.com/nextstrain/augur/pull/621
+
## 10.0.2 (8 September 2020)
### Bug Fixes
=====================================
augur/__version__.py
=====================================
@@ -1,4 +1,4 @@
-__version__ = '10.0.2'
+__version__ = '10.0.3'
def is_augur_version_compatible(version):
=====================================
augur/data/schema-export-v2.json
=====================================
@@ -445,9 +445,9 @@
},
"children": {
"description": "Child nodes. Recursive structure. Terminal nodes do not have this property.",
- "$comment": "Polytomies (more than 2 items) allowed.",
+ "$comment": "Polytomies (more than 2 items) allowed, as are nodes with a single child.",
"type": "array",
- "minItems": 2,
+ "minItems": 1,
"items": {"$ref": "#/properties/tree"}
}
}
=====================================
augur/refine.py
=====================================
@@ -6,7 +6,7 @@ import os, shutil, time, sys
from Bio import Phylo
from .utils import read_metadata, read_tree, get_numerical_dates, write_json, InvalidTreeError
from treetime.vcf_utils import read_vcf, write_vcf
-
+from treetime.seq_utils import profile_maps
def refine(tree=None, aln=None, ref=None, dates=None, branch_length_inference='auto',
confidence=False, resolve_polytomies=True, max_iter=2, precision='auto',
@@ -145,10 +145,15 @@ def run(args):
return 1
if not args.alignment:
- # fake alignment to appease treetime when only using it for naming nodes...
if args.timetree:
- print("ERROR: alignment is required for ancestral reconstruction or timetree inference")
+ print("ERROR: alignment is required for ancestral reconstruction or timetree inference", file=sys.stderr)
return 1
+
+ if args.divergence_units=='mutations':
+ print("ERROR: alignment is required for divergence in units of mutations", file=sys.stderr)
+ return 1
+
+ # fake alignment to appease treetime when only using it for naming nodes...
from Bio import SeqRecord, Seq, Align
seqs = []
for n in T.get_terminals():
@@ -156,7 +161,7 @@ def run(args):
aln = Align.MultipleSeqAlignment(seqs)
elif any([args.alignment.lower().endswith(x) for x in ['.vcf', '.vcf.gz']]):
if not args.vcf_reference:
- print("ERROR: a reference Fasta is required with VCF-format alignments")
+ print("ERROR: a reference Fasta is required with VCF-format alignments", file=sys.stderr)
return 1
compress_seq = read_vcf(args.alignment, args.vcf_reference)
@@ -185,7 +190,7 @@ def run(args):
if args.timetree:
# load meta data and convert dates to numeric
if args.metadata is None:
- print("ERROR: meta data with dates is required for time tree reconstruction")
+ print("ERROR: meta data with dates is required for time tree reconstruction", file=sys.stderr)
return 1
metadata, columns = read_metadata(args.metadata)
if args.year_bounds:
@@ -217,7 +222,7 @@ def run(args):
node_data['skyline'] = [[float(x) for x in skyline.x], [float(y) for y in conf[0]],
[float(y) for y in skyline.y], [float(y) for y in conf[1]]]
except:
- print("ERROR: skyline optimization by TreeTime has failed.")
+ print("ERROR: skyline optimization by TreeTime has failed.", file=sys.stderr)
return 1
attributes.extend(['numdate', 'clock_length', 'mutation_length', 'raw_date', 'date'])
@@ -241,13 +246,35 @@ def run(args):
if args.divergence_units=='mutations-per-site': #default
pass
elif args.divergence_units=='mutations':
- L = tt.seq_len
- for node in node_data['nodes']:
+ if not args.timetree:
+ tt.infer_ancestral_sequences()
+ nuc_map = profile_maps['nuc']
+
+ def are_sequence_states_different(nuc1, nuc2):
+ '''
+ determine whether two ancestral states should count as mutation for divergence estimates
+ while correctly accounting for ambiguous nucleotides
+ '''
+ if nuc1 in ['-', 'N'] or nuc2 in ['-', 'N']:
+ return False
+ elif nuc1 in nuc_map and nuc2 in nuc_map:
+ return np.sum(nuc_map[nuc1]*nuc_map[nuc2])==0
+ else:
+ return False
+
+ for node in T.find_clades():
+ n_muts = len([
+ position
+ for ancestral, position, derived in node.mutations
+ if are_sequence_states_different(ancestral, derived)
+ ])
+
if args.timetree:
- node_data['nodes'][node]['mutation_length'] *= L
- node_data['nodes'][node]['branch_length'] *= L
+ node_data['nodes'][node.name]['mutation_length'] = n_muts
+
+ node_data['nodes'][node.name]['branch_length'] = n_muts
else:
- print("ERROR: divergence unit",args.divergence_units,"not supported!")
+ print("ERROR: divergence unit",args.divergence_units,"not supported!", file=sys.stderr)
return 1
# Export refined tree and node data
=====================================
augur/validate_export.py
=====================================
@@ -7,6 +7,20 @@ and refactored over time.
import sys
from collections import defaultdict
+def ensure_no_duplicate_names(root, ValidateError):
+ """
+ Check that all node names are unique, which is required for auspice (v2) JSONs.
+ """
+ names = set()
+ def recurse(node):
+ if node["name"] in names:
+ raise ValidateError(f"Node {node['name']} appears multiple times in the tree.")
+ names.add(node["name"])
+ if "children" in node:
+ [recurse(child) for child in node["children"]]
+ recurse(root)
+
+
def collectTreeAttrsV2(root, warn):
"""
Collect all keys specified on `node["node_attrs"]` throughout the tree
@@ -82,6 +96,8 @@ def verifyMainJSONIsInternallyConsistent(data, ValidateError):
print("Validating that the JSON is internally consistent...")
+ ensure_no_duplicate_names(data["tree"], ValidateError)
+
if "entropy" in data["meta"]["panels"] and "genome_annotations" not in data["meta"]:
warn("The entropy panel has been specified but annotations don't exist.")
=====================================
debian/changelog
=====================================
@@ -1,3 +1,10 @@
+augur (10.0.3-1) unstable; urgency=medium
+
+ * Team upload.
+ * New upstream version
+
+ -- Nilesh Patra <npatra974 at gmail.com> Sun, 25 Oct 2020 14:45:39 +0530
+
augur (10.0.2-1) unstable; urgency=medium
* Team Upload.
=====================================
tests/functional/refine.t
=====================================
@@ -0,0 +1,117 @@
+Integration tests for augur refine.
+
+ $ pushd "$TESTDIR" > /dev/null
+ $ export AUGUR="../../bin/augur"
+
+Try building a time tree.
+
+ $ ${AUGUR} refine \
+ > --tree "refine/tree_raw.nwk" \
+ > --alignment "refine/aligned.fasta" \
+ > --metadata "refine/metadata.tsv" \
+ > --output-tree "$TMP/tree.nwk" \
+ > --output-node-data "$TMP/branch_lengths.json" \
+ > --timetree \
+ > --coalescent opt \
+ > --date-confidence \
+ > --date-inference marginal \
+ > --clock-filter-iqd 4 \
+ > --seed 314159 > /dev/null
+ */treetime/aa_models.py:108: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray (glob)
+ [0.800038509951648, 1.20274751778601, 1.55207513886163, 1.46600946033173, 0.830022143283238, 1.5416250309563, 1.53255698189437, 1.41208067821187, 1.47469999960758, 0.351200119909572, 0.570542199221932, 1.21378822764856, 0.609532859331199, 0.692733248746636, 1.40887880416009, 1.02015839286433, 0.807404666228614, 1.268589159299, 0.933095433689795]
+
+Confirm that TreeTime trees match expected topology and branch lengths.
+
+ $ python3 "$TESTDIR/../../scripts/diff_trees.py" "refine/tree.nwk" "$TMP/tree.nwk" --significant-digits 2
+ {}
+
+Build a time tree with mutations as the reported divergence unit.
+
+ $ ${AUGUR} refine \
+ > --tree "refine/tree_raw.nwk" \
+ > --alignment "refine/aligned.fasta" \
+ > --metadata "refine/metadata.tsv" \
+ > --output-tree "$TMP/tree.nwk" \
+ > --output-node-data "$TMP/branch_lengths.json" \
+ > --timetree \
+ > --coalescent opt \
+ > --date-confidence \
+ > --date-inference marginal \
+ > --clock-filter-iqd 4 \
+ > --seed 314159 \
+ > --divergence-units mutations > /dev/null
+ */treetime/aa_models.py:108: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray (glob)
+ [0.800038509951648, 1.20274751778601, 1.55207513886163, 1.46600946033173, 0.830022143283238, 1.5416250309563, 1.53255698189437, 1.41208067821187, 1.47469999960758, 0.351200119909572, 0.570542199221932, 1.21378822764856, 0.609532859331199, 0.692733248746636, 1.40887880416009, 1.02015839286433, 0.807404666228614, 1.268589159299, 0.933095433689795]
+
+Confirm that TreeTime trees match expected topology and branch lengths.
+
+ $ python3 "$TESTDIR/../../scripts/diff_trees.py" "refine/tree.nwk" "$TMP/tree.nwk" --significant-digits 2
+ {}
+
+Run refine without inferring a time tree.
+This is one way to get named internal nodes for downstream analyses and does not require an alignment FASTA.
+
+ $ ${AUGUR} refine \
+ > --tree "refine/tree_raw.nwk" \
+ > --metadata "refine/metadata.tsv" \
+ > --output-tree "$TMP/tree.nwk" \
+ > --output-node-data "$TMP/branch_lengths.json" \
+ > --coalescent opt \
+ > --date-confidence \
+ > --date-inference marginal \
+ > --clock-filter-iqd 4 \
+ > --seed 314159 \
+ > --divergence-units mutations-per-site > /dev/null
+ */treetime/aa_models.py:108: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray (glob)
+ [0.800038509951648, 1.20274751778601, 1.55207513886163, 1.46600946033173, 0.830022143283238, 1.5416250309563, 1.53255698189437, 1.41208067821187, 1.47469999960758, 0.351200119909572, 0.570542199221932, 1.21378822764856, 0.609532859331199, 0.692733248746636, 1.40887880416009, 1.02015839286433, 0.807404666228614, 1.268589159299, 0.933095433689795]
+
+Confirm that trees match expected topology and branch lengths, given that the output should not be a time tree.
+
+ $ python3 "$TESTDIR/../../scripts/diff_trees.py" "refine/not_time_tree.nwk" "$TMP/tree.nwk" --significant-digits 2
+ {}
+ $ python3 "$TESTDIR/../../scripts/diff_jsons.py" "refine/mutations_per_site_branch_lengths.json" "$TMP/branch_lengths.json" --significant-digits 0
+ {}
+
+Run refine again without a time tree, but request number of mutations per branch as the divergence unit.
+This approach only works when we provide an alignment FASTA.
+
+ $ ${AUGUR} refine \
+ > --tree "refine/tree_raw.nwk" \
+ > --alignment "refine/aligned.fasta" \
+ > --metadata "refine/metadata.tsv" \
+ > --output-tree "$TMP/tree.nwk" \
+ > --output-node-data "$TMP/branch_lengths.json" \
+ > --coalescent opt \
+ > --date-confidence \
+ > --date-inference marginal \
+ > --clock-filter-iqd 4 \
+ > --seed 314159 \
+ > --divergence-units mutations > /dev/null
+ */treetime/aa_models.py:108: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray (glob)
+ [0.800038509951648, 1.20274751778601, 1.55207513886163, 1.46600946033173, 0.830022143283238, 1.5416250309563, 1.53255698189437, 1.41208067821187, 1.47469999960758, 0.351200119909572, 0.570542199221932, 1.21378822764856, 0.609532859331199, 0.692733248746636, 1.40887880416009, 1.02015839286433, 0.807404666228614, 1.268589159299, 0.933095433689795]
+
+Confirm that trees match expected topology and branch lengths, given that the output should not be a time tree.
+
+ $ python3 "$TESTDIR/../../scripts/diff_trees.py" "refine/not_time_tree.nwk" "$TMP/tree.nwk" --significant-digits 2
+ {}
+ $ python3 "$TESTDIR/../../scripts/diff_jsons.py" "refine/integer_branch_lengths.json" "$TMP/branch_lengths.json" --significant-digits 0
+ {}
+
+Run refine again without a time tree, but try to request number of mutations per branch as the divergence unit.
+This approach does not make sense and should not work without an alignment FASTA.
+
+ $ ${AUGUR} refine \
+ > --tree "refine/tree_raw.nwk" \
+ > --metadata "refine/metadata.tsv" \
+ > --output-tree "$TMP/tree.nwk" \
+ > --output-node-data "$TMP/branch_lengths.json" \
+ > --coalescent opt \
+ > --date-confidence \
+ > --date-inference marginal \
+ > --clock-filter-iqd 4 \
+ > --seed 314159 \
+ > --divergence-units mutations > /dev/null
+ *ERROR: alignment is required* (glob)
+ [1]
+
+ $ popd > /dev/null
=====================================
tests/functional/refine/aligned.fasta
=====================================
The diff for this file was not included because it is too large.
=====================================
tests/functional/refine/integer_branch_lengths.json
=====================================
@@ -0,0 +1,70 @@
+{
+ "alignment": "refine/aligned.fasta",
+ "generated_by": {
+ "program": "augur",
+ "version": "10.0.2"
+ },
+ "input_tree": "refine/tree_raw.nwk",
+ "nodes": {
+ "BRA/2016/FC_6706": {
+ "branch_length": 12
+ },
+ "COL/FLR_00008/2015": {
+ "branch_length": 5
+ },
+ "Colombia/2016/ZC204Se": {
+ "branch_length": 5
+ },
+ "DOM/2016/BB_0183": {
+ "branch_length": 24
+ },
+ "EcEs062_16": {
+ "branch_length": 18
+ },
+ "HND/2016/HU_ME59": {
+ "branch_length": 13
+ },
+ "KX369547.1": {
+ "branch_length": 3
+ },
+ "NODE_0000000": {
+ "branch_length": 0
+ },
+ "NODE_0000001": {
+ "branch_length": 7
+ },
+ "NODE_0000002": {
+ "branch_length": 2
+ },
+ "NODE_0000003": {
+ "branch_length": 0
+ },
+ "NODE_0000004": {
+ "branch_length": 1
+ },
+ "NODE_0000005": {
+ "branch_length": 2
+ },
+ "NODE_0000006": {
+ "branch_length": 11
+ },
+ "NODE_0000007": {
+ "branch_length": 2
+ },
+ "NODE_0000008": {
+ "branch_length": 6
+ },
+ "PAN/CDC_259359_V1_V3/2015": {
+ "branch_length": 9
+ },
+ "PRVABC59": {
+ "branch_length": 22
+ },
+ "VEN/UF_1/2016": {
+ "branch_length": 7
+ },
+ "ZKC2/2016": {
+ "branch_length": 28
+ }
+ }
+}
\ No newline at end of file
=====================================
tests/functional/refine/metadata.tsv
=====================================
@@ -0,0 +1,13 @@
+strain virus accession date region country division city db segment authors url title journal paper_url
+PAN/CDC_259359_V1_V3/2015 zika KX156774 2015-12-18 North America Panama Panama Panama genbank genome Shabman et al https://www.ncbi.nlm.nih.gov/nuccore/KX156774 Direct Submission Submitted (29-APR-2016) J. Craig Venter Institute, 9704 Medical Center Drive, Rockville, MD 20850, USA https://www.ncbi.nlm.nih.gov/pubmed/
+COL/FLR_00024/2015 zika MF574569 2015-12-XX South America Colombia Colombia Colombia genbank genome Pickett et al https://www.ncbi.nlm.nih.gov/nuccore/MF574569 Direct Submission Submitted (28-JUL-2017) J. Craig Venter Institute, 9704 Medical Center Drive, Rockville, MD 20850, USA https://www.ncbi.nlm.nih.gov/pubmed/
+PRVABC59 zika KU501215 2015-12-XX North America Puerto Rico Puerto Rico Puerto Rico genbank genome Lanciotti et al https://www.ncbi.nlm.nih.gov/nuccore/KU501215 Phylogeny of Zika Virus in Western Hemisphere, 2015 Emerging Infect. Dis. 22 (5), 933-935 (2016) https://www.ncbi.nlm.nih.gov/pubmed/27088323
+COL/FLR_00008/2015 zika MF574562 2015-12-XX South America Colombia Colombia Colombia genbank genome Pickett et al https://www.ncbi.nlm.nih.gov/nuccore/MF574562 Direct Submission Submitted (28-JUL-2017) J. Craig Venter Institute, 9704 Medical Center Drive, Rockville, MD 20850, USA https://www.ncbi.nlm.nih.gov/pubmed/
+Colombia/2016/ZC204Se zika KY317939 2016-01-06 South America Colombia Colombia Colombia genbank genome Quick et al https://www.ncbi.nlm.nih.gov/nuccore/KY317939 Multiplex PCR method for MinION and Illumina sequencing of Zika and other virus genomes directly from clinical samples Nat Protoc 12 (6), 1261-1276 (2017) https://www.ncbi.nlm.nih.gov/pubmed/28538739
+ZKC2/2016 zika KX253996 2016-02-16 Oceania American Samoa American Samoa American Samoa genbank genome Wu et al https://www.ncbi.nlm.nih.gov/nuccore/KX253996 Direct Submission Submitted (18-MAY-2016) Center for Diseases Control and Prevention of Guangdong Province; National Institute of Viral Disease Control and Prevention, China https://www.ncbi.nlm.nih.gov/pubmed/
+VEN/UF_1/2016 zika KX702400 2016-03-25 South America Venezuela Venezuela Venezuela genbank genome Blohm et al https://www.ncbi.nlm.nih.gov/nuccore/KX702400 Complete Genome Sequences of Identical Zika virus Isolates in a Nursing Mother and Her Infant Genome Announc 5 (17), e00231-17 (2017) https://www.ncbi.nlm.nih.gov/pubmed/28450510
+DOM/2016/BB_0059 zika KY785425 2016-04-04 North America Dominican Republic Dominican Republic Dominican Republic genbank genome Metsky et al https://www.ncbi.nlm.nih.gov/nuccore/KY785425 Zika virus evolution and spread in the Americas Nature 546 (7658), 411-415 (2017) https://www.ncbi.nlm.nih.gov/pubmed/28538734
+BRA/2016/FC_6706 zika KY785433 2016-04-08 South America Brazil Brazil Brazil genbank genome Metsky et al https://www.ncbi.nlm.nih.gov/nuccore/KY785433 Zika virus evolution and spread in the Americas Nature 546 (7658), 411-415 (2017) https://www.ncbi.nlm.nih.gov/pubmed/28538734
+DOM/2016/BB_0183 zika KY785420 2016-04-18 North America Dominican Republic Dominican Republic Dominican Republic genbank genome Metsky et al https://www.ncbi.nlm.nih.gov/nuccore/KY785420 Zika virus evolution and spread in the Americas Nature 546 (7658), 411-415 (2017) https://www.ncbi.nlm.nih.gov/pubmed/28538734
+EcEs062_16 zika KX879603 2016-04-XX South America Ecuador Ecuador Ecuador genbank genome Marquez et al https://www.ncbi.nlm.nih.gov/nuccore/KX879603 First Complete Genome Sequences of Zika Virus Isolated from Febrile Patient Sera in Ecuador Genome Announc 5 (8), e01673-16 (2017) https://www.ncbi.nlm.nih.gov/pubmed/28232448
+HND/2016/HU_ME59 zika KY785418 2016-05-13 North America Honduras Honduras Honduras genbank genome Metsky et al https://www.ncbi.nlm.nih.gov/nuccore/KY785418 Zika virus evolution and spread in the Americas Nature 546 (7658), 411-415 (2017) https://www.ncbi.nlm.nih.gov/pubmed/28538734
=====================================
tests/functional/refine/mutations_per_site_branch_lengths.json
=====================================
@@ -0,0 +1,70 @@
+{
+ "alignment": null,
+ "generated_by": {
+ "program": "augur",
+ "version": "10.0.2"
+ },
+ "input_tree": "refine/tree_raw.nwk",
+ "nodes": {
+ "BRA/2016/FC_6706": {
+ "branch_length": 0.00119745
+ },
+ "COL/FLR_00008/2015": {
+ "branch_length": 0.00046961
+ },
+ "Colombia/2016/ZC204Se": {
+ "branch_length": 0.00047184
+ },
+ "DOM/2016/BB_0183": {
+ "branch_length": 0.00226698
+ },
+ "EcEs062_16": {
+ "branch_length": 0.00167718
+ },
+ "HND/2016/HU_ME59": {
+ "branch_length": 0.00125764
+ },
+ "KX369547.1": {
+ "branch_length": 0.00027828
+ },
+ "NODE_0000000": {
+ "branch_length": 0.001
+ },
+ "NODE_0000001": {
+ "branch_length": 0.000651
+ },
+ "NODE_0000002": {
+ "branch_length": 0.000184
+ },
+ "NODE_0000003": {
+ "branch_length": 2e-06
+ },
+ "NODE_0000004": {
+ "branch_length": 9.3e-05
+ },
+ "NODE_0000005": {
+ "branch_length": 0.000209
+ },
+ "NODE_0000006": {
+ "branch_length": 0.000949
+ },
+ "NODE_0000007": {
+ "branch_length": 0.000192
+ },
+ "NODE_0000008": {
+ "branch_length": 0.000579
+ },
+ "PAN/CDC_259359_V1_V3/2015": {
+ "branch_length": 0.00083922
+ },
+ "PRVABC59": {
+ "branch_length": 0.00206996
+ },
+ "VEN/UF_1/2016": {
+ "branch_length": 0.00067495
+ },
+ "ZKC2/2016": {
+ "branch_length": 0.00260941
+ }
+ }
+}
\ No newline at end of file
=====================================
tests/functional/refine/not_time_tree.nwk
=====================================
@@ -0,0 +1 @@
+(KX369547.1:0.00027828,ZKC2/2016:0.00260941,((EcEs062_16:0.00167718,DOM/2016/BB_0183:0.00226698)NODE_0000002:0.00018400,((HND/2016/HU_ME59:0.00125764,PRVABC59:0.00206996)NODE_0000004:0.00009300,(BRA/2016/FC_6706:0.00119745,(Colombia/2016/ZC204Se:0.00047184,(PAN/CDC_259359_V1_V3/2015:0.00083922,(COL/FLR_00008/2015:0.00046961,VEN/UF_1/2016:0.00067495)NODE_0000008:0.00057900)NODE_0000007:0.00019200)NODE_0000006:0.00094900)NODE_0000005:0.00020900)NODE_0000003:0.00000200)NODE_0000001:0.00065100)NODE_0000000:0.00100000;
=====================================
tests/functional/refine/tree.nwk
=====================================
@@ -0,0 +1 @@
+((Colombia/2016/ZC204Se:0.00105368,(PAN/CDC_259359_V1_V3/2015:0.00076051,(COL/FLR_00008/2015:0.00044440,VEN/UF_1/2016:0.00089377)NODE_0000008:0.00038502)NODE_0000007:0.00019253)NODE_0000001:0.00080159,(BRA/2016/FC_6706:0.00214920,(ZKC2/2016:0.00173693,(HND/2016/HU_ME59:0.00206150,PRVABC59:0.00135309)NODE_0000004:0.00013537,(EcEs062_16:0.00175918,DOM/2016/BB_0183:0.00184905)NODE_0000002:0.00021565)NODE_0000003:0.00013737)NODE_0000005:0.00019772)NODE_0000006:0.00100000;
=====================================
tests/functional/refine/tree_raw.nwk
=====================================
@@ -0,0 +1 @@
+(KX369547.1:0.00027828,(((BRA/2016/FC_6706:0.00119745,((PAN/CDC_259359_V1_V3/2015:0.00083922,(COL/FLR_00008/2015:0.00046961,VEN/UF_1/2016:0.00067495):0.00057900):0.00019200,Colombia/2016/ZC204Se:0.00047184):0.00094900):0.00020900,(HND/2016/HU_ME59:0.00125764,PRVABC59:0.00206996):0.00009300):0.00000200,(EcEs062_16:0.00167718,DOM/2016/BB_0183:0.00226698):0.00018400):0.00065100,ZKC2/2016:0.00260941):0.00000000;
=====================================
tests/test_validate_export.py
=====================================
@@ -0,0 +1,30 @@
+import Bio.Phylo
+from io import StringIO
+from pathlib import Path
+import pytest
+import sys
+
+# we assume (and assert) that this script is running from the tests/ directory
+sys.path.append(str(Path(__file__).parent.parent.parent))
+
+from augur.export_v2 import convert_tree_to_json_structure
+from augur.validate import ValidateError
+from augur.validate_export import ensure_no_duplicate_names
+
+
+class TestValidateExport():
+ def test_export_without_duplicate_names(self):
+ # Create a tree with unique tip names.
+ tree = Bio.Phylo.read(StringIO("root(A, internal(B, C))"), "newick")
+ metadata = {"A": {}, "B": {}, "C": {}, "root": {}, "internal": {}}
+ root = convert_tree_to_json_structure(tree.root, metadata)
+ ensure_no_duplicate_names(root, ValidateError)
+
+ def test_export_with_duplicate_names(self):
+ # Create a tree with duplicate tip names.
+ tree = Bio.Phylo.read(StringIO("root(A, internal(B, B))"), "newick")
+ metadata = {"A": {}, "B": {}, "root": {}, "internal": {}}
+ root = convert_tree_to_json_structure(tree.root, metadata)
+
+ with pytest.raises(ValidateError):
+ ensure_no_duplicate_names(root, ValidateError)
View it on GitLab: https://salsa.debian.org/med-team/augur/-/compare/7b51edd9e8746f5e33f89d484425a43ae3eadbfc...9818755762c227274d37d65366e6dd8468dc70f8
--
View it on GitLab: https://salsa.debian.org/med-team/augur/-/compare/7b51edd9e8746f5e33f89d484425a43ae3eadbfc...9818755762c227274d37d65366e6dd8468dc70f8
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20201025/c33d6f51/attachment-0001.html>
More information about the debian-med-commit
mailing list