[med-svn] [Git][med-team/augur][upstream] New upstream version 10.0.3

Nilesh Patra gitlab at salsa.debian.org
Sun Oct 25 09:24:09 GMT 2020



Nilesh Patra pushed to branch upstream at Debian Med / augur


Commits:
68d6dee1 by Nilesh Patra at 2020-10-25T14:44:04+05:30
New upstream version 10.0.3
- - - - -


14 changed files:

- CHANGES.md
- augur/__version__.py
- augur/data/schema-export-v2.json
- augur/refine.py
- augur/validate_export.py
- + tests/functional/refine.t
- + tests/functional/refine/aligned.fasta
- + tests/functional/refine/integer_branch_lengths.json
- + tests/functional/refine/metadata.tsv
- + tests/functional/refine/mutations_per_site_branch_lengths.json
- + tests/functional/refine/not_time_tree.nwk
- + tests/functional/refine/tree.nwk
- + tests/functional/refine/tree_raw.nwk
- + tests/test_validate_export.py


Changes:

=====================================
CHANGES.md
=====================================
@@ -3,6 +3,16 @@
 ## __NEXT__
 
 
+## 10.0.3 (23 October 2020)
+
+### Bug Fixes
+
+* refine: Report divergence by number of mutations as an integer instead of a floating point value [#618][]
+* validate: Allow internal nodes with a single child and do not allow duplicate node names [#621][]
+
+[#618]: https://github.com/nextstrain/augur/pull/618
+[#621]: https://github.com/nextstrain/augur/pull/621
+
 ## 10.0.2 (8 September 2020)
 
 ### Bug Fixes


=====================================
augur/__version__.py
=====================================
@@ -1,4 +1,4 @@
-__version__ = '10.0.2'
+__version__ = '10.0.3'
 
 
 def is_augur_version_compatible(version):


=====================================
augur/data/schema-export-v2.json
=====================================
@@ -445,9 +445,9 @@
                 },
                 "children": {
                     "description": "Child nodes. Recursive structure. Terminal nodes do not have this property.",
-                    "$comment": "Polytomies (more than 2 items) allowed.",
+                    "$comment": "Polytomies (more than 2 items) allowed, as are nodes with a single child.",
                     "type": "array",
-                    "minItems": 2,
+                    "minItems": 1,
                     "items": {"$ref": "#/properties/tree"}
                 }
             }


=====================================
augur/refine.py
=====================================
@@ -6,7 +6,7 @@ import os, shutil, time, sys
 from Bio import Phylo
 from .utils import read_metadata, read_tree, get_numerical_dates, write_json, InvalidTreeError
 from treetime.vcf_utils import read_vcf, write_vcf
-
+from treetime.seq_utils import profile_maps
 
 def refine(tree=None, aln=None, ref=None, dates=None, branch_length_inference='auto',
              confidence=False, resolve_polytomies=True, max_iter=2, precision='auto',
@@ -145,10 +145,15 @@ def run(args):
         return 1
 
     if not args.alignment:
-        # fake alignment to appease treetime when only using it for naming nodes...
         if args.timetree:
-            print("ERROR: alignment is required for ancestral reconstruction or timetree inference")
+            print("ERROR: alignment is required for ancestral reconstruction or timetree inference", file=sys.stderr)
             return 1
+
+        if args.divergence_units=='mutations':
+            print("ERROR: alignment is required for divergence in units of mutations", file=sys.stderr)
+            return 1
+
+        # fake alignment to appease treetime when only using it for naming nodes...
         from Bio import SeqRecord, Seq, Align
         seqs = []
         for n in T.get_terminals():
@@ -156,7 +161,7 @@ def run(args):
         aln = Align.MultipleSeqAlignment(seqs)
     elif any([args.alignment.lower().endswith(x) for x in ['.vcf', '.vcf.gz']]):
         if not args.vcf_reference:
-            print("ERROR: a reference Fasta is required with VCF-format alignments")
+            print("ERROR: a reference Fasta is required with VCF-format alignments", file=sys.stderr)
             return 1
 
         compress_seq = read_vcf(args.alignment, args.vcf_reference)
@@ -185,7 +190,7 @@ def run(args):
     if args.timetree:
         # load meta data and covert dates to numeric
         if args.metadata is None:
-            print("ERROR: meta data with dates is required for time tree reconstruction")
+            print("ERROR: meta data with dates is required for time tree reconstruction", file=sys.stderr)
             return 1
         metadata, columns = read_metadata(args.metadata)
         if args.year_bounds:
@@ -217,7 +222,7 @@ def run(args):
                 node_data['skyline'] = [[float(x) for x in skyline.x], [float(y) for y in conf[0]],
                                         [float(y) for y in skyline.y], [float(y) for y in conf[1]]]
             except:
-                print("ERROR: skyline optimization by TreeTime has failed.")
+                print("ERROR: skyline optimization by TreeTime has failed.", file=sys.stderr)
                 return 1
 
         attributes.extend(['numdate', 'clock_length', 'mutation_length', 'raw_date', 'date'])
@@ -241,13 +246,35 @@ def run(args):
     if args.divergence_units=='mutations-per-site': #default
         pass
     elif args.divergence_units=='mutations':
-        L = tt.seq_len
-        for node in node_data['nodes']:
+        if not args.timetree:
+            tt.infer_ancestral_sequences()
+        nuc_map = profile_maps['nuc']
+
+        def are_sequence_states_different(nuc1, nuc2):
+            '''
+            determine whether two ancestral states should count as a mutation for divergence estimates
+            while correctly accounting for ambiguous nucleotides
+            '''
+            if nuc1 in ['-', 'N'] or nuc2 in ['-', 'N']:
+                return False
+            elif nuc1 in nuc_map and nuc2 in nuc_map:
+                return np.sum(nuc_map[nuc1]*nuc_map[nuc2])==0
+            else:
+                return False
+
+        for node in T.find_clades():
+            n_muts = len([
+                position
+                for ancestral, position, derived in node.mutations
+                if are_sequence_states_different(ancestral, derived)
+            ])
+
             if args.timetree:
-                node_data['nodes'][node]['mutation_length'] *= L
-            node_data['nodes'][node]['branch_length'] *= L
+                node_data['nodes'][node.name]['mutation_length'] = n_muts
+
+            node_data['nodes'][node.name]['branch_length'] = n_muts
     else:
-        print("ERROR: divergence unit",args.divergence_units,"not supported!")
+        print("ERROR: divergence unit",args.divergence_units,"not supported!", file=sys.stderr)
         return 1
 
     # Export refined tree and node data


=====================================
augur/validate_export.py
=====================================
@@ -7,6 +7,20 @@ and refactored over time.
 import sys
 from collections import defaultdict
 
+def ensure_no_duplicate_names(root, ValidateError):
+    """
+    Check that all node names are unique, which is required for auspice (v2) JSONs.
+    """
+    names = set()
+    def recurse(node):
+        if node["name"] in names:
+            raise ValidateError(f"Node {node['name']} appears multiple times in the tree.")
+        names.add(node["name"])
+        if "children" in node:
+            [recurse(child) for child in node["children"]]
+    recurse(root)
+
+
 def collectTreeAttrsV2(root, warn):
     """
     Collect all keys specified on `node["node_attrs"]` throughout the tree
@@ -82,6 +96,8 @@ def verifyMainJSONIsInternallyConsistent(data, ValidateError):
 
     print("Validating that the JSON is internally consistent...")
 
+    ensure_no_duplicate_names(data["tree"], ValidateError)
+
     if "entropy" in data["meta"]["panels"] and "genome_annotations" not in data["meta"]:
         warn("The entropy panel has been specified but annotations don't exist.")
 


=====================================
tests/functional/refine.t
=====================================
@@ -0,0 +1,117 @@
+Integration tests for augur refine.
+
+  $ pushd "$TESTDIR" > /dev/null
+  $ export AUGUR="../../bin/augur"
+
+Try building a time tree.
+
+  $ ${AUGUR} refine \
+  >  --tree "refine/tree_raw.nwk" \
+  >  --alignment "refine/aligned.fasta" \
+  >  --metadata "refine/metadata.tsv" \
+  >  --output-tree "$TMP/tree.nwk" \
+  >  --output-node-data "$TMP/branch_lengths.json" \
+  >  --timetree \
+  >  --coalescent opt \
+  >  --date-confidence \
+  >  --date-inference marginal \
+  >  --clock-filter-iqd 4 \
+  >  --seed 314159 > /dev/null
+  */treetime/aa_models.py:108: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray (glob)
+    [0.800038509951648, 1.20274751778601, 1.55207513886163, 1.46600946033173, 0.830022143283238, 1.5416250309563, 1.53255698189437, 1.41208067821187, 1.47469999960758, 0.351200119909572, 0.570542199221932, 1.21378822764856, 0.609532859331199, 0.692733248746636, 1.40887880416009, 1.02015839286433, 0.807404666228614, 1.268589159299, 0.933095433689795]
+
+Confirm that TreeTime trees match expected topology and branch lengths.
+
+  $ python3 "$TESTDIR/../../scripts/diff_trees.py" "refine/tree.nwk" "$TMP/tree.nwk" --significant-digits 2
+  {}
+
+Build a time tree with mutations as the reported divergence unit.
+
+  $ ${AUGUR} refine \
+  >  --tree "refine/tree_raw.nwk" \
+  >  --alignment "refine/aligned.fasta" \
+  >  --metadata "refine/metadata.tsv" \
+  >  --output-tree "$TMP/tree.nwk" \
+  >  --output-node-data "$TMP/branch_lengths.json" \
+  >  --timetree \
+  >  --coalescent opt \
+  >  --date-confidence \
+  >  --date-inference marginal \
+  >  --clock-filter-iqd 4 \
+  >  --seed 314159 \
+  >  --divergence-units mutations > /dev/null
+  */treetime/aa_models.py:108: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray (glob)
+    [0.800038509951648, 1.20274751778601, 1.55207513886163, 1.46600946033173, 0.830022143283238, 1.5416250309563, 1.53255698189437, 1.41208067821187, 1.47469999960758, 0.351200119909572, 0.570542199221932, 1.21378822764856, 0.609532859331199, 0.692733248746636, 1.40887880416009, 1.02015839286433, 0.807404666228614, 1.268589159299, 0.933095433689795]
+
+Confirm that TreeTime trees match expected topology and branch lengths.
+
+  $ python3 "$TESTDIR/../../scripts/diff_trees.py" "refine/tree.nwk" "$TMP/tree.nwk" --significant-digits 2
+  {}
+
+Run refine without inferring a time tree.
+This is one way to get named internal nodes for downstream analyses and does not require an alignment FASTA.
+
+  $ ${AUGUR} refine \
+  >  --tree "refine/tree_raw.nwk" \
+  >  --metadata "refine/metadata.tsv" \
+  >  --output-tree "$TMP/tree.nwk" \
+  >  --output-node-data "$TMP/branch_lengths.json" \
+  >  --coalescent opt \
+  >  --date-confidence \
+  >  --date-inference marginal \
+  >  --clock-filter-iqd 4 \
+  >  --seed 314159 \
+  >  --divergence-units mutations-per-site > /dev/null
+  */treetime/aa_models.py:108: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray (glob)
+    [0.800038509951648, 1.20274751778601, 1.55207513886163, 1.46600946033173, 0.830022143283238, 1.5416250309563, 1.53255698189437, 1.41208067821187, 1.47469999960758, 0.351200119909572, 0.570542199221932, 1.21378822764856, 0.609532859331199, 0.692733248746636, 1.40887880416009, 1.02015839286433, 0.807404666228614, 1.268589159299, 0.933095433689795]
+
+Confirm that trees match expected topology and branch lengths, given that the output should not be a time tree.
+
+  $ python3 "$TESTDIR/../../scripts/diff_trees.py" "refine/not_time_tree.nwk" "$TMP/tree.nwk" --significant-digits 2
+  {}
+  $ python3 "$TESTDIR/../../scripts/diff_jsons.py" "refine/mutations_per_site_branch_lengths.json" "$TMP/branch_lengths.json" --significant-digits 0
+  {}
+
+Run refine again without a time tree, but request number of mutations per branch as the divergence unit.
+This approach only works when we provide an alignment FASTA.
+
+  $ ${AUGUR} refine \
+  >  --tree "refine/tree_raw.nwk" \
+  >  --alignment "refine/aligned.fasta" \
+  >  --metadata "refine/metadata.tsv" \
+  >  --output-tree "$TMP/tree.nwk" \
+  >  --output-node-data "$TMP/branch_lengths.json" \
+  >  --coalescent opt \
+  >  --date-confidence \
+  >  --date-inference marginal \
+  >  --clock-filter-iqd 4 \
+  >  --seed 314159 \
+  >  --divergence-units mutations > /dev/null
+  */treetime/aa_models.py:108: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray (glob)
+    [0.800038509951648, 1.20274751778601, 1.55207513886163, 1.46600946033173, 0.830022143283238, 1.5416250309563, 1.53255698189437, 1.41208067821187, 1.47469999960758, 0.351200119909572, 0.570542199221932, 1.21378822764856, 0.609532859331199, 0.692733248746636, 1.40887880416009, 1.02015839286433, 0.807404666228614, 1.268589159299, 0.933095433689795]
+
+Confirm that trees match expected topology and branch lengths, given that the output should not be a time tree.
+
+  $ python3 "$TESTDIR/../../scripts/diff_trees.py" "refine/not_time_tree.nwk" "$TMP/tree.nwk" --significant-digits 2
+  {}
+  $ python3 "$TESTDIR/../../scripts/diff_jsons.py" "refine/integer_branch_lengths.json" "$TMP/branch_lengths.json" --significant-digits 0
+  {}
+
+Run refine again without a time tree, but try to request number of mutations per branch as the divergence unit.
+This approach does not make sense and should not work without an alignment FASTA.
+
+  $ ${AUGUR} refine \
+  >  --tree "refine/tree_raw.nwk" \
+  >  --metadata "refine/metadata.tsv" \
+  >  --output-tree "$TMP/tree.nwk" \
+  >  --output-node-data "$TMP/branch_lengths.json" \
+  >  --coalescent opt \
+  >  --date-confidence \
+  >  --date-inference marginal \
+  >  --clock-filter-iqd 4 \
+  >  --seed 314159 \
+  >  --divergence-units mutations > /dev/null
+  *ERROR: alignment is required* (glob)
+  [1]
+
+  $ popd > /dev/null


=====================================
tests/functional/refine/aligned.fasta
=====================================
The diff for this file was not included because it is too large.

=====================================
tests/functional/refine/integer_branch_lengths.json
=====================================
@@ -0,0 +1,70 @@
+{
+  "alignment": "refine/aligned.fasta",
+  "generated_by": {
+    "program": "augur",
+    "version": "10.0.2"
+  },
+  "input_tree": "refine/tree_raw.nwk",
+  "nodes": {
+    "BRA/2016/FC_6706": {
+      "branch_length": 12
+    },
+    "COL/FLR_00008/2015": {
+      "branch_length": 5
+    },
+    "Colombia/2016/ZC204Se": {
+      "branch_length": 5
+    },
+    "DOM/2016/BB_0183": {
+      "branch_length": 24
+    },
+    "EcEs062_16": {
+      "branch_length": 18
+    },
+    "HND/2016/HU_ME59": {
+      "branch_length": 13
+    },
+    "KX369547.1": {
+      "branch_length": 3
+    },
+    "NODE_0000000": {
+      "branch_length": 0
+    },
+    "NODE_0000001": {
+      "branch_length": 7
+    },
+    "NODE_0000002": {
+      "branch_length": 2
+    },
+    "NODE_0000003": {
+      "branch_length": 0
+    },
+    "NODE_0000004": {
+      "branch_length": 1
+    },
+    "NODE_0000005": {
+      "branch_length": 2
+    },
+    "NODE_0000006": {
+      "branch_length": 11
+    },
+    "NODE_0000007": {
+      "branch_length": 2
+    },
+    "NODE_0000008": {
+      "branch_length": 6
+    },
+    "PAN/CDC_259359_V1_V3/2015": {
+      "branch_length": 9
+    },
+    "PRVABC59": {
+      "branch_length": 22
+    },
+    "VEN/UF_1/2016": {
+      "branch_length": 7
+    },
+    "ZKC2/2016": {
+      "branch_length": 28
+    }
+  }
+}
\ No newline at end of file


=====================================
tests/functional/refine/metadata.tsv
=====================================
@@ -0,0 +1,13 @@
+strain	virus	accession	date	region	country	division	city	db	segment	authors	url	title	journal	paper_url
+PAN/CDC_259359_V1_V3/2015	zika	KX156774	2015-12-18	North America	Panama	Panama	Panama	genbank	genome	Shabman et al	https://www.ncbi.nlm.nih.gov/nuccore/KX156774	Direct Submission	Submitted (29-APR-2016) J. Craig Venter Institute, 9704 Medical Center Drive, Rockville, MD 20850, USA	https://www.ncbi.nlm.nih.gov/pubmed/
+COL/FLR_00024/2015	zika	MF574569	2015-12-XX	South America	Colombia	Colombia	Colombia	genbank	genome	Pickett et al	https://www.ncbi.nlm.nih.gov/nuccore/MF574569	Direct Submission	Submitted (28-JUL-2017) J. Craig Venter Institute, 9704 Medical Center Drive, Rockville, MD 20850, USA	https://www.ncbi.nlm.nih.gov/pubmed/
+PRVABC59	zika	KU501215	2015-12-XX	North America	Puerto Rico	Puerto Rico	Puerto Rico	genbank	genome	Lanciotti et al	https://www.ncbi.nlm.nih.gov/nuccore/KU501215	Phylogeny of Zika Virus in Western Hemisphere, 2015	Emerging Infect. Dis. 22 (5), 933-935 (2016)	https://www.ncbi.nlm.nih.gov/pubmed/27088323
+COL/FLR_00008/2015	zika	MF574562	2015-12-XX	South America	Colombia	Colombia	Colombia	genbank	genome	Pickett et al	https://www.ncbi.nlm.nih.gov/nuccore/MF574562	Direct Submission	Submitted (28-JUL-2017) J. Craig Venter Institute, 9704 Medical Center Drive, Rockville, MD 20850, USA	https://www.ncbi.nlm.nih.gov/pubmed/
+Colombia/2016/ZC204Se	zika	KY317939	2016-01-06	South America	Colombia	Colombia	Colombia	genbank	genome	Quick et al	https://www.ncbi.nlm.nih.gov/nuccore/KY317939	Multiplex PCR method for MinION and Illumina sequencing of Zika and other virus genomes directly from clinical samples	Nat Protoc 12 (6), 1261-1276 (2017)	https://www.ncbi.nlm.nih.gov/pubmed/28538739
+ZKC2/2016	zika	KX253996	2016-02-16	Oceania	American Samoa	American Samoa	American Samoa	genbank	genome	Wu et al	https://www.ncbi.nlm.nih.gov/nuccore/KX253996	Direct Submission	Submitted (18-MAY-2016) Center for Diseases Control and Prevention of Guangdong Province; National Institute of Viral Disease Control and Prevention, China	https://www.ncbi.nlm.nih.gov/pubmed/
+VEN/UF_1/2016	zika	KX702400	2016-03-25	South America	Venezuela	Venezuela	Venezuela	genbank	genome	Blohm et al	https://www.ncbi.nlm.nih.gov/nuccore/KX702400	Complete Genome Sequences of Identical Zika virus Isolates in a Nursing Mother and Her Infant	Genome Announc 5 (17), e00231-17 (2017)	https://www.ncbi.nlm.nih.gov/pubmed/28450510
+DOM/2016/BB_0059	zika	KY785425	2016-04-04	North America	Dominican Republic	Dominican Republic	Dominican Republic	genbank	genome	Metsky et al	https://www.ncbi.nlm.nih.gov/nuccore/KY785425	Zika virus evolution and spread in the Americas	Nature 546 (7658), 411-415 (2017)	https://www.ncbi.nlm.nih.gov/pubmed/28538734
+BRA/2016/FC_6706	zika	KY785433	2016-04-08	South America	Brazil	Brazil	Brazil	genbank	genome	Metsky et al	https://www.ncbi.nlm.nih.gov/nuccore/KY785433	Zika virus evolution and spread in the Americas	Nature 546 (7658), 411-415 (2017)	https://www.ncbi.nlm.nih.gov/pubmed/28538734
+DOM/2016/BB_0183	zika	KY785420	2016-04-18	North America	Dominican Republic	Dominican Republic	Dominican Republic	genbank	genome	Metsky et al	https://www.ncbi.nlm.nih.gov/nuccore/KY785420	Zika virus evolution and spread in the Americas	Nature 546 (7658), 411-415 (2017)	https://www.ncbi.nlm.nih.gov/pubmed/28538734
+EcEs062_16	zika	KX879603	2016-04-XX	South America	Ecuador	Ecuador	Ecuador	genbank	genome	Marquez et al	https://www.ncbi.nlm.nih.gov/nuccore/KX879603	First Complete Genome Sequences of Zika Virus Isolated from Febrile Patient Sera in Ecuador	Genome Announc 5 (8), e01673-16 (2017)	https://www.ncbi.nlm.nih.gov/pubmed/28232448
+HND/2016/HU_ME59	zika	KY785418	2016-05-13	North America	Honduras	Honduras	Honduras	genbank	genome	Metsky et al	https://www.ncbi.nlm.nih.gov/nuccore/KY785418	Zika virus evolution and spread in the Americas	Nature 546 (7658), 411-415 (2017)	https://www.ncbi.nlm.nih.gov/pubmed/28538734


=====================================
tests/functional/refine/mutations_per_site_branch_lengths.json
=====================================
@@ -0,0 +1,70 @@
+{
+  "alignment": null,
+  "generated_by": {
+    "program": "augur",
+    "version": "10.0.2"
+  },
+  "input_tree": "refine/tree_raw.nwk",
+  "nodes": {
+    "BRA/2016/FC_6706": {
+      "branch_length": 0.00119745
+    },
+    "COL/FLR_00008/2015": {
+      "branch_length": 0.00046961
+    },
+    "Colombia/2016/ZC204Se": {
+      "branch_length": 0.00047184
+    },
+    "DOM/2016/BB_0183": {
+      "branch_length": 0.00226698
+    },
+    "EcEs062_16": {
+      "branch_length": 0.00167718
+    },
+    "HND/2016/HU_ME59": {
+      "branch_length": 0.00125764
+    },
+    "KX369547.1": {
+      "branch_length": 0.00027828
+    },
+    "NODE_0000000": {
+      "branch_length": 0.001
+    },
+    "NODE_0000001": {
+      "branch_length": 0.000651
+    },
+    "NODE_0000002": {
+      "branch_length": 0.000184
+    },
+    "NODE_0000003": {
+      "branch_length": 2e-06
+    },
+    "NODE_0000004": {
+      "branch_length": 9.3e-05
+    },
+    "NODE_0000005": {
+      "branch_length": 0.000209
+    },
+    "NODE_0000006": {
+      "branch_length": 0.000949
+    },
+    "NODE_0000007": {
+      "branch_length": 0.000192
+    },
+    "NODE_0000008": {
+      "branch_length": 0.000579
+    },
+    "PAN/CDC_259359_V1_V3/2015": {
+      "branch_length": 0.00083922
+    },
+    "PRVABC59": {
+      "branch_length": 0.00206996
+    },
+    "VEN/UF_1/2016": {
+      "branch_length": 0.00067495
+    },
+    "ZKC2/2016": {
+      "branch_length": 0.00260941
+    }
+  }
+}
\ No newline at end of file


=====================================
tests/functional/refine/not_time_tree.nwk
=====================================
@@ -0,0 +1 @@
+(KX369547.1:0.00027828,ZKC2/2016:0.00260941,((EcEs062_16:0.00167718,DOM/2016/BB_0183:0.00226698)NODE_0000002:0.00018400,((HND/2016/HU_ME59:0.00125764,PRVABC59:0.00206996)NODE_0000004:0.00009300,(BRA/2016/FC_6706:0.00119745,(Colombia/2016/ZC204Se:0.00047184,(PAN/CDC_259359_V1_V3/2015:0.00083922,(COL/FLR_00008/2015:0.00046961,VEN/UF_1/2016:0.00067495)NODE_0000008:0.00057900)NODE_0000007:0.00019200)NODE_0000006:0.00094900)NODE_0000005:0.00020900)NODE_0000003:0.00000200)NODE_0000001:0.00065100)NODE_0000000:0.00100000;


=====================================
tests/functional/refine/tree.nwk
=====================================
@@ -0,0 +1 @@
+((Colombia/2016/ZC204Se:0.00105368,(PAN/CDC_259359_V1_V3/2015:0.00076051,(COL/FLR_00008/2015:0.00044440,VEN/UF_1/2016:0.00089377)NODE_0000008:0.00038502)NODE_0000007:0.00019253)NODE_0000001:0.00080159,(BRA/2016/FC_6706:0.00214920,(ZKC2/2016:0.00173693,(HND/2016/HU_ME59:0.00206150,PRVABC59:0.00135309)NODE_0000004:0.00013537,(EcEs062_16:0.00175918,DOM/2016/BB_0183:0.00184905)NODE_0000002:0.00021565)NODE_0000003:0.00013737)NODE_0000005:0.00019772)NODE_0000006:0.00100000;


=====================================
tests/functional/refine/tree_raw.nwk
=====================================
@@ -0,0 +1 @@
+(KX369547.1:0.00027828,(((BRA/2016/FC_6706:0.00119745,((PAN/CDC_259359_V1_V3/2015:0.00083922,(COL/FLR_00008/2015:0.00046961,VEN/UF_1/2016:0.00067495):0.00057900):0.00019200,Colombia/2016/ZC204Se:0.00047184):0.00094900):0.00020900,(HND/2016/HU_ME59:0.00125764,PRVABC59:0.00206996):0.00009300):0.00000200,(EcEs062_16:0.00167718,DOM/2016/BB_0183:0.00226698):0.00018400):0.00065100,ZKC2/2016:0.00260941):0.00000000;


=====================================
tests/test_validate_export.py
=====================================
@@ -0,0 +1,30 @@
+import Bio.Phylo
+from io import StringIO
+from pathlib import Path
+import pytest
+import sys
+
+# we assume that this script is running from the tests/ directory
+sys.path.append(str(Path(__file__).parent.parent.parent))
+
+from augur.export_v2 import convert_tree_to_json_structure
+from augur.validate import ValidateError
+from augur.validate_export import ensure_no_duplicate_names
+
+
+class TestValidateExport():
+    def test_export_without_duplicate_names(self):
+        # Create a tree with unique tip names.
+        tree = Bio.Phylo.read(StringIO("root(A, internal(B, C))"), "newick")
+        metadata = {"A": {}, "B": {}, "C": {}, "root": {}, "internal": {}}
+        root = convert_tree_to_json_structure(tree.root, metadata)
+        ensure_no_duplicate_names(root, ValidateError)
+
+    def test_export_with_duplicate_names(self):
+        # Create a tree with duplicate tip names.
+        tree = Bio.Phylo.read(StringIO("root(A, internal(B, B))"), "newick")
+        metadata = {"A": {}, "B": {}, "root": {}, "internal": {}}
+        root = convert_tree_to_json_structure(tree.root, metadata)
+
+        with pytest.raises(ValidateError):
+            ensure_no_duplicate_names(root, ValidateError)



View it on GitLab: https://salsa.debian.org/med-team/augur/-/commit/68d6dee105679cee78446c2a26a1930c0dd2b117

-- 
View it on GitLab: https://salsa.debian.org/med-team/augur/-/commit/68d6dee105679cee78446c2a26a1930c0dd2b117
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20201025/35d9feb6/attachment-0001.html>


More information about the debian-med-commit mailing list