[med-svn] [r-cran-rotl] 04/12: New upstream version 3.0.1

Andreas Tille tille at debian.org
Sat Sep 23 06:49:55 UTC 2017


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository r-cran-rotl.

commit 8624bd0b3586522abb92a6b807d6a361af18fed3
Author: Andreas Tille <tille at debian.org>
Date:   Sat Sep 23 08:11:45 2017 +0200

    New upstream version 3.0.1
---
 DESCRIPTION                              |  32 ++
 LICENSE                                  |   2 +
 MD5                                      |  87 ++++
 NAMESPACE                                | 116 ++++++
 NEWS.md                                  | 104 +++++
 R/api-collections.R                      |  28 ++
 R/api-studies.R                          | 163 ++++++++
 R/api-taxonomy.R                         |  68 ++++
 R/api-tnrs.R                             |  70 ++++
 R/api-tol.R                              | 126 ++++++
 R/base.R                                 | 204 ++++++++++
 R/deduplicate_labels.R                   |  39 ++
 R/external_data.R                        | 119 ++++++
 R/match_names.R                          | 361 +++++++++++++++++
 R/methods.R                              |  93 +++++
 R/rotl-package.R                         |  50 +++
 R/studies-methods.R                      |  88 ++++
 R/studies-utils.R                        |  77 ++++
 R/studies.R                              | 461 +++++++++++++++++++++
 R/tax_utils.R                            |  73 ++++
 R/taxonomy.R                             | 325 +++++++++++++++
 R/tnrs.R                                 | 250 ++++++++++++
 R/tol.R                                  | 668 +++++++++++++++++++++++++++++++
 R/tree_to_labels.R                       |  48 +++
 README.md                                | 155 +++++++
 build/vignette.rds                       | Bin 0 -> 316 bytes
 debian/README.test                       |   8 -
 debian/changelog                         |   5 -
 debian/compat                            |   1 -
 debian/control                           |  31 --
 debian/copyright                         |  37 --
 debian/docs                              |   3 -
 debian/rules                             |   8 -
 debian/source/format                     |   1 -
 debian/tests/control                     |   3 -
 debian/tests/run-unit-test               |  11 -
 debian/watch                             |   2 -
 inst/CITATION                            |  20 +
 inst/doc/data_mashups.R                  |  61 +++
 inst/doc/data_mashups.Rmd                | 222 ++++++++++
 inst/doc/data_mashups.html               | 362 +++++++++++++++++
 inst/doc/how-to-use-rotl.R               |  75 ++++
 inst/doc/how-to-use-rotl.Rmd             | 283 +++++++++++++
 inst/doc/how-to-use-rotl.html            | 446 +++++++++++++++++++++
 inst/doc/meta-analysis.R                 |  66 +++
 inst/doc/meta-analysis.Rmd               | 231 +++++++++++
 inst/doc/meta-analysis.html              | 216 ++++++++++
 inst/extdata/egg.csv                     |  66 +++
 inst/extdata/protist_mutation_rates.csv  |   7 +
 man/get_study.Rd                         |  60 +++
 man/get_study_meta.Rd                    |  80 ++++
 man/get_study_subtree.Rd                 |  44 ++
 man/get_study_tree.Rd                    |  61 +++
 man/list_trees.Rd                        |  41 ++
 man/match_names-methods.Rd               |  60 +++
 man/match_names.Rd                       |  77 ++++
 man/rotl.Rd                              |  56 +++
 man/source_list.Rd                       |  31 ++
 man/strip_ott_ids.Rd                     |  32 ++
 man/studies_find_studies.Rd              |  71 ++++
 man/studies_find_trees.Rd                |  80 ++++
 man/studies_properties.Rd                |  39 ++
 man/study_external_IDs.Rd                |  43 ++
 man/synonyms.match_names.Rd              |  55 +++
 man/tax_lineage.Rd                       |  32 ++
 man/taxon_external_IDs.Rd                |  32 ++
 man/taxonomy-methods.Rd                  |  43 ++
 man/taxonomy_about.Rd                    |  44 ++
 man/taxonomy_mrca.Rd                     |  73 ++++
 man/taxonomy_subtree.Rd                  |  68 ++++
 man/taxonomy_taxon_info.Rd               |  85 ++++
 man/tnrs_contexts.Rd                     |  32 ++
 man/tnrs_infer_context.Rd                |  35 ++
 man/tnrs_match_names.Rd                  |  82 ++++
 man/tol_about.Rd                         | 123 ++++++
 man/tol_induced_subtree.Rd               |  50 +++
 man/tol_mrca.Rd                          | 157 ++++++++
 man/tol_node_info.Rd                     | 146 +++++++
 man/tol_subtree.Rd                       |  46 +++
 tests/test-all.R                         |   3 +
 tests/testthat/test-API.R                | 195 +++++++++
 tests/testthat/test-api-studies.R        | 195 +++++++++
 tests/testthat/test-api-taxonomy.R       |  87 ++++
 tests/testthat/test-api-tnrs.R           |  66 +++
 tests/testthat/test-api-tol.R            |  69 ++++
 tests/testthat/test-base.R               |  55 +++
 tests/testthat/test-deduplicate_labels.R |  45 +++
 tests/testthat/test-external.R           |  39 ++
 tests/testthat/test-match_names.R        | 408 +++++++++++++++++++
 tests/testthat/test-studies.R            | 484 ++++++++++++++++++++++
 tests/testthat/test-taxonomy.R           | 341 ++++++++++++++++
 tests/testthat/test-tnrs.R               |  67 ++++
 tests/testthat/test-tol.R                | 456 +++++++++++++++++++++
 tests/testthat/test-tree_to_labels.R     |  51 +++
 tests/tree_of_life.json                  | 111 +++++
 vignettes/data_mashups.Rmd               | 222 ++++++++++
 vignettes/how-to-use-rotl.Rmd            | 283 +++++++++++++
 vignettes/meta-analysis.Rmd              | 231 +++++++++++
 vignettes/vignette.css                   | 207 ++++++++++
 99 files changed, 11355 insertions(+), 110 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
new file mode 100644
index 0000000..460558c
--- /dev/null
+++ b/DESCRIPTION
@@ -0,0 +1,32 @@
+Package: rotl
+Title: Interface to the 'Open Tree of Life' API
+Version: 3.0.1
+Authors at R: c(
+  person("Francois", "Michonneau", role=c("aut", "cre"),
+         email="francois.michonneau at gmail.com"),
+  person("Joseph", "Brown", role="aut"),
+  person("David", "Winter", role="aut"))
+Description: An interface to the 'Open Tree of Life' API to retrieve
+    phylogenetic trees, information about studies used to assemble the synthetic
+    tree, and utilities to match taxonomic names to 'Open Tree identifiers'. The
+    'Open Tree of Life' aims at assembling a comprehensive phylogenetic tree for all
+    named species.
+URL: https://github.com/ropensci/rotl
+BugReports: https://github.com/ropensci/rotl/issues
+Depends: R (>= 3.1.1)
+Imports: httr, jsonlite, assertthat (>= 0.1), rncl (>= 0.6.0), ape,
+        rentrez
+License: BSD_2_clause + file LICENSE
+Suggests: knitr (>= 1.12), rmarkdown (>= 0.7), testthat, RNeXML,
+        phylobase, MCMCglmm, fulltext (>= 0.1.6), readxl
+VignetteBuilder: knitr
+LazyData: true
+RoxygenNote: 5.0.1.9000
+NeedsCompilation: no
+Packaged: 2016-09-19 20:21:54 UTC; francois
+Author: Francois Michonneau [aut, cre],
+  Joseph Brown [aut],
+  David Winter [aut]
+Maintainer: Francois Michonneau <francois.michonneau at gmail.com>
+Repository: CRAN
+Date/Publication: 2016-09-20 00:45:03
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..68de442
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,2 @@
+YEAR: 2016
+COPYRIGHT HOLDER: Francois Michonneau, Joseph W. Brown, David Winter
\ No newline at end of file
diff --git a/MD5 b/MD5
new file mode 100644
index 0000000..9a1c324
--- /dev/null
+++ b/MD5
@@ -0,0 +1,87 @@
+7652ea6d7ffe68af7d47e583c3447a6f *DESCRIPTION
+c0db011c3d740ac7b1bfd97861660487 *LICENSE
+52103d2d4b2a149f145c18da06b216f1 *NAMESPACE
+a79be268498bada7555d5f77b08eb33c *NEWS.md
+9a7c4a54d4f95926b66ed6e06476df86 *R/api-collections.R
+de19c1b60f39999478066198ec586cd3 *R/api-studies.R
+cf7e3dc77115551f7a666294970d090c *R/api-taxonomy.R
+08064573a536c31daf9a479eec383e25 *R/api-tnrs.R
+9049ace110a1de5d23d81cc09e1022b5 *R/api-tol.R
+f1d6f185eb969bcb30e8fa01828389b9 *R/base.R
+0b7d73e3fc74165f4a88ae6a6d3c08f8 *R/deduplicate_labels.R
+71769e61f96290be93a642ed5e7e1bf1 *R/external_data.R
+8f6a526c44396e5afe7a980ea0116d39 *R/match_names.R
+dc83309cfcec969f308dc5ef128b24c8 *R/methods.R
+885012a0fef7e432f1e3ab49ac05a621 *R/rotl-package.R
+ecec63dfc2513887fcc35f62ad8bbf10 *R/studies-methods.R
+e3689dd313ea6b632450c24f11ec56e8 *R/studies-utils.R
+bca9348daa609e2f03f5107bc678124d *R/studies.R
+6b70557ad769ce15aa747941be85903d *R/tax_utils.R
+3c36224fa41a50053831efb15ff2675c *R/taxonomy.R
+2b2d1c28d1e4b229a59780ba94059a47 *R/tnrs.R
+a884ca1e7339f1d913dc058a4d4a3653 *R/tol.R
+cadefaffbca9725850fbc2218753179a *R/tree_to_labels.R
+4cc01a99eb408fd699476cd28a8f76a0 *README.md
+5b69e412b9eb01d1c67779f8b9064e0f *build/vignette.rds
+8fea5362ecb8e33f8a73c289da7ff801 *inst/CITATION
+319582efa0a202fed973883555cf0e51 *inst/doc/data_mashups.R
+64c62d69bc9cec722c7f26e6d025ef35 *inst/doc/data_mashups.Rmd
+b7999410f9f487044fd1ec75cfdf7972 *inst/doc/data_mashups.html
+1ac42821867e8a575732c0cc48ae1104 *inst/doc/how-to-use-rotl.R
+7667f4f210b3fc36aa564a8c1ab4bcd5 *inst/doc/how-to-use-rotl.Rmd
+3ac44cb716cefb574425c56d1369d7bc *inst/doc/how-to-use-rotl.html
+eeaeb60fea9421f2f37f3b05d79034fb *inst/doc/meta-analysis.R
+0c9eebd21ffe483249d1b1c769e44d52 *inst/doc/meta-analysis.Rmd
+50f657b46970a015dd8ebd996c23a4ed *inst/doc/meta-analysis.html
+09d93b49b2b38f2495761ea86e134006 *inst/extdata/egg.csv
+5e5eb20ee387ba7192c62e629a3383f2 *inst/extdata/protist_mutation_rates.csv
+ea6a8fb50d20fd58b3072a9d7fbd0f01 *man/get_study.Rd
+7926e12f926142deb0ad4837110becdf *man/get_study_meta.Rd
+af93584e72106c40ef5f607259d9f616 *man/get_study_subtree.Rd
+4539fa72f42b2225f1860d52d3717a7e *man/get_study_tree.Rd
+20d9c6022758c5e82152bcc9fccc55dc *man/list_trees.Rd
+7e09ca72cc00b2319838296535c57b2d *man/match_names-methods.Rd
+1f9bc0f18b389ade4cf46d1c94888138 *man/match_names.Rd
+9f9c58fcedf44fecf5cfc9741ef54a3d *man/rotl.Rd
+e634c8c1f147fa2f39452f5b04db5dd8 *man/source_list.Rd
+e55bfa18aa6569359dc345326d24d05f *man/strip_ott_ids.Rd
+f1bf287630f8175d7358b522d5ad20bb *man/studies_find_studies.Rd
+7234766a4098b978eb9bcc21e41455b9 *man/studies_find_trees.Rd
+dad09a2555cc16f7a56b0200d8bc5f15 *man/studies_properties.Rd
+d306ed46f0e31121bf222bfe5e2df59c *man/study_external_IDs.Rd
+23b68f20f00f123a291f68a92844be96 *man/synonyms.match_names.Rd
+405f784cbcad294e767615d515389498 *man/tax_lineage.Rd
+2fe60e574b50374f14391b26bf0a903a *man/taxon_external_IDs.Rd
+f4fbe9e06a5c3542c59bdedaf53b9526 *man/taxonomy-methods.Rd
+2b4d81f1844a3561d12d1ccd680caaf0 *man/taxonomy_about.Rd
+5b0e808095fef1a19ece9cc8fa4a0f3f *man/taxonomy_mrca.Rd
+a8fb26f5f4fd69197416a6a4bbf3c470 *man/taxonomy_subtree.Rd
+95300f9b71cc6b82d4ad9eb990de4af9 *man/taxonomy_taxon_info.Rd
+99109d680828b91907f0db01094b9674 *man/tnrs_contexts.Rd
+8dcab348f50f8373446ffc1d3b2bad08 *man/tnrs_infer_context.Rd
+a8f2f0bea95cb247f8288e9758815225 *man/tnrs_match_names.Rd
+49026107b0a09e12070a80499021b377 *man/tol_about.Rd
+bec1c576d209985c3d66b7ee4a8db22f *man/tol_induced_subtree.Rd
+e572dd198f4bf21642ca0236482ecaa5 *man/tol_mrca.Rd
+8f8de8edd36bd21d9193b68dc935c973 *man/tol_node_info.Rd
+c6cef93ea9dbdabf4648a5cc6f4d32f7 *man/tol_subtree.Rd
+43c8ae96b072c5ffa02ee1476ab517af *tests/test-all.R
+8aa44593dfd95bba9fc05fe726562a4f *tests/testthat/test-API.R
+0978d0c6f16459de48230f0ca7242888 *tests/testthat/test-api-studies.R
+35dc779abfea3d163251ad3ba9681b06 *tests/testthat/test-api-taxonomy.R
+620a4b277bc83dc51d84c7dc4416b32d *tests/testthat/test-api-tnrs.R
+eec189b1b610d1fab12164c96ba55ff5 *tests/testthat/test-api-tol.R
+9a25dd598f4420dc68f2280ce323c367 *tests/testthat/test-base.R
+91e80d240d100527a3496b9c2040c771 *tests/testthat/test-deduplicate_labels.R
+51c9a683157c0ca97ca7fa7656dbf4a2 *tests/testthat/test-external.R
+4ead099ce223e9037f477fd200035935 *tests/testthat/test-match_names.R
+4aeb495b775e003fa9f273039a75cd19 *tests/testthat/test-studies.R
+c266628e0538911559aeb0cad7f9980d *tests/testthat/test-taxonomy.R
+57bcbfabee0c58d895dce12a81166027 *tests/testthat/test-tnrs.R
+da7d30c00521dcf6ebaa16a1ddd5dc50 *tests/testthat/test-tol.R
+85a49232f1097e80d7f059bea79d885d *tests/testthat/test-tree_to_labels.R
+fb4b19651907e0fdb26e6f4c0581af83 *tests/tree_of_life.json
+64c62d69bc9cec722c7f26e6d025ef35 *vignettes/data_mashups.Rmd
+7667f4f210b3fc36aa564a8c1ab4bcd5 *vignettes/how-to-use-rotl.Rmd
+0c9eebd21ffe483249d1b1c769e44d52 *vignettes/meta-analysis.Rmd
+da8cb33974fa673158fea8ccf2d8bbac *vignettes/vignette.css
diff --git a/NAMESPACE b/NAMESPACE
new file mode 100644
index 0000000..3d26d50
--- /dev/null
+++ b/NAMESPACE
@@ -0,0 +1,116 @@
+# Generated by roxygen2: do not edit by hand
+
+S3method(candidate_for_synth,study_meta)
+S3method(flags,match_names)
+S3method(flags,taxon_info)
+S3method(flags,taxon_mrca)
+S3method(get_publication,study_meta)
+S3method(get_study_year,study_meta)
+S3method(get_tree_ids,study_meta)
+S3method(inspect,match_names)
+S3method(is_suppressed,match_names)
+S3method(is_suppressed,taxon_info)
+S3method(is_suppressed,taxon_mrca)
+S3method(list_trees,matched_studies)
+S3method(ott_id,match_names)
+S3method(ott_id,taxon_info)
+S3method(ott_id,taxon_mrca)
+S3method(ott_id,tol_mrca)
+S3method(ott_id,tol_node)
+S3method(ott_id,tol_summary)
+S3method(print,study_external_data)
+S3method(print,study_ids)
+S3method(print,study_meta)
+S3method(print,tnrs_contexts)
+S3method(print,tol_mrca)
+S3method(print,tol_node)
+S3method(print,tol_summary)
+S3method(source_list,tol_mrca)
+S3method(source_list,tol_node)
+S3method(source_list,tol_summary)
+S3method(synonyms,match_names)
+S3method(synonyms,taxon_info)
+S3method(tax_lineage,taxon_info)
+S3method(tax_lineage,tol_node)
+S3method(tax_name,match_names)
+S3method(tax_name,taxon_info)
+S3method(tax_name,taxon_mrca)
+S3method(tax_name,tol_mrca)
+S3method(tax_name,tol_node)
+S3method(tax_name,tol_summary)
+S3method(tax_rank,match_names)
+S3method(tax_rank,taxon_info)
+S3method(tax_rank,taxon_mrca)
+S3method(tax_rank,tol_mrca)
+S3method(tax_rank,tol_node)
+S3method(tax_rank,tol_summary)
+S3method(tax_sources,match_names)
+S3method(tax_sources,taxon_info)
+S3method(tax_sources,taxon_mrca)
+S3method(tax_sources,tol_mrca)
+S3method(tax_sources,tol_node)
+S3method(tax_sources,tol_summary)
+S3method(tol_lineage,tol_node)
+S3method(unique_name,match_names)
+S3method(unique_name,taxon_info)
+S3method(unique_name,taxon_mrca)
+S3method(unique_name,tol_mrca)
+S3method(unique_name,tol_node)
+S3method(unique_name,tol_summary)
+S3method(update,match_names)
+export(candidate_for_synth)
+export(flags)
+export(get_publication)
+export(get_study)
+export(get_study_meta)
+export(get_study_subtree)
+export(get_study_tree)
+export(get_study_year)
+export(get_tree_ids)
+export(inspect)
+export(is_suppressed)
+export(list_trees)
+export(ott_id)
+export(source_list)
+export(strip_ott_ids)
+export(studies_find_studies)
+export(studies_find_trees)
+export(studies_properties)
+export(study_external_IDs)
+export(synonyms)
+export(tax_lineage)
+export(tax_name)
+export(tax_rank)
+export(tax_sources)
+export(taxon_external_IDs)
+export(taxonomy_about)
+export(taxonomy_mrca)
+export(taxonomy_subtree)
+export(taxonomy_taxon_info)
+export(tnrs_contexts)
+export(tnrs_infer_context)
+export(tnrs_match_names)
+export(tol_about)
+export(tol_induced_subtree)
+export(tol_lineage)
+export(tol_mrca)
+export(tol_node_info)
+export(tol_subtree)
+export(unique_name)
+import(ape)
+importFrom(assertthat,assert_that)
+importFrom(assertthat,is.flag)
+importFrom(assertthat,is.string)
+importFrom(httr,GET)
+importFrom(httr,POST)
+importFrom(httr,content)
+importFrom(httr,parse_url)
+importFrom(jsonlite,fromJSON)
+importFrom(jsonlite,toJSON)
+importFrom(jsonlite,unbox)
+importFrom(rentrez,entrez_link)
+importFrom(rentrez,entrez_search)
+importFrom(rncl,read_newick_phylo)
+importFrom(stats,na.omit)
+importFrom(stats,setNames)
+importFrom(stats,update)
diff --git a/NEWS.md b/NEWS.md
new file mode 100644
index 0000000..f7e3705
--- /dev/null
+++ b/NEWS.md
@@ -0,0 +1,104 @@
+## rotl 3.0.1
+
+* Fix tests and vignette to reflect changes accompanying release 6.1 of the
+  synthetic tree
+
+* Add section in vignette "How to use rotl?" about how to get the higher
+  taxonomy from a given taxon.
+
+* Add `CITATION` file with MEE manuscript information (#82)
+
+## rotl 3.0.0
+
+* `rotl` now interacts with v3.0 of the Open Tree of Life APIs. The
+  documentation has been updated to reflect the associated changes. More
+  information about the v3.0 of the Open Tree of Life APIs can be found
+  [on their wiki](https://github.com/OpenTreeOfLife/germinator/wiki/Open-Tree-of-Life-Web-APIs).
+
+
+### New features
+
+* New methods: `tax_sources`, `is_suppressed`, `tax_rank`, `unique_name`,
+  `name`, `ott_id`, for objects returned by `tnrs_match_names()`,
+  `taxonomy_taxon_info()`, `taxonomy_mrca()`, `tol_node_info()`,
+  `tol_about()`, and `tol_mrca()`. Each of these methods has its own class.
+
+* New method `tax_lineage()` to extract the higher taxonomy from an object
+  returned by `taxonomy_taxon_info()` (initially suggested by Matt Pennell, #57).
+
+* New method `tol_lineage()` to extract the nodes towards the root of the tree.
+
+* New print methods for `tol_node_info()` and `tol_mrca()`.
+
+* New functions `study_external_IDs()` and `taxon_external_IDs()` that return
+  the external identifiers for a study and associated trees (e.g., DOI, TreeBase
+  ID); and the identifiers of taxon names in taxonomic databases. The vignette
+  "Data mashup" includes an example on how to use it.
+
+* The function `strip_ott_ids()` gains the argument `remove_underscores` to remove
+  underscores from tips in trees returned by OTL.
+
+### Changes
+
+* Rename method `ott_taxon_name()` to `tax_name()` for consistency.
+
+* Rename method `synth_sources()` and `study_list()` to `source_list()`.
+
+* Refactor how result of query is checked and parsed (invisible to the user).
+
+### Bug fixes
+
+* Fix bug in `studies_find_studies()`, the arguments `verbose` and `exact` were
+  ignored.
+
+* The argument `only_current` has been dropped for the methods associated with
+  objects returned by `tnrs_match_names()`
+
+* The print method for `tnrs_contexts()` duplicated some names.
+
+* `inspect()`, `update()` and `synonyms()` methods for `tnrs_match_names()` did
+  not work if the query included unmatched taxa.
+
+
+## rotl 0.5.0
+
+* New vignette: `meta-analysis`
+
+* Added arguments `include_lineage` and `list_terminal_descendants` to
+  `taxonomy_taxon()`
+
+* Improve warning and format of the result if one of the taxa requested doesn't
+  match anything `tnrs_match_names`.
+
+* In the data frame returned by `tnrs_match_names`, the columns
+  `approximate_match`, `is_synonym` and `is_deprecated` are now `logical`
+  (instead of `character`) [issue #54]
+
+* New utility function `strip_ott_ids` removes OTT id information from
+  a character vector, making it easier to match tip labels in trees returned by
+  `tol_induced_subtree` to taxonomic names in other data sources. This function
+  can also remove underscores from the taxon names.
+
+* New method `list_trees` returns a list of tree ids associated with
+  studies. The function takes the output of `studies_find_studies` or
+  `studies_find_trees`.
+
+* `studies_find_studies` and `studies_find_trees` gain argument `detailed`
+  (default set to `TRUE`), that produces a data frame summarizing information
+  (title of the study, year of publication, DOI, ids of associated trees, ...)
+  about the studies matching the search criteria.
+
+* `get_study_tree` gains argument `deduplicate`. When `TRUE`, if the tree
+  returned for a given study contains duplicated tip labels, they will be made
+  unique before being parsed by NCL by appending a suffix (`_1`, `_2`, `_3`,
+  etc.). (#46, reported by @bomeara)
+
+* New method `get_study_year` for objects of class `study_meta` that returns the
+  year of publication of the study.
+
+* A more robust approach is used by `get_tree_ids` to identify the tree ids in
+  the metadata returned by the API
+
+## rotl 0.4.1
+
+* Initial CRAN release on July 24th, 2015
diff --git a/R/api-collections.R b/R/api-collections.R
new file mode 100644
index 0000000..5b18bfc
--- /dev/null
+++ b/R/api-collections.R
@@ -0,0 +1,28 @@
+##' @importFrom assertthat assert_that is.flag
+## This endpoint currently returns JSON in XML with mime type as text/html
+.collection_find_collections <- function(property = NULL, value = NULL,
+                                         verbose = FALSE, ...) {
+    ## Only `verbose` is validated and sent; `property`/`value` are unused here.
+    assertthat::assert_that(assertthat::is.flag(verbose))
+    payload <- list(verbose = verbose)
+    response <- otl_POST(path = "collections/find_collections",
+                         body = payload, ...)
+    response
+}
+
+.collection_properties <- function(...) {
+    ## POST an empty body to the "collections/properties" endpoint and
+    ## return whatever otl_POST() gives back.
+    response <- otl_POST(path = "collections/properties", body = list(), ...)
+    response
+}
+
+
+.get_collection <- function(owner_id = NULL, collection_name = NULL, ...) {
+    ## Both identifiers must be single strings; they become URL path segments.
+    assertthat::assert_that(assertthat::is.string(owner_id))
+    assertthat::assert_that(assertthat::is.string(collection_name))
+    endpoint <- paste("collections", owner_id, collection_name, sep = "/")
+    res <- otl_GET(path = endpoint, ...)
+    res
+}
diff --git a/R/api-studies.R b/R/api-studies.R
new file mode 100644
index 0000000..420b3a4
--- /dev/null
+++ b/R/api-studies.R
@@ -0,0 +1,163 @@
+##' @importFrom jsonlite unbox
+##' @importFrom httr content
+## Return a list of studies from the OpenTree docstore that match a given property
+.studies_find_studies <- function(property = NULL, value = NULL, verbose = FALSE,
+                                  exact = FALSE, ...) {
+    ## Validate the two flags first, then the two mandatory search terms.
+    if (!is.logical(verbose)) stop("Argument \'verbose\' should be logical")
+    if (!is.logical(exact)) stop("Argument \'exact\' should be logical")
+
+    if (is.null(property)) {
+        stop("Must supply a \'property\' argument")
+    }
+    if (!is.character(property)) {
+        stop("Argument \'property\' must be of class \"character\"")
+    }
+    prop_scalar <- jsonlite::unbox(property)
+
+    if (is.null(value)) {
+        stop("Must supply a \'value\' argument")
+    }
+    if (!is.character(value)) {
+        stop("Argument \'value\' must be of class \"character\"")
+    }
+    val_scalar <- jsonlite::unbox(value)
+
+    ## Every entry goes through jsonlite::unbox() before being sent.
+    payload <- list(property = prop_scalar, value = val_scalar,
+                    verbose = jsonlite::unbox(verbose),
+                    exact = jsonlite::unbox(exact))
+    response <- otl_POST(path="studies/find_studies/", body=payload, ...)
+    response
+}
+
+##' @importFrom jsonlite unbox
+##' @importFrom httr content
+## Return a list of trees from the OpenTree docstore that match a given property
+.studies_find_trees <- function(property=NULL, value=NULL, verbose=FALSE,
+                                exact=FALSE, ...) {
+    if (!is.logical(verbose)) {
+        stop("Argument \'verbose\' must be of class \"logical\"")
+    }
+    if (!is.logical(exact)) {
+        stop("Argument \'exact\' must be of class \"logical\"")
+    }
+    req_body <- list()
+    if (!is.null(property)) {
+        if (!is.character(property)) {
+            stop("Argument \'property\' must be of class \"character\"")
+        }
+        req_body$property <- jsonlite::unbox(property)
+    } else {
+        stop("Must supply a \'property\' argument")
+    }
+    if (!is.null(value)) {
+        if (!is.character(value)) {
+            stop("Argument \'value\' must be of class \"character\"")
+        }
+        req_body$value <- jsonlite::unbox(value)
+    } else {
+        stop("Must supply a \'value\' argument")
+    }
+    ## Store the flags under their names, as .studies_find_studies() does;
+    ## appending them unnamed via c() sent them without a key.
+    req_body$verbose <- jsonlite::unbox(verbose)
+    req_body$exact <- jsonlite::unbox(exact)
+    res <- otl_POST(path="studies/find_trees/", body=req_body, ...)
+    res
+}
+
+
+##' @importFrom httr content
+## Return a list of properties that can be used to search studies and trees
+.studies_properties <- function() {
+    props <- otl_POST(path="studies/properties/", body=list())
+    props  # the response, exactly as otl_POST() returned it
+}
+
+
+##' @importFrom httr content
+## Get a study from the OpenTree docstore
+.get_study <- function(study_id = NULL, format = c("", "nexus", "newick", "nexml", "json"),
+                       ...) {
+    if (is.null(study_id)) {
+        stop("Must supply a \'study_id\' argument")
+    }
+    if (!is.character(study_id)) {
+        stop("Argument \'study_id\' must be of class \"character\"")
+    }
+    format <- match.arg(format)
+    study_file <- paste0(study_id, otl_formats(format))  # id + otl_formats() suffix
+    study <- otl_GET(path=paste("study", study_file, sep="/"), ...)
+    study
+}
+
+
+##' @importFrom httr content
+## Get a tree in a study from the OpenTree docstore
+.get_study_tree <- function(study_id=NULL, tree_id=NULL, format=c("json", "newick", "nexus"),
+                            tip_label = c("ot:originallabel", "ot:ottid", "ot:otttaxonname"),
+                            ...) {
+    ## Both identifiers are mandatory character values.  (The messages for
+    ## `tree_id` refer to it as 'tree'.)
+    if (is.null(study_id)) stop("Must supply a \'study_id\' argument")
+    if (!is.character(study_id)) stop("Argument \'study_id\' must be of class \"character\"")
+    if (is.null(tree_id)) stop("Must supply a \'tree\' argument")
+    if (!is.character(tree_id)) stop("Argument \'tree\' must be of class \"character\"")
+
+    ## Resolve both choice arguments against their declared alternatives.
+    format <- match.arg(format)
+    tip_label <- match.arg(tip_label)
+
+    ## Path: study/<study_id>/tree/<tree_id><suffix>/?tip_label=<tip_label>
+    label_query <- paste0("/?tip_label=", tip_label)
+    tree_file <- paste0(tree_id, otl_formats(format), label_query)
+    tree <- otl_GET(path=paste("study", study_id, "tree", tree_file, sep="/"), ...)
+    tree
+}
+
+##' @importFrom httr content
+.get_study_meta <- function(study_id, ...)
+    ## GET the "meta" resource of a study: study/<study_id>/meta
+    otl_GET(path=paste("study", study_id, "meta", sep="/"), ...)
+
+
+##' @importFrom httr content
+.get_study_subtree <- function(study_id, tree_id, subtree_id,
+                               format=c("newick", "nexus", "nexml", "json"), ...) {
+    ## Fetch one subtree of a tree that belongs to a study.
+    ##
+    ## Each identifier must be non-NULL and of class "character"; they are
+    ## checked in order: study, tree, subtree.
+    if (is.null(study_id)) stop("Must supply a \'study_id\' argument")
+    if (!is.character(study_id)) stop("Argument \'study_id\' must be of class \"character\"")
+    if (is.null(tree_id)) stop("Must supply a \'tree\' argument")
+    if (!is.character(tree_id)) stop("Argument \'tree\' must be of class \"character\"")
+    if (is.null(subtree_id)) stop("Must supply a \'subtree\' argument")
+    if (!is.character(subtree_id)) stop("Argument \'subtree\' must be of class \"character\"")
+
+    ## Resolve the format choice, then turn it into a suffix via otl_formats().
+    format <- match.arg(format)
+    suffix <- otl_formats(format)
+
+    ## Path: study/<study_id>/tree/<tree_id><suffix>?subtree_id=<subtree_id>
+    tree_file <- paste0(tree_id, suffix)
+    url_stem <- paste("study", study_id, "tree", tree_file, sep="/")
+    subtree <- otl_GET(path=paste0(url_stem, "?subtree_id=", subtree_id), ...)
+    subtree
+}
+
+### Unexported helpers, parked for now: their results could be obtained
+### using get_study_tree.
+get_study_otu <- function(study_id, otu=NULL, ...) {
+    otu_path <- paste("study", study_id, "otu", otu, sep="/")
+    otl_GET(path=otu_path, ...)
+}
+
+get_study_otus <- function(study_id, otus, ...)
+    ## GET study/<study_id>/otu/<otus>; `otus` has no default here.
+    otl_GET(path=paste("study", study_id, "otu", otus, sep="/"), ...)
+
+get_study_otumap <- function(study_id, ...) {
+    otl_GET(path=paste("study", study_id, "otumap", sep="/"), ...)  # forward `...` like the sibling helpers
+}
diff --git a/R/api-taxonomy.R b/R/api-taxonomy.R
new file mode 100644
index 0000000..28f3f49
--- /dev/null
+++ b/R/api-taxonomy.R
@@ -0,0 +1,68 @@
+##' @importFrom httr content
+## Summary information about the OpenTree Taxonomy (OTT)
+.taxonomy_about <- function(...) {
+    about <- otl_POST(path="/taxonomy/about", body=list(), ...)
+    about
+}
+
+
+##' @importFrom jsonlite unbox
+##' @importFrom httr content
+##' @importFrom assertthat is.flag
+##' @importFrom assertthat assert_that
+## Information about an OpenTree Taxonomy (OTT) taxon
+.taxonomy_taxon_info <- function(ott_id=NULL,
+                                 include_children = FALSE,
+                                 include_lineage = FALSE,
+                                 include_terminal_descendants = FALSE,
+                                  ...) {
+    ## Run the id through check_ott_ids(); more than one id is rejected.
+    ott_id <- check_ott_ids(ott_id)
+    if (length(ott_id) > 1) {
+        stop("Must only supply one ", sQuote("ott_id"), " argument")
+    }
+    ## Each include_* switch must be a single logical flag.
+    assertthat::assert_that(assertthat::is.flag(include_children))
+    assertthat::assert_that(assertthat::is.flag(include_lineage))
+    assertthat::assert_that(assertthat::is.flag(include_terminal_descendants))
+    payload <- list(ott_id = jsonlite::unbox(ott_id))
+    payload$include_children <- jsonlite::unbox(include_children)
+    payload$include_lineage <- jsonlite::unbox(include_lineage)
+    payload$include_terminal_descendants <- jsonlite::unbox(include_terminal_descendants)
+    taxon <- otl_POST(path="/taxonomy/taxon_info", body=payload, ...)
+    taxon
+}
+
+
+##' @importFrom jsonlite unbox
+##' @importFrom httr content
+## Get a subtree from the OpenTree Taxonomy (OTT) taxonomic tree
+.taxonomy_subtree <- function(ott_id=NULL, label_format=NULL, ...) {
+    ott_id <- check_ott_ids(ott_id)
+    if (length(ott_id) > 1) {
+        stop("Must only supply one ", sQuote("ott_id"), " argument")
+    }
+
+    payload <- list(ott_id=jsonlite::unbox(ott_id))
+    if (!is.null(label_format)) {  # optional; only validated and sent when given
+        format_ok <- check_label_format(label_format)
+        if (!format_ok) {
+            stop(sQuote("label_format"), " must be one of: ", sQuote("name"), ", ",
+                 sQuote("id"), ", or ", sQuote("name_and_id"))
+        }
+        payload$label_format <- jsonlite::unbox(label_format)
+    }
+    subtree <- otl_POST(path="/taxonomy/subtree", body=payload, ...)
+    subtree
+}
+
+
+##' @importFrom httr content
+## Get the most recent common ancestor (MRCA) from nodes in the OpenTree Taxonomy (OTT)
+.taxonomy_mrca <- function (ott_ids = NULL, ...) {
+    ## The checked ids are sent as a plain vector (no unbox()).
+    checked_ids <- check_ott_ids(ott_ids)
+    payload <- list(ott_ids=checked_ids)
+    mrca <- otl_POST(path="/taxonomy/mrca", body=payload, ...)
+    mrca
+}
diff --git a/R/api-tnrs.R b/R/api-tnrs.R
new file mode 100644
index 0000000..0c7bc09
--- /dev/null
+++ b/R/api-tnrs.R
@@ -0,0 +1,70 @@
+##' @importFrom jsonlite unbox
+##' @importFrom httr content
+##' @importFrom assertthat is.string is.flag
+## Match taxon names
+.tnrs_match_names <- function(names=NULL, context_name=NULL,
+                              do_approximate_matching=TRUE,
+                              ids=NULL, include_suppressed=FALSE, ...) {
+    ## Validate every argument before building the request body.
+    if (is.null(names)) {
+        stop("You must supply a ", sQuote("names"), " argument")
+    } else if (!is.character(names)) {
+        stop("Argument ", sQuote("names"), " must be of class ",
+             sQuote("character"))
+    }
+    if (!is.null(ids)) {
+        if (length(ids) != length(names)) {
+            stop("Arguments ", sQuote("ids"), " and ",
+                 sQuote("names"), " must be of the same length")
+        } else if (!is.character(ids)) {
+            stop("Argument ", sQuote("ids"), " must be of class ",
+                 sQuote("character"))
+        }
+    }
+    if (!assertthat::is.flag(do_approximate_matching)) {
+        stop("Argument ", sQuote("do_approximate_matching"),
+             " must be of class ",
+             sQuote("logical"))
+    }
+    if (!assertthat::is.flag(include_suppressed)) {
+        ## Report the argument under its real name (was "include_deprecated").
+        stop("Argument ", sQuote("include_suppressed"), " must be of class ",
+             sQuote("logical"))
+    }
+    if (!is.null(context_name)){
+        if(!assertthat::is.string(context_name)) {
+            stop("Argument ", sQuote("context_name"), " must be of class ",
+                 sQuote("character"))
+        }
+        context_name <- jsonlite::unbox(context_name)
+    }
+    q <- list(names = names, context_name = context_name,
+              do_approximate_matching = jsonlite::unbox(do_approximate_matching),
+              ids = ids, include_suppressed = jsonlite::unbox(include_suppressed))
+    ## `context_name` and `ids` may be NULL; drop such entries from the body.
+    is_unset <- vapply(q, is.null, logical(1))
+    q <- q[!is_unset]
+    res <- otl_POST("tnrs/match_names", body=q, ...)
+    res
+}
+
+
+##' @importFrom httr content
+## Get OpenTree TNRS contexts
+.tnrs_contexts <- function(...) {
+    contexts <- otl_POST("tnrs/contexts", body=list(), ...)  # empty request body
+    contexts
+}
+
+
+## Infer taxonomic context from a set of names
+.tnrs_infer_context <- function(names=NULL, ...) {
+    if (is.null(names)) {
+        stop("Must supply a \'names\' argument")
+    }
+    if (!is.character(names)) {
+        stop("Argument \'names\' must be of class \"character\"")
+    }
+    context <- otl_POST("tnrs/infer_context", body=list(names=names), ...)
+    context
+}
diff --git a/R/api-tol.R b/R/api-tol.R
new file mode 100644
index 0000000..3a0af36
--- /dev/null
+++ b/R/api-tol.R
@@ -0,0 +1,126 @@
+##' @importFrom jsonlite unbox
+##' @importFrom httr content
+##' @importFrom assertthat is.flag
+## Summary information about the OpenTree Tree of Life
+## ("tree_of_life/about" endpoint).
+##
+## include_source_list: a single logical; sent to the API as a JSON scalar.
+## ...: forwarded to otl_POST().
+## Stops when include_source_list does not satisfy assertthat::is.flag().
+.tol_about <- function(include_source_list=FALSE, ...) {
+    if (!assertthat::is.flag(include_source_list)) {
+        ## the message names the argument exactly as callers spell it
+        stop("Argument ", sQuote("include_source_list"), " must be of class ", sQuote("logical"))
+    }
+    q <- list(include_source_list=jsonlite::unbox(include_source_list))
+    res <- otl_POST(path="tree_of_life/about", body=q, ...)
+    res
+}
+
+
+##' @importFrom jsonlite unbox
+##' @importFrom httr content
+## Fetch summary information for one node of the synthetic tree
+## ("tree_of_life/node_info" endpoint). Exactly one of `ott_id` or
+## `node_id` must be given; `include_lineage` must be logical.
+## Extra arguments are forwarded to otl_POST().
+.tol_node_info <- function(ott_id=NULL, node_id=NULL, include_lineage=FALSE, ...) {
+    if (!is.logical(include_lineage)) {
+        stop("Argument \'include_lineage\' must be of class \"logical\"")
+    }
+    has_ott <- !is.null(ott_id)
+    has_node <- !is.null(node_id)
+    if (!(has_ott || has_node)) {
+        stop("Must provide either ", sQuote("ott_id"), " or ", sQuote("node_id"))
+    }
+    if (has_ott && has_node) {
+        stop("Must provide either ", sQuote("ott_id"), " or ", sQuote("node_id"), ", not both.")
+    }
+    if (has_node) {
+        if (!check_valid_node_id(node_id)) {
+            stop("Argument ", sQuote("node_id"), " must look like \'ott123\' or \'mrcaott123ott456\'.")
+        }
+        q <- list(node_id=jsonlite::unbox(node_id), include_lineage=jsonlite::unbox(include_lineage))
+    } else {
+        ott_id <- check_ott_ids(ott_id)
+        if (length(ott_id) != 1)
+            stop("Please provide a single ", sQuote("ott_id"), call. = FALSE)
+        q <- list(ott_id=jsonlite::unbox(ott_id), include_lineage=jsonlite::unbox(include_lineage))
+    }
+    otl_POST(path="tree_of_life/node_info", body=q, ...)
+}
+
+
+##' @importFrom httr content
+## Query the most recent common ancestor of a set of nodes
+## ("tree_of_life/mrca" endpoint). At least one of `ott_ids` /
+## `node_ids` is required; both are validated before being sent.
+.tol_mrca <- function(ott_ids=NULL, node_ids=NULL, ...) {
+    if (is.null(ott_ids) && is.null(node_ids)) {
+        stop("Must provide ", sQuote("ott_ids"), " or ", sQuote("node_ids"), " (or both).")
+    }
+    q <- list()
+    if (!is.null(ott_ids)) {
+        q[["ott_ids"]] <- check_ott_ids(ott_ids)
+    }
+    if (!is.null(node_ids)) {
+        check_node_ids(node_ids)
+        q[["node_ids"]] <- node_ids
+    }
+    otl_POST(path="tree_of_life/mrca", body=q, ...)
+}
+
+
+# ignoring 'include_lineage' for subtree below. arguson only
+
+##' @importFrom jsonlite unbox
+##' @importFrom httr content
+## Request the subtree rooted at one node ("tree_of_life/subtree"
+## endpoint). Exactly one of `ott_id` or `node_id` must be given;
+## `label_format`, when supplied, is validated with
+## check_label_format(). Extra arguments are forwarded to otl_POST().
+.tol_subtree <- function(ott_id=NULL, node_id=NULL, label_format=NULL, ...) {
+    has_ott <- !is.null(ott_id)
+    has_node <- !is.null(node_id)
+    if (!(has_ott || has_node)) {
+        stop("Must provide either ", sQuote("ott_id"), " or ", sQuote("node_id"))
+    }
+    if (has_ott && has_node) {
+        stop("Must provide either ", sQuote("ott_id"), " or ", sQuote("node_id"), ", not both.")
+    }
+    if (has_node) {
+        if (!check_valid_node_id(node_id)) {
+            stop("Argument ", sQuote("node_id"), " must look like \'ott123\' or \'mrcaott123ott456\'.")
+        }
+        q <- list(node_id=jsonlite::unbox(node_id))
+    } else {
+        ott_id <- check_ott_ids(ott_id)
+        if (length(ott_id) !=  1)
+            stop("Please provide a single ", sQuote("ott_id"))
+        q <- list(ott_id=jsonlite::unbox(ott_id))
+    }
+    if (!is.null(label_format)) {
+        if (!check_label_format(label_format)) {
+            stop(sQuote("label_format"), " must be one of: ", sQuote("name"), ", ",
+                 sQuote("id"), ", or ", sQuote("name_and_id"))
+        }
+        q[["label_format"]] <- jsonlite::unbox(label_format)
+    }
+    otl_POST(path="tree_of_life/subtree", body=q, ...)
+}
+
+
+##' @importFrom httr content
+## Request the induced subtree spanning a set of nodes
+## ("tree_of_life/induced_subtree" endpoint). `label_format` is
+## validated first, then the ids; at least two ids in total (OTT ids
+## plus node ids) are required.
+.tol_induced_subtree <- function(ott_ids=NULL, node_ids=NULL, label_format=NULL, ...) {
+    if (is.null(ott_ids) && is.null(node_ids)) {
+        stop("Must provide ", sQuote("ott_ids"), " or ", sQuote("node_ids"), " (or both).")
+    }
+    q <- list()
+    if (!is.null(label_format)) {
+        if (!check_label_format(label_format)) {
+            stop(sQuote("label_format"), " must be one of: ", sQuote("name"), ", ",
+                 sQuote("id"), ", or ", sQuote("name_and_id"))
+        }
+        q[["label_format"]] <- jsonlite::unbox(label_format)
+    }
+    if (!is.null(ott_ids)) {
+        ## keep the validated value: it is what gets counted below
+        ott_ids <- check_ott_ids(ott_ids)
+        q[["ott_ids"]] <- ott_ids
+    }
+    if (!is.null(node_ids)) {
+        check_node_ids(node_ids)
+        q[["node_ids"]] <- node_ids
+    }
+    n_ids <- length(ott_ids) + length(node_ids)
+    if (n_ids < 2) {
+        stop("At least two valid ", sQuote("ott_ids"), " or ", sQuote("node_ids"), " must be provided.")
+    }
+    otl_POST("tree_of_life/induced_subtree", body=q, ...)
+}
diff --git a/R/base.R b/R/base.R
new file mode 100644
index 0000000..e707755
--- /dev/null
+++ b/R/base.R
@@ -0,0 +1,204 @@
+## Base URL of the Open Tree of Life API: the development server when
+## `dev` is TRUE, the production server otherwise.
+otl_url <- function(dev=FALSE) {
+    if (dev) {
+        return("https://devapi.opentreeoflife.org")
+    }
+    "https://api.opentreeoflife.org"
+}
+
+## API version string used to build request paths: "v3" unless the
+## caller supplies `version`, which is then returned unchanged.
+otl_version <- function(version) {
+    if (!missing(version)) {
+        return(version)
+    }
+    "v3"
+}
+
+
+# Parse the body of a response object: a list (via jsonlite) when the
+# content-type header mentions "application/json", otherwise the body
+# as text. An empty non-JSON body is an error.
+##' @importFrom httr content
+##' @importFrom jsonlite fromJSON
+otl_parse <- function(req) {
+    content_type <- req[["headers"]][["content-type"]]
+    if (grepl("application/json", content_type)) {
+        json_txt <- httr::content(req, "text", encoding = "UTF-8")
+        return(jsonlite::fromJSON(json_txt, simplifyVector = FALSE))
+    }
+    body_txt <- httr::content(req, as="text", encoding = "UTF-8")
+    if (identical(body_txt, "")) {
+        stop("No output to parse; check your query.", call. = FALSE)
+    }
+    body_txt
+}
+
+## Inspect a parsed API response and stop() if it carries an error
+## report.
+##
+## cont: parsed response; anything that is not a list with a
+##   "description" element is accepted silently.
+## Returns NULL invisibly when no error is detected.
+## Stops with "Error: ..." when `description` is a list holding an
+##   "Error" (or "error") element, and with "Message: ..." when the
+##   response has a top-level "message" element.
+## NOTE(review): the exact shape of error payloads is not visible
+## here; both spellings of the error key and both locations of the
+## message are handled -- confirm against the API.
+otl_check_error <- function(cont) {
+    if (!is.list(cont) || !("description" %in% names(cont))) {
+        return(invisible(NULL))
+    }
+    desc <- cont[["description"]]
+    ## `description` may be a plain string, in which case it has no fields
+    desc_fields <- if (is.list(desc)) names(desc) else character(0)
+    err_field <- intersect(c("Error", "error"), desc_fields)
+    if (length(err_field) > 0) {
+        ## report the value stored under the key that was actually found
+        stop(paste("Error: ", desc[[err_field[1]]], "\n", sep = ""))
+    } else if ("message" %in% names(cont)) {
+        msg <- if ("message" %in% desc_fields) desc[["message"]] else cont[["message"]]
+        stop(paste("Message: ", msg, "\n", sep = ""))
+    }
+    invisible(NULL)
+}
+
+## Validate a response and return its parsed content. A status code
+## that is not below 400 stops with the parsed body appended to the
+## message; otherwise the parsed body is screened by
+## otl_check_error() and returned.
+otl_check <- function(req) {
+    request_failed <- !(req$status_code < 400)
+    if (request_failed) {
+        stop("HTTP failure: ", req$status_code, "\n", otl_parse(req), call. = FALSE)
+    }
+    parsed <- otl_parse(req)
+    otl_check_error(parsed)
+    parsed
+}
+
+##' @importFrom httr GET
+## Send a GET request to the API and return the checked, parsed result.
+##
+## path:  endpoint path, appended to the version as "<otl_v>/<path>"
+## url:   base URL; defaults to otl_url(...)
+## otl_v: API version; defaults to otl_version(...)
+## ...:   passed to httr::GET() and, when `url`/`otl_v` are left at
+##        their defaults, also to otl_url() and otl_version()
+## NOTE(review): since the same `...` feeds otl_url(), otl_version() and
+## httr::GET(), extra arguments look usable only when `url` and `otl_v`
+## are given explicitly -- confirm with callers.
+otl_GET <- function(path, url = otl_url(...), otl_v = otl_version(...), ...) {
+    req <- httr::GET(url, path=paste(otl_v, path, sep="/"), ...)
+    otl_check(req)
+}
+
+##' @importFrom jsonlite toJSON
+##' @importFrom httr POST
+## Send a POST request with a JSON body and return the checked, parsed
+## result.
+##
+## path:  endpoint path, appended to the version as "<otl_v>/<path>"
+## body:  a list (enforced with stopifnot); serialized with
+##        jsonlite::toJSON(), or sent as "" when the list is empty
+## url:   base URL; defaults to otl_url(...)
+## otl_v: API version; defaults to otl_version(...)
+## ...:   passed to httr::POST() and, when `url`/`otl_v` are left at
+##        their defaults, also to otl_url() and otl_version()
+otl_POST <- function(path, body, url = otl_url(...), otl_v = otl_version(...), ...) {
+    stopifnot(is.list(body))
+
+    ## an empty list is sent as an empty string rather than as JSON
+    body_json <- ifelse(length(body), jsonlite::toJSON(body), "")
+
+    req <- httr::POST(url,
+                      path=paste(otl_v, path, sep="/"),
+                      body=body_json, ...)
+    otl_check(req)
+}
+
+## Map a tree file format name (case-insensitive) to the file
+## extension used in request paths. Unknown formats map to "".
+otl_formats <- function(format) {
+    fmt <- tolower(format)
+    extension <- switch(fmt,
+                        "nexus" = ".nex",
+                        "newick" = ".tre",
+                        "nexml" = ".nexml",
+                        "json" = ".json",
+                        "") # default: no extension = nex(j)son
+    extension
+}
+
+## Extract the numeric OTT id from OpenTree labels of the form
+## "<name>_ott<digits>" or "<name> ott<digits>" (internal or terminal).
+otl_ottid_from_label <- function(label) {
+    id_txt <- gsub("(.+[ _]ott)([0-9]+)", "\\2", label)
+    as.numeric(id_txt)
+}
+
+##' @importFrom rncl read_newick_phylo
+## Turn the result of a tree request into a tree object.
+##
+## res:   either a list carrying the tree string in `$newick` or
+##        `$subtree`, or a character string
+## dedup: if TRUE, the tree file is first passed through
+##        deduplicate_labels()
+## Returns the value of rncl::read_newick_phylo() when the string
+## contains "(", otherwise the `tip_label` element returned by
+## tree_to_labels().
+## Stops when no tree can be found in `res` or when `res` is neither a
+## list nor a character.
+phylo_from_otl <- function(res, dedup = FALSE) {
+    if (is.list(res)) {
+        if (!is.null(res$newick)) {
+            tree <- res$newick
+        } else if (!is.null(res$subtree)) {
+            tree <- res$subtree
+        } else {
+            stop("Cannot find tree")
+        }
+    } else if (is.character(res)) {
+        tree <- res
+    } else stop("I don't know how to deal with this format.")
+    if (grepl("\\(", tree)) {
+        fnm <- tempfile()
+        ## clean up the temporary file even when reading the tree fails
+        on.exit(unlink(fnm), add = TRUE)
+        cat(tree, file = fnm)
+        if (!dedup) {
+            phy <- rncl::read_newick_phylo(fnm)
+        } else {
+            dedup_tr <- deduplicate_labels(fnm)
+            on.exit(unlink(dedup_tr), add = TRUE)
+            phy <- rncl::read_newick_phylo(dedup_tr)
+        }
+    } else {
+        phy <- tree_to_labels(tree)$tip_label
+    }
+    return(phy)
+}
+
+## Read a NeXML document held in `res` by writing it to a temporary
+## file and parsing that file with RNeXML::nexml_read().
+## Stops when the RNeXML package is not installed.
+nexml_from_otl <- function(res) {
+    if (!requireNamespace("RNeXML", quietly = TRUE)) {
+        stop("The RNeXML package is needed to use the nexml file format")
+    }
+    fnm <- tempfile()
+    ## clean up the temporary file even when parsing fails
+    on.exit(unlink(fnm), add = TRUE)
+    cat(res, file=fnm)
+    phy <- RNeXML::nexml_read(x=fnm)
+    phy
+}
+
+## Does a single value look like a whole number? Numeric input is
+## tested with `%% 1`; anything else is converted to character and
+## must consist of digits only. NULL gives FALSE; more or fewer than
+## one element is an error.
+check_numeric <- function(x) {
+    if (is.null(x)) {
+        return(FALSE)
+    }
+    if (length(x) != 1) {
+        stop("only 1 element should be provided")
+    }
+    if (is.numeric(x)) {
+        return(x %% 1 == 0)
+    }
+    as_text <- as.character(x)
+    if (any(is.na(as_text))) {
+        return(FALSE)
+    }
+    grepl("^[0-9]+$", as_text)
+}
+
+## Validate OTT ids: they must not be NULL, must not contain NAs and
+## every element must pass check_numeric(). Objects of class
+## "otl_ott_id" are flattened with unlist() first. Returns the
+## (possibly flattened) ids.
+check_ott_ids <- function(ott_ids) {
+    if (is.null(ott_ids)) {
+        stop("You must supply some OTT ids.")
+    }
+    if (inherits(ott_ids, "otl_ott_id")) {
+        ## objects returned by the ott_id method become a plain vector
+        ott_ids <- unlist(ott_ids)
+    }
+    if (any(is.na(ott_ids))) {
+        stop("NAs are not allowed")
+    }
+    looks_numeric <- sapply(ott_ids, check_numeric)
+    if (!all(looks_numeric)) {
+        stop(sQuote("ott_ids"), " must look like numbers.")
+    }
+    ott_ids
+}
+
+## Does a single value look like a node id? A node id is a character
+## string starting with "ott<digits>" or "mrcaott<digits>ott<digits>"
+## (only the start of the string is tested). Anything other than
+## exactly one element is an error.
+check_valid_node_id <- function(x) {
+    if (length(x) != 1) {
+        stop("only 1 element should be provided")
+    }
+    if (!is.character(x)) {
+        return(FALSE)
+    }
+    is_mrca_id <- grepl('^mrcaott\\d+ott\\d+', x)
+    is_mrca_id || grepl('^ott\\d+', x)
+}
+
+## Validate a vector of node ids: character, no NAs, and every
+## element accepted by check_valid_node_id(). NULL is accepted
+## silently. Called for its side effect (an error on invalid input).
+check_node_ids <- function(node_ids) {
+    if (is.null(node_ids)) {
+        return(invisible(NULL))
+    }
+    if (!is.character(node_ids)) {
+        stop("Argument ", sQuote("node_ids"), " must be of type character.")
+    }
+    if (any(is.na(node_ids))) {
+        stop("NAs are not allowed")
+    }
+    all_valid <- all(sapply(node_ids, check_valid_node_id))
+    if (!all_valid) {
+        stop(sQuote("node_ids"), " must look like \'ott123\' or \'mrcaott123ott456\'.")
+    }
+    invisible(NULL)
+}
+
+# Is `x` one of the node label formats accepted by the tree_of_life
+# subtree and induced_subtree endpoints ("name", "id", "name_and_id")?
+# Might also be useful for taxonomy queries.
+check_label_format <- function (x) {
+    allowed <- c("name", "id", "name_and_id")
+    if (x %in% allowed) TRUE else FALSE
+}
diff --git a/R/deduplicate_labels.R b/R/deduplicate_labels.R
new file mode 100644
index 0000000..8ba3c01
--- /dev/null
+++ b/R/deduplicate_labels.R
@@ -0,0 +1,39 @@
+## Read a file and return its NEWICK tree strings as a character
+## vector: lines are split on ";", each piece is trimmed, remaining
+## whitespace characters become "_", and empty pieces are dropped.
+parse_newick <- function(file) {
+    raw_lines <- readLines(file, warn = FALSE)
+    pieces <- unlist(strsplit(raw_lines, split = ";"))
+    pieces <- gsub("^\\s+|\\s+$", "", pieces)
+    pieces <- gsub("\\s", "_", pieces)
+    pieces[nchar(pieces) > 0]
+}
+
+## Internal function to be used by `deduplicate_labels` that:
+## 1. identifies duplicated tip labels in a single tree string
+## 2. makes them unique with make.unique() (suffixes "_1", "_2", ...)
+## 3. replaces the duplicated labels by their unique counterparts
+##
+## tr_str: one tree string without its trailing ";"
+## Returns the (possibly modified) tree string with ";" appended, and
+## warns with the list of duplicated labels when any were found.
+dedup_lbl <- function(tr_str) {
+    tr_lbl <- tree_to_labels(tr_str, remove_quotes = TRUE)$tip_label
+    tr_lbl_unq <- make.unique(tr_lbl, sep = "_")
+    if (!identical(tr_lbl, tr_lbl_unq)) {
+        for (i in seq_along(tr_lbl)) {
+            ## The label (wrapped in \Q...\E) must follow "(", "|" or ","
+            ## with an optional quote, and be followed by an optional
+            ## quote and one of ":", "|", ")" or ",". sub() rewrites only
+            ## the first such occurrence left in the string on each pass.
+            tr_str <- sub(paste0("([\\(|,]\\'?)\\Q", tr_lbl[i], "\\E(\\'?[:|\\)|,])"),
+                          paste0("\\1", tr_lbl_unq[i], "\\2"),  tr_str)
+        }
+        warning("Some tip labels were duplicated and have been modified: ",
+                paste(tr_lbl[duplicated(tr_lbl)], collapse = ", "))
+    }
+    paste0(tr_str, ";")
+}
+
+## Main function: takes a file with potentially duplicated tip labels
+## and creates a new temporary file, one tree per line, in which every
+## tree has gone through dedup_lbl(). Returns the path of the new
+## file.
+deduplicate_labels <- function(file) {
+    out_file <- tempfile()
+    deduped <- sapply(parse_newick(file), dedup_lbl)
+    cat(deduped, file = out_file, sep = "\n")
+    out_file
+}
diff --git a/R/external_data.R b/R/external_data.R
new file mode 100644
index 0000000..e1a9199
--- /dev/null
+++ b/R/external_data.R
@@ -0,0 +1,119 @@
+##' Get external identifiers for data associated with an Open Tree study
+##'
+##' Data associated with studies contributing to the Open Tree synthesis may
+##' be available from other databases. In particular, trees and alignments 
+##' may be available from treebase and DNA sequences and bibliographic
+##' information associated with a given study may be available from the NCBI.
+##' This function retrieves that information for a given study.
+##'  
+##' @param study_id An open tree study ID
+##' @return A study_external_data object (which inherits from a list) which
+##' contains some of the following.
+##' @return doi, character, the DOI for the paper describing this study
+##' @return external_data_url, character, a URL to an external data repository 
+##' (e.g. a treebase entry) if one exists.
+##' @return pubmed_id character, the unique ID for this study in the NCBI's pubmed database
+##' @return popset_ids character, vector of IDs for the NCBI's popset database
+##' @return nucleotide_ids character, vector of IDs for the NCBI's nucleotide database 
+##' @seealso studies_find_studies (used to discover study IDs)
+##' @importFrom httr parse_url
+##' @importFrom rentrez entrez_search
+##' @importFrom rentrez entrez_link
+##' @examples
+##' \dontrun{
+##' flies <- studies_find_studies(property="ot:focalCladeOTTTaxonName", value="Drosophilidae")
+##' study_external_IDs(flies[2,]$study_ids)
+##' }
+##' @export
+
+study_external_IDs <- function(study_id){
+    meta <- get_study_meta(study_id)
+    data_deposit <- meta[["nexml"]][["^ot:dataDeposit"]][["@href"]]
+    ## the DOI is stored as a URL; only its path component is kept
+    doi_url <- attr(get_publication(meta), "DOI")
+    doi <- parse_url(doi_url)$path
+    pmid <- get_pmid(doi, study_id)
+    res <- list(doi = doi,
+                pubmed_id = pmid,
+                external_data_url = data_deposit)
+    if (!is.null(pmid)) {
+        ## ids of records in another NCBI database linked to the pubmed entry
+        linked_ids <- function(db) {
+            entrez_link(dbfrom="pubmed", db=db, id=pmid)[["links"]][[paste0("pubmed_", db)]]
+        }
+        res[["popset_ids"]] <- linked_ids("popset")
+        res[["nucleotide_ids"]] <- linked_ids("nuccore")
+    }
+    structure(res, class=c("study_external_data", "list"), id=study_id)
+}
+
+##' Get external identifiers for data associated with an Open Tree taxon
+##'
+##' The Open Tree taxonomy is a synthesis of multiple reference taxonomies. This
+##' function retrieves identifiers to external taxonomic records that have
+##' contributed the rank, position and definition of a given Open Tree taxon.
+##'
+##' @param taxon_id An Open Tree taxon ID (OTT id)
+##' @return a data.frame in which each row represents a unique record in an
+##' external database. The column "source" provides an abbreviated name for the
+##' database, and "id" the unique ID for the record.
+##' @seealso tnrs_match_names, which can be used to search for taxa by name.
+##' @seealso taxonomy_taxon, for more information about a given taxon.
+##' @examples
+##' \dontrun{
+##'    gibbon_IDs <- taxon_external_IDs(712902) 
+##' }
+##' @export
+
+taxon_external_IDs <- function(taxon_id){
+    info <- taxonomy_taxon_info(taxon_id)
+    ## each source is a "source:id" string; split it into its two parts
+    sources <- unlist(info[[1]][["tax_sources"]])
+    parts <- strsplit(sources, ":")
+    res <- do.call(rbind.data.frame, parts)
+    names(res) <- c("source", "id")
+    res
+}
+
+#'@export
+## Print method for objects of class "study_external_data".
+##
+## x:   list with elements doi, pubmed_id, external_data_url and,
+##      optionally, popset_ids and nucleotide_ids
+## ...: ignored
+## Returns `x` invisibly.
+print.study_external_data <- function(x, ...){
+    ## study_external_IDs() stores the study id in the "id" attribute;
+    ## a "study_id" attribute is honoured as well.
+    study <- attr(x, "study_id", exact = TRUE)
+    if (is.null(study)) {
+        study <- attr(x, "id", exact = TRUE)
+    }
+    cat("External data identifiers for study", study, "\n")
+    cat(" $doi: ", x[["doi"]], "\n")
+    if(!is.null(x$pubmed_id)){
+        cat(" $pubmed_id: ", x[["pubmed_id"]], "\n")
+    }
+    if(!is.null(x$popset_ids)){
+        cat(" $popset_ids: vector of",  length(x[["popset_ids"]]), "IDs \n")
+    }
+    if(!is.null(x$nucleotide_ids)){
+        cat(" $nucleotide_ids: vector of", length(x[["nucleotide_ids"]]), "IDs\n")
+    }
+    ## the URL can be NULL, in which case nchar() yields a zero-length
+    ## result that must not reach `if`
+    data_url <- x[["external_data_url"]]
+    if(!is.null(data_url) && any(nchar(data_url) > 0)){
+        cat(" $external_data_url", data_url, "\n")
+    }
+    cat("\n")
+    invisible(x)
+}
+
+##Maybe include these functions to get summary information about a 
+## set of linked sequences?
+#summarize_nucleotide_data <- function(id_vector){
+#    summs <- entrez_summary(db="nuccore", id=id_vector)
+#    interesting <- extract_from_esummary(summs, c("uid", "title", "slen", "organism", "completeness"), simplify=FALSE)
+#    do.call(rbind.data.frame, interesting) 
+#}
+#
+#summarize_popset_data <- function(id_vector){
+#    summs <- entrez_summary(db="popset", id=id_vector)
+#    interesting <- extract_from_esummary(summs, c("uid", "title"), simplify=FALSE)
+#    do.call(rbind.data.frame, interesting) 
+#}
+#
+
+# Un-exported function converting a DOI into a PubMed ID through an
+# entrez_search() on the "pubmed" database. `study_id` is only used
+# to build a helpful warning when 0 or more than 1 PMID is found, in
+# which case NULL is returned.
+get_pmid <- function(doi, study_id){
+    query <- paste0(doi, "[DOI]")
+    pubmed_search <- entrez_search(db="pubmed", term=query)
+    n_hits <- length(pubmed_search$ids)
+    if (n_hits == 0) {
+        warning("Could not find PMID for study'", study_id, "', skipping NCBI data")
+        return(NULL)
+    }
+    if (n_hits > 1) {
+        warning("Found more than one PMID matching study'", study_id, "', skipping NCBI data")
+        return(NULL)
+    }
+    pubmed_search$ids
+}
+
diff --git a/R/match_names.R b/R/match_names.R
new file mode 100644
index 0000000..3e3625e
--- /dev/null
+++ b/R/match_names.R
@@ -0,0 +1,361 @@
+## Internal function that validates the arguments provided and maps
+## them, through the "original_order" attribute of `response`, to the
+## corresponding position for a series of matched names.
+##
+## response:   object carrying an "original_order" attribute
+## row_number: numeric; must be one of the values in "original_order"
+## taxon_name: character; compared in lower case to
+##             response$search_string
+## ott_id:     must pass check_numeric(); compared to response$ott_id
+## Exactly one of row_number, taxon_name and ott_id must be supplied,
+## and it must select a single element.
+## Returns the selected element of "original_order".
+check_args_match_names <- function(response, row_number, taxon_name, ott_id) {
+    orig_order <- attr(response, "original_order")
+    if (is.null(orig_order)) {
+        ## deparse() keeps the message readable when the caller passed
+        ## an expression rather than a bare name
+        stop(sQuote(paste(deparse(substitute(response)), collapse = " ")),
+             " was not created using ",
+             sQuote("tnrs_match_names"))
+    }
+
+    if (missing(row_number) && missing(taxon_name) && missing(ott_id)) {
+        stop("You must specify one of ", sQuote("row_number"), ", ",
+             sQuote("taxon_name"), " or ", sQuote("ott_id"))
+    } else if (!missing(row_number) && missing(taxon_name) && missing(ott_id)) {
+        if (!is.numeric(row_number))
+            stop(sQuote("row_number"), " must be a numeric.")
+        if (!all(row_number %in% orig_order)) {
+            stop(sQuote("row_number"), " is not a valid row number.")
+        }
+        i <- orig_order[row_number]
+    } else if (missing(row_number) && !missing(taxon_name) && missing(ott_id)) {
+        if (!is.character(taxon_name))
+            stop(sQuote("taxon_name"), " must be a character.")
+        i <- orig_order[match(tolower(taxon_name), response$search_string)]
+        if (any(is.na(i)))
+            stop("Can't find ", taxon_name)
+    } else if (missing(row_number) && missing(taxon_name) && !missing(ott_id)) {
+        if (!check_numeric(ott_id))
+            stop(sQuote("ott_id"), " must look like a number.")
+        i <- orig_order[match(ott_id, response$ott_id)]
+        if (any(is.na(i))) stop("Can't find ", ott_id)
+    } else {
+        stop("You must use only one of ",
+             sQuote("row_number"), ", ",
+             sQuote("taxon_name"),
+             " or ", sQuote("ott_id"), ".")
+    }
+
+    if (length(i) > 1)
+        stop("You must supply a single element for each argument.")
+    i
+}
+
+## Translate `row_number`, `taxon_name` or `ott_id` into a row index of
+## `response`. The validity of the values themselves is checked by
+## check_args_match_names(); here only the "exactly one argument" and
+## "single element" rules are enforced.
+match_row_number <- function(response, row_number, taxon_name, ott_id) {
+    has_row <- !missing(row_number)
+    has_name <- !missing(taxon_name)
+    has_ott <- !missing(ott_id)
+    n_given <- has_row + has_name + has_ott
+    if (n_given == 0) {
+        stop("You must specify one of ", sQuote("row_number"), " ",
+             sQuote("taxon_name"), " ", sQuote("ott_id"))
+    }
+    if (n_given > 1) {
+        stop("You must use only one of ", sQuote("row_number"),
+             " ", sQuote("taxon_name"), " ", sQuote("ott_id"))
+    }
+    if (has_row) {
+        i <- row_number
+    } else if (has_name) {
+        i <- match(tolower(taxon_name), response[["search_string"]])
+    } else {
+        i <- match(ott_id, response[["ott_id"]])
+    }
+    if (length(i) > 1)
+        stop("You must supply a single element for each argument.")
+    i
+}
+
+##' Taxonomic names may have different meanings in different taxonomic
+##' contexts, as the same genus name can be applied to animals and
+##' plants for instance. Additionally, the meaning of a taxonomic name
+##' may have changed throughout its history, and may have referred to a
+##' different taxon in the past. In such cases, a given name might
+##' have multiple matches in the Open Tree Taxonomy. These functions
+##' allow users to inspect (and update) alternative meanings of a given
+##' name and its current taxonomic status according to the Open Tree
+##' Taxonomy.
+##'
+##' To inspect alternative taxonomic meanings of a given name, you
+##' need to provide the object resulting from a call to the
+##' tnrs_match_names function, as well as one of either the row number
+##' corresponding to the name in this object, the name itself (as used
+##' in the original query), or the ott_id listed for this name.
+##'
+##' To update one of the names, you also need to provide the row number
+##' in which the name to be replaced appears or its ott id.
+##'
+##' @title Inspect and Update alternative matches for a name returned
+##'     by tnrs_match_names
+##' @param response an object generated by the
+##'     \code{\link{tnrs_match_names}} function
+##' @param row_number the row number corresponding to the name to
+##'     inspect
+##' @param taxon_name the taxon name corresponding to the name to
+##'     inspect
+##' @param ott_id the ott id corresponding to the name to inspect
+##' @param ... currently ignored
+##' @return a data frame
+##' @seealso \code{\link{tnrs_match_names}}
+##' @examples
+##'   \dontrun{
+##'    matched_names <- tnrs_match_names(c("holothuria", "diadema", "boletus"))
+##'    inspect(matched_names, taxon_name="diadema")
+##'    new_matched_names <- update(matched_names, taxon_name="diadema",
+##'                                new_ott_id = 631176)
+##'    new_matched_names
+##'    }
+##' @export
+##' @rdname match_names
+inspect.match_names <- function(response, row_number, taxon_name, ott_id, ...) {
+
+    ## i: position in the original response, j: row of `response`
+    i <- check_args_match_names(response, row_number, taxon_name, ott_id)
+    j <- match_row_number(response, row_number, taxon_name, ott_id)
+
+    if (!attr(response, "has_original_match")[j]) {
+        ## nothing to list for this name: return its current row
+        return(response[j, ])
+    }
+    original <- attr(response, "original_response")
+    build_summary_match(original, res_id = i, match_id = NULL,
+                        initial_creation = FALSE)
+}
+
+##' @export
+##' @rdname match_names
+## S3 generic: dispatches on the class of `response`.
+inspect <- function(response, ...) UseMethod("inspect")
+
+##' @param object an object created by \code{\link{tnrs_match_names}}
+##' @param new_row_number the row number in the output of
+##'     \code{\link{inspect}} to replace the taxa specified by
+##'     \code{row_number}, \code{taxon_name}, or \code{ott_id}.
+##' @param new_ott_id the ott id of the taxon to replace the taxa
+##'     specified by \code{row_number}, \code{taxon_name}, or
+##'     \code{ott_id}.
+##' @export
+##' @rdname match_names
+##' @importFrom stats update
+update.match_names <- function(object, row_number, taxon_name, ott_id,
+                               new_row_number, new_ott_id, ...) {
+
+    response <- object
+    ## i: position of the name in the original response;
+    ## j: row of `response` holding the name (reused further down for
+    ## the index of the replacement match)
+    i <- check_args_match_names(response, row_number, taxon_name, ott_id)
+    j <- match_row_number(response, row_number, taxon_name, ott_id)
+
+    res <- attr(response, "original_response")
+
+    ## a name without any match has no alternative to switch to
+    if (!attr(response, "has_original_match")[j]) {
+        warning("There is no match for this name, ",
+                 "so there is nothing to replace it with.")
+        return(response)
+    }
+
+    ## all the candidate matches for the selected name
+    tmpRes <- res$results[[i]]
+
+    ## rnb: the row of `response` that will be overwritten
+    if (missing(row_number)) {
+        if (!missing(taxon_name)) {
+            rnb <- match(tolower(taxon_name), response$search_string)
+        } else if (!missing(ott_id)) {
+            rnb <- match(ott_id, response$ott_id)
+        }
+    } else {
+        rnb <- row_number
+    }
+
+    ## the replacement is chosen either by its position among the
+    ## candidate matches (new_row_number) or by its ott id (new_ott_id),
+    ## never both
+    if (missing(new_row_number) && missing(new_ott_id)) {
+        stop("You must specify either ", sQuote("new_row_number"),
+             " or ", sQuote("new_ott_id"))
+    } else if (!missing(new_row_number) && missing(new_ott_id)) {
+        if (! new_row_number %in% seq_len(length(tmpRes$matches)))
+            stop(sQuote("new_row_number"), " is not a valid row number.")
+        j <- new_row_number
+    } else if (missing(new_row_number) && !missing(new_ott_id)) {
+        ## ott ids of every candidate match, in the order of the matches
+        all_ott_id <- sapply(lapply(tmpRes[["matches"]],
+                                  function(x) x[["taxon"]]),
+                           function(x) .tax_ott_id(x))
+        j <- match(new_ott_id, all_ott_id)
+        if (any(is.na(j))) stop("Can't find ", new_ott_id)
+    } else {
+        stop("You must use only one of ", sQuote("new_row_number"),
+             " or ", sQuote("new_ott_id"))
+    }
+    if (length(j) > 1) stop("You must supply a single element for each argument")
+
+    ## presumably the summary row for match `j` of name `i` -- see
+    ## summary_row_factory()
+    summ_match <- summary_row_factory(res, res_id = i, match_id = j)
+
+    ## overwrite the row and record which match is now in use
+    response[rnb, ] <- summ_match
+    attr(response, "match_id")[rnb] <- j
+    response
+}
+
+
+## For the i-th result of a TNRS response, pull the element called
+## `list_name` out of every match (e.g. is_synonym, score,
+## nomenclature_code, is_approximate_match, taxon). Returns a list
+## with one entry per match.
+get_list_element <- function(response, i, list_name) {
+    matches <- response[["results"]][[i]][["matches"]]
+    lapply(matches, function(one_match) one_match[[list_name]])
+}
+
+## Build an accessor for one element (`list_name`) of the matches
+## stored in the "original_response" attribute of a match_names object.
+##
+## The returned function takes the object (`tax`) and, optionally, one
+## of row_number / taxon_name / ott_id:
+## - with none of them, it returns the element for the match currently
+##   in use (attribute "match_id") of every name that has a match;
+## - with one of them, it returns the element for that single name, or
+##   a placeholder record filled with NAs when the name has no match.
+match_names_method_factory <- function(list_name) {
+
+    function(tax, row_number, taxon_name, ott_id, ...) {
+
+        response <- tax
+        res <- attr(response, "original_response")
+
+        no_args <- all(c(missing(row_number), missing(taxon_name),
+                         missing(ott_id)))
+
+        if (no_args) {
+            ## positions, in the original response, of the names that
+            ## have at least one match
+            res_i <- attr(response, "original_order")[attr(response, "has_original_match")]
+            ret <- lapply(res_i, function(i) {
+                get_list_element(res, i, list_name)
+            })
+            ## name each entry after the matched name of its first match
+            names(ret) <- sapply(res_i, function(i) {
+                get_list_element(res, i, "matched_name")[[1]]
+            })
+            ## ret is already in the correct order so we can use a sequence
+            ## to extract the correct element
+            ret <- mapply(function(x, i) {
+                ret[[x]][i]
+            }, seq_along(ret), attr(response, "match_id")[attr(response, "has_original_match")])
+            ## flatten only when every name yielded exactly one element
+            if (all(sapply(ret, length) == 1)) {
+                ret <- unlist(ret, use.names = TRUE)
+            }
+        } else {
+            i <- check_args_match_names(response, row_number, taxon_name, ott_id)
+            j <- match_row_number(response, row_number, taxon_name, ott_id)
+            if (attr(response, "has_original_match")[j]) {
+                ret <- get_list_element(res, i, list_name)[attr(response, "match_id")[j]]
+            } else {
+                ## no match: placeholder carrying the search string as
+                ## `name` and NA for every other field
+                ret <- list(ott_id = NA_character_,
+                            name = response[["search_string"]][j],
+                            unique_name = NA_character_,
+                            rank = NA_character_,
+                            tax_sources = NA_character_,
+                            flags = NA_character_,
+                            synonyms = NA_character_,
+                            is_suppressed = NA_character_)
+                ret <- list(ret)
+            }
+        }
+
+        ret
+    }
+
+}
+
+## Build a method that applies the taxon accessor `.f` to the "taxon"
+## record of the selected match(es) of a match_names object. The
+## result is named after the unique name of each taxon and passed
+## through add_otl_class().
+match_names_taxon_method_factory <- function(.f) {
+    function(tax, row_number, taxon_name, ott_id, ...) {
+        taxon_getter <- match_names_method_factory("taxon")
+        taxa <- taxon_getter(tax, row_number = row_number,
+                             taxon_name = taxon_name,
+                             ott_id = ott_id)
+        res <- lapply(taxa, function(one_taxon) .f(one_taxon))
+        names(res) <- vapply(taxa, function(one_taxon) .tax_unique_name(one_taxon),
+                             character(1))
+        add_otl_class(res, .f)
+    }
+}
+
+##' \code{rotl} provides a collection of functions that allows users
+##' to extract relevant information from an object generated by
+##' \code{\link{tnrs_match_names}} function.
+##'
+##' These methods optionally accept one of the arguments
+##' \code{row_number}, \code{taxon_name} or \code{ott_id} to retrieve
+##' the corresponding information for one of the matches in the object
+##' returned by the \code{\link{tnrs_match_names}} function.
+##'
+##' If these arguments are not provided, these methods can return
+##' information for the matches currently listed in the object
+##' returned by \code{\link{tnrs_match_names}}.
+##'
+##' @title \code{ott_id} and \code{flags} for taxonomic names matched
+##'     by \code{tnrs_match_names}
+##' @param tax an object returned by \code{\link{tnrs_match_names}}
+##' @param row_number the row number corresponding to the name for
+##'     which to list the synonyms
+##' @param taxon_name the taxon name corresponding to the name for
+##'     which to list the synonyms
+##' @param ott_id the ott id corresponding to the name for which to
+##'     list the synonyms
+##' @param ... currently ignored
+##' @return A list of the ott ids or flags for the taxonomic names
+##'     matched with \code{\link{tnrs_match_names}}, for either one or
+##'     all the names.
+##' @examples
+##' \dontrun{
+##'   rsp <- tnrs_match_names(c("Diadema", "Tyrannosaurus"))
+##'   rsp$ott_id    # ott id for match currently in use
+##'   ott_id(rsp)   # similar as above but elements are named
+##'
+##'   ## flags() is useful for instance to determine if a taxon is extinct
+##'   flags(rsp, taxon_name="Tyrannosaurus")
+##' }
+##' @export
+##' @rdname match_names-methods
+## Method generated by match_names_taxon_method_factory(): applies
+## .tax_ott_id to the taxon record of the selected match(es).
+ott_id.match_names <- match_names_taxon_method_factory(.tax_ott_id)
+
+
+##' @export
+##' @rdname match_names-methods
+## Same construction, using .tax_flags.
+flags.match_names <- match_names_taxon_method_factory(.tax_flags)
+
+##' When querying the Taxonomic Name Resolution Services for a
+##' particular taxonomic name, the API returns as possible matches all
+##' names that include the queried name as a possible synonym. This
+##' function allows you to explore other synonyms for an accepted
+##' name, and allows you to determine why the name you queried is
+##' returning an accepted synonym.
+##'
+##' To list synonyms for a given taxonomic name, you need to provide
+##' the object resulting from a call to the
+##' \code{\link{tnrs_match_names}} function, as well as one of either
+##' the row number corresponding to the name in this object, the name
+##' itself (as used in the original query), or the ott_id listed for
+##' this name. Otherwise, the synonyms for all the currently matched
+##' names are returned.
+##'
+##' @title List the synonyms for a given name
+##' @param tax a data frame generated by the
+##'     \code{\link{tnrs_match_names}} function
+##' @param row_number the row number corresponding to the name for
+##'     which to list the synonyms
+##' @param taxon_name the taxon name corresponding to the name for
+##'     which to list the synonyms
+##' @param ott_id the ott id corresponding to the name for which to
+##'     list the synonyms
+##' @param ... currently ignored
+##' @return a list whose elements are all synonym names (as vectors of
+##'     character) for the taxonomic names that match the query (the
+##'     names of the elements of the list).
+##' @examples
+##' \dontrun{
+##'    echino <- tnrs_match_names(c("Diadema", "Acanthaster", "Fromia"))
+##'    ## These 3 calls are identical
+##'    synonyms(echino, taxon_name="Acanthaster")
+##'    synonyms(echino, row_number=2)
+##'    synonyms(echino, ott_id=337928)
+##' }
+##' @export
+## The methods below are all generated by
+## match_names_taxon_method_factory(); each applies the named accessor
+## to the taxon record of the selected match(es).
+synonyms.match_names <- match_names_taxon_method_factory(.tax_synonyms)
+
+##' @export
+tax_sources.match_names <- match_names_taxon_method_factory(.tax_sources)
+
+##' @export
+tax_rank.match_names <- match_names_taxon_method_factory(.tax_rank)
+
+
+##' @export
+is_suppressed.match_names <- match_names_taxon_method_factory(.tax_is_suppressed)
+
+
+##' @export
+unique_name.match_names <- match_names_taxon_method_factory(.tax_unique_name)
+
+
+##' @export
+tax_name.match_names <- match_names_taxon_method_factory(.tax_name)
diff --git a/R/methods.R b/R/methods.R
new file mode 100644
index 0000000..52f4856
--- /dev/null
+++ b/R/methods.R
@@ -0,0 +1,93 @@
+############################################################################
+## methods                                                                ##
+############################################################################
+
+##' Methods for dealing with objects containing taxonomic information
+##' (Taxonomy, TNRS endpoints)
+##'
+##' This is the page for the generic methods. See the help pages for
+##' \code{\link{taxonomy_taxon_info}}, \code{\link{taxonomy_mrca}}, and
+##' \code{\link{tnrs_match_names}} for more information.
+##'
+##' @title Methods for Taxonomy
+##' @param tax an object returned by \code{\link{taxonomy_taxon_info}},
+##'     \code{\link{taxonomy_mrca}}, or \code{\link{tnrs_match_names}}
+##' @param ... additional arguments (see
+##'     \code{\link{tnrs_match_names}})
+##' @rdname taxonomy-methods
+##' @export
+
+tax_rank <- function(tax, ...) {
+    UseMethod("tax_rank")
+}
+
+##' @export
+##' @rdname taxonomy-methods
+ott_id <- function(tax, ...) {
+    UseMethod("ott_id")
+}
+
+##' @export
+##' @rdname taxonomy-methods
+synonyms <- function(tax, ...) {
+    UseMethod("synonyms")
+}
+
+##' @export
+##' @rdname taxonomy-methods
+tax_sources <- function(tax, ...) {
+    UseMethod("tax_sources")
+}
+
+##' @export
+##' @rdname taxonomy-methods
+is_suppressed <- function(tax, ...) {
+    UseMethod("is_suppressed")
+}
+
+##' @export
+##' @rdname taxonomy-methods
+unique_name <- function(tax, ...) {
+    UseMethod("unique_name")
+}
+
+##' @export
+##' @rdname taxonomy-methods
+tax_name <- function(tax, ...) {
+    UseMethod("tax_name")
+}
+
+### flags -----------------------------------------------------------------------
+
+##' @export
+##' @rdname match_names-methods
+flags <- function(tax, ...) {
+    UseMethod("flags")
+}
+
+###
+
+##' Retrieve the detailed information for the list of studies used in
+##' the Tree of Life.
+##'
+##' @title List of studies used in the Tree of Life
+##'
+##' @details This function takes the object resulting from
+##'     \code{tol_about(study_list = TRUE)}, \code{tol_mrca()},
+##'     \code{tol_node_info()}, and returns a data frame listing the
+##'     \code{tree_id}, \code{study_id} and \code{git_sha} for the
+##'     studies currently included in the Tree of Life.
+##'
+##' @param tax a list containing a \code{source_id_map} slot.
+##' @param ... additional arguments (currently unused)
+##'
+##' @return a data frame
+##' @export
+source_list <- function(tax, ...) {
+    UseMethod("source_list")
+}
+
+
+##' Extract the lineage information (higher taxonomy) from an object
+##' returned by \code{\link{taxonomy_taxon_info}}.
+##'
+##' The object passed to this function must have been created using
+##' the argument \code{include_lineage=TRUE}.
+##'
+##' @title Lineage of a taxon
+##' @param tax an object created by \code{\link{taxonomy_taxon_info}}
+##'     using the argument \code{include_lineage=TRUE}.
+##' @param ... additional arguments (currently unused).
+##' @return A list with one slot per taxon that contains a data frame
+##'     with 3 columns: the taxonomy rank, the name, and unique name
+##'     for all taxa included in the lineage of the taxon up to the
+##'     root of the tree.
+##' @rdname tax_lineage
+##' @export
+tax_lineage <- function(tax, ...) {
+    UseMethod("tax_lineage")
+}
+
+##' @export
+##' @rdname tol_node_info
+tol_lineage <- function(tax, ...) {
+    UseMethod("tol_lineage")
+}
diff --git a/R/rotl-package.R b/R/rotl-package.R
new file mode 100644
index 0000000..11b569a
--- /dev/null
+++ b/R/rotl-package.R
@@ -0,0 +1,50 @@
+##' An Interface to the Open Tree of Life API
+##'
+##' The Open Tree of Life is an NSF funded project that is generating
+##' an online, comprehensive phylogenetic tree for 1.8 million
+##' species. \code{rotl} provides an interface that allows you to
+##' query and retrieve the parts of the tree of life that are of
+##' interest to you.
+##'
+##' \code{rotl} provides functions for most of the end points the API
+##' provides. The documentation of the API is available at:
+##' \url{https://github.com/OpenTreeOfLife/opentree/wiki/Open-Tree-of-Life-APIs}
+##'
+##' @section Customizing API calls:
+##'
+##'     All functions that use API end points can take 2 arguments to
+##'     customize the API call and are passed as \code{...} arguments.
+##'
+##'     \itemize{
+##'
+##'     \item{ \code{otl_v} } { This argument controls which version
+##'     of the API your call is using. The default value for this
+##'     argument is a call to the non-exported function
+##'     \code{otl_version()} which returns the current version of the
+##'     Open Tree of Life APIs (v2).}
+##'
+##'     \item{ \code{dev_url} } { This argument controls whether to use
+##'     the development version of the API. By default, \code{dev_url}
+##'     is set to \code{FALSE}, using \code{dev_url = TRUE} in your
+##'     function calls will use the development version.}
+##'
+##'     }
+##'
+##'     For example, to use the development version of the API, you
+##'     could use: \code{tnrs_match_names("anas", dev_url=TRUE)}
+##'
+##'     Additional arguments can also be passed to the
+##'     \code{\link[httr]{GET}} and \code{\link[httr]{POST}} methods.
+##'
+##'
+##' @section Acknowledgments:
+##'
+##'     This package was started during the Open Tree of Life
+##'     \href{http://blog.opentreeoflife.org/2014/06/11/apply-for-tree-for-all-a-hackathon-to-access-opentree-resources/}{Hackathon}
+##'     organized by OpenTree, the NESCent Hackathon Interoperability
+##'     Phylogenetic group, and Arbor.
+##'
+##' @name rotl
+##' @docType package
+##' @import ape
+NULL
diff --git a/R/studies-methods.R b/R/studies-methods.R
new file mode 100644
index 0000000..352909b
--- /dev/null
+++ b/R/studies-methods.R
@@ -0,0 +1,88 @@
+
+### list_trees -----------------------------------------------------------------
+
+##' List trees ids in objects returned by
+##' \code{\link{studies_find_studies}} and
+##' \code{\link{studies_find_trees}}.
+##'
+##' \code{list_trees} returns all trees associated with a particular
+##' study when used on an object returned by
+##' \code{\link{studies_find_studies}}, but only the trees that match
+##' the search criteria when used on objects returned by
+##' \code{\link{studies_find_trees}}.
+##'
+##' @param matched_studies an object created by
+##'     \code{studies_find_trees} or \code{studies_find_studies}.
+##' @param study_id a \code{study_id} listed in the object returned by
+##'     \code{studies_find_trees}
+##' @param ... Currently unused
+##' @return \code{list_trees} returns a list of the tree_ids for each
+##'     study that match the requested criteria. If a \code{study_id}
+##'     is provided, then only the trees for this study are returned
+##'     as a vector.
+##' @seealso \code{\link{studies_find_studies}} and
+##'     \code{\link{studies_find_trees}}. The help for these functions
+##'     have examples demonstrating the use of \code{list_trees}.
+##' @export
+list_trees <- function(matched_studies, ...) {
+    UseMethod("list_trees")
+}
+
+##' @rdname list_trees
+##' @export
+list_trees.matched_studies <- function(matched_studies, study_id, ...) {
+  ## The tree ids are read from the "found_trees" attribute of the object.
+  trees_by_study <- attr(matched_studies, "found_trees")
+  ## No study requested: hand back everything that was found.
+  if (missing(study_id))
+    return(trees_by_study)
+  ## A requested study must be one of the names of the stored element.
+  if (is.na(match(study_id, names(trees_by_study))))
+    stop(sQuote(study_id), " isn't a valid id.")
+  trees_by_study[[study_id]]
+}
+
+
+
+
+##' @export
+##' @rdname get_study_meta
+get_tree_ids <- function(sm) {
+    UseMethod("get_tree_ids")
+}
+
+##' @export
+##' @rdname get_study_meta
+get_publication <- function(sm) {
+    UseMethod("get_publication")
+}
+
+##' @export
+##' @rdname get_study_meta
+candidate_for_synth <- function(sm) {
+    UseMethod("candidate_for_synth")
+}
+
+##' @export
+##' @rdname get_study_meta
+get_study_year <- function(sm) {
+    UseMethod("get_study_year")
+}
+
+##' @export
+##' @rdname get_study_meta
+get_tree_ids.study_meta <- function(sm) {
+    nexml <- sm[["nexml"]]
+    ## the first entry of the trees element order selects the trees group
+    first_group <- nexml[["^ot:treesElementOrder"]][[1]]
+    ## flatten the ordered tree ids of that group into a vector
+    unlist(nexml[["treesById"]][[first_group]][["^ot:treeElementOrder"]])
+}
+
+##' @export
+##' @rdname get_study_meta
+get_publication.study_meta <- function(sm) {
+    nexml <- sm[["nexml"]]
+    citation <- nexml[["^ot:studyPublicationReference"]]
+    ## the link to the publication travels with the citation as an attribute
+    attr(citation, "DOI") <- nexml[["^ot:studyPublication"]][["@href"]]
+    citation
+}
+
+##' @export
+##' @rdname get_study_meta
+candidate_for_synth.study_meta <- function(sm) {
+    candidates <- sm[["nexml"]][["^ot:candidateTreeForSynthesis"]]
+    ## flatten the stored element into a vector
+    unlist(candidates)
+}
+
+
+
+##' @export
+##' @rdname get_study_meta
+get_study_year.study_meta <- function(sm) {
+    nexml <- sm[["nexml"]]
+    nexml[["^ot:studyYear"]]
+}
diff --git a/R/studies-utils.R b/R/studies-utils.R
new file mode 100644
index 0000000..5bbf779
--- /dev/null
+++ b/R/studies-utils.R
@@ -0,0 +1,77 @@
+## Unexported function that generates a data frame summarizing the metadata.
+## This function is used by both studies_find_studies and studies_find_trees,
+## to generate the output when using the argument detailed=TRUE
+##' @importFrom stats setNames
+summarize_meta <- function(study_ids) {
+    ## Replace anything that flattens to zero elements by an empty string
+    ## so that every field below always holds a value.
+    fill <- function(x) {
+        if (length(unlist(x))) {
+            x
+        } else {
+            ""
+        }
+    }
+    ## One call to get_study_meta() per study id.
+    meta_raw <- lapply(study_ids, function(x) get_study_meta(x))
+    ## Extract the metadata
+    ## (tree_ids and candidate are wrapped in list() so that each one stays
+    ## a single named element of the result, whatever its length)
+    meta <- lapply(meta_raw, function(m) {
+      c(tree_ids =  fill(list(get_tree_ids(m))),
+        study_year = fill(get_study_year(m)),
+        publication = fill(get_publication(m)),
+        doi = fill(attr(get_publication(m), "DOI")),
+        candidate = fill(list(candidate_for_synth(m)))
+        )
+    })
+    ## Convert into a data frame
+    ## (one vector of display values per study, bound row-wise below)
+    dat <- lapply(meta, function(m) {
+        c(n_trees = length(m[["tree_ids"]]),
+          tree_ids = limit_trees(m[["tree_ids"]]),
+          candidate = paste(m[["candidate"]], collapse = ", "),
+          study_year = m[["study_year"]],
+          title =  fill(extract_title(m[["publication"]])),
+          study_doi = m[["doi"]])
+    })
+    dat <- do.call("rbind", dat)
+    dat <- cbind(study_ids = study_ids, dat)
+    rownames(dat) <- NULL
+    dat <- data.frame(dat, stringsAsFactors = FALSE)
+
+    ## Add list of found trees as attributes
+    ## (the full, untruncated tree ids, named by study id)
+    found_trees <- lapply(meta, function(m) {
+      m[["tree_ids"]]
+    })
+    found_trees <- stats::setNames(found_trees, study_ids)
+    attr(dat, "found_trees") <- found_trees
+    ## keep the unprocessed metadata alongside the summary
+    attr(dat, "metadata") <- meta_raw
+
+    dat
+}
+
+
+
+## Unexported function that attempts to extract the title from the
+## citation information associated with the study information. The
+## function returns the element that follows what looks like a year
+## (4 digits, optionally followed by a lowercase letter) in the string.
+## pub_orig: the publication string extracted from the study metadata
+## split_char: the regular expression on which the bibliographic
+## elements are separated (currently only deals with . and ,)
+## Returns a character vector of length 1 holding the candidate title,
+## or character(0) if no title could be found with either separator.
+extract_title <- function(pub_orig, split_char = "\\.") {
+    pub <- unlist(strsplit(pub_orig, split = split_char))
+    ## strip all leading/trailing whitespace so that a year surrounded by
+    ## several blanks is still recognized
+    pub <- gsub("^\\s+|\\s+$", "",  pub)
+    which_year <- grep("^\\d{4}[a-z]?$", pub)
+    ## a year that is the last element has nothing after it: indexing
+    ## past the end would yield NA instead of a title
+    which_year <- which_year[which_year < length(pub)]
+    if (length(which_year) > 0) {
+        ## use the first match only: summarize_meta() stores the result
+        ## in a single `title` field
+        return(pub[which_year[1] + 1])
+    }
+    if (split_char == ",") {
+        ## both separators have been tried without success
+        return(character(0))
+    }
+    extract_title(pub_orig, ",")
+}
+
+## Unexported function that limits the display of tree_ids to the
+## first 5 values; "..." is appended when values were left out.
+limit_trees <- function(x) {
+    shown <- if (length(x) > 5) c(x[seq_len(5)], "...") else x
+    paste(shown, collapse = ", ")
+}
diff --git a/R/studies.R b/R/studies.R
new file mode 100644
index 0000000..56c2f71
--- /dev/null
+++ b/R/studies.R
@@ -0,0 +1,461 @@
+##' Return the list of study properties that can be used to search
+##' studies and trees used in the synthetic tree.
+##'
+##' The list returned has 2 elements \code{tree_properties} and
+##' \code{studies_properties}. Each of these elements lists additional
+##' arguments to customize the API request properties that can be used
+##' to search for trees and studies that are contributing to the
+##' synthetic tree. The definitions of these properties are available
+##' from
+##' \url{https://github.com/OpenTreeOfLife/phylesystem-api/wiki/NexSON}
+##'
+##' @title Properties of the Studies
+##' @param ...  additional arguments to customize the API request (see
+##'     \code{\link{rotl}} package documentation).
+##' @return A list of the study properties that can be used to find
+##'     studies and trees that are contributing to the synthetic tree.
+##' @seealso \code{\link{studies_find_trees}}
+##' @export
+##' @examples
+##' \dontrun{
+##'  all_the_properties <- studies_properties()
+##'  unlist(all_the_properties$tree_properties)
+##' }
+
+studies_properties <- function(...) {
+    ## flatten each element of the API response into a vector
+    lapply(.studies_properties(...), unlist)
+}
+
+
+##' Return the identifiers of studies that match given properties
+##'
+##' @title Find a Study
+##' @param exact Should exact matching be used? (logical, default
+##'     \code{FALSE})
+##' @param property The property to be searched on (character)
+##' @param value The property value to be searched on (character)
+##' @param detailed If \code{TRUE} (default), the function will return
+##'     a data frame that summarizes information about the study (see
+##'     \sQuote{Value}). Otherwise, it only returns the study
+##'     identifiers.
+##' @param verbose Should the output include all metadata (logical
+##'     default \code{FALSE})
+##' @param ...  additional arguments to customize the API request (see
+##'     \code{\link{rotl}} package documentation).
+##' @return If \code{detailed=TRUE}, the function returns a data frame
+##'     listing the study id (\code{study_ids}), the number of trees
+##'     associated with this study (\code{n_trees}), the tree ids (at
+##'     most 5) associated with the studies (\code{tree_ids}), the
+##'     tree id that is a candidate for the synthetic tree if any
+##'     (\code{candidate}), the year of publication of the study
+##'     (\code{study_year}), the title of the publication for the
+##'     study (\code{title}), and the DOI (Digital Object Identifier)
+##'     for the study (\code{study_doi}).
+##'
+##'     If \code{detailed=FALSE}, the function returns a data frame
+##'     with a single column containing the study identifiers.
+##' @seealso \code{\link{studies_properties}} which lists properties
+##'     against which the studies can be
+##'     searched. \code{\link{list_trees}} that returns a list for all
+##'     tree ids associated with a study.
+##' @export
+##' @examples
+##' \dontrun{
+##' ## To match a study for which the identifier is already known
+##' one_study <- studies_find_studies(property="ot:studyId", value="pg_719")
+##' list_trees(one_study)
+##'
+##' ## To find studies pertaining to Mammals
+##' mammals <- studies_find_studies(property="ot:focalCladeOTTTaxonName",
+##'                                 value="mammalia")
+##' ## To extract the tree identifiers for each of the studies
+##' list_trees(mammals)
+##' ## ... or for a given study
+##' list_trees(mammals, "ot_308")
+##'
+##' ## Just the identifiers without other information about the studies
+##' mammals <- studies_find_studies(property="ot:focalCladeOTTTaxonName",
+##'                                 value="mammalia", detailed=FALSE)
+##' }
+studies_find_studies <- function(property=NULL, value=NULL, verbose=FALSE,
+                                 exact=FALSE, detailed = TRUE, ...) {
+    api_res <- .studies_find_studies(property = property, value = value,
+                                     verbose = verbose, exact = exact, ...)
+
+    ## one study identifier per matched study
+    ids <- vapply(api_res[["matched_studies"]],
+                  function(study) study[["ot:studyId"]],
+                  character(1))
+    if (!detailed) {
+        ## identifiers only: the raw API response is kept as metadata and
+        ## the trees are not looked up
+        out <- data.frame(study_ids = ids, stringsAsFactors = FALSE)
+        attr(out, "found_trees") <- paste("If you want to get a list of the",
+                                          "trees associated with the studies,",
+                                          "use", sQuote("detailed = TRUE"))
+        class(out) <- c("study_ids", class(out))
+        attr(out, "metadata") <- api_res
+    } else {
+        out <- summarize_meta(ids)
+    }
+    class(out) <- c("matched_studies", class(out))
+    out
+}
+
+##' @export
+print.study_ids <- function(x, ...) {
+    ## print the formatted version of the object rather than the object itself
+    formatted <- format(x)
+    print(formatted, ...)
+}
+
+##' Return a list of studies for which trees match a given set of
+##' properties
+##'
+##' The list of possible values to be used as values for the argument
+##' \code{property} can be found using the function
+##' \code{\link{studies_properties}}.
+##'
+##' @title Find Trees
+##' @param property The property to be searched on (character)
+##' @param value The property-value to be searched on (character)
+##' @param verbose Should the output include all metadata? (logical,
+##'     default \code{FALSE})
+##' @param exact Should exact matching be used for the value?
+##'     (logical, default \code{FALSE})
+##' @param detailed Should a detailed report be provided? If
+##'     \code{TRUE} (default), the output will include metadata about
+##'     the study that include trees matching the property. Otherwise,
+##'     only information about the trees will be provided.
+##' @param ... additional arguments to customize the API request (see
+##'     \code{\link{rotl}} package documentation).
+##' @return A data frame that summarizes the trees found (and their
+##'     associated studies) for the requested criteria. If a study has
+##'     more than 5 trees, the \code{tree_ids} of the first ones will
+##'     be shown, followed by \code{...} to indicate that more are
+##'     present.
+##'
+##'     If \code{detailed=FALSE}, the data frame will include the
+##'     study ids of the study (\code{study_ids}), the number of trees
+##'     in this study that match the search criteria
+##'     (\code{n_matched_trees}), the tree ids that match the search
+##'     criteria (\code{match_tree_ids}).
+##'
+##'     If \code{detailed=TRUE}, in addition of the fields listed
+##'     above, the data frame will also contain the total number of
+##'     trees associated with the study (\code{n_trees}), all the tree
+##'     ids associated with the study (\code{tree_ids}), the tree id
+##'     that is a potential candidate for inclusion in the synthetic
+##'     tree (if any) (\code{candidate}), the year the study was
+##'     published (\code{study_year}), the title of the study
+##'     (\code{title}), the DOI for the study (\code{study_doi}).
+##'
+##' @seealso \code{\link{studies_properties}} which lists properties
+##'   the studies can be searched on. \code{\link{list_trees}} for
+##'   listing the trees that match the query.
+##' @export
+##' @importFrom stats setNames
+##' @examples
+##' \dontrun{
+##' res <- studies_find_trees(property="ot:ottTaxonName", value="Drosophila",
+##'                           detailed=FALSE)
+##' ## summary of the trees and associated studies that match this criterion
+##' res
+##' ## With metadata about the studies (default)
+##' res <- studies_find_trees(property="ot:ottTaxonName", value="Drosophila",
+##'                           detailed=TRUE)
+##' ## The list of trees for each study that match the search criteria
+##' list_trees(res)
+##' ## the trees for a given study
+##' list_trees(res, study_id = "pg_2769")
+##' }
+studies_find_trees <- function(property=NULL, value=NULL, verbose=FALSE,
+                               exact=FALSE, detailed = TRUE, ...) {
+    .res <- .studies_find_trees(property = property, value = value,
+                               verbose = verbose, exact = exact, ...)
+    ## one study identifier per matched study
+    study_ids <- vapply(.res[["matched_studies"]],
+                        function(x) x[["ot:studyId"]],
+                        character(1))
+    ## number of trees matching the query in each study
+    n_matched_trees <- vapply(.res[["matched_studies"]],
+                              function(x) length(x[["matched_trees"]]),
+                              numeric(1))
+    ## for each study, the "nexson_id" of every matched tree
+    match_tree_ids <- lapply(.res[["matched_studies"]],
+                             function(x) {
+        sapply(x[["matched_trees"]],
+               function(y) y[["nexson_id"]])
+    })
+    # this one doesn't return all of the treeids. confusing, bc trees are what is wanted
+    #tree_str <- vapply(match_tree_ids, limit_trees, character(1))
+    ## all the matched tree ids of a study collapsed into a single string
+    tree_str <- sapply(match_tree_ids, function(x) paste(x, collapse = ", "))
+    res <- data.frame(study_ids, n_matched_trees, match_tree_ids = tree_str,
+                      stringsAsFactors = FALSE)
+    if (detailed) {
+        meta <- summarize_meta(study_ids)
+        # the next bit seems really slow (JWB)
+        ## merge() is called without `by`, so it joins on the columns the
+        ## two data frames have in common
+        res <- merge(meta, res)
+        attr(res, "metadata") <- attr(meta, "metadata")
+    } else {
+        ## without the study summaries, keep the raw API response instead
+        attr(res, "metadata") <- .res
+    }
+    ## the untruncated matched tree ids, named by study id
+    attr(res, "found_trees") <- stats::setNames(match_tree_ids, study_ids)
+    class(res) <- c("matched_studies", class(res))
+    res
+}
+
+
+
+##' Returns the trees associated with a given study
+##'
+##' If \code{file_format} is missing, the function returns an object
+##' of the class \code{phylo} from the \code{\link[ape]{ape}} package
+##' (default), or an object of the class \code{nexml} from the
+##' \code{RNeXML} package.
+##'
+##' Otherwise \code{file_format} can be either \code{newick},
+##' \code{nexus}, \code{nexml} or \code{json}, and the function will
+##' generate a file of the selected format. In this case, a file name
+##' needs to be provided using the argument \code{file}. If a file
+##' with the same name already exists, it will be silently
+##' overwritten.
+##'
+##' @title Get all the trees associated with a particular study
+##' @param study_id the study ID for the study of interest (character)
+##' @param object_format the class of the object the query should
+##'     return (either \code{phylo} or \code{nexml}). Ignored if
+##'     \code{file_format} is specified.
+##' @param file_format the format of the file to be generated
+##'     (\code{newick}, \code{nexus}, \code{nexml} or \code{json}).
+##' @param file the file name where the output of the function will be
+##'     saved.
+##' @param ...  additional arguments to customize the API request (see
+##'     \code{\link{rotl}} package documentation).
+##' @return if \code{file_format} is missing, an object of class
+##'     \code{phylo} or \code{nexml}, otherwise a logical indicating
+##'     whether the file was successfully created.
+##' @seealso \code{\link{get_study_meta}}
+##' @export
+##' @importFrom jsonlite toJSON
+##' @examples
+##' \dontrun{
+##' that_one_study <- get_study(study_id="pg_719", object_format="phylo")
+##' if (require(RNeXML)) { ## if RNeXML is installed get the object directly
+##'    nexml_study <- get_study(study_id="pg_719", object_format="nexml")
+##' } else { ## otherwise write it to a file
+##'    get_study(study_id="pg_719", file_format="nexml", file=tempfile(fileext=".nexml"))
+##' }
+##' }
+get_study <- function(study_id = NULL, object_format = c("phylo", "nexml"),
+                      file_format, file, ...) {
+    object_format <- match.arg(object_format)
+    if (!missing(file)) {
+        ## Writing to a file: `file_format` decides what is requested and
+        ## `object_format` is ignored.
+        if (missing(file_format)) {
+            stop(sQuote("file_format"), " must be specified.")
+        }
+        file_format <- match.arg(file_format, c("newick", "nexus", "nexml", "json"))
+        ## Forward `...` so that the arguments customizing the API request
+        ## also apply when the result is written to a file, as they
+        ## already do when an object is returned.
+        res <- .get_study(study_id = study_id, format = file_format, ...)
+        ## Remove any existing file first: it is silently overwritten.
+        unlink(file)
+        if (identical(file_format, "json")) {
+            cat(jsonlite::toJSON(res), file=file)
+        } else {
+            cat(res, file=file)
+        }
+        ## TRUE when the file now exists, returned invisibly
+        return(invisible(file.exists(file)))
+    }
+    ## Returning an object: request the format the converter is fed with.
+    if (identical(object_format, "phylo")) {
+        res <- .get_study(study_id = study_id, format = "newick", ...)
+        res <- phylo_from_otl(res)
+    } else if (identical(object_format, "nexml")) {
+        res <- .get_study(study_id = study_id, format = "nexml", ...)
+        res <- nexml_from_otl(res)
+    } else stop("Something is very wrong. Contact us.")
+    res
+}
+
+##' Returns a specific tree from within a study
+##'
+##' @title Study Tree
+##' @param study_id the identifier of a study (character)
+##' @param tree_id the identifier of a tree within the study
+##' @param object_format the class of the object to be returned
+##'     (default and currently only possible value \code{phylo} from
+##'     the \code{\link[ape]{ape}} package).
+##' @param tip_label the format of the tip
+##'     labels. \dQuote{\code{original_label}} (default) returns the
+##'     original labels as provided in the study,
+##'     \dQuote{\code{ott_id}} labels are replaced by their ott IDs,
+##'     \dQuote{\code{ott_taxon_name}} labels are replaced by their
+##'     Open Tree Taxonomy taxon name.
+##' @param file_format the format of the file to be generated
+##'     (\code{newick} default, \code{nexus}, or \code{json}).
+##' @param file the file name where the output of the function will be
+##'     saved.
+##' @param deduplicate logical (default \code{TRUE}). If the tree
+##' returned by the study contains duplicated taxon names, should they
+##' be made unique? It is normally illegal for NEXUS/Newick tree
+##' strings to contain duplicated tip names. This is a workaround to
+##' circumvent this requirement. If \code{TRUE}, duplicated tip labels
+##' will be appended \code{_1}, \code{_2}, etc.
+##' @param ...  additional arguments to customize the API request (see
+##'     \code{\link{rotl}} package documentation).
+##' @return if \code{file_format} is missing, an object of class
+##'     \code{phylo}, otherwise a logical indicating whether the file
+##'     was successfully created.
+##' @export
+##' @importFrom jsonlite toJSON
+##' @examples
+##' \dontrun{
+##'  tree <- get_study_tree(study_id="pg_1144", tree="tree2324")
+##'
+##'  ## comparison of the first few tip labels depending on the options used
+##'  head(get_study_tree(study_id="pg_1144", tree="tree2324", tip_label="original_label")$tip.label)
+##'  head(get_study_tree(study_id="pg_1144", tree="tree2324", tip_label="ott_id")$tip.label)
+##'  head(get_study_tree(study_id="pg_1144", tree="tree2324", tip_label="ott_taxon_name")$tip.label)
+##' }
+
+get_study_tree <- function(study_id = NULL, tree_id = NULL, object_format = c("phylo"),
+                           tip_label = c("original_label", "ott_id", "ott_taxon_name"),
+                           file_format, file, deduplicate = TRUE, ...) {
+
+    object_format <- match.arg(object_format)
+    tip_label <- match.arg(tip_label)
+    ## Translate the user-facing label names into the values sent to the
+    ## API. The keys must be the choices accepted by match.arg() above,
+    ## otherwise switch() silently yields NULL.
+    tip_label <- switch(tip_label,
+                        original_label = "ot:originallabel",
+                        ott_id =  "ot:ottid",
+                        ott_taxon_name = "ot:otttaxonname")
+    if (!missing(file)) {
+        ## Writing to a file: `file_format` decides what is requested.
+        if (missing(file_format)) {
+            stop(sQuote("file_format"), " must be specified.")
+        }
+        file_format <- match.arg(file_format, c("newick", "nexus", "json"))
+        res <- .get_study_tree(study_id = study_id, tree_id = tree_id,
+                               format=file_format, tip_label = tip_label, ...)
+        ## Remove any existing file first: it is silently overwritten.
+        unlink(file)
+        if (identical(file_format, "json")) {
+            cat(jsonlite::toJSON(res), file=file)
+        } else {
+            cat(res, file=file)
+        }
+        ## TRUE when the file now exists, returned invisibly
+        return(invisible(file.exists(file)))
+    }
+    ## Returning an object: the newick string is converted to a phylo
+    ## object, with the tip labels deduplicated if requested.
+    if (identical(object_format, "phylo")) {
+        res <- .get_study_tree(study_id = study_id, tree_id = tree_id,
+                               format = "newick", tip_label = tip_label, ...)
+        res <- phylo_from_otl(res, dedup = deduplicate)
+    } else stop("Something is very wrong. Contact us.")
+    res
+}
+
+##' Retrieve metadata about a study in the Open Tree of Life datastore.
+##'
+##' \code{get_study_meta} returns a long list of attributes for the
+##' studies that are contributing to the synthetic tree. To help with
+##' the extraction of relevant information from this list, several
+##' helper functions exist: \itemize{
+##'
+##'   \item {get_tree_ids} { The identifiers of the trees
+##'   associated with the study }
+##'
+##'   \item {get_publication} { The citation information of the
+##'   publication for the study. The DOI (or URL) for the study is
+##'   available as an attribute to the returned object (i.e.,
+##'   \code{attr(object, "DOI")}). }
+##'
+##'   \item {candidate_for_synth} { The identifier of the tree(s) from
+##'   the study used in the synthetic tree. This is a subset of the
+##'   result of \code{get_tree_ids}. }
+##'
+##'   \item {get_study_year} { The year of publication of the study. }
+##'
+##'   }
+##'
+##'
+##' @title Study Metadata
+##' @param study_id the study identifier (character)
+##' @param ...  additional arguments to customize the API request (see
+##'     \code{\link{rotl}} package documentation).
+##' @param sm an object created by \code{get_study_meta}
+##' @return named-list containing the metadata associated with the
+##'     study requested
+##' @export
+##' @examples
+##' \dontrun{
+##' req <- get_study_meta("pg_719")
+##' get_tree_ids(req)
+##' candidate_for_synth(req)
+##' get_publication(req)
+##' get_study_year(req)
+##' }
+get_study_meta <- function(study_id, ...) {
+    meta <- .get_study_meta(study_id = study_id, ...)
+    ## remember which study the metadata belongs to (used when printing)
+    attr(meta, "study_id") <- study_id
+    class(meta) <- "study_meta"
+    meta
+}
+
+##' @export
+print.study_meta <- function(x, ...) {
+    ## header line naming the study
+    study <- attr(x, "study_id")
+    cat("Metadata for OToL study ", study, ". Contents:\n", sep="")
+    ## one line per element found under $nexml
+    slots <- names(x$nexml)
+    cat(paste0("  $nexml$", slots), sep="\n")
+}
+
+##' Retrieve subtree from a specific tree in the Open Tree of Life data store
+##'
+##' @title Study Subtree
+##' @param study_id the study identifier (character)
+##' @param tree_id the tree identifier (character)
+##' @param object_format the class of the object returned by the
+##'     function (default, and currently only possibility \code{phylo}
+##'     from the \code{\link[ape]{ape}} package)
+##' @param file_format character, the file format to use to save the
+##'     results of the query (possible values, \sQuote{newick},
+##'     \sQuote{nexus}, \sQuote{json}).
+##' @param file character, the path and file name where the output
+##'     should be written.
+##' @param subtree_id either a node id that specifies a subtree or
+##'     \dQuote{ingroup} which returns the ingroup for this subtree.
+##' @param ...  additional arguments to customize the API request (see
+##'     \code{\link{rotl}} package documentation).
+##' @export
+##' @importFrom jsonlite toJSON
+##' @examples
+##' \dontrun{
+##' small_tr <- get_study_subtree(study_id="pg_1144", tree="tree2324", subtree_id="node552052")
+##' ingroup  <- get_study_subtree(study_id="pg_1144", tree="tree2324", subtree_id="ingroup")
+##' nexus_file <- tempfile(fileext=".nex")
+##' get_study_subtree(study_id="pg_1144", tree="tree2324", subtree_id="ingroup", file=nexus_file,
+##'                   file_format="nexus")
+##' }
+get_study_subtree <- function(study_id, tree_id, subtree_id, object_format=c("phylo"),
+                              file_format, file, ...) {
+    object_format <- match.arg(object_format)
+    if (!missing(file)) {
+        ## Writing to a file: `file_format` decides what is requested.
+        if (missing(file_format)) {
+            stop(sQuote("file_format"), " must be specified.")
+        }
+        file_format <- match.arg(file_format, c("newick", "nexus", "json"))
+        subtree <- .get_study_subtree(study_id = study_id, tree_id = tree_id,
+                                      subtree_id = subtree_id, format=file_format, ...)
+        ## Remove any existing file before writing the new content.
+        unlink(file)
+        if (identical(file_format, "json")) {
+            cat(jsonlite::toJSON(subtree), file=file)
+        } else {
+            cat(subtree, file=file)
+        }
+        ## TRUE when the file now exists, returned invisibly
+        return(invisible(file.exists(file)))
+    }
+    if (identical(object_format, "phylo")) {
+        subtree <- .get_study_subtree(study_id = study_id, tree_id = tree_id,
+                                      subtree_id = subtree_id, format = "newick", ...)
+        subtree <- phylo_from_otl(subtree)
+        ## NeXML should be possible for both object_format and file_format but it seems there
+        ## is something wrong with the server at this time (FM - 2015-06-07)
+        ## } else if (identical(object_format, "nexml")) {
+        ##    file_format <- "nexml"
+        ##    res <- .get_study_subtree(study_id, tree_id, subtree_id, format=file_format)
+        ##    res <- nexml_from_otl(res)
+    } else stop("Something is very wrong. Contact us.")
+    subtree
+}
diff --git a/R/tax_utils.R b/R/tax_utils.R
new file mode 100644
index 0000000..4bdeff9
--- /dev/null
+++ b/R/tax_utils.R
@@ -0,0 +1,73 @@
+## all extended-taxon-descriptors have:
+## - ott_id
+## - name
+## - rank
+## - unique_name
+## - tax_sources
+## and they may have
+## - flags
+## - synonyms
+## - is_suppressed
+
+## Factory producing the accessor closures for taxon descriptors.
+## slot: name of the list element the accessor extracts
+## flatten: if TRUE, the element goes through unlist() before being returned
+## optional: if TRUE a missing slot warns and yields NULL, otherwise it errors
+tax_access_factory <- function(slot, flatten, optional) {
+    function(tax) {
+        has_slot <- exists(slot, tax)
+        ## mandatory slot absent: the object is not a valid taxon descriptor
+        if (!has_slot && !optional) {
+            stop("Invalid taxon object", call. = FALSE)
+        }
+        if (!has_slot) {
+            warning("This object doesn't have ", sQuote(slot), call. = FALSE)
+            return(NULL)
+        }
+        value <- tax[[slot]]
+        if (flatten) {
+            value <- unlist(value)
+        }
+        value
+    }
+}
+
+.tax_ott_id <- tax_access_factory("ott_id", flatten = FALSE, optional = FALSE)
+
+.tax_name <- tax_access_factory("name", flatten = FALSE, optional = FALSE)
+
+.tax_rank <- tax_access_factory("rank", flatten = FALSE, optional = FALSE)
+
+.tax_sources <- tax_access_factory("tax_sources", flatten = TRUE,
+                                  optional = FALSE)
+
+.tax_unique_name <- tax_access_factory("unique_name", flatten = FALSE,
+                                      optional = FALSE)
+
+## Accessors for slots that only some descriptors carry (optional = TRUE):
+## a missing slot gives a warning and NULL instead of an error.
+.tax_flags <- tax_access_factory("flags", flatten = TRUE, optional = TRUE)
+
+.tax_is_suppressed <- tax_access_factory("is_suppressed", flatten = FALSE,
+                                        optional = TRUE)
+
+.tax_synonyms <- tax_access_factory("synonyms", flatten = TRUE, optional = TRUE)
+
+## Does the slot element represent a taxon?
+## TRUE only when every mandatory descriptor field appears among the element's
+## names; an element without names therefore gives FALSE.
+is_taxon <- function(slot) {
+    mandatory <- c("ott_id", "name", "rank", "tax_sources",
+                   "unique_name")
+    has_all <- all(mandatory %in% names(slot))
+    has_all
+}
+
+### adds a class to the objects returned by the methods
+add_otl_class <- function(res, .f) {
+    ## the slot name is read from the enclosing environment of the closure .f
+    ## the "otl_" prefix avoids clashes: apparently the class "name" already exists
+    slot_name <- as.list(environment(.f))[["slot"]]
+    class(res) <- c(paste0("otl_", slot_name), class(res))
+    res
+}
diff --git a/R/taxonomy.R b/R/taxonomy.R
new file mode 100644
index 0000000..ee01892
--- /dev/null
+++ b/R/taxonomy.R
@@ -0,0 +1,325 @@
+##' Summary information about the Open Tree Taxonomy (OTT)
+##'
+##' Return metadata and information about the taxonomy
+##' itself. Currently, the available metadata is fairly sparse, but
+##' includes (at least) the version, and the location from which the
+##' complete taxonomy source files can be downloaded.
+##'
+##' @title Information about the Open Tree Taxonomy
+##' @param ... additional arguments to customize the API request (see
+##'     \code{\link{rotl}} package documentation).
+##' @return A list with the following properties:
+##' \itemize{
+##'
+##'     \item {weburl} {String. The release page for this version
+##'     of the taxonomy.}
+##'
+##'     \item {author} {String. The author string.}
+##'
+##'     \item {name} {String. The name of the taxonomy.}
+##'
+##'     \item {source} {String. The full identifying information for
+##'     this version of the taxonomy.}
+##'
+##'     \item {version} {String. The version number of the taxonomy.}
+##' }
+##' @examples
+##' \dontrun{
+##' taxonomy_about()
+##' }
+##' @export
+taxonomy_about <- function (...) {
+    about <- .taxonomy_about(...)  # all arguments are forwarded unchanged
+    about
+}
+
+
+##' Information about taxa.
+##'
+##' Given a vector of ott ids, \code{taxonomy_taxon_info} returns
+##' information about the specified taxa.
+##'
+##' The functions \code{tax_rank}, \code{tax_name}, and
+##' \code{synonyms} can extract this information from an object
+##' created by the \code{taxonomy_taxon_info()}.
+##'
+##' @title Taxon information
+##' @param ott_ids the ott ids of the taxon of interest (numeric or
+##'     character containing only numbers)
+##' @param include_children whether to include information about all
+##'     the children of this taxon. Default \code{FALSE}.
+##' @param include_lineage whether to include information about all
+##'     the higher level taxa that include the \code{ott_ids}.
+##'     Default \code{FALSE}.
+##' @param include_terminal_descendants whether to include the list of
+##'     terminal \code{ott_ids} contained in the \code{ott_ids}
+##'     provided.
+##' @param ... additional arguments to customize the API request (see
+##'     \code{\link{rotl}} package documentation).
+##' @param tax an object generated by the \code{taxonomy_taxon_info}
+##'     function
+##' @return \code{taxonomy_taxon_info} returns a list detailing
+##'     information about the taxa. \code{tax_rank} and
+##'     \code{tax_name} return a vector. \code{synonyms} returns a
+##'     list whose elements are the synonyms for each of the
+##'     \code{ott_id} requested.
+##'
+##' @seealso \code{\link{tnrs_match_names}} to obtain \code{ott_id}
+##'     from a taxonomic name.
+##' @examples
+##' \dontrun{
+##' req <- taxonomy_taxon_info(ott_ids=515698)
+##' tax_rank(req)
+##' tax_name(req)
+##' synonyms(req)
+##' }
+##' @export
+taxonomy_taxon_info <- function (ott_ids, include_children = FALSE,
+                                 include_lineage = FALSE,
+                                 include_terminal_descendants = FALSE, ...) {
+    fetch_one <- function(id) {  # one low-level call per ott id
+        .taxonomy_taxon_info(
+            ott_id = id,
+            include_children = include_children,
+            include_lineage = include_lineage,
+            include_terminal_descendants = include_terminal_descendants,
+            ...
+        )
+    }
+    info <- lapply(ott_ids, fetch_one)
+    names(info) <- ott_ids
+    structure(info, class = "taxon_info")
+}
+
+
+##' Given an ott id, return the inclusive taxonomic subtree descended
+##' from the specified taxon.
+##'
+##' If the output of this function is exported to a file, the only
+##' possible value for the \code{output_format} argument is
+##' \dQuote{\code{newick}}. If the file provided already exists, it
+##' will be silently overwritten.
+##'
+##' @title Taxonomy subtree
+##' @param ott_id The ott id of the taxon of interest.
+##' @param output_format the format of the object to be returned. See
+##'     the \sQuote{Return} section.
+##' @param label_format Character. Defines the label type; one of
+##'     \dQuote{\code{name}}, \dQuote{\code{id}}, or
+##'      \dQuote{\code{name_and_id}} (the default).
+##' @param ... additional arguments to customize the API request (see
+##'     \code{\link{rotl}} package documentation).
+##' @param file the file name where to save the output of the
+##'     function. Ignored unless \code{output_format} is set to
+##'     \dQuote{\code{newick}}.
+##' @return If the \code{file} argument is missing: \itemize{
+##'
+##'     \item{\dQuote{\code{taxa}}} { a list of the taxa names
+##'     (species) in slot \code{tip_label}, and higher-level taxonomy
+##'     (e.g., families, genera) in slot \code{edge_label}, descending
+##'     from the taxa corresponding to the \code{ott_id} provided. }
+##'
+##'     \item{\dQuote{\code{newick}}} { a character vector containing
+##'     the newick formatted string corresponding to the taxonomic
+##'     subtree for the \code{ott_id} provided. }
+##'
+##'     \item{\dQuote{\code{phylo}}} { an object of the class
+##'     \code{phylo} from the \code{\link[ape]{ape}} package. }
+##'
+##'     \item{\dQuote{\code{raw}}} { the direct output from the API,
+##'     i.e., a list with an element named \sQuote{newick} that
+##'     contains the subtree as a newick formatted string. }
+##'
+##'     }
+##'
+##'     If a \code{file} argument is provided (and
+##'     \code{output_format} is set to \dQuote{\code{newick}}), a
+##'     logical indicating whether the file was successfully created.
+##'
+##' @examples
+##' \dontrun{
+##' req <- taxonomy_subtree(ott_id=515698)
+##' plot(taxonomy_subtree(ott_id=515698, output_format="phylo"))
+##' }
+##' @export
+taxonomy_subtree <- function (ott_id=NULL,
+                              output_format = c("taxa", "newick", "phylo", "raw"),
+                              label_format=NULL, file, ...) {
+    output_format <- match.arg(output_format)
+    res <- .taxonomy_subtree(ott_id = ott_id, label_format = label_format, ...)
+    ## only the newick string can be written to disk; say so for other formats
+    if (!missing(file) && !identical(output_format, "newick"))
+        warning(sQuote("file"),
+                " argument is ignored, you can only write newick tree strings to a file.")
+    if (identical(output_format, "raw")) return(res)
+    if (identical(output_format, "newick")) {
+        res <- res$newick
+        if (!missing(file)) {
+            unlink(file)
+            cat(res, file = file)
+            ## return() has to be the outer call: in invisible(return(x)) the
+            ## function exits before invisible() runs and the value is echoed
+            return(invisible(file.exists(file)))
+        }
+    } else if (identical(output_format, "phylo")) {
+        res <- phylo_from_otl(res)
+    } else res <- tree_to_labels(res)  ## in all other cases use tree_to_labels
+    res
+}
+
+
+##' Taxonomic Least Inclusive Common Ancestor (MRCA)
+##'
+##' Given a set of OTT ids, get the taxon that is the most recent common
+##' ancestor (the MRCA) of all the identified taxa.
+##'
+##' @title Taxonomic MRCA
+##' @param ott_ids a vector of ott ids for the taxa whose MRCA is to
+##'     be found (numeric).
+##' @param tax an object generated by the \code{taxonomy_mrca}
+##'     function
+##' @param ... additional arguments to customize the API request (see
+##'     \code{\link{rotl}} package documentation).
+##' @return \itemize{
+##'
+##'     \item{\code{taxonomy_mrca}} { returns a list about the
+##'     taxonomic information relating to the MRCA for the ott_ids
+##'     provided. }
+##'
+##'     \item{\code{tax_rank}} { returns a character vector of the
+##'     taxonomic rank for the MRCA. }
+##'
+##'     \item{\code{tax_name}} { returns a character vector of the
+##'     Open Tree Taxonomy name for the MRCA. }
+##'
+##'     \item{\code{ott_id}} { returns a numeric vector of the ott id
+##'     for the MRCA. }
+##'
+##' }
+##' @examples
+##' \dontrun{
+##' req <- taxonomy_mrca(ott_ids=c(515698,590452,643717))
+##' tax_rank(req)
+##' tax_name(req)
+##' ott_id(req)
+##' }
+##' @export
+taxonomy_mrca <- function (ott_ids=NULL, ...) {
+    mrca <- .taxonomy_mrca(ott_ids = ott_ids, ...)
+    class(mrca) <- c("taxon_mrca", class(mrca))  # prepend, keeping prior classes
+    mrca
+}
+
+
+
+### methods for taxonomy_taxon_info ---------------------------------------------
+
+taxon_info_method_factory <- function(.f) {
+    function(tax, ...) {
+        ## apply the accessor to each taxon, labelling results by unique name
+        values <- lapply(tax, .f)
+        names(values) <- vapply(tax, .tax_unique_name, character(1))
+        add_otl_class(values, .f)
+    }
+}
+
+##' @export
+##' @rdname taxonomy_taxon_info
+tax_rank.taxon_info <- taxon_info_method_factory(.tax_rank)
+
+##' @export
+##' @rdname taxonomy_taxon_info
+tax_name.taxon_info <- taxon_info_method_factory(.tax_name)
+
+##' @export
+##' @rdname taxonomy_taxon_info
+unique_name.taxon_info <- taxon_info_method_factory(.tax_unique_name)
+
+##' @export
+##' @rdname taxonomy_taxon_info
+synonyms.taxon_info <- taxon_info_method_factory(.tax_synonyms)
+
+##' @export
+##' @rdname taxonomy_taxon_info
+ott_id.taxon_info <- taxon_info_method_factory(.tax_ott_id)
+
+##' @export
+##' @rdname taxonomy_taxon_info
+tax_sources.taxon_info <- taxon_info_method_factory(.tax_sources)
+
+##' @export
+##' @rdname taxonomy_taxon_info
+is_suppressed.taxon_info <- taxon_info_method_factory(.tax_is_suppressed)
+
+##' @export
+##' @rdname taxonomy_taxon_info
+flags.taxon_info <- taxon_info_method_factory(.tax_flags)
+
+
+### methods for taxonomy_mrca ---------------------------------------------------
+
+taxon_mrca_method_factory <- function(.f) {
+    function(tax, ...)  {
+        mrca_taxon <- tax[["mrca"]]  # the element the accessors are applied to
+        value <- list(.f(mrca_taxon))
+        names(value) <- .tax_unique_name(mrca_taxon)
+        add_otl_class(value, .f)
+    }
+}
+
+##' @export
+##' @rdname taxonomy_mrca
+tax_rank.taxon_mrca <- taxon_mrca_method_factory(.tax_rank)
+
+##' @export
+##' @rdname taxonomy_mrca
+tax_name.taxon_mrca <- taxon_mrca_method_factory(.tax_name)
+
+##' @export
+##' @rdname taxonomy_mrca
+ott_id.taxon_mrca <- taxon_mrca_method_factory(.tax_ott_id)
+
+##' @export
+##' @rdname taxonomy_mrca
+unique_name.taxon_mrca <- taxon_mrca_method_factory(.tax_unique_name)
+
+##' @export
+##' @rdname taxonomy_mrca
+tax_sources.taxon_mrca <- taxon_mrca_method_factory(.tax_sources)
+
+##' @export
+##' @rdname taxonomy_mrca
+flags.taxon_mrca <- taxon_mrca_method_factory(.tax_flags)
+
+##' @export
+##' @rdname taxonomy_mrca
+is_suppressed.taxon_mrca <- taxon_mrca_method_factory(.tax_is_suppressed)
+
+### method for extracting higher taxonomy from taxonomy_taxon_info calls  -------
+
+get_lineage <- function(tax) {
+    check_lineage(tax)
+    ## one named vector per lineage entry, stacked row-wise into a data frame
+    rows <- lapply(tax[["lineage"]], build_lineage)
+    as.data.frame(do.call("rbind", rows), stringsAsFactors = FALSE)
+}
+
+build_lineage <- function(x) {
+    ## named vector with the four fields reported for one lineage entry
+    c(rank = .tax_rank(x), name = .tax_name(x),
+      unique_name = .tax_unique_name(x),
+      ott_id = .tax_ott_id(x))
+}
+
+check_lineage <- function(tax) {
+    ## nothing to report when the "lineage" element is present
+    if (exists("lineage", tax)) return(invisible(NULL))
+    stop("The object needs to be created using ",
+         sQuote("include_lineage=TRUE"))
+}
+
+##' @export
+##' @rdname tax_lineage
+tax_lineage.taxon_info <- function(tax, ...) {
+    lapply(X = tax, FUN = get_lineage)  # one lineage data frame per taxon
+}
diff --git a/R/tnrs.R b/R/tnrs.R
new file mode 100644
index 0000000..11ae453
--- /dev/null
+++ b/R/tnrs.R
@@ -0,0 +1,250 @@
+
+##' Match taxonomic names to the Open Tree Taxonomy.
+##'
+##' Accepts one or more taxonomic names and returns information about
+##' potential matches for these names to known taxa in the Open Tree
+##' Taxonomy.
+##'
+##' This service uses taxonomic contexts to disambiguate homonyms and
+##' misspelled names; a context may be specified using the
+##' \code{context_name} argument. If no context is specified, then the
+##' context will be inferred (i.e., the shallowest taxonomic context
+##' that contains all unambiguous names in the input). Taxonomic
+##' contexts are uncontested higher taxa that have been selected to
+##' allow limits to be applied to the scope of TNRS searches
+##' (e.g. 'match names only within flowering plants'). Once a context
+##' has been identified (either user-specified or inferred), all taxon
+##' name matches will be performed only against taxa within that
+##' context. For a list of available taxonomic contexts, see
+##' \code{\link{tnrs_contexts}}.
+##'
+##' A name is considered unambiguous if it is not a synonym and has
+##' only one exact match to any taxon name in the entire taxonomy.
+##'
+##' Several functions listed in the \sQuote{See also} section can be
+##' used to inspect and manipulate the object generated by this
+##' function.
+##'
+##'
+##' @title Match names to the Open Tree Taxonomy
+##' @param names taxon names to be queried. Currently limited to
+##'     10,000 names for exact matches and 2,500 names for approximate
+##'     matches (character vector)
+##' @param context_name name of the taxonomic context to be searched
+##'     (length-one character vector). Must match (case sensitive) one
+##'     of the values returned by \code{\link{tnrs_contexts}}.
+##' @param do_approximate_matching A logical indicating whether or not
+##'     to perform approximate string (a.k.a. \dQuote{fuzzy})
+##'     matching. Using \code{FALSE} will greatly improve
+##'     speed. Default, however, is \code{TRUE}.
+##' @param ids A vector of ids to use for identifying names. These
+##'     will be assigned to each name in the names array. If ids is
+##'     provided, then ids and names must be identical in length.
+##' @param include_suppressed Ordinarily, some quasi-taxa, such as
+##'     incertae sedis buckets and other non-OTUs, are suppressed from
+##'     TNRS results. If this parameter is true, these quasi-taxa are
+##'     allowed as possible TNRS results.
+##' @param ...  additional arguments to customize the API request (see
+##'     \code{\link{rotl}} package documentation).
+##' @return A data frame summarizing the results of the query. The
+##'     original query output is appended as an attribute to the
+##'     returned object (and can be obtained using \code{attr(object,
+##'     "original_response")}).
+##' @seealso \code{\link{inspect.match_names}},
+##'     \code{\link{update.match_names}},
+##'     \code{\link{synonyms.match_names}}.
+##' @examples \dontrun{
+##'  deuterostomes <- tnrs_match_names(names=c("echinodermata", "xenacoelomorpha",
+##'                                             "chordata", "hemichordata"))
+##' }
+##' @importFrom stats setNames
+##' @export
+tnrs_match_names <- function(names = NULL, context_name = NULL,
+                             do_approximate_matching = TRUE, ids = NULL,
+                             include_suppressed = FALSE, ...) {
+    ## reject a context name that tnrs_contexts() does not list
+    if (!is.null(context_name) &&
+        !context_name %in% unlist(tnrs_contexts(...))) {
+        stop("The ", sQuote("context_name"),
+             " is not valid. Check possible values using tnrs_contexts()")
+    }
+    ## low-level call defined elsewhere; presumably performs the API request
+    res <- .tnrs_match_names(names = names, context_name = context_name,
+                             do_approximate_matching = do_approximate_matching,
+                             ids = ids, include_suppressed = include_suppressed,
+                             ...)
+    ## errors when there are no results, warns about unmatched names
+    check_tnrs(res)
+    summary_match <- build_summary_match(res,
+                                         res_id = seq_along(res[["results"]]),
+                                         match_id = 1, initial_creation = TRUE)
+    ## remove backslashes from the search strings, then order rows like 'names'
+    summary_match$search_string <- gsub("\\\\", "", summary_match$search_string)
+    summary_match <- summary_match[match(tolower(names),
+                                         summary_match$search_string), ]
+    ## columns holding only "TRUE"/"FALSE"/NA become logical, others are kept
+    summary_match[["approximate_match"]] <-
+        convert_to_logical(summary_match[["approximate_match"]])
+    summary_match[["is_synonym"]] <-
+        convert_to_logical(summary_match[["is_synonym"]])
+    summary_match[["flags"]] <- convert_to_logical(summary_match[["flags"]])
+    ## store the current row names as numbers, then reset the row names
+    attr(summary_match, "original_order") <- as.numeric(rownames(summary_match))
+    rownames(summary_match) <- NULL
+    attr(summary_match, "original_response") <- res
+    attr(summary_match, "match_id") <- rep(1, nrow(summary_match))
+    attr(summary_match, "has_original_match") <-
+        !is.na(summary_match[["number_matches"]])
+    class(summary_match) <- c("match_names", "data.frame")
+    summary_match
+}
+
+##' @importFrom stats na.omit
+convert_to_logical <- function(x) {
+    ## NA entries are ignored when deciding whether x encodes logicals
+    observed <- stats::na.omit(x)
+    if (!all(observed %in% c("TRUE", "FALSE"))) return(x)
+    ## the converted vector is returned invisibly
+    invisible(as.logical(x))
+}
+
+check_tnrs <- function(req) {
+    ## an empty result set is fatal; unmatched names only trigger a warning
+    if (length(req$results) < 1) stop("No matches for any of the provided taxa")
+    unmatched <- req[["unmatched_names"]]
+    if (length(unmatched) > 0) {
+        warning(paste(unmatched, collapse=", "), " are not matched")
+    }
+}
+
+## One extractor per summary column; each receives a single match entry.
+tnrs_columns <- list(
+    "search_string" = function(x) x[["search_string"]],
+    "unique_name" = function(x) .tax_unique_name(x[["taxon"]]),
+    "approximate_match" = function(x) x[["is_approximate_match"]],
+    "ott_id" = function(x) .tax_ott_id(x[["taxon"]]),
+    "is_synonym" = function(x) x[["is_synonym"]],
+    "flags" = function(x) paste(.tax_flags(x[["taxon"]]), collapse = ", ")
+)
+
+summary_row_factory <- function(res, res_id, match_id, columns = tnrs_columns) {
+    candidates <- res[["results"]][[res_id]][["matches"]]  # every match for this result
+    this_match <- candidates[[match_id]]
+    fields <- sapply(columns, function(extract) extract(this_match))
+    c(fields, number_matches = length(candidates))
+}
+
+build_summary_match <- function(res, res_id, match_id = NULL, initial_creation) {
+    ## row of NAs for a name without any match; its first entry holds the name
+    build_empty_row <- function(x) {
+        no_match_row <- stats::setNames(
+            rep(NA, length(tnrs_columns) + 1),
+            c(names(tnrs_columns), "number_matches"))
+        no_match_row[1] <- x
+        no_match_row
+    }
+    ## several results combined with several match ids is not supported
+    if (length(res_id) > 1 &&
+       (!is.null(match_id) && length(match_id) > 1)) {
+        stop("Something is wrong. Please contact us.")
+    }
+    ## NB: inside lapply() below 'res' is still the response; the local 'res' is set afterwards
+    build_summary_row <- function(rid) {
+        if (is.null(match_id)) {
+            match_id <- seq_len(length(res[["results"]][[rid]][["matches"]]))
+        }
+        res <- lapply(match_id, function(mid) {
+            summary_row_factory(res, rid, mid)
+        })
+        if (identical(length(match_id), 1L)) {
+            unlist(res)
+        } else res
+    }
+
+    summary_row <- lapply(res_id, build_summary_row)
+    ## a single result: strip the outer list level added by lapply()
+    if (identical(length(res_id), 1L)) {
+        summary_row <- unlist(summary_row, recursive = FALSE)
+    }
+
+    ## A single row comes back as a bare vector: wrap it so rbind() gets a list
+    if (!inherits(summary_row, "list")) {
+        summary_row <- list(summary_row)
+    }
+
+    ## Append NA rows for unmatched names, only when initial_creation is TRUE
+    if (initial_creation && length(res[["unmatched_names"]])) {
+        no_match <- lapply(res[["unmatched_names"]], build_empty_row)
+        summary_row <- c(summary_row, no_match)
+    }
+    ## stack the rows into a data frame and label its columns
+    summary_match <- do.call("rbind", summary_row)
+    summary_match <- data.frame(summary_match, stringsAsFactors=FALSE)
+    names(summary_match) <- c(names(tnrs_columns), "number_matches")
+    summary_match
+}
+
+##' This function returns a list of pre-defined taxonomic contexts
+##' (i.e. clades) which can be used to limit the scope of tnrs
+##' queries.
+##'
+##' Taxonomic contexts are available to limit the scope of TNRS
+##' searches. These contexts correspond to uncontested higher taxa
+##' such as 'Animals' or 'Land plants'. This service returns a list
+##' containing all available taxonomic context names, which may be
+##' used as input (via the \code{context_name} argument in other
+##' functions) to limit the search scope of other services including
+##' \code{\link{tnrs_match_names}}.
+##' @title TNRS contexts
+##' @param ...  additional arguments to customize the API request (see
+##'     \code{\link{rotl}} package documentation).
+##' @return A list with one element for each major clade (e.g.,
+##'     animals, microbes, plants, fungi, life); each element
+##'     contains the possible contexts for that clade.
+##' @export
+
+tnrs_contexts <- function(...) {
+    contexts <- .tnrs_contexts(...)
+    ## the class is replaced, not extended
+    structure(contexts, class = "tnrs_contexts")
+}
+
+##' @export
+print.tnrs_contexts <- function(x, ...) {
+    cat("Possible contexts:\n")
+    for (group in x) {
+        entries <- unlist(group)
+        cat("  ", entries[1], "\n")
+        starts <- if (length(entries) > 1) seq(2, length(entries), by = 5)
+        for (first in starts) {  # five per line; starts is NULL for a lone entry
+            last <- min(first + 4, length(entries))
+            cat("     ", paste(entries[first:last], collapse = ", "), "\n")
+        }
+    }
+    invisible(x)  # print methods return their argument invisibly, not the loop result
+}
+
+##' Return a taxonomic context given a list of taxonomic names
+##'
+##' Find the least inclusive taxonomic context that includes all the
+##' unambiguous names in the input set. Unambiguous names are names
+##' with exact matches to non-homonym taxa. Ambiguous names (those
+##' without exact matches to non-homonym taxa) are indicated in
+##' results.
+##'
+##' @title Infer the taxonomic context from a list of names
+##' @param names Vector of taxon names.
+##' @param ...  additional arguments to customize the API request (see
+##'     \code{\link{rotl}} package documentation).
+##' @return A list including the context name, the context ott id and
+##'     possibly the names in the query that have an ambiguous
+##'     taxonomic meaning in the query.
+##' @examples
+##' \dontrun{
+##' res <- tnrs_infer_context(names=c("Stellula calliope", "Struthio camelus"))
+##' }
+##' @export
+tnrs_infer_context <- function(names=NULL, ...) {
+    context <- .tnrs_infer_context(names = names, ...)  # plain pass-through
+    context
+}
diff --git a/R/tol.R b/R/tol.R
new file mode 100644
index 0000000..e93b204
--- /dev/null
+++ b/R/tol.R
@@ -0,0 +1,668 @@
+
+.source_list <- function(tax, ...) {
+    ## source_id_map should only be missing when tol_about was called with
+    ## include_source_list=FALSE
+    if (! exists("source_id_map", tax)) {
+        stop("Make sure that your object has been created using ",
+             sQuote("tol_about(include_source_list = TRUE)"))
+    }
+    rows <- lapply(tax[["source_id_map"]], function(src) {
+        c(src[["study_id"]], src[["tree_id"]], src[["git_sha"]])
+    })
+    sources <- as.data.frame(do.call("rbind", rows), stringsAsFactors=FALSE)
+    names(sources) <- c("study_id", "tree_id", "git_sha")
+    sources
+}
+
+##' Basic information about the Open Tree of Life (the synthetic tree)
+##'
+##' @title Information about the Tree of Life
+##'
+##' @details Summary information about the current draft tree of life,
+##'     including information about the list of trees and the taxonomy
+##'     used to build it. The object returned by \code{tol_about} can
+##'     be passed to the taxonomy methods (\code{tax_name()},
+##'     \code{tax_rank()}, \code{tax_sources()}, \code{ott_id}), to
+##'     extract relevant taxonomic information for the root of the
+##'     synthetic tree.
+##'
+##' @param include_source_list Logical (default =
+##'     \code{FALSE}). Return an ordered list of source trees.
+##' @param tax an object created with a call to \code{tol_about}.
+##' @param ... additional arguments to customize the API call (see
+##'     \code{\link{rotl}} for more information).
+##'
+##' @return A list (of class \code{tol_summary}) of synthetic tree summary statistics:
+##'
+##' \itemize{
+##'
+##'     \item {date_created} {String. The creation date of the tree.}
+##'
+##'     \item {num_source_studies} {Integer. The number of studies
+##'     (publications) used as sources.}
+##'
+##'     \item {num_source_trees} {The number of trees used as sources
+##'     (may be >1 tree per study).}
+##'
+##'     \item {taxonomy_version} {The Open Tree Taxonomy version used
+##'     as a source.}
+##'
+##'     \item {filtered_flags} {List. Taxa with these taxonomy flags were
+##'     not used in construction of the tree.}
+##'
+##'     \item {root} {List. Describes the root node:}
+##'         \itemize{
+##'             \item {node_id} {String. The canonical identifier of the node.}
+##'
+##'             \item {num_tips} {Numeric. The number of descendent tips.}
+##'
+##'             \item {taxon} {A list of taxonomic properties:}
+##'             \itemize{
+##'                 \item {ott_id} {Numeric. The OpenTree Taxonomy ID (ott_id).}
+##'
+##'                 \item {name} {String. The taxonomic name of the queried node.}
+##'
+##'                 \item {unique_name} {String. The string that uniquely
+##'                 identifies the taxon in OTT.}
+##'
+##'                 \item {rank} {String. The taxonomic rank of the taxon in OTT.}
+##'
+##'                 \item {tax_sources} {List. A list of identifiers for taxonomic
+##'                 sources, such as other taxonomies, that define taxa judged
+##'                 equivalent to this taxon.}
+##'             }
+##'         }
+##'
+##'     \item {source_list} {List. Present only if
+##'     \code{include_source_list} is \code{TRUE}. The sourceid
+##'     ordering is the precedence order for synthesis, with
+##'     relationships from earlier trees in the list having priority
+##'     over those from later trees in the list. See
+##'     \code{source_id_map} below for study details.}
+##'
+##'     \item {source_id_map} {Named list of lists. Present only if
+##'     \code{include_source_list} is \code{TRUE}. Names correspond to
+##'     the \sQuote{sourceids} used in \code{source_list}
+##'     above. Source trees will have the following properties:}
+##'
+##'         \itemize{
+##'             \item {git_sha} {String. The git SHA identifying a particular source
+##'             version.}
+##'
+##'             \item {tree_id} {String. The tree id associated with the study id used.}
+##'
+##'             \item {study_id} {String. The study identifier. Will typically include
+##'             a prefix ("pg_" or "ot_").}
+##'         }
+##'
+##'     \item {synth_id} {The unique string for this version of the tree.}
+##' }
+##' @seealso \code{\link{source_list}} to explore the list of studies
+##'     used in the synthetic tree (see example).
+##'
+##' @examples
+##' \dontrun{
+##' res <- tol_about()
+##' tax_sources(res)
+##' ott_id(res)
+##' studies <- source_list(tol_about(include_source_list=TRUE))}
+##' @rdname tol_about
+##' @export
+tol_about <- function(include_source_list=FALSE, ...) {
+    about <- .tol_about(include_source_list=include_source_list, ...)
+    class(about) <- c("tol_summary", class(about))  # prepend, keeping prior classes
+    about
+}
+
+
+##' @export
+print.tol_summary <- function(x, ...) {
+    cat("\nOpenTree Synthetic Tree of Life.\n\n")
+    cat("Tree version: ", x$synth_id, "\n", sep="")
+    cat("Taxonomy version: ", x$taxonomy_version, "\n", sep="")  # full name, no partial matching
+    cat("Constructed on: ", x$date_created, "\n", sep="")
+    cat("Number of terminal taxa: ", x$root$num_tips, "\n", sep="")
+    cat("Number of source trees: ", x$num_source_trees, "\n", sep="")
+    cat("Number of source studies: ", x$num_source_studies, "\n", sep = "")
+    cat("Source list present: ", if (exists("source_list", x)) "true" else "false", "\n", sep="")
+    cat("Root taxon: ", x$root$taxon$name, "\nRoot ott_id: ", x$root$taxon$ott_id, "\n", sep="")
+    cat("Root node_id: ", x$root$node_id, "\n", sep="")
+    invisible(x)  # print methods return their argument invisibly
+}
+
+tol_about_method_factory <- function(.f) {
+    function(tax, ...) {
+        root_taxon <- tax[["root"]][["taxon"]]  # the element the accessors read
+        value <- list(.f(root_taxon))
+        names(value) <- .tax_unique_name(root_taxon)
+        add_otl_class(value, .f)
+    }
+}
+
+##' @export
+##' @rdname tol_about
+tax_rank.tol_summary <- tol_about_method_factory(.tax_rank)
+
+##' @export
+##' @rdname tol_about
+tax_sources.tol_summary <- tol_about_method_factory(.tax_sources)
+
+##' @export
+##' @rdname tol_about
+unique_name.tol_summary <- tol_about_method_factory(.tax_unique_name)
+
+##' @export
+##' @rdname tol_about
+tax_name.tol_summary <- tol_about_method_factory(.tax_name)
+
+##' @export
+##' @rdname tol_about
+ott_id.tol_summary <- tol_about_method_factory(.tax_ott_id)
+
+##' @export
+##' @rdname source_list
+source_list.tol_summary <- .source_list
+
+
+
+##' Most Recent Common Ancestor for a set of nodes
+##'
+##' @title MRCA of taxa from the synthetic tree
+##'
+##' @details Get the MRCA of a set of nodes on the current synthetic
+##'     tree. Accepts any combination of node ids and ott ids as
+##'     input. Returns information about the most recent common
+##'     ancestor (MRCA) node as well as the most recent taxonomic
+##'     ancestor (MRTA) node (the closest taxonomic node to the MRCA
+##'     node in the synthetic tree; the MRCA and MRTA may be the same
+##'     node). If they are the same, the taxonomic information will be
+##'     in the \code{mrca} slot, otherwise they will be in the
+##'     \code{nearest_taxon} slot of the list. If any of the specified
+##'     nodes is not in the synthetic tree an error will be returned.
+##'
+##'     Taxonomic methods (\code{tax_sources()}, \code{ott_id()},
+##'     \code{unique_name()}, ...) are available on the objects
+##'     returned by \code{tol_mrca()}. If the MRCA node is MRTA, the
+##'     name of the object returned by these methods will start with
+##'     \sQuote{ott}, otherwise it will start with \sQuote{mrca}.
+##'
+##' @param ott_ids Numeric vector. The ott ids for which the MRCA is desired.
+##' @param node_ids Character vector. The node ids for which the MRCA is desired.
+##' @param tax an object returned by \code{tol_mrca()}.
+##' @param ... additional arguments to customize the API call (see
+##'     \code{\link{rotl}} for more information).
+##'
+##' @return An invisible list of the MRCA node properties:
+##'
+##' \itemize{
+##'
+##'     \item {mrca} {List of node properties.}
+##'
+##'     \itemize{
+##'         \item {node_id} {String. The canonical identifier of the node.}
+##'
+##'         \item {num_tips} {Numeric. The number of descendent tips.}
+##'
+##'         \item {taxon} {A list of taxonomic properties. Only returned if
+##'         the queried node is a taxon. (If the node is not a taxon, a
+##'         \code{nearest_taxon} list is returned (see below)).}
+##'
+##'             \itemize{
+##'                 \item {ott_id} {Numeric. The OpenTree Taxonomy ID (ottID).}
+##'
+##'                 \item {name} {String. The taxonomic name of the queried node.}
+##'
+##'                 \item {unique_name} {String. The string that uniquely
+##'                 identifies the taxon in OTT.}
+##'
+##'                 \item {rank} {String. The taxonomic rank of the taxon in OTT.}
+##'
+##'                \item {tax_sources} {List. A list of identifiers for taxonomic
+##'                 sources, such as other taxonomies, that define taxa judged
+##'                 equivalent to this taxon.}
+##'             }
+##'
+##'         The following properties list support/conflict for the node across
+##'         synthesis source trees. All properties involve sourceid keys and
+##'         nodeid values (see \code{source_id_map} below). Not all properties
+##'         are present for every node.
+##'
+##'         \item {partial_path_of} {List. The edge below this synthetic tree node
+##'         is compatible with the edge below each of these input tree nodes (one
+##'         per tree). Each returned element is reported as sourceid:nodeid.}
+##'
+##'         \item {supported_by} {List. Input tree nodes (one per tree) that support
+##'         this synthetic tree node. Each returned element is reported as
+##'         sourceid:nodeid.}
+##'
+##'         \item {terminal} {List. Input tree nodes (one per tree) that are equivalent
+##'         to this synthetic tree node (via an exact mapping, or the input tree
+##'         terminal may be the only terminal descended from this synthetic tree node).
+##'         Each returned element is reported as sourceid:nodeid.}
+##'
+##'         \item {conflicts_with} {Named list of lists. Names correspond to
+##'         sourceid keys. Each list contains input tree node ids (one or more per tree)
+##'         that conflict with this synthetic node.}
+##'     }
+##'
+##'     \item {nearest_taxon} {A list of taxonomic properties of the nearest rootward
+##'     taxon node to the MRCA node. Only returned if the MRCA node is not a taxon
+##'     (otherwise the \code{taxon} list above is returned).}
+##'
+##'         \itemize{
+##'             \item {ott_id} {Numeric. The OpenTree Taxonomy ID (ottID).}
+##'
+##'             \item {name} {String. The taxonomic name of the queried node.}
+##'
+##'             \item {unique_name} {String. The string that uniquely
+##'             identifies the taxon in OTT.}
+##'
+##'             \item {rank} {String. The taxonomic rank of the taxon in OTT.}
+##'
+##'            \item {tax_sources} {List. A list of identifiers for taxonomic
+##'             sources, such as other taxonomies, that define taxa judged
+##'             equivalent to this taxon.}
+##'         }
+##'
+##'     \item {source_id_map} {Named list of lists. Names correspond to the
+##'     sourceid keys used in the support/conflict properties of the \code{mrca}
+##'     list above. Source trees will have the following properties:}
+##'
+##'         \itemize{
+##'             \item {git_sha} {The git SHA identifying a particular source
+##'             version.}
+##'
+##'             \item {tree_id} {The tree id associated with the study id used.}
+##'
+##'             \item {study_id} {The study identifier. Will typically include
+##'             a prefix ("pg_" or "ot_").}
+##'         }
+##'     The only sourceid that does not correspond to a source tree is the taxonomy,
+##'     which will have the name "ott"+`taxonomy_version`, and the value is the
+##'     ott_id of the taxon in that taxonomy version. "Taxonomy" will only ever
+##'     appear in \code{supported_by}.
+##'
+##'    }
+##'
+##' @examples
+##' \dontrun{
+##' birds_mrca <- tol_mrca(ott_ids=c(412129, 536234))
+##' ott_id(birds_mrca)
+##' tax_sources(birds_mrca)}
+##' @rdname tol_mrca
+##' @export
+tol_mrca <- function(ott_ids=NULL, node_ids=NULL, ...) {
+    res <- .tol_mrca(ott_ids=ott_ids, node_ids=node_ids, ...)
+    class(res) <- c("tol_mrca", class(res))
+    return(res)
+}
+
+##' @export
+print.tol_mrca <- function(x, ...) {
+    cat("\nOpenTree MRCA node.\n\n")
+    cat("Node id: ", x$mrca$node_id, "\n", sep="")
+    cat("Number of terminal descendants: ", x$mrca$num_tips, "\n", sep="")
+    if (is_taxon(x[["mrca"]][["taxon"]])) {
+        cat("Is taxon: TRUE\n")
+        cat("Name: ", x$mrca$taxon$name, "\n", sep="")
+        cat("ott id: ", x$mrca$taxon$ott_id, "\n", sep="")
+    } else {
+        cat("Is taxon: FALSE\n")
+        cat("Nearest taxon:\n")
+        cat("  Name: ", x$nearest_taxon$name, "\n", sep="")
+        cat("  ott id: ", x$nearest_taxon$ott_id, "\n", sep="")
+    }
+}
+
+tol_mrca_method_factory <- function(.f) {
+    function(tax, ...) {
+        if (is_taxon(tax[["mrca"]][["taxon"]])) {
+            res <- list(.f(tax[["mrca"]][["taxon"]]))
+            names(res) <- .tax_unique_name(tax[["mrca"]][["taxon"]])
+            attr(res, "taxon_type") <- "mrca"
+        } else {
+            res <- list(.f(tax[["nearest_taxon"]]))
+            names(res) <- .tax_unique_name(tax[["nearest_taxon"]])
+            attr(res, "taxon_type") <- "nearest_taxon"
+        }
+        res <- add_otl_class(res, .f)
+        res
+    }
+}
+
+##' @export
+##' @rdname tol_mrca
+tax_sources.tol_mrca <- tol_mrca_method_factory(.tax_sources)
+
+##' @export
+##' @rdname tol_mrca
+unique_name.tol_mrca <- tol_mrca_method_factory(.tax_unique_name)
+
+##' @export
+##' @rdname tol_mrca
+tax_name.tol_mrca <- tol_mrca_method_factory(.tax_name)
+
+##' @export
+##' @rdname tol_mrca
+tax_rank.tol_mrca <- tol_mrca_method_factory(.tax_rank)
+
+##' @export
+##' @rdname tol_mrca
+ott_id.tol_mrca <- tol_mrca_method_factory(.tax_ott_id)
+
+##' @export
+##' @rdname tol_mrca
+source_list.tol_mrca <- .source_list
+
+
+##' Extract a subtree from the synthetic tree from an Open Tree node id.
+##'
+##' @title Extract a subtree from the synthetic tree
+##'
+##' @details Given a node, return the subtree of the synthetic tree descended
+##'     from that node. The start node may be specified using either a node id
+##'     or an ott id, but not both. If the specified node is not in the
+##'     synthetic tree an error will be returned. There is a size limit of
+##'     25000 tips for this method.
+##'
+##' @param ott_id Numeric. The ott id of the node in the tree that should
+##'     serve as the root of the tree returned.
+##' @param node_id Character. The node id of the node in the tree that should
+##'     serve as the root of the tree returned.
+##' @param label_format Character. Defines the label type; one of
+##'     \dQuote{\code{name}}, \dQuote{\code{id}}, or
+##'      \dQuote{\code{name_and_id}} (the default).
+##' @param file If specified, the function will write the subtree to a
+##'     file in newick format.
+##' @param ... additional arguments to customize the API call (see
+##'     \code{\link{rotl}} for more information).
+##'
+##' @return If no value is specified to the \code{file} argument
+##'     (default), a phylogenetic tree of class \code{phylo}.
+##'     Otherwise, the function returns invisibly a logical indicating
+##'     whether the file was successfully created.
+##'
+##' @examples
+##' \dontrun{
+##' res <- tol_subtree(ott_id=241841)}
+##' @export
+tol_subtree <- function(ott_id=NULL, node_id=NULL, label_format=NULL,
+                        file, ...) {
+    res <- .tol_subtree(ott_id=ott_id, node_id=node_id,
+                        label_format=label_format, ...)
+
+    if (!missing(file)) {
+        unlink(file)
+        cat(res$newick, file=file)
+        return(invisible(file.exists(file)))
+    } else {
+        phy <- phylo_from_otl(res)
+        return(phy)
+    }
+}
+
+
+##' Return the induced subtree on the synthetic tree that relates a list of nodes.
+##'
+##' @title Subtree from the Open Tree of Life
+##'
+##' @details Return a tree with tips corresponding to the nodes identified in
+##' the input set that is consistent with the topology of the current
+##' synthetic tree. This tree is equivalent to the minimal subtree
+##' induced on the draft tree by the set of identified nodes.
+##'
+##' @param ott_ids Numeric vector. OTT ids indicating nodes to be used
+##'     as tips in the induced tree.
+##' @param node_ids Character vector. Node ids indicating nodes to be used
+##'     as tips in the induced tree.
+##' @param label_format Character. Defines the label type; one of
+##'     \dQuote{\code{name}}, \dQuote{\code{id}}, or
+##'      \dQuote{\code{name_and_id}} (the default).
+##' @param file If specified, the function will write the subtree to a
+##'     file in newick format.
+##' @param ... additional arguments to customize the API call (see
+##'     \code{\link{rotl}} for more information).
+##'
+##' @return If no value is specified to the \code{file} argument
+##'     (default), a phylogenetic tree of class \code{phylo}.
+##'
+##'     Otherwise, the function returns invisibly a logical indicating
+##'     whether the file was successfully created.
+##'
+##' @examples
+##' \dontrun{
+##' res <- tol_induced_subtree(ott_ids=c(292466, 267845, 666104, 316878, 102710))
+##' tree_file <- tempfile(fileext=".tre")
+##' tol_induced_subtree(ott_ids=c(292466, 267845, 666104, 316878, 102710),
+##'                     file=tree_file)}
+##' @export
+tol_induced_subtree <- function(ott_ids=NULL, node_ids=NULL, label_format=NULL,
+                                file, ...) {
+    res <- .tol_induced_subtree(ott_ids=ott_ids, node_ids=node_ids,
+                                label_format=label_format, ...)
+    if (!missing(file)) {
+        unlink(file)
+        cat(res$newick, file=file)
+        ## invisible, as documented in @return and as tol_subtree() does
+        return(invisible(file.exists(file)))
+    } else {
+        return(phylo_from_otl(res))
+    }
+}
+
+
+##' Strip OTT ids from tip labels
+##' @param tip_labels a character vector containing tip labels (most
+##'     likely the \code{tip.label} element from a tree returned by
+##'     \code{\link{tol_induced_subtree}}
+##' @param remove_underscores logical (defaults to FALSE). If set to
+##'     TRUE underscores in tip labels are converted to spaces
+##' @return A character vector containing the contents of
+##'     \code{tip_labels} with any OTT ids removed.
+##'
+##' @examples
+##' \dontrun{
+##' genera <- c("Perdix", "Dendroica", "Cinclus", "Selasphorus", "Struthio")
+##' tr <- tol_induced_subtree(ott_ids=c(292466, 267845, 666104, 102710))
+##' tr$tip.label %in% genera
+##' tr$tip.label <- strip_ott_ids(tr$tip.label)
+##' tr$tip.label %in% genera}
+##'@export
+strip_ott_ids <- function(tip_labels, remove_underscores=FALSE){
+    stripped <- sub("_ott\\d+$", "", tip_labels)
+    if(remove_underscores){
+        return(gsub("_", " ", stripped))
+    }
+    stripped
+}
+
+
+##' Get summary information about a node in the synthetic tree
+##'
+##' @title Node info
+##'
+##' @details Returns summary information about a node in the graph. The
+##'     node of interest may be specified using either a node id or an
+##'     OTT id, but not both. If the specified node or OTT id is not
+##'     in the graph, an error will be returned.
+##'
+##'     If the argument \code{include_lineage=TRUE} is used, you can
+##'     use \code{tax_lineage()} or \code{tol_lineage} to return the
+##'     taxonomic information or the node information for all the
+##'     ancestors to this node, down to the root of the tree.
+##'
+##'
+##' @param ott_id Numeric. The OpenTree taxonomic identifier.
+##' @param node_id Character. The OpenTree node identifier.
+##' @param include_lineage Logical (default = FALSE). Whether to return the
+##'     lineage of the node from the synthetic tree.
+##' @param ... additional arguments to customize the API call (see
+##'     ?rotl for more information)
+##'
+##' @return \code{tol_node_info} returns an invisible list of summary
+##'     information about the queried node:
+##'
+##' \itemize{
+##'
+##'     \item {node_id} {String. The canonical identifier of the node.}
+##'
+##'     \item {num_tips} {Numeric. The number of descendent tips.}
+##'
+##'     \item {taxon} {A list of taxonomic properties. Only returned if
+##'     the queried node is a taxon. Each source has:}
+##'
+##'         \itemize{
+##'             \item {ott_id} {Numeric. The OpenTree Taxonomy ID (ottID).}
+##'
+##'             \item {name} {String. The taxonomic name of the queried node.}
+##'
+##'             \item {unique_name} {String. The string that uniquely
+##'             identifies the taxon in OTT.}
+##'
+##'             \item {rank} {String. The taxonomic rank of the taxon in OTT.}
+##'
+##'             \item {tax_sources} {List. A list of identifiers for taxonomic
+##'             sources, such as other taxonomies, that define taxa judged
+##'             equivalent to this taxon.}
+##'         }
+##'
+##'     The following properties list support/conflict for the node across
+##'     synthesis source trees. All properties involve sourceid keys and
+##'     nodeid values (see \code{source_id_map} below).
+##'
+##'     \item {partial_path_of} {List. The edge below this synthetic tree node
+##'     is compatible with the edge below each of these input tree nodes (one
+##'     per tree). Each returned element is reported as sourceid:nodeid.}
+##'
+##'     \item {supported_by} {List. Input tree nodes (one per tree) that support
+##'     this synthetic tree node. Each returned element is reported as
+##'     sourceid:nodeid.}
+##'
+##'     \item {terminal} {List. Input tree nodes (one per tree) that are equivalent
+##'     to this synthetic tree node (via an exact mapping, or the input tree
+##'     terminal may be the only terminal descended from this synthetic tree node).
+##'     Each returned element is reported as sourceid:nodeid.}
+##'
+##'     \item {conflicts_with} {Named list of lists. Names correspond to
+##'     sourceid keys. Each list contains input tree node ids (one or more per tree)
+##'     that conflict with this synthetic node.}
+##'
+##'     \item {source_id_map} {Named list of lists. Names correspond to the
+##'     sourceid keys used in the 4 properties above. Source trees will have the
+##'     following properties:}
+##'
+##'         \itemize{
+##'             \item {git_sha} {The git SHA identifying a particular source
+##'             version.}
+##'
+##'             \item {tree_id} {The tree id associated with the study id used.}
+##'
+##'             \item {study_id} {The study identifier. Will typically include
+##'             a prefix ("pg_" or "ot_").}
+##'         }
+##'     The only sourceid that does not correspond to a source tree is the taxonomy,
+##'     which will have the name "ott"+`taxonomy_version`, and the value is the
+##'     ott_id of the taxon in that taxonomy version. "Taxonomy" will only ever
+##'     appear in \code{supported_by}.
+##'
+##'    }
+##'
+##'     \code{tol_lineage} and \code{tax_lineage} return data
+##'         frames. \code{tol_lineage} indicates for each ancestor its
+##'         node identifier, the number of tips descending from that
+##'         node, and whether it corresponds to a taxonomic level.
+##'
+##' @examples
+##' \dontrun{
+##' birds <- tol_node_info(ott_id=81461, include_lineage=TRUE)
+##' source_list(birds)
+##' tax_rank(birds)
+##' ott_id(birds)
+##' tax_lineage(birds)
+##' tol_lineage(birds)}
+##' @export
+tol_node_info <- function(ott_id=NULL, node_id=NULL, include_lineage=FALSE, ...) {
+    res <- .tol_node_info(ott_id=ott_id, node_id=node_id,
+                          include_lineage=include_lineage, ...)
+    class(res) <- c("tol_node", class(res))
+    return(res)
+}
+
+tol_node_method_factory <- function(.f) {
+    function(tax, ...) {
+        res <- setNames(list(.f(tax[["taxon"]])),
+                        .tax_unique_name(tax[["taxon"]]))
+        res <- add_otl_class(res, .f)
+        res
+    }
+}
+
+##' @export
+print.tol_node <- function(x, ...) {
+    cat("\nOpenTree node.\n\n")
+    cat("Node id: ", x$node_id, "\n", sep="")
+    cat("Number of terminal descendants: ", x$num_tips, "\n", sep="")
+    if (is_taxon(x[["taxon"]])) {
+        cat("Is taxon: TRUE\n")
+        cat("Name: ", x$taxon$name, "\n", sep="")
+        cat("Rank: ", x$taxon$rank, "\n", sep="")
+        cat("ott id: ", x$taxon$ott_id, "\n", sep="")
+    } else {
+        cat("Is taxon: FALSE\n")
+    }
+}
+
+##' @export
+##' @param tax an object returned by \code{tol_node_info}.
+##' @rdname tol_node_info
+tax_rank.tol_node <- tol_node_method_factory(.tax_rank)
+
+##' @export
+##' @rdname tol_node_info
+tax_sources.tol_node <- tol_node_method_factory(.tax_sources)
+
+##' @export
+##' @rdname tol_node_info
+unique_name.tol_node <- tol_node_method_factory(.tax_unique_name)
+
+##' @export
+##' @rdname tol_node_info
+tax_name.tol_node <- tol_node_method_factory(.tax_name)
+
+##' @export
+##' @rdname tol_node_info
+ott_id.tol_node <- tol_node_method_factory(.tax_ott_id)
+
+##' @export
+##' @rdname tol_node_info
+source_list.tol_node <- .source_list
+
+##' @export
+##' @rdname tol_node_info
+tax_lineage.tol_node <- function(tax, ...) {
+    check_lineage(tax)
+    lg <- lapply(tax[["lineage"]], function(x) {
+        if (exists("taxon", x)) {
+            build_lineage(x[["taxon"]])
+        } else {
+            NULL
+        }
+
+    })
+    lg <- do.call("rbind", lg)
+    as.data.frame(lg, stringsAsFactors = FALSE)
+}
+
+
+##' @export
+##' @rdname tol_node_info
+tol_lineage.tol_node <- function(tax, ...) {
+    check_lineage(tax)
+    lg <- lapply(tax[["lineage"]], function(x) {
+        c("node_id" = x[["node_id"]],
+          "num_tips" = x[["num_tips"]],
+          "is_taxon" = exists("taxon", x))
+    })
+    lg <- do.call("rbind", lg)
+    as.data.frame(lg, stringsAsFactors = FALSE)
+}
diff --git a/R/tree_to_labels.R b/R/tree_to_labels.R
new file mode 100644
index 0000000..a6cfb67
--- /dev/null
+++ b/R/tree_to_labels.R
@@ -0,0 +1,48 @@
+## Extract tip labels and edge labels (the text that follows a closing
+## parenthesis) from a newick string `tr`; useful when the tree is too small
+## to be read in by ape/rncl. Returns list(tip_label, edge_label); quotes are
+## stripped from tip labels if remove_quotes. Missing tips are removed (OK for OTL).
+tree_to_labels <- function(tr, remove_quotes = TRUE) {
+
+    n_right <- unlist(gregexpr("\\)", tr))
+    n_left <- unlist(gregexpr("\\(", tr))
+
+    if (n_right[1] == -1) n_right <- 0 else n_right <- length(n_right)
+    if (n_left[1] == -1) n_left <- 0 else n_left <- length(n_left)
+
+    if (!identical(n_right, n_left)) {
+        stop("invalid newick string, numbers of ( and ) don't match")
+    }
+
+    ## remove white spaces
+    tr <- gsub("\\s+", "", tr)
+
+    ## remove branch lengths (incl. negative values and scientific notation)
+    tr <- gsub(":-?[0-9]+(\\.[0-9]+)?([eE][-+]?[0-9]+)?", "", tr)
+
+    ## TODO?: remove comments
+
+    if (n_right < 1) {
+        ## if only 1 tip
+        tip_lbl <- gsub(";$", "", tr)
+        edge_lbl <- character(0)
+    } else {
+        ## extract edge labels: pieces after a ")" that do not open a new clade
+        edge_lbl <- unlist(strsplit(tr, ")", fixed = TRUE))
+        edge_lbl <- grep("^[^\\(]", edge_lbl, value = TRUE)
+        edge_lbl <- gsub("(,|;).*$", "", edge_lbl)
+        edge_lbl <- edge_lbl[nzchar(edge_lbl)]
+
+        ## extract tips: drop leading "(" and everything from the first ")"
+        tip_lbl <- unlist(strsplit(tr, ","))
+        tip_lbl <- gsub("^\\(*", "", tip_lbl)
+        tip_lbl <- gsub("\\).*$", "", tip_lbl)
+        tip_lbl <- tip_lbl[nzchar(tip_lbl)]
+    }
+
+    if (remove_quotes) {
+        tip_lbl <- gsub("^(\\\"|\\\')(.+)(\\\'|\\\")$", "\\2", tip_lbl)
+    }
+
+    list(tip_label = tip_lbl, edge_label = edge_lbl)
+}
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..6391802
--- /dev/null
+++ b/README.md
@@ -0,0 +1,155 @@
+
+
+[![Build Status](https://travis-ci.org/ropensci/rotl.svg?branch=master)](https://travis-ci.org/ropensci/rotl)
+[![Build status](https://ci.appveyor.com/api/projects/status/jwvl84e6m36bqwga?svg=true)](https://ci.appveyor.com/project/fmichonneau/rotl)
+[![codecov.io](https://codecov.io/github/ropensci/rotl/coverage.svg?branch=master)](https://codecov.io/github/ropensci/rotl?branch=master)
+[![](http://www.r-pkg.org/badges/version/rotl)](http://www.r-pkg.org/pkg/rotl)
+[![CRAN RStudio mirror downloads](http://cranlogs.r-pkg.org/badges/rotl)](http://www.r-pkg.org/pkg/rotl)
+[![Research software impact](http://depsy.org/api/package/cran/rotl/badge.svg)](http://depsy.org/package/r/rotl)
+
+# An R interface to Open Tree API
+
+`rotl` is an R package to interact with the Open Tree of Life data APIs. It was
+initially developed as part of the
+[NESCENT/OpenTree/Arbor hackathon](http://blog.opentreeoflife.org/2014/06/11/apply-for-tree-for-all-a-hackathon-to-access-opentree-resources/).
+
+Client libraries to interact with the Open Tree of Life API also exist for
+[Python](https://github.com/OpenTreeOfLife/pyopentree)
+and [Ruby](https://github.com/SpeciesFileGroup/bark).
+
+
+## Installation
+
+The current stable version is available from CRAN, and can be installed by
+typing the following at the prompt in R:
+
+
+```r
+install.packages("rotl")
+```
+
+If you want to test the development version, you first need to install
+[ghit](https://github.com/cloudyr/ghit) (`ghit` is a more lightweight version of
+[devtools](https://github.com/hadley/devtools) if your sole purpose is to
+install packages that are hosted on GitHub).
+
+
+```r
+install.packages("ghit")
+```
+
+Then you can install `rotl` using:
+
+
+```r
+library(ghit) # or library(devtools)
+install_github("ropensci/rotl")
+```
+
+## Vignettes
+
+There are three vignettes:
+
+- Start by checking out the "How to use `rotl`?" by typing:
+  `vignette("how-to-use-rotl", package="rotl")` after installing the
+  package.
+
+- Then explore how you can use `rotl` with other packages to combine your data
+  with trees from the Open Tree of Life project by typing:
+  `vignette("data_mashups", package="rotl")`.
+
+- The vignette "Using the Open Tree Synthesis in a comparative analysis"
+  demonstrates how you can reproduce an analysis of a published paper by
+  downloading the tree they used, and data from the supplementary material:
+  `vignette("meta-analysis", package="rotl")`.
+
+The vignettes are also available from CRAN:
+[How to use `rotl`?](https://cran.r-project.org/web/packages/rotl/vignettes/how-to-use-rotl.html),
+[Data mashups](https://cran.r-project.org/web/packages/rotl/vignettes/data_mashups.html),
+and
+[Using the Open Tree synthesis in a comparative analysis](https://cran.r-project.org/web/packages/rotl/vignettes/meta-analysis.html).
+
+## Quick start
+
+### Get a little bit of the big Open Tree tree
+
+Taxonomic names are represented in the Open Tree by numeric identifiers, the
+`ott_ids` (Open Tree Taxonomy identifiers). To extract a portion of a tree from
+the Open Tree, you first need to find `ott_ids` for a set of names using the
+`tnrs_match_names` function:
+
+
+```r
+library(rotl)
+apes <- c("Pan", "Pongo", "Pan", "Gorilla", "Hoolock", "Homo")
+(resolved_names <- tnrs_match_names(apes))
+```
+
+```
+##   search_string unique_name approximate_match ott_id is_synonym flags
+## 1           pan         Pan             FALSE 417957      FALSE      
+## 2         pongo       Pongo             FALSE 417949      FALSE      
+## 3           pan         Pan             FALSE 417957      FALSE      
+## 4       gorilla     Gorilla             FALSE 417969      FALSE      
+## 5       hoolock     Hoolock             FALSE 712902      FALSE      
+## 6          homo        Homo             FALSE 770309      FALSE      
+##   number_matches
+## 1              1
+## 2              1
+## 3              1
+## 4              1
+## 5              1
+## 6              1
+```
+
+Now we can get the tree with just those tips:
+
+
+```r
+tr <- tol_induced_subtree(ott_ids=ott_id(resolved_names))
+plot(tr)
+```
+
+![plot of chunk get_tr](http://i.imgur.com/t9GhUs5.png)
+
+The code above can be summarized in a single pipe:
+
+
+```r
+library(magrittr)
+## or expressed as a pipe:
+c("Pan", "Pongo", "Pan", "Gorilla", "Hoolock", "Homo") %>%
+    tnrs_match_names %>%
+    ott_id %>%
+    tol_induced_subtree %>%
+    plot
+```
+
+![plot of chunk pipe](http://i.imgur.com/wDgMNtP.png)
+
+## Versioning
+
+Starting with v3.0.0 of the package, the major and minor version numbers (the
+first 2 digits of the version number) will be matched to those of the API. The
+patch number (the 3rd digit of the version number) will be used to reflect
+bug fixes and other changes that are independent from changes to the API.
+
+`rotl` can be used to access other versions of the API (if they are available)
+but most likely the high level functions will not work. Instead, you will need
+to parse the output yourself using the "raw" returns from the unexported
+low-level functions (all prefixed with a `.`). For instance to use the
+`tnrs/match_names` endpoint for `v2` of the API:
+
+
+```r
+rotl:::.tnrs_match_names(c("pan", "pango", "gorilla", "hoolock", "homo"), otl_v="v2")
+```
+
+
+### Code of Conduct
+
+Please note that this project is released with a
+[Contributor Code of Conduct](CONDUCT.md). By participating in this project you
+agree to abide by its terms.
+
+[![](http://ropensci.org/public_images/github_footer.png)](http://ropensci.org)
diff --git a/build/vignette.rds b/build/vignette.rds
new file mode 100644
index 0000000..42e1a0f
Binary files /dev/null and b/build/vignette.rds differ
diff --git a/debian/README.test b/debian/README.test
deleted file mode 100644
index 55a9142..0000000
--- a/debian/README.test
+++ /dev/null
@@ -1,8 +0,0 @@
-Notes on how this package can be tested.
-────────────────────────────────────────
-
-To run the unit tests provided by the package you can do
-
-   sh  run-unit-test
-
-in this directory.
diff --git a/debian/changelog b/debian/changelog
deleted file mode 100644
index b3b1abc..0000000
--- a/debian/changelog
+++ /dev/null
@@ -1,5 +0,0 @@
-r-cran-rotl (3.0.1-1) unstable; urgency=medium
-
-  * Initial release (closes: #844360)
-
- -- Andreas Tille <tille at debian.org>  Mon, 14 Nov 2016 21:15:53 +0100
diff --git a/debian/compat b/debian/compat
deleted file mode 100644
index ec63514..0000000
--- a/debian/compat
+++ /dev/null
@@ -1 +0,0 @@
-9
diff --git a/debian/control b/debian/control
deleted file mode 100644
index 9df1325..0000000
--- a/debian/control
+++ /dev/null
@@ -1,31 +0,0 @@
-Source: r-cran-rotl
-Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
-Uploaders: Andreas Tille <tille at debian.org>
-Section: gnu-r
-Priority: optional
-Build-Depends: debhelper (>= 9),
-               dh-r,
-               r-base-dev,
-               r-cran-httr,
-               r-cran-jsonlite,
-               r-cran-assertthat,
-               r-cran-rncl,
-               r-cran-ape,
-               r-cran-rentrez
-Standards-Version: 3.9.8
-Vcs-Browser: https://anonscm.debian.org/viewvc/debian-med/trunk/packages/R/r-cran-rotl/trunk/
-Vcs-Svn: svn://anonscm.debian.org/debian-med/trunk/packages/R/r-cran-rotl/trunk/
-Homepage: https://cran.r-project.org/package=rotl
-
-Package: r-cran-rotl
-Architecture: all
-Depends: ${R:Depends},
-         ${misc:Depends}
-Recommends: ${R:Recommends}
-Suggests: ${R:Suggests}
-Description: GNU R interface to the 'Open Tree of Life' API
- An interface to the 'Open Tree of Life' API to retrieve phylogenetic
- trees, information about studies used to assemble the synthetic tree,
- and utilities to match taxonomic names to 'Open Tree identifiers'. The
- 'Open Tree of Life' aims at assembling a comprehensive phylogenetic tree
- for all named species.
diff --git a/debian/copyright b/debian/copyright
deleted file mode 100644
index 824551c..0000000
--- a/debian/copyright
+++ /dev/null
@@ -1,37 +0,0 @@
-Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
-Upstream-Name: rotl
-Upstream-Contact: Francois Michonneau <francois.michonneau at gmail.com>
-Source: https://cran.r-project.org/package=rotl
-
-Files: *
-Copyright: 2012-2016 Francois Michonneau, Joseph Brown, David Winter [aut]
-License: BSD-2-clause
-
-Files: debian/*
-Copyright: 2016 Andreas Tille <tille at debian.org>
-License: BSD-2-clause
-
-License: BSD-2-clause
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- .
-    Redistributions of source code must retain the above copyright
-    notice, this list of conditions and the following disclaimer.
- .
-    Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in
-    the documentation and/or other materials provided with the
-    distribution.
- .
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/debian/docs b/debian/docs
deleted file mode 100644
index 960011c..0000000
--- a/debian/docs
+++ /dev/null
@@ -1,3 +0,0 @@
-tests
-debian/README.test
-debian/tests/run-unit-test
diff --git a/debian/rules b/debian/rules
deleted file mode 100755
index 3108f73..0000000
--- a/debian/rules
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/usr/bin/make -f
-
-%:
-	dh $@ --buildsystem R
-
-override_dh_fixperms:
-	dh_fixperms
-	find debian -name "*.csv" -exec chmod -x \{\} \;
diff --git a/debian/source/format b/debian/source/format
deleted file mode 100644
index 163aaf8..0000000
--- a/debian/source/format
+++ /dev/null
@@ -1 +0,0 @@
-3.0 (quilt)
diff --git a/debian/tests/control b/debian/tests/control
deleted file mode 100644
index b044b0c..0000000
--- a/debian/tests/control
+++ /dev/null
@@ -1,3 +0,0 @@
-Tests: run-unit-test
-Depends: @, r-cran-testthat
-Restrictions: allow-stderr
diff --git a/debian/tests/run-unit-test b/debian/tests/run-unit-test
deleted file mode 100644
index 956fb53..0000000
--- a/debian/tests/run-unit-test
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/sh -e
-
-pkg=r-cran-rotl
-if [ "$ADTTMP" = "" ] ; then
-  ADTTMP=`mktemp -d /tmp/${pkg}-test.XXXXXX`
-  trap "rm -rf $ADTTMP" 0 INT QUIT ABRT PIPE TERM
-fi
-cd $ADTTMP
-cp -a /usr/share/doc/${pkg}/tests/* $ADTTMP
-find . -name "*.gz" -exec gunzip \{\} \;
-LC_ALL=C R --no-save < test-all.R
diff --git a/debian/watch b/debian/watch
deleted file mode 100644
index b122fd8..0000000
--- a/debian/watch
+++ /dev/null
@@ -1,2 +0,0 @@
-version=4
-https://cran.r-project.org/src/contrib/rotl_([-\d.]*)\.tar\.gz
diff --git a/inst/CITATION b/inst/CITATION
new file mode 100644
index 0000000..f904c0b
--- /dev/null
+++ b/inst/CITATION
@@ -0,0 +1,20 @@
+citHeader("To cite rotl in publications use:")
+
+citEntry(entry = "Article",
+  title        = "{rotl}: an R package to interact with the Open Tree of Life data",
+  author       = personList(as.person("Francois Michonneau"),
+                            as.person("Joseph W. Brown"),
+                            as.person("David J. Winter")),
+  journal      = "Methods in Ecology and Evolution",
+  year         = "2016",
+  volume       = "",
+  number       = "",
+  pages        = "",
+  url          = "https://doi.org/10.1111/2041-210X.12593",
+  doi          =  "10.1111/2041-210X.12593",
+
+  textVersion  =
+      paste("Michonneau, F., Brown, J. W. and Winter, D. J. (2016),
+             rotl: an R package to interact with the Open Tree of Life data.
+             Methods Ecol Evol. doi:10.1111/2041-210X.12593")
+  )
diff --git a/inst/doc/data_mashups.R b/inst/doc/data_mashups.R
new file mode 100644
index 0000000..bc282ca
--- /dev/null
+++ b/inst/doc/data_mashups.R
@@ -0,0 +1,61 @@
+## ---- data---------------------------------------------------------------
+csv_path <- system.file("extdata", "protist_mutation_rates.csv", package = "rotl")
+mu <- read.csv(csv_path, stringsAsFactors=FALSE)
+mu
+
+## ---- context------------------------------------------------------------
+library(rotl)
+tnrs_contexts()
+
+## ---- match--------------------------------------------------------------
+taxon_search <- tnrs_match_names(names=mu$species, context_name="All life")
+knitr::kable(taxon_search)
+
+## ---- munge--------------------------------------------------------------
+mu$ott_name <- taxon_search$unique_name
+mu$ott_id <- taxon_search$ott_id
+
+## ---- properties---------------------------------------------------------
+studies_properties()
+
+## ----taxon_count---------------------------------------------------------
+studies_find_trees(property="ot:ottId", value="180195")
+
+## ---- all_taxa_count-----------------------------------------------------
+hits <- lapply(mu$ott_id, studies_find_trees, property="ot:ottId", detailed = FALSE)
+sapply(hits, function(x) sum(x[["n_matched_trees"]]))
+
+## ----subtree,  fig.width=7, fig.height=4---------------------------------
+tr <- tol_induced_subtree(ott_ids=mu$ott_id)
+plot(tr)
+
+## ---- match_names--------------------------------------------------------
+mu$ott_name[1]
+tr$tip.label[4]
+
+## ---- sub----------------------------------------------------------------
+tr$tip.label <- strip_ott_ids(tr$tip.label, remove_underscores=TRUE)
+tr$tip.label %in% mu$ott_name
+
+## ----phylobase-----------------------------------------------------------
+library(phylobase)
+mu_numeric <- mu[,c("mu", "pop.size", "genome.size")]
+rownames(mu_numeric) <- mu$ott_name
+tree_data <- phylo4d(tr, mu_numeric)
+
+## ----  fig.width=7, fig.height=5-----------------------------------------
+plot(tree_data)
+
+## ------------------------------------------------------------------------
+extra_data <- study_external_IDs("pg_1980")
+extra_data
+
+## ------------------------------------------------------------------------
+library(rentrez)
+seqs <- entrez_fetch(db="nucleotide", id=extra_data$nucleotide_ids[1:2], rettype="fasta")
+cat(seqs)
+
+## ------------------------------------------------------------------------
+Tt_ids <- taxon_external_IDs(mu$ott_id[2])
+Tt_ids
+
diff --git a/inst/doc/data_mashups.Rmd b/inst/doc/data_mashups.Rmd
new file mode 100644
index 0000000..d2a3f78
--- /dev/null
+++ b/inst/doc/data_mashups.Rmd
@@ -0,0 +1,222 @@
+---
+title: "Connecting data to Open Tree trees"
+author: "David Winter"
+date: "`r Sys.Date()`"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Connecting data to Open Tree trees}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+## Combining data from OToL and other sources.
+
+One of the major goals of `rotl` is to help users combine data from other
+sources with the phylogenetic trees in the Open Tree database. This example
+document describes some of the ways in which a user might connect data to trees
+from Open Tree.
+
+## Get Open Tree IDs to match your data.
+
+Let's say you have a dataset where each row represents a measurement taken from
+one species, and your goal is to put these measurements in some phylogenetic
+context. Here's a small example: the best estimate of the mutation rate for a
+set of unicellular Eukaryotes along with some other property of those species
+which might explain the mutation rate:
+
+```{r, data}
+csv_path <- system.file("extdata", "protist_mutation_rates.csv", package = "rotl")
+mu <- read.csv(csv_path, stringsAsFactors=FALSE)
+mu
+```
+
+If we want to get a tree for these species we need to start by finding the
+unique ID for each of these species in the Open Tree database. We can use the
+Taxonomic Name Resolution Service (`tnrs`) functions to do this. Before we do
+that we should see if any of the taxonomic contexts, which can be used to narrow
+a search and avoid conflicts between different codes, apply to our group of species:
+
+```{r, context}
+library(rotl)
+tnrs_contexts()
+```
+
+Hmm, none of those groups contain all of our species. In this case we can
+search using the `All life` context and the function `tnrs_match_names`:
+
+```{r, match}
+taxon_search <- tnrs_match_names(names=mu$species, context_name="All life")
+knitr::kable(taxon_search)
+```
+
+Good, all of our  species are known to Open Tree. Note, though, that one of the names
+is a synonym. _Saccharomyces pombe_ is an older name for what is now called
+_Schizosaccharomyces pombe_. As the name suggests, the Taxonomic Name
+Resolution Service is designed to deal with these problems (and similar ones
+like misspellings), but it is always a good idea to check the results of
+`tnrs_match_names` closely to ensure the results are what you expect.
+
+In this case we have a good ID for each of our species so we can move on. Before
+we do that, let's ensure we can match up our original data to the Open Tree
+names and IDs by adding them to our `data.frame`:
+
+```{r, munge}
+mu$ott_name <- taxon_search$unique_name
+mu$ott_id <- taxon_search$ott_id
+```
+
+## Find a tree with your taxa
+
+Now let's find a tree. There are two possible options here: we can search for
+published studies that include our taxa or we can use the 'synthetic tree' from
+Open Tree. We can try both approaches.
+
+### Published trees
+
+Before we can search for published studies or trees, we should check out the
+list of properties we can use to perform such searches:
+
+```{r, properties}
+studies_properties()
+```
+
+We have `ottIds` for our taxa, so let's use those IDs to search for trees that
+contain them.  Starting with our first species _Tetrahymena thermophila_ we can
+use `studies_find_trees` to do this search.
+
+```{r taxon_count}
+studies_find_trees(property="ot:ottId", value="180195")
+```
+
+Well... that's not very promising. We can repeat that process for all of the IDs
+to see if the other species are better represented.
+
+
+```{r, all_taxa_count}
+hits <- lapply(mu$ott_id, studies_find_trees, property="ot:ottId", detailed = FALSE)
+sapply(hits, function(x) sum(x[["n_matched_trees"]]))
+```
+
+OK, most of our species are not in any of the published trees available. You can
+help fix this sort of problem by [making sure you submit your published trees to
+Open Tree](https://tree.opentreeoflife.org/curator).
+
+### A part of the synthesis tree
+
+Thankfully, we can still use the complete Tree of Life made from the
+combined results of all of the published trees and taxonomies that go into Open
+Tree. The function `tol_induced_subtree` will fetch a tree relating a set of IDs.
+
+Using the default arguments you can get a tree object into your R session:
+
+
+```{r subtree,  fig.width=7, fig.height=4}
+tr <- tol_induced_subtree(ott_ids=mu$ott_id)
+plot(tr)
+```
+
+### Connect your data to the tips of your tree
+
+Now we have a tree for our species, how can we use the tree and the data
+together?
+
+The package `phylobase` provides an object class called `phylo4d`, which is
+designed to represent a phylogeny and data associated with its tips. In order to
+get our tree and data into one of these objects we have to make sure the labels
+in the tree and in our data match exactly. That's not quite the case at the
+moment (tree labels have underscores and IDs appended):
+
+```{r, match_names}
+mu$ott_name[1]
+tr$tip.label[4]
+```
+
+`rotl` provides a convenience function `strip_ott_ids` to deal with these.
+
+```{r, sub}
+tr$tip.label <- strip_ott_ids(tr$tip.label, remove_underscores=TRUE)
+tr$tip.label %in% mu$ott_name
+```
+
+Ok, now the tips are together we can make a new dataset. The `phylo4d()`
+function matches tip labels to the row names of a `data.frame`, so let's make
+a new dataset that contains just the relevant data and has row names to match
+the tree:
+
+```{r phylobase}
+library(phylobase)
+mu_numeric <- mu[,c("mu", "pop.size", "genome.size")]
+rownames(mu_numeric) <- mu$ott_name
+tree_data <- phylo4d(tr, mu_numeric)
+```
+And now we can plot the data and the tree together
+
+
+```{r,  fig.width=7, fig.height=5}
+plot(tree_data)
+```
+
+## Find external data associated with studies, trees and taxa from Open Tree
+
+In the above example we looked for a tree that related species in another dataset. 
+Now we will go the other way, and try to find data associated with Open Tree records
+in other databases.
+
+### Get external data from a study
+
+Let's imagine you were interested in extending or reproducing the results of a
+published study. If that study is included in Open Tree you can find it via
+`studies_find_studies` or `studies_find_trees` and retrieve the published trees 
+with `get_study`. `rotl` will also help you find external data. The function
+`study_external_IDs` retrieves the DOI for a given study, and uses that to 
+gather some more data:
+
+```{r}
+extra_data <- study_external_IDs("pg_1980")
+extra_data
+```
+
+Here the returned object contains an `external_data_url` (in this case a link to
+the study in Treebase), a pubmed ID for the paper and a vector of IDs for the
+NCBI's nucleotide database. The packages `treebase` and `rentrez` provide
+functions to make use of these IDs within R.
+
+As an example, let's use `rentrez` to download the first two DNA sequences and
+print them. 
+
+```{r}
+library(rentrez)
+seqs <- entrez_fetch(db="nucleotide", id=extra_data$nucleotide_ids[1:2], rettype="fasta")
+cat(seqs)
+```
+
+You could further process these sequences in R with the function `read.dna` from 
+`ape` or save them to disk by specifying a file name with `cat`.
+
+### Find an OTT taxon in another taxonomic database
+
+It is also possible to map an Open Tree taxon to a record in another taxonomic
+database. For instance, if we wanted to search for data about one of the tips of
+the sub-tree we fetched in the example above we could do so using
+`taxon_external_IDs`:
+
+```{r}
+Tt_ids <- taxon_external_IDs(mu$ott_id[2])
+Tt_ids
+```
+
+A user could then use `rgbif` to find locality records using the gbif ID or
+`rentrez` to get genetic or bibliometric data about it from the NCBI's databases.
+
+
+## What next
+
+The demonstration gets you to the point of visualizing your data in a
+phylogenetic context. But there's a lot more you can do with this sort of data in R.
+For instance, you could use packages like `ape`, `caper`, `phytools` and
+`MCMCglmm` to perform phylogenetic comparative analyses of your data. You could
+gather more data on your species using packages that connect to
+trait databases like `rfishbase`, `AntWeb` or `rnpn` which provides data from
+the US National Phenology Network. You could also use `rentrez` to find genetic
+data for each of your species, and use that data to generate branch lengths for
+the phylogeny.
diff --git a/inst/doc/data_mashups.html b/inst/doc/data_mashups.html
new file mode 100644
index 0000000..23fc9f4
--- /dev/null
+++ b/inst/doc/data_mashups.html
@@ -0,0 +1,362 @@
+<!DOCTYPE html>
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+
+<head>
+
+<meta charset="utf-8">
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+<meta name="generator" content="pandoc" />
+
+<meta name="viewport" content="width=device-width, initial-scale=1">
+
+<meta name="author" content="David Winter" />
+
+<meta name="date" content="2016-09-19" />
+
+<title>Connecting data to Open Tree trees</title>
+
+
+
+<style type="text/css">code{white-space: pre;}</style>
+<style type="text/css">
+div.sourceCode { overflow-x: auto; }
+table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode {
+  margin: 0; padding: 0; vertical-align: baseline; border: none; }
+table.sourceCode { width: 100%; line-height: 100%; }
+td.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; color: #aaaaaa; border-right: 1px solid #aaaaaa; }
+td.sourceCode { padding-left: 5px; }
+code > span.kw { color: #007020; font-weight: bold; } /* Keyword */
+code > span.dt { color: #902000; } /* DataType */
+code > span.dv { color: #40a070; } /* DecVal */
+code > span.bn { color: #40a070; } /* BaseN */
+code > span.fl { color: #40a070; } /* Float */
+code > span.ch { color: #4070a0; } /* Char */
+code > span.st { color: #4070a0; } /* String */
+code > span.co { color: #60a0b0; font-style: italic; } /* Comment */
+code > span.ot { color: #007020; } /* Other */
+code > span.al { color: #ff0000; font-weight: bold; } /* Alert */
+code > span.fu { color: #06287e; } /* Function */
+code > span.er { color: #ff0000; font-weight: bold; } /* Error */
+code > span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
+code > span.cn { color: #880000; } /* Constant */
+code > span.sc { color: #4070a0; } /* SpecialChar */
+code > span.vs { color: #4070a0; } /* VerbatimString */
+code > span.ss { color: #bb6688; } /* SpecialString */
+code > span.im { } /* Import */
+code > span.va { color: #19177c; } /* Variable */
+code > span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
+code > span.op { color: #666666; } /* Operator */
+code > span.bu { } /* BuiltIn */
+code > span.ex { } /* Extension */
+code > span.pp { color: #bc7a00; } /* Preprocessor */
+code > span.at { color: #7d9029; } /* Attribute */
+code > span.do { color: #ba2121; font-style: italic; } /* Documentation */
+code > span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
+code > span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
+code > span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
+</style>
+
+
+
+<link href="data:text/css;charset=utf-8,body%20%7B%0Abackground%2Dcolor%3A%20%23fff%3B%0Amargin%3A%201em%20auto%3B%0Amax%2Dwidth%3A%20700px%3B%0Aoverflow%3A%20visible%3B%0Apadding%2Dleft%3A%202em%3B%0Apadding%2Dright%3A%202em%3B%0Afont%2Dfamily%3A%20%22Open%20Sans%22%2C%20%22Helvetica%20Neue%22%2C%20Helvetica%2C%20Arial%2C%20sans%2Dserif%3B%0Afont%2Dsize%3A%2014px%3B%0Aline%2Dheight%3A%201%2E35%3B%0A%7D%0A%23header%20%7B%0Atext%2Dalign%3A%20center%3B%0A%7D%0A%23TOC%20%7B%0Aclear%3A%20bot [...]
+
+</head>
+
+<body>
+
+
+
+
+<h1 class="title toc-ignore">Connecting data to Open Tree trees</h1>
+<h4 class="author"><em>David Winter</em></h4>
+<h4 class="date"><em>2016-09-19</em></h4>
+
+
+
+<div id="combining-data-from-otol-and-other-sources." class="section level2">
+<h2>Combining data from OToL and other sources.</h2>
+<p>One of the major goals of <code>rotl</code> is to help users combine data from other sources with the phylogenetic trees in the Open Tree database. This example document describes some of the ways in which a user might connect data to trees from Open Tree.</p>
+</div>
+<div id="get-open-tree-ids-to-match-your-data." class="section level2">
+<h2>Get Open Tree IDs to match your data.</h2>
+<p>Let’s say you have a dataset where each row represents a measurement taken from one species, and your goal is to put these measurements in some phylogenetic context. Here’s a small example: the best estimate of the mutation rate for a set of unicellular Eukaryotes along with some other property of those species which might explain the mutation rate:</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">csv_path <-<span class="st"> </span><span class="kw">system.file</span>(<span class="st">"extdata"</span>, <span class="st">"protist_mutation_rates.csv"</span>, <span class="dt">package =</span> <span class="st">"rotl"</span>)
+mu <-<span class="st"> </span><span class="kw">read.csv</span>(csv_path, <span class="dt">stringsAsFactors=</span><span class="ot">FALSE</span>)
+mu</code></pre></div>
+<pre><code>##                     species       mu pop.size genome.size
+## 1   Tetrahymena thermophila 7.61e-12 1.12e+08    1.04e+08
+## 2    Paramecium tetraurelia 1.94e-11 1.24e+08    7.20e+07
+## 3 Chlamydomonas reinhardtii 2.08e-10 1.00e+08    1.12e+08
+## 4  Dictyostelium discoideum 2.90e-11 7.40e+06    3.40e+07
+## 5  Saccharomyces cerevisiae 3.30e-10 1.00e+08    1.25e+08
+## 6       Saccharomyces pombe 2.00e-10 1.00e+07    1.25e+08</code></pre>
+<p>If we want to get a tree for these species we need to start by finding the unique ID for each of these species in the Open Tree database. We can use the Taxonomic Name Resolution Service (<code>tnrs</code>) functions to do this. Before we do that we should see if any of the taxonomic contexts, which can be used to narrow a search and avoid conflicts between different codes, apply to our group of species:</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(rotl)
+<span class="kw">tnrs_contexts</span>()</code></pre></div>
+<pre><code>## Possible contexts:
+##    Animals 
+##       Birds, Tetrapods, Mammals, Amphibians, Vertebrates 
+##       Arthropods, Molluscs, Nematodes, Platyhelminthes, Annelids 
+##       Cnidarians, Arachnides, Insects 
+##    Bacteria 
+##       SAR group, Archaea, Excavata, Amoebae, Centrohelida 
+##       Haptophyta, Apusozoa, Diatoms, Ciliates, Forams 
+##    Fungi 
+##       Basidiomycetes, Ascomycetes 
+##    Land plants 
+##       Hornworts, Mosses, Liverworts, Vascular plants, Club mosses 
+##       Ferns, Seed plants, Flowering plants, Monocots, Eudicots 
+##       Rosids, Asterids, Asterales, Asteraceae, Aster 
+##       Symphyotrichum, Campanulaceae, Lobelia 
+##    All life</code></pre>
+<p>Hmm, none of those groups contain all of our species. In this case we can search using the <code>All life</code> context and the function <code>tnrs_match_names</code>:</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">taxon_search <-<span class="st"> </span><span class="kw">tnrs_match_names</span>(<span class="dt">names=</span>mu$species, <span class="dt">context_name=</span><span class="st">"All life"</span>)
+knitr::<span class="kw">kable</span>(taxon_search)</code></pre></div>
+<table>
+<thead>
+<tr class="header">
+<th align="left">search_string</th>
+<th align="left">unique_name</th>
+<th align="left">approximate_match</th>
+<th align="left">ott_id</th>
+<th align="left">is_synonym</th>
+<th align="left">flags</th>
+<th align="left">number_matches</th>
+</tr>
+</thead>
+<tbody>
+<tr class="odd">
+<td align="left">tetrahymena thermophila</td>
+<td align="left">Tetrahymena thermophila</td>
+<td align="left">FALSE</td>
+<td align="left">180195</td>
+<td align="left">FALSE</td>
+<td align="left">SIBLING_HIGHER</td>
+<td align="left">1</td>
+</tr>
+<tr class="even">
+<td align="left">paramecium tetraurelia</td>
+<td align="left">Paramecium tetraurelia</td>
+<td align="left">FALSE</td>
+<td align="left">568130</td>
+<td align="left">FALSE</td>
+<td align="left"></td>
+<td align="left">1</td>
+</tr>
+<tr class="odd">
+<td align="left">chlamydomonas reinhardtii</td>
+<td align="left">Chlamydomonas reinhardtii</td>
+<td align="left">FALSE</td>
+<td align="left">33153</td>
+<td align="left">FALSE</td>
+<td align="left"></td>
+<td align="left">1</td>
+</tr>
+<tr class="even">
+<td align="left">dictyostelium discoideum</td>
+<td align="left">Dictyostelium discoideum</td>
+<td align="left">FALSE</td>
+<td align="left">160850</td>
+<td align="left">FALSE</td>
+<td align="left"></td>
+<td align="left">1</td>
+</tr>
+<tr class="odd">
+<td align="left">saccharomyces cerevisiae</td>
+<td align="left">Saccharomyces cerevisiae</td>
+<td align="left">FALSE</td>
+<td align="left">908549</td>
+<td align="left">FALSE</td>
+<td align="left"></td>
+<td align="left">1</td>
+</tr>
+<tr class="even">
+<td align="left">saccharomyces pombe</td>
+<td align="left">Schizosaccharomyces pombe</td>
+<td align="left">FALSE</td>
+<td align="left">990004</td>
+<td align="left">TRUE</td>
+<td align="left"></td>
+<td align="left">1</td>
+</tr>
+</tbody>
+</table>
+<p>Good, all of our species are known to Open Tree. Note, though, that one of the names is a synonym. <em>Saccharomyces pombe</em> is an older name for what is now called <em>Schizosaccharomyces pombe</em>. As the name suggests, the Taxonomic Name Resolution Service is designed to deal with these problems (and similar ones like misspellings), but it is always a good idea to check the results of <code>tnrs_match_names</code> closely to ensure the results are what you expect.</p>
+<p>In this case we have a good ID for each of our species so we can move on. Before we do that, let’s ensure we can match up our original data to the Open Tree names and IDs by adding them to our <code>data.frame</code>:</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">mu$ott_name <-<span class="st"> </span>taxon_search$unique_name
+mu$ott_id <-<span class="st"> </span>taxon_search$ott_id</code></pre></div>
+</div>
+<div id="find-a-tree-with-your-taxa" class="section level2">
+<h2>Find a tree with your taxa</h2>
+<p>Now let’s find a tree. There are two possible options here: we can search for published studies that include our taxa or we can use the ‘synthetic tree’ from Open Tree. We can try both approaches.</p>
+<div id="published-trees" class="section level3">
+<h3>Published trees</h3>
+<p>Before we can search for published studies or trees, we should check out the list of properties we can use to perform such searches:</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">studies_properties</span>()</code></pre></div>
+<pre><code>## $tree_properties
+##  [1] "ot:treebaseOTUId"           "ot:nodeLabelMode"          
+##  [3] "ot:originalLabel"           "oti_tree_id"               
+##  [5] "ot:ottTaxonName"            "ot:inferenceMethod"        
+##  [7] "ot:tag"                     "ot:treebaseTreeId"         
+##  [9] "ot:comment"                 "ot:branchLengthDescription"
+## [11] "ot:treeModified"            "ot:studyId"                
+## [13] "ot:branchLengthTimeUnits"   "ot:ottId"                  
+## [15] "is_deprecated"              "ot:branchLengthMode"       
+## [17] "ot:treeLastEdited"          "ot:nodeLabelDescription"   
+## 
+## $study_properties
+##  [1] "ot:studyModified"             "ot:focalClade"               
+##  [3] "ot:focalCladeOTTTaxonName"    "ot:focalCladeOTTId"          
+##  [5] "ot:studyPublication"          "ot:studyLastEditor"          
+##  [7] "ot:tag"                       "ot:focalCladeTaxonName"      
+##  [9] "ot:studyLabel"                "ot:comment"                  
+## [11] "ot:authorContributed"         "ot:studyPublicationReference"
+## [13] "ot:studyId"                   "ot:curatorName"              
+## [15] "ot:studyUploaded"             "ot:studyYear"                
+## [17] "is_deprecated"                "ot:dataDeposit"              
+## [19] "ot:candidateTreeForSynthesis"</code></pre>
+<p>We have <code>ottIds</code> for our taxa, so let’s use those IDs to search for trees that contain them. Starting with our first species <em>Tetrahymena thermophila</em> we can use <code>studies_find_trees</code> to do this search.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">studies_find_trees</span>(<span class="dt">property=</span><span class="st">"ot:ottId"</span>, <span class="dt">value=</span><span class="st">"180195"</span>)</code></pre></div>
+<pre><code>## [1] study_ids       dat             n_matched_trees
+## <0 rows> (or 0-length row.names)</code></pre>
+<p>Well… that’s not very promising. We can repeat that process for all of the IDs to see if the other species are better represented.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">hits <-<span class="st"> </span><span class="kw">lapply</span>(mu$ott_id, studies_find_trees, <span class="dt">property=</span><span class="st">"ot:ottId"</span>, <span class="dt">detailed =</span> <span class="ot">FALSE</span>)
+<span class="kw">sapply</span>(hits, function(x) <span class="kw">sum</span>(x[[<span class="st">"n_matched_trees"</span>]]))</code></pre></div>
+<pre><code>## [1]  0  0  2  0 32  3</code></pre>
+<p>OK, most of our species are not in any of the published trees available. You can help fix this sort of problem by <a href="https://tree.opentreeoflife.org/curator">making sure you submit your published trees to Open Tree</a>.</p>
+</div>
+<div id="a-part-of-the-synthesis-tree" class="section level3">
+<h3>A part of the synthesis tree</h3>
+<p>Thankfully, we can still use the complete Tree of Life made from the combined results of all of the published trees and taxonomies that go into Open Tree. The function <code>tol_induced_subtree</code> will fetch a tree relating a set of IDs.</p>
+<p>Using the default arguments you can get a tree object into your R session:</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">tr <-<span class="st"> </span><span class="kw">tol_induced_subtree</span>(<span class="dt">ott_ids=</span>mu$ott_id)
+<span class="kw">plot</span>(tr)</code></pre></div>
+<p><img src=" [...]
+</div>
+<div id="connect-your-data-to-the-tips-of-your-tree" class="section level3">
+<h3>Connect your data to the tips of your tree</h3>
+<p>Now we have a tree for our species, how can we use the tree and the data together?</p>
+<p>The package <code>phylobase</code> provides an object class called <code>phylo4d</code>, which is designed to represent a phylogeny and data associated with its tips. In order to get our tree and data into one of these objects we have to make sure the labels in the tree and in our data match exactly. That’s not quite the case at the moment (tree labels have underscores and IDs appended):</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">mu$ott_name[<span class="dv">1</span>]</code></pre></div>
+<pre><code>## [1] "Tetrahymena thermophila"</code></pre>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">tr$tip.label[<span class="dv">4</span>]</code></pre></div>
+<pre><code>## [1] "Chlamydomonas_reinhardtii_ott33153"</code></pre>
+<p><code>rotl</code> provides a convenience function <code>strip_ott_ids</code> to deal with these.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">tr$tip.label <-<span class="st"> </span><span class="kw">strip_ott_ids</span>(tr$tip.label, <span class="dt">remove_underscores=</span><span class="ot">TRUE</span>)
+tr$tip.label %in%<span class="st"> </span>mu$ott_name</code></pre></div>
+<pre><code>## [1] TRUE TRUE TRUE TRUE TRUE TRUE</code></pre>
+<p>Ok, now the tips are together we can make a new dataset. The <code>phylo4d()</code> function matches tip labels to the row names of a <code>data.frame</code>, so let’s make a new dataset that contains just the relevant data and has row names to match the tree:</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(phylobase)
+mu_numeric <-<span class="st"> </span>mu[,<span class="kw">c</span>(<span class="st">"mu"</span>, <span class="st">"pop.size"</span>, <span class="st">"genome.size"</span>)]
+<span class="kw">rownames</span>(mu_numeric) <-<span class="st"> </span>mu$ott_name
+tree_data <-<span class="st"> </span><span class="kw">phylo4d</span>(tr, mu_numeric)</code></pre></div>
+<p>And now we can plot the data and the tree together</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">plot</span>(tree_data)</code></pre></div>
+<p><img src=" [...]
+</div>
+</div>
+<div id="find-external-data-associated-with-studies-trees-and-taxa-from-open-tree" class="section level2">
+<h2>Find external data associated with studies, trees and taxa from Open Tree</h2>
+<p>In the above example we looked for a tree that related species in another dataset. Now we will go the other way, and try to find data associated with Open Tree records in other databases.</p>
+<div id="get-external-data-from-a-study" class="section level3">
+<h3>Get external data from a study</h3>
+<p>Let’s imagine you were interested in extending or reproducing the results of a published study. If that study is included in Open Tree you can find it via <code>studies_find_studies</code> or <code>studies_find_trees</code> and retrieve the published trees with <code>get_study</code>. <code>rotl</code> will also help you find external data. The function <code>study_external_IDs</code> retrieves the DOI for a given study, and uses that to gather some more data:</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">extra_data <-<span class="st"> </span><span class="kw">study_external_IDs</span>(<span class="st">"pg_1980"</span>)
+extra_data</code></pre></div>
+<pre><code>## External data identifiers for study 
+##  $doi:  10.1016/j.ympev.2006.04.016 
+##  $pubmed_id:  16762568 
+##  $nucleotide_ids: vector of 58 IDs
+##  $external_data_url http://purl.org/phylo/treebase/phylows/study/TB2:S1575</code></pre>
+<p>Here the returned object contains an <code>external_data_url</code> (in this case a link to the study in Treebase), a pubmed ID for the paper and a vector of IDs for the NCBI’s nucleotide database. The packages <code>treebase</code> and <code>rentrez</code> provide functions to make use of these IDs within R.</p>
+<p>As an example, let’s use <code>rentrez</code> to download the first two DNA sequences and print them.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(rentrez)
+seqs <-<span class="st"> </span><span class="kw">entrez_fetch</span>(<span class="dt">db=</span><span class="st">"nucleotide"</span>, <span class="dt">id=</span>extra_data$nucleotide_ids[<span class="dv">1</span>:<span class="dv">2</span>], <span class="dt">rettype=</span><span class="st">"fasta"</span>)
+<span class="kw">cat</span>(seqs)</code></pre></div>
+<pre><code>## >gi|111606994|emb|AM181011.1| Plectroninia neocaledoniense partial 28S rRNA gene, specimen voucher G316300 (Queensland Museum)
+## GCTAGCAAGCGCGTCGGTGGTTCAGCCGGCTGGTCTCGTCGAGTTGTCGGTGTGCGGATCCGAACGGACC
+## GCGGCCGATGGCGTCGGCGGGCAAGCTGTGGTGCACTCTGTCGGCGTGCGCGTCAGCGTCGGTTTCGGCC
+## GGACGACGAGGCGCTCGGGGAAGGTAGCTGGACCGGTCTTCGGTGCAGTGTTATAGCCCTGGGCCGCTGG
+## GTTCGGCGTTTGGGACCGAGGAGAGAGATGATCGCTGCAGCGCCTGTCTCCCTCTCGAGGGGGGCTAGCC
+## AGCCGCTGTTTGGGTGGCGTCACTGGCGGAGGACTGCACGCAGTGCTTCGCCGGTGGTCGTGTCCAGGCG
+## GGCGGTGTGGGTATAGAGGCGCTTAGGACGCTGGCGTCCAAATGGCCGTGCGCGACCCGTCTTGAAACAC
+## GGACCAAGGAGTCTAGCATGTGCGCGAGTCTTAGGGTGTGGAAGCCCTCGGGCGCAATGAAAGTGAAGGG
+## CCGTCGTCTCTCGGGGCTGCGGTGTGAGGTGAGAGCCGTCGCCGTCGGGTGGCGGTGCATCATCGGCCGG
+## TCCATCCTGCTCTCAGGAGGATCTGCGCAAGAGCGTGTTTGCTGGGACCCGAAAGATGGTGAACTATGCC
+## TGAATAGGGTGAAGCCAGAGGAAACTCTGGTGGAGGCTCGTAGCGGTTCTGACGTGCAAATCGATCGTCA
+## AATTTGGGTATAGGGGCGAAAGACTAATCGAACCATCTAGTAGCTGGTTCCCTCCGAAGTTTCCCTCAGG
+## ATAGCTGGAACTCGTCTTGACACAGTTTTATCAGGTAAAGCGAATGATTAGAGGTCTTGGGGGTGAAACA
+## CCCTCAACCTATTCTCAAACTTTAAATAGGTAAGAAGCGCGACTTGCTCAATTGAAGTGGCGCGCAGTGA
+## ATGTGAGTTCCAAGTGGGCCATTTTTGGTAAGCAGAACTGGCGATGCGGGATGAACCGAACGCTCGGTTA
+## AGGTGCCCAAGTCGACGCTCATCAGACCCCAGAAAAGGTGTTGGTCGATATAGACAGCAGGACGGTGGCC
+## ATGGAAGTCGGAATCCGCTAAGGAGTGTGTAACAACTCACCTGCCGAATCAACTAGCCCTGAAAATGGAT
+## GGCGCTCAAGCGTCGCACCTATACCGAGCCGTCGTGGTAAATGCCAGGCCACGACGAGTAGGAGGGCGCG
+## GTGGTCGTGACGCAGCCCTTGGCGCGAGCCTGGGCGAAACGGCCTCCGGTGCAGATCTTGGTGGTAGTAG
+## CAAATATTCAAATGAGAGCTTTGAAGACCGAAGTGGAGAAAGGTTCCATGTGAACAGCAGTTGGACATGG
+## GTTAGTCGATCCTAAGAGATAGGGAAGTTCCGTGTGAAAGTGCGCAATGCGCTTCTGTGCTGCGCGCCTC
+## CTATCGAAAGGGAATCGGGTTAATATTCCCGAACCGGAAGGCGGATATCTCTGGCTCTCGGGTCAGGAGC
+## GGCAACGCAAGCGTACTGCGAGACGTCGGCGGGGGCTCCGGGAAGAGTTGTCTTTTCTTTTTAACGCAGT
+## CGCCATCCCTGGAATCGGTTTGCCCGGAGATAGGGTTGGCTGGCTCGGTAAAGCAGCACACTTCATGTGC
+## TGTCCGGTGCGCTCTCGACGGCCCTTGAAAATCGCAGGTGTGCATCGATTCTCGCATCCGGTCGTACTCA
+## TAACCGCATCAGGTCTCCAAGGT
+## 
+## >gi|111606993|emb|AM181010.1| Eilhardia schulzei partial 28S rRNA gene, specimen voucher G316071 (Queensland Museum)
+## GCTAGTAATGTACGTTGGTGGTTCAGCCGGCTAGTCTTGTCGAGTCGTCGTGTGGTGGATCCGACTGGAC
+## CGTCCGCGGTGGTGTCGGCGGGCGAGCTGTGGTGCACTCTACGGACGTGCGCGTCAGCGTCGGTTCTCGA
+## TGGGCGATAAGGTGCGTGGGGGAAGGTGGCTCGGTCCTTGGGAACTGAGTGTTACAGACCCTGGTGCTGG
+## GCTCGTCGTGGGACCGAGGAGAGAGAGAGATGATCGCTGCGGCACCTGCCCCGTTGTCATTTTTCGGGGC
+## TAGCCAGCCGTTTGTCAGGTGTGCGTCGGACGTTGAGGACTGCACGCAGTGCTGGACGTGGAGGCGTGAT
+## CTGATGGCGGTGTGGGCATTAGAGGTGCCTAGGACGCTGGCGTCCAAATGGCCGTGCGCGACCCGTCTTG
+## AAACACGGACCAAGGAGTCTAACATGTGCGCGAGTCTTAGGGTGTGCAAGCCCTCGGGCGCAATGAAAGT
+## GAAGGCTCGGCGGCGCTAGTCGAGCTGAGGTGAGAGCCGTGGCCGTTGCATGTGGCGGCGGCGGCGCATC
+## ATCGGCCGGTCCATCCTGCTCTCAGGGGGATCCGAGCAAGAGCGTATTTGTTGGGACCCGAAAGATGGTG
+## AACTATGCCTGAATAGGGTGAAGCCAGAGGAAACTCTGGTGGAGGCTCGTAGCGATTCTGACGTGCAAAT
+## CGATCGTCAAATTTGGGTATAGGGGCGAAAGACTAATCGAACCATCTAGTAGCTGGTTCCCTCCGAAGTT
+## TCCCTCAGGATAGCTGGAGCTCTTGGACACAGTTTTATCAGGTAAAGCGAATGATCAGAGGTCTTGGGGG
+## TGAAACACCCTCAACCTATTCTCAAACTTTAAATCGGTAAGAAGCGCGACTTGCTGAATTGAAGCCGCGC
+## GCAAGCAATGTGAGTTCCAAGTGGGCCATTTTTGGTAAGCAGAACTGGCGATGCGGGATGAACCGAACGC
+## TGGGTTAAGGTGCCAAAGTCGACGCTCATCAGACCCCAGAAAAGGTGTTGGTTGATATAGACAGCAGGAC
+## GATGGCCATGGAAGTCGGAATCCGCTAAGGAGTGTGTAACAACTCACCTGCCGAATCAACTAGCCCTGAA
+## AATGGATGGCGCTCAAGCGTCGCACCTATACCGGGCCGTCGTCGCAAATGCCAGGCGACGACGAGTAGGA
+## GGGCGCAGTGGTCGTCATGCAGCCCTTGGCGTGAGCCTGGGTCAAACGGCCTCTGGTGCAGATCTTGGTG
+## GTAGTAGCAAATATTCAAATGAGAGCTTTGAAGACCGAAGTGGAGAAAGGTTCCATGTGAACAGCAGTTG
+## GACATGGGTTAGTCGATCCTAAGTGATAGGGGAGCTCCGTATGAAAGTGCGCAATCGGCCCTGCTTGTGT
+## CGCCTTGCGCCACCTATCGAAAGGGAATCGGGTTAATATTCCCGAACCGGAAGGCGGATTTTCTCTGGCT
+## CTCGGGTCAGGAGCGGCAACGCTAGCGAACCGCGAGACGTCGGCGGGGGCTCCGGGAAGAGTTGTCTTTT
+## CTTTTTAACGCAGTCGCCATCCCTGGAATCGGTTTGCCCGGAGATAGGGTTGGCTGGCTCGGTAAAGCAG
+## CACACTTCATGTGCTGTCCGGTGCGCTCTCGACGGCCCTTGAAAATCGCGGCGAGTGTAGTCTGATTTTC
+## GCATCCGTTCGTACTCATAACCGCATCAGGTCTCCAAGGT</code></pre>
+<p>You could further process these sequences in R with the function <code>read.dna</code> from <code>ape</code> or save them to disk by specifying a file name with <code>cat</code>.</p>
+</div>
+<div id="find-a-ott-taxon-in-another-taxonomic-database" class="section level3">
+<h3>Find an OTT taxon in another taxonomic database</h3>
+<p>It is also possible to map an Open Tree taxon to a record in another taxonomic database. For instance, if we wanted to search for data about one of the tips of the sub-tree we fetched in the example above we could do so using <code>taxon_external_IDs</code>:</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">Tt_ids <-<span class="st"> </span><span class="kw">taxon_external_IDs</span>(mu$ott_id[<span class="dv">2</span>])
+Tt_ids</code></pre></div>
+<pre><code>##   source       id
+## 1  silva AY102613
+## 2   ncbi     5888
+## 3   gbif  5839866</code></pre>
+<p>A user could then use <code>rgbif</code> to find locality records using the gbif ID or <code>rentrez</code> to get genetic or bibliometric data about it from the NCBI’s databases.</p>
+</div>
+</div>
+<div id="what-next" class="section level2">
+<h2>What next</h2>
+<p>The demonstration gets you to the point of visualizing your data in a phylogenetic context. But there’s a lot more you do with this sort of data in R. For instance, you could use packages like <code>ape</code>, <code>caper</code>, <code>phytools</code> and <code>mcmcGLMM</code> to perform phylogenetic comparative analyses of your data. You could gather more data on your species using packages that connect to trait databases like <code>rfishbase</code>, <code>AntWeb</code> or <code>rnp [...]
+</div>
+
+
+
+<!-- dynamically load mathjax for compatibility with self-contained -->
+<script>
+  (function () {
+    var script = document.createElement("script");
+    script.type = "text/javascript";
+    script.src  = "https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
+    document.getElementsByTagName("head")[0].appendChild(script);
+  })();
+</script>
+
+</body>
+</html>
diff --git a/inst/doc/how-to-use-rotl.R b/inst/doc/how-to-use-rotl.R
new file mode 100644
index 0000000..176e721
--- /dev/null
+++ b/inst/doc/how-to-use-rotl.R
@@ -0,0 +1,75 @@
+## ------------------------------------------------------------------------
+library(rotl)
+taxa <- c("Hyla", "Salmo", "Diadema", "Nautilus")
+resolved_names <- tnrs_match_names(taxa)
+
+## ------------------------------------------------------------------------
+resolved_names <- tnrs_match_names(taxa, context_name = "Animals")
+
+## ---- fig.width=7, fig.height=4------------------------------------------
+my_tree <- tol_induced_subtree(ott_ids = resolved_names$ott_id)
+plot(my_tree, no.margin=TRUE)
+
+## ------------------------------------------------------------------------
+taxa <- c("Hyla", "Salmo", "Diadema", "Nautilus")
+resolved_names <- tnrs_match_names(taxa)
+resolved_names
+inspect(resolved_names, taxon_name = "diadema")
+
+## ------------------------------------------------------------------------
+resolved_names <- update(resolved_names, taxon_name = "diadema",
+                         new_row_number = 2)
+
+## we could also have used the ott_id to replace this taxon:
+## resolved_names <- update(resolved_names, taxon_name = "diadema",
+##                          new_ott_id = 4930522)
+
+## ------------------------------------------------------------------------
+diadema_info <- taxonomy_taxon_info(631176)
+tax_rank(diadema_info)
+synonyms(diadema_info)
+tax_name(diadema_info)
+
+## ------------------------------------------------------------------------
+diadema_tax_tree <- taxonomy_subtree(631176)
+diadema_tax_tree
+
+## ---- fig.width=7, fig.height=4------------------------------------------
+mono_id <- tnrs_match_names("Monotremata")
+mono_tree <- tol_subtree(ott_id = ott_id(mono_id))
+plot(mono_tree)
+
+## ------------------------------------------------------------------------
+furry_studies <- studies_find_studies(property="ot:focalCladeOTTTaxonName", value="Mammalia")
+furry_ids <- furry_studies$study_ids
+
+## ------------------------------------------------------------------------
+furry_meta <- get_study_meta("pg_2550")
+get_publication(furry_meta)     ## The citation for the source of the study
+get_tree_ids(furry_meta)        ## This study has 10 trees associated with it
+candidate_for_synth(furry_meta) ## None of these trees are yet included in the OTL
+
+## ---- eval=FALSE---------------------------------------------------------
+#  get_study_tree(study_id="pg_710", tree_id="tree1277",
+#                 tip_label='ott_taxon_name', file = "/tmp/tree.tre",
+#                 file_format = "newick")
+#  tr <- ape::read.tree(file = "/tmp/tree.tre")
+
+## ------------------------------------------------------------------------
+giant_squid <- tnrs_match_names("Architeuthis")
+tax_lineage(taxonomy_taxon_info(ott_id(giant_squid), include_lineage = TRUE))
+
+## ------------------------------------------------------------------------
+turducken <- c("Meleagris", "Anas", "Gallus")
+taxa <- tnrs_match_names(turducken, context="Animals")
+taxa
+
+## ---- eval=FALSE---------------------------------------------------------
+#  tr <- tol_induced_subtree(taxa$ott_id)
+
+## ---- fig.width=7, fig.height=4------------------------------------------
+turducken_spp <- c("Meleagris gallopavo", "Anas platyrhynchos", "Gallus gallus")
+taxa <- tnrs_match_names(turducken_spp, context="Animals")
+tr <- tol_induced_subtree(taxa$ott_id)
+plot(tr)
+
diff --git a/inst/doc/how-to-use-rotl.Rmd b/inst/doc/how-to-use-rotl.Rmd
new file mode 100644
index 0000000..77c9d88
--- /dev/null
+++ b/inst/doc/how-to-use-rotl.Rmd
@@ -0,0 +1,283 @@
+---
+title: "How to use rotl?"
+author: "François Michonneau"
+date: "`r Sys.Date()`"
+output:
+  rmarkdown::html_vignette:
+    css: vignette.css
+vignette: >
+  %\VignetteIndexEntry{How to use rotl?}
+  %\VignetteEngine{knitr::rmarkdown}
+  \usepackage[utf8]{inputenc}
+---
+
+`rotl` provides an interface to the Open Tree of Life (OTL) API and allows users
+to query the API, retrieve parts of the Tree of Life and integrate these parts
+with other R packages.
+
+The OTL API provides services to access:
+
+* the **Tree of Life** a.k.a. TOL (the synthetic tree): a single draft tree that is
+  a combination of **the OTL taxonomy** and the **source trees** (studies)
+* the **Taxonomic name resolution services** a.k.a. TNRS: the methods for
+  resolving taxonomic names to the internal identifiers used by the TOL and the
+  GOL (the `ott ids`).
+* the **Taxonomy** a.k.a. OTT (for Open Tree Taxonomy): which represents the
+  synthesis of the different taxonomies used as a backbone of the TOL when no
+  studies are available.
+* the **Studies** containing the source trees used to build the TOL, and
+  extracted from the scientific literature.
+
+In `rotl`, each of these services corresponds to functions with different
+prefixes:
+
+| Service       | `rotl` prefix |
+|---------------|---------------|
+| Tree of Life  | `tol_`        |
+| TNRS          | `tnrs_`       |
+| Taxonomy      | `taxonomy_`   |
+| Studies       | `studies_`    |
+
+`rotl` also provides a few other functions and methods that can be used to
+extract relevant information from the objects returned by these functions.
+
+
+## Demonstration of a basic workflow
+
+The most common use for `rotl` is probably to start from a list of species and
+get the relevant parts of the tree for these species. This is a two step
+process:
+
+1. the species names need to be matched to their `ott_id` (the Open Tree
+	Taxonomy identifiers) using the Taxonomic name resolution services (TNRS)
+1. these `ott_id` will then be used to retrieve the relevant parts of the Tree
+   of Life.
+
+### Step 1: Matching taxonomy to the `ott_id`
+
+Let's start by doing a search on a diverse group of taxa: a tree frog (genus
+_Hyla_), a fish (genus _Salmo_), a sea urchin (genus _Diadema_), and a nautilus
+(genus _Nautilus_).
+
+```{r}
+library(rotl)
+taxa <- c("Hyla", "Salmo", "Diadema", "Nautilus")
+resolved_names <- tnrs_match_names(taxa)
+```
+
+It's always a good idea to check that the resolved names match what you
+intended:
+
+`r knitr::kable(resolved_names)`
+
+The column `unique_name` sometimes indicates the higher taxonomic level
+associated with the name. The column `number_matches` indicates the number of
+`ott_id` that corresponds to a given name. In this example, our search on
+_Diadema_ returns 2 matches, and the one returned by default is indeed the sea
+urchin that we want for our query. The argument `context_name` allows you to
+limit the taxonomic scope of your search. _Diadema_ is also the genus name of a
+fungus. To ensure that our search is limited to animal names, we could do:
+
+```{r}
+resolved_names <- tnrs_match_names(taxa, context_name = "Animals")
+```
+
+If you are trying to build a tree with deeply divergent taxa that the argument
+`context_name` cannot fix, see "How to change the ott ids assigned to my taxa?"
+in the FAQ below.
+
+
+### Step 2: Getting the tree corresponding to our taxa
+
+Now that we have the correct `ott_id` for our taxa, we can ask for the tree
+using the `tol_induced_subtree()` function. By default, the object returned by
+`tol_induced_subtree` is a phylo object (from the
+[ape](https://cran.r-project.org/package=ape) package), so we can plot it
+directly.
+
+```{r, fig.width=7, fig.height=4}
+my_tree <- tol_induced_subtree(ott_ids = resolved_names$ott_id)
+plot(my_tree, no.margin=TRUE)
+```
+
+
+## FAQ
+
+### How to change the ott ids assigned to my taxa?
+
+If you realize that `tnrs_match_names` assigns the incorrect taxonomic group to
+your name (e.g., because of synonymy) and changing the `context_name` does not
+help, you can use the function `inspect`. This function takes the object
+resulting from `tnrs_match_names()`, and either the row number, the taxon name
+(you used in your search in lowercase), or the `ott_id` returned by the initial
+query.
+
+To illustrate this, let's re-use the previous query but this time pretending that
+we are interested in the fungus _Diadema_ and not the sea urchin:
+
+```{r}
+taxa <- c("Hyla", "Salmo", "Diadema", "Nautilus")
+resolved_names <- tnrs_match_names(taxa)
+resolved_names
+inspect(resolved_names, taxon_name = "diadema")
+```
+
+In our case, we want the second row in this data frame to replace the
+information that initially matched for _Diadema_. We can now use the `update()`
+function, to change to the correct taxa (the fungus not the sea urchin):
+
+```{r}
+resolved_names <- update(resolved_names, taxon_name = "diadema",
+                         new_row_number = 2)
+
+## we could also have used the ott_id to replace this taxon:
+## resolved_names <- update(resolved_names, taxon_name = "diadema",
+##                          new_ott_id = 4930522)
+```
+
+And now our `resolved_names` data frame includes the taxon we want:
+
+`r knitr::kable(resolved_names)`
+
+### How do I know that the taxa I'm asking for is the correct one?
+
+The function `taxonomy_taxon_info()` takes `ott_ids` as arguments and returns
+taxonomic information about the taxa. This output can be passed to some helper
+functions to extract the relevant information. Let's illustrate this with our
+_Diadema_ example:
+
+```{r}
+diadema_info <- taxonomy_taxon_info(631176)
+tax_rank(diadema_info)
+synonyms(diadema_info)
+tax_name(diadema_info)
+```
+
+In some cases, it might also be useful to investigate the taxonomic tree
+descending from an `ott_id` to check that it's the correct taxon and to
+determine the species included in the Open Tree Taxonomy:
+
+```{r}
+diadema_tax_tree <- taxonomy_subtree(631176)
+diadema_tax_tree
+```
+
+By default, this function returns all taxa (including self, and internal)
+descending from this `ott_id` but it is also possible to return a `phylo` object.
+
+### How do I get the tree for a particular taxonomic group?
+
+If you are looking to get the tree for a particular taxonomic group, you need to
+first identify it by its node id or ott id, and then use the `tol_subtree()`
+function:
+
+```{r, fig.width=7, fig.height=4}
+mono_id <- tnrs_match_names("Monotremata")
+mono_tree <- tol_subtree(ott_id = ott_id(mono_id))
+plot(mono_tree)
+```
+
+
+### How do I find trees from studies focused on my favourite taxa?
+
+The function `studies_find_studies()` allows the user to search for studies
+matching specific criteria. The function `studies_properties()` returns the
+list of properties that can be used in the search.
+
+```{r}
+furry_studies <- studies_find_studies(property="ot:focalCladeOTTTaxonName", value="Mammalia")
+furry_ids <- furry_studies$study_ids
+```
+
+Now that we know the `study_id`, we can ask for the meta data information
+associated with this study:
+
+```{r}
+furry_meta <- get_study_meta("pg_2550")
+get_publication(furry_meta)     ## The citation for the source of the study
+get_tree_ids(furry_meta)        ## This study has 10 trees associated with it
+candidate_for_synth(furry_meta) ## None of these trees are yet included in the OTL
+```
+
+Using `get_study("pg_2550")` would return a `multiPhylo` object (default) with
+all the trees associated with this particular study, while
+`get_study_tree("pg_2550", "tree5513")` would return one of these trees.
+
+### The tree returned by the API has duplicated tip labels, how can I work around it?
+
+You may encounter the following error message:
+
+```
+Error in rncl(file = file, ...) : Taxon number 39 (coded by the token Pratia
+angulata) has already been encountered in this tree. Duplication of taxa in a
+tree is prohibited.
+```
+
+This message occurs as duplicate labels are not allowed in the NEXUS format and
+it is strictly enforced by the part of the code used by `rotl` to import the
+trees in memory.
+
+If you use a version of `rotl` more recent than 0.4.1, this should not happen by
+default for the function `get_study_tree`. If it happens with another function,
+please [let us know](https://github.com/ropensci/rotl/issues).
+
+The easiest way to work around this is to save the tree in a file, and use APE
+to read it in memory:
+
+```{r, eval=FALSE}
+get_study_tree(study_id="pg_710", tree_id="tree1277",
+               tip_label='ott_taxon_name', file = "/tmp/tree.tre",
+               file_format = "newick")
+tr <- ape::read.tree(file = "/tmp/tree.tre")
+```
+
+### How do I get the higher taxonomy for a given taxa?
+
+If you encounter a taxon name you are not familiar with, it might be useful to
+obtain its higher taxonomy to see where it fits in the tree of life. We can
+combine several taxonomy methods to extract this information easily.
+
+```{r}
+giant_squid <- tnrs_match_names("Architeuthis")
+tax_lineage(taxonomy_taxon_info(ott_id(giant_squid), include_lineage = TRUE))
+```
+
+### Why are OTT IDs discovered with `rotl` missing from an induced subtree?
+
+Some taxonomic names that can be retrieved through the taxonomic name
+resolution service are not part of the Open Tree's synthesis tree. These are
+usually traditional higher-level taxa that have been found to be paraphyletic.
+
+For instance, if you wanted to fetch a tree relating the three birds that go
+into a [Turducken](https://en.wikipedia.org/wiki/Turducken) you might search
+for the turkey, duck, and chicken genera:
+
+```{r}
+turducken <- c("Meleagris", "Anas", "Gallus")
+taxa <- tnrs_match_names(turducken, context="Animals")
+taxa
+```
+So, we have IDs for each genus but those messages in the `flags` column look
+concerning. `BARREN` means these taxa have no descendants and
+`MAJOR_RANK_CONFLICT` refers to inconsistencies between the rank of these taxa
+and some of their relatives. For these reasons, _Meleagris_ and _Anas_ are not
+included in the synthetic tree.
+
+If we tried to press on ahead with the IDs that we have, we'd get an
+error (because there are too few good IDs) or a tree with fewer tips than we had
+anticipated.
+
+```{r, eval=FALSE}
+tr <- tol_induced_subtree(taxa$ott_id)
+```
+
+The best way to avoid these problems is to specify complete species names
+(species being the lowest level of classification in the Open Tree taxonomy they
+are guaranteed to be monophyletic):
+
+```{r, fig.width=7, fig.height=4}
+turducken_spp <- c("Meleagris gallopavo", "Anas platyrhynchos", "Gallus gallus")
+taxa <- tnrs_match_names(turducken_spp, context="Animals")
+tr <- tol_induced_subtree(taxa$ott_id)
+plot(tr)
+```
diff --git a/inst/doc/how-to-use-rotl.html b/inst/doc/how-to-use-rotl.html
new file mode 100644
index 0000000..e5dce8d
--- /dev/null
+++ b/inst/doc/how-to-use-rotl.html
@@ -0,0 +1,446 @@
+<!DOCTYPE html>
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+
+<head>
+
+<meta charset="utf-8">
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+<meta name="generator" content="pandoc" />
+
+<meta name="viewport" content="width=device-width, initial-scale=1">
+
+<meta name="author" content="François Michonneau" />
+
+<meta name="date" content="2016-09-19" />
+
+<title>How to use rotl?</title>
+
+
+
+<style type="text/css">code{white-space: pre;}</style>
+<style type="text/css">
+div.sourceCode { overflow-x: auto; }
+table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode {
+  margin: 0; padding: 0; vertical-align: baseline; border: none; }
+table.sourceCode { width: 100%; line-height: 100%; }
+td.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; color: #aaaaaa; border-right: 1px solid #aaaaaa; }
+td.sourceCode { padding-left: 5px; }
+code > span.kw { color: #007020; font-weight: bold; } /* Keyword */
+code > span.dt { color: #902000; } /* DataType */
+code > span.dv { color: #40a070; } /* DecVal */
+code > span.bn { color: #40a070; } /* BaseN */
+code > span.fl { color: #40a070; } /* Float */
+code > span.ch { color: #4070a0; } /* Char */
+code > span.st { color: #4070a0; } /* String */
+code > span.co { color: #60a0b0; font-style: italic; } /* Comment */
+code > span.ot { color: #007020; } /* Other */
+code > span.al { color: #ff0000; font-weight: bold; } /* Alert */
+code > span.fu { color: #06287e; } /* Function */
+code > span.er { color: #ff0000; font-weight: bold; } /* Error */
+code > span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
+code > span.cn { color: #880000; } /* Constant */
+code > span.sc { color: #4070a0; } /* SpecialChar */
+code > span.vs { color: #4070a0; } /* VerbatimString */
+code > span.ss { color: #bb6688; } /* SpecialString */
+code > span.im { } /* Import */
+code > span.va { color: #19177c; } /* Variable */
+code > span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
+code > span.op { color: #666666; } /* Operator */
+code > span.bu { } /* BuiltIn */
+code > span.ex { } /* Extension */
+code > span.pp { color: #bc7a00; } /* Preprocessor */
+code > span.at { color: #7d9029; } /* Attribute */
+code > span.do { color: #ba2121; font-style: italic; } /* Documentation */
+code > span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
+code > span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
+code > span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
+</style>
+
+
+
+<link href="data:text/css;charset=utf-8,body%20%7B%0Abackground%2Dcolor%3A%20%23fff%3B%0Amargin%3A%201em%20auto%3B%0Amax%2Dwidth%3A%20700px%3B%0Aoverflow%3A%20visible%3B%0Apadding%2Dleft%3A%202em%3B%0Apadding%2Dright%3A%202em%3B%0Afont%2Dfamily%3A%20%22Open%20Sans%22%2C%20%22Helvetica%20Neue%22%2C%20Helvetica%2C%20Arial%2C%20sans%2Dserif%3B%0Afont%2Dsize%3A%2014px%3B%0Aline%2Dheight%3A%201%2E35%3B%0A%7D%0A%23header%20%7B%0Atext%2Dalign%3A%20center%3B%0A%7D%0A%23TOC%20%7B%0Aclear%3A%20bot [...]
+
+</head>
+
+<body>
+
+
+
+
+<h1 class="title toc-ignore">How to use rotl?</h1>
+<h4 class="author"><em>François Michonneau</em></h4>
+<h4 class="date"><em>2016-09-19</em></h4>
+
+
+
+<p><code>rotl</code> provides an interface to the Open Tree of Life (OTL) API and allows users to query the API, retrieve parts of the Tree of Life and integrate these parts with other R packages.</p>
+<p>The OTL API provides services to access:</p>
+<ul>
+<li>the <strong>Tree of Life</strong> a.k.a. TOL (the synthetic tree): a single draft tree that is a combination of <strong>the OTL taxonomy</strong> and the <strong>source trees</strong> (studies)</li>
+<li>the <strong>Taxonomic name resolution services</strong> a.k.a. TNRS: the methods for resolving taxonomic names to the internal identifiers used by the TOL and the GOL (the <code>ott ids</code>).</li>
+<li>the <strong>Taxonomy</strong> a.k.a. OTT (for Open Tree Taxonomy): which represents the synthesis of the different taxonomies used as a backbone of the TOL when no studies are available.</li>
+<li>the <strong>Studies</strong> containing the source trees used to build the TOL, and extracted from the scientific literature.</li>
+</ul>
+<p>In <code>rotl</code>, each of these services corresponds to functions with different prefixes:</p>
+<table>
+<thead>
+<tr class="header">
+<th align="left">Service</th>
+<th align="left"><code>rotl</code> prefix</th>
+</tr>
+</thead>
+<tbody>
+<tr class="odd">
+<td align="left">Tree of Life</td>
+<td align="left"><code>tol_</code></td>
+</tr>
+<tr class="even">
+<td align="left">TNRS</td>
+<td align="left"><code>tnrs_</code></td>
+</tr>
+<tr class="odd">
+<td align="left">Taxonomy</td>
+<td align="left"><code>taxonomy_</code></td>
+</tr>
+<tr class="even">
+<td align="left">Studies</td>
+<td align="left"><code>studies_</code></td>
+</tr>
+</tbody>
+</table>
+<p><code>rotl</code> also provides a few other functions and methods that can be used to extract relevant information from the objects returned by these functions.</p>
+<div id="demonstration-of-a-basic-workflow" class="section level2">
+<h2>Demonstration of a basic workflow</h2>
+<p>The most common use for <code>rotl</code> is probably to start from a list of species and get the relevant parts of the tree for these species. This is a two step process:</p>
+<ol style="list-style-type: decimal">
+<li>the species names need to be matched to their <code>ott_id</code> (the Open Tree Taxonomy identifiers) using the Taxonomic name resolution services (TNRS)</li>
+<li>these <code>ott_id</code> will then be used to retrieve the relevant parts of the Tree of Life.</li>
+</ol>
+<div id="step-1-matching-taxonomy-to-the-ott_id" class="section level3">
+<h3>Step 1: Matching taxonomy to the <code>ott_id</code></h3>
+<p>Let’s start by doing a search on a diverse group of taxa: a tree frog (genus <em>Hyla</em>), a fish (genus <em>Salmo</em>), a sea urchin (genus <em>Diadema</em>), and a nautilus (genus <em>Nautilus</em>).</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(rotl)
+taxa <-<span class="st"> </span><span class="kw">c</span>(<span class="st">"Hyla"</span>, <span class="st">"Salmo"</span>, <span class="st">"Diadema"</span>, <span class="st">"Nautilus"</span>)
+resolved_names <-<span class="st"> </span><span class="kw">tnrs_match_names</span>(taxa)</code></pre></div>
+<p>It’s always a good idea to check that the resolved names match what you intended:</p>
+<table>
+<thead>
+<tr class="header">
+<th align="left">search_string</th>
+<th align="left">unique_name</th>
+<th align="left">approximate_match</th>
+<th align="left">ott_id</th>
+<th align="left">is_synonym</th>
+<th align="left">flags</th>
+<th align="left">number_matches</th>
+</tr>
+</thead>
+<tbody>
+<tr class="odd">
+<td align="left">hyla</td>
+<td align="left">Hyla</td>
+<td align="left">FALSE</td>
+<td align="left">1062216</td>
+<td align="left">FALSE</td>
+<td align="left"></td>
+<td align="left">1</td>
+</tr>
+<tr class="even">
+<td align="left">salmo</td>
+<td align="left">Salmo</td>
+<td align="left">FALSE</td>
+<td align="left">982359</td>
+<td align="left">FALSE</td>
+<td align="left"></td>
+<td align="left">1</td>
+</tr>
+<tr class="odd">
+<td align="left">diadema</td>
+<td align="left">Diadema (genus in Holozoa)</td>
+<td align="left">FALSE</td>
+<td align="left">631176</td>
+<td align="left">FALSE</td>
+<td align="left"></td>
+<td align="left">3</td>
+</tr>
+<tr class="even">
+<td align="left">nautilus</td>
+<td align="left">Nautilus</td>
+<td align="left">FALSE</td>
+<td align="left">616358</td>
+<td align="left">FALSE</td>
+<td align="left"></td>
+<td align="left">1</td>
+</tr>
+</tbody>
+</table>
+<p>The column <code>unique_name</code> sometimes indicates the higher taxonomic level associated with the name. The column <code>number_matches</code> indicates the number of <code>ott_id</code> that corresponds to a given name. In this example, our search on <em>Diadema</em> returns 2 matches, and the one returned by default is indeed the sea urchin that we want for our query. The argument <code>context_name</code> allows you to limit the taxonomic scope of your search. <em>Diadema</em> [...]
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">resolved_names <-<span class="st"> </span><span class="kw">tnrs_match_names</span>(taxa, <span class="dt">context_name =</span> <span class="st">"Animals"</span>)</code></pre></div>
+<p>If you are trying to build a tree with deeply divergent taxa that the argument <code>context_name</code> cannot fix, see “How to change the ott ids assigned to my taxa?” in the FAQ below.</p>
+</div>
+<div id="step-2-getting-the-tree-corresponding-to-our-taxa" class="section level3">
+<h3>Step 2: Getting the tree corresponding to our taxa</h3>
+<p>Now that we have the correct <code>ott_id</code> for our taxa, we can ask for the tree using the <code>tol_induced_subtree()</code> function. By default, the object returned by <code>tol_induced_subtree</code> is a phylo object (from the <a href="https://cran.r-project.org/package=ape">ape</a> package), so we can plot it directly.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">my_tree <-<span class="st"> </span><span class="kw">tol_induced_subtree</span>(<span class="dt">ott_ids =</span> resolved_names$ott_id)
+<span class="kw">plot</span>(my_tree, <span class="dt">no.margin=</span><span class="ot">TRUE</span>)</code></pre></div>
+<p><img src=" [...]
+</div>
+</div>
+<div id="faq" class="section level2">
+<h2>FAQ</h2>
+<div id="how-to-change-the-ott-ids-assigned-to-my-taxa" class="section level3">
+<h3>How to change the ott ids assigned to my taxa?</h3>
+<p>If you realize that <code>tnrs_match_names</code> assigns the incorrect taxonomic group to your name (e.g., because of synonymy) and changing the <code>context_name</code> does not help, you can use the function <code>inspect</code>. This function takes the object resulting from <code>tnrs_match_names()</code>, and either the row number, the taxon name (you used in your search in lowercase), or the <code>ott_id</code> returned by the initial query.</p>
+<p>To illustrate this, let’s re-use the previous query but this time pretending that we are interested in the fungus <em>Diadema</em> and not the sea urchin:</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">taxa <-<span class="st"> </span><span class="kw">c</span>(<span class="st">"Hyla"</span>, <span class="st">"Salmo"</span>, <span class="st">"Diadema"</span>, <span class="st">"Nautilus"</span>)
+resolved_names <-<span class="st"> </span><span class="kw">tnrs_match_names</span>(taxa)
+resolved_names</code></pre></div>
+<pre><code>##   search_string                unique_name approximate_match  ott_id
+## 1          hyla                       Hyla             FALSE 1062216
+## 2         salmo                      Salmo             FALSE  982359
+## 3       diadema Diadema (genus in Holozoa)             FALSE  631176
+## 4      nautilus                   Nautilus             FALSE  616358
+##   is_synonym flags number_matches
+## 1      FALSE                    1
+## 2      FALSE                    1
+## 3      FALSE                    3
+## 4      FALSE                    1</code></pre>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">inspect</span>(resolved_names, <span class="dt">taxon_name =</span> <span class="st">"diadema"</span>)</code></pre></div>
+<pre><code>##   search_string                    unique_name approximate_match  ott_id
+## 1       diadema     Diadema (genus in Holozoa)             FALSE  631176
+## 2       diadema Diadema (genus in Nucletmycea)             FALSE 4930522
+## 3       diadema                     Hypolimnas             FALSE  643831
+##   is_synonym flags number_matches
+## 1      FALSE                    3
+## 2      FALSE                    3
+## 3       TRUE                    3</code></pre>
+<p>In our case, we want the second row in this data frame to replace the information that initially matched for <em>Diadema</em>. We can now use the <code>update()</code> function, to change to the correct taxa (the fungus not the sea urchin):</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">resolved_names <-<span class="st"> </span><span class="kw">update</span>(resolved_names, <span class="dt">taxon_name =</span> <span class="st">"diadema"</span>,
+                         <span class="dt">new_row_number =</span> <span class="dv">2</span>)
+
+## we could also have used the ott_id to replace this taxon:
+## resolved_names <- update(resolved_names, taxon_name = "diadema",
+##                          new_ott_id = 4930522)</code></pre></div>
+<p>And now our <code>resolved_names</code> data frame includes the taxon we want:</p>
+<table>
+<thead>
+<tr class="header">
+<th align="left">search_string</th>
+<th align="left">unique_name</th>
+<th align="left">approximate_match</th>
+<th align="left">ott_id</th>
+<th align="left">is_synonym</th>
+<th align="left">flags</th>
+<th align="left">number_matches</th>
+</tr>
+</thead>
+<tbody>
+<tr class="odd">
+<td align="left">hyla</td>
+<td align="left">Hyla</td>
+<td align="left">FALSE</td>
+<td align="left">1062216</td>
+<td align="left">FALSE</td>
+<td align="left"></td>
+<td align="left">1</td>
+</tr>
+<tr class="even">
+<td align="left">salmo</td>
+<td align="left">Salmo</td>
+<td align="left">FALSE</td>
+<td align="left">982359</td>
+<td align="left">FALSE</td>
+<td align="left"></td>
+<td align="left">1</td>
+</tr>
+<tr class="odd">
+<td align="left">diadema</td>
+<td align="left">Diadema (genus in Nucletmycea)</td>
+<td align="left">FALSE</td>
+<td align="left">4930522</td>
+<td align="left">FALSE</td>
+<td align="left"></td>
+<td align="left">3</td>
+</tr>
+<tr class="even">
+<td align="left">nautilus</td>
+<td align="left">Nautilus</td>
+<td align="left">FALSE</td>
+<td align="left">616358</td>
+<td align="left">FALSE</td>
+<td align="left"></td>
+<td align="left">1</td>
+</tr>
+</tbody>
+</table>
+</div>
+<div id="how-do-i-know-that-the-taxa-im-asking-for-is-the-correct-one" class="section level3">
+<h3>How do I know that the taxa I’m asking for is the correct one?</h3>
+<p>The function <code>taxonomy_taxon_info()</code> takes <code>ott_ids</code> as arguments and returns taxonomic information about the taxa. This output can be passed to some helper functions to extract the relevant information. Let’s illustrate this with our <em>Diadema</em> example:</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">diadema_info <-<span class="st"> </span><span class="kw">taxonomy_taxon_info</span>(<span class="dv">631176</span>)
+<span class="kw">tax_rank</span>(diadema_info)</code></pre></div>
+<pre><code>## $`Diadema (genus in Holozoa)`
+## [1] "genus"
+## 
+## attr(,"class")
+## [1] "otl_rank" "list"</code></pre>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">synonyms</span>(diadema_info)</code></pre></div>
+<pre><code>## $`Diadema (genus in Holozoa)`
+## [1] "Diamema"                "Centrechinus (Diadema)"
+## [3] "Cidaris (Diadema)"      "Centrechinus"          
+## 
+## attr(,"class")
+## [1] "otl_synonyms" "list"</code></pre>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">tax_name</span>(diadema_info)</code></pre></div>
+<pre><code>## $`Diadema (genus in Holozoa)`
+## [1] "Diadema"
+## 
+## attr(,"class")
+## [1] "otl_name" "list"</code></pre>
+<p>In some cases, it might also be useful to investigate the taxonomic tree descending from an <code>ott_id</code> to check that it’s the correct taxon and to determine the species included in the Open Tree Taxonomy:</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">diadema_tax_tree <-<span class="st"> </span><span class="kw">taxonomy_subtree</span>(<span class="dv">631176</span>)
+diadema_tax_tree</code></pre></div>
+<pre><code>## $tip_label
+##  [1] "Diadema_principeana_ott5725746"          
+##  [2] "Diadema_vetus_ott5725747"                
+##  [3] "Diadema_sp._CS-2014_ott5502179"          
+##  [4] "Diadema_ascensionis_ott4950423"          
+##  [5] "Diadema_africanum_ott4147369"            
+##  [6] "Diadema_antillarum_antillarum_ott4147370"
+##  [7] "Diadema_antillarum_scensionis_ott220009" 
+##  [8] "Diadema_palmeri_ott836860"               
+##  [9] "Diadema_sp._DSM6_ott771059"              
+## [10] "Diadema_mexicanum_ott639130"             
+## [11] "Diadema_setosum_ott631175"               
+## [12] "Diadema_sp._SETO15_ott587479"            
+## [13] "Diadema_sp._seto17_ott587478"            
+## [14] "Diadema_sp._DSM7_ott587487"              
+## [15] "Diadema_sp._DSM8_ott587486"              
+## [16] "Diadema_sp._seto9_ott587485"             
+## [17] "Diadema_sp._seto10_ott587484"            
+## [18] "Diadema_sp._DSM2_ott587483"              
+## [19] "Diadema_sp._DSM3_ott587482"              
+## [20] "Diadema_sp._DSM4_ott587481"              
+## [21] "Diadema_sp._dsm5_ott587480"              
+## [22] "Diadema_savignyi_ott395692"              
+## [23] "Diadema_paucispinum_ott312263"           
+## [24] "Diadema_sp._seto16_ott312262"            
+## [25] "Diadema_sp._DSM1_ott219999"              
+## [26] "Diadema_sp._DJN9_ott66626"               
+## [27] "Diadema_sp._seto19_ott66624"             
+## [28] "Diadema_sp._seto38_ott66625"             
+## [29] "Diadema_sp._seto18_ott66623"             
+## [30] "Diadema_sp._seto35_ott66618"             
+## 
+## $edge_label
+## [1] "Diadema_antillarum_ott1022356" "Diadema_ott631176"</code></pre>
+<p>By default, this function returns all taxa (including self, and internal) descending from this <code>ott_id</code> but it is also possible to return a <code>phylo</code> object.</p>
+</div>
+<div id="how-do-i-get-the-tree-for-a-particular-taxonomic-group" class="section level3">
+<h3>How do I get the tree for a particular taxonomic group?</h3>
+<p>If you are looking to get the tree for a particular taxonomic group, you need to first identify it by its node id or ott id, and then use the <code>tol_subtree()</code> function:</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">mono_id <-<span class="st"> </span><span class="kw">tnrs_match_names</span>(<span class="st">"Monotremata"</span>)
+mono_tree <-<span class="st"> </span><span class="kw">tol_subtree</span>(<span class="dt">ott_id =</span> <span class="kw">ott_id</span>(mono_id))
+<span class="kw">plot</span>(mono_tree)</code></pre></div>
+<p><img src=" [...]
+</div>
+<div id="how-do-i-find-trees-from-studies-focused-on-my-favourite-taxa" class="section level3">
+<h3>How do I find trees from studies focused on my favourite taxa?</h3>
+<p>The function <code>studies_find_trees()</code> allows the user to search for studies matching a specific criteria. The function <code>studies_properties()</code> returns the list of properties that can be used in the search.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">furry_studies <-<span class="st"> </span><span class="kw">studies_find_studies</span>(<span class="dt">property=</span><span class="st">"ot:focalCladeOTTTaxonName"</span>, <span class="dt">value=</span><span class="st">"Mammalia"</span>)
+furry_ids <-<span class="st"> </span>furry_studies$study_ids</code></pre></div>
+<p>Now that we know the <code>study_id</code>, we can ask for the meta data information associated with this study:</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">furry_meta <-<span class="st"> </span><span class="kw">get_study_meta</span>(<span class="st">"pg_2550"</span>)
+<span class="kw">get_publication</span>(furry_meta)     ## The citation for the source of the study</code></pre></div>
+<pre><code>## [1] "O'Leary, Maureen A., Marc Allard, Michael J. Novacek, Jin Meng, and John Gatesy. 2004. \"Building the mammalian sector of the tree of life: Combining different data and a discussion of divergence times for placental mammals.\" In: Cracraft J., & Donoghue M., eds. Assembling the Tree of Life. pp. 490-516. Oxford, United Kingdom, Oxford University Press."
+## attr(,"DOI")
+## [1] ""</code></pre>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">get_tree_ids</span>(furry_meta)        ## This study has 10 trees associated with it</code></pre></div>
+<pre><code>##  [1] "tree5513" "tree5515" "tree5516" "tree5517" "tree5518" "tree5519"
+##  [7] "tree5520" "tree5521" "tree5522" "tree5523"</code></pre>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">candidate_for_synth</span>(furry_meta) ## None of these trees are yet included in the OTL</code></pre></div>
+<pre><code>## NULL</code></pre>
+<p>Using <code>get_study("pg_2550")</code> would return a <code>multiPhylo</code> object (default) with all the trees associated with this particular study, while <code>get_study_tree("pg_2550", "tree5513")</code> would return one of these trees.</p>
+</div>
+<div id="the-tree-returned-by-the-api-has-duplicated-tip-labels-how-can-i-work-around-it" class="section level3">
+<h3>The tree returned by the API has duplicated tip labels, how can I work around it?</h3>
+<p>You may encounter the following error message:</p>
+<pre><code>Error in rncl(file = file, ...) : Taxon number 39 (coded by the token Pratia
+angulata) has already been encountered in this tree. Duplication of taxa in a
+tree is prohibited.</code></pre>
+<p>This message occurs as duplicate labels are not allowed in the NEXUS format and it is strictly enforced by the part of the code used by <code>rotl</code> to import the trees in memory.</p>
+<p>If you use a version of <code>rotl</code> more recent than 0.4.1, this should not happen by default for the function <code>get_study_tree</code>. If it happens with another function, please <a href="https://github.com/ropensci/rotl/issues">let us know</a>.</p>
+<p>The easiest way to work around this is to save the tree in a file, and use APE to read it in memory:</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">get_study_tree</span>(<span class="dt">study_id=</span><span class="st">"pg_710"</span>, <span class="dt">tree_id=</span><span class="st">"tree1277"</span>,
+               <span class="dt">tip_label=</span><span class="st">'ott_taxon_name'</span>, <span class="dt">file =</span> <span class="st">"/tmp/tree.tre"</span>,
+               <span class="dt">file_format =</span> <span class="st">"newick"</span>)
+tr <-<span class="st"> </span>ape::<span class="kw">read.tree</span>(<span class="dt">file =</span> <span class="st">"/tmp/tree.tre"</span>)</code></pre></div>
+</div>
+<div id="how-do-i-get-the-higher-taxonomy-for-a-given-taxa" class="section level3">
+<h3>How do I get the higher taxonomy for a given taxa?</h3>
+<p>If you encounter a taxon name you are not familiar with, it might be useful to obtain its higher taxonomy to see where it fits in the tree of life. We can combine several taxonomy methods to extract this information easily.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">giant_squid <-<span class="st"> </span><span class="kw">tnrs_match_names</span>(<span class="st">"Architeuthis"</span>)
+<span class="kw">tax_lineage</span>(<span class="kw">taxonomy_taxon_info</span>(<span class="kw">ott_id</span>(giant_squid), <span class="dt">include_lineage =</span> <span class="ot">TRUE</span>))</code></pre></div>
+<pre><code>## $`5295401`
+##          rank               name        unique_name  ott_id
+## 1      family     Architeuthidae     Architeuthidae  564393
+## 2    suborder          Oegopsina          Oegopsina   43352
+## 3       order           Teuthida           Teuthida  380472
+## 4  superorder     Decapodiformes     Decapodiformes  854107
+## 5  infraclass       Neocoleoidea       Neocoleoidea  329546
+## 6    subclass          Coleoidea          Coleoidea    7371
+## 7       class        Cephalopoda        Cephalopoda    7368
+## 8      phylum           Mollusca           Mollusca  802117
+## 9     no rank     Lophotrochozoa     Lophotrochozoa  155737
+## 10    no rank        Protostomia        Protostomia  189832
+## 11    no rank          Bilateria          Bilateria  117569
+## 12    no rank          Eumetazoa          Eumetazoa  641038
+## 13    kingdom            Metazoa            Metazoa  691846
+## 14    no rank            Holozoa            Holozoa 5246131
+## 15    no rank       Opisthokonta       Opisthokonta  332573
+## 16     domain          Eukaryota          Eukaryota  304358
+## 17    no rank cellular organisms cellular organisms   93302
+## 18    no rank               life               life  805080</code></pre>
+</div>
+<div id="why-are-ott-ids-discovered-with-rotl-missing-from-an-induced-subtree" class="section level3">
+<h3>Why are OTT IDs discovered with <code>rotl</code> missing from an induced subtree?</h3>
+<p>Some taxonomic names that can be retrieved through the taxonomic name resolution service are not part of the Open Tree’s synthesis tree. These are usually traditional higher-level taxa that have been found to be paraphyletic.</p>
+<p>For instance, if you wanted to fetch a tree relating the three birds that go into a <a href="https://en.wikipedia.org/wiki/Turducken">Turducken</a> you might search for the turkey, duck, and chicken genera:</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">turducken <-<span class="st"> </span><span class="kw">c</span>(<span class="st">"Meleagris"</span>, <span class="st">"Anas"</span>, <span class="st">"Gallus"</span>)
+taxa <-<span class="st"> </span><span class="kw">tnrs_match_names</span>(turducken, <span class="dt">context=</span><span class="st">"Animals"</span>)
+taxa</code></pre></div>
+<pre><code>##   search_string unique_name approximate_match ott_id is_synonym flags
+## 1     meleagris   Meleagris             FALSE 446481      FALSE      
+## 2          anas        Anas             FALSE 765185      FALSE      
+## 3        gallus      Gallus             FALSE 153562      FALSE      
+##   number_matches
+## 1              2
+## 2              1
+## 3              4</code></pre>
+<p>So, we have IDs for each genus but those messages in the <code>flags</code> column look concerning. <code>BARREN</code> means these taxa have no descendants and <code>MAJOR_RANK_CONFLICT</code> refers to inconsistencies between the rank of these taxa and some of their relatives. For these reasons, <em>Meleagris</em> and <em>Anas</em> are not included in the synthetic tree.</p>
+<p>If we tried to press on ahead with the IDs that we have, we’d get an error (because there are too few good IDs) or a tree with fewer tips than we had anticipated.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">tr <-<span class="st"> </span><span class="kw">tol_induced_subtree</span>(taxa$ott_id)</code></pre></div>
+<p>The best way to avoid these problems is to specify complete species names (species being the lowest level of classification in the Open Tree taxonomy they are guaranteed to be monophyletic):</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">turducken_spp <-<span class="st"> </span><span class="kw">c</span>(<span class="st">"Meleagris gallopavo"</span>, <span class="st">"Anas platyrhynchos"</span>, <span class="st">"Gallus gallus"</span>)
+taxa <-<span class="st"> </span><span class="kw">tnrs_match_names</span>(turducken_spp, <span class="dt">context=</span><span class="st">"Animals"</span>)
+tr <-<span class="st"> </span><span class="kw">tol_induced_subtree</span>(taxa$ott_id)
+<span class="kw">plot</span>(tr)</code></pre></div>
+<p><img src=" [...]
+</div>
+</div>
+
+
+
+<!-- dynamically load mathjax for compatibility with self-contained -->
+<script>
+  (function () {
+    var script = document.createElement("script");
+    script.type = "text/javascript";
+    script.src  = "https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
+    document.getElementsByTagName("head")[0].appendChild(script);
+  })();
+</script>
+
+</body>
+</html>
diff --git a/inst/doc/meta-analysis.R b/inst/doc/meta-analysis.R
new file mode 100644
index 0000000..9433e7a
--- /dev/null
+++ b/inst/doc/meta-analysis.R
@@ -0,0 +1,66 @@
+## ----egg_data, cache=TRUE------------------------------------------------
+library(rotl)
+library(fulltext)
+
+if (require(readxl)) {
+    doi <- "10.1111/jeb.12282"
+    xl_file <- ft_get_si(doi, 1, save.name="egg.xls")
+    egg_data <- read_excel(xl_file)
+} else {
+    egg_data <- read.csv(system.file("extdata", "egg.csv", package = "rotl"))
+}
+head(egg_data)
+
+## ----eggs_in_a_funnel, fig.width=6, fig.height=3-------------------------
+plot(1/sqrt(egg_data$VZr), egg_data$Zr, pch=16,
+     ylab="Effect size (Zr)",
+     xlab="Precision (1/SE)",
+     main="Effect sizes for sex bias in egg size among 51 brid species" )
+
+## ---- clean_eggs---------------------------------------------------------
+egg_data <- as.data.frame(egg_data)
+egg_data$animal <- tolower(egg_data$animal)
+
+## ---- birds, cache=TRUE--------------------------------------------------
+taxa <- tnrs_match_names(unique(egg_data$animal), context="Animals")
+head(taxa)
+
+## ----bird_map------------------------------------------------------------
+taxon_map <- structure(taxa$search_string, names=taxa$unique_name)
+
+## ----odd_duck------------------------------------------------------------
+taxon_map["Anser caerulescens"]
+
+## ----birds_in_a_tree, fig.width=5, fig.height=5, fig.align='center'------
+tr <- tol_induced_subtree(taxa$ott_id)
+plot(tr, show.tip.label=FALSE)
+
+## ----tip_lab-------------------------------------------------------------
+tr$tip.label[1:4]
+
+## ----clean_tips----------------------------------------------------------
+otl_tips <- strip_ott_ids(tr$tip.label, remove_underscores=TRUE)
+tr$tip.label <- taxon_map[ otl_tips ]
+
+## ----remove_nodes--------------------------------------------------------
+tr$node.label <- NULL
+
+## ----model---------------------------------------------------------------
+library(MCMCglmm, quiet=TRUE)
+set.seed(123)
+
+pr<-list(R=list(V=1,nu=0.002),
+             G=list(G1=list(V=1,nu=0.002))
+)
+
+model <- MCMCglmm(Zr~1,random=~animal,
+                       pedigree=tr,
+                       mev=egg_data$VZr,
+                       prior=pr,
+                       data=egg_data,
+                       verbose=FALSE)
+
+## ----PhyH----------------------------------------------------------------
+var_comps <- colMeans(model$VCV )
+var_comps["animal"] / sum(var_comps)
+
diff --git a/inst/doc/meta-analysis.Rmd b/inst/doc/meta-analysis.Rmd
new file mode 100644
index 0000000..0ff201a
--- /dev/null
+++ b/inst/doc/meta-analysis.Rmd
@@ -0,0 +1,231 @@
+---
+title: "Using the Open Tree synthesis in a comparative analysis"
+author: "David Winter"
+date: "`r Sys.Date()`"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Using the Open Tree synthesis in a comparative analysis}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+## Phylogenetic Comparative Methods
+
+The development of phylogenetic comparative methods has made phylogenies an
+important source of data in fields as diverse as ecology, genomics and medicine.
+Comparative methods can be used to investigate patterns in the evolution of
+traits or the diversification of lineages. In other cases a phylogeny is treated
+as a "nuisance parameter", allowing the autocorrelation created by the shared
+evolutionary history of the different species included to be controlled for.
+
+In many cases finding a tree that relates the species for which trait data are
+available is a rate-limiting step in such comparative analyses. Here we show
+how the synthetic tree provided by Open Tree of Life (and made available in R via
+`rotl`) can help to fill this gap.
+
+## A phylogenetic meta-analysis
+
+To demonstrate the use of `rotl` in a comparative analysis, we will partially
+reproduce the results of [Rutkowska _et al_ 2014](https://doi.org/10.1111/jeb.12282).
+Very briefly, this study is a meta-analysis summarising the results of multiple
+studies testing for systematic differences in the size of eggs which contain
+male and female offspring. Such a difference might mean that birds invest more
+heavily in one sex than the other.
+
+Because this study involves data from 51 different species, Rutkowska _et al_
+used a phylogenetic comparative approach to account for the shared evolutionary
+history among some of the studied-species.
+
+### Gather the data
+
+If we are going to reproduce this analysis, we will first need to gather the
+data. Thankfully, the data is available as supplementary material from the
+publisher's website. We can collect the data using `fulltext` (with the
+paper's DOI as input) and read it into memory with `readxl`:
+
+```{r egg_data, cache=TRUE}
+library(rotl)
+library(fulltext)
+
+if (require(readxl)) {
+    doi <- "10.1111/jeb.12282"
+    xl_file <- ft_get_si(doi, 1, save.name="egg.xls")
+    egg_data <- read_excel(xl_file)
+} else {
+    egg_data <- read.csv(system.file("extdata", "egg.csv", package = "rotl"))
+}
+head(egg_data)
+```
+
+The most important variable in this dataset is `Zr`, which is a [normalized
+effect size](https://en.wikipedia.org/wiki/Fisher_transformation) for difference
+in size between eggs that contain males and females. Values close to zero come
+from studies that found the sex of an egg's inhabitant had little effect in its size,
+while large positive or negative values correspond to studies with substantial
+sex biases (towards males and females respectively). Since this is a
+meta-analysis we should produce the classic [funnel plot](https://en.wikipedia.org/wiki/Funnel_plot)
+with effect size on the y-axis and precision (the inverse of the sample
+standard error) on the x-axis. Here we calculate precision from the sample
+variance (`VZr`):
+
+```{r eggs_in_a_funnel, fig.width=6, fig.height=3}
+plot(1/sqrt(egg_data$VZr), egg_data$Zr, pch=16,
+     ylab="Effect size (Zr)",
+     xlab="Precision (1/SE)",
+     main="Effect sizes for sex bias in egg size among 51 brid species" )
+```
+
+In order to use this data later on we need to first convert it to a standard
+`data.frame`. We can also convert the `animal` column (the species names) to
+lower case which will make it easier to match names later on:
+
+```{r, clean_eggs}
+egg_data <- as.data.frame(egg_data)
+egg_data$animal <- tolower(egg_data$animal)
+```
+### Find the species in OTT
+
+We can use the OTL synthesis tree to relate these species. To do so we first need to
+find Open Tree Taxonomy (OTT) IDs for each species. We can do that with the
+Taxonomic Name Resolution Service function `tnrs_match_names`:
+
+```{r, birds, cache=TRUE}
+taxa <- tnrs_match_names(unique(egg_data$animal), context="Animals")
+head(taxa)
+```
+
+All of these species are in OTT, but a few of them go by different names in the
+Open Tree than we have in our data set. Because the tree `rotl` fetches
+will have Open Tree names, we need to create a named vector that maps the names
+we have for each species to the names Open Tree uses for them:
+
+
+```{r bird_map}
+taxon_map <- structure(taxa$search_string, names=taxa$unique_name)
+```
+
+Now we can use this map to retrieve "data set names" from "OTT names":
+
+
+```{r odd_duck}
+taxon_map["Anser caerulescens"]
+```
+
+### Get a tree
+
+Now we can get the tree. There are really too many tips here to show nicely, so
+we will leave them out of this plot
+
+```{r birds_in_a_tree, fig.width=5, fig.height=5, fig.align='center'}
+tr <- tol_induced_subtree(taxa$ott_id)
+plot(tr, show.tip.label=FALSE)
+```
+
+There are a few things to note here. First, the tree has no branch lengths.
+At present this is true for the whole of the Open Tree synthetic tree. Some
+comparative methods require either branch lengths or an ultrametric tree. Before
+you can use one of those methods you will need to get a tree with branch
+lengths. You could try looking for published trees made available by the Open
+Tree with `studies_find_trees`. Alternatively, you could estimate branch lengths
+from the topology of a phylogeny returned by `tol_induced_subtree`, perhaps by
+downloading DNA sequences from the NCBI with `rentrez` or "hanging" the tree on
+nodes of known age using the penalized likelihood method in `ape::chronos`.
+In this case, we will use only the topology of the tree as input to our
+comparative analysis, so we can skip these steps.
+
+Second, the tip labels contain OTT IDs, which means they will not perfectly
+match the species names in our dataset or the taxon map that we created earlier:
+
+
+```{r tip_lab}
+tr$tip.label[1:4]
+```
+
+Finally, the tree contains node labels for those nodes that match a higher taxonomic
+group, and empty character vectors (`""`) for all other nodes. Some
+comparative methods either do not expect node labels at all, or require all
+labeled nodes to have a unique name (meaning multiple "empty" labels will cause
+an error).
+
+We can deal with all these details easily. `rotl` provides  the convenience
+function `strip_ott_ids` to remove the extra information from the tip labels.
+With the IDs removed, we can use our taxon map to replace the tip labels in the tree
+with the species names from our dataset.
+
+
+
+```{r clean_tips}
+otl_tips <- strip_ott_ids(tr$tip.label, remove_underscores=TRUE)
+tr$tip.label <- taxon_map[ otl_tips ]
+```
+
+Finally, we can remove the node labels by setting the `node.label` attribute of
+the tree to `NULL`.
+
+```{r remove_nodes}
+tr$node.label <- NULL
+```
+
+### Perform the meta-analysis
+
+
+Now we have data and a tree, and we know the names in the tree match the ones in
+the data. It's time to do the comparative analysis. Rutkowska _et al_. used `MCMCglmm`, a
+Bayesian MCMC approach to fitting multi-level models, to perform their meta-analysis,
+and we will do the same. Of course, to properly analyse these data you would
+take some care in deciding on the appropriate priors to use and inspect the
+results carefully. In this case, we are really interested in using this as a
+demonstration, so we will just run a simple model.
+
+Specifically we will fit a model where the only variable that might explain the
+values of `Zr` is the random factor `animal`, which corresponds to the
+phylogenetic relationships among species. We also provide `VZr` as the measurement
+error variance, effectively adding extra weight to the results of more powerful
+studies. Here's how we specify and fit that model with `MCMCglmm`:
+
+
+```{r model}
+library(MCMCglmm, quiet=TRUE)
+set.seed(123)
+
+pr<-list(R=list(V=1,nu=0.002),
+             G=list(G1=list(V=1,nu=0.002))
+)
+
+model <- MCMCglmm(Zr~1,random=~animal,
+                       pedigree=tr,
+                       mev=egg_data$VZr,
+                       prior=pr,
+                       data=egg_data,
+                       verbose=FALSE)
+```
+
+
+Now that we have a result we can find out how much phylogenetic signal exists
+for sex-biased differences in egg-size. In a multi-level model we can use variance
+components to look at this, specifically the proportion of the total variance
+that can be explained by phylogeny is called the phylogenetic reliability, _H_. Let's
+calculate the _H_ for this model:
+
+
+```{r PhyH}
+var_comps <- colMeans(model$VCV )
+var_comps["animal"] / sum(var_comps)
+```
+
+It appears there is almost no phylogenetic signal to the data.
+The relationships among species explain much less than one percent of the total
+variance in the data. If you were wondering,  Rutkowska _et al_. report a similar result,
+even after adding more predictors to their model most of the variance in `Zr`
+was left unexplained.
+
+## What other comparative methods can I use in R?
+
+Here we have demonstrated just one comparative analysis that you might do in R.
+There are an ever-growing number of packages that allow an ever-growing number
+of analyses to be performed in R. Some "classics" like ancestral state
+reconstruction,  phylogenetic independent contrasts and lineage through time plots
+are implemented in `ape`. Packages like `phytools`, `caper` and `diversitree`
+provide extensions to these methods.  The [CRAN Phylogenetics Taskview](https://cran.r-project.org/web/views/Phylogenetics.html)
+gives a good idea of the diversity of packages and analyses that can be
+completed in R.
diff --git a/inst/doc/meta-analysis.html b/inst/doc/meta-analysis.html
new file mode 100644
index 0000000..e2c1bf1
--- /dev/null
+++ b/inst/doc/meta-analysis.html
@@ -0,0 +1,216 @@
+<!DOCTYPE html>
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+
+<head>
+
+<meta charset="utf-8">
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+<meta name="generator" content="pandoc" />
+
+<meta name="viewport" content="width=device-width, initial-scale=1">
+
+<meta name="author" content="David Winter" />
+
+<meta name="date" content="2016-09-19" />
+
+<title>Using the Open Tree synthesis in a comparative analysis</title>
+
+
+
+<style type="text/css">code{white-space: pre;}</style>
+<style type="text/css">
+div.sourceCode { overflow-x: auto; }
+table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode {
+  margin: 0; padding: 0; vertical-align: baseline; border: none; }
+table.sourceCode { width: 100%; line-height: 100%; }
+td.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; color: #aaaaaa; border-right: 1px solid #aaaaaa; }
+td.sourceCode { padding-left: 5px; }
+code > span.kw { color: #007020; font-weight: bold; } /* Keyword */
+code > span.dt { color: #902000; } /* DataType */
+code > span.dv { color: #40a070; } /* DecVal */
+code > span.bn { color: #40a070; } /* BaseN */
+code > span.fl { color: #40a070; } /* Float */
+code > span.ch { color: #4070a0; } /* Char */
+code > span.st { color: #4070a0; } /* String */
+code > span.co { color: #60a0b0; font-style: italic; } /* Comment */
+code > span.ot { color: #007020; } /* Other */
+code > span.al { color: #ff0000; font-weight: bold; } /* Alert */
+code > span.fu { color: #06287e; } /* Function */
+code > span.er { color: #ff0000; font-weight: bold; } /* Error */
+code > span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
+code > span.cn { color: #880000; } /* Constant */
+code > span.sc { color: #4070a0; } /* SpecialChar */
+code > span.vs { color: #4070a0; } /* VerbatimString */
+code > span.ss { color: #bb6688; } /* SpecialString */
+code > span.im { } /* Import */
+code > span.va { color: #19177c; } /* Variable */
+code > span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
+code > span.op { color: #666666; } /* Operator */
+code > span.bu { } /* BuiltIn */
+code > span.ex { } /* Extension */
+code > span.pp { color: #bc7a00; } /* Preprocessor */
+code > span.at { color: #7d9029; } /* Attribute */
+code > span.do { color: #ba2121; font-style: italic; } /* Documentation */
+code > span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
+code > span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
+code > span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
+</style>
+
+
+
+<link href="data:text/css;charset=utf-8,body%20%7B%0Abackground%2Dcolor%3A%20%23fff%3B%0Amargin%3A%201em%20auto%3B%0Amax%2Dwidth%3A%20700px%3B%0Aoverflow%3A%20visible%3B%0Apadding%2Dleft%3A%202em%3B%0Apadding%2Dright%3A%202em%3B%0Afont%2Dfamily%3A%20%22Open%20Sans%22%2C%20%22Helvetica%20Neue%22%2C%20Helvetica%2C%20Arial%2C%20sans%2Dserif%3B%0Afont%2Dsize%3A%2014px%3B%0Aline%2Dheight%3A%201%2E35%3B%0A%7D%0A%23header%20%7B%0Atext%2Dalign%3A%20center%3B%0A%7D%0A%23TOC%20%7B%0Aclear%3A%20bot [...]
+
+</head>
+
+<body>
+
+
+
+
+<h1 class="title toc-ignore">Using the Open Tree synthesis in a comparative analysis</h1>
+<h4 class="author"><em>David Winter</em></h4>
+<h4 class="date"><em>2016-09-19</em></h4>
+
+
+
+<div id="phylogenetic-comparative-methods" class="section level2">
+<h2>Phylogenetic Comparative Methods</h2>
+<p>The development of phylogenetic comparative methods has made phylogenies an important source of data in fields as diverse as ecology, genomics and medicine. Comparative methods can be used to investigate patterns in the evolution of traits or the diversification of lineages. In other cases a phylogeny is treated as a “nuisance parameter”, allowing the autocorrelation created by the shared evolutionary history of the different species included to be controlled for.</p>
+<p>In many cases finding a tree that relates the species for which trait data are available is a rate-limiting step in such comparative analyses. Here we show how the synthetic tree provided by Open Tree of Life (and made available in R via <code>rotl</code>) can help to fill this gap.</p>
+</div>
+<div id="a-phylogenetic-meta-analysis" class="section level2">
+<h2>A phylogenetic meta-analysis</h2>
+<p>To demonstrate the use of <code>rotl</code> in a comparative analysis, we will partially reproduce the results of <a href="https://doi.org/10.1111/jeb.12282">Rutkowska <em>et al</em> 2014</a>. Very briefly, this study is a meta-analysis summarising the results of multiple studies testing for systematic differences in the size of eggs which contain male and female offspring. Such a difference might mean that birds invest more heavily in one sex than the other.</p>
+<p>Because this study involves data from 51 different species, Rutkowska <em>et al</em> used a phylogenetic comparative approach to account for the shared evolutionary history among some of the studied-species.</p>
+<div id="gather-the-data" class="section level3">
+<h3>Gather the data</h3>
+<p>If we are going to reproduce this analysis, we will first need to gather the data. Thankfully, the data is available as supplementary material from the publisher’s website. We can collect the data using <code>fulltext</code> (with the paper’s DOI as input) and read it into memory with <code>readxl</code>:</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(rotl)
+<span class="kw">library</span>(fulltext)
+
+if (<span class="kw">require</span>(readxl)) {
+    doi <-<span class="st"> "10.1111/jeb.12282"</span>
+    xl_file <-<span class="st"> </span><span class="kw">ft_get_si</span>(doi, <span class="dv">1</span>, <span class="dt">save.name=</span><span class="st">"egg.xls"</span>)
+    egg_data <-<span class="st"> </span><span class="kw">read_excel</span>(xl_file)
+} else {
+    egg_data <-<span class="st"> </span><span class="kw">read.csv</span>(<span class="kw">system.file</span>(<span class="st">"extdata"</span>, <span class="st">"egg.csv"</span>, <span class="dt">package =</span> <span class="st">"rotl"</span>))
+}</code></pre></div>
+<pre><code>## Loading required package: readxl</code></pre>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">head</span>(egg_data)</code></pre></div>
+<pre><code>## # A tibble: 6 × 14
+##                   animal                   Spp       Lndim Measure Neggs
+##                    <chr>                 <chr>       <dbl>   <chr> <dbl>
+## 1 Zonotrichia_leucophrys White-crowned sparrow 0.000000000  volume   294
+## 2      Passer_domesticus         House sparrow 0.009407469  volume   149
+## 3        Serinus_canaria                Canary 0.000000000  volume    52
+## 4          Turdus_merula    European blackbird 0.021189299  volume    82
+## 5    Agelaius_phoeniceus  Red-winged blackbird 0.218316086  volume   394
+## 6    Quiscalus_mexicanus  Great-tailed grackle 0.281894985    mass   822
+## # ... with 9 more variables: Nclutches <dbl>, ESr <dbl>, Type <chr>,
+## #   StudyID <chr>, Year <dbl>, D <dbl>, EN <dbl>, Zr <dbl>, VZr <dbl></code></pre>
+<p>The most important variable in this dataset is <code>Zr</code>, which is a <a href="https://en.wikipedia.org/wiki/Fisher_transformation">normalized effect size</a> for difference in size between eggs that contain males and females. Values close to zero come from studies that found the sex of an egg’s inhabitant had little effect in its size, while large positive or negative values correspond to studies with substantial sex biases (towards males and females respectively). Since this is [...]
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">plot</span>(<span class="dv">1</span>/<span class="kw">sqrt</span>(egg_data$VZr), egg_data$Zr, <span class="dt">pch=</span><span class="dv">16</span>,
+     <span class="dt">ylab=</span><span class="st">"Effect size (Zr)"</span>,
+     <span class="dt">xlab=</span><span class="st">"Precision (1/SE)"</span>,
+     <span class="dt">main=</span><span class="st">"Effect sizes for sex bias in egg size among 51 brid species"</span> )</code></pre></div>
+<p><img src=" [...]
+<p>In order to use this data later on we need to first convert it to a standard <code>data.frame</code>. We can also convert the <code>animal</code> column (the species names) to lower case which will make it easier to match names later on:</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">egg_data <-<span class="st"> </span><span class="kw">as.data.frame</span>(egg_data)
+egg_data$animal <-<span class="st"> </span><span class="kw">tolower</span>(egg_data$animal)</code></pre></div>
+</div>
+<div id="find-the-species-in-ott" class="section level3">
+<h3>Find the species in OTT</h3>
+<p>We can use the OTL synthesis tree to relate these species. To do so we first need to find Open Tree Taxonomy (OTT) IDs for each species. We can do that with the Taxonomic Name Resolution Service function <code>tnrs_match_names</code>:</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">taxa <-<span class="st"> </span><span class="kw">tnrs_match_names</span>(<span class="kw">unique</span>(egg_data$animal), <span class="dt">context=</span><span class="st">"Animals"</span>)
+<span class="kw">head</span>(taxa)</code></pre></div>
+<pre><code>##            search_string            unique_name approximate_match ott_id
+## 1 zonotrichia_leucophrys Zonotrichia leucophrys              TRUE 265553
+## 2      passer_domesticus      Passer domesticus              TRUE 745175
+## 3        serinus_canaria        Serinus canaria              TRUE 464865
+## 4          turdus_merula          Turdus merula              TRUE 568572
+## 5    agelaius_phoeniceus    Agelaius phoeniceus              TRUE 226605
+## 6    quiscalus_mexicanus    Quiscalus mexicanus              TRUE 743411
+##   is_synonym          flags number_matches
+## 1      FALSE                             1
+## 2      FALSE                             1
+## 3      FALSE SIBLING_HIGHER              2
+## 4      FALSE                             1
+## 5      FALSE                             2
+## 6      FALSE                             1</code></pre>
+<p>All of these species are in OTT, but a few of them go by different names in the Open Tree than we have in our data set. Because the tree <code>rotl</code> fetches will have Open Tree names, we need to create a named vector that maps the names we have for each species to the names Open Tree uses for them:</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">taxon_map <-<span class="st"> </span><span class="kw">structure</span>(taxa$search_string, <span class="dt">names=</span>taxa$unique_name)</code></pre></div>
+<p>Now we can use this map to retrieve “data set names” from “OTT names”:</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">taxon_map[<span class="st">"Anser caerulescens"</span>]</code></pre></div>
+<pre><code>##  Anser caerulescens 
+## "chen_caerulescens"</code></pre>
+</div>
+<div id="get-a-tree" class="section level3">
+<h3>Get a tree</h3>
+<p>Now we can get the tree. There are really too many tips here to show nicely, so we will leave them out of this plot.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">tr <-<span class="st"> </span><span class="kw">tol_induced_subtree</span>(taxa$ott_id)
+<span class="kw">plot</span>(tr, <span class="dt">show.tip.label=</span><span class="ot">FALSE</span>)</code></pre></div>
+<p><img src=" [...]
+<p>There are a few things to note here. First, the tree has not branch lengths. At present this is true for the whole of the Open Tree synthetic tree. Some comparative methods require either branch lengths or an ultrametric tree. Before you can use one of those methods you will need to get a tree with branch lengths. You could try looking for published trees made available by the Open Tree with <code>studies_find_trees</code>. Alternatively, you could estimate branch lengths from the top [...]
+<p>Second, the tip labels contain OTT IDs, which means they will not perfectly match the species names in our dataset or the taxon map that we created earlier:</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">tr$tip.label[<span class="dv">1</span>:<span class="dv">4</span>]</code></pre></div>
+<pre><code>## [1] "Sturnus_vulgaris_ott565813"    "Sturnus_unicolor_ott366470"   
+## [3] "Luscinia_svecica_ott274225"    "Ficedula_albicollis_ott107840"</code></pre>
+<p>Finally, the tree contains node labels for those nodes that match a higher taxonomic group, and empty character vectors (<code>""</code>) for all other nodes. Some comparative methods either do not expect node labels at all, or require all labeled nodes to have a unique name (meaning multiple “empty” labels will cause an error).</p>
+<p>We can deal with all these details easily. <code>rotl</code> provides the convenience function <code>strip_ott_ids</code> to remove the extra information from the tip labels. With the IDs removed, we can use our taxon map to replace the tip labels in the tree with the species names from our dataset.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">otl_tips <-<span class="st"> </span><span class="kw">strip_ott_ids</span>(tr$tip.label, <span class="dt">remove_underscores=</span><span class="ot">TRUE</span>)
+tr$tip.label <-<span class="st"> </span>taxon_map[ otl_tips ]</code></pre></div>
+<p>Finally, we can remove the node labels by setting the <code>node.label</code> attribute of the tree to <code>NULL</code>.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">tr$node.label <-<span class="st"> </span><span class="ot">NULL</span></code></pre></div>
+</div>
+<div id="perform-the-meta-analysis" class="section level3">
+<h3>Perform the meta-analysis</h3>
+<p>Now we have data and a tree, and we know the names in the tree match the ones in the data. It’s time to do the comparative analysis. Rutkowska <em>et al</em>. used <code>MCMCglmm</code>, a Bayesian MCMC approach to fitting multi-level models,to perform their meta-analysis, and we will do the same. Of course, to properly analyse these data you would take some care in deciding on the appropriate priors to use and inspect the results carefully. In this case, we are really interested in u [...]
+<p>Specifically, we will fit a model where the only variable that might explain the values of <code>Zr</code> is the random factor <code>animal</code>, which corresponds to the phylogenetic relationships among species. We also provide <code>VZr</code> as the measurement error variance, effectively adding extra weight to the results of more powerful studies. Here’s how we specify and fit that model with <code>MCMCglmm</code>:</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(MCMCglmm, <span class="dt">quiet=</span><span class="ot">TRUE</span>)</code></pre></div>
+<pre><code>## 
+## Attaching package: 'ape'</code></pre>
+<pre><code>## The following object is masked from 'package:phylobase':
+## 
+##     edges</code></pre>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">set.seed</span>(<span class="dv">123</span>)
+
+pr<-<span class="kw">list</span>(<span class="dt">R=</span><span class="kw">list</span>(<span class="dt">V=</span><span class="dv">1</span>,<span class="dt">nu=</span><span class="fl">0.002</span>),
+             <span class="dt">G=</span><span class="kw">list</span>(<span class="dt">G1=</span><span class="kw">list</span>(<span class="dt">V=</span><span class="dv">1</span>,<span class="dt">nu=</span><span class="fl">0.002</span>))
+)
+
+model <-<span class="st"> </span><span class="kw">MCMCglmm</span>(Zr~<span class="dv">1</span>,<span class="dt">random=</span>~animal,
+                       <span class="dt">pedigree=</span>tr,
+                       <span class="dt">mev=</span>egg_data$VZr,
+                       <span class="dt">prior=</span>pr,
+                       <span class="dt">data=</span>egg_data,
+                       <span class="dt">verbose=</span><span class="ot">FALSE</span>)</code></pre></div>
+<pre><code>## Warning in inverseA(pedigree = pedigree, scale = scale, nodes = nodes): no
+## branch lengths: compute.brlen from ape has been used</code></pre>
+<p>Now that we have a result we can find out how much phylogenetic signal exists for sex-biased differences in egg-size. In a multi-level model we can use variance components to look at this, specifically the proportion of the total variance that can be explained by phylogeny is called the phylogenetic reliability, <em>H</em>. Let’s calculate the <em>H</em> for this model:</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">var_comps <-<span class="st"> </span><span class="kw">colMeans</span>(model$VCV )
+var_comps[<span class="st">"animal"</span>] /<span class="st"> </span><span class="kw">sum</span>(var_comps)</code></pre></div>
+<pre><code>##      animal 
+## 0.002809591</code></pre>
+<p>It appears there is almost no phylogenetic signal to the data. The relationships among species explain much less than one percent of the total variance in the data. If you were wondering, Rutkowska <em>et al</em>. report a similar result, even after adding more predictors to their model most of the variance in <code>Zr</code> was left unexplained.</p>
+</div>
+</div>
+<div id="what-other-comparative-methods-can-i-use-in-r" class="section level2">
+<h2>What other comparative methods can I use in R?</h2>
+<p>Here we have demonstrated just one comparative analysis that you might do in R. There are an ever-growing number of packages that allow an ever-growing number of analysis to performed in R. Some “classics” like ancestral state reconstruction, phylogenetic independent contrasts and lineage through time plots are implemented in <code>ape</code>. Packages like <code>phytools</code>, <code>caper</code> and <code>diversitree</code> provide extensions to these methods. The <a href="https:// [...]
+</div>
+
+
+
+<!-- dynamically load mathjax for compatibility with self-contained -->
+<script>
+  (function () {
+    var script = document.createElement("script");
+    script.type = "text/javascript";
+    script.src  = "https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
+    document.getElementsByTagName("head")[0].appendChild(script);
+  })();
+</script>
+
+</body>
+</html>
diff --git a/inst/extdata/egg.csv b/inst/extdata/egg.csv
new file mode 100644
index 0000000..8a99c2b
--- /dev/null
+++ b/inst/extdata/egg.csv
@@ -0,0 +1,66 @@
+animal,Spp,Lndim,Measure,Neggs,Nclutches,ESr,Type,StudyID,Year,D,EN,Zr,VZr
+Zonotrichia_leucophrys,White-crowned sparrow,0,volume,294,73,0.140045943,stat,Mead1987,1987,3.421917808,85.91673339,0.140972438,0.012060292
+Passer_domesticus,House sparrow,0.009407469,volume,149,31,0.11175203,stat,Cordero2000,2000,4.04516129,36.83413078,0.112220753,0.029555954
+Serinus_canaria,Canary,0,volume,52,21,0.4967914,stat,Leitner2006,2006,2.180952381,23.84279476,0.545037117,0.047978211
+Turdus_merula,European blackbird,0.021189299,volume,82,54,0.3859854,stat,Martyka2010,2010,1.414814815,57.95811518,0.40707397,0.018195675
+Agelaius_phoeniceus,Red-winged blackbird,0.218316086,volume,394,106,0.07410136,raw,Weatherhead1985,1985,3.173584906,124.1498216,0.074237439,0.008254242
+Quiscalus_mexicanus,Great-tailed grackle,0.281894985,mass,822,205,0.051788336,raw,Teather1989,1989,3.407804878,241.2109934,0.05183471,0.004197959
+Taeniopygia_guttata,Zebra finch,-0.010812869,mass,116,24,-0.05636213,stat,Rutkowska2005,2005,4.066666667,28.52459016,-0.056421926,0.039177906
+Taeniopygia_guttata,Zebra finch,-0.010812869,mass,90,20,0,stat,Rutkowska2002,2002,3.8,23.68421053,0,0.048346056
+Vanellus_vanellus,Northern lapwing,-0.029825984,volume,114,32,0.03014961,stat,Lislevand2005,2005,3.05,37.37704918,0.03015875,0.029089175
+Philomachus_pugnax,Ruff,0.22184875,volume,120,30,0.03462025,stat,Thuman2003,2003,3.4,35.29411765,0.034634091,0.030965392
+Luscinia_svecica,Bluethroat,0,volume,102,18,-0.1468127,stat,Lifjeld2005,2005,4.733333333,21.54929577,-0.147881353,0.053910402
+Sturnus_unicolor,Spotless starling,0.025305865,mass,153,34,-0.133824538,stat,Cordero2001,2001,3.8,40.26315789,-0.134632122,0.026836158
+Branta_canadensis,Canada goose,0.061028185,mass,242,44,-0.006674089,stat,Leblanc1987,1987,4.6,52.60869565,-0.006674188,0.020157756
+Falco_tinnunculus,Eurasian kestrel,-0.080479586,mass,132,33,-0.1910071,stat,Martinez-Padilla2007,2007,3.4,38.82352941,-0.193382195,0.027914614
+Falco_tinnunculus,Eurasian kestrel,-0.080479586,mass,108,28,0.2165249,stat,Blanco2003,2003,3.285714286,32.86956522,0.220007175,0.033478894
+Larus_michahellis,Yellow-legged gull,0.091409863,mass,1068,669,-0.06718163,stat,Rubolini2009,2009,1.477130045,723.0236794,-0.067282976,0.001388843
+Cincloramphus_cruralis,Brown songlark,0.363356588,mass,44,17,0.1439293,stat,Isaksson2010,2010,2.270588235,19.37823834,0.144935702,0.061056628
+Pavo_cristatus,Peafowl,0.119205592,mass,3313,205,0.005428755,stat,Petrie2001,2001,13.12878049,252.3463625,0.005428808,0.004010486
+Parus_caeruleus,Blue tit,0.010299957,mass,192,21,0.07261821,stat,Cichon2003,2003,7.514285714,25.5513308,0.072746264,0.044343281
+Larus_argentatus,Herring gull,0.049687784,mass,79,30,0.1160999,stat,Bogdanova2006,2006,2.306666667,34.24855491,0.116625804,0.03200148
+Larus_fuscus,Lesser black-backed gull,0.064503231,mass,60,22,0.124354,stat,Bogdanova2005,2005,2.381818182,25.19083969,0.125001014,0.045063639
+Corvus_monedula,Jackdaw,0.041392685,mass,226,70,0.2033713,stat,Arnold2003,2003,2.782857143,81.21149897,0.206246808,0.012785844
+Carpodacus_mexicanus,House finch,0.002809678,volume,378,84,-0.075561895,raw,Badyaev2006,2006,3.8,99.47368421,-0.075706199,0.010365521
+Carpodacus_mexicanus,House finch,0,volume,304,64,-0.072529696,raw,Badyaev2006,2006,4,76,-0.072657281,0.01369863
+Sterna_hirundo,Common Tern,0.008600172,volume,427,158,0,raw,Fletcher2004,2004,2.362025316,180.7770632,0,0.005625023
+Lonchura_striata,Bengalese finch,-0.021189299,mass,116,34,0.09239892,stat,Soma2007,2007,2.929411765,39.59839357,0.092663229,0.027323604
+Molothrus_bonariensis,Ring-billed gull,0.078874433,mass,90,30,-0.000377536,stat,Chin2012,2012,2.6,34.61538462,-0.000377536,0.03163017
+Pica_pica,Magpie,0.063358906,volume,43,8,-0.036738337,raw,Slagsvold1992,1992,4.5,9.555555556,-0.036754879,0.152542373
+Corvus_corone,Hooded crow,0.049218023,volume,103,31,0.073794034,raw,Slagsvold1992,1992,2.858064516,36.03837472,0.073928423,0.030267833
+Taeniopygia_guttata,Zebra finch,-0.010812869,mass,133,22,-0.03994883,stat,Pariser2012,2012,5.036363636,26.40794224,-0.039970102,0.042720543
+Delichon_urbicum,House martin,0,mass,90,35,0.03076685,stat,Gil2006,2006,2.257142857,39.87341772,0.030776563,0.027119808
+Xanthocephalus_xanthocephalus,Yellow-headed blackbird,0.247321812,mass,90,23,-0.088294337,raw,Richter1983,1983,3.330434783,27.02349869,-0.088524861,0.04162591
+Larus_delawarensis,Ring-billed gull,0.06069784,mass,110,37,0.053696485,raw,Meathrel1987,1987,2.578378378,42.66247379,0.053748182,0.025212749
+Cincloramphus_cruralis,Brown songlark,0.363356588,volume,95,40,-0.3293106,stat,Magrath2003,2003,2.1,45.23809524,-0.342054801,0.02367531
+Larus_ridibundus,Black-headed gull,0,mass,60,20,0.1391671,stat,Groothuis2006,2006,2.6,23.07692308,0.140076126,0.049808429
+Anseranas_semipalmata,Magpie goose,0.125672077,mass,60,16,-0.110406595,raw,Whitehead1990,1990,3.2,18.75,-0.11085851,0.063492063
+Sterna_hirundo,Common Tern,0,volume,108,40,0.073872636,raw,Gonzalez-Solis2005,2005,2.36,45.76271186,0.074007456,0.023384859
+Falco_tinnunculus,Eurasian kestrel,-0.080479586,mass,80,16,0.097815848,raw,Wu2010,2010,4.2,19.04761905,0.098129617,0.06231454
+Larus_delawarensis,Shiny cowbird,0.035472318,volume,90,90,-0.07294093,stat,Tuero2012,2012,1,90,-0.073070702,0.011494253
+Pygoscelis_antarcticus,Chinstrap penguin,0.058509856,volume,264,132,0.03094903,stat,Fargallo2006,2006,1.8,146.6666667,0.030958917,0.006960557
+Phoebastria_irrorata,Waved albatross,0.091157684,volume,224,224,-0.088469783,stat,Awkerman2007,2007,1,224,-0.088701688,0.004524887
+Passer_domesticus,House sparrow,0.009407469,volume,77,19,0.05470045,stat,Wetzel2012,2012,3.442105263,22.37003058,0.054755105,0.051626145
+Sula_nebouxii,Blue-footed booby,-0.147287056,mass,76,56,-0.2611557,stat,D'Alba2007,2007,1.285714286,59.11111111,-0.267348297,0.017821782
+Stercorarius_parasiticus,Parasitic jaeger,-0.063477845,volume,534,267,0.1397023,stat,Janssen2006,2006,1.8,296.6666667,0.140621937,0.003405221
+Ficedula_albicollis,Collared flycatcher,0.013679697,volume,1162,198,0.04804496,stat,Bowers2013,2013,4.894949495,237.3875361,0.048081979,0.004266438
+Sterna_dougallii,Roseate tern,0,mass,440,146,-0.062868359,raw,Szczys2005,2005,2.610958904,168.5204617,-0.062951384,0.006041549
+Centrocercus_urophasianus,Greater sage-grouse,0.261995252,volume,146,20,0.06614164,stat,Atamian2010,2010,6.04,24.17218543,0.066238344,0.04723178
+Phasianus_colchicus,Ring-necked pheasant,0.140492874,mass,106,15,0.4531655,stat,Rubolini2007,2007,5.853333333,18.10933941,0.488676681,0.06618423
+Taeniopygia_guttata,Zebra finch,-0.010812869,mass,43,15,0,raw,Clotfelter1996,1996,2.493333333,17.2459893,0,0.070195195
+Larus_fuscus,Lesser black-backed gull,-0.010812869,volume,304,101,-0.00990453,raw,Bradbury1999,1999,2.607920792,116.5679575,-0.009904854,0.008805301
+Sturnus_vulgaris,European starling,0.01616166,mass,354,69,0.04996099,stat,Love2011,2011,4.304347826,82.24242424,0.050002622,0.012619503
+Erythrura_gouldiae,Gouldian finch,0,volume,1473,324,-0.019559388,stat,Pryke2009,2009,3.837037037,383.8899614,-0.019561883,0.00262543
+Larus_ridibundus,Black-headed gull,0,volume,147,49,0.05453834,stat,Lezalova2005,2005,2.6,56.53846154,0.05459251,0.018678161
+Aythya_ferina,Common pochard,0,volume,185,26,0.1957707,stat,Lezalova2013a,2014,5.892307692,31.39686684,0.198330883,0.035215153
+Aythya_fuligula,Tufted duck,0.026629385,volume,46,7,-0.04923335,stat,Lezalova2013b,2014,5.457142857,8.429319372,-0.049273187,0.184185149
+Aythya_affinis,Lesser scaup,0.031791834,volume,38,5,-0.143657133,raw,Dawson1996,1996,6.28,6.050955414,-0.144657788,0.32776618
+Chen_caerulescens,Lesser snow goose,0.037500891,mass,85,22,-0.089455106,raw,Ankney1980,1980,3.290909091,25.82872928,-0.089694871,0.043804453
+Falco_sparverius,American kestrels,-0.036212173,volume,170,34,0.093153037,raw,Anderson1997,1997,4.2,40.47619048,0.093423893,0.026683609
+Columba_livia,Domestic pigeon,0,mass,63,16,0.3301501,stat,Pike2005,2005,3.35,18.80597015,0.342996707,0.063267233
+Zonotrichia_leucophrys,White-crowned sparrow,0,mass,38,11,0.06348392,stat,Bonier2007,2007,2.963636364,12.82208589,0.063569411,0.101811368
+Quiscalus_major,Boat-tailed grackle,0.268544242,mass,122,41,-0.056059589,raw,Bancroft1984,1984,2.580487805,47.2778828,-0.056118426,0.022584639
+Cuculus_canorus,Common cuckoo,0.042879996,volume,71,38,0.1527316,stat,Fossoy2012,2012,1.694736842,41.89440994,0.153936091,0.025710636
+Eudyptes_chrysocome,Rockhopper penguin,0.037301411,mass,213,194,0.1191936,stat,Poisbleau2010,2010,1.078350515,197.5239006,0.119762927,0.005140756
+Larus_michahellis,Yellow-legged gull,0.091409863,volume,124,48,-0.003408011,stat,Perez2006,2006,2.266666667,54.70588235,-0.003408024,0.019340159
+Gallus_gallus,Red Junglefowl,0.080943092,volume,220,71,-0.08467843,stat,Parker2005,2005,2.678873239,82.12407992,-0.084881699,0.012638378
diff --git a/inst/extdata/protist_mutation_rates.csv b/inst/extdata/protist_mutation_rates.csv
new file mode 100755
index 0000000..658df4e
--- /dev/null
+++ b/inst/extdata/protist_mutation_rates.csv
@@ -0,0 +1,7 @@
+species,mu,pop.size,genome.size
+Tetrahymena thermophila,7.61E-012,1.12E+008,1.04E+008
+Paramecium tetraurelia,1.94E-011,1.24E+008,7.20E+007
+Chlamydomonas reinhardtii,2.08E-010,1.00E+008,1.12E+008
+Dictyostelium discoideum,2.9E-011,7.40E+006,3.40E+007
+Saccharomyces cerevisiae,3.3E-010,1.00E+008,1.25E+008
+Saccharomyces pombe,2E-010,1.00E+007,1.25E+008
diff --git a/man/get_study.Rd b/man/get_study.Rd
new file mode 100644
index 0000000..8d625a2
--- /dev/null
+++ b/man/get_study.Rd
@@ -0,0 +1,60 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/studies.R
+\name{get_study}
+\alias{get_study}
+\title{Get all the trees associated with a particular study}
+\usage{
+get_study(study_id = NULL, object_format = c("phylo", "nexml"), file_format,
+  file, ...)
+}
+\arguments{
+\item{study_id}{the study ID for the study of interest (character)}
+
+\item{object_format}{the class of the object the query should
+return (either \code{phylo} or \code{nexml}). Ignored if
+\code{file_format} is specified.}
+
+\item{file_format}{the format of the file to be generated
+(\code{newick}, \code{nexus}, \code{nexml} or \code{json}).}
+
+\item{file}{the file name where the output of the function will be
+saved.}
+
+\item{...}{additional arguments to customize the API request (see
+\code{\link{rotl}} package documentation).}
+}
+\value{
+if \code{file_format} is missing, an object of class
+    \code{phylo} or \code{nexml}, otherwise a logical indicating
+    whether the file was successfully created.
+}
+\description{
+Returns the trees associated with a given study
+}
+\details{
+If \code{file_format} is missing, the function returns an object
+of the class \code{phylo} from the \code{\link[ape]{ape}} package
+(default), or an object of the class \code{nexml} from the
+\code{RNeXML} package.
+
+Otherwise \code{file_format} can be either \code{newick},
+\code{nexus}, \code{nexml} or \code{json}, and the function will
+generate a file of the selected format. In this case, a file name
+needs to be provided using the argument \code{file}. If a file
+with the same name already exists, it will be silently
+overwritten.
+}
+\examples{
+\dontrun{
+that_one_study <- get_study(study_id="pg_719", object_format="phylo")
+if (require(RNeXML)) { ## if RNeXML is installed get the object directly
+   nexml_study <- get_study(study_id="pg_719", object_format="nexml")
+} else { ## otherwise write it to a file
+   get_study(study_id="pg_719", file_format="nexml", file=tempfile(fileext=".nexml"))
+}
+}
+}
+\seealso{
+\code{\link{get_study_meta}}
+}
+
diff --git a/man/get_study_meta.Rd b/man/get_study_meta.Rd
new file mode 100644
index 0000000..debb7c6
--- /dev/null
+++ b/man/get_study_meta.Rd
@@ -0,0 +1,80 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/studies-methods.R, R/studies.R
+\name{get_tree_ids}
+\alias{candidate_for_synth}
+\alias{candidate_for_synth.study_meta}
+\alias{get_publication}
+\alias{get_publication.study_meta}
+\alias{get_study_meta}
+\alias{get_study_year}
+\alias{get_study_year.study_meta}
+\alias{get_tree_ids}
+\alias{get_tree_ids.study_meta}
+\title{Study Metadata}
+\usage{
+get_tree_ids(sm)
+
+get_publication(sm)
+
+candidate_for_synth(sm)
+
+get_study_year(sm)
+
+\method{get_tree_ids}{study_meta}(sm)
+
+\method{get_publication}{study_meta}(sm)
+
+\method{candidate_for_synth}{study_meta}(sm)
+
+\method{get_study_year}{study_meta}(sm)
+
+get_study_meta(study_id, ...)
+}
+\arguments{
+\item{sm}{an object created by \code{get_study_meta}}
+
+\item{study_id}{the study identifier (character)}
+
+\item{...}{additional arguments to customize the API request (see
+\code{\link{rotl}} package documentation).}
+}
+\value{
+named-list containing the metadata associated with the
+    study requested
+}
+\description{
+Retrieve metadata about a study in the Open Tree of Life datastore.
+}
+\details{
+\code{get_study_meta} returns a long list of attributes for the
+studies that are contributing to the synthetic tree. To help with
+the extraction of relevant information from this list, several
+helper functions exist: \itemize{
+
+\item {get_tree_ids} { The identifiers of the trees
+  associated with the study }
+
+\item {get_publication} { The citation information of the
+  publication for the study. The DOI (or URL) for the study is
+  available as an attribute to the returned object (i.e.,
+  \code{attr(object, "DOI")}). }
+
+\item {candidate_for_synth} { The identifier of the tree(s) from
+  the study used in the synthetic tree. This is a subset of the
+  result of \code{get_tree_ids}. }
+
+\item {get_study_year} { The year of publication of the study. }
+
+}
+}
+
+\examples{
+\dontrun{
+req <- get_study_meta("pg_719")
+get_tree_ids(req)
+candidate_for_synth(req)
+get_publication(req)
+get_study_year(req)
+}
+}
+
diff --git a/man/get_study_subtree.Rd b/man/get_study_subtree.Rd
new file mode 100644
index 0000000..45573d7
--- /dev/null
+++ b/man/get_study_subtree.Rd
@@ -0,0 +1,44 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/studies.R
+\name{get_study_subtree}
+\alias{get_study_subtree}
+\title{Study Subtree}
+\usage{
+get_study_subtree(study_id, tree_id, subtree_id, object_format = c("phylo"),
+  file_format, file, ...)
+}
+\arguments{
+\item{study_id}{the study identifier (character)}
+
+\item{tree_id}{the tree identifier (character)}
+
+\item{subtree_id}{either a node id that specifies a subtree or
+\dQuote{ingroup} which returns the ingroup for this subtree.}
+
+\item{object_format}{the class of the object returned by the
+function (default, and currently only possibility \code{phylo}
+from the \code{\link[ape]{ape}} package)}
+
+\item{file_format}{character, the file format to use to save the
+results of the query (possible values, \sQuote{newick},
+\sQuote{nexus}, \sQuote{json}).}
+
+\item{file}{character, the path and file name where the output
+should be written.}
+
+\item{...}{additional arguments to customize the API request (see
+\code{\link{rotl}} package documentation).}
+}
+\description{
+Retrieve subtree from a specific tree in the Open Tree of Life data store
+}
+\examples{
+\dontrun{
+small_tr <- get_study_subtree(study_id="pg_1144", tree="tree2324", subtree_id="node552052")
+ingroup  <- get_study_subtree(study_id="pg_1144", tree="tree2324", subtree_id="ingroup")
+nexus_file <- tempfile(fileext=".nex")
+get_study_subtree(study_id="pg_1144", tree="tree2324", subtree_id="ingroup", file=nexus_file,
+                  file_format="nexus")
+}
+}
+
diff --git a/man/get_study_tree.Rd b/man/get_study_tree.Rd
new file mode 100644
index 0000000..8c3dcd5
--- /dev/null
+++ b/man/get_study_tree.Rd
@@ -0,0 +1,61 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/studies.R
+\name{get_study_tree}
+\alias{get_study_tree}
+\title{Study Tree}
+\usage{
+get_study_tree(study_id = NULL, tree_id = NULL,
+  object_format = c("phylo"), tip_label = c("original_label", "ott_id",
+  "ott_taxon_name"), file_format, file, deduplicate = TRUE, ...)
+}
+\arguments{
+\item{study_id}{the identifier of a study (character)}
+
+\item{tree_id}{the identifier of a tree within the study}
+
+\item{object_format}{the class of the object to be returned
+(default and currently only possible value \code{phylo} from
+the \code{\link[ape]{ape}} package).}
+
+\item{tip_label}{the format of the tip
+labels. \dQuote{\code{original_label}} (default) returns the
+original labels as provided in the study,
+\dQuote{\code{ott_id}} labels are replaced by their ott IDs,
+\dQuote{\code{ott_taxon_name}} labels are replaced by their
+Open Tree Taxonomy taxon name.}
+
+\item{file_format}{the format of the file to be generated
+(\code{newick} default, \code{nexus}, or \code{json}).}
+
+\item{file}{the file name where the output of the function will be
+saved.}
+
+\item{deduplicate}{logical (default \code{TRUE}). If the tree
+returned by the study contains duplicated taxon names, should they
+be made unique? It is normally illegal for NEXUS/Newick tree
+strings to contain duplicated tip names. This is a workaround to
+circumvent this requirement. If \code{TRUE}, duplicated tip labels
+will be appended \code{_1}, \code{_2}, etc.}
+
+\item{...}{additional arguments to customize the API request (see
+\code{\link{rotl}} package documentation).}
+}
+\value{
+if \code{file_format} is missing, an object of class
+    \code{phylo}, otherwise a logical indicating whether the file
+    was successfully created.
+}
+\description{
+Returns a specific tree from within a study
+}
+\examples{
+\dontrun{
+ tree <- get_study_tree(study_id="pg_1144", tree="tree2324")
+
+ ## comparison of the first few tip labels depending on the options used
+ head(get_study_tree(study_id="pg_1144", tree="tree2324", tip_label="original_label")$tip.label)
+ head(get_study_tree(study_id="pg_1144", tree="tree2324", tip_label="ott_id")$tip.label)
+ head(get_study_tree(study_id="pg_1144", tree="tree2324", tip_label="ott_taxon_name")$tip.label)
+}
+}
+
diff --git a/man/list_trees.Rd b/man/list_trees.Rd
new file mode 100644
index 0000000..822264e
--- /dev/null
+++ b/man/list_trees.Rd
@@ -0,0 +1,41 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/studies-methods.R
+\name{list_trees}
+\alias{list_trees}
+\alias{list_trees.matched_studies}
+\title{List tree ids in objects returned by
+\code{\link{studies_find_studies}} and
+\code{\link{studies_find_trees}}.}
+\usage{
+list_trees(matched_studies, ...)
+
+\method{list_trees}{matched_studies}(matched_studies, study_id, ...)
+}
+\arguments{
+\item{matched_studies}{an object created by
+\code{studies_find_trees} or \code{studies_find_studies}.}
+
+\item{...}{Currently unused}
+
+\item{study_id}{a \code{study_id} listed in the object returned by
+\code{studies_find_trees}}
+}
+\value{
+\code{list_trees} returns a list of the tree_ids for each
+    study that match the requested criteria. If a \code{study_id}
+    is provided, then only the trees for this study are returned
+    as a vector.
+}
+\description{
+\code{list_trees} returns all trees associated with a particular
+study when used on an object returned by
+\code{\link{studies_find_studies}}, but only the trees that match
+the search criteria when used on objects returned by
+\code{\link{studies_find_trees}}.
+}
+\seealso{
+\code{\link{studies_find_studies}} and
+    \code{\link{studies_find_trees}}. The help pages for these functions
+    have examples demonstrating the use of \code{list_trees}.
+}
+
diff --git a/man/match_names-methods.Rd b/man/match_names-methods.Rd
new file mode 100644
index 0000000..576e1f9
--- /dev/null
+++ b/man/match_names-methods.Rd
@@ -0,0 +1,60 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/match_names.R, R/methods.R
+\name{ott_id.match_names}
+\alias{flags}
+\alias{flags.match_names}
+\alias{ott_id.match_names}
+\title{\code{ott_id} and \code{flags} for taxonomic names matched
+    by \code{tnrs_match_names}}
+\usage{
+\method{ott_id}{match_names}(tax, row_number, taxon_name, ott_id, ...)
+
+\method{flags}{match_names}(tax, row_number, taxon_name, ott_id, ...)
+
+flags(tax, ...)
+}
+\arguments{
+\item{tax}{an object returned by \code{\link{tnrs_match_names}}}
+
+\item{row_number}{the row number corresponding to the name for
+which to list the synonyms}
+
+\item{taxon_name}{the taxon name corresponding to the name for
+which to list the synonyms}
+
+\item{ott_id}{the ott id corresponding to the name for which to
+list the synonyms}
+
+\item{...}{currently ignored}
+}
+\value{
+A list of the ott ids or flags for the taxonomic names
+    matched with \code{\link{tnrs_match_names}}, for either one or
+    all the names.
+}
+\description{
+\code{rotl} provides a collection of functions that allows users
+to extract relevant information from an object generated by
+\code{\link{tnrs_match_names}} function.
+}
+\details{
+These methods optionally accept one of the arguments
+\code{row_number}, \code{taxon_name} or \code{ott_id} to retrieve
+the corresponding information for one of the matches in the object
+returned by the \code{\link{tnrs_match_names}} function.
+
+If these arguments are not provided, these methods can return
+information for the matches currently listed in the object
+returned by \code{\link{tnrs_match_names}}.
+}
+\examples{
+\dontrun{
+  rsp <- tnrs_match_names(c("Diadema", "Tyrannosaurus"))
+  rsp$ott_id    # ott id for match currently in use
+  ott_id(rsp)   # similar as above but elements are named
+
+  ## flags() is useful for instance to determine if a taxon is extinct
+  flags(rsp, taxon_name="Tyrannosaurus")
+}
+}
+
diff --git a/man/match_names.Rd b/man/match_names.Rd
new file mode 100644
index 0000000..500fdb0
--- /dev/null
+++ b/man/match_names.Rd
@@ -0,0 +1,77 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/match_names.R
+\name{inspect.match_names}
+\alias{inspect}
+\alias{inspect.match_names}
+\alias{update.match_names}
+\title{Inspect and Update alternative matches for a name returned
+    by tnrs_match_names}
+\usage{
+\method{inspect}{match_names}(response, row_number, taxon_name, ott_id, ...)
+
+inspect(response, ...)
+
+\method{update}{match_names}(object, row_number, taxon_name, ott_id,
+  new_row_number, new_ott_id, ...)
+}
+\arguments{
+\item{response}{an object generated by the
+\code{\link{tnrs_match_names}} function}
+
+\item{row_number}{the row number corresponding to the name to
+inspect}
+
+\item{taxon_name}{the taxon name corresponding to the name to
+inspect}
+
+\item{ott_id}{the ott id corresponding to the name to inspect}
+
+\item{...}{currently ignored}
+
+\item{object}{an object created by \code{\link{tnrs_match_names}}}
+
+\item{new_row_number}{the row number in the output of
+\code{\link{inspect}} to replace the taxa specified by
+\code{row_number}, \code{taxon_name}, or \code{ott_id}.}
+
+\item{new_ott_id}{the ott id of the taxon to replace the taxa
+specified by \code{row_number}, \code{taxon_name}, or
+\code{ott_id}.}
+}
+\value{
+a data frame
+}
+\description{
+Taxonomic names may have different meanings in different taxonomic
+contexts, as the same genus name can be applied to animals and
+plants for instance. Additionally, the meaning of a taxonomic name
+may have changed throughout its history, and may have referred to a
+different taxon in the past. In such cases, a given name might
+have multiple matches in the Open Tree Taxonomy. These functions
+allow users to inspect (and update) alternative meanings of a given
+name and its current taxonomic status according to the Open Tree
+Taxonomy.
+}
+\details{
+To inspect alternative taxonomic meanings of a given name, you
+need to provide the object resulting from a call to the
+tnrs_match_names function, as well as one of either the row number
+corresponding to the name in this object, the name itself (as used
+in the original query), or the ott_id listed for this name.
+
+To update one of the names, you also need to provide the row number
+in which the name to be replaced appears or its ott id.
+}
+\examples{
+  \dontrun{
+   matched_names <- tnrs_match_names(c("holothuria", "diadema", "boletus"))
+   inspect(matched_names, taxon_name="diadema")
+   new_matched_names <- update(matched_names, taxon_name="diadema",
+                               new_ott_id = 631176)
+   new_matched_names
+   }
+}
+\seealso{
+\code{\link{tnrs_match_names}}
+}
+
diff --git a/man/rotl.Rd b/man/rotl.Rd
new file mode 100644
index 0000000..c4f8f9e
--- /dev/null
+++ b/man/rotl.Rd
@@ -0,0 +1,56 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/rotl-package.R
+\docType{package}
+\name{rotl}
+\alias{rotl}
+\alias{rotl-package}
+\title{An Interface to the Open Tree of Life API}
+\description{
+The Open Tree of Life is an NSF funded project that is generating
+an online, comprehensive phylogenetic tree for 1.8 million
+species. \code{rotl} provides an interface that allows you to
+query and retrieve the parts of the tree of life that are of
+interest to you.
+}
+\details{
+\code{rotl} provides functions for most of the end points the API
+provides. The documentation of the API is available at:
+\url{https://github.com/OpenTreeOfLife/opentree/wiki/Open-Tree-of-Life-APIs}
+}
+\section{Customizing API calls}{
+
+
+    All functions that use API end points can take 2 arguments to
+    customize the API call and are passed as \code{...} arguments.
+
+    \itemize{
+
+    \item{ \code{otl_v} } { This argument controls which version
+    of the API your call is using. The default value for this
+    argument is a call to the non-exported function
+    \code{otl_version()} which returns the current version of the
+    Open Tree of Life APIs (v2).}
+
+    \item{ \code{dev_url} } { This argument controls whether to use
+    the development version of the API. By default, \code{dev_url}
+    is set to \code{FALSE}, using \code{dev_url = TRUE} in your
+    function calls will use the development version.}
+
+    }
+
+    For example, to use the development version of the API, you
+    could use: \code{tnrs_match_names("anas", dev_url=TRUE)}
+
+    Additional arguments can also be passed to the
+    \code{\link[httr]{GET}} and \code{\link[httr]{POST}} methods.
+}
+
+\section{Acknowledgments}{
+
+
+    This package was started during the Open Tree of Life
+    \href{http://blog.opentreeoflife.org/2014/06/11/apply-for-tree-for-all-a-hackathon-to-access-opentree-resources/}{Hackathon}
+    organized by OpenTree, the NESCent Hackathon Interoperability
+    Phylogenetic group, and Arbor.
+}
+
diff --git a/man/source_list.Rd b/man/source_list.Rd
new file mode 100644
index 0000000..5a6f5c1
--- /dev/null
+++ b/man/source_list.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/methods.R, R/tol.R
+\name{source_list}
+\alias{source_list}
+\alias{source_list.tol_summary}
+\title{List of studies used in the Tree of Life}
+\usage{
+source_list(tax, ...)
+
+\method{source_list}{tol_summary}(tax, ...)
+}
+\arguments{
+\item{tax}{a list containing a \code{source_id_map} slot.}
+
+\item{...}{additional arguments (currently unused)}
+}
+\value{
+a data frame
+}
+\description{
+Retrieve the detailed information for the list of studies used in
+the Tree of Life.
+}
+\details{
+This function takes the object resulting from
+    \code{tol_about(study_list = TRUE)}, \code{tol_mrca()},
+    \code{tol_node_info()}, and returns a data frame listing the
+    \code{tree_id}, \code{study_id} and \code{git_sha} for the
+    studies currently included in the Tree of Life.
+}
+
diff --git a/man/strip_ott_ids.Rd b/man/strip_ott_ids.Rd
new file mode 100644
index 0000000..b140af6
--- /dev/null
+++ b/man/strip_ott_ids.Rd
@@ -0,0 +1,32 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/tol.R
+\name{strip_ott_ids}
+\alias{strip_ott_ids}
+\title{Strip OTT ids from tip labels}
+\usage{
+strip_ott_ids(tip_labels, remove_underscores = FALSE)
+}
+\arguments{
+\item{tip_labels}{a character vector containing tip labels (most
+likely the \code{tip.label} element from a tree returned by
+\code{\link{tol_induced_subtree}})}
+
+\item{remove_underscores}{logical (defaults to FALSE). If set to
+TRUE underscores in tip labels are converted to spaces}
+}
+\value{
+A character vector containing the contents of
+    \code{tip_labels} with any OTT ids removed.
+}
+\description{
+Strip OTT ids from tip labels
+}
+\examples{
+\dontrun{
+genera <- c("Perdix", "Dendroica", "Cinclus", "Selasphorus", "Struthio")
+tr <- tol_induced_subtree(ott_ids=c(292466, 267845, 666104, 102710))
+tr$tip.label \%in\% genera
+tr$tip.label <- strip_ott_ids(tr$tip.label)
+tr$tip.label \%in\% genera}
+}
+
diff --git a/man/studies_find_studies.Rd b/man/studies_find_studies.Rd
new file mode 100644
index 0000000..e6e8be9
--- /dev/null
+++ b/man/studies_find_studies.Rd
@@ -0,0 +1,71 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/studies.R
+\name{studies_find_studies}
+\alias{studies_find_studies}
+\title{Find a Study}
+\usage{
+studies_find_studies(property = NULL, value = NULL, verbose = FALSE,
+  exact = FALSE, detailed = TRUE, ...)
+}
+\arguments{
+\item{property}{The property to be searched on (character)}
+
+\item{value}{The property value to be searched on (character)}
+
+\item{verbose}{Should the output include all metadata? (logical,
+default \code{FALSE})}
+
+\item{exact}{Should exact matching be used? (logical, default
+\code{FALSE})}
+
+\item{detailed}{If \code{TRUE} (default), the function will return
+a data frame that summarizes information about the study (see
+\sQuote{Value}). Otherwise, it only returns the study
+identifiers.}
+
+\item{...}{additional arguments to customize the API request (see
+\code{\link{rotl}} package documentation).}
+}
+\value{
+If \code{detailed=TRUE}, the function returns a data frame
+    listing the study id (\code{study_ids}), the number of trees
+    associated with this study (\code{n_trees}), the tree ids (at
+    most 5) associated with the studies (\code{tree_ids}), the
+    tree id that is a candidate for the synthetic tree if any
+    (\code{candidate}), the year of publication of the study
+    (\code{study_year}), the title of the publication for the
+    study (\code{title}), and the DOI (Digital Object Identifier)
+    for the study (\code{study_doi}).
+
+    If \code{detailed=FALSE}, the function returns a data frame
+    with a single column containing the study identifiers.
+}
+\description{
+Return the identifiers of studies that match given properties
+}
+\examples{
+\dontrun{
+## To match a study for which the identifier is already known
+one_study <- studies_find_studies(property="ot:studyId", value="pg_719")
+list_trees(one_study)
+
+## To find studies pertaining to Mammals
+mammals <- studies_find_studies(property="ot:focalCladeOTTTaxonName",
+                                value="mammalia")
+## To extract the tree identifiers for each of the studies
+list_trees(mammals)
+## ... or for a given study
+list_trees(mammals, "ot_308")
+
+## Just the identifiers without other information about the studies
+mammals <- studies_find_studies(property="ot:focalCladeOTTTaxonName",
+                                value="mammalia", detailed=FALSE)
+}
+}
+\seealso{
+\code{\link{studies_properties}} which lists properties
+    against which the studies can be
+    searched. \code{\link{list_trees}} that returns a list for all
+    tree ids associated with a study.
+}
+
diff --git a/man/studies_find_trees.Rd b/man/studies_find_trees.Rd
new file mode 100644
index 0000000..8de6e59
--- /dev/null
+++ b/man/studies_find_trees.Rd
@@ -0,0 +1,80 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/studies.R
+\name{studies_find_trees}
+\alias{studies_find_trees}
+\title{Find Trees}
+\usage{
+studies_find_trees(property = NULL, value = NULL, verbose = FALSE,
+  exact = FALSE, detailed = TRUE, ...)
+}
+\arguments{
+\item{property}{The property to be searched on (character)}
+
+\item{value}{The property-value to be searched on (character)}
+
+\item{verbose}{Should the output include all metadata? (logical,
+default \code{FALSE})}
+
+\item{exact}{Should exact matching be used for the value?
+(logical, default \code{FALSE})}
+
+\item{detailed}{Should a detailed report be provided? If
+\code{TRUE} (default), the output will include metadata about
+the study that include trees matching the property. Otherwise,
+only information about the trees will be provided.}
+
+\item{...}{additional arguments to customize the API request (see
+\code{\link{rotl}} package documentation).}
+}
+\value{
+A data frame that summarizes the trees found (and their
+    associated studies) for the requested criteria. If a study has
+    more than 5 trees, the \code{tree_ids} of the first ones will
+    be shown, followed by \code{...} to indicate that more are
+    present.
+
+    If \code{detailed=FALSE}, the data frame will include the
+    study ids of the study (\code{study_ids}), the number of trees
+    in this study that match the search criteria
+    (\code{n_matched_trees}), the tree ids that match the search
+    criteria (\code{match_tree_ids}).
+
+    If \code{detailed=TRUE}, in addition to the fields listed
+    above, the data frame will also contain the total number of
+    trees associated with the study (\code{n_trees}), all the tree
+    ids associated with the study (\code{tree_ids}), the tree id
+    that is a potential candidate for inclusion in the synthetic
+    tree (if any) (\code{candidate}), the year the study was
+    published (\code{study_year}), the title of the study
+    (\code{title}), the DOI for the study (\code{study_doi}).
+}
+\description{
+Return a list of studies for which trees match a given set of
+properties
+}
+\details{
+The list of possible values to be used as values for the argument
+\code{property} can be found using the function
+\code{\link{studies_properties}}.
+}
+\examples{
+\dontrun{
+res <- studies_find_trees(property="ot:ottTaxonName", value="Drosophila",
+                          detailed=FALSE)
+## summary of the trees and associated studies that match this criterion
+res
+## With metadata about the studies (default)
+res <- studies_find_trees(property="ot:ottTaxonName", value="Drosophila",
+                          detailed=TRUE)
+## The list of trees for each study that match the search criteria
+list_trees(res)
+## the trees for a given study
+list_trees(res, study_id = "pg_2769")
+}
+}
+\seealso{
+\code{\link{studies_properties}} which lists properties
+  the studies can be searched on. \code{\link{list_trees}} for
+  listing the trees that match the query.
+}
+
diff --git a/man/studies_properties.Rd b/man/studies_properties.Rd
new file mode 100644
index 0000000..837ead4
--- /dev/null
+++ b/man/studies_properties.Rd
@@ -0,0 +1,39 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/studies.R
+\name{studies_properties}
+\alias{studies_properties}
+\title{Properties of the Studies}
+\usage{
+studies_properties(...)
+}
+\arguments{
+\item{...}{additional arguments to customize the API request (see
+\code{\link{rotl}} package documentation).}
+}
+\value{
+A list of the study properties that can be used to find
+    studies and trees that are contributing to the synthetic tree.
+}
+\description{
+Return the list of study properties that can be used to search
+studies and trees used in the synthetic tree.
+}
+\details{
+The list returned has 2 elements \code{tree_properties} and
+\code{studies_properties}. Each of these elements lists the
+properties that can be used
+to search for trees and studies that are contributing to the
+synthetic tree. The definitions of these properties are available
+from
+\url{https://github.com/OpenTreeOfLife/phylesystem-api/wiki/NexSON}
+}
+\examples{
+\dontrun{
+ all_the_properties <- studies_properties()
+ unlist(all_the_properties$tree_properties)
+}
+}
+\seealso{
+\code{\link{studies_find_trees}}
+}
+
diff --git a/man/study_external_IDs.Rd b/man/study_external_IDs.Rd
new file mode 100644
index 0000000..93933ec
--- /dev/null
+++ b/man/study_external_IDs.Rd
@@ -0,0 +1,43 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/external_data.R
+\name{study_external_IDs}
+\alias{study_external_IDs}
+\title{Get external identifiers for data associated with an Open Tree study}
+\usage{
+study_external_IDs(study_id)
+}
+\arguments{
+\item{study_id}{An open tree study ID}
+}
+\value{
+A study_external_data object (which inherits from a list) which
+contains some of the following.
+
+doi, character, the DOI for the paper describing this study
+
+external_data_url, character, a URL to an external data repository 
+(e.g. a treebase entry) if one exists.
+
+pubmed_id character, the unique ID for this study in the NCBI's pubmed database
+
+popset_ids character, vector of IDs for the NCBI's popset database
+
+nucleotide_ids character, vector of IDs for the NCBI's nucleotide database
+}
+\description{
+Data associated with studies contributing to the Open Tree synthesis may
+be available from other databases. In particular, trees and alignments 
+may be available from treebase and DNA sequences and bibliographic
+information associated with a given study may be available from the NCBI.
+This function retrieves that information for a given study.
+}
+\examples{
+\dontrun{
+flies <- studies_find_studies(property="ot:focalCladeOTTTaxonName", value="Drosophilidae")
+study_external_IDs(flies[2,]$study_ids)
+}
+}
+\seealso{
+studies_find_studies (used to discover study IDs)
+}
+
diff --git a/man/synonyms.match_names.Rd b/man/synonyms.match_names.Rd
new file mode 100644
index 0000000..5f90b3a
--- /dev/null
+++ b/man/synonyms.match_names.Rd
@@ -0,0 +1,55 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/match_names.R
+\name{synonyms.match_names}
+\alias{synonyms.match_names}
+\title{List the synonyms for a given name}
+\usage{
+\method{synonyms}{match_names}(tax, row_number, taxon_name, ott_id, ...)
+}
+\arguments{
+\item{tax}{a data frame generated by the
+\code{\link{tnrs_match_names}} function}
+
+\item{row_number}{the row number corresponding to the name for
+which to list the synonyms}
+
+\item{taxon_name}{the taxon name corresponding to the name for
+which to list the synonyms}
+
+\item{ott_id}{the ott id corresponding to the name for which to
+list the synonyms}
+
+\item{...}{currently ignored}
+}
+\value{
+a list whose elements are all synonym names (as vectors of
+    character) for the taxonomic names that match the query (the
+    names of the elements of the list).
+}
+\description{
+When querying the Taxonomic Name Resolution Services for a
+particular taxonomic name, the API returns as possible matches all
+names that include the queried name as a possible synonym. This
+function allows you to explore other synonyms for an accepted
+name, and allows you to determine why the name you queried is
+returning an accepted synonym.
+}
+\details{
+To list synonyms for a given taxonomic name, you need to provide
+the object resulting from a call to the
+\code{\link{tnrs_match_names}} function, as well as one of either
+the row number corresponding to the name in this object, the name
+itself (as used in the original query), or the ott_id listed for
+this name. Otherwise, the synonyms for all the currently matched
+names are returned.
+}
+\examples{
+\dontrun{
+   echino <- tnrs_match_names(c("Diadema", "Acanthaster", "Fromia"))
+   ## These 3 calls are identical
+   synonyms(echino, taxon_name="Acanthaster")
+   synonyms(echino, row_number=2)
+   synonyms(echino, ott_id=337928)
+}
+}
+
diff --git a/man/tax_lineage.Rd b/man/tax_lineage.Rd
new file mode 100644
index 0000000..b6b255c
--- /dev/null
+++ b/man/tax_lineage.Rd
@@ -0,0 +1,32 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/methods.R, R/taxonomy.R
+\name{tax_lineage}
+\alias{tax_lineage}
+\alias{tax_lineage.taxon_info}
+\title{Lineage of a taxon}
+\usage{
+tax_lineage(tax, ...)
+
+\method{tax_lineage}{taxon_info}(tax, ...)
+}
+\arguments{
+\item{tax}{an object created by \code{\link{taxonomy_taxon_info}}
+using the argument \code{include_lineage=TRUE}.}
+
+\item{...}{additional arguments (currently unused).}
+}
+\value{
+A list with one slot per taxon that contains a data frame
+    with 3 columns: the taxonomy rank, the name, and unique name
+    for all taxa included in the lineage of the taxon up to the
+    root of the tree.
+}
+\description{
+Extract the lineage information (higher taxonomy) from an object
+returned by \code{\link{taxonomy_taxon_info}}.
+}
+\details{
+The object passed to this function must have been created using
+the argument \code{include_lineage=TRUE}.
+}
+
diff --git a/man/taxon_external_IDs.Rd b/man/taxon_external_IDs.Rd
new file mode 100644
index 0000000..0ec2e8b
--- /dev/null
+++ b/man/taxon_external_IDs.Rd
@@ -0,0 +1,32 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/external_data.R
+\name{taxon_external_IDs}
+\alias{taxon_external_IDs}
+\title{Get external identifiers for data associated with an Open Tree taxon}
+\usage{
+taxon_external_IDs(taxon_id)
+}
+\arguments{
+\item{taxon_id}{An open tree taxon ID}
+}
+\value{
+a data.frame in which each row represents a unique record in an
+external database. The column "source" provides an abbreviated name for the
+database, and "id" the unique ID for the record.
+}
+\description{
+The Open Tree taxonomy is a synthesis of multiple reference taxonomies. This
+function retrieves identifiers to external taxonomic records that have
+contributed the rank, position and definition of a given Open Tree taxon.
+}
+\examples{
+\dontrun{
+   gibbon_IDs <- taxon_external_IDs(712902) 
+}
+}
+\seealso{
+tnrs_match_names, which can be used to search for taxa by name.
+
+taxonomy_taxon_info, for more information about a given taxon.
+}
+
diff --git a/man/taxonomy-methods.Rd b/man/taxonomy-methods.Rd
new file mode 100644
index 0000000..f05da36
--- /dev/null
+++ b/man/taxonomy-methods.Rd
@@ -0,0 +1,43 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/methods.R
+\name{tax_rank}
+\alias{is_suppressed}
+\alias{ott_id}
+\alias{synonyms}
+\alias{tax_name}
+\alias{tax_rank}
+\alias{tax_sources}
+\alias{unique_name}
+\title{Methods for Taxonomy}
+\usage{
+tax_rank(tax, ...)
+
+ott_id(tax, ...)
+
+synonyms(tax, ...)
+
+tax_sources(tax, ...)
+
+is_suppressed(tax, ...)
+
+unique_name(tax, ...)
+
+tax_name(tax, ...)
+}
+\arguments{
+\item{tax}{an object returned by \code{\link{taxonomy_taxon_info}},
+\code{\link{taxonomy_mrca}}, or \code{\link{tnrs_match_names}}}
+
+\item{...}{additional arguments (see
+\code{\link{tnrs_match_names}})}
+}
+\description{
+Methods for dealing with objects containing taxonomic information
+(Taxonomy, TNRS endpoints)
+}
+\details{
+This is the page for the generic methods. See the help pages for
+\code{\link{taxonomy_taxon_info}}, \code{\link{taxonomy_mrca}}, and
+\code{\link{tnrs_match_names}} for more information.
+}
+
diff --git a/man/taxonomy_about.Rd b/man/taxonomy_about.Rd
new file mode 100644
index 0000000..88f0a82
--- /dev/null
+++ b/man/taxonomy_about.Rd
@@ -0,0 +1,44 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/taxonomy.R
+\name{taxonomy_about}
+\alias{taxonomy_about}
+\title{Information about the Open Tree Taxonomy}
+\usage{
+taxonomy_about(...)
+}
+\arguments{
+\item{...}{additional arguments to customize the API request (see
+\code{\link{rotl}} package documentation).}
+}
+\value{
+A list with the following properties:
+\itemize{
+
+    \item {weburl} {String. The release page for this version
+    of the taxonomy.}
+
+    \item {author} {String. The author string.}
+
+    \item {name} {String. The name of the taxonomy.}
+
+    \item {source} {String. The full identifying information for
+    this version of the taxonomy.}
+
+    \item {version} {String. The version number of the taxonomy.}
+}
+}
+\description{
+Summary information about the Open Tree Taxonomy (OTT)
+}
+\details{
+Return metadata and information about the taxonomy
+itself. Currently, the available metadata is fairly sparse, but
+includes (at least) the version, and the location from which the
+complete taxonomy source files can be downloaded.
+}
+\examples{
+\dontrun{
+taxonomy_about()
+}
+}
+
diff --git a/man/taxonomy_mrca.Rd b/man/taxonomy_mrca.Rd
new file mode 100644
index 0000000..ca2bcfd
--- /dev/null
+++ b/man/taxonomy_mrca.Rd
@@ -0,0 +1,73 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/taxonomy.R
+\name{taxonomy_mrca}
+\alias{flags.taxon_mrca}
+\alias{is_suppressed.taxon_mrca}
+\alias{ott_id.taxon_mrca}
+\alias{tax_name.taxon_mrca}
+\alias{tax_rank.taxon_mrca}
+\alias{tax_sources.taxon_mrca}
+\alias{taxonomy_mrca}
+\alias{unique_name.taxon_mrca}
+\title{Taxonomic MRCA}
+\usage{
+taxonomy_mrca(ott_ids = NULL, ...)
+
+\method{tax_rank}{taxon_mrca}(tax, ...)
+
+\method{tax_name}{taxon_mrca}(tax, ...)
+
+\method{ott_id}{taxon_mrca}(tax, ...)
+
+\method{unique_name}{taxon_mrca}(tax, ...)
+
+\method{tax_sources}{taxon_mrca}(tax, ...)
+
+\method{flags}{taxon_mrca}(tax, ...)
+
+\method{is_suppressed}{taxon_mrca}(tax, ...)
+}
+\arguments{
+\item{ott_ids}{a vector of ott ids for the taxa whose MRCA is to
+be found (numeric).}
+
+\item{...}{additional arguments to customize the API request (see
+\code{\link{rotl}} package documentation).}
+
+\item{tax}{an object generated by the \code{taxonomy_mrca}
+function}
+}
+\value{
+\itemize{
+
+    \item{\code{taxonomy_mrca}} { returns a list about the
+    taxonomic information relating to the MRCA for the ott_ids
+    provided. }
+
+    \item{\code{tax_rank}} { returns a character vector of the
+    taxonomic rank for the MRCA. }
+
+    \item{\code{tax_name}} { returns a character vector of the
+    Open Tree Taxonomy name for the MRCA. }
+
+    \item{\code{ott_id}} { returns a numeric vector of the ott id
+    for the MRCA. }
+
+}
+}
+\description{
+Taxonomic Most Recent Common Ancestor (MRCA)
+}
+\details{
+Given a set of OTT ids, get the taxon that is the most recent common
+ancestor (the MRCA) of all the identified taxa.
+}
+\examples{
+\dontrun{
+req <- taxonomy_mrca(ott_ids=c(515698,590452,643717))
+tax_rank(req)
+tax_name(req)
+ott_id(req)
+}
+}
+
diff --git a/man/taxonomy_subtree.Rd b/man/taxonomy_subtree.Rd
new file mode 100644
index 0000000..1be019a
--- /dev/null
+++ b/man/taxonomy_subtree.Rd
@@ -0,0 +1,68 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/taxonomy.R
+\name{taxonomy_subtree}
+\alias{taxonomy_subtree}
+\title{Taxonomy subtree}
+\usage{
+taxonomy_subtree(ott_id = NULL, output_format = c("taxa", "newick", "phylo",
+  "raw"), label_format = NULL, file, ...)
+}
+\arguments{
+\item{ott_id}{The ott id of the taxon of interest.}
+
+\item{output_format}{the format of the object to be returned. See
+the \sQuote{Return} section.}
+
+\item{label_format}{Character. Defines the label type; one of
+\dQuote{\code{name}}, \dQuote{\code{id}}, or
+ \dQuote{\code{name_and_id}} (the default).}
+
+\item{file}{the file name where to save the output of the
+function. Ignored unless \code{output_format} is set to
+\dQuote{\code{phylo}}.}
+
+\item{...}{additional arguments to customize the API request (see
+\code{\link{rotl}} package documentation).}
+}
+\value{
+If the \code{file} argument is missing: \itemize{
+
+    \item{\dQuote{\code{taxa}}} { a list of the taxa names
+    (species) in slot \code{tip_label}, and higher-level taxonomy
+    (e.g., families, genera) in slot \code{edge_label}, descending
+    from the taxa corresponding to the \code{ott_id} provided. }
+
+    \item{\dQuote{\code{newick}}} { a character vector containing
+    the newick formatted string corresponding to the taxonomic
+    subtree for the \code{ott_id} provided. }
+
+    \item{\dQuote{\code{phylo}}} { an object of the class
+    \code{phylo} from the \code{\link[ape]{ape}} package. }
+
+    \item{\dQuote{\code{raw}}} { the direct output from the API,
+    i.e., a list with an element named \sQuote{newick} that
+    contains the subtree as a newick formatted string. }
+
+    }
+
+    If a \code{file} argument is provided (and
+    \code{output_format} is set to \dQuote{\code{phylo}}), a
+    logical indicating whether the file was successfully created.
+}
+\description{
+Given an ott id, return the inclusive taxonomic subtree descended
+from the specified taxon.
+}
+\details{
+If the output of this function is exported to a file, the only
+possible value for the \code{output_format} argument is
+\dQuote{\code{newick}}. If the file provided already exists, it
+will be silently overwritten.
+}
+\examples{
+\dontrun{
+req <- taxonomy_subtree(ott_id=515698)
+plot(taxonomy_subtree(ott_id=515698, output_format="phylo"))
+}
+}
+
diff --git a/man/taxonomy_taxon_info.Rd b/man/taxonomy_taxon_info.Rd
new file mode 100644
index 0000000..7eeff4a
--- /dev/null
+++ b/man/taxonomy_taxon_info.Rd
@@ -0,0 +1,85 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/taxonomy.R
+\name{taxonomy_taxon_info}
+\alias{flags.taxon_info}
+\alias{is_suppressed.taxon_info}
+\alias{ott_id.taxon_info}
+\alias{synonyms.taxon_info}
+\alias{tax_name.taxon_info}
+\alias{tax_rank.taxon_info}
+\alias{tax_sources.taxon_info}
+\alias{taxonomy_taxon_info}
+\alias{unique_name.taxon_info}
+\title{Taxon information}
+\usage{
+taxonomy_taxon_info(ott_ids, include_children = FALSE,
+  include_lineage = FALSE, include_terminal_descendants = FALSE, ...)
+
+\method{tax_rank}{taxon_info}(tax, ...)
+
+\method{tax_name}{taxon_info}(tax, ...)
+
+\method{unique_name}{taxon_info}(tax, ...)
+
+\method{synonyms}{taxon_info}(tax, ...)
+
+\method{ott_id}{taxon_info}(tax, ...)
+
+\method{tax_sources}{taxon_info}(tax, ...)
+
+\method{is_suppressed}{taxon_info}(tax, ...)
+
+\method{flags}{taxon_info}(tax, ...)
+}
+\arguments{
+\item{ott_ids}{the ott ids of the taxon of interest (numeric or
+character containing only numbers)}
+
+\item{include_children}{whether to include information about all
+the children of this taxon. Default \code{FALSE}.}
+
+\item{include_lineage}{whether to include information about all
+the higher level taxa that include the \code{ott_ids}.
+Default \code{FALSE}.}
+
+\item{include_terminal_descendants}{whether to include the list of
+terminal \code{ott_ids} contained in the \code{ott_ids}
+provided.}
+
+\item{...}{additional arguments to customize the API request (see
+\code{\link{rotl}} package documentation).}
+
+\item{tax}{an object generated by the \code{taxonomy_taxon_info}
+function}
+}
+\value{
+\code{taxonomy_taxon_info} returns a list detailing
+    information about the taxa. \code{tax_rank} and
+    \code{tax_name} return a vector. \code{synonyms} returns a
+    list whose elements are the synonyms for each of the
+    \code{ott_id} requested.
+}
+\description{
+Information about taxa.
+}
+\details{
+Given a vector of ott ids, \code{taxonomy_taxon_info} returns
+information about the specified taxa.
+
+The functions \code{tax_rank}, \code{tax_name}, and
+\code{synonyms} can extract this information from an object
+created by \code{taxonomy_taxon_info()}.
+}
+\examples{
+\dontrun{
+req <- taxonomy_taxon_info(ott_id=515698)
+tax_rank(req)
+tax_name(req)
+synonyms(req)
+}
+}
+\seealso{
+\code{\link{tnrs_match_names}} to obtain \code{ott_id}
+    from a taxonomic name.
+}
+
diff --git a/man/tnrs_contexts.Rd b/man/tnrs_contexts.Rd
new file mode 100644
index 0000000..5fa0eef
--- /dev/null
+++ b/man/tnrs_contexts.Rd
@@ -0,0 +1,32 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/tnrs.R
+\name{tnrs_contexts}
+\alias{tnrs_contexts}
+\title{TNRS contexts}
+\usage{
+tnrs_contexts(...)
+}
+\arguments{
+\item{...}{additional arguments to customize the API request (see
+\code{\link{rotl}} package documentation).}
+}
+\value{
+Returns invisibly a list for each major clade (e.g.,
+    animals, microbes, plants, fungi, life) whose elements
+    contain the possible contexts.
+}
+\description{
+This function returns a list of pre-defined taxonomic contexts
+(i.e. clades) which can be used to limit the scope of tnrs
+queries.
+}
+\details{
+Taxonomic contexts are available to limit the scope of TNRS
+searches. These contexts correspond to uncontested higher taxa
+such as 'Animals' or 'Land plants'. This service returns a list
+containing all available taxonomic context names, which may be
+used as input (via the \code{context_name} argument in other
+functions) to limit the search scope of other services including
+\code{\link{tnrs_match_names}}.
+}
+
diff --git a/man/tnrs_infer_context.Rd b/man/tnrs_infer_context.Rd
new file mode 100644
index 0000000..43d7964
--- /dev/null
+++ b/man/tnrs_infer_context.Rd
@@ -0,0 +1,35 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/tnrs.R
+\name{tnrs_infer_context}
+\alias{tnrs_infer_context}
+\title{Infer the taxonomic context from a list of names}
+\usage{
+tnrs_infer_context(names = NULL, ...)
+}
+\arguments{
+\item{names}{Vector of taxon names.}
+
+\item{...}{additional arguments to customize the API request (see
+\code{\link{rotl}} package documentation).}
+}
+\value{
+A list including the context name, the context ott id and
+    possibly the names in the query that have an ambiguous
+    taxonomic meaning in the query.
+}
+\description{
+Return a taxonomic context given a list of taxonomic names
+}
+\details{
+Find the least inclusive taxonomic context that includes all the
+unambiguous names in the input set. Unambiguous names are names
+with exact matches to non-homonym taxa. Ambiguous names (those
+without exact matches to non-homonym taxa) are indicated in
+results.
+}
+\examples{
+\dontrun{
+res <- tnrs_infer_context(names=c("Stellula calliope", "Struthio camelus"))
+}
+}
+
diff --git a/man/tnrs_match_names.Rd b/man/tnrs_match_names.Rd
new file mode 100644
index 0000000..e7fd658
--- /dev/null
+++ b/man/tnrs_match_names.Rd
@@ -0,0 +1,82 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/tnrs.R
+\name{tnrs_match_names}
+\alias{tnrs_match_names}
+\title{Match names to the Open Tree Taxonomy}
+\usage{
+tnrs_match_names(names = NULL, context_name = NULL,
+  do_approximate_matching = TRUE, ids = NULL, include_suppressed = FALSE,
+  ...)
+}
+\arguments{
+\item{names}{taxon names to be queried. Currently limited to
+10,000 names for exact matches and 2,500 names for approximate
+matches (character vector)}
+
+\item{context_name}{name of the taxonomic context to be searched
+(length-one character vector). Must match (case sensitive) one
+of the values returned by \code{\link{tnrs_contexts}}.}
+
+\item{do_approximate_matching}{A logical indicating whether or not
+to perform approximate string (a.k.a. \dQuote{fuzzy})
+matching. Using \code{FALSE} will greatly improve
+speed. Default, however, is \code{TRUE}.}
+
+\item{ids}{A vector of ids to use for identifying names. These
+will be assigned to each name in the names array. If ids is
+provided, then ids and names must be identical in length.}
+
+\item{include_suppressed}{Ordinarily, some quasi-taxa, such as
+incertae sedis buckets and other non-OTUs, are suppressed from
+TNRS results. If this parameter is true, these quasi-taxa are
+allowed as possible TNRS results.}
+
+\item{...}{additional arguments to customize the API request (see
+\code{\link{rotl}} package documentation).}
+}
+\value{
+A data frame summarizing the results of the query. The
+    original query output is appended as an attribute to the
+    returned object (and can be obtained using \code{attr(object,
+    "original_response")}).
+}
+\description{
+Match taxonomic names to the Open Tree Taxonomy.
+}
+\details{
+Accepts one or more taxonomic names and returns information about
+potential matches for these names to known taxa in the Open Tree
+Taxonomy.
+
+This service uses taxonomic contexts to disambiguate homonyms and
+misspelled names; a context may be specified using the
+\code{context_name} argument. If no context is specified, then the
+context will be inferred (i.e., the shallowest taxonomic context
+that contains all unambiguous names in the input). Taxonomic
+contexts are uncontested higher taxa that have been selected to
+allow limits to be applied to the scope of TNRS searches
+(e.g. 'match names only within flowering plants'). Once a context
+has been identified (either user-specified or inferred), all taxon
+name matches will be performed only against taxa within that
+context. For a list of available taxonomic contexts, see
+\code{\link{tnrs_contexts}}.
+
+A name is considered unambiguous if it is not a synonym and has
+only one exact match to any taxon name in the entire taxonomy.
+
+Several functions listed in the \sQuote{See also} section can be
+used to inspect and manipulate the object generated by this
+function.
+}
+\examples{
+\dontrun{
+ deuterostomes <- tnrs_match_names(names=c("echinodermata", "xenacoelomorpha",
+                                            "chordata", "hemichordata"))
+}
+}
+\seealso{
+\code{\link{inspect.match_names}},
+    \code{\link{update.match_names}},
+    \code{\link{synonyms.match_names}}.
+}
+
diff --git a/man/tol_about.Rd b/man/tol_about.Rd
new file mode 100644
index 0000000..e27b95f
--- /dev/null
+++ b/man/tol_about.Rd
@@ -0,0 +1,123 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/tol.R
+\name{tol_about}
+\alias{ott_id.tol_summary}
+\alias{tax_name.tol_summary}
+\alias{tax_rank.tol_summary}
+\alias{tax_sources.tol_summary}
+\alias{tol_about}
+\alias{unique_name.tol_summary}
+\title{Information about the Tree of Life}
+\usage{
+tol_about(include_source_list = FALSE, ...)
+
+\method{tax_rank}{tol_summary}(tax, ...)
+
+\method{tax_sources}{tol_summary}(tax, ...)
+
+\method{unique_name}{tol_summary}(tax, ...)
+
+\method{tax_name}{tol_summary}(tax, ...)
+
+\method{ott_id}{tol_summary}(tax, ...)
+}
+\arguments{
+\item{include_source_list}{Logical (default =
+\code{FALSE}). Return an ordered list of source trees.}
+
+\item{...}{additional arguments to customize the API call (see
+\code{\link{rotl}} for more information).}
+
+\item{tax}{an object created with a call to \code{tol_about}.}
+}
+\value{
+An invisible list of synthetic tree summary statistics:
+
+\itemize{
+
+    \item {date_created} {String. The creation date of the tree.}
+
+    \item {num_source_studies} {Integer. The number of studies
+    (publications) used as sources.}
+
+    \item {num_source_trees} {The number of trees used as sources
+    (may be >1 tree per study).}
+
+    \item {taxonomy_version} {The Open Tree Taxonomy version used
+    as a source.}
+
+    \item {filtered_flags} {List. Taxa with these taxonomy flags were
+    not used in construction of the tree.}
+
+    \item {root} {List. Describes the root node:}
+        \itemize{
+            \item {node_id} {String. The canonical identifier of the node.}
+
+            \item {num_tips} {Numeric. The number of descendent tips.}
+
+            \item {taxon} {A list of taxonomic properties:}
+            \itemize{
+                \item {ott_id} {Numeric. The OpenTree Taxonomy ID (ott_id).}
+
+                \item {name} {String. The taxonomic name of the queried node.}
+
+                \item {unique_name} {String. The string that uniquely
+                identifies the taxon in OTT.}
+
+                \item {rank} {String. The taxonomic rank of the taxon in OTT.}
+
+                \item {tax_sources} {List. A list of identifiers for taxonomic
+                sources, such as other taxonomies, that define taxa judged
+                equivalent to this taxon.}
+            }
+        }
+
+    \item {source_list} {List. Present only if
+    \code{include_source_list} is \code{TRUE}. The sourceid
+    ordering is the precedence order for synthesis, with
+    relationships from earlier trees in the list having priority
+    over those from later trees in the list. See
+    \code{source_id_map} below for study details.}
+
+    \item {source_id_map} {Named list of lists. Present only if
+    \code{include_source_list} is \code{TRUE}. Names correspond to
+    the \sQuote{sourceids} used in \code{source_list}
+    above. Source trees will have the following properties:}
+
+        \itemize{
+            \item {git_sha} {String. The git SHA identifying a particular source
+            version.}
+
+            \item {tree_id} {String. The tree id associated with the study id used.}
+
+            \item {study_id} {String. The study identifier. Will typically include
+            a prefix ("pg_" or "ot_").}
+        }
+
+    \item {synth_id} {The unique string for this version of the tree.}
+}
+}
+\description{
+Basic information about the Open Tree of Life (the synthetic tree)
+}
+\details{
+Summary information about the current draft tree of life,
+    including information about the list of trees and the taxonomy
+    used to build it. The object returned by \code{tol_about} can
+    be passed to the taxonomy methods (\code{tax_name()},
+    \code{tax_rank()}, \code{tax_sources()}, \code{ott_id}), to
+    extract relevant taxonomic information for the root of the
+    synthetic tree.
+}
+\examples{
+\dontrun{
+res <- tol_about()
+tax_sources(res)
+ott_id(res)
+studies <- source_list(tol_about(include_source_list=TRUE))}
+}
+\seealso{
+\code{\link{source_list}} to explore the list of studies
+    used in the synthetic tree (see example).
+}
+
diff --git a/man/tol_induced_subtree.Rd b/man/tol_induced_subtree.Rd
new file mode 100644
index 0000000..6818bc5
--- /dev/null
+++ b/man/tol_induced_subtree.Rd
@@ -0,0 +1,50 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/tol.R
+\name{tol_induced_subtree}
+\alias{tol_induced_subtree}
+\title{Subtree from the Open Tree of Life}
+\usage{
+tol_induced_subtree(ott_ids = NULL, node_ids = NULL, label_format = NULL,
+  file, ...)
+}
+\arguments{
+\item{ott_ids}{Numeric vector. OTT ids indicating nodes to be used
+as tips in the induced tree.}
+
+\item{node_ids}{Character vector. Node ids indicating nodes to be used
+as tips in the induced tree.}
+
+\item{label_format}{Character. Defines the label type; one of
+\dQuote{\code{name}}, \dQuote{\code{id}}, or
+ \dQuote{\code{name_and_id}} (the default).}
+
+\item{file}{If specified, the function will write the subtree to a
+file in newick format.}
+
+\item{...}{additional arguments to customize the API call (see
+\code{\link{rotl}} for more information).}
+}
+\value{
+If no value is specified to the \code{file} argument
+    (default), a phylogenetic tree of class \code{phylo}.
+
+    Otherwise, the function returns invisibly a logical indicating
+    whether the file was successfully created.
+}
+\description{
+Return the induced subtree on the synthetic tree that relates a list of nodes.
+}
+\details{
+Return a tree with tips corresponding to the nodes identified in
+the input set that is consistent with the topology of the current
+synthetic tree. This tree is equivalent to the minimal subtree
+induced on the draft tree by the set of identified nodes.
+}
+\examples{
+\dontrun{
+res <- tol_induced_subtree(ott_ids=c(292466, 267845, 666104, 316878, 102710))
+tree_file <- tempfile(fileext=".tre")
+tol_induced_subtree(ott_ids=c(292466, 267845, 666104, 316878, 102710),
+                    file=tree_file)}
+}
+
diff --git a/man/tol_mrca.Rd b/man/tol_mrca.Rd
new file mode 100644
index 0000000..7a09e15
--- /dev/null
+++ b/man/tol_mrca.Rd
@@ -0,0 +1,157 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/tol.R
+\name{tol_mrca}
+\alias{ott_id.tol_mrca}
+\alias{source_list.tol_mrca}
+\alias{tax_name.tol_mrca}
+\alias{tax_rank.tol_mrca}
+\alias{tax_sources.tol_mrca}
+\alias{tol_mrca}
+\alias{unique_name.tol_mrca}
+\title{MRCA of taxa from the synthetic tree}
+\usage{
+tol_mrca(ott_ids = NULL, node_ids = NULL, ...)
+
+\method{tax_sources}{tol_mrca}(tax, ...)
+
+\method{unique_name}{tol_mrca}(tax, ...)
+
+\method{tax_name}{tol_mrca}(tax, ...)
+
+\method{tax_rank}{tol_mrca}(tax, ...)
+
+\method{ott_id}{tol_mrca}(tax, ...)
+
+\method{source_list}{tol_mrca}(tax, ...)
+}
+\arguments{
+\item{ott_ids}{Numeric vector. The ott ids for which the MRCA is desired.}
+
+\item{node_ids}{Character vector. The node ids for which the MRCA is desired.}
+
+\item{...}{additional arguments to customize the API call (see
+\code{\link{rotl}} for more information).}
+
+\item{tax}{an object returned by \code{tol_mrca()}.}
+}
+\value{
+An invisible list of the MRCA node properties:
+
+\itemize{
+
+    \item {mrca} {List of node properties.}
+
+    \itemize{
+        \item {node_id} {String. The canonical identifier of the node.}
+
+        \item {num_tips} {Numeric. The number of descendent tips.}
+
+        \item {taxon} {A list of taxonomic properties. Only returned if
+        the queried node is a taxon. (If the node is not a taxon, a
+        \code{nearest_taxon} list is returned (see below)).}
+
+            \itemize{
+                \item {ott_id} {Numeric. The OpenTree Taxonomy ID (ottID).}
+
+                \item {name} {String. The taxonomic name of the queried node.}
+
+                \item {unique_name} {String. The string that uniquely
+                identifies the taxon in OTT.}
+
+                \item {rank} {String. The taxonomic rank of the taxon in OTT.}
+
+               \item {tax_sources} {List. A list of identifiers for taxonomic
+                sources, such as other taxonomies, that define taxa judged
+                equivalent to this taxon.}
+            }
+
+        The following properties list support/conflict for the node across
+        synthesis source trees. All properties involve sourceid keys and
+        nodeid values (see \code{source_id_map} below). Not all properties
+        are present for every node.
+
+        \item {partial_path_of} {List. The edge below this synthetic tree node
+        is compatible with the edge below each of these input tree nodes (one
+        per tree). Each returned element is reported as sourceid:nodeid.}
+
+        \item {supported_by} {List. Input tree nodes (one per tree) that support
+        this synthetic tree node. Each returned element is reported as
+        sourceid:nodeid.}
+
+        \item {terminal} {List. Input tree nodes (one per tree) that are equivalent
+        to this synthetic tree node (via an exact mapping, or the input tree
+        terminal may be the only terminal descended from this synthetic tree node).
+        Each returned element is reported as sourceid:nodeid.}
+
+        \item {conflicts_with} {Named list of lists. Names correspond to
+        sourceid keys. Each list contains input tree node ids (one or more per tree)
+        that conflict with this synthetic node.}
+    }
+
+    \item {nearest_taxon} {A list of taxonomic properties of the nearest rootward
+    taxon node to the MRCA node. Only returned if the MRCA node is not a taxon
+    (otherwise the \code{taxon} list above is returned).}
+
+        \itemize{
+            \item {ott_id} {Numeric. The OpenTree Taxonomy ID (ottID).}
+
+            \item {name} {String. The taxonomic name of the queried node.}
+
+            \item {unique_name} {String. The string that uniquely
+            identifies the taxon in OTT.}
+
+            \item {rank} {String. The taxonomic rank of the taxon in OTT.}
+
+           \item {tax_sources} {List. A list of identifiers for taxonomic
+            sources, such as other taxonomies, that define taxa judged
+            equivalent to this taxon.}
+        }
+
+    \item {source_id_map} {Named list of lists. Names correspond to the
+    sourceid keys used in the support/conflict properties of the \code{mrca}
+    list above. Source trees will have the following properties:}
+
+        \itemize{
+            \item {git_sha} {The git SHA identifying a particular source
+            version.}
+
+            \item {tree_id} {The tree id associated with the study id used.}
+
+            \item {study_id} {The study identifier. Will typically include
+            a prefix ("pg_" or "ot_").}
+        }
+    The only sourceid that does not correspond to a source tree is the taxonomy,
+    which will have the name "ott"+`taxonomy_version`, and the value is the
+    ott_id of the taxon in that taxonomy version. "Taxonomy" will only ever
+    appear in \code{supported_by}.
+
+   }
+}
+\description{
+Most Recent Common Ancestor for a set of nodes
+}
+\details{
+Get the MRCA of a set of nodes on the current synthetic
+    tree. Accepts any combination of node ids and ott ids as
+    input. Returns information about the most recent common
+    ancestor (MRCA) node as well as the most recent taxonomic
+    ancestor (MRTA) node (the closest taxonomic node to the MRCA
+    node in the synthetic tree; the MRCA and MRTA may be the same
+    node). If they are the same, the taxonomic information will be
+    in the \code{mrca} slot, otherwise they will be in the
+    \code{nearest_taxon} slot of the list. If any of the specified
+    nodes is not in the synthetic tree an error will be returned.
+
+    Taxonomic methods (\code{tax_sources()}, \code{ott_id()},
+    \code{unique_name()}, ...) are available on the objects
+    returned by \code{tol_mrca()}. If the MRCA node is MRTA, the
+    name of the object returned by these methods will start with
+    \sQuote{ott}, otherwise it will start with \sQuote{mrca}.
+}
+\examples{
+\dontrun{
+birds_mrca <- tol_mrca(ott_ids=c(412129, 536234))
+ott_id(birds_mrca)
+tax_sources(birds_mrca)}
+}
+
diff --git a/man/tol_node_info.Rd b/man/tol_node_info.Rd
new file mode 100644
index 0000000..83efc83
--- /dev/null
+++ b/man/tol_node_info.Rd
@@ -0,0 +1,146 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/methods.R, R/tol.R
+\name{tol_lineage}
+\alias{ott_id.tol_node}
+\alias{source_list.tol_node}
+\alias{tax_lineage.tol_node}
+\alias{tax_name.tol_node}
+\alias{tax_rank.tol_node}
+\alias{tax_sources.tol_node}
+\alias{tol_lineage}
+\alias{tol_lineage.tol_node}
+\alias{tol_node_info}
+\alias{unique_name.tol_node}
+\title{Node info}
+\usage{
+tol_lineage(tax, ...)
+
+tol_node_info(ott_id = NULL, node_id = NULL, include_lineage = FALSE, ...)
+
+\method{tax_rank}{tol_node}(tax, ...)
+
+\method{tax_sources}{tol_node}(tax, ...)
+
+\method{unique_name}{tol_node}(tax, ...)
+
+\method{tax_name}{tol_node}(tax, ...)
+
+\method{ott_id}{tol_node}(tax, ...)
+
+\method{source_list}{tol_node}(tax, ...)
+
+\method{tax_lineage}{tol_node}(tax, ...)
+
+\method{tol_lineage}{tol_node}(tax, ...)
+}
+\arguments{
+\item{tax}{an object returned by \code{tol_node_info}.}
+
+\item{...}{additional arguments to customize the API call (see
+?rotl for more information)}
+
+\item{ott_id}{Numeric. The OpenTree taxonomic identifier.}
+
+\item{node_id}{Character. The OpenTree node identifier.}
+
+\item{include_lineage}{Logical (default = FALSE). Whether to return the
+lineage of the node from the synthetic tree.}
+}
+\value{
+\code{tol_node_info} returns an invisible list of summary
+    information about the queried node:
+
+\itemize{
+
+    \item {node_id} {String. The canonical identifier of the node.}
+
+    \item {num_tips} {Numeric. The number of descendent tips.}
+
+    \item {taxon} {A list of taxonomic properties. Only returned if
+    the queried node is a taxon. Each source has:}
+
+        \itemize{
+            \item {ott_id} {Numeric. The OpenTree Taxonomy ID (ottID).}
+
+            \item {name} {String. The taxonomic name of the queried node.}
+
+            \item {unique_name} {String. The string that uniquely
+            identifies the taxon in OTT.}
+
+            \item {rank} {String. The taxonomic rank of the taxon in OTT.}
+
+            \item {tax_sources} {List. A list of identifiers for taxonomic
+            sources, such as other taxonomies, that define taxa judged
+            equivalent to this taxon.}
+        }
+
+    The following properties list support/conflict for the node across
+    synthesis source trees. All properties involve sourceid keys and
+    nodeid values (see \code{source_id_map} below).
+
+    \item {partial_path_of} {List. The edge below this synthetic tree node
+    is compatible with the edge below each of these input tree nodes (one
+    per tree). Each returned element is reported as sourceid:nodeid.}
+
+    \item {supported_by} {List. Input tree nodes (one per tree) that support
+    this synthetic tree node. Each returned element is reported as
+    sourceid:nodeid.}
+
+    \item {terminal} {List. Input tree nodes (one per tree) that are equivalent
+    to this synthetic tree node (via an exact mapping, or the input tree
+    terminal may be the only terminal descended from this synthetic tree node).
+    Each returned element is reported as sourceid:nodeid.}
+
+    \item {conflicts_with} {Named list of lists. Names correspond to
+    sourceid keys. Each list contains input tree node ids (one or more per tree)
+    that conflict with this synthetic node.}
+
+    \item {source_id_map} {Named list of lists. Names correspond to the
+    sourceid keys used in the 4 properties above. Source trees will have the
+    following properties:}
+
+        \itemize{
+            \item {git_sha} {The git SHA identifying a particular source
+            version.}
+
+            \item {tree_id} {The tree id associated with the study id used.}
+
+            \item {study_id} {The study identifier. Will typically include
+            a prefix ("pg_" or "ot_").}
+        }
+    The only sourceid that does not correspond to a source tree is the taxonomy,
+    which will have the name "ott"+`taxonomy_version`, and the value is the
+    ott_id of the taxon in that taxonomy version. "Taxonomy" will only ever
+    appear in \code{supported_by}.
+
+   }
+
+    \code{tol_lineage} and \code{tax_lineage} return data
+        frames. \code{tol_lineage} indicates for each ancestor its
+        node identifier, the number of tips descending from that
+        node, and whether it corresponds to a taxonomic level.
+}
+\description{
+Get summary information about a node in the synthetic tree
+}
+\details{
+Returns summary information about a node in the graph. The
+    node of interest may be specified using either a node id or a
+    taxon id, but not both. If the specified node or OTT id is not
+    in the graph, an error will be returned.
+
+    If the argument \code{include_lineage=TRUE} is used, you can
+    use \code{tax_lineage()} or \code{tol_lineage} to return the
+    taxonomic information or the node information for all the
+    ancestors to this node, down to the root of the tree.
+}
+\examples{
+\dontrun{
+birds <- tol_node_info(ott_id=81461, include_lineage=TRUE)
+source_list(birds)
+tax_rank(birds)
+ott_id(birds)
+tax_lineage(birds)
+tol_lineage(birds)}
+}
+
diff --git a/man/tol_subtree.Rd b/man/tol_subtree.Rd
new file mode 100644
index 0000000..569713a
--- /dev/null
+++ b/man/tol_subtree.Rd
@@ -0,0 +1,46 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/tol.R
+\name{tol_subtree}
+\alias{tol_subtree}
+\title{Extract a subtree from the synthetic tree}
+\usage{
+tol_subtree(ott_id = NULL, node_id = NULL, label_format = NULL, file, ...)
+}
+\arguments{
+\item{ott_id}{Numeric. The ott id of the node in the tree that should
+serve as the root of the tree returned.}
+
+\item{node_id}{Character. The node id of the node in the tree that should
+serve as the root of the tree returned.}
+
+\item{label_format}{Character. Defines the label type; one of
+\dQuote{\code{name}}, \dQuote{\code{id}}, or
+ \dQuote{\code{name_and_id}} (the default).}
+
+\item{file}{If specified, the function will write the subtree to a
+file in newick format.}
+
+\item{...}{additional arguments to customize the API call (see
+\code{\link{rotl}} for more information).}
+}
+\value{
+If no value is specified to the \code{file} argument
+    (default), a phylogenetic tree of class \code{phylo}.
+    Otherwise, the function returns invisibly a logical indicating
+    whether the file was successfully created.
+}
+\description{
+Extract a subtree from the synthetic tree from an Open Tree node id.
+}
+\details{
+Given a node, return the subtree of the synthetic tree descended
+    from that node. The start node may be specified using either a node id
+    or an ott id, but not both. If the specified node is not in the
+    synthetic tree an error will be returned. There is a size limit of
+    25000 tips for this method.
+}
+\examples{
+\dontrun{
+res <- tol_subtree(ott_id=241841)}
+}
+
diff --git a/tests/test-all.R b/tests/test-all.R
new file mode 100644
index 0000000..1df8195
--- /dev/null
+++ b/tests/test-all.R
@@ -0,0 +1,3 @@
+###
+library(testthat)
+test_check('rotl')
diff --git a/tests/testthat/test-API.R b/tests/testthat/test-API.R
new file mode 100644
index 0000000..1c8ce03
--- /dev/null
+++ b/tests/testthat/test-API.R
@@ -0,0 +1,195 @@
+####
+## Making use of the shared OpenTree testing architecture
+####
+## The R, Python and Ruby wrappers for the Open Tree share a very similar design,
+## allowing them to make use of a single test suite for the low-level functions
+## (thus, the tests check both that an individual library works as expected, and
+## that the different libraries stay in line with each other).
+##
+## This file pulls the current version of the tests from a GitHub repo
+## (https://github.com/OpenTreeOfLife/shared-api-tests) and translates the JSON
+## files into tests that run in testthat. This takes a considerable amount of
+## infrastructure so I'll briefly describe the rationale here.
+##
+## The JSON test specification is defined at the GitHub repo linked above. To
+## translate these tests I have created custom testthat expectation functionals
+## (contains(), key_has_value(), ...). Because many of the test blocks in the
+## JSON files have multiple expectations (i.e. many key-value pairs for
+## test_equals) there are functions starting with `test_` that run an entire
+## test block for a given expectation. Since many of these tests require
+## translation between R objects and JSON-encoded strings there is a set of
+## convenience functions to automate that step and a function "test_map" that
+## returns the appropriate test_* function for a given JSON test block.
+##
+## Finally, testthat_json_test uses the above functions to run an entire test
+## from a JSON object, and run_shared_test() runs every test in a JSON file.
+
+
+
+
+#functionals that start with a response
+contains <- function(key_name){
+    ## Expectation functional: the closure checks that `key_name` is in names(x).
+    function(x) expectation(is.element(key_name, names(x)),
+                            sprintf("Missing key name: %s", key_name))
+}
+
+key_has_value <- function(key, value){
+    ## Expectation functional on x[[key]]; multi-value messages collapse to one string.
+    function(x){
+        if(length(value) == 0){
+            expectation(length(x[[key]]) == 0,
+                        paste("Key", key, "is not empty"))
+        }
+        else if(length(value)==1){
+            expectation(x[[key]] == value,
+                        paste("Key", key, "doesn't have value", value))
+        }
+        else{
+            expectation(all(x[[key]] %in% value),
+                        paste("Key", key, "doesn't contain all of",
+                              paste(value, collapse = ", ")))
+        }
+    }
+}
+
+value_is_longer_than <- function(key, len){
+    ## Expectation functional: the closure passes when x[[key]] holds more
+    ## than `len` elements.
+    function(x) expectation(length(x[[key]]) > len,
+                            paste("Value for key", key, "is shorter than", len))
+}
+
+value_is_error <- function(key_name){
+    ## Expectation functional: the closure passes when x[[key_name]] is
+    ## equal to the string 'error'.
+    not_error_msg <- sprintf("Key %s is not 'error'",key_name)
+    function(x) expectation(x[[key_name]] == 'error', not_error_msg)
+}
+
+## Functions to test entire test blocks with the above expectations
+
+test_contains <- function(response, test_block){
+    ## every entry in the first column of the block is a key the response must have
+    sapply(test_block[,1], function(key) expect_that(response, contains(key)))
+}
+
+test_equals <- function(response, test_block){
+    kv_pairs <- matrix(sapply(test_block, "[[", 1), nrow = 2)  # assumes column i = i-th (key, value) pair
+    for(i in seq_len(ncol(kv_pairs))){  # index by i: the old loop re-checked the first pair only
+        expect_that(response, key_has_value(kv_pairs[[1, i]], kv_pairs[[2, i]]))
+    }
+}
+
+test_of_type <- function(response, test_block){
+    ## map the JSON type name in the block to an R class, then check the response
+    expect_that(response, is_a(type_map(test_block[[1]])))
+}
+
+test_deep_equals <- function(response, test_block){
+    cat("*")           # prints a marker; neither argument is inspected
+    expect_true(TRUE)  # always passes, so deep_equals blocks are not actually checked
+}
+
+
+test_length_greater_than <- function(response, test_block){
+    ## each column of the simplified block is one (key, minimum length) pair
+    check_pair <- function(pair) expect_that(response, value_is_longer_than(pair[[1]], pair[[2]]))
+    apply(sapply(test_block, "[[", 1), 2, check_pair)
+}
+
+test_contains_error <- function(response, test_block){
+    ## uses value_is_error(): no contains_error() functional exists in this file
+    sapply(test_block[,1], function(e) expect_that(response, value_is_error(e)))
+}
+
+## convenience functions
+obj_map <- function(input){
+    ## Only a single string is translated: "true", "false" and "null" (in
+    ## any letter case) become TRUE, FALSE and NULL. Any other input,
+    ## including other single strings, comes back unchanged.
+    is_single_string <- is.character(input) && length(input)==1
+    if(!is_single_string) return(input)
+    switch(tolower(input),
+           "true" = TRUE,
+           "false" = FALSE,
+           "null"  = NULL,
+           input)
+}
+
+json_to_r <- function(test_input){
+    ## Empty input is returned untouched; otherwise every element goes
+    ## through obj_map() and the result is a list.
+    if(length(test_input) > 0) lapply(test_input, obj_map)
+    else test_input
+}
+
+type_map <- function(json_type){
+    ## Translate a JSON type name into the matching R class name; only
+    ## "dict" is known, anything else stops with an error.
+    unknown_msg <- sprintf("unknown json type in testing file: %s", json_type)
+    switch(json_type, "dict" = "list", stop(unknown_msg))
+}
+
+
+test_map <- function(test_type){
+    ## Dispatch a JSON test-block name to the test_* function that runs it.
+    switch(test_type,
+           "contains"    = test_contains,
+           "equals"      = test_equals,
+           "deep_equals" = test_deep_equals,
+           "error"       = stop("Error tests should be handled first"),
+           "length_greater_than" = test_length_greater_than,
+           "of_type"     = test_of_type,
+           stop(sprintf("Unknown test type in JSON test: %s", test_type)))
+}
+
+make_request <- function(json_test){
+    ## call the function named by test_function, prefixed with a dot
+    r_args <- json_to_r(json_test$test_input)
+    do.call(paste0(".", json_test$test_function), r_args)
+}
+
+
+testthat_json_test <- function(test_obj, test_name){
+    ## A lone "error" block means the request itself must fail; anything else
+    ## (even a single non-error block, formerly skipped) is run on the response.
+    json_test <- test_obj[[test_name]]
+    tests_to_run <- names(json_test$tests)
+    if(length(tests_to_run) == 1 && grepl("error", tests_to_run)){
+        expect_error( make_request(json_test) )
+    }
+    else{
+        response <- make_request(json_test)
+        for(test_type in tests_to_run){
+            test_fxn <- test_map(test_type)
+            test_fxn(response, json_test$tests[[test_type]])
+        }
+    }
+}
+
+run_shared_test <- function(json_obj){
+   ## one test_that() block per named test; an empty JSON object runs nothing
+   for(test_name in names(json_obj)) {
+       test_that(test_name, {
+           skip_on_cran()
+           testthat_json_test(json_obj, test_name)
+       })
+   }
+}
+
+
+## if (identical(Sys.getenv("NOT_CRAN"), "true")) {
+##     base_url <- "https://raw.githubusercontent.com/OpenTreeOfLife/shared-api-tests/master/"
+##     apis <- c("graph_of_life",
+##               "studies",
+##               "taxonomy",
+##               "tree_of_life",
+##               "tnrs"
+##               )
+##     for(i in 1:length(apis)){
+##         context( paste(apis[i], "API") )
+##         test_text <- httr::GET(paste0(base_url, apis[i], ".json"))
+##         test_description <- jsonlite::fromJSON(httr::content(test_text))
+##         run_shared_test(test_description)
+##     }
+## }
diff --git a/tests/testthat/test-api-studies.R b/tests/testthat/test-api-studies.R
new file mode 100644
index 0000000..e8765d5
--- /dev/null
+++ b/tests/testthat/test-api-studies.R
@@ -0,0 +1,195 @@
+context("studies API tests")
+
+
+###########################
+## .studies_find_studies ##
+###########################
+
+test_that("argument verbose needs to be logical for .studies_find_studies", {
+    skip_on_cran()
+    expect_error(.studies_find_studies(NULL, NULL, "123", FALSE),
+                 "logical")
+})
+
+test_that("argument exact needs to be logical for .studies_find_studies", {
+    skip_on_cran()
+    expect_error(.studies_find_studies(NULL, NULL, TRUE, "123"),
+                 "logical")
+})
+
+test_that("argument property needs to be character for .studies_find_studies", {
+    skip_on_cran()
+    expect_error(.studies_find_studies(123, NULL, TRUE, TRUE),
+                 "character")
+})
+
+test_that("argument value needs to be character for .studies_find_studies", {
+    skip_on_cran()
+    expect_error(.studies_find_studies("test", 123, TRUE, TRUE),
+                 "character")
+})
+
+test_that("both property & value need to be provided for .studies_find_studies", {
+    skip_on_cran()
+    expect_error(.studies_find_studies("test", NULL, TRUE, TRUE),
+                 "Must supply")
+})
+
+test_that("both property & value need to be provided for .studies_find_studies", {
+    skip_on_cran()
+    expect_error(.studies_find_studies(NULL, "test", TRUE, TRUE),
+                 "Must supply")
+})
+
+
+###########################
+## .studies_find_trees ##
+###########################
+
+test_that("argument verbose needs to be logical for .studies_find_trees", {
+    skip_on_cran()
+    expect_error(.studies_find_trees(NULL, NULL, "123", FALSE),
+                 "logical")
+})
+
+test_that("argument exact needs to be logical for .studies_find_trees", {
+    skip_on_cran()
+    expect_error(.studies_find_trees(NULL, NULL, TRUE, "123"),
+                 "logical")
+})
+
+test_that("argument property needs to be character for .studies_find_trees", {
+    skip_on_cran()
+    expect_error(.studies_find_trees(123, NULL, TRUE, TRUE),
+                 "character")
+})
+
+test_that("argument value needs to be character for .studies_find_trees", {
+    skip_on_cran()
+    expect_error(.studies_find_trees("test", 123, TRUE, TRUE),
+                 "character")
+})
+
+test_that("both property & value need to be provided for .studies_find_trees", {
+    skip_on_cran()
+    expect_error(.studies_find_trees("test", NULL, TRUE, TRUE),
+                 "Must supply")
+})
+
+test_that("both property & value need to be provided for .studies_find_trees", {
+    skip_on_cran()
+    expect_error(.studies_find_trees(NULL, "test", TRUE, TRUE),
+                 "Must supply")
+})
+
+test_that("exact works as intended", {
+    skip_on_cran()
+    expect_equal(length(.studies_find_studies("ot:focalCladeOTTTaxonName",
+                                              "felidae", exact = TRUE)$matched_studies), 0)
+})
+
+
+test_that("exact works as intended", {
+    skip_on_cran()
+    expect_true(length(.studies_find_studies("ot:focalCladeOTTTaxonName",
+                                             "Felidae", exact = TRUE)$matched_studies) >= 1)
+})
+
+############################################################################
+## .get_study                                                             ##
+############################################################################
+
+
+test_that("study_id isn't NULL for .get_study", {
+    skip_on_cran()
+    expect_error(.get_study(NULL, "test"),
+                 "Must supply")
+})
+
+test_that("study_id is character for .get_study", {
+    skip_on_cran()
+    expect_error(.get_study(TRUE, "test"),
+                 "character")
+})
+
+
+############################################################################
+## .get_study_tree                                                        ##
+############################################################################
+
+test_that("study_id isn't NULL for .get_study_tree", {
+    skip_on_cran()
+    expect_error(.get_study_tree(NULL, NULL),
+                 "Must supply")
+})
+
+test_that("tree_id isn't NULL for .get_study_tree", {
+    skip_on_cran()
+    expect_error(.get_study_tree("123", NULL),
+                 "Must supply")
+})
+
+test_that("study_id isn't NULL for .get_study_tree", {
+    skip_on_cran()
+    expect_error(.get_study_tree(NULL, "123"),
+                 "Must supply")
+})
+
+test_that("study_id is character for .get_study_tree", {
+    skip_on_cran()
+    expect_error(.get_study_tree(TRUE, "test"),
+                 "character")
+})
+
+test_that("tree_id is character for .get_study_tree", {
+    skip_on_cran()
+    expect_error(.get_study_tree("test", TRUE),
+                 "character")
+})
+
+
+############################################################################
+## .get_study_subtree                                                        ##
+############################################################################
+
+test_that("study_id isn't NULL for .get_study_subtree", {
+    skip_on_cran()
+    expect_error(.get_study_subtree(NULL, NULL, NULL),
+                 "Must supply")
+})
+
+test_that("tree_id isn't NULL for .get_study_subtree", {
+    skip_on_cran()
+    expect_error(.get_study_subtree("123", NULL, "123"),
+                 "Must supply")
+})
+
+test_that("study_id isn't NULL for .get_study_subtree", {
+    skip_on_cran()
+    expect_error(.get_study_subtree(NULL, "123", "123"),
+                 "Must supply")
+})
+
+test_that("subtree_id isn't NULL for .get_study_subtree", {
+    skip_on_cran()
+    expect_error(.get_study_subtree("123", "123", NULL),
+                 "Must supply")
+})
+
+test_that("study_id is character for .get_study_subtree", {
+    skip_on_cran()
+    expect_error(.get_study_subtree(TRUE, "test", "test"),
+                 "character")
+})
+
+test_that("tree_id is character for .get_study_subtree", {
+    skip_on_cran()
+    expect_error(.get_study_subtree("test", TRUE, "test"),
+                 "character")
+})
+
+test_that("subtree_id is character for .get_study_subtree", {
+    skip_on_cran()
+    expect_error(.get_study_subtree("test", "test", TRUE),
+                 "character")
+})
diff --git a/tests/testthat/test-api-taxonomy.R b/tests/testthat/test-api-taxonomy.R
new file mode 100644
index 0000000..815cd4e
--- /dev/null
+++ b/tests/testthat/test-api-taxonomy.R
@@ -0,0 +1,87 @@
+context("taxonomy API")
+
+
+############################################################################
+## .taxonomy_taxon_info                                                        ##
+############################################################################
+
+
+test_that("ott_id is not null for .taxonomy_taxon_info", {
+    skip_on_cran()
+    expect_error(.taxonomy_taxon_info(NULL),
+                 "must supply")
+})
+
+test_that("ott_id is of length 1 for .taxonomy_taxon_info", {
+    skip_on_cran()
+    expect_error(.taxonomy_taxon_info(c(123, 456, 789)),
+                 "Must only supply")
+})
+
+test_that("ott_id is a numeric for .taxonomy_taxon_info", {
+    skip_on_cran()
+    expect_error(.taxonomy_taxon_info(TRUE),
+                 "look like numbers")
+})
+
+test_that("include_lineage is a flag", {
+    skip_on_cran()
+    expect_error(.taxonomy_taxon_info(ott_id = 515698, include_lineage = c(TRUE, FALSE)),
+                 "is not a flag")
+    expect_error(.taxonomy_taxon_info(ott_id = 515698, include_lineage = c("na")),
+                 "is not a flag")
+    expect_error(.taxonomy_taxon_info(ott_id = 515698, include_lineage = c(1235)),
+                 "is not a flag")
+})
+
+test_that("include_terminal_descendants is a flag", {
+    skip_on_cran()
+    expect_error(.taxonomy_taxon_info(ott_id = 515698, include_terminal_descendants = c(TRUE, FALSE)),
+                 "is not a flag")
+    expect_error(.taxonomy_taxon_info(ott_id = 515698, include_terminal_descendants = c("na")),
+                 "is not a flag")
+    expect_error(.taxonomy_taxon_info(ott_id = 515698, include_terminal_descendants = c(1235)),
+                 "is not a flag")
+})
+
+
+############################################################################
+## .taxonomy_subtree                                                      ##
+############################################################################
+
+
+test_that("ott_id is not null for .taxonomy_subtree", {
+    skip_on_cran()
+    expect_error(.taxonomy_subtree(NULL),
+                 "must supply")
+})
+
+test_that("ott_id is of length 1 for .taxonomy_subtree", {
+    skip_on_cran()
+    expect_error(.taxonomy_subtree(c(123, 456, 789)),
+                 "Must only supply")
+})
+
+test_that("ott_id is a numeric for .taxonomy_subtree", {
+    skip_on_cran()
+    expect_error(.taxonomy_subtree(TRUE),
+                 "look like numbers")
+})
+
+
+############################################################################
+## .taxonomy_mrca                                                         ##
+############################################################################
+
+
+test_that("ott_id is not null for .taxonomy_mrca", {
+    skip_on_cran()
+    expect_error(.taxonomy_mrca(NULL),
+                 "must supply")
+})
+
+test_that("ott_id is a numeric for .taxonomy_mrca", {
+    skip_on_cran()
+    expect_error(.taxonomy_mrca(TRUE),
+                 "look like numbers")
+})
diff --git a/tests/testthat/test-api-tnrs.R b/tests/testthat/test-api-tnrs.R
new file mode 100644
index 0000000..e10a94d
--- /dev/null
+++ b/tests/testthat/test-api-tnrs.R
@@ -0,0 +1,66 @@
+context("tnrs API")
+
+
+############################################################################
+## .tnrs_match_names                                                      ##
+############################################################################
+
+test_that("names argument is provided for .tnrs_match_names", {
+    skip_on_cran()
+    expect_error(.tnrs_match_names(NULL, NULL, TRUE, NULL, FALSE),
+                 "must supply")
+})
+
+test_that("names argument is character for .tnrs_match_names", {
+    skip_on_cran()
+    expect_error(.tnrs_match_names(TRUE, NULL, TRUE, NULL, FALSE),
+                 "character")
+})
+
+test_that("names and ids have the same lengths for .tnrs_match_names", {
+    skip_on_cran()
+    expect_error(.tnrs_match_names("Felis", NULL, TRUE, c("abc", "def"), FALSE),
+                 "same length")
+})
+
+test_that("ids must be character for .tnrs_match_names", {
+    skip_on_cran()
+    expect_error(.tnrs_match_names("Felis", NULL, TRUE, TRUE, FALSE),
+                 "character")
+})
+
+test_that("do_approximate_matching is logical for .tnrs_match_names", {
+    skip_on_cran()
+    expect_error(.tnrs_match_names("Felis", NULL, "true", NULL, FALSE),
+                 "logical")
+})
+
+test_that("include_suppressed is logical for .tnrs_match_names", {
+    skip_on_cran()
+    expect_error(.tnrs_match_names("Felis", NULL, TRUE, NULL, "true"),
+                 "logical")
+})
+
+
+test_that("context_name is character for .tnrs_match_names", {
+    skip_on_cran()
+    expect_error(.tnrs_match_names("Felis", TRUE, TRUE, NULL, FALSE, TRUE),
+                 "character")
+})
+
+
+############################################################################
+## .tnrs_infer_context                                                    ##
+############################################################################
+
+test_that("names is not NULL for .tnrs_infer_context", {
+    skip_on_cran()
+    expect_error(.tnrs_infer_context(NULL),
+                 "Must supply")
+})
+
+test_that("names is character for .tnrs_infer_context", {
+    skip_on_cran()
+    expect_error(.tnrs_infer_context(TRUE),
+                 "character")
+})
diff --git a/tests/testthat/test-api-tol.R b/tests/testthat/test-api-tol.R
new file mode 100644
index 0000000..b2fe227
--- /dev/null
+++ b/tests/testthat/test-api-tol.R
@@ -0,0 +1,69 @@
+context("Tree of Life API")
+
+############################################################################
+## .tol_about                                                             ##
+############################################################################
+
+test_that("include_source_list is logical for .tol_about", {
+    skip_on_cran()
+    expect_error(.tol_about("true"),
+                 "logical")
+})
+
+############################################################################
+## .tol_mrca                                                              ##
+############################################################################
+
+test_that("neither ott_ids nor node_ids are NULL for .tol_mrca", {
+    skip_on_cran()
+    expect_error(.tol_mrca(NULL),
+                 "Must provide")
+})
+
+############################################################################
+## .tol_subtree                                                           ##
+############################################################################
+
+test_that("ott_id is not NULL", {
+    skip_on_cran()
+    expect_error(.tol_subtree(ott_id = NULL, node_id = NULL),
+                 "Must provide")
+})
+
+############################################################################
+## .tol_induced_subtree                                                   ##
+############################################################################
+
+test_that("ott_ids is not NULL", {
+    skip_on_cran()
+    expect_error(.tol_induced_subtree(ott_ids = NULL),
+                 "Must provide")
+})
+
+test_that("NAs are not accepted for ott_ids", {
+    skip_on_cran()
+    expect_error(.tol_induced_subtree(ott_ids = c(123, NA, 456)),
+                 "NAs are not allowed")
+})
+
+####################
+## .tol_node_info ##
+####################
+
+test_that("include_lineage must be logical with .tol_node_info", {
+    skip_on_cran()
+    expect_error(.tol_node_info(ott_id = "ott_123", include_lineage = "123"),
+                 "logical")
+})
+
+test_that("ott_id must be a numeric with .tol_node_info", {
+    skip_on_cran()
+    expect_error(.tol_node_info(ott_id = "test"),
+                 "look like numbers")
+})
+
+test_that("node_id must be a character with .tol_node_info", {
+    skip_on_cran()
+    expect_error(.tol_node_info(node_id = 123),
+                 "must look like")
+})
diff --git a/tests/testthat/test-base.R b/tests/testthat/test-base.R
new file mode 100644
index 0000000..41eceed
--- /dev/null
+++ b/tests/testthat/test-base.R
@@ -0,0 +1,55 @@
+context("base functions")
+
+test_that("otl_url returns the correct strings", {
+    skip_on_cran()
+    expect_match(otl_url(dev = TRUE), "^https://devapi.opentreeoflife.org$")
+    expect_match(otl_url(dev = FALSE), "^https://api.opentreeoflife.org$")
+})
+
+test_that("otl_version", {
+    skip_on_cran()
+    expect_equal(otl_version(), "v3")
+    expect_equal(otl_version("foobar"), "foobar")
+})
+
+test_that("otl_ottid_from_label", {
+    skip_on_cran()
+    expect_equal(otl_ottid_from_label("flkdjfs_ott314343"),
+                 314343)
+})
+
+
+test_that("errors that would otherwise not get caught in phylo_from_otl", {
+    expect_error(phylo_from_otl(list(something = "((A, B), C);")),
+                 "Cannot find tree")
+    expect_error(phylo_from_otl(999), "I don't know how to deal with this format")
+})
+
+############################################################################
+## check_numeric                                                          ##
+############################################################################
+
+test_that("check_numeric works on integer", {
+    expect_true(check_numeric("123"))
+    expect_true(check_numeric(123))
+    expect_true(check_numeric(123L))
+    expect_true(check_numeric(list(123)))
+})
+
+test_that("check_numeric fails if there are characters", {
+    expect_false(check_numeric("A123"))
+    expect_false(check_numeric("1A23"))
+    expect_false(check_numeric("123A"))
+    expect_false(check_numeric("12-3"))
+})
+
+test_that("check_numeric fails with more exotic types", {
+    expect_false(check_numeric(NA))
+    expect_false(check_numeric(TRUE))
+    expect_false(check_numeric(1.23))
+    expect_false(check_numeric(0.9999999999999))
+
+})
+
+test_that("check_numeric fails if more than 1 element provided",
+          expect_error(check_numeric(c(1, 2))))
diff --git a/tests/testthat/test-deduplicate_labels.R b/tests/testthat/test-deduplicate_labels.R
new file mode 100644
index 0000000..12c1011
--- /dev/null
+++ b/tests/testthat/test-deduplicate_labels.R
@@ -0,0 +1,45 @@
+tr_string <- "
+((A,A),A 1); ((B.1,B,C),B);
+((D,D_1),D.1);
+((('A 1','A 1'),A.1),'A 1');
+((('A A A','A A A'),A.1),'A 1');
+
+((((A_1:0.1,B__2:0.1)cats:0.1,(A_1:0.1,A_1:0.1)dogs:0.1)mammals:0.1):0.1)fur:0.1;
+"
+file_dup <- tempfile()
+cat(tr_string, file = file_dup, sep = "\n")
+
+############################################################################
+## parse_newick                                                           ##
+############################################################################
+
+context("parse_newick")
+test_that("parse newick works correctly", {
+   prsed_str <- parse_newick(file_dup)
+   expect_true(is.character(prsed_str))
+   expect_equal(length(prsed_str), 6L)
+})
+
+############################################################################
+## deduplicate_labels                                                     ##
+############################################################################
+
+context("deduplicate_labels")
+
+test_that("deduplicate labels works on made up example", {
+   expect_warning(dedup_tr <- deduplicate_labels(file_dup),
+                  "Some tip labels were duplicated")
+   expect_true(file.exists(dedup_tr))
+   phylo_tr <- rncl::read_newick_phylo(file = dedup_tr)
+   expect_true(inherits(phylo_tr, "multiPhylo"))
+   expect_equal(phylo_tr[[6]]$tip.label, c("A_1_1", "B__2", "A_1_2", "A_1"))
+})
+
+
+test_that("deduplicate labels works on a OTL study", {
+   skip_on_cran()
+   expect_warning(get_study_tree(study_id="pg_710", tree_id="tree1277", tip_label='ott_taxon_name'),
+                  "Some tip labels were duplicated")
+})
+
+unlink(file_dup)
diff --git a/tests/testthat/test-external.R b/tests/testthat/test-external.R
new file mode 100644
index 0000000..950674d
--- /dev/null
+++ b/tests/testthat/test-external.R
@@ -0,0 +1,39 @@
+context("Study external data")
+
+if (identical(Sys.getenv("NOT_CRAN"), "true")) {
+    all_sources <- c("doi", "pubmed_id", "external_data_url", "popset_ids", "nucleotide_ids")
+    all_data <- study_external_IDs("pg_1940")
+}
+
+test_that("We can recover dois, pmids, NCBI IDs", {
+    skip_on_cran()
+    expect_that(all_data, is_a("study_external_data"))
+    expect_named(all_data)
+})
+
+test_that("We can handle studies with missing external IDs", {
+    skip_on_cran()
+    expect_warning(
+        missing_data <- study_external_IDs("ot_97"), "skipping NCBI"
+    )
+    expect_named(missing_data)
+    expect_that(missing_data, is_a("study_external_data"))
+    expect_equal( sum(is.na(match(all_sources, names(missing_data)))), 2) #we really skipped the NCBI
+})
+
+test_that("The print functions for external data objects work", {
+    skip_on_cran()
+    missing_data <- study_external_IDs("ot_91")
+    expect_output(print(all_data), "External data identifiers for study")
+    expect_output(print(missing_data), "External data identifiers for study")
+})
+
+
+context("Taxon external data")
+
+test_that("We can recover external IDs for Open Tree taxa", {
+    skip_on_cran()
+    gibbon_IDs <- taxon_external_IDs(712902)
+    expect_that(gibbon_IDs, is_a("data.frame"))
+    expect_equal(names(gibbon_IDs), c("source", "id"))
+})
diff --git a/tests/testthat/test-match_names.R b/tests/testthat/test-match_names.R
new file mode 100644
index 0000000..4d6f838
--- /dev/null
+++ b/tests/testthat/test-match_names.R
@@ -0,0 +1,408 @@
+context("match names")
+
+############################################################################
+## check_args_match_names                                                 ##
+############################################################################
+
+context("check_args_match_names")
+
+ if (identical(Sys.getenv("NOT_CRAN"), "true")) {
+     rsp <- tnrs_match_names(names = c("holothuria", "diadema", "fromia"))
+ }
+
+
+test_that("error generated if object provided isn't created by tnrs_match_names",
+          expect_error(rotl:::check_args_match_names(letters),
+                       "was not created using"))
+
+test_that("error generated if no argument is provided", {
+    skip_on_cran()
+    expect_error(rotl:::check_args_match_names(rsp),
+                 "You must specify")
+})
+
+test_that("error generated if row_number and taxon_name are provided", {
+    skip_on_cran()
+    expect_error(rotl:::check_args_match_names(rsp, row_number = 1,
+                                               taxon_name = "holothuria"),
+                 "must use only one of ")
+})
+
+test_that("error generated if row_number and ott_id are provided", {
+    skip_on_cran()
+    expect_error(rotl:::check_args_match_names(rsp, row_number = 1,
+                                               ott_id = 5004030),
+                 "must use only one of")
+})
+
+test_that("error generated if ott_id and taxon_name are provided", {
+    skip_on_cran()
+    expect_error(rotl:::check_args_match_names(rsp, taxon_name = "holothuria",
+                                               ott_id = 5004030),
+                 "must use only one of")
+})
+
+test_that("error generated if row_number is not numeric", {
+    skip_on_cran()
+    expect_error(rotl:::check_args_match_names(rsp, row_number = TRUE),
+                 "must be a numeric")
+})
+
+test_that("error generated if ott_id is not numeric", {
+    skip_on_cran()
+    expect_error(rotl:::check_args_match_names(rsp, ott_id = TRUE),
+                 "must look like a number")
+})
+
+test_that("error generated if taxon_name is not character", {
+    skip_on_cran()
+    expect_error(rotl:::check_args_match_names(rsp, taxon_name = TRUE),
+                 "must be a character")
+})
+
+test_that("error generated if row_number is not one of the rows", {
+    skip_on_cran()
+    expect_error(rotl:::check_args_match_names(rsp, row_number = 10),   # well past the three names queried for rsp
+                 "is not a valid row number")
+    expect_error(rotl:::check_args_match_names(rsp, row_number = 1.5),  # not a whole number
+                 "is not a valid row number")
+    expect_error(rotl:::check_args_match_names(rsp, row_number = 0),    # zero
+                 "is not a valid row number")
+})
+
+test_that("error generated if invalid taxon_name", {
+    skip_on_cran()
+    expect_error(rotl:::check_args_match_names(rsp, taxon_name = "echinodermata"),
+                 "Can't find")
+    expect_error(rotl:::check_args_match_names(rsp, taxon_name = NA_character_),
+                 "Can't find")
+})
+
+test_that("error generated if invalid ott id", {
+    skip_on_cran()
+    expect_error(rotl:::check_args_match_names(rsp, ott_id = 66666),
+                 "Can't find")
+})
+
+test_that("error generated if more than 1 value for row_number is provided", {
+    skip_on_cran()
+    expect_error(rotl:::check_args_match_names(rsp, row_number = c(1, 2, 3)),
+                 "You must supply a single element")
+})
+
+test_that("error generated if more than 1 value for taxon_name is provided", {
+    skip_on_cran()
+    expect_error(rotl:::check_args_match_names(rsp, taxon_name = c("holothuria", "diadema")),
+                 "You must supply a single element")
+})
+
+
+test_that("error generated if more than 1 value for ott_id is provided", {
+    skip_on_cran()
+    expect_error(rotl:::check_args_match_names(rsp, ott_id = c(5004030, 4930522, 240396)),
+                 "only 1 element should be provided")
+})
+
+############################################################################
+## inspect.match_names                                                    ##
+############################################################################
+
+context("inspect.match_names")
+
+ if (identical(Sys.getenv("NOT_CRAN"), "true")) {  # fixtures need the live service, so build them only when NOT_CRAN is "true"
+     rsp <- tnrs_match_names(names = c("holothuria", "diadema", "fromia"))
+     expect_warning(rsp_na <- tnrs_match_names(names = c("diadema", "fluffy",  # "fluffy" is the name the tests below expect to come back as NA
+                                                         "hemichordata", "escherichia")))
+     diadema_ids <- c(4930522, 631176, 643831)  # ott ids the tests below accept for "diadema"
+ }
+
+
+test_that("correct data is being returned when asked to lookup by taxon name", {
+    skip_on_cran()
+    candidates <- inspect(rsp, taxon_name = "diadema")
+    expect_true(all(candidates[["ott_id"]] %in% diadema_ids))  # every returned ott_id must be one of the known ids
+})
+
+test_that("correct data is being returned when asked to lookup by ott_id", {
+    skip_on_cran()
+    tt <- inspect(rsp, ott_id = 631176)[["ott_id"]]
+    expect_true(all(tt %in% diadema_ids))
+})
+
+test_that("correct data is being returned when asked to lookup by row number", {
+    skip_on_cran()
+    tt <- inspect(rsp, row_number = 2)[["ott_id"]]
+    expect_true(all(tt %in% diadema_ids))
+})
+
+## with missing data
+
+test_that("correct data is being returned when asked to lookup by taxon name (with missing data)", {
+    skip_on_cran()
+    tt <- inspect(rsp_na, taxon_name = "diadema")[["ott_id"]]
+    expect_true(all(tt %in% diadema_ids))
+    expect_true(is.na(inspect(rsp_na, taxon_name = "fluffy")[["ott_id"]]))
+})
+
+test_that("correct data is being returned when asked to lookup by ott_id (with missing data)", {
+    skip_on_cran()
+    tt <- inspect(rsp_na, ott_id = 631176)[["ott_id"]]
+    expect_true(all(tt %in% diadema_ids))
+})
+
+test_that("correct data is being returned when asked to lookup by row number (with missing data)", {
+    skip_on_cran()
+    tt <- inspect(rsp_na, row_number = 1)[["ott_id"]]
+    expect_true(all(tt %in% diadema_ids))
+    expect_true(is.na(inspect(rsp_na, row_number = 2)[["ott_id"]]))
+})
+
+
+
+
+############################################################################
+## synonyms.match_names                                                   ##
+############################################################################
+
+context("list_synonym_match_names")
+
+if (identical(Sys.getenv("NOT_CRAN"), "true")) {  # fixtures need the live service, so build them only when NOT_CRAN is "true"
+    tax_rsp <- c("Holothuria", "Diadema", "Fromia")
+    rsp <- tnrs_match_names(names = tax_rsp)
+    tax_rsp_na <- c("Holothuria", "Diadema", "fluffy", "Fromia")  # "fluffy" (3rd entry) is the name the tests below expect to be NA
+    expect_warning(rsp_na <- tnrs_match_names(names = tax_rsp_na))  # rsp_na is also reused by the update.match_names tests further down
+}
+
+
+test_that("synonyms", {
+    skip_on_cran()
+    tt <- synonyms(rsp)
+    expect_true(inherits(tt, "list"))
+    expect_equal(names(tt),
+                 c("Holothuria", "Diadema (genus in Holozoa)", "Fromia"))
+})
+
+
+test_that("correct synonyms are being returned when asked to look up by taxon name", {
+    skip_on_cran()
+    tt <- synonyms(rsp, taxon_name = "holothuria")
+    expect_true(any(grepl("^Holothuria", names(tt))))
+})
+
+test_that("holothuria is present in each element of the list", {
+    skip_on_cran()
+    tt <- synonyms(rsp, taxon_name = "holothuria")
+    expect_true(all(sapply(tt, function(x) any(grepl("holothuria", x, ignore.case = TRUE)))))
+    expect_true(any(grepl("Halodeima", tt[["Holothuria"]])))
+})
+
+test_that("correct synonyms are being returned when asked to look up by row number", {
+    skip_on_cran()
+    tt <- synonyms(rsp, row_number = 1)
+    expect_true(any(grepl("^Holothuria", names(tt))))
+    expect_true(any(grepl("Halodeima", tt[["Holothuria"]])))
+
+})
+
+
+test_that("correct synonyms are being returned when asked to look up by ott id", {
+    skip_on_cran()
+    tt <- synonyms(rsp, ott_id = 5004030)
+    expect_true(any(grepl("^Holothuria", names(tt))))
+    expect_true(any(grepl("Halodeima", tt[["Holothuria"]])))
+})
+
+## with missing data
+
+test_that("synonyms (with missing data)", {
+    skip_on_cran()
+    tt <- synonyms(rsp_na)
+    expect_true(inherits(tt, "list"))
+    expect_equal(names(tt),
+                 c("Holothuria", "Diadema (genus in Holozoa)", "Fromia"))  # four names were queried; "fluffy" has no entry here
+})
+
+
+test_that("correct synonyms are being returned when asked to look up by taxon name (with missing data)", {
+    skip_on_cran()
+    tt <- synonyms(rsp_na, taxon_name = "holothuria")
+    expect_true(any(grepl("^Holothuria", names(tt))))
+    expect_true(is.na(synonyms(rsp_na, taxon_name = "fluffy")[[1]]))  # the unmatched name yields NA
+})
+
+
+test_that("correct synonyms are being returned when asked to look up by row number (with missing data)", {
+    skip_on_cran()
+    tt <- synonyms(rsp_na, row_number = 1)
+    expect_true(any(grepl("^Holothuria", names(tt))))
+    expect_true(any(grepl("Halodeima", tt[["Holothuria"]])))
+    expect_true(is.na(synonyms(rsp_na, row_number = 3)[[1]]))  # row 3 is "fluffy", the unmatched name
+})
+
+
+test_that("correct synonyms are being returned when asked to look up by ott id (with missing data)", {
+    skip_on_cran()
+    tt <- synonyms(rsp_na, ott_id = 5004030)  # same ott id the other tests in this file use for Holothuria
+    expect_true(any(grepl("^Holothuria", names(tt))))
+    expect_true(any(grepl("Halodeima", tt[["Holothuria"]])))
+})
+
+
+############################################################################
+## update.match_names                                                     ##
+############################################################################
+
+context("update.match_names")
+
+ if (identical(Sys.getenv("NOT_CRAN"), "true")) {
+     rsp <- tnrs_match_names(names = c("holothuria", "diadema", "fromia"))
+ }
+
+test_that("error message if missing both new arguments", {
+    skip_on_cran()
+    expect_error(update(rsp, row_number = 1),
+                 "You must specify either")
+})
+
+test_that("error message if both new arguments are provided", {
+    skip_on_cran()
+    expect_error(update(rsp, row_number = 1,
+                        new_row_number = 1,
+                        new_ott_id = 6666),
+                 "You must use only")
+})
+
+test_that("error message if wrong new row number provided", {
+    skip_on_cran()
+    expect_error(update(rsp, row_number = 1,
+                        new_row_number = 10),
+                 "is not a valid row number")
+    expect_error(update(rsp, row_number = 1,
+                        new_row_number = 1.5),
+                 "is not a valid row number")
+})
+
+test_that("error message if wrong new ott id provided", {
+    skip_on_cran()
+    expect_error(update(rsp, row_number = 1,
+                        new_ott_id = 66666),
+                 "Can't find")
+})
+
+test_that("it works correctly when providing a new row number", {
+    skip_on_cran()
+    new_rsp <- update(rsp, row_number = 2,
+                      new_row_number = 2)
+    expect_equal(new_rsp[new_rsp$search_string == "diadema", "ott_id"],
+                 "4930522")
+})
+
+
+test_that("it works correctly when providing a new ott id", {
+    skip_on_cran()
+    new_rsp <- update(rsp, row_number = 2,
+                      new_ott_id = 4930522)
+    expect_equal(new_rsp[new_rsp$search_string == "diadema", "ott_id"],
+                 "4930522")
+})
+
+test_that("it produces warning when trying to update with unmatched name", {
+    skip_on_cran()
+    expect_warning(new_rsp <- update(rsp_na, row_number = 3, new_row_number = 1))  # rsp_na is left over from the synonyms setup above; its 3rd name is the unmatched "fluffy"
+    expect_identical(new_rsp, rsp_na)  # the object must come back unchanged
+
+})
+
+
+############################################################################
+## flags method                                                           ##
+############################################################################
+
+context("flags method for class match_names")
+
+if (identical(Sys.getenv("NOT_CRAN"), "true")) {
+    tax_rsp <- c("Tyrannosaurus", "Helicoplacus", "Ctenocystis",
+                 "Holothuria", "Echinoidea")
+    rsp <- tnrs_match_names(tax_rsp)
+}
+
+test_that("flags with no arguments", {
+    skip_on_cran()
+    flags_rsp <- flags(rsp)
+    expect_equal(length(flags_rsp), 5)
+    expect_equivalent(sapply(flags_rsp, length),
+                      c(1, 3, 2, 0, 0))
+})
+
+test_that("flags with row number", {
+    skip_on_cran()
+    flags_rsp <- flags(rsp, 1)
+    expect_true(inherits(flags_rsp, "list"))
+    expect_equal(length(flags_rsp), 1)
+    expect_equal(length(flags_rsp[[1]]), 1)
+    expect_true(inherits(flags_rsp[[1]], "character"))
+    expect_equal(names(flags_rsp), tax_rsp[1])
+})
+
+test_that("flags with taxon name", {
+    skip_on_cran()
+    flags_rsp <- flags(rsp, taxon_name = "Tyrannosaurus")
+    expect_true(inherits(flags_rsp, "list"))
+    expect_equal(length(flags_rsp), 1)
+    expect_equal(length(flags_rsp[[1]]), 1)
+    expect_true(inherits(flags_rsp[[1]], "character"))
+    expect_equal(names(flags_rsp), tax_rsp[1])
+})
+
+test_that("flags with ott id", {
+    skip_on_cran()
+    flags_rsp <- flags(rsp, ott_id = 664348)
+    expect_true(inherits(flags_rsp, "list"))
+    expect_equal(length(flags_rsp), 1)
+    expect_equal(length(flags_rsp[[1]]), 1)
+    expect_true(inherits(flags_rsp[[1]], "character"))
+    expect_equal(names(flags_rsp), tax_rsp[1])
+})
+
+
+############################################################################
+## ott_id method                                                          ##
+############################################################################
+
+context("ott_id method for class match_names")
+
+if (identical(Sys.getenv("NOT_CRAN"), "true")) {
+    tax_rsp <- c("Tyrannosaurus", "Helicoplacus", "Ctenocystis",
+                 "Holothuria", "Echinoidea")
+    rsp <- tnrs_match_names(tax_rsp)
+}
+
+test_that("ott_id with no arguments", {
+    skip_on_cran()
+    expect_true(inherits(ott_id(rsp), "list"))
+    expect_true(inherits(ott_id(rsp), "otl_ott_id"))
+    expect_equal(names(ott_id(rsp)), tax_rsp)
+    expect_equal(ott_id(rsp)[["Holothuria"]][[1]], 5004030)
+})
+
+
+test_that("ott_id with row number", {
+    skip_on_cran()
+    expect_equal(length(ott_id(rsp, 4)), 1)
+    expect_true(inherits(ott_id(rsp, 4), "list"))
+    expect_equivalent(ott_id(rsp, 4)[[1]], 5004030)
+})
+
+test_that("ott_id with taxon name", {
+    skip_on_cran()
+    expect_equal(length(ott_id(rsp, taxon_name = "Holothuria")), 1)
+    expect_true(inherits(ott_id(rsp, taxon_name = "Holothuria"), "list"))
+    expect_equivalent(ott_id(rsp, taxon_name = "Holothuria")[[1]], 5004030)
+})
+
+test_that("ott_id with ott id", {
+    skip_on_cran()
+    expect_equal(length(ott_id(rsp, ott_id=5004030)), 1)
+    expect_true(inherits(ott_id(rsp, ott_id=5004030), "list"))
+    expect_equivalent(ott_id(rsp, ott_id=5004030)[[1]], 5004030)
+})
diff --git a/tests/testthat/test-studies.R b/tests/testthat/test-studies.R
new file mode 100644
index 0000000..1b7039f
--- /dev/null
+++ b/tests/testthat/test-studies.R
@@ -0,0 +1,484 @@
+context("test of studies")
+
+############################################################################
+## studies_properties                                                     ##
+############################################################################
+
+test_that("studies_properties is a list with 2 elements (if breaks, need to update documentation)", {
+    skip_on_cran()
+    expect_true(all(names(studies_properties()) %in% c("tree_properties", "study_properties")))  # names() wraps only the call; names(x %in% y) is NULL and all(NULL) is always TRUE
+})
+
+
+############################################################################
+## get_study                                                              ##
+############################################################################
+
+test_that("get_study returns an error when asking for a study that doesn't exist", {
+    skip_on_cran()
+    expect_error(get_study("tt_666666"))
+})
+
+test_that("get_study generates a phylo object", {
+    skip_on_cran()
+    tr <- get_study("pg_719", object_format = "phylo")
+    expect_true(inherits(tr, "multiPhylo"))
+    expect_equal(length(tr), 3)
+    expect_true(length(tr[[1]]$tip.label) > 1)
+})
+
+test_that("get_study returns an error if file is specified but file_format is not", {
+    skip_on_cran()
+    expect_error(get_study("pg_719", file = "test"),  # file given without file_format
+                 "must be specified")
+})
+
+test_that("get_study generates a nexml object", {
+    skip_on_cran()
+    tr <- get_study("pg_719", object_format = "nexml")
+    expect_true(inherits(tr, "nexml"))
+})
+
+test_that("get_study generates a newick file", {
+    skip_on_cran()
+    newick_path <- tempfile()
+    expect_true(get_study("pg_719", file_format = "newick", file = newick_path))  # the call is expected to return TRUE
+    first_line <- readLines(newick_path, n = 1, warn = FALSE)
+    expect_true(grepl("^\\(", first_line))  # file must open with "("
+})
+
+test_that("get_study generates a nexus file", {
+    skip_on_cran()
+    nexus_path <- tempfile()
+    expect_true(get_study("pg_719", file_format = "nexus", file = nexus_path))  # the call is expected to return TRUE
+    first_line <- readLines(nexus_path, n = 1, warn = FALSE)
+    expect_true(grepl("^#NEXUS", first_line))  # file must open with the #NEXUS header
+})
+
+test_that("get_study generates a nexml file", {
+    skip_on_cran()
+    nexml_path <- tempfile()
+    expect_true(get_study("pg_719", file_format = "nexml", file = nexml_path))  # the call is expected to return TRUE
+    first_line <- readLines(nexml_path, n = 1, warn = FALSE)
+    expect_true(grepl("^<\\?xml", first_line))  # file must open with an XML declaration
+})
+
+test_that("get_study generates a json file", {
+    skip_on_cran()
+    json_path <- tempfile()
+    expect_true(get_study("pg_719", file_format = "json", file = json_path))  # the call is expected to return TRUE
+    first_line <- readLines(json_path, n = 1, warn = FALSE)
+    expect_true(grepl("^\\{", first_line))  # file must open with "{"
+})
+
+
+
+############################################################################
+## get_study_tree                                                         ##
+############################################################################
+
+test_that("get_study_tree returns error when tree doesn't exist", {
+    skip_on_cran()
+    expect_error(get_study_tree("2655", "tree5555"))
+})
+
+test_that("get_study_tree returns error when study doesn't exist", {
+    skip_on_cran()
+    expect_error(get_study_tree("5555555", "tree555555"))
+})
+
+
+test_that("get_study_tree generates nexus file", {
+    skip_on_cran()
+    ff <- tempfile(fileext = ".nex")
+    tt <- get_study_tree("pg_1144", "tree2324", file_format = "nexus",
+                         file = ff)
+    expect_true(tt)
+    expect_true(grepl("^#NEXUS", readLines(ff, n = 1, warn = FALSE)))
+})
+
+test_that("get_study_tree generates newick file", {
+    skip_on_cran()
+    ff <- tempfile(fileext = ".tre")
+    tt <- get_study_tree("pg_1144", "tree2324", file_format = "newick",
+                         file = ff)
+    expect_true(tt)
+    expect_true(grepl("^\\(", readLines(ff, n = 1, warn = FALSE)))
+})
+
+test_that("get_study_tree generates json file", {
+    skip_on_cran()
+    ff <- tempfile(fileext = ".json")
+    tt <- get_study_tree("pg_1144", "tree2324", file_format = "json",
+                         file = ff)
+    expect_true(tt)
+    expect_true(grepl("^\\{", readLines(ff, n = 1, warn = FALSE)))
+})
+
+test_that("get_study_tree returns a phylo object", {
+    skip_on_cran()
+    tt <- get_study_tree("pg_1144", "tree2324", object_format = "phylo")
+    expect_true(inherits(tt, "phylo"))
+    expect_true(length(tt$tip.label) > 1)
+})
+
+### Test types of labels with phylo objects
+
+test_that("get_study_tree returns a phylo object and ott_id for tip labels", {
+    skip_on_cran()
+    tt <- get_study_tree("pg_1144", "tree2324", object_format = "phylo",
+                         tip_label = "ott_id")
+    expect_true(inherits(tt, "phylo"))
+    expect_true(length(tt$tip.label) > 1)
+    expect_true(grepl("^[0-9]+$", tt$tip.label[1]))
+})
+
+test_that("get_study_tree returns a phylo object and ott_taxon_names for tip labels", {
+    skip_on_cran()
+    tt <- get_study_tree("pg_1144", "tree2324", object_format = "phylo",
+                         tip_label = "ott_taxon_name")
+    expect_true(inherits(tt, "phylo"))
+    expect_true(length(tt$tip.label) > 1)
+    expect_true(sum(!grepl("^[A-Za-z]+(_[a-z]+)?$", tt$tip.label)) < 3)
+})
+
+test_that("get_study_tree returns a phylo object and original labels for tip labels", {
+    skip_on_cran()
+    tt <- get_study_tree("pg_1144", "tree2324", object_format = "phylo",
+                         tip_label = "original_label")
+    expect_true(inherits(tt, "phylo"))
+    expect_true(length(tt$tip.label) > 1)
+    expect_equal(sum(!grepl("^[A-Za-z]+_[a-z]+$", tt$tip.label)), 45)
+})
+
+### Test types of labels with files (skipping json for now because there is no good way of doing it)
+
+test_that("get_study_tree returns an error if file is given but file format is not", {
+    skip_on_cran()
+    expect_error(get_study_tree(study_id="pg_1144", tree="tree2324", file = "test"),
+                 "must be specified")
+})
+
+test_that("get_study_tree returns nexus file and ott_id for tip labels", {
+    skip_on_cran()
+    ff <- tempfile(fileext = ".nex")
+    tt <- get_study_tree("pg_1144", "tree2324", file_format = "nexus",
+                         tip_label = "ott_id", file = ff)
+    expect_true(tt)
+    tr <- rncl::read_nexus_phylo(ff)
+    expect_true(length(tr$tip.label) > 1)
+    expect_true(grepl("^[0-9]+$", tr$tip.label[1]))
+})
+
+test_that("get_study_tree returns newick file and ott_taxon_names for tip labels", {
+    skip_on_cran()
+    ff <- tempfile(fileext = ".tre")
+    tt <- get_study_tree("pg_1144", "tree2324", file_format = "newick",
+                         tip_label = "ott_taxon_name", file = ff)
+    expect_true(tt)
+    tr <- rncl::read_newick_phylo(ff)  # read the written file back to inspect its tip labels
+    expect_true(length(tr$tip.label) > 1)
+    expect_true(sum(!grepl("^[A-Za-z]+(_[a-z]+)?$", tr$tip.label)) < 3)  # fewer than 3 labels may deviate from the Name or Name_name shape
+})
+
+
+
+############################################################################
+## get_study_subtree                                                      ##
+############################################################################
+
+test_that("get_study_subtree returns an error when study_id doesn't exist", {
+        skip_on_cran()
+        expect_error(get_study_subtree("pg_55555", "tree55555", subtree_id = "node555555"))
+})
+
+test_that("get_study_subtree returns an error when tree_id doesn't exist", {
+    skip_on_cran()
+    expect_error(get_study_subtree("pg_1144", "tree55555", subtree_id = "node555555"))
+})
+
+## API still returns object
+## test_that("get_study_subtree returns an error when the subtree_id is invalid",
+##           expect_error(get_study_subtree("pg_1144", "tree2324", "foobar")))
+
+test_that("get_study_subtree returns a phylo object", {
+    skip_on_cran()
+    tt <- get_study_subtree("pg_1144", "tree2324", subtree_id = "ingroup",
+                            object_format = "phylo")
+    expect_true(inherits(tt, "phylo"))
+    expect_true(length(tt$tip.label) > 1)
+})
+
+test_that("get_study_subtree fails if file name is given but no file format", {
+    skip_on_cran()
+    expect_error(get_study_subtree("pg_1144", "tree2324", subtree_id = "ingroup",
+                                   file = "test"), "must be specified")
+})
+
+test_that("get_study_subtree returns a nexus file", {
+    skip_on_cran()
+    ff <- tempfile(fileext = ".nex")
+    tt <- get_study_subtree("pg_1144", "tree2324", subtree_id = "ingroup",
+                            file_format = "nexus", file = ff)
+    expect_true(tt)
+    expect_true(grepl("^#NEXUS", readLines(ff, n = 1, warn = FALSE)))
+})
+
+test_that("get_study_subtree returns a newick file", {
+    skip_on_cran()
+    ff <- tempfile(fileext = ".tre")
+    tt <- get_study_subtree("pg_1144", "tree2324", subtree_id = "ingroup",
+                            file_format = "newick", file = ff)
+    expect_true(tt)
+    expect_true(grepl("^\\(", readLines(ff, n = 1, warn = FALSE)))
+})
+
+test_that("get_study_subtree returns a json file", {
+    skip_on_cran()
+    ff <- tempfile(fileext = ".json")
+    tt <- get_study_subtree("pg_1144", "tree2324", subtree_id = "ingroup",
+                            file_format = "json", file = ff)
+    expect_true(tt)
+    expect_true(grepl("^\\{", readLines(ff, n = 1, warn = FALSE)))
+})
+
+
+############################################################################
+## get_study_meta                                                         ##
+############################################################################
+
+if (identical(Sys.getenv("NOT_CRAN"), "true")) {
+    sm <- get_study_meta("pg_719")
+}
+
+test_that("get_study_meta returns a study_meta object", {
+    skip_on_cran()
+    expect_true(inherits(sm, "study_meta"))  # sm is fetched once, just above, for all tests in this section
+})
+
+test_that("get_tree_ids method for study_meta", {
+    skip_on_cran()
+    expect_equal(get_tree_ids(sm), c("tree1294", "tree1295", "tree1296"))
+})
+
+test_that("get_publication method for study_meta", {
+    skip_on_cran()
+    expect_equal(attr(get_publication(sm), "DOI"), "http://dx.doi.org/10.1600/036364411X605092")
+})
+
+test_that("candidate_for_synth method for study_meta", {
+    skip_on_cran()
+    expect_true(candidate_for_synth(sm) %in% get_tree_ids(sm))
+})
+
+test_that("get_study_year method for study_meta", {
+     skip_on_cran()
+     expect_equal(get_study_year(sm), 2011)
+ })
+
+############################################################################
+## tol_about                                                              ##
+############################################################################
+
+test_that("tol_about returns class tol_summary", {
+    skip_on_cran()
+    expect_true(inherits(tol_about(), "tol_summary"))
+})
+
+test_that("source_list returns a data frame for tol_about(TRUE)", {
+    skip_on_cran()
+    ta <- source_list(tol_about(TRUE))  # NOTE(review): TRUE presumably asks for the source list to be included - confirm against tol_about()
+    expect_true(inherits(ta, "data.frame"))
+    expect_true(nrow(ta) > 100)
+    expect_equal(names(ta), c("study_id","tree_id", "git_sha"))
+})
+
+############################################################################
+## studies_find_studies                                                   ##
+############################################################################
+
+test_that("single study detailed=TRUE", {
+              skip_on_cran()
+              res <- studies_find_studies(property = "ot:studyId",
+                                          value = "ot_248", detailed = TRUE)
+              expect_true(inherits(res, "data.frame"))
+              expect_true(inherits(res, "matched_studies"))
+              expect_true(all(names(res) %in% c("study_ids", "n_trees", "tree_ids",
+                                                "candidate", "study_year", "title",
+                                                "study_doi")))
+              expect_true(nrow(res) >= 1L)
+              expect_equal(res[["study_ids"]], "ot_248")
+              expect_equal(res[["n_trees"]], "1")
+              expect_equal(res[["candidate"]], "Tr76302")
+              expect_equal(res[["study_year"]], "2014")
+              expect_equal(res[["study_doi"]], "http://dx.doi.org/10.1016/j.cub.2014.06.060")
+              expect_equal(res[["title"]], "'Phylogenomic Resolution of the Class Ophiuroidea Unlocks a Global Microfossil Record'")
+              expect_true(length(attr(res, "metadata")) > 0)
+              expect_true(length(attr(res, "found_trees")) > 0)
+})
+
+test_that("single study detailed=FALSE", {
+              skip_on_cran()
+              res <- studies_find_studies(property = "ot:studyId",
+                                          value = "ot_248", detailed = FALSE)
+              expect_true(inherits(res, "data.frame"))
+              expect_true(inherits(res, "study_ids"))
+              expect_true(inherits(res, "matched_studies"))
+              expect_match(attr(res, "found_trees"), "list of the trees associated")
+              expect_equal(names(res), "study_ids")
+              expect_equal(res[1, 1], "ot_248")
+              expect_equal(nrow(res), 1L)
+              expect_equal(ncol(res), 1L)
+              expect_true(length(attr(res, "metadata")) > 0)
+              expect_true(length(attr(res, "found_trees")) > 0)
+          })
+
+test_that("multiple studies detailed=TRUE", {
+              skip_on_cran()
+              res <- studies_find_studies(property = "ot:focalCladeOTTTaxonName",
+                                          value = "Aves", detailed = TRUE)
+              expect_true(inherits(res, "data.frame"))
+              expect_true(inherits(res, "matched_studies"))
+              expect_true(all(names(res) %in% c("study_ids", "n_trees", "tree_ids",
+                                                "candidate", "study_year",
+                                                "title", "study_doi")))
+              expect_true(nrow(res) >= 8L)
+              expect_true(length(attr(res, "metadata")) > 0)
+              expect_true(length(attr(res, "found_trees")) > 0)
+          })
+
+test_that("multiple studies detailed=FALSE", {
+              skip_on_cran()
+              res <- studies_find_studies(property = "ot:focalCladeOTTTaxonName",
+                                          value = "Aves", detailed = FALSE)
+              expect_true(inherits(res, "study_ids"))
+              expect_true(inherits(res, "matched_studies"))
+              expect_true(inherits(res, "data.frame"))
+              expect_equal(ncol(res), 1L)
+              expect_true(nrow(res) >= 8)
+              expect_equal(names(res), "study_ids")
+              expect_true(length(attr(res, "metadata")) > 0)
+              expect_true(length(attr(res, "found_trees")) > 0)
+          })
+
+
+############################################################################
+## studies_find_trees                                                     ##
+############################################################################
+
+test_that("studies_find_trees single study detailed=FALSE", {
+              skip_on_cran()
+              res <- studies_find_trees(property = "ot:studyId",
+                                        value = "ot_248", detailed = FALSE)
+              expect_true(inherits(res, "data.frame"))
+              expect_true(inherits(res, "matched_studies"))
+              expect_match(attr(res, "found_trees")[[1]], "Tr76302")
+              expect_equal(names(res), c("study_ids",
+                                         "n_matched_trees",
+                                         "match_tree_ids"))
+              expect_equal(res[1, 1], "ot_248")
+              expect_equal(nrow(res), 1L)
+              expect_equal(ncol(res), 3L)
+              expect_true(length(attr(res, "metadata")) > 0)
+              expect_true(length(attr(res, "found_trees")) > 0)
+          })
+
+test_that("studies_find_trees single study detailed=TRUE", {
+              skip_on_cran()
+              res <- studies_find_trees(property = "ot:studyId",
+                                        value = "ot_248", detailed = TRUE)
+              expect_true(inherits(res, "data.frame"))
+              expect_true(inherits(res, "matched_studies"))
+              expect_equal(names(res), c("study_ids", "n_trees",
+                                         "tree_ids", "candidate",
+                                         "study_year", "title",
+                                         "study_doi",
+                                         "n_matched_trees",
+                                         "match_tree_ids"))
+              expect_equal(nrow(res), 1L)
+              expect_equal(res[["study_ids"]], "ot_248")
+              expect_equal(res[["n_trees"]], "1")
+              expect_equal(res[["candidate"]], "Tr76302")
+              expect_equal(res[["study_year"]], "2014")
+              expect_equal(res[["study_doi"]], "http://dx.doi.org/10.1016/j.cub.2014.06.060")
+              expect_equal(res[["title"]], "'Phylogenomic Resolution of the Class Ophiuroidea Unlocks a Global Microfossil Record'")
+              expect_equal(res[["tree_ids"]], "Tr76302")
+              expect_true(length(attr(res, "metadata")) > 0)
+              expect_true(length(attr(res, "found_trees")) > 0)
+          })
+
+test_that("studies_find_trees multiple studies detailed=TRUE", {
+              skip_on_cran()
+              res <- studies_find_trees(property = "ot:ottTaxonName",
+                                        value = "Echinodermata", detailed = TRUE)
+              expect_true(inherits(res, "data.frame"))
+              expect_true(inherits(res, "matched_studies"))
+              expect_equal(names(res), c("study_ids", "n_trees",
+                                         "tree_ids", "candidate",
+                                         "study_year", "title",
+                                         "study_doi",
+                                         "n_matched_trees",
+                                         "match_tree_ids"))
+              expect_true(nrow(res) >= 5L)
+              expect_true(length(attr(res, "metadata")) > 0)
+              expect_true(length(attr(res, "found_trees")) > 0)
+          })
+
+test_that("studies_find_trees multiple studies detailed=FALSE", {
+              skip_on_cran()
+              res <- studies_find_trees(property = "ot:ottTaxonName",
+                                        value = "Echinodermata", detailed = FALSE)
+              expect_true(inherits(res, "data.frame"))
+              expect_true(inherits(res, "matched_studies"))
+              expect_equal(names(res), c("study_ids",
+                                         "n_matched_trees",
+                                         "match_tree_ids"))
+              expect_true(nrow(res) >= 5L)
+              expect_true(length(attr(res, "metadata")) > 0)
+              expect_true(length(attr(res, "found_trees")) > 0)
+          })
+
+
+############################################################################
+## list_trees                                                             ##
+############################################################################
+
+test_that("list_trees with studies_find_studies and detailed = FALSE", {
+              skip_on_cran()
+              expect_match(list_trees(studies_find_studies(
+                                          property = "ot:focalCladeOTTTaxonName",
+                                          value = "Aves", detailed = FALSE)),
+                           "If you want to get a list of the trees associated with the studies")
+          })
+
+test_that("list_trees with studies_find_studies and detailed = TRUE",  {
+              skip_on_cran()
+              res <- studies_find_studies(property = "ot:focalCladeOTTTaxonName",
+                                          value = "Aves", detailed = TRUE)
+              lt <- list_trees(res)  # computed once, as in the studies_find_trees tests below
+              expect_true(inherits(lt, "list"))
+              expect_true(length(lt) >= 8)
+              expected_ids <- c("pg_435", "ot_428", "pg_420", "ot_429",
+                                "ot_214", "ot_117", "ot_116", "pg_2799")
+              expect_true(sum(names(lt) %in% expected_ids) >= 8)  # all eight study ids must be among the names
+          })
+
+test_that("list_trees with studies_find_trees and detailed=FALSE", {
+              skip_on_cran()
+              res <- studies_find_trees(property = "ot:ottTaxonName",
+                                        value = "Echinodermata", detailed = FALSE)
+              lt <- list_trees(res)
+              expect_true(inherits(lt, "list"))
+              expect_true(length(names(lt)) >=  5L)
+              expect_true(all(sapply(lt, length) >=  1L))
+          })
+
+test_that("list_trees with studies_find_trees and detailed=TRUE", {
+              skip_on_cran()
+              res <- studies_find_trees(property = "ot:ottTaxonName",
+                                        value = "Echinodermata", detailed = TRUE)
+              lt <- list_trees(res)
+              expect_true(inherits(lt, "list"))
+              expect_true(length(names(lt)) >=  5L)
+              expect_true(all(sapply(lt, length) >=  1L))
+          })
diff --git a/tests/testthat/test-taxonomy.R b/tests/testthat/test-taxonomy.R
new file mode 100644
index 0000000..1916acb
--- /dev/null
+++ b/tests/testthat/test-taxonomy.R
@@ -0,0 +1,341 @@
+context("taxonomy")
+
+############################################################################
+## taxonomy about                                                         ##
+############################################################################
+
+test_that("taxonomy_about is a list", {
+    skip_on_cran()
+    tt <- taxonomy_about()
+    expect_true(inherits(tt, "list"))
+})
+
+test_that("taxonomy_about has the names listed in documentation (if it breaks update documentation)", {
+    skip_on_cran()
+    tt <- taxonomy_about()
+    expect_true(all(names(tt) %in% c("weburl", "author", "name", "source", "version")))
+})
+
+
+############################################################################
+## taxon Info                                                             ##
+############################################################################
+
+test_that("taxonomy taxon info", {
+    skip_on_cran()
+    tid <- 515698
+    tt <- taxonomy_taxon_info(tid)
+    expect_equal(tt[[1]][["ott_id"]], tid)
+    expect_true(inherits(tt, "taxon_info"))
+})
+
+test_that("taxonomy with include_lineage=TRUE", {
+    skip_on_cran()
+    tt <- taxonomy_taxon_info(515698, include_lineage = TRUE)
+    expect_true(exists("lineage", tt[[1]]))
+    expect_true(length(tt[[1]]$lineage) > 1)
+})
+
+test_that("taxonomy with include_lineage=FALSE", {
+    skip_on_cran()
+    tt <- taxonomy_taxon_info(515698, include_lineage = FALSE)
+    expect_false(exists("lineage", tt[[1]]))
+})
+
+test_that("taxonomy with include_terminal_descendants=TRUE", {
+    skip_on_cran()
+    tt <- taxonomy_taxon_info(515698, include_terminal_descendants = TRUE)
+    expect_true(exists("terminal_descendants", tt[[1]]))
+    expect_true(length(tt[[1]][["terminal_descendants"]]) > 1)
+})
+
+test_that("taxonomy with include_terminal_descendants=FALSE", {
+    skip_on_cran()
+    tt <- taxonomy_taxon_info(515698, include_terminal_descendants = FALSE)
+    expect_false(exists("terminal_descendants", tt[[1]]))
+})
+
+if (identical(Sys.getenv("NOT_CRAN"), "true")) {
+    tid <- c(5004030, 337928, 631176)
+    tax_info <- taxonomy_taxon_info(tid)
+}
+
+test_that("taxonomy_taxon tax_rank method", {
+    skip_on_cran()
+    expect_true(inherits(tax_rank(tax_info),
+                         c("otl_tax_rank", "list")))
+    expect_equal(names(tax_rank(tax_info)),
+                 c("Holothuria", "Acanthaster",
+                   "Diadema (genus in Holozoa)"))
+    expect_equal(unlist(unname(tax_rank(tax_info))),
+                 rep("genus", 3))
+})
+
+test_that("taxonomy_taxon ott_taxon_name method", {
+    skip_on_cran()
+    expect_true(inherits(tax_name(tax_info),
+                         c("otl_tax_info", "list")))
+    expect_equal(names(tax_name(tax_info)),
+                 c("Holothuria", "Acanthaster",
+                   "Diadema (genus in Holozoa)"))
+    expect_equal(unlist(unname(tax_name(tax_info))),
+                 c("Holothuria", "Acanthaster", "Diadema"))
+})
+
+test_that("taxonomy_taxon synonyms method", {
+    skip_on_cran()
+    expect_true(inherits(synonyms(tax_info),
+                         c("otl_synonyms", "list")))
+    expect_equal(names(synonyms(tax_info)),
+                 c("Holothuria", "Acanthaster",
+                   "Diadema (genus in Holozoa)"))
+    expect_true(all(c("Diamema", "Centrechinus") %in%
+                    synonyms(tax_info)[[3]]))
+})
+
+test_that("taxonomy_taxon is_suppressed method", {
+    skip_on_cran()
+    expect_true(inherits(is_suppressed(tax_info),
+                         c("otl_is_suppressed", "list")))
+    expect_equal(names(is_suppressed(tax_info)),
+                 c("Holothuria", "Acanthaster",
+                   "Diadema (genus in Holozoa)"))
+    expect_equal(unlist(unname(is_suppressed(tax_info))),
+                 c(FALSE, FALSE, FALSE))
+})
+
+test_that("taxonomy_taxon flags method", {
+    skip_on_cran()
+    expect_true(inherits(flags(tax_info),
+                         c("otl_flags", "list")))
+    expect_equal(names(flags(tax_info)),
+                 c("Holothuria", "Acanthaster",
+                   "Diadema (genus in Holozoa)"))
+    expect_equal(unlist(unname(flags(tax_info))),
+                 NULL)
+})
+
+test_that("higher taxonomy method", {
+    skip_on_cran()
+    expect_error(tax_lineage(tax_info), "needs to be created")  # tax_info was fetched without include_lineage
+    lg <- tax_lineage(taxonomy_taxon_info(tid, include_lineage = TRUE))
+    expect_true(inherits(lg, "list"))
+    expect_true(inherits(lg[[1]], "data.frame"))
+    expect_true(all(names(lg[[1]]) %in% c("rank", "name", "unique_name", "ott_id")))
+    expect_true(any(grepl("no rank", lg[[1]][["rank"]])))
+    expect_true(any(grepl("life", lg[[1]][["name"]])))  # grepl() yields the logical vector any() expects
+})
+
+### ott_id() --------------------------------------------------------------------
+
+test_that("taxonomy_taxon_info with ott_id for tax_info", {
+    skip_on_cran()
+    expect_equivalent(ott_id(tax_info),
+                 ott_id(taxonomy_taxon_info(ott_id(tax_info))))
+})
+
+test_that("taxonomy_subtree with ott_id for tax_info", {
+    skip_on_cran()
+    expect_error(taxonomy_subtree(ott_id = ott_id(tax_info)),
+                 "supply one")
+})
+
+test_that("tol_node_info with ott_id for tax_info", {
+    skip_on_cran()
+    expect_error(tol_node_info(ott_id(tax_info)),
+                 "provide a single")
+})
+
+test_that("tol_subtree with ott_id for tax_info", {
+    skip_on_cran()
+    expect_error(tol_subtree(ott_id = ott_id(tax_info)),
+                 "provide a single")
+})
+
+test_that("tol_mrca with ott_id for tax_info", {
+    skip_on_cran()
+    expect_equivalent(list("Euleutheroza" = 317277),
+                      ott_id(tol_mrca(ott_id(tax_info))))
+})
+
+test_that("tol_induced_subtree with ott_id for tax_info", {
+    skip_on_cran()
+    expect_true(inherits(tol_induced_subtree(ott_id(tax_info)),
+                         "phylo"))
+})
+
+test_that("taxonomy_mrca with ott_id for tax_info", {
+    skip_on_cran()
+    expect_equivalent(list("Euleutheroza" = 317277),
+                      ott_id(taxonomy_mrca(ott_id(tax_info))))
+})
+
+
+############################################################################
+## taxon subtree                                                          ##
+############################################################################
+
+test_that("taxonomy subtree raw output", {
+    skip_on_cran()
+    tt <- taxonomy_subtree(515698, output_format = "raw")
+    expect_true(inherits(tt, "list"))
+    expect_identical(names(tt), "newick")
+})
+
+test_that("taxonomy subtree returns warning if file is provided with something else than newick output", {
+    skip_on_cran()
+    expect_warning(taxonomy_subtree(515698, output_format = "raw", file = "/foo/bar"),
+                   "ignored")
+})
+
+test_that("taxonomy subtree writes a 'valid' newick file", {
+    skip_on_cran()
+    ff <- tempfile(fileext = ".tre")
+    tt <- taxonomy_subtree(515698, output_format = "newick", file = ff)
+    expect_true(tt)
+    expect_true(grepl("^\\(", readLines(ff, n = 1, warn = FALSE)))
+})
+
+test_that("taxonomy subtree returns a valid newick string", {
+    skip_on_cran()
+    tt <- taxonomy_subtree(515698, output_format = "newick")
+    expect_true(inherits(ape::read.tree(text = tt), "phylo"))
+})
+
+test_that("taxonomy subtree returns a valid phylo object", {
+    skip_on_cran()
+    tt <- taxonomy_subtree(515698, output_format = "phylo")
+    expect_true(inherits(tt, "phylo"))
+})
+
+test_that("taxonomy subtree returns valid internal node names", {
+    skip_on_cran()
+    tt <- taxonomy_subtree(515698, output_format = "taxa")
+    expect_true(inherits(tt, "list"))
+    expect_equal(length(tt), 2)
+    expect_equal(length(tt$tip_label), 14)
+    expect_equal(length(tt$edge_label), 2)
+})
+
+test_that("taxonomy subtree works if taxa has only 1 descendant", {
+    skip_on_cran()
+    tt <- taxonomy_subtree(ott_id = 3658331, output_format = "taxa")
+    expect_true(inherits(tt, "list"))
+    expect_equal(length(tt), 2)
+    expect_true(inherits(tt$tip_label, "character"))
+})
+
+############################################################################
+## taxonomic MRCA                                                         ##
+############################################################################
+
+if (identical(Sys.getenv("NOT_CRAN"), "true")) {  # shared fixtures, only built when NOT_CRAN is "true"
+    tax_mrca <- taxonomy_mrca(ott_id = c(515698, 590452, 643717))
+    tax_mrca_mono <- taxonomy_mrca(ott_id = c(79623, 962377))
+}
+
+test_that("taxonomic most recent common ancestor", {
+    skip_on_cran()
+    expect_true(inherits(tax_mrca, "taxon_mrca"))
+    expect_true(inherits(tax_mrca, "list"))
+})
+
+test_that("mrca tax_rank method", {
+    skip_on_cran()
+    expect_equal(tax_rank(tax_mrca)[1],
+                 list("Asterales" = "order"))
+})
+
+test_that("mrca tax_name method", {
+    skip_on_cran()
+    expect_equal(tax_name(tax_mrca)[1],
+                 list("Asterales" = "Asterales"))
+})
+
+test_that("mrca ott_id method", {
+    skip_on_cran()
+    expect_equal(ott_id(tax_mrca)[1],
+                 list("Asterales" = 1042120))
+    expect_true(inherits(ott_id(tax_mrca), "otl_ott_id"))
+})
+
+test_that("mrca unique_name method", {
+    skip_on_cran()
+    expect_equal(unique_name(tax_mrca)[1],
+                 list("Asterales" = "Asterales"))
+    expect_true(inherits(unique_name(tax_mrca),
+                         "otl_unique_name"))
+})
+
+test_that("mrca tax_sources method", {
+    skip_on_cran()
+    expect_equal(tax_sources(tax_mrca)[1],
+                 list("Asterales" =
+                 c("ncbi:4209", "worms:234044",
+                   "gbif:414", "irmng:10011")))
+    expect_true(inherits(tax_sources(tax_mrca),
+                         "otl_tax_sources"))
+})
+
+test_that("mrca is_suppressed method", {
+    skip_on_cran()
+    expect_true(inherits(is_suppressed(tax_mrca),
+                         c("otl_is_suppressed", "list")))
+    expect_equal(is_suppressed(tax_mrca)[1],
+                 list("Asterales" = FALSE))
+})
+
+test_that("mrca flags method", {
+    skip_on_cran()
+    expect_true(inherits(flags(tax_mrca),
+                         c("otl_flags", "list")))
+    expect_equal(flags(tax_mrca)[1],
+                 list("Asterales" = NULL))
+})
+
+### ott_id() --------------------------------------------------------------------
+
+test_that("taxonomy_taxon_info with ott_id for tax_mrca", {
+    skip_on_cran()
+    expect_equivalent(ott_id(tax_mrca_mono),
+                 ott_id(taxonomy_taxon_info(ott_id(tax_mrca_mono))))
+})
+
+test_that("taxonomy_subtree with ott_id for tax_mrca", {
+    skip_on_cran()
+    tt <- taxonomy_subtree(ott_id = ott_id(tax_mrca_mono))
+    expect_true(length(tt[["tip_label"]]) > 10)
+    expect_true(length(tt[["edge_label"]]) > 1)
+})
+
+test_that("tol_node_info with ott_id for tax_mrca", {
+    skip_on_cran()
+    expect_equivalent(ott_id(tax_mrca_mono),
+                 ott_id(tol_node_info(ott_id(tax_mrca_mono))))
+})
+
+test_that("tol_subtree with ott_id for tax_mrca", {
+    skip_on_cran()
+    tt <- tol_subtree(ott_id = ott_id(tax_mrca_mono))
+    expect_true(inherits(tt, "phylo"))
+    expect_true(length(tt$tip.label) > 1)
+    expect_true(length(tt$node.label) > 1)
+})
+
+test_that("tol_mrca with ott_id for tax_mrca", {
+    skip_on_cran()
+    expect_equivalent(ott_id(tax_mrca_mono),
+                 ott_id(tol_mrca(ott_id(tax_mrca_mono))))
+})
+
+test_that("tol_induced_subtree with ott_id for tax_mrca", {
+    skip_on_cran()
+    expect_error(tol_induced_subtree(ott_id(tax_mrca_mono)),
+                 "least two valid")
+})
+
+test_that("taxonomy_mrca with ott_id for tax_mrca", {
+    skip_on_cran()
+    expect_equivalent(ott_id(tax_mrca_mono),
+                      ott_id(taxonomy_mrca(ott_id(tax_mrca_mono))))
+})
diff --git a/tests/testthat/test-tnrs.R b/tests/testthat/test-tnrs.R
new file mode 100644
index 0000000..128b7e8
--- /dev/null
+++ b/tests/testthat/test-tnrs.R
@@ -0,0 +1,67 @@
+context("tnrs")
+
+############################################################################
+## tnrs_match_names                                                       ##
+############################################################################
+
+test_that("tnrs_match_names fails if incorrect context is provided", {
+    skip_on_cran()
+    expect_error(tnrs_match_names("felis", context_name = "Cats"),
+                 "Check possible values using tnrs_contexts")
+})
+
+test_that("tnrs_match_names fails if invalid name provided (nothing returned)", {
+    skip_on_cran()
+    expect_error(tnrs_match_names("fluffy", do_approximate_matching = FALSE),
+                 "No matches for any of the provided taxa")
+})
+
+test_that("tnrs_match_names warns if a name is not matched", {
+    skip_on_cran()
+    expect_warning(tnrs_match_names(c("fluffy", "felis"), do_approximate_matching = FALSE),
+                   "are not matched")
+})
+
+test_that("object returned by tnrs_match_names have the correct data type", {
+    skip_on_cran()
+    birds <- c("stercorarius parasiticus", "ficedula albicollis", "sterna dougallii")
+    taxa <- tnrs_match_names(birds, do_approximate_matching = FALSE)
+    expect_true(is.logical(taxa[["approximate_match"]]))
+    expect_true(is.logical(taxa[["is_synonym"]]))
+})
+
+test_that("tnrs_match_names deals correctly with non-exact matches", {
+    skip_on_cran()
+    birds <- c("stercorarius parasiticus", "ficedula albicollis", "sternadougallii")
+    expect_warning(taxa <- tnrs_match_names(birds, do_approximate_matching = FALSE),
+                   "are not matched")
+    expect_equal(nrow(taxa), 3L)
+    expect_equivalent(taxa[match("sternadougallii", taxa[["search_string"]]), ],
+                 list("sternadougallii", NA_character_, NA, NA_character_, NA, NA_character_, NA_character_))
+
+})
+
+## everything else is covered by the match_names + the API tests
+
+############################################################################
+## tnrs_contexts                                                          ##
+############################################################################
+
+test_that("tnrs_contexts", {
+    skip_on_cran()
+    tc <- tnrs_contexts()
+    expect_true(inherits(tc, "tnrs_contexts"))
+    expect_true(all(names(tc) %in% c("ANIMALS", "MICROBES", "FUNGI", "PLANTS", "LIFE")))
+})
+
+############################################################################
+## tnrs_infer_context                                                     ##
+############################################################################
+
+test_that("tnrs_infer_context", {
+    skip_on_cran()
+    tic <- tnrs_infer_context(c("Felis", "Leo"))
+    expect_equal(tic[["context_name"]], "Mammals")
+    expect_equal(tic[["context_ott_id"]], 244265)
+    expect_equal(tic[["ambiguous_names"]][[1]], "leo")
+})
diff --git a/tests/testthat/test-tol.R b/tests/testthat/test-tol.R
new file mode 100644
index 0000000..095e86b
--- /dev/null
+++ b/tests/testthat/test-tol.R
@@ -0,0 +1,456 @@
+############################################################################
+## tol_about                                                              ##
+############################################################################
+
+context("test tol_about (and in turn print.tol_summary)")
+
+if (identical(Sys.getenv("NOT_CRAN"), "true")) {
+    req <- tol_about(include_source_list = TRUE)
+}
+
+test_that("Names in object returned are correct/match the docs", {
+    skip_on_cran()
+    expect_true(all(names(req) %in%
+                    c("source_list", "date_created", "root", "num_source_trees",
+                      "taxonomy_version", "num_source_studies",
+                      "filtered_flags", "synth_id", "source_id_map")))
+    expect_true(all(names(req$root) %in%
+                    c("taxon", "num_tips", "node_id")))
+    expect_true(all(names(req$root$taxon) %in%
+                    c("tax_sources", "name", "unique_name", "rank", "ott_id")))
+    expect_true(all(names(source_list(req)) %in% c("study_id",
+                                                   "tree_id",
+                                                   "git_sha")))
+    expect_error(source_list(tol_about(include_source_list = FALSE)),
+                 "has been created using")
+    expect_true(nrow(source_list(req)) > 1)
+    expect_true(all(grepl("^(ot|pg)", source_list(req)[["study_id"]])))
+    expect_true(all(grepl("^tr", source_list(req)[["tree_id"]], ignore.case = TRUE)))
+})
+
+
+
+test_that("tol_node tax_rank method", {
+    skip_on_cran()
+    expect_true(inherits(tax_rank(req),
+                         c("otl_rank", "list")))
+    expect_equal(tax_rank(req)[[1]], "no rank")
+})
+
+test_that("tol_node ott_id method", {
+    skip_on_cran()
+    expect_true(inherits(ott_id(req),
+                         c("otl_ott_id", "list")))
+    expect_equal(ott_id(req)[[1]], 93302)
+    expect_equal(names(ott_id(req)), "cellular organisms")
+})
+
+test_that("tol_node tax_sources", {
+    skip_on_cran()
+    expect_true(inherits(tax_sources(req),
+                         c("otl_tax_sources", "list")))
+    expect_true(any(grepl("ncbi", tax_sources(req)[[1]])))
+    expect_equal(names(tax_sources(req)), "cellular organisms")
+})
+
+test_that("tol_node unique_name", {
+    skip_on_cran()
+    expect_true(inherits(unique_name(req),
+                         c("otl_unique_name", "list")))
+    expect_equal(unique_name(req)[[1]], "cellular organisms")
+    expect_equal(names(unique_name(req)), "cellular organisms")
+})
+
+test_that("tol_node tax_name", {
+    skip_on_cran()
+    expect_true(inherits(tax_name(req),
+                         c("otl_name", "list")))
+    expect_equal(tax_name(req)[[1]], "cellular organisms")
+    expect_equal(names(tax_name(req)), "cellular organisms")
+})
+
+### ott_id() --------------------------------------------------------------------
+
+test_that("taxonomy_taxon_info with ott_id for tol_about", {
+    skip_on_cran()
+    expect_equal(ott_id(req),
+                 ott_id(taxonomy_taxon_info(ott_id(req))))
+})
+
+## can't do that, it's pulling the whole tree
+## test_that("taxonomy_subtree with ott_id for tol_about", {
+##     taxonomy_subtree(ott_id = ott_id(req))
+## })
+
+test_that("tol_node_info with ott_id for tol_about", {
+    skip_on_cran()
+    expect_equal(ott_id(req),
+                 ott_id(tol_node_info(ott_id(req))))
+})
+
+## can't do that, it's pulling the whole tree
+## test_that("tol_subtree with ott_id for tol_about", {
+##     tol_subtree(ott_id = ott_id(req))
+## })
+
+test_that("tol_mrca with ott_id for tol_about", {
+    skip_on_cran()
+    expect_equal(ott_id(req)[1],
+                 ott_id(tol_mrca(ott_id(req)))[1])
+})
+
+test_that("tol_induced_subtree with ott_id for tol_about", {
+    skip_on_cran()
+    expect_error(tol_induced_subtree(ott_id(req)),
+                 "least two valid")
+})
+
+test_that("taxonomy_mrca with ott_id for tol_about", {
+    skip_on_cran()
+    expect_equal(ott_id(req),
+                 ott_id(taxonomy_mrca(ott_id(req))))
+})
+
+############################################################################
+## tol_subtree                                                            ##
+############################################################################
+
+context("test tol_subtree")
+
+test_that("tol_subtree fails if ott_id is invalid", {
+    skip_on_cran()
+    expect_error(tol_subtree(ott_id = 6666666))
+})
+
+test_that("tol_subtree fails if more than one ott_id is provided", {
+    skip_on_cran()
+    expect_error(tol_subtree(ott_id = c(666666, 6666667)),
+                 "Please provide a single")
+})
+
+test_that("tol_subtree fails if ott_id doesn't look like a number", {
+    skip_on_cran()
+    expect_error(tol_subtree(ott_id = "111A1111"),
+                 "must look like numbers")
+})
+
+test_that("tol_subtree returns a phylo object by default", {
+    skip_on_cran()
+    expect_true(inherits(tol_subtree(ott_id = 81461), "phylo"))
+})
+
+test_that("tol_subtree returns a newick file when providing a file argument", {
+    skip_on_cran()
+    ff <- tempfile(fileext = ".tre")
+    tr <- tol_subtree(ott_id = 81461,  file = ff)
+    expect_true(tr)
+    expect_true(grepl("^\\(", readLines(ff, n = 1, warn = FALSE)))
+})
+
+
+############################################################################
+## tol_induced_subtree                                                    ##
+############################################################################
+
+context("test tol_induced_subtree")
+
+test_that("error for node ids that are not in TOL graph", {
+    skip_on_cran()
+    expect_error(tol_induced_subtree(ott_ids = c(357968, 867416, 939325, 9999999)),
+                 "not found")  # the call must raise an error (not a warning) matching this pattern
+})
+
+test_that("error if ott_ids provided don't look like numbers", {
+    skip_on_cran()
+    expect_error(tol_induced_subtree(ott_ids = c("13242", "kitten")),
+                 "must look like numbers")
+})
+
+
+## test_that("warning for ott ids not in tree",
+##           ???)
+
+test_that("tol_induced_subtree generates a newick file when providing a file argument", {
+    skip_on_cran()
+    ff <- tempfile(fileext = ".tre")
+    tr <- tol_induced_subtree(ott_ids=c(292466, 267845, 666104), file = ff)
+    expect_true(tr)
+    expect_true(grepl("^\\(", readLines(ff, n = 1, warn = FALSE)))
+})
+
+
+############################################################################
+## tol_mrca                                                               ##
+############################################################################
+
+if (identical(Sys.getenv("NOT_CRAN"), "true")) {
+    birds <- tol_mrca(ott_ids = c(412129, 536234))
+    hol <- tol_mrca(c(431586, 957434))
+    mono <- tol_mrca(ott_ids = c(962377, 79623))
+}
+
+test_that("tol_mrca fails if ott_ids are not numbers", {
+    skip_on_cran()
+    expect_error(tol_mrca(ott_ids = c(13243, "a13415")),
+                 "must look like numbers")
+})
+
+test_that("tol_mrca returns a list", {
+    skip_on_cran()
+    expect_true(inherits(birds, "list"))
+    expect_true(inherits(birds, "tol_mrca"))
+    expect_true(all(names(birds) %in%
+                    c("mrca",
+                      "source_id_map",
+                      "nearest_taxon")))
+})
+
+test_that("methods for tol_mrca where the node is a taxon", {
+    skip_on_cran()
+    expect_true(inherits(tax_sources(hol),
+                         c("otl_tax_sources", "list")))
+    expect_true(inherits(unique_name(hol),
+                         c("otl_unique_name", "list")))
+    expect_true(inherits(tax_name(hol),
+                         c("otl_name", "list")))
+    expect_true(inherits(tax_rank(hol),
+                         c("otl_rank", "list")))
+    expect_true(inherits(ott_id(hol),
+                         c("otl_ott_id", "list")))
+    expect_true(length(tax_sources(hol)[[1]]) > 1)
+    expect_true(any(grepl("worms", tax_sources(hol)[[1]])))
+    expect_equal(unique_name(hol)[[1]], "Holothuria")
+    expect_equal(tax_name(hol)[[1]], "Holothuria")
+    expect_equal(tax_rank(hol)[[1]], "genus")
+    expect_equal(ott_id(hol)[[1]], 5004030)
+    expect_equal(names(tax_sources(hol)), "Holothuria")
+    expect_true(all(names(source_list(hol)) %in% c("tree_id",
+                                                   "study_id",
+                                                   "git_sha")))
+    expect_equal(attr(tax_sources(hol), "taxon_type"), "mrca")
+})
+
+test_that("methods for tol_mrca where the node is not a taxon", {
+    skip_on_cran()
+    expect_true(inherits(birds, "list"))
+    expect_true(inherits(tax_sources(birds),
+                         c("otl_tax_sources", "list")))
+    expect_true(inherits(unique_name(birds),
+                         c("otl_unique_name", "list")))
+    expect_true(inherits(tax_name(birds),
+                         c("otl_name", "list")))
+    expect_true(inherits(tax_rank(birds),
+                         c("otl_rank", "list")))
+    expect_true(inherits(ott_id(birds),
+                         c("otl_ott_id", "list")))
+    expect_true(length(tax_sources(birds)[[1]]) >=  1)
+    expect_true(any(grepl("ncbi", tax_sources(birds)[[1]])))
+    expect_equal(unique_name(birds)[[1]], "Neognathae")
+    expect_equal(tax_name(birds)[[1]], "Neognathae")
+    expect_equal(tax_rank(birds)[[1]], "superorder")
+    expect_equal(ott_id(birds)[[1]], 241846)
+    expect_equal(names(ott_id(birds)), "Neognathae")
+    expect_true(all(names(source_list(birds)) %in% c("tree_id",
+                                                          "study_id",
+                                                          "git_sha")))
+    expect_equal(attr(tax_sources(birds), "taxon_type"), "nearest_taxon")
+})
+
+### ott_id() --------------------------------------------------------------------
+
+test_that("taxonomy_taxon_info with ott_id for tol_mrca", {
+    skip_on_cran()
+    expect_equal(ott_id(mono)[1],
+                 ott_id(taxonomy_taxon_info(ott_id(mono)))[1])
+})
+
+test_that("taxonomy_subtree with ott_id for tol_mrca", {
+    skip_on_cran()
+    tt <- taxonomy_subtree(ott_id = ott_id(mono))
+    expect_true(length(tt[["tip_label"]]) > 10)
+    expect_true(length(tt[["edge_label"]]) > 7)
+})
+
+test_that("tol_node_info with ott_id for tol_mrca", {
+    skip_on_cran()
+    expect_equal(ott_id(mono)[1],
+                 ott_id(tol_node_info(ott_id(mono)))[1])
+})
+
+test_that("tol_subtree with ott_id for tol_mrca", {
+    skip_on_cran()
+    tt <- tol_subtree(ott_id = ott_id(mono))
+    expect_true(inherits(tt, "phylo"))
+    expect_true(length(tt$tip.label) > 1)
+    expect_true(length(tt$node.label) > 1)
+})
+
+test_that("tol_mrca with ott_id for tol_mrca", {
+    skip_on_cran()
+    expect_equal(ott_id(mono)[1],
+                 ott_id(tol_mrca(ott_id(mono)))[1])
+})
+
+test_that("tol_induced_subtree with ott_id for tol_mrca", {
+    skip_on_cran()
+    expect_error(tol_induced_subtree(ott_id(mono)),
+                 "least two valid")
+})
+
+test_that("taxonomy_mrca with ott_id for tol_mrca", {
+    skip_on_cran()
+    expect_equivalent(ott_id(mono),
+                      ott_id(taxonomy_mrca(ott_id(mono))))
+})
+
+
+############################################################################
+## strip_ott_ids                                                          ##
+############################################################################
+
+test_that("OTT ids can be stripped from tip labels to allow taxon-matching", {
+    skip_on_cran()
+    genera <- c("Setophaga", "Cinclus", "Struthio")
+    tr <- tol_induced_subtree(ott_ids=c(666104, 267845, 292466))
+    expect_true(all(strip_ott_ids(tr$tip.label) %in% genera))  # every stripped tip label must be one of the genera
+})
+
+
+############################################################################
+## tol_node_info                                                          ##
+############################################################################
+
+if (identical(Sys.getenv("NOT_CRAN"), "true")) {
+    tol_info <- tol_node_info(ott_id = 81461)
+    tol_lin <- tol_node_info(ott_id = 81461, include_lineage = TRUE)
+    tol_mono <- tol_node_info(ott_id = 962396)
+}
+
+test_that("tol node info.", {
+    skip_on_cran()
+    expect_true(all(names(tol_info) %in%
+                      c("partial_path_of", "supported_by", "source_id_map", "taxon",
+                        "num_tips", "terminal", "node_id")))
+    expect_true(inherits(tol_info, "tol_node"))
+})
+
+
+### methods ---------------------------------------------------------------------
+
+test_that("tol_node tax_rank method", {
+    skip_on_cran()
+    expect_true(inherits(tax_rank(tol_info),
+                         c("otl_tax_rank", "list")))
+    expect_equal(tax_rank(tol_info)[[1]], "class")
+})
+
+test_that("tol_node ott_id method", {
+    skip_on_cran()
+    expect_true(inherits(ott_id(tol_info),
+                         c("otl_ott_id", "list")))
+    expect_equal(ott_id(tol_info)[[1]], 81461)
+    expect_equal(names(ott_id(tol_info)), "Aves")
+})
+
+test_that("tol_node tax_sources", {
+    skip_on_cran()
+    expect_true(inherits(tax_sources(tol_info),
+                         c("otl_tax_sources", "list")))
+    expect_true(any(grepl("worms", tax_sources(tol_info)[[1]])))
+    expect_equal(names(tax_sources(tol_info)), "Aves")
+})
+
+test_that("tol_node unique_name", {
+    skip_on_cran()
+    expect_true(inherits(unique_name(tol_info),
+                         c("otl_unique_name", "list")))
+    expect_equal(unique_name(tol_info)[[1]], "Aves")
+    expect_equal(names(unique_name(tol_info)), "Aves")
+})
+
+test_that("tol_node tax_name", {
+    skip_on_cran()
+    expect_true(inherits(tax_name(tol_info),
+                         c("otl_name", "list")))
+    expect_equal(tax_name(tol_info)[[1]], "Aves")
+    expect_equal(names(tax_name(tol_info)), "Aves")
+})
+
+
+test_that("tol_node source_list method", {
+    skip_on_cran()
+    expect_true(inherits(source_list(tol_info), "data.frame"))
+    expect_true(all(names(source_list(tol_info)) %in%
+                      c("study_id", "tree_id", "git_sha")))
+})
+
+test_that("tol_node tol_lineage", {
+    skip_on_cran()
+    expect_error(tol_lineage(tol_info), "needs to be created")
+    expect_true(inherits(tol_lineage(tol_lin), "data.frame"))
+    expect_true(nrow(tol_lineage(tol_lin)) > 1)
+    expect_true(all(names(tol_lineage(tol_lin)) %in% c("node_id",
+                                                       "num_tips",
+                                                       "is_taxon")))
+    expect_true(all(grepl("^(ott|mrca)", tol_lineage(tol_lin)[["node_id"]])))
+})
+
+test_that("tol_node tax_lineage", {
+    skip_on_cran()
+    expect_error(tax_lineage(tol_info), "needs to be created")
+    expect_true(inherits(tax_lineage(tol_lin), "data.frame"))
+    expect_true(nrow(tax_lineage(tol_lin)) > 1)
+    expect_true(all(names(tax_lineage(tol_lin)) %in% c("rank",
+                                                       "name",
+                                                       "unique_name",
+                                                       "ott_id")))
+    expect_true(any(grepl("no rank", tax_lineage(tol_lin)[["rank"]])))
+    expect_true(any(grepl("cellular organisms", tax_lineage(tol_lin)[["name"]])))
+})
+
+### ott_id() --------------------------------------------------------------------
+
+test_that("taxonomy_taxon_info with ott_id for tol_info", {
+    skip_on_cran()
+    expect_equivalent(ott_id(tol_mono),
+                 ott_id(taxonomy_taxon_info(ott_id(tol_mono))))
+})
+
+test_that("taxonomy_subtree with ott_id for tol_info", {
+    skip_on_cran()
+    tt <- taxonomy_subtree(ott_id = ott_id(tol_mono))
+    expect_true(length(tt[["tip_label"]]) > 10)
+    expect_true(length(tt[["edge_label"]]) > 7)
+})
+
+test_that("tol_node_info with ott_id for tol_info", {
+    skip_on_cran()
+    expect_equivalent(ott_id(tol_mono),
+                 ott_id(tol_node_info(ott_id(tol_mono))))
+})
+
+test_that("tol_subtree with ott_id for tol_info", {
+    skip_on_cran()
+    tt <- tol_subtree(ott_id = ott_id(tol_mono))
+    expect_true(inherits(tt, "phylo"))
+    expect_true(length(tt$tip.label) > 1)
+    expect_true(length(tt$node.label) > 1)
+})
+
+test_that("tol_mrca with ott_id for tol_info", {
+    skip_on_cran()
+    expect_equivalent(ott_id(tol_mono),
+                 ott_id(tol_mrca(ott_id(tol_mono))))
+})
+
+test_that("tol_induced_subtree with ott_id for tol_info", {
+    skip_on_cran()
+    expect_error(tol_induced_subtree(ott_id(tol_mono)),
+                 "least two valid")
+})
+
+test_that("taxonomy_mrca with ott_id for tol_info", {
+    skip_on_cran()
+    expect_equivalent(ott_id(tol_mono),
+                      ott_id(taxonomy_mrca(ott_id(tol_mono))))
+})
diff --git a/tests/testthat/test-tree_to_labels.R b/tests/testthat/test-tree_to_labels.R
new file mode 100644
index 0000000..a8877b3
--- /dev/null
+++ b/tests/testthat/test-tree_to_labels.R
@@ -0,0 +1,51 @@
+context("test tree_to_labels")
+
+test_that("basic tree 1", {
+    tree1 <- "((raccon:19.19959,bear:6.80041)InnerNode1:0.84600,((sea_lion:11.99700,seal:12.00300)InnerNode2:7.52973,((monkey:100.85930,cat:47.14069):20.59201,weasel:18.87953):2.09460):3.87382,dog:25.46154);"
+    res_tree1 <- tree_to_labels(tree1)
+    expect_equal(res_tree1$tip_label, c("raccon", "bear", "sea_lion", "seal", "monkey", "cat", "weasel", "dog"))
+    expect_equal(res_tree1$edge_label, c("InnerNode1", "InnerNode2"))
+})
+
+test_that("basic tree 2", {
+    tree2 <- "(Bovine:0.69395,(Gibbon:0.36079,(Orang:0.33636,(Gorilla:0.17147,(Chimp:0.19268, Human:0.11927):0.08386):0.06124):0.15057):0.54939,Mouse:1.21460):0.10;"
+    res_tree2 <- tree_to_labels(tree2)
+    expect_equal(res_tree2$tip_label, c("Bovine", "Gibbon", "Orang", "Gorilla", "Chimp", "Human", "Mouse"))
+    expect_equal(res_tree2$edge_label, character(0))
+})
+
+test_that("basic tree 3", {
+    tree3 <- "(Bovine:0.69395,(Hylobates:0.36079,(Pongo:0.33636,(G._Gorilla:0.17147, (P._paniscus:0.19268,H._sapiens:0.11927):0.08386):0.06124):0.15057):0.54939, Rodent:1.21460);"
+    res_tree3 <- tree_to_labels(tree3)
+    expect_equal(res_tree3$tip_label, c("Bovine", "Hylobates", "Pongo", "G._Gorilla", "P._paniscus", "H._sapiens", "Rodent"))
+    expect_equal(res_tree3$edge_label, character(0))
+})
+
+test_that("only 1 tip", {
+    tree_tip <- "A;"
+    res_tree_tip <- tree_to_labels(tree_tip)
+    expect_equal(res_tree_tip$tip_label, "A")
+    expect_equal(res_tree_tip$edge_label, character(0))
+})
+
+test_that("only 1 tip with parentheses", {
+    tree_tip <- "(A);"
+    res_tree_tip <- tree_to_labels(tree_tip)
+    expect_equal(res_tree_tip$tip_label, "A")
+    expect_equal(res_tree_tip$edge_label, character(0))
+})
+
+test_that("only 1 tip and 1 internal", {
+    tree_tip <- "(A)B;"
+    res_tree_tip <- tree_to_labels(tree_tip)
+    expect_equal(res_tree_tip$tip_label, "A")
+    expect_equal(res_tree_tip$edge_label, "B")
+})
+
+
+test_that("tree with singletons", {
+    tree_sing <- "(((((A)cats,B)dogs,(C,D)ducks)frogs)animals,E)fungi;"
+    res_tree_sing <- tree_to_labels(tree_sing)
+    expect_equal(res_tree_sing$tip_label, LETTERS[1:5])
+    expect_equal(res_tree_sing$edge_label, c("cats", "dogs", "ducks", "frogs", "animals", "fungi"))
+})
diff --git a/tests/tree_of_life.json b/tests/tree_of_life.json
new file mode 100644
index 0000000..2438dc9
--- /dev/null
+++ b/tests/tree_of_life.json
@@ -0,0 +1,111 @@
+{
+    "test_mrca_normal_input": {
+        "test_function": "tol_mrca",
+        "test_input": {"ott_ids":[412129, 536234]},
+        "tests": {
+             "of_type":
+                        ["dict","Response is of wrong type"]
+                        ,
+             "equals":  [
+                        [["nearest_taxon_mrca_rank","'superorder'"],"Fails that nearest_taxon_mrca_rank contains superorder"]
+                        ],
+             "contains": [
+                        ["nearest_taxon_mrca_ott_id","Doesn't contain nearest_taxon_mrca_ott_id"]
+                        ]
+        }
+    },
+    "test_mrca_empty_list_input": {
+        "test_function": "tol_mrca",
+        "test_input": {"ott_ids":[]},
+        "tests": {
+             "error": [
+                 ["ValueError","Return wrong kind of error, or did return error"]
+                 ]
+        }
+    },
+    "test_mrca_empty_list_input_two": {
+        "test_function": "tol_mrca",
+        "test_input": {"ott_ids":[],
+                       "node_ids":[]},
+        "tests": {
+             "error": [
+                 ["ValueError","Return wrong kind of error, or did return error"]
+                 ]
+        }
+    },
+    "test_mrca_non_existing_node": {
+        "test_function": "tol_mrca",
+        "test_input": {"ott_ids":[4259824365942365972436598732]},
+        "tests": {
+            "error": [
+                ["OpenTreeService.OpenTreeError","Return wrong kind of error, or did return error"]
+                ]
+        }
+    },
+    "test_mrca_non_existing_empty": {
+        "test_function": "tol_mrca",
+        "test_input": "null",
+        "tests": {
+            "error": [
+                ["ValueError","Return wrong kind of error, or did return error"]
+                ]
+        }
+    },
+    "test_subtree_demo": {
+        "test_function": "tol_subtree",
+        "test_input": {"ott_id":3599390},
+        "tests": {
+             "of_type":
+                 ["dict","Response is of wrong type"]
+                 ,
+             "contains": [
+                 ["newick","Doesn't contain a newick string"]
+                 ]
+        }
+    },
+    "test_subtree_null": {
+        "test_function": "tol_subtree",
+        "test_input": {},
+        "tests": {
+            "error": [
+                ["ValueError","Return wrong kind of error, or did return error"]
+            ],
+	    "of_type": ["jfdsm"]
+        }
+    },
+    "test_induced_tree_good": {
+        "test_function": "tol_induced_tree",
+        "test_input": {"ott_ids":[292466, 501678, 267845, 666104, 316878, 102710, 176458]},
+        "tests": {
+             "of_type":
+                 ["dict","Response is of wrong type"]
+                 ,
+             "contains": [
+                 ["subtree","Doesn't contain a subtree string"]
+                 ]
+        }
+    },
+    "test_induced_tree_null": {
+        "test_function": "tol_induced_tree",
+        "test_input": {},
+        "tests": {
+            "error": [
+                ["ValueError","Return wrong kind of error, or did return error"]
+            ],
+	    "of_type": ["something"]
+        }
+    },
+    "test_about": {
+        "test_function": "tol_about",
+        "test_input": {},
+        "tests": {
+            "contains": [
+                ["root_taxon_name","Output doesn't contain root_taxon_name"],
+                ["num_source_studies","Output doesn't contain num_source_studies"],
+                ["taxonomy_version","Output doesn't contain taxonomy_version"],
+                ["root_ott_id","Output doesn't contain root_ott_id"],
+                ["num_tips","Output doesn't contain num_tips"]
+                ]
+        }
+    }
+}
diff --git a/vignettes/data_mashups.Rmd b/vignettes/data_mashups.Rmd
new file mode 100644
index 0000000..d2a3f78
--- /dev/null
+++ b/vignettes/data_mashups.Rmd
@@ -0,0 +1,222 @@
+---
+title: "Connecting data to Open Tree trees"
+author: "David Winter"
+date: "`r Sys.Date()`"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Connecting data to Open Tree trees}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+## Combining data from OToL and other sources.
+
+One of the major goals of `rotl` is to help users combine data from other
+sources with the phylogenetic trees in the Open Tree database. This example
+document describes some of the ways in which a user might connect data to trees
+from Open Tree.
+
+## Get Open Tree IDs to match your data.
+
+Let's say you have a dataset where each row represents a measurement taken from
+one species, and your goal is to put these measurements in some phylogenetic
+context. Here's a small example: the best estimate of the mutation rate for a
+set of unicellular Eukaryotes along with some other property of those species
+which might explain the mutation rate:
+
+```{r, data}
+csv_path <- system.file("extdata", "protist_mutation_rates.csv", package = "rotl")
+mu <- read.csv(csv_path, stringsAsFactors=FALSE)
+mu
+```
+
+If we want to get a tree for these species we need to start by finding the
+unique ID for each of these species in the Open Tree database. We can use the
+Taxonomic Name Resolution Service (`tnrs`) functions to do this. Before we do
+that we should see if any of the taxonomic contexts, which can be used to narrow
+a search and avoid conflicts between different codes, apply to our group of species:
+
+```{r, context}
+library(rotl)
+tnrs_contexts()
+```
+
+Hmm, none of those groups contain all of our species. In this case we can
+search using the `All life` context and the function `tnrs_match_names`:
+
+```{r, match}
+taxon_search <- tnrs_match_names(names=mu$species, context_name="All life")
+knitr::kable(taxon_search)
+```
+
+Good, all of our  species are known to Open Tree. Note, though, that one of the names
+is a synonym. _Saccharomyces pombe_ is an older name for what is now called
+_Schizosaccharomyces pombe_. As the name suggests, the Taxonomic Name
+Resolution Service is designed to deal with these problems (and similar ones
+like misspellings), but it is always a good idea to check the results of
+`tnrs_match_names` closely to ensure the results are what you expect.
+
+In this case we have a good ID for each of our species so we can move on. Before
+we do that, let's ensure we can match up our original data to the Open Tree
+names and IDs by adding them to our `data.frame`:
+
+```{r, munge}
+mu$ott_name <- taxon_search$unique_name
+mu$ott_id <- taxon_search$ott_id
+```
+
+## Find a tree with your taxa
+
+Now let's find a tree. There are two possible options here: we can search for
+published studies that include our taxa or we can use the 'synthetic tree' from
+Open Tree. We can try both approaches.
+
+### Published trees
+
+Before we can search for published studies or trees, we should check out the
+list of properties we can use to perform such searches:
+
+```{r, properties}
+studies_properties()
+```
+
+We have `ottIds` for our taxa, so let's use those IDs to search for trees that
+contain them.  Starting with our first species _Tetrahymena thermophila_ we can
+use `studies_find_trees` to do this search.
+
+```{r taxon_count}
+studies_find_trees(property="ot:ottId", value="180195")
+```
+
+Well... that's not very promising. We can repeat that process for all of the IDs
+to see if the other species are better represented.
+
+
+```{r, all_taxa_count}
+hits <- lapply(mu$ott_id, studies_find_trees, property="ot:ottId", detailed = FALSE)
+sapply(hits, function(x) sum(x[["n_matched_trees"]]))
+```
+
+OK, most of our species are not in any of the published trees available. You can
+help fix this sort of problem by [making sure you submit your published trees to
+Open Tree](https://tree.opentreeoflife.org/curator).
+
+### A part of the synthesis tree
+
+Thankfully, we can still use the complete Tree of Life made from the
+combined results of all of the published trees and taxonomies that go into Open
+Tree. The function `tol_induced_subtree` will fetch a tree relating a set of IDs.
+
+Using the default arguments you can get a tree object into your R session:
+
+
+```{r subtree,  fig.width=7, fig.height=4}
+tr <- tol_induced_subtree(ott_ids=mu$ott_id)
+plot(tr)
+```
+
+### Connect your data to the tips of your tree
+
+Now that we have a tree for our species, how can we use the tree and the data
+together?
+
+The package `phylobase` provides an object class called `phylo4d`, which is
+designed to represent a phylogeny and data associated with its tips. In order to
+get our tree and data into one of these objects we have to make sure the labels
+in the tree and in our data match exactly. That's not quite the case at the
+moment (tree labels have underscores and IDs appended):
+
+```{r, match_names}
+mu$ott_name[1]
+tr$tip.label[4]
+```
+
+`rotl` provides a convenience function `strip_ott_ids` to deal with these.
+
+```{r, sub}
+tr$tip.label <- strip_ott_ids(tr$tip.label, remove_underscores=TRUE)
+tr$tip.label %in% mu$ott_name
+```
+
+Ok, now the tips are together we can make a new dataset. The `phylo4d()`
+function matches tip labels to the row names of a `data.frame`, so let's make
+a new dataset that contains just the relevant data and has row names to match
+the tree
+
+```{r phylobase}
+library(phylobase)
+mu_numeric <- mu[,c("mu", "pop.size", "genome.size")]
+rownames(mu_numeric) <- mu$ott_name
+tree_data <- phylo4d(tr, mu_numeric)
+```
+And now we can plot the data and the tree together
+
+
+```{r,  fig.width=7, fig.height=5}
+plot(tree_data)
+```
+
+## Find external data associated with studies, trees and taxa from Open Tree
+
+In the above example we looked for a tree that related species in another dataset. 
+Now we will go the other way, and try to find data associated with Open Tree records
+in other databases.
+
+### Get external data from a study
+
+Let's imagine you were interested in extending or reproducing the results of a
+published study. If that study is included in Open Tree you can find it via
+`studies_find_studies` or `studies_find_trees` and retrieve the published trees 
+with `get_study`. `rotl` will also help you find external data. The function 
+`study_external_IDs` retrieves the DOI for a given study, and uses that to 
+gather some more data:
+
+```{r}
+extra_data <- study_external_IDs("pg_1980")
+extra_data
+```
+
+Here the returned object contains an `external_data_url` (in this case a link to
+the study in Treebase), a pubmed ID for the paper and a vector of IDs for the
+NCBI's nucleotide database. The packages `treebase` and `rentrez` provide
+functions to make use of these IDs within R.
+
+As an example, let's use `rentrez` to download the first two DNA sequences and
+print them. 
+
+```{r}
+library(rentrez)
+seqs <- entrez_fetch(db="nucleotide", id=extra_data$nucleotide_ids[1:2], rettype="fasta")
+cat(seqs)
+```
+
+You could further process these sequences in R with the function `read.dna` from 
+`ape` or save them to disk by specifying a file name with `cat`.
+
+### Find a OTT taxon in another taxonomic database
+
+It is also possible to map an Open Tree taxon to a record in another taxonomic
+database. For instance, if we wanted to search for data about one of the tips of
+the sub-tree we fetched in the example above we could do so using
+`taxon_external_IDs`:
+
+```{r}
+Tt_ids <- taxon_external_IDs(mu$ott_id[2])
+Tt_ids
+```
+
+A user could then use `rgbif` to find locality records using the gbif ID or
+`rentrez` to get genetic or bibliometric data about it from the NCBI's databases.
+
+
+## What next
+
+The demonstration gets you to the point of visualizing your data in a
+phylogenetic context. But there's a lot more you can do with this sort of data in R.
+For instance, you could use packages like `ape`, `caper`, `phytools` and
+`MCMCglmm` to perform phylogenetic comparative analyses of your data. You could
+gather more data on your species using packages that connect to
+trait databases like `rfishbase`, `AntWeb` or `rnpn` which provides data from
+the US National Phenology Network. You could also use `rentrez` to find genetic
+data for each of your species, and use that data to generate branch lengths for
+the phylogeny.
diff --git a/vignettes/how-to-use-rotl.Rmd b/vignettes/how-to-use-rotl.Rmd
new file mode 100644
index 0000000..77c9d88
--- /dev/null
+++ b/vignettes/how-to-use-rotl.Rmd
@@ -0,0 +1,283 @@
+---
+title: "How to use rotl?"
+author: "François Michonneau"
+date: "`r Sys.Date()`"
+output:
+  rmarkdown::html_vignette:
+    css: vignette.css
+vignette: >
+  %\VignetteIndexEntry{How to use rotl?}
+  %\VignetteEngine{knitr::rmarkdown}
+  \usepackage[utf8]{inputenc}
+---
+
+`rotl` provides an interface to the Open Tree of Life (OTL) API and allows users
+to query the API, retrieve parts of the Tree of Life and integrate these parts
+with other R packages.
+
+The OTL API provides services to access:
+
+* the **Tree of Life** a.k.a. TOL (the synthetic tree): a single draft tree that is
+  a combination of **the OTL taxonomy** and the **source trees** (studies)
+* the **Taxonomic name resolution services** a.k.a. TNRS: the methods for
+  resolving taxonomic names to the internal identifiers used by the TOL and the
+  GOL (the `ott ids`).
+* the **Taxonomy** a.k.a. OTT (for Open Tree Taxonomy): which represents the
+  synthesis of the different taxonomies used as a backbone of the TOL when no
+  studies are available.
+* the **Studies** containing the source trees used to build the TOL, and
+  extracted from the scientific literature.
+
+In `rotl`, each of these services corresponds to functions with different
+prefixes:
+
+| Service       | `rotl` prefix |
+|---------------|---------------|
+| Tree of Life  | `tol_`        |
+| TNRS          | `tnrs_`       |
+| Taxonomy      | `taxonomy_`   |
+| Studies       | `studies_`    |
+
+`rotl` also provides a few other functions and methods that can be used to
+extract relevant information from the objects returned by these functions.
+
+
+## Demonstration of a basic workflow
+
+The most common use for `rotl` is probably to start from a list of species and
+get the relevant parts of the tree for these species. This is a two step
+process:
+
+1. the species names need to be matched to their `ott_id` (the Open Tree
+	Taxonomy identifiers) using the Taxonomic name resolution services (TNRS)
+1. these `ott_id` will then be used to retrieve the relevant parts of the Tree
+   of Life.
+
+### Step 1: Matching taxonomy to the `ott_id`
+
+Let's start by doing a search on a diverse group of taxa: a tree frog (genus
+_Hyla_), a fish (genus _Salmo_), a sea urchin (genus _Diadema_), and a nautilus
+(genus _Nautilus_).
+
+```{r}
+library(rotl)
+taxa <- c("Hyla", "Salmo", "Diadema", "Nautilus")
+resolved_names <- tnrs_match_names(taxa)
+```
+
+It's always a good idea to check that the resolved names match what you
+intended:
+
+`r knitr::kable(resolved_names)`
+
+The column `unique_name` sometimes indicates the higher taxonomic level
+associated with the name. The column `number_matches` indicates the number of
+`ott_id` that corresponds to a given name. In this example, our search on
+_Diadema_ returns 2 matches, and the one returned by default is indeed the sea
+urchin that we want for our query. The argument `context_name` allows you to
+limit the taxonomic scope of your search. _Diadema_ is also the genus name of a
+fungus. To ensure that our search is limited to animal names, we could do:
+
+```{r}
+resolved_names <- tnrs_match_names(taxa, context_name = "Animals")
+```
+
+If you are trying to build a tree with deeply divergent taxa that the argument
+`context_name` cannot fix, see "How to change the ott ids assigned to my taxa?"
+in the FAQ below.
+
+
+### Step 2: Getting the tree corresponding to our taxa
+
+Now that we have the correct `ott_id` for our taxa, we can ask for the tree
+using the `tol_induced_subtree()` function. By default, the object returned by
+`tol_induced_subtree` is a phylo object (from the
+[ape](https://cran.r-project.org/package=ape) package), so we can plot it
+directly.
+
+```{r, fig.width=7, fig.height=4}
+my_tree <- tol_induced_subtree(ott_ids = resolved_names$ott_id)
+plot(my_tree, no.margin=TRUE)
+```
+
+
+## FAQ
+
+### How to change the ott ids assigned to my taxa?
+
+If you realize that `tnrs_match_names` assigns the incorrect taxonomic group to
+your name (e.g., because of synonymy) and changing the `context_name` does not
+help, you can use the function `inspect`. This function takes the object
+resulting from `tnrs_match_names()`, and either the row number, the taxon name
+(you used in your search in lowercase), or the `ott_id` returned by the initial
+query.
+
+To illustrate this, let's re-use the previous query but this time pretending that
+we are interested in the fungus _Diadema_ and not the sea urchin:
+
+```{r}
+taxa <- c("Hyla", "Salmo", "Diadema", "Nautilus")
+resolved_names <- tnrs_match_names(taxa)
+resolved_names
+inspect(resolved_names, taxon_name = "diadema")
+```
+
+In our case, we want the second row in this data frame to replace the
+information that initially matched for _Diadema_. We can now use the `update()`
+function, to change to the correct taxa (the fungus not the sea urchin):
+
+```{r}
+resolved_names <- update(resolved_names, taxon_name = "diadema",
+                         new_row_number = 2)
+
+## we could also have used the ott_id to replace this taxon:
+## resolved_names <- update(resolved_names, taxon_name = "diadema",
+##                          new_ott_id = 4930522)
+```
+
+And now our `resolved_names` data frame includes the taxon we want:
+
+`r knitr::kable(resolved_names)`
+
+### How do I know that the taxa I'm asking for is the correct one?
+
+The function `taxonomy_taxon_info()` takes `ott_ids` as arguments and returns
+taxonomic information about the taxa. This output can be passed to some helper
+functions to extract the relevant information. Let's illustrate this with our
+_Diadema_ example
+
+```{r}
+diadema_info <- taxonomy_taxon_info(631176)
+tax_rank(diadema_info)
+synonyms(diadema_info)
+tax_name(diadema_info)
+```
+
+In some cases, it might also be useful to investigate the taxonomic tree
+descending from an `ott_id` to check that it's the correct taxon and to
+determine the species included in the Open Tree Taxonomy:
+
+```{r}
+diadema_tax_tree <- taxonomy_subtree(631176)
+diadema_tax_tree
+```
+
+By default, this function returns all taxa (including self, and internal)
+descending from this `ott_id` but it is also possible to return a `phylo` object.
+
+### How do I get the tree for a particular taxonomic group?
+
+If you are looking to get the tree for a particular taxonomic group, you need to
+first identify it by its node id or ott id, and then use the `tol_subtree()`
+function:
+
+```{r, fig.width=7, fig.height=4}
+mono_id <- tnrs_match_names("Monotremata")
+mono_tree <- tol_subtree(ott_id = ott_id(mono_id))
+plot(mono_tree)
+```
+
+
+### How do I find trees from studies focused on my favourite taxa?
+
+The function `studies_find_studies()` allows the user to search for studies
+matching specific criteria. The function `studies_properties()` returns the
+list of properties that can be used in the search.
+
+```{r}
+furry_studies <- studies_find_studies(property="ot:focalCladeOTTTaxonName", value="Mammalia")
+furry_ids <- furry_studies$study_ids
+```
+
+Now that we know the `study_id`, we can ask for the meta data information
+associated with this study:
+
+```{r}
+furry_meta <- get_study_meta("pg_2550")
+get_publication(furry_meta)     ## The citation for the source of the study
+get_tree_ids(furry_meta)        ## This study has 10 trees associated with it
+candidate_for_synth(furry_meta) ## None of these trees are yet included in the OTL
+```
+
+Using `get_study("pg_2550")` would return a `multiPhylo` object (default) with
+all the trees associated with this particular study, while
+`get_study_tree("pg_2550", "tree5513")` would return one of these trees.
+
+### The tree returned by the API has duplicated tip labels, how can I work around it?
+
+You may encounter the following error message:
+
+```
+Error in rncl(file = file, ...) : Taxon number 39 (coded by the token Pratia
+angulata) has already been encountered in this tree. Duplication of taxa in a
+tree is prohibited.
+```
+
+This message occurs as duplicate labels are not allowed in the NEXUS format and
+it is strictly enforced by the part of the code used by `rotl` to import the
+trees in memory.
+
+If you use a version of `rotl` more recent than 0.4.1, this should not happen by
+default for the function `get_study_tree`. If it happens with another function,
+please [let us know](https://github.com/ropensci/rotl/issues).
+
+The easiest way to work around this is to save the tree in a file, and use APE
+to read it in memory:
+
+```{r, eval=FALSE}
+get_study_tree(study_id="pg_710", tree_id="tree1277",
+               tip_label='ott_taxon_name', file = "/tmp/tree.tre",
+               file_format = "newick")
+tr <- ape::read.tree(file = "/tmp/tree.tre")
+```
+
+### How do I get the higher taxonomy for a given taxa?
+
+If you encounter a taxon name you are not familiar with, it might be useful to
+obtain its higher taxonomy to see where it fits in the tree of life. We can
+combine several taxonomy methods to extract this information easily.
+
+```{r}
+giant_squid <- tnrs_match_names("Architeuthis")
+tax_lineage(taxonomy_taxon_info(ott_id(giant_squid), include_lineage = TRUE))
+```
+
+### Why are OTT IDs discovered with `rotl` missing from an induced subtree?
+
+Some taxonomic names that can be retrieved through the taxonomic name
+resolution service are not part of the Open Tree's synthesis tree. These are
+usually traditional higher-level taxa that have been found to be paraphyletic.
+
+For instance, if you wanted to fetch a tree relating the three birds that go
+into a [Turducken](https://en.wikipedia.org/wiki/Turducken) you might search
+for the turkey, duck, and chicken genera:
+
+```{r}
+turducken <- c("Meleagris", "Anas", "Gallus")
+taxa <- tnrs_match_names(turducken, context="Animals")
+taxa
+```
+So, we have IDs for each genus but those messages in the `flag` column look
+concerning. `BARREN` means these taxa have no descendants and
+`MAJOR_RANK_CONFLICT` refers to inconsistencies between the rank of these taxa
+and some of their relatives. For these reasons, _Meleagris_ and _Anas_ are not
+included in the synthetic tree.
+
+If we tried to press on ahead with the IDs that we have, we'd get an
+error (because there are too few good IDs) or a tree with fewer tips than we had
+anticipated.
+
+```{r, eval=FALSE}
+tr <- tol_induced_subtree(taxa$ott_id)
+```
+
+The best way to avoid these problems is to specify complete species names
+(species being the lowest level of classification in the Open Tree taxonomy they
+are guaranteed to be monophyletic):
+
+```{r, fig.width=7, fig.height=4}
+turducken_spp <- c("Meleagris gallopavo", "Anas platyrhynchos", "Gallus gallus")
+taxa <- tnrs_match_names(turducken_spp, context="Animals")
+tr <- tol_induced_subtree(taxa$ott_id)
+plot(tr)
+```
diff --git a/vignettes/meta-analysis.Rmd b/vignettes/meta-analysis.Rmd
new file mode 100644
index 0000000..0ff201a
--- /dev/null
+++ b/vignettes/meta-analysis.Rmd
@@ -0,0 +1,231 @@
+---
+title: "Using the Open Tree synthesis in a comparative analysis"
+author: "David Winter"
+date: "`r Sys.Date()`"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Using the Open Tree synthesis in a comparative analysis}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+## Phylogenetic Comparative Methods
+
+The development of phylogenetic comparative methods has made phylogenies an
+important source of data in fields as diverse as ecology, genomics and medicine.
+Comparative  methods can be used to investigate patterns in the evolution of
+traits or the diversification of lineages. In other cases a phylogeny is treated
+as a "nuisance parameter", allowing the autocorrelation created by the shared
+evolutionary history of the different species included to be controlled for.
+
+In many cases finding a tree that relates the species for which trait data are
+available is a rate-limiting step in such comparative analyses. Here we show
+how the synthetic tree provided by Open Tree of Life (and made available in R via
+`rotl`) can help to fill this gap.
+
+## A phylogenetic meta-analysis
+
+To demonstrate the use of `rotl` in a comparative analysis, we will partially
+reproduce the results of [Rutkowska _et al_ 2014](https://doi.org/10.1111/jeb.12282).
+Very briefly, this study is a meta-analysis summarising the results of multiple
+studies testing for systematic differences in the size of eggs which contain
+male and female offspring. Such a difference might mean that birds invest more
+heavily in one sex than the other.
+
+Because this study involves data from 51 different species, Rutkowska _et al_
+used a phylogenetic comparative approach to account for the shared evolutionary
+history among some of the studied-species.
+
+### Gather the data
+
+If we are going to reproduce this analysis, we will first need to gather the
+data. Thankfully, the data is available as supplementary material from the
+publisher's website. We can collect the data using `fulltext` (with the
+paper's DOI as input) and read it into memory with `readxl`:
+
+```{r egg_data, cache=TRUE}
+library(rotl)
+library(fulltext)
+
+if (require(readxl)) {
+    doi <- "10.1111/jeb.12282"
+    xl_file <- ft_get_si(doi, 1, save.name="egg.xls")
+    egg_data <- read_excel(xl_file)
+} else {
+    egg_data <- read.csv(system.file("extdata", "egg.csv", package = "rotl"))
+}
+head(egg_data)
+```
+
+The most important variable in this dataset is `Zr`, which is a [normalized
+effect size](https://en.wikipedia.org/wiki/Fisher_transformation) for difference
+in size between eggs that contain males and females. Values close to zero come
+from studies that found the sex of an egg's inhabitant had little effect on its size,
+while large positive or negative values correspond to studies with substantial
+sex biases (towards males and females respectively). Since this is a
+meta-analysis we should produce the classic [funnel plot](https://en.wikipedia.org/wiki/Funnel_plot)
+with effect size on the y-axis and precision (the inverse of the sample
+standard error) on the x-axis. Here we calculate precision from the sample
+variance (`VZr`):
+
+```{r eggs_in_a_funnel, fig.width=6, fig.height=3}
+plot(1/sqrt(egg_data$VZr), egg_data$Zr, pch=16,
+     ylab="Effect size (Zr)",
+     xlab="Precision (1/SE)",
+     main="Effect sizes for sex bias in egg size among 51 brid species" )
+```
+
+In order to use this data later on we need to first convert it to a standard
+`data.frame`. We can also convert the `animal` column (the species names) to
+lower case which will make it easier to match names later on:
+
+```{r, clean_eggs}
+egg_data <- as.data.frame(egg_data)
+egg_data$animal <- tolower(egg_data$animal)
+```
+### Find the species in OTT
+
+We can use the OTL synthesis tree to relate these species. To do so we first need to
+find Open Tree Taxonomy (OTT) IDs for each species. We can do that with the
+Taxonomic Name Resolution Service function `tnrs_match_names`:
+
+```{r, birds, cache=TRUE}
+taxa <- tnrs_match_names(unique(egg_data$animal), context="Animals")
+head(taxa)
+```
+
+All of these species are in OTT, but a few of them go by different names in the
+Open Tree than we have in our data set. Because the tree `rotl` fetches
+will have Open Tree names, we need to create a named vector that maps the names
+we have for each species to the names Open Tree uses for them:
+
+
+```{r bird_map}
+taxon_map <- structure(taxa$search_string, names=taxa$unique_name)
+```
+
+Now we can use this map to retrieve "data set names" from "OTT names":
+
+
+```{r odd_duck}
+taxon_map["Anser caerulescens"]
+```
+
+### Get a tree
+
+Now we can get the tree. There are really too many tips here to show nicely, so
+we will leave them out of this plot
+
+```{r birds_in_a_tree, fig.width=5, fig.height=5, fig.align='center'}
+tr <- tol_induced_subtree(taxa$ott_id)
+plot(tr, show.tip.label=FALSE)
+```
+
+There are a few things to note here. First, the tree has no branch lengths.
+At present this is true for the whole of the Open Tree synthetic tree. Some
+comparative methods require either branch lengths or an ultrametric tree. Before
+you can use one of those methods you will need to get a tree with branch
+lengths. You could try looking for published trees made available by the Open
+Tree with `studies_find_trees`. Alternatively, you could estimate branch lengths
+from the topology of a phylogeny returned by `tol_induced_subtree`, perhaps by
+downloading DNA sequences from the NCBI with `rentrez` or "hanging" the tree on
+nodes of known-age using the penalized likelihood method in `ape::chronos`.
+In this case, we will use only the topology of the tree as input to our
+comparative analysis, so we can skip these steps.
+
+Second, the tip labels contain OTT IDs, which means they will not perfectly
+match the species names in our dataset or the taxon map that we created earlier:
+
+
+```{r tip_lab}
+tr$tip.label[1:4]
+```
+
+Finally, the tree contains node labels for those nodes that match a higher taxonomic
+group, and empty character vectors (`""`) for all other nodes. Some
+comparative methods either do not expect node labels at all, or require all
+labeled nodes to have a unique name (meaning multiple "empty" labels will cause
+an error).
+
+We can deal with all these details easily. `rotl` provides  the convenience
+function `strip_ott_ids` to remove the extra information from the tip labels.
+With the IDs removed, we can use our taxon map to replace the tip labels in the tree
+with the species names from our dataset.
+
+
+
+```{r clean_tips}
+otl_tips <- strip_ott_ids(tr$tip.label, remove_underscores=TRUE)
+tr$tip.label <- taxon_map[ otl_tips ]
+```
+
+Finally, we can remove the node labels by setting the `node.label` attribute of
+the tree to `NULL`.
+
+```{r remove_nodes}
+tr$node.label <- NULL
+```
+
+### Perform the meta-analysis
+
+
+Now we have data and a tree, and we know the names in the tree match the ones in
+the data. It's time to do the comparative analysis. Rutkowska _et al_. used `MCMCglmm`, a
+Bayesian MCMC approach to fitting multi-level models, to perform their meta-analysis,
+and we will do the same. Of course, to properly analyse these data you would
+take some care in deciding on the appropriate priors to use and inspect the
+results carefully. In this case, we are really interested in using this as a
+demonstration, so we will just run a simple model.
+
+Specifically we will fit a model where the only variable that might explain the
+values of `Zr` is the random factor `animal`, which corresponds to the
+phylogenetic relationships among species. We also provide `VZr` as the measurement
+error variance, effectively adding extra weight to the results of more powerful
+studies. Here's how we specify and fit that model with `MCMCglmm`:
+
+
+```{r model}
+library(MCMCglmm, quiet=TRUE)
+set.seed(123)
+
+pr<-list(R=list(V=1,nu=0.002),
+             G=list(G1=list(V=1,nu=0.002))
+)
+
+model <- MCMCglmm(Zr~1,random=~animal,
+                       pedigree=tr,
+                       mev=egg_data$VZr,
+                       prior=pr,
+                       data=egg_data,
+                       verbose=FALSE)
+```
+
+
+Now that we have a result we can find out how much phylogenetic signal exists
+for sex-biased differences in egg-size. In a multi-level model we can use variance
+components to look at this, specifically the proportion of the total variance
+that can be explained by phylogeny is called the phylogenetic reliability, _H_. Let's
+calculate the _H_ for this model:
+
+
+```{r PhyH}
+var_comps <- colMeans(model$VCV )
+var_comps["animal"] / sum(var_comps)
+```
+
+It appears there is almost no phylogenetic signal to the data.
+The relationships among species explain much less than one percent of the total
+variance in the data. If you were wondering,  Rutkowska _et al_. report a similar result,
+even after adding more predictors to their model most of the variance in `Zr`
+was left unexplained.
+
+## What other comparative methods can I use in R?
+
+Here we have demonstrated just one comparative analysis that you might do in R.
+There are an ever-growing number of packages that allow an ever-growing number
+of analyses to be performed in R. Some "classics" like ancestral state
+reconstruction,  phylogenetic independent contrasts and lineage through time plots
+are implemented in `ape`. Packages like `phytools`, `caper` and `diversitree`
+provide extensions to these methods.  The [CRAN Phylogenetics Taskview](https://cran.r-project.org/web/views/Phylogenetics.html)
+gives a good idea of the diversity of packages and analyses that can be
+completed in R.
diff --git a/vignettes/vignette.css b/vignettes/vignette.css
new file mode 100644
index 0000000..5a2bf98
--- /dev/null
+++ b/vignettes/vignette.css
@@ -0,0 +1,207 @@
+body {
+  background-color: #fff;
+  margin: 1em auto;
+  max-width: 700px;
+  overflow: visible;
+  padding-left: 2em;
+  padding-right: 2em;
+  font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
+  font-size: 14px;
+  line-height: 1.35;
+}
+
+#header {
+  text-align: center;
+}
+
+#TOC {
+  clear: both;
+  margin: 0 0 10px 10px;
+  padding: 4px;
+  width: 400px;
+  border: 1px solid #CCCCCC;
+  border-radius: 5px;
+
+  background-color: #f6f6f6;
+  font-size: 13px;
+  line-height: 1.3;
+}
+  #TOC .toctitle {
+    font-weight: bold;
+    font-size: 15px;
+    margin-left: 5px;
+  }
+
+  #TOC ul {
+    padding-left: 40px;
+    margin-left: -1.5em;
+    margin-top: 5px;
+    margin-bottom: 5px;
+  }
+  #TOC ul ul {
+    margin-left: -2em;
+  }
+  #TOC li {
+    line-height: 16px;
+  }
+
+table {
+  margin: 1em auto;
+  border-width: 1px;
+  border-color: #DDDDDD;
+  border-style: outset;
+  border-collapse: collapse;
+}
+table th {
+  border-width: 2px;
+  padding: 5px;
+  border-style: inset;
+}
+table td {
+  border-width: 1px;
+  border-style: inset;
+  line-height: 18px;
+  padding: 5px 5px;
+}
+table, table th, table td {
+  border-left-style: none;
+  border-right-style: none;
+}
+table thead, table tr.even {
+  background-color: #f7f7f7;
+}
+
+p {
+  margin: 0.5em 0;
+}
+
+blockquote {
+  background-color: #f6f6f6;
+  padding: 0.25em 0.75em;
+}
+
+hr { /* horizontal rule drawn as a single thin top line */
+  border-style: solid;
+  border: none; /* the border shorthand resets every side, overriding border-style above */
+  border-top: 1px solid #777; /* re-enable only the top edge */
+  margin: 28px 0;
+}
+
+dl {
+  margin-left: 0;
+}
+  dl dd {
+    margin-bottom: 13px;
+    margin-left: 13px;
+  }
+  dl dt {
+    font-weight: bold;
+  }
+
+ul {
+  margin-top: 0;
+}
+  ul li {
+    list-style: circle outside;
+  }
+  ul ul {
+    margin-bottom: 0;
+  }
+
+pre, code {
+  background-color: #f7f7f7;
+  border-radius: 3px;
+  color: #333;
+  white-space: pre-wrap;
+}
+pre {
+  /*white-space: pre-wrap;    /* Wrap long lines */
+  border-radius: 3px;
+  margin: 5px 0px 10px 0px;
+  padding: 10px;
+}
+pre:not([class]) {
+  background-color: #f7f7f7;
+}
+
+code {
+  font-family: Consolas, Monaco, 'Courier New', monospace;
+  font-size: 85%;
+}
+p > code, li > code {
+  padding: 2px 0px;
+}
+
+div.figure {
+  text-align: center;
+}
+img { /* images: white background, thin rounded border */
+  background-color: #FFFFFF;
+  padding: 2px;
+  border: 1px solid #DDDDDD; /* NOTE(review): superseded by the second border declaration below */
+  border-radius: 3px;
+  border: 1px solid #CCCCCC; /* later declaration wins, so this is the effective border colour */
+  margin: 0 5px;
+}
+
+h1 {
+  margin-top: 0;
+  font-size: 35px;
+  line-height: 40px;
+}
+
+h2 {
+  border-bottom: 4px solid #f7f7f7;
+  padding-top: 10px;
+  padding-bottom: 2px;
+  font-size: 145%;
+}
+
+h3 {
+  border-bottom: 2px solid #f7f7f7;
+  padding-top: 10px;
+  font-size: 120%;
+}
+
+h4 {
+  border-bottom: 1px solid #f7f7f7;
+  margin-left: 8px;
+  font-size: 105%;
+}
+
+h5, h6 {
+  border-bottom: 1px solid #ccc;
+  font-size: 105%;
+}
+
+a {
+  color: #0033dd;
+  text-decoration: none;
+}
+  a:hover {
+    color: #6666ff; }
+  a:visited {
+    color: #800080; }
+  a:visited:hover {
+    color: #BB00BB; }
+  a[href^="http:"] {
+    text-decoration: underline; }
+  a[href^="https:"] {
+    text-decoration: underline; }
+
+/* Class described in https://benjeffrey.com/posts/pandoc-syntax-highlighting-css
+   Colours from https://gist.github.com/robsimmons/1172277 */
+
+code > span.kw { color: #555; font-weight: bold; } /* Keyword */
+code > span.dt { color: #902000; } /* DataType */
+code > span.dv { color: #40a070; } /* DecVal (decimal values) */
+code > span.bn { color: #d14; } /* BaseN */
+code > span.fl { color: #d14; } /* Float */
+code > span.ch { color: #d14; } /* Char */
+code > span.st { color: #d14; } /* String */
+code > span.co { color: #888888; font-style: italic; } /* Comment */
+code > span.ot { color: #007020; } /* OtherToken */
+code > span.al { color: #ff0000; font-weight: bold; } /* AlertToken */
+code > span.fu { color: #900; font-weight: bold; } /* Function calls */ 
+code > span.er { color: #a61717; background-color: #e3d2d2; } /* ErrorTok */
+

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/r-cran-rotl.git



More information about the debian-med-commit mailing list