[med-svn] [r-cran-wikitaxa] 01/02: New upstream version 0.1.4
Andreas Tille
tille at debian.org
Mon Oct 2 13:16:32 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository r-cran-wikitaxa.
commit c92ae26c6d856ab141dbeb3002a17f447236e91e
Author: Andreas Tille <tille at debian.org>
Date: Mon Oct 2 15:16:05 2017 +0200
New upstream version 0.1.4
---
DESCRIPTION | 31 +++
LICENSE | 2 +
MD5 | 37 +++
NAMESPACE | 19 ++
NEWS.md | 19 ++
R/globals.R | 3 +
R/wiki.R | 107 ++++++++
R/wikicommons.R | 110 +++++++++
R/wikipages.R | 176 +++++++++++++
R/wikipedia.R | 136 ++++++++++
R/wikispecies.R | 109 ++++++++
R/wikitaxa-package.R | 25 ++
R/zzz.R | 45 ++++
README.md | 309 +++++++++++++++++++++++
build/vignette.rds | Bin 0 -> 229 bytes
data/wikipedias.rda | Bin 0 -> 6246 bytes
inst/doc/wikitaxa_vignette.R | 73 ++++++
inst/doc/wikitaxa_vignette.Rmd | 177 +++++++++++++
inst/doc/wikitaxa_vignette.html | 426 ++++++++++++++++++++++++++++++++
man/wikipedias.Rd | 18 ++
man/wikitaxa-package.Rd | 16 ++
man/wt_data.Rd | 60 +++++
man/wt_wiki_page.Rd | 35 +++
man/wt_wiki_page_parse.Rd | 39 +++
man/wt_wiki_url_build.Rd | 60 +++++
man/wt_wiki_url_parse.Rd | 33 +++
man/wt_wikicommons.Rd | 75 ++++++
man/wt_wikipedia.Rd | 89 +++++++
man/wt_wikispecies.Rd | 75 ++++++
tests/test-all.R | 3 +
tests/testthat/test-wikicommons.R | 102 ++++++++
tests/testthat/test-wikipedia.R | 108 ++++++++
tests/testthat/test-wikispecies.R | 102 ++++++++
tests/testthat/test-wt_data.R | 21 ++
tests/testthat/test-wt_wiki_page.R | 37 +++
tests/testthat/test-wt_wiki_url_build.R | 21 ++
tests/testthat/test-wt_wiki_url_parse.R | 23 ++
vignettes/wikitaxa_vignette.Rmd | 177 +++++++++++++
38 files changed, 2898 insertions(+)
diff --git a/DESCRIPTION b/DESCRIPTION
new file mode 100644
index 0000000..cdc891d
--- /dev/null
+++ b/DESCRIPTION
@@ -0,0 +1,31 @@
+Package: wikitaxa
+Title: Taxonomic Information from 'Wikipedia'
+Description: 'Taxonomic' information from 'Wikipedia', 'Wikicommons',
+ 'Wikispecies', and 'Wikidata'. Functions included for getting
+ taxonomic information from each of the sources just listed, as
+ well as performing taxonomic search.
+Version: 0.1.4
+License: MIT + file LICENSE
+URL: https://github.com/ropensci/wikitaxa
+BugReports: https://github.com/ropensci/wikitaxa/issues
+Authors@R: c(
+ person("Scott", "Chamberlain", role = c("aut", "cre"),
+ email = "myrmecocystus+r at gmail.com"),
+ person("Ethan", "Welty", role = "aut")
+ )
+LazyLoad: yes
+LazyData: yes
+Encoding: UTF-8
+VignetteBuilder: knitr
+Depends: R(>= 3.2.1)
+Imports: WikidataR, data.table, curl, crul (>= 0.3.4), tibble,
+ jsonlite, xml2
+Suggests: roxygen2 (>= 6.0.1), testthat, knitr, rmarkdown
+RoxygenNote: 6.0.1
+NeedsCompilation: no
+Packaged: 2017-05-05 23:43:44 UTC; sacmac
+Author: Scott Chamberlain [aut, cre],
+ Ethan Welty [aut]
+Maintainer: Scott Chamberlain <myrmecocystus+r at gmail.com>
+Repository: CRAN
+Date/Publication: 2017-05-06 00:02:37 UTC
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..37ee2c7
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,2 @@
+YEAR: 2017
+COPYRIGHT HOLDER: Scott Chamberlain
diff --git a/MD5 b/MD5
new file mode 100644
index 0000000..82fc780
--- /dev/null
+++ b/MD5
@@ -0,0 +1,37 @@
+887082c3602c92f9acf6c4fd9a814512 *DESCRIPTION
+c5af52351472a750055a760a8924ce71 *LICENSE
+913e5d7d676aaab98f66aa238239936a *NAMESPACE
+7d560b566156c165559fcfce19873392 *NEWS.md
+742ccfe2d41233878115e1c41a293f53 *R/globals.R
+ae7403b57c4f24f28e01090892e6319c *R/wiki.R
+63c2d24a5b4d754479b7fda9bc2fe0c3 *R/wikicommons.R
+ced60ef6ab6afa5d44605b0a1b4536ac *R/wikipages.R
+6202228c1fa33e2ae7562dc96d79d382 *R/wikipedia.R
+83020bf858be02f29beafa725baa6b60 *R/wikispecies.R
+04d38ca008da155845ee56063b54d35a *R/wikitaxa-package.R
+6ec093544fad1f90b748ef0e07edf165 *R/zzz.R
+dac9abfb8843cb5ecefd3116975d09f3 *README.md
+b6b29f6217889f808423524e828af7c6 *build/vignette.rds
+ed7a9871999234c7e411153f36dc5530 *data/wikipedias.rda
+1b3cadb5ad75e550ad6c53d201061c5e *inst/doc/wikitaxa_vignette.R
+6a0319642b0b7bcd0a58a15068eae741 *inst/doc/wikitaxa_vignette.Rmd
+9da48443c58156b3b97bc51e67db213b *inst/doc/wikitaxa_vignette.html
+95a5800c2e07653bd1e555a5b105911a *man/wikipedias.Rd
+c52da248a29e5d39bc26f8437f5a3947 *man/wikitaxa-package.Rd
+66333bb34080b0b0b8594731bd64af6d *man/wt_data.Rd
+45863fa8f408821364438c642173e05c *man/wt_wiki_page.Rd
+23c68e21a98be07d2d1937d17e51b29e *man/wt_wiki_page_parse.Rd
+7b48c4aac9de999437c327d17d55fc6d *man/wt_wiki_url_build.Rd
+95cec5153587ef7765b041eafccabc0b *man/wt_wiki_url_parse.Rd
+e353d06626013f36250f76d43ef30062 *man/wt_wikicommons.Rd
+2e1f6d59f7767984a202b1e3cbbe69bc *man/wt_wikipedia.Rd
+5d42e4b4c29b2cc6baeb485dc1df1a97 *man/wt_wikispecies.Rd
+f8a030c37b64a043072d27be6aa286d1 *tests/test-all.R
+eb02e51b509ce44730777748cd2e8003 *tests/testthat/test-wikicommons.R
+7f98a3a4736f67a4f430a131dfd69ab2 *tests/testthat/test-wikipedia.R
+a6551ebffd104df381d645ad57d3fdbd *tests/testthat/test-wikispecies.R
+4930b09d3f1c6cc9fcca4da14999e053 *tests/testthat/test-wt_data.R
+0f6d914ba4d5a8cce72733421fb3e667 *tests/testthat/test-wt_wiki_page.R
+27a0b128a66d503af9209461eba1a936 *tests/testthat/test-wt_wiki_url_build.R
+e93890d0d28f9fc47b5ffa19beae9b95 *tests/testthat/test-wt_wiki_url_parse.R
+6a0319642b0b7bcd0a58a15068eae741 *vignettes/wikitaxa_vignette.Rmd
diff --git a/NAMESPACE b/NAMESPACE
new file mode 100644
index 0000000..1df337e
--- /dev/null
+++ b/NAMESPACE
@@ -0,0 +1,19 @@
+# Generated by roxygen2: do not edit by hand
+
+S3method(wt_data,default)
+S3method(wt_data,wiki_id)
+export(wt_data)
+export(wt_data_id)
+export(wt_wiki_page)
+export(wt_wiki_page_parse)
+export(wt_wiki_url_build)
+export(wt_wiki_url_parse)
+export(wt_wikicommons)
+export(wt_wikicommons_parse)
+export(wt_wikicommons_search)
+export(wt_wikipedia)
+export(wt_wikipedia_parse)
+export(wt_wikipedia_search)
+export(wt_wikispecies)
+export(wt_wikispecies_parse)
+export(wt_wikispecies_search)
diff --git a/NEWS.md b/NEWS.md
new file mode 100644
index 0000000..a14c896
--- /dev/null
+++ b/NEWS.md
@@ -0,0 +1,19 @@
+wikitaxa 0.1.4
+==============
+
+## NEW FEATURES
+
+* `wt_wikipedia()` and `wt_wikipedia_search()` gain parameter `wiki`
+to give the wiki language, which defaults to `en` (#9)
+
+## MINOR IMPROVEMENTS
+
+* move some examples to dontrun (#11)
+
+
+wikitaxa 0.1.0
+==============
+
+## NEW FEATURES
+
+* Released to CRAN
diff --git a/R/globals.R b/R/globals.R
new file mode 100644
index 0000000..fdc4813
--- /dev/null
+++ b/R/globals.R
@@ -0,0 +1,3 @@
+if (getRversion() >= "2.15.1") {
+ utils::globalVariables(c('wikipedias'))
+}
diff --git a/R/wiki.R b/R/wiki.R
new file mode 100644
index 0000000..59d9891
--- /dev/null
+++ b/R/wiki.R
@@ -0,0 +1,107 @@
+#' Wikidata taxonomy data
+#'
+#' @export
+#' @param x (character) a taxonomic name
+#' @param property (character) a property id, e.g., P486
+#' @param ... curl options passed on to [httr::GET()]
+#' @param language (character) two letter language code
+#' @param limit (integer) records to return. Default: 10
+#' @return `wt_data` searches Wikidata, and returns a list with elements:
+#' \itemize{
+#' \item labels - data.frame with columns: language, value
+#' \item descriptions - data.frame with columns: language, value
+#' \item aliases - data.frame with columns: language, value
+#' \item sitelinks - data.frame with columns: site, title
+#' \item claims - data.frame with columns: claims, property_value,
+#' property_description, value (comma separated values in string)
+#' }
+#'
+#' `wt_data_id` gets the Wikidata ID for the searched term, and
+#' returns the ID as character
+#'
+#' @details Note that `wt_data` can take a while to run since when fetching
+#' claims it has to do so one at a time for each claim
+#'
+#' You can search things other than taxonomic names with `wt_data` if you
+#' like
+#' @examples \dontrun{
+#' # search by taxon name
+#' # wt_data("Mimulus alsinoides")
+#'
+#' # choose which properties to return
+#' wt_data("Mimulus foliatus", property = c("P846", "P815"))
+#'
+#' # get a taxonomic identifier
+#' wt_data_id("Mimulus foliatus")
+#' # the id can be passed directly to wt_data()
+#' # wt_data(wt_data_id("Mimulus foliatus"))
+#' }
+wt_data <- function(x, property = NULL, ...) {
+ UseMethod("wt_data")
+}
+
+#' @export
+wt_data.wiki_id <- function(x, property = NULL, ...) {
+ data_wiki(x, property = property, ...)
+}
+
+#' @export
+wt_data.default <- function(x, property = NULL, ...) {
+ x <- WikidataR::find_item(search_term = x, ...)
+ if (length(x) == 0) stop("no results found", call. = FALSE)
+ data_wiki(x[[1]]$id, property = property, ...)
+}
+
+#' @export
+#' @rdname wt_data
+wt_data_id <- function(x, language = "en", limit = 10, ...) {
+ x <- WikidataR::find_item(search_term = x, language = language,
+ limit = limit, ...)
+ x <- if (length(x) == 0) NA else x[[1]]$id
+ structure(x, class = "wiki_id")
+}
+
+data_wiki <- function(x, property = NULL, ...) {
+ xx <- WikidataR::get_item(x, ...)
+
+ if (is.null(property)) {
+ claims <- create_claims(xx$claims)
+ } else{
+ cl <- Filter(function(x) x$mainsnak$property %in% property, xx$claims)
+ if (length(cl) == 0) stop("No matching properties", call. = FALSE)
+ claims <- create_claims(cl)
+ }
+
+ list(
+ labels = dt_df(xx$labels),
+ descriptions = dt_df(xx$descriptions),
+ aliases = dt_df(xx$aliases),
+ sitelinks = dt_df(lapply(xx$sitelinks, function(x)
+ x[names(x) %in% c('site', 'title')])),
+ claims = dt_df(claims)
+ )
+}
+
+fetch_property <- function(x) {
+ tmp <- WikidataR::get_property(x)
+ list(
+ property_value = tmp$labels$en$value,
+ property_description = tmp$descriptions$en$value
+ )
+}
+
+create_claims <- function(x) {
+ lapply(x, function(z) {
+ c(
+ property = paste0(unique(z$mainsnak$property), collapse = ","),
+ fetch_property(unique(z$mainsnak$property)),
+ value = {
+ if (inherits(z$mainsnak$datavalue$value, "data.frame")) {
+ paste0(z$mainsnak$datavalue$value$`numeric-id`, collapse = ",")
+ } else {
+ paste0(z$mainsnak$datavalue$value, collapse = ",")
+ }
+ }
+ )
+ })
+}
diff --git a/R/wikicommons.R b/R/wikicommons.R
new file mode 100644
index 0000000..764bf12
--- /dev/null
+++ b/R/wikicommons.R
@@ -0,0 +1,110 @@
+#' WikiCommons
+#'
+#' @export
+#' @template args
+#' @family Wikicommons functions
+#' @return `wt_wikicommons` returns a list, with slots:
+#' \itemize{
+#' \item langlinks - language page links
+#' \item externallinks - external links
+#' \item common_names - a data.frame with `name` and `language` columns
+#' \item classification - a data.frame with `rank` and `name` columns
+#' }
+#'
+#' `wt_wikicommons_parse` returns a list
+#'
+#' `wt_wikicommons_search` returns a list with slots for `continue` and
+#' `query`, where `query` holds the results, with `query$search` slot with
+#' the search results
+#' @references <https://www.mediawiki.org/wiki/API:Search> for help on search
+#' @examples \dontrun{
+#' # high level
+#' wt_wikicommons(name = "Malus domestica")
+#'
+#' # low level
+#' pg <- wt_wiki_page("https://commons.wikimedia.org/wiki/Malus_domestica")
+#' wt_wikicommons_parse(pg)
+#'
+#' # search wikicommons
+#' wt_wikicommons_search(query = "Pinus")
+#'
+#' ## use search results to dig into pages
+#' res <- wt_wikicommons_search(query = "Pinus")
+#' lapply(res$query$search$title[1:3], wt_wikicommons)
+#' }
+wt_wikicommons <- function(name, utf8 = TRUE, ...) {
+ assert(name, "character")
+ stopifnot(length(name) == 1)
+ prop <- c("langlinks", "externallinks", "common_names", "classification")
+ res <- wt_wiki_url_build(
+ wiki = "commons", type = "wikimedia", page = name,
+ utf8 = utf8,
+ prop = prop)
+ pg <- wt_wiki_page(res, ...)
+ wt_wikicommons_parse(pg, prop, tidy = TRUE)
+}
+
+#' @export
+#' @rdname wt_wikicommons
+wt_wikicommons_parse <- function(page, types = c("langlinks", "iwlinks",
+ "externallinks", "common_names",
+ "classification"),
+ tidy = FALSE) {
+
+ result <- wt_wiki_page_parse(page, types = types, tidy = tidy)
+ json <- jsonlite::fromJSON(rawToChar(page$content), simplifyVector = FALSE)
+ if (is.null(json$parse)) {
+ return(result)
+ }
+ ## Common names
+ if ("common_names" %in% types) {
+ txt <- xml2::read_html(json$parse$text[[1]])
+ vernacular_html <- xml2::xml_find_all(txt,
+ xpath = "//bdi[@class='vernacular']")
+ # XML formats:
+ # <bdi class="vernacular" lang="en"><a href="">name</a></bdi>
+ # <bdi class="vernacular" lang="en">name</bdi>
+ ## Name formats:
+ # name1 / name2
+ # name1, name2
+ # name (category)
+ cnms <- lapply(vernacular_html, function(x) {
+ attributes <- xml2::xml_attrs(x)
+ language <- attributes[["lang"]]
+ name <- trimws(gsub("[ ]*\\(.*\\)", "", xml2::xml_text(x)))
+ list(
+ name = name,
+ language = language
+ )
+ })
+ result$common_names <- if (tidy) atbl(dt_df(cnms)) else cnms
+ }
+ ## classification
+ if ("classification" %in% types) {
+ txt <- xml2::read_html(json$parse$text[[1]])
+ html <- xml2::xml_find_all(txt, "//div[contains(., \"APG IV\")]")
+ labels <- xml2::xml_text(xml2::xml_find_all(
+ html,
+ "b[not(following-sibling::*[1][self::a])]/following-sibling::text()[1] | b/following-sibling::*[1][self::a]/text()" #nolint
+ ))
+ labels <- gsub(
+ "^\\s+|\\s$|\\(|\\)", "",
+ gsub("^:\\s+|^\\s+\\u2022\\s+", "", labels)
+ )
+ values <- xml2::xml_text(xml2::xml_find_all(html, ".//b"))[-1]
+ values <- gsub("^:\\s+|^.+:\\s?", "", values)
+ clz <- mapply(list, rank = labels, name = values,
+ SIMPLIFY = FALSE, USE.NAMES = FALSE)
+ result$classification <- if (tidy) atbl(dt_df(clz)) else clz
+ }
+ return(result)
+}
+
+#' @export
+#' @rdname wt_wikicommons
+wt_wikicommons_search <- function(query, limit = 10, offset = 0, utf8 = TRUE,
+ ...) {
+ tmp <- g_et(search_base("commons"), sh(query, limit, offset, utf8), ...)
+ tmp$query$search <- atbl(tmp$query$search)
+ return(tmp)
+}
diff --git a/R/wikipages.R b/R/wikipages.R
new file mode 100644
index 0000000..78075ab
--- /dev/null
+++ b/R/wikipages.R
@@ -0,0 +1,176 @@
+# MediaWiki (general) ----------------
+
+#' Parse MediaWiki Page URL
+#'
+#' Parse a MediaWiki page url into its component parts (wiki name, wiki type,
+#' and page title). Supports both static page urls and their equivalent API
+#' calls.
+#'
+#' @export
+#' @param url (character) MediaWiki page url.
+#' @family MediaWiki functions
+#' @return a list with elements:
+#' \itemize{
+#' \item wiki - wiki name, e.g. `en`
+#' \item type - wiki type, e.g. `wikipedia`
+#' \item page - page name
+#' }
+#' @examples
+#' wt_wiki_url_parse(url="https://en.wikipedia.org/wiki/Malus_domestica")
+#' wt_wiki_url_parse("https://en.wikipedia.org/w/api.php?page=Malus_domestica")
+wt_wiki_url_parse <- function(url) {
+  url <- curl::curl_unescape(url)
+  # Host part "<wiki>.<type>.<tld>"; the dots are escaped so that they only
+  # match literal separators instead of any character.
+  host <- "//([^\\.]+)\\.([^\\.]+)\\.[^/]*"
+  # fixed = TRUE: test for the literal API path rather than treating "." and
+  # "?" as regex operators.
+  if (grepl("/w/api.php", url, fixed = TRUE)) {
+    pattern <- paste0(host, "/w/api\\.php\\?.*page=([^&]+).*$")
+  } else {
+    pattern <- paste0(host, "/wiki/([^\\?]+)")
+  }
+  matches <- match_(url, pattern)
+  list(wiki = matches[2], type = matches[3], page = matches[4])
+}
+
+#' Build MediaWiki Page URL
+#'
+#' Builds a MediaWiki page url from its component parts (wiki name, wiki type,
+#' and page title). Supports both static page urls and their equivalent API
+#' calls.
+#'
+#' @export
+#' @param wiki (character | list) Either the wiki name or a list with
+#' `$wiki`, `$type`, and `$page` (the output of [wt_wiki_url_parse()]).
+#' @param type (character) Wiki type.
+#' @param page (character) Wiki page title.
+#' @param api (boolean) Whether to return an API call or a static page url
+#' (default). If `FALSE`, all following (API-only) arguments are ignored.
+#' @param action (character) See <https://en.wikipedia.org/w/api.php>
+#' for supported actions. This function currently only supports "parse".
+#' @param redirects (boolean) If the requested page is set to a redirect,
+#' resolve it.
+#' @param format (character) See <https://en.wikipedia.org/w/api.php>
+#' for supported output formats.
+#' @param utf8 (boolean) If `TRUE`, encodes most (but not all) non-ASCII
+#' characters as UTF-8 instead of replacing them with hexadecimal escape
+#' sequences.
+#' @param prop (character) Properties to retrieve, either as a character vector
+#' or pipe-delimited string. See
+#' <https://en.wikipedia.org/w/api.php?action=help&modules=parse> for
+#' supported properties.
+#' @family MediaWiki functions
+#' @return a URL (character)
+#' @examples
+#' wt_wiki_url_build(wiki = "en", type = "wikipedia", page = "Malus domestica")
+#' wt_wiki_url_build(
+#' wt_wiki_url_parse("https://en.wikipedia.org/wiki/Malus_domestica"))
+#' wt_wiki_url_build("en", "wikipedia", "Malus domestica", api = TRUE)
+wt_wiki_url_build <- function(wiki, type = NULL, page = NULL, api = FALSE,
+ action = "parse", redirects = TRUE, format = "json",
+ utf8 = TRUE,
+ prop = c("text", "langlinks", "categories",
+ "links", "templates", "images",
+ "externallinks", "sections", "revid",
+ "displaytitle", "iwlinks", "properties")) {
+
+ assert(utf8, "logical")
+ if (is.null(type) && is.null(page)) {
+ type <- wiki$type
+ page <- wiki$page
+ wiki <- wiki$wiki
+ }
+ page <- gsub(" ", "_", page)
+ if (api) {
+ base_url <- paste0("https://", wiki, ".", type, ".org/w/api.php")
+ # To ensure it is removed
+ if (!utf8) utf8 <- ""
+ prop <- paste(prop, collapse = "|")
+ query <- c(page = page, mget(c("action", "redirects", "format", "utf8",
+ "prop")))
+ query <- query[vapply(query, "!=", logical(1), "")]
+ url <- crul::url_build(base_url, query = query)
+ return(url)
+ } else {
+ return(paste0("https://", wiki, ".", type, ".org/wiki/", page))
+ }
+}
+
+#' Get MediaWiki Page from API
+#'
+#' Supports both static page urls and their equivalent API calls.
+#'
+#' @export
+#' @param url (character) MediaWiki page url.
+#' @param ... Arguments passed on to the `get()` method of the
+#' [crul::HttpClient] that performs the request.
+#' @family MediaWiki functions
+#' @return an `HttpResponse` response object from \pkg{crul}
+#' @details If the URL given is for a human readable html page,
+#' we convert it to equivalent API call - if URL is already an API call,
+#' we just use that.
+#' @examples \dontrun{
+#' wt_wiki_page("https://en.wikipedia.org/wiki/Malus_domestica")
+#' }
+wt_wiki_page <- function(url, ...) {
+  stopifnot(inherits(url, "character"))
+  # Literal match on the API path; any other url is rebuilt as an API call.
+  if (!grepl("/w/api.php", url, fixed = TRUE)) {
+    url <- wt_wiki_url_build(wt_wiki_url_parse(url), api = TRUE)
+  }
+  res <- crul::HttpClient$new(url = url)$get(...)
+  res$raise_for_status()
+  res
+}
+
+#' Parse MediaWiki Page
+#'
+#' Parses common properties from the result of a MediaWiki API page call.
+#'
+#' @export
+#' @param page ([crul::HttpResponse]) Result of [wt_wiki_page()]
+#' @param types (character) List of properties to parse.
+#' @param tidy (logical). tidy output to data.frames when possible.
+#' Default: `FALSE`
+#' @family MediaWiki functions
+#' @return a list
+#' @details Available properties currently not parsed:
+#' title, displaytitle, pageid, revid, redirects, text, categories,
+#' links, templates, images, sections, properties, ...
+#' @examples \dontrun{
+#' pg <- wt_wiki_page("https://en.wikipedia.org/wiki/Malus_domestica")
+#' wt_wiki_page_parse(pg)
+#' }
+wt_wiki_page_parse <- function(page, types = c("langlinks", "iwlinks",
+ "externallinks"),
+ tidy = FALSE) {
+ stopifnot(inherits(page, "HttpResponse"))
+ result <- list()
+ json <- jsonlite::fromJSON(rawToChar(page$content), tidy)
+ if (is.null(json$parse)) {
+ return(result)
+ }
+ ## Links to equivalent page in other languages
+ if ("langlinks" %in% types) {
+ result$langlinks <- if (tidy) {
+ atbl(json$parse$langlinks)
+ } else {
+ vapply(json$parse$langlinks, "[[", "", "url")
+ }
+ }
+ ## Other wiki links
+ if ("iwlinks" %in% types) {
+ result$iwlinks <- if (tidy) {
+ atbl(json$parse$iwlinks$url)
+ } else {
+ vapply(json$parse$iwlinks, "[[", "", "url")
+ }
+ }
+ ## Links to external resources
+ if ("externallinks" %in% types) {
+ result$externallinks <- json$parse$externallinks
+ }
+ ## Return
+ return(result)
+}
diff --git a/R/wikipedia.R b/R/wikipedia.R
new file mode 100644
index 0000000..34522df
--- /dev/null
+++ b/R/wikipedia.R
@@ -0,0 +1,136 @@
+#' Wikipedia
+#'
+#' @export
+#' @template args
+#' @param wiki (character) wiki language. default: en. See [wikipedias] for
+#' language codes.
+#' @family Wikipedia functions
+#' @return `wt_wikipedia` returns a list, with slots:
+#' \itemize{
+#' \item langlinks - language page links
+#' \item externallinks - external links
+#' \item common_names - a data.frame with `name` and `language` columns
+#' \item classification - a data.frame with `rank` and `name` columns
+#' \item synonyms - a character vector with taxonomic names
+#' }
+#'
+#' `wt_wikipedia_parse` returns a list with same slots determined by
+#' the `types` parameter
+#'
+#' `wt_wikipedia_search` returns a list with slots for `continue` and
+#' `query`, where `query` holds the results, with `query$search` slot with
+#' the search results
+#' @references <https://www.mediawiki.org/wiki/API:Search> for help on search
+#' @examples \dontrun{
+#' # high level
+#' wt_wikipedia(name = "Malus domestica")
+#' wt_wikipedia(name = "Malus domestica", wiki = "fr")
+#' wt_wikipedia(name = "Malus domestica", wiki = "da")
+#'
+#' # low level
+#' pg <- wt_wiki_page("https://en.wikipedia.org/wiki/Malus_domestica")
+#' wt_wikipedia_parse(pg)
+#' wt_wikipedia_parse(pg, tidy = TRUE)
+#'
+#' # search wikipedia
+#' wt_wikipedia_search(query = "Pinus")
+#' wt_wikipedia_search(query = "Pinus", wiki = "fr")
+#' wt_wikipedia_search(query = "Pinus", wiki = "br")
+#'
+#' ## curl options
+#' # wt_wikipedia_search(query = "Pinus", verbose = TRUE)
+#'
+#' ## use search results to dig into pages
+#' res <- wt_wikipedia_search(query = "Pinus")
+#' lapply(res$query$search$title[1:3], wt_wikipedia)
+#' }
+wt_wikipedia <- function(name, wiki = "en", utf8 = TRUE, ...) {
+ assert(name, "character")
+ assert(wiki, "character")
+ stopifnot(length(name) == 1)
+ prop <- c("langlinks", "externallinks", "common_names", "classification",
+ "synonyms")
+ res <- wt_wiki_url_build(
+ wiki = wiki, type = "wikipedia", page = name,
+ utf8 = utf8,
+ prop = prop)
+ pg <- wt_wiki_page(res, ...)
+ wt_wikipedia_parse(page = pg, types = prop, tidy = TRUE)
+}
+
+#' @export
+#' @rdname wt_wikipedia
+wt_wikipedia_parse <- function(page, types = c("langlinks", "iwlinks",
+ "externallinks", "common_names",
+ "classification"),
+ tidy = FALSE) {
+
+ result <- wt_wiki_page_parse(page, types = types, tidy = tidy)
+ json <- jsonlite::fromJSON(rawToChar(page$content), simplifyVector = TRUE)
+ if (is.null(json$parse)) {
+ return(result)
+ }
+ ## Common names
+ if ("common_names" %in% types) {
+ xml <- xml2::read_html(json$parse$text[[1]])
+ names_xml <- list(
+ regular_bolds = xml2::xml_find_all(
+ xml,
+ xpath = "/html/body/p[count(preceding::div[contains(@id, 'toc') or contains(@class, 'toc')]) = 0 and count(preceding::h1) = 0 and count(preceding::h2) = 0 and count(preceding::h3) = 0]//b[not(parent::*[self::i]) and not(i)]"), #nolint
+ regular_biotabox_header =
+ xml2::xml_find_all(
+ xml,
+ xpath = "(//table[contains(@class, 'infobox biota') or contains(@class, 'infobox_v2 biota')]//th)[1]/b[not(parent::*[self::i]) and not(i)]") #nolint
+ )
+ # NOTE: Often unreliable.
+ regular_title <- stats::na.omit(
+ match_(json$parse$displaytitle, "^([^<]*)$")[2])
+ common_names <- unique(c(unlist(lapply(names_xml, xml2::xml_text)),
+ regular_title))
+ language <- match_(page$url, 'http[s]*://([^\\.]*)\\.')[2]
+ cnms <- lapply(common_names, function(name) {
+ list(name = name, language = language)
+ })
+ result$common_names <- if (tidy) atbl(dt_df(cnms)) else cnms
+ }
+ ## classification
+ if ("classification" %in% types) {
+ txt <- xml2::read_html(json$parse$text[[1]])
+ html <-
+ xml2::xml_find_all(txt, "//table[@class=\"infobox biota\"]//span")
+ labels <- xml2::xml_attr(html, "class")
+ labels <- gsub("^\\s+|\\s$|\\(|\\)", "", labels)
+ values <- gsub("^\\s+|\\s$", "", xml2::xml_text(html))
+ clz <- mapply(list, rank = labels, name = values,
+ SIMPLIFY = FALSE, USE.NAMES = FALSE)
+ result$classification <- if (tidy) atbl(dt_df(clz)) else clz
+ }
+ ## synonyms
+ if ("synonyms" %in% types) {
+ syns <- list()
+ txt <- xml2::read_html(json$parse$text[[1]])
+ html <-
+ xml2::xml_find_all(txt, "//table[@class=\"infobox biota\"]//td")
+ syn_node <-
+ xml2::xml_find_first(html, "//th/a[contains(text(), \"Synonyms\")]")
+ if (length(stats::na.omit(xml2::xml_text(syn_node))) > 0) {
+ syn <- strsplit(xml2::xml_text(html[length(html)]), "\n")[[1]]
+ syns <- syn[nzchar(syn)]
+ }
+ result$synonyms <- syns
+ }
+
+ return(result)
+}
+
+#' @export
+#' @rdname wt_wikipedia
+wt_wikipedia_search <- function(query, wiki = "en", limit = 10, offset = 0,
+ utf8 = TRUE, ...) {
+
+ assert(wiki, "character")
+ tmp <- g_et(search_base(wiki, "wikipedia"), sh(query, limit, offset, utf8),
+ ...)
+ tmp$query$search <- atbl(tmp$query$search)
+ return(tmp)
+}
diff --git a/R/wikispecies.R b/R/wikispecies.R
new file mode 100644
index 0000000..ff4e5fa
--- /dev/null
+++ b/R/wikispecies.R
@@ -0,0 +1,109 @@
+#' WikiSpecies
+#'
+#' @export
+#' @template args
+#' @family Wikispecies functions
+#' @return `wt_wikispecies` returns a list, with slots:
+#' \itemize{
+#' \item langlinks - language page links
+#' \item externallinks - external links
+#' \item common_names - a data.frame with `name` and `language` columns
+#' \item classification - a data.frame with `rank` and `name` columns
+#' }
+#'
+#' `wt_wikispecies_parse` returns a list
+#'
+#' `wt_wikispecies_search` returns a list with slots for `continue` and
+#' `query`, where `query` holds the results, with `query$search` slot with
+#' the search results
+#' @references <https://www.mediawiki.org/wiki/API:Search> for help on search
+#' @examples \dontrun{
+#' # high level
+#' wt_wikispecies(name = "Malus domestica")
+#'
+#' # low level
+#' pg <- wt_wiki_page("https://species.wikimedia.org/wiki/Abelmoschus")
+#' wt_wikispecies_parse(pg)
+#'
+#' # search wikispecies
+#' wt_wikispecies_search(query = "pine tree")
+#'
+#' ## use search results to dig into pages
+#' res <- wt_wikispecies_search(query = "pine tree")
+#' lapply(res$query$search$title[1:3], wt_wikispecies)
+#' }
+wt_wikispecies <- function(name, utf8 = TRUE, ...) {
+ assert(name, "character")
+ stopifnot(length(name) == 1)
+ prop <- c("langlinks", "externallinks", "common_names", "classification")
+ res <- wt_wiki_url_build(
+ wiki = "species", type = "wikimedia", page = name,
+ utf8 = utf8,
+ prop = prop)
+ pg <- wt_wiki_page(res, ...)
+ wt_wikispecies_parse(pg, prop, tidy = TRUE)
+}
+
+#' @export
+#' @rdname wt_wikispecies
+wt_wikispecies_parse <- function(page, types = c("langlinks", "iwlinks",
+ "externallinks", "common_names",
+ "classification"),
+ tidy = FALSE) {
+
+ result <- wt_wiki_page_parse(page, types = types, tidy = tidy)
+ json <- jsonlite::fromJSON(rawToChar(page$content), simplifyVector = FALSE)
+ if (is.null(json$parse)) {
+ return(result)
+ }
+ ## Common names
+ if ("common_names" %in% types) {
+ xml <- xml2::read_html(json$parse$text[[1]])
+ # XML formats:
+ # <b>language:</b> [name|<a>name</a>]
+ # Name formats:
+ # name1, name2
+ vernacular_html <- xml2::xml_find_all(
+ xml,
+ "(//h2/span[contains(@id, 'Vernacular')]/parent::*/following-sibling::div)[1]" #nolint
+ )
+ languages_html <- xml2::xml_find_all(vernacular_html, xpath = "b")
+ languages <- gsub("\\s*:\\s*", "",
+ unlist(lapply(languages_html, xml2::xml_text)))
+ names_html <-
+ xml2::xml_find_all(
+ vernacular_html,
+ "b[not(following-sibling::*[1][self::a])]/following-sibling::text()[1] | b/following-sibling::*[1][self::a]/text()") #nolint
+ common_names <- gsub("^\\s*", "",
+ unlist(lapply(names_html, xml2::xml_text)))
+ cnms <-
+ mapply(list, name = common_names,
+ language = languages, SIMPLIFY = FALSE, USE.NAMES = FALSE)
+ result$common_names <- if (tidy) atbl(dt_df(cnms)) else cnms
+ }
+ ## classification
+ if ("classification" %in% types) {
+ txt <- xml2::read_html(json$parse$text[[1]])
+ html <- xml2::xml_text(
+ xml2::xml_find_first(txt, "//table[contains(@class, \"wikitable\")]//p"))
+ html <- strsplit(html, "\n")[[1]]
+ labels <-
+ vapply(html, function(z) strsplit(z, ":")[[1]][1], "", USE.NAMES = FALSE)
+ values <-
+ vapply(html, function(z) strsplit(z, ":")[[1]][2], "", USE.NAMES = FALSE)
+ values <- gsub("^\\s+|\\s+$", "", values)
+ clz <- mapply(list, rank = labels, name = values,
+ SIMPLIFY = FALSE, USE.NAMES = FALSE)
+ result$classification <- if (tidy) atbl(dt_df(clz)) else clz
+ }
+ return(result)
+}
+
+#' @export
+#' @rdname wt_wikispecies
+wt_wikispecies_search <- function(query, limit = 10, offset = 0, utf8 = TRUE,
+ ...) {
+ tmp <- g_et(search_base("species"), sh(query, limit, offset, utf8), ...)
+ tmp$query$search <- atbl(tmp$query$search)
+ return(tmp)
+}
diff --git a/R/wikitaxa-package.R b/R/wikitaxa-package.R
new file mode 100644
index 0000000..36dc114
--- /dev/null
+++ b/R/wikitaxa-package.R
@@ -0,0 +1,25 @@
+#' Taxonomic Information from Wikipedia
+#'
+#' @name wikitaxa-package
+#' @aliases wikitaxa
+#' @docType package
+#' @author Scott Chamberlain \email{myrmecocystus@@gmail.com}
+#' @author Ethan Welty
+#' @keywords package
+NULL
+
+#' List of Wikipedias
+#'
+#' data.frame of 295 rows, with 3 columns:
+#' \itemize{
+#' \item language - language
+#' \item language_local - language in local name
+#' \item wiki - language code for the wiki
+#' }
+#'
+#' From <https://meta.wikimedia.org/wiki/List_of_Wikipedias>
+#'
+#' @name wikipedias
+#' @docType data
+#' @keywords data
+NULL
diff --git a/R/zzz.R b/R/zzz.R
new file mode 100644
index 0000000..14b931b
--- /dev/null
+++ b/R/zzz.R
@@ -0,0 +1,45 @@
+tc <- function(l) Filter(Negate(is.null), l)
+
+dt_df <- function(x) {
+ (ffff <- data.table::setDF(data.table::rbindlist(x, fill = TRUE,
+ use.names = TRUE)))
+}
+
+search_base <- function(x, y = "wikimedia") {
+ sprintf("https://%s.%s.org/w/api.php", x, y)
+}
+
+atbl <- function(x) tibble::as_tibble(x)
+
+g_et <- function(url, args = list(), ...) {
+ cli <- crul::HttpClient$new(url = url)
+ res <- cli$get(query = args, ...)
+ res$raise_for_status()
+ jsonlite::fromJSON(res$parse("UTF-8"))
+}
+
+# Stop with an error unless `x` is NULL or inherits from one of the classes
+# named in `y`; inherits() copes with objects that carry several classes.
+assert <- function(x, y) {
+  if (!is.null(x) && !inherits(x, y)) {
+    stop(deparse(substitute(x)), " must be of class ",
+      paste0(y, collapse = ", "), call. = FALSE)
+  }
+}
+
+sh <- function(query, limit, offset, utf8) {
+ assert(limit, c("integer", "numeric"))
+ assert(offset, c("integer", "numeric"))
+ assert(utf8, "logical")
+ tc(list(
+ action = "query", list = "search", srsearch = query,
+ utf8 = if (utf8) "" else NULL, format = "json",
+ srprop = "size|wordcount|timestamp|snippet",
+ srlimit = limit, sroffset = offset
+ ))
+}
+
+match_ <- function(string, pattern) {
+ pos <- regexec(pattern, string)
+ regmatches(string, pos)[[1]]
+}
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..f5ec1b9
--- /dev/null
+++ b/README.md
@@ -0,0 +1,309 @@
+wikitaxa
+========
+
+
+
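+[![Project Status: WIP](http://www.repostatus.org/badges/latest/wip.svg)](http://www.repostatus.org/#wip)
+[![Build Status](https://travis-ci.org/ropensci/wikitaxa.svg?branch=master)](https://travis-ci.org/ropensci/wikitaxa)
+[![codecov](https://codecov.io/gh/ropensci/wikitaxa/branch/master/graph/badge.svg)](https://codecov.io/gh/ropensci/wikitaxa)
+[![rstudio mirror downloads](http://cranlogs.r-pkg.org/badges/wikitaxa)](https://github.com/metacran/cranlogs.app)
+[![cran version](http://www.r-pkg.org/badges/version/wikitaxa)](https://cran.r-project.org/package=wikitaxa)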
+
+`wikitaxa` - taxonomy data from Wikipedia/Wikidata/Wikispecies
+
+
+### Low level API
+
+The low level API is meant for power users and gives you more control,
+but requires more knowledge.
+
+* `wt_wiki_page()`
+* `wt_wiki_page_parse()`
+* `wt_wiki_url_build()`
+* `wt_wiki_url_parse()`
+* `wt_wikispecies_parse()`
+* `wt_wikicommons_parse()`
+* `wt_wikipedia_parse()`
+
+### High level API
+
+The high level API is meant to be easier and faster to use.
+
+* `wt_data()`
+* `wt_data_id()`
+* `wt_wikispecies()`
+* `wt_wikicommons()`
+* `wt_wikipedia()`
+
+Search functions:
+
+* `wt_wikicommons_search()`
+* `wt_wikispecies_search()`
+* `wt_wikipedia_search()`
+
+## Installation
+
+CRAN version
+
+
+```r
+install.packages("wikitaxa")
+```
+
+Dev version
+
+
+```r
+install.packages("devtools")
+devtools::install_github("ropensci/wikitaxa")
+```
+
+
+```r
+library('wikitaxa')
+```
+
+## wiki data
+
+
+```r
+wt_data("Poa annua")
+```
+
+Get a Wikidata ID
+
+
+```r
+wt_data_id("Mimulus foliatus")
+#> [1] "Q6495130"
+#> attr(,"class")
+#> [1] "wiki_id"
+```
+
+## wikipedia
+
+lower level
+
+
+```r
+pg <- wt_wiki_page("https://en.wikipedia.org/wiki/Malus_domestica")
+res <- wt_wiki_page_parse(pg)
+res$iwlinks
+#> [1] "https://en.wiktionary.org/wiki/apple"
+#> [2] "https://commons.wikimedia.org/wiki/Special:Search/Apple"
+#> [3] "https://en.wikiquote.org/wiki/Apples"
+#> [4] "https://en.wikisource.org/wiki/1911_Encyclop%C3%A6dia_Britannica/Apple"
+#> [5] "https://en.wikibooks.org/wiki/Apples"
+#> [6] "https://species.wikimedia.org/wiki/Malus_domestica"
+#> [7] "https://commons.wikimedia.org/wiki/Category:Apple_cultivars"
+```
+
+higher level
+
+
+```r
+res <- wt_wikipedia("Malus domestica")
+res$common_names
+#> # A tibble: 3 × 2
+#> name language
+#> <chr> <chr>
+#> 1 apple tree en
+#> 2 apple en
+#> 3 Apple en
+res$classification
+#> # A tibble: 9 × 2
+#> rank name
+#> <chr> <chr>
+#> 1 kingdom Plantae
+#> 2 unranked Angiosperms
+#> 3 unranked Eudicots
+#> 4 unranked Rosids
+#> 5 order Rosales
+#> 6 family Rosaceae
+#> 7 genus Malus
+#> 8 species M. pumila
+#> 9 binomial Malus pumila
+```
+
+choose a wikipedia language
+
+
+```r
+# French
+wt_wikipedia(name = "Malus domestica", wiki = "fr")
+# Slovak
+wt_wikipedia(name = "Malus domestica", wiki = "sk")
+# Vietnamese
+wt_wikipedia(name = "Malus domestica", wiki = "vi")
+```
+
+
+## wikicommons
+
+lower level
+
+
+```r
+pg <- wt_wiki_page("https://commons.wikimedia.org/wiki/Abelmoschus")
+res <- wt_wikicommons_parse(pg)
+res$common_names[1:3]
+#> [[1]]
+#> [[1]]$name
+#> [1] "okra"
+#>
+#> [[1]]$language
+#> [1] "en"
+#>
+#>
+#> [[2]]
+#> [[2]]$name
+#> [1] "مسكي"
+#>
+#> [[2]]$language
+#> [1] "ar"
+#>
+#>
+#> [[3]]
+#> [[3]]$name
+#> [1] "Abelmoş"
+#>
+#> [[3]]$language
+#> [1] "az"
+```
+
+higher level
+
+
+```r
+res <- wt_wikicommons("Abelmoschus")
+res$classification
+#> # A tibble: 15 × 2
+#> rank name
+#> <chr> <chr>
+#> 1 Domain Eukaryota
+#> 2 • unranked Archaeplastida
+#> 3 • Regnum Plantae
+#> 4 • Cladus angiosperms
+#> 5 • Cladus eudicots
+#> 6 • Cladus core eudicots
+#> 7 • Cladus superrosids
+#> 8 • Cladus rosids
+#> 9 • Cladus eurosids II
+#> 10 • Ordo Malvales
+#> 11 • Familia Malvaceae
+#> 12 • Subfamilia Malvoideae
+#> 13 • Tribus Hibisceae
+#> 14 • Abelmoschus
+#> 15 Medik. (1787)
+res$common_names
+#> # A tibble: 18 × 2
+#> name language
+#> <chr> <chr>
+#> 1 okra en
+#> 2 مسكي ar
+#> 3 Abelmoş az
+#> 4 Ibiškovec cs
+#> 5 Bisameibisch de
+#> 6 Okrat fi
+#> 7 Abelmosco gl
+#> 8 Abelmošus hr
+#> 9 Ybiškė lt
+#> 10 അബെ\u0d7dമോസ്കസ് ml
+#> 11 Абельмош mrj
+#> 12 Piżmian pl
+#> 13 Абельмош ru
+#> 14 موري sd
+#> 15 Okrasläktet sv
+#> 16 Абельмош udm
+#> 17 Chi Vông vang vi
+#> 18 黄葵属 zh
+```
+
+## wikispecies
+
+lower level
+
+
+```r
+pg <- wt_wiki_page("https://species.wikimedia.org/wiki/Malus_domestica")
+res <- wt_wikispecies_parse(pg, types = "common_names")
+res$common_names[1:3]
+#> [[1]]
+#> [[1]]$name
+#> [1] "Ябълка"
+#>
+#> [[1]]$language
+#> [1] "български"
+#>
+#>
+#> [[2]]
+#> [[2]]$name
+#> [1] "Poma, pomera"
+#>
+#> [[2]]$language
+#> [1] "català"
+#>
+#>
+#> [[3]]
+#> [[3]]$name
+#> [1] "Apfel"
+#>
+#> [[3]]$language
+#> [1] "Deutsch"
+```
+
+higher level
+
+
+```r
+res <- wt_wikispecies("Malus domestica")
+res$classification
+#> # A tibble: 8 × 2
+#> rank name
+#> <chr> <chr>
+#> 1 Superregnum Eukaryota
+#> 2 Regnum Plantae
+#> 3 Cladus Angiosperms
+#> 4 Cladus Eudicots
+#> 5 Cladus Core eudicots
+#> 6 Cladus Rosids
+#> 7 Cladus Eurosids I
+#> 8 Ordo Rosales
+res$common_names
+#> # A tibble: 19 × 2
+#> name language
+#> <chr> <chr>
+#> 1 Ябълка български
+#> 2 Poma, pomera català
+#> 3 Apfel Deutsch
+#> 4 Aed-õunapuu eesti
+#> 5 Μηλιά Ελληνικά
+#> 6 Apple English
+#> 7 Manzano español
+#> 8 Pomme français
+#> 9 Melâr furlan
+#> 10 사과나무 한국어
+#> 11 ‘Āpala Hawaiʻi
+#> 12 Melo italiano
+#> 13 Aapel Nordfriisk
+#> 14 Maçã, Macieira português
+#> 15 Яблоня домашняя русский
+#> 16 Tarhaomenapuu suomi
+#> 17 Elma Türkçe
+#> 18 Яблуня домашня українська
+#> 19 Pomaro vèneto
+```
+
+## Contributors
+
+* [Ethan Welty](https://github.com/ezwelty)
+* [Scott Chamberlain](https://github.com/sckott)
+
+## Meta
+
+* Please [report any issues or bugs](https://github.com/ropensci/wikitaxa/issues).
+* License: MIT
+* Get citation information for `wikitaxa` in R doing `citation(package = 'wikitaxa')`
+* Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). By participating in this project you agree to abide by its terms.
+
+[![ropensci_footer](https://ropensci.org/public_images/github_footer.png)](https://ropensci.org)
diff --git a/build/vignette.rds b/build/vignette.rds
new file mode 100644
index 0000000..1b8438c
Binary files /dev/null and b/build/vignette.rds differ
diff --git a/data/wikipedias.rda b/data/wikipedias.rda
new file mode 100644
index 0000000..26b151f
Binary files /dev/null and b/data/wikipedias.rda differ
diff --git a/inst/doc/wikitaxa_vignette.R b/inst/doc/wikitaxa_vignette.R
new file mode 100644
index 0000000..ada6f0f
--- /dev/null
+++ b/inst/doc/wikitaxa_vignette.R
@@ -0,0 +1,73 @@
+## ----echo=FALSE----------------------------------------------------------
+knitr::opts_chunk$set(
+ comment = "#>",
+ collapse = TRUE,
+ warning = FALSE,
+ message = FALSE
+)
+
+## ----eval=FALSE----------------------------------------------------------
+# install.packages("wikitaxa")
+
+## ----eval=FALSE----------------------------------------------------------
+# devtools::install_github("ropensci/wikitaxa")
+
+## ------------------------------------------------------------------------
+library("wikitaxa")
+
+## ----eval=FALSE----------------------------------------------------------
+# wt_data("Poa annua")
+
+## ------------------------------------------------------------------------
+wt_data_id("Mimulus foliatus")
+
+## ------------------------------------------------------------------------
+pg <- wt_wiki_page("https://en.wikipedia.org/wiki/Malus_domestica")
+res <- wt_wiki_page_parse(pg)
+res$iwlinks
+
+## ------------------------------------------------------------------------
+res <- wt_wikipedia("Malus domestica")
+res$common_names
+res$classification
+
+## ----eval=FALSE----------------------------------------------------------
+# # French
+# wt_wikipedia(name = "Malus domestica", wiki = "fr")
+# # Slovak
+# wt_wikipedia(name = "Malus domestica", wiki = "sk")
+# # Vietnamese
+# wt_wikipedia(name = "Malus domestica", wiki = "vi")
+
+## ------------------------------------------------------------------------
+wt_wikipedia_search(query = "Pinus")
+
+## ----eval=FALSE----------------------------------------------------------
+# wt_wikipedia_search(query = "Pinus", wiki = "fr")
+
+## ------------------------------------------------------------------------
+pg <- wt_wiki_page("https://commons.wikimedia.org/wiki/Abelmoschus")
+res <- wt_wikicommons_parse(pg)
+res$common_names[1:3]
+
+## ------------------------------------------------------------------------
+res <- wt_wikicommons("Abelmoschus")
+res$classification
+res$common_names
+
+## ------------------------------------------------------------------------
+wt_wikicommons_search(query = "Pinus")
+
+## ------------------------------------------------------------------------
+pg <- wt_wiki_page("https://species.wikimedia.org/wiki/Malus_domestica")
+res <- wt_wikispecies_parse(pg, types = "common_names")
+res$common_names[1:3]
+
+## ------------------------------------------------------------------------
+res <- wt_wikispecies("Malus domestica")
+res$classification
+res$common_names
+
+## ------------------------------------------------------------------------
+wt_wikispecies_search(query = "Pinus")
+
diff --git a/inst/doc/wikitaxa_vignette.Rmd b/inst/doc/wikitaxa_vignette.Rmd
new file mode 100644
index 0000000..388bc8a
--- /dev/null
+++ b/inst/doc/wikitaxa_vignette.Rmd
@@ -0,0 +1,177 @@
+---
+title: "Introduction to the wikitaxa package"
+author: "Scott Chamberlain"
+date: "`r Sys.Date()`"
+output: rmarkdown::html_vignette
+vignette: >
+ %\VignetteIndexEntry{Introduction to the wikitaxa package}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+---
+
+```{r echo=FALSE}
+knitr::opts_chunk$set(
+ comment = "#>",
+ collapse = TRUE,
+ warning = FALSE,
+ message = FALSE
+)
+```
+
+`wikitaxa` - Taxonomy data from Wikipedia
+
+The goal of `wikitaxa` is to allow search and taxonomic data retrieval from
+across many Wikimedia sites, including: Wikipedia, Wikicommons, and
+Wikispecies.
+
+There are lower level and higher level parts to the package API:
+
+### Low level API
+
+The low level API is meant for power users and gives you more control,
+but requires more knowledge.
+
+* `wt_wiki_page()`
+* `wt_wiki_page_parse()`
+* `wt_wiki_url_build()`
+* `wt_wiki_url_parse()`
+* `wt_wikispecies_parse()`
+* `wt_wikicommons_parse()`
+* `wt_wikipedia_parse()`
+
+### High level API
+
+The high level API is meant to be easier and faster to use.
+
+* `wt_data()`
+* `wt_data_id()`
+* `wt_wikispecies()`
+* `wt_wikicommons()`
+* `wt_wikipedia()`
+
+Search functions:
+
+* `wt_wikicommons_search()`
+* `wt_wikispecies_search()`
+* `wt_wikipedia_search()`
+
+## Installation
+
+CRAN version
+
+```{r eval=FALSE}
+install.packages("wikitaxa")
+```
+
+Dev version
+
+```{r eval=FALSE}
+devtools::install_github("ropensci/wikitaxa")
+```
+
+```{r}
+library("wikitaxa")
+```
+
+## wiki data
+
+```{r eval=FALSE}
+wt_data("Poa annua")
+```
+
+Get a Wikidata ID
+
+```{r}
+wt_data_id("Mimulus foliatus")
+```
+
+## wikipedia
+
+lower level
+
+```{r}
+pg <- wt_wiki_page("https://en.wikipedia.org/wiki/Malus_domestica")
+res <- wt_wiki_page_parse(pg)
+res$iwlinks
+```
+
+higher level
+
+```{r}
+res <- wt_wikipedia("Malus domestica")
+res$common_names
+res$classification
+```
+
+choose a wikipedia language
+
+```{r eval=FALSE}
+# French
+wt_wikipedia(name = "Malus domestica", wiki = "fr")
+# Slovak
+wt_wikipedia(name = "Malus domestica", wiki = "sk")
+# Vietnamese
+wt_wikipedia(name = "Malus domestica", wiki = "vi")
+```
+
+search
+
+```{r}
+wt_wikipedia_search(query = "Pinus")
+```
+
+search supports languages
+
+```{r eval=FALSE}
+wt_wikipedia_search(query = "Pinus", wiki = "fr")
+```
+
+
+## wikicommons
+
+lower level
+
+```{r}
+pg <- wt_wiki_page("https://commons.wikimedia.org/wiki/Abelmoschus")
+res <- wt_wikicommons_parse(pg)
+res$common_names[1:3]
+```
+
+higher level
+
+```{r}
+res <- wt_wikicommons("Abelmoschus")
+res$classification
+res$common_names
+```
+
+search
+
+```{r}
+wt_wikicommons_search(query = "Pinus")
+```
+
+
+## wikispecies
+
+lower level
+
+```{r}
+pg <- wt_wiki_page("https://species.wikimedia.org/wiki/Malus_domestica")
+res <- wt_wikispecies_parse(pg, types = "common_names")
+res$common_names[1:3]
+```
+
+higher level
+
+```{r}
+res <- wt_wikispecies("Malus domestica")
+res$classification
+res$common_names
+```
+
+search
+
+```{r}
+wt_wikispecies_search(query = "Pinus")
+```
diff --git a/inst/doc/wikitaxa_vignette.html b/inst/doc/wikitaxa_vignette.html
new file mode 100644
index 0000000..3d1f67e
--- /dev/null
+++ b/inst/doc/wikitaxa_vignette.html
@@ -0,0 +1,426 @@
+<!DOCTYPE html>
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+
+<head>
+
+<meta charset="utf-8" />
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+<meta name="generator" content="pandoc" />
+
+<meta name="viewport" content="width=device-width, initial-scale=1">
+
+<meta name="author" content="Scott Chamberlain" />
+
+<meta name="date" content="2017-05-05" />
+
+<title>Introduction to the wikitaxa package</title>
+
+
+
+<style type="text/css">code{white-space: pre;}</style>
+<style type="text/css">
+div.sourceCode { overflow-x: auto; }
+table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode {
+ margin: 0; padding: 0; vertical-align: baseline; border: none; }
+table.sourceCode { width: 100%; line-height: 100%; }
+td.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; color: #aaaaaa; border-right: 1px solid #aaaaaa; }
+td.sourceCode { padding-left: 5px; }
+code > span.kw { color: #007020; font-weight: bold; } /* Keyword */
+code > span.dt { color: #902000; } /* DataType */
+code > span.dv { color: #40a070; } /* DecVal */
+code > span.bn { color: #40a070; } /* BaseN */
+code > span.fl { color: #40a070; } /* Float */
+code > span.ch { color: #4070a0; } /* Char */
+code > span.st { color: #4070a0; } /* String */
+code > span.co { color: #60a0b0; font-style: italic; } /* Comment */
+code > span.ot { color: #007020; } /* Other */
+code > span.al { color: #ff0000; font-weight: bold; } /* Alert */
+code > span.fu { color: #06287e; } /* Function */
+code > span.er { color: #ff0000; font-weight: bold; } /* Error */
+code > span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
+code > span.cn { color: #880000; } /* Constant */
+code > span.sc { color: #4070a0; } /* SpecialChar */
+code > span.vs { color: #4070a0; } /* VerbatimString */
+code > span.ss { color: #bb6688; } /* SpecialString */
+code > span.im { } /* Import */
+code > span.va { color: #19177c; } /* Variable */
+code > span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
+code > span.op { color: #666666; } /* Operator */
+code > span.bu { } /* BuiltIn */
+code > span.ex { } /* Extension */
+code > span.pp { color: #bc7a00; } /* Preprocessor */
+code > span.at { color: #7d9029; } /* Attribute */
+code > span.do { color: #ba2121; font-style: italic; } /* Documentation */
+code > span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
+code > span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
+code > span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
+</style>
+
+
+
+<link href="data:text/css;charset=utf-8,body%20%7B%0Abackground%2Dcolor%3A%20%23fff%3B%0Amargin%3A%201em%20auto%3B%0Amax%2Dwidth%3A%20700px%3B%0Aoverflow%3A%20visible%3B%0Apadding%2Dleft%3A%202em%3B%0Apadding%2Dright%3A%202em%3B%0Afont%2Dfamily%3A%20%22Open%20Sans%22%2C%20%22Helvetica%20Neue%22%2C%20Helvetica%2C%20Arial%2C%20sans%2Dserif%3B%0Afont%2Dsize%3A%2014px%3B%0Aline%2Dheight%3A%201%2E35%3B%0A%7D%0A%23header%20%7B%0Atext%2Dalign%3A%20center%3B%0A%7D%0A%23TOC%20%7B%0Aclear%3A%20bot [...]
+
+</head>
+
+<body>
+
+
+
+
+<h1 class="title toc-ignore">Introduction to the wikitaxa package</h1>
+<h4 class="author"><em>Scott Chamberlain</em></h4>
+<h4 class="date"><em>2017-05-05</em></h4>
+
+
+
+<p><code>wikitaxa</code> - Taxonomy data from Wikipedia</p>
+<p>The goal of <code>wikitaxa</code> is to allow search and taxonomic data retrieval from across many Wikimedia sites, including: Wikipedia, Wikicommons, and Wikispecies.</p>
+<p>There are lower level and higher level parts to the package API:</p>
+<div id="low-level-api" class="section level3">
+<h3>Low level API</h3>
+<p>The low level API is meant for power users and gives you more control, but requires more knowledge.</p>
+<ul>
+<li><code>wt_wiki_page()</code></li>
+<li><code>wt_wiki_page_parse()</code></li>
+<li><code>wt_wiki_url_build()</code></li>
+<li><code>wt_wiki_url_parse()</code></li>
+<li><code>wt_wikispecies_parse()</code></li>
+<li><code>wt_wikicommons_parse()</code></li>
+<li><code>wt_wikipedia_parse()</code></li>
+</ul>
+</div>
+<div id="high-level-api" class="section level3">
+<h3>High level API</h3>
+<p>The high level API is meant to be easier and faster to use.</p>
+<ul>
+<li><code>wt_data()</code></li>
+<li><code>wt_data_id()</code></li>
+<li><code>wt_wikispecies()</code></li>
+<li><code>wt_wikicommons()</code></li>
+<li><code>wt_wikipedia()</code></li>
+</ul>
+<p>Search functions:</p>
+<ul>
+<li><code>wt_wikicommons_search()</code></li>
+<li><code>wt_wikispecies_search()</code></li>
+<li><code>wt_wikipedia_search()</code></li>
+</ul>
+</div>
+<div id="installation" class="section level2">
+<h2>Installation</h2>
+<p>CRAN version</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">install.packages</span>(<span class="st">"wikitaxa"</span>)</code></pre></div>
+<p>Dev version</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">devtools<span class="op">::</span><span class="kw">install_github</span>(<span class="st">"ropensci/wikitaxa"</span>)</code></pre></div>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(<span class="st">"wikitaxa"</span>)</code></pre></div>
+</div>
+<div id="wiki-data" class="section level2">
+<h2>wiki data</h2>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">wt_data</span>(<span class="st">"Poa annua"</span>)</code></pre></div>
+<p>Get a Wikidata ID</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">wt_data_id</span>(<span class="st">"Mimulus foliatus"</span>)
+<span class="co">#> [1] "Q6495130"</span>
+<span class="co">#> attr(,"class")</span>
+<span class="co">#> [1] "wiki_id"</span></code></pre></div>
+</div>
+<div id="wikipedia" class="section level2">
+<h2>wikipedia</h2>
+<p>lower level</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">pg <-<span class="st"> </span><span class="kw">wt_wiki_page</span>(<span class="st">"https://en.wikipedia.org/wiki/Malus_domestica"</span>)
+res <-<span class="st"> </span><span class="kw">wt_wiki_page_parse</span>(pg)
+res<span class="op">$</span>iwlinks
+<span class="co">#> [1] "https://en.wiktionary.org/wiki/apple" </span>
+<span class="co">#> [2] "https://commons.wikimedia.org/wiki/Special:Search/Apple" </span>
+<span class="co">#> [3] "https://en.wikiquote.org/wiki/Apples" </span>
+<span class="co">#> [4] "https://en.wikisource.org/wiki/1911_Encyclop%C3%A6dia_Britannica/Apple"</span>
+<span class="co">#> [5] "https://en.wikibooks.org/wiki/Apples" </span>
+<span class="co">#> [6] "https://species.wikimedia.org/wiki/Malus_domestica" </span>
+<span class="co">#> [7] "https://commons.wikimedia.org/wiki/Category:Apple_cultivars"</span></code></pre></div>
+<p>higher level</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">res <-<span class="st"> </span><span class="kw">wt_wikipedia</span>(<span class="st">"Malus domestica"</span>)
+res<span class="op">$</span>common_names
+<span class="co">#> # A tibble: 3 × 2</span>
+<span class="co">#> name language</span>
+<span class="co">#> <chr> <chr></span>
+<span class="co">#> 1 apple tree en</span>
+<span class="co">#> 2 apple en</span>
+<span class="co">#> 3 Apple en</span>
+res<span class="op">$</span>classification
+<span class="co">#> # A tibble: 9 × 2</span>
+<span class="co">#> rank name</span>
+<span class="co">#> <chr> <chr></span>
+<span class="co">#> 1 kingdom Plantae</span>
+<span class="co">#> 2 unranked Angiosperms</span>
+<span class="co">#> 3 unranked Eudicots</span>
+<span class="co">#> 4 unranked Rosids</span>
+<span class="co">#> 5 order Rosales</span>
+<span class="co">#> 6 family Rosaceae</span>
+<span class="co">#> 7 genus Malus</span>
+<span class="co">#> 8 species M. pumila</span>
+<span class="co">#> 9 binomial Malus pumila</span></code></pre></div>
+<p>choose a wikipedia language</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># French</span>
+<span class="kw">wt_wikipedia</span>(<span class="dt">name =</span> <span class="st">"Malus domestica"</span>, <span class="dt">wiki =</span> <span class="st">"fr"</span>)
+<span class="co"># Slovak</span>
+<span class="kw">wt_wikipedia</span>(<span class="dt">name =</span> <span class="st">"Malus domestica"</span>, <span class="dt">wiki =</span> <span class="st">"sk"</span>)
+<span class="co"># Vietnamese</span>
+<span class="kw">wt_wikipedia</span>(<span class="dt">name =</span> <span class="st">"Malus domestica"</span>, <span class="dt">wiki =</span> <span class="st">"vi"</span>)</code></pre></div>
+<p>search</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">wt_wikipedia_search</span>(<span class="dt">query =</span> <span class="st">"Pinus"</span>)
+<span class="co">#> $batchcomplete</span>
+<span class="co">#> [1] ""</span>
+<span class="co">#> </span>
+<span class="co">#> $continue</span>
+<span class="co">#> $continue$sroffset</span>
+<span class="co">#> [1] 10</span>
+<span class="co">#> </span>
+<span class="co">#> $continue$continue</span>
+<span class="co">#> [1] "-||"</span>
+<span class="co">#> </span>
+<span class="co">#> </span>
+<span class="co">#> $query</span>
+<span class="co">#> $query$searchinfo</span>
+<span class="co">#> $query$searchinfo$totalhits</span>
+<span class="co">#> [1] 2804</span>
+<span class="co">#> </span>
+<span class="co">#> </span>
+<span class="co">#> $query$search</span>
+<span class="co">#> # A tibble: 10 × 6</span>
+<span class="co">#> ns title size wordcount</span>
+<span class="co">#> * <int> <chr> <int> <int></span>
+<span class="co">#> 1 0 Pine 19915 2372</span>
+<span class="co">#> 2 0 List of Pinus species 13999 995</span>
+<span class="co">#> 3 0 Pinus luchuensis 2903 166</span>
+<span class="co">#> 4 0 Pinus wallichiana 4295 433</span>
+<span class="co">#> 5 0 Pinus nigra 11468 1352</span>
+<span class="co">#> 6 0 Pinus kesiya 5281 512</span>
+<span class="co">#> 7 0 Pinus devoniana 3801 397</span>
+<span class="co">#> 8 0 Pinus × sondereggeri 3485 347</span>
+<span class="co">#> 9 0 Pinus mugo 10884 795</span>
+<span class="co">#> 10 0 Pinus heldreichii 6482 707</span>
+<span class="co">#> # ... with 2 more variables: snippet <chr>, timestamp <chr></span></code></pre></div>
+<p>search supports languages</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">wt_wikipedia_search</span>(<span class="dt">query =</span> <span class="st">"Pinus"</span>, <span class="dt">wiki =</span> <span class="st">"fr"</span>)</code></pre></div>
+</div>
+<div id="wikicommons" class="section level2">
+<h2>wikicommons</h2>
+<p>lower level</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">pg <-<span class="st"> </span><span class="kw">wt_wiki_page</span>(<span class="st">"https://commons.wikimedia.org/wiki/Abelmoschus"</span>)
+res <-<span class="st"> </span><span class="kw">wt_wikicommons_parse</span>(pg)
+res<span class="op">$</span>common_names[<span class="dv">1</span><span class="op">:</span><span class="dv">3</span>]
+<span class="co">#> [[1]]</span>
+<span class="co">#> [[1]]$name</span>
+<span class="co">#> [1] "okra"</span>
+<span class="co">#> </span>
+<span class="co">#> [[1]]$language</span>
+<span class="co">#> [1] "en"</span>
+<span class="co">#> </span>
+<span class="co">#> </span>
+<span class="co">#> [[2]]</span>
+<span class="co">#> [[2]]$name</span>
+<span class="co">#> [1] "مسكي"</span>
+<span class="co">#> </span>
+<span class="co">#> [[2]]$language</span>
+<span class="co">#> [1] "ar"</span>
+<span class="co">#> </span>
+<span class="co">#> </span>
+<span class="co">#> [[3]]</span>
+<span class="co">#> [[3]]$name</span>
+<span class="co">#> [1] "Abelmoş"</span>
+<span class="co">#> </span>
+<span class="co">#> [[3]]$language</span>
+<span class="co">#> [1] "az"</span></code></pre></div>
+<p>higher level</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">res <-<span class="st"> </span><span class="kw">wt_wikicommons</span>(<span class="st">"Abelmoschus"</span>)
+res<span class="op">$</span>classification
+<span class="co">#> # A tibble: 15 × 2</span>
+<span class="co">#> rank name</span>
+<span class="co">#> <chr> <chr></span>
+<span class="co">#> 1 Domain Eukaryota</span>
+<span class="co">#> 2 • unranked Archaeplastida</span>
+<span class="co">#> 3 • Regnum Plantae</span>
+<span class="co">#> 4 • Cladus angiosperms</span>
+<span class="co">#> 5 • Cladus eudicots</span>
+<span class="co">#> 6 • Cladus core eudicots</span>
+<span class="co">#> 7 • Cladus superrosids</span>
+<span class="co">#> 8 • Cladus rosids</span>
+<span class="co">#> 9 • Cladus eurosids II</span>
+<span class="co">#> 10 • Ordo Malvales</span>
+<span class="co">#> 11 • Familia Malvaceae</span>
+<span class="co">#> 12 • Subfamilia Malvoideae</span>
+<span class="co">#> 13 • Tribus Hibisceae</span>
+<span class="co">#> 14 • Abelmoschus</span>
+<span class="co">#> 15 Medik. (1787)</span>
+res<span class="op">$</span>common_names
+<span class="co">#> # A tibble: 18 × 2</span>
+<span class="co">#> name language</span>
+<span class="co">#> <chr> <chr></span>
+<span class="co">#> 1 okra en</span>
+<span class="co">#> 2 مسكي ar</span>
+<span class="co">#> 3 Abelmoş az</span>
+<span class="co">#> 4 Ibiškovec cs</span>
+<span class="co">#> 5 Bisameibisch de</span>
+<span class="co">#> 6 Okrat fi</span>
+<span class="co">#> 7 Abelmosco gl</span>
+<span class="co">#> 8 Abelmošus hr</span>
+<span class="co">#> 9 Ybiškė lt</span>
+<span class="co">#> 10 അബെ\u0d7dമോസ്കസ് ml</span>
+<span class="co">#> 11 Абельмош mrj</span>
+<span class="co">#> 12 Piżmian pl</span>
+<span class="co">#> 13 Абельмош ru</span>
+<span class="co">#> 14 موري sd</span>
+<span class="co">#> 15 Okrasläktet sv</span>
+<span class="co">#> 16 Абельмош udm</span>
+<span class="co">#> 17 Chi Vông vang vi</span>
+<span class="co">#> 18 黄葵属 zh</span></code></pre></div>
+<p>search</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">wt_wikicommons_search</span>(<span class="dt">query =</span> <span class="st">"Pinus"</span>)
+<span class="co">#> $batchcomplete</span>
+<span class="co">#> [1] ""</span>
+<span class="co">#> </span>
+<span class="co">#> $continue</span>
+<span class="co">#> $continue$sroffset</span>
+<span class="co">#> [1] 10</span>
+<span class="co">#> </span>
+<span class="co">#> $continue$continue</span>
+<span class="co">#> [1] "-||"</span>
+<span class="co">#> </span>
+<span class="co">#> </span>
+<span class="co">#> $query</span>
+<span class="co">#> $query$searchinfo</span>
+<span class="co">#> $query$searchinfo$totalhits</span>
+<span class="co">#> [1] 257</span>
+<span class="co">#> </span>
+<span class="co">#> </span>
+<span class="co">#> $query$search</span>
+<span class="co">#> # A tibble: 10 × 6</span>
+<span class="co">#> ns title size wordcount</span>
+<span class="co">#> * <int> <chr> <int> <int></span>
+<span class="co">#> 1 0 Pinus 4160 303</span>
+<span class="co">#> 2 0 Pinus nigra 7449 486</span>
+<span class="co">#> 3 0 Pinus × schwerinii 634 67</span>
+<span class="co">#> 4 0 Pinus mugo 7157 573</span>
+<span class="co">#> 5 0 Spinus pinus 1563 242</span>
+<span class="co">#> 6 0 Pinus tabuliformis 1739 136</span>
+<span class="co">#> 7 0 Setophaga pinus 1735 198</span>
+<span class="co">#> 8 0 Pinus sabiniana 2799 217</span>
+<span class="co">#> 9 0 Pinus distribution maps of North America 25971 92</span>
+<span class="co">#> 10 0 Pinus cooperi 564 64</span>
+<span class="co">#> # ... with 2 more variables: snippet <chr>, timestamp <chr></span></code></pre></div>
+</div>
+<div id="wikispecies" class="section level2">
+<h2>wikispecies</h2>
+<p>lower level</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">pg <-<span class="st"> </span><span class="kw">wt_wiki_page</span>(<span class="st">"https://species.wikimedia.org/wiki/Malus_domestica"</span>)
+res <-<span class="st"> </span><span class="kw">wt_wikispecies_parse</span>(pg, <span class="dt">types =</span> <span class="st">"common_names"</span>)
+res<span class="op">$</span>common_names[<span class="dv">1</span><span class="op">:</span><span class="dv">3</span>]
+<span class="co">#> [[1]]</span>
+<span class="co">#> [[1]]$name</span>
+<span class="co">#> [1] "Ябълка"</span>
+<span class="co">#> </span>
+<span class="co">#> [[1]]$language</span>
+<span class="co">#> [1] "български"</span>
+<span class="co">#> </span>
+<span class="co">#> </span>
+<span class="co">#> [[2]]</span>
+<span class="co">#> [[2]]$name</span>
+<span class="co">#> [1] "Poma, pomera"</span>
+<span class="co">#> </span>
+<span class="co">#> [[2]]$language</span>
+<span class="co">#> [1] "català"</span>
+<span class="co">#> </span>
+<span class="co">#> </span>
+<span class="co">#> [[3]]</span>
+<span class="co">#> [[3]]$name</span>
+<span class="co">#> [1] "Apfel"</span>
+<span class="co">#> </span>
+<span class="co">#> [[3]]$language</span>
+<span class="co">#> [1] "Deutsch"</span></code></pre></div>
+<p>higher level</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">res <-<span class="st"> </span><span class="kw">wt_wikispecies</span>(<span class="st">"Malus domestica"</span>)
+res<span class="op">$</span>classification
+<span class="co">#> # A tibble: 8 × 2</span>
+<span class="co">#> rank name</span>
+<span class="co">#> <chr> <chr></span>
+<span class="co">#> 1 Superregnum Eukaryota</span>
+<span class="co">#> 2 Regnum Plantae</span>
+<span class="co">#> 3 Cladus Angiosperms</span>
+<span class="co">#> 4 Cladus Eudicots</span>
+<span class="co">#> 5 Cladus Core eudicots</span>
+<span class="co">#> 6 Cladus Rosids</span>
+<span class="co">#> 7 Cladus Eurosids I</span>
+<span class="co">#> 8 Ordo Rosales</span>
+res<span class="op">$</span>common_names
+<span class="co">#> # A tibble: 19 × 2</span>
+<span class="co">#> name language</span>
+<span class="co">#> <chr> <chr></span>
+<span class="co">#> 1 Ябълка български</span>
+<span class="co">#> 2 Poma, pomera català</span>
+<span class="co">#> 3 Apfel Deutsch</span>
+<span class="co">#> 4 Aed-õunapuu eesti</span>
+<span class="co">#> 5 Μηλιά Ελληνικά</span>
+<span class="co">#> 6 Apple English</span>
+<span class="co">#> 7 Manzano español</span>
+<span class="co">#> 8 Pomme français</span>
+<span class="co">#> 9 Melâr furlan</span>
+<span class="co">#> 10 사과나무 한국어</span>
+<span class="co">#> 11 ‘Āpala Hawaiʻi</span>
+<span class="co">#> 12 Melo italiano</span>
+<span class="co">#> 13 Aapel Nordfriisk</span>
+<span class="co">#> 14 Maçã, Macieira português</span>
+<span class="co">#> 15 Яблоня домашняя русский</span>
+<span class="co">#> 16 Tarhaomenapuu suomi</span>
+<span class="co">#> 17 Elma Türkçe</span>
+<span class="co">#> 18 Яблуня домашня українська</span>
+<span class="co">#> 19 Pomaro vèneto</span></code></pre></div>
+<p>search</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">wt_wikispecies_search</span>(<span class="dt">query =</span> <span class="st">"Pinus"</span>)
+<span class="co">#> $batchcomplete</span>
+<span class="co">#> [1] ""</span>
+<span class="co">#> </span>
+<span class="co">#> $continue</span>
+<span class="co">#> $continue$sroffset</span>
+<span class="co">#> [1] 10</span>
+<span class="co">#> </span>
+<span class="co">#> $continue$continue</span>
+<span class="co">#> [1] "-||"</span>
+<span class="co">#> </span>
+<span class="co">#> </span>
+<span class="co">#> $query</span>
+<span class="co">#> $query$searchinfo</span>
+<span class="co">#> $query$searchinfo$totalhits</span>
+<span class="co">#> [1] 396</span>
+<span class="co">#> </span>
+<span class="co">#> </span>
+<span class="co">#> $query$search</span>
+<span class="co">#> # A tibble: 10 × 6</span>
+<span class="co">#> ns title size wordcount</span>
+<span class="co">#> * <int> <chr> <int> <int></span>
+<span class="co">#> 1 0 Pinus 1570 282</span>
+<span class="co">#> 2 0 Pinus subg. Pinus 318 27</span>
+<span class="co">#> 3 0 Pinus clausa 1183 211</span>
+<span class="co">#> 4 0 Pinus sect. Pinus 623 68</span>
+<span class="co">#> 5 0 Pinus resinosa 1195 166</span>
+<span class="co">#> 6 0 Pinus nigra subsp. nigra 1412 127</span>
+<span class="co">#> 7 0 Pinus cooperi 680 89</span>
+<span class="co">#> 8 0 Pinus thunbergii 873 122</span>
+<span class="co">#> 9 0 Pinus gordoniana 594 61</span>
+<span class="co">#> 10 0 Pinus subsect. Pinus 718 94</span>
+<span class="co">#> # ... with 2 more variables: snippet <chr>, timestamp <chr></span></code></pre></div>
+</div>
+
+
+
+<!-- dynamically load mathjax for compatibility with self-contained -->
+<script>
+ (function () {
+ var script = document.createElement("script");
+ script.type = "text/javascript";
+ script.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
+ document.getElementsByTagName("head")[0].appendChild(script);
+ })();
+</script>
+
+</body>
+</html>
diff --git a/man/wikipedias.Rd b/man/wikipedias.Rd
new file mode 100644
index 0000000..45b999c
--- /dev/null
+++ b/man/wikipedias.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/wikitaxa-package.R
+\docType{data}
+\name{wikipedias}
+\alias{wikipedias}
+\title{List of Wikipedias}
+\description{
+data.frame of 295 rows, with 3 columns:
+\itemize{
+\item language - language
+\item language_local - language in local name
+\item wiki - language code for the wiki
+}
+}
+\details{
+From \url{https://meta.wikimedia.org/wiki/List_of_Wikipedias}
+}
+\keyword{data}
diff --git a/man/wikitaxa-package.Rd b/man/wikitaxa-package.Rd
new file mode 100644
index 0000000..60abb4d
--- /dev/null
+++ b/man/wikitaxa-package.Rd
@@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/wikitaxa-package.R
+\docType{package}
+\name{wikitaxa-package}
+\alias{wikitaxa-package}
+\alias{wikitaxa}
+\title{Taxonomic Information from Wikipedia}
+\description{
+Taxonomic Information from Wikipedia
+}
+\author{
+Scott Chamberlain \email{myrmecocystus at gmail.com}
+
+Ethan Welty
+}
+\keyword{package}
diff --git a/man/wt_data.Rd b/man/wt_data.Rd
new file mode 100644
index 0000000..b3c82c4
--- /dev/null
+++ b/man/wt_data.Rd
@@ -0,0 +1,60 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/wiki.R
+\name{wt_data}
+\alias{wt_data}
+\alias{wt_data_id}
+\title{Wikidata taxonomy data}
+\usage{
+wt_data(x, property = NULL, ...)
+
+wt_data_id(x, language = "en", limit = 10, ...)
+}
+\arguments{
+\item{x}{(character) a taxonomic name}
+
+\item{property}{(character) a property id, e.g., P486}
+
+\item{...}{curl options passed on to \code{\link[httr:GET]{httr::GET()}}}
+
+\item{language}{(character) two letter language code}
+
+\item{limit}{(integer) records to return. Default: 10}
+}
+\value{
+\code{wt_data} searches Wikidata, and returns a list with elements:
+\itemize{
+\item labels - data.frame with columns: language, value
+\item descriptions - data.frame with columns: language, value
+\item aliases - data.frame with columns: language, value
+\item sitelinks - data.frame with columns: site, title
+\item claims - data.frame with columns: claims, property_value,
+property_description, value (comma separated values in a string)
+}
+
+\code{wt_data_id} gets the Wikidata ID for the searched term, and
+returns the ID as character
+}
+\description{
+Wikidata taxonomy data
+}
+\details{
+Note that \code{wt_data} can take a while to run since when fetching
+claims it has to do so one at a time for each claim
+
+You can search things other than taxonomic names with \code{wt_data} if you
+like
+}
+\examples{
+\dontrun{
+# search by taxon name
+# wt_data("Mimulus alsinoides")
+
+# choose which properties to return
+wt_data("Mimulus foliatus", property = c("P846", "P815"))
+
+# get a taxonomic identifier
+wt_data_id("Mimulus foliatus")
+# the id can be passed directly to wt_data()
+# wt_data(wt_data_id("Mimulus foliatus"))
+}
+}
diff --git a/man/wt_wiki_page.Rd b/man/wt_wiki_page.Rd
new file mode 100644
index 0000000..de0cfa0
--- /dev/null
+++ b/man/wt_wiki_page.Rd
@@ -0,0 +1,35 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/wikipages.R
+\name{wt_wiki_page}
+\alias{wt_wiki_page}
+\title{Get MediaWiki Page from API}
+\usage{
+wt_wiki_page(url, ...)
+}
+\arguments{
+\item{url}{(character) MediaWiki page url.}
+
+\item{...}{Arguments passed to \code{\link[=wt_wiki_url_build]{wt_wiki_url_build()}} if \code{url}
+is a static page url.}
+}
+\value{
+an \code{HttpResponse} response object from \pkg{crul}
+}
+\description{
+Supports both static page urls and their equivalent API calls.
+}
+\details{
+If the URL given is for a human readable html page,
+we convert it to equivalent API call - if URL is already an API call,
+we just use that.
+}
+\examples{
+\dontrun{
+wt_wiki_page("https://en.wikipedia.org/wiki/Malus_domestica")
+}
+}
+\seealso{
+Other MediaWiki functions: \code{\link{wt_wiki_page_parse}},
+ \code{\link{wt_wiki_url_build}},
+ \code{\link{wt_wiki_url_parse}}
+}
diff --git a/man/wt_wiki_page_parse.Rd b/man/wt_wiki_page_parse.Rd
new file mode 100644
index 0000000..d547473
--- /dev/null
+++ b/man/wt_wiki_page_parse.Rd
@@ -0,0 +1,39 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/wikipages.R
+\name{wt_wiki_page_parse}
+\alias{wt_wiki_page_parse}
+\title{Parse MediaWiki Page}
+\usage{
+wt_wiki_page_parse(page, types = c("langlinks", "iwlinks", "externallinks"),
+ tidy = FALSE)
+}
+\arguments{
+\item{page}{(\link[crul:HttpResponse]{crul::HttpResponse}) Result of \code{\link[=wt_wiki_page]{wt_wiki_page()}}}
+
+\item{types}{(character) List of properties to parse.}
+
+\item{tidy}{(logical). tidy output to data.frames when possible.
+Default: \code{FALSE}}
+}
+\value{
+a list
+}
+\description{
+Parses common properties from the result of a MediaWiki API page call.
+}
+\details{
+Available properties currently not parsed:
+title, displaytitle, pageid, revid, redirects, text, categories,
+links, templates, images, sections, properties, ...
+}
+\examples{
+\dontrun{
+pg <- wt_wiki_page("https://en.wikipedia.org/wiki/Malus_domestica")
+wt_wiki_page_parse(pg)
+}
+}
+\seealso{
+Other MediaWiki functions: \code{\link{wt_wiki_page}},
+ \code{\link{wt_wiki_url_build}},
+ \code{\link{wt_wiki_url_parse}}
+}
diff --git a/man/wt_wiki_url_build.Rd b/man/wt_wiki_url_build.Rd
new file mode 100644
index 0000000..09d5fcc
--- /dev/null
+++ b/man/wt_wiki_url_build.Rd
@@ -0,0 +1,60 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/wikipages.R
+\name{wt_wiki_url_build}
+\alias{wt_wiki_url_build}
+\title{Build MediaWiki Page URL}
+\usage{
+wt_wiki_url_build(wiki, type = NULL, page = NULL, api = FALSE,
+ action = "parse", redirects = TRUE, format = "json", utf8 = TRUE,
+ prop = c("text", "langlinks", "categories", "links", "templates", "images",
+ "externallinks", "sections", "revid", "displaytitle", "iwlinks",
+ "properties"))
+}
+\arguments{
+\item{wiki}{(character | list) Either the wiki name or a list with
+\code{$wiki}, \code{$type}, and \code{$page} (the output of \code{\link[=wt_wiki_url_parse]{wt_wiki_url_parse()}}).}
+
+\item{type}{(character) Wiki type.}
+
+\item{page}{(character) Wiki page title.}
+
+\item{api}{(boolean) Whether to return an API call or a static page url
+(default). If \code{FALSE}, all following (API-only) arguments are ignored.}
+
+\item{action}{(character) See \url{https://en.wikipedia.org/w/api.php}
+for supported actions. This function currently only supports "parse".}
+
+\item{redirects}{(boolean) If the requested page is set to a redirect,
+resolve it.}
+
+\item{format}{(character) See \url{https://en.wikipedia.org/w/api.php}
+for supported output formats.}
+
+\item{utf8}{(boolean) If \code{TRUE}, encodes most (but not all) non-ASCII
+characters as UTF-8 instead of replacing them with hexadecimal escape
+sequences.}
+
+\item{prop}{(character) Properties to retrieve, either as a character vector
+or pipe-delimited string. See
+\url{https://en.wikipedia.org/w/api.php?action=help&modules=parse} for
+supported properties.}
+}
+\value{
+a URL (character)
+}
+\description{
+Builds a MediaWiki page url from its component parts (wiki name, wiki type,
+and page title). Supports both static page urls and their equivalent API
+calls.
+}
+\examples{
+wt_wiki_url_build(wiki = "en", type = "wikipedia", page = "Malus domestica")
+wt_wiki_url_build(
+ wt_wiki_url_parse("https://en.wikipedia.org/wiki/Malus_domestica"))
+wt_wiki_url_build("en", "wikipedia", "Malus domestica", api = TRUE)
+}
+\seealso{
+Other MediaWiki functions: \code{\link{wt_wiki_page_parse}},
+ \code{\link{wt_wiki_page}},
+ \code{\link{wt_wiki_url_parse}}
+}
diff --git a/man/wt_wiki_url_parse.Rd b/man/wt_wiki_url_parse.Rd
new file mode 100644
index 0000000..11e1386
--- /dev/null
+++ b/man/wt_wiki_url_parse.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/wikipages.R
+\name{wt_wiki_url_parse}
+\alias{wt_wiki_url_parse}
+\title{Parse MediaWiki Page URL}
+\usage{
+wt_wiki_url_parse(url)
+}
+\arguments{
+\item{url}{(character) MediaWiki page url.}
+}
+\value{
+a list with elements:
+\itemize{
+\item wiki - wiki language
+\item type - wikipedia type
+\item page - page name
+}
+}
+\description{
+Parse a MediaWiki page url into its component parts (wiki name, wiki type,
+and page title). Supports both static page urls and their equivalent API
+calls.
+}
+\examples{
+wt_wiki_url_parse(url="https://en.wikipedia.org/wiki/Malus_domestica")
+wt_wiki_url_parse("https://en.wikipedia.org/w/api.php?page=Malus_domestica")
+}
+\seealso{
+Other MediaWiki functions: \code{\link{wt_wiki_page_parse}},
+ \code{\link{wt_wiki_page}},
+ \code{\link{wt_wiki_url_build}}
+}
diff --git a/man/wt_wikicommons.Rd b/man/wt_wikicommons.Rd
new file mode 100644
index 0000000..128888d
--- /dev/null
+++ b/man/wt_wikicommons.Rd
@@ -0,0 +1,75 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/wikicommons.R
+\name{wt_wikicommons}
+\alias{wt_wikicommons}
+\alias{wt_wikicommons_parse}
+\alias{wt_wikicommons_search}
+\title{WikiCommons}
+\usage{
+wt_wikicommons(name, utf8 = TRUE, ...)
+
+wt_wikicommons_parse(page, types = c("langlinks", "iwlinks", "externallinks",
+ "common_names", "classification"), tidy = FALSE)
+
+wt_wikicommons_search(query, limit = 10, offset = 0, utf8 = TRUE, ...)
+}
+\arguments{
+\item{name}{(character) Wiki name - as a page title, must be length 1}
+
+\item{utf8}{(logical) If \code{TRUE}, encodes most (but not all) non-ASCII
+characters as UTF-8 instead of replacing them with hexadecimal escape
+sequences. Default: \code{TRUE}}
+
+\item{...}{curl options, passed on to \code{\link[httr:GET]{httr::GET()}}}
+
+\item{page}{(\link[crul:HttpResponse]{crul::HttpResponse}) Result of \code{\link[=wt_wiki_page]{wt_wiki_page()}}}
+
+\item{types}{(character) List of properties to parse}
+
+\item{tidy}{(logical) tidy output to data.frames if possible.
+Default: \code{FALSE}}
+
+\item{query}{(character) query terms}
+
+\item{limit}{(integer) number of results to return. Default: 10}
+
+\item{offset}{(integer) record to start at. Default: 0}
+}
+\value{
+\code{wt_wikicommons} returns a list, with slots:
+\itemize{
+\item langlinks - language page links
+\item externallinks - external links
+\item common_names - a data.frame with \code{name} and \code{language} columns
+\item classification - a data.frame with \code{rank} and \code{name} columns
+}
+
+\code{wt_wikicommons_parse} returns a list
+
+\code{wt_wikicommons_search} returns a list with slots for \code{continue} and
+\code{query}, where \code{query} holds the results, with \code{query$search} slot with
+the search results
+}
+\description{
+WikiCommons
+}
+\examples{
+\dontrun{
+# high level
+wt_wikicommons(name = "Malus domestica")
+
+# low level
+pg <- wt_wiki_page("https://commons.wikimedia.org/wiki/Malus_domestica")
+wt_wikicommons_parse(pg)
+
+# search wikicommons
+wt_wikicommons_search(query = "Pinus")
+
+## use search results to dig into pages
+res <- wt_wikicommons_search(query = "Pinus")
+lapply(res$query$search$title[1:3], wt_wikicommons)
+}
+}
+\references{
+\url{https://www.mediawiki.org/wiki/API:Search} for help on search
+}
diff --git a/man/wt_wikipedia.Rd b/man/wt_wikipedia.Rd
new file mode 100644
index 0000000..d7d579f
--- /dev/null
+++ b/man/wt_wikipedia.Rd
@@ -0,0 +1,89 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/wikipedia.R
+\name{wt_wikipedia}
+\alias{wt_wikipedia}
+\alias{wt_wikipedia_parse}
+\alias{wt_wikipedia_search}
+\title{Wikipedia}
+\usage{
+wt_wikipedia(name, wiki = "en", utf8 = TRUE, ...)
+
+wt_wikipedia_parse(page, types = c("langlinks", "iwlinks", "externallinks",
+ "common_names", "classification"), tidy = FALSE)
+
+wt_wikipedia_search(query, wiki = "en", limit = 10, offset = 0,
+ utf8 = TRUE, ...)
+}
+\arguments{
+\item{name}{(character) Wiki name - as a page title, must be length 1}
+
+\item{wiki}{(character) wiki language. default: en. See \link{wikipedias} for
+language codes.}
+
+\item{utf8}{(logical) If \code{TRUE}, encodes most (but not all) non-ASCII
+characters as UTF-8 instead of replacing them with hexadecimal escape
+sequences. Default: \code{TRUE}}
+
+\item{...}{curl options, passed on to \code{\link[httr:GET]{httr::GET()}}}
+
+\item{page}{(\link[crul:HttpResponse]{crul::HttpResponse}) Result of \code{\link[=wt_wiki_page]{wt_wiki_page()}}}
+
+\item{types}{(character) List of properties to parse}
+
+\item{tidy}{(logical) tidy output to data.frames if possible.
+Default: \code{FALSE}}
+
+\item{query}{(character) query terms}
+
+\item{limit}{(integer) number of results to return. Default: 10}
+
+\item{offset}{(integer) record to start at. Default: 0}
+}
+\value{
+\code{wt_wikipedia} returns a list, with slots:
+\itemize{
+\item langlinks - language page links
+\item externallinks - external links
+\item common_names - a data.frame with \code{name} and \code{language} columns
+\item classification - a data.frame with \code{rank} and \code{name} columns
+\item synonyms - a character vector with taxonomic names
+}
+
+\code{wt_wikipedia_parse} returns a list with the same slots, as determined by
+the \code{types} parameter
+
+\code{wt_wikipedia_search} returns a list with slots for \code{continue} and
+\code{query}, where \code{query} holds the results, with \code{query$search} slot with
+the search results
+}
+\description{
+Wikipedia
+}
+\examples{
+\dontrun{
+# high level
+wt_wikipedia(name = "Malus domestica")
+wt_wikipedia(name = "Malus domestica", wiki = "fr")
+wt_wikipedia(name = "Malus domestica", wiki = "da")
+
+# low level
+pg <- wt_wiki_page("https://en.wikipedia.org/wiki/Malus_domestica")
+wt_wikipedia_parse(pg)
+wt_wikipedia_parse(pg, tidy = TRUE)
+
+# search wikipedia
+wt_wikipedia_search(query = "Pinus")
+wt_wikipedia_search(query = "Pinus", wiki = "fr")
+wt_wikipedia_search(query = "Pinus", wiki = "br")
+
+## curl options
+# wt_wikipedia_search(query = "Pinus", verbose = TRUE)
+
+## use search results to dig into pages
+res <- wt_wikipedia_search(query = "Pinus")
+lapply(res$query$search$title[1:3], wt_wikipedia)
+}
+}
+\references{
+\url{https://www.mediawiki.org/wiki/API:Search} for help on search
+}
diff --git a/man/wt_wikispecies.Rd b/man/wt_wikispecies.Rd
new file mode 100644
index 0000000..0ffc589
--- /dev/null
+++ b/man/wt_wikispecies.Rd
@@ -0,0 +1,75 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/wikispecies.R
+\name{wt_wikispecies}
+\alias{wt_wikispecies}
+\alias{wt_wikispecies_parse}
+\alias{wt_wikispecies_search}
+\title{WikiSpecies}
+\usage{
+wt_wikispecies(name, utf8 = TRUE, ...)
+
+wt_wikispecies_parse(page, types = c("langlinks", "iwlinks", "externallinks",
+ "common_names", "classification"), tidy = FALSE)
+
+wt_wikispecies_search(query, limit = 10, offset = 0, utf8 = TRUE, ...)
+}
+\arguments{
+\item{name}{(character) Wiki name - as a page title, must be length 1}
+
+\item{utf8}{(logical) If \code{TRUE}, encodes most (but not all) non-ASCII
+characters as UTF-8 instead of replacing them with hexadecimal escape
+sequences. Default: \code{TRUE}}
+
+\item{...}{curl options, passed on to \code{\link[httr:GET]{httr::GET()}}}
+
+\item{page}{(\link[crul:HttpResponse]{crul::HttpResponse}) Result of \code{\link[=wt_wiki_page]{wt_wiki_page()}}}
+
+\item{types}{(character) List of properties to parse}
+
+\item{tidy}{(logical) tidy output to data.frames if possible.
+Default: \code{FALSE}}
+
+\item{query}{(character) query terms}
+
+\item{limit}{(integer) number of results to return. Default: 10}
+
+\item{offset}{(integer) record to start at. Default: 0}
+}
+\value{
+\code{wt_wikispecies} returns a list, with slots:
+\itemize{
+\item langlinks - language page links
+\item externallinks - external links
+\item common_names - a data.frame with \code{name} and \code{language} columns
+\item classification - a data.frame with \code{rank} and \code{name} columns
+}
+
+\code{wt_wikispecies_parse} returns a list
+
+\code{wt_wikispecies_search} returns a list with slots for \code{continue} and
+\code{query}, where \code{query} holds the results, with \code{query$search} slot with
+the search results
+}
+\description{
+WikiSpecies
+}
+\examples{
+\dontrun{
+# high level
+wt_wikispecies(name = "Malus domestica")
+
+# low level
+pg <- wt_wiki_page("https://species.wikimedia.org/wiki/Abelmoschus")
+wt_wikispecies_parse(pg)
+
+# search wikispecies
+wt_wikispecies_search(query = "pine tree")
+
+## use search results to dig into pages
+res <- wt_wikispecies_search(query = "pine tree")
+lapply(res$query$search$title[1:3], wt_wikispecies)
+}
+}
+\references{
+\url{https://www.mediawiki.org/wiki/API:Search} for help on search
+}
diff --git a/tests/test-all.R b/tests/test-all.R
new file mode 100644
index 0000000..7d45b5b
--- /dev/null
+++ b/tests/test-all.R
@@ -0,0 +1,3 @@
+library(testthat)
+library(wikitaxa)
+test_check("wikitaxa")
diff --git a/tests/testthat/test-wikicommons.R b/tests/testthat/test-wikicommons.R
new file mode 100644
index 0000000..a756628
--- /dev/null
+++ b/tests/testthat/test-wikicommons.R
@@ -0,0 +1,102 @@
+context("wt_wikicommons")
+
+test_that("wt_wikicommons returns non-empty results", {
+ skip_on_cran()
+
+ aa <- wt_wikicommons(name = "Malus domestica")
+
+ expect_is(aa, "list")
+ expect_named(aa, c('langlinks', 'externallinks', 'common_names',
+ 'classification'))
+ expect_is(aa$langlinks, "data.frame")
+ expect_is(aa$externallinks, "character")
+ expect_is(aa$common_names, "data.frame")
+ expect_named(aa$common_names, c('name', 'language'))
+ expect_is(aa$classification, "data.frame")
+ expect_named(aa$classification, c('rank', 'name'))
+
+ bb <- wt_wikicommons(name = "Poa annua")
+
+ expect_is(bb, "list")
+ expect_named(bb, c('langlinks', 'externallinks', 'common_names',
+ 'classification'))
+ expect_is(bb$langlinks, "data.frame")
+ expect_is(bb$externallinks, "character")
+ expect_is(bb$common_names, "data.frame")
+ expect_named(bb$common_names, c('name', 'language'))
+ expect_is(bb$classification, "data.frame")
+ expect_named(bb$classification, c('rank', 'name'))
+})
+
+test_that("wt_wikicommons fails well", {
+ skip_on_cran()
+
+ expect_error(wt_wikicommons(),
+ "argument \"name\" is missing")
+ expect_error(wt_wikicommons(5),
+ "name must be of class character")
+
+ # "name" must be length 1
+ expect_error(
+ wt_wikicommons(c("Pinus", "asdfadsf")),
+ "length\\(name\\) == 1 is not TRUE"
+ )
+
+ # "utf8" must be logical
+ expect_error(
+ wt_wikicommons("Pinus", "asdf"),
+ "utf8 must be of class logical"
+ )
+})
+
+context("wt_wikicommons_parse")
+
+test_that("wt_wikicommons_parse returns non-empty results", {
+ skip_on_cran()
+
+ url <- "https://commons.wikimedia.org/wiki/Malus_domestica"
+ pg <- wt_wiki_page(url)
+ types <- c("common_names")
+ result <- wt_wikicommons_parse(pg, types = types)
+ expect_is(result, "list")
+ for (fieldname in types) {
+ expect_is(result[fieldname], "list")
+ expect_gt(length(result[fieldname]), 0)
+ }
+})
+
+context("wt_wikicommons_search")
+
+test_that("wt_wikicommons_search works", {
+ skip_on_cran()
+
+ aa <- wt_wikicommons_search(query = "Pinus")
+
+ expect_is(aa, "list")
+ expect_is(aa$continue, "list")
+ expect_is(aa$query, "list")
+ expect_is(aa$query$searchinfo, "list")
+ expect_is(aa$query$search, "data.frame")
+ expect_named(aa$query$search, c('ns', 'title', 'size', 'wordcount',
+ 'snippet', 'timestamp'))
+
+ # no results when not found
+ expect_equal(NROW(wt_wikicommons_search("asdfadfaadfadfs")$query$search), 0)
+})
+
+test_that("wt_wikicommons_search fails well", {
+ skip_on_cran()
+
+ expect_error(
+ wt_wikicommons_search(),
+ "argument \"query\" is missing"
+ )
+ expect_error(
+ wt_wikicommons_search("Pinus", limit = "adf"),
+ "limit must be of class integer, numeric"
+ )
+ expect_error(
+ wt_wikicommons_search("Pinus", offset = "adf"),
+ "offset must be of class integer, numeric"
+ )
+})
diff --git a/tests/testthat/test-wikipedia.R b/tests/testthat/test-wikipedia.R
new file mode 100644
index 0000000..9fb22e0
--- /dev/null
+++ b/tests/testthat/test-wikipedia.R
@@ -0,0 +1,108 @@
+context("wt_wikipedia")
+
+test_that("wt_wikipedia returns non-empty results", {
+ skip_on_cran()
+
+ aa <- wt_wikipedia(name = "Malus domestica")
+
+ expect_is(aa, "list")
+ expect_named(aa, c('langlinks', 'externallinks', 'common_names',
+ 'classification', 'synonyms'))
+ expect_is(aa$langlinks, "data.frame")
+ expect_is(aa$externallinks, "character")
+ expect_is(aa$common_names, "data.frame")
+ expect_named(aa$common_names, c('name', 'language'))
+ expect_is(aa$classification, "data.frame")
+ expect_named(aa$classification, c('rank', 'name'))
+
+ bb <- wt_wikipedia(name = "Poa annua")
+
+ expect_is(bb, "list")
+ expect_named(bb, c('langlinks', 'externallinks', 'common_names',
+ 'classification', 'synonyms'))
+ expect_is(bb$langlinks, "data.frame")
+ expect_is(bb$externallinks, "character")
+ expect_is(bb$common_names, "data.frame")
+ expect_named(bb$common_names, c('name', 'language'))
+ expect_is(bb$classification, "data.frame")
+ expect_named(bb$classification, c('rank', 'name'))
+})
+
+test_that("wt_wikipedia fails well", {
+ skip_on_cran()
+
+ expect_error(wt_wikipedia(),
+ "argument \"name\" is missing")
+ expect_error(wt_wikipedia(5),
+ "name must be of class character")
+
+ # "name" must be length 1
+ expect_error(
+ wt_wikipedia(c("Pinus", "asdfadsf")),
+ "length\\(name\\) == 1 is not TRUE"
+ )
+
+ # "wiki" must be character
+ expect_error(
+ wt_wikipedia("Pinus", 5),
+ "wiki must be of class character"
+ )
+
+ # "utf8" must be logical
+ expect_error(
+ wt_wikipedia("Pinus", utf8 = "asdf"),
+ "utf8 must be of class logical"
+ )
+})
+
+context("wt_wikipedia_parse")
+
+test_that("wt_wikipedia_parse returns non-empty results", {
+ skip_on_cran()
+
+ url <- "https://species.wikimedia.org/wiki/Malus_domestica"
+ pg <- wt_wiki_page(url)
+ types <- c("common_names")
+ result <- wt_wikipedia_parse(pg, types = types)
+ expect_is(result, "list")
+ for (fieldname in types) {
+ expect_is(result[fieldname], "list")
+ expect_gt(length(result[fieldname]), 0)
+ }
+})
+
+context("wt_wikipedia_search")
+
+test_that("wt_wikipedia_search works", {
+ skip_on_cran()
+
+ aa <- wt_wikipedia_search(query = "Pinus")
+
+ expect_is(aa, "list")
+ expect_is(aa$continue, "list")
+ expect_is(aa$query, "list")
+ expect_is(aa$query$searchinfo, "list")
+ expect_is(aa$query$search, "data.frame")
+ expect_named(aa$query$search, c('ns', 'title', 'size', 'wordcount',
+ 'snippet', 'timestamp'))
+
+ # no results when not found
+ expect_equal(NROW(wt_wikipedia_search("asdfadfaadfadfs")$query$search), 0)
+})
+
+test_that("wt_wikipedia_search fails well", {
+ skip_on_cran()
+
+ expect_error(
+ wt_wikipedia_search(),
+ "argument \"query\" is missing"
+ )
+ expect_error(
+ wt_wikipedia_search("Pinus", limit = "adf"),
+ "limit must be of class integer, numeric"
+ )
+ expect_error(
+ wt_wikipedia_search("Pinus", offset = "adf"),
+ "offset must be of class integer, numeric"
+ )
+})
diff --git a/tests/testthat/test-wikispecies.R b/tests/testthat/test-wikispecies.R
new file mode 100644
index 0000000..f372bf2
--- /dev/null
+++ b/tests/testthat/test-wikispecies.R
@@ -0,0 +1,102 @@
+context("wt_wikispecies")
+
+test_that("wt_wikispecies returns non-empty results", {
+ skip_on_cran()
+
+ aa <- wt_wikispecies(name = "Malus domestica")
+
+ expect_is(aa, "list")
+ expect_named(aa, c('langlinks', 'externallinks', 'common_names',
+ 'classification'))
+ expect_is(aa$langlinks, "data.frame")
+ expect_is(aa$externallinks, "character")
+ expect_is(aa$common_names, "data.frame")
+ expect_named(aa$common_names, c('name', 'language'))
+ expect_is(aa$classification, "data.frame")
+ expect_named(aa$classification, c('rank', 'name'))
+
+ bb <- wt_wikispecies(name = "Poa annua")
+
+ expect_is(bb, "list")
+ expect_named(bb, c('langlinks', 'externallinks', 'common_names',
+ 'classification'))
+ expect_is(bb$langlinks, "data.frame")
+ expect_is(bb$externallinks, "character")
+ expect_is(bb$common_names, "data.frame")
+ expect_named(bb$common_names, c('name', 'language'))
+ expect_is(bb$classification, "data.frame")
+ expect_named(bb$classification, c('rank', 'name'))
+})
+
+test_that("wt_wikispecies fails well", {
+ skip_on_cran()
+
+ expect_error(wt_wikispecies(),
+ "argument \"name\" is missing")
+ expect_error(wt_wikispecies(5),
+ "name must be of class character")
+
+ # "name" must be length 1
+ expect_error(
+ wt_wikispecies(c("Pinus", "asdfadsf")),
+ "length\\(name\\) == 1 is not TRUE"
+ )
+
+ # "utf8" must be logical
+ expect_error(
+ wt_wikispecies("Pinus", "asdf"),
+ "utf8 must be of class logical"
+ )
+})
+
+context("wt_wikispecies_parse")
+
+test_that("wt_wikispecies_parse returns non-empty results", {
+ skip_on_cran()
+
+ url <- "https://species.wikimedia.org/wiki/Malus_domestica"
+ pg <- wt_wiki_page(url)
+ types <- c("common_names")
+ result <- wt_wikispecies_parse(pg, types = types)
+ expect_is(result, "list")
+ for (fieldname in types) {
+ expect_is(result[fieldname], "list")
+ expect_gt(length(result[fieldname]), 0)
+ }
+})
+
+context("wt_wikispecies_search")
+
+test_that("wt_wikispecies_search works", {
+ skip_on_cran()
+
+ aa <- wt_wikispecies_search(query = "Pinus")
+
+ expect_is(aa, "list")
+ expect_is(aa$continue, "list")
+ expect_is(aa$query, "list")
+ expect_is(aa$query$searchinfo, "list")
+ expect_is(aa$query$search, "data.frame")
+ expect_named(aa$query$search, c('ns', 'title', 'size', 'wordcount',
+ 'snippet', 'timestamp'))
+
+ # no results when not found
+ expect_equal(NROW(wt_wikispecies_search("asdfadfaadfadfs")$query$search), 0)
+})
+
+test_that("wt_wikispecies_search fails well", {
+ skip_on_cran()
+
+ expect_error(
+ wt_wikispecies_search(),
+ "argument \"query\" is missing"
+ )
+ expect_error(
+ wt_wikispecies_search("Pinus", limit = "adf"),
+ "limit must be of class integer, numeric"
+ )
+ expect_error(
+ wt_wikispecies_search("Pinus", offset = "adf"),
+ "offset must be of class integer, numeric"
+ )
+})
diff --git a/tests/testthat/test-wt_data.R b/tests/testthat/test-wt_data.R
new file mode 100644
index 0000000..6f71fa9
--- /dev/null
+++ b/tests/testthat/test-wt_data.R
@@ -0,0 +1,21 @@
+context("wt_data")
+
+test_that("wt_data returns the correct class", {
+ skip_on_cran()
+
+ prop <- "P846"
+ aa <- wt_data("Mimulus foliatus", property = prop)
+
+ expect_is(aa, "list")
+ expect_is(aa$labels, "data.frame")
+ expect_is(aa$descriptions, "data.frame")
+ expect_is(aa$aliases, "data.frame")
+ expect_is(aa$sitelinks, "data.frame")
+ expect_is(aa$claims, "data.frame")
+ expect_is(aa$claims, "data.frame")
+ expect_equal(aa$claims$property, prop)
+})
+
+test_that("wt_data fails well", {
+ expect_error(wt_data(), "argument \"x\" is missing, with no default")
+})
diff --git a/tests/testthat/test-wt_wiki_page.R b/tests/testthat/test-wt_wiki_page.R
new file mode 100644
index 0000000..c1d357b
--- /dev/null
+++ b/tests/testthat/test-wt_wiki_page.R
@@ -0,0 +1,37 @@
+context("wt_wiki_page/wt_wiki_page_parse")
+
+test_that("wt_wiki_page returns a response object", {
+ skip_on_cran()
+
+ url <- "https://en.wikipedia.org/wiki/Malus_domestica"
+ result <- wt_wiki_page(url)
+ expect_is(result, "HttpResponse")
+})
+
+test_that("wt_wiki_page_parse returns non-empty results", {
+ skip_on_cran()
+
+ url <- "https://en.wikipedia.org/wiki/Malus_domestica"
+ pg <- wt_wiki_page(url)
+ types <- c("langlinks", "iwlinks", "externallinks")
+ result <- wt_wiki_page_parse(pg, types = types)
+ expect_is(result, "list")
+ for (fieldname in types) {
+ expect_is(result[fieldname], "list")
+ expect_gt(length(result[fieldname]), 0)
+ }
+})
+
+test_that("wt_wiki_page_parse returns non-empty results", {
+ skip_on_cran()
+
+ url <- "https://en.wikipedia.org/wiki/Malus_domestica"
+ pg <- wt_wiki_page(url)
+ types <- c("common_names")
+ result <- wt_wiki_page_parse(pg, types = types)
+ expect_is(result, "list")
+ for (fieldname in types) {
+ expect_is(result[fieldname], "list")
+ expect_gt(length(result[fieldname]), 0)
+ }
+})
diff --git a/tests/testthat/test-wt_wiki_url_build.R b/tests/testthat/test-wt_wiki_url_build.R
new file mode 100644
index 0000000..318afa8
--- /dev/null
+++ b/tests/testthat/test-wt_wiki_url_build.R
@@ -0,0 +1,21 @@
+context("wt_wiki_url_build")
+
+test_that("wt_wiki_url_build correctly builds static page url", {
+ skip_on_cran()
+
+ url <- "https://en.wikipedia.org/wiki/Malus_domestica"
+ result <- wt_wiki_url_build("en", "wikipedia", "Malus domestica")
+ expect_equal(result, url)
+})
+
+test_that("wt_wiki_url_build correctly builds API page url", {
+ skip_on_cran()
+
+ url <- gsub("\n|\\s+", "", "https://en.wikipedia.org/w/api.php?page=
+ Malus_domestica&action=parse&redirects=TRUE&format=json&
+ utf8=TRUE&prop=text")
+ result <- wt_wiki_url_build("en", "wikipedia", "Malus domestica",
+ api = TRUE, action = "parse", redirects = TRUE,
+ format = "json", utf8 = TRUE, prop = "text")
+ expect_equal(result, url)
+})
diff --git a/tests/testthat/test-wt_wiki_url_parse.R b/tests/testthat/test-wt_wiki_url_parse.R
new file mode 100644
index 0000000..fc3bed1
--- /dev/null
+++ b/tests/testthat/test-wt_wiki_url_parse.R
@@ -0,0 +1,23 @@
+context("wt_wiki_url_parse")
+
+test_that("wt_wiki_url_parse correctly parses static page url", {
+ skip_on_cran()
+
+ url <- "https://en.wikipedia.org/wiki/Malus_domestica"
+ result <- wt_wiki_url_parse(url)
+ expect_is(result, "list")
+ expect_equal(result$wiki, "en")
+ expect_equal(result$type, "wikipedia")
+ expect_equal(result$page, "Malus_domestica")
+})
+
+test_that("wt_wiki_url_parse correctly parses API page url", {
+ skip_on_cran()
+
+ url <- "https://en.wikipedia.org/w/api.php?page=Malus_domestica"
+ result <- wt_wiki_url_parse(url)
+ expect_is(result, "list")
+ expect_equal(result$wiki, "en")
+ expect_equal(result$type, "wikipedia")
+ expect_equal(result$page, "Malus_domestica")
+})
diff --git a/vignettes/wikitaxa_vignette.Rmd b/vignettes/wikitaxa_vignette.Rmd
new file mode 100644
index 0000000..388bc8a
--- /dev/null
+++ b/vignettes/wikitaxa_vignette.Rmd
@@ -0,0 +1,177 @@
+---
+title: "Introduction to the wikitaxa package"
+author: "Scott Chamberlain"
+date: "`r Sys.Date()`"
+output: rmarkdown::html_vignette
+vignette: >
+ %\VignetteIndexEntry{Introduction to the wikitaxa package}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+---
+
+```{r echo=FALSE}
+knitr::opts_chunk$set(
+ comment = "#>",
+ collapse = TRUE,
+ warning = FALSE,
+ message = FALSE
+)
+```
+
+`wikitaxa` - Taxonomy data from Wikipedia
+
+The goal of `wikitaxa` is to allow search and taxonomic data retrieval
+across many Wikimedia sites, including Wikipedia, Wikicommons,
+Wikispecies, and Wikidata.
+
+There are lower level and higher level parts to the package API:
+
+### Low level API
+
+The low level API is meant for power users and gives you more control,
+but requires more knowledge.
+
+* `wt_wiki_page()`
+* `wt_wiki_page_parse()`
+* `wt_wiki_url_build()`
+* `wt_wiki_url_parse()`
+* `wt_wikispecies_parse()`
+* `wt_wikicommons_parse()`
+* `wt_wikipedia_parse()`
+
+### High level API
+
+The high level API is meant to be easier and faster to use.
+
+* `wt_data()`
+* `wt_data_id()`
+* `wt_wikispecies()`
+* `wt_wikicommons()`
+* `wt_wikipedia()`
+
+Search functions:
+
+* `wt_wikicommons_search()`
+* `wt_wikispecies_search()`
+* `wt_wikipedia_search()`
+
+## Installation
+
+CRAN version
+
+```{r eval=FALSE}
+install.packages("wikitaxa")
+```
+
+Dev version
+
+```{r eval=FALSE}
+devtools::install_github("ropensci/wikitaxa")
+```
+
+```{r}
+library("wikitaxa")
+```
+
+## wikidata
+
+```{r eval=FALSE}
+wt_data("Poa annua")
+```
+
+Get a Wikidata ID
+
+```{r}
+wt_data_id("Mimulus foliatus")
+```
+
+## wikipedia
+
+lower level
+
+```{r}
+pg <- wt_wiki_page("https://en.wikipedia.org/wiki/Malus_domestica")
+res <- wt_wiki_page_parse(pg)
+res$iwlinks
+```
+
+higher level
+
+```{r}
+res <- wt_wikipedia("Malus domestica")
+res$common_names
+res$classification
+```
+
+choose a wikipedia language
+
+```{r eval=FALSE}
+# French
+wt_wikipedia(name = "Malus domestica", wiki = "fr")
+# Slovak
+wt_wikipedia(name = "Malus domestica", wiki = "sk")
+# Vietnamese
+wt_wikipedia(name = "Malus domestica", wiki = "vi")
+```
+
+search
+
+```{r}
+wt_wikipedia_search(query = "Pinus")
+```
+
+search also supports choosing a wikipedia language
+
+```{r eval=FALSE}
+wt_wikipedia_search(query = "Pinus", wiki = "fr")
+```
+
+
+## wikicommons
+
+lower level
+
+```{r}
+pg <- wt_wiki_page("https://commons.wikimedia.org/wiki/Abelmoschus")
+res <- wt_wikicommons_parse(pg)
+res$common_names[1:3]
+```
+
+higher level
+
+```{r}
+res <- wt_wikicommons("Abelmoschus")
+res$classification
+res$common_names
+```
+
+search
+
+```{r}
+wt_wikicommons_search(query = "Pinus")
+```
+
+
+## wikispecies
+
+lower level
+
+```{r}
+pg <- wt_wiki_page("https://species.wikimedia.org/wiki/Malus_domestica")
+res <- wt_wikispecies_parse(pg, types = "common_names")
+res$common_names[1:3]
+```
+
+higher level
+
+```{r}
+res <- wt_wikispecies("Malus domestica")
+res$classification
+res$common_names
+```
+
+search
+
+```{r}
+wt_wikispecies_search(query = "Pinus")
+```
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/r-cran-wikitaxa.git
More information about the debian-med-commit
mailing list