[med-svn] [r-bioc-biomart] 01/05: New upstream version 2.34.0

Andreas Tille tille at debian.org
Wed Nov 8 13:43:11 UTC 2017


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository r-bioc-biomart.

commit 2380ae3693ae779a918f4ecddd64bc0cfd232f50
Author: Andreas Tille <tille at debian.org>
Date:   Wed Nov 8 14:32:09 2017 +0100

    New upstream version 2.34.0
---
 DESCRIPTION                          |   6 +-
 NAMESPACE                            |  12 +-
 NEWS                                 |  21 ++
 R/biomaRt.R                          | 317 ++++++++++--------------
 R/ensembl.R                          |  24 ++
 R/methods-Mart.R                     |  20 +-
 R/utilityFunctions.R                 | 115 ++++++++-
 build/vignette.rds                   | Bin 211 -> 212 bytes
 inst/doc/biomaRt.R                   |  13 +-
 inst/doc/biomaRt.Rmd                 |  20 +-
 inst/doc/biomaRt.html                | 464 +++++++++++++++++++++++------------
 man/listEnsemblArchives.Rd           |  18 ++
 man/listMarts.Rd                     |   6 +-
 man/useMart.Rd                       |   2 +-
 tests/testthat/test_hostProcessing.R |  16 ++
 tests/testthat/test_useMart.R        |  13 +
 vignettes/biomaRt.Rmd                |  20 +-
 17 files changed, 721 insertions(+), 366 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index ca0db64..263e019 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: biomaRt
-Version: 2.32.1
+Version: 2.34.0
 Title: Interface to BioMart databases (e.g. Ensembl, COSMIC, Wormbase
         and Gramene)
 Author: Steffen Durinck <biomartdev at gmail.com>, Wolfgang Huber
@@ -7,7 +7,7 @@ Contributors: Sean Davis <sdavis2 at mail.nih.gov>, Francois Pepin, Vince
         S. Buffalo, Mike Smith
 Maintainer: Steffen Durinck <biomartdev at gmail.com>
 Depends: methods
-Imports: utils, XML, RCurl, AnnotationDbi
+Imports: utils, XML, RCurl, AnnotationDbi, progress, stringr
 Suggests: annotate, BiocStyle, knitr, rmarkdown, testthat
 VignetteBuilder: knitr
 biocViews: Annotation
@@ -27,4 +27,4 @@ Description: In recent years a wealth of biological data has become available
 License: Artistic-2.0
 LazyLoad: yes
 NeedsCompilation: no
-Packaged: 2017-06-08 22:19:24 UTC; biocbuild
+Packaged: 2017-10-30 22:42:08 UTC; biocbuild
diff --git a/NAMESPACE b/NAMESPACE
index b006204..8dcf92a 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,10 +1,20 @@
 import(methods)
 import(RCurl,XML)
+
 importFrom(utils, edit, head, read.table)
 importFrom(AnnotationDbi, keys, columns, keytypes, select)
+importFrom(progress, progress_bar)
+importFrom(stringr, str_extract_all)
+
 #for some reason RCurl needs to have findHTTPHeaderEncoding exported -
 #remove it from the exports if this ever gets fixed
-export(listMarts, getGene, getSequence, exportFASTA, useMart, listDatasets, useDataset, listEnsembl, useEnsembl, listAttributes, listFilters, getBM, getXML,getLDS, attributePages, filterOptions,filterType, getBMlist, NP2009code, keys, columns, keytypes, select)
+export(listMarts, getGene, getSequence, exportFASTA, useMart, listDatasets, 
+       useDataset, listEnsembl, useEnsembl, listAttributes, listFilters, 
+       getBM, getXML,getLDS, attributePages, filterOptions,filterType, 
+       getBMlist, NP2009code, keys, columns, keytypes, select,
+       listEnsemblArchives)
+
 exportClasses(Mart)
+
 exportMethods("show")
 
diff --git a/NEWS b/NEWS
index 027282f..21be840 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,24 @@
+CHANGES IN VERSION 2.34.0
+-------------------------
+
+NEW FEATURES
+
+    o  Added the listEnsemblArchives() function.  This returns a table of the
+    available Ensembl archives, and replaces the archive = TRUE argument to
+    several functions, which was no longer working.
+
+BUG FIXES
+
+    o The Ensembl BioMart server doesn't always respond well if queries with
+    more than 500 filter values are submitted.  If a query that exceeds this
+    is detected, biomaRt will now submit the query in batches and concatenate
+    the results when completed.
+    
+MINOR CHANGES
+
+    o You can now provide a host with 'http://' at the start, or a trailing
+    '/' (typically copy/pasted from a browser) and useMart() will cope.
+
 CHANGES IN VERSION 2.32.0
 -------------------------
 
diff --git a/R/biomaRt.R b/R/biomaRt.R
index ae0ddd2..0e1f8f9 100644
--- a/R/biomaRt.R
+++ b/R/biomaRt.R
@@ -83,11 +83,13 @@ listMarts <- function( mart = NULL, host="www.ensembl.org", path="/biomart/marts
                            "&redirect=no",
                            "")
         
+        host <- .cleanHostURL(host)
         if(archive) {
-            request = paste0("http://",host,":",port,path,"?type=registry_archive&requestid=biomaRt")
+            warning("The archive = TRUE argument is now deprecated.\nUse listEnsemblMarts() to find the URL to directly query an Ensembl archive.")
+            request = paste0(host, ":", port, path, "?type=registry_archive&requestid=biomaRt")
         } 
         else {
-            request = paste0("http://", host, ":", port, path, "?type=registry&requestid=biomaRt", redirect)	
+            request = paste0(host, ":", port, path, "?type=registry&requestid=biomaRt", redirect)	
         }
     }
     else{
@@ -178,11 +180,13 @@ useMart <- function(biomart, dataset, host = "www.ensembl.org", path = "/biomart
             stop("biomart argument is not a string. ",
                  "The biomart argument should be a single character string")
     }
-    #if(biomart == "ensembl" & (host == "www.ensembl.org" | host == "uswest.ensembl.org")){
+
     if(biomart == "ensembl" & grepl(x = host, pattern = "ensembl.org")) {
         biomart = "ENSEMBL_MART_ENSEMBL"
     }
+    
     reqHost = host
+    host <- .cleanHostURL(host)
 
     marts <- listMarts(host=host, path=path, port=port, includeHosts = TRUE,
                        archive = archive, ssl.verifypeer = ssl.verifypeer, 
@@ -468,201 +472,146 @@ filterType <- function(filter, mart){
 ##########################################
 
 getBM <- function(attributes, filters = "", values = "", mart, curl = NULL, checkFilters = TRUE, verbose=FALSE, uniqueRows=TRUE, bmHeader=FALSE, quote="\""){
-  
-  martCheck(mart)
-  if(missing( attributes ))
-    stop("Argument 'attributes' must be specified.")
-  
-   if(is.list(filters) && !missing( values ))
-             warning("Argument 'values' should not be used when argument 'filters' is a list and will be ignored.")
-   if(is.list(filters) && is.null(names(filters)))
-             stop("Argument 'filters' must be a named list when sent as a list.")
-   if(!is.list(filters) && filters != "" && missing( values ))
+    
+    martCheck(mart)
+    if(missing( attributes ))
+        stop("Argument 'attributes' must be specified.")
+    
+    if(is.list(filters) && !missing( values ))
+        warning("Argument 'values' should not be used when argument 'filters' is a list and will be ignored.")
+    if(is.list(filters) && is.null(names(filters)))
+        stop("Argument 'filters' must be a named list when sent as a list.")
+    if(!is.list(filters) && filters != "" && missing( values ))
         stop("Argument 'values' must be specified.")
-
-  if(length(filters) > 0 && length(values) == 0)
-    stop("Values argument contains no data.")
-  
-  if(is.list(filters)){
-    values = filters
-    filters = names(filters)
-  }
-  
-  if(class(uniqueRows) != "logical")
-    stop("Argument 'uniqueRows' must be a logical value, so either TRUE or FALSE")
-
-  ## force the query to return the 'english text' header names with the result
-  ## we use these later to match and order attribute/column names    
-  callHeader <- TRUE
-  xmlQuery = paste0("<?xml version='1.0' encoding='UTF-8'?><!DOCTYPE Query><Query  virtualSchemaName = '",
-                   martVSchema(mart),
-                   "' uniqueRows = '",
-                   as.numeric(uniqueRows),
-                   "' count = '0' datasetConfigVersion = '0.6' header='",
-                   as.numeric(callHeader),
-                   "' requestid= 'biomaRt'> <Dataset name = '",
-                   martDataset(mart),"'>")
-  
-  #checking the Attributes
-  invalid = !(attributes %in% listAttributes(mart, what="name"))
-  if(any(invalid))
-    stop(paste("Invalid attribute(s):", paste(attributes[invalid], collapse=", "),
-               "\nPlease use the function 'listAttributes' to get valid attribute names"))
-
-  #check if attributes come from multiple attribute pages currently disabled until ID issue resovled at Ensembl
-  if(FALSE){
-    att = listAttributes(mart, what=c("name","page"))
-    att = att[which(att[,1] %in% attributes),]
-    attOK = FALSE
-    pages = unique(att[,2])
-    if(length(pages) <= 1){
-      attOK = TRUE
-    }
-    else{
-      for(page in pages){
-        if(length(attributes) == length(which(attributes %in% att[which(att[,2] == page),1]))) attOK = TRUE
-      }
-    }
-    if(!attOK){
-      stop(paste("Querying attributes from multiple attribute pages is not allowed.  To see the attribute pages attributes belong to, use the function attributePages."))
+    
+    if(length(filters) > 0 && length(values) == 0)
+        stop("Values argument contains no data.")
+    
+    if(is.list(filters)){
+        values = filters
+        filters = names(filters)
     }
-  }
-  #attribute are ok lets add them to the query
-  attributeXML =  paste("<Attribute name = '", attributes, "'/>", collapse="", sep="")
-  
-  #checking the filters
-  if(filters[1] != "" && checkFilters){
-    invalid = !(filters %in% listFilters(mart, what="name"))
+    
+    if(class(uniqueRows) != "logical")
+        stop("Argument 'uniqueRows' must be a logical value, so either TRUE or FALSE")
+    
+    ## force the query to return the 'english text' header names with the result
+    ## we use these later to match and order attribute/column names    
+    callHeader <- TRUE
+    xmlQuery = paste0("<?xml version='1.0' encoding='UTF-8'?><!DOCTYPE Query><Query  virtualSchemaName = '",
+                      martVSchema(mart),
+                      "' uniqueRows = '",
+                      as.numeric(uniqueRows),
+                      "' count = '0' datasetConfigVersion = '0.6' header='",
+                      as.numeric(callHeader),
+                      "' requestid= 'biomaRt'> <Dataset name = '",
+                      martDataset(mart),"'>")
+    
+    #checking the Attributes
+    invalid = !(attributes %in% listAttributes(mart, what="name"))
     if(any(invalid))
-      stop(paste("Invalid filters(s):", paste(filters[invalid], collapse=", "),
-                 "\nPlease use the function 'listFilters' to get valid filter names"))
-  }
-  
-  filterXML = NULL
-  
-  if(length(filters) > 1){
-    if(class(values)!= "list")stop("If using multiple filters, the 'value' has to be a list.\nFor example, a valid list for 'value' could be: list(affyid=c('1939_at','1000_at'), chromosome= '16')\nHere we select on Affymetrix identifier and chromosome, only results that pass both filters will be returned");
-   
-    for(i in seq(along = filters)){
-      if(filters[i] %in% listFilters(mart, what = "name")){
-        filtertype=filterType(filters[i], mart)
-        if(filtertype == 'boolean' || filtertype == 'boolean_list'){
-          if(!is.logical(values[[i]])) 
-              stop("biomaRt error: ", filters[i], " is a boolean filter and needs a corresponding logical value of TRUE or FALSE to indicate if the query should retrieve all data that fulfill the boolean or alternatively that all data that not fulfill the requirement should be retrieved.")  
-          if(!values[[i]]){
-            values[[i]] = 1
-          }
-          else{
-            values[[i]] = 0 
-          }
-          filterXML = paste(filterXML,paste("<Filter name = '",filters[i],"' excluded = \"",values[[i]],"\" />", collapse="",sep=""),sep="")
+        stop(paste("Invalid attribute(s):", paste(attributes[invalid], collapse=", "),
+                   "\nPlease use the function 'listAttributes' to get valid attribute names"))
+    
+    #check if attributes come from multiple attribute pages currently disabled until ID issue resovled at Ensembl
+    if(FALSE){
+        att = listAttributes(mart, what=c("name","page"))
+        att = att[which(att[,1] %in% attributes),]
+        attOK = FALSE
+        pages = unique(att[,2])
+        if(length(pages) <= 1){
+            attOK = TRUE
         }
         else{
-          if(is.numeric(values[[i]])){ values[[i]] = as.integer(values[[i]])}
-          valuesString = paste(values[[i]],"",collapse=",",sep="")
-          filterXML = paste(filterXML,paste("<Filter name = '",filters[i],"' value = '",valuesString,"' />", collapse="",sep=""),sep="")
+            for(page in pages){
+                if(length(attributes) == length(which(attributes %in% att[which(att[,2] == page),1]))) attOK = TRUE
+            }
         }
-      }
-      else{ #used for attributes with values as these are treated as filters in BioMart
-        valuesString = paste(values[[i]],"",collapse=",",sep="")
-        filterXML = paste(filterXML,paste("<Filter name = '",filters[i],"' value = '",valuesString,"' />", collapse="",sep=""),sep="")
-      } 
-    }
-  }
-  else{
-    if(filters != ""){
-      if(is.list(values)){
-        values = unlist(values)
-      }
-      if(filters %in% listFilters(mart, what="name")){
-        filtertype =filterType(filters, mart)
-        if(filtertype == 'boolean' || filtertype == 'boolean_list'){
-          if(!is.logical(values)) stop(paste("biomaRt error: ",filters," is a boolean filter and needs a corresponding logical value of TRUE or FALSE to indicate if the query should retrieve all data that fulfill the boolean or alternatively that all data that not fulfill the requirement should be retrieved."), sep="") 
-          if(!values){
-            values = 1
-          }
-          else{
-            values = 0 
-          }
-          filterXML = paste("<Filter name = '",filters,"' excluded = \"",values,"\" />", collapse="",sep="")
+        if(!attOK){
+            stop(paste("Querying attributes from multiple attribute pages is not allowed.  To see the attribute pages attributes belong to, use the function attributePages."))
         }
-        else{
-          if(is.numeric(values)){
-            values = as.integer(values)
-          }  
-          valuesString = paste(values,"",collapse=",",sep="")
-          filterXML = paste("<Filter name = '",filters,"' value = '",valuesString,"' />", collapse="",sep="")
-        }
-      }
-      else{ #used for attributes with values as these are treated as filters in BioMart
-        valuesString = paste(values,"",collapse=",",sep="")
-        filterXML = paste(filterXML,paste("<Filter name = '",filters,"' value = '",valuesString,"' />", collapse="",sep=""),sep="")
-      }
     }
-    else{
-      filterXML=""
+    #attribute are ok lets add them to the query
+    attributeXML = paste("<Attribute name = '", attributes, "'/>", collapse="", sep="")
+    
+    #checking the filters
+    if(filters[1] != "" && checkFilters){
+        invalid = !(filters %in% listFilters(mart, what="name"))
+        if(any(invalid))
+            stop(paste("Invalid filters(s):", paste(filters[invalid], collapse=", "),
+                       "\nPlease use the function 'listFilters' to get valid filter names"))
     }
-  }
-  
-  xmlQuery = paste(xmlQuery, attributeXML, filterXML,"</Dataset></Query>",sep="")
     
-  if(verbose){
-    cat(paste(xmlQuery,"\n", sep=""))
-  }      
-  
-  ## we choose a separator based on whether 'redirect=no' is present
-  sep <- ifelse(grepl(x = martHost(mart), pattern = ".+\\?.+"), "&", "?")
-
-  postRes = tryCatch(postForm(paste0(martHost(mart), sep),"query" = xmlQuery), error = function(e){stop("Request to BioMart web service failed. Verify if you are still connected to the internet.  Alternatively the BioMart web service is temporarily down.")})
-  if(verbose){
-    writeLines("#################\nResults from server:")
-    print(postRes)
-  }
-  if(!(is.character(postRes) && (length(postRes)==1L)))
-    stop("The query to the BioMart webservice returned an invalid result: biomaRt expected a character string of length 1. Please report this to the mailing list.")
-
-  if(gsub("\n", "", postRes, fixed = TRUE, useBytes = TRUE) == "") { # meaning an empty result
+    ## filterXML is a list containing filters with reduced numbers of values
+    ## to meet the 500 value limit in BioMart queries
+    filterXmlList <- .generateFilterXML(filters, values, mart)
     
-    result = as.data.frame(matrix("", ncol=length(attributes), nrow=0), stringsAsFactors=FALSE)
+    resultList <- list()
+    if(length(filterXmlList) > 1) {
+        pb <- progress_bar$new(total = length(filterXmlList),
+                           width = options()$width - 10,
+                           format = "Batch submitting query [:bar] :percent eta: :eta")
+        pb$tick(0)
+    }
     
-  } else {
+    ## we submit a query for each chunk of the filter list
+    for(i in seq_along(filterXmlList)) {
+        
+        if(exists('pb')) {
+            pb$tick()
+        }
+        
+        filterXML <- filterXmlList[[ i ]]
+        fullXmlQuery = paste(xmlQuery, attributeXML, filterXML,"</Dataset></Query>",sep="")
+        
+        if(verbose) {
+            message(fullXmlQuery)
+        }      
+        
+        ## we choose a separator based on whether '?redirect=no' is present
+        sep <- ifelse(grepl(x = martHost(mart), pattern = ".+\\?.+"), "&", "?")
+        
+        postRes = tryCatch(postForm(paste0(martHost(mart), sep),"query" = fullXmlQuery), 
+                           error = function(e) {
+                               stop("Request to BioMart web service failed. Verify if you are still connected to the internet.  Alternatively the BioMart web service is temporarily down.") 
+                               }
+                           )
+        if(verbose){
+            writeLines("#################\nResults from server:")
+            print(postRes)
+        }
+        if(!(is.character(postRes) && (length(postRes)==1L)))
+            stop("The query to the BioMart webservice returned an invalid result: biomaRt expected a character string of length 1. Please report this to the mailing list.")
+        
+        if(gsub("\n", "", postRes, fixed = TRUE, useBytes = TRUE) == "") { # meaning an empty result
+            
+            result = as.data.frame(matrix("", ncol=length(attributes), nrow=0), stringsAsFactors=FALSE)
+            
+        } else {
+            
+            if(length(grep("^Query ERROR", postRes))>0L)
+                stop(postRes)
+            
+            ## convert the serialized table into a dataframe
+            con = textConnection(postRes)
+            result = read.table(con, sep="\t", header=callHeader, quote = quote, comment.char = "", check.names = FALSE, stringsAsFactors=FALSE)
+            if(verbose){
+                writeLines("#################\nParsed results:")
+                print(result)
+            }
+            close(con)
+            
+            if(!(is(result, "data.frame") && (ncol(result)==length(attributes)))) {
+                print(head(result))
+                stop("The query to the BioMart webservice returned an invalid result: the number of columns in the result table does not equal the number of attributes in the query. Please report this to the mailing list.")
+            }
+        }
     
-    if(length(grep("^Query ERROR", postRes))>0L)
-      stop(postRes)
-
-    ## convert the serialized table into a dataframe
-    con = textConnection(postRes)
-    result = read.table(con, sep="\t", header=callHeader, quote = quote, comment.char = "", check.names = FALSE, stringsAsFactors=FALSE)
-    if(verbose){
-      writeLines("#################\nParsed results:")
-      print(result)
+        resultList[[i]] <- .setResultColNames(result, mart = mart, attributes = attributes, bmHeader = bmHeader)
     }
-    close(con)
-
-    if(!(is(result, "data.frame") && (ncol(result)==length(attributes)))) {
-      print(head(result))
-      stop("The query to the BioMart webservice returned an invalid result: the number of columns in the result table does not equal the number of attributes in the query. Please report this to the mailing list.")
-    }
-  }
-  # if(!bmHeader){  #assumes order of results same as order of attibutes in input 
-  #   colnames(result) = attributes
-  # }
-  # else{
-  #   toAttributeName=FALSE
-  #   if(toAttributeName){  #set to TRUE if attempting to replace attribute descriptions with attribute names
-  #     att = listAttributes(mart)
-  #     resultNames = colnames(result)
-  #     for(r in 1:length(resultNames)){
-  #       asel = which(att[,2] == resultNames[r])
-  #       if(length(asel) == 1){
-  #         resultNames[r] = att[asel,1]
-  #       }
-  #     }
-  #     colnames(result) = resultNames
-  #   }
-  # }
-  result <- .setResultColNames(result, mart = mart, attributes = attributes, bmHeader = bmHeader)
-  return(result)
+    ## collate results
+    result <- do.call('rbind', resultList)
+    return(result)
 }
 
 ###################################
diff --git a/R/ensembl.R b/R/ensembl.R
new file mode 100644
index 0000000..8459455
--- /dev/null
+++ b/R/ensembl.R
@@ -0,0 +1,24 @@
+## location of Ensembl specific functions
+
+## scrapes the ensembl website for the list of current archives and returns
+## a data frame containing the versions and their URL
+listEnsemblArchives <- function() {
+    
+    html <- htmlParse("http://www.ensembl.org/info/website/archives/index.html")
+    
+    archive_box <- getNodeSet(html, path = "//div[@class='plain-box float-right archive-box']")[[1]]
+    
+    archive_box_string <- toString.XMLNode(archive_box)
+    
+    archives <- strsplit(archive_box_string, split = "<li>")[[1]][-1]
+    
+    extracted <- str_extract_all(string = archives, 
+                    pattern = "Ensembl [A-Za-z0-9 ]{2,6}|http://.*ensembl\\.org|[A-Z][a-z]{2} [0-9]{4}")
+    
+    tab <- do.call("rbind", extracted)
+    colnames(tab) <- c("url", "version", "date")
+    tab <- tab[,c(2,3,1)]
+    
+    return(tab)
+}
+
diff --git a/R/methods-Mart.R b/R/methods-Mart.R
index 8e6a2f8..9a17cc7 100644
--- a/R/methods-Mart.R
+++ b/R/methods-Mart.R
@@ -1,8 +1,20 @@
 setMethod("show",signature(object="Mart"),
-  function(object){	
-    res = paste("Object of class 'Mart':\n Using the ",object@biomart," BioMart database\n Using the ",object@dataset," dataset\n", sep="")
-    cat(res)
-})
+          function(object){	
+              
+              dbase <- ifelse(nchar(object@biomart) != 0, 
+                              yes = paste("  Using the", object@biomart, "BioMart database"),
+                              no = "  No database selected.")
+              
+              dset <- ifelse(nchar(object@dataset) != 0, 
+                             yes = paste("  Using the", object@dataset, "dataset"),
+                             no = "  No dataset selected.")
+              
+              res <- paste("Object of class 'Mart':",
+                           dbase,
+                           dset,
+                           sep="\n")
+              cat(res)
+          })
 
 setGeneric("martBM",def=function(obj,...) standardGeneric("martBM"))
 setMethod("martBM",signature("Mart"), function(obj) obj@biomart)
diff --git a/R/utilityFunctions.R b/R/utilityFunctions.R
index 982b2c7..bbe89e5 100644
--- a/R/utilityFunctions.R
+++ b/R/utilityFunctions.R
@@ -34,4 +34,117 @@
     result <- result[, match(att[matches,1], attributes), drop=FALSE]
     
     return(result)
-}
\ No newline at end of file
+}
+
+## BioMart doesn't work well if the list of values provided to a filter is 
+## longer than 500 values.  It returns only a subset of the requested data
+## and does so silently!  This function is designed to take a list of provided
+## filters, and split any longer than 'maxChunkSize'.  It operates recursively
+## in case there are multiple filters that need splitting, and should ensure
+## all possible groupings of filters are retained.
+.splitValues <- function(valuesList, maxChunkSize = 500) {
+    
+    vLength <- vapply(valuesList[[1]], FUN = length, FUN.VALUE = integer(1))
+    
+    if(all(vLength <= maxChunkSize)) {
+        return(valuesList)
+    } else {
+        ## pick the next filter to split
+        vIdx <- min(which(vLength > maxChunkSize))
+        
+        nchunks <- (vLength[vIdx] %/% maxChunkSize) + 1
+        splitIdx <- rep(1:nchunks, each = ceiling(vLength[vIdx] / nchunks))[ 1:vLength[vIdx] ]
+        
+        ## a new list we will populate with the chunks
+        tmpList <- list()
+        for(i in 1:nchunks) {
+            for( j in 1:length(valuesList) ) {
+                listIdx <- ((i - 1) * length(valuesList)) + j
+                tmpList[[ listIdx ]] <- valuesList[[j]]
+                tmpList[[ listIdx ]][[ vIdx ]] <- tmpList[[ listIdx ]][[ vIdx ]][which(splitIdx == i)]
+            }
+        }
+        ## recursively call the function to process next filter
+        valuesList <- .splitValues(tmpList)
+    }
+    return(valuesList)
+}
+
+## Creating the filter XML for a single chunk of values.  Returns a character
+## vector containing the XML lines for all specified filters & their 
+## attributes spliced together into a single string.
+.createFilterXMLchunk <- function(filterChunk, mart) {
+    
+    individualFilters <- vapply(names(filterChunk), 
+        FUN = function(filter, values, mart) {
+            
+            ## if the filter exists and is boolean we do this
+            if(filter %in% listFilters(mart, what = "name") && 
+               grepl('boolean', filterType(filter = filter, mart = mart)) ) {
+                if(!is.logical(values[[filter]])) 
+                    stop("biomaRt error:\n", 
+                         filter, " is a boolean filter and needs a corresponding logical value of TRUE or FALSE to indicate if the query should retrieve all data that fulfill the boolean or alternatively that all data that not fulfill the requirement should be retrieved.")
+                val <- ifelse(values[[filter]], yes = 0, no = 1)
+                val <- paste0("' excluded = \"", val, "\" ")
+                
+            } else { 
+                ## otherwise the filter isn't boolean, or doesn't exist
+                
+                if(is.numeric(values[[filter]])) 
+                    values[[filter]] <- as.integer(values[[filter]])
+                val <- paste0(values[[filter]], collapse = ",")
+                val <- paste0("' value = '", val, "' ")
+            }
+            filterXML <- paste0("<Filter name = '", filter, val, "/>")
+            return(filterXML)
+        }, FUN.VALUE = character(1), 
+        filterChunk, mart,
+        USE.NAMES = FALSE)
+    
+    filterXML <- paste0(individualFilters, collapse = "")
+    return(filterXML)
+}
+
+.generateFilterXML <- function(filters = "", values, mart) {
+    
+    ## return emptry string if no filter specified
+    if(filters[1]== "") {
+        return("")
+    }
+    ## if we have multiple filters, the values must be specified as a list.
+    if(length(filters) > 1 && class(values) != "list") {
+        stop("If using multiple filters, the 'value' has to be a list.\nFor example, a valid list for 'value' could be: list(affyid=c('1939_at','1000_at'), chromosome= '16')\nHere we select on Affymetrix identifier and chromosome, only results that pass both filters will be returned");
+    } 
+    ## it's easy to not realise you're passing a data frame here, so check
+    if(is.data.frame(values) && ncol(values == 1)) {
+        values <- values[,1]
+    }
+
+    
+    if(!is.list(values)){
+        values <- list(values)
+    }
+    names(values) <- filters
+    
+    values <- .splitValues(list(values))
+    
+    filterXML_list <- lapply(values, .createFilterXMLchunk, mart)
+}
+
+#' it seems like pretty common practice for users to copy and paste the host
+#' name from a browser if they're not accessing Ensembl.  Typically this will
+#' include the "http://" and maybe a trailing "/", and this messes up our
+#' "paste the complete URL" strategy and produces something invalid.  
+#' This function tidies that up to catch common variants.
+.cleanHostURL <- function(host) {
+    
+    ## strip trailing slash
+    host <- gsub(pattern = "/$", replacement = "", x = host)
+    
+    ## only prepend http if needed 
+    if(!grepl(pattern = "^http://|^https://", x = host)) {
+        host <- paste0("http://", host)
+    }
+    
+    return(host)
+}
diff --git a/build/vignette.rds b/build/vignette.rds
index daeeff4..fef17e9 100644
Binary files a/build/vignette.rds and b/build/vignette.rds differ
diff --git a/inst/doc/biomaRt.R b/inst/doc/biomaRt.R
index 658a278..aa42009 100644
--- a/inst/doc/biomaRt.R
+++ b/inst/doc/biomaRt.R
@@ -1,4 +1,4 @@
-## ----setup, cache = F, echo = FALSE--------------------------------------
+## ----setup, cache = F, echo = FALSE----------------------------------------
 knitr::opts_chunk$set(error = TRUE)
 
 ## ----annotate,echo=FALSE----------------------------------------------------------------------------------------------
@@ -129,11 +129,14 @@ listMarts(archive = TRUE)
 ## ----archiveMarts2, echo = TRUE, eval = TRUE--------------------------------------------------------------------------
 ensembl = useMart("ensembl_mart_46", dataset="hsapiens_gene_ensembl", archive = TRUE)
 
+## ----archiveMarts, echo = TRUE, eval = TRUE---------------------------------------------------------------------------
+listEnsemblArchives()
+
 ## ----archiveMarts3, echo = TRUE, eval = TRUE--------------------------------------------------------------------------
-listMarts(host='may2009.archive.ensembl.org')
-ensembl54=useMart(host='may2009.archive.ensembl.org', 
-                  biomart='ENSEMBL_MART_ENSEMBL', 
-                  dataset='hsapiens_gene_ensembl')
+listMarts(host = 'may2009.archive.ensembl.org')
+ensembl54 <- useMart(host='may2009.archive.ensembl.org', 
+                     biomart='ENSEMBL_MART_ENSEMBL', 
+                     dataset='hsapiens_gene_ensembl')
 
 ## ----wormbase, echo=TRUE, eval=TRUE-----------------------------------------------------------------------------------
 listMarts(host = "parasite.wormbase.org")
diff --git a/inst/doc/biomaRt.Rmd b/inst/doc/biomaRt.Rmd
index ed3b60f..c404e64 100644
--- a/inst/doc/biomaRt.Rmd
+++ b/inst/doc/biomaRt.Rmd
@@ -326,14 +326,24 @@ After you selected the BioMart database and dataset, queries can be performed in
 ## Accessing archives through specifying the archive host
 -->
 
-Use the <http://www.ensembl.org> website and go down the bottom of the page.  Click on 'view in Archive' and select the archive you need.  Copy the url and use that url as shown below to connect to the specified BioMart database.  The example below shows how to query Ensembl 54. 
+`r Biocpkg("biomaRt")` provides the function `listEnsemblArchives()` to view the available archives.  This function takes no arguments, and produces a table containing the names of the available archived versions, the date they were first available, and the URL where they can be accessed.
+
+```{r archiveMarts, echo = TRUE, eval = TRUE}
+listEnsemblArchives()
+```
+
+Alternatively, one can use the <http://www.ensembl.org> website to find an archived version.  From the main page, scroll down to the bottom of the page, click on 'view in Archive' and select the archive you need.  
+
+*You will notice that there is an archive URL even for the current release of Ensembl.  It can be useful to use this if you wish to ensure that a script you write now will return exactly the same results in the future.  Using `www.ensembl.org` will always access the current release, and so the data retrieved may change over time as new releases come out.*
+
+Whichever method you use to find the URL of the archive you wish to query, copy the URL and use it in the `host` argument as shown below to connect to the specified BioMart database.  The example below shows how to query Ensembl 54. 
 
 
 ```{r archiveMarts3, echo = TRUE, eval = TRUE}
-listMarts(host='may2009.archive.ensembl.org')
-ensembl54=useMart(host='may2009.archive.ensembl.org', 
-                  biomart='ENSEMBL_MART_ENSEMBL', 
-                  dataset='hsapiens_gene_ensembl')
+listMarts(host = 'may2009.archive.ensembl.org')
+ensembl54 <- useMart(host='may2009.archive.ensembl.org', 
+                     biomart='ENSEMBL_MART_ENSEMBL', 
+                     dataset='hsapiens_gene_ensembl')
 ```
 
 
diff --git a/inst/doc/biomaRt.html b/inst/doc/biomaRt.html
index e03348c..96d2cd4 100644
--- a/inst/doc/biomaRt.html
+++ b/inst/doc/biomaRt.html
@@ -4,23 +4,30 @@
 
 <head>
 
-<meta charset="utf-8">
+<meta charset="utf-8" />
 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
 <meta name="generator" content="pandoc" />
 
+
 <meta name="author" content="Steffen Durinck, Wolfgang Huber, Mike Smith" />
 
+<meta name="date" content="2017-10-30" />
 
 <title>The biomaRt users guide</title>
 
+<script src="data:application/x-javascript;base64,LyohIGpRdWVyeSB2MS4xMS4zIHwgKGMpIDIwMDUsIDIwMTUgalF1ZXJ5IEZvdW5kYXRpb24sIEluYy4gfCBqcXVlcnkub3JnL2xpY2Vuc2UgKi8KIWZ1bmN0aW9uKGEsYil7Im9iamVjdCI9PXR5cGVvZiBtb2R1bGUmJiJvYmplY3QiPT10eXBlb2YgbW9kdWxlLmV4cG9ydHM/bW9kdWxlLmV4cG9ydHM9YS5kb2N1bWVudD9iKGEsITApOmZ1bmN0aW9uKGEpe2lmKCFhLmRvY3VtZW50KXRocm93IG5ldyBFcnJvcigialF1ZXJ5IHJlcXVpcmVzIGEgd2luZG93IHdpdGggYSBkb2N1bWVudCIpO3JldHVybiBiKGEpfTpiKGEpfSgidW5kZWZpbmVkIiE9dHlwZW9mIHdpbmRvdz93aW5kb3c6dG [...]
+<meta name="viewport" content="width=device-width, initial-scale=1" />
+<link href="data:text/css;charset=utf-8,html%7Bfont%2Dfamily%3Asans%2Dserif%3B%2Dwebkit%2Dtext%2Dsize%2Dadjust%3A100%25%3B%2Dms%2Dtext%2Dsize%2Dadjust%3A100%25%7Dbody%7Bmargin%3A0%7Darticle%2Caside%2Cdetails%2Cfigcaption%2Cfigure%2Cfooter%2Cheader%2Chgroup%2Cmain%2Cmenu%2Cnav%2Csection%2Csummary%7Bdisplay%3Ablock%7Daudio%2Ccanvas%2Cprogress%2Cvideo%7Bdisplay%3Ainline%2Dblock%3Bvertical%2Dalign%3Abaseline%7Daudio%3Anot%28%5Bcontrols%5D%29%7Bdisplay%3Anone%3Bheight%3A0%7D%5Bhidden%5D%2Ctem [...]
+<script src="data:application/x-javascript;base64,LyohCiAqIEJvb3RzdHJhcCB2My4zLjUgKGh0dHA6Ly9nZXRib290c3RyYXAuY29tKQogKiBDb3B5cmlnaHQgMjAxMS0yMDE1IFR3aXR0ZXIsIEluYy4KICogTGljZW5zZWQgdW5kZXIgdGhlIE1JVCBsaWNlbnNlCiAqLwppZigidW5kZWZpbmVkIj09dHlwZW9mIGpRdWVyeSl0aHJvdyBuZXcgRXJyb3IoIkJvb3RzdHJhcCdzIEphdmFTY3JpcHQgcmVxdWlyZXMgalF1ZXJ5Iik7K2Z1bmN0aW9uKGEpeyJ1c2Ugc3RyaWN0Ijt2YXIgYj1hLmZuLmpxdWVyeS5zcGxpdCgiICIpWzBdLnNwbGl0KCIuIik7aWYoYlswXTwyJiZiWzFdPDl8fDE9PWJbMF0mJjk9PWJbMV0mJmJbMl08MSl0aHJvdy [...]
+<script src="data:application/x-javascript;base64,LyoqCiogQHByZXNlcnZlIEhUTUw1IFNoaXYgMy43LjIgfCBAYWZhcmthcyBAamRhbHRvbiBAam9uX25lYWwgQHJlbSB8IE1JVC9HUEwyIExpY2Vuc2VkCiovCi8vIE9ubHkgcnVuIHRoaXMgY29kZSBpbiBJRSA4CmlmICghIXdpbmRvdy5uYXZpZ2F0b3IudXNlckFnZW50Lm1hdGNoKCJNU0lFIDgiKSkgewohZnVuY3Rpb24oYSxiKXtmdW5jdGlvbiBjKGEsYil7dmFyIGM9YS5jcmVhdGVFbGVtZW50KCJwIiksZD1hLmdldEVsZW1lbnRzQnlUYWdOYW1lKCJoZWFkIilbMF18fGEuZG9jdW1lbnRFbGVtZW50O3JldHVybiBjLmlubmVySFRNTD0ieDxzdHlsZT4iK2IrIjwvc3R5bGU+IixkLm [...]
+<script src="data:application/x-javascript;base64,LyohIFJlc3BvbmQuanMgdjEuNC4yOiBtaW4vbWF4LXdpZHRoIG1lZGlhIHF1ZXJ5IHBvbHlmaWxsICogQ29weXJpZ2h0IDIwMTMgU2NvdHQgSmVobAogKiBMaWNlbnNlZCB1bmRlciBodHRwczovL2dpdGh1Yi5jb20vc2NvdHRqZWhsL1Jlc3BvbmQvYmxvYi9tYXN0ZXIvTElDRU5TRS1NSVQKICogICovCgovLyBPbmx5IHJ1biB0aGlzIGNvZGUgaW4gSUUgOAppZiAoISF3aW5kb3cubmF2aWdhdG9yLnVzZXJBZ2VudC5tYXRjaCgiTVNJRSA4IikpIHsKIWZ1bmN0aW9uKGEpeyJ1c2Ugc3RyaWN0IjthLm1hdGNoTWVkaWE9YS5tYXRjaE1lZGlhfHxmdW5jdGlvbihhKXt2YXIgYixjPWEuZG [...]
+<script src="data:application/x-javascript;base64,CgovKioKICogalF1ZXJ5IFBsdWdpbjogU3RpY2t5IFRhYnMKICoKICogQGF1dGhvciBBaWRhbiBMaXN0ZXIgPGFpZGFuQHBocC5uZXQ+CiAqIGFkYXB0ZWQgYnkgUnViZW4gQXJzbGFuIHRvIGFjdGl2YXRlIHBhcmVudCB0YWJzIHRvbwogKiBodHRwOi8vd3d3LmFpZGFubGlzdGVyLmNvbS8yMDE0LzAzL3BlcnNpc3RpbmctdGhlLXRhYi1zdGF0ZS1pbi1ib290c3RyYXAvCiAqLwooZnVuY3Rpb24oJCkgewogICJ1c2Ugc3RyaWN0IjsKICAkLmZuLnJtYXJrZG93blN0aWNreVRhYnMgPSBmdW5jdGlvbigpIHsKICAgIHZhciBjb250ZXh0ID0gdGhpczsKICAgIC8vIFNob3cgdGhlIHRhYi [...]
 <link href="data:text/css;charset=utf-8,pre%20%2Eoperator%2C%0Apre%20%2Eparen%20%7B%0Acolor%3A%20rgb%28104%2C%20118%2C%20135%29%0A%7D%0Apre%20%2Eliteral%20%7B%0Acolor%3A%20%23990073%0A%7D%0Apre%20%2Enumber%20%7B%0Acolor%3A%20%23099%3B%0A%7D%0Apre%20%2Ecomment%20%7B%0Acolor%3A%20%23998%3B%0Afont%2Dstyle%3A%20italic%0A%7D%0Apre%20%2Ekeyword%20%7B%0Acolor%3A%20%23900%3B%0Afont%2Dweight%3A%20bold%0A%7D%0Apre%20%2Eidentifier%20%7B%0Acolor%3A%20rgb%280%2C%200%2C%200%29%3B%0A%7D%0Apre%20%2Estri [...]
 <script src="data:application/x-javascript;base64,dmFyIGhsanM9bmV3IGZ1bmN0aW9uKCl7ZnVuY3Rpb24gbShwKXtyZXR1cm4gcC5yZXBsYWNlKC8mL2dtLCImYW1wOyIpLnJlcGxhY2UoLzwvZ20sIiZsdDsiKX1mdW5jdGlvbiBmKHIscSxwKXtyZXR1cm4gUmVnRXhwKHEsIm0iKyhyLmNJPyJpIjoiIikrKHA/ImciOiIiKSl9ZnVuY3Rpb24gYihyKXtmb3IodmFyIHA9MDtwPHIuY2hpbGROb2Rlcy5sZW5ndGg7cCsrKXt2YXIgcT1yLmNoaWxkTm9kZXNbcF07aWYocS5ub2RlTmFtZT09IkNPREUiKXtyZXR1cm4gcX1pZighKHEubm9kZVR5cGU9PTMmJnEubm9kZVZhbHVlLm1hdGNoKC9ccysvKSkpe2JyZWFrfX19ZnVuY3Rpb24gaCh0LH [...]
 
 <style type="text/css">code{white-space: pre;}</style>
 <style type="text/css">
-  pre:not([class]) {
-    background-color: white;
-  }
+
 </style>
 <script type="text/javascript">
 if (window.hljs && document.readyState && document.readyState === "complete") {
@@ -31,27 +38,88 @@ if (window.hljs && document.readyState && document.readyState === "complete") {
 </script>
 
 
-<link href="data:text/css;charset=utf-8,body%2C%20td%20%7B%0Afont%2Dfamily%3A%20sans%2Dserif%3B%0Abackground%2Dcolor%3A%20white%3B%0Afont%2Dsize%3A%2013px%3B%0A%7D%0Abody%20%7B%0Amax%2Dwidth%3A%20800px%3B%0Amargin%3A%200%20auto%3B%0Apadding%3A%201em%201em%202em%3B%0Aline%2Dheight%3A%2020px%3B%0A%7D%0A%0Adiv%23TOC%20li%20%7B%0Alist%2Dstyle%3Anone%3B%0Abackground%2Dimage%3Anone%3B%0Abackground%2Drepeat%3Anone%3B%0Abackground%2Dposition%3A0%3B%0A%7D%0A%0Ap%2C%20pre%20%7B%20margin%3A%200em%2 [...]
 
-<script type="text/javascript">
-document.addEventListener("DOMContentLoaded", function() {
-  var links = document.links;  
-  for (var i = 0, linksLength = links.length; i < linksLength; i++)
-    if(links[i].hostname != window.location.hostname)
-      links[i].target = '_blank';
-});
-</script>
+<style type="text/css">
+h1 {
+  font-size: 34px;
+}
+h1.title {
+  font-size: 38px;
+}
+h2 {
+  font-size: 30px;
+}
+h3 {
+  font-size: 24px;
+}
+h4 {
+  font-size: 18px;
+}
+h5 {
+  font-size: 16px;
+}
+h6 {
+  font-size: 12px;
+}
+.table th:not([align]) {
+  text-align: left;
+}
+</style>
+
+<link href="data:text/css;charset=utf-8,body%20%7B%0Amargin%3A%200px%20auto%3B%0Amax%2Dwidth%3A%201134px%3B%0Afont%2Dfamily%3A%20sans%2Dserif%3B%0Afont%2Dsize%3A%2010pt%3B%0A%7D%0A%0Adiv%23TOC%20ul%20%7B%0Apadding%3A%200px%200px%200px%2045px%3B%0Alist%2Dstyle%3A%20none%3B%0Abackground%2Dimage%3A%20none%3B%0Abackground%2Drepeat%3A%20none%3B%0Abackground%2Dposition%3A%200%3B%0Afont%2Dsize%3A%2010pt%3B%0Afont%2Dfamily%3A%20Helvetica%2C%20Arial%2C%20sans%2Dserif%3B%0A%7D%0Adiv%23TOC%20%3E%20 [...]
 
 </head>
 
 <body>
 
+<style type="text/css">
+.main-container {
+  max-width: 828px;
+  margin-left: auto;
+  margin-right: auto;
+}
+
+img {
+  max-width:100%;
+  height: auto;
+}
+.tabbed-pane {
+  padding-top: 12px;
+}
+button.code-folding-btn:focus {
+  outline: none;
+}
+</style>
+
+
+
+<div class="container-fluid main-container">
+
+<!-- tabsets -->
+<script>
+$(document).ready(function () {
+  window.buildTabsets("TOC");
+});
+</script>
+
+<!-- code folding -->
+
+
+
+
+
+
+<div class="fluid-row" id="header">
+
+
+
+<h1 class="title toc-ignore">The biomaRt users guide</h1>
+<p class="author-name">Steffen Durinck, Wolfgang Huber, Mike Smith</p>
+<h4 class="date"><em>30 October 2017</em></h4>
+<h4 class="package">Package</h4>
+<p>biomaRt 2.34.0</p>
 
-<div id="header">
-<h1 class="title">The biomaRt users guide</h1>
-<h4 class="author"><em>Steffen Durinck, Wolfgang Huber, Mike Smith</em></h4>
 </div>
-<h4 class="package">Package version: <span style="font-weight: normal">biomaRt 2.32.1</span></h4>
 
 <h1>Contents</h1>
 <div id="TOC">
@@ -98,10 +166,10 @@ document.addEventListener("DOMContentLoaded", function() {
 <pre class="r"><code>library("biomaRt")
 listMarts()</code></pre>
 <pre><code>##                biomart               version
-## 1 ENSEMBL_MART_ENSEMBL      Ensembl Genes 89
-## 2   ENSEMBL_MART_MOUSE      Mouse strains 89
-## 3     ENSEMBL_MART_SNP  Ensembl Variation 89
-## 4 ENSEMBL_MART_FUNCGEN Ensembl Regulation 89</code></pre>
+## 1 ENSEMBL_MART_ENSEMBL      Ensembl Genes 90
+## 2   ENSEMBL_MART_MOUSE      Mouse strains 90
+## 3     ENSEMBL_MART_SNP  Ensembl Variation 90
+## 4 ENSEMBL_MART_FUNCGEN Ensembl Regulation 90</code></pre>
 <p>Note: if the function <code>useMart()</code> runs into proxy problems you should set your proxy first before calling any <em><a href="http://bioconductor.org/packages/biomaRt">biomaRt</a></em> functions.<br />
 You can do this using the Sys.putenv command:</p>
 <pre class="r"><code>Sys.setenv("http_proxy" = "http://my.proxy.org:9999")</code></pre>
@@ -111,76 +179,92 @@ You can do this using the Sys.putenv command:</p>
 <pre class="r"><code>ensembl=useMart("ensembl")</code></pre>
 <p>BioMart databases can contain several datasets, for Ensembl every species is a different dataset. In a next step we look at which datasets are available in the selected BioMart by using the function <code>listDatasets()</code>.</p>
 <pre class="r"><code>listDatasets(ensembl)</code></pre>
-<pre><code>##                           dataset                                 description                version
-## 1          loculatus_gene_ensembl                 Spotted gar genes (LepOcu1)                LepOcu1
-## 2          lafricana_gene_ensembl                  Elephant genes (Loxafr3.0)              Loxafr3.0
-## 3         ocuniculus_gene_ensembl                    Rabbit genes (OryCun2.0)              OryCun2.0
-## 4      acarolinensis_gene_ensembl              Anole lizard genes (AnoCar2.0)              AnoCar2.0
-## 5     aplatyrhynchos_gene_ensembl                   Duck genes (BGI_duck_1.0)           BGI_duck_1.0
-## 6         mdomestica_gene_ensembl                     Opossum genes (monDom5)                monDom5
-## 7          sharrisii_gene_ensembl      Tasmanian devil genes (Devil_ref v7.0)         Devil_ref v7.0
-## 8             oaries_gene_ensembl                      Sheep genes (Oar_v3.1)               Oar_v3.1
-## 9           ggorilla_gene_ensembl                   Gorilla genes (gorGor3.1)              gorGor3.1
-## 10           btaurus_gene_ensembl                          Cow genes (UMD3.1)                 UMD3.1
-## 11         ecaballus_gene_ensembl                     Horse genes (Equ Cab 2)              Equ Cab 2
-## 12        gaculeatus_gene_ensembl                Stickleback genes (BROAD S1)               BROAD S1
-## 13        tbelangeri_gene_ensembl                  Tree Shrew genes (tupBel1)                tupBel1
-## 14        choffmanni_gene_ensembl                       Sloth genes (choHof1)                choHof1
-## 15        cporcellus_gene_ensembl                  Guinea Pig genes (cavPor3)                cavPor3
-## 16     tnigroviridis_gene_ensembl             Tetraodon genes (TETRAODON 8.0)          TETRAODON 8.0
-## 17        ogarnettii_gene_ensembl                    Bushbaby genes (OtoGar3)                OtoGar3
-## 18          csabaeus_gene_ensembl                Vervet-AGM genes (ChlSab1.1)              ChlSab1.1
-## 19           pabelii_gene_ensembl                     Orangutan genes (PPYG2)                  PPYG2
-## 20         etelfairi_gene_ensembl       Lesser hedgehog tenrec genes (TENREC)                 TENREC
-## 21           sscrofa_gene_ensembl                     Pig genes (Sscrofa10.2)            Sscrofa10.2
-## 22          olatipes_gene_ensembl                         Medaka genes (HdrR)                   HdrR
-## 23          pformosa_gene_ensembl Amazon molly genes (Poecilia_formosa-5.1.2) Poecilia_formosa-5.1.2
-## 24             mfuro_gene_ensembl                 Ferret genes (MusPutFur1.0)           MusPutFur1.0
-## 25     dnovemcinctus_gene_ensembl                 Armadillo genes (Dasnov3.0)              Dasnov3.0
-## 26          pmarinus_gene_ensembl                Lamprey genes (Pmarinus_7.0)           Pmarinus_7.0
-## 27        eeuropaeus_gene_ensembl                    Hedgehog genes (eriEur1)                eriEur1
-## 28        mgallopavo_gene_ensembl                  Turkey genes (Turkey_2.01)            Turkey_2.01
-## 29          tguttata_gene_ensembl             Zebra Finch genes (taeGut3.2.4)            taeGut3.2.4
-## 30           gmorhua_gene_ensembl                         Cod genes (gadMor1)                gadMor1
-## 31 itridecemlineatus_gene_ensembl                    Squirrel genes (spetri2)                spetri2
-## 32         pcapensis_gene_ensembl                       Hyrax genes (proCap1)                proCap1
-## 33       nleucogenys_gene_ensembl                      Gibbon genes (Nleu1.0)                Nleu1.0
-## 34         pvampyrus_gene_ensembl                     Megabat genes (pteVam1)                pteVam1
-## 35            vpacos_gene_ensembl                      Alpaca genes (vicPac1)                vicPac1
-## 36         oprinceps_gene_ensembl                  Pika genes (OchPri2.0-Ens)          OchPri2.0-Ens
-## 37        mlucifugus_gene_ensembl                  Microbat genes (Myoluc2.0)              Myoluc2.0
-## 38           ggallus_gene_ensembl           Chicken genes (Gallus_gallus-5.0)      Gallus_gallus-5.0
-## 39            dordii_gene_ensembl                Kangaroo rat genes (dipOrd1)                dipOrd1
-## 40      ptroglodytes_gene_ensembl               Chimpanzee genes (CHIMP2.1.4)             CHIMP2.1.4
-## 41        lchalumnae_gene_ensembl                  Coelacanth genes (LatCha1)                LatCha1
-## 42          saraneus_gene_ensembl                       Shrew genes (sorAra1)                sorAra1
-## 43      amelanoleuca_gene_ensembl                       Panda genes (ailMel1)                ailMel1
-## 44        oniloticus_gene_ensembl                   Tilapia genes (Orenil1.0)              Orenil1.0
-## 45         trubripes_gene_ensembl                       Fugu genes (FUGU 4.0)               FUGU 4.0
-## 46       cfamiliaris_gene_ensembl                       Dog genes (CanFam3.1)              CanFam3.1
-## 47          mmulatta_gene_ensembl                  Macaque genes (Mmul_8.0.1)             Mmul_8.0.1
-## 48           panubis_gene_ensembl              Olive baboon genes (PapAnu2.0)              PapAnu2.0
-## 49            fcatus_gene_ensembl                 Cat genes (Felis_catus_6.2)        Felis_catus_6.2
-## 50          neugenii_gene_ensembl                    Wallaby genes (Meug_1.0)               Meug_1.0
-## 51         csavignyi_gene_ensembl                 C.savignyi genes (CSAV 2.0)               CSAV 2.0
-## 52     dmelanogaster_gene_ensembl                      Fruitfly genes (BDGP6)                  BDGP6
-## 53     cintestinalis_gene_ensembl                   C.intestinalis genes (KH)                     KH
-## 54        xmaculatus_gene_ensembl               Platyfish genes (Xipmac4.4.2)            Xipmac4.4.2
-## 55          mmurinus_gene_ensembl                Mouse Lemur genes (Mmur_2.0)               Mmur_2.0
-## 56          hsapiens_gene_ensembl                    Human genes (GRCh38.p10)             GRCh38.p10
-## 57         csyrichta_gene_ensembl                     Tarsier genes (tarSyr1)                tarSyr1
-## 58          celegans_gene_ensembl     Caenorhabditis elegans genes (WBcel235)               WBcel235
-## 59         psinensis_gene_ensembl Chinese softshell turtle genes (PelSin_1.0)             PelSin_1.0
-## 60       rnorvegicus_gene_ensembl                        Rat genes (Rnor_6.0)               Rnor_6.0
-## 61          cjacchus_gene_ensembl             Marmoset genes (C_jacchus3.2.1)         C_jacchus3.2.1
-## 62         oanatinus_gene_ensembl                      Platypus genes (OANA5)                  OANA5
-## 63        ttruncatus_gene_ensembl                     Dolphin genes (turTru1)                turTru1
-## 64        amexicanus_gene_ensembl                 Cave fish genes (AstMex102)              AstMex102
-## 65       scerevisiae_gene_ensembl    Saccharomyces cerevisiae genes (R64-1-1)                R64-1-1
-## 66            drerio_gene_ensembl                    Zebrafish genes (GRCz10)                 GRCz10
-## 67       xtropicalis_gene_ensembl                     Xenopus genes (JGI 4.2)                JGI 4.2
-## 68         mmusculus_gene_ensembl                     Mouse genes (GRCm38.p5)              GRCm38.p5
-## 69       falbicollis_gene_ensembl               Flycatcher genes (FicAlb_1.4)             FicAlb_1.4</code></pre>
+<pre><code>##                           dataset                                                  description                version
+## 1            ngalili_gene_ensembl Upper Galilee mountains blind mole rat genes (S.galili_v1.0)          S.galili_v1.0
+## 2          oprinceps_gene_ensembl                                   Pika genes (OchPri2.0-Ens)          OchPri2.0-Ens
+## 3            hfemale_gene_ensembl              Naked mole-rat female genes (HetGla_female_1.0)      HetGla_female_1.0
+## 4           pbairdii_gene_ensembl                Northern American deer mouse genes (Pman_1.0)               Pman_1.0
+## 5           mmurinus_gene_ensembl                                 Mouse Lemur genes (Mmur_2.0)               Mmur_2.0
+## 6              mfuro_gene_ensembl                                  Ferret genes (MusPutFur1.0)           MusPutFur1.0
+## 7          trubripes_gene_ensembl                                        Fugu genes (FUGU 4.0)               FUGU 4.0
+## 8         cporcellus_gene_ensembl                                 Guinea Pig genes (Cavpor3.0)              Cavpor3.0
+## 9           saraneus_gene_ensembl                                        Shrew genes (sorAra1)                sorAra1
+## 10       fdamarensis_gene_ensembl                             Damara mole rat genes (DMR_v1.0)               DMR_v1.0
+## 11           gmorhua_gene_ensembl                                          Cod genes (gadMor1)                gadMor1
+## 12      mochrogaster_gene_ensembl                               Prairie vole genes (MicOch1.0)              MicOch1.0
+## 13          cjacchus_gene_ensembl                              Marmoset genes (C_jacchus3.2.1)         C_jacchus3.2.1
+## 14         lafricana_gene_ensembl                                   Elephant genes (Loxafr3.0)              Loxafr3.0
+## 15           btaurus_gene_ensembl                                           Cow genes (UMD3.1)                 UMD3.1
+## 16           mcaroli_gene_ensembl                         Ryukyu mouse genes (CAROLI_EIJ_v1.1)        CAROLI_EIJ_v1.1
+## 17          pmarinus_gene_ensembl                                 Lamprey genes (Pmarinus_7.0)           Pmarinus_7.0
+## 18        ogarnettii_gene_ensembl                                     Bushbaby genes (OtoGar3)                OtoGar3
+## 19         csyrichta_gene_ensembl                                      Tarsier genes (tarSyr1)                tarSyr1
+## 20        gaculeatus_gene_ensembl                                 Stickleback genes (BROAD S1)               BROAD S1
+## 21         etelfairi_gene_ensembl                        Lesser hedgehog tenrec genes (TENREC)                 TENREC
+## 22        ttruncatus_gene_ensembl                                      Dolphin genes (turTru1)                turTru1
+## 23         ecaballus_gene_ensembl                                      Horse genes (Equ Cab 2)              Equ Cab 2
+## 24         mmusculus_gene_ensembl                                      Mouse genes (GRCm38.p5)              GRCm38.p5
+## 25           pabelii_gene_ensembl                                      Orangutan genes (PPYG2)                  PPYG2
+## 26            drerio_gene_ensembl                                     Zebrafish genes (GRCz10)                 GRCz10
+## 27        oniloticus_gene_ensembl                                    Tilapia genes (Orenil1.0)              Orenil1.0
+## 28        mdomestica_gene_ensembl                                      Opossum genes (monDom5)                monDom5
+## 29     cintestinalis_gene_ensembl                                    C.intestinalis genes (KH)                     KH
+## 30           panubis_gene_ensembl                               Olive baboon genes (PapAnu2.0)              PapAnu2.0
+## 31        mgallopavo_gene_ensembl                                   Turkey genes (Turkey_2.01)            Turkey_2.01
+## 32          olatipes_gene_ensembl                                          Medaka genes (HdrR)                   HdrR
+## 33         oanatinus_gene_ensembl                                       Platypus genes (OANA5)                  OANA5
+## 34        ocuniculus_gene_ensembl                                     Rabbit genes (OryCun2.0)              OryCun2.0
+## 35          jjaculus_gene_ensembl                     Lesser Egyptian jerboa genes (JacJac1.0)              JacJac1.0
+## 36       rnorvegicus_gene_ensembl                                         Rat genes (Rnor_6.0)               Rnor_6.0
+## 37      amelanoleuca_gene_ensembl                                        Panda genes (ailMel1)                ailMel1
+## 38         csavignyi_gene_ensembl                                  C.savignyi genes (CSAV 2.0)               CSAV 2.0
+## 39          mauratus_gene_ensembl                             Golden Hamster genes (MesAur1.0)              MesAur1.0
+## 40             hmale_gene_ensembl                       Naked mole-rat male genes (HetGla_1.0)             HetGla_1.0
+## 41            oaries_gene_ensembl                                       Sheep genes (Oar_v3.1)               Oar_v3.1
+## 42     tnigroviridis_gene_ensembl                              Tetraodon genes (TETRAODON 8.0)          TETRAODON 8.0
+## 43        cchok1gshd_gene_ensembl                 Chinese hamster CHOK1GS genes (CHOK1GS_HDv1)           CHOK1GS_HDv1
+## 44 itridecemlineatus_gene_ensembl                                   Squirrel genes (SpeTri2.0)              SpeTri2.0
+## 45      ptroglodytes_gene_ensembl                                Chimpanzee genes (CHIMP2.1.4)             CHIMP2.1.4
+## 46       xtropicalis_gene_ensembl                                      Xenopus genes (JGI 4.2)                JGI 4.2
+## 47            odegus_gene_ensembl                                       Degu genes (OctDeg1.0)              OctDeg1.0
+## 48        choffmanni_gene_ensembl                                        Sloth genes (choHof1)                choHof1
+## 49     dmelanogaster_gene_ensembl                                       Fruitfly genes (BDGP6)                  BDGP6
+## 50          tguttata_gene_ensembl                              Zebra Finch genes (taeGut3.2.4)            taeGut3.2.4
+## 51            vpacos_gene_ensembl                                       Alpaca genes (vicPac1)                vicPac1
+## 52       falbicollis_gene_ensembl                                Flycatcher genes (FicAlb_1.4)             FicAlb_1.4
+## 53     acarolinensis_gene_ensembl                               Anole lizard genes (AnoCar2.0)              AnoCar2.0
+## 54           caperea_gene_ensembl                        Brazilian guinea pig genes (CavAp1.0)               CavAp1.0
+## 55     dnovemcinctus_gene_ensembl                                  Armadillo genes (Dasnov3.0)              Dasnov3.0
+## 56           ggallus_gene_ensembl                            Chicken genes (Gallus_gallus-5.0)      Gallus_gallus-5.0
+## 57         pvampyrus_gene_ensembl                                      Megabat genes (pteVam1)                pteVam1
+## 58    aplatyrhynchos_gene_ensembl                                    Duck genes (BGI_duck_1.0)           BGI_duck_1.0
+## 59          mmulatta_gene_ensembl                                   Macaque genes (Mmul_8.0.1)             Mmul_8.0.1
+## 60          neugenii_gene_ensembl                                     Wallaby genes (Meug_1.0)               Meug_1.0
+## 61        mlucifugus_gene_ensembl                                   Microbat genes (Myoluc2.0)              Myoluc2.0
+## 62        xmaculatus_gene_ensembl                                Platyfish genes (Xipmac4.4.2)            Xipmac4.4.2
+## 63          csabaeus_gene_ensembl                                 Vervet-AGM genes (ChlSab1.1)              ChlSab1.1
+## 64          hsapiens_gene_ensembl                                     Human genes (GRCh38.p10)             GRCh38.p10
+## 65          pformosa_gene_ensembl                  Amazon molly genes (Poecilia_formosa-5.1.2) Poecilia_formosa-5.1.2
+## 66         psinensis_gene_ensembl                  Chinese softshell turtle genes (PelSin_1.0)             PelSin_1.0
+## 67       scerevisiae_gene_ensembl                     Saccharomyces cerevisiae genes (R64-1-1)                R64-1-1
+## 68        lchalumnae_gene_ensembl                                   Coelacanth genes (LatCha1)                LatCha1
+## 69            fcatus_gene_ensembl                                  Cat genes (Felis_catus_6.2)        Felis_catus_6.2
+## 70            dordii_gene_ensembl                                Kangaroo rat genes (Dord_2.0)               Dord_2.0
+## 71        amexicanus_gene_ensembl                                  Cave fish genes (AstMex102)              AstMex102
+## 72        tbelangeri_gene_ensembl                                   Tree Shrew genes (tupBel1)                tupBel1
+## 73          celegans_gene_ensembl                      Caenorhabditis elegans genes (WBcel235)               WBcel235
+## 74       nleucogenys_gene_ensembl                                       Gibbon genes (Nleu1.0)                Nleu1.0
+## 75         pcapensis_gene_ensembl                                        Hyrax genes (proCap1)                proCap1
+## 76           ccrigri_gene_ensembl                    Chinese hamster CriGri genes (CriGri_1.0)             CriGri_1.0
+## 77        eeuropaeus_gene_ensembl                                     Hedgehog genes (eriEur1)                eriEur1
+## 78         clanigera_gene_ensembl                     Long-tailed chinchilla genes (ChiLan1.0)              ChiLan1.0
+## 79           mpahari_gene_ensembl                          Shrew mouse genes (PAHARI_EIJ_v1.1)        PAHARI_EIJ_v1.1
+## 80         loculatus_gene_ensembl                                  Spotted gar genes (LepOcu1)                LepOcu1
+## 81          ggorilla_gene_ensembl                                    Gorilla genes (gorGor3.1)              gorGor3.1
+## 82           sscrofa_gene_ensembl                                      Pig genes (Sscrofa11.1)            Sscrofa11.1
+## 83       cfamiliaris_gene_ensembl                                        Dog genes (CanFam3.1)              CanFam3.1
+## 84         sharrisii_gene_ensembl                       Tasmanian devil genes (Devil_ref v7.0)         Devil_ref v7.0
+## 85         mspreteij_gene_ensembl                          Algerian mouse genes (SPRET_EiJ_v1)           SPRET_EiJ_v1</code></pre>
 <p>To select a dataset we can update the <code>Mart</code> object using the function <code>useDataset()</code>. In the example below we choose to use the hsapiens dataset.</p>
 <pre class="r"><code>ensembl = useDataset("hsapiens_gene_ensembl",mart=ensembl)</code></pre>
 <p>Or alternatively if the dataset one wants to use is known in advance, we can select a BioMart database and dataset in one step by:</p>
@@ -200,12 +284,12 @@ filters[1:5,]</code></pre>
 <p><em>Attributes</em> define the values we are interested in to retrieve. For example we want to retrieve the gene symbols or chromosomal coordinates. The <code>listAttributes()</code> function displays all available attributes in the selected dataset.</p>
 <pre class="r"><code>attributes = listAttributes(ensembl)
 attributes[1:5,]</code></pre>
-<pre><code>##                    name          description         page
-## 1       ensembl_gene_id       Gene stable ID feature_page
-## 2 ensembl_transcript_id Transcript stable ID feature_page
-## 3    ensembl_peptide_id    Protein stable ID feature_page
-## 4       ensembl_exon_id       Exon stable ID feature_page
-## 5           description     Gene description feature_page</code></pre>
+<pre><code>##                            name                  description         page
+## 1               ensembl_gene_id               Gene stable ID feature_page
+## 2       ensembl_gene_id_version       Gene stable ID version feature_page
+## 3         ensembl_transcript_id         Transcript stable ID feature_page
+## 4 ensembl_transcript_id_version Transcript stable ID version feature_page
+## 5            ensembl_peptide_id            Protein stable ID feature_page</code></pre>
 <p>The <code>getBM()</code> function is the main query function in <em><a href="http://bioconductor.org/packages/biomaRt">biomaRt</a></em>. It has four main arguments:</p>
 <ul>
 <li><code>attributes</code>: is a vector of attributes that one wants to retrieve (= the output of the query).</li>
@@ -253,12 +337,12 @@ goids = getBM(attributes = c('entrezgene', 'go_id'),
               mart = ensembl)
 head(goids)</code></pre>
 <pre><code>##   entrezgene      go_id
-## 1        673 GO:0044297
-## 2        673 GO:0043005
-## 3        673 GO:0016020
-## 4        673 GO:0005886
-## 5        673 GO:0005739
-## 6        673 GO:0005737</code></pre>
+## 1        673 GO:0000166
+## 2        673 GO:0004672
+## 3        673 GO:0004674
+## 4        673 GO:0005524
+## 5        673 GO:0006468
+## 6        673 GO:0010628</code></pre>
 </div>
 <div id="retrieve-all-hugo-gene-symbols-of-genes-that-are-located-on-chromosomes-1720-or-y-and-are-associated-with-specific-go-terms" class="section level2">
 <h2><span class="header-section-number">4.3</span> Retrieve all HUGO gene symbols of genes that are located on chromosomes 17,20 or Y, and are associated with specific GO terms</h2>
@@ -333,20 +417,20 @@ ipro</code></pre>
       values = 'GO:0004707', 
       mart = ensembl)</code></pre>
 <pre><code>##    entrezgene hgnc_symbol
-## 1        1432      MAPK14
-## 2        5596       MAPK4
-## 3      225689      MAPK15
-## 4        5603      MAPK13
-## 5        5601       MAPK9
+## 1      225689      MAPK15
+## 2        5594       MAPK1
+## 3        5595       MAPK3
+## 4        6300      MAPK12
+## 5        5600      MAPK11
 ## 6       51701         NLK
-## 7        5594       MAPK1
-## 8        5599       MAPK8
-## 9        5602      MAPK10
-## 10       6300      MAPK12
+## 7        5598       MAPK7
+## 8        5596       MAPK4
+## 9        1432      MAPK14
+## 10       5603      MAPK13
 ## 11       5597       MAPK6
-## 12       5600      MAPK11
-## 13       5598       MAPK7
-## 14       5595       MAPK3</code></pre>
+## 12       5599       MAPK8
+## 13       5601       MAPK9
+## 14       5602      MAPK10</code></pre>
 </div>
 <div id="given-a-set-of-entrezgene-identifiers-retrieve-100bp-upstream-promoter-sequences" class="section level2">
 <h2><span class="header-section-number">4.7</span> Given a set of EntrezGene identifiers, retrieve 100bp upstream promoter sequences</h2>
@@ -376,7 +460,10 @@ getSequence(id = entrez,
             seqType="coding_gene_flank",
             upstream=100, 
             mart=ensembl) </code></pre>
-<pre><code>## Error in getBM(c(seqType, type), filters = c(type, "upstream_flank"), : Query ERROR: caught BioMart::Exception::Usage: Filter upstream_flank NOT FOUND</code></pre>
+<pre><code>##                                                                                      coding_gene_flank entrezgene
+## 1 CCTCCGCCTCCGCCTCCGCCTCCGCCTCCCCCAGCTCTCCGCCTCCCTTCCCCCTCCCCGCCCGACAGCGGCCGCTCGGGCCCCGGCTCTCGGTTATAAG        673
+## 2 CACGTTTCCGCCCTTTGCAATAAGGAAATACATAGTTTACTTTCATTTTTGACTCTGAGGCTCTTTCCAACGCTGTAAAAAAGGACAGAGGCTGTTCCCT        837
+## 3 TCCTTCTCTGCAGGCCCAGGTGACCCAGGGTTGGAAGTGTCTCATGCTGGATCCCCACTTTTCCTCTTGCAGCAGCCAGACTGCCTTCCGGGTCACTGCC       7157</code></pre>
 </div>
 <div id="retrieve-all-5-utr-sequences-of-all-genes-that-are-located-on-chromosome-3-between-the-positions-185514033-and-185535839" class="section level2">
 <h2><span class="header-section-number">4.8</span> Retrieve all 5’ UTR sequences of all genes that are located on chromosome 3 between the positions 185,514,033 and 185,535,839</h2>
@@ -387,10 +474,10 @@ getSequence(id = entrez,
                    mart=ensembl)
 utr5</code></pre>
 <pre><code>##                                                                                                                                             5utr
-## 1                                                                                                        ATTCTTGTGAATGTGACACACGATCTCTCCAGTTTCCAT
-## 2                                                                                                                           Sequence unavailable
-## 3                                                        TGAGCAAAATCCCACAGTGGAAACTCTTAAGCCTCTGCGAAGTAAATCATTCTTGTGAATGTGACACACGATCTCTCCAGTTTCCAT
-## 4 AGTCCCTAGGGAACTTCCTGTTGTCACCACACCTCTGAGTCGTCTGAGCTCACTGTGAGCAAAATCCCACAGTGGAAACTCTTAAGCCTCTGCGAAGTAAATCATTCTTGTGAATGTGACACACGATCTCTCCAGTTTCCAT
+## 1                                                        TGAGCAAAATCCCACAGTGGAAACTCTTAAGCCTCTGCGAAGTAAATCATTCTTGTGAATGTGACACACGATCTCTCCAGTTTCCAT
+## 2                                                                                                        ATTCTTGTGAATGTGACACACGATCTCTCCAGTTTCCAT
+## 3 AGTCCCTAGGGAACTTCCTGTTGTCACCACACCTCTGAGTCGTCTGAGCTCACTGTGAGCAAAATCCCACAGTGGAAACTCTTAAGCCTCTGCGAAGTAAATCATTCTTGTGAATGTGACACACGATCTCTCCAGTTTCCAT
+## 4                                                                                                                           Sequence unavailable
 ##   entrezgene
 ## 1     200879
 ## 2     200879
@@ -406,23 +493,23 @@ utr5</code></pre>
                       mart=ensembl)
 protein</code></pre>
 <pre><code>##                                                                                                                                                                                                                                                                                                                                                                                                                peptide
-## 1                                                                                                                                                                                                                                                           ALLFHKMMFETIPMFSGGTCNPQFVVCQLKVKIYSSNSGPTRREDKFMYFEFPQPLPVCGDIKVEFFHKQNKMLKKDKMFHFWVNTFFIPGPEETSEKVENGSLCDQEIDSICSIERADNDKEYLVLTLTKNDLDKANKDKANRYFSPNFKVS*
-## 2                                                                                                                                             MAQTPAFDKPKVELHVHLDGSIKPETILYYGRRRGIALPANTAEGLLNVIGMDKPLTLPDFLAKFDYYMPAIAGCREAIKRIAYEFVEMKAKEGVVYVEVRYSPHLLANSKVEPIPWNQAEGDLTPDEVVALVGQGLQEGERDFGVKARSILCCMRHQPNWSPKVVELCKKYQQQTVVAIDLAGDETIPGSSLLPGHVQAYQEAVKSGIHRTVHAGEVGSAEVVKEAVDILKTERLGHGYHTLEDQALYNRLRQENMHFEAQK*
-## 3                                         MAQTPAFDKPKVELHVHLDGSIKPETILYYGRRRGIALPANTAEGLLNVIGMDKPLTLPDFLAKFDYYMPAIAGCREAIKRIAYEFVEMKAKEGVVYVEVRYSPHLLANSKVEPIPWNQAEGDLTPDEVVALVGQGLQEGERDFGVKARSILCCMRHQPNWSPKVVELCKKYQQQTVVAIDLAGDETIPGSSLLPGHVQAYQEAVKSGIHRTVHAGEVGSAEVVKEAVDILKTERLGHGYHTLEDQALYNRLRQENMHFEICPWSSYLTGAWKPDTEHAVIRLKNDQANYSLNTDDPLIFKSTLDTDYQMTKRDMGFTEEEFKRLNINAAKSSFLPEDEKRELLDLLYKAYGMPPSASAGQNL*
-## 4                                                                                                                                                                                                                                                                                                                                                                                                 Sequence unavailable
-## 5                                                                                                                                                                                                                                                                                                                                                                                                 Sequence unavailable
-## 6                                                                                                                                                                                                                                                                                                                                         MAQTPAFDKPKVELHVHLDGSIKPETILYYGRRRGIALPANTAEGLLNVIGMDKPLTLPDFLAKFDYYMPAIARL*
-## 7                                                                 MAQTPAFDKPKVELHVHLDGSIKPETILYYGRRRGIALPANTAEGLLNVIGMDKPLTLPDFLAKFDYYMPAIAGCREAIKRIAYEFVEMKAKEGVVYVEVRYSPHLLANSKVEPIPWNQAEGDLTPDEVVALVGQGLQEGERDFGVKARSILCCMRHQPNWSPKVVELCKKYQQQTVVAIDLAGDETIPGSSLLPGHVQAYQAVDILKTERLGHGYHTLEDQALYNRLRQENMHFEICPWSSYLTGAWKPDTEHAVIRLKNDQANYSLNTDDPLIFKSTLDTDYQMTKRDMGFTEEEFKRLNINAAKSSFLPEDEKRELLDLLYKAYGMPPSASAGQNL*
-## 8 MTAIIKEIVSRNKRRYQEDGFDLDLTYIYPNIIAMGFPAERLEGVYRNNIDDVVRFLDSKHKNHYKIYNLCAERHYDTAKFNCRVAQYPFEDHNPPQLELIKPFCEDLDQWLSEDDNHVAAIHCKAGKGRTGVMICAYLLHRGKFLKAQEALDFYGEVRTRDKKGVTIPSQRRYVYYYSYLLKNHLDYRPVALLFHKMMFETIPMFSGGTCNPQFVVCQLKVKIYSSNSGPTRREDKFMYFEFPQPLPVCGDIKVEFFHKQNKMLKKDKMFHFWVNTFFIPGPEETSEKVENGSLCDQEIDSICSIERADNDKEYLVLTLTKNDLDKANKDKANRYFSPNFKVKLYFTKTVEEPSNPEASSSTSVTPDVSDNEPDHYRYSDTTDSDPENEPFDEDQHTQITKV*
+## 1                                                                                                                                                                                                                                                                                                                                                                                                 Sequence unavailable
+## 2                                                                 MAQTPAFDKPKVELHVHLDGSIKPETILYYGRRRGIALPANTAEGLLNVIGMDKPLTLPDFLAKFDYYMPAIAGCREAIKRIAYEFVEMKAKEGVVYVEVRYSPHLLANSKVEPIPWNQAEGDLTPDEVVALVGQGLQEGERDFGVKARSILCCMRHQPNWSPKVVELCKKYQQQTVVAIDLAGDETIPGSSLLPGHVQAYQAVDILKTERLGHGYHTLEDQALYNRLRQENMHFEICPWSSYLTGAWKPDTEHAVIRLKNDQANYSLNTDDPLIFKSTLDTDYQMTKRDMGFTEEEFKRLNINAAKSSFLPEDEKRELLDLLYKAYGMPPSASAGQNL*
+## 3                                                                                                                                                                                                                                                                                                                                                                                                 Sequence unavailable
+## 4 MTAIIKEIVSRNKRRYQEDGFDLDLTYIYPNIIAMGFPAERLEGVYRNNIDDVVRFLDSKHKNHYKIYNLCAERHYDTAKFNCRVAQYPFEDHNPPQLELIKPFCEDLDQWLSEDDNHVAAIHCKAGKGRTGVMICAYLLHRGKFLKAQEALDFYGEVRTRDKKGVTIPSQRRYVYYYSYLLKNHLDYRPVALLFHKMMFETIPMFSGGTCNPQFVVCQLKVKIYSSNSGPTRREDKFMYFEFPQPLPVCGDIKVEFFHKQNKMLKKDKMFHFWVNTFFIPGPEETSEKVENGSLCDQEIDSICSIERADNDKEYLVLTLTKNDLDKANKDKANRYFSPNFKVKLYFTKTVEEPSNPEASSSTSVTPDVSDNEPDHYRYSDTTDSDPENEPFDEDQHTQITKV*
+## 5                                                                                                                                                                                                                                                                                                                                         MAQTPAFDKPKVELHVHLDGSIKPETILYYGRRRGIALPANTAEGLLNVIGMDKPLTLPDFLAKFDYYMPAIARL*
+## 6                                                                                                                                                                                                                                                           ALLFHKMMFETIPMFSGGTCNPQFVVCQLKVKIYSSNSGPTRREDKFMYFEFPQPLPVCGDIKVEFFHKQNKMLKKDKMFHFWVNTFFIPGPEETSEKVENGSLCDQEIDSICSIERADNDKEYLVLTLTKNDLDKANKDKANRYFSPNFKVS*
+## 7                                         MAQTPAFDKPKVELHVHLDGSIKPETILYYGRRRGIALPANTAEGLLNVIGMDKPLTLPDFLAKFDYYMPAIAGCREAIKRIAYEFVEMKAKEGVVYVEVRYSPHLLANSKVEPIPWNQAEGDLTPDEVVALVGQGLQEGERDFGVKARSILCCMRHQPNWSPKVVELCKKYQQQTVVAIDLAGDETIPGSSLLPGHVQAYQEAVKSGIHRTVHAGEVGSAEVVKEAVDILKTERLGHGYHTLEDQALYNRLRQENMHFEICPWSSYLTGAWKPDTEHAVIRLKNDQANYSLNTDDPLIFKSTLDTDYQMTKRDMGFTEEEFKRLNINAAKSSFLPEDEKRELLDLLYKAYGMPPSASAGQNL*
+## 8                                                                                                                                             MAQTPAFDKPKVELHVHLDGSIKPETILYYGRRRGIALPANTAEGLLNVIGMDKPLTLPDFLAKFDYYMPAIAGCREAIKRIAYEFVEMKAKEGVVYVEVRYSPHLLANSKVEPIPWNQAEGDLTPDEVVALVGQGLQEGERDFGVKARSILCCMRHQPNWSPKVVELCKKYQQQTVVAIDLAGDETIPGSSLLPGHVQAYQEAVKSGIHRTVHAGEVGSAEVVKEAVDILKTERLGHGYHTLEDQALYNRLRQENMHFEAQK*
 ##   entrezgene
-## 1       5728
+## 1        100
 ## 2        100
-## 3        100
+## 3       5728
 ## 4       5728
 ## 5        100
-## 6        100
+## 6       5728
 ## 7        100
-## 8       5728</code></pre>
+## 8        100</code></pre>
 </div>
 <div id="retrieve-known-snps-located-on-the-human-chromosome-8-between-positions-148350-and-148612" class="section level2">
 <h2><span class="header-section-number">4.10</span> Retrieve known SNPs located on the human chromosome 8 between positions 148350 and 148612</h2>
@@ -479,6 +566,11 @@ listMarts(archive = TRUE)
 ```
 
 ```
+## Warning in listMarts(archive = TRUE): The archive = TRUE argument is now deprecated.
+## Use listEnsemblMarts() to find the URL to directly query an Ensembl archive.
+```
+
+```
 ##                        biomart                     version
 ## 1              ensembl_mart_51                  Ensembl 51
 ## 2                  snp_mart_51                      SNP 51
@@ -533,6 +625,11 @@ ensembl = useMart("ensembl_mart_46", dataset="hsapiens_gene_ensembl", archive =
 ```
 
 ```
+## Warning in listMarts(host = host, path = path, port = port, includeHosts = TRUE, : The archive = TRUE argument is now deprecated.
+## Use listEnsemblMarts() to find the URL to directly query an Ensembl archive.
+```
+
+```
 ## Note: requested host was redirected from www.ensembl.org to http://aug2007.archive.ensembl.org:80/biomart/martservice
 ## When using archived Ensembl versions this sometimes can result in connecting to a newer version than the intended Ensembl version
 ## Check your ensembl version using listMarts(mart)
@@ -547,8 +644,32 @@ After you selected the BioMart database and dataset, queries can be performed in
 
 ## Accessing archives through specifying the archive host
 -->
-<p>Use the <a href="http://www.ensembl.org" class="uri">http://www.ensembl.org</a> website and go down the bottom of the page. Click on ‘view in Archive’ and select the archive you need. Copy the url and use that url as shown below to connect to the specified BioMart database. The example below shows how to query Ensembl 54.</p>
-<pre class="r"><code>listMarts(host='may2009.archive.ensembl.org')</code></pre>
+<p><em><a href="http://bioconductor.org/packages/biomaRt">biomaRt</a></em> provides the function <code>listEnsemblArchives()</code> to view the available archives. This function takes no arguments, and produces a table containing the names of the available archived versions, the date they were first available, and the URL where they can be accessed.</p>
+<pre class="r"><code>listEnsemblArchives()</code></pre>
+<pre><code>##       version          date       url                                 
+##  [1,] "Ensembl GRCh37" "Feb 2014" "http://grch37.ensembl.org"         
+##  [2,] "Ensembl 89"     "May 2017" "http://May2017.archive.ensembl.org"
+##  [3,] "Ensembl 88"     "Mar 2017" "http://Mar2017.archive.ensembl.org"
+##  [4,] "Ensembl 87"     "Dec 2016" "http://Dec2016.archive.ensembl.org"
+##  [5,] "Ensembl 86"     "Oct 2016" "http://Oct2016.archive.ensembl.org"
+##  [6,] "Ensembl 85"     "Jul 2016" "http://Jul2016.archive.ensembl.org"
+##  [7,] "Ensembl 84"     "Mar 2016" "http://Mar2016.archive.ensembl.org"
+##  [8,] "Ensembl 83"     "Dec 2015" "http://Dec2015.archive.ensembl.org"
+##  [9,] "Ensembl 82"     "Sep 2015" "http://Sep2015.archive.ensembl.org"
+## [10,] "Ensembl 81"     "Jul 2015" "http://Jul2015.archive.ensembl.org"
+## [11,] "Ensembl 80"     "May 2015" "http://May2015.archive.ensembl.org"
+## [12,] "Ensembl 79"     "Mar 2015" "http://Mar2015.archive.ensembl.org"
+## [13,] "Ensembl 78"     "Dec 2014" "http://Dec2014.archive.ensembl.org"
+## [14,] "Ensembl 77"     "Oct 2014" "http://Oct2014.archive.ensembl.org"
+## [15,] "Ensembl 76"     "Aug 2014" "http://Aug2014.archive.ensembl.org"
+## [16,] "Ensembl 75"     "Feb 2014" "http://Feb2014.archive.ensembl.org"
+## [17,] "Ensembl 74"     "Dec 2013" "http://Dec2013.archive.ensembl.org"
+## [18,] "Ensembl 67"     "May 2012" "http://May2012.archive.ensembl.org"
+## [19,] "Ensembl 54"     "May 2009" "http://May2009.archive.ensembl.org"</code></pre>
+<p>Alternatively, one can use the <a href="http://www.ensembl.org" class="uri">http://www.ensembl.org</a> website to find an archived version. From the main page, scroll down to the bottom of the page, click on ‘view in Archive’ and select the archive you need.</p>
+<p><em>You will notice that there is an archive URL even for the current release of Ensembl. It can be useful to use this if you wish to ensure that a script you write now will return exactly the same results in the future. Using <code>www.ensembl.org</code> will always access the current release, and so the data retrieved may change over time as new releases come out.</em></p>
+<p>Whichever method you use to find the URL of the archive you wish to query, copy the url and use that in the <code>host</code> argument as shown below to connect to the specified BioMart database. The example below shows how to query Ensembl 54.</p>
+<pre class="r"><code>listMarts(host = 'may2009.archive.ensembl.org')</code></pre>
 <pre><code>##                biomart              version
 ## 1 ENSEMBL_MART_ENSEMBL           Ensembl 54
 ## 2     ENSEMBL_MART_SNP Ensembl Variation 54
@@ -556,9 +677,9 @@ After you selected the BioMart database and dataset, queries can be performed in
 ## 4             REACTOME   Reactome(CSHL US) 
 ## 5     wormbase_current   WormBase (CSHL US)
 ## 6                pride       PRIDE (EBI UK)</code></pre>
-<pre class="r"><code>ensembl54=useMart(host='may2009.archive.ensembl.org', 
-                  biomart='ENSEMBL_MART_ENSEMBL', 
-                  dataset='hsapiens_gene_ensembl')</code></pre>
+<pre class="r"><code>ensembl54 <- useMart(host='may2009.archive.ensembl.org', 
+                     biomart='ENSEMBL_MART_ENSEMBL', 
+                     dataset='hsapiens_gene_ensembl')</code></pre>
 </div>
 <div id="using-a-biomart-other-than-ensembl" class="section level1">
 <h1><span class="header-section-number">6</span> Using a BioMart other than Ensembl</h1>
@@ -618,7 +739,7 @@ head(listFilters(wormbase))</code></pre>
 <h3><span class="header-section-number">7.2.2</span> filterOptions</h3>
 <p>Some filters have a limited set of values that can be given to them. To know which values these are one can use the <code>filterOptions()</code> function to retrieve the predetermed values of the respective filter.</p>
 <pre class="r"><code>filterOptions("biotype",ensembl)</code></pre>
-<pre><code>## [1] "[3prime_overlapping_ncRNA,antisense,bidirectional_promoter_lncRNA,IG_C_gene,IG_C_pseudogene,IG_D_gene,IG_J_gene,IG_J_pseudogene,IG_pseudogene,IG_V_gene,IG_V_pseudogene,lincRNA,macro_lncRNA,miRNA,misc_RNA,Mt_rRNA,Mt_tRNA,non_coding,polymorphic_pseudogene,processed_pseudogene,processed_transcript,protein_coding,pseudogene,ribozyme,rRNA,scaRNA,scRNA,sense_intronic,sense_overlapping,snoRNA,snRNA,sRNA,TEC,transcribed_processed_pseudogene,transcribed_unitary_pseudogene, [...]
+<pre><code>## [1] "[3prime_overlapping_ncRNA,antisense_RNA,bidirectional_promoter_lncRNA,IG_C_gene,IG_C_pseudogene,IG_D_gene,IG_J_gene,IG_J_pseudogene,IG_pseudogene,IG_V_gene,IG_V_pseudogene,lincRNA,macro_lncRNA,miRNA,misc_RNA,Mt_rRNA,Mt_tRNA,non_coding,polymorphic_pseudogene,processed_pseudogene,processed_transcript,protein_coding,pseudogene,ribozyme,rRNA,scaRNA,scRNA,sense_intronic,sense_overlapping,snoRNA,snRNA,sRNA,TEC,transcribed_processed_pseudogene,transcribed_unitary_pseudog [...]
 <p>If there are no predetermed values e.g. for the entrezgene filter, then <code>filterOptions()</code> will return the type of filter it is. And most of the times the filter name or it’s description will suggest what values one case use for the respective filter (e.g. entrezgene filter will work with enterzgene identifiers as values)</p>
 </div>
 </div>
@@ -630,13 +751,13 @@ pages</code></pre>
 <pre><code>## [1] "feature_page" "structure"    "homologs"     "snp"          "snp_somatic"  "sequences"</code></pre>
 <p>To show us a smaller list of attributes which belong to a specific page, we can now specify this in the <code>listAttributes()</code> function. <em>The set of attributes is still quite long, so we use <code>head()</code> to show only the first few items here.</em></p>
 <pre class="r"><code>head(listAttributes(ensembl, page="feature_page"))</code></pre>
-<pre><code>##                    name              description         page
-## 1       ensembl_gene_id           Gene stable ID feature_page
-## 2 ensembl_transcript_id     Transcript stable ID feature_page
-## 3    ensembl_peptide_id        Protein stable ID feature_page
-## 4       ensembl_exon_id           Exon stable ID feature_page
-## 5           description         Gene description feature_page
-## 6       chromosome_name Chromosome/scaffold name feature_page</code></pre>
+<pre><code>##                            name                  description         page
+## 1               ensembl_gene_id               Gene stable ID feature_page
+## 2       ensembl_gene_id_version       Gene stable ID version feature_page
+## 3         ensembl_transcript_id         Transcript stable ID feature_page
+## 4 ensembl_transcript_id_version Transcript stable ID version feature_page
+## 5            ensembl_peptide_id            Protein stable ID feature_page
+## 6    ensembl_peptide_id_version    Protein stable ID version feature_page</code></pre>
 <p>We now get a short list of attributes related to the region where the genes are located.</p>
 </div>
 </div>
@@ -677,13 +798,13 @@ select(mart, keys=affy, columns=c('affy_hg_u133_plus_2','entrezgene'),
 <div id="session-info" class="section level1">
 <h1><span class="header-section-number">10</span> Session Info</h1>
 <pre class="r"><code>sessionInfo()</code></pre>
-<pre><code>## R version 3.4.0 (2017-04-21)
+<pre><code>## R version 3.4.2 (2017-09-28)
 ## Platform: x86_64-pc-linux-gnu (64-bit)
-## Running under: Ubuntu 16.04.2 LTS
+## Running under: Ubuntu 16.04.3 LTS
 ## 
 ## Matrix products: default
-## BLAS: /home/biocbuild/bbs-3.5-bioc/R/lib/libRblas.so
-## LAPACK: /home/biocbuild/bbs-3.5-bioc/R/lib/libRlapack.so
+## BLAS: /home/biocbuild/bbs-3.6-bioc/R/lib/libRblas.so
+## LAPACK: /home/biocbuild/bbs-3.6-bioc/R/lib/libRlapack.so
 ## 
 ## locale:
 ##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=en_US.UTF-8        LC_COLLATE=C              
@@ -694,21 +815,52 @@ select(mart, keys=affy, columns=c('affy_hg_u133_plus_2','entrezgene'),
 ## [1] stats     graphics  grDevices utils     datasets  methods   base     
 ## 
 ## other attached packages:
-## [1] biomaRt_2.32.1  BiocStyle_2.4.0
+## [1] biomaRt_2.34.0  BiocStyle_2.6.0
 ## 
 ## loaded via a namespace (and not attached):
-##  [1] Rcpp_0.12.11         AnnotationDbi_1.38.1 knitr_1.16           magrittr_1.5         IRanges_2.10.2      
-##  [6] BiocGenerics_0.22.0  stringr_1.2.0        tools_3.4.0          parallel_3.4.0       Biobase_2.36.2      
-## [11] DBI_0.6-1            htmltools_0.3.6      yaml_2.1.14          rprojroot_1.2        digest_0.6.12       
-## [16] S4Vectors_0.14.3     bitops_1.0-6         RCurl_1.95-4.8       memoise_1.1.0        evaluate_0.10       
-## [21] RSQLite_1.1-2        rmarkdown_1.5        stringi_1.1.5        compiler_3.4.0       backports_1.1.0     
-## [26] stats4_3.4.0         XML_3.98-1.7</code></pre>
+##  [1] Rcpp_0.12.13         AnnotationDbi_1.40.0 knitr_1.17           magrittr_1.5         progress_1.1.2      
+##  [6] IRanges_2.12.0       BiocGenerics_0.24.0  bit_1.1-12           R6_2.2.2             rlang_0.1.2         
+## [11] stringr_1.2.0        blob_1.1.0           tools_3.4.2          parallel_3.4.2       Biobase_2.38.0      
+## [16] DBI_0.7              htmltools_0.3.6      assertthat_0.2.0     yaml_2.1.14          bit64_0.9-7         
+## [21] rprojroot_1.2        digest_0.6.12        tibble_1.3.4         bookdown_0.5         S4Vectors_0.16.0    
+## [26] bitops_1.0-6         RCurl_1.95-4.8       memoise_1.1.0        evaluate_0.10.1      RSQLite_2.0         
+## [31] rmarkdown_1.6        stringi_1.1.5        compiler_3.4.2       prettyunits_1.0.2    backports_1.1.1     
+## [36] stats4_3.4.2         XML_3.98-1.9</code></pre>
 <pre class="r"><code>warnings()</code></pre>
 <pre><code>## NULL</code></pre>
 </div>
 
 
 
+
+</div>
+
+<script>
+
+// add bootstrap table styles to pandoc tables
+function bootstrapStylePandocTables() {
+  $('tr.header').parent('thead').parent('table').addClass('table table-condensed');
+}
+$(document).ready(function () {
+  bootstrapStylePandocTables();
+});
+
+
+</script>
+
+<script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    "HTML-CSS": {
+      styles: {
+        ".MathJax_Display": {
+           "text-align": "center",
+           padding: "0px 150px 0px 65px",
+           margin: "0px 0px 0.5em"
+        },
+      }
+    }
+  });
+</script>
 <!-- dynamically load mathjax for compatibility with self-contained -->
 <script>
   (function () {
diff --git a/man/listEnsemblArchives.Rd b/man/listEnsemblArchives.Rd
new file mode 100644
index 0000000..3a2901d
--- /dev/null
+++ b/man/listEnsemblArchives.Rd
@@ -0,0 +1,18 @@
+\name{listEnsemblArchives}
+\alias{listEnsemblArchives}
+\title{Lists the available archived versions of Ensembl}
+\description{Returns a table containing the available archived versions of 
+Ensembl, along with the dates they were created and the URL used to access
+them.}
+
+\usage{listEnsemblArchives()}
+
+\arguments{}
+
+\author{Mike Smith} 
+
+\examples{
+listEnsemblArchives()
+}
+\keyword{methods}
+
diff --git a/man/listMarts.Rd b/man/listMarts.Rd
index b1a2fe7..709cc38 100644
--- a/man/listMarts.Rd
+++ b/man/listMarts.Rd
@@ -14,7 +14,11 @@ marts there are to connect to.}
 \item{path}{path to martservice that should be pasted behind the host to get to web service URL}
 \item{port}{port to use in HTTP communication} 
 \item{includeHosts}{boolean to indicate if function should return host of the BioMart databases}
-\item{archive}{Boolean to indicate if you want to access archived versions of BioMart database}
+\item{archive}{Boolean to indicate if you want to access archived versions of 
+BioMart database. Note that this argument is now deprecated and will be removed
+in the future.  A better alternative is to specify the url of the archived 
+BioMart you want to access.  For Ensembl you can view the list of archives 
+using \code{\link{listEnsemblArchives}}}
 \item{ssl.verifypeer}{Set SSL peer verification on or off.  By default ssl.verifypeer is set to TRUE}
 \item{ensemblRedirect}{By default when you access Ensembl BioMart it will 
 redirect you to your local mirror, even if you have set a region specific 
diff --git a/man/useMart.Rd b/man/useMart.Rd
index ac982c6..4397539 100644
--- a/man/useMart.Rd
+++ b/man/useMart.Rd
@@ -12,7 +12,7 @@ TRUE, ensemblRedirect = TRUE, version, verbose = FALSE)}
 \item{host}{Host to connect to. Defaults to \code{www.ensembl.org}}
 \item{path}{Path that should be pasted after to host to get access to the web service URL}
 \item{port}{port to connect to, will be pasted between host and path}
-\item{archive}{Boolean to indicate if you want to access archived versions of BioMart databases.  Note that this gives access to only a limited number of archived BioMarts and the most recent archives are often not available. A better alternative is to leave archive = FALSE and to specify the url of the archived BioMart you want to access see vignette for an example.}
+\item{archive}{Boolean to indicate if you want to access archived versions of BioMart databases.  Note that this argument is now deprecated and will be removed in the future.  A better alternative is to leave archive = FALSE and to specify the url of the archived BioMart you want to access.  For Ensembl you can view the list of archives using \code{\link{listEnsemblArchives}}}
 \item{ssl.verifypeer}{Set SSL peer verification on or off.  By default ssl.verifypeer is set to TRUE}
 \item{ensemblRedirect}{By default when you access Ensembl BioMart it will 
 redirect you to your local mirror, even if you have set a region specific 
diff --git a/tests/testthat/test_hostProcessing.R b/tests/testthat/test_hostProcessing.R
new file mode 100644
index 0000000..43f069d
--- /dev/null
+++ b/tests/testthat/test_hostProcessing.R
@@ -0,0 +1,16 @@
+library(biomaRt)
+
+## adding http if needed
+host <- 'www.myurl.org'
+expect_equal(object = .cleanHostURL(host = host),
+             expected = "http://www.myurl.org")
+ 
+## stripping trailing slash
+host <- 'http://www.myurl.org/'
+expect_equal(object = .cleanHostURL(host = host),
+             expected = "http://www.myurl.org")
+
+## leave https already there
+host <- 'https://www.myurl.org'
+expect_equal(object = .cleanHostURL(host = host),
+             expected = "https://www.myurl.org")
diff --git a/tests/testthat/test_useMart.R b/tests/testthat/test_useMart.R
new file mode 100644
index 0000000..3ce0fd4
--- /dev/null
+++ b/tests/testthat/test_useMart.R
@@ -0,0 +1,13 @@
+library(biomaRt)
+
+## checking the show() method
+ensembl <- useMart("ensembl")
+ensembl_with_dataset <- useDataset(ensembl, 
+                                   dataset = "xtropicalis_gene_ensembl")
+
+test_that("Show give sensible dataset information", {
+    expect_output(object = show(ensembl), 
+                  regexp = "No dataset selected")
+    expect_output(object = show(ensembl_with_dataset), 
+                  regexp = "Using the xtropicalis_gene_ensembl dataset")
+})
diff --git a/vignettes/biomaRt.Rmd b/vignettes/biomaRt.Rmd
index ed3b60f..c404e64 100644
--- a/vignettes/biomaRt.Rmd
+++ b/vignettes/biomaRt.Rmd
@@ -326,14 +326,24 @@ After you selected the BioMart database and dataset, queries can be performed in
 ## Accessing archives through specifying the archive host
 -->
 
-Use the <http://www.ensembl.org> website and go down the bottom of the page.  Click on 'view in Archive' and select the archive you need.  Copy the url and use that url as shown below to connect to the specified BioMart database.  The example below shows how to query Ensembl 54. 
+`r Biocpkg("biomaRt")` provides the function `listEnsemblArchives()` to view the available archives.  This function takes no arguments, and produces a table containing the names of the available archived versions, the date they were first available, and the URL where they can be accessed.
+
+```{r archiveMarts, echo = TRUE, eval = TRUE}
+listEnsemblArchives()
+```
+
+Alternatively, one can use the <http://www.ensembl.org> website to find an archived version.  From the main page, scroll down to the bottom of the page, click on 'view in Archive' and select the archive you need.  
+
+*You will notice that there is an archive URL even for the current release of Ensembl.  It can be useful to use this if you wish to ensure that a script you write now will return exactly the same results in the future.  Using `www.ensembl.org` will always access the current release, and so the data retrieved may change over time as new releases come out.*
+
+Whichever method you use to find the URL of the archive you wish to query, copy the url and use that in the `host` argument as shown below to connect to the specified BioMart database.  The example below shows how to query Ensembl 54. 
 
 
 ```{r archiveMarts3, echo = TRUE, eval = TRUE}
-listMarts(host='may2009.archive.ensembl.org')
-ensembl54=useMart(host='may2009.archive.ensembl.org', 
-                  biomart='ENSEMBL_MART_ENSEMBL', 
-                  dataset='hsapiens_gene_ensembl')
+listMarts(host = 'may2009.archive.ensembl.org')
+ensembl54 <- useMart(host='may2009.archive.ensembl.org', 
+                     biomart='ENSEMBL_MART_ENSEMBL', 
+                     dataset='hsapiens_gene_ensembl')
 ```
 
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/r-bioc-biomart.git



More information about the debian-med-commit mailing list