[med-svn] [r-cran-xml2] 12/14: New upstream version 1.1.0
Andreas Tille
tille at debian.org
Fri Oct 13 07:17:16 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository r-cran-xml2.
commit a0e64b12808fafa6bdce04ef8ef2695c27c326d2
Author: Andreas Tille <tille at debian.org>
Date: Fri Oct 13 09:14:08 2017 +0200
New upstream version 1.1.0
---
DESCRIPTION | 38 ++
MD5 | 115 ++++
NAMESPACE | 207 ++++++++
NEWS.md | 116 +++++
R/RcppExports.R | 275 ++++++++++
R/as_list.R | 106 ++++
R/as_xml_document.R | 84 +++
R/classes.R | 273 ++++++++++
R/paths.R | 58 +++
R/utils.R | 68 +++
R/xml_attr.R | 242 +++++++++
R/xml_children.R | 141 +++++
R/xml_find.R | 219 ++++++++
R/xml_modify.R | 332 ++++++++++++
R/xml_name.R | 92 ++++
R/xml_namespaces.R | 103 ++++
R/xml_parse.R | 128 +++++
R/xml_path.R | 29 ++
R/xml_schema.R | 21 +
R/xml_serialize.R | 54 ++
R/xml_structure.R | 92 ++++
R/xml_text.R | 140 +++++
R/xml_type.R | 51 ++
R/xml_url.R | 32 ++
R/xml_write.R | 118 +++++
R/zzz.R | 4 +
README.md | 63 +++
build/vignette.rds | Bin 0 -> 204 bytes
build/xml2.pdf | Bin 0 -> 124812 bytes
cleanup | 2 +
configure | 67 +++
configure.win | 0
debian/README.test | 9 -
debian/changelog | 28 -
debian/compat | 1 -
debian/control | 27 -
debian/copyright | 30 --
debian/docs | 3 -
debian/patches/series | 1 -
debian/patches/use_debian_packages_boost.patch | 15 -
debian/rules | 9 -
debian/source/format | 1 -
debian/tests/control | 3 -
debian/tests/run-unit-test | 12 -
debian/watch | 2 -
inst/doc/modification.R | 96 ++++
inst/doc/modification.Rmd | 197 +++++++
inst/doc/modification.html | 285 ++++++++++
inst/extdata/order-doc.xml | 32 ++
inst/extdata/order-schema.xml | 76 +++
inst/extdata/r-project.html | 102 ++++
inst/include/xml2_types.h | 18 +
man/as_list.Rd | 46 ++
man/as_xml_document.Rd | 31 ++
man/read_xml.Rd | 79 +++
man/url_absolute.Rd | 33 ++
man/url_escape.Rd | 26 +
man/url_parse.Rd | 24 +
man/write_xml.Rd | 50 ++
man/xml_attr.Rd | 102 ++++
man/xml_cdata.Rd | 19 +
man/xml_children.Rd | 81 +++
man/xml_comment.Rd | 20 +
man/xml_dtd.Rd | 34 ++
man/xml_find_all.Rd | 106 ++++
man/xml_missing.Rd | 12 +
man/xml_name.Rd | 43 ++
man/xml_new_document.Rd | 39 ++
man/xml_ns.Rd | 53 ++
man/xml_ns_strip.Rd | 30 ++
man/xml_path.Rd | 22 +
man/xml_replace.Rd | 52 ++
man/xml_serialize.Rd | 34 ++
man/xml_set_namespace.Rd | 22 +
man/xml_structure.Rd | 32 ++
man/xml_text.Rd | 51 ++
man/xml_type.Rd | 19 +
man/xml_url.Rd | 25 +
man/xml_validate.Rd | 25 +
src/Makevars.in | 2 +
src/Makevars.win | 14 +
src/RcppExports.cpp | 696 +++++++++++++++++++++++++
src/connection.cpp | 37 ++
src/xml2_doc.cpp | 226 ++++++++
src/xml2_init.cpp | 38 ++
src/xml2_namespace.cpp | 56 ++
src/xml2_node.cpp | 540 +++++++++++++++++++
src/xml2_output.cpp | 157 ++++++
src/xml2_schema.cpp | 30 ++
src/xml2_url.cpp | 162 ++++++
src/xml2_utils.h | 116 +++++
src/xml2_xpath.cpp | 95 ++++
tests/testthat.R | 4 +
tests/testthat/cd_catalog.xml.bz2 | Bin 0 -> 987 bytes
tests/testthat/helper-version.R | 1 +
tests/testthat/lego.html.bz2 | Bin 0 -> 42649 bytes
tests/testthat/ns-multiple-aliases.xml | 4 +
tests/testthat/ns-multiple-default.xml | 4 +
tests/testthat/ns-multiple-prefix.xml | 4 +
tests/testthat/ns-multiple.xml | 4 +
tests/testthat/output/html_structure.txt | 331 ++++++++++++
tests/testthat/output/print-xml_document.txt | 4 +
tests/testthat/output/print-xml_node.txt | 17 +
tests/testthat/output/print-xml_nodeset.txt | 11 +
tests/testthat/test-as_list.R | 37 ++
tests/testthat/test-as_xml_document.R | 41 ++
tests/testthat/test-cdata.R | 7 +
tests/testthat/test-comment.R | 8 +
tests/testthat/test-dtd.R | 16 +
tests/testthat/test-format.R | 17 +
tests/testthat/test-modify-xml.R | 238 +++++++++
tests/testthat/test-namespaces.R | 44 ++
tests/testthat/test-null.R | 70 +++
tests/testthat/test-print.R | 21 +
tests/testthat/test-read-xml.R | 57 ++
tests/testthat/test-url.R | 76 +++
tests/testthat/test-write_xml.R | 112 ++++
tests/testthat/test-xml_attrs.R | 225 ++++++++
tests/testthat/test-xml_children.R | 52 ++
tests/testthat/test-xml_find.R | 123 +++++
tests/testthat/test-xml_missing.R | 44 ++
tests/testthat/test-xml_name.R | 66 +++
tests/testthat/test-xml_nodeset.R | 75 +++
tests/testthat/test-xml_schema.R | 21 +
tests/testthat/test-xml_serialize.R | 26 +
tests/testthat/test-xml_structure.R | 10 +
tests/testthat/test-xml_text.R | 60 +++
tools/winlibs.R | 8 +
vignettes/modification.Rmd | 197 +++++++
129 files changed, 9390 insertions(+), 141 deletions(-)
diff --git a/DESCRIPTION b/DESCRIPTION
new file mode 100644
index 0000000..c76d112
--- /dev/null
+++ b/DESCRIPTION
@@ -0,0 +1,38 @@
+Package: xml2
+Version: 1.1.0
+Title: Parse XML
+Description: Work with XML files using a simple, consistent interface. Built on
+ top of the 'libxml2' C library.
+Authors at R: c(
+ person("Hadley", "Wickham", , "hadley at rstudio.com", "aut"),
+ person("James", "Hester", , "james.hester at rstudio.com", c("aut", "cre")),
+ person("Jeroen", "Ooms", role = "aut"),
+ person("RStudio", role = "cph"),
+ person("R Foundation", role = "ctb",
+ comment = "Copy of R-project homepage cached as example")
+ )
+URL: https://github.com/hadley/xml2/
+BugReports: https://github.com/hadley/xml2/issues/
+Depends: R (>= 3.1.0)
+Imports: Rcpp
+LinkingTo: Rcpp (>= 0.11.4.6), BH
+Suggests: testthat, curl, covr, knitr, rmarkdown, magrittr, httr
+SystemRequirements: libxml2: libxml2-dev (deb), libxml2-devel (rpm)
+License: GPL (>= 2)
+RoxygenNote: 5.0.1.9000
+VignetteBuilder: knitr
+Collate: 'RcppExports.R' 'as_list.R' 'xml_parse.R' 'as_xml_document.R'
+ 'classes.R' 'paths.R' 'utils.R' 'xml_attr.R' 'xml_children.R'
+ 'xml_find.R' 'xml_modify.R' 'xml_name.R' 'xml_namespaces.R'
+ 'xml_path.R' 'xml_schema.R' 'xml_serialize.R' 'xml_structure.R'
+ 'xml_text.R' 'xml_type.R' 'xml_url.R' 'xml_write.R' 'zzz.R'
+NeedsCompilation: yes
+Packaged: 2017-01-06 14:09:44 UTC; jhester
+Author: Hadley Wickham [aut],
+ James Hester [aut, cre],
+ Jeroen Ooms [aut],
+ RStudio [cph],
+ R Foundation [ctb] (Copy of R-project homepage cached as example)
+Maintainer: James Hester <james.hester at rstudio.com>
+Repository: CRAN
+Date/Publication: 2017-01-07 11:35:17
diff --git a/MD5 b/MD5
new file mode 100644
index 0000000..2a58abd
--- /dev/null
+++ b/MD5
@@ -0,0 +1,115 @@
+7b22b59c2866c5626458b24a56acb5eb *DESCRIPTION
+4e1f4a66127068f7885b1ce9fefedb07 *NAMESPACE
+6c96527e57c088cf54c238e7e8317b12 *NEWS.md
+dfc441f52ec9da64eae00904f1d74ead *R/RcppExports.R
+efeedfe02bf29b68260c262df64d882d *R/as_list.R
+2e24c60fdf22a2aebfcaab2c91b03452 *R/as_xml_document.R
+b901072b0ecd2f70e60a1327d764cd70 *R/classes.R
+1543aa35645349a951aad3364b1949be *R/paths.R
+7704129c781ce428f6e23a0756851b41 *R/utils.R
+f17a1b9fa937d66d171c1764626b9791 *R/xml_attr.R
+b81b7f6d7317a61ddf2dc2853badefa4 *R/xml_children.R
+36f5724b3b6fdfe9368c9acc7c25de6a *R/xml_find.R
+2fdabe91be1adad1a46c1dfdee6b905b *R/xml_modify.R
+478c4e2684b0a5360884927158dd903c *R/xml_name.R
+649cf1b94f4ad880383e0769d2250178 *R/xml_namespaces.R
+138989ae029bf968ca02e3803c5147dd *R/xml_parse.R
+2706069aca6092fb2b5fc4f8f4379c3a *R/xml_path.R
+e48ec74c5598397df079bd346de4f6d5 *R/xml_schema.R
+7354b331749acfcd2bbbfea0222dd752 *R/xml_serialize.R
+424771b35b57b65fa9059943c3284953 *R/xml_structure.R
+22e72e228f601585d84c871f903ef74c *R/xml_text.R
+87e30f5b7c448d142118127c55cb5770 *R/xml_type.R
+f459598b4712e40ab2dca73beb1a00ef *R/xml_url.R
+f47d60217e13f3b1173fe65e2f8c3a2f *R/xml_write.R
+ba02edba719f666ae5dffeb962b215df *R/zzz.R
+0bda8bcf4ea5b3c5d6b8df5fc8096ccb *README.md
+f4514a9a6c2e08612dba25767717a65a *build/vignette.rds
+79379862415a7bafaae7d68aa19aab39 *build/xml2.pdf
+6071edd604dbeb75308cfbedc7790398 *cleanup
+3c3ad3c3abb2a5ef3752d70a3cb2c59f *configure
+d41d8cd98f00b204e9800998ecf8427e *configure.win
+9bb95c171ea8d0083ec59b77dc9650b5 *inst/doc/modification.R
+ae51ba62548cf18322d5b3ec999de7eb *inst/doc/modification.Rmd
+ee9691f93fdf294dcd2401db6a33b55b *inst/doc/modification.html
+aa42c312677d8bd431c9f6055bacc5a2 *inst/extdata/order-doc.xml
+d815540b17662ddb93faa91d78ba0e5f *inst/extdata/order-schema.xml
+3d75cb054004ba36cf5bb4cac44eb27c *inst/extdata/r-project.html
+776eac29335438227dda8d244515c341 *inst/include/xml2_types.h
+672cae9b3d87a5580f6e2f40b9e54c99 *man/as_list.Rd
+e5c2a742f2de7359c54ff9039d2e119e *man/as_xml_document.Rd
+a4e8d995a624650a05ea9b8c6d435432 *man/read_xml.Rd
+24b2935e381103674077d1d9a6ad4e27 *man/url_absolute.Rd
+6223f4d852c642df2b09fc513f30b27e *man/url_escape.Rd
+c6359ec11955cb2ab7587cb39d7f0ef6 *man/url_parse.Rd
+9cbb5559255c3bf4e5c47d9554268fc9 *man/write_xml.Rd
+472bb4d0d98d78fe1fcfce67b578dff6 *man/xml_attr.Rd
+7bddba06da41606a5ab5502b6c92f5ac *man/xml_cdata.Rd
+c9d944a0cc1fbd6f2dacba8944b720a4 *man/xml_children.Rd
+eec4938beaf42ddbe1764fbe20a83172 *man/xml_comment.Rd
+5db6b02f83879ca57feb9e18574ce9d5 *man/xml_dtd.Rd
+4a717f45ad9db3e0df04aa55ca2d1425 *man/xml_find_all.Rd
+006d39e67f0ca9f1285b1cb03dc86ef9 *man/xml_missing.Rd
+fca5835d845dd407681e77260a6568af *man/xml_name.Rd
+f70fc26b367345422afed85bd7598395 *man/xml_new_document.Rd
+037d2c236cca02742de11e64b23fa799 *man/xml_ns.Rd
+ce6906ae564f92989f5f34954d61baac *man/xml_ns_strip.Rd
+dbf2a353f094ffb6ecd83057cbc0f8d5 *man/xml_path.Rd
+749633a0eb7e0572db780e5865bd2913 *man/xml_replace.Rd
+eee09c0505d35d3a65ba5046b29ca4be *man/xml_serialize.Rd
+a9f5428f85b307c4c9fc57a0c2335e81 *man/xml_set_namespace.Rd
+11b8a920e5234a8919bf10f1898fc573 *man/xml_structure.Rd
+516d4923a3e271bed7d1c4ea9a1aa6e2 *man/xml_text.Rd
+38ba6b30dd14531181f367e7cf8d5733 *man/xml_type.Rd
+a1713061a0ddaa768ea801bf36468087 *man/xml_url.Rd
+f2db32908ff0cdc1fa672f77eb5c5e7b *man/xml_validate.Rd
+efac49ba259e55ca828361e371908689 *src/Makevars.in
+b95a9c820250cfd180989665fb796d11 *src/Makevars.win
+69779d5f2a8ba3c88ab2a42563286fdb *src/RcppExports.cpp
+be2c92c3d2a0fb1ded1b2bc32a737fa4 *src/connection.cpp
+3b2c8ed2d12e6a3a0ae8f1a73c065d3c *src/xml2_doc.cpp
+3c3e387db63660a3d30f5b9f7304294d *src/xml2_init.cpp
+c764718bad3bc105164a88bc881f9061 *src/xml2_namespace.cpp
+970127334c4f8989442bd8ea2788af47 *src/xml2_node.cpp
+03a4d6a31342fe701dd20d3b3beb575d *src/xml2_output.cpp
+f842d5236e178aa9ca5928386ce24fee *src/xml2_schema.cpp
+1c198cf53390d534d28d3f1d3507dec2 *src/xml2_url.cpp
+c0dd2aad034c8822def5607011821eb7 *src/xml2_utils.h
+f0c22499ba8f1b7d00b4c68c48908afa *src/xml2_xpath.cpp
+d3359ccbcfce42a321e96d2044c0d953 *tests/testthat.R
+6c4d2afc7e3283500020ddea8a3a5e44 *tests/testthat/cd_catalog.xml.bz2
+e3fd8bf3406ddda18ae64691e137e45f *tests/testthat/helper-version.R
+88882b509327ad5661e9e8b8ca7805d4 *tests/testthat/lego.html.bz2
+1269f1bd55f2272ef97d7f0900edee94 *tests/testthat/ns-multiple-aliases.xml
+188d2702e42e6f3cb3b4d36779c6f0cc *tests/testthat/ns-multiple-default.xml
+cf756b5e4205cd7b096ef03475ab2571 *tests/testthat/ns-multiple-prefix.xml
+5e2eed6052aeceba49eb114479f6694a *tests/testthat/ns-multiple.xml
+b2e3f18f3d068252f9dbb073b92a437a *tests/testthat/output/html_structure.txt
+4ff7b732ee1d7c31bafa8a9a57babd23 *tests/testthat/output/print-xml_document.txt
+b35d08dad980a718fa0eea965ef03f45 *tests/testthat/output/print-xml_node.txt
+1134e7618c0aa64eadd06fa2d4906b21 *tests/testthat/output/print-xml_nodeset.txt
+cde9be4ab9103a9c2766d2f071832c4f *tests/testthat/test-as_list.R
+d5906d49db2eb83fb7c358f8582aa2b2 *tests/testthat/test-as_xml_document.R
+f11f06505ed7203653dab06a56075c1f *tests/testthat/test-cdata.R
+ba04389bf1231d1edbb7e4beb014f04d *tests/testthat/test-comment.R
+f16f5a677e6e647de277e7b0f4cf9e44 *tests/testthat/test-dtd.R
+64d0872b75fa98871ab36d602ff390a7 *tests/testthat/test-format.R
+bd71f73c90711d0472deb374b30cbda8 *tests/testthat/test-modify-xml.R
+67f5d50ba8bc1097905d4e6c6399992e *tests/testthat/test-namespaces.R
+f9040e9a15fc995b1f187caaff3584ba *tests/testthat/test-null.R
+ff777492a9390829bea82e9a588153a6 *tests/testthat/test-print.R
+6118dee651b9cfea3e20301dbf5c15e5 *tests/testthat/test-read-xml.R
+d560dd14cf92d732d6578c48af9d08e0 *tests/testthat/test-url.R
+77c654f21a95b5359f28d7db10760021 *tests/testthat/test-write_xml.R
+fa4acefcc47e73f77f7ceeb55afa754d *tests/testthat/test-xml_attrs.R
+31d7265f51cca8421bf424e5a8a5aaac *tests/testthat/test-xml_children.R
+cb3b65d3346cd83b56efecfdf28161a8 *tests/testthat/test-xml_find.R
+304de1b3d4405f8fa282166bc2cb540c *tests/testthat/test-xml_missing.R
+c161bbcd94441a4a86ed74a693d05e28 *tests/testthat/test-xml_name.R
+34489e0c5773c8892b2fdb6ddac27f4d *tests/testthat/test-xml_nodeset.R
+1d96bc26d0d47fd2f9c0a88279957f37 *tests/testthat/test-xml_schema.R
+f4bb4584a48ef582d2e0ab24064a0ab9 *tests/testthat/test-xml_serialize.R
+265405f9caea92a70a6f94e2d3078359 *tests/testthat/test-xml_structure.R
+116ac76436068562dbf9754f5a7e4263 *tests/testthat/test-xml_text.R
+50ce4b1afd65325497a8b4dcce763e94 *tools/winlibs.R
+ae51ba62548cf18322d5b3ec999de7eb *vignettes/modification.Rmd
diff --git a/NAMESPACE b/NAMESPACE
new file mode 100644
index 0000000..5ff0ab8
--- /dev/null
+++ b/NAMESPACE
@@ -0,0 +1,207 @@
+# Generated by roxygen2: do not edit by hand
+
+S3method("[",xml_missing)
+S3method("[",xml_nodeset)
+S3method("[[",xml_missing)
+S3method("xml_attr<-",xml_missing)
+S3method("xml_attr<-",xml_node)
+S3method("xml_attr<-",xml_nodeset)
+S3method("xml_attrs<-",xml_missing)
+S3method("xml_attrs<-",xml_node)
+S3method("xml_attrs<-",xml_nodeset)
+S3method("xml_name<-",xml_missing)
+S3method("xml_name<-",xml_node)
+S3method("xml_name<-",xml_nodeset)
+S3method("xml_text<-",xml_missing)
+S3method("xml_text<-",xml_node)
+S3method("xml_text<-",xml_nodeset)
+S3method(as.character,xml_document)
+S3method(as.character,xml_missing)
+S3method(as.character,xml_node)
+S3method(as.character,xml_nodeset)
+S3method(as_list,xml_missing)
+S3method(as_list,xml_node)
+S3method(as_list,xml_nodeset)
+S3method(as_xml_document,character)
+S3method(as_xml_document,connection)
+S3method(as_xml_document,list)
+S3method(as_xml_document,raw)
+S3method(as_xml_document,response)
+S3method(as_xml_document,xml_document)
+S3method(as_xml_document,xml_node)
+S3method(as_xml_document,xml_nodeset)
+S3method(format,xml_node)
+S3method(is.na,xml_missing)
+S3method(is.na,xml_node)
+S3method(is.na,xml_nodeset)
+S3method(nodeset_apply,xml_document)
+S3method(nodeset_apply,xml_missing)
+S3method(nodeset_apply,xml_node)
+S3method(nodeset_apply,xml_nodeset)
+S3method(print,xml_document)
+S3method(print,xml_missing)
+S3method(print,xml_namespace)
+S3method(print,xml_node)
+S3method(print,xml_nodeset)
+S3method(read_html,default)
+S3method(read_html,response)
+S3method(read_xml,character)
+S3method(read_xml,connection)
+S3method(read_xml,raw)
+S3method(read_xml,response)
+S3method(tree_structure,xml_missing)
+S3method(tree_structure,xml_node)
+S3method(tree_structure,xml_nodeset)
+S3method(write_html,xml_document)
+S3method(write_html,xml_missing)
+S3method(write_html,xml_node)
+S3method(write_html,xml_nodeset)
+S3method(write_xml,xml_document)
+S3method(write_xml,xml_missing)
+S3method(write_xml,xml_node)
+S3method(write_xml,xml_nodeset)
+S3method(xml_add_child,xml_document)
+S3method(xml_add_child,xml_missing)
+S3method(xml_add_child,xml_node)
+S3method(xml_add_child,xml_nodeset)
+S3method(xml_add_parent,xml_missing)
+S3method(xml_add_parent,xml_node)
+S3method(xml_add_parent,xml_nodeset)
+S3method(xml_add_sibling,xml_missing)
+S3method(xml_add_sibling,xml_node)
+S3method(xml_add_sibling,xml_nodeset)
+S3method(xml_attr,xml_missing)
+S3method(xml_attr,xml_node)
+S3method(xml_attr,xml_nodeset)
+S3method(xml_attrs,xml_missing)
+S3method(xml_attrs,xml_node)
+S3method(xml_attrs,xml_nodeset)
+S3method(xml_double,xml_missing)
+S3method(xml_double,xml_node)
+S3method(xml_double,xml_nodeset)
+S3method(xml_find_all,xml_missing)
+S3method(xml_find_all,xml_node)
+S3method(xml_find_all,xml_nodeset)
+S3method(xml_find_chr,xml_missing)
+S3method(xml_find_chr,xml_node)
+S3method(xml_find_chr,xml_nodeset)
+S3method(xml_find_first,xml_node)
+S3method(xml_find_first,xml_nodeset)
+S3method(xml_find_lgl,xml_missing)
+S3method(xml_find_lgl,xml_node)
+S3method(xml_find_lgl,xml_nodeset)
+S3method(xml_find_num,xml_missing)
+S3method(xml_find_num,xml_node)
+S3method(xml_find_num,xml_nodeset)
+S3method(xml_integer,xml_missing)
+S3method(xml_integer,xml_node)
+S3method(xml_integer,xml_nodeset)
+S3method(xml_length,xml_missing)
+S3method(xml_length,xml_node)
+S3method(xml_length,xml_nodeset)
+S3method(xml_name,xml_missing)
+S3method(xml_name,xml_node)
+S3method(xml_name,xml_nodeset)
+S3method(xml_ns,xml_document)
+S3method(xml_ns,xml_missing)
+S3method(xml_ns,xml_node)
+S3method(xml_ns,xml_nodeset)
+S3method(xml_parent,xml_missing)
+S3method(xml_parent,xml_node)
+S3method(xml_parent,xml_nodeset)
+S3method(xml_path,xml_missing)
+S3method(xml_path,xml_node)
+S3method(xml_path,xml_nodeset)
+S3method(xml_remove,xml_missing)
+S3method(xml_remove,xml_node)
+S3method(xml_remove,xml_nodeset)
+S3method(xml_replace,xml_missing)
+S3method(xml_replace,xml_node)
+S3method(xml_replace,xml_nodeset)
+S3method(xml_serialize,xml_document)
+S3method(xml_serialize,xml_node)
+S3method(xml_serialize,xml_nodeset)
+S3method(xml_set_attr,xml_missing)
+S3method(xml_set_attr,xml_node)
+S3method(xml_set_attr,xml_nodeset)
+S3method(xml_set_attrs,xml_missing)
+S3method(xml_set_attrs,xml_node)
+S3method(xml_set_attrs,xml_nodeset)
+S3method(xml_set_name,xml_missing)
+S3method(xml_set_name,xml_node)
+S3method(xml_set_name,xml_nodeset)
+S3method(xml_text,xml_missing)
+S3method(xml_text,xml_node)
+S3method(xml_text,xml_nodeset)
+S3method(xml_type,xml_missing)
+S3method(xml_type,xml_node)
+S3method(xml_type,xml_nodeset)
+S3method(xml_url,xml_missing)
+S3method(xml_url,xml_node)
+S3method(xml_url,xml_nodeset)
+S3method(xml_validate,xml_document)
+export("xml_attr<-")
+export("xml_attrs<-")
+export("xml_name<-")
+export("xml_text<-")
+export(as_list)
+export(as_xml_document)
+export(html_structure)
+export(read_html)
+export(read_xml)
+export(url_absolute)
+export(url_escape)
+export(url_parse)
+export(url_relative)
+export(url_unescape)
+export(write_html)
+export(write_xml)
+export(xml_add_child)
+export(xml_add_parent)
+export(xml_add_sibling)
+export(xml_attr)
+export(xml_attrs)
+export(xml_cdata)
+export(xml_child)
+export(xml_children)
+export(xml_comment)
+export(xml_contents)
+export(xml_double)
+export(xml_dtd)
+export(xml_find_all)
+export(xml_find_chr)
+export(xml_find_first)
+export(xml_find_lgl)
+export(xml_find_num)
+export(xml_find_one)
+export(xml_has_attr)
+export(xml_integer)
+export(xml_length)
+export(xml_missing)
+export(xml_name)
+export(xml_new_document)
+export(xml_new_root)
+export(xml_ns)
+export(xml_ns_rename)
+export(xml_ns_strip)
+export(xml_parent)
+export(xml_parents)
+export(xml_path)
+export(xml_remove)
+export(xml_replace)
+export(xml_root)
+export(xml_serialize)
+export(xml_set_attr)
+export(xml_set_attrs)
+export(xml_set_name)
+export(xml_set_namespace)
+export(xml_set_text)
+export(xml_siblings)
+export(xml_structure)
+export(xml_text)
+export(xml_type)
+export(xml_unserialize)
+export(xml_url)
+export(xml_validate)
+importFrom(Rcpp,sourceCpp)
+useDynLib(xml2)
diff --git a/NEWS.md b/NEWS.md
new file mode 100644
index 0000000..f48f68f
--- /dev/null
+++ b/NEWS.md
@@ -0,0 +1,116 @@
+# xml2 1.1.0
+
+## New Features
+* `write_xml()` and `write_html()` now accept connections as well as filenames
+ for output. (#157)
+
+* `xml_add_child()` now takes a `.where` argument specifying where to add the
+ new children. (#138)
+
+* `as_xml()` generic function to convert R objects to xml. The most important
+ method is for lists and enables full roundtrip support for going to and back
+ from xml for lists and enables full roundtrip support to and from XML. (#137, #143)
+
+* `xml_new_root()` can be used to create a new document and a root node in one step (#131).
+
+* `xml_add_parent()` inserts a new node between the node and its parent (#129)
+
+* Add `xml_validate()` to validate a document against an xml schema (#31, @jeroenooms).
+
+* Export `xml2_types.h` to allow for extension packages such as xslt.
+
+* `xml_comment()` allows you to add comment nodes to a document. (#111)
+
+* `xml_cdata()` allows you to add CDATA nodes to a document. (#128)
+
+* Add `xml_set_text()` and `xml_set_name()` equivalent to `xml_text<-` and `xml_name<-`. (#130).
+
+* Add `xml_set_attr()` and `xml_set_attrs()` equivalent to `xml_attr<-` and `xml_attrs<-`. (#109, #130)
+
+* Add `write_html()` method (#133).
+
+## Bugfixes
+
+* `xml_new_document()` now explicitly sets the encoding (default UTF-8) (#142)
+
+* Document formatting options for `write_xml()` (#132)
+
+* Add missing methods for xml_missing objects. (#134)
+
+* Bugfix for xml_length.xml_nodeset that caused it to fail unconditionally. (#140)
+
+* `is.na()` now returns `TRUE` for `xml_missing` objects. (#139)
+
+* Trim non-breaking spaces in `xml_text(trim = TRUE)` (#151).
+
+* Allow setting non-character attributes (values are coerced to characters). (@sjp, #117, #122).
+
+* Fixed return value in call to vapply in xml_integer.xml_nodeset. (@ddiez, #146, #147).
+
+* Allow docs missing a root element to be created and printed. (@sjp, #126, #121).
+
+* xml_add_* methods now return invisibly. (@sjp, #124)
+
+* `as_list()` now preserves element names when attributes exist, and escapes
+ XML attributes that conflict with special R attributes (@peterfoley, #115).
+
+# xml2 1.0.0
+
+* All C++ functions now use `checked_get()` instead of `get()` where possible,
+ so NULL XPtrs properly throw an error rather than crashing. (@jimhester,
+ #101, #104).
+
+* `xml_integer()` and `xml_double()` functions to make it easy to extract
+ integer and double text from nodes (@jimhester, #97, #99).
+
+* xml2 now supports modification and creation of XML nodes. New functions
+ `xml_new_document()`, `xml_new_child()`, `xml_new_sibling()`,
+ `xml_set_namespace()`, , `xml_remove()`, `xml_replace()`, `xml_root()`
+ and replacement methods for `xml_name()`, `xml_attr()`, `xml_attrs()` and
+ `xml_text()` (@jimhester, #9 #76)
+
+* `xml_ns()` now keeps namespace prefixes that point to the same URI
+ (@jimhester, #35, #95).
+
+* `read_xml()` and `read_html()` methods added for `httr::response()` objects.
+ (@jimhester, #63, #93)
+
+* `xml_child()` function to make selecting children a little easier
+ (@jimhester, #23, #94)
+
+* `xml_find_one()` has been deprecated in favor of `xml_find_first()`
+ (@jimhester, #58, #92)
+
+* `xml_read()` functions now default to passing the document's namespace
+ object. Namespace definitions can now be removed as well as added and
+ `xml_ns_strip()` added to remove all default namespaces from a document.
+ (@jimhester, #28, #89)
+
+* `xml_read()` gains a `options` argument to control all available parsing
+ options, including `HUGE` to turn off limits for parsing very large
+ documents and now drops blank text nodes by default, mimicking default
+ behavior of XML package. (@jimhester, #49, #62, #85, #88)
+
+* `xml_write()` expands the path on filenames, so directories can be specified
+ with '~/' (@jimhester, #86, #80)
+
+* `xml_find_one()` now returns a 'xml_missing' node object if there are 0
+ matches (@jimhester, #55, #53, hadley/rvest#82).
+
+* `xml_find_num()`, `xml_find_chr()`, `xml_find_lgl()` functions added to
+ return numeric, character and logical results from XPath expressions. (@jimhester, #55)
+
+* `xml_name()` and `xml_text()` always correctly encode returned value as
+ UTF-8 (#54).
+
+# xml2 0.1.2
+
+* Improved configure script - now works again on R-devel on windows.
+
+* Compiles with older versions of libxml2.,
+
+# xml2 0.1.1
+
+* Make configure script more cross platform.
+
+* Add `xml_length()` to count the number of children (#32).
diff --git a/R/RcppExports.R b/R/RcppExports.R
new file mode 100644
index 0000000..750251e
--- /dev/null
+++ b/R/RcppExports.R
@@ -0,0 +1,275 @@
+# Generated by using Rcpp::compileAttributes() -> do not edit by hand
+# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
+
+read_connection_ <- function(con, chunk_size = 64 * 1024L) {
+ .Call('xml2_read_connection_', PACKAGE = 'xml2', con, chunk_size)
+}
+
+xml_parse_options <- function() {
+ .Call('xml2_xml_parse_options', PACKAGE = 'xml2')
+}
+
+doc_parse_file <- function(path, encoding = "", as_html = FALSE, options = 0L) {
+ .Call('xml2_doc_parse_file', PACKAGE = 'xml2', path, encoding, as_html, options)
+}
+
+doc_parse_raw <- function(x, encoding, base_url = "", as_html = FALSE, options = 0L) {
+ .Call('xml2_doc_parse_raw', PACKAGE = 'xml2', x, encoding, base_url, as_html, options)
+}
+
+doc_root <- function(x) {
+ .Call('xml2_doc_root', PACKAGE = 'xml2', x)
+}
+
+doc_has_root <- function(x) {
+ .Call('xml2_doc_has_root', PACKAGE = 'xml2', x)
+}
+
+doc_url <- function(x) {
+ .Call('xml2_doc_url', PACKAGE = 'xml2', x)
+}
+
+doc_new <- function(version, encoding = "UTF-8") {
+ .Call('xml2_doc_new', PACKAGE = 'xml2', version, encoding)
+}
+
+doc_set_root <- function(doc, root) {
+ .Call('xml2_doc_set_root', PACKAGE = 'xml2', doc, root)
+}
+
+libxml2_version <- function() {
+ .Call('xml2_libxml2_version', PACKAGE = 'xml2')
+}
+
+unique_ns <- function(ns) {
+ .Call('xml2_unique_ns', PACKAGE = 'xml2', ns)
+}
+
+doc_namespaces <- function(doc) {
+ .Call('xml2_doc_namespaces', PACKAGE = 'xml2', doc)
+}
+
+ns_lookup_uri <- function(doc, node, uri) {
+ .Call('xml2_ns_lookup_uri', PACKAGE = 'xml2', doc, node, uri)
+}
+
+ns_lookup <- function(doc, node, prefix) {
+ .Call('xml2_ns_lookup', PACKAGE = 'xml2', doc, node, prefix)
+}
+
+node_name <- function(node, nsMap) {
+ .Call('xml2_node_name', PACKAGE = 'xml2', node, nsMap)
+}
+
+node_set_name <- function(node, value) {
+ invisible(.Call('xml2_node_set_name', PACKAGE = 'xml2', node, value))
+}
+
+node_text <- function(node) {
+ .Call('xml2_node_text', PACKAGE = 'xml2', node)
+}
+
+node_attr <- function(node, name, missing, nsMap) {
+ .Call('xml2_node_attr', PACKAGE = 'xml2', node, name, missing, nsMap)
+}
+
+node_attrs <- function(node_, nsMap) {
+ .Call('xml2_node_attrs', PACKAGE = 'xml2', node_, nsMap)
+}
+
+node_set_attr <- function(node_, name, value, nsMap) {
+ invisible(.Call('xml2_node_set_attr', PACKAGE = 'xml2', node_, name, value, nsMap))
+}
+
+node_children <- function(node, onlyNode = TRUE) {
+ .Call('xml2_node_children', PACKAGE = 'xml2', node, onlyNode)
+}
+
+node_length <- function(node, onlyNode = TRUE) {
+ .Call('xml2_node_length', PACKAGE = 'xml2', node, onlyNode)
+}
+
+node_parents <- function(node) {
+ .Call('xml2_node_parents', PACKAGE = 'xml2', node)
+}
+
+node_siblings <- function(node, onlyNode = TRUE) {
+ .Call('xml2_node_siblings', PACKAGE = 'xml2', node, onlyNode)
+}
+
+node_parent <- function(n) {
+ .Call('xml2_node_parent', PACKAGE = 'xml2', n)
+}
+
+node_path <- function(n) {
+ .Call('xml2_node_path', PACKAGE = 'xml2', n)
+}
+
+nodes_duplicated <- function(nodes) {
+ .Call('xml2_nodes_duplicated', PACKAGE = 'xml2', nodes)
+}
+
+node_type <- function(node) {
+ .Call('xml2_node_type', PACKAGE = 'xml2', node)
+}
+
+node_copy <- function(node) {
+ .Call('xml2_node_copy', PACKAGE = 'xml2', node)
+}
+
+node_set_content <- function(node, content) {
+ invisible(.Call('xml2_node_set_content', PACKAGE = 'xml2', node, content))
+}
+
+node_append_content <- function(node, content) {
+ invisible(.Call('xml2_node_append_content', PACKAGE = 'xml2', node, content))
+}
+
+node_append_child <- function(parent, cur) {
+ .Call('xml2_node_append_child', PACKAGE = 'xml2', parent, cur)
+}
+
+node_prepend_sibling <- function(cur, elem) {
+ .Call('xml2_node_prepend_sibling', PACKAGE = 'xml2', cur, elem)
+}
+
+node_append_sibling <- function(cur, elem) {
+ .Call('xml2_node_append_sibling', PACKAGE = 'xml2', cur, elem)
+}
+
+node_replace <- function(old, cur) {
+ .Call('xml2_node_replace', PACKAGE = 'xml2', old, cur)
+}
+
+node_remove <- function(cur, free) {
+ invisible(.Call('xml2_node_remove', PACKAGE = 'xml2', cur, free))
+}
+
+node_new <- function(name) {
+ .Call('xml2_node_new', PACKAGE = 'xml2', name)
+}
+
+node_cdata_new <- function(doc, content) {
+ .Call('xml2_node_cdata_new', PACKAGE = 'xml2', doc, content)
+}
+
+node_comment_new <- function(content) {
+ .Call('xml2_node_comment_new', PACKAGE = 'xml2', content)
+}
+
+node_new_ns <- function(name, ns) {
+ .Call('xml2_node_new_ns', PACKAGE = 'xml2', name, ns)
+}
+
+node_null <- function() {
+ .Call('xml2_node_null', PACKAGE = 'xml2')
+}
+
+node_set_namespace_uri <- function(doc, node, uri) {
+ invisible(.Call('xml2_node_set_namespace_uri', PACKAGE = 'xml2', doc, node, uri))
+}
+
+node_set_namespace_prefix <- function(doc, node, prefix) {
+ invisible(.Call('xml2_node_set_namespace_prefix', PACKAGE = 'xml2', doc, node, prefix))
+}
+
+node_new_dtd <- function(doc, name = "", eid = "", sid = "") {
+ invisible(.Call('xml2_node_new_dtd', PACKAGE = 'xml2', doc, name, eid, sid))
+}
+
+xml_save_options <- function() {
+ .Call('xml2_xml_save_options', PACKAGE = 'xml2')
+}
+
+doc_write_file <- function(x, path, encoding = "UTF-8", options = 1L) {
+ invisible(.Call('xml2_doc_write_file', PACKAGE = 'xml2', x, path, encoding, options))
+}
+
+doc_write_connection <- function(x, connection, encoding = "UTF-8", options = 1L) {
+ invisible(.Call('xml2_doc_write_connection', PACKAGE = 'xml2', x, connection, encoding, options))
+}
+
+doc_write_character <- function(x, encoding = "UTF-8", options = 1L) {
+ .Call('xml2_doc_write_character', PACKAGE = 'xml2', x, encoding, options)
+}
+
+node_write_file <- function(x, path, encoding = "UTF-8", options = 1L) {
+ invisible(.Call('xml2_node_write_file', PACKAGE = 'xml2', x, path, encoding, options))
+}
+
+node_write_connection <- function(x, connection, encoding = "UTF-8", options = 1L) {
+ invisible(.Call('xml2_node_write_connection', PACKAGE = 'xml2', x, connection, encoding, options))
+}
+
+node_write_character <- function(x, encoding = "UTF-8", options = 1L) {
+ .Call('xml2_node_write_character', PACKAGE = 'xml2', x, encoding, options)
+}
+
+doc_validate <- function(doc, schema) {
+ .Call('xml2_doc_validate', PACKAGE = 'xml2', doc, schema)
+}
+
+#' Convert between relative and absolute urls.
+#'
+#' @param x A character vector of urls relative to that base
+#' @param base A string giving a base url.
+#' @return A character vector of urls
+#' @seealso \code{\link{xml_url}} to retrieve the URL associated with a document
+#' @export
+#' @examples
+#' url_absolute(c(".", "..", "/", "/x"), "http://hadley.nz/a/b/c/d")
+#'
+#' url_relative("http://hadley.nz/a/c", "http://hadley.nz")
+#' url_relative("http://hadley.nz/a/c", "http://hadley.nz/")
+#' url_relative("http://hadley.nz/a/c", "http://hadley.nz/a/b")
+#' url_relative("http://hadley.nz/a/c", "http://hadley.nz/a/b/")
+url_absolute <- function(x, base) {
+ .Call('xml2_url_absolute', PACKAGE = 'xml2', x, base)
+}
+
+#' @export
+#' @rdname url_absolute
+url_relative <- function(x, base) {
+ .Call('xml2_url_relative', PACKAGE = 'xml2', x, base)
+}
+
+#' Parse a url into its component pieces.
+#'
+#' @param x A character vector of urls.
+#' @return A dataframe with one row for each element of \code{x} and
+#' columns: scheme, server, port, user, path, query, fragment.
+#' @export
+#' @examples
+#' url_parse("http://had.co.nz/")
+#' url_parse("http://had.co.nz:1234/")
+#' url_parse("http://had.co.nz:1234/?a=1&b=2")
+#' url_parse("http://had.co.nz:1234/?a=1&b=2#def")
+url_parse <- function(x) {
+ .Call('xml2_url_parse', PACKAGE = 'xml2', x)
+}
+
+#' Escape and unescape urls.
+#'
+#' @param x A character vector of urls.
+#' @param reserved A string containing additional characters to avoid escaping.
+#' @export
+#' @examples
+#' url_escape("a b c")
+#' url_escape("a b c", "")
+#'
+#' url_unescape("a%20b%2fc")
+#' url_unescape("%C2%B5")
+url_escape <- function(x, reserved = "") {
+ .Call('xml2_url_escape', PACKAGE = 'xml2', x, reserved)
+}
+
+#' @export
+#' @rdname url_escape
+url_unescape <- function(x) {
+ .Call('xml2_url_unescape', PACKAGE = 'xml2', x)
+}
+
+xpath_search <- function(node, doc, xpath, nsMap, num_results) {
+ .Call('xml2_xpath_search', PACKAGE = 'xml2', node, doc, xpath, nsMap, num_results)
+}
+
diff --git a/R/as_list.R b/R/as_list.R
new file mode 100644
index 0000000..32a102e
--- /dev/null
+++ b/R/as_list.R
@@ -0,0 +1,106 @@
+#' Coerce xml nodes to a list.
+#'
+#' This turns an XML document (or node or nodeset) into the equivalent R
+#' list. Note that this is \code{as_list()}, not \code{as.list()}:
+#' \code{lapply()} automatically calls \code{as.list()} on its inputs, so
+#' we can't override the default.
+#'
+#' \code{as_list} currently only handles the four most common types of
+#' children that an element might have:
+#'
+#' \itemize{
+#' \item Other elements, converted to lists.
+#' \item Attributes, stored as R attributes. Attributes that have special meanings in R
+#' (\code{\link{class}}, \code{\link{comment}}, \code{\link{dim}},
+#' \code{\link{dimnames}}, \code{\link{names}}, \code{\link{row.names}} and
+#' \code{\link{tsp}}) are escaped with '.'
+#' \item Text, stored as a character vector.
+#' }
+#'
+#' @inheritParams xml_name
+#' @param ... Needed for compatibility with generic. Unused.
+#' @export
+#' @examples
+#' as_list(read_xml("<foo> a <b /><c><![CDATA[<d></d>]]></c></foo>"))
+#' as_list(read_xml("<foo> <bar><baz /></bar> </foo>"))
+#' as_list(read_xml("<foo id = 'a'></foo>"))
+#' as_list(read_xml("<foo><bar id='a'/><bar id='b'/></foo>"))
+as_list <- function(x, ns = character(), ...) {
+ UseMethod("as_list")
+}
+
+#' @export
+as_list.xml_missing <- function(x, ns = character(), ...) {
+ list()
+}
+
+# @export
+as_list.xml_document <- function(x, ns = character(), ...) {
+ if (!inherits(x, "xml_node")) {
+ return(list())
+ }
+
+ out <- list(NextMethod())
+ names(out) <- xml_name(x)
+ out
+}
+
+#' @export
+as_list.xml_node <- function(x, ns = character(), ...) {
+ contents <- xml_contents(x)
+ if (length(contents) == 0) {
+ # Base case - contents
+ type <- xml_type(x)
+
+ if (type %in% c("text", "cdata"))
+ return(xml_text(x))
+ if (type != "element" && type != "document")
+ return(paste("[", type, "]"))
+
+ out <- list()
+ } else {
+ out <- lapply(seq_along(contents), function(i) as_list(contents[[i]], ns = ns))
+
+ nms <- ifelse(xml_type(contents) == "element", xml_name(contents, ns = ns), "")
+ if (any(nms != "")) {
+ names(out) <- nms
+ }
+ }
+
+ # Add xml attributes as R attributes
+ attributes(out) <- c(list(names = names(out)), xml_to_r_attrs(xml_attrs(x, ns = ns)))
+
+ out
+}
+
+#' @export
+as_list.xml_nodeset <- function(x, ns = character(), ...) {
+ lapply(seq_along(x), function(i) as_list(x[[i]], ns = ns))
+}
+
+special_attributes <- c("class", "comment", "dim", "dimnames", "names", "row.names", "tsp")
+
+xml_to_r_attrs <- function(x) {
+ if (length(x) == 0) {
+ return(NULL)
+ }
+ # escape special names
+ special <- names(x) %in% special_attributes
+ names(x)[special] <- paste0(".", names(x)[special])
+ as.list(x)
+}
+
+r_attrs_to_xml <- function(x) {
+ if (length(x) == 0) {
+ return(NULL)
+ }
+
+ # Drop R special attributes
+ x <- x[!names(x) %in% special_attributes]
+
+ # Rename any xml attributes needed
+ special <- names(x) %in% paste0(".", special_attributes)
+
+ names(x)[special] <- sub("^\\.", "", names(x)[special])
+ x
+}
diff --git a/R/as_xml_document.R b/R/as_xml_document.R
new file mode 100644
index 0000000..abf31a5
--- /dev/null
+++ b/R/as_xml_document.R
@@ -0,0 +1,84 @@
+#' Coerce a R list to xml nodes.
+#'
+#' This turns an R list into the equivalent XML document. Not all R lists will
+#' produce valid XML, in particular there can only be one root node and all
+#' child nodes need to be named (or empty) lists. R attributes become XML
+#' attributes and R names become XML node names.
+#'
+#' @inheritParams as_list
+#' @include as_list.R xml_parse.R
+#' @export
+#' @examples
+# empty lists generate empty nodes
+#'as_xml_document(list(x = list()))
+#'
+#'# Nesting multiple nodes
+#'as_xml_document(list(foo = list(bar = list(baz = list()))))
+#'
+#'# attributes are stored as R attributes
+#'as_xml_document(list(foo = structure(list(), id = "a")))
+#'as_xml_document(list(foo = list(
+#' bar = structure(list(), id = "a"),
+#' bar = structure(list(), id = "b"))))
+as_xml_document <- function(x, ...) {
+ UseMethod("as_xml_document")
+}
+
+#' @export
+as_xml_document.character <- read_xml.character
+
+#' @export
+as_xml_document.raw <- read_xml.raw
+
+#' @export
+as_xml_document.connection <- read_xml.connection
+
+#' @export
+as_xml_document.response <- read_xml.response
+
+#' @export
+as_xml_document.list <- function(x, ...) {
+ if (length(x) > 1) {
+ stop("Root nodes must be of length 1", call. = FALSE)
+ }
+
+
+ add_node <- function(x, parent, tag = NULL) {
+ if (is.atomic(x)) {
+ return(xml_set_text(parent, as.character(x)))
+ }
+ if (!is.null(tag)) {
+ parent <- xml_add_child(parent, tag)
+ attr <- r_attrs_to_xml(attributes(x))
+ for (i in seq_along(attr)) {
+ xml_set_attr(parent, names(attr)[[i]], attr[[i]])
+ }
+ }
+ for (i in seq_along(x)) {
+ add_node(x[[i]], parent, names(x)[[i]])
+ }
+ }
+
+ doc <- xml_new_document()
+ add_node(x, doc)
+ xml_root(doc)
+}
+
+#' @export
+as_xml_document.xml_node <- function(x, ...) {
+ xml_new_root(.value = x, ..., .copy = TRUE)
+}
+
+#' @export
+as_xml_document.xml_nodeset <- function(x, root, ...) {
+ doc <- xml_new_root(.value = root, ..., .copy = TRUE)
+ for (i in seq_along(x)) {
+ xml_add_child(doc, x[[i]], .copy = TRUE)
+ }
+ doc
+}
+
+#' @export
+as_xml_document.xml_document <- function(x, ...) {
+ x
+}
diff --git a/R/classes.R b/R/classes.R
new file mode 100644
index 0000000..0f3e9a8
--- /dev/null
+++ b/R/classes.R
@@ -0,0 +1,273 @@
+#' @useDynLib xml2
+#' @importFrom Rcpp sourceCpp
+NULL
+
+# node -------------------------------------------------------------------------
+
+xml_node <- function(node = NULL, doc = NULL) {
+ if (inherits(node, "xml_node")) {
+ node
+ } else {
+ structure(list(node = node, doc = doc), class = "xml_node")
+ }
+}
+
+#' @export
+as.character.xml_node <- function(x, ..., options = "format", encoding = "UTF-8") {
+ options <- parse_options(options, xml_save_options())
+ node_write_character(x$node, options = options, encoding = encoding)
+}
+
+#' @export
+print.xml_node <- function(x, width = getOption("width"), max_n = 20, ...) {
+ cat("{xml_node}\n")
+ cat(format(x), "\n", sep = "")
+ show_nodes(xml_children(x), width = width, max_n = max_n)
+}
+
+#' @export
+print.xml_missing <- function(x, width = getOption("width"), max_n = 20, ...) {
+ cat("{xml_missing}\n")
+ cat(format(x), "\n", sep = "")
+}
+
+# document ---------------------------------------------------------------------
+
+xml_document <- function(doc) {
+ if (doc_has_root(doc)) {
+ x <- xml_node(doc_root(doc), doc)
+ class(x) <- c("xml_document", class(x))
+ x
+ } else {
+ structure(list(doc = doc), class = "xml_document")
+ }
+}
+
+#' @export
+print.xml_document <- function(x, width = getOption("width"), max_n = 20, ...) {
+ doc <- xml_document(x$doc)
+ cat("{xml_document}\n")
+ if (inherits(doc, "xml_node")) {
+ cat(format(doc), "\n", sep = "")
+ show_nodes(xml_children(doc), width = width, max_n = max_n)
+ }
+}
+
+#' @export
+as.character.xml_document <- function(x, ..., options = "format", encoding = "UTF-8") {
+ options <- parse_options(options, xml_save_options())
+ doc_write_character(x$doc, options = options, encoding = encoding)
+}
+
+# nodeset ----------------------------------------------------------------------
+
+xml_nodeset <- function(nodes = list()) {
+ nodes <- nodes[!nodes_duplicated(nodes)]
+ structure(nodes, class = "xml_nodeset")
+}
+
+#' @param nodes A list (possible nested) of external pointers to nodes
+#' @return a nodeset
+#' @noRd
+make_nodeset <- function(nodes, doc) {
+ nodes <- unlist(nodes, recursive = FALSE)
+
+ xml_nodeset(lapply(nodes, xml_node, doc = doc))
+}
+
+#' @export
+print.xml_nodeset <- function(x, width = getOption("width"), max_n = 20, ...) {
+ n <- length(x)
+ cat("{xml_nodeset (", n, ")}\n", sep = "")
+
+ if (n > 0)
+ show_nodes(x, width = width, max_n = max_n)
+}
+
+#' @export
+as.character.xml_nodeset <- function(x, ...) {
+ vapply(x, as.character, FUN.VALUE = character(1))
+}
+
+#' @export
+`[.xml_nodeset` <- function(x, i, ...) {
+ if (length(x) == 0) {
+ return(x)
+ }
+ xml_nodeset(NextMethod())
+}
+
+show_nodes <- function(x, width = getOption("width"), max_n = 20) {
+ stopifnot(inherits(x, "xml_nodeset"))
+
+ n <- length(x)
+ if (n == 0)
+ return()
+
+ if (n > max_n) {
+ n <- max_n
+ x <- x[seq_len(n)]
+ trunc <- TRUE
+ } else {
+ trunc <- FALSE
+ }
+
+ label <- format(paste0("[", seq_len(n), "]"), justify = "right")
+ contents <- encodeString(vapply(x, as.character, FUN.VALUE = character(1)))
+
+ desc <- paste0(label, " ", contents)
+ needs_trunc <- nchar(desc) > width
+ desc[needs_trunc] <- paste(substr(desc[needs_trunc], 1, width - 3), "...")
+
+ cat(desc, sep = "\n")
+ if (trunc) {
+ cat("...\n")
+ }
+ invisible()
+}
+
+
+nodeset_apply <- function(x, fun, ...) UseMethod("nodeset_apply")
+
+#' @export
+nodeset_apply.xml_missing <- function(x, fun, ...) {
+ xml_nodeset()
+}
+
+#' @export
+nodeset_apply.xml_nodeset <- function(x, fun, ...) {
+ if (length(x) == 0)
+ return(xml_nodeset())
+
+ is_missing <- is.na(x)
+ res <- list(length(x))
+
+ res[is_missing] <- list(xml_missing())
+ if (any(!is_missing)) {
+ res[!is_missing] <- lapply(x[!is_missing], function(x) fun(x$node, ...))
+ }
+
+ make_nodeset(res, x[[1]]$doc)
+}
+
+#' @export
+nodeset_apply.xml_node <- function(x, fun, ...) {
+ nodes <- fun(x$node, ...)
+ xml_nodeset(lapply(nodes, xml_node, doc = x$doc))
+}
+
+#' @export
+nodeset_apply.xml_document <- function(x, fun, ...) {
+ if (inherits(x, "xml_node")) {
+ NextMethod()
+ } else {
+ xml_nodeset()
+ }
+}
+
+#' @export
+format.xml_node <- function(x, ...) {
+ attrs <- xml_attrs(x)
+ paste("<",
+ paste(
+ c(xml_name(x),
+ format_attributes(attrs)),
+ collapse = " "),
+ ">", sep = "")
+}
+
+format_attributes <- function(x) {
+ if (length(x) == 0) {
+ character(0)
+ } else {
+ paste(names(x), quote_str(x), sep = "=")
+ }
+}
+
+#' Construct an missing xml object
+#' @export
+#' @keywords internal
+xml_missing <- function() {
+ structure(list(), class = "xml_missing")
+}
+
+#' @export
+is.na.xml_missing <- function(x) {
+ TRUE
+}
+
+#' @export
+is.na.xml_nodeset <- function(x) {
+ vapply(x, is.na, logical(1))
+}
+
+#' @export
+is.na.xml_node <- function(x) {
+ FALSE
+}
+
+format.xml_missing <- function(x, ...) {
+ "<NA>"
+}
+
+#' @export
+as.character.xml_missing <- function(x, ...) {
+ NA_character_
+}
+
+# These mimic the behavior of NA[[1]], NA[[2]], NA[1], NA[2]
+
+#' @export
+`[.xml_missing` <- function(x, i, ...) x
+
+#' @export
+`[[.xml_missing` <- function(x, i, ...) if (i == 1L) x else stop("subscript out of bounds")
+
+#' Construct a cdata node
+#' @param content The CDATA content, does not include \code{<![CDATA[}
+#' @examples
+#' x <- xml_new_root("root")
+#' xml_add_child(x, xml_cdata("<d/>"))
+#' as.character(x)
+#' @export
+xml_cdata <- function(content) {
+ structure(content, class = "xml_cdata")
+}
+
+#' Construct a comment node
+#' @param content The comment content
+#' @examples
+#' x <- xml_new_document()
+#' r <- xml_add_child(x, "root")
+#' xml_add_child(r, xml_comment("Hello!"))
+#' as.character(x)
+#' @export
+xml_comment <- function(content) {
+ structure(content, class = "xml_comment")
+}
+
+#' Construct a document type definition
+#'
+#' This is used to create simple document type definitions. If you need to
+#' create a more complicated definition with internal subsets it is recommended
+#' to parse a string directly with \code{read_xml()}.
+#' @param name The name of the declaration
+#' @param external_id The external ID of the declaration
+#' @param system_id The system ID of the declaration
+#' @examples
+#' r <- xml_new_root(
+#' xml_dtd("html",
+#' "-//W3C//DTD XHTML 1.0 Transitional//EN",
+#' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"))
+#'
+#' # Use read_xml directly for more complicated DTD
+#' d <- read_xml(
+#' '<!DOCTYPE doc [
+#' <!ELEMENT doc (#PCDATA)>
+#' <!ENTITY foo " test ">
+#' ]>
+#' <doc>This is a valid document &foo; !</doc>')
+#' @export
+xml_dtd <- function(name = "", external_id = "", system_id = "") {
+ structure(list(name = name, external_id = external_id, system_id = system_id), class = "xml_dtd")
+}
diff --git a/R/paths.R b/R/paths.R
new file mode 100644
index 0000000..e2d98df
--- /dev/null
+++ b/R/paths.R
@@ -0,0 +1,58 @@
+path_to_connection <- function(path, check = c("file", "dir")) {
+ check <- match.arg(check)
+
+ if (!is.character(path) || length(path) != 1L)
+ return(path)
+
+ if (is_url(path)) {
+ if (requireNamespace("curl", quietly = TRUE)) {
+ return(curl::curl(path))
+ } else {
+ return(url(path))
+ }
+ }
+
+ if (check == "file") {
+ path <- check_path(path)
+ } else {
+ path <- file.path(check_path(dirname(path)), basename(path))
+ }
+ switch(tools::file_ext(path),
+ gz = gzfile(path, ""),
+ bz2 = bzfile(path, ""),
+ xz = xzfile(path, ""),
+ zip = zipfile(path, ""),
+ path
+ )
+}
+
+is_url <- function(path) {
+ grepl("^(http|ftp)s?://", path)
+}
+
+check_path <- function(path) {
+ if (file.exists(path))
+ return(normalizePath(path, "/", mustWork = FALSE))
+
+ stop("'", path, "' does not exist",
+ if (!is_absolute_path(path))
+ paste0(" in current working directory ('", getwd(), "')"),
+ ".",
+ call. = FALSE
+ )
+}
+
+is_absolute_path <- function(path) {
+ grepl("^(/|[A-Za-z]:|\\\\|~)", path)
+}
+
+zipfile <- function(path, open = "r") {
+ files <- utils::unzip(path, list = TRUE)
+ file <- files$Name[[1]]
+
+ if (nrow(files) > 1) {
+ message("Multiple files in zip: reading '", file, "'")
+ }
+
+ unz(path, file, open = open)
+}
diff --git a/R/utils.R b/R/utils.R
new file mode 100644
index 0000000..954a3a9
--- /dev/null
+++ b/R/utils.R
@@ -0,0 +1,68 @@
+`%||%` <- function(a, b) if (is.null(a)) b else a
+
+is_named <- function(x) {
+ all(has_names(x))
+}
+
+has_names <- function(x) {
+ nms <- names(x)
+ if (is.null(nms)) {
+ rep(FALSE, length(x))
+ } else {
+ !(is.na(nms) | nms == "")
+ }
+}
+
+# non smart quote version of sQuote
+quote_str <- function(x, quote = "\"") {
+ if (!length(x)) {
+ return(character(0))
+ }
+
+ paste0(quote, x, quote)
+}
+
+is_installed <- function(pkg) {
+ requireNamespace(pkg, quietly = TRUE)
+}
+
+need_package <- function(pkg) {
+ if (is_installed(pkg)) return(invisible())
+
+ stop("Please install ", pkg, " package", call. = FALSE)
+}
+
+# Format the C bitwise flags for display in Rd. The input object is a named
+# integer vector with a 'description' character vector attribute that
+# corresponds to each flag.
+describe_options <- function(x) {
+ paste0("\\describe{\n",
+ paste0(" \\item{", names(x), "}{", attr(x, "description"), "}", collapse = "\n"),
+ "\n}")
+}
+
+s_quote <- function(x) paste0("'", x, "'")
+
+# Similar to match.arg, but returns character() with NULL or empty input and
+# errors if any of the inputs are not found (fixing
+# https://bugs.r-project.org/bugzilla3/show_bug.cgi?id=16659)
+parse_options <- function(arg, options) {
+ if (is.numeric(arg)) {
+ return(as.integer(arg))
+ }
+
+ if (is.null(arg) || !nzchar(arg)) {
+ return(0L)
+ }
+
+ # set duplicates.ok = TRUE so any duplicates are counted differently than
+ # non-matches, then take only unique results
+ i <- pmatch(arg, names(options), duplicates.ok = TRUE)
+ if (any(is.na(i))) {
+ stop(sprintf("`options` %s is not a valid option, should be one of %s",
+ s_quote(arg[is.na(i)][1L]),
+ paste(s_quote(names(options)), collapse = ", ")),
+ call. = FALSE)
+ }
+ sum(options[unique(i)])
+}
diff --git a/R/xml_attr.R b/R/xml_attr.R
new file mode 100644
index 0000000..eb7440f
--- /dev/null
+++ b/R/xml_attr.R
@@ -0,0 +1,242 @@
+#' Retrieve an attribute.
+#'
+#' \code{xml_attrs()} retrieves all attributes values as a named character
+#' vector, \code{xml_attrs() <-} or \code{xml_set_attrs()} sets all attribute
+#' values. \code{xml_attr()} retrieves the value of single attribute and
+#' \code{xml_attr() <-} or \code{xml_set_attr()} modifies its value. If the
+#' attribute doesn't exist, it will return \code{default}, which defaults to
+#' \code{NA}. \code{xml_has_attr()} tests if an attribute is present.
+#'
+#' @inheritParams xml_name
+#' @param attr Name of attribute to extract.
+#' @param default Default value to use when attribute is not present.
+#' @return \code{xml_attr()} returns a character vector. \code{NA} is used
+#' to represent of attributes that aren't defined.
+#'
+#' \code{xml_has_attr()} returns a logical vector.
+#'
+#' \code{xml_attrs()} returns a named character vector if \code{x} x is single
+#' node, or a list of character vectors if given a nodeset
+#' @export
+#' @examples
+#' x <- read_xml("<root id='1'><child id ='a' /><child id='b' d='b'/></root>")
+#' xml_attr(x, "id")
+#' xml_attr(x, "apple")
+#' xml_attrs(x)
+#'
+#' kids <- xml_children(x)
+#' kids
+#' xml_attr(kids, "id")
+#' xml_has_attr(kids, "id")
+#' xml_attrs(kids)
+#'
+#' # Missing attributes give missing values
+#' xml_attr(xml_children(x), "d")
+#' xml_has_attr(xml_children(x), "d")
+#'
+#' # If the document has a namespace, use the ns argument and
+#' # qualified attribute names
+#' x <- read_xml('
+#' <root xmlns:b="http://bar.com" xmlns:f="http://foo.com">
+#' <doc b:id="b" f:id="f" id="" />
+#' </root>
+#' ')
+#' doc <- xml_children(x)[[1]]
+#' ns <- xml_ns(x)
+#'
+#' xml_attrs(doc)
+#' xml_attrs(doc, ns)
+#'
+#' # If you don't supply a ns spec, you get the first matching attribute
+#' xml_attr(doc, "id")
+#' xml_attr(doc, "b:id", ns)
+#' xml_attr(doc, "id", ns)
+#'
+#' # Can set a single attribute with `xml_attr() <-` or `xml_set_attr()`
+#' xml_attr(doc, "id") <- "one"
+#' xml_set_attr(doc, "id", "two")
+#'
+#' # Or set multiple attributes with `xml_attrs()` or `xml_set_attrs()`
+#' xml_attrs(doc) <- c("b:id" = "one", "f:id" = "two", "id" = "three")
+#' xml_set_attrs(doc, c("b:id" = "one", "f:id" = "two", "id" = "three"))
+xml_attr <- function(x, attr, ns = character(), default = NA_character_) {
+ UseMethod("xml_attr")
+}
+
+#' @export
+xml_attr.xml_missing <- function(x, attr, ns = character(), default = NA_character_) {
+ default
+}
+
+#' @export
+xml_attr.xml_node <- function(x, attr, ns = character(),
+ default = NA_character_) {
+ node_attr(x$node, name = attr, missing = default, nsMap = ns)
+}
+
+#' @export
+xml_attr.xml_nodeset <- function(x, attr, ns = character(),
+ default = NA_character_) {
+ vapply(x, xml_attr, attr = attr, default = default, ns = ns,
+ FUN.VALUE = character(1))
+}
+
+#' @export
+#' @rdname xml_attr
+xml_has_attr <- function(x, attr, ns = character()) {
+ !is.na(xml_attr(x, attr, ns = ns))
+}
+
+#' @export
+#' @rdname xml_attr
+xml_attrs <- function(x, ns = character()) {
+ UseMethod("xml_attrs")
+}
+
+#' @export
+xml_attrs.xml_missing <- function(x, ns = character()) {
+ NA_character_
+}
+
+#' @export
+xml_attrs.xml_node <- function(x, ns = character()) {
+ node_attrs(x$node, nsMap = ns)
+}
+
+#' @export
+xml_attrs.xml_nodeset <- function(x, ns = character()) {
+ lapply(x, xml_attrs, ns = ns)
+}
+
+#' @param value character vector of new value.
+#' @rdname xml_attr
+#' @export
+`xml_attr<-` <- function(x, attr, ns = character(), value) {
+ UseMethod("xml_attr<-")
+}
+
+#' @export
+`xml_attr<-.xml_node` <- function(x, attr, ns = character(), value) {
+ if (is.null(value)) {
+ node_set_attr(x$node, name = attr, nsMap = ns, "")
+ } else {
+ value <- as.character(value)
+ node_set_attr(x$node, name = attr, nsMap = ns, value)
+ }
+ x
+}
+
+#' @export
+`xml_attr<-.xml_nodeset` <- function(x, attr, ns = character(), value) {
+ lapply(x, `xml_attr<-`, attr = attr, ns = ns, value = value)
+ x
+}
+
+#' @export
+`xml_attr<-.xml_missing` <- function(x, attr, ns = character(), value) {
+ x
+}
+
+#' @rdname xml_attr
+#' @export
+xml_set_attr <- function(x, attr, value, ns = character()) {
+ UseMethod("xml_set_attr")
+}
+
+# This function definition is used for all methods, we need to rearrange the `ns`
+# argument to be at the end of the set function
+set_attr_fun <- function(x, attr, value, ns = character()) {
+ xml_attr(x = x, attr = attr, ns = ns) <- value
+}
+
+#' @export
+xml_set_attr.xml_node <- set_attr_fun
+
+#' @export
+xml_set_attr.xml_nodeset <- set_attr_fun
+
+#' @export
+xml_set_attr.xml_missing <- set_attr_fun
+
+#' @rdname xml_attr
+#' @export
+`xml_attrs<-` <- function(x, ns = character(), value) {
+ UseMethod("xml_attrs<-")
+}
+
+#' @export
+`xml_attrs<-.xml_node` <- function(x, ns = character(), value) {
+ if (!is_named(value)) {
+ stop("`value` must be a named character vector or `NULL`", call. = FALSE)
+ }
+
+ attrs <- names(value)
+
+ # as.character removes all attributes (including names)
+ value <- stats::setNames(as.character(value), attrs)
+
+ current_attrs <- names(xml_attrs(x, ns = ns))
+
+ existing <- intersect(current_attrs, attrs)
+ new <- setdiff(attrs, current_attrs)
+ removed <- setdiff(current_attrs, attrs)
+
+ # replace existing attributes and add new ones
+ Map(function(attr, val) {
+ xml_attr(x, attr, ns) <- val
+ }, attr = c(existing, new), value[c(existing, new)])
+
+
+ # Remove attributes which no longer exist
+ Map(function(attr) {
+ xml_attr(x, attr, ns) <- NULL
+ }, attr = removed)
+
+ x
+}
+
+#' @export
+`xml_attrs<-.xml_nodeset` <- function(x, ns = character(), value) {
+ if (length(x) == 0) {
+ return(x)
+ }
+ if (!is.list(ns)) {
+ ns <- list(ns)
+ }
+ if (!is.list(value)) {
+ value <- list(value)
+ }
+ if (!all(vapply(value, is_named, logical(1)))) {
+ stop("`value` must be a list of named character vectors")
+ }
+
+ Map(`xml_attrs<-`, x, ns, value)
+
+ x
+}
+
+#' @export
+`xml_attrs<-.xml_missing` <- function(x, ns = character(), value) {
+ x
+}
+
+#' @rdname xml_attr
+#' @export
+xml_set_attrs <- function(x, value, ns = character()) {
+ UseMethod("xml_set_attrs")
+}
+
+# This function definition is used for all methods, we need to rearrange the `ns`
+# argument to be at the end of the set function
+set_attrs_fun <- function(x, value, ns = character()) {
+ xml_attrs(x = x, ns = ns) <- value
+}
+
+#' @export
+xml_set_attrs.xml_node <- set_attrs_fun
+
+#' @export
+xml_set_attrs.xml_nodeset <- set_attrs_fun
+
+#' @export
+xml_set_attrs.xml_missing <- set_attrs_fun
diff --git a/R/xml_children.R b/R/xml_children.R
new file mode 100644
index 0000000..cbe7b7a
--- /dev/null
+++ b/R/xml_children.R
@@ -0,0 +1,141 @@
+#' Navigate around the family tree.
+#'
+#' \code{xml_children} returns only elements, \code{xml_contents} returns
+#' all nodes. \code{xml_length} returns the number of children.
+#' \code{xml_parent} returns the parent node, \code{xml_parents}
+#' returns all parents up to the root. \code{xml_siblings} returns all nodes
+#' at the same level. \code{xml_child} makes it easy to specify a specific
+#' child to return.
+#'
+#' @inheritParams xml_name
+#' @param only_elements For \code{xml_length}, should it count all children,
+#' or just children that are elements (the default)?
+#' @param search For \code{xml_child}, either the child number to return (by
+#' position), or the name of the child node to return. If there are multiple
+#' child nodes with the same name, the first will be returned
+#' @return A node or nodeset (possibly empty). Results are always de-duplicated.
+#' @export
+#' @examples
+#' x <- read_xml("<foo> <bar><boo /></bar> <baz/> </foo>")
+#' xml_children(x)
+#' xml_children(xml_children(x))
+#' xml_siblings(xml_children(x)[[1]])
+#'
+#' # Note the each unique node only appears once in the output
+#' xml_parent(xml_children(x))
+#'
+#' # Mixed content
+#' x <- read_xml("<foo> a <b/> c <d>e</d> f</foo>")
+#' # Childen gets the elements, contents gets all node types
+#' xml_children(x)
+#' xml_contents(x)
+#'
+#' xml_length(x)
+#' xml_length(x, only_elements = FALSE)
+#'
+#' # xml_child makes it easier to select specific children
+#' xml_child(x)
+#' xml_child(x, 2)
+#' xml_child(x, "baz")
+xml_children <- function(x) {
+ nodeset_apply(x, node_children)
+}
+
+#' @export
+#' @rdname xml_children
+xml_child <- function(x, search = 1, ns = xml_ns(x)) {
+ if (length(search) != 1) {
+ stop("`search` must be of length 1", call. = FALSE)
+ }
+
+ if (is.numeric(search)) {
+ xml_children(x)[[search]]
+ } else if (is.character(search)) {
+ xml_find_first(x, xpath = paste0("./", search), ns = ns)
+ } else {
+ stop("`search` must be `numeric` or `character`", call. = FALSE)
+ }
+}
+
+#' @export
+#' @rdname xml_children
+xml_contents <- function(x) {
+ nodeset_apply(x, node_children, onlyNode = FALSE)
+}
+
+#' @export
+#' @rdname xml_children
+xml_parents <- function(x) {
+ nodeset_apply(x, node_parents)
+}
+
+#' @export
+#' @rdname xml_children
+xml_siblings <- function(x) {
+ nodeset_apply(x, node_siblings)
+}
+
+#' @export
+#' @rdname xml_children
+xml_parent <- function(x) {
+ UseMethod("xml_parent")
+}
+
+#' @export
+xml_parent.xml_missing <- function(x) {
+ xml_missing()
+}
+
+#' @export
+xml_parent.xml_node <- function(x) {
+ xml_node(node_parent(x$node), x$doc)
+}
+
+#' @export
+xml_parent.xml_nodeset <- function(x) {
+ nodeset_apply(x, node_parent)
+}
+
+
+#' @export
+#' @rdname xml_children
+xml_length <- function(x, only_elements = TRUE) {
+ UseMethod("xml_length")
+}
+
+#' @export
+xml_length.xml_missing <- function(x, only_elements = TRUE) {
+ 0L
+}
+
+#' @export
+xml_length.xml_node <- function(x, only_elements = TRUE) {
+ node_length(x$node, onlyNode = only_elements)
+}
+
+#' @export
+xml_length.xml_nodeset <- function(x, only_elements = TRUE) {
+ if (length(x) == 0)
+ return(0L)
+
+ vapply(x, xml_length, only_elements = only_elements, FUN.VALUE = integer(1))
+}
+
+#' @export
+#' @rdname xml_children
+xml_root <- function(x) {
+ stopifnot(inherits(x, c("xml_node", "xml_document", "xml_nodeset")))
+
+ if (inherits(x, "xml_nodeset")) {
+ if (length(x) == 0) {
+ return(NULL)
+ } else {
+ return(xml_root(x[[1]]))
+ }
+ }
+ if (!doc_has_root(x$doc)) {
+ xml_missing()
+ } else {
+ xml_document(x$doc)
+ }
+}
diff --git a/R/xml_find.R b/R/xml_find.R
new file mode 100644
index 0000000..322647f
--- /dev/null
+++ b/R/xml_find.R
@@ -0,0 +1,219 @@
+#' Find nodes that match an xpath expression.
+#'
+#' Xpath is like regular expressions for trees - it's worth learning if
+#' you're trying to extract nodes from arbitrary locations in a document.
+#' Use \code{xml_find_all} to find all matches - if there's no match you'll
+#' get an empty result. Use \code{xml_find_first} to find a specific match -
+#' if there's no match you'll get an \code{xml_missing} node.
+#'
+#' @section Deprecated functions:
+#' \code{xml_find_one()} has been deprecated. Instead use
+#' \code{xml_find_first()}.
+
+#' @param xpath A string containing a xpath (1.0) expression.
+#' @inheritParams xml_name
+#' @return \code{xml_find_all} always returns a nodeset: if there are no matches
+#' the nodeset will be empty. The result will always be unique; repeated
+#' nodes are automatically de-duplicated.
+#'
+#' \code{xml_find_first} returns a node if applied to a node, and a nodeset
+#' if applied to a nodeset. The output is \emph{always} the same size as
+#' the input. If there are no matches, \code{xml_find_first} will return a
+#' missing node; if there are multiple matches, it will return the first
+#' only.
+#'
+#' \code{xml_find_num}, \code{xml_find_chr}, \code{xml_find_lgl} return
+#' numeric, character and logical results respectively.
+#' @export
+#' @seealso \code{\link{xml_ns_strip}} to remove the default namespaces
+#' @examples
+#' x <- read_xml("<foo><bar><baz/></bar><baz/></foo>")
+#' xml_find_all(x, ".//baz")
+#' xml_path(xml_find_all(x, ".//baz"))
+#'
+#' # Note the difference between .// and //
+#' # // finds anywhere in the document (ignoring the current node)
+#' # .// finds anywhere beneath the current node
+#' (bar <- xml_find_all(x, ".//bar"))
+#' xml_find_all(bar, ".//baz")
+#' xml_find_all(bar, "//baz")
+#'
+#' # Find all vs find one -----------------------------------------------------
+#' x <- read_xml("<body>
+#' <p>Some <b>text</b>.</p>
+#' <p>Some <b>other</b> <b>text</b>.</p>
+#' <p>No bold here!</p>
+#' </body>")
+#' para <- xml_find_all(x, ".//p")
+#'
+#' # If you apply xml_find_all to a nodeset, it finds all matches,
+#' # de-duplicates them, and returns as a single list. This means you
+#' # never know how many results you'll get
+#' xml_find_all(para, ".//b")
+#'
+#' # xml_find_first only returns the first match per input node. If there are 0
+#' # matches it will return a missing node
+#' xml_find_first(para, ".//b")
+#' xml_text(xml_find_first(para, ".//b"))
+#'
+#' # Namespaces ---------------------------------------------------------------
+#' # If the document uses namespaces, you'll need use xml_ns to form
+#' # a unique mapping between full namespace url and a short prefix
+#' x <- read_xml('
+#' <root xmlns:f = "http://foo.com" xmlns:g = "http://bar.com">
+#' <f:doc><g:baz /></f:doc>
+#' <f:doc><g:baz /></f:doc>
+#' </root>
+#' ')
+#' xml_find_all(x, ".//f:doc")
+#' xml_find_all(x, ".//f:doc", xml_ns(x))
+xml_find_all <- function(x, xpath, ns = xml_ns(x)) {
+ UseMethod("xml_find_all")
+}
+
+#' @export
+xml_find_all.xml_missing <- function(x, xpath, ns = xml_ns(x)) {
+ xml_nodeset()
+}
+
+#' @export
+xml_find_all.xml_node <- function(x, xpath, ns = xml_ns(x)) {
+ nodes <- xpath_search(x$node, x$doc, xpath = xpath, nsMap = ns, num_results = Inf)
+ xml_nodeset(nodes)
+}
+
+#' @export
+xml_find_all.xml_nodeset <- function(x, xpath, ns = xml_ns(x)) {
+ if (length(x) == 0)
+ return(xml_nodeset())
+
+ nodes <- unlist(recursive = FALSE,
+ lapply(x, function(x)
+ xpath_search(x$node, x$doc, xpath = xpath, nsMap = ns, num_results = Inf)))
+
+ xml_nodeset(nodes)
+}
+
+#' @export
+#' @rdname xml_find_all
+xml_find_first <- function(x, xpath, ns = xml_ns(x)) {
+ UseMethod("xml_find_first")
+}
+
+xml_find_first.xml_missing <- function(x, xpath, ns = xml_ns(x)) {
+ xml_missing()
+}
+
+#' @export
+xml_find_first.xml_node <- function(x, xpath, ns = xml_ns(x)) {
+ res <- xpath_search(x$node, x$doc, xpath = xpath, nsMap = ns, num_results = 1)
+ if (length(res) == 1) {
+ res[[1]]
+ } else {
+ res
+ }
+}
+
+#' @export
+xml_find_first.xml_nodeset <- function(x, xpath, ns = xml_ns(x)) {
+ if (length(x) == 0)
+ return(xml_nodeset())
+
+ xml_nodeset(lapply(x, function(x)
+ xml_find_first.xml_node(x, xpath = xpath, ns = ns)))
+}
+
+
+#' @export
+#' @rdname xml_find_all
+xml_find_num <- function(x, xpath, ns = xml_ns(x)) {
+ UseMethod("xml_find_num")
+}
+
+#' @export
+xml_find_num.xml_node <- function(x, xpath, ns = xml_ns(x)) {
+ res <- xpath_search(x$node, x$doc, xpath = xpath, nsMap = ns, num_results = Inf)
+ if (!is.numeric(res)) {
+ stop("result of type: ", sQuote(class(res)), ", not numeric", call. = FALSE)
+ }
+ res
+}
+
+#' @export
+xml_find_num.xml_nodeset <- function(x, xpath, ns = xml_ns(x)) {
+ if (length(x) == 0)
+ return(numeric())
+
+ vapply(x, function(x) xml_find_num(x, xpath = xpath, ns = ns), numeric(1))
+}
+
+#' @export
+xml_find_num.xml_missing <- function(x, xpath, ns = xml_ns(x)) {
+ numeric(0)
+}
+
+#' @export
+#' @rdname xml_find_all
+xml_find_chr <- function(x, xpath, ns = xml_ns(x)) {
+ UseMethod("xml_find_chr")
+}
+
+#' @export
+xml_find_chr.xml_node <- function(x, xpath, ns = xml_ns(x)) {
+ res <- xpath_search(x$node, x$doc, xpath = xpath, nsMap = ns, num_results = Inf)
+ if (!is.character(res)) {
+ stop("result of type: ", sQuote(class(res)), ", not character", call. = FALSE)
+ }
+ res
+}
+
+#' @export
+xml_find_chr.xml_nodeset <- function(x, xpath, ns = xml_ns(x)) {
+ if (length(x) == 0)
+ return(character())
+
+ vapply(x, function(x) xml_find_chr(x, xpath = xpath, ns = ns), character(1))
+}
+
+#' @export
+xml_find_chr.xml_missing <- function(x, xpath, ns = xml_ns(x)) {
+ character(0)
+}
+
+#' @export
+#' @rdname xml_find_all
+xml_find_lgl <- function(x, xpath, ns = xml_ns(x)) {
+ UseMethod("xml_find_lgl")
+}
+
+#' @export
+xml_find_lgl.xml_node <- function(x, xpath, ns = xml_ns(x)) {
+ res <- xpath_search(x$node, x$doc, xpath = xpath, nsMap = ns, num_results = Inf)
+ if (!is.logical(res)) {
+ stop("result of type: ", sQuote(class(res)), ", not logical", call. = FALSE)
+ }
+ res
+}
+
+#' @export
+xml_find_lgl.xml_nodeset <- function(x, xpath, ns = xml_ns(x)) {
+ if (length(x) == 0)
+ return(logical())
+
+ vapply(x, function(x) xml_find_lgl(x, xpath = xpath, ns = ns), logical(1))
+}
+
+#' @export
+xml_find_lgl.xml_missing <- function(x, xpath, ns = xml_ns(x)) {
+ logical(0)
+}
+
+# Deprecated functions ----------------------------------------------------
+
+#' @rdname xml_find_all
+#' @usage NULL
+#' @export
+xml_find_one <- function(x, xpath, ns = xml_ns(x)) {
+ .Deprecated("xml_find_first")
+ UseMethod("xml_find_first")
+}
diff --git a/R/xml_modify.R b/R/xml_modify.R
new file mode 100644
index 0000000..fa349cd
--- /dev/null
+++ b/R/xml_modify.R
@@ -0,0 +1,332 @@
+#' Modify a tree by inserting, replacing or removing nodes
+#'
+#' \code{xml_add_sibling()} and \code{xml_add_child()} are used to insert a node
+#' as a sibling or a child. \code{xml_add_parent()} adds a new parent in
+#' between the input node and the current parent. \code{xml_replace()}
+#' replaces an existing node with a new node. \code{xml_remove()} removes a
+#' node from the tree.
+#'
+#' @details Care needs to be taken when using \code{xml_remove()},
+#' @param .x a document, node or nodeset.
+#' @param .copy whether to copy the \code{.value} before replacing. If this is \code{FALSE}
+#' then the node will be moved from it's current location.
+#' @param .where to add thenew node, for \code{xml_add_child} the position
+#' after which to add, use \code{0} for the first child. For
+#' \code{xml_add_sibling} either \sQuote{"befeore"} or \sQuote{"after"}
+#' indicating if the new node should be before or after \code{.x}.
+#' @param ... If named attributes or namespaces to set on the node, if unnamed
+#' text to assign to the node.
+#' @param .value node or nodeset to insert.
+#' @param free When removing the node also free the memory used for that node.
+#' Note if you use this option you cannot use any existing objects pointing to
+#' the node or its children, it is likely to crash R or return garbage.
+#' @export
+xml_replace <- function(.x, .value, ..., .copy = TRUE) {
+ UseMethod("xml_replace")
+}
+
+#' @export
+xml_replace.xml_node <- function(.x, .value, ..., .copy = TRUE) {
+
+ node <- create_node(.value, .x, .copy = .copy, ...)
+
+ .x$node <- node_replace(.x$node, node$node)
+ node
+}
+
+#' @export
+xml_replace.xml_nodeset <- function(.x, .value, ..., .copy = TRUE) {
+
+ if (length(.x) == 0) {
+ return(.x)
+ }
+
+ # Need to wrap this in a list if a bare xml_node so it is recycled properly
+ if (inherits(.value, "xml_node")) {
+ .value <- list(.value)
+ }
+
+ Map(xml_replace, .x, .value, ..., .copy = .copy)
+}
+
+#' @export
+xml_replace.xml_missing <- function(.x, .value, ..., .copy = TRUE) {
+ .x
+}
+
+#' @rdname xml_replace
+#' @export
+xml_add_sibling <- function(.x, .value, ..., .where = c("after", "before"), .copy = TRUE) {
+ UseMethod("xml_add_sibling")
+}
+
+#' @export
+xml_add_sibling.xml_node <- function(.x, .value, ..., .where = c("after", "before"), .copy = inherits(.value, "xml_node")) {
+ .where <- match.arg(.where)
+
+ node <- create_node(.value, .x, .copy = .copy, ...)
+
+ .x$node <- switch(.where,
+ before = node_prepend_sibling(.x$node, node$node),
+ after = node_append_sibling(.x$node, node$node))
+
+ invisible(.x)
+}
+
+#' @export
+xml_add_sibling.xml_nodeset <- function(.x, .value, ..., .where = c("after", "before"), .copy = TRUE) {
+ if (length(.x) == 0) {
+ return(.x)
+ }
+
+ .where <- match.arg(.where)
+
+ # Need to wrap this in a list if a bare xml_node so it is recycled properly
+ if (inherits(.value, "xml_node")) {
+ .value <- list(.value)
+ }
+
+ invisible(Map(xml_add_sibling, rev(.x), rev(.value), ..., .where = .where, .copy = .copy))
+}
+
+#' @export
+xml_add_sibling.xml_missing <- function(.x, .value, ..., .where = c("after", "before"), .copy = TRUE) {
+ .x
+}
+
+# Helper function used in the xml_add* methods
+create_node <- function(.value, parent, .copy, ...) {
+ if (inherits(.value, "xml_node")) {
+ if (isTRUE(.copy)) {
+ .value$node <- node_copy(.value$node)
+ }
+ return(.value)
+ }
+
+ if (inherits(.value, "xml_cdata")) {
+ return(xml_node(node_cdata_new(parent$doc, .value), doc = parent$doc))
+ }
+
+ if (inherits(.value, "xml_comment")) {
+ return(xml_node(node_comment_new(.value), doc = parent$doc))
+ }
+
+ if (inherits(.value, "xml_dtd")) {
+ node_new_dtd(parent$doc, .value$name, .value$external_id, .value$system_id)
+ return()
+ }
+
+ if (!is.character(.value)) {
+ stop("`.value` must be a character", call. = FALSE)
+ }
+
+ parts <- strsplit(.value, ":")[[1]]
+ if (length(parts) == 2) {
+ namespace <- ns_lookup(parent$doc, parent$node, parts[[1]])
+ node <- structure(list(node = node_new_ns(parts[[2]], namespace), doc = parent$doc), class = "xml_node")
+ } else {
+ node <- structure(list(node = node_new(.value), doc = parent$doc), class = "xml_node")
+ }
+
+ args <- list(...)
+ named <- has_names(args)
+ xml_attrs(node) <- args[named]
+ xml_text(node) <- paste(args[!named], collapse = "")
+
+ node
+}
+
+#' @rdname xml_replace
+#' @export
+xml_add_child <- function(.x, .value, ..., .where = length(xml_children(.x)), .copy = TRUE) {
+ UseMethod("xml_add_child")
+}
+
+#' @export
+xml_add_child.xml_node <- function(.x, .value, ..., .where = length(xml_children(.x)), .copy = inherits(.value, "xml_node")) {
+
+ node <- create_node(.value, .x, .copy = .copy, ...)
+
+ num_children <- length(xml_children(.x))
+
+ if (.where >= num_children) {
+ node_append_child(.x$node, node$node)
+ } else if (.where == 0L) {
+ node_prepend_sibling(xml_child(.x, search = 1)$node, node$node)
+ } else {
+ node_append_sibling(xml_child(.x, search = .where)$node, node$node)
+ }
+
+ invisible(node)
+}
+
+#' @export
+xml_add_child.xml_document <- function(.x, .value, ..., .where = length(xml_children(.x)), .copy = inherits(.value, "xml_node")) {
+ if (inherits(.x, "xml_node")) {
+ NextMethod("xml_add_child")
+ } else {
+ node <- create_node(.value, .x, .copy = .copy, ...)
+ if (!is.null(node)) {
+ if (!doc_has_root(.x$doc)) {
+ doc_set_root(.x$doc, node$node)
+ }
+ node_append_child(doc_root(.x$doc), node$node)
+ }
+ invisible(xml_document(.x$doc))
+ }
+}
+
+#' @export
+xml_add_child.xml_nodeset <- function(.x, .value, ..., .where = length(xml_children(.x)), .copy = TRUE) {
+ if (length(.x) == 0) {
+ return(.x)
+ }
+
+ # Need to wrap this in a list if a bare xml_node so it is recycled properly
+ if (inherits(.value, "xml_node")) {
+ .value <- list(.value)
+ }
+
+ res <- Map(xml_add_child, .x, .value, ..., .where = .where, .copy = .copy)
+ invisible(make_nodeset(res, res[[1]]$doc))
+}
+
+#' @export
+xml_add_child.xml_missing <- function(.x, .value, ..., .copy = TRUE) {
+ .x
+}
+
+#' @rdname xml_replace
+#' @export
+xml_add_parent <- function(.x, .value, ...) {
+ UseMethod("xml_add_parent")
+}
+
+#' @export
+xml_add_parent.xml_node <- function(.x, .value, ...) {
+ new_parent <- xml_replace(.x, .value = .value, ..., .copy = FALSE)
+ node <- xml_add_child(new_parent, .value = .x, .copy = FALSE)
+
+ invisible(node)
+}
+
+#' @export
+xml_add_parent.xml_nodeset <- function(.x, .value, ...) {
+ if (length(.x) == 0) {
+ return(.x)
+ }
+
+ # Need to wrap this in a list if a bare xml_node so it is recycled properly
+ if (inherits(.value, "xml_node")) {
+ .value <- list(.value)
+ }
+
+ res <- Map(xml_add_parent, .x, .value, ...)
+ invisible(make_nodeset(res, res[[1]]$doc))
+}
+
+#' @export
+xml_add_parent.xml_missing <- function(.x, .value, ..., .copy = TRUE) {
+ invisible(.x)
+}
+
+#' @rdname xml_replace
+#' @export
+xml_remove <- function(.x, free = FALSE) {
+ UseMethod("xml_remove")
+}
+
+#' @export
+xml_remove.xml_node <- function(.x, free = FALSE) {
+ node_remove(.x$node, free = free)
+}
+
+#' @export
+xml_remove.xml_nodeset <- function(.x, free = FALSE) {
+ if (length(.x) == 0) {
+ return(.x)
+ }
+
+ Map(xml_remove, rev(.x), free = free)
+}
+
+#' @export
+xml_remove.xml_missing <- function(.x, free = FALSE) {
+ .x
+}
+
+#' Set the node's namespace
+#'
+#' The namespace to be set must be already defined in one of the node's
+#' ancestors.
+#' @param .x a node
+#' @param prefix The namespace prefix to use
+#' @param uri The namespace URI to use
+#' @return the node (invisibly)
+#' @export
+xml_set_namespace <- function(.x, prefix = "", uri = "") {
+ stopifnot(inherits(.x, "xml_node"))
+
+ if (nzchar(uri)) {
+ node_set_namespace_uri(.x$doc, .x$node, uri)
+ } else {
+ node_set_namespace_prefix(.x$doc, .x$node, prefix)
+ }
+ invisible(.x)
+}
+
+#' Create a new document, possibly with a root node
+#'
+#' \code{xml_new_document} creates only a new document without a root node. In
+#' most cases you should instead use \code{xml_new_root}, which creates a new
+#' document and assigns the root node in one step.
+#' @param version The version number of the document.
+#' @param encoding The character encoding to use in the document. The default
+#' encoding is \sQuote{UTF-8}. Available encodings are specified at
+#' \url{http://xmlsoft.org/html/libxml-encoding.html#xmlCharEncoding}.
+#' @return A \code{xml_document} object.
+#' @export
+# TODO: jimhester 2016-12-16 Deprecate this in the future?
+xml_new_document <- function(version = "1.0", encoding = "UTF-8") {
+ doc <- doc_new(version)
+ structure(list(doc = doc), class = "xml_document")
+}
+
+#' @param .version The version number of the document, passed to \code{xml_new_document(version)}.
+#' @param .encoding The encoding of the document, passed to \code{xml_new_document(encoding)}.
+#' @inheritParams xml_add_child
+#' @rdname xml_new_document
+#' @export
+xml_new_root <- function(.value, ..., .copy = inherits(.value, "xml_node"), .version = "1.0", .encoding = "UTF-8") {
+ xml_add_child(xml_new_document(version = .version, encoding = .encoding), .value = .value, ... = ..., .copy = .copy)
+}
+
+#' Strip the default namespaces from a document
+#'
+#' @inheritParams xml_name
+#' @examples
+#' x <- read_xml(
+#' "<foo xmlns = 'http://foo.com'>
+#' <baz/>
+#' <bar xmlns = 'http://bar.com'>
+#' <baz/>
+#' </bar>
+#' </foo>")
+#' # Need to specify the default namespaces to find the baz nodes
+#' xml_find_all(x, "//d1:baz")
+#' xml_find_all(x, "//d2:baz")
+#'
+#' # After stripping the default namespaces you can find both baz nodes directly
+#' xml_ns_strip(x)
+#' xml_find_all(x, "//baz")
+#' @export
+xml_ns_strip <- function(x) {
+
+ # //namespace::*[name()=''] finds all the namespace definition nodes with no
+ # prefix (default namespaces).
+ # What we actually want is the element node the definitions are contained in
+ # so return the parent (/parent::*)
+ namespace_element_nodes <- xml_find_all(x, "//namespace::*[name()='']/parent::*")
+ xml_attr(namespace_element_nodes, "xmlns") <- NULL
+ invisible(x)
+}
diff --git a/R/xml_name.R b/R/xml_name.R
new file mode 100644
index 0000000..153e735
--- /dev/null
+++ b/R/xml_name.R
@@ -0,0 +1,92 @@
+#' The (tag) name of an xml element.
+#'
+#' @param x A document, node, or node set.
+#' @param ns Optionally, a named vector giving prefix-url pairs, as produced
+#' by \code{\link{xml_ns}}. If provided, all names will be explicitly
+#' qualified with the ns prefix, i.e. if the element \code{bar} is defined
+#' in namespace \code{foo}, it will be called \code{foo:bar}. (And
+#' similarly for atttributes). Default namespaces must be given an explicit
+#' name. The ns is ignored when using \code{\link{xml_name<-}} and
+#' \code{\link{xml_set_name}}.
+#' @return A character vector.
+#' @export
+#' @examples
+#' x <- read_xml("<bar>123</bar>")
+#' xml_name(x)
+#'
+#' y <- read_xml("<bar><baz>1</baz>abc<foo /></bar>")
+#' z <- xml_children(y)
+#' xml_name(xml_children(y))
+xml_name <- function(x, ns = character()) {
+ UseMethod("xml_name")
+}
+
+#' @export
+xml_name.xml_missing <- function(x, ns = character()) {
+ NA_character_
+}
+
+#' @export
+xml_name.xml_nodeset <- function(x, ns = character()) {
+ vapply(x, xml_name, ns = ns, FUN.VALUE = character(1))
+}
+
+#' @export
+xml_name.xml_node <- function(x, ns = character()) {
+ node_name(x$node, nsMap = ns)
+}
+
+#' Modify the (tag) name of an element
+#'
+#' @inheritParams xml_name
+#' @param value a character vector with replacement name.
+#' @rdname xml_name
+#' @export
+`xml_name<-` <- function(x, ns = character(), value) {
+ UseMethod("xml_name<-")
+}
+
+#' @export
+`xml_name<-.xml_node` <- function(x, ns = character(), value) {
+ node_set_name(x$node, value)
+ x
+}
+
+#' @export
+`xml_name<-.xml_nodeset` <- function(x, ns = character(), value) {
+ if (length(x) == 0) {
+ return(x)
+ }
+ if (!is.list(ns)) {
+ ns <- list(ns)
+ }
+ Map(`xml_name<-`, x, ns, value)
+ x
+}
+
+#' @export
+`xml_name<-.xml_missing` <- function(x, ns = character(), value) {
+ x
+}
+
+set_name <- function(x, value, ns = character()) {
+ xml_name(x = x, ns = ns) <- value
+ x
+}
+
+#' @rdname xml_name
+#' @inheritParams xml_name
+#' @export
+#' @export
+xml_set_name <- function(x, value, ns = character()) {
+ UseMethod("xml_set_name")
+}
+
+#' @export
+xml_set_name.xml_node <- set_name
+
+#' @export
+xml_set_name.xml_nodeset <- set_name
+
+#' @export
+xml_set_name.xml_missing <- set_name
diff --git a/R/xml_namespaces.R b/R/xml_namespaces.R
new file mode 100644
index 0000000..3c307bd
--- /dev/null
+++ b/R/xml_namespaces.R
@@ -0,0 +1,103 @@
+#' XML namespaces.
+#'
+#' \code{xml_ns} extracts all namespaces from a document, matching each
+#' unique namespace url with the prefix it was first associated with. Default
+#' namespaces are named \code{d1}, \code{d2} etc. Use \code{xml_ns_rename}
+#' to change the prefixes. Once you have a namespace object, you can pass it to
+#' other functions to work with fully qualified names instead of local names.
+#'
+#' @export
+#' @inheritParams xml_name
+#' @param old,... An existing xml_namespace object followed by name-value
+#' (old prefix-new prefix) pairs to replace.
+#' @return A character vector with class \code{xml_namespace} so the
+#' default display is a little nicer.
+#' @examples
+#' x <- read_xml('
+#' <root>
+#' <doc1 xmlns = "http://foo.com"><baz /></doc1>
+#' <doc2 xmlns = "http://bar.com"><baz /></doc2>
+#' </root>
+#' ')
+#' xml_ns(x)
+#'
+#' # When there are default namespaces, it's a good idea to rename
+#' # them to give informative names:
+#' ns <- xml_ns_rename(xml_ns(x), d1 = "foo", d2 = "bar")
+#' ns
+#'
+#' # Now we can pass ns to other xml function to use fully qualified names
+#' baz <- xml_children(xml_children(x))
+#' xml_name(baz)
+#' xml_name(baz, ns)
+#'
+#' xml_find_all(x, "//baz")
+#' xml_find_all(x, "//foo:baz", ns)
+#'
+#' str(as_list(x))
+#' str(as_list(x, ns))
+xml_ns <- function(x) {
+ UseMethod("xml_ns")
+}
+
+#' @export
+xml_ns.xml_document <- function(x) {
+ if (length(x) == 0) {
+ return(character())
+ }
+
+ stopifnot(inherits(x, "xml_document"))
+ doc <- x$doc
+ x <- doc_namespaces(doc)
+
+ # Number default namespaces
+ is_default <- names(x) == ""
+ names(x)[is_default] <- paste0("d", seq_len(sum(is_default)))
+
+ # Make prefixes unique
+ names(x) <- make.unique(names(x), "")
+
+ class(x) <- "xml_namespace"
+
+ x
+}
+
+#' @export
+xml_ns.xml_node <- function(x) {
+ xml_ns(xml_root(x))
+}
+
+#' @export
+xml_ns.xml_nodeset <- function(x) {
+ if (length(x) == 0) {
+ return(character())
+ }
+ xml_ns(x[[1]])
+}
+
+#' @export
+xml_ns.xml_missing <- function(x) {
+ character()
+}
+
+#' @export
+print.xml_namespace <- function(x, ...) {
+ prefix <- format(names(x))
+
+ cat(paste0(prefix, " <-> ", x, collapse = "\n"), "\n", sep = "")
+}
+
+#' @export
+#' @rdname xml_ns
+xml_ns_rename <- function(old, ...) {
+ new <- c(...)
+
+ m <- match(names(new), names(old))
+ if (any(is.na(m))) {
+ missing <- paste(names(new)[is.na(m)], collapse = ", ")
+ stop("Some prefixes [", missing, "] don't already exist.", call. = FALSE)
+ }
+
+ names(old)[m] <- new
+ old
+}
diff --git a/R/xml_parse.R b/R/xml_parse.R
new file mode 100644
index 0000000..de4f7e6
--- /dev/null
+++ b/R/xml_parse.R
@@ -0,0 +1,128 @@
+#' Read HTML or XML.
+#'
+#' @param x A string, a connection, or a raw vector.
+#'
+#' A string can be either a path, a url or literal xml. Urls will
+#' be converted into connections either using \code{base::url} or, if
+#' installed, \code{curl::curl}. Local paths ending in \code{.gz},
+#' \code{.bz2}, \code{.xz}, \code{.zip} will be automatically uncompressed.
+#'
+#' If a connection, the complete connection is read into a raw vector before
+#' being parsed.
+#' @param encoding Specify a default encoding for the document. Unless
+#' otherwise specified XML documents are assumed to be in UTF-8 or
+#' UTF-16. If the document is not UTF-8/16, and lacks an explicit
+#' encoding directive, this allows you to supply a default.
+#' @param ... Additional arguments passed on to methods.
+#' @param as_html Optionally parse an xml file as if it's html.
+#' @param base_url When loading from a connection, raw vector or literal
+#' html/xml, this allows you to specify a base url for the document. Base
+#' urls are used to turn relative urls into absolute urls.
+#' @param n If \code{file} is a connection, the number of bytes to read per
+#' iteration. Defaults to 64kb.
+#' @param verbose When reading from a slow connection, this prints some
+#' output on every iteration so you know its working.
+#' @param options Set parsing options for the libxml2 parser. Zero of more of
+#' \Sexpr[results=rd]{xml2:::describe_options(xml2:::xml_parse_options())}
+#' @return An XML document. HTML is normalised to valid XML - this may not
+#' be exactly the same transformation performed by the browser, but it's
+#' a reasonable approximation.
+#' @export
+#' @examples
+#' # Literal xml/html is useful for small examples
+#' read_xml("<foo><bar /></foo>")
+#' read_html("<html><title>Hi<title></html>")
+#' read_html("<html><title>Hi")
+#'
+#' # From a local path
+#' read_html(system.file("extdata", "r-project.html", package = "xml2"))
+#'
+#' # From a url
+#' cd <- read_xml("http://www.xmlfiles.com/examples/cd_catalog.xml")
+#' me <- read_html("http://had.co.nz")
+read_xml <- function(x, encoding = "", ..., as_html = FALSE, options = "NOBLANKS") {
+ UseMethod("read_xml")
+}
+
+#' @export
+#' @rdname read_xml
+read_html <- function(x, encoding = "", ..., options = c("RECOVER", "NOERROR", "NOBLANKS")) {
+ UseMethod("read_html")
+}
+
+#' @export
+read_html.default <- function(x, encoding = "", ..., options = c("RECOVER", "NOERROR", "NOBLANKS")) {
+ options <- parse_options(options, xml_parse_options())
+
+ suppressWarnings(read_xml(x, encoding = encoding, ..., as_html = TRUE, options = options))
+}
+
+#' @export
+read_html.response <- function(x, encoding = "", options = c("RECOVER",
+ "NOERROR", "NOBLANKS"), ...) {
+ need_package("httr")
+
+ options <- parse_options(options, xml_parse_options())
+ content <- httr::content(x, as = "raw")
+ xml2::read_html(content, encoding = encoding, options = options, ...)
+}
+
+#' @export
+#' @rdname read_xml
+read_xml.character <- function(x, encoding = "", ..., as_html = FALSE,
+ options = "NOBLANKS") {
+
+ options <- parse_options(options, xml_parse_options())
+ if (grepl("<|>", x)) {
+ read_xml.raw(charToRaw(enc2utf8(x)), "UTF-8", ..., as_html = as_html, options = options)
+ } else {
+ con <- path_to_connection(x)
+ if (inherits(con, "connection")) {
+ read_xml.connection(con, encoding = encoding, ..., as_html = as_html,
+ base_url = x, options = options)
+ } else {
+ doc <- doc_parse_file(con, encoding = encoding, as_html = as_html,
+ options = options)
+ xml_document(doc)
+ }
+ }
+}
+
+#' @export
+#' @rdname read_xml
+read_xml.raw <- function(x, encoding = "", base_url = "", ...,
+ as_html = FALSE, options = "NOBLANKS") {
+ options <- parse_options(options, xml_parse_options())
+
+ doc <- doc_parse_raw(x, encoding = encoding, base_url = base_url,
+ as_html = as_html, options = options)
+ xml_document(doc)
+}
+
+#' @export
+#' @rdname read_xml
+read_xml.connection <- function(x, encoding = "", n = 64 * 1024,
+ verbose = FALSE, ..., base_url = "",
+ as_html = FALSE, options = "NOBLANKS") {
+ options <- parse_options(options, xml_parse_options())
+
+ if (!isOpen(x)) {
+ open(x, "rb")
+ on.exit(close(x))
+ }
+
+ raw <- read_connection_(x, n)
+ read_xml.raw(raw, encoding = encoding, base_url = base_url, as_html =
+ as_html, options = options)
+}
+
+#' @export
+read_xml.response <- function(x, encoding = "", base_url = "", ...,
+ as_html = FALSE, options = "NOBLANKS") {
+ need_package("httr")
+
+ options <- parse_options(options, xml_parse_options())
+ content <- httr::content(x, as = "raw")
+ xml2::read_xml(content, encoding = encoding, base_url = base_url,
+ as_html = as_html, option = options, ...)
+}
diff --git a/R/xml_path.R b/R/xml_path.R
new file mode 100644
index 0000000..973c6d1
--- /dev/null
+++ b/R/xml_path.R
@@ -0,0 +1,29 @@
+#' Retrieve the xpath to a node
+#'
+#' This is useful when you want to figure out where nodes matching an
+#' xpath expression live in a document.
+#'
+#' @inheritParams xml_name
+#' @return A character vector.
+#' @export
+#' @examples
+#' x <- read_xml("<foo><bar><baz /></bar><baz /></foo>")
+#' xml_path(xml_find_all(x, ".//baz"))
+xml_path <- function(x) {
+ UseMethod("xml_path")
+}
+
+#' @export
+xml_path.xml_missing <- function(x) {
+ NA_character_
+}
+
+#' @export
+xml_path.xml_node <- function(x) {
+ node_path(x$node)
+}
+
+#' @export
+xml_path.xml_nodeset <- function(x) {
+ vapply(x, xml_path, FUN.VALUE = character(1))
+}
diff --git a/R/xml_schema.R b/R/xml_schema.R
new file mode 100644
index 0000000..84a934f
--- /dev/null
+++ b/R/xml_schema.R
@@ -0,0 +1,21 @@
+#' Validate XML schema
+#'
+#' Validate an XML document against an XML 1.0 schema.
+#'
+#' @inheritParams xml_name
+#' @return TRUE or FALSE
+#' @export
+#' @param schema an XML document containing the schema
+#' @examples # Example from https://msdn.microsoft.com/en-us/library/ms256129(v=vs.110).aspx
+#' doc <- read_xml(system.file("extdata/order-doc.xml", package = "xml2"))
+#' schema <- read_xml(system.file("extdata/order-schema.xml", package = "xml2"))
+#' xml_validate(doc, schema)
+xml_validate <- function(x, schema) {
+ UseMethod("xml_validate")
+}
+
+#' @export
+xml_validate.xml_document <- function(x, schema) {
+ stopifnot(inherits(schema, "xml_document"))
+ doc_validate(x$doc, schema$doc)
+}
diff --git a/R/xml_serialize.R b/R/xml_serialize.R
new file mode 100644
index 0000000..238b78e
--- /dev/null
+++ b/R/xml_serialize.R
@@ -0,0 +1,54 @@
+#' Serializing XML objects to connections.
+#'
+#' @inheritParams base::serialize
+#' @param ... Additional arguments passed to \code{\link{read_xml}}.
+#' @examples
+#' library(xml2)
+#' x <- read_xml("<a>
+#' <b><c>123</c></b>
+#' <b><c>456</c></b>
+#' </a>")
+#'
+#' b <- xml_find_all(x, "//b")
+#' out <- xml_serialize(b, NULL)
+#' xml_unserialize(out)
+#' @export
+xml_serialize <- function(object, connection, ...) UseMethod("xml_serialize")
+
+#' @export
+xml_serialize.xml_document <- function(object, connection, ...) {
+ serialize(structure(as.character(object, ...), class = "xml_serialized_document"), connection)
+}
+
+#' @export
+xml_serialize.xml_node <- function(object, connection, ...) {
+ x <- as_xml_document(object)
+ serialize(structure(as.character(x, ...), class = "xml_serialized_node"), connection)
+}
+
+#' @export
+xml_serialize.xml_nodeset <- function(object, connection, ...) {
+ x <- as_xml_document(object, "root")
+ serialize(structure(as.character(x, ...), class = "xml_serialized_nodeset"), connection)
+}
+
+#' @rdname xml_serialize
+#' @export
+xml_unserialize <- function(connection, ...) {
+ object <- unserialize(connection)
+ if (inherits(object, "xml_serialized_nodeset")) {
+ x <- read_xml(unclass(object), ...)
+
+ # Select only the direct children of the root
+ xml_find_all(x, "/*/node()")
+ } else if (inherits(object, "xml_serialized_node")) {
+ x <- read_xml(unclass(object), ...)
+
+ # Select only the root
+ xml_find_first(x, "/node()")
+ } else if (inherits(object, "xml_serialized_document")) {
+ x <- read_xml(unclass(object), ...)
+ } else {
+ stop("Not a serialized xml2 object", call. = FALSE)
+ }
+}
diff --git a/R/xml_structure.R b/R/xml_structure.R
new file mode 100644
index 0000000..8f18a04
--- /dev/null
+++ b/R/xml_structure.R
@@ -0,0 +1,92 @@
+#' Show the structure of an html/xml document.
+#'
+#' Show the structure of an html/xml document without displaying any of
+#' the values. This is useful if you want to get a high level view of the
+#' way a document is organised. Compared to \code{xml_structure},
+#' \code{html_structure} prints the id and class attributes.
+#'
+#' @param x HTML/XML document (or part there of)
+#' @param indent Number of spaces to ident
+#' @export
+#' @examples
+#' xml_structure(read_xml("<a><b><c/><c/></b><d/></a>"))
+#'
+#' rproj <- read_html(system.file("extdata","r-project.html", package = "xml2"))
+#' xml_structure(rproj)
+#' xml_structure(xml_find_all(rproj, ".//p"))
+#'
+#' h <- read_html("<body><p id = 'a'></p><p class = 'c d'></p></body>")
+#' html_structure(h)
+xml_structure <- function(x, indent = 2) {
+ tree_structure(x, indent = indent, html = FALSE)
+}
+
+#' @export
+#' @rdname xml_structure
+html_structure <- function(x, indent = 2) {
+ tree_structure(x, indent = indent, html = TRUE)
+}
+
+tree_structure <- function(x, indent = 2, html = FALSE) {
+ UseMethod("tree_structure")
+}
+
+#' @export
+tree_structure.xml_missing <- function(x, indent = 2, html = FALSE) {
+ NA_character_
+}
+
+#' @export
+tree_structure.xml_nodeset <- function(x, indent = 2, html = FALSE) {
+ for (i in seq_along(x)) {
+ cat("[[", i, "]]\n", sep = "")
+ print_xml_structure(x[[i]], indent = indent, html = html)
+ cat("\n")
+ }
+
+ invisible()
+}
+
+#' @export
+tree_structure.xml_node <- function(x, indent = 2, html = FALSE) {
+ print_xml_structure(x, indent = indent, html = html)
+ invisible()
+}
+
+print_xml_structure <- function(x, prefix = 0, indent = 2, html = FALSE) {
+ padding <- paste(rep(" ", prefix), collapse = "")
+ type <- xml_type(x)
+
+ if (type == "element") {
+
+ attr <- xml_attrs(x)
+ if (html) {
+ html_attrs <- list()
+ if ("id" %in% names(attr)) {
+ html_attrs$id <- paste0("#", attr[["id"]])
+ attr <- attr[setdiff(names(attr), "id")]
+ }
+
+ if ("class" %in% names(attr)) {
+ html_attrs$class <- paste0(".", gsub(" ", ".", attr[["class"]]))
+ attr <- attr[setdiff(names(attr), "class")]
+ }
+
+ attr_str <- paste(unlist(html_attrs), collapse = " ")
+ } else {
+ attr_str <- ""
+ }
+
+ if (length(attr) > 0) {
+ attr_str <- paste0(attr_str, " [", paste0(names(attr), collapse = ", "), "]")
+ }
+
+ node <- paste0("<", xml_name(x), attr_str, ">")
+
+ cat(padding, node, "\n", sep = "")
+ lapply(xml_contents(x), print_xml_structure, prefix = prefix + indent,
+ indent = indent, html = html)
+ } else {
+ cat(padding, "{", type, "}\n", sep = "")
+ }
+}
diff --git a/R/xml_text.R b/R/xml_text.R
new file mode 100644
index 0000000..5c29573
--- /dev/null
+++ b/R/xml_text.R
@@ -0,0 +1,140 @@
+#' Extract or modify the text
+#'
+#' \code{xml_text} returns a character vector, \code{xml_double} returns a
+#' numeric vector, \code{xml_integer} returns an integer vector.
+#' @inheritParams xml_name
+#' @param trim If \code{TRUE} will trim leading and trailing spaces.
+#' @return A character vector, the same length as x.
+#' @examples
+#' x <- read_xml("<p>This is some text. This is <b>bold!</b></p>")
+#' xml_text(x)
+#' xml_text(xml_children(x))
+#'
+#' x <- read_xml("<x>This is some text. <x>This is some nested text.</x></x>")
+#' xml_text(x)
+#' xml_text(xml_find_all(x, "//x"))
+#'
+#' x <- read_xml("<p> Some text </p>")
+#' xml_text(x, trim = TRUE)
+#'
+#' # xml_double() and xml_integer() are useful for extracting numeric
+#' attributes
+#' x <- read_xml("<plot><point x='1' y='2' /><point x='2' y='1' /></plot>")
+#' xml_integer(xml_find_all(x, "//@x"))
+#' @export
+xml_text <- function(x, trim = FALSE) {
+ UseMethod("xml_text")
+}
+
+#' @export
+xml_text.xml_missing <- function(x, trim = FALSE) {
+ NA_character_
+}
+
+#' @export
+xml_text.xml_node <- function(x, trim = FALSE) {
+ res <- node_text(x$node)
+ if (isTRUE(trim)) {
+ res <- sub("^[[:space:]\u00a0]+", "", res)
+ res <- sub("[[:space:]\u00a0]+$", "", res)
+ }
+ res
+}
+
+#' @export
+xml_text.xml_nodeset <- function(x, trim = FALSE) {
+ vapply(x, xml_text, trim = trim, FUN.VALUE = character(1))
+}
+
+#' @rdname xml_text
+#' @param value character vector with replacement text.
+#' @export
+`xml_text<-` <- function(x, value) {
+ UseMethod("xml_text<-")
+}
+
+#' @export
+`xml_text<-.xml_nodeset` <- function(x, value) {
+ if (length(x) == 0) {
+ return(x)
+ }
+ # We need to do the modification in reverse order as the modification can
+ # potentially delete nodes
+ Map(`xml_text<-`, rev(x), rev(value))
+
+ # what to return here, setting the text could invalidate some nodes in
+ # the nodeset having pointers to free'd memory.
+ x
+}
+
+#' @export
+`xml_text<-.xml_node` <- function(x, value) {
+ if (xml_type(x) != "text") {
+ text_child <- xml_find_first(x, ".//text()[1]", ns = character())
+ if (inherits(text_child, "xml_missing")) {
+ node_append_content(x$node, value)
+ } else {
+ node_set_content(text_child$node, value)
+ }
+ } else {
+ node_set_content(x$node, value)
+ }
+
+ x
+}
+
+#' @export
+`xml_text<-.xml_missing` <- function(x, value) {
+ NA_character_
+}
+
+#' @export
+#' @rdname xml_text
+`xml_set_text` <- `xml_text<-`
+
+#' @rdname xml_text
+#' @export
+xml_double <- function(x) {
+ UseMethod("xml_double")
+}
+
+#' @export
+xml_double.xml_missing <- function(x) {
+ NA_real_
+}
+
+#' @export
+xml_double.xml_node <- function(x) {
+ as.numeric(xml_text(x))
+}
+
+#' @export
+xml_double.xml_nodeset <- function(x) {
+ vapply(x, xml_double, numeric(1))
+}
+
+#' @export
+xml_integer <- function(x) {
+ UseMethod("xml_integer")
+}
+
+#' @export
+xml_integer.xml_missing <- function(x) {
+ NA_integer_
+}
+
+#' @rdname xml_text
+#' @export
+xml_integer <- function(x) {
+ UseMethod("xml_integer")
+}
+
+#' @export
+xml_integer.xml_node <- function(x) {
+ as.integer(xml_text(x))
+}
+
+#' @export
+xml_integer.xml_nodeset <- function(x) {
+ vapply(x, xml_integer, integer(1))
+}
diff --git a/R/xml_type.R b/R/xml_type.R
new file mode 100644
index 0000000..5fdaabe
--- /dev/null
+++ b/R/xml_type.R
@@ -0,0 +1,51 @@
+#' Determine the type of a node.
+#'
+#' @inheritParams xml_name
+#' @export
+#' @examples
+#' x <- read_xml("<foo> a <b /> <![CDATA[ blah]]></foo>")
+#' xml_type(x)
+#' xml_type(xml_contents(x))
+xml_type <- function(x) {
+ UseMethod("xml_type")
+}
+
+#' @export
+xml_type.xml_missing <- function(x) {
+ NA_character_
+}
+
+#' @export
+xml_type.xml_node <- function(x) {
+ xmlElementType[node_type(x$node)]
+}
+
+#' @export
+xml_type.xml_nodeset <- function(x) {
+ types <- vapply(x, function(x) node_type(x$node), integer(1))
+ xmlElementType[types]
+}
+
+xmlElementType <- c(
+ "element",
+ "attribute",
+ "text",
+ "cdata",
+ "entity_ref",
+ "entity",
+ "pi",
+ "comment",
+ "document",
+ "document_type",
+ "document_frag",
+ "notation",
+ "html_document",
+ "dtd",
+ "element_decl",
+ "attribute_decl",
+ "entity_decl",
+ "namespace_decl",
+ "xinclude_start",
+ "xinclude_end",
+ "docb_document"
+)
diff --git a/R/xml_url.R b/R/xml_url.R
new file mode 100644
index 0000000..3b07e3f
--- /dev/null
+++ b/R/xml_url.R
@@ -0,0 +1,32 @@
+#' The URL of an XML document
+#'
+#' This is useful for interpreting relative urls with \code{\link{url_relative}}.
+#'
+#' @param x A node or document.
+#' @return A character vector of length 1. Returns \code{NA} if the name is
+#' not set.
+#' @export
+#' @examples
+#' catalog <- read_xml("http://www.xmlfiles.com/examples/cd_catalog.xml")
+#' xml_url(catalog)
+#'
+#' x <- read_xml("<foo/>")
+#' xml_url(x)
+xml_url <- function(x) {
+ UseMethod("xml_url")
+}
+
+#' @export
+xml_url.xml_missing <- function(x) {
+ NA_character_
+}
+
+#' @export
+xml_url.xml_node <- function(x) {
+ doc_url(x$doc)
+}
+
+#' @export
+xml_url.xml_nodeset <- function(x) {
+ vapply(x, doc_url, character(1))
+}
diff --git a/R/xml_write.R b/R/xml_write.R
new file mode 100644
index 0000000..de76625
--- /dev/null
+++ b/R/xml_write.R
@@ -0,0 +1,118 @@
+#' Write XML or HTML to disk.
+#'
+#' This writes out both XML and normalised HTML. The default behavior will
+#' output the same format which was read. If you want to force output pass
+#' \code{option = "as_xml"} or \code{option = "as_html"} respectively.
+#'
+#' @param x A document or node to write to disk. It's not possible to
+#' save nodesets containing more than one node.
+#' @param file Path to file or connection to write to.
+#' @param encoding The character encoding to use in the document. The default
+#' encoding is \sQuote{UTF-8}. Available encodings are specified at
+#' \url{http://xmlsoft.org/html/libxml-encoding.html#xmlCharEncoding}.
+#' @param options default: \sQuote{format}. Zero or more of
+#' \Sexpr[results=rd]{xml2:::describe_options(xml2:::xml_save_options())}
+#' @param ... additional arguments passed to methods.
+#' @export
+#' @examples
+#' h <- read_html("<p>Hi!</p>")
+#'
+#' tmp <- tempfile(fileext = ".xml")
+#' write_xml(h, tmp, options = "format")
+#' readLines(tmp)
+#'
+#' # write formatted HTML output
+#' write_html(h, tmp, options = "format")
+#' readLines(tmp)
+write_xml <- function(x, file, ...) {
+ UseMethod("write_xml")
+}
+
+#' @export
+write_xml.xml_missing <- function(x, file, ...) {
+ stop("Missing data cannot be written", call. = FALSE)
+}
+
+#' @rdname write_xml
+#' @export
+write_xml.xml_document <- function(x, file, ..., options = "format", encoding = "UTF-8") {
+ options <- parse_options(options, xml_save_options())
+ file <- path_to_connection(file, check = "dir")
+
+ if (inherits(file, "connection")) {
+ if (!isOpen(file)) {
+ open(file, "wb")
+ on.exit(close(file))
+ }
+ doc_write_connection(x$doc, file, options = options, encoding = encoding)
+ } else {
+ if (!(is.character(file) && length(file) == 1 && nzchar(file))) {
+ stop("`file` must be a non-zero character of length 1", call. = FALSE)
+ }
+ doc_write_file(x$doc, file, options = options, encoding = encoding)
+ }
+}
+
+#' @export
+write_xml.xml_nodeset <- function(x, file, ..., options = "format", encoding = "UTF-8") {
+ if (length(x) != 1) {
+ stop("Can only save length 1 node sets", call. = FALSE)
+ }
+
+ options <- parse_options(options, xml_save_options())
+ file <- path_to_connection(file, check = "dir")
+
+ if (inherits(file, "connection")) {
+ if (!isOpen(file)) {
+ open(file, "wb")
+ on.exit(close(file))
+ }
+ node_write_connection(x[[1]]$node, file, options = options, encoding = encoding)
+ } else {
+ if (!(is.character(file) && length(file) == 1 && nzchar(file))) {
+ stop("`file` must be a non-zero character of length 1", call. = FALSE)
+ }
+ node_write_file(x[[1]]$node, file, options = options, encoding = encoding)
+ }
+}
+
+#' @export
+write_xml.xml_node <- function(x, file, ..., options = "format", encoding = "UTF-8") {
+ options <- parse_options(options, xml_save_options())
+
+ file <- path_to_connection(file, check = "dir")
+ if (inherits(file, "connection")) {
+ if (!isOpen(file)) {
+ open(file, "wb")
+ on.exit(close(file))
+ }
+ node_write_connection(x$node, file, options = options, encoding = encoding)
+ } else {
+ if (!(is.character(file) && length(file) == 1 && nzchar(file))) {
+ stop("`file` must be a non-zero character of length 1", call. = FALSE)
+ }
+ node_write_file(x$node, file, options = options, encoding = encoding)
+ }
+}
+
+
+#' @export
+#' @rdname write_xml
+write_html <- function(x, file, ...) {
+ UseMethod("write_html")
+}
+
+#' @export
+write_html.xml_missing <- function(x, file, ...) {
+ stop("Missing data cannot be written", call. = FALSE)
+}
+
+#' @rdname write_xml
+#' @export
+write_html.xml_document <- write_xml.xml_document
+
+#' @export
+write_html.xml_nodeset <- write_xml.xml_nodeset
+
+#' @export
+write_html.xml_node <- write_xml.xml_node
diff --git a/R/zzz.R b/R/zzz.R
new file mode 100644
index 0000000..792156a
--- /dev/null
+++ b/R/zzz.R
@@ -0,0 +1,4 @@
+.onUnload <- function(libpath) {
+ gc() # trigger finalisers
+ library.dynam.unload("xml2", libpath)
+}
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..fbfddf2
--- /dev/null
+++ b/README.md
@@ -0,0 +1,63 @@
+# xml2
+[![Build Status](https://travis-ci.org/hadley/xml2.svg?branch=master)](https://travis-ci.org/hadley/xml2)
+[![Coverage Status](https://img.shields.io/codecov/c/github/hadley/xml2/master.svg)](https://codecov.io/github/hadley/xml2?branch=master)
+[![AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/hadley/xml2?branch=master&svg=true)](https://ci.appveyor.com/project/hadley/xml2)
+
+The xml2 package is a binding to [libxml2](http://xmlsoft.org), making it easy to work with HTML and XML from R. The API is somewhat inspired by [jQuery](http://jquery.com).
+
+## Installation
+
+You can install xml2 from CRAN,
+
+```R
+install.packages("xml2")
+```
+
+or you can install the development version from github, using `devtools`:
+
+```R
+# install.packages("devtools")
+devtools::install_github("hadley/xml2")
+```
+
+## Usage
+
+```R
+library("xml2")
+x <- read_xml("<foo> <bar> text <baz/> </bar> </foo>")
+x
+
+xml_name(x)
+xml_children(x)
+xml_text(x)
+xml_find_all(x, ".//baz")
+
+h <- read_html("<html><p>Hi <b>!")
+h
+xml_name(h)
+xml_text(h)
+```
+
+There are three key classes:
+
+* `xml_node`: a single node in a document.
+
+* `xml_doc`: the complete document. Acting on a document is usually the same
+ as acting on the root node of the document.
+
+* `xml_nodeset`: a __set__ of nodes within the document. Operations on
+ `xml_nodeset`s are vectorised, apply the operation over each node in the set.
+
+## Compared to the XML package
+
+xml2 has similar goals to the XML package. The main differences are:
+
+* xml2 takes care of memory management for you. It will automatically
+ free the memory used by an XML document as soon as the last reference
+ to it goes away.
+
+* xml2 has a very simple class hierarchy so don't need to think about exactly
+ what type of object you have, xml2 will just do the right thing.
+
+* More convenient handling of namespaces in Xpath expressions - see `xml_ns()`
+ and `xml_ns_strip()` to get started.
diff --git a/build/vignette.rds b/build/vignette.rds
new file mode 100644
index 0000000..551e8ce
Binary files /dev/null and b/build/vignette.rds differ
diff --git a/build/xml2.pdf b/build/xml2.pdf
new file mode 100644
index 0000000..856fbd8
Binary files /dev/null and b/build/xml2.pdf differ
diff --git a/cleanup b/cleanup
new file mode 100755
index 0000000..3c020d3
--- /dev/null
+++ b/cleanup
@@ -0,0 +1,2 @@
+#!/bin/sh
+rm -f src/Makevars
diff --git a/configure b/configure
new file mode 100755
index 0000000..902bc4f
--- /dev/null
+++ b/configure
@@ -0,0 +1,67 @@
+#!/bin/bash
+# Anticonf (tm) script by Jeroen Ooms (2015)
+# This script will query 'pkg-config' for the required cflags and ldflags.
+# If pkg-config is unavailable or does not find the library, try setting
+# INCLUDE_DIR and LIB_DIR manually via e.g:
+# R CMD INSTALL --configure-vars='INCLUDE_DIR=/.../include LIB_DIR=/.../lib'
+
+# Library settings
+PKG_CONFIG_NAME="libxml-2.0"
+PKG_DEB_NAME="libxml2-dev"
+PKG_RPM_NAME="libxml2-devel"
+PKG_CSW_NAME="libxml2_dev"
+PKG_TEST_HEADER="<libxml/tree.h>"
+PKG_LIBS="-lxml2"
+
+# Use xml2-config if available
+if [ $(command -v xml2-config) ]; then
+ PKGCONFIG_CFLAGS=$(xml2-config --cflags)
+ PKGCONFIG_LIBS=$(xml2-config --libs)
+elif [ $(command -v pkg-config) ]; then
+ PKGCONFIG_CFLAGS=$(pkg-config --cflags $PKG_CONFIG_NAME)
+ PKGCONFIG_LIBS=$(pkg-config --libs $PKG_CONFIG_NAME)
+fi
+
+# Note that cflags may be empty in case of success
+if [ "$INCLUDE_DIR" ] || [ "$LIB_DIR" ]; then
+ echo "Found INCLUDE_DIR and/or LIB_DIR!"
+ PKG_CFLAGS="-I$INCLUDE_DIR $PKG_CFLAGS"
+ PKG_LIBS="-L$LIB_DIR $PKG_LIBS"
+elif [ "$PKGCONFIG_CFLAGS" ] || [ "$PKGCONFIG_LIBS" ]; then
+ echo "Found pkg-config cflags and libs!"
+ PKG_CFLAGS=${PKGCONFIG_CFLAGS}
+ PKG_LIBS=${PKGCONFIG_LIBS}
+fi
+
+# Find compiler
+CC=$(${R_HOME}/bin/R CMD config CC)
+CFLAGS=$(${R_HOME}/bin/R CMD config CFLAGS)
+CPPFLAGS=$(${R_HOME}/bin/R CMD config CPPFLAGS)
+
+# For debugging
+echo "Using PKG_CFLAGS=$PKG_CFLAGS"
+echo "Using PKG_LIBS=$PKG_LIBS"
+
+# Test configuration
+echo "#include $PKG_TEST_HEADER" | ${CC} ${CPPFLAGS} ${PKG_CFLAGS} ${CFLAGS} -E -xc - >/dev/null 2>&1 || R_CONFIG_ERROR=1;
+
+# Customize the error
+if [ $R_CONFIG_ERROR ]; then
+ echo "------------------------- ANTICONF ERROR ---------------------------"
+ echo "Configuration failed because $PKG_CONFIG_NAME was not found. Try installing:"
+ echo " * deb: $PKG_DEB_NAME (Debian, Ubuntu, etc)"
+ echo " * rpm: $PKG_RPM_NAME (Fedora, CentOS, RHEL)"
+ echo " * csw: $PKG_CSW_NAME (Solaris)"
+ echo "If $PKG_CONFIG_NAME is already installed, check that 'pkg-config' is in your"
+ echo "PATH and PKG_CONFIG_PATH contains a $PKG_CONFIG_NAME.pc file. If pkg-config"
+ echo "is unavailable you can set INCLUDE_DIR and LIB_DIR manually via:"
+ echo "R CMD INSTALL --configure-vars='INCLUDE_DIR=... LIB_DIR=...'"
+ echo "--------------------------------------------------------------------"
+ exit 1;
+fi
+
+# Write to Makevars
+sed -e "s|@cflags@|$PKG_CFLAGS|" -e "s|@libs@|$PKG_LIBS|" src/Makevars.in > src/Makevars
+
+# Success
+exit 0
diff --git a/configure.win b/configure.win
new file mode 100644
index 0000000..e69de29
diff --git a/debian/README.test b/debian/README.test
deleted file mode 100644
index 8d70ca3..0000000
--- a/debian/README.test
+++ /dev/null
@@ -1,9 +0,0 @@
-Notes on how this package can be tested.
-────────────────────────────────────────
-
-This package can be tested by running the provided test:
-
-cd tests
-LC_ALL=C R --no-save < testthat.R
-
-in order to confirm its integrity.
diff --git a/debian/changelog b/debian/changelog
deleted file mode 100644
index 7b2a479..0000000
--- a/debian/changelog
+++ /dev/null
@@ -1,28 +0,0 @@
-r-cran-xml2 (1.1.0-1) unstable; urgency=medium
-
- * New upstream version
- * debhelper 10
-
- -- Andreas Tille <tille at debian.org> Wed, 11 Jan 2017 09:02:13 +0100
-
-r-cran-xml2 (1.0.0-1) unstable; urgency=medium
-
- * New upstream version
- * Convert to dh-r
- * Canonical homepage for CRAN
- * d/watch: version=4
-
- -- Andreas Tille <tille at debian.org> Wed, 16 Nov 2016 11:10:54 +0100
-
-r-cran-xml2 (0.1.2-2) unstable; urgency=medium
-
- * Add missing dependency: r-cran-rcpp
- * cme fix dpkg-control
-
- -- Andreas Tille <tille at debian.org> Thu, 28 Apr 2016 08:26:49 +0200
-
-r-cran-xml2 (0.1.2-1) unstable; urgency=low
-
- * Initial release (Closes: #819553)
-
- -- Andreas Tille <tille at debian.org> Wed, 30 Mar 2016 15:01:49 +0200
diff --git a/debian/compat b/debian/compat
deleted file mode 100644
index f599e28..0000000
--- a/debian/compat
+++ /dev/null
@@ -1 +0,0 @@
-10
diff --git a/debian/control b/debian/control
deleted file mode 100644
index dd5e1b9..0000000
--- a/debian/control
+++ /dev/null
@@ -1,27 +0,0 @@
-Source: r-cran-xml2
-Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
-Uploaders: Andreas Tille <tille at debian.org>
-Section: gnu-r
-Priority: optional
-Build-Depends: debhelper (>= 10),
- dh-r,
- r-base-dev,
- r-cran-rcpp (>= 0.11.4.6),
- libxml2-dev,
- libboost-dev,
- libjs-jquery
-Standards-Version: 3.9.8
-Vcs-Browser: https://anonscm.debian.org/viewvc/debian-med/trunk/packages/R/r-cran-xml2/trunk/
-Vcs-Svn: svn://anonscm.debian.org/debian-med/trunk/packages/R/r-cran-xml2/trunk/
-Homepage: https://cran.r-project.org/package=xml2
-
-Package: r-cran-xml2
-Architecture: any
-Depends: ${misc:Depends},
- ${shlibs:Depends},
- ${R:Depends}
-Recommends: ${R:Recommends}
-Suggests: ${R:Suggests}
-Description: GNU R XML parser
- This GNU R package works with XML files using a simple, consistent
- interface. Built on top of the 'libxml2' C library.
diff --git a/debian/copyright b/debian/copyright
deleted file mode 100644
index 4b851d0..0000000
--- a/debian/copyright
+++ /dev/null
@@ -1,30 +0,0 @@
-Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
-Upstream-Name: xml2
-Upstream-Contact: Hadley Wickham <hadley at rstudio.com>
-Source: https://cran.r-project.org/package=xml2
-
-Files: *
-Copyright: 2013-2016 Hadley Wickham <hadley at rstudio.com>, RStudio
-License: GPL-2+
-
-Files: debian/*
-Copyright: 2016 Andreas Tille <tille at debian.org>
-License: GPL-2+
-
-License: GPL-2+
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
- .
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- .
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- .
- On Debian systems, the complete text of the GNU General Public
- License can be found in `/usr/share/common-licenses/GPL-2'.
diff --git a/debian/docs b/debian/docs
deleted file mode 100644
index 960011c..0000000
--- a/debian/docs
+++ /dev/null
@@ -1,3 +0,0 @@
-tests
-debian/README.test
-debian/tests/run-unit-test
diff --git a/debian/patches/series b/debian/patches/series
deleted file mode 100644
index 450a5f3..0000000
--- a/debian/patches/series
+++ /dev/null
@@ -1 +0,0 @@
-use_debian_packages_boost.patch
diff --git a/debian/patches/use_debian_packages_boost.patch b/debian/patches/use_debian_packages_boost.patch
deleted file mode 100644
index 22ab9a9..0000000
--- a/debian/patches/use_debian_packages_boost.patch
+++ /dev/null
@@ -1,15 +0,0 @@
-Author: Andreas Tille <tille at debian.org>
-Last-Update: Tue, 22 Mar 2016 21:32:09 +0100
-Description: Use Debian packaged boost library
-
---- a/DESCRIPTION
-+++ b/DESCRIPTION
-@@ -15,7 +15,7 @@ URL: https://github.com/hadley/xml2/
- BugReports: https://github.com/hadley/xml2/issues/
- Depends: R (>= 3.1.0)
- Imports: Rcpp
--LinkingTo: Rcpp (>= 0.11.4.6), BH
-+LinkingTo: Rcpp (>= 0.11.4.6)
- Suggests: testthat, curl, covr, knitr, rmarkdown, magrittr, httr
- SystemRequirements: libxml2: libxml2-dev (deb), libxml2-devel (rpm)
- License: GPL (>= 2)
diff --git a/debian/rules b/debian/rules
deleted file mode 100755
index 1a3fd9b..0000000
--- a/debian/rules
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/usr/bin/make -f
-
-%:
- dh $@ --buildsystem R
-
-override_dh_install:
- dh_install
- find debian -name r-project.html -exec \
- sed -i -e 's|http.*/jquery.min.js|file://usr/share/javascript/jquery/jquery.min.js|' \{\} \;
diff --git a/debian/source/format b/debian/source/format
deleted file mode 100644
index 163aaf8..0000000
--- a/debian/source/format
+++ /dev/null
@@ -1 +0,0 @@
-3.0 (quilt)
diff --git a/debian/tests/control b/debian/tests/control
deleted file mode 100644
index b044b0c..0000000
--- a/debian/tests/control
+++ /dev/null
@@ -1,3 +0,0 @@
-Tests: run-unit-test
-Depends: @, r-cran-testthat
-Restrictions: allow-stderr
diff --git a/debian/tests/run-unit-test b/debian/tests/run-unit-test
deleted file mode 100644
index 177e2e6..0000000
--- a/debian/tests/run-unit-test
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/sh -e
-
-oname=xml2
-pkg=r-cran-`echo $oname | tr [A-Z] [a-z]`
-
-if [ "$ADTTMP" = "" ] ; then
- ADTTMP=`mktemp -d /tmp/${pkg}-test.XXXXXX`
-fi
-cd $ADTTMP
-cp -a /usr/share/doc/${pkg}/tests/* $ADTTMP
-LC_ALL=C R --no-save < testthat.R
-rm -fr $ADTTMP/*
diff --git a/debian/watch b/debian/watch
deleted file mode 100644
index eff2bcb..0000000
--- a/debian/watch
+++ /dev/null
@@ -1,2 +0,0 @@
-version=4
-http://cran.r-project.org/src/contrib/xml2_([-0-9\.]*).tar.gz
diff --git a/inst/doc/modification.R b/inst/doc/modification.R
new file mode 100644
index 0000000..063827e
--- /dev/null
+++ b/inst/doc/modification.R
@@ -0,0 +1,96 @@
+## ---- echo = FALSE, message = FALSE--------------------------------------
+knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
+library(xml2)
+
+## ------------------------------------------------------------------------
+x <- read_xml("<p>This is some <b>text</b>. This is more.</p>")
+xml_text(x)
+
+xml_text(x) <- "This is some other text."
+xml_text(x)
+
+# You can avoid this by explicitly selecting the text node.
+x <- read_xml("<p>This is some text. This is <b>bold!</b></p>")
+text_only <- xml_find_all(x, "//text()")
+
+xml_text(text_only) <- c("This is some other text. ", "Still bold!")
+xml_text(x)
+xml_structure(x)
+
+## ------------------------------------------------------------------------
+x <- read_xml("<a href='invalid!'>xml2</a>")
+xml_attr(x, "href")
+
+xml_attr(x, "href") <- "https://github.com/hadley/xml2"
+xml_attr(x, "href")
+
+xml_attrs(x) <- c(id = "xml2", href = "https://github.com/hadley/xml2")
+xml_attrs(x)
+x
+
+xml_attrs(x) <- NULL
+x
+
+# Namespaces are added with as a xmlns or xmlns:prefix attribute
+xml_attr(x, "xmlns") <- "http://foo"
+x
+
+xml_attr(x, "xmlns:bar") <- "http://bar"
+x
+
+## ------------------------------------------------------------------------
+x <- read_xml("<a><b/></a>")
+x
+xml_name(x)
+xml_name(x) <- "c"
+x
+
+## ------------------------------------------------------------------------
+x <- read_xml("<parent><child>1</child><child>2<child>3</child></child></parent>")
+children <- xml_children(x)
+t1 <- children[[1]]
+t2 <- children[[2]]
+t3 <- xml_children(children[[2]])[[1]]
+
+xml_replace(t1, t3)
+x
+
+## ------------------------------------------------------------------------
+x <- read_xml("<parent><child>1</child><child>2<child>3</child></child></parent>")
+children <- xml_children(x)
+t1 <- children[[1]]
+t2 <- children[[2]]
+t3 <- xml_children(children[[2]])[[1]]
+
+xml_add_sibling(t1, t3)
+x
+
+xml_add_sibling(t3, t1, where = "before")
+x
+
+## ------------------------------------------------------------------------
+x <- read_xml("<parent><child>1</child><child>2<child>3</child></child></parent>")
+children <- xml_children(x)
+t1 <- children[[1]]
+t2 <- children[[2]]
+t3 <- xml_children(children[[2]])[[1]]
+
+xml_add_child(t1, t3)
+x
+
+xml_add_child(t1, read_xml("<test/>"))
+x
+
+## ------------------------------------------------------------------------
+library(magrittr)
+d <- xml_new_root("sld",
+ xmlns = "http://www.o.net/sld",
+ "xmlns:ogc" = "http://www.o.net/ogc",
+ "xmlns:se" = "http://www.o.net/se",
+ version = "1.1.0") %>%
+ xml_add_child("layer") %>%
+ xml_add_child("se:Name", "My Layer") %>%
+ xml_root()
+
+d
+
diff --git a/inst/doc/modification.Rmd b/inst/doc/modification.Rmd
new file mode 100644
index 0000000..ac10732
--- /dev/null
+++ b/inst/doc/modification.Rmd
@@ -0,0 +1,197 @@
+---
+title: "Node Modification"
+author: "Jim Hester"
+date: "`r Sys.Date()`"
+output: rmarkdown::html_vignette
+vignette: >
+ %\VignetteIndexEntry{Node Modification}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+---
+
+```{r, echo = FALSE, message = FALSE}
+knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
+library(xml2)
+```
+
+# Modifying Existing XML
+
+Modifying existing XML can be done in xml2 by using the replacement functions
+of the accessors. They all have methods for both individual `xml_node` objects
+as well as `xml_nodeset` objects. If a vector of values is provided it is
+applied piecewise over the nodeset, otherwise the value is recycled.
+
+## Text Modification ##
+
+Text modification only happens on text nodes. If a given node has more than one
+text node only the first will be affected. If you want to modify additional
+text nodes you need to select them explicitly with `/text()`.
+
+```{r}
+x <- read_xml("<p>This is some <b>text</b>. This is more.</p>")
+xml_text(x)
+
+xml_text(x) <- "This is some other text."
+xml_text(x)
+
+# You can avoid this by explicitly selecting the text node.
+x <- read_xml("<p>This is some text. This is <b>bold!</b></p>")
+text_only <- xml_find_all(x, "//text()")
+
+xml_text(text_only) <- c("This is some other text. ", "Still bold!")
+xml_text(x)
+xml_structure(x)
+```
+
+## Attribute and Namespace Definition Modification ##
+
+Attributes and namespace definitions are modified one at a time with
+`xml_attr()` or all at once with `xml_attrs()`. In both cases using `NULL` as
+the value will remove the attribute completely.
+
+```{r}
+x <- read_xml("<a href='invalid!'>xml2</a>")
+xml_attr(x, "href")
+
+xml_attr(x, "href") <- "https://github.com/hadley/xml2"
+xml_attr(x, "href")
+
+xml_attrs(x) <- c(id = "xml2", href = "https://github.com/hadley/xml2")
+xml_attrs(x)
+x
+
+xml_attrs(x) <- NULL
+x
+
+# Namespaces are added with as a xmlns or xmlns:prefix attribute
+xml_attr(x, "xmlns") <- "http://foo"
+x
+
+xml_attr(x, "xmlns:bar") <- "http://bar"
+x
+```
+
+## Name Modification ##
+
+Node names are modified with `xml_name()`.
+
+```{r}
+x <- read_xml("<a><b/></a>")
+x
+xml_name(x)
+xml_name(x) <- "c"
+x
+```
+
+# Node modification #
+All of these functions have a `.copy` argument. If this is set to `FALSE` they
+will remove the new node from its location before inserting it into the new
+location. Otherwise they make a copy of the node before insertion.
+
+## Replacing existing nodes ##
+```{r}
+x <- read_xml("<parent><child>1</child><child>2<child>3</child></child></parent>")
+children <- xml_children(x)
+t1 <- children[[1]]
+t2 <- children[[2]]
+t3 <- xml_children(children[[2]])[[1]]
+
+xml_replace(t1, t3)
+x
+```
+
+## Add a sibling ##
+```{r}
+x <- read_xml("<parent><child>1</child><child>2<child>3</child></child></parent>")
+children <- xml_children(x)
+t1 <- children[[1]]
+t2 <- children[[2]]
+t3 <- xml_children(children[[2]])[[1]]
+
+xml_add_sibling(t1, t3)
+x
+
+xml_add_sibling(t3, t1, where = "before")
+x
+```
+
+## Add a child ##
+```{r}
+x <- read_xml("<parent><child>1</child><child>2<child>3</child></child></parent>")
+children <- xml_children(x)
+t1 <- children[[1]]
+t2 <- children[[2]]
+t3 <- xml_children(children[[2]])[[1]]
+
+xml_add_child(t1, t3)
+x
+
+xml_add_child(t1, read_xml("<test/>"))
+x
+```
+
+## Removing nodes ##
+The `xml_remove()` can be used to remove a node (and it's children) from a
+tree. The default behavior is to unlink the node from the tree, but does _not_
+free the memory for the node, so R objects pointing to the node are still
+valid.
+
+This allows code like the following to work without crashing R
+
+```r
+x <- read_xml("<foo><bar><baz/><bar></foo>")
+x1 <- x %>% xml_children() %>% .[[1]]
+x2 <- x1 %>% xml_children() %>% .[[1]]
+
+xml_remove(x1)
+rm(x1)
+gc()
+
+x2
+```
+If you are not planning on referencing these nodes again this memory is wasted.
+Calling `xml_remove(free = TRUE)` will remove the nodes _and_ free the memory
+used to store them. **Note** In this case _any_ node which previously pointed
+to the node or it's children will instead be pointing to free memory and may
+cause R to crash. xml2 can't figure this out for you, so it's your
+responsibility to remove any objects which are no longer valid.
+
+In particular `xml_find_*()` results are easy to overlook, for example
+
+```r
+x <- read_xml("<a><b /><b><b /></b></a>")
+bees <- xml_find_all(x, "//b")
+xml_remove(xml_child(x), free = TRUE)
+# bees[[1]] is no longer valid!!!
+rm(bees)
+```
+
+## Namespaces ##
+
+We want to construct a document with the following namespace layout. (From
+http://stackoverflow.com/questions/32939229/creating-xml-in-r-with-namespaces/32941524#32941524).
+```xml
+<?xml version = "1.0" encoding="UTF-8"?>
+<sld xmlns="http://www.o.net/sld"
+ xmlns:ogc="http://www.o.net/ogc"
+ xmlns:se="http://www.o.net/se"
+ version="1.1.0" >
+<layer>
+<se:Name>My Layer</se:Name>
+</layer>
+</sld>
+```
+
+```{r}
+library(magrittr)
+d <- xml_new_root("sld",
+ xmlns = "http://www.o.net/sld",
+ "xmlns:ogc" = "http://www.o.net/ogc",
+ "xmlns:se" = "http://www.o.net/se",
+ version = "1.1.0") %>%
+ xml_add_child("layer") %>%
+ xml_add_child("se:Name", "My Layer") %>%
+ xml_root()
+
+d
+```
diff --git a/inst/doc/modification.html b/inst/doc/modification.html
new file mode 100644
index 0000000..9e80c86
--- /dev/null
+++ b/inst/doc/modification.html
@@ -0,0 +1,285 @@
+<!DOCTYPE html>
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+
+<head>
+
+<meta charset="utf-8">
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+<meta name="generator" content="pandoc" />
+
+<meta name="viewport" content="width=device-width, initial-scale=1">
+
+<meta name="author" content="Jim Hester" />
+
+<meta name="date" content="2017-01-06" />
+
+<title>Node Modification</title>
+
+
+
+<style type="text/css">code{white-space: pre;}</style>
+<style type="text/css">
+div.sourceCode { overflow-x: auto; }
+table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode {
+ margin: 0; padding: 0; vertical-align: baseline; border: none; }
+table.sourceCode { width: 100%; line-height: 100%; }
+td.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; color: #aaaaaa; border-right: 1px solid #aaaaaa; }
+td.sourceCode { padding-left: 5px; }
+code > span.kw { color: #007020; font-weight: bold; } /* Keyword */
+code > span.dt { color: #902000; } /* DataType */
+code > span.dv { color: #40a070; } /* DecVal */
+code > span.bn { color: #40a070; } /* BaseN */
+code > span.fl { color: #40a070; } /* Float */
+code > span.ch { color: #4070a0; } /* Char */
+code > span.st { color: #4070a0; } /* String */
+code > span.co { color: #60a0b0; font-style: italic; } /* Comment */
+code > span.ot { color: #007020; } /* Other */
+code > span.al { color: #ff0000; font-weight: bold; } /* Alert */
+code > span.fu { color: #06287e; } /* Function */
+code > span.er { color: #ff0000; font-weight: bold; } /* Error */
+code > span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
+code > span.cn { color: #880000; } /* Constant */
+code > span.sc { color: #4070a0; } /* SpecialChar */
+code > span.vs { color: #4070a0; } /* VerbatimString */
+code > span.ss { color: #bb6688; } /* SpecialString */
+code > span.im { } /* Import */
+code > span.va { color: #19177c; } /* Variable */
+code > span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
+code > span.op { color: #666666; } /* Operator */
+code > span.bu { } /* BuiltIn */
+code > span.ex { } /* Extension */
+code > span.pp { color: #bc7a00; } /* Preprocessor */
+code > span.at { color: #7d9029; } /* Attribute */
+code > span.do { color: #ba2121; font-style: italic; } /* Documentation */
+code > span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
+code > span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
+code > span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
+</style>
+
+
+
+<link href="data:text/css;charset=utf-8,body%20%7B%0Abackground%2Dcolor%3A%20%23fff%3B%0Amargin%3A%201em%20auto%3B%0Amax%2Dwidth%3A%20700px%3B%0Aoverflow%3A%20visible%3B%0Apadding%2Dleft%3A%202em%3B%0Apadding%2Dright%3A%202em%3B%0Afont%2Dfamily%3A%20%22Open%20Sans%22%2C%20%22Helvetica%20Neue%22%2C%20Helvetica%2C%20Arial%2C%20sans%2Dserif%3B%0Afont%2Dsize%3A%2014px%3B%0Aline%2Dheight%3A%201%2E35%3B%0A%7D%0A%23header%20%7B%0Atext%2Dalign%3A%20center%3B%0A%7D%0A%23TOC%20%7B%0Aclear%3A%20bot [...]
+
+</head>
+
+<body>
+
+
+
+
+<h1 class="title toc-ignore">Node Modification</h1>
+<h4 class="author"><em>Jim Hester</em></h4>
+<h4 class="date"><em>2017-01-06</em></h4>
+
+
+
+<div id="modifying-existing-xml" class="section level1">
+<h1>Modifying Existing XML</h1>
+<p>Modifying existing XML can be done in xml2 by using the replacement functions of the accessors. They all have methods for both individual <code>xml_node</code> objects as well as <code>xml_nodeset</code> objects. If a vector of values is provided it is applied piecewise over the nodeset, otherwise the value is recycled.</p>
+<div id="text-modification" class="section level2">
+<h2>Text Modification</h2>
+<p>Text modification only happens on text nodes. If a given node has more than one text node only the first will be affected. If you want to modify additional text nodes you need to select them explicitly with <code>/text()</code>.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">x <-<span class="st"> </span><span class="kw">read_xml</span>(<span class="st">"<p>This is some <b>text</b>. This is more.</p>"</span>)
+<span class="kw">xml_text</span>(x)
+<span class="co">#> [1] "This is some text. This is more."</span>
+
+<span class="kw">xml_text</span>(x) <-<span class="st"> "This is some other text."</span>
+<span class="kw">xml_text</span>(x)
+<span class="co">#> [1] "This is some other text.text. This is more."</span>
+
+<span class="co"># You can avoid this by explicitly selecting the text node.</span>
+x <-<span class="st"> </span><span class="kw">read_xml</span>(<span class="st">"<p>This is some text. This is <b>bold!</b></p>"</span>)
+text_only <-<span class="st"> </span><span class="kw">xml_find_all</span>(x, <span class="st">"//text()"</span>)
+
+<span class="kw">xml_text</span>(text_only) <-<span class="st"> </span><span class="kw">c</span>(<span class="st">"This is some other text. "</span>, <span class="st">"Still bold!"</span>)
+<span class="kw">xml_text</span>(x)
+<span class="co">#> [1] "This is some other text. Still bold!"</span>
+<span class="kw">xml_structure</span>(x)
+<span class="co">#> <p></span>
+<span class="co">#> {text}</span>
+<span class="co">#> <b></span>
+<span class="co">#> {text}</span></code></pre></div>
+</div>
+<div id="attribute-and-namespace-definition-modification" class="section level2">
+<h2>Attribute and Namespace Definition Modification</h2>
+<p>Attributes and namespace definitions are modified one at a time with <code>xml_attr()</code> or all at once with <code>xml_attrs()</code>. In both cases using <code>NULL</code> as the value will remove the attribute completely.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">x <-<span class="st"> </span><span class="kw">read_xml</span>(<span class="st">"<a href='invalid!'>xml2</a>"</span>)
+<span class="kw">xml_attr</span>(x, <span class="st">"href"</span>)
+<span class="co">#> [1] "invalid!"</span>
+
+<span class="kw">xml_attr</span>(x, <span class="st">"href"</span>) <-<span class="st"> "https://github.com/hadley/xml2"</span>
+<span class="kw">xml_attr</span>(x, <span class="st">"href"</span>)
+<span class="co">#> [1] "https://github.com/hadley/xml2"</span>
+
+<span class="kw">xml_attrs</span>(x) <-<span class="st"> </span><span class="kw">c</span>(<span class="dt">id =</span> <span class="st">"xml2"</span>, <span class="dt">href =</span> <span class="st">"https://github.com/hadley/xml2"</span>)
+<span class="kw">xml_attrs</span>(x)
+<span class="co">#> href id </span>
+<span class="co">#> "https://github.com/hadley/xml2" "xml2"</span>
+x
+<span class="co">#> {xml_document}</span>
+<span class="co">#> <a href="https://github.com/hadley/xml2" id="xml2"></span>
+
+<span class="kw">xml_attrs</span>(x) <-<span class="st"> </span><span class="ot">NULL</span>
+x
+<span class="co">#> {xml_document}</span>
+<span class="co">#> <a></span>
+
+<span class="co"># Namespaces are added with as a xmlns or xmlns:prefix attribute</span>
+<span class="kw">xml_attr</span>(x, <span class="st">"xmlns"</span>) <-<span class="st"> "http://foo"</span>
+x
+<span class="co">#> {xml_document}</span>
+<span class="co">#> <a xmlns="http://foo"></span>
+
+<span class="kw">xml_attr</span>(x, <span class="st">"xmlns:bar"</span>) <-<span class="st"> "http://bar"</span>
+x
+<span class="co">#> {xml_document}</span>
+<span class="co">#> <a xmlns="http://foo" xmlns:bar="http://bar"></span></code></pre></div>
+</div>
+<div id="name-modification" class="section level2">
+<h2>Name Modification</h2>
+<p>Node names are modified with <code>xml_name()</code>.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">x <-<span class="st"> </span><span class="kw">read_xml</span>(<span class="st">"<a><b/></a>"</span>)
+x
+<span class="co">#> {xml_document}</span>
+<span class="co">#> <a></span>
+<span class="co">#> [1] <b/></span>
+<span class="kw">xml_name</span>(x)
+<span class="co">#> [1] "a"</span>
+<span class="kw">xml_name</span>(x) <-<span class="st"> "c"</span>
+x
+<span class="co">#> {xml_document}</span>
+<span class="co">#> <c></span>
+<span class="co">#> [1] <b/></span></code></pre></div>
+</div>
+</div>
+<div id="node-modification" class="section level1">
+<h1>Node modification</h1>
+<p>All of these functions have a <code>.copy</code> argument. If this is set to <code>FALSE</code> they will remove the new node from its location before inserting it into the new location. Otherwise they make a copy of the node before insertion.</p>
+<div id="replacing-existing-nodes" class="section level2">
+<h2>Replacing existing nodes</h2>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">x <-<span class="st"> </span><span class="kw">read_xml</span>(<span class="st">"<parent><child>1</child><child>2<child>3</child></child></parent>"</span>)
+children <-<span class="st"> </span><span class="kw">xml_children</span>(x)
+t1 <-<span class="st"> </span>children[[<span class="dv">1</span>]]
+t2 <-<span class="st"> </span>children[[<span class="dv">2</span>]]
+t3 <-<span class="st"> </span><span class="kw">xml_children</span>(children[[<span class="dv">2</span>]])[[<span class="dv">1</span>]]
+
+<span class="kw">xml_replace</span>(t1, t3)
+<span class="co">#> {xml_node}</span>
+<span class="co">#> <child></span>
+x
+<span class="co">#> {xml_document}</span>
+<span class="co">#> <parent></span>
+<span class="co">#> [1] <child>3</child></span>
+<span class="co">#> [2] <child>2<child>3</child></child></span></code></pre></div>
+</div>
+<div id="add-a-sibling" class="section level2">
+<h2>Add a sibling</h2>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">x <-<span class="st"> </span><span class="kw">read_xml</span>(<span class="st">"<parent><child>1</child><child>2<child>3</child></child></parent>"</span>)
+children <-<span class="st"> </span><span class="kw">xml_children</span>(x)
+t1 <-<span class="st"> </span>children[[<span class="dv">1</span>]]
+t2 <-<span class="st"> </span>children[[<span class="dv">2</span>]]
+t3 <-<span class="st"> </span><span class="kw">xml_children</span>(children[[<span class="dv">2</span>]])[[<span class="dv">1</span>]]
+
+<span class="kw">xml_add_sibling</span>(t1, t3)
+x
+<span class="co">#> {xml_document}</span>
+<span class="co">#> <parent></span>
+<span class="co">#> [1] <child>1</child></span>
+<span class="co">#> [2] <child>3</child></span>
+<span class="co">#> [3] <child>2<child>3</child></child></span>
+
+<span class="kw">xml_add_sibling</span>(t3, t1, <span class="dt">where =</span> <span class="st">"before"</span>)
+x
+<span class="co">#> {xml_document}</span>
+<span class="co">#> <parent></span>
+<span class="co">#> [1] <child>1</child></span>
+<span class="co">#> [2] <child>3</child></span>
+<span class="co">#> [3] <child>2<child>3</child><child>1</child></child></span></code></pre></div>
+</div>
+<div id="add-a-child" class="section level2">
+<h2>Add a child</h2>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">x <-<span class="st"> </span><span class="kw">read_xml</span>(<span class="st">"<parent><child>1</child><child>2<child>3</child></child></parent>"</span>)
+children <-<span class="st"> </span><span class="kw">xml_children</span>(x)
+t1 <-<span class="st"> </span>children[[<span class="dv">1</span>]]
+t2 <-<span class="st"> </span>children[[<span class="dv">2</span>]]
+t3 <-<span class="st"> </span><span class="kw">xml_children</span>(children[[<span class="dv">2</span>]])[[<span class="dv">1</span>]]
+
+<span class="kw">xml_add_child</span>(t1, t3)
+x
+<span class="co">#> {xml_document}</span>
+<span class="co">#> <parent></span>
+<span class="co">#> [1] <child>1<child>3</child></child></span>
+<span class="co">#> [2] <child>2<child>3</child></child></span>
+
+<span class="kw">xml_add_child</span>(t1, <span class="kw">read_xml</span>(<span class="st">"<test/>"</span>))
+x
+<span class="co">#> {xml_document}</span>
+<span class="co">#> <parent></span>
+<span class="co">#> [1] <child>1<child>3</child><test/></child></span>
+<span class="co">#> [2] <child>2<child>3</child></child></span></code></pre></div>
+</div>
+<div id="removing-nodes" class="section level2">
+<h2>Removing nodes</h2>
+<p>The <code>xml_remove()</code> can be used to remove a node (and it’s children) from a tree. The default behavior is to unlink the node from the tree, but does <em>not</em> free the memory for the node, so R objects pointing to the node are still valid.</p>
+<p>This allows code like the following to work without crashing R</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">x <-<span class="st"> </span><span class="kw">read_xml</span>(<span class="st">"<foo><bar><baz/><bar></foo>"</span>)
+x1 <-<span class="st"> </span>x %>%<span class="st"> </span><span class="kw">xml_children</span>() %>%<span class="st"> </span>.[[<span class="dv">1</span>]]
+x2 <-<span class="st"> </span>x1 %>%<span class="st"> </span><span class="kw">xml_children</span>() %>%<span class="st"> </span>.[[<span class="dv">1</span>]]
+
+<span class="kw">xml_remove</span>(x1)
+<span class="kw">rm</span>(x1)
+<span class="kw">gc</span>()
+
+x2</code></pre></div>
+<p>If you are not planning on referencing these nodes again this memory is wasted. Calling <code>xml_remove(free = TRUE)</code> will remove the nodes <em>and</em> free the memory used to store them. <strong>Note</strong> In this case <em>any</em> node which previously pointed to the node or it’s children will instead be pointing to free memory and may cause R to crash. xml2 can’t figure this out for you, so it’s your responsibility to remove any objects which are no longer valid.</p>
+<p>In particular <code>xml_find_*()</code> results are easy to overlook, for example</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">x <-<span class="st"> </span><span class="kw">read_xml</span>(<span class="st">"<a><b /><b><b /></b></a>"</span>)
+bees <-<span class="st"> </span><span class="kw">xml_find_all</span>(x, <span class="st">"//b"</span>)
+<span class="kw">xml_remove</span>(<span class="kw">xml_child</span>(x), <span class="dt">free =</span> <span class="ot">TRUE</span>)
+<span class="co"># bees[[1]] is no longer valid!!!</span>
+<span class="kw">rm</span>(bees)</code></pre></div>
+</div>
+<div id="namespaces" class="section level2">
+<h2>Namespaces</h2>
+<p>We want to construct a document with the following namespace layout. (From <a href="http://stackoverflow.com/questions/32939229/creating-xml-in-r-with-namespaces/32941524#32941524" class="uri">http://stackoverflow.com/questions/32939229/creating-xml-in-r-with-namespaces/32941524#32941524</a>).</p>
+<div class="sourceCode"><pre class="sourceCode xml"><code class="sourceCode xml"><span class="kw"><?xml</span> version = "1.0" encoding="UTF-8"<span class="kw">?></span>
+<span class="kw"><sld</span><span class="ot"> xmlns=</span><span class="st">"http://www.o.net/sld"</span>
+<span class="ot"> xmlns:ogc=</span><span class="st">"http://www.o.net/ogc"</span>
+<span class="ot"> xmlns:se=</span><span class="st">"http://www.o.net/se"</span>
+<span class="ot"> version=</span><span class="st">"1.1.0"</span> <span class="kw">></span>
+<span class="kw"><layer></span>
+<span class="kw"><se:Name></span>My Layer<span class="kw"></se:Name></span>
+<span class="kw"></layer></span>
+<span class="kw"></sld></span></code></pre></div>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(magrittr)
+d <-<span class="st"> </span><span class="kw">xml_new_root</span>(<span class="st">"sld"</span>,
+ <span class="dt">xmlns =</span> <span class="st">"http://www.o.net/sld"</span>,
+ <span class="st">"xmlns:ogc"</span> =<span class="st"> "http://www.o.net/ogc"</span>,
+ <span class="st">"xmlns:se"</span> =<span class="st"> "http://www.o.net/se"</span>,
+ <span class="dt">version =</span> <span class="st">"1.1.0"</span>) %>%
+<span class="st"> </span><span class="kw">xml_add_child</span>(<span class="st">"layer"</span>) %>%
+<span class="st"> </span><span class="kw">xml_add_child</span>(<span class="st">"se:Name"</span>, <span class="st">"My Layer"</span>) %>%
+<span class="st"> </span><span class="kw">xml_root</span>()
+
+d
+<span class="co">#> {xml_document}</span>
+<span class="co">#> <sld version="1.1.0" xmlns="http://www.o.net/sld" xmlns:ogc="http://www.o.net/ogc" xmlns:se="http://www.o.net/se"></span>
+<span class="co">#> [1] <layer>\n <se:Name>My Layer</se:Name>\n</layer></span></code></pre></div>
+</div>
+</div>
+
+
+
+<!-- dynamically load mathjax for compatibility with self-contained -->
+<script>
+ (function () {
+ var script = document.createElement("script");
+ script.type = "text/javascript";
+ script.src = "https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
+ document.getElementsByTagName("head")[0].appendChild(script);
+ })();
+</script>
+
+</body>
+</html>
diff --git a/inst/extdata/order-doc.xml b/inst/extdata/order-doc.xml
new file mode 100644
index 0000000..b97c0f9
--- /dev/null
+++ b/inst/extdata/order-doc.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0"?>
+<purchaseOrder xmlns="http://tempuri.org/po.xsd" orderDate="1999-10-20">
+ <shipTo country="US">
+ <name>Alice Smith</name>
+ <street>123 Maple Street</street>
+ <city>Mill Valley</city>
+ <state>CA</state>
+ <zip>90952</zip>
+ </shipTo>
+ <billTo country="US">
+ <name>Robert Smith</name>
+ <street>8 Oak Avenue</street>
+ <city>Old Town</city>
+ <state>PA</state>
+ <zip>95819</zip>
+ </billTo>
+ <comment>Hurry, my lawn is going wild!</comment>
+ <items>
+ <item partNum="872-AA">
+ <productName>Lawnmower</productName>
+ <quantity>1</quantity>
+ <USPrice>148.95</USPrice>
+ <comment>Confirm this is electric</comment>
+ </item>
+ <item partNum="926-AA">
+ <productName>Baby Monitor</productName>
+ <quantity>1</quantity>
+ <USPrice>39.98</USPrice>
+ <shipDate>1999-05-21</shipDate>
+ </item>
+ </items>
+</purchaseOrder>
diff --git a/inst/extdata/order-schema.xml b/inst/extdata/order-schema.xml
new file mode 100644
index 0000000..685d31c
--- /dev/null
+++ b/inst/extdata/order-schema.xml
@@ -0,0 +1,76 @@
+<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" targetNamespace="http://tempuri.org/po.xsd"
+xmlns="http://tempuri.org/po.xsd" elementFormDefault="qualified">
+ <xs:annotation>
+ <xs:documentation xml:lang="en">
+ Purchase order schema for Example.com.
+ Copyright 2000 Example.com. All rights reserved.
+ </xs:documentation>
+ </xs:annotation>
+
+ <xs:element name="purchaseOrder" type="PurchaseOrderType"/>
+
+ <xs:element name="comment" type="xs:string"/>
+
+ <xs:complexType name="PurchaseOrderType">
+ <xs:sequence>
+ <xs:element name="shipTo" type="USAddress"/>
+ <xs:element name="billTo" type="USAddress"/>
+ <xs:element ref="comment" minOccurs="0"/>
+ <xs:element name="items" type="Items"/>
+ </xs:sequence>
+ <xs:attribute name="orderDate" type="xs:date"/>
+ </xs:complexType>
+
+ <xs:complexType name="USAddress">
+ <xs:annotation>
+ <xs:documentation>
+ Purchase order schema for Example.Microsoft.com.
+ Copyright 2001 Example.Microsoft.com. All rights reserved.
+ </xs:documentation>
+ <xs:appinfo>
+ Application info.
+ </xs:appinfo>
+ </xs:annotation>
+
+ <xs:sequence>
+ <xs:element name="name" type="xs:string"/>
+ <xs:element name="street" type="xs:string"/>
+ <xs:element name="city" type="xs:string"/>
+ <xs:element name="state" type="xs:string"/>
+ <xs:element name="zip" type="xs:decimal"/>
+ </xs:sequence>
+ <xs:attribute name="country" type="xs:NMTOKEN"
+ fixed="US"/>
+ </xs:complexType>
+
+ <xs:complexType name="Items">
+ <xs:sequence>
+ <xs:element name="item" minOccurs="0" maxOccurs="unbounded">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:element name="productName" type="xs:string"/>
+ <xs:element name="quantity">
+ <xs:simpleType>
+ <xs:restriction base="xs:positiveInteger">
+ <xs:maxExclusive value="100"/>
+ </xs:restriction>
+ </xs:simpleType>
+ </xs:element>
+ <xs:element name="USPrice" type="xs:decimal"/>
+ <xs:element ref="comment" minOccurs="0"/>
+ <xs:element name="shipDate" type="xs:date" minOccurs="0"/>
+ </xs:sequence>
+ <xs:attribute name="partNum" type="SKU" use="required"/>
+ </xs:complexType>
+ </xs:element>
+ </xs:sequence>
+ </xs:complexType>
+
+ <!-- Stock Keeping Unit, a code for identifying products -->
+ <xs:simpleType name="SKU">
+ <xs:restriction base="xs:string">
+ <xs:pattern value="\d{3}-[A-Z]{2}"/>
+ </xs:restriction>
+ </xs:simpleType>
+
+</xs:schema>
diff --git a/inst/extdata/r-project.html b/inst/extdata/r-project.html
new file mode 100644
index 0000000..eefffb2
--- /dev/null
+++ b/inst/extdata/r-project.html
@@ -0,0 +1,102 @@
+<!DOCTYPE html>
+<html lang="en">
+ <head>
+ <meta charset="utf-8">
+ <meta http-equiv="X-UA-Compatible" content="IE=edge">
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+ <title>R: The R Project for Statistical Computing</title>
+
+ <link rel="icon" type="image/png" href="/favicon-32x32.png" sizes="32x32" />
+ <link rel="icon" type="image/png" href="/favicon-16x16.png" sizes="16x16" />
+
+ <!-- Bootstrap -->
+ <link href="/css/bootstrap.min.css" rel="stylesheet">
+ <link href="/css/R.css" rel="stylesheet">
+
+ <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
+ <!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
+ <!--[if lt IE 9]>
+ <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
+ <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
+ <![endif]-->
+ </head>
+ <body>
+ <div class="container page">
+ <div class="row">
+ <div class="col-xs-12 col-sm-offset-1 col-sm-2 sidebar" role="navigation">
+<div class="row">
+<div class="col-xs-4 col-sm-12">
+<p><a href="/"><img src="/Rlogo.jpg" alt="R" /></a></p>
+<p><small><a href="/">[Home]</a></small></p>
+<h2>Download</h2>
+<p><a href="http://cran.r-project.org/mirrors.html">CRAN</a></p>
+<h2>R Project</h2>
+<ul>
+<li><a href="/about.html">About R</a></li>
+<li><a href="/contributors.html">Contributors</a></li>
+<li><a href="/news.html">What’s New?</a></li>
+<li><a href="/mail.html">Mailing Lists</a></li>
+<li><a href="http://bugs.R-project.org">Bug Tracking</a></li>
+<li><a href="/conferences.html">Conferences</a></li>
+<li><a href="/search.html">Search</a></li>
+</ul>
+</div>
+<div class="col-xs-4 col-sm-12">
+<h2>R Foundation</h2>
+<ul>
+<li><a href="/foundation/">Foundation</a></li>
+<li><a href="/foundation/board.html">Board</a></li>
+<li><a href="/foundation/members.html">Members</a></li>
+<li><a href="/foundation/donors.html">Donors</a></li>
+<li><a href="/foundation/donations.html">Donate</a></li>
+</ul>
+</div>
+<div class="col-xs-4 col-sm-12">
+<h2>Documentation</h2>
+<ul>
+<li><a href="http://cran.r-project.org/manuals.html">Manuals</a></li>
+<li><a href="http://cran.r-project.org/faqs.html">FAQs</a></li>
+<li><a href="http://journal.r-project.org">The R Journal</a></li>
+<li><a href="/doc/bib/R-books.html">Books</a></li>
+<li><a href="/certification.html">Certification</a></li>
+<li><a href="/other-docs.html">Other</a></li>
+</ul>
+</div>
+<div class="col-xs-4 col-sm-12">
+<h2>Links</h2>
+<ul>
+<li><a href="http://www.bioconductor.org">Bioconductor</a></li>
+<li><a href="/other-projects.html">Related Projects</a></li>
+</ul>
+</div>
+</div>
+ </div>
+ <div class="col-xs-12 col-sm-7">
+ <h1>The R Project for Statistical Computing</h1>
+<h2 id="getting-started">Getting Started</h2>
+<p>R is a free software environment for statistical computing and graphics. It compiles and runs on a wide variety of UNIX platforms, Windows and MacOS. To <strong><a href="http://cran.r-project.org/mirrors.html">download R</a></strong>, please choose your preferred <a href="http://cran.r-project.org/mirrors.html">CRAN mirror</a>.</p>
+<p>If you have questions about R like how to download and install the software, or what the license terms are, please read our <a href="http://cran.R-project.org/faqs.html">answers to frequently asked questions</a> before you send an email.</p>
+<h2 id="news">News</h2>
+<ul>
+<li><p><a href="http://cran.r-project.org/src/base-prerelease"><strong>R 3.2.0 (Full of Ingredients) prerelease versions</strong></a> will appear starting March 19. Final release is scheduled for 2015-04-16.</p></li>
+<li><p><strong>R version 3.1.3</strong> (Smooth Sidewalk) has been released on 2015-03-09.</p></li>
+<li><p><a href="http://journal.r-project.org"><strong>The R Journal Volume 6/2</strong></a> is available.</p></li>
+<li><p><strong>R version 3.1.2</strong> (Pumpkin Helmet) has been released on 2014-10-31.</p></li>
+<li><p><strong><a href="http://www.r-project.org/useR-2015">useR! 2015</a></strong>, will take place at the University of Aalborg, Denmark, June 30 - July 3, 2015.</p></li>
+<li><p><strong><a href="http://www.r-project.org/useR-2014">useR! 2014</a></strong>, took place at the University of California, Los Angeles, USA June 30 - July 3, 2014.</p></li>
+</ul>
+<!--- (Boilerplate for release run-in)
+- [**R 3.1.3 (Smooth Sidewalk) prerelease versions**](http://cran.r-project.org/src/base-prerelease) will appear starting February 28. Final release is scheduled for 2015-03-09.
+-->
+ </div>
+ </div>
+ <div class="raw footer">
+ © The R Foundation.
+ </div>
+ </div>
+ <!-- jQuery (necessary for Bootstrap's JavaScript plugins) -->
+ <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.1/jquery.min.js"></script>
+ <!-- Include all compiled plugins (below), or include individual files as needed -->
+ <script src="js/bootstrap.min.js"></script>
+ </body>
+</html>
diff --git a/inst/include/xml2_types.h b/inst/include/xml2_types.h
new file mode 100644
index 0000000..442e928
--- /dev/null
+++ b/inst/include/xml2_types.h
@@ -0,0 +1,18 @@
+#ifndef __XML2_XML2_TYPES__
+#define __XML2_XML2_TYPES__
+
+#include <libxml/tree.h>
+#include <Rcpp.h>
+
+inline void finaliseNode(xmlNodePtr node) {
+ // do nothing
+}
+
+inline void finaliseNs(xmlNsPtr ns) {
+ // do nothing
+}
+
+typedef Rcpp::XPtr<xmlDoc,Rcpp::PreserveStorage,xmlFreeDoc> XPtrDoc;
+typedef Rcpp::XPtr<xmlNode,Rcpp::PreserveStorage,finaliseNode> XPtrNode;
+typedef Rcpp::XPtr<xmlNs,Rcpp::PreserveStorage,finaliseNs> XPtrNs;
+#endif
diff --git a/man/as_list.Rd b/man/as_list.Rd
new file mode 100644
index 0000000..51538de
--- /dev/null
+++ b/man/as_list.Rd
@@ -0,0 +1,46 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/as_list.R
+\name{as_list}
+\alias{as_list}
+\title{Coerce xml nodes to a list.}
+\usage{
+as_list(x, ns = character(), ...)
+}
+\arguments{
+\item{x}{A document, node, or node set.}
+
+\item{ns}{Optionally, a named vector giving prefix-url pairs, as produced
+by \code{\link{xml_ns}}. If provided, all names will be explicitly
+qualified with the ns prefix, i.e. if the element \code{bar} is defined
+in namespace \code{foo}, it will be called \code{foo:bar}. (And
+similarly for atttributes). Default namespaces must be given an explicit
+name. The ns is ignored when using \code{\link{xml_name<-}} and
+\code{\link{xml_set_name}}.}
+
+\item{...}{Needed for compatibility with generic. Unused.}
+}
+\description{
+This turns an XML document (or node or nodeset) into the equivalent R
+list. Note that this is \code{as_list()}, not \code{as.list()}:
+\code{lapply()} automatically calls \code{as.list()} on its inputs, so
+we can't override the default.
+}
+\details{
+\code{as_list} currently only handles the four most common types of
+children that an element might have:
+
+\itemize{
+ \item Other elements, converted to lists.
+ \item Attributes, stored as R attributes. Attributes that have special meanings in R
+ (\code{\link{class}}, \code{\link{comment}}, \code{\link{dim}},
+ \code{\link{dimnames}}, \code{\link{names}}, \code{\link{row.names}} and
+ \code{\link{tsp}}) are escaped with '.'
+ \item Text, stored as a character vector.
+}
+}
+\examples{
+as_list(read_xml("<foo> a <b /><c><![CDATA[<d></d>]]></c></foo>"))
+as_list(read_xml("<foo> <bar><baz /></bar> </foo>"))
+as_list(read_xml("<foo id = 'a'></foo>"))
+as_list(read_xml("<foo><bar id='a'/><bar id='b'/></foo>"))
+}
diff --git a/man/as_xml_document.Rd b/man/as_xml_document.Rd
new file mode 100644
index 0000000..4f7d351
--- /dev/null
+++ b/man/as_xml_document.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/as_xml_document.R
+\name{as_xml_document}
+\alias{as_xml_document}
+\title{Coerce a R list to xml nodes.}
+\usage{
+as_xml_document(x, ...)
+}
+\arguments{
+\item{x}{A document, node, or node set.}
+
+\item{...}{Needed for compatibility with generic. Unused.}
+}
+\description{
+This turns an R list into the equivalent XML document. Not all R lists will
+produce valid XML, in particular there can only be one root node and all
+child nodes need to be named (or empty) lists. R attributes become XML
+attributes and R names become XML node names.
+}
+\examples{
+as_xml_document(list(x = list()))
+
+# Nesting multiple nodes
+as_xml_document(list(foo = list(bar = list(baz = list()))))
+
+# attributes are stored as R attributes
+as_xml_document(list(foo = structure(list(), id = "a")))
+as_xml_document(list(foo = list(
+ bar = structure(list(), id = "a"),
+ bar = structure(list(), id = "b"))))
+}
diff --git a/man/read_xml.Rd b/man/read_xml.Rd
new file mode 100644
index 0000000..340d5fa
--- /dev/null
+++ b/man/read_xml.Rd
@@ -0,0 +1,79 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/xml_parse.R
+\name{read_xml}
+\alias{read_xml}
+\alias{read_html}
+\alias{read_xml.character}
+\alias{read_xml.raw}
+\alias{read_xml.connection}
+\title{Read HTML or XML.}
+\usage{
+read_xml(x, encoding = "", ..., as_html = FALSE, options = "NOBLANKS")
+
+read_html(x, encoding = "", ..., options = c("RECOVER", "NOERROR",
+ "NOBLANKS"))
+
+\method{read_xml}{character}(x, encoding = "", ..., as_html = FALSE,
+ options = "NOBLANKS")
+
+\method{read_xml}{raw}(x, encoding = "", base_url = "", ...,
+ as_html = FALSE, options = "NOBLANKS")
+
+\method{read_xml}{connection}(x, encoding = "", n = 64 * 1024,
+ verbose = FALSE, ..., base_url = "", as_html = FALSE,
+ options = "NOBLANKS")
+}
+\arguments{
+\item{x}{A string, a connection, or a raw vector.
+
+ A string can be either a path, a url or literal xml. Urls will
+ be converted into connections either using \code{base::url} or, if
+ installed, \code{curl::curl}. Local paths ending in \code{.gz},
+ \code{.bz2}, \code{.xz}, \code{.zip} will be automatically uncompressed.
+
+ If a connection, the complete connection is read into a raw vector before
+ being parsed.}
+
+\item{encoding}{Specify a default encoding for the document. Unless
+otherwise specified XML documents are assumed to be in UTF-8 or
+UTF-16. If the document is not UTF-8/16, and lacks an explicit
+encoding directive, this allows you to supply a default.}
+
+\item{...}{Additional arguments passed on to methods.}
+
+\item{as_html}{Optionally parse an xml file as if it's html.}
+
+\item{options}{Set parsing options for the libxml2 parser. Zero of more of
+\Sexpr[results=rd]{xml2:::describe_options(xml2:::xml_parse_options())}}
+
+\item{base_url}{When loading from a connection, raw vector or literal
+html/xml, this allows you to specify a base url for the document. Base
+urls are used to turn relative urls into absolute urls.}
+
+\item{n}{If \code{file} is a connection, the number of bytes to read per
+iteration. Defaults to 64kb.}
+
+\item{verbose}{When reading from a slow connection, this prints some
+output on every iteration so you know its working.}
+}
+\value{
+An XML document. HTML is normalised to valid XML - this may not
+ be exactly the same transformation performed by the browser, but it's
+ a reasonable approximation.
+}
+\description{
+Read HTML or XML.
+}
+\examples{
+# Literal xml/html is useful for small examples
+read_xml("<foo><bar /></foo>")
+read_html("<html><title>Hi<title></html>")
+read_html("<html><title>Hi")
+
+# From a local path
+read_html(system.file("extdata", "r-project.html", package = "xml2"))
+
+# From a url
+cd <- read_xml("http://www.xmlfiles.com/examples/cd_catalog.xml")
+me <- read_html("http://had.co.nz")
+}
diff --git a/man/url_absolute.Rd b/man/url_absolute.Rd
new file mode 100644
index 0000000..92cbdd7
--- /dev/null
+++ b/man/url_absolute.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/RcppExports.R
+\name{url_absolute}
+\alias{url_absolute}
+\alias{url_relative}
+\title{Convert between relative and absolute urls.}
+\usage{
+url_absolute(x, base)
+
+url_relative(x, base)
+}
+\arguments{
+\item{x}{A character vector of urls relative to that base}
+
+\item{base}{A string giving a base url.}
+}
+\value{
+A character vector of urls
+}
+\description{
+Convert between relative and absolute urls.
+}
+\examples{
+url_absolute(c(".", "..", "/", "/x"), "http://hadley.nz/a/b/c/d")
+
+url_relative("http://hadley.nz/a/c", "http://hadley.nz")
+url_relative("http://hadley.nz/a/c", "http://hadley.nz/")
+url_relative("http://hadley.nz/a/c", "http://hadley.nz/a/b")
+url_relative("http://hadley.nz/a/c", "http://hadley.nz/a/b/")
+}
+\seealso{
+\code{\link{xml_url}} to retrieve the URL associated with a document
+}
diff --git a/man/url_escape.Rd b/man/url_escape.Rd
new file mode 100644
index 0000000..5119a08
--- /dev/null
+++ b/man/url_escape.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/RcppExports.R
+\name{url_escape}
+\alias{url_escape}
+\alias{url_unescape}
+\title{Escape and unescape urls.}
+\usage{
+url_escape(x, reserved = "")
+
+url_unescape(x)
+}
+\arguments{
+\item{x}{A character vector of urls.}
+
+\item{reserved}{A string containing additional characters to avoid escaping.}
+}
+\description{
+Escape and unescape urls.
+}
+\examples{
+url_escape("a b c")
+url_escape("a b c", "")
+
+url_unescape("a\%20b\%2fc")
+url_unescape("\%C2\%B5")
+}
diff --git a/man/url_parse.Rd b/man/url_parse.Rd
new file mode 100644
index 0000000..dbd3e06
--- /dev/null
+++ b/man/url_parse.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/RcppExports.R
+\name{url_parse}
+\alias{url_parse}
+\title{Parse a url into its component pieces.}
+\usage{
+url_parse(x)
+}
+\arguments{
+\item{x}{A character vector of urls.}
+}
+\value{
+A dataframe with one row for each element of \code{x} and
+ columns: scheme, server, port, user, path, query, fragment.
+}
+\description{
+Parse a url into its component pieces.
+}
+\examples{
+url_parse("http://had.co.nz/")
+url_parse("http://had.co.nz:1234/")
+url_parse("http://had.co.nz:1234/?a=1&b=2")
+url_parse("http://had.co.nz:1234/?a=1&b=2#def")
+}
diff --git a/man/write_xml.Rd b/man/write_xml.Rd
new file mode 100644
index 0000000..72113f1
--- /dev/null
+++ b/man/write_xml.Rd
@@ -0,0 +1,50 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/xml_write.R
+\name{write_xml}
+\alias{write_xml}
+\alias{write_xml.xml_document}
+\alias{write_html}
+\alias{write_html.xml_document}
+\title{Write XML or HTML to disk.}
+\usage{
+write_xml(x, file, ...)
+
+\method{write_xml}{xml_document}(x, file, ..., options = "format",
+ encoding = "UTF-8")
+
+write_html(x, file, ...)
+
+\method{write_html}{xml_document}(x, file, ..., options = "format",
+ encoding = "UTF-8")
+}
+\arguments{
+\item{x}{A document or node to write to disk. It's not possible to
+save nodesets containing more than one node.}
+
+\item{file}{Path to file or connection to write to.}
+
+\item{...}{additional arguments passed to methods.}
+
+\item{options}{default: \sQuote{format}. Zero or more of
+\Sexpr[results=rd]{xml2:::describe_options(xml2:::xml_save_options())}}
+
+\item{encoding}{The character encoding to use in the document. The default
+encoding is \sQuote{UTF-8}. Available encodings are specified at
+\url{http://xmlsoft.org/html/libxml-encoding.html#xmlCharEncoding}.}
+}
+\description{
+This writes out both XML and normalised HTML. The default behavior will
+output the same format which was read. If you want to force output pass
+\code{option = "as_xml"} or \code{option = "as_html"} respectively.
+}
+\examples{
+h <- read_html("<p>Hi!</p>")
+
+tmp <- tempfile(fileext = ".xml")
+write_xml(h, tmp, options = "format")
+readLines(tmp)
+
+# write formatted HTML output
+write_html(h, tmp, options = "format")
+readLines(tmp)
+}
diff --git a/man/xml_attr.Rd b/man/xml_attr.Rd
new file mode 100644
index 0000000..0479845
--- /dev/null
+++ b/man/xml_attr.Rd
@@ -0,0 +1,102 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/xml_attr.R
+\name{xml_attr}
+\alias{xml_attr}
+\alias{xml_has_attr}
+\alias{xml_attrs}
+\alias{xml_attr<-}
+\alias{xml_set_attr}
+\alias{xml_attrs<-}
+\alias{xml_set_attrs}
+\title{Retrieve an attribute.}
+\usage{
+xml_attr(x, attr, ns = character(), default = NA_character_)
+
+xml_has_attr(x, attr, ns = character())
+
+xml_attrs(x, ns = character())
+
+xml_attr(x, attr, ns = character()) <- value
+
+xml_set_attr(x, attr, value, ns = character())
+
+xml_attrs(x, ns = character()) <- value
+
+xml_set_attrs(x, value, ns = character())
+}
+\arguments{
+\item{x}{A document, node, or node set.}
+
+\item{attr}{Name of attribute to extract.}
+
+\item{ns}{Optionally, a named vector giving prefix-url pairs, as produced
+by \code{\link{xml_ns}}. If provided, all names will be explicitly
+qualified with the ns prefix, i.e. if the element \code{bar} is defined
+in namespace \code{foo}, it will be called \code{foo:bar}. (And
+similarly for atttributes). Default namespaces must be given an explicit
+name. The ns is ignored when using \code{\link{xml_name<-}} and
+\code{\link{xml_set_name}}.}
+
+\item{default}{Default value to use when attribute is not present.}
+
+\item{value}{character vector of new value.}
+}
+\value{
+\code{xml_attr()} returns a character vector. \code{NA} is used
+ to represent of attributes that aren't defined.
+
+ \code{xml_has_attr()} returns a logical vector.
+
+ \code{xml_attrs()} returns a named character vector if \code{x} x is single
+ node, or a list of character vectors if given a nodeset
+}
+\description{
+\code{xml_attrs()} retrieves all attributes values as a named character
+vector, \code{xml_attrs() <-} or \code{xml_set_attrs()} sets all attribute
+values. \code{xml_attr()} retrieves the value of single attribute and
+\code{xml_attr() <-} or \code{xml_set_attr()} modifies its value. If the
+attribute doesn't exist, it will return \code{default}, which defaults to
+\code{NA}. \code{xml_has_attr()} tests if an attribute is present.
+}
+\examples{
+x <- read_xml("<root id='1'><child id ='a' /><child id='b' d='b'/></root>")
+xml_attr(x, "id")
+xml_attr(x, "apple")
+xml_attrs(x)
+
+kids <- xml_children(x)
+kids
+xml_attr(kids, "id")
+xml_has_attr(kids, "id")
+xml_attrs(kids)
+
+# Missing attributes give missing values
+xml_attr(xml_children(x), "d")
+xml_has_attr(xml_children(x), "d")
+
+# If the document has a namespace, use the ns argument and
+# qualified attribute names
+x <- read_xml('
+ <root xmlns:b="http://bar.com" xmlns:f="http://foo.com">
+ <doc b:id="b" f:id="f" id="" />
+ </root>
+')
+doc <- xml_children(x)[[1]]
+ns <- xml_ns(x)
+
+xml_attrs(doc)
+xml_attrs(doc, ns)
+
+# If you don't supply a ns spec, you get the first matching attribute
+xml_attr(doc, "id")
+xml_attr(doc, "b:id", ns)
+xml_attr(doc, "id", ns)
+
+# Can set a single attribute with `xml_attr() <-` or `xml_set_attr()`
+xml_attr(doc, "id") <- "one"
+xml_set_attr(doc, "id", "two")
+
+# Or set multiple attributes with `xml_attrs()` or `xml_set_attrs()`
+xml_attrs(doc) <- c("b:id" = "one", "f:id" = "two", "id" = "three")
+xml_set_attrs(doc, c("b:id" = "one", "f:id" = "two", "id" = "three"))
+}
diff --git a/man/xml_cdata.Rd b/man/xml_cdata.Rd
new file mode 100644
index 0000000..c5dcd35
--- /dev/null
+++ b/man/xml_cdata.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/classes.R
+\name{xml_cdata}
+\alias{xml_cdata}
+\title{Construct a cdata node}
+\usage{
+xml_cdata(content)
+}
+\arguments{
+\item{content}{The CDATA content, does not include \code{<![CDATA[}}
+}
+\description{
+Construct a cdata node
+}
+\examples{
+x <- xml_new_root("root")
+xml_add_child(x, xml_cdata("<d/>"))
+as.character(x)
+}
diff --git a/man/xml_children.Rd b/man/xml_children.Rd
new file mode 100644
index 0000000..8909b47
--- /dev/null
+++ b/man/xml_children.Rd
@@ -0,0 +1,81 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/xml_children.R
+\name{xml_children}
+\alias{xml_children}
+\alias{xml_child}
+\alias{xml_contents}
+\alias{xml_parents}
+\alias{xml_siblings}
+\alias{xml_parent}
+\alias{xml_length}
+\alias{xml_root}
+\title{Navigate around the family tree.}
+\usage{
+xml_children(x)
+
+xml_child(x, search = 1, ns = xml_ns(x))
+
+xml_contents(x)
+
+xml_parents(x)
+
+xml_siblings(x)
+
+xml_parent(x)
+
+xml_length(x, only_elements = TRUE)
+
+xml_root(x)
+}
+\arguments{
+\item{x}{A document, node, or node set.}
+
+\item{search}{For \code{xml_child}, either the child number to return (by
+position), or the name of the child node to return. If there are multiple
+child nodes with the same name, the first will be returned}
+
+\item{ns}{Optionally, a named vector giving prefix-url pairs, as produced
+by \code{\link{xml_ns}}. If provided, all names will be explicitly
+qualified with the ns prefix, i.e. if the element \code{bar} is defined
+in namespace \code{foo}, it will be called \code{foo:bar}. (And
+similarly for atttributes). Default namespaces must be given an explicit
+name. The ns is ignored when using \code{\link{xml_name<-}} and
+\code{\link{xml_set_name}}.}
+
+\item{only_elements}{For \code{xml_length}, should it count all children,
+or just children that are elements (the default)?}
+}
+\value{
+A node or nodeset (possibly empty). Results are always de-duplicated.
+}
+\description{
+\code{xml_children} returns only elements, \code{xml_contents} returns
+all nodes. \code{xml_length} returns the number of children.
+\code{xml_parent} returns the parent node, \code{xml_parents}
+returns all parents up to the root. \code{xml_siblings} returns all nodes
+at the same level. \code{xml_child} makes it easy to specify a specific
+child to return.
+}
+\examples{
+x <- read_xml("<foo> <bar><boo /></bar> <baz/> </foo>")
+xml_children(x)
+xml_children(xml_children(x))
+xml_siblings(xml_children(x)[[1]])
+
+# Note the each unique node only appears once in the output
+xml_parent(xml_children(x))
+
+# Mixed content
+x <- read_xml("<foo> a <b/> c <d>e</d> f</foo>")
+# Childen gets the elements, contents gets all node types
+xml_children(x)
+xml_contents(x)
+
+xml_length(x)
+xml_length(x, only_elements = FALSE)
+
+# xml_child makes it easier to select specific children
+xml_child(x)
+xml_child(x, 2)
+xml_child(x, "baz")
+}
diff --git a/man/xml_comment.Rd b/man/xml_comment.Rd
new file mode 100644
index 0000000..49f26d9
--- /dev/null
+++ b/man/xml_comment.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/classes.R
+\name{xml_comment}
+\alias{xml_comment}
+\title{Construct a comment node}
+\usage{
+xml_comment(content)
+}
+\arguments{
+\item{content}{The comment content}
+}
+\description{
+Construct a comment node
+}
+\examples{
+x <- xml_new_document()
+r <- xml_add_child(x, "root")
+xml_add_child(r, xml_comment("Hello!"))
+as.character(x)
+}
diff --git a/man/xml_dtd.Rd b/man/xml_dtd.Rd
new file mode 100644
index 0000000..9b0d4c0
--- /dev/null
+++ b/man/xml_dtd.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/classes.R
+\name{xml_dtd}
+\alias{xml_dtd}
+\title{Construct a document type definition}
+\usage{
+xml_dtd(name = "", external_id = "", system_id = "")
+}
+\arguments{
+\item{name}{The name of the declaration}
+
+\item{external_id}{The external ID of the declaration}
+
+\item{system_id}{The system ID of the declaration}
+}
+\description{
+This is used to create simple document type definitions. If you need to
+create a more complicated definition with internal subsets it is recommended
+to parse a string directly with \code{read_xml()}.
+}
+\examples{
+r <- xml_new_root(
+ xml_dtd("html",
+ "-//W3C//DTD XHTML 1.0 Transitional//EN",
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"))
+
+# Use read_xml directly for more complicated DTD
+d <- read_xml(
+'<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY foo " test ">
+]>
+<doc>This is a valid document &foo; !</doc>')
+}
diff --git a/man/xml_find_all.Rd b/man/xml_find_all.Rd
new file mode 100644
index 0000000..06c2e0f
--- /dev/null
+++ b/man/xml_find_all.Rd
@@ -0,0 +1,106 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/xml_find.R
+\name{xml_find_all}
+\alias{xml_find_all}
+\alias{xml_find_first}
+\alias{xml_find_num}
+\alias{xml_find_chr}
+\alias{xml_find_lgl}
+\alias{xml_find_one}
+\title{Find nodes that match an xpath expression.}
+\usage{
+xml_find_all(x, xpath, ns = xml_ns(x))
+
+xml_find_first(x, xpath, ns = xml_ns(x))
+
+xml_find_num(x, xpath, ns = xml_ns(x))
+
+xml_find_chr(x, xpath, ns = xml_ns(x))
+
+xml_find_lgl(x, xpath, ns = xml_ns(x))
+}
+\arguments{
+\item{x}{A document, node, or node set.}
+
+\item{xpath}{A string containing a xpath (1.0) expression.}
+
+\item{ns}{Optionally, a named vector giving prefix-url pairs, as produced
+by \code{\link{xml_ns}}. If provided, all names will be explicitly
+qualified with the ns prefix, i.e. if the element \code{bar} is defined
+in namespace \code{foo}, it will be called \code{foo:bar}. (And
+similarly for atttributes). Default namespaces must be given an explicit
+name. The ns is ignored when using \code{\link{xml_name<-}} and
+\code{\link{xml_set_name}}.}
+}
+\value{
+\code{xml_find_all} always returns a nodeset: if there are no matches
+ the nodeset will be empty. The result will always be unique; repeated
+ nodes are automatically de-duplicated.
+
+ \code{xml_find_first} returns a node if applied to a node, and a nodeset
+ if applied to a nodeset. The output is \emph{always} the same size as
+ the input. If there are no matches, \code{xml_find_first} will return a
+ missing node; if there are multiple matches, it will return the first
+ only.
+
+ \code{xml_find_num}, \code{xml_find_chr}, \code{xml_find_lgl} return
+ numeric, character and logical results respectively.
+}
+\description{
+Xpath is like regular expressions for trees - it's worth learning if
+you're trying to extract nodes from arbitrary locations in a document.
+Use \code{xml_find_all} to find all matches - if there's no match you'll
+get an empty result. Use \code{xml_find_first} to find a specific match -
+if there's no match you'll get an \code{xml_missing} node.
+}
+\section{Deprecated functions}{
+
+\code{xml_find_one()} has been deprecated. Instead use
+\code{xml_find_first()}.
+}
+
+\examples{
+x <- read_xml("<foo><bar><baz/></bar><baz/></foo>")
+xml_find_all(x, ".//baz")
+xml_path(xml_find_all(x, ".//baz"))
+
+# Note the difference between .// and //
+# // finds anywhere in the document (ignoring the current node)
+# .// finds anywhere beneath the current node
+(bar <- xml_find_all(x, ".//bar"))
+xml_find_all(bar, ".//baz")
+xml_find_all(bar, "//baz")
+
+# Find all vs find one -----------------------------------------------------
+x <- read_xml("<body>
+ <p>Some <b>text</b>.</p>
+ <p>Some <b>other</b> <b>text</b>.</p>
+ <p>No bold here!</p>
+</body>")
+para <- xml_find_all(x, ".//p")
+
+# If you apply xml_find_all to a nodeset, it finds all matches,
+# de-duplicates them, and returns as a single list. This means you
+# never know how many results you'll get
+xml_find_all(para, ".//b")
+
+# xml_find_first only returns the first match per input node. If there are 0
+# matches it will return a missing node
+xml_find_first(para, ".//b")
+xml_text(xml_find_first(para, ".//b"))
+
+# Namespaces ---------------------------------------------------------------
+# If the document uses namespaces, you'll need use xml_ns to form
+# a unique mapping between full namespace url and a short prefix
+x <- read_xml('
+ <root xmlns:f = "http://foo.com" xmlns:g = "http://bar.com">
+ <f:doc><g:baz /></f:doc>
+ <f:doc><g:baz /></f:doc>
+ </root>
+')
+xml_find_all(x, ".//f:doc")
+xml_find_all(x, ".//f:doc", xml_ns(x))
+}
+\seealso{
+\code{\link{xml_ns_strip}} to remove the default namespaces
+}
diff --git a/man/xml_missing.Rd b/man/xml_missing.Rd
new file mode 100644
index 0000000..10d5101
--- /dev/null
+++ b/man/xml_missing.Rd
@@ -0,0 +1,12 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/classes.R
+\name{xml_missing}
+\alias{xml_missing}
+\title{Construct an missing xml object}
+\usage{
+xml_missing()
+}
+\description{
+Construct an missing xml object
+}
+\keyword{internal}
diff --git a/man/xml_name.Rd b/man/xml_name.Rd
new file mode 100644
index 0000000..bc2e5f7
--- /dev/null
+++ b/man/xml_name.Rd
@@ -0,0 +1,43 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/xml_name.R
+\name{xml_name}
+\alias{xml_name}
+\alias{xml_name<-}
+\alias{xml_set_name}
+\title{The (tag) name of an xml element.}
+\usage{
+xml_name(x, ns = character())
+
+xml_name(x, ns = character()) <- value
+
+xml_set_name(x, value, ns = character())
+}
+\arguments{
+\item{x}{A document, node, or node set.}
+
+\item{ns}{Optionally, a named vector giving prefix-url pairs, as produced
+by \code{\link{xml_ns}}. If provided, all names will be explicitly
+qualified with the ns prefix, i.e. if the element \code{bar} is defined
+in namespace \code{foo}, it will be called \code{foo:bar}. (And
+similarly for atttributes). Default namespaces must be given an explicit
+name. The ns is ignored when using \code{\link{xml_name<-}} and
+\code{\link{xml_set_name}}.}
+
+\item{value}{a character vector with replacement name.}
+}
+\value{
+A character vector.
+}
+\description{
+The (tag) name of an xml element.
+
+Modify the (tag) name of an element
+}
+\examples{
+x <- read_xml("<bar>123</bar>")
+xml_name(x)
+
+y <- read_xml("<bar><baz>1</baz>abc<foo /></bar>")
+z <- xml_children(y)
+xml_name(xml_children(y))
+}
diff --git a/man/xml_new_document.Rd b/man/xml_new_document.Rd
new file mode 100644
index 0000000..5dbcf88
--- /dev/null
+++ b/man/xml_new_document.Rd
@@ -0,0 +1,39 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/xml_modify.R
+\name{xml_new_document}
+\alias{xml_new_document}
+\alias{xml_new_root}
+\title{Create a new document, possibly with a root node}
+\usage{
+xml_new_document(version = "1.0", encoding = "UTF-8")
+
+xml_new_root(.value, ..., .copy = inherits(.value, "xml_node"),
+ .version = "1.0", .encoding = "UTF-8")
+}
+\arguments{
+\item{version}{The version number of the document.}
+
+\item{encoding}{The character encoding to use in the document. The default
+encoding is \sQuote{UTF-8}. Available encodings are specified at
+\url{http://xmlsoft.org/html/libxml-encoding.html#xmlCharEncoding}.}
+
+\item{.value}{node or nodeset to insert.}
+
+\item{...}{If named attributes or namespaces to set on the node, if unnamed
+text to assign to the node.}
+
+\item{.copy}{whether to copy the \code{.value} before replacing. If this is \code{FALSE}
+then the node will be moved from it's current location.}
+
+\item{.version}{The version number of the document, passed to \code{xml_new_document(version)}.}
+
+\item{.encoding}{The encoding of the document, passed to \code{xml_new_document(encoding)}.}
+}
+\value{
+A \code{xml_document} object.
+}
+\description{
+\code{xml_new_document} creates only a new document without a root node. In
+most cases you should instead use \code{xml_new_root}, which creates a new
+document and assigns the root node in one step.
+}
diff --git a/man/xml_ns.Rd b/man/xml_ns.Rd
new file mode 100644
index 0000000..29f3db6
--- /dev/null
+++ b/man/xml_ns.Rd
@@ -0,0 +1,53 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/xml_namespaces.R
+\name{xml_ns}
+\alias{xml_ns}
+\alias{xml_ns_rename}
+\title{XML namespaces.}
+\usage{
+xml_ns(x)
+
+xml_ns_rename(old, ...)
+}
+\arguments{
+\item{x}{A document, node, or node set.}
+
+\item{old, ...}{An existing xml_namespace object followed by name-value
+(old prefix-new prefix) pairs to replace.}
+}
+\value{
+A character vector with class \code{xml_namespace} so the
+ default display is a little nicer.
+}
+\description{
+\code{xml_ns} extracts all namespaces from a document, matching each
+unique namespace url with the prefix it was first associated with. Default
+namespaces are named \code{d1}, \code{d2} etc. Use \code{xml_ns_rename}
+to change the prefixes. Once you have a namespace object, you can pass it to
+other functions to work with fully qualified names instead of local names.
+}
+\examples{
+x <- read_xml('
+ <root>
+ <doc1 xmlns = "http://foo.com"><baz /></doc1>
+ <doc2 xmlns = "http://bar.com"><baz /></doc2>
+ </root>
+')
+xml_ns(x)
+
+# When there are default namespaces, it's a good idea to rename
+# them to give informative names:
+ns <- xml_ns_rename(xml_ns(x), d1 = "foo", d2 = "bar")
+ns
+
+# Now we can pass ns to other xml function to use fully qualified names
+baz <- xml_children(xml_children(x))
+xml_name(baz)
+xml_name(baz, ns)
+
+xml_find_all(x, "//baz")
+xml_find_all(x, "//foo:baz", ns)
+
+str(as_list(x))
+str(as_list(x, ns))
+}
diff --git a/man/xml_ns_strip.Rd b/man/xml_ns_strip.Rd
new file mode 100644
index 0000000..f210f54
--- /dev/null
+++ b/man/xml_ns_strip.Rd
@@ -0,0 +1,30 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/xml_modify.R
+\name{xml_ns_strip}
+\alias{xml_ns_strip}
+\title{Strip the default namespaces from a document}
+\usage{
+xml_ns_strip(x)
+}
+\arguments{
+\item{x}{A document, node, or node set.}
+}
+\description{
+Strip the default namespaces from a document
+}
+\examples{
+x <- read_xml(
+ "<foo xmlns = 'http://foo.com'>
+ <baz/>
+ <bar xmlns = 'http://bar.com'>
+ <baz/>
+ </bar>
+ </foo>")
+# Need to specify the default namespaces to find the baz nodes
+xml_find_all(x, "//d1:baz")
+xml_find_all(x, "//d2:baz")
+
+# After stripping the default namespaces you can find both baz nodes directly
+xml_ns_strip(x)
+xml_find_all(x, "//baz")
+}
diff --git a/man/xml_path.Rd b/man/xml_path.Rd
new file mode 100644
index 0000000..6874024
--- /dev/null
+++ b/man/xml_path.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/xml_path.R
+\name{xml_path}
+\alias{xml_path}
+\title{Retrieve the xpath to a node}
+\usage{
+xml_path(x)
+}
+\arguments{
+\item{x}{A document, node, or node set.}
+}
+\value{
+A character vector.
+}
+\description{
+This is useful when you want to figure out where nodes matching an
+xpath expression live in a document.
+}
+\examples{
+x <- read_xml("<foo><bar><baz /></bar><baz /></foo>")
+xml_path(xml_find_all(x, ".//baz"))
+}
diff --git a/man/xml_replace.Rd b/man/xml_replace.Rd
new file mode 100644
index 0000000..1fdf418
--- /dev/null
+++ b/man/xml_replace.Rd
@@ -0,0 +1,52 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/xml_modify.R
+\name{xml_replace}
+\alias{xml_replace}
+\alias{xml_add_sibling}
+\alias{xml_add_child}
+\alias{xml_add_parent}
+\alias{xml_remove}
+\title{Modify a tree by inserting, replacing or removing nodes}
+\usage{
+xml_replace(.x, .value, ..., .copy = TRUE)
+
+xml_add_sibling(.x, .value, ..., .where = c("after", "before"),
+ .copy = TRUE)
+
+xml_add_child(.x, .value, ..., .where = length(xml_children(.x)),
+ .copy = TRUE)
+
+xml_add_parent(.x, .value, ...)
+
+xml_remove(.x, free = FALSE)
+}
+\arguments{
+\item{.x}{a document, node or nodeset.}
+
+\item{.value}{node or nodeset to insert.}
+
+\item{...}{If named attributes or namespaces to set on the node, if unnamed
+text to assign to the node.}
+
+\item{.copy}{whether to copy the \code{.value} before replacing. If this is \code{FALSE}
+then the node will be moved from it's current location.}
+
+\item{.where}{to add thenew node, for \code{xml_add_child} the position
+after which to add, use \code{0} for the first child. For
+\code{xml_add_sibling} either \sQuote{"befeore"} or \sQuote{"after"}
+indicating if the new node should be before or after \code{.x}.}
+
+\item{free}{When removing the node also free the memory used for that node.
+Note if you use this option you cannot use any existing objects pointing to
+the node or its children, it is likely to crash R or return garbage.}
+}
+\description{
+\code{xml_add_sibling()} and \code{xml_add_child()} are used to insert a node
+as a sibling or a child. \code{xml_add_parent()} adds a new parent in
+between the input node and the current parent. \code{xml_replace()}
+replaces an existing node with a new node. \code{xml_remove()} removes a
+node from the tree.
+}
+\details{
+Care needs to be taken when using \code{xml_remove()},
+}
diff --git a/man/xml_serialize.Rd b/man/xml_serialize.Rd
new file mode 100644
index 0000000..61b7c6f
--- /dev/null
+++ b/man/xml_serialize.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/xml_serialize.R
+\name{xml_serialize}
+\alias{xml_serialize}
+\alias{xml_unserialize}
+\title{Serializing XML objects to connections.}
+\usage{
+xml_serialize(object, connection, ...)
+
+xml_unserialize(connection, ...)
+}
+\arguments{
+\item{object}{\R object to serialize.}
+
+\item{connection}{an open \link{connection} or (for \code{serialize})
+ \code{NULL} or (for \code{unserialize}) a raw vector
+ (see \sQuote{Details}).}
+
+\item{...}{Additional arguments passed to \code{\link{read_xml}}.}
+}
+\description{
+Serializing XML objects to connections.
+}
+\examples{
+library(xml2)
+x <- read_xml("<a>
+ <b><c>123</c></b>
+ <b><c>456</c></b>
+</a>")
+
+b <- xml_find_all(x, "//b")
+out <- xml_serialize(b, NULL)
+xml_unserialize(out)
+}
diff --git a/man/xml_set_namespace.Rd b/man/xml_set_namespace.Rd
new file mode 100644
index 0000000..5051b83
--- /dev/null
+++ b/man/xml_set_namespace.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/xml_modify.R
+\name{xml_set_namespace}
+\alias{xml_set_namespace}
+\title{Set the node's namespace}
+\usage{
+xml_set_namespace(.x, prefix = "", uri = "")
+}
+\arguments{
+\item{.x}{a node}
+
+\item{prefix}{The namespace prefix to use}
+
+\item{uri}{The namespace URI to use}
+}
+\value{
+the node (invisibly)
+}
+\description{
+The namespace to be set must be already defined in one of the node's
+ancestors.
+}
diff --git a/man/xml_structure.Rd b/man/xml_structure.Rd
new file mode 100644
index 0000000..4d50c81
--- /dev/null
+++ b/man/xml_structure.Rd
@@ -0,0 +1,32 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/xml_structure.R
+\name{xml_structure}
+\alias{xml_structure}
+\alias{html_structure}
+\title{Show the structure of an html/xml document.}
+\usage{
+xml_structure(x, indent = 2)
+
+html_structure(x, indent = 2)
+}
+\arguments{
+\item{x}{HTML/XML document (or part there of)}
+
+\item{indent}{Number of spaces to ident}
+}
+\description{
+Show the structure of an html/xml document without displaying any of
+the values. This is useful if you want to get a high level view of the
+way a document is organised. Compared to \code{xml_structure},
+\code{html_structure} prints the id and class attributes.
+}
+\examples{
+xml_structure(read_xml("<a><b><c/><c/></b><d/></a>"))
+
+rproj <- read_html(system.file("extdata","r-project.html", package = "xml2"))
+xml_structure(rproj)
+xml_structure(xml_find_all(rproj, ".//p"))
+
+h <- read_html("<body><p id = 'a'></p><p class = 'c d'></p></body>")
+html_structure(h)
+}
diff --git a/man/xml_text.Rd b/man/xml_text.Rd
new file mode 100644
index 0000000..655d56a
--- /dev/null
+++ b/man/xml_text.Rd
@@ -0,0 +1,51 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/xml_text.R
+\name{xml_text}
+\alias{xml_text}
+\alias{xml_text<-}
+\alias{xml_set_text}
+\alias{xml_double}
+\alias{xml_integer}
+\title{Extract or modify the text}
+\usage{
+xml_text(x, trim = FALSE)
+
+xml_text(x) <- value
+
+xml_set_text(x, value)
+
+xml_double(x)
+
+xml_integer(x)
+}
+\arguments{
+\item{x}{A document, node, or node set.}
+
+\item{trim}{If \code{TRUE} will trim leading and trailing spaces.}
+
+\item{value}{character vector with replacement text.}
+}
+\value{
+A character vector, the same length as x.
+}
+\description{
+\code{xml_text} returns a character vector, \code{xml_double} returns a
+numeric vector, \code{xml_integer} returns an integer vector.
+}
+\examples{
+x <- read_xml("<p>This is some text. This is <b>bold!</b></p>")
+xml_text(x)
+xml_text(xml_children(x))
+
+x <- read_xml("<x>This is some text. <x>This is some nested text.</x></x>")
+xml_text(x)
+xml_text(xml_find_all(x, "//x"))
+
+x <- read_xml("<p> Some text </p>")
+xml_text(x, trim = TRUE)
+
+# xml_double() and xml_integer() are useful for extracting numeric
+attributes
+x <- read_xml("<plot><point x='1' y='2' /><point x='2' y='1' /></plot>")
+xml_integer(xml_find_all(x, "//@x"))
+}
diff --git a/man/xml_type.Rd b/man/xml_type.Rd
new file mode 100644
index 0000000..d2a2d06
--- /dev/null
+++ b/man/xml_type.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/xml_type.R
+\name{xml_type}
+\alias{xml_type}
+\title{Determine the type of a node.}
+\usage{
+xml_type(x)
+}
+\arguments{
+\item{x}{A document, node, or node set.}
+}
+\description{
+Determine the type of a node.
+}
+\examples{
+x <- read_xml("<foo> a <b /> <![CDATA[ blah]]></foo>")
+xml_type(x)
+xml_type(xml_contents(x))
+}
diff --git a/man/xml_url.Rd b/man/xml_url.Rd
new file mode 100644
index 0000000..ec3f4de
--- /dev/null
+++ b/man/xml_url.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/xml_url.R
+\name{xml_url}
+\alias{xml_url}
+\title{The URL of an XML document}
+\usage{
+xml_url(x)
+}
+\arguments{
+\item{x}{A node or document.}
+}
+\value{
+A character vector of length 1. Returns \code{NA} if the name is
+ not set.
+}
+\description{
+This is useful for interpreting relative urls with \code{\link{url_relative}}.
+}
+\examples{
+catalog <- read_xml("http://www.xmlfiles.com/examples/cd_catalog.xml")
+xml_url(catalog)
+
+x <- read_xml("<foo/>")
+xml_url(x)
+}
diff --git a/man/xml_validate.Rd b/man/xml_validate.Rd
new file mode 100644
index 0000000..738de6e
--- /dev/null
+++ b/man/xml_validate.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/xml_schema.R
+\name{xml_validate}
+\alias{xml_validate}
+\title{Validate XML schema}
+\usage{
+xml_validate(x, schema)
+}
+\arguments{
+\item{x}{A document, node, or node set.}
+
+\item{schema}{an XML document containing the schema}
+}
+\value{
+TRUE or FALSE
+}
+\description{
+Validate an XML document against an XML 1.0 schema.
+}
+\examples{
+# Example from https://msdn.microsoft.com/en-us/library/ms256129(v=vs.110).aspx
+doc <- read_xml(system.file("extdata/order-doc.xml", package = "xml2"))
+schema <- read_xml(system.file("extdata/order-schema.xml", package = "xml2"))
+xml_validate(doc, schema)
+}
diff --git a/src/Makevars.in b/src/Makevars.in
new file mode 100644
index 0000000..e4fdb85
--- /dev/null
+++ b/src/Makevars.in
@@ -0,0 +1,2 @@
+PKG_CPPFLAGS=-I../inst/include @cflags@
+PKG_LIBS=@libs@
diff --git a/src/Makevars.win b/src/Makevars.win
new file mode 100644
index 0000000..754dfd6
--- /dev/null
+++ b/src/Makevars.win
@@ -0,0 +1,14 @@
+PKG_CPPFLAGS=-I../inst/include -I../windows/libxml2-2.9.4/include/libxml2 \
+ -DLIBXML_STATIC
+
+PKG_LIBS=-L../windows/libxml2-2.9.4/lib${R_ARCH} -lxml2 -llzma -liconv -lz -lws2_32
+
+all: clean winlibs
+
+clean:
+ rm -f $(OBJECTS) $(SHLIB)
+
+winlibs:
+ "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" --vanilla "../tools/winlibs.R"
+
+.PHONY: all winlibs clean
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
new file mode 100644
index 0000000..a1dcd12
--- /dev/null
+++ b/src/RcppExports.cpp
@@ -0,0 +1,696 @@
+// Generated by using Rcpp::compileAttributes() -> do not edit by hand
+// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
+
+#include "../inst/include/xml2_types.h"
+#include <Rcpp.h>
+
+using namespace Rcpp;
+
+// read_connection_
+RawVector read_connection_(RObject con, int chunk_size);
+RcppExport SEXP xml2_read_connection_(SEXP conSEXP, SEXP chunk_sizeSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< RObject >::type con(conSEXP);
+ Rcpp::traits::input_parameter< int >::type chunk_size(chunk_sizeSEXP);
+ rcpp_result_gen = Rcpp::wrap(read_connection_(con, chunk_size));
+ return rcpp_result_gen;
+END_RCPP
+}
+// xml_parse_options
+Rcpp::IntegerVector xml_parse_options();
+RcppExport SEXP xml2_xml_parse_options() {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ rcpp_result_gen = Rcpp::wrap(xml_parse_options());
+ return rcpp_result_gen;
+END_RCPP
+}
+// doc_parse_file
+XPtrDoc doc_parse_file(std::string path, std::string encoding, bool as_html, int options);
+RcppExport SEXP xml2_doc_parse_file(SEXP pathSEXP, SEXP encodingSEXP, SEXP as_htmlSEXP, SEXP optionsSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< std::string >::type path(pathSEXP);
+ Rcpp::traits::input_parameter< std::string >::type encoding(encodingSEXP);
+ Rcpp::traits::input_parameter< bool >::type as_html(as_htmlSEXP);
+ Rcpp::traits::input_parameter< int >::type options(optionsSEXP);
+ rcpp_result_gen = Rcpp::wrap(doc_parse_file(path, encoding, as_html, options));
+ return rcpp_result_gen;
+END_RCPP
+}
+// doc_parse_raw
+XPtrDoc doc_parse_raw(RawVector x, std::string encoding, std::string base_url, bool as_html, int options);
+RcppExport SEXP xml2_doc_parse_raw(SEXP xSEXP, SEXP encodingSEXP, SEXP base_urlSEXP, SEXP as_htmlSEXP, SEXP optionsSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< RawVector >::type x(xSEXP);
+ Rcpp::traits::input_parameter< std::string >::type encoding(encodingSEXP);
+ Rcpp::traits::input_parameter< std::string >::type base_url(base_urlSEXP);
+ Rcpp::traits::input_parameter< bool >::type as_html(as_htmlSEXP);
+ Rcpp::traits::input_parameter< int >::type options(optionsSEXP);
+ rcpp_result_gen = Rcpp::wrap(doc_parse_raw(x, encoding, base_url, as_html, options));
+ return rcpp_result_gen;
+END_RCPP
+}
+// doc_root
+XPtrNode doc_root(XPtrDoc x);
+RcppExport SEXP xml2_doc_root(SEXP xSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrDoc >::type x(xSEXP);
+ rcpp_result_gen = Rcpp::wrap(doc_root(x));
+ return rcpp_result_gen;
+END_RCPP
+}
+// doc_has_root
+bool doc_has_root(XPtrDoc x);
+RcppExport SEXP xml2_doc_has_root(SEXP xSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrDoc >::type x(xSEXP);
+ rcpp_result_gen = Rcpp::wrap(doc_has_root(x));
+ return rcpp_result_gen;
+END_RCPP
+}
+// doc_url
+CharacterVector doc_url(XPtrDoc x);
+RcppExport SEXP xml2_doc_url(SEXP xSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrDoc >::type x(xSEXP);
+ rcpp_result_gen = Rcpp::wrap(doc_url(x));
+ return rcpp_result_gen;
+END_RCPP
+}
+// doc_new
+XPtrDoc doc_new(std::string version, std::string encoding);
+RcppExport SEXP xml2_doc_new(SEXP versionSEXP, SEXP encodingSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< std::string >::type version(versionSEXP);
+ Rcpp::traits::input_parameter< std::string >::type encoding(encodingSEXP);
+ rcpp_result_gen = Rcpp::wrap(doc_new(version, encoding));
+ return rcpp_result_gen;
+END_RCPP
+}
+// doc_set_root
+XPtrNode doc_set_root(XPtrDoc doc, XPtrNode root);
+RcppExport SEXP xml2_doc_set_root(SEXP docSEXP, SEXP rootSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrDoc >::type doc(docSEXP);
+ Rcpp::traits::input_parameter< XPtrNode >::type root(rootSEXP);
+ rcpp_result_gen = Rcpp::wrap(doc_set_root(doc, root));
+ return rcpp_result_gen;
+END_RCPP
+}
+// libxml2_version
+std::string libxml2_version();
+RcppExport SEXP xml2_libxml2_version() {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ rcpp_result_gen = Rcpp::wrap(libxml2_version());
+ return rcpp_result_gen;
+END_RCPP
+}
+// unique_ns
+CharacterVector unique_ns(CharacterVector ns);
+RcppExport SEXP xml2_unique_ns(SEXP nsSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< CharacterVector >::type ns(nsSEXP);
+ rcpp_result_gen = Rcpp::wrap(unique_ns(ns));
+ return rcpp_result_gen;
+END_RCPP
+}
+// doc_namespaces
+CharacterVector doc_namespaces(XPtrDoc doc);
+RcppExport SEXP xml2_doc_namespaces(SEXP docSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrDoc >::type doc(docSEXP);
+ rcpp_result_gen = Rcpp::wrap(doc_namespaces(doc));
+ return rcpp_result_gen;
+END_RCPP
+}
+// ns_lookup_uri
+XPtrNs ns_lookup_uri(XPtrDoc doc, XPtrNode node, std::string uri);
+RcppExport SEXP xml2_ns_lookup_uri(SEXP docSEXP, SEXP nodeSEXP, SEXP uriSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrDoc >::type doc(docSEXP);
+ Rcpp::traits::input_parameter< XPtrNode >::type node(nodeSEXP);
+ Rcpp::traits::input_parameter< std::string >::type uri(uriSEXP);
+ rcpp_result_gen = Rcpp::wrap(ns_lookup_uri(doc, node, uri));
+ return rcpp_result_gen;
+END_RCPP
+}
+// ns_lookup
+XPtrNs ns_lookup(XPtrDoc doc, XPtrNode node, std::string prefix);
+RcppExport SEXP xml2_ns_lookup(SEXP docSEXP, SEXP nodeSEXP, SEXP prefixSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrDoc >::type doc(docSEXP);
+ Rcpp::traits::input_parameter< XPtrNode >::type node(nodeSEXP);
+ Rcpp::traits::input_parameter< std::string >::type prefix(prefixSEXP);
+ rcpp_result_gen = Rcpp::wrap(ns_lookup(doc, node, prefix));
+ return rcpp_result_gen;
+END_RCPP
+}
+// node_name
+CharacterVector node_name(XPtrNode node, CharacterVector nsMap);
+RcppExport SEXP xml2_node_name(SEXP nodeSEXP, SEXP nsMapSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrNode >::type node(nodeSEXP);
+ Rcpp::traits::input_parameter< CharacterVector >::type nsMap(nsMapSEXP);
+ rcpp_result_gen = Rcpp::wrap(node_name(node, nsMap));
+ return rcpp_result_gen;
+END_RCPP
+}
+// node_set_name
+void node_set_name(XPtrNode node, std::string value);
+RcppExport SEXP xml2_node_set_name(SEXP nodeSEXP, SEXP valueSEXP) {
+BEGIN_RCPP
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrNode >::type node(nodeSEXP);
+ Rcpp::traits::input_parameter< std::string >::type value(valueSEXP);
+ node_set_name(node, value);
+ return R_NilValue;
+END_RCPP
+}
+// node_text
+CharacterVector node_text(XPtrNode node);
+RcppExport SEXP xml2_node_text(SEXP nodeSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrNode >::type node(nodeSEXP);
+ rcpp_result_gen = Rcpp::wrap(node_text(node));
+ return rcpp_result_gen;
+END_RCPP
+}
+// node_attr
+SEXP node_attr(XPtrNode node, std::string name, CharacterVector missing, CharacterVector nsMap);
+RcppExport SEXP xml2_node_attr(SEXP nodeSEXP, SEXP nameSEXP, SEXP missingSEXP, SEXP nsMapSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrNode >::type node(nodeSEXP);
+ Rcpp::traits::input_parameter< std::string >::type name(nameSEXP);
+ Rcpp::traits::input_parameter< CharacterVector >::type missing(missingSEXP);
+ Rcpp::traits::input_parameter< CharacterVector >::type nsMap(nsMapSEXP);
+ rcpp_result_gen = Rcpp::wrap(node_attr(node, name, missing, nsMap));
+ return rcpp_result_gen;
+END_RCPP
+}
+// node_attrs
+CharacterVector node_attrs(XPtrNode node_, CharacterVector nsMap);
+RcppExport SEXP xml2_node_attrs(SEXP node_SEXP, SEXP nsMapSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrNode >::type node_(node_SEXP);
+ Rcpp::traits::input_parameter< CharacterVector >::type nsMap(nsMapSEXP);
+ rcpp_result_gen = Rcpp::wrap(node_attrs(node_, nsMap));
+ return rcpp_result_gen;
+END_RCPP
+}
+// node_set_attr
+void node_set_attr(XPtrNode node_, std::string name, std::string value, CharacterVector nsMap);
+RcppExport SEXP xml2_node_set_attr(SEXP node_SEXP, SEXP nameSEXP, SEXP valueSEXP, SEXP nsMapSEXP) {
+BEGIN_RCPP
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrNode >::type node_(node_SEXP);
+ Rcpp::traits::input_parameter< std::string >::type name(nameSEXP);
+ Rcpp::traits::input_parameter< std::string >::type value(valueSEXP);
+ Rcpp::traits::input_parameter< CharacterVector >::type nsMap(nsMapSEXP);
+ node_set_attr(node_, name, value, nsMap);
+ return R_NilValue;
+END_RCPP
+}
+// node_children
+Rcpp::List node_children(XPtrNode node, bool onlyNode);
+RcppExport SEXP xml2_node_children(SEXP nodeSEXP, SEXP onlyNodeSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrNode >::type node(nodeSEXP);
+ Rcpp::traits::input_parameter< bool >::type onlyNode(onlyNodeSEXP);
+ rcpp_result_gen = Rcpp::wrap(node_children(node, onlyNode));
+ return rcpp_result_gen;
+END_RCPP
+}
+// node_length
+int node_length(XPtrNode node, bool onlyNode);
+RcppExport SEXP xml2_node_length(SEXP nodeSEXP, SEXP onlyNodeSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrNode >::type node(nodeSEXP);
+ Rcpp::traits::input_parameter< bool >::type onlyNode(onlyNodeSEXP);
+ rcpp_result_gen = Rcpp::wrap(node_length(node, onlyNode));
+ return rcpp_result_gen;
+END_RCPP
+}
+// node_parents
+Rcpp::List node_parents(XPtrNode node);
+RcppExport SEXP xml2_node_parents(SEXP nodeSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrNode >::type node(nodeSEXP);
+ rcpp_result_gen = Rcpp::wrap(node_parents(node));
+ return rcpp_result_gen;
+END_RCPP
+}
+// node_siblings
+Rcpp::List node_siblings(XPtrNode node, bool onlyNode);
+RcppExport SEXP xml2_node_siblings(SEXP nodeSEXP, SEXP onlyNodeSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrNode >::type node(nodeSEXP);
+ Rcpp::traits::input_parameter< bool >::type onlyNode(onlyNodeSEXP);
+ rcpp_result_gen = Rcpp::wrap(node_siblings(node, onlyNode));
+ return rcpp_result_gen;
+END_RCPP
+}
+// node_parent
+XPtrNode node_parent(XPtrNode n);
+RcppExport SEXP xml2_node_parent(SEXP nSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrNode >::type n(nSEXP);
+ rcpp_result_gen = Rcpp::wrap(node_parent(n));
+ return rcpp_result_gen;
+END_RCPP
+}
+// node_path
+std::string node_path(XPtrNode n);
+RcppExport SEXP xml2_node_path(SEXP nSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrNode >::type n(nSEXP);
+ rcpp_result_gen = Rcpp::wrap(node_path(n));
+ return rcpp_result_gen;
+END_RCPP
+}
+// nodes_duplicated
+LogicalVector nodes_duplicated(List nodes);
+RcppExport SEXP xml2_nodes_duplicated(SEXP nodesSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< List >::type nodes(nodesSEXP);
+ rcpp_result_gen = Rcpp::wrap(nodes_duplicated(nodes));
+ return rcpp_result_gen;
+END_RCPP
+}
+// node_type
+int node_type(XPtrNode node);
+RcppExport SEXP xml2_node_type(SEXP nodeSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrNode >::type node(nodeSEXP);
+ rcpp_result_gen = Rcpp::wrap(node_type(node));
+ return rcpp_result_gen;
+END_RCPP
+}
+// node_copy
+XPtrNode node_copy(XPtrNode node);
+RcppExport SEXP xml2_node_copy(SEXP nodeSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrNode >::type node(nodeSEXP);
+ rcpp_result_gen = Rcpp::wrap(node_copy(node));
+ return rcpp_result_gen;
+END_RCPP
+}
+// node_set_content
+void node_set_content(XPtrNode node, std::string content);
+RcppExport SEXP xml2_node_set_content(SEXP nodeSEXP, SEXP contentSEXP) {
+BEGIN_RCPP
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrNode >::type node(nodeSEXP);
+ Rcpp::traits::input_parameter< std::string >::type content(contentSEXP);
+ node_set_content(node, content);
+ return R_NilValue;
+END_RCPP
+}
+// node_append_content
+void node_append_content(XPtrNode node, std::string content);
+RcppExport SEXP xml2_node_append_content(SEXP nodeSEXP, SEXP contentSEXP) {
+BEGIN_RCPP
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrNode >::type node(nodeSEXP);
+ Rcpp::traits::input_parameter< std::string >::type content(contentSEXP);
+ node_append_content(node, content);
+ return R_NilValue;
+END_RCPP
+}
+// node_append_child
+XPtrNode node_append_child(XPtrNode parent, XPtrNode cur);
+RcppExport SEXP xml2_node_append_child(SEXP parentSEXP, SEXP curSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrNode >::type parent(parentSEXP);
+ Rcpp::traits::input_parameter< XPtrNode >::type cur(curSEXP);
+ rcpp_result_gen = Rcpp::wrap(node_append_child(parent, cur));
+ return rcpp_result_gen;
+END_RCPP
+}
+// node_prepend_sibling
+XPtrNode node_prepend_sibling(XPtrNode cur, XPtrNode elem);
+RcppExport SEXP xml2_node_prepend_sibling(SEXP curSEXP, SEXP elemSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrNode >::type cur(curSEXP);
+ Rcpp::traits::input_parameter< XPtrNode >::type elem(elemSEXP);
+ rcpp_result_gen = Rcpp::wrap(node_prepend_sibling(cur, elem));
+ return rcpp_result_gen;
+END_RCPP
+}
+// node_append_sibling
+XPtrNode node_append_sibling(XPtrNode cur, XPtrNode elem);
+RcppExport SEXP xml2_node_append_sibling(SEXP curSEXP, SEXP elemSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrNode >::type cur(curSEXP);
+ Rcpp::traits::input_parameter< XPtrNode >::type elem(elemSEXP);
+ rcpp_result_gen = Rcpp::wrap(node_append_sibling(cur, elem));
+ return rcpp_result_gen;
+END_RCPP
+}
+// node_replace
+XPtrNode node_replace(XPtrNode old, XPtrNode cur);
+RcppExport SEXP xml2_node_replace(SEXP oldSEXP, SEXP curSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrNode >::type old(oldSEXP);
+ Rcpp::traits::input_parameter< XPtrNode >::type cur(curSEXP);
+ rcpp_result_gen = Rcpp::wrap(node_replace(old, cur));
+ return rcpp_result_gen;
+END_RCPP
+}
+// node_remove
+void node_remove(XPtrNode cur, bool free);
+RcppExport SEXP xml2_node_remove(SEXP curSEXP, SEXP freeSEXP) {
+BEGIN_RCPP
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrNode >::type cur(curSEXP);
+ Rcpp::traits::input_parameter< bool >::type free(freeSEXP);
+ node_remove(cur, free);
+ return R_NilValue;
+END_RCPP
+}
+// node_new
+XPtrNode node_new(std::string name);
+RcppExport SEXP xml2_node_new(SEXP nameSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< std::string >::type name(nameSEXP);
+ rcpp_result_gen = Rcpp::wrap(node_new(name));
+ return rcpp_result_gen;
+END_RCPP
+}
+// node_cdata_new
+XPtrNode node_cdata_new(XPtrDoc doc, std::string content);
+RcppExport SEXP xml2_node_cdata_new(SEXP docSEXP, SEXP contentSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrDoc >::type doc(docSEXP);
+ Rcpp::traits::input_parameter< std::string >::type content(contentSEXP);
+ rcpp_result_gen = Rcpp::wrap(node_cdata_new(doc, content));
+ return rcpp_result_gen;
+END_RCPP
+}
+// node_comment_new
+XPtrNode node_comment_new(std::string content);
+RcppExport SEXP xml2_node_comment_new(SEXP contentSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< std::string >::type content(contentSEXP);
+ rcpp_result_gen = Rcpp::wrap(node_comment_new(content));
+ return rcpp_result_gen;
+END_RCPP
+}
+// node_new_ns
+XPtrNode node_new_ns(std::string name, XPtrNs ns);
+RcppExport SEXP xml2_node_new_ns(SEXP nameSEXP, SEXP nsSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< std::string >::type name(nameSEXP);
+ Rcpp::traits::input_parameter< XPtrNs >::type ns(nsSEXP);
+ rcpp_result_gen = Rcpp::wrap(node_new_ns(name, ns));
+ return rcpp_result_gen;
+END_RCPP
+}
+// node_null
+XPtrNode node_null();
+RcppExport SEXP xml2_node_null() {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ rcpp_result_gen = Rcpp::wrap(node_null());
+ return rcpp_result_gen;
+END_RCPP
+}
+// node_set_namespace_uri
+void node_set_namespace_uri(XPtrDoc doc, XPtrNode node, std::string uri);
+RcppExport SEXP xml2_node_set_namespace_uri(SEXP docSEXP, SEXP nodeSEXP, SEXP uriSEXP) {
+BEGIN_RCPP
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrDoc >::type doc(docSEXP);
+ Rcpp::traits::input_parameter< XPtrNode >::type node(nodeSEXP);
+ Rcpp::traits::input_parameter< std::string >::type uri(uriSEXP);
+ node_set_namespace_uri(doc, node, uri);
+ return R_NilValue;
+END_RCPP
+}
+// node_set_namespace_prefix
+void node_set_namespace_prefix(XPtrDoc doc, XPtrNode node, std::string prefix);
+RcppExport SEXP xml2_node_set_namespace_prefix(SEXP docSEXP, SEXP nodeSEXP, SEXP prefixSEXP) {
+BEGIN_RCPP
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrDoc >::type doc(docSEXP);
+ Rcpp::traits::input_parameter< XPtrNode >::type node(nodeSEXP);
+ Rcpp::traits::input_parameter< std::string >::type prefix(prefixSEXP);
+ node_set_namespace_prefix(doc, node, prefix);
+ return R_NilValue;
+END_RCPP
+}
+// node_new_dtd
+void node_new_dtd(XPtrDoc doc, std::string name, std::string eid, std::string sid);
+RcppExport SEXP xml2_node_new_dtd(SEXP docSEXP, SEXP nameSEXP, SEXP eidSEXP, SEXP sidSEXP) {
+BEGIN_RCPP
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrDoc >::type doc(docSEXP);
+ Rcpp::traits::input_parameter< std::string >::type name(nameSEXP);
+ Rcpp::traits::input_parameter< std::string >::type eid(eidSEXP);
+ Rcpp::traits::input_parameter< std::string >::type sid(sidSEXP);
+ node_new_dtd(doc, name, eid, sid);
+ return R_NilValue;
+END_RCPP
+}
+// xml_save_options
+Rcpp::IntegerVector xml_save_options();
+RcppExport SEXP xml2_xml_save_options() {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ rcpp_result_gen = Rcpp::wrap(xml_save_options());
+ return rcpp_result_gen;
+END_RCPP
+}
+// doc_write_file
+void doc_write_file(XPtrDoc x, std::string path, std::string encoding, int options);
+RcppExport SEXP xml2_doc_write_file(SEXP xSEXP, SEXP pathSEXP, SEXP encodingSEXP, SEXP optionsSEXP) {
+BEGIN_RCPP
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrDoc >::type x(xSEXP);
+ Rcpp::traits::input_parameter< std::string >::type path(pathSEXP);
+ Rcpp::traits::input_parameter< std::string >::type encoding(encodingSEXP);
+ Rcpp::traits::input_parameter< int >::type options(optionsSEXP);
+ doc_write_file(x, path, encoding, options);
+ return R_NilValue;
+END_RCPP
+}
+// doc_write_connection
+void doc_write_connection(XPtrDoc x, SEXP connection, std::string encoding, int options);
+RcppExport SEXP xml2_doc_write_connection(SEXP xSEXP, SEXP connectionSEXP, SEXP encodingSEXP, SEXP optionsSEXP) {
+BEGIN_RCPP
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrDoc >::type x(xSEXP);
+ Rcpp::traits::input_parameter< SEXP >::type connection(connectionSEXP);
+ Rcpp::traits::input_parameter< std::string >::type encoding(encodingSEXP);
+ Rcpp::traits::input_parameter< int >::type options(optionsSEXP);
+ doc_write_connection(x, connection, encoding, options);
+ return R_NilValue;
+END_RCPP
+}
+// doc_write_character
+CharacterVector doc_write_character(XPtrDoc x, std::string encoding, int options);
+RcppExport SEXP xml2_doc_write_character(SEXP xSEXP, SEXP encodingSEXP, SEXP optionsSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrDoc >::type x(xSEXP);
+ Rcpp::traits::input_parameter< std::string >::type encoding(encodingSEXP);
+ Rcpp::traits::input_parameter< int >::type options(optionsSEXP);
+ rcpp_result_gen = Rcpp::wrap(doc_write_character(x, encoding, options));
+ return rcpp_result_gen;
+END_RCPP
+}
+// node_write_file
+void node_write_file(XPtrNode x, std::string path, std::string encoding, int options);
+RcppExport SEXP xml2_node_write_file(SEXP xSEXP, SEXP pathSEXP, SEXP encodingSEXP, SEXP optionsSEXP) {
+BEGIN_RCPP
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrNode >::type x(xSEXP);
+ Rcpp::traits::input_parameter< std::string >::type path(pathSEXP);
+ Rcpp::traits::input_parameter< std::string >::type encoding(encodingSEXP);
+ Rcpp::traits::input_parameter< int >::type options(optionsSEXP);
+ node_write_file(x, path, encoding, options);
+ return R_NilValue;
+END_RCPP
+}
+// node_write_connection
+void node_write_connection(XPtrNode x, SEXP connection, std::string encoding, int options);
+RcppExport SEXP xml2_node_write_connection(SEXP xSEXP, SEXP connectionSEXP, SEXP encodingSEXP, SEXP optionsSEXP) {
+BEGIN_RCPP
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrNode >::type x(xSEXP);
+ Rcpp::traits::input_parameter< SEXP >::type connection(connectionSEXP);
+ Rcpp::traits::input_parameter< std::string >::type encoding(encodingSEXP);
+ Rcpp::traits::input_parameter< int >::type options(optionsSEXP);
+ node_write_connection(x, connection, encoding, options);
+ return R_NilValue;
+END_RCPP
+}
+// node_write_character
+CharacterVector node_write_character(XPtrNode x, std::string encoding, int options);
+RcppExport SEXP xml2_node_write_character(SEXP xSEXP, SEXP encodingSEXP, SEXP optionsSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrNode >::type x(xSEXP);
+ Rcpp::traits::input_parameter< std::string >::type encoding(encodingSEXP);
+ Rcpp::traits::input_parameter< int >::type options(optionsSEXP);
+ rcpp_result_gen = Rcpp::wrap(node_write_character(x, encoding, options));
+ return rcpp_result_gen;
+END_RCPP
+}
+// doc_validate
+Rcpp::LogicalVector doc_validate(XPtrDoc doc, XPtrDoc schema);
+RcppExport SEXP xml2_doc_validate(SEXP docSEXP, SEXP schemaSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrDoc >::type doc(docSEXP);
+ Rcpp::traits::input_parameter< XPtrDoc >::type schema(schemaSEXP);
+ rcpp_result_gen = Rcpp::wrap(doc_validate(doc, schema));
+ return rcpp_result_gen;
+END_RCPP
+}
+// url_absolute
+CharacterVector url_absolute(CharacterVector x, CharacterVector base);
+RcppExport SEXP xml2_url_absolute(SEXP xSEXP, SEXP baseSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< CharacterVector >::type x(xSEXP);
+ Rcpp::traits::input_parameter< CharacterVector >::type base(baseSEXP);
+ rcpp_result_gen = Rcpp::wrap(url_absolute(x, base));
+ return rcpp_result_gen;
+END_RCPP
+}
+// url_relative
+CharacterVector url_relative(CharacterVector x, CharacterVector base);
+RcppExport SEXP xml2_url_relative(SEXP xSEXP, SEXP baseSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< CharacterVector >::type x(xSEXP);
+ Rcpp::traits::input_parameter< CharacterVector >::type base(baseSEXP);
+ rcpp_result_gen = Rcpp::wrap(url_relative(x, base));
+ return rcpp_result_gen;
+END_RCPP
+}
+// url_parse
+List url_parse(CharacterVector x);
+RcppExport SEXP xml2_url_parse(SEXP xSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< CharacterVector >::type x(xSEXP);
+ rcpp_result_gen = Rcpp::wrap(url_parse(x));
+ return rcpp_result_gen;
+END_RCPP
+}
+// url_escape
+CharacterVector url_escape(CharacterVector x, CharacterVector reserved);
+RcppExport SEXP xml2_url_escape(SEXP xSEXP, SEXP reservedSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< CharacterVector >::type x(xSEXP);
+ Rcpp::traits::input_parameter< CharacterVector >::type reserved(reservedSEXP);
+ rcpp_result_gen = Rcpp::wrap(url_escape(x, reserved));
+ return rcpp_result_gen;
+END_RCPP
+}
+// url_unescape
+CharacterVector url_unescape(CharacterVector x);
+RcppExport SEXP xml2_url_unescape(SEXP xSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< CharacterVector >::type x(xSEXP);
+ rcpp_result_gen = Rcpp::wrap(url_unescape(x));
+ return rcpp_result_gen;
+END_RCPP
+}
+// xpath_search
+RObject xpath_search(XPtrNode node, XPtrDoc doc, std::string xpath, CharacterVector nsMap, double num_results);
+RcppExport SEXP xml2_xpath_search(SEXP nodeSEXP, SEXP docSEXP, SEXP xpathSEXP, SEXP nsMapSEXP, SEXP num_resultsSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< XPtrNode >::type node(nodeSEXP);
+ Rcpp::traits::input_parameter< XPtrDoc >::type doc(docSEXP);
+ Rcpp::traits::input_parameter< std::string >::type xpath(xpathSEXP);
+ Rcpp::traits::input_parameter< CharacterVector >::type nsMap(nsMapSEXP);
+ Rcpp::traits::input_parameter< double >::type num_results(num_resultsSEXP);
+ rcpp_result_gen = Rcpp::wrap(xpath_search(node, doc, xpath, nsMap, num_results));
+ return rcpp_result_gen;
+END_RCPP
+}
diff --git a/src/connection.cpp b/src/connection.cpp
new file mode 100644
index 0000000..e36a6f6
--- /dev/null
+++ b/src/connection.cpp
@@ -0,0 +1,37 @@
+#include <Rcpp.h>
+using namespace Rcpp;
+
+// Wrapper around R's read_bin function
+RawVector read_bin(RObject con, int bytes = 64 * 1024) {
+ Rcpp::Environment baseEnv = Rcpp::Environment::base_env();
+ Rcpp::Function readBin = baseEnv["readBin"];
+
+ RawVector out = Rcpp::as<RawVector>(readBin(con, "raw", bytes));
+ return out;
+}
+
+// Read data from a connection in chunks and then combine into a single
+// raw vector.
+//
+// [[Rcpp::export]]
+RawVector read_connection_(RObject con, int chunk_size = 64 * 1024) {
+ std::vector<RawVector> chunks;
+
+ RawVector chunk;
+ while((chunk = read_bin(con, chunk_size)).size() > 0)
+ chunks.push_back(chunk);
+
+ size_t size = 0;
+ for (size_t i = 0; i < chunks.size(); ++i)
+ size += chunks[i].size();
+
+ RawVector out(size);
+ size_t pos = 0;
+ for (size_t i = 0; i < chunks.size(); ++i) {
+ memcpy(RAW(out) + pos, RAW(chunks[i]), chunks[i].size());
+ pos += chunks[i].size();
+ }
+
+ return out;
+}
+
diff --git a/src/xml2_doc.cpp b/src/xml2_doc.cpp
new file mode 100644
index 0000000..eeb7e8d
--- /dev/null
+++ b/src/xml2_doc.cpp
@@ -0,0 +1,226 @@
+#include <Rcpp.h>
+using namespace Rcpp;
+
+#include <libxml/parser.h>
+#include <libxml/HTMLparser.h>
+#include "xml2_types.h"
+#include "xml2_utils.h"
+
+// [[Rcpp::export]]
+Rcpp::IntegerVector xml_parse_options() {
+
+ /* * *
+ * Author: Daniel Veillard <veillard at redhat.com>
+ * Date: Mon May 16 16:03:50 2011 +0800
+ * https://github.com/GNOME/libxml2/commit/c62efc847c836d4c4f1aea08c68cd93bd342b9f4
+ *
+ * Add options to ignore the internal encoding
+ */
+#if defined(LIBXML_VERSION) && (LIBXML_VERSION >= 20800)
+#define HAS_IGNORE_ENC
+#endif
+
+ /* * *
+ * Author: Daniel Veillard <veillard at redhat.com>
+ * Date: Mon Aug 13 12:41:33 2012 +0800
+ * https://github.com/GNOME/libxml2/commit/968a03a2e54f5bcf53089f5e3c8f790dbe0bf824
+ *
+ * Add support for big line numbers in error reporting
+ */
+#if defined(LIBXML_VERSION) && (LIBXML_VERSION >= 20900)
+#define HAS_BIG_LINES
+#endif
+
+ const char * names[] = {
+ "RECOVER",
+ "NOENT",
+ "DTDLOAD",
+ "DTDATTR",
+ "DTDVALID",
+ "NOERROR",
+ "NOWARNING",
+ "PEDANTIC",
+ "NOBLANKS",
+ "SAX1",
+ "XINCLUDE",
+ "NONET",
+ "NODICT",
+ "NSCLEAN",
+ "NOCDATA",
+ "NOXINCNODE",
+ "COMPACT",
+ "OLD10",
+ "NOBASEFIX",
+ "HUGE",
+ "OLDSAX",
+#ifdef HAS_IGNORE_ENC
+ "IGNORE_ENC",
+#endif
+#ifdef HAS_BIG_LINES
+ "BIG_LINES",
+#endif
+ };
+
+ const int values[] = {
+ XML_PARSE_RECOVER,
+ XML_PARSE_NOENT,
+ XML_PARSE_DTDLOAD,
+ XML_PARSE_DTDATTR,
+ XML_PARSE_DTDVALID,
+ XML_PARSE_NOERROR,
+ XML_PARSE_NOWARNING,
+ XML_PARSE_PEDANTIC,
+ XML_PARSE_NOBLANKS,
+ XML_PARSE_SAX1,
+ XML_PARSE_XINCLUDE,
+ XML_PARSE_NONET,
+ XML_PARSE_NODICT,
+ XML_PARSE_NSCLEAN,
+ XML_PARSE_NOCDATA,
+ XML_PARSE_NOXINCNODE,
+ XML_PARSE_COMPACT,
+ XML_PARSE_OLD10,
+ XML_PARSE_NOBASEFIX,
+ XML_PARSE_HUGE,
+ XML_PARSE_OLDSAX,
+#ifdef HAS_IGNORE_ENC
+ XML_PARSE_IGNORE_ENC,
+#endif
+#ifdef HAS_BIG_LINES
+ XML_PARSE_BIG_LINES,
+#endif
+ };
+
+ const char * descriptions[] = {
+ "recover on errors",
+ "substitute entities",
+ "load the external subset",
+ "default DTD attributes",
+ "validate with the DTD",
+ "suppress error reports",
+ "suppress warning reports",
+ "pedantic error reporting",
+ "remove blank nodes",
+ "use the SAX1 interface internally",
+ "Implement XInclude substitition",
+ "Forbid network access",
+ "Do not reuse the context dictionary",
+ "remove redundant namespaces declarations",
+ "merge CDATA as text nodes",
+ "do not generate XINCLUDE START/END nodes",
+ "compact small text nodes; no modification of the tree allowed afterwards (will possibly crash if you try to modify the tree)",
+ "parse using XML-1.0 before update 5",
+ "do not fixup XINCLUDE xml:base uris",
+ "relax any hardcoded limit from the parser",
+ "parse using SAX2 interface before 2.7.0",
+#ifdef HAS_IGNORE_ENC
+ "ignore internal document encoding hint",
+#endif
+#ifdef HAS_BIG_LINES
+ "Store big lines numbers in text PSVI field",
+#endif
+ };
+
+ size_t size = sizeof(values) / sizeof(values[0]);
+
+ Rcpp::IntegerVector out_values = Rcpp::IntegerVector(size);
+ Rcpp::CharacterVector out_names = Rcpp::CharacterVector(size);
+ Rcpp::CharacterVector out_descriptions = Rcpp::CharacterVector(size);
+ for (int i = 0; i < size; ++i) {
+ out_values[i] = values[i];
+ out_names[i] = names[i];
+ out_descriptions[i] = descriptions[i];
+ }
+ out_values.attr("names") = out_names;
+ out_values.attr("descriptions") = out_descriptions;
+
+ return out_values;
+
+#undef HAS_BIG_LINES
+#undef HAS_IGNORE_ENC
+}
+
+// [[Rcpp::export]]
+XPtrDoc doc_parse_file(std::string path,
+ std::string encoding = "",
+ bool as_html = false,
+ int options = 0) {
+ xmlDoc* pDoc;
+ if (as_html) {
+ pDoc = htmlReadFile(
+ path.c_str(),
+ encoding == "" ? NULL : encoding.c_str(),
+ options
+ );
+ } else {
+ pDoc = xmlReadFile(
+ path.c_str(),
+ encoding == "" ? NULL : encoding.c_str(),
+ options
+ );
+ }
+
+ if (pDoc == NULL)
+ Rcpp::stop("Failed to parse %s", path);
+
+ return XPtrDoc(pDoc);
+}
+
+// [[Rcpp::export]]
+XPtrDoc doc_parse_raw(RawVector x, std::string encoding,
+ std::string base_url = "",
+ bool as_html = false,
+ int options = 0) {
+ xmlDoc* pDoc;
+ if (as_html) {
+ pDoc = htmlReadMemory(
+ (const char *) RAW(x),
+ Rf_length(x),
+ base_url == "" ? NULL : base_url.c_str(),
+ encoding == "" ? NULL : encoding.c_str(),
+ options
+ );
+ } else {
+ pDoc = xmlReadMemory(
+ (const char *) RAW(x),
+ Rf_length(x),
+ base_url == "" ? NULL : base_url.c_str(),
+ encoding == "" ? NULL : encoding.c_str(),
+ options
+ );
+ }
+
+ if (pDoc == NULL)
+ Rcpp::stop("Failed to parse text");
+
+ return XPtrDoc(pDoc);
+}
+
+// [[Rcpp::export]]
+XPtrNode doc_root(XPtrDoc x) {
+ return XPtrNode(xmlDocGetRootElement(x.checked_get()));
+}
+
+// [[Rcpp::export]]
+bool doc_has_root(XPtrDoc x) {
+ return xmlDocGetRootElement(x.get()) != NULL;
+}
+
+// [[Rcpp::export]]
+CharacterVector doc_url(XPtrDoc x) {
+ SEXP string = (x->URL == NULL) ? NA_STRING : Rf_mkCharCE((const char*) x->URL, CE_UTF8);
+ return CharacterVector(string);
+}
+
+// [[Rcpp::export]]
+XPtrDoc doc_new(std::string version, std::string encoding = "UTF-8") {
+ XPtrDoc x = XPtrDoc(xmlNewDoc(asXmlChar(version)));
+ xmlCharEncodingHandlerPtr p = xmlFindCharEncodingHandler(encoding.c_str());
+ x->encoding = xmlStrdup(reinterpret_cast<const xmlChar *>(p->name));
+ return x;
+}
+
+// [[Rcpp::export]]
+XPtrNode doc_set_root(XPtrDoc doc, XPtrNode root) {
+ return XPtrNode(xmlDocSetRootElement(doc, root));
+}
diff --git a/src/xml2_init.cpp b/src/xml2_init.cpp
new file mode 100644
index 0000000..ab92746
--- /dev/null
+++ b/src/xml2_init.cpp
@@ -0,0 +1,38 @@
+#include <Rcpp.h>
+#include <R_ext/Rdynload.h>
+#include <stdio.h>
+#include <libxml/xmlversion.h>
+#include <libxml/xmlerror.h>
+#include <libxml/parser.h>
+
+void handleError(void* userData, xmlError* error) {
+ std::string message = std::string(error->message);
+ message.resize(message.size() - 1); // trim off trailing newline
+
+ if (error->level <= 2) {
+ Rcpp::warning("%s [%i]", message, error->code);
+ } else {
+ Rcpp::stop("%s [%i]", message, error->code);
+ }
+}
+
+extern "C" {
+
+ void R_init_xml2(DllInfo *info) {
+ // Check that header and libs are compatible
+ LIBXML_TEST_VERSION
+
+ xmlInitParser();
+ xmlSetStructuredErrorFunc(NULL, handleError);
+ }
+
+ void R_unload_xml2(DllInfo *info) {
+ xmlCleanupParser();
+ }
+
+}
+
+// [[Rcpp::export]]
+std::string libxml2_version(){
+ return LIBXML_DOTTED_VERSION;
+}
diff --git a/src/xml2_namespace.cpp b/src/xml2_namespace.cpp
new file mode 100644
index 0000000..7bc1392
--- /dev/null
+++ b/src/xml2_namespace.cpp
@@ -0,0 +1,56 @@
+#include <Rcpp.h>
+#include <libxml/tree.h>
+
+using namespace Rcpp;
+#include "xml2_types.h"
+#include "xml2_utils.h"
+
+// [[Rcpp::export]]
+CharacterVector unique_ns(CharacterVector ns) {
+ return NsMap(ns).out();
+}
+
+void cache_namespace(xmlNode* node, NsMap* nsMap) {
+ // Iterate over namespace definitions
+ for(xmlNs* cur = node->nsDef; cur != NULL; cur = cur->next) {
+ nsMap->add(cur->prefix, cur->href);
+ }
+
+
+ // Iterate over children, calling this function recursively
+ for(xmlNode* cur = node->children; cur != NULL; cur = cur->next)
+ cache_namespace(cur, nsMap);
+}
+
+// [[Rcpp::export]]
+CharacterVector doc_namespaces(XPtrDoc doc) {
+ NsMap nsMap;
+
+ xmlNode* root = xmlDocGetRootElement(doc.checked_get());
+ cache_namespace(root, &nsMap);
+
+ return nsMap.out();
+}
+
+// [[Rcpp::export]]
+XPtrNs ns_lookup_uri(XPtrDoc doc, XPtrNode node, std::string uri) {
+ xmlNsPtr ns = xmlSearchNsByHref(doc.checked_get(), node.checked_get(), asXmlChar(uri));
+ if (ns == NULL) {
+ stop("No namespace with URI `%s` found", uri);
+ }
+ return XPtrNs(ns);
+}
+
+// [[Rcpp::export]]
+XPtrNs ns_lookup(XPtrDoc doc, XPtrNode node, std::string prefix) {
+ xmlNsPtr ns = NULL;
+ if (prefix.length() == 0) {
+ ns = xmlSearchNs(doc.checked_get(), node.checked_get(), NULL);
+ } else {
+ ns = xmlSearchNs(doc.checked_get(), node.checked_get(), asXmlChar(prefix));
+ if (ns == NULL) {
+ stop("No namespace with prefix `%s` found", prefix);
+ }
+ }
+ return XPtrNs(ns);
+}
diff --git a/src/xml2_node.cpp b/src/xml2_node.cpp
new file mode 100644
index 0000000..a7954b6
--- /dev/null
+++ b/src/xml2_node.cpp
@@ -0,0 +1,540 @@
+#include <Rcpp.h>
+#include <libxml/tree.h>
+#include <boost/shared_ptr.hpp>
+#include <boost/algorithm/string/trim.hpp>
+#include <fstream>
+#include <sstream>
+
+using namespace Rcpp;
+#include "xml2_types.h"
+#include "xml2_utils.h"
+
+template<typename T> // for xmlAttr and xmlNode
+std::string nodeName(T* node, CharacterVector nsMap) {
+ std::string name = Xml2String(node->name).asStdString();
+ if (nsMap.size() == 0)
+ return name;
+
+ xmlNs* ns = node->ns;
+ if (ns == NULL)
+ return name;
+
+ std::string prefix = NsMap(nsMap).findPrefix(Xml2String(ns->href).asStdString());
+ return prefix + ":" + name;
+}
+
+// [[Rcpp::export]]
+CharacterVector node_name(XPtrNode node, CharacterVector nsMap) {
+ return asCharacterVector(nodeName(node.checked_get(), nsMap));
+}
+
+// [[Rcpp::export]]
+void node_set_name(XPtrNode node, std::string value) {
+ return xmlNodeSetName(node, asXmlChar(value));
+}
+
+// [[Rcpp::export]]
+CharacterVector node_text(XPtrNode node) {
+ std::string text = Xml2String(xmlNodeGetContent(node.checked_get())).asStdString();
+
+ return asCharacterVector(text.c_str());
+}
+
+bool hasPrefix(std::string lhs, std::string rhs) {
+ if (lhs.length() > rhs.length()) {
+ return false;
+ }
+
+ return std::equal(
+ lhs.begin(),
+ lhs.end(),
+ rhs.begin());
+}
+
+const xmlChar* xmlNsDefinition(xmlNodePtr node, xmlChar* lookup) {
+ xmlNsPtr next = node->nsDef;
+
+ while(next != NULL) {
+ // default namespace
+ if (xmlStrEqual(next->prefix, lookup)) {
+ return next->href;
+ }
+ next = next->next;
+ }
+
+ return NULL;
+}
+
+// [[Rcpp::export]]
+SEXP node_attr(XPtrNode node, std::string name, CharacterVector missing,
+ CharacterVector nsMap) {
+ if (missing.size() != 1)
+ Rcpp::stop("`missing` should be length 1");
+ SEXP missingVal = missing[0];
+
+ if (name == "xmlns") {
+ return CharacterVector(Xml2String(xmlNsDefinition(node, NULL)).asRString(missingVal));
+ }
+ if (hasPrefix("xmlns:", name)) {
+ std::string prefix = name.substr(6);
+ return CharacterVector(Xml2String(xmlNsDefinition(node, asXmlChar(prefix))).asRString(missingVal));
+ }
+ xmlChar* string;
+ if (nsMap.size() == 0) {
+ string = xmlGetProp(node.checked_get(), asXmlChar(name));
+ } else {
+ size_t colon = name.find(":");
+ if (colon == std::string::npos) {
+ // Has namespace spec, but attribute not qualified, so look for attribute
+ // without namespace
+ string = xmlGetNoNsProp(node.checked_get(), asXmlChar(name));
+ } else {
+ // Split name into prefix & attr, then look up full url
+ std::string
+ prefix = name.substr(0, colon),
+ attr = name.substr(colon + 1, name.size() - 1);
+
+ std::string url = NsMap(nsMap).findUrl(prefix);
+
+ string = xmlGetNsProp(node.checked_get(), asXmlChar(attr), asXmlChar(url));
+ }
+ }
+
+ return CharacterVector(Xml2String(string).asRString(missingVal));
+}
+
+// [[Rcpp::export]]
+CharacterVector node_attrs(XPtrNode node_, CharacterVector nsMap) {
+
+ int n = 0;
+ xmlNodePtr node = node_.checked_get();
+
+ if (node->type == XML_ELEMENT_NODE) {
+ // attributes
+ for(xmlAttr* cur = node->properties; cur != NULL; cur = cur->next)
+ n++;
+
+ // namespace definitions
+ for(xmlNsPtr cur = node->nsDef; cur != NULL; cur = cur->next)
+ n++;
+
+ CharacterVector names(n), values(n);
+
+ int i = 0;
+ for(xmlAttr* cur = node->properties; cur != NULL; cur = cur->next, ++i) {
+ names[i] = nodeName(cur, nsMap);
+
+ xmlNs* ns = cur->ns;
+ if (ns == NULL) {
+ if (nsMap.size() > 0) {
+ values[i] = Xml2String(xmlGetNoNsProp(node, cur->name)).asRString();
+ } else {
+ values[i] = Xml2String(xmlGetProp(node, cur->name)).asRString();
+ }
+ } else {
+ values[i] = Xml2String(xmlGetNsProp(node, cur->name, ns->href)).asRString();
+ }
+ }
+
+ for(xmlNsPtr cur = node->nsDef; cur != NULL; cur = cur->next, ++i) {
+ if (cur->prefix == NULL) {
+ names[i] = "xmlns";
+ } else {
+ names[i] = "xmlns:" + Xml2String(cur->prefix).asStdString();
+ }
+ values[i] = Xml2String(cur->href).asRString();
+ }
+
+ values.attr("names") = wrap<CharacterVector>(names);
+ return values;
+ }
+
+ return CharacterVector();
+}
+
+
+// Fix the tree by removing the namespace pointers to the given tree
+void xmlRemoveNamespace(xmlNodePtr tree, xmlNsPtr ns) {
+
+ // From https://github.com/GNOME/libxml2/blob/v2.9.2/tree.c#L6440
+ //
+ xmlNodePtr node = tree;
+ /*
+ * Browse the full subtree, deep first
+ */
+ while(node != NULL) {
+ if (node->ns != NULL && node->ns == ns) {
+ node->ns = NULL;
+ }
+
+ // Check for namespaces on the attributes
+ if (ns->prefix != NULL && // default namespaces will not exist on attributes
+ node->type == XML_ELEMENT_NODE) {
+ xmlAttrPtr attr = node->properties;
+ while (attr != NULL) {
+ if (attr->ns != NULL && attr->ns == ns) {
+ attr->ns = NULL;
+ }
+ attr = attr->next;
+ }
+ }
+
+ if ((node->children != NULL) && (node->type != XML_ENTITY_REF_NODE)) {
+ /* deep first */
+ node = node->children;
+ } else if ((node != tree) && (node->next != NULL)) {
+ /* then siblings */
+ node = node->next;
+ } else if (node != tree) {
+ /* go up to parents->next if needed */
+ while (node != tree) {
+ if (node->parent != NULL)
+ node = node->parent;
+ if ((node != tree) && (node->next != NULL)) {
+ node = node->next;
+ break;
+ }
+ if (node->parent == NULL) {
+ node = NULL;
+ break;
+ }
+ }
+ /* exit condition */
+ if (node == tree)
+ node = NULL;
+ } else
+ break;
+ }
+ return;
+}
+
+// Fix the tree by adding the namespace pointers to the given tree
+void xmlAddNamespace(xmlNodePtr tree, xmlNsPtr ns) {
+
+ // Only needed for default namespaces
+ if (ns->prefix != NULL) {
+ return;
+ }
+
+ // From https://github.com/GNOME/libxml2/blob/v2.9.2/tree.c#L6440
+ //
+ xmlNodePtr node = tree;
+ /*
+ * Browse the full subtree, deep first
+ */
+ while(node != NULL) {
+ if (node->ns == NULL) {
+ node->ns = ns;
+ }
+
+ if ((node->children != NULL) && (node->type != XML_ENTITY_REF_NODE)) {
+ /* deep first */
+ node = node->children;
+ } else if ((node != tree) && (node->next != NULL)) {
+ /* then siblings */
+ node = node->next;
+ } else if (node != tree) {
+ /* go up to parents->next if needed */
+ while (node != tree) {
+ if (node->parent != NULL)
+ node = node->parent;
+ if ((node != tree) && (node->next != NULL)) {
+ node = node->next;
+ break;
+ }
+ if (node->parent == NULL) {
+ node = NULL;
+ break;
+ }
+ }
+ /* exit condition */
+ if (node == tree)
+ node = NULL;
+ } else
+ break;
+ }
+ return;
+}
+
+void removeNs(xmlNodePtr node, xmlChar* prefix) {
+ if (node == NULL) {
+ return;
+ }
+
+ if (node->nsDef == NULL) {
+ return;
+ }
+
+ xmlNsPtr prev = node->nsDef;
+ if (xmlStrEqual(prev->prefix, prefix)) {
+ node->nsDef = prev->next;
+ xmlRemoveNamespace(node, prev);
+ xmlFreeNs(prev);
+ return;
+ }
+
+ while(prev->next != NULL) {
+ xmlNsPtr cur = prev->next;
+ if (xmlStrEqual(cur->prefix, prefix)) {
+ prev->next = cur->next;
+ xmlRemoveNamespace(node, cur);
+ xmlFreeNs(cur);
+ return;
+ }
+ prev = prev->next;
+ }
+ return;
+}
+
+// [[Rcpp::export]]
+void node_set_attr(XPtrNode node_, std::string name, std::string value, CharacterVector nsMap) {
+
+ bool remove = value.length() == 0;
+ const xmlNodePtr node = node_.checked_get();
+
+ if (name == "xmlns") {
+ if (remove) removeNs(node, NULL);
+ else xmlAddNamespace(node, xmlNewNs(node, asXmlChar(value), NULL));
+ return;
+ }
+ if (hasPrefix("xmlns:", name)) {
+ std::string prefix = name.substr(6);
+ if (remove) removeNs(node, asXmlChar(prefix));
+ else xmlAddNamespace(node, xmlNewNs(node, asXmlChar(value), asXmlChar(prefix)));
+ return;
+ }
+
+ if (nsMap.size() == 0) {
+ if (remove) xmlUnsetProp(node, asXmlChar(name));
+ else xmlSetProp(node, asXmlChar(name), asXmlChar(value));
+ } else {
+ size_t colon = name.find(":");
+ if (colon == std::string::npos) {
+ // Has namespace spec, but attribute not qualified, so just use that name
+ if (remove) xmlUnsetNsProp(node, NULL, asXmlChar(name));
+ else xmlSetProp(node, asXmlChar(name), asXmlChar(value));
+ } else {
+ // Split name into prefix & attr, then look up full url
+ std::string
+ prefix = name.substr(0, colon),
+ attr = name.substr(colon + 1, name.size() - 1);
+
+ std::string url = NsMap(nsMap).findUrl(prefix);
+
+ xmlNsPtr ns = xmlSearchNsByHref(node_->doc, node, asXmlChar(url));
+
+ if (remove) xmlUnsetNsProp(node, ns, asXmlChar(attr));
+ else xmlSetNsProp(node, ns, asXmlChar(attr), asXmlChar(value));
+ }
+ }
+
+ return;
+}
+
+List asList(std::vector<xmlNode*> nodes) {
+ List out(nodes.size());
+ for (size_t i = 0; i < nodes.size(); ++i)
+ out[i] = XPtrNode(nodes[i]);
+
+ return out;
+}
+
+// [[Rcpp::export]]
+Rcpp::List node_children(XPtrNode node, bool onlyNode = true) {
+ std::vector<xmlNode*> out;
+
+ for(xmlNode* cur = node->xmlChildrenNode; cur != NULL; cur = cur->next) {
+ if (onlyNode && cur->type != XML_ELEMENT_NODE)
+ continue;
+ out.push_back(cur);
+ }
+
+ return asList(out);
+}
+
+// [[Rcpp::export]]
+int node_length(XPtrNode node, bool onlyNode = true) {
+ int i = 0;
+ for(xmlNode* cur = node->xmlChildrenNode; cur != NULL; cur = cur->next) {
+ if (onlyNode && cur->type != XML_ELEMENT_NODE)
+ continue;
+ ++i;
+ }
+
+ return i;
+}
+
+
+// [[Rcpp::export]]
+Rcpp::List node_parents(XPtrNode node) {
+ std::vector<xmlNode*> out;
+
+ for(xmlNode* cur = node->parent; cur != NULL; cur = cur->parent) {
+ if (cur->type != XML_ELEMENT_NODE)
+ continue;
+ out.push_back(cur);
+ }
+
+ return asList(out);
+}
+
+// [[Rcpp::export]]
+Rcpp::List node_siblings(XPtrNode node, bool onlyNode = true) {
+ std::vector<xmlNode*> out;
+
+ xmlNode* parent = node->parent;
+ if (parent == NULL)
+ return List();
+
+ for(xmlNode* cur = parent->xmlChildrenNode; cur != NULL; cur = cur->next) {
+ if (cur == node)
+ continue;
+ if (onlyNode && cur->type != XML_ELEMENT_NODE)
+ continue;
+
+ out.push_back(cur);
+ }
+
+ return asList(out);
+}
+
+
+// [[Rcpp::export]]
+XPtrNode node_parent(XPtrNode n) {
+ if (n->parent == NULL)
+ Rcpp::stop("Parent does not exist");
+ return XPtrNode(n->parent);
+}
+
+// [[Rcpp::export]]
+std::string node_path(XPtrNode n) {
+ return Xml2String(xmlGetNodePath(n.checked_get())).asStdString();
+}
+
+// [[Rcpp::export]]
+LogicalVector nodes_duplicated(List nodes) {
+
+ std::set<xmlNode*> seen;
+
+ int n = nodes.size();
+ LogicalVector out(n);
+
+ for (int i = 0; i < n; ++i) {
+ bool result;
+ if (RObject(nodes[i]).inherits("xml_node")) {
+ XPtrNode node = as<XPtrNode>(List(nodes[i])["node"]);
+ result = !seen.insert(node.checked_get()).second;
+ } else if (RObject(nodes[i]).inherits("xml_missing")){
+ result = false;
+ } else {
+ XPtrNode node = nodes[i];
+ result = !seen.insert(node.checked_get()).second;
+ }
+ out[i] = result;
+ }
+
+ return out;
+}
+
+// [[Rcpp::export]]
+int node_type(XPtrNode node) {
+ return node->type;
+}
+
+// [[Rcpp::export]]
+XPtrNode node_copy(XPtrNode node) {
+ return XPtrNode(xmlCopyNode(node.checked_get(), 1));
+}
+
+// [[Rcpp::export]]
+void node_set_content(XPtrNode node, std::string content) {
+ return xmlNodeSetContentLen(node.checked_get(), asXmlChar(content), content.size());
+}
+
+// [[Rcpp::export]]
+void node_append_content(XPtrNode node, std::string content) {
+ return xmlNodeAddContentLen(node.checked_get(), asXmlChar(content), content.size());
+}
+
+// [[Rcpp::export]]
+XPtrNode node_append_child(XPtrNode parent, XPtrNode cur) {
+ return XPtrNode(xmlAddChild(parent.checked_get(), cur.checked_get()));
+}
+
+// Previous sibling
+// [[Rcpp::export]]
+XPtrNode node_prepend_sibling(XPtrNode cur, XPtrNode elem) {
+ return XPtrNode(xmlAddPrevSibling(cur.checked_get(), elem.checked_get()));
+}
+
+// Append sibling
+// [[Rcpp::export]]
+XPtrNode node_append_sibling(XPtrNode cur, XPtrNode elem) {
+ return XPtrNode(xmlAddNextSibling(cur.checked_get(), elem.checked_get()));
+}
+
+// Replace node
+// [[Rcpp::export]]
+XPtrNode node_replace(XPtrNode old, XPtrNode cur) {
+ return XPtrNode(xmlReplaceNode(old.checked_get(), cur.checked_get()));
+}
+
+// [[Rcpp::export]]
+void node_remove(XPtrNode cur, bool free) {
+ xmlUnlinkNode(cur.checked_get());
+ if (free) {
+ xmlFreeNode(cur.checked_get());
+ }
+ return;
+}
+
+// [[Rcpp::export]]
+XPtrNode node_new(std::string name) {
+ return XPtrNode(xmlNewNode(NULL, asXmlChar(name)));
+}
+
+
+// [[Rcpp::export]]
+XPtrNode node_cdata_new(XPtrDoc doc, std::string content) {
+ return XPtrNode(xmlNewCDataBlock(doc.checked_get(), asXmlChar(content), content.length()));
+}
+
+// [[Rcpp::export]]
+XPtrNode node_comment_new(std::string content) {
+ return XPtrNode(xmlNewComment(asXmlChar(content)));
+}
+
+// [[Rcpp::export]]
+XPtrNode node_new_ns(std::string name, XPtrNs ns) {
+ return XPtrNode(xmlNewNode(ns.checked_get(), asXmlChar(name)));
+}
+
+// [[Rcpp::export]]
+XPtrNode node_null() {
+ return XPtrNode(xmlNodePtr(NULL));
+}
+
+// [[Rcpp::export]]
+void node_set_namespace_uri(XPtrDoc doc, XPtrNode node, std::string uri) {
+ xmlNsPtr ns = xmlSearchNsByHref(doc.checked_get(), node.checked_get(), asXmlChar(uri));
+
+ xmlSetNs(node.checked_get(), ns);
+}
+
+// [[Rcpp::export]]
+void node_set_namespace_prefix(XPtrDoc doc, XPtrNode node, std::string prefix) {
+ xmlNsPtr ns = NULL;
+ if (prefix.length() == 0) {
+ ns = xmlSearchNs(doc.checked_get(), node.checked_get(), NULL);
+ } else {
+ ns = xmlSearchNs(doc.checked_get(), node.checked_get(), asXmlChar(prefix));
+ }
+
+ xmlSetNs(node.checked_get(), ns);
+}
+
+// [[Rcpp::export]]
+void node_new_dtd(XPtrDoc doc, std::string name = "", std::string eid = "", std::string sid = "") {
+ xmlDtdPtr dtd = xmlNewDtd(doc, name == "" ? NULL : asXmlChar(name), eid == "" ? NULL : asXmlChar(eid), sid == "" ? NULL : asXmlChar(sid));
+ xmlAddChild(reinterpret_cast<xmlNodePtr>(doc.checked_get()), reinterpret_cast<xmlNodePtr>(dtd));
+}
diff --git a/src/xml2_output.cpp b/src/xml2_output.cpp
new file mode 100644
index 0000000..73e76c5
--- /dev/null
+++ b/src/xml2_output.cpp
@@ -0,0 +1,157 @@
+#include <Rcpp.h>
+using namespace Rcpp;
+
+#define class class_name
+#define private private_ptr
+#include <R_ext/Connections.h>
+#undef class
+#undef private
+
+#if R_CONNECTIONS_VERSION != 1
+#error "Missing or unsupported connection API in R"
+#endif
+
+#if defined(R_VERSION) && R_VERSION >= R_Version(3, 3, 0)
+Rconnection get_connection(SEXP con) {
+ return R_GetConnection(con);
+}
+# else
+extern "C" {
+ extern Rconnection getConnection(int) ;
+}
+Rconnection get_connection(SEXP con) {
+ if (!Rf_inherits(con, "connection")) stop("invalid connection");
+ return getConnection(Rf_asInteger(con));
+}
+#endif
+
+#include <libxml/tree.h>
+#include <libxml/HTMLtree.h>
+#include <libxml/xmlsave.h>
+
+#include "xml2_types.h"
+#include "xml2_utils.h"
+
+// [[Rcpp::export]]
+Rcpp::IntegerVector xml_save_options() {
+ Rcpp::IntegerVector out = Rcpp::IntegerVector::create(
+ Rcpp::_["format"] = XML_SAVE_FORMAT,
+ Rcpp::_["no_declaration"] = XML_SAVE_NO_DECL,
+ Rcpp::_["no_empty_tags"] = XML_SAVE_NO_EMPTY,
+ Rcpp::_["no_xhtml"] = XML_SAVE_NO_XHTML,
+ Rcpp::_["require_xhtml"] = XML_SAVE_XHTML,
+ Rcpp::_["as_xml"] = XML_SAVE_AS_XML,
+ Rcpp::_["as_html"] = XML_SAVE_AS_HTML,
+ Rcpp::_["format_whitespace"] = XML_SAVE_WSNONSIG);
+ out.attr("descriptions") = Rcpp::CharacterVector::create(
+ "Format output",
+ "Drop the XML declaration",
+ "Remove empty tags",
+ "Disable XHTML1 rules",
+ "Force XHTML1 rules",
+ "Force XML output",
+ "Force HTML output",
+ "Format with non-significant whitespace");
+ return out;
+}
+
+int xml_write_callback(Rconnection con, const char * buffer, int len) {
+ size_t write_size;
+
+ if ((write_size = R_WriteConnection(con, (void *) buffer, len)) != len) {
+ stop("write failed, expected %l, got %l", len, write_size);
+ }
+ return write_size;
+}
+
+// [[Rcpp::export]]
+void doc_write_file(XPtrDoc x, std::string path, std::string encoding = "UTF-8", int options = 1) {
+ xmlSaveCtxtPtr savectx = xmlSaveToFilename(
+ path.c_str(),
+ encoding.c_str(),
+ options);
+ xmlSaveDoc(savectx, x.checked_get());
+ if (xmlSaveClose(savectx) == -1) {
+ stop("Error closing file");
+ }
+}
+
+// [[Rcpp::export]]
+void doc_write_connection(XPtrDoc x, SEXP connection, std::string encoding = "UTF-8", int options = 1) {
+
+ Rconnection con = get_connection(connection);
+
+ xmlSaveCtxtPtr savectx = xmlSaveToIO(
+ reinterpret_cast<xmlOutputWriteCallback>(xml_write_callback),
+ NULL,
+ con,
+ encoding.c_str(),
+ options);
+
+ xmlSaveDoc(savectx, x.checked_get());
+ if (xmlSaveClose(savectx) == -1) {
+ stop("Error closing connection");
+ }
+}
+
+// [[Rcpp::export]]
+CharacterVector doc_write_character(XPtrDoc x, std::string encoding = "UTF-8", int options = 1) {
+ boost::shared_ptr<xmlBuffer> buffer(xmlBufferCreate(), xmlFree);
+
+ xmlSaveCtxtPtr savectx = xmlSaveToBuffer(
+ buffer.get(),
+ encoding.c_str(),
+ options);
+
+ xmlSaveDoc(savectx, x.checked_get());
+ if (xmlSaveClose(savectx) == -1) {
+ stop("Error writing to buffer");
+ }
+ return Xml2String(buffer->content).asRString();
+}
+
+// [[Rcpp::export]]
+void node_write_file(XPtrNode x, std::string path, std::string encoding = "UTF-8", int options = 1) {
+ xmlSaveCtxtPtr savectx = xmlSaveToFilename(
+ path.c_str(),
+ encoding.c_str(),
+ options);
+ xmlSaveTree(savectx, x.checked_get());
+ if (xmlSaveClose(savectx) == -1) {
+ stop("Error closing file");
+ }
+}
+
+// [[Rcpp::export]]
+void node_write_connection(XPtrNode x, SEXP connection, std::string encoding = "UTF-8", int options = 1) {
+
+ Rconnection con = get_connection(connection);
+
+ xmlSaveCtxtPtr savectx = xmlSaveToIO(
+ (xmlOutputWriteCallback)xml_write_callback,
+ NULL,
+ con,
+ encoding.c_str(),
+ options);
+
+ xmlSaveTree(savectx, x.checked_get());
+ if (xmlSaveClose(savectx) == -1) {
+ stop("Error closing connection");
+ }
+}
+
+// [[Rcpp::export]]
+CharacterVector node_write_character(XPtrNode x, std::string encoding = "UTF-8", int options = 1) {
+ boost::shared_ptr<xmlBuffer> buffer(xmlBufferCreate(), xmlFree);
+
+ xmlSaveCtxtPtr savectx = xmlSaveToBuffer(
+ buffer.get(),
+ encoding.c_str(),
+ options);
+
+ xmlSaveTree(savectx, x.checked_get());
+ if (xmlSaveClose(savectx) == -1) {
+ stop("Error writing to buffer");
+ }
+ return Xml2String(buffer->content).asRString();
+}
diff --git a/src/xml2_schema.cpp b/src/xml2_schema.cpp
new file mode 100644
index 0000000..ed7ded6
--- /dev/null
+++ b/src/xml2_schema.cpp
@@ -0,0 +1,30 @@
+#include <Rcpp.h>
+using namespace Rcpp;
+
+#include <libxml/xmlschemas.h>
+#include "xml2_types.h"
+
+void handleSchemaError(void* userData, xmlError* error) {
+ Rcpp::CharacterVector * vec = (Rcpp::CharacterVector *) userData;
+ std::string message = std::string(error->message);
+ message.resize(message.size() - 1);
+ vec->push_back(message);
+}
+
+// [[Rcpp::export]]
+Rcpp::LogicalVector doc_validate(XPtrDoc doc, XPtrDoc schema) {
+ xmlLineNumbersDefault(1);
+ Rcpp::CharacterVector vec;
+ xmlSchemaParserCtxtPtr cptr = xmlSchemaNewDocParserCtxt(schema.checked_get());
+ xmlSchemaSetParserStructuredErrors(cptr, handleSchemaError, &vec);
+ xmlSchemaPtr sptr = xmlSchemaParse(cptr);
+ xmlSchemaValidCtxtPtr vptr = xmlSchemaNewValidCtxt(sptr);
+ xmlSchemaSetValidStructuredErrors(vptr, handleSchemaError, &vec);
+ Rcpp::LogicalVector out;
+ out.push_back(0 == xmlSchemaValidateDoc(vptr, doc.checked_get()));
+ xmlSchemaFreeParserCtxt(cptr);
+ xmlSchemaFreeValidCtxt(vptr);
+ xmlSchemaFree(sptr);
+ out.attr("errors") = vec;
+ return out;
+}
diff --git a/src/xml2_url.cpp b/src/xml2_url.cpp
new file mode 100644
index 0000000..29bb133
--- /dev/null
+++ b/src/xml2_url.cpp
@@ -0,0 +1,162 @@
+#include <Rcpp.h>
+using namespace Rcpp;
+
+#include <libxml/uri.h>
+#include "xml2_utils.h"
+
+//' Convert between relative and absolute urls.
+//'
+//' @param x A character vector of urls relative to that base
+//' @param base A string giving a base url.
+//' @return A character vector of urls
+//' @seealso \code{\link{xml_url}} to retrieve the URL associated with a document
+//' @export
+//' @examples
+//' url_absolute(c(".", "..", "/", "/x"), "http://hadley.nz/a/b/c/d")
+//'
+//' url_relative("http://hadley.nz/a/c", "http://hadley.nz")
+//' url_relative("http://hadley.nz/a/c", "http://hadley.nz/")
+//' url_relative("http://hadley.nz/a/c", "http://hadley.nz/a/b")
+//' url_relative("http://hadley.nz/a/c", "http://hadley.nz/a/b/")
+// [[Rcpp::export]]
+CharacterVector url_absolute(CharacterVector x, CharacterVector base) {
+ int n = x.size();
+ CharacterVector out(n);
+
+ if (base.size() > 1)
+ Rcpp::stop("Base URL must be length 1");
+ const xmlChar* base_uri = (xmlChar*) Rf_translateCharUTF8(base[0]);
+
+ for (int i = 0; i < n; ++i) {
+ const xmlChar* uri = (xmlChar*) Rf_translateCharUTF8(x[i]);
+ out[i] = Xml2String(xmlBuildURI(uri, base_uri)).asRString();
+ }
+
+ return out;
+}
+
+//' @export
+//' @rdname url_absolute
+// [[Rcpp::export]]
+CharacterVector url_relative(CharacterVector x, CharacterVector base) {
+ int n = x.size();
+ CharacterVector out(n);
+
+ if (base.size() > 1)
+ Rcpp::stop("Base URL must be length 1");
+ const xmlChar* base_uri = (xmlChar*) Rf_translateCharUTF8(base[0]);
+
+ for (int i = 0; i < n; ++i) {
+ const xmlChar* uri = (xmlChar*) Rf_translateCharUTF8(x[i]);
+ out[i] = Xml2String(xmlBuildRelativeURI(uri, base_uri)).asRString();
+ }
+
+ return out;
+}
+
+//' Parse a url into its component pieces.
+//'
+//' @param x A character vector of urls.
+//' @return A dataframe with one row for each element of \code{x} and
+//' columns: scheme, server, port, user, path, query, fragment.
+//' @export
+//' @examples
+//' url_parse("http://had.co.nz/")
+//' url_parse("http://had.co.nz:1234/")
+//' url_parse("http://had.co.nz:1234/?a=1&b=2")
+//' url_parse("http://had.co.nz:1234/?a=1&b=2#def")
+// [[Rcpp::export]]
+List url_parse(CharacterVector x) {
+ int n = x.size();
+ CharacterVector scheme(n), server(n), user(n), path(n), query(n), fragment(n);
+ IntegerVector port(n);
+
+ for (int i = 0; i < n; ++i) {
+ const char* raw = Rf_translateCharUTF8(x[i]);
+ xmlURI* uri = xmlParseURI(raw);
+ if (uri == NULL)
+ continue;
+
+ scheme[i] = uri->scheme == NULL ? "" : uri->scheme;
+ server[i] = uri->server == NULL ? "" : uri->server;
+ port[i] = uri->port == 0 ? NA_INTEGER : uri->port;
+ user[i] = uri->user == NULL ? "" : uri->user;
+ path[i] = uri->path == NULL ? "" : uri->path;
+ fragment[i] = uri->fragment == NULL ? "" : uri->fragment;
+
+ /* * *
+ * Thu Apr 26 10:36:26 CEST 2007 Daniel Veillard
+ * svn path=/trunk/; revision=3607
+ * https://github.com/GNOME/libxml2/commit/a1413b84f7163d57c6251d5f4251186368efd859
+ */
+ #if defined(LIBXML_VERSION) && (LIBXML_VERSION >= 20629)
+ query[i] = uri->query_raw == NULL ? "" : uri->query_raw;
+ #else
+ query[i] = uri->query == NULL ? "" : uri->query;
+ #endif
+
+ xmlFreeURI(uri);
+ }
+
+ List out = List::create(
+ _["scheme"] = scheme,
+ _["server"] = server,
+ _["port"] = port,
+ _["user"] = user,
+ _["path"] = path,
+ _["query"] = query,
+ _["fragment"] = fragment
+ );
+ out.attr("class") = "data.frame";
+ out.attr("row.names") = IntegerVector::create(NA_INTEGER, -n);
+
+ return out;
+}
+
+//' Escape and unescape urls.
+//'
+//' @param x A character vector of urls.
+//' @param reserved A string containing additional characters to avoid escaping.
+//' @export
+//' @examples
+//' url_escape("a b c")
+//' url_escape("a b c", "")
+//'
+//' url_unescape("a%20b%2fc")
+//' url_unescape("%C2%B5")
+// [[Rcpp::export]]
+CharacterVector url_escape(CharacterVector x, CharacterVector reserved = "") {
+ int n = x.size();
+ CharacterVector out(n);
+
+ if (reserved.size() != 1)
+ stop("`reserved` must be character vector of length 1");
+ xmlChar* xReserved = (xmlChar*) Rf_translateCharUTF8(reserved[0]);
+
+ for (int i = 0; i < n; ++i) {
+ const xmlChar* xx = (xmlChar*) Rf_translateCharUTF8(x[i]);
+ out[i] = Xml2String(xmlURIEscapeStr(xx, xReserved)).asRString();
+ }
+
+ return out;
+}
+
+//' @export
+//' @rdname url_escape
+// [[Rcpp::export]]
+CharacterVector url_unescape(CharacterVector x) {
+ int n = x.size();
+ CharacterVector out(n);
+
+ std::string buffer;
+
+ for (int i = 0; i < n; ++i) {
+ const char* xx = Rf_translateCharUTF8(x[i]);
+
+ char* unescaped = xmlURIUnescapeString(xx, 0, NULL);
+ out[i] = (unescaped == NULL) ? NA_STRING : Rf_mkCharCE(unescaped, CE_UTF8);
+ xmlFree(unescaped);
+ }
+
+ return out;
+}
diff --git a/src/xml2_utils.h b/src/xml2_utils.h
new file mode 100644
index 0000000..f3f7cc4
--- /dev/null
+++ b/src/xml2_utils.h
@@ -0,0 +1,116 @@
+#ifndef __XML2_XML_UTILS__
+#define __XML2_XML_UTILS__
+
+#include <Rcpp.h>
+#include <libxml/tree.h>
+#include <boost/shared_ptr.hpp>
+#include <map>
+
+inline xmlChar* asXmlChar(std::string x) {
+ return (xmlChar*) x.c_str();
+}
+
+inline Rcpp::CharacterVector asCharacterVector(std::string x) {
+ return Rcpp::CharacterVector(Rf_mkCharCE(x.c_str(), CE_UTF8));
+}
+
+// ----------------------------------------------------------------------------
+// A wrapper around xmlChar* that frees memory if necessary
+class Xml2String {
+ xmlChar* string_;
+ bool free_;
+
+public:
+ Xml2String(): string_(NULL), free_(false) {}
+
+ Xml2String(xmlChar* string): string_(string), free_(true) {}
+
+ // Pointers into structs are const, so don't need to be freed
+ Xml2String(const xmlChar* string): string_((xmlChar*) string), free_(false) {}
+
+ // Some strings are regular strings
+ Xml2String(const char* string): string_((xmlChar*) string), free_(false) {}
+
+ ~Xml2String() {
+ try {
+ if (free_ && string_ != NULL)
+ xmlFree(string_);
+ } catch (...) {}
+ }
+
+ std::string asStdString(std::string missing = "") {
+ if (string_ == NULL)
+ return missing;
+
+ return std::string((char*) string_);
+ }
+
+ SEXP asRString(SEXP missing = NA_STRING) {
+ if (string_ == NULL)
+ return missing;
+
+ return Rf_mkCharCE((char*) string_, CE_UTF8);
+ };
+};
+
+// ----------------------------------------------------------------------------
+// A wrapper around a pair of character vectors used to namespaces to prefixes
+
+class NsMap {
+
+ // We only store the index to avoid duplicating the data
+ typedef std::multimap<std::string, std::string> prefix2url_t;
+
+ prefix2url_t prefix2url;
+
+ public:
+ NsMap() {
+ }
+
+ // Initialise from an existing character vector
+ NsMap(Rcpp::CharacterVector x) {
+ Rcpp::CharacterVector names = Rcpp::as<Rcpp::CharacterVector>(x.attr("names"));
+ for (R_len_t i = 0; i < x.size(); ++i) {
+ add(std::string(names[i]), std::string(x[i]));
+ }
+ }
+
+ bool hasPrefix(const std::string& prefix) {
+ return prefix2url.find(prefix) != prefix2url.end();
+ }
+
+ std::string findUrl(const std::string& prefix) {
+ prefix2url_t::const_iterator it = prefix2url.find(prefix);
+ if (it != prefix2url.end()) {
+ return it->second;
+ }
+
+ Rcpp::stop("Couldn't find url for prefix %s", prefix);
+ return std::string();
+ }
+
+ std::string findPrefix(const std::string& url) {
+ for (prefix2url_t::const_iterator it = prefix2url.begin(); it != prefix2url.end(); ++it) {
+ if (it->second == url) {
+ return it->first;
+ }
+ }
+
+ Rcpp::stop("Couldn't find prefix for url %s", url);
+ return std::string();
+ }
+
+ bool add(const xmlChar* prefix, const xmlChar* url) {
+ return add(Xml2String(prefix).asStdString(), Xml2String(url).asStdString());
+ }
+
+ bool add(std::string prefix, std::string url) {
+ prefix2url.insert(prefix2url_t::value_type(prefix, url));
+ return true;
+ }
+
+ Rcpp::CharacterVector out() {
+ return Rcpp::wrap(prefix2url);
+ }
+};
+#endif
diff --git a/src/xml2_xpath.cpp b/src/xml2_xpath.cpp
new file mode 100644
index 0000000..0b84fec
--- /dev/null
+++ b/src/xml2_xpath.cpp
@@ -0,0 +1,95 @@
+#include <Rcpp.h>
+#include <libxml/xpath.h>
+#include <libxml/xpathInternals.h>
+#include <libxml/tree.h>
+#include "xml2_types.h"
+using namespace Rcpp;
+
+class XmlSeeker {
+ xmlXPathContext* context_;
+ xmlXPathObject* result_;
+ std::string xpath_;
+ XPtrDoc doc_;
+
+public:
+
+ XmlSeeker(XPtrDoc doc, xmlNode* node) : result_(NULL), doc_(doc) {
+ context_ = xmlXPathNewContext(doc.checked_get());
+ // Set context to current node
+ context_->node = node;
+ }
+
+ void registerNamespace(CharacterVector nsMap) {
+ if (nsMap.size() == 0)
+ return;
+
+ CharacterVector prefix = as<CharacterVector>(nsMap.attr("names"));
+
+ for (int i = 0; i < nsMap.size(); ++i) {
+ xmlChar* prefixI = (xmlChar*) CHAR(STRING_ELT(prefix, i));
+ xmlChar* urlI = (xmlChar*) CHAR(STRING_ELT(nsMap, i));
+
+ if (xmlXPathRegisterNs(context_, prefixI, urlI) != 0)
+ stop("Failed to register namespace (%s <-> %s)", prefixI, urlI);
+ }
+ }
+
+ RObject search(std::string xpath, int num_results) {
+ xpath_ = xpath;
+ result_ = xmlXPathEval((xmlChar*) xpath.c_str(), context_);
+ if (result_ == NULL) {
+ List ret = List();
+ ret.attr("class") = "xml_missing";
+ return ret;
+ }
+
+ switch (result_->type) {
+ case XPATH_NODESET:
+ {
+ xmlNodeSet* nodes = result_->nodesetval;
+ if (nodes == NULL || nodes->nodeNr == 0) {
+ List ret = List();
+ ret.attr("class") = "xml_missing";
+ return ret;
+ }
+ int n = std::min(result_->nodesetval->nodeNr, num_results);
+ List out(n);
+ for (int i = 0; i < n; i++) {
+ List ret;
+ ret["node"] = XPtrNode(nodes->nodeTab[i]);
+ ret["doc"] = doc_;
+ ret.attr("class") = "xml_node";
+ out[i] = ret;
+ }
+ return out;
+ }
+ case XPATH_NUMBER: { return NumericVector::create(result_->floatval); }
+ case XPATH_BOOLEAN: { return LogicalVector::create(result_->boolval); }
+ case XPATH_STRING: { return CharacterVector::create(Rf_mkCharCE((char *) result_->stringval, CE_UTF8)); }
+ default:
+ stop("XPath result type: %d not supported", result_->type);
+ }
+
+ return R_NilValue;
+ }
+
+ ~XmlSeeker() {
+ try {
+ xmlXPathFreeContext(context_);
+ if (result_ != NULL)
+ xmlXPathFreeObject(result_);
+ } catch (...) {}
+ }
+
+};
+
+// [[Rcpp::export]]
+RObject xpath_search(XPtrNode node, XPtrDoc doc, std::string xpath, CharacterVector nsMap, double num_results) {
+
+ if (num_results == R_PosInf) {
+ num_results = INT_MAX;
+ }
+ XmlSeeker seeker(doc, node.checked_get());
+ seeker.registerNamespace(nsMap);
+ return seeker.search(xpath, num_results);
+}
diff --git a/tests/testthat.R b/tests/testthat.R
new file mode 100644
index 0000000..77626b8
--- /dev/null
+++ b/tests/testthat.R
@@ -0,0 +1,4 @@
+library(testthat)
+library(xml2)
+
+test_check("xml2")
diff --git a/tests/testthat/cd_catalog.xml.bz2 b/tests/testthat/cd_catalog.xml.bz2
new file mode 100644
index 0000000..9b4018d
Binary files /dev/null and b/tests/testthat/cd_catalog.xml.bz2 differ
diff --git a/tests/testthat/helper-version.R b/tests/testthat/helper-version.R
new file mode 100644
index 0000000..a5c0da1
--- /dev/null
+++ b/tests/testthat/helper-version.R
@@ -0,0 +1 @@
+cat("This is libxml2 version", xml2:::libxml2_version(), "\n")
diff --git a/tests/testthat/lego.html.bz2 b/tests/testthat/lego.html.bz2
new file mode 100644
index 0000000..e786090
Binary files /dev/null and b/tests/testthat/lego.html.bz2 differ
diff --git a/tests/testthat/ns-multiple-aliases.xml b/tests/testthat/ns-multiple-aliases.xml
new file mode 100644
index 0000000..bc14cf9
--- /dev/null
+++ b/tests/testthat/ns-multiple-aliases.xml
@@ -0,0 +1,4 @@
+<root>
+ <doc1 xmlns:b="http://bar.com"><b:bar /></doc1>
+ <doc2 xmlns:c="http://bar.com"><c:bar /></doc2>
+</root>
diff --git a/tests/testthat/ns-multiple-default.xml b/tests/testthat/ns-multiple-default.xml
new file mode 100644
index 0000000..7d093a4
--- /dev/null
+++ b/tests/testthat/ns-multiple-default.xml
@@ -0,0 +1,4 @@
+<root>
+ <doc1 xmlns = "http://foo.com"><bar /></doc1>
+ <doc2 xmlns = "http://bar.com"><bar /></doc2>
+</root>
diff --git a/tests/testthat/ns-multiple-prefix.xml b/tests/testthat/ns-multiple-prefix.xml
new file mode 100644
index 0000000..78fa69e
--- /dev/null
+++ b/tests/testthat/ns-multiple-prefix.xml
@@ -0,0 +1,4 @@
+<root>
+ <doc1 xmlns:b="http://baz.com"><b:bar /></doc1>
+ <doc2 xmlns:b="http://bar.com"><b:bar /></doc2>
+</root>
diff --git a/tests/testthat/ns-multiple.xml b/tests/testthat/ns-multiple.xml
new file mode 100644
index 0000000..0e9b37c
--- /dev/null
+++ b/tests/testthat/ns-multiple.xml
@@ -0,0 +1,4 @@
+<root xmlns:f="http://foo.com" xmlns:g = "http://bar.com">
+ <doc1><f:bar f:id="a" /></doc1>
+ <doc2><g:bar g:id="b" /></doc2>
+</root>
diff --git a/tests/testthat/output/html_structure.txt b/tests/testthat/output/html_structure.txt
new file mode 100644
index 0000000..8a72f7f
--- /dev/null
+++ b/tests/testthat/output/html_structure.txt
@@ -0,0 +1,331 @@
+<div.aux-content-widget-3.links.subnav [div]>
+ {text}
+ <h3>
+ {text}
+ {text}
+ <div#maindetails_quicklinks>
+ {text}
+ <div.split_0>
+ {text}
+ <ul.quicklinks>
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ {text}
+ <div.split_1>
+ {text}
+ <ul.quicklinks>
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ {text}
+ {text}
+ <hr>
+ <div#full_subnav>
+ {text}
+ <h4>
+ {text}
+ {text}
+ <ul.quicklinks>
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link.ghost [href]>
+ {text}
+ {text}
+ {text}
+ <h4>
+ {text}
+ {text}
+ <ul.quicklinks>
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <h4>
+ {text}
+ {text}
+ <ul.quicklinks>
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link.ghost [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <h4>
+ {text}
+ {text}
+ <ul.quicklinks>
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <h4>
+ {text}
+ {text}
+ <ul.quicklinks>
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link.ghost [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <h4>
+ {text}
+ {text}
+ <ul.quicklinks>
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <h4>
+ {text}
+ {text}
+ <ul.quicklinks>
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link.ghost [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <h4>
+ {text}
+ {text}
+ <ul.quicklinks>
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <li.subnav_item_main>
+ {text}
+ <a.link [href]>
+ {text}
+ {text}
+ {text}
+ <hr>
+ {text}
+ <div.show_more>
+ <span.titlePageSprite.arrows.show>
+ {text}
+ {text}
+ <div.show_less>
+ <span.titlePageSprite.arrows.hide>
+ {text}
+ {text}
diff --git a/tests/testthat/output/print-xml_document.txt b/tests/testthat/output/print-xml_document.txt
new file mode 100644
index 0000000..894d40b
--- /dev/null
+++ b/tests/testthat/output/print-xml_document.txt
@@ -0,0 +1,4 @@
+{xml_document}
+<html xmlns:og="http://ogp.me/ns#" xmlns:fb="http://www.facebook.com/2008/fbml">
+[1] <head>\n<script type="text/javascript">var ue_t0=window.ue_t0||+new Date( ...
+[2] <body id="styleguide-v2" class="fixed">\n<script>\n if (typeof uet == ...
diff --git a/tests/testthat/output/print-xml_node.txt b/tests/testthat/output/print-xml_node.txt
new file mode 100644
index 0000000..8848a4a
--- /dev/null
+++ b/tests/testthat/output/print-xml_node.txt
@@ -0,0 +1,17 @@
+{xml_node}
+<body id="styleguide-v2" class="fixed">
+ [1] <script>\n if (typeof uet == 'function') {\n uet("bb");\n }\n ...
+ [2] <script>\n if ('csm' in window) {\n csm.measure('csm_body_delive ...
+ [3] <div id="wrapper">\n <div id="root" class="redesign">\n<scrip ...
+ [4] <script type="text/javascript" src="http://ia.media-imdb.com/images/G/01 ...
+ [5] <script type="text/imdblogin-js" id="login">\njQuery(document).ready(fun ...
+ [6] <script type="text/javascript">\n jQuery(\n ...
+ [7] <iframe id="sis_pixel_sitewide" width="1" height="1" frameborder="0" mar ...
+ [8] <script>\n setTimeout(function(){\n try{\n //sis3.0 ...
+ [9] <script type="text/javascript" src="http://ia.media-imdb.com/images/G/01 ...
+[10] <script type="text/javascript">\nif(window.COMSCORE){\nCOMSCORE.beacon({ ...
+[11] <noscript>\n<img src="http://b.scorecardresearch.com/p?c1=2&c2=60349 ...
+[12] <script>\n doWithAds(function(){\n (new Image()).src = "http:/ ...
+[13] <script>\n(function(){\n var readyTimeout = setInterval(function(){\n ...
+[14] <div id="servertime" time="235"></div>
+[15] <script>\n if (typeof uet == 'function') {\n uet("be");\n }\n ...
diff --git a/tests/testthat/output/print-xml_nodeset.txt b/tests/testthat/output/print-xml_nodeset.txt
new file mode 100644
index 0000000..36a7bb1
--- /dev/null
+++ b/tests/testthat/output/print-xml_nodeset.txt
@@ -0,0 +1,11 @@
+{xml_nodeset (10)}
+ [1] <div id="wrapper">\n <div id="root" class="redesign">\n<scrip ...
+ [2] <div id="root" class="redesign">\n<script>\n if (typeof uet == 'funct ...
+ [3] <div id="nb20" class="navbarSprite">\n<div id="supertab">\t\n\t<!-- begi ...
+ [4] <div id="supertab">\t\n\t<!-- begin TOP_AD -->\n<div id="top_ad_wrapper" ...
+ [5] <div id="top_ad_wrapper" class="dfp_slot">\n<script type="text/javascrip ...
+ [6] <div id="top_ad_reflow_helper"></div>
+ [7] <div id="navbar" class="navbarSprite">\n<noscript>\n <link rel="stylesh ...
+ [8] <div id="nb_search">\n <noscript><div id="more_if_no_javascript"><a h ...
+ [9] <div id="more_if_no_javascript"><a href="/search/">More</a></div>
+[10] <div class="magnifyingglass navbarSprite"></div>
diff --git a/tests/testthat/test-as_list.R b/tests/testthat/test-as_list.R
new file mode 100644
index 0000000..26d0087
--- /dev/null
+++ b/tests/testthat/test-as_list.R
@@ -0,0 +1,37 @@
+context("as_list")
+
+list_xml <- function(x) as_list(read_xml(x))
+
+test_that("empty elements become empty lists", {
+ expect_equal(list_xml("<x></x>"), list(x = list()))
+ expect_equal(list_xml("<x><y/></x>"), list(x = list(y = list())))
+ expect_equal(list_xml("<x><y><z/></y></x>"), list(x = list(y = list(z = list()))))
+})
+
+test_that("text nodes become character vectors", {
+ expect_equal(list_xml("<x>a</x>"), list(x = list("a")))
+ expect_equal(list_xml("<x><y>a</y></x>"), list(x = list(y = list("a"))))
+})
+
+test_that("cdata nodes become character vectors", {
+ expect_equal(list_xml("<x><![CDATA[<y/>]]></x>"), list(x = list("<y/>")))
+})
+
+test_that("xml attributes become R attibutes", {
+ expect_equal(list_xml("<x a='1' b='2'></x>"), list(x = structure(list(), a = "1", b = "2")))
+})
+
+test_that("xml names are preserved when attributes exist", {
+ expect_equal(list_xml("<x a='1' b='2'><y>3</y><z>4</z></x>"),
+ list(x = structure(list(y = list('3'), z = list('4')), a = "1", b = "2")))
+})
+
+test_that("special attributes are escaped", {
+ expect_equal(list_xml("<x a='1' b='2' names='esc'><y>3</y><z>4</z></x>"),
+ list(x = structure(list(y = list('3'), z = list('4')), a = "1", b = "2", .names='esc')))
+})
+
+test_that("attributes in child nodes", {
+ expect_equal(list_xml("<w aa = '0'><x a='1' b='2' names='esc'><y>3</y><z>4</z></x></w>"),
+ list(w = structure(list(x = structure(list(y = list('3'), z = list('4')), a = "1", b = "2", .names='esc')), aa = "0")))
+})
diff --git a/tests/testthat/test-as_xml_document.R b/tests/testthat/test-as_xml_document.R
new file mode 100644
index 0000000..282d931
--- /dev/null
+++ b/tests/testthat/test-as_xml_document.R
@@ -0,0 +1,41 @@
+context("as_xml_document")
+
+roundtrip_xml <- function(x) {
+ xml <- read_xml(x)
+ lst <- as_list(xml)
+ xml2 <- as_xml_document(lst)
+ expect_equal(as.character(xml), as.character(xml2))
+}
+
+test_that("roundtrips with single children", {
+ roundtrip_xml("<a><b/></a>")
+
+ roundtrip_xml("<a><b><c/></b></a>")
+
+ roundtrip_xml("<a><b>foo<c/></b></a>")
+
+ roundtrip_xml("<a><b>foo<c>bar</c></b></a>")
+
+ roundtrip_xml("<a x = '1'><b y = '2'>foo<c z = '3'>bar</c></b></a>")
+})
+
+test_that("roundtrips with multi children", {
+ roundtrip_xml("<a><b1/><b2/></a>")
+
+ roundtrip_xml("<a><b><c1/><c2/></b></a>")
+
+ roundtrip_xml("<a><b1>foo<c/></b1><b2>bar<c/></b2></a>")
+
+ roundtrip_xml("<a><b>foo<c>bar</c><c>baz</c></b></a>")
+
+ roundtrip_xml("<a x = '1'><b y = '2'>foo<c z = '3'>bar</c></b></a>")
+ roundtrip_xml("<a x = '1'><b y = '2'>foo<c z = '3'>bar</c></b><c zz = '4'>baz</c></a>")
+})
+
+test_that("rountrips with special attributes", {
+ roundtrip_xml("<a names = 'test'><b/></a>")
+})
+
+test_that("more than one root node is an error", {
+ expect_error(as_xml_document(list(a = list(), b = list())), "Root nodes must be of length 1")
+})
diff --git a/tests/testthat/test-cdata.R b/tests/testthat/test-cdata.R
new file mode 100644
index 0000000..84f0b7a
--- /dev/null
+++ b/tests/testthat/test-cdata.R
@@ -0,0 +1,7 @@
+context("cdata")
+
+test_that("CDATA creation works", {
+ x <- xml_new_root("root")
+ xml_add_child(x, xml_cdata("<d/>"))
+ expect_identical(as.character(x), "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<root><![CDATA[<d/>]]></root>\n")
+})
diff --git a/tests/testthat/test-comment.R b/tests/testthat/test-comment.R
new file mode 100644
index 0000000..474c505
--- /dev/null
+++ b/tests/testthat/test-comment.R
@@ -0,0 +1,8 @@
+context("comment")
+
+test_that("Comment creation works", {
+ x <- xml_new_root("root")
+ xml_add_child(x, xml_comment("Hello!"))
+ expect_identical("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<root><!--Hello!--></root>\n", as.character(x, options = ""))
+})
+
diff --git a/tests/testthat/test-dtd.R b/tests/testthat/test-dtd.R
new file mode 100644
index 0000000..c7ecb2f
--- /dev/null
+++ b/tests/testthat/test-dtd.R
@@ -0,0 +1,16 @@
+context("dtd")
+
+test_that("xml_dtd works", {
+
+ r <- xml_new_root(xml_dtd(name = "html", external_id = "-//W3C//DTD XHTML 1.0 Transitional//EN", system_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"))
+ expect_identical("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n", as.character(r))
+
+ no_name <- xml_new_root(xml_dtd(external_id = "-//W3C//DTD XHTML 1.0 Transitional//EN", system_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"))
+ expect_identical("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n", as.character(no_name))
+
+ no_name_external_id <- xml_new_root(xml_dtd(system_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"))
+ expect_identical("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE SYSTEM \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n", as.character(no_name_external_id))
+
+ no_name_external_id_internal_id <- xml_new_root(xml_dtd())
+ expect_identical("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE >\n", as.character(no_name_external_id_internal_id))
+})
diff --git a/tests/testthat/test-format.R b/tests/testthat/test-format.R
new file mode 100644
index 0000000..f156e24
--- /dev/null
+++ b/tests/testthat/test-format.R
@@ -0,0 +1,17 @@
+context("format")
+test_that("format.xml_node prints attributes for root nodes", {
+
+ x <- read_xml("<parent foo = 'bar' />")
+ expect_equal(format(x), "<parent foo=\"bar\">")
+})
+test_that("format.xml_node prints namespaces for root nodes", {
+
+ x <- read_xml("<parent/>")
+ expect_equal(format(x), "<parent>")
+
+ y <- read_xml("<parent xmlns = 'tag:james.f.hester at gmail.com,2016:bar' />")
+ expect_equal(format(y), "<parent xmlns=\"tag:james.f.hester at gmail.com,2016:bar\">")
+
+ z <- read_xml("<parent xmlns:foo = 'tag:james.f.hester at gmail.com,2016:bar' />")
+ expect_equal(format(z), "<parent xmlns:foo=\"tag:james.f.hester at gmail.com,2016:bar\">")
+})
diff --git a/tests/testthat/test-modify-xml.R b/tests/testthat/test-modify-xml.R
new file mode 100644
index 0000000..36a6194
--- /dev/null
+++ b/tests/testthat/test-modify-xml.R
@@ -0,0 +1,238 @@
+context("modify nodes")
+
+test_that("modifying nodes works", {
+ x <- read_xml("<x><y/></x>")
+ node <- xml_find_first(x, "//x")
+
+ expect_equal(xml_name(node), "x")
+
+ node_set_name(node$node, "y")
+ expect_equal(xml_name(node), "y")
+
+ expect_equal(xml_text(node), "")
+
+ node_set_content(node$node, "test")
+ expect_equal(xml_text(node), "test")
+})
+
+test_that("xml_text<- only modifies text content", {
+ x <- read_xml("<node>Text1<subnode/>text2</node>")
+
+ expect_equal(xml_text(x), "Text1text2")
+
+ # will only change the first text by default
+ xml_text(x) <- "new_text1"
+ expect_equal(xml_text(x), "new_text1text2")
+
+ # You can change the second by explicitly selecting it
+ text_node <- xml_find_first(x, "//text()[2]")
+ xml_text(text_node) <- "new_text2"
+ expect_equal(xml_text(x), "new_text1new_text2")
+})
+
+test_that("xml_text<- creates new text nodes if needed", {
+ x <- read_xml("<node><subnode/></node>")
+ xml_text(x) <- "test"
+
+ expect_equal(xml_text(x), "test")
+})
+
+test_that("xml_remove removes nodes", {
+
+ x <- read_xml("<parent><child>1</child><child>2<child>3</child></child></parent>")
+ children <- xml_children(x)
+ t1 <- children[[1]]
+ xml_remove(children, free = TRUE)
+ expect_equal(xml_text(x), "")
+})
+
+test_that("xml_replace replaces nodes", {
+
+ x <- read_xml("<parent><child>1</child><child>2<child>3</child></child></parent>")
+ children <- xml_children(x)
+ t1 <- children[[1]]
+ t2 <- children[[2]]
+ t3 <- xml_children(children[[2]])[[1]]
+ expect_equal(xml_text(x), "123")
+
+ xml_replace(t1, t3)
+ expect_equal(xml_text(x), "323")
+
+ first_child <- xml_children(x)[[1]]
+ xml_replace(first_child, t1, .copy = FALSE)
+ expect_equal(xml_text(x), "123")
+ xml_remove(first_child, free = TRUE)
+
+ first_child <- xml_children(x)[[1]]
+ xml_replace(first_child, t3, .copy = FALSE)
+ expect_equal(xml_text(x), "32")
+ xml_remove(first_child, free = TRUE)
+})
+
+test_that("xml_replace works with nodesets", {
+
+ x <- read_xml("<parent><child>1</child><child>2<child>3</child></child></parent>")
+ children <- xml_children(x)
+ t1 <- children[[1]]
+ xml_replace(children, t1)
+ expect_equal(xml_text(x), "11")
+})
+
+test_that("xml_sibling adds a sibling node", {
+ x <- read_xml("<parent><child>1</child><child>2<child>3</child></child></parent>")
+ children <- xml_children(x)
+ t1 <- children[[1]]
+ t2 <- children[[2]]
+ t3 <- xml_children(children[[2]])[[1]]
+
+ xml_add_sibling(t1, t3)
+ expect_length(xml_siblings(t1), 2)
+ expect_equal(xml_text(x), "1323")
+
+ xml_add_sibling(t1, t3, .where = "before")
+ expect_length(xml_siblings(t1), 3)
+ expect_equal(xml_text(x), "31323")
+
+ children <- xml_children(x)
+ xml_add_sibling(children, t1)
+ expect_equal(xml_text(x), "311131231")
+})
+
+test_that("xml_add_child adds a child node", {
+ x <- read_xml("<parent><child>1</child><child>2<child>3</child></child></parent>")
+ children <- xml_children(x)
+ t1 <- children[[1]]
+ t2 <- children[[2]]
+ t3 <- xml_children(children[[2]])[[1]]
+
+ expect_length(xml_children(t1), 0)
+
+ xml_add_child(t1, t3, .copy = TRUE)
+ expect_length(xml_children(t1), 1)
+ expect_equal(xml_text(x), "1323")
+
+ children <- xml_children(x)
+ xml_add_child(children, t1)
+ expect_equal(xml_text(x), "1313231313")
+})
+
+test_that("xml_add_child can create a new default namespace", {
+ x <- xml_root(xml_add_child(xml_new_document(), "foo", xmlns = "bar"))
+
+ expect_equal(unclass(xml_ns(x)), c(d1 = "bar"))
+})
+
+test_that("xml_add_child can create a new prefixed namespace", {
+ x <- xml_root(xml_add_child(xml_new_document(), "foo", "xmlns:bar" = "baz"))
+
+ expect_equal(unclass(xml_ns(x)), c(bar = "baz"))
+})
+
+test_that("xml_add_child can create a new attribute", {
+ x <- xml_add_child(xml_new_document(), "foo", "bar" = "baz")
+
+ expect_equal(xml_attr(x, "bar"), "baz")
+})
+
+test_that("xml_add_child can create new text", {
+ x <- xml_add_child(xml_new_document(), "foo", "bar")
+
+ expect_equal(xml_text(x), "bar")
+})
+
+test_that("xml_add_child can create a new node with the specified prefix", {
+ x <- xml_root(xml_add_child(xml_new_document(), "foo", "xmlns:bar" = "baz"))
+
+ t1 <- xml_add_child(x, "bar:qux")
+ expect_equal(xml_name(t1), "qux")
+ expect_equal(xml_name(t1, xml_ns(x)), "bar:qux")
+})
+
+test_that("xml_add_child can create a new node with the specified prefix", {
+ x <- xml_root(xml_add_child(xml_new_document(), "foo", "xmlns:bar" = "baz"))
+
+ expect_error(xml_add_child(x, "bar2:qux"), "No namespace with prefix `bar2` found")
+})
+
+test_that("xml_add_parent works with xml_node input", {
+ x <- read_xml("<x><y/></x>")
+ y <- xml_find_first(x, ".//y")
+ xml_add_parent(y, "z")
+
+ expect_equal(xml_name(xml_parent(y)), "z")
+ expect_equal(xml_name(xml_child(x)), "z")
+})
+
+test_that("xml_add_parent works with xml_nodeset input", {
+ x <- read_xml("<x><y/><y/></x>")
+ y <- xml_find_all(x, ".//y")
+ xml_add_parent(y, "z")
+
+ expect_equal(xml_name(xml_parent(y)), c("z", "z"))
+ expect_equal(xml_name(xml_child(x)), "z")
+})
+
+test_that("xml_add_parent works with xml_missing input", {
+ x <- read_xml("<body>
+ <p>Some <b>text</b>.</p>
+ <p>Some <b>other</b>.</p>
+ <p>No bold text</p>
+ </body>")
+
+ y <- xml_find_all(x, ".//p")
+ z <- xml_find_first(y, ".//b")
+ xml_add_parent(z, "em")
+
+ expect_equal(xml_name(xml_parent(z)), c("em", "em"))
+ expect_equal(xml_name(xml_children(y)), c("em", "em"))
+})
+
+test_that("xml_new_document adds a default character encoding", {
+
+ x <- read_xml("<root>\u00E1\u00FC\u00EE</root>")
+ expect_equal(as.character(x), "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<root>\u00E1\u00FC\u00EE</root>\n")
+
+ x2 <- xml_new_document()
+ xml_add_child(x2, "root", "\u00E1\u00FC\u00EE")
+ expect_equal(as.character(x2), "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<root>\u00E1\u00FC\u00EE</root>\n")
+})
+
+test_that("xml_new_root is equivalent to using xml_new_document xml_add_child", {
+ x1 <- xml_add_child(xml_new_document(), "foo", "bar")
+
+ x2 <- xml_new_root("foo", "bar")
+
+ expect_identical(as.character(x1), as.character(x2))
+})
+
+test_that("xml_add_child can insert anywhere in the child list", {
+ x <- read_xml("<a/>")
+
+ xml_add_child(x, "z")
+ expect_equal(c("z"), xml_name(xml_children(x)))
+
+ xml_add_child(x, "w", .where = 0)
+ expect_equal(c("w", "z"), xml_name(xml_children(x)))
+
+ xml_add_child(x, "y", .where = 1)
+ expect_equal(c("w", "y", "z"), xml_name(xml_children(x)))
+
+ xml_add_child(x, "x", .where = 1)
+ expect_equal(c("w", "x", "y", "z"), xml_name(xml_children(x)))
+})
+
+test_that("xml_add_child can insert anywhere in a nodeset", {
+ x <- read_xml("<body>
+ <p>Some <b>text</b>.</p>
+ <p>Some <b>other</b>.</p>
+ <p>No bold text</p>
+ </body>")
+
+ y <- xml_find_all(x, ".//p")
+ z <- xml_find_first(y, ".//b")
+
+ xml_add_child(z, "bar")
+ xml_add_child(z, "foo", .where = 0)
+
+ expect_equal(c("foo", "bar", "foo", "bar"), xml_name(xml_children(z)))
+})
diff --git a/tests/testthat/test-namespaces.R b/tests/testthat/test-namespaces.R
new file mode 100644
index 0000000..24bb5fe
--- /dev/null
+++ b/tests/testthat/test-namespaces.R
@@ -0,0 +1,44 @@
+context("Namespaces")
+
+# XML parsing tests ------------------------------------------------------------
+
+test_that("multiple default namespaces given unique names", {
+ ns <- unclass(xml_ns(read_xml("ns-multiple-default.xml")))
+ expect_equal(ns, c(d1 = "http://foo.com", d2 = "http://bar.com"))
+})
+
+test_that("repeated prefixes given unique names", {
+ ns <- unclass(xml_ns(read_xml("ns-multiple-prefix.xml")))
+ expect_equal(ns, c(b = "http://baz.com", b1 = "http://bar.com"))
+})
+
+test_that("aliased prefixes retained", {
+ ns <- unclass(xml_ns(read_xml("ns-multiple-aliases.xml")))
+ expect_equal(ns, c(b = "http://bar.com", c = "http://bar.com"))
+})
+
+
+# Low-level character vector tests ---------------------------------------------
+
+test_that("unique prefix-url combo unchanged", {
+ x <- c(blah = "http://blah.com", rah = "http://rah.com")
+ expect_equal(unique_ns(x), x)
+})
+
+test_that("all prefixs kept", {
+ x <- c(blah = "http://blah.com", rah = "http://blah.com")
+ expect_equal(names(unique_ns(x)), c("blah", "rah"))
+})
+
+test_that("multiple default namespaces can be stripped", {
+ x <- read_xml("ns-multiple-default.xml")
+ ns <- unclass(xml_ns(x))
+ expect_equal(ns, c(d1 = "http://foo.com", d2 = "http://bar.com"))
+ expect_equal(length(xml_find_all(x, "//bar")), 0)
+
+ xml_ns_strip(x)
+ ns <- unclass(xml_ns(x))
+
+ expect_equivalent(ns, character())
+ expect_equal(length(xml_find_all(x, "//bar")), 2)
+})
diff --git a/tests/testthat/test-null.R b/tests/testthat/test-null.R
new file mode 100644
index 0000000..20b8a46
--- /dev/null
+++ b/tests/testthat/test-null.R
@@ -0,0 +1,70 @@
+context("Null XPtr")
+
+data <- read_xml("ns-multiple.xml")
+tf <- tempfile()
+on.exit(unlink(tf))
+saveRDS(data, file = tf)
+x <- readRDS(tf)
+
+test_that("accessors all fail rather than crash with NULL Xptrs", {
+
+ expect_error(as_list(x), "external pointer is not valid")
+
+ expect_error(html_structure(x), "external pointer is not valid")
+
+ expect_error(xml_add_child(x, x), "external pointer is not valid")
+ expect_error(xml_add_sibling(x, x), "external pointer is not valid")
+
+ expect_error(xml_attr(x, "foo"), "external pointer is not valid")
+ expect_error(xml_attr(x, "foo") <- "bar", "external pointer is not valid")
+
+ expect_error(xml_attrs(x), "external pointer is not valid")
+ expect_error(xml_attrs(x) <- list(), "external pointer is not valid")
+
+ expect_error(xml_child(x), "external pointer is not valid")
+ expect_error(xml_children(x), "external pointer is not valid")
+
+ expect_error(xml_contents(x), "external pointer is not valid")
+
+ expect_error(xml_double(x), "external pointer is not valid")
+
+ expect_error(xml_find_all(x, ""), "external pointer is not valid")
+ expect_error(xml_find_chr(x, ""), "external pointer is not valid")
+ expect_error(xml_find_first(x, ""), "external pointer is not valid")
+ expect_error(xml_find_lgl(x, ""), "external pointer is not valid")
+ expect_error(xml_find_num(x, ""), "external pointer is not valid")
+
+ expect_error(xml_has_attr(x, ""), "external pointer is not valid")
+
+ expect_error(xml_integer(x), "external pointer is not valid")
+
+ expect_error(xml_length(x), "external pointer is not valid")
+
+ expect_error(xml_name(x), "external pointer is not valid")
+ expect_error(xml_name(x) <- "foo", "external pointer is not valid")
+
+ expect_error(xml_ns(x), "external pointer is not valid")
+ expect_error(xml_ns_strip(x), "external pointer is not valid")
+
+ expect_error(xml_parent(x), "external pointer is not valid")
+ expect_error(xml_parents(x), "external pointer is not valid")
+
+ expect_error(xml_path(x), "external pointer is not valid")
+
+ expect_error(xml_remove(x), "external pointer is not valid")
+
+ expect_error(xml_replace(x, x), "external pointer is not valid")
+
+ expect_error(xml_set_namespace(x, "foo"), "external pointer is not valid")
+
+ expect_error(xml_siblings(x), "external pointer is not valid")
+
+ expect_error(xml_structure(x), "external pointer is not valid")
+
+ expect_error(xml_text(x), "external pointer is not valid")
+ expect_error(xml_text(x) <- "test", "external pointer is not valid")
+
+ expect_error(xml_type(x), "external pointer is not valid")
+
+ expect_error(xml_url(x), "external pointer is not valid")
+})
diff --git a/tests/testthat/test-print.R b/tests/testthat/test-print.R
new file mode 100644
index 0000000..1f9b4fc
--- /dev/null
+++ b/tests/testthat/test-print.R
@@ -0,0 +1,21 @@
+context("print")
+
+x <- read_html("lego.html.bz2")
+body <- xml_find_first(x, "//body")
+divs <- xml_find_all(body, ".//div")[1:10]
+quicklinks <- xml_find_first(body, "//div[contains(@div, 'quicklinks')]")
+
+test_that("print method is correct", {
+ expect_output_file(print(x), "output/print-xml_document.txt", update = FALSE)
+
+ body <- xml_find_first(x, "//body")
+ expect_output_file(print(body), "output/print-xml_node.txt", update = FALSE)
+
+ divs <- xml_find_all(body, ".//div")[1:10]
+ expect_output_file(print(divs), "output/print-xml_nodeset.txt", update = FALSE)
+})
+
+test_that("xml_structure is correct", {
+ quicklinks <- xml_find_first(x, "//div[contains(@div, 'quicklinks')]")
+ expect_output_file(html_structure(quicklinks), "output/html_structure.txt", update = FALSE)
+})
diff --git a/tests/testthat/test-read-xml.R b/tests/testthat/test-read-xml.R
new file mode 100644
index 0000000..85f1ac6
--- /dev/null
+++ b/tests/testthat/test-read-xml.R
@@ -0,0 +1,57 @@
+context("read_xml")
+
+test_that("read_html correctly parses malformed document", {
+ lego <- read_html("lego.html.bz2")
+ expect_equal(length(xml_find_all(lego, ".//p")), 39)
+})
+
+test_that("parse_options errors when given an invalid option", {
+ expect_error(parse_options("INVALID", xml_parse_options()),
+ "`options` 'INVALID' is not a valid option")
+
+ expect_error(read_html("lego.html.bz2", options = "INVALID"),
+ "`options` 'INVALID' is not a valid option")
+
+ # Empty inputs returned as 0
+ expect_identical(0L, parse_options("", xml_parse_options()))
+ expect_identical(0L, parse_options(NULL, xml_parse_options()))
+
+ # Numerics returned as integers
+ expect_identical(12L, parse_options(12L, xml_parse_options()))
+ expect_identical(12L, parse_options(12, xml_parse_options()))
+
+ # Multiple inputs summed
+ expect_identical(3L, parse_options(c("RECOVER", "NOENT"), xml_parse_options()))
+})
+
+test_that("read_html properly passes parser arguments", {
+
+ skip_if_not(libxml2_version() >= "2.9.2")
+
+ blanks <- read_html("cd_catalog.xml.bz2", options = c("RECOVER", "NOERROR"))
+ expect_equal(as_list(blanks)$html$body$catalog$cd[[1]],
+ "\r\n ")
+
+ no_blanks <- read_html("cd_catalog.xml.bz2", options = c("RECOVER", "NOERROR", "NOBLANKS"))
+
+ expect_equal(as_list(no_blanks)$html$body$catalog$cd[[1]],
+ list("Empire Burlesque"))
+})
+
+test_that("read_xml works with httr response objects", {
+ skip_on_cran()
+
+ x <- read_xml(httr::GET("http://httpbin.org/xml"))
+ expect_is(x, "xml_document")
+
+ expect_equal(length(xml_find_all(x, "//slide")), 2)
+})
+
+test_that("read_html works with httr response objects", {
+ skip_on_cran()
+
+ x <- read_html(httr::GET("http://httpbin.org/xml"))
+ expect_is(x, "xml_document")
+
+ expect_equal(length(xml_find_all(x, "//slide")), 2)
+})
diff --git a/tests/testthat/test-url.R b/tests/testthat/test-url.R
new file mode 100644
index 0000000..ba1f27c
--- /dev/null
+++ b/tests/testthat/test-url.R
@@ -0,0 +1,76 @@
+context("url")
+
+test_that("url_absolute", {
+ expect_equal(
+ url_absolute(c(".", "..", "/", "/x"), "http://hadley.nz/a/b/c/d"),
+ c("http://hadley.nz/a/b/c/", "http://hadley.nz/a/b/", "http://hadley.nz/", "http://hadley.nz/x"))
+
+ expect_error(url_absolute(c(".", "..", "/", "/x"), c("http://hadley.nz/a/b/c/d", "http://foo.bar")),
+ "Base URL must be length 1")
+})
+
+test_that("url_relative", {
+
+ expect_equal(
+ url_relative("http://hadley.nz/a/c", "http://hadley.nz"),
+ "/a/c")
+
+ expect_equal(
+ url_relative("http://hadley.nz/a/c", "http://hadley.nz/"),
+ "../a/c")
+
+ expect_equal(
+ url_relative("http://hadley.nz/a/c", "http://hadley.nz/a/b"),
+ "c")
+
+ expect_equal(
+ url_relative("http://hadley.nz/a/c", "http://hadley.nz/a/b/"),
+ "../c")
+
+ expect_error(url_relative("http://hadley.nz/a/c", c("http://hadley.nz/a/b/c/d", "http://foo.bar")),
+ "Base URL must be length 1")
+})
+
+test_that("url_parse", {
+
+ expect_equal(
+ url_parse("http://had.co.nz/"),
+ data.frame(scheme = "http", server = "had.co.nz", port = NA_integer_,
+ user = "", path = "/", query = "", fragment = "", stringsAsFactors = FALSE))
+
+ expect_equal(
+ url_parse("http://had.co.nz:1234/"),
+ data.frame(scheme = "http", server = "had.co.nz", port = 1234L,
+ user = "", path = "/", query = "", fragment = "", stringsAsFactors = FALSE))
+
+ expect_equal(
+ url_parse("http://had.co.nz:1234/?a=1&b=2"),
+ data.frame(scheme = "http", server = "had.co.nz", port = 1234L,
+ user = "", path = "/", query = "a=1&b=2", fragment = "", stringsAsFactors = FALSE))
+
+ expect_equal(
+ url_parse("http://had.co.nz:1234/?a=1&b=2#def"),
+ data.frame(scheme = "http", server = "had.co.nz", port = 1234L,
+ user = "", path = "/", query = "a=1&b=2", fragment = "def", stringsAsFactors = FALSE))
+})
+
+test_that("url_escape", {
+ expect_error(url_escape("a b c", reserved = c("a", "b")),
+ "`reserved` must be character vector of length 1")
+
+ expect_equal(
+ url_escape("a b c"),
+ "a%20b%20c")
+
+ expect_equal(
+ url_escape("a b c", " "),
+ "a b c")
+
+ expect_equal(
+ url_unescape("a%20b%2fc"),
+ "a b/c")
+
+ expect_equal(
+ url_unescape("%C2%B5"),
+ "\u00B5")
+})
diff --git a/tests/testthat/test-write_xml.R b/tests/testthat/test-write_xml.R
new file mode 100644
index 0000000..67ac0ec
--- /dev/null
+++ b/tests/testthat/test-write_xml.R
@@ -0,0 +1,112 @@
+context("write_xml")
+
+test_that("write_xml errors for incorrect directory and with invalid inputs", {
+ x <- read_xml("<x/>")
+ filename <- ".../test.xml"
+ expect_error(write_xml(x, filename), "'...' does not exist in current working directory")
+
+
+ expect_error(write_xml(x, c("test.xml", "foo")), "`file` must be a non-zero character of length 1")
+})
+
+test_that("write_xml works with relative file paths", {
+ x <- read_xml("<x/>")
+
+ filename <- "../test.xml"
+ on.exit(unlink(filename))
+ write_xml(x, filename, options = "no_declaration")
+ expect_identical(readChar(filename, 1000L), "<x/>\n")
+})
+
+test_that("write_xml works with no options", {
+ x <- read_xml("<x/>")
+
+ filename <- "../test.xml"
+ on.exit(unlink(filename))
+ write_xml(x, filename, options = NULL)
+ expect_identical(readChar(filename, 1000L), "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<x/>\n")
+})
+
+test_that("write_xml works with an explicit connections", {
+ x <- read_xml("<x/>")
+
+ filename <- "../test.xml"
+ file <- file(filename, "wb")
+ on.exit(unlink(filename))
+ write_xml(x, file, options = "no_declaration")
+ close(file)
+ expect_identical(readChar(filename, 1000L), "<x/>\n")
+})
+
+test_that("write_xml works with an implicit connections", {
+ x <- read_xml("<x/>")
+
+ filename <- "../test.xml.gz"
+ write_xml(x, filename, options = "no_declaration")
+ file <- gzfile(filename, "rb")
+ on.exit({unlink(filename); close(file)})
+ expect_identical(readChar(file, 1000L), "<x/>\n")
+})
+
+test_that("write_xml works with nodeset input and files", {
+ x <- read_xml("<x><y/><y><z/></y></x>")
+ y <- xml_find_all(x, "//y")
+
+ filename <- "../test.xml"
+ on.exit(unlink(filename))
+ expect_error(write_xml(y, filename, options = "no_declaration"),
+ "Can only save length 1 node sets")
+
+ write_xml(y[1], filename, options = "no_declaration")
+ expect_identical(readChar(filename, 1000L), "<y/>")
+})
+
+test_that("write_xml works with nodeset input and connections", {
+ x <- read_xml("<x><y/><y/></x>")
+ y <- xml_find_all(x, "//y")
+
+ filename <- "../test.xml.gz"
+ expect_error(write_xml(y, filename, options = "no_declaration"),
+ "Can only save length 1 node sets")
+
+ expect_error(write_xml(y[1], c(filename, "foo")), "`file` must be a non-zero character of length 1")
+
+ write_xml(y[1], filename, options = "no_declaration")
+ file <- gzfile(filename, "rb")
+ on.exit({unlink(filename); close(file)})
+ expect_identical(readChar(file, 1000L), "<y/>")
+})
+
+test_that("write_xml works with node input and files", {
+ x <- read_xml("<x><y/><y/></x>")
+ y <- xml_find_first(x, "//y")
+
+ filename <- "../test.xml"
+ expect_error(write_xml(y, c(filename, "foo")), "`file` must be a non-zero character of length 1")
+
+ write_xml(y, filename, options = "no_declaration")
+ on.exit(unlink(filename))
+ expect_identical(readChar(filename, 1000L), "<y/>")
+})
+
+test_that("write_xml works with node input and connections", {
+ x <- read_xml("<x><y/><y/></x>")
+ y <- xml_find_first(x, "//y")
+
+ filename <- "../test.xml.gz"
+ write_xml(y, filename, options = "no_declaration")
+ file <- gzfile(filename, "rb")
+ on.exit({unlink(filename); close(file)})
+ expect_identical(readChar(file, 1000L), "<y/>")
+})
+
+test_that("write_html work with html input", {
+ x <- read_html("<html><title>Foo</title></html>")
+
+ filename <- "../test.html.gz"
+ write_html(x, filename)
+ file <- gzfile(filename, "rb")
+ on.exit({unlink(filename); close(file)})
+ expect_identical(readChar(file, 1000L),
+ "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">\n<html><head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">\n<title>Foo</title>\n</head></html>\n")
+})
diff --git a/tests/testthat/test-xml_attrs.R b/tests/testthat/test-xml_attrs.R
new file mode 100644
index 0000000..7a1c1a0
--- /dev/null
+++ b/tests/testthat/test-xml_attrs.R
@@ -0,0 +1,225 @@
+context("xml_attrs")
+
+test_that("missing attributes returned as NA by default", {
+ x <- read_xml("<x/>")
+ expect_equal(xml_attr(x, "id"), NA_character_)
+})
+
+test_that("missing attributes returned as NA", {
+ x <- read_xml("<x/>")
+ expect_equal(xml_attr(x, "id", default = 1), "1")
+})
+
+test_that("attributes are correctly found", {
+ x <- read_xml("<x id='1' />")
+
+ expect_true(xml_has_attr(x, "id"))
+
+ expect_false(xml_has_attr(x, "id2"))
+})
+
+test_that("returning an attribute node prints properly", {
+ x <- read_xml("<a><b c='1' /></a>")
+
+ t1 <- xml_find_first(x, "//@c")
+
+ expect_equal(format(t1), "<c>")
+})
+
+# Namespaces -------------------------------------------------------------------
+
+# Default namespace doesn't apply to attributes
+
+test_that("qualified names returned when ns given", {
+ x <- read_xml("ns-multiple.xml")
+ ns <- xml_ns(x)
+
+ bars <- xml_children(xml_children(x))
+ attr <- xml_attrs(bars, ns)
+
+ expect_equal(names(attr[[1]]), "f:id")
+ expect_equal(names(attr[[2]]), "g:id")
+})
+
+
+x <- read_xml('
+ <root xmlns:b="http://bar.com" xmlns:f="http://foo.com">
+ <doc b:id="b" f:id="f" id="" />
+ <doc b:id="b" f:id="f" id="" />
+ </root>
+')
+doc <- xml_children(x)[[1]]
+docs <- xml_find_all(x, "//doc")
+ns <- xml_ns(x)
+
+test_that("qualified attributes get own values", {
+ expect_equal(xml_attrs(doc, ns), c("b:id" = "b", "f:id" = "f", "id" = ""))
+})
+
+test_that("unqualified name gets unnamespace attribute", {
+ expect_equal(xml_attr(doc, "id", ns), "")
+})
+
+test_that("namespace names gets namespaced attribute", {
+ expect_equal(xml_attr(doc, "b:id", ns), "b")
+ expect_equal(xml_attr(doc, "f:id", ns), "f")
+})
+
+test_that("xml_attr<- modifies properties", {
+ xml_attr(doc, "id", ns) <- "test"
+ expect_equal(xml_attr(doc, "id", ns), "test")
+
+ xml_attr(doc, "b:id", ns) <- "b_test"
+ expect_equal(xml_attr(doc, "b:id", ns), "b_test")
+
+ xml_attr(doc, "f:id", ns) <- "f_test"
+ expect_equal(xml_attr(doc, "f:id", ns), "f_test")
+
+ xml_attr(docs, "f:id", ns) <- "f_test2"
+ expect_equal(xml_attr(docs, "f:id", ns), c("f_test2", "f_test2"))
+
+ xml_attr(docs, "f:id", ns) <- NULL
+ expect_equal(xml_attr(docs, "f:id", ns), c(NA_character_, NA_character_))
+})
+
+test_that("xml_attrs<- modifies all attributes", {
+ expect_error(xml_attrs(doc) <- 1, "`value` must be a named character vector or `NULL`")
+ expect_error(xml_attrs(doc) <- "test", "`value` must be a named character vector or `NULL`")
+
+ xml_attrs(doc, ns) <- c("b:id" = "b", "f:id" = "f", "id" = "test")
+ expect_equal(xml_attrs(doc, ns), c("b:id" = "b", "id" = "test", "f:id" = "f"))
+
+ xml_attrs(doc, ns) <- c("b:id" = "b", "f:id" = "f")
+ expect_equal(xml_attrs(doc, ns), c("b:id" = "b", "f:id" = "f"))
+
+ xml_attrs(doc, ns) <- c("b:id" = "b", "id" = "test")
+ expect_equal(xml_attrs(doc, ns), c("b:id" = "b", "id" = "test"))
+
+ expect_error(xml_attrs(docs) <- "test", "`value` must be a list of named character vectors")
+
+ xml_attrs(docs, ns) <- c("b:id" = "b", "id" = "test")
+ expect_equal(xml_attrs(docs, ns),
+ list(
+ c("b:id" = "b", "id" = "test"),
+ c("b:id" = "b", "id" = "test")))
+
+ xml_attrs(docs, ns) <- NULL
+ expect_equivalent(xml_attrs(docs, ns), list(character(0), character(0)))
+})
+
+test_that("xml_attr<- accepts non-character values", {
+ x <- read_xml('<svg><rect /></svg>')
+ svg <- xml_root(x)
+
+ xml_attr(svg, "width") <- 8L
+ expect_that(xml_attr(svg, "width"), equals("8"))
+
+ xml_attr(svg, "height") <- 12.5
+ expect_that(xml_attr(svg, "height"), equals("12.5"))
+
+ expect_that(xml_attrs(svg), equals(c(width = "8", height = "12.5")))
+
+ xml_attrs(svg) <- c(width = 14L, height = 23.45)
+ expect_that(xml_attrs(svg), equals(c(width = "14", height = "23.45")))
+})
+
+test_that("xml_attr<- removes namespaces if desired", {
+ xml_attr(x, "xmlns:b") <- NULL
+
+ expect_equal(xml_attrs(x), c("xmlns:f" = "http://foo.com"))
+})
+
+test_that("xml_attr<- removes namespaces if desired", {
+ x <- read_xml("<a xmlns = 'tag:foo'><b/></a>")
+
+ # cannot find //b with a default namespace
+ expect_equal(length(xml_find_all(x, "//b")), 0)
+
+ # unless we specify it explicitly
+ expect_equal(length(xml_find_all(x, "//b")), 0)
+ expect_equal(length(xml_find_all(x, "//d1:b", xml_ns(x))), 1)
+
+ # but can find it once we remove the namespace
+ xml_attr(x, "xmlns") <- NULL
+ expect_equal(length(xml_find_all(x, "//b")), 1)
+
+ # and add the old namespace back
+ xml_attr(x, "xmlns") <- "tag:foo"
+ expect_equal(xml_attr(x, "xmlns"), "tag:foo")
+ expect_equal(length(xml_find_all(x, "//b")), 0)
+ expect_equal(length(xml_find_all(x, "//d1:b", xml_ns(x))), 1)
+
+ expect_equal(xml_attr(x, "xmlns"), "tag:foo")
+})
+
+test_that("xml_attr<- removes prefixed namespaces if desired", {
+ x <- read_xml("<a xmlns:pre = 'tag:foo'><pre:b/></a>")
+
+ # cannot find //b with a prefixed namespace
+ expect_equal(length(xml_find_all(x, "//b")), 0)
+
+ # unless we specify it explicitly
+ expect_equal(length(xml_find_all(x, "//b")), 0)
+ expect_equal(length(xml_find_all(x, "//pre:b", xml_ns(x))), 1)
+
+ # but can find it once we remove the namespace
+ xml_attr(x, "xmlns:pre") <- NULL
+ expect_equal(length(xml_find_all(x, "//b")), 1)
+
+ # and add the old namespace back
+ xml_attr(x, "xmlns:pre") <- "tag:foo"
+ xml_set_namespace(xml_children(x)[[1]], "pre")
+ expect_equal(xml_attr(x, "xmlns:pre"), "tag:foo")
+ expect_equal(length(xml_find_all(x, "//b")), 0)
+ expect_equal(length(xml_find_all(x, "//pre:b", xml_ns(x))), 1)
+
+ expect_equal(xml_attr(x, "xmlns:pre"), "tag:foo")
+})
+
+test_that("xml_set_attr works identically to xml_attr<-", {
+ content <- "<a><b><c/></b><b/></a>"
+ x <- read_xml(content)
+ y <- read_xml(content)
+
+ xml_attr(x, "a") <- "test"
+ xml_set_attr(y, "a", "test")
+
+ expect_equal(as.character(x), as.character(y))
+
+ bx <- xml_find_all(x, "//b")
+ by <- xml_find_all(y, "//b")
+
+ xml_attr(bx, "b") <- "test2"
+ xml_set_attr(by, "b", "test2")
+
+ expect_equal(as.character(x), as.character(y))
+
+ # No errors for xml_missing
+ mss <- xml_find_first(bx, "./c")
+ expect_error(xml_attr(mss[[2]], "b") <- "blah", NA)
+ expect_error(xml_set_attr(mss[[2]], "b", "blah"), NA)
+})
+
+test_that("xml_set_attrs works identically to xml_attrs<-", {
+ content <- "<a><b><c/></b><b/></a>"
+ x <- read_xml(content)
+ y <- read_xml(content)
+
+ xml_attrs(x) <- c(a = "test")
+ xml_set_attrs(y, c(a = "test"))
+
+ expect_equal(as.character(x), as.character(y))
+
+ bx <- xml_find_all(x, "//b")
+ by <- xml_find_all(y, "//b")
+
+ xml_attrs(bx) <- c(b = "test2")
+ xml_set_attrs(by, c(b = "test2"))
+
+ expect_equal(as.character(x), as.character(y))
+
+ # No errors for xml_missing
+ mss <- xml_find_first(bx, "./c")
+ expect_error(xml_attrs(mss[[2]]) <- c("b" = "blah"), NA)
+ expect_error(xml_set_attrs(mss[[2]], c("b" = "blah")), NA)
+})
diff --git a/tests/testthat/test-xml_children.R b/tests/testthat/test-xml_children.R
new file mode 100644
index 0000000..d80ee83
--- /dev/null
+++ b/tests/testthat/test-xml_children.R
@@ -0,0 +1,52 @@
+context("xml_children")
+
+x <- read_xml("<foo> <bar><boo /></bar> <baz/> </foo>")
+
+test_that("xml_child() returns the proper child", {
+ expect_equal(xml_child(x), xml_children(x)[[1L]])
+
+ expect_equal(xml_child(x, 2), xml_children(x)[[2L]])
+})
+
+test_that("xml_child() returns child by name", {
+ expect_equal(xml_child(x, "baz"), xml_find_first(x, "./baz"))
+})
+
+test_that("xml_child() errors if more than one search is given", {
+ expect_error(xml_child(x, 1:2), "`search` must be of length 1")
+})
+
+test_that("xml_child() errors if search is not numeric or character", {
+ expect_error(xml_child(x, TRUE), "`search` must be `numeric` or `character`")
+ expect_error(xml_child(x, as.factor("test")), "`search` must be `numeric` or `character`")
+ expect_error(xml_child(x, raw(1)), "`search` must be `numeric` or `character`")
+ expect_error(xml_child(x, list(1)), "`search` must be `numeric` or `character`")
+})
+
+test_that("xml_length", {
+ expect_equal(xml_length(x), 2)
+ all <- xml_find_all(x, "//*")
+ expect_equal(xml_length(all), c(2, 1, 0, 0))
+})
+
+test_that("xml_parent", {
+ expect_equal(unclass(xml_parent(xml_child(x))), unclass(x))
+})
+
+test_that("xml_parents", {
+ expect_equal(
+ xml_name(xml_parents(xml_find_first(x, "//boo"))),
+ c("bar", "foo"))
+})
+
+test_that("xml_root", {
+ doc <- xml_new_document()
+
+ expect_is(xml_root(doc), "xml_missing")
+
+ a <- xml_add_child(doc, "a")
+ b <- xml_add_child(doc, "b")
+
+ expect_that(xml_name(xml_root(b)), equals("a"))
+ expect_that(xml_name(xml_root(doc)), equals("a"))
+})
diff --git a/tests/testthat/test-xml_find.R b/tests/testthat/test-xml_find.R
new file mode 100644
index 0000000..1e10f66
--- /dev/null
+++ b/tests/testthat/test-xml_find.R
@@ -0,0 +1,123 @@
+context("xml_find")
+
+# Find one ---------------------------------------------------------------------
+
+test_that("xml_find_first returns a missing object if no match", {
+ x <- read_xml("<x><y/></x>")
+ expect_equal(xml_find_first(x, ".//z"), xml_missing())
+})
+
+test_that("xml_find_first returns the first match if more than one match", {
+ x <- read_xml("<x><y/><y/></x>")
+ y <- xml_find_first(x, ".//y")
+ expect_is(y, "xml_node")
+})
+
+# Find all ---------------------------------------------------------------------
+
+test_that("unqualified names don't look in default ns", {
+ x <- read_xml("ns-multiple-default.xml")
+
+ expect_equal(length(xml_find_all(x, "//bar")), 0)
+})
+
+test_that("qualified names matches to namespace", {
+ x <- read_xml("ns-multiple-default.xml")
+ ns <- xml_ns(x)
+
+ expect_equal(length(xml_find_all(x, "//d1:bar", ns)), 1)
+ expect_equal(length(xml_find_all(x, "//d2:bar", ns)), 1)
+})
+
+test_that("warning if unknown namespace", {
+ x <- read_xml("<foo><bar /></foo>")
+ expect_warning(xml_find_all(x, "//g:bar"), "Undefined namespace prefix")
+})
+
+test_that("no matches returns empty nodeset", {
+ x <- read_xml("<foo><bar /></foo>")
+ expect_equal(length(xml_find_all(x, "//baz")), 0)
+})
+
+# Find num ---------------------------------------------------------------------
+test_that("xml_find_num errors with non numeric results", {
+ x <- read_xml("<x><y/><y/></x>")
+ expect_error(xml_find_num(x, "//z"), "result of type:.*xml_missing.*, not numeric")
+ expect_error(xml_find_num(x, "//y"), "result of type:.*list.*, not numeric")
+ expect_error(xml_find_num(x, "1=1"), "result of type:.*logical.*, not numeric")
+ expect_error(xml_find_num(x, "string(5)"), "result of type:.*character.*, not numeric")
+})
+
+test_that("xml_find_num returns a numeric result", {
+ x <- read_xml("<x><y>1</y><y/></x>")
+ expect_equal(xml_find_num(x, "1 div 0"), Inf)
+
+ expect_equal(xml_find_num(x, "-1 div 0"), -Inf)
+
+ expect_equal(xml_find_num(x, "0 div 0"), NaN)
+
+ expect_equal(xml_find_num(x, "1 div floor(-0.1)"), -1)
+
+ expect_equal(xml_find_num(x, "1 div floor(0)"), Inf)
+
+ expect_equal(xml_find_num(x, "1 div floor(-0)"), -Inf)
+})
+
+# Find chr ---------------------------------------------------------------------
+test_that("xml_find_chr errors with non character results", {
+ x <- read_xml("<x><y/><y/></x>")
+ expect_error(xml_find_chr(x, "//z"), "result of type:.*xml_missing.*, not character")
+ expect_error(xml_find_chr(x, "//y"), "result of type:.*list.*, not character")
+ expect_error(xml_find_chr(x, "1=1"), "result of type:.*logical.*, not character")
+ expect_error(xml_find_chr(x, "1+1"), "result of type:.*numeric.*, not character")
+})
+
+test_that("xml_find_chr returns a character result", {
+ x <- read_xml("<x><y>1</y><y/></x>")
+ expect_equal(xml_find_chr(x, "string(5)"), "5")
+
+ expect_equal(xml_find_chr(x, "string(0.5)"), "0.5")
+
+ expect_equal(xml_find_chr(x, "string(-0.5)"), "-0.5")
+
+ expect_equal(xml_find_chr(x, "concat(\"titi\", \"toto\")"), "tititoto")
+
+ expect_equal(xml_find_chr(x, "substring(\"12345\", 2, 3)"), "234")
+
+ expect_equal(xml_find_chr(x, "substring(\"12345\", 2)"), "2345")
+
+ expect_equal(xml_find_chr(x, "substring(\"12345\", -4)"), "12345")
+})
+
+# Find lgl ---------------------------------------------------------------------
+test_that("xml_find_lgl errors with non logical results", {
+ x <- read_xml("<x><y/><y/></x>")
+ expect_error(xml_find_lgl(x, "//z"), "result of type:.*xml_missing.*, not logical")
+ expect_error(xml_find_lgl(x, "//y"), "result of type:.*list.*, not logical")
+ expect_error(xml_find_lgl(x, "string(5)"), "result of type:.*character.*, not logical")
+ expect_error(xml_find_lgl(x, "1+1"), "result of type:.*numeric.*, not logical")
+})
+
+test_that("xml_find_lgl returns a logical result", {
+ x <- read_xml("<x><y>1</y><y/></x>")
+
+ expect_equal(xml_find_lgl(x, "1=1"), TRUE)
+
+ expect_equal(xml_find_lgl(x, "1!=1"), FALSE)
+
+ expect_equal(xml_find_lgl(x, "true()=true()"), TRUE)
+
+ expect_equal(xml_find_lgl(x, "true()=false()"), FALSE)
+
+ expect_equal(xml_find_lgl(x, "'test'='test'"), TRUE)
+})
+
+test_that("Searches with empty inputs retain type stability", {
+ empty <- xml_nodeset()
+
+ expect_equal(xml_find_num(empty, "1 div 0"), integer())
+
+ expect_equal(xml_find_chr(empty, "string(0.5)"), character())
+
+ expect_equal(xml_find_lgl(empty, "1=1"), logical())
+})
diff --git a/tests/testthat/test-xml_missing.R b/tests/testthat/test-xml_missing.R
new file mode 100644
index 0000000..56710ea
--- /dev/null
+++ b/tests/testthat/test-xml_missing.R
@@ -0,0 +1,44 @@
+context("xml_missing")
+x <- read_xml("<body>
+ <p>Some <b>text</b>.</p>
+ <p>Some <b>other</b>.</p>
+ <p>No bold text</p>
+ </body>")
+para <- xml_find_all(x, ".//p")
+b <- xml_find_first(para, ".//b")
+mss <- b[[3]]
+
+test_that("xml_find returns nodes of class 'xml_missing' for missing nodes", {
+ expect_equal(length(b), 3L)
+ expect_equal(vapply(b, length, integer(1)), c(2L, 2L, 0L))
+ expect_is(mss, "xml_missing")
+})
+
+test_that("xml_missing methods return properly for all S3 methods", {
+
+ expect_equal(as.character(mss), NA_character_)
+ expect_equal(as_list(mss), list())
+ expect_equal(nodeset_apply(mss), xml_nodeset())
+ expect_output(print(mss), "\\{xml_missing\\}\n<NA>")
+ expect_equal(tree_structure(mss), NA_character_)
+ expect_error(write_xml(mss), "Missing data cannot be written")
+ expect_error(write_html(mss), "Missing data cannot be written")
+ expect_equal(xml_attr(mss), NA_character_)
+ expect_equal(xml_attrs(mss), NA_character_)
+ expect_equal(xml_find_all(mss), xml_nodeset())
+ expect_equal(xml_find_chr(mss), character())
+ expect_equal(xml_find_lgl(mss), logical())
+ expect_equal(xml_find_num(mss), numeric())
+ expect_equal(xml_find_first(mss), xml_missing())
+ expect_equal(xml_length(mss), 0L)
+ expect_equal(xml_name(mss), NA_character_)
+ expect_equal(xml_parent(mss), xml_missing())
+ expect_equal(xml_path(mss), NA_character_)
+ expect_equal(xml_text(mss), NA_character_)
+ expect_equal(xml_type(mss), NA_character_)
+ expect_equal(xml_url(mss), NA_character_)
+})
+
+test_that("is.na() should return TRUE for xml_missing",
+ expect_true(is.na(xml_missing()))
+)
diff --git a/tests/testthat/test-xml_name.R b/tests/testthat/test-xml_name.R
new file mode 100644
index 0000000..9883c37
--- /dev/null
+++ b/tests/testthat/test-xml_name.R
@@ -0,0 +1,66 @@
+context("xml_name")
+
+test_that("qualified names returned when ns given", {
+ x <- read_xml("ns-multiple-default.xml")
+ ns <- xml_ns(x)
+
+ bars <- xml_children(xml_children(x))
+ expect_equal(xml_name(bars), c("bar", "bar"))
+ expect_equal(xml_name(bars, ns), c("d1:bar", "d2:bar"))
+})
+
+test_that("error if missing ns spec", {
+ x <- read_xml("ns-multiple-default.xml")
+ ns <- xml_ns(x)[1]
+
+ bars <- xml_children(xml_children(x))
+ expect_error(xml_name(bars, ns), "Couldn't find prefix")
+})
+
+test_that("xml_name<- modifies the name", {
+ x <- read_xml("ns-multiple-default.xml")
+ ns <- xml_ns(x)
+
+ bars <- xml_children(xml_children(x))
+ bar <- bars[[1]]
+
+ xml_name(bar) <- "foo"
+ expect_equal(xml_name(bar), "foo")
+ expect_equal(xml_name(bar, ns), "d1:foo")
+
+ # ns is ignored
+ xml_name(bar, ns) <- "bar"
+ expect_equal(xml_name(bar), "bar")
+ expect_equal(xml_name(bar, ns), "d1:bar")
+
+ xml_name(bars) <- "foo"
+ expect_equal(xml_name(bars), c("foo", "foo"))
+
+ old_mss <- mss <- xml_missing()
+ xml_name(mss) <- "foo"
+ expect_identical(old_mss, mss)
+})
+
+test_that("xml_set_name modifies the name", {
+ x <- read_xml("ns-multiple-default.xml")
+ ns <- xml_ns(x)
+
+ bars <- xml_children(xml_children(x))
+ bar <- bars[[1]]
+
+ xml_set_name(bar, "foo")
+ expect_equal(xml_name(bar), "foo")
+ expect_equal(xml_name(bar, ns), "d1:foo")
+
+ # ns is ignored
+ xml_set_name(bar, "bar", ns)
+ expect_equal(xml_name(bar), "bar")
+ expect_equal(xml_name(bar, ns), "d1:bar")
+
+ xml_set_name(bars, "foo")
+ expect_equal(xml_name(bars), c("foo", "foo"))
+
+ old_mss <- mss <- xml_missing()
+ xml_set_name(mss, "foo")
+ expect_identical(old_mss, mss)
+})
diff --git a/tests/testthat/test-xml_nodeset.R b/tests/testthat/test-xml_nodeset.R
new file mode 100644
index 0000000..5ff4748
--- /dev/null
+++ b/tests/testthat/test-xml_nodeset.R
@@ -0,0 +1,75 @@
+context("xml_nodeset")
+
+test_that("methods work on empty nodesets", {
+
+ x <- read_xml("<a><b/></a>")
+ empty <- xml_find_all(x, "//c")
+
+ expect_error(empty[[1]], "subscript out of bounds")
+ expect_identical(empty[1], empty)
+ test <- empty
+
+ xml_attr(test, "test") <- 1
+ expect_identical(test, empty)
+
+ xml_attrs(test) <- c("test" = 1)
+ expect_identical(test, empty)
+
+ xml_name(test) <- "test"
+ expect_identical(test, empty)
+
+ xml_text(test) <- "test"
+ expect_identical(test, empty)
+
+ expect_identical(as.character(empty), character(0))
+ expect_identical(as_list(empty), list())
+ expect_identical(nodeset_apply(empty, identical), empty)
+ expect_output(print(empty), "\\{xml_nodeset \\(0\\)\\}")
+ expect_output(tree_structure(empty), NA)
+
+ xml_add_child(test, "test")
+ expect_identical(test, empty)
+
+ xml_add_sibling(test, "test")
+ expect_identical(test, empty)
+
+ expect_identical(xml_attr(empty, "test"), character())
+ expect_identical(xml_attrs(empty), list())
+ expect_identical(xml_double(empty), numeric())
+ expect_identical(xml_find_all(empty), empty)
+ expect_identical(xml_find_chr(empty), character())
+ expect_identical(xml_find_first(empty), empty)
+ expect_identical(xml_find_lgl(empty), logical())
+ expect_identical(xml_find_num(empty), numeric())
+ expect_identical(xml_integer(empty), integer())
+ expect_identical(xml_length(empty), 0L)
+ expect_identical(xml_name(empty), character())
+ expect_identical(xml_ns(empty), character())
+ expect_identical(xml_parent(empty), empty)
+ expect_identical(xml_path(empty), character())
+
+ xml_remove(test)
+ expect_identical(test, empty)
+
+ xml_replace(test)
+ expect_identical(test, empty)
+
+ xml_set_attr(test, "test", 1)
+ expect_identical(test, empty)
+
+ xml_set_attrs(test, c("test" = 1))
+ expect_identical(test, empty)
+
+ xml_set_name(test, "test")
+ expect_identical(test, empty)
+
+ xml_set_text(test, "test")
+ expect_identical(test, empty)
+
+ expect_identical(xml_siblings(empty), empty)
+ expect_output(xml_structure(empty), NA)
+
+ expect_identical(xml_text(empty), character())
+ expect_identical(xml_type(empty), character())
+ expect_identical(xml_url(empty), character())
+})
diff --git a/tests/testthat/test-xml_schema.R b/tests/testthat/test-xml_schema.R
new file mode 100644
index 0000000..542e0a8
--- /dev/null
+++ b/tests/testthat/test-xml_schema.R
@@ -0,0 +1,21 @@
+context("xml_schema")
+
+test_that("xml schema validates", {
+ doc <- read_xml(system.file("extdata/order-doc.xml", package = "xml2"))
+ schema <- read_xml(system.file("extdata/order-schema.xml", package = "xml2"))
+ expect_true(xml_validate(doc, schema))
+})
+
+test_that("xml schema errors", {
+ str <- readLines(system.file("extdata/order-doc.xml", package = "xml2"))
+ str <- sub("<quantity>1", "<quantity>", str)
+ str <- sub("95819", "ABC95819", str)
+ str <- sub('partNum="926-AA"', "", str)
+ doc <- read_xml(paste(str, collapse = "\n"))
+ schema <- read_xml(system.file("extdata/order-schema.xml", package = "xml2"))
+ out <- xml_validate(doc, schema)
+ expect_false(out)
+ errors <- attr(out, "errors")
+ expect_is(errors, "character")
+ expect_length(errors, 4)
+})
diff --git a/tests/testthat/test-xml_serialize.R b/tests/testthat/test-xml_serialize.R
new file mode 100644
index 0000000..957cbd0
--- /dev/null
+++ b/tests/testthat/test-xml_serialize.R
@@ -0,0 +1,26 @@
+context("xml_serialize")
+
+x <- read_xml("<a>
+ <b><c>123</c></b>
+ <b><c>456</c></b>
+ </a>")
+test_that("xml_serialize and xml_unserialize work with xml_document input", {
+ out <- xml_unserialize(xml_serialize(x, NULL))
+ expect_identical(as.character(x), as.character(out))
+})
+
+test_that("xml_serialize and xml_unserialize work with xml_node input", {
+ b <- xml_find_first(x, "//b")
+ out <- xml_unserialize(xml_serialize(b, NULL))
+ expect_identical(as.character(b), as.character(out))
+})
+
+test_that("xml_serialize and xml_unserialize work with xml_nodeset input", {
+ b <- xml_find_all(x, "//b")
+ out <- xml_unserialize(xml_serialize(b, NULL))
+ expect_identical(as.character(b), as.character(out))
+})
+
+test_that("xml_unserialize throws an error if given a invalid object", {
+ expect_error(xml_unserialize(serialize(1, NULL)), "Not a serialized xml2 object")
+})
diff --git a/tests/testthat/test-xml_structure.R b/tests/testthat/test-xml_structure.R
new file mode 100644
index 0000000..6ee862a
--- /dev/null
+++ b/tests/testthat/test-xml_structure.R
@@ -0,0 +1,10 @@
+context("xml_structure")
+
+test_that("xml_structure", {
+ expect_output(xml_structure(read_xml("<a><b><c/><c/></b><d/></a>")),
+"<a>
+ <b>
+ <c>
+ <c>
+ <d>")
+})
diff --git a/tests/testthat/test-xml_text.R b/tests/testthat/test-xml_text.R
new file mode 100644
index 0000000..8e888ba
--- /dev/null
+++ b/tests/testthat/test-xml_text.R
@@ -0,0 +1,60 @@
+context("xml_text")
+
+test_that("xml_text returns only text without markup", {
+ x <- read_xml("<p>This is some text. This is <b>bold!</b></p>")
+
+ expect_identical(xml_text(x), "This is some text. This is bold!")
+
+ expect_identical(xml_text(xml_children(x)), "bold!")
+})
+
+test_that("xml_text returns only text without markup", {
+ x <- read_xml("<p>This is some text. This is <b>bold!</b></p>")
+
+ expect_identical(xml_text(x), "This is some text. This is bold!")
+
+ expect_identical(xml_text(xml_children(x)), "bold!")
+})
+
+test_that("xml_text works properly with xml_nodeset objects", {
+ x <- read_xml("<x>This is some text. <x>This is some nested text.</x></x>")
+
+ expect_identical(xml_text(x), "This is some text. This is some nested text.")
+
+ expect_identical(xml_text(xml_find_all(x, "//x")),
+ c("This is some text. This is some nested text.", "This is some nested text."))
+})
+
+test_that("xml_text<- and xml_set_text work properly with xml_nodeset objects", {
+ x <- read_xml("<x>This is some text. <x>This is some nested text.</x></x>")
+
+ expect_identical(xml_text(x), "This is some text. This is some nested text.")
+
+ xml_text(x) <- "test"
+ expect_identical(xml_text(x), "testThis is some nested text.")
+ xml_set_text(x, "test2")
+ expect_identical(xml_text(x), "test2This is some nested text.")
+})
+
+test_that("xml_text trims whitespace if requested, including non-breaking spaces", {
+ x <- read_html("<p> Some text € </p>")
+ expect_identical(xml_text(x),
+ " Some text \u20ac \u00a0")
+
+ expect_identical(xml_text(x, trim = TRUE),
+ "Some text \u20ac")
+})
+
+test_that("xml_integer() returns an integer vector", {
+ x <- read_xml("<plot><point x='1' y='2' /><point x='2' y='1' /></plot>")
+
+ expect_identical(xml_integer(xml_find_all(x, "//@x")),
+ c(1L, 2L))
+})
+
+
+test_that("xml_double() returns a numeric vector", {
+ x <- read_xml("<earth><point latitude = '42.3466456' longitude = '-71.0390351' /><point latitude = '-36.8523378' longitude = '174.7691073' /></earth>")
+
+ expect_identical(xml_double(xml_find_all(x, "//@latitude")), c(42.3466456, -36.8523378))
+})
diff --git a/tools/winlibs.R b/tools/winlibs.R
new file mode 100644
index 0000000..e5d31c6
--- /dev/null
+++ b/tools/winlibs.R
@@ -0,0 +1,8 @@
+# Build against libxml2 from Rtools
+if (!file.exists("../windows/libxml2-2.9.4/include/libxml2/libxml/parser.h")) {
+ if(getRversion() < "3.3.0") setInternet2()
+ download.file("https://github.com/rwinlib/libxml2/archive/v2.9.4.zip", "lib.zip", quiet = TRUE)
+ dir.create("../windows", showWarnings = FALSE)
+ unzip("lib.zip", exdir = "../windows")
+ unlink("lib.zip")
+}
diff --git a/vignettes/modification.Rmd b/vignettes/modification.Rmd
new file mode 100644
index 0000000..ac10732
--- /dev/null
+++ b/vignettes/modification.Rmd
@@ -0,0 +1,197 @@
+---
+title: "Node Modification"
+author: "Jim Hester"
+date: "`r Sys.Date()`"
+output: rmarkdown::html_vignette
+vignette: >
+ %\VignetteIndexEntry{Node Modification}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+---
+
+```{r, echo = FALSE, message = FALSE}
+knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
+library(xml2)
+```
+
+# Modifying Existing XML
+
+Modifying existing XML can be done in xml2 by using the replacement functions
+of the accessors. They all have methods for both individual `xml_node` objects
+as well as `xml_nodeset` objects. If a vector of values is provided it is
+applied piecewise over the nodeset, otherwise the value is recycled.
+
+## Text Modification ##
+
+Text modification only happens on text nodes. If a given node has more than one
+text node only the first will be affected. If you want to modify additional
+text nodes you need to select them explicitly with `/text()`.
+
+```{r}
+x <- read_xml("<p>This is some <b>text</b>. This is more.</p>")
+xml_text(x)
+
+xml_text(x) <- "This is some other text."
+xml_text(x)
+
+# You can avoid this by explicitly selecting the text node.
+x <- read_xml("<p>This is some text. This is <b>bold!</b></p>")
+text_only <- xml_find_all(x, "//text()")
+
+xml_text(text_only) <- c("This is some other text. ", "Still bold!")
+xml_text(x)
+xml_structure(x)
+```
+
+## Attribute and Namespace Definition Modification ##
+
+Attributes and namespace definitions are modified one at a time with
+`xml_attr()` or all at once with `xml_attrs()`. In both cases using `NULL` as
+the value will remove the attribute completely.
+
+```{r}
+x <- read_xml("<a href='invalid!'>xml2</a>")
+xml_attr(x, "href")
+
+xml_attr(x, "href") <- "https://github.com/hadley/xml2"
+xml_attr(x, "href")
+
+xml_attrs(x) <- c(id = "xml2", href = "https://github.com/hadley/xml2")
+xml_attrs(x)
+x
+
+xml_attrs(x) <- NULL
+x
+
+# Namespaces are added with as a xmlns or xmlns:prefix attribute
+xml_attr(x, "xmlns") <- "http://foo"
+x
+
+xml_attr(x, "xmlns:bar") <- "http://bar"
+x
+```
+
+## Name Modification ##
+
+Node names are modified with `xml_name()`.
+
+```{r}
+x <- read_xml("<a><b/></a>")
+x
+xml_name(x)
+xml_name(x) <- "c"
+x
+```
+
+# Node modification #
+All of these functions have a `.copy` argument. If this is set to `FALSE` they
+will remove the new node from its location before inserting it into the new
+location. Otherwise they make a copy of the node before insertion.
+
+## Replacing existing nodes ##
+```{r}
+x <- read_xml("<parent><child>1</child><child>2<child>3</child></child></parent>")
+children <- xml_children(x)
+t1 <- children[[1]]
+t2 <- children[[2]]
+t3 <- xml_children(children[[2]])[[1]]
+
+xml_replace(t1, t3)
+x
+```
+
+## Add a sibling ##
+```{r}
+x <- read_xml("<parent><child>1</child><child>2<child>3</child></child></parent>")
+children <- xml_children(x)
+t1 <- children[[1]]
+t2 <- children[[2]]
+t3 <- xml_children(children[[2]])[[1]]
+
+xml_add_sibling(t1, t3)
+x
+
+xml_add_sibling(t3, t1, where = "before")
+x
+```
+
+## Add a child ##
+```{r}
+x <- read_xml("<parent><child>1</child><child>2<child>3</child></child></parent>")
+children <- xml_children(x)
+t1 <- children[[1]]
+t2 <- children[[2]]
+t3 <- xml_children(children[[2]])[[1]]
+
+xml_add_child(t1, t3)
+x
+
+xml_add_child(t1, read_xml("<test/>"))
+x
+```
+
+## Removing nodes ##
+The `xml_remove()` can be used to remove a node (and it's children) from a
+tree. The default behavior is to unlink the node from the tree, but does _not_
+free the memory for the node, so R objects pointing to the node are still
+valid.
+
+This allows code like the following to work without crashing R
+
+```r
+x <- read_xml("<foo><bar><baz/><bar></foo>")
+x1 <- x %>% xml_children() %>% .[[1]]
+x2 <- x1 %>% xml_children() %>% .[[1]]
+
+xml_remove(x1)
+rm(x1)
+gc()
+
+x2
+```
+If you are not planning on referencing these nodes again this memory is wasted.
+Calling `xml_remove(free = TRUE)` will remove the nodes _and_ free the memory
+used to store them. **Note** In this case _any_ node which previously pointed
+to the node or it's children will instead be pointing to free memory and may
+cause R to crash. xml2 can't figure this out for you, so it's your
+responsibility to remove any objects which are no longer valid.
+
+In particular `xml_find_*()` results are easy to overlook, for example
+
+```r
+x <- read_xml("<a><b /><b><b /></b></a>")
+bees <- xml_find_all(x, "//b")
+xml_remove(xml_child(x), free = TRUE)
+# bees[[1]] is no longer valid!!!
+rm(bees)
+```
+
+## Namespaces ##
+
+We want to construct a document with the following namespace layout. (From
+http://stackoverflow.com/questions/32939229/creating-xml-in-r-with-namespaces/32941524#32941524).
+```xml
+<?xml version = "1.0" encoding="UTF-8"?>
+<sld xmlns="http://www.o.net/sld"
+ xmlns:ogc="http://www.o.net/ogc"
+ xmlns:se="http://www.o.net/se"
+ version="1.1.0" >
+<layer>
+<se:Name>My Layer</se:Name>
+</layer>
+</sld>
+```
+
+```{r}
+library(magrittr)
+d <- xml_new_root("sld",
+ xmlns = "http://www.o.net/sld",
+ "xmlns:ogc" = "http://www.o.net/ogc",
+ "xmlns:se" = "http://www.o.net/se",
+ version = "1.1.0") %>%
+ xml_add_child("layer") %>%
+ xml_add_child("se:Name", "My Layer") %>%
+ xml_root()
+
+d
+```
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/r-cran-xml2.git
More information about the debian-med-commit
mailing list