[med-svn] [SCM] r-cran-plyr branch, master, updated. upstream/1.7.1-18-gcd7753b
Charles Plessy
plessy at debian.org
Thu Apr 4 00:36:30 UTC 2013
The following commit has been merged in the master branch:
commit 818872b5f9b3dae5b0ee89eefb035f0945a2e515
Author: Charles Plessy <plessy at debian.org>
Date: Thu Apr 4 09:20:48 2013 +0900
Imported Upstream version 1.8
diff --git a/.gitignore b/.gitignore
deleted file mode 100644
index ac77dcd..0000000
--- a/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-src-*
\ No newline at end of file
diff --git a/DESCRIPTION b/DESCRIPTION
index 4db06cb..e22eb95 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,37 +1,38 @@
Package: plyr
Type: Package
Title: Tools for splitting, applying and combining data
-Version: 1.7.1
+Version: 1.8
Author: Hadley Wickham <h.wickham at gmail.com>
Maintainer: Hadley Wickham <h.wickham at gmail.com>
-Description: plyr is a set of tools that solves a common
- set of problems: you need to break a big problem down
- into manageable pieces, operate on each pieces and then
- put all the pieces back together. For example, you
- might want to fit a model to each spatial location or
- time point in your study, summarise data by panels or
- collapse high-dimensional arrays to simpler summary
- statistics. The development of plyr has been generously
- supported by BD (Becton Dickinson).
+Description: plyr is a set of tools that solves a common set of
+ problems: you need to break a big problem down into manageable
+        pieces, operate on each piece and then put all the pieces back
+ together. For example, you might want to fit a model to each
+ spatial location or time point in your study, summarise data by
+ panels or collapse high-dimensional arrays to simpler summary
+ statistics. The development of plyr has been generously
+ supported by BD (Becton Dickinson).
URL: http://had.co.nz/plyr
Depends: R (>= 2.11.0)
-Suggests: abind, testthat (>= 0.2), tcltk, foreach, itertools,
+Suggests: abind, testthat (>= 0.2), tcltk, foreach, doMC, itertools,
iterators
License: MIT
LazyData: true
-Collate: 'dimensions.r' 'helper-arrange.r' 'helper-col-wise.r'
- 'helper-count.r' 'helper-data-frame.r' 'helper-each.r'
- 'helper-match-df.r' 'helper-mutate.r' 'helper-quick-df.r'
- 'helper-rename.r' 'helper-round-any.r' 'helper-splat.r'
- 'helper-strip-splits.r' 'helper-summarise.r' 'helper-try.r'
- 'helper-vaggregate.r' 'id.r' 'immutable.r' 'indexed-array.r'
- 'indexed-data-frame.r' 'indexed.r' 'join.r' 'loop-apply.r'
- 'ply-array.r' 'ply-data-frame.r' 'ply-iterator.r' 'ply-list.r'
- 'ply-mapply.r' 'ply-null.r' 'ply-replicate.r' 'progress.r'
- 'quote.r' 'rbind-matrix.r' 'rbind.r' 'simplify-array.r'
- 'simplify-data-frame.r' 'simplify-vector.r' 'split-array.r'
- 'split-data-frame.r' 'split-indices.r' 'split.r' 'utils.r'
- 'data.r' 'helper-defaults.r'
-Packaged: 2012-01-02 17:03:32 UTC; hadley
+Collate: 'dimensions.r' 'id.r' 'indexed-array.r' 'indexed-data-frame.r'
+ 'indexed.r' 'join.r' 'loop-apply.r' 'progress.r' 'quote.r'
+ 'split-indices.r' 'split.r' 'utils.r' 'utils-functional.r'
+ 'data.r' 'plyr.r' 'parallel.r' 'progress-time.r' 'a_ply.r'
+ 'aaply.r' 'adply.r' 'alply.r' 'd_ply.r' 'daply.r' 'ddply.r'
+ 'dlply.r' 'l_ply.r' 'laply.r' 'llply.r' 'm_ply.r' 'maply.r'
+ 'mdply.r' 'mlply.r' 'r_ply.r' 'raply.r' 'rdply.r' 'rlply.r'
+ 'liply.r' 'ldply.r' 'arrange.r' 'colwise.r' 'count.r'
+ 'data-frame.r' 'defaults.r' 'each.r' 'here.r' 'match-df.r'
+ 'mutate.r' 'name-rows.r' 'quickdf.r' 'rename.r' 'revalue.r'
+ 'round-any.r' 'splat.r' 'strip-splits.r' 'summarise.r' 'take.r'
+ 'try.r' 'vaggregate.r' 'idataframe.r' 'join-all.r'
+ 'list-to-array.r' 'list-to-dataframe.r' 'list-to-vector.r'
+ 'rbind-fill-matrix.r' 'rbind-fill.r' 'splitter-a.r'
+ 'splitter-d.r'
+Packaged: 2012-12-05 21:45:23 UTC; hadley
Repository: CRAN
-Date/Publication: 2012-01-08 14:36:10
+Date/Publication: 2012-12-06 08:59:31
diff --git a/MD5 b/MD5
index 4423ff2..3a36dfd 100644
--- a/MD5
+++ b/MD5
@@ -1,107 +1,129 @@
-c95db4b09bcceb259084f93056b2c958 *DESCRIPTION
-8280054e503d798bd7f5b48a76730aa7 *NAMESPACE
-8eac1ffc09bd09f9bb76e95dd811d67b *NEWS
-440943b4da15dec5e8c83f88bb77dcdb *R/data.r
-31ec5b7b64247e44008a27a1f93844a2 *R/dimensions.r
-949e179b3c8a9e83e19f4b178e081dce *R/helper-arrange.r
-a6cfd7666befca471b7bfe45172e626b *R/helper-col-wise.r
-575a3e9dd8698d1d0f9e833574364821 *R/helper-count.r
-2e08f92f6d8d85d9c7ee821b0c2bbbf7 *R/helper-data-frame.r
-0642de2d6e145c699bfc133c27f9178d *R/helper-defaults.r
-713fbaa676fcbf8c0cc5193828573d57 *R/helper-each.r
-78059d3ed24848c245579aa3006fafa6 *R/helper-match-df.r
-3544ee1fc50cd459ea2cf32108b59be1 *R/helper-mutate.r
-8c1eb2927926e40923ed4c38e6ff4018 *R/helper-quick-df.r
-058b6d8f702281af7482c19adbf04c58 *R/helper-rename.r
-48e07291fa6bd98affd63bf540b8df28 *R/helper-round-any.r
-cf2fae17dcef3993216de3d6b8ead161 *R/helper-splat.r
-c3f4bc5baaa2be00a96fae50f235097a *R/helper-strip-splits.r
-7edfb23b1ab1a4f0b204e2c37bea167c *R/helper-summarise.r
-cdfaffb4b859c51d393f9c2c37c2f80f *R/helper-try.r
-278e5697a22c3979f98332185fc8e8dd *R/helper-vaggregate.r
-ec2bebb2bc07c4637cb6a28c4e89d5e5 *R/id.r
-57d33661d1a1a95c3ad397d2b88aec91 *R/immutable.r
-7227ac3c846b56de488568b76500e9b4 *R/indexed-array.r
-cde3b285e844dec52d641c0bb635c781 *R/indexed-data-frame.r
-ddb28236498a27d7022e02e334c198da *R/indexed.r
-31b76aaf8444ac679fee9f7c59fc658a *R/join.r
-5ff867a5fb5e7e6387d287b365f7ce84 *R/loop-apply.r
-140f89b0d74465935163ecd6221b05ce *R/ply-array.r
-d215143f2204cbf590af159a77fc4d9a *R/ply-data-frame.r
-5ef0a94f737b21abbcd60d13254d48e1 *R/ply-iterator.r
-803a8ac4ea84dcb499c5a20d58177cfb *R/ply-list.r
-265d23ca8e3f4db27b1a4f168be06999 *R/ply-mapply.r
-a74fc049cf4c1a1b9c2ae02a5faa5e6d *R/ply-null.r
-cd7c2330bd0b8ba95339e602740e87af *R/ply-replicate.r
-6eb256b91da870c9efbd76382070a3e6 *R/progress.r
-643804016a04a29590acf2027b007119 *R/quote.r
-d39f992113a4e034d12d31eb65f7a9a1 *R/rbind-matrix.r
-6c4206e43165d73c12e57aab509e4016 *R/rbind.r
-754361506e7ab122c23087d5ca65cbcd *R/simplify-array.r
-1707c1938f31186f225cfdf7fc7d7248 *R/simplify-data-frame.r
-247bcf03076acce7e79b3c652e4986e7 *R/simplify-vector.r
-9cdaa6d07c3664c45592b065795afc51 *R/split-array.r
-460099a51217e7185f3e24b2941111f5 *R/split-data-frame.r
-d570c2cb71484f327ef8d99d6fb1b306 *R/split-indices.r
-d6adb94b00d4fb4197cb171ce45a874a *R/split.r
-fe449b5474347ff59407b25a4f2ed424 *R/utils.r
+9b663d59b979371b6a101f6c0514a156 *DESCRIPTION
+2fa4b4cc08cfe312801d77dab73ebc3a *NAMESPACE
+37f7eb2a168c9ca03d9856e20387903f *NEWS
+b30083458b44dd0ebdb6573f81cbaf09 *R/a_ply.r
+051d8e915e9d4fd1d37baba2c8540cd3 *R/aaply.r
+7518e52d3fffabe64480674f0c0841fb *R/adply.r
+effba88914db9ef403f64b4b0d00a8d5 *R/alply.r
+27850e8a34d803a77059aee92fe98a51 *R/arrange.r
+dbaeb0f7ac128903161538fa22f26da7 *R/colwise.r
+bab38e28e3e345d2bea2ee72f682e220 *R/count.r
+f05b9167d486f77d2eaecaee67eb416f *R/d_ply.r
+fb6dcb53dca0a98cf6a1da120c2c478d *R/daply.r
+9ec2eb1767f801a2f9783af803cac212 *R/data-frame.r
+526de3e3bf03679c79a9b4165d6cc128 *R/data.r
+6c4c8ef405dac560c9441f3a59cc1648 *R/ddply.r
+201c9ac2be5dfa8fa52575e9b459bb64 *R/defaults.r
+a18f14f7e0a6f50360586504d1311227 *R/dimensions.r
+fabcd1e2c375a3861200a934b924074a *R/dlply.r
+099f05c40f96cc0e0e9bc5bb1e33cfac *R/each.r
+dcaa350d0ab003f6ad12e4d18f5eef90 *R/here.r
+6573fcf86598a272fe60ce9cd86d7015 *R/id.r
+8d38e8274a1e6e549456888e314509e4 *R/idataframe.r
+d31d1c394a815adaa016acb562de0842 *R/indexed-array.r
+2dd51053e2dd003d3c46ed3a2f6fa459 *R/indexed-data-frame.r
+0cc320306148b4c9e00cc7ead82cbd12 *R/indexed.r
+aeb3707b4a7ffae8af0e9237f3f67e72 *R/join-all.r
+f4572136e8a85fdd0bd77a2216bb557d *R/join.r
+154acfdcc1ea3b63cc80151d69d8c734 *R/l_ply.r
+49db6bd50575429654a36aec5fca75dc *R/laply.r
+7e06f4ff7fba5ce7e93c39331e7b2fdd *R/ldply.r
+61a3bea3cc93368b6ac6f453a0ae3744 *R/liply.r
+0a2810798164088dda8d4e31e0891ff2 *R/list-to-array.r
+ab9cd1a82d137c652e71324eb25dec70 *R/list-to-dataframe.r
+0235ddca614287ca137a2a5ec80b6ecc *R/list-to-vector.r
+fe85fcdb0225eb6ac7812ff3a9aac5ba *R/llply.r
+a111fc796dbb2bb6d98e8c7a44a3cf16 *R/loop-apply.r
+138f6c83d529bbf8c68f8830d7081570 *R/m_ply.r
+233c43c50f214fb7b6cb4da744bda204 *R/maply.r
+8c2d4fbdc639835b6a7d5fa2f01f0026 *R/match-df.r
+cdf123cbd9772e88f6a1238f60b77078 *R/mdply.r
+f8952eb7391ea62de7b94494f2734d23 *R/mlply.r
+fb0da67095913ebc3e7dc368e6410ca2 *R/mutate.r
+7787cd67a1bd78ff9b8169cc55015e3c *R/name-rows.r
+56cf70c7362e1f76ddf82672b8444b5c *R/parallel.r
+65cad21d5e61b3c53af0278540715656 *R/plyr.r
+eb8917263bad12770cd32445403be25c *R/progress-time.r
+b413617b9c819a43a15cc1b450b88cdc *R/progress.r
+87b4723fec09c203ea9c98373d87cb4b *R/quickdf.r
+1baea22f9ff11a3cafa901689d1cc1bc *R/quote.r
+14fe46cf226a4774cb05fc4e7aede76f *R/r_ply.r
+c5b6f32a7d4ceba0b201ed58d6d3b989 *R/raply.r
+9f5486ed284127abbdd22cd9ce783f4f *R/rbind-fill-matrix.r
+d772aca0b29a04520372a22ec6c6a474 *R/rbind-fill.r
+746027418d6df16e5ab63e10aafe7772 *R/rdply.r
+483e90a4ad2aebccc73c5ddd1b32503d *R/rename.r
+4921126fc5506e7ab00ac924cf937cc2 *R/revalue.r
+d44675874eccaa49892e11a11ea9ebba *R/rlply.r
+ca9e3ea0a3d9e47854bfcb533fdad53b *R/round-any.r
+7369a7d69027736f1e62f0f49fa8aed6 *R/splat.r
+cef16fac9a4a53ffa46169dfd3ebcc91 *R/split-indices.r
+29e4abb6bc1f7561ff08c08554ccb58c *R/split.r
+c80b8aa598ab1329699f201d414a3ef6 *R/splitter-a.r
+ecbe80157dbd628a6c741f90c56d0fcb *R/splitter-d.r
+c3f4bc5baaa2be00a96fae50f235097a *R/strip-splits.r
+d6dbd2b5909f72f5ec28c6e3cff1c3ed *R/summarise.r
+632a3d93a68172b1350d5f23151c0f0c *R/take.r
+d1c1587c3f3056b2a17c6085a1e0d8e2 *R/try.r
+d2189ca6934b505c385b70e66165e298 *R/utils-functional.r
+e7f9e2adc241a3f9ad0ab2e26e2aedb1 *R/utils.r
+3d73325ab592dff535a8e7bd8e869967 *R/vaggregate.r
8f61b6486addfcd6b8b079cd6add0e9c *README.md
9b2d63a08f6c4d1718642d2c904c230a *data/baseball.rda
12d6f72bbc8c97a10e7c4c235aab3ae3 *data/ozone.rda
c064ec8464bce62b7862acec50e8475b *inst/CITATION
-7bf016d79d9b0a1eb2b9fbf8dcab8593 *inst/tests/test-array.r
-bd7a564506b80a4f5687a9967374350c *inst/tests/test-count.r
-b673fa44cb8d99959e340441a72771e4 *inst/tests/test-data-frame.r
-2b717e803c0120c9eddfce6140a67ca9 *inst/tests/test-empty.r
-c4fcb5075ce2b331fe7b3a40dc32c54c *inst/tests/test-join.r
-087232558cb0e750ed85e0266509e98b *inst/tests/test-list.r
-3c23d1caf6e4eabb83ef9fd5191c3a60 *inst/tests/test-mapply.r
-2d6b1f7575558c36542e12692a563d73 *inst/tests/test-mutate.r
-3a6ea4a903bf108b3d61e42414fd43c7 *inst/tests/test-ninteraction.r
-09adde0b9ff4dd3885d4d9d1ca6267cc *inst/tests/test-progress.r
-707bd9b78405b6f1196e64caefb79977 *inst/tests/test-quote.r
-5fabb9ce4effa9f4c6bd85eb5289ede4 *inst/tests/test-rbind.matrix.r
-c057ca2befc92a8ab0b762ef14ba37f4 *inst/tests/test-rbind.r
-53ce4db1efdff6b3be55c2f7ce79cad7 *inst/tests/test-rename.r
-1a66f084877eaee634093dc67fa60880 *inst/tests/test-replicate.r
-9e201cc2542d4eea00413ccdc8cc7140 *inst/tests/test-simplify-df.r
-6aacf8f9052d9b11e7b0d8a08b00e496 *inst/tests/test-split-data-frame.r
-9cf6b9d3d4d8914fd56340a40b76352d *inst/tests/test-split-labels.r
-86fe728034a2414c543ade4ba35fd5e4 *inst/tests/test-summarise.r
-c91e51c9f7c6c3afcde0e116187991da *man-roxygen/-a.r
-e94cc9b8ab37508b841b24fa63cc7e21 *man-roxygen/-d.r
-6886fab6c01b5c14b0151eb74a355631 *man-roxygen/-l.r
-e53e37e04b0c997f3002f5583f66a5bf *man-roxygen/a-.r
-adbe33feeeb9d8c236a5522f0fc7c50c *man-roxygen/d-.r
-72dfca8e201b3316a7728b53de09ac2c *man-roxygen/l-.r
-3960cc94458fd5dd53b21fd1d7fc6eaf *man-roxygen/ply.r
-9f2b1878b51cf2a3c8a03398bb6a481a *man/a_ply.Rd
-7d4bcca0f7e3cfabad9dfb34eb9cfb94 *man/aaply.Rd
-80b8ae599d942ce03dd8dea4de30bef0 *man/adply.Rd
-455a74077756876473ac37d8ceada5f8 *man/alply.Rd
+d564b7820ed6c9598ef0e9f39913a4dc *inst/tests/quickdf.r
+71b3d690155f10491a0c36295fcca10a *inst/tests/test-array.r
+9df6700af6e72c3afcea6d488917972e *inst/tests/test-count.r
+64843ae8890dedb58491b0c68b7d5e41 *inst/tests/test-data-frame.r
+570589b2d8f6a5e7a5a9292bde0255c3 *inst/tests/test-empty.r
+22fb32944156a59fb79d4b9b9185f685 *inst/tests/test-idf.r
+edcb617aef78240a4aab68b4d9eef8b9 *inst/tests/test-join.r
+de3b9fe325bde5a4d7ae4fee2b124dd4 *inst/tests/test-list.r
+e4a2dbcd3e829d98895ac697916dbfc5 *inst/tests/test-mapply.r
+f0b164f2304fbef1573b5859032a81ba *inst/tests/test-mutate.r
+b4ffb0e168d64cfb2337467d5949bb87 *inst/tests/test-ninteraction.r
+148c66709e0a612190930417b891569e *inst/tests/test-parallel.r
+5edbe06f3ef5ca4afebae668d5f72394 *inst/tests/test-progress.r
+af8f0160f64ccc89202dd1c6fd4255ad *inst/tests/test-quote.r
+d6275e43a3f6b046422f88c65323bc26 *inst/tests/test-rbind.matrix.r
+680dccdcc148e4b95ca0d9216e2d8289 *inst/tests/test-rbind.r
+0db4452cb3b1aebc91a230ec4152d8b4 *inst/tests/test-rename.r
+92f8e58ba0c83bb0c1c3e333bc3ffe55 *inst/tests/test-replicate.r
+f2b44485ec854cfc9e68c44d5fbc630b *inst/tests/test-revalue.r
+3b4322ac83b50f5a1a370c07c8f5d92b *inst/tests/test-simplify-df.r
+8333352212f7d58b1e53ce26a60afb5f *inst/tests/test-split-data-frame.r
+fe3ec6775a168e58a03f54a4b2324596 *inst/tests/test-split-indices.r
+9dd79657465c4e2ab72d4209f047854b *inst/tests/test-split-labels.r
+dc9ed90dded1449345037dce24ba8c79 *inst/tests/test-summarise.r
+49ea637ae8cb2562fe7b5c102ac10559 *man/a_ply.Rd
+ce52ec32036bd6ddf088b33155bdf7b4 *man/aaply.Rd
+fc292562290ffb4d124ff2907090b5ce *man/adply.Rd
+9e97ae53b984ab36fba6e004295a085e *man/alply.Rd
dfff8dc0d6841b6b437875342bb784dd *man/amv_dim.Rd
144600b773ec891d73e4a109b60b153b *man/amv_dimnames.Rd
-05b856a3cc96a34e3ed608ddfcc0ba89 *man/arrange.Rd
+8ab0a5af25222d56a4315c092796f9d4 *man/arrange.Rd
30bf0f3c6fc678900168bac2f015ded6 *man/as.data.frame.function.Rd
2a5482715255383f335065d8a6d7d8ae *man/as.list.split.Rd
0efc6aa43e0d9eb9eeb89fee250e4c96 *man/as.quoted.Rd
de25bd94213ee4dd760b525ddefd45b2 *man/baseball.Rd
-dab72bc81de4fb86559f1d7bf5ea7e72 *man/colwise.Rd
+10a6252651f974b732dd6c1af3b14467 *man/colwise.Rd
27e6c58afc383e8f66bb02568a7bd007 *man/compact.Rd
-bc8995f338ad01762ad685dc1b3fb6d6 *man/count.Rd
-ce6de9bbe0c6c16f3714047549dff4a1 *man/create_progress_bar.Rd
-cc7098829129b5d5fdb2df3efe9ace7c *man/d_ply.Rd
-1d1dd3ca0a0aa8264cc4405bda47e95b *man/daply.Rd
-733e939f287a894713b5e55f1dd9546c *man/ddply.Rd
+938c1d38daf1e20f1dec95862ab78be4 *man/count.Rd
+06c0a8d42784a7f307b08623b9c895a3 *man/create_progress_bar.Rd
+63408510df419a00edd6f6467572bd96 *man/d_ply.Rd
+ea421a939afbb16459398b1118fa86a7 *man/daply.Rd
+1898a193a4ddc66e3a5bfd1eac362f22 *man/ddply.Rd
598e17cddf0197cd109324b6e793e2a7 *man/defaults.Rd
6b88af2bb271b3a0c4f4e7317deb6ab7 *man/desc.Rd
3715aa08855964d4330e187d7d486b16 *man/dims.Rd
-0b204a4e24f7f7fa2f96d51c68415904 *man/dlply.Rd
-430aaa65caa6299b9da512d1eea32192 *man/each.Rd
+76221834bab7a319e98439e8e77ef9c7 *man/dlply.Rd
+fdf29018b750e2f3d94a75b9c87d0170 *man/each.Rd
15b8fca1da5a1d13124297ff1a69036b *man/empty.Rd
c372bd8f82bf880e2e91f4b5dbb55b6c *man/eval.quoted.Rd
90421f580f1c9692f41f2422e014d584 *man/failwith.Rd
7ae0a790c8276dcee9e0cf627003ffbd *man/get-split.Rd
+6397abb97a76e9f28bb62f44827b2777 *man/here.Rd
2c17345f244448c628b8616d743c5002 *man/id.Rd
c7b3d72280561f2fe93371bc1a37061d *man/id_var.Rd
485099a413e4731b7cf14a4184744e73 *man/idata.frame.Rd
@@ -110,56 +132,62 @@ c7b3d72280561f2fe93371bc1a37061d *man/id_var.Rd
85ead63b1217af3ef8fdfe007e01cfe4 *man/is.discrete.Rd
b6e6fad38ecf71f9333b8ac9520786bb *man/is.formula.Rd
bb37d48d1c298ea013748f1c01866975 *man/isplit2.Rd
-7e071c6c87614b5c5876edc72c664e7d *man/join.Rd
+b396bc6d3cc7d38555911ce1e63250e9 *man/join.Rd
ec9d2d7fc41be3fa106cc534222426d0 *man/join.keys.Rd
-712cdb6e85304511a7aecd2ad1a8cca0 *man/l_ply.Rd
-f2d10b3668095f988a3501594aba8845 *man/laply.Rd
-6d981457c9ed297401570804db55d1f3 *man/ldply.Rd
+97587fef54e4fdf7effc02740a4f0a9e *man/join_all.Rd
+dfe4750fdf4923e3104ffaaf131776c7 *man/l_ply.Rd
+16d71e2c8e67a223d8f84b0f7ec499be *man/laply.Rd
+934ef2296ac98ee0e0cc4e39f96e69d9 *man/ldply.Rd
7516a16cb60cf27566dc51c0046fc138 *man/liply.Rd
629de84d5032c86d038012340ba02b8f *man/list_to_array.Rd
4a476049a866905659cdf070dd9727a2 *man/list_to_dataframe.Rd
862688bd52d3cab5daf4d8f1511f138b *man/list_to_vector.Rd
-bdb89ac2ce0bb3cc27ad6c3989ef79c7 *man/llply.Rd
+c230664131ce3aacccdaaf4ddb68ec35 *man/llply.Rd
1d1821666ac2124fc476fa512abb8200 *man/loop_apply.Rd
-3d97b994738a0cca66041cf6d623db8a *man/m_ply.Rd
-91991e6cb01f69beba0e692e7e2c0e97 *man/maply.Rd
-1bfd5694f4d8ca18721f6fdc8a5c3941 *man/match_df.Rd
-d6a428ef78435f09a45f5245014459e0 *man/mdply.Rd
-4a0750df4d6b49bf0a1ddac9e8a2e1e0 *man/mlply.Rd
+6cfa6ff8f8e9dd5a84fc7c770425fc9f *man/m_ply.Rd
+3fed62ee065293b23f7b079c1d7f497f *man/maply.Rd
+499659e175a892fe417828493a319b78 *man/mapvalues.Rd
+bd78c0cdf780b4bada59979f2ae1b99c *man/match_df.Rd
+a8238f408548022f6d4848226291e0cf *man/mdply.Rd
+bc809f5e6aa94fcb3df99d733503b284 *man/mlply.Rd
2d0c92444909eb8f009df5f3e21bb091 *man/mutate.Rd
+62c79cc73057d579d576328ae58a4d25 *man/name_rows.Rd
c559d1372836dbd9b8d603c7631fa023 *man/names.quoted.Rd
5008ea2c544983f5a8ad9b51367ee642 *man/nunique.Rd
d2710642ac44b64f9f43fd08f8237e15 *man/ozone.Rd
+9685e33922cf6f5a69c55af08205cc72 *man/plyr.Rd
4294d4956a0321e27c5e38799bf20613 *man/print.quoted.Rd
73a288e900f81f9cbb37dd7f323c4946 *man/print.split.Rd
-7b87349e7e26ab5c8ac223601db5719d *man/progress_none.Rd
-332b6fa1c3fbf9b19fff44403c86ca9e *man/progress_text.Rd
-6d588353e4aea25e5a43f84bb014223f *man/progress_tk.Rd
-a26a1840c7c2bdac1b870cf89b3b87e9 *man/progress_win.Rd
+f7bff61b6aa741fd33c61909c57ac5be *man/progress_none.Rd
+75050a56b20299646c38082b34c3d97c *man/progress_text.Rd
+c2373c71158781240ce95d96294f6e27 *man/progress_time.Rd
+2ca810721d1ea1db287d333158a1d692 *man/progress_tk.Rd
+0e3c94f6895b7218f7c0aa60be62a409 *man/progress_win.Rd
13212fc5e1602a6a5f06e90cabe390bf *man/quickdf.Rd
32590979c2f2c53d84dcc634b102c5e7 *man/quoted.Rd
2a52a63c04e805bb604e8e68f95e01a0 *man/r_ply.Rd
d07d0e83bbbe03417ae22ca158ce55d5 *man/raply.Rd
-7b80816d07602bf3bd4dee93e646b263 *man/rbind.fill.Rd
-1987a59277f9dc980482ee0059b118c6 *man/rbind.fill.matrix.Rd
+d7811882a3cb607dd5ff4fc71d81a39c *man/rbind.fill.Rd
+615864fe9a2917d474be097fd10e3c70 *man/rbind.fill.matrix.Rd
b16dd4428fe00cd8c7af4f9368570fca *man/rdply.Rd
15caff5bb35366d7161fb8ddd00328d7 *man/reduce_dim.Rd
-5131ad30f894becca04b10c4a22adc33 *man/rename.Rd
+4925909bd5e4e4fa668da77200865be2 *man/rename.Rd
+84ed21424b2528e4f4e75453ef045f66 *man/revalue.Rd
ed68d9045b8faa3790fa42c6d96d264d *man/rlply.Rd
-79b7f6da61717a2c196594fa250a9522 *man/round_any.Rd
+3636bd0de16181a7b32d6fa38af00bd0 *man/round_any.Rd
9c974ee3f4474ac74c9c9a60f662cb03 *man/splat.Rd
-2eca4e1cba023c91ffb7efb71abffabe *man/split_indices.Rd
+8a1d5936fd6a63480f33890a26102ba7 *man/split_indices.Rd
dcbea80602c0d5c9626b6ea2de228f94 *man/split_labels.Rd
34f4376f16955b48a7e7bc463083efad *man/splitter_a.Rd
4e5bb4f6cda668c8c823fbd1ad96a651 *man/splitter_d.Rd
73169417e97bf94ab6fd2da68ffb6526 *man/strip_splits.Rd
-6e2ac3b064759d996900c1f003d5ba5c *man/summarise.Rd
+04da1fd0201904b43c42917089026113 *man/summarise.Rd
+698bab1ef9ddd92aa29af5f252e7e4ff *man/take.Rd
f965825f4813cb5890c5afb6cc8d7690 *man/true.Rd
-4c3b9b60204e445c6ea851056cccd060 *man/try_default.Rd
+29f454fa61e816d69cff2ba1993e699f *man/try_default.Rd
dbafa9f424846e45f77e9d961b1ef717 *man/tryapply.Rd
1268056f8d0e765ad0f99af317a906ab *man/unrowname.Rd
2ea2af25c66ef2a745886cbd9b640353 *man/vaggregate.Rd
021c0a6dbed105a81284fb031280df1d *src/loop-apply.c
-f8ba0a3e5630453a11411906a040bd47 *src/split-numeric.c
-40942eb1281f8f6e3bf146e3a3bbcb26 *tests/dependencies.R
-9c5817fbf81a350e47b5d06c11d03440 *tests/test-all.R
+9e136c2fbd222ce76ba3d0bfa05d7e80 *src/split-numeric.c
+a16ff31afc22b8d7893bfcb504d98355 *tests/test-all.R
diff --git a/NAMESPACE b/NAMESPACE
index e4ef53d..b77d47e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,3 +1,34 @@
+S3method("[",idf)
+S3method("[",indexed)
+S3method("[",quoted)
+S3method("[",split)
+S3method("[[",idf)
+S3method("[[",indexed_array)
+S3method("[[",indexed_df)
+S3method(as.data.frame,"function")
+S3method(as.data.frame,idf)
+S3method(as.list,indexed)
+S3method(as.list,split)
+S3method(as.quoted,"NULL")
+S3method(as.quoted,call)
+S3method(as.quoted,character)
+S3method(as.quoted,factor)
+S3method(as.quoted,formula)
+S3method(as.quoted,name)
+S3method(as.quoted,numeric)
+S3method(as.quoted,quoted)
+S3method(c,quoted)
+S3method(dim,idf)
+S3method(length,indexed)
+S3method(length,indexed_array)
+S3method(names,idf)
+S3method(names,indexed)
+S3method(names,quoted)
+S3method(print,indexed)
+S3method(print,quoted)
+S3method(print,split)
+S3method(round_any,POSIXct)
+S3method(round_any,numeric)
export(.)
export(a_ply)
export(aaply)
@@ -21,14 +52,16 @@ export(each)
export(empty)
export(eval.quoted)
export(failwith)
+export(here)
export(id)
export(idata.frame)
export(is.discrete)
export(is.formula)
export(is.quoted)
export(isplit2)
-export(join.keys)
export(join)
+export(join.keys)
+export(join_all)
export(l_ply)
export(laply)
export(ldply)
@@ -36,62 +69,39 @@ export(liply)
export(llply)
export(m_ply)
export(maply)
+export(mapvalues)
export(match_df)
export(mdply)
export(mlply)
export(mutate)
+export(name_rows)
export(numcolwise)
export(progress_none)
export(progress_text)
+export(progress_time)
export(progress_tk)
export(progress_win)
export(quickdf)
export(r_ply)
export(raply)
-export(rbind.fill.matrix)
export(rbind.fill)
+export(rbind.fill.matrix)
export(rdply)
export(rename)
+export(revalue)
export(rlply)
export(round_any)
export(splat)
+export(split_indices)
export(split_labels)
export(strip_splits)
export(summarise)
export(summarize)
+export(take)
export(true)
export(try_default)
export(tryapply)
export(unrowname)
export(vaggregate)
importFrom(stats,setNames)
-S3method("[",idf)
-S3method("[",indexed)
-S3method("[",quoted)
-S3method("[",split)
-S3method("[[",idf)
-S3method("[[",indexed_array)
-S3method("[[",indexed_df)
-S3method(as.data.frame,"function")
-S3method(as.data.frame,idf)
-S3method(as.list,indexed)
-S3method(as.list,split)
-S3method(as.quoted,"NULL")
-S3method(as.quoted,call)
-S3method(as.quoted,character)
-S3method(as.quoted,factor)
-S3method(as.quoted,formula)
-S3method(as.quoted,name)
-S3method(as.quoted,numeric)
-S3method(as.quoted,quoted)
-S3method(c,quoted)
-S3method(dim,idf)
-S3method(length,indexed_array)
-S3method(length,indexed)
-S3method(names,idf)
-S3method(names,indexed)
-S3method(names,quoted)
-S3method(print,indexed)
-S3method(print,quoted)
-S3method(print,split)
useDynLib(plyr)
diff --git a/NEWS b/NEWS
index 6a771ff..e8bdb85 100644
--- a/NEWS
+++ b/NEWS
@@ -1,7 +1,75 @@
+Version 1.8
+------------------------------------------------------------------------------
+
+NEW FEATURES AND FUNCTIONS
+
+* `**ply` gain a `.inform` argument (previously only available in `llply`) - this gives more useful debugging information at the cost of some speed. (Thanks to Brian Diggs, #57)
+
+* If `.dims = TRUE`, `alply`'s output gains dimensions and dimnames, similar to `apply`. Sequential indexing of a list produced by `alply` should be unaffected. (Peter Meilstrup)
+
+* `colwise`, `numcolwise` and `catcolwise` now all accept additional arguments in .... (Thanks to Stavros Macrakis, #62)
+
+* `here` makes it possible to use `**ply` + a function that uses non-standard evaluation (e.g. `summarise`, `mutate`, `subset`, `arrange`) inside a function. (Thanks to Peter Meilstrup, #3)
+
+* `join_all` recursively joins a list of data frames. (Fixes #29)
+
+* `name_rows` provides a convenient way of saving and then restoring row names so that you can preserve them if you need to. (#61)
+
+* `progress_time` (used with `.progress = "time"`) estimates the amount of time remaining before the job is completed. (Thanks to Mike Lawrence, #78)
+
+* `summarise` now works iteratively so that later columns can refer to earlier. (Thanks to Jim Hester, #44)
+
+* `take` makes it easy to subset along an arbitrary dimension.
+
+* Improved documentation thanks to patches from Tim Bates.
+
+PARALLEL PLYR
+
+* `**ply` gains a `.paropts` argument, a list of options that is passed onto `foreach` for controlling parallel computation.
+
+* `*_ply` now accepts `.parallel` argument to enable parallel processing. (Fixes #60)
+
+* Progress bars are disabled when using parallel plyr (Fixes #32)
+
+PERFORMANCE IMPROVEMENTS
+
+* `a*ply`: 25x speedup when indexing array objects, 3x speedup when indexing data frames. This should substantially reduce the overhead of using `a*ply`
+
+* `d*ply` subsetting has been considerably optimised: this will have a small impact unless you have a very large number of groups, in which case it will be considerably faster.
+
+* `idata.frame`: Subsetting immutable data frames with `[.idf` is now
+ faster (Peter Meilstrup)
+
+* `quickdf` is around 20% faster
+
+* `split_indices`, which powers much internal splitting code (like `vaggregate`, `join` and `d*ply`) is about 2x faster. It was already incredibly fast ~0.2s for 1,000,000 obs, so this won't have much impact on overall performance
+
+BUG FIXES
+
+* `*aply` functions now bind list mode results into a list-array (Peter Meilstrup)
+
+* `*aply` now accepts 0-dimension arrays as inputs. (#88)
+
+* `*dply` now deals better with matrix results, converting them to data frames, rather than vectors. (Fixes #12)
+
+* `d*ply` will now preserve factor levels input if `drop = FALSE` (#81)
+
+* `join` works correctly when there are no common rows (Fixes #74), or when one input has no rows (Fixes #48). It also consistently orders the columns: common columns, then x cols, then y cols (Fixes #40).
+
+* `quickdf` correctly handles NA variable names. (Fixes #66. Thanks to Scott Kostyshak)
+
+* `rbind.fill` and `rbind.fill.matrix` work consistently with matrices and data frames with zero rows. Fixes #79. (Peter Meilstrup)
+
+* `rbind.fill` now stops if inputs are not data frames. (Fixes #51)
+
+* `rbind.fill` now works consistently with 0 column data frames
+
+* `round_any` now works with `POSIXct` objects, thanks to Jean-Olivier Irisson (#76)
+
Version 1.7.1
------------------------------------------------------------------------------
-* Fix bug in
+* Fix bug in id, using numeric instead of integer
Version 1.7
------------------------------------------------------------------------------
diff --git a/R/a_ply.r b/R/a_ply.r
new file mode 100644
index 0000000..bb9b70a
--- /dev/null
+++ b/R/a_ply.r
@@ -0,0 +1,17 @@
+#' Split array, apply function, and discard results.
+#'
+#' For each slice of an array, apply function and discard results
+#'
+#' @template ply
+#' @template a-
+#' @template -_
+#' @export
+a_ply <- function(.data, .margins, .fun = NULL, ..., .expand = TRUE,
+ .progress = "none", .inform = FALSE,
+ .print = FALSE, .parallel = FALSE, .paropts = NULL) {
+ pieces <- splitter_a(.data, .margins, .expand)
+
+ l_ply(.data = pieces, .fun = .fun, ...,
+ .progress = .progress, .print = .print, .parallel = .parallel,
+ .paropts = .paropts)
+}
diff --git a/R/aaply.r b/R/aaply.r
new file mode 100644
index 0000000..12a0810
--- /dev/null
+++ b/R/aaply.r
@@ -0,0 +1,41 @@
+#' Split array, apply function, and return results in an array.
+#'
+#' For each slice of an array, apply function, keeping results as an array.
+#'
+#' This function is very similar to \code{\link{apply}}, except that it will
+#' always return an array, and when the function returns >1 d data structures,
+#' those dimensions are added on to the highest dimensions, rather than the
+#' lowest dimensions. This makes \code{aaply} idempotent, so that
+#' \code{aaply(input, X, identity)} is equivalent to \code{aperm(input, X)}.
+#'
+#' @template ply
+#' @template a-
+#' @template -a
+#' @export
+#' @examples
+#' dim(ozone)
+#' aaply(ozone, 1, mean)
+#' aaply(ozone, 1, mean, .drop = FALSE)
+#' aaply(ozone, 3, mean)
+#' aaply(ozone, c(1,2), mean)
+#'
+#' dim(aaply(ozone, c(1,2), mean))
+#' dim(aaply(ozone, c(1,2), mean, .drop = FALSE))
+#'
+#' aaply(ozone, 1, each(min, max))
+#' aaply(ozone, 3, each(min, max))
+#'
+#' standardise <- function(x) (x - min(x)) / (max(x) - min(x))
+#' aaply(ozone, 3, standardise)
+#' aaply(ozone, 1:2, standardise)
+#'
+#' aaply(ozone, 1:2, diff)
+aaply <- function(.data, .margins, .fun = NULL, ..., .expand = TRUE,
+ .progress = "none", .inform = FALSE, .drop = TRUE,
+ .parallel = FALSE, .paropts = NULL) {
+ pieces <- splitter_a(.data, .margins, .expand)
+
+ laply(.data = pieces, .fun = .fun, ...,
+ .progress = .progress, .inform = .inform, .drop = .drop,
+ .parallel = .parallel, .paropts = .paropts)
+}
diff --git a/R/adply.r b/R/adply.r
new file mode 100644
index 0000000..969756a
--- /dev/null
+++ b/R/adply.r
@@ -0,0 +1,18 @@
+#' Split array, apply function, and return results in a data frame.
+#'
+#' For each slice of an array, apply function then combine results into a data
+#' frame.
+#'
+#' @template ply
+#' @template a-
+#' @template -d
+#' @export
+adply <- function(.data, .margins, .fun = NULL, ..., .expand = TRUE,
+ .progress = "none", .inform = FALSE, .parallel = FALSE,
+ .paropts = NULL) {
+ pieces <- splitter_a(.data, .margins, .expand)
+
+ ldply(.data = pieces, .fun = .fun, ...,
+ .progress = .progress, .inform = .inform,
+ .parallel = .parallel, .paropts = .paropts)
+}
diff --git a/R/alply.r b/R/alply.r
new file mode 100644
index 0000000..8dd9328
--- /dev/null
+++ b/R/alply.r
@@ -0,0 +1,45 @@
+#' Split array, apply function, and return results in a list.
+#'
+#' For each slice of an array, apply function then combine results into a
+#' list.
+#'
+#' The list will have "dims" and "dimnames" corresponding to the
+#' margins given. For instance \code{alply(x, c(3,2), ...)} where
+#' \code{x} has dims \code{c(4,3,2)} will give a result with dims
+#' \code{c(2,3)}.
+#'
+#' \code{alply} is somewhat similar to \code{\link{apply}} for cases
+#' where the results are not atomic.
+#'
+#' @template ply
+#' @template a-
+#' @template -l
+#' @param .dims if \code{TRUE}, copy over dimensions and names from input.
+#' @export
+#' @examples
+#' alply(ozone, 3, quantile)
+#' alply(ozone, 3, function(x) table(round(x)))
+alply <- function(.data, .margins, .fun = NULL, ..., .expand = TRUE,
+ .progress = "none", .inform = FALSE, .parallel = FALSE,
+ .paropts = NULL, .dims = FALSE) {
+ pieces <- splitter_a(.data, .margins, .expand)
+
+ res <- llply(.data = pieces, .fun = .fun, ...,
+ .progress = .progress, .inform = .inform,
+ .parallel = .parallel, .paropts = .paropts)
+
+ if (.dims) {
+ labels <- attr(pieces, "split_labels")
+ #splitting a dataframe along dimension 1 is a special case which
+ #gets a different output from splitter_a, so guard against that
+ if (length(labels) == length(.margins)) {
+ res_labels <- lapply(labels, function(x) as.character(unique(x)))
+ res_dim <- sapply(res_labels, length)
+ if (length(res_dim) > 0) {
+ dim(res) <- res_dim
+ dimnames(res) <- res_labels
+ }
+ }
+ }
+ res
+}
diff --git a/R/helper-arrange.r b/R/arrange.r
similarity index 68%
rename from R/helper-arrange.r
rename to R/arrange.r
index 363888c..5edcfaf 100644
--- a/R/helper-arrange.r
+++ b/R/arrange.r
@@ -1,7 +1,7 @@
#' Order a data frame by its columns.
#'
#' This function completes the subsetting, transforming and ordering triad
-#' with a function that works in a similar way to \code{\link{subset}} and
+#' with a function that works in a similar way to \code{\link{subset}} and
#' \code{\link{transform}} but for reordering a data frame by its columns.
#' This saves a lot of typing!
#'
@@ -9,15 +9,25 @@
#' @param ... expressions evaluated in the context of \code{df} and then fed
#' to \code{\link{order}}
#' @keywords manip
+#' @seealso \code{\link{order}} for sorting function in the base package
#' @export
#' @examples
+#' # sort mtcars data by cylinder and displacement
#' mtcars[with(mtcars, order(cyl, disp)), ]
+#' # Same result using arrange: no need to use with(), as the context is implicit
+#' # NOTE: plyr functions do NOT preserve row.names
#' arrange(mtcars, cyl, disp)
-#' arrange(mtcars, cyl, desc(disp))
+#' # Let's keep the row.names in this example
+#' myCars = cbind(vehicle=row.names(mtcars), mtcars)
+#' arrange(myCars, cyl, disp)
+#' # Sort with displacement in descending order
+#' arrange(myCars, cyl, desc(disp))
arrange <- function(df, ...) {
+ stopifnot(is.data.frame(df))
+
ord <- eval(substitute(order(...)), df, parent.frame())
if(length(ord) != nrow(df)) {
- stop("Length of ordering vectors don't match data frame size",
+ stop("Length of ordering vectors don't match data frame size",
call. = FALSE)
}
unrowname(df[ord, , drop = FALSE])
diff --git a/R/helper-col-wise.r b/R/colwise.r
similarity index 73%
rename from R/helper-col-wise.r
rename to R/colwise.r
index adc56ca..61343ea 100644
--- a/R/helper-col-wise.r
+++ b/R/colwise.r
@@ -5,26 +5,26 @@
#'
#' \code{catcolwise} and \code{numcolwise} provide version that only operate
#' on discrete and numeric variables respectively.
-#'
+#'
#' @param .fun function
#' @param .cols either a function that tests columns for inclusion, or a
#' quoted object giving which columns to process
-#' @aliases colwise catcolwise numcolwise
-#' @export colwise numcolwise catcolwise
+#' @param ... other arguments passed on to \code{.fun}
+#' @export
#' @examples
#' # Count number of missing values
#' nmissing <- function(x) sum(is.na(x))
#'
-#' # Apply to every column in a data frame
+#' # Apply to every column in a data frame
#' colwise(nmissing)(baseball)
-#' # This syntax looks a little different. It is shorthand for the
+#' # This syntax looks a little different. It is shorthand for the
#' # the following:
#' f <- colwise(nmissing)
#' f(baseball)
#'
#' # This is particularly useful in conjunction with d*ply
#' ddply(baseball, .(year), colwise(nmissing))
-#'
+#'
#' # To operate only on specified columns, supply them as the second
#' # argument. Many different forms are accepted.
#' ddply(baseball, .(year), colwise(nmissing, .(sb, cs, so)))
@@ -37,35 +37,46 @@
#' ddply(baseball, .(year), colwise(nmissing, is.numeric))
#' ddply(baseball, .(year), colwise(nmissing, is.discrete))
#'
-#' # These last two cases are particularly common, so some shortcuts are
+#' # These last two cases are particularly common, so some shortcuts are
#' # provided:
#' ddply(baseball, .(year), numcolwise(nmissing))
#' ddply(baseball, .(year), catcolwise(nmissing))
-colwise <- function(.fun, .cols = true) {
+#'
+#' # You can supply additional arguments to either colwise, or the function
+#' # it generates:
+#' numcolwise(mean)(baseball, na.rm = TRUE)
+#' numcolwise(mean, na.rm = TRUE)(baseball)
+colwise <- function(.fun, .cols = true, ...) {
if (!is.function(.cols)) {
.cols <- as.quoted(.cols)
filter <- function(df) eval.quoted(.cols, df)
} else {
filter <- function(df) Filter(.cols, df)
}
-
+
+ dots <- list(...)
function(df, ...) {
stopifnot(is.data.frame(df))
df <- strip_splits(df)
filtered <- filter(df)
if (length(filtered) == 0) return(data.frame())
-
- df <- quickdf(lapply(filtered, .fun, ...))
- names(df) <- names(filtered)
- df
+
+ out <- do.call("lapply", c(list(filtered, .fun, ...), dots))
+ names(out) <- names(filtered)
+
+ quickdf(out)
}
}
-catcolwise <- function(.fun, .try = FALSE) {
- colwise(.fun, is.discrete)
+#' @rdname colwise
+#' @export
+catcolwise <- function(.fun, ...) {
+ colwise(.fun, is.discrete, ...)
}
-numcolwise <- function(.fun, .try = FALSE) {
- colwise(.fun, is.numeric)
+#' @rdname colwise
+#' @export
+numcolwise <- function(.fun, ...) {
+ colwise(.fun, is.numeric, ...)
}
diff --git a/R/helper-count.r b/R/count.r
similarity index 76%
rename from R/helper-count.r
rename to R/count.r
index 373666b..d94570e 100644
--- a/R/helper-count.r
+++ b/R/count.r
@@ -19,48 +19,55 @@
#' variables.
#' @return a data frame with label and freq columns
#' @keywords manip
+#' @seealso \code{\link{table}} for related functionality in the base package
#' @export
#' @examples
-#' count(baseball, "id")
-#' count(baseball, "id", "g")
+#' # Count of each value of "id" in the first 100 cases
+#' count(baseball[1:100,], vars = "id")
+#' # Count of ids, weighted by their "g" loading
+#' count(baseball[1:100,], vars = "id", wt_var = "g")
#' count(baseball, "id", "ab")
#' count(baseball, "lg")
+#' # How many stints do players do?
#' count(baseball, "stint")
-#' count(count(baseball, c("id", "year")), "id", "freq")
+#' # Count of times each player appeared in each of the years they played
+#' count(baseball[1:100,], c("id", "year"))
+#' # Count of counts
+#' count(count(baseball[1:100,], c("id", "year")), "id", "freq")
#' count(count(baseball, c("id", "year")), "freq")
count <- function(df, vars = NULL, wt_var = NULL) {
if (is.vector(df)) {
df <- data.frame(x = df)
}
-
+
if (!is.null(vars)) {
vars <- as.quoted(vars)
df2 <- quickdf(eval.quoted(vars, df))
} else {
df2 <- df
}
-
+
id <- ninteraction(df2, drop = TRUE)
u_id <- !duplicated(id)
labels <- df2[u_id, , drop = FALSE]
labels <- labels[order(id[u_id]), , drop = FALSE]
-
+
if (is.null(wt_var) && "freq" %in% names(df)) {
message("Using freq as weighting variable")
wt_var <- "freq"
}
-
+
if (!is.null(wt_var)) {
wt_var <- as.quoted(wt_var)
if (length(wt_var) > 1) {
stop("wt_var must be a single variable", call. = FALSE)
}
-
+
wt <- eval.quoted(wt_var, df)[[1]]
freq <- vaggregate(wt, id, sum, .default = 0)
} else {
- freq <- tabulate(id, attr(id, "n"))
+ freq <- tabulate(id, attr(id, "n"))
}
-
+
unrowname(data.frame(labels, freq))
}
diff --git a/R/d_ply.r b/R/d_ply.r
new file mode 100644
index 0000000..c303356
--- /dev/null
+++ b/R/d_ply.r
@@ -0,0 +1,18 @@
+#' Split data frame, apply function, and discard results.
+#'
+#' For each subset of a data frame, apply function and discard results
+#'
+#' @template ply
+#' @template d-
+#' @template -_
+#' @export
+d_ply <- function(.data, .variables, .fun = NULL, ..., .progress = "none",
+ .inform = FALSE, .drop = TRUE, .print = FALSE,
+ .parallel = FALSE, .paropts = NULL) {
+ .variables <- as.quoted(.variables)
+ pieces <- splitter_d(.data, .variables, drop = .drop)
+
+ l_ply(.data = pieces, .fun = .fun, ...,
+ .progress = .progress, .inform = .inform, .print = .print,
+ .parallel = .parallel, .paropts = .paropts)
+}
diff --git a/R/daply.r b/R/daply.r
new file mode 100644
index 0000000..5b42b61
--- /dev/null
+++ b/R/daply.r
@@ -0,0 +1,43 @@
+#' Split data frame, apply function, and return results in an array.
+#'
+#' For each subset of data frame, apply function then combine results into
+#' an array. \code{daply} with a function that operates column-wise is
+#' similar to \code{\link{aggregate}}.
+#'
+#' @template ply
+#' @section Input: This function splits data frames by variables.
+#' @section Output:
+#' If there are no results, then this function will return a vector of
+#' length 0 (\code{vector()}).
+#' @param .data data frame to be processed
+#' @param .variables variables to split data frame by, as quoted
+#' variables, a formula or character vector
+#' @param .drop_i should combinations of variables that do not appear in the
+#' input data be preserved (FALSE) or dropped (TRUE, default)
+#' @return if results are atomic with same type and dimensionality, a
+#' vector, matrix or array; otherwise, a list-array (a list with
+#' dimensions)
+#' @param .drop_o should extra dimensions of length 1 in the output be
+#' dropped, simplifying the output. Defaults to \code{TRUE}
+#' @family array output
+#' @family data frame input
+#' @export
+#' @examples
+#' daply(baseball, .(year), nrow)
+#'
+#' # Several different ways of summarising by variables that should not be
+#' # included in the summary
+#'
+#' daply(baseball[, c(2, 6:9)], .(year), colwise(mean))
+#' daply(baseball[, 6:9], .(baseball$year), colwise(mean))
+#' daply(baseball, .(year), function(df) colwise(mean)(df[, 6:9]))
+daply <- function(.data, .variables, .fun = NULL, ..., .progress = "none",
+ .inform = FALSE, .drop_i = TRUE, .drop_o = TRUE, .parallel = FALSE,
+ .paropts = NULL) {
+ .variables <- as.quoted(.variables)
+ pieces <- splitter_d(.data, .variables, drop = .drop_i)
+
+ laply(.data = pieces, .fun = .fun, ...,
+ .progress = .progress, .inform = .inform, .drop = .drop_o,
+ .parallel = .parallel, .paropts = .paropts)
+}
diff --git a/R/helper-data-frame.r b/R/data-frame.r
similarity index 98%
rename from R/helper-data-frame.r
rename to R/data-frame.r
index 7d46aaf..8a8ddf0 100644
--- a/R/helper-data-frame.r
+++ b/R/data-frame.r
@@ -2,10 +2,10 @@
#'
#' Create a new function that returns the existing function wrapped in a
#' data.frame
-#'
+#'
#' This is useful when calling \code{*dply} functions with a function that
#' returns a vector, and you want the output in rows, rather than columns
-#'
+#'
#' @keywords manip
#' @param x function to make return a data frame
#' @param row.names necessary to match the generic, but not used
diff --git a/R/data.r b/R/data.r
index 1c53b58..c6fb8a2 100644
--- a/R/data.r
+++ b/R/data.r
@@ -17,27 +17,27 @@
#' @examples
#' value <- ozone[1, 1, ]
#' time <- 1:72
-#' month.abbr <- c("Jan", "Feb", "Mar", "Apr", "May",
+#' month.abbr <- c("Jan", "Feb", "Mar", "Apr", "May",
#' "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
#' month <- factor(rep(month.abbr, length = 72), levels = month.abbr)
#' year <- rep(1:6, each = 12)
#' deseasf <- function(value) lm(value ~ month - 1)
-#'
+#'
#' models <- alply(ozone, 1:2, deseasf)
#' coefs <- laply(models, coef)
#' dimnames(coefs)[[3]] <- month.abbr
#' names(dimnames(coefs))[3] <- "month"
-#'
+#'
#' deseas <- laply(models, resid)
#' dimnames(deseas)[[3]] <- 1:72
#' names(dimnames(deseas))[3] <- "time"
-#'
+#'
#' dim(coefs)
#' dim(deseas)
NULL
#' Yearly batting records for all major league baseball players
-#'
+#'
#' This data frame contains batting statistics for a subset of players
#' collected from \url{http://www.baseball-databank.org/}. There are a total
#' of 21,699 records, covering 1,228 players from 1871 to 2007. Only players
@@ -57,7 +57,7 @@ NULL
#' \item h, hits, times reached base because of a batted, fair ball without
#' error by the defense
#' \item X2b, hits on which the batter reached second base safely
-#' \item X3b, hits on which the batter reached third base safely
+#' \item X3b, hits on which the batter reached third base safely
#' \item hr, number of home runs
#' \item rbi, runs batted in
#' \item sb, stolen bases
@@ -79,17 +79,17 @@ NULL
#' @examples
#' baberuth <- subset(baseball, id == "ruthba01")
#' baberuth$cyear <- baberuth$year - min(baberuth$year) + 1
-#'
+#'
#' calculate_cyear <- function(df) {
-#' mutate(df,
+#' mutate(df,
#' cyear = year - min(year),
#' cpercent = cyear / (max(year) - min(year))
#' )
#' }
-#'
+#'
#' baseball <- ddply(baseball, .(id), calculate_cyear)
#' baseball <- subset(baseball, ab >= 25)
-#'
+#'
#' model <- function(df) {
#' lm(rbi / ab ~ cyear, data=df)
#' }
diff --git a/R/ddply.r b/R/ddply.r
new file mode 100644
index 0000000..dead751
--- /dev/null
+++ b/R/ddply.r
@@ -0,0 +1,52 @@
+#' Split data frame, apply function, and return results in a data frame.
+#'
+#' For each subset of a data frame, apply function then combine results into a
+#' data frame.
+#'
+#' @template ply
+#' @template d-
+#' @template -d
+#' @seealso \code{\link{tapply}} for similar functionality in the base package
+#' @export
+#' @examples
+#' # Summarize a dataset by two variables
+#' require(plyr)
+#' dfx <- data.frame(
+#' group = c(rep('A', 8), rep('B', 15), rep('C', 6)),
+#' sex = sample(c("M", "F"), size = 29, replace = TRUE),
+#' age = runif(n = 29, min = 18, max = 54)
+#' )
+#'
+#' # Note the use of the '.' function to allow
+#' # group and sex to be used without quoting
+#' ddply(dfx, .(group, sex), summarize,
+#' mean = round(mean(age), 2),
+#' sd = round(sd(age), 2))
+#'
+#' # An example using a formula for .variables
+#' ddply(baseball[1:100,], ~ year, nrow)
+#' # Applying two functions; nrow and ncol
+#' ddply(baseball, .(lg), c("nrow", "ncol"))
+#'
+#' # Calculate mean runs batted in for each year
+#' rbi <- ddply(baseball, .(year), summarise,
+#' mean_rbi = mean(rbi, na.rm = TRUE))
+#' # Plot a line chart of the result
+#' plot(mean_rbi ~ year, type = "l", data = rbi)
+#'
+#' # make new variable career_year based on the
+#' # start year for each player (id)
+#' base2 <- ddply(baseball, .(id), mutate,
+#' career_year = year - min(year) + 1
+#' )
+ddply <- function(.data, .variables, .fun = NULL, ..., .progress = "none",
+ .inform = FALSE, .drop = TRUE, .parallel = FALSE,
+ .paropts = NULL) {
+ if (empty(.data)) return(.data)
+ .variables <- as.quoted(.variables)
+ pieces <- splitter_d(.data, .variables, drop = .drop)
+
+ ldply(.data = pieces, .fun = .fun, ...,
+ .progress = .progress, .inform = .inform,
+ .parallel = .parallel, .paropts = .paropts)
+}
diff --git a/R/helper-defaults.r b/R/defaults.r
similarity index 85%
rename from R/helper-defaults.r
rename to R/defaults.r
index 9e1386c..6af1b23 100644
--- a/R/helper-defaults.r
+++ b/R/defaults.r
@@ -1,11 +1,11 @@
#' Set defaults.
#'
#' Convenient method for combining a list of values with their defaults.
-#'
+#'
#' @param x list of values
#' @param y defaults
-#' @keywords manip
+#' @keywords manip
#' @export
defaults <- function(x, y) {
c(x, y[setdiff(names(y), names(x))])
-}
+}
diff --git a/R/dimensions.r b/R/dimensions.r
index 1cd5304..8b519f1 100644
--- a/R/dimensions.r
+++ b/R/dimensions.r
@@ -1,7 +1,7 @@
#' Number of dimensions.
#'
#' Number of dimensions of an array or vector
-#'
+#'
#' @param x array
#' @keywords internal
dims <- function(x) length(amv_dim(x))
@@ -9,30 +9,30 @@ dims <- function(x) length(amv_dim(x))
#' Dimensions.
#'
#' Consistent dimensions for vectors, matrices and arrays.
-#'
+#'
#' @param x array, matrix or vector
-#' @keywords internal
+#' @keywords internal
amv_dim <- function(x) if (is.vector(x)) length(x) else dim(x)
#' Dimension names.
#'
#' Consistent dimnames for vectors, matrices and arrays.
-#'
+#'
#' Unlike \code{\link{dimnames}} no part of the output will ever be
#' null. If a component of dimnames is omitted, \code{amv_dimnames}
#' will return an integer sequence of the appropriate length.
-#'
+#'
#' @param x array, matrix or vector
-#' @keywords internal
+#' @keywords internal
#' @export
amv_dimnames <- function(x) {
d <- if (is.vector(x)) list(names(x)) else dimnames(x)
-
+
if (is.null(d)) d <- rep(list(NULL), dims(x))
null_names <- which(unlist(llply(d, is.null)))
- d[null_names] <- llply(null_names, function(i) seq.int(amv_dim(x)[i]))
-
+ d[null_names] <- llply(null_names, function(i) seq_len(amv_dim(x)[i]))
+
# if (is.null(names(d))) names(d) <- paste("X", 1:length(d), sep="")
d
}
@@ -40,10 +40,12 @@ amv_dimnames <- function(x) {
#' Reduce dimensions.
#'
#' Remove extraneous dimensions
-#'
+#'
#' @param x array
-#' @keywords internal
+#' @keywords internal
reduce_dim <- function(x) {
- do.call("[", c(list(x), lapply(dim(x), function(x) if (x==1) 1 else TRUE), drop=TRUE))
+ subs <- lapply(dim(x), function(x) if (x == 1) 1 else bquote())
+ call <- as.call(c(list(as.name("["), quote(x)), subs, list(drop = TRUE)))
+ eval(call)
}
diff --git a/R/dlply.r b/R/dlply.r
new file mode 100644
index 0000000..a211b9b
--- /dev/null
+++ b/R/dlply.r
@@ -0,0 +1,31 @@
+#' Split data frame, apply function, and return results in a list.
+#'
+#' For each subset of a data frame, apply function then combine results into a
+#' list. \code{dlply} is similar to \code{\link{by}} except that the results
+#' are returned in a different format.
+#'
+#' @template ply
+#' @template d-
+#' @template -l
+#' @export
+#' @examples
+#' linmod <- function(df) {
+#' lm(rbi ~ year, data = mutate(df, year = year - min(year)))
+#' }
+#' models <- dlply(baseball, .(id), linmod)
+#' models[[1]]
+#'
+#' coef <- ldply(models, coef)
+#' with(coef, plot(`(Intercept)`, year))
+#' qual <- laply(models, function(mod) summary(mod)$r.squared)
+#' hist(qual)
+dlply <- function(.data, .variables, .fun = NULL, ..., .progress = "none",
+ .inform = FALSE, .drop = TRUE, .parallel = FALSE,
+ .paropts = NULL) {
+ .variables <- as.quoted(.variables)
+ pieces <- splitter_d(.data, .variables, drop = .drop)
+
+ llply(.data = pieces, .fun = .fun, ...,
+ .progress = .progress, .inform = .inform,
+ .parallel = .parallel, .paropts = .paropts)
+}
diff --git a/R/helper-each.r b/R/each.r
similarity index 71%
rename from R/helper-each.r
rename to R/each.r
index 83f0101..cf98c4f 100644
--- a/R/helper-each.r
+++ b/R/each.r
@@ -2,28 +2,37 @@
#'
#' Combine multiple functions into a single function returning a named vector
#' of outputs.
-#'
+#' Note: you cannot supply additional parameters for the summary functions
+#'
#' @param ... functions to combine. each function should produce a single
#' number as output
#' @keywords manip
+#' @seealso \code{\link{summarise}} for applying summary functions to data
#' @export
#' @examples
+#' # Call min() and max() on the vector 1:10
#' each(min, max)(1:10)
+#' # This syntax looks a little different. It is shorthand for the
+#' # the following:
+#' f<- each(min, max)
+#' f(1:10)
+#' # Three equivalent ways to call min() and max() on the vector 1:10
#' each("min", "max")(1:10)
#' each(c("min", "max"))(1:10)
#' each(c(min, max))(1:10)
+#' # Call length(), min() and max() on a random normal vector
#' each(length, mean, var)(rnorm(100))
each <- function(...) {
fnames <- laply(match.call()[-1], deparse)
fs <- list(...)
if (length(fs[[1]]) > 1) {
fs <- fs[[1]]
-
+
# Jump through hoops to work out names
snames <- as.list(match.call()[2])[[1]]
fnames <- unlist(lapply(as.list(snames)[-1], deparse))
}
-
+
# Find function names and replace with function objects
char <- laply(fs, is.character)
fnames[char] <- fs[char]
@@ -32,14 +41,14 @@ each <- function(...) {
unames <- names(fs)
if (is.null(unames)) unames <- fnames
unames[unames == ""] <- fnames[unames == ""]
-
+
n <- length(fs)
proto <- NULL
result <- NULL
-
+
if (n == 1) {
# If there is only one function, things are simple. We just
- # need to name the output, if appopriate.
+ # need to name the output, if appropriate.
function(x, ...) {
res <- fs[[1]](x, ...)
if (length(res) == 1) names(res) <- unames
@@ -48,7 +57,7 @@ each <- function(...) {
} else {
function(x, ...) {
# For n > 1 things are a little tricky
- # Construct prtotype for output on first call
+      # Construct prototype for output on first call
if (is.null(proto)) {
result <<- vector("list", length = n)
names(result) <- unames
@@ -56,9 +65,9 @@ each <- function(...) {
for(i in 1:n) result[[i]] <- fs[[i]](x, ...)
proto <<- list_to_vector(result)
} else {
- for(i in 1:n) proto[[i]] <- fs[[i]](x, ...)
+ for(i in 1:n) proto[[i]] <- fs[[i]](x, ...)
}
proto
- }
+ }
}
}
diff --git a/R/helper-match-df.r b/R/helper-match-df.r
deleted file mode 100644
index ed5d43a..0000000
--- a/R/helper-match-df.r
+++ /dev/null
@@ -1,24 +0,0 @@
-#' Extract matching rows of a data frame.
-#'
-#' This is particularly useful when you've summarised the data in some way
-#' and want to subset the original data by a characteristic of the subset.
-#'
-#' @param x data frame to subset.
-#' @param y data frame defining matching rows.
-#' @param on variables to match on - by default will use all variables common
-#' to both data frames.
-#' @return a data frame
-#' @seealso \code{\link{join}} to combine the columns from both x and y
-#' @export
-#' @examples
-#' longterm <- subset(count(baseball, "id"), freq > 25)
-#' bb_longterm <- match_df(baseball, longterm)
-match_df <- function(x, y, on = NULL) {
- if (is.null(on)) {
- on <- intersect(names(x), names(y))
- message("Matching on: ", paste(on, collapse = ", "))
- }
-
- keys <- join.keys(x, y, on)
- x[keys$x %in% keys$y, ]
-}
diff --git a/R/helper-rename.r b/R/helper-rename.r
deleted file mode 100644
index b1c53a9..0000000
--- a/R/helper-rename.r
+++ /dev/null
@@ -1,17 +0,0 @@
-#' Modify names by name, not position.
-#'
-#' @param x named object to modify
-#' @param replace named character vector, with new names as values, and
-#' old names as names.
-#' @export
-#' @importFrom stats setNames
-#' @examples
-#' x <- c("a" = 1, "b" = 2, d = 3, 4)
-#' rename(x, c("d" = "c"))
-#' rename(mtcars, c("disp" = "displ"))
-rename <- function (x, replace) {
- old_names <- names(x)
- new_names <- unname(replace)[match(old_names, names(replace))]
-
- setNames(x, ifelse(is.na(new_names), old_names, new_names))
-}
diff --git a/R/helper-round-any.r b/R/helper-round-any.r
deleted file mode 100644
index c6e3a80..0000000
--- a/R/helper-round-any.r
+++ /dev/null
@@ -1,21 +0,0 @@
-#' Round to multiple of any number.
-#'
-#' @param x numeric vector to round
-#' @param accuracy number to round to
-#' @param f rounding function: \code{\link{floor}}, \code{\link{ceiling}} or
-#' \code{\link{round}}
-#' @keywords manip
-#' @export
-#' @examples
-#' round_any(135, 10)
-#' round_any(135, 100)
-#' round_any(135, 25)
-#' round_any(135, 10, floor)
-#' round_any(135, 100, floor)
-#' round_any(135, 25, floor)
-#' round_any(135, 10, ceiling)
-#' round_any(135, 100, ceiling)
-#' round_any(135, 25, ceiling)
-round_any <- function(x, accuracy, f = round) {
- f(x / accuracy) * accuracy
-}
diff --git a/R/here.r b/R/here.r
new file mode 100644
index 0000000..8718b78
--- /dev/null
+++ b/R/here.r
@@ -0,0 +1,28 @@
+#' Capture current evaluation context.
+#'
+#' This function captures the current context, making it easier
+#' to use \code{**ply} with functions that do special evaluation and
+#' need access to the environment where ddply was called from.
+#'
+#' @author Peter Meilstrup, \url{https://github.com/crowding}
+#' @param f a function that does non-standard evaluation
+#' @export
+#' @examples
+#' df <- data.frame(a = rep(c("a","b"), each = 10), b = 1:20)
+#' f1 <- function(label) {
+#' ddply(df, "a", mutate, label = paste(label, b))
+#' }
+#' \dontrun{f1("name:")}
+#' # Doesn't work because mutate can't find label in the current scope
+#'
+#' f2 <- function(label) {
+#' ddply(df, "a", here(mutate), label = paste(label, b))
+#' }
+#' f2("name:")
+#' # Works :)
+here <- function(f) {
+ call <- substitute(function(...) (f)(...), list(f = f))
+ fun <- eval(call, parent.frame())
+ attr(fun, "srcref") <- srcfilecopy("<text>", deparse(call))
+ fun
+}
diff --git a/R/id.r b/R/id.r
index d7543f3..621e98c 100644
--- a/R/id.r
+++ b/R/id.r
@@ -16,8 +16,13 @@
#' @aliases id ninteraction
#' @export
id <- function(.variables, drop = FALSE) {
+ # Drop all zero length inputs
+ lengths <- vapply(.variables, length, integer(1))
+ .variables <- .variables[lengths != 0]
+
if (length(.variables) == 0) {
- return(structure(1L, n = 1))
+ n <- nrow(.variables) %||% 0L
+ return(structure(seq_len(n), n = n))
}
# Special case for single variable
@@ -30,12 +35,12 @@ id <- function(.variables, drop = FALSE) {
p <- length(ids)
# Calculate dimensions
- ndistinct <- vapply(ids, attr, "n", FUN.VALUE = numeric(1),
+ ndistinct <- vapply(ids, attr, "n", FUN.VALUE = numeric(1),
USE.NAMES = FALSE)
n <- prod(ndistinct)
if (n > 2 ^ 31) {
# Too big for integers, have to use strings, which will be much slower :(
-
+
char_id <- do.call("paste", c(ids, sep = "\r"))
res <- match(char_id, unique(char_id))
} else {
@@ -60,7 +65,7 @@ ninteraction <- id
id_var <- function(x, drop = FALSE) {
if (length(x) == 0) return(structure(integer(), n = 0L))
if (!is.null(attr(x, "n")) && !drop) return(x)
-
+
if (is.factor(x) && !drop) {
id <- as.integer(addNA(x, ifany = TRUE))
n <- length(levels(x))
diff --git a/R/immutable.r b/R/idataframe.r
similarity index 86%
rename from R/immutable.r
rename to R/idataframe.r
index 65202e0..b7a56d0 100644
--- a/R/immutable.r
+++ b/R/idataframe.r
@@ -1,7 +1,7 @@
#' Construct an immutable data frame.
-#'
+#'
#' An immutable data frame works like an ordinary data frame, except that when
-#' you subset it, it returns a reference to the original data frame, not a
+#' you subset it, it returns a reference to the original data frame, not a
#' copy. This makes subsetting substantially faster and has a big impact
#' when you are working with large datasets with many groups.
#'
@@ -25,24 +25,26 @@ idata.frame <- function(df) {
self$`_data` <- df
self$`_rows` <- seq_len(nrow(df))
self$`_cols` <- names(df)
-
- for (name in names(df)) {
- f <- eval(substitute(function(v) {
+ self$`_getters` <- lapply(names(df), function(name) {
+ eval(substitute(function(v) {
if (missing(v)) {
- `_data`[[name]]
+ `_data`[[name]][`_rows`]
} else {
stop("Immutable")
}
- }, list(name = name)))
+ }, list(name = name)), envir=self)
+ })
+ names(self$`_getters`) <- names(df)
+ for (name in names(df)) {
+ f <- self$`_getters`[[name]]
environment(f) <- self
makeActiveBinding(name, f, self)
}
-
- structure(self,
+ structure(self,
class = c("idf", "environment"))
}
-"[.idf" <- function(x, i, j, drop = TRUE) {
+"[.idf" <- function(x, i, j, drop = TRUE) {
# Single column special cases
if (nargs() == 2) {
j <- i
@@ -53,14 +55,14 @@ idata.frame <- function(df) {
if (missing(i)) i <- TRUE
return(x[[j]][i])
}
-
+
# New rows
rows <- x$`_rows`
if (!missing(i)) {
if (is.character(i)) stop("Row names not supported")
rows <- rows[i]
}
-
+
# New cols
cols <- x$`_cols`
if (!missing(j)) {
@@ -68,28 +70,23 @@ idata.frame <- function(df) {
cols <- intersect(cols, j)
} else {
cols <- cols[j]
- }
+ }
}
-
+
# Make active bindings for functions like lm and eval that will treat this
# object as an environment or list
self <- new.env(parent = parent.env(x))
self$`_rows` <- rows
self$`_cols` <- cols
self$`_data` <- x$`_data`
+ self$`_getters` <- x$`_getters`
for (col in cols) {
- f <- eval(substitute(function(v) {
- if (missing(v)) {
- `_data`[[name]][`_rows`]
- } else {
- stop("Immutable")
- }
- }, list(name = col)))
+ f <- self$`_getters`[[col]]
environment(f) <- self
makeActiveBinding(col, f, self)
}
-
+
structure(self,
class = c("idf", "environment"))
}
diff --git a/R/indexed-array.r b/R/indexed-array.r
index e91cbea..0e46b31 100644
--- a/R/indexed-array.r
+++ b/R/indexed-array.r
@@ -2,7 +2,7 @@
#'
#' Create an indexed array, a space efficient way of indexing into a large
#' array.
-#'
+#'
#' @param env environment containing data frame
#' @param index list of indices
#' @keywords internal
@@ -16,26 +16,26 @@ indexed_array <- function(env, index) {
# * normal array
# * normal vector
# * list-array with inexact indexing
- #
+ #
# Situations that should use [[
# * list
# * list-array with exact indexing
-
+
if (is.list(env$data)) {
if (is.data.frame(env$data) || (is.array(env$data) && !exact)) {
- subs <- c("[", "]")
+ subs <- "["
} else {
- subs <- c("[[", "]]")
+ subs <- "[["
}
} else {
- subs <- c("[", "]")
+ subs <- "["
}
-
+
# Don't drop if data is a data frame
drop <- !is.data.frame(env$data)
-
+
structure(
- list(env = env, index = index, drop = drop, subs = subs),
+ list(env = env, index = index, drop = drop, subs = as.name(subs)),
class = c("indexed_array", "indexed")
)
}
@@ -45,14 +45,14 @@ length.indexed_array <- function(x) nrow(x$index)
#' @S3method [[ indexed_array
"[[.indexed_array" <- function(x, i) {
- indices <- paste(x$index[i, ,drop=TRUE], collapse = ", ")
-
- ## This is very slow because we have to create a copy to use do.call
- # do.call(x$subs, c(list(x$env$data), indices, drop=TRUE))
+ indices <- unname(x$index[i, , drop = TRUE])
+ indices <- lapply(indices, function(x) if (x == "") bquote() else x)
- call <- paste("x$env$data",
- x$subs[1], indices, ", drop = ", x$drop, x$subs[2], sep = "")
- eval(parse(text = call))
+ call <- as.call(c(
+ list(x$subs, quote(x$env$data)),
+ indices,
+ list(drop = x$drop)))
+ eval(call)
}
#' @S3method names indexed
diff --git a/R/indexed-data-frame.r b/R/indexed-data-frame.r
index 1edd4de..519255d 100644
--- a/R/indexed-data-frame.r
+++ b/R/indexed-data-frame.r
@@ -1,13 +1,13 @@
#' An indexed data frame.
#'
#' Create an indexed list, a space efficient way of indexing into a large data frame
-#'
+#'
#' @param env environment containing data frame
#' @param index list of indices
#' @param vars a character vector giving the variables used for subsetting
#' @keywords internal
indexed_df <- function(data, index, vars) {
-
+
structure(
list(data = data, index = index, vars = vars),
class = c("indexed", "indexed_df")
@@ -16,8 +16,35 @@ indexed_df <- function(data, index, vars) {
#' @S3method [[ indexed_df
"[[.indexed_df" <- function(x, i) {
- structure(x$data[x$index[[i]], , drop = FALSE], vars = x$vars)
- # x$env$data[x$index[[i]], , drop = FALSE]
- # slice(x, attr(x, "index")[[i]])
- # subset_rows(x$env$data, x$index[[i]])
+ out <- extract_rows(x$data, x$index[[i]])
+ attr(out, "vars") <- x$vars
+ out
+}
+
+extract_rows <- function(x, i) {
+ if (!is.data.frame(x)) return(x[i, , drop = FALSE])
+
+ n <- ncol(x)
+
+ out <- lapply(seq_len(n), extract_col_rows, df = x, i = i)
+
+ names(out) <- names(x)
+ class(out) <- "data.frame"
+ attr(out, "row.names") <- c(NA_integer_, -length(out[[1]]))
+
+ out
+}
+extract_col_rows <- function(df, i, j) {
+ col <- .subset2(df, j)
+ if (isS4(col)) return(col[i])
+
+ if (is.null(attr(col, "class"))) {
+ .subset(col, i)
+ } else if (inherits(col, "factor") || inherits(col, "POSIXt")) {
+ out <- .subset(col, i)
+ attributes(out) <- attributes(col)
+ out
+ } else {
+ col[i]
+ }
}
diff --git a/R/indexed.r b/R/indexed.r
index dcf9c0d..d0c785c 100644
--- a/R/indexed.r
+++ b/R/indexed.r
@@ -5,7 +5,7 @@ length.indexed <- function(x) length(x$index)
names.indexed <- function(x) {
labels <- attr(x, "split_labels")
labels[] <- lapply(labels, as.character)
-
+
do.call(paste, c(labels, list(sep = ".")))
}
diff --git a/R/join-all.r b/R/join-all.r
new file mode 100644
index 0000000..d52ffc8
--- /dev/null
+++ b/R/join-all.r
@@ -0,0 +1,23 @@
+#' Recursively join a list of data frames.
+#'
+#' @param dfs A list of data frames.
+#' @inheritParams join
+#' @export
+#' @examples
+#' dfs <- list(
+#' a = data.frame(x = 1:10, a = runif(10)),
+#' b = data.frame(x = 1:10, b = runif(10)),
+#' c = data.frame(x = 1:10, c = runif(10))
+#' )
+#' join_all(dfs)
+#' join_all(dfs, "x")
+join_all <- function(dfs, by = NULL, type = "left", match = "all") {
+ if (length(dfs) == 1) return(dfs[[1]])
+
+ joined <- dfs[[1]]
+ for(i in 2:length(dfs)) {
+ joined <- join(joined, dfs[[i]], by = by, type = type, match = match)
+ }
+
+ joined
+}
diff --git a/R/join.r b/R/join.r
index 1f9c66e..1a77b73 100644
--- a/R/join.r
+++ b/R/join.r
@@ -1,10 +1,10 @@
#' Join two data frames together.
#'
#' Join, like merge, is designed for the types of problems
-#' where you would use a sql join.
+#' where you would use a sql join.
#'
#' The four join types return:
-#'
+#'
#' \itemize{
#' \item \code{inner}: only rows with matching keys in both x and y
#' \item \code{left}: all rows in x, adding matching columns from y
@@ -21,14 +21,17 @@
#' than merge, although it is somewhat less featureful - it currently offers
#' no way to rename output or merge on different variables in the x and y
#' data frames.
-#'
+#'
#' @param x data frame
#' @param y data frame
-#' @param by character vector of variable names to join by
-#' @param type type of join: left (default), right, inner or full. See
+#' @param by character vector of variable names to join by. If omitted, will
+#' match on all common variables.
+#' @param type type of join: left (default), right, inner or full. See
#' details for more information.
#' @param match how should duplicate ids be matched? Either match just the
-#' \code{"first"} matching row, or match \code{"all"} matching rows.
+#' \code{"first"} matching row, or match \code{"all"} matching rows. Defaults
+#' to \code{"all"} for compatibility with merge, but \code{"first"} is
+#' significantly faster.
#' @keywords manip
#' @export
#' @examples
@@ -39,107 +42,119 @@
#' b2 <- arrange(b2, id, year, stint)
#' b3 <- arrange(b3, id, year, stint)
#' stopifnot(all.equal(b2, b3))
-join <- function(x, y, by = intersect(names(x), names(y)), type = "left", match = "all") {
+join <- function(x, y, by = NULL, type = "left", match = "all") {
type <- match.arg(type, c("left", "right", "inner", "full"))
match <- match.arg(match, c("first", "all"))
-
- if (missing(by)) {
+
+ if (is.null(by)) {
+ by <- intersect(names(x), names(y))
message("Joining by: ", paste(by, collapse = ", "))
}
-
- switch(match,
- "first" = join_first(x, y, by, type),
- "all" = join_all(x, y, by, type))
+
+ switch(match,
+ "first" = .join_first(x, y, by, type),
+ "all" = .join_all(x, y, by, type))
}
-join_first <- function(x, y, by, type) {
+.join_first <- function(x, y, by, type) {
keys <- join.keys(x, y, by = by)
- new.cols <- setdiff(names(y), by)
-
+
+ x.cols <- setdiff(names(x), by)
+ y.cols <- setdiff(names(y), by)
+
if (type == "inner") {
x.match <- match(keys$y, keys$x, 0)
y.match <- match(keys$x, keys$y, 0)
- cbind(x[x.match, , drop = FALSE], y[y.match, new.cols, drop = FALSE])
- } else if (type == "left") {
+ cbind(
+ x[x.match, by, drop = FALSE],
+ x[x.match, x.cols, drop = FALSE],
+ y[y.match, y.cols, drop = FALSE]
+ )
+ } else if (type == "left") {
y.match <- match(keys$x, keys$y)
- y.matched <- unrowname(y[y.match, new.cols, drop = FALSE])
- cbind(x, y.matched)
+ y.matched <- unrowname(y[y.match, y.cols, drop = FALSE])
+ cbind(x[by], x[x.cols], y.matched)
} else if (type == "right") {
if (any(duplicated(keys$y))) {
stop("Duplicated key in y", call. = FALSE)
}
-
+
new.cols <- setdiff(names(x), by)
x.match <- match(keys$y, keys$x)
- x.matched <- unrowname(x[x.match, , drop = FALSE])
- cbind(y, x.matched[, new.cols, drop = FALSE])
-
+ x.matched <- unrowname(x[x.match, x.cols, drop = FALSE])
+
+ cbind(y[by], x.matched, y[y.cols])
} else if (type == "full") {
# x with matching y's then any unmatched ys
y.match <- match(keys$x, keys$y)
- y.matched <- unrowname(y[y.match, new.cols, drop = FALSE])
+ y.matched <- unrowname(y[y.match, y.cols, drop = FALSE])
y.unmatch <- is.na(match(keys$y, keys$x))
-
- rbind.fill(cbind(x, y.matched), y[y.unmatch, , drop = FALSE])
+
+ rbind.fill(cbind(x[c(by, x.cols)], y.matched), y[y.unmatch, , drop = FALSE])
}
}
# Basic idea to perform a full cartesian product of the two data frames
-# and then evaluate which rows meet the merging criteria. But that is
+# and then evaluate which rows meet the merging criteria. But that is
# horrendously inefficient, so we do various types of hashing, implemented
# in R as split_indices
-join_all <- function(x, y, by, type) {
- new.cols <- setdiff(names(y), by)
-
+.join_all <- function(x, y, by, type) {
+ x.cols <- setdiff(names(x), by)
+ y.cols <- setdiff(names(y), by)
+
if (type == "inner") {
ids <- join_ids(x, y, by)
- out <- cbind(x[ids$x, , drop = FALSE], y[ids$y, new.cols, drop = FALSE])
+ out <- cbind(x[ids$x, , drop = FALSE], y[ids$y, y.cols, drop = FALSE])
} else if (type == "left") {
ids <- join_ids(x, y, by, all = TRUE)
- out <- cbind(x[ids$x, , drop = FALSE], y[ids$y, new.cols, drop = FALSE])
+ out <- cbind(x[ids$x, , drop = FALSE], y[ids$y, y.cols, drop = FALSE])
} else if (type == "right") {
# Flip x and y, but make sure to put new columns in the right place
new.cols <- setdiff(names(x), by)
ids <- join_ids(y, x, by, all = TRUE)
- out <- cbind(y[ids$x, , drop = FALSE], x[ids$y, new.cols, drop = FALSE])
+ out <- cbind(
+ y[ids$x, by, drop = FALSE],
+ x[ids$y, x.cols, drop = FALSE],
+ y[ids$x, y.cols, drop = FALSE]
+ )
} else if (type == "full") {
# x's with all matching y's, then non-matching y's - just the same as
# join.first
ids <- join_ids(x, y, by, all = TRUE)
-
- matched <- cbind(x[ids$x, , drop = FALSE],
- y[ids$y, new.cols, drop = FALSE])
+
+ matched <- cbind(x[ids$x, , drop = FALSE],
+ y[ids$y, y.cols, drop = FALSE])
unmatched <- y[setdiff(seq_len(nrow(y)), ids$y), , drop = FALSE]
out <- rbind.fill(matched, unmatched)
}
-
+
unrowname(out)
}
join_ids <- function(x, y, by, all = FALSE) {
keys <- join.keys(x, y, by = by)
-
- ys <- split_indices(seq_along(keys$y), keys$y, keys$n)
+
+ ys <- split_indices(keys$y, keys$n)
length(ys) <- keys$n
-
+
if (all) {
# replace NULL with NA to preserve those x's without matching y's
nulls <- vapply(ys, function(x) length(x) == 0, logical(1))
- ys[nulls] <- list(NA)
+ ys[nulls] <- list(NA_real_)
}
-
+
ys <- ys[keys$x]
xs <- rep(seq_along(keys$x), vapply(ys, length, numeric(1)))
-
+
list(x = xs, y = unlist(ys))
}
#' Join keys.
-#' Given two data frames, create a unique key for each row.
+#' Given two data frames, create a unique key for each row.
#'
#' @param x data frame
#' @param y data frame
@@ -149,10 +164,13 @@ join_ids <- function(x, y, by, all = FALSE) {
join.keys <- function(x, y, by) {
joint <- rbind.fill(x[by], y[by])
keys <- id(joint, drop = TRUE)
-
+
+ n_x <- nrow(x)
+ n_y <- nrow(y)
+
list(
- x = keys[1:nrow(x)],
- y = keys[-(1:nrow(x))],
+ x = keys[seq_len(n_x)],
+ y = keys[n_x + seq_len(n_y)],
n = attr(keys, "n")
)
}
diff --git a/R/l_ply.r b/R/l_ply.r
new file mode 100644
index 0000000..2293ff1
--- /dev/null
+++ b/R/l_ply.r
@@ -0,0 +1,39 @@
+#' Split list, apply function, and discard results.
+#'
+#' For each element of a list, apply function and discard results
+#'
+#' @template ply
+#' @template l-
+#' @template -_
+#' @export
+l_ply <- function(.data, .fun = NULL, ..., .progress = "none", .inform = FALSE,
+ .print = FALSE, .parallel = FALSE, .paropts = NULL) {
+ if (is.character(.fun) || is.list(.fun)) .fun <- each(.fun)
+ if (!is.function(.fun)) stop(".fun is not a function.")
+
+ progress <- create_progress_bar(.progress)
+ progress$init(length(.data))
+ on.exit(progress$term())
+
+ if (.parallel) {
+ if (.print) message("Printing disabled for parallel processing")
+ if (.progress != "none") message("Progress disabled for parallel processing")
+
+ setup_parallel()
+ .paropts$.combine <- function(...) NULL
+ fe_call <- as.call(c(list(as.name("foreach"), d = as.name(".data")),
+ .paropts))
+ fe <- eval(fe_call)
+
+ fe %dopar% .fun(d, ...)
+ } else {
+ .data <- as.list(.data)
+ for(i in seq_along(.data)) {
+ x <- .fun(.data[[i]], ...)
+ if (.print) print(x)
+ progress$step()
+ }
+ }
+
+ invisible()
+}
diff --git a/R/laply.r b/R/laply.r
new file mode 100644
index 0000000..90977aa
--- /dev/null
+++ b/R/laply.r
@@ -0,0 +1,35 @@
+#' Split list, apply function, and return results in an array.
+#'
+#' For each element of a list, apply function then combine results into an
+#' array.
+#'
+#' \code{laply} is similar in spirit to \code{\link{sapply}} except
+#' that it will always return an array, and the output is transposed with
+#' respect to \code{sapply} - each element of the list corresponds to a row,
+#' not a column.
+#'
+#' @template ply
+#' @template l-
+#' @template -a
+#' @export
+#' @examples
+#' laply(baseball, is.factor)
+#' # cf
+#' ldply(baseball, is.factor)
+#' colwise(is.factor)(baseball)
+#'
+#' laply(seq_len(10), identity)
+#' laply(seq_len(10), rep, times = 4)
+#' laply(seq_len(10), matrix, nrow = 2, ncol = 2)
+laply <- function(.data, .fun = NULL, ..., .progress = "none", .inform = FALSE,
+ .drop = TRUE, .parallel = FALSE, .paropts = NULL) {
+ if (is.character(.fun)) .fun <- do.call("each", as.list(.fun))
+ if (!is.function(.fun)) stop(".fun is not a function.")
+
+ if (!inherits(.data, "split")) .data <- as.list(.data)
+ res <- llply(.data = .data, .fun = .fun, ...,
+ .progress = .progress, .inform = .inform,
+ .parallel = .parallel, .paropts = .paropts)
+
+ list_to_array(res, attr(.data, "split_labels"), .drop)
+}
diff --git a/R/ldply.r b/R/ldply.r
new file mode 100644
index 0000000..714a621
--- /dev/null
+++ b/R/ldply.r
@@ -0,0 +1,18 @@
+#' Split list, apply function, and return results in a data frame.
+#'
+#' For each element of a list, apply function then combine results into a data
+#' frame.
+#'
+#' @template ply
+#' @template l-
+#' @template -d
+#' @export
+ldply <- function(.data, .fun = NULL, ..., .progress = "none", .inform = FALSE,
+ .parallel = FALSE, .paropts = NULL) {
+ if (!inherits(.data, "split")) .data <- as.list(.data)
+ res <- llply(.data = .data, .fun = .fun, ...,
+ .progress = .progress, .inform = .inform,
+ .parallel = .parallel, .paropts = .paropts)
+
+ list_to_dataframe(res, attr(.data, "split_labels"))
+}
diff --git a/R/ply-iterator.r b/R/liply.r
similarity index 97%
rename from R/ply-iterator.r
rename to R/liply.r
index 47ed857..f336d0c 100644
--- a/R/ply-iterator.r
+++ b/R/liply.r
@@ -1,11 +1,11 @@
#' Experimental iterator based version of llply.
-#'
-#' Because iterators do not have known length, \code{liply} starts by
-#' allocating an output list of length 50, and then doubles that length
-#' whenever it runs out of space. This gives O(n ln n) performance rather
+#'
+#' Because iterators do not have known length, \code{liply} starts by
+#' allocating an output list of length 50, and then doubles that length
+#' whenever it runs out of space. This gives O(n ln n) performance rather
#' than the O(n ^ 2) performance from the naive strategy of growing the list
#' each time.
-#'
+#'
#' @keywords manip
#' @param .iterator iterator object
#' @param .fun function to apply to each piece
@@ -24,19 +24,19 @@
liply <- function(.iterator, .fun = NULL, ...) {
stopifnot(inherits(.iterator, "iter"))
if (is.null(.fun)) return(as.list(.iterator))
-
+
iterator <- itertools::ihasNext(.iterator)
-
+
if (is.character(.fun)) .fun <- each(.fun)
if (!is.function(.fun)) stop(".fun is not a function.")
-
+
result <- vector("list", 50)
i <- 0
while(itertools::hasNext(iterator)) {
piece <- iterators::nextElem(iterator)
res <- .fun(piece, ...)
-
+
# Double length of vector when necessary. Gives O(n ln n) performance
# instead of naive O(n^2)
i <- i + 1
@@ -46,7 +46,7 @@ liply <- function(.iterator, .fun = NULL, ...) {
if (!is.null(res)) result[[i]] <- res
}
length(result) <- i
-
+
result
}
diff --git a/R/simplify-array.r b/R/list-to-array.r
similarity index 71%
rename from R/simplify-array.r
rename to R/list-to-array.r
index 44ae145..fa75d4d 100644
--- a/R/simplify-array.r
+++ b/R/list-to-array.r
@@ -1,7 +1,7 @@
#' List to array.
#'
#' Reduce/simplify a list of homogenous objects to an array
-#'
+#'
#' @param res list of input data
#' @param labels a data frame of labels, one row for each element of res
#' @param .drop should extra dimensions be dropped (TRUE) or preserved (FALSE)
@@ -10,35 +10,27 @@
list_to_array <- function(res, labels = NULL, .drop = FALSE) {
if (length(res) == 0) return(vector())
n <- length(res)
-
+
atomic <- sapply(res, is.atomic)
- if (all(atomic)) {
- # Atomics need to be same size
+ if (all(atomic) || all(!atomic)) {
dlength <- unique.default(llply(res, dims))
- if (length(dlength) != 1)
+ if (length(dlength) != 1)
stop("Results must have the same number of dimensions.")
dims <- unique(do.call("rbind", llply(res, amv_dim)))
- if (is.null(dims) || !all(dims > 0))
+ if (is.null(dims))
stop("Results must have one or more dimensions.", call. = FALSE)
- if (nrow(dims) != 1)
+ if (nrow(dims) != 1)
stop("Results must have the same dimensions.", call. = FALSE)
res_dim <- amv_dim(res[[1]])
res_labels <- amv_dimnames(res[[1]])
res_index <- expand.grid(res_labels)
- res <- unname(unlist(res))
+ res <- unlist(res, use.names = FALSE, recursive = FALSE)
} else {
- # Lists are degenerate case where every element is a singleton
- res_index <- as.data.frame(matrix(0, 1, 0))
- res_dim <- numeric()
- res_labels <- NULL
-
- attr(res, "split_type") <- NULL
- attr(res, "split_labels") <- NULL
- class(res) <- class(res)[2]
+ stop("Results must have compatible types.")
}
if (is.null(labels)) {
@@ -46,17 +38,16 @@ list_to_array <- function(res, labels = NULL, .drop = FALSE) {
in_labels <- list(NULL)
in_dim <- n
} else {
- in_labels <- lapply(labels,
+ in_labels <- lapply(labels,
function(x) if(is.factor(x)) levels(x) else sort(unique(x)))
- in_dim <- sapply(in_labels, length)
-
+ in_dim <- sapply(in_labels, length)
}
-
- # Work out where each result should go in the new array
+
+ # Work out where each result should go in the new array
index_old <- rep(id(rev(labels)), each = nrow(res_index))
index_new <- rep(id(rev(res_index)), nrow(labels))
index <- (index_new - 1) * prod(in_dim) + index_old
-
+
out_dim <- unname(c(in_dim, res_dim))
out_labels <- c(in_labels, res_labels)
n <- prod(out_dim)
@@ -66,8 +57,8 @@ list_to_array <- function(res, labels = NULL, .drop = FALSE) {
} else {
overall <- order(index)
}
-
- out_array <- res[overall]
+
+ out_array <- res[overall]
dim(out_array) <- out_dim
dimnames(out_array) <- out_labels
if (.drop) reduce_dim(out_array) else out_array
diff --git a/R/simplify-data-frame.r b/R/list-to-dataframe.r
similarity index 90%
rename from R/simplify-data-frame.r
rename to R/list-to-dataframe.r
index e3c840c..0abb240 100644
--- a/R/simplify-data-frame.r
+++ b/R/list-to-dataframe.r
@@ -3,7 +3,7 @@
#' Reduce/simplify a list of homogenous objects to a data frame.
#' All \code{NULL} entries are removed. Remaining entries must be all atomic
#' or all data frames.
-#'
+#'
#' @family list simplification functions
#' @param res list of input data
#' @param labels a data frame of labels, one row for each element of res
@@ -24,17 +24,21 @@ list_to_dataframe <- function(res, labels = NULL) {
# Figure out how to turn elements into a data frame
atomic <- unlist(lapply(res, is.atomic))
df <- unlist(lapply(res, is.data.frame))
+ mat <- unlist(lapply(res, is.matrix))
- if (all(atomic)) {
+ if (all(mat)) {
+ resdf <- as.data.frame(rbind.fill.matrix(res))
+ rows <- unlist(lapply(res, NROW))
+ } else if (all(atomic)) {
nrow <- length(res)
ncol <- unique(unlist(lapply(res, length)))
if (length(ncol) != 1) stop("Results do not have equal lengths")
-
+
vec <- unname(do.call("c", res))
-
+
resdf <- quickdf(unname(split(vec, rep(seq_len(ncol), nrow))))
names(resdf) <- make_names(res[[1]], "V")
-
+
rows <- rep(1, length(nrow))
} else if (all(df)) {
resdf <- rbind.fill(res)
@@ -42,14 +46,14 @@ list_to_dataframe <- function(res, labels = NULL) {
} else {
stop("Results must be all atomic, or all data frames")
}
-
+
if(is.null(labels)) return(unrowname(resdf))
# Add labels to results
names(labels) <- make_names(labels, "X")
-
+
cols <- setdiff(names(labels), names(resdf))
labels <- labels[rep(1:nrow(labels), rows), cols, drop = FALSE]
-
+
unrowname(cbind(labels, resdf))
}
diff --git a/R/simplify-vector.r b/R/list-to-vector.r
similarity index 87%
rename from R/simplify-vector.r
rename to R/list-to-vector.r
index c032ea6..5d7322c 100644
--- a/R/simplify-vector.r
+++ b/R/list-to-vector.r
@@ -1,7 +1,7 @@
#' List to vector.
#'
#' Reduce/simplify a list of homogenous objects to a vector
-#'
+#'
#' @param res list of input data
#' @keywords internal
#' @family list simplification functions
@@ -9,15 +9,15 @@ list_to_vector <- function(res) {
n <- length(res)
if (n == 0) return(vector())
if (n == 1) return(res[[1]])
-
+
atomic <- sapply(res, is.atomic)
if (all(atomic)) {
- numeric <- all(unlist(lapply(res, is.numeric)))
+ numeric <- all(unlist(lapply(res, is.numeric)))
classes <- unique(lapply(res, class))
if (numeric || length(classes) == 1) {
res <- unlist(res)
}
- }
+ }
res
}
diff --git a/R/llply.r b/R/llply.r
new file mode 100644
index 0000000..12b380e
--- /dev/null
+++ b/R/llply.r
@@ -0,0 +1,90 @@
+#' Split list, apply function, and return results in a list.
+#'
+#' For each element of a list, apply function, keeping results as a list.
+#'
+#' \code{llply} is equivalent to \code{\link{lapply}} except that it will
+#' preserve labels and can display a progress bar.
+#'
+#' @template ply
+#' @template l-
+#' @template -l
+#' @export
+#' @examples
+#' llply(llply(mtcars, round), table)
+#' llply(baseball, summary)
+#' # Examples from ?lapply
+#' x <- list(a = 1:10, beta = exp(-3:3), logic = c(TRUE,FALSE,FALSE,TRUE))
+#'
+#' llply(x, mean)
+#' llply(x, quantile, probs = 1:3/4)
+llply <- function(.data, .fun = NULL, ..., .progress = "none", .inform = FALSE,
+ .parallel = FALSE, .paropts = NULL) {
+ if (is.null(.fun)) return(as.list(.data))
+ if (is.character(.fun) || is.list(.fun)) .fun <- each(.fun)
+ if (!is.function(.fun)) stop(".fun is not a function.")
+
+ if (!inherits(.data, "split")) {
+ pieces <- as.list(.data)
+
+ # This special case can be done much faster with lapply, so do it.
+ fast_path <- .progress == "none" && !.inform && !.parallel
+ if (fast_path) {
+ return(structure(lapply(pieces, .fun, ...), dim = dim(pieces)))
+ }
+
+ } else {
+ pieces <- .data
+ }
+
+ n <- length(pieces)
+ if (n == 0) return(list())
+
+ if (.parallel && .progress != "none") {
+ message("Progress disabled when using parallel plyr")
+ .progress <- "none"
+ }
+
+ progress <- create_progress_bar(.progress)
+ progress$init(n)
+ on.exit(progress$term())
+
+ result <- vector("list", n)
+ do.ply <- function(i) {
+ piece <- pieces[[i]]
+
+ # Display informative error messages, if desired
+ if (.inform) {
+ res <- try(.fun(piece, ...))
+ if (inherits(res, "try-error")) {
+ piece <- paste(capture.output(print(piece)), collapse = "\n")
+ stop("with piece ", i, ": \n", piece, call. = FALSE)
+ }
+ } else {
+ res <- .fun(piece, ...)
+ }
+ progress$step()
+ res
+ }
+ if (.parallel) {
+ setup_parallel()
+
+ i <- seq_len(n)
+ fe_call <- as.call(c(list(as.name("foreach"), i = i), .paropts))
+ fe <- eval(fe_call)
+
+ result <- fe %dopar% do.ply(i)
+ } else {
+ result <- loop_apply(n, do.ply)
+ }
+
+ attributes(result)[c("split_type", "split_labels")] <-
+ attributes(pieces)[c("split_type", "split_labels")]
+ names(result) <- names(pieces)
+
+ # Only set dimension if not null, otherwise names are removed
+ if (!is.null(dim(pieces))) {
+ dim(result) <- dim(pieces)
+ }
+
+ result
+}
diff --git a/R/loop-apply.r b/R/loop-apply.r
index 766e468..9905326 100644
--- a/R/loop-apply.r
+++ b/R/loop-apply.r
@@ -1,6 +1,6 @@
#' Loop apply
#'
-#' An optimised version of lapply for the special case of operating on
+#' An optimised version of lapply for the special case of operating on
#' \code{seq_len(n)}
#'
#' @param n length of sequence
diff --git a/R/m_ply.r b/R/m_ply.r
new file mode 100644
index 0000000..43c7dc4
--- /dev/null
+++ b/R/m_ply.r
@@ -0,0 +1,19 @@
+#' Call function with arguments in array or data frame, discarding results.
+#'
+#' Call a multi-argument function with values taken from columns of a
+#' data frame or array, and discard the results.
+#'
+#' @template ply
+#' @template m-
+#' @template -_
+#' @export
+m_ply <- function(.data, .fun = NULL, ..., .expand = TRUE, .progress = "none",
+ .inform = FALSE, .print = FALSE, .parallel = FALSE,
+ .paropts = NULL) {
+ if (is.matrix(.data) & !is.list(.data)) .data <- .matrix_to_df(.data)
+
+ f <- splat(.fun)
+ a_ply(.data = .data, .margins = 1, .fun = f, ..., .expand = .expand,
+ .progress = .progress, .inform = .inform, .print = .print,
+ .parallel = .parallel, .paropts = .paropts)
+}
diff --git a/R/maply.r b/R/maply.r
new file mode 100644
index 0000000..daf067b
--- /dev/null
+++ b/R/maply.r
@@ -0,0 +1,23 @@
+#' Call function with arguments in array or data frame, returning an array.
+#'
+#' Call a multi-argument function with values taken from columns of a
+#' data frame or array, and combine results into an array
+#'
+#' @template ply
+#' @template m-
+#' @template -a
+#' @export
+#' @examples
+#' maply(cbind(mean = 1:5, sd = 1:5), rnorm, n = 5)
+#' maply(expand.grid(mean = 1:5, sd = 1:5), rnorm, n = 5)
+#' maply(cbind(1:5, 1:5), rnorm, n = 5)
+maply <- function(.data, .fun = NULL, ..., .expand = TRUE, .progress = "none",
+ .inform = FALSE, .drop = TRUE, .parallel = FALSE,
+ .paropts = NULL) {
+ if (is.matrix(.data) & !is.list(.data)) .data <- .matrix_to_df(.data)
+
+ f <- splat(.fun)
+ aaply(.data = .data, .margins = 1, .fun = f, ...,
+ .expand = .expand, .progress = .progress, .inform = .inform,
+ .parallel = .parallel, .paropts = .paropts, .drop = .drop)
+}
diff --git a/R/match-df.r b/R/match-df.r
new file mode 100644
index 0000000..0f3a5b3
--- /dev/null
+++ b/R/match-df.r
@@ -0,0 +1,45 @@
+#' Extract matching rows of a data frame.
+#'
+#' Match works in the same way as join, but instead of returning the combined
+#' dataset, it only returns the matching rows from the first dataset. This is
+#' particularly useful when you've summarised the data in some way
+#' and want to subset the original data by a characteristic of the subset.
+#'
+#' \code{match_df} shares the same semantics as \code{\link{join}}, not
+#' \code{\link{match}}:
+#'
+#' \itemize{
+#' \item the match criterion is \code{==}, not \code{\link{identical}}.
+#' \item it doesn't work for columns that are not atomic vectors
+#' \item if there are no matches, the row will be omitted
+#' }
+#'
+#' @param x data frame to subset.
+#' @param y data frame defining matching rows.
+#' @param on variables to match on - by default will use all variables common
+#' to both data frames.
+#' @return a data frame
+#' @seealso \code{\link{join}} to combine the columns from both x and y
+#' and \code{\link{match}} for the base function selecting matching items
+#' @export
+#' @examples
+#' # count the occurrences of each id in the baseball dataframe, then get the subset with a freq >25
+#' longterm <- subset(count(baseball, "id"), freq > 25)
+#' # longterm
+#' # id freq
+#' # 30 ansonca01 27
+#' # 48 baineha01 27
+#' # ...
+#' # Select only rows from these longterm players from the baseball dataframe
+#' # (match would default to match on shared column names, but here was explicitly set "id")
+#' bb_longterm <- match_df(baseball, longterm, on="id")
+#' bb_longterm[1:5,]
+match_df <- function(x, y, on = NULL) {
+ if (is.null(on)) {
+ on <- intersect(names(x), names(y))
+ message("Matching on: ", paste(on, collapse = ", "))
+ }
+
+ keys <- join.keys(x, y, on)
+ x[keys$x %in% keys$y, , drop = FALSE]
+}
diff --git a/R/mdply.r b/R/mdply.r
new file mode 100644
index 0000000..2ae8201
--- /dev/null
+++ b/R/mdply.r
@@ -0,0 +1,23 @@
+#' Call function with arguments in array or data frame, returning a data frame.
+#'
+#' Call a multi-argument function with values taken from columns of a
+#' data frame or array, and combine results into a data frame
+#'
+#' @template ply
+#' @template m-
+#' @template -d
+#' @export
+#' @examples
+#' mdply(data.frame(mean = 1:5, sd = 1:5), rnorm, n = 2)
+#' mdply(expand.grid(mean = 1:5, sd = 1:5), rnorm, n = 2)
+#' mdply(cbind(mean = 1:5, sd = 1:5), rnorm, n = 5)
+#' mdply(cbind(mean = 1:5, sd = 1:5), as.data.frame(rnorm), n = 5)
+mdply <- function(.data, .fun = NULL, ..., .expand = TRUE, .progress = "none",
+ .inform = FALSE, .parallel = FALSE, .paropts = NULL) {
+ if (is.matrix(.data) & !is.list(.data)) .data <- .matrix_to_df(.data)
+
+ f <- splat(.fun)
+ adply(.data = .data, .margins = 1, .fun = f, ...,
+ .expand = .expand, .progress = .progress, .inform = .inform,
+ .parallel = .parallel, .paropts = .paropts)
+}
diff --git a/R/mlply.r b/R/mlply.r
new file mode 100644
index 0000000..dbc33be
--- /dev/null
+++ b/R/mlply.r
@@ -0,0 +1,25 @@
+#' Call function with arguments in array or data frame, returning a list.
+#'
+#' Call a multi-argument function with values taken from columns of a
+#' data frame or array, and combine results into a list.
+#'
+#' @template ply
+#' @template m-
+#' @template -l
+#' @export
+#' @examples
+#' mlply(cbind(1:4, 4:1), rep)
+#' mlply(cbind(1:4, times = 4:1), rep)
+#'
+#' mlply(cbind(1:4, 4:1), seq)
+#' mlply(cbind(1:4, length = 4:1), seq)
+#' mlply(cbind(1:4, by = 4:1), seq, to = 20)
+mlply <- function(.data, .fun = NULL, ..., .expand = TRUE, .progress = "none",
+ .inform = FALSE, .parallel = FALSE, .paropts = NULL) {
+ if (is.matrix(.data) & !is.list(.data)) .data <- .matrix_to_df(.data)
+
+ f <- splat(.fun)
+ alply(.data = .data, .margins = 1, .fun = f, ...,
+ .expand = .expand, .progress = .progress, .inform = .inform,
+ .parallel = .parallel, .paropts = .paropts)
+}
diff --git a/R/helper-mutate.r b/R/mutate.r
similarity index 94%
rename from R/helper-mutate.r
rename to R/mutate.r
index d5f8bec..eb896d2 100644
--- a/R/helper-mutate.r
+++ b/R/mutate.r
@@ -20,15 +20,17 @@
#' mutate(airquality, new = -Ozone, Temp = (Temp - 32) / 1.8)
#'
#' # Things transform can't do
-#' mutate(airquality, Temp = (Temp - 32) / 1.8, OzT = Ozone / Temp)
+#' mutate(airquality, Temp = (Temp - 32) / 1.8, OzT = Ozone / Temp)
#'
#' # mutate is rather faster than transform
#' system.time(transform(baseball, avg_ab = ab / g))
#' system.time(mutate(baseball, avg_ab = ab / g))
mutate <- function(.data, ...) {
+ stopifnot(is.data.frame(.data) || is.list(.data) || is.environment(.data))
+
cols <- as.list(substitute(list(...))[-1])
cols <- cols[names(cols) != ""] # Silently drop unnamed columns
-
+
for(col in names(cols)) {
.data[[col]] <- eval(cols[[col]], .data, parent.frame())
}
diff --git a/R/name-rows.r b/R/name-rows.r
new file mode 100644
index 0000000..1cc8661
--- /dev/null
+++ b/R/name-rows.r
@@ -0,0 +1,33 @@
+#' Toggle row names between explicit and implicit.
+#'
+#' Plyr functions ignore row names, so this function provides a way to preserve
+#' them by converting them to an explicit column in the data frame. After the
+#' plyr operation, you can then apply \code{name_rows} again to convert back
+#' from the explicit column to the implicit \code{rownames}.
+#'
+#' @param df a data.frame, with either \code{rownames}, or a column called
+#' \code{.rownames}.
+#' @export
+#' @examples
+#' name_rows(mtcars)
+#' name_rows(name_rows(mtcars))
+#'
+#' df <- data.frame(a = sample(10))
+#' arrange(df, a)
+#' arrange(name_rows(df), a)
+#' name_rows(arrange(name_rows(df), a))
+name_rows <- function(df) {
+ stopifnot(is.data.frame(df))
+
+ rn_col <- !is.null(df$.rownames)
+
+ if (rn_col) {
+ rownames(df) <- df$.rownames
+ df$.rownames <- NULL
+ } else {
+ df$.rownames <- rownames(df)
+ rownames(df) <- NULL
+ }
+
+ df
+}
diff --git a/R/parallel.r b/R/parallel.r
new file mode 100644
index 0000000..2e738ea
--- /dev/null
+++ b/R/parallel.r
@@ -0,0 +1,16 @@
+setup_parallel <- function() {
+ if (!require("foreach")) {
+ stop("foreach package required for parallel plyr operation",
+ call. = FALSE)
+ }
+ if (getDoParWorkers() == 1) {
+ warning("No parallel backend registered", call. = TRUE)
+ }
+}
+
+parallel_fe <- function(n, options) {
+ i <- seq_len(n)
+ fe_call <- as.call(c(list(as.name("foreach"), i = i), options))
+
+ eval(fe_call)
+}
diff --git a/R/ply-array.r b/R/ply-array.r
deleted file mode 100644
index fc24f11..0000000
--- a/R/ply-array.r
+++ /dev/null
@@ -1,113 +0,0 @@
-#' Split list, apply function, and return results in an array.
-#'
-#' For each element of a list, apply function then combine results into an
-#' array. \code{laply} is similar in spirit to \code{\link{sapply}} except
-#' that it will always return an array, and the output is transposed with
-#' respect \code{sapply} - each element of the list corresponds to a column,
-#' not a row.
-#'
-#' @template ply
-#' @template l-
-#' @template -a
-#' @export
-#' @examples
-#' laply(baseball, is.factor)
-#' # cf
-#' ldply(baseball, is.factor)
-#' colwise(is.factor)(baseball)
-#'
-#' laply(seq_len(10), identity)
-#' laply(seq_len(10), rep, times = 4)
-#' laply(seq_len(10), matrix, nrow = 2, ncol = 2)
-laply <- function(.data, .fun = NULL, ..., .progress = "none", .drop = TRUE, .parallel = FALSE) {
- if (is.character(.fun)) .fun <- do.call("each", as.list(.fun))
- if (!is.function(.fun)) stop(".fun is not a function.")
-
- if (!inherits(.data, "split")) .data <- as.list(.data)
- res <- llply(.data = .data, .fun = .fun, ...,
- .progress = .progress, .parallel = .parallel)
-
- list_to_array(res, attr(.data, "split_labels"), .drop)
-}
-
-
-#' Split data frame, apply function, and return results in an array.
-#'
-#' For each subset of data frame, apply function then combine results into
-#' an array. \code{daply} with a function that operates column-wise is
-#' similar to \code{\link{aggregate}}.
-#'
-#' @template ply
-#' @section Input: This function splits data frames by variables.
-#' @section Output:
-#' If there are no results, then this function will return a vector of
-#' length 0 (\code{vector()}).
-#' @param .data data frame to be processed
-#' @param .variables variables to split data frame by, as quoted
-#' variables, a formula or character vector
-#' @param .drop_i should combinations of variables that do not appear in the
-#' input data be preserved (FALSE) or dropped (TRUE, default)
-#' @param .parallel if \code{TRUE}, apply function in parallel, using parallel
-#' backend provided by foreach
-#' @return if results are atomic with same type and dimensionality, a
-#' vector, matrix or array; otherwise, a list-array (a list with
-#' dimensions)
-#' @param .drop_o should extra dimensions of length 1 in the output be
-#' dropped, simplifying the output. Defaults to \code{TRUE}
-#' @family array output
-#' @family data frame input
-#' @export
-#' @examples
-#' daply(baseball, .(year), nrow)
-#'
-#' # Several different ways of summarising by variables that should not be
-#' # included in the summary
-#'
-#' daply(baseball[, c(2, 6:9)], .(year), colwise(mean))
-#' daply(baseball[, 6:9], .(baseball$year), colwise(mean))
-#' daply(baseball, .(year), function(df) colwise(mean)(df[, 6:9]))
-daply <- function(.data, .variables, .fun = NULL, ..., .progress = "none", .drop_i = TRUE, .drop_o = TRUE, .parallel = FALSE) {
- .variables <- as.quoted(.variables)
- pieces <- splitter_d(.data, .variables, drop = .drop_i)
-
- laply(.data = pieces, .fun = .fun, ...,
- .progress = .progress, .drop = .drop_o, .parallel = .parallel)
-}
-
-#' Split array, apply function, and return results in an array.
-#'
-#' For each slice of an array, apply function, keeping results as an array.
-#' This function is very similar to \code{\link{apply}}, except that it will
-#' always return an array, and when the function returns >1 d data structures,
-#' those dimensions are added on to the highest dimensions, rather than the
-#' lowest dimensions. This makes \code{aaply} idempotent, so that
-#' \code{apply(input, X, identity)} is equivalent to \code{aperm(input, X)}.
-#'
-#' @template ply
-#' @template a-
-#' @template -a
-#' @export
-#' @examples
-#' dim(ozone)
-#' aaply(ozone, 1, mean)
-#' aaply(ozone, 1, mean, .drop = FALSE)
-#' aaply(ozone, 3, mean)
-#' aaply(ozone, c(1,2), mean)
-#'
-#' dim(aaply(ozone, c(1,2), mean))
-#' dim(aaply(ozone, c(1,2), mean, .drop = FALSE))
-#'
-#' aaply(ozone, 1, each(min, max))
-#' aaply(ozone, 3, each(min, max))
-#'
-#' standardise <- function(x) (x - min(x)) / (max(x) - min(x))
-#' aaply(ozone, 3, standardise)
-#' aaply(ozone, 1:2, standardise)
-#'
-#' aaply(ozone, 1:2, diff)
-aaply <- function(.data, .margins, .fun = NULL, ..., .expand = TRUE, .progress = "none", .drop = TRUE, .parallel = FALSE) {
- pieces <- splitter_a(.data, .margins, .expand)
-
- laply(.data = pieces, .fun = .fun, ...,
- .progress = .progress, .drop = .drop, .parallel = .parallel)
-}
diff --git a/R/ply-data-frame.r b/R/ply-data-frame.r
deleted file mode 100644
index 9a2ea46..0000000
--- a/R/ply-data-frame.r
+++ /dev/null
@@ -1,61 +0,0 @@
-#' Split list, apply function, and return results in a data frame.
-#'
-#' For each element of a list, apply function then combine results into a data
-#' frame.
-#'
-#' @template ply
-#' @template l-
-#' @template -d
-#' @export
-ldply <- function(.data, .fun = NULL, ..., .progress = "none", .parallel = FALSE) {
- if (!inherits(.data, "split")) .data <- as.list(.data)
- res <- llply(.data = .data, .fun = .fun, ...,
- .progress = .progress, .parallel = .parallel)
-
- list_to_dataframe(res, attr(.data, "split_labels"))
-}
-
-#' Split data frame, apply function, and return results in a data frame.
-#'
-#' For each subset of a data frame, apply function then combine results into a
-#' data frame.
-#'
-#' @template ply
-#' @template d-
-#' @template -d
-#' @export
-#' @examples
-#' ddply(baseball, .(year), "nrow")
-#' ddply(baseball, .(lg), c("nrow", "ncol"))
-#'
-#' rbi <- ddply(baseball, .(year), summarise,
-#' mean_rbi = mean(rbi, na.rm = TRUE))
-#' with(rbi, plot(year, mean_rbi, type="l"))
-#'
-#' base2 <- ddply(baseball, .(id), transform,
-#' career_year = year - min(year) + 1
-#' )
-ddply <- function(.data, .variables, .fun = NULL, ..., .progress = "none", .drop = TRUE, .parallel = FALSE) {
- if (empty(.data)) return(.data)
- .variables <- as.quoted(.variables)
- pieces <- splitter_d(.data, .variables, drop = .drop)
-
- ldply(.data = pieces, .fun = .fun, ...,
- .progress = .progress, .parallel = .parallel)
-}
-
-#' Split array, apply function, and return results in a data frame.
-#'
-#' For each slice of an array, apply function then combine results into a data
-#' frame.
-#'
-#' @template ply
-#' @template a-
-#' @template -d
-#' @export
-adply <- function(.data, .margins, .fun = NULL, ..., .expand = TRUE, .progress = "none", .parallel = FALSE) {
- pieces <- splitter_a(.data, .margins, .expand)
-
- ldply(.data = pieces, .fun = .fun, ...,
- .progress = .progress, .parallel = .parallel)
-}
diff --git a/R/ply-list.r b/R/ply-list.r
deleted file mode 100644
index 8e49cc7..0000000
--- a/R/ply-list.r
+++ /dev/null
@@ -1,136 +0,0 @@
-#' Split list, apply function, and return results in a list.
-#'
-#' For each element of a list, apply function, keeping results as a list.
-#' \code{llply} is equivalent to \code{\link{lapply}} except that it will
-#' preserve labels and can display a progress bar.
-#'
-#' @template ply
-#' @template l-
-#' @template -l
-#' @param .inform produce informative error messages? This is turned off by
-#' by default because it substantially slows processing speed, but is very
-#' useful for debugging
-#' @export
-#' @examples
-#' llply(llply(mtcars, round), table)
-#' llply(baseball, summary)
-#' # Examples from ?lapply
-#' x <- list(a = 1:10, beta = exp(-3:3), logic = c(TRUE,FALSE,FALSE,TRUE))
-#'
-#' llply(x, mean)
-#' llply(x, quantile, probs = 1:3/4)
-llply <- function(.data, .fun = NULL, ..., .progress = "none", .inform = FALSE, .parallel = FALSE) {
- if (is.null(.fun)) return(as.list(.data))
- if (is.character(.fun) || is.list(.fun)) .fun <- each(.fun)
- if (!is.function(.fun)) stop(".fun is not a function.")
-
- if (!inherits(.data, "split")) {
- pieces <- as.list(.data)
-
- # This special case can be done much faster with lapply, so do it.
- fast_path <- .progress == "none" && !.inform && !.parallel
- if (fast_path) {
- return(structure(lapply(pieces, .fun, ...), dim = dim(pieces)))
- }
-
- } else {
- pieces <- .data
- }
-
- n <- length(pieces)
- if (n == 0) return(list())
-
- progress <- create_progress_bar(.progress)
- progress$init(n)
- on.exit(progress$term())
-
- result <- vector("list", n)
- do.ply <- function(i) {
- piece <- pieces[[i]]
-
- # Display informative error messages, if desired
- if (.inform) {
- res <- try(.fun(piece, ...))
- if (inherits(res, "try-error")) {
- piece <- paste(capture.output(print(piece)), collapse = "\n")
- stop("with piece ", i, ": \n", piece, call. = FALSE)
- }
- } else {
- res <- .fun(piece, ...)
- }
- progress$step()
- res
- }
- if (.parallel) {
- if (!require("foreach")) {
- stop("foreach package required for parallel plyr operation",
- call. = FALSE)
- }
- if (getDoParWorkers() == 1) {
- warning("No parallel backend registered", call. = TRUE)
- }
- result <- foreach(i = seq_len(n)) %dopar% do.ply(i)
- } else {
- result <- loop_apply(n, do.ply)
- }
-
- attributes(result)[c("split_type", "split_labels")] <-
- attributes(pieces)[c("split_type", "split_labels")]
- names(result) <- names(pieces)
-
- # Only set dimension if not null, otherwise names are removed
- if (!is.null(dim(pieces))) {
- dim(result) <- dim(pieces)
- }
-
- result
-}
-
-#' Split data frame, apply function, and return results in a list.
-#'
-#' For each subset of a data frame, apply function then combine results into a
-#' list. \code{dlply} is similar to \code{\link{by}} except that the results
-#' are returned in a different format.
-#'
-#' @template ply
-#' @template d-
-#' @template -l
-#' @export
-#' @examples
-#' linmod <- function(df) {
-#' lm(rbi ~ year, data = mutate(df, year = year - min(year)))
-#' }
-#' models <- dlply(baseball, .(id), linmod)
-#' models[[1]]
-#'
-#' coef <- ldply(models, coef)
-#' with(coef, plot(`(Intercept)`, year))
-#' qual <- laply(models, function(mod) summary(mod)$r.squared)
-#' hist(qual)
-dlply <- function(.data, .variables, .fun = NULL, ..., .progress = "none", .drop = TRUE, .parallel = FALSE) {
- .variables <- as.quoted(.variables)
- pieces <- splitter_d(.data, .variables, drop = .drop)
-
- llply(.data = pieces, .fun = .fun, ...,
- .progress = .progress, .parallel = .parallel)
-}
-
-#' Split array, apply function, and return results in a list.
-#'
-#' For each slice of an array, apply function then combine results into a
-#' list. \code{alply} is somewhat similar to \code{\link{apply}} for cases
-#' where the results are not atomic.
-#'
-#' @template ply
-#' @template a-
-#' @template -l
-#' @export
-#' @examples
-#' alply(ozone, 3, quantile)
-#' alply(ozone, 3, function(x) table(round(x)))
-alply <- function(.data, .margins, .fun = NULL, ..., .expand = TRUE, .progress = "none", .parallel = FALSE) {
- pieces <- splitter_a(.data, .margins, .expand)
-
- llply(.data = pieces, .fun = .fun, ...,
- .progress = .progress, .parallel = .parallel)
-}
diff --git a/R/ply-mapply.r b/R/ply-mapply.r
deleted file mode 100644
index fa38934..0000000
--- a/R/ply-mapply.r
+++ /dev/null
@@ -1,160 +0,0 @@
-#' Call function with arguments in array or data frame, returning a data frame.
-#'
-#' Call a multi-argument function with values taken from columns of an data frame or array, and combine results into a data frame
-#'
-#' The \code{m*ply} functions are the \code{plyr} version of \code{mapply},
-#' specialised according to the type of output they produce. These functions
-#' are just a convenient wrapper around \code{a*ply} with \code{margins = 1}
-#' and \code{.fun} wrapped in \code{\link{splat}}.
-#'
-#' This function combines the result into a data frame. If there are no
-#' results, then this function will return a data frame with zero rows and
-#' columns (\code{data.frame()}).
-#'
-#'
-#' @keywords manip
-#' @param .data matrix or data frame to use as source of arguments
-#' @param .fun function to be called with varying arguments
-#' @param ... other arguments passed on to \code{.fun}
-#' @param .expand should output be 1d (expand = FALSE), with an element for
-#' each row; or nd (expand = TRUE), with a dimension for each variable.
-#' @param .progress name of the progress bar to use, see \code{\link{create_progress_bar}}
-#' @param .parallel if \code{TRUE}, apply function in parallel, using parallel
-#' backend provided by foreach
-#' @return a data frame
-#' @export
-#' @references Hadley Wickham (2011). The Split-Apply-Combine Strategy for
-#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
-#' \url{http://www.jstatsoft.org/v40/i01/}.
-#' @examples
-#' mdply(data.frame(mean = 1:5, sd = 1:5), rnorm, n = 2)
-#' mdply(expand.grid(mean = 1:5, sd = 1:5), rnorm, n = 2)
-#' mdply(cbind(mean = 1:5, sd = 1:5), rnorm, n = 5)
-#' mdply(cbind(mean = 1:5, sd = 1:5), as.data.frame(rnorm), n = 5)
-mdply <- function(.data, .fun = NULL, ..., .expand = TRUE, .progress = "none", .parallel = FALSE) {
- if (is.matrix(.data) & !is.list(.data)) .data <- .matrix_to_df(.data)
-
- f <- splat(.fun)
- adply(.data = .data, .margins = 1, .fun = f, ...,
- .expand = .expand, .progress = .progress, .parallel = .parallel)
-}
-
-#' Call function with arguments in array or data frame, returning an array.
-#'
-#' Call a multi-argument function with values taken from columns of an data frame or array, and combine results into an array
-#'
-#' The \code{m*ply} functions are the \code{plyr} version of \code{mapply},
-#' specialised according to the type of output they produce. These functions
-#' are just a convenient wrapper around \code{a*ply} with \code{margins = 1}
-#' and \code{.fun} wrapped in \code{\link{splat}}.
-#'
-#' This function combines the result into an array. If there are no results,
-#' then this function will return a vector of length 0 (\code{vector()}).
-#'
-#'
-#' @keywords manip
-#' @param .data matrix or data frame to use as source of arguments
-#' @param .fun function to be called with varying arguments
-#' @param ... other arguments passed on to \code{.fun}
-#' @param .expand should output be 1d (expand = FALSE), with an element for
-#' each row; or nd (expand = TRUE), with a dimension for each variable.
-#' @param .progress name of the progress bar to use, see \code{\link{create_progress_bar}}
-#' @param .parallel if \code{TRUE}, apply function in parallel, using parallel
-#' backend provided by foreach
-#' @return if results are atomic with same type and dimensionality, a vector, matrix or array; otherwise, a list-array (a list with dimensions)
-#' @export
-#' @references Hadley Wickham (2011). The Split-Apply-Combine Strategy for
-#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
-#' \url{http://www.jstatsoft.org/v40/i01/}.
-#' @examples
-#' maply(cbind(mean = 1:5, sd = 1:5), rnorm, n = 5)
-#' maply(expand.grid(mean = 1:5, sd = 1:5), rnorm, n = 5)
-#' maply(cbind(1:5, 1:5), rnorm, n = 5)
-maply <- function(.data, .fun = NULL, ..., .expand = TRUE, .progress = "none", .parallel = FALSE) {
- if (is.matrix(.data) & !is.list(.data)) .data <- .matrix_to_df(.data)
-
- f <- splat(.fun)
- aaply(.data = .data, .margins = 1, .fun = f, ...,
- .expand = .expand, .progress = .progress, .parallel = .parallel)
-}
-
-#' Call function with arguments in array or data frame, returning a list.
-#'
-#' Call a multi-argument function with values taken from columns of an data frame or array, and combine results into a list
-#'
-#' The \code{m*ply} functions are the \code{plyr} version of \code{mapply},
-#' specialised according to the type of output they produce. These functions
-#' are just a convenient wrapper around \code{a*ply} with \code{margins = 1}
-#' and \code{.fun} wrapped in \code{\link{splat}}.
-#'
-#' This function combines the result into a list. If there are no results,
-#' then this function will return a list of length 0 (\code{list()}).
-#'
-#'
-#' @keywords manip
-#' @param .data matrix or data frame to use as source of arguments
-#' @param .fun function to be called with varying arguments
-#' @param ... other arguments passed on to \code{.fun}
-#' @param .expand should output be 1d (expand = FALSE), with an element for
-#' each row; or nd (expand = TRUE), with a dimension for each variable.
-#' @param .progress name of the progress bar to use, see \code{\link{create_progress_bar}}
-#' @param .parallel if \code{TRUE}, apply function in parallel, using parallel
-#' backend provided by foreach
-#' @return list of results
-#' @export
-#' @references Hadley Wickham (2011). The Split-Apply-Combine Strategy for
-#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
-#' \url{http://www.jstatsoft.org/v40/i01/}.
-#' @examples
-#' mlply(cbind(1:4, 4:1), rep)
-#' mlply(cbind(1:4, times = 4:1), rep)
-#'
-#' mlply(cbind(1:4, 4:1), seq)
-#' mlply(cbind(1:4, length = 4:1), seq)
-#' mlply(cbind(1:4, by = 4:1), seq, to = 20)
-mlply <- function(.data, .fun = NULL, ..., .expand = TRUE, .progress = "none", .parallel = FALSE) {
- if (is.matrix(.data) & !is.list(.data)) .data <- .matrix_to_df(.data)
-
- f <- splat(.fun)
- alply(.data = .data, .margins = 1, .fun = f, ...,
- .expand = .expand, .progress = .progress, .parallel = .parallel)
-}
-
-#' Call function with arguments in array or data frame, discarding results.
-#'
-#' Call a multi-argument function with values taken from columns of an data frame or array, and discard results
-#'
-#' The \code{m*ply} functions are the \code{plyr} version of \code{mapply},
-#' specialised according to the type of output they produce. These functions
-#' are just a convenient wrapper around \code{a*ply} with \code{margins = 1}
-#' and \code{.fun} wrapped in \code{\link{splat}}.
-#'
-#' This function combines the result into a list. If there are no results,
-#' then this function will return a list of length 0 (\code{list()}).
-#'
-#' @keywords manip
-#' @param .data matrix or data frame to use as source of arguments
-#' @param .fun function to be called with varying arguments
-#' @param ... other arguments passed on to \code{.fun}
-#' @param .expand should output be 1d (expand = FALSE), with an element for
-#' each row; or nd (expand = TRUE), with a dimension for each variable.
-#' @param .progress name of the progress bar to use, see \code{\link{create_progress_bar}}
-#' @export
-#' @references Hadley Wickham (2011). The Split-Apply-Combine Strategy for
-#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
-#' \url{http://www.jstatsoft.org/v40/i01/}.
-m_ply <- function(.data, .fun = NULL, ..., .expand = TRUE, .progress = "none") {
- if (is.matrix(.data) & !is.list(.data)) .data <- .matrix_to_df(.data)
-
- f <- splat(.fun)
- a_ply(.data = .data, .margins = 1, .fun = f, ...,
- .expand = .expand, .progress = .progress)
-}
-
-.matrix_to_df <- function(.data) {
- cnames <- colnames(.data)
- if (is.null(cnames)) cnames <- rep("", ncol(.data))
- .data <- as.data.frame(.data, stringsAsFactors = FALSE)
- colnames(.data) <- cnames
- .data
-}
diff --git a/R/ply-null.r b/R/ply-null.r
deleted file mode 100644
index 2ca3326..0000000
--- a/R/ply-null.r
+++ /dev/null
@@ -1,100 +0,0 @@
-#' Split list, apply function, and discard results.
-#'
-#' For each element of a list, apply function and discard results
-#'
-#' All plyr functions use the same split-apply-combine strategy: they split the
-#' input into simpler pieces, apply \code{.fun} to each piece, and then combine
-#' the pieces into a single data structure. This function splits lists by
-#' elements and discards the output. This is useful for functions that you are
-#' calling purely for their side effects like display plots and saving output.
-#'
-#'
-#' @keywords manip
-#' @param .data list to be processed
-#' @param .fun function to apply to each piece
-#' @param ... other arguments passed on to \code{.fun}
-#' @param .progress name of the progress bar to use, see \code{\link{create_progress_bar}}
-#' @param .print automatically print each result? (default: \code{FALSE})
-#' @export
-#' @references Hadley Wickham (2011). The Split-Apply-Combine Strategy for
-#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
-#' \url{http://www.jstatsoft.org/v40/i01/}.
-l_ply <- function(.data, .fun = NULL, ..., .progress = "none", .print = FALSE) {
- if (is.character(.fun) || is.list(.fun)) .fun <- each(.fun)
- if (!is.function(.fun)) stop(".fun is not a function.")
-
- progress <- create_progress_bar(.progress)
- progress$init(length(.data))
- on.exit(progress$term())
-
- .data <- as.list(.data)
- for(i in seq_along(.data)) {
- x <- .fun(.data[[i]], ...)
- if (.print) print(x)
- progress$step()
- }
-
- invisible()
-}
-
-#' Split data frame, apply function, and discard results.
-#'
-#' For each subset of a data frame, apply function and discard results
-#'
-#' All plyr functions use the same split-apply-combine strategy: they split the
-#' input into simpler pieces, apply \code{.fun} to each piece, and then combine
-#' the pieces into a single data structure. This function splits data frames
-#' by variable and discards the output. This is useful for functions that you
-#' are calling purely for their side effects like display plots and saving
-#' output.
-#'
-#'
-#' @keywords manip
-#' @param .data data frame to be processed
-#' @param .variables variables to split data frame by, as quoted variables, a formula or character vector
-#' @param .fun function to apply to each piece
-#' @param ... other arguments passed on to \code{.fun}
-#' @param .progress name of the progress bar to use, see \code{\link{create_progress_bar}}
-#' @param .print automatically print each result? (default: \code{FALSE})
-#' @export
-#' @references Hadley Wickham (2011). The Split-Apply-Combine Strategy for
-#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
-#' \url{http://www.jstatsoft.org/v40/i01/}.
-d_ply <- function(.data, .variables, .fun = NULL, ..., .progress = "none", .print = FALSE) {
- .variables <- as.quoted(.variables)
- pieces <- splitter_d(.data, .variables)
-
- l_ply(.data = pieces, .fun = .fun, ..., .progress = .progress, .print = .print)
-}
-
-#' Split array, apply function, and discard results.
-#'
-#' For each slice of an array, apply function and discard results
-#'
-#' All plyr functions use the same split-apply-combine strategy: they split the
-#' input into simpler pieces, apply \code{.fun} to each piece, and then combine
-#' the pieces into a single data structure. This function splits matrices,
-#' arrays and data frames by dimensions and discards the output. This is
-#' useful for functions that you are calling purely for their side effects like
-#' display plots and saving output.
-#'
-#'
-#' @keywords manip
-#' @param .data matrix, array or data frame to be processed
-#' @param .margins a vector giving the subscripts to split up \code{data} by. 1 splits up by rows, 2 by columns and c(1,2) by rows and columns, and so on for higher dimensions
-#' @param .fun function to apply to each piece
-#' @param ... other arguments passed on to \code{.fun}
-#' @param .expand if \code{.data} is a data frame, should output be 1d
-#' (expand = FALSE), with an element for each row; or nd (expand = TRUE),
-#' with a dimension for each variable.
-#' @param .progress name of the progress bar to use, see \code{\link{create_progress_bar}}
-#' @param .print automatically print each result? (default: \code{FALSE})
-#' @export
-#' @references Hadley Wickham (2011). The Split-Apply-Combine Strategy for
-#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
-#' \url{http://www.jstatsoft.org/v40/i01/}.
-a_ply <- function(.data, .margins, .fun = NULL, ..., .expand = TRUE, .progress = "none", .print = FALSE) {
- pieces <- splitter_a(.data, .margins, .expand)
-
- l_ply(.data = pieces, .fun = .fun, ..., .progress = .progress, .print = .print)
-}
diff --git a/R/ply-replicate.r b/R/ply-replicate.r
deleted file mode 100644
index b8c6a1b..0000000
--- a/R/ply-replicate.r
+++ /dev/null
@@ -1,160 +0,0 @@
-#' Replicate expression and return results in a list.
-#'
-#' Evalulate expression n times then combine results into a list
-#'
-#' This function runs an expression multiple times, and combines the
-#' result into a list. If there are no results, then this function will return
-#' a list of length 0 (\code{list()}). This function is equivalent to
-#' \code{\link{replicate}}, but will always return results as a list.
-#'
-#'
-#' @keywords manip
-#' @param .n number of times to evaluate the expression
-#' @param .expr expression to evaluate
-#' @param .progress name of the progress bar to use, see \code{\link{create_progress_bar}}
-#' @return list of results
-#' @export
-#' @references Hadley Wickham (2011). The Split-Apply-Combine Strategy for
-#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
-#' \url{http://www.jstatsoft.org/v40/i01/}.
-#' @examples
-#' mods <- rlply(100, lm(y ~ x, data=data.frame(x=rnorm(100), y=rnorm(100))))
-#' hist(laply(mods, function(x) summary(x)$r.squared))
-rlply <- function(.n, .expr, .progress = "none") {
- if (is.function(.expr)) {
- f <- .expr
- } else {
- f <- eval.parent(substitute(function() .expr))
- }
-
- progress <- create_progress_bar(.progress)
- result <- vector("list", length = .n)
-
- progress$init(.n)
- on.exit(progress$term())
-
- for(i in seq_len(.n)) {
- result[i] <- list(f())
- progress$step()
- }
-
- result
-}
-
-#' Replicate expression and return results in a data frame.
-#'
-#' Evalulate expression n times then combine results into a data frame
-#'
-#' This function runs an expression multiple times, and combines the
-#' result into a data frame. If there are no results, then this function
-#' returns a data frame with zero rows and columns (\code{data.frame()}).
-#' This function is equivalent to \code{\link{replicate}}, but will always
-#' return results as a data frame.
-#'
-#'
-#' @keywords manip
-#' @param .n number of times to evaluate the expression
-#' @param .expr expression to evaluate
-#' @param .progress name of the progress bar to use, see \code{\link{create_progress_bar}}
-#' @return a data frame
-#' @export
-#' @references Hadley Wickham (2011). The Split-Apply-Combine Strategy for
-#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
-#' \url{http://www.jstatsoft.org/v40/i01/}.
-#' @examples
-#' rdply(20, mean(runif(100)))
-#' rdply(20, each(mean, var)(runif(100)))
-#' rdply(20, data.frame(x = runif(2)))
-rdply <- function(.n, .expr, .progress = "none") {
- if (is.function(.expr)) {
- f <- .expr
- } else {
- f <- eval.parent(substitute(function() .expr))
- }
-
- res <- rlply(.n = .n, .expr = f, .progress = .progress)
- labels <- data.frame(.n = seq_len(.n))
- list_to_dataframe(res, labels)
-}
-
-
-#' Replicate expression and return results in a array.
-#'
-#' Evalulate expression n times then combine results into an array
-#'
-#' This function runs an expression multiple times, and combines the
-#' result into a data frame. If there are no results, then this function
-#' returns a vector of length 0 (\code{vector(0)}).
-#' This function is equivalent to \code{\link{replicate}}, but will always
-#' return results as a vector, matrix or array.
-#'
-#' @keywords manip
-#' @param .n number of times to evaluate the expression
-#' @param .expr expression to evaluate
-#' @param .progress name of the progress bar to use, see \code{\link{create_progress_bar}}
-#' @return if results are atomic with same type and dimensionality, a vector, matrix or array; otherwise, a list-array (a list with dimensions)
-#' @param .drop should extra dimensions of length 1 be dropped, simplifying the output. Defaults to \code{TRUE}
-#' @export
-#' @references Hadley Wickham (2011). The Split-Apply-Combine Strategy for
-#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
-#' \url{http://www.jstatsoft.org/v40/i01/}.
-#' @examples
-#' raply(100, mean(runif(100)))
-#' raply(100, each(mean, var)(runif(100)))
-#'
-#' raply(10, runif(4))
-#' raply(10, matrix(runif(4), nrow=2))
-#'
-#' # See the central limit theorem in action
-#' hist(raply(1000, mean(rexp(10))))
-#' hist(raply(1000, mean(rexp(100))))
-#' hist(raply(1000, mean(rexp(1000))))
-raply <- function(.n, .expr, .progress = "none", .drop = TRUE) {
- if (is.function(.expr)) {
- f <- .expr
- } else {
- f <- eval.parent(substitute(function() .expr))
- }
-
- res <- rlply(.n = .n, .expr = f, .progress = .progress)
- list_to_array(res, NULL, .drop)
-}
-
-#' Replicate expression and discard results.
-#'
-#' Evalulate expression n times then discard results
-#'
-#' This function runs an expression multiple times, discarding the results.
-#' This function is equivalent to \code{\link{replicate}}, but never returns
-#' anything
-#'
-#' @keywords manip
-#' @param .n number of times to evaluate the expression
-#' @param .expr expression to evaluate
-#' @param .progress name of the progress bar to use, see \code{\link{create_progress_bar}}
-#' @param .print automatically print each result? (default: \code{FALSE})
-#' @export
-#' @references Hadley Wickham (2011). The Split-Apply-Combine Strategy for
-#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
-#' \url{http://www.jstatsoft.org/v40/i01/}.
-#' @examples
-#' r_ply(10, plot(runif(50)))
-#' r_ply(25, hist(runif(1000)))
-r_ply <- function(.n, .expr, .progress = "none", .print = FALSE) {
- if (is.function(.expr)) {
- f <- .expr
- } else {
- f <- eval.parent(substitute(function() .expr))
- }
-
- progress <- create_progress_bar(.progress)
-
- progress$init(.n)
- on.exit(progress$term())
-
- for(i in seq_len(.n)) {
- f()
- progress$step()
- }
- progress$term()
-}
diff --git a/R/plyr.r b/R/plyr.r
new file mode 100644
index 0000000..6a948d4
--- /dev/null
+++ b/R/plyr.r
@@ -0,0 +1,69 @@
+#' plyr: the split-apply-combine paradigm for R.
+#'
+#' The plyr package is a set of clean and consistent tools that implement the
+#' split-apply-combine pattern in R. This is an extremely common pattern in
+#' data analysis: you solve a complex problem by breaking it down into small
+#' pieces, doing something to each piece and then combining the results back
+#' together again.
+#'
+#' The plyr functions are named according to what sort of data structure they
+#' split up and what sort of data structure they return:
+#'
+#' \describe{
+#' \item{a}{array}
+#' \item{l}{list}
+#' \item{d}{data.frame}
+#' \item{m}{multiple inputs}
+#' \item{r}{repeat multiple times}
+#' \item{_}{nothing}
+#' }
+#'
+#' So \code{\link{ddply}} takes a data frame as input and returns a data frame
+#' as output, and \code{\link{l_ply}} takes a list as input and returns nothing
+#' as output.
+#'
+#' @section Row names:
+#'
+#' By design, no plyr function will preserve row names - in general it is too
+#' hard to know what should be done with them for many of the operations
+#' supported by plyr. If you want to preserve row names, use
+#' \code{\link{name_rows}} to convert them into an explicit column in your
+#' data frame, perform the plyr operations, and then use \code{\link{name_rows}}
+#' again to convert the column back into row names.
+#'
+#' @section Helpers:
+#'
+#' Plyr also provides a set of helper functions for common data analysis
+#' problems:
+#'
+#' \itemize{
+#' \item \code{\link{arrange}}: re-order the rows of a data frame by
+#' specifying the columns to order by
+#' \item \code{\link{mutate}}: add new columns or modifying existing columns,
+#' like \code{\link{transform}}, but new columns can refer to other columns
+#' that you just created.
+#' \item \code{\link{summarise}}: like \code{\link{mutate}} but create a
+#' new data frame, not preserving any columns in the old data frame.
+#'
+#' \item \code{\link{join}}: an adapation of \code{\link{merge}} which is
+#' more similar to SQL, and has a much faster implementation if you only
+#' want to find the first match.
+#' \item \code{\link{match_df}}: a version of \code{\link{join}} that instead
+#' of returning the two tables combined together, only returns the rows
+#' in the first table that match the second.
+#'
+#' \item \code{\link{colwise}}: make any function work colwise on a dataframe
+#' \item \code{\link{rename}}: easily rename columns in a data frame
+#' \item \code{\link{round_any}}: round a number to any degree of precision
+#' \item \code{\link{count}}: quickly count unique combinations and return
+#' return as a data frame.
+#' }
+#'
+#' @docType package
+#' @name plyr
+NULL
+
+
+.onUnload <- function (libpath) {
+ library.dynam.unload("plyr", libpath)
+}
diff --git a/R/progress-time.r b/R/progress-time.r
new file mode 100644
index 0000000..615beb0
--- /dev/null
+++ b/R/progress-time.r
@@ -0,0 +1,88 @@
+#' Text progress bar with time.
+#'
+#' A textual progress bar that estimates time remaining. It displays the
+#' estimated time remaining and, when finished, total duration.
+#'
+#' @family progress bars
+#' @export
+#' @examples
+#' l_ply(1:100, function(x) Sys.sleep(.01), .progress = "time")
+progress_time <- function() {
+ n <- 0
+ txt <- NULL
+ list(
+ init = function(x) {
+ txt <<- txtTimerBar(x)
+ setTxtProgressBar(txt, 0)
+ },
+ step = function() {
+ n <<- n + 1
+ setTxtProgressBar(txt, n)
+ },
+ term = function() close(txt)
+ )
+}
+
+txtTimerBar <- function(n = 1) {
+ start <- .last_update_time <- proc.time()[3]
+ times <- numeric(n)
+ value <- NULL
+
+ killed <- FALSE
+
+ width <- getOption("width") - nchar('||100% ~ 999.9 h remaining.')
+
+ update <- function(i) {
+ if (i == 0) return()
+
+ value <<- i
+ times[i] <- proc.time()[3] - start
+
+ avg <- times[i] / i
+ time_left <- (n - i) * avg
+
+ nbars <- trunc(i / n * width)
+
+ cat_line("|", str_rep("=", nbars), str_rep(" ", width - nbars), "|",
+ format(i / n * 100, width = 3), "% ~", show_time(time_left), " remaining")
+ }
+ getVal <- function() value
+ kill <- function(){
+ if (killed) return()
+ killed <<- TRUE
+
+ if (value == n) {
+ cat_line("|", str_rep("=", width), "|100%")
+ cat("Completed after", show_time(proc.time()[3] - start), "\n")
+ } else {
+ cat("Killed after", show_time(proc.time()[3] - start), "\n")
+ }
+ }
+
+ cat_line("|", str_rep(" ", width), "| 0%")
+
+ structure(
+ list(getVal = getVal, up = update, kill = kill),
+ class = "txtProgressBar")
+}
+
+show_time <- function(x) {
+ if (x < 60) {
+ paste(round(x), "s")
+ } else if (x < 60 * 60) {
+ paste(round(x / 60), "m")
+ } else {
+ paste(round(x / (60 * 60)), "h")
+ }
+}
+
+cat_line <- function(...) {
+ msg <- paste(..., sep = "", collapse = "")
+ gap <- max(c(0, getOption("width") - nchar(msg, "width")))
+ cat("\r", msg, rep.int(" ", gap), sep = "")
+ flush.console()
+}
+
+str_rep <- function(x, i) {
+ paste(rep.int(x, i), collapse = "")
+}
diff --git a/R/progress.r b/R/progress.r
index 0975cb5..0e60213 100644
--- a/R/progress.r
+++ b/R/progress.r
@@ -1,39 +1,45 @@
#' Create progress bar.
#'
#' Create progress bar object from text string.
-#'
+#'
#' Progress bars give feedback on how apply step is proceeding. This
-#' is mainly useful for long running functions, as for short functions, the
-#' time taken up by splitting and combining may be on the same order (or
+#' is mainly useful for long running functions, as for short functions, the
+#' time taken up by splitting and combining may be on the same order (or
#' longer) as the apply step. Additionally, for short functions, the time
#' needed to update the progress bar can significantly slow down the process.
#' For the trivial examples below, using the tk progress bar slows things down
#' by a factor of a thousand.
-#'
+#'
#' Note the that progress bar is approximate, and if the time taken by
#' individual function applications is highly non-uniform it may not be very
#' informative of the time left.
-#'
+#'
#' There are currently four types of progress bar: "none", "text", "tk", and
-#' "win". See the individual documentation for more details. In plyr
+#' "win". See the individual documentation for more details. In plyr
#' functions, these can either be specified by name, or you can create the
#' progress bar object yourself if you want more control over its apperance.
#' See the examples.
-#'
+#'
#' @param name type of progress bar to create
#' @param ... other arguments passed onto progress bar function
#' @seealso \code{\link{progress_none}}, \code{\link{progress_text}}, \code{\link{progress_tk}}, \code{\link{progress_win}}
#' @keywords utilities
#' @export
#' @examples
+#' # No progress bar
#' l_ply(1:100, identity, .progress = "none")
+#' \dontrun{
+#' # Use the Tcl/Tk interface
#' l_ply(1:100, identity, .progress = "tk")
+#' }
+#' # Text-based progress (|======|)
#' l_ply(1:100, identity, .progress = "text")
-#' l_ply(1:100, identity, .progress = progress_text(char = "-"))
+#' # Choose a progress character, run a length of time you can see
+#' l_ply(1:10000, identity, .progress = progress_text(char = "."))
create_progress_bar <- function(name = "none", ...) {
if (!is.character(name)) return(name)
name <- paste("progress", name, sep="_")
-
+
if (!exists(name, mode = "function")) {
warning("Cannot find progress bar ", name, call. = FALSE)
progress_none()
@@ -45,10 +51,10 @@ create_progress_bar <- function(name = "none", ...) {
#' Null progress bar
#'
#' A progress bar that does nothing
-#'
+#'
#' This the default progress bar used by plyr functions. It's very simple to
#' understand - it does nothing!
-#'
+#'
#' @keywords internal
#' @family progress bars
#' @export
@@ -65,9 +71,9 @@ progress_none <- function() {
#' Text progress bar.
#'
#' A textual progress bar
-#'
-#' This progress bar displays a textual progress bar that works on all
-#' platforms. It is a thin wrapper around the built-in
+#'
+#' This progress bar displays a textual progress bar that works on all
+#' platforms. It is a thin wrapper around the built-in
#' \code{\link{setTxtProgressBar}} and can be customised in the same way.
#'
#' @param style style of text bar, see Details section of \code{\link{txtProgressBar}}
@@ -80,7 +86,7 @@ progress_none <- function() {
progress_text <- function(style = 3, ...) {
n <- 0
txt <- NULL
-
+
list(
init = function(x) {
txt <<- txtProgressBar(max = x, style = style, ...)
@@ -97,9 +103,9 @@ progress_text <- function(style = 3, ...) {
#' Graphical progress bar, powered by Tk.
#'
#' A graphical progress bar displayed in a Tk window
-#'
+#'
#' This graphical progress will appear in a separate window.
-#'
+#'
#' @param title window title
#' @param label progress bar label (inside window)
#' @param ... other arguments passed on to \code{\link[tcltk]{tkProgressBar}}
@@ -107,14 +113,16 @@ progress_text <- function(style = 3, ...) {
#' @family progress bars
#' @export
#' @examples
+#' \dontrun{
#' l_ply(1:100, identity, .progress = "tk")
#' l_ply(1:100, identity, .progress = progress_tk(width=400))
#' l_ply(1:100, identity, .progress = progress_tk(label=""))
+#' }
progress_tk <- function(title = "plyr progress", label = "Working...", ...) {
- stopifnot(require("tcltk", quiet=TRUE))
+ stopifnot(require("tcltk", quietly = TRUE))
n <- 0
tk <- NULL
-
+
list(
init = function(x) {
tk <<- tkProgressBar(max = x, title = title, label = label, ...)
@@ -131,9 +139,9 @@ progress_tk <- function(title = "plyr progress", label = "Working...", ...) {
#' Graphical progress bar, powered by Windows.
#'
#' A graphical progress bar displayed in a separate window
-#'
+#'
#' This graphical progress only works on Windows.
-#'
+#'
#' @param title window title
#' @param ... other arguments passed on to \code{winProgressBar}
#' @seealso \code{winProgressBar} for the function that powers this progress bar
@@ -147,7 +155,7 @@ progress_tk <- function(title = "plyr progress", label = "Working...", ...) {
progress_win <- function(title = "plyr progress", ...) {
n <- 0
win <- NULL
-
+
list(
init = function(x) {
win <<- winProgressBar(max = x, title = title, ...)
diff --git a/R/helper-quick-df.r b/R/quickdf.r
similarity index 69%
rename from R/helper-quick-df.r
rename to R/quickdf.r
index 8edb8dc..8cebb88 100644
--- a/R/helper-quick-df.r
+++ b/R/quickdf.r
@@ -1,6 +1,6 @@
#' Quick data frame.
#'
-#' Experimental version of \code{\link{as.data.frame}} that converts a
+#' Experimental version of \code{\link{as.data.frame}} that converts a
#' list to a data frame, but doesn't do any checks to make sure it's a
#' valid format. Much faster.
#'
@@ -10,20 +10,21 @@
quickdf <- function(list) {
rows <- unique(unlist(lapply(list, NROW)))
stopifnot(length(rows) == 1)
-
+
names(list) <- make_names(list, "X")
-
- structure(list,
- class = "data.frame",
- row.names = seq_len(rows))
+ class(list) <- "data.frame"
+ attr(list, "row.names") <- c(NA_integer_, -rows)
+
+ list
}
make_names <- function(x, prefix = "X") {
nm <- names(x)
if (is.null(nm)) {
- nm <- rep("", length = length(x))
+ nm <- rep.int("", length(x))
}
-
- nm[nm == ""] <- paste(prefix, seq_len(sum(nm == "")), sep = "")
+
+ n <- sum(nm == "", na.rm = TRUE)
+ nm[nm == ""] <- paste(prefix, seq_len(n), sep = "")
nm
}
diff --git a/R/quote.r b/R/quote.r
index 3d68714..79eb313 100644
--- a/R/quote.r
+++ b/R/quote.r
@@ -1,19 +1,19 @@
#' Quote variables to create a list of unevaluated expressions for later
#' evaluation.
-#'
+#'
#' This function is similar to \code{\link{~}} in that it is used to
#' capture the name of variables, not their current value. This is used
#' throughout plyr to specify the names of variables (or more complicated
#' expressions).
-#'
+#'
#' Similar tricks can be performed with \code{\link{substitute}}, but when
#' functions can be called in multiple ways it becomes increasingly tricky
#' to ensure that the values are extracted from the correct frame. Substitute
#' tricks also make it difficult to program against the functions that use
-#' them, while the \code{quoted} class provides
+#' them, while the \code{quoted} class provides
#' \code{as.quoted.character} to convert strings to the appropriate
#' data structure.
-#'
+#'
#' @param ... unevaluated expressions to be recorded. Specify names if you
#' want the set the names of the resultant variables
#' @param .env environment in which unbound symbols in \code{...} should be
@@ -29,7 +29,7 @@
#' as.quoted(~ a + b + c)
#' as.quoted(a ~ b + c)
#' as.quoted(c("a", "b", "c"))
-#'
+#'
#' # Some examples using ddply - look at the column names
#' ddply(mtcars, "cyl", each(nrow, ncol))
#' ddply(mtcars, ~ cyl, each(nrow, ncol))
@@ -47,7 +47,7 @@ is.quoted <- function(x) inherits(x, "quoted")
#' Print quoted variables.
#'
#' Display the \code{\link{str}}ucture of quoted variables
-#'
+#'
#' @keywords internal
#' @S3method print quoted
#' @method print quoted
@@ -56,9 +56,9 @@ print.quoted <- function(x, ...) str(x)
#' Compute names of quoted variables.
#'
#' Figure out names of quoted variables, using specified names if they exist,
-#' otherwise converting the values to character strings. This may create
+#' otherwise converting the values to character strings. This may create
#' variable names that can only be accessed using \code{``}.
-#'
+#'
#' @keywords internal
#' @S3method names quoted
#' @method names quoted
@@ -70,14 +70,14 @@ names.quoted <- function(x) {
if (!is.null(user_names)) {
part_names[user_names != ""] <- user_names[user_names != ""]
}
-
+
unname(part_names)
}
#' Evaluate a quoted list of variables.
#'
#' Evaluates quoted variables in specified environment
-#'
+#'
#' @return a list
#' @keywords internal
#' @param expr quoted object to evalution
@@ -86,36 +86,40 @@ names.quoted <- function(x) {
eval.quoted <- function(exprs, envir = NULL, enclos = NULL, try = FALSE) {
if (is.numeric(exprs)) return(envir[exprs])
+ if (!is.null(envir) && !is.list(envir) && !is.environment(envir)) {
+ stop("envir must be either NULL, a list, or an environment.")
+ }
+
qenv <- if (is.quoted(exprs)) attr(exprs, "env") else parent.frame()
if (is.null(envir)) envir <- qenv
if (is.data.frame(envir) && is.null(enclos)) enclos <- qenv
-
+
if (try) {
- results <- lapply(exprs, failwith(NULL, eval, quiet = TRUE),
+ results <- lapply(exprs, failwith(NULL, eval, quiet = TRUE),
envir = envir, enclos = enclos)
} else {
results <- lapply(exprs, eval, envir = envir, enclos = enclos)
}
names(results) <- names(exprs)
-
+
results
}
#' Convert input to quoted variables.
#'
#' Convert characters, formulas and calls to quoted .variables
-#'
-#' This method is called by default on all plyr functions that take a
+#'
+#' This method is called by default on all plyr functions that take a
#' \code{.variables} argument, so that equivalent forms can be used anywhere.
-#'
-#' Currently conversions exist for character vectors, formulas and
+#'
+#' Currently conversions exist for character vectors, formulas and
#' call objects.
-#'
+#'
#' @return a list of quoted variables
#' @seealso \code{\link{.}}
#' @param x input to quote
#' @param env environment in which unbound symbols in expression should be
-#' evaluated. Defaults to the environment in which \code{as.quoted} was
+#' evaluated. Defaults to the environment in which \code{as.quoted} was
#' executed.
#' @export
#' @examples
@@ -131,7 +135,7 @@ as.quoted.call <- function(x, env = parent.frame()) {
#' @S3method as.quoted character
as.quoted.character <- function(x, env = parent.frame()) {
structure(
- lapply(x, function(x) parse(text = x)[[1]]),
+ lapply(x, function(x) parse(text = x)[[1]]),
env = env, class = "quoted"
)
}
@@ -182,7 +186,7 @@ as.quoted.factor <- function(x, env = parent.frame()) {
#' @S3method c quoted
c.quoted <- function(..., recursive = FALSE) {
- structure(NextMethod("c"), class = "quoted",
+ structure(NextMethod("c"), class = "quoted",
env = attr(list(...)[[1]], "env"))
}
@@ -193,7 +197,7 @@ c.quoted <- function(..., recursive = FALSE) {
#' Is a formula?
#' Checks if argument is a formula
-#'
+#'
#' @keywords internal
#' @export
is.formula <- function(x) inherits(x, "formula")
diff --git a/R/r_ply.r b/R/r_ply.r
new file mode 100644
index 0000000..83868db
--- /dev/null
+++ b/R/r_ply.r
@@ -0,0 +1,38 @@
+#' Replicate expression and discard results.
+#'
+#' Evalulate expression n times then discard results
+#'
+#' This function runs an expression multiple times, discarding the results.
+#' This function is equivalent to \code{\link{replicate}}, but never returns
+#' anything
+#'
+#' @keywords manip
+#' @param .n number of times to evaluate the expression
+#' @param .expr expression to evaluate
+#' @param .progress name of the progress bar to use, see \code{\link{create_progress_bar}}
+#' @param .print automatically print each result? (default: \code{FALSE})
+#' @export
+#' @references Hadley Wickham (2011). The Split-Apply-Combine Strategy for
+#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
+#' \url{http://www.jstatsoft.org/v40/i01/}.
+#' @examples
+#' r_ply(10, plot(runif(50)))
+#' r_ply(25, hist(runif(1000)))
+r_ply <- function(.n, .expr, .progress = "none", .print = FALSE) {
+ if (is.function(.expr)) {
+ f <- .expr
+ } else {
+ f <- eval.parent(substitute(function() .expr))
+ }
+
+ progress <- create_progress_bar(.progress)
+
+ progress$init(.n)
+ on.exit(progress$term())
+
+ for(i in seq_len(.n)) {
+ f()
+ progress$step()
+ }
+ progress$term()
+}
diff --git a/R/raply.r b/R/raply.r
new file mode 100644
index 0000000..8cf24d7
--- /dev/null
+++ b/R/raply.r
@@ -0,0 +1,41 @@
+#' Replicate expression and return results in a array.
+#'
+#' Evalulate expression n times then combine results into an array
+#'
+#' This function runs an expression multiple times, and combines the
+#' result into a data frame. If there are no results, then this function
+#' returns a vector of length 0 (\code{vector(0)}).
+#' This function is equivalent to \code{\link{replicate}}, but will always
+#' return results as a vector, matrix or array.
+#'
+#' @keywords manip
+#' @param .n number of times to evaluate the expression
+#' @param .expr expression to evaluate
+#' @param .progress name of the progress bar to use, see \code{\link{create_progress_bar}}
+#' @return if results are atomic with same type and dimensionality, a vector, matrix or array; otherwise, a list-array (a list with dimensions)
+#' @param .drop should extra dimensions of length 1 be dropped, simplifying the output. Defaults to \code{TRUE}
+#' @export
+#' @references Hadley Wickham (2011). The Split-Apply-Combine Strategy for
+#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
+#' \url{http://www.jstatsoft.org/v40/i01/}.
+#' @examples
+#' raply(100, mean(runif(100)))
+#' raply(100, each(mean, var)(runif(100)))
+#'
+#' raply(10, runif(4))
+#' raply(10, matrix(runif(4), nrow=2))
+#'
+#' # See the central limit theorem in action
+#' hist(raply(1000, mean(rexp(10))))
+#' hist(raply(1000, mean(rexp(100))))
+#' hist(raply(1000, mean(rexp(1000))))
+raply <- function(.n, .expr, .progress = "none", .drop = TRUE) {
+ if (is.function(.expr)) {
+ f <- .expr
+ } else {
+ f <- eval.parent(substitute(function() .expr))
+ }
+
+ res <- rlply(.n = .n, .expr = f, .progress = .progress)
+ list_to_array(res, NULL, .drop)
+}
diff --git a/R/rbind-matrix.r b/R/rbind-fill-matrix.r
similarity index 78%
rename from R/rbind-matrix.r
rename to R/rbind-fill-matrix.r
index 072e2f8..85a58fa 100644
--- a/R/rbind-matrix.r
+++ b/R/rbind-fill-matrix.r
@@ -4,19 +4,20 @@
#' indices (in that order of precedence.) Numeric columns may be converted to
#' character beforehand, e.g. using format. If a matrix doesn't have
#' colnames, the column number is used. Note that this means that a
-#' column with name \code{"1"} is merged with the first column of a matrix
+#' column with name \code{"1"} is merged with the first column of a matrix
#' without name and so on. The returned matrix will always have column names.
#'
#' Vectors are converted to 1-column matrices.
#'
-#' Matrices of factors are not supported. (They are anyways quite
+#' Matrices of factors are not supported. (They are anyways quite
#' inconvenient.) You may convert them first to either numeric or character
-#' matrices. If a matrices of different types are merged, then normal
+#' matrices. If a matrices of different types are merged, then normal
#' covnersion precendence will apply.
#'
#' Row names are ignored.
#'
-#' @param ... the matrices to rbind
+#' @param ... the matrices to rbind. The first argument can be a list of
+#' matrices, in which case all other arguments are ignored.
#' @return a matrix with column names
#' @author C. Beleites
#' @seealso \code{\link[base]{rbind}}, \code{\link[base]{cbind}},
@@ -24,50 +25,54 @@
#' @family binding functions
#' @export
#' @keywords manip
-#' @examples
+#' @examples
#' A <- matrix (1:4, 2)
#' B <- matrix (6:11, 2)
#' A
#' B
#' rbind.fill.matrix (A, B)
-#'
+#'
#' colnames (A) <- c (3, 1)
#' A
#' rbind.fill.matrix (A, B)
-#'
+#'
#' rbind.fill.matrix (A, 99)
rbind.fill.matrix <- function(...) {
matrices <- list(...)
-
+ if (length(matrices) == 0) return()
+ if (is.list(matrices[[1]]) && !is.matrix(matrices[[1]])) {
+ matrices <- matrices[[1]]
+ }
+
## check the arguments
tmp <- unlist(lapply(matrices, is.factor))
if (any(tmp)) {
- stop("Input ", paste(which(tmp), collapse = ", "), " is a factor and ",
+ stop("Input ", paste(which(tmp), collapse = ", "), " is a factor and ",
"needs to be converted first to either numeric or character.")
}
-
+
matrices[] <- lapply(matrices, as.matrix)
-
+
# Work out common column names
lcols <- lapply(matrices, function(x) amv_dimnames(x)[[2]])
cols <- unique(unlist(lcols))
-
+
# Calculate rows in output
rows <- unlist(lapply(matrices, nrow))
nrows <- sum(rows)
-
+
# Generate output template
output <- matrix(NA, nrow = nrows, ncol = length(cols))
colnames(output) <- cols
-
- # Compute start and end positions for each matrix
- pos <- matrix(cumsum(rbind(1, rows - 1)), ncol = 2, byrow = TRUE)
-
- ## fill in the new matrix
- for(i in seq_along(rows)) {
- rng <- pos[i, 1]:pos[i, 2]
+
+ # Compute start and length for each matrix
+ pos <- matrix(c(cumsum(rows) - rows + 1, rows), ncol = 2)
+
+ ## fill in the new matrix
+ for(i in seq_along(rows)) {
+ rng <- seq(pos[i, 1], length = pos[i, 2])
output[rng, lcols[[i]]] <- matrices[[i]]
}
-
+
output
}
diff --git a/R/rbind.r b/R/rbind-fill.r
similarity index 81%
rename from R/rbind.r
rename to R/rbind-fill.r
index 242d309..5f80480 100644
--- a/R/rbind.r
+++ b/R/rbind-fill.r
@@ -1,16 +1,17 @@
#' Combine data.frames by row, filling in missing columns.
#'
#' \code{rbind}s a list of data frames filling missing columns with NA.
-#'
+#'
#' This is an enhancement to \code{\link{rbind}} that adds in columns
-#' that are not present in all inputs, accepts a list of data frames, and
+#' that are not present in all inputs, accepts a list of data frames, and
#' operates substantially faster.
#'
-#' Column names and types in the output will appear in the order in which
+#' Column names and types in the output will appear in the order in which
#' they were encountered. No checking is performed to ensure that each column
#' is of consistent type in the inputs.
-#'
-#' @param ... input data frames to row bind together
+#'
+#' @param ... input data frames to row bind together. The first argument can
+#' be a list of data frames, in which case all other arguments are ignored.
#' @keywords manip
#' @family binding functions
#' @return a single data frame
@@ -23,27 +24,36 @@ rbind.fill <- function(...) {
if (is.list(dfs[[1]]) && !is.data.frame(dfs[[1]])) {
dfs <- dfs[[1]]
}
- dfs <- Filter(Negate(empty), dfs)
-
+
if (length(dfs) == 0) return()
if (length(dfs) == 1) return(dfs[[1]])
-
+
+ # Check that all inputs are data frames
+ is_df <- vapply(dfs, is.data.frame, logical(1))
+ if (any(!is_df)) {
+ stop("All inputs to rbind.fill must be data.frames", call. = FALSE)
+ }
+
# Calculate rows in output
# Using .row_names_info directly is about 6 times faster than using nrow
rows <- unlist(lapply(dfs, .row_names_info, 2L))
nrows <- sum(rows)
-
+
# Generate output template
output <- output_template(dfs, nrows)
-
- # Compute start and end positions for each data frame
- pos <- matrix(cumsum(rbind(1, rows - 1)), ncol = 2, byrow = TRUE)
-
+ # Case of zero column inputs
+ if (length(output) == 0) {
+ return(as.data.frame(matrix(nrow = nrows, ncol = 0)))
+ }
+
+ # Compute start and length for each data frame
+ pos <- matrix(c(cumsum(rows) - rows + 1, rows), ncol = 2)
+
# Copy inputs into output
- for(i in seq_along(rows)) {
- rng <- pos[i, 1]:pos[i, 2]
+ for(i in seq_along(rows)) {
+ rng <- seq(pos[i, 1], length = pos[i, 2])
df <- dfs[[i]]
-
+
for(var in names(df)) {
if (!is.matrix(output[[var]])) {
if (is.factor(output[[var]]) && is.character(df[[var]])) {
@@ -54,8 +64,8 @@ rbind.fill <- function(...) {
output[[var]][rng, ] <- df[[var]]
}
}
- }
-
+ }
+
quickdf(output)
}
@@ -63,15 +73,15 @@ output_template <- function(dfs, nrows) {
vars <- unique(unlist(lapply(dfs, base::names))) # ~ 125,000/s
output <- vector("list", length(vars))
names(output) <- vars
-
+
seen <- rep(FALSE, length(output))
names(seen) <- vars
-
+
is_array <- seen
is_matrix <- seen
is_factor <- seen
-
- for(df in dfs) {
+
+ for(df in dfs) {
matching <- intersect(names(df), vars[!seen])
for(var in matching) {
value <- df[[var]]
@@ -111,22 +121,22 @@ output_template <- function(dfs, nrows) {
# Set up matrices
for(var in vars[is_matrix]) {
width <- unique(unlist(lapply(dfs, function(df) ncol(df[[var]]))))
- if (length(width) > 1)
+ if (length(width) > 1)
stop("Matrix variable ", var, " has inconsistent widths")
-
+
vec <- rep(NA, nrows * width)
output[[var]] <- array(vec, c(nrows, width))
}
-
+
# Set up arrays
for (var in vars[is_array]) {
dims <- unique(unlist(lapply(dfs, function(df) dims(df[[var]]))))
if (any(dims) > 1) {
stop("rbind.fill can only work with 1d arrays")
}
-
- output[[var]] <- rep(NA, nrows)
+
+ output[[var]] <- rep(NA, nrows)
}
-
+
output
}
diff --git a/R/rdply.r b/R/rdply.r
new file mode 100644
index 0000000..edd3bf9
--- /dev/null
+++ b/R/rdply.r
@@ -0,0 +1,35 @@
+#' Replicate expression and return results in a data frame.
+#'
+#' Evalulate expression n times then combine results into a data frame
+#'
+#' This function runs an expression multiple times, and combines the
+#' result into a data frame. If there are no results, then this function
+#' returns a data frame with zero rows and columns (\code{data.frame()}).
+#' This function is equivalent to \code{\link{replicate}}, but will always
+#' return results as a data frame.
+#'
+#'
+#' @keywords manip
+#' @param .n number of times to evaluate the expression
+#' @param .expr expression to evaluate
+#' @param .progress name of the progress bar to use, see \code{\link{create_progress_bar}}
+#' @return a data frame
+#' @export
+#' @references Hadley Wickham (2011). The Split-Apply-Combine Strategy for
+#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
+#' \url{http://www.jstatsoft.org/v40/i01/}.
+#' @examples
+#' rdply(20, mean(runif(100)))
+#' rdply(20, each(mean, var)(runif(100)))
+#' rdply(20, data.frame(x = runif(2)))
+rdply <- function(.n, .expr, .progress = "none") {
+ if (is.function(.expr)) {
+ f <- .expr
+ } else {
+ f <- eval.parent(substitute(function() .expr))
+ }
+
+ res <- rlply(.n = .n, .expr = f, .progress = .progress)
+ labels <- data.frame(.n = seq_len(.n))
+ list_to_dataframe(res, labels)
+}
diff --git a/R/rename.r b/R/rename.r
new file mode 100644
index 0000000..a200538
--- /dev/null
+++ b/R/rename.r
@@ -0,0 +1,22 @@
+#' Modify names by name, not position.
+#'
+#' @param x named object to modify
+#' @param replace named character vector, with new names as values, and
+#' old names as names.
+#' @param warn_missing print a message if any of the old names are
+#' not actually present in \code{x}.
+#' Note: x is not altered: To save the result, you need to copy the returned
+#' data into a variable.
+#' @export
+#' @importFrom stats setNames
+#' @examples
+#' x <- c("a" = 1, "b" = 2, d = 3, 4)
+#' # Rename column d to "c", updating the variable "x" with the result
+#' x <- rename(x, replace=c("d" = "c"))
+#' x
+#' # Rename column "disp" to "displacement"
+#' rename(mtcars, c("disp" = "displacement"))
+rename <- function(x, replace, warn_missing = TRUE) {
+ names(x) <- revalue(names(x), replace, warn_missing = warn_missing)
+ x
+}
diff --git a/R/revalue.r b/R/revalue.r
new file mode 100644
index 0000000..c57adf0
--- /dev/null
+++ b/R/revalue.r
@@ -0,0 +1,94 @@
+#' Replace specified values with new values, in a factor or character vector.
+#'
+#' If \code{x} is a factor, the named levels of the factor will be
+#' replaced with the new values.
+#'
+#' This function works only on character vectors and factors, but the
+#' related \code{mapvalues} function works on vectors of any type and factors,
+#' and instead of a named vector specifying the original and replacement values,
+#' it takes two separate vectors
+#'
+#' @param x factor or character vector to modify
+#' @param replace named character vector, with new values as values, and
+#' old values as names.
+#' @param warn_missing print a message if any of the old values are
+#' not actually present in \code{x}
+#'
+#' @seealso \code{\link{mapvalues}} to replace values with vectors of any type
+#' @export
+#' @examples
+#' x <- c("a", "b", "c")
+#' revalue(x, c(a = "A", c = "C"))
+#' revalue(x, c("a" = "A", "c" = "C"))
+#'
+#' y <- factor(c("a", "b", "c", "a"))
+#' revalue(y, c(a = "A", c = "C"))
+revalue <- function(x, replace = NULL, warn_missing = TRUE) {
+ if (!is.null(x) && !is.factor(x) && !is.character(x)) {
+ stop("x is not a factor or a character vector.")
+ }
+
+ mapvalues(x, from = names(replace), to = replace, warn_missing = warn_missing)
+}
+
+
+#' Replace specified values with new values, in a vector or factor.
+#'
+#' Item in \code{x} that match items \code{from} will be replaced by
+#' items in \code{to}, matched by position. For example, items in \code{x} that
+#' match the first element in \code{from} will be replaced by the first
+#' element of \code{to}.
+#'
+#' If \code{x} is a factor, the matching levels of the factor will be
+#' replaced with the new values.
+#'
+#' The related \code{revalue} function works only on character vectors
+#' and factors, but this function works on vectors of any type and factors.
+#'
+#' @param x the factor or vector to modify
+#' @param from a vector of the items to replace
+#' @param to a vector of replacement values
+#' @param warn_missing print a message if any of the old values are
+#' not actually present in \code{x}
+#'
+#' @seealso \code{\link{revalue}} to do the same thing but with a single
+#' named vector instead of two separate vectors.
+#' @export
+#' @examples
+#' x <- c("a", "b", "c")
+#' mapvalues(x, c("a", "c"), c("A", "C"))
+#'
+#' # Works on factors
+#' y <- factor(c("a", "b", "c", "a"))
+#' mapvalues(y, c("a", "c"), c("A", "C"))
+#'
+#' # Works on numeric vectors
+#' z <- c(1, 4, 5, 9)
+#' mapvalues(z, from = c(1, 5, 9), to = c(10, 50, 90))
+mapvalues <- function(x, from, to, warn_missing = TRUE) {
+ if (length(from) != length(to)) {
+ stop("`from` and `to` vectors are not the same length.")
+ }
+ if (!is.atomic(x)) {
+ stop("`x` must be an atomic vector.")
+ }
+
+ if (is.factor(x)) {
+ # If x is a factor, call self but operate on the levels
+ levels(x) <- mapvalues(levels(x), from, to)
+ return(x)
+ }
+
+ mapidx <- match(x, from)
+ mapidxNA <- is.na(mapidx)
+
+ # index of items in `from` that were found in `x`
+ from_found <- sort(unique(mapidx))
+ if (warn_missing && length(from_found) != length(from)) {
+ message("The following `from` values were not present in `x`: ",
+ paste(from[!(1:length(from) %in% from_found) ], collapse = ", "))
+ }
+
+ x[!mapidxNA] <- to[mapidx[!mapidxNA]]
+ x
+}
diff --git a/R/rlply.r b/R/rlply.r
new file mode 100644
index 0000000..80905c0
--- /dev/null
+++ b/R/rlply.r
@@ -0,0 +1,42 @@
+#' Replicate expression and return results in a list.
+#'
+#' Evalulate expression n times then combine results into a list
+#'
+#' This function runs an expression multiple times, and combines the
+#' result into a list. If there are no results, then this function will return
+#' a list of length 0 (\code{list()}). This function is equivalent to
+#' \code{\link{replicate}}, but will always return results as a list.
+#'
+#'
+#' @keywords manip
+#' @param .n number of times to evaluate the expression
+#' @param .expr expression to evaluate
+#' @param .progress name of the progress bar to use, see \code{\link{create_progress_bar}}
+#' @return list of results
+#' @export
+#' @references Hadley Wickham (2011). The Split-Apply-Combine Strategy for
+#' Data Analysis. Journal of Statistical Software, 40(1), 1-29.
+#' \url{http://www.jstatsoft.org/v40/i01/}.
+#' @examples
+#' mods <- rlply(100, lm(y ~ x, data=data.frame(x=rnorm(100), y=rnorm(100))))
+#' hist(laply(mods, function(x) summary(x)$r.squared))
+rlply <- function(.n, .expr, .progress = "none") {
+ if (is.function(.expr)) {
+ f <- .expr
+ } else {
+ f <- eval.parent(substitute(function() .expr))
+ }
+
+ progress <- create_progress_bar(.progress)
+ result <- vector("list", length = .n)
+
+ progress$init(.n)
+ on.exit(progress$term())
+
+ for(i in seq_len(.n)) {
+ result[i] <- list(f())
+ progress$step()
+ }
+
+ result
+}
diff --git a/R/round-any.r b/R/round-any.r
new file mode 100644
index 0000000..d748e76
--- /dev/null
+++ b/R/round-any.r
@@ -0,0 +1,37 @@
+#' Round to multiple of any number.
+#'
+#' @param x numeric or date-time (POSIXct) vector to round
+#' @param accuracy number to round to; for POSIXct objects, a number of seconds
+#' @param f rounding function: \code{\link{floor}}, \code{\link{ceiling}} or
+#' \code{\link{round}}
+#' @keywords manip
+#' @export
+#' @examples
+#' round_any(135, 10)
+#' round_any(135, 100)
+#' round_any(135, 25)
+#' round_any(135, 10, floor)
+#' round_any(135, 100, floor)
+#' round_any(135, 25, floor)
+#' round_any(135, 10, ceiling)
+#' round_any(135, 100, ceiling)
+#' round_any(135, 25, ceiling)
+#'
+#' round_any(Sys.time() + 1:10, 5)
+#' round_any(Sys.time() + 1:10, 5, floor)
+#' round_any(Sys.time(), 3600)
+round_any <- function(x, accuracy, f = round) {
+ UseMethod("round_any")
+}
+
+#' @S3method round_any numeric
+round_any.numeric <- function(x, accuracy, f = round) {
+ f(x / accuracy) * accuracy
+}
+
+#' @S3method round_any POSIXct
+round_any.POSIXct <- function(x, accuracy, f = round) {
+ tz <- format(x[1], "%Z")
+ xr <- round_any(as.numeric(x), accuracy, f)
+ as.POSIXct(xr, origin="1970-01-01 00:00.00 UTC", tz=tz)
+}
diff --git a/R/helper-splat.r b/R/splat.r
similarity index 98%
rename from R/helper-splat.r
rename to R/splat.r
index ca801cb..aafef65 100644
--- a/R/helper-splat.r
+++ b/R/splat.r
@@ -2,10 +2,10 @@
#'
#' Wraps a function in do.call, so instead of taking multiple arguments, it
#' takes a single named list which will be interpreted as its arguments.
-#'
+#'
#' This is useful when you want to pass a function a row of data frame or
#' array, and don't want to manually pull it apart in your function.
-#'
+#'
#' @param flat function to splat
#' @return a function
#' @export
diff --git a/R/split-indices.r b/R/split-indices.r
index 5cc40db..2c6e4db 100644
--- a/R/split-indices.r
+++ b/R/split-indices.r
@@ -8,10 +8,11 @@
#' @param n largest integer (may not appear in index)
#' @useDynLib plyr
#' @keywords internal manip
-split_indices <- function(index, group, n = max(group)) {
- if (length(index) == 0) return(integer())
- stopifnot(is.integer(index))
+#' @export
+split_indices <- function(group, n = max(group)) {
+ if (length(group) == 0) return(integer())
stopifnot(is.integer(group))
-
- .Call("split_indices", index, group, as.integer(n))
+ n <- as.integer(n)
+
+ .Call("split_indices", group, as.integer(n))
}
diff --git a/R/split.r b/R/split.r
index 7a5ae26..46578fe 100644
--- a/R/split.r
+++ b/R/split.r
@@ -1,7 +1,7 @@
#' Subset splits.
#'
#' Subset splits, ensuring that labels keep matching
-#'
+#'
#' @keywords internal
#' @param x split object
#' @param i index
@@ -21,7 +21,7 @@
#' Convert split list to regular list.
#'
#' Strip off label related attributed to make a strip list as regular list
-#'
+#'
#' @keywords internal
#' @param x object to convert to a list
#' @param ... unused
@@ -37,7 +37,7 @@ as.list.split <- function(x, ...) {
#' Print split.
#'
#' Don't print labels, so it appears like a regular list
-#'
+#'
#' @keywords internal
#' @param x object to print
#' @param ... unused
diff --git a/R/split-array.r b/R/splitter-a.r
similarity index 93%
rename from R/split-array.r
rename to R/splitter-a.r
index b895ec0..5b2f874 100644
--- a/R/split-array.r
+++ b/R/splitter-a.r
@@ -1,23 +1,23 @@
#' Split an array by .margins.
#'
#' Split a 2d or higher data structure into lower-d pieces based
-#'
-#' This is the workhorse of the \code{a*ply} functions. Given a >1 d
+#'
+#' This is the workhorse of the \code{a*ply} functions. Given a >1 d
#' data structure (matrix, array, data.frame), it splits it into pieces
#' based on the subscripts that you supply. Each piece is a lower dimensional
#' slice.
-#'
+#'
#' The margins are specified in the same way as \code{\link{apply}}, but
#' \code{splitter_a} just splits up the data, while \code{apply} also
#' applies a function and combines the pieces back together. This function
#' also includes enough information to recreate the split from attributes on
#' the list of pieces.
-#'
+#'
#' @param data >1d data structure (matrix, data.frame or array)
-#' @param .margins a vector giving the subscripts to split up \code{data} by.
+#' @param .margins a vector giving the subscripts to split up \code{data} by.
# 1 splits up by rows, 2 by columns and c(1,2) by rows and columns
-#' @param .expand if splitting a dataframe by row, should output be 1d
-#' (expand = FALSE), with an element for each row; or nd (expand = TRUE),
+#' @param .expand if splitting a dataframe by row, should output be 1d
+#' (expand = FALSE), with an element for each row; or nd (expand = TRUE),
#' with a dimension for each variable.
#' @return a list of lower-d slices, with attributes that record split details
#' @family splitter functions
@@ -31,15 +31,15 @@
#' plyr:::splitter_a(ozone, 1:2)
splitter_a <- function(data, .margins = 1L, .expand = TRUE) {
.margins <- as.integer(.margins)
-
+
if (length(.margins) == 0) {
return(list(data))
}
-
+
if (!all(.margins %in% seq_len(dims(data)))) stop("Invalid margin")
-
+
dimensions <- lapply(amv_dim(data), seq_len)
- dimensions[-.margins] <- list("")
+ dimensions[-.margins] <- list("")
indices <- expand.grid(dimensions, KEEP.OUT.ATTRS = FALSE,
stringsAsFactors = FALSE)
names(indices) <- paste("X", 1:ncol(indices), sep="")
@@ -48,14 +48,14 @@ splitter_a <- function(data, .margins = 1L, .expand = TRUE) {
# output - last margin varies fastest
ord <- do.call(order, indices[rev(.margins)])
indices <- unrowname(indices[ord, , drop = FALSE])
-
+
il <- indexed_array(environment(), indices)
if (is.data.frame(data) && .expand && identical(.margins, 1L)) {
split_labels <- data
- } else {
+ } else {
if (is.data.frame(data)) {
- dnames <- list(seq_len(nrow(data)), names(data))
+ dnames <- list(seq_len(nrow(data)), names(data))
} else {
dnames <- amv_dimnames(data)
dnames <- lapply(dnames, function(x) factor(x, levels = x))
@@ -68,7 +68,7 @@ splitter_a <- function(data, .margins = 1L, .expand = TRUE) {
names(split_labels) <- names(dnames)[.margins]
} else {
names(split_labels) <- paste("X", seq_along(.margins), sep = "")
- }
+ }
}
structure(
diff --git a/R/split-data-frame.r b/R/splitter-d.r
similarity index 93%
rename from R/split-data-frame.r
rename to R/splitter-d.r
index 36251a0..b3b6edf 100644
--- a/R/split-data-frame.r
+++ b/R/splitter-d.r
@@ -1,17 +1,17 @@
#' Split a data frame by variables.
#'
#' Split a data frame into pieces based on variable contained in that data frame
-#'
+#'
#' This is the workhorse of the \code{d*ply} functions. Based on the variables
#' you supply, it breaks up a single data frame into a list of data frames,
#' each containing a single combination from the levels of the specified
#' variables.
-#'
+#'
#' This is basically a thin wrapper around \code{\link{split}} which
#' evaluates the variables in the context of the data, and includes enough
-#' information to reconstruct the labelling of the data frame after
+#' information to reconstruct the labelling of the data frame after
#' other operations.
-#'
+#'
#' @seealso \code{\link{.}} for quoting variables, \code{\link{split}}
#' @family splitter functions
#' @param data data frame
@@ -34,8 +34,8 @@
#' plyr:::splitter_d(mtcars, .(cyl3, vs), drop = FALSE)
splitter_d <- function(data, .variables = NULL, drop = TRUE) {
stopifnot(is.quoted(.variables))
-
-
+
+
if (length(.variables) == 0) {
splitv <- rep(1, nrow(data))
split_labels <- NULL
@@ -48,12 +48,11 @@ splitter_d <- function(data, .variables = NULL, drop = TRUE) {
split_labels <- split_labels(splits, drop = drop, id = splitv)
vars <- unlist(lapply(.variables, all.vars))
}
-
- index <- split_indices(seq_along(splitv), as.integer(splitv),
- attr(splitv, "n"))
+
+ index <- split_indices(as.integer(splitv), attr(splitv, "n"))
il <- indexed_df(data, index, vars)
-
+
structure(
il,
class = c(class(il), "split", "list"),
@@ -65,14 +64,14 @@ splitter_d <- function(data, .variables = NULL, drop = TRUE) {
#' Generate labels for split data frame.
#'
#' Create data frame giving labels for split data frame.
-#'
+#'
#' @param list of variables to split up by
#' @param whether all possible combinations should be considered, or only those present in the data
#' @keywords internal
#' @export
split_labels <- function(splits, drop, id = plyr::id(splits, drop = TRUE)) {
if (length(splits) == 0) return(data.frame())
-
+
if (drop) {
# Need levels which occur in data
representative <- which(!duplicated(id))[order(unique(id))]
@@ -85,6 +84,11 @@ split_labels <- function(splits, drop, id = plyr::id(splits, drop = TRUE)) {
}
ulevels <- function(x) {
- if (is.factor(x)) return(levels(x))
- sort(unique(x))
+ if (is.factor(x)) {
+ levs <- levels(x)
+ factor(levs, levels = levs)
+ } else {
+ sort(unique(x))
+ }
+
}
diff --git a/R/helper-strip-splits.r b/R/strip-splits.r
similarity index 100%
rename from R/helper-strip-splits.r
rename to R/strip-splits.r
diff --git a/R/helper-summarise.r b/R/summarise.r
similarity index 53%
rename from R/helper-summarise.r
rename to R/summarise.r
index f3c2eb0..a409148 100644
--- a/R/helper-summarise.r
+++ b/R/summarise.r
@@ -1,25 +1,30 @@
#' Summarise a data frame.
#'
#' Summarise works in an analagous way to transform, except instead of adding
-#' columns to an existing data frame, it creates a new one. This is
+#' columns to an existing data frame, it creates a new data frame. This is
#' particularly useful in conjunction with \code{\link{ddply}} as it makes it
#' easy to perform group-wise summaries.
-#'
+#'
#' @param .data the data frame to be summarised
#' @param ... further arguments of the form var = value
#' @keywords manip
#' @aliases summarise summarize
#' @export summarise summarize
#' @examples
-#' summarise(baseball,
-#' duration = max(year) - min(year),
+#' # Let's extract the number of teams and total period of time
+#' # covered by the baseball dataframe
+#' summarise(baseball,
+#' duration = max(year) - min(year),
#' nteams = length(unique(team)))
-#' ddply(baseball, "id", summarise,
-#' duration = max(year) - min(year),
+#' # Combine with ddply to do that for each separate id
+#' ddply(baseball, "id", summarise,
+#' duration = max(year) - min(year),
#' nteams = length(unique(team)))
summarise <- function(.data, ...) {
- cols <- eval(substitute(list(...)), .data, parent.frame())
-
+ stopifnot(is.data.frame(.data) || is.list(.data) || is.environment(.data))
+
+ cols <- as.list(substitute(list(...))[-1])
+
# ... not a named list, figure out names by deparsing call
if(is.null(names(cols))) {
missing_names <- rep(TRUE, length(cols))
@@ -27,10 +32,13 @@ summarise <- function(.data, ...) {
missing_names <- names(cols) == ""
}
if (any(missing_names)) {
- names <- unname(unlist(lapply(match.call(expand = FALSE)$`...`, deparse)))
+ names <- unname(unlist(lapply(match.call(expand.dots = FALSE)$`...`, deparse)))
names(cols)[missing_names] <- names[missing_names]
}
-
- quickdf(cols)
+ .data <- as.list(.data)
+ for (col in names(cols)) {
+ .data[[col]] <- eval(cols[[col]], .data, parent.frame())
+ }
+ quickdf(.data[names(cols)])
}
summarize <- summarise
diff --git a/R/take.r b/R/take.r
new file mode 100644
index 0000000..0757913
--- /dev/null
+++ b/R/take.r
@@ -0,0 +1,25 @@
+#' Take a subset along an arbitrary dimension
+#'
+#' @param x matrix or array to subset
+#' @param along dimension to subset along
+#' @param indices the indices to select
+#' @param drop should the dimensions of the array be simplified? Defaults
+#' to \code{FALSE} which is the opposite of the useful R default.
+#' @export
+#' @examples
+#' x <- array(seq_len(3 * 4 * 5), c(3, 4, 5))
+#' take(x, 3, 1)
+#' take(x, 2, 1)
+#' take(x, 1, 1)
+#' take(x, 3, 1, drop = TRUE)
+#' take(x, 2, 1, drop = TRUE)
+#' take(x, 1, 1, drop = TRUE)
+take <- function(x, along, indices, drop = FALSE) {
+ nd <- length(dim(x))
+
+ index <- as.list(rep(TRUE, nd))
+ index[along] <- indices
+
+ eval(as.call(c(as.name("["), as.name("x"), index, drop = drop)))
+}
+
diff --git a/R/helper-try.r b/R/try.r
similarity index 94%
rename from R/helper-try.r
rename to R/try.r
index 18ee0cf..34f531f 100644
--- a/R/helper-try.r
+++ b/R/try.r
@@ -2,7 +2,7 @@
#'
#' Modify a function so that it returns a default value when there is an
#' error.
-#'
+#'
#' @param default default value
#' @param f function
#' @param quiet all error messages be suppressed?
@@ -19,7 +19,7 @@
#'
#' safef <- failwith(NULL, f)
#' safef(1)
-#' safef(2)
+#' safef(2)
failwith <- function(default = NULL, f, quiet = FALSE) {
f <- match.fun(f)
function(...) try_default(f(...), default, quiet = quiet)
@@ -28,9 +28,9 @@ failwith <- function(default = NULL, f, quiet = FALSE) {
#' Try, with default in case of error.
#'
#' \code{try_default} wraps try so that it returns a default value in the case of error.
-#'
+#'
#' \code{tryNULL} provides a useful special case when dealing with lists.
-#'
+#'
#' @param expr expression to try
#' @param default default value in case of error
#' @param quiet should errors be printed (TRUE) or ignored (FALSE, default)
@@ -41,7 +41,7 @@ failwith <- function(default = NULL, f, quiet = FALSE) {
try_default <- function(expr, default, quiet = FALSE) {
result <- default
if (quiet) {
- tryCatch(result <- expr, error = function(e) {})
+ tryCatch(result <- expr, error = function(e) {})
} else {
try(result <- expr)
}
@@ -52,7 +52,7 @@ tryNULL <- function(expr) try_default(expr, NULL, quiet = TRUE)
#' Apply with built in try.
#' Uses compact, lapply and tryNULL
-#'
+#'
#' @keywords internal
#' @export
tryapply <- function(list, fun, ...) {
diff --git a/R/utils-functional.r b/R/utils-functional.r
new file mode 100644
index 0000000..13c9253
--- /dev/null
+++ b/R/utils-functional.r
@@ -0,0 +1,17 @@
+# f <- function(...) {
+# dots()
+# }
+
+# g <- function() {
+# f <- function(x, y, ...) {
+# dots()
+# }
+# f(x = 1, y = 2, z = 3)
+# }
+dots <- function() {
+ call <- sys.call(-1)
+ def <- eval(call[[1]], parent.frame(2))
+ match.call(def, call, expand.dots = FALSE)$`...`
+}
+
+
diff --git a/R/utils.r b/R/utils.r
index 922876a..e75f838 100644
--- a/R/utils.r
+++ b/R/utils.r
@@ -1,7 +1,7 @@
#' Determine if a vector is discrete.
#'
#' A discrete vector is a factor or a character vector
-#'
+#'
#' @param x vector to test
#' @keywords internal
#' @export
@@ -14,7 +14,7 @@ is.discrete <- function(x) is.factor(x) || is.character(x) || is.logical(x)
#' Un-rowname.
#'
#' Strip rownames from an object
-#'
+#'
#' @keywords internal
#' @param x data frame
#' @export
@@ -24,7 +24,7 @@ unrowname <- function(x) {
}
#' Function that always returns true.
-#'
+#'
#' @param ... all input ignored
#' @return \code{TRUE}
#' @keywords internal
@@ -35,16 +35,16 @@ true <- function(...) TRUE
#' Compact list.
#'
#' Remove all NULL entries from a list
-#'
+#'
#' @param l list
-#' @keywords manip internal
+#' @keywords manip internal
#' @export
compact <- function(l) Filter(Negate(is.null), l)
#' Number of unique values.
#'
#' Calculate number of unique values of a variable as efficiently as possible.
-#'
+#'
#' @param x vector
#' @keywords internal
nunique <- function(x) {
@@ -58,10 +58,20 @@ nunique <- function(x) {
#' Check if a data frame is empty.
#'
#' Empty if it's null or it has 0 rows or columns
-#'
+#'
#' @param df data frame to check
#' @keywords internal
#' @export
empty <- function(df) {
(is.null(df) || nrow(df) == 0 || ncol(df) == 0)
}
+
+"%||%" <- function(a, b) if (is.null(a)) b else a
+
+.matrix_to_df <- function(.data) {
+ cnames <- colnames(.data)
+ if (is.null(cnames)) cnames <- rep("", ncol(.data))
+ .data <- as.data.frame(.data, stringsAsFactors = FALSE)
+ colnames(.data) <- cnames
+ .data
+}
diff --git a/R/helper-vaggregate.r b/R/vaggregate.r
similarity index 95%
rename from R/helper-vaggregate.r
rename to R/vaggregate.r
index 8192e05..4c07212 100644
--- a/R/helper-vaggregate.r
+++ b/R/vaggregate.r
@@ -1,19 +1,19 @@
#' Vector aggregate.
#'
#' This function is somewhat similar to \code{tapply}, but is designed for
-#' use in conjunction with \code{id}. It is simpler in that it only
+#' use in conjunction with \code{id}. It is simpler in that it only
#' accepts a single grouping vector (use \code{\link{id}} if you have more)
#' and uses \code{\link{vapply}} internally, using the \code{.default} value
#' as the template.
-#'
+#'
#' \code{vaggregate} should be faster than \code{tapply} in most situations
#' because it avoids making a copy of the data.
-#'
+#'
#' @param .value vector of values to aggregate
#' @param .group grouping vector
#' @param .fun aggregation function
#' @param ... other arguments passed on to \code{.fun}
-#' @param .default default value used for missing groups. This argument is
+#' @param .default default value used for missing groups. This argument is
#' also used as the template for function output.
#' @param .n total number of groups
#' @export
@@ -29,7 +29,7 @@
#' # Unlike tapply, vaggregate does not support multi-d output:
#' tapply(warpbreaks$breaks, warpbreaks[,-1], sum)
#' vaggregate(warpbreaks$breaks, id(warpbreaks[,-1]), sum)
-#'
+#'
#' # But it is about 10x faster
#' x <- rnorm(1e6)
#' y1 <- sample.int(10, 1e6, replace = TRUE)
@@ -47,13 +47,13 @@ vaggregate <- function(.value, .group, .fun, ..., .default = NULL, .n = nlevels(
if (is.null(.default)) {
.default <- .fun(.value[0], ...)
}
-
+
fun <- function(i) {
if (length(i) == 0) return(.default)
.fun(.value[i], ...)
}
- indices <- split_indices(seq_along(.value), .group, .n)
+ indices <- split_indices(.group, .n)
vapply(indices, fun, .default)
}
diff --git a/inst/tests/quickdf.r b/inst/tests/quickdf.r
new file mode 100644
index 0000000..b622759
--- /dev/null
+++ b/inst/tests/quickdf.r
@@ -0,0 +1,8 @@
+context("quickdf")
+
+test_that("make_names handles NAs", {
+ x <- 1:3
+ names(x) <- c("", "a", NA)
+ expect_equal(make_names(x), c("X1", "a", NA))
+
+})
diff --git a/inst/tests/test-array.r b/inst/tests/test-array.r
index f4a12c4..55c90ca 100644
--- a/inst/tests/test-array.r
+++ b/inst/tests/test-array.r
@@ -3,25 +3,31 @@ context("Arrays")
test_that("incorrect result dimensions raise errors", {
fs <- list(
function(x) rep(x, sample(10, 1)),
- function(x) if (x < 5) x else matrix(x, 2, 2)
+ function(x) if (x < 5) x else matrix(x, 2, 2),
+ function(x) as.list(rep(x, sample(10, 1))),
+ function(x) if (x < 5) list(x) else matrix(list(x), 2, 2)
)
-
+
expect_that(laply(1:10, fs[[1]]), throws_error("same dim"))
expect_that(laply(1:10, fs[[2]]), throws_error("same number"))
+ expect_that(laply(1:10, fs[[3]]), throws_error("same dim"))
+ expect_that(laply(1:10, fs[[4]]), throws_error("same number"))
+})
+
+test_that("incompatible result types raise errors", {
+ f <- function(x) if (x < 5) c(x, x) else as.list(x)
+
+ expect_that(laply(1:10, f), throws_error("compatible type"))
})
test_that("zero length margin operates on whole object", {
a <- array(1:24, 2:4)
-
- expect_that(alply(a, NULL, sum)[[1]], equals(sum(1:24)))
+
+ expect_that(alply(a, NULL, sum)[[1]], equals(sum(1:24)))
})
test_that("results must have positive dimensions", {
expect_that(
- aaply(matrix(0,2,2), 2, function(x) numeric(0)),
- throws_error("one or more dimensions"))
-
- expect_that(
aaply(matrix(0,2,2), 2, function(x) NULL),
throws_error("one or more dimensions"))
})
@@ -37,26 +43,44 @@ test_that("array binding is correct", {
f <- function(x) matrix(x, 2, 2)
m2d <- lapply(1:10, f)
m3d <- abind(m2d, along = 0)
-
+
expect_that(laply(1:10, f), is_equivalent_to(m3d))
f <- function(x) array(x, c(2, 2, 2))
m3d <- lapply(1:10, f)
m4d <- abind(m3d, along = 0)
-
+
+ expect_that(laply(1:10, f), is_equivalent_to(m4d))
+})
+
+test_that("array binding of lists is correct", {
+ #this is identical to the previous test but in list mode
+ f <- function(x) matrix(as.list(x), 2, 2)
+ m2d <- lapply(1:10, f)
+ #as abind itself doesn't do lists...
+ m3d <- aperm(array(do.call(c, m2d), c(2, 2, 10)), c(3,1,2))
+
+ expect_that(laply(1:10, f), is_equivalent_to(m3d))
+ f <- function(x) array(as.list(x), c(2, 2, 2))
+ m3d <- lapply(1:10, f)
+ m4d <- aperm(array(do.call(c, m3d), c(2, 2, 2, 10)), c(4, 1, 2, 3))
+
expect_that(laply(1:10, f), is_equivalent_to(m4d))
})
# Basis of test contributed by anonymous reviewer.
-test_that("idempotent function equivalent to permutation", {
- x <- array(1:24, 4:2,
+test_that("idempotent function equivalent to permutation", {
+ x <- array(1:24, 4:2,
dimnames = list(LETTERS[1:4], letters[24:26], letters[1:2]))
- perms <- unique(alply(as.matrix(subset(expand.grid(x=0:3,y=0:3,z=0:3), (x+y+z)>0 & !any(duplicated(setdiff(c(x,y,z), 0))))), 1, function(x) setdiff(x, 0)))
+ perms <- unique(alply(as.matrix(subset(expand.grid(x=0:3,y=0:3,z=0:3),
+ (x+y+z)>0 & !any(duplicated(setdiff(c(x,y,z), 0))))),
+ 1,
+ function(x) setdiff(x, 0)))
aperms <- llply(perms, function(perm) aperm(x, unique(c(perm, 1:3))))
aaplys <- llply(perms, function(perm) aaply(x, perm, identity))
-
+
for(i in seq_along(aperms)) {
perm <- paste(perms[[i]], collapse = ", ")
expect_that(dim(aaplys[[i]]), equals(dim(aperms[[i]])), perm)
@@ -67,6 +91,33 @@ test_that("idempotent function equivalent to permutation", {
})
+test_that("alply sets dims and dimnames, equivalence to permutation", {
+ x <- array(1:24, 4:2,
+ dimnames = list(dim1=LETTERS[1:4], dim2=letters[c(24,26,25)], dim3=NULL))
+ #unlisting an alply should leave elements the the same order as
+ #an aperm with the unused dimensions shifted to the front.
+ #check against all ways to split this array
+ p_alply <- unique(alply(as.matrix(subset(expand.grid(x=0:3,y=0:3,z=0:3),
+ (x+y+z)>0 & !any(duplicated(setdiff(c(x,y,z), 0))))),
+ 1, function(x) setdiff(x, 0)))
+ p_aperm <- llply(p_alply, function(x) union(setdiff(1:3, x), x))
+ alplys <- lapply(p_alply, alply, .data=x, identity, .dims = TRUE)
+ #alply will fill in dimnames on a dim that has none, so match that here
+ dimnames(x)[[3]] <- c("1", "2")
+ aperms <- llply(p_aperm, .fun=aperm, a=x)
+
+ m_ply(cbind(x_perm=p_alply, x_ply=alplys, x_aperm=aperms),
+ function(x_perm, x_ply, x_aperm) {
+ expect_equivalent(dim(x_ply),
+ dim(x)[x_perm])
+ expect_equivalent(dimnames(x_ply),
+ dimnames(x)[x_perm])
+ expect_equivalent(dim(x_ply),
+ dim(x_aperm)[(length(dim(x)) - length(x_perm) + 1):(length(dim(x)))])
+ expect_equivalent(as.vector(unlist(x_ply)), as.vector(x_aperm))
+ })
+})
+
# Test contributed by Baptiste Auguie
test_that("single column data frames work when treated as an array", {
foo <- function(a="a", b="b", c="c", ...){
@@ -75,7 +126,7 @@ test_that("single column data frames work when treated as an array", {
df <- data.frame(b=1:2)
res <- adply(df, 1, splat(foo))
-
+
expect_that(res$b, equals(1:2))
expect_that(as.character(res$V1), equals(c("a1c", "a2c")))
})
@@ -84,14 +135,14 @@ test_that("aaply equivalent to apply with correct permutation", {
a <- matrix(seq_len(400), ncol = 20)
expect_that(rowMeans(a), equals(aaply(a, 1, mean), check.attr = FALSE))
expect_that(colMeans(a), equals(aaply(a, 2, mean), check.attr = FALSE))
-
+
b <- structure(a, dimnames = amv_dimnames(a))
expect_that(rowMeans(b), equals(aaply(b, 1, mean), check.attr = FALSE))
- expect_that(colMeans(b), equals(aaply(b, 2, mean), check.attr = FALSE))
+ expect_that(colMeans(b), equals(aaply(b, 2, mean), check.attr = FALSE))
})
test_that("array reconstruction correct with missing cells", {
- df <- data.frame(i = rep(1:3, each = 12), j = rep(1:3, each = 4), v = 1:36)
+ df <- data.frame(i = rep(1:3, each = 12), j = rep(1:3, each = 4), v = 1:36)
df <- subset(df, i != j)
da <- daply(df, .(i, j), function(q) sum(q$v))
@@ -99,7 +150,7 @@ test_that("array reconstruction correct with missing cells", {
m <- matrix(NA, 3, 3)
m[cbind(dd$i, dd$j)] <- dd$v1
-
+
expect_that(da, equals(m, check.attributes = FALSE))
})
@@ -117,14 +168,14 @@ test_that("array names do not affect output", {
numeric_rev = set_dimnames(base, list(R = 12:1, C = 4:1)),
alpha = set_dimnames(base, list(R = letters[1:12], C = LETTERS[1:4])),
alpha_rev = set_dimnames(base, list(R = letters[12:1], C = LETTERS[4:1]))
- )
+ )
for(name in names(arrays)) {
array <- arrays[[name]]
- expect_that(aaply(array, 1, sum),
+ expect_that(aaply(array, 1, sum),
equals(rowSums(array), check.attributes = FALSE), info = name)
- expect_that(aaply(array, 2, sum),
+ expect_that(aaply(array, 2, sum),
equals(colSums(array), check.attributes = FALSE), info = name)
}
-
+
})
diff --git a/inst/tests/test-count.r b/inst/tests/test-count.r
index 6887636..9339ea1 100644
--- a/inst/tests/test-count.r
+++ b/inst/tests/test-count.r
@@ -2,7 +2,7 @@ library(testthat)
context("Count")
count_f <- function(...) count(...)$freq
-table_f <- function(...) {
+table_f <- function(...) {
x <- unname(as.numeric(table(rev(...))))
x[x != 0]
}
@@ -12,38 +12,38 @@ test_that("count matches table", {
mtcars["cyl"],
mtcars["mpg"],
mtcars[c("cyl", "vs")])
-
+
for(datum in data) {
expect_that(count_f(datum), equals(table_f(datum)))
- }
+ }
})
test_that("random order doesn't affect count", {
usual <- count(mtcars, "cyl")
-
+
for(i in 1:5) {
mtcars_r <- mtcars[sample(1:nrow(mtcars)), ]
expect_that(count(mtcars_r, "cyl"), equals(usual))
}
-
+
})
test_that("weighted count matches xtab", {
xt1 <- as.data.frame(xtabs(g ~ id, data = baseball), responseName = "freq")
xt1$id <- as.character(xt1$id)
ct1 <- count(baseball, "id", "g")
-
+
expect_that(ct1, equals(xt1))
-
- xt2 <- as.data.frame(xtabs(g ~ year + team, data = baseball),
+
+ xt2 <- as.data.frame(xtabs(g ~ year + team, data = baseball),
responseName = "freq")
xt2 <- subset(xt2, freq > 0)
xt2$year <- as.numeric(as.character(xt2$year))
xt2$team <- as.character(xt2$team)
xt2 <- arrange(xt2, year, team)
ct2 <- count(baseball, c("year", "team"), "g")
-
+
expect_that(ct2, equals(xt2))
-
-})
\ No newline at end of file
+
+})
diff --git a/inst/tests/test-data-frame.r b/inst/tests/test-data-frame.r
index 159f585..fdd9c85 100644
--- a/inst/tests/test-data-frame.r
+++ b/inst/tests/test-data-frame.r
@@ -7,11 +7,11 @@ test_that("results ordered in order of split variables", {
plyed <- ddply(d, c("x" ,"y"))
expect_that(plyed$x, equals(c("a", "b")))
expect_that(plyed$y, equals(c("d", "c")))
-
+
plyed <- ddply(d, c("y" ,"x"))
expect_that(plyed$y, equals(c("c", "d")))
expect_that(plyed$x, equals(c("b", "a")))
-
+
})
test_that("character vectors not change to factors", {
@@ -31,18 +31,18 @@ test_that("character vectors not change to factors", {
plyed <- ddply(d, c("x"), identity, .drop = FALSE)
expect_that(plyed$x, is_a("character"))
- expect_that(plyed$y, is_a("character"))
+ expect_that(plyed$y, is_a("character"))
})
# Bug report contributed by Thomas P Harte <THarte at tiaa-cref.org>
test_that("column names not changed", {
- d1 <- data.frame(`--WEIRD`=1:5, a = letters[1:5], `-b` = 1:5,
+ d1 <- data.frame(`--WEIRD`=1:5, a = letters[1:5], `-b` = 1:5,
check.names = FALSE)
- d2 <- ddply(d1, .(`--WEIRD`), force)
+ d2 <- ddply(d1, .(`--WEIRD`), force)
expect_that(names(d2), equals(names(d1)))
-
-
+
+
})
# Bug reported by Karl Ove Hufthammer <karl at huftis.org>
@@ -51,12 +51,12 @@ test_that("label variables always preserved", {
d <- data.frame(x = 101:104, y = 1:4)
f <- function(df) sum(df$y)
g <- function(df) if(df$x <= 102) sum(df$y)
-
+
out1 <- ddply(d, "x", f) # This one works correctly
out2 <- ddply(d, "x", g) # This one doesn’t
-
+
expect_that(names(out1), equals(names(out2)))
expect_that(out1$x[1:2], equals(out2$x))
-
+
})
diff --git a/inst/tests/test-empty.r b/inst/tests/test-empty.r
index e5767dc..6b15b54 100644
--- a/inst/tests/test-empty.r
+++ b/inst/tests/test-empty.r
@@ -8,8 +8,7 @@ test_that("empty arrays returns object of same shape", {
expect_that(aaply(x, 3, identity), equals(logical()))
expect_that(adply(x, 1, identity), equals(data.frame()))
- expect_that(alply(x, 1, identity), equals(list()))
-
+ expect_that(alply(x, 1, identity), is_equivalent_to(list()))
})
test_that("empty lists return an empty object", {
@@ -31,10 +30,10 @@ test_that("empty data frame results returns empty object", {
ddply(df, "a", function(x) NULL),
equals(data.frame()))
expect_that(
- dlply(df, "a", function(x) NULL),
+ dlply(df, "a", function(x) NULL),
equals(rep(list(NULL), 10), check.attributes = FALSE))
expect_that(
daply(df, "a", function(x) NULL),
throws_error("must have one or more dimensions"))
-
-})
\ No newline at end of file
+
+})
diff --git a/inst/tests/test-idf.r b/inst/tests/test-idf.r
new file mode 100644
index 0000000..176338a
--- /dev/null
+++ b/inst/tests/test-idf.r
@@ -0,0 +1,87 @@
+context("Immutable")
+
+# Create smaller subset of baseball data (for speed)
+bsmall <- subset(baseball, id %in% sample(unique(baseball$id), 20))[, 1:5]
+bsmall$id <- factor(bsmall$id)
+bsmall <- bsmall[sample(rownames(bsmall)), ]
+rownames(bsmall) <- NULL
+
+test_that("idf is immutable", {
+ #Since idf are constructed by scratch in both idata.frame and `[.idf]`
+ #I will test idf objects created both ways.
+
+ #create both before testing any, to make sure that subsetting
+ #doesn't change the subsetted idf
+ idf <- idata.frame(bsmall)
+ x <- idf[1:10, ]
+ y <- bsmall[1:10, ]
+
+ expect_error(x[1,"year"] <- 1994)
+ expect_error(x[["stint"]] <- rev(y[["stint"]]))
+ expect_error(x$team <- sort(y$team))
+ expect_error(names(idf) <- c("ID", "YR", "ST", "TM", "LG"))
+
+ expect_error(idf[1,"year"] <- 1994)
+ expect_error(idf[["stint"]] <- rev(bsmall[["stint"]]))
+ expect_error(idf$team <- sort(bsmall$team))
+ expect_error(names(idf) <- c("ID", "YR", "ST", "TM", "LG"))
+})
+
+test_that("idf subset by [i]", {
+ idf <- idata.frame(bsmall)
+
+ x <- idf[3]
+ y <- bsmall[3]
+
+ expect_equal(idf[[2]], bsmall[[2]])
+ expect_equal(x[[1]], y[[1]])
+})
+
+test_that("idf subset data by [i,j]", {
+ idf <- idata.frame(bsmall)
+
+ x <- idf[1:10, ]
+ y <- bsmall[1:10, ]
+
+ xx <- x[3:5, c('id', 'team')]
+ yy <- y[3:5, c('id', 'team')]
+
+ xxx <- idf[ , names(idf)]
+ yyy <- idf[ , names(y)]
+
+ expect_equal(idf[3, "year"], bsmall[[3, "year"]])
+ expect_equal(x[, "year"], y[, "year"])
+ expect_equal(xx[, "id"], yy[, "id"])
+ expect_equal(xxx[, "team"], yyy[, "team"])
+})
+
+test_that("idf extract by [[i]]", {
+ idf <- idata.frame(bsmall)
+
+ x <- idf[6:20,]
+ y <- bsmall[6:20,]
+
+ expect_equal(x[[4]], y[[4]])
+ expect_equal(idf[[3]], bsmall[[3]])
+ expect_equal(idf[["year"]], bsmall[["year"]])
+})
+
+test_that("idf extract $name", {
+ idf <- idata.frame(bsmall)
+
+ x <- idf[500:510,]
+ y <- bsmall[500:510,]
+
+ expect_equal(x$team, y$team)
+ expect_equal(idf$team, bsmall$team)
+})
+
+test_that("idf as environment", {
+ idf <- idata.frame(bsmall)
+
+ x <- idf[5:10,]
+ y <- bsmall[5:10,]
+
+ expect_equal(with(x, mean(year)), with(y, mean(year)))
+ expect_equal(with(idf, table(team)), with(bsmall, table(team)))
+})
diff --git a/inst/tests/test-join.r b/inst/tests/test-join.r
index 10b8c58..aa0d47d 100644
--- a/inst/tests/test-join.r
+++ b/inst/tests/test-join.r
@@ -8,15 +8,15 @@ rownames(bsmall) <- NULL
first <- ddply(bsmall, "id", summarise, first = min(year))
-test_that("results consistent with merge", {
+test_that("results consistent with merge", {
b2 <- merge(bsmall, first, by = "id", all.x = TRUE)
b3 <- join(bsmall, first, by = "id")
b4 <- join(first, bsmall, by = "id")[names(b3)]
-
+
b2 <- arrange(b2, id, year, stint)
b3 <- arrange(b3, id, year, stint)
b4 <- arrange(b4, id, year, stint)
-
+
expect_that(b2, equals(b3))
expect_that(b2, equals(b4))
})
@@ -26,7 +26,7 @@ test_that("order is preserved", {
expect_that(bsmall$id, equals(b3$id))
expect_that(bsmall$year, equals(b3$year))
- expect_that(bsmall$stint, equals(b3$stint))
+ expect_that(bsmall$stint, equals(b3$stint))
})
test_that("rownames are preserved", {
@@ -37,11 +37,11 @@ test_that("rownames are preserved", {
test_that("duplicated keys are duplicated", {
x <- data.frame(a = c("a", "b"), b = c("a", "b"))
y <- data.frame(a = c("a", "a"), z = c(1, 2))
-
+
left <- join(x, y, by = "a")
expect_that(nrow(left), equals(3))
expect_that(left$z, equals(c(1, 2, NA)))
-
+
inner <- join(x, y, by = "a", type = "inner")
expect_that(nrow(inner), equals(2))
expect_that(inner$z, equals(c(1, 2)))
@@ -50,12 +50,12 @@ test_that("duplicated keys are duplicated", {
test_that("full merge preserves x and y", {
a <- data.frame(x = 1:10, a = 1:10)
b <- data.frame(x = 11:15, b = 1:5)
-
+
ab <- join(a, b, by = "x", type = "full")
expect_that(names(ab), equals(c("x", "a", "b")))
expect_that(ab$x, equals(1:15))
expect_that(ab$a, equals(c(1:10, rep(NA, 5))))
- expect_that(ab$b, equals(c(rep(NA, 10), 1:5)))
+ expect_that(ab$b, equals(c(rep(NA, 10), 1:5)))
})
test_that("left and right are equivalent", {
@@ -76,11 +76,11 @@ test_that("left and right are equivalent", {
test_that("large number of columns work", {
df1 <- data.frame(matrix(1:100, ncol = 50), y = 1:2)
df2 <- data.frame(matrix(1:100, ncol = 50), z = 3:4)
-
+
df <- join(df1, df2)
expect_that(df$y, equals(1:2))
expect_that(df$z, equals(3:4))
-
+
})
test_that("many potential combinations works", {
@@ -92,6 +92,75 @@ test_that("many potential combinations works", {
j <- join(df1, df2)
j <- merge(df1, df2, all.x = TRUE)
-
-
-})
\ No newline at end of file
+
+})
+
+test_that("joins with no common rows work", {
+ a <- data.frame(a = 1:10)
+ b <- data.frame(b = 1:10)
+
+ full1 <- join(a, b, type = "full")
+ full2 <- join(a, b, type = "full", match = "first")
+ inner1 <- join(a, b, type = "inner")
+ inner2 <- join(a, b, type = "inner", match = "first")
+ left1 <- join(a, b, type = "left")
+ left2 <- join(a, b, type = "left", match = "first")
+ right1 <- join(a, b, type = "right")
+ right2 <- join(a, b, type = "right", match = "first")
+
+ expect_equal(nrow(full1), 20)
+ expect_equal(nrow(full2), 20)
+ expect_equal(nrow(inner1), 0)
+ expect_equal(nrow(inner2), 0)
+ expect_equal(nrow(left1), 10)
+ expect_equal(nrow(left2), 10)
+ expect_equal(nrow(right1), 10)
+ expect_equal(nrow(right2), 10)
+})
+
+test_that("joins with zero row dataframe work", {
+ a <- data.frame(a = integer())
+ b <- data.frame(a = 1:10, b = letters[1:10])
+
+ full1 <- join(a, b, type = "full")
+ full2 <- join(a, b, type = "full", match = "first")
+ inner1 <- join(a, b, type = "inner")
+ inner2 <- join(a, b, type = "inner", match = "first")
+ left1 <- join(a, b, type = "left")
+ left2 <- join(a, b, type = "left", match = "first")
+ right1 <- join(a, b, type = "right")
+ right2 <- join(a, b, type = "right", match = "first")
+
+ expect_equal(nrow(full1), 10)
+ expect_equal(nrow(full2), 10)
+ expect_equal(nrow(inner1), 0)
+ expect_equal(nrow(inner2), 0)
+ expect_equal(nrow(left1), 0)
+ expect_equal(nrow(left2), 0)
+ expect_equal(nrow(right1), 10)
+ expect_equal(nrow(right2), 10)
+})
+
+test_that("column orders are common, x only, y only", {
+ a <- data.frame(a = 1:3, b = 1:3)
+ b <- data.frame(a = 1:4, c = 1:4)
+
+ full1 <- join(a, b, type = "full")
+ full2 <- join(a, b, type = "full", match = "first")
+ inner1 <- join(a, b, type = "inner")
+ inner2 <- join(a, b, type = "inner", match = "first")
+ left1 <- join(a, b, type = "left")
+ left2 <- join(a, b, type = "left", match = "first")
+ right1 <- join(a, b, type = "right")
+ right2 <- join(a, b, type = "right", match = "first")
+
+ expect_equal(names(full1), c("a", "b", "c"))
+ expect_equal(names(full2), c("a", "b", "c"))
+ expect_equal(names(inner1), c("a", "b", "c"))
+ expect_equal(names(inner2), c("a", "b", "c"))
+ expect_equal(names(left1), c("a", "b", "c"))
+ expect_equal(names(left2), c("a", "b", "c"))
+ expect_equal(names(right1), c("a", "b", "c"))
+ expect_equal(names(right2), c("a", "b", "c"))
+
+})
diff --git a/inst/tests/test-list.r b/inst/tests/test-list.r
index cd9ea9c..c3edb63 100644
--- a/inst/tests/test-list.r
+++ b/inst/tests/test-list.r
@@ -8,7 +8,7 @@ test_that("data frame variables converted to list names", {
y <- ddply(esoph, .(alcgp, agegp), function(df) mean(df$ncases))
labs <- paste(y$alcgp, y$agegp, sep = ".")
expect_that(names(x), equals(labs))
-
+
})
test_that("list names are preserved", {
@@ -17,5 +17,5 @@ test_that("list names are preserved", {
names(a) <- letters[1:10]
expect_that(names(llply(a)), equals(letters[1:10]))
-
-})
\ No newline at end of file
+
+})
diff --git a/inst/tests/test-mapply.r b/inst/tests/test-mapply.r
index 26968cb..41d7276 100644
--- a/inst/tests/test-mapply.r
+++ b/inst/tests/test-mapply.r
@@ -11,7 +11,7 @@ test_that("Lack of names is preserved", {
test_that("No expansion creates single output dimension", {
a <- maply(cbind(1:20, 1:20), "+", .expand = FALSE)
expect_that(a, is_equivalent_to(1:20 * 2))
-
+
d <- mdply(cbind(1:20, 1:20), "+", .expand = FALSE)
expect_that(d$X1, equals(1:20))
expect_that(d$V1, equals(1:20 * 2))
@@ -21,9 +21,9 @@ test_that("Expand = TRUE creates multiple output dimensions", {
a <- maply(cbind(1:20, 1:20), "+", .expand = TRUE)
expect_that(dim(a), equals(c(20, 20)))
expect_that(diag(a), is_equivalent_to(1:20 * 2))
-
+
d <- mdply(cbind(1:20, 1:20), "+", .expand = TRUE)
expect_that(d$X1, equals(1:20))
expect_that(d$X2, equals(1:20))
expect_that(d$V1, equals(1:20 * 2))
-})
\ No newline at end of file
+})
diff --git a/inst/tests/test-mutate.r b/inst/tests/test-mutate.r
index b7a2fa1..c58bc82 100644
--- a/inst/tests/test-mutate.r
+++ b/inst/tests/test-mutate.r
@@ -4,7 +4,7 @@ test_that("mutate behaves the same as transform", {
m1 <- mutate(airquality, Ozone = -Ozone)
t1 <- mutate(airquality, Ozone = -Ozone)
expect_that(m1, equals(t1))
-
+
m2 <- mutate(airquality, new = -Ozone, Temp = (Temp-32)/1.8)
t2 <- mutate(airquality, new = -Ozone, Temp = (Temp-32)/1.8)
expect_that(m2, equals(t2))
@@ -13,6 +13,6 @@ test_that("mutate behaves the same as transform", {
test_that("columns can depend on previously created", {
m1 <- mutate(airquality, dm = Month + Day / 31, dm2 = 2 * dm)
dm2 <- with(airquality, 2 * (Month + Day / 31))
-
+
expect_that(m1$dm2, equals(dm2))
-})
\ No newline at end of file
+})
diff --git a/inst/tests/test-ninteraction.r b/inst/tests/test-ninteraction.r
index e4c9287..961935e 100644
--- a/inst/tests/test-ninteraction.r
+++ b/inst/tests/test-ninteraction.r
@@ -56,6 +56,16 @@ test_that("NAs are placed last", {
})
test_that("zero length input gives single number", {
- expect_that(id(character()), is_equivalent_to(1))
+ expect_that(id(character()), is_equivalent_to(integer()))
})
+test_that("zero column data frame gives seq_len(nrow)", {
+ df <- as.data.frame(matrix(nrow = 10, ncol = 0))
+ expect_equivalent(id(df), 1:10)
+})
+
+test_that("empty list doesn't affect n", {
+ out <- id(list(integer(), 1:5))
+ expect_equivalent(out, 1:5)
+ expect_equal(attr(out, "n"), 5)
+})
diff --git a/inst/tests/test-parallel.r b/inst/tests/test-parallel.r
new file mode 100644
index 0000000..793808c
--- /dev/null
+++ b/inst/tests/test-parallel.r
@@ -0,0 +1,29 @@
+context("Parallel")
+
+if (require("doMC", quietly = TRUE)) {
+ registerDoMC(2)
+
+ test_that("l_ply respects .parallel", {
+ expect_that(
+ l_ply(c(0.1, 0.1), Sys.sleep, .parallel = TRUE),
+ takes_less_than(0.15))
+ })
+
+ test_that("l_ply + .parallel complains about invalid arguments", {
+ expect_message(
+ l_ply(1:10, force, .parallel = TRUE, .print = TRUE),
+ "Printing disabled")
+ expect_message(
+ l_ply(1:10, force, .parallel = TRUE, .progress = "text"),
+ "Progress disabled")
+ })
+
+ test_that(".paropts passes options to foreach", {
+ combine <- function(a, b) NULL
+ x <- llply(1:10, identity, .parallel = TRUE,
+ .paropts = list(.combine = combine))
+ expect_equal(x, NULL)
+ })
+
+ registerDoMC(1)
+}
diff --git a/inst/tests/test-progress.r b/inst/tests/test-progress.r
index 1440bdf..b848dab 100644
--- a/inst/tests/test-progress.r
+++ b/inst/tests/test-progress.r
@@ -7,4 +7,4 @@ test_that("unknown progress bar raised warning, not error", {
llply(1:10, identity, .progress = "blah"),
throws_error("Cannot find progress bar")
)
-})
\ No newline at end of file
+})
diff --git a/inst/tests/test-quote.r b/inst/tests/test-quote.r
index 99ec37f..b7611e8 100644
--- a/inst/tests/test-quote.r
+++ b/inst/tests/test-quote.r
@@ -3,9 +3,9 @@ context("Quoting")
test_that("quoting captures current environment", {
x <- .(a, b, c)
expect_that(attr(x, "env"), is_identical_to(environment()))
-
+
x <- as.quoted(c("a", "b", "c"))
- expect_that(attr(x, "env"), is_identical_to(environment()))
+ expect_that(attr(x, "env"), is_identical_to(environment()))
})
test_that("evaluation takes place in correct environment", {
@@ -14,20 +14,20 @@ test_that("evaluation takes place in correct environment", {
a <- 1
.(a)
})
-
+
expect_that(eval.quoted(x)$a, equals(1))
-
+
df <- data.frame(x = 1:10)
x <- local({
a <- 1
.(x * a)
})
expect_that(eval.quoted(x, df)[[1]], equals(1:10))
-
+
})
test_that("names work for long expressions", {
q <- .(foo = barjasdfgjadhfgjsdhfgusdhfgusheguisdhguioahsrofasdgsdfgsdfg +
dfgafgasdfgsdfgsdfgsdfgsdfgsdfgsdfg)
- expect_that(names(q), equals("foo"))
-})
\ No newline at end of file
+ expect_that(names(q), equals("foo"))
+})
diff --git a/inst/tests/test-rbind.matrix.r b/inst/tests/test-rbind.matrix.r
index 65d54af..d8ad036 100755
--- a/inst/tests/test-rbind.matrix.r
+++ b/inst/tests/test-rbind.matrix.r
@@ -30,7 +30,7 @@ test_that ("additional columns are NA: should behave like rbind.fill for data.fr
ref <- as.matrix (rbind.fill (as.data.frame (a), as.data.frame (b)))
colnames (ref) <- seq_len(ncol(ref))
rownames (ref) <- NULL
-
+
expect_that(new, equals(ref))
})
@@ -44,7 +44,7 @@ test_that ("merge with column names: should behave like rbind.fill for data.fram
ref <- as.matrix (rbind.fill (as.data.frame (a), as.data.frame (b)))
rownames (ref) <- NULL
-
+
expect_that(new, equals(ref))
})
@@ -59,22 +59,22 @@ test_that ("merge with column names: should behave like rbind.fill for data.fram
ref <- as.matrix (rbind.fill (as.data.frame (a), as.data.frame (b)))
rownames (ref) <- NULL
-
+
expect_that(new, equals(ref))
})
test_that ("only 1 element: should behave like rbind.fill for data.frame",{
a <- matrix (1, 1)
colnames (a) <- letters [2]
-
+
b <- matrix (1:9, 3)
colnames (b) <- letters [c (1, 2, 4)]
-
+
new <- rbind.fill.matrix (a, b)
ref <- as.matrix (rbind.fill (as.data.frame (a), as.data.frame (b)))
rownames (ref) <- NULL
-
+
expect_that(new, equals(ref))
})
@@ -90,7 +90,7 @@ test_that ("character + numeric: should behave like rbind.fill for data.frame",{
as.data.frame (b, stringsAsFactors = FALSE))
ref <- as.matrix (sapply (ref, as.character)) # the last column is integer and would gain a second
# character with direct as.matrix
-
+
expect_that(new, equals(ref))
})
@@ -112,5 +112,28 @@ test_that ("vector: uses as.matrix",{
expect_that(new, equals (new))
})
+test_that("zero-row matrices", {
+ m1 <- matrix(nrow=0, ncol=2, dimnames=list(NULL, c("x", "y")))
+ m2 <- matrix(nrow=0, ncol=2, dimnames=list(NULL, c("y", "z")))
+ m3 <- matrix(c(1,2), nrow=2, ncol=1, dimnames=list(NULL, "y"))
+
+ ba <- rbind.fill.matrix(m1)
+ bb <- rbind.fill.matrix(m2, m3)
+ bc <- rbind.fill.matrix(m1, m2)
+
+ expect_equal(class(ba), "matrix")
+ expect_equal(nrow(ba), 0)
+ expect_true(all(colnames(ba) %in% c("x", "y")))
+
+ expect_equal(class(bb), "matrix")
+ expect_equal(nrow(bb), 2)
+ expect_true(all(names(bb) %in% c("x", "y", "z")))
+ expect_equal(bb[,"y"], m3[,"y"])
+ expect_equal(bb[,"z"], rep(as.numeric(NA), nrow(m3)))
+
+ expect_equal(class(bc), "matrix")
+ expect_equal(nrow(bc), 0)
+ expect_true(all(colnames(bc) %in% c("x", "y", "z")))
+})
+
-
diff --git a/inst/tests/test-rbind.r b/inst/tests/test-rbind.r
index 837ceaf..deb03fa 100644
--- a/inst/tests/test-rbind.r
+++ b/inst/tests/test-rbind.r
@@ -3,18 +3,18 @@ context("rbind.fill")
test_that("variable classes are preserved", {
a <- data.frame(a = factor(letters[1:3]), b = 1:3, c = date())
b <- data.frame(
- a = factor(letters[3:5]),
+ a = factor(letters[3:5]),
d = as.Date(c("2008-01-01", "2009-01-01", "2010-01-01")))
- b$e <- as.POSIXlt(as.Date(c("2008-01-01", "2009-01-01", "2010-01-01")))
+ b$e <- as.POSIXlt(as.Date(c("2008-01-01", "2009-01-01", "2010-01-01")))
b$f <- matrix (1:6, nrow = 3)
-
+
ab1 <- rbind.fill(a, b)[, letters[1:6]]
ab2 <- rbind.fill(b, a)[c(4:6, 1:3), letters[1:6]]
ab2$a <- factor(ab2$a, levels(ab1$a))
rownames(ab2) <- NULL
-
+
expect_that(ab1, equals(ab2))
- expect_that(unname(lapply(ab1, class)),
+ expect_that(unname(lapply(ab1, class)),
equals(list("factor", "integer", "factor", "Date", c("POSIXct", "POSIXt"),
"matrix")))
})
@@ -25,7 +25,7 @@ test_that("same as rbind for simple cases", {
b1 <- do.call("rbind", bplayer)
rownames(b1) <- NULL
b2 <- rbind.fill(bplayer)
-
+
expect_that(b1, equals(b2))
})
@@ -33,7 +33,7 @@ test_that("columns are in expected order", {
a <- data.frame(a = 1, b = 2, c = 3)
b <- data.frame(b = 2, d = 4, e = 4)
c <- data.frame(c = 1, b = 2, a = 1)
-
+
expect_that(names(rbind.fill(a, b)), equals(c("a", "b", "c", "d", "e")))
expect_that(names(rbind.fill(a, c)), equals(c("a", "b", "c")))
expect_that(names(rbind.fill(c, a)), equals(c("c", "b", "a")))
@@ -42,16 +42,16 @@ test_that("columns are in expected order", {
test_that("matrices are preserved", {
a <- data.frame(a = factor(letters[3:5]))
a$b <- matrix(1:6, nrow = 3)
-
+
expect_that(rbind.fill(a, a)$b, is_equivalent_to(rbind(a, a)$b))
-
+
b <- data.frame(c = 1:3)
ab1 <- rbind.fill(a, b) [ , letters[1:3]]
ab2 <- rbind.fill(b, a) [c(4:6, 1:3), letters[1:3]]
ab2$a <- factor(ab2$a, levels(ab1$a))
rownames(ab2) <- NULL
-
+
expect_that(ab1, equals(ab2))
})
@@ -72,7 +72,7 @@ test_that("time zones are preserved", {
for(tz in tzs) {
start <- data.frame(x = as.POSIXct(dstart, tz = tz))
end <- data.frame(x = as.POSIXct(dstop, tz = tz))
-
+
both <- rbind.fill(start, end)
expect_that(get_tz(both$x)[1], equals(tz), label = tz)
}
@@ -95,25 +95,65 @@ test_that("attributes are preserved", {
attr(d1$b, "bar") <- "bar"
attr(d2$b, "foo") <- "two"
attr(d2$b, "baz") <- "baz"
-
+
d12 <- rbind.fill(d1, d2)
d21 <- rbind.fill(d2, d1)
-
+
expect_that(attr(d12$b, "foo"), equals("one"))
expect_that(attr(d21$b, "foo"), equals("two"))
-
+
})
test_that("characters override factors", {
d1a <- data.frame(x=c('a','b'), y=1:2)
d2a <- data.frame(x=c('b','d'), z=1:2, stringsAsFactors=F)
-
+
d1b <- data.frame(x=c('a','b'), y=1:2, stringsAsFactors=F)
d2b <- data.frame(x=c('b','d'), z=1:2)
-
+
d3a <- rbind.fill(d1a,d2a)
d3b <- rbind.fill(d1b,d2b)
-
+
expect_that(d3a$x, is_a("character"))
expect_that(d3b$x, is_a("character"))
-})
\ No newline at end of file
+})
+
+test_that("zero row data frames ok", {
+ d1 <- data.frame(x = 1:2, y = 2:3)
+ d2 <- data.frame(y = 3:4, z = 5:6)
+
+ za <- rbind.fill(subset(d1, FALSE))
+ zb <- rbind.fill(d1, subset(d2, FALSE))
+ zc <- rbind.fill(subset(d1, FALSE), subset(d2, FALSE))
+
+ expect_equal(class(za), "data.frame")
+ expect_equal(nrow(za), 0)
+ expect_true(all(names(za) %in% c("x", "y")))
+
+ expect_equal(class(zb), "data.frame")
+ expect_equal(nrow(zb), 2)
+ expect_true(all(names(zb) %in% c("x", "y", "z")))
+ expect_equal(zb$y, d1$y)
+ expect_equal(zb$z, rep(as.numeric(NA), nrow(d1)))
+
+ expect_equal(class(zc), "data.frame")
+ expect_equal(nrow(zc), 0)
+ expect_true(all(names(zc) %in% c("x", "y", "z")))
+})
+
+test_that("zero col data frames ok", {
+ d1 <- data.frame(x = "a", y = 1L)
+ d2 <- data.frame(y = 2L, z = 3L)
+
+ za <- rbind.fill(d1[0, ], d2[0, ])
+ zb <- rbind.fill(d1[0, ], d2)
+ zc <- rbind.fill(d1, d2[0, ])
+
+ expect_equal(names(za), c("x", "y", "z"))
+ expect_equal(names(zb), c("x", "y", "z"))
+ expect_equal(names(zc), c("x", "y", "z"))
+
+ expect_equal(nrow(za), 0)
+ expect_equal(nrow(zb), 1)
+ expect_equal(nrow(zc), 1)
+})
diff --git a/inst/tests/test-rename.r b/inst/tests/test-rename.r
index 149866d..89a5047 100644
--- a/inst/tests/test-rename.r
+++ b/inst/tests/test-rename.r
@@ -2,21 +2,38 @@ context("Rename")
test_that("No match leaves names unchanged", {
x <- c(a = 1, b = 2, c = 3, 4)
- y <- rename(x, c(d = "e"))
-
+ y <- rename(x, c(d = "e"), warn_missing = FALSE)
+
expect_equal(names(x), names(y))
})
+test_that("Missing old values result in message", {
+ # This is the same rename operation as above, but should give a message
+ x <- c(a = 1, b = 2, c = 3, 4)
+ expect_message(rename(x, c(d = "e")))
+})
+
test_that("Single name match makes change", {
x <- c(a = 1, b = 2)
y <- rename(x, c(b = "c"))
-
+
expect_equal(names(y), c("a", "c"))
})
test_that("Multiple names correctly changed", {
x <- c(a = 1, b = 2, c = 3)
y <- rename(x, c("c" = "f", "b" = "e", "a" = "d"))
-
+
expect_equal(names(y), c("d", "e", "f"))
-})
\ No newline at end of file
+})
+
+test_that("Empty vectors and lists", {
+ expect_identical(rename(character(), c("c" = "f"), warn_missing = FALSE), character())
+ expect_identical(rename(list(), c("c" = "f"), warn_missing = FALSE), list())
+})
+
+test_that("Renaming lists", {
+ x <- list(a = 1, b = 2, c = 3)
+ y <- rename(x, c("c" = "f", "b" = "e", "a" = "d"))
+ expect_identical(y, list(d = 1, e = 2, f = 3))
+})
diff --git a/inst/tests/test-replicate.r b/inst/tests/test-replicate.r
index 840d738..ebee4ce 100644
--- a/inst/tests/test-replicate.r
+++ b/inst/tests/test-replicate.r
@@ -3,7 +3,7 @@ context("Replicate")
test_that("length of results are correct", {
a <- rlply(4, NULL)
b <- rlply(4, 1)
-
+
expect_equal(length(a), 4)
expect_equal(length(b), 4)
-})
\ No newline at end of file
+})
diff --git a/inst/tests/test-revalue.r b/inst/tests/test-revalue.r
new file mode 100644
index 0000000..b56c9bf
--- /dev/null
+++ b/inst/tests/test-revalue.r
@@ -0,0 +1,120 @@
+context("Replace values")
+
+
+# Character vector
+chr <- c("A2", "A1", "A3", "A1")
+# Factor: To complicate things, set levels in a different order
+fac <- factor(c("A1", "A2", "A3"), levels=c("A2", "A1", "A3"))
+# Numeric vector
+num <- c(4, 1, 5, 8)
+
+
+# test warn if any missing
+
+test_that("Empty mapping results in no change", {
+ expect_identical(mapvalues(chr, from = NULL, to = NULL), chr)
+ expect_identical(revalue(chr, NULL), chr)
+
+ expect_identical(mapvalues(fac, from = NULL, to = NULL), fac)
+ expect_identical(revalue(fac, NULL), fac)
+})
+
+test_that("Basic mapping works", {
+ newchr <- c("B2", "A1", "B3", "A1")
+ expect_identical(mapvalues(chr, c("A3", "A2"), c("B3", "B2")), newchr)
+ expect_identical(revalue(chr, c(A3="B3", A2="B2")), newchr)
+
+ newfac <- factor(c("A1", "B2", "B3"), levels=c("B2", "A1", "B3"))
+ expect_identical(mapvalues(fac, c("A3", "A2"), c("B3", "B2")), newfac)
+ expect_identical(revalue(fac, c(A3="B3", A2="B2")), newfac)
+
+ newnum <- c(40, 1, 5, 80)
+ expect_identical(mapvalues(num, c(4, 8), c(40, 80)), newnum)
+ # revalue doesn't work for numeric vectors
+})
+
+test_that("Mapping with repeated original values works - should use first instance, and give message", {
+ newchr <- c("A2", "B1", "A3", "B1")
+ expect_message(
+ expect_identical(mapvalues(chr, c("A1", "A1"), c("B1", "C1")), newchr))
+ expect_message(
+ expect_identical(revalue(chr, c(A1="B1", A1="C1")), newchr))
+
+
+ newfac <- factor(c("B1", "A2", "A3"), levels=c("A2", "B1", "A3"))
+ expect_message(
+ expect_identical(mapvalues(fac, c("A1", "A1"), c("B1", "C1")), newfac))
+ expect_message(
+ expect_identical(revalue(fac, c(A1="B1", A1="C1")), newfac))
+
+ newnum <- c(4, 1, 5, 80)
+ expect_message(
+ expect_identical(mapvalues(num, c(8, 8), c(80, 800)), newnum))
+})
+
+test_that("Mapping with repeated new value works (for factors, levels should be in earliest position)", {
+ newchr <- c("BX", "A1", "BX", "A1")
+ expect_identical(mapvalues(chr, c("A3", "A2"), c("BX", "BX")), newchr)
+ expect_identical(revalue(chr, c(A3="BX", A2="BX")), newchr)
+
+
+ newfac <- factor(c("A1", "BX", "BX"), levels=c("BX", "A1"))
+ expect_identical(revalue(fac, c(A3="BX", A2="BX")), newfac)
+
+ # Factors can have levels in different orders
+ newfac2 <- factor(c("BX", "A2", "BX"), levels=c("A2", "BX"))
+ expect_identical(revalue(fac, c(A3="BX", A1="BX")), newfac2)
+})
+
+test_that("Mapping with multiple matches works", {
+ newchr <- c("B2", "B1", "A3", "B1")
+ expect_identical(mapvalues(chr, c("A1", "A2"), c("B1", "B2")), newchr)
+ expect_identical(revalue(chr, c(A1="B1", A2="B2")), newchr)
+ # Not relevant for factors because they can't have two levels be the same
+})
+
+test_that("Mapping with non-matching original levels results in no change, and message", {
+ expect_message(
+ expect_identical(revalue(chr, c(A4="B4")), chr))
+ expect_message(
+ expect_identical(revalue(chr, c(A3="B3", A4="B4")), c("A2", "A1", "B3", "A1")))
+
+ expect_message(
+ expect_identical(revalue(fac, c(A4="B4")), fac))
+ expect_message(
+ expect_identical(revalue(fac, c(A3="B3", A4="B4")),
+ factor(c("A1", "A2", "B3"), levels=c("A2", "A1", "B3"))))
+})
+
+test_that("Swapping values works", {
+ newchr <- c("A3", "A1", "A2", "A1")
+ expect_identical(mapvalues(chr, c("A2", "A3"), c("A3", "A2")), newchr)
+ expect_identical(revalue(chr, c(A2="A3", A3="A2")), newchr)
+
+ newfac <- factor(c("A1", "A3", "A2"), levels=c("A3", "A1", "A2"))
+ expect_identical(mapvalues(fac, c("A2", "A3"), c("A3", "A2")), newfac)
+ expect_identical(revalue(fac, c(A2="A3", A3="A2")), newfac)
+})
+
+test_that("Mapping with ' ' and '$' in original and replacement works", {
+ chr2 <- c("A2", "A $1", "A3", "A $1")
+ expect_identical(revalue(chr2, c("A $1"="B $1")),
+ c("A2", "B $1", "A3", "B $1"))
+
+ fac2 <- factor(c("A $1", "A2", "A3"), levels=c("A2", "A $1", "A3"))
+ expect_identical(revalue(fac2, c("A $1"="B $1")),
+ factor(c("B $1", "A2", "A3"), levels=c("A2", "B $1", "A3")))
+})
+
+test_that("revalue and mapvalues only accept atomic vectors", {
+ expect_error(revalue(list(A=3), c("3"=30)))
+ expect_error(mapvalues(list(A=3), 3, 30))
+})
+
+test_that("revalue and mapvalues accept empty vectors and NULL", {
+ expect_identical(revalue(character(0), c("3"=30), warn_missing=FALSE), character(0))
+ expect_identical(mapvalues(character(0), 3, 30, warn_missing=FALSE), character(0))
+
+ expect_identical(revalue(NULL, c("3"=30), warn_missing=FALSE), NULL)
+ expect_identical(mapvalues(NULL, 3, 30, warn_missing=FALSE), NULL)
+})
diff --git a/inst/tests/test-simplify-df.r b/inst/tests/test-simplify-df.r
index c6704af..b4fb7b4 100644
--- a/inst/tests/test-simplify-df.r
+++ b/inst/tests/test-simplify-df.r
@@ -117,6 +117,18 @@ test_that("names captured from list", {
test_that("correct number of rows outputted", {
testdata <- data.frame(a = rep(letters[1:3], each = 5), b = rnorm(15))
res <- ddply(testdata, .(a), function(x) c(mean(x$b), sd(x$b)))
-
+
expect_that(nrow(res), equals(3))
})
+
+
+test_that("matrices converted to data frames", {
+ mat <- matrix(1:20, ncol = 4)
+ colnames(mat) <- letters[1:4]
+
+ li <- list(a = mat, b = mat)
+ df <- list_to_dataframe(li)
+
+ expect_equal(nrow(df), 2 * nrow(mat))
+ expect_equal(names(df), c(".id", "a", "b", "c", "d"))
+})
diff --git a/inst/tests/test-split-data-frame.r b/inst/tests/test-split-data-frame.r
index aabda04..a767bdc 100644
--- a/inst/tests/test-split-data-frame.r
+++ b/inst/tests/test-split-data-frame.r
@@ -1,8 +1,22 @@
context("Split data frame")
-df <- data.frame(x = factor(1:10), y = letters[1:10])
test_that("correct order is used", {
+ df <- data.frame(x = factor(1:10), y = letters[1:10])
+
expect_that(ddply(df, .(x), .drop = FALSE), equals(df))
expect_that(ddply(df, .(x), .drop = TRUE), equals(df))
-})
\ No newline at end of file
+})
+
+test_that("factor levels are preserved", {
+ df <- data.frame(a = factor(1:4, levels = 1:5), x = runif(4))
+
+ out1 <- ddply(df, "a", strip_splits, .drop = TRUE)
+ out2 <- ddply(df, "a", strip_splits, .drop = FALSE)
+
+ expect_is(out1$a, "factor")
+ expect_is(out2$a, "factor")
+
+ expect_equal(levels(out1$a), levels(df$a))
+ expect_equal(levels(out2$a), levels(df$a))
+})
diff --git a/inst/tests/test-split-indices.r b/inst/tests/test-split-indices.r
new file mode 100644
index 0000000..04f8f69
--- /dev/null
+++ b/inst/tests/test-split-indices.r
@@ -0,0 +1,7 @@
+context("Split indices")
+
+test_that("Error if n too small", {
+ expect_error(split_indices(1:10, 5),
+ "n smaller than largest index")
+
+})
diff --git a/inst/tests/test-split-labels.r b/inst/tests/test-split-labels.r
index bd212a7..96f532e 100644
--- a/inst/tests/test-split-labels.r
+++ b/inst/tests/test-split-labels.r
@@ -2,10 +2,10 @@ context("Split labels")
test_that("Empty levels preserved", {
df <- data.frame(fac1 = letters[1:4], fac2 = LETTERS[1:4])
-
+
a <- split_labels(df, FALSE)
b <- split_labels(df[1, ], FALSE)
-
+
expect_that(a, equals(b))
-
+
})
diff --git a/inst/tests/test-summarise.r b/inst/tests/test-summarise.r
index 40fcb00..2dd5b62 100644
--- a/inst/tests/test-summarise.r
+++ b/inst/tests/test-summarise.r
@@ -12,5 +12,5 @@ test_that("summarise creates correct names", {
# df <- summarise(mtcars, mean(cyl), mean(vs))
# expect_that(names(df), equals("x", "y"))
-
-})
\ No newline at end of file
+
+})
diff --git a/man-roxygen/-a.r b/man-roxygen/-a.r
deleted file mode 100644
index 3d5f75a..0000000
--- a/man-roxygen/-a.r
+++ /dev/null
@@ -1,10 +0,0 @@
-#' @section Output:
-#' If there are no results, then this function will return a vector of
-#' length 0 (\code{vector()}).
-#'
-#' @return if results are atomic with same type and dimensionality, a
-#' vector, matrix or array; otherwise, a list-array (a list with
-#' dimensions)
-#' @param .drop should extra dimensions of length 1 in the output be
-#' dropped, simplifying the output. Defaults to \code{TRUE}
-#' @family array output
diff --git a/man-roxygen/-d.r b/man-roxygen/-d.r
deleted file mode 100644
index c4e0bea..0000000
--- a/man-roxygen/-d.r
+++ /dev/null
@@ -1,12 +0,0 @@
-#' @section Output:
-#' The most unambiguous behaviour is achieved when \code{.fun} returns a
-#' data frame - in that case pieces will be combined with
-#' \code{\link{rbind.fill}}. If \code{.fun} returns an atomic vector of
-#' fixed length, it will be \code{rbind}ed together and converted to a data
-#' frame. Any other values will result in an error.
-#'
-#' If there are no results, then this function will return a data
-#' frame with zero rows and columns (\code{data.frame()}).
-#'
-#' @return A data frame, as described in the output section.
-#' @family data frame output
diff --git a/man-roxygen/-l.r b/man-roxygen/-l.r
deleted file mode 100644
index b1e80d3..0000000
--- a/man-roxygen/-l.r
+++ /dev/null
@@ -1,5 +0,0 @@
-#' @section Output: If there are no results, then this function will return
-#' a list of length 0 (\code{list()}).
-#'
-#' @return list of results
-#' @family list output
diff --git a/man-roxygen/a-.r b/man-roxygen/a-.r
deleted file mode 100644
index 71316fb..0000000
--- a/man-roxygen/a-.r
+++ /dev/null
@@ -1,14 +0,0 @@
-#' @section Input: This function splits matrices, arrays and data frames by
-#' dimensions
-#'
-#' @param .data matrix, array or data frame to be processed
-#' @param .margins a vector giving the subscripts to split up \code{data} by.
-#' 1 splits up by rows, 2 by columns and c(1,2) by rows and columns, and so
-#' on for higher dimensions
-#' @family array input
-#' @param .expand if \code{.data} is a data frame, should output be 1d (expand
-#' = FALSE), with an element for each row; or nd (expand = TRUE), with a
-#' @param .parallel if \code{TRUE}, apply function in parallel, using parallel
-#' backend provided by foreach
-#' dimension for each variable.
-
diff --git a/man-roxygen/d-.r b/man-roxygen/d-.r
deleted file mode 100644
index 380328a..0000000
--- a/man-roxygen/d-.r
+++ /dev/null
@@ -1,10 +0,0 @@
-#' @section Input: This function splits data frames by variables.
-#'
-#' @param .data data frame to be processed
-#' @param .variables variables to split data frame by, as quoted
-#' variables, a formula or character vector
-#' @param .drop should combinations of variables that do not appear in the
-#' input data be preserved (FALSE) or dropped (TRUE, default)
-#' @param .parallel if \code{TRUE}, apply function in parallel, using parallel
-#' backend provided by foreach
-#' @family data frame input
diff --git a/man-roxygen/l-.r b/man-roxygen/l-.r
deleted file mode 100644
index 0ee3ab1..0000000
--- a/man-roxygen/l-.r
+++ /dev/null
@@ -1,7 +0,0 @@
-#' @section Input: This function splits lists by elements and combines the
-#' result into a data frame.
-#'
-#' @param .data list to be processed
-#' @param .parallel if \code{TRUE}, apply function in parallel, using parallel
-#' backend provided by foreach
-#' @family list input
diff --git a/man-roxygen/ply.r b/man-roxygen/ply.r
deleted file mode 100644
index dbb2c34..0000000
--- a/man-roxygen/ply.r
+++ /dev/null
@@ -1,8 +0,0 @@
-#' @param .fun function to apply to each piece
-#' @param ... other arguments passed on to \code{.fun}
-#' @param .progress name of the progress bar to use, see
-#' \code{\link{create_progress_bar}}
-#' @keywords manip
-#' @references Hadley Wickham (2011). The Split-Apply-Combine Strategy
-#' for Data Analysis. Journal of Statistical Software, 40(1), 1-29.
-#' \url{http://www.jstatsoft.org/v40/i01/}.
diff --git a/man/a_ply.Rd b/man/a_ply.Rd
index ec7160d..e9e1e8e 100644
--- a/man/a_ply.Rd
+++ b/man/a_ply.Rd
@@ -3,9 +3,32 @@
\title{Split array, apply function, and discard results.}
\usage{
a_ply(.data, .margins, .fun = NULL, ..., .expand = TRUE,
- .progress = "none", .print = FALSE)
+ .progress = "none", .inform = FALSE, .print = FALSE,
+ .parallel = FALSE, .paropts = NULL)
}
\arguments{
+ \item{.fun}{function to apply to each piece}
+
+ \item{...}{other arguments passed on to \code{.fun}}
+
+ \item{.progress}{name of the progress bar to use, see
+ \code{\link{create_progress_bar}}}
+
+ \item{.parallel}{if \code{TRUE}, apply function in
+ parallel, using parallel backend provided by foreach}
+
+ \item{.paropts}{a list of additional options passed into
+ the \code{\link[foreach]{foreach}} function when parallel
+ computation is enabled. This is important if (for
+ example) your code relies on external data or packages:
+ use the \code{.export} and \code{.packages} arguments to
+ supply them so that all cluster nodes have the correct
+ environment set up for computing.}
+
+ \item{.inform}{produce informative error messages? This
+ is turned off by by default because it substantially
+ slows processing speed, but is very useful for debugging}
+
\item{.data}{matrix, array or data frame to be processed}
\item{.margins}{a vector giving the subscripts to split
@@ -13,39 +36,42 @@
c(1,2) by rows and columns, and so on for higher
dimensions}
- \item{.fun}{function to apply to each piece}
-
- \item{...}{other arguments passed on to \code{.fun}}
-
\item{.expand}{if \code{.data} is a data frame, should
output be 1d (expand = FALSE), with an element for each
row; or nd (expand = TRUE), with a dimension for each
variable.}
- \item{.progress}{name of the progress bar to use, see
- \code{\link{create_progress_bar}}}
-
\item{.print}{automatically print each result? (default:
\code{FALSE})}
}
+\value{
+ Nothing
+}
\description{
For each slice of an array, apply function and discard
results
}
-\details{
- All plyr functions use the same split-apply-combine
- strategy: they split the input into simpler pieces, apply
- \code{.fun} to each piece, and then combine the pieces
- into a single data structure. This function splits
- matrices, arrays and data frames by dimensions and
- discards the output. This is useful for functions that
- you are calling purely for their side effects like
- display plots and saving output.
+\section{Input}{
+ This function splits matrices, arrays and data frames by
+ dimensions
+}
+
+\section{Output}{
+ All output is discarded. This is useful for functions
+ that you are calling purely for their side effects like
+ displaying plots or saving output.
}
\references{
Hadley Wickham (2011). The Split-Apply-Combine Strategy
for Data Analysis. Journal of Statistical Software,
40(1), 1-29. \url{http://www.jstatsoft.org/v40/i01/}.
}
+\seealso{
+ Other array input: \code{\link{aaply}},
+ \code{\link{adply}}, \code{\link{alply}}
+
+ Other no output: \code{\link{d_ply}},
+ \code{\link{l_ply}}, \code{\link{m_ply}}
+}
\keyword{manip}
diff --git a/man/aaply.Rd b/man/aaply.Rd
index ad5660c..4ce52ff 100644
--- a/man/aaply.Rd
+++ b/man/aaply.Rd
@@ -3,7 +3,8 @@
\title{Split array, apply function, and return results in an array.}
\usage{
aaply(.data, .margins, .fun = NULL, ..., .expand = TRUE,
- .progress = "none", .drop = TRUE, .parallel = FALSE)
+ .progress = "none", .inform = FALSE, .drop = TRUE,
+ .parallel = FALSE, .paropts = NULL)
}
\arguments{
\item{.fun}{function to apply to each piece}
@@ -13,6 +14,21 @@
\item{.progress}{name of the progress bar to use, see
\code{\link{create_progress_bar}}}
+ \item{.parallel}{if \code{TRUE}, apply function in
+ parallel, using parallel backend provided by foreach}
+
+ \item{.paropts}{a list of additional options passed into
+ the \code{\link[foreach]{foreach}} function when parallel
+ computation is enabled. This is important if (for
+ example) your code relies on external data or packages:
+ use the \code{.export} and \code{.packages} arguments to
+ supply them so that all cluster nodes have the correct
+ environment set up for computing.}
+
+ \item{.inform}{produce informative error messages? This
+ is turned off by by default because it substantially
+ slows processing speed, but is very useful for debugging}
+
\item{.data}{matrix, array or data frame to be processed}
\item{.margins}{a vector giving the subscripts to split
@@ -22,11 +38,8 @@
\item{.expand}{if \code{.data} is a data frame, should
output be 1d (expand = FALSE), with an element for each
- row; or nd (expand = TRUE), with a}
-
- \item{.parallel}{if \code{TRUE}, apply function in
- parallel, using parallel backend provided by foreach
- dimension for each variable.}
+ row; or nd (expand = TRUE), with a dimension for each
+ variable.}
\item{.drop}{should extra dimensions of length 1 in the
output be dropped, simplifying the output. Defaults to
@@ -39,13 +52,16 @@
}
\description{
For each slice of an array, apply function, keeping
- results as an array. This function is very similar to
- \code{\link{apply}}, except that it will always return an
- array, and when the function returns >1 d data
- structures, those dimensions are added on to the highest
- dimensions, rather than the lowest dimensions. This
- makes \code{aaply} idempotent, so that \code{apply(input,
- X, identity)} is equivalent to \code{aperm(input, X)}.
+ results as an array.
+}
+\details{
+ This function is very similar to \code{\link{apply}},
+ except that it will always return an array, and when the
+ function returns >1 d data structures, those dimensions
+ are added on to the highest dimensions, rather than the
+ lowest dimensions. This makes \code{aaply} idempotent,
+ so that \code{aaply(input, X, identity)} is equivalent to
+ \code{aperm(input, X)}.
}
\section{Input}{
This function splits matrices, arrays and data frames by
@@ -81,11 +97,11 @@ aaply(ozone, 1:2, diff)
40(1), 1-29. \url{http://www.jstatsoft.org/v40/i01/}.
}
\seealso{
- Other array input: \code{\link{adply}},
- \code{\link{alply}}
+ Other array input: \code{\link{a_ply}},
+ \code{\link{adply}}, \code{\link{alply}}
Other array output: \code{\link{daply}},
- \code{\link{laply}}
+ \code{\link{laply}}, \code{\link{maply}}
}
\keyword{manip}
diff --git a/man/adply.Rd b/man/adply.Rd
index ee2d4dd..434f94d 100644
--- a/man/adply.Rd
+++ b/man/adply.Rd
@@ -3,7 +3,8 @@
\title{Split array, apply function, and return results in a data frame.}
\usage{
adply(.data, .margins, .fun = NULL, ..., .expand = TRUE,
- .progress = "none", .parallel = FALSE)
+ .progress = "none", .inform = FALSE, .parallel = FALSE,
+ .paropts = NULL)
}
\arguments{
\item{.fun}{function to apply to each piece}
@@ -13,6 +14,21 @@
\item{.progress}{name of the progress bar to use, see
\code{\link{create_progress_bar}}}
+ \item{.parallel}{if \code{TRUE}, apply function in
+ parallel, using parallel backend provided by foreach}
+
+ \item{.paropts}{a list of additional options passed into
+ the \code{\link[foreach]{foreach}} function when parallel
+ computation is enabled. This is important if (for
+ example) your code relies on external data or packages:
+ use the \code{.export} and \code{.packages} arguments to
+ supply them so that all cluster nodes have the correct
+ environment set up for computing.}
+
+ \item{.inform}{produce informative error messages? This
+ is turned off by by default because it substantially
+ slows processing speed, but is very useful for debugging}
+
\item{.data}{matrix, array or data frame to be processed}
\item{.margins}{a vector giving the subscripts to split
@@ -22,11 +38,8 @@
\item{.expand}{if \code{.data} is a data frame, should
output be 1d (expand = FALSE), with an element for each
- row; or nd (expand = TRUE), with a}
-
- \item{.parallel}{if \code{TRUE}, apply function in
- parallel, using parallel backend provided by foreach
- dimension for each variable.}
+ row; or nd (expand = TRUE), with a dimension for each
+ variable.}
}
\value{
A data frame, as described in the output section.
@@ -58,11 +71,11 @@
40(1), 1-29. \url{http://www.jstatsoft.org/v40/i01/}.
}
\seealso{
- Other array input: \code{\link{aaply}},
- \code{\link{alply}}
+ Other array input: \code{\link{a_ply}},
+ \code{\link{aaply}}, \code{\link{alply}}
Other data frame output: \code{\link{ddply}},
- \code{\link{ldply}}
+ \code{\link{ldply}}, \code{\link{mdply}}
}
\keyword{manip}
diff --git a/man/alply.Rd b/man/alply.Rd
index 4b759e8..3afdfa7 100644
--- a/man/alply.Rd
+++ b/man/alply.Rd
@@ -3,7 +3,8 @@
\title{Split array, apply function, and return results in a list.}
\usage{
alply(.data, .margins, .fun = NULL, ..., .expand = TRUE,
- .progress = "none", .parallel = FALSE)
+ .progress = "none", .inform = FALSE, .parallel = FALSE,
+ .paropts = NULL, .dims = FALSE)
}
\arguments{
\item{.fun}{function to apply to each piece}
@@ -13,6 +14,21 @@
\item{.progress}{name of the progress bar to use, see
\code{\link{create_progress_bar}}}
+ \item{.parallel}{if \code{TRUE}, apply function in
+ parallel, using parallel backend provided by foreach}
+
+ \item{.paropts}{a list of additional options passed into
+ the \code{\link[foreach]{foreach}} function when parallel
+ computation is enabled. This is important if (for
+ example) your code relies on external data or packages:
+ use the \code{.export} and \code{.packages} arguments to
+ supply them so that all cluster nodes have the correct
+ environment set up for computing.}
+
+ \item{.inform}{produce informative error messages? This
+ is turned off by default because it substantially
+ slows processing speed, but is very useful for debugging}
+
\item{.data}{matrix, array or data frame to be processed}
\item{.margins}{a vector giving the subscripts to split
@@ -22,20 +38,27 @@
\item{.expand}{if \code{.data} is a data frame, should
output be 1d (expand = FALSE), with an element for each
- row; or nd (expand = TRUE), with a}
+ row; or nd (expand = TRUE), with a dimension for each
+ variable.}
- \item{.parallel}{if \code{TRUE}, apply function in
- parallel, using parallel backend provided by foreach
- dimension for each variable.}
+ \item{.dims}{if \code{TRUE}, copy over dimensions and
+ names from input.}
}
\value{
list of results
}
\description{
For each slice of an array, apply function then combine
- results into a list. \code{alply} is somewhat similar to
- \code{\link{apply}} for cases where the results are not
- atomic.
+ results into a list.
+}
+\details{
+ The list will have "dims" and "dimnames" corresponding to
+ the margins given. For instance \code{alply(x, c(3,2),
+ ...)} where \code{x} has dims \code{c(4,3,2)} will give a
+ result with dims \code{c(2,3)}.
+
+ \code{alply} is somewhat similar to \code{\link{apply}}
+ for cases where the results are not atomic.
}
\section{Input}{
This function splits matrices, arrays and data frames by
@@ -56,11 +79,11 @@ alply(ozone, 3, function(x) table(round(x)))
40(1), 1-29. \url{http://www.jstatsoft.org/v40/i01/}.
}
\seealso{
- Other array input: \code{\link{aaply}},
- \code{\link{adply}}
+ Other array input: \code{\link{a_ply}},
+ \code{\link{aaply}}, \code{\link{adply}}
Other list output: \code{\link{dlply}},
- \code{\link{llply}}
+ \code{\link{llply}}, \code{\link{mlply}}
}
\keyword{manip}
diff --git a/man/arrange.Rd b/man/arrange.Rd
index 3c2adb5..4418af4 100644
--- a/man/arrange.Rd
+++ b/man/arrange.Rd
@@ -18,9 +18,20 @@
saves a lot of typing!
}
\examples{
+# sort mtcars data by cylinder and displacement
mtcars[with(mtcars, order(cyl, disp)), ]
+# Same result using arrange: no need to use with(), as the context is implicit
+# NOTE: plyr functions do NOT preserve row.names
arrange(mtcars, cyl, disp)
-arrange(mtcars, cyl, desc(disp))
+# Let's keep the row.names in this example
+myCars <- cbind(vehicle = row.names(mtcars), mtcars)
+arrange(myCars, cyl, disp)
+# Sort with displacement in descending order
+arrange(myCars, cyl, desc(disp))
+}
+\seealso{
+ \code{\link{order}} for sorting function in the base
+ package
}
\keyword{manip}
diff --git a/man/colwise.Rd b/man/colwise.Rd
index 82c9c71..7e71b61 100644
--- a/man/colwise.Rd
+++ b/man/colwise.Rd
@@ -4,7 +4,11 @@
\alias{numcolwise}
\title{Column-wise function.}
\usage{
- colwise(.fun, .cols = true)
+ colwise(.fun, .cols = true, ...)
+
+ catcolwise(.fun, ...)
+
+ numcolwise(.fun, ...)
}
\arguments{
\item{.fun}{function}
@@ -12,6 +16,8 @@
\item{.cols}{either a function that tests columns for
inclusion, or a quoted object giving which columns to
process}
+
+ \item{...}{other arguments passed on to \code{.fun}}
}
\description{
Turn a function that operates on a vector into a function
@@ -52,5 +58,10 @@ ddply(baseball, .(year), colwise(nmissing, is.discrete))
# provided:
ddply(baseball, .(year), numcolwise(nmissing))
ddply(baseball, .(year), catcolwise(nmissing))
+
+# You can supply additional arguments to either colwise, or the function
+# it generates:
+numcolwise(mean)(baseball, na.rm = TRUE)
+numcolwise(mean, na.rm = TRUE)(baseball)
}
diff --git a/man/count.Rd b/man/count.Rd
index ca3bd63..f6018fa 100644
--- a/man/count.Rd
+++ b/man/count.Rd
@@ -32,13 +32,23 @@
converting them to characters/factors.
}
\examples{
-count(baseball, "id")
-count(baseball, "id", "g")
+# Count of each value of "id" in the first 100 cases
+count(baseball[1:100,], vars = "id")
+# Count of ids, weighted by their "g" loading
+count(baseball[1:100,], vars = "id", wt_var = "g")
count(baseball, "id", "ab")
count(baseball, "lg")
+# How many stints do players do?
count(baseball, "stint")
-count(count(baseball, c("id", "year")), "id", "freq")
+# Count of times each player appeared in each of the years they played
+count(baseball[1:100,], c("id", "year"))
+# Count of counts
+count(count(baseball[1:100,], c("id", "year")), "id", "freq")
count(count(baseball, c("id", "year")), "freq")
}
+\seealso{
+ \code{\link{table}} for related functionality in the base
+ package
+}
\keyword{manip}
diff --git a/man/create_progress_bar.Rd b/man/create_progress_bar.Rd
index c2eaf9e..17a1806 100644
--- a/man/create_progress_bar.Rd
+++ b/man/create_progress_bar.Rd
@@ -37,10 +37,16 @@
over its apperance. See the examples.
}
\examples{
+# No progress bar
l_ply(1:100, identity, .progress = "none")
+\dontrun{
+# Use the Tcl/Tk interface
l_ply(1:100, identity, .progress = "tk")
+}
+# Text-based progress (|======|)
l_ply(1:100, identity, .progress = "text")
-l_ply(1:100, identity, .progress = progress_text(char = "-"))
+# Choose a progress character, run a length of time you can see
+l_ply(1:10000, identity, .progress = progress_text(char = "."))
}
\seealso{
\code{\link{progress_none}}, \code{\link{progress_text}},
diff --git a/man/d_ply.Rd b/man/d_ply.Rd
index 7e0b472..aa078e8 100644
--- a/man/d_ply.Rd
+++ b/man/d_ply.Rd
@@ -3,14 +3,10 @@
\title{Split data frame, apply function, and discard results.}
\usage{
d_ply(.data, .variables, .fun = NULL, ...,
- .progress = "none", .print = FALSE)
+ .progress = "none", .inform = FALSE, .drop = TRUE,
+ .print = FALSE, .parallel = FALSE, .paropts = NULL)
}
\arguments{
- \item{.data}{data frame to be processed}
-
- \item{.variables}{variables to split data frame by, as
- quoted variables, a formula or character vector}
-
\item{.fun}{function to apply to each piece}
\item{...}{other arguments passed on to \code{.fun}}
@@ -18,26 +14,61 @@
\item{.progress}{name of the progress bar to use, see
\code{\link{create_progress_bar}}}
+ \item{.parallel}{if \code{TRUE}, apply function in
+ parallel, using parallel backend provided by foreach}
+
+ \item{.paropts}{a list of additional options passed into
+ the \code{\link[foreach]{foreach}} function when parallel
+ computation is enabled. This is important if (for
+ example) your code relies on external data or packages:
+ use the \code{.export} and \code{.packages} arguments to
+ supply them so that all cluster nodes have the correct
+ environment set up for computing.}
+
+ \item{.inform}{produce informative error messages? This
+ is turned off by default because it substantially
+ slows processing speed, but is very useful for debugging}
+
+ \item{.data}{data frame to be processed}
+
+ \item{.variables}{variables to split data frame by, as
+ \code{\link{as.quoted}} variables, a formula or character
+ vector}
+
+ \item{.drop}{should combinations of variables that do not
+ appear in the input data be preserved (FALSE) or dropped
+ (TRUE, default)}
+
\item{.print}{automatically print each result? (default:
\code{FALSE})}
}
+\value{
+ Nothing
+}
\description{
For each subset of a data frame, apply function and
discard results
}
-\details{
- All plyr functions use the same split-apply-combine
- strategy: they split the input into simpler pieces, apply
- \code{.fun} to each piece, and then combine the pieces
- into a single data structure. This function splits data
- frames by variable and discards the output. This is
- useful for functions that you are calling purely for
- their side effects like display plots and saving output.
+\section{Input}{
+ This function splits data frames by variables.
+}
+
+\section{Output}{
+ All output is discarded. This is useful for functions
+ that you are calling purely for their side effects like
+ displaying plots or saving output.
}
\references{
Hadley Wickham (2011). The Split-Apply-Combine Strategy
for Data Analysis. Journal of Statistical Software,
40(1), 1-29. \url{http://www.jstatsoft.org/v40/i01/}.
}
+\seealso{
+ Other data frame input: \code{\link{daply}},
+ \code{\link{ddply}}, \code{\link{dlply}}
+
+ Other no output: \code{\link{a_ply}},
+ \code{\link{l_ply}}, \code{\link{m_ply}}
+}
\keyword{manip}
diff --git a/man/daply.Rd b/man/daply.Rd
index b78b809..393d11d 100644
--- a/man/daply.Rd
+++ b/man/daply.Rd
@@ -3,8 +3,8 @@
\title{Split data frame, apply function, and return results in an array.}
\usage{
daply(.data, .variables, .fun = NULL, ...,
- .progress = "none", .drop_i = TRUE, .drop_o = TRUE,
- .parallel = FALSE)
+ .progress = "none", .inform = FALSE, .drop_i = TRUE,
+ .drop_o = TRUE, .parallel = FALSE, .paropts = NULL)
}
\arguments{
\item{.fun}{function to apply to each piece}
@@ -14,6 +14,21 @@
\item{.progress}{name of the progress bar to use, see
\code{\link{create_progress_bar}}}
+ \item{.parallel}{if \code{TRUE}, apply function in
+ parallel, using parallel backend provided by foreach}
+
+ \item{.paropts}{a list of additional options passed into
+ the \code{\link[foreach]{foreach}} function when parallel
+ computation is enabled. This is important if (for
+ example) your code relies on external data or packages:
+ use the \code{.export} and \code{.packages} arguments to
+ supply them so that all cluster nodes have the correct
+ environment set up for computing.}
+
+ \item{.inform}{produce informative error messages? This
+ is turned off by by default because it substantially
+ slows processing speed, but is very useful for debugging}
+
\item{.data}{data frame to be processed}
\item{.variables}{variables to split data frame by, as
@@ -23,9 +38,6 @@
not appear in the input data be preserved (FALSE) or
dropped (TRUE, default)}
- \item{.parallel}{if \code{TRUE}, apply function in
- parallel, using parallel backend provided by foreach}
-
\item{.drop_o}{should extra dimensions of length 1 in the
output be dropped, simplifying the output. Defaults to
\code{TRUE}}
@@ -66,10 +78,10 @@ daply(baseball, .(year), function(df) colwise(mean)(df[, 6:9]))
}
\seealso{
Other array output: \code{\link{aaply}},
- \code{\link{laply}}
+ \code{\link{laply}}, \code{\link{maply}}
- Other data frame input: \code{\link{ddply}},
- \code{\link{dlply}}
+ Other data frame input: \code{\link{d_ply}},
+ \code{\link{ddply}}, \code{\link{dlply}}
}
\keyword{manip}
diff --git a/man/ddply.Rd b/man/ddply.Rd
index b566bef..c89b5d8 100644
--- a/man/ddply.Rd
+++ b/man/ddply.Rd
@@ -3,7 +3,8 @@
\title{Split data frame, apply function, and return results in a data frame.}
\usage{
ddply(.data, .variables, .fun = NULL, ...,
- .progress = "none", .drop = TRUE, .parallel = FALSE)
+ .progress = "none", .inform = FALSE, .drop = TRUE,
+ .parallel = FALSE, .paropts = NULL)
}
\arguments{
\item{.fun}{function to apply to each piece}
@@ -13,17 +14,30 @@
\item{.progress}{name of the progress bar to use, see
\code{\link{create_progress_bar}}}
+ \item{.parallel}{if \code{TRUE}, apply function in
+ parallel, using parallel backend provided by foreach}
+
+ \item{.paropts}{a list of additional options passed into
+ the \code{\link[foreach]{foreach}} function when parallel
+ computation is enabled. This is important if (for
+ example) your code relies on external data or packages:
+ use the \code{.export} and \code{.packages} arguments to
+ supply them so that all cluster nodes have the correct
+ environment set up for computing.}
+
+ \item{.inform}{produce informative error messages? This
+ is turned off by default because it substantially
+ slows processing speed, but is very useful for debugging}
+
\item{.data}{data frame to be processed}
\item{.variables}{variables to split data frame by, as
- quoted variables, a formula or character vector}
+ \code{\link{as.quoted}} variables, a formula or character
+ vector}
\item{.drop}{should combinations of variables that do not
appear in the input data be preserved (FALSE) or dropped
(TRUE, default)}
-
- \item{.parallel}{if \code{TRUE}, apply function in
- parallel, using parallel backend provided by foreach}
}
\value{
A data frame, as described in the output section.
@@ -49,14 +63,34 @@
(\code{data.frame()}).
}
\examples{
-ddply(baseball, .(year), "nrow")
+# Summarize a dataset by two variables
+require(plyr)
+dfx <- data.frame(
+ group = c(rep('A', 8), rep('B', 15), rep('C', 6)),
+ sex = sample(c("M", "F"), size = 29, replace = TRUE),
+ age = runif(n = 29, min = 18, max = 54)
+)
+
+# Note the use of the '.' function to allow
+# group and sex to be used without quoting
+ddply(dfx, .(group, sex), summarize,
+ mean = round(mean(age), 2),
+ sd = round(sd(age), 2))
+
+# An example using a formula for .variables
+ddply(baseball[1:100,], ~ year, nrow)
+# Applying two functions; nrow and ncol
ddply(baseball, .(lg), c("nrow", "ncol"))
+# Calculate mean runs batted in for each year
rbi <- ddply(baseball, .(year), summarise,
mean_rbi = mean(rbi, na.rm = TRUE))
-with(rbi, plot(year, mean_rbi, type="l"))
+# Plot a line chart of the result
+plot(mean_rbi ~ year, type = "l", data = rbi)
-base2 <- ddply(baseball, .(id), transform,
+# make new variable career_year based on the
+# start year for each player (id)
+base2 <- ddply(baseball, .(id), mutate,
career_year = year - min(year) + 1
)
}
@@ -66,11 +100,14 @@ base2 <- ddply(baseball, .(id), transform,
40(1), 1-29. \url{http://www.jstatsoft.org/v40/i01/}.
}
\seealso{
- Other data frame input: \code{\link{daply}},
- \code{\link{dlply}}
+ \code{\link{tapply}} for similar functionality in the
+ base package
+
+ Other data frame input: \code{\link{d_ply}},
+ \code{\link{daply}}, \code{\link{dlply}}
Other data frame output: \code{\link{adply}},
- \code{\link{ldply}}
+ \code{\link{ldply}}, \code{\link{mdply}}
}
\keyword{manip}
diff --git a/man/dlply.Rd b/man/dlply.Rd
index f49d584..6fcaff7 100644
--- a/man/dlply.Rd
+++ b/man/dlply.Rd
@@ -3,7 +3,8 @@
\title{Split data frame, apply function, and return results in a list.}
\usage{
dlply(.data, .variables, .fun = NULL, ...,
- .progress = "none", .drop = TRUE, .parallel = FALSE)
+ .progress = "none", .inform = FALSE, .drop = TRUE,
+ .parallel = FALSE, .paropts = NULL)
}
\arguments{
\item{.fun}{function to apply to each piece}
@@ -13,17 +14,30 @@
\item{.progress}{name of the progress bar to use, see
\code{\link{create_progress_bar}}}
+ \item{.parallel}{if \code{TRUE}, apply function in
+ parallel, using parallel backend provided by foreach}
+
+ \item{.paropts}{a list of additional options passed into
+ the \code{\link[foreach]{foreach}} function when parallel
+ computation is enabled. This is important if (for
+ example) your code relies on external data or packages:
+ use the \code{.export} and \code{.packages} arguments to
+ supply them so that all cluster nodes have the correct
+ environment set up for computing.}
+
+ \item{.inform}{produce informative error messages? This
+ is turned off by default because it substantially
+ slows processing speed, but is very useful for debugging}
+
\item{.data}{data frame to be processed}
\item{.variables}{variables to split data frame by, as
- quoted variables, a formula or character vector}
+ \code{\link{as.quoted}} variables, a formula or character
+ vector}
\item{.drop}{should combinations of variables that do not
appear in the input data be preserved (FALSE) or dropped
(TRUE, default)}
-
- \item{.parallel}{if \code{TRUE}, apply function in
- parallel, using parallel backend provided by foreach}
}
\value{
list of results
@@ -60,11 +74,11 @@ hist(qual)
40(1), 1-29. \url{http://www.jstatsoft.org/v40/i01/}.
}
\seealso{
- Other data frame input: \code{\link{daply}},
- \code{\link{ddply}}
+ Other data frame input: \code{\link{d_ply}},
+ \code{\link{daply}}, \code{\link{ddply}}
Other list output: \code{\link{alply}},
- \code{\link{llply}}
+ \code{\link{llply}}, \code{\link{mlply}}
}
\keyword{manip}
diff --git a/man/each.Rd b/man/each.Rd
index 1587063..6568e6b 100644
--- a/man/each.Rd
+++ b/man/each.Rd
@@ -10,14 +10,26 @@
}
\description{
Combine multiple functions into a single function
- returning a named vector of outputs.
+ returning a named vector of outputs. Note: you cannot
+ supply additional parameters for the summary functions.
}
\examples{
+# Call min() and max() on the vector 1:10
each(min, max)(1:10)
+# This syntax looks a little different. It is shorthand for the
+# following:
+f <- each(min, max)
+f(1:10)
+# Three equivalent ways to call min() and max() on the vector 1:10
each("min", "max")(1:10)
each(c("min", "max"))(1:10)
each(c(min, max))(1:10)
+# Call length(), mean() and var() on a random normal vector
each(length, mean, var)(rnorm(100))
}
+\seealso{
+ \code{\link{summarise}} for applying summary functions to
+ data
+}
\keyword{manip}
diff --git a/man/here.Rd b/man/here.Rd
new file mode 100644
index 0000000..d94d11a
--- /dev/null
+++ b/man/here.Rd
@@ -0,0 +1,33 @@
+\name{here}
+\alias{here}
+\title{Capture current evaluation context.}
+\usage{
+ here(f)
+}
+\arguments{
+ \item{f}{a function that does non-standard evaluation}
+}
+\description{
+ This function captures the current context, making it
+ easier to use \code{**ply} with functions that do special
+ evaluation and need access to the environment where ddply
+ was called from.
+}
+\examples{
+df <- data.frame(a = rep(c("a","b"), each = 10), b = 1:20)
+f1 <- function(label) {
+ ddply(df, "a", mutate, label = paste(label, b))
+}
+\dontrun{f1("name:")}
+# Doesn't work because mutate can't find label in the current scope
+
+f2 <- function(label) {
+ ddply(df, "a", here(mutate), label = paste(label, b))
+}
+f2("name:")
+# Works :)
+}
+\author{
+ Peter Meilstrup, \url{https://github.com/crowding}
+}
+
diff --git a/man/join.Rd b/man/join.Rd
index d98a35a..dd9ec75 100644
--- a/man/join.Rd
+++ b/man/join.Rd
@@ -2,22 +2,24 @@
\alias{join}
\title{Join two data frames together.}
\usage{
- join(x, y, by = intersect(names(x), names(y)),
- type = "left", match = "all")
+ join(x, y, by = NULL, type = "left", match = "all")
}
\arguments{
\item{x}{data frame}
\item{y}{data frame}
- \item{by}{character vector of variable names to join by}
+ \item{by}{character vector of variable names to join by.
+ If omitted, will match on all common variables.}
\item{type}{type of join: left (default), right, inner or
full. See details for more information.}
\item{match}{how should duplicate ids be matched? Either
match just the \code{"first"} matching row, or match
- \code{"all"} matching rows.}
+ \code{"all"} matching rows. Defaults to \code{"all"} for
+ compatibility with merge, but \code{"first"} is
+ significantly faster.}
}
\description{
Join, like merge, is designed for the types of problems
diff --git a/man/join_all.Rd b/man/join_all.Rd
new file mode 100644
index 0000000..203ad69
--- /dev/null
+++ b/man/join_all.Rd
@@ -0,0 +1,34 @@
+\name{join_all}
+\alias{join_all}
+\title{Recursively join a list of data frames.}
+\usage{
+ join_all(dfs, by = NULL, type = "left", match = "all")
+}
+\arguments{
+ \item{dfs}{A list of data frames.}
+
+ \item{by}{character vector of variable names to join by.
+ If omitted, will match on all common variables.}
+
+ \item{type}{type of join: left (default), right, inner or
+ full. See details for more information.}
+
+ \item{match}{how should duplicate ids be matched? Either
+ match just the \code{"first"} matching row, or match
+ \code{"all"} matching rows. Defaults to \code{"all"} for
+ compatibility with merge, but \code{"first"} is
+ significantly faster.}
+}
+\description{
+ Recursively join a list of data frames.
+}
+\examples{
+dfs <- list(
+ a = data.frame(x = 1:10, a = runif(10)),
+ b = data.frame(x = 1:10, b = runif(10)),
+ c = data.frame(x = 1:10, c = runif(10))
+)
+join_all(dfs)
+join_all(dfs, "x")
+}
+
diff --git a/man/l_ply.Rd b/man/l_ply.Rd
index 09ba3be..0f58181 100644
--- a/man/l_ply.Rd
+++ b/man/l_ply.Rd
@@ -3,11 +3,10 @@
\title{Split list, apply function, and discard results.}
\usage{
l_ply(.data, .fun = NULL, ..., .progress = "none",
- .print = FALSE)
+ .inform = FALSE, .print = FALSE, .parallel = FALSE,
+ .paropts = NULL)
}
\arguments{
- \item{.data}{list to be processed}
-
\item{.fun}{function to apply to each piece}
\item{...}{other arguments passed on to \code{.fun}}
@@ -15,26 +14,53 @@
\item{.progress}{name of the progress bar to use, see
\code{\link{create_progress_bar}}}
+ \item{.parallel}{if \code{TRUE}, apply function in
+ parallel, using parallel backend provided by foreach}
+
+ \item{.paropts}{a list of additional options passed into
+ the \code{\link[foreach]{foreach}} function when parallel
+ computation is enabled. This is important if (for
+ example) your code relies on external data or packages:
+ use the \code{.export} and \code{.packages} arguments to
+ supply them so that all cluster nodes have the correct
+ environment set up for computing.}
+
+ \item{.inform}{produce informative error messages? This
+ is turned off by default because it substantially
+ slows processing speed, but is very useful for debugging}
+
+ \item{.data}{list to be processed}
+
\item{.print}{automatically print each result? (default:
\code{FALSE})}
}
+\value{
+ Nothing
+}
\description{
For each element of a list, apply function and discard
results
}
-\details{
- All plyr functions use the same split-apply-combine
- strategy: they split the input into simpler pieces, apply
- \code{.fun} to each piece, and then combine the pieces
- into a single data structure. This function splits lists
- by elements and discards the output. This is useful for
- functions that you are calling purely for their side
- effects like display plots and saving output.
+\section{Input}{
+ This function splits lists by elements.
+}
+
+\section{Output}{
+ All output is discarded. This is useful for functions
+ that you are calling purely for their side effects like
+ displaying plots or saving output.
}
\references{
Hadley Wickham (2011). The Split-Apply-Combine Strategy
for Data Analysis. Journal of Statistical Software,
40(1), 1-29. \url{http://www.jstatsoft.org/v40/i01/}.
}
+\seealso{
+ Other list input: \code{\link{laply}},
+ \code{\link{ldply}}, \code{\link{llply}}
+
+ Other no output: \code{\link{a_ply}},
+ \code{\link{d_ply}}, \code{\link{m_ply}}
+}
\keyword{manip}
diff --git a/man/laply.Rd b/man/laply.Rd
index 5a4eef7..2359be1 100644
--- a/man/laply.Rd
+++ b/man/laply.Rd
@@ -3,7 +3,8 @@
\title{Split list, apply function, and return results in an array.}
\usage{
laply(.data, .fun = NULL, ..., .progress = "none",
- .drop = TRUE, .parallel = FALSE)
+ .inform = FALSE, .drop = TRUE, .parallel = FALSE,
+ .paropts = NULL)
}
\arguments{
\item{.fun}{function to apply to each piece}
@@ -13,11 +14,23 @@
\item{.progress}{name of the progress bar to use, see
\code{\link{create_progress_bar}}}
- \item{.data}{list to be processed}
-
\item{.parallel}{if \code{TRUE}, apply function in
parallel, using parallel backend provided by foreach}
+ \item{.paropts}{a list of additional options passed into
+ the \code{\link[foreach]{foreach}} function when parallel
+ computation is enabled. This is important if (for
+ example) your code relies on external data or packages:
+ use the \code{.export} and \code{.packages} arguments to
+ supply them so that all cluster nodes have the correct
+ environment set up for computing.}
+
+ \item{.inform}{produce informative error messages? This
+ is turned off by default because it substantially
+ slows processing speed, but is very useful for debugging}
+
+ \item{.data}{list to be processed}
+
\item{.drop}{should extra dimensions of length 1 in the
output be dropped, simplifying the output. Defaults to
\code{TRUE}}
@@ -29,15 +42,16 @@
}
\description{
For each element of a list, apply function then combine
- results into an array. \code{laply} is similar in spirit
- to \code{\link{sapply}} except that it will always return
- an array, and the output is transposed with respect
- \code{sapply} - each element of the list corresponds to a
- column, not a row.
+ results into an array.
+}
+\details{
+ \code{laply} is similar in spirit to \code{\link{sapply}}
+ except that it will always return an array, and the
+ output is transposed with respect to \code{sapply} - each
+ element of the list corresponds to a row, not a column.
}
\section{Input}{
- This function splits lists by elements and combines the
- result into a data frame.
+ This function splits lists by elements.
}
\section{Output}{
@@ -61,10 +75,10 @@ laply(seq_len(10), matrix, nrow = 2, ncol = 2)
}
\seealso{
Other array output: \code{\link{aaply}},
- \code{\link{daply}}
+ \code{\link{daply}}, \code{\link{maply}}
- Other list input: \code{\link{ldply}},
- \code{\link{llply}}
+ Other list input: \code{\link{l_ply}},
+ \code{\link{ldply}}, \code{\link{llply}}
}
\keyword{manip}
diff --git a/man/ldply.Rd b/man/ldply.Rd
index 967fed1..8fda85f 100644
--- a/man/ldply.Rd
+++ b/man/ldply.Rd
@@ -3,7 +3,7 @@
\title{Split list, apply function, and return results in a data frame.}
\usage{
ldply(.data, .fun = NULL, ..., .progress = "none",
- .parallel = FALSE)
+ .inform = FALSE, .parallel = FALSE, .paropts = NULL)
}
\arguments{
\item{.fun}{function to apply to each piece}
@@ -13,10 +13,22 @@
\item{.progress}{name of the progress bar to use, see
\code{\link{create_progress_bar}}}
- \item{.data}{list to be processed}
-
\item{.parallel}{if \code{TRUE}, apply function in
parallel, using parallel backend provided by foreach}
+
+ \item{.paropts}{a list of additional options passed into
+ the \code{\link[foreach]{foreach}} function when parallel
+ computation is enabled. This is important if (for
+ example) your code relies on external data or packages:
+ use the \code{.export} and \code{.packages} arguments to
+ supply them so that all cluster nodes have the correct
+ environment set up for computing.}
+
+ \item{.inform}{produce informative error messages? This
+ is turned off by default because it substantially
+ slows processing speed, but is very useful for debugging}
+
+ \item{.data}{list to be processed}
}
\value{
A data frame, as described in the output section.
@@ -26,8 +38,7 @@
results into a data frame.
}
\section{Input}{
- This function splits lists by elements and combines the
- result into a data frame.
+ This function splits lists by elements.
}
\section{Output}{
@@ -49,10 +60,10 @@
}
\seealso{
Other data frame output: \code{\link{adply}},
- \code{\link{ddply}}
+ \code{\link{ddply}}, \code{\link{mdply}}
- Other list input: \code{\link{laply}},
- \code{\link{llply}}
+ Other list input: \code{\link{l_ply}},
+ \code{\link{laply}}, \code{\link{llply}}
}
\keyword{manip}
diff --git a/man/llply.Rd b/man/llply.Rd
index f9d4bf5..9752fc9 100644
--- a/man/llply.Rd
+++ b/man/llply.Rd
@@ -3,7 +3,7 @@
\title{Split list, apply function, and return results in a list.}
\usage{
llply(.data, .fun = NULL, ..., .progress = "none",
- .inform = FALSE, .parallel = FALSE)
+ .inform = FALSE, .parallel = FALSE, .paropts = NULL)
}
\arguments{
\item{.fun}{function to apply to each piece}
@@ -13,27 +13,37 @@
\item{.progress}{name of the progress bar to use, see
\code{\link{create_progress_bar}}}
- \item{.data}{list to be processed}
-
\item{.parallel}{if \code{TRUE}, apply function in
parallel, using parallel backend provided by foreach}
+ \item{.paropts}{a list of additional options passed into
+ the \code{\link[foreach]{foreach}} function when parallel
+ computation is enabled. This is important if (for
+ example) your code relies on external data or packages:
+ use the \code{.export} and \code{.packages} arguments to
+ supply them so that all cluster nodes have the correct
+ environment set up for computing.}
+
\item{.inform}{produce informative error messages? This
is turned off by by default because it substantially
slows processing speed, but is very useful for debugging}
+
+ \item{.data}{list to be processed}
}
\value{
list of results
}
\description{
For each element of a list, apply function, keeping
- results as a list. \code{llply} is equivalent to
- \code{\link{lapply}} except that it will preserve labels
- and can display a progress bar.
+ results as a list.
+}
+\details{
+ \code{llply} is equivalent to \code{\link{lapply}} except
+ that it will preserve labels and can display a progress
+ bar.
}
\section{Input}{
- This function splits lists by elements and combines the
- result into a data frame.
+ This function splits lists by elements.
}
\section{Output}{
@@ -55,11 +65,11 @@ llply(x, quantile, probs = 1:3/4)
40(1), 1-29. \url{http://www.jstatsoft.org/v40/i01/}.
}
\seealso{
- Other list input: \code{\link{laply}},
- \code{\link{ldply}}
+ Other list input: \code{\link{l_ply}},
+ \code{\link{laply}}, \code{\link{ldply}}
Other list output: \code{\link{alply}},
- \code{\link{dlply}}
+ \code{\link{dlply}}, \code{\link{mlply}}
}
\keyword{manip}
diff --git a/man/m_ply.Rd b/man/m_ply.Rd
index ac1c709..08bd322 100644
--- a/man/m_ply.Rd
+++ b/man/m_ply.Rd
@@ -3,26 +3,49 @@
\title{Call function with arguments in array or data frame, discarding results.}
\usage{
m_ply(.data, .fun = NULL, ..., .expand = TRUE,
- .progress = "none")
+ .progress = "none", .inform = FALSE, .print = FALSE,
+ .parallel = FALSE, .paropts = NULL)
}
\arguments{
- \item{.data}{matrix or data frame to use as source of
- arguments}
-
- \item{.fun}{function to be called with varying arguments}
+ \item{.fun}{function to apply to each piece}
\item{...}{other arguments passed on to \code{.fun}}
+ \item{.progress}{name of the progress bar to use, see
+ \code{\link{create_progress_bar}}}
+
+ \item{.parallel}{if \code{TRUE}, apply function in
+ parallel, using parallel backend provided by foreach}
+
+ \item{.paropts}{a list of additional options passed into
+ the \code{\link[foreach]{foreach}} function when parallel
+ computation is enabled. This is important if (for
+ example) your code relies on external data or packages:
+ use the \code{.export} and \code{.packages} arguments to
+ supply them so that all cluster nodes have the correct
+ environment set up for computing.}
+
+ \item{.inform}{produce informative error messages? This
+ is turned off by default because it substantially
+ slows processing speed, but is very useful for debugging}
+
+ \item{.data}{matrix or data frame to use as source of
+ arguments}
+
\item{.expand}{should output be 1d (expand = FALSE), with
an element for each row; or nd (expand = TRUE), with a
dimension for each variable.}
- \item{.progress}{name of the progress bar to use, see
- \code{\link{create_progress_bar}}}
+ \item{.print}{automatically print each result? (default:
+ \code{FALSE})}
+}
+\value{
+ Nothing
}
\description{
Call a multi-argument function with values taken from
columns of an data frame or array, and discard results
+ into a list.
}
\details{
The \code{m*ply} functions are the \code{plyr} version of
@@ -30,15 +53,28 @@
output they produce. These functions are just a
convenient wrapper around \code{a*ply} with \code{margins
= 1} and \code{.fun} wrapped in \code{\link{splat}}.
+}
+\section{Input}{
+ Call a multi-argument function with values taken from
+ columns of a data frame or array
+}
- This function combines the result into a list. If there
- are no results, then this function will return a list of
- length 0 (\code{list()}).
+\section{Output}{
+ All output is discarded. This is useful for functions
+ that you are calling purely for their side effects like
+ displaying plots or saving output.
}
\references{
Hadley Wickham (2011). The Split-Apply-Combine Strategy
for Data Analysis. Journal of Statistical Software,
40(1), 1-29. \url{http://www.jstatsoft.org/v40/i01/}.
}
+\seealso{
+ Other multiple arguments input: \code{\link{maply}},
+ \code{\link{mdply}}, \code{\link{mlply}}
+
+ Other no output: \code{\link{a_ply}},
+ \code{\link{d_ply}}, \code{\link{l_ply}}
+}
\keyword{manip}
diff --git a/man/maply.Rd b/man/maply.Rd
index 58c3073..c613db4 100644
--- a/man/maply.Rd
+++ b/man/maply.Rd
@@ -3,25 +3,42 @@
\title{Call function with arguments in array or data frame, returning an array.}
\usage{
maply(.data, .fun = NULL, ..., .expand = TRUE,
- .progress = "none", .parallel = FALSE)
+ .progress = "none", .inform = FALSE, .drop = TRUE,
+ .parallel = FALSE, .paropts = NULL)
}
\arguments{
- \item{.data}{matrix or data frame to use as source of
- arguments}
-
- \item{.fun}{function to be called with varying arguments}
+ \item{.fun}{function to apply to each piece}
\item{...}{other arguments passed on to \code{.fun}}
- \item{.expand}{should output be 1d (expand = FALSE), with
- an element for each row; or nd (expand = TRUE), with a
- dimension for each variable.}
-
\item{.progress}{name of the progress bar to use, see
\code{\link{create_progress_bar}}}
\item{.parallel}{if \code{TRUE}, apply function in
parallel, using parallel backend provided by foreach}
+
+ \item{.paropts}{a list of additional options passed into
+ the \code{\link[foreach]{foreach}} function when parallel
+ computation is enabled. This is important if (for
+ example) your code relies on external data or packages:
+ use the \code{.export} and \code{.packages} arguments to
+ supply them so that all cluster nodes have the correct
+ environment set up for computing.}
+
+ \item{.inform}{produce informative error messages? This
+ is turned off by default because it substantially
+ slows processing speed, but is very useful for debugging}
+
+ \item{.data}{matrix or data frame to use as source of
+ arguments}
+
+ \item{.expand}{should output be 1d (expand = FALSE), with
+ an element for each row; or nd (expand = TRUE), with a
+ dimension for each variable.}
+
+ \item{.drop}{should extra dimensions of length 1 in the
+ output be dropped, simplifying the output. Defaults to
+ \code{TRUE}}
}
\value{
if results are atomic with same type and dimensionality,
@@ -39,9 +56,14 @@
output they produce. These functions are just a
convenient wrapper around \code{a*ply} with \code{margins
= 1} and \code{.fun} wrapped in \code{\link{splat}}.
+}
+\section{Input}{
+ Call a multi-argument function with values taken from
+ columns of a data frame or array
+}
- This function combines the result into an array. If
- there are no results, then this function will return a
+\section{Output}{
+ If there are no results, then this function will return a
vector of length 0 (\code{vector()}).
}
\examples{
@@ -54,5 +76,12 @@ maply(cbind(1:5, 1:5), rnorm, n = 5)
for Data Analysis. Journal of Statistical Software,
40(1), 1-29. \url{http://www.jstatsoft.org/v40/i01/}.
}
+\seealso{
+ Other array output: \code{\link{aaply}},
+ \code{\link{daply}}, \code{\link{laply}}
+
+ Other multiple arguments input: \code{\link{m_ply}},
+ \code{\link{mdply}}, \code{\link{mlply}}
+}
\keyword{manip}
diff --git a/man/mapvalues.Rd b/man/mapvalues.Rd
new file mode 100644
index 0000000..9993d2d
--- /dev/null
+++ b/man/mapvalues.Rd
@@ -0,0 +1,48 @@
+\name{mapvalues}
+\alias{mapvalues}
+\title{Replace specified values with new values, in a vector or factor.}
+\usage{
+ mapvalues(x, from, to, warn_missing = TRUE)
+}
+\arguments{
+ \item{x}{the factor or vector to modify}
+
+ \item{from}{a vector of the items to replace}
+
+ \item{to}{a vector of replacement values}
+
+ \item{warn_missing}{print a message if any of the old
+ values are not actually present in \code{x}}
+}
+\description{
+ Items in \code{x} that match items in \code{from} will be
+ replaced by items in \code{to}, matched by position. For
+ example, items in \code{x} that match the first element
+ in \code{from} will be replaced by the first element of
+ \code{to}.
+}
+\details{
+ If \code{x} is a factor, the matching levels of the
+ factor will be replaced with the new values.
+
+ The related \code{revalue} function works only on
+ character vectors and factors, but this function works on
+ vectors of any type and factors.
+}
+\examples{
+x <- c("a", "b", "c")
+mapvalues(x, c("a", "c"), c("A", "C"))
+
+# Works on factors
+y <- factor(c("a", "b", "c", "a"))
+mapvalues(y, c("a", "c"), c("A", "C"))
+
+# Works on numeric vectors
+z <- c(1, 4, 5, 9)
+mapvalues(z, from = c(1, 5, 9), to = c(10, 50, 90))
+}
+\seealso{
+ \code{\link{revalue}} to do the same thing but with a
+ single named vector instead of two separate vectors.
+}
+
diff --git a/man/match_df.Rd b/man/match_df.Rd
index 89690c2..2dc5dd0 100644
--- a/man/match_df.Rd
+++ b/man/match_df.Rd
@@ -16,16 +16,38 @@
a data frame
}
\description{
- This is particularly useful when you've summarised the
- data in some way and want to subset the original data by
- a characteristic of the subset.
+ Match works in the same way as join, but instead of
+ returning the combined dataset, it only returns the matching
+ rows from the first dataset. This is particularly useful
+ when you've summarised the data in some way and want to
+ subset the original data by a characteristic of the
+ subset.
+}
+\details{
+ \code{match_df} shares the same semantics as
+ \code{\link{join}}, not \code{\link{match}}:
+
+ \itemize{ \item the match criterion is \code{==}, not
+ \code{\link{identical}}. \item it doesn't work for
+ columns that are not atomic vectors \item if there are no
+ matches, the row will be omitted }
}
\examples{
+# count the occurrences of each id in the baseball dataframe, then get the subset with a freq >25
longterm <- subset(count(baseball, "id"), freq > 25)
-bb_longterm <- match_df(baseball, longterm)
+# longterm
+# id freq
+# 30 ansonca01 27
+# 48 baineha01 27
+# ...
+# Select only rows from these longterm players from the baseball dataframe
+# (match would default to match on shared column names, but here was explicitly set "id")
+bb_longterm <- match_df(baseball, longterm, on="id")
+bb_longterm[1:5,]
}
\seealso{
\code{\link{join}} to combine the columns from both x and
- y
+ y and \code{\link{match}} for the base function selecting
+ matching items
}
diff --git a/man/mdply.Rd b/man/mdply.Rd
index 64c5138..38ba31e 100644
--- a/man/mdply.Rd
+++ b/man/mdply.Rd
@@ -3,28 +3,41 @@
\title{Call function with arguments in array or data frame, returning a data frame.}
\usage{
mdply(.data, .fun = NULL, ..., .expand = TRUE,
- .progress = "none", .parallel = FALSE)
+ .progress = "none", .inform = FALSE, .parallel = FALSE,
+ .paropts = NULL)
}
\arguments{
- \item{.data}{matrix or data frame to use as source of
- arguments}
-
- \item{.fun}{function to be called with varying arguments}
+ \item{.fun}{function to apply to each piece}
\item{...}{other arguments passed on to \code{.fun}}
- \item{.expand}{should output be 1d (expand = FALSE), with
- an element for each row; or nd (expand = TRUE), with a
- dimension for each variable.}
-
\item{.progress}{name of the progress bar to use, see
\code{\link{create_progress_bar}}}
\item{.parallel}{if \code{TRUE}, apply function in
parallel, using parallel backend provided by foreach}
+
+ \item{.paropts}{a list of additional options passed into
+ the \code{\link[foreach]{foreach}} function when parallel
+ computation is enabled. This is important if (for
+ example) your code relies on external data or packages:
+ use the \code{.export} and \code{.packages} arguments to
+ supply them so that all cluster nodes have the correct
+ environment set up for computing.}
+
+ \item{.inform}{produce informative error messages? This
+ is turned off by default because it substantially
+ slows processing speed, but is very useful for debugging}
+
+ \item{.data}{matrix or data frame to use as source of
+ arguments}
+
+ \item{.expand}{should output be 1d (expand = FALSE), with
+ an element for each row; or nd (expand = TRUE), with a
+ dimension for each variable.}
}
\value{
- a data frame
+ A data frame, as described in the output section.
}
\description{
Call a multi-argument function with values taken from
@@ -37,9 +50,21 @@
output they produce. These functions are just a
convenient wrapper around \code{a*ply} with \code{margins
= 1} and \code{.fun} wrapped in \code{\link{splat}}.
+}
+\section{Input}{
+ Call a multi-argument function with values taken from
+ columns of a data frame or array
+}
+
+\section{Output}{
+ The most unambiguous behaviour is achieved when
+ \code{.fun} returns a data frame - in that case pieces
+ will be combined with \code{\link{rbind.fill}}. If
+ \code{.fun} returns an atomic vector of fixed length, it
+ will be \code{rbind}ed together and converted to a data
+ frame. Any other values will result in an error.
- This function combines the result into a data frame. If
- there are no results, then this function will return a
+ If there are no results, then this function will return a
data frame with zero rows and columns
(\code{data.frame()}).
}
@@ -54,5 +79,12 @@ mdply(cbind(mean = 1:5, sd = 1:5), as.data.frame(rnorm), n = 5)
for Data Analysis. Journal of Statistical Software,
40(1), 1-29. \url{http://www.jstatsoft.org/v40/i01/}.
}
+\seealso{
+ Other data frame output: \code{\link{adply}},
+ \code{\link{ddply}}, \code{\link{ldply}}
+
+ Other multiple arguments input: \code{\link{m_ply}},
+ \code{\link{maply}}, \code{\link{mlply}}
+}
\keyword{manip}
diff --git a/man/mlply.Rd b/man/mlply.Rd
index 4b3038e..f81148d 100644
--- a/man/mlply.Rd
+++ b/man/mlply.Rd
@@ -3,25 +3,38 @@
\title{Call function with arguments in array or data frame, returning a list.}
\usage{
mlply(.data, .fun = NULL, ..., .expand = TRUE,
- .progress = "none", .parallel = FALSE)
+ .progress = "none", .inform = FALSE, .parallel = FALSE,
+ .paropts = NULL)
}
\arguments{
- \item{.data}{matrix or data frame to use as source of
- arguments}
-
- \item{.fun}{function to be called with varying arguments}
+ \item{.fun}{function to apply to each piece}
\item{...}{other arguments passed on to \code{.fun}}
- \item{.expand}{should output be 1d (expand = FALSE), with
- an element for each row; or nd (expand = TRUE), with a
- dimension for each variable.}
-
\item{.progress}{name of the progress bar to use, see
\code{\link{create_progress_bar}}}
\item{.parallel}{if \code{TRUE}, apply function in
parallel, using parallel backend provided by foreach}
+
+ \item{.paropts}{a list of additional options passed into
+ the \code{\link[foreach]{foreach}} function when parallel
+ computation is enabled. This is important if (for
+ example) your code relies on external data or packages:
+ use the \code{.export} and \code{.packages} arguments to
+ supply them so that all cluster nodes have the correct
+ environment set up for computing.}
+
+ \item{.inform}{produce informative error messages? This
+ is turned off by default because it substantially
+ slows processing speed, but is very useful for debugging}
+
+ \item{.data}{matrix or data frame to use as source of
+ arguments}
+
+ \item{.expand}{should output be 1d (expand = FALSE), with
+ an element for each row; or nd (expand = TRUE), with a
+ dimension for each variable.}
}
\value{
list of results
@@ -29,7 +42,7 @@
\description{
Call a multi-argument function with values taken from
columns of a data frame or array, and combine results
- into a list
+ into a list.
}
\details{
The \code{m*ply} functions are the \code{plyr} version of
@@ -37,10 +50,15 @@
output they produce. These functions are just a
convenient wrapper around \code{a*ply} with \code{margins
= 1} and \code{.fun} wrapped in \code{\link{splat}}.
+}
+\section{Input}{
+ Call a multi-argument function with values taken from
+ columns of a data frame or array
+}
- This function combines the result into a list. If there
- are no results, then this function will return a list of
- length 0 (\code{list()}).
+\section{Output}{
+ If there are no results, then this function will return a
+ list of length 0 (\code{list()}).
}
\examples{
mlply(cbind(1:4, 4:1), rep)
@@ -55,5 +73,12 @@ mlply(cbind(1:4, by = 4:1), seq, to = 20)
for Data Analysis. Journal of Statistical Software,
40(1), 1-29. \url{http://www.jstatsoft.org/v40/i01/}.
}
+\seealso{
+ Other list output: \code{\link{alply}},
+ \code{\link{dlply}}, \code{\link{llply}}
+
+ Other multiple arguments input: \code{\link{m_ply}},
+ \code{\link{maply}}, \code{\link{mdply}}
+}
\keyword{manip}
diff --git a/man/name_rows.Rd b/man/name_rows.Rd
new file mode 100644
index 0000000..c46c5f7
--- /dev/null
+++ b/man/name_rows.Rd
@@ -0,0 +1,28 @@
+\name{name_rows}
+\alias{name_rows}
+\title{Toggle row names between explicit and implicit.}
+\usage{
+ name_rows(df)
+}
+\arguments{
+ \item{df}{a data.frame, with either \code{rownames}, or a
+ column called \code{.rownames}.}
+}
+\description{
+ Plyr functions ignore row names, so this function
+ provides a way to preserve them by converting them to an
+ explicit column in the data frame. After the plyr
+ operation, you can then apply \code{name_rows} again to
+ convert back from the explicit column to the implicit
+ \code{rownames}.
+}
+\examples{
+name_rows(mtcars)
+name_rows(name_rows(mtcars))
+
+df <- data.frame(a = sample(10))
+arrange(df, a)
+arrange(name_rows(df), a)
+name_rows(arrange(name_rows(df), a))
+}
+
diff --git a/man/plyr.Rd b/man/plyr.Rd
new file mode 100644
index 0000000..6317892
--- /dev/null
+++ b/man/plyr.Rd
@@ -0,0 +1,66 @@
+\docType{package}
+\name{plyr}
+\alias{plyr}
+\alias{plyr-package}
+\title{plyr: the split-apply-combine paradigm for R.}
+\description{
+ The plyr package is a set of clean and consistent tools
+ that implement the split-apply-combine pattern in R. This
+ is an extremely common pattern in data analysis: you
+ solve a complex problem by breaking it down into small
+ pieces, doing something to each piece and then combining
+ the results back together again.
+}
+\details{
+ The plyr functions are named according to what sort of
+ data structure they split up and what sort of data
+ structure they return:
+
+ \describe{ \item{a}{array} \item{l}{list}
+ \item{d}{data.frame} \item{m}{multiple inputs}
+ \item{r}{repeat multiple times} \item{_}{nothing} }
+
+ So \code{\link{ddply}} takes a data frame as input and
+ returns a data frame as output, and \code{\link{l_ply}}
+ takes a list as input and returns nothing as output.
+}
+\section{Row names}{
+ By design, no plyr function will preserve row names - in
+ general it is too hard to know what should be done with
+ them for many of the operations supported by plyr. If you
+ want to preserve row names, use \code{\link{name_rows}}
+ to convert them into an explicit column in your data
+ frame, perform the plyr operations, and then use
+ \code{\link{name_rows}} again to convert the column back
+ into row names.
+}
+
+\section{Helpers}{
+ Plyr also provides a set of helper functions for common
+ data analysis problems:
+
+ \itemize{ \item \code{\link{arrange}}: re-order the rows
+ of a data frame by specifying the columns to order by
+ \item \code{\link{mutate}}: add new columns or modify
+ existing columns, like \code{\link{transform}}, but new
+ columns can refer to other columns that you just created.
+ \item \code{\link{summarise}}: like \code{\link{mutate}}
+ but create a new data frame, not preserving any columns
+ in the old data frame.
+
+ \item \code{\link{join}}: an adaptation of
+ \code{\link{merge}} which is more similar to SQL, and has
+ a much faster implementation if you only want to find the
+ first match. \item \code{\link{match_df}}: a version of
+ \code{\link{join}} that instead of returning the two
+ tables combined together, only returns the rows in the
+ first table that match the second.
+
+ \item \code{\link{colwise}}: make any function work
+ colwise on a dataframe \item \code{\link{rename}}: easily
+ rename columns in a data frame \item
+ \code{\link{round_any}}: round a number to any degree of
+ precision \item \code{\link{count}}: quickly count unique
+ combinations and return the result as a data frame. }
+}
+
diff --git a/man/progress_none.Rd b/man/progress_none.Rd
index e0f7964..35441b8 100644
--- a/man/progress_none.Rd
+++ b/man/progress_none.Rd
@@ -16,7 +16,8 @@ l_ply(1:100, identity, .progress = "none")
}
\seealso{
Other progress bars: \code{\link{progress_text}},
- \code{\link{progress_tk}}, \code{\link{progress_win}}
+ \code{\link{progress_time}}, \code{\link{progress_tk}},
+ \code{\link{progress_win}}
}
\keyword{internal}
diff --git a/man/progress_text.Rd b/man/progress_text.Rd
index 1881e8b..f5be954 100644
--- a/man/progress_text.Rd
+++ b/man/progress_text.Rd
@@ -26,6 +26,7 @@ l_ply(1:100, identity, .progress = progress_text(char = "-"))
}
\seealso{
Other progress bars: \code{\link{progress_none}},
- \code{\link{progress_tk}}, \code{\link{progress_win}}
+ \code{\link{progress_time}}, \code{\link{progress_tk}},
+ \code{\link{progress_win}}
}
diff --git a/man/progress_time.Rd b/man/progress_time.Rd
new file mode 100644
index 0000000..4834654
--- /dev/null
+++ b/man/progress_time.Rd
@@ -0,0 +1,20 @@
+\name{progress_time}
+\alias{progress_time}
+\title{Text progress bar with time.}
+\usage{
+ progress_time()
+}
+\description{
+ A textual progress bar that estimates time remaining. It
+ displays the estimated time remaining and, when finished,
+ total duration.
+}
+\examples{
+l_ply(1:100, function(x) Sys.sleep(.01), .progress = "time")
+}
+\seealso{
+ Other progress bars: \code{\link{progress_none}},
+ \code{\link{progress_text}}, \code{\link{progress_tk}},
+ \code{\link{progress_win}}
+}
+
diff --git a/man/progress_tk.Rd b/man/progress_tk.Rd
index e5e9006..27fe8e8 100644
--- a/man/progress_tk.Rd
+++ b/man/progress_tk.Rd
@@ -20,15 +20,18 @@
This graphical progress will appear in a separate window.
}
\examples{
+\dontrun{
l_ply(1:100, identity, .progress = "tk")
l_ply(1:100, identity, .progress = progress_tk(width=400))
l_ply(1:100, identity, .progress = progress_tk(label=""))
}
+}
\seealso{
\code{\link[tcltk]{tkProgressBar}} for the function that
powers this progress bar
Other progress bars: \code{\link{progress_none}},
- \code{\link{progress_text}}, \code{\link{progress_win}}
+ \code{\link{progress_text}}, \code{\link{progress_time}},
+ \code{\link{progress_win}}
}
diff --git a/man/progress_win.Rd b/man/progress_win.Rd
index a5c8c7f..fa8adbe 100644
--- a/man/progress_win.Rd
+++ b/man/progress_win.Rd
@@ -27,6 +27,7 @@ l_ply(1:100, identity, .progress = progress_win(title="Working..."))
progress bar
Other progress bars: \code{\link{progress_none}},
- \code{\link{progress_text}}, \code{\link{progress_tk}}
+ \code{\link{progress_text}}, \code{\link{progress_time}},
+ \code{\link{progress_tk}}
}
diff --git a/man/rbind.fill.Rd b/man/rbind.fill.Rd
index 9d66477..2b93564 100644
--- a/man/rbind.fill.Rd
+++ b/man/rbind.fill.Rd
@@ -5,7 +5,9 @@
rbind.fill(...)
}
\arguments{
- \item{...}{input data frames to row bind together}
+ \item{...}{input data frames to row bind together. The
+ first argument can be a list of data frames, in which
+ case all other arguments are ignored.}
}
\value{
a single data frame
diff --git a/man/rbind.fill.matrix.Rd b/man/rbind.fill.matrix.Rd
index 31e1c7b..431a1d7 100644
--- a/man/rbind.fill.matrix.Rd
+++ b/man/rbind.fill.matrix.Rd
@@ -5,7 +5,9 @@
rbind.fill.matrix(...)
}
\arguments{
- \item{...}{the matrices to rbind}
+ \item{...}{the matrices to rbind. The first argument can
+ be a list of matrices, in which case all other arguments
+ are ignored.}
}
\value{
a matrix with column names
diff --git a/man/rename.Rd b/man/rename.Rd
index 6c21cad..8ea4ac5 100644
--- a/man/rename.Rd
+++ b/man/rename.Rd
@@ -2,20 +2,28 @@
\alias{rename}
\title{Modify names by name, not position.}
\usage{
- rename(x, replace)
+ rename(x, replace, warn_missing = TRUE)
}
\arguments{
\item{x}{named object to modify}
\item{replace}{named character vector, with new names as
values, and old names as names.}
+
+ \item{warn_missing}{print a message if any of the old
+ names are not actually present in \code{x}. Note: x is
+ not altered: To save the result, you need to copy the
+ returned data into a variable.}
}
\description{
Modify names by name, not position.
}
\examples{
x <- c("a" = 1, "b" = 2, d = 3, 4)
-rename(x, c("d" = "c"))
-rename(mtcars, c("disp" = "displ"))
+# Rename column d to "c", updating the variable "x" with the result
+x <- rename(x, replace=c("d" = "c"))
+x
+# Rename column "disp" to "displacement"
+rename(mtcars, c("disp" = "displacement"))
}
diff --git a/man/revalue.Rd b/man/revalue.Rd
new file mode 100644
index 0000000..c8b8f87
--- /dev/null
+++ b/man/revalue.Rd
@@ -0,0 +1,39 @@
+\name{revalue}
+\alias{revalue}
+\title{Replace specified values with new values, in a factor or character vector.}
+\usage{
+ revalue(x, replace = NULL, warn_missing = TRUE)
+}
+\arguments{
+ \item{x}{factor or character vector to modify}
+
+ \item{replace}{named character vector, with new values as
+ values, and old values as names.}
+
+ \item{warn_missing}{print a message if any of the old
+ values are not actually present in \code{x}}
+}
+\description{
+ If \code{x} is a factor, the named levels of the factor
+ will be replaced with the new values.
+}
+\details{
+ This function works only on character vectors and
+ factors, but the related \code{mapvalues} function works
+ on vectors of any type and factors, and instead of a
+ named vector specifying the original and replacement
+ values, it takes two separate vectors
+}
+\examples{
+x <- c("a", "b", "c")
+revalue(x, c(a = "A", c = "C"))
+revalue(x, c("a" = "A", "c" = "C"))
+
+y <- factor(c("a", "b", "c", "a"))
+revalue(y, c(a = "A", c = "C"))
+}
+\seealso{
+ \code{\link{mapvalues}} to replace values with vectors of
+ any type
+}
+
diff --git a/man/round_any.Rd b/man/round_any.Rd
index 34e5ad6..8b23f29 100644
--- a/man/round_any.Rd
+++ b/man/round_any.Rd
@@ -5,9 +5,10 @@
round_any(x, accuracy, f = round)
}
\arguments{
- \item{x}{numeric vector to round}
+ \item{x}{numeric or date-time (POSIXct) vector to round}
- \item{accuracy}{number to round to}
+ \item{accuracy}{number to round to; for POSIXct objects,
+ a number of seconds}
\item{f}{rounding function: \code{\link{floor}},
\code{\link{ceiling}} or \code{\link{round}}}
@@ -25,6 +26,10 @@ round_any(135, 25, floor)
round_any(135, 10, ceiling)
round_any(135, 100, ceiling)
round_any(135, 25, ceiling)
+
+round_any(Sys.time() + 1:10, 5)
+round_any(Sys.time() + 1:10, 5, floor)
+round_any(Sys.time(), 3600)
}
\keyword{manip}
diff --git a/man/split_indices.Rd b/man/split_indices.Rd
index 6e74fae..2e5bbc2 100644
--- a/man/split_indices.Rd
+++ b/man/split_indices.Rd
@@ -2,7 +2,7 @@
\alias{split_indices}
\title{Split indices.}
\usage{
- split_indices(index, group, n = max(group))
+ split_indices(group, n = max(group))
}
\arguments{
\item{index}{integer indices}
diff --git a/man/summarise.Rd b/man/summarise.Rd
index fa2ef1d..17200d5 100644
--- a/man/summarise.Rd
+++ b/man/summarise.Rd
@@ -13,14 +13,17 @@
\description{
Summarise works in an analogous way to transform, except
instead of adding columns to an existing data frame, it
- creates a new one. This is particularly useful in
+ creates a new data frame. This is particularly useful in
conjunction with \code{\link{ddply}} as it makes it easy
to perform group-wise summaries.
}
\examples{
+# Let's extract the number of teams and total period of time
+# covered by the baseball dataframe
summarise(baseball,
duration = max(year) - min(year),
nteams = length(unique(team)))
+# Combine with ddply to do that for each separate id
ddply(baseball, "id", summarise,
duration = max(year) - min(year),
nteams = length(unique(team)))
diff --git a/man/take.Rd b/man/take.Rd
new file mode 100644
index 0000000..8fb47ff
--- /dev/null
+++ b/man/take.Rd
@@ -0,0 +1,30 @@
+\name{take}
+\alias{take}
+\title{Take a subset along an arbitrary dimension}
+\usage{
+ take(x, along, indices, drop = FALSE)
+}
+\arguments{
+ \item{x}{matrix or array to subset}
+
+ \item{along}{dimension to subset along}
+
+ \item{indices}{the indices to select}
+
+ \item{drop}{should the dimensions of the array be
+ simplified? Defaults to \code{FALSE} which is the
+ opposite of the useful R default.}
+}
+\description{
+ Take a subset along an arbitrary dimension
+}
+\examples{
+x <- array(seq_len(3 * 4 * 5), c(3, 4, 5))
+take(x, 3, 1)
+take(x, 2, 1)
+take(x, 1, 1)
+take(x, 3, 1, drop = TRUE)
+take(x, 2, 1, drop = TRUE)
+take(x, 1, 1, drop = TRUE)
+}
+
diff --git a/man/try_default.Rd b/man/try_default.Rd
index efbe0fa..f51d615 100644
--- a/man/try_default.Rd
+++ b/man/try_default.Rd
@@ -1,6 +1,6 @@
\name{try_default}
-\alias{try_default}
\alias{tryNULL}
+\alias{try_default}
\title{Try, with default in case of error.}
\usage{
try_default(expr, default, quiet = FALSE)
diff --git a/src/split-numeric.c b/src/split-numeric.c
index f5c96bd..ad243ee 100644
--- a/src/split-numeric.c
+++ b/src/split-numeric.c
@@ -1,41 +1,40 @@
#include <R.h>
#include <Rdefines.h>
-SEXP split_indices(SEXP index, SEXP group, SEXP n) {
- SEXP counts, vec;
- int i, j, k, nobs, nlevs;
-
- PROTECT(index = AS_INTEGER(index));
- PROTECT(group = AS_INTEGER(group));
-
- nlevs = INTEGER(n)[0];
- nobs = LENGTH(index);
+SEXP split_indices(SEXP group, SEXP n) {
+ SEXP vec;
+ int i, j, k;
+ int nlevs = INTEGER(n)[0];
+ int nobs = LENGTH(group);
+ int *pgroup = INTEGER(group);
+
// Count number of cases in each group
- PROTECT(counts = allocVector(INTSXP, nlevs));
+ int counts[nlevs];
for (i = 0; i < nlevs; i++)
- INTEGER(counts)[i] = 0;
+ counts[i] = 0;
for (i = 0; i < nobs; i++) {
- j = INTEGER(group)[i] - 1;
- INTEGER(counts)[j]++;
+ j = pgroup[i];
+ if (j > nlevs) error("n smaller than largest index");
+ counts[j - 1]++;
}
// Allocate storage for results
PROTECT(vec = allocVector(VECSXP, nlevs));
for (i = 0; i < nlevs; i++) {
- SET_VECTOR_ELT(vec, i, allocVector(INTSXP, INTEGER(counts)[i]));
+ SET_VECTOR_ELT(vec, i, allocVector(INTSXP, counts[i]));
}
// Put indices in groups
for (i = 0; i < nlevs; i++) {
- INTEGER(counts)[i] = 0;
+ counts[i] = 0;
}
for (i = 0; i < nobs; i++) {
- j = INTEGER(group)[i] - 1;
- k = INTEGER(counts)[j];
- INTEGER(VECTOR_ELT(vec, j))[k] = INTEGER(index)[i];
- INTEGER(counts)[j] += 1;
+ j = pgroup[i] - 1;
+ k = counts[j];
+ INTEGER(VECTOR_ELT(vec, j))[k] = i + 1;
+ counts[j]++;
}
- UNPROTECT(4);
+ UNPROTECT(1);
return vec;
}
diff --git a/tests/dependencies.R b/tests/dependencies.R
deleted file mode 100644
index de5a055..0000000
--- a/tests/dependencies.R
+++ /dev/null
@@ -1,13 +0,0 @@
-if (Sys.info()["user"] == "hadley") {
- library(devtools)
- # check("reshape2", document = FALSE)
- check_cran("lubridate")
- check_cran("ggplot2")
- check_cran("reshape")
- check_cran("stringr")
- check_cran("nullabor")
- check_cran("stringr")
- check_cran("productplots")
- check_cran("reshape2")
-
-}
\ No newline at end of file
diff --git a/tests/test-all.R b/tests/test-all.R
index a961799..0ca0d68 100644
--- a/tests/test-all.R
+++ b/tests/test-all.R
@@ -1,4 +1,4 @@
library(testthat)
library(plyr)
-test_package("plyr")
\ No newline at end of file
+test_package("plyr")
--
Packaging for R/CRAN/plyr in Debian
More information about the debian-med-commit
mailing list