[med-svn] [r-cran-reshape2] 01/03: Imported Upstream version 1.4
Andreas Tille
tille at debian.org
Fri Jun 20 14:17:56 UTC 2014
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository r-cran-reshape2.
commit ef7336e32eed6bef45cab1a163953d66e51e001b
Author: Andreas Tille <tille at debian.org>
Date: Fri Jun 20 16:17:09 2014 +0200
Imported Upstream version 1.4
---
DESCRIPTION | 29 +--
LICENSE | 2 +
MD5 | 62 +++---
NAMESPACE | 4 +
NEWS | 43 ----
R/RcppExports.R | 7 +
R/cast.r | 42 ++--
R/melt.r | 109 +++++++---
R/recast.r | 8 +-
R/reshape.r | 3 +
R/utils.r | 53 +++++
README.md | 17 +-
inst/tests/test-melt.r | 55 -----
man/add_margins.Rd | 18 +-
man/cast.Rd | 99 ++++-----
man/colsplit.Rd | 16 +-
man/french_fries.Rd | 29 ++-
man/guess_value.Rd | 13 +-
man/margins.Rd | 25 ++-
man/melt.Rd | 32 +--
man/melt.array.Rd | 42 ++--
man/melt.data.frame.Rd | 56 ++---
man/melt.default.Rd | 27 ++-
man/melt.list.Rd | 19 +-
man/melt_check.Rd | 28 +--
man/parse_formula.Rd | 24 +--
man/recast.Rd | 24 +--
man/smiths.Rd | 8 +-
man/tips.Rd | 34 +--
po/R-ko.po | 53 +++++
po/R-reshape.pot | 42 ++++
src/RcppExports.cpp | 29 +++
src/melt.cpp | 297 ++++++++++++++++++++++++++
tests/{test-all.R => testthat.R} | 2 +-
{inst/tests => tests/testthat}/test-cast.r | 28 ++-
{inst/tests => tests/testthat}/test-margins.r | 0
tests/testthat/test-melt.r | 175 +++++++++++++++
37 files changed, 1143 insertions(+), 411 deletions(-)
diff --git a/DESCRIPTION b/DESCRIPTION
index 5a77905..9b7b180 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,19 +1,20 @@
Package: reshape2
Type: Package
Title: Flexibly reshape data: a reboot of the reshape package.
-Version: 1.2.2
-Author: Hadley Wickham <hadley at rice.edu>
-Maintainer: Hadley Wickham <hadley at rice.edu>
-Description: Reshape lets you flexibly restructure and aggregate data
- using just two functions: melt and cast.
-URL: http://had.co.nz/reshape
-Imports: plyr (>= 1.5), stringr, lattice
-Suggests: testthat
-License: MIT
+Version: 1.4
+Author: Hadley Wickham <h.wickham at gmail.com>
+Maintainer: Hadley Wickham <h.wickham at gmail.com>
+Description: Reshape lets you flexibly restructure and
+ aggregate data using just two functions: melt and cast.
+URL: https://github.com/hadley/reshape
+BugReports: https://github.com/hadley/reshape/issues
+LinkingTo: Rcpp
+Imports: plyr (>= 1.8.1), stringr, Rcpp
+Roxygen: list(wrap = FALSE)
+Suggests: testthat (>= 0.8.0), lattice
+License: MIT + file LICENSE
LazyData: true
-Collate: 'cast.r' 'data.r' 'formula.r' 'helper-colsplit.r'
- 'helper-guess-value.r' 'helper-margins.r' 'melt.r' 'recast.r'
- 'utils.r'
-Packaged: 2012-12-04 19:05:50 UTC; hadley
+Packaged: 2014-04-23 12:12:47 UTC; hadley
+NeedsCompilation: yes
Repository: CRAN
-Date/Publication: 2012-12-04 23:10:53
+Date/Publication: 2014-04-23 16:34:12
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..02969bc
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,2 @@
+YEAR: 2008-2014
+COPYRIGHT HOLDER: Hadley Wickham
diff --git a/MD5 b/MD5
index ae7cf4d..25baeab 100644
--- a/MD5
+++ b/MD5
@@ -1,37 +1,43 @@
-6259d21bc9b5e65334b961ac79f6a8af *DESCRIPTION
-2d170d25c55bab58f33ea60fdbb8afe5 *NAMESPACE
-96cb5b94a33b800bdbc9ed1c659d2466 *NEWS
-3b8a9f6833d4944a0b4330b1063a4f07 *R/cast.r
+80e1197e01ef035042dac55a858ffd16 *DESCRIPTION
+c7ea166b434ab74f81ce002ea5d87b07 *LICENSE
+49177420adf56e29f1e6df797daa4fd7 *NAMESPACE
+a41923a0c3dd9c64e173686355479ae1 *R/RcppExports.R
+4aa84b2006ec919fb46cd7b352952916 *R/cast.r
827ca227c89be29638317fd0cf09810d *R/data.r
64787f81028fc99b399324ea1e20d388 *R/formula.r
be60b82da1500a97b295ec2eedcbcecd *R/helper-colsplit.r
b88bd2b7d2e7ee40cefcf33e6b5145ab *R/helper-guess-value.r
6d720bda805d2903c1cde371a58611ba *R/helper-margins.r
-5b4dddf778fd06aad455818ef1c97f72 *R/melt.r
-3f85fc6e083cff5060dabc1571f0a9e5 *R/recast.r
-72e84fd8dbe786407e625134b251238b *R/utils.r
-a29aec5b95e38f7eab2ab9c2141abd5c *README.md
+8ac26acd4388cd90ba9d4601def9c412 *R/melt.r
+3b203639e96d72299d6dc7ea6e97a179 *R/recast.r
+56fe864c06a775eaaa3c4c39aa013415 *R/reshape.r
+2eb44aae9ad8d328afda7379086ec877 *R/utils.r
+1269f00722e814c55c8b4052cf5a03c4 *README.md
11d6f343f97ca34edc7cb5ad4a174d05 *data/french_fries.rda
931bb9da3bce71ebcb25ba53c5dcd1e5 *data/smiths.rda
6a3f0a74f813cd68547e665f42b8a3cb *data/tips.rda
dd664ad85751a470cf0b7414a1c4c3ec *inst/CITATION
-dcc9587c4ec1230deb72e502b86fc62d *inst/tests/test-cast.r
-c450402fc64e0d1a35d777917ff93ad0 *inst/tests/test-margins.r
-a60729e4f0dfd33aa100363287947386 *inst/tests/test-melt.r
-a7216e25cec082f3395da6863de83ccd *man/add_margins.Rd
-edde7408a7544589fc74e3552127ace8 *man/cast.Rd
-8214d531229d90c6de5b6bcac3c11015 *man/colsplit.Rd
-67acc8e7ad2943eb7acc9f358e2917ac *man/french_fries.Rd
-609a308bc03f2dbf5adff51742ad6146 *man/guess_value.Rd
-d5328616df8fb8e14c41cfe30788741a *man/margins.Rd
-634fc988a0bb7e6b29555b2cee7f2b56 *man/melt.Rd
-445dc1f03f79093822e9534b143b8b3f *man/melt.array.Rd
-2b423890570dd38eac40372e7922d937 *man/melt.data.frame.Rd
-b46a0ca7a796832651627834753b40af *man/melt.default.Rd
-0a8569287d6651219b0ba13d0b3eb5d7 *man/melt.list.Rd
-543444d9fb2c8533aae8a71b6bd880e2 *man/melt_check.Rd
-fa5c27a9488bc1a21b58a95752f38e07 *man/parse_formula.Rd
-c4573be1672fa0361040a596567b38ea *man/recast.Rd
-220f9b410ae11557d8f7e1d8f5424903 *man/smiths.Rd
-3995a24a8f5afd24dd6077c8f34e00c4 *man/tips.Rd
-e269149e26f67e8befc86829c303bd49 *tests/test-all.R
+760d53a13108f2f2688d3f48e0587389 *man/add_margins.Rd
+c3c5e1ece6293a89672a4e67fef1846d *man/cast.Rd
+9825c4f987358217df32f752a52de468 *man/colsplit.Rd
+1d1e6853db07b4f2669f486dd65a4309 *man/french_fries.Rd
+248ac8ff74c1ea6bd6c52ff30eddf82f *man/guess_value.Rd
+b43ca4b201b16cde1e6737099b122ac7 *man/margins.Rd
+62667b517bb67a06e4696238870260e3 *man/melt.Rd
+75d51e9dd1a45b61820847572250e081 *man/melt.array.Rd
+5a4f897301abfee4dcbf5a5fba1450a4 *man/melt.data.frame.Rd
+0a774f8518bb460b63853938ed3d75ce *man/melt.default.Rd
+8f56b5e3474bef6f0db9e5be64900d29 *man/melt.list.Rd
+f409237630dbedee29ed98ff30f7b782 *man/melt_check.Rd
+69465c7fa930c098d075dabc149576af *man/parse_formula.Rd
+c1e35b037d85d451808f9397a774c1d6 *man/recast.Rd
+56ebb0580192b71037dbc17b4963283d *man/smiths.Rd
+207d6da0290afadcdc17c39c5a30c456 *man/tips.Rd
+2dbab2cfefc21e9c3d9b44d60b2cc771 *po/R-ko.po
+15d543bb8dc1c303c8f4a9f54ef767ef *po/R-reshape.pot
+be0ef27f9591d078302bf4f59bb0d03f *src/RcppExports.cpp
+e47f1fa8028888545cc472508db272df *src/melt.cpp
+cbd1f6d4b546516421d74e001e092bbb *tests/testthat.R
+396d0d07c44ceeb304947ec03c2286d1 *tests/testthat/test-cast.r
+c450402fc64e0d1a35d777917ff93ad0 *tests/testthat/test-margins.r
+3b67b1ae2f6541474bfcf563381d3c78 *tests/testthat/test-melt.r
diff --git a/NAMESPACE b/NAMESPACE
index 7ded718..1f140ab 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,3 +1,5 @@
+# Generated by roxygen2 (4.0.0): do not edit by hand
+
S3method(melt,array)
S3method(melt,data.frame)
S3method(melt,default)
@@ -12,3 +14,5 @@ export(melt)
export(recast)
import(plyr)
import(stringr)
+importFrom(Rcpp,evalCpp)
+useDynLib(reshape2)
diff --git a/NEWS b/NEWS
deleted file mode 100644
index 58bcde1..0000000
--- a/NEWS
+++ /dev/null
@@ -1,43 +0,0 @@
-Version 1.2.2
--------------
-
-* Fix incompatibility with plyr 1.8
-
-* Fix evaluation bug revealed by knitr. (Fixes #18)
-
-* Fixed a bug in `melt` where it didn't automatically get variable names
- when used with tables. (Thanks to Winston Chang)
-
-Version 1.2.1
--------------
-
-* Fix bug in multiple margins revealed by plyr 1.7, but caused by mis-use of
- data frame subsetting.
-
-Version 1.2
------------
-
-* Fixed bug in melt where factors were converted to integers, instead of to
- characters
-
-* When the measured variable is a factor, `dcast` now converts it to a
- character rather than throwing an error. `acast` still returns a factor
- matrix. (Thanks to Brian Diggs.)
-
-* `acast` is now much faster, due to fixing a very slow way of naming the
- output. (Thanks to José Bartolomei Díaz for the bug report)
-
-* `value_var` argument to `acast` and `dcast` renamed to `value.var` to be
- consistent with other argument names
-
-* Order `NA` factor levels before `(all)` when creating margins
-
-* Corrected reshape citation.
-
-Version 1.1
------------
-
-* `melt.data.frame` no longer turns characters into factors
-
-* All melt methods gain a `na.rm` and `value.name` arguments - these
- previously were only possessed by `melt.data.frame` (Fixes #5)
\ No newline at end of file
diff --git a/R/RcppExports.R b/R/RcppExports.R
new file mode 100644
index 0000000..16a4e80
--- /dev/null
+++ b/R/RcppExports.R
@@ -0,0 +1,7 @@
+# This file was generated by Rcpp::compileAttributes
+# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
+
+melt_dataframe <- function(data, id_ind, measure_ind, variable_name, value_name, measure_attributes, factorsAsStrings, valueAsFactor) {
+ .Call('reshape2_melt_dataframe', PACKAGE = 'reshape2', data, id_ind, measure_ind, variable_name, value_name, measure_attributes, factorsAsStrings, valueAsFactor)
+}
+
diff --git a/R/cast.r b/R/cast.r
index 5854f9f..79255cd 100644
--- a/R/cast.r
+++ b/R/cast.r
@@ -23,10 +23,6 @@
#' function, \code{fun.aggregate}. This function should take a vector of
#' numbers and return a single summary statistic.
#'
-#' @usage acast(data, formula, fun.aggregate = NULL, ..., margins = NULL,
-#' subset = NULL, fill=NULL, drop = TRUE, value.var = guess_value(data))
-#' @usage dcast(data, formula, fun.aggregate = NULL, ..., margins = NULL,
-#' subset = NULL, fill=NULL, drop = TRUE, value.var = guess_value(data))
#' @keywords manip
#' @param data molten data frame, see \code{\link{melt}}.
#' @param formula casting formula, see details for specifics.
@@ -45,9 +41,6 @@
#' @param value.var name of column which stores values, see
#' \code{\link{guess_value}} for default strategies to figure this out.
#' @seealso \code{\link{melt}}, \url{http://had.co.nz/reshape/}
-#' @name cast
-#' @aliases cast dcast acast
-#' @export dcast acast
#' @import plyr
#' @import stringr
#' @examples
@@ -93,10 +86,20 @@
#' acast(ff_d, subject ~ time, length, fill=0)
#' dcast(ff_d, treatment ~ variable, mean, margins = TRUE)
#' dcast(ff_d, treatment + subject ~ variable, mean, margins="treatment")
-#' lattice::xyplot(`1` ~ `2` | variable, dcast(ff_d, ... ~ rep), aspect="iso")
+#' if (require("lattice")) {
+#' lattice::xyplot(`1` ~ `2` | variable, dcast(ff_d, ... ~ rep), aspect="iso")
+#' }
+#' @name cast
NULL
-cast <- function(data, formula, fun.aggregate = NULL, ..., subset = NULL, fill = NULL, drop = TRUE, value.var = guess_value(data)) {
+cast <- function(data, formula, fun.aggregate = NULL, ..., subset = NULL, fill = NULL, drop = TRUE, value.var = guess_value(data), value_var) {
+
+ if (!missing(value_var)) {
+ stop("Please use value.var instead of value_var.", call. = FALSE)
+ }
+ if (!(value.var %in% names(data))) {
+ stop("value.var (", value.var, ") not found in input", call. = FALSE)
+ }
if (!is.null(subset)) {
include <- data.frame(eval.quoted(subset, data))
@@ -112,13 +115,17 @@ cast <- function(data, formula, fun.aggregate = NULL, ..., subset = NULL, fill =
# Compute labels and id values
ids <- lapply(vars, id, drop = drop)
+
+ # Empty specifications (.) get repeated id
+ is_empty <- vapply(ids, length, integer(1)) == 0
+ empty <- structure(rep(1, nrow(data)), n = 1L)
+ ids[is_empty] <- rep(list(empty), sum(is_empty))
+
labels <- mapply(split_labels, vars, ids, MoreArgs = list(drop = drop),
SIMPLIFY = FALSE, USE.NAMES = FALSE)
- overall <- id(rev(ids), drop = FALSE)
+ labels[is_empty] <- rep(list(data.frame(. = ".")), sum(is_empty))
- ns <- vapply(ids, attr, 0, "n")
- # Replace zeros (empty inputs) with 1 for dimensions of output
- ns[ns == 0] <- 1
+ overall <- id(rev(ids), drop = FALSE)
n <- attr(overall, "n")
# Aggregate duplicates
@@ -146,12 +153,17 @@ cast <- function(data, formula, fun.aggregate = NULL, ..., subset = NULL, fill =
}
}
+ ns <- vapply(ids, attr, double(1), "n")
+ dim(ordered) <- ns
+
list(
- data = structure(ordered, dim = ns),
+ data = ordered,
labels = labels
)
}
+#' @rdname cast
+#' @export
dcast <- function(data, formula, fun.aggregate = NULL, ..., margins = NULL, subset = NULL, fill=NULL, drop = TRUE, value.var = guess_value(data)) {
formula <- parse_formula(formula, names(data), value.var)
@@ -174,6 +186,8 @@ dcast <- function(data, formula, fun.aggregate = NULL, ..., margins = NULL, subs
cbind(res$labels[[1]], data)
}
+#' @rdname cast
+#' @export
acast <- function(data, formula, fun.aggregate = NULL, ..., margins = NULL, subset = NULL, fill=NULL, drop = TRUE, value.var = guess_value(data)) {
formula <- parse_formula(formula, names(data), value.var)
diff --git a/R/melt.r b/R/melt.r
index 4a6c411..72d5060 100644
--- a/R/melt.r
+++ b/R/melt.r
@@ -15,6 +15,7 @@
#' convert explicit missings to implicit missings.
#' @param ... further arguments passed to or from other methods.
#' @param value.name name of variable used to store values
+#' @seealso \code{\link{cast}}
#' @export
melt <- function(data, ..., na.rm = FALSE, value.name = "value") {
UseMethod("melt", data)
@@ -28,9 +29,10 @@ melt <- function(data, ..., na.rm = FALSE, value.name = "value") {
#' convert explicit missings to implicit missings.
#' @param ... further arguments passed to or from other methods.
#' @param value.name name of variable used to store values
-#' @S3method melt default
-#' @method melt default
#' @keywords manip
+#' @seealso \code{\link{melt}}, \code{\link{cast}}
+#' @family melt methods
+#' @export
melt.default <- function(data, ..., na.rm = FALSE, value.name = "value") {
if (na.rm) data <- data[!is.na(data)]
setNames(data.frame(data), value.name)
@@ -39,11 +41,12 @@ melt.default <- function(data, ..., na.rm = FALSE, value.name = "value") {
#' Melt a list by recursively melting each component.
#'
#' @keywords manip
-#' @S3method melt list
-#' @method melt list
#' @param data list to recursively melt
#' @param ... further arguments passed to or from other methods.
#' @param level list level - used for creating labels
+#' @seealso \code{\link{cast}}
+#' @family melt methods
+#' @export
#' @examples
#' a <- as.list(c(1:4, NA))
#' melt(a)
@@ -84,7 +87,7 @@ melt.list <- function(data, ..., level = 1) {
#'
#' @param data data frame to melt
#' @param id.vars vector of id variables. Can be integer (variable position)
-#' or string (variable name)If blank, will use all non-measured variables.
+#' or string (variable name). If blank, will use all non-measured variables.
#' @param measure.vars vector of measured variables. Can be integer (variable
#' position) or string (variable name)If blank, will use all non id.vars
# variables.
@@ -93,37 +96,48 @@ melt.list <- function(data, ..., level = 1) {
#' @param na.rm Should NA values be removed from the data set? This will
#' convert explicit missings to implicit missings.
#' @param ... further arguments passed to or from other methods.
+#' @param factorsAsStrings Control whether factors are converted to character
+#' when melted as measure variables. When \code{FALSE}, coercion is forced if
+#' levels are not identical across the \code{measure.vars}.
+#' @family melt methods
#' @keywords manip
-#' @method melt data.frame
-#' @S3method melt data.frame
+#' @seealso \code{\link{cast}}
+#' @export
#' @examples
#' names(airquality) <- tolower(names(airquality))
#' melt(airquality, id=c("month", "day"))
#' names(ChickWeight) <- tolower(names(ChickWeight))
#' melt(ChickWeight, id=2:4)
-melt.data.frame <- function(data, id.vars, measure.vars, variable.name = "variable", ..., na.rm = FALSE, value.name = "value") {
- var <- melt_check(data, id.vars, measure.vars)
+melt.data.frame <- function(data, id.vars, measure.vars, variable.name = "variable", ..., na.rm = FALSE, value.name = "value", factorsAsStrings = TRUE) {
- ids <- unrowname(data[, var$id, drop = FALSE])
- if (length(var$measure) == 0) {
- return(ids)
- }
+ ## Get the names of id.vars, measure.vars
+ vars <- melt_check(data, id.vars, measure.vars, variable.name, value.name)
- # Turn factors to characters
- factors <- vapply(data, is.factor, logical(1))
- data[factors] <- lapply(data[factors], as.character)
+ ## Match them to indices in the data
+ id.ind <- match(vars$id, names(data))
+ measure.ind <- match(vars$measure, names(data))
- value <- unlist(unname(data[var$measure]))
- variable <- factor(rep(var$measure, each = nrow(data)),
- levels = var$measure)
+ ## Get the attributes if common, NULL if not.
+ args <- normalize_melt_arguments(data, measure.ind, factorsAsStrings)
+ measure.attributes <- args$measure.attributes
+ factorsAsStrings <- args$factorsAsStrings
+ valueAsFactor <- "factor" %in% measure.attributes$class
- df <- data.frame(ids, variable, value, stringsAsFactors = FALSE)
- names(df) <- c(names(ids), variable.name, value.name)
+ df <- melt_dataframe(
+ data,
+ as.integer(id.ind-1),
+ as.integer(measure.ind-1),
+ as.character(variable.name),
+ as.character(value.name),
+ as.pairlist(measure.attributes),
+ as.logical(factorsAsStrings),
+ as.logical(valueAsFactor)
+ )
if (na.rm) {
- subset(df, !is.na(value))
+ return(df[ !is.na(df[[value.name]]), ])
} else {
- df
+ return(df)
}
}
@@ -134,14 +148,16 @@ melt.data.frame <- function(data, id.vars, measure.vars, variable.name = "variab
#' @param data array to melt
#' @param varnames variable names to use in molten data.frame
#' @param ... further arguments passed to or from other methods.
+#' @param as.is if \code{FALSE}, the default, dimnames will be converted
+#' using \code{\link{type.convert}}. If \code{TRUE}, they will be left
+#' as strings.
#' @param value.name name of variable used to store values
#' @param na.rm Should NA values be removed from the data set? This will
#' convert explicit missings to implicit missings.
#' @keywords manip
-#' @S3method melt table
-#' @S3method melt matrix
-#' @S3method melt array
-#' @method melt array
+#' @export
+#' @seealso \code{\link{cast}}
+#' @family melt methods
#' @examples
#' a <- array(c(1:23, NA), c(2,3,4))
#' melt(a)
@@ -152,12 +168,24 @@ melt.data.frame <- function(data, id.vars, measure.vars, variable.name = "variab
#' melt(a, varnames=c("X","Y","Z"))
#' dimnames(a)[1] <- list(NULL)
#' melt(a)
-melt.array <- function(data, varnames = names(dimnames(data)), ..., na.rm = FALSE, value.name = "value") {
- var.convert <- function(x) if(is.character(x)) type.convert(x) else x
+melt.array <- function(data, varnames = names(dimnames(data)), ...,
+ na.rm = FALSE, as.is = FALSE, value.name = "value") {
+ var.convert <- function(x) {
+ if (!is.character(x)) return(x)
+
+ x <- type.convert(x, as.is = TRUE)
+ if (!is.character(x)) return(x)
+
+ factor(x, levels = unique(x))
+ }
dn <- amv_dimnames(data)
names(dn) <- varnames
- labels <- expand.grid(lapply(dn, var.convert), KEEP.OUT.ATTRS = FALSE,
+ if (!as.is) {
+ dn <- lapply(dn, var.convert)
+ }
+
+ labels <- expand.grid(dn, KEEP.OUT.ATTRS = FALSE,
stringsAsFactors = FALSE)
if (na.rm) {
@@ -170,7 +198,12 @@ melt.array <- function(data, varnames = names(dimnames(data)), ..., na.rm = FALS
cbind(labels, value_df)
}
+#' @rdname melt.array
+#' @export
melt.table <- melt.array
+
+#' @rdname melt.array
+#' @export
melt.matrix <- melt.array
#' Check that input variables to melt are appropriate.
@@ -184,8 +217,10 @@ melt.matrix <- melt.array
#' @param data data frame
#' @param id.vars vector of identifying variable names or indexes
#' @param measure.vars vector of Measured variable names or indexes
+#' @param variable.name name of variable used to store measured variable names
+#' @param value.name name of variable used to store values
#' @return a list giving id and measure variables names.
-melt_check <- function(data, id.vars, measure.vars) {
+melt_check <- function(data, id.vars, measure.vars, variable.name, value.name) {
varnames <- names(data)
# Convert positions to names
@@ -218,12 +253,22 @@ melt_check <- function(data, id.vars, measure.vars) {
discrete <- sapply(data, is.discrete)
id.vars <- varnames[discrete]
measure.vars <- varnames[!discrete]
- message("Using ", paste(id.vars, collapse = ", "), " as id variables")
+ if (length(id.vars) != 0) {
+ message("Using ", paste(id.vars, collapse = ", "), " as id variables")
+ } else {
+ message("No id variables; using all as measure variables")
+ }
} else if (missing(id.vars)) {
id.vars <- setdiff(varnames, measure.vars)
} else if (missing(measure.vars)) {
measure.vars <- setdiff(varnames, id.vars)
}
+ # Ensure variable names are characters of length one
+ if (!is.string(variable.name))
+ stop("'variable.name' should be a string", call. = FALSE)
+ if (!is.string(value.name))
+ stop("'value.name' should be a string", call. = FALSE)
+
list(id = id.vars, measure = measure.vars)
}
diff --git a/R/recast.r b/R/recast.r
index 5870e8a..eae7b91 100644
--- a/R/recast.r
+++ b/R/recast.r
@@ -1,11 +1,11 @@
#' Recast: melt and cast in a single step
#'
-#' This conveniently wraps melting and casting a data frame into
+#' This conveniently wraps melting and (d)casting a data frame into
#' a single step.
#'
#' @param data data set to melt
-#' @param formula casting formula, see \link{cast} for specifics
-#' @param ... other arguments passed to \link{cast}
+#' @param formula casting formula, see \code{\link{dcast}} for specifics
+#' @param ... other arguments passed to \code{\link{dcast}}
#' @param id.var identifying variables. If blank, will use all non
#' measure.var variables
#' @param measure.var measured variables. If blank, will use all non
@@ -21,5 +21,5 @@ recast <- function(data, formula, ..., id.var, measure.var) {
}
molten <- melt(data, id.var, measure.var)
- cast(molten, formula, ...)
+ dcast(molten, formula, ...)
}
diff --git a/R/reshape.r b/R/reshape.r
new file mode 100644
index 0000000..3eb2328
--- /dev/null
+++ b/R/reshape.r
@@ -0,0 +1,3 @@
+##' @importFrom Rcpp evalCpp
+##' @useDynLib reshape2
+NULL
\ No newline at end of file
diff --git a/R/utils.r b/R/utils.r
index 6068fe9..e26ecf3 100644
--- a/R/utils.r
+++ b/R/utils.r
@@ -1 +1,54 @@
"%||%" <- function(a, b) if (!is.null(a)) a else b
+
+all_identical <- function(xs) {
+ if (length(xs) <= 1) return(TRUE)
+ for (i in seq(2, length(xs))) {
+ if (!identical(xs[[1]], xs[[i]])) return(FALSE)
+ }
+ TRUE
+}
+
+## Get the attributes if common, NULL if not.
+normalize_melt_arguments <- function(data, measure.ind, factorsAsStrings) {
+
+ measure.attributes <- lapply(measure.ind, function(i) {
+ attributes(data[[i]])
+ })
+
+ ## Determine if all measure.attributes are equal
+ measure.attrs.equal <- all_identical(measure.attributes)
+
+ if (measure.attrs.equal) {
+ measure.attributes <- measure.attributes[[1]]
+ } else {
+ warning("attributes are not identical across measure variables; ",
+ "they will be dropped", call. = FALSE)
+ measure.attributes <- NULL
+ }
+
+ if (!factorsAsStrings && !measure.attrs.equal) {
+ warning("cannot avoid coercion of factors when measure attributes not identical",
+ call. = FALSE)
+ factorsAsStrings <- TRUE
+ }
+
+ ## If we are going to be coercing any factors to strings, we don't want to
+ ## copy the attributes
+ any.factors <- any( sapply( measure.ind, function(i) {
+ is.factor( data[[i]] )
+ }))
+
+ if (factorsAsStrings && any.factors) {
+ measure.attributes <- NULL
+ }
+
+ list(
+ measure.attributes = measure.attributes,
+ factorsAsStrings = factorsAsStrings
+ )
+
+}
+
+is.string <- function(x) {
+ is.character(x) && length(x) == 1
+}
diff --git a/README.md b/README.md
index 50f99a0..98fd459 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,8 @@
-Reshape2 is a reboot of the reshape package. It's been over five years since the first release of the package, and in that time I've learned a tremendous amount about R programming, and how to work with data in R. Reshape2 uses that knowledge to make a new package for reshaping data that is much more focussed and much much faster.
+# Reshape2
+
+[![Build Status](https://travis-ci.org/hadley/reshape.png)](https://travis-ci.org/hadley/reshape)
+
+Reshape2 is a reboot of the reshape package. It's been over five years since the first release of reshape, and in that time I've learned a tremendous amount about R programming, and how to work with data in R. Reshape2 uses that knowledge to make a new package for reshaping data that is much more focussed and much much faster.
This version improves speed at the cost of functionality, so I have renamed it to `reshape2` to avoid causing problems for existing users. Based on user feedback I may reintroduce some of these features.
@@ -10,7 +14,7 @@ What's new in `reshape2`:
* cast is replaced by two functions depending on the output type: `dcast`
produces data frames, and `acast` produces matrices/arrays.
-
+
* multidimensional margins are now possible: `grand_row` and `grand_col` have
been dropped: now the name of the margin refers to the variable that has
its value set to (all).
@@ -24,6 +28,15 @@ What's new in `reshape2`:
* better development practices like namespaces and tests.
+ * the function `melt` now names the columns of its returned data frame `Var1`, `Var2`, ..., `VarN` instead of `X1`, `X2`, ..., `XN`.
+
+ * the argument `variable.name` of `melt` replaces the old argument `variable_name`.
+
Initial benchmarking has shown `melt` to be up to 10x faster, pure reshaping `cast` up to 100x faster, and aggregating `cast()` up to 10x faster.
This work has been generously supported by BD (Becton Dickinson).
+
+## Installation
+
+* Get the released version from cran: `install.packages("reshape2")`
+* Get the dev version from github: `devtools::install_github("hadley/reshape2")`
diff --git a/inst/tests/test-melt.r b/inst/tests/test-melt.r
deleted file mode 100644
index ad7c62b..0000000
--- a/inst/tests/test-melt.r
+++ /dev/null
@@ -1,55 +0,0 @@
-context("Melt")
-
-test_that("Missing values removed when na.rm = TRUE", {
- v <- c(1:3, NA)
- expect_equal(melt(v)$value, v)
- expect_equal(melt(v, na.rm = TRUE)$value, 1:3)
-
- m <- matrix(v, nrow = 2)
- expect_equal(melt(m)$value, v)
- expect_equal(melt(m, na.rm = TRUE)$value, 1:3)
-
- l1 <- list(v)
- expect_equal(melt(l1)$value, v)
- expect_equal(melt(l1, na.rm = TRUE)$value, 1:3)
-
- l2 <- as.list(v)
- expect_equal(melt(l2)$value, v)
- expect_equal(melt(l2, na.rm = TRUE)$value, 1:3)
-
- df <- data.frame(x = v)
- expect_equal(melt(df)$value, v)
- expect_equal(melt(df, na.rm = TRUE)$value, 1:3)
-})
-
-test_that("value col name set by value.name", {
- v <- c(1:3, NA)
- expect_equal(names(melt(v, value.name = "v")), "v")
-
- m <- matrix(v, nrow = 2)
- expect_equal(names(melt(m, value.name = "v"))[3], "v")
-
- l1 <- list(v)
- expect_equal(names(melt(l1, value.name = "v"))[1], "v")
-
- df <- data.frame(x = v)
- expect_equal(names(melt(df, value.name = "v"))[2], "v")
-})
-
-test_that("lists can have zero element components", {
- l <- list(a = 1:10, b = integer(0))
- m <- melt(l)
-
- expect_equal(nrow(m), 10)
-})
-
-test_that("factors coerced to characters, not integers", {
- df <- data.frame(
- id = 1:3,
- v1 = 1:3,
- v2 = factor(letters[1:3]))
- dfm <- melt(df, 1)
-
- expect_equal(dfm$value, c(1:3, letters[1:3]))
-
-})
diff --git a/man/add_margins.Rd b/man/add_margins.Rd
index 30a8512..07e1e96 100644
--- a/man/add_margins.Rd
+++ b/man/add_margins.Rd
@@ -1,21 +1,21 @@
+% Generated by roxygen2 (4.0.0): do not edit by hand
\name{add_margins}
\alias{add_margins}
\title{Add margins to a data frame.}
\usage{
- add_margins(df, vars, margins = TRUE)
+add_margins(df, vars, margins = TRUE)
}
\arguments{
- \item{df}{input data frame}
+\item{df}{input data frame}
- \item{vars}{a list of character vectors giving the
- variables in each dimension}
+\item{vars}{a list of character vectors giving the variables in each
+dimension}
- \item{margins}{a character vector of variable names to
- compute margins for. \code{TRUE} will compute all
- possible margins.}
+\item{margins}{a character vector of variable names to compute margins for.
+\code{TRUE} will compute all possible margins.}
}
\description{
- Rownames are silently stripped. All margining variables
- will be converted to factors.
+Rownames are silently stripped. All margining variables will be converted
+to factors.
}
diff --git a/man/cast.Rd b/man/cast.Rd
index 75d0632..aab4bf9 100644
--- a/man/cast.Rd
+++ b/man/cast.Rd
@@ -1,69 +1,68 @@
+% Generated by roxygen2 (4.0.0): do not edit by hand
\name{cast}
\alias{acast}
\alias{cast}
\alias{dcast}
\title{Cast functions
Cast a molten data frame into an array or data frame.}
+\usage{
+dcast(data, formula, fun.aggregate = NULL, ..., margins = NULL,
+ subset = NULL, fill = NULL, drop = TRUE,
+ value.var = guess_value(data))
+
+acast(data, formula, fun.aggregate = NULL, ..., margins = NULL,
+ subset = NULL, fill = NULL, drop = TRUE,
+ value.var = guess_value(data))
+}
\arguments{
- \item{data}{molten data frame, see \code{\link{melt}}.}
+\item{data}{molten data frame, see \code{\link{melt}}.}
- \item{formula}{casting formula, see details for
- specifics.}
+\item{formula}{casting formula, see details for specifics.}
- \item{fun.aggregate}{aggregation function needed if
- variables do not identify a single observation for each
- output cell. Defaults to length (with a message) if
- needed but not specified.}
+\item{fun.aggregate}{aggregation function needed if variables do not
+identify a single observation for each output cell. Defaults to length
+(with a message) if needed but not specified.}
- \item{...}{further arguments are passed to aggregating
- function}
+\item{...}{further arguments are passed to aggregating function}
- \item{margins}{vector of variable names (can include
- "grand\_col" and "grand\_row") to compute margins for, or
- TRUE to compute all margins . Any variables that can not
- be margined over will be silently dropped.}
+\item{margins}{vector of variable names (can include "grand\_col" and
+"grand\_row") to compute margins for, or TRUE to compute all margins .
+Any variables that can not be margined over will be silently dropped.}
- \item{subset}{quoted expression used to subset data prior
- to reshaping, e.g. \code{subset =
- .(variable=="length")}.}
+\item{subset}{quoted expression used to subset data prior to reshaping,
+e.g. \code{subset = .(variable=="length")}.}
- \item{fill}{value with which to fill in structural
- missings, defaults to value from applying
- \code{fun.aggregate} to 0 length vector}
+\item{fill}{value with which to fill in structural missings, defaults to
+value from applying \code{fun.aggregate} to 0 length vector}
- \item{drop}{should missing combinations dropped or kept?}
+\item{drop}{should missing combinations dropped or kept?}
- \item{value.var}{name of column which stores values, see
- \code{\link{guess_value}} for default strategies to
- figure this out.}
+\item{value.var}{name of column which stores values, see
+\code{\link{guess_value}} for default strategies to figure this out.}
}
\description{
- Use \code{acast} or \code{dcast} depending on whether you
- want vector/matrix/array output or data frame output.
- Data frames can have at most two dimensions.
+Use \code{acast} or \code{dcast} depending on whether you want
+vector/matrix/array output or data frame output. Data frames can have at
+most two dimensions.
}
\details{
- The cast formula has the following format:
- \code{x_variable + x_2 ~ y_variable + y_2 ~ z_variable ~
- ... } The order of the variables makes a difference. The
- first varies slowest, and the last fastest. There are a
- couple of special variables: "..." represents all other
- variables not used in the formula and "." represents no
- variable, so you can do \code{formula = var1 ~ .}.
-
- Alternatively, you can supply a list of quoted
- expressions, in the form \code{list(.(x_variable, x_2),
- .(y_variable, y_2), .(z))}. The advantage of this form
- is that you can cast based on transformations of the
- variables: \code{list(.(a + b), (c = round(c)))}. See
- the documentation for \code{\link[plyr]{.}} for more
- details and alternative formats.
-
- If the combination of variables you supply does not
- uniquely identify one row in the original data set, you
- will need to supply an aggregating function,
- \code{fun.aggregate}. This function should take a vector
- of numbers and return a single summary statistic.
+The cast formula has the following format:
+\code{x_variable + x_2 ~ y_variable + y_2 ~ z_variable ~ ... }
+The order of the variables makes a difference. The first varies slowest,
+and the last fastest. There are a couple of special variables: "..."
+represents all other variables not used in the formula and "." represents
+no variable, so you can do \code{formula = var1 ~ .}.
+
+Alternatively, you can supply a list of quoted expressions, in the form
+\code{list(.(x_variable, x_2), .(y_variable, y_2), .(z))}. The advantage
+of this form is that you can cast based on transformations of the
+variables: \code{list(.(a + b), (c = round(c)))}. See the documentation
+for \code{\link[plyr]{.}} for more details and alternative formats.
+
+If the combination of variables you supply does not uniquely identify one
+row in the original data set, you will need to supply an aggregating
+function, \code{fun.aggregate}. This function should take a vector of
+numbers and return a single summary statistic.
}
\examples{
#Air quality example
@@ -108,10 +107,12 @@ acast(ff_d, subject ~ time, length)
acast(ff_d, subject ~ time, length, fill=0)
dcast(ff_d, treatment ~ variable, mean, margins = TRUE)
dcast(ff_d, treatment + subject ~ variable, mean, margins="treatment")
-lattice::xyplot(`1` ~ `2` | variable, dcast(ff_d, ... ~ rep), aspect="iso")
+if (require("lattice")) {
+ lattice::xyplot(`1` ~ `2` | variable, dcast(ff_d, ... ~ rep), aspect="iso")
+}
}
\seealso{
- \code{\link{melt}}, \url{http://had.co.nz/reshape/}
+\code{\link{melt}}, \url{http://had.co.nz/reshape/}
}
\keyword{manip}
diff --git a/man/colsplit.Rd b/man/colsplit.Rd
index b8e62f6..79bf465 100644
--- a/man/colsplit.Rd
+++ b/man/colsplit.Rd
@@ -1,21 +1,21 @@
+% Generated by roxygen2 (4.0.0): do not edit by hand
\name{colsplit}
\alias{colsplit}
\title{Split a vector into multiple columns}
\usage{
- colsplit(string, pattern, names)
+colsplit(string, pattern, names)
}
\arguments{
- \item{string}{character vector or factor to split up}
+\item{string}{character vector or factor to split up}
- \item{pattern}{regular expression to split on}
+\item{pattern}{regular expression to split on}
- \item{names}{names for output columns}
+\item{names}{names for output columns}
}
\description{
- Useful for splitting variable names that a combination of
- multiple variables. Uses \code{\link{type.convert}} to
- convert each column to correct type, but will not convert
- character to factor.
+Useful for splitting variable names that are a combination of multiple
+variables. Uses \code{\link{type.convert}} to convert each column to
+the correct type, but will not convert character to factor.
}
\examples{
x <- c("a_1", "a_2", "b_2", "c_3")
diff --git a/man/french_fries.Rd b/man/french_fries.Rd
index c7371e1..9340844 100644
--- a/man/french_fries.Rd
+++ b/man/french_fries.Rd
@@ -1,22 +1,31 @@
+% Generated by roxygen2 (4.0.0): do not edit by hand
\docType{data}
\name{french_fries}
\alias{french_fries}
\title{Sensory data from a french fries experiment.}
\format{A data frame with 696 rows and 9 variables}
+\usage{
+data(french_fries)
+}
\description{
- This data was collected from a sensory experiment
- conducted at Iowa State University in 2004. The
- investigators were interested in the effect of using
- three different fryer oils had on the taste of the fries.
+This data was collected from a sensory experiment conducted at Iowa State
+University in 2004. The investigators were interested in the effect that
+using three different fryer oils had on the taste of the fries.
}
\details{
- Variables:
+Variables:
- \itemize{ \item time in weeks from start of study. \item
- treatment (type of oil), \item subject, \item replicate,
- \item potato-y flavour, \item buttery flavour, \item
- grassy flavour, \item rancid flavour, \item painty
- flavour }
+\itemize{
+ \item time in weeks from start of study.
+ \item treatment (type of oil),
+ \item subject,
+ \item replicate,
+ \item potato-y flavour,
+ \item buttery flavour,
+ \item grassy flavour,
+ \item rancid flavour,
+ \item painty flavour
+}
}
\keyword{datasets}
diff --git a/man/guess_value.Rd b/man/guess_value.Rd
index 2b15fcd..1d20f9a 100644
--- a/man/guess_value.Rd
+++ b/man/guess_value.Rd
@@ -1,16 +1,19 @@
+% Generated by roxygen2 (4.0.0): do not edit by hand
\name{guess_value}
\alias{guess_value}
\title{Guess name of value column}
\usage{
- guess_value(df)
+guess_value(df)
}
\arguments{
- \item{df}{data frame to guess value column from}
+\item{df}{data frame to guess value column from}
}
\description{
- Strategy: \enumerate{ \item Is value or (all) column
- present? If so, use that \item Otherwise, guess that last
- column is the value column }
+Strategy:
+\enumerate{
+ \item Is value or (all) column present? If so, use that
+ \item Otherwise, guess that last column is the value column
+}
}
\keyword{internal}
diff --git a/man/margins.Rd b/man/margins.Rd
index 4e369fb..d20c69f 100644
--- a/man/margins.Rd
+++ b/man/margins.Rd
@@ -1,27 +1,26 @@
+% Generated by roxygen2 (4.0.0): do not edit by hand
\name{margins}
\alias{margins}
\title{Figure out margining variables.}
\usage{
- margins(vars, margins = NULL)
+margins(vars, margins = NULL)
}
\arguments{
- \item{vars}{a list of character vectors giving the
- variables in each dimension}
+\item{vars}{a list of character vectors giving the variables in each
+dimension}
- \item{margins}{a character vector of variable names to
- compute margins for. \code{TRUE} will compute all
- possible margins.}
+\item{margins}{a character vector of variable names to compute margins for.
+\code{TRUE} will compute all possible margins.}
}
\value{
- list of margining combinations, or \code{NULL} if none.
- These are the combinations of variables that should have
- their values set to \code{(all)}
+list of margining combinations, or \code{NULL} if none. These are
+ the combinations of variables that should have their values set to
+ \code{(all)}
}
\description{
- Given the variables that form the rows and columns, and a
- set of desired margins, works out which ones are
- possible. Variables that can't be margined over are
- dropped silently.
+Given the variables that form the rows and columns, and a set of desired
+margins, works out which ones are possible. Variables that can't be
+margined over are dropped silently.
}
\keyword{internal}
\keyword{manip}
diff --git a/man/melt.Rd b/man/melt.Rd
index 400ca3c..6acc368 100644
--- a/man/melt.Rd
+++ b/man/melt.Rd
@@ -1,31 +1,33 @@
+% Generated by roxygen2 (4.0.0): do not edit by hand
\name{melt}
\alias{melt}
\title{Convert an object into a molten data frame.}
\usage{
- melt(data, ..., na.rm = FALSE, value.name = "value")
+melt(data, ..., na.rm = FALSE, value.name = "value")
}
\arguments{
- \item{data}{Data set to melt}
+\item{data}{Data set to melt}
- \item{na.rm}{Should NA values be removed from the data
- set? This will convert explicit missings to implicit
- missings.}
+\item{na.rm}{Should NA values be removed from the data set? This will
+convert explicit missings to implicit missings.}
- \item{...}{further arguments passed to or from other
- methods.}
+\item{...}{further arguments passed to or from other methods.}
- \item{value.name}{name of variable used to store values}
+\item{value.name}{name of variable used to store values}
}
\description{
- This the generic melt function. See the following
- functions for the details about different data
- structures:
+This is the generic melt function. See the following functions
+for the details about different data structures:
}
\details{
- \itemize{ \item \code{\link{melt.data.frame}} for
- data.frames \item \code{\link{melt.array}} for arrays,
- matrices and tables \item \code{\link{melt.list}} for
- lists }
+\itemize{
+ \item \code{\link{melt.data.frame}} for data.frames
+ \item \code{\link{melt.array}} for arrays, matrices and tables
+ \item \code{\link{melt.list}} for lists
+}
+}
+\seealso{
+\code{\link{cast}}
}
\keyword{manip}
diff --git a/man/melt.array.Rd b/man/melt.array.Rd
index c5d4679..07e3291 100644
--- a/man/melt.array.Rd
+++ b/man/melt.array.Rd
@@ -1,29 +1,37 @@
+% Generated by roxygen2 (4.0.0): do not edit by hand
\name{melt.array}
\alias{melt.array}
+\alias{melt.matrix}
+\alias{melt.table}
\title{Melt an array.}
\usage{
- \method{melt}{array} (data,
- varnames = names(dimnames(data)), ..., na.rm = FALSE,
- value.name = "value")
+\method{melt}{array}(data, varnames = names(dimnames(data)), ...,
+ na.rm = FALSE, as.is = FALSE, value.name = "value")
+
+\method{melt}{table}(data, varnames = names(dimnames(data)), ...,
+ na.rm = FALSE, as.is = FALSE, value.name = "value")
+
+\method{melt}{matrix}(data, varnames = names(dimnames(data)), ...,
+ na.rm = FALSE, as.is = FALSE, value.name = "value")
}
\arguments{
- \item{data}{array to melt}
+\item{data}{array to melt}
+
+\item{varnames}{variable names to use in molten data.frame}
- \item{varnames}{variable names to use in molten
- data.frame}
+\item{...}{further arguments passed to or from other methods.}
- \item{...}{further arguments passed to or from other
- methods.}
+\item{as.is}{if \code{FALSE}, the default, dimnames will be converted
+using \code{\link{type.convert}}. If \code{TRUE}, they will be left
+as strings.}
- \item{value.name}{name of variable used to store values}
+\item{value.name}{name of variable used to store values}
- \item{na.rm}{Should NA values be removed from the data
- set? This will convert explicit missings to implicit
- missings.}
+\item{na.rm}{Should NA values be removed from the data set? This will
+convert explicit missings to implicit missings.}
}
\description{
- This code is conceptually similar to
- \code{\link{as.data.frame.table}}
+This code is conceptually similar to \code{\link{as.data.frame.table}}
}
\examples{
a <- array(c(1:23, NA), c(2,3,4))
@@ -36,5 +44,11 @@ melt(a, varnames=c("X","Y","Z"))
dimnames(a)[1] <- list(NULL)
melt(a)
}
+\seealso{
+\code{\link{cast}}
+
+Other melt.methods: \code{\link{melt.data.frame}};
+ \code{\link{melt.default}}; \code{\link{melt.list}}
+}
\keyword{manip}
diff --git a/man/melt.data.frame.Rd b/man/melt.data.frame.Rd
index bcaf238..fa5d3c1 100644
--- a/man/melt.data.frame.Rd
+++ b/man/melt.data.frame.Rd
@@ -1,42 +1,41 @@
+% Generated by roxygen2 (4.0.0): do not edit by hand
\name{melt.data.frame}
\alias{melt.data.frame}
\title{Melt a data frame into form suitable for easy casting.}
\usage{
- \method{melt}{data.frame} (data, id.vars, measure.vars,
- variable.name = "variable", ..., na.rm = FALSE,
- value.name = "value")
+\method{melt}{data.frame}(data, id.vars, measure.vars,
+ variable.name = "variable", ..., na.rm = FALSE, value.name = "value",
+ factorsAsStrings = TRUE)
}
\arguments{
- \item{data}{data frame to melt}
+\item{data}{data frame to melt}
- \item{id.vars}{vector of id variables. Can be integer
- (variable position) or string (variable name)If blank,
- will use all non-measured variables.}
+\item{id.vars}{vector of id variables. Can be integer (variable position)
+or string (variable name). If blank, will use all non-measured variables.}
- \item{measure.vars}{vector of measured variables. Can be
- integer (variable position) or string (variable name)If
- blank, will use all non id.vars}
+\item{measure.vars}{vector of measured variables. Can be integer (variable
+position) or string (variable name). If blank, will use all non id.vars}
- \item{variable.name}{name of variable used to store
- measured variable names}
+\item{variable.name}{name of variable used to store measured variable names}
- \item{value.name}{name of variable used to store values}
+\item{value.name}{name of variable used to store values}
- \item{na.rm}{Should NA values be removed from the data
- set? This will convert explicit missings to implicit
- missings.}
+\item{na.rm}{Should NA values be removed from the data set? This will
+convert explicit missings to implicit missings.}
- \item{...}{further arguments passed to or from other
- methods.}
+\item{...}{further arguments passed to or from other methods.}
+
+\item{factorsAsStrings}{Control whether factors are converted to character
+when melted as measure variables. When \code{FALSE}, coercion is forced if
+levels are not identical across the \code{measure.vars}.}
}
\description{
- You need to tell melt which of your variables are id
- variables, and which are measured variables. If you only
- supply one of \code{id.vars} and \code{measure.vars},
- melt will assume the remainder of the variables in the
- data set belong to the other. If you supply neither, melt
- will assume factor and character variables are id
- variables, and all others are measured.
+You need to tell melt which of your variables are id variables, and which
+are measured variables. If you only supply one of \code{id.vars} and
+\code{measure.vars}, melt will assume the remainder of the variables in the
+data set belong to the other. If you supply neither, melt will assume
+factor and character variables are id variables, and all others are
+measured.
}
\examples{
names(airquality) <- tolower(names(airquality))
@@ -44,5 +43,12 @@ melt(airquality, id=c("month", "day"))
names(ChickWeight) <- tolower(names(ChickWeight))
melt(ChickWeight, id=2:4)
}
+\seealso{
+\code{\link{cast}}
+
+Other melt.methods: \code{\link{melt.array}},
+ \code{\link{melt.matrix}}, \code{\link{melt.table}};
+ \code{\link{melt.default}}; \code{\link{melt.list}}
+}
\keyword{manip}
diff --git a/man/melt.default.Rd b/man/melt.default.Rd
index 5e19fc1..7456282 100644
--- a/man/melt.default.Rd
+++ b/man/melt.default.Rd
@@ -1,26 +1,31 @@
+% Generated by roxygen2 (4.0.0): do not edit by hand
\name{melt.default}
\alias{melt.default}
\title{Melt a vector.
For vectors, makes a column of a data frame}
\usage{
- \method{melt}{default} (data, ..., na.rm = FALSE,
- value.name = "value")
+\method{melt}{default}(data, ..., na.rm = FALSE, value.name = "value")
}
\arguments{
- \item{data}{vector to melt}
+\item{data}{vector to melt}
- \item{na.rm}{Should NA values be removed from the data
- set? This will convert explicit missings to implicit
- missings.}
+\item{na.rm}{Should NA values be removed from the data set? This will
+convert explicit missings to implicit missings.}
- \item{...}{further arguments passed to or from other
- methods.}
+\item{...}{further arguments passed to or from other methods.}
- \item{value.name}{name of variable used to store values}
+\item{value.name}{name of variable used to store values}
}
\description{
- Melt a vector. For vectors, makes a column of a data
- frame
+Melt a vector.
+For vectors, makes a column of a data frame
+}
+\seealso{
+\code{\link{melt}}, \code{\link{cast}}
+
+Other melt.methods: \code{\link{melt.array}},
+ \code{\link{melt.matrix}}, \code{\link{melt.table}};
+ \code{\link{melt.data.frame}}; \code{\link{melt.list}}
}
\keyword{manip}
diff --git a/man/melt.list.Rd b/man/melt.list.Rd
index 80ad5c7..215821e 100644
--- a/man/melt.list.Rd
+++ b/man/melt.list.Rd
@@ -1,19 +1,19 @@
+% Generated by roxygen2 (4.0.0): do not edit by hand
\name{melt.list}
\alias{melt.list}
\title{Melt a list by recursively melting each component.}
\usage{
- \method{melt}{list} (data, ..., level = 1)
+\method{melt}{list}(data, ..., level = 1)
}
\arguments{
- \item{data}{list to recursively melt}
+\item{data}{list to recursively melt}
- \item{...}{further arguments passed to or from other
- methods.}
+\item{...}{further arguments passed to or from other methods.}
- \item{level}{list level - used for creating labels}
+\item{level}{list level - used for creating labels}
}
\description{
- Melt a list by recursively melting each component.
+Melt a list by recursively melting each component.
}
\examples{
a <- as.list(c(1:4, NA))
@@ -27,5 +27,12 @@ melt(a)
melt(list(1:5, matrix(1:4, ncol=2)))
melt(list(list(1:3), 1, list(as.list(3:4), as.list(1:2))))
}
+\seealso{
+\code{\link{cast}}
+
+Other melt.methods: \code{\link{melt.array}},
+ \code{\link{melt.matrix}}, \code{\link{melt.table}};
+ \code{\link{melt.data.frame}}; \code{\link{melt.default}}
+}
\keyword{manip}
diff --git a/man/melt_check.Rd b/man/melt_check.Rd
index d59d0d4..9f0c61a 100644
--- a/man/melt_check.Rd
+++ b/man/melt_check.Rd
@@ -1,27 +1,29 @@
+% Generated by roxygen2 (4.0.0): do not edit by hand
\name{melt_check}
\alias{melt_check}
\title{Check that input variables to melt are appropriate.}
\usage{
- melt_check(data, id.vars, measure.vars)
+melt_check(data, id.vars, measure.vars, variable.name, value.name)
}
\arguments{
- \item{data}{data frame}
+\item{data}{data frame}
- \item{id.vars}{vector of identifying variable names or
- indexes}
+\item{id.vars}{vector of identifying variable names or indexes}
- \item{measure.vars}{vector of Measured variable names or
- indexes}
+\item{measure.vars}{vector of measured variable names or indexes}
+
+\item{variable.name}{name of variable used to store measured variable names}
+
+\item{value.name}{name of variable used to store values}
}
\value{
- a list giving id and measure variables names.
+a list giving id and measure variables names.
}
\description{
- If id.vars or measure.vars are missing, \code{melt_check}
- will do its best to impute them. If you only supply one
- of id.vars and measure.vars, melt will assume the
- remainder of the variables in the data set belong to the
- other. If you supply neither, melt will assume discrete
- variables are id variables and all other are measured.
+If id.vars or measure.vars are missing, \code{melt_check} will do its
+best to impute them. If you only supply one of id.vars and measure.vars,
+melt will assume the remainder of the variables in the data set belong to
+the other. If you supply neither, melt will assume discrete variables are
+id variables and all others are measured.
}
diff --git a/man/parse_formula.Rd b/man/parse_formula.Rd
index 174e135..ed4499a 100644
--- a/man/parse_formula.Rd
+++ b/man/parse_formula.Rd
@@ -1,27 +1,27 @@
+% Generated by roxygen2 (4.0.0): do not edit by hand
\name{parse_formula}
\alias{parse_formula}
\title{Parse casting formulae.}
\usage{
- parse_formula(formula = "... ~ variable", varnames,
- value.var = "value")
+parse_formula(formula = "... ~ variable", varnames, value.var = "value")
}
\arguments{
- \item{formula}{formula to parse}
+\item{formula}{formula to parse}
- \item{varnames}{names of all variables in data}
+\item{varnames}{names of all variables in data}
- \item{value.var}{name of variable containing values}
+\item{value.var}{name of variable containing values}
}
\description{
- There are a two ways to specify a casting formula: either
- as a string, or a list of quoted variables. This function
- converts the former to the latter.
+There are two ways to specify a casting formula: either as a string, or
+a list of quoted variables. This function converts the former to the
+latter.
}
\details{
- Casting formulas separate dimensions with \code{~} and
- variables within a dimension with \code{+} or \code{*}.
- \code{.} can be used as a placeholder, and \code{...}
- represents all other variables not otherwise used.
+Casting formulas separate dimensions with \code{~} and variables within
+a dimension with \code{+} or \code{*}. \code{.} can be used as a
+placeholder, and \code{...} represents all other variables not otherwise
+used.
}
\examples{
reshape2:::parse_formula("a + ...", letters[1:6])
diff --git a/man/recast.Rd b/man/recast.Rd
index 9a0a613..361b189 100644
--- a/man/recast.Rd
+++ b/man/recast.Rd
@@ -1,32 +1,32 @@
+% Generated by roxygen2 (4.0.0): do not edit by hand
\name{recast}
\alias{recast}
\title{Recast: melt and cast in a single step}
\usage{
- recast(data, formula, ..., id.var, measure.var)
+recast(data, formula, ..., id.var, measure.var)
}
\arguments{
- \item{data}{data set to melt}
+\item{data}{data set to melt}
- \item{formula}{casting formula, see \link{cast} for
- specifics}
+\item{formula}{casting formula, see \code{\link{dcast}} for specifics}
- \item{...}{other arguments passed to \link{cast}}
+\item{...}{other arguments passed to \code{\link{dcast}}}
- \item{id.var}{identifying variables. If blank, will use
- all non measure.var variables}
+\item{id.var}{identifying variables. If blank, will use all non
+measure.var variables}
- \item{measure.var}{measured variables. If blank, will use
- all non id.var variables}
+\item{measure.var}{measured variables. If blank, will use all non
+id.var variables}
}
\description{
- This conveniently wraps melting and casting a data frame
- into a single step.
+This conveniently wraps melting and (d)casting a data frame into
+a single step.
}
\examples{
recast(french_fries, time ~ variable, id.var = 1:4)
}
\seealso{
- \url{http://had.co.nz/reshape/}
+\url{http://had.co.nz/reshape/}
}
\keyword{manip}
diff --git a/man/smiths.Rd b/man/smiths.Rd
index 05d2eb2..93772cc 100644
--- a/man/smiths.Rd
+++ b/man/smiths.Rd
@@ -1,11 +1,15 @@
+% Generated by roxygen2 (4.0.0): do not edit by hand
\docType{data}
\name{smiths}
\alias{smiths}
\title{Demo data describing the Smiths.}
\format{A data frame with 2 rows and 5 variables}
+\usage{
+data(smiths)
+}
\description{
- A small demo dataset describing John and Mary Smith.
- Used in the introductory vignette.
+A small demo dataset describing John and Mary Smith. Used in the
+introductory vignette.
}
\keyword{datasets}
diff --git a/man/tips.Rd b/man/tips.Rd
index dc48ac8..c965aac 100644
--- a/man/tips.Rd
+++ b/man/tips.Rd
@@ -1,27 +1,35 @@
+% Generated by roxygen2 (4.0.0): do not edit by hand
\docType{data}
\name{tips}
\alias{tips}
\title{Tipping data}
\format{A data frame with 244 rows and 7 variables}
+\usage{
+data(tips)
+}
\description{
- One waiter recorded information about each tip he
- received over a period of a few months working in one
- restaurant. He collected several variables:
+One waiter recorded information about each tip he received over a
+period of a few months working in one restaurant. He collected several
+variables:
}
\details{
- \itemize{ \item tip in dollars, \item bill in dollars,
- \item sex of the bill payer, \item whether there were
- smokers in the party, \item day of the week, \item time
- of day, \item size of the party. }
+\itemize{
+ \item tip in dollars,
+ \item bill in dollars,
+ \item sex of the bill payer,
+ \item whether there were smokers in the party,
+ \item day of the week,
+ \item time of day,
+ \item size of the party.
+}
- In all he recorded 244 tips. The data was reported in a
- collection of case studies for business statistics
- (Bryant & Smith 1995).
+In all he recorded 244 tips. The data was reported in a collection of
+case studies for business statistics (Bryant & Smith 1995).
}
\references{
- Bryant, P. G. and Smith, M (1995) \emph{Practical Data
- Analysis: Case Studies in Business Statistics}.
- Homewood, IL: Richard D. Irwin Publishing:
+Bryant, P. G. and Smith, M (1995) \emph{Practical Data
+ Analysis: Case Studies in Business Statistics}. Homewood, IL: Richard D.
+ Irwin Publishing.
}
\keyword{datasets}
diff --git a/po/R-ko.po b/po/R-ko.po
new file mode 100644
index 0000000..8f3923e
--- /dev/null
+++ b/po/R-ko.po
@@ -0,0 +1,53 @@
+# Korean translation for R reshape package
+# /po/R-ko.po
+# This file is distributed under the same license as the R reshape package.
+# R Development Translation Team - Korean
+# Chel Hee Lee <gnustats at korea.gnu.org>, 2013.
+# Chel Hee Lee <gnustats at gmail.com>, 2013.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: reshape 1.2.2\n"
+"Report-Msgid-Bugs-To: bugs at r-project.org\n"
+"POT-Creation-Date: 2013-03-29 14:37\n"
+"PO-Revision-Date: 2013-04-01 17:44+0900\n"
+"Last-Translator: Eugene Jung <muoe78 at gmail.com>\n"
+"Language-Team: R Development Translation Teams (Korean) <gnustats at korea.gnu."
+"org>\n"
+"Language: ko\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=1; plural=0;\n"
+"X-Poedit-SourceCharset: utf-8\n"
+"X-Generator: Poedit 1.5.5\n"
+
+msgid "Aggregation function missing: defaulting to length"
+msgstr "집계 함수 누락: 기본값으로 length를 사용합니다."
+
+msgid "Dataframes have at most two output dimensions"
+msgstr "데이터프레임(dataframes)은 최대 두 개의 출력 차원을 가질 수 있습니다."
+
+msgid "Don't know how to parse"
+msgstr "어떻게 parse할지 알 수 없습니다."
+
+msgid "Using"
+msgstr "사용"
+
+msgid "as value column: use value.var to override."
+msgstr "값 열로: 덮어쓰기 위해 value.var을 사용합니다."
+
+msgid "id variables not found in data:"
+msgstr "데이터에서 id 변수를 찾을 수 없습니다:"
+
+msgid "measure variables not found in data:"
+msgstr "데이터에서 측정 변수를 찾을 수 없습니다:"
+
+msgid ","
+msgstr ","
+
+msgid "as id variables"
+msgstr "id 변수처럼"
+
+msgid "Use var, not vars"
+msgstr "vars가 아닌 var를 사용하십시오."
diff --git a/po/R-reshape.pot b/po/R-reshape.pot
new file mode 100644
index 0000000..4c50656
--- /dev/null
+++ b/po/R-reshape.pot
@@ -0,0 +1,42 @@
+msgid ""
+msgstr ""
+"Project-Id-Version: R 2.15.1\n"
+"Report-Msgid-Bugs-To: bugs at r-project.org\n"
+"POT-Creation-Date: 2013-03-29 14:45\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL at ADDRESS>\n"
+"Language-Team: LANGUAGE <LL at li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=CHARSET\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+
+msgid "Aggregation function missing: defaulting to length"
+msgstr ""
+
+msgid "Dataframes have at most two output dimensions"
+msgstr ""
+
+msgid "Don't know how to parse"
+msgstr ""
+
+msgid "Using"
+msgstr ""
+
+msgid "as value column: use value.var to override."
+msgstr ""
+
+msgid "id variables not found in data:"
+msgstr ""
+
+msgid "measure variables not found in data:"
+msgstr ""
+
+msgid ","
+msgstr ""
+
+msgid "as id variables"
+msgstr ""
+
+msgid "Use var, not vars"
+msgstr ""
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
new file mode 100644
index 0000000..d9644e0
--- /dev/null
+++ b/src/RcppExports.cpp
@@ -0,0 +1,29 @@
+// This file was generated by Rcpp::compileAttributes
+// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
+
+#include <Rcpp.h>
+
+using namespace Rcpp;
+
+// melt_dataframe
+List melt_dataframe(const DataFrame& data, const IntegerVector& id_ind, const IntegerVector& measure_ind, String variable_name, String value_name, SEXP measure_attributes, bool factorsAsStrings, bool valueAsFactor);
+RcppExport SEXP reshape2_melt_dataframe(SEXP dataSEXP, SEXP id_indSEXP, SEXP measure_indSEXP, SEXP variable_nameSEXP, SEXP value_nameSEXP, SEXP measure_attributesSEXP, SEXP factorsAsStringsSEXP, SEXP valueAsFactorSEXP) {
+BEGIN_RCPP
+ SEXP __sexp_result;
+ {
+ Rcpp::RNGScope __rngScope;
+ Rcpp::traits::input_parameter< const DataFrame& >::type data(dataSEXP );
+ Rcpp::traits::input_parameter< const IntegerVector& >::type id_ind(id_indSEXP );
+ Rcpp::traits::input_parameter< const IntegerVector& >::type measure_ind(measure_indSEXP );
+ Rcpp::traits::input_parameter< String >::type variable_name(variable_nameSEXP );
+ Rcpp::traits::input_parameter< String >::type value_name(value_nameSEXP );
+ Rcpp::traits::input_parameter< SEXP >::type measure_attributes(measure_attributesSEXP );
+ Rcpp::traits::input_parameter< bool >::type factorsAsStrings(factorsAsStringsSEXP );
+ Rcpp::traits::input_parameter< bool >::type valueAsFactor(valueAsFactorSEXP );
+ List __result = melt_dataframe(data, id_ind, measure_ind, variable_name, value_name, measure_attributes, factorsAsStrings, valueAsFactor);
+ PROTECT(__sexp_result = Rcpp::wrap(__result));
+ }
+ UNPROTECT(1);
+ return __sexp_result;
+END_RCPP
+}
diff --git a/src/melt.cpp b/src/melt.cpp
new file mode 100644
index 0000000..3d811bc
--- /dev/null
+++ b/src/melt.cpp
@@ -0,0 +1,297 @@
+#include <Rcpp.h>
+using namespace Rcpp;
+
+// A debug macro -- change to 'debug(x) x' for debug output
+#define debug(x)
+
+// An optimized rep
+#define DO_REP(RTYPE, CTYPE, ACCESSOR) \
+ { \
+ Shield<SEXP> output(Rf_allocVector(RTYPE, nout)); \
+ for (int i = 0; i < n; ++i) { \
+ memcpy((char*)ACCESSOR(output) + i * xn * sizeof(CTYPE), \
+ (char*)ACCESSOR(x), \
+ sizeof(CTYPE) * xn); \
+ } \
+ return output; \
+ break; \
+ }
+
+SEXP rep_(SEXP x, int n) {
+ int xn = Rf_length(x);
+ int nout = xn * n;
+ switch (TYPEOF(x)) {
+ case INTSXP:
+ DO_REP(INTSXP, int, INTEGER);
+ case REALSXP:
+ DO_REP(REALSXP, double, REAL);
+ case STRSXP: {
+ int counter = 0;
+ Shield<SEXP> output(Rf_allocVector(STRSXP, nout));
+ for (int i = 0; i < n; ++i) {
+ for (int j = 0; j < xn; ++j) {
+ SET_STRING_ELT(output, counter, STRING_ELT(x, j));
+ ++counter;
+ }
+ }
+ return output;
+ break;
+ }
+ case LGLSXP:
+ DO_REP(LGLSXP, int, LOGICAL);
+ case CPLXSXP:
+ DO_REP(CPLXSXP, Rcomplex, COMPLEX);
+ case RAWSXP:
+ DO_REP(RAWSXP, Rbyte, RAW);
+ default: {
+ stop("Unhandled RTYPE");
+ return R_NilValue;
+ }
+ }
+}
+
+// An optimized rep_each
+#define DO_REP_EACH(RTYPE, CTYPE, ACCESSOR) \
+ { \
+ int counter = 0; \
+ Shield<SEXP> output(Rf_allocVector(RTYPE, nout)); \
+ CTYPE* x_ptr = ACCESSOR(x); \
+ CTYPE* output_ptr = ACCESSOR(output); \
+ for (int i = 0; i < xn; ++i) { \
+ for (int j = 0; j < n; ++j) { \
+ output_ptr[counter] = x_ptr[i]; \
+ ++counter; \
+ } \
+ } \
+ return output; \
+ break; \
+ }
+
+SEXP rep_each_(SEXP x, int n) {
+ int xn = Rf_length(x);
+ int nout = xn * n;
+ switch (TYPEOF(x)) {
+ case INTSXP:
+ DO_REP_EACH(INTSXP, int, INTEGER);
+ case REALSXP:
+ DO_REP_EACH(REALSXP, double, REAL);
+ case STRSXP: {
+ int counter = 0;
+ Shield<SEXP> output(Rf_allocVector(STRSXP, nout));
+ for (int i = 0; i < xn; ++i) {
+ for (int j = 0; j < n; ++j) {
+ SET_STRING_ELT(output, counter, STRING_ELT(x, i));
+ ++counter;
+ }
+ }
+ return output;
+ break;
+ }
+ DO_REP_EACH(STRSXP, SEXP, STRING_PTR);
+ case LGLSXP:
+ DO_REP_EACH(LGLSXP, int, LOGICAL);
+ case CPLXSXP:
+ DO_REP_EACH(CPLXSXP, Rcomplex, COMPLEX);
+ case RAWSXP:
+ DO_REP_EACH(RAWSXP, Rbyte, RAW);
+ default: {
+ stop("Unhandled RTYPE");
+ return R_NilValue;
+ }
+ }
+}
+
+// Optimized factor routine for the case where we want to make
+// a factor from a vector of names -- used for generating the
+// 'variable' column in the melted data.frame
+IntegerVector make_variable_column(CharacterVector x, int nrow) {
+ IntegerVector fact = seq(1, x.size());
+ IntegerVector output = rep_each_(fact, nrow);
+ output.attr("levels") = x;
+ output.attr("class") = "factor";
+ return output;
+}
+
+// Ensure that we index in the column range of the data --
+// just to double-check everything went okay upstream
+void check_indices(IntegerVector ind, int ncol, std::string msg) {
+ int n = ind.size();
+ for (int i = 0; i < n; ++i) {
+ if (ind[i] < 0) {
+ stop(msg + "index less than zero");
+ }
+ if (ind[i] >= ncol) {
+ stop(msg + "index > number of columns");
+ }
+ if (ind[i] == NA_INTEGER) {
+ stop(msg + "no match found");
+ }
+ }
+}
+
+// Concatenate vectors for the 'value' column
+#define DO_CONCATENATE(CTYPE) \
+ { \
+ memcpy((char*)dataptr(output) + i* nrow * sizeof(CTYPE), \
+ (char*)dataptr(tmp), \
+ nrow * sizeof(CTYPE)); \
+ break; \
+ }
+
+SEXP concatenate(const DataFrame& x, IntegerVector ind, bool factorsAsStrings) {
+
+ int nrow = x.nrows();
+ int n_ind = ind.size();
+
+ // We coerce up to the 'max type' if necessary, using the fact
+ // that R's SEXPTYPEs are also ordered in terms of 'precision'
+ // Note: we convert factors to characters if necessary
+ int max_type = 0;
+ int ctype = 0;
+ for (int i = 0; i < n_ind; ++i) {
+
+ if (Rf_isFactor(x[ind[i]]) and factorsAsStrings) {
+ ctype = STRSXP;
+ } else {
+ ctype = TYPEOF(x[ind[i]]);
+ }
+ max_type = ctype > max_type ? ctype : max_type;
+ }
+
+ debug(printf("Max type of value variables is %s\n", Rf_type2char(max_type)));
+
+ Armor<SEXP> tmp;
+ Shield<SEXP> output(Rf_allocVector(max_type, nrow * n_ind));
+ for (int i = 0; i < n_ind; ++i) {
+
+ // a 'tmp' pointer to the current column being iterated over, or
+ // a coerced version if necessary
+ if (TYPEOF(x[ind[i]]) == max_type) {
+ tmp = x[ind[i]];
+ } else if (Rf_isFactor(x[ind[i]]) and factorsAsStrings) {
+ tmp = Rf_asCharacterFactor(x[ind[i]]);
+ } else {
+ tmp = Rf_coerceVector(x[ind[i]], max_type);
+ }
+
+ switch (max_type) {
+ case INTSXP:
+ DO_CONCATENATE(int);
+ case REALSXP:
+ DO_CONCATENATE(double);
+ case LGLSXP:
+ DO_CONCATENATE(int);
+ case CPLXSXP:
+ DO_CONCATENATE(Rcomplex);
+ case STRSXP: {
+ for (int j = 0; j < nrow; ++j) {
+ SET_STRING_ELT(output, i * nrow + j, STRING_ELT(tmp, j));
+ }
+ break;
+ }
+ case RAWSXP:
+ DO_CONCATENATE(Rbyte);
+ }
+ }
+
+ return output;
+}
+
+// [[Rcpp::export]]
+List melt_dataframe(const DataFrame& data,
+ const IntegerVector& id_ind,
+ const IntegerVector& measure_ind,
+ String variable_name,
+ String value_name,
+ SEXP measure_attributes,
+ bool factorsAsStrings,
+ bool valueAsFactor) {
+
+ int nrow = data.nrows();
+ int ncol = data.size();
+
+ CharacterVector data_names = as<CharacterVector>(data.attr("names"));
+
+ // We only melt data.frames that contain only atomic elements
+ for (int i = 0; i < ncol; ++i) {
+ if (!Rf_isVectorAtomic(data[i])) {
+ stop("Can't melt data.frames with non-atomic columns");
+ }
+ }
+
+ int n_id = id_ind.size();
+ debug(Rprintf("n_id == %i\n", n_id));
+
+ int n_measure = measure_ind.size();
+ debug(Rprintf("n_measure == %i\n", n_measure));
+
+ // The output should be a data.frame with:
+ // number of columns == number of id vars + 'variable' + 'value',
+ // with number of rows == data.nrow() * number of value vars
+ List output = no_init(n_id + 2);
+
+ // First, allocate the ID variables
+ // we repeat each ID vector n_measure times
+
+ // A define to handle the different possible types
+ #define REP(RTYPE) \
+ case RTYPE: { \
+ output[i] = rep_(data[id_ind[i]], n_measure); \
+ Rf_copyMostAttrib(data[id_ind[i]], output[i]); \
+ break; \
+ }
+
+ for (int i = 0; i < n_id; ++i) {
+ switch (TYPEOF(data[id_ind[i]])) {
+ REP(LGLSXP);
+ REP(INTSXP);
+ REP(REALSXP);
+ REP(STRSXP);
+ REP(CPLXSXP);
+ REP(RAWSXP);
+ default: { stop("internal error: unnhandled vector type in REP"); }
+ }
+ }
+
+ // Now, we assign the 'variable' and 'value' columns
+
+ // 'variable' is made up of repeating the names of the 'measure' variables,
+ // each nrow times. We want this to be a factor as well.
+ CharacterVector id_names = no_init(n_measure);
+ for (int i = 0; i < n_measure; ++i) {
+ id_names[i] = data_names[measure_ind[i]];
+ }
+ output[n_id] = make_variable_column(id_names, nrow);
+
+ // 'value' is made by concatenating each of the 'value' variables
+ output[n_id + 1] = concatenate(data, measure_ind, factorsAsStrings);
+ if (!Rf_isNull(measure_attributes)) {
+ SET_ATTRIB(output[n_id + 1], measure_attributes);
+ }
+
+ // Set the object bit explicitly to make sure that the 'value' is properly
+ // interpreted as a factor
+ if (valueAsFactor) {
+ SET_OBJECT(output[n_id + 1], 1);
+ }
+
+ // Make the List more data.frame like
+
+ // Set the row names
+ output.attr("row.names") =
+ IntegerVector::create(IntegerVector::get_na(), -(nrow * n_measure));
+
+ // Set the names
+ CharacterVector out_names = no_init(n_id + 2);
+ for (int i = 0; i < n_id; ++i) {
+ out_names[i] = data_names[id_ind[i]];
+ }
+ out_names[n_id] = variable_name;
+ out_names[n_id + 1] = value_name;
+ output.attr("names") = out_names;
+
+ // Set the class
+ output.attr("class") = "data.frame";
+
+ return output;
+}
diff --git a/tests/test-all.R b/tests/testthat.R
similarity index 59%
rename from tests/test-all.R
rename to tests/testthat.R
index 69b75ff..1056594 100644
--- a/tests/test-all.R
+++ b/tests/testthat.R
@@ -1,4 +1,4 @@
library(testthat)
library(reshape2)
-test_package("reshape2")
+test_check("reshape2")
diff --git a/inst/tests/test-cast.r b/tests/testthat/test-cast.r
similarity index 87%
rename from inst/tests/test-cast.r
rename to tests/testthat/test-cast.r
index f0acae6..fc09271 100644
--- a/inst/tests/test-cast.r
+++ b/tests/testthat/test-cast.r
@@ -108,7 +108,7 @@ test_that("aggregated values computed correctly", {
test_that("value.var overrides value col", {
df <- data.frame(
id1 = rep(letters[1:2],2),
- id2 = rep(LETTERS [1:2],each=2), var1=1:4)
+ id2 = rep(LETTERS[1:2],each=2), var1=1:4)
df.m <- melt(df)
df.m$value2 <- df.m$value * 2
@@ -175,3 +175,29 @@ test_that("dcast evaluated in correct argument", {
expect_equal(names(res), c("y", "b", "a"))
})
+
+test_that(". ~ . returns single value", {
+ one <- acast(s2m, . ~ ., sum)
+ expect_equal(as.vector(one), 78)
+ expect_equal(dimnames(one), list(".", "."))
+})
+
+test_that("drop = TRUE retains NA values", {
+ df <- data.frame(x = 1:5, y = c(letters[1:4], NA), value = 5:1)
+ out <- dcast(df, x + y ~ .)
+
+ expect_equal(dim(out), c(5, 3))
+ expect_equal(out$., 5:1)
+})
+
+test_that("useful error message if you use value_var", {
+ expect_error(dcast(mtcars, vs ~ am, value_var = "cyl"),
+ "Please use value.var", fixed = TRUE)
+ expect_equal(dim(dcast(mtcars, vs ~ am, value.var = "cyl")), c(2, 3))
+
+})
+
+test_that("useful error message if value.var doesn't exist", {
+ expect_error(dcast(airquality, month ~ day, value.var = "test"),
+ "value.var (test) not found in input", fixed = TRUE)
+})
diff --git a/inst/tests/test-margins.r b/tests/testthat/test-margins.r
similarity index 100%
rename from inst/tests/test-margins.r
rename to tests/testthat/test-margins.r
diff --git a/tests/testthat/test-melt.r b/tests/testthat/test-melt.r
new file mode 100644
index 0000000..4d97275
--- /dev/null
+++ b/tests/testthat/test-melt.r
@@ -0,0 +1,175 @@
+context("Melt")
+
+test_that("Missing values removed when na.rm = TRUE", {
+ v <- c(1:3, NA)
+ expect_equal(melt(v)$value, v)
+ expect_equal(melt(v, na.rm = TRUE)$value, 1:3)
+
+ m <- matrix(v, nrow = 2)
+ expect_equal(melt(m)$value, v)
+ expect_equal(melt(m, na.rm = TRUE)$value, 1:3)
+
+ l1 <- list(v)
+ expect_equal(melt(l1)$value, v)
+ expect_equal(melt(l1, na.rm = TRUE)$value, 1:3)
+
+ l2 <- as.list(v)
+ expect_equal(melt(l2)$value, v)
+ expect_equal(melt(l2, na.rm = TRUE)$value, 1:3)
+
+ df <- data.frame(x = v)
+ expect_equal(melt(df)$value, v)
+ expect_equal(melt(df, na.rm = TRUE)$value, 1:3)
+})
+
+test_that("value col name set by value.name", {
+ v <- c(1:3, NA)
+ expect_equal(names(melt(v, value.name = "v")), "v")
+
+ m <- matrix(v, nrow = 2)
+ expect_equal(names(melt(m, value.name = "v"))[3], "v")
+
+ l1 <- list(v)
+ expect_equal(names(melt(l1, value.name = "v"))[1], "v")
+
+ df <- data.frame(x = v)
+ expect_equal(names(melt(df, value.name = "v"))[2], "v")
+})
+
+test_that("lists can have zero element components", {
+ l <- list(a = 1:10, b = integer(0))
+ m <- melt(l)
+
+ expect_equal(nrow(m), 10)
+})
+
+test_that("factors coerced to characters, not integers", {
+ df <- data.frame(
+ id = 1:3,
+ v1 = 1:3,
+ v2 = factor(letters[1:3]))
+ expect_warning(dfm <- melt(df, 1))
+
+ expect_equal(dfm$value, c(1:3, letters[1:3]))
+})
+
+test_that("dimnames are preserved with arrays and tables", {
+ a <- array(c(1:12), c(2,3,2))
+
+ # Plain array with no dimnames
+ am <- melt(a)
+ expect_equal(names(am), c("Var1", "Var2", "Var3", "value"))
+ # Also check values
+ expect_equal(unique(am$Var1), 1:2)
+ expect_equal(unique(am$Var2), 1:3)
+ expect_equal(unique(am$Var3), 1:2)
+
+ # Explicitly set varnames
+ am <- melt(a, varnames = c("X", "Y", "Z"))
+ expect_equal(names(am), c("X", "Y", "Z", "value"))
+
+ # Set the dimnames for the array
+ b <- a
+ dimnames(b) <- list(X = c("A", "B"), Y = c("A", "B", "C"), Z = c("A", "B"))
+ bm <- melt(b)
+ expect_equal(names(bm), c("X", "Y", "Z", "value"))
+ # Also check values
+ expect_equal(levels(bm$X), c("A", "B"))
+ expect_equal(levels(bm$Y), c("A", "B", "C"))
+ expect_equal(levels(bm$Z), c("A", "B"))
+
+ # Make sure the same works for contingency tables
+ b <- as.table(a)
+ dimnames(b) <- list(X = c("A", "B"), Y = c("A", "B", "C"), Z = c("A", "B"))
+ bm <- melt(b)
+ expect_equal(names(bm), c("X", "Y", "Z", "value"))
+ # Also check values
+ expect_equal(levels(bm$X), c("A", "B"))
+ expect_equal(levels(bm$Y), c("A", "B", "C"))
+ expect_equal(levels(bm$Z), c("A", "B"))
+})
+
+test_that("dimnames kept in original order", {
+ x <- matrix(1:4, nrow = 2)
+ rownames(x) <- c("b", "a")
+ colnames(x) <- c("e", "d")
+ names(dimnames(x)) <- c("x", "y")
+
+ m <- melt(x)
+ expect_equal(levels(m$x), c("b", "a"))
+ expect_equal(levels(m$y), c("e", "d"))
+
+})
+
+test_that("as.is = TRUE suppresses dimname conversion", {
+ x <- matrix(nrow = 2, ncol = 2)
+ dimnames(x) <- list(x = 1:2, y = 3:4)
+
+ out <- melt(x, as.is = TRUE)
+ expect_true(is.character(out$x))
+ expect_true(is.character(out$y))
+
+})
+
+test_that("The 'variable' column is a factor after melting a data.frame", {
+ df <- data.frame(x=1:3, y=4:6)
+ df.m <- melt(df)
+ expect_true( is.factor(df.m$variable) )
+})
+
+test_that("Common classes are preserved in measure variables", {
+ df <- data.frame(id = 1:2, date1 = Sys.Date(), date2 = Sys.Date() + 10)
+ m <- melt(df, measure.vars=c("date1", "date2"))
+ expect_true( class(m$value) == "Date" )
+})
+
+test_that("Common attributes are preserved in measure variables", {
+ df <- data.frame(
+ id = 1:2,
+ date1 = as.POSIXct( Sys.Date() ),
+ date2 = as.POSIXct( Sys.Date() + 10)
+ )
+ m <- melt(df, measure.vars=c("date1", "date2"))
+})
+
+test_that("A warning is thrown when attributes are dropped in measure variables", {
+ df <- data.frame(
+ id=1:2,
+ date1 = as.POSIXct( Sys.Date() ),
+ date2 = Sys.Date() + 10
+ )
+ expect_warning( melt(df, measure.vars=c("date1", "date2")) )
+})
+
+test_that("factorsAsStrings behaves as expected", {
+
+ ## factors with identical levels -> staying as factor is okay
+ df <- data.frame(
+ id=1:2,
+ f1=factor(c("a", "b")),
+ f2=factor(c("b", "a"))
+ )
+ m1 <- melt(df, 1, factorsAsStrings=TRUE)
+ expect_identical( class(m1$value), "character" )
+
+ m2 <- melt(df, 1, factorsAsStrings=FALSE)
+ expect_identical( class(m2$value), "factor" )
+
+ ## make sure we have faithfully reproduced an R factor
+ expect_identical(
+ m2$value,
+ factor(c("a", "b", "b", "a"))
+ )
+
+ ## factors with different levels -> convert to character to be safe
+ df <- data.frame(
+ id=1:2,
+ f1=factor(c("a", "b")),
+ f2=factor(c("c", "d"))
+ )
+ expect_warning(melt(df, 1))
+
+ expect_warning(m <- melt(df, 1, factorsAsStrings = FALSE))
+ expect_identical( class(m$value), "character" )
+
+})
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/r-cran-reshape2.git
More information about the debian-med-commit
mailing list