[med-svn] [r-bioc-delayedarray] 03/05: New upstream version 0.2.7
Andreas Tille
tille at debian.org
Sun Oct 1 06:09:32 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository r-bioc-delayedarray.
commit c0833a2d911d8a1625f731f9e530ea33ebc5f5ed
Author: Andreas Tille <tille at debian.org>
Date: Sun Oct 1 08:02:35 2017 +0200
New upstream version 0.2.7
---
DESCRIPTION | 27 +
NAMESPACE | 155 +++++
NOTES | 46 ++
R/ArrayBlocks-class.R | 177 +++++
R/DelayedArray-class.R | 1061 +++++++++++++++++++++++++++++
R/DelayedArray-stats.R | 32 +
R/DelayedArray-utils.R | 649 ++++++++++++++++++
R/DelayedMatrix-stats.R | 246 +++++++
R/DelayedMatrix-utils.R | 44 ++
R/RleArray-class.R | 265 +++++++
R/block_processing.R | 230 +++++++
R/cbind-methods.R | 163 +++++
R/realize.R | 223 ++++++
R/show-utils.R | 340 +++++++++
R/utils.R | 205 ++++++
R/zzz.R | 19 +
TODO | 19 +
debian/changelog | 14 -
debian/compat | 1 -
debian/control | 31 -
debian/copyright | 106 ---
debian/rules | 4 -
debian/source/format | 1 -
debian/tests/bioc | 3 -
debian/tests/control | 3 -
debian/watch | 3 -
inst/unitTests/test_ArrayBlocks-class.R | 71 ++
inst/unitTests/test_DelayedArray-class.R | 290 ++++++++
inst/unitTests/test_DelayedArray-utils.R | 297 ++++++++
inst/unitTests/test_DelayedMatrix-stats.R | 66 ++
inst/unitTests/test_DelayedMatrix-utils.R | 106 +++
inst/unitTests/test_cbind-methods.R | 132 ++++
man/ArrayBlocks-class.Rd | 19 +
man/DelayedArray-class.Rd | 343 ++++++++++
man/DelayedArray-utils.Rd | 227 ++++++
man/RleArray-class.Rd | 128 ++++
man/cbind-methods.Rd | 91 +++
man/realize.Rd | 108 +++
tests/run_unitTests.R | 2 +
39 files changed, 5781 insertions(+), 166 deletions(-)
diff --git a/DESCRIPTION b/DESCRIPTION
new file mode 100644
index 0000000..dc12ac2
--- /dev/null
+++ b/DESCRIPTION
@@ -0,0 +1,27 @@
+Package: DelayedArray
+Title: Delayed operations on array-like objects
+Description: Wrapping an array-like object (typically an on-disk object) in
+ a DelayedArray object allows one to perform common array operations
+ on it without loading the object in memory. In order to reduce memory
+ usage and optimize performance, operations on the object are either
+ delayed or executed using a block processing mechanism. Note that this
+ also works on in-memory array-like objects like DataFrame objects
+ (typically with Rle columns), Matrix objects, and ordinary arrays and
+ data frames.
+Version: 0.2.7
+Encoding: UTF-8
+Author: Hervé Pagès
+Maintainer: Hervé Pagès <hpages at fredhutch.org>
+biocViews: Infrastructure, DataRepresentation, Annotation,
+ GenomeAnnotation
+Depends: R (>= 3.4), methods, BiocGenerics, S4Vectors (>= 0.14.3),
+ IRanges, matrixStats
+Imports: stats
+Suggests: Matrix, HDF5Array, genefilter, BiocStyle
+License: Artistic-2.0
+Collate: utils.R ArrayBlocks-class.R show-utils.R DelayedArray-class.R
+ cbind-methods.R realize.R block_processing.R
+ DelayedArray-utils.R DelayedMatrix-utils.R DelayedArray-stats.R
+ DelayedMatrix-stats.R RleArray-class.R zzz.R
+NeedsCompilation: no
+Packaged: 2017-06-03 00:46:21 UTC; biocbuild
diff --git a/NAMESPACE b/NAMESPACE
new file mode 100644
index 0000000..eb6102d
--- /dev/null
+++ b/NAMESPACE
@@ -0,0 +1,155 @@
+import(methods)
+importFrom(stats, setNames, dlogis, plogis, qlogis)
+import(BiocGenerics)
+import(S4Vectors)
+import(IRanges)
+import(matrixStats)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Export S4 classes
+###
+
+exportClasses(
+ ArrayBlocks,
+ DelayedArray, DelayedMatrix,
+ RealizationSink, arrayRealizationSink,
+ RleArray, RleMatrix, RleRealizationSink
+)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Export S3 methods
+###
+
+S3method(as.array, DelayedArray)
+
+S3method(as.character, DelayedArray)
+
+S3method(as.complex, DelayedArray)
+
+S3method(as.data.frame, DelayedArray)
+
+S3method(as.integer, DelayedArray)
+
+S3method(as.logical, DelayedArray)
+
+S3method(as.matrix, DelayedArray)
+
+S3method(as.numeric, DelayedArray)
+
+S3method(as.raw, DelayedArray)
+
+S3method(as.vector, DelayedArray)
+
+S3method(mean, DelayedArray)
+
+S3method(split, DelayedArray)
+
+### We also export them thru the export() directive so that (a) they can be
+### called directly, (b) tab-completion on the name of the generic shows them,
+### and (c) methods() doesn't asterisk them.
+
+export(
+ as.array.DelayedArray,
+
+ as.character.DelayedArray,
+
+ as.complex.DelayedArray,
+
+ as.data.frame.DelayedArray,
+
+ as.integer.DelayedArray,
+
+ as.logical.DelayedArray,
+
+ as.matrix.DelayedArray,
+
+ as.numeric.DelayedArray,
+
+ as.raw.DelayedArray,
+
+ as.vector.DelayedArray,
+
+ mean.DelayedArray,
+
+ split.DelayedArray
+)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Export S4 methods for generics not defined in DelayedArray
+###
+
+exportMethods(
+ ## Methods for generics defined in the base package:
+ length, names, "names<-",
+ dim, "dim<-", dimnames, "dimnames<-",
+ as.array, as.vector, as.matrix, as.data.frame,
+ "[", "[[",
+ drop,
+ t,
+ c,
+ split,
+ is.na, is.finite, is.infinite, is.nan,
+ "!",
+ #"+", "-", "*", "/", "^", "%%", "%/%", # "Arith" group generic
+ "==", "!=", "<=", ">=", "<", ">", # "Compare" group generic
+ anyNA, which,
+ max, min, range, sum, prod, any, all, # "Summary" group generic
+ mean,
+ round, signif,
+ rowSums, colSums, rowMeans, colMeans,
+ nchar, tolower, toupper,
+
+ ## Methods for generics defined in the methods package:
+ coerce, show,
+
+ ## Methods for generics defined in the stats package:
+ dlogis, plogis, qlogis,
+
+ ## Methods for generics defined in the BiocGenerics package:
+ cbind, rbind,
+
+ ## Methods for generics defined in the S4Vectors package:
+ isEmpty,
+
+ ## Methods for generics defined in the IRanges package:
+ splitAsList,
+ arbind, acbind
+)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Export non-generic functions
+###
+
+export(
+ ArrayBlocks,
+ supportedRealizationBackends, getRealizationBackend, setRealizationBackend,
+ RealizationSink, arrayRealizationSink,
+ RleArray, RleRealizationSink
+)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Export S4 generics defined in DelayedArray + export corresponding methods
+###
+
+export(
+ matrixClass, DelayedArray, seed, subset_seed_as_array, type,
+ realize,
+ write_to_sink, close,
+ pmax2, pmin2, apply,
+ rowMaxs, colMaxs, rowMins, colMins, rowRanges, colRanges
+)
+
+### Exactly the same list as above.
+exportMethods(
+ matrixClass, DelayedArray, seed, subset_seed_as_array, type,
+ realize,
+ write_to_sink, close,
+ pmax2, pmin2, apply,
+ rowMaxs, colMaxs, rowMins, colMins, rowRanges, colRanges
+)
+
diff --git a/NOTES b/NOTES
new file mode 100644
index 0000000..8ac11d0
--- /dev/null
+++ b/NOTES
@@ -0,0 +1,46 @@
+## Should this go in the SummarizedExperiment package? As an additional section
+## in the vignette? As a separate vignette? As a man page? Probably the former.
+
+
+## The problem
+## ===========
+##
+## When trying to create a SummarizedExperiment object with big dimensions it's
+## critical to use a memory-efficient container for the assay data. Depending
+## on the nature of the data, in-memory containers that compress the data (e.g.
+## a DataFrame of Rle's or a sparse matrix from the Matrix package) might help
+## to a certain extent. However, even after compression some data might remain
+## too big to fit in memory. In that case, one solution is to split the
+## SummarizedExperiment object in smaller objects, then process the smaller
+## objects separately, and finally combine the results. A disadvantage of this
+## approach is that the split/process/combine mechanism is the responsibility
+## of the SummarizedExperiment-based application so it makes the development of
+## such applications more complicated. Having the assay data stored in an
+## on-disk container like HDF5Matrix should greatly simplify this: the goal is
+## to make it possible for the end-user to manipulate the big
+## SummarizedExperiment object as a whole and have the split/process/combine
+## mechanism automatically and transparently happen behind the scene .
+
+## Comparison of assay data containers
+## ===================================
+##
+## Each container has its strengths and weaknesses and which one to use exactly
+## depends on several factors.
+##
+## DataFrame of Rle's
+## ------------------
+## Works great for coverage data. See ?GPos in GenomicRanges for an example.
+
+## Sparse matrix object from the Matrix package
+## --------------------------------------------
+## This sounds like a natural candidate for RNA-seq count data which tends to
+## be sparse. Unfortunately, because the Matrix package can only store the
+## counts as doubles and not as integers, trying to use it on real RNA-seq
+## count data actually increases the size of the matrix of counts:
+library(Matrix)
+library(airway)
+data(airway)
+head(assay(airway))
+object.size(assay(airway))
+object.size(Matrix(assay(airway), sparse=TRUE))
+
diff --git a/R/ArrayBlocks-class.R b/R/ArrayBlocks-class.R
new file mode 100644
index 0000000..1813fd9
--- /dev/null
+++ b/R/ArrayBlocks-class.R
@@ -0,0 +1,177 @@
+### =========================================================================
+### ArrayBlocks objects
+### -------------------------------------------------------------------------
+###
+
+
+setClass("ArrayBlocks",
+ contains="List",
+ representation(
+ dim="integer",
+ max_block_len="integer",
+ N="integer",
+ by="integer"
+ ),
+ prototype(elementType="list")
+)
+
+### Return an ArrayBlocks object i.e. a collection of subarrays of the
+### original array with the following properties:
+### (a) The collection of blocks is a partitioning of the original array
+### i.e. the blocks fully cover it and don't overlap each other.
+### (b) Each block is made of adjacent elements in the original array.
+### (c) Each block has a length (i.e. nb of elements) <= 'max_block_len'.
+ArrayBlocks <- function(dim, max_block_len)
+{
+ p <- cumprod(dim)
+ w <- which(p <= max_block_len)
+ N <- if (length(w) == 0L) 1L else w[[length(w)]] + 1L
+ if (N > length(dim)) {
+ by <- 1L
+ } else if (N == 1L) {
+ by <- max_block_len
+ } else {
+ by <- max_block_len %/% as.integer(p[[N - 1L]])
+ }
+ new("ArrayBlocks", dim=dim, max_block_len=max_block_len, N=N, by=by)
+}
+
+.get_ArrayBlocks_inner_length <- function(blocks)
+{
+ ndim <- length(blocks at dim)
+ if (blocks at N > ndim)
+ return(if (any(blocks at dim == 0L)) 0L else 1L)
+ inner_len <- blocks at dim[[blocks at N]] %/% blocks at by
+ by2 <- blocks at dim[[blocks at N]] %% blocks at by
+ if (by2 != 0L) # 'blocks' contains truncated blocks
+ inner_len <- inner_len + 1L
+ inner_len
+}
+
+.get_ArrayBlocks_outer_length <- function(blocks)
+{
+ ndim <- length(blocks at dim)
+ if (blocks at N >= ndim)
+ return(1L)
+ outer_dim <- blocks at dim[(blocks at N + 1L):ndim]
+ prod(outer_dim)
+}
+
+### Return the number of blocks in 'x'.
+setMethod("length", "ArrayBlocks",
+ function(x)
+ .get_ArrayBlocks_inner_length(x) * .get_ArrayBlocks_outer_length(x)
+)
+
+get_block_lengths <- function(blocks)
+{
+ p <- prod(blocks at dim[seq_len(blocks at N - 1L)])
+ ndim <- length(blocks at dim)
+ if (blocks at N > ndim)
+ return(p)
+ fb_len <- p * blocks at by # full block length
+ lens <- rep.int(fb_len, blocks at dim[[blocks at N]] %/% blocks at by)
+ by2 <- blocks at dim[[blocks at N]] %% blocks at by
+ if (by2 != 0L) { # 'blocks' contains truncated blocks
+ tb_len <- p * by2 # truncated block length
+ lens <- c(lens, tb_len)
+ }
+ rep.int(lens, .get_ArrayBlocks_outer_length(blocks))
+}
+
+### Return an IRanges object with 1 range per dimension.
+get_block_ranges <- function(blocks, i)
+{
+ nblock <- length(blocks)
+ stopifnot(isSingleInteger(i), i >= 1L, i <= nblock)
+
+ ndim <- length(blocks at dim)
+ ans <- IRanges(rep.int(1L, ndim), blocks at dim)
+
+ if (blocks at N > ndim)
+ return(ans)
+
+ i <- i - 1L
+ if (blocks at N < ndim) {
+ inner_len <- .get_ArrayBlocks_inner_length(blocks)
+ i1 <- i %% inner_len
+ i2 <- i %/% inner_len
+ } else {
+ i1 <- i
+ }
+
+ k1 <- i1 * blocks at by
+ k2 <- k1 + blocks at by
+ k1 <- k1 + 1L
+ upper_bound <- blocks at dim[[blocks at N]]
+ if (k2 > upper_bound)
+ k2 <- upper_bound
+ start(ans)[[blocks at N]] <- k1
+ end(ans)[[blocks at N]] <- k2
+
+ if (blocks at N < ndim) {
+ outer_dim <- blocks at dim[(blocks at N + 1L):ndim]
+ subindex <- arrayInd(i2 + 1L, outer_dim)
+ ans[(blocks at N + 1L):ndim] <- IRanges(subindex, width=1L)
+ }
+ ans
+}
+
+get_array_block_Nindex <- function(blocks, i)
+{
+ make_Nindex_from_block_ranges(get_block_ranges(blocks, i), blocks at dim)
+}
+
+setMethod("getListElement", "ArrayBlocks",
+ function(x, i, exact=TRUE)
+ {
+ i <- normalizeDoubleBracketSubscript(i, x, exact=exact,
+ error.if.nomatch=TRUE)
+ get_array_block_Nindex(x, i)
+ }
+)
+
+setMethod("show", "ArrayBlocks",
+ function(object)
+ {
+ dim_in1string <- paste0(object at dim, collapse=" x ")
+ cat(class(object), " object with ", length(object), " blocks ",
+ "of length <= ", object at max_block_len, " on a ",
+ dim_in1string, " array:\n", sep="")
+ for (i in seq_along(object)) {
+ Nindex <- object[[i]]
+ cat("[[", i, "]]: [", Nindex_as_string(Nindex), "]\n",
+ sep="")
+ }
+ }
+)
+
+extract_array_block <- function(x, blocks, i)
+{
+ Nindex <- get_array_block_Nindex(blocks, i)
+ subset_by_Nindex(x, Nindex)
+}
+
+### NOT exported but used in unit tests.
+split_array_in_blocks <- function(x, max_block_len)
+{
+ blocks <- ArrayBlocks(dim(x), max_block_len)
+ lapply(seq_along(blocks),
+ function(i) extract_array_block(x, blocks, i))
+}
+
+### NOT exported but used in unit tests.
+### Rebuild the original array from the subarrays obtained by
+### split_array_in_blocks() as an *ordinary* array.
+### So if 'x' is an ordinary array, then:
+###
+### unsplit_array_from_blocks(split_array_in_blocks(x, max_block_len), x)
+###
+### should be a no-op for any 'max_block_len' < 'length(x)'.
+unsplit_array_from_blocks <- function(subarrays, x)
+{
+ ans <- combine_array_objects(subarrays)
+ dim(ans) <- dim(x)
+ ans
+}
+
diff --git a/R/DelayedArray-class.R b/R/DelayedArray-class.R
new file mode 100644
index 0000000..1e44f99
--- /dev/null
+++ b/R/DelayedArray-class.R
@@ -0,0 +1,1061 @@
+### =========================================================================
+### DelayedArray objects
+### -------------------------------------------------------------------------
+
+
+setClass("DelayedArray",
+ representation(
+ seed="ANY", # An array-like object expected to satisfy
+ # the "seed contract" i.e. to support dim(),
+ # dimnames(), and subset_seed_as_array().
+
+ index="list", # List (possibly named) of subscripts as
+ # positive integer vectors, one vector per
+ # seed dimension. *Missing* list elements
+ # are allowed and represented by NULLs.
+
+ metaindex="integer", # Index into the "index" slot specifying the
+ # seed dimensions to keep.
+
+ delayed_ops="list", # List of delayed operations. See below
+ # for the details.
+
+ is_transposed="logical" # Is the object considered to be transposed
+ # with respect to the seed?
+ ),
+ prototype(
+ seed=new("array"),
+ index=list(NULL),
+ metaindex=1L,
+ is_transposed=FALSE
+ )
+)
+
+### Extending DataTable gives us a few things for free (head(), tail(),
+### etc...)
+setClass("DelayedMatrix",
+ contains=c("DelayedArray", "DataTable"),
+ prototype=prototype(
+ seed=new("matrix"),
+ index=list(NULL, NULL),
+ metaindex=1:2
+ )
+)
+
+### Automatic coercion method from DelayedArray to DelayedMatrix silently
+### returns a broken object (unfortunately these dummy automatic coercion
+### methods don't bother to validate the object they return). So we overwrite
+### it.
+setAs("DelayedArray", "DelayedMatrix",
+ function(from) new("DelayedMatrix", from)
+)
+
+### For internal use only.
+setGeneric("matrixClass", function(x) standardGeneric("matrixClass"))
+
+setMethod("matrixClass", "DelayedArray", function(x) "DelayedMatrix")
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Validity
+###
+
+.wmsg2 <- function(...)
+ paste0("\n ",
+ paste0(strwrap(paste0(c(...), collapse="")), collapse="\n "))
+
+.validate_DelayedArray <- function(x)
+{
+ x_dim <- dim(x at seed)
+ x_ndim <- length(x_dim)
+ ## 'seed' slot.
+ if (x_ndim == 0L)
+ return(.wmsg2("'x at seed' must have dimensions"))
+ ## 'index' slot.
+ if (length(x at index) != x_ndim)
+ return(.wmsg2("'x at index' must have one list element per dimension ",
+ "in 'x at seed'"))
+ if (!all(S4Vectors:::sapply_isNULL(x at index) |
+ vapply(x at index, is.integer, logical(1), USE.NAMES=FALSE)))
+ return(.wmsg2("every list element in 'x at index' must be either NULL ",
+ "or an integer vector"))
+ ## 'metaindex' slot.
+ if (length(x at metaindex) == 0L)
+ return(.wmsg2("'x at metaindex' cannot be empty"))
+ if (S4Vectors:::anyMissingOrOutside(x at metaindex, 1L, x_ndim))
+ return(.wmsg2("all values in 'x at metaindex' must be >= 1 ",
+ "and <= 'length(x at index)'"))
+ if (!isStrictlySorted(x at metaindex))
+ return(.wmsg2("'x at metaindex' must be strictly sorted"))
+ if (!all(get_Nindex_lengths(x at index, x_dim)[-x at metaindex] == 1L))
+ return(.wmsg2("all the dropped dimensions in 'x' must be equal to 1"))
+ ## 'is_transposed' slot.
+ if (!isTRUEorFALSE(x at is_transposed))
+ return(.wmsg2("'x at is_transposed' must be TRUE or FALSE"))
+ TRUE
+}
+
+setValidity2("DelayedArray", .validate_DelayedArray)
+
+### TODO: Move this to S4Vectors and make it the validity method for DataTable
+### object.
+.validate_DelayedMatrix <- function(x)
+{
+ if (length(dim(x)) != 2L)
+ return(wmsg("'x' must have exactly 2 dimensions"))
+ TRUE
+}
+
+setValidity2("DelayedMatrix", .validate_DelayedMatrix)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Constructor
+###
+
+### NOT exported but used in HDF5Array!
+new_DelayedArray <- function(seed=new("array"), Class="DelayedArray")
+{
+ seed <- remove_pristine_DelayedArray_wrapping(seed)
+ seed_ndim <- length(dim(seed))
+ if (seed_ndim == 2L)
+ Class <- matrixClass(new(Class))
+ index <- vector(mode="list", length=seed_ndim)
+ new2(Class, seed=seed, index=index, metaindex=seq_along(index))
+}
+
+setGeneric("DelayedArray", function(seed) standardGeneric("DelayedArray"))
+
+setMethod("DelayedArray", "ANY", function(seed) new_DelayedArray(seed))
+
+### Calling DelayedArray() on a DelayedArray object is a no-op.
+setMethod("DelayedArray", "DelayedArray", function(seed) seed)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### seed()
+###
+
+setGeneric("seed", function(x) standardGeneric("seed"))
+
+setMethod("seed", "DelayedArray", function(x) x at seed)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Pristine objects
+###
+### A pristine DelayedArray object is an object that does not carry any
+### delayed operation on it. In other words, it's in sync with (i.e. reflects
+### the content of) its seed.
+###
+
+### NOT exported but used in HDF5Array!
+### Note that false negatives happen when 'x' carries delayed operations that
+### do nothing, but that's ok.
+is_pristine <- function(x)
+{
+ ## 'x' should not carry any delayed operation on it, that is, all the
+ ## DelayedArray slots must be in their original state.
+ x2 <- new_DelayedArray(seed(x))
+ class(x) <- class(x2) <- "DelayedArray"
+ identical(x, x2)
+}
+
+### Remove the DelayedArray wrapping (or nested wrappings) from around the
+### seed if the wrappings are pristine.
+remove_pristine_DelayedArray_wrapping <- function(x)
+{
+ if (!(is(x, "DelayedArray") && is_pristine(x)))
+ return(x)
+ remove_pristine_DelayedArray_wrapping(seed(x))
+}
+
+### When a pristine DelayedArray derived object (i.e. an HDF5Array object) is
+### about to be touched, we first need to downgrade it to a DelayedArray or
+### DelayedMatrix *instance*.
+downgrade_to_DelayedArray_or_DelayedMatrix <- function(x)
+{
+ if (is(x, "DelayedMatrix"))
+ return(as(x, "DelayedMatrix"))
+ as(x, "DelayedArray")
+}
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### dim()
+###
+
+### dim() getter.
+
+.get_DelayedArray_dim_before_transpose <- function(x)
+{
+ get_Nindex_lengths(x at index, dim(seed(x)))[x at metaindex]
+}
+.get_DelayedArray_dim <- function(x)
+{
+ ans <- .get_DelayedArray_dim_before_transpose(x)
+ if (x at is_transposed)
+ ans <- rev(ans)
+ ans
+}
+
+setMethod("dim", "DelayedArray", .get_DelayedArray_dim)
+
+### Even though prod() always returns a double, it seems that the length()
+### primitive function automatically turns this double into an integer if
+### it's <= .Machine$integer.max
+setMethod("length", "DelayedArray", function(x) prod(dim(x)))
+
+setMethod("isEmpty", "DelayedArray", function(x) any(dim(x) == 0L))
+
+### dim() setter.
+
+.normalize_dim_replacement_value <- function(value, x_dim)
+{
+ if (is.null(value))
+ stop(wmsg("you can't do that, sorry"))
+ if (!is.numeric(value))
+ stop(wmsg("the supplied dim vector must be numeric"))
+ if (length(value) == 0L)
+ stop(wmsg("the supplied dim vector cannot be empty"))
+ if (!is.integer(value))
+ value <- as.integer(value)
+ if (S4Vectors:::anyMissingOrOutside(value, 0L))
+ stop(wmsg("the supplied dim vector cannot contain negative ",
+ "or NA values"))
+ if (length(value) > length(x_dim))
+ stop(wmsg("too many dimensions supplied"))
+ prod1 <- prod(value)
+ prod2 <- prod(x_dim)
+ if (prod1 != prod2)
+ stop(wmsg("the supplied dims [product ", prod1, "] do not match ",
+ "the length of object [", prod2, "]"))
+ unname(value)
+}
+
+.map_new_to_old_dim <- function(new_dim, old_dim, x_class)
+{
+ idx1 <- which(new_dim != 1L)
+ idx2 <- which(old_dim != 1L)
+
+ cannot_map_msg <- wmsg(
+ "Cannot map the supplied dim vector to the current dimensions of ",
+ "the object. On a ", x_class, " object, the dim() setter can only ",
+ "be used to drop some of the ineffective dimensions (the dimensions ",
+ "equal to 1 are the ineffective dimensions)."
+ )
+
+ can_map <- function() {
+ if (length(idx1) != length(idx2))
+ return(FALSE)
+ if (length(idx1) == 0L)
+ return(TRUE)
+ if (!all(new_dim[idx1] == old_dim[idx2]))
+ return(FALSE)
+ tmp <- idx2 - idx1
+ tmp[[1L]] >= 0L && isSorted(tmp)
+ }
+ if (!can_map())
+ stop(cannot_map_msg)
+
+ new2old <- seq_along(new_dim) +
+ rep.int(c(0L, idx2 - idx1), diff(c(1L, idx1, length(new_dim) + 1L)))
+
+ if (new2old[[length(new2old)]] > length(old_dim))
+ stop(cannot_map_msg)
+
+ new2old
+}
+
+.set_DelayedArray_dim <- function(x, value)
+{
+ x_dim <- dim(x)
+ value <- .normalize_dim_replacement_value(value, x_dim)
+ new2old <- .map_new_to_old_dim(value, x_dim, class(x))
+ stopifnot(identical(value, x_dim[new2old])) # sanity check
+ if (x at is_transposed) {
+ x_metaindex <- rev(rev(x at metaindex)[new2old])
+ } else {
+ x_metaindex <- x at metaindex[new2old]
+ }
+ if (!identical(x at metaindex, x_metaindex)) {
+ x <- downgrade_to_DelayedArray_or_DelayedMatrix(x)
+ x at metaindex <- x_metaindex
+ }
+ if (length(dim(x)) == 2L)
+ x <- as(x, matrixClass(x))
+ x
+}
+
+setReplaceMethod("dim", "DelayedArray", .set_DelayedArray_dim)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### drop()
+###
+
+setMethod("drop", "DelayedArray",
+ function(x)
+ {
+ x_dim <- dim(x)
+ dim(x) <- x_dim[x_dim != 1L]
+ x
+ }
+)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### dimnames()
+###
+
+### dimnames() getter.
+
+.get_DelayedArray_dimnames_before_transpose <- function(x)
+{
+ x_seed_dimnames <- dimnames(seed(x))
+ ans <- lapply(x at metaindex,
+ get_Nindex_names_along,
+ Nindex=x at index,
+ dimnames=x_seed_dimnames)
+ if (all(S4Vectors:::sapply_isNULL(ans)))
+ return(NULL)
+ ans
+}
+.get_DelayedArray_dimnames <- function(x)
+{
+ ans <- .get_DelayedArray_dimnames_before_transpose(x)
+ if (x at is_transposed)
+ ans <- rev(ans)
+ ans
+}
+
+setMethod("dimnames", "DelayedArray", .get_DelayedArray_dimnames)
+
+### dimnames() setter.
+
+.normalize_dimnames_replacement_value <- function(value, ndim)
+{
+ if (is.null(value))
+ return(vector("list", length=ndim))
+ if (!is.list(value))
+ stop("the supplied dimnames must be a list")
+ if (length(value) > ndim)
+ stop(wmsg("the supplied dimnames is longer ",
+ "than the number of dimensions"))
+ if (length(value) <- ndim)
+ length(value) <- ndim
+ value
+}
+
+.set_DelayedArray_dimnames <- function(x, value)
+{
+ value <- .normalize_dimnames_replacement_value(value, length(x at metaindex))
+ if (x at is_transposed)
+ value <- rev(value)
+
+ ## We quickly identify a no-op situation. While doing so, we are careful to
+ ## not trigger a copy of the "index" slot (which can be big). The goal is
+ ## to make a no-op like 'dimnames(x) <- dimnames(x)' as fast as possible.
+ x_seed_dimnames <- dimnames(seed(x))
+ touched_midx <- which(mapply(
+ function(N, names)
+ !identical(
+ get_Nindex_names_along(x at index, x_seed_dimnames, N),
+ names
+ ),
+ x at metaindex, value,
+ USE.NAMES=FALSE
+ ))
+ if (length(touched_midx) == 0L)
+ return(x) # no-op
+
+ x <- downgrade_to_DelayedArray_or_DelayedMatrix(x)
+ touched_idx <- x at metaindex[touched_midx]
+ x_seed_dim <- dim(seed(x))
+ x at index[touched_idx] <- mapply(
+ function(N, names) {
+ i <- x at index[[N]]
+ if (is.null(i))
+ i <- seq_len(x_seed_dim[[N]]) # expand 'i'
+ setNames(i, names)
+ },
+ touched_idx, value[touched_midx],
+ SIMPLIFY=FALSE, USE.NAMES=FALSE
+ )
+ x
+}
+
+setReplaceMethod("dimnames", "DelayedArray", .set_DelayedArray_dimnames)
+
+### names() getter & setter.
+
+.get_DelayedArray_names <- function(x)
+{
+ if (length(dim(x)) != 1L)
+ return(NULL)
+ dimnames(x)[[1L]]
+}
+
+setMethod("names", "DelayedArray", .get_DelayedArray_names)
+
+.set_DelayedArray_names <- function(x, value)
+{
+ if (length(dim(x)) != 1L) {
+ if (!is.null(value))
+ stop("setting the names of a ", class(x), " object with more ",
+ "than 1 dimension is not supported")
+ return(x)
+ }
+ dimnames(x)[[1L]] <- value
+ x
+}
+
+setReplaceMethod("names", "DelayedArray", .set_DelayedArray_names)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### [
+###
+
+### Return an object of the same class as 'x' (endomorphism).
+### 'user_Nindex' must be a "multidimensional subsetting Nindex" i.e. a
+### list with one subscript per dimension in 'x'. Missing subscripts are
+### represented by NULLs.
+.subset_DelayedArray_by_Nindex <- function(x, user_Nindex)
+{
+ stopifnot(is.list(user_Nindex))
+ x_index <- x at index
+ x_ndim <- length(x at metaindex)
+ x_seed_dim <- dim(seed(x))
+ x_seed_dimnames <- dimnames(seed(x))
+ x_delayed_ops <- x at delayed_ops
+ for (n in seq_along(user_Nindex)) {
+ subscript <- user_Nindex[[n]]
+ if (is.null(subscript))
+ next
+ n0 <- if (x at is_transposed) x_ndim - n + 1L else n
+ N <- x at metaindex[[n0]]
+ i <- x_index[[N]]
+ if (is.null(i)) {
+ i <- seq_len(x_seed_dim[[N]]) # expand 'i'
+ names(i) <- get_Nindex_names_along(x_index, x_seed_dimnames, N)
+ }
+ subscript <- normalizeSingleBracketSubscript(subscript, i,
+ as.NSBS=TRUE)
+ x_index[[N]] <- extractROWS(i, subscript)
+ if (n0 == 1L)
+ x_delayed_ops <- .subset_delayed_ops_args(x_delayed_ops, subscript,
+ FALSE)
+ if (n0 == x_ndim)
+ x_delayed_ops <- .subset_delayed_ops_args(x_delayed_ops, subscript,
+ TRUE)
+ }
+ if (!identical(x at index, x_index)) {
+ x <- downgrade_to_DelayedArray_or_DelayedMatrix(x)
+ x at index <- x_index
+ if (!identical(x at delayed_ops, x_delayed_ops))
+ x at delayed_ops <- x_delayed_ops
+ }
+ x
+}
+
+### Return an atomic vector.
+.subset_DelayedArray_by_1Dindex <- function(x, i)
+{
+ if (!is.numeric(i))
+ stop(wmsg("1D-style subsetting of a DelayedArray object only ",
+ "accepts a numeric subscript at the moment"))
+ if (length(i) == 0L) {
+ user_Nindex <- as.list(integer(length(dim(x))))
+ ## x0 <- x[0, ..., 0]
+ x0 <- .subset_DelayedArray_by_Nindex(x, user_Nindex)
+ return(as.vector(x0))
+ }
+ if (anyNA(i))
+ stop(wmsg("1D-style subsetting of a DelayedArray object does ",
+ "not support NA indices yet"))
+ if (min(i) < 1L)
+ stop(wmsg("1D-style subsetting of a DelayedArray object only ",
+ "supports positive indices at the moment"))
+ if (max(i) > length(x))
+ stop(wmsg("subscript contains out-of-bounds indices"))
+ if (length(i) == 1L)
+ return(.get_DelayedArray_element(x, i))
+
+ ## We want to walk only on the blocks that we actually need to visit so we
+ ## don't use block_APPLY() or family because they walk on all the blocks.
+
+ block_len <- get_block_length(type(x))
+ blocks <- ArrayBlocks(dim(x), block_len)
+ nblock <- length(blocks)
+
+ breakpoints <- cumsum(get_block_lengths(blocks))
+ part_idx <- get_part_index(i, breakpoints)
+ split_part_idx <- split_part_index(part_idx, length(breakpoints))
+ block_idx <- which(lengths(split_part_idx) != 0L) # blocks to visit
+ res <- lapply(block_idx, function(b) {
+ if (get_verbose_block_processing())
+ message("Visiting block ", b, "/", nblock, " ... ",
+ appendLF=FALSE)
+ Nindex <- get_array_block_Nindex(blocks, b)
+ subarray <- subset_by_Nindex(x, Nindex)
+ if (!is.array(subarray))
+ subarray <- as.array(subarray)
+ block_ans <- subarray[split_part_idx[[b]]]
+ if (get_verbose_block_processing())
+ message("OK")
+ block_ans
+ })
+ unlist(res, use.names=FALSE)[get_rev_index(part_idx)]
+}
+
+.subset_DelayedArray <- function(x, i, j, ..., drop=TRUE)
+{
+ if (missing(x))
+ stop("'x' is missing")
+
+ ## Check the dimensionality of the user call i.e whether the function was
+ ## called with 1D-style, or 2D-style, or 3D-style etc... subsetting.
+ ndim <- nargs() - 1L
+ x_dim <- dim(x)
+ x_ndim <- length(x_dim)
+ if (!missing(drop))
+ ndim <- ndim - 1L
+ if (ndim == 1L && missing(i))
+ ndim <- 0L
+ if (ndim != 0L && ndim != x_ndim) {
+ if (ndim != 1L)
+ stop("incorrect number of dimensions")
+ return(.subset_DelayedArray_by_1Dindex(x, i))
+ }
+
+ ## Prepare the "multidimensional subsetting index".
+ user_Nindex <- vector(mode="list", length=x_ndim)
+ if (!missing(i))
+ user_Nindex[[1L]] <- if (is.null(i)) integer(0) else i
+ if (!missing(j))
+ user_Nindex[[2L]] <- if (is.null(j)) integer(0) else j
+ dots <- match.call(expand.dots=FALSE)$... # list of non-evaluated args
+ eframe <- parent.frame()
+ for (n2 in seq_along(dots)) {
+ k <- dots[[n2]]
+ if (!missing(k)) {
+ k <- eval(k, envir=eframe, enclos=eframe)
+ user_Nindex[[2L + n2]] <- if (is.null(k)) integer(0) else k
+ }
+ }
+
+ ## Perform the subsetting.
+ .subset_DelayedArray_by_Nindex(x, user_Nindex)
+}
+
+setMethod("[", "DelayedArray", .subset_DelayedArray)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### subset_seed_as_array()
+###
+
+### Return the slice as a list.
+.extract_data_frame_slice <- function(x, index)
+{
+ slice <- subset_by_Nindex(x, index)
+ ## Turn into a list and replace factors with character vectors.
+ lapply(slice, as.vector)
+}
+.extract_DataFrame_slice <- function(x, index)
+{
+ slice <- subset_by_Nindex(x, index)
+ slice <- as.data.frame(slice)
+ ## Turn into a list and replace factors with character vectors.
+ lapply(slice, as.vector)
+}
+
+### Return a list with one list element per column in data frame 'x'.
+### All the list elements have length 0.
+.extract_data_frame_slice0 <- function(x)
+{
+ slice0 <- x[0L, , drop=FALSE]
+ ## Turn into a list and replace factors with character vectors.
+ lapply(slice0, as.vector)
+}
+.extract_DataFrame_slice0 <- function(x)
+{
+ slice0 <- x[0L, , drop=FALSE]
+ slice0 <- as.data.frame(slice0)
+ if (ncol(slice0) != ncol(x))
+ stop(wmsg("DataFrame object 'x' can be used as the seed of ",
+ "a DelayedArray object only if as.data.frame(x) ",
+ "preserves the number of columns"))
+ ## Turn into a list and replace factors with character vectors.
+ lapply(slice0, as.vector)
+}
+
+### Equivalent to 'typeof(as.matrix(x))' but with an almost-zero
+### memory footprint (it avoids the cost of turning 'x' into a matrix).
+.get_data_frame_type <- function(x)
+{
+ if (ncol(x) == 0L)
+ return("logical")
+ slice0 <- .extract_data_frame_slice0(x)
+ typeof(unlist(slice0, use.names=FALSE))
+}
+
+### Equivalent to 'typeof(as.matrix(as.data.frame(x)))' but with an
+### almost-zero memory footprint (it avoids the cost of turning 'x' first
+### into a data frame then into a matrix).
+.get_DataFrame_type <- function(x)
+{
+ if (ncol(x) == 0L)
+ return("logical")
+ slice0 <- .extract_DataFrame_slice0(x)
+ typeof(unlist(slice0, use.names=FALSE))
+}
+
+### 'index' is expected to be an unnamed list of subscripts as positive integer
+### vectors, one vector per seed dimension. *Missing* list elements are allowed
+### and represented by NULLs.
+### The "subset_seed_as_array" methods don't need to support anything else.
+### They must return an ordinary array. No need to propagate the dimnames.
+setGeneric("subset_seed_as_array", signature="seed",
+ function(seed, index) standardGeneric("subset_seed_as_array")
+)
+
+setMethod("subset_seed_as_array", "ANY",
+ function(seed, index)
+ {
+ slice <- subset_by_Nindex(seed, index)
+ as.array(slice)
+ }
+)
+
+setMethod("subset_seed_as_array", "array",
+ function(seed, index)
+ subset_by_Nindex(seed, index)
+)
+
+### Equivalent to
+###
+### subset_by_Nindex(as.matrix(x), index)
+###
+### but avoids the cost of turning the full data frame 'x' into a matrix so
+### memory footprint stays small when 'index' is small.
+setMethod("subset_seed_as_array", "data.frame",
+ function(seed, index)
+ {
+ #ans_type <- .get_data_frame_type(seed)
+ slice0 <- .extract_data_frame_slice0(seed)
+ slice <- .extract_data_frame_slice(seed, index)
+ data <- unlist(c(slice0, slice), use.names=FALSE)
+ array(data, dim=get_Nindex_lengths(index, dim(seed)))
+ }
+)
+
+### Equivalent to
+###
+### subset_by_Nindex(as.matrix(as.data.frame(x)), index)
+###
+### but avoids the cost of turning the full DataFrame 'x' first into a data
+### frame then into a matrix so memory footprint stays small when 'index' is
+### small.
+setMethod("subset_seed_as_array", "DataFrame",
+ function(seed, index)
+ {
+ #ans_type <- .get_DataFrame_type(seed)
+ slice0 <- .extract_DataFrame_slice0(seed)
+ slice <- .extract_DataFrame_slice(seed, index)
+ data <- unlist(c(slice0, slice), use.names=FALSE)
+ array(data, dim=get_Nindex_lengths(index, dim(seed)))
+ }
+)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Management of delayed operations
+###
+### The 'delayed_ops' slot represents the list of delayed operations op1, op2,
+### etc... Each delayed operation is itself represented by a list of length 4:
+### 1) The name of the function to call (e.g. "+" or "log").
+### 2) The list of "left arguments" i.e. the list of arguments to place
+### before the array in the function call.
+### 3) The list of "right arguments" i.e. the list of arguments to place
+### after the array in the function call.
+### 4) A single logical. Indicates the dimension along which the (left or
+### right) argument of the function call needs to be recycled when the
+### operation is actually executed (done by .execute_delayed_ops() which
+### is called by as.array()). FALSE: along the 1st dim; TRUE: along
+### the last dim; NA: no recycling. Recycling is only supported for
+### function calls with 2 arguments (i.e. the array and the recycled
+### argument) at the moment.
+###
+### Each operation must return an array of the same dimensions as the original
+### array.
+###
+
+register_delayed_op <- function(x, FUN, Largs=list(), Rargs=list(),
+ recycle_along_last_dim=NA)
+{
+ if (isTRUEorFALSE(recycle_along_last_dim)) {
+ nLargs <- length(Largs)
+ nRargs <- length(Rargs)
+ ## Recycling is only supported for function calls with 2 arguments
+ ## (i.e. the array and the recycled argument) at the moment.
+ stopifnot(nLargs + nRargs == 1L)
+ partially_recycled_arg <- if (nLargs == 1L) Largs[[1L]] else Rargs[[1L]]
+ stopifnot(length(partially_recycled_arg) == nrow(x))
+ }
+ delayed_op <- list(FUN, Largs, Rargs, recycle_along_last_dim)
+ x <- downgrade_to_DelayedArray_or_DelayedMatrix(x)
+ x at delayed_ops <- c(x at delayed_ops, list(delayed_op))
+ x
+}
+
+.subset_delayed_op_args <- function(delayed_op, i, subset_along_last_dim)
+{
+ recycle_along_last_dim <- delayed_op[[4L]]
+ if (is.na(recycle_along_last_dim)
+ || recycle_along_last_dim != subset_along_last_dim)
+ return(delayed_op)
+ Largs <- delayed_op[[2L]]
+ Rargs <- delayed_op[[3L]]
+ nLargs <- length(Largs)
+ nRargs <- length(Rargs)
+ stopifnot(nLargs + nRargs == 1L)
+ if (nLargs == 1L) {
+ new_arg <- extractROWS(Largs[[1L]], i)
+ delayed_op[[2L]] <- list(new_arg)
+ } else {
+ new_arg <- extractROWS(Rargs[[1L]], i)
+ delayed_op[[3L]] <- list(new_arg)
+ }
+ if (length(new_arg) == 1L)
+ delayed_op[[4L]] <- NA
+ delayed_op
+}
+
+.subset_delayed_ops_args <- function(delayed_ops, i, subset_along_last_dim)
+ lapply(delayed_ops, .subset_delayed_op_args, i, subset_along_last_dim)
+
+### 'a' is the ordinary array returned by the "combining" operator.
+.execute_delayed_ops <- function(a, delayed_ops)
+{
+ a_dim <- dim(a)
+ first_dim <- a_dim[[1L]]
+ last_dim <- a_dim[[length(a_dim)]]
+ a_len <- length(a)
+ if (a_len == 0L) {
+ p1 <- p2 <- 0L
+ } else {
+ p1 <- a_len / first_dim
+ p2 <- a_len / last_dim
+ }
+
+ recycle_arg <- function(partially_recycled_arg, recycle_along_last_dim) {
+ if (recycle_along_last_dim) {
+ stopifnot(length(partially_recycled_arg) == last_dim)
+ rep(partially_recycled_arg, each=p2)
+ } else {
+ stopifnot(length(partially_recycled_arg) == first_dim)
+ rep.int(partially_recycled_arg, p1)
+ }
+ }
+
+ prepare_call_args <- function(a, delayed_op) {
+ Largs <- delayed_op[[2L]]
+ Rargs <- delayed_op[[3L]]
+ recycle_along_last_dim <- delayed_op[[4L]]
+ if (isTRUEorFALSE(recycle_along_last_dim)) {
+ nLargs <- length(Largs)
+ nRargs <- length(Rargs)
+ stopifnot(nLargs + nRargs == 1L)
+ if (nLargs == 1L) {
+ Largs <- list(recycle_arg(Largs[[1L]], recycle_along_last_dim))
+ } else {
+ Rargs <- list(recycle_arg(Rargs[[1L]], recycle_along_last_dim))
+ }
+ }
+ c(Largs, list(a), Rargs)
+ }
+
+ for (delayed_op in delayed_ops) {
+ FUN <- delayed_op[[1L]]
+ call_args <- prepare_call_args(a, delayed_op)
+
+ ## Perform the delayed operation.
+ a <- do.call(FUN, call_args)
+
+ ## Some vectorized operations on an ordinary array can drop the dim
+ ## attribute (e.g. comparing a zero-col matrix with an atomic vector).
+ a_new_dim <- dim(a)
+ if (is.null(a_new_dim)) {
+ ## Restore the dim attribute.
+ dim(a) <- a_dim
+ } else {
+ ## Sanity check.
+ stopifnot(identical(a_dim, a_new_dim))
+ }
+ }
+ a
+}
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Transpose
+###
+
+### The actual transposition of the data is delayed i.e. it will be realized
+### on the fly only when as.array() (or as.vector() or as.matrix()) is called
+### on 'x'.
+setMethod("t", "DelayedArray",
+ function(x)
+ {
+ x <- downgrade_to_DelayedArray_or_DelayedMatrix(x)
+ x at is_transposed <- !x at is_transposed
+ x
+ }
+)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### as.array()
+###
+
+### TODO: Not sure we need this. Using drop() should do it.
+.reduce_array_dimensions <- function(x)
+{
+ x_dim <- dim(x)
+ x_dimnames <- dimnames(x)
+ effdim_idx <- which(x_dim != 1L) # index of effective dimensions
+ if (length(effdim_idx) >= 2L) {
+ dim(x) <- x_dim[effdim_idx]
+ dimnames(x) <- x_dimnames[effdim_idx]
+ } else {
+ dim(x) <- NULL
+ if (length(effdim_idx) == 1L)
+ names(x) <- x_dimnames[[effdim_idx]]
+ }
+ x
+}
+
+### Realize the object i.e. execute all the delayed operations and turn the
+### object back into an ordinary array.
+.from_DelayedArray_to_array <- function(x, drop=FALSE)
+{
+ if (!isTRUEorFALSE(drop))
+ stop("'drop' must be TRUE or FALSE")
+ ans <- subset_seed_as_array(seed(x), unname(x at index))
+ dim(ans) <- .get_DelayedArray_dim_before_transpose(x)
+ ans <- .execute_delayed_ops(ans, x at delayed_ops)
+ dimnames(ans) <- .get_DelayedArray_dimnames_before_transpose(x)
+ if (drop)
+ ans <- .reduce_array_dimensions(ans)
+ ## Base R doesn't support transposition of an array of arbitrary dimension
+ ## (generalized transposition) so the call to t() below will fail if 'ans'
+ ## has more than 2 dimensions. If we want as.array() to work on a
+ ## transposed DelayedArray object of arbitrary dimension, we need to
+ ## implement our own generalized transposition of an ordinary array.
+ if (x at is_transposed) {
+ if (length(dim(ans)) > 2L)
+ stop("can't do as.array() on this object, sorry")
+ ans <- t(ans)
+ }
+ ans
+}
+
+### S3/S4 combo for as.array.DelayedArray
+as.array.DelayedArray <- function(x, ...)
+ .from_DelayedArray_to_array(x, ...)
+setMethod("as.array", "DelayedArray", .from_DelayedArray_to_array)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Other coercions based on as.array()
+###
+
+slicing_tip <- c(
+ "Consider reducing its number of effective dimensions by slicing it ",
+ "first (e.g. x[8, 30, , 2, ]). Make sure that all the indices used for ",
+ "the slicing have length 1 except at most 2 of them which can be of ",
+ "arbitrary length or missing."
+)
+
+.from_DelayedArray_to_matrix <- function(x)
+{
+ x_dim <- dim(x)
+ if (sum(x_dim != 1L) > 2L)
+ stop(wmsg(class(x), " object with more than 2 effective dimensions ",
+ "cannot be coerced to a matrix. ", slicing_tip))
+ ans <- as.array(x, drop=TRUE)
+ if (length(x_dim) == 2L) {
+ dim(ans) <- x_dim
+ dimnames(ans) <- dimnames(x)
+ } else {
+ as.matrix(ans)
+ }
+ ans
+}
+
+### S3/S4 combo for as.matrix.DelayedArray
+as.matrix.DelayedArray <- function(x, ...) .from_DelayedArray_to_matrix(x, ...)
+setMethod("as.matrix", "DelayedArray", .from_DelayedArray_to_matrix)
+
+### S3/S4 combo for as.data.frame.DelayedArray
+as.data.frame.DelayedArray <- function(x, row.names=NULL, optional=FALSE, ...)
+ as.data.frame(as.array(x, drop=TRUE),
+ row.names=row.names, optional=optional, ...)
+setMethod("as.data.frame", "DelayedArray", as.data.frame.DelayedArray)
+
+### S3/S4 combo for as.vector.DelayedArray
+as.vector.DelayedArray <- function(x, mode="any")
+{
+ ans <- as.array(x, drop=TRUE)
+ as.vector(ans, mode=mode)
+}
+setMethod("as.vector", "DelayedArray", as.vector.DelayedArray)
+
+### S3/S4 combo for as.logical.DelayedArray
+as.logical.DelayedArray <- function(x, ...) as.vector(x, mode="logical", ...)
+setMethod("as.logical", "DelayedArray", as.logical.DelayedArray)
+
+### S3/S4 combo for as.integer.DelayedArray
+as.integer.DelayedArray <- function(x, ...) as.vector(x, mode="integer", ...)
+setMethod("as.integer", "DelayedArray", as.integer.DelayedArray)
+
+### S3/S4 combo for as.numeric.DelayedArray
+as.numeric.DelayedArray <- function(x, ...) as.vector(x, mode="numeric", ...)
+setMethod("as.numeric", "DelayedArray", as.numeric.DelayedArray)
+
+### S3/S4 combo for as.complex.DelayedArray
+as.complex.DelayedArray <- function(x, ...) as.vector(x, mode="complex", ...)
+setMethod("as.complex", "DelayedArray", as.complex.DelayedArray)
+
+### S3/S4 combo for as.character.DelayedArray
+as.character.DelayedArray <- function(x, ...) as.vector(x, mode="character", ...)
+setMethod("as.character", "DelayedArray", as.character.DelayedArray)
+
+### S3/S4 combo for as.raw.DelayedArray
+as.raw.DelayedArray <- function(x) as.vector(x, mode="raw")
+setMethod("as.raw", "DelayedArray", as.raw.DelayedArray)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Coercion to sparse matrix (requires the Matrix package)
+###
+
+.from_DelayedMatrix_to_dgCMatrix <- function(from)
+{
+ idx <- which(from != 0L)
+ array_ind <- arrayInd(idx, dim(from))
+ i <- array_ind[ , 1L]
+ j <- array_ind[ , 2L]
+ x <- from[idx]
+ Matrix::sparseMatrix(i, j, x=x, dims=dim(from), dimnames=dimnames(from))
+}
+
+setAs("DelayedMatrix", "dgCMatrix", .from_DelayedMatrix_to_dgCMatrix)
+setAs("DelayedMatrix", "sparseMatrix", .from_DelayedMatrix_to_dgCMatrix)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### type()
+###
+### For internal use only.
+###
+
+setGeneric("type", function(x) standardGeneric("type"))
+
+setMethod("type", "array", function(x) typeof(x))
+
+### If 'x' is a DelayedArray object, 'type(x)' must always return the same
+### as 'typeof(as.array(x))'.
+setMethod("type", "DelayedArray",
+ function(x)
+ {
+ user_Nindex <- as.list(integer(length(dim(x))))
+ ## x0 <- x[0, ..., 0]
+ x0 <- .subset_DelayedArray_by_Nindex(x, user_Nindex)
+ typeof(as.array(x0, drop=TRUE))
+ }
+)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### [[
+###
+
+.get_DelayedArray_element <- function(x, i)
+{
+ i <- normalizeDoubleBracketSubscript(i, x)
+ user_Nindex <- as.list(arrayInd(i, dim(x)))
+ as.vector(.subset_DelayedArray_by_Nindex(x, user_Nindex))
+}
+
+### Only support linear subscripting at the moment.
+### TODO: Support multidimensional subscripting e.g. x[[5, 15, 2]] or
+### x[["E", 15, "b"]].
+setMethod("[[", "DelayedArray",
+ function(x, i, j, ...)
+ {
+ dots <- list(...)
+ if (length(dots) > 0L)
+ dots <- dots[names(dots) != "exact"]
+ if (!missing(j) || length(dots) > 0L)
+ stop("incorrect number of subscripts")
+ .get_DelayedArray_element(x, i)
+ }
+)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Show
+###
+
+setMethod("show", "DelayedArray", show_compact_array)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Combining and splitting
+###
+### Combining arrays with c() is NOT an endomorphism!
+###
+
+### 'objects' must be a list of array-like objects that support as.vector().
+combine_array_objects <- function(objects)
+{
+ if (!is.list(objects))
+ stop("'objects' must be a list")
+
+ NULL_idx <- which(S4Vectors:::sapply_isNULL(objects))
+ if (length(NULL_idx) != 0L)
+ objects <- objects[-NULL_idx]
+ if (length(objects) == 0L)
+ return(NULL)
+
+ unlist(lapply(objects, as.vector), recursive=FALSE, use.names=FALSE)
+}
+
+setMethod("c", "DelayedArray",
+ function (x, ..., recursive=FALSE)
+ {
+ if (!identical(recursive, FALSE))
+ stop("\"c\" method for DelayedArray objects ",
+ "does not support the 'recursive' argument")
+ if (missing(x)) {
+ objects <- list(...)
+ } else {
+ objects <- list(x, ...)
+ }
+ combine_array_objects(objects)
+ }
+)
+
+setMethod("splitAsList", "DelayedArray",
+ function(x, f, drop=FALSE, ...)
+ splitAsList(as.vector(x), f, drop=drop, ...)
+)
+
+### S3/S4 combo for split.DelayedArray
+split.DelayedArray <- function(x, f, drop=FALSE, ...)
+ splitAsList(x, f, drop=drop, ...)
+setMethod("split", c("DelayedArray", "ANY"), split.DelayedArray)
+
diff --git a/R/DelayedArray-stats.R b/R/DelayedArray-stats.R
new file mode 100644
index 0000000..4bc5ed9
--- /dev/null
+++ b/R/DelayedArray-stats.R
@@ -0,0 +1,32 @@
+### =========================================================================
+### Statistical methods for DelayedArray objects
+### -------------------------------------------------------------------------
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Logistic
+###
+### All these methods return a DelayedArray object of the same dimensions
+### as their first argument.
+###
+
+setMethod("dlogis", "DelayedArray",
+ function(x, location=0, scale=1, log=FALSE)
+ register_delayed_op(x, "dlogis",
+ Rargs=list(location=location, scale=scale, log=log))
+)
+
+setMethod("plogis", "DelayedArray",
+ function(q, location=0, scale=1, lower.tail=TRUE, log.p=FALSE)
+ register_delayed_op(q, "plogis",
+ Rargs=list(location=location, scale=scale,
+ lower.tail=lower.tail, log.p=log.p))
+)
+
+setMethod("qlogis", "DelayedArray",
+ function(p, location=0, scale=1, lower.tail=TRUE, log.p=FALSE)
+ register_delayed_op(p, "qlogis",
+ Rargs=list(location=location, scale=scale,
+ lower.tail=lower.tail, log.p=log.p))
+)
+
diff --git a/R/DelayedArray-utils.R b/R/DelayedArray-utils.R
new file mode 100644
index 0000000..4bf2aea
--- /dev/null
+++ b/R/DelayedArray-utils.R
@@ -0,0 +1,649 @@
+### =========================================================================
+### Common operations on DelayedArray objects
+### -------------------------------------------------------------------------
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### ConformableSeedCombiner objects
+###
+### This class is for internal use only and is not exported.
+###
+
+setClass("ConformableSeedCombiner",
+ representation(
+ seeds="list", # List of n conformable array-like objects
+ # to combine. Each object is expected to
+ # satisfy the "seed contract" i.e. to
+ # support dim(), dimnames(), and
+ # subset_seed_as_array().
+
+ COMBINING_OP="character", # n-ary operator to combine the seeds.
+
+ Rargs="list" # Additional arguments to the n-ary
+ # operator. Currently unused.
+ ),
+ prototype(
+ seeds=list(new("array")),
+ COMBINING_OP="identity"
+ )
+)
+
+.objects_are_conformable_arrays <- function(objects)
+{
+ dims <- lapply(objects, dim)
+ ndims <- lengths(dims)
+ first_ndim <- ndims[[1L]]
+ if (!all(ndims == first_ndim))
+ return(FALSE)
+ tmp <- unlist(dims, use.names=FALSE)
+ if (is.null(tmp))
+ return(FALSE)
+ dims <- matrix(tmp, nrow=first_ndim)
+ first_dim <- dims[ , 1L]
+ all(dims == first_dim)
+}
+
+.validate_ConformableSeedCombiner <- function(x)
+{
+ ## 'seeds' slot.
+ if (length(x at seeds) == 0L)
+ return(wmsg("'x at seeds' cannot be empty"))
+ if (!.objects_are_conformable_arrays(x at seeds))
+ return(wmsg("'x at seeds' must be a list of conformable ",
+ "array-like objects"))
+ ## 'COMBINING_OP' slot.
+ if (!isSingleString(x at COMBINING_OP))
+ return(wmsg("'x at COMBINING_OP' must be a single string"))
+ OP <- try(match.fun(x at COMBINING_OP), silent=TRUE)
+ if (is(OP, "try-error"))
+ return(wmsg("the name in 'x at COMBINING_OP' (\"", x at COMBINING_OP,
+ "\") must refer to a known n-ary operator"))
+ TRUE
+}
+
+setValidity2("ConformableSeedCombiner", .validate_ConformableSeedCombiner)
+
+.new_ConformableSeedCombiner <- function(seed=new("array"), ...,
+ COMBINING_OP="identity",
+ Rargs=list())
+{
+ seeds <- unname(list(seed, ...))
+ seeds <- lapply(seeds, remove_pristine_DelayedArray_wrapping)
+ new2("ConformableSeedCombiner", seeds=seeds,
+ COMBINING_OP=COMBINING_OP,
+ Rargs=Rargs)
+}
+
+### Implement the "seed contract" i.e. dim(), dimnames(), and
+### subset_seed_as_array().
+
+.get_ConformableSeedCombiner_dim <- function(x) dim(x at seeds[[1L]])
+
+setMethod("dim", "ConformableSeedCombiner",
+ .get_ConformableSeedCombiner_dim
+)
+
+.get_ConformableSeedCombiner_dimnames <- function(x)
+{
+ IRanges:::combine_dimnames(x at seeds)
+}
+
+setMethod("dimnames", "ConformableSeedCombiner",
+ .get_ConformableSeedCombiner_dimnames
+)
+
+.subset_ConformableSeedCombiner_as_array <- function(seed, index)
+{
+ arrays <- lapply(seed at seeds, subset_seed_as_array, index)
+ do.call(seed at COMBINING_OP, c(arrays, seed at Rargs))
+}
+
+setMethod("subset_seed_as_array", "ConformableSeedCombiner",
+ .subset_ConformableSeedCombiner_as_array
+)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### "Ops" group generics
+###
+### Arith members: "+", "-", "*", "/", "^", "%%", "%/%"
+### Compare members: ==, !=, <=, >=, <, >
+### Logic members: &, |
+###
+
+### Return a DelayedArray object of the same dimensions as 'e1'.
+.DelayedArray_Ops_with_right_vector <- function(.Generic, e1, e2)
+{
+ stopifnot(is(e1, "DelayedArray"))
+ e1_class <- class(e1)
+ e2_class <- class(e2)
+ if (!is.vector(e2))
+ e2 <- as.vector(e2)
+ if (!is.atomic(e2))
+ stop(wmsg("`", .Generic, "` between ", e1_class, " and ",
+ e2_class, " objects is not supported"))
+ e2_len <- length(e2)
+ if (e2_len == 1L)
+ return(register_delayed_op(e1, .Generic, Rargs=list(e2)))
+ e1_len <- length(e1)
+ if (e2_len > e1_len)
+ stop(wmsg("right object is longer than left object"))
+ e1_nrow <- nrow(e1)
+ if (e1_nrow != 0L) {
+ if (e2_len == 0L || e1_nrow %% e2_len != 0L)
+ stop(wmsg("length of right object is not a divisor ",
+ "of number of rows in left object"))
+ e2 <- rep(e2, length.out=e1_nrow)
+ }
+ register_delayed_op(e1, .Generic, Rargs=list(e2),
+ recycle_along_last_dim=e1 at is_transposed)
+}
+
+### Return a DelayedArray object of the same dimensions as 'e2'.
+.DelayedArray_Ops_with_left_vector <- function(.Generic, e1, e2)
+{
+ stopifnot(is(e2, "DelayedArray"))
+ e1_class <- class(e1)
+ e2_class <- class(e2)
+ if (!is.vector(e1))
+ e1 <- as.vector(e1)
+ if (!is.atomic(e1))
+ stop(wmsg("`", .Generic, "` between ", e1_class, " and ",
+ e2_class, " objects is not supported"))
+ e1_len <- length(e1)
+ if (e1_len == 1L)
+ return(register_delayed_op(e2, .Generic, Largs=list(e1)))
+ e2_len <- length(e2)
+ if (e1_len > e2_len)
+ stop(wmsg("left object is longer than right object"))
+ e2_nrow <- nrow(e2)
+ if (e2_nrow != 0L) {
+ if (e1_len == 0L || e2_nrow %% e1_len != 0L)
+ stop(wmsg("length of left object is not a divisor ",
+ "of number of rows in right object"))
+ e1 <- rep(e1, length.out=e2_nrow)
+ }
+ register_delayed_op(e2, .Generic, Largs=list(e1),
+ recycle_along_last_dim=e2 at is_transposed)
+}
+
+### Return a DelayedArray object of the same dimensions as 'e1' and 'e2'.
+.DelayedArray_Ops_COMBINE_seeds <- function(.Generic, e1, e2)
+{
+ if (!identical(dim(e1), dim(e2)))
+ stop("non-conformable arrays")
+ DelayedArray(.new_ConformableSeedCombiner(e1, e2, COMBINING_OP=.Generic))
+}
+
+.DelayedArray_Ops <- function(.Generic, e1, e2)
+{
+ e1_dim <- dim(e1)
+ e2_dim <- dim(e2)
+ if (identical(e1_dim, e2_dim))
+ return(.DelayedArray_Ops_COMBINE_seeds(.Generic, e1, e2))
+ ## Effective dimensions.
+ effdim_idx1 <- which(e1_dim != 1L)
+ effdim_idx2 <- which(e2_dim != 1L)
+ if ((length(effdim_idx1) == 1L) == (length(effdim_idx2) == 1L))
+ stop("non-conformable arrays")
+ if (length(effdim_idx1) == 1L) {
+ .DelayedArray_Ops_with_left_vector(.Generic, e1, e2)
+ } else {
+ .DelayedArray_Ops_with_right_vector(.Generic, e1, e2)
+ }
+}
+
+setMethod("Ops", c("DelayedArray", "vector"),
+ function(e1, e2)
+ .DelayedArray_Ops_with_right_vector(.Generic, e1, e2)
+)
+
+setMethod("Ops", c("vector", "DelayedArray"),
+ function(e1, e2)
+ .DelayedArray_Ops_with_left_vector(.Generic, e1, e2)
+)
+
+setMethod("Ops", c("DelayedArray", "DelayedArray"),
+ function(e1, e2)
+ .DelayedArray_Ops(.Generic, e1, e2)
+)
+
+### Support unary operators "+" and "-".
+setMethod("+", c("DelayedArray", "missing"),
+ function(e1, e2) register_delayed_op(e1, .Generic, Largs=list(0L))
+)
+setMethod("-", c("DelayedArray", "missing"),
+ function(e1, e2) register_delayed_op(e1, .Generic, Largs=list(0L))
+)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### pmax2() and pmin2()
+###
+### We treat them like the binary operators of the "Ops" group generics.
+###
+
+setGeneric("pmax2", function(e1, e2) standardGeneric("pmax2"))
+setGeneric("pmin2", function(e1, e2) standardGeneric("pmin2"))
+
+### Mimicking how the "Ops" members combine the "dim", "names", and "dimnames"
+### attributes of the 2 operands.
+.check_and_combine_dims <- function(e1, e2)
+{
+ dim1 <- dim(e1)
+ dim2 <- dim(e2)
+ if (is.null(dim1))
+ return(dim2)
+ if (is.null(dim2))
+ return(dim1)
+ if (!identical(dim1, dim2))
+ stop("non-conformable arrays")
+ dim1
+}
+
+.combine_names <- function(e1, e2)
+{
+ len1 <- length(e1)
+ len2 <- length(e2)
+ names1 <- names(e1)
+ if (len1 > len2)
+ return(names1)
+ names2 <- names(e2)
+ if (len2 > len1 || is.null(names1))
+ return(names2)
+ names1
+}
+
+setMethod("pmax2", c("ANY", "ANY"),
+ function(e1, e2)
+ {
+ ans_dim <- .check_and_combine_dims(e1, e2)
+ ans <- pmax(e1, e2)
+ if (is.null(ans_dim)) {
+ names(ans) <- .combine_names(e1, e2)
+ } else {
+ dim(ans) <- ans_dim
+ dimnames(ans) <- IRanges:::combine_dimnames(list(e1, e2))
+ }
+ ans
+ }
+)
+
+setMethod("pmin2", c("ANY", "ANY"),
+ function(e1, e2)
+ {
+ ans_dim <- .check_and_combine_dims(e1, e2)
+ ans <- pmin(e1, e2)
+ if (is.null(ans_dim)) {
+ names(ans) <- .combine_names(e1, e2)
+ } else {
+ dim(ans) <- ans_dim
+ dimnames(ans) <- IRanges:::combine_dimnames(list(e1, e2))
+ }
+ ans
+ }
+)
+
+for (.Generic in c("pmax2", "pmin2")) {
+ setMethod(.Generic, c("DelayedArray", "vector"),
+ function(e1, e2)
+ .DelayedArray_Ops_with_right_vector(.Generic, e1, e2)
+ )
+ setMethod(.Generic, c("vector", "DelayedArray"),
+ function(e1, e2)
+ .DelayedArray_Ops_with_left_vector(.Generic, e1, e2)
+ )
+ setMethod(.Generic, c("DelayedArray", "DelayedArray"),
+ function(e1, e2)
+ .DelayedArray_Ops(.Generic, e1, e2)
+ )
+}
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Various unary operators + the "Math" and "Math2" groups
+###
+### All these operations return a DelayedArray object of the same dimensions
+### as 'x'.
+###
+
+.UNARY_OPS <- c("is.na", "is.finite", "is.infinite", "is.nan", "!",
+ "tolower", "toupper")
+
+for (.Generic in .UNARY_OPS) {
+ setMethod(.Generic, "DelayedArray",
+ function(x) register_delayed_op(x, .Generic)
+ )
+}
+
+setMethod("nchar", "DelayedArray",
+ function(x, type="chars", allowNA=FALSE, keepNA=NA)
+ register_delayed_op(x, "nchar",
+ Rargs=list(type=type, allowNA=allowNA, keepNA=keepNA))
+)
+
+setMethod("Math", "DelayedArray",
+ function(x) register_delayed_op(x, .Generic)
+)
+
+.DelayedArray_Math2 <- function(.Generic, x, digits)
+{
+ stopifnot(is(x, "DelayedArray"))
+ if (!isSingleNumberOrNA(digits))
+ stop(wmsg("'digits' must be a single numeric"))
+ if (!is.integer(digits))
+ digits <- as.integer(digits)
+ register_delayed_op(x, .Generic, Rargs=list(digits=digits))
+}
+
+### Note that round() and signif() don't use the same default for 'digits'.
+setMethod("round", "DelayedArray",
+ function(x, digits=0) .DelayedArray_Math2("round", x, digits)
+)
+setMethod("signif", "DelayedArray",
+ function(x, digits=6) .DelayedArray_Math2("signif", x, digits)
+)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### A low-level utility for putting DelayedArray object in a "straight" form
+###
+### Untranspose the DelayedArray object and put its rows and columns in their
+### "native" order. The result is a DelayedArray object where the array
+### elements are in the same order as in the seeds. This makes block-processing
+### faster if the seeds are on-disk objects where the 1st dimension is the fast
+### changing dimension (e.g. 5x faster if the seeds are HDF5ArraySeed objects).
+###
+
+.straighten_index <- function(i)
+{
+ i_len <- length(i)
+ if (i_len == 0L)
+ return(i)
+ i_max <- max(i)
+ ## Threshold is a rough estimate obtained empirically.
+ ## TODO: Refine this.
+ if (i_max <= 2L * i_len * log(i_len)) {
+ which(as.logical(tabulate(i, nbins=i_max)))
+ } else {
+ sort(unique(i))
+ }
+}
+
+.straighten <- function(x, untranspose=FALSE, straighten.index=FALSE)
+{
+ if (is.array(x))
+ return(x)
+ if (untranspose)
+ x at is_transposed <- FALSE
+ if (!straighten.index)
+ return(x)
+ x_index <- x at index
+ x_seed_dim <- dim(seed(x))
+ for (N in x at metaindex) {
+ i <- x_index[[N]]
+ if (is.null(i) || isStrictlySorted(i))
+ next
+ x_index[[N]] <- .straighten_index(i)
+ }
+ if (!identical(x at index, x_index))
+ x at index <- x_index
+ x
+}
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### anyNA()
+###
+
+### Used in unit tests!
+.DelayedArray_block_anyNA <- function(x, recursive=FALSE)
+{
+ APPLY <- anyNA
+ COMBINE <- function(b, subarray, init, reduced) { init || reduced }
+ init <- FALSE
+ BREAKIF <- identity
+
+ x <- .straighten(x, untranspose=TRUE, straighten.index=TRUE)
+ block_APPLY_and_COMBINE(x, APPLY, COMBINE, init, BREAKIF)
+}
+
+setMethod("anyNA", "DelayedArray", .DelayedArray_block_anyNA)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### which()
+###
+
+### Used in unit tests!
+.DelayedArray_block_which <- function(x, arr.ind=FALSE, useNames=TRUE)
+{
+ if (!isTRUEorFALSE(arr.ind))
+ stop("'arr.ind' must be TRUE or FALSE")
+ if (!isTRUEorFALSE(useNames))
+ stop("'useNames' must be TRUE or FALSE")
+ APPLY <- base::which
+ COMBINE <- function(b, subarray, init, reduced) {
+ if (length(reduced) != 0L) {
+ reduced <- reduced + init[["offset"]]
+ part_number <- sprintf("%010d", b)
+ init[[part_number]] <- reduced
+ }
+ init[["offset"]] <- init[["offset"]] + length(subarray)
+ init
+ }
+ offset <- 0L
+ ## If 'x' is a "long array" (i.e. longer than 2^31), we use an offset of
+ ## type double to avoid integer overflow.
+ x_len <- length(x)
+ if (is.double(x_len))
+ offset <- as.double(offset)
+ init <- new.env(parent=emptyenv())
+ init[["offset"]] <- offset
+
+ init <- block_APPLY_and_COMBINE(x, APPLY, COMBINE, init)
+ stopifnot(identical(x_len, init[["offset"]])) # sanity check
+ rm(list="offset", envir=init)
+
+ if (length(init) == 0L) {
+ ans <- if (is.integer(x_len)) integer(0) else numeric(0)
+ } else {
+ ans <- unlist(as.list(init, sorted=TRUE),
+ recursive=FALSE, use.names=FALSE)
+ }
+ if (arr.ind)
+ ans <- arrayInd(ans, dim(x), dimnames(x), useNames=useNames)
+ ans
+}
+
+setMethod("which", "DelayedArray", .DelayedArray_block_which)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### "Summary" group generic
+###
+### Members: max, min, range, sum, prod, any, all
+###
+
+.collect_objects <- function(x, ...)
+{
+ if (missing(x)) {
+ objects <- unname(list(...))
+ } else {
+ objects <- unname(list(x, ...))
+ }
+ NULL_idx <- which(S4Vectors:::sapply_isNULL(objects))
+ if (length(NULL_idx) != 0L)
+ objects <- objects[-NULL_idx]
+ is_array_like <- function(x) is(x, "DelayedArray") || is.array(x)
+ if (!all(vapply(objects, is_array_like, logical(1))))
+ stop("the supplied objects must be array-like objects (or NULLs)")
+ objects
+}
+
+### Used in unit tests!
+.DelayedArray_block_Summary <- function(.Generic, x, ..., na.rm=FALSE)
+{
+ objects <- .collect_objects(x, ...)
+
+ GENERIC <- match.fun(.Generic)
+ APPLY <- function(subarray) {
+ ## We get a warning if 'subarray' is empty (which can't happen, blocks
+ ## can't be empty) or if 'na.rm' is TRUE and 'subarray' contains only
+ ## NA's or NaN's.
+ reduced <- tryCatch(GENERIC(subarray, na.rm=na.rm), warning=identity)
+ if (is(reduced, "warning"))
+ return(NULL)
+ reduced
+ }
+ COMBINE <- function(b, subarray, init, reduced) {
+ if (is.null(init) && is.null(reduced))
+ return(NULL)
+ GENERIC(init, reduced)
+ }
+ init <- NULL
+ BREAKIF <- function(init) {
+ if (is.null(init))
+ return(FALSE)
+ switch(.Generic,
+ max= is.na(init) || init == Inf,
+ min= is.na(init) || init == -Inf,
+ range= is.na(init[[1L]]) || all(init == c(-Inf, Inf)),
+ sum=, prod= is.na(init),
+ any= identical(init, TRUE),
+ all= identical(init, FALSE),
+ FALSE) # fallback (actually not needed)
+ }
+
+ for (x in objects) {
+ if (.Generic %in% c("sum", "prod")) {
+ x <- .straighten(x, untranspose=TRUE)
+ } else {
+ x <- .straighten(x, untranspose=TRUE, straighten.index=TRUE)
+ }
+ init <- block_APPLY_and_COMBINE(x, APPLY, COMBINE, init, BREAKIF)
+ }
+ if (is.null(init))
+ init <- GENERIC()
+ init
+}
+
+setMethod("Summary", "DelayedArray",
+ function(x, ..., na.rm=FALSE)
+ .DelayedArray_block_Summary(.Generic, x, ..., na.rm=na.rm)
+)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### mean()
+###
+
+### Same arguments as base::mean.default().
+.DelayedArray_block_mean <- function(x, trim=0, na.rm=FALSE)
+{
+ if (!identical(trim, 0))
+ stop("\"mean\" method for DelayedArray objects ",
+ "does not support the 'trim' argument yet")
+
+ APPLY <- function(subarray) {
+ tmp <- as.vector(subarray, mode="numeric")
+ subarray_sum <- sum(tmp, na.rm=na.rm)
+ subarray_nval <- length(tmp)
+ if (na.rm)
+ subarray_nval <- subarray_nval - sum(is.na(tmp))
+ c(subarray_sum, subarray_nval)
+ }
+ COMBINE <- function(b, subarray, init, reduced) { init + reduced }
+ init <- numeric(2) # sum and nval
+ BREAKIF <- function(init) is.na(init[[1L]])
+
+ x <- .straighten(x, untranspose=TRUE)
+ ans <- block_APPLY_and_COMBINE(x, APPLY, COMBINE, init, BREAKIF)
+ ans[[1L]] / ans[[2L]]
+}
+
+### S3/S4 combo for mean.DelayedArray
+mean.DelayedArray <- function(x, trim=0, na.rm=FALSE, ...)
+ .DelayedArray_block_mean(x, trim=trim, na.rm=na.rm, ...)
+setMethod("mean", "DelayedArray", .DelayedArray_block_mean)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### apply()
+###
+
+setGeneric("apply", signature="X")
+
+.simplify_apply_answer <- function(ans)
+{
+ if (!all(vapply(ans, is.atomic, logical(1), USE.NAMES=FALSE)))
+ return(ans) # won't simplify
+
+ ans_lens <- lengths(ans, use.names=FALSE)
+ mat_nrow <- ans_lens[[1L]]
+ if (!all(ans_lens == mat_nrow))
+ return(ans) # won't simplify
+
+ mat_data <- unlist(unname(ans))
+ if (mat_nrow == 0L)
+ return(mat_data) # zero-length atomic vector
+
+ mat_colnames <- names(ans)
+ if (mat_nrow == 1L)
+ return(setNames(mat_data, mat_colnames)) # atomic vector parallel
+ # to 'ans'
+
+ ## Simplify as matrix.
+ mat_data_names <- names(mat_data) # comes from the 'ans' inner names
+ if (is.null(mat_data_names)) {
+ mat_rownames <- NULL
+ } else {
+ mat_rownames <- head(mat_data_names, n=mat_nrow)
+ if (!all(mat_data_names == mat_rownames))
+ mat_rownames <- NULL
+ }
+ if (is.null(mat_rownames) && is.null(mat_colnames)) {
+ mat_dimnames <- NULL
+ } else {
+ mat_dimnames <- list(mat_rownames, mat_colnames)
+ }
+ matrix(mat_data, ncol=length(ans), dimnames=mat_dimnames)
+}
+
+### MARGIN must be a single integer.
+.DelayedArray_apply <- function(X, MARGIN, FUN, ...)
+{
+ FUN <- match.fun(FUN)
+ X_dim <- dim(X)
+ if (!isSingleNumber(MARGIN))
+ stop("'MARGIN' must be a single integer")
+ if (!is.integer(MARGIN))
+ MARGIN <- as.integer(MARGIN)
+ if (MARGIN < 1L || MARGIN > length(X_dim))
+ stop("'MARGIN' must be >= 1 and <= length(dim(X))")
+
+ if (X_dim[[MARGIN]] == 0L) {
+ ## base::apply seems to be doing something like that!
+ ans <- FUN(X, ...)
+ return(as.vector(ans[0L]))
+ }
+
+ ## TODO: Try using sapply() instead of lapply(). Maybe we're lucky
+ ## and it achieves the kind of simplification that we're doing with
+ ## .simplify_apply_answer() so we can get rid of .simplify_apply_answer().
+ ans_names <- dimnames(X)[[MARGIN]]
+ ans <- lapply(setNames(seq_len(X_dim[[MARGIN]]), ans_names),
+ function(i) {
+ Nindex <- vector(mode="list", length=length(X_dim))
+ Nindex[[MARGIN]] <- i
+ slice <- subset_by_Nindex(X, Nindex, drop=TRUE)
+ dim(slice) <- dim(slice)[-MARGIN]
+ FUN(slice, ...)
+ })
+
+ ## Try to simplify the answer.
+ .simplify_apply_answer(ans)
+}
+
+setMethod("apply", "DelayedArray", .DelayedArray_apply)
+
diff --git a/R/DelayedMatrix-stats.R b/R/DelayedMatrix-stats.R
new file mode 100644
index 0000000..3a598cc
--- /dev/null
+++ b/R/DelayedMatrix-stats.R
@@ -0,0 +1,246 @@
+### =========================================================================
+### Statistical/summarization methods for DelayedMatrix objects
+### -------------------------------------------------------------------------
+###
+
+
+### Raise an error if invalid input type. Otherwise return "integer",
+### "numeric", "double", or "complex".
+.get_ans_type <- function(x, must.be.numeric=FALSE)
+{
+ x_type <- type(x)
+ ans_type <- switch(x_type,
+ logical="integer",
+ integer=, numeric=, double=, complex=x_type,
+ stop(wmsg("operation not supported on matrices of type ", x_type)))
+ if (must.be.numeric && !is.numeric(get(ans_type)(0)))
+ stop(wmsg("operation not supported on matrices of type ", x_type))
+ ans_type
+}
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### row/colSums() and row/colMeans()
+###
+
+.normarg_dims <- function(dims, method)
+{
+ if (!identical(dims, 1))
+ stop("\"", method, "\" method for DelayedMatrix objects ",
+ "does not support the 'dims' argument yet")
+}
+
+### row/colSums()
+
+.DelayedMatrix_block_rowSums <- function(x, na.rm=FALSE, dims=1)
+{
+ .normarg_dims(dims, "rowSums")
+ if (is(x, "DelayedArray") && x at is_transposed)
+ return(.DelayedMatrix_block_colSums(t(x), na.rm=na.rm, dims=dims))
+
+ .get_ans_type(x) # check input type
+ APPLY <- function(m) rowSums(m, na.rm=na.rm)
+ COMBINE <- function(b, m, init, reduced) { init + reduced }
+ init <- numeric(nrow(x))
+ ans <- colblock_APPLY_and_COMBINE(x, APPLY, COMBINE, init)
+ setNames(ans, rownames(x))
+}
+
+.DelayedMatrix_block_colSums <- function(x, na.rm=FALSE, dims=1)
+{
+ .normarg_dims(dims, "colSums")
+ if (is(x, "DelayedArray") && x at is_transposed)
+ return(.DelayedMatrix_block_rowSums(t(x), na.rm=na.rm, dims=dims))
+
+ .get_ans_type(x) # check input type
+ colsums_list <- colblock_APPLY(x, colSums, na.rm=na.rm)
+ if (length(colsums_list) == 0L)
+ return(numeric(ncol(x)))
+ unlist(colsums_list, recursive=FALSE)
+}
+
+setMethod("rowSums", "DelayedMatrix", .DelayedMatrix_block_rowSums)
+setMethod("colSums", "DelayedMatrix", .DelayedMatrix_block_colSums)
+
+### row/colMeans()
+
+.DelayedMatrix_block_rowMeans <- function(x, na.rm=FALSE, dims=1)
+{
+ .normarg_dims(dims, "rowMeans")
+ if (is(x, "DelayedMatrix") && x at is_transposed)
+ return(.DelayedMatrix_block_colMeans(t(x), na.rm=na.rm, dims=dims))
+
+ .get_ans_type(x) # check input type
+ APPLY <- function(m) {
+ m_sums <- rowSums(m, na.rm=na.rm)
+ m_nvals <- ncol(m)
+ if (na.rm)
+ m_nvals <- m_nvals - rowSums(is.na(m))
+ cbind(m_sums, m_nvals)
+ }
+ COMBINE <- function(b, m, init, reduced) { init + reduced }
+ init <- cbind(
+ numeric(nrow(x)), # sums
+ numeric(nrow(x)) # nvals
+ )
+ ans <- colblock_APPLY_and_COMBINE(x, APPLY, COMBINE, init)
+ setNames(ans[ , 1L] / ans[ , 2L], rownames(x))
+}
+
+.DelayedMatrix_block_colMeans <- function(x, na.rm=FALSE, dims=1)
+{
+ .normarg_dims(dims, "colMeans")
+ if (is(x, "DelayedArray") && x at is_transposed)
+ return(.DelayedMatrix_block_rowMeans(t(x), na.rm=na.rm, dims=dims))
+
+ .get_ans_type(x) # check input type
+ colmeans_list <- colblock_APPLY(x, colMeans, na.rm=na.rm)
+ if (length(colmeans_list) == 0L)
+ return(rep.int(NaN, ncol(x)))
+ unlist(colmeans_list, recursive=FALSE)
+}
+
+setMethod("rowMeans", "DelayedMatrix", .DelayedMatrix_block_rowMeans)
+setMethod("colMeans", "DelayedMatrix", .DelayedMatrix_block_colMeans)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Row/column summarization from the matrixStats package
+###
+### row/colMaxs(), row/colMins(), row/colRanges(),
+### row/colProds(), row/colAnys(), row/colAlls(), row/colMedians()
+###
+
+.fix_type <- function(x, ans_type)
+{
+ if (ans_type == "integer" && !is.integer(x) && all(is.finite(x)))
+ storage.mode(x) <- ans_type
+ x
+}
+
+### row/colMaxs()
+
+.DelayedMatrix_block_rowMaxs <- function(x, rows=NULL, cols=NULL,
+ na.rm=FALSE, dim.=dim(x))
+{
+ if (is(x, "DelayedArray") && x at is_transposed)
+ return(.DelayedMatrix_block_colMaxs(t(x), rows=rows, cols=cols,
+ na.rm=na.rm, dim.=dim.))
+
+ ans_type <- .get_ans_type(x, must.be.numeric=TRUE)
+ APPLY <- function(m) rowMaxs(m, na.rm=na.rm)
+ COMBINE <- function(b, m, init, reduced)
+ .fix_type(pmax(init, reduced), ans_type)
+ init <- .fix_type(rep.int(-Inf, nrow(x)), ans_type)
+ ans <- colblock_APPLY_and_COMBINE(x, APPLY, COMBINE, init)
+ setNames(ans, rownames(x))
+}
+
+.DelayedMatrix_block_colMaxs <- function(x, rows=NULL, cols=NULL,
+ na.rm=FALSE, dim.=dim(x))
+{
+ if (is(x, "DelayedArray") && x at is_transposed)
+ return(.DelayedMatrix_block_rowMaxs(t(x), rows=rows, cols=cols,
+ na.rm=na.rm, dim.=dim.))
+
+ ans_type <- .get_ans_type(x, must.be.numeric=TRUE)
+ colmaxs_list <- colblock_APPLY(x, colMaxs, na.rm=na.rm)
+ if (length(colmaxs_list) == 0L)
+ return(.fix_type(rep.int(-Inf, ncol(x)), ans_type))
+ unlist(colmaxs_list, recursive=FALSE)
+}
+
+setGeneric("rowMaxs", signature="x")
+setGeneric("colMaxs", signature="x")
+
+setMethod("rowMaxs", "DelayedMatrix", .DelayedMatrix_block_rowMaxs)
+setMethod("colMaxs", "DelayedMatrix", .DelayedMatrix_block_colMaxs)
+
+### row/colMins()
+
+.DelayedMatrix_block_rowMins <- function(x, rows=NULL, cols=NULL,
+ na.rm=FALSE, dim.=dim(x))
+{
+ if (is(x, "DelayedArray") && x at is_transposed)
+ return(.DelayedMatrix_block_colMins(t(x), rows=rows, cols=cols,
+ na.rm=na.rm, dim.=dim.))
+
+ ans_type <- .get_ans_type(x, must.be.numeric=TRUE)
+ APPLY <- function(m) rowMins(m, na.rm=na.rm)
+ COMBINE <- function(b, m, init, reduced)
+ .fix_type(pmin(init, reduced), ans_type)
+ init <- .fix_type(rep.int(Inf, nrow(x)), ans_type)
+ ans <- colblock_APPLY_and_COMBINE(x, APPLY, COMBINE, init)
+ setNames(ans, rownames(x))
+}
+
+.DelayedMatrix_block_colMins <- function(x, rows=NULL, cols=NULL,
+ na.rm=FALSE, dim.=dim(x))
+{
+ if (is(x, "DelayedArray") && x at is_transposed)
+ return(.DelayedMatrix_block_rowMins(t(x), rows=rows, cols=cols,
+ na.rm=na.rm, dim.=dim.))
+
+ ans_type <- .get_ans_type(x, must.be.numeric=TRUE)
+ colmins_list <- colblock_APPLY(x, colMins, na.rm=na.rm)
+ if (length(colmins_list) == 0L)
+ return(.fix_type(rep.int(Inf, ncol(x)), ans_type))
+ unlist(colmins_list, recursive=FALSE)
+}
+
+setGeneric("rowMins", signature="x")
+setGeneric("colMins", signature="x")
+
+setMethod("rowMins", "DelayedMatrix", .DelayedMatrix_block_rowMins)
+setMethod("colMins", "DelayedMatrix", .DelayedMatrix_block_colMins)
+
+### row/colRanges()
+
+.DelayedMatrix_block_rowRanges <- function(x, rows=NULL, cols=NULL,
+ na.rm=FALSE, dim.=dim(x))
+{
+ if (is(x, "DelayedArray") && x at is_transposed)
+ return(.DelayedMatrix_block_colRanges(t(x), rows=rows, cols=cols,
+ na.rm=na.rm, dim.=dim.))
+
+ ans_type <- .get_ans_type(x, must.be.numeric=TRUE)
+ APPLY <- function(m) rowRanges(m, na.rm=na.rm)
+ COMBINE <- function(b, m, init, reduced) {
+ .fix_type(cbind(pmin(init[ , 1L], reduced[ , 1L]),
+ pmax(init[ , 2L], reduced[ , 2L])),
+ ans_type)
+ }
+ init <- .fix_type(matrix(rep(c(Inf, -Inf), each=nrow(x)), ncol=2L),
+ ans_type)
+ ans <- colblock_APPLY_and_COMBINE(x, APPLY, COMBINE, init)
+ setNames(ans, rownames(x))
+}
+
+.DelayedMatrix_block_colRanges <- function(x, rows=NULL, cols=NULL,
+ na.rm=FALSE, dim.=dim(x))
+{
+ if (is(x, "DelayedArray") && x at is_transposed)
+ return(.DelayedMatrix_block_rowRanges(t(x), rows=rows, cols=cols,
+ na.rm=na.rm, dim.=dim.))
+
+ ans_type <- .get_ans_type(x, must.be.numeric=TRUE)
+ colranges_list <- colblock_APPLY(x, colRanges, na.rm=na.rm)
+ if (length(colranges_list) == 0L)
+ return(.fix_type(matrix(rep(c(Inf, -Inf), each=ncol(x)), ncol=2L),
+ ans_type))
+ do.call(rbind, colranges_list)
+}
+
+.rowRanges.useAsDefault <- function(x, ...) matrixStats::rowRanges(x, ...)
+setGeneric("rowRanges", signature="x",
+ function(x, ...) standardGeneric("rowRanges"),
+ useAsDefault=.rowRanges.useAsDefault
+)
+
+setGeneric("colRanges", signature="x")
+
+setMethod("rowRanges", "DelayedMatrix", .DelayedMatrix_block_rowRanges)
+setMethod("colRanges", "DelayedMatrix", .DelayedMatrix_block_colRanges)
+
+### TODO: Add more row/column summarization generics/methods.
+
diff --git a/R/DelayedMatrix-utils.R b/R/DelayedMatrix-utils.R
new file mode 100644
index 0000000..c5461fe
--- /dev/null
+++ b/R/DelayedMatrix-utils.R
@@ -0,0 +1,44 @@
+### =========================================================================
+### Common operations on DelayedMatrix objects
+### -------------------------------------------------------------------------
+###
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Matrix multiplication
+###
+### We only support multiplication of an ordinary matrix (typically
+### small) by a DelayedMatrix object (typically big). Multiplication of 2
+### DelayedMatrix objects is not supported.
+###
+
+.DelayedMatrix_block_mult_by_left_matrix <- function(x, y)
+{
+ stopifnot(is.matrix(x),
+ is(y, "DelayedMatrix") || is.matrix(y),
+ ncol(x) == nrow(y))
+
+ ans_dim <- c(nrow(x), ncol(y))
+ ans_dimnames <- list(rownames(x), colnames(y))
+ ans_type <- typeof(match.fun(type(x))(1) * match.fun(type(y))(1))
+ sink <- RealizationSink(ans_dim, ans_dimnames, ans_type)
+ on.exit(close(sink))
+ colblock_APPLY(y, function(submatrix) { x %*% submatrix }, sink=sink)
+ as(sink, "DelayedArray")
+}
+
+setMethod("%*%", c("DelayedMatrix", "matrix"),
+ function(x, y) t(t(y) %*% t(x))
+)
+
+setMethod("%*%", c("matrix", "DelayedMatrix"),
+ .DelayedMatrix_block_mult_by_left_matrix
+)
+
+setMethod("%*%", c("DelayedMatrix", "DelayedMatrix"),
+ function(x, y)
+ stop(wmsg("multiplication of 2 DelayedMatrix objects is not ",
+ "supported, only multiplication of an ordinary matrix by ",
+ "a DelayedMatrix object at the moment"))
+)
+
diff --git a/R/RleArray-class.R b/R/RleArray-class.R
new file mode 100644
index 0000000..6eca1e0
--- /dev/null
+++ b/R/RleArray-class.R
@@ -0,0 +1,265 @@
+### =========================================================================
+### RleArray objects
+### -------------------------------------------------------------------------
+
+
+### NOT exported!
+setClass("RleArraySeed",
+ representation(
+ rle="Rle",
+ dim="integer",
+ dimnames="list"
+ )
+)
+
+.validate_RleArraySeed <- function(x)
+{
+ ## 'rle' slot.
+ if (!is(x at rle, "Rle"))
+ return(wmsg("'x at rle' must be an Rle object"))
+ ## 'dim' slot.
+ if (!is.integer(x at dim))
+ return(wmsg("'x at dim' must be an integer vector"))
+ x_ndim <- length(x at dim)
+ if (x_ndim == 0L)
+ return(wmsg("'x at dim' cannot be empty"))
+ if (S4Vectors:::anyMissingOrOutside(x at dim, 0L))
+ return(wmsg("'x at dim' cannot contain negative or NA values"))
+ p <- prod(x at dim)
+ if (p != length(x at rle))
+ return(wmsg("object dimensions [product ", p, "] do not match ",
+ "its length [" , length(x at rle), "]"))
+ ## 'dimnames' slot.
+ if (!is.list(x at dimnames))
+ return(wmsg("'x at dimnames' must be a list of length"))
+ if (length(x at dimnames) != x_ndim)
+ return(wmsg("length of 'x at dimnames' must match that of 'x at dim'"))
+ if (!all(vapply(seq_len(x_ndim),
+ function(n) {
+ dn <- x at dimnames[[n]]
+ if (is.null(dn))
+ return(TRUE)
+ is.character(dn) && length(dn) == x at dim[[n]]
+ },
+ logical(1),
+ USE.NAMES=FALSE)))
+ return(wmsg("every list element in 'x at dimnames' must be NULL or ",
+ "a character vector along the corresponding dimension"))
+ TRUE
+}
+
+setValidity2("RleArraySeed", .validate_RleArraySeed)
+
+setMethod("dim", "RleArraySeed", function(x) x at dim)
+
+setMethod("length", "RleArraySeed", function(x) prod(dim(x)))
+
+setMethod("dimnames", "RleArraySeed",
+ function(x)
+ {
+ ans <- x at dimnames
+ if (all(S4Vectors:::sapply_isNULL(ans)))
+ return(NULL)
+ ans
+ }
+)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### subset_seed_as_array()
+###
+
+.subset_RleArraySeed_as_array <- function(seed, index)
+{
+ seed_dim <- dim(seed)
+ i <- to_linear_index(index, seed_dim)
+ ans <- S4Vectors:::extract_positions_from_Rle(seed at rle, i, decoded=TRUE)
+ dim(ans) <- get_Nindex_lengths(index, seed_dim)
+ ans
+}
+
+setMethod("subset_seed_as_array", "RleArraySeed",
+ .subset_RleArraySeed_as_array
+)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### RleArraySeed constructor
+###
+
+### NOT exported!
+RleArraySeed <- function(rle, dim, dimnames=NULL)
+{
+ if (!is.numeric(dim))
+ stop(wmsg("the supplied dim vector must be numeric"))
+ if (!is.integer(dim))
+ dim <- as.integer(dim)
+ if (is.null(dimnames))
+ dimnames <- vector("list", length=length(dim))
+ new2("RleArraySeed", rle=rle, dim=dim, dimnames=dimnames)
+}
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### RleArray and RleMatrix objects
+###
+
+setClass("RleArray", contains="DelayedArray")
+
+setClass("RleMatrix", contains=c("DelayedMatrix", "RleArray"))
+
+### Automatic coercion method from RleArray to RleMatrix silently returns
+### a broken object (unfortunately these dummy automatic coercion methods
+### don't bother to validate the object they return). So we overwrite it.
+setAs("RleArray", "RleMatrix", function(from) new("RleMatrix", from))
+
+### For internal use only.
+setMethod("matrixClass", "RleArray", function(x) "RleMatrix")
+
+.validate_RleArray <- function(x)
+{
+ if (!is(seed(x), "RleArraySeed"))
+ return(wmsg("'seed(x)' must be an RleArraySeed object"))
+ if (!is_pristine(x))
+ return(wmsg("'x' carries delayed operations on it"))
+ TRUE
+}
+
+setValidity2("RleArray", .validate_RleArray)
+
+setAs("ANY", "RleMatrix",
+ function(from) as(as(from, "RleArray"), "RleMatrix")
+)
+
+setMethod("DelayedArray", "RleArraySeed",
+ function(seed) new_DelayedArray(seed, Class="RleArray")
+)
+
+### Works directly on an RleArraySeed object, in which case it must be called
+### with a single argument.
+RleArray <- function(rle, dim, dimnames=NULL)
+{
+ if (is(rle, "RleArraySeed")) {
+ if (!(missing(dim) && is.null(dimnames)))
+ stop(wmsg("RleArray() must be called with a single argument ",
+ "when passed an RleArraySeed object"))
+ seed <- rle
+ } else {
+ seed <- RleArraySeed(rle, dim, dimnames=dimnames)
+ }
+ DelayedArray(seed)
+}
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### RleRealizationSink objects
+###
+### The RleRealizationSink class is a concrete RealizationSink subclass that
+### implements realization of an array-like object as an RleArray object.
+###
+
+setClass("RleRealizationSink",
+ contains="RealizationSink",
+ representation(
+ dim="integer",
+ dimnames="list",
+ type="character",
+ dump="environment"
+ )
+)
+
+RleRealizationSink <- function(dim, dimnames=NULL, type="double")
+{
+ if (is.null(dimnames))
+ dimnames <- vector("list", length(dim))
+ dump <- new.env(parent=emptyenv())
+ new("RleRealizationSink", dim=dim, dimnames=dimnames, type=type, dump=dump)
+}
+
+setMethod("write_to_sink", c("array", "RleRealizationSink"),
+ function(x, sink, offsets=NULL)
+ {
+ x_dim <- dim(x)
+ sink_dim <- sink at dim
+ stopifnot(length(x_dim) == length(sink_dim))
+ if (is.null(offsets)) {
+ stopifnot(identical(x_dim, sink_dim))
+ } else {
+ stopifnot(length(x_dim) == length(sink_dim))
+ }
+ name <- sprintf("%09d", length(sink at dump))
+ stopifnot(nchar(name) == 9L)
+ assign(name, Rle(x), envir=sink at dump)
+ }
+)
+
+setAs("RleRealizationSink", "RleArraySeed",
+ function(from)
+ {
+ if (length(from at dump) == 0L) {
+ rle <- Rle(get(from at type)(0))
+ } else {
+ rle <- do.call("c", unname(as.list(from at dump, sorted=TRUE)))
+ }
+ RleArraySeed(rle, from at dim, from at dimnames)
+ }
+)
+
+setAs("RleRealizationSink", "RleArray",
+ function(from) RleArray(as(from, "RleArraySeed"))
+)
+
+setAs("RleRealizationSink", "DelayedArray",
+ function(from) RleArray(as(from, "RleArraySeed"))
+)
+
+.as_RleArray <- function(from)
+{
+ sink <- RleRealizationSink(dim(from), dimnames(from), type(from))
+ write_to_sink(from, sink)
+ as(sink, "RleArray")
+}
+
+setAs("ANY", "RleArray", .as_RleArray)
+
+### Automatic coercion methods from DelayedArray to RleArray and from
+### DelayedMatrix to RleMatrix silently return broken objects (unfortunately
+### these dummy automatic coercion methods don't bother to validate the object
+### they return). So we overwrite them.
+setAs("DelayedArray", "RleArray", .as_RleArray)
+setAs("DelayedMatrix", "RleMatrix", .as_RleArray)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Switching between DataFrame and RleMatrix representation
+###
+
+### From DataFrame to RleMatrix.
+.from_DataFrame_to_RleMatrix <- function(from)
+{
+ as(DelayedArray(from), "RleMatrix")
+}
+
+setAs("DataFrame", "RleMatrix", .from_DataFrame_to_RleMatrix)
+setAs("DataFrame", "RleArray", .from_DataFrame_to_RleMatrix)
+
+### From RleMatrix to DataFrame.
+.from_RleMatrix_to_DataFrame <- function(from)
+{
+ ## We mangle the colnames exactly like as.data.frame() would do.
+ ans_colnames <- colnames(as.data.frame(from[0L, ]))
+ partitioning <- PartitioningByEnd(nrow(from) * seq_len(ncol(from)),
+ names=ans_colnames)
+ listData <- as.list(relist(seed(from)@rle, partitioning))
+ new("DataFrame", listData=listData,
+ nrows=nrow(from),
+ rownames=rownames(from))
+}
+
+setAs("RleMatrix", "DataFrame", .from_RleMatrix_to_DataFrame)
+
+### From DelayedMatrix to DataFrame.
+setAs("DelayedMatrix", "DataFrame",
+ function(from) as(as(from, "RleMatrix"), "DataFrame")
+)
+
diff --git a/R/block_processing.R b/R/block_processing.R
new file mode 100644
index 0000000..d26c8c4
--- /dev/null
+++ b/R/block_processing.R
@@ -0,0 +1,230 @@
+### =========================================================================
+### Internal utilities for block processing an array
+### -------------------------------------------------------------------------
+###
+### Unless stated otherwise, nothing in this file is exported.
+###
+
+
+### Default block size in bytes.
+DEFAULT_BLOCK_SIZE <- 4500000L # 4.5 Mb
+
+### Atomic type sizes in bytes.
+.TYPE_SIZES <- c(
+ logical=4L,
+ integer=4L,
+ numeric=8L,
+ double=8L,
+ complex=16L,
+ character=8L, # just the overhead of a CHARSXP; doesn't account for the
+ # string data itself
+ raw=1L
+)
+
+### Used in HDF5Array!
+get_block_length <- function(type)
+{
+ type_size <- .TYPE_SIZES[type]
+ idx <- which(is.na(type_size))
+ if (length(idx) != 0L) {
+ unsupported_types <- unique(type[idx])
+ in1string <- paste0(unsupported_types, collapse=", ")
+ stop("unsupported type(s): ", in1string)
+ }
+ block_size <- getOption("DelayedArray.block.size",
+ default=DEFAULT_BLOCK_SIZE)
+ as.integer(block_size / type_size)
+}
+
+### Used in HDF5Array!
+get_verbose_block_processing <- function()
+{
+ getOption("DelayedArray.verbose.block.processing", default=FALSE)
+}
+
+### Used in HDF5Array!
+set_verbose_block_processing <- function(verbose)
+{
+ if (!isTRUEorFALSE(verbose))
+ stop("'verbose' must be TRUE or FALSE")
+ old_verbose <- get_verbose_block_processing()
+ options(DelayedArray.verbose.block.processing=verbose)
+ old_verbose
+}
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Walking on the blocks
+###
+### 3 utility functions to process array-like objects by block.
+###
+
+.as_array_or_matrix <- function(x)
+{
+ if (length(dim(x)) == 2L)
+ return(as.matrix(x))
+ as.array(x)
+}
+
+### An lapply-like function.
+block_APPLY <- function(x, APPLY, ..., sink=NULL, block_len=NULL)
+{
+ APPLY <- match.fun(APPLY)
+ if (is.null(block_len))
+ block_len <- get_block_length(type(x))
+ blocks <- ArrayBlocks(dim(x), block_len)
+ nblock <- length(blocks)
+ lapply(seq_len(nblock),
+ function(b) {
+ if (get_verbose_block_processing())
+ message("Processing block ", b, "/", nblock, " ... ",
+ appendLF=FALSE)
+ block_ranges <- get_block_ranges(blocks, b)
+ Nindex <- make_Nindex_from_block_ranges(block_ranges, blocks at dim)
+ subarray <- subset_by_Nindex(x, Nindex)
+ if (!is.array(subarray))
+ subarray <- .as_array_or_matrix(subarray)
+ block_ans <- APPLY(subarray, ...)
+ if (!is.null(sink)) {
+ write_to_sink(block_ans, sink, offsets=start(block_ranges))
+ block_ans <- NULL
+ }
+ if (get_verbose_block_processing())
+ message("OK")
+ block_ans
+ })
+}
+
+### A mapply-like function for conformable arrays.
+block_MAPPLY <- function(MAPPLY, ..., sink=NULL, block_len=NULL)
+{
+ MAPPLY <- match.fun(MAPPLY)
+ dots <- unname(list(...))
+ dims <- sapply(dots, dim)
+ x_dim <- dims[ , 1L]
+ if (!all(dims == x_dim))
+ stop("non-conformable arrays")
+ if (is.null(block_len)) {
+ types <- unlist(lapply(dots, type))
+ block_len <- min(get_block_length(types))
+ }
+ blocks <- ArrayBlocks(x_dim, block_len)
+ nblock <- length(blocks)
+ lapply(seq_len(nblock),
+ function(b) {
+ if (get_verbose_block_processing())
+ message("Processing block ", b, "/", nblock, " ... ",
+ appendLF=FALSE)
+ block_ranges <- get_block_ranges(blocks, b)
+ Nindex <- make_Nindex_from_block_ranges(block_ranges, blocks at dim)
+ subarrays <- lapply(dots,
+ function(x) {
+ subarray <- subset_by_Nindex(x, Nindex)
+ if (!is.array(subarray))
+ subarray <- .as_array_or_matrix(subarray)
+ subarray
+ })
+ block_ans <- do.call(MAPPLY, subarrays)
+ if (!is.null(sink)) {
+ write_to_sink(block_ans, sink, offsets=start(block_ranges))
+ block_ans <- NULL
+ }
+ if (get_verbose_block_processing())
+ message("OK")
+ block_ans
+ })
+}
+
+### A Reduce-like function.
+block_APPLY_and_COMBINE <- function(x, APPLY, COMBINE, init,
+ BREAKIF=NULL, block_len=NULL)
+{
+ APPLY <- match.fun(APPLY)
+ COMBINE <- match.fun(COMBINE)
+ if (!is.null(BREAKIF))
+ BREAKIF <- match.fun(BREAKIF)
+ if (is.null(block_len))
+ block_len <- get_block_length(type(x))
+ blocks <- ArrayBlocks(dim(x), block_len)
+ nblock <- length(blocks)
+ for (b in seq_len(nblock)) {
+ if (get_verbose_block_processing())
+ message("Processing block ", b, "/", nblock, " ... ",
+ appendLF=FALSE)
+ subarray <- extract_array_block(x, blocks, b)
+ if (!is.array(subarray))
+ subarray <- .as_array_or_matrix(subarray)
+ reduced <- APPLY(subarray)
+ init <- COMBINE(b, subarray, init, reduced)
+ if (get_verbose_block_processing())
+ message("OK")
+ if (!is.null(BREAKIF) && BREAKIF(init)) {
+ if (get_verbose_block_processing())
+ message("BREAK condition encountered")
+ break
+ }
+ }
+ init
+}
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Walking on the blocks of columns
+###
+### 2 convenience wrappers around block_APPLY() and block_APPLY_and_COMBINE()
+### to process a matrix-like object by block of columns.
+###
+
+colblock_APPLY <- function(x, APPLY, ..., sink=NULL)
+{
+ x_dim <- dim(x)
+ if (length(x_dim) != 2L)
+ stop("'x' must be a matrix-like object")
+ APPLY <- match.fun(APPLY)
+ ## We're going to walk along the columns so need to increase the block
+ ## length so each block is made of at least one column.
+ block_len <- max(get_block_length(type(x)), x_dim[[1L]])
+ block_APPLY(x, APPLY, ..., sink=sink, block_len=block_len)
+}
+
+colblock_APPLY_and_COMBINE <- function(x, APPLY, COMBINE, init)
+{
+ x_dim <- dim(x)
+ if (length(x_dim) != 2L)
+ stop("'x' must be a matrix-like object")
+ ## We're going to walk along the columns so need to increase the block
+ ## length so each block is made of at least one column.
+ block_len <- max(get_block_length(type(x)), x_dim[[1L]])
+ block_APPLY_and_COMBINE(x, APPLY, COMBINE, init, block_len=block_len)
+}
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Block by block realization of an array-like object
+###
+
+### Exported!
+setMethod("write_to_sink", c("DelayedArray", "RealizationSink"),
+ function(x, sink, offsets=NULL)
+ {
+ if (!is.null(offsets))
+ stop(wmsg("'offsets' must be NULL when the object to write ",
+ "is a DelayedArray object"))
+ ## Semantically equivalent to 'write_to_sink(as.array(x), sink)'
+ ## but uses block-processing so the full DelayedArray object is
+ ## not realized at once in memory. Instead the object is first
+ ## split into blocks and the blocks are realized and written to
+ ## disk one at a time.
+ block_APPLY(x, identity, sink=sink)
+ }
+)
+
+### Exported!
+setMethod("write_to_sink", c("ANY", "RealizationSink"),
+ function(x, sink, offsets=NULL)
+ {
+ x <- as(x, "DelayedArray")
+ write_to_sink(x, sink, offsets=offsets)
+ }
+)
+
diff --git a/R/cbind-methods.R b/R/cbind-methods.R
new file mode 100644
index 0000000..f545ec2
--- /dev/null
+++ b/R/cbind-methods.R
@@ -0,0 +1,163 @@
+### =========================================================================
+### Bind DelayedArray objects along their rows or columns
+### -------------------------------------------------------------------------
+###
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### SeedBinder objects
+###
+### This class is for internal use only and is not exported.
+###
+
+setClass("SeedBinder",
+ representation(
+ seeds="list", # List of array-like objects to bind. Each object
+ # is expected to satisfy the "seed contract" i.e.
+ # to support dim(), dimnames(), and
+ # subset_seed_as_array().
+
+ along="integer" # Single integer indicating the dimension along
+ # which to bind the seeds.
+ ),
+ prototype(
+ seeds=list(new("array")),
+ along=1L
+ )
+)
+
+.validate_SeedBinder <- function(x)
+{
+ if (length(x at seeds) == 0L)
+ return(wmsg("'x at seeds' cannot be empty"))
+ if (!(isSingleInteger(x at along) && x at along > 0L))
+ return(wmsg("'x at along' must be a single positive integer"))
+ dims <- IRanges:::get_dims_to_bind(x at seeds, x at along)
+ if (is.character(dims))
+ return(wmsg(dims))
+ TRUE
+}
+
+setValidity2("SeedBinder", .validate_SeedBinder)
+
+.new_SeedBinder <- function(seeds, along)
+{
+ seeds <- lapply(seeds, remove_pristine_DelayedArray_wrapping)
+ new2("SeedBinder", seeds=seeds, along=along)
+}
+
+### Implement the "seed contract" i.e. dim(), dimnames(), and
+### subset_seed_as_array().
+
+.get_SeedBinder_dim <- function(x)
+{
+ dims <- IRanges:::get_dims_to_bind(x at seeds, x at along)
+ IRanges:::combine_dims_along(dims, x at along)
+}
+
+setMethod("dim", "SeedBinder", .get_SeedBinder_dim)
+
+.get_SeedBinder_dimnames <- function(x)
+{
+ dims <- IRanges:::get_dims_to_bind(x at seeds, x at along)
+ IRanges:::combine_dimnames_along(x at seeds, dims, x at along)
+}
+
+setMethod("dimnames", "SeedBinder", .get_SeedBinder_dimnames)
+
+.subset_SeedBinder_as_array <- function(seed, index)
+{
+ i <- index[[seed at along]]
+
+ if (is.null(i)) {
+ ## This is the easy situation.
+ tmp <- lapply(seed at seeds, subset_seed_as_array, index)
+ ## Bind the ordinary arrays in 'tmp'.
+ ans <- do.call(IRanges:::simple_abind, c(tmp, list(along=seed at along)))
+ return(ans)
+ }
+
+ ## From now on 'i' is a vector of positive integers.
+ dims <- IRanges:::get_dims_to_bind(seed at seeds, seed at along)
+ breakpoints <- cumsum(dims[seed at along, ])
+ part_idx <- get_part_index(i, breakpoints)
+ split_part_idx <- split_part_index(part_idx, length(breakpoints))
+ FUN <- function(s) {
+ index[[seed at along]] <- split_part_idx[[s]]
+ subset_seed_as_array(seed at seeds[[s]], index)
+ }
+ tmp <- lapply(seq_along(seed at seeds), FUN)
+
+ ## Bind the ordinary arrays in 'tmp'.
+ ans <- do.call(IRanges:::simple_abind, c(tmp, list(along=seed at along)))
+
+ ## Reorder the rows or columns in 'ans'.
+ Nindex <- vector(mode="list", length=length(index))
+ Nindex[[seed at along]] <- get_rev_index(part_idx)
+ subset_by_Nindex(ans, Nindex)
+}
+
+setMethod("subset_seed_as_array", "SeedBinder", .subset_SeedBinder_as_array)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### arbind() and acbind()
+###
+
+.DelayedArray_arbind <- function(...)
+{
+ objects <- unname(list(...))
+ dims <- IRanges:::get_dims_to_bind(objects, 1L)
+ if (is.character(dims))
+ stop(wmsg(dims))
+ DelayedArray(.new_SeedBinder(objects, 1L))
+}
+
+.DelayedArray_acbind <- function(...)
+{
+ objects <- unname(list(...))
+ dims <- IRanges:::get_dims_to_bind(objects, 2L)
+ if (is.character(dims))
+ stop(wmsg(dims))
+ DelayedArray(.new_SeedBinder(objects, 2L))
+}
+
+setMethod("arbind", "DelayedArray", .DelayedArray_arbind)
+setMethod("acbind", "DelayedArray", .DelayedArray_acbind)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### rbind() and cbind()
+###
+
+setMethod("rbind", "DelayedMatrix", .DelayedArray_arbind)
+setMethod("cbind", "DelayedMatrix", .DelayedArray_acbind)
+
+.as_DelayedMatrix_objects <- function(objects)
+{
+ lapply(objects,
+ function(object) {
+ if (length(dim(object)) != 2L)
+ stop(wmsg("cbind() and rbind() are not supported on ",
+ "DelayedArray objects that don't have exactly ",
+ "2 dimensions. Please use acbind() or arnind() ",
+ "instead."))
+ as(object, "DelayedMatrix")
+ })
+}
+
+.DelayedArray_rbind <- function(...)
+{
+ objects <- .as_DelayedMatrix_objects(list(...))
+ do.call("rbind", objects)
+}
+
+.DelayedArray_cbind <- function(...)
+{
+ objects <- .as_DelayedMatrix_objects(list(...))
+ do.call("cbind", objects)
+}
+
+setMethod("rbind", "DelayedArray", .DelayedArray_rbind)
+setMethod("cbind", "DelayedArray", .DelayedArray_cbind)
+
diff --git a/R/realize.R b/R/realize.R
new file mode 100644
index 0000000..caa6d13
--- /dev/null
+++ b/R/realize.R
@@ -0,0 +1,223 @@
+### =========================================================================
+### realize()
+### -------------------------------------------------------------------------
+###
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### RealizationSink objects
+###
+
+### Virtual class with no slots. Intended to be extended by implementations
+### of DelayedArray backends. Concrete subclasses must implement:
+### 1) A constructor function that takes argument 'dim', 'dimnames', and
+### 'type'.
+### 2) A "write_to_sink" method that works on an ordinary array.
+### 3) A "close" method (optional).
+### 4) Coercion to DelayedArray.
+### See the arrayRealizationSink class below or the HDF5RealizationSink class
+### in the HDF5Array package for examples of concrete RealizationSink
+### subclasses.
+setClass("RealizationSink", representation("VIRTUAL"))
+
+### 'x' and 'sink' must have the same number of dimensions.
+### 'offsets' must be NULL or an integer vector with 1 offset per dimension
+### in 'x' (or in 'sink').
+### A default "write_to_sink" method is defined in DelayedArray-class.R.
+setGeneric("write_to_sink", signature=c("x", "sink"),
+ function(x, sink, offsets=NULL) standardGeneric("write_to_sink")
+)
+
+setGeneric("close")
+
+### The default "close" method for RealizationSink objects is a no-op.
+setMethod("close", "RealizationSink", function(con) invisible(NULL))
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### arrayRealizationSink objects
+###
+### The arrayRealizationSink class is a concrete RealizationSink subclass that
+### implements an in-memory realization sink.
+###
+
+setClass("arrayRealizationSink",
+ contains="RealizationSink",
+ representation(
+ result_envir="environment"
+ )
+)
+
+.get_arrayRealizationSink_result <- function(sink)
+{
+ get("result", envir=sink at result_envir)
+}
+
+arrayRealizationSink <- function(dim, dimnames=NULL, type="double")
+{
+ result <- array(get(type)(0), dim=dim, dimnames=dimnames)
+ result_envir <- new.env(parent=emptyenv())
+ assign("result", result, envir=result_envir)
+ new("arrayRealizationSink", result_envir=result_envir)
+}
+
+setMethod("write_to_sink", c("array", "arrayRealizationSink"),
+ function(x, sink, offsets=NULL)
+ {
+ x_dim <- dim(x)
+ result <- .get_arrayRealizationSink_result(sink)
+ sink_dim <- dim(result)
+ if (is.null(offsets)) {
+ stopifnot(identical(x_dim, sink_dim))
+ result[] <- x
+ } else {
+ stopifnot(length(x_dim) == length(sink_dim))
+ block_ranges <- IRanges(offsets, width=x_dim)
+ Nindex <- make_Nindex_from_block_ranges(
+ block_ranges, sink_dim,
+ expand.RangeNSBS=TRUE)
+ result <- replace_by_Nindex(result, Nindex, x)
+ }
+ assign("result", result, envir=sink at result_envir)
+ }
+)
+
+setAs("arrayRealizationSink", "DelayedArray",
+ function(from) DelayedArray(.get_arrayRealizationSink_result(from))
+)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Get/set the "realization backend" for the current session
+###
+
+.realization_backend_envir <- new.env(parent=emptyenv())
+
+getRealizationBackend <- function()
+{
+ BACKEND <- try(get("BACKEND", envir=.realization_backend_envir),
+ silent=TRUE)
+ if (is(BACKEND, "try-error"))
+ return(NULL)
+ BACKEND
+}
+
+.SUPPORTED_REALIZATION_BACKENDS <- data.frame(
+ BACKEND=c("RleArray", "HDF5Array"),
+ package=c("DelayedArray", "HDF5Array"),
+ realization_sink_class=c("RleRealizationSink", "HDF5RealizationSink"),
+ stringsAsFactors=FALSE
+)
+
+supportedRealizationBackends <- function()
+{
+ ans <- .SUPPORTED_REALIZATION_BACKENDS[ , c("BACKEND", "package")]
+ backend <- getRealizationBackend()
+ Lcol <- ifelse(ans[ , "BACKEND"] %in% backend, "->", "")
+ Rcol <- ifelse(ans[ , "BACKEND"] %in% backend, "<-", "")
+ cbind(data.frame(` `=Lcol, check.names=FALSE),
+ ans,
+ data.frame(` `=Rcol, check.names=FALSE))
+}
+
+.load_BACKEND_package <- function(BACKEND)
+{
+ if (!isSingleString(BACKEND))
+ stop(wmsg("'BACKEND' must be a single string or NULL"))
+ backends <- .SUPPORTED_REALIZATION_BACKENDS
+ m <- match(BACKEND, backends[ , "BACKEND"])
+ if (is.na(m))
+ stop(wmsg("\"", BACKEND, "\" is not a supported backend. Please ",
+ "use supportedRealizationBackends() to get the list of ",
+ "supported \"realization backends\"."))
+ package <- backends[ , "package"][[m]]
+ class_package <- attr(BACKEND, "package")
+ if (is.null(class_package)) {
+ attr(BACKEND, "package") <- package
+ } else if (!identical(package, class_package)) {
+ stop(wmsg("\"package\" attribute on supplied 'BACKEND' is ",
+ "inconsistent with package normally associated with ",
+ "this backend"))
+ }
+ library(package, character.only=TRUE)
+ stopifnot(getClass(BACKEND)@package == package)
+}
+
+.get_REALIZATION_SINK_CONSTRUCTOR <- function(BACKEND)
+{
+ backends <- .SUPPORTED_REALIZATION_BACKENDS
+ m <- match(BACKEND, backends[ , "BACKEND"])
+ realization_sink_class <- backends[ , "realization_sink_class"][[m]]
+ package <- backends[ , "package"][[m]]
+ REALIZATION_SINK_CONSTRUCTOR <- get(realization_sink_class,
+ envir=.getNamespace(package),
+ inherits=FALSE)
+ stopifnot(is.function(REALIZATION_SINK_CONSTRUCTOR))
+ stopifnot(identical(head(formalArgs(REALIZATION_SINK_CONSTRUCTOR), n=3L),
+ c("dim", "dimnames", "type")))
+ REALIZATION_SINK_CONSTRUCTOR
+}
+
+setRealizationBackend <- function(BACKEND=NULL)
+{
+ if (is.null(BACKEND)) {
+ remove(list=ls(envir=.realization_backend_envir),
+ envir=.realization_backend_envir)
+ return(invisible(NULL))
+ }
+ .load_BACKEND_package(BACKEND)
+ REALIZATION_SINK_CONSTRUCTOR <- .get_REALIZATION_SINK_CONSTRUCTOR(BACKEND)
+ assign("BACKEND", BACKEND,
+ envir=.realization_backend_envir)
+ assign("REALIZATION_SINK_CONSTRUCTOR", REALIZATION_SINK_CONSTRUCTOR,
+ envir=.realization_backend_envir)
+ return(invisible(NULL))
+}
+
+.get_realization_sink_constructor <- function()
+{
+ if (is.null(getRealizationBackend()))
+ return(arrayRealizationSink)
+ REALIZATION_SINK_CONSTRUCTOR <- try(get("REALIZATION_SINK_CONSTRUCTOR",
+ envir=.realization_backend_envir),
+ silent=TRUE)
+ if (is(REALIZATION_SINK_CONSTRUCTOR, "try-error"))
+ stop(wmsg("This operation requires a \"realization backend\". ",
+ "Please see '?setRealizationBackend' for how to set one."))
+ REALIZATION_SINK_CONSTRUCTOR
+}
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### realize()
+###
+
+setGeneric("realize", function(x, ...) standardGeneric("realize"))
+
+setMethod("realize", "ANY",
+ function(x, BACKEND=getRealizationBackend())
+ {
+ x <- DelayedArray(x)
+ if (is.null(BACKEND))
+ return(DelayedArray(as.array(x)))
+ .load_BACKEND_package(BACKEND)
+ ans <- as(x, BACKEND)
+ ## Temporarily needed because coercion to HDF5Array currently drops
+ ## the dimnames. See R/writeHDF5Array.R in the HDF5Array package for
+ ## more information about this.
+ ## TODO: Remove line below when this is addressed.
+ dimnames(ans) <- dimnames(x)
+ ans
+ }
+)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### RealizationSink constructor
+###
+
+RealizationSink <- function(dim, dimnames=NULL, type="double")
+{
+ .get_realization_sink_constructor()(dim, dimnames, type)
+}
+
diff --git a/R/show-utils.R b/R/show-utils.R
new file mode 100644
index 0000000..71ed6e7
--- /dev/null
+++ b/R/show-utils.R
@@ -0,0 +1,340 @@
+### =========================================================================
+### Compact display of an array-like object
+### -------------------------------------------------------------------------
+###
+### Nothing in this file is exported.
+###
+
+
+.format_as_character_vector <- function(x, justify)
+{
+ x_names <- names(x)
+ x <- as.vector(x)
+ if (typeof(x) == "character" && length(x) != 0L)
+ x <- paste0("\"", x, "\"")
+ names(x) <- x_names
+ format(x, justify=justify)
+}
+
+.format_as_character_matrix <- function(x, justify)
+{
+ x <- as.matrix(x)
+ if (typeof(x) == "character" && length(x) != 0L) {
+ x_dim <- dim(x)
+ x_dimnames <- dimnames(x)
+ x <- paste0("\"", x, "\"")
+ dim(x) <- x_dim
+ dimnames(x) <- x_dimnames
+ }
+ format(x, justify=justify)
+}
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### 1D array
+###
+
+.split_1D_array_names <- function(x_names, idx1, idx2, justify)
+{
+ make_elt_indices <- function(i) {
+ if (length(i) == 0L)
+ return(character(0))
+ paste0("[", i, "]", sep="")
+ }
+ if (is.null(x_names)) {
+ s1 <- make_elt_indices(idx1)
+ s2 <- make_elt_indices(idx2)
+ } else {
+ s1 <- x_names[idx1]
+ s2 <- x_names[idx2]
+ }
+ format(c(s1, ".", s2), justify=justify)
+}
+
+.prepare_1D_array_sample <- function(x, n1, n2, justify)
+{
+ x_len <- length(x)
+ x_names <- names(x)
+ if (x_len <= n1 + n2 + 1L) {
+ ans <- .format_as_character_vector(x, justify)
+ idx1 <- seq_len(x_len)
+ idx2 <- integer(0)
+ names(ans) <- .split_1D_array_names(x_names, idx1, idx2, justify)[idx1]
+ } else {
+ idx1 <- seq_len(n1)
+ idx2 <- seq(to=x_len, by=1L, length.out=n2)
+ ans1 <- .format_as_character_vector(x[idx1], justify)
+ ans2 <- .format_as_character_vector(x[idx2], justify)
+ ans <- c(ans1, ".", ans2)
+ names(ans) <- .split_1D_array_names(x_names, idx1, idx2, justify)
+ }
+ ans
+}
+
+.print_1D_array_data <- function(x, n1, n2)
+{
+ stopifnot(length(dim(x)) == 1L)
+ right <- type(x) != "character"
+ justify <- if (right) "right" else "left"
+ out <- .prepare_1D_array_sample(x, n1, n2, justify)
+ print(out, quote=FALSE, right=right, max=length(out))
+}
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### 2D array
+###
+
+.split_rownames <- function(x_rownames, idx1, idx2, justify)
+{
+ make_row_indices <- function(i) {
+ if (length(i) == 0L)
+ return(character(0))
+ paste0("[", i, ",]", sep="")
+ }
+ if (is.null(x_rownames)) {
+ s1 <- make_row_indices(idx1)
+ s2 <- make_row_indices(idx2)
+ } else {
+ s1 <- x_rownames[idx1]
+ s2 <- x_rownames[idx2]
+ }
+ max_width <- max(nchar(s1, type="width"), nchar(s2, type="width"))
+ if (max_width <= 1L) {
+ ellipsis <- "."
+ } else if (max_width == 2L) {
+ ellipsis <- ".."
+ } else {
+ ellipsis <- "..."
+ }
+ format(c(s1, ellipsis, s2), justify=justify)
+}
+
+.split_colnames <- function(x_colnames, idx1, idx2, justify)
+{
+ make_col_indices <- function(j) {
+ if (length(j) == 0L)
+ return(character(0))
+ paste0("[,", j, "]", sep="")
+ }
+ if (is.null(x_colnames)) {
+ s1 <- make_col_indices(idx1)
+ s2 <- make_col_indices(idx2)
+ } else {
+ s1 <- x_colnames[idx1]
+ s2 <- x_colnames[idx2]
+ }
+ ans <- format(c(s1, s2), justify=justify)
+ c(head(ans, n=length(s1)), "...", tail(ans, n=length(s2)))
+}
+
+.rsplit_2D_array_data <- function(x, m1, m2, justify)
+{
+ x_nrow <- nrow(x)
+ x_rownames <- rownames(x)
+ idx1 <- seq_len(m1)
+ idx2 <- seq(to=x_nrow, by=1L, length.out=m2)
+
+ ans1 <- .format_as_character_matrix(x[idx1, , drop=FALSE], justify)
+ ans2 <- .format_as_character_matrix(x[idx2, , drop=FALSE], justify)
+ dots <- rep.int(".", ncol(ans1))
+ ans <- rbind(ans1, matrix(dots, nrow=1L), ans2)
+
+ rownames(ans) <- .split_rownames(x_rownames, idx1, idx2, justify)
+ ans
+}
+
+.csplit_2D_array_data <- function(x, n1, n2, justify)
+{
+ x_ncol <- ncol(x)
+ x_colnames <- colnames(x)
+ idx1 <- seq_len(n1)
+ idx2 <- seq(to=x_ncol, by=1L, length.out=n2)
+
+ ans1 <- .format_as_character_matrix(x[ , idx1, drop=FALSE], justify)
+ ans2 <- .format_as_character_matrix(x[ , idx2, drop=FALSE], justify)
+ dots <- rep.int(".", nrow(ans1))
+ ans <- cbind(ans1, matrix(dots, ncol=1L), ans2)
+
+ colnames(ans) <- .split_colnames(x_colnames, idx1, idx2, justify)
+ ans
+}
+
+.split_2D_array_data <- function(x, m1, m2, n1, n2, justify)
+{
+ x_ncol <- ncol(x)
+ x_colnames <- colnames(x)
+ idx1 <- seq_len(n1)
+ idx2 <- seq(to=x_ncol, by=1L, length.out=n2)
+
+ x1 <- x[ , idx1, drop=FALSE]
+ x2 <- x[ , idx2, drop=FALSE]
+ ans1 <- .rsplit_2D_array_data(x1, m1, m2, justify)
+ ans2 <- .rsplit_2D_array_data(x2, m1, m2, justify)
+ dots <- rep.int(".", nrow(ans1))
+ ans <- cbind(ans1, matrix(dots, ncol=1L), ans2)
+
+ colnames(ans) <- .split_colnames(x_colnames, idx1, idx2, justify)
+ ans
+}
+
+.prepare_2D_array_sample <- function(x, m1, m2, n1, n2, justify)
+{
+ ## An attempt at reducing the nb of columns to display when 'x' has
+ ## dimnames so the object fits in getOption("width"). Won't necessarily
+ ## pick up the optimal nb of columns so should be revisited at some point.
+ x_rownames <- rownames(x)
+ if (is.null(x_rownames)) {
+ rownames_width <- 6L
+ } else {
+ rownames_width <- nchar(x_rownames[[1L]])
+ }
+ half_width <- (getOption("width") - rownames_width) / 2
+ x_colnames <- colnames(x)
+ if (!is.null(x_colnames)) {
+ colnames1 <- head(x_colnames, n=n1)
+ colnames2 <- tail(x_colnames, n=n2)
+ n1 <- pmax(sum(cumsum(nchar(colnames1) + 1L) < half_width), 1L)
+ n2 <- pmax(sum(cumsum(nchar(colnames2) + 1L) < half_width), 1L)
+ }
+
+ x_nrow <- nrow(x)
+ x_ncol <- ncol(x)
+ if (x_nrow <= m1 + m2 + 1L) {
+ if (x_ncol <= n1 + n2 + 1L) {
+ ans <- .format_as_character_matrix(x, justify)
+ ## Only needed because of this bug in base::print.default:
+ ## https://stat.ethz.ch/pipermail/r-devel/2016-March/072479.html
+ ## TODO: Remove when the bug is fixed.
+ if (is.null(colnames(ans))) {
+ idx1 <- seq_len(ncol(ans))
+ idx2 <- integer(0)
+ colnames(ans) <- .split_colnames(NULL, idx1, idx2,
+ justify)[idx1]
+ }
+ } else {
+ ans <- .csplit_2D_array_data(x, n1, n2, justify)
+ }
+ } else {
+ if (x_ncol <= n1 + n2 + 1L) {
+ ans <- .rsplit_2D_array_data(x, m1, m2, justify)
+ ## Only needed because of this bug in base::print.default:
+ ## https://stat.ethz.ch/pipermail/r-devel/2016-March/072479.html
+ ## TODO: Remove when the bug is fixed.
+ if (is.null(colnames(ans))) {
+ idx1 <- seq_len(ncol(ans))
+ idx2 <- integer(0)
+ colnames(ans) <- .split_colnames(NULL, idx1, idx2,
+ justify)[idx1]
+ }
+ } else {
+ ans <- .split_2D_array_data(x, m1, m2, n1, n2, justify)
+ }
+ }
+ ans
+}
+
+.print_2D_array_data <- function(x, m1, m2, n1, n2)
+{
+ stopifnot(length(dim(x)) == 2L)
+ right <- type(x) != "character"
+ justify <- if (right) "right" else "left"
+ out <- .prepare_2D_array_sample(x, m1, m2, n1, n2, justify)
+ print(out, quote=FALSE, right=right, max=length(out))
+}
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Array of arbitrary dimensions
+###
+
+.print_2D_slices <- function(x, m1, m2, n1, n2, blocks, idx)
+{
+ x_dimnames <- dimnames(x)
+ for (i in idx) {
+ Nindex <- get_array_block_Nindex(blocks, i)
+ cat(Nindex_as_string(Nindex, x_dimnames), "\n", sep="")
+ slice <- subset_by_Nindex(x, Nindex)
+ dim(slice) <- dim(slice)[1:2]
+ .print_2D_array_data(slice, m1, m2, n1, n2)
+ cat("\n")
+ }
+}
+
+.print_nD_array_data <- function(x, n1, n2)
+{
+ x_dim <- dim(x)
+ x_nrow <- x_dim[[1L]]
+ x_ncol <- x_dim[[2L]]
+ if (x_ncol <= 5L) {
+ if (x_nrow <= 3L) {
+ m1 <- m2 <- 3L # print all rows of each slice
+ z1 <- z2 <- 3L # print first 3 and last 3 slices
+ } else {
+ m1 <- m2 <- 2L # print first 2 and last 2 rows of each slice
+ z1 <- z2 <- 1L # print only first and last slices
+ }
+ } else {
+ if (x_nrow <= 3L) {
+ m1 <- m2 <- 2L # print first 2 and last 2 rows of each slice
+ z1 <- z2 <- 2L # print first 2 and last 2 slices
+ } else {
+ m1 <- m2 <- 2L # print first 2 and last 2 rows of each slice
+ z1 <- z2 <- 1L # print only first and last slices
+ }
+ }
+ blocks <- new("ArrayBlocks", dim=x_dim, N=3L, by=1L)
+ nblock <- length(blocks)
+ if (nblock <= z1 + z2 + 1L) {
+ idx <- seq_len(nblock)
+ .print_2D_slices(x, m1, m2, n1, n2, blocks, idx)
+ } else {
+ idx1 <- seq_len(z1)
+ idx2 <- seq(to=nblock, by=1L, length.out=z2)
+ .print_2D_slices(x, m1, m2, n1, n2, blocks, idx1)
+ cat("...\n\n")
+ .print_2D_slices(x, m1, m2, n1, n2, blocks, idx2)
+ }
+}
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### show_compact_array()
+###
+
+.print_array_data <- function(x, n1, n2)
+{
+ x_dim <- dim(x)
+ if (length(x_dim) == 1L)
+ return(.print_1D_array_data(x, n1, n2))
+ if (length(x_dim) == 2L) {
+ nhead <- get_showHeadLines()
+ ntail <- get_showTailLines()
+ return(.print_2D_array_data(x, nhead, ntail, n1, n2))
+ }
+ .print_nD_array_data(x, n1, n2)
+}
+
+show_compact_array <- function(object)
+{
+ object_class <- class(object)
+ object_dim <- dim(object)
+ dim_in1string <- paste0(object_dim, collapse=" x ")
+ object_type <- type(object)
+ if (any(object_dim == 0L)) {
+ cat(sprintf("<%s> %s object of type \"%s\"\n",
+ dim_in1string, object_class, object_type))
+ } else {
+ cat(sprintf("%s object of %s %s%s:\n",
+ object_class, dim_in1string, object_type,
+ ifelse(any(object_dim >= 2L), "s", "")))
+ if (object_type == "integer") {
+ n1 <- n2 <- 4L
+ } else {
+ n1 <- 3L
+ n2 <- 2L
+ }
+ .print_array_data(object, n1, n2)
+ }
+}
+
diff --git a/R/utils.R b/R/utils.R
new file mode 100644
index 0000000..a9d0b0d
--- /dev/null
+++ b/R/utils.R
@@ -0,0 +1,205 @@
+### =========================================================================
+### Some low-level utilities
+### -------------------------------------------------------------------------
+###
+### Nothing in this file is exported.
+###
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Manipulating an Nindex
+###
+### An Nindex is a "multidimensional subsetting index" is a list with one
+### subscript per dimension in the object to subset. Missing subscripts are
+### represented by NULLs. Before it can be used to actually subset an
+### array-like object, the NULLs must be replaced with elements of class "name".
+
+### Used in HDF5Array!
+expand_Nindex_RangeNSBS <- function(Nindex)
+{
+ stopifnot(is.list(Nindex))
+ RangeNSBS_idx <- which(vapply(Nindex, is, logical(1), "RangeNSBS"))
+ Nindex[RangeNSBS_idx] <- lapply(Nindex[RangeNSBS_idx], as.integer)
+ Nindex
+}
+
+.Nindex2subscripts <- function(Nindex, x)
+{
+ stopifnot(is.list(Nindex), length(Nindex) == length(dim(x)))
+
+ if (is.array(x))
+ Nindex <- expand_Nindex_RangeNSBS(Nindex)
+
+ ## Replace NULLs with list elements of class "name".
+ subscripts <- rep.int(alist(foo=), length(Nindex))
+ names(subscripts ) <- names(Nindex)
+ not_missing_idx <- which(!S4Vectors:::sapply_isNULL(Nindex))
+ subscripts[not_missing_idx] <- Nindex[not_missing_idx]
+
+ subscripts
+}
+
+subset_by_Nindex <- function(x, Nindex, drop=FALSE)
+{
+ Nindex <- .Nindex2subscripts(Nindex, x)
+ do.call(`[`, c(list(x), Nindex, list(drop=drop)))
+}
+
+replace_by_Nindex <- function(x, Nindex, value)
+{
+ Nindex <- .Nindex2subscripts(Nindex, x)
+ do.call(`[<-`, c(list(x), Nindex, list(value=value)))
+}
+
+Nindex_as_string <- function(Nindex, dimnames=NULL)
+{
+ stopifnot(is.list(Nindex))
+ missing_idx <- which(S4Vectors:::sapply_isNULL(Nindex))
+ Nindex[missing_idx] <- ""
+ s <- as.character(Nindex)
+ RangeNSBS_idx <- which(vapply(Nindex, is, logical(1), "RangeNSBS"))
+ s[RangeNSBS_idx] <- lapply(Nindex[RangeNSBS_idx],
+ function(i) paste0(i at subscript, collapse=":")
+ )
+ if (!is.null(dimnames)) {
+ stopifnot(is.list(dimnames), length(Nindex) == length(dimnames))
+ usename_idx <- which(nzchar(s) &
+ lengths(Nindex) == 1L &
+ lengths(dimnames) != 0L)
+ s[usename_idx] <- mapply(`[`, dimnames[usename_idx],
+ Nindex[usename_idx],
+ SIMPLIFY=FALSE)
+ }
+ paste0(s, collapse=", ")
+}
+
+### Used in HDF5Array!
+### Return the lengths of the subscripts in 'Nindex'. The length of a
+### missing subscript is the length it would have after expansion.
+get_Nindex_lengths <- function(Nindex, dim)
+{
+ stopifnot(is.list(Nindex), length(Nindex) == length(dim))
+ ans <- lengths(Nindex)
+ missing_idx <- which(S4Vectors:::sapply_isNULL(Nindex))
+ ans[missing_idx] <- dim[missing_idx]
+ ans
+}
+
+### 'dimnames' must be NULL or a list of the same length as 'Nindex'.
+### 'along' must be an integer >= 1 and <= length(Nindex).
+get_Nindex_names_along <- function(Nindex, dimnames, along)
+{
+ stopifnot(is.list(Nindex))
+ i <- Nindex[[along]]
+ if (is.null(i))
+ return(dimnames[[along]])
+ names(i)
+}
+
+### Used in HDF5Array!
+### Return an Nindex ("multidimensional subsetting index").
+make_Nindex_from_block_ranges <- function(block_ranges, dim,
+ expand.RangeNSBS=FALSE)
+{
+ stopifnot(is(block_ranges, "Ranges"),
+ is.integer(dim))
+ ndim <- length(dim)
+ block_offsets <- start(block_ranges)
+ block_dim <- width(block_ranges)
+ stopifnot(length(block_ranges) == ndim,
+ all(block_offsets >= 1L),
+ all(block_offsets + block_dim - 1L <= dim))
+ Nindex <- vector(mode="list", length=ndim)
+ is_not_missing <- block_dim < dim
+ if (expand.RangeNSBS) {
+ expand_idx <- which(is_not_missing)
+ } else {
+ is_width1 <- block_dim == 1L
+ expand_idx <- which(is_not_missing & is_width1)
+ RangeNSBS_idx <- which(is_not_missing & !is_width1)
+ Nindex[RangeNSBS_idx] <- lapply(RangeNSBS_idx,
+ function(i) {
+ start <- block_offsets[[i]]
+ end <- start + block_dim[[i]] - 1L
+ upper_bound <- dim[[i]]
+ new2("RangeNSBS", subscript=c(start, end),
+ upper_bound=upper_bound,
+ check=FALSE)
+ }
+ )
+ }
+ Nindex[expand_idx] <- as.list(block_ranges[expand_idx])
+ Nindex
+}
+
+### Convert 'Nindex' to a "linear index".
+### Return the "linear index" as an integer vector if prod(dim) <=
+### .Machine$integer.max, otherwise as a vector of doubles.
+to_linear_index <- function(Nindex, dim)
+{
+ stopifnot(is.list(Nindex), is.integer(dim), length(Nindex) == length(dim))
+ if (prod(dim) <= .Machine$integer.max) {
+ ans <- p <- 1L
+ } else {
+ ans <- p <- 1
+ }
+ for (along in seq_along(Nindex)) {
+ d <- dim[[along]]
+ i <- Nindex[[along]]
+ if (is.null(i))
+ i <- seq_len(d)
+ ans <- rep((i - 1L) * p, each=length(ans)) + ans
+ p <- p * d
+ }
+ ans
+}
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Translate an index into the whole to an index into the parts
+###
+### This is .rowidx2rowkeys() from BSgenome/R/OnDiskLongTable-class.R, copied
+### here and renamed get_part_index() !
+### TODO: Put it somewhere else where it can be shared.
+###
+
+.breakpoints2offsets <- function(breakpoints)
+{
+ breakpoints_len <- length(breakpoints)
+ if (breakpoints_len == 0L)
+ return(integer(0))
+ c(0L, breakpoints[-breakpoints_len])
+}
+
+### breakpoints: integer vector of break points that define the parts.
+### idx: index into the whole as an integer vector.
+### Return a list of 2 integer vectors parallel to 'idx'. The 1st vector
+### contains part numbers and the 2nd vector indices into the parts.
+### In addition, if 'breakpoints' has names (part names) then they are
+### propagated to the 1st vector.
+get_part_index <- function(idx, breakpoints)
+{
+ part_idx <- findInterval(idx, breakpoints + 1L) + 1L
+ names(part_idx) <- names(breakpoints)[part_idx]
+ rel_idx <- idx - .breakpoints2offsets(unname(breakpoints))[part_idx]
+ list(part_idx, rel_idx)
+}
+
+split_part_index <- function(part_index, npart)
+{
+ ans <- rep.int(list(integer(0)), npart)
+ tmp <- split(unname(part_index[[2L]]), part_index[[1L]])
+ ans[as.integer(names(tmp))] <- tmp
+ ans
+}
+
+get_rev_index <- function(part_index)
+{
+ f <- part_index[[1L]]
+ idx <- split(seq_along(f), f)
+ idx <- unlist(idx, use.names=FALSE)
+ rev_idx <- integer(length(idx))
+ rev_idx[idx] <- seq_along(idx)
+ rev_idx
+}
+
diff --git a/R/zzz.R b/R/zzz.R
new file mode 100644
index 0000000..99845d8
--- /dev/null
+++ b/R/zzz.R
@@ -0,0 +1,19 @@
+.onLoad <- function(libname, pkgname)
+{
+ options(DelayedArray.block.size=DEFAULT_BLOCK_SIZE)
+}
+
+.test <- function()
+{
+ cat("------------------------------------------------------------------\n")
+ cat("Running tests with realization backend set to \"RleArray\" ...\n")
+ setRealizationBackend("RleArray")
+ BiocGenerics:::testPackage("DelayedArray")
+
+ cat("\n")
+ cat("------------------------------------------------------------------\n")
+ cat("Running tests with realization backend set to \"HDF5Array\" ...\n")
+ setRealizationBackend("HDF5Array")
+ BiocGenerics:::testPackage("DelayedArray")
+}
+
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..9f43889
--- /dev/null
+++ b/TODO
@@ -0,0 +1,19 @@
+- Document global option DelayedArray.block.size
+
+- Add man page and unit tests for statistical methods defined in
+ DelayedArray-stats.R
+
+- Make DelayedArray contain Annotated from S4Vectors.
+
+- Add more examples to the man pages (using the toy dataset).
+
+- Add unit tests for round() and signif() (Math2 group).
+
+- Add vignette.
+
+- Support more matrix- and array-like operations.
+
+- How well supported are DelayedArray of type "character"?
+
+- Add more unit tests.
+
diff --git a/debian/changelog b/debian/changelog
deleted file mode 100644
index 3f46516..0000000
--- a/debian/changelog
+++ /dev/null
@@ -1,14 +0,0 @@
-r-bioc-delayedarray (0.2.7-1) unstable; urgency=medium
-
- * New upstream version
- * Update Build-Depends: r-bioc-s4vectors (>= 0.14.3)
- * Add missing dependency for autopkgtest: r-cran-runit
- * Add copyright year to debian/copyright
-
- -- Graham Inggs <ginggs at debian.org> Tue, 13 Jun 2017 11:34:56 +0200
-
-r-bioc-delayedarray (0.2.4-1) unstable; urgency=medium
-
- * Initial release (Closes: #863415)
-
- -- Graham Inggs <ginggs at debian.org> Fri, 26 May 2017 17:15:55 +0200
diff --git a/debian/compat b/debian/compat
deleted file mode 100644
index f599e28..0000000
--- a/debian/compat
+++ /dev/null
@@ -1 +0,0 @@
-10
diff --git a/debian/control b/debian/control
deleted file mode 100644
index 39f5b93..0000000
--- a/debian/control
+++ /dev/null
@@ -1,31 +0,0 @@
-Source: r-bioc-delayedarray
-Section: gnu-r
-Priority: optional
-Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
-Uploaders: Graham Inggs <ginggs at debian.org>
-Build-Depends: debhelper (>= 10),
- dh-r,
- r-base-dev,
- r-bioc-biocgenerics,
- r-bioc-iranges,
- r-bioc-s4vectors (>= 0.14.3),
- r-cran-matrixstats
-Standards-Version: 3.9.8
-Homepage: https://bioconductor.org/packages/DelayedArray/
-Vcs-Browser: https://anonscm.debian.org/viewvc/debian-med/trunk/packages/R/r-bioc-delayedarray/trunk/
-Vcs-Svn: svn://anonscm.debian.org/debian-med/trunk/packages/R/r-bioc-delayedarray/trunk/
-
-Package: r-bioc-delayedarray
-Architecture: all
-Depends: ${R:Depends}, ${misc:Depends}, ${shlibs:Depends}
-Recommends: ${R:Recommends}
-Suggests: ${R:Suggests}
-Description: BioConductor delayed operations on array-like objects
- Wrapping an array-like object (typically an on-disk object) in
- a DelayedArray object allows one to perform common array operations
- on it without loading the object in memory. In order to reduce memory
- usage and optimize performance, operations on the object are either
- delayed or executed using a block processing mechanism. Note that this
- also works on in-memory array-like objects like DataFrame objects
- (typically with Rle columns), Matrix objects, and ordinary arrays and
- data frames.
diff --git a/debian/copyright b/debian/copyright
deleted file mode 100644
index cfcbbb9..0000000
--- a/debian/copyright
+++ /dev/null
@@ -1,106 +0,0 @@
-Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
-Upstream-Name: DelayedArray
-Upstream-Contact: Hervé Pagès <hpages at fredhutch.org>
-Source: https://bioconductor.org/packages/DelayedArray/
-
-Files: *
-Copyright: 2017 Hervé Pagès <hpages at fredhutch.org>
-License: Artistic-2.0
-
-Files: debian/*
-Copyright: 2017 Graham Inggs <ginggs at debian.org>
-License: Artistic-2.0
-
-License: Artistic-2.0
- The "Artistic License"
- .
- Preamble
- .
- 1. You may make and give away verbatim copies of the source form of the
- Standard Version of this Package without restriction, provided that
- you duplicate all of the original copyright notices and associated
- disclaimers.
- .
- 2. You may apply bug fixes, portability fixes and other modifications
- derived from the Public Domain or from the Copyright Holder. A
- Package modified in such a way shall still be considered the Standard
- Version.
- .
- 3. You may otherwise modify your copy of this Package in any way,
- provided that you insert a prominent notice in each changed file stating
- how and when you changed that file, and provided that you do at least
- ONE of the following:
- .
- a) place your modifications in the Public Domain or otherwise make them
- Freely Available, such as by posting said modifications to Usenet or
- an equivalent medium, or placing the modifications on a major archive
- site such as uunet.uu.net, or by allowing the Copyright Holder to include
- your modifications in the Standard Version of the Package.
- .
- b) use the modified Package only within your corporation or organization.
- .
- c) rename any non-standard executables so the names do not conflict
- with standard executables, which must also be provided, and provide
- a separate manual page for each non-standard executable that clearly
- documents how it differs from the Standard Version.
- .
- d) make other distribution arrangements with the Copyright Holder.
- .
- 4. You may distribute the programs of this Package in object code or
- executable form, provided that you do at least ONE of the following:
- .
- a) distribute a Standard Version of the executables and library files,
- together with instructions (in the manual page or equivalent) on where
- to get the Standard Version.
- .
- b) accompany the distribution with the machine-readable source of
- the Package with your modifications.
- .
- c) give non-standard executables non-standard names, and clearly
- document the differences in manual pages (or equivalent), together
- with instructions on where to get the Standard Version.
- .
- d) make other distribution arrangements with the Copyright Holder.
- .
- 5. You may charge a reasonable copying fee for any distribution of this
- Package. You may charge any fee you choose for support of this Package.
- You may not charge a fee for this Package itself. However, you may
- distribute this Package in aggregate with other (possibly commercial)
- programs as part of a larger (possibly commercial) software distribution
- provided that you do not advertise this Package as a product of your
- own. You may embed this Package's interpreter within an executable of
- yours (by linking); this shall be construed as a mere form of
- aggregation, provided that the complete Standard Version of the
- interpreter is so embedded.
- .
- 6. The scripts and library files supplied as input to or produced as
- output from the programs of this Package do not automatically fall under
- the copyright of this Package, but belong to whoever generated them, and
- may be sold commercially, and may be aggregated with this Package. If
- such scripts or library files are aggregated with this Package via the
- so-called "undump" or "unexec" methods of producing a binary executable
- image, then distribution of such an image shall neither be construed as
- a distribution of this Package nor shall it fall under the restrictions
- of Paragraphs 3 and 4, provided that you do not represent such an
- executable image as a Standard Version of this Package.
- .
- 7. C subroutines (or comparably compiled subroutines in other
- languages) supplied by you and linked into this Package in order to
- emulate subroutines and variables of the language defined by this
- Package shall not be considered part of this Package, but are the
- equivalent of input as in Paragraph 6, provided these subroutines do
- not change the language in any way that would cause it to fail the
- regression tests for the language.
- .
- 8. Aggregation of this Package with a commercial distribution is always
- permitted provided that the use of this Package is embedded; that is,
- when no overt attempt is made to make this Package's interfaces visible
- to the end user of the commercial distribution. Such use shall not be
- construed as a distribution of this Package.
- .
- 9. The name of the Copyright Holder may not be used to endorse or promote
- products derived from this software without specific prior written permission.
- .
- 10. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
- IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
- WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/debian/rules b/debian/rules
deleted file mode 100755
index 68d9a36..0000000
--- a/debian/rules
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/usr/bin/make -f
-
-%:
- dh $@ --buildsystem R
diff --git a/debian/source/format b/debian/source/format
deleted file mode 100644
index 163aaf8..0000000
--- a/debian/source/format
+++ /dev/null
@@ -1 +0,0 @@
-3.0 (quilt)
diff --git a/debian/tests/bioc b/debian/tests/bioc
deleted file mode 100644
index adb43e4..0000000
--- a/debian/tests/bioc
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/bin/sh -e
-
-LC_ALL=C R --no-save -e 'BiocGenerics:::testPackage("DelayedArray")'
diff --git a/debian/tests/control b/debian/tests/control
deleted file mode 100644
index 4277d11..0000000
--- a/debian/tests/control
+++ /dev/null
@@ -1,3 +0,0 @@
-Tests: bioc
-Depends: @, r-cran-runit, r-bioc-biocgenerics, r-bioc-genefilter
-Restrictions: allow-stderr
diff --git a/debian/watch b/debian/watch
deleted file mode 100644
index e04123b..0000000
--- a/debian/watch
+++ /dev/null
@@ -1,3 +0,0 @@
-version=4
-opts=downloadurlmangle=s?^(.*)\.\.?https:$1packages/release/bioc? \
-https://bioconductor.org/packages/release/bioc/html/DelayedArray.html .*/DelayedArray_(.*).tar.gz
diff --git a/inst/unitTests/test_ArrayBlocks-class.R b/inst/unitTests/test_ArrayBlocks-class.R
new file mode 100644
index 0000000..a367281
--- /dev/null
+++ b/inst/unitTests/test_ArrayBlocks-class.R
@@ -0,0 +1,71 @@
+#setRealizationBackend("RleArray")
+#setRealizationBackend("HDF5Array")
+
+test_split_array_in_blocks <- function()
+{
+ split_array_in_blocks <- DelayedArray:::split_array_in_blocks
+ unsplit_array_from_blocks <- DelayedArray:::unsplit_array_from_blocks
+
+ a1 <- array(1:300, c(3, 10, 2, 5))
+ A1 <- realize(a1)
+
+ for (max_block_len in c(1:7, 29:31, 39:40, 59:60, 119:120)) {
+ subarrays <- split_array_in_blocks(a1, max_block_len)
+ current <- unsplit_array_from_blocks(subarrays, a1)
+ checkIdentical(a1, current)
+
+ subarrays <- split_array_in_blocks(A1, max_block_len)
+ current <- unsplit_array_from_blocks(subarrays, A1)
+ checkIdentical(a1, current)
+ }
+}
+
+test_split_matrix_in_blocks <- function()
+{
+ split_array_in_blocks <- DelayedArray:::split_array_in_blocks
+ unsplit_array_from_blocks <- DelayedArray:::unsplit_array_from_blocks
+
+ a1 <- array(1:300, c(3, 10, 2, 5))
+ A1 <- realize(a1)
+
+ m1 <- a1[2, c(9, 3:7), 2, -4]
+ M1a <- drop(A1[2, c(9, 3:7), 2, -4])
+ checkIdentical(m1, as.matrix(M1a))
+
+ M1b <- realize(m1)
+ checkIdentical(m1, as.matrix(M1b))
+
+ tm1 <- t(m1)
+ tM1a <- t(M1a)
+ checkIdentical(tm1, as.matrix(tM1a))
+
+ tM1b <- t(M1b)
+ checkIdentical(tm1, as.matrix(tM1b))
+
+ for (max_block_len in seq_len(length(m1) * 2L)) {
+ subarrays <- split_array_in_blocks(m1, max_block_len)
+ current <- unsplit_array_from_blocks(subarrays, m1)
+ checkIdentical(m1, current)
+
+ subarrays <- split_array_in_blocks(M1a, max_block_len)
+ current <- unsplit_array_from_blocks(subarrays, M1a)
+ checkIdentical(m1, current)
+
+ subarrays <- split_array_in_blocks(M1b, max_block_len)
+ current <- unsplit_array_from_blocks(subarrays, M1b)
+ checkIdentical(m1, current)
+
+ subarrays <- split_array_in_blocks(tm1, max_block_len)
+ current <- unsplit_array_from_blocks(subarrays, tm1)
+ checkIdentical(tm1, current)
+
+ subarrays <- split_array_in_blocks(tM1a, max_block_len)
+ current <- unsplit_array_from_blocks(subarrays, tM1a)
+ checkIdentical(tm1, current)
+
+ subarrays <- split_array_in_blocks(tM1b, max_block_len)
+ current <- unsplit_array_from_blocks(subarrays, tM1b)
+ checkIdentical(tm1, current)
+ }
+}
+
diff --git a/inst/unitTests/test_DelayedArray-class.R b/inst/unitTests/test_DelayedArray-class.R
new file mode 100644
index 0000000..cea6bfb
--- /dev/null
+++ b/inst/unitTests/test_DelayedArray-class.R
@@ -0,0 +1,290 @@
+#setRealizationBackend("RleArray")
+#setRealizationBackend("HDF5Array")
+
+test_DelayedArray_constructor <- function()
+{
+ check_DelayedMatrix <- function(m, M) {
+ checkTrue(is(M, "DelayedMatrix"))
+ checkTrue(validObject(M, complete=TRUE))
+ checkIdentical(typeof(m), type(M))
+ checkIdentical(typeof(m), type(M[ , -2]))
+ checkIdentical(typeof(m), type(M[ , 0]))
+ checkIdentical(typeof(m), type(M[0 , ]))
+ checkIdentical(dim(m), dim(M))
+ checkIdentical(rownames(m), rownames(M))
+ checkIdentical(colnames(m), colnames(M))
+ }
+
+ ## 6-col seed
+
+ DF1 <- DataFrame(aa=1:5,
+ bb=letters[1:5],
+ cc=Rle(c(-0.25, 7.1), 3:2),
+ dd=factor(c("E", "B", "A", "A", "B"), levels=LETTERS),
+ ee=Rle(factor(c("d", "b"), levels=letters[1:5]), 2:3),
+ ff=c(FALSE, TRUE, TRUE, FALSE, FALSE))
+
+ df1 <- as.data.frame(DF1)
+ m1 <- as.matrix(df1) # character matrix
+
+ M1a <- DelayedArray(m1) # matrix seed
+ check_DelayedMatrix(m1, M1a)
+ checkIdentical(m1, as.matrix(M1a))
+ checkIdentical(m1, as.array(M1a))
+
+ M1b <- DelayedArray(df1) # data.frame seed
+ rownames(M1b) <- NULL
+ check_DelayedMatrix(m1, M1b)
+
+ M1c <- DelayedArray(DF1) # DataFrame seed
+ check_DelayedMatrix(m1, M1c)
+
+ ## 5-col seed
+
+ DF2 <- DF1[ , -2, drop=FALSE]
+ df2 <- as.data.frame(DF2)
+ m2 <- as.matrix(df2) # character matrix
+
+ M2a <- DelayedArray(m2) # matrix seed
+ check_DelayedMatrix(m2, M2a)
+ checkIdentical(m2, as.matrix(M2a))
+ checkIdentical(m2, as.array(M2a))
+
+ M2b <- DelayedArray(df2) # data.frame seed
+ rownames(M2b) <- NULL
+ check_DelayedMatrix(m2, M2b)
+
+ M2c <- DelayedArray(DF2) # DataFrame seed
+ check_DelayedMatrix(m2, M2c)
+
+ ## 4-col seed
+
+ DF3 <- DF1[ , -c(2, 4), drop=FALSE]
+ df3 <- as.data.frame(DF3)
+ m3 <- as.matrix(df3) # character matrix
+
+ M3a <- DelayedArray(m3) # matrix seed
+ check_DelayedMatrix(m3, M3a)
+ checkIdentical(m3, as.matrix(M3a))
+ checkIdentical(m3, as.array(M3a))
+
+ M3b <- DelayedArray(df3) # data.frame seed
+ rownames(M3b) <- NULL
+ check_DelayedMatrix(m3, M3b)
+
+ M3c <- DelayedArray(DF3) # DataFrame seed
+ check_DelayedMatrix(m3, M3c)
+
+ ## 3-col seed
+
+ DF4 <- DF1[ , -c(2, 4, 5), drop=FALSE]
+ df4 <- as.data.frame(DF4)
+ m4 <- as.matrix(df4) # double matrix
+
+ M4a <- DelayedArray(m4) # matrix seed
+ check_DelayedMatrix(m4, M4a)
+ checkIdentical(m4, as.matrix(M4a))
+ checkIdentical(m4, as.array(M4a))
+
+ M4b <- DelayedArray(df4) # data.frame seed
+ rownames(M4b) <- NULL
+ check_DelayedMatrix(m4, M4b)
+ checkIdentical(m4, as.matrix(M4b))
+ checkIdentical(m4, as.array(M4b))
+
+ M4c <- DelayedArray(DF4) # DataFrame seed
+ check_DelayedMatrix(m4, M4c)
+ checkIdentical(m4, as.matrix(M4c))
+ checkIdentical(m4, as.array(M4c))
+
+ ## 2-col seed
+
+ DF5 <- DF1[ , c(1, 6), drop=FALSE]
+ df5 <- as.data.frame(DF5)
+ m5 <- as.matrix(df5) # integer matrix
+
+ M5a <- DelayedArray(m5) # matrix seed
+ check_DelayedMatrix(m5, M5a)
+ checkIdentical(m5, as.matrix(M5a))
+ checkIdentical(m5, as.array(M5a))
+
+ M5b <- DelayedArray(df5) # data.frame seed
+ rownames(M5b) <- NULL
+ check_DelayedMatrix(m5, M5b)
+ checkIdentical(m5, as.matrix(M5b))
+ checkIdentical(m5, as.array(M5b))
+
+ M5c <- DelayedArray(DF5) # DataFrame seed
+ check_DelayedMatrix(m5, M5c)
+ checkIdentical(m5, as.matrix(M5c))
+ checkIdentical(m5, as.array(M5c))
+
+ ## 1-col seed
+
+ DF6 <- DF1[ , 6, drop=FALSE]
+ df6 <- as.data.frame(DF6)
+ m6 <- as.matrix(df6) # logical matrix
+
+ M6a <- DelayedArray(m6) # matrix seed
+ check_DelayedMatrix(m6, M6a)
+ checkIdentical(m6, as.matrix(M6a))
+ checkIdentical(m6, as.array(M6a))
+
+ M6b <- DelayedArray(df6) # data.frame seed
+ rownames(M6b) <- NULL
+ check_DelayedMatrix(m6, M6b)
+ checkIdentical(m6, as.matrix(M6b))
+ checkIdentical(m6, as.array(M6b))
+
+ M6c <- DelayedArray(DF6) # DataFrame seed
+ check_DelayedMatrix(m6, M6c)
+ checkIdentical(m6, as.matrix(M6c))
+ checkIdentical(m6, as.array(M6c))
+}
+
+test_DelayedArray_subsetting <- function()
+{
+ a <- array(runif(78000), dim=c(600, 26, 5),
+ dimnames=list(NULL, LETTERS, letters[1:5]))
+ A <- realize(a)
+ checkTrue(is(A, "DelayedArray"))
+ checkTrue(validObject(A, complete=TRUE))
+
+ checkIdentical(A, A[ , , ])
+ checkIdentical(a, as.array(A[ , , ]))
+ checkException(A[ , 27, ], silent=TRUE)
+ checkException(A[ , , "f"], silent=TRUE)
+
+ target <- a[0, , ]
+ checkIdentical(target, as.array(A[0, , ]))
+ checkIdentical(target, as.array(A[integer(0), , ]))
+ checkIdentical(target, as.array(A[NULL, , ]))
+
+ target <- a[ , 0, ]
+ checkIdentical(target, as.array(A[ , 0, ]))
+ checkIdentical(target, as.array(A[ , integer(0), ]))
+ checkIdentical(target, as.array(A[ , NULL, ]))
+
+ target <- a[ , , 0]
+ checkIdentical(target, as.array(A[ , , 0]))
+ checkIdentical(target, as.array(A[ , , integer(0)]))
+ checkIdentical(target, as.array(A[ , , NULL]))
+
+ target <- a[ , 0, 0]
+ checkIdentical(target, as.array(A[ , 0, 0]))
+ checkIdentical(target, as.array(A[ , integer(0), integer(0)]))
+ checkIdentical(target, as.array(A[ , NULL, NULL]))
+
+ target <- a[0, , 0]
+ checkIdentical(target, as.array(A[0, , 0]))
+ checkIdentical(target, as.array(A[integer(0), , integer(0)]))
+ checkIdentical(target, as.array(A[NULL, , NULL]))
+
+ target <- a[0, 0, ]
+ checkIdentical(target, as.array(A[0, 0, ]))
+ checkIdentical(target, as.array(A[integer(0), integer(0), ]))
+ checkIdentical(target, as.array(A[NULL, NULL, ]))
+
+ target <- a[0, 0, 0]
+ checkIdentical(target, as.array(A[0, 0, 0]))
+ checkIdentical(target, as.array(A[integer(0), integer(0), integer(0)]))
+ checkIdentical(target, as.array(A[NULL, NULL, NULL]))
+
+ i <- c(FALSE, TRUE)
+ target <- a[i, , ]
+ checkIdentical(target, as.array(A[i, , ]))
+ target <- a[i, 0, ]
+ checkIdentical(target, as.array(A[i, 0, ]))
+ checkIdentical(target, as.array(A[i, integer(0), ]))
+ checkIdentical(target, as.array(A[i, NULL, ]))
+ i <- c(FALSE, TRUE, TRUE, FALSE, FALSE)
+ target <- a[i, , ]
+ checkIdentical(target, as.array(A[i, , ]))
+ target <- a[i, 0, ]
+ checkIdentical(target, as.array(A[i, 0, ]))
+ checkIdentical(target, as.array(A[i, integer(0), ]))
+ checkIdentical(target, as.array(A[i, NULL, ]))
+
+ k <- c(TRUE, FALSE)
+ target <- a[ , , k]
+ checkIdentical(target, as.array(A[ , , k]))
+ target <- a[0, , k]
+ checkIdentical(target, as.array(A[0, , k]))
+ checkIdentical(target, as.array(A[integer(0), , k]))
+ checkIdentical(target, as.array(A[NULL, , k]))
+ target <- a[i, , k]
+ checkIdentical(target, as.array(A[i, , k]))
+ target <- a[i, 0, k]
+ checkIdentical(target, as.array(A[i, 0, k]))
+ checkIdentical(target, as.array(A[i, integer(0), k]))
+ checkIdentical(target, as.array(A[i, NULL, k]))
+
+ j <- c(20:5, 11:22)
+ target <- a[0, j, ]
+ checkIdentical(target, as.array(A[0, j, ]))
+ checkIdentical(target, as.array(A[integer(0), j, ]))
+ checkIdentical(target, as.array(A[NULL, j, ]))
+ target <- a[0, j, 0]
+ checkIdentical(target, as.array(A[0, j, 0]))
+ checkIdentical(target, as.array(A[integer(0), j, integer(0)]))
+ checkIdentical(target, as.array(A[NULL, j, NULL]))
+ target <- a[99:9, j, ]
+ checkIdentical(target, as.array(A[99:9, j, ]))
+
+ k <- c("e", "b", "b", "d", "e", "c")
+ target <- a[-3, j, k]
+ checkIdentical(target, as.array(A[-3, j, k]))
+ target <- a[-3, -j, k]
+ checkIdentical(target, as.array(A[-3, -j, k]))
+
+ target <- a[-3, -j, ]
+ checkIdentical(target, as.array(A[-3, -j, ]))
+ target <- a[-3, -j, -555555]
+ checkIdentical(target, as.array(A[-3, -j, -555555]))
+
+ target <- a[99:9, j, k]
+ checkIdentical(target, as.array(A[99:9, j, k]))
+ i <- c(150:111, 88:90, 75:90)
+ target <- a[i, j, k]
+ checkIdentical(target, as.array(A[i, j, k]))
+
+ target1 <- a[99, j, k]
+ checkIdentical(target1, as.matrix(A[99, j, k]))
+ B1 <- drop(A[99, j, k])
+ checkIdentical(target1, as.array(B1))
+ target <- target1[15:8, c("c", "b")]
+ checkIdentical(target, as.array(B1[15:8, c("c", "b")]))
+ target <- target1[0, ]
+ checkIdentical(target, as.array(B1[0, ]))
+ target <- target1[ , 0]
+ checkIdentical(target, as.array(B1[ , 0]))
+ target <- target1[0, 0]
+ checkIdentical(target, as.array(B1[0, 0]))
+
+ target2 <- a[i, 22, k]
+ checkIdentical(target2, as.matrix(A[i, 22, k]))
+ B2 <- drop(A[i, 22, k])
+ checkIdentical(target2, as.array(B2))
+ target <- target2[15:8, c("c", "b")]
+ checkIdentical(target, as.array(B2[15:8, c("c", "b")]))
+ target <- target2[0, ]
+ checkIdentical(target, as.array(B2[0, ]))
+ target <- target2[ , 0]
+ checkIdentical(target, as.array(B2[ , 0]))
+ target <- target2[0, 0]
+ checkIdentical(target, as.array(B2[0, 0]))
+
+ target3 <- a[i, j, 5]
+ checkIdentical(target3, as.matrix(A[i, j, 5]))
+ B3 <- drop(A[i, j, 5])
+ checkIdentical(target3, as.array(B3))
+ target <- target3[15:8, LETTERS[18:12]]
+ checkIdentical(target, as.array(B3[15:8, LETTERS[18:12]]))
+ target <- target3[0, ]
+ checkIdentical(target, as.array(B3[0, ]))
+ target <- target3[ , 0]
+ checkIdentical(target, as.array(B3[ , 0]))
+ target <- target3[0, 0]
+ checkIdentical(target, as.array(B3[0, 0]))
+}
+
diff --git a/inst/unitTests/test_DelayedArray-utils.R b/inst/unitTests/test_DelayedArray-utils.R
new file mode 100644
index 0000000..ecc3a22
--- /dev/null
+++ b/inst/unitTests/test_DelayedArray-utils.R
@@ -0,0 +1,297 @@
+#setRealizationBackend("RleArray")
+#setRealizationBackend("HDF5Array")
+
+DEFAULT_BLOCK_SIZE <- DelayedArray:::DEFAULT_BLOCK_SIZE
+
+Arith_members <- c("+", "-", "*", "/", "^", "%%", "%/%")
+Compare_members <- c("==", "!=", "<=", ">=", "<", ">")
+Logic_members <- c("&", "|") # currently untested
+
+a1 <- array(sample(5L, 150, replace=TRUE), c(5, 10, 3)) # integer array
+a2 <- a1 + runif(150) - 0.5 # numeric array
+
+block_sizes1 <- c(12L, 20L, 50L, 15000L)
+block_sizes2 <- 2L * block_sizes1
+
+test_DelayedArray_unary_ops <- function()
+{
+ a <- 2:-2 / (a1 - 3)
+ a[2, 9, 2] <- NA # same as a[[92]] <- NA
+ A <- realize(a)
+ for (.Generic in c("is.na", "is.finite", "is.infinite", "is.nan")) {
+ GENERIC <- match.fun(.Generic)
+ checkIdentical(GENERIC(a), as.array(GENERIC(A)))
+ }
+
+ a <- array(sample(c(LETTERS, letters), 60, replace=TRUE), 5:3)
+ A <- realize(a)
+ ## For some obscure reason, the tests below fail in the context of
+ ## 'DelayedArray:::.test()' or 'R CMD check'.
+ ## TODO: Investigate this.
+ #for (.Generic in c("nchar", "tolower", "toupper")) {
+ # GENERIC <- match.fun(.Generic)
+ # checkIdentical(GENERIC(a), as.array(GENERIC(A)))
+ #}
+}
+
+test_DelayedArray_Math_ans_Arith <- function()
+{
+ toto1 <- function(a) { 100 / floor(abs((5 * log(a + 0.2) - 1)^3)) }
+ toto2 <- function(a) { 100L + (5L * (a - 2L)) %% 7L }
+
+ ## with an integer array
+ a <- a1
+ a[2, 9, 2] <- NA # same as a[[92]] <- NA
+ A <- realize(a)
+ ## Not sure what's going on but it seems that this call to checkIdentical()
+ ## crashes the RUnit package but only when the tests are run by
+ ## 'R CMD check'.
+ #checkIdentical(toto2(a), as.array(toto2(A)))
+
+ ## with a numeric array
+ a <- a2
+ a[2, 9, 2] <- NA # same as a[[92]] <- NA
+ A <- realize(a)
+ checkIdentical(toto1(a), as.array(toto1(A)))
+ checkIdentical(toto2(a), as.array(toto2(A)))
+ checkIdentical(toto2(toto1(a)), as.array(toto2(toto1(A))))
+ checkIdentical(toto1(toto2(a)), as.array(toto1(toto2(A))))
+
+ a <- a[ , 10:4, -2]
+ A <- A[ , 10:4, -2]
+ checkIdentical(toto1(a), as.array(toto1(A)))
+ checkIdentical(toto2(a), as.array(toto2(A)))
+ checkIdentical(toto2(toto1(a)), as.array(toto2(toto1(A))))
+ checkIdentical(toto1(toto2(a)), as.array(toto1(toto2(A))))
+
+ ## with a numeric matrix
+ m <- a[ , , 2]
+ M <- realize(m)
+ checkIdentical(toto1(m), as.matrix(toto1(M)))
+ checkIdentical(t(toto1(m)), as.matrix(toto1(t(M))))
+ checkIdentical(t(toto1(m)), as.matrix(t(toto1(M))))
+ M <- drop(A[ , , 2])
+ checkIdentical(toto1(m), as.matrix(toto1(M)))
+ checkIdentical(t(toto1(m)), as.matrix(toto1(t(M))))
+ checkIdentical(t(toto1(m)), as.matrix(t(toto1(M))))
+}
+
+test_DelayedArray_Ops_with_left_or_right_vector <- function()
+{
+ test_delayed_Ops_on_array <- function(.Generic, a, A, m, M) {
+ on.exit(options(DelayedArray.block.size=DEFAULT_BLOCK_SIZE))
+ GENERIC <- match.fun(.Generic)
+
+ target_current <- list(
+ list(GENERIC(a, m[ , 1]), GENERIC(A, M[ , 1])),
+ list(GENERIC(m[ , 1], a), GENERIC(M[ , 1], A)),
+
+ list(GENERIC(a, a[ , 1, 1]), GENERIC(A, A[ , 1, 1])),
+ list(GENERIC(a[ , 1, 1], a), GENERIC(A[ , 1, 1], A)),
+
+ list(GENERIC(a, a[[1]]), GENERIC(A, A[[1]])),
+ list(GENERIC(a[[1]], a), GENERIC(A[[1]], A))
+ )
+ for (i in seq_along(target_current)) {
+ target <- target_current[[i]][[1L]]
+ current <- target_current[[i]][[2L]]
+ checkIdentical(target, as.array(current))
+ checkIdentical(target[5:3, , -2], as.array(current[5:3, , -2]))
+ checkIdentical(target[0, 0, 0], as.array(current[0, 0, 0]))
+ checkIdentical(target[0, 0, -2], as.array(current[0, 0, -2]))
+ checkIdentical(target[ , 0, ], as.array(current[ , 0, ]))
+ for (block_size in block_sizes2) {
+ options(DelayedArray.block.size=block_size)
+ checkEquals(sum(target, na.rm=TRUE), sum(current, na.rm=TRUE))
+ }
+ }
+ }
+
+ a <- a2
+ a[2, 9, 2] <- NA # same as a[[92]] <- NA
+ A <- realize(a)
+ m <- a[ , , 2]
+ M <- realize(m)
+
+ ## "Logic" members currently untested.
+ for (.Generic in c(Arith_members, Compare_members))
+ test_delayed_Ops_on_array(.Generic, a, A, m, M)
+
+ ## Takes too long and probably not that useful.
+ #M <- drop(A[ , , 2])
+ #for (.Generic in c(Arith_members, Compare_members))
+ # test_delayed_Ops_on_array(.Generic, a, A, m, M)
+}
+
+test_DelayedArray_Ops_COMBINE_seeds <- function()
+{
+ ## comparing 2 DelayedArray objects
+ A1 <- realize(a1)
+ A2 <- realize(a2)
+ a3 <- array(sample(5L, 150, replace=TRUE), c(5, 10, 3))
+ a3[2, 9, 2] <- NA # same as a3[[92]] <- NA
+ A3 <- realize(a3)
+
+ ## "Logic" members currently untested.
+ for (.Generic in c(Arith_members, Compare_members)) {
+ GENERIC <- match.fun(.Generic)
+ target1 <- GENERIC(a1, a2)
+ target2 <- GENERIC(a2, a1)
+ target3 <- GENERIC(a1, a3)
+ checkIdentical(target1, as.array(GENERIC(A1, A2)))
+ checkIdentical(target2, as.array(GENERIC(A2, A1)))
+ checkIdentical(target3, as.array(GENERIC(A1, A3)))
+ }
+}
+
+test_DelayedArray_anyNA <- function()
+{
+ on.exit(options(DelayedArray.block.size=DEFAULT_BLOCK_SIZE))
+ DelayedArray_block_anyNA <- DelayedArray:::.DelayedArray_block_anyNA
+
+ A1 <- realize(a1)
+ a <- a1
+ a[2, 9, 2] <- NA # same as a[[92]] <- NA
+ A <- realize(a)
+
+ for (block_size in block_sizes2) {
+ options(DelayedArray.block.size=block_size)
+ checkIdentical(FALSE, anyNA(A1))
+ checkIdentical(FALSE, DelayedArray_block_anyNA(a1))
+ checkIdentical(TRUE, anyNA(A))
+ checkIdentical(TRUE, DelayedArray_block_anyNA(a))
+ }
+}
+
+test_DelayedArray_which <- function()
+{
+ on.exit(options(DelayedArray.block.size=DEFAULT_BLOCK_SIZE))
+ DelayedArray_block_which <- DelayedArray:::.DelayedArray_block_which
+
+ a <- a1 == 1L
+ a[2, 9, 2] <- NA # same as a[[92]] <- NA
+ A <- realize(a)
+
+ target <- which(a)
+ for (block_size in block_sizes2) {
+ options(DelayedArray.block.size=block_size)
+ checkIdentical(target, which(A))
+ checkIdentical(target, DelayedArray_block_which(a))
+ }
+
+ a <- a1 == -1L # all FALSE
+ a[2, 9, 2] <- NA # same as a[[92]] <- NA
+ A <- realize(a)
+
+ target <- integer(0)
+ for (block_size in block_sizes2) {
+ options(DelayedArray.block.size=block_size)
+ checkIdentical(target, which(A))
+ checkIdentical(target, DelayedArray_block_which(a))
+ }
+}
+
+test_DelayedArray_Summary <- function()
+{
+ test_Summary <- function(.Generic, a, block_sizes) {
+ on.exit(options(DelayedArray.block.size=DEFAULT_BLOCK_SIZE))
+ DelayedArray_block_Summary <- DelayedArray:::.DelayedArray_block_Summary
+
+ GENERIC <- match.fun(.Generic)
+ target1 <- GENERIC(a)
+ target2 <- GENERIC(a, na.rm=TRUE)
+ A <- realize(a)
+ for (block_size in block_sizes) {
+ options(DelayedArray.block.size=block_size)
+ checkIdentical(target1, GENERIC(A))
+ checkIdentical(target1, GENERIC(t(A)))
+ checkIdentical(target1, DelayedArray_block_Summary(.Generic, a))
+ checkIdentical(target2, GENERIC(A, na.rm=TRUE))
+ checkIdentical(target2, GENERIC(t(A), na.rm=TRUE))
+ checkIdentical(target2, DelayedArray_block_Summary(.Generic, a,
+ na.rm=TRUE))
+ }
+ }
+
+ ## on an integer array
+ a <- a1
+ a[2, 9, 2] <- NA # same as a[[92]] <- NA
+ #for (.Generic in c("max", "min", "range", "sum", "prod")) {
+ for (.Generic in c("max", "min", "range", "sum"))
+ test_Summary(.Generic, a, block_sizes1)
+
+ ## on a numeric array
+ a <- a2
+ a[2, 9, 2] <- NA # same as a[[92]] <- NA
+ a[2, 10, 2] <- Inf # same as a[[97]] <- Inf
+ for (.Generic in c("max", "min", "range", "sum", "prod"))
+ test_Summary(.Generic, a, block_sizes2)
+
+ ## on a logical array
+ a <- array(c(rep(NA, 62), rep(TRUE, 87), FALSE), c(5, 10, 3))
+ for (.Generic in c("any", "all"))
+ test_Summary(.Generic, a, block_sizes1)
+}
+
+test_DelayedArray_mean <- function()
+{
+ on.exit(options(DelayedArray.block.size=DEFAULT_BLOCK_SIZE))
+
+ ## on a numeric array
+ a <- a2
+ a[2, 9, 2] <- NA # same as a[[92]] <- NA
+ A <- realize(a)
+
+ target1 <- mean(a)
+ target2 <- mean(a, na.rm=TRUE)
+ target3 <- mean(a[ , 10:4, -2])
+ for (block_size in block_sizes2) {
+ options(DelayedArray.block.size=block_size)
+ checkIdentical(target1, mean(A))
+ checkIdentical(target1, mean(t(A)))
+ checkIdentical(target2, mean(A, na.rm=TRUE))
+ checkIdentical(target2, mean(t(A), na.rm=TRUE))
+ checkIdentical(target3, mean(A[ , 10:4, -2]))
+ checkIdentical(target3, mean(t(A[ , 10:4, -2])))
+ }
+}
+
+test_DelayedArray_apply <- function()
+{
+ test_apply <- function(a) {
+ A <- realize(a)
+ for (MARGIN in seq_along(dim(a))) {
+ checkIdentical(apply(a, MARGIN, dim),
+ apply(A, MARGIN, dim))
+ checkIdentical(apply(a, MARGIN, sum),
+ apply(A, MARGIN, sum))
+ checkIdentical(apply(a, MARGIN, sum, na.rm=TRUE),
+ apply(A, MARGIN, sum, na.rm=TRUE))
+ ## row/colSums and row/colMeans don't work yet in that case.
+ if (dim(A)[[MARGIN]] == 0L && length(dim(A)) >= 3L)
+ next
+ checkIdentical(apply(a, MARGIN, rowSums),
+ apply(A, MARGIN, rowSums))
+ checkIdentical(apply(a, MARGIN, rowSums, na.rm=TRUE),
+ apply(A, MARGIN, rowSums, na.rm=TRUE))
+ checkIdentical(apply(a, MARGIN, colMeans),
+ apply(A, MARGIN, colMeans))
+ checkIdentical(apply(a, MARGIN, colMeans, na.rm=TRUE),
+ apply(A, MARGIN, colMeans, na.rm=TRUE))
+ }
+ }
+
+ a <- a1
+ a[2, 9, 2] <- NA # same as a[[92]] <- NA
+ test_apply(a)
+ test_apply(a[ , , 0])
+
+ dimnames(a) <- list(NULL, NULL, LETTERS[1:3])
+ test_apply(a)
+ test_apply(a[ , , 0])
+
+ dimnames(a) <- list(NULL, letters[1:10], LETTERS[1:3])
+ test_apply(a)
+ test_apply(a[ , , 0])
+}
+
diff --git a/inst/unitTests/test_DelayedMatrix-stats.R b/inst/unitTests/test_DelayedMatrix-stats.R
new file mode 100644
index 0000000..ac7b078
--- /dev/null
+++ b/inst/unitTests/test_DelayedMatrix-stats.R
@@ -0,0 +1,66 @@
+#setRealizationBackend("RleArray")
+#setRealizationBackend("HDF5Array")
+
+DEFAULT_BLOCK_SIZE <- DelayedArray:::DEFAULT_BLOCK_SIZE
+
+a1 <- array(sample(5L, 150, replace=TRUE), c(5, 10, 3)) # integer array
+a2 <- a1 + runif(150) - 0.5 # numeric array
+m2 <- matrix(runif(60), ncol=6) # numeric matrix
+
+block_sizes1 <- c(12L, 20L, 50L, 15000L)
+block_sizes2 <- 2L * block_sizes1
+
+test_DelayedMatrix_row_col_summarization <- function()
+{
+ test_row_col_summary <- function(FUN, m, M, block_sizes) {
+ on.exit(options(DelayedArray.block.size=DEFAULT_BLOCK_SIZE))
+ FUN <- match.fun(FUN)
+
+ target1 <- FUN(m)
+ target2 <- FUN(m, na.rm=TRUE)
+ target3 <- FUN(t(m))
+ target4 <- FUN(t(m), na.rm=TRUE)
+ for (block_size in block_sizes) {
+ options(DelayedArray.block.size=block_size)
+ current <- FUN(M)
+ checkEquals(target1, current)
+ checkIdentical(typeof(target1), typeof(current))
+ current <- FUN(M, na.rm=TRUE)
+ checkEquals(target2, current)
+ checkIdentical(typeof(target2), typeof(current))
+ current <- FUN(t(M))
+ checkEquals(target3, current)
+ checkIdentical(typeof(target3), typeof(current))
+ current <- FUN(t(M), na.rm=TRUE)
+ checkEquals(target4, current)
+ checkIdentical(typeof(target4), typeof(current))
+ }
+ }
+
+ FUNS <- c("rowSums", "colSums", "rowMeans", "colMeans",
+ "rowMaxs", "colMaxs", "rowMins", "colMins",
+ "rowRanges", "colRanges")
+
+ ## on an integer matrix
+ m <- a1[ , , 1]
+ A1 <- realize(a1)
+ M <- drop(A1[ , , 1])
+ for (FUN in FUNS) {
+ test_row_col_summary(FUN, m, M, block_sizes2)
+ test_row_col_summary(FUN, m[ , 0], M[ , 0], block_sizes2)
+ }
+
+ ## on a numeric matrix
+ m <- m2
+ m[2, 4] <- NA
+ m[5, 4] <- Inf
+ m[6, 3] <- -Inf
+ M <- realize(m)
+ for (FUN in FUNS)
+ test_row_col_summary(FUN, m, M, block_sizes2)
+
+ library(genefilter)
+ ## Note that the matrixStats package also defines a rowVars() function.
+ test_row_col_summary(genefilter::rowVars, m, M, block_sizes2)
+}
+
diff --git a/inst/unitTests/test_DelayedMatrix-utils.R b/inst/unitTests/test_DelayedMatrix-utils.R
new file mode 100644
index 0000000..d087adc
--- /dev/null
+++ b/inst/unitTests/test_DelayedMatrix-utils.R
@@ -0,0 +1,106 @@
+#setRealizationBackend("RleArray")
+#setRealizationBackend("HDF5Array")
+
+DEFAULT_BLOCK_SIZE <- DelayedArray:::DEFAULT_BLOCK_SIZE
+
+Arith_members <- c("+", "-", "*", "/", "^", "%%", "%/%")
+Compare_members <- c("==", "!=", "<=", ">=", "<", ">")
+Logic_members <- c("&", "|") # currently untested
+
+a1 <- array(sample(5L, 150, replace=TRUE), c(5, 10, 3)) # integer array
+a2 <- a1 + runif(150) - 0.5 # numeric array
+m2 <- matrix(runif(60), ncol=6) # numeric matrix
+
+block_sizes1 <- c(12L, 20L, 50L, 15000L)
+block_sizes2 <- 2L * block_sizes1
+
+test_DelayedMatrix_Ops <- function()
+{
+ test_delayed_Ops_on_matrix <- function(.Generic, m, M) {
+ GENERIC <- match.fun(.Generic)
+
+ target_current <- list(
+ list(GENERIC(m, m[ , 1]), GENERIC(M, M[ , 1])),
+ list(GENERIC(m[ , 2], m), GENERIC(M[ , 2], M))
+ )
+ for (i in seq_along(target_current)) {
+ target <- target_current[[i]][[1L]]
+ current <- target_current[[i]][[2L]]
+ checkIdentical(target, as.matrix(current))
+ checkIdentical(t(target), as.matrix(t(current)))
+ checkIdentical(target[-2, 8:5], as.matrix(current[-2, 8:5]))
+ checkIdentical(t(target[-2, 8:5]), as.matrix(t(current[-2, 8:5])))
+ checkIdentical(target[-2, 0], as.matrix(current[-2, 0]))
+ checkIdentical(t(target[-2, 0]), as.matrix(t(current[-2, 0])))
+ checkIdentical(target[0, ], as.matrix(current[0, ]))
+ checkIdentical(t(target[0, ]), as.matrix(t(current[0, ])))
+ }
+
+ target_current <- list(
+ list(GENERIC(t(m), 8:-1), GENERIC(t(M), 8:-1)),
+ list(GENERIC(8:-1, t(m)), GENERIC(8:-1, t(M))),
+
+ list(GENERIC(t(m), m[1 , ]), GENERIC(t(M), M[1 , ])),
+ list(GENERIC(m[2 , ], t(m)), GENERIC(M[2 , ], t(M))),
+
+ list(GENERIC(t(m), m[1 , 6:10]), GENERIC(t(M), M[1 , 6:10])),
+ list(GENERIC(m[2 , 8:7], t(m)), GENERIC(M[2 , 8:7], t(M)))
+ )
+ for (i in seq_along(target_current)) {
+ target <- target_current[[i]][[1L]]
+ current <- target_current[[i]][[2L]]
+ checkIdentical(target, as.matrix(current))
+ checkIdentical(target[1:3 , ], as.matrix(current[1:3 , ]))
+ checkIdentical(target[ , 1:3], as.matrix(current[ , 1:3]))
+ checkIdentical(t(target), as.matrix(t(current)))
+ checkIdentical(t(target)[1:3 , ], as.matrix(t(current)[1:3 , ]))
+ checkIdentical(t(target)[ , 1:3], as.matrix(t(current)[ , 1:3]))
+ checkIdentical(target[8:5, -2], as.matrix(current[8:5, -2]))
+ checkIdentical(t(target[8:5, -2]), as.matrix(t(current[8:5, -2])))
+ checkIdentical(target[0, -2], as.matrix(current[0, -2]))
+ checkIdentical(t(target[0, -2]), as.matrix(t(current[0, -2])))
+ checkIdentical(target[ , 0], as.matrix(current[ , 0]))
+ checkIdentical(t(target[ , 0]), as.matrix(t(current[ , 0])))
+ }
+ }
+
+ a <- a2
+ a[2, 9, 2] <- NA # same as a[[92]] <- NA
+
+ toto <- function(x) t((5 * x[ , 1:2] ^ 3 + 1L) * log(x)[, 10:9])[ , -1]
+
+ m <- a[ , , 2]
+ M <- realize(m)
+ checkIdentical(toto(m), as.array(toto(M)))
+ ## "Logic" members currently untested.
+ for (.Generic in c(Arith_members, Compare_members))
+ test_delayed_Ops_on_matrix(.Generic, m, M)
+
+ A <- realize(a)[ , , 2]
+ M <- drop(A)
+ checkIdentical(toto(m), as.array(toto(M)))
+ for (.Generic in c(Arith_members, Compare_members))
+ test_delayed_Ops_on_matrix(.Generic, m, M)
+}
+
+test_DelayedMatrix_mult <- function()
+{
+ m <- m2
+ m[2, 4] <- NA
+ m[5, 4] <- Inf
+ m[6, 3] <- -Inf
+ M <- realize(m)
+
+ Lm <- rbind(rep(1L, 10), rep(c(1L, 0L), 5), rep(-100L, 10))
+ Rm <- rbind(Lm + 7.05, 0.1 * Lm)
+
+ on.exit(options(DelayedArray.block.size=DEFAULT_BLOCK_SIZE))
+ for (block_size in block_sizes2) {
+ options(DelayedArray.block.size=block_size)
+ P <- Lm %*% M
+ checkEquals(Lm %*% m, as.matrix(P))
+ P <- M %*% Rm
+ checkEquals(m %*% Rm, as.matrix(P))
+ }
+}
+
diff --git a/inst/unitTests/test_cbind-methods.R b/inst/unitTests/test_cbind-methods.R
new file mode 100644
index 0000000..49fc807
--- /dev/null
+++ b/inst/unitTests/test_cbind-methods.R
@@ -0,0 +1,132 @@
+#setRealizationBackend("RleArray")
+#setRealizationBackend("HDF5Array")
+
+.TEST_matrices <- list(
+ matrix(1:15, nrow=3, ncol=5,
+ dimnames=list(NULL, paste0("M1y", 1:5))),
+ matrix(101:135, nrow=7, ncol=5,
+ dimnames=list(paste0("M2x", 1:7), paste0("M2y", 1:5))),
+ matrix(1001:1025, nrow=5, ncol=5,
+ dimnames=list(paste0("M3x", 1:5), NULL))
+)
+
+.TEST_arrays <- list(
+ array(1:60, c(3, 5, 4),
+ dimnames=list(NULL, paste0("M1y", 1:5), NULL)),
+ array(101:240, c(7, 5, 4),
+ dimnames=list(paste0("M2x", 1:7), paste0("M2y", 1:5), NULL)),
+ array(10001:10100, c(5, 5, 4),
+ dimnames=list(paste0("M3x", 1:5), NULL, paste0("M3z", 1:4)))
+)
+
+test_DelayedMatrix_rbind_cbind <- function()
+{
+ m1 <- .TEST_matrices[[1]]
+ m2 <- .TEST_matrices[[2]]
+ m3 <- .TEST_matrices[[3]]
+ M1 <- realize(m1)
+ M2 <- realize(m2)
+ M3 <- realize(m3)
+
+ target <- rbind(a=m1, b=m2, c=m3)
+ current <- rbind(a=M1, b=M2, c=M3)
+ checkTrue(is(current, "DelayedMatrix"))
+ checkTrue(validObject(current, complete=TRUE))
+ checkIdentical(target, as.matrix(current))
+
+ current <- cbind(a=t(M1), b=t(M2), c=t(M3))
+ checkTrue(is(current, "DelayedMatrix"))
+ checkTrue(validObject(current, complete=TRUE))
+ checkIdentical(t(target), as.matrix(current))
+
+ ## unary form
+
+ target <- rbind(a=m1)
+ current <- rbind(a=M1)
+ checkTrue(is(current, "DelayedMatrix"))
+ checkTrue(validObject(current, complete=TRUE))
+ checkIdentical(target, as.matrix(current))
+
+ target <- cbind(a=m1)
+ current <- cbind(a=M1)
+ checkTrue(is(current, "DelayedMatrix"))
+ checkTrue(validObject(current, complete=TRUE))
+ checkIdentical(target, as.matrix(current))
+
+ ## with empty matrices
+
+ m1 <- matrix(nrow=0, ncol=3, dimnames=list(NULL, letters[1:3]))
+ m2 <- matrix(1:15, ncol=3, dimnames=list(NULL, LETTERS[1:3]))
+ M1 <- realize(m1)
+ M2 <- realize(m2)
+
+ target <- rbind(a=m1, a=m2)
+ current <- rbind(a=M1, b=M2)
+ checkTrue(is(current, "DelayedMatrix"))
+ checkTrue(validObject(current, complete=TRUE))
+ checkIdentical(target, as.matrix(current))
+
+ target <- rbind(a=m2, a=m1)
+ current <- rbind(a=M2, b=M1)
+ checkTrue(is(current, "DelayedMatrix"))
+ checkTrue(validObject(current, complete=TRUE))
+ checkIdentical(target, as.matrix(current))
+
+ target <- rbind(a=m1, a=m1)
+ current <- rbind(a=M1, b=M1)
+ checkTrue(is(current, "DelayedMatrix"))
+ checkTrue(validObject(current, complete=TRUE))
+ checkIdentical(target, as.matrix(current))
+}
+
+test_DelayedArray_arbind <- function()
+{
+ TEST_hdf5arrays <- lapply(.TEST_arrays, realize)
+
+ target <- do.call(arbind, .TEST_arrays)
+ current <- do.call(arbind, TEST_hdf5arrays)
+ checkTrue(is(current, "DelayedArray"))
+ checkTrue(validObject(current, complete=TRUE))
+ checkIdentical(target, as.array(current))
+
+ ## For some mysterious reason, the code below fails in the context
+ ## of 'R CMD check' but not when running the tests interactively with
+ ## DelayedArray:::.test().
+ #check_2D_slice <- function(k) {
+ # slices <- lapply(lapply(TEST_hdf5arrays, `[`, , , k), drop)
+ # target_slice <- do.call(rbind, slices)
+ # checkIdentical(as.matrix(target_slice), as.matrix(current[ , , k]))
+ #}
+ #for (k in seq_len(dim(current)[[3L]])) check_2D_slice(k)
+}
+
+test_DelayedArray_acbind <- function()
+{
+ ## transpose the 1st 2 dimensions
+ arrays <- lapply(.TEST_arrays,
+ function(a) {
+ a_dimnames <- dimnames(a)
+ dim(a)[1:2] <- dim(a)[2:1]
+ a_dimnames[1:2] <- a_dimnames[2:1]
+ dimnames(a) <- a_dimnames
+ a
+ })
+ TEST_hdf5arrays <- lapply(arrays, realize)
+
+ target <- do.call(acbind, arrays)
+ current <- do.call(acbind, TEST_hdf5arrays)
+ checkTrue(is(current, "DelayedArray"))
+ checkTrue(validObject(current, complete=TRUE))
+ checkIdentical(target, as.array(current))
+
+ ## For some mysterious reason, the code below fails in the context
+ ## of 'R CMD check' but not when running the tests interactively with
+ ## DelayedArray:::.test().
+ #check_2D_slice <- function(k) {
+ # slices <- lapply(lapply(TEST_hdf5arrays, `[`, , , k), drop)
+ # target_slice <- do.call(cbind, slices)
+ # checkIdentical(as.matrix(target_slice), as.matrix(current[ , , k]))
+ #}
+ #for (k in seq_len(dim(current)[[3L]])) check_2D_slice(k)
+}
+
diff --git a/man/ArrayBlocks-class.Rd b/man/ArrayBlocks-class.Rd
new file mode 100644
index 0000000..5b57a61
--- /dev/null
+++ b/man/ArrayBlocks-class.Rd
@@ -0,0 +1,19 @@
+\name{ArrayBlocks-class}
+\docType{class}
+
+\alias{class:ArrayBlocks}
+\alias{ArrayBlocks-class}
+\alias{ArrayBlocks}
+
+\alias{length,ArrayBlocks-method}
+\alias{getListElement,ArrayBlocks-method}
+\alias{show,ArrayBlocks-method}
+
+\title{ArrayBlocks objects}
+
+\description{
+ ArrayBlocks objects are used internally to support block processing of
+ array-like objects.
+}
+
+\keyword{internal}
diff --git a/man/DelayedArray-class.Rd b/man/DelayedArray-class.Rd
new file mode 100644
index 0000000..c011502
--- /dev/null
+++ b/man/DelayedArray-class.Rd
@@ -0,0 +1,343 @@
+\name{DelayedArray-class}
+\docType{class}
+
+\alias{class:DelayedArray}
+\alias{DelayedArray-class}
+
+\alias{class:DelayedMatrix}
+\alias{DelayedMatrix-class}
+\alias{DelayedMatrix}
+
+\alias{coerce,DelayedArray,DelayedMatrix-method}
+
+\alias{DelayedArray}
+\alias{DelayedArray,ANY-method}
+\alias{DelayedArray,DelayedArray-method}
+
+\alias{seed}
+\alias{seed,DelayedArray-method}
+
+\alias{dim,DelayedArray-method}
+\alias{dim<-,DelayedArray-method}
+\alias{length,DelayedArray-method}
+\alias{isEmpty,DelayedArray-method}
+\alias{dimnames,DelayedArray-method}
+\alias{dimnames<-,DelayedArray-method}
+\alias{names,DelayedArray-method}
+\alias{names<-,DelayedArray-method}
+
+\alias{drop,DelayedArray-method}
+
+\alias{[,DelayedArray-method}
+\alias{t,DelayedArray-method}
+
+\alias{as.array.DelayedArray}
+\alias{as.array,DelayedArray-method}
+\alias{as.matrix.DelayedArray}
+\alias{as.matrix,DelayedArray-method}
+\alias{as.data.frame.DelayedArray}
+\alias{as.data.frame,DelayedArray-method}
+\alias{as.vector.DelayedArray}
+\alias{as.vector,DelayedArray-method}
+\alias{as.logical.DelayedArray}
+\alias{as.logical,DelayedArray-method}
+\alias{as.integer.DelayedArray}
+\alias{as.integer,DelayedArray-method}
+\alias{as.numeric.DelayedArray}
+\alias{as.numeric,DelayedArray-method}
+\alias{as.complex.DelayedArray}
+\alias{as.complex,DelayedArray-method}
+\alias{as.character.DelayedArray}
+\alias{as.character,DelayedArray-method}
+\alias{as.raw.DelayedArray}
+\alias{as.raw,DelayedArray-method}
+
+\alias{coerce,DelayedMatrix,dgCMatrix-method}
+\alias{coerce,DelayedMatrix,sparseMatrix-method}
+
+\alias{type}
+\alias{type,array-method}
+\alias{type,DelayedArray-method}
+
+\alias{[[,DelayedArray-method}
+
+\alias{show,DelayedArray-method}
+
+\alias{c,DelayedArray-method}
+\alias{splitAsList,DelayedArray-method}
+\alias{split.DelayedArray}
+\alias{split,DelayedArray,ANY-method}
+
+% Internal stuff
+\alias{matrixClass}
+\alias{matrixClass,DelayedArray-method}
+
+\alias{subset_seed_as_array}
+\alias{subset_seed_as_array,ANY-method}
+\alias{subset_seed_as_array,array-method}
+\alias{subset_seed_as_array,data.frame-method}
+\alias{subset_seed_as_array,DataFrame-method}
+
+\title{DelayedArray objects}
+
+\description{
+ Wrapping an array-like object (typically an on-disk object) in a
+ DelayedArray object allows one to perform common array operations on it
+ without loading the object in memory. In order to reduce memory usage and
+ optimize performance, operations on the object are either delayed or
+ executed using a block processing mechanism.
+}
+
+\usage{
+DelayedArray(seed) # constructor function
+seed(x) # seed getter
+type(x)
+}
+
+\arguments{
+ \item{seed}{
+ An array-like object.
+ }
+ \item{x}{
+ A DelayedArray object. (Can also be an ordinary array in case of
+ \code{type}.)
+ }
+}
+
+\section{In-memory versus on-disk realization}{
+ To \emph{realize} a DelayedArray object (i.e. to trigger execution of the
+ delayed operations carried by the object and return the result as an
+ ordinary array), call \code{as.array} on it. However this realizes the
+ full object at once \emph{in memory} which could require too much memory
+ if the object is big. A big DelayedArray object is preferrably realized
+ \emph{on disk} e.g. by calling \code{\link[HDF5Array]{writeHDF5Array}} on
+ it (this function is defined in the \pkg{HDF5Array} package) or coercing it
+ to an \link[HDF5Array]{HDF5Array} object with \code{as(x, "HDF5Array")}.
+ Other on-disk backends can be supported. This uses a block-processing
+ strategy so that the full object is not realized at once in memory. Instead
+ the object is processed block by block i.e. the blocks are realized in
+ memory and written to disk one at a time.
+ See \code{?\link[HDF5Array]{writeHDF5Array}} in the \pkg{HDF5Array} package
+ for more information about this.
+}
+
+\section{Accessors}{
+ DelayedArray objects support the same set of getters as ordinary arrays
+ i.e. \code{dim()}, \code{length()}, and \code{dimnames()}.
+ In addition, they support \code{type()}, which is the DelayedArray
+ equivalent of \code{typeof()} or \code{storage.mode()} for ordinary
+ arrays. Note that, for convenience and consistency, \code{type()} also
+ works on ordinary arrays.
+
+ Only \code{dimnames()} is supported as a setter.
+}
+
+\section{Subsetting}{
+ A DelayedArray object can be subsetted with \code{[} like an ordinary array
+ but with the following differences:
+ \itemize{
+ \item \emph{Multi-dimensional single bracket subsetting} (i.e. subsetting
+ of the form \code{x[i_1, i_2, ..., i_n]} with one (possibly missing)
+ subscript per dimension) returns a DelayedArray object where the
+ subsetting is actually delayed. So it's a very light operation.
+
+ \item The \code{drop} argument of the \code{[} operator is ignored i.e.
+ subsetting a DelayedArray object always returns a DelayedArray
+ object with the same number of dimensions as the original object.
+ You need to call \code{drop()} on the subsetted object to actually
+ drop its ineffective dimensions (i.e. the dimensions equal to 1).
+ \code{drop()} is also a delayed operation so is very light.
+
+ \item \emph{Linear single bracket subsetting} (a.k.a. 1D-style subsetting,
+ that is, subsetting of the form \code{x[i]}) only works if subscript
+ \code{i} is a numeric vector at the moment. Furthermore, \code{i}
+ cannot contain NAs and all the indices in it must be >= 1 and <=
+ \code{length(x)} for now. It returns an atomic vector of the same
+ length as \code{i}. This is NOT a delayed operation.
+ }
+
+ Subsetting with \code{[[} is supported but only the \emph{linear} form
+ of it at the moment i.e. the \code{x[[i]]} form where \code{i} is a
+ \emph{single} numeric value >= 1 and <= \code{length(x)}. It is equivalent
+ to \code{x[i]}.
+
+ DelayedArray objects don't support subassignment (\code{[<-} or \code{[[<-}).
+}
+
+\seealso{
+ \itemize{
+ \item \code{\link{realize}} for realizing a DelayedArray object in memory
+ or on disk.
+
+ \item \link{DelayedArray-utils} for common operations on DelayedArray
+ objects.
+
+ \item \link[DelayedArray]{cbind} in this package (\pkg{DelayedArray})
+ for binding DelayedArray objects along their rows or columns.
+
+ \item \link{RleArray} objects.
+
+ \item \link[HDF5Array]{HDF5Array} objects in the \pkg{HDF5Array} package.
+
+ \item \link[S4Vectors]{DataFrame} objects in the \pkg{S4Vectors} package.
+
+ \item \link[base]{array} objects in base R.
+ }
+}
+
+\examples{
+## ---------------------------------------------------------------------
+## A. WRAP AN ORDINARY ARRAY IN A DelayedArray OBJECT
+## ---------------------------------------------------------------------
+a <- array(runif(1500000), dim=c(10000, 30, 5))
+A <- DelayedArray(a)
+A
+## The seed of A is treated as a "read-only" object so won't change when
+## we start operating on A:
+stopifnot(identical(a, seed(A)))
+type(A)
+
+## Multi-dimensional single bracket subsetting:
+m <- a[11:20 , 5, ] # a matrix
+A[11:20 , 5, ] # not a DelayedMatrix (still 3 dimensions)
+M <- drop(A[11:20 , 5, ]) # a DelayedMatrix object
+stopifnot(identical(m, as.array(M)))
+stopifnot(identical(a, seed(M)))
+
+## Linear single bracket subsetting:
+A[11:20]
+A[which(A <= 1e-5)]
+
+## Other operations:
+toto <- function(x) (5 * x[ , , 1] ^ 3 + 1L) * log(x[, , 2])
+b <- toto(a)
+head(b)
+
+B <- toto(A) # very fast! (operations are delayed)
+B # still 3 dimensions (subsetting a DelayedArray object never drops
+ # dimensions)
+B <- drop(B)
+B
+
+cs <- colSums(b)
+CS <- colSums(B)
+stopifnot(identical(cs, CS))
+
+## ---------------------------------------------------------------------
+## B. WRAP A DataFrame OBJECT IN A DelayedArray OBJECT
+## ---------------------------------------------------------------------
+
+## Generate random coverage and score along an imaginary chromosome:
+cov <- Rle(sample(20, 5000, replace=TRUE), sample(6, 5000, replace=TRUE))
+score <- Rle(sample(100, nrun(cov), replace=TRUE), runLength(cov))
+
+DF <- DataFrame(cov, score)
+A2 <- DelayedArray(DF)
+A2
+seed(A2) # 'DF'
+
+## Coercion of a DelayedMatrix object to DataFrame produces a DataFrame
+## object with Rle columns:
+as(A2, "DataFrame")
+stopifnot(identical(DF, as(A2, "DataFrame")))
+
+t(A2) # transposition is delayed so is very fast and very memory
+ # efficient
+stopifnot(identical(DF, seed(t(A2)))) # the "seed" is still the same
+colSums(A2)
+
+## ---------------------------------------------------------------------
+## C. A HDF5Array OBJECT IS A (PARTICULAR KIND OF) DelayedArray OBJECT
+## ---------------------------------------------------------------------
+library(HDF5Array)
+A3 <- as(a, "HDF5Array") # write 'a' to an HDF5 file
+A3
+is(A3, "DelayedArray") # TRUE
+seed(A3) # a HDF5ArraySeed object
+
+B3 <- toto(A3) # very fast! (operations are delayed)
+
+B3 # not a HDF5Array object because now it
+ # carries delayed operations
+B3 <- drop(B3)
+
+CS3 <- colSums(B3)
+stopifnot(identical(cs, CS3))
+
+## ---------------------------------------------------------------------
+## D. PERFORM THE DELAYED OPERATIONS
+## ---------------------------------------------------------------------
+as(B3, "HDF5Array") # "realize" 'B3' on disk
+
+## If this is just an intermediate result, you can either keep going
+## with B3 or replace it with its "realized" version:
+B3 <- as(B3, "HDF5Array") # no more delayed operations on new 'B3'
+seed(B3)
+
+## For convenience, realize() can be used instead of explicit coercion.
+## The current "realization backend" controls where realization
+## happens e.g. in memory if set to NULL or in an HDF5 file if set
+## to "HDF5Array":
+D <- cbind(B3, exp(B3))
+D
+setRealizationBackend("HDF5Array")
+D <- realize(D)
+D
+## See '?realize' for more information about "realization backends".
+
+## ---------------------------------------------------------------------
+## E. WRAP A SPARSE MATRIX IN A DelayedArray OBJECT
+## ---------------------------------------------------------------------
+\dontrun{
+library(Matrix)
+M <- 75000L
+N <- 1800L
+p <- sparseMatrix(sample(M, 9000000, replace=TRUE),
+ sample(N, 9000000, replace=TRUE),
+ x=runif(9000000), dims=c(M, N))
+P <- DelayedArray(p)
+P
+p2 <- as(P, "sparseMatrix")
+stopifnot(identical(p, p2))
+
+## The following is based on the following post by Murat Tasan on the
+## R-help mailing list:
+## https://stat.ethz.ch/pipermail/r-help/2017-May/446702.html
+
+## As pointed out by Murat, the straight-forward row normalization
+## directly on sparse matrix 'p' would consume too much memory:
+row_normalized_p <- p / rowSums(p^2) # consumes too much memory
+## because the rowSums() result is being recycled (appropriately) into a
+## *dense* matrix with dimensions equal to dim(p).
+
+## Murat came up with the following solution that is very fast and memory
+## efficient:
+row_normalized_p1 <- Diagonal(x=1/sqrt(Matrix::rowSums(p^2))) %*% p
+
+## With a DelayedArray object, the straight-forward approach uses a
+## block processing strategy behind the scene so it doesn't consume
+## too much memory.
+
+## First, let's see the block processing in action:
+DelayedArray:::set_verbose_block_processing(TRUE)
+## and set block size to a bigger value than the default:
+getOption("DelayedArray.block.size")
+options(DelayedArray.block.size=80e6)
+
+row_normalized_P <- P / sqrt(DelayedArray::rowSums(P^2))
+
+## Increasing the block size increases the speed but also memory usage:
+options(DelayedArray.block.size=200e6)
+row_normalized_P2 <- P / sqrt(DelayedArray::rowSums(P^2))
+stopifnot(all.equal(row_normalized_P, row_normalized_P2))
+
+## Back to sparse representation:
+DelayedArray:::set_verbose_block_processing(FALSE)
+row_normalized_p2 <- as(row_normalized_P, "sparseMatrix")
+stopifnot(all.equal(row_normalized_p1, row_normalized_p2))
+
+options(DelayedArray.block.size=10e6)
+}
+}
+\keyword{methods}
+\keyword{classes}
diff --git a/man/DelayedArray-utils.Rd b/man/DelayedArray-utils.Rd
new file mode 100644
index 0000000..8ca069d
--- /dev/null
+++ b/man/DelayedArray-utils.Rd
@@ -0,0 +1,227 @@
+\name{DelayedArray-utils}
+
+\alias{DelayedArray-utils}
+
+% DelayedArray utils
+
+\alias{dim,ConformableSeedCombiner-method}
+\alias{dimnames,ConformableSeedCombiner-method}
+
+\alias{+,DelayedArray,missing-method}
+\alias{-,DelayedArray,missing-method}
+
+\alias{pmax2}
+\alias{pmin2}
+\alias{pmax2,ANY,ANY-method}
+\alias{pmin2,ANY,ANY-method}
+\alias{pmax2,DelayedArray,vector-method}
+\alias{pmin2,DelayedArray,vector-method}
+\alias{pmax2,vector,DelayedArray-method}
+\alias{pmin2,vector,DelayedArray-method}
+\alias{pmax2,DelayedArray,DelayedArray-method}
+\alias{pmin2,DelayedArray,DelayedArray-method}
+
+\alias{is.na,DelayedArray-method}
+\alias{is.finite,DelayedArray-method}
+\alias{is.infinite,DelayedArray-method}
+\alias{is.nan,DelayedArray-method}
+\alias{!,DelayedArray-method}
+
+\alias{nchar,DelayedArray-method}
+\alias{tolower,DelayedArray-method}
+\alias{toupper,DelayedArray-method}
+
+\alias{round,DelayedArray-method}
+\alias{signif,DelayedArray-method}
+
+\alias{anyNA,DelayedArray-method}
+\alias{which,DelayedArray-method}
+
+\alias{mean.DelayedArray}
+\alias{mean,DelayedArray-method}
+
+\alias{apply}
+\alias{apply,DelayedArray-method}
+
+% DelayedMatrix utils
+
+\alias{\%*\%}
+\alias{\%*\%,DelayedMatrix,matrix-method}
+\alias{\%*\%,matrix,DelayedMatrix-method}
+\alias{\%*\%,DelayedMatrix,DelayedMatrix-method}
+
+% DelayedMatrix row/col summarization
+
+\alias{rowSums}
+\alias{rowSums,DelayedMatrix-method}
+\alias{colSums}
+\alias{colSums,DelayedMatrix-method}
+
+\alias{rowMeans}
+\alias{rowMeans,DelayedMatrix-method}
+\alias{colMeans}
+\alias{colMeans,DelayedMatrix-method}
+
+\alias{rowMaxs}
+\alias{rowMaxs,DelayedMatrix-method}
+\alias{colMaxs}
+\alias{colMaxs,DelayedMatrix-method}
+
+\alias{rowMins}
+\alias{rowMins,DelayedMatrix-method}
+\alias{colMins}
+\alias{colMins,DelayedMatrix-method}
+
+\alias{rowRanges}
+\alias{rowRanges,DelayedMatrix-method}
+\alias{colRanges}
+\alias{colRanges,DelayedMatrix-method}
+
+% Internal stuff
+
+\alias{subset_seed_as_array,ConformableSeedCombiner-method}
+
+\title{Common operations on DelayedArray objects}
+
+\description{
+ Common operations on \link{DelayedArray} objects.
+}
+
+\details{
+ The operations currently supported on \link{DelayedArray} objects are:
+
+ Delayed operations:
+ \itemize{
+ \item all the members of the \code{\link[methods]{Ops}},
+ \code{\link[methods]{Math}}, and \code{\link[methods]{Math2}} groups
+ \item \code{!}
+ \item \code{is.na}, \code{is.finite}, \code{is.infinite}, \code{is.nan}
+ \item \code{nchar}, \code{tolower}, \code{toupper}
+ \item \code{pmax2} and \code{pmin2}
+ \item \code{rbind} and \code{cbind} (documented in
+ \link[DelayedArray]{cbind})
+ }
+
+ Block-processed operations:
+ \itemize{
+ \item \code{anyNA}, \code{which}
+ \item all the members of the \code{\link[methods]{Summary}} group
+ \item \code{mean}
+ \item \code{apply}
+ \item matrix multiplication (\%*\%) of an ordinary matrix by a
+ \link{DelayedMatrix} object
+ \item matrix row/col summarization [\link{DelayedMatrix} objects only]:
+ \code{rowSums}, \code{colSums}, \code{rowMeans}, \code{colMeans},
+ \code{rowMaxs}, \code{colMaxs}, \code{rowMins}, \code{colMins},
+ \code{rowRanges}, and \code{colRanges}
+ }
+}
+
+\seealso{
+ \itemize{
+ \item \code{\link[base]{is.na}}, \code{\link[base]{!}},
+ \code{\link[base]{mean}}, \code{\link[base]{apply}},
+ and \code{\link[base]{\%*\%}} in the \pkg{base} package for the
+ corresponding operations on ordinary arrays or matrices.
+
+ \item \code{\link[base]{rowSums}} in the \pkg{base} package and
+ \code{\link[matrixStats]{rowMaxs}} in the \pkg{matrixStats} package
+ for row/col summarization of an ordinary matrix.
+
+ \item \code{\link{setRealizationBackend}} for how to set a
+ \emph{realization backend}.
+
+ \item \code{\link[HDF5Array]{writeHDF5Array}} in the \pkg{HDF5Array}
+ package for writting an array-like object to an HDF5 file and other
+ low-level utilities to control the location of automatically created
+ HDF5 datasets.
+
+ \item \link{DelayedArray} objects.
+
+ \item \link[HDF5Array]{HDF5Array} objects in the \pkg{HDF5Array} package.
+
+ \item \code{\link[methods]{S4groupGeneric}} in the \pkg{methods} package
+ for the members of the \code{\link[methods]{Ops}},
+ \code{\link[methods]{Math}}, and \code{\link[methods]{Math2}} groups.
+
+ \item \link[base]{array} objects in base R.
+ }
+}
+
+\examples{
+library(HDF5Array)
+toy_h5 <- system.file("extdata", "toy.h5", package="HDF5Array")
+h5ls(toy_h5)
+
+M1 <- HDF5Array(toy_h5, "M1")
+range(M1)
+M1 >= 0.5 & M1 < 0.75
+log(M1)
+
+M2 <- HDF5Array(toy_h5, "M2")
+pmax2(M2, 0)
+
+M3 <- rbind(M1, t(M2))
+M3
+
+## ---------------------------------------------------------------------
+## MATRIX MULTIPLICATION
+## ---------------------------------------------------------------------
+
+## Matrix multiplication is not delayed: the output matrix is realized
+## block by block. The current "realization backend" controls where
+## realization happens e.g. in memory if set to NULL or in an HDF5 file
+## if set to "HDF5Array". See '?realize' for more information about
+## "realization backends".
+## The output matrix is returned as a DelayedMatrix object with no delayed
+## operations on it. The exact class of the object depends on the backend
+## e.g. it will be HDF5Matrix with "HDF5Array" backend.
+
+m <- matrix(runif(50000), ncol=nrow(M1))
+
+## Set backend to NULL for in-memory realization:
+setRealizationBackend()
+P1 <- m \%*\% M1
+P1
+
+## Set backend to HDF5Array for realization in HDF5 file:
+setRealizationBackend("HDF5Array")
+
+## With the HDF5Array backend, the output matrix will be written to an
+## automatic location on disk:
+getHDF5DumpFile() # HDF5 file where the output matrix will be written
+lsHDF5DumpFile()
+
+P2 <- m \%*\% M1
+P2
+
+lsHDF5DumpFile()
+
+## Use setHDF5DumpFile() and setHDF5DumpName() from the HDF5Array package
+## to control the location of automatically created HDF5 datasets.
+
+stopifnot(identical(as.array(P1), as.array(P2)))
+
+## ---------------------------------------------------------------------
+## MATRIX ROW/COL SUMMARIZATION
+## ---------------------------------------------------------------------
+
+rowSums(M1)
+colSums(M1)
+
+rowMeans(M1)
+colMeans(M1)
+
+rmaxs <- rowMaxs(M1)
+cmaxs <- colMaxs(M1)
+
+rmins <- rowMins(M1)
+cmins <- colMins(M1)
+
+rranges <- rowRanges(M1)
+cranges <- colRanges(M1)
+
+stopifnot(identical(cbind(rmins, rmaxs, deparse.level=0), rranges))
+stopifnot(identical(cbind(cmins, cmaxs, deparse.level=0), cranges))
+}
+\keyword{methods}
diff --git a/man/RleArray-class.Rd b/man/RleArray-class.Rd
new file mode 100644
index 0000000..923648d
--- /dev/null
+++ b/man/RleArray-class.Rd
@@ -0,0 +1,128 @@
+\name{RleArray-class}
+\docType{class}
+
+\alias{dim,RleArraySeed-method}
+\alias{length,RleArraySeed-method}
+\alias{dimnames,RleArraySeed-method}
+
+\alias{subset_seed_as_array,RleArraySeed-method}
+
+\alias{class:RleArray}
+\alias{RleArray-class}
+
+\alias{class:RleMatrix}
+\alias{RleMatrix-class}
+\alias{RleMatrix}
+
+\alias{coerce,RleArray,RleMatrix-method}
+\alias{matrixClass,RleArray-method}
+\alias{coerce,ANY,RleMatrix-method}
+
+\alias{DelayedArray,RleArraySeed-method}
+\alias{RleArray}
+
+\alias{class:RleRealizationSink}
+\alias{RleRealizationSink-class}
+\alias{RleRealizationSink}
+
+\alias{write_to_sink,array,RleRealizationSink-method}
+
+\alias{coerce,RleRealizationSink,RleArraySeed-method}
+\alias{coerce,RleRealizationSink,RleArray-method}
+\alias{coerce,RleRealizationSink,DelayedArray-method}
+\alias{coerce,ANY,RleArray-method}
+\alias{coerce,DelayedArray,RleArray-method}
+\alias{coerce,DelayedMatrix,RleMatrix-method}
+
+\alias{coerce,DataFrame,RleMatrix-method}
+\alias{coerce,DataFrame,RleArray-method}
+\alias{coerce,RleMatrix,DataFrame-method}
+\alias{coerce,DelayedMatrix,DataFrame-method}
+
+\title{RleArray objects}
+
+\description{
+ The RleArray class is an array-like container where the values are stored
+ in a run-length encoding format. RleArray objects support delayed operations
+ and block processing.
+}
+
+\usage{
+RleArray(rle, dim, dimnames=NULL) # constructor function
+}
+
+\arguments{
+ \item{rle}{
+ An \link[S4Vectors]{Rle} object.
+ }
+ \item{dim}{
+ The dimensions of the object to be created, that is, an integer vector
+ of length one or more giving the maximal indices in each dimension.
+ }
+ \item{dimnames}{
+ Either \code{NULL} or the names for the dimensions. This must a list of
+ length the number of dimensions. Each list element must be either
+ \code{NULL} or a character vector along the corresponding dimension.
+ }
+}
+
+\details{
+ RleArray extends \link{DelayedArray}. All the operations available on
+ \link{DelayedArray} objects work on RleArray objects.
+}
+
+\seealso{
+ \itemize{
+ \item \link[S4Vectors]{Rle} objects in the \pkg{S4Vectors} package.
+
+ \item \link{DelayedArray} objects.
+
+ \item \link{DelayedArray-utils} for common operations on
+ \link{DelayedArray} objects.
+
+ \item \code{\link{realize}} for realizing a DelayedArray object in memory
+ or on disk.
+
+ \item \link[HDF5Array]{HDF5Array} objects in the \pkg{HDF5Array} package.
+
+ \item \link[S4Vectors]{DataFrame} objects in the \pkg{S4Vectors} package.
+
+ \item \link[base]{array} objects in base R.
+ }
+}
+
+\examples{
+rle <- Rle(sample(6L, 500000, replace=TRUE), 8)
+a <- array(rle, dim=c(50, 20, 4000)) # array() expands the Rle object
+ # internally with as.vector()
+
+A <- RleArray(rle, dim=c(50, 20, 4000)) # Rle object is NOT expanded
+A
+
+object.size(a)
+object.size(A)
+
+stopifnot(identical(a, as.array(A)))
+
+toto <- function(x) (5 * x[ , , 1] ^ 3 + 1L) * log(x[, , 2])
+b <- toto(a)
+head(b)
+
+B <- toto(A) # very fast! (operations are delayed)
+B # still 3 dimensions (subsetting a DelayedArray object never drops
+ # dimensions)
+B <- drop(B)
+B
+
+stopifnot(identical(b, as.array(B)))
+
+cs <- colSums(b)
+CS <- colSums(B)
+stopifnot(identical(cs, CS))
+
+## Coercion of a DelayedMatrix object to DataFrame produces a DataFrame
+## object with Rle columns:
+as(B, "DataFrame")
+}
+\keyword{methods}
+\keyword{classes}
diff --git a/man/cbind-methods.Rd b/man/cbind-methods.Rd
new file mode 100644
index 0000000..568ab17
--- /dev/null
+++ b/man/cbind-methods.Rd
@@ -0,0 +1,91 @@
+\name{cbind-methods}
+
+\alias{cbind-methods}
+
+\alias{dim,SeedBinder-method}
+\alias{dimnames,SeedBinder-method}
+
+\alias{rbind}
+\alias{rbind,DelayedMatrix-method}
+\alias{rbind,DelayedArray-method}
+
+\alias{cbind}
+\alias{cbind,DelayedMatrix-method}
+\alias{cbind,DelayedArray-method}
+
+\alias{arbind}
+\alias{arbind,DelayedArray-method}
+
+\alias{acbind}
+\alias{acbind,DelayedArray-method}
+
+% Internal stuff
+\alias{subset_seed_as_array,SeedBinder-method}
+
+\title{Bind DelayedArray objects along their rows or columns}
+
+\description{
+ Methods for binding DelayedArray objects along their rows or columns.
+}
+
+\details{
+ \code{rbind}, \code{cbind}, \code{arbind}, \code{acbind} methods are defined
+ for \link{DelayedArray} objects. They perform delayed binding along the rows
+ (\code{rbind} and \code{arbind}) or columns (\code{cbind} and \code{acbind})
+ of the objects passed to them.
+}
+
+\seealso{
+ \itemize{
+ \item \code{\link[base]{cbind}} in the \pkg{base} package for
+ rbind/cbind'ing ordinary arrays.
+
+ \item \code{\link[IRanges]{acbind}} in the \pkg{IRanges} package for
+ arbind/acbind'ing ordinary arrays.
+
+ \item \link{DelayedArray-utils} for common operations on
+ \link{DelayedArray} objects.
+
+ \item \link{DelayedArray} objects.
+
+ \item \link[HDF5Array]{HDF5Array} objects in the \pkg{HDF5Array} package.
+
+ \item \link[base]{array} objects in base R.
+ }
+}
+
+\examples{
+## ---------------------------------------------------------------------
+## rbind/cbind
+## ---------------------------------------------------------------------
+library(HDF5Array)
+toy_h5 <- system.file("extdata", "toy.h5", package="HDF5Array")
+h5ls(toy_h5)
+
+M1 <- HDF5Array(toy_h5, "M1")
+M2 <- HDF5Array(toy_h5, "M2")
+
+M <- rbind(M1, t(M2))
+M
+colMeans(M)
+
+## ---------------------------------------------------------------------
+## arbind/acbind
+## ---------------------------------------------------------------------
+a1 <- array(1:60, c(3, 5, 4),
+ dimnames=list(NULL, paste0("M1y", 1:5), NULL))
+a2 <- array(101:240, c(7, 5, 4),
+ dimnames=list(paste0("M2x", 1:7), paste0("M2y", 1:5), NULL))
+a3 <- array(10001:10100, c(5, 5, 4),
+ dimnames=list(paste0("M3x", 1:5), NULL, paste0("M3z", 1:4)))
+
+A1 <- DelayedArray(a1)
+A2 <- DelayedArray(a2)
+A3 <- DelayedArray(a3)
+A <- arbind(A1, A2, A3)
+A
+
+## Sanity check:
+stopifnot(identical(arbind(a1, a2, a3), as.array(A)))
+}
+\keyword{methods}
diff --git a/man/realize.Rd b/man/realize.Rd
new file mode 100644
index 0000000..2fcd148
--- /dev/null
+++ b/man/realize.Rd
@@ -0,0 +1,108 @@
+\name{realize}
+
+\alias{class:RealizationSink}
+\alias{RealizationSink-class}
+
+\alias{write_to_sink}
+\alias{write_to_sink,DelayedArray,RealizationSink-method}
+\alias{write_to_sink,ANY,RealizationSink-method}
+
+\alias{close,RealizationSink-method}
+
+\alias{class:arrayRealizationSink}
+\alias{arrayRealizationSink-class}
+\alias{arrayRealizationSink}
+
+\alias{write_to_sink,array,arrayRealizationSink-method}
+\alias{coerce,arrayRealizationSink,DelayedArray-method}
+
+\alias{supportedRealizationBackends}
+\alias{getRealizationBackend}
+\alias{setRealizationBackend}
+
+\alias{realize}
+\alias{realize,ANY-method}
+
+\alias{RealizationSink}
+
+\title{Realize a DelayedArray object}
+
+\description{
+ Realize a \link{DelayedArray} object in memory or on disk.
+ Get or set the \emph{realization backend} for the current session with
+ \code{getRealizationBackend} or \code{setRealizationBackend}.
+}
+
+\usage{
+supportedRealizationBackends()
+getRealizationBackend()
+setRealizationBackend(BACKEND=NULL)
+
+realize(x, ...)
+
+\S4method{realize}{ANY}(x, BACKEND=getRealizationBackend())
+}
+
+\arguments{
+ \item{x}{
+ The array-like object to realize.
+ }
+ \item{...}{
+ Additional arguments passed to methods.
+ }
+ \item{BACKEND}{
+ \code{NULL} (the default), or a single string specifying the name of
+ the backend. When the backend is set to \code{NULL}, \code{x} is
+ realized in memory as an ordinary array by just calling \code{as.array}
+ on it.
+ }
+}
+
+\details{
+ The \emph{realization backend} controls where/how realization happens e.g.
+ as an ordinary array if set to \code{NULL}, as an \link{RleArray} object
+ if set to \code{"RleArray"}, or in an HDF5 file if set to \code{"HDF5Array"}.
+}
+
+\value{
+ \code{realize(x)} returns a \link{DelayedArray} object. More precisely,
+ it returns \code{DelayedArray(as.array(x))} when the backend is set to
+ \code{NULL} (the default). Otherwise it returns an instance of the class
+ associated with the specified backend (which should extend
+ \link{DelayedArray}).
+}
+
+\seealso{
+ \itemize{
+ \item \link{DelayedArray} objects.
+
+ \item \link{RleArray} objects.
+
+ \item \link[HDF5Array]{HDF5Array} objects in the \pkg{HDF5Array} package.
+
+ \item \link[base]{array} objects in base R.
+ }
+}
+
+\examples{
+library(HDF5Array)
+toy_h5 <- system.file("extdata", "toy.h5", package="HDF5Array")
+h5ls(toy_h5)
+M1 <- HDF5Array(toy_h5, "M1")
+M2 <- HDF5Array(toy_h5, "M2")
+M3 <- rbind(log(M1), t(M2))
+
+supportedRealizationBackends()
+getRealizationBackend() # backend is set to NULL
+realize(M3) # realization as ordinary array
+
+setRealizationBackend("RleArray")
+getRealizationBackend() # backend is set to "RleArray"
+realize(M3) # realization as RleArray object
+
+setRealizationBackend("HDF5Array")
+getRealizationBackend() # backend is set to "HDF5Array"
+realize(M3) # realization in HDF5 file
+}
+
+\keyword{methods}
diff --git a/tests/run_unitTests.R b/tests/run_unitTests.R
new file mode 100644
index 0000000..f7c4734
--- /dev/null
+++ b/tests/run_unitTests.R
@@ -0,0 +1,2 @@
+require("DelayedArray") || stop("unable to load DelayedArray package")
+DelayedArray:::.test()
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/r-bioc-delayedarray.git
More information about the debian-med-commit
mailing list