[med-svn] [r-bioc-delayedarray] 01/04: New upstream version 0.4.1
Andreas Tille
tille at debian.org
Wed Nov 8 17:59:27 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository r-bioc-delayedarray.
commit c54872c7ec08b1f846a3c4829badc62dda55e083
Author: Andreas Tille <tille at debian.org>
Date: Wed Nov 8 18:42:33 2017 +0100
New upstream version 0.4.1
---
DESCRIPTION | 18 +-
NAMESPACE | 52 ++-
R/Array-class.R | 68 +++
R/ArrayBlocks-class.R | 177 --------
R/ArrayGrid-class.R | 574 ++++++++++++++++++++++++
R/ConformableSeedCombiner-class.R | 93 ++++
R/DelayedArray-class.R | 624 +++++++++++++-------------
R/DelayedArray-stats.R | 56 ++-
R/DelayedArray-utils.R | 136 +-----
R/DelayedMatrix-utils.R | 21 +-
R/RleArray-class.R | 392 +++++++++++-----
R/SeedBinder-class.R | 96 ++++
R/bind-arrays.R | 162 +++++++
R/block_processing.R | 114 +++--
R/cbind-methods.R | 163 -------
R/realize.R | 72 +--
R/show-utils.R | 88 ++--
R/subset_seed_as_array.R | 128 ++++++
R/utils.R | 172 ++++---
TODO | 3 +
build/vignette.rds | Bin 0 -> 291 bytes
inst/doc/Working_with_large_arrays.R | 115 +++++
inst/doc/Working_with_large_arrays.Rnw | 764 ++++++++++++++++++++++++++++++++
inst/doc/Working_with_large_arrays.pdf | Bin 0 -> 182894 bytes
inst/unitTests/test_ArrayBlocks-class.R | 71 ---
inst/unitTests/test_ArrayGrid-class.R | 270 +++++++++++
inst/unitTests/test_RleArray-class.R | 45 ++
inst/unitTests/test_bind-arrays.R | 96 ++++
man/Array-class.Rd | 26 ++
man/ArrayBlocks-class.Rd | 19 -
man/ArrayGrid-class.Rd | 142 ++++++
man/DelayedArray-class.Rd | 77 +++-
man/DelayedArray-utils.Rd | 10 +-
man/RleArray-class.Rd | 32 +-
man/bind-arrays.Rd | 58 +++
man/block_processing.Rd | 26 ++
man/cbind-methods.Rd | 91 ----
man/realize.Rd | 12 +-
man/subset_seed_as_array.Rd | 55 +++
vignettes/Working_with_large_arrays.Rnw | 764 ++++++++++++++++++++++++++++++++
vignettes/slides.sty | 34 ++
41 files changed, 4582 insertions(+), 1334 deletions(-)
diff --git a/DESCRIPTION b/DESCRIPTION
index dc12ac2..472dc36 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -8,20 +8,24 @@ Description: Wrapping an array-like object (typically an on-disk object) in
also works on in-memory array-like objects like DataFrame objects
(typically with Rle columns), Matrix objects, and ordinary arrays and
data frames.
-Version: 0.2.7
+Version: 0.4.1
Encoding: UTF-8
Author: Hervé Pagès
Maintainer: Hervé Pagès <hpages at fredhutch.org>
biocViews: Infrastructure, DataRepresentation, Annotation,
GenomeAnnotation
-Depends: R (>= 3.4), methods, BiocGenerics, S4Vectors (>= 0.14.3),
- IRanges, matrixStats
+Depends: R (>= 3.4), methods, matrixStats, BiocGenerics, S4Vectors (>=
+ 0.15.3), IRanges (>= 2.11.17)
Imports: stats
-Suggests: Matrix, HDF5Array, genefilter, BiocStyle
+Suggests: Matrix, HDF5Array, genefilter, SummarizedExperiment, airway,
+ pryr, knitr, BiocStyle
License: Artistic-2.0
-Collate: utils.R ArrayBlocks-class.R show-utils.R DelayedArray-class.R
- cbind-methods.R realize.R block_processing.R
+VignetteBuilder: knitr
+Collate: utils.R bind-arrays.R Array-class.R ArrayGrid-class.R
+ show-utils.R subset_seed_as_array.R
+ ConformableSeedCombiner-class.R SeedBinder-class.R
+ DelayedArray-class.R realize.R block_processing.R
DelayedArray-utils.R DelayedMatrix-utils.R DelayedArray-stats.R
DelayedMatrix-stats.R RleArray-class.R zzz.R
NeedsCompilation: no
-Packaged: 2017-06-03 00:46:21 UTC; biocbuild
+Packaged: 2017-11-04 00:48:32 UTC; biocbuild
diff --git a/NAMESPACE b/NAMESPACE
index eb6102d..c2bf487 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,5 +1,8 @@
import(methods)
-importFrom(stats, setNames, dlogis, plogis, qlogis)
+importFrom(stats, setNames,
+ dbinom, pbinom, qbinom,
+ dpois, ppois, qpois,
+ dlogis, plogis, qlogis)
import(BiocGenerics)
import(S4Vectors)
import(IRanges)
@@ -11,10 +14,12 @@ import(matrixStats)
###
exportClasses(
- ArrayBlocks,
+ Array,
+ ArrayViewport, ArrayGrid, ArrayArbitraryGrid, ArrayRegularGrid,
DelayedArray, DelayedMatrix,
RealizationSink, arrayRealizationSink,
- RleArray, RleMatrix, RleRealizationSink
+ RleArraySeed, SolidRleArraySeed, RleRealizationSink, ChunkedRleArraySeed,
+ RleArray, RleMatrix
)
@@ -24,6 +29,7 @@ exportClasses(
S3method(as.array, DelayedArray)
+S3method(as.character, ArrayGrid)
S3method(as.character, DelayedArray)
S3method(as.complex, DelayedArray)
@@ -53,6 +59,7 @@ S3method(split, DelayedArray)
export(
as.array.DelayedArray,
+ as.character.ArrayGrid,
as.character.DelayedArray,
as.complex.DelayedArray,
@@ -85,12 +92,12 @@ exportMethods(
## Methods for generics defined in the base package:
length, names, "names<-",
dim, "dim<-", dimnames, "dimnames<-",
- as.array, as.vector, as.matrix, as.data.frame,
- "[", "[[",
- drop,
- t,
- c,
- split,
+ "[", "[[", "[<-",
+ lengths,
+ as.array, as.matrix, as.data.frame, as.vector,
+ as.logical, as.integer, as.numeric, as.complex, as.character, as.raw,
+ c, split,
+ drop, t,
is.na, is.finite, is.infinite, is.nan,
"!",
#"+", "-", "*", "/", "^", "%%", "%/%", # "Arith" group generic
@@ -106,6 +113,8 @@ exportMethods(
coerce, show,
## Methods for generics defined in the stats package:
+ dbinom, pbinom, qbinom,
+ dpois, ppois, qpois,
dlogis, plogis, qlogis,
## Methods for generics defined in the BiocGenerics package:
@@ -115,8 +124,8 @@ exportMethods(
isEmpty,
## Methods for generics defined in the IRanges package:
- splitAsList,
- arbind, acbind
+ ranges, start, end, width,
+ splitAsList
)
@@ -125,10 +134,11 @@ exportMethods(
###
export(
- ArrayBlocks,
+ ArrayViewport, makeNindexFromArrayViewport,
+ ArrayArbitraryGrid, ArrayRegularGrid,
supportedRealizationBackends, getRealizationBackend, setRealizationBackend,
- RealizationSink, arrayRealizationSink,
- RleArray, RleRealizationSink
+ write_array_to_sink,
+ RleArray
)
@@ -137,18 +147,24 @@ export(
###
export(
- matrixClass, DelayedArray, seed, subset_seed_as_array, type,
+ arbind, acbind,
+ refdim, isLinear,
+ subset_seed_as_array,
+ matrixClass, DelayedArray, seed, type,
+ chunk_dim, write_block_to_sink, close,
realize,
- write_to_sink, close,
pmax2, pmin2, apply,
rowMaxs, colMaxs, rowMins, colMins, rowRanges, colRanges
)
### Exactly the same list as above.
exportMethods(
- matrixClass, DelayedArray, seed, subset_seed_as_array, type,
+ arbind, acbind,
+ refdim, isLinear,
+ subset_seed_as_array,
+ matrixClass, DelayedArray, seed, type,
+ chunk_dim, write_block_to_sink, close,
realize,
- write_to_sink, close,
pmax2, pmin2, apply,
rowMaxs, colMaxs, rowMins, colMins, rowRanges, colRanges
)
diff --git a/R/Array-class.R b/R/Array-class.R
new file mode 100644
index 0000000..1555b1c
--- /dev/null
+++ b/R/Array-class.R
@@ -0,0 +1,68 @@
+### =========================================================================
+### Array objects
+### -------------------------------------------------------------------------
+
+
+### A virtual class with no slots to be extended by concrete subclasses with
+### an array-like semantic.
+setClass("Array", representation("VIRTUAL"))
+
+### Even though prod() always returns a double, it seems that the length()
+### primitive function takes care of turning this double into an integer if
+### it's <= .Machine$integer.max
+setMethod("length", "Array", function(x) prod(dim(x)))
+
+### 'subscripts' is assumed to be an integer vector parallel to 'dim(x)' and
+### with no out-of-bounds subscripts (i.e. 'all(subscripts >= 1)' and
+### 'all(subscripts <= dim(x))').
+### NOT exported at the moment but should probably be at some point (like
+### S4Vectors::getListElement() is.
+setGeneric("getArrayElement", signature="x",
+ function(x, subscripts) standardGeneric("getArrayElement")
+)
+
+### Return an integer vector parallel to 'dim' and guaranteed to contain no
+### out-of-bounds subscripts.
+.from_linear_to_multi_subscript <- function(i, dim)
+{
+ stopifnot(isSingleInteger(i))
+ if (i < 1L || i > prod(dim))
+ stop("subscript is out of bounds")
+ i <- i - 1L
+ subscripts <- integer(length(dim))
+ for (along in seq_along(dim)) {
+ d <- dim[[along]]
+ subscripts[[along]] <- offset <- i %% d
+ i <- (i - offset) %/% d
+ }
+ subscripts + 1L
+}
+
+### Support multi-dimensional and linear subsetting.
+setMethod("[[", "Array",
+ function(x, i, j, ...)
+ {
+ if (missing(x))
+ stop("'x' is missing")
+ Nindex <- extract_Nindex_from_syscall(sys.call(), parent.frame())
+ nsubscript <- length(Nindex)
+ x_dim <- dim(x)
+ x_ndim <- length(x_dim)
+ if (!(nsubscript == x_ndim || nsubscript == 1L))
+ stop("incorrect number of subscripts")
+ ok <- vapply(Nindex, isSingleInteger, logical(1), USE.NAMES=FALSE)
+ if (!all(ok))
+ stop(wmsg("each subscript must be a single integer ",
+ "when subsetting an ", class(x), " object with [["))
+ if (nsubscript == x_ndim) {
+ subscripts <- unlist(Nindex, use.names=FALSE)
+ if (!(all(subscripts >= 1L) && all(subscripts <= x_dim)))
+ stop("some subscripts are out of bounds")
+ } else {
+ ## Translate linear subsetting into multi-dimensional subsetting.
+ subscripts <- .from_linear_to_multi_subscript(Nindex[[1L]], x_dim)
+ }
+ getArrayElement(x, subscripts)
+ }
+)
+
diff --git a/R/ArrayBlocks-class.R b/R/ArrayBlocks-class.R
deleted file mode 100644
index 1813fd9..0000000
--- a/R/ArrayBlocks-class.R
+++ /dev/null
@@ -1,177 +0,0 @@
-### =========================================================================
-### ArrayBlocks objects
-### -------------------------------------------------------------------------
-###
-
-
-setClass("ArrayBlocks",
- contains="List",
- representation(
- dim="integer",
- max_block_len="integer",
- N="integer",
- by="integer"
- ),
- prototype(elementType="list")
-)
-
-### Return an ArrayBlocks object i.e. a collection of subarrays of the
-### original array with the following properties:
-### (a) The collection of blocks is a partitioning of the original array
-### i.e. the blocks fully cover it and don't overlap each other.
-### (b) Each block is made of adjacent elements in the original array.
-### (c) Each block has a length (i.e. nb of elements) <= 'max_block_len'.
-ArrayBlocks <- function(dim, max_block_len)
-{
- p <- cumprod(dim)
- w <- which(p <= max_block_len)
- N <- if (length(w) == 0L) 1L else w[[length(w)]] + 1L
- if (N > length(dim)) {
- by <- 1L
- } else if (N == 1L) {
- by <- max_block_len
- } else {
- by <- max_block_len %/% as.integer(p[[N - 1L]])
- }
- new("ArrayBlocks", dim=dim, max_block_len=max_block_len, N=N, by=by)
-}
-
-.get_ArrayBlocks_inner_length <- function(blocks)
-{
- ndim <- length(blocks at dim)
- if (blocks at N > ndim)
- return(if (any(blocks at dim == 0L)) 0L else 1L)
- inner_len <- blocks at dim[[blocks at N]] %/% blocks at by
- by2 <- blocks at dim[[blocks at N]] %% blocks at by
- if (by2 != 0L) # 'blocks' contains truncated blocks
- inner_len <- inner_len + 1L
- inner_len
-}
-
-.get_ArrayBlocks_outer_length <- function(blocks)
-{
- ndim <- length(blocks at dim)
- if (blocks at N >= ndim)
- return(1L)
- outer_dim <- blocks at dim[(blocks at N + 1L):ndim]
- prod(outer_dim)
-}
-
-### Return the number of blocks in 'x'.
-setMethod("length", "ArrayBlocks",
- function(x)
- .get_ArrayBlocks_inner_length(x) * .get_ArrayBlocks_outer_length(x)
-)
-
-get_block_lengths <- function(blocks)
-{
- p <- prod(blocks at dim[seq_len(blocks at N - 1L)])
- ndim <- length(blocks at dim)
- if (blocks at N > ndim)
- return(p)
- fb_len <- p * blocks at by # full block length
- lens <- rep.int(fb_len, blocks at dim[[blocks at N]] %/% blocks at by)
- by2 <- blocks at dim[[blocks at N]] %% blocks at by
- if (by2 != 0L) { # 'blocks' contains truncated blocks
- tb_len <- p * by2 # truncated block length
- lens <- c(lens, tb_len)
- }
- rep.int(lens, .get_ArrayBlocks_outer_length(blocks))
-}
-
-### Return an IRanges object with 1 range per dimension.
-get_block_ranges <- function(blocks, i)
-{
- nblock <- length(blocks)
- stopifnot(isSingleInteger(i), i >= 1L, i <= nblock)
-
- ndim <- length(blocks at dim)
- ans <- IRanges(rep.int(1L, ndim), blocks at dim)
-
- if (blocks at N > ndim)
- return(ans)
-
- i <- i - 1L
- if (blocks at N < ndim) {
- inner_len <- .get_ArrayBlocks_inner_length(blocks)
- i1 <- i %% inner_len
- i2 <- i %/% inner_len
- } else {
- i1 <- i
- }
-
- k1 <- i1 * blocks at by
- k2 <- k1 + blocks at by
- k1 <- k1 + 1L
- upper_bound <- blocks at dim[[blocks at N]]
- if (k2 > upper_bound)
- k2 <- upper_bound
- start(ans)[[blocks at N]] <- k1
- end(ans)[[blocks at N]] <- k2
-
- if (blocks at N < ndim) {
- outer_dim <- blocks at dim[(blocks at N + 1L):ndim]
- subindex <- arrayInd(i2 + 1L, outer_dim)
- ans[(blocks at N + 1L):ndim] <- IRanges(subindex, width=1L)
- }
- ans
-}
-
-get_array_block_Nindex <- function(blocks, i)
-{
- make_Nindex_from_block_ranges(get_block_ranges(blocks, i), blocks at dim)
-}
-
-setMethod("getListElement", "ArrayBlocks",
- function(x, i, exact=TRUE)
- {
- i <- normalizeDoubleBracketSubscript(i, x, exact=exact,
- error.if.nomatch=TRUE)
- get_array_block_Nindex(x, i)
- }
-)
-
-setMethod("show", "ArrayBlocks",
- function(object)
- {
- dim_in1string <- paste0(object at dim, collapse=" x ")
- cat(class(object), " object with ", length(object), " blocks ",
- "of length <= ", object at max_block_len, " on a ",
- dim_in1string, " array:\n", sep="")
- for (i in seq_along(object)) {
- Nindex <- object[[i]]
- cat("[[", i, "]]: [", Nindex_as_string(Nindex), "]\n",
- sep="")
- }
- }
-)
-
-extract_array_block <- function(x, blocks, i)
-{
- Nindex <- get_array_block_Nindex(blocks, i)
- subset_by_Nindex(x, Nindex)
-}
-
-### NOT exported but used in unit tests.
-split_array_in_blocks <- function(x, max_block_len)
-{
- blocks <- ArrayBlocks(dim(x), max_block_len)
- lapply(seq_along(blocks),
- function(i) extract_array_block(x, blocks, i))
-}
-
-### NOT exported but used in unit tests.
-### Rebuild the original array from the subarrays obtained by
-### split_array_in_blocks() as an *ordinary* array.
-### So if 'x' is an ordinary array, then:
-###
-### unsplit_array_from_blocks(split_array_in_blocks(x, max_block_len), x)
-###
-### should be a no-op for any 'max_block_len' < 'length(x)'.
-unsplit_array_from_blocks <- function(subarrays, x)
-{
- ans <- combine_array_objects(subarrays)
- dim(ans) <- dim(x)
- ans
-}
-
diff --git a/R/ArrayGrid-class.R b/R/ArrayGrid-class.R
new file mode 100644
index 0000000..16ef524
--- /dev/null
+++ b/R/ArrayGrid-class.R
@@ -0,0 +1,574 @@
+### =========================================================================
+### ArrayViewport and ArrayGrid objects
+### -------------------------------------------------------------------------
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### ArrayViewport objects
+###
+
+### We don't extend the IRanges class because we don't want to inherit the
+### full Ranges API (most operations in that API would do the wrong thing on
+### ArrayViewport objects).
+setClass("ArrayViewport",
+ contains="Array",
+ representation(
+ refdim="integer", # Dimensions of "the reference array" i.e. the
+ # array on top of which the viewport is defined
+ # (a.k.a. "the underlying array").
+ ranges="IRanges" # Must be parallel to the 'refdim' slot.
+ )
+)
+
+### Validity
+
+.validate_ranges_slot <- function(x)
+{
+ x_ranges <- x at ranges
+ x_refdim <- x at refdim
+ if (length(x_ranges) != length(x_refdim))
+ return(wmsg2("'ranges' and 'refdim' slots must have the same length"))
+
+ ## Check that the viewport is contained in the reference array.
+ x_start <- start(x_ranges)
+ x_end <- end(x_ranges)
+ if (!(all(x_start >= 1L) && all(x_end <= x_refdim)))
+ return(wmsg2("object represents a viewport that is not ",
+ "within the bounds of the reference array"))
+
+ ## A viewport cannot be longer than 2^31-1.
+ x_dim <- width(x_ranges)
+ if (prod(x_dim) > .Machine$integer.max)
+ return(wmsg2("a viewport cannot be longer than .Machine$integer.max"))
+ TRUE
+}
+
+.validate_ArrayViewport <- function(x)
+{
+ msg <- validate_dim_slot(x, "refdim")
+ if (!isTRUE(msg))
+ return(msg)
+ msg <- .validate_ranges_slot(x)
+ if (!isTRUE(msg))
+ return(msg)
+ TRUE
+}
+
+setValidity2("ArrayViewport", .validate_ArrayViewport)
+
+### Getters
+
+setGeneric("refdim", function(x) standardGeneric("refdim"))
+
+setMethod("refdim", "ArrayViewport", function(x) x at refdim)
+
+setMethod("ranges", "ArrayViewport", function(x) x at ranges)
+
+setMethod("start", "ArrayViewport", function(x) start(ranges(x)))
+
+setMethod("width", "ArrayViewport", function(x) width(ranges(x)))
+
+setMethod("end", "ArrayViewport", function(x) end(ranges(x)))
+
+### 'width(x)' and 'dim(x)' are synonyms.
+setMethod("dim", "ArrayViewport", function(x) width(ranges(x)))
+
+### Constructor
+
+### If 'ranges' is omitted, return a viewport that covers the whole
+### reference array.
+ArrayViewport <- function(refdim, ranges=NULL)
+{
+ if (is.null(ranges))
+ ranges <- IRanges(rep.int(1L, length(refdim)), refdim)
+ new("ArrayViewport", refdim=refdim, ranges=ranges)
+}
+
+### Show
+
+make_string_from_ArrayViewport <- function(viewport, dimnames=NULL,
+ with.brackets=FALSE)
+{
+ if (!isTRUEorFALSE(with.brackets))
+ stop("'with.brackets' must be TRUE or FALSE")
+ viewport_ranges <- ranges(viewport)
+ viewport_dim <- dim(viewport)
+ viewport_refdim <- refdim(viewport)
+ ans <- as.character(viewport_ranges)
+ ans[viewport_dim == viewport_refdim] <- ""
+ if (!is.null(dimnames)) {
+ stopifnot(is.list(dimnames), length(viewport_dim) == length(dimnames))
+ usename_idx <- which(viewport_dim == 1L &
+ viewport_refdim != 1L &
+ lengths(dimnames) != 0L)
+ ans[usename_idx] <- mapply(`[`, dimnames[usename_idx],
+ start(viewport_ranges)[usename_idx],
+ SIMPLIFY=FALSE)
+ }
+ if (ans[[1L]] == "" && with.brackets)
+ ans[[1L]] <- " "
+ ans <- paste0(ans, collapse=", ")
+ if (with.brackets)
+ ans <- paste0("[", ans, "]")
+ ans
+}
+
+setMethod("show", "ArrayViewport",
+ function(object)
+ {
+ refdim_in1string <- paste0(refdim(object), collapse=" x ")
+ cat(class(object), " object on a ", refdim_in1string, " array: ",
+ sep="")
+ s <- make_string_from_ArrayViewport(object, with.brackets=TRUE)
+ cat(s, "\n", sep="")
+ }
+)
+
+### makeNindexFromArrayViewport()
+
+### Used in HDF5Array!
+makeNindexFromArrayViewport <- function(viewport, expand.RangeNSBS=FALSE)
+{
+ viewport_ranges <- ranges(viewport)
+ viewport_dim <- dim(viewport)
+ viewport_refdim <- refdim(viewport)
+ ndim <- length(viewport_dim)
+ Nindex <- vector(mode="list", length=ndim)
+ is_not_missing <- viewport_dim < viewport_refdim
+ if (expand.RangeNSBS) {
+ expand_idx <- which(is_not_missing)
+ } else {
+ viewport_starts <- start(viewport_ranges)
+ viewport_ends <- end(viewport_ranges)
+ is_width1 <- viewport_dim == 1L
+ expand_idx <- which(is_not_missing & is_width1)
+ RangeNSBS_idx <- which(is_not_missing & !is_width1)
+ Nindex[RangeNSBS_idx] <- lapply(RangeNSBS_idx,
+ function(i) {
+ range_start <- viewport_starts[[i]]
+ range_end <- viewport_ends[[i]]
+ upper_bound <- viewport_refdim[[i]]
+ new2("RangeNSBS", subscript=c(range_start, range_end),
+ upper_bound=upper_bound,
+ check=FALSE)
+ }
+ )
+ }
+ Nindex[expand_idx] <- as.list(viewport_ranges[expand_idx])
+ Nindex
+}
+
+### 2 utilities for extracting/replacing blocks from/in an array-like object
+
+extract_block <- function(x, viewport)
+{
+ stopifnot(identical(dim(x), refdim(viewport)))
+ Nindex <- makeNindexFromArrayViewport(viewport)
+ subset_by_Nindex(x, Nindex)
+}
+
+### Return the modified array.
+replace_block <- function(x, viewport, block)
+{
+ stopifnot(identical(dim(x), refdim(viewport)),
+ identical(dim(viewport), dim(block)))
+ Nindex <- makeNindexFromArrayViewport(viewport)
+ replace_by_Nindex(x, Nindex, block)
+}
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### ArrayGrid objects
+###
+### An ArrayGrid object represents a grid on top of an array (called "the
+### reference array" or "the underlying array"). The ArrayGrid class is a
+### virtual class with 2 concrete subclasses, ArrayArbitraryGrid and
+### ArrayRegularGrid, for representing an arbitrarily-spaced or a
+### regularly-spaced grid, respectively. The API we implement on these objects
+### is divided into 3 groups of methods:
+### 1) One special method:
+### - refdim(x): Return the dimensions of the reference array.
+### 2) Methods from the array API (an ArrayGrid object can be seen as an
+### array of ArrayViewport objects that cover the reference array without
+### overlapping with each others):
+### - dim(x): Return the number of grid elements (i.e. viewports) along
+### each dimension of the reference array.
+### - x[[i_1, i_2, ..., i_n]]: Multi-dimensional double bracket
+### subsetting. Return an ArrayViewport object.
+### 3) Methods from the list API:
+### - length(): Return the total number of grid elements.
+### - x[[i]]: Linear double bracket subsetting. Return an ArrayViewport
+### object.
+### Groups 2) and 3) give these objects 2 semantics: array-like and list-like.
+### Note that length() and "linear double bracket subsetting" are consistent
+### with dim() and "multi-dimensional double bracket subsetting", respectively.
+### So the array-like and list-like semantics are compatible.
+###
+
+setClass("ArrayGrid",
+ contains=c("Array", "List"),
+ representation("VIRTUAL"),
+ prototype(elementType="ArrayViewport")
+)
+
+setClass("ArrayArbitraryGrid",
+ contains="ArrayGrid",
+ representation(
+ tickmarks="list" # A list of integer vectors, one along each
+ # dimension of the reference array,
+ # representing the tickmarks along that
+ # dimension. Each integer vector must be sorted
+ # in ascending order.
+ )
+)
+
+setClass("ArrayRegularGrid",
+ contains="ArrayGrid",
+ representation(
+ refdim="integer", # Dimensions of the reference array.
+ spacings="integer" # Grid spacing along each dimension.
+ )
+)
+
+### Low-level helpers
+
+.get_ArrayArbitraryGrid_spacings_along <- function(x, along)
+ S4Vectors:::diffWithInitialZero(x at tickmarks[[along]])
+
+.get_ArrayArbitraryGrid_max_spacings <- function(x)
+{
+ vapply(seq_along(x at tickmarks),
+ function(along)
+ max(0L, .get_ArrayArbitraryGrid_spacings_along(x, along)),
+ integer(1))
+}
+
+.get_ArrayRegularGrid_dim <- function(refdim, spacings)
+{
+ ans <- refdim %/% spacings + (refdim %% spacings != 0L)
+ ans[is.na(ans)] <- 1L
+ ans
+}
+
+.get_ArrayRegularGrid_spacings_along <- function(x, along)
+{
+ D <- x at refdim[[along]]
+ if (D == 0L)
+ return(0L)
+ spacing <- x at spacings[[along]]
+ ans <- rep.int(spacing, D %/% spacing)
+ r <- D %% spacing
+ if (r != 0L)
+ ans <- c(ans, r)
+ ans
+}
+
+### Validity
+
+.valid_tickmarks <- function(tm)
+{
+ is.integer(tm) && !S4Vectors:::anyMissingOrOutside(tm, 0L) && isSorted(tm)
+}
+.validate_ArrayArbitraryGrid <- function(x)
+{
+ x_tickmarks <- x at tickmarks
+ if (!is.list(x_tickmarks))
+ return(wmsg2("'tickmarks' slot must be a list"))
+ ok <- vapply(x_tickmarks, .valid_tickmarks, logical(1), USE.NAMES=FALSE)
+ if (!all(ok))
+ return(wmsg2("each list element in 'tickmarks' slot must be a ",
+ "sorted integer vector of non-negative values"))
+ max_spacings <- .get_ArrayArbitraryGrid_max_spacings(x)
+ if (prod(max_spacings) > .Machine$integer.max)
+ return(wmsg2("grid is too coarse (all grid elements must have a ",
+ "length <= .Machine$integer.max)"))
+ TRUE
+}
+setValidity2("ArrayArbitraryGrid", .validate_ArrayArbitraryGrid)
+
+.validate_ArrayRegularGrid <- function(x)
+{
+ msg <- validate_dim_slot(x, "refdim")
+ if (!isTRUE(msg))
+ return(msg)
+ msg <- validate_dim_slot(x, "spacings")
+ if (!isTRUE(msg))
+ return(msg)
+ x_spacings <- x at spacings
+ x_refdim <- x at refdim
+ if (length(x_spacings) != length(x_refdim))
+ return(wmsg2("'spacings' and 'refdim' slots must have ",
+ "the same length"))
+ if (!all(x_spacings <= x_refdim))
+ return(wmsg2("values in 'spacings' slot must be <= the ",
+ "corresponding values in 'refdim' slot"))
+ if (any(x_spacings == 0L & x_refdim != 0L))
+ return(wmsg2("values in 'spacings' slot cannot be 0 unless the ",
+ "corresponding values in 'refdim' slot are 0"))
+ if (prod(x_spacings) > .Machine$integer.max)
+ return(wmsg2("grid is too coarse (all grid elements must have a ",
+ "length <= .Machine$integer.max)"))
+ TRUE
+}
+setValidity2("ArrayRegularGrid", .validate_ArrayRegularGrid)
+
+### Getters
+
+setMethod("refdim", "ArrayArbitraryGrid",
+ function(x)
+ {
+ mapply(function(tm, tm_len) if (tm_len == 0L) 0L else tm[[tm_len]],
+ x at tickmarks,
+ lengths(x at tickmarks),
+ USE.NAMES=FALSE)
+ }
+)
+
+setMethod("refdim", "ArrayRegularGrid", function(x) x at refdim)
+
+setMethod("dim", "ArrayArbitraryGrid", function(x) lengths(x at tickmarks))
+
+setMethod("dim", "ArrayRegularGrid",
+ function(x) .get_ArrayRegularGrid_dim(refdim(x), x at spacings)
+)
+
+### Constructors
+
+ArrayArbitraryGrid <- function(tickmarks)
+ new("ArrayArbitraryGrid", tickmarks=tickmarks)
+
+### Note that none of the dimensions of an ArrayRegularGrid object can be 0,
+### even when some dimensions of the reference array are 0 (in which case,
+### the corresponding dimensions of the grid object are set to 1). As a
+### consequence, an ArrayRegularGrid object always contains at least 1 grid
+### element. Each dimension of the first grid element is always equal to the
+### spacing along that dimension i.e. for any ArrayRegularGrid object,
+### 'dim(grid[[1]])' is identical to 'spacings'.
+### If 'spacings' is omitted, return a grid with a single grid element
+### covering the whole reference array.
+ArrayRegularGrid <- function(refdim, spacings=refdim)
+ new("ArrayRegularGrid", refdim=refdim, spacings=spacings)
+
+### [[
+
+setMethod("getArrayElement", "ArrayArbitraryGrid",
+ function(x, subscripts)
+ {
+ x_refdim <- refdim(x)
+ ans_end <- mapply(`[[`, x at tickmarks, subscripts)
+ ans_width <- mapply(
+ function(along, i)
+ .get_ArrayArbitraryGrid_spacings_along(x, along)[[i]],
+ seq_along(x_refdim),
+ subscripts)
+ ans_ranges <- IRanges(end=ans_end, width=ans_width)
+ ArrayViewport(x_refdim, ans_ranges)
+ }
+)
+
+setMethod("getArrayElement", "ArrayRegularGrid",
+ function(x, subscripts)
+ {
+ x_refdim <- refdim(x)
+ ans_offset <- (subscripts - 1L) * x at spacings
+ ans_end <- pmin(ans_offset + x at spacings, refdim(x))
+ ans_ranges <- IRanges(start=ans_offset + 1L, end=ans_end)
+ ArrayViewport(x_refdim, ans_ranges)
+ }
+)
+
+### lengths()
+
+### NOT exported.
+setGeneric("get_spacings_along", signature="x",
+ function(x, along) standardGeneric("get_spacings_along")
+)
+setMethod("get_spacings_along", "ArrayArbitraryGrid",
+ .get_ArrayArbitraryGrid_spacings_along
+)
+setMethod("get_spacings_along", "ArrayRegularGrid",
+ .get_ArrayRegularGrid_spacings_along
+)
+
+### Equivalent to 'vapply(x, length, integer(1))' but faster.
+### The sum of the hyper-volumes of all the grid elements should be equal
+### to the hyper-volume of the reference array.
+### More concisely: sum(lengths(x)) should be equal to 'prod(refdim(x))'.
+setMethod("lengths", "ArrayGrid",
+ function (x, use.names=TRUE)
+ {
+ ans <- get_spacings_along(x, 1L)
+ x_ndim <- length(refdim(x))
+ if (x_ndim >= 2L) {
+ for (along in 2:x_ndim)
+ ans <- ans * rep(get_spacings_along(x, along), each=length(ans))
+ }
+ ans
+ }
+)
+
+### Show
+
+### S3/S4 combo for as.character.ArrayGrid
+.as.character.ArrayGrid <- function(x, with.brackets=FALSE)
+{
+ data <- vapply(x,
+ function(viewport)
+ make_string_from_ArrayViewport(viewport,
+ with.brackets=with.brackets),
+ character(1)
+ )
+ array(data, dim(x))
+}
+as.character.ArrayGrid <- function(x, ...) .as.character.ArrayGrid(x, ...)
+setMethod("as.character", "ArrayGrid", .as.character.ArrayGrid)
+
+setMethod("show", "ArrayGrid",
+ function(object)
+ {
+ dim_in1string <- paste0(dim(object), collapse=" x ")
+ refdim_in1string <- paste0(refdim(object), collapse=" x ")
+ cat(dim_in1string, " ", class(object), " object ",
+ "on a ", refdim_in1string, " array:\n", sep="")
+ ## Turn 'object' into a character array.
+ print(as.character(object, TRUE), quote=FALSE)
+ }
+)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### get_max_spacings_for_hypercube_blocks()
+###
+
+### Typically used to create a grid that divides the reference array 'x' into
+### blocks that have a shape that is as close as possible to an hypercube
+### (while having their length <= 'max_block_len'):
+###
+### max_block_len <- get_max_block_length(type(x))
+### spacings <- get_max_spacings_for_hypercube_blocks(dim(x), max_block_len)
+### grid <- ArrayRegularGrid(dim(x), spacings)
+###
+### NOT exported but used in HDF5Array!
+get_max_spacings_for_hypercube_blocks <- function(refdim, max_block_len)
+{
+ if (!isSingleNumber(max_block_len))
+ stop("'max_block_len' must be a single number")
+ p <- prod(refdim)
+ if (p <= max_block_len)
+ return(refdim)
+
+ spacings <- refdim
+ L <- max(spacings)
+ while (TRUE) {
+ is_max <- spacings == L
+ not_max_spacings <- spacings[!is_max]
+ L <- (max_block_len / prod(not_max_spacings)) ^ (1 / sum(is_max))
+ if (length(not_max_spacings) == 0L)
+ break
+ L2 <- max(not_max_spacings)
+ if (L >= L2)
+ break
+ L <- L2
+ spacings[is_max] <- L
+ }
+ spacings[is_max] <- as.integer(L)
+ q <- .get_ArrayRegularGrid_dim(refdim, spacings + 1L) /
+ .get_ArrayRegularGrid_dim(refdim, spacings)
+ for (along in which(is_max)[order(q[is_max])]) {
+ spacings[[along]] <- spacings[[along]] + 1L
+ p <- prod(spacings)
+ if (p == max_block_len)
+ break
+ if (p > max_block_len) {
+ spacings[[along]] <- spacings[[along]] - 1L
+ break
+ }
+ }
+ spacings
+}
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Grids with "linear grid elements"
+###
+### The grid elements are said to be "linear" if they divide the reference
+### array in "linear blocks", i.e. in blocks that would be made of array
+### elements contiguous in memory if the reference array was an ordinary R
+### array (where the fastest changing dimension is the first one).
+### Note that if the 1st grid element is linear, then they all are.
+###
+
+setGeneric("isLinear", function(x) standardGeneric("isLinear"))
+
+setMethod("isLinear", "ArrayViewport",
+ function(x)
+ {
+ x_width <- width(x)
+ idx <- which(x_width != refdim(x))
+ if (length(idx) == 0L)
+ return(TRUE)
+ all(tail(x_width, n=-idx[[1L]]) == 1L)
+ }
+)
+
+setMethod("isLinear", "ArrayGrid",
+ function(x)
+ {
+ if (length(x) == 0L)
+ return(TRUE)
+ isLinear(x[[1L]])
+ }
+)
+
+### Typically used to create a regular grid with "linear grid elements" i.e.
+### a grid that divides the reference array 'x' into "linear blocks":
+###
+### max_block_len <- get_max_block_length(type(x))
+### spacings <- get_max_spacings_for_linear_blocks(dim(x), max_block_len)
+### grid <- ArrayRegularGrid(dim(x), spacings)
+###
+### All the grid elements are guaranteed to have a length <= 'max_block_len'.
+### NOT exported but used in HDF5Array!
+get_max_spacings_for_linear_blocks <- function(refdim, max_block_len)
+{
+ if (!isSingleNumber(max_block_len))
+ stop("'max_block_len' must be a single number")
+ if (!is.integer(max_block_len))
+ max_block_len <- as.integer(max_block_len)
+ p <- cumprod(refdim)
+ w <- which(p <= max_block_len)
+ N <- if (length(w) == 0L) 1L else w[[length(w)]] + 1L
+ if (N > length(refdim))
+ return(refdim)
+ if (N == 1L) {
+ by <- max_block_len
+ } else {
+ by <- max_block_len %/% as.integer(p[[N - 1L]])
+ }
+ c(head(refdim, n=N-1L), by, rep.int(1L, length(refdim)-N))
+}
+
+### NOT exported but used in unit tests.
+split_array_in_linear_blocks <- function(x, max_block_len)
+{
+ spacings <- get_max_spacings_for_linear_blocks(dim(x), max_block_len)
+ grid <- ArrayRegularGrid(dim(x), spacings)
+ lapply(grid, function(viewport) extract_block(x, viewport))
+}
+
+### NOT exported but used in unit tests.
+### Rebuild the original array from the blocks obtained by
+### split_array_in_linear_blocks() as an *ordinary* array.
+### So if 'x' is an ordinary array, then:
+###
+### blocks <- split_array_in_linear_blocks(x, max_block_len)
+### unsplit_array_from_linear_blocks(blocks, x)
+###
+### should be a no-op for any 'max_block_len' < 'length(x)'.
+unsplit_array_from_linear_blocks <- function(blocks, x)
+{
+ ans <- combine_array_objects(blocks)
+ dim(ans) <- dim(x)
+ ans
+}
+
diff --git a/R/ConformableSeedCombiner-class.R b/R/ConformableSeedCombiner-class.R
new file mode 100644
index 0000000..f6cc378
--- /dev/null
+++ b/R/ConformableSeedCombiner-class.R
@@ -0,0 +1,93 @@
+### =========================================================================
+### ConformableSeedCombiner objects
+### -------------------------------------------------------------------------
+###
+### This class is for internal use only and is not exported.
+###
+
+setClass("ConformableSeedCombiner",
+ representation(
+ seeds="list", # List of n conformable array-like objects
+ # to combine. Each object is expected to
+ # satisfy the "seed contract" i.e. to
+ # support dim(), dimnames(), and
+ # subset_seed_as_array().
+
+ COMBINING_OP="function", # n-ary operator to combine the seeds.
+
+ Rargs="list" # Additional arguments to the n-ary
+ # operator.
+ ),
+ prototype(
+ seeds=list(new("array")),
+ COMBINING_OP=identity
+ )
+)
+
+.objects_are_conformable_arrays <- function(objects)
+{
+ dims <- lapply(objects, dim)
+ ndims <- lengths(dims)
+ first_ndim <- ndims[[1L]]
+ if (!all(ndims == first_ndim))
+ return(FALSE)
+ tmp <- unlist(dims, use.names=FALSE)
+ if (is.null(tmp))
+ return(FALSE)
+ dims <- matrix(tmp, nrow=first_ndim)
+ first_dim <- dims[ , 1L]
+ all(dims == first_dim)
+}
+
+.validate_ConformableSeedCombiner <- function(x)
+{
+ ## 'seeds' slot.
+ if (length(x at seeds) == 0L)
+ return(wmsg2("'x at seeds' cannot be empty"))
+ if (!.objects_are_conformable_arrays(x at seeds))
+ return(wmsg2("'x at seeds' must be a list of conformable ",
+ "array-like objects"))
+ TRUE
+}
+
+setValidity2("ConformableSeedCombiner", .validate_ConformableSeedCombiner)
+
+new_ConformableSeedCombiner <- function(seed=new("array"), ...,
+ COMBINING_OP=identity,
+ Rargs=list())
+{
+ seeds <- unname(list(seed, ...))
+ COMBINING_OP <- match.fun(COMBINING_OP)
+ new2("ConformableSeedCombiner", seeds=seeds,
+ COMBINING_OP=COMBINING_OP,
+ Rargs=Rargs)
+}
+
+### Implement the "seed contract" i.e. dim(), dimnames(), and
+### subset_seed_as_array().
+
+.get_ConformableSeedCombiner_dim <- function(x) dim(x at seeds[[1L]])
+
+setMethod("dim", "ConformableSeedCombiner",
+ .get_ConformableSeedCombiner_dim
+)
+
+.get_ConformableSeedCombiner_dimnames <- function(x)
+{
+ combine_dimnames(x at seeds)
+}
+
+setMethod("dimnames", "ConformableSeedCombiner",
+ .get_ConformableSeedCombiner_dimnames
+)
+
+.subset_ConformableSeedCombiner_as_array <- function(seed, index)
+{
+ arrays <- lapply(seed at seeds, subset_seed_as_array, index)
+ do.call(seed at COMBINING_OP, c(arrays, seed at Rargs))
+}
+
+setMethod("subset_seed_as_array", "ConformableSeedCombiner",
+ .subset_ConformableSeedCombiner_as_array
+)
+
diff --git a/R/DelayedArray-class.R b/R/DelayedArray-class.R
index 1e44f99..8d7185a 100644
--- a/R/DelayedArray-class.R
+++ b/R/DelayedArray-class.R
@@ -4,6 +4,7 @@
setClass("DelayedArray",
+ contains="Array",
representation(
seed="ANY", # An array-like object expected to satisfy
# the "seed contract" i.e. to support dim(),
@@ -60,38 +61,34 @@ setMethod("matrixClass", "DelayedArray", function(x) "DelayedMatrix")
### Validity
###
-.wmsg2 <- function(...)
- paste0("\n ",
- paste0(strwrap(paste0(c(...), collapse="")), collapse="\n "))
-
.validate_DelayedArray <- function(x)
{
x_dim <- dim(x at seed)
x_ndim <- length(x_dim)
## 'seed' slot.
if (x_ndim == 0L)
- return(.wmsg2("'x at seed' must have dimensions"))
+ return(wmsg2("'x at seed' must have dimensions"))
## 'index' slot.
if (length(x at index) != x_ndim)
- return(.wmsg2("'x at index' must have one list element per dimension ",
- "in 'x at seed'"))
+ return(wmsg2("'x at index' must have one list element per dimension ",
+ "in 'x at seed'"))
if (!all(S4Vectors:::sapply_isNULL(x at index) |
vapply(x at index, is.integer, logical(1), USE.NAMES=FALSE)))
- return(.wmsg2("every list element in 'x at index' must be either NULL ",
- "or an integer vector"))
+ return(wmsg2("every list element in 'x at index' must be either NULL ",
+ "or an integer vector"))
## 'metaindex' slot.
if (length(x at metaindex) == 0L)
- return(.wmsg2("'x at metaindex' cannot be empty"))
+ return(wmsg2("'x at metaindex' cannot be empty"))
if (S4Vectors:::anyMissingOrOutside(x at metaindex, 1L, x_ndim))
- return(.wmsg2("all values in 'x at metaindex' must be >= 1 ",
- "and <= 'length(x at index)'"))
+ return(wmsg2("all values in 'x at metaindex' must be >= 1 ",
+ "and <= 'length(x at index)'"))
if (!isStrictlySorted(x at metaindex))
- return(.wmsg2("'x at metaindex' must be strictly sorted"))
+ return(wmsg2("'x at metaindex' must be strictly sorted"))
if (!all(get_Nindex_lengths(x at index, x_dim)[-x at metaindex] == 1L))
- return(.wmsg2("all the dropped dimensions in 'x' must be equal to 1"))
+ return(wmsg2("all the dropped dimensions in 'x' must be equal to 1"))
## 'is_transposed' slot.
if (!isTRUEorFALSE(x at is_transposed))
- return(.wmsg2("'x at is_transposed' must be TRUE or FALSE"))
+ return(wmsg2("'x at is_transposed' must be TRUE or FALSE"))
TRUE
}
@@ -102,7 +99,7 @@ setValidity2("DelayedArray", .validate_DelayedArray)
.validate_DelayedMatrix <- function(x)
{
if (length(dim(x)) != 2L)
- return(wmsg("'x' must have exactly 2 dimensions"))
+ return(wmsg2("'x' must have exactly 2 dimensions"))
TRUE
}
@@ -201,11 +198,6 @@ downgrade_to_DelayedArray_or_DelayedMatrix <- function(x)
setMethod("dim", "DelayedArray", .get_DelayedArray_dim)
-### Even though prod() always returns a double, it seems that the length()
-### primitive function automatically turns this double into an integer if
-### it's <= .Machine$integer.max
-setMethod("length", "DelayedArray", function(x) prod(dim(x)))
-
setMethod("isEmpty", "DelayedArray", function(x) any(dim(x) == 0L))
### dim() setter.
@@ -402,8 +394,8 @@ setMethod("names", "DelayedArray", .get_DelayedArray_names)
{
if (length(dim(x)) != 1L) {
if (!is.null(value))
- stop("setting the names of a ", class(x), " object with more ",
- "than 1 dimension is not supported")
+ stop(wmsg("setting the names of a ", class(x), " object ",
+ "with more than 1 dimension is not supported"))
return(x)
}
dimnames(x)[[1L]] <- value
@@ -414,258 +406,18 @@ setReplaceMethod("names", "DelayedArray", .set_DelayedArray_names)
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-### [
-###
-
-### Return an object of the same class as 'x' (endomorphism).
-### 'user_Nindex' must be a "multidimensional subsetting Nindex" i.e. a
-### list with one subscript per dimension in 'x'. Missing subscripts are
-### represented by NULLs.
-.subset_DelayedArray_by_Nindex <- function(x, user_Nindex)
-{
- stopifnot(is.list(user_Nindex))
- x_index <- x at index
- x_ndim <- length(x at metaindex)
- x_seed_dim <- dim(seed(x))
- x_seed_dimnames <- dimnames(seed(x))
- x_delayed_ops <- x at delayed_ops
- for (n in seq_along(user_Nindex)) {
- subscript <- user_Nindex[[n]]
- if (is.null(subscript))
- next
- n0 <- if (x at is_transposed) x_ndim - n + 1L else n
- N <- x at metaindex[[n0]]
- i <- x_index[[N]]
- if (is.null(i)) {
- i <- seq_len(x_seed_dim[[N]]) # expand 'i'
- names(i) <- get_Nindex_names_along(x_index, x_seed_dimnames, N)
- }
- subscript <- normalizeSingleBracketSubscript(subscript, i,
- as.NSBS=TRUE)
- x_index[[N]] <- extractROWS(i, subscript)
- if (n0 == 1L)
- x_delayed_ops <- .subset_delayed_ops_args(x_delayed_ops, subscript,
- FALSE)
- if (n0 == x_ndim)
- x_delayed_ops <- .subset_delayed_ops_args(x_delayed_ops, subscript,
- TRUE)
- }
- if (!identical(x at index, x_index)) {
- x <- downgrade_to_DelayedArray_or_DelayedMatrix(x)
- x at index <- x_index
- if (!identical(x at delayed_ops, x_delayed_ops))
- x at delayed_ops <- x_delayed_ops
- }
- x
-}
-
-### Return an atomic vector.
-.subset_DelayedArray_by_1Dindex <- function(x, i)
-{
- if (!is.numeric(i))
- stop(wmsg("1D-style subsetting of a DelayedArray object only ",
- "accepts a numeric subscript at the moment"))
- if (length(i) == 0L) {
- user_Nindex <- as.list(integer(length(dim(x))))
- ## x0 <- x[0, ..., 0]
- x0 <- .subset_DelayedArray_by_Nindex(x, user_Nindex)
- return(as.vector(x0))
- }
- if (anyNA(i))
- stop(wmsg("1D-style subsetting of a DelayedArray object does ",
- "not support NA indices yet"))
- if (min(i) < 1L)
- stop(wmsg("1D-style subsetting of a DelayedArray object only ",
- "supports positive indices at the moment"))
- if (max(i) > length(x))
- stop(wmsg("subscript contains out-of-bounds indices"))
- if (length(i) == 1L)
- return(.get_DelayedArray_element(x, i))
-
- ## We want to walk only on the blocks that we actually need to visit so we
- ## don't use block_APPLY() or family because they walk on all the blocks.
-
- block_len <- get_block_length(type(x))
- blocks <- ArrayBlocks(dim(x), block_len)
- nblock <- length(blocks)
-
- breakpoints <- cumsum(get_block_lengths(blocks))
- part_idx <- get_part_index(i, breakpoints)
- split_part_idx <- split_part_index(part_idx, length(breakpoints))
- block_idx <- which(lengths(split_part_idx) != 0L) # blocks to visit
- res <- lapply(block_idx, function(b) {
- if (get_verbose_block_processing())
- message("Visiting block ", b, "/", nblock, " ... ",
- appendLF=FALSE)
- Nindex <- get_array_block_Nindex(blocks, b)
- subarray <- subset_by_Nindex(x, Nindex)
- if (!is.array(subarray))
- subarray <- as.array(subarray)
- block_ans <- subarray[split_part_idx[[b]]]
- if (get_verbose_block_processing())
- message("OK")
- block_ans
- })
- unlist(res, use.names=FALSE)[get_rev_index(part_idx)]
-}
-
-.subset_DelayedArray <- function(x, i, j, ..., drop=TRUE)
-{
- if (missing(x))
- stop("'x' is missing")
-
- ## Check the dimensionality of the user call i.e whether the function was
- ## called with 1D-style, or 2D-style, or 3D-style etc... subsetting.
- ndim <- nargs() - 1L
- x_dim <- dim(x)
- x_ndim <- length(x_dim)
- if (!missing(drop))
- ndim <- ndim - 1L
- if (ndim == 1L && missing(i))
- ndim <- 0L
- if (ndim != 0L && ndim != x_ndim) {
- if (ndim != 1L)
- stop("incorrect number of dimensions")
- return(.subset_DelayedArray_by_1Dindex(x, i))
- }
-
- ## Prepare the "multidimensional subsetting index".
- user_Nindex <- vector(mode="list", length=x_ndim)
- if (!missing(i))
- user_Nindex[[1L]] <- if (is.null(i)) integer(0) else i
- if (!missing(j))
- user_Nindex[[2L]] <- if (is.null(j)) integer(0) else j
- dots <- match.call(expand.dots=FALSE)$... # list of non-evaluated args
- eframe <- parent.frame()
- for (n2 in seq_along(dots)) {
- k <- dots[[n2]]
- if (!missing(k)) {
- k <- eval(k, envir=eframe, enclos=eframe)
- user_Nindex[[2L + n2]] <- if (is.null(k)) integer(0) else k
- }
- }
-
- ## Perform the subsetting.
- .subset_DelayedArray_by_Nindex(x, user_Nindex)
-}
-
-setMethod("[", "DelayedArray", .subset_DelayedArray)
-
-
-### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-### subset_seed_as_array()
-###
-
-### Return the slice as a list.
-.extract_data_frame_slice <- function(x, index)
-{
- slice <- subset_by_Nindex(x, index)
- ## Turn into a list and replace factors with character vectors.
- lapply(slice, as.vector)
-}
-.extract_DataFrame_slice <- function(x, index)
-{
- slice <- subset_by_Nindex(x, index)
- slice <- as.data.frame(slice)
- ## Turn into a list and replace factors with character vectors.
- lapply(slice, as.vector)
-}
-
-### Return a list with one list element per column in data frame 'x'.
-### All the list elements have length 0.
-.extract_data_frame_slice0 <- function(x)
-{
- slice0 <- x[0L, , drop=FALSE]
- ## Turn into a list and replace factors with character vectors.
- lapply(slice0, as.vector)
-}
-.extract_DataFrame_slice0 <- function(x)
-{
- slice0 <- x[0L, , drop=FALSE]
- slice0 <- as.data.frame(slice0)
- if (ncol(slice0) != ncol(x))
- stop(wmsg("DataFrame object 'x' can be used as the seed of ",
- "a DelayedArray object only if as.data.frame(x) ",
- "preserves the number of columns"))
- ## Turn into a list and replace factors with character vectors.
- lapply(slice0, as.vector)
-}
-
-### Equivalent to 'typeof(as.matrix(x))' but with an almost-zero
-### memory footprint (it avoids the cost of turning 'x' into a matrix).
-.get_data_frame_type <- function(x)
-{
- if (ncol(x) == 0L)
- return("logical")
- slice0 <- .extract_data_frame_slice0(x)
- typeof(unlist(slice0, use.names=FALSE))
-}
-
-### Equivalent to 'typeof(as.matrix(as.data.frame(x)))' but with an
-### almost-zero memory footprint (it avoids the cost of turning 'x' first
-### into a data frame then into a matrix).
-.get_DataFrame_type <- function(x)
-{
- if (ncol(x) == 0L)
- return("logical")
- slice0 <- .extract_DataFrame_slice0(x)
- typeof(unlist(slice0, use.names=FALSE))
-}
-
-### 'index' is expected to be an unnamed list of subscripts as positive integer
-### vectors, one vector per seed dimension. *Missing* list elements are allowed
-### and represented by NULLs.
-### The "subset_seed_as_array" methods don't need to support anything else.
-### They must return an ordinary array. No need to propagate the dimnames.
-setGeneric("subset_seed_as_array", signature="seed",
- function(seed, index) standardGeneric("subset_seed_as_array")
-)
-
-setMethod("subset_seed_as_array", "ANY",
- function(seed, index)
- {
- slice <- subset_by_Nindex(seed, index)
- as.array(slice)
- }
-)
-
-setMethod("subset_seed_as_array", "array",
- function(seed, index)
- subset_by_Nindex(seed, index)
-)
-
-### Equivalent to
-###
-### subset_by_Nindex(as.matrix(x), index)
+### Transpose
###
-### but avoids the cost of turning the full data frame 'x' into a matrix so
-### memory footprint stays small when 'index' is small.
-setMethod("subset_seed_as_array", "data.frame",
- function(seed, index)
- {
- #ans_type <- .get_data_frame_type(seed)
- slice0 <- .extract_data_frame_slice0(seed)
- slice <- .extract_data_frame_slice(seed, index)
- data <- unlist(c(slice0, slice), use.names=FALSE)
- array(data, dim=get_Nindex_lengths(index, dim(seed)))
- }
-)
-### Equivalent to
-###
-### subset_by_Nindex(as.matrix(as.data.frame(x)), index)
-###
-### but avoids the cost of turning the full DataFrame 'x' first into a data
-### frame then into a matrix so memory footprint stays small when 'index' is
-### small.
-setMethod("subset_seed_as_array", "DataFrame",
- function(seed, index)
+### The actual transposition of the data is delayed i.e. it will be realized
+### on the fly only when as.array() (or as.vector() or as.matrix()) is called
+### on 'x'.
+setMethod("t", "DelayedArray",
+ function(x)
{
- #ans_type <- .get_DataFrame_type(seed)
- slice0 <- .extract_DataFrame_slice0(seed)
- slice <- .extract_DataFrame_slice(seed, index)
- data <- unlist(c(slice0, slice), use.names=FALSE)
- array(data, dim=get_Nindex_lengths(index, dim(seed)))
+ x <- downgrade_to_DelayedArray_or_DelayedMatrix(x)
+ x at is_transposed <- !x at is_transposed
+ x
}
)
@@ -800,27 +552,225 @@ register_delayed_op <- function(x, FUN, Largs=list(), Rargs=list(),
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-### Transpose
+### Multi-dimensional single bracket subsetting
+###
+### x[i_1, i_2, ..., i_n]
+###
+### Return an object of the same class as 'x' (endomorphism).
+### 'user_Nindex' must be a "multidimensional subsetting Nindex" i.e. a
+### list with one subscript per dimension in 'x'. Missing subscripts must
+### be represented by NULLs.
###
-### The actual transposition of the data is delayed i.e. it will be realized
-### on the fly only when as.array() (or as.vector() or as.matrix()) is called
-### on 'x'.
-setMethod("t", "DelayedArray",
+.subset_DelayedArray_by_Nindex <- function(x, user_Nindex)
+{
+ stopifnot(is.list(user_Nindex))
+ x_index <- x at index
+ x_ndim <- length(x at metaindex)
+ x_seed_dim <- dim(seed(x))
+ x_seed_dimnames <- dimnames(seed(x))
+ x_delayed_ops <- x at delayed_ops
+ for (n in seq_along(user_Nindex)) {
+ subscript <- user_Nindex[[n]]
+ if (is.null(subscript))
+ next
+ n0 <- if (x at is_transposed) x_ndim - n + 1L else n
+ N <- x at metaindex[[n0]]
+ i <- x_index[[N]]
+ if (is.null(i)) {
+ i <- seq_len(x_seed_dim[[N]]) # expand 'i'
+ names(i) <- get_Nindex_names_along(x_index, x_seed_dimnames, N)
+ }
+ subscript <- normalizeSingleBracketSubscript(subscript, i,
+ as.NSBS=TRUE)
+ x_index[[N]] <- extractROWS(i, subscript)
+ if (n0 == 1L)
+ x_delayed_ops <- .subset_delayed_ops_args(x_delayed_ops, subscript,
+ FALSE)
+ if (n0 == x_ndim)
+ x_delayed_ops <- .subset_delayed_ops_args(x_delayed_ops, subscript,
+ TRUE)
+ }
+ if (!identical(x at index, x_index)) {
+ x <- downgrade_to_DelayedArray_or_DelayedMatrix(x)
+ x at index <- x_index
+ if (!identical(x at delayed_ops, x_delayed_ops))
+ x at delayed_ops <- x_delayed_ops
+ }
+ x
+}
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### type()
+###
+### For internal use only.
+###
+
+setGeneric("type", function(x) standardGeneric("type"))
+
+setMethod("type", "array", function(x) typeof(x))
+
+### If 'x' is a DelayedArray object, 'type(x)' must always return the same
+### as 'typeof(as.array(x))'.
+setMethod("type", "DelayedArray",
function(x)
{
- x <- downgrade_to_DelayedArray_or_DelayedMatrix(x)
- x at is_transposed <- !x at is_transposed
- x
+ user_Nindex <- as.list(integer(length(dim(x))))
+ ## x0 <- x[0, ..., 0]
+ x0 <- .subset_DelayedArray_by_Nindex(x, user_Nindex)
+ typeof(as.array(x0, drop=TRUE))
}
)
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-### as.array()
+### Linear single bracket subsetting
+###
+### x[i]
+###
+### Return an atomic vector.
+###
+
+.subset_DelayedArray_by_1Dindex <- function(x, i)
+{
+ if (!is.numeric(i))
+ stop(wmsg("1D-style subsetting of a DelayedArray object only ",
+ "accepts a numeric subscript at the moment"))
+ if (length(i) == 0L) {
+ user_Nindex <- as.list(integer(length(dim(x))))
+ ## x0 <- x[0, ..., 0]
+ x0 <- .subset_DelayedArray_by_Nindex(x, user_Nindex)
+ return(as.vector(x0))
+ }
+ if (anyNA(i))
+ stop(wmsg("1D-style subsetting of a DelayedArray object does ",
+ "not support NA indices yet"))
+ if (min(i) < 1L)
+ stop(wmsg("1D-style subsetting of a DelayedArray object only ",
+ "supports positive indices at the moment"))
+ if (max(i) > length(x))
+ stop(wmsg("subscript contains out-of-bounds indices"))
+ if (length(i) == 1L)
+ return(.get_DelayedArray_element(x, i))
+
+ ## We want to walk only on the blocks that we actually need to visit so we
+ ## don't use block_APPLY() or family because they walk on all the blocks.
+
+ max_block_len <- get_max_block_length(type(x))
+ spacings <- get_max_spacings_for_linear_blocks(dim(x), max_block_len)
+ grid <- ArrayRegularGrid(dim(x), spacings)
+ nblock <- length(grid)
+
+ breakpoints <- cumsum(lengths(grid))
+ part_idx <- get_part_index(i, breakpoints)
+ split_part_idx <- split_part_index(part_idx, length(breakpoints))
+ block_idx <- which(lengths(split_part_idx) != 0L) # blocks to visit
+ res <- lapply(block_idx, function(b) {
+ if (get_verbose_block_processing())
+ message("Visiting block ", b, "/", nblock, " ... ",
+ appendLF=FALSE)
+ block <- extract_block(x, grid[[b]])
+ if (!is.array(block))
+ block <- as.array(block)
+ block_ans <- block[split_part_idx[[b]]]
+ if (get_verbose_block_processing())
+ message("OK")
+ block_ans
+ })
+ unlist(res, use.names=FALSE)[get_rev_index(part_idx)]
+}
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### [
###
-### TODO: Not sure we need this. Using drop() should do it.
+.subset_DelayedArray <- function(x, i, j, ..., drop=TRUE)
+{
+ if (missing(x))
+ stop("'x' is missing")
+ user_Nindex <- extract_Nindex_from_syscall(sys.call(), parent.frame())
+ nsubscript <- length(user_Nindex)
+ if (nsubscript != 0L && nsubscript != length(dim(x))) {
+ if (nsubscript != 1L)
+ stop("incorrect number of subscripts")
+ return(.subset_DelayedArray_by_1Dindex(x, user_Nindex[[1L]]))
+ }
+ .subset_DelayedArray_by_Nindex(x, user_Nindex)
+}
+setMethod("[", "DelayedArray", .subset_DelayedArray)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### [<-
+###
+
+.filling_error_msg <- c(
+ "filling a DelayedArray object 'x' with a value (i.e. 'x[] <- value') ",
+ "is supported only when 'value' is an atomic vector and 'length(value)' ",
+ "is a divisor of 'nrow(x)'"
+)
+
+.subassign_error_msg <- c(
+ "subassignment to a DelayedArray object 'x' (i.e. 'x[i] <- value') is ",
+ "supported only when the subscript 'i' is a logical DelayedArray object ",
+ "with the same dimensions as 'x' and when 'value' is a scalar (i.e. an ",
+ "atomic vector of length 1)"
+)
+
+.fill_DelayedArray_with_value <- function(x, value)
+{
+ if (!(is.vector(value) && is.atomic(value)))
+ stop(wmsg(.filling_error_msg))
+ value_len <- length(value)
+ if (value_len == 1L)
+ return(register_delayed_op(x, `[<-`, Rargs=list(value=value)))
+ x_len <- length(x)
+ if (value_len > x_len)
+ stop(wmsg("'value' is longer than 'x'"))
+ x_nrow <- nrow(x)
+ if (x_nrow != 0L) {
+ if (value_len == 0L || x_nrow %% value_len != 0L)
+ stop(wmsg(.filling_error_msg))
+ value <- rep(value, length.out=x_nrow)
+ }
+ register_delayed_op(x, `[<-`, Rargs=list(value=value),
+ recycle_along_last_dim=x at is_transposed)
+}
+
+.subassign_DelayedArray <- function(x, i, j, ..., value)
+{
+ if (missing(x))
+ stop("'x' is missing")
+ user_Nindex <- extract_Nindex_from_syscall(sys.call(), parent.frame())
+ nsubscript <- length(user_Nindex)
+ if (nsubscript == 0L)
+ return(.fill_DelayedArray_with_value(x, value))
+ if (nsubscript != 1L)
+ stop(wmsg(.subassign_error_msg))
+ i <- user_Nindex[[1L]]
+ if (!(is(i, "DelayedArray") &&
+ identical(dim(x), dim(i)) &&
+ type(i) == "logical"))
+ stop(wmsg(.subassign_error_msg))
+ if (!(is.vector(value) && is.atomic(value) && length(value) == 1L))
+ stop(wmsg(.subassign_error_msg))
+ DelayedArray(new_ConformableSeedCombiner(x, i,
+ COMBINING_OP=`[<-`,
+ Rargs=list(value=value)))
+}
+
+setReplaceMethod("[", "DelayedArray", .subassign_DelayedArray)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Realization in memory
+###
+### as.array(x)
+###
+
+### TODO: Do we actually need this? Using drop() should do it.
.reduce_array_dimensions <- function(x)
{
x_dim <- dim(x)
@@ -957,29 +907,6 @@ setAs("DelayedMatrix", "sparseMatrix", .from_DelayedMatrix_to_dgCMatrix)
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-### type()
-###
-### For internal use only.
-###
-
-setGeneric("type", function(x) standardGeneric("type"))
-
-setMethod("type", "array", function(x) typeof(x))
-
-### If 'x' is a DelayedArray object, 'type(x)' must always return the same
-### as 'typeof(as.array(x))'.
-setMethod("type", "DelayedArray",
- function(x)
- {
- user_Nindex <- as.list(integer(length(dim(x))))
- ## x0 <- x[0, ..., 0]
- x0 <- .subset_DelayedArray_by_Nindex(x, user_Nindex)
- typeof(as.array(x0, drop=TRUE))
- }
-)
-
-
-### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### [[
###
@@ -1016,30 +943,14 @@ setMethod("show", "DelayedArray", show_compact_array)
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Combining and splitting
###
-### Combining arrays with c() is NOT an endomorphism!
-###
-
-### 'objects' must be a list of array-like objects that support as.vector().
-combine_array_objects <- function(objects)
-{
- if (!is.list(objects))
- stop("'objects' must be a list")
-
- NULL_idx <- which(S4Vectors:::sapply_isNULL(objects))
- if (length(NULL_idx) != 0L)
- objects <- objects[-NULL_idx]
- if (length(objects) == 0L)
- return(NULL)
-
- unlist(lapply(objects, as.vector), recursive=FALSE, use.names=FALSE)
-}
+### Note that combining arrays with c() is NOT an endomorphism!
setMethod("c", "DelayedArray",
function (x, ..., recursive=FALSE)
{
if (!identical(recursive, FALSE))
- stop("\"c\" method for DelayedArray objects ",
- "does not support the 'recursive' argument")
+ stop(wmsg("\"c\" method for DelayedArray objects ",
+ "does not support the 'recursive' argument"))
if (missing(x)) {
objects <- list(...)
} else {
@@ -1059,3 +970,68 @@ split.DelayedArray <- function(x, f, drop=FALSE, ...)
splitAsList(x, f, drop=drop, ...)
setMethod("split", c("DelayedArray", "ANY"), split.DelayedArray)
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Binding
+###
+### We only support binding DelayedArray objects along the rows or the cols
+### at the moment. No binding along an arbitrary dimension yet! (i.e. no
+### "abind" method yet)
+###
+
+### arbind() and acbind()
+
+.DelayedArray_arbind <- function(...)
+{
+ objects <- unname(list(...))
+ dims <- get_dims_to_bind(objects, 1L)
+ if (is.character(dims))
+ stop(wmsg(dims))
+ DelayedArray(new_SeedBinder(objects, 1L))
+}
+
+.DelayedArray_acbind <- function(...)
+{
+ objects <- unname(list(...))
+ dims <- get_dims_to_bind(objects, 2L)
+ if (is.character(dims))
+ stop(wmsg(dims))
+ DelayedArray(new_SeedBinder(objects, 2L))
+}
+
+setMethod("arbind", "DelayedArray", .DelayedArray_arbind)
+setMethod("acbind", "DelayedArray", .DelayedArray_acbind)
+
+### rbind() and cbind()
+
+setMethod("rbind", "DelayedMatrix", .DelayedArray_arbind)
+setMethod("cbind", "DelayedMatrix", .DelayedArray_acbind)
+
+.as_DelayedMatrix_objects <- function(objects)
+{
+ lapply(objects,
+ function(object) {
+ if (length(dim(object)) != 2L)
+ stop(wmsg("cbind() and rbind() are not supported on ",
+ "DelayedArray objects that don't have exactly ",
+ "2 dimensions. Please use acbind() or arnind() ",
+ "instead."))
+ as(object, "DelayedMatrix")
+ })
+}
+
+.DelayedArray_rbind <- function(...)
+{
+ objects <- .as_DelayedMatrix_objects(list(...))
+ do.call("rbind", objects)
+}
+
+.DelayedArray_cbind <- function(...)
+{
+ objects <- .as_DelayedMatrix_objects(list(...))
+ do.call("cbind", objects)
+}
+
+setMethod("rbind", "DelayedArray", .DelayedArray_rbind)
+setMethod("cbind", "DelayedArray", .DelayedArray_cbind)
+
diff --git a/R/DelayedArray-stats.R b/R/DelayedArray-stats.R
index 4bc5ed9..0a74863 100644
--- a/R/DelayedArray-stats.R
+++ b/R/DelayedArray-stats.R
@@ -4,7 +4,61 @@
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-### Logistic
+### The Binomial Distribution
+###
+### All these methods return a DelayedArray object of the same dimensions
+### as their first argument.
+###
+
+setMethod("dbinom", "DelayedArray",
+ function(x, size, prob, log=FALSE)
+ register_delayed_op(x, "dbinom",
+ Rargs=list(size=size, prob=prob, log=log))
+)
+
+setMethod("pbinom", "DelayedArray",
+ function(q, size, prob, lower.tail=TRUE, log.p=FALSE)
+ register_delayed_op(q, "pbinom",
+ Rargs=list(size=size, prob=prob,
+ lower.tail=lower.tail, log.p=log.p))
+)
+
+setMethod("qbinom", "DelayedArray",
+ function(p, size, prob, lower.tail=TRUE, log.p=FALSE)
+ register_delayed_op(p, "qbinom",
+ Rargs=list(size=size, prob=prob,
+ lower.tail=lower.tail, log.p=log.p))
+)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### The Poisson Distribution
+###
+### All these methods return a DelayedArray object of the same dimensions
+### as their first argument.
+###
+
+setMethod("dpois", "DelayedArray",
+ function(x, lambda, log=FALSE)
+ register_delayed_op(x, "dpois",
+ Rargs=list(lambda=lambda, log=log))
+)
+
+setMethod("ppois", "DelayedArray",
+ function(q, lambda, lower.tail=TRUE, log.p=FALSE)
+ register_delayed_op(q, "ppois",
+ Rargs=list(lambda=lambda, lower.tail=lower.tail, log.p=log.p))
+)
+
+setMethod("qpois", "DelayedArray",
+ function(p, lambda, lower.tail=TRUE, log.p=FALSE)
+ register_delayed_op(p, "qpois",
+ Rargs=list(lambda=lambda, lower.tail=lower.tail, log.p=log.p))
+)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### The Logistic Distribution
###
### All these methods return a DelayedArray object of the same dimensions
### as their first argument.
diff --git a/R/DelayedArray-utils.R b/R/DelayedArray-utils.R
index 4bf2aea..01201ad 100644
--- a/R/DelayedArray-utils.R
+++ b/R/DelayedArray-utils.R
@@ -4,106 +4,6 @@
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-### ConformableSeedCombiner objects
-###
-### This class is for internal use only and is not exported.
-###
-
-setClass("ConformableSeedCombiner",
- representation(
- seeds="list", # List of n conformable array-like objects
- # to combine. Each object is expected to
- # satisfy the "seed contract" i.e. to
- # support dim(), dimnames(), and
- # subset_seed_as_array().
-
- COMBINING_OP="character", # n-ary operator to combine the seeds.
-
- Rargs="list" # Additional arguments to the n-ary
- # operator. Currently unused.
- ),
- prototype(
- seeds=list(new("array")),
- COMBINING_OP="identity"
- )
-)
-
-.objects_are_conformable_arrays <- function(objects)
-{
- dims <- lapply(objects, dim)
- ndims <- lengths(dims)
- first_ndim <- ndims[[1L]]
- if (!all(ndims == first_ndim))
- return(FALSE)
- tmp <- unlist(dims, use.names=FALSE)
- if (is.null(tmp))
- return(FALSE)
- dims <- matrix(tmp, nrow=first_ndim)
- first_dim <- dims[ , 1L]
- all(dims == first_dim)
-}
-
-.validate_ConformableSeedCombiner <- function(x)
-{
- ## 'seeds' slot.
- if (length(x at seeds) == 0L)
- return(wmsg("'x at seeds' cannot be empty"))
- if (!.objects_are_conformable_arrays(x at seeds))
- return(wmsg("'x at seeds' must be a list of conformable ",
- "array-like objects"))
- ## 'COMBINING_OP' slot.
- if (!isSingleString(x at COMBINING_OP))
- return(wmsg("'x at COMBINING_OP' must be a single string"))
- OP <- try(match.fun(x at COMBINING_OP), silent=TRUE)
- if (is(OP, "try-error"))
- return(wmsg("the name in 'x at COMBINING_OP' (\"", x at COMBINING_OP,
- "\") must refer to a known n-ary operator"))
- TRUE
-}
-
-setValidity2("ConformableSeedCombiner", .validate_ConformableSeedCombiner)
-
-.new_ConformableSeedCombiner <- function(seed=new("array"), ...,
- COMBINING_OP="identity",
- Rargs=list())
-{
- seeds <- unname(list(seed, ...))
- seeds <- lapply(seeds, remove_pristine_DelayedArray_wrapping)
- new2("ConformableSeedCombiner", seeds=seeds,
- COMBINING_OP=COMBINING_OP,
- Rargs=Rargs)
-}
-
-### Implement the "seed contract" i.e. dim(), dimnames(), and
-### subset_seed_as_array().
-
-.get_ConformableSeedCombiner_dim <- function(x) dim(x at seeds[[1L]])
-
-setMethod("dim", "ConformableSeedCombiner",
- .get_ConformableSeedCombiner_dim
-)
-
-.get_ConformableSeedCombiner_dimnames <- function(x)
-{
- IRanges:::combine_dimnames(x at seeds)
-}
-
-setMethod("dimnames", "ConformableSeedCombiner",
- .get_ConformableSeedCombiner_dimnames
-)
-
-.subset_ConformableSeedCombiner_as_array <- function(seed, index)
-{
- arrays <- lapply(seed at seeds, subset_seed_as_array, index)
- do.call(seed at COMBINING_OP, c(arrays, seed at Rargs))
-}
-
-setMethod("subset_seed_as_array", "ConformableSeedCombiner",
- .subset_ConformableSeedCombiner_as_array
-)
-
-
-### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### "Ops" group generics
###
### Arith members: "+", "-", "*", "/", "^", "%%", "%/%"
@@ -172,7 +72,7 @@ setMethod("subset_seed_as_array", "ConformableSeedCombiner",
{
if (!identical(dim(e1), dim(e2)))
stop("non-conformable arrays")
- DelayedArray(.new_ConformableSeedCombiner(e1, e2, COMBINING_OP=.Generic))
+ DelayedArray(new_ConformableSeedCombiner(e1, e2, COMBINING_OP=.Generic))
}
.DelayedArray_Ops <- function(.Generic, e1, e2)
@@ -263,7 +163,7 @@ setMethod("pmax2", c("ANY", "ANY"),
names(ans) <- .combine_names(e1, e2)
} else {
dim(ans) <- ans_dim
- dimnames(ans) <- IRanges:::combine_dimnames(list(e1, e2))
+ dimnames(ans) <- combine_dimnames(list(e1, e2))
}
ans
}
@@ -278,7 +178,7 @@ setMethod("pmin2", c("ANY", "ANY"),
names(ans) <- .combine_names(e1, e2)
} else {
dim(ans) <- ans_dim
- dimnames(ans) <- IRanges:::combine_dimnames(list(e1, e2))
+ dimnames(ans) <- combine_dimnames(list(e1, e2))
}
ans
}
@@ -400,7 +300,7 @@ setMethod("signif", "DelayedArray",
.DelayedArray_block_anyNA <- function(x, recursive=FALSE)
{
APPLY <- anyNA
- COMBINE <- function(b, subarray, init, reduced) { init || reduced }
+ COMBINE <- function(b, block, init, reduced) { init || reduced }
init <- FALSE
BREAKIF <- identity
@@ -423,13 +323,13 @@ setMethod("anyNA", "DelayedArray", .DelayedArray_block_anyNA)
if (!isTRUEorFALSE(useNames))
stop("'useNames' must be TRUE or FALSE")
APPLY <- base::which
- COMBINE <- function(b, subarray, init, reduced) {
+ COMBINE <- function(b, block, init, reduced) {
if (length(reduced) != 0L) {
reduced <- reduced + init[["offset"]]
part_number <- sprintf("%010d", b)
init[[part_number]] <- reduced
}
- init[["offset"]] <- init[["offset"]] + length(subarray)
+ init[["offset"]] <- init[["offset"]] + length(block)
init
}
offset <- 0L
@@ -487,16 +387,16 @@ setMethod("which", "DelayedArray", .DelayedArray_block_which)
objects <- .collect_objects(x, ...)
GENERIC <- match.fun(.Generic)
- APPLY <- function(subarray) {
- ## We get a warning if 'subarray' is empty (which can't happen, blocks
- ## can't be empty) or if 'na.rm' is TRUE and 'subarray' contains only
+ APPLY <- function(block) {
+ ## We get a warning if 'block' is empty (which can't happen, blocks
+ ## can't be empty) or if 'na.rm' is TRUE and 'block' contains only
## NA's or NaN's.
- reduced <- tryCatch(GENERIC(subarray, na.rm=na.rm), warning=identity)
+ reduced <- tryCatch(GENERIC(block, na.rm=na.rm), warning=identity)
if (is(reduced, "warning"))
return(NULL)
reduced
}
- COMBINE <- function(b, subarray, init, reduced) {
+ COMBINE <- function(b, block, init, reduced) {
if (is.null(init) && is.null(reduced))
return(NULL)
GENERIC(init, reduced)
@@ -545,15 +445,15 @@ setMethod("Summary", "DelayedArray",
stop("\"mean\" method for DelayedArray objects ",
"does not support the 'trim' argument yet")
- APPLY <- function(subarray) {
- tmp <- as.vector(subarray, mode="numeric")
- subarray_sum <- sum(tmp, na.rm=na.rm)
- subarray_nval <- length(tmp)
+ APPLY <- function(block) {
+ tmp <- as.vector(block, mode="numeric")
+ block_sum <- sum(tmp, na.rm=na.rm)
+ block_nval <- length(tmp)
if (na.rm)
- subarray_nval <- subarray_nval - sum(is.na(tmp))
- c(subarray_sum, subarray_nval)
+ block_nval <- block_nval - sum(is.na(tmp))
+ c(block_sum, block_nval)
}
- COMBINE <- function(b, subarray, init, reduced) { init + reduced }
+ COMBINE <- function(b, block, init, reduced) { init + reduced }
init <- numeric(2) # sum and nval
BREAKIF <- function(init) is.na(init[[1L]])
diff --git a/R/DelayedMatrix-utils.R b/R/DelayedMatrix-utils.R
index c5461fe..346700c 100644
--- a/R/DelayedMatrix-utils.R
+++ b/R/DelayedMatrix-utils.R
@@ -23,7 +23,26 @@
ans_type <- typeof(match.fun(type(x))(1) * match.fun(type(y))(1))
sink <- RealizationSink(ans_dim, ans_dimnames, ans_type)
on.exit(close(sink))
- colblock_APPLY(y, function(submatrix) { x %*% submatrix }, sink=sink)
+
+ ## We're going to walk along the columns so need to increase the block
+ ## length so that each block is made of at least one column.
+ max_block_len <- max(get_max_block_length(type(y)), nrow(y))
+ spacings <- get_max_spacings_for_linear_blocks(dim(y), max_block_len)
+ y_grid <- ArrayRegularGrid(dim(y), spacings)
+ spacings[[1L]] <- ans_dim[[1L]]
+ ans_grid <- ArrayRegularGrid(ans_dim, spacings) # parallel to 'y_grid'
+ nblock <- length(y_grid) # same as 'length(ans_grid)'
+ for (b in seq_len(nblock)) {
+ if (get_verbose_block_processing())
+ message("Processing block ", b, "/", nblock, " ... ",
+ appendLF=FALSE)
+ y_viewport <- y_grid[[b]]
+ block <- as.matrix(extract_block(y, y_viewport))
+ block_ans <- x %*% block
+ write_block_to_sink(block_ans, sink, ans_grid[[b]])
+ if (get_verbose_block_processing())
+ message("OK")
+ }
as(sink, "DelayedArray")
}
diff --git a/R/RleArray-class.R b/R/RleArray-class.R
index 6eca1e0..909787b 100644
--- a/R/RleArray-class.R
+++ b/R/RleArray-class.R
@@ -3,61 +3,186 @@
### -------------------------------------------------------------------------
-### NOT exported!
setClass("RleArraySeed",
+ contains="Array",
+ representation(
+ "VIRTUAL",
+ ## Must use upper case or won't be able to extend the class.
+ ## See https://stat.ethz.ch/pipermail/r-devel/2017-June/074383.html
+ DIM="integer",
+ DIMNAMES="list"
+ )
+)
+
+### We don't support long SolidRleArraySeed objects yet! This would first
+### require that S4Vectors:::extract_positions_from_Rle() accepts 'pos' as
+### a numeric vector.
+setClass("SolidRleArraySeed",
+ contains="RleArraySeed",
+ representation(
+ rle="Rle"
+ )
+)
+
+### The RleRealizationSink class is a concrete RealizationSink subclass that
+### implements realization of an array-like object as an RleArray object that
+### will have a ChunkedRleArraySeed seed (once writting to the sink is
+### complete).
+setClass("RleRealizationSink",
+ contains=c("RleArraySeed", "RealizationSink"),
+ representation(
+ type="character",
+ chunks="environment"
+ )
+)
+
+### We support long ChunkedRleArraySeed objects but for now the chunks
+### cannot be long. Supporting long chunks would require that
+### S4Vectors:::extract_positions_from_Rle() accepts 'pos' as a numeric
+### vector.
+setClass("ChunkedRleArraySeed",
+ contains="RleRealizationSink",
representation(
- rle="Rle",
- dim="integer",
- dimnames="list"
+ ## A numeric vector of length the nb of chunks. Contains the cumulated
+ ## lengths of the chunks so must be "numeric" (and not "integer") to
+ ## support long objects. A chunk cannot be empty so 'breakpoints' must
+ ## contain *strictly* sorted positive values.
+ ## If the object is of length 0, then 'breakpoints' is empty.
+ ## Otherwise, its last element must equal the length of the object.
+ breakpoints="numeric"
)
)
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Low-level chunk accessors
+###
+
+.get_chunk <- function(envir, k)
+{
+ name <- sprintf("%06d", k)
+ stopifnot(nchar(name) == 6L)
+ get(name, envir=envir, inherits=FALSE)
+}
+
+.get_chunk_lens <- function(envir)
+{
+ ## Too bad we can't just do 'lengths(envir)' for this.
+ ## Also would have been nice to be able to just do
+ ## 'unlist(eapply(envir, length))' but the list returned by eapply()
+ ## is not guaranteed to be sorted and eapply() does not have a 'sorted'
+ ## argument. So would need to manually sort it.
+ ## Another possibility would be to vapply() on the sorted symbols returned
+ ## by 'ls(envir, sorted=TRUE)'.
+ vapply(seq_len(length(envir)),
+ function(k) length(.get_chunk(envir, k)),
+ numeric(1))
+}
+
+.set_chunk <- function(envir, k, chunk)
+{
+ name <- sprintf("%06d", k)
+ stopifnot(nchar(name) == 6L)
+ assign(name, chunk, envir=envir)
+}
+
+.append_chunk <- function(envir, chunk)
+{
+ .set_chunk(envir, length(envir) + 1L, chunk)
+}
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Validity
+###
+
.validate_RleArraySeed <- function(x)
{
+ msg <- validate_dim_slot(x, "DIM")
+ if (!isTRUE(msg))
+ return(msg)
+ msg <- validate_dimnames_slot(x, x at DIM, "DIMNAMES")
+ if (!isTRUE(msg))
+ return(msg)
+ TRUE
+}
+setValidity2("RleArraySeed", .validate_RleArraySeed)
+
+.validate_SolidRleArraySeed <- function(x)
+{
## 'rle' slot.
if (!is(x at rle, "Rle"))
- return(wmsg("'x at rle' must be an Rle object"))
- ## 'dim' slot.
- if (!is.integer(x at dim))
- return(wmsg("'x at dim' must be an integer vector"))
- x_ndim <- length(x at dim)
- if (x_ndim == 0L)
- return(wmsg("'x at dim' cannot be empty"))
- if (S4Vectors:::anyMissingOrOutside(x at dim, 0L))
- return(wmsg("'x at dim' cannot contain negative or NA values"))
- p <- prod(x at dim)
- if (p != length(x at rle))
- return(wmsg("object dimensions [product ", p, "] do not match ",
- "its length [" , length(x at rle), "]"))
- ## 'dimnames' slot.
- if (!is.list(x at dimnames))
- return(wmsg("'x at dimnames' must be a list of length"))
- if (length(x at dimnames) != x_ndim)
- return(wmsg("length of 'x at dimnames' must match that of 'x at dim'"))
- if (!all(vapply(seq_len(x_ndim),
- function(n) {
- dn <- x at dimnames[[n]]
- if (is.null(dn))
- return(TRUE)
- is.character(dn) && length(dn) == x at dim[[n]]
- },
- logical(1),
- USE.NAMES=FALSE)))
- return(wmsg("every list element in 'x at dimnames' must be NULL or ",
- "a character vector along the corresponding dimension"))
+ return(wmsg2("'rle' slot must be an Rle object"))
+ x_len <- length(x)
+ data_len <- length(x at rle)
+ if (x_len != data_len)
+ return(wmsg2("object dimensions [product ", x_len, "] do not ",
+ "match the length of its data [" , data_len, "]"))
+ ## Until S4Vectors:::extract_positions_from_Rle() accepts 'pos' as a
+ ## numeric vector, we cannot support long SolidRleArraySeed objects.
+ if (x_len > .Machine$integer.max)
+ return(wmsg2("long SolidRleArraySeed objects are not supported yet"))
TRUE
}
+setValidity2("SolidRleArraySeed", .validate_SolidRleArraySeed)
-setValidity2("RleArraySeed", .validate_RleArraySeed)
+.validate_RleRealizationSink <- function(x)
+{
+ ## 'type' slot.
+ if (!isSingleString(x at type))
+ return(wmsg2("'type' slot must be a single string"))
+ ## 'chunks' slot.
+ if (!is.environment(x at chunks))
+ return(wmsg2("'chunks' slot must be an environment"))
+ # TODO: Validate the content of 'chunks'.
+ TRUE
+}
+setValidity2("RleRealizationSink", .validate_RleRealizationSink)
+
+.get_data_length_from_breakpoints <- function(breakpoints)
+{
+ breakpoints_len <- length(breakpoints)
+ if (breakpoints_len == 0L) 0L else breakpoints[[breakpoints_len]]
+}
+
+.validate_ChunkedRleArraySeed <- function(x)
+{
+ ## 'breakpoints' slot.
+ if (!is.numeric(x at breakpoints)
+ || S4Vectors:::anyMissing(x at breakpoints)
+ || is.unsorted(x at breakpoints, strictly=TRUE)
+ || length(x at breakpoints) != 0L && x at breakpoints[[1L]] <= 0L)
+ return(wmsg2("'x at breakpoints' must be a numeric vector containing ",
+ "strictly sorted positive values"))
+ x_len <- length(x)
+ data_len <- .get_data_length_from_breakpoints(x at breakpoints)
+ if (data_len != x_len)
+ return(wmsg2("length of object data [" , data_len, "] does not ",
+ "match object dimensions [product ", x_len, "]"))
+ chunk_lens <- diff(c(0, x at breakpoints)) # chunk lengths as inferred from
+ # 'breakpoints'
+ ## Until S4Vectors:::extract_positions_from_Rle() accepts 'pos' as a
+ ## numeric vector, the chunks cannot be long Rle objects.
+ if (any(chunk_lens > .Machine$integer.max))
+ return(wmsg2("ChunkedRleArraySeed objects do not support ",
+ "long chunks yet"))
+ # TODO: Check that the chunk lengths as inferred from 'breakpoints'
+ # actually match the real ones.
+ TRUE
+}
+setValidity2("ChunkedRleArraySeed", .validate_ChunkedRleArraySeed)
-setMethod("dim", "RleArraySeed", function(x) x at dim)
-setMethod("length", "RleArraySeed", function(x) prod(dim(x)))
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Getters
+###
+
+setMethod("dim", "RleArraySeed", function(x) x at DIM)
setMethod("dimnames", "RleArraySeed",
function(x)
{
- ans <- x at dimnames
+ ans <- x at DIMNAMES
if (all(S4Vectors:::sapply_isNULL(ans)))
return(NULL)
ans
@@ -66,10 +191,31 @@ setMethod("dimnames", "RleArraySeed",
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Coercion to Rle objects
+###
+
+setAs("SolidRleArraySeed", "Rle", function(from) from at rle)
+
+### In practice this coercion is not used on a RleRealizationSink instance
+### but on a *ChunkedRleArraySeed* instance (by the coercion method from
+### ChunkedRleArraySeed to SolidRleArraySeed defined below in this file).
+setAs("RleRealizationSink", "Rle",
+ function(from)
+ {
+ ans <- Rle(match.fun(from at type)(0))
+ if (length(from at chunks) == 0L)
+ return(ans)
+ list_of_Rles <- c(list(ans), unname(as.list(from at chunks, sorted=TRUE)))
+ do.call("c", list_of_Rles)
+ }
+)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### subset_seed_as_array()
###
-.subset_RleArraySeed_as_array <- function(seed, index)
+.subset_SolidRleArraySeed_as_array <- function(seed, index)
{
seed_dim <- dim(seed)
i <- to_linear_index(index, seed_dim)
@@ -77,18 +223,79 @@ setMethod("dimnames", "RleArraySeed",
dim(ans) <- get_Nindex_lengths(index, seed_dim)
ans
}
+setMethod("subset_seed_as_array", "SolidRleArraySeed",
+ .subset_SolidRleArraySeed_as_array
+)
-setMethod("subset_seed_as_array", "RleArraySeed",
- .subset_RleArraySeed_as_array
+.subset_ChunkedRleArraySeed_as_array <- function(seed, index)
+{
+ seed_dim <- dim(seed)
+ i <- to_linear_index(index, seed_dim)
+ ans <- match.fun(seed at type)(0)
+ if (length(i) != 0L) {
+ part_idx <- get_part_index(i, seed at breakpoints)
+ split_part_idx <- split_part_index(part_idx, length(seed at breakpoints))
+ chunk_idx <- which(lengths(split_part_idx) != 0L) # chunks to visit
+ res <- lapply(chunk_idx, function(i1) {
+ chunk <- .get_chunk(seed at chunks, i1)
+ ## Because a valid ChunkedRleArraySeed object is guaranteed to not
+ ## contain long chunks at the moment, 'i2' can be represented as
+ ## an integer vector.
+ i2 <- as.integer(split_part_idx[[i1]])
+ S4Vectors:::extract_positions_from_Rle(chunk, i2, decoded=TRUE)
+ })
+ res <- c(list(ans), res)
+ ans <- unlist(res, use.names=FALSE)[get_rev_index(part_idx)]
+ }
+ dim(ans) <- get_Nindex_lengths(index, seed_dim)
+ ans
+}
+setMethod("subset_seed_as_array", "ChunkedRleArraySeed",
+ .subset_ChunkedRleArraySeed_as_array
)
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-### RleArraySeed constructor
+### Construction of RleRealizationSink and RleArraySeed objects
###
### NOT exported!
-RleArraySeed <- function(rle, dim, dimnames=NULL)
+RleRealizationSink <- function(dim, dimnames=NULL, type="double")
+{
+ if (is.null(dimnames))
+ dimnames <- vector("list", length(dim))
+ chunks <- new.env(hash=TRUE, parent=emptyenv())
+ new2("RleRealizationSink", DIM=dim, DIMNAMES=dimnames,
+ type=type, chunks=chunks)
+}
+
+.append_Rle_to_sink <- function(x, sink)
+{
+ stopifnot(is(x, "Rle"))
+ if (length(x) == 0L)
+ return() # nothing to do
+ if (sink at type == "integer") {
+ run_values <- runValue(x)
+ ## Replace integer-Rle with raw-Rle if this doesn't loose
+ ## information.
+ if (!S4Vectors:::anyMissingOrOutside(run_values, 0L, 255L))
+ runValue(x) <- as.raw(run_values)
+ }
+ .append_chunk(sink at chunks, x)
+}
+
+### This coercion is used by the RleArraySeed() constructor and by the
+### coercion method from RleRealizationSink to RleArray.
+setAs("RleRealizationSink", "ChunkedRleArraySeed",
+ function(from)
+ {
+ breakpoints <- cumsum(as.double(.get_chunk_lens(from at chunks)))
+ new2("ChunkedRleArraySeed", from, breakpoints=breakpoints)
+ }
+)
+
+### NOT exported!
+RleArraySeed <- function(rle, dim, dimnames=NULL, chunksize=NULL)
{
if (!is.numeric(dim))
stop(wmsg("the supplied dim vector must be numeric"))
@@ -96,9 +303,25 @@ RleArraySeed <- function(rle, dim, dimnames=NULL)
dim <- as.integer(dim)
if (is.null(dimnames))
dimnames <- vector("list", length=length(dim))
- new2("RleArraySeed", rle=rle, dim=dim, dimnames=dimnames)
+
+ if (is.null(chunksize))
+ return(new2("SolidRleArraySeed", DIM=dim, DIMNAMES=dimnames, rle=rle))
+
+ type <- typeof(runValue(rle))
+ sink <- RleRealizationSink(dim, dimnames, type)
+ ## FIXME: breakInChunks() does not accept a 'totalsize' >= 2^31 at the
+ ## moment so this won't work on a long Rle.
+ partitioning <- breakInChunks(length(rle), chunksize)
+ rle_list <- relist(rle, partitioning)
+ for (k in seq_along(rle_list))
+ .append_Rle_to_sink(rle_list[[k]], sink)
+ as(sink, "ChunkedRleArraySeed")
}
+setAs("ChunkedRleArraySeed", "SolidRleArraySeed",
+ function(from) RleArraySeed(as(from, "Rle"), dim(from), dimnames(from))
+)
+
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### RleArray and RleMatrix objects
@@ -111,7 +334,7 @@ setClass("RleMatrix", contains=c("DelayedMatrix", "RleArray"))
### Automatic coercion method from RleArray to RleMatrix silently returns
### a broken object (unfortunately these dummy automatic coercion methods
### don't bother to validate the object they return). So we overwrite it.
-setAs("RleArray", "RleMatrix", function(from) new("RleMatrix", from))
+setAs("RleArray", "RleMatrix", function(from) new2("RleMatrix", from))
### For internal use only.
setMethod("matrixClass", "RleArray", function(x) "RleMatrix")
@@ -119,9 +342,9 @@ setMethod("matrixClass", "RleArray", function(x) "RleMatrix")
.validate_RleArray <- function(x)
{
if (!is(seed(x), "RleArraySeed"))
- return(wmsg("'seed(x)' must be an RleArraySeed object"))
+ return(wmsg2("'seed(x)' must be an RleArraySeed object"))
if (!is_pristine(x))
- return(wmsg("'x' carries delayed operations on it"))
+ return(wmsg2("'x' carries delayed operations on it"))
TRUE
}
@@ -137,86 +360,44 @@ setMethod("DelayedArray", "RleArraySeed",
### Works directly on an RleArraySeed object, in which case it must be called
### with a single argument.
-RleArray <- function(rle, dim, dimnames=NULL)
+RleArray <- function(rle, dim, dimnames=NULL, chunksize=NULL)
{
if (is(rle, "RleArraySeed")) {
- if (!(missing(dim) && is.null(dimnames)))
+ if (!(missing(dim) && is.null(dimnames) && is.null(chunksize)))
stop(wmsg("RleArray() must be called with a single argument ",
"when passed an RleArraySeed object"))
seed <- rle
} else {
- seed <- RleArraySeed(rle, dim, dimnames=dimnames)
+ seed <- RleArraySeed(rle, dim, dimnames=dimnames, chunksize=chunksize)
}
DelayedArray(seed)
}
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-### RleRealizationSink objects
-###
-### The RleRealizationSink class is a concrete RealizationSink subclass that
-### implements realization of an array-like object as an RleArray object.
+### Realization as an RleArray object
###
-setClass("RleRealizationSink",
- contains="RealizationSink",
- representation(
- dim="integer",
- dimnames="list",
- type="character",
- dump="environment"
- )
-)
-
-RleRealizationSink <- function(dim, dimnames=NULL, type="double")
-{
- if (is.null(dimnames))
- dimnames <- vector("list", length(dim))
- dump <- new.env(parent=emptyenv())
- new("RleRealizationSink", dim=dim, dimnames=dimnames, type=type, dump=dump)
-}
-
-setMethod("write_to_sink", c("array", "RleRealizationSink"),
- function(x, sink, offsets=NULL)
+setMethod("write_block_to_sink", "RleRealizationSink",
+ function(block, sink, viewport)
{
- x_dim <- dim(x)
- sink_dim <- sink at dim
- stopifnot(length(x_dim) == length(sink_dim))
- if (is.null(offsets)) {
- stopifnot(identical(x_dim, sink_dim))
- } else {
- stopifnot(length(x_dim) == length(sink_dim))
- }
- name <- sprintf("%09d", length(sink at dump))
- stopifnot(nchar(name) == 9L)
- assign(name, Rle(x), envir=sink at dump)
- }
-)
-
-setAs("RleRealizationSink", "RleArraySeed",
- function(from)
- {
- if (length(from at dump) == 0L) {
- rle <- Rle(get(from at type)(0))
- } else {
- rle <- do.call("c", unname(as.list(from at dump, sorted=TRUE)))
- }
- RleArraySeed(rle, from at dim, from at dimnames)
+ stopifnot(identical(dim(sink), refdim(viewport)),
+ identical(dim(block), dim(viewport)))
+ ## 'viewport' is ignored!
+ .append_Rle_to_sink(Rle(block), sink)
}
)
setAs("RleRealizationSink", "RleArray",
- function(from) RleArray(as(from, "RleArraySeed"))
+ function(from) RleArray(as(from, "ChunkedRleArraySeed"))
)
-setAs("RleRealizationSink", "DelayedArray",
- function(from) RleArray(as(from, "RleArraySeed"))
-)
+setAs("RleRealizationSink", "DelayedArray", function(from) as(from, "RleArray"))
.as_RleArray <- function(from)
{
sink <- RleRealizationSink(dim(from), dimnames(from), type(from))
- write_to_sink(from, sink)
+ write_array_to_sink(from, sink)
as(sink, "RleArray")
}
@@ -248,12 +429,13 @@ setAs("DataFrame", "RleArray", .from_DataFrame_to_RleMatrix)
{
## We mangle the colnames exactly like as.data.frame() would do.
ans_colnames <- colnames(as.data.frame(from[0L, ]))
+ rle <- as(seed(from), "Rle")
partitioning <- PartitioningByEnd(nrow(from) * seq_len(ncol(from)),
names=ans_colnames)
- listData <- as.list(relist(seed(from)@rle, partitioning))
- new("DataFrame", listData=listData,
- nrows=nrow(from),
- rownames=rownames(from))
+ listData <- as.list(relist(rle, partitioning))
+ new2("DataFrame", listData=listData,
+ nrows=nrow(from),
+ rownames=rownames(from))
}
setAs("RleMatrix", "DataFrame", .from_RleMatrix_to_DataFrame)
diff --git a/R/SeedBinder-class.R b/R/SeedBinder-class.R
new file mode 100644
index 0000000..bda5e45
--- /dev/null
+++ b/R/SeedBinder-class.R
@@ -0,0 +1,96 @@
+### =========================================================================
+### SeedBinder objects
+### -------------------------------------------------------------------------
+###
+### This class is for internal use only and is not exported.
+###
+
+setClass("SeedBinder",
+ representation(
+ seeds="list", # List of array-like objects to bind. Each object
+ # is expected to satisfy the "seed contract" i.e.
+ # to support dim(), dimnames(), and
+ # subset_seed_as_array().
+
+ along="integer" # Single integer indicating the dimension along
+ # which to bind the seeds.
+ ),
+ prototype(
+ seeds=list(new("array")),
+ along=1L
+ )
+)
+
+.validate_SeedBinder <- function(x)
+{
+ if (length(x at seeds) == 0L)
+ return(wmsg2("'x at seeds' cannot be empty"))
+ if (!(isSingleInteger(x at along) && x at along > 0L))
+ return(wmsg2("'x at along' must be a single positive integer"))
+ dims <- get_dims_to_bind(x at seeds, x at along)
+ if (is.character(dims))
+ return(wmsg2(dims))
+ TRUE
+}
+
+setValidity2("SeedBinder", .validate_SeedBinder)
+
+new_SeedBinder <- function(seeds, along)
+{
+ seeds <- lapply(seeds, remove_pristine_DelayedArray_wrapping)
+ new2("SeedBinder", seeds=seeds, along=along)
+}
+
+### Implement the "seed contract" i.e. dim(), dimnames(), and
+### subset_seed_as_array().
+
+.get_SeedBinder_dim <- function(x)
+{
+ dims <- get_dims_to_bind(x at seeds, x at along)
+ combine_dims_along(dims, x at along)
+}
+
+setMethod("dim", "SeedBinder", .get_SeedBinder_dim)
+
+.get_SeedBinder_dimnames <- function(x)
+{
+ dims <- get_dims_to_bind(x at seeds, x at along)
+ combine_dimnames_along(x at seeds, dims, x at along)
+}
+
+setMethod("dimnames", "SeedBinder", .get_SeedBinder_dimnames)
+
+.subset_SeedBinder_as_array <- function(seed, index)
+{
+ i <- index[[seed at along]]
+
+ if (is.null(i)) {
+ ## This is the easy situation.
+ tmp <- lapply(seed at seeds, subset_seed_as_array, index)
+ ## Bind the ordinary arrays in 'tmp'.
+ ans <- do.call(simple_abind, c(tmp, list(along=seed at along)))
+ return(ans)
+ }
+
+ ## From now on 'i' is a vector of positive integers.
+ dims <- get_dims_to_bind(seed at seeds, seed at along)
+ breakpoints <- cumsum(dims[seed at along, ])
+ part_idx <- get_part_index(i, breakpoints)
+ split_part_idx <- split_part_index(part_idx, length(breakpoints))
+ FUN <- function(s) {
+ index[[seed at along]] <- split_part_idx[[s]]
+ subset_seed_as_array(seed at seeds[[s]], index)
+ }
+ tmp <- lapply(seq_along(seed at seeds), FUN)
+
+ ## Bind the ordinary arrays in 'tmp'.
+ ans <- do.call(simple_abind, c(tmp, list(along=seed at along)))
+
+ ## Reorder the rows or columns in 'ans'.
+ Nindex <- vector(mode="list", length=length(index))
+ Nindex[[seed at along]] <- get_rev_index(part_idx)
+ subset_by_Nindex(ans, Nindex)
+}
+
+setMethod("subset_seed_as_array", "SeedBinder", .subset_SeedBinder_as_array)
+
diff --git a/R/bind-arrays.R b/R/bind-arrays.R
new file mode 100644
index 0000000..48c68a3
--- /dev/null
+++ b/R/bind-arrays.R
@@ -0,0 +1,162 @@
+### =========================================================================
+### Bind arrays with an arbitrary number of dimensions along an arbitrary
+### dimension
+### -------------------------------------------------------------------------
+###
+
+
+### Return a matrix with one row per dim and one column per object if the
+### objects are bindable. Otherwise return a character vector describing why
+### the objects are not bindable. This design allows the function to be used
+### in the context of a validity method.
+### NOT exported but used in the HDF5Array package.
+get_dims_to_bind <- function(objects, no.check.along)
+{
+ dims <- lapply(objects, dim)
+ ndims <- lengths(dims)
+ ndim <- ndims[[1L]]
+ if (!all(ndims == ndim))
+ return(c("all the objects to bind must have ",
+ "the same number of dimensions"))
+ tmp <- unlist(dims, use.names=FALSE)
+ if (is.null(tmp))
+ return("the objects to bind have no dimensions")
+ dims <- matrix(tmp, nrow=ndim)
+ tmp <- dims[-no.check.along, , drop=FALSE]
+ if (!all(tmp == tmp[ , 1L]))
+ return("the objects to bind have incompatible dimensions")
+ dims
+}
+
+### Combine the dims the rbind/cbind way.
+combine_dims_along <- function(dims, along)
+{
+ ans_dim <- dims[ , 1L]
+ ans_dim[[along]] <- sum(dims[along, ])
+ ans_dim
+}
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Combine the dimnames of a list of array-like objects
+###
+
+### Assume all the arrays in 'objects' have the same number of dimensions.
+### NOT exported but used in the HDF5Array package.
+combine_dimnames <- function(objects)
+{
+ lapply(seq_along(dim(objects[[1L]])),
+ function(n) {
+ for (x in objects) {
+ dn <- dimnames(x)[[n]]
+ if (!is.null(dn))
+ return(dn)
+ }
+ NULL
+ })
+}
+
+### Combine the dimnames the rbind/cbind way.
+### NOT exported but used in the HDF5Array package.
+combine_dimnames_along <- function(objects, dims, along)
+{
+ dimnames <- combine_dimnames(objects)
+ along_names <- lapply(objects, function(x) dimnames(x)[[along]])
+ along_names_lens <- lengths(along_names)
+ if (any(along_names_lens != 0L)) {
+ fix_idx <- which(along_names_lens != dims[along, ])
+ along_names[fix_idx] <- lapply(dims[along, fix_idx], character)
+ }
+ along_names <- unlist(along_names, use.names=FALSE)
+ if (!is.null(along_names))
+ dimnames[[along]] <- along_names
+ if (all(S4Vectors:::sapply_isNULL(dimnames)))
+ dimnames <- NULL
+ dimnames
+}
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### simple_abind()
+###
+
+### 'objects' is assumed to be a list of vector-like objects.
+### 'block_lens' is assumed to be an integer vector parallel to 'objects'
+### specifying the block length for each object in 'objects'. In addition the
+### length of 'object[[i]]' must be 'k * block_lens[[i]]' (k is the same for
+### all the objects).
+.intertwine_blocks <- function(objects, block_lens)
+{
+ data <- unlist(objects, recursive=FALSE, use.names=FALSE)
+ objects_lens <- lengths(objects)
+ if (all(objects_lens == 0L))
+ return(data)
+
+ k <- objects_lens %/% block_lens
+ k <- unique(k[!is.na(k)])
+ stopifnot(length(k) == 1L) # sanity check
+
+ nobject <- length(objects)
+ objects_cumlens <- cumsum(objects_lens)
+ ranges <- lapply(seq_len(nobject),
+ function(i) {
+ width <- block_lens[[i]]
+ offset <- if (i == 1L) 0L else objects_cumlens[[i - 1L]]
+ successiveIRanges(rep.int(width, k), from=offset + 1L)
+ })
+ ranges <- do.call(c, ranges)
+ i <- as.vector(matrix(seq_len(nobject * k), nrow=nobject, byrow=TRUE))
+ extractROWS(data, ranges[i])
+}
+
+### A stripped-down version of abind::abind().
+### Some differences:
+### (a) Treatment of dimnames: simple_abind() treatment of dimnames is
+### consistent with base::rbind() and base::cbind(). This is not the
+### case for abind::abind() which does some strange things with the
+### dimnames.
+### (b) Performance: simple_abind() has a little bit more overhead than
+### abind::abind(). This makes it slower on small objects. However it
+### tends to be slightly faster on big objects.
+### NOT exported but used in the HDF5Array package.
+simple_abind <- function(..., along)
+{
+ objects <- list(...)
+ object_is_NULL <- S4Vectors:::sapply_isNULL(objects)
+ if (any(object_is_NULL))
+ objects <- objects[!object_is_NULL]
+ if (length(objects) == 0L)
+ return(NULL)
+ if (length(objects) == 1L)
+ return(objects[[1L]])
+
+ ## Check dim compatibility.
+ dims <- get_dims_to_bind(objects, no.check.along=along)
+ if (is.character(dims))
+ stop(wmsg(dims))
+
+ ## Perform the binding.
+ block_lens <- dims[along, ]
+ for (n in seq_len(along - 1L))
+ block_lens <- block_lens * dims[n, ]
+ ans <- .intertwine_blocks(objects, block_lens)
+
+ ## Set the dim.
+ dim(ans) <- combine_dims_along(dims, along)
+
+ ## Combine and set the dimnames.
+ dimnames(ans) <- combine_dimnames_along(objects, dims, along)
+ ans
+}
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Bind arrays along their 1st or 2nd dimension
+###
+
+setGeneric("arbind", function(...) standardGeneric("arbind"))
+setGeneric("acbind", function(...) standardGeneric("acbind"))
+
+setMethod("arbind", "array", function(...) simple_abind(..., along=1L))
+setMethod("acbind", "array", function(...) simple_abind(..., along=2L))
+
diff --git a/R/block_processing.R b/R/block_processing.R
index d26c8c4..3cce892 100644
--- a/R/block_processing.R
+++ b/R/block_processing.R
@@ -22,7 +22,7 @@ DEFAULT_BLOCK_SIZE <- 4500000L # 4.5 Mb
)
### Used in HDF5Array!
-get_block_length <- function(type)
+get_max_block_length <- function(type)
{
type_size <- .TYPE_SIZES[type]
idx <- which(is.na(type_size))
@@ -67,26 +67,26 @@ set_verbose_block_processing <- function(verbose)
}
### An lapply-like function.
-block_APPLY <- function(x, APPLY, ..., sink=NULL, block_len=NULL)
+block_APPLY <- function(x, APPLY, ..., sink=NULL, max_block_len=NULL)
{
APPLY <- match.fun(APPLY)
- if (is.null(block_len))
- block_len <- get_block_length(type(x))
- blocks <- ArrayBlocks(dim(x), block_len)
- nblock <- length(blocks)
+ if (is.null(max_block_len))
+ max_block_len <- get_max_block_length(type(x))
+ spacings <- get_max_spacings_for_linear_blocks(dim(x), max_block_len)
+ grid <- ArrayRegularGrid(dim(x), spacings)
+ nblock <- length(grid)
lapply(seq_len(nblock),
function(b) {
if (get_verbose_block_processing())
message("Processing block ", b, "/", nblock, " ... ",
appendLF=FALSE)
- block_ranges <- get_block_ranges(blocks, b)
- Nindex <- make_Nindex_from_block_ranges(block_ranges, blocks at dim)
- subarray <- subset_by_Nindex(x, Nindex)
- if (!is.array(subarray))
- subarray <- .as_array_or_matrix(subarray)
- block_ans <- APPLY(subarray, ...)
+ viewport <- grid[[b]]
+ block <- extract_block(x, viewport)
+ if (!is.array(block))
+ block <- .as_array_or_matrix(block)
+ block_ans <- APPLY(block, ...)
if (!is.null(sink)) {
- write_to_sink(block_ans, sink, offsets=start(block_ranges))
+ write_block_to_sink(block_ans, sink, viewport)
block_ans <- NULL
}
if (get_verbose_block_processing())
@@ -96,7 +96,7 @@ block_APPLY <- function(x, APPLY, ..., sink=NULL, block_len=NULL)
}
### A mapply-like function for conformable arrays.
-block_MAPPLY <- function(MAPPLY, ..., sink=NULL, block_len=NULL)
+block_MAPPLY <- function(MAPPLY, ..., sink=NULL, max_block_len=NULL)
{
MAPPLY <- match.fun(MAPPLY)
dots <- unname(list(...))
@@ -104,29 +104,29 @@ block_MAPPLY <- function(MAPPLY, ..., sink=NULL, block_len=NULL)
x_dim <- dims[ , 1L]
if (!all(dims == x_dim))
stop("non-conformable arrays")
- if (is.null(block_len)) {
+ if (is.null(max_block_len)) {
types <- unlist(lapply(dots, type))
- block_len <- min(get_block_length(types))
+ max_block_len <- min(get_max_block_length(types))
}
- blocks <- ArrayBlocks(x_dim, block_len)
- nblock <- length(blocks)
+ spacings <- get_max_spacings_for_linear_blocks(x_dim, max_block_len)
+ grid <- ArrayRegularGrid(x_dim, spacings)
+ nblock <- length(grid)
lapply(seq_len(nblock),
function(b) {
if (get_verbose_block_processing())
message("Processing block ", b, "/", nblock, " ... ",
appendLF=FALSE)
- block_ranges <- get_block_ranges(blocks, b)
- Nindex <- make_Nindex_from_block_ranges(block_ranges, blocks at dim)
- subarrays <- lapply(dots,
+ viewport <- grid[[b]]
+ blocks <- lapply(dots,
function(x) {
- subarray <- subset_by_Nindex(x, Nindex)
- if (!is.array(subarray))
- subarray <- .as_array_or_matrix(subarray)
- subarray
+ block <- extract_block(x, viewport)
+ if (!is.array(block))
+ block <- .as_array_or_matrix(block)
+ block
})
- block_ans <- do.call(MAPPLY, subarrays)
+ block_ans <- do.call(MAPPLY, blocks)
if (!is.null(sink)) {
- write_to_sink(block_ans, sink, offsets=start(block_ranges))
+ write_block_to_sink(block_ans, sink, viewport)
block_ans <- NULL
}
if (get_verbose_block_processing())
@@ -137,25 +137,26 @@ block_MAPPLY <- function(MAPPLY, ..., sink=NULL, block_len=NULL)
### A Reduce-like function.
block_APPLY_and_COMBINE <- function(x, APPLY, COMBINE, init,
- BREAKIF=NULL, block_len=NULL)
+ BREAKIF=NULL, max_block_len=NULL)
{
APPLY <- match.fun(APPLY)
COMBINE <- match.fun(COMBINE)
if (!is.null(BREAKIF))
BREAKIF <- match.fun(BREAKIF)
- if (is.null(block_len))
- block_len <- get_block_length(type(x))
- blocks <- ArrayBlocks(dim(x), block_len)
- nblock <- length(blocks)
+ if (is.null(max_block_len))
+ max_block_len <- get_max_block_length(type(x))
+ spacings <- get_max_spacings_for_linear_blocks(dim(x), max_block_len)
+ grid <- ArrayRegularGrid(dim(x), spacings)
+ nblock <- length(grid)
for (b in seq_len(nblock)) {
if (get_verbose_block_processing())
message("Processing block ", b, "/", nblock, " ... ",
appendLF=FALSE)
- subarray <- extract_array_block(x, blocks, b)
- if (!is.array(subarray))
- subarray <- .as_array_or_matrix(subarray)
- reduced <- APPLY(subarray)
- init <- COMBINE(b, subarray, init, reduced)
+ block <- extract_block(x, grid[[b]])
+ if (!is.array(block))
+ block <- .as_array_or_matrix(block)
+ reduced <- APPLY(block)
+ init <- COMBINE(b, block, init, reduced)
if (get_verbose_block_processing())
message("OK")
if (!is.null(BREAKIF) && BREAKIF(init)) {
@@ -183,8 +184,8 @@ colblock_APPLY <- function(x, APPLY, ..., sink=NULL)
APPLY <- match.fun(APPLY)
## We're going to walk along the columns so need to increase the block
## length so each block is made of at least one column.
- block_len <- max(get_block_length(type(x)), x_dim[[1L]])
- block_APPLY(x, APPLY, ..., sink=sink, block_len=block_len)
+ max_block_len <- max(get_max_block_length(type(x)), x_dim[[1L]])
+ block_APPLY(x, APPLY, ..., sink=sink, max_block_len=max_block_len)
}
colblock_APPLY_and_COMBINE <- function(x, APPLY, COMBINE, init)
@@ -194,8 +195,9 @@ colblock_APPLY_and_COMBINE <- function(x, APPLY, COMBINE, init)
stop("'x' must be a matrix-like object")
## We're going to walk along the columns so need to increase the block
## length so each block is made of at least one column.
- block_len <- max(get_block_length(type(x)), x_dim[[1L]])
- block_APPLY_and_COMBINE(x, APPLY, COMBINE, init, block_len=block_len)
+ max_block_len <- max(get_max_block_length(type(x)), x_dim[[1L]])
+ block_APPLY_and_COMBINE(x, APPLY, COMBINE, init,
+ max_block_len=max_block_len)
}
@@ -204,27 +206,11 @@ colblock_APPLY_and_COMBINE <- function(x, APPLY, COMBINE, init)
###
### Exported!
-setMethod("write_to_sink", c("DelayedArray", "RealizationSink"),
- function(x, sink, offsets=NULL)
- {
- if (!is.null(offsets))
- stop(wmsg("'offsets' must be NULL when the object to write ",
- "is a DelayedArray object"))
- ## Semantically equivalent to 'write_to_sink(as.array(x), sink)'
- ## but uses block-processing so the full DelayedArray object is
- ## not realized at once in memory. Instead the object is first
- ## split into blocks and the blocks are realized and written to
- ## disk one at a time.
- block_APPLY(x, identity, sink=sink)
- }
-)
-
-### Exported!
-setMethod("write_to_sink", c("ANY", "RealizationSink"),
- function(x, sink, offsets=NULL)
- {
- x <- as(x, "DelayedArray")
- write_to_sink(x, sink, offsets=offsets)
- }
-)
+### Split the array-like object into blocks, then realize and write one block
+### at a time to disk.
+write_array_to_sink <- function(x, sink)
+{
+ stopifnot(identical(dim(x), dim(sink)))
+ block_APPLY(DelayedArray(x), identity, sink=sink)
+}
diff --git a/R/cbind-methods.R b/R/cbind-methods.R
deleted file mode 100644
index f545ec2..0000000
--- a/R/cbind-methods.R
+++ /dev/null
@@ -1,163 +0,0 @@
-### =========================================================================
-### Bind DelayedArray objects along their rows or columns
-### -------------------------------------------------------------------------
-###
-
-
-### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-### SeedBinder objects
-###
-### This class is for internal use only and is not exported.
-###
-
-setClass("SeedBinder",
- representation(
- seeds="list", # List of array-like objects to bind. Each object
- # is expected to satisfy the "seed contract" i.e.
- # to support dim(), dimnames(), and
- # subset_seed_as_array().
-
- along="integer" # Single integer indicating the dimension along
- # which to bind the seeds.
- ),
- prototype(
- seeds=list(new("array")),
- along=1L
- )
-)
-
-.validate_SeedBinder <- function(x)
-{
- if (length(x at seeds) == 0L)
- return(wmsg("'x at seeds' cannot be empty"))
- if (!(isSingleInteger(x at along) && x at along > 0L))
- return(wmsg("'x at along' must be a single positive integer"))
- dims <- IRanges:::get_dims_to_bind(x at seeds, x at along)
- if (is.character(dims))
- return(wmsg(dims))
- TRUE
-}
-
-setValidity2("SeedBinder", .validate_SeedBinder)
-
-.new_SeedBinder <- function(seeds, along)
-{
- seeds <- lapply(seeds, remove_pristine_DelayedArray_wrapping)
- new2("SeedBinder", seeds=seeds, along=along)
-}
-
-### Implement the "seed contract" i.e. dim(), dimnames(), and
-### subset_seed_as_array().
-
-.get_SeedBinder_dim <- function(x)
-{
- dims <- IRanges:::get_dims_to_bind(x at seeds, x at along)
- IRanges:::combine_dims_along(dims, x at along)
-}
-
-setMethod("dim", "SeedBinder", .get_SeedBinder_dim)
-
-.get_SeedBinder_dimnames <- function(x)
-{
- dims <- IRanges:::get_dims_to_bind(x at seeds, x at along)
- IRanges:::combine_dimnames_along(x at seeds, dims, x at along)
-}
-
-setMethod("dimnames", "SeedBinder", .get_SeedBinder_dimnames)
-
-.subset_SeedBinder_as_array <- function(seed, index)
-{
- i <- index[[seed at along]]
-
- if (is.null(i)) {
- ## This is the easy situation.
- tmp <- lapply(seed at seeds, subset_seed_as_array, index)
- ## Bind the ordinary arrays in 'tmp'.
- ans <- do.call(IRanges:::simple_abind, c(tmp, list(along=seed at along)))
- return(ans)
- }
-
- ## From now on 'i' is a vector of positive integers.
- dims <- IRanges:::get_dims_to_bind(seed at seeds, seed at along)
- breakpoints <- cumsum(dims[seed at along, ])
- part_idx <- get_part_index(i, breakpoints)
- split_part_idx <- split_part_index(part_idx, length(breakpoints))
- FUN <- function(s) {
- index[[seed at along]] <- split_part_idx[[s]]
- subset_seed_as_array(seed at seeds[[s]], index)
- }
- tmp <- lapply(seq_along(seed at seeds), FUN)
-
- ## Bind the ordinary arrays in 'tmp'.
- ans <- do.call(IRanges:::simple_abind, c(tmp, list(along=seed at along)))
-
- ## Reorder the rows or columns in 'ans'.
- Nindex <- vector(mode="list", length=length(index))
- Nindex[[seed at along]] <- get_rev_index(part_idx)
- subset_by_Nindex(ans, Nindex)
-}
-
-setMethod("subset_seed_as_array", "SeedBinder", .subset_SeedBinder_as_array)
-
-
-### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-### arbind() and acbind()
-###
-
-.DelayedArray_arbind <- function(...)
-{
- objects <- unname(list(...))
- dims <- IRanges:::get_dims_to_bind(objects, 1L)
- if (is.character(dims))
- stop(wmsg(dims))
- DelayedArray(.new_SeedBinder(objects, 1L))
-}
-
-.DelayedArray_acbind <- function(...)
-{
- objects <- unname(list(...))
- dims <- IRanges:::get_dims_to_bind(objects, 2L)
- if (is.character(dims))
- stop(wmsg(dims))
- DelayedArray(.new_SeedBinder(objects, 2L))
-}
-
-setMethod("arbind", "DelayedArray", .DelayedArray_arbind)
-setMethod("acbind", "DelayedArray", .DelayedArray_acbind)
-
-
-### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-### rbind() and cbind()
-###
-
-setMethod("rbind", "DelayedMatrix", .DelayedArray_arbind)
-setMethod("cbind", "DelayedMatrix", .DelayedArray_acbind)
-
-.as_DelayedMatrix_objects <- function(objects)
-{
- lapply(objects,
- function(object) {
- if (length(dim(object)) != 2L)
- stop(wmsg("cbind() and rbind() are not supported on ",
- "DelayedArray objects that don't have exactly ",
- "2 dimensions. Please use acbind() or arnind() ",
- "instead."))
- as(object, "DelayedMatrix")
- })
-}
-
-.DelayedArray_rbind <- function(...)
-{
- objects <- .as_DelayedMatrix_objects(list(...))
- do.call("rbind", objects)
-}
-
-.DelayedArray_cbind <- function(...)
-{
- objects <- .as_DelayedMatrix_objects(list(...))
- do.call("cbind", objects)
-}
-
-setMethod("rbind", "DelayedArray", .DelayedArray_rbind)
-setMethod("cbind", "DelayedArray", .DelayedArray_cbind)
-
diff --git a/R/realize.R b/R/realize.R
index caa6d13..61f3a6f 100644
--- a/R/realize.R
+++ b/R/realize.R
@@ -12,20 +12,32 @@
### of DelayedArray backends. Concrete subclasses must implement:
### 1) A constructor function that takes argument 'dim', 'dimnames', and
### 'type'.
-### 2) A "write_to_sink" method that works on an ordinary array.
-### 3) A "close" method (optional).
-### 4) Coercion to DelayedArray.
-### See the arrayRealizationSink class below or the HDF5RealizationSink class
-### in the HDF5Array package for examples of concrete RealizationSink
-### subclasses.
+### 2) "dim" and "dimnames" methods.
+### 3) A "chunk_dim" method (optional).
+### 4) A "write_block_to_sink" method.
+### 5) A "close" method (optional).
+### 6) Coercion to DelayedArray.
+### See the arrayRealizationSink class below, or the RleRealizationSink class
+### in RleArray-class.R, or the HDF5RealizationSink class in the HDF5Array
+### package for examples of concrete RealizationSink subclasses.
setClass("RealizationSink", representation("VIRTUAL"))
-### 'x' and 'sink' must have the same number of dimensions.
-### 'offsets' must be NULL or an integer vector with 1 offset per dimension
-### in 'x' (or in 'sink').
-### A default "write_to_sink" method is defined in DelayedArray-class.R.
-setGeneric("write_to_sink", signature=c("x", "sink"),
- function(x, sink, offsets=NULL) standardGeneric("write_to_sink")
+setGeneric("chunk_dim", function(x) standardGeneric("chunk_dim"))
+
+### The default "chunk_dim" method for RealizationSink objects returns NULL
+### (i.e. no chunking i.e. implicit chunks of dimensions rep(1, length(dim(x)))
+### i.e. 1 array element per chunk).
+setMethod("chunk_dim", "RealizationSink", function(x) NULL)
+
+### 'block', 'sink', and 'viewport' are expected to be an ordinary array, a
+### RealizationSink, and a Viewport object, respectively. They must satisfy:
+###
+### stopifnot(identical(dim(sink), refdim(viewport)),
+### identical(dim(block), dim(viewport)))
+###
+### Just to be safe, methods should perform this sanity check.
+setGeneric("write_block_to_sink", signature="sink",
+ function(block, sink, viewport) standardGeneric("write_block_to_sink")
)
setGeneric("close")
@@ -53,32 +65,32 @@ setClass("arrayRealizationSink",
get("result", envir=sink at result_envir)
}
+.set_arrayRealizationSink_result <- function(sink, result)
+{
+ assign("result", result, envir=sink at result_envir)
+}
+
+setMethod("dim", "arrayRealizationSink",
+ function(x) dim(.get_arrayRealizationSink_result(x))
+)
+
arrayRealizationSink <- function(dim, dimnames=NULL, type="double")
{
result <- array(get(type)(0), dim=dim, dimnames=dimnames)
result_envir <- new.env(parent=emptyenv())
- assign("result", result, envir=result_envir)
- new("arrayRealizationSink", result_envir=result_envir)
+ sink <- new("arrayRealizationSink", result_envir=result_envir)
+ .set_arrayRealizationSink_result(sink, result)
+ sink
}
-setMethod("write_to_sink", c("array", "arrayRealizationSink"),
- function(x, sink, offsets=NULL)
+setMethod("write_block_to_sink", "arrayRealizationSink",
+ function(block, sink, viewport)
{
- x_dim <- dim(x)
+ stopifnot(identical(dim(sink), refdim(viewport)),
+ identical(dim(block), dim(viewport)))
result <- .get_arrayRealizationSink_result(sink)
- sink_dim <- dim(result)
- if (is.null(offsets)) {
- stopifnot(identical(x_dim, sink_dim))
- result[] <- x
- } else {
- stopifnot(length(x_dim) == length(sink_dim))
- block_ranges <- IRanges(offsets, width=x_dim)
- Nindex <- make_Nindex_from_block_ranges(
- block_ranges, sink_dim,
- expand.RangeNSBS=TRUE)
- result <- replace_by_Nindex(result, Nindex, x)
- }
- assign("result", result, envir=sink at result_envir)
+ result <- replace_block(result, viewport, block)
+ .set_arrayRealizationSink_result(sink, result)
}
)
diff --git a/R/show-utils.R b/R/show-utils.R
index 71ed6e7..7eef425 100644
--- a/R/show-utils.R
+++ b/R/show-utils.R
@@ -6,20 +6,20 @@
###
-.format_as_character_vector <- function(x, justify)
+.format_as_character_vector <- function(x, justify, quote=TRUE)
{
x_names <- names(x)
x <- as.vector(x)
- if (typeof(x) == "character" && length(x) != 0L)
+ if (quote && typeof(x) == "character" && length(x) != 0L)
x <- paste0("\"", x, "\"")
names(x) <- x_names
format(x, justify=justify)
}
-.format_as_character_matrix <- function(x, justify)
+.format_as_character_matrix <- function(x, justify, quote=TRUE)
{
x <- as.matrix(x)
- if (typeof(x) == "character" && length(x) != 0L) {
+ if (quote && typeof(x) == "character" && length(x) != 0L) {
x_dim <- dim(x)
x_dimnames <- dimnames(x)
x <- paste0("\"", x, "\"")
@@ -51,32 +51,32 @@
format(c(s1, ".", s2), justify=justify)
}
-.prepare_1D_array_sample <- function(x, n1, n2, justify)
+.prepare_1D_array_sample <- function(x, n1, n2, justify, quote=TRUE)
{
x_len <- length(x)
x_names <- names(x)
if (x_len <= n1 + n2 + 1L) {
- ans <- .format_as_character_vector(x, justify)
+ ans <- .format_as_character_vector(x, justify, quote=quote)
idx1 <- seq_len(x_len)
idx2 <- integer(0)
names(ans) <- .split_1D_array_names(x_names, idx1, idx2, justify)[idx1]
} else {
idx1 <- seq_len(n1)
idx2 <- seq(to=x_len, by=1L, length.out=n2)
- ans1 <- .format_as_character_vector(x[idx1], justify)
- ans2 <- .format_as_character_vector(x[idx2], justify)
+ ans1 <- .format_as_character_vector(x[idx1], justify, quote=quote)
+ ans2 <- .format_as_character_vector(x[idx2], justify, quote=quote)
ans <- c(ans1, ".", ans2)
names(ans) <- .split_1D_array_names(x_names, idx1, idx2, justify)
}
ans
}
-.print_1D_array_data <- function(x, n1, n2)
+.print_1D_array_data <- function(x, n1, n2, quote=TRUE)
{
stopifnot(length(dim(x)) == 1L)
right <- type(x) != "character"
justify <- if (right) "right" else "left"
- out <- .prepare_1D_array_sample(x, n1, n2, justify)
+ out <- .prepare_1D_array_sample(x, n1, n2, justify, quote=quote)
print(out, quote=FALSE, right=right, max=length(out))
}
@@ -128,15 +128,17 @@
c(head(ans, n=length(s1)), "...", tail(ans, n=length(s2)))
}
-.rsplit_2D_array_data <- function(x, m1, m2, justify)
+.rsplit_2D_array_data <- function(x, m1, m2, justify, quote=TRUE)
{
x_nrow <- nrow(x)
x_rownames <- rownames(x)
idx1 <- seq_len(m1)
idx2 <- seq(to=x_nrow, by=1L, length.out=m2)
- ans1 <- .format_as_character_matrix(x[idx1, , drop=FALSE], justify)
- ans2 <- .format_as_character_matrix(x[idx2, , drop=FALSE], justify)
+ ans1 <- .format_as_character_matrix(x[idx1, , drop=FALSE], justify,
+ quote=quote)
+ ans2 <- .format_as_character_matrix(x[idx2, , drop=FALSE], justify,
+ quote=quote)
dots <- rep.int(".", ncol(ans1))
ans <- rbind(ans1, matrix(dots, nrow=1L), ans2)
@@ -144,15 +146,17 @@
ans
}
-.csplit_2D_array_data <- function(x, n1, n2, justify)
+.csplit_2D_array_data <- function(x, n1, n2, justify, quote=TRUE)
{
x_ncol <- ncol(x)
x_colnames <- colnames(x)
idx1 <- seq_len(n1)
idx2 <- seq(to=x_ncol, by=1L, length.out=n2)
- ans1 <- .format_as_character_matrix(x[ , idx1, drop=FALSE], justify)
- ans2 <- .format_as_character_matrix(x[ , idx2, drop=FALSE], justify)
+ ans1 <- .format_as_character_matrix(x[ , idx1, drop=FALSE], justify,
+ quote=quote)
+ ans2 <- .format_as_character_matrix(x[ , idx2, drop=FALSE], justify,
+ quote=quote)
dots <- rep.int(".", nrow(ans1))
ans <- cbind(ans1, matrix(dots, ncol=1L), ans2)
@@ -160,7 +164,7 @@
ans
}
-.split_2D_array_data <- function(x, m1, m2, n1, n2, justify)
+.split_2D_array_data <- function(x, m1, m2, n1, n2, justify, quote=TRUE)
{
x_ncol <- ncol(x)
x_colnames <- colnames(x)
@@ -169,8 +173,8 @@
x1 <- x[ , idx1, drop=FALSE]
x2 <- x[ , idx2, drop=FALSE]
- ans1 <- .rsplit_2D_array_data(x1, m1, m2, justify)
- ans2 <- .rsplit_2D_array_data(x2, m1, m2, justify)
+ ans1 <- .rsplit_2D_array_data(x1, m1, m2, justify, quote=quote)
+ ans2 <- .rsplit_2D_array_data(x2, m1, m2, justify, quote=quote)
dots <- rep.int(".", nrow(ans1))
ans <- cbind(ans1, matrix(dots, ncol=1L), ans2)
@@ -178,7 +182,7 @@
ans
}
-.prepare_2D_array_sample <- function(x, m1, m2, n1, n2, justify)
+.prepare_2D_array_sample <- function(x, m1, m2, n1, n2, justify, quote=TRUE)
{
## An attempt at reducing the nb of columns to display when 'x' has
## dimnames so the object fits in getOption("width"). Won't necessarily
@@ -202,7 +206,7 @@
x_ncol <- ncol(x)
if (x_nrow <= m1 + m2 + 1L) {
if (x_ncol <= n1 + n2 + 1L) {
- ans <- .format_as_character_matrix(x, justify)
+ ans <- .format_as_character_matrix(x, justify, quote=quote)
## Only needed because of this bug in base::print.default:
## https://stat.ethz.ch/pipermail/r-devel/2016-March/072479.html
## TODO: Remove when the bug is fixed.
@@ -213,11 +217,11 @@
justify)[idx1]
}
} else {
- ans <- .csplit_2D_array_data(x, n1, n2, justify)
+ ans <- .csplit_2D_array_data(x, n1, n2, justify, quote=quote)
}
} else {
if (x_ncol <= n1 + n2 + 1L) {
- ans <- .rsplit_2D_array_data(x, m1, m2, justify)
+ ans <- .rsplit_2D_array_data(x, m1, m2, justify, quote=quote)
## Only needed because of this bug in base::print.default:
## https://stat.ethz.ch/pipermail/r-devel/2016-March/072479.html
## TODO: Remove when the bug is fixed.
@@ -228,18 +232,18 @@
justify)[idx1]
}
} else {
- ans <- .split_2D_array_data(x, m1, m2, n1, n2, justify)
+ ans <- .split_2D_array_data(x, m1, m2, n1, n2, justify, quote=quote)
}
}
ans
}
-.print_2D_array_data <- function(x, m1, m2, n1, n2)
+.print_2D_array_data <- function(x, m1, m2, n1, n2, quote=TRUE)
{
stopifnot(length(dim(x)) == 2L)
right <- type(x) != "character"
justify <- if (right) "right" else "left"
- out <- .prepare_2D_array_sample(x, m1, m2, n1, n2, justify)
+ out <- .prepare_2D_array_sample(x, m1, m2, n1, n2, justify, quote=quote)
print(out, quote=FALSE, right=right, max=length(out))
}
@@ -248,20 +252,21 @@
### Array of arbitrary dimensions
###
-.print_2D_slices <- function(x, m1, m2, n1, n2, blocks, idx)
+.print_2D_slices <- function(x, m1, m2, n1, n2, grid, idx, quote=TRUE)
{
x_dimnames <- dimnames(x)
for (i in idx) {
- Nindex <- get_array_block_Nindex(blocks, i)
- cat(Nindex_as_string(Nindex, x_dimnames), "\n", sep="")
- slice <- subset_by_Nindex(x, Nindex)
+ viewport <- grid[[i]]
+ s <- make_string_from_ArrayViewport(viewport, dimnames=x_dimnames)
+ cat(s, "\n", sep="")
+ slice <- extract_block(x, viewport)
dim(slice) <- dim(slice)[1:2]
- .print_2D_array_data(slice, m1, m2, n1, n2)
+ .print_2D_array_data(slice, m1, m2, n1, n2, quote=quote)
cat("\n")
}
}
-.print_nD_array_data <- function(x, n1, n2)
+.print_nD_array_data <- function(x, n1, n2, quote=TRUE)
{
x_dim <- dim(x)
x_nrow <- x_dim[[1L]]
@@ -283,17 +288,18 @@
z1 <- z2 <- 1L # print only first and last slices
}
}
- blocks <- new("ArrayBlocks", dim=x_dim, N=3L, by=1L)
- nblock <- length(blocks)
+ spacings <- get_max_spacings_for_linear_blocks(x_dim, prod(x_dim[1:2]))
+ grid <- ArrayRegularGrid(x_dim, spacings)
+ nblock <- length(grid)
if (nblock <= z1 + z2 + 1L) {
idx <- seq_len(nblock)
- .print_2D_slices(x, m1, m2, n1, n2, blocks, idx)
+ .print_2D_slices(x, m1, m2, n1, n2, grid, idx, quote=quote)
} else {
idx1 <- seq_len(z1)
idx2 <- seq(to=nblock, by=1L, length.out=z2)
- .print_2D_slices(x, m1, m2, n1, n2, blocks, idx1)
+ .print_2D_slices(x, m1, m2, n1, n2, grid, idx1, quote=quote)
cat("...\n\n")
- .print_2D_slices(x, m1, m2, n1, n2, blocks, idx2)
+ .print_2D_slices(x, m1, m2, n1, n2, grid, idx2, quote=quote)
}
}
@@ -302,17 +308,17 @@
### show_compact_array()
###
-.print_array_data <- function(x, n1, n2)
+.print_array_data <- function(x, n1, n2, quote=TRUE)
{
x_dim <- dim(x)
if (length(x_dim) == 1L)
- return(.print_1D_array_data(x, n1, n2))
+ return(.print_1D_array_data(x, n1, n2, quote=quote))
if (length(x_dim) == 2L) {
nhead <- get_showHeadLines()
ntail <- get_showTailLines()
- return(.print_2D_array_data(x, nhead, ntail, n1, n2))
+ return(.print_2D_array_data(x, nhead, ntail, n1, n2, quote=quote))
}
- .print_nD_array_data(x, n1, n2)
+ .print_nD_array_data(x, n1, n2, quote=quote)
}
show_compact_array <- function(object)
diff --git a/R/subset_seed_as_array.R b/R/subset_seed_as_array.R
new file mode 100644
index 0000000..d1c9fc3
--- /dev/null
+++ b/R/subset_seed_as_array.R
@@ -0,0 +1,128 @@
+### =========================================================================
+### subset_seed_as_array()
+### -------------------------------------------------------------------------
+###
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Low-level helpers
+###
+
+### Return the slice as a list.
+.extract_data_frame_slice <- function(x, index)
+{
+ slice <- subset_by_Nindex(x, index)
+ ## Turn into a list and replace factors with character vectors.
+ lapply(slice, as.vector)
+}
+.extract_DataFrame_slice <- function(x, index)
+{
+ slice <- subset_by_Nindex(x, index)
+ slice <- as.data.frame(slice)
+ ## Turn into a list and replace factors with character vectors.
+ lapply(slice, as.vector)
+}
+
+### Return a list with one list element per column in data frame 'x'.
+### All the list elements have length 0.
+.extract_data_frame_slice0 <- function(x)
+{
+ slice0 <- x[0L, , drop=FALSE]
+ ## Turn into a list and replace factors with character vectors.
+ lapply(slice0, as.vector)
+}
+.extract_DataFrame_slice0 <- function(x)
+{
+ slice0 <- x[0L, , drop=FALSE]
+ slice0 <- as.data.frame(slice0)
+ if (ncol(slice0) != ncol(x))
+ stop(wmsg("DataFrame object 'x' can be used as the seed of ",
+ "a DelayedArray object only if as.data.frame(x) ",
+ "preserves the number of columns"))
+ ## Turn into a list and replace factors with character vectors.
+ lapply(slice0, as.vector)
+}
+
+### Equivalent to 'typeof(as.matrix(x))' but with an almost-zero
+### memory footprint (it avoids the cost of turning 'x' into a matrix).
+.get_data_frame_type <- function(x)
+{
+ if (ncol(x) == 0L)
+ return("logical")
+ slice0 <- .extract_data_frame_slice0(x)
+ typeof(unlist(slice0, use.names=FALSE))
+}
+
+### Equivalent to 'typeof(as.matrix(as.data.frame(x)))' but with an
+### almost-zero memory footprint (it avoids the cost of turning 'x' first
+### into a data frame then into a matrix).
+.get_DataFrame_type <- function(x)
+{
+ if (ncol(x) == 0L)
+ return("logical")
+ slice0 <- .extract_DataFrame_slice0(x)
+ typeof(unlist(slice0, use.names=FALSE))
+}
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### subset_seed_as_array() generic and methods
+###
+
+### 'index' is expected to be an unnamed list of subscripts as positive integer
+### vectors, one vector per seed dimension. *Missing* list elements are allowed
+### and represented by NULLs.
+### The "subset_seed_as_array" methods don't need to support anything else.
+### They must return an ordinary array. No need to propagate the dimnames.
+setGeneric("subset_seed_as_array", signature="seed",
+ function(seed, index) standardGeneric("subset_seed_as_array")
+)
+
+setMethod("subset_seed_as_array", "ANY",
+ function(seed, index)
+ {
+ slice <- subset_by_Nindex(seed, index)
+ as.array(slice)
+ }
+)
+
+setMethod("subset_seed_as_array", "array",
+ function(seed, index)
+ subset_by_Nindex(seed, index)
+)
+
+### Equivalent to
+###
+### subset_by_Nindex(as.matrix(x), index)
+###
+### but avoids the cost of turning the full data frame 'x' into a matrix so
+### memory footprint stays small when 'index' is small.
+setMethod("subset_seed_as_array", "data.frame",
+ function(seed, index)
+ {
+ #ans_type <- .get_data_frame_type(seed)
+ slice0 <- .extract_data_frame_slice0(seed)
+ slice <- .extract_data_frame_slice(seed, index)
+ data <- unlist(c(slice0, slice), use.names=FALSE)
+ array(data, dim=get_Nindex_lengths(index, dim(seed)))
+ }
+)
+
+### Equivalent to
+###
+### subset_by_Nindex(as.matrix(as.data.frame(x)), index)
+###
+### but avoids the cost of turning the full DataFrame 'x' first into a data
+### frame then into a matrix so memory footprint stays small when 'index' is
+### small.
+setMethod("subset_seed_as_array", "DataFrame",
+ function(seed, index)
+ {
+ #ans_type <- .get_DataFrame_type(seed)
+ slice0 <- .extract_DataFrame_slice0(seed)
+ slice <- .extract_DataFrame_slice(seed, index)
+ data <- unlist(c(slice0, slice), use.names=FALSE)
+ array(data, dim=get_Nindex_lengths(index, dim(seed)))
+ }
+)
+
diff --git a/R/utils.R b/R/utils.R
index a9d0b0d..5b38085 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -7,12 +7,89 @@
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Used in validity methods
+###
+
+### A modified version of S4Vectors::wmsg() that is better suited for use
+### by validity methods.
+### TODO: Put this in S4Vectors next to wmsg(). Would probably need a better
+### name.
+wmsg2 <- function(...)
+ paste0("\n ",
+ paste0(strwrap(paste0(c(...), collapse="")), collapse="\n "))
+
+validate_dim_slot <- function(x, slotname="dim")
+{
+ x_dim <- slot(x, slotname)
+ if (!is.integer(x_dim))
+ return(wmsg2(sprintf("'%s' slot must be an integer vector", slotname)))
+ if (length(x_dim) == 0L)
+ return(wmsg2(sprintf("'%s' slot cannot be empty", slotname)))
+ if (S4Vectors:::anyMissingOrOutside(x_dim, 0L))
+ return(wmsg2(sprintf("'%s' slot cannot contain negative or NA values",
+ slotname)))
+ TRUE
+}
+
+validate_dimnames_slot <- function(x, dim, slotname="dimnames")
+{
+ x_dimnames <- slot(x, slotname)
+ if (!is.list(x_dimnames))
+ return(wmsg2(sprintf("'%s' slot must be a list", slotname)))
+ if (length(x_dimnames) != length(dim))
+ return(wmsg2(sprintf("'%s' slot must have ", slotname),
+ "one list element per dimension in the object"))
+ ok <- vapply(seq_along(dim),
+ function(along) {
+ dn <- x_dimnames[[along]]
+ if (is.null(dn))
+ return(TRUE)
+ is.character(dn) && length(dn) == dim[[along]]
+ },
+ logical(1),
+ USE.NAMES=FALSE)
+ if (!all(ok))
+ return(wmsg2(sprintf("each list element in '%s' slot ", slotname),
+ "must be NULL or a character vector along ",
+ "the corresponding dimension in the object"))
+ TRUE
+}
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Manipulating an Nindex
###
-### An Nindex is a "multidimensional subsetting index" is a list with one
-### subscript per dimension in the object to subset. Missing subscripts are
-### represented by NULLs. Before it can be used to actually subset an
-### array-like object, the NULLs must be replaced with elements of class "name".
+### An Nindex is a "multidimensional subsetting index". It's represented as a
+### list with one subscript per dimension in the array-like object to subset.
+### NULL list elements in it are interpreted as missing subscripts, that is, as
+### subscripts that run along the full extend of the corresponding dimension.
+### Before an Nindex can be used in a call to `[`, `[<-`, `[[` or `[[<-`, the
+### NULL list elements must be replaced with object of class "name".
+###
+
+### For use in "[", "[<-", "[[", or "[[<-" methods to extract the user
+### supplied subscripts as an Nindex. NULL subscripts are replace with
+### integer(0). Missing subscripts are set to NULL.
+extract_Nindex_from_syscall <- function(call, eframe)
+{
+ Nindex <- lapply(seq_len(length(call) - 2L),
+ function(i) {
+ subscript <- call[[2L + i]]
+ if (missing(subscript))
+ return(NULL)
+ subscript <- eval(subscript, envir=eframe, enclos=eframe)
+ if (is.null(subscript))
+ return(integer(0))
+ subscript
+ }
+ )
+ argnames <- tail(names(call), n=-2L)
+ if (!is.null(argnames))
+ Nindex <- Nindex[!(argnames %in% c("drop", "exact", "value"))]
+ if (length(Nindex) == 1L && is.null(Nindex[[1L]]))
+ Nindex <- Nindex[0L]
+ Nindex
+}
### Used in HDF5Array!
expand_Nindex_RangeNSBS <- function(Nindex)
@@ -23,7 +100,7 @@ expand_Nindex_RangeNSBS <- function(Nindex)
Nindex
}
-.Nindex2subscripts <- function(Nindex, x)
+.make_subscripts_from_Nindex <- function(Nindex, x)
{
stopifnot(is.list(Nindex), length(Nindex) == length(dim(x)))
@@ -41,36 +118,15 @@ expand_Nindex_RangeNSBS <- function(Nindex)
subset_by_Nindex <- function(x, Nindex, drop=FALSE)
{
- Nindex <- .Nindex2subscripts(Nindex, x)
- do.call(`[`, c(list(x), Nindex, list(drop=drop)))
+ subscripts <- .make_subscripts_from_Nindex(Nindex, x)
+ do.call(`[`, c(list(x), subscripts, list(drop=drop)))
}
+### Return the modified array.
replace_by_Nindex <- function(x, Nindex, value)
{
- Nindex <- .Nindex2subscripts(Nindex, x)
- do.call(`[<-`, c(list(x), Nindex, list(value=value)))
-}
-
-Nindex_as_string <- function(Nindex, dimnames=NULL)
-{
- stopifnot(is.list(Nindex))
- missing_idx <- which(S4Vectors:::sapply_isNULL(Nindex))
- Nindex[missing_idx] <- ""
- s <- as.character(Nindex)
- RangeNSBS_idx <- which(vapply(Nindex, is, logical(1), "RangeNSBS"))
- s[RangeNSBS_idx] <- lapply(Nindex[RangeNSBS_idx],
- function(i) paste0(i at subscript, collapse=":")
- )
- if (!is.null(dimnames)) {
- stopifnot(is.list(dimnames), length(Nindex) == length(dimnames))
- usename_idx <- which(nzchar(s) &
- lengths(Nindex) == 1L &
- lengths(dimnames) != 0L)
- s[usename_idx] <- mapply(`[`, dimnames[usename_idx],
- Nindex[usename_idx],
- SIMPLIFY=FALSE)
- }
- paste0(s, collapse=", ")
+ subscripts <- .make_subscripts_from_Nindex(Nindex, x)
+ do.call(`[<-`, c(list(x), subscripts, list(value=value)))
}
### Used in HDF5Array!
@@ -96,42 +152,6 @@ get_Nindex_names_along <- function(Nindex, dimnames, along)
names(i)
}
-### Used in HDF5Array!
-### Return an Nindex ("multidimensional subsetting index").
-make_Nindex_from_block_ranges <- function(block_ranges, dim,
- expand.RangeNSBS=FALSE)
-{
- stopifnot(is(block_ranges, "Ranges"),
- is.integer(dim))
- ndim <- length(dim)
- block_offsets <- start(block_ranges)
- block_dim <- width(block_ranges)
- stopifnot(length(block_ranges) == ndim,
- all(block_offsets >= 1L),
- all(block_offsets + block_dim - 1L <= dim))
- Nindex <- vector(mode="list", length=ndim)
- is_not_missing <- block_dim < dim
- if (expand.RangeNSBS) {
- expand_idx <- which(is_not_missing)
- } else {
- is_width1 <- block_dim == 1L
- expand_idx <- which(is_not_missing & is_width1)
- RangeNSBS_idx <- which(is_not_missing & !is_width1)
- Nindex[RangeNSBS_idx] <- lapply(RangeNSBS_idx,
- function(i) {
- start <- block_offsets[[i]]
- end <- start + block_dim[[i]] - 1L
- upper_bound <- dim[[i]]
- new2("RangeNSBS", subscript=c(start, end),
- upper_bound=upper_bound,
- check=FALSE)
- }
- )
- }
- Nindex[expand_idx] <- as.list(block_ranges[expand_idx])
- Nindex
-}
-
### Convert 'Nindex' to a "linear index".
### Return the "linear index" as an integer vector if prod(dim) <=
### .Machine$integer.max, otherwise as a vector of doubles.
@@ -156,6 +176,24 @@ to_linear_index <- function(Nindex, dim)
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### combine_array_objects()
+###
+
+### 'objects' must be a list of array-like objects that support as.vector().
+combine_array_objects <- function(objects)
+{
+ if (!is.list(objects))
+ stop("'objects' must be a list")
+ NULL_idx <- which(S4Vectors:::sapply_isNULL(objects))
+ if (length(NULL_idx) != 0L)
+ objects <- objects[-NULL_idx]
+ if (length(objects) == 0L)
+ return(NULL)
+ unlist(lapply(objects, as.vector), recursive=FALSE, use.names=FALSE)
+}
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Translate an index into the whole to an index into the parts
###
### This is .rowidx2rowkeys() from BSgenome/R/OnDiskLongTable-class.R, copied
diff --git a/TODO b/TODO
index 9f43889..a3334ab 100644
--- a/TODO
+++ b/TODO
@@ -11,6 +11,9 @@
- Add vignette.
+- Support subsetting an arbitrary object by a DelayedArray or
+ DelayedMatrix of type logical.
+
- Support more matrix- and array-like operations.
- How well supported are DelayedArray of type "character"?
diff --git a/build/vignette.rds b/build/vignette.rds
new file mode 100644
index 0000000..db4fb6f
Binary files /dev/null and b/build/vignette.rds differ
diff --git a/inst/doc/Working_with_large_arrays.R b/inst/doc/Working_with_large_arrays.R
new file mode 100644
index 0000000..cdfc082
--- /dev/null
+++ b/inst/doc/Working_with_large_arrays.R
@@ -0,0 +1,115 @@
+## ----setup, include=FALSE-----------------------------------------------------
+library(knitr)
+opts_chunk$set(size="scriptsize")
+if (!dir.exists("~/mydata")) dir.create("~/mydata")
+options(width=80)
+library(Matrix)
+library(DelayedArray)
+library(HDF5Array)
+library(SummarizedExperiment)
+library(airway)
+library(pryr)
+
+## ----airway-------------------------------------------------------------------
+library(airway)
+data(airway)
+m <- unname(assay(airway))
+dim(m)
+typeof(m)
+
+## ----airway2------------------------------------------------------------------
+head(m, n=4)
+tail(m, n=4)
+sum(m != 0) / length(m)
+
+## ----object_size--------------------------------------------------------------
+library(pryr) # for object_size()
+object_size(m)
+
+library(Matrix)
+object_size(as(m, "dgCMatrix"))
+
+library(DelayedArray)
+object_size(as(m, "RleMatrix"))
+object_size(as(t(m), "RleMatrix"))
+
+library(HDF5Array)
+object_size(as(m, "HDF5Matrix"))
+
+## ----M------------------------------------------------------------------------
+M <- as(m, "HDF5Matrix")
+M
+
+## ----M2-----------------------------------------------------------------------
+M2 <- M[10:12, 1:5]
+M2
+
+## ----seed_of_M2---------------------------------------------------------------
+seed(M2)
+
+## -----------------------------------------------------------------------------
+M3 <- t(M2)
+M3
+
+## -----------------------------------------------------------------------------
+seed(M3)
+
+## -----------------------------------------------------------------------------
+M4 <- cbind(M3, M[1:5, 6:8])
+M4
+
+## -----------------------------------------------------------------------------
+seed(M4)
+
+## -----------------------------------------------------------------------------
+M5 <- M == 0
+M5
+
+## -----------------------------------------------------------------------------
+seed(M5)
+
+## -----------------------------------------------------------------------------
+M6 <- round(M[11:14, ] / M[1:4, ], digits=3)
+M6
+
+## -----------------------------------------------------------------------------
+seed(M6)
+
+## -----------------------------------------------------------------------------
+M6a <- as(M6, "HDF5Array")
+M6a
+
+## -----------------------------------------------------------------------------
+seed(M6a)
+
+## -----------------------------------------------------------------------------
+M6b <- as(M6, "RleArray")
+M6b
+
+## -----------------------------------------------------------------------------
+seed(M6b)
+
+## -----------------------------------------------------------------------------
+setHDF5DumpFile("~/mydata/M6c.h5")
+setHDF5DumpName("M6c")
+M6c <- as(M6, "HDF5Array")
+
+## -----------------------------------------------------------------------------
+seed(M6c)
+h5ls("~/mydata/M6c.h5")
+
+## -----------------------------------------------------------------------------
+showHDF5DumpLog()
+
+## -----------------------------------------------------------------------------
+DelayedArray:::set_verbose_block_processing(TRUE)
+colSums(M)
+
+## -----------------------------------------------------------------------------
+getOption("DelayedArray.block.size")
+options(DelayedArray.block.size=1e6)
+colSums(M)
+
+## ----cleanup, include=FALSE---------------------------------------------------
+unlink("~/mydata", recursive=TRUE, force=TRUE)
+
diff --git a/inst/doc/Working_with_large_arrays.Rnw b/inst/doc/Working_with_large_arrays.Rnw
new file mode 100644
index 0000000..bd26240
--- /dev/null
+++ b/inst/doc/Working_with_large_arrays.Rnw
@@ -0,0 +1,764 @@
+%\VignetteEngine{knitr::knitr}
+%\VignetteIndexEntry{Working with large arrays in R}
+%\VignetteDepends{knitr,Matrix,DelayedArray,HDF5Array,SummarizedExperiment,airway,pryr}
+
+\documentclass[8pt]{beamer}
+
+\mode<presentation> {
+\usetheme{Madrid}
+\usecolortheme{whale}
+}
+
+\usepackage{slides}
+\renewcommand\Rclass[1]{{\texttt{#1}\index{#1 (class)}}}
+
+\AtBeginSection[]
+{
+ \begin{frame}<beamer>
+ \tableofcontents[currentsection]
+ \end{frame}
+}
+
+\title{Working with large arrays in R}
+\subtitle{A look at HDF5Array/RleArray/DelayedArray objects}
+
+\author{Herv\'e Pag\`es\\
+ \href{mailto:hpages at fredhutch.org}{hpages at fredhutch.org}}
+
+\institute{Bioconductor conference\\Boston}
+
+\date{July 2017}
+
+\begin{document}
+
+<<setup, include=FALSE>>=
+library(knitr)
+opts_chunk$set(size="scriptsize")
+if (!dir.exists("~/mydata")) dir.create("~/mydata")
+options(width=80)
+library(Matrix)
+library(DelayedArray)
+library(HDF5Array)
+library(SummarizedExperiment)
+library(airway)
+library(pryr)
+@
+
+\maketitle
+
+\frame{\tableofcontents}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\section{Motivation and challenges}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}[fragile]
+ \frametitle{Motivation and challenges}
+
+ R ordinary {\bf matrix} or {\bf array} is not suitable for big datasets:
+ \begin{block}{}
+ \begin{itemize}
+ \item 10x Genomics dataset (single cell experiment):
+ 30,000 genes x 1.3 million cells = 36.5 billion values
+ \item in an ordinary integer matrix ==> 136G in memory!
+ \end{itemize}
+ \end{block}
+
+ \bigskip
+
+ Need for alternative containers:
+ \begin{block}{}
+ \begin{itemize}
+ \item but at the same time, the object should be (almost) as easy to
+ manipulate as an ordinary matrix or array
+ \item {\em standard R matrix/array API}: \Rcode{dim}, \Rcode{dimnames},
+ \Rcode{t}, \Rcode{is.na}, \Rcode{==}, \Rcode{+}, \Rcode{log},
+ \Rcode{cbind}, \Rcode{max}, \Rcode{sum}, \Rcode{colSums}, etc...
+ \item not limited to 2 dimensions ==> also support arrays of arbitrary
+ number of dimensions
+ \end{itemize}
+ \end{block}
+
+ \bigskip
+
+ 2 approaches: {\bf in-memory data} vs {\bf on-disk data}
+\end{frame}
+
+
+\begin{frame}[fragile]
+ \frametitle{Motivation and challenges}
+
+ \centerline{\bf In-memory data}
+
+ \begin{block}{}
+ \begin{itemize}
+ \item a 30k x 1.3M matrix might still fit in memory if the data can
+ be efficiently compressed
+ \item example: sparse data (small percentage of non-zero values) ==>
+ {\em sparse representation} (storage of non-zero values only)
+ \item example: data with long runs of identical values ==> {\em RLE
+ compression (Run Length Encoding)}
+ \item choose the {\em smallest type} to store the values: \Rcode{raw}
+ (1 byte) < \Rcode{integer} (4 bytes) < \Rcode{double} (8 bytes)
+ \item if using {\em RLE compression}:
+ \begin{itemize}
+ \item choose the {\em best orientation} to store the values:
+ {\em by row} or {\em by column} (one might give better
+ compression than the other)
+ \item store the data by chunk ==> opportunity to pick up
+ {\em best type} and {\em best orientation} on a chunk
+ basis (instead of for the whole data)
+ \end{itemize}
+ \item size of 30k x 1.3M matrix in memory can be reduced from 136G
+ to 16G!
+ \end{itemize}
+ \end{block}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Motivation and challenges}
+
+ \centerline{\bf Examples of in-memory containers}
+
+ \bigskip
+
+ {\bf dgCMatrix} container from the \Biocpkg{Matrix} package:
+ \begin{block}{}
+ \begin{itemize}
+ \item sparse matrix representation
+ \item non-zero values stored as \Rcode{double}
+ \end{itemize}
+ \end{block}
+
+ \bigskip
+
+ {\bf RleArray} and {\bf RleMatrix} containers from the
+ \Biocpkg{DelayedArray} package:
+ \begin{block}{}
+ \begin{itemize}
+ \item use RLE compression
+ \item arbitrary number of dimensions
+ \item type of values: any R atomic type (\Rcode{integer},
+ \Rcode{double}, \Rcode{logical}, \Rcode{complex},
+ \Rcode{character}, and \Rcode{raw})
+ \end{itemize}
+ \end{block}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Motivation and challenges}
+
+ \centerline{\bf On-disk data}
+
+ \bigskip
+
+ However...
+ \begin{itemize}
+ \item if data is too big to fit in memory (even after compression) ==>
+ must use {\em on-disk representation}
+ \item challenge: should still be (almost) as easy to manipulate as
+ an ordinary matrix! ({\em standard R matrix/array API})
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Motivation and challenges}
+
+ \centerline{\bf Examples of on-disk containers}
+
+ \bigskip
+
+ Direct manipulation of an {\bf HDF5 dataset} via the
+ \Biocpkg{rhdf5} API. Low level API!
+
+ \bigskip
+
+ {\bf HDF5Array} and {\bf HDF5Matrix} containers from the
+ \Biocpkg{HDF5Array} package:
+ \begin{block}{}
+ Provide access to the HDF5 dataset via an API that mimics the standard
+ R matrix/array API
+ \end{block}
+\end{frame}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\section{Memory footprint}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}[fragile]
+ \frametitle{Memory footprint}
+
+ \centerline{\bf The "airway" dataset}
+
+ \begin{columns}[t]
+ \begin{column}{0.36\textwidth}
+ \begin{exampleblock}{}
+<<airway>>=
+library(airway)
+data(airway)
+m <- unname(assay(airway))
+dim(m)
+typeof(m)
+@
+ \end{exampleblock}
+ \end{column}
+ \begin{column}{0.52\textwidth}
+ \begin{exampleblock}{}
+<<airway2>>=
+head(m, n=4)
+tail(m, n=4)
+sum(m != 0) / length(m)
+@
+ \end{exampleblock}
+ \end{column}
+ \end{columns}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Memory footprint}
+
+ \centerline{{\bf dgCMatrix} vs {\bf RleMatrix} vs {\bf HDF5Matrix}}
+
+ \begin{columns}[t]
+ \begin{column}{0.60\textwidth}
+ \begin{exampleblock}{}
+<<object_size>>=
+library(pryr) # for object_size()
+object_size(m)
+
+library(Matrix)
+object_size(as(m, "dgCMatrix"))
+
+library(DelayedArray)
+object_size(as(m, "RleMatrix"))
+object_size(as(t(m), "RleMatrix"))
+
+library(HDF5Array)
+object_size(as(m, "HDF5Matrix"))
+@
+ \end{exampleblock}
+ \end{column}
+ \end{columns}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Memory footprint}
+
+ Some limitations of the sparse matrix implementation in the \Biocpkg{Matrix}
+ package:
+
+ \begin{block}{}
+ \begin{itemize}
+ \item non-zero values always stored as \Rcode{double}, the most memory
+ consuming type
+ \item number of non-zero values must be $< 2^{31}$
+ \item limited to 2 dimensions: no support for arrays of arbitrary number
+ of dimensions
+ \end{itemize}
+ \end{block}
+\end{frame}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\section{RleArray and HDF5Array objects}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}[fragile]
+ \frametitle{RleArray and HDF5Array objects}
+
+ RleMatrix/RleArray and HDF5Matrix/HDF5Array provide:
+ \begin{block}{}
+ \begin{itemize}
+ \item support all R atomic types
+ \item no limits in size (but each dimension must be $< 2^{31}$)
+ \item arbitrary number of dimensions
+ \end{itemize}
+ \end{block}
+
+ \bigskip
+
+ And also:
+ \begin{block}{}
+ \begin{itemize}
+ \item {\bf delayed operations}
+ \item {\bf block-processing} (behind the scene)
+ \item TODO: multicore block-processing (sequential only at the moment)
+ \end{itemize}
+ \end{block}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{RleArray and HDF5Array objects}
+
+ \centerline{\bf Delayed operations}
+
+ \bigskip
+
+ \centerline{We start with HDF5Matrix object \Rcode{M}:}
+
+ \begin{columns}[t]
+ \begin{column}{0.60\textwidth}
+ \begin{exampleblock}{}
+<<M>>=
+M <- as(m, "HDF5Matrix")
+M
+@
+ \end{exampleblock}
+ \end{column}
+ \end{columns}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{RleArray and HDF5Array objects}
+
+ Subsetting is delayed:
+
+ \begin{columns}[t]
+ \begin{column}{0.40\textwidth}
+ \begin{exampleblock}{}
+<<M2>>=
+M2 <- M[10:12, 1:5]
+M2
+@
+ \end{exampleblock}
+ \end{column}
+ \begin{column}{0.48\textwidth}
+ \begin{exampleblock}{}
+<<seed_of_M2>>=
+seed(M2)
+@
+ \end{exampleblock}
+ \end{column}
+ \end{columns}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{RleArray and HDF5Array objects}
+
+ Transposition is delayed:
+
+ \begin{columns}[t]
+ \begin{column}{0.40\textwidth}
+ \begin{exampleblock}{}
+<<>>=
+M3 <- t(M2)
+M3
+@
+ \end{exampleblock}
+ \end{column}
+ \begin{column}{0.48\textwidth}
+ \begin{exampleblock}{}
+<<>>=
+seed(M3)
+@
+ \end{exampleblock}
+ \end{column}
+ \end{columns}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{RleArray and HDF5Array objects}
+
+ \Rcode{cbind()} / \Rcode{rbind()} are delayed:
+
+ \begin{columns}[t]
+ \begin{column}{0.44\textwidth}
+ \begin{exampleblock}{}
+<<>>=
+M4 <- cbind(M3, M[1:5, 6:8])
+M4
+@
+ \end{exampleblock}
+ \end{column}
+ \begin{column}{0.44\textwidth}
+ \begin{exampleblock}{}
+<<>>=
+seed(M4)
+@
+ \end{exampleblock}
+ \end{column}
+ \end{columns}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{RleArray and HDF5Array objects}
+
+ All the operations in the following groups are delayed:
+ \begin{itemize}
+ \item \Rcode{Arith} (\Rcode{+}, \Rcode{-}, ...)
+ \item \Rcode{Compare} (\Rcode{==}, \Rcode{<}, ...)
+ \item \Rcode{Logic} (\Rcode{\&}, \Rcode{|})
+ \item \Rcode{Math} (\Rcode{log}, \Rcode{sqrt})
+ \item and more ...
+ \end{itemize}
+
+ \begin{columns}[t]
+ \begin{column}{0.42\textwidth}
+ \begin{exampleblock}{}
+<<>>=
+M5 <- M == 0
+M5
+@
+ \end{exampleblock}
+ \end{column}
+ \begin{column}{0.47\textwidth}
+ \begin{exampleblock}{}
+<<>>=
+seed(M5)
+@
+ \end{exampleblock}
+ \end{column}
+ \end{columns}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{RleArray and HDF5Array objects}
+
+ \begin{columns}[t]
+ \begin{column}{0.44\textwidth}
+ \begin{exampleblock}{}
+<<>>=
+M6 <- round(M[11:14, ] / M[1:4, ], digits=3)
+M6
+@
+ \end{exampleblock}
+ \end{column}
+ \begin{column}{0.44\textwidth}
+ \begin{exampleblock}{}
+<<>>=
+seed(M6)
+@
+ \end{exampleblock}
+ \end{column}
+ \end{columns}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{RleArray and HDF5Array objects}
+
+ \centerline{\bf Realization}
+
+ \bigskip
+
+ Delayed operations can be {\bf realized} by coercing the DelayedMatrix
+ object to HDF5Array:
+
+ \begin{columns}[t]
+ \begin{column}{0.40\textwidth}
+ \begin{exampleblock}{}
+<<>>=
+M6a <- as(M6, "HDF5Array")
+M6a
+@
+ \end{exampleblock}
+ \end{column}
+ \begin{column}{0.48\textwidth}
+ \begin{exampleblock}{}
+<<>>=
+seed(M6a)
+@
+ \end{exampleblock}
+ \end{column}
+ \end{columns}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{RleArray and HDF5Array objects}
+
+ \bigskip
+
+ ... or by coercing it to RleArray:
+
+ \begin{columns}[t]
+ \begin{column}{0.44\textwidth}
+ \begin{exampleblock}{}
+<<>>=
+M6b <- as(M6, "RleArray")
+M6b
+@
+ \end{exampleblock}
+ \end{column}
+ \begin{column}{0.44\textwidth}
+ \begin{exampleblock}{}
+<<>>=
+seed(M6b)
+@
+ \end{exampleblock}
+ \end{column}
+ \end{columns}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{RleArray and HDF5Array objects}
+
+ \centerline{\bf Controlling where HDF5 datasets are realized}
+
+ \bigskip
+
+ {\em HDF5 dump management utilities}: a set of utilities to control where
+ HDF5 datasets are written to disk.
+
+ \begin{columns}[t]
+ \begin{column}{0.44\textwidth}
+ \begin{exampleblock}{}
+<<>>=
+setHDF5DumpFile("~/mydata/M6c.h5")
+setHDF5DumpName("M6c")
+M6c <- as(M6, "HDF5Array")
+@
+ \end{exampleblock}
+ \end{column}
+ \begin{column}{0.44\textwidth}
+ \begin{exampleblock}{}
+<<>>=
+seed(M6c)
+h5ls("~/mydata/M6c.h5")
+@
+ \end{exampleblock}
+ \end{column}
+ \end{columns}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{RleArray and HDF5Array objects}
+
+ \centerline{\Rcode{showHDF5DumpLog()}}
+
+ \begin{exampleblock}{}
+<<>>=
+showHDF5DumpLog()
+@
+ \end{exampleblock}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{RleArray and HDF5Array objects}
+
+ \centerline{\bf Block processing}
+
+ \bigskip
+
+ The following operations are NOT delayed. They are implemented via a
+ {\em block processing} mechanism that loads and processes one block
+ at a time:
+ \begin{itemize}
+ \item operations in the \Rcode{Summary} group (\Rcode{max}, \Rcode{min},
+ \Rcode{sum}, \Rcode{any}, \Rcode{all})
+ \item \Rcode{mean}
+ \item Matrix row/col summarization operations (\Rcode{col/rowSums},
+ \Rcode{col/rowMeans}, ...)
+ \item \Rcode{anyNA}, \Rcode{which}
+ \item \Rcode{apply}
+ \item and more ...
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{RleArray and HDF5Array objects}
+
+ \begin{columns}[t]
+ \begin{column}{0.75\textwidth}
+ \begin{exampleblock}{}
+<<>>=
+DelayedArray:::set_verbose_block_processing(TRUE)
+colSums(M)
+@
+ \end{exampleblock}
+
+ Control the block size:
+
+ \begin{exampleblock}{}
+<<>>=
+getOption("DelayedArray.block.size")
+options(DelayedArray.block.size=1e6)
+colSums(M)
+@
+ \end{exampleblock}
+ \end{column}
+ \end{columns}
+\end{frame}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\section{Hands-on}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}[fragile]
+ \frametitle{Hands-on}
+
+ \begin{block}{}
+ 1. Load the "airway" dataset.
+ \end{block}
+
+ \begin{block}{}
+ 2. It's wrapped in a SummarizedExperiment object. Get the count data as
+ an ordinary matrix.
+ \end{block}
+
+ \begin{block}{}
+ 3. Wrap it in an HDF5Matrix object: (1) using \Rcode{writeHDF5Array()};
+ then (2) using coercion.
+ \end{block}
+
+ \begin{block}{}
+ 4. When using coercion, where has the data been written on disk?
+ \end{block}
+
+ \begin{block}{}
+ 5. See \Rcode{?setHDF5DumpFile} for how to control the location of
+ "automatic" HDF5 datasets. Try to control the destination of the
+ data when coercing.
+ \end{block}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Hands-on}
+
+ \begin{block}{}
+ 6. Use \Rcode{showHDF5DumpLog()} to see all the HDF5 datasets written to
+ disk during the current session.
+ \end{block}
+
+ \bigskip
+
+ \begin{block}{}
+ 7. Try some operations on the HDF5Matrix object: (1) some delayed ones;
+ (2) some non-delayed ones (block processing).
+ \end{block}
+
+ \bigskip
+
+ \begin{block}{}
+ 8. Use \Rcode{DelayedArray:::set\_verbose\_block\_processing(TRUE)}
+ to see block processing in action.
+ \end{block}
+
+ \bigskip
+
+ \begin{block}{}
+ 9. Control the block size via \Rcode{DelayedArray.block.size} global
+ option.
+ \end{block}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Hands-on}
+
+ \begin{block}{}
+ 10. Stick the HDF5Matrix object back in the SummarizedExperiment object.
+ The resulting object is an "HDF5-backed SummarizedExperiment object".
+ \end{block}
+
+ \bigskip
+
+ \begin{block}{}
+ 11. The HDF5-backed SummarizedExperiment object can be manipulated
+ (almost) like an in-memory SummarizedExperiment object.
+ Try \Rcode{[}, \Rcode{cbind}, \Rcode{rbind} on it.
+ \end{block}
+
+ \bigskip
+
+ \begin{block}{}
+ 12. The \Biocpkg{SummarizedExperiment} package provides
+ \Rcode{saveHDF5SummarizedExperiment} to save a SummarizedExperiment
+ object (HDF5-backed or not) as an HDF5-backed SummarizedExperiment
+ object. Try it.
+ \end{block}
+\end{frame}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\section{DelayedArray/HDF5Array: Future developments}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}[fragile]
+ \frametitle{Future developments}
+
+ \centerline{\bf Block processing improvements}
+
+ \begin{block}{}
+ Block genometry: (1) better by default, (2) let the user have more
+ control on it
+ \end{block}
+
+ \begin{block}{}
+ Support multicore
+ \end{block}
+
+ \begin{block}{}
+ Expose it: \Rcode{blockApply()}
+ \end{block}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Future developments}
+
+ \centerline{\bf HDF5Array improvements}
+
+ \begin{block}{}
+ Store the \Rcode{dimnames} in the HDF5 file (in {\em HDF5 Dimension Scale
+ datasets} - \url{https://www.hdfgroup.org/HDF5/Tutor/h5dimscale.html})
+ \end{block}
+
+ \begin{block}{}
+ Use better default chunk geometry when realizing an HDF5Array object
+ \end{block}
+
+ \begin{block}{}
+ Block processing should take advantage of the chunk geometry (e.g.
+ \Rcode{realize()} should use blocks that are clusters of chunks)
+ \end{block}
+
+ \begin{block}{}
+ Unfortunately: not possible to support multicore realization at the
+ moment (HDF5 does not support concurrent writing to a dataset yet)
+ \end{block}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Future developments}
+
+ \centerline{\bf RleArray improvements}
+
+ \begin{block}{}
+ Let the user have more control on the chunk geometry when
+ constructing/realizing an RleArray object
+ \end{block}
+
+ \begin{block}{}
+ Like for HDF5Array objects, block processing should take advantage
+ of the chunk geometry
+ \end{block}
+
+ \begin{block}{}
+ Support multicore realization
+ \end{block}
+
+ \begin{block}{}
+ Provide C/C++ low-level API for direct row/column access from C/C++ code
+ (e.g. from the \Biocpkg{beachmat} package)
+ \end{block}
+\end{frame}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+<<cleanup, include=FALSE>>=
+unlink("~/mydata", recursive=TRUE, force=TRUE)
+@
+
+\end{document}
+
diff --git a/inst/doc/Working_with_large_arrays.pdf b/inst/doc/Working_with_large_arrays.pdf
new file mode 100644
index 0000000..18e45bd
Binary files /dev/null and b/inst/doc/Working_with_large_arrays.pdf differ
diff --git a/inst/unitTests/test_ArrayBlocks-class.R b/inst/unitTests/test_ArrayBlocks-class.R
deleted file mode 100644
index a367281..0000000
--- a/inst/unitTests/test_ArrayBlocks-class.R
+++ /dev/null
@@ -1,71 +0,0 @@
-#setRealizationBackend("RleArray")
-#setRealizationBackend("HDF5Array")
-
-test_split_array_in_blocks <- function()
-{
- split_array_in_blocks <- DelayedArray:::split_array_in_blocks
- unsplit_array_from_blocks <- DelayedArray:::unsplit_array_from_blocks
-
- a1 <- array(1:300, c(3, 10, 2, 5))
- A1 <- realize(a1)
-
- for (max_block_len in c(1:7, 29:31, 39:40, 59:60, 119:120)) {
- subarrays <- split_array_in_blocks(a1, max_block_len)
- current <- unsplit_array_from_blocks(subarrays, a1)
- checkIdentical(a1, current)
-
- subarrays <- split_array_in_blocks(A1, max_block_len)
- current <- unsplit_array_from_blocks(subarrays, A1)
- checkIdentical(a1, current)
- }
-}
-
-test_split_matrix_in_blocks <- function()
-{
- split_array_in_blocks <- DelayedArray:::split_array_in_blocks
- unsplit_array_from_blocks <- DelayedArray:::unsplit_array_from_blocks
-
- a1 <- array(1:300, c(3, 10, 2, 5))
- A1 <- realize(a1)
-
- m1 <- a1[2, c(9, 3:7), 2, -4]
- M1a <- drop(A1[2, c(9, 3:7), 2, -4])
- checkIdentical(m1, as.matrix(M1a))
-
- M1b <- realize(m1)
- checkIdentical(m1, as.matrix(M1b))
-
- tm1 <- t(m1)
- tM1a <- t(M1a)
- checkIdentical(tm1, as.matrix(tM1a))
-
- tM1b <- t(M1b)
- checkIdentical(tm1, as.matrix(tM1b))
-
- for (max_block_len in seq_len(length(m1) * 2L)) {
- subarrays <- split_array_in_blocks(m1, max_block_len)
- current <- unsplit_array_from_blocks(subarrays, m1)
- checkIdentical(m1, current)
-
- subarrays <- split_array_in_blocks(M1a, max_block_len)
- current <- unsplit_array_from_blocks(subarrays, M1a)
- checkIdentical(m1, current)
-
- subarrays <- split_array_in_blocks(M1b, max_block_len)
- current <- unsplit_array_from_blocks(subarrays, M1b)
- checkIdentical(m1, current)
-
- subarrays <- split_array_in_blocks(tm1, max_block_len)
- current <- unsplit_array_from_blocks(subarrays, tm1)
- checkIdentical(tm1, current)
-
- subarrays <- split_array_in_blocks(tM1a, max_block_len)
- current <- unsplit_array_from_blocks(subarrays, tM1a)
- checkIdentical(tm1, current)
-
- subarrays <- split_array_in_blocks(tM1b, max_block_len)
- current <- unsplit_array_from_blocks(subarrays, tM1b)
- checkIdentical(tm1, current)
- }
-}
-
diff --git a/inst/unitTests/test_ArrayGrid-class.R b/inst/unitTests/test_ArrayGrid-class.R
new file mode 100644
index 0000000..649a356
--- /dev/null
+++ b/inst/unitTests/test_ArrayGrid-class.R
@@ -0,0 +1,270 @@
+#setRealizationBackend("RleArray")
+#setRealizationBackend("HDF5Array")
+
+test_get_max_spacings_for_hypercube_blocks <- function()
+{
+ get_max_spacings_for_hypercube_blocks <-
+ DelayedArray:::get_max_spacings_for_hypercube_blocks
+
+ refdim <- c(15L, 10L, 5L, 8L, 10L)
+
+ target <- c( 3L, 3L, 3L, 3L, 3L)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 243)
+ checkIdentical(target, current)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 323)
+ checkIdentical(target, current)
+
+ target <- c( 3L, 3L, 3L, 4L, 3L)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 324)
+ checkIdentical(target, current)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 431)
+ checkIdentical(target, current)
+
+ target <- c( 3L, 4L, 3L, 4L, 3L)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 432)
+ checkIdentical(target, current)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 575)
+ checkIdentical(target, current)
+
+ target <- c( 3L, 4L, 3L, 4L, 4L)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 576)
+ checkIdentical(target, current)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 767)
+ checkIdentical(target, current)
+
+ target <- c( 4L, 4L, 3L, 4L, 4L)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 768)
+ checkIdentical(target, current)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 1023)
+ checkIdentical(target, current)
+
+ target <- c( 4L, 4L, 4L, 4L, 4L)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 1024)
+ checkIdentical(target, current)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 1279)
+ checkIdentical(target, current)
+
+ target <- c( 4L, 4L, 5L, 4L, 4L)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 1280)
+ checkIdentical(target, current)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 1599)
+ checkIdentical(target, current)
+
+ target <- c( 4L, 5L, 5L, 4L, 4L)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 1600)
+ checkIdentical(target, current)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 1999)
+ checkIdentical(target, current)
+
+ target <- c( 4L, 5L, 5L, 4L, 5L)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 2000)
+ checkIdentical(target, current)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 2499)
+ checkIdentical(target, current)
+
+ target <- c( 5L, 5L, 5L, 4L, 5L)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 2500)
+ checkIdentical(target, current)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 3124)
+ checkIdentical(target, current)
+
+ target <- c( 5L, 5L, 5L, 5L, 5L)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 3125)
+ checkIdentical(target, current)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 3749)
+ checkIdentical(target, current)
+
+ target <- c( 6L, 5L, 5L, 5L, 5L)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 3750)
+ checkIdentical(target, current)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 4499)
+ checkIdentical(target, current)
+
+ target <- c( 6L, 6L, 5L, 5L, 5L)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 4500)
+ checkIdentical(target, current)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 5399)
+ checkIdentical(target, current)
+
+ target <- c( 6L, 6L, 5L, 6L, 5L)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 5400)
+ checkIdentical(target, current)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 6479)
+ checkIdentical(target, current)
+
+ target <- c( 6L, 6L, 5L, 6L, 6L)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 6480)
+ checkIdentical(target, current)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 7559)
+ checkIdentical(target, current)
+
+ target <- c( 7L, 6L, 5L, 6L, 6L)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 7560)
+ checkIdentical(target, current)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 8819)
+ checkIdentical(target, current)
+
+ target <- c( 7L, 7L, 5L, 6L, 6L)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 8820)
+ checkIdentical(target, current)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 10289)
+ checkIdentical(target, current)
+
+ target <- c( 7L, 7L, 5L, 7L, 6L)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 10290)
+ checkIdentical(target, current)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 12004)
+ checkIdentical(target, current)
+
+ target <- c( 7L, 7L, 5L, 7L, 7L)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 12005)
+ checkIdentical(target, current)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 13719)
+ checkIdentical(target, current)
+
+ target <- c( 7L, 7L, 5L, 8L, 7L)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 13720)
+ checkIdentical(target, current)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 15679)
+ checkIdentical(target, current)
+
+ target <- c( 8L, 7L, 5L, 8L, 7L)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 15680)
+ checkIdentical(target, current)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 17919)
+ checkIdentical(target, current)
+
+ target <- c( 8L, 8L, 5L, 8L, 7L)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 17920)
+ checkIdentical(target, current)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 20479)
+ checkIdentical(target, current)
+
+ target <- c( 8L, 8L, 5L, 8L, 8L)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 20480)
+ checkIdentical(target, current)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 23039)
+ checkIdentical(target, current)
+
+ target <- c( 9L, 8L, 5L, 8L, 8L)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 23040)
+ checkIdentical(target, current)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 25919)
+ checkIdentical(target, current)
+
+ target <- c( 9L, 9L, 5L, 8L, 8L)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 25920)
+ checkIdentical(target, current)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 29159)
+ checkIdentical(target, current)
+
+ target <- c( 9L, 9L, 5L, 8L, 9L)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 29160)
+ checkIdentical(target, current)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 32399)
+ checkIdentical(target, current)
+
+ target <- c( 9L, 10L, 5L, 8L, 9L)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 32400)
+ checkIdentical(target, current)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 35999)
+ checkIdentical(target, current)
+
+ target <- c( 9L, 10L, 5L, 8L, 10L)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 36000)
+ checkIdentical(target, current)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 39999)
+ checkIdentical(target, current)
+
+ target <- c( 10L, 10L, 5L, 8L, 10L)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 40000)
+ checkIdentical(target, current)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 43999)
+ checkIdentical(target, current)
+
+ target <- c( 11L, 10L, 5L, 8L, 10L)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 44000)
+ checkIdentical(target, current)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, 47999)
+ checkIdentical(target, current)
+
+ target <- c( 14L, 10L, 5L, 8L, 10L)
+ current <- get_max_spacings_for_hypercube_blocks(refdim, prod(refdim)-1)
+ checkIdentical(target, current)
+
+ current <- get_max_spacings_for_hypercube_blocks(refdim, prod(refdim))
+ checkIdentical(refdim, current)
+}
+
+test_split_array_in_linear_blocks <- function()
+{
+ split_array_in_linear_blocks <-
+ DelayedArray:::split_array_in_linear_blocks
+ unsplit_array_from_linear_blocks <-
+ DelayedArray:::unsplit_array_from_linear_blocks
+
+ a1 <- array(1:300, c(3, 10, 2, 5))
+ A1 <- realize(a1)
+
+ for (max_block_len in c(1:7, 29:31, 39:40, 59:60, 119:120)) {
+ blocks <- split_array_in_linear_blocks(a1, max_block_len)
+ current <- unsplit_array_from_linear_blocks(blocks, a1)
+ checkIdentical(a1, current)
+
+ blocks <- split_array_in_linear_blocks(A1, max_block_len)
+ current <- unsplit_array_from_linear_blocks(blocks, A1)
+ checkIdentical(a1, current)
+ }
+}
+
+test_split_matrix_in_blocks <- function()
+{
+ split_array_in_linear_blocks <-
+ DelayedArray:::split_array_in_linear_blocks
+ unsplit_array_from_linear_blocks <-
+ DelayedArray:::unsplit_array_from_linear_blocks
+
+ a1 <- array(1:300, c(3, 10, 2, 5))
+ A1 <- realize(a1)
+
+ m1 <- a1[2, c(9, 3:7), 2, -4]
+ M1a <- drop(A1[2, c(9, 3:7), 2, -4])
+ checkIdentical(m1, as.matrix(M1a))
+
+ M1b <- realize(m1)
+ checkIdentical(m1, as.matrix(M1b))
+
+ tm1 <- t(m1)
+ tM1a <- t(M1a)
+ checkIdentical(tm1, as.matrix(tM1a))
+
+ tM1b <- t(M1b)
+ checkIdentical(tm1, as.matrix(tM1b))
+
+ for (max_block_len in seq_len(length(m1) * 2L)) {
+ blocks <- split_array_in_linear_blocks(m1, max_block_len)
+ current <- unsplit_array_from_linear_blocks(blocks, m1)
+ checkIdentical(m1, current)
+
+ blocks <- split_array_in_linear_blocks(M1a, max_block_len)
+ current <- unsplit_array_from_linear_blocks(blocks, M1a)
+ checkIdentical(m1, current)
+
+ blocks <- split_array_in_linear_blocks(M1b, max_block_len)
+ current <- unsplit_array_from_linear_blocks(blocks, M1b)
+ checkIdentical(m1, current)
+
+ blocks <- split_array_in_linear_blocks(tm1, max_block_len)
+ current <- unsplit_array_from_linear_blocks(blocks, tm1)
+ checkIdentical(tm1, current)
+
+ blocks <- split_array_in_linear_blocks(tM1a, max_block_len)
+ current <- unsplit_array_from_linear_blocks(blocks, tM1a)
+ checkIdentical(tm1, current)
+
+ blocks <- split_array_in_linear_blocks(tM1b, max_block_len)
+ current <- unsplit_array_from_linear_blocks(blocks, tM1b)
+ checkIdentical(tm1, current)
+ }
+}
+
diff --git a/inst/unitTests/test_RleArray-class.R b/inst/unitTests/test_RleArray-class.R
new file mode 100644
index 0000000..7e4575a
--- /dev/null
+++ b/inst/unitTests/test_RleArray-class.R
@@ -0,0 +1,45 @@
+DEFAULT_BLOCK_SIZE <- DelayedArray:::DEFAULT_BLOCK_SIZE
+
+test_RleArray <- function()
+{
+ rle <- Rle(1:200000, 125)
+ A1 <- RleArray(rle, c(62500, 400))
+ A2 <- RleArray(rle, c(62500, 400), chunksize=1e8)
+
+ on.exit(options(DelayedArray.block.size=DEFAULT_BLOCK_SIZE))
+ options(DelayedArray.block.size=10e6)
+ rs1 <- rowSums(A1)
+ rs2 <- rowSums(A2)
+ checkIdentical(rs1, rs2)
+ cs1 <- colSums(A1)
+ cs2 <- colSums(A2)
+ checkIdentical(cs1, cs2)
+
+ ## TODO: Add more tests...
+}
+
+test_long_RleArray <- function()
+{
+ ## Right now it's not possible to create a long RleArray object with
+ ## the RleArray() constructor function. So we use the low-level RleArray
+ ## construction API to do this:
+ RleRealizationSink <- DelayedArray:::RleRealizationSink
+ append_Rle_to_sink <- DelayedArray:::.append_Rle_to_sink
+ sink <- RleRealizationSink(c(30000L, 75000L), type="integer")
+ #rle1 <- Rle(1:500000, 2000)
+ rle1 <- Rle(1:5000, 200000)
+ append_Rle_to_sink(rle1, sink)
+ #rle2 <- Rle(1:2000000, 125)
+ rle2 <- Rle(1:20000, 12500)
+ append_Rle_to_sink(rle2, sink)
+ #rle3 <- Rle(1:5000000, 200)
+ rle3 <- Rle(1:50000, 20000)
+ append_Rle_to_sink(rle3, sink)
+ A <- as(sink, "RleArray")
+
+ checkTrue(validObject(A, complete=TRUE))
+ checkTrue(is(seed(A), "ChunkedRleArraySeed"))
+
+ ## TODO: Add more tests...
+}
+
diff --git a/inst/unitTests/test_bind-arrays.R b/inst/unitTests/test_bind-arrays.R
new file mode 100644
index 0000000..fe8c5bd
--- /dev/null
+++ b/inst/unitTests/test_bind-arrays.R
@@ -0,0 +1,96 @@
+.TEST_matrices <- list(
+ matrix(1:15, nrow=3, ncol=5,
+ dimnames=list(NULL, paste0("M1y", 1:5))),
+ matrix(101:135, nrow=7, ncol=5,
+ dimnames=list(paste0("M2x", 1:7), paste0("M2y", 1:5))),
+ matrix(1001:1025, nrow=5, ncol=5,
+ dimnames=list(paste0("M3x", 1:5), NULL))
+)
+
+.TEST_arrays <- list(
+ array(1:60, c(3, 5, 4),
+ dimnames=list(NULL, paste0("M1y", 1:5), NULL)),
+ array(101:240, c(7, 5, 4),
+ dimnames=list(paste0("M2x", 1:7), paste0("M2y", 1:5), NULL)),
+ array(10001:10100, c(5, 5, 4),
+ dimnames=list(paste0("M3x", 1:5), NULL, paste0("M3z", 1:4)))
+)
+
+test_arbind <- function()
+{
+ ## on matrices
+ target <- do.call(rbind, .TEST_matrices)
+ current <- do.call(arbind, .TEST_matrices)
+ checkIdentical(target, current)
+
+ ## on empty matrices
+ m1 <- matrix(nrow=0, ncol=3, dimnames=list(NULL, letters[1:3]))
+ m2 <- matrix(1:15, ncol=3, dimnames=list(NULL, LETTERS[1:3]))
+
+ target <- do.call(rbind, list(m1, m2))
+ current <- do.call(arbind, list(m1, m2))
+ checkIdentical(target, current)
+
+ target <- do.call(rbind, list(m2, m1))
+ current <- do.call(arbind, list(m2, m1))
+ checkIdentical(target, current)
+
+ target <- do.call(rbind, list(m1, m1))
+ current <- do.call(arbind, list(m1, m1))
+ checkIdentical(target, current)
+
+ ## on arrays
+ current <- do.call(arbind, .TEST_arrays)
+ check_2D_slice <- function(k) {
+ slices <- lapply(.TEST_arrays, `[`, , , k)
+ target_slice <- do.call(rbind, slices)
+ checkIdentical(target_slice, current[ , , k])
+ }
+ for (k in seq_len(dim(current)[[3L]])) check_2D_slice(k)
+}
+
+test_acbind <- function()
+{
+ ## on matrices
+ matrices <- lapply(.TEST_matrices, t)
+ target <- do.call(cbind, matrices)
+ current <- do.call(acbind, matrices)
+ checkIdentical(target, current)
+
+ ## on empty matrices
+ m1 <- matrix(nrow=3, ncol=0, dimnames=list(letters[1:3], NULL))
+ m2 <- matrix(1:15, nrow=3, dimnames=list(LETTERS[1:3], NULL))
+
+ target <- do.call(cbind, list(m1, m2))
+ current <- do.call(acbind, list(m1, m2))
+ checkIdentical(target, current)
+
+ target <- do.call(cbind, list(m2, m1))
+ current <- do.call(acbind, list(m2, m1))
+ checkIdentical(target, current)
+
+ target <- do.call(cbind, list(m1, m1))
+ current <- do.call(acbind, list(m1, m1))
+ checkIdentical(target, current)
+
+ ## on arrays
+
+ ## transpose the 1st 2 dimensions
+ arrays <- lapply(.TEST_arrays,
+ function(a) {
+ a_dimnames <- dimnames(a)
+ dim(a)[1:2] <- dim(a)[2:1]
+ a_dimnames[1:2] <- a_dimnames[2:1]
+ dimnames(a) <- a_dimnames
+ a
+ })
+
+ current <- do.call(acbind, arrays)
+ check_2D_slice <- function(k) {
+ slices <- lapply(arrays, `[`, , , k)
+ target_slice <- do.call(cbind, slices)
+ checkIdentical(target_slice, current[ , , k])
+ }
+ for (k in seq_len(dim(current)[[3L]])) check_2D_slice(k)
+}
+
diff --git a/man/Array-class.Rd b/man/Array-class.Rd
new file mode 100644
index 0000000..083d14b
--- /dev/null
+++ b/man/Array-class.Rd
@@ -0,0 +1,26 @@
+\name{Array-class}
+\docType{class}
+
+\alias{class:Array}
+\alias{Array-class}
+\alias{Array}
+
+\alias{length,Array-method}
+\alias{[[,Array-method}
+
+\title{Array objects}
+
+\description{
+ Array is a virtual class intended to be extended by concrete subclasses
+ with an array-like semantic.
+}
+
+\seealso{
+ \link{DelayedArray}, \link{ArrayGrid}, and \link{ArrayViewport} for
+ examples of classes with an array-like semantic.
+}
+
+\examples{
+showClass("Array") # virtual class with no slots
+}
+\keyword{internal}
diff --git a/man/ArrayBlocks-class.Rd b/man/ArrayBlocks-class.Rd
deleted file mode 100644
index 5b57a61..0000000
--- a/man/ArrayBlocks-class.Rd
+++ /dev/null
@@ -1,19 +0,0 @@
-\name{ArrayBlocks-class}
-\docType{class}
-
-\alias{class:ArrayBlocks}
-\alias{ArrayBlocks-class}
-\alias{ArrayBlocks}
-
-\alias{length,ArrayBlocks-method}
-\alias{getListElement,ArrayBlocks-method}
-\alias{show,ArrayBlocks-method}
-
-\title{ArrayBlocks objects}
-
-\description{
- ArrayBlocks objects are used internally to support block processing of
- array-like objects.
-}
-
-\keyword{internal}
diff --git a/man/ArrayGrid-class.Rd b/man/ArrayGrid-class.Rd
new file mode 100644
index 0000000..7ae8054
--- /dev/null
+++ b/man/ArrayGrid-class.Rd
@@ -0,0 +1,142 @@
+\name{ArrayGrid-class}
+\docType{class}
+
+\alias{class:ArrayViewport}
+\alias{ArrayViewport-class}
+\alias{ArrayViewport}
+
+\alias{refdim}
+\alias{refdim,ArrayViewport-method}
+\alias{ranges,ArrayViewport-method}
+\alias{start,ArrayViewport-method}
+\alias{width,ArrayViewport-method}
+\alias{end,ArrayViewport-method}
+\alias{dim,ArrayViewport-method}
+
+\alias{show,ArrayViewport-method}
+
+\alias{makeNindexFromArrayViewport}
+
+\alias{class:ArrayGrid}
+\alias{ArrayGrid-class}
+\alias{ArrayGrid}
+
+\alias{class:ArrayArbitraryGrid}
+\alias{ArrayArbitraryGrid-class}
+\alias{ArrayArbitraryGrid}
+
+\alias{class:ArrayRegularGrid}
+\alias{ArrayRegularGrid-class}
+\alias{ArrayRegularGrid}
+
+\alias{refdim,ArrayArbitraryGrid-method}
+\alias{refdim,ArrayRegularGrid-method}
+\alias{dim,ArrayArbitraryGrid-method}
+\alias{dim,ArrayRegularGrid-method}
+\alias{as.character.ArrayGrid}
+\alias{as.character,ArrayGrid-method}
+\alias{lengths,ArrayGrid-method}
+\alias{show,ArrayGrid-method}
+
+\alias{isLinear}
+\alias{isLinear,ArrayViewport-method}
+\alias{isLinear,ArrayGrid-method}
+
+\title{ArrayGrid and ArrayViewport objects}
+
+\description{
+ ArrayGrid and ArrayViewport objects are used internally to support
+ block processing of array-like objects.
+}
+
+\examples{
+## ---------------------------------------------------------------------
+## ArrayGrid OBJECTS
+## ---------------------------------------------------------------------
+
+## Create a regularly-spaced grid on top of a 3700 x 100 x 33 array:
+grid <- ArrayRegularGrid(c(3700L, 100L, 33L), c(250L, 100L, 10L))
+
+## Dimensions of the reference array:
+refdim(grid)
+
+## Number of grid elements along each dimension of the reference array:
+dim(grid)
+
+## Total number of grid elements:
+length(grid)
+
+## First element in the grid:
+grid[[1L]] # same as grid[[1L, 1L, 1L]]
+
+## Last element in the grid:
+grid[[length(grid)]] # same as grid[[15L, 1L, 4L]]
+
+## Lengths of the grid elements:
+lengths(grid)
+
+stopifnot(prod(refdim(grid)) == sum(lengths(grid)))
+
+## ---------------------------------------------------------------------
+## ArrayViewport OBJECTS
+## ---------------------------------------------------------------------
+
+## Grid elements are ArrayViewport objects:
+class(grid[[1L]])
+
+m0 <- matrix(1:30, ncol=5)
+
+block_dim <- c(4, 3)
+viewport1 <- ArrayViewport(dim(m0), IRanges(c(3, 2), width=block_dim))
+viewport1
+
+dim(viewport1) # 'block_dim'
+length(viewport1)
+ranges(viewport1)
+
+## 2 utilities (not exported yet) for extracting/replacing blocks from/in
+## an array-like object:
+extract_block <- DelayedArray:::extract_block
+replace_block <- DelayedArray:::replace_block
+
+block1 <- extract_block(m0, viewport1)
+block1
+
+## No-op:
+replace_block(m0, viewport1, block1)
+stopifnot(identical(m0, replace_block(m0, viewport1, block1)))
+
+replace_block(m0, viewport1, block1 + 100L)
+
+viewport2 <- ArrayViewport(dim(m0), IRanges(c(1, 3), width=block_dim))
+replace_block(m0, viewport2, block1 + 100L)
+
+## Using a grid:
+grid <- ArrayRegularGrid(dim(m0), spacings=c(3L, 2L))
+grid
+extract_block(m0, grid[[3L]])
+extract_block(m0, grid[[1L, 3L]])
+
+## Walk on the grid, colum by column:
+m1 <- m0
+for (b in seq_along(grid)) {
+ viewport <- grid[[b]]
+ block <- extract_block(m1, viewport)
+ block <- b * 1000L + block
+ m1 <- replace_block(m1, viewport, block)
+}
+m1
+
+## Walk on the grid, row by row:
+m2 <- m0
+for (i in seq_len(dim(grid)[[1]])) {
+ for (j in seq_len(dim(grid)[[2]])) {
+ viewport <- grid[[i, j]]
+ block <- extract_block(m2, viewport)
+ block <- (i * 10L + j) * 1000L + block
+ m2 <- replace_block(m2, viewport, block)
+ }
+}
+m2
+}
+\keyword{internal}
diff --git a/man/DelayedArray-class.Rd b/man/DelayedArray-class.Rd
index c011502..59198a7 100644
--- a/man/DelayedArray-class.Rd
+++ b/man/DelayedArray-class.Rd
@@ -19,7 +19,6 @@
\alias{dim,DelayedArray-method}
\alias{dim<-,DelayedArray-method}
-\alias{length,DelayedArray-method}
\alias{isEmpty,DelayedArray-method}
\alias{dimnames,DelayedArray-method}
\alias{dimnames<-,DelayedArray-method}
@@ -68,16 +67,20 @@
\alias{split.DelayedArray}
\alias{split,DelayedArray,ANY-method}
+\alias{rbind}
+\alias{rbind,DelayedMatrix-method}
+\alias{rbind,DelayedArray-method}
+\alias{arbind,DelayedArray-method}
+
+\alias{cbind}
+\alias{cbind,DelayedMatrix-method}
+\alias{cbind,DelayedArray-method}
+\alias{acbind,DelayedArray-method}
+
% Internal stuff
\alias{matrixClass}
\alias{matrixClass,DelayedArray-method}
-\alias{subset_seed_as_array}
-\alias{subset_seed_as_array,ANY-method}
-\alias{subset_seed_as_array,array-method}
-\alias{subset_seed_as_array,data.frame-method}
-\alias{subset_seed_as_array,DataFrame-method}
-
\title{DelayedArray objects}
\description{
@@ -161,7 +164,21 @@ type(x)
\emph{single} numeric value >= 1 and <= \code{length(x)}. It is equivalent
to \code{x[i]}.
- DelayedArray objects don't support subassignment (\code{[<-} or \code{[[<-}).
+ DelayedArray objects support only 2 forms of subassignment at the moment:
+ \code{x[i] <- value} and \code{x[] <- value}. The former is supported only
+ when the subscript \code{i} is a logical DelayedArray object with the same
+ dimensions as \code{x} and when \code{value} is a \emph{scalar} (i.e. an
+ atomic vector of length 1). The latter is supported only when \code{value}
+ is an atomic vector and \code{length(value)} is a divisor of \code{nrow(x)}.
+ Both are delayed operations so are very light.
+
+ Single value replacement (\code{x[[...]] <- value}) is not supported.
+}
+
+\section{Binding}{
+ Binding DelayedArray objects along the rows (or columns) is supported
+ via the \code{rbind} and \code{arbind} (or \code{cbind} and \code{acbind})
+ methods for DelayedArray objects. All these operations are delayed.
}
\seealso{
@@ -172,8 +189,11 @@ type(x)
\item \link{DelayedArray-utils} for common operations on DelayedArray
objects.
- \item \link[DelayedArray]{cbind} in this package (\pkg{DelayedArray})
- for binding DelayedArray objects along their rows or columns.
+ \item \code{\link[base]{cbind}} in the \pkg{base} package for
+ rbind/cbind'ing ordinary arrays.
+
+ \item \code{\link{acbind}} in this package (\pkg{DelayedArray}) for
+ arbind/acbind'ing ordinary arrays.
\item \link{RleArray} objects.
@@ -208,6 +228,11 @@ stopifnot(identical(a, seed(M)))
A[11:20]
A[which(A <= 1e-5)]
+## Subassignment:
+A[A < 0.2] <- NA
+a[a < 0.2] <- NA
+stopifnot(identical(a, as.array(A)))
+
## Other operations:
toto <- function(x) (5 * x[ , , 1] ^ 3 + 1L) * log(x[, , 2])
b <- toto(a)
@@ -286,7 +311,37 @@ D
## See '?realize' for more information about "realization backends".
## ---------------------------------------------------------------------
-## E. WRAP A SPARSE MATRIX IN A DelayedArray OBJECT
+## E. BIND DelayedArray OBJECTS
+## ---------------------------------------------------------------------
+
+## rbind/cbind
+
+library(HDF5Array)
+toy_h5 <- system.file("extdata", "toy.h5", package="HDF5Array")
+h5ls(toy_h5)
+
+M1 <- HDF5Array(toy_h5, "M1")
+M2 <- HDF5Array(toy_h5, "M2")
+
+M12 <- rbind(M1, t(M2))
+M12
+colMeans(M12)
+
+## arbind/acbind
+
+example(acbind) # to create arrays a1, a2, a3
+
+A1 <- DelayedArray(a1)
+A2 <- DelayedArray(a2)
+A3 <- DelayedArray(a3)
+A <- arbind(A1, A2, A3)
+A
+
+## Sanity check:
+stopifnot(identical(arbind(a1, a2, a3), as.array(A)))
+
+## ---------------------------------------------------------------------
+## F. WRAP A SPARSE MATRIX IN A DelayedArray OBJECT
## ---------------------------------------------------------------------
\dontrun{
library(Matrix)
diff --git a/man/DelayedArray-utils.Rd b/man/DelayedArray-utils.Rd
index 8ca069d..51a7068 100644
--- a/man/DelayedArray-utils.Rd
+++ b/man/DelayedArray-utils.Rd
@@ -4,9 +4,6 @@
% DelayedArray utils
-\alias{dim,ConformableSeedCombiner-method}
-\alias{dimnames,ConformableSeedCombiner-method}
-
\alias{+,DelayedArray,missing-method}
\alias{-,DelayedArray,missing-method}
@@ -77,10 +74,6 @@
\alias{colRanges}
\alias{colRanges,DelayedMatrix-method}
-% Internal stuff
-
-\alias{subset_seed_as_array,ConformableSeedCombiner-method}
-
\title{Common operations on DelayedArray objects}
\description{
@@ -98,8 +91,7 @@
\item \code{is.na}, \code{is.finite}, \code{is.infinite}, \code{is.nan}
\item \code{nchar}, \code{tolower}, \code{toupper}
\item \code{pmax2} and \code{pmin2}
- \item \code{rbind} and \code{cbind} (documented in
- \link[DelayedArray]{cbind})
+ \item \code{rbind} and \code{cbind} (documented in \link{DelayedArray})
}
Block-processed operations:
diff --git a/man/RleArray-class.Rd b/man/RleArray-class.Rd
index 923648d..ef3dc26 100644
--- a/man/RleArray-class.Rd
+++ b/man/RleArray-class.Rd
@@ -1,11 +1,26 @@
\name{RleArray-class}
\docType{class}
+\alias{class:RleArraySeed}
+\alias{RleArraySeed-class}
+\alias{class:SolidRleArraySeed}
+\alias{SolidRleArraySeed-class}
+\alias{class:RleRealizationSink}
+\alias{RleRealizationSink-class}
+\alias{class:ChunkedRleArraySeed}
+\alias{ChunkedRleArraySeed-class}
+
\alias{dim,RleArraySeed-method}
-\alias{length,RleArraySeed-method}
\alias{dimnames,RleArraySeed-method}
-\alias{subset_seed_as_array,RleArraySeed-method}
+\alias{coerce,SolidRleArraySeed,Rle-method}
+\alias{coerce,RleRealizationSink,Rle-method}
+
+\alias{subset_seed_as_array,SolidRleArraySeed-method}
+\alias{subset_seed_as_array,ChunkedRleArraySeed-method}
+
+\alias{coerce,RleRealizationSink,ChunkedRleArraySeed-method}
+\alias{coerce,ChunkedRleArraySeed,SolidRleArraySeed-method}
\alias{class:RleArray}
\alias{RleArray-class}
@@ -21,13 +36,7 @@
\alias{DelayedArray,RleArraySeed-method}
\alias{RleArray}
-\alias{class:RleRealizationSink}
-\alias{RleRealizationSink-class}
-\alias{RleRealizationSink}
-
-\alias{write_to_sink,array,RleRealizationSink-method}
-
-\alias{coerce,RleRealizationSink,RleArraySeed-method}
+\alias{write_block_to_sink,RleRealizationSink-method}
\alias{coerce,RleRealizationSink,RleArray-method}
\alias{coerce,RleRealizationSink,DelayedArray-method}
\alias{coerce,ANY,RleArray-method}
@@ -48,7 +57,7 @@
}
\usage{
-RleArray(rle, dim, dimnames=NULL) # constructor function
+RleArray(rle, dim, dimnames=NULL, chunksize=NULL) # constructor function
}
\arguments{
@@ -64,6 +73,9 @@ RleArray(rle, dim, dimnames=NULL) # constructor function
length the number of dimensions. Each list element must be either
\code{NULL} or a character vector along the corresponding dimension.
}
+ \item{chunksize}{
+ Experimental. Don't use!
+ }
}
\details{
diff --git a/man/bind-arrays.Rd b/man/bind-arrays.Rd
new file mode 100644
index 0000000..b7767f0
--- /dev/null
+++ b/man/bind-arrays.Rd
@@ -0,0 +1,58 @@
+\name{bind-arrays}
+
+\alias{bind-arrays}
+\alias{bind arrays}
+
+\alias{arbind}
+\alias{acbind}
+\alias{arbind,array-method}
+\alias{acbind,array-method}
+
+
+\title{Bind arrays along their rows or columns}
+
+\description{
+ Bind array-like objects with an arbitrary number of dimensions along their
+ rows (\code{arbind}) or columns (\code{acbind}).
+}
+
+\usage{
+arbind(...)
+acbind(...)
+}
+
+\arguments{
+ \item{...}{
+ The array-like objects to bind.
+ }
+}
+
+\value{
+ An array-like object, typically of the same class as the input objects if
+ they all have the same class.
+}
+
+\seealso{
+ \itemize{
+ \item \code{\link{DelayedArray}} in this package for
+ arbind/acbind'ing DelayedArray objects.
+
+ \item \code{\link[base]{rbind}} and \code{\link[base]{cbind}} in the
+ \pkg{base} package for the corresponding operations on matrix-like
+ objects.
+
+ \item The \pkg{abind} package on CRAN.
+ }
+}
+
+\examples{
+a1 <- array(1:60, c(3, 5, 4),
+ dimnames=list(NULL, paste0("M1y", 1:5), NULL))
+a2 <- array(101:240, c(7, 5, 4),
+ dimnames=list(paste0("M2x", 1:7), paste0("M2y", 1:5), NULL))
+a3 <- array(10001:10100, c(5, 5, 4),
+ dimnames=list(paste0("M3x", 1:5), NULL, paste0("M3z", 1:4)))
+
+arbind(a1, a2, a3)
+}
+\keyword{methods}
diff --git a/man/block_processing.Rd b/man/block_processing.Rd
new file mode 100644
index 0000000..9b42148
--- /dev/null
+++ b/man/block_processing.Rd
@@ -0,0 +1,26 @@
+\name{block_processing}
+
+\alias{block_processing}
+
+\alias{write_array_to_sink}
+
+\title{Block processing of an array}
+
+\description{
+ A set of utilities for processing an array-like object block by block.
+}
+
+\details{
+ Coming soon...
+}
+
+\seealso{
+ \itemize{
+ \item \link{DelayedArray} objects.
+
+ \item \code{\link{realize}} for realizing a DelayedArray object in memory
+ or on disk.
+ }
+}
+
+\keyword{methods}
diff --git a/man/cbind-methods.Rd b/man/cbind-methods.Rd
deleted file mode 100644
index 568ab17..0000000
--- a/man/cbind-methods.Rd
+++ /dev/null
@@ -1,91 +0,0 @@
-\name{cbind-methods}
-
-\alias{cbind-methods}
-
-\alias{dim,SeedBinder-method}
-\alias{dimnames,SeedBinder-method}
-
-\alias{rbind}
-\alias{rbind,DelayedMatrix-method}
-\alias{rbind,DelayedArray-method}
-
-\alias{cbind}
-\alias{cbind,DelayedMatrix-method}
-\alias{cbind,DelayedArray-method}
-
-\alias{arbind}
-\alias{arbind,DelayedArray-method}
-
-\alias{acbind}
-\alias{acbind,DelayedArray-method}
-
-% Internal stuff
-\alias{subset_seed_as_array,SeedBinder-method}
-
-\title{Bind DelayedArray objects along their rows or columns}
-
-\description{
- Methods for binding DelayedArray objects along their rows or columns.
-}
-
-\details{
- \code{rbind}, \code{cbind}, \code{arbind}, \code{acbind} methods are defined
- for \link{DelayedArray} objects. They perform delayed binding along the rows
- (\code{rbind} and \code{arbind}) or columns (\code{cbind} and \code{acbind})
- of the objects passed to them.
-}
-
-\seealso{
- \itemize{
- \item \code{\link[base]{cbind}} in the \pkg{base} package for
- rbind/cbind'ing ordinary arrays.
-
- \item \code{\link[IRanges]{acbind}} in the \pkg{IRanges} package for
- arbind/acbind'ing ordinary arrays.
-
- \item \link{DelayedArray-utils} for common operations on
- \link{DelayedArray} objects.
-
- \item \link{DelayedArray} objects.
-
- \item \link[HDF5Array]{HDF5Array} objects in the \pkg{HDF5Array} package.
-
- \item \link[base]{array} objects in base R.
- }
-}
-
-\examples{
-## ---------------------------------------------------------------------
-## rbind/cbind
-## ---------------------------------------------------------------------
-library(HDF5Array)
-toy_h5 <- system.file("extdata", "toy.h5", package="HDF5Array")
-h5ls(toy_h5)
-
-M1 <- HDF5Array(toy_h5, "M1")
-M2 <- HDF5Array(toy_h5, "M2")
-
-M <- rbind(M1, t(M2))
-M
-colMeans(M)
-
-## ---------------------------------------------------------------------
-## arbind/acbind
-## ---------------------------------------------------------------------
-a1 <- array(1:60, c(3, 5, 4),
- dimnames=list(NULL, paste0("M1y", 1:5), NULL))
-a2 <- array(101:240, c(7, 5, 4),
- dimnames=list(paste0("M2x", 1:7), paste0("M2y", 1:5), NULL))
-a3 <- array(10001:10100, c(5, 5, 4),
- dimnames=list(paste0("M3x", 1:5), NULL, paste0("M3z", 1:4)))
-
-A1 <- DelayedArray(a1)
-A2 <- DelayedArray(a2)
-A3 <- DelayedArray(a3)
-A <- arbind(A1, A2, A3)
-A
-
-## Sanity check:
-stopifnot(identical(arbind(a1, a2, a3), as.array(A)))
-}
-\keyword{methods}
diff --git a/man/realize.Rd b/man/realize.Rd
index 2fcd148..f34d00a 100644
--- a/man/realize.Rd
+++ b/man/realize.Rd
@@ -3,17 +3,17 @@
\alias{class:RealizationSink}
\alias{RealizationSink-class}
-\alias{write_to_sink}
-\alias{write_to_sink,DelayedArray,RealizationSink-method}
-\alias{write_to_sink,ANY,RealizationSink-method}
+\alias{chunk_dim}
+\alias{chunk_dim,RealizationSink-method}
+\alias{write_block_to_sink}
\alias{close,RealizationSink-method}
\alias{class:arrayRealizationSink}
\alias{arrayRealizationSink-class}
-\alias{arrayRealizationSink}
-\alias{write_to_sink,array,arrayRealizationSink-method}
+\alias{dim,arrayRealizationSink-method}
+\alias{write_block_to_sink,arrayRealizationSink-method}
\alias{coerce,arrayRealizationSink,DelayedArray-method}
\alias{supportedRealizationBackends}
@@ -23,8 +23,6 @@
\alias{realize}
\alias{realize,ANY-method}
-\alias{RealizationSink}
-
\title{Realize a DelayedArray object}
\description{
diff --git a/man/subset_seed_as_array.Rd b/man/subset_seed_as_array.Rd
new file mode 100644
index 0000000..2384d62
--- /dev/null
+++ b/man/subset_seed_as_array.Rd
@@ -0,0 +1,55 @@
+\name{subset_seed_as_array}
+
+\alias{subset_seed_as_array}
+\alias{subset_seed_as_array,ANY-method}
+\alias{subset_seed_as_array,array-method}
+\alias{subset_seed_as_array,data.frame-method}
+\alias{subset_seed_as_array,DataFrame-method}
+
+\alias{dim,ConformableSeedCombiner-method}
+\alias{dimnames,ConformableSeedCombiner-method}
+\alias{subset_seed_as_array,ConformableSeedCombiner-method}
+
+\alias{dim,SeedBinder-method}
+\alias{dimnames,SeedBinder-method}
+\alias{subset_seed_as_array,SeedBinder-method}
+
+\title{subset_seed_as_array}
+
+\description{
+ \code{subset_seed_as_array} is an internal generic function not
+ aimed to be used directly by the user.
+ It has methods defined for array, data.frame, DataFrame objects
+ and other array-like objects.
+
+ The \code{DelayedArray()} constructor function will accept any
+ seed that supports \code{dim()}, \code{dimnames()}, and
+ \code{subset_seed_as_array()}.
+}
+
+\usage{
+subset_seed_as_array(seed, index)
+}
+
+\arguments{
+ \item{seed}{
+ An array-like object.
+ }
+ \item{index}{
+ An unnamed list of subscripts as positive integer vectors, one vector
+ per seed dimension. \emph{Missing} list elements are allowed and must
+ be represented by \code{NULL}s.
+ }
+}
+
+\seealso{
+ \itemize{
+ \item \link{DelayedArray} objects.
+
+ \item \link[base]{array} and \link[base]{data.frame} objects in base R.
+
+ \item \link[S4Vectors]{DataFrame} objects in the \pkg{S4Vectors} package.
+ }
+}
+
+\keyword{internal}
diff --git a/vignettes/Working_with_large_arrays.Rnw b/vignettes/Working_with_large_arrays.Rnw
new file mode 100644
index 0000000..bd26240
--- /dev/null
+++ b/vignettes/Working_with_large_arrays.Rnw
@@ -0,0 +1,764 @@
+%\VignetteEngine{knitr::knitr}
+%\VignetteIndexEntry{Working with large arrays in R}
+%\VignetteDepends{knitr,Matrix,DelayedArray,HDF5Array,SummarizedExperiment,airway,pryr}
+
+\documentclass[8pt]{beamer}
+
+\mode<presentation> {
+\usetheme{Madrid}
+\usecolortheme{whale}
+}
+
+\usepackage{slides}
+\renewcommand\Rclass[1]{{\texttt{#1}\index{#1 (class)}}}
+
+\AtBeginSection[]
+{
+ \begin{frame}<beamer>
+ \tableofcontents[currentsection]
+ \end{frame}
+}
+
+\title{Working with large arrays in R}
+\subtitle{A look at HDF5Array/RleArray/DelayedArray objects}
+
+\author{Herv\'e Pag\`es\\
+ \href{mailto:hpages at fredhutch.org}{hpages at fredhutch.org}}
+
+\institute{Bioconductor conference\\Boston}
+
+\date{July 2017}
+
+\begin{document}
+
+<<setup, include=FALSE>>=
+library(knitr)
+opts_chunk$set(size="scriptsize")
+if (!dir.exists("~/mydata")) dir.create("~/mydata")
+options(width=80)
+library(Matrix)
+library(DelayedArray)
+library(HDF5Array)
+library(SummarizedExperiment)
+library(airway)
+library(pryr)
+@
+
+\maketitle
+
+\frame{\tableofcontents}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\section{Motivation and challenges}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}[fragile]
+ \frametitle{Motivation and challenges}
+
+ R ordinary {\bf matrix} or {\bf array} is not suitable for big datasets:
+ \begin{block}{}
+ \begin{itemize}
+ \item 10x Genomics dataset (single cell experiment):
+ 30,000 genes x 1.3 million cells = 36.5 billion values
+ \item in an ordinary integer matrix ==> 136G in memory!
+ \end{itemize}
+ \end{block}
+
+ \bigskip
+
+ Need for alternative containers:
+ \begin{block}{}
+ \begin{itemize}
+ \item but at the same time, the object should be (almost) as easy to
+ manipulate as an ordinary matrix or array
+ \item {\em standard R matrix/array API}: \Rcode{dim}, \Rcode{dimnames},
+ \Rcode{t}, \Rcode{is.na}, \Rcode{==}, \Rcode{+}, \Rcode{log},
+ \Rcode{cbind}, \Rcode{max}, \Rcode{sum}, \Rcode{colSums}, etc...
+ \item not limited to 2 dimensions ==> also support arrays of arbitrary
+ number of dimensions
+ \end{itemize}
+ \end{block}
+
+ \bigskip
+
+ 2 approaches: {\bf in-memory data} vs {\bf on-disk data}
+\end{frame}
+
+
+\begin{frame}[fragile]
+ \frametitle{Motivation and challenges}
+
+ \centerline{\bf In-memory data}
+
+ \begin{block}{}
+ \begin{itemize}
+ \item a 30k x 1.3M matrix might still fit in memory if the data can
+ be efficiently compressed
+ \item example: sparse data (small percentage of non-zero values) ==>
+ {\em sparse representation} (storage of non-zero values only)
+ \item example: data with long runs of identical values ==> {\em RLE
+ compression (Run Length Encoding)}
+ \item choose the {\em smallest type} to store the values: \Rcode{raw}
+ (1 byte) < \Rcode{integer} (4 bytes) < \Rcode{double} (8 bytes)
+ \item if using {\em RLE compression}:
+ \begin{itemize}
+ \item choose the {\em best orientation} to store the values:
+ {\em by row} or {\em by column} (one might give better
+ compression than the other)
+ \item store the data by chunk ==> opportunity to pick up
+ {\em best type} and {\em best orientation} on a chunk
+ basis (instead of for the whole data)
+ \end{itemize}
+ \item size of 30k x 1.3M matrix in memory can be reduced from 136G
+ to 16G!
+ \end{itemize}
+ \end{block}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Motivation and challenges}
+
+ \centerline{\bf Examples of in-memory containers}
+
+ \bigskip
+
+ {\bf dgCMatrix} container from the \Biocpkg{Matrix} package:
+ \begin{block}{}
+ \begin{itemize}
+ \item sparse matrix representation
+ \item non-zero values stored as \Rcode{double}
+ \end{itemize}
+ \end{block}
+
+ \bigskip
+
+ {\bf RleArray} and {\bf RleMatrix} containers from the
+ \Biocpkg{DelayedArray} package:
+ \begin{block}{}
+ \begin{itemize}
+ \item use RLE compression
+ \item arbitrary number of dimensions
+ \item type of values: any R atomic type (\Rcode{integer},
+ \Rcode{double}, \Rcode{logical}, \Rcode{complex},
+ \Rcode{character}, and \Rcode{raw})
+ \end{itemize}
+ \end{block}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Motivation and challenges}
+
+ \centerline{\bf On-disk data}
+
+ \bigskip
+
+ However...
+ \begin{itemize}
+ \item if data is too big to fit in memory (even after compression) ==>
+ must use {\em on-disk representation}
+ \item challenge: should still be (almost) as easy to manipulate as
+ an ordinary matrix! ({\em standard R matrix/array API})
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Motivation and challenges}
+
+ \centerline{\bf Examples of on-disk containers}
+
+ \bigskip
+
+ Direct manipulation of an {\bf HDF5 dataset} via the
+ \Biocpkg{rhdf5} API. Low level API!
+
+ \bigskip
+
+ {\bf HDF5Array} and {\bf HDF5Matrix} containers from the
+ \Biocpkg{HDF5Array} package:
+ \begin{block}{}
+ Provide access to the HDF5 dataset via an API that mimics the standard
+ R matrix/array API
+ \end{block}
+\end{frame}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\section{Memory footprint}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}[fragile]
+ \frametitle{Memory footprint}
+
+ \centerline{\bf The "airway" dataset}
+
+ \begin{columns}[t]
+ \begin{column}{0.36\textwidth}
+ \begin{exampleblock}{}
+<<airway>>=
+library(airway)
+data(airway)
+m <- unname(assay(airway))
+dim(m)
+typeof(m)
+@
+ \end{exampleblock}
+ \end{column}
+ \begin{column}{0.52\textwidth}
+ \begin{exampleblock}{}
+<<airway2>>=
+head(m, n=4)
+tail(m, n=4)
+sum(m != 0) / length(m)
+@
+ \end{exampleblock}
+ \end{column}
+ \end{columns}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Memory footprint}
+
+ \centerline{{\bf dgCMatrix} vs {\bf RleMatrix} vs {\bf HDF5Matrix}}
+
+ \begin{columns}[t]
+ \begin{column}{0.60\textwidth}
+ \begin{exampleblock}{}
+<<object_size>>=
+library(pryr) # for object_size()
+object_size(m)
+
+library(Matrix)
+object_size(as(m, "dgCMatrix"))
+
+library(DelayedArray)
+object_size(as(m, "RleMatrix"))
+object_size(as(t(m), "RleMatrix"))
+
+library(HDF5Array)
+object_size(as(m, "HDF5Matrix"))
+@
+ \end{exampleblock}
+ \end{column}
+ \end{columns}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Memory footprint}
+
+ Some limitations of the sparse matrix implementation in the \Biocpkg{Matrix}
+ package:
+
+ \begin{block}{}
+ \begin{itemize}
+ \item non-zero values always stored as \Rcode{double}, the most memory
+ consuming type
+ \item number of non-zero values must be $< 2^{31}$
+ \item limited to 2 dimensions: no support for arrays of arbitrary number
+ of dimensions
+ \end{itemize}
+ \end{block}
+\end{frame}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\section{RleArray and HDF5Array objects}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}[fragile]
+ \frametitle{RleArray and HDF5Array objects}
+
+ RleMatrix/RleArray and HDF5Matrix/HDF5Array provide:
+ \begin{block}{}
+ \begin{itemize}
+ \item support all R atomic types
+ \item no limits in size (but each dimension must be $< 2^{31}$)
+ \item arbitrary number of dimensions
+ \end{itemize}
+ \end{block}
+
+ \bigskip
+
+ And also:
+ \begin{block}{}
+ \begin{itemize}
+ \item {\bf delayed operations}
+ \item {\bf block-processing} (behind the scene)
+ \item TODO: multicore block-processing (sequential only at the moment)
+ \end{itemize}
+ \end{block}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{RleArray and HDF5Array objects}
+
+ \centerline{\bf Delayed operations}
+
+ \bigskip
+
+ \centerline{We start with HDF5Matrix object \Rcode{M}:}
+
+ \begin{columns}[t]
+ \begin{column}{0.60\textwidth}
+ \begin{exampleblock}{}
+<<M>>=
+M <- as(m, "HDF5Matrix")
+M
+@
+ \end{exampleblock}
+ \end{column}
+ \end{columns}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{RleArray and HDF5Array objects}
+
+ Subsetting is delayed:
+
+ \begin{columns}[t]
+ \begin{column}{0.40\textwidth}
+ \begin{exampleblock}{}
+<<M2>>=
+M2 <- M[10:12, 1:5]
+M2
+@
+ \end{exampleblock}
+ \end{column}
+ \begin{column}{0.48\textwidth}
+ \begin{exampleblock}{}
+<<seed_of_M2>>=
+seed(M2)
+@
+ \end{exampleblock}
+ \end{column}
+ \end{columns}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{RleArray and HDF5Array objects}
+
+ Transposition is delayed:
+
+ \begin{columns}[t]
+ \begin{column}{0.40\textwidth}
+ \begin{exampleblock}{}
+<<>>=
+M3 <- t(M2)
+M3
+@
+ \end{exampleblock}
+ \end{column}
+ \begin{column}{0.48\textwidth}
+ \begin{exampleblock}{}
+<<>>=
+seed(M3)
+@
+ \end{exampleblock}
+ \end{column}
+ \end{columns}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{RleArray and HDF5Array objects}
+
+ \Rcode{cbind()} / \Rcode{rbind()} are delayed:
+
+ \begin{columns}[t]
+ \begin{column}{0.44\textwidth}
+ \begin{exampleblock}{}
+<<>>=
+M4 <- cbind(M3, M[1:5, 6:8])
+M4
+@
+ \end{exampleblock}
+ \end{column}
+ \begin{column}{0.44\textwidth}
+ \begin{exampleblock}{}
+<<>>=
+seed(M4)
+@
+ \end{exampleblock}
+ \end{column}
+ \end{columns}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{RleArray and HDF5Array objects}
+
+ All the operations in the following groups are delayed:
+ \begin{itemize}
+ \item \Rcode{Arith} (\Rcode{+}, \Rcode{-}, ...)
+ \item \Rcode{Compare} (\Rcode{==}, \Rcode{<}, ...)
+ \item \Rcode{Logic} (\Rcode{\&}, \Rcode{|})
+ \item \Rcode{Math} (\Rcode{log}, \Rcode{sqrt})
+ \item and more ...
+ \end{itemize}
+
+ \begin{columns}[t]
+ \begin{column}{0.42\textwidth}
+ \begin{exampleblock}{}
+<<>>=
+M5 <- M == 0
+M5
+@
+ \end{exampleblock}
+ \end{column}
+ \begin{column}{0.47\textwidth}
+ \begin{exampleblock}{}
+<<>>=
+seed(M5)
+@
+ \end{exampleblock}
+ \end{column}
+ \end{columns}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{RleArray and HDF5Array objects}
+
+ \begin{columns}[t]
+ \begin{column}{0.44\textwidth}
+ \begin{exampleblock}{}
+<<>>=
+M6 <- round(M[11:14, ] / M[1:4, ], digits=3)
+M6
+@
+ \end{exampleblock}
+ \end{column}
+ \begin{column}{0.44\textwidth}
+ \begin{exampleblock}{}
+<<>>=
+seed(M6)
+@
+ \end{exampleblock}
+ \end{column}
+ \end{columns}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{RleArray and HDF5Array objects}
+
+ \centerline{\bf Realization}
+
+ \bigskip
+
+ Delayed operations can be {\bf realized} by coercing the DelayedMatrix
+ object to HDF5Array:
+
+ \begin{columns}[t]
+ \begin{column}{0.40\textwidth}
+ \begin{exampleblock}{}
+<<>>=
+M6a <- as(M6, "HDF5Array")
+M6a
+@
+ \end{exampleblock}
+ \end{column}
+ \begin{column}{0.48\textwidth}
+ \begin{exampleblock}{}
+<<>>=
+seed(M6a)
+@
+ \end{exampleblock}
+ \end{column}
+ \end{columns}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{RleArray and HDF5Array objects}
+
+ \bigskip
+
+ ... or by coercing it to RleArray:
+
+ \begin{columns}[t]
+ \begin{column}{0.44\textwidth}
+ \begin{exampleblock}{}
+<<>>=
+M6b <- as(M6, "RleArray")
+M6b
+@
+ \end{exampleblock}
+ \end{column}
+ \begin{column}{0.44\textwidth}
+ \begin{exampleblock}{}
+<<>>=
+seed(M6b)
+@
+ \end{exampleblock}
+ \end{column}
+ \end{columns}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{RleArray and HDF5Array objects}
+
+ \centerline{\bf Controlling where HDF5 datasets are realized}
+
+ \bigskip
+
+ {\em HDF5 dump management utilities}: a set of utilities to control where
+ HDF5 datasets are written to disk.
+
+ \begin{columns}[t]
+ \begin{column}{0.44\textwidth}
+ \begin{exampleblock}{}
+<<>>=
+setHDF5DumpFile("~/mydata/M6c.h5")
+setHDF5DumpName("M6c")
+M6c <- as(M6, "HDF5Array")
+@
+ \end{exampleblock}
+ \end{column}
+ \begin{column}{0.44\textwidth}
+ \begin{exampleblock}{}
+<<>>=
+seed(M6c)
+h5ls("~/mydata/M6c.h5")
+@
+ \end{exampleblock}
+ \end{column}
+ \end{columns}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{RleArray and HDF5Array objects}
+
+ \centerline{\Rcode{showHDF5DumpLog()}}
+
+ \begin{exampleblock}{}
+<<>>=
+showHDF5DumpLog()
+@
+ \end{exampleblock}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{RleArray and HDF5Array objects}
+
+ \centerline{\bf Block processing}
+
+ \bigskip
+
+ The following operations are NOT delayed. They are implemented via a
+ {\em block processing} mechanism that loads and processes one block
+ at a time:
+ \begin{itemize}
+ \item operations in the \Rcode{Summary} group (\Rcode{max}, \Rcode{min},
+ \Rcode{sum}, \Rcode{any}, \Rcode{all})
+ \item \Rcode{mean}
+ \item Matrix row/col summarization operations (\Rcode{col/rowSums},
+ \Rcode{col/rowMeans}, ...)
+ \item \Rcode{anyNA}, \Rcode{which}
+ \item \Rcode{apply}
+ \item and more ...
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{RleArray and HDF5Array objects}
+
+ \begin{columns}[t]
+ \begin{column}{0.75\textwidth}
+ \begin{exampleblock}{}
+<<>>=
+DelayedArray:::set_verbose_block_processing(TRUE)
+colSums(M)
+@
+ \end{exampleblock}
+
+ Control the block size:
+
+ \begin{exampleblock}{}
+<<>>=
+getOption("DelayedArray.block.size")
+options(DelayedArray.block.size=1e6)
+colSums(M)
+@
+ \end{exampleblock}
+ \end{column}
+ \end{columns}
+\end{frame}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\section{Hands-on}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}[fragile]
+ \frametitle{Hands-on}
+
+ \begin{block}{}
+ 1. Load the "airway" dataset.
+ \end{block}
+
+ \begin{block}{}
+ 2. It's wrapped in a SummarizedExperiment object. Get the count data as
+ an ordinary matrix.
+ \end{block}
+
+ \begin{block}{}
+ 3. Wrap it in an HDF5Matrix object: (1) using \Rcode{writeHDF5Array()};
+ then (2) using coercion.
+ \end{block}
+
+ \begin{block}{}
+ 4. When using coercion, where has the data been written on disk?
+ \end{block}
+
+ \begin{block}{}
+ 5. See \Rcode{?setHDF5DumpFile} for how to control the location of
+ "automatic" HDF5 datasets. Try to control the destination of the
+ data when coercing.
+ \end{block}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Hands-on}
+
+ \begin{block}{}
+ 6. Use \Rcode{showHDF5DumpLog()} to see all the HDF5 datasets written to
+ disk during the current session.
+ \end{block}
+
+ \bigskip
+
+ \begin{block}{}
+ 7. Try some operations on the HDF5Matrix object: (1) some delayed ones;
+ (2) some non-delayed ones (block processing).
+ \end{block}
+
+ \bigskip
+
+ \begin{block}{}
+ 8. Use \Rcode{DelayedArray:::set\_verbose\_block\_processing(TRUE)}
+ to see block processing in action.
+ \end{block}
+
+ \bigskip
+
+ \begin{block}{}
+ 9. Control the block size via \Rcode{DelayedArray.block.size} global
+ option.
+ \end{block}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Hands-on}
+
+ \begin{block}{}
+ 10. Stick the HDF5Matrix object back in the SummarizedExperiment object.
+ The resulting object is an "HDF5-backed SummarizedExperiment object".
+ \end{block}
+
+ \bigskip
+
+ \begin{block}{}
+ 11. The HDF5-backed SummarizedExperiment object can be manipulated
+ (almost) like an in-memory SummarizedExperiment object.
+ Try \Rcode{[}, \Rcode{cbind}, \Rcode{rbind} on it.
+ \end{block}
+
+ \bigskip
+
+ \begin{block}{}
+ 12. The \Biocpkg{SummarizedExperiment} package provides
+ \Rcode{saveHDF5SummarizedExperiment} to save a SummarizedExperiment
+ object (HDF5-backed or not) as an HDF5-backed SummarizedExperiment
+ object. Try it.
+ \end{block}
+\end{frame}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\section{DelayedArray/HDF5Array: Future developments}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}[fragile]
+ \frametitle{Future developments}
+
+ \centerline{\bf Block processing improvements}
+
+ \begin{block}{}
+ Block genometry: (1) better by default, (2) let the user have more
+ control on it
+ \end{block}
+
+ \begin{block}{}
+ Support multicore
+ \end{block}
+
+ \begin{block}{}
+ Expose it: \Rcode{blockApply()}
+ \end{block}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Future developments}
+
+ \centerline{\bf HDF5Array improvements}
+
+ \begin{block}{}
+ Store the \Rcode{dimnames} in the HDF5 file (in {\em HDF5 Dimension Scale
+ datasets} - \url{https://www.hdfgroup.org/HDF5/Tutor/h5dimscale.html})
+ \end{block}
+
+ \begin{block}{}
+ Use better default chunk geometry when realizing an HDF5Array object
+ \end{block}
+
+ \begin{block}{}
+ Block processing should take advantage of the chunk geometry (e.g.
+ \Rcode{realize()} should use blocks that are clusters of chunks)
+ \end{block}
+
+ \begin{block}{}
+ Unfortunately: not possible to support multicore realization at the
+ moment (HDF5 does not support concurrent writing to a dataset yet)
+ \end{block}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Future developments}
+
+ \centerline{\bf RleArray improvements}
+
+ \begin{block}{}
+ Let the user have more control on the chunk geometry when
+ constructing/realizing an RleArray object
+ \end{block}
+
+ \begin{block}{}
+ Like for HDF5Array objects, block processing should take advantage
+ of the chunk geometry
+ \end{block}
+
+ \begin{block}{}
+ Support multicore realization
+ \end{block}
+
+ \begin{block}{}
+ Provide C/C++ low-level API for direct row/column access from C/C++ code
+ (e.g. from the \Biocpkg{beachmat} package)
+ \end{block}
+\end{frame}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+<<cleanup, include=FALSE>>=
+unlink("~/mydata", recursive=TRUE, force=TRUE)
+@
+
+\end{document}
+
diff --git a/vignettes/slides.sty b/vignettes/slides.sty
new file mode 100644
index 0000000..119363c
--- /dev/null
+++ b/vignettes/slides.sty
@@ -0,0 +1,34 @@
+\usepackage{Sweave}
+\usepackage{color, graphics}
+\usepackage{latexsym, amsmath, amssymb}
+
+%% simple macros
+\newcommand{\software}[1]{\textsl{#1}}
+\newcommand\R{\textsl{R}}
+\newcommand\Bioconductor{\textsl{Bioconductor}}
+\newcommand\Rpackage[1]{{\textsl{#1}\index{#1 (package)}}}
+\newcommand\Biocpkg[1]{%
+ {\href{http://bioconductor.org/packages/release/bioc/html/#1.html}%
+ {\textsl{#1}}}%
+ \index{#1 (package)}}
+\newcommand\Rpkg[1]{%
+ {\href{http://cran.fhcrc.org/web/packages/#1/index.html}%
+ {\textsl{#1}}}%
+ \index{#1 (package)}}
+\newcommand\Biocdatapkg[1]{%
+ {\href{http://bioconductor.org/packages/release/data/experiment/html/#1.html}%
+ {\textsl{#1}}}%
+ \index{#1 (package)}}
+\newcommand\Robject[1]{{\small\texttt{#1}}}
+\newcommand\Rclass[1]{{\textit{#1}\index{#1 (class)}}}
+\newcommand\Rfunction[1]{{{\small\texttt{#1}}\index{#1 (function)}}}
+\newcommand\Rmethod[1]{{\texttt{#1}}}
+\newcommand\Rfunarg[1]{{\small\texttt{#1}}}
+\newcommand\Rcode[1]{{\small\texttt{#1}}}
+
+%% \AtBeginSection[]
+%% {
+%% \begin{frame}<beamer>{Outline}
+%% \tableofcontents
+%% \end{frame}
+%% }
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/r-bioc-delayedarray.git
More information about the debian-med-commit
mailing list