[med-svn] [r-bioc-iranges] 01/04: New upstream version 2.12.0
Andreas Tille
tille at debian.org
Wed Nov 8 09:29:12 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository r-bioc-iranges.
commit e7bd5e0714b0255590ce2f9005f7f4011477adae
Author: Andreas Tille <tille at debian.org>
Date: Wed Nov 8 10:26:26 2017 +0100
New upstream version 2.12.0
---
DESCRIPTION | 25 +-
NAMESPACE | 47 ++--
NEWS | 109 +++++++-
R/AtomicList-impl.R | 16 +-
R/AtomicList-utils.R | 34 ++-
R/CompressedList-class.R | 42 +---
R/Grouping-class.R | 7 +-
R/IPos-class.R | 354 ++++++++++++++++++++++++++
R/IRanges-class.R | 65 ++++-
R/List-class-leftovers.R | 4 +-
R/NCList-class.R | 50 ++--
R/RangedData-class.R | 20 +-
R/RangedSelection-class.R | 4 +-
R/Ranges-class.R | 23 ++
R/Ranges-comparison.R | 72 +++++-
R/RangesList-class.R | 28 ++-
R/Rle-class-leftovers.R | 5 +-
R/RleViewsList-class.R | 12 +-
R/Views-class.R | 6 +-
R/ViewsList-class.R | 7 +-
R/bind-arrays.R | 162 ------------
R/coverage-methods.R | 9 +
R/extractList.R | 2 +
R/findOverlaps-methods.R | 389 ++++++++++++++++++-----------
R/inter-range-methods.R | 27 +-
R/intra-range-methods.R | 26 ++
R/nearest-methods.R | 4 +-
R/range-squeezers.R | 27 ++
R/setops-methods.R | 55 ++--
inst/doc/IRangesOverview.pdf | Bin 230670 -> 230876 bytes
inst/unitTests/test_DataFrameList.R | 11 +-
inst/unitTests/test_IRanges-class.R | 6 -
inst/unitTests/test_NCList-class.R | 75 +++---
inst/unitTests/test_bind-arrays.R | 96 -------
inst/unitTests/test_findOverlaps-methods.R | 36 ++-
man/AtomicList-utils.Rd | 27 +-
man/DataFrame-utils.Rd | 4 +-
man/DataFrameList-class.Rd | 3 +-
man/Grouping-class.Rd | 1 -
man/IPos-class.Rd | 232 +++++++++++++++++
man/IRanges-class.Rd | 11 +
man/IRangesList-class.Rd | 2 +-
man/NCList-class.Rd | 2 -
man/RangedData-class.Rd | 22 +-
man/Ranges-class.Rd | 22 +-
man/Ranges-comparison.Rd | 40 ++-
man/RangesList-class.Rd | 24 +-
man/RleViewsList-class.Rd | 4 +-
man/Views-class.Rd | 1 -
man/ViewsList-class.Rd | 3 -
man/bind-arrays.Rd | 56 -----
man/findOverlaps-methods.Rd | 194 +++++++-------
man/inter-range-methods.Rd | 8 +-
man/intra-range-methods.Rd | 1 +
man/range-squeezers.Rd | 108 ++++++++
man/setops-methods.Rd | 11 +
src/NCList.c | 63 +++--
src/coverage_methods.c | 2 +-
58 files changed, 1792 insertions(+), 904 deletions(-)
diff --git a/DESCRIPTION b/DESCRIPTION
index 3d12fb4..122478e 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -8,22 +8,24 @@ Description: Provides efficient low-level and highly reusable S4
list-like classes for storing, transforming and aggregating
large grouped data, i.e., collections of atomic vectors and
DataFrames.
-Version: 2.10.5
+Version: 2.12.0
Encoding: UTF-8
Author: H. Pagès, P. Aboyoun and M. Lawrence
Maintainer: Bioconductor Package Maintainer <maintainer at bioconductor.org>
biocViews: Infrastructure, DataRepresentation
-Depends: R (>= 3.1.0), methods, utils, stats, BiocGenerics (>= 0.21.1),
- S4Vectors (>= 0.13.17)
+Depends: R (>= 3.1.0), methods, utils, stats, BiocGenerics (>= 0.23.3),
+ S4Vectors (>= 0.15.5)
Imports: stats4
LinkingTo: S4Vectors
-Suggests: XVector, GenomicRanges, GenomicFeatures, GenomicAlignments,
- BSgenome.Celegans.UCSC.ce2, RUnit
+Suggests: XVector, GenomicRanges, Rsamtools, GenomicAlignments,
+ GenomicFeatures, BSgenome.Celegans.UCSC.ce2, pasillaBamSubset,
+ RUnit
License: Artistic-2.0
-Collate: Vector-class-leftovers.R List-class-leftovers.R
- AtomicList-class.R Ranges-class.R subsetting-utils.R
- Ranges-comparison.R IRanges-class.R IRanges-constructor.R
- IRanges-utils.R Grouping-class.R CompressedList-class.R
+Collate: range-squeezers.R Vector-class-leftovers.R
+ List-class-leftovers.R AtomicList-class.R Ranges-class.R
+ subsetting-utils.R Ranges-comparison.R IRanges-class.R
+ IRanges-constructor.R IRanges-utils.R IPos-class.R
+ Grouping-class.R CompressedList-class.R
CompressedList-comparison.R Views-class.R Rle-class-leftovers.R
RleViews-class.R RleViews-utils.R extractList.R seqapply.R
multisplit.R AtomicList-impl.R AtomicList-utils.R
@@ -35,7 +37,6 @@ Collate: Vector-class-leftovers.R List-class-leftovers.R
RangedSelection-class.R read.Mask.R findOverlaps-methods.R
intra-range-methods.R inter-range-methods.R reverse-methods.R
coverage-methods.R slice-methods.R setops-methods.R
- nearest-methods.R cbind-Rle-methods.R tile-methods.R
- bind-arrays.R zzz.R
+ nearest-methods.R cbind-Rle-methods.R tile-methods.R zzz.R
NeedsCompilation: yes
-Packaged: 2017-10-07 22:27:32 UTC; biocbuild
+Packaged: 2017-10-30 22:38:39 UTC; biocbuild
diff --git a/NAMESPACE b/NAMESPACE
index dff2a01..3f56bb9 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -18,6 +18,7 @@ importFrom(stats4, summary, update)
exportClasses(
Ranges, RangesORmissing,
IRanges, NormalIRanges,
+ IPos,
NCList, NCLists,
Grouping, ManyToOneGrouping, ManyToManyGrouping,
H2LGrouping, Dups,
@@ -55,9 +56,7 @@ exportClasses(
### Export S3 methods
###
-S3method(diff, IntegerList)
-S3method(diff, NumericList)
-S3method(diff, RleList)
+S3method(diff, AtomicList)
S3method(`window<-`, Vector)
S3method(`window<-`, vector)
@@ -67,9 +66,7 @@ S3method(`window<-`, factor)
### called directly, (b) tab-completion on the name of the generic shows them,
### and (c) methods() doesn't asterisk them.
export(
- diff.IntegerList,
- diff.NumericList,
- diff.RleList,
+ diff.AtomicList,
"window<-.Vector",
"window<-.vector",
@@ -93,6 +90,8 @@ exportMethods(
"[", "[<-", "[[", "[[<-", "$<-",
as.vector,
as.integer,
+ as.character,
+ as.factor,
as.matrix,
as.data.frame,
as.list,
@@ -100,7 +99,7 @@ exportMethods(
c,
show,
match, duplicated, unique, anyDuplicated, "%in%",
- order, xtfrm,
+ is.unsorted, order,
Ops, Math, Math2, Summary, Complex,
summary,
rev,
@@ -128,7 +127,6 @@ exportMethods(
table,
tapply,
union, intersect, setdiff,
- ifelse,
values, "values<-",
classNameForDisplay,
from, to,
@@ -160,16 +158,18 @@ export(
whichAsIRanges,
asNormalIRanges,
rangeComparisonCodeToLetter,
+ IPos,
NCList, NCLists,
H2LGrouping, Dups,
PartitioningByEnd, PartitioningByWidth, PartitioningMap,
- grouplength,
RangedData,
RangedSelection,
RangesList,
IRangesList,
RleViewsList,
- "%over%", "%within%", "%outside%", "%pover%", "%pwithin%", "%poutside%",
+ "%over%", "%within%", "%outside%",
+ "%pover%", "%pwithin%", "%poutside%",
+ mergeByOverlaps, findOverlapPairs,
MaskCollection.show_frame,
Mask,
read.gapMask,
@@ -182,7 +182,7 @@ export(
LogicalList, IntegerList, NumericList, ComplexList,
CharacterList, RawList, RleList, FactorList,
DataFrameList, SplitDataFrameList,
- ManyToOneGrouping, ManyToManyGrouping, findOverlapPairs, regroup,
+ ManyToOneGrouping, ManyToManyGrouping, regroup,
selectNearest
)
@@ -192,6 +192,9 @@ export(
###
export(
+ ## range-squeezers.R:
+ ranges, rglist,
+
## Vector-class-leftovers.R:
showAsCell,
mstack,
@@ -200,9 +203,12 @@ export(
mid,
isNormal, whichFirstNotNormal,
+ ## IPos-class.R:
+ pos,
+
## Views-class.R:
subject,
- ranges, "ranges<-",
+ "ranges<-",
Views,
trim, subviews,
viewApply, viewMins, viewMaxs, viewSums, viewMeans,
@@ -234,7 +240,8 @@ export(
score, "score<-",
## findOverlaps-methods.R:
- findOverlaps, countOverlaps, overlapsAny, subsetByOverlaps, mergeByOverlaps,
+ findOverlaps, countOverlaps, overlapsAny, subsetByOverlaps,
+ overlapsRanges,
poverlaps,
## intra-range-methods.R:
@@ -261,19 +268,21 @@ export(
## tile-methods.R:
tile, slidingWindows,
- ## bind-arrays.R:
- arbind, acbind
+ ## AtomicList-utils.R:
+ ifelse2
)
### Exactly the same list as above.
exportMethods(
+ ranges, rglist,
runsum, runmean, runwtsum, runq,
showAsCell,
mstack,
mid,
isNormal, whichFirstNotNormal,
+ pos,
subject,
- ranges, "ranges<-",
+ "ranges<-",
Views,
trim, subviews,
viewApply, viewMins, viewMaxs, viewSums, viewMeans,
@@ -290,6 +299,7 @@ exportMethods(
collapse,
score, "score<-",
findOverlaps, countOverlaps, overlapsAny, subsetByOverlaps,
+ overlapsRanges,
poverlaps,
shift, narrow, resize, flank, reflect, promoters, restrict, threebands,
reduce, gaps, disjoin, isDisjoint, disjointBins,
@@ -298,6 +308,7 @@ exportMethods(
slice,
punion, pintersect, psetdiff, pgap,
precede, follow, nearest, distance, distanceToNearest,
- tile,
- arbind, acbind
+ tile, slidingWindows,
+ ifelse2
)
+
diff --git a/NEWS b/NEWS
index 38fad07..660a831 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,96 @@
+CHANGES IN VERSION 2.12.0
+-------------------------
+
+NEW FEATURES
+
+ o Add IPos objects for storing a set of integer positions where most of
+ the positions are typically (but not necessarily) adjacent.
+
+ o Add coercion of a character vector or factor representing ranges (e.g.
+ "22-155") to an IRanges object, as well as "as.character" and "as.factor"
+ methods for Ranges objects.
+
+ o Introduce overlapsRanges() as a replacement for "ranges" methods for
+ Hits and HitsList objects, and deprecate the latter.
+
+ o Add "is.unsorted" method for Ranges objects.
+
+ o Add "ranges" method for Ranges objects (downgrade the object to an
+ IRanges instance and drop its metadata columns).
+
+ o Add 'use.names' and 'use.mcols' args to ranges() generic.
+
+SIGNIFICANT USER-VISIBLE CHANGES
+
+ o Change 'maxgap' and 'minoverlap' defaults for findOverlaps() and family
+ (i.e. countOverlaps(), overlapsAny(), and subsetByOverlaps()). This
+ change addresses 2 long-standing issues:
+ (1) by default zero-width ranges are not excluded anymore, and
+ (2) control of zero-width ranges and adjacent ranges is finally
+ decoupled (only partially though).
+ New default for 'minoverlap' is 0 instead of 1. New default for 'maxgap'
+ is -1 instead of 0. See ?findOverlaps for more information about 'maxgap'
+ and the meaning of -1. For example, if 'type' is "any", you need to set
+ 'maxgap' to 0 if you want adjacent ranges to be considered as overlapping.
+
+ Note that poverlaps() still uses the old 'maxgap' and 'minoverlap'
+ defaults.
+
+ o subsetByOverlaps() first 2 arguments are now named 'x' and 'ranges'
+ (instead of 'query' and 'subject') for consistency with the
+ transcriptsByOverlaps(), exonsByOverlaps(), and cdsByOverlaps()
+ functions from the GenomicFeatures package and with the snpsByOverlaps()
+ function from the BSgenome package.
+
+ o Replace ifelse() generic and methods with ifelse2() (eager semantics).
+
+ o Coercion from Ranges to IRanges now propagates the metadata columns.
+
+ o Move rglist() generic from GenomicRanges to IRanges package.
+
+ o The "union", "intersect", and "setdiff" methods for Ranges objects
+ don't act like endomorphisms anymore: now they always return an
+ IRanges *instance* whatever Ranges derivatives are passed to them
+ (e.g. NCList or NormalIRanges).
+
+DEPRECATED AND DEFUNCT
+
+ o Deprecate "ranges" methods for Hits and HitsList objects (replaced with
+ overlapsRanges()).
+
+ o Deprecate the "overlapsAny", "subsetByOverlaps", "coverage" and "range"
+ methods for RangedData objects.
+
+ o Deprecate the universe() getter and setter as well as the 'universe'
+ argument of the RangesList(), IRangesList(), RleViewsList(), and
+ RangedData() constructor functions.
+
+ o Default "togroup" method is now defunct (was deprecated in BioC 3.3).
+
+ o Remove grouplength() (was deprecated in BioC 3.3 and replaced with
+ grouplengths, then defunct in BioC 3.4).
+
+BUG FIXES
+
+ o nearest() and distanceToNearest() now call findOverlaps() internally
+ with maxgap=0 and minoverlap=0. This fixes incorrect results obtained
+ in some situations e.g. in the situation reported here:
+
+ https://support.bioconductor.org/p/99369/ (zero-width ranges)
+
+ but also in this situation:
+
+ nearest(IRanges(5, 10), IRanges(1, 4:5), select="all")
+
+ where the 2 ranges in the subject are *both* nearest to the 5-10 range.
+
+ o Fix restrict() and reverse() on IRanges objects with metadata columns.
+
+ o Fix table() on Ranges objects.
+
+ o Various other minor fixes.
+
+
CHANGES IN VERSION 2.10.0
-------------------------
@@ -14,14 +107,6 @@ NEW FEATURES
processing strategy if the input object has more than 10 million list
elements. The hope is to reduce memory usage on very big input objects.
-BUG FIXES
-
- o Fix "setdiff" method for CompressedIRangesList for when all ranges are
- empty.
-
- o Fix long standing bug in coercion from Ranges to PartitioningByEnd when
- the object to coerce has names.
-
DEPRECATED AND DEFUNCT
o Remove the RangedDataList and RDApplyParams classes, rdapply(), and the
@@ -34,6 +119,14 @@ DEPRECATED AND DEFUNCT
o Remove GappedRanges class (was defunct in BioC 3.4).
+BUG FIXES
+
+ o Fix "setdiff" method for CompressedIRangesList for when all ranges are
+ empty.
+
+ o Fix long standing bug in coercion from Ranges to PartitioningByEnd when
+ the object to coerce has names.
+
CHANGES IN VERSION 2.8.0
------------------------
diff --git a/R/AtomicList-impl.R b/R/AtomicList-impl.R
index 640ec20..3874cd6 100644
--- a/R/AtomicList-impl.R
+++ b/R/AtomicList-impl.R
@@ -261,6 +261,12 @@ setMethod("table", "AtomicList",
stop("\"table\" method for AtomicList objects ",
"can only take one input object")
x <- args[[1L]]
+ if (!pcompareRecursively(x)) {
+ ## Not sure why callNextMethod() doesn't work. Is it because of
+ ## dispatch on the ellipsis?
+ #return(callNextMethod())
+ return(selectMethod("table", "Vector")(...))
+ }
y1 <- togroup(PartitioningByWidth(x))
attributes(y1) <- list(levels=as.character(seq_along(x)),
class="factor")
@@ -385,8 +391,8 @@ setMethod("runLength", "CompressedRleList", function(x) {
width(ranges(x))
})
-setMethod("ranges", "RleList", function(x) {
- as(lapply(x, ranges), "List")
+setMethod("ranges", "RleList", function(x, use.names=TRUE, use.mcols=FALSE) {
+ as(lapply(x, ranges, use.names=use.names, use.mcols=use.mcols), "List")
})
diceRangesByList <- function(x, list) {
@@ -397,16 +403,16 @@ diceRangesByList <- function(x, list) {
## bit faster.
hits <- findOverlaps_Ranges_Partitioning(x, listPart,
hit.empty.query.ranges=TRUE)
+ ov <- overlapsRanges(x, listPart, hits)
+ ans_unlistData <- shift(ov, 1L - start(listPart)[subjectHits(hits)])
ans_partitioning <- PartitioningByEnd(subjectHits(hits), NG=length(list))
- ans_unlistData <- shift(ranges(hits, x, listPart),
- 1L - start(listPart)[subjectHits(hits)])
ans <- relist(ans_unlistData, ans_partitioning)
names(ans) <- names(list)
ans
}
setMethod("ranges", "CompressedRleList",
- function(x)
+ function(x, use.names=TRUE, use.mcols=FALSE)
{
rle <- unlist(x, use.names=FALSE)
rlePart <- PartitioningByWidth(runLength(rle))
diff --git a/R/AtomicList-utils.R b/R/AtomicList-utils.R
index 84589d8..a4c43b2 100644
--- a/R/AtomicList-utils.R
+++ b/R/AtomicList-utils.R
@@ -480,9 +480,11 @@ ifelseReturnValue <- function(yes, no, len) {
if(compress) "CompressedList" else "SimpleList")
}
-setMethods("ifelse", list(c("ANY", "ANY", "List"),
- c("ANY", "List", "List"),
- c("ANY", "List", "ANY")),
+setGeneric("ifelse2", function(test, yes, no) standardGeneric("ifelse2"))
+
+setMethods("ifelse2", list(c("ANY", "ANY", "List"),
+ c("ANY", "List", "List"),
+ c("ANY", "List", "ANY")),
function(test, yes, no) {
ans <- ifelseReturnValue(yes, no, length(test))
ok <- !(nas <- is.na(test))
@@ -495,20 +497,20 @@ setMethods("ifelse", list(c("ANY", "ANY", "List"),
ans
})
-setMethods("ifelse", list(c("CompressedLogicalList", "ANY", "ANY"),
- c("CompressedLogicalList", "ANY", "List"),
- c("CompressedLogicalList", "List", "ANY"),
- c("CompressedLogicalList", "List", "List")),
+setMethods("ifelse2", list(c("CompressedLogicalList", "ANY", "ANY"),
+ c("CompressedLogicalList", "ANY", "List"),
+ c("CompressedLogicalList", "List", "ANY"),
+ c("CompressedLogicalList", "List", "List")),
function(test, yes, no) {
doBinaryCompressedListOp(function(yes, no) {
ifelse(unlist(test, use.names=FALSE), yes, no)
}, as(yes, "List"), as(no, "List"), test)
})
-setMethods("ifelse", list(c("SimpleLogicalList", "ANY", "ANY"),
- c("SimpleLogicalList", "ANY", "List"),
- c("SimpleLogicalList", "List", "ANY"),
- c("SimpleLogicalList", "List", "List")),
+setMethods("ifelse2", list(c("SimpleLogicalList", "ANY", "ANY"),
+ c("SimpleLogicalList", "ANY", "List"),
+ c("SimpleLogicalList", "List", "ANY"),
+ c("SimpleLogicalList", "List", "List")),
function(test, yes, no) {
as(mapply(ifelse, test, yes, no, SIMPLIFY=FALSE), "List")
})
@@ -664,13 +666,9 @@ setMethod("IQR", "AtomicList",
function(x, na.rm=FALSE, type=7) sapply(x, IQR, na.rm=na.rm, type=type)
)
-diff.IntegerList <- function(x, ...) diff(x, ...)
-diff.NumericList <- function(x, ...) diff(x, ...)
-diff.RleList <- function(x, ...) diff(x, ...)
+diff.AtomicList <- function(x, ...) diff(x, ...)
-setMethods("diff",
- c("CompressedIntegerList", "CompressedNumericList",
- "CompressedRleList"),
+setMethod("diff", "CompressedAtomicList",
function(x, lag = 1L, differences = 1L) {
stopifnot(isSingleNumber(lag))
stopifnot(isSingleNumber(differences))
@@ -820,7 +818,7 @@ setMethod("rank", "CompressedAtomicList",
setMethod("order", "CompressedAtomicList",
function (..., na.last = TRUE, decreasing = FALSE,
- method = c("auto", "shell", "radix"))
+ method = c("auto", "shell", "radix"))
{
args <- list(...)
if (length(args) != 1L)
diff --git a/R/CompressedList-class.R b/R/CompressedList-class.R
index 412d6c6..4836299 100644
--- a/R/CompressedList-class.R
+++ b/R/CompressedList-class.R
@@ -157,49 +157,15 @@ setValidity2("CompressedList", .valid.CompressedList)
### Coercion.
###
-setUnlistDataNames <- function(unlisted_x, grouping, use.names, x_class)
-{
- ## If 'use.names' is FALSE or 'x' has no *outer* names, then we don't
- ## do anything to 'ans' i.e. we just keep whatever names/rownames are
- ## on it (which are the *inner* names/rownames of 'x'). Note that this
- ## behavior is NOT consistent with unlist,List or base::unlist as
- ## both of them will return a vector with no names/rownames when
- ## 'use.names' is FALSE.
- ## FIXME: Make unlist,CompressedList and unlist,List behave
- ## consistently in *any* situation.
- ## Otherwise (i.e. if 'use.names' is TRUE and 'x' has *outer* names),
- ## we make up new names/rownames for 'ans' by prepending the *outer*
- ## names of 'x' to its *inner* names/rownames. Note that this differs
- ## from what base::unlist does but THIS IS A FEATURE and is consistent
- ## with what unlist,List does.
- if (use.names && !is.null(x_names <- names(grouping))) {
- if (length(dim(unlisted_x)) < 2L) {
- ans_ROWNAMES <- names(unlisted_x)
- } else {
- ans_ROWNAMES <- rownames(unlisted_x)
- }
- nms <- rep.int(x_names, elementNROWS(grouping))
- ans_ROWNAMES <- S4Vectors:::make_unlist_result_names(nms, ans_ROWNAMES)
- if (length(dim(unlisted_x)) < 2L) {
- res <- try(names(unlisted_x) <- ans_ROWNAMES, silent=TRUE)
- what <- "names"
- } else {
- res <- try(rownames(unlisted_x) <- ans_ROWNAMES, silent=TRUE)
- what <- "rownames"
- }
- if (is(res, "try-error"))
- warning("failed to set ", what, " on the ",
- "unlisted ", x_class, " object")
- }
- unlisted_x
-}
-
setMethod("unlist", "CompressedList",
function(x, recursive=TRUE, use.names=TRUE)
{
if (!isTRUEorFALSE(use.names))
stop("'use.names' must be TRUE or FALSE")
- setUnlistDataNames(x@unlistData, x@partitioning, use.names, class(x))
+ unlisted_x <- x@unlistData
+ if (use.names)
+ unlisted_x <- S4Vectors:::set_unlisted_names(unlisted_x, x)
+ unlisted_x
}
)
diff --git a/R/Grouping-class.R b/R/Grouping-class.R
index e97eba1..036fe74 100644
--- a/R/Grouping-class.R
+++ b/R/Grouping-class.R
@@ -941,13 +941,10 @@ setMethod("togroup", "ANY",
function(x, j=NULL)
{
msg <- wmsg(
- "Using togroup() on a ", class(x), " object is deprecated. ",
+ "Using togroup() on a ", class(x), " object is defunct. ",
"Please use togroup(PartitioningByWidth(...)) instead."
)
- .Deprecated(msg=msg)
- togroup(PartitioningByWidth(x), j=j)
+ .Defunct(msg=msg)
}
)
-grouplength <- function(...) .Defunct("grouplengths")
-
diff --git a/R/IPos-class.R b/R/IPos-class.R
new file mode 100644
index 0000000..a955653
--- /dev/null
+++ b/R/IPos-class.R
@@ -0,0 +1,354 @@
+### =========================================================================
+### IPos objects
+### -------------------------------------------------------------------------
+###
+
+
+setClass("IPos",
+ contains="Ranges",
+ representation(
+ pos_runs="IRanges"
+ )
+)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Getters
+###
+
+setMethod("length", "IPos", function(x) sum(width(x@pos_runs)))
+
+setMethod("names", "IPos", function(x) NULL)
+
+setReplaceMethod("names", "IPos",
+ function(x, value)
+ {
+ if (!is.null(value))
+ stop(class(x), " objects don't accept names")
+ x
+ }
+)
+
+setGeneric("pos", function(x) standardGeneric("pos"))
+setMethod("pos", "IPos", function(x) as.integer(x@pos_runs))
+setMethod("start", "IPos", function(x) pos(x))
+setMethod("end", "IPos", function(x) pos(x))
+setMethod("width", "IPos", function(x) rep.int(1L, length(x)))
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Collapse runs of "stitchable integer ranges"
+###
+### In a Ranges object 'x', 2 ranges x[i] and x[i+1] are "stitchable" if
+### start(x[i+1]) == end(x[i])+1. For example, in the following object:
+### 1: .....xxxx.............
+### 2: ...xx.................
+### 3: .........xxx..........
+### 4: ............xxxxxx....
+### 5: ..................x...
+### x[3] and x[4] are stitchable, and x[4] and x[5] are stitchable. So
+### x[3], x[4], and x[5] form a run of "stitchable ranges" that will collapse
+### into the following single range after stitching:
+### .........xxxxxxxxxx...
+### Note that x[1] and x[3] are not stitchable because they are not
+### consecutive vector elements (but they would be if we removed x[2]).
+
+### stitch_Ranges() below takes any Ranges derivative and returns an IRanges
+### object (so is NOT an endomorphism). Note that this transformation
+### preserves 'sum(width(x))'.
+### Also note that this is an "inter range transformation". However unlike
+### range(), reduce(), gaps(), or disjoin(), its result depends on the order
+### of the elements in the input vector. It's also idempotent like range(),
+### reduce(), and disjoin() (gaps() is not).
+
+### TODO: Define and export stitch() generic and method for Ranges objects
+### (in inter-range-methods.R).
+### Maybe it would also make sense to have an isStitched() generic like we
+### have isDisjoint() to provide a quick and easy way to check the state of
+### the object before applying the transformation to it. In theory each
+### idempotent inter range transformation could have a "state checker" so
+### maybe add isReduced() too (range() probably doesn't need one).
+
+stitch_Ranges <- function(x)
+{
+ if (length(x) == 0L)
+ return(IRanges())
+ x_start <- start(x)
+ x_end <- end(x)
+
+ ## Find runs of stitchable elements along 'x'.
+ ## Each run is described by the indices of its first ('run_from') and
+ ## last ('run_to') elements in 'x'.
+ ## The runs form a partitioning of 'x'.
+ new_run_idx <- which(x_start[-1L] != x_end[-length(x)] + 1L)
+ run_from <- c(1L, new_run_idx + 1L)
+ run_to <- c(new_run_idx, length(x))
+
+ IRanges(x_start[run_from], x_end[run_to])
+}
+
+### The runs of positions in an IPos object are guaranteed to be stitched.
+stitch_IPos <- function(x) x@pos_runs
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Validity
+###
+
+### TODO
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Constructor
+###
+
+### Note that if 'pos_runs' is an IPos instance with no metadata or metadata
+### columns, then 'identical(IPos(pos_runs), pos_runs)' is TRUE.
+IPos <- function(pos_runs=IRanges())
+{
+ if (!is(pos_runs, "Ranges"))
+ pos_runs <- as(pos_runs, "Ranges", strict=FALSE)
+ suppressWarnings(ans_len <- sum(width(pos_runs)))
+ if (is.na(ans_len))
+ stop("too many positions in 'pos_runs'")
+ pos_runs <- stitch_Ranges(pos_runs)
+ pos_runs <- pos_runs[width(pos_runs) != 0L]
+ new2("IPos", pos_runs=pos_runs, check=FALSE)
+}
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Coercion
+###
+
+.from_Ranges_to_IPos <- function(from)
+{
+ if (!all(width(from) == 1L))
+ stop(wmsg("all the ranges in the ", class(from), " object to ",
+ "coerce to IPos must have a width of 1"))
+ if (!is.null(names(from)))
+ warning(wmsg("because an IPos object cannot hold them, the names ",
+ "on the ", class(from), " object couldn't be ",
+ "propagated during its coercion to IPos"))
+ ans <- IPos(from)
+ mcols(ans) <- mcols(from)
+ ans
+}
+setAs("Ranges", "IPos", .from_Ranges_to_IPos)
+
+setAs("ANY", "IPos", function(from) .from_Ranges_to_IPos(as(from, "Ranges")))
+
+### The "as.data.frame" method for Ranges objects works on an IPos object
+### but returns a data.frame with identical "start" and "end" columns, and
+### a "width" column filled with 1. We overwrite it to return a data.frame
+### with a "pos" column instead of the "start" and "end" columns, and no
+### "width" column.
+### TODO: Turn this into an S3/S4 combo for as.data.frame.IPos
+setMethod("as.data.frame", "IPos",
+ function(x, row.names=NULL, optional=FALSE, ...)
+ {
+ ans <- data.frame(pos=pos(x), stringsAsFactors=FALSE)
+ x_mcols <- mcols(x)
+ if (!is.null(x_mcols))
+ ans <- cbind(ans, as.data.frame(x_mcols))
+ ans
+ }
+)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Subsetting
+###
+
+### NOT exported but used in the GenomicRanges package.
+### 'pos_runs' must be an IRanges or GRanges object or any range-based
+### object as long as it supports start(), end(), width(), and is subsettable.
+### 'i' must be a Ranges object with no zero-width ranges.
+extract_pos_runs_by_ranges <- function(pos_runs, i)
+{
+ map <- S4Vectors:::map_ranges_to_runs(width(pos_runs),
+ start(i), width(i))
+ ## Because 'i' has no zero-width ranges, 'mapped_range_span' cannot
+ ## contain zeroes and so 'mapped_range_Ltrim' and 'mapped_range_Rtrim'
+ ## cannot contain garbage.
+ mapped_range_offset <- map[[1L]]
+ mapped_range_span <- map[[2L]]
+ mapped_range_Ltrim <- map[[3L]]
+ mapped_range_Rtrim <- map[[4L]]
+ run_idx <- S4Vectors:::fancy_mseq(mapped_range_span,
+ mapped_range_offset)
+ pos_runs <- pos_runs[run_idx]
+ if (length(run_idx) != 0L) {
+ Rtrim_idx <- cumsum(mapped_range_span)
+ Ltrim_idx <- c(1L, Rtrim_idx[-length(Rtrim_idx)] + 1L)
+ trimmed_start <- start(pos_runs)[Ltrim_idx] +
+ mapped_range_Ltrim
+ trimmed_end <- end(pos_runs)[Rtrim_idx] - mapped_range_Rtrim
+ start(pos_runs)[Ltrim_idx] <- trimmed_start
+ end(pos_runs)[Rtrim_idx] <- trimmed_end
+ suppressWarnings(new_len <- sum(width(pos_runs)))
+ if (is.na(new_len))
+ stop("subscript is too big")
+ }
+ pos_runs
+}
+
+setMethod("extractROWS", "IPos",
+ function(x, i)
+ {
+ i <- normalizeSingleBracketSubscript(i, x, as.NSBS=TRUE)
+ ## TODO: Maybe make this the coercion method from NSBS to Ranges.
+ if (is(i, "RangesNSBS")) {
+ ir <- i@subscript
+ ir <- ir[width(ir) != 0L]
+ } else {
+ ir <- as(as.integer(i), "IRanges")
+ }
+ new_pos_runs <- extract_pos_runs_by_ranges(x@pos_runs, ir)
+ x@pos_runs <- stitch_Ranges(new_pos_runs)
+ mcols(x) <- extractROWS(mcols(x), i)
+ x
+ }
+)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Show
+###
+
+.make_naked_matrix_from_IPos <- function(x)
+{
+ x_len <- length(x)
+ x_mcols <- mcols(x)
+ x_nmc <- if (is.null(x_mcols)) 0L else ncol(x_mcols)
+ ans <- cbind(pos=as.character(pos(x)))
+ if (x_nmc > 0L) {
+ tmp <- do.call(data.frame, c(lapply(x_mcols, showAsCell),
+ list(check.names=FALSE)))
+ ans <- cbind(ans, `|`=rep.int("|", x_len), as.matrix(tmp))
+ }
+ ans
+}
+
+show_IPos <- function(x, margin="", print.classinfo=FALSE)
+{
+ x_class <- class(x)
+ x_len <- length(x)
+ x_mcols <- mcols(x)
+ x_nmc <- if (is.null(x_mcols)) 0L else ncol(x_mcols)
+ cat(x_class, " object with ",
+ x_len, " ", ifelse(x_len == 1L, "position", "positions"),
+ " and ",
+ x_nmc, " metadata ", ifelse(x_nmc == 1L, "column", "columns"),
+ ":\n", sep="")
+ ## S4Vectors:::makePrettyMatrixForCompactPrinting() assumes that head()
+ ## and tail() work on 'xx'.
+ xx <- as(x, "IPos")
+ out <- S4Vectors:::makePrettyMatrixForCompactPrinting(xx,
+ .make_naked_matrix_from_IPos)
+ if (print.classinfo) {
+ .COL2CLASS <- c(pos="integer")
+ classinfo <-
+ S4Vectors:::makeClassinfoRowForCompactPrinting(x, .COL2CLASS)
+ ## A sanity check, but this should never happen!
+ stopifnot(identical(colnames(classinfo), colnames(out)))
+ out <- rbind(classinfo, out)
+ }
+ if (nrow(out) != 0L)
+ rownames(out) <- paste0(margin, rownames(out))
+ ## We set 'max' to 'length(out)' to avoid the getOption("max.print")
+ ## limit that would typically be reached when 'showHeadLines' global
+ ## option is set to Inf.
+ print(out, quote=FALSE, right=TRUE, max=length(out))
+}
+
+setMethod("show", "IPos",
+ function(object) show_IPos(object, margin=" ", print.classinfo=TRUE)
+)
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Combining
+###
+### Note that supporting "[" and "c" makes "[<-" work out-of-the-box!
+###
+
+### 'Class' must be "IPos" or the name of a concrete IPos subclass.
+### 'objects' must be a list of IPos objects.
+### Returns an instance of class 'Class'.
+combine_IPos_objects <- function(Class, objects,
+ use.names=TRUE, ignore.mcols=FALSE)
+{
+ if (!isSingleString(Class))
+ stop("'Class' must be a single character string")
+ if (!extends(Class, "IPos"))
+ stop("'Class' must be the name of a class that extends IPos")
+ if (!is.list(objects))
+ stop("'objects' must be a list")
+ if (!isTRUEorFALSE(use.names))
+ stop("'use.names' must be TRUE or FALSE")
+ ### TODO: Support 'use.names=TRUE'.
+ if (use.names)
+ stop("'use.names=TRUE' is not supported yet")
+ if (!isTRUEorFALSE(ignore.mcols))
+ stop("'ignore.mcols' must be TRUE or FALSE")
+
+ if (length(objects) != 0L) {
+ ## TODO: Implement (in C) fast 'elementIsNull(objects)' in S4Vectors
+ ## that does 'sapply(objects, is.null, USE.NAMES=FALSE)', and use it
+ ## here.
+ null_idx <- which(sapply(objects, is.null, USE.NAMES=FALSE))
+ if (length(null_idx) != 0L)
+ objects <- objects[-null_idx]
+ }
+ if (length(objects) == 0L)
+ return(new(Class))
+
+ ## TODO: Implement (in C) fast 'elementIs(objects, class)' in S4Vectors
+ ## that does 'sapply(objects, is, class, USE.NAMES=FALSE)', and use it
+ ## here. 'elementIs(objects, "NULL")' should work and be equivalent to
+ ## 'elementIsNull(objects)'.
+ if (!all(sapply(objects, is, Class, USE.NAMES=FALSE)))
+ stop("the objects to combine must be ", Class, " objects (or NULLs)")
+ objects_names <- names(objects)
+ names(objects) <- NULL # so lapply(objects, ...) below returns an
+ # unnamed list
+
+ ## Combine "pos_runs" slots.
+ pos_runs_slots <- lapply(objects, function(x) x@pos_runs)
+ ## TODO: Use combine_IRanges_objects() here when it's available.
+ ans_pos_runs <- stitch_Ranges(do.call(c, pos_runs_slots))
+
+ suppressWarnings(ans_len <- sum(width(ans_pos_runs)))
+ if (is.na(ans_len))
+ stop("too many genomic positions to combine")
+
+ ## Combine "mcols" slots.
+ if (ignore.mcols) {
+ ans_mcols <- NULL
+ } else {
+ ans_mcols <- do.call(S4Vectors:::rbind_mcols, objects)
+ }
+
+ ## Make 'ans' and return it.
+ new2(Class, pos_runs=ans_pos_runs, elementMetadata=ans_mcols, check=FALSE)
+}
+
+setMethod("c", "IPos",
+ function (x, ..., ignore.mcols=FALSE, recursive=FALSE)
+ {
+ if (!identical(recursive, FALSE))
+ stop("\"c\" method for IPos objects ",
+ "does not support the 'recursive' argument")
+ if (missing(x)) {
+ objects <- list(...)
+ x <- objects[[1L]]
+ } else {
+ objects <- list(x, ...)
+ }
+ combine_IPos_objects(class(x), objects,
+ use.names=FALSE,
+ ignore.mcols=ignore.mcols)
+ }
+)
+
diff --git a/R/IRanges-class.R b/R/IRanges-class.R
index 3e7dae6..9196e47 100644
--- a/R/IRanges-class.R
+++ b/R/IRanges-class.R
@@ -46,7 +46,7 @@ setMethod("parallelSlotNames", "IRanges",
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-### Accessor methods.
+### Getters
###
setMethod("start", "IRanges", function(x, ...) x@start)
@@ -55,6 +55,25 @@ setMethod("width", "IRanges", function(x) x@width)
setMethod("names", "IRanges", function(x) x@NAMES)
+setMethod("ranges", "Ranges",
+ function(x, use.names=TRUE, use.mcols=FALSE)
+ {
+ if (!isTRUEorFALSE(use.names))
+ stop("'use.names' must be TRUE or FALSE")
+ if (!isTRUEorFALSE(use.mcols))
+ stop("'use.mcols' must be TRUE or FALSE")
+ ans_start <- start(x)
+ ans_width <- width(x)
+ ans_names <- if (use.names) names(x) else NULL
+ ans_mcols <- if (use.mcols) mcols(x) else NULL
+ new2("IRanges", start=ans_start,
+ width=ans_width,
+ NAMES=ans_names,
+ elementMetadata=ans_mcols,
+ check=FALSE)
+ }
+)
+
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### isEmpty() and isNormal()
@@ -129,11 +148,8 @@ setValidity2("NormalIRanges", .valid.NormalIRanges)
### Coercion.
###
-### Any Ranges object can be turned into an IRanges instance.
setAs("Ranges", "IRanges",
- function(from)
- new2("IRanges", start=start(from), width=width(from),
- NAMES=names(from), check=FALSE)
+ function(from) ranges(from, use.mcols=TRUE)
)
### Helper function (not exported) used by the "coerce" methods defined in
@@ -161,7 +177,7 @@ setAs("logical", "NormalIRanges",
function(from) .Call2("NormalIRanges_from_logical", from, PACKAGE="IRanges")
)
-### coersion from integer
+### coercion from integer
setAs("integer", "IRanges",
function(from) .Call2("IRanges_from_integer", from, PACKAGE="IRanges")
)
@@ -175,6 +191,43 @@ setAs("numeric", "IRanges", function(from) as(as.integer(from), "IRanges"))
setAs("numeric", "NormalIRanges",
function(from) newNormalIRangesFromIRanges(as(as.integer(from), "IRanges")))
+### coercion from character
+.from_character_to_IRanges <- function(from)
+{
+ stopifnot(is.character(from))
+ if (anyNA(from))
+ stop(wmsg("converting a character vector to an IRanges object ",
+ "does not support NAs"))
+ error_msg <- wmsg(
+ "The character vector to convert to an IRanges object must ",
+ "contain strings of the form \"start-end\" or \"start..end\", ",
+ "with end >= start - 1, or just \"pos\". For example: \"2501-2900\", ",
+ "\"2501..2900\", or \"740\"."
+ )
+ ## We want to split on the first occurence of "-" that is preceeded by
+ ## a digit (ignoring and removing the spaces in between if any).
+ from <- sub("([[:digit:]])[[:space:]]*-", "\\1..", from)
+ split2 <- CharacterList(strsplit(from, "..", fixed=TRUE))
+ split2_eltNROWS <- elementNROWS(split2)
+ if (!all(split2_eltNROWS <= 2L))
+ stop(error_msg)
+ ans_start <- suppressWarnings(as.integer(phead(split2, n=1L)))
+ ans_end <- suppressWarnings(as.integer(ptail(split2, n=1L)))
+ if (anyNA(ans_start) || anyNA(ans_end))
+ stop(error_msg)
+ IRanges(ans_start, ans_end, names=names(from))
+}
+setAs("character", "IRanges", .from_character_to_IRanges)
+
+.from_factor_to_IRanges <- function(from)
+{
+ from <- setNames(as.character(from), names(from))
+ .from_character_to_IRanges(from)
+}
+setAs("factor", "IRanges", .from_factor_to_IRanges)
+
+setAs("ANY", "Ranges", function(from) as(from, "IRanges"))
+
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Low-level setters for IRanges objects.
diff --git a/R/List-class-leftovers.R b/R/List-class-leftovers.R
index c5b4c82..b1e433c 100644
--- a/R/List-class-leftovers.R
+++ b/R/List-class-leftovers.R
@@ -41,9 +41,9 @@ setMethod("stack", "List",
index <- .stack.ind(x, index.var)
unlistsToVector <- is(value, "Vector")
if (unlistsToVector) {
- df <- cbind(index, S4Vectors:::ensureMcols(unname(value)))
+ df <- cbind(index, S4Vectors:::ensureMcols(unname(value)))
} else {
- df <- DataFrame(index, as(value, "DataFrame"))
+ df <- DataFrame(index, as(unname(value), "DataFrame"))
colnames(df)[2] <- value.var
}
if (!is.null(name.var)) {
diff --git a/R/NCList-class.R b/R/NCList-class.R
index 270ba81..59e1609 100644
--- a/R/NCList-class.R
+++ b/R/NCList-class.R
@@ -16,25 +16,11 @@ setClass("NCList",
)
)
-setMethod("ranges", "NCList",
- function(x, use.mcols=FALSE)
- {
- if (!isTRUEorFALSE(use.mcols))
- stop("'use.mcols' must be TRUE or FALSE")
- ans <- x@ranges
- if (use.mcols)
- mcols(ans) <- mcols(x)
- ans
- }
-)
-
-setMethod("length", "NCList", function(x) length(ranges(x)))
-setMethod("names", "NCList", function(x) names(ranges(x)))
-setMethod("start", "NCList", function(x, ...) start(ranges(x)))
-setMethod("end", "NCList", function(x, ...) end(ranges(x)))
-setMethod("width", "NCList", function(x) width(ranges(x)))
-
-setAs("NCList", "IRanges", function(from) ranges(from, use.mcols=TRUE))
+setMethod("length", "NCList", function(x) length(x@ranges))
+setMethod("names", "NCList", function(x) names(x@ranges))
+setMethod("start", "NCList", function(x, ...) start(x@ranges))
+setMethod("end", "NCList", function(x, ...) end(x@ranges))
+setMethod("width", "NCList", function(x) width(x@ranges))
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
@@ -161,7 +147,7 @@ print_NCList <- function(x)
### NOT exported.
findOverlaps_NCList <- function(query, subject,
- maxgap=0L, minoverlap=1L,
+ maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "extend", "equal"),
select=c("all", "first", "last", "arbitrary", "count"),
circle.length=NA_integer_)
@@ -227,27 +213,31 @@ setMethod("parallelSlotNames", "NCLists",
)
### TODO: Move rglist() generic from GenomicRanges to IRanges
-#setMethod("rglist", "NCLists", function(x, ...) x@ranges)
+#setMethod("rglist", "NCLists", function(x, ...) x@rglist)
setMethod("ranges", "NCLists",
- function(x, use.mcols=FALSE)
+ function(x, use.names=TRUE, use.mcols=FALSE)
{
+ if (!isTRUEorFALSE(use.names))
+ stop("'use.names' must be TRUE or FALSE")
if (!isTRUEorFALSE(use.mcols))
stop("'use.mcols' must be TRUE or FALSE")
ans <- x@rglist
+ if (!use.names)
+ names(ans) <- NULL
if (use.mcols)
mcols(ans) <- mcols(x)
ans
}
)
-setMethod("length", "NCLists", function(x) length(ranges(x)))
-setMethod("names", "NCLists", function(x) names(ranges(x)))
-setMethod("start", "NCLists", function(x, ...) start(ranges(x)))
-setMethod("end", "NCLists", function(x, ...) end(ranges(x)))
-setMethod("width", "NCLists", function(x) width(ranges(x)))
+setMethod("length", "NCLists", function(x) length(x@rglist))
+setMethod("names", "NCLists", function(x) names(x@rglist))
+setMethod("start", "NCLists", function(x, ...) start(x@rglist))
+setMethod("end", "NCLists", function(x, ...) end(x@rglist))
+setMethod("width", "NCLists", function(x) width(x@rglist))
-setMethod("elementNROWS", "NCLists", function(x) elementNROWS(ranges(x)))
+setMethod("elementNROWS", "NCLists", function(x) elementNROWS(x@rglist))
setMethod("getListElement", "NCLists",
function (x, i, exact=TRUE)
{
@@ -316,7 +306,7 @@ NCList_find_overlaps_in_groups <- function(
q, q_space, q_groups,
s, s_space, s_groups,
nclists, nclist_is_q,
- maxgap=0L, minoverlap=1L,
+ maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "extend", "equal"),
select=c("all", "first", "last", "arbitrary", "count"),
circle.length)
@@ -400,7 +390,7 @@ NCList_find_overlaps_in_groups <- function(
### (b) integer vectors if 'select' is not "all". In that case the list is
### parallel to and has the same shape as 'query'.
findOverlaps_NCLists <- function(query, subject,
- maxgap=0L, minoverlap=1L,
+ maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "extend", "equal"),
select=c("all", "first", "last", "arbitrary", "count"),
circle.length=NA_integer_)
diff --git a/R/RangedData-class.R b/R/RangedData-class.R
index 0e72783..a90687b 100644
--- a/R/RangedData-class.R
+++ b/R/RangedData-class.R
@@ -52,7 +52,9 @@ setReplaceMethod("values", "RangedData",
x
})
-setMethod("ranges", "RangedData", function(x) x@ranges)
+setMethod("ranges", "RangedData",
+ function(x, use.names=TRUE, use.mcols=FALSE) x@ranges
+)
setReplaceMethod("ranges", "RangedData",
function(x, value) {
@@ -235,10 +237,13 @@ setValidity2("RangedData", .valid.RangedData)
RangedData <- function(ranges = IRanges(), ..., space = NULL, universe = NULL)
{
hasDots <- (((nargs() - !missing(space)) - !missing(universe)) > 1)
-
- if (!is.null(universe) && !isSingleString(universe))
- stop("'universe' must be a single string")
-
+ if (!is.null(universe)) {
+ msg <- wmsg("The 'universe' argument of the RangedData() ",
+ "constructor function is deprecated.")
+ .Deprecated(msg=msg)
+ if (!isSingleString(universe))
+ stop("'universe' must be a single string")
+ }
if (is(ranges, "RangesList")) {
if (!is.null(space))
warning("since 'class(ranges)' extends RangesList, 'space' argument is ignored")
@@ -285,7 +290,8 @@ RangedData <- function(ranges = IRanges(), ..., space = NULL, universe = NULL)
ranges <- as(ranges, "IRanges")
ranges <- split(ranges, space)
}
- universe(ranges) <- universe
+ if (!is.null(universe))
+ universe(ranges) <- universe
if (hasDots) {
args <- list(...)
@@ -540,8 +546,6 @@ setMethod("c", "RangedData", function(x, ..., recursive = FALSE) {
setMethod("rbind", "RangedData", function(..., deparse.level=1) {
args <- unname(list(...))
rls <- lapply(args, ranges)
- if (!all(sapply(sapply(rls, universe), identical, universe(rls[[1L]]))))
- stop("All args in '...' must have the same universe")
nms <- unique(unlist(lapply(args, names), use.names=FALSE))
rls <- lapply(rls, function(x) {y <- as.list(x)[nms];names(y) <- nms;y})
dfs <-
diff --git a/R/RangedSelection-class.R b/R/RangedSelection-class.R
index 6042df2..d032cba 100644
--- a/R/RangedSelection-class.R
+++ b/R/RangedSelection-class.R
@@ -9,7 +9,9 @@ setClass("RangedSelection",
### Accessor methods.
###
-setMethod("ranges", "RangedSelection", function(x) x@ranges)
+setMethod("ranges", "RangedSelection",
+ function(x, use.names=TRUE, use.mcols=FALSE) x@ranges
+)
setReplaceMethod("ranges", "RangedSelection",
function(x, value) {
x@ranges <- value
diff --git a/R/Ranges-class.R b/R/Ranges-class.R
index 45849d7..9eb6a5a 100644
--- a/R/Ranges-class.R
+++ b/R/Ranges-class.R
@@ -68,6 +68,29 @@ setValidity2("Ranges", .valid.Ranges)
### Coercion.
###
+### Propagate the names.
+setMethod("as.character", "Ranges",
+ function(x)
+ {
+ if (length(x) == 0L)
+ return(setNames(character(0), names(x)))
+ x_start <- start(x)
+ x_end <- end(x)
+ ans <- paste0(x_start, "-", x_end)
+ idx <- which(x_start == x_end)
+ ans[idx] <- as.character(x_start)[idx]
+ names(ans) <- names(x)
+ ans
+ }
+)
+
+### The as.factor() generic doesn't have the ... argument so this method
+### cannot support the 'ignore.strand' argument.
+setMethod("as.factor", "Ranges",
+ function(x)
+ factor(as.character(x), levels=as.character(sort(unique(x))))
+)
+
setMethod("as.matrix", "Ranges",
function(x, ...)
matrix(data=c(start(x), width(x)), ncol=2,
diff --git a/R/Ranges-comparison.R b/R/Ranges-comparison.R
index 0478806..c977e16 100644
--- a/R/Ranges-comparison.R
+++ b/R/Ranges-comparison.R
@@ -69,32 +69,78 @@ setMethod("selfmatch", "Ranges",
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-### Ordering ranges
+### order() and related methods.
###
-### order(), sort(), rank() on Ranges objects are consistent with the order
-### on ranges implied by pcompare().
+### is.unsorted(), order(), sort(), rank() on Ranges derivatives are
+### consistent with the order implied by pcompare().
+### is.unsorted() is a quick/cheap way of checking whether a Ranges
+### derivative is already sorted, e.g., called prior to a costly sort.
+### sort() and rank() will work out-of-the-box on a Ranges derivative thanks
+### to the method for List objects (which delegates to the method for Vector
+### objects).
###
+.Ranges_as_IntegerPairs <- function(x)
+{
+ a <- start(x)
+ b <- width(x)
+ list(a, b)
+}
+
+setMethod("is.unsorted", "Ranges",
+ function(x, na.rm=FALSE, strictly=FALSE)
+ {
+ if (!identical(na.rm, FALSE))
+ warning("\"is.unsorted\" method for Ranges objects ",
+ "ignores the 'na.rm' argument")
+ if (!isTRUEorFALSE(strictly))
+ stop("'strictly' must be TRUE of FALSE")
+ ## It seems that creating the integer pairs below is faster when
+ ## 'x' is already sorted (TODO: Investigate why). Therefore, and
+ ## somewhat counterintuitively, is.unsorted() can be faster when 'x'
+ ## is already sorted (which, in theory, is the worst-case scenario
+ ## because S4Vectors:::sortedIntegerPairs() will then need to take a
+ ## full walk on 'x') than when it is unsorted (in which case
+ ## S4Vectors:::sortedIntegerPairs() might stop walking on 'x' after
+ ## checking its first 2 elements only -- the best-case scenario).
+ pairs <- .Ranges_as_IntegerPairs(x)
+ !S4Vectors:::sortedIntegerPairs(pairs[[1L]], pairs[[2L]],
+ strictly=strictly)
+ }
+)
+
+.order_Ranges <- function(x, decreasing=FALSE)
+{
+ if (!isTRUEorFALSE(decreasing))
+ stop("'decreasing' must be TRUE or FALSE")
+ pairs <- .Ranges_as_IntegerPairs(x)
+ orderIntegerPairs(pairs[[1L]], pairs[[2L]], decreasing=decreasing)
+}
+
### 'na.last' is pointless (Ranges objects don't contain NAs) so is ignored.
### 'method' is also ignored at the moment.
setMethod("order", "Ranges",
function(..., na.last=TRUE, decreasing=FALSE,
method=c("auto", "shell", "radix"))
{
+ ## Turn off this warning for now since it triggers spurious warnings
+ ## when calling sort() on a RangesList object. The root of the
+ ## problem is inconsistent defaults for 'na.last' between order() and
+ ## sort(), as reported here:
+ ## https://stat.ethz.ch/pipermail/r-devel/2015-November/072012.html
+ #if (!identical(na.last, TRUE))
+ # warning("\"order\" method for Ranges objects ",
+ # "ignores the 'na.last' argument")
if (!isTRUEorFALSE(decreasing))
stop("'decreasing' must be TRUE or FALSE")
## All arguments in '...' are guaranteed to be Ranges objects.
args <- list(...)
- if (length(args) == 1L) {
- x <- args[[1L]]
- return(orderIntegerPairs(start(x), width(x),
- decreasing=decreasing))
- }
- order_args <- vector("list", 2L * length(args))
- idx <- 2L * seq_along(args)
- order_args[idx - 1L] <- lapply(args, start)
- order_args[idx] <- lapply(args, width)
- do.call(order, c(order_args, list(decreasing=decreasing)))
+ if (length(args) == 1L)
+ return(.order_Ranges(args[[1L]], decreasing))
+ order_args <- c(unlist(lapply(args, .Ranges_as_IntegerPairs),
+ recursive=FALSE, use.names=FALSE),
+ list(na.last=na.last, decreasing=decreasing))
+ do.call(order, order_args)
}
)
diff --git a/R/RangesList-class.R b/R/RangesList-class.R
index 3b9c3b1..e2499c4 100644
--- a/R/RangesList-class.R
+++ b/R/RangesList-class.R
@@ -175,6 +175,7 @@ setGeneric("universe", function(x) standardGeneric("universe"))
setMethod("universe", "RangesList",
function(x)
{
+ .Deprecated(msg="The universe() getter is deprecated.")
metadata(x)$universe
})
@@ -182,6 +183,7 @@ setGeneric("universe<-", function(x, value) standardGeneric("universe<-"))
setReplaceMethod("universe", "RangesList",
function(x, value)
{
+ .Deprecated(msg="The universe() setter is deprecated.")
if (!is.null(value) && !isSingleString(value))
stop("'value' must be a single string or NULL")
metadata(x)$universe <- value
@@ -334,15 +336,21 @@ setAs("CompressedRleList", "CompressedIRangesList",
RangesList <- function(..., universe = NULL)
{
- if (!is.null(universe) && !isSingleString(universe))
- stop("'universe' must be a single string or NULL")
+ if (!is.null(universe)) {
+ msg <- wmsg("The 'universe' argument of the RangesList() ",
+ "constructor function is deprecated.")
+ .Deprecated(msg=msg)
+ if (!isSingleString(universe))
+ stop("'universe' must be a single string or NULL")
+ }
ranges <- list(...)
if (length(ranges) == 1 && is.list(ranges[[1L]]))
ranges <- ranges[[1L]]
if (!all(sapply(ranges, is, "Ranges")))
stop("all elements in '...' must be Ranges objects")
ans <- S4Vectors:::new_SimpleList_from_list("SimpleRangesList", ranges)
- universe(ans) <- universe
+ if (!is.null(universe))
+ universe(ans) <- universe
ans
}
@@ -350,8 +358,13 @@ IRangesList <- function(..., universe=NULL, compress=TRUE)
{
if (!isTRUEorFALSE(compress))
stop("'compress' must be TRUE or FALSE")
- if (!is.null(universe) && !isSingleString(universe))
- stop("'universe' must be a single string or NULL")
+ if (!is.null(universe)) {
+ msg <- wmsg("The 'universe' argument of the IRangesList() ",
+ "constructor function is deprecated.")
+ .Deprecated(msg=msg)
+ if (!isSingleString(universe))
+ stop("'universe' must be a single string or NULL")
+ }
args <- list(...)
if (length(args) == 2L &&
setequal(names(args), c("start", "end")) &&
@@ -380,7 +393,8 @@ IRangesList <- function(..., universe=NULL, compress=TRUE)
else
ans <- as(args, "SimpleIRangesList")
}
- universe(ans) <- universe
+ if (!is.null(universe))
+ universe(ans) <- universe
ans
}
@@ -619,8 +633,6 @@ setAs("RleList", "SimpleNormalIRangesList",
if (length(args) == 0L)
stop("nothing to merge")
x <- args[[1L]]
- if (!all(sapply(sapply(args, universe), identical, universe(x))))
- stop("all RangesList objects to merge must have the same universe")
spaceList <- lapply(args, names)
names <- spaces <- unique(do.call(c, spaceList))
if (any(sapply(spaceList, is.null))) {
diff --git a/R/Rle-class-leftovers.R b/R/Rle-class-leftovers.R
index 964a798..3046719 100644
--- a/R/Rle-class-leftovers.R
+++ b/R/Rle-class-leftovers.R
@@ -12,7 +12,10 @@
### Accessor methods.
###
-setMethod("ranges", "Rle", function(x) IRanges(start(x), width = width(x)))
+setMethod("ranges", "Rle",
+ function(x, use.names=TRUE, use.mcols=FALSE)
+ IRanges(start(x), width=width(x))
+)
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
diff --git a/R/RleViewsList-class.R b/R/RleViewsList-class.R
index bb3286a..2655201 100644
--- a/R/RleViewsList-class.R
+++ b/R/RleViewsList-class.R
@@ -29,8 +29,13 @@ setMethod("Views", "RleList",
RleViewsList <- function(..., rleList, rangesList, universe = NULL)
{
- if (!is.null(universe) && !isSingleString(universe))
- stop(wmsg("'universe' must be a single string or NULL"))
+ if (!is.null(universe)) {
+ msg <- wmsg("The 'universe' argument of the RleViewsList() ",
+ "constructor function is deprecated.")
+ .Deprecated(msg=msg)
+ if (!isSingleString(universe))
+ stop(wmsg("'universe' must be a single string or NULL"))
+ }
views <- list(...)
if (!missing(rleList) && !missing(rangesList)) {
if (length(views) > 0)
@@ -73,7 +78,8 @@ RleViewsList <- function(..., rleList, rangesList, universe = NULL)
stop(wmsg("all elements in '...' must be RleViews objects"))
}
ans <- S4Vectors:::new_SimpleList_from_list("SimpleRleViewsList", views)
- universe(ans) <- universe
+ if (!is.null(universe))
+ universe(ans) <- universe
ans
}
diff --git a/R/Views-class.R b/R/Views-class.R
index 42a4b14..01ed4d1 100644
--- a/R/Views-class.R
+++ b/R/Views-class.R
@@ -24,9 +24,9 @@ setGeneric("subject", function(x) standardGeneric("subject"))
setMethod("subject", "Views", function(x) x@subject)
-setGeneric("ranges", function(x, ...) standardGeneric("ranges"))
-
-setMethod("ranges", "Views", function(x) x@ranges)
+setMethod("ranges", "Views",
+ function(x, use.names=TRUE, use.mcols=FALSE) x@ranges
+)
setGeneric("ranges<-", function(x, ..., value) standardGeneric("ranges<-"))
diff --git a/R/ViewsList-class.R b/R/ViewsList-class.R
index 6b9a27f..e543366 100644
--- a/R/ViewsList-class.R
+++ b/R/ViewsList-class.R
@@ -20,8 +20,9 @@ setClass("SimpleViewsList",
###
setMethod("ranges", "SimpleViewsList",
- function(x, ...) S4Vectors:::new_SimpleList_from_list("SimpleIRangesList",
- lapply(x, ranges))
+ function(x, use.names=TRUE, use.mcols=FALSE)
+ S4Vectors:::new_SimpleList_from_list("SimpleIRangesList",
+ lapply(x, ranges, use.names=use.names, use.mcols=use.mcols))
)
setMethod("start", "SimpleViewsList", function(x, ...) start(ranges(x)))
@@ -32,6 +33,7 @@ setMethod("width", "SimpleViewsList", function(x) width(ranges(x)))
setMethod("universe", "ViewsList",
function(x)
{
+ .Deprecated(msg="The universe() getter is deprecated.")
### FIXME: for compatibility with older versions, eventually emit warning
if (is.null(metadata(x)) || is.character(metadata(x)))
metadata(x)
@@ -43,6 +45,7 @@ setMethod("universe", "ViewsList",
setReplaceMethod("universe", "ViewsList",
function(x, value)
{
+ .Deprecated(msg="The universe() setter is deprecated.")
if (!is.null(value) && !isSingleString(value))
stop("'value' must be a single string or NULL")
metadata(x)$universe <- value
diff --git a/R/bind-arrays.R b/R/bind-arrays.R
deleted file mode 100644
index 48c68a3..0000000
--- a/R/bind-arrays.R
+++ /dev/null
@@ -1,162 +0,0 @@
-### =========================================================================
-### Bind arrays with an arbitrary number of dimensions along an arbitrary
-### dimension
-### -------------------------------------------------------------------------
-###
-
-
-### Return a matrix with one row per dim and one column per object if the
-### objects are bindable. Otherwise return a character vector describing why
-### the objects are not bindable. This design allows the function to be used
-### in the context of a validity method.
-### NOT exported but used in the HDF5Array package.
-get_dims_to_bind <- function(objects, no.check.along)
-{
- dims <- lapply(objects, dim)
- ndims <- lengths(dims)
- ndim <- ndims[[1L]]
- if (!all(ndims == ndim))
- return(c("all the objects to bind must have ",
- "the same number of dimensions"))
- tmp <- unlist(dims, use.names=FALSE)
- if (is.null(tmp))
- return("the objects to bind have no dimensions")
- dims <- matrix(tmp, nrow=ndim)
- tmp <- dims[-no.check.along, , drop=FALSE]
- if (!all(tmp == tmp[ , 1L]))
- return("the objects to bind have incompatible dimensions")
- dims
-}
-
-### Combine the dims the rbind/cbind way.
-combine_dims_along <- function(dims, along)
-{
- ans_dim <- dims[ , 1L]
- ans_dim[[along]] <- sum(dims[along, ])
- ans_dim
-}
-
-
-### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-### Combine the dimnames of a list of array-like objects
-###
-
-### Assume all the arrays in 'objects' have the same number of dimensions.
-### NOT exported but used in the HDF5Array package.
-combine_dimnames <- function(objects)
-{
- lapply(seq_along(dim(objects[[1L]])),
- function(n) {
- for (x in objects) {
- dn <- dimnames(x)[[n]]
- if (!is.null(dn))
- return(dn)
- }
- NULL
- })
-}
-
-### Combine the dimnames the rbind/cbind way.
-### NOT exported but used in the HDF5Array package.
-combine_dimnames_along <- function(objects, dims, along)
-{
- dimnames <- combine_dimnames(objects)
- along_names <- lapply(objects, function(x) dimnames(x)[[along]])
- along_names_lens <- lengths(along_names)
- if (any(along_names_lens != 0L)) {
- fix_idx <- which(along_names_lens != dims[along, ])
- along_names[fix_idx] <- lapply(dims[along, fix_idx], character)
- }
- along_names <- unlist(along_names, use.names=FALSE)
- if (!is.null(along_names))
- dimnames[[along]] <- along_names
- if (all(S4Vectors:::sapply_isNULL(dimnames)))
- dimnames <- NULL
- dimnames
-}
-
-
-### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-### simple_abind()
-###
-
-### 'objects' is assumed to be a list of vector-like objects.
-### 'block_lens' is assumed to be an integer vector parallel to 'objects'
-### specifying the block length for each object in 'objects'. In addition the
-### length of 'object[[i]]' must be 'k * block_lens[[i]]' (k is the same for
-### all the objects).
-.intertwine_blocks <- function(objects, block_lens)
-{
- data <- unlist(objects, recursive=FALSE, use.names=FALSE)
- objects_lens <- lengths(objects)
- if (all(objects_lens == 0L))
- return(data)
-
- k <- objects_lens %/% block_lens
- k <- unique(k[!is.na(k)])
- stopifnot(length(k) == 1L) # sanity check
-
- nobject <- length(objects)
- objects_cumlens <- cumsum(objects_lens)
- ranges <- lapply(seq_len(nobject),
- function(i) {
- width <- block_lens[[i]]
- offset <- if (i == 1L) 0L else objects_cumlens[[i - 1L]]
- successiveIRanges(rep.int(width, k), from=offset + 1L)
- })
- ranges <- do.call(c, ranges)
- i <- as.vector(matrix(seq_len(nobject * k), nrow=nobject, byrow=TRUE))
- extractROWS(data, ranges[i])
-}
-
-### A stripped-down version of abind::abind().
-### Some differences:
-### (a) Treatment of dimnames: simple_abind() treatment of dimnames is
-### consistent with base::rbind() and base::cbind(). This is not the
-### case for abind::abind() which does some strange things with the
-### dimnames.
-### (b) Performance: simple_abind() has a little bit more overhead than
-### abind::abind(). This makes it slower on small objects. However it
-### tends to be slightly faster on big objects.
-### NOT exported but used in the HDF5Array package.
-simple_abind <- function(..., along)
-{
- objects <- list(...)
- object_is_NULL <- S4Vectors:::sapply_isNULL(objects)
- if (any(object_is_NULL))
- objects <- objects[!object_is_NULL]
- if (length(objects) == 0L)
- return(NULL)
- if (length(objects) == 1L)
- return(objects[[1L]])
-
- ## Check dim compatibility.
- dims <- get_dims_to_bind(objects, no.check.along=along)
- if (is.character(dims))
- stop(wmsg(dims))
-
- ## Perform the binding.
- block_lens <- dims[along, ]
- for (n in seq_len(along - 1L))
- block_lens <- block_lens * dims[n, ]
- ans <- .intertwine_blocks(objects, block_lens)
-
- ## Set the dim.
- dim(ans) <- combine_dims_along(dims, along)
-
- ## Combine and set the dimnames.
- dimnames(ans) <- combine_dimnames_along(objects, dims, along)
- ans
-}
-
-
-### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-### Bind arrays along their 1st or 2nd dimension
-###
-
-setGeneric("arbind", function(...) standardGeneric("arbind"))
-setGeneric("acbind", function(...) standardGeneric("acbind"))
-
-setMethod("arbind", "array", function(...) simple_abind(..., along=1L))
-setMethod("acbind", "array", function(...) simple_abind(..., along=2L))
-
diff --git a/R/coverage-methods.R b/R/coverage-methods.R
index 5f4b005..d6ca8db 100644
--- a/R/coverage-methods.R
+++ b/R/coverage-methods.R
@@ -278,10 +278,19 @@ setMethod("coverage", "RangesList",
}
)
+.coverage_RangedData_deprecation_msg <- c(
+ "The \"coverage\" method for RangedData objects is deprecated ",
+ "and won't be replaced. Please migrate your code to use GRanges or ",
+ "GRangesList objects instead. RangedData objects will be deprecated ",
+ "soon (their use has been discouraged since BioC 2.12, that is, since ",
+ "2014). See IMPORTANT NOTE in ?RangedData"
+)
+
setMethod("coverage", "RangedData",
function(x, shift=0L, width=NULL, weight=1L,
method=c("auto", "sort", "hash"))
{
+ .Deprecated(msg=wmsg(.coverage_RangedData_deprecation_msg))
x_ranges <- ranges(x)
if (length(metadata(x)) > 0)
metadata(x_ranges) <- metadata(x)
diff --git a/R/extractList.R b/R/extractList.R
index 4f4ab94..1fd11ec 100644
--- a/R/extractList.R
+++ b/R/extractList.R
@@ -268,6 +268,8 @@ normSplitFactor <- function(f, x) {
as.factor2 <- function(x) {
if (is.factor(x))
return(x)
+ if (is.null(x))
+ return(factor())
g <- grouping(x)
p <- PartitioningByEnd(relist(g))
levs <- as.character(x[g[end(p)]])
diff --git a/R/findOverlaps-methods.R b/R/findOverlaps-methods.R
index 83dd428..4480b22 100644
--- a/R/findOverlaps-methods.R
+++ b/R/findOverlaps-methods.R
@@ -58,7 +58,7 @@ setMethod("process_self_hits", "SortedByQueryHitsList",
###
setGeneric("findOverlaps", signature=c("query", "subject"),
- function(query, subject, maxgap=0L, minoverlap=1L,
+ function(query, subject, maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"),
select=c("all", "first", "last", "arbitrary"),
...)
@@ -66,7 +66,7 @@ setGeneric("findOverlaps", signature=c("query", "subject"),
)
findOverlaps_Ranges <- function(query, subject,
- maxgap=0L, minoverlap=1L,
+ maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"),
select=c("all", "first", "last", "arbitrary"))
{
@@ -80,7 +80,7 @@ findOverlaps_Ranges <- function(query, subject,
setMethod("findOverlaps", c("Ranges", "Ranges"), findOverlaps_Ranges)
setMethod("findOverlaps", c("Vector", "missing"),
- function(query, subject, maxgap=0L, minoverlap=1L,
+ function(query, subject, maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"),
select=c("all", "first", "last", "arbitrary"),
...,
@@ -96,7 +96,7 @@ setMethod("findOverlaps", c("Vector", "missing"),
)
setMethod("findOverlaps", c("integer", "Ranges"),
- function(query, subject, maxgap=0L, minoverlap=1L,
+ function(query, subject, maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"),
select=c("all", "first", "last", "arbitrary"))
{
@@ -107,7 +107,7 @@ setMethod("findOverlaps", c("integer", "Ranges"),
)
setMethod("findOverlaps", c("Views", "Views"),
- function(query, subject, maxgap=0L, minoverlap=1L,
+ function(query, subject, maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"),
select=c("all", "first", "last", "arbitrary"))
{
@@ -118,7 +118,7 @@ setMethod("findOverlaps", c("Views", "Views"),
)
setMethod("findOverlaps", c("Views", "Vector"),
- function(query, subject, maxgap=0L, minoverlap=1L,
+ function(query, subject, maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"),
select=c("all", "first", "last", "arbitrary"))
{
@@ -129,7 +129,7 @@ setMethod("findOverlaps", c("Views", "Vector"),
)
setMethod("findOverlaps", c("Vector", "Views"),
- function(query, subject, maxgap=0L, minoverlap=1L,
+ function(query, subject, maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"),
select=c("all", "first", "last", "arbitrary"))
{
@@ -141,7 +141,7 @@ setMethod("findOverlaps", c("Vector", "Views"),
# might consider making this the following:
# setMethod("findOverlaps", c("RangesList", "RangesList"),
-# function(query, subject, maxgap = 0L, minoverlap = 1L,
+# function(query, subject, maxgap = -1L, minoverlap = 0L,
# type = c("any", "start", "end", "within", "equal"),
# select = c("all", "first", "last", "arbitrary"),
# drop = FALSE)
@@ -153,7 +153,7 @@ setMethod("findOverlaps", c("Vector", "Views"),
# )
setMethod("findOverlaps", c("RangesList", "RangesList"),
- function(query, subject, maxgap = 0L, minoverlap = 1L,
+ function(query, subject, maxgap = -1L, minoverlap = 0L,
type = c("any", "start", "end", "within", "equal"),
select = c("all", "first", "last", "arbitrary"),
drop = FALSE)
@@ -198,9 +198,8 @@ setMethod("findOverlaps", c("RangesList", "RangesList"),
ans
})
-
setMethod("findOverlaps", c("ViewsList", "ViewsList"),
- function(query, subject, maxgap=0L, minoverlap=1L,
+ function(query, subject, maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"),
select=c("all", "first", "last", "arbitrary"),
drop=FALSE)
@@ -213,7 +212,7 @@ setMethod("findOverlaps", c("ViewsList", "ViewsList"),
)
setMethod("findOverlaps", c("ViewsList", "Vector"),
- function(query, subject, maxgap=0L, minoverlap=1L,
+ function(query, subject, maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"),
select=c("all", "first", "last", "arbitrary"),
drop=FALSE)
@@ -226,7 +225,7 @@ setMethod("findOverlaps", c("ViewsList", "Vector"),
)
setMethod("findOverlaps", c("Vector", "ViewsList"),
- function(query, subject, maxgap=0L, minoverlap=1L,
+ function(query, subject, maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"),
select=c("all", "first", "last", "arbitrary"),
drop=FALSE)
@@ -239,7 +238,7 @@ setMethod("findOverlaps", c("Vector", "ViewsList"),
)
setMethod("findOverlaps", c("RangedData", "RangedData"),
- function(query, subject, maxgap = 0L, minoverlap = 1L,
+ function(query, subject, maxgap = -1L, minoverlap = 0L,
type = c("any", "start", "end", "within", "equal"),
select = c("all", "first", "last", "arbitrary"),
drop = FALSE)
@@ -251,7 +250,7 @@ setMethod("findOverlaps", c("RangedData", "RangedData"),
})
setMethod("findOverlaps", c("RangedData", "RangesList"),
- function(query, subject, maxgap = 0L, minoverlap = 1L,
+ function(query, subject, maxgap = -1L, minoverlap = 0L,
type = c("any", "start", "end", "within", "equal"),
select = c("all", "first", "last", "arbitrary"),
drop = FALSE)
@@ -263,7 +262,7 @@ setMethod("findOverlaps", c("RangedData", "RangesList"),
})
setMethod("findOverlaps", c("RangesList", "RangedData"),
- function(query, subject, maxgap = 0L, minoverlap = 1L,
+ function(query, subject, maxgap = -1L, minoverlap = 0L,
type = c("any", "start", "end", "within", "equal"),
select = c("all", "first", "last", "arbitrary"),
drop = FALSE)
@@ -275,8 +274,7 @@ setMethod("findOverlaps", c("RangesList", "RangedData"),
})
setMethod("findOverlaps", c("Pairs", "missing"),
- function (query, subject, maxgap = 0L,
- minoverlap = 1L,
+ function (query, subject, maxgap = -1L, minoverlap = 0L,
type = c("any", "start", "end", "within", "equal"),
select = c("all", "first", "last", "arbitrary"), ...) {
findOverlaps(zipup(query), maxgap=maxgap,
@@ -284,8 +282,7 @@ setMethod("findOverlaps", c("Pairs", "missing"),
})
setMethod("findOverlaps", c("Pairs", "ANY"),
- function (query, subject, maxgap = 0L,
- minoverlap = 1L,
+ function (query, subject, maxgap = -1L, minoverlap = 0L,
type = c("any", "start", "end", "within", "equal"),
select = c("all", "first", "last", "arbitrary"), ...) {
findOverlaps(zipup(query), subject, maxgap=maxgap,
@@ -293,8 +290,7 @@ setMethod("findOverlaps", c("Pairs", "ANY"),
})
setMethod("findOverlaps", c("ANY", "Pairs"),
- function (query, subject, maxgap = 0L,
- minoverlap = 1L,
+ function (query, subject, maxgap = -1L, minoverlap = 0L,
type = c("any", "start", "end", "within", "equal"),
select = c("all", "first", "last", "arbitrary"), ...) {
findOverlaps(query, zipup(subject), maxgap=maxgap,
@@ -302,8 +298,7 @@ setMethod("findOverlaps", c("ANY", "Pairs"),
})
setMethod("findOverlaps", c("Pairs", "Pairs"),
- function (query, subject, maxgap = 0L,
- minoverlap = 1L,
+ function (query, subject, maxgap = -1L, minoverlap = 0L,
type = c("any", "start", "end", "within", "equal"),
select = c("all", "first", "last", "arbitrary"), ...) {
findOverlaps(zipup(query), zipup(subject),
@@ -315,27 +310,27 @@ setMethod("findOverlaps", c("Pairs", "Pairs"),
### countOverlaps()
###
-setGeneric("countOverlaps", signature = c("query", "subject"),
- function(query, subject, maxgap = 0L, minoverlap = 1L,
- type = c("any", "start", "end", "within", "equal"),
+setGeneric("countOverlaps", signature=c("query", "subject"),
+ function(query, subject, maxgap=-1L, minoverlap=0L,
+ type=c("any", "start", "end", "within", "equal"),
...)
standardGeneric("countOverlaps")
)
.countOverlaps_default <-
- function(query, subject, maxgap=0L, minoverlap=1L,
+ function(query, subject, maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"),
...)
{
type <- match.arg(type)
if (missing(subject)) {
hits <- findOverlaps(query,
- maxgap=maxgap, minoverlap=minoverlap,
+ maxgap=maxgap, minoverlap=minoverlap,
type=type,
...)
} else {
hits <- findOverlaps(query, subject,
- maxgap=maxgap, minoverlap=minoverlap,
+ maxgap=maxgap, minoverlap=minoverlap,
type=type,
...)
}
@@ -348,7 +343,7 @@ setMethod("countOverlaps", c("Vector", "Vector"), .countOverlaps_default)
setMethod("countOverlaps", c("Vector", "missing"), .countOverlaps_default)
countOverlaps_Ranges <- function(query, subject,
- maxgap=0L, minoverlap=1L,
+ maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"))
{
type <- match.arg(type)
@@ -362,7 +357,7 @@ countOverlaps_Ranges <- function(query, subject,
setMethod("countOverlaps", c("Ranges", "Ranges"), countOverlaps_Ranges)
setMethod("countOverlaps", c("RangesList", "RangesList"),
- function(query, subject, maxgap = 0L, minoverlap = 1L,
+ function(query, subject, maxgap=-1L, minoverlap=0L,
type = c("any", "start", "end", "within", "equal"))
{
IntegerList(mapply(countOverlaps, query, subject,
@@ -373,7 +368,7 @@ setMethod("countOverlaps", c("RangesList", "RangesList"),
})
setMethod("countOverlaps", c("ViewsList", "ViewsList"),
- function(query, subject, maxgap=0L, minoverlap=1L,
+ function(query, subject, maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"))
{
countOverlaps(ranges(query), ranges(subject),
@@ -383,7 +378,7 @@ setMethod("countOverlaps", c("ViewsList", "ViewsList"),
)
setMethod("countOverlaps", c("ViewsList", "Vector"),
- function(query, subject, maxgap=0L, minoverlap=1L,
+ function(query, subject, maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"))
{
countOverlaps(ranges(query), subject,
@@ -393,7 +388,7 @@ setMethod("countOverlaps", c("ViewsList", "Vector"),
)
setMethod("countOverlaps", c("Vector", "ViewsList"),
- function(query, subject, maxgap=0L, minoverlap=1L,
+ function(query, subject, maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"))
{
countOverlaps(query, ranges(subject),
@@ -403,21 +398,21 @@ setMethod("countOverlaps", c("Vector", "ViewsList"),
)
setMethod("countOverlaps", c("RangedData", "RangedData"),
- function(query, subject, maxgap = 0L, minoverlap = 1L,
+ function(query, subject, maxgap = -1L, minoverlap = 0L,
type = c("any", "start", "end", "within", "equal"))
{
countOverlaps(ranges(query), ranges(subject), maxgap = maxgap,
minoverlap = minoverlap, type = match.arg(type))
})
setMethod("countOverlaps", c("RangedData", "RangesList"),
- function(query, subject, maxgap = 0L, minoverlap = 1L,
+ function(query, subject, maxgap = -1L, minoverlap = 0L,
type = c("any", "start", "end", "within", "equal"))
{
countOverlaps(ranges(query), subject, maxgap = maxgap,
minoverlap = minoverlap, type = match.arg(type))
})
setMethod("countOverlaps", c("RangesList", "RangedData"),
- function(query, subject, maxgap = 0L, minoverlap = 1L,
+ function(query, subject, maxgap = -1L, minoverlap = 0L,
type = c("any", "start", "end", "within", "equal"))
{
countOverlaps(query, ranges(subject), maxgap = maxgap,
@@ -429,16 +424,16 @@ setMethod("countOverlaps", c("RangesList", "RangedData"),
### overlapsAny()
###
-### Same args and signature as countOverlaps() and subsetByOverlaps().
+### Same args and signature as countOverlaps().
setGeneric("overlapsAny", signature=c("query", "subject"),
- function(query, subject, maxgap=0L, minoverlap=1L,
+ function(query, subject, maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"),
...)
standardGeneric("overlapsAny")
)
.overlapsAny_default <-
- function(query, subject, maxgap=0L, minoverlap=1L,
+ function(query, subject, maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"),
...)
{
@@ -461,7 +456,7 @@ setMethod("overlapsAny", c("Vector", "Vector"), .overlapsAny_default)
setMethod("overlapsAny", c("Vector", "missing"), .overlapsAny_default)
setMethod("overlapsAny", c("RangesList", "RangesList"),
- function(query, subject, maxgap=0L, minoverlap=1L,
+ function(query, subject, maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"),
...)
{
@@ -488,7 +483,7 @@ setMethod("overlapsAny", c("RangesList", "RangesList"),
)
setMethod("overlapsAny", c("ViewsList", "ViewsList"),
- function(query, subject, maxgap=0L, minoverlap=1L,
+ function(query, subject, maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"),
...)
{
@@ -500,7 +495,7 @@ setMethod("overlapsAny", c("ViewsList", "ViewsList"),
)
setMethod("overlapsAny", c("ViewsList", "Vector"),
- function(query, subject, maxgap=0L, minoverlap=1L,
+ function(query, subject, maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"),
...)
{
@@ -512,7 +507,7 @@ setMethod("overlapsAny", c("ViewsList", "Vector"),
)
setMethod("overlapsAny", c("Vector", "ViewsList"),
- function(query, subject, maxgap=0L, minoverlap=1L,
+ function(query, subject, maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"),
...)
{
@@ -523,11 +518,20 @@ setMethod("overlapsAny", c("Vector", "ViewsList"),
}
)
+.overlapsAny_RangedData_deprecation_msg <- c( # pieces of the warning text passed to wmsg() by the RangedData "overlapsAny" methods
+ "The \"overlapsAny\" methods for RangedData objects are deprecated ",
+ "and won't be replaced. Please migrate your code to use GRanges or ",
+ "GRangesList objects instead. RangedData objects will be deprecated ",
+ "soon (their use has been discouraged since BioC 2.12, that is, since ",
+ "2014). See IMPORTANT NOTE in ?RangedData"
+)
+
setMethod("overlapsAny", c("RangedData", "RangedData"),
- function(query, subject, maxgap=0L, minoverlap=1L,
+ function(query, subject, maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"),
...)
{
+ .Deprecated(msg=wmsg(.overlapsAny_RangedData_deprecation_msg))
overlapsAny(ranges(query), ranges(subject),
maxgap=maxgap, minoverlap=minoverlap,
type=match.arg(type),
@@ -536,10 +540,11 @@ setMethod("overlapsAny", c("RangedData", "RangedData"),
)
setMethod("overlapsAny", c("RangedData", "RangesList"),
- function(query, subject, maxgap=0L, minoverlap=1L,
+ function(query, subject, maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"),
...)
{
+ .Deprecated(msg=wmsg(.overlapsAny_RangedData_deprecation_msg))
overlapsAny(ranges(query), subject,
maxgap=maxgap, minoverlap=minoverlap,
type=match.arg(type),
@@ -548,10 +553,11 @@ setMethod("overlapsAny", c("RangedData", "RangesList"),
)
setMethod("overlapsAny", c("RangesList", "RangedData"),
- function(query, subject, maxgap=0L, minoverlap=1L,
+ function(query, subject, maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"),
...)
{
+ .Deprecated(msg=wmsg(.overlapsAny_RangedData_deprecation_msg)) # warn first, then delegate to overlapsAny() on ranges(subject)
overlapsAny(query, ranges(subject),
maxgap=maxgap, minoverlap=minoverlap,
type=match.arg(type),
@@ -567,6 +573,194 @@ setMethod("overlapsAny", c("RangesList", "RangedData"),
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### subsetByOverlaps()
+###
+
+### First 2 arguments are 'x' and 'ranges' like for the
+### transcriptsByOverlaps(), exonsByOverlaps(), and cdsByOverlaps() functions
+### from the GenomicFeatures package and the snpsByOverlaps() function from
+### the BSgenome package.
+setGeneric("subsetByOverlaps", signature=c("x", "ranges"), # dispatch on 'x' and 'ranges' only
+ function(x, ranges, maxgap=-1L, minoverlap=0L,
+ type=c("any", "start", "end", "within", "equal"),
+ invert=FALSE, ...) # the methods defined in this file negate their overlap mask when 'invert' is TRUE
+ standardGeneric("subsetByOverlaps")
+)
+
+### Default method: keep (or, when 'invert' is TRUE, drop) the elements of
+### 'x' that overlapsAny() flags with respect to 'ranges'.
+setMethod("subsetByOverlaps", c("Vector", "Vector"),
+ function(x, ranges, maxgap=-1L, minoverlap=0L,
+ type=c("any", "start", "end", "within", "equal"), invert=FALSE,
+ ...)
+ {
+ keep <- overlapsAny(x, ranges,
+ maxgap=maxgap, minoverlap=minoverlap,
+ type=match.arg(type),
+ ...)
+ x[if (invert) !keep else keep]
+ }
+)
+
+.subsetByOverlaps_RangedData_deprecation_msg <- c( # pieces of the warning text passed to wmsg() by the RangedData "subsetByOverlaps" methods
+ "The \"subsetByOverlaps\" methods for RangedData objects are deprecated ",
+ "and won't be replaced. Please migrate your code to use GRanges or ",
+ "GRangesList objects instead. RangedData objects will be deprecated ",
+ "soon (their use has been discouraged since BioC 2.12, that is, since ",
+ "2014). See IMPORTANT NOTE in ?RangedData"
+)
+
+setMethod("subsetByOverlaps", c("RangedData", "RangedData"),
+ function(x, ranges, maxgap = -1L, minoverlap = 0L,
+ type = c("any", "start", "end", "within", "equal"),
+ invert = FALSE)
+ {
+ .Deprecated(msg=wmsg(.subsetByOverlaps_RangedData_deprecation_msg))
+ ## Flat logical mask: TRUE where findOverlaps() returned a non-NA hit.
+ ov_any <- unlist(!is.na(findOverlaps(ranges(x), ranges(ranges),
+ maxgap = maxgap,
+ minoverlap = minoverlap,
+ type = match.arg(type),
+ select = "arbitrary")),
+ use.names=FALSE)
+ if (invert)
+ ov_any <- !ov_any
+ ## Row-style subsetting, as in the implementation this method replaces
+ ## (which used 'query[o,]'); the mask is per range, not per space.
+ x[ov_any,]
+ })
+
+setMethod("subsetByOverlaps", c("RangedData", "RangesList"),
+ function(x, ranges, maxgap = -1L, minoverlap = 0L,
+ type = c("any", "start", "end", "within", "equal"),
+ invert = FALSE)
+ {
+ .Deprecated(msg=wmsg(.subsetByOverlaps_RangedData_deprecation_msg))
+ ## Flat logical mask: TRUE where findOverlaps() returned a non-NA hit.
+ ov_any <- unlist(!is.na(findOverlaps(ranges(x), ranges,
+ maxgap = maxgap,
+ minoverlap = minoverlap,
+ type = match.arg(type),
+ select = "arbitrary")),
+ use.names=FALSE)
+ if (invert)
+ ov_any <- !ov_any
+ ## Row-style subsetting, as in the implementation this method replaces
+ ## (which used 'query[o,]'); the mask is per range, not per space.
+ x[ov_any,]
+ })
+
+### Deprecated. Subset the RangesList 'x' with the mask obtained by
+### overlapping it with the ranges extracted from the RangedData 'ranges'.
+setMethod("subsetByOverlaps", c("RangesList", "RangedData"),
+ function(x, ranges, maxgap = -1L, minoverlap = 0L,
+ type = c("any", "start", "end", "within", "equal"),
+ invert = FALSE)
+ {
+ .Deprecated(msg=wmsg(.subsetByOverlaps_RangedData_deprecation_msg))
+ hit <- findOverlaps(x, ranges(ranges),
+ maxgap = maxgap,
+ minoverlap = minoverlap,
+ type = match.arg(type),
+ select = "arbitrary")
+ keep <- !is.na(hit)
+ x[if (invert) !keep else keep]
+ })
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### overlapsRanges()
+###
+### Extracts the actual regions of intersection between the overlapping ranges.
+###
+
+setGeneric("overlapsRanges", signature=c("query", "subject"), # 'hits' and '...' do not take part in dispatch
+ function(query, subject, hits=NULL, ...) standardGeneric("overlapsRanges")
+)
+
+setMethod("overlapsRanges", c("Ranges", "Ranges"),
+ function(query, subject, hits=NULL, ...)
+ {
+ if (is.null(hits)) {
+ hits <- findOverlaps(query, subject, ...) # no precomputed hits: compute them, forwarding '...'
+ } else {
+ if (!is(hits, "Hits"))
+ stop("'hits' must be a Hits object")
+ if (length(list(...)) != 0L) # '...' is only used by the findOverlaps() call above
+ stop(wmsg("Extra arguments are only accepted when the 'hits' ",
+ "argument is not supplied, in which case they are ",
+ "passed to the internal call to findOverlaps(). ",
+ "See ?overlapsRanges for more information."))
+ if (queryLength(hits) != length(query) ||
+ subjectLength(hits) != length(subject))
+ stop("'hits' is not compatible with 'query' and 'subject'")
+ }
+ ### Could be replaced by 1-liner:
+ ### pintersect(query[queryHits(hits)], subject[subjectHits(hits)])
+ ### but will fail if 'query' or 'subject' is a kind of Ranges object
+ ### that cannot be subsetted (e.g. Partitioning object).
+ m <- as.matrix(hits) # column 1 is used to index 'query', column 2 to index 'subject'
+ qstart <- start(query)[m[,1L]]
+ qend <- end(query)[m[,1L]]
+ sstart <- start(subject)[m[,2L]]
+ send <- end(subject)[m[,2L]]
+ IRanges(pmax.int(qstart, sstart), pmin.int(send, qend)) # larger of the two starts, smaller of the two ends
+ }
+)
+
+setMethod("overlapsRanges", c("RangesList", "RangesList"),
+ function(query, subject, hits=NULL, ...)
+ {
+ if (is.null(hits)) {
+ hits <- findOverlaps(query, subject, ...) # no precomputed hits: compute them, forwarding '...'
+ } else {
+ if (!is(hits, "HitsList"))
+ stop("'hits' must be a HitsList object")
+ if (length(list(...)) != 0L) # '...' is only used by the findOverlaps() call above
+ stop(wmsg("Extra arguments are only accepted when the 'hits' ",
+ "argument is not supplied, in which case they are ",
+ "passed to the internal call to findOverlaps(). ",
+ "See ?overlapsRanges for more information."))
+ if (length(hits) != length(query) ||
+ length(hits) != length(subject))
+ stop("'query', 'subject', and 'hits' must have the same length")
+ }
+ queries <- as.list(query, use.names = FALSE)
+ subjects <- as.list(subject, use.names = FALSE)
+ els <- as.list(hits, use.names = FALSE)
+ ans <- do.call(RangesList, lapply(seq_len(length(hits)), function(i) { # one overlapsRanges() call per list element
+ overlapsRanges(queries[[i]], subjects[[i]], els[[i]])
+ }))
+ names(ans) <- names(hits) # the result takes its names from 'hits'
+ ans
+ }
+)
+
+setMethod("ranges", "Hits", function(x, use.names=TRUE, use.mcols=FALSE,
+ query, subject)
+{
+ msg <- c("\"ranges\" method for Hits objects is deprecated. ",
+ "Please use overlapsRanges() instead.")
+ .Deprecated(msg=wmsg(msg))
+ ## Legacy callers used ranges(hits, query, subject). With the current
+ ## formals, unnamed 'query'/'subject' values are matched positionally to
+ ## 'use.names' then 'use.mcols', so they are recovered from there.
+ ## Arguments supplied by name are left untouched.
+ query_is_missing <- missing(query)
+ subject_is_missing <- missing(subject)
+ if (query_is_missing && subject_is_missing) {
+ query <- use.names
+ subject <- use.mcols
+ } else if (query_is_missing) {
+ query <- use.names # 'subject' was named, so the lone positional value is 'query'
+ } else if (subject_is_missing) {
+ subject <- use.names # 'query' was named, so the lone positional value is 'subject'
+ }
+ overlapsRanges(query, subject, x)
+})
+
+setMethod("ranges", "HitsList", function(x, use.names=TRUE, use.mcols=FALSE,
+ query, subject)
+{
+ msg <- c("\"ranges\" method for HitsList objects is deprecated. ",
+ "Please use overlapsRanges() instead.")
+ .Deprecated(msg=wmsg(msg))
+ ## Legacy callers used ranges(hits, query, subject). With the current
+ ## formals, unnamed 'query'/'subject' values are matched positionally to
+ ## 'use.names' then 'use.mcols', so they are recovered from there.
+ ## Arguments supplied by name are left untouched.
+ query_is_missing <- missing(query)
+ subject_is_missing <- missing(subject)
+ if (query_is_missing && subject_is_missing) {
+ query <- use.names
+ subject <- use.mcols
+ } else if (query_is_missing) {
+ query <- use.names # 'subject' was named, so the lone positional value is 'query'
+ } else if (subject_is_missing) {
+ subject <- use.names # 'query' was named, so the lone positional value is 'subject'
+ }
+ overlapsRanges(query, subject, x)
+})
+
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### poverlaps()
###
@@ -633,71 +827,6 @@ setMethod("poverlaps", c("Ranges", "integer"),
`%pwithin%` <- function(query, subject) poverlaps(query, subject, type="within")
`%poutside%` <- function(query, subject) !poverlaps(query, subject)
-### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-### subsetByOverlaps()
-###
-
-setGeneric("subsetByOverlaps", signature=c("query", "subject"),
- function(query, subject, maxgap=0L, minoverlap=1L,
- type=c("any", "start", "end", "within", "equal"),
- invert=FALSE, ...)
- standardGeneric("subsetByOverlaps")
-)
-
-setMethod("subsetByOverlaps", c("Vector", "Vector"),
- function(query, subject, maxgap=0L, minoverlap=1L,
- type=c("any", "start", "end", "within", "equal"), invert=FALSE,
- ...)
- {
- o <- overlapsAny(query, subject,
- maxgap=maxgap, minoverlap=minoverlap,
- type=match.arg(type),
- ...)
- if (invert) query[!o] else query[o]
- }
-)
-
-setMethod("subsetByOverlaps", c("RangedData", "RangedData"),
- function(query, subject, maxgap = 0L, minoverlap = 1L,
- type = c("any", "start", "end", "within", "equal"),
- invert = FALSE)
- {
- o <- unlist(!is.na(findOverlaps(ranges(query), ranges(subject),
- maxgap = maxgap,
- minoverlap = minoverlap,
- type = match.arg(type),
- select = "arbitrary")),
- use.names=FALSE)
- if (invert) query[!o,] else query[o,]
- })
-
-setMethod("subsetByOverlaps", c("RangedData", "RangesList"),
- function(query, subject, maxgap = 0L, minoverlap = 1L,
- type = c("any", "start", "end", "within", "equal"),
- invert = FALSE)
- {
- o <- unlist(!is.na(findOverlaps(ranges(query), subject,
- maxgap = maxgap,
- minoverlap = minoverlap,
- type = match.arg(type),
- select = "arbitrary")),
- use.names=FALSE)
- if (invert) query[!o,] else query[o,]
- })
-
-setMethod("subsetByOverlaps", c("RangesList", "RangedData"),
- function(query, subject, maxgap = 0L, minoverlap = 1L,
- type = c("any", "start", "end", "within", "equal"),
- invert = FALSE)
- {
- o <- !is.na(findOverlaps(query, ranges(subject),
- maxgap = maxgap,
- minoverlap = minoverlap,
- type = match.arg(type),
- select = "arbitrary"))
- if (invert) query[!o] else query[o]
- })
-
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Merge two sets of ranges by overlap into a DataFrame
@@ -712,40 +841,6 @@ mergeByOverlaps <- function(query, subject, ...) {
cbind(query_df, subject_df)
}
-### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-### "ranges" methods for Hits and HitsList objects
-###
-
-### Extracts the actual regions of intersection between the overlapping ranges.
-### Not much value. Could be replaced by 1-liner:
-### pintersect(query[queryHits(x)], subject[subjectHits(x)])
-setMethod("ranges", "Hits", function(x, query, subject) {
- if (!is(query, "Ranges") || length(query) != queryLength(x))
- stop("'query' must be a Ranges of length equal to number of queries")
- if (!is(subject, "Ranges") || length(subject) != subjectLength(x))
- stop("'subject' must be a Ranges of length equal to number of subjects")
- m <- as.matrix(x)
- qstart <- start(query)[m[,1L]]
- qend <- end(query)[m[,1L]]
- sstart <- start(subject)[m[,2L]]
- send <- end(subject)[m[,2L]]
- IRanges(pmax.int(qstart, sstart), pmin.int(send, qend))
-})
-
-setMethod("ranges", "HitsList", function(x, query, subject) {
- if (!is(query, "RangesList") || length(query) != length(x))
- stop("'query' must be a RangesList of length equal to that of 'x'")
- if (!is(subject, "RangesList") || length(subject) != length(x))
- stop("'subject' must be a RangesList of length equal to that of 'x'")
- els <- as.list(x, use.names = FALSE)
- queries <- as.list(query, use.names = FALSE)
- subjects <- as.list(subject, use.names = FALSE)
- ans <- do.call(RangesList, lapply(seq_len(length(x)), function(i) {
- ranges(els[[i]], queries[[i]], subjects[[i]])
- }))
- names(ans) <- names(x)
- ans
-})
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Convenience for dereferencing overlap hits to a Pairs
diff --git a/R/inter-range-methods.R b/R/inter-range-methods.R
index 1d7f2bf..10af96f 100644
--- a/R/inter-range-methods.R
+++ b/R/inter-range-methods.R
@@ -62,6 +62,15 @@ setMethod("range", "Ranges",
}
)
+### Overwrite above method with optimized method for IPos objects.
+### Like the above method, return an IRanges instance.
+setMethod("range", "IPos",
+ function(x, ..., with.revmap=FALSE, ignore.strand=FALSE, na.rm=FALSE)
+ callGeneric(stitch_IPos(x), ..., # re-dispatch on the result of stitch_IPos(x), forwarding every argument
+ with.revmap=with.revmap, ignore.strand=ignore.strand,
+ na.rm=na.rm)
+)
+
setMethod("range", "RangesList",
function(x, ..., with.revmap=FALSE, na.rm=FALSE)
{
@@ -112,7 +121,16 @@ setMethod("range", "CompressedIRangesList",
}
)
+.range_RangedData_deprecation_msg <- c( # pieces of the warning text passed to wmsg() by the "range" method for RangedData
+ "The \"range\" method for RangedData objects is deprecated ",
+ "and won't be replaced. Please migrate your code to use GRanges or ",
+ "GRangesList objects instead. RangedData objects will be deprecated ",
+ "soon (their use has been discouraged since BioC 2.12, that is, since ",
+ "2014). See IMPORTANT NOTE in ?RangedData"
+)
+
setMethod("range", "RangedData", function(x, ..., na.rm) {
+ .Deprecated(msg=wmsg(.range_RangedData_deprecation_msg))
args <- list(x, ...)
rangeLists <- lapply(args, ranges)
do.call(range, rangeLists)
@@ -375,6 +393,10 @@ setGeneric("disjoin", function(x, ...) standardGeneric("disjoin"))
### Always return an IRanges *instance* whatever Ranges derivative the input
### is, so does NOT act like an endomorphism in general.
+### FIXME: Does not properly handle zero-width ranges at the moment, e.g.
+### disjoin(IRanges(c(1, 11, 13), width=c(2, 5, 0))) returns
+### IRanges(c(1, 11, 13), width=c(2, 2, 3)) when it should return
+### IRanges(c(1, 11, 13, 13), width=c(2, 2, 0, 3)).
setMethod("disjoin", "Ranges",
function(x, with.revmap=FALSE)
{
@@ -390,7 +412,7 @@ setMethod("disjoin", "Ranges",
adj <- new2("IRanges", start=adj_start,
width=adj_width,
check=FALSE)
- adj <- subsetByOverlaps(adj, x)
+ adj <- subsetByOverlaps(adj, x, minoverlap=1L)
if (with.revmap)
mcols(adj)$revmap <- as(sort(findOverlaps(adj, x)),"List")
adj
@@ -475,6 +497,9 @@ setMethod("isDisjoint", "Ranges",
}
)
+### Overwrite above method with optimized method for IPos objects.
+setMethod("isDisjoint", "IPos", function(x) callGeneric(stitch_IPos(x))) # re-dispatch on the result of stitch_IPos(x)
+
setMethod("isDisjoint", "NormalIRanges", function(x) TRUE)
setMethod("isDisjoint", "RangesList",
diff --git a/R/intra-range-methods.R b/R/intra-range-methods.R
index c5f28bd..c0d0e40 100644
--- a/R/intra-range-methods.R
+++ b/R/intra-range-methods.R
@@ -64,6 +64,32 @@ setMethod("shift", "Ranges",
}
)
+### Overwrite above method with optimized method for IPos objects.
+### An IPos object cannot hold names so the 'use.names' arg has no effect.
+### NOTE: We only support shifting by a single value at the moment!
+setMethod("shift", "IPos",
+ function(x, shift=0L, use.names=TRUE)
+ {
+ if (!is.numeric(shift))
+ stop("'shift' must be a numeric vector")
+ if (!is.integer(shift))
+ shift <- as.integer(shift)
+ if (length(shift) != 1L) { # not a single value: must be a constant vector parallel to 'x'
+ if (length(shift) != length(x))
+ stop("'shift' must be a single number or have the ",
+ "length of 'x' when shifting an IPos object")
+ if (length(shift) != 0L) { # a zero-length 'shift' (with zero-length 'x') is passed on unchanged
+ if (!isConstant(shift))
+ stop("'shift' must be constant when shifting ",
+ "an IPos object")
+ shift <- shift[[1L]]
+ }
+ }
+ x@pos_runs <- callGeneric(x@pos_runs, shift=shift) # shift the 'pos_runs' slot; 'use.names' is not used
+ x
+ }
+)
+
setMethod("shift", "Views",
function(x, shift=0L, use.names=TRUE)
{
diff --git a/R/nearest-methods.R b/R/nearest-methods.R
index 22446d4..636e1cb 100644
--- a/R/nearest-methods.R
+++ b/R/nearest-methods.R
@@ -94,10 +94,10 @@ setMethod("nearest", c("Ranges", "RangesORmissing"),
{
select <- match.arg(select)
if (!missing(subject)) {
- ol <- findOverlaps(x, subject, minoverlap = 0L, select = select)
+ ol <- findOverlaps(x, subject, maxgap = 0L, select = select)
} else {
subject <- x
- ol <- findOverlaps(x, minoverlap = 0L, select = select,
+ ol <- findOverlaps(x, maxgap = 0L, select = select,
drop.self = TRUE)
}
if (select == "all") {
diff --git a/R/range-squeezers.R b/R/range-squeezers.R
new file mode 100644
index 0000000..99e176e
--- /dev/null
+++ b/R/range-squeezers.R
@@ -0,0 +1,27 @@
+### =========================================================================
+### Generic functions for squeezing the ranges out of a range-based object
+### -------------------------------------------------------------------------
+
+
+### Extract the ranges as an IRanges object.
+setGeneric("ranges", signature="x", # dispatch on 'x' only
+ function(x, use.names=TRUE, use.mcols=FALSE, ...)
+ standardGeneric("ranges")
+)
+
+### Extract the ranges as an IRangesList object.
+setGeneric("rglist", signature="x", # dispatch on 'x' only
+ function(x, use.names=TRUE, use.mcols=FALSE, ...)
+ standardGeneric("rglist")
+)
+
+### Pairs method: zip together the ranges of the first and second elements.
+### The metadata columns of the result are cleared unless 'use.mcols' is TRUE.
+### 'use.names' is accepted but not used here.
+setMethod("rglist", "Pairs", function(x, use.names=TRUE, use.mcols=FALSE) {
+ stopifnot(isTRUEorFALSE(use.mcols))
+ zipped <- zipup(ranges(first(x)), ranges(second(x)))
+ if (use.mcols)
+ return(zipped)
+ mcols(zipped) <- NULL
+ zipped
+ })
+
diff --git a/R/setops-methods.R b/R/setops-methods.R
index 02adc61..8768148 100644
--- a/R/setops-methods.R
+++ b/R/setops-methods.R
@@ -2,25 +2,20 @@
### Set operations
### -------------------------------------------------------------------------
###
-### I. Vector-wise set operations: union, intersect, setdiff
+### 1) Vector-wise set operations: union, intersect, setdiff
###
-### All the functions in that group are implemented to behave like
-### endomorphisms with respect to their first argument 'x'.
+### When the input are Ranges objects, the functions in that group interpret
+### each supplied object ('x' or 'y') as a set of integer values. Therefore,
+### if 2 IRanges objects 'x1' and 'x2' represent the same set of integers,
+### then each of these functions will return the same result when 'x1' is
+### replaced with 'x2' in the input. The returned IRanges object is
+### guaranteed to be normal but is *not* promoted to NormalIRanges.
###
-### On IRanges objects, the functions in that group interpret each supplied
-### object ('x' or 'y') as a set of integer values. Therefore, if 2 IRanges
-### objects 'x1' and 'x2' represent the same set of integers, then each of
-### these functions will return the same result when 'x1' is replaced by 'x2'
-### in the input. The returned IRanges object is guaranteed to be normal
-### (note that if 'x' is an IRanges *instance* then the returned object is
-### still an IRanges *instance*, that is, it is *not* promoted to
-### NormalIRanges).
+### 2) Element-wise (aka "parallel") set operations: punion, pintersect,
+### psetdiff, pgap
###
-### II. Element-wise (aka "parallel") set operations: punion, pintersect,
-### psetdiff, pgap
-###
-### The functions in that group take 2 *objects* of the same length and
-### return an object of the same class and length as the first argument.
+### The functions in that group take 2 *objects* of the same length and
+### return an object of the same class and length as the first argument.
###
@@ -28,21 +23,16 @@
### union()
###
+### Always return an IRanges *instance* whatever Ranges derivatives are passed
+### to it (e.g. IPos, NCList or NormalIRanges), so does NOT act like an
+### endomorphism in general.
setMethod("union", c("Ranges", "Ranges"),
function(x, y)
{
- ## We need to downgrade 'x' to an IRanges instance 'x0' so 'c(x0, y)'
- ## is guaranteed to work (even e.g. if 'x' is a NormalIRanges object).
- x0 <- as(x, "IRanges") # downgrade x to IRanges
- x0 <- reduce(c(x0, y), drop.empty.ranges=TRUE)
- ## Maybe the call to update() below could be replaced by
- ## 'as(x, "IRanges") <- x0' but I was not lucky with my first
- ## attempt to use this construct:
- ## > v <- Views(XInteger(18), 2:5, 13:10)
- ## > as(v, "IRanges") <- IRanges(3, 8)
- ## Error: evaluation nested too deeply: infinite recursion / options(expressions=)?
- initialize(x, start=start(x0), width=width(x0), NAMES=names(x0),
- elementMetadata=NULL)
+ ## We downgrade 'x' to an IRanges instance so 'c(x, y)' is guaranteed
+ ## to work (even e.g. if 'x' is a NormalIRanges object).
+ x <- as(x, "IRanges", strict=TRUE)
+ reduce(c(x, y), drop.empty.ranges=TRUE) # returned as-is: the result is no longer re-wrapped in the class of 'x'
}
)
@@ -68,10 +58,14 @@ setMethod("union", c("Pairs", "missing"), function(x, y, ...) {
callGeneric(first(x), second(x), ...)
})
+
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### intersect()
###
+### Always return an IRanges *instance* whatever Ranges derivatives are passed
+### to it (e.g. IPos, NCList or NormalIRanges), so does NOT act like an
+### endomorphism in general.
setMethod("intersect", c("Ranges", "Ranges"),
function(x, y)
{
@@ -119,6 +113,9 @@ setMethod("intersect", c("CompressedAtomicList", "CompressedAtomicList"),
### setdiff()
###
+### Always return an IRanges *instance* whatever Ranges derivatives are passed
+### to it (e.g. IPos, NCList or NormalIRanges), so does NOT act like an
+### endomorphism in general.
setMethod("setdiff", c("Ranges", "Ranges"),
function(x, y)
{
@@ -148,6 +145,7 @@ setMethod("setdiff", c("Pairs", "missing"), function(x, y, ...) {
callGeneric(first(x), second(x), ...)
})
+
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### punion()
###
@@ -288,6 +286,7 @@ setMethod("psetdiff", c("Pairs", "missing"), function(x, y, ...) {
callGeneric(first(x), second(x), ...)
})
+
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### pgap()
###
diff --git a/inst/doc/IRangesOverview.pdf b/inst/doc/IRangesOverview.pdf
index 3183370..c06de45 100644
Binary files a/inst/doc/IRangesOverview.pdf and b/inst/doc/IRangesOverview.pdf differ
diff --git a/inst/unitTests/test_DataFrameList.R b/inst/unitTests/test_DataFrameList.R
index 8bef089..967155e 100644
--- a/inst/unitTests/test_DataFrameList.R
+++ b/inst/unitTests/test_DataFrameList.R
@@ -102,16 +102,15 @@ test_SplitDataFrameList_subset <- function() {
}
}
-test_SplitDataFrameList_as_data.frame <- function() {
+test_SplitDataFrameList_as.data.frame <- function() {
checkDFL2dfl <- function(DFL, dfl, compress) {
- df <-
+ target <-
data.frame(group = togroup(PartitioningByWidth(dfl)),
group_name = names(dfl)[togroup(PartitioningByWidth(dfl))],
do.call(rbind, dfl),
stringsAsFactors=FALSE, row.names=NULL)
- if (compress)
- rownames(df) <- unlist(lapply(dfl, row.names), use.names = FALSE)
- checkIdentical(as.data.frame(DFL), df)
+ rownames(target) <- unlist(lapply(dfl, row.names), use.names = FALSE)
+ checkIdentical(target, as.data.frame(DFL))
}
data(swiss)
@@ -214,7 +213,7 @@ test_DataFrameList_transform <- function() {
DF <- DataFrame(state.division, state.region, state.area)
DFL <- split(DF, DF$state.division) # NICER: split(DF, ~ state.devision)
DFL <- transform(DFL, total.area=sum(state.area[state.region!="South"]),
- fraction=ifelse(total.area == 0, 0, state.area/total.area))
+ fraction=ifelse2(total.area == 0, 0, state.area/total.area))
ANS <- DataFrame(lapply(unlist(DFL, use.names=FALSE), unname))
diff --git a/inst/unitTests/test_IRanges-class.R b/inst/unitTests/test_IRanges-class.R
index b26c9c6..67402fe 100644
--- a/inst/unitTests/test_IRanges-class.R
+++ b/inst/unitTests/test_IRanges-class.R
@@ -53,12 +53,6 @@ test_IRanges_combine <- function() {
checkIdentical(mcols(c(ir1, ir2, ignore.mcols=TRUE)), NULL)
}
-test_IRanges_subset <- function() { # by range
- query <- IRanges(c(1, 4, 9), c(5, 7, 10))
- subject <- IRanges(c(6, 8, 10), c(7, 12, 14))
- checkIdentical(subsetByOverlaps(query, subject), query[2:3])
-}
-
test_IRanges_annotation <- function() {
range <- IRanges(c(1, 4), c(5, 7))
mcols(range) <- DataFrame(a = 1:2)
diff --git a/inst/unitTests/test_NCList-class.R b/inst/unitTests/test_NCList-class.R
index 91de4e5..80c4622 100644
--- a/inst/unitTests/test_NCList-class.R
+++ b/inst/unitTests/test_NCList-class.R
@@ -37,31 +37,49 @@ findOverlaps_NCLists <- IRanges:::findOverlaps_NCLists
selectHits(x, select)
}
-### Used in the unit tests for GNCList located in GenomicRanges.
-.min_overlap_score <- function(maxgap=0L, minoverlap=1L)
+### Vectorized. Return -1 if the query and subject overlap (i.e. if
+### end(query) < start(subject) and end(subject) < start(query) are both
+### false). Otherwise (i.e. if they are disjoint), return the width of the
+### gap between them. Note that a gap width of 0 means that they are adjacent.
+### TODO: Rename this pgapWidth(), make it a generic with various methods
+### (at least one for Ranges and one for GenomicRanges objects), and export it.
+.gapwidth <- function(query, subject)
{
- if (maxgap != 0L && minoverlap > 1L)
- stop("'minoverlap' must be <= 1 when 'maxgap' is not 0")
- minoverlap - maxgap
+ ifelse(end(query) < start(subject),
+ start(subject) - end(query),
+ ifelse(end(subject) < start(query),
+ start(query) - end(subject),
+ 0L)) - 1L
}
-### Used in the unit tests for GNCList located in GenomicRanges.
-.overlap_score <- function(query, subject, type="any")
+### Vectorized.
+### TODO: Rename this poverlapWidth(), make it a generic with various methods
+### (at least one for Ranges and one for GenomicRanges objects), and export it.
+.overlapwidth <- function(query, subject)
{
- overlap_score <- pmin(end(query), end(subject)) -
- pmax(start(query), start(subject)) + 1L
- if (type %in% c("start", "end", "equal"))
- overlap_score <- pmax(overlap_score, 0L)
- overlap_score
+ score <- pmin.int(end(query), end(subject)) -
+ pmax.int(start(query), start(subject)) + 1L
+ pmax.int(score, 0L)
}
### Used in the unit tests for GNCList located in GenomicRanges.
.get_query_overlaps <- function(query, subject,
- maxgap, min_overlap_score, type)
+ maxgap=-1L, minoverlap=0L,
+ type=c("any", "start", "end", "within", "extend", "equal"))
{
- ok <- .overlap_score(query, subject, type) >= min_overlap_score
- if (type == "any")
+ type <- match.arg(type)
+ if (type == "any" && maxgap != -1L && minoverlap != 0L)
+ stop("when 'type' is \"any\", at least one of 'maxgap' ",
+ "and 'minoverlap' must be set to its default value")
+ overlapwidth <- .overlapwidth(query, subject)
+ ok <- overlapwidth >= minoverlap
+ if (type == "any") {
+ gapwidth <- .gapwidth(query, subject)
+ ok <- ok & gapwidth <= maxgap
return(ok)
+ }
+ if (maxgap == -1L)
+ maxgap <- 0L
if (type != "end")
d1 <- abs(start(subject) - start(query))
if (type != "start")
@@ -84,23 +102,19 @@ findOverlaps_NCLists <- IRanges:::findOverlaps_NCLists
}
.findOverlaps_naive <- function(query, subject,
- maxgap=0L, minoverlap=1L,
+ maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end",
"within", "extend", "equal"),
select=c("all", "first", "last", "arbitrary",
"count"))
{
type <- match.arg(type)
- if (type == "any") {
- min_overlap_score <- .min_overlap_score(maxgap, minoverlap)
- } else {
- min_overlap_score <- minoverlap
- }
select <- match.arg(select)
hits_per_query <- lapply(seq_along(query),
function(i)
which(.get_query_overlaps(query[i], subject,
- maxgap, min_overlap_score, type)))
+ maxgap=maxgap, minoverlap=minoverlap,
+ type=type)))
hits <- .make_Hits_from_q2s(hits_per_query, length(subject))
selectHits(hits, select=select)
}
@@ -124,6 +138,9 @@ test_NCList <- function()
checkIdentical(x[-6], as(nclist[-6], "IRanges"))
}
+### Test findOverlaps_NCList() *default* behavior, that is, with all optional
+### arguments (i.e. 'maxgap', 'minoverlap', 'type', 'select', and
+### 'circle.length') set to their default value.
test_findOverlaps_NCList <- function()
{
query <- IRanges(-3:7, width=3)
@@ -180,11 +197,11 @@ test_findOverlaps_NCList_with_filtering <- function()
pp_query <- NCList(query)
pp_subject <- NCList(subject)
for (type in c("any", "start", "end", "within", "extend", "equal")) {
- for (maxgap in 0:3) {
- if (type != "any" || maxgap == 0L)
+ for (maxgap in -1:3) {
+ if (type != "any" || maxgap == -1L)
max_minoverlap <- 4L
else
- max_minoverlap <- 1L
+ max_minoverlap <- 0L
for (minoverlap in 0:max_minoverlap) {
for (select in c("all", "first", "last", "count")) {
## query - subject
@@ -284,7 +301,7 @@ test_findOverlaps_NCList_special_types <- function()
checkEquals(nhit, length(hits))
}
## no hits
- for (maxgap in 0:2) {
+ for (maxgap in -1:2) {
test_maxgap_and_type(maxgap, minoverlap=1L, 0L)
test_maxgap_and_type(maxgap, minoverlap=0L, 0L)
}
@@ -301,11 +318,11 @@ test_findOverlaps_NCList_special_types <- function()
pp_query <- NCList(query)
pp_subject <- NCList(subject)
for (type in c("any", "start", "end", "within", "extend", "equal")) {
- for (maxgap in 0:3) {
- if (type != "any" || maxgap == 0L)
+ for (maxgap in -1:3) {
+ if (type != "any" || maxgap == -1L)
max_minoverlap <- 4L
else
- max_minoverlap <- 1L
+ max_minoverlap <- 0L
for (minoverlap in 0:max_minoverlap) {
target <- as(.findOverlaps_naive(query, subject,
maxgap=maxgap, minoverlap=minoverlap,
diff --git a/inst/unitTests/test_bind-arrays.R b/inst/unitTests/test_bind-arrays.R
deleted file mode 100644
index fe8c5bd..0000000
--- a/inst/unitTests/test_bind-arrays.R
+++ /dev/null
@@ -1,96 +0,0 @@
-.TEST_matrices <- list(
- matrix(1:15, nrow=3, ncol=5,
- dimnames=list(NULL, paste0("M1y", 1:5))),
- matrix(101:135, nrow=7, ncol=5,
- dimnames=list(paste0("M2x", 1:7), paste0("M2y", 1:5))),
- matrix(1001:1025, nrow=5, ncol=5,
- dimnames=list(paste0("M3x", 1:5), NULL))
-)
-
-.TEST_arrays <- list(
- array(1:60, c(3, 5, 4),
- dimnames=list(NULL, paste0("M1y", 1:5), NULL)),
- array(101:240, c(7, 5, 4),
- dimnames=list(paste0("M2x", 1:7), paste0("M2y", 1:5), NULL)),
- array(10001:10100, c(5, 5, 4),
- dimnames=list(paste0("M3x", 1:5), NULL, paste0("M3z", 1:4)))
-)
-
-test_arbind <- function()
-{
- ## on matrices
- target <- do.call(rbind, .TEST_matrices)
- current <- do.call(arbind, .TEST_matrices)
- checkIdentical(target, current)
-
- ## on empty matrices
- m1 <- matrix(nrow=0, ncol=3, dimnames=list(NULL, letters[1:3]))
- m2 <- matrix(1:15, ncol=3, dimnames=list(NULL, LETTERS[1:3]))
-
- target <- do.call(rbind, list(m1, m2))
- current <- do.call(arbind, list(m1, m2))
- checkIdentical(target, current)
-
- target <- do.call(rbind, list(m2, m1))
- current <- do.call(arbind, list(m2, m1))
- checkIdentical(target, current)
-
- target <- do.call(rbind, list(m1, m1))
- current <- do.call(arbind, list(m1, m1))
- checkIdentical(target, current)
-
- ## on arrays
- current <- do.call(arbind, .TEST_arrays)
- check_2D_slice <- function(k) {
- slices <- lapply(.TEST_arrays, `[`, , , k)
- target_slice <- do.call(rbind, slices)
- checkIdentical(target_slice, current[ , , k])
- }
- for (k in seq_len(dim(current)[[3L]])) check_2D_slice(k)
-}
-
-test_acbind <- function()
-{
- ## on matrices
- matrices <- lapply(.TEST_matrices, t)
- target <- do.call(cbind, matrices)
- current <- do.call(acbind, matrices)
- checkIdentical(target, current)
-
- ## on empty matrices
- m1 <- matrix(nrow=3, ncol=0, dimnames=list(letters[1:3], NULL))
- m2 <- matrix(1:15, nrow=3, dimnames=list(LETTERS[1:3], NULL))
-
- target <- do.call(cbind, list(m1, m2))
- current <- do.call(acbind, list(m1, m2))
- checkIdentical(target, current)
-
- target <- do.call(cbind, list(m2, m1))
- current <- do.call(acbind, list(m2, m1))
- checkIdentical(target, current)
-
- target <- do.call(cbind, list(m1, m1))
- current <- do.call(acbind, list(m1, m1))
- checkIdentical(target, current)
-
- ## on arrays
-
- ## transpose the 1st 2 dimensions
- arrays <- lapply(.TEST_arrays,
- function(a) {
- a_dimnames <- dimnames(a)
- dim(a)[1:2] <- dim(a)[2:1]
- a_dimnames[1:2] <- a_dimnames[2:1]
- dimnames(a) <- a_dimnames
- a
- })
-
- current <- do.call(acbind, arrays)
- check_2D_slice <- function(k) {
- slices <- lapply(arrays, `[`, , , k)
- target_slice <- do.call(cbind, slices)
- checkIdentical(target_slice, current[ , , k])
- }
- for (k in seq_len(dim(current)[[3L]])) check_2D_slice(k)
-}
-
diff --git a/inst/unitTests/test_findOverlaps-methods.R b/inst/unitTests/test_findOverlaps-methods.R
index 07340ca..a86f75d 100644
--- a/inst/unitTests/test_findOverlaps-methods.R
+++ b/inst/unitTests/test_findOverlaps-methods.R
@@ -27,8 +27,8 @@ test_findOverlaps_Ranges <- function()
checkOverlap(result, c(1, 1, 3), c(1, 2, 3), 3, 3)
## with 'maxgap'
- result <- findOverlaps(query, subject, 1)
- checkOverlap(result, c(1, 1, 2, 3), c(1, 2, 2, 3), 3, 3)
+ result <- findOverlaps(query, subject, maxgap = 0L)
+ checkOverlap(result, c(1, 1, 2, 3), c(2, 1, 2, 3), 3, 3)
## with 'minoverlap'
result <- findOverlaps(query, subject, minoverlap = 3L)
@@ -44,17 +44,21 @@ test_findOverlaps_Ranges <- function()
## zero-width ranges
query <- IRanges(9:14, 8:13)
- result <- findOverlaps(query, subject)
+ result <- findOverlaps(query, subject, minoverlap = 1L)
checkOverlap(result, integer(0), integer(0), 6, 3)
- result <- findOverlaps(query, subject, minoverlap = 0L)
+ result <- findOverlaps(query, subject)
+ checkOverlap(result, c(3, 4), c(3, 3), 6, 3)
+ result <- findOverlaps(query, subject, maxgap = 0L)
checkOverlap(result, 2:5, c(3, 3, 3, 3), 6, 3)
- result <- findOverlaps(query, subject, maxgap=1L, minoverlap = 0L)
+ result <- findOverlaps(query, subject, maxgap = 1L)
checkOverlap(result, 1:6, c(3, 3, 3, 3, 3, 3), 6, 3)
- result <- findOverlaps(subject, query)
+ result <- findOverlaps(subject, query, minoverlap = 1L)
checkOverlap(result, integer(0), integer(0), 3, 6)
- result <- findOverlaps(subject, query, minoverlap = 0L)
+ result <- findOverlaps(subject, query)
+ checkOverlap(result, c(3, 3), c(3, 4), 3, 6)
+ result <- findOverlaps(subject, query, maxgap = 0L)
checkOverlap(result, c(3, 3, 3, 3), 2:5, 3, 6)
- result <- findOverlaps(subject, query, maxgap=1L, minoverlap = 0L)
+ result <- findOverlaps(subject, query, maxgap = 1L)
checkOverlap(result, c(3, 3, 3, 3, 3, 3), 1:6, 3, 6)
## .....
@@ -144,3 +148,19 @@ test_findOverlaps_Ranges <- function()
checkException(findOverlaps(NULL, query), silent = TRUE)
}
+test_subsetByOverlaps_Ranges <- function() {
+ x <- IRanges(9:12, 15)
+ ranges <- IRanges(1, 10)
+ checkIdentical(x[1:2], subsetByOverlaps(x, ranges))
+ checkIdentical(x[3:4], subsetByOverlaps(x, ranges, invert=TRUE))
+ checkIdentical(x[1:3], subsetByOverlaps(x, ranges, maxgap=0))
+ checkIdentical(x[4], subsetByOverlaps(x, ranges, maxgap=0, invert=TRUE))
+
+ x <- IRanges(c(1, 4, 9), c(5, 7, 10))
+ ranges <- IRanges(c(6, 8, 10), c(7, 12, 14))
+ checkIdentical(x[2:3], subsetByOverlaps(x, ranges))
+ checkIdentical(x[1], subsetByOverlaps(x, ranges, invert=TRUE))
+ checkIdentical(x, subsetByOverlaps(x, ranges, maxgap=0))
+ checkIdentical(x[0], subsetByOverlaps(x, ranges, maxgap=0, invert=TRUE))
+}
+
diff --git a/man/AtomicList-utils.Rd b/man/AtomicList-utils.Rd
index b86bce4..ccd5d54 100644
--- a/man/AtomicList-utils.Rd
+++ b/man/AtomicList-utils.Rd
@@ -41,12 +41,8 @@
\alias{any,CompressedAtomicList-method}
\alias{anyNA,CompressedAtomicList-method}
-\alias{diff,IntegerList-method}
-\alias{diff,NumericList-method}
-\alias{diff,RleList-method}
-\alias{diff,CompressedIntegerList-method}
-\alias{diff,CompressedNumericList-method}
-\alias{diff,CompressedRleList-method}
+\alias{diff.AtomicList}
+\alias{diff,CompressedAtomicList-method}
\alias{pmax,IntegerList-method}
\alias{pmax,NumericList-method}
@@ -130,6 +126,21 @@
\alias{selfmatch,CompressedAtomicList-method}
\alias{intersect,CompressedAtomicList,CompressedAtomicList-method}
+\alias{ifelse2}
+\alias{ifelse2,ANY,ANY,List-method}
+\alias{ifelse2,ANY,List,ANY-method}
+\alias{ifelse2,List,ANY,ANY-method}
+\alias{ifelse2,CompressedLogicalList,ANY,ANY-method}
+\alias{ifelse2,CompressedLogicalList,List,ANY-method}
+\alias{ifelse2,CompressedLogicalList,ANY,List-method}
+\alias{ifelse2,CompressedLogicalList,List,ANY-method}
+\alias{ifelse2,CompressedLogicalList,List,List-method}
+\alias{ifelse2,SimpleLogicalList,ANY,ANY-method}
+\alias{ifelse2,SimpleLogicalList,List,ANY-method}
+\alias{ifelse2,SimpleLogicalList,ANY,List-method}
+\alias{ifelse2,SimpleLogicalList,List,ANY-method}
+\alias{ifelse2,SimpleLogicalList,List,List-method}
+
\title{Common operations on AtomicList objects}
\description{
@@ -187,6 +198,10 @@
The \code{rank} method only supports tie methods \dQuote{average},
\dQuote{first}, \dQuote{min} and \dQuote{max}.
+
+ Since \code{\link{ifelse}} relies on non-standard evaluation for
+ arguments that need to be in the generic signature, we provide
+ \code{ifelse2}, which has eager but otherwise equivalent semantics.
}
\section{Specialized Methods}{
diff --git a/man/DataFrame-utils.Rd b/man/DataFrame-utils.Rd
index 6040d17..8eec4f8 100644
--- a/man/DataFrame-utils.Rd
+++ b/man/DataFrame-utils.Rd
@@ -34,9 +34,7 @@
\author{ Michael Lawrence }
\seealso{
- \code{\linkS4class{DataTable}},
- \code{\linkS4class{Vector}}, and
- \code{\linkS4class{RangedData}}, which makes heavy use of this class.
+ \code{\linkS4class{DataTable}} and \code{\linkS4class{Vector}}
}
\examples{
## split
diff --git a/man/DataFrameList-class.Rd b/man/DataFrameList-class.Rd
index d985b37..c872cbe 100644
--- a/man/DataFrameList-class.Rd
+++ b/man/DataFrameList-class.Rd
@@ -192,8 +192,7 @@
\author{ Michael Lawrence }
\seealso{
- \code{\linkS4class{DataFrame}}, \code{\linkS4class{RangedData}},
- which uses a \code{DataFrameList} to split the data by the spaces.
+ \code{\linkS4class{DataFrame}}
}
\keyword{methods}
\keyword{classes}
diff --git a/man/Grouping-class.Rd b/man/Grouping-class.Rd
index 090a3ef..0d79533 100644
--- a/man/Grouping-class.Rd
+++ b/man/Grouping-class.Rd
@@ -147,7 +147,6 @@
% old stuff (deprecated & defunct)
\alias{togroup,ANY-method}
-\alias{grouplength}
\title{Grouping objects}
diff --git a/man/IPos-class.Rd b/man/IPos-class.Rd
new file mode 100644
index 0000000..c9b700d
--- /dev/null
+++ b/man/IPos-class.Rd
@@ -0,0 +1,232 @@
+\name{IPos-class}
+\docType{class}
+
+\alias{class:IPos}
+\alias{IPos-class}
+\alias{IPos}
+
+\alias{length,IPos-method}
+\alias{names,IPos-method}
+\alias{names<-,IPos-method}
+\alias{pos}
+\alias{pos,IPos-method}
+\alias{start,IPos-method}
+\alias{end,IPos-method}
+\alias{width,IPos-method}
+
+\alias{coerce,Ranges,IPos-method}
+\alias{coerce,ANY,IPos-method}
+\alias{as.data.frame,IPos-method}
+\alias{extractROWS,IPos-method}
+\alias{show,IPos-method}
+\alias{c,IPos-method}
+
+\title{Memory-efficient representation of integer positions}
+
+\description{
+ The IPos class is a container for storing a set of \emph{integer positions}
+ where most of the positions are typically (but not necessarily) adjacent.
+ Because integer positions can be seen as integer ranges of width 1, the IPos
+ class extends the \link{Ranges} virtual class. Note that even though an
+ \link{IRanges} object can be used for storing integer positions, using an
+ IPos object will be much more memory-efficient, especially when the object
+ contains long runs of adjacent positions in \emph{ascending order}.
+}
+
+\usage{
+IPos(pos_runs) # constructor function
+}
+
+\arguments{
+ \item{pos_runs}{
+ An \link{IRanges} object (or any other \link{Ranges} derivative) where
+ each range is interpreted as a run of adjacent ascending positions.
+ If \code{pos_runs} is not a \link{Ranges} derivative, \code{IPos()} first
+ tries to coerce it to one with \code{as(pos_runs, "Ranges", strict=FALSE)}.
+ }
+}
+
+\value{
+ An IPos object.
+}
+
+\section{Accessors}{
+
+ \subsection{Getters}{
+ IPos objects support the same set of getters as other \link{Ranges}
+ derivatives (i.e. \code{start()}, \code{end()}, \code{mcols()}, etc...),
+ plus the \code{pos()} getter which is equivalent to \code{start()}
+ or \code{end()}. See \code{?\link{Ranges}} for the list of getters
+ supported by \link{Ranges} derivatives.
+
+ IMPORTANT NOTE: An IPos object cannot hold names i.e. \code{names()}
+ always returns \code{NULL} on it.
+ }
+
+ \subsection{Setters}{
+ IPos objects support the \code{mcols()} and \code{metadata()} setters
+ only.
+ }
+}
+
+\section{Coercion}{
+ From \link{Ranges} to IPos:
+ A \link{Ranges} derivative \code{x} in which all the ranges have a
+ width of 1 can be coerced to an IPos object with \code{as(x, "IPos")}.
+ The names on \code{x} are not propagated (a warning is issued if \code{x}
+ has names on it).
+
+ From IPos to \link{IRanges}:
+ An IPos object \code{x} can be coerced to an \link{IRanges} object
+ with \code{as(x, "IRanges")}. However be aware that the resulting object
+ can use thousands of times (or more) the memory of \code{x}!
+ See "MEMORY USAGE" in the Examples section below.
+
+ From IPos to ordinary R objects:
+ Like with any other \link{Ranges} derivative, \code{as.character()},
+ \code{as.factor()}, and \code{as.data.frame()} work on an IPos object
+ \code{x}. Note however that \code{as.data.frame(x)} returns a data frame
+ with a \code{pos} column (containing \code{pos(x)}) instead of the
+ \code{start}, \code{end}, and \code{width} columns that one gets with other
+ \link{Ranges} derivatives.
+}
+
+\section{Subsetting}{
+ An IPos object can be subsetted exactly like an \link{IRanges} object.
+}
+
+\section{Combining}{
+ IPos objects can be combined (a.k.a. appended) with \code{c()} or
+ \code{append()}.
+}
+
+\section{Splitting and Relisting}{
+ Like with an \link{IRanges} object, \code{split()} and \code{relist()} work
+ on an IPos object.
+}
+
+\note{
+ Like for any \link[S4Vectors]{Vector} derivative, the length of an
+ IPos object cannot exceed \code{.Machine$integer.max} (i.e. 2^31 - 1 on
+ most platforms). \code{IPos()} will return an error if \code{pos_runs}
+ contains too many integer positions.
+}
+
+\author{
+ Hervé Pagès; based on ideas borrowed from Georg Stricker
+ \email{georg.stricker at in.tum.de} and Julien Gagneur
+ \email{gagneur at in.tum.de}
+}
+
+\seealso{
+ \itemize{
+ \item The \link[GenomicRanges]{GPos} class in the \pkg{GenomicRanges}
+ package for a memory-efficient representation of \emph{genomic
+ positions} (i.e. genomic ranges of width 1).
+
+ \item \link{Ranges} and \link{IRanges} objects.
+
+ \item \link{Ranges-comparison} for comparing and ordering integer ranges
+ and/or positions.
+
+ \item \link{findOverlaps-methods} for finding overlapping
+ integer ranges and/or positions.
+
+ \item \link{nearest-methods} for finding the nearest integer range
+ and/or position.
+ }
+}
+
+\examples{
+## ---------------------------------------------------------------------
+## BASIC EXAMPLES
+## ---------------------------------------------------------------------
+
+## Example 1:
+ipos1 <- IPos(c("44-53", "5-10", "2-5"))
+ipos1
+
+length(ipos1)
+pos(ipos1) # same as 'start(ipos1)' and 'end(ipos1)'
+as.character(ipos1)
+as.data.frame(ipos1)
+as(ipos1, "IRanges")
+as.data.frame(as(ipos1, "IRanges"))
+ipos1[9:17]
+
+## Example 2:
+pos_runs <- IRanges(c(1, 6, 12, 17), c(5, 10, 16, 20))
+ipos2 <- IPos(pos_runs)
+ipos2
+
+## Example 3:
+ipos3A <- ipos3B <- IPos(c("1-15000", "15400-88700"))
+npos <- length(ipos3A)
+
+mcols(ipos3A)$sample <- Rle("sA")
+sA_counts <- sample(10, npos, replace=TRUE)
+mcols(ipos3A)$counts <- sA_counts
+
+mcols(ipos3B)$sample <- Rle("sB")
+sB_counts <- sample(10, npos, replace=TRUE)
+mcols(ipos3B)$counts <- sB_counts
+
+ipos3 <- c(ipos3A, ipos3B)
+ipos3
+
+## ---------------------------------------------------------------------
+## MEMORY USAGE
+## ---------------------------------------------------------------------
+
+## Coercion to IRanges works...
+ipos4 <- IPos(c("1-125000", "135000-575000"))
+ir4 <- as(ipos4, "IRanges")
+ir4
+## ... but is generally not a good idea:
+object.size(ipos4)
+object.size(ir4) # 1739 times bigger than the IPos object!
+
+## Shuffling the order of the positions impacts memory usage:
+ipos4s <- sample(ipos4)
+object.size(ipos4s)
+
+## AN IMPORTANT NOTE: In the worst situations, IPos still performs as
+## well as an IRanges object.
+object.size(as(ipos4s, "IRanges")) # same size as 'ipos4s'
+
+## Best case scenario is when the object is strictly sorted (i.e.
+## positions are in strict ascending order).
+## This can be checked with:
+is.unsorted(ipos4, strict=TRUE) # 'ipos4' is strictly sorted
+
+## ---------------------------------------------------------------------
+## USING MEMORY-EFFICIENT METADATA COLUMNS
+## ---------------------------------------------------------------------
+## In order to keep memory usage as low as possible, it is recommended
+## to use a memory-efficient representation of the metadata columns that
+## we want to set on the object. Rle's are particularly well suited for
+## this, especially if the metadata columns contain long runs of
+## identical values. This is the case for example if we want to use an
+## IPos object to represent the coverage of sequencing reads along a
+## chromosome.
+
+## Example 5:
+library(pasillaBamSubset)
+library(Rsamtools) # for the BamFile() constructor function
+bamfile1 <- BamFile(untreated1_chr4())
+bamfile2 <- BamFile(untreated3_chr4())
+ipos5 <- IPos(IRanges(1, seqlengths(bamfile1)[["chr4"]]))
+library(GenomicAlignments) # for "coverage" method for BamFile objects
+cov1 <- coverage(bamfile1)$chr4
+cov2 <- coverage(bamfile2)$chr4
+mcols(ipos5) <- DataFrame(cov1, cov2)
+ipos5
+
+object.size(ipos5) # lightweight
+
+## Keep only the positions where coverage is at least 10 in one of the
+## 2 samples:
+ipos5[mcols(ipos5)$cov1 >= 10 | mcols(ipos5)$cov2 >= 10]
+}
+\keyword{methods}
+\keyword{classes}
diff --git a/man/IRanges-class.Rd b/man/IRanges-class.Rd
index d83d548..5b8edc8 100644
--- a/man/IRanges-class.Rd
+++ b/man/IRanges-class.Rd
@@ -5,6 +5,7 @@
\alias{class:IRanges}
\alias{IRanges-class}
+% Accessors
\alias{start,IRanges-method}
\alias{width,IRanges-method}
\alias{names,IRanges-method}
@@ -12,6 +13,8 @@
\alias{width<-,IRanges-method}
\alias{end<-,IRanges-method}
\alias{names<-,IRanges-method}
+\alias{ranges,Ranges-method}
+
\alias{isNormal,IRanges-method}
\alias{update,IRanges-method}
\alias{c,IRanges-method}
@@ -34,6 +37,9 @@
\alias{coerce,integer,NormalIRanges-method}
\alias{coerce,numeric,IRanges-method}
\alias{coerce,numeric,NormalIRanges-method}
+\alias{coerce,character,IRanges-method}
+\alias{coerce,factor,IRanges-method}
+\alias{coerce,ANY,Ranges-method}
\title{IRanges and NormalIRanges objects}
@@ -62,6 +68,11 @@
\section{Coercion}{
\describe{
\item{}{
+ \code{ranges(x, use.names=FALSE, use.mcols=FALSE)}: Squeeze the ranges
+ out of \link{Ranges} object \code{x} and return them in an IRanges
+ object \emph{parallel} to \code{x} (i.e. same length as \code{x}).
+ }
+ \item{}{
\code{as(from, "IRanges")}: Creates an IRanges instance from a Ranges
object, logical vector, or integer vector. When \code{from} is a logical
vector, the resulting IRanges object contains the indices for the runs
diff --git a/man/IRangesList-class.Rd b/man/IRangesList-class.Rd
index bf79562..b582673 100644
--- a/man/IRangesList-class.Rd
+++ b/man/IRangesList-class.Rd
@@ -78,7 +78,7 @@
\section{Constructor}{
\describe{
- \item{}{\code{IRangesList(..., universe = NULL, compress = TRUE)}:
+ \item{}{\code{IRangesList(..., compress=TRUE)}:
The \code{...} argument accepts either a comma-separated list of
\code{IRanges} objects, or a single \code{LogicalList} / logical
\code{RleList} object, or 2 elements named \code{start} and \code{end}
diff --git a/man/NCList-class.Rd b/man/NCList-class.Rd
index dafac98..8021c6f 100644
--- a/man/NCList-class.Rd
+++ b/man/NCList-class.Rd
@@ -6,13 +6,11 @@
\alias{NCList-class}
\alias{NCList}
-\alias{ranges,NCList-method}
\alias{length,NCList-method}
\alias{names,NCList-method}
\alias{start,NCList-method}
\alias{end,NCList-method}
\alias{width,NCList-method}
-\alias{coerce,NCList,IRanges-method}
\alias{coerce,Ranges,NCList-method}
% NCLists objects:
diff --git a/man/RangedData-class.Rd b/man/RangedData-class.Rd
index ff80ab5..ae2ab58 100644
--- a/man/RangedData-class.Rd
+++ b/man/RangedData-class.Rd
@@ -73,12 +73,19 @@
\title{Data on ranges}
\description{
- IMPORTANT NOTE: \code{RangedData} objects will be deprecated in BioC 3.6!
-
+ IMPORTANT NOTE: \code{RangedData} objects will be deprecated in BioC 3.7!
The use of \code{RangedData} objects has been discouraged in favor
- of \link[GenomicRanges]{GRanges} objects since BioC 2.12. The
- \link[GenomicRanges]{GRanges} class is defined in the \pkg{GenomicRanges}
- package.
+ of \link[GenomicRanges]{GRanges} or \link[GenomicRanges]{GRangesList}
+ objects since BioC 2.12, that is, since 2013.
+ The \link[GenomicRanges]{GRanges} and \link[GenomicRanges]{GRangesList}
+ classes are defined in the \pkg{GenomicRanges} package.
+ See \code{?GRanges} and \code{?GenomicRanges} (after loading the
+ \pkg{GenomicRanges} package) for more information about these classes.
+ PLEASE MIGRATE YOUR CODE TO USE \link[GenomicRanges]{GRanges} OR
+ \link[GenomicRanges]{GRangesList} OBJECTS INSTEAD OF \code{RangedData}
+ OBJECTS AS SOON AS POSSIBLE. Don't hesitate to ask on the bioc-devel
+ mailing list (\url{https://bioconductor.org/help/support/#bioc-devel})
+ if you need help with this.
\code{RangedData} supports storing data, i.e. a set of variables, on a
set of ranges spanning multiple spaces (e.g. chromosomes). Although
@@ -449,9 +456,6 @@
rd[["filter"]]
rd <- RangedData(ranges, score + score)
rd[["score...score"]] # names made valid
- ## use a universe
- rd <- RangedData(ranges, universe = "hg18")
- universe(rd)
## split some data over chromosomes
@@ -461,7 +465,7 @@
filter <- c(filter, c(0L, 1L, NA, 0L))
chrom <- paste("chr", rep(c(1,2), c(length(ranges), length(range2))), sep="")
- rd <- RangedData(both, score, filter, space = chrom, universe = "hg18")
+ rd <- RangedData(both, score, filter, space = chrom)
rd[["score"]] # identical to score
rd[1][["score"]] # identical to score[1:3]
diff --git a/man/Ranges-class.Rd b/man/Ranges-class.Rd
index 4b82643..ffadb83 100644
--- a/man/Ranges-class.Rd
+++ b/man/Ranges-class.Rd
@@ -18,6 +18,8 @@
\alias{start<-}
\alias{width<-}
\alias{end<-}
+\alias{as.character,Ranges-method}
+\alias{as.factor,Ranges-method}
\alias{as.matrix,Ranges-method}
\alias{as.data.frame,Ranges-method}
\alias{as.integer,Ranges-method}
@@ -79,19 +81,19 @@
can be subsetted by row and by column).
The Ranges class itself is a virtual class. The following classes derive
- directly from the Ranges class: \link{IRanges}, \link{NCList},
+ directly from the Ranges class: \link{IRanges}, \link{IPos}, \link{NCList},
\link{PartitioningByEnd}.
}
\section{Methods}{
In the code snippets below, \code{x}, \code{y} and \code{object} are
Ranges objects. Not all the functions described below will necessarily
- work with all kinds of Ranges objects but they should work at least
+ work with all kinds of Ranges derivatives but they should work at least
for \link{IRanges} objects.
Note that many more operations on Ranges objects are described in other
- man pages of the IRanges package. See for example the man page for intra
- range transformations (e.g. \code{shift()}, see
+ man pages of the \pkg{IRanges} package. See for example the man page for
+ \emph{intra range transformations} (e.g. \code{shift()}, see
\code{?`\link{intra-range-methods}`}), or the man page for inter range
transformations (e.g. \code{reduce()}, see
\code{?`\link{inter-range-methods}`}), or the man page for
@@ -237,8 +239,10 @@
lines. The number of lines can be altered by setting the global
options \code{showHeadLines} and \code{showTailLines}. If the
object length is less than the sum of the options, the full object
- is displayed. These options affect GRanges, GAlignments,
- Ranges and XString objects.
+ is displayed. These options affect display of \link{IRanges},
+ \link{IPos}, \link[S4Vectors]{Hits}, \link[GenomicRanges]{GRanges},
+ \link[GenomicRanges]{GPos}, \link[GenomicAlignments]{GAlignments},
+ \link[Biostrings]{XStringSet} objects, and more...
}
}
}
@@ -299,13 +303,17 @@
\item \link{IRanges} objects (\link{NormalIRanges} objects are documented
in the same man page).
+ \item The \link{IPos} class, a memory-efficient \link{Ranges} derivative
+ for representing \emph{integer positions} (i.e. integer ranges
+ of width 1).
+
\item \link{Ranges-comparison} for comparing and ordering ranges.
\item \link{findOverlaps-methods} for finding/counting overlapping ranges.
\item \link{intra-range-methods} and
\link{inter-range-methods} for intra range and
- inter range transformations of a Ranges object.
+ inter range transformations of a Ranges derivative.
\item \link{coverage-methods} for computing the coverage
of a set of ranges.
diff --git a/man/Ranges-comparison.Rd b/man/Ranges-comparison.Rd
index 078ba1d..d96f265 100644
--- a/man/Ranges-comparison.Rd
+++ b/man/Ranges-comparison.Rd
@@ -9,6 +9,7 @@
\alias{match,Ranges,Ranges-method}
\alias{selfmatch,Ranges-method}
+\alias{is.unsorted,Ranges-method}
\alias{order,Ranges-method}
@@ -27,8 +28,10 @@
\S4method{selfmatch}{Ranges}(x, method=c("auto", "quick", "hash"))
-## order()
-## -------
+## order() and related methods
+## ----------------------------
+
+\S4method{is.unsorted}{Ranges}(x, na.rm=FALSE, strictly=FALSE)
\S4method{order}{Ranges}(..., na.last=TRUE, decreasing=FALSE, method=c("auto", "shell", "radix"))
@@ -62,6 +65,13 @@ rangeComparisonCodeToLetter(code)
For \code{order}: The \code{method} argument is ignored.
}
+ \item{na.rm}{
+ Ignored.
+ }
+ \item{strictly}{
+ Logical indicating if the check should be for \emph{strictly} increasing
+ values.
+ }
\item{...}{
One or more \link{Ranges} objects. The \link{Ranges} objects
after the first one are used to break ties.
@@ -78,7 +88,11 @@ rangeComparisonCodeToLetter(code)
}
\details{
- Two ranges are considered equal iff they share the same start and width.
+ Two elements of a \link{Ranges} derivative (i.e. two integer ranges)
+ are considered equal iff they share the same start and width.
+ \code{duplicated()} and \code{unique()} on a \link{Ranges} derivative
+ conform to this definition.
+
Note that with this definition, 2 empty ranges are generally
not equal (they need to share the same start to be considered equal).
This means that, when it comes to comparing ranges, an empty range is
@@ -86,10 +100,21 @@ rangeComparisonCodeToLetter(code)
typical usecase is comparison of insertion points defined along a string
(like a DNA sequence) and represented as empty ranges.
- Ranges are ordered by starting position first, and then by width.
- This way, the space of ranges is totally ordered.
- On a \link{Ranges} object, \code{order}, \code{sort}, and \code{rank}
- are consistent with this order.
+ The "natural order" for the elements of a \link{Ranges} derivative
+ is to order them (a) first by start and (b) then by width.
+ This way, the space of integer ranges is totally ordered.
+
+ \code{pcompare()}, \code{==}, \code{!=}, \code{<=}, \code{>=}, \code{<}
+ and \code{>} on \link{Ranges} derivatives behave according to this
+ "natural order".
+
+ \code{is.unsorted()}, \code{order()}, \code{sort()}, \code{rank()} on
+ \link{Ranges} derivatives also behave according to this
+ "natural order".
+
+ Finally, note that some \emph{inter range transformations} like
+ \code{\link{reduce}} or \code{\link{disjoin}} also use this "natural order"
+ implicitly when operating on \link{Ranges} derivatives.
\describe{
\item{}{
@@ -300,6 +325,7 @@ countMatches(x2_levels, x2)
## ---------------------------------------------------------------------
## D. order() AND RELATED METHODS
## ---------------------------------------------------------------------
+is.unsorted(x2)
order(x2)
sort(x2)
rank(x2, ties.method="first")
diff --git a/man/RangesList-class.Rd b/man/RangesList-class.Rd
index 20cf83e..c40a34b 100644
--- a/man/RangesList-class.Rd
+++ b/man/RangesList-class.Rd
@@ -41,9 +41,7 @@
\description{An extension of \linkS4class{List} that holds only
\linkS4class{Ranges} objects. Useful for storing ranges over a set
of spaces (e.g. chromosomes), each of which requires a separate
- \code{Ranges} object. As a \code{Vector}, \code{RangesList} may be
- annotated with its universe identifier (e.g. a genome) in which all of its
- spaces exist.
+ \code{Ranges} object.
}
\section{Accessors}{
@@ -69,27 +67,16 @@
name is repeated according to the length of its element.
}
}
-
- These accessors are for the \code{universe} identifier:
- \describe{
- \item{}{\code{universe(x)}: gets the name of the universe as a
- single string, if one has been specified, \code{NULL} otherwise.
- }
- \item{}{\code{universe(x) <- value}: sets the name of the universe
- to \code{value}, a single string or \code{NULL}.
- }
- }
}
\section{Constructor}{
\describe{
- \item{}{\code{RangesList(..., universe = NULL)}:
+ \item{}{\code{RangesList(...)}:
Each \code{Ranges} in \code{...}
becomes an element in the new \code{RangesList}, in the same
order. This is analogous to the \code{\link{list}} constructor,
except every argument in \code{...} must be derived from
- \code{Ranges}. The universe is specified by the \code{universe}
- parameter, which should be a single string or NULL, to leave unspecified.
+ \code{Ranges}.
}
}
}
@@ -154,10 +141,7 @@ as.list(RangesList(range1, range2))
# coerce to data.frame
as.data.frame(named)
-# set the universe
-universe(named) <- "hg18"
-universe(named)
-RangesList(range1, range2, universe = "hg18")
+RangesList(range1, range2)
## zoom in 2X
collection <- RangesList(one = range1, range2)
diff --git a/man/RleViewsList-class.Rd b/man/RleViewsList-class.Rd
index a6be11a..78743ef 100644
--- a/man/RleViewsList-class.Rd
+++ b/man/RleViewsList-class.Rd
@@ -30,14 +30,12 @@
\section{Constructor}{
\describe{
- \item{}{\code{RleViewsList(..., rleList, rangesList, universe = NULL)}:
+ \item{}{\code{RleViewsList(..., rleList, rangesList)}:
Either \code{...} or the \code{rleList}/\code{rangesList} couplet
provide the RleViews for the list. If \code{...} is provided, each
of these arguments must be RleViews objects. Alternatively,
\code{rleList} and \code{rangesList} accept Rle and Ranges objects
respectively that are meshed together for form the RleViewsList.
- The universe is specified by the \code{universe} parameter, which
- should be a single string or NULL, to leave unspecified.
}
\item{}{\code{Views(subject, start=NULL, end=NULL, width=NULL, names=NULL)}:
Same as \code{RleViewsList(rleList = subject, rangesList = start)}.
diff --git a/man/Views-class.Rd b/man/Views-class.Rd
index 2434c94..97e8243 100644
--- a/man/Views-class.Rd
+++ b/man/Views-class.Rd
@@ -5,7 +5,6 @@
\alias{Views-class}
\alias{subject}
\alias{subject,Views-method}
-\alias{ranges}
\alias{ranges,Views-method}
\alias{ranges<-}
\alias{ranges<-,Views-method}
diff --git a/man/ViewsList-class.Rd b/man/ViewsList-class.Rd
index ae7be5b..73accfd 100644
--- a/man/ViewsList-class.Rd
+++ b/man/ViewsList-class.Rd
@@ -29,9 +29,6 @@
for storing coverage vectors over a set of spaces (e.g. chromosomes),
each of which requires a separate \linkS4class{RleViews} object.
- As a \linkS4class{Vector} subclass, ViewsList may be annotated with its
- universe identifier (e.g. a genome) in which all of its spaces exist.
-
As a \linkS4class{List} subclass, ViewsList inherits all the methods
available for \linkS4class{List} objects. It also presents an API that
is very similar to that of \linkS4class{Views}, where operations are
diff --git a/man/bind-arrays.Rd b/man/bind-arrays.Rd
deleted file mode 100644
index 23a0b49..0000000
--- a/man/bind-arrays.Rd
+++ /dev/null
@@ -1,56 +0,0 @@
-\name{bind-arrays}
-
-\alias{bind-arrays}
-\alias{bind arrays}
-
-\alias{arbind}
-\alias{acbind}
-\alias{arbind,array-method}
-\alias{acbind,array-method}
-
-
-\title{Bind arrays along their rows or columns}
-
-\description{
- Bind array-like objects with an arbitrary number of dimensions along their
- rows (\code{arbind}) or columns (\code{acbind}).
-}
-
-\usage{
-arbind(...)
-acbind(...)
-}
-
-\arguments{
- \item{...}{
- The array-like objects to bind.
- }
-}
-
-\value{
- An array-like object, typically of the same class as the input objects if
- they all have the same class.
-}
-
-\seealso{
- \itemize{
- \item \code{\link[base]{rbind}} and \code{\link[base]{cbind}} in the
- \pkg{base} package for the corresponding operations on matrix-like
- objects.
-
- \item The \pkg{abind} package on CRAN.
- }
-}
-
-\examples{
-a1 <- array(1:60, c(3, 5, 4),
- dimnames=list(NULL, paste0("M1y", 1:5), NULL))
-a2 <- array(101:240, c(7, 5, 4),
- dimnames=list(paste0("M2x", 1:7), paste0("M2y", 1:5), NULL))
-a3 <- array(10001:10100, c(5, 5, 4),
- dimnames=list(paste0("M3x", 1:5), NULL, paste0("M3z", 1:4)))
-
-arbind(a1, a2, a3)
-
-}
-\keyword{methods}
diff --git a/man/findOverlaps-methods.Rd b/man/findOverlaps-methods.Rd
index 9bb4515..8986788 100644
--- a/man/findOverlaps-methods.Rd
+++ b/man/findOverlaps-methods.Rd
@@ -54,18 +54,23 @@
\alias{subsetByOverlaps,RangedData,RangesList-method}
\alias{subsetByOverlaps,RangesList,RangedData-method}
-\alias{mergeByOverlaps}
-
-\alias{ranges,Hits-method}
-\alias{ranges,HitsList-method}
-
-\alias{findOverlapPairs}
+\alias{overlapsRanges}
+\alias{overlapsRanges,Ranges,Ranges-method}
+\alias{overlapsRanges,RangesList,RangesList-method}
\alias{poverlaps}
\alias{poverlaps,Ranges,Ranges-method}
\alias{poverlaps,Ranges,integer-method}
\alias{poverlaps,integer,Ranges-method}
+\alias{mergeByOverlaps}
+
+\alias{findOverlapPairs}
+
+% deprecated
+\alias{ranges,Hits-method}
+\alias{ranges,HitsList-method}
+
\title{Finding overlapping ranges}
\description{
@@ -81,51 +86,54 @@
}
\usage{
-findOverlaps(query, subject, maxgap=0L, minoverlap=1L,
+findOverlaps(query, subject, maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"),
select=c("all", "first", "last", "arbitrary"),
...)
-countOverlaps(query, subject, maxgap=0L, minoverlap=1L,
+countOverlaps(query, subject, maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"),
...)
-overlapsAny(query, subject, maxgap=0L, minoverlap=1L,
+overlapsAny(query, subject, maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"),
...)
query \%over\% subject
query \%within\% subject
query \%outside\% subject
-subsetByOverlaps(query, subject, maxgap=0L, minoverlap=1L,
+subsetByOverlaps(x, ranges, maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"),
invert=FALSE,
...)
-mergeByOverlaps(query, subject, ...)
-findOverlapPairs(query, subject, ...)
+overlapsRanges(query, subject, hits=NULL, ...)
poverlaps(query, subject, maxgap = 0L, minoverlap = 1L,
type = c("any", "start", "end", "within", "equal"),
...)
-\S4method{ranges}{Hits}(x, query, subject)
-\S4method{ranges}{HitsList}(x, query, subject)
+mergeByOverlaps(query, subject, ...)
+
+findOverlapPairs(query, subject, ...)
}
\arguments{
- \item{query, subject}{
+ \item{query, subject, x, ranges}{
Each of them can be a \link{Ranges}, \link{Views}, \link{RangesList},
- \link{ViewsList}, or \link{RangedData} object.
- In addition, if \code{subject} is a \link{Ranges} object, \code{query}
- can be an integer vector to be converted to length-one ranges.
+ or \link{ViewsList} object.
+ In addition, if \code{subject} or \code{ranges} is a \link{Ranges} object,
+ \code{query} or \code{x} can be an integer vector to be converted to
+ length-one ranges.
- If \code{query} is a \link{RangesList} or \link{RangedData},
- \code{subject} must be a \link{RangesList} or \link{RangedData}.
- If both lists have names, each element from the subject is paired
- with the element from the query with the matching name, if any.
- Otherwise, elements are paired by position. The overlap is then
- computed between the pairs as described below.
+ If \code{query} (or \code{x}) is a \link{RangesList} object, then
+ \code{subject} (or \code{ranges}) must also be a \link{RangesList} object.
+
+ If both arguments are list-like objects with names, each list element
+ from the 2nd argument is paired with the list element from the 1st
+ argument with the matching name, if any. Otherwise, list elements are
+ paired by position. The overlap is then computed between the pairs as
+ described below.
If \code{subject} is omitted, \code{query} is queried against
itself. In this case, and only this case, the \code{drop.self}
@@ -136,12 +144,29 @@ poverlaps(query, subject, maxgap = 0L, minoverlap = 1L,
\code{drop.redundant} is \code{TRUE}, only one of A->B and B->A
is returned.
}
- \item{maxgap, minoverlap}{
- Intervals with a separation of \code{maxgap} or less and a minimum
- of \code{minoverlap} overlapping positions, allowing for
- \code{maxgap}, are considered to be overlapping. \code{maxgap}
- should be a scalar, non-negative, integer. \code{minoverlap}
- should be a scalar, positive integer.
+ \item{maxgap}{
+ A single integer >= -1.
+
+ If \code{type} is set to \code{"any"}, \code{maxgap} is interpreted as
+ the maximum \emph{gap} that is allowed between 2 ranges for the ranges
+ to be considered as overlapping. The \emph{gap} between 2 ranges
+ is the number of positions that separate them. The \emph{gap} between
+ 2 adjacent ranges is 0. By convention when one range has its start or
+ end strictly inside the other (i.e. non-disjoint ranges), the \emph{gap}
+ is considered to be -1.
+
+ If \code{type} is set to anything else, \code{maxgap} has a special
+ meaning that depends on the particular \code{type}. See \code{type}
+ below for more information.
+ }
+ \item{minoverlap}{
+ A single non-negative integer.
+
+ Only ranges with a minimum of \code{minoverlap} overlapping positions
+ are considered to be overlapping.
+
+ When \code{type} is \code{"any"}, at least one of \code{maxgap} and
+ \code{minoverlap} must be set to its default value.
}
\item{type}{
By default, any overlap is accepted. By specifying the \code{type}
@@ -149,9 +174,7 @@ poverlaps(query, subject, maxgap = 0L, minoverlap = 1L,
correspond to operations in Allen's Interval Algebra (see
references). If \code{type} is \code{start} or \code{end}, the
intervals are required to have matching starts or ends,
- respectively. While this operation seems trivial, the naive
- implementation using \code{outer} would be much less
- efficient. Specifying \code{equal} as the type returns the
+ respectively. Specifying \code{equal} as the type returns the
intersection of the \code{start} and \code{end} matches. If
\code{type} is \code{within}, the query interval must be wholly
contained within the subject interval. Note that all matches must
@@ -161,19 +184,19 @@ poverlaps(query, subject, maxgap = 0L, minoverlap = 1L,
overlap types. For \code{start}, \code{end}, and \code{equal},
it specifies the maximum difference in the starts, ends or both,
respectively. For \code{within}, it is the maximum amount by which
- the subject may be wider than the query.
+ the subject may be wider than the query. If \code{maxgap} is set to -1
+ (the default), it's replaced internally by 0.
}
\item{select}{
If \code{query} is a \link{Ranges} or \link{Views} object:
When \code{select} is \code{"all"} (the default), the results are
returned as a \link[S4Vectors]{Hits} object.
- Otherwise the returned value is an integer vector parallel to \code{query}
- (i.e. same length) containing the first, last, or arbitrary overlapping
- interval in \code{subject}, with \code{NA} indicating intervals that did
- not overlap any intervals in \code{subject}.
+ Otherwise the returned value is an integer vector \emph{parallel} to
+ \code{query} (i.e. same length) containing the first, last,
+ or arbitrary overlapping interval in \code{subject}, with \code{NA}
+ indicating intervals that did not overlap any intervals in \code{subject}.
- If \code{query} is a \link{RangesList}, \link{ViewsList}, or
- \link{RangedData} object:
+ If \code{query} is a \link{RangesList} or \link{ViewsList} object:
When \code{select} is \code{"all"} (the default), the results are
returned as a \link[S4Vectors]{HitsList} object.
Otherwise the returned value depends on the \code{drop} argument.
@@ -183,14 +206,21 @@ poverlaps(query, subject, maxgap = 0L, minoverlap = 1L,
containing indices that are offset to align with the unlisted \code{query}.
}
\item{invert}{
- If \code{TRUE}, keep only the query ranges that do \emph{not}
- overlap the subject.
+ If \code{TRUE}, keep only the ranges in \code{x} that do \emph{not}
+ overlap \code{ranges}.
+ }
+ \item{hits}{
+ The \link[S4Vectors]{Hits} or \link[S4Vectors]{HitsList} object returned
+ by \code{findOverlaps}, or \code{NULL}. If \code{NULL} then \code{hits}
+ is computed by calling \code{findOverlaps(query, subject, ...)} internally
+ (the extra arguments passed to \code{overlapsRanges} are passed to
+ \code{findOverlaps}).
}
\item{...}{
Further arguments to be passed to or from other methods:
\itemize{
\item \code{drop}: Supported only when \code{query} is a
- \link{RangesList}, \link{ViewsList}, or \link{RangedData} object.
+ \link{RangesList} or \link{ViewsList} object.
\code{FALSE} by default. See \code{select} argument above for the
details.
\item \code{drop.self}, \code{drop.redundant}: When \code{subject}
@@ -200,10 +230,6 @@ poverlaps(query, subject, maxgap = 0L, minoverlap = 1L,
details.
}
}
- \item{x}{
- \link[S4Vectors]{Hits} or \link[S4Vectors]{HitsList} object returned
- by \code{findOverlaps}.
- }
}
\details{
@@ -225,9 +251,9 @@ poverlaps(query, subject, maxgap = 0L, minoverlap = 1L,
\code{overlapsAny} finds the ranges in \code{query} that overlap any
of the ranges in \code{subject}. For \link{Ranges} or \link{Views}
objects, it returns a logical vector of length equal to the number of
- ranges in \code{query}. For \link{RangesList}, \link{RangedData}, or
- \link{ViewsList} objects, it returns a \link{LogicalList} object,
- where each element of the result corresponds to a space in \code{query}.
+ ranges in \code{query}. For \link{RangesList} or \link{ViewsList} objects,
+ it returns a \link{LogicalList} object where each element of the result
+ corresponds to a space in \code{query}.
\code{\%over\%} and \code{\%within\%} are convenience wrappers for the
2 most common use cases. Currently defined as
@@ -237,10 +263,26 @@ poverlaps(query, subject, maxgap = 0L, minoverlap = 1L,
overlapsAny(query, subject,
type="within")}. \code{\%outside\%} is simply the inverse of \code{\%over\%}.
- \code{subsetByOverlaps} returns the subset of \code{query} that
- has an overlap hit with a range in \code{subject} using the specified
+ \code{subsetByOverlaps} returns the subset of \code{x} that
+ has an overlap hit with a range in \code{ranges} using the specified
\code{findOverlaps} parameters.
+ When \code{hits} is a \link[S4Vectors]{Hits} (or \link[S4Vectors]{HitsList})
+ object, \code{overlapsRanges(query, subject, hits)} returns a \link{Ranges}
+ (or \link{RangesList}) object of the \emph{same shape} as \code{hits}
+ holding the regions of intersection between the overlapping ranges
+ in objects \code{query} and \code{subject}, which should be the same
+ query and subject used in the call to \code{findOverlaps} that generated
+ \code{hits}.
+ \emph{Same shape} means same length when \code{hits} is a
+ \link[S4Vectors]{Hits} object, and same length and same elementNROWS
+ when \code{hits} is a \link[S4Vectors]{HitsList} object.
+
+ \code{poverlaps} compares \code{query} and \code{subject} in parallel
+ (like e.g., \code{pmin}) and returns a logical vector indicating
+ whether each pair of ranges overlaps. Integer vectors are treated as
+ width-one ranges.
+
\code{mergeByOverlaps} computes the overlap between query and subject
according to the arguments in \code{\dots}. It then extracts the
corresponding hits from each object and returns a \code{DataFrame}
@@ -253,22 +295,6 @@ poverlaps(query, subject, maxgap = 0L, minoverlap = 1L,
returns a formal \code{\link[S4Vectors:Pairs-class]{Pairs}} object
that provides useful downstream conveniences, such as finding the
intersection of the overlapping ranges with \code{\link{pintersect}}.
-
- \code{poverlaps} compares \code{query} and \code{subject} in parallel
- (like e.g., \code{pmin}) and returns a logical vector indicating
- whether each pair of ranges overlaps. Integer vectors are treated as
- width-one ranges.
-
- When \code{x} is a \link[S4Vectors]{Hits} (or \link[S4Vectors]{HitsList})
- object, \code{ranges(x, query, subject)} returns a \link{Ranges}
- (or \link{RangesList}) object of the \emph{same shape} as \code{x}
- holding the regions of intersection between the overlapping ranges
- in objects \code{query} and \code{subject}, which should be the same
- query and subject used in the call to \code{findOverlaps} that generated
- \code{x}.
- \emph{Same shape} means same length when \code{x} is a
- \link[S4Vectors]{Hits} object, and same length and same elementNROWS
- when \code{x} is a \link[S4Vectors]{HitsList} object.
}
\references{
@@ -300,39 +326,34 @@ poverlaps(query, subject, maxgap = 0L, minoverlap = 1L,
}
\examples{
-query <- IRanges(c(1, 4, 9), c(5, 7, 10))
-subject <- IRanges(c(2, 2, 10), c(2, 3, 12))
-
## ---------------------------------------------------------------------
## findOverlaps()
## ---------------------------------------------------------------------
+query <- IRanges(c(1, 4, 9), c(5, 7, 10))
+subject <- IRanges(c(2, 2, 10), c(2, 3, 12))
+
+findOverlaps(query, subject)
+
## at most one hit per query
findOverlaps(query, subject, select="first")
findOverlaps(query, subject, select="last")
findOverlaps(query, subject, select="arbitrary")
-## overlap even if adjacent only
-## (FIXME: the gap between 2 adjacent ranges should be still considered
-## 0. So either we have an argument naming problem, or we should modify
-## the handling of the 'maxgap' argument so that the user would need to
-## specify maxgap=0L to obtain the result below.)
-findOverlaps(query, subject, maxgap=1L)
-
-## shortcut
-findOverlaps(query, subject)
+## including adjacent ranges in the result
+findOverlaps(query, subject, maxgap=0L)
query <- IRanges(c(1, 4, 9), c(5, 7, 10))
subject <- IRanges(c(2, 2), c(5, 4))
-## one Ranges with itself
+## one IRanges object with itself
findOverlaps(query)
## single points as query
subject <- IRanges(c(1, 6, 13), c(4, 9, 14))
findOverlaps(c(3L, 7L, 10L), subject, select="first")
-## alternative overlap types
+## special overlap types
query <- IRanges(c(1, 5, 3, 4), width=c(2, 2, 4, 6))
subject <- IRanges(c(1, 3, 5, 6), width=c(4, 4, 5, 4))
@@ -359,18 +380,19 @@ overlapsAny(query, subject, type="start")
overlapsAny(query, subject, type="end")
query \%over\% subject # same as overlapsAny(query, subject)
query \%within\% subject # same as overlapsAny(query, subject,
- # type="within")
+ # type="within")
## ---------------------------------------------------------------------
-## "ranges" METHODS FOR Hits OR HitsList OBJECTS
+## overlapsRanges()
## ---------------------------------------------------------------------
-## extract the regions of intersection between the overlapping ranges
-ranges(ov, query, subject)
+## Extract the regions of intersection between the overlapping ranges:
+overlapsRanges(query, subject, ov)
## ---------------------------------------------------------------------
## Using RangesList objects
## ---------------------------------------------------------------------
+
query <- IRanges(c(1, 4, 9), c(5, 7, 10))
qpartition <- factor(c("a","a","b"))
qlist <- split(query, qpartition)
diff --git a/man/inter-range-methods.Rd b/man/inter-range-methods.Rd
index f39fee2..8a454e1 100644
--- a/man/inter-range-methods.Rd
+++ b/man/inter-range-methods.Rd
@@ -4,6 +4,7 @@
\alias{range}
\alias{range,Ranges-method}
+\alias{range,IPos-method}
\alias{range,RangesList-method}
\alias{range,CompressedIRangesList-method}
\alias{range,RangedData-method}
@@ -29,6 +30,7 @@
\alias{isDisjoint}
\alias{isDisjoint,Ranges-method}
+\alias{isDisjoint,IPos-method}
\alias{isDisjoint,NormalIRanges-method}
\alias{isDisjoint,RangesList-method}
@@ -39,7 +41,7 @@
\title{Inter range transformations of a Ranges, Views, RangesList,
- MaskCollection, or RangedData object}
+ or MaskCollection object}
\description{
Range-based transformations are grouped in 2 categories:
@@ -163,10 +165,6 @@ disjointBins(x, ...)
if all objects have names, otherwise, if they are all of the same
length, by position. Else, an exception is thrown.
- If \code{x} is a \link{RangedData} object, then \code{range} returns
- a \link{RangesList} object resulting from calling \code{range(ranges(x))},
- i.e. the bounds of the ranges in each space.
-
}\subsection{gaps}{
\code{gaps} returns the "normal" \link{Ranges} object representing
diff --git a/man/intra-range-methods.Rd b/man/intra-range-methods.Rd
index f3d4f29..6152acc 100644
--- a/man/intra-range-methods.Rd
+++ b/man/intra-range-methods.Rd
@@ -4,6 +4,7 @@
\alias{shift}
\alias{shift,Ranges-method}
+\alias{shift,IPos-method}
\alias{shift,Views-method}
\alias{shift,RangesList-method}
\alias{shift,CompressedIRangesList-method}
diff --git a/man/range-squeezers.Rd b/man/range-squeezers.Rd
new file mode 100644
index 0000000..b892c9d
--- /dev/null
+++ b/man/range-squeezers.Rd
@@ -0,0 +1,108 @@
+\name{range-squeezers}
+
+\alias{range-squeezers}
+
+\alias{ranges}
+\alias{rglist}
+
+\alias{rglist,Pairs-method}
+
+\title{Squeeze the ranges out of a range-based object}
+
+\description{
+ S4 generic functions for squeezing the ranges out of a range-based object.
+
+ These are analogous to the range squeezers \code{\link[GenomicRanges]{granges}}
+ and \code{\link[GenomicRanges]{grglist}} defined in the \pkg{GenomicRanges}
+ package, except that \code{ranges} returns the ranges in an \link{IRanges}
+ object (instead of a \link[GenomicRanges]{GRanges} object for
+ \code{\link[GenomicRanges]{granges}}), and \code{rglist} returns them in an
+ \link{IRangesList} object (instead of a \link[GenomicRanges]{GRangesList}
+ object for \code{\link[GenomicRanges]{grglist}}).
+}
+
+\usage{
+ranges(x, use.names=TRUE, use.mcols=FALSE, ...)
+rglist(x, use.names=TRUE, use.mcols=FALSE, ...)
+}
+
+\arguments{
+ \item{x}{
+ An object containing ranges e.g. a
+ \link{Ranges}, \link[GenomicRanges]{GenomicRanges},
+ \link[SummarizedExperiment]{RangedSummarizedExperiment},
+ \link[GenomicAlignments]{GAlignments},
+ \link[GenomicAlignments]{GAlignmentPairs}, or
+ \link[GenomicAlignments]{GAlignmentsList} object,
+ or a \link[S4Vectors]{Pairs} object containing ranges.
+ }
+ \item{use.names}{
+ \code{TRUE} (the default) or \code{FALSE}.
+ Whether or not the names on \code{x} (accessible with \code{names(x)})
+ should be propagated to the returned object.
+ }
+ \item{use.mcols}{
+ \code{TRUE} or \code{FALSE} (the default).
+ Whether or not the metadata columns on \code{x} (accessible with
+ \code{mcols(x)}) should be propagated to the returned object.
+ }
+ \item{...}{
+ Additional arguments, for use in specific methods.
+ }
+}
+
+\details{
+ Various packages (e.g. \pkg{IRanges}, \pkg{GenomicRanges},
+ \pkg{SummarizedExperiment}, \pkg{GenomicAlignments}, etc...)
+ define and document various range squeezing methods for various types
+ of objects.
+
+ Note that these functions can be seen as \emph{object getters} or as
+ functions performing coercion.
+
+ For some objects (e.g. \link[GenomicAlignments]{GAlignments} and
+ \link[GenomicAlignments]{GAlignmentPairs} objects defined in the
+ \pkg{GenomicAlignments} package), \code{as(x, "IRanges")} and
+ \code{as(x, "IRangesList")} are equivalent to
+ \code{ranges(x, use.names=TRUE, use.mcols=TRUE)} and
+ \code{rglist(x, use.names=TRUE, use.mcols=TRUE)}, respectively.
+}
+
+\value{
+ An \link{IRanges} object for \code{ranges}.
+
+ An \link{IRangesList} object for \code{rglist}.
+
+ If \code{x} is a vector-like object (e.g.
+ \link[GenomicAlignments]{GAlignments}), the returned object is expected
+ to be \emph{parallel} to \code{x}, that is, the i-th element in the output
+ corresponds to the i-th element in the input.
+
+ If \code{use.names} is TRUE, then the names on \code{x}
+ (if any) are propagated to the returned object.
+ If \code{use.mcols} is TRUE, then the metadata columns on \code{x}
+ (if any) are propagated to the returned object.
+}
+
+\author{H. Pagès}
+
+\seealso{
+ \itemize{
+ \item \link{IRanges} and \link{IRangesList} objects.
+
+ \item \link[SummarizedExperiment]{RangedSummarizedExperiment} objects
+ in the \pkg{SummarizedExperiment} package.
+
+ \item \link[GenomicAlignments]{GAlignments},
+ \link[GenomicAlignments]{GAlignmentPairs},
+ and \link[GenomicAlignments]{GAlignmentsList} objects in the
+ \pkg{GenomicAlignments} package.
+ }
+}
+
+\examples{
+## See ?GAlignments in the GenomicAlignments package for examples of
+## "ranges" and "rglist" methods.
+}
+
+\keyword{methods}
diff --git a/man/setops-methods.Rd b/man/setops-methods.Rd
index 3c3b998..c46acbe 100644
--- a/man/setops-methods.Rd
+++ b/man/setops-methods.Rd
@@ -129,6 +129,17 @@
changed using the \code{resolve.empty} argument.
}
+\value{
+ On \link{Ranges} objects, \code{union}, \code{intersect}, and \code{setdiff}
+ return an \link{IRanges} \emph{instance} that is guaranteed to be
+ \emph{normal} (see \code{\link{isNormal}}) but is NOT
+ promoted to \link{NormalIRanges}.
+
+ On \link{Ranges} objects, \code{punion}, \code{pintersect}, \code{psetdiff},
+ and \code{pgap} return an object of the same class and length as their first
+ argument.
+}
+
\author{H. Pagès and M. Lawrence}
\seealso{
diff --git a/src/NCList.c b/src/NCList.c
index cc6f4ae..abd7374 100644
--- a/src/NCList.c
+++ b/src/NCList.c
@@ -730,6 +730,8 @@ static int is_TYPE_ANY_hit(int rgid, const Backpack *backpack)
{
int x_start, x_end;
+ if (backpack->minoverlap == 0)
+ return 1;
/* Check the score */
x_start = backpack->x_start_p[rgid];
x_end = backpack->x_end_p[rgid];
@@ -893,7 +895,7 @@ static Backpack prepare_backpack(const int *x_start_p, const int *x_end_p,
backpack.minoverlap = minoverlap;
backpack.overlap_type = overlap_type;
if (overlap_type == TYPE_ANY)
- backpack.min_overlap_score0 = minoverlap - maxgap - 1;
+ backpack.min_overlap_score0 = minoverlap - maxgap - 2;
else
backpack.min_overlap_score0 = minoverlap - 1;
@@ -930,7 +932,7 @@ static Backpack prepare_backpack(const int *x_start_p, const int *x_end_p,
static void update_backpack(Backpack *backpack, int y_rgid,
int y_start, int y_end, int y_space)
{
- int min_x_end, max_x_start, min_overlap_score0;
+ int slack, min_x_end, max_x_start, min_overlap_score0;
backpack->y_rgid = y_rgid;
backpack->y_start = y_start;
@@ -938,23 +940,31 @@ static void update_backpack(Backpack *backpack, int y_rgid,
backpack->y_space = y_space;
/* set 'min_x_end' and 'max_x_start' */
+ if (backpack->overlap_type == TYPE_ANY) {
+ if (backpack->minoverlap == 0) {
+ slack = backpack->maxgap + 1;
+ } else {
+ slack = 1 - backpack->minoverlap;
+ }
+ backpack->min_x_end = y_start - slack;
+ backpack->max_x_start = y_end + slack;
+ return;
+ }
if (backpack->overlap_type == TYPE_WITHIN) {
backpack->min_x_end = backpack->y_end;
backpack->max_x_start = backpack->y_start;
return;
}
- if (backpack->overlap_type == TYPE_ANY
- || backpack->overlap_type == TYPE_EXTEND
+ if (backpack->overlap_type == TYPE_EXTEND
|| backpack->minoverlap != 0
|| backpack->circle_len != NA_INTEGER)
{
min_overlap_score0 = backpack->min_overlap_score0;
backpack->min_x_end = y_start + min_overlap_score0;
backpack->max_x_start = y_end - min_overlap_score0;
+ if (backpack->overlap_type == TYPE_EXTEND)
+ return;
}
- if (backpack->overlap_type == TYPE_ANY
- || backpack->overlap_type == TYPE_EXTEND)
- return;
/* TYPE_START, TYPE_END, or TYPE_EQUAL */
/* min_x_end */
@@ -1337,20 +1347,6 @@ static int find_overlaps(
* NCList_find_overlaps_in_groups()
*/
-static int get_maxgap0(SEXP maxgap)
-{
- int maxgap0;
-
- if (!IS_INTEGER(maxgap) || LENGTH(maxgap) != 1)
- error("'maxgap' must be a single integer");
- maxgap0 = INTEGER(maxgap)[0];
- if (maxgap0 == NA_INTEGER)
- error("'maxgap' cannot be NA");
- if (maxgap0 < 0)
- error("'maxgap' cannot be negative");
- return maxgap0;
-}
-
static int get_overlap_type(SEXP type)
{
const char *type0;
@@ -1378,6 +1374,22 @@ static int get_overlap_type(SEXP type)
return 0;
}
+static int get_maxgap0(SEXP maxgap, int overlap_type)
+{
+ int maxgap0;
+
+ if (!IS_INTEGER(maxgap) || LENGTH(maxgap) != 1)
+ error("'maxgap' must be a single integer");
+ maxgap0 = INTEGER(maxgap)[0];
+ if (maxgap0 == NA_INTEGER)
+ error("'maxgap' cannot be NA");
+ if (maxgap0 < -1)
+ error("'maxgap' must be >= -1");
+ if (maxgap0 == -1 && overlap_type != TYPE_ANY)
+ maxgap0 = 0;
+ return maxgap0;
+}
+
static int get_minoverlap0(SEXP minoverlap, int maxgap, int overlap_type)
{
int minoverlap0;
@@ -1389,8 +1401,9 @@ static int get_minoverlap0(SEXP minoverlap, int maxgap, int overlap_type)
error("'minoverlap' cannot be NA");
if (minoverlap0 < 0)
error("'minoverlap' cannot be negative");
- if (overlap_type == TYPE_ANY && maxgap != 0 && minoverlap0 > 1)
- error("'minoverlap' must be <= 1 when 'maxgap' is not 0");
+ if (overlap_type == TYPE_ANY && maxgap != -1 && minoverlap0 != 0)
+ error("when 'type' is \"any\", at least one of 'maxgap' "
+ "and 'minoverlap' must be set to its default value");
return minoverlap0;
}
@@ -1457,8 +1470,8 @@ SEXP NCList_find_overlaps(
s_len = check_integer_pairs(s_start, s_end,
&s_start_p, &s_end_p,
"start(s)", "end(s)");
- maxgap0 = get_maxgap0(maxgap);
overlap_type = get_overlap_type(type);
+ maxgap0 = get_maxgap0(maxgap, overlap_type);
minoverlap0 = get_minoverlap0(minoverlap, maxgap0, overlap_type);
select_mode = get_select_mode(select);
circle_len = get_circle_length(circle_length);
@@ -1559,8 +1572,8 @@ SEXP NCList_find_overlaps_in_groups(
s_groups_holder = _hold_CompressedIntegerList(s_groups);
NG2 = _get_length_from_CompressedIntsList_holder(&s_groups_holder);
- maxgap0 = get_maxgap0(maxgap);
overlap_type = get_overlap_type(type);
+ maxgap0 = get_maxgap0(maxgap, overlap_type);
minoverlap0 = get_minoverlap0(minoverlap, maxgap0, overlap_type);
select_mode = get_select_mode(select);
diff --git a/src/coverage_methods.c b/src/coverage_methods.c
index ed418b1..b2f6998 100644
--- a/src/coverage_methods.c
+++ b/src/coverage_methods.c
@@ -343,7 +343,7 @@ static void check_arg_is_numeric(SEXP arg, const char *arg_label)
static void check_arg_is_list(SEXP arg, const char *arg_label)
{
- if (!IS_LIST(arg))
+ if (!isVectorList(arg))
error("'%s' must be a list", arg_label);
return;
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/r-bioc-iranges.git
More information about the debian-med-commit
mailing list