[med-svn] [r-bioc-genomicranges] 01/04: New upstream version 1.30.0
Andreas Tille
tille at debian.org
Wed Nov 8 14:04:31 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository r-bioc-genomicranges.
commit 68b3305bbfd405fe8fa3aca5bc4b8983f09b0c05
Author: Andreas Tille <tille at debian.org>
Date: Wed Nov 8 15:00:33 2017 +0100
New upstream version 1.30.0
---
DESCRIPTION | 10 +-
NAMESPACE | 37 ++-
NEWS | 59 ++++-
R/GNCList-class.R | 2 +-
R/GPos-class.R | 391 ++++++++++------------------
R/GRanges-class.R | 84 +++---
R/GRangesList-class.R | 19 +-
R/GenomicRanges-class.R | 30 +--
R/GenomicRanges-comparison.R | 8 +-
R/RangedData-methods.R | 7 -
R/findOverlaps-methods.R | 92 ++++---
R/genomic-range-squeezers.R | 28 ++
R/inter-range-methods.R | 5 +-
R/intra-range-methods.R | 13 +-
R/nearest-methods.R | 34 ++-
R/range-squeezers.R | 49 ----
R/setops-methods.R | 6 +-
R/strand-utils.R | 28 +-
build/vignette.rds | Bin 697 -> 698 bytes
inst/doc/ExtendingGenomicRanges.pdf | Bin 103036 -> 140031 bytes
inst/doc/GRanges_and_GRangesList_slides.pdf | Bin 250940 -> 250940 bytes
inst/doc/GenomicRangesHOWTOs.pdf | Bin 195126 -> 234135 bytes
inst/doc/GenomicRangesIntroduction.Rnw | 3 +-
inst/doc/GenomicRangesIntroduction.pdf | Bin 159556 -> 189468 bytes
inst/doc/Ten_things_slides.pdf | Bin 138327 -> 138321 bytes
inst/unitTests/test_GNCList-class.R | 20 +-
man/GPos-class.Rd | 104 ++++----
man/GRanges-class.Rd | 350 ++++++++++++-------------
man/GRangesList-class.Rd | 2 +
man/GenomicRanges-comparison.Rd | 73 +++---
man/findOverlaps-methods.Rd | 6 +-
man/genomic-range-squeezers.Rd | 94 +++++++
man/intra-range-methods.Rd | 1 -
man/makeGRangesFromDataFrame.Rd | 10 +-
man/range-squeezers.Rd | 114 --------
vignettes/GenomicRangesIntroduction.Rnw | 3 +-
36 files changed, 809 insertions(+), 873 deletions(-)
diff --git a/DESCRIPTION b/DESCRIPTION
index 7a8e09c..ec7e8cc 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -11,15 +11,15 @@ Description: The ability to efficiently represent and manipulate genomic
summarization of an experiment, are defined in the GenomicAlignments
and SummarizedExperiment packages respectively. Both packages build
on top of the GenomicRanges infrastructure.
-Version: 1.28.6
+Version: 1.30.0
Encoding: UTF-8
Author: P. Aboyoun, H. Pagès, and M. Lawrence
Maintainer: Bioconductor Package Maintainer <maintainer at bioconductor.org>
biocViews: Genetics, Infrastructure, Sequencing, Annotation, Coverage,
GenomeAnnotation
Depends: R (>= 2.10), methods, stats4, BiocGenerics (>= 0.21.2),
- S4Vectors (>= 0.9.47), IRanges (>= 2.9.11), GenomeInfoDb (>=
- 1.11.5)
+ S4Vectors (>= 0.9.47), IRanges (>= 2.11.16), GenomeInfoDb (>=
+ 1.13.1)
Imports: utils, stats, XVector
LinkingTo: S4Vectors, IRanges
Suggests: Biobase, AnnotationDbi (>= 1.21.1), annotate, Biostrings (>=
@@ -38,7 +38,7 @@ Suggests: Biobase, AnnotationDbi (>= 1.21.1), annotate, Biostrings (>=
RNAseqData.HNRNPC.bam.chr14, hgu95av2probe
License: Artistic-2.0
Collate: utils.R phicoef.R transcript-utils.R constraint.R
- strand-utils.R range-squeezers.R GenomicRanges-class.R
+ strand-utils.R genomic-range-squeezers.R GenomicRanges-class.R
GenomicRanges-comparison.R GRanges-class.R GPos-class.R
DelegatingGenomicRanges-class.R GNCList-class.R
GenomicRangesList-class.R GRangesList-class.R
@@ -49,4 +49,4 @@ Collate: utils.R phicoef.R transcript-utils.R constraint.R
tileGenome.R tile-methods.R genomicvars.R zzz.R
RoxygenNote: 5.0.1.9000
NeedsCompilation: yes
-Packaged: 2017-10-03 22:49:24 UTC; biocbuild
+Packaged: 2017-10-30 23:03:01 UTC; biocbuild
diff --git a/NAMESPACE b/NAMESPACE
index b54a0e8..0175837 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -19,11 +19,11 @@ import(XVector) # only for the "Views" method for integer vectors, the
exportClasses(
Constraint, Constraint_OR_NULL,
- GenomicRanges, GenomicRangesORmissing,
- GRanges, GPos,
+ GenomicRanges, GenomicRanges_OR_missing,
+ IRanges_OR_IPos, GRanges, GPos,
DelegatingGenomicRanges,
GNCList,
- GenomicRangesORGRangesList, GRangesList,
+ GRangesList, GenomicRanges_OR_GRangesList,
GenomicRangesList, SimpleGenomicRangesList
)
@@ -32,6 +32,9 @@ exportClasses(
### Export S3 methods
###
+S3method(.DollarNames, GenomicRanges)
+S3method(.DollarNames, GRanges)
+
S3method(duplicated, GenomicRanges)
S3method(sort, GenomicRanges)
@@ -50,7 +53,6 @@ export(
)
-
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Export S4 methods for generics not defined in GenomicRanges
###
@@ -77,19 +79,21 @@ exportMethods(
union, intersect, setdiff,
start, "start<-", end, "end<-", width, "width<-",
strand, "strand<-", invertStrand,
+ score, "score<-",
updateObject,
## Generics defined in S4Vectors:
- selfmatch,
-
- ## Generics defined in IRanges:
elementMetadata, "elementMetadata<-",
mcols, "mcols<-",
values, "values<-",
+ selfmatch,
relistToClass,
pcompare,
+
+ ## Generics defined in IRanges:
ranges, "ranges<-",
- score, "score<-",
+ rglist,
+ pos,
findOverlaps, countOverlaps,
shift, narrow, resize, flank, promoters, restrict, trim,
reduce, gaps, disjoin, isDisjoint, disjointBins,
@@ -97,7 +101,6 @@ exportMethods(
punion, pintersect, psetdiff, pgap,
precede, follow, nearest, distance, distanceToNearest,
tile, slidingWindows,
- subset,
## Generics defined in GenomeInfoDb:
seqinfo, "seqinfo<-",
@@ -137,24 +140,14 @@ export(
#constraint, "constraint<-",
checkConstraint,
- ## range-squeezers.R:
- granges, grglist, rglist,
-
- ## GPos-class.R:
- pos
+ ## genomic-range-squeezers.R:
+ granges, grglist
)
### Exactly the same list as above.
exportMethods(
#constraint, "constraint<-",
checkConstraint,
- granges, grglist, rglist,
- pos
+ granges, grglist
)
-### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-### Register S3 methods
-###
-
-S3method(.DollarNames, GenomicRanges)
-S3method(.DollarNames, GRanges)
diff --git a/NEWS b/NEWS
index bbaeb75..f822b8b 100644
--- a/NEWS
+++ b/NEWS
@@ -1,10 +1,67 @@
-CHANGES IN VERSION 1.28.1
+CHANGES IN VERSION 1.30.0
-------------------------
+NEW FEATURES
+
+ o Support GPos-based GRangesList objects.
+
+ o Add 'na.rm' argument to binnedAverage().
+
+SIGNIFICANT USER-LEVEL CHANGES
+
+ o Change 'maxgap' and 'minoverlap' defaults for findOverlaps() and family
+ (i.e. countOverlaps(), overlapsAny(), and subsetByOverlaps()). This
+ change addresses 2 long-standing issues:
+ (1) by default zero-width ranges are not excluded anymore, and
+ (2) control of zero-width ranges and adjacent ranges is finally
+ decoupled (only partially though).
+ New default for 'minoverlap' is 0 instead of 1. New default for 'maxgap'
+ is -1 instead of 0. See ?findOverlaps for more information about 'maxgap'
+ and the meaning of -1. For example, if 'type' is "any", you need to set
+ 'maxgap' to 0 if you want adjacent ranges to be considered as overlapping.
+
+ o GPos now extends GRanges but with a ranges slot that must be an IPos
+ object. Update "old" GPos objects with updateObject().
+
+ o Move pos() generic to IRanges package.
+
+ o Move rglist() generic to IRanges package.
+
+ o Rename GenomicRangesORmissing and GenomicRangesORGRangesList classes ->
+ GenomicRanges_OR_missing and GenomicRanges_OR_GRangesList, respectively.
+
+ o Remove "seqinfo" method for RangesList objects.
+
+ o Remove "stack" method for GenomicRangesList objects.
+
+DEPRECATED AND DEFUNCT
+
+ o Remove 'force' argument from seqinfo() and seqlevels() setters (the
+ argument got deprecated in BioC 3.5 in favor of new and more flexible
+ 'pruning.mode' argument).
+
BUG FIXES
+ o nearest() and distanceToNearest() now call findOverlaps() internally
+ with maxgap=0 and minoverlap=0. This fixes incorrect results obtained
+ in some situations e.g. in the situation reported here:
+
+ https://support.bioconductor.org/p/99369/ (zero-width ranges)
+
+ but also in this situation:
+
+ nearest(GRanges("chr1", IRanges(5, 10)),
+ GRanges("chr1", IRanges(1, 4:5)),
+ select="all")
+
+ where the 2 ranges in the subject are *both* nearest to the 5-10 range.
+
o '$' completion on GenomicRanges works in RStudio.
+ o Minor tweaks to conversion from character to GRanges and reverse
+ conversion.
+
+
CHANGES IN VERSION 1.28.0
-------------------------
diff --git a/R/GNCList-class.R b/R/GNCList-class.R
index 2bd7a9a..bee31fd 100644
--- a/R/GNCList-class.R
+++ b/R/GNCList-class.R
@@ -103,7 +103,7 @@ setMethod("extractROWS", "GNCList",
### NOT exported.
findOverlaps_GNCList <- function(query, subject,
- maxgap=0L, minoverlap=1L,
+ maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "extend", "equal"),
select=c("all", "first", "last", "arbitrary", "count"),
ignore.strand=FALSE)
diff --git a/R/GPos-class.R b/R/GPos-class.R
index 7a65bd4..96b7139 100644
--- a/R/GPos-class.R
+++ b/R/GPos-class.R
@@ -2,17 +2,12 @@
### GPos objects
### -------------------------------------------------------------------------
###
-### The GPos class is a container for storing a set of genomic *positions*,
-### that is, genomic ranges of width 1. Even though a GRanges object can
-### be used for that, using a GPos object can be much more memory-efficient,
-### especially when the object contains long runs of adjacent positions.
-###
+
setClass("GPos",
- contains="GenomicRanges",
+ contains="GRanges",
representation(
- pos_runs="GRanges",
- elementMetadata="DataFrame"
+ ranges="IPos"
)
)
@@ -21,8 +16,6 @@ setClass("GPos",
### Getters
###
-setMethod("length", "GPos", function(x) sum(width(x@pos_runs)))
-
setMethod("names", "GPos", function(x) NULL)
setReplaceMethod("names", "GPos",
@@ -34,56 +27,18 @@ setReplaceMethod("names", "GPos",
}
)
-setMethod("seqnames", "GPos",
- function(x) rep.int(seqnames(x@pos_runs), width(x@pos_runs))
-)
-
-setGeneric("pos", function(x) standardGeneric("pos"))
-setMethod("pos", "GPos", function(x) as.integer(ranges(x@pos_runs)))
-setMethod("start", "GPos", function(x) pos(x))
-setMethod("end", "GPos", function(x) pos(x))
-setMethod("width", "GPos", function(x) rep.int(1L, length(x)))
-setMethod("ranges", "GPos",
- function(x, use.names=TRUE, use.mcols=FALSE)
- {
- if (!isTRUEorFALSE(use.mcols))
- stop("'use.mcols' must be TRUE or FALSE")
- ans <- IRanges(pos(x), width=1L)
- if (use.mcols)
- mcols(ans) <- mcols(x)
- ans
- }
-)
-
-setMethod("strand", "GPos",
- function(x) rep.int(strand(x@pos_runs), width(x@pos_runs))
-)
-
-setMethod("seqinfo", "GPos", function(x) seqinfo(x@pos_runs))
+setMethod("pos", "GPos", function(x) pos(ranges(x)))
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-### Collapse runs of "stitchable ranges"
-###
-### In a Ranges object 'x', 2 ranges x[i] and x[i+1] are "stitchable" if
-### start(x[i+1]) == end(x[i])+1. For example, in the following object:
-### 1: .....xxxx.............
-### 2: ...xx.................
-### 3: .........xxx..........
-### 4: ............xxxxxx....
-### 5: ..................x...
-### x[3] and x[4] are stitchable, and x[4] and x[5] are stitchable. So
-### x[3], x[4], and x[5] form a run of "stitchable ranges" that will collapse
-### into the following single range after stitching:
-### .........xxxxxxxxxx...
-### Note that x[1] and x[3] are not stitchable because they are not
-### consecutive vector elements (but they would if we removed x[2]).
+### Collapse runs of "stitchable genomic ranges"
###
-### If 'x' contains genomic ranges (i.e. is a GenomicRanges object), 2 ranges
-### are "stitchable" if, in addition to the above, they are also on the same
+### 2 genomic ranges are "stitchable" if, in addition to be stitchable from
+### an integer ranges point-of-view (see stitch_Ranges() in
+### IRanges/R/IPos-class.R for what that means), they are also on the same
### chromosome and strand.
-### .stitch_GenomicRanges() below takes any GenomicRanges derivative and
+### stitch_GenomicRanges() below takes any GenomicRanges derivative and
### returns a GRanges object (so is NOT an endomorphism).
### Note that this transformation preserves 'sum(width(x))'.
### Also note that this is an "inter range transformation". However unlike
@@ -93,78 +48,83 @@ setMethod("seqinfo", "GPos", function(x) seqinfo(x@pos_runs))
### TODO: Define and export stitch() generic and method for Ranges objects
### in the IRanges package (in inter-range-methods.R). Then make
-### .stitch_GenomicRanges() the "stitch" method for GenomicRanges objects and
-### support the 'ignore.strand' argument.
-### Maybe it would also make sense to have an isStitched() generic like we
-### have isDisjoint() to provide a quick and easy way to check the state of
-### the object before applying the transformation to it. In theory each
-### idempotent inter range transformation could have a "state checker" so
-### maybe add isReduced() too (range() probably doesn't need one).
-.stitch_GenomicRanges <- function(x, drop.empty.ranges=FALSE)
+### stitch_GenomicRanges() and stitch_GPos() the "stitch" methods for
+### GenomicRanges and GPos objects, respectively, and support the
+### 'ignore.strand' argument.
+
+### To be as fast as possible, we don't use internal low-level constructor
+### new_GRanges() and we don't check the new object.
+.new_stitched_GRanges <- function(seqnames, ranges, strand, seqinfo)
+{
+ mcols <- S4Vectors:::make_zero_col_DataFrame(length(ranges))
+ new2("GRanges", seqnames=seqnames,
+ ranges=ranges,
+ strand=strand,
+ elementMetadata=mcols,
+ seqinfo=seqinfo,
+ check=FALSE)
+}
+
+stitch_GenomicRanges <- function(x)
{
if (length(x) == 0L)
- return(granges(x)) # returning GRanges() would loose the seqinfo
+ return(granges(x, use.names=FALSE)) # returning GRanges() would loose
+ # the seqinfo
x_seqnames <- seqnames(x)
x_strand <- strand(x)
x_start <- start(x)
x_end <- end(x)
- new_run <- x_seqnames[-1L] != x_seqnames[-length(x)] |
- x_strand[-1L] != x_strand[-length(x)] |
- Rle(x_start[-1L] != x_end[-length(x)] + 1L)
- new_run_idx <- which(new_run)
- start_idx <- c(1L, new_run_idx + 1L)
- end_idx <- c(new_run_idx, length(x))
-
- ans_ranges <- IRanges(x_start[start_idx], x_end[end_idx])
-
- if (drop.empty.ranges) {
- keep_idx <- which(width(ans_ranges) != 0L)
- ans_ranges <- ans_ranges[keep_idx]
- start_idx <- start_idx[keep_idx]
- }
- ans_seqnames <- x_seqnames[start_idx]
- ans_strand <- x_strand[start_idx]
- ans_mcols <- S4Vectors:::make_zero_col_DataFrame(length(start_idx))
- ans_seqinfo <- seqinfo(x)
-
- ## To be as fast as possible, we don't use internal low-level constructor
- ## new_GRanges() and we don't check the new object.
- new2("GRanges", seqnames=ans_seqnames,
- ranges=ans_ranges,
- strand=ans_strand,
- elementMetadata=ans_mcols,
- seqinfo=ans_seqinfo,
- check=FALSE)
+ ## Find runs of stitchable elements along 'x'.
+ ## Each run is described by the indices of its first ('run_from') and
+ ## last ('run_to') elements in 'x'.
+ ## The runs form a partitioning of 'x'.
+ is_new_run <- x_seqnames[-1L] != x_seqnames[-length(x)] |
+ x_strand[-1L] != x_strand[-length(x)] |
+ Rle(x_start[-1L] != x_end[-length(x)] + 1L)
+ new_run_idx <- which(is_new_run)
+ run_from <- c(1L, new_run_idx + 1L)
+ run_to <- c(new_run_idx, length(x))
+
+ ans_ranges <- IRanges(x_start[run_from], x_end[run_to])
+ ans_seqnames <- x_seqnames[run_from] # same as x_seqnames[run_to]
+ ans_strand <- x_strand[run_from] # same as x_strand[run_to]
+ .new_stitched_GRanges(ans_seqnames, ans_ranges, ans_strand, seqinfo(x))
}
+stitch_GPos <- function(x)
+{
+ if (length(x) == 0L)
+ return(granges(x, use.names=FALSE)) # returning GRanges() would loose
+ # the seqinfo
-### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-### Validity
-###
+ x_seqnames <- seqnames(x)
+ x_strand <- strand(x)
+
+ ## Find runs of identical (seqnames, strand) pairs along 'x'.
+ ## The runs are described by IRanges object 'runs'.
+ ## They form a partitioning of 'x'.
+ is_new_run <- x_seqnames[-1L] != x_seqnames[-length(x)] |
+ x_strand[-1L] != x_strand[-length(x)]
+ new_run_idx <- which(is_new_run)
+ run_from <- c(1L, new_run_idx + 1L)
+ run_to <- c(new_run_idx, length(x))
+ runs <- IRanges(run_from, run_to)
+
+ ans_ranges <- IRanges:::extract_pos_runs_by_ranges(x@ranges@pos_runs, runs)
+ breakpoints <- cumsum(width(ans_ranges))
+ ans_seqnames <- x_seqnames[breakpoints]
+ ans_strand <- x_strand[breakpoints]
+ .new_stitched_GRanges(ans_seqnames, ans_ranges, ans_strand, seqinfo(x))
+}
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-### Setters
+### Validity
###
-### Supporting the seqinfo() setter makes the following work out-of-the-box:
-### - The family of seqinfo-related setters: seqlevels(), seqlevelsStyle(),
-### seqlengths(), isCircular(), and genome().
-### - pcompare() and all the binary comparison operators (==, <=, !=,
-### >=, <, >).
-setReplaceMethod("seqinfo", "GPos",
- function(x, new2old=NULL, force=FALSE,
- pruning.mode=c("error", "coarse", "fine", "tidy"),
- value)
- {
- new_pos_runs <- callGeneric(x@pos_runs, new2old=new2old, force=force,
- pruning.mode=pruning.mode, value)
- x@pos_runs <- .stitch_GenomicRanges(new_pos_runs)
- x
- }
-)
+### TODO
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
@@ -180,11 +140,12 @@ GPos <- function(pos_runs=GRanges())
suppressWarnings(ans_len <- sum(width(pos_runs)))
if (is.na(ans_len))
stop("too many genomic positions in 'pos_runs'")
+ ans_seqnames <- rep.int(seqnames(pos_runs), width(pos_runs))
+ ans_ranges <- IPos(ranges(pos_runs))
+ ans_strand <- rep.int(strand(pos_runs), width(pos_runs))
ans_mcols <- S4Vectors:::make_zero_col_DataFrame(ans_len)
- ans_pos_runs <- .stitch_GenomicRanges(pos_runs, drop.empty.ranges=TRUE)
- new2("GPos", pos_runs=ans_pos_runs,
- elementMetadata=ans_mcols,
- metadata=pos_runs@metadata,
+ new2("GPos", seqnames=ans_seqnames, ranges=ans_ranges, strand=ans_strand,
+ elementMetadata=ans_mcols, seqinfo=seqinfo(pos_runs),
check=FALSE)
}
@@ -193,20 +154,40 @@ GPos <- function(pos_runs=GRanges())
### Coercion
###
-.from_GenomicRanges_to_GPos <- function(from)
+.from_GRanges_to_GPos <- function(from)
{
if (!all(width(from) == 1L))
- stop(wmsg("all the ranges in the ", class(from), " object to ",
- "coerce to GPos must have a width of 1"))
- if (!is.null(names(from)))
+ stop(wmsg("all the ranges in the object to coerce to GPos ",
+ "must have a width of 1"))
+ if (!is.null(names(from))) {
+ names(from) <- NULL
warning(wmsg("because a GPos object cannot hold them, the names ",
- "on the ", class(from), " object couldn't be ",
- "propagated during its coercion to GPos"))
- ans <- GPos(from)
- mcols(ans) <- mcols(from)
- ans
+ "on the object to coerce to GPos couldn't be ",
+ "propagated by the coercion"))
+ }
+ class(from) <- "GPos" # temporarily broken GRanges instance!
+ from@ranges <- as(from@ranges, "IPos") # now fixed :-)
+ from
+}
+setAs("GRanges", "GPos", .from_GRanges_to_GPos)
+
+setAs("ANY", "GPos", function(from) .from_GRanges_to_GPos(as(from, "GRanges")))
+
+### Because we implemented the 'strict' argument we cannot use setAs().
+### 'to' is ignored but we must have it in the signature otherwise the call
+### to setMethod("coerce") below will complain.
+.from_GPos_to_GRanges <- function(from, to="GRanges", strict=TRUE)
+{
+ if (!isTRUEorFALSE(strict))
+ stop("'strict' must be TRUE or FALSE")
+ if (!strict)
+ return(from)
+ class(from) <- "GRanges" # temporarily broken GRanges instance!
+ from@ranges <- as(from@ranges, "IRanges") # now fixed :-)
+ from
}
-setAs("GenomicRanges", "GPos", .from_GenomicRanges_to_GPos)
+#setAs("GPos", "GRanges", .from_GPos_to_GRanges)
+setMethod("coerce", c("GPos", "GRanges"), .from_GPos_to_GRanges)
### The "as.data.frame" method for GenomicRanges objects works on a GPos
### object but returns a data.frame with identical "start" and "end" columns,
@@ -228,45 +209,36 @@ setMethod("as.data.frame", "GPos",
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-### Subsetting
+### updateObject()
+###
+### Internal representation of GPos objects has changed in GenomicRanges
+### 1.29.10 (Bioc 3.6).
###
-setMethod("extractROWS", "GPos",
- function(x, i)
- {
- i <- normalizeSingleBracketSubscript(i, x, as.NSBS=TRUE)
- ## TODO: Maybe make this the coercion method from NSBS to Ranges.
- if (is(i, "RangesNSBS")) {
- ir <- i@subscript
- ir <- ir[width(ir) != 0L]
- } else {
- ir <- as(as.integer(i), "IRanges")
- }
- map <- S4Vectors:::map_ranges_to_runs(width(x@pos_runs),
- start(ir), width(ir))
- ## Because 'ir' has no zero-width ranges, 'spanned_nrun' cannot
- ## contain zeroes and so 'Ltrim' and 'Rtrim' cannot contain garbbage.
- offset_nrun <- map[[1L]]
- spanned_nrun <- map[[2L]]
- Ltrim <- map[[3L]]
- Rtrim <- map[[4L]]
- run_idx <- S4Vectors:::fancy_mseq(spanned_nrun, offset_nrun)
- new_pos_runs <- x@pos_runs[run_idx]
- if (length(run_idx) != 0L) {
- Rtrim_idx <- cumsum(spanned_nrun)
- Ltrim_idx <- c(1L, Rtrim_idx[-length(Rtrim_idx)] + 1L)
- trimmed_start <- start(new_pos_runs)[Ltrim_idx] + Ltrim
- trimmed_end <- end(new_pos_runs)[Rtrim_idx] - Rtrim
- start(new_pos_runs)[Ltrim_idx] <- trimmed_start
- end(new_pos_runs)[Rtrim_idx] <- trimmed_end
- suppressWarnings(new_len <- sum(width(new_pos_runs)))
- if (is.na(new_len))
- stop("subscript is too big")
- }
- x@pos_runs <- .stitch_GenomicRanges(new_pos_runs)
- mcols(x) <- extractROWS(mcols(x), i)
- x
- }
+.get_GPos_version <- function(object)
+{
+ if (.hasSlot(object, "pos_runs")) "< 1.29.10" else "current"
+}
+
+setMethod("updateObject", "GPos",
+ function(object, ..., verbose=FALSE)
+ {
+ version <- .get_GPos_version(object)
+ if (version == "current") {
+ if (verbose)
+ message("[updateObject] Internal representation of ",
+ class(object), " object is current.\n",
+ "[updateObject] Nothing to update.")
+ return(object)
+ }
+ if (verbose)
+ message("[updateObject] ", class(object), " object uses ",
+ "internal representation from GenomicRanges\n",
+ "[updateObject] ", version, ". Updating it ...")
+ object <- GPos(object@pos_runs)
+ metadata(object) <- metadata(object)
+ object
+ }
)
@@ -293,6 +265,12 @@ setMethod("extractROWS", "GPos",
show_GPos <- function(x, margin="",
print.classinfo=FALSE, print.seqinfo=FALSE)
{
+ version <- .get_GPos_version(x)
+ if (version != "current")
+ stop(class(x), " object uses internal representation from ",
+ "GenomicRanges ", version, "\n and cannot be displayed or ",
+ "used. Please update it with:\n",
+ " x <- updateObject(x, verbose=TRUE)")
x_class <- class(x)
x_len <- length(x)
x_mcols <- mcols(x)
@@ -337,94 +315,3 @@ setMethod("show", "GPos",
print.classinfo=TRUE, print.seqinfo=TRUE)
)
-
-### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-### Combining
-###
-### Note that supporting "[" and "c" makes "[<-" work out-of-the-box!
-###
-
-### 'Class' must be "GPos" or the name of a concrete GPos subclass.
-### 'objects' must be a list of GPos objects.
-### Returns an instance of class 'Class'.
-combine_GPos_objects <- function(Class, objects,
- use.names=TRUE, ignore.mcols=FALSE)
-{
- if (!isSingleString(Class))
- stop("'Class' must be a single character string")
- if (!extends(Class, "GPos"))
- stop("'Class' must be the name of a class that extends GPos")
- if (!is.list(objects))
- stop("'objects' must be a list")
- if (!isTRUEorFALSE(use.names))
- stop("'use.names' must be TRUE or FALSE")
- ### TODO: Support 'use.names=TRUE'.
- if (use.names)
- stop("'use.names=TRUE' is not supported yet")
- if (!isTRUEorFALSE(ignore.mcols))
- stop("'ignore.mcols' must be TRUE or FALSE")
-
- if (length(objects) != 0L) {
- ## TODO: Implement (in C) fast 'elementIsNull(objects)' in S4Vectors
- ## that does 'sapply(objects, is.null, USE.NAMES=FALSE)', and use it
- ## here.
- null_idx <- which(sapply(objects, is.null, USE.NAMES=FALSE))
- if (length(null_idx) != 0L)
- objects <- objects[-null_idx]
- }
- if (length(objects) == 0L)
- return(new(Class))
-
- ## TODO: Implement (in C) fast 'elementIs(objects, class)' in S4Vectors
- ## that does 'sapply(objects, is, class, USE.NAMES=FALSE)', and use it
- ## here. 'elementIs(objects, "NULL")' should work and be equivalent to
- ## 'elementIsNull(objects)'.
- if (!all(sapply(objects, is, Class, USE.NAMES=FALSE)))
- stop("the objects to combine must be ", Class, " objects (or NULLs)")
- objects_names <- names(objects)
- names(objects) <- NULL # so lapply(objects, ...) below returns an
- # unnamed list
-
- ## Combine "pos_runs" slots.
- pos_runs_slots <- lapply(objects, function(x) x@pos_runs)
- ## TODO: Use combine_GRanges_objects() here when it's available.
- ans_pos_runs <- .stitch_GenomicRanges(do.call(c, pos_runs_slots))
-
- suppressWarnings(ans_len <- sum(width(ans_pos_runs)))
- if (is.na(ans_len))
- stop("too many genomic positions to combine")
-
- ## Combine "mcols" slots. We don't need to use fancy
- ## S4Vectors:::rbind_mcols() for this because the "mcols" slot of a
- ## GPos object is guaranteed to be a DataFrame.
- if (ignore.mcols) {
- ans_mcols <- S4Vectors:::make_zero_col_DataFrame(ans_len)
- } else {
- mcols_slots <- lapply(objects, function(x) x@elementMetadata)
- ## Will fail if not all the GPos objects in 'objects' have
- ## exactly the same metadata cols.
- ans_mcols <- do.call(rbind, mcols_slots)
- }
-
- ## Make 'ans' and return it.
- new2(Class, pos_runs=ans_pos_runs, elementMetadata=ans_mcols, check=FALSE)
-}
-
-setMethod("c", "GPos",
- function (x, ..., ignore.mcols=FALSE, recursive=FALSE)
- {
- if (!identical(recursive, FALSE))
- stop("\"c\" method for GPos objects ",
- "does not support the 'recursive' argument")
- if (missing(x)) {
- objects <- list(...)
- x <- objects[[1L]]
- } else {
- objects <- list(x, ...)
- }
- combine_GPos_objects(class(x), objects,
- use.names=FALSE,
- ignore.mcols=ignore.mcols)
- }
-)
-
diff --git a/R/GRanges-class.R b/R/GRanges-class.R
index b85582b..b197c95 100644
--- a/R/GRanges-class.R
+++ b/R/GRanges-class.R
@@ -3,11 +3,14 @@
### -------------------------------------------------------------------------
###
+
+setClassUnion("IRanges_OR_IPos", c("IRanges", "IPos"))
+
setClass("GRanges",
contains="GenomicRanges",
representation(
seqnames="Rle",
- ranges="IRanges",
+ ranges="IRanges_OR_IPos", # an IPos only for GPos
strand="Rle",
elementMetadata="DataFrame",
seqinfo="Seqinfo"
@@ -207,9 +210,33 @@ setMethod("updateObject", "GRanges",
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-### Coercion
+### Accessors
###
+setMethod("seqnames", "GRanges", function(x) x@seqnames)
+
+setMethod("strand", "GRanges", function(x) x@strand)
+
+setMethod("seqinfo", "GRanges", function(x) x@seqinfo)
+
+### Range squeezer.
+setMethod("ranges", "GRanges",
+ function(x, use.names=TRUE, use.mcols=FALSE)
+ {
+ if (!isTRUEorFALSE(use.names))
+ stop("'use.names' must be TRUE or FALSE")
+ if (!isTRUEorFALSE(use.mcols))
+ stop("'use.mcols' must be TRUE or FALSE")
+ ans <- x@ranges
+ if (!use.names)
+ names(ans) <- NULL
+ if (use.mcols)
+ mcols(ans) <- mcols(x)
+ ans
+ }
+)
+
+### Genomic range squeezer.
setMethod("granges", "GenomicRanges",
function(x, use.names=TRUE, use.mcols=FALSE)
{
@@ -225,6 +252,11 @@ setMethod("granges", "GenomicRanges",
}
)
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Coercion
+###
+
setAs("GenomicRanges", "GRanges",
function(from) granges(from, use.mcols=TRUE)
)
@@ -237,9 +269,11 @@ setAs("GenomicRanges", "GRanges",
"does not support NAs"))
error_msg <- wmsg(
"The character vector to convert to a GRanges object must contain ",
- "strings of the form \"chr1:2501-2800\" or \"chr1:2501-2800:+\" ",
- "(\"..\" being also supported as a separator between the start and ",
- "end positions). Strand can be \"+\", \"-\", \"*\", or missing."
+ "strings of the form \"chr:start-end\" or \"chr:start-end:strand\", ",
+ "with end >= start - 1, or \"chr:pos\" or \"chr:pos:strand\". ",
+ "For example: \"chr1:2501-2900\", \"chr1:2501-2900:+\", or ",
+ "\"chr1:740\". Note that \"..\" is a valid alternate start/end ",
+ "separator. Strand can be \"+\", \"-\", \"*\", or missing."
)
split0 <- CharacterList(strsplit(from, ":", fixed=TRUE))
split0_eltNROWS <- elementNROWS(split0)
@@ -249,20 +283,13 @@ setAs("GenomicRanges", "GRanges",
ans_strand[is.na(ans_strand)] <- "*"
split1 <- phead(split0, n=2L)
ans_seqnames <- as.character(phead(split1, n=1L))
- ranges <- as.character(ptail(split1, n=-1L))
- ## We want to split on the first occurence of "-" that is preceeded by
- ## a digit (ignoring and removing the spaces in between if any).
- ranges <- sub("([[:digit:]])[[:space:]]*-", "\\1..", ranges)
- split2 <- CharacterList(strsplit(ranges, "..", fixed=TRUE))
- split2_eltNROWS <- elementNROWS(split2)
- if (!all(split2_eltNROWS == 2L))
+ ranges <- ptail(split1, n=-1L)
+ ranges <- setNames(as.character(ranges), names(ranges))
+ ans_ranges <- try(as(ranges, "IRanges"), silent=TRUE)
+ if (is(ans_ranges, "try-error"))
stop(error_msg)
- ans_start <- as.integer(phead(split2, n=1L))
- ans_end <- as.integer(ptail(split2, n=1L))
- ans_ranges <- IRanges(ans_start, ans_end, names=names(from))
GRanges(ans_seqnames, ans_ranges, ans_strand)
}
-
setAs("character", "GRanges", .from_character_to_GRanges)
.from_factor_to_GRanges <- function(from)
@@ -270,7 +297,6 @@ setAs("character", "GRanges", .from_character_to_GRanges)
from <- setNames(as.character(from), names(from))
.from_character_to_GRanges(from)
}
-
setAs("factor", "GRanges", .from_factor_to_GRanges)
### Does NOT propagate the ranges names and metadata columns i.e. always
@@ -340,30 +366,6 @@ setAs("ANY", "GenomicRanges", function(from) as(from, "GRanges"))
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-### Slot getters and setters
-###
-
-setMethod("seqnames", "GRanges", function(x) x@seqnames)
-setMethod("ranges", "GRanges",
- function(x, use.names=TRUE, use.mcols=FALSE)
- {
- if (!isTRUEorFALSE(use.names))
- stop("'use.names' must be TRUE or FALSE")
- if (!isTRUEorFALSE(use.mcols))
- stop("'use.mcols' must be TRUE or FALSE")
- ans <- x@ranges
- if (!use.names)
- names(ans) <- NULL
- if (use.mcols)
- mcols(ans) <- mcols(x)
- ans
- }
-)
-setMethod("strand", "GRanges", function(x) x@strand)
-setMethod("seqinfo", "GRanges", function(x) x@seqinfo)
-
-
-### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Subsetting
###
diff --git a/R/GRangesList-class.R b/R/GRangesList-class.R
index 39dbc63..ce37b72 100644
--- a/R/GRangesList-class.R
+++ b/R/GRangesList-class.R
@@ -14,7 +14,9 @@ setClass("GRangesList",
)
)
-setClassUnion("GenomicRangesORGRangesList", c("GenomicRanges", "GRangesList"))
+### Note that rtracklayer also defines GenomicRanges_OR_GenomicRangesList.
+### Do we need the 2 union classes?
+setClassUnion("GenomicRanges_OR_GRangesList", c("GenomicRanges", "GRangesList"))
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
@@ -63,7 +65,7 @@ GRangesList <- function(...)
stop("all elements in '...' must be GRanges objects")
unlistData <- suppressWarnings(do.call("c", unname(listData)))
}
- relist(unlistData, PartitioningByEnd(listData))
+ relist(unlistData, listData)
}
### Typically, the field values will come from a file that needs to be loaded
@@ -369,7 +371,7 @@ setMethod("seqinfo", "GRangesList", function(x) seqinfo(x@unlistData))
### NOT exported but used in GenomicAlignments package.
set_GRangesList_seqinfo <-
- function(x, new2old=NULL, force=FALSE,
+ function(x, new2old=NULL,
pruning.mode=c("error", "coarse", "fine", "tidy"),
value)
{
@@ -377,7 +379,7 @@ set_GRangesList_seqinfo <-
if (!is(value, "Seqinfo"))
stop("the supplied 'seqinfo' must be a Seqinfo object")
dangling_seqlevels <- GenomeInfoDb:::getDanglingSeqlevels(x,
- new2old=new2old, force=force,
+ new2old=new2old,
pruning.mode=pruning.mode,
seqlevels(value))
if (length(dangling_seqlevels) != 0L) {
@@ -583,6 +585,13 @@ showList <- function(object, showFunction, print.classinfo)
}
setMethod("show", "GRangesList",
- function(object) showList(object, show_GenomicRanges, TRUE)
+ function(object)
+ {
+ if (is(object at unlistData, "GPos"))
+ showFunction <- show_GPos
+ else
+ showFunction <- show_GenomicRanges
+ showList(object, showFunction, print.classinfo=TRUE)
+ }
)
diff --git a/R/GenomicRanges-class.R b/R/GenomicRanges-class.R
index 1973249..3a58a57 100644
--- a/R/GenomicRanges-class.R
+++ b/R/GenomicRanges-class.R
@@ -14,7 +14,7 @@ setClass("GenomicRanges",
)
)
-setClassUnion("GenomicRangesORmissing", c("GenomicRanges", "missing"))
+setClassUnion("GenomicRanges_OR_missing", c("GenomicRanges", "missing"))
### The code in this file will work out-of-the-box on 'x' as long as
### seqnames(x), ranges(x), strand(x), seqlengths(x), seqinfo(),
@@ -140,8 +140,8 @@ make_out_of_bound_warning_msg <- function(x, idx, suggest.trim)
.valid.GenomicRanges.ranges <- function(x)
{
- if (class(ranges(x)) != "IRanges")
- return("'ranges(x)' must be an IRanges instance")
+ if (!(class(ranges(x)) %in% c("IRanges", "IPos")))
+ return("'ranges(x)' must be an IRanges or IPos instance")
NULL
}
@@ -231,7 +231,7 @@ setMethod("as.character", "GenomicRanges",
stop(wmsg("'ignore.strand' must be TRUE or FALSE"))
if (length(x) == 0L)
return(setNames(character(0), names(x)))
- ans <- paste0(seqnames(x), ":", start(x), "-", end(x))
+ ans <- paste0(seqnames(x), ":", as.character(ranges(x)))
names(ans) <- names(x)
if (ignore.strand)
return(ans)
@@ -362,26 +362,10 @@ setReplaceMethod("ranges", "GenomicRanges",
}
)
-normargGenomicRangesStrand <- function(strand, n)
-{
- if (!is(strand, "Rle"))
- strand <- Rle(strand)
- if (!is.factor(runValue(strand))
- || !identical(levels(runValue(strand)), levels(strand())))
- runValue(strand) <- strand(runValue(strand))
- k <- length(strand)
- if (k != n) {
- if (k != 1L && (k == 0L || k > n || n %% k != 0L))
- stop("supplied 'strand' has ", k, " elements (", n, " expected)")
- strand <- rep(strand, length.out=n)
- }
- strand
-}
-
setReplaceMethod("strand", "GenomicRanges",
function(x, value)
{
- value <- normargGenomicRangesStrand(value, length(x))
+ value <- normalize_strand_replacement_value(value, x)
x <- update(x, strand=value, check=FALSE)
msg <- .valid.GenomicRanges.strand(x)
if (!is.null(msg))
@@ -393,7 +377,7 @@ setReplaceMethod("strand", "GenomicRanges",
### Does NOT support pruning mode "fine". Pruning modes "coarse" and "tidy"
### are equivalent on a GenomicRanges object.
set_GenomicRanges_seqinfo <-
- function(x, new2old=NULL, force=FALSE,
+ function(x, new2old=NULL,
pruning.mode=c("error", "coarse", "fine", "tidy"),
value)
{
@@ -404,7 +388,7 @@ set_GenomicRanges_seqinfo <-
if (!is(value, "Seqinfo"))
stop("the supplied 'seqinfo' must be a Seqinfo object")
dangling_seqlevels <- GenomeInfoDb:::getDanglingSeqlevels(x,
- new2old=new2old, force=force,
+ new2old=new2old,
pruning.mode=pruning.mode,
seqlevels(value))
if (length(dangling_seqlevels) != 0L) {
diff --git a/R/GenomicRanges-comparison.R b/R/GenomicRanges-comparison.R
index a039f13..8a93253 100644
--- a/R/GenomicRanges-comparison.R
+++ b/R/GenomicRanges-comparison.R
@@ -187,12 +187,10 @@ setMethod("selfmatch", "GenomicRanges",
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### order() and related methods.
###
-### The "order" and "rank" methods for GenomicRanges objects are consistent
-### with the order implied by pcompare().
+### is.unsorted(), order(), sort(), rank() on GenomicRanges derivatives are
+### consistent with the order implied by pcompare().
### is.unsorted() is a quick/cheap way of checking whether a GenomicRanges
-### object is already sorted, e.g., called prior to a costly sort.
-### sort() will work out-of-the-box on a GenomicRanges object thanks to the
-### method for Vector objects.
+### derivative is already sorted, e.g., called prior to a costly sort.
###
.GenomicRanges_as_IntegerQuads <- function(x, ignore.strand=FALSE)
diff --git a/R/RangedData-methods.R b/R/RangedData-methods.R
index 039c54c..a460733 100644
--- a/R/RangedData-methods.R
+++ b/R/RangedData-methods.R
@@ -18,13 +18,6 @@ setMethod("seqinfo", "List", function(x) {
si
})
-setMethod("seqinfo", "RangesList", function(x) {
- si <- callNextMethod()
- if (!is.null(universe(x)))
- genome(si) <- universe(x)
- si
-})
-
### FIXME: needs sanity checks
setReplaceMethod("seqinfo", "List",
function(x, value) {
diff --git a/R/findOverlaps-methods.R b/R/findOverlaps-methods.R
index 91f5c79..2343fa7 100644
--- a/R/findOverlaps-methods.R
+++ b/R/findOverlaps-methods.R
@@ -8,7 +8,7 @@
###
findOverlaps_GenomicRanges <- function(query, subject,
- maxgap=0L, minoverlap=1L,
+ maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"),
select=c("all", "first", "last", "arbitrary"),
ignore.strand=FALSE)
@@ -30,12 +30,15 @@ setMethod("findOverlaps", c("GenomicRanges", "GenomicRanges"),
### "findOverlaps" methods for GRangesList objects
###
-.overlap_score <- function(hits, query, subject)
+.overlapwidth <- function(hits, query, subject)
{
q_ranges <- ranges(query)[queryHits(hits)]
s_ranges <- ranges(subject)[subjectHits(hits)]
- 1L + pmin.int(end(q_ranges), end(s_ranges)) -
- pmax.int(start(q_ranges), start(s_ranges))
+ ## TODO: Replace the code below by a call to
+ ## poverlapWidth(q_ranges, s_ranges) when it's available.
+ score <- pmin.int(end(q_ranges), end(s_ranges)) -
+ pmax.int(start(q_ranges), start(s_ranges)) + 1L
+ pmax.int(score, 0L)
}
.aggregated_sum <- function(x, f1, f2)
@@ -45,13 +48,13 @@ setMethod("findOverlaps", c("GenomicRanges", "GenomicRanges"),
}
setMethod("findOverlaps", c("GRangesList", "GRangesList"),
- function(query, subject, maxgap=0L, minoverlap=1L,
+ function(query, subject, maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within"),
select=c("all", "first", "last", "arbitrary"),
ignore.strand=FALSE)
{
- if (!isSingleNumber(maxgap) || maxgap < 0)
- stop("'maxgap' must be a non-negative integer")
+ if (!isSingleNumber(minoverlap) || minoverlap < 0L)
+ stop("'minoverlap' must be a single non-negative integer")
type <- match.arg(type)
select <- match.arg(select)
@@ -75,11 +78,12 @@ setMethod("findOverlaps", c("GRangesList", "GRangesList"),
type=type, select="all",
ignore.strand=ignore.strand)
- if (minoverlap > 1L) {
- score <- .overlap_score(ans00, unlisted_query, unlisted_subject)
- score <- .aggregated_sum(score, query_groups[queryHits(ans00)],
- subject_groups[subjectHits(ans00)])
- mcols(ans00) <- DataFrame(score=score)
+ if (minoverlap > 0L) {
+ owidth <- .overlapwidth(ans00, unlisted_query, unlisted_subject)
+ owidth <- .aggregated_sum(owidth,
+ query_groups[queryHits(ans00)],
+ subject_groups[subjectHits(ans00)])
+ mcols(ans00) <- DataFrame(owidth=owidth)
}
if (type == "within") {
ans01 <- remapHits(ans00, Rnodes.remapping=subject_groups,
@@ -97,8 +101,8 @@ setMethod("findOverlaps", c("GRangesList", "GRangesList"),
Rnodes.remapping=subject_groups,
new.nRnode=length(subject))
}
- if (minoverlap > 1L) {
- keep_idx <- which(mcols(ans)[ , "score"] >= minoverlap)
+ if (minoverlap > 0L) {
+ keep_idx <- which(mcols(ans)[ , "owidth"] >= minoverlap)
mcols(ans) <- NULL
ans <- ans[keep_idx]
}
@@ -107,13 +111,13 @@ setMethod("findOverlaps", c("GRangesList", "GRangesList"),
)
setMethod("findOverlaps", c("GRangesList", "GenomicRanges"),
- function(query, subject, maxgap=0L, minoverlap=1L,
+ function(query, subject, maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within"),
select=c("all", "first", "last", "arbitrary"),
ignore.strand=FALSE)
{
- if (!isSingleNumber(maxgap) || maxgap < 0)
- stop("'maxgap' must be a non-negative integer")
+ if (!isSingleNumber(minoverlap) || minoverlap < 0L)
+ stop("'minoverlap' must be a single non-negative integer")
type <- match.arg(type)
select <- match.arg(select)
@@ -125,11 +129,12 @@ setMethod("findOverlaps", c("GRangesList", "GenomicRanges"),
type=type, select="all",
ignore.strand=ignore.strand)
- if (minoverlap > 1L) {
- score <- .overlap_score(ans00, unlisted_query, subject)
- score <- .aggregated_sum(score, query_groups[queryHits(ans00)],
- subjectHits(ans00))
- mcols(ans00) <- DataFrame(score=score)
+ if (minoverlap > 0L) {
+ owidth <- .overlapwidth(ans00, unlisted_query, subject)
+ owidth <- .aggregated_sum(owidth,
+ query_groups[queryHits(ans00)],
+ subjectHits(ans00))
+ mcols(ans00) <- DataFrame(owidth=owidth)
}
if (type == "within") {
ans10 <- remapHits(ans00, Lnodes.remapping=query_groups,
@@ -143,8 +148,8 @@ setMethod("findOverlaps", c("GRangesList", "GenomicRanges"),
ans <- remapHits(ans00, Lnodes.remapping=query_groups,
new.nLnode=length(query))
}
- if (minoverlap > 1L) {
- keep_idx <- which(mcols(ans)[ , "score"] >= minoverlap)
+ if (minoverlap > 0L) {
+ keep_idx <- which(mcols(ans)[ , "owidth"] >= minoverlap)
mcols(ans) <- NULL
ans <- ans[keep_idx]
}
@@ -153,13 +158,13 @@ setMethod("findOverlaps", c("GRangesList", "GenomicRanges"),
)
setMethod("findOverlaps", c("GenomicRanges", "GRangesList"),
- function(query, subject, maxgap=0L, minoverlap=1L,
+ function(query, subject, maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within"),
select=c("all", "first", "last", "arbitrary"),
ignore.strand=FALSE)
{
- if (!isSingleNumber(maxgap) || maxgap < 0)
- stop("'maxgap' must be a non-negative integer")
+ if (!isSingleNumber(minoverlap) || minoverlap < 0L)
+ stop("'minoverlap' must be a single non-negative integer")
type <- match.arg(type)
select <- match.arg(select)
@@ -181,16 +186,17 @@ setMethod("findOverlaps", c("GenomicRanges", "GRangesList"),
type=type, select="all",
ignore.strand=ignore.strand)
- if(minoverlap > 1L) {
- score <- .overlap_score(ans00, query, unlisted_subject)
- score <- .aggregated_sum(score, queryHits(ans00),
- subject_groups[subjectHits(ans00)])
- mcols(ans00) <- DataFrame(score=score)
+ if(minoverlap > 0L) {
+ owidth <- .overlapwidth(ans00, query, unlisted_subject)
+ owidth <- .aggregated_sum(owidth,
+ queryHits(ans00),
+ subject_groups[subjectHits(ans00)])
+ mcols(ans00) <- DataFrame(owidth=owidth)
}
ans <- remapHits(ans00, Rnodes.remapping=subject_groups,
new.nRnode=length(subject))
- if (minoverlap > 1L) {
- keep_idx <- which(mcols(ans)[ , "score"] >= minoverlap)
+ if (minoverlap > 0L) {
+ keep_idx <- which(mcols(ans)[ , "owidth"] >= minoverlap)
mcols(ans) <- NULL
ans <- ans[keep_idx]
}
@@ -200,7 +206,7 @@ setMethod("findOverlaps", c("GenomicRanges", "GRangesList"),
### Needed by chipseq package.
setMethod("findOverlaps", c("RangedData", "GenomicRanges"),
- function(query, subject, maxgap=0L, minoverlap=1L,
+ function(query, subject, maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within"),
select=c("all", "first", "last", "arbitrary"),
ignore.strand=FALSE)
@@ -220,7 +226,7 @@ setMethod("findOverlaps", c("RangedData", "GenomicRanges"),
### -------------------------------------------------------------------------
countOverlaps_GenomicRanges <- function(query, subject,
- maxgap=0L, minoverlap=1L,
+ maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"),
ignore.strand=FALSE)
{
@@ -237,3 +243,17 @@ setMethod("countOverlaps", c("GenomicRanges", "GenomicRanges"),
countOverlaps_GenomicRanges
)
+compatibleStrand <- function(a, b) {
+ a == "*" | b == "*" | a == b
+}
+
+setMethod("poverlaps", c("GenomicRanges", "GenomicRanges"),
+ function(query, subject, maxgap=0L, minoverlap=1L,
+ type=c("any", "start", "end", "within", "equal"),
+ ignore.strand=FALSE)
+{
+ seqnames(query) == seqnames(subject) &  # same sequence name
+ (if (ignore.strand) TRUE  # strand test skipped on request
+ else compatibleStrand(strand(query), strand(subject))) &
+ poverlaps(ranges(query), ranges(subject), maxgap, minoverlap, type)
+})
diff --git a/R/genomic-range-squeezers.R b/R/genomic-range-squeezers.R
new file mode 100644
index 0000000..f4a8f33
--- /dev/null
+++ b/R/genomic-range-squeezers.R
@@ -0,0 +1,28 @@
+### =========================================================================
+### Generic functions for squeezing the genomic ranges out of a range-based
+### object
+### -------------------------------------------------------------------------
+
+
+### Extract the genomic ranges as a GRanges object.
+setGeneric("granges", signature="x",
+ function(x, use.names=TRUE, use.mcols=FALSE, ...)
+ standardGeneric("granges")
+)
+
+### Extract the genomic ranges as a GRangesList object.
+setGeneric("grglist", signature="x",
+ function(x, use.names=TRUE, use.mcols=FALSE, ...)
+ standardGeneric("grglist")
+)
+
+### Pairs method.
+setMethod("grglist", "Pairs", function(x, use.names=TRUE, use.mcols=FALSE) {
+ stopifnot(isTRUEorFALSE(use.mcols))
+ grl <- zipup(granges(first(x)), granges(second(x)))
+ if (!use.mcols) {
+ mcols(grl) <- NULL
+ }
+ grl
+ })
+
diff --git a/R/inter-range-methods.R b/R/inter-range-methods.R
index 3f00711..97f9a13 100644
--- a/R/inter-range-methods.R
+++ b/R/inter-range-methods.R
@@ -232,7 +232,7 @@ setMethod("range", "GenomicRanges",
### Like the above method, return a GRanges instance.
setMethod("range", "GPos",
function(x, ..., with.revmap=FALSE, ignore.strand=FALSE, na.rm=FALSE)
- callGeneric(x at pos_runs, ...,
+ callGeneric(stitch_GPos(x), ...,
with.revmap=with.revmap, ignore.strand=ignore.strand,
na.rm=na.rm)
)
@@ -373,9 +373,10 @@ setMethod("isDisjoint", "GenomicRanges",
all(callGeneric(rgl))
}
)
+
### Overwrite above method with optimized method for GPos objects.
setMethod("isDisjoint", "GPos",
- function(x, ignore.strand=FALSE) callGeneric(x at pos_runs, ignore.strand)
+ function(x, ignore.strand=FALSE) callGeneric(stitch_GPos(x), ignore.strand)
)
setMethod("isDisjoint", "GRangesList",
diff --git a/R/intra-range-methods.R b/R/intra-range-methods.R
index 0ce5d8b..6251721 100644
--- a/R/intra-range-methods.R
+++ b/R/intra-range-methods.R
@@ -15,18 +15,7 @@ setMethod("shift", "GenomicRanges",
function(x, shift=0L, use.names=TRUE)
{
new_ranges <- shift(ranges(x), shift=shift, use.names=use.names)
- ranges(x) <- new_ranges
- x
- }
-)
-
-### Overwrite above method with optimized method for GPos objects.
-### A GPos object cannot hold names so the 'use.names' arg has no effect.
-setMethod("shift", "GPos",
- function(x, shift=0L, use.names=TRUE)
- {
- x at pos_runs <- callGeneric(x at pos_runs, shift=shift)
- x
+ update(x, ranges=new_ranges)
}
)
diff --git a/R/nearest-methods.R b/R/nearest-methods.R
index f1c5144..9fb2617 100644
--- a/R/nearest-methods.R
+++ b/R/nearest-methods.R
@@ -243,10 +243,10 @@ setMethod("follow", c("GenomicRanges", "missing"),
{
## overlapping ranges
if (drop.self) {
- ol <- findOverlaps(x, minoverlap=0L, select=select,
+ ol <- findOverlaps(x, maxgap=0L, select=select,
ignore.strand=ignore.strand, drop.self=TRUE)
} else {
- ol <- findOverlaps(x, subject, minoverlap=0L, select=select,
+ ol <- findOverlaps(x, subject, maxgap=0L, select=select,
ignore.strand=ignore.strand)
}
@@ -408,6 +408,36 @@ setMethod("distanceToNearest", c("GenomicRanges", "missing"),
}
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### precedes() and follows()
+###
+
+.normBounds <- function(x) {
+ if (is(x, "GRangesList")) {
+ x <- range(x)
+ if (all(lengths(x) == 1L)) {
+ x <- unlist(x)
+ } else {
+ stop("operation undefined when ranges cross seqnames and strands")
+ }
+ }
+ x
+}
+
+precedes <- function(x, y) {
+ x <- .normBounds(x)
+ y <- .normBounds(y)
+ seqnames(x) == seqnames(y) &
+ ifelse(strand(y) == "-", start(x) > end(y), end(x) < start(y))
+}
+
+follows <- function(x, y) {
+ x <- .normBounds(x)
+ y <- .normBounds(y)
+ seqnames(x) == seqnames(y) &
+ ifelse(strand(y) == "-", end(x) < start(y), start(x) > end(y))
+}
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Find 'k' nearest neighbors
###
### FIXME: Largely untested code; unexported for now
diff --git a/R/range-squeezers.R b/R/range-squeezers.R
deleted file mode 100644
index 05ae309..0000000
--- a/R/range-squeezers.R
+++ /dev/null
@@ -1,49 +0,0 @@
-### =========================================================================
-### Generic functions for squeezing the ranges out of a range-based object
-### -------------------------------------------------------------------------
-
-
-### Extract the ranges as a GRanges object.
-setGeneric("granges", signature="x",
- function(x, use.names=TRUE, use.mcols=FALSE, ...)
- standardGeneric("granges")
-)
-
-### Extract the ranges as a GRangesList object.
-setGeneric("grglist", signature="x",
- function(x, use.names=TRUE, use.mcols=FALSE, ...)
- standardGeneric("grglist")
-)
-
-### Extract the ranges as a RangesList object.
-### TODO: This one should probably be in IRanges together with ranges(), which
-### is another range-squeezer.
-### TODO: For consistency the ranges() generic should also get the 'use.mcols'
-### arg with default to FALSE.
-setGeneric("rglist", signature="x",
- function(x, use.names=TRUE, use.mcols=FALSE, ...)
- standardGeneric("rglist")
-)
-
-### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-### Pairs methods
-###
-
-setMethod("grglist", "Pairs", function(x, use.names=TRUE, use.mcols=FALSE) {
- stopifnot(isTRUEorFALSE(use.mcols))
- grl <- zipup(granges(first(x)), granges(second(x)))
- if (!use.mcols) {
- mcols(grl) <- NULL
- }
- grl
- })
-
-setMethod("rglist", "Pairs", function(x, use.names=TRUE, use.mcols=FALSE) {
- stopifnot(isTRUEorFALSE(use.mcols))
- rl <- zipup(ranges(first(x)), ranges(second(x)))
- if (!use.mcols) {
- mcols(rl) <- NULL
- }
- rl
- })
-
diff --git a/R/setops-methods.R b/R/setops-methods.R
index 07f54ed..7e0db24 100644
--- a/R/setops-methods.R
+++ b/R/setops-methods.R
@@ -75,7 +75,7 @@
###
### Always return a GRanges *instance* whatever GenomicRanges derivatives are
-### passed to it (e.g. GNCList or GPos), so does NOT act like an endomorphism
+### passed to it (e.g. GPos or GNCList), so does NOT act like an endomorphism
### in general.
setMethod("union", c("GenomicRanges", "GenomicRanges"),
function(x, y, ignore.strand=FALSE)
@@ -102,7 +102,7 @@ setMethod("union", c("GRangesList", "GRangesList"),
)
### Always return a GRanges *instance* whatever GenomicRanges derivatives are
-### passed to it (e.g. GNCList or GPos), so does NOT act like an endomorphism
+### passed to it (e.g. GPos or GNCList), so does NOT act like an endomorphism
### in general.
setMethod("intersect", c("GenomicRanges", "GenomicRanges"),
function(x, y, ignore.strand=FALSE)
@@ -138,7 +138,7 @@ setMethod("intersect", c("GRangesList", "GRangesList"),
)
### Always return a GRanges *instance* whatever GenomicRanges derivatives are
-### passed to it (e.g. GNCList or GPos), so does NOT act like an endomorphism
+### passed to it (e.g. GPos or GNCList), so does NOT act like an endomorphism
### in general.
setMethod("setdiff", c("GenomicRanges", "GenomicRanges"),
function(x, y, ignore.strand=FALSE)
diff --git a/R/strand-utils.R b/R/strand-utils.R
index 5240827..df124b3 100644
--- a/R/strand-utils.R
+++ b/R/strand-utils.R
@@ -4,7 +4,7 @@
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-### Some "strand" and "strand<-" methods
+### Some "strand" methods
###
setMethod("strand", "missing", function(x) factor(levels=c("+","-","*")))
@@ -92,10 +92,28 @@ setMethod("strand", "DataTable",
}
)
-setReplaceMethod("strand", "DataTable", function(x, value) {
- x$strand <- normargGenomicRangesStrand(value, nrow(x))
- x
-})
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### Some "strand<-" methods
+###
+
+normalize_strand_replacement_value <- function(value, x)
+{
+ if (!is(value, "Rle"))
+ value <- Rle(value)
+ if (!is.factor(runValue(value))
+ || !identical(levels(runValue(value)), levels(strand())))
+ runValue(value) <- strand(runValue(value))
+ S4Vectors:::V_recycle(value, x, x_what="value", skeleton_what="x")
+}
+
+setReplaceMethod("strand", "DataTable",
+ function(x, value)
+ {
+ x$strand <- normalize_strand_replacement_value(value, seq_len(nrow(x)))
+ x
+ }
+)
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
diff --git a/build/vignette.rds b/build/vignette.rds
index e873b9d..85a10de 100644
Binary files a/build/vignette.rds and b/build/vignette.rds differ
diff --git a/inst/doc/ExtendingGenomicRanges.pdf b/inst/doc/ExtendingGenomicRanges.pdf
index a128de4..4bdfc4a 100644
Binary files a/inst/doc/ExtendingGenomicRanges.pdf and b/inst/doc/ExtendingGenomicRanges.pdf differ
diff --git a/inst/doc/GRanges_and_GRangesList_slides.pdf b/inst/doc/GRanges_and_GRangesList_slides.pdf
index 59dab1f..5e247e2 100644
Binary files a/inst/doc/GRanges_and_GRangesList_slides.pdf and b/inst/doc/GRanges_and_GRangesList_slides.pdf differ
diff --git a/inst/doc/GenomicRangesHOWTOs.pdf b/inst/doc/GenomicRangesHOWTOs.pdf
index baf21be..533f711 100644
Binary files a/inst/doc/GenomicRangesHOWTOs.pdf and b/inst/doc/GenomicRangesHOWTOs.pdf differ
diff --git a/inst/doc/GenomicRangesIntroduction.Rnw b/inst/doc/GenomicRangesIntroduction.Rnw
index e9387ff..dda9320 100644
--- a/inst/doc/GenomicRangesIntroduction.Rnw
+++ b/inst/doc/GenomicRangesIntroduction.Rnw
@@ -10,8 +10,7 @@ BiocStyle::latex()
\newcommand{\GenomicRanges}{\Biocpkg{GenomicRanges}}
\title{An Introduction to the GenomicRanges Package}
-\author{Marc Carlson \and Patrick Aboyoun \and Herv\'{e} Pag\`{e}s \and
- Martin Morgan}
+\author{Marc Carlson, Patrick Aboyoun, Herv\'{e} Pag\`{e}s, and Martin Morgan}
\date{\today; updated 16 November, 2016}
\begin{document}
diff --git a/inst/doc/GenomicRangesIntroduction.pdf b/inst/doc/GenomicRangesIntroduction.pdf
index 4577996..131eda2 100644
Binary files a/inst/doc/GenomicRangesIntroduction.pdf and b/inst/doc/GenomicRangesIntroduction.pdf differ
diff --git a/inst/doc/Ten_things_slides.pdf b/inst/doc/Ten_things_slides.pdf
index 42b99cc..c4b5f8e 100644
Binary files a/inst/doc/Ten_things_slides.pdf and b/inst/doc/Ten_things_slides.pdf differ
diff --git a/inst/unitTests/test_GNCList-class.R b/inst/unitTests/test_GNCList-class.R
index edcc47e..4f77482 100644
--- a/inst/unitTests/test_GNCList-class.R
+++ b/inst/unitTests/test_GNCList-class.R
@@ -7,10 +7,13 @@ findOverlaps_GNCList <- GenomicRanges:::findOverlaps_GNCList
source(system.file("unitTests", "test_NCList-class.R", package="IRanges"))
.get_query_overlaps2 <- function(query, subject,
- maxgap, min_overlap_score, type,
- ignore.strand)
+ maxgap=-1L, minoverlap=0L,
+ type=c("any", "start", "end", "within", "extend", "equal"),
+ ignore.strand=FALSE)
{
- ok <- .get_query_overlaps(query, subject, maxgap, min_overlap_score, type)
+ ok <- .get_query_overlaps(query, subject,
+ maxgap=maxgap, minoverlap=minoverlap,
+ type=type)
ok <- ok & seqnames(query) == seqnames(subject)
if (ignore.strand || as.logical(strand(query) == "*"))
return(ok)
@@ -20,7 +23,7 @@ source(system.file("unitTests", "test_NCList-class.R", package="IRanges"))
### Redefine the .findOverlaps_naive() function we got from sourcing
### test_NCList-class.R above.
.findOverlaps_naive <- function(query, subject,
- maxgap=0L, minoverlap=1L,
+ maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end",
"within", "extend", "equal"),
select=c("all", "first", "last", "arbitrary",
@@ -28,17 +31,12 @@ source(system.file("unitTests", "test_NCList-class.R", package="IRanges"))
ignore.strand=FALSE)
{
type <- match.arg(type)
- if (type == "any") {
- min_overlap_score <- .min_overlap_score(maxgap, minoverlap)
- } else {
- min_overlap_score <- minoverlap
- }
select <- match.arg(select)
hits_per_query <- lapply(seq_along(query),
function(i)
which(.get_query_overlaps2(query[i], subject,
- maxgap, min_overlap_score, type,
- ignore.strand)))
+ maxgap=maxgap, minoverlap=minoverlap,
+ type=type, ignore.strand=ignore.strand)))
hits <- .make_Hits_from_q2s(hits_per_query, length(subject))
selectHits(hits, select=select)
}
diff --git a/man/GPos-class.Rd b/man/GPos-class.Rd
index ec6cf5d..25c11dc 100644
--- a/man/GPos-class.Rd
+++ b/man/GPos-class.Rd
@@ -5,33 +5,28 @@
\alias{GPos-class}
\alias{GPos}
-\alias{length,GPos-method}
\alias{names,GPos-method}
\alias{names<-,GPos-method}
-\alias{seqnames,GPos-method}
-\alias{pos}
\alias{pos,GPos-method}
-\alias{start,GPos-method}
-\alias{end,GPos-method}
-\alias{width,GPos-method}
-\alias{ranges,GPos-method}
-\alias{strand,GPos-method}
-\alias{seqinfo,GPos-method}
-\alias{seqinfo<-,GPos-method}
-
-\alias{coerce,GenomicRanges,GPos-method}
+
+\alias{coerce,GRanges,GPos-method}
+\alias{coerce,ANY,GPos-method}
+\alias{coerce,GPos,GRanges-method}
\alias{as.data.frame,GPos-method}
-\alias{extractROWS,GPos-method}
+
+\alias{updateObject,GPos-method}
\alias{show,GPos-method}
-\alias{c,GPos-method}
-\title{GPos objects}
+\title{Memory-efficient representation of genomic positions}
\description{
- The GPos class is a container for storing a set of genomic \emph{positions},
- that is, genomic ranges of width 1. Even though a \link{GRanges} object can
- be used for that, using a GPos object can be much more memory-efficient,
- especially when the object contains long runs of adjacent positions.
+ The GPos class is a container for storing a set of \emph{genomic positions}
+ where most of the positions are typically (but not necessarily) adjacent.
+ Because genomic positions can be seen as genomic ranges of width 1, the GPos
+ class extends the \link{GenomicRanges} virtual class. Note that even though
+ a \link{GRanges} instance can be used for storing genomic positions, using a
+ GPos object will be much more memory-efficient, especially when the object
+ contains long runs of adjacent positions in \emph{ascending order}.
}
\usage{
@@ -41,8 +36,9 @@ GPos(pos_runs) # constructor function
\arguments{
\item{pos_runs}{
A \link{GRanges} object (or any other \link{GenomicRanges} derivative)
- where each range is interpreted as a run of adjacent genomic positions.
- If \code{pos_runs} is not a \link{GenomicRanges} object,
+ where each range is interpreted as a run of adjacent ascending genomic
+ positions on the same strand.
+ If \code{pos_runs} is not a \link{GenomicRanges} derivative,
\code{GPos()} first tries to coerce it to one with
\code{as(pos_runs, "GenomicRanges", strict=FALSE)}.
}
@@ -53,21 +49,33 @@ GPos(pos_runs) # constructor function
}
\section{Accessors}{
+
\subsection{Getters}{
- GPos objects support the same set of getters as \link{GRanges}
- objects (i.e. \code{seqnames()}, \code{start()}, \code{end()},
- \code{ranges()}, \code{strand()}, \code{mcols()}, \code{seqinfo()},
+ GPos objects support the same set of getters as other \link{GenomicRanges}
+ derivatives (i.e. \code{seqnames()}, \code{ranges()}, \code{start()},
+ \code{end()}, \code{strand()}, \code{mcols()}, \code{seqinfo()},
etc...), plus the \code{pos()} getter which is equivalent to
- \code{start()} or \code{end()}. See \code{?GRanges} for the list of
- getters supported by \link{GRanges} objects.
-
- Note that a GPos object cannot hold names i.e. \code{names()}
- always returns \code{NULL} on it.
+ \code{start()} or \code{end()}. See \code{?\link{GenomicRanges}} for the
+ list of getters supported by \link{GenomicRanges} derivatives.
+
+ IMPORTANT NOTES:
+ \enumerate{
+ \item \code{ranges()} returns an \link[IRanges]{IPos} object instead
+ of the \link[IRanges]{IRanges} object that one gets with other
+ \link{GenomicRanges} derivatives. To get an \link[IRanges]{IRanges}
+ object, you need to call \code{ranges()} again on the
+ \link[IRanges]{IPos} object i.e. do \code{ranges(ranges(x))}
+ on GPos object \code{x}.
+ \item Note that a GPos object cannot hold names i.e. \code{names()}
+ always returns \code{NULL} on it.
+ }
}
+
\subsection{Setters}{
Like \link{GRanges} objects, GPos objects support the following
setters:
\itemize{
+ \item The \code{seqnames()} and \code{strand()} setters.
\item The \code{mcols()} and \code{metadata()} setters.
\item The family of setters that operate on the seqinfo component of
an object:
@@ -81,16 +89,15 @@ GPos(pos_runs) # constructor function
package.
}
- However, there is no \code{seqnames()}, \code{pos()}, or \code{strand()}
- setter for GPos objects at the moment (although they might be
- added in the future).
+ However, there is no \code{pos()} setter for GPos objects at the moment
+ (although one might be added in the future).
}
}
\section{Coercion}{
From GenomicRanges to GPos:
- A \link{GenomicRanges} object \code{x} in which all the ranges have a width
- of 1 can be coerced to a GPos object with \code{as(x, "GPos")}.
+ A \link{GenomicRanges} derivative \code{x} in which all the ranges have a
+ width of 1 can be coerced to a GPos object with \code{as(x, "GPos")}.
The names on \code{x} are not propagated (a warning is issued if \code{x}
has names on it).
@@ -101,12 +108,12 @@ GPos(pos_runs) # constructor function
See "MEMORY USAGE" in the Examples section below.
From GPos to ordinary R objects:
- Like with a \link{GRanges} object, \code{as.character()}, \code{as.factor()},
- and \code{as.data.frame()} work with a GPos object \code{x}.
- Note that \code{as.data.frame(x)} returns a data frame with a \code{pos}
- column (containing \code{pos(x)}) instead of the \code{start}, \code{end},
- and \code{width} columns that one gets when \code{x} is a \link{GRanges}
- object.
+ Like with any other \link{GenomicRanges} derivative, \code{as.character()},
+ \code{as.factor()}, and \code{as.data.frame()} work on a GPos object
+ \code{x}. Note however that \code{as.data.frame(x)} returns a data frame
+ with a \code{pos} column (containing \code{pos(x)}) instead of the
+ \code{start}, \code{end}, and \code{width} columns that one gets with other
+ \link{GenomicRanges} derivatives.
}
\section{Subsetting}{
@@ -119,9 +126,8 @@ GPos(pos_runs) # constructor function
}
\section{Splitting and Relisting}{
- Like with a \link{GRanges} object, \code{split()} and \code{relist()} work
- with a GPos object \code{x}. Note that they return a
- \link{GenomicRangesList} object instead of a \link{GRangesList} object.
+ Like with any other \link{GenomicRanges} derivative, \code{split()} and
+ \code{relist()} work on a GPos object.
}
\note{
@@ -129,6 +135,10 @@ GPos(pos_runs) # constructor function
GPos object cannot exceed \code{.Machine$integer.max} (i.e. 2^31 - 1 on
most platforms). \code{GPos()} will return an error if \code{pos_runs}
contains too many genomic positions.
+
+ Internal representation of GPos objects has changed in \pkg{GenomicRanges}
+ 1.29.10 (Bioc 3.6). Update any old object \code{x} with:
+ \code{x <- updateObject(x, verbose=TRUE)}.
}
\author{
@@ -139,7 +149,11 @@ GPos(pos_runs) # constructor function
\seealso{
\itemize{
- \item \link{GRanges} objects.
+ \item The \link[IRanges]{IPos} class in the \pkg{IRanges}
+ package for a memory-efficient representation of \emph{integer
+ positions} (i.e. integer ranges of width 1).
+
+ \item \link{GenomicRanges} and \link{GRanges} objects.
\item The \code{\link[GenomeInfoDb]{seqinfo}} accessor and family in
the \pkg{GenomeInfoDb} package for accessing/modifying the seqinfo
@@ -238,7 +252,7 @@ gr4 <- as(gpos4, "GRanges")
gr4
## ... but is generally not a good idea:
object.size(gpos4)
-object.size(gr4) # 6951 times bigger than the GPos object!
+object.size(gr4) # 8 times bigger than the GPos object!
## Shuffling the order of the positions impacts memory usage:
gpos4r <- rev(gpos4)
diff --git a/man/GRanges-class.Rd b/man/GRanges-class.Rd
index 0ed1a7c..d498b87 100644
--- a/man/GRanges-class.Rd
+++ b/man/GRanges-class.Rd
@@ -1,80 +1,81 @@
\name{GRanges-class}
\docType{class}
-% Class:
+\alias{class:IRanges_OR_IPos}
+\alias{IRanges_OR_IPos-class}
+\alias{IRanges_OR_IPos}
+
\alias{class:GenomicRanges}
\alias{GenomicRanges-class}
\alias{GenomicRanges}
+
+\alias{GenomicRanges_OR_missing-class}
+
\alias{class:GRanges}
\alias{GRanges-class}
\alias{GRanges}
-\alias{GenomicRangesORGRangesList-class}
-\alias{GenomicRangesORmissing-class}
% Constructors:
\alias{GRanges}
\alias{updateObject,GRanges-method}
-% Coercion:
-\alias{as.character,GenomicRanges-method}
-\alias{as.factor,GenomicRanges-method}
-\alias{as.data.frame,GenomicRanges-method}
-\alias{coerce,GenomicRanges,RangesList-method}
-\alias{coerce,GenomicRanges,RangedData-method}
-\alias{granges,GenomicRanges-method}
-\alias{coerce,GenomicRanges,GRanges-method}
-\alias{coerce,character,GRanges-method}
-\alias{coerce,factor,GRanges-method}
-\alias{coerce,RangesList,GRanges-method}
-\alias{coerce,RangedData,GRanges-method}
-\alias{coerce,Seqinfo,GRanges-method}
-\alias{coerce,Seqinfo,RangesList-method}
-\alias{coerce,GenomicRanges,Grouping-method}
-\alias{coerce,ANY,GenomicRanges-method}
-
% Accessors:
+\alias{length,GenomicRanges-method}
\alias{seqnames,GRanges-method}
\alias{seqnames,RangedData-method}
\alias{seqnames<-,GenomicRanges-method}
\alias{ranges,GRanges-method}
\alias{ranges<-,GenomicRanges-method}
+\alias{start,GenomicRanges-method}
+\alias{start<-,GenomicRanges-method}
+\alias{end,GenomicRanges-method}
+\alias{end<-,GenomicRanges-method}
+\alias{width,GenomicRanges-method}
+\alias{width<-,GenomicRanges-method}
\alias{strand,GRanges-method}
\alias{strand<-,GenomicRanges,ANY-method}
\alias{names,GenomicRanges-method}
\alias{names<-,GenomicRanges-method}
+\alias{$,GenomicRanges-method}
+\alias{$<-,GenomicRanges-method}
\alias{seqinfo,GRanges-method}
\alias{seqinfo,List-method}
\alias{seqinfo,RangedData-method}
-\alias{seqinfo,RangesList-method}
\alias{seqinfo<-,GenomicRanges-method}
\alias{seqinfo<-,List-method}
\alias{seqinfo<-,RangedData-method}
\alias{score,GenomicRanges-method}
\alias{score<-,GenomicRanges-method}
+\alias{granges,GenomicRanges-method}
-% Ranges methods:
-\alias{start,GenomicRanges-method}
-\alias{start<-,GenomicRanges-method}
-\alias{end,GenomicRanges-method}
-\alias{end<-,GenomicRanges-method}
-\alias{width,GenomicRanges-method}
-\alias{width<-,GenomicRanges-method}
+% Coercion:
+\alias{coerce,GenomicRanges,GRanges-method}
+\alias{as.character,GenomicRanges-method}
+\alias{as.factor,GenomicRanges-method}
+\alias{as.data.frame,GenomicRanges-method}
+\alias{coerce,GenomicRanges,RangesList-method}
+\alias{coerce,GenomicRanges,RangedData-method}
+\alias{coerce,character,GRanges-method}
+\alias{coerce,factor,GRanges-method}
+\alias{coerce,RangesList,GRanges-method}
+\alias{coerce,RangedData,GRanges-method}
+\alias{coerce,Seqinfo,GRanges-method}
+\alias{coerce,Seqinfo,RangesList-method}
+\alias{coerce,GenomicRanges,Grouping-method}
+\alias{coerce,ANY,GenomicRanges-method}
-% Vector methods:
-\alias{length,GenomicRanges-method}
+% Subsetting:
\alias{[<-,GRanges-method}
\alias{[,GenomicRanges,ANY-method}
\alias{[<-,GenomicRanges,ANY,ANY,ANY-method}
\alias{[,List,GenomicRanges-method}
\alias{[,list,GenomicRanges-method}
-\alias{c,GenomicRanges-method}
\alias{window,GenomicRanges-method}
-% $ and $<- methods:
-\alias{$,GenomicRanges-method}
-\alias{$<-,GenomicRanges-method}
+% Combining:
+\alias{c,GenomicRanges-method}
-% displaying:
+% Displaying:
\alias{summary.GenomicRanges}
\alias{summary,GenomicRanges-method}
\alias{show,GenomicRanges-method}
@@ -158,98 +159,6 @@
}
}
-\section{Coercion}{
- In the code snippets below, \code{x} is a GRanges object.
-
- \describe{
- \item{}{
- \code{as(from, "GRanges")}: Creates a GRanges object from a character
- vector, a factor, or a RangedData, or RangesList object.
-
- When \code{from} is a character vector (or a factor), each element
- in it must represent a genomic range in format \code{chr1:2501-2800}
- (unstranded range) or \code{chr1:2501-2800:+} (stranded range).
- \code{..} is also supported as a separator between the start and end
- positions. Strand can be \code{+}, \code{-}, \code{*}, or missing.
- The names on \code{from} are propagated to the returned GRanges object.
- See \code{as.character()} and \code{as.factor()} below for the
- reverse transformations.
-
- Coercing a data.frame or DataFrame into a GRanges object is also
- supported. See \code{\link{makeGRangesFromDataFrame}} for the details.
- }
- \item{}{
- \code{as(from, "RangedData")}:
- Creates a RangedData object from a GRanges
- object. The \code{strand} and metadata columns become columns
- in the result. The \code{seqlengths(from)}, \code{isCircular(from)},
- and \code{genome(from)} vectors are stored in the metadata columns
- of \code{ranges(rd)}.
- }
- \item{}{
- \code{as(from, "RangesList")}:
- Creates a RangesList object from a GRanges
- object. The \code{strand} and metadata columns become \emph{inner}
- metadata columns (i.e. metadata columns on the ranges).
- The \code{seqlengths(from)}, \code{isCircular(from)}, and
- \code{genome(from)} vectors become the metadata columns.
- }
- \item{}{
- \code{as.character(x, ignore.strand=FALSE)}:
- Turn GRanges object \code{x} into a character vector where each
- range in \code{x} is represented by a string in format
- \code{chr1:2501-2800:+}. If \code{ignore.strand} is TRUE or if
- \emph{all} the ranges in \code{x} are unstranded (i.e. their strand
- is set to \code{*}), then all the strings in the output are in
- format \code{chr1:2501-2800}.
-
- The names on \code{x} are propagated to the returned character vector.
- Its metadata (\code{metadata(x)}) and metadata columns (\code{mcols(x)})
- are ignored.
-
- See \code{as(from, "GRanges")} above for the reverse transformation.
- }
- \item{}{
- \code{as.factor(x)}:
- Equivalent to
-\preformatted{ factor(as.character(x), levels=as.character(sort(unique(x))))
-}
- See \code{as(from, "GRanges")} above for the reverse transformation.
-
- Note that \code{table(x)} is supported on a GRanges object. It is
- equivalent to, but much faster than, \code{table(as.factor(x))}.
- }
- \item{}{
- \code{as.data.frame(x, row.names = NULL, optional = FALSE, ...)}:
- Creates a data.frame with columns \code{seqnames} (factor),
- \code{start} (integer), \code{end} (integer), \code{width} (integer),
- \code{strand} (factor), as well as the additional metadata columns
- stored in \code{mcols(x)}. Pass an explicit
- \code{stringsAsFactors=TRUE/FALSE} argument via \code{\ldots} to
- override the default conversions for the metadata columns in
- \code{mcols(x)}.
- }
- \item{}{
- \code{as(from, "Grouping")}: Creates a
- \code{\link[IRanges]{ManyToOneGrouping}} object that groups
- \code{from} by seqname, strand, start and end (same as the default
- sort order). This makes it convenient, for example, to aggregate a
- GenomicRanges object by range.
- }
- }
-
- In the code snippets below, \code{x} is a \link[GenomeInfoDb]{Seqinfo}
- object.
-
- \describe{
- \item{}{
- \code{as(x, "GRanges")}, \code{as(x, "GenomicRanges")},
- \code{as(x, "RangesList")}: Turns \link[GenomeInfoDb]{Seqinfo} object
- \code{x} (with no \code{NA} lengths) into a GRanges or RangesList.
- }
- }
-}
-
\section{Accessors}{
In the following code snippets, \code{x} is a GRanges object.
@@ -269,8 +178,16 @@
Get or set the ranges. \code{value} can be a Ranges object.
}
\item{}{
- \code{names(x)}, \code{names(x) <- value}:
- Get or set the names of the elements.
+ \code{start(x)}, \code{start(x) <- value}:
+ Get or set \code{start(ranges(x))}.
+ }
+ \item{}{
+ \code{end(x)}, \code{end(x) <- value}:
+ Get or set \code{end(ranges(x))}.
+ }
+ \item{}{
+ \code{width(x)}, \code{width(x) <- value}:
+ Get or set \code{width(ranges(x))}.
}
\item{}{
\code{strand(x)}, \code{strand(x) <- value}:
@@ -278,6 +195,10 @@
vector, or factor.
}
\item{}{
+ \code{names(x)}, \code{names(x) <- value}:
+ Get or set the names of the elements.
+ }
+ \item{}{
\code{mcols(x, use.names=FALSE)}, \code{mcols(x) <- value}:
Get or set the metadata columns.
If \code{use.names=TRUE} and the metadata columns are not \code{NULL},
@@ -337,72 +258,105 @@
column from the element metadata.
}
\item{}{
- \code{granges(x, use.mcols=FALSE)}: Gets a \code{GRanges} with
- only the range information from \code{x}, unless \code{use.mcols}
- is \code{TRUE}, in which case the metadata columns are also
- returned. Those columns will include any "extra column slots" if
- \code{x} is a specialized \code{GenomicRanges} derivative.
+ \code{granges(x, use.names=FALSE, use.mcols=FALSE)}: Squeeze the genomic
+ ranges out of \link{GenomicRanges} object \code{x} and return them in a
+ GRanges object \emph{parallel} to \code{x} (i.e. same length as \code{x}).
+ If \code{use.mcols} is \code{TRUE}, the metadata columns are propagated.
+ If \code{x} is a \link{GenomicRanges} derivative with \emph{extra column
+ slots}, these will be propagated as metadata columns on the returned
+ GRanges object.
}
}
}
-\section{Ranges methods}{
- In the following code snippets, \code{x} is a GRanges object.
+\section{Coercion}{
+ In the code snippets below, \code{x} is a GRanges object.
\describe{
\item{}{
- \code{start(x)}, \code{start(x) <- value}:
- Get or set \code{start(ranges(x))}.
+ \code{as(from, "GRanges")}: Creates a GRanges object from a character
+ vector, a factor, or a RangedData or RangesList object.
+
+ When \code{from} is a character vector (or a factor), each element
+ in it must represent a genomic range in format \code{chr1:2501-2800}
+ (unstranded range) or \code{chr1:2501-2800:+} (stranded range).
+ \code{..} is also supported as a separator between the start and end
+ positions. Strand can be \code{+}, \code{-}, \code{*}, or missing.
+ The names on \code{from} are propagated to the returned GRanges object.
+ See \code{as.character()} and \code{as.factor()} below for the
+ reverse transformations.
+
+ Coercing a data.frame or DataFrame into a GRanges object is also
+ supported. See \code{\link{makeGRangesFromDataFrame}} for the details.
}
\item{}{
- \code{end(x)}, \code{end(x) <- value}:
- Get or set \code{end(ranges(x))}.
+ \code{as(from, "RangedData")}:
+ Creates a RangedData object from a GRanges
+ object. The \code{strand} and metadata columns become columns
+ in the result. The \code{seqlengths(from)}, \code{isCircular(from)},
+ and \code{genome(from)} vectors are stored in the metadata columns
+ of \code{ranges(rd)}.
}
\item{}{
- \code{width(x)}, \code{width(x) <- value}:
- Get or set \code{width(ranges(x))}.
+ \code{as(from, "RangesList")}:
+ Creates a RangesList object from a GRanges
+ object. The \code{strand} and metadata columns become \emph{inner}
+ metadata columns (i.e. metadata columns on the ranges).
+ The \code{seqlengths(from)}, \code{isCircular(from)}, and
+ \code{genome(from)} vectors become the metadata columns.
}
- }
-}
+ \item{}{
+ \code{as.character(x, ignore.strand=FALSE)}:
+ Turn GRanges object \code{x} into a character vector where each
+ range in \code{x} is represented by a string in format
+ \code{chr1:2501-2800:+}. If \code{ignore.strand} is TRUE or if
+ \emph{all} the ranges in \code{x} are unstranded (i.e. their strand
+ is set to \code{*}), then all the strings in the output are in
+ format \code{chr1:2501-2800}.
-\section{Splitting and Combining}{
- In the code snippets below, \code{x} is a GRanges object.
+ The names on \code{x} are propagated to the returned character vector.
+ Its metadata (\code{metadata(x)}) and metadata columns (\code{mcols(x)})
+ are ignored.
- \describe{
+ See \code{as(from, "GRanges")} above for the reverse transformation.
+ }
\item{}{
- \code{append(x, values, after = length(x))}:
- Inserts the \code{values} into \code{x} at the position given by
- \code{after}, where \code{x} and \code{values} are of the same
- class.
+ \code{as.factor(x)}:
+ Equivalent to
+\preformatted{ factor(as.character(x), levels=as.character(sort(unique(x))))
+}
+ See \code{as(from, "GRanges")} above for the reverse transformation.
+
+ Note that \code{table(x)} is supported on a GRanges object. It is
+ equivalent to, but much faster than, \code{table(as.factor(x))}.
}
\item{}{
- \code{c(x, ...)}:
- Combines \code{x} and the GRanges objects in \code{...} together.
- Any object in \code{...} must belong to the same class as \code{x},
- or to one of its subclasses, or must be \code{NULL}.
- The result is an object of the same class as \code{x}.
+ \code{as.data.frame(x, row.names = NULL, optional = FALSE, ...)}:
+ Creates a data.frame with columns \code{seqnames} (factor),
+ \code{start} (integer), \code{end} (integer), \code{width} (integer),
+ \code{strand} (factor), as well as the additional metadata columns
+ stored in \code{mcols(x)}. Pass an explicit
+ \code{stringsAsFactors=TRUE/FALSE} argument via \code{\ldots} to
+ override the default conversions for the metadata columns in
+ \code{mcols(x)}.
}
\item{}{
- \code{c(x, ..., ignore.mcols=FALSE)}
- If the \code{GRanges} objects have metadata columns (represented as one
- \link{DataFrame} per object), each such \link{DataFrame} must have the
- same columns in order to combine successfully. In order to circumvent
- this restraint, you can pass in an \code{ignore.mcols=TRUE} argument
- which will combine all the objects into one and drop all of their
- metadata columns.
+ \code{as(from, "Grouping")}: Creates a
+ \code{\link[IRanges]{ManyToOneGrouping}} object that groups
+ \code{from} by seqname, strand, start and end (same as the default
+ sort order). This makes it convenient, for example, to aggregate a
+ GenomicRanges object by range.
}
+ }
+
+ In the code snippets below, \code{x} is a \link[GenomeInfoDb]{Seqinfo}
+ object.
+
+ \describe{
\item{}{
- \code{split(x, f, drop=FALSE)}:
- Splits \code{x} according to \code{f} to create a
- \link{GRangesList} object.
- If \code{f} is a list-like object then \code{drop} is ignored
- and \code{f} is treated as if it was
- \code{rep(seq_len(length(f)), sapply(f, length))},
- so the returned object has the same shape as \code{f} (it also
- receives the names of \code{f}).
- Otherwise, if \code{f} is not a list-like object, empty list
- elements are removed from the returned object if \code{drop} is
- \code{TRUE}.
+ \code{as(x, "GRanges")}, \code{as(x, "GenomicRanges")},
+ \code{as(x, "RangesList")}: Turns \link[GenomeInfoDb]{Seqinfo} object
+ \code{x} (with no \code{NA} lengths) into a GRanges or RangesList.
}
}
}
@@ -514,7 +468,48 @@
}
}
-\section{Other methods}{
+\section{Combining and splitting}{
+ In the code snippets below, \code{x} is a GRanges object.
+
+ \describe{
+ \item{}{
+ \code{c(x, ..., ignore.mcols=FALSE)}:
+ Combines \code{x} and the GRanges objects in \code{...} together.
+ Any object in \code{...} must belong to the same class as \code{x},
+ or to one of its subclasses, or must be \code{NULL}.
+ The result is an object of the same class as \code{x}.
+
+ If the \code{GRanges} objects have metadata columns (represented as one
+ \link{DataFrame} per object), each such \link{DataFrame} must have the
+ same columns in order to combine successfully. In order to circumvent
+ this restriction, you can pass in an \code{ignore.mcols=TRUE} argument
+ which will combine all the objects into one and drop all of their
+ metadata columns.
+ }
+ \item{}{
+ \code{append(x, values, after = length(x))}:
+ Inserts the \code{values} into \code{x} at the position given by
+ \code{after}, where \code{x} and \code{values} are of the same
+ class.
+ }
+ \item{}{
+ \code{split(x, f, drop=FALSE)}:
+ Splits \code{x} according to \code{f} to create a
+ \link{GRangesList} object.
+ If \code{f} is a list-like object then \code{drop} is ignored
+ and \code{f} is treated as if it was
+ \code{rep(seq_len(length(f)), sapply(f, length))},
+ so the returned object has the same shape as \code{f} (it also
+ receives the names of \code{f}).
+ Otherwise, if \code{f} is not a list-like object, empty list
+ elements are removed from the returned object if \code{drop} is
+ \code{TRUE}.
+ }
+ }
+}
+
+\section{Displaying}{
+ In the code snippets below, \code{x} is a GRanges object.
\describe{
\item{}{
@@ -544,8 +539,9 @@
\item \code{\link[GenomeInfoDb]{seqinfo}} for accessing/modifying
information about the underlying sequences of a GRanges object.
- \item The \link{GPos} class, a memory-efficient container for storing
- genomic \emph{positions}, that is, genomic ranges of width 1.
+ \item The \link{GPos} class, a memory-efficient \link{GenomicRanges}
+ derivative for representing \emph{genomic positions} (i.e.
+ genomic ranges of width 1).
\item \link{GenomicRanges-comparison} for comparing and ordering genomic
ranges.
diff --git a/man/GRangesList-class.Rd b/man/GRangesList-class.Rd
index ff77b4f..9af79df 100644
--- a/man/GRangesList-class.Rd
+++ b/man/GRangesList-class.Rd
@@ -6,6 +6,8 @@
\alias{GRangesList-class}
\alias{GRangesList}
+\alias{GenomicRanges_OR_GRangesList-class}
+
% Constructors:
\alias{GRangesList}
\alias{makeGRangesListFromFeatureFragments}
diff --git a/man/GenomicRanges-comparison.Rd b/man/GenomicRanges-comparison.Rd
index 616dda6..b741b3f 100644
--- a/man/GenomicRanges-comparison.Rd
+++ b/man/GenomicRanges-comparison.Rd
@@ -20,8 +20,7 @@
\title{Comparing and ordering genomic ranges}
\description{
- Methods for comparing and ordering the elements in one or more
- \link{GenomicRanges} objects.
+ Methods for comparing and/or ordering \link{GenomicRanges} objects.
}
\usage{
@@ -65,7 +64,7 @@
\item{incomparables}{
Not supported.
}
- \item{fromLast, method, nomatch, nmax}{
+ \item{fromLast, method, nomatch, nmax, na.rm, strictly, na.last, decreasing}{
See \code{?`\link[IRanges]{Ranges-comparison}`} in the \pkg{IRanges}
package for a description of these arguments.
}
@@ -73,23 +72,10 @@
Whether or not the strand should be ignored when comparing 2 genomic
ranges.
}
- \item{na.rm}{
- Ignored.
- }
- \item{strictly}{
- Logical indicating if the check should be for \emph{strictly} increasing
- values.
- }
\item{...}{
One or more \link{GenomicRanges} objects. The \link{GenomicRanges} objects
after the first one are used to break ties.
}
- \item{na.last}{
- Ignored.
- }
- \item{decreasing}{
- \code{TRUE} or \code{FALSE}.
- }
\item{ties.method}{
A character string specifying how ties are treated. Only \code{"first"}
is supported for now.
@@ -102,27 +88,31 @@
}
\details{
- Two elements of a \link{GenomicRanges} object (i.e. two genomic ranges) are
- considered equal iff they are on the same underlying sequence and strand,
- and have the same start and width. \code{duplicated()} and \code{unique()}
- on a \link{GenomicRanges} object are conforming to this.
-
- The "natural order" for the elements of a \link{GenomicRanges} object is to
- order them (a) first by sequence level, (b) then by strand, (c) then by
- start, (d) and finally by width.
+ Two elements of a \link{GenomicRanges} derivative (i.e. two genomic ranges)
+ are considered equal iff they are on the same underlying sequence and strand,
+ and share the same start and width. \code{duplicated()} and \code{unique()}
+ on a \link{GenomicRanges} derivative conform to this.
+
+ The "natural order" for the elements of a \link{GenomicRanges} derivative
+ is to order them (a) first by sequence level, (b) then by strand, (c) then
+ by start, (d) and finally by width.
This way, the space of genomic ranges is totally ordered.
- Note that the \code{reduce} method for \link{GenomicRanges} uses this
- "natural order" implicitly. Also, note that, because we already do (c)
- and (d) for regular ranges (see \code{?`\link[IRanges]{Ranges-comparison}`}),
- genomic ranges that belong to the same underlying sequence and strand are
- ordered like regular ranges.
+ Note that, because we already do (c) and (d) for regular ranges (see
+ \code{?`\link[IRanges]{Ranges-comparison}`}), genomic ranges that belong to
+ the same underlying sequence and strand are ordered like regular ranges.
- \code{is.unsorted()}, \code{order()}, \code{sort()}, and \code{rank()} on a
- \link{GenomicRanges} object behave accordingly to this "natural order".
+ \code{pcompare()}, \code{==}, \code{!=}, \code{<=}, \code{>=}, \code{<}
+ and \code{>} on \link{GenomicRanges} derivatives behave according to this
+ "natural order".
- \code{==}, \code{!=}, \code{<=}, \code{>=}, \code{<} and \code{>}
- on \link{GenomicRanges} objects also behave accordingly to this
+ \code{is.unsorted()}, \code{order()}, \code{sort()}, and \code{rank()} on
+ \link{GenomicRanges} derivatives also behave according to this
"natural order".
+
+ Finally, note that some \emph{inter range transformations} like
+ \code{\link[GenomicRanges]{reduce}} or \code{\link[GenomicRanges]{disjoin}}
+ also use this "natural order" implicitly when operating on
+ \link{GenomicRanges} derivatives.
}
\author{H. Pagès, \code{is.unsorted} contributed by Pete Hickey}
@@ -165,13 +155,7 @@ gr == gr[4]
gr >= gr[3]
## ---------------------------------------------------------------------
-## B. duplicated(), unique()
-## ---------------------------------------------------------------------
-duplicated(gr)
-unique(gr)
-
-## ---------------------------------------------------------------------
-## C. match(), %in%
+## B. match(), selfmatch(), %in%, duplicated(), unique()
## ---------------------------------------------------------------------
table <- gr[1:7]
match(gr, table)
@@ -179,8 +163,11 @@ match(gr, table, ignore.strand=TRUE)
gr \%in\% table
+duplicated(gr)
+unique(gr)
+
## ---------------------------------------------------------------------
-## D. findMatches(), countMatches()
+## C. findMatches(), countMatches()
## ---------------------------------------------------------------------
findMatches(gr, table)
countMatches(gr, table)
@@ -192,7 +179,7 @@ gr_levels <- unique(gr)
countMatches(gr_levels, gr)
## ---------------------------------------------------------------------
-## E. order() AND RELATED METHODS
+## D. order() AND RELATED METHODS
## ---------------------------------------------------------------------
is.unsorted(gr)
order(gr)
@@ -216,7 +203,7 @@ rank(gr, ties.method="first")
rank(gr, ties.method="first", ignore.strand=TRUE)
## ---------------------------------------------------------------------
-## F. GENERALIZED ELEMENT-WISE COMPARISON OF 2 GenomicRanges OBJECTS
+## E. GENERALIZED ELEMENT-WISE COMPARISON OF 2 GenomicRanges OBJECTS
## ---------------------------------------------------------------------
gr3 <- GRanges(c(rep("chr1", 12), "chr2"), IRanges(c(1:11, 6:7), width=3))
strand(gr3)[12] <- "+"
diff --git a/man/findOverlaps-methods.Rd b/man/findOverlaps-methods.Rd
index d722c43..135c32c 100644
--- a/man/findOverlaps-methods.Rd
+++ b/man/findOverlaps-methods.Rd
@@ -40,13 +40,13 @@
\usage{
\S4method{findOverlaps}{GenomicRanges,GenomicRanges}(query, subject,
- maxgap=0L, minoverlap=1L,
+ maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"),
select=c("all", "first", "last", "arbitrary"),
ignore.strand=FALSE)
\S4method{countOverlaps}{GenomicRanges,GenomicRanges}(query, subject,
- maxgap=0L, minoverlap=1L,
+ maxgap=-1L, minoverlap=0L,
type=c("any", "start", "end", "within", "equal"),
ignore.strand=FALSE)
}
@@ -56,7 +56,7 @@
A \link{GRanges} or \link{GRangesList} object.
}
\item{maxgap, minoverlap, type}{
- See \code{\link[IRanges]{findOverlaps}} in the \pkg{IRanges} package
+ See \code{?\link[IRanges]{findOverlaps}} in the \pkg{IRanges} package
for a description of these arguments.
}
\item{select}{
diff --git a/man/genomic-range-squeezers.Rd b/man/genomic-range-squeezers.Rd
new file mode 100644
index 0000000..84f3598
--- /dev/null
+++ b/man/genomic-range-squeezers.Rd
@@ -0,0 +1,94 @@
+\name{genomic-range-squeezers}
+
+\alias{genomic-range-squeezers}
+
+\alias{granges}
+\alias{grglist}
+
+\alias{grglist,Pairs-method}
+
+\title{Squeeze the genomic ranges out of a range-based object}
+
+\description{
+ S4 generic functions for squeezing the genomic ranges out of a range-based
+ object.
+
+ These are analogous to range squeezers \code{\link[IRanges]{ranges}} and
+ \code{\link[IRanges]{rglist}} defined in the \pkg{IRanges} package, except
+ that \code{granges} returns the ranges in a \link{GRanges} object (instead
+ of an \link[IRanges]{IRanges} object for \code{\link[IRanges]{ranges}}),
+ and \code{grglist} returns them in a \link{GRangesList} object (instead of
+ an \link[IRanges]{IRangesList} object for \code{\link[IRanges]{rglist}}).
+}
+
+\usage{
+granges(x, use.names=TRUE, use.mcols=FALSE, ...)
+grglist(x, use.names=TRUE, use.mcols=FALSE, ...)
+}
+
+\arguments{
+ \item{x}{
+ An object containing genomic ranges e.g. a
+ \link[GenomicRanges]{GenomicRanges},
+ \link[SummarizedExperiment]{RangedSummarizedExperiment},
+ \link[GenomicAlignments]{GAlignments},
+ \link[GenomicAlignments]{GAlignmentPairs}, or
+ \link[GenomicAlignments]{GAlignmentsList} object,
+ or a \link[S4Vectors]{Pairs} object containing genomic ranges.
+ }
+ \item{use.names, use.mcols, ...}{
+ See \code{\link[IRanges]{ranges}} in the \pkg{IRanges} package for
+ a description of these arguments.
+ }
+}
+
+\details{
+ See \code{\link[IRanges]{ranges}} in the \pkg{IRanges} package for
+ some details.
+
+ For some objects (e.g. \link[GenomicAlignments]{GAlignments} and
+ \link[GenomicAlignments]{GAlignmentPairs} objects defined in the
+ \pkg{GenomicAlignments} package), \code{as(x, "GRanges")} and
+ \code{as(x, "GRangesList")}, are equivalent to
+ \code{granges(x, use.names=TRUE, use.mcols=TRUE)} and
+ \code{grglist(x, use.names=TRUE, use.mcols=TRUE)}, respectively.
+}
+
+\value{
+ A \link{GRanges} object for \code{granges}.
+
+ A \link{GRangesList} object for \code{grglist}.
+
+ If \code{x} is a vector-like object (e.g.
+ \link[GenomicAlignments]{GAlignments}), the returned object is expected
+ to be \emph{parallel} to \code{x}, that is, the i-th element in the output
+ corresponds to the i-th element in the input.
+
+ If \code{use.names} is TRUE, then the names on \code{x}
+ (if any) are propagated to the returned object.
+ If \code{use.mcols} is TRUE, then the metadata columns on \code{x}
+ (if any) are propagated to the returned object.
+}
+
+\author{H. Pagès}
+
+\seealso{
+ \itemize{
+ \item \link{GRanges} and \link{GRangesList} objects.
+
+ \item \link[SummarizedExperiment]{RangedSummarizedExperiment} objects
+ in the \pkg{SummarizedExperiment} package.
+
+ \item \link[GenomicAlignments]{GAlignments},
+ \link[GenomicAlignments]{GAlignmentPairs},
+ and \link[GenomicAlignments]{GAlignmentsList} objects in the
+ \pkg{GenomicAlignments} package.
+ }
+}
+
+\examples{
+## See ?GAlignments in the GenomicAlignments package for examples of
+## "granges" and "grglist" methods.
+}
+
+\keyword{methods}
diff --git a/man/intra-range-methods.Rd b/man/intra-range-methods.Rd
index c8fdc5f..4c71a46 100644
--- a/man/intra-range-methods.Rd
+++ b/man/intra-range-methods.Rd
@@ -4,7 +4,6 @@
\alias{shift}
\alias{shift,GenomicRanges-method}
-\alias{shift,GPos-method}
\alias{shift,GRangesList-method}
\alias{shift,GenomicRangesList-method}
diff --git a/man/makeGRangesFromDataFrame.Rd b/man/makeGRangesFromDataFrame.Rd
index bf392c8..6403e0e 100644
--- a/man/makeGRangesFromDataFrame.Rd
+++ b/man/makeGRangesFromDataFrame.Rd
@@ -205,19 +205,21 @@ if (require(rtracklayer)) {
genome(session) <- "sacCer2"
query <- ucscTableQuery(session, "Assembly")
df <- getTable(query)
+ head(df)
## A common pitfall is to forget that the UCSC Table Browser uses the
## "0-based start" convention:
gr0 <- makeGRangesFromDataFrame(df, keep.extra.columns=TRUE,
- start.field="chromStart", end.field="chromEnd")
+ start.field="chromStart",
+ end.field="chromEnd")
head(gr0)
- min(start(gr0))
## The start positions need to be converted into 1-based positions,
## to adhere to the convention used in Bioconductor:
gr1 <- makeGRangesFromDataFrame(df, keep.extra.columns=TRUE,
- starts.in.df.are.0based=TRUE,
- start.field="chromStart", end.field="chromEnd")
+ start.field="chromStart",
+ end.field="chromEnd",
+ starts.in.df.are.0based=TRUE)
head(gr1)
}
}
diff --git a/man/range-squeezers.Rd b/man/range-squeezers.Rd
deleted file mode 100644
index 76312f6..0000000
--- a/man/range-squeezers.Rd
+++ /dev/null
@@ -1,114 +0,0 @@
-\name{range-squeezers}
-
-\alias{range-squeezers}
-
-\alias{granges}
-\alias{grglist}
-\alias{rglist}
-
-\alias{grglist,Pairs-method}
-\alias{rglist,Pairs-method}
-
-\title{Squeeze the ranges out of a range-based object}
-
-\description{
- S4 generic functions for squeezing the ranges out of a range-based object.
- \code{granges} returns them in a \link{GRanges} object, \code{grglist}
- in a \link{GRangesList} object, and \code{rglist} in a
- \link[IRanges]{RangesList} object.
-}
-
-\usage{
-granges(x, use.names=TRUE, use.mcols=FALSE, ...)
-grglist(x, use.names=TRUE, use.mcols=FALSE, ...)
-rglist(x, use.names=TRUE, use.mcols=FALSE, ...)
-}
-
-\arguments{
- \item{x}{
- A range-based object e.g. a
- \link[SummarizedExperiment]{RangedSummarizedExperiment},
- \link[GenomicAlignments]{GAlignments},
- \link[GenomicAlignments]{GAlignmentPairs},
- \link[GenomicAlignments]{GAlignmentsList} or a
- \link[S4Vectors:Pairs-class]{Pairs} object containing ranges.
- }
- \item{use.names}{
- \code{TRUE} (the default) or \code{FALSE}.
- Whether or not the names on \code{x} (accessible with \code{names(x)})
- should be propagated to the returned object.
- }
- \item{use.mcols}{
- \code{TRUE} or \code{FALSE} (the default).
- Whether or not the metadata columns on \code{x} (accessible with
- \code{mcols(x)}) should be propagated to the returned object.
- }
- \item{...}{
- Additional arguments, for use in specific methods.
- }
-}
-
-\details{
- The \pkg{GenomicRanges}, \pkg{SummarizedExperiment}, and
- \pkg{GenomicAlignments} packages define and document methods
- for various types of range-based objects (e.g. for
- \link[SummarizedExperiment]{RangedSummarizedExperiment},
- \link[GenomicAlignments]{GAlignments},
- \link[GenomicAlignments]{GAlignmentPairs}, and
- \link[GenomicAlignments]{GAlignmentsList} objects).
- Other Bioconductor packages might as well.
-
- Note that these functions can be seen as a specific kind of \emph{object
- getters} as well as functions performing coercion.
-
- For some objects (e.g. \link[GenomicAlignments]{GAlignments} and
- \link[GenomicAlignments]{GAlignmentPairs} objects defined in the
- \pkg{GenomicAlignments} package), \code{as(x, "GRanges")},
- \code{as(x, "GRangesList")}, and \code{as(x, "RangesList")}, are
- equivalent to \code{granges(x, use.names=TRUE, use.mcols=TRUE)},
- \code{grglist(x, use.names=TRUE, use.mcols=TRUE)}, and
- \code{rglist(x, use.names=TRUE, use.mcols=TRUE)}, respectively.
-}
-
-\value{
- A \link{GRanges} object for \code{granges}.
-
- A \link{GRangesList} object for \code{grglist}.
-
- A \link[IRanges]{RangesList} object for \code{rglist}.
-
- If \code{x} is a vector-like object (e.g.
- \link[GenomicAlignments]{GAlignments}), the returned object is expected
- to be \emph{parallel} to \code{x}, that is, the i-th element in the output
- corresponds to the i-th element in the input.
-
- If \code{use.names} is TRUE, then the names on \code{x}
- (if any) are propagated to the returned object.
- If \code{use.mcols} is TRUE, then the metadata columns on \code{x}
- (if any) are propagated to the returned object.
-}
-
-\author{H. Pagès}
-
-\seealso{
- \itemize{
- \item \link{GRanges} and \link{GRangesList} objects.
-
- \item \link[IRanges]{RangesList} objects in the \pkg{IRanges} package.
-
- \item \link[SummarizedExperiment]{RangedSummarizedExperiment} objects
- in the \pkg{SummarizedExperiment} packages.
-
- \item \link[GenomicAlignments]{GAlignments},
- \link[GenomicAlignments]{GAlignmentPairs},
- and \link[GenomicAlignments]{GAlignmentsList} objects in the
- \pkg{GenomicAlignments} package.
- }
-}
-
-\examples{
-## See ?GAlignments in the GenomicAlignments package for some
-## examples.
-}
-
-\keyword{methods}
diff --git a/vignettes/GenomicRangesIntroduction.Rnw b/vignettes/GenomicRangesIntroduction.Rnw
index e9387ff..dda9320 100644
--- a/vignettes/GenomicRangesIntroduction.Rnw
+++ b/vignettes/GenomicRangesIntroduction.Rnw
@@ -10,8 +10,7 @@ BiocStyle::latex()
\newcommand{\GenomicRanges}{\Biocpkg{GenomicRanges}}
\title{An Introduction to the GenomicRanges Package}
-\author{Marc Carlson \and Patrick Aboyoun \and Herv\'{e} Pag\`{e}s \and
- Martin Morgan}
+\author{Marc Carlson, Patrick Aboyoun, Herv\'{e} Pag\`{e}s, and Martin Morgan}
\date{\today; updated 16 November, 2016}
\begin{document}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/r-bioc-genomicranges.git
More information about the debian-med-commit
mailing list