[med-svn] [r-bioc-dnacopy] 01/02: Imported Upstream version 1.46.0
Michael Crusoe
misterc-guest at moszumanska.debian.org
Sat Jun 25 23:20:24 UTC 2016
This is an automated email from the git hooks/post-receive script.
misterc-guest pushed a commit to branch master
in repository r-bioc-dnacopy.
commit 41a12c30d4593b1e249f7542a2aef96faf32e4e7
Author: Michael R. Crusoe <crusoe at ucdavis.edu>
Date: Sat Jun 25 14:52:18 2016 -0700
Imported Upstream version 1.46.0
---
DESCRIPTION | 13 +
NAMESPACE | 8 +
R/DNAcopyMethods.R | 381 ++++++++++++
R/changepoints.R | 194 +++++++
R/exonsegment.R | 55 ++
R/getbdry.R | 14 +
R/glFrequency.R | 31 +
R/plotSample.R | 62 ++
R/segment.R | 91 +++
R/segmentp.R | 104 ++++
R/segmentsummary.R | 31 +
R/zoomIntoRegion.R | 30 +
R/zzz.R | 3 +
build/vignette.rds | Bin 0 -> 216 bytes
chrom-lengths | 1 +
data/coriell.rda | Bin 0 -> 54719 bytes
data/cytoBand.tab | 863 ++++++++++++++++++++++++++++
data/default.DNAcopy.bdry.R | 472 +++++++++++++++
demo/00Index | 1 +
demo/DNAcopy.R | 56 ++
inst/CHANGES | 359 ++++++++++++
inst/benchmark/benchmark,20090610,segment.R | 169 ++++++
inst/doc/DNAcopy.R | 66 +++
inst/doc/DNAcopy.Rnw | 213 +++++++
inst/doc/DNAcopy.pdf | Bin 0 -> 234494 bytes
man/CNA.Rd | 62 ++
man/DNAcopy-internal.Rd | 26 +
man/DNAcopy.Rd | 34 ++
man/coriell.Rd | 17 +
man/cytoBand.Rd | 14 +
man/exon.segment.Rd | 45 ++
man/getbdry.Rd | 25 +
man/glFrequency.Rd | 25 +
man/plot.DNAcopy.Rd | 110 ++++
man/plotSample.Rd | 81 +++
man/segment.Rd | 176 ++++++
man/segments.p.Rd | 60 ++
man/segments.summary.Rd | 36 ++
man/smooth.CNA.Rd | 45 ++
man/subset.CNA.Rd | 42 ++
man/subset.DNAcopy.Rd | 23 +
man/zoomIntoRegion.Rd | 66 +++
src/cbsWtstats.f | 741 ++++++++++++++++++++++++
src/cbststats.f | 796 +++++++++++++++++++++++++
src/changepoints-wtd.f | 203 +++++++
src/changepoints.f | 210 +++++++
src/esegment.f | 52 ++
src/flchoose.c | 4 +
src/fphyper.c | 4 +
src/fpnorm.c | 4 +
src/getbdry.f | 123 ++++
src/prune.f | 120 ++++
src/rshared.c | 4 +
src/segmentp.f | 93 +++
src/smoothCNA.f | 89 +++
src/tailprobs.f | 120 ++++
tests/redundancy,20090610,segment.R | 117 ++++
vignettes/DNAcopy.Rnw | 213 +++++++
vignettes/DNAcopy.bib | 64 +++
59 files changed, 7061 insertions(+)
diff --git a/DESCRIPTION b/DESCRIPTION
new file mode 100644
index 0000000..329e992
--- /dev/null
+++ b/DESCRIPTION
@@ -0,0 +1,13 @@
+Package: DNAcopy
+Title: DNA copy number data analysis
+Version: 1.46.0
+Author: Venkatraman E. Seshan, Adam Olshen
+Description: Implements the circular binary segmentation (CBS)
+ algorithm to segment DNA copy number data and identify
+ genomic regions with abnormal copy number.
+Maintainer: Venkatraman E. Seshan <seshanv at mskcc.org>
+LazyData: yes
+License: GPL (>= 2)
+biocViews: Microarray, CopyNumberVariation
+NeedsCompilation: yes
+Packaged: 2016-05-04 02:41:35 UTC; biocbuild
diff --git a/NAMESPACE b/NAMESPACE
new file mode 100644
index 0000000..d281bcb
--- /dev/null
+++ b/NAMESPACE
@@ -0,0 +1,8 @@
+useDynLib(DNAcopy)
+import(stats,graphics)
+export("CNA","exon.segment","getbdry","segment","segments.p","segments.summary","smooth.CNA","glFrequency","plotSample","zoomIntoRegion")
+S3method(print, CNA)
+S3method(plot, DNAcopy)
+S3method(print, DNAcopy)
+S3method(subset, CNA)
+S3method(subset, DNAcopy)
diff --git a/R/DNAcopyMethods.R b/R/DNAcopyMethods.R
new file mode 100644
index 0000000..0de13fa
--- /dev/null
+++ b/R/DNAcopyMethods.R
@@ -0,0 +1,381 @@
+CNA <- function(genomdat, chrom, maploc, data.type=c("logratio","binary"),
+ sampleid=NULL, presorted=FALSE)
+ {
+ if (is.data.frame(genomdat)) genomdat <- as.matrix(genomdat)
+ if (!is.numeric(genomdat)) stop("genomdat must be numeric")
+ if (!is.numeric(maploc)) stop("maploc must be numeric")
+ data.type <- match.arg(data.type)
+ ina <- (!is.na(chrom) & is.finite(maploc))
+ if (sum(!ina)>0)
+ warning("markers with missing chrom and/or maploc removed\n")
+ if (!presorted) {
+ sortindex <- which(ina)[order(chrom[ina], maploc[ina])]
+ } else {
+ sortindex <- which(ina)
+ }
+ if (is.factor(chrom)) chrom <- as.character(chrom)
+ if (is.vector(genomdat)) genomdat <- as.matrix(genomdat)
+ if (!missing(sampleid)) {
+ if (length(sampleid) != ncol(genomdat)) {
+ warning("length(sampleid) and ncol(genomdat) differ, names ignored\n")
+ sampleid <- paste("Sample", 1:ncol(genomdat))
+ }
+ } else {
+ sampleid <- paste("Sample", 1:ncol(genomdat))
+ }
+ colnames(genomdat) <- sampleid
+ zzz <- data.frame(chrom=I(chrom), maploc=maploc, genomdat)
+ zzz <- zzz[sortindex,]
+
+# check for duplicate probes (i.e. repeated maploc within a chromosome)
+ if (length(ii <- which(diff(maploc)==0)) > 0) {
+ if (any(chrom[ii]==chrom[ii+1])) warning("array has repeated maploc positions\n")
+ }
+
+ attr(zzz, "data.type") <- data.type
+ class(zzz) <- c("CNA","data.frame")
+ zzz
+ }
+
+subset.CNA <- function(x, chromlist=NULL, samplelist=NULL, ...)
+ {
+ if (!inherits(x, 'CNA')) stop("First arg must be of class CNA")
+ chrom <- x$chrom
+ uchrom <- unique(chrom)
+ if (missing(chromlist)) chromlist <- uchrom
+ if (length(setdiff(chromlist, uchrom)) > 0)
+ stop("chromlist contains chromosomes not in the data")
+ if (length(chromlist) > length(unique(chromlist)))
+ warning("duplicate chromosomes in chromlist removed")
+ sampleid <- colnames(x)[-(1:2)]
+ if (missing(samplelist)) samplelist <- sampleid
+ nsample <- length(sampleid)
+ if (length(setdiff(samplelist, 1:nsample)) > 0 & length(setdiff(samplelist, sampleid)) > 0)
+ stop("samplelist should be a list of valid sample numbers or names")
+ if (!is.numeric(samplelist)) samplelist <- match(samplelist, names(x)) - 2
+ if (length(samplelist) > length(unique(samplelist)))
+ warning("duplicate samples in samplelist removed")
+ samplelist <- unique(samplelist)
+ y <- x[chrom %in% chromlist,c(1:2,samplelist+2)]
+ attr(y, "data.type") <- attr(x, "data.type")
+ y
+ }
+
+smooth.CNA <- function(x, smooth.region=10, outlier.SD.scale=4,
+ smooth.SD.scale=2, trim=0.025)
+ {
+ if (!inherits(x, 'CNA')) stop("First arg must be of class CNA")
+ nsample <- ncol(x)-2
+ chrom <- x$chrom
+ uchrom <- unique(chrom)
+ if(attr(x, "data.type")=="binary") stop("Not smoothing binary data ")
+ for (isamp in 1:nsample) {
+ genomdat <- x[,isamp+2]
+ ina <- which(is.finite(genomdat))
+ trimmed.SD <- sqrt(trimmed.variance(genomdat[ina], trim))
+ outlier.SD <- outlier.SD.scale*trimmed.SD
+ smooth.SD <- smooth.SD.scale*trimmed.SD
+ k <- smooth.region
+ n <- length(genomdat[ina])
+ cfrq <- diff(c(which(!duplicated(chrom[ina])), n+1))
+ nchr <- length(cfrq) # to allow for some chrom with all missing
+ smoothed.data <- .Fortran("smoothLR",
+ as.integer(n),
+ as.double(genomdat[ina]),
+ as.integer(nchr),
+ as.integer(cfrq),
+ sgdat=double(n),
+ as.integer(k),
+ as.double(outlier.SD),
+ as.double(smooth.SD),
+ PACKAGE = "DNAcopy")$sgdat
+ x[,isamp+2][ina] <- smoothed.data
+ }
+ x
+ }
+
+print.CNA <- function(x, ...)
+ {
+ if (!inherits(x, 'CNA')) stop("First arg must be of class CNA")
+ cat("Number of Samples", ncol(x)-2,
+ "\nNumber of Probes ", nrow(x),
+ "\nData Type ", attr(x,"data.type"),"\n")
+ }
+
+plot.DNAcopy <- function (x, plot.type=c("whole", "plateau", "samplebychrom",
+ "chrombysample"), xmaploc=FALSE, altcol=TRUE,
+ sbyc.layout=NULL, cbys.nchrom=1, cbys.layout=NULL,
+ include.means=TRUE, zeroline=TRUE, pt.pch=NULL,
+ pt.cex=NULL, pt.cols=NULL, segcol=NULL, zlcol=NULL,
+ ylim=NULL, lwd=NULL, ...)
+{
+ if (!inherits(x, "DNAcopy"))
+ stop("First arg must be the result of segment")
+ xdat <- x$data
+ nsample <- ncol(xdat)-2
+ if(missing(ylim)) {
+ uylim <- max(abs(xdat[,-(1:2)]), na.rm=TRUE)
+ ylim <- c(-uylim, uylim)
+ }
+ xres <- x$output
+ if(dev.cur() <= 1) dev.new()
+ int.dev <- dev.interactive()
+ plot.type <- match.arg(plot.type)
+ op <- par(no.readonly = TRUE)
+ parask <- par("ask")
+ if (int.dev & !parask & nsample>1) par(ask = TRUE)
+ sampleid <- colnames(xdat)[-(1:2)]
+ chrom0 <- xdat$chrom
+ uchrom <- unique(chrom0)
+ nchrom <- length(uchrom)
+ if (xmaploc) {
+ maploc0 <- as.numeric(xdat$maploc)
+ if(length(uchrom)>1 & max(maploc0[chrom0==uchrom[1]]) > min(maploc0[chrom0==uchrom[2]])) {
+ plen <- max(maploc0[chrom0==uchrom[1]])
+ for(i in 2:nchrom) {
+ maploc0[chrom0==uchrom[i]] <- plen + maploc0[chrom0==uchrom[i]]
+ plen <- max(maploc0[chrom0==uchrom[i]])
+ }
+ }
+ }
+ if (missing(pt.pch)) pt.pch <- "."
+ if (missing(pt.cex)) {
+ if (pt.pch==".") { pt.cex <- 3}
+ else {pt.cex <- 1}
+ }
+ wcol0 <- rep(1, length(chrom0))
+ if (altcol) {
+ j <- 0
+ for (i in uchrom) {
+ j <- (j+1) %% 2
+ wcol0[chrom0==i] <- 1+j
+ }
+ }
+ if (missing(pt.cols)) pt.cols <- c("black","green")
+ if (missing(segcol)) segcol <- "red"
+ if (missing(zlcol)) zlcol <- "grey"
+ if (missing(lwd)) lwd <- 3
+ if (plot.type == "chrombysample") {
+ cat("Setting multi-figure configuration\n")
+ par(mar = c(0, 4, 0, 2), oma = c(4, 0, 4, 0), mgp = c(2, 0.7, 0))
+ if (missing(cbys.layout)) {
+ nrow <- ncol <- ceiling(sqrt(nsample))
+ if (nrow*ncol - nsample > 0) {
+ nrow <- nrow - 1
+ ncol <- ncol + 1
+ }
+ if (nrow*ncol - nsample >= nrow) ncol <- ncol - 1
+ cbys.layout <- c(nrow, ncol)
+ }
+ lmat0 <- lmat1 <- c(1:nsample, rep(-cbys.nchrom*nsample, prod(cbys.layout) - nsample))
+ for(i in 1:(cbys.nchrom-1)) {
+ lmat1 <- c(lmat1,lmat0+nsample*i)
+ }
+ lmat1[lmat1<0] <- 0
+ lmat <- matrix(lmat1, nrow = cbys.layout[1], ncol = cbys.nchrom*cbys.layout[2], byrow = FALSE)
+ layout(lmat)
+ }
+ if (plot.type == "samplebychrom") {
+ cat("Setting multi-figure configuration\n")
+ par(mar = c(4, 4, 4, 2), oma = c(0, 0, 2, 0), mgp = c(2, 0.7, 0))
+ if (missing(sbyc.layout)) {
+ nrow <- ncol <- ceiling(sqrt(nchrom))
+ if (nrow*ncol - nchrom > 0) {
+ nrow <- nrow - 1
+ ncol <- ncol + 1
+ }
+ if (nrow*ncol - nchrom > ncol) ncol <- ncol - 1
+ sbyc.layout <- c(nrow, ncol)
+ }
+ lmat <- matrix(c(1:nchrom, rep(0,prod(sbyc.layout)-nchrom)),
+ nrow = sbyc.layout[1], ncol = sbyc.layout[2], byrow=TRUE)
+ layout(lmat)
+ }
+ if (plot.type == "chrombysample") {
+ atchrom <- 0.5/cbys.nchrom
+ for (ichrom in uchrom) {
+ if (xmaploc) maploc1 <- maploc0[chrom0==ichrom]
+ for (isamp in 1:nsample) {
+ genomdat <- xdat[chrom0==ichrom, isamp+2]
+ ina <- which(is.finite(genomdat))
+ genomdat <- genomdat[ina]
+ if (xmaploc) maploc <- maploc1[ina]
+ ii <- cumsum(c(0, xres$num.mark[xres$ID == sampleid[isamp] & xres$chrom==ichrom]))
+ mm <- xres$seg.mean[xres$ID == sampleid[isamp] & xres$chrom==ichrom]
+ kk <- length(ii)
+ zz <- cbind(ii[-kk] + 1, ii[-1])
+ if (xmaploc) {
+ plot(maploc, genomdat, pch = pt.pch, cex=pt.cex, xaxt="n", ylim = ylim, ylab = sampleid[isamp])
+ } else {
+ plot(genomdat, pch = pt.pch, cex=pt.cex, xaxt="n", ylim = ylim, ylab = sampleid[isamp])
+ }
+ if(zeroline) abline(h=0, col=zlcol, lwd=lwd)
+ if (isamp%%cbys.layout[1] == 0) {
+ axis(1, outer=TRUE)
+ title(xlab="Index")
+ }
+ if (include.means) {
+ if (xmaploc) {
+ segments(maploc[zz[,1]], mm, x1=maploc[zz[,2]], y1=mm, col = segcol, lwd=lwd)
+ } else {
+ segments(zz[,1], mm, x1=zz[,2], y1=mm, col = segcol, lwd=lwd)
+ }
+# for (i in 1:(kk - 1)) {
+# if (xmaploc) {
+# lines(maploc[zz[i, ]], rep(mm[i], 2), col = segcol, lwd=lwd)
+# } else {
+# lines(zz[i, ], rep(mm[i], 2), col = segcol, lwd=lwd)
+# }
+# }
+ }
+ }
+ mtext(paste("Chromosome",ichrom), side = 3, line = 1, at = atchrom, outer=TRUE, font=2)
+ atchrom <- atchrom + 1/cbys.nchrom
+ atchrom <- atchrom - floor(atchrom)
+ }
+ } else {
+ for (isamp in 1:nsample)
+ {
+ genomdat <- xdat[, isamp+2]
+ ina <- which(is.finite(genomdat))
+ genomdat <- genomdat[ina]
+ wcol <- wcol0[ina]
+ chrom <- chrom0[ina]
+ if (xmaploc) maploc <- maploc0[ina]
+ ii <- cumsum(c(0, xres$num.mark[xres$ID == sampleid[isamp]]))
+ mm <- xres$seg.mean[xres$ID == sampleid[isamp]]
+ kk <- length(ii)
+ zz <- cbind(ii[-kk] + 1, ii[-1])
+ if(missing(ylim)) ylim <- range(c(genomdat, -genomdat))
+ if (plot.type=="whole")
+ {
+ if (xmaploc) {
+ plot(maploc, genomdat, pch = pt.pch, cex=pt.cex, col=pt.cols[wcol], main = sampleid[isamp], ylab = "", ylim = ylim)
+ if(zeroline) abline(h=0, col=zlcol, lwd=lwd)
+ } else {
+ plot(genomdat, pch = pt.pch, cex=pt.cex, col=pt.cols[wcol], main = sampleid[isamp], ylab = "", ylim = ylim)
+ if(zeroline) abline(h=0, col=zlcol, lwd=lwd)
+ }
+ if (include.means) {
+ if (xmaploc) {
+ segments(maploc[zz[,1]], mm, x1=maploc[zz[,2]], y1=mm, col = segcol, lwd=lwd)
+ } else {
+ segments(zz[,1], mm, x1=zz[,2], y1=mm, col = segcol, lwd=lwd)
+ }
+# for (i in 1:(kk - 1))
+# {
+# if (xmaploc) {
+# lines(maploc[zz[i, ]], rep(mm[i], 2), col = segcol, lwd=lwd)
+# } else {
+# lines(zz[i, ], rep(mm[i], 2), col = segcol, lwd=lwd)
+# }
+# }
+ }
+ }
+ if (plot.type=="samplebychrom")
+ {
+ cc <- xres$chrom[xres$ID == sampleid[isamp]]
+ for (ichrom in uchrom)
+ {
+ if (xmaploc) {
+ plot(maploc[chrom == ichrom], genomdat[chrom == ichrom], pch = pt.pch, cex=pt.cex, xlab="maploc", ylab = "", main = paste("Chromosome", ichrom), ylim = ylim)
+ } else {
+ plot(genomdat[chrom == ichrom], pch = pt.pch, cex=pt.cex, ylab = "", main = paste("Chromosome", ichrom), ylim = ylim)
+ }
+ if(zeroline) abline(h=0, col=zlcol, lwd=lwd)
+ if (include.means) {
+ jj <- which(cc==ichrom)
+ jj0 <- min(jj)
+ if (xmaploc) {
+ segments(maploc[zz[jj,1]], mm[jj], x1=maploc[zz[jj,2]], y1=mm[jj], col = segcol, lwd=lwd)
+ } else {
+ segments(1+zz[jj,1]-zz[jj0,1], mm[jj], x1=1+zz[jj,2]-zz[jj0,1], y1=mm[jj], col = segcol, lwd=lwd)
+ }
+# for (i in jj)
+# {
+# if (xmaploc) {
+# lines(maploc[zz[i, ]], rep(mm[i], 2), col = segcol, lwd=lwd)
+# } else {
+# lines(1+zz[i, ]-zz[jj0,1], rep(mm[i], 2), col = segcol, lwd=lwd)
+# }
+# }
+ }
+ }
+ mtext(sampleid[isamp], side = 3, line = 0, outer = TRUE, font=2)
+ }
+ if (plot.type=="plateau")
+ {
+ omm <- order(mm)
+ ozz <- zz[omm,]
+ ina <- unlist(apply(ozz, 1, function(ii) ii[1]:ii[2]))
+ plot(genomdat[ina], pch = pt.pch, cex=pt.cex, main = sampleid[isamp], ylab = "", ylim = ylim)
+ if(zeroline) abline(h=0, col=zlcol, lwd=lwd)
+ if (include.means) {
+ ii <- cumsum(c(0, xres$num.mark[xres$ID == sampleid[isamp]][omm]))
+ smm <- mm[omm]
+ zz <- cbind(ii[-kk] + 1, ii[-1])
+ segments(zz[,1], smm, x1=zz[,2], y1=smm, col = segcol, lwd=lwd)
+# for (i in 1:(kk-1)) lines(zz[i, ], rep(smm[i], 2), col = segcol, lwd=lwd)
+ }
+ }
+ }
+ }
+ on.exit( if (plot.type=="chrombysample" | plot.type=="samplebychrom") {
+ par(op)
+ } else { if(int.dev & !parask & nsample>1) par(ask=parask) })
+}
+
+print.DNAcopy <- function(x, showSegRows=FALSE, ...)
+ {
+ if (!inherits(x, "DNAcopy")) stop("Object is not the result of segment")
+ if (!is.null(cl<- x$call))
+ {
+ cat("Call:\n")
+ dput(cl)
+ cat("\n")
+ }
+ if (showSegRows) {
+ if (is.null(x$segRows)) {
+ print(x$output)
+ warning("segRows missing. Object may be a subset or from DNAcopy < 1.23.2.\n")
+ } else {
+ print(cbind(x$output, x$segRows))
+ }
+ } else {
+ print(x$output)
+ }
+ }
+
+subset.DNAcopy <- function(x, chromlist=NULL, samplelist=NULL, ...)
+ {
+ if (!inherits(x, 'DNAcopy')) stop("First arg must be of class DNAcopy")
+ zdat <- x$data
+ zres <- x$output
+ chrom <- zdat$chrom
+ uchrom <- unique(chrom)
+ if (missing(chromlist) | is.null(chromlist)) chromlist <- uchrom
+ if (length(setdiff(chromlist, uchrom)) > 0)
+ stop("chromlist contains chromosomes not in the data")
+ if (length(chromlist) > length(unique(chromlist)))
+ warning("duplicate chromosomes in chromlist removed")
+ sampleid <- colnames(zdat)[-(1:2)]
+ if (missing(samplelist)) samplelist <- sampleid
+ nsample <- length(sampleid)
+ if (length(setdiff(samplelist, 1:nsample)) > 0 & length(setdiff(samplelist, sampleid)) > 0)
+ stop("samplelist should be a list of valid sample numbers or names")
+ if (!is.numeric(samplelist)) samplelist <- match(samplelist, names(zdat)) - 2
+ if (length(samplelist) > length(unique(samplelist)))
+ warning("duplicate samples in samplelist removed")
+ samplelist <- unique(samplelist)
+ jj <- unlist(sapply(sampleid[samplelist], function(i, id) {which(id==i)}, zres$ID ))
+ zres <- zres[jj,]
+ y <- list()
+ y$data <- zdat[chrom %in% chromlist,c(1:2,samplelist+2)]
+ attr(y$data, "data.type") <- attr(zdat, "data.type")
+ y$output <- zres[zres$chrom %in% chromlist,]
+ class(y) <- "DNAcopy"
+ y
+ }
+
+# Chromosome.Lengths <- c(263, 255, 214, 203, 194, 183, 171, 155, 145, 144, 144, 143, 114, 109, 106, 98, 92, 85, 67, 72, 50, 56, 164, 59)
+# names(Chromosome.Lengths) <- c(as.character(1:22),"X","Y")
diff --git a/R/changepoints.R b/R/changepoints.R
new file mode 100644
index 0000000..3a567fd
--- /dev/null
+++ b/R/changepoints.R
@@ -0,0 +1,194 @@
+changepoints <- function(genomdat, data.type="logratio", alpha=0.01, weights=
+ NULL, sbdry, sbn, nperm=10000, p.method="hybrid",
+ min.width=2, kmax=25, nmin=200, trimmed.SD=NULL,
+ undo.splits="none", undo.prune=0.05, undo.SD=3,
+ verbose=1, ngrid=100, tol=1e-6)
+ {
+ n <- length(genomdat)
+ if (missing(trimmed.SD)) trimmed.SD <- mad(diff(genomdat))/sqrt(2)
+# start with the whole
+ seg.end <- c(0,n)
+ k <- length(seg.end)
+ change.loc <- NULL
+ weighted <- ifelse(is.null(weights), FALSE, TRUE)
+ while (k > 1)
+ {
+ current.n <- seg.end[k]-seg.end[k-1]
+ if (verbose>=3) cat(".... current segment:",seg.end[k-1]+1,"-",seg.end[k],"\n")
+ if(current.n >= 2*min.width) {
+ current.genomdat <- genomdat[(seg.end[k-1]+1):seg.end[k]]
+# check whether hybrid method needs to be used
+ hybrid <- FALSE
+ delta <- 0
+ if ((p.method=="hybrid") & (nmin < current.n)) {
+ hybrid <- TRUE
+ delta <- (kmax+1)/current.n
+ }
+# call the changepoint routine
+ if (weighted) {
+# get the weights for the current set of probes
+ current.wts <- weights[(seg.end[k-1]+1):seg.end[k]]
+ current.rwts <- sqrt(current.wts)
+ current.cwts <- cumsum(current.wts)/sqrt(sum(current.wts))
+# if all values of current.genomdat are the same don't segment
+ if (isTRUE(all.equal(diff(range(current.genomdat)), 0))) {
+ zzz <- list()
+ zzz$ncpt <- 0
+ } else {
+# centering the current data will save a lot of computations later
+ current.avg <- sum(current.genomdat*current.wts)/sum(current.wts)
+ current.genomdat <- current.genomdat - current.avg
+# need total sum of squares too
+ current.tss <- sum(current.wts*(current.genomdat^2))
+ zzz <- .Fortran("wfindcpt",
+ n=as.integer(current.n),
+ x=as.double(current.genomdat),
+ tss=as.double(current.tss),
+ wts=as.double(current.wts),
+ rwts=as.double(current.rwts),
+ cwts=as.double(current.cwts),
+ px=double(current.n),
+ sx=double(current.n),
+ nperm=as.integer(nperm),
+ cpval=as.double(alpha),
+ ncpt=integer(1),
+ icpt=integer(2),
+ hybrid=as.logical(hybrid),
+ al0=as.integer(min.width),
+ hk=as.integer(kmax),
+ mncwt=double(kmax),
+ delta=as.double(delta),
+ ngrid=as.integer(ngrid),
+ sbn=as.integer(sbn),
+ sbdry=as.integer(sbdry),
+ tol= as.double(tol),
+ PACKAGE="DNAcopy")
+ }
+ } else {
+# if all values of current.genomdat are the same don't segment
+ if (isTRUE(all.equal(diff(range(current.genomdat)), 0))) {
+ zzz <- list()
+ zzz$ncpt <- 0
+ } else {
+# centering the current data will save a lot of computations later
+ current.avg <- mean(current.genomdat)
+ current.genomdat <- current.genomdat - current.avg
+# need total sum of squares too
+ current.tss <- sum(current.genomdat^2)
+ zzz <- .Fortran("fndcpt",
+ n=as.integer(current.n),
+ x=as.double(current.genomdat),
+ tss=as.double(current.tss),
+ px=double(current.n),
+ sx=double(current.n),
+ nperm=as.integer(nperm),
+ cpval=as.double(alpha),
+ ncpt=integer(1),
+ icpt=integer(2),
+ ibin=as.logical(data.type=="binary"),
+ hybrid=as.logical(hybrid),
+ al0=as.integer(min.width),
+ hk=as.integer(kmax),
+ delta=as.double(delta),
+ ngrid=as.integer(ngrid),
+ sbn=as.integer(sbn),
+ sbdry=as.integer(sbdry),
+ tol= as.double(tol),
+ PACKAGE="DNAcopy")
+ }
+ }
+ } else {
+ zzz <- list()
+ zzz$ncpt <- 0
+ }
+ if(zzz$ncpt==0) change.loc <- c(change.loc,seg.end[k])
+ seg.end <- switch(1+zzz$ncpt,seg.end[-k],
+ c(seg.end[1:(k-1)],seg.end[k-1]+zzz$icpt[1],seg.end[k]),
+ c(seg.end[1:(k-1)],seg.end[k-1]+zzz$icpt,seg.end[k]))
+ k <- length(seg.end)
+ if(verbose>=3) cat(".... segments to go:",seg.end,"\n")
+ }
+ seg.ends <- rev(change.loc)
+ nseg <- length(seg.ends)
+ lseg <- diff(c(0,seg.ends))
+ if (nseg > 1) {
+ if (undo.splits == "prune") {
+ lseg <- changepoints.prune(genomdat, lseg, undo.prune)
+ }
+ if (undo.splits == "sdundo") {
+ lseg <- changepoints.sdundo(genomdat, lseg, trimmed.SD, undo.SD)
+ }
+ }
+ segmeans <- 0*lseg
+ ll <- uu <- 0
+ for(i in 1:length(lseg)) {
+ uu <- uu + lseg[i]
+ if (weighted) {
+ segmeans[i] <- sum(genomdat[(ll+1):uu]*weights[(ll+1):uu])/sum(weights[(ll+1):uu])
+ } else {
+ segmeans[i] <- mean(genomdat[(ll+1):uu])
+ }
+ ll <- uu
+ }
+ list("lseg" = lseg, "segmeans" = segmeans)
+ }
+
+changepoints.prune <- function(genomdat, lseg, change.cutoff=0.05) {
+ n <- length(genomdat)
+ nseg <- length(lseg)
+ ncpt <- nseg-1
+ zzz <- .Fortran("prune",
+ as.integer(n),
+ as.double(genomdat),
+ as.integer(nseg),
+ as.integer(lseg),
+ as.double(change.cutoff),
+ double(nseg),
+ as.integer(ncpt),
+ loc=integer(ncpt),
+ integer(2*ncpt),
+ pncpt=integer(1), PACKAGE="DNAcopy")
+ pruned.ncpt <- zzz$pncpt
+ pruned.cpts <- cumsum(lseg)[zzz$loc[1:pruned.ncpt]]
+ pruned.lseg <- diff(c(0,pruned.cpts,n))
+ pruned.lseg
+}
+
+changepoints.sdundo <- function(genomdat, lseg, trimmed.SD, change.SD=3) {
+ change.SD <- trimmed.SD*change.SD
+ cpt.loc <- cumsum(lseg)
+ sdundo <- TRUE
+ while(sdundo) {
+ k <- length(cpt.loc)
+ if (k>1) {
+ segments0 <- cbind(c(1,1+cpt.loc[-k]),cpt.loc)
+ segmed <- apply(segments0, 1, function(i,x) {median(x[i[1]:i[2]])}, genomdat)
+ adsegmed <- abs(diff(segmed))
+ if (min(adsegmed) < change.SD) {
+ i <- which(adsegmed == min(adsegmed))
+ cpt.loc <- cpt.loc[-i]
+ } else {
+ sdundo <- FALSE
+ }
+ } else {
+ sdundo <- FALSE
+ }
+ }
+ lseg.sdundo <- diff(c(0,cpt.loc))
+ lseg.sdundo
+}
+
+trimmed.variance <- function(genomdat, trim=0.025)
+ {
+ n <- length(genomdat)
+ n.keep <- round((1-2*trim)*(n-1))
+ inflfact(trim)*sum((sort(abs(diff(genomdat)))[1:n.keep])^2 / (2*n.keep))
+ }
+
+inflfact <- function(trim)
+ {
+ a <- qnorm(1-trim)
+ x <- seq(-a,a,length=10001)
+ x1 <- (x[-10001] + x[-1])/2
+ 1/(sum(x1^2*dnorm(x1)/(1-2*trim))*(2*a/10000))
+ }
diff --git a/R/exonsegment.R b/R/exonsegment.R
new file mode 100644
index 0000000..0783877
--- /dev/null
+++ b/R/exonsegment.R
@@ -0,0 +1,55 @@
+exon.segment <- function(gene, eloc, edat, ngrid=100, tol=1e-6) {
+ ii <- order(gene, eloc)
+ gene <- gene[ii]
+ eloc <- eloc[ii]
+ if (is.matrix(edat)) {
+ edat <- edat[ii,]
+ } else {
+ edat <- cbind(edat[ii])
+ }
+ ugene <- unique(gene)
+ ngene <- length(ugene)
+ nsample <- ncol(edat)
+ out.stat <- out.loc <- out.p <- matrix(0, ngene, nsample)
+ ss <- 3*(1:nsample)
+ for(i in 1:ngene) {
+ exondat <- edat[gene==ugene[i],]
+ gout <- exon.changepoint(exondat, ngrid, tol)
+ out.stat[i,] <- gout[[1]]
+ out.loc[i,] <- gout[[2]]
+ out.p[i,] <- gout[[3]]
+ }
+ rownames(out.stat) <- rownames(out.loc) <- rownames(out.p) <- ugene
+ list(statistic=out.stat, location=out.loc, p.value=out.p)
+}
+
+exon.changepoint <- function(exondat, ngrid=100, tol=1e-6) {
+#
+# exondat -- is a matrix of normalized expression values
+# rows are ordered by location and columns are samples
+#
+ nsample <- ncol(exondat) # number of samples
+ n <- nrow(exondat) # number of exons in the gene
+# initialize sample specific output
+ estat <- epval <- eloc <- rep(0, nsample)
+# calculate the max t-stat, location and p-value
+ for(i in 1:nsample) {
+ exondati <- exondat[,i]
+# center the data
+ exondati <- (exondati - mean(exondati))
+# call the p-value subroutine
+ zzz <- .Fortran("esegp",
+ as.integer(n),
+ as.double(exondati),
+ ostat=double(1),
+ eloc=integer(1),
+ pval=double(1),
+ as.integer(ngrid),
+ as.double(tol),
+ PACKAGE="DNAcopy")
+ estat[i] <- zzz$ostat
+ epval[i] <- zzz$pval
+ eloc[i] <- zzz$eloc
+ }
+ list(estat, eloc, epval)
+}
diff --git a/R/getbdry.R b/R/getbdry.R
new file mode 100644
index 0000000..bb995dd
--- /dev/null
+++ b/R/getbdry.R
@@ -0,0 +1,14 @@
+getbdry <- function(eta, nperm, max.ones, tol= 1e-2) {
+ bdry <- rep(0, max.ones*(max.ones+1)/2)
+ zz <- .Fortran("getbdry",
+ as.double(eta),
+ as.integer(max.ones),
+ as.integer(nperm),
+ as.integer(max.ones*(max.ones+1)/2),
+ bdry=as.integer(bdry),
+ etastr=double(max.ones),
+ as.double(tol),
+ PACKAGE="DNAcopy")
+# list("eta.star"=zz$etastr, "boundary"=zz$bdry)
+ zz$bdry
+}
diff --git a/R/glFrequency.R b/R/glFrequency.R
new file mode 100644
index 0000000..d517d77
--- /dev/null
+++ b/R/glFrequency.R
@@ -0,0 +1,31 @@
+glFrequency <- function(xout, threshold=1) {
+ if (!inherits(xout, 'DNAcopy')) stop("First arg must be of class DNAcopy")
+ nsample <- ncol(xout$data)-2
+ snames <- names(xout$data)
+ xmad <- rep(NA,nsample)
+ for(i in 2+(1:nsample)) {
+ sout <- xout$output[xout$output$ID==snames[i],]
+ xmad[i-2] <- mad(na.omit(xout$data[,i]) - rep(sout$seg.mean,sout$num.mark))
+ }
+ pfreq <- gain <- loss <- rep(0, nrow(xout$data))
+ for(i in 1:nsample) {
+# ii <- !is.na(xout$data[,i+2])
+ genomdat <- xout$data[,i+2]
+# ii = location of the missing values and infinity
+ ii <- which(is.finite(genomdat))
+# segment means as a vector
+ segout <- xout$output[xout$output$ID==snames[i+2],]
+ segmean <- rep(segout$seg.mean, segout$num.mark)
+# gains and losses
+ pfreq[ii] <- pfreq[ii] + 1
+ gain[ii] <- gain[ii] + 1*((segmean - median(segmean))/xmad[i] > threshold)
+ loss[ii] <- loss[ii] - 1*((segmean - median(segmean))/xmad[i] < -threshold)
+ }
+ out <- list()
+ out$chrom <- xout$data$chrom
+ out$maploc <- xout$data$maploc
+ out$pfreq <- pfreq
+ out$gain <- gain/pfreq
+ out$loss <- loss/pfreq
+ as.data.frame(out)
+}
diff --git a/R/plotSample.R b/R/plotSample.R
new file mode 100644
index 0000000..bcd8884
--- /dev/null
+++ b/R/plotSample.R
@@ -0,0 +1,62 @@
+plotSample <- function(x, sampleid=NULL, chromlist=NULL, xmaploc=FALSE,
+ col=c("black","green"), pch=".", cex=NULL, altcol=TRUE,
+ segcol="red", lwd=3, zeroline=TRUE, zlcol="grey",
+ xlab=NULL, ylab=NULL, main=NULL, ...) {
+ if (class(x) != "DNAcopy") stop("First arg must be a DNAcopy object")
+ if (missing(sampleid)) {sampleid <- 1}
+ subx <- subset(x, chromlist=chromlist, samplelist=sampleid[1])
+# get the data for plotting
+ genomdat <- subx$data[,3]
+ ina <- is.finite(genomdat)
+ genomdat <- genomdat[ina]
+ chrom <- subx$data[ina,1]
+ uchrom <- unique(chrom)
+ segres <- subx$output
+# setup the X-axis based on xmaploc
+ if (xmaploc) {
+ maploc <- subx$data[ina,2]
+ rmaploc <- sapply(uchrom, function(i, maploc, chrom) range(maploc[chrom==i]), maploc, chrom)
+ nc <- length(uchrom)
+ if ((nc>1) && any(rmaploc[1,-1] < rmaploc[2,-nc])) {
+ cmaploc <- cumsum(as.numeric(rmaploc[2,]))
+ for (i in 2:nc) {
+ maploc[chrom==uchrom[i]] <- cmaploc[i-1] + maploc[chrom==uchrom[i]]
+ }
+ }
+ xlabel <- "Genomic Position"
+ } else {
+ maploc <- 1:sum(ina)
+ xlabel <- "Index"
+ }
+# setup altenating colors
+ if (altcol & length(uchrom)>1) {
+ colvec <- rep(1, length(chrom))
+ j <- 0
+ for(i in uchrom) {
+ j <- (j+1) %% 2
+ colvec[chrom == i] <- j+1
+ }
+ } else {
+ colvec <- 1
+ }
+# set other graphical parameters
+ if (missing(cex)) cex <- ifelse(pch == ".", 3, 1)
+ if (missing(main)) main <- names(subx$data)[3]
+ if (missing(xlab)) xlab <- xlabel
+ if (missing(ylab)) {
+ if (attr(subx$data, "data.type") == "logratio") {ylab <- "log(relative CN)"}
+ else {ylab <- "LOH"}
+ }
+# plot the data
+ plot(maploc, genomdat, col=col[colvec], pch=pch, cex=cex, main=main, xlab=xlab, ylab=ylab, ...)
+# add the segment means
+ ii <- cumsum(c(0, segres$num.mark))
+ mm <- segres$seg.mean
+ kk <- length(ii)
+ segments(maploc[ii[-kk]+1], segres$seg.mean, x1=maploc[ii[-1]], y1=segres$seg.mean, col = segcol, lwd=lwd)
+# for (i in 1:(kk - 1)) {
+# lines(maploc[c(ii[i]+1,ii[i+1])], rep(mm[i], 2), col = segcol, lwd=lwd)
+# }
+# add the zeroline
+ if (zeroline) abline(h=0, col=zlcol, lwd=lwd)
+}
diff --git a/R/segment.R b/R/segment.R
new file mode 100644
index 0000000..b8ad631
--- /dev/null
+++ b/R/segment.R
@@ -0,0 +1,91 @@
+segment <- function(x, weights=NULL, alpha=0.01, nperm=10000, p.method=
+ c("hybrid","perm"), min.width=2, kmax=25, nmin=200,
+ eta=0.05, sbdry=NULL, trim = 0.025, undo.splits=
+ c("none","prune", "sdundo"), undo.prune=0.05, undo.SD=3,
+ verbose=1)
+ {
+ if (!inherits(x, 'CNA')) stop("First arg must be a copy number array object")
+ call <- match.call()
+ if (min.width < 2 | min.width > 5) stop("minimum segment width should be between 2 and 5")
+ if (nmin < 4*kmax) stop("nmin should be >= 4*kmax")
+ if (missing(sbdry)) {
+ if (nperm==10000 & alpha==0.01 & eta==0.05) {
+ if (!exists("default.DNAcopy.bdry")) data(default.DNAcopy.bdry, package="DNAcopy",envir=environment())
+ sbdry <- get("default.DNAcopy.bdry", envir=environment())
+ } else {
+ max.ones <- floor(nperm*alpha) + 1
+ sbdry <- getbdry(eta, nperm, max.ones)
+ }
+ }
+ weighted <- ifelse(missing(weights), FALSE, TRUE)
+# rudimentary error checking for weights
+ if (weighted) {
+ if (length(weights) != nrow(x)) stop("length of weights should be the same as the number of probes")
+ if (min(weights) <= 0) stop("all weights should be positive")
+ }
+ sbn <- length(sbdry)
+ nsample <- ncol(x)-2
+ sampleid <- colnames(x)[-(1:2)]
+ uchrom <- unique(x$chrom)
+ data.type <- attr(x, "data.type")
+ p.method <- match.arg(p.method)
+ undo.splits <- match.arg(undo.splits)
+ segres <- list()
+ segres$data <- x
+ allsegs <- list()
+ allsegs$ID <- NULL
+ allsegs$chrom <- NULL
+ allsegs$loc.start <- NULL
+ allsegs$loc.end <- NULL
+ allsegs$num.mark <- NULL
+ allsegs$seg.mean <- NULL
+ segRows <- list()
+ segRows$startRow <- NULL
+ segRows$endRow <- NULL
+ for (isamp in 1:nsample) {
+ if (verbose>=1) cat(paste("Analyzing:", sampleid[isamp],"\n"))
+ genomdati <- x[,isamp+2]
+ ina <- which(is.finite(genomdati))
+ genomdati <- genomdati[ina]
+ trimmed.SD <- sqrt(trimmed.variance(genomdati, trim))
+ chromi <- x$chrom[ina]
+# maploci <- x$maploc[ina]
+ if (weighted) {
+ wghts <- weights[ina]
+ } else {
+ wghts <- NULL
+ }
+ sample.lsegs <- NULL
+ sample.segmeans <- NULL
+ for (ic in uchrom) {
+ if (verbose>=2) cat(paste(" current chromosome:", ic, "\n"))
+ segci <- changepoints(genomdati[chromi==ic], data.type, alpha, wghts,
+ sbdry, sbn, nperm, p.method, min.width, kmax,
+ nmin, trimmed.SD, undo.splits, undo.prune,
+ undo.SD, verbose)
+ sample.lsegs <- c(sample.lsegs, segci$lseg)
+ sample.segmeans <- c(sample.segmeans, segci$segmeans)
+ }
+ sample.nseg <- length(sample.lsegs)
+ sample.segs.start <- ina[cumsum(c(1,sample.lsegs[-sample.nseg]))]
+ sample.segs.end <- ina[cumsum(sample.lsegs)]
+ allsegs$ID <- c(allsegs$ID, rep(isamp,sample.nseg))
+ allsegs$chrom <- c(allsegs$chrom, x$chrom[sample.segs.end])
+ allsegs$loc.start <- c(allsegs$loc.start, x$maploc[sample.segs.start])
+ allsegs$loc.end <- c(allsegs$loc.end, x$maploc[sample.segs.end])
+ allsegs$num.mark <- c(allsegs$num.mark, sample.lsegs)
+ allsegs$seg.mean <- c(allsegs$seg.mean, sample.segmeans)
+ segRows$startRow <- c(segRows$startRow, sample.segs.start)
+ segRows$endRow <- c(segRows$endRow, sample.segs.end)
+ }
+ allsegs$ID <- sampleid[allsegs$ID]
+ allsegs$seg.mean <- round(allsegs$seg.mean, 4)
+ allsegs <- as.data.frame(allsegs)
+ allsegs$ID <- as.character(allsegs$ID)
+ segres$output <- allsegs
+ segres$segRows <- as.data.frame(segRows)
+ segres$call <- call
+ if (weighted) segres$weights <- weights
+ class(segres) <- "DNAcopy"
+ segres
+ }
diff --git a/R/segmentp.R b/R/segmentp.R
new file mode 100644
index 0000000..2f62cf0
--- /dev/null
+++ b/R/segmentp.R
@@ -0,0 +1,104 @@
+segments.p <- function(x, ngrid=100, tol=1e-6, alpha=0.05, search.range=100,
+ nperm=1000)
+ {
+ if (!inherits(x, "DNAcopy"))
+ stop("First arg must be the result of segment")
+ xdat <- x$data
+ xout <- x$output
+ nsample <- ncol(xdat)-2
+ sampleid <- colnames(xdat)[-(1:2)]
+ chrom0 <- xdat$chrom
+ maploc0 <- xdat$maploc
+ uchrom <- unique(chrom0)
+ nchrom <- length(uchrom)
+ bstat <- pval <- lcl <- ucl <- rep(NA, nrow(xout))
+ ll <- 0
+ iisamp <- 2
+ for (isamp in sampleid) {
+ iisamp <- iisamp + 1
+# genomdat = logratio data of sample isamp
+ genomdat <- xdat[, iisamp]
+# ina = location of the missing values and infinity
+ ina <- which(is.finite(genomdat))
+# subset out the missing & infinity locations
+ genomdat <- genomdat[ina]
+ chrom <- chrom0[ina]
+ maploc <- maploc0[ina]
+ for(ichrom in uchrom) {
+# kk = number of segments in chromosome ichrom of sample isamp
+ kk <- sum(1*(xout$ID == isamp & xout$chrom == ichrom))
+ if (kk > 1) {
+# gendat = logratio data in chromosome ichrom of sample isamp
+ gendat <- genomdat[chrom == ichrom]
+# seglen = lengths of the segments in chromosome ichrom of sample isamp
+ seglen <- xout$num.mark[xout$ID == isamp & xout$chrom == ichrom]
+# segmean = means of the segments in chromosome ichrom of sample isamp
+ segmean <- xout$seg.mean[xout$ID == isamp & xout$chrom == ichrom]
+# xresid = residuals of the data in chromosome ichrom of sample isamp
+ xresid <- gendat - rep(segmean, seglen)
+ ibstat <- ipval <- ilcl <- iucl <- rep(NA, kk)
+# begin with the first 2 segments lo & hi are the start & end points
+ lo <- 1
+ hi <- sum(seglen[1:2])
+ for(i in 1:(kk-1)) {
+# prep data from adjacent segments
+ gendati <- gendat[lo:hi]
+ xresidi <- xresid[lo:hi]
+# standardize data
+ gendati <- (gendati - mean(gendati))/sd(xresidi)
+ n <- length(gendati)
+# call the p-value subroutine
+ zzz <- .Fortran("bsegp",
+ as.integer(n),
+ as.double(gendati),
+ ostat=double(1),
+ pval=double(1),
+ as.integer(ngrid),
+ as.double(tol),
+ PACKAGE="DNAcopy")
+ ibstat[i] <- zzz$ostat
+ ipval[i] <- zzz$pval
+# additional data for CI routine
+# k = location of change-point
+# sr = search range
+# sumxk = partial sum at k (all paths are pegged at that point)
+# var.factor = variance for 2-sample t-statistic
+ k <- seglen[i]
+ sr <- c(max(2, k-search.range),min(n-2,k+search.range))
+ sumxk <- sum(gendati[1:k])
+ var.factor <- n/((1:n)*(n:1 - 1))
+ var.factor[n] <- 0
+# call the confidence subroutine
+ zzz <- .Fortran("bsegci",
+ as.integer(n),
+ as.integer(k),
+ as.double(sumxk),
+ as.double(gendati),
+ px = double(n),
+ sr = as.integer(sr),
+ vfact = as.double(var.factor),
+ as.integer(nperm),
+ bsloc = integer(nperm),
+ PACKAGE="DNAcopy")
+ bsloc <- zzz$bsloc
+ bsci <- quantile(bsloc, c(alpha/2, 1-alpha/2), type=1)
+ ilcl[i] <- bsci[1]
+ iucl[i] <- bsci[2]
+# increment to the next segment
+ lo <- lo + seglen[i]
+ if(i < kk-1) hi <- hi + seglen[i+2]
+ }
+ ibstat[kk] <- ipval[kk] <- ilcl[kk] <- iucl[kk] <- NA
+ } else {
+ seglen <- ibstat <- ipval <- ilcl <- iucl <- NA
+ }
+ bstat[ll + (1:kk)] <- ibstat
+ pval[ll + (1:kk)] <- ipval
+# convert the lcl & ucl from probe number to maploc
+ lcl[ll + (1:kk)] <- maploc[chrom == ichrom][cumsum(seglen) + (ilcl - seglen)]
+ ucl[ll + (1:kk)] <- maploc[chrom == ichrom][cumsum(seglen) + (iucl - seglen)]
+ ll <- ll + kk
+ }
+ }
+ cbind(xout, bstat, pval, lcl, ucl)
+ }
diff --git a/R/segmentsummary.R b/R/segmentsummary.R
new file mode 100644
index 0000000..6a452a2
--- /dev/null
+++ b/R/segmentsummary.R
@@ -0,0 +1,31 @@
+segments.summary <- function(x)
+ {
+ if (!inherits(x, "DNAcopy"))
+ stop("First arg must be the result of segment")
+ xdat <- x$data
+ xout <- x$output
+ nsample <- ncol(xdat)-2
+ sampleid <- colnames(xdat)[-(1:2)]
+ seg.median <- seg.sd <- seg.mad <- rep(NA, nrow(xout))
+ ll <- 0
+ iisamp <- 2
+ for (isamp in sampleid) {
+ iisamp <- iisamp + 1
+# genomdat = logratio data of sample isamp
+ genomdat <- xdat[, iisamp]
+# ina = location of the missing values and infinity
+ ina <- which(is.finite(genomdat))
+# subset out the missing & infinity locations
+ genomdat <- genomdat[ina]
+ seglen <- xout$num.mark[xout$ID == isamp]
+ kk <- length(seglen)
+ seg.sd[ll+(1:kk)] <- tapply(genomdat, rep(1:kk,seglen), sd)
+ seg.median[ll+(1:kk)] <- tapply(genomdat, rep(1:kk,seglen), median)
+ seg.mad[ll+(1:kk)] <- tapply(genomdat, rep(1:kk,seglen), mad)
+ ll <- ll + kk
+ }
+ xout$seg.sd <- round(seg.sd, 4)
+ xout$seg.median <- round(seg.median, 4)
+ xout$seg.mad <- round(seg.mad, 4)
+ xout
+ }
diff --git a/R/zoomIntoRegion.R b/R/zoomIntoRegion.R
new file mode 100644
index 0000000..13441ed
--- /dev/null
+++ b/R/zoomIntoRegion.R
@@ -0,0 +1,30 @@
+zoomIntoRegion <- function(x, chrom, sampleid, maploc.start=NULL, maploc.end=NULL, pt.pch=NULL, pt.cex=NULL, pt.col=NULL, segcol=NULL, seglwd=NULL, main=NULL, xlab=NULL, ylab=NULL, ...) {
+ if (class(x) != "DNAcopy") stop("First arg must be a DNAcopy object")
+ tmp <- subset(x, chrom=chrom[1], samplelist=sampleid[1])
+ lrdata <- tmp$data
+ if (missing(maploc.start)) maploc.start <- min(lrdata$maploc, na.rm=T) - 1
+ if (missing(maploc.end)) maploc.end <- max(lrdata$maploc, na.rm=T) + 1
+ ii <- ((lrdata$maploc >= maploc.start) & (lrdata$maploc <= maploc.end))
+ if (missing(pt.pch)) pt.pch <- "."
+ if (missing(pt.cex))
+ pt.cex <- ifelse(pt.pch==".", 3, 1)
+ if (missing(pt.col)) pt.col <- "green3"
+ if (missing(segcol)) segcol <- "red"
+ if (missing(seglwd)) seglwd <- 3
+ if (missing(main))
+ main <- paste("chr", chrom, ": ", maploc.start,"-", maploc.end, " from sample ", sampleid, sep="")
+ if (missing(xlab)) xlab = "Genomic Position"
+ if (missing(ylab)) ylab = "log-ratio"
+
+ plot(lrdata[ii,2], lrdata[ii,3], main = main, xlab=xlab, ylab = ylab, pch = pt.pch, cex = pt.cex, col = pt.col, ...)
+ segs <- tmp$output
+ jj <- ((segs$loc.start <= maploc.end) & (segs$loc.end >= maploc.start))
+ segs <- segs[jj,]
+ k <- nrow(segs)
+ segs$loc.start[1] <- maploc.start
+ segs$loc.end[k] <- maploc.end
+ segments(segs$loc.start, segs$seg.mean, x1=segs$loc.end, y1=segs$seg.mean, col = segcol, lwd = seglwd)
+# for(i in 1:k) {
+# lines(c(segs$loc.start[i],segs$loc.end[i]), rep(segs$seg.mean[i],2), col=segcol, lwd=seglwd)
+# }
+}
diff --git a/R/zzz.R b/R/zzz.R
new file mode 100644
index 0000000..97ad4df
--- /dev/null
+++ b/R/zzz.R
@@ -0,0 +1,3 @@
+.onLoad <- function(libname, pkgname) {
+ library.dynam("DNAcopy", pkgname, libname)
+}
diff --git a/build/vignette.rds b/build/vignette.rds
new file mode 100644
index 0000000..5baebb0
Binary files /dev/null and b/build/vignette.rds differ
diff --git a/chrom-lengths b/chrom-lengths
new file mode 100644
index 0000000..a5908de
--- /dev/null
+++ b/chrom-lengths
@@ -0,0 +1 @@
+Chromosome, {Length (Mb)}; 1, {263}; 2, {255}; 3, {214}; 4, {203}; 5{194}; 6, {183}; 7, {171}; 8, {155}; 9, {145}; 10, {144}; 11, {144}, 12, {143}; 13, (114}; 14, {109}; 15, {106}; 16, {98}; 17, {92}; 18, {85}; 19, {67}; 20, {72}; 21, {50}; 22, {56}; X {164}/Y {59} (total = 3286)
\ No newline at end of file
diff --git a/data/coriell.rda b/data/coriell.rda
new file mode 100644
index 0000000..1916f61
Binary files /dev/null and b/data/coriell.rda differ
diff --git a/data/cytoBand.tab b/data/cytoBand.tab
new file mode 100644
index 0000000..be783a8
--- /dev/null
+++ b/data/cytoBand.tab
@@ -0,0 +1,863 @@
+chromNum chromStart chromEnd bandname gieStain
+chr01 0 2300000 p36.33 gneg
+chr01 2300000 5400000 p36.32 gpos25
+chr01 5400000 7200000 p36.31 gneg
+chr01 7200000 9200000 p36.23 gpos25
+chr01 9200000 12700000 p36.22 gneg
+chr01 12700000 16200000 p36.21 gpos50
+chr01 16200000 20400000 p36.13 gneg
+chr01 20400000 23900000 p36.12 gpos25
+chr01 23900000 28000000 p36.11 gneg
+chr01 28000000 30200000 p35.3 gpos25
+chr01 30200000 32400000 p35.2 gneg
+chr01 32400000 34600000 p35.1 gpos25
+chr01 34600000 40100000 p34.3 gneg
+chr01 40100000 44100000 p34.2 gpos25
+chr01 44100000 46800000 p34.1 gneg
+chr01 46800000 50700000 p33 gpos75
+chr01 50700000 56100000 p32.3 gneg
+chr01 56100000 59000000 p32.2 gpos50
+chr01 59000000 61300000 p32.1 gneg
+chr01 61300000 68900000 p31.3 gpos50
+chr01 68900000 69700000 p31.2 gneg
+chr01 69700000 84900000 p31.1 gpos100
+chr01 84900000 88400000 p22.3 gneg
+chr01 88400000 92000000 p22.2 gpos75
+chr01 92000000 94700000 p22.1 gneg
+chr01 94700000 99700000 p21.3 gpos75
+chr01 99700000 102200000 p21.2 gneg
+chr01 102200000 107200000 p21.1 gpos100
+chr01 107200000 111800000 p13.3 gneg
+chr01 111800000 116100000 p13.2 gpos50
+chr01 116100000 117800000 p13.1 gneg
+chr01 117800000 120600000 p12 gpos50
+chr01 120600000 121500000 p11.2 gneg
+chr01 121500000 125000000 p11.1 acen
+chr01 125000000 128900000 q11 acen
+chr01 128900000 142600000 q12 gvar
+chr01 142600000 147000000 q21.1 gneg
+chr01 147000000 150300000 q21.2 gpos50
+chr01 150300000 155000000 q21.3 gneg
+chr01 155000000 156500000 q22 gpos50
+chr01 156500000 159100000 q23.1 gneg
+chr01 159100000 160500000 q23.2 gpos50
+chr01 160500000 165500000 q23.3 gneg
+chr01 165500000 167200000 q24.1 gpos50
+chr01 167200000 170900000 q24.2 gneg
+chr01 170900000 172900000 q24.3 gpos75
+chr01 172900000 176000000 q25.1 gneg
+chr01 176000000 180300000 q25.2 gpos50
+chr01 180300000 185800000 q25.3 gneg
+chr01 185800000 190800000 q31.1 gpos100
+chr01 190800000 193800000 q31.2 gneg
+chr01 193800000 198700000 q31.3 gpos100
+chr01 198700000 207200000 q32.1 gneg
+chr01 207200000 211500000 q32.2 gpos25
+chr01 211500000 214500000 q32.3 gneg
+chr01 214500000 224100000 q41 gpos100
+chr01 224100000 224600000 q42.11 gneg
+chr01 224600000 227000000 q42.12 gpos25
+chr01 227000000 230700000 q42.13 gneg
+chr01 230700000 234700000 q42.2 gpos50
+chr01 234700000 236600000 q42.3 gneg
+chr01 236600000 243700000 q43 gpos75
+chr01 243700000 249250621 q44 gneg
+chr02 0 4400000 p25.3 gneg
+chr02 4400000 7100000 p25.2 gpos50
+chr02 7100000 12200000 p25.1 gneg
+chr02 12200000 16700000 p24.3 gpos75
+chr02 16700000 19200000 p24.2 gneg
+chr02 19200000 24000000 p24.1 gpos75
+chr02 24000000 27900000 p23.3 gneg
+chr02 27900000 30000000 p23.2 gpos25
+chr02 30000000 32100000 p23.1 gneg
+chr02 32100000 36600000 p22.3 gpos75
+chr02 36600000 38600000 p22.2 gneg
+chr02 38600000 41800000 p22.1 gpos50
+chr02 41800000 47800000 p21 gneg
+chr02 47800000 52900000 p16.3 gpos100
+chr02 52900000 55000000 p16.2 gneg
+chr02 55000000 61300000 p16.1 gpos100
+chr02 61300000 64100000 p15 gneg
+chr02 64100000 68600000 p14 gpos50
+chr02 68600000 71500000 p13.3 gneg
+chr02 71500000 73500000 p13.2 gpos50
+chr02 73500000 75000000 p13.1 gneg
+chr02 75000000 83300000 p12 gpos100
+chr02 83300000 90500000 p11.2 gneg
+chr02 90500000 93300000 p11.1 acen
+chr02 93300000 96800000 q11.1 acen
+chr02 96800000 102700000 q11.2 gneg
+chr02 102700000 106000000 q12.1 gpos50
+chr02 106000000 107500000 q12.2 gneg
+chr02 107500000 110200000 q12.3 gpos25
+chr02 110200000 114400000 q13 gneg
+chr02 114400000 118800000 q14.1 gpos50
+chr02 118800000 122400000 q14.2 gneg
+chr02 122400000 129900000 q14.3 gpos50
+chr02 129900000 132500000 q21.1 gneg
+chr02 132500000 135100000 q21.2 gpos25
+chr02 135100000 136800000 q21.3 gneg
+chr02 136800000 142200000 q22.1 gpos100
+chr02 142200000 144100000 q22.2 gneg
+chr02 144100000 148700000 q22.3 gpos100
+chr02 148700000 149900000 q23.1 gneg
+chr02 149900000 150500000 q23.2 gpos25
+chr02 150500000 154900000 q23.3 gneg
+chr02 154900000 159800000 q24.1 gpos75
+chr02 159800000 163700000 q24.2 gneg
+chr02 163700000 169700000 q24.3 gpos75
+chr02 169700000 178000000 q31.1 gneg
+chr02 178000000 180600000 q31.2 gpos50
+chr02 180600000 183000000 q31.3 gneg
+chr02 183000000 189400000 q32.1 gpos75
+chr02 189400000 191900000 q32.2 gneg
+chr02 191900000 197400000 q32.3 gpos75
+chr02 197400000 203300000 q33.1 gneg
+chr02 203300000 204900000 q33.2 gpos50
+chr02 204900000 209000000 q33.3 gneg
+chr02 209000000 215300000 q34 gpos100
+chr02 215300000 221500000 q35 gneg
+chr02 221500000 225200000 q36.1 gpos75
+chr02 225200000 226100000 q36.2 gneg
+chr02 226100000 231000000 q36.3 gpos100
+chr02 231000000 235600000 q37.1 gneg
+chr02 235600000 237300000 q37.2 gpos50
+chr02 237300000 243199373 q37.3 gneg
+chr03 0 2800000 p26.3 gpos50
+chr03 2800000 4000000 p26.2 gneg
+chr03 4000000 8700000 p26.1 gpos50
+chr03 8700000 11800000 p25.3 gneg
+chr03 11800000 13300000 p25.2 gpos25
+chr03 13300000 16400000 p25.1 gneg
+chr03 16400000 23900000 p24.3 gpos100
+chr03 23900000 26400000 p24.2 gneg
+chr03 26400000 30900000 p24.1 gpos75
+chr03 30900000 32100000 p23 gneg
+chr03 32100000 36500000 p22.3 gpos50
+chr03 36500000 39400000 p22.2 gneg
+chr03 39400000 43700000 p22.1 gpos75
+chr03 43700000 44100000 p21.33 gneg
+chr03 44100000 44200000 p21.32 gpos50
+chr03 44200000 50600000 p21.31 gneg
+chr03 50600000 52300000 p21.2 gpos25
+chr03 52300000 54400000 p21.1 gneg
+chr03 54400000 58600000 p14.3 gpos50
+chr03 58600000 63700000 p14.2 gneg
+chr03 63700000 69800000 p14.1 gpos50
+chr03 69800000 74200000 p13 gneg
+chr03 74200000 79800000 p12.3 gpos75
+chr03 79800000 83500000 p12.2 gneg
+chr03 83500000 87200000 p12.1 gpos75
+chr03 87200000 87900000 p11.2 gneg
+chr03 87900000 91000000 p11.1 acen
+chr03 91000000 93900000 q11.1 acen
+chr03 93900000 98300000 q11.2 gvar
+chr03 98300000 100000000 q12.1 gneg
+chr03 100000000 100900000 q12.2 gpos25
+chr03 100900000 102800000 q12.3 gneg
+chr03 102800000 106200000 q13.11 gpos75
+chr03 106200000 107900000 q13.12 gneg
+chr03 107900000 111300000 q13.13 gpos50
+chr03 111300000 113500000 q13.2 gneg
+chr03 113500000 117300000 q13.31 gpos75
+chr03 117300000 119000000 q13.32 gneg
+chr03 119000000 121900000 q13.33 gpos75
+chr03 121900000 123800000 q21.1 gneg
+chr03 123800000 125800000 q21.2 gpos25
+chr03 125800000 129200000 q21.3 gneg
+chr03 129200000 133700000 q22.1 gpos25
+chr03 133700000 135700000 q22.2 gneg
+chr03 135700000 138700000 q22.3 gpos25
+chr03 138700000 142800000 q23 gneg
+chr03 142800000 148900000 q24 gpos100
+chr03 148900000 152100000 q25.1 gneg
+chr03 152100000 155000000 q25.2 gpos50
+chr03 155000000 157000000 q25.31 gneg
+chr03 157000000 159000000 q25.32 gpos50
+chr03 159000000 160700000 q25.33 gneg
+chr03 160700000 167600000 q26.1 gpos100
+chr03 167600000 170900000 q26.2 gneg
+chr03 170900000 175700000 q26.31 gpos75
+chr03 175700000 179000000 q26.32 gneg
+chr03 179000000 182700000 q26.33 gpos75
+chr03 182700000 184500000 q27.1 gneg
+chr03 184500000 186000000 q27.2 gpos25
+chr03 186000000 187900000 q27.3 gneg
+chr03 187900000 192300000 q28 gpos75
+chr03 192300000 198022430 q29 gneg
+chr04 0 4500000 p16.3 gneg
+chr04 4500000 6000000 p16.2 gpos25
+chr04 6000000 11300000 p16.1 gneg
+chr04 11300000 15200000 p15.33 gpos50
+chr04 15200000 17800000 p15.32 gneg
+chr04 17800000 21300000 p15.31 gpos75
+chr04 21300000 27700000 p15.2 gneg
+chr04 27700000 35800000 p15.1 gpos100
+chr04 35800000 41200000 p14 gneg
+chr04 41200000 44600000 p13 gpos50
+chr04 44600000 48200000 p12 gneg
+chr04 48200000 50400000 p11 acen
+chr04 50400000 52700000 q11 acen
+chr04 52700000 59500000 q12 gneg
+chr04 59500000 66600000 q13.1 gpos100
+chr04 66600000 70500000 q13.2 gneg
+chr04 70500000 76300000 q13.3 gpos75
+chr04 76300000 78900000 q21.1 gneg
+chr04 78900000 82400000 q21.21 gpos50
+chr04 82400000 84100000 q21.22 gneg
+chr04 84100000 86900000 q21.23 gpos25
+chr04 86900000 88000000 q21.3 gneg
+chr04 88000000 93700000 q22.1 gpos75
+chr04 93700000 95100000 q22.2 gneg
+chr04 95100000 98800000 q22.3 gpos75
+chr04 98800000 101100000 q23 gneg
+chr04 101100000 107700000 q24 gpos50
+chr04 107700000 114100000 q25 gneg
+chr04 114100000 120800000 q26 gpos75
+chr04 120800000 123800000 q27 gneg
+chr04 123800000 128800000 q28.1 gpos50
+chr04 128800000 131100000 q28.2 gneg
+chr04 131100000 139500000 q28.3 gpos100
+chr04 139500000 141500000 q31.1 gneg
+chr04 141500000 146800000 q31.21 gpos25
+chr04 146800000 148500000 q31.22 gneg
+chr04 148500000 151100000 q31.23 gpos25
+chr04 151100000 155600000 q31.3 gneg
+chr04 155600000 161800000 q32.1 gpos100
+chr04 161800000 164500000 q32.2 gneg
+chr04 164500000 170100000 q32.3 gpos100
+chr04 170100000 171900000 q33 gneg
+chr04 171900000 176300000 q34.1 gpos75
+chr04 176300000 177500000 q34.2 gneg
+chr04 177500000 183200000 q34.3 gpos100
+chr04 183200000 187100000 q35.1 gneg
+chr04 187100000 191154276 q35.2 gpos25
+chr05 0 4500000 p15.33 gneg
+chr05 4500000 6300000 p15.32 gpos25
+chr05 6300000 9800000 p15.31 gneg
+chr05 9800000 15000000 p15.2 gpos50
+chr05 15000000 18400000 p15.1 gneg
+chr05 18400000 23300000 p14.3 gpos100
+chr05 23300000 24600000 p14.2 gneg
+chr05 24600000 28900000 p14.1 gpos100
+chr05 28900000 33800000 p13.3 gneg
+chr05 33800000 38400000 p13.2 gpos25
+chr05 38400000 42500000 p13.1 gneg
+chr05 42500000 46100000 p12 gpos50
+chr05 46100000 48400000 p11 acen
+chr05 48400000 50700000 q11.1 acen
+chr05 50700000 58900000 q11.2 gneg
+chr05 58900000 62900000 q12.1 gpos75
+chr05 62900000 63200000 q12.2 gneg
+chr05 63200000 66700000 q12.3 gpos75
+chr05 66700000 68400000 q13.1 gneg
+chr05 68400000 73300000 q13.2 gpos50
+chr05 73300000 76900000 q13.3 gneg
+chr05 76900000 81400000 q14.1 gpos50
+chr05 81400000 82800000 q14.2 gneg
+chr05 82800000 92300000 q14.3 gpos100
+chr05 92300000 98200000 q15 gneg
+chr05 98200000 102800000 q21.1 gpos100
+chr05 102800000 104500000 q21.2 gneg
+chr05 104500000 109600000 q21.3 gpos100
+chr05 109600000 111500000 q22.1 gneg
+chr05 111500000 113100000 q22.2 gpos50
+chr05 113100000 115200000 q22.3 gneg
+chr05 115200000 121400000 q23.1 gpos100
+chr05 121400000 127300000 q23.2 gneg
+chr05 127300000 130600000 q23.3 gpos100
+chr05 130600000 136200000 q31.1 gneg
+chr05 136200000 139500000 q31.2 gpos25
+chr05 139500000 144500000 q31.3 gneg
+chr05 144500000 149800000 q32 gpos75
+chr05 149800000 152700000 q33.1 gneg
+chr05 152700000 155700000 q33.2 gpos50
+chr05 155700000 159900000 q33.3 gneg
+chr05 159900000 168500000 q34 gpos100
+chr05 168500000 172800000 q35.1 gneg
+chr05 172800000 176600000 q35.2 gpos25
+chr05 176600000 180915260 q35.3 gneg
+chr06 0 2300000 p25.3 gneg
+chr06 2300000 4200000 p25.2 gpos25
+chr06 4200000 7100000 p25.1 gneg
+chr06 7100000 10600000 p24.3 gpos50
+chr06 10600000 11600000 p24.2 gneg
+chr06 11600000 13400000 p24.1 gpos25
+chr06 13400000 15200000 p23 gneg
+chr06 15200000 25200000 p22.3 gpos75
+chr06 25200000 27000000 p22.2 gneg
+chr06 27000000 30400000 p22.1 gpos50
+chr06 30400000 32100000 p21.33 gneg
+chr06 32100000 33500000 p21.32 gpos25
+chr06 33500000 36600000 p21.31 gneg
+chr06 36600000 40500000 p21.2 gpos25
+chr06 40500000 46200000 p21.1 gneg
+chr06 46200000 51800000 p12.3 gpos100
+chr06 51800000 52900000 p12.2 gneg
+chr06 52900000 57000000 p12.1 gpos100
+chr06 57000000 58700000 p11.2 gneg
+chr06 58700000 61000000 p11.1 acen
+chr06 61000000 63300000 q11.1 acen
+chr06 63300000 63400000 q11.2 gneg
+chr06 63400000 70000000 q12 gpos100
+chr06 70000000 75900000 q13 gneg
+chr06 75900000 83900000 q14.1 gpos50
+chr06 83900000 84900000 q14.2 gneg
+chr06 84900000 88000000 q14.3 gpos50
+chr06 88000000 93100000 q15 gneg
+chr06 93100000 99500000 q16.1 gpos100
+chr06 99500000 100600000 q16.2 gneg
+chr06 100600000 105500000 q16.3 gpos100
+chr06 105500000 114600000 q21 gneg
+chr06 114600000 118300000 q22.1 gpos75
+chr06 118300000 118500000 q22.2 gneg
+chr06 118500000 126100000 q22.31 gpos100
+chr06 126100000 127100000 q22.32 gneg
+chr06 127100000 130300000 q22.33 gpos75
+chr06 130300000 131200000 q23.1 gneg
+chr06 131200000 135200000 q23.2 gpos50
+chr06 135200000 139000000 q23.3 gneg
+chr06 139000000 142800000 q24.1 gpos75
+chr06 142800000 145600000 q24.2 gneg
+chr06 145600000 149000000 q24.3 gpos75
+chr06 149000000 152500000 q25.1 gneg
+chr06 152500000 155500000 q25.2 gpos50
+chr06 155500000 161000000 q25.3 gneg
+chr06 161000000 164500000 q26 gpos50
+chr06 164500000 171115067 q27 gneg
+chr07 0 2800000 p22.3 gneg
+chr07 2800000 4500000 p22.2 gpos25
+chr07 4500000 7300000 p22.1 gneg
+chr07 7300000 13800000 p21.3 gpos100
+chr07 13800000 16500000 p21.2 gneg
+chr07 16500000 20900000 p21.1 gpos100
+chr07 20900000 25500000 p15.3 gneg
+chr07 25500000 28000000 p15.2 gpos50
+chr07 28000000 28800000 p15.1 gneg
+chr07 28800000 35000000 p14.3 gpos75
+chr07 35000000 37200000 p14.2 gneg
+chr07 37200000 43300000 p14.1 gpos75
+chr07 43300000 45400000 p13 gneg
+chr07 45400000 49000000 p12.3 gpos75
+chr07 49000000 50500000 p12.2 gneg
+chr07 50500000 54000000 p12.1 gpos75
+chr07 54000000 58000000 p11.2 gneg
+chr07 58000000 59900000 p11.1 acen
+chr07 59900000 61700000 q11.1 acen
+chr07 61700000 67000000 q11.21 gneg
+chr07 67000000 72200000 q11.22 gpos50
+chr07 72200000 77500000 q11.23 gneg
+chr07 77500000 86400000 q21.11 gpos100
+chr07 86400000 88200000 q21.12 gneg
+chr07 88200000 91100000 q21.13 gpos75
+chr07 91100000 92800000 q21.2 gneg
+chr07 92800000 98000000 q21.3 gpos75
+chr07 98000000 103800000 q22.1 gneg
+chr07 103800000 104500000 q22.2 gpos50
+chr07 104500000 107400000 q22.3 gneg
+chr07 107400000 114600000 q31.1 gpos75
+chr07 114600000 117400000 q31.2 gneg
+chr07 117400000 121100000 q31.31 gpos75
+chr07 121100000 123800000 q31.32 gneg
+chr07 123800000 127100000 q31.33 gpos75
+chr07 127100000 129200000 q32.1 gneg
+chr07 129200000 130400000 q32.2 gpos25
+chr07 130400000 132600000 q32.3 gneg
+chr07 132600000 138200000 q33 gpos50
+chr07 138200000 143100000 q34 gneg
+chr07 143100000 147900000 q35 gpos75
+chr07 147900000 152600000 q36.1 gneg
+chr07 152600000 155100000 q36.2 gpos25
+chr07 155100000 159138663 q36.3 gneg
+chr08 0 2200000 p23.3 gneg
+chr08 2200000 6200000 p23.2 gpos75
+chr08 6200000 12700000 p23.1 gneg
+chr08 12700000 19000000 p22 gpos100
+chr08 19000000 23300000 p21.3 gneg
+chr08 23300000 27400000 p21.2 gpos50
+chr08 27400000 28800000 p21.1 gneg
+chr08 28800000 36500000 p12 gpos75
+chr08 36500000 38300000 p11.23 gneg
+chr08 38300000 39700000 p11.22 gpos25
+chr08 39700000 43100000 p11.21 gneg
+chr08 43100000 45600000 p11.1 acen
+chr08 45600000 48100000 q11.1 acen
+chr08 48100000 52200000 q11.21 gneg
+chr08 52200000 52600000 q11.22 gpos75
+chr08 52600000 55500000 q11.23 gneg
+chr08 55500000 61600000 q12.1 gpos50
+chr08 61600000 62200000 q12.2 gneg
+chr08 62200000 66000000 q12.3 gpos50
+chr08 66000000 68000000 q13.1 gneg
+chr08 68000000 70500000 q13.2 gpos50
+chr08 70500000 73900000 q13.3 gneg
+chr08 73900000 78300000 q21.11 gpos100
+chr08 78300000 80100000 q21.12 gneg
+chr08 80100000 84600000 q21.13 gpos75
+chr08 84600000 86900000 q21.2 gneg
+chr08 86900000 93300000 q21.3 gpos100
+chr08 93300000 99000000 q22.1 gneg
+chr08 99000000 101600000 q22.2 gpos25
+chr08 101600000 106200000 q22.3 gneg
+chr08 106200000 110500000 q23.1 gpos75
+chr08 110500000 112100000 q23.2 gneg
+chr08 112100000 117700000 q23.3 gpos100
+chr08 117700000 119200000 q24.11 gneg
+chr08 119200000 122500000 q24.12 gpos50
+chr08 122500000 127300000 q24.13 gneg
+chr08 127300000 131500000 q24.21 gpos50
+chr08 131500000 136400000 q24.22 gneg
+chr08 136400000 139900000 q24.23 gpos75
+chr08 139900000 146364022 q24.3 gneg
+chr09 0 2200000 p24.3 gneg
+chr09 2200000 4600000 p24.2 gpos25
+chr09 4600000 9000000 p24.1 gneg
+chr09 9000000 14200000 p23 gpos75
+chr09 14200000 16600000 p22.3 gneg
+chr09 16600000 18500000 p22.2 gpos25
+chr09 18500000 19900000 p22.1 gneg
+chr09 19900000 25600000 p21.3 gpos100
+chr09 25600000 28000000 p21.2 gneg
+chr09 28000000 33200000 p21.1 gpos100
+chr09 33200000 36300000 p13.3 gneg
+chr09 36300000 38400000 p13.2 gpos25
+chr09 38400000 41000000 p13.1 gneg
+chr09 41000000 43600000 p12 gpos50
+chr09 43600000 47300000 p11.2 gneg
+chr09 47300000 49000000 p11.1 acen
+chr09 49000000 50700000 q11 acen
+chr09 50700000 65900000 q12 gvar
+chr09 65900000 68700000 q13 gneg
+chr09 68700000 72200000 q21.11 gpos25
+chr09 72200000 74000000 q21.12 gneg
+chr09 74000000 79200000 q21.13 gpos50
+chr09 79200000 81100000 q21.2 gneg
+chr09 81100000 84100000 q21.31 gpos50
+chr09 84100000 86900000 q21.32 gneg
+chr09 86900000 90400000 q21.33 gpos50
+chr09 90400000 91800000 q22.1 gneg
+chr09 91800000 93900000 q22.2 gpos25
+chr09 93900000 96600000 q22.31 gneg
+chr09 96600000 99300000 q22.32 gpos25
+chr09 99300000 102600000 q22.33 gneg
+chr09 102600000 108200000 q31.1 gpos100
+chr09 108200000 111300000 q31.2 gneg
+chr09 111300000 114900000 q31.3 gpos25
+chr09 114900000 117700000 q32 gneg
+chr09 117700000 122500000 q33.1 gpos75
+chr09 122500000 125800000 q33.2 gneg
+chr09 125800000 130300000 q33.3 gpos25
+chr09 130300000 133500000 q34.11 gneg
+chr09 133500000 134000000 q34.12 gpos25
+chr09 134000000 135900000 q34.13 gneg
+chr09 135900000 137400000 q34.2 gpos25
+chr09 137400000 141213431 q34.3 gneg
+chr10 0 3000000 p15.3 gneg
+chr10 3000000 3800000 p15.2 gpos25
+chr10 3800000 6600000 p15.1 gneg
+chr10 6600000 12200000 p14 gpos75
+chr10 12200000 17300000 p13 gneg
+chr10 17300000 18600000 p12.33 gpos75
+chr10 18600000 18700000 p12.32 gneg
+chr10 18700000 22600000 p12.31 gpos75
+chr10 22600000 24600000 p12.2 gneg
+chr10 24600000 29600000 p12.1 gpos50
+chr10 29600000 31300000 p11.23 gneg
+chr10 31300000 34400000 p11.22 gpos25
+chr10 34400000 38000000 p11.21 gneg
+chr10 38000000 40200000 p11.1 acen
+chr10 40200000 42300000 q11.1 acen
+chr10 42300000 46100000 q11.21 gneg
+chr10 46100000 49900000 q11.22 gpos25
+chr10 49900000 52900000 q11.23 gneg
+chr10 52900000 61200000 q21.1 gpos100
+chr10 61200000 64500000 q21.2 gneg
+chr10 64500000 70600000 q21.3 gpos100
+chr10 70600000 74900000 q22.1 gneg
+chr10 74900000 77700000 q22.2 gpos50
+chr10 77700000 82000000 q22.3 gneg
+chr10 82000000 87900000 q23.1 gpos100
+chr10 87900000 89500000 q23.2 gneg
+chr10 89500000 92900000 q23.31 gpos75
+chr10 92900000 94100000 q23.32 gneg
+chr10 94100000 97000000 q23.33 gpos50
+chr10 97000000 99300000 q24.1 gneg
+chr10 99300000 101900000 q24.2 gpos50
+chr10 101900000 103000000 q24.31 gneg
+chr10 103000000 104900000 q24.32 gpos25
+chr10 104900000 105800000 q24.33 gneg
+chr10 105800000 111900000 q25.1 gpos100
+chr10 111900000 114900000 q25.2 gneg
+chr10 114900000 119100000 q25.3 gpos75
+chr10 119100000 121700000 q26.11 gneg
+chr10 121700000 123100000 q26.12 gpos50
+chr10 123100000 127500000 q26.13 gneg
+chr10 127500000 130600000 q26.2 gpos50
+chr10 130600000 135534747 q26.3 gneg
+chr11 0 2800000 p15.5 gneg
+chr11 2800000 10700000 p15.4 gpos50
+chr11 10700000 12700000 p15.3 gneg
+chr11 12700000 16200000 p15.2 gpos50
+chr11 16200000 21700000 p15.1 gneg
+chr11 21700000 26100000 p14.3 gpos100
+chr11 26100000 27200000 p14.2 gneg
+chr11 27200000 31000000 p14.1 gpos75
+chr11 31000000 36400000 p13 gneg
+chr11 36400000 43500000 p12 gpos100
+chr11 43500000 48800000 p11.2 gneg
+chr11 48800000 51600000 p11.12 gpos75
+chr11 51600000 53700000 p11.11 acen
+chr11 53700000 55700000 q11 acen
+chr11 55700000 59900000 q12.1 gpos75
+chr11 59900000 61700000 q12.2 gneg
+chr11 61700000 63400000 q12.3 gpos25
+chr11 63400000 65900000 q13.1 gneg
+chr11 65900000 68400000 q13.2 gpos25
+chr11 68400000 70400000 q13.3 gneg
+chr11 70400000 75200000 q13.4 gpos50
+chr11 75200000 77100000 q13.5 gneg
+chr11 77100000 85600000 q14.1 gpos100
+chr11 85600000 88300000 q14.2 gneg
+chr11 88300000 92800000 q14.3 gpos100
+chr11 92800000 97200000 q21 gneg
+chr11 97200000 102100000 q22.1 gpos100
+chr11 102100000 102900000 q22.2 gneg
+chr11 102900000 110400000 q22.3 gpos100
+chr11 110400000 112500000 q23.1 gneg
+chr11 112500000 114500000 q23.2 gpos50
+chr11 114500000 121200000 q23.3 gneg
+chr11 121200000 123900000 q24.1 gpos50
+chr11 123900000 127800000 q24.2 gneg
+chr11 127800000 130800000 q24.3 gpos50
+chr11 130800000 135006516 q25 gneg
+chr12 0 3300000 p13.33 gneg
+chr12 3300000 5400000 p13.32 gpos25
+chr12 5400000 10100000 p13.31 gneg
+chr12 10100000 12800000 p13.2 gpos75
+chr12 12800000 14800000 p13.1 gneg
+chr12 14800000 20000000 p12.3 gpos100
+chr12 20000000 21300000 p12.2 gneg
+chr12 21300000 26500000 p12.1 gpos100
+chr12 26500000 27800000 p11.23 gneg
+chr12 27800000 30700000 p11.22 gpos50
+chr12 30700000 33300000 p11.21 gneg
+chr12 33300000 35800000 p11.1 acen
+chr12 35800000 38200000 q11 acen
+chr12 38200000 46400000 q12 gpos100
+chr12 46400000 49100000 q13.11 gneg
+chr12 49100000 51500000 q13.12 gpos25
+chr12 51500000 54900000 q13.13 gneg
+chr12 54900000 56600000 q13.2 gpos25
+chr12 56600000 58100000 q13.3 gneg
+chr12 58100000 63100000 q14.1 gpos75
+chr12 63100000 65100000 q14.2 gneg
+chr12 65100000 67700000 q14.3 gpos50
+chr12 67700000 71500000 q15 gneg
+chr12 71500000 75700000 q21.1 gpos75
+chr12 75700000 80300000 q21.2 gneg
+chr12 80300000 86700000 q21.31 gpos100
+chr12 86700000 89000000 q21.32 gneg
+chr12 89000000 92600000 q21.33 gpos100
+chr12 92600000 96200000 q22 gneg
+chr12 96200000 101600000 q23.1 gpos75
+chr12 101600000 103800000 q23.2 gneg
+chr12 103800000 109000000 q23.3 gpos50
+chr12 109000000 111700000 q24.11 gneg
+chr12 111700000 112300000 q24.12 gpos25
+chr12 112300000 114300000 q24.13 gneg
+chr12 114300000 116800000 q24.21 gpos50
+chr12 116800000 118100000 q24.22 gneg
+chr12 118100000 120700000 q24.23 gpos50
+chr12 120700000 125900000 q24.31 gneg
+chr12 125900000 129300000 q24.32 gpos50
+chr12 129300000 133851895 q24.33 gneg
+chr13 0 4500000 p13 gvar
+chr13 4500000 10000000 p12 stalk
+chr13 10000000 16300000 p11.2 gvar
+chr13 16300000 17900000 p11.1 acen
+chr13 17900000 19500000 q11 acen
+chr13 19500000 23300000 q12.11 gneg
+chr13 23300000 25500000 q12.12 gpos25
+chr13 25500000 27800000 q12.13 gneg
+chr13 27800000 28900000 q12.2 gpos25
+chr13 28900000 32200000 q12.3 gneg
+chr13 32200000 34000000 q13.1 gpos50
+chr13 34000000 35500000 q13.2 gneg
+chr13 35500000 40100000 q13.3 gpos75
+chr13 40100000 45200000 q14.11 gneg
+chr13 45200000 45800000 q14.12 gpos25
+chr13 45800000 47300000 q14.13 gneg
+chr13 47300000 50900000 q14.2 gpos50
+chr13 50900000 55300000 q14.3 gneg
+chr13 55300000 59600000 q21.1 gpos100
+chr13 59600000 62300000 q21.2 gneg
+chr13 62300000 65700000 q21.31 gpos75
+chr13 65700000 68600000 q21.32 gneg
+chr13 68600000 73300000 q21.33 gpos100
+chr13 73300000 75400000 q22.1 gneg
+chr13 75400000 77200000 q22.2 gpos50
+chr13 77200000 79000000 q22.3 gneg
+chr13 79000000 87700000 q31.1 gpos100
+chr13 87700000 90000000 q31.2 gneg
+chr13 90000000 95000000 q31.3 gpos100
+chr13 95000000 98200000 q32.1 gneg
+chr13 98200000 99300000 q32.2 gpos25
+chr13 99300000 101700000 q32.3 gneg
+chr13 101700000 104800000 q33.1 gpos100
+chr13 104800000 107000000 q33.2 gneg
+chr13 107000000 110300000 q33.3 gpos100
+chr13 110300000 115169878 q34 gneg
+chr14 0 3700000 p13 gvar
+chr14 3700000 8100000 p12 stalk
+chr14 8100000 16100000 p11.2 gvar
+chr14 16100000 17600000 p11.1 acen
+chr14 17600000 19100000 q11.1 acen
+chr14 19100000 24600000 q11.2 gneg
+chr14 24600000 33300000 q12 gpos100
+chr14 33300000 35300000 q13.1 gneg
+chr14 35300000 36600000 q13.2 gpos50
+chr14 36600000 37800000 q13.3 gneg
+chr14 37800000 43500000 q21.1 gpos100
+chr14 43500000 47200000 q21.2 gneg
+chr14 47200000 50900000 q21.3 gpos100
+chr14 50900000 54100000 q22.1 gneg
+chr14 54100000 55500000 q22.2 gpos25
+chr14 55500000 58100000 q22.3 gneg
+chr14 58100000 62100000 q23.1 gpos75
+chr14 62100000 64800000 q23.2 gneg
+chr14 64800000 67900000 q23.3 gpos50
+chr14 67900000 70200000 q24.1 gneg
+chr14 70200000 73800000 q24.2 gpos50
+chr14 73800000 79300000 q24.3 gneg
+chr14 79300000 83600000 q31.1 gpos100
+chr14 83600000 84900000 q31.2 gneg
+chr14 84900000 89800000 q31.3 gpos100
+chr14 89800000 91900000 q32.11 gneg
+chr14 91900000 94700000 q32.12 gpos25
+chr14 94700000 96300000 q32.13 gneg
+chr14 96300000 101400000 q32.2 gpos50
+chr14 101400000 103200000 q32.31 gneg
+chr14 103200000 104000000 q32.32 gpos50
+chr14 104000000 107349540 q32.33 gneg
+chr15 0 3900000 p13 gvar
+chr15 3900000 8700000 p12 stalk
+chr15 8700000 15800000 p11.2 gvar
+chr15 15800000 19000000 p11.1 acen
+chr15 19000000 20700000 q11.1 acen
+chr15 20700000 25700000 q11.2 gneg
+chr15 25700000 28100000 q12 gpos50
+chr15 28100000 30300000 q13.1 gneg
+chr15 30300000 31200000 q13.2 gpos50
+chr15 31200000 33600000 q13.3 gneg
+chr15 33600000 40100000 q14 gpos75
+chr15 40100000 42800000 q15.1 gneg
+chr15 42800000 43600000 q15.2 gpos25
+chr15 43600000 44800000 q15.3 gneg
+chr15 44800000 49500000 q21.1 gpos75
+chr15 49500000 52900000 q21.2 gneg
+chr15 52900000 59100000 q21.3 gpos75
+chr15 59100000 59300000 q22.1 gneg
+chr15 59300000 63700000 q22.2 gpos25
+chr15 63700000 67200000 q22.31 gneg
+chr15 67200000 67300000 q22.32 gpos25
+chr15 67300000 67500000 q22.33 gneg
+chr15 67500000 72700000 q23 gpos25
+chr15 72700000 75200000 q24.1 gneg
+chr15 75200000 76600000 q24.2 gpos25
+chr15 76600000 78300000 q24.3 gneg
+chr15 78300000 81700000 q25.1 gpos50
+chr15 81700000 85200000 q25.2 gneg
+chr15 85200000 89100000 q25.3 gpos50
+chr15 89100000 94300000 q26.1 gneg
+chr15 94300000 98500000 q26.2 gpos50
+chr15 98500000 102531392 q26.3 gneg
+chr16 0 7900000 p13.3 gneg
+chr16 7900000 10500000 p13.2 gpos50
+chr16 10500000 12600000 p13.13 gneg
+chr16 12600000 14800000 p13.12 gpos50
+chr16 14800000 16800000 p13.11 gneg
+chr16 16800000 21200000 p12.3 gpos50
+chr16 21200000 24200000 p12.2 gneg
+chr16 24200000 28100000 p12.1 gpos50
+chr16 28100000 34600000 p11.2 gneg
+chr16 34600000 36600000 p11.1 acen
+chr16 36600000 38600000 q11.1 acen
+chr16 38600000 47000000 q11.2 gvar
+chr16 47000000 52600000 q12.1 gneg
+chr16 52600000 56700000 q12.2 gpos50
+chr16 56700000 57400000 q13 gneg
+chr16 57400000 66700000 q21 gpos100
+chr16 66700000 70800000 q22.1 gneg
+chr16 70800000 72900000 q22.2 gpos50
+chr16 72900000 74100000 q22.3 gneg
+chr16 74100000 79200000 q23.1 gpos75
+chr16 79200000 81700000 q23.2 gneg
+chr16 81700000 84200000 q23.3 gpos50
+chr16 84200000 87100000 q24.1 gneg
+chr16 87100000 88700000 q24.2 gpos25
+chr16 88700000 90354753 q24.3 gneg
+chr17 0 3300000 p13.3 gneg
+chr17 3300000 6500000 p13.2 gpos50
+chr17 6500000 10700000 p13.1 gneg
+chr17 10700000 16000000 p12 gpos75
+chr17 16000000 22200000 p11.2 gneg
+chr17 22200000 24000000 p11.1 acen
+chr17 24000000 25800000 q11.1 acen
+chr17 25800000 31800000 q11.2 gneg
+chr17 31800000 38100000 q12 gpos50
+chr17 38100000 38400000 q21.1 gneg
+chr17 38400000 40900000 q21.2 gpos25
+chr17 40900000 44900000 q21.31 gneg
+chr17 44900000 47400000 q21.32 gpos25
+chr17 47400000 50200000 q21.33 gneg
+chr17 50200000 57600000 q22 gpos75
+chr17 57600000 58300000 q23.1 gneg
+chr17 58300000 61100000 q23.2 gpos75
+chr17 61100000 62600000 q23.3 gneg
+chr17 62600000 64200000 q24.1 gpos50
+chr17 64200000 67100000 q24.2 gneg
+chr17 67100000 70900000 q24.3 gpos75
+chr17 70900000 74800000 q25.1 gneg
+chr17 74800000 75300000 q25.2 gpos25
+chr17 75300000 81195210 q25.3 gneg
+chr18 0 2900000 p11.32 gneg
+chr18 2900000 7100000 p11.31 gpos50
+chr18 7100000 8500000 p11.23 gneg
+chr18 8500000 10900000 p11.22 gpos25
+chr18 10900000 15400000 p11.21 gneg
+chr18 15400000 17200000 p11.1 acen
+chr18 17200000 19000000 q11.1 acen
+chr18 19000000 25000000 q11.2 gneg
+chr18 25000000 32700000 q12.1 gpos100
+chr18 32700000 37200000 q12.2 gneg
+chr18 37200000 43500000 q12.3 gpos75
+chr18 43500000 48200000 q21.1 gneg
+chr18 48200000 53800000 q21.2 gpos75
+chr18 53800000 56200000 q21.31 gneg
+chr18 56200000 59000000 q21.32 gpos50
+chr18 59000000 61600000 q21.33 gneg
+chr18 61600000 66800000 q22.1 gpos100
+chr18 66800000 68700000 q22.2 gneg
+chr18 68700000 73100000 q22.3 gpos25
+chr18 73100000 78077248 q23 gneg
+chr19 0 6900000 p13.3 gneg
+chr19 6900000 13900000 p13.2 gpos25
+chr19 13900000 14000000 p13.13 gneg
+chr19 14000000 16300000 p13.12 gpos25
+chr19 16300000 20000000 p13.11 gneg
+chr19 20000000 24400000 p12 gvar
+chr19 24400000 26500000 p11 acen
+chr19 26500000 28600000 q11 acen
+chr19 28600000 32400000 q12 gvar
+chr19 32400000 35500000 q13.11 gneg
+chr19 35500000 38300000 q13.12 gpos25
+chr19 38300000 38700000 q13.13 gneg
+chr19 38700000 43400000 q13.2 gpos25
+chr19 43400000 45200000 q13.31 gneg
+chr19 45200000 48000000 q13.32 gpos25
+chr19 48000000 51400000 q13.33 gneg
+chr19 51400000 53600000 q13.41 gpos25
+chr19 53600000 56300000 q13.42 gneg
+chr19 56300000 59128983 q13.43 gpos25
+chr20 0 5100000 p13 gneg
+chr20 5100000 9200000 p12.3 gpos75
+chr20 9200000 12100000 p12.2 gneg
+chr20 12100000 17900000 p12.1 gpos75
+chr20 17900000 21300000 p11.23 gneg
+chr20 21300000 22300000 p11.22 gpos25
+chr20 22300000 25600000 p11.21 gneg
+chr20 25600000 27500000 p11.1 acen
+chr20 27500000 29400000 q11.1 acen
+chr20 29400000 32100000 q11.21 gneg
+chr20 32100000 34400000 q11.22 gpos25
+chr20 34400000 37600000 q11.23 gneg
+chr20 37600000 41700000 q12 gpos75
+chr20 41700000 42100000 q13.11 gneg
+chr20 42100000 46400000 q13.12 gpos25
+chr20 46400000 49800000 q13.13 gneg
+chr20 49800000 55000000 q13.2 gpos75
+chr20 55000000 56500000 q13.31 gneg
+chr20 56500000 58400000 q13.32 gpos50
+chr20 58400000 63025520 q13.33 gneg
+chr21 0 2800000 p13 gvar
+chr21 2800000 6800000 p12 stalk
+chr21 6800000 10900000 p11.2 gvar
+chr21 10900000 13200000 p11.1 acen
+chr21 13200000 14300000 q11.1 acen
+chr21 14300000 16400000 q11.2 gneg
+chr21 16400000 24000000 q21.1 gpos100
+chr21 24000000 26800000 q21.2 gneg
+chr21 26800000 31500000 q21.3 gpos75
+chr21 31500000 35800000 q22.11 gneg
+chr21 35800000 37800000 q22.12 gpos50
+chr21 37800000 39700000 q22.13 gneg
+chr21 39700000 42600000 q22.2 gpos50
+chr21 42600000 48129895 q22.3 gneg
+chr22 0 3800000 p13 gvar
+chr22 3800000 8300000 p12 stalk
+chr22 8300000 12200000 p11.2 gvar
+chr22 12200000 14700000 p11.1 acen
+chr22 14700000 17900000 q11.1 acen
+chr22 17900000 22200000 q11.21 gneg
+chr22 22200000 23500000 q11.22 gpos25
+chr22 23500000 25900000 q11.23 gneg
+chr22 25900000 29600000 q12.1 gpos50
+chr22 29600000 32200000 q12.2 gneg
+chr22 32200000 37600000 q12.3 gpos50
+chr22 37600000 41000000 q13.1 gneg
+chr22 41000000 44200000 q13.2 gpos50
+chr22 44200000 48400000 q13.31 gneg
+chr22 48400000 49400000 q13.32 gpos50
+chr22 49400000 51304566 q13.33 gneg
+chrX 0 4300000 p22.33 gneg
+chrX 4300000 6000000 p22.32 gpos50
+chrX 6000000 9500000 p22.31 gneg
+chrX 9500000 17100000 p22.2 gpos50
+chrX 17100000 19300000 p22.13 gneg
+chrX 19300000 21900000 p22.12 gpos50
+chrX 21900000 24900000 p22.11 gneg
+chrX 24900000 29300000 p21.3 gpos100
+chrX 29300000 31500000 p21.2 gneg
+chrX 31500000 37600000 p21.1 gpos100
+chrX 37600000 42400000 p11.4 gneg
+chrX 42400000 46400000 p11.3 gpos75
+chrX 46400000 49800000 p11.23 gneg
+chrX 49800000 54800000 p11.22 gpos25
+chrX 54800000 58100000 p11.21 gneg
+chrX 58100000 60600000 p11.1 acen
+chrX 60600000 63000000 q11.1 acen
+chrX 63000000 64600000 q11.2 gneg
+chrX 64600000 67800000 q12 gpos50
+chrX 67800000 71800000 q13.1 gneg
+chrX 71800000 73900000 q13.2 gpos50
+chrX 73900000 76000000 q13.3 gneg
+chrX 76000000 84600000 q21.1 gpos100
+chrX 84600000 86200000 q21.2 gneg
+chrX 86200000 91800000 q21.31 gpos100
+chrX 91800000 93500000 q21.32 gneg
+chrX 93500000 98300000 q21.33 gpos75
+chrX 98300000 102600000 q22.1 gneg
+chrX 102600000 103700000 q22.2 gpos50
+chrX 103700000 108700000 q22.3 gneg
+chrX 108700000 116500000 q23 gpos75
+chrX 116500000 120900000 q24 gneg
+chrX 120900000 128700000 q25 gpos100
+chrX 128700000 130400000 q26.1 gneg
+chrX 130400000 133600000 q26.2 gpos25
+chrX 133600000 138000000 q26.3 gneg
+chrX 138000000 140300000 q27.1 gpos75
+chrX 140300000 142100000 q27.2 gneg
+chrX 142100000 147100000 q27.3 gpos100
+chrX 147100000 155270560 q28 gneg
+chrY 0 2500000 p11.32 gneg
+chrY 2500000 3000000 p11.31 gpos50
+chrY 3000000 11600000 p11.2 gneg
+chrY 11600000 12500000 p11.1 acen
+chrY 12500000 13400000 q11.1 acen
+chrY 13400000 15100000 q11.21 gneg
+chrY 15100000 19800000 q11.221 gpos50
+chrY 19800000 22100000 q11.222 gneg
+chrY 22100000 26200000 q11.223 gpos50
+chrY 26200000 28800000 q11.23 gneg
+chrY 28800000 59373566 q12 gvar
diff --git a/data/default.DNAcopy.bdry.R b/data/default.DNAcopy.bdry.R
new file mode 100644
index 0000000..a38efae
--- /dev/null
+++ b/data/default.DNAcopy.bdry.R
@@ -0,0 +1,472 @@
+"default.DNAcopy.bdry" <-
+as.integer(c(9500, 8352, 9864, 7316, 9174, 9936, 6473, 8360,
+9474, 9962, 5809, 7623, 8843, 9627, 9975, 5259, 6976, 8209, 9113,
+9714, 9982, 4815, 6429, 7633, 8575, 9290, 9773, 9986, 4439, 5956,
+7116, 8058, 8820, 9411, 9813, 9989, 4121, 5548, 6659, 7582, 8358,
+8998, 9499, 9842, 9991, 3840, 5186, 6248, 7145, 7916, 8576, 9129,
+9564, 9863, 9993, 3603, 4875, 5889, 6756, 7512, 8175, 8750, 9234,
+9617, 9880, 9994, 3391, 4596, 5565, 6400, 7138, 7794, 8376, 8885,
+9316, 9659, 9894, 9995, 3209, 4353, 5279, 6083, 6799, 7443, 8023,
+8542, 8997, 9384, 9693, 9905, 9995, 3042, 4132, 5018, 5792, 6486,
+7116, 7689, 8208, 8676, 9088, 9440, 9722, 9915, 9996, 2887, 3926,
+4775, 5521, 6193, 6807, 7370, 7887, 8359, 8785, 9162, 9486, 9744,
+9922, 9996, 2757, 3751, 4565, 5283, 5933, 6530, 7082, 7591, 8061,
+8493, 8883, 9230, 9527, 9766, 9929, 9997, 2640, 3592, 4375, 5066,
+5696, 6275, 6813, 7314, 7780, 8211, 8608, 8968, 9288, 9564, 9784,
+9935, 9997, 2518, 3431, 4184, 4851, 5461, 6024, 6550, 7041, 7501,
+7930, 8330, 8698, 9034, 9333, 9591, 9797, 9939, 9998, 2420, 3297,
+4023, 4667, 5257, 5804, 6316, 6797, 7249, 7673, 8071, 8442, 8785,
+9098, 9378, 9619, 9812, 9943, 9998, 2331, 3175, 3874, 4498, 5069,
+5600, 6099, 6568, 7011, 7430, 7824, 8195, 8542, 8862, 9156, 9417,
+9643, 9824, 9947, 9998, 2246, 3060, 3735, 4338, 4892, 5408, 5894,
+6352, 6786, 7198, 7588, 7956, 8304, 8629, 8930, 9206, 9452, 9665,
+9835, 9951, 9998, 2169, 2954, 3607, 4191, 4728, 5230, 5703, 6150,
+6575, 6979, 7364, 7729, 8074, 8401, 8707, 8991, 9251, 9483, 9684,
+9845, 9954, 9998, 2086, 2844, 3476, 4042, 4564, 5052, 5512, 5949,
+6365, 6762, 7141, 7502, 7846, 8172, 8481, 8770, 9040, 9286, 9508,
+9699, 9852, 9956, 9999, 2014, 2747, 3359, 3908, 4414, 4889, 5338,
+5765, 6172, 6561, 6933, 7289, 7630, 7955, 8263, 8556, 8830, 9086,
+9321, 9531, 9713, 9859, 9958, 9999, 1951, 2661, 3254, 3787, 4279,
+4741, 5179, 5595, 5993, 6375, 6740, 7091, 7427, 7749, 8056, 8349,
+8627, 8887, 9131, 9354, 9554, 9727, 9866, 9961, 9999, 1892, 2580,
+3156, 3674, 4153, 4603, 5029, 5436, 5825, 6198, 6557, 6902, 7234,
+7552, 7857, 8149, 8427, 8691, 8940, 9171, 9384, 9575, 9740, 9873,
+9963, 9999, 1837, 2504, 3064, 3567, 4033, 4472, 4888, 5285, 5665,
+6031, 6383, 6722, 7048, 7362, 7665, 7955, 8233, 8498, 8750, 8987,
+9208, 9412, 9595, 9752, 9879, 9965, 9999, 1786, 2435, 2978, 3468,
+3922, 4350, 4755, 5143, 5516, 5874, 6219, 6551, 6873, 7183, 7482,
+7769, 8046, 8311, 8564, 8805, 9032, 9243, 9438, 9613, 9764, 9885,
+9966, 9999, 1737, 2367, 2896, 3373, 3815, 4232, 4629, 5008, 5372,
+5722, 6061, 6388, 6703, 7009, 7304, 7589, 7864, 8128, 8382, 8624,
+8854, 9071, 9274, 9461, 9629, 9774, 9890, 9968, 9999, 1691, 2304,
+2820, 3284, 3716, 4122, 4509, 4880, 5236, 5580, 5911, 6232, 6543,
+6844, 7135, 7417, 7689, 7952, 8205, 8447, 8680, 8900, 9109, 9304,
+9483, 9644, 9783, 9894, 9970, 9999, 1647, 2244, 2745, 3199, 3619,
+4017, 4395, 4757, 5106, 5442, 5768, 6083, 6388, 6684, 6971, 7249,
+7519, 7780, 8032, 8274, 8507, 8730, 8943, 9143, 9330, 9503, 9658,
+9791, 9899, 9971, 9999, 1606, 2187, 2677, 3119, 3530, 3918, 4287,
+4642, 4983, 5313, 5632, 5941, 6241, 6532, 6815, 7090, 7356, 7615,
+7865, 8107, 8340, 8564, 8778, 8982, 9175, 9355, 9521, 9671, 9800,
+9903, 9972, 9999, 1567, 2134, 2612, 3043, 3445, 3824, 4185, 4532,
+4866, 5189, 5502, 5806, 6101, 6387, 6666, 6937, 7200, 7456, 7704,
+7944, 8176, 8401, 8616, 8823, 9019, 9205, 9379, 9539, 9683, 9807,
+9907, 9973, 9999, 1530, 2083, 2549, 2970, 3363, 3733, 4087, 4427,
+4754, 5071, 5378, 5676, 5966, 6248, 6522, 6789, 7049, 7302, 7547,
+7786, 8017, 8241, 8457, 8665, 8864, 9054, 9233, 9401, 9555, 9694,
+9814, 9910, 9974, 9999, 1488, 2028, 2482, 2894, 3278, 3640, 3986,
+4319, 4640, 4950, 5252, 5545, 5829, 6107, 6377, 6640, 6897, 7147,
+7390, 7627, 7857, 8080, 8296, 8505, 8706, 8898, 9082, 9256, 9418,
+9568, 9703, 9819, 9912, 9975, 9999, 1447, 1973, 2417, 2819, 3194,
+3549, 3888, 4214, 4528, 4833, 5129, 5416, 5696, 5969, 6236, 6495,
+6748, 6995, 7236, 7471, 7700, 7922, 8138, 8347, 8549, 8743, 8930,
+9108, 9276, 9434, 9580, 9710, 9824, 9914, 9976, 10000, 1411,
+1925, 2359, 2752, 3119, 3466, 3798, 4118, 4426, 4725, 5015, 5298,
+5573, 5842, 6104, 6360, 6609, 6853, 7091, 7324, 7550, 7771, 7986,
+8195, 8397, 8592, 8781, 8962, 9134, 9297, 9450, 9592, 9719, 9829,
+9917, 9976, 10000, 1380, 1883, 2307, 2692, 3051, 3391, 3717,
+4030, 4332, 4625, 4910, 5188, 5459, 5723, 5980, 6232, 6479, 6719,
+6955, 7184, 7409, 7628, 7841, 8049, 8251, 8446, 8636, 8818, 8993,
+9161, 9319, 9467, 9604, 9727, 9834, 9920, 9977, 10000, 1351,
+1842, 2258, 2635, 2986, 3320, 3638, 3945, 4242, 4530, 4810, 5083,
+5349, 5608, 5862, 6110, 6353, 6590, 6823, 7050, 7272, 7489, 7700,
+7907, 8108, 8303, 8493, 8677, 8854, 9023, 9186, 9339, 9483, 9616,
+9736, 9839, 9922, 9978, 10000, 1322, 1803, 2210, 2579, 2924,
+3250, 3563, 3864, 4155, 4438, 4713, 4981, 5242, 5498, 5748, 5992,
+6231, 6466, 6695, 6919, 7139, 7354, 7564, 7769, 7969, 8164, 8353,
+8537, 8715, 8887, 9052, 9209, 9358, 9498, 9627, 9743, 9844, 9925,
+9979, 10000, 1296, 1767, 2165, 2527, 2865, 3185, 3492, 3787,
+4073, 4351, 4621, 4884, 5142, 5393, 5639, 5880, 6115, 6346, 6572,
+6794, 7011, 7224, 7432, 7635, 7834, 8028, 8217, 8401, 8579, 8752,
+8919, 9079, 9232, 9377, 9513, 9638, 9751, 9849, 9927, 9979, 10000,
+1270, 1731, 2122, 2476, 2808, 3122, 3423, 3713, 3993, 4266, 4532,
+4791, 5043, 5291, 5533, 5770, 6002, 6230, 6453, 6672, 6887, 7097,
+7303, 7504, 7702, 7895, 8083, 8266, 8445, 8618, 8786, 8948, 9104,
+9253, 9394, 9526, 9648, 9758, 9853, 9929, 9980, 10000, 1245,
+1697, 2080, 2427, 2752, 3061, 3356, 3641, 3917, 4184, 4445, 4700,
+4949, 5192, 5431, 5664, 5893, 6118, 6338, 6554, 6766, 6974, 7178,
+7378, 7573, 7765, 7952, 8135, 8313, 8487, 8655, 8819, 8976, 9128,
+9273, 9410, 9539, 9657, 9764, 9857, 9931, 9981, 10000, 1222,
+1665, 2040, 2382, 2700, 3003, 3293, 3573, 3844, 4107, 4364, 4614,
+4859, 5098, 5333, 5563, 5789, 6010, 6228, 6441, 6650, 6856, 7057,
+7255, 7449, 7639, 7825, 8007, 8185, 8358, 8527, 8691, 8850, 9004,
+9151, 9292, 9426, 9551, 9667, 9771, 9861, 9933, 9981, 10000,
+1198, 1633, 2001, 2336, 2649, 2946, 3231, 3506, 3772, 4031, 4284,
+4530, 4771, 5007, 5238, 5464, 5687, 5905, 6120, 6330, 6537, 6740,
+6940, 7135, 7328, 7516, 7701, 7882, 8059, 8232, 8400, 8565, 8725,
+8879, 9029, 9173, 9310, 9440, 9562, 9675, 9777, 9865, 9935, 9982,
+10000, 1177, 1604, 1965, 2294, 2601, 2893, 3173, 3443, 3705,
+3960, 4208, 4450, 4687, 4919, 5147, 5370, 5590, 5805, 6016, 6224,
+6429, 6629, 6826, 7020, 7210, 7397, 7581, 7760, 7936, 8109, 8277,
+8441, 8601, 8757, 8908, 9053, 9194, 9327, 9454, 9574, 9683, 9783,
+9868, 9937, 9982, 10000, 1156, 1574, 1929, 2252, 2554, 2841,
+3116, 3381, 3639, 3889, 4133, 4372, 4605, 4834, 5058, 5278, 5494,
+5707, 5915, 6120, 6322, 6520, 6715, 6907, 7096, 7281, 7462, 7641,
+7816, 7987, 8155, 8319, 8479, 8635, 8787, 8934, 9076, 9213, 9344,
+9468, 9584, 9691, 9788, 9871, 9938, 9983, 10000, 1135, 1547,
+1895, 2212, 2509, 2791, 3061, 3322, 3576, 3822, 4062, 4297, 4527,
+4752, 4973, 5190, 5403, 5612, 5818, 6021, 6220, 6416, 6609, 6798,
+6985, 7168, 7348, 7525, 7699, 7869, 8036, 8200, 8360, 8516, 8668,
+8816, 8960, 9099, 9232, 9359, 9480, 9594, 9699, 9793, 9875, 9940,
+9983, 10000, 1115, 1519, 1861, 2173, 2465, 2742, 3008, 3265,
+3514, 3756, 3993, 4224, 4450, 4672, 4890, 5103, 5313, 5520, 5723,
+5923, 6120, 6313, 6504, 6691, 6876, 7057, 7236, 7412, 7584, 7753,
+7920, 8082, 8242, 8398, 8551, 8699, 8844, 8984, 9119, 9250, 9374,
+9492, 9603, 9706, 9798, 9878, 9941, 9984, 10000, 1097, 1493,
+1830, 2136, 2423, 2696, 2957, 3210, 3455, 3694, 3926, 4154, 4377,
+4596, 4810, 5021, 5228, 5432, 5632, 5830, 6024, 6215, 6403, 6589,
+6771, 6951, 7128, 7302, 7473, 7641, 7806, 7968, 8127, 8283, 8435,
+8584, 8729, 8870, 9007, 9140, 9267, 9389, 9504, 9612, 9713, 9803,
+9881, 9943, 9984, 10000, 1079, 1469, 1800, 2101, 2383, 2651,
+2909, 3157, 3399, 3634, 3863, 4087, 4307, 4522, 4734, 4941, 5146,
+5347, 5545, 5739, 5931, 6120, 6306, 6489, 6670, 6848, 7023, 7195,
+7365, 7532, 7696, 7857, 8015, 8170, 8323, 8471, 8617, 8758, 8896,
+9030, 9159, 9284, 9403, 9516, 9621, 9719, 9807, 9883, 9944, 9985,
+10000, 1061, 1444, 1770, 2066, 2343, 2607, 2860, 3105, 3343,
+3574, 3800, 4021, 4237, 4449, 4658, 4863, 5064, 5262, 5458, 5650,
+5839, 6026, 6210, 6391, 6570, 6746, 6919, 7090, 7258, 7424, 7587,
+7747, 7904, 8059, 8211, 8359, 8505, 8647, 8785, 8920, 9051, 9177,
+9299, 9415, 9526, 9630, 9725, 9812, 9886, 9945, 9985, 10000,
+1044, 1421, 1741, 2032, 2305, 2565, 2814, 3055, 3289, 3517, 3739,
+3957, 4170, 4380, 4585, 4787, 4986, 5182, 5374, 5564, 5751, 5936,
+6118, 6297, 6473, 6648, 6819, 6988, 7155, 7319, 7481, 7640, 7796,
+7950, 8101, 8250, 8395, 8537, 8676, 8811, 8943, 9071, 9195, 9314,
+9428, 9536, 9638, 9731, 9816, 9889, 9947, 9985, 10000, 1027,
+1398, 1712, 1999, 2268, 2523, 2769, 3006, 3237, 3461, 3680, 3895,
+4105, 4311, 4514, 4713, 4909, 5103, 5293, 5480, 5665, 5847, 6027,
+6204, 6379, 6551, 6721, 6889, 7054, 7216, 7377, 7535, 7690, 7843,
+7994, 8141, 8286, 8428, 8567, 8703, 8836, 8965, 9091, 9212, 9328,
+9440, 9546, 9645, 9737, 9819, 9891, 9948, 9986, 10000, 1012,
+1376, 1686, 1968, 2233, 2485, 2727, 2960, 3187, 3408, 3624, 3836,
+4043, 4247, 4447, 4643, 4837, 5027, 5215, 5400, 5583, 5763, 5940,
+6115, 6288, 6459, 6627, 6793, 6956, 7117, 7277, 7433, 7588, 7740,
+7889, 8036, 8181, 8323, 8462, 8598, 8731, 8861, 8987, 9110, 9228,
+9342, 9452, 9555, 9653, 9743, 9823, 9893, 9949, 9986, 10000,
+996, 1355, 1660, 1938, 2199, 2447, 2685, 2915, 3139, 3357, 3570,
+3778, 3983, 4183, 4380, 4575, 4765, 4954, 5139, 5322, 5502, 5680,
+5855, 6028, 6199, 6368, 6534, 6699, 6861, 7021, 7178, 7334, 7487,
+7638, 7787, 7933, 8077, 8218, 8357, 8493, 8626, 8757, 8884, 9008,
+9128, 9244, 9356, 9463, 9564, 9660, 9748, 9827, 9896, 9950, 9986,
+10000, 981, 1334, 1634, 1908, 2165, 2409, 2643, 2870, 3091, 3306,
+3516, 3721, 3923, 4121, 4315, 4507, 4695, 4881, 5064, 5245, 5423,
+5598, 5772, 5943, 6112, 6279, 6443, 6606, 6767, 6925, 7081, 7235,
+7388, 7538, 7685, 7831, 7974, 8115, 8253, 8389, 8523, 8653, 8781,
+8906, 9027, 9145, 9259, 9368, 9473, 9573, 9666, 9753, 9830, 9898,
+9951, 9987, 10000, 966, 1314, 1610, 1880, 2133, 2373, 2605, 2828,
+3046, 3257, 3464, 3667, 3866, 4061, 4253, 4442, 4628, 4812, 4992,
+5171, 5347, 5520, 5692, 5861, 6028, 6193, 6356, 6517, 6676, 6833,
+6988, 7141, 7292, 7440, 7587, 7732, 7874, 8014, 8152, 8288, 8421,
+8552, 8680, 8805, 8927, 9046, 9162, 9273, 9381, 9484, 9581, 9673,
+9758, 9834, 9900, 9952, 9987, 10000, 953, 1295, 1587, 1853, 2102,
+2339, 2567, 2787, 3002, 3211, 3415, 3615, 3811, 4004, 4193, 4380,
+4563, 4744, 4923, 5099, 5273, 5444, 5614, 5781, 5946, 6110, 6271,
+6430, 6588, 6743, 6897, 7048, 7198, 7345, 7491, 7635, 7777, 7916,
+8053, 8189, 8322, 8452, 8580, 8706, 8828, 8948, 9065, 9178, 9287,
+9393, 9494, 9590, 9679, 9763, 9837, 9902, 9953, 9987, 10000,
+939, 1277, 1564, 1826, 2072, 2306, 2530, 2748, 2959, 3165, 3367,
+3564, 3757, 3948, 4135, 4319, 4500, 4679, 4855, 5029, 5201, 5371,
+5538, 5704, 5867, 6028, 6188, 6346, 6502, 6655, 6808, 6958, 7106,
+7253, 7397, 7540, 7681, 7820, 7956, 8091, 8224, 8354, 8482, 8607,
+8730, 8851, 8968, 9082, 9194, 9301, 9404, 9503, 9597, 9686, 9767,
+9841, 9904, 9954, 9987, 10000, 925, 1258, 1541, 1799, 2041, 2272,
+2493, 2708, 2916, 3119, 3318, 3513, 3704, 3892, 4076, 4258, 4437,
+4614, 4788, 4960, 5130, 5297, 5463, 5626, 5788, 5948, 6106, 6262,
+6416, 6569, 6719, 6868, 7015, 7161, 7304, 7446, 7586, 7724, 7860,
+7994, 8126, 8256, 8384, 8509, 8633, 8753, 8871, 8987, 9099, 9208,
+9313, 9415, 9512, 9605, 9691, 9771, 9843, 9906, 9955, 9988, 10000,
+913, 1241, 1520, 1774, 2013, 2240, 2459, 2670, 2876, 3076, 3272,
+3465, 3653, 3838, 4021, 4200, 4377, 4552, 4724, 4894, 5061, 5227,
+5391, 5552, 5712, 5870, 6027, 6181, 6334, 6485, 6634, 6782, 6928,
+7072, 7214, 7355, 7494, 7631, 7766, 7900, 8031, 8161, 8288, 8414,
+8537, 8658, 8776, 8892, 9005, 9115, 9222, 9326, 9426, 9521, 9612,
+9697, 9776, 9846, 9907, 9956, 9988, 10000, 900, 1224, 1499, 1750,
+1985, 2209, 2425, 2634, 2837, 3034, 3228, 3418, 3604, 3787, 3967,
+4144, 4319, 4491, 4661, 4829, 4995, 5159, 5320, 5480, 5639, 5795,
+5950, 6103, 6254, 6403, 6551, 6697, 6842, 6985, 7126, 7266, 7404,
+7540, 7674, 7807, 7938, 8067, 8194, 8319, 8442, 8563, 8682, 8798,
+8912, 9023, 9131, 9236, 9338, 9436, 9530, 9619, 9702, 9780, 9849,
+9909, 9957, 9988, 10000, 889, 1207, 1479, 1726, 1958, 2179, 2392,
+2598, 2798, 2994, 3185, 3372, 3556, 3737, 3914, 4089, 4262, 4432,
+4600, 4766, 4930, 5092, 5252, 5410, 5567, 5721, 5874, 6026, 6176,
+6324, 6470, 6615, 6758, 6900, 7040, 7179, 7316, 7451, 7585, 7716,
+7847, 7975, 8102, 8226, 8349, 8470, 8589, 8705, 8820, 8931, 9040,
+9147, 9250, 9350, 9446, 9538, 9626, 9708, 9784, 9852, 9911, 9958,
+9989, 10000, 876, 1190, 1458, 1702, 1931, 2149, 2359, 2563, 2760,
+2953, 3142, 3327, 3508, 3687, 3862, 4035, 4206, 4374, 4540, 4704,
+4866, 5026, 5184, 5341, 5496, 5649, 5800, 5950, 6098, 6245, 6390,
+6534, 6676, 6816, 6955, 7093, 7228, 7363, 7496, 7627, 7756, 7884,
+8010, 8134, 8257, 8378, 8496, 8613, 8727, 8840, 8949, 9057, 9161,
+9263, 9361, 9455, 9546, 9632, 9713, 9787, 9854, 9912, 9958, 9989,
+10000, 865, 1175, 1439, 1679, 1905, 2121, 2328, 2529, 2724, 2914,
+3100, 3283, 3462, 3639, 3812, 3983, 4151, 4318, 4482, 4644, 4804,
+4962, 5119, 5274, 5427, 5578, 5728, 5877, 6023, 6169, 6312, 6455,
+6595, 6735, 6872, 7009, 7144, 7277, 7409, 7539, 7668, 7795, 7921,
+8044, 8166, 8287, 8405, 8522, 8637, 8749, 8859, 8967, 9073, 9175,
+9275, 9372, 9465, 9554, 9638, 9718, 9791, 9857, 9914, 9959, 9989,
+10000, 854, 1160, 1420, 1658, 1881, 2094, 2298, 2496, 2689, 2877,
+3061, 3241, 3418, 3592, 3764, 3933, 4099, 4263, 4426, 4586, 4744,
+4901, 5056, 5209, 5360, 5510, 5659, 5805, 5951, 6095, 6237, 6378,
+6517, 6655, 6792, 6927, 7061, 7193, 7324, 7454, 7582, 7708, 7833,
+7956, 8078, 8198, 8316, 8433, 8547, 8660, 8770, 8879, 8985, 9088,
+9189, 9287, 9382, 9474, 9561, 9644, 9723, 9795, 9859, 9915, 9960,
+9989, 10000, 843, 1145, 1402, 1636, 1857, 2067, 2269, 2464, 2654,
+2840, 3022, 3200, 3375, 3547, 3716, 3883, 4048, 4210, 4371, 4529,
+4686, 4840, 4994, 5145, 5295, 5443, 5590, 5735, 5879, 6022, 6163,
+6302, 6441, 6578, 6713, 6847, 6980, 7111, 7241, 7370, 7497, 7623,
+7747, 7869, 7990, 8110, 8228, 8344, 8459, 8571, 8682, 8791, 8897,
+9001, 9103, 9203, 9299, 9392, 9482, 9569, 9650, 9727, 9798, 9862,
+9917, 9961, 9989, 10000, 832, 1130, 1383, 1615, 1832, 2039, 2239,
+2432, 2620, 2803, 2983, 3159, 3332, 3502, 3669, 3834, 3997, 4157,
+4316, 4473, 4627, 4781, 4932, 5082, 5230, 5377, 5522, 5666, 5809,
+5950, 6090, 6228, 6365, 6501, 6635, 6768, 6900, 7030, 7159, 7287,
+7413, 7538, 7661, 7783, 7904, 8023, 8140, 8256, 8371, 8483, 8594,
+8703, 8810, 8915, 9017, 9117, 9215, 9310, 9402, 9491, 9575, 9656,
+9731, 9801, 9864, 9918, 9961, 9990, 10000, 821, 1115, 1366, 1594,
+1809, 2014, 2211, 2402, 2587, 2768, 2946, 3120, 3290, 3458, 3624,
+3787, 3948, 4107, 4263, 4418, 4571, 4723, 4873, 5021, 5168, 5313,
+5457, 5600, 5741, 5880, 6019, 6156, 6292, 6426, 6559, 6691, 6822,
+6951, 7079, 7206, 7331, 7455, 7578, 7699, 7819, 7938, 8055, 8171,
+8285, 8397, 8508, 8617, 8724, 8829, 8932, 9033, 9132, 9228, 9321,
+9412, 9499, 9582, 9661, 9736, 9804, 9866, 9920, 9962, 9990, 10000,
+811, 1102, 1349, 1575, 1787, 1989, 2184, 2372, 2555, 2734, 2910,
+3081, 3250, 3416, 3580, 3741, 3900, 4057, 4212, 4365, 4517, 4667,
+4815, 4962, 5107, 5251, 5393, 5534, 5674, 5813, 5950, 6085, 6220,
+6353, 6485, 6616, 6746, 6874, 7001, 7127, 7251, 7375, 7496, 7617,
+7736, 7854, 7971, 8086, 8200, 8312, 8422, 8531, 8639, 8744, 8847,
+8949, 9048, 9145, 9240, 9332, 9421, 9507, 9589, 9667, 9740, 9808,
+9868, 9921, 9963, 9990, 10000, 802, 1089, 1333, 1556, 1765, 1965,
+2157, 2344, 2525, 2702, 2875, 3045, 3212, 3376, 3538, 3697, 3854,
+4009, 4163, 4314, 4464, 4613, 4759, 4905, 5048, 5191, 5332, 5471,
+5610, 5747, 5882, 6017, 6150, 6282, 6413, 6543, 6671, 6799, 6925,
+7050, 7173, 7296, 7417, 7537, 7655, 7773, 7889, 8003, 8117, 8228,
+8339, 8448, 8555, 8660, 8764, 8866, 8966, 9063, 9159, 9252, 9342,
+9430, 9514, 9595, 9672, 9744, 9811, 9871, 9922, 9963, 9990, 10000,
+793, 1076, 1317, 1537, 1744, 1941, 2131, 2315, 2494, 2669, 2841,
+3008, 3173, 3336, 3496, 3653, 3809, 3962, 4114, 4264, 4412, 4559,
+4704, 4848, 4990, 5131, 5271, 5409, 5546, 5682, 5816, 5950, 6082,
+6213, 6342, 6471, 6598, 6725, 6850, 6974, 7096, 7218, 7338, 7457,
+7575, 7692, 7807, 7922, 8034, 8146, 8256, 8364, 8471, 8577, 8681,
+8783, 8883, 8981, 9078, 9172, 9263, 9353, 9439, 9522, 9602, 9677,
+9748, 9814, 9873, 9924, 9964, 9990, 10000, 784, 1063, 1302, 1519,
+1724, 1919, 2106, 2288, 2465, 2638, 2807, 2973, 3136, 3297, 3455,
+3611, 3765, 3917, 4067, 4215, 4362, 4507, 4651, 4793, 4934, 5073,
+5212, 5349, 5484, 5619, 5752, 5884, 6015, 6145, 6273, 6401, 6527,
+6652, 6776, 6899, 7021, 7142, 7262, 7380, 7497, 7613, 7728, 7841,
+7954, 8065, 8174, 8283, 8390, 8495, 8599, 8701, 8801, 8900, 8997,
+9092, 9184, 9275, 9362, 9447, 9529, 9608, 9682, 9752, 9817, 9875,
+9925, 9964, 9990, 10000, 773, 1050, 1285, 1500, 1702, 1895, 2080,
+2260, 2435, 2606, 2773, 2937, 3099, 3257, 3414, 3568, 3720, 3870,
+4019, 4166, 4311, 4454, 4597, 4737, 4877, 5015, 5152, 5287, 5422,
+5555, 5687, 5818, 5948, 6076, 6204, 6330, 6456, 6580, 6703, 6825,
+6946, 7066, 7185, 7302, 7419, 7534, 7648, 7761, 7873, 7984, 8093,
+8201, 8307, 8413, 8516, 8619, 8719, 8818, 8916, 9011, 9105, 9196,
+9285, 9371, 9455, 9536, 9613, 9687, 9755, 9819, 9876, 9926, 9965,
+9991, 10000, 765, 1038, 1271, 1483, 1683, 1873, 2057, 2234, 2407,
+2576, 2742, 2904, 3063, 3220, 3375, 3528, 3678, 3827, 3974, 4119,
+4263, 4405, 4546, 4685, 4823, 4960, 5095, 5230, 5363, 5495, 5625,
+5755, 5884, 6011, 6138, 6263, 6387, 6510, 6633, 6754, 6874, 6993,
+7111, 7227, 7343, 7458, 7571, 7684, 7795, 7905, 8014, 8122, 8228,
+8333, 8436, 8539, 8639, 8739, 8836, 8932, 9026, 9118, 9208, 9296,
+9381, 9463, 9543, 9619, 9691, 9759, 9822, 9878, 9927, 9966, 9991,
+10000, 756, 1026, 1256, 1466, 1663, 1852, 2033, 2209, 2380, 2547,
+2710, 2871, 3028, 3184, 3337, 3488, 3637, 3784, 3929, 4073, 4215,
+4356, 4495, 4633, 4770, 4905, 5039, 5172, 5304, 5435, 5565, 5693,
+5821, 5947, 6072, 6196, 6320, 6442, 6563, 6683, 6802, 6921, 7038,
+7154, 7269, 7383, 7495, 7607, 7718, 7827, 7936, 8043, 8149, 8254,
+8357, 8459, 8560, 8659, 8757, 8853, 8947, 9040, 9131, 9219, 9306,
+9390, 9471, 9549, 9624, 9696, 9763, 9825, 9880, 9928, 9966, 9991,
+10000, 748, 1014, 1242, 1449, 1644, 1831, 2010, 2184, 2353, 2518,
+2680, 2839, 2995, 3148, 3300, 3449, 3596, 3742, 3886, 4028, 4169,
+4308, 4446, 4583, 4718, 4852, 4985, 5117, 5247, 5377, 5505, 5633,
+5759, 5884, 6008, 6132, 6254, 6375, 6495, 6614, 6733, 6850, 6966,
+7081, 7196, 7309, 7421, 7532, 7642, 7751, 7859, 7966, 8071, 8176,
+8279, 8381, 8481, 8581, 8678, 8775, 8869, 8962, 9054, 9143, 9230,
+9316, 9398, 9479, 9556, 9630, 9700, 9766, 9827, 9882, 9929, 9967,
+9991, 10000, 740, 1003, 1228, 1433, 1626, 1810, 1988, 2159, 2327,
+2490, 2650, 2807, 2962, 3114, 3263, 3411, 3557, 3701, 3843, 3984,
+4124, 4262, 4398, 4533, 4667, 4800, 4932, 5062, 5192, 5320, 5447,
+5574, 5699, 5823, 5946, 6068, 6189, 6310, 6429, 6547, 6664, 6781,
+6896, 7011, 7124, 7237, 7348, 7458, 7568, 7676, 7784, 7890, 7995,
+8099, 8202, 8304, 8404, 8503, 8601, 8697, 8792, 8885, 8977, 9067,
+9155, 9241, 9325, 9407, 9486, 9562, 9635, 9705, 9770, 9830, 9884,
+9930, 9967, 9991, 10000, 732, 992, 1215, 1418, 1608, 1790, 1966,
+2136, 2301, 2463, 2621, 2776, 2929, 3080, 3228, 3374, 3518, 3661,
+3802, 3941, 4079, 4216, 4351, 4485, 4618, 4749, 4880, 5009, 5137,
+5264, 5390, 5515, 5639, 5763, 5885, 6006, 6126, 6245, 6363, 6481,
+6597, 6713, 6827, 6941, 7054, 7165, 7276, 7386, 7495, 7602, 7709,
+7815, 7920, 8023, 8126, 8227, 8327, 8426, 8524, 8620, 8715, 8809,
+8901, 8991, 9080, 9167, 9252, 9335, 9415, 9493, 9568, 9640, 9709,
+9773, 9832, 9885, 9931, 9968, 9991, 10000, 723, 981, 1200, 1401,
+1589, 1769, 1943, 2111, 2275, 2435, 2591, 2745, 2896, 3045, 3192,
+3336, 3479, 3620, 3760, 3898, 4034, 4170, 4303, 4436, 4568, 4698,
+4827, 4955, 5082, 5208, 5333, 5457, 5580, 5702, 5823, 5943, 6062,
+6181, 6298, 6414, 6530, 6645, 6758, 6871, 6983, 7094, 7204, 7313,
+7421, 7529, 7635, 7740, 7844, 7948, 8050, 8151, 8251, 8350, 8447,
+8543, 8639, 8732, 8825, 8915, 9005, 9092, 9178, 9262, 9343, 9423,
+9500, 9574, 9645, 9712, 9776, 9834, 9887, 9932, 9968, 9992, 10000,
+716, 970, 1188, 1386, 1573, 1751, 1922, 2089, 2250, 2409, 2564,
+2716, 2865, 3013, 3158, 3301, 3442, 3582, 3720, 3857, 3992, 4126,
+4259, 4390, 4520, 4649, 4777, 4904, 5030, 5155, 5279, 5402, 5523,
+5644, 5764, 5884, 6002, 6119, 6235, 6351, 6466, 6579, 6692, 6804,
+6915, 7026, 7135, 7243, 7351, 7458, 7563, 7668, 7772, 7874, 7976,
+8077, 8176, 8275, 8372, 8469, 8564, 8657, 8750, 8841, 8930, 9018,
+9105, 9189, 9272, 9352, 9431, 9507, 9580, 9650, 9716, 9779, 9837,
+9888, 9933, 9968, 9992, 10000, 708, 960, 1175, 1371, 1556, 1732,
+1902, 2066, 2226, 2383, 2536, 2687, 2835, 2981, 3124, 3266, 3406,
+3544, 3681, 3817, 3950, 4083, 4214, 4345, 4474, 4601, 4728, 4854,
+4979, 5102, 5225, 5347, 5468, 5587, 5706, 5825, 5942, 6058, 6174,
+6288, 6402, 6515, 6627, 6738, 6849, 6958, 7067, 7174, 7281, 7387,
+7492, 7596, 7700, 7802, 7903, 8003, 8103, 8201, 8298, 8394, 8489,
+8583, 8675, 8766, 8856, 8944, 9031, 9116, 9200, 9281, 9361, 9438,
+9513, 9585, 9654, 9720, 9782, 9839, 9890, 9934, 9969, 9992, 10000,
+701, 950, 1163, 1357, 1539, 1714, 1882, 2045, 2203, 2358, 2510,
+2659, 2805, 2950, 3092, 3232, 3371, 3508, 3643, 3777, 3910, 4041,
+4171, 4300, 4428, 4555, 4680, 4805, 4928, 5051, 5173, 5293, 5413,
+5532, 5650, 5767, 5883, 5999, 6113, 6227, 6340, 6452, 6563, 6674,
+6783, 6892, 7000, 7107, 7213, 7318, 7423, 7526, 7629, 7731, 7832,
+7932, 8030, 8128, 8225, 8321, 8416, 8509, 8602, 8693, 8783, 8871,
+8959, 9044, 9128, 9211, 9291, 9370, 9446, 9520, 9591, 9659, 9724,
+9785, 9841, 9892, 9935, 9969, 9992, 10000, 693, 940, 1150, 1342,
+1523, 1696, 1862, 2023, 2180, 2333, 2483, 2631, 2776, 2919, 3060,
+3199, 3336, 3472, 3606, 3738, 3870, 4000, 4129, 4256, 4383, 4508,
+4633, 4756, 4879, 5000, 5121, 5240, 5359, 5477, 5594, 5710, 5826,
+5940, 6054, 6167, 6279, 6390, 6500, 6610, 6718, 6826, 6934, 7040,
+7145, 7250, 7354, 7457, 7559, 7660, 7761, 7860, 7959, 8056, 8153,
+8248, 8343, 8436, 8529, 8620, 8710, 8799, 8886, 8972, 9057, 9140,
+9221, 9300, 9378, 9453, 9526, 9596, 9664, 9728, 9788, 9843, 9893,
+9936, 9970, 9992, 10000, 687, 931, 1139, 1329, 1508, 1678, 1843,
+2002, 2158, 2310, 2458, 2604, 2748, 2889, 3029, 3166, 3302, 3437,
+3570, 3701, 3831, 3960, 4088, 4214, 4340, 4464, 4587, 4709, 4831,
+4951, 5071, 5189, 5307, 5424, 5540, 5655, 5770, 5883, 5996, 6108,
+6219, 6329, 6439, 6548, 6656, 6763, 6869, 6975, 7080, 7184, 7287,
+7389, 7491, 7592, 7691, 7790, 7889, 7986, 8082, 8177, 8271, 8365,
+8457, 8548, 8638, 8727, 8814, 8901, 8986, 9069, 9151, 9231, 9309,
+9386, 9460, 9532, 9602, 9668, 9731, 9790, 9845, 9894, 9937, 9970,
+9992, 10000, 680, 921, 1127, 1315, 1492, 1661, 1824, 1982, 2135,
+2286, 2433, 2577, 2720, 2860, 2998, 3134, 3269, 3402, 3533, 3663,
+3792, 3920, 4046, 4172, 4296, 4419, 4541, 4663, 4783, 4902, 5021,
+5138, 5255, 5371, 5486, 5600, 5714, 5826, 5938, 6049, 6160, 6269,
+6378, 6486, 6593, 6700, 6805, 6910, 7014, 7118, 7220, 7322, 7423,
+7523, 7623, 7721, 7819, 7915, 8011, 8106, 8200, 8293, 8385, 8476,
+8566, 8655, 8743, 8829, 8914, 8998, 9081, 9161, 9241, 9318, 9393,
+9467, 9538, 9607, 9672, 9735, 9793, 9847, 9896, 9938, 9971, 9992,
+10000, 673, 912, 1116, 1302, 1477, 1645, 1806, 1962, 2114, 2263,
+2409, 2552, 2693, 2832, 2968, 3103, 3237, 3368, 3499, 3628, 3755,
+3882, 4007, 4131, 4255, 4377, 4498, 4618, 4737, 4855, 4973, 5089,
+5205, 5320, 5434, 5547, 5660, 5772, 5883, 5993, 6102, 6211, 6319,
+6426, 6533, 6638, 6743, 6847, 6951, 7054, 7156, 7257, 7357, 7457,
+7556, 7654, 7751, 7847, 7943, 8037, 8131, 8224, 8316, 8406, 8496,
+8585, 8673, 8759, 8844, 8929, 9011, 9093, 9172, 9250, 9327, 9401,
+9474, 9544, 9612, 9677, 9738, 9796, 9849, 9897, 9938, 9971, 9992,
+10000, 667, 903, 1105, 1289, 1462, 1628, 1788, 1942, 2093, 2240,
+2385, 2527, 2666, 2803, 2939, 3073, 3205, 3335, 3464, 3592, 3719,
+3844, 3968, 4091, 4213, 4334, 4454, 4573, 4691, 4809, 4925, 5041,
+5155, 5269, 5383, 5495, 5607, 5717, 5827, 5937, 6045, 6153, 6260,
+6367, 6472, 6577, 6682, 6785, 6888, 6990, 7091, 7192, 7292, 7391,
+7489, 7587, 7683, 7779, 7874, 7968, 8062, 8154, 8246, 8337, 8426,
+8515, 8603, 8689, 8775, 8859, 8942, 9023, 9104, 9183, 9260, 9335,
+9409, 9480, 9550, 9617, 9681, 9741, 9798, 9851, 9899, 9939, 9971,
+9993, 10000, 659, 893, 1092, 1275, 1446, 1610, 1768, 1921, 2071,
+2216, 2359, 2500, 2638, 2774, 2908, 3041, 3171, 3301, 3429, 3555,
+3680, 3805, 3928, 4050, 4171, 4290, 4409, 4528, 4645, 4761, 4876,
+4991, 5105, 5218, 5330, 5441, 5552, 5662, 5771, 5880, 5988, 6095,
+6201, 6307, 6412, 6516, 6619, 6722, 6824, 6926, 7026, 7126, 7226,
+7324, 7422, 7519, 7615, 7711, 7805, 7899, 7992, 8085, 8176, 8266,
+8356, 8444, 8532, 8619, 8704, 8789, 8872, 8954, 9034, 9114, 9192,
+9268, 9343, 9415, 9486, 9555, 9621, 9684, 9744, 9801, 9853, 9900,
+9940, 9972, 9993, 10000, 653, 884, 1082, 1263, 1432, 1595, 1751,
+1903, 2051, 2195, 2337, 2476, 2613, 2747, 2880, 3011, 3141, 3269,
+3396, 3521, 3645, 3768, 3890, 4011, 4131, 4250, 4368, 4485, 4601,
+4716, 4831, 4945, 5057, 5169, 5281, 5391, 5501, 5610, 5719, 5826,
+5933, 6039, 6145, 6250, 6354, 6457, 6560, 6662, 6764, 6865, 6965,
+7064, 7163, 7261, 7358, 7454, 7550, 7645, 7739, 7833, 7925, 8017,
+8108, 8199, 8288, 8376, 8464, 8550, 8636, 8720, 8803, 8886, 8967,
+9046, 9125, 9202, 9277, 9351, 9423, 9492, 9560, 9625, 9688, 9747,
+9803, 9855, 9901, 9941, 9972, 9993, 10000, 647, 876, 1071, 1250,
+1419, 1579, 1734, 1885, 2031, 2174, 2314, 2452, 2587, 2721, 2853,
+2983, 3111, 3238, 3363, 3488, 3611, 3733, 3854, 3973, 4092, 4210,
+4327, 4443, 4558, 4672, 4786, 4899, 5011, 5122, 5232, 5342, 5451,
+5559, 5666, 5773, 5879, 5985, 6090, 6194, 6297, 6400, 6502, 6603,
+6704, 6804, 6904, 7002, 7100, 7198, 7294, 7390, 7486, 7580, 7674,
+7767, 7859, 7951, 8041, 8131, 8220, 8309, 8396, 8482, 8568, 8652,
+8736, 8818, 8899, 8979, 9058, 9135, 9211, 9286, 9359, 9430, 9499,
+9565, 9630, 9692, 9750, 9806, 9856, 9902, 9941, 9973, 9993, 10000,
+641, 868, 1061, 1239, 1405, 1564, 1718, 1867, 2012, 2153, 2292,
+2429, 2563, 2695, 2826, 2954, 3082, 3207, 3332, 3455, 3577, 3698,
+3818, 3936, 4054, 4171, 4287, 4402, 4516, 4629, 4742, 4854, 4965,
+5075, 5184, 5293, 5401, 5509, 5615, 5721, 5827, 5931, 6035, 6139,
+6241, 6343, 6445, 6545, 6645, 6745, 6844, 6942, 7039, 7136, 7232,
+7328, 7422, 7516, 7610, 7702, 7794, 7885, 7976, 8065, 8154, 8242,
+8329, 8415, 8501, 8585, 8668, 8751, 8832, 8912, 8991, 9069, 9146,
+9221, 9294, 9366, 9436, 9505, 9571, 9634, 9695, 9753, 9808, 9858,
+9903, 9942, 9973, 9993, 10000, 635, 859, 1051, 1227, 1392, 1549,
+1701, 1849, 1992, 2133, 2270, 2406, 2539, 2670, 2799, 2926, 3053,
+3177, 3301, 3423, 3544, 3663, 3782, 3900, 4017, 4132, 4247, 4361,
+4474, 4587, 4698, 4809, 4919, 5029, 5137, 5245, 5352, 5459, 5565,
+5670, 5775, 5878, 5982, 6084, 6186, 6288, 6388, 6488, 6588, 6686,
+6784, 6882, 6979, 7075, 7171, 7265, 7360, 7453, 7546, 7638, 7730,
+7820, 7910, 8000, 8088, 8176, 8263, 8349, 8434, 8518, 8602, 8684,
+8765, 8846, 8925, 9003, 9080, 9156, 9230, 9303, 9374, 9443, 9510,
+9576, 9639, 9699, 9756, 9810, 9860, 9905, 9943, 9973, 9993, 10000,
+629, 851, 1041, 1215, 1379, 1535, 1685, 1831, 1974, 2113, 2249,
+2383, 2515, 2645, 2773, 2899, 3024, 3148, 3270, 3391, 3511, 3629,
+3747, 3864, 3979, 4094, 4208, 4321, 4434, 4545, 4656, 4766, 4875,
+4983, 5091, 5198, 5304, 5410, 5515, 5620, 5723, 5826, 5929, 6031,
+6132, 6233, 6333, 6432, 6531, 6629, 6726, 6823, 6919, 7015, 7110,
+7204, 7298, 7391, 7483, 7575, 7666, 7756, 7846, 7935, 8023, 8110,
+8197, 8283, 8368, 8452, 8535, 8618, 8699, 8780, 8859, 8938, 9015,
+9091, 9165, 9239, 9311, 9381, 9449, 9516, 9581, 9643, 9703, 9759,
+9812, 9862, 9906, 9944, 9974, 9993, 10000, 623, 844, 1032, 1204,
+1366, 1521, 1670, 1814, 1955, 2093, 2228, 2361, 2492, 2620, 2747,
+2872, 2996, 3119, 3240, 3360, 3479, 3596, 3713, 3829, 3943, 4057,
+4170, 4282, 4394, 4504, 4614, 4723, 4831, 4939, 5046, 5152, 5258,
+5362, 5467, 5570, 5673, 5776, 5877, 5978, 6079, 6179, 6278, 6377,
+6475, 6572, 6669, 6765, 6861, 6956, 7050, 7144, 7237, 7330, 7422,
+7513, 7604, 7693, 7783, 7871, 7959, 8046, 8133, 8218, 8303, 8387,
+8470, 8553, 8634, 8714, 8794, 8872, 8950, 9026, 9101, 9175, 9248,
+9319, 9388, 9456, 9522, 9586, 9647, 9706, 9762, 9815, 9863, 9907,
+9944, 9974, 9993, 10000, 618, 836, 1022, 1193, 1353, 1507, 1654,
+1798, 1937, 2074, 2208, 2339, 2469, 2596, 2722, 2846, 2969, 3090,
+3211, 3329, 3447, 3564, 3680, 3794, 3908, 4021, 4133, 4244, 4355,
+4464, 4573, 4681, 4789, 4895, 5001, 5107, 5212, 5316, 5419, 5522,
+5624, 5726, 5827, 5927, 6027, 6126, 6224, 6322, 6420, 6517, 6613,
+6708, 6804, 6898, 6992, 7085, 7178, 7270, 7361, 7452, 7542, 7631,
+7720, 7808, 7896, 7983, 8069, 8154, 8239, 8323, 8406, 8488, 8569,
+8650, 8729, 8808, 8885, 8962, 9037, 9112, 9185, 9256, 9326, 9395,
+9462, 9527, 9590, 9651, 9709, 9765, 9817, 9865, 9908, 9945, 9974,
+9993, 10000, 612, 828, 1013, 1182, 1341, 1493, 1639, 1781, 1919,
+2055, 2187, 2318, 2446, 2572, 2697, 2820, 2942, 3062, 3181, 3299,
+3416, 3532, 3646, 3760, 3873, 3985, 4096, 4206, 4316, 4424, 4532,
+4640, 4746, 4852, 4957, 5062, 5166, 5269, 5372, 5474, 5575, 5676,
+5776, 5876, 5975, 6073, 6171, 6269, 6365, 6462, 6557, 6652, 6747,
+6840, 6934, 7026, 7118, 7210, 7301, 7391, 7481, 7570, 7658, 7746,
+7833, 7920, 8005, 8091, 8175, 8259, 8341, 8423, 8505, 8585, 8665,
+8743, 8821, 8898, 8973, 9048, 9121, 9194, 9264, 9334, 9402, 9468,
+9532, 9595, 9655, 9713, 9767, 9819, 9866, 9909, 9946, 9975, 9993,
+10000, 607, 821, 1004, 1171, 1329, 1479, 1624, 1765, 1902, 2036,
+2168, 2297, 2424, 2550, 2673, 2795, 2916, 3035, 3153, 3270, 3386,
+3500, 3614, 3727, 3839, 3950, 4060, 4169, 4278, 4386, 4493, 4599,
+4705, 4810, 4915, 5018, 5122, 5224, 5326, 5427, 5528, 5628, 5727,
+5826, 5925, 6022, 6120, 6216, 6312, 6408, 6503, 6597, 6691, 6784,
+6877, 6969, 7061, 7152, 7242, 7332, 7421, 7510, 7598, 7685, 7772,
+7858, 7943, 8028, 8112, 8196, 8278, 8360, 8441, 8522, 8601, 8680,
+8758, 8834, 8910, 8985, 9059, 9131, 9203, 9273, 9341, 9409, 9474,
+9538, 9600, 9659, 9716, 9770, 9821, 9868, 9910, 9946, 9975, 9993,
+10000, 600, 813, 994, 1160, 1316, 1465, 1609, 1748, 1884, 2017,
+2147, 2275, 2401, 2525, 2648, 2769, 2888, 3007, 3124, 3239, 3354,
+3468, 3581, 3693, 3804, 3914, 4023, 4131, 4239, 4346, 4452, 4558,
+4663, 4767, 4871, 4974, 5076, 5178, 5279, 5379, 5479, 5579, 5677,
+5776, 5873, 5970, 6067, 6163, 6258, 6353, 6448, 6541, 6635, 6727,
+6819, 6911, 7002, 7093, 7183, 7272, 7361, 7449, 7536, 7623, 7710,
+7796, 7881, 7965, 8049, 8132, 8215, 8296, 8377, 8458, 8537, 8616,
+8693, 8770, 8846, 8921, 8995, 9068, 9140, 9211, 9280, 9348, 9414,
+9479, 9542, 9603, 9662, 9719, 9772, 9823, 9869, 9911, 9947, 9975,
+9994, 10000, 595, 806, 985, 1150, 1304, 1452, 1595, 1733, 1867,
+1999, 2128, 2255, 2380, 2503, 2625, 2745, 2863, 2980, 3097, 3211,
+3325, 3438, 3550, 3661, 3771, 3880, 3988, 4096, 4203, 4309, 4415,
+4519, 4623, 4727, 4830, 4932, 5033, 5134, 5235, 5334, 5433, 5532,
+5630, 5728, 5825, 5921, 6017, 6112, 6207, 6301, 6395, 6488, 6581,
+6673, 6765, 6856, 6946, 7036, 7125, 7214, 7303, 7390, 7477, 7564,
+7650, 7735, 7820, 7904, 7988, 8071, 8153, 8234, 8315, 8395, 8475,
+8553, 8631, 8708, 8784, 8859, 8933, 9006, 9079, 9150, 9219, 9288,
+9355, 9421, 9485, 9547, 9608, 9666, 9722, 9775, 9825, 9871, 9912,
+9947, 9975, 9994, 10000))
diff --git a/demo/00Index b/demo/00Index
new file mode 100644
index 0000000..2d0dd93
--- /dev/null
+++ b/demo/00Index
@@ -0,0 +1 @@
+DNAcopy Demo of DNAcopy package analysis and plotting capabilities
diff --git a/demo/DNAcopy.R b/demo/DNAcopy.R
new file mode 100644
index 0000000..f6c1050
--- /dev/null
+++ b/demo/DNAcopy.R
@@ -0,0 +1,56 @@
+
+if(dev.cur() <= 1) get(getOption("device"))()
+
+opar <-
+ par(ask = interactive() &&
+ (.Device %in% c("X11", "GTK", "windows","quartz"))
+ )
+
+#datadir <- system.file("examples", package = "DNAcopy")
+#Read in two example by Snijders et al.
+
+data(coriell)
+
+#Combine into one CNA object to prepare for analysis on Chromosomes 1-23
+
+CNA.object <- CNA(cbind(coriell$Coriell.05296,coriell$Coriell.13330),
+ coriell$Chromosome,coriell$Position,
+ data.type="logratio",sampleid=c("c05296","c13330"))
+
+#We generally recommend smoothing single point outliers before analysis
+#Make sure to check that the smoothing is proper
+
+smoothed.CNA.object <- smooth.CNA(CNA.object)
+
+#Segmentation at default parameters
+
+segment.smoothed.CNA.object <- segment(smoothed.CNA.object, verbose=1)
+
+#Plot whole studies
+
+plot(segment.smoothed.CNA.object, plot.type="w")
+
+#Plot each study by chromosome
+
+plot(segment.smoothed.CNA.object, plot.type="s")
+
+#Plot each chromosome across studies (6 per page)
+
+plot(segment.smoothed.CNA.object, plot.type="c", cbys.layout=c(2,1), cbys.nchrom=6)
+
+#Plot by plateaus
+
+plot(segment.smoothed.CNA.object, plot.type="p")
+
+#Segment again but making sure that splits correspond are 3SDs separated
+
+segment.smoothed.CNA.object <- segment(smoothed.CNA.object, undo.splits="sdundo", verbose=1)
+
+#All the non-obvious splits have been removed
+
+plot(segment.smoothed.CNA.object,plot.type="s")
+
+
+
+
+
diff --git a/inst/CHANGES b/inst/CHANGES
new file mode 100644
index 0000000..72b02e4
--- /dev/null
+++ b/inst/CHANGES
@@ -0,0 +1,359 @@
+Changes in Version 1.41.3
+
+ o Address the "malformed Description field" NOTE.
+
+Changes in Version 1.41.2
+
+ o change in segment to remove "no visible binding for global variable"
+ NOTE during R CMD check
+
+Changes in Version 1.41.1
+
+ o add as.numeric to cumsum call in plotSample to address integer overflow
+
+
+1.37.4 became 1.38.0 in Bioconductor 2.14 & 1.40.0 in Bioconductor 3.0
+----------------------------------------------------------------------
+
+Changes in Version 1.37.4
+
+ o segment function now returns the weights used if weights are given.
+
+Changes in Version 1.37.3
+
+ o Changed default main, xlab, ylab options in plotSample to NULL to allow
+ for empty strings to produce blank labels.
+
+Changes in Version 1.37.2
+
+ o Version 1.37.1 appears to be a system change to add vignettes directory
+
+ o Changed cex option in plotSample and main, xlab, ylab options in
+ zoomIntoRegions to be customizable.
+
+
+1.35.1 became 1.36.0 in Bioconductor 2.13 & =1.36.0 now
+------------------------------------------------------
+
+Changes in Version 1.35.1
+
+ o update cytoBand data to match the goldenPath hg19 file
+
+
+1.33.1 became 1.34.0 in Bioconductor 2.12 & =1.35.0 now
+------------------------------------------------------
+
+Changes in Version 1.33.1
+
+ o changes to smooth.CNA. Looping over chromosomes done within fortran
+ and calculate median of nbhd only when needed. Change R code to match.
+
+
+1.31.1 became 1.32.0 in Bioconductor 2.11 & =1.33.0 now
+------------------------------------------------------
+
+Changes in Version 1.31.1
+
+ o use function segments in plots instead of lines in a loop
+
+
+1.29.2 became 1.30.0 in Bioconductor 2.10 & =1.31.0 now
+------------------------------------------------------
+
+Changes in Version 1.29.2
+
+ o Dropped startup message about data format.
+
+Changes in Version 1.29.1
+
+ o added additional details the smooth.CNA help function
+ o made sure that CNA orders data correctly when chrom is an ordered
+ variable with both integer and character values (i.e. X or Y).
+
+
+1.27.2 became 1.28.0 in Bioconductor 2.9 & =1.29.0 now
+------------------------------------------------------
+
+Changes in Version 1.27.2
+
+ o segment function gets default.DNAcopy.bdry internaly instead of assuming
+ DNAcopy is loaded. Allows for loading package by NAMESPACE only.
+ o xmaploc now works for samplebychrom and chrombysample also.
+
+
+Changes in Version 1.27.1
+
+ o Don't further segment if all values in the current segment are the same.
+ o Use is.finite in functions instead of is.na(.) & !(abs(.)==Inf)
+
+
+1.25.1 became 1.26.0 in Bioconductor 2.8 & =1.27.0 now
+------------------------------------------------------
+
+Changes in Version 1.25.1
+
+ o House keeping: changed the URL for cytoband and removed Columbia email
+
+
+1.23.8 became 1.24.0 in Bioconductor 2.7 & =1.25.0 now
+------------------------------------------------------
+
+Changes in Version 1.23.8
+
+ o Added getbdry to exported functions in NAMESPACE
+
+Changes in Version 1.23.7
+
+ o Versions 1.23.4 to 1.23.6 were changes made to use file based data.
+ This needed changing CNA and DNAcopy objects to S4 classes which was
+ too much of a paradigm shift for users. So changes are undone here.
+ o Change default smooth.region to 10 in smooth.CNA
+ o Added function plotSample for whole genome plot of a single sample.
+ o Fixed the is.integer to is.numeric in subset functions.
+ o CNA accomodates data.frames (as.matrix not needed) and presorted data.
+
+Changes in Version 1.23.3 (same fix applied in v1.22.1)
+
+ o Fixed weird bug in smoothCNA.f triggered when there are at most
+ 2*smooth.region (default smooth.region=2) markers in a chromosome.
+
+Changes in Version 1.23.2
+
+ o row numbers of the segments are automatically returned as the object
+ segRows in the output. The print method shows it if showSegRows=TRUE.
+
+Changes in Version 1.23.1
+
+ o Added zoomIntoRegion function.
+ o Added an option to return the row numbers of the segments.
+
+
+1.21.6 became 1.22.0 in Bioconductor 2.6 & =1.23.0 now
+------------------------------------------------------
+
+Changes in Version 1.21.6
+
+ o Fixed indexing bug (i <= k or i > n-k, not and) in smoothCNA.f
+
+Changes in Version 1.21.5
+
+ o ADDED packageStartupMessage THAT CNA DATA FORMAT WILL CHANGE
+ o changed filename of NEWS to CHANGES; file format didn't work with news
+ o changed function .First.lib to .onLoad
+ o in the function CNA, changed code for ordering by chrom and maploc
+ because of "na.last=NA" slowed it down terribly
+ o removed the unnecessary rownames command in CNA
+
+Changes in Version 1.21.4
+
+ o moved CHANGES file to inst/NEWS
+ o documentation of CNA and segment say that missing values will be removed
+
+Changes in Version 1.21.3
+
+ o forgot to deallocate memory in the new code
+
+Changes in Version 1.21.2
+
+ o changed the code for htmaxp & hwtmaxp for the hybrid
+
+Changes in Version 1.21.1
+
+ o changed our affiliations in the vignette
+
+
+1.19.6 became 1.20.0 in Bioconductor 2.5 & =1.21.0 now
+------------------------------------------------------
+
+Changes in Version 1.19.6
+
+ o added function glFrequency to calculate gains and losses
+
+Changes in Version 1.19.5
+
+ o Changed smooth.CNA from all R to Fortran backbone
+
+Changes in Version 1.19.4
+
+ o Fixed infinite loop when the logratio is constant across a segment
+
+Changes in Version 1.19.3
+
+ o Redundancy check by Henrik Bengtsson
+ o Bioconductor fixes (license version, BiocViews etc.)
+
+Changes in Version 1.19.2
+
+ o Fixed a rarely triggered bug in the new code
+
+
+Changes in Version 1.19.1
+
+ o Faster algorithm to compute the CBS maximum t-statistic
+
+
+1.17.5 became 1.18.0 in Bioconductor 2.4 & =1.19.0 now
+------------------------------------------------------
+
+Changes in Version 1.17.5
+
+ o For weighted CBS return weighted segment means (not ordinary mean).
+
+Changes in Version 1.17.4
+
+ o Bug fixes to wtermp code and how it's called in wfindcpt.
+
+Changes in Version 1.17.3
+
+ o Weighted segmentation code added. Weights per probe can be used.
+
+Changes in Version 1.17.1
+
+ o NAMESPACE file added and man pages fixed to reflect it.
+
+
+1.15.4 became 1.16.0 in Bioconductor 2.3 & =1.17.0 now
+------------------------------------------------------
+
+Changes in Version 1.15.4
+
+ o make sure there are at least 10 obsns when t-stat threshold is used
+
+Changes in Version 1.15.3
+
+ o the minimum width of segment is user selectable (argument min.width).
+ Changed from the fixed value of 2 based on user demand. Max value 5.
+
+Changes in Version 1.15.2
+
+ o changed to a faster algorithm for calculating the max t-statistic
+
+ o changed to a faster code for undoing edge effects (redundant perm)
+
+ o added basic functions exon.segment and exon.changepoint (workhorse)
+ These functions are used for finding a translocation using binary
+ segmentation. The data for this should be properly normalized.
+
+ o fortran code re-arranged into different files
+
+Changes in Version 1.15.1
+
+ o updated the call to get(getOption("device")) to use dev.new
+
+
+1.13.3 became 1.14.0 in Bioconductor 2.2 & =1.15.0 now
+------------------------------------------------------
+
+Changes in Version 1.13.3
+
+ o Added segments.summary to give the median, sd & mad for each segment.
+
+Changes in Version 1.13.2
+
+ o Modified the p-value function to replace p > 1 with 1.
+
+Changes in Version 1.13.1
+
+ o Modified the p-value function. Added pseudo confidence intervals
+ for the change-points.
+
+ o Windowing option has been removed (R & fortran code modified).
+
+
+1.11.2 became 1.12.0 in Bioconductor 2.1 & =1.13.0 now
+------------------------------------------------------
+
+Changes in Version 1.11.2
+
+ o Added a p-value function for the change-points. This is based on
+ binary segmentation and not necessarily correct but gives an idea
+ of the relative importance of the change-points.
+
+Changes in Version 1.11.1
+
+ o Changed Venkat's affiliation from MSKCC to Columbia University
+
+
+1.9.3 became 1.10.0 in Bioconductor 2.0 & =1.11.0 now
+-----------------------------------------------------
+
+Changes in Version 1.9.3
+
+ o Added warning that windowing will be deprecated in the next version.
+
+Changes in Version 1.9.2
+
+ o Added code to not bother with p-values and split the segment when the
+ maximal T is large (p-value will be tiny if T > 7).
+
+ o Added code to not split when the maximal T is small (fixes a numerical
+ problem where a constant large segment can have a significant split).
+
+Changes in Version 1.9.1
+
+ o The mod function in fortran inflicts serious time penalty --
+ code rewritten to fix it
+
+
+1.5.3 became 1.6(8).0 in Bioconductor 1.8(9) & =1.9.0 now
+---------------------------------------------------------
+
+Changes in Version 1.5.3
+
+ o Fixed the subset functions to enable sample re-ordering
+
+Changes in Version 1.5.2
+
+ o Bug fix in plot.DNAcopy (triggered only when #chrom=1)
+
+ o Added information about stopping rule in the Vignette
+
+ o Cytoband data from the goldenPath repository added for future plots
+
+Changes in Version 1.5.1
+
+ o Add a sequential stopping rule to declare change early
+
+ o Bug fix - make the object from subset.DNAcopy of class DNAcopy
+
+ ###############################################################
+ ### ###
+ ## E. S. Venkatraman is the new package maintainer ##
+ ### ###
+ ###############################################################
+
+
+
+1.2.5 became 1.4.0 in Bioconductor 1.7 & =1.5.0 in the development branch
+-------------------------------------------------------------------------
+
+Changes in Version 1.2.5
+
+ o Updated the vignette to include references to additional features.
+
+Changes in Version 1.2.4
+
+ o Seg fault occurs if window.size is set when hybrid method is used.
+ Fixed by setting window.size to be NULL in the function segment.
+
+ o Added options to plot command to draw line at y=0 and control its
+ color and lwd of all lines.
+
+ o If ylim is missing it's calculated from all the data instead of just
+ the first sample.
+
+Changes in Version 1.2.3
+
+ o Modifications to the plot function to make par("ask") behave better.
+
+Changes in Version 1.2.2
+
+ o Modified the plot function. X-axis is either the index or (cumulative)
+ map location. Incorporate col, pch, ylim etc. to provide better control
+ over plots.
+
+Changes in Version 1.2.1
+
+ o Added the hybrid method for computing p-values to determine splits. This
+ speeds up the computations considerably and so has been made the default.
+
diff --git a/inst/benchmark/benchmark,20090610,segment.R b/inst/benchmark/benchmark,20090610,segment.R
new file mode 100644
index 0000000..fc486c2
--- /dev/null
+++ b/inst/benchmark/benchmark,20090610,segment.R
@@ -0,0 +1,169 @@
+######################################################################
+# Type: Redundancy test
+# Created by: Henrik Bengtsson <hb at stat.berkeley.edu>
+# Created on: 2009-06-10
+######################################################################
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Startup
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+scriptName <- "benchmark,20090610,segment"
+
+library("DNAcopy")
+library("R.utils")
+
+# Record current random seed
+sample(1) # Assert that a random seed exists
+oldSeed <- .Random.seed
+# Alway use the same random seed
+set.seed(0xbeef)
+
+# Tolerance (maybe decrease?)
+tol <- .Machine$double.eps^0.5
+
+pd <- packageDescription("DNAcopy")
+pkgStr <- sprintf("%s v%s", pd$Package, pd$Version)
+
+figPath <- Arguments$getWritablePath("figures")
+
+benchmarkName <- paste(c(scriptName, gsub(" ", "_", pkgStr)), collapse=",")
+
+logFilename <- sprintf("%s.log", benchmarkName)
+log <- Verbose(logFilename, threshold=-10, timestamp=TRUE)
+
+log && header(log, "BENCHMARKING")
+log && cat(log, "Script: ", scriptName)
+log && print(log, sessionInfo())
+
+benchmarkFilename <- sprintf("%s.Rbin", benchmarkName)
+force <- FALSE
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Main benchmarking loop
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Sizes of data sets to be benchmarked
+Js <- c(1e3, 1e4, 1e5, 2e5, 5e5, 1e6)
+
+
+if (!force && isFile(benchmarkFilename)) {
+ benchmarkData <- loadObject(benchmarkFilename)
+} else {
+ benchmarkData <- data.frame(J=NULL, seg=NULL, weightSeg=NULL)
+}
+
+
+for (jj in seq(along=Js)) {
+ # Number of loci
+ J <- as.integer(Js[jj])
+
+ log && enter(log, sprintf("Case #%d (J=%d) of %d", jj, J, length(Js)))
+
+ if (is.element(J, benchmarkData$J)) {
+ log && cat(log, "Already done.")
+ log && exit(log)
+ next
+ }
+
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+ # Simulating copy-number data
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+ x <- sort(runif(J, min=0, max=1000))
+ w <- runif(J)
+ mu <- double(J)
+ jj <- (200 <= x & x < 300)
+ mu[jj] <- mu[jj] + 1
+ jj <- (650 <= x & x < 800)
+ mu[jj] <- mu[jj] - 1
+ w[jj] <- 0.001
+ eps <- rnorm(J, sd=1/2)
+ y <- mu + eps
+
+
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+ # Setting up a raw CNA object
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+ cnR <- CNA(
+ genomdat = y,
+ chrom = rep(1, times=J),
+ maploc = x,
+ data.type = "logratio",
+ sampleid = "SampleA"
+ )
+ log && print(log, cnR)
+
+
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+ # Non-weighted segmentation
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+ log && enter(log, "Non-weighted segmentation")
+ t1 <- system.time({
+ fitR <- segment(cnR, verbose=1)
+ })[3]
+ log && printf(log, "Processing time: %.3f secs\n", t1)
+ log && print(log, fitR)
+ log && exit(log)
+
+
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+ # Weighted segmentation
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+ log && enter(log, "Weighted segmentation")
+ t2 <- system.time({
+ fitR <- segment(cnR, weights=w, verbose=1)
+ })[3]
+ log && printf(log, "Processing time: %.3f secs\n", t1)
+ log && print(log, fitR)
+ log && exit(log)
+
+
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+ # Record benchmarking
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+ timings <- data.frame(J=J, seg=t1, weightSeg=t2)
+ benchmarkData <- rbind(benchmarkData, timings)
+ log && print(log, benchmarkData)
+
+ # Saving to file
+ saveObject(benchmarkData, file=benchmarkFilename)
+
+
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+ # Cleanup
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+ # Reset to previous random seed
+ .Random.seed <- oldSeed
+
+ log && exit(log)
+} # for (jj ...)
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Benchmarking summary
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+log && print(log, benchmarkData)
+
+log && header(log, "APPENDIX")
+log && print(log, sessionInfo())
+
+
+figName <- paste(c(scriptName, gsub(" ", "_", pkgStr)), collapse=",")
+width <- 640
+height <- 0.618*width
+filename <- sprintf("%s.png", figName)
+pathname <- file.path(figPath, filename)
+devNew(png, pathname, width=width, height=height)
+n <- ncol(benchmarkData)-1
+matplot(benchmarkData[1], benchmarkData[,-1], type="b", pch=20, lwd=3,
+ xlab="J", ylab="seconds", main=pkgStr)
+legend("topleft", colnames(benchmarkData)[-1], col=1:n, lty=1:n, lwd=3)
+devDone()
+
+
+######################################################################
+# HISTORY:
+# 2009-06-10
+# o Benchmarking show a major improvement in the algorithm when going
+# from DNAcopy v1.19.0 to the recent DNAcopy v1.19.2. It was
+# roughly O(J*ln(J)) and now it is O(J). For a chromosome with
+# 500,000 loci, we observed a speed up in the weighted case going
+# from 20 mins to 30 seconds, which is a 40 times speedup.
+# o Created.
+######################################################################
diff --git a/inst/doc/DNAcopy.R b/inst/doc/DNAcopy.R
new file mode 100644
index 0000000..6d6bdca
--- /dev/null
+++ b/inst/doc/DNAcopy.R
@@ -0,0 +1,66 @@
+### R code from vignette source 'DNAcopy.Rnw'
+
+###################################################
+### code chunk number 1: DNAcopy.Rnw:74-75
+###################################################
+library(DNAcopy)
+
+
+###################################################
+### code chunk number 2: DNAcopy.Rnw:78-79
+###################################################
+data(coriell)
+
+
+###################################################
+### code chunk number 3: DNAcopy.Rnw:85-88
+###################################################
+CNA.object <- CNA(cbind(coriell$Coriell.05296),
+ coriell$Chromosome,coriell$Position,
+ data.type="logratio",sampleid="c05296")
+
+
+###################################################
+### code chunk number 4: DNAcopy.Rnw:96-97
+###################################################
+smoothed.CNA.object <- smooth.CNA(CNA.object)
+
+
+###################################################
+### code chunk number 5: DNAcopy.Rnw:105-106
+###################################################
+segment.smoothed.CNA.object <- segment(smoothed.CNA.object, verbose=1)
+
+
+###################################################
+### code chunk number 6: DNAcopy.Rnw:120-121
+###################################################
+plot(segment.smoothed.CNA.object, plot.type="w")
+
+
+###################################################
+### code chunk number 7: DNAcopy.Rnw:129-130
+###################################################
+plot(segment.smoothed.CNA.object, plot.type="s")
+
+
+###################################################
+### code chunk number 8: DNAcopy.Rnw:157-158
+###################################################
+plot(segment.smoothed.CNA.object, plot.type="p")
+
+
+###################################################
+### code chunk number 9: DNAcopy.Rnw:169-172
+###################################################
+sdundo.CNA.object <- segment(smoothed.CNA.object,
+ undo.splits="sdundo",
+ undo.SD=3,verbose=1)
+
+
+###################################################
+### code chunk number 10: DNAcopy.Rnw:177-178
+###################################################
+plot(sdundo.CNA.object,plot.type="s")
+
+
diff --git a/inst/doc/DNAcopy.Rnw b/inst/doc/DNAcopy.Rnw
new file mode 100644
index 0000000..1cbbff7
--- /dev/null
+++ b/inst/doc/DNAcopy.Rnw
@@ -0,0 +1,213 @@
+%\VignetteIndexEntry{DNAcopy}
+%\VignetteDepends{}
+%\VignetteKeywords{DNA Copy Number Analysis}
+%\VignettePackage{DNAcopy}
+
+\documentclass[11pt]{article}
+
+\usepackage{amsmath}
+\usepackage[authoryear,round]{natbib}
+\usepackage{hyperref}
+\SweaveOpts{echo=FALSE}
+
+\setlength{\textheight}{8.5in}
+\setlength{\textwidth}{6in}
+\setlength{\topmargin}{-0.25in}
+\setlength{\oddsidemargin}{0.25in}
+\setlength{\evensidemargin}{0.25in}
+
+\begin{document}
+\setkeys{Gin}{width=0.99\textwidth}
+
+
+\title{\bf DNAcopy: A Package for Analyzing DNA Copy Data}
+
+\author{Venkatraman E. Seshan$^1$ and Adam B. Olshen$^2$}
+
+\maketitle
+
+\begin{center}
+$^1$Department of Epidemiology and Biostatistics\\
+Memorial Sloan-Kettering Cancer Center\\
+{\tt seshanv at mskcc.org}\\
+\ \\
+$^2$Department of Epidemiology and Biostatistics\\
+University of California, San Francisco\\
+{\tt olshena at biostat.ucsf.edu}
+\end{center}
+
+\tableofcontents
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\section{Overview}
+
+This document presents an overview of the {\tt DNAcopy} package. This
+package is for analyzing array DNA copy number data, which is usually
+(but not always) called array Comparative Genomic Hybridization (array
+CGH) data \citep{pinkel98, snijders01, wigler03}. It implements our
+methodology for finding change-points in these data \citep{olshen04},
+which are points after which the (log) test over reference ratios have
+changed location. Our model is that the change-points
+correspond to positions where the underlying DNA copy number has
+changed. Therefore, change-points can be used to identify regions of
+gained and lost copy number. We also provide a function for making
+relevant plots of these data.
+
+\section{Data}
+
+We selected a subset of the data set presented in \cite{snijders01}.
+We are calling this data set {\tt coriell}. The data correspond to
+two array CGH studies of fibroblast cell strains. In particular, we
+chose the studies {\bf GM05296} and {\bf GM13330}. After selecting
+only the mapped data from chromosomes 1-22 and X, there are 2271 data
+points. There is accompanying spectral karyotype data (not included),
+which can serve
+as a gold standard. The data can be found at \\
+\url{http://www.nature.com/ng/journal/v29/n3/suppinfo/ng754_S1.html}
+
+\section{An Example}
+
+Here we perform an analysis on the {\bf GM05296} array CGH study
+described above.
+
+
+<<echo=TRUE,print=FALSE>>=
+library(DNAcopy)
+@
+
+<<echo=TRUE,print=FALSE>>=
+data(coriell)
+@
+
+\noindent
+Before segmentation the data needs to be made into a CNA object.
+
+<<echo=TRUE,print=FALSE>>=
+CNA.object <- CNA(cbind(coriell$Coriell.05296),
+ coriell$Chromosome,coriell$Position,
+ data.type="logratio",sampleid="c05296")
+@
+
+\noindent
+We generally recommend smoothing single point outliers before
+analysis. It is a good idea to check that the smoothing is proper for
+a particular data set.
+
+<<echo=TRUE,print=FALSE>>=
+smoothed.CNA.object <- smooth.CNA(CNA.object)
+@
+
+\noindent
+After smoothing, if necessary, the segmentation is run. Here the
+default parameters are used. A brief discussion of parameters that
+can be adjusted is in the Tips section.
+
+<<echo=TRUE,print=FALSE>>=
+segment.smoothed.CNA.object <- segment(smoothed.CNA.object, verbose=1)
+@
+
+%Plot whole studies
+
+\noindent
+There are a number of plots that can be made. The first is ordering
+the data by chromosome and map positons. The red lines correspond to
+mean values in segments. Note that the points are in alternate colors
+to indicate different chromosomes.
+
+\pagebreak
+
+\begin{center}
+<<fig=TRUE,echo=TRUE>>=
+plot(segment.smoothed.CNA.object, plot.type="w")
+@
+\end{center}
+
+\noindent
+Another possibility is to plot by chromosome within a study.
+
+\begin{center}
+<<fig=TRUE,echo=TRUE,width=11,height=9>>=
+plot(segment.smoothed.CNA.object, plot.type="s")
+@
+\end{center}
+
+%Plot each chromosome across studies (6 per page)
+
+%\begin{center}
+%<<fig=FALSE,echo=TRUE>>=
+%plot(segment.smoothed.CNA.object, plot.type="c",
+% cbys.layout=c(2,1),
+% cbys.nchrom=6)
+%@
+%\end{center}
+
+%Plot by plateaus
+
+\noindent
+If there are multiple studies, one could plot by chromosome across
+studies using the option {\tt plot.type='c'}. A final plot orders the
+segment by their chromosome means. One can take the plateaus in this
+plot to determine what the mean values should be for calling segments
+gains or losses. In this case, maybe $0.4$ for gains and $-0.6$ for
+losses. For most data, these plateaus are much closer to zero. The
+next generation of this software will have automatic methods for
+calling gains and losses.
+
+\begin{center}
+<<fig=TRUE,echo=TRUE>>=
+plot(segment.smoothed.CNA.object, plot.type="p")
+@
+\end{center}
+
+\noindent
+Change-points are often found due to local trends in the data. An
+undo method is needed to get rid of unnecessary change-points. Below all
+splits that are not at least three SDs apart are removed. The
+following plot shows that all splits not corresponding to the gold
+standard results have been removed.
+
+<<echo=TRUE,print=FALSE,width=6,height=5>>=
+sdundo.CNA.object <- segment(smoothed.CNA.object,
+ undo.splits="sdundo",
+ undo.SD=3,verbose=1)
+@
+
+
+\begin{center}
+<<fig=TRUE,echo=TRUE,width=11,height=9>>=
+plot(sdundo.CNA.object,plot.type="s")
+@
+\end{center}
+
+\section{Tips}
+
+\noindent
+A function that may be of interest that has not been mentioned is {\tt
+ subset.CNA}. It allows for subsetting of a CNA object by chromosome and
+sample so that segmentation does not have to be run on a whole data set.
+Similarly, {\tt subset.DNAcopy} allows subsetting of DNAcopy objects, which
+contain the output of segmentation.
+
+The original default segmentation algorithm, because it was based on
+permutation, took $O(N^2)$ computations, where $N$ is the number of markers on
+a chromosome. The new default algorithm is much faster. It includes a hybrid
+approach to compute the $p$-value for segmenting based partly on permutation
+and partly on a Gaussian approximation (available in all versions after 1.2.0)
+and a stopping rule (available in all versions after 1.5.0) to declare change
+when there is a strong evidence for its presence \citep{venkat07}. We no
+longer recommend using overlapping windows for larger data sets. It is still
+possible to run the full permutations analysis using the option {\tt
+ p.method='perm'}. If the new algorithm is still too slow, one can reduce the
+number of permutations in the hybrid method using the parameter {\tt nperm}
+(default is 10,000). However, the lower {\tt alpha} (the significance level
+for the test to accept change-points) is, the more permutations that are
+needed. The stopping boundary needs to be computed for any choice of {\tt
+ nperm} and {\tt alpha} which is not the default which is done automatically
+within the function {\tt segment} or can be done externally using the function
+{\tt getbdry} and passed on to {\tt segment}.
+
+%\newpage
+\bibliographystyle{apalike}
+\bibliography{DNAcopy}
+
+\end{document}
diff --git a/inst/doc/DNAcopy.pdf b/inst/doc/DNAcopy.pdf
new file mode 100644
index 0000000..bf24aa5
Binary files /dev/null and b/inst/doc/DNAcopy.pdf differ
diff --git a/man/CNA.Rd b/man/CNA.Rd
new file mode 100644
index 0000000..094dede
--- /dev/null
+++ b/man/CNA.Rd
@@ -0,0 +1,62 @@
+\name{CNA}
+\alias{CNA}
+\alias{print.CNA}
+\title{Create `Copy Number Array' data object}
+\description{
+ Creates a `copy number array' data object used for DNA copy number
+ analyses by programs such as circular binary segmentation (CBS).
+}
+\usage{
+ CNA(genomdat, chrom, maploc, data.type=c("logratio","binary"),
+ sampleid=NULL, presorted = FALSE)
+ \method{print}{CNA}(x, \dots)
+}
+\arguments{
+ \item{genomdat}{a vector or matrix of data from array-CGH, ROMA, or
+ other copy number experiments. If it is a matrix the rows correspond
+ to the markers and the columns to the samples.}
+ \item{chrom}{the chromosomes (or other group identifier) from which
+ the markers came. Vector of length same as the number of rows of
+ genomdat. If one wants the chromosomes to be ordered in the
+ natural order, this variable should be numeric or ordered category.}
+ \item{maploc}{the locations of marker on the genome. Vector of length
+ same as the number of rows of genomdat. This has to be numeric.}
+ \item{data.type}{logratio (aCGH, ROMA, etc.) or binary (LOH).}
+ \item{sampleid}{sample identifier. If missing the samples are named
+ by prefixing "Sample" to consecutive integers.}
+ \item{presorted}{logical indicator telling if the data have already
+ been sorted by chrom and maploc. Default is FALSE.}
+ \item{x}{object returned by CNA}
+ \item{...}{arguments to be passed onto print command called within.}
+}
+\value{
+ An object of class \code{CNA}. There is a \code{print} method that
+ gives the number of samples and probes and the type of data.
+}
+
+\details{
+ Data that are NA, Inf, NaN will be removed on a per sample basis for
+ "genomdat" and all samples for "chrom" and "maploc".
+
+ If the chrom variable has non-numeric values make it into an ordered
+ variable to get them ordered correctly. E.g. for human genome use:
+ \code{chrom <- ordered(chrom, levels=c(1:22,"X","Y"))} to prepare the
+ variable if chromosomes X and Y are present in your data.
+}
+
+\examples{
+
+data(coriell)
+
+#Combine into one CNA object to prepare for analysis on Chromosomes 1-23
+
+CNA.object <- CNA(cbind(coriell$Coriell.05296,coriell$Coriell.13330),
+ coriell$Chromosome,coriell$Position,
+ data.type="logratio",sampleid=c("c05296","c13330"))
+
+}
+
+\keyword{nonparametric}
+
+
+
diff --git a/man/DNAcopy-internal.Rd b/man/DNAcopy-internal.Rd
new file mode 100644
index 0000000..42e010b
--- /dev/null
+++ b/man/DNAcopy-internal.Rd
@@ -0,0 +1,26 @@
+\name{DNAcopy-internal}
+\alias{changepoints}
+\alias{changepoints.prune}
+\alias{changepoints.sdundo}
+\alias{smooth.data}
+\alias{trimmed.variance}
+\alias{inflfact}
+\alias{exon.changepoint}
+\title{Internal DNAcopy functions}
+\description{
+ Internal functions of package DNAcopy.
+}
+\usage{
+changepoints(genomdat, data.type = "logratio", alpha = 0.01, weights = NULL,
+ sbdry, sbn, nperm = 10000, p.method="hybrid", min.width=2,
+ kmax=25, nmin = 200, trimmed.SD = NULL, undo.splits = "none",
+ undo.prune = 0.05, undo.SD = 3, verbose = 1, ngrid=100,
+ tol=1e-6)
+changepoints.prune(genomdat, lseg, change.cutoff=0.05)
+changepoints.sdundo(genomdat, lseg, trimmed.SD, change.SD=3)
+trimmed.variance(genomdat, trim=0.025)
+inflfact(trim)
+exon.changepoint(exondat, ngrid=100, tol=1e-6)
+}
+\details{These are not to be called directly by the user}
+\keyword{internal}
diff --git a/man/DNAcopy.Rd b/man/DNAcopy.Rd
new file mode 100644
index 0000000..73868c8
--- /dev/null
+++ b/man/DNAcopy.Rd
@@ -0,0 +1,34 @@
+\name{DNAcopy}
+\alias{DNAcopy}
+\alias{print.DNAcopy}
+\title{Results of segmenting a CNA data object}
+\usage{
+\method{print}{DNAcopy}(x, showSegRows=FALSE, \dots)
+}
+\description{
+ The results of segmenting data from copy number array experiments from
+ programs such as circular binary segmentation (CBS).
+}
+\arguments{
+ \item{x}{an object of class \code{DNAcopy} -- output of segment.}
+ \item{showSegRows}{option to show row numbers for the segment start
+ and end. default is FALSE.}
+ \item{...}{arguments to be passed onto print command called within.}
+}
+\value{
+ \item{data}{The original CNA object which was the input for segment}
+ \item{ID}{sample identifier.}
+ \item{chrom}{the chromosome within the sample.}
+ \item{loc.start}{the starting map location of the segment}
+ \item{loc.end}{the ending map location of the segment}
+ \item{num.mark}{the number of markers in the segment}
+ \item{data.type}{the segment mean.}
+ \item{call}{the call that produced the object.}
+}
+\details{
+ An object of class \code{DNAcopy}. There is a \code{print} method
+ that prints the results in a tabular format. Each row gives the
+ sample, the chromosome, the start and end map locations, the number of
+ markers and the mean of each segment.
+}
+\keyword{nonparametric}
diff --git a/man/coriell.Rd b/man/coriell.Rd
new file mode 100644
index 0000000..b2a3fde
--- /dev/null
+++ b/man/coriell.Rd
@@ -0,0 +1,17 @@
+\name{coriell}
+\alias{coriell}
+\title{Array CGH data set of Coriell cell lines}
+\description{
+ These are two data array CGH studies sets of Corriel cell lines taken
+ from the reference below.
+}
+\usage{data(coriell)}
+\format{A data frame containing five variables: first is clone name,
+ second is clone chromosome, third is clone position, fourth and fifth
+ are log2ratio for two cell lines.}
+\source{
+http://www.nature.com/ng/journal/v29/n3/suppinfo/ng754\_S1.html
+}
+\references{Snijders et al., Assembly of microarrays for genome-wide
+ measurement of DNA copy number, \emph{Nature Genetics}, 2001}
+\keyword{datasets}
diff --git a/man/cytoBand.Rd b/man/cytoBand.Rd
new file mode 100644
index 0000000..dda22a9
--- /dev/null
+++ b/man/cytoBand.Rd
@@ -0,0 +1,14 @@
+\name{cytoBand}
+\alias{cytoBand}
+\title{Cytogenic band data}
+\description{
+ Cytogenic band data from the goldenPath repository
+}
+\usage{data(cytoBand)}
+\format{A data frame containing five variables: chromosome, start and
+ end positions, band name and giesma stain.
+}
+\source{
+ http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/cytoBand.txt.gz
+}
+\keyword{datasets}
diff --git a/man/exon.segment.Rd b/man/exon.segment.Rd
new file mode 100644
index 0000000..f40ec4b
--- /dev/null
+++ b/man/exon.segment.Rd
@@ -0,0 +1,45 @@
+\name{exon.segment}
+\alias{exon.segment}
+\title{Binary segmentation of exon data.}
+\description{
+ Compute the binary segmentation statistic, location and approximate p-value.
+}
+\usage{
+ exon.segment(gene, eloc, edat, ngrid=100, tol=1e-6)
+}
+\arguments{
+ \item{gene}{gene names in the exon data}
+ \item{eloc}{exon locations within gene}
+ \item{edat}{exon expressions within gene}
+ \item{ngrid}{number grid points for the integral}
+ \item{tol}{tolerance level for calculating nu}
+}
+\value{
+ a matrix with three columns. The maximal statistic from binary
+ segmentation, its location and the p-values for each gene.
+}
+\details{
+ The p-values are obtained by applying Siegmund's approximation for the
+ maximal statistic from binary segmenting consecutive segments within a
+ chromosome. These are one-sided test for an increase in expression.
+}
+\examples{
+
+# test code on an easy data set
+set.seed(25)
+gene <- rep(c("A", "B"), c(30,20))
+eloc <- c(1:30, 1:20)
+edat <- matrix(rnorm(500), 50, 10)
+# changes for gene1 in samples 3 & 7
+edat[1:30, 3] <- edat[1:30, 3] + rep(0.9*0:1, c(17, 13))
+edat[1:30, 7] <- edat[1:30, 7] + rep(1.1*0:1, c(21, 9))
+# changes for gene2 in samples 4 & 7
+edat[31:50, 4] <- edat[31:50, 4] + rep(1.1*0:1, c(8, 12))
+edat[31:50, 7] <- edat[31:50, 7] + rep(1.2*0:1, c(13, 7))
+exon.segment(gene, eloc, edat)
+
+}
+
+\author{Venkatraman E. Seshan}
+
+\keyword{nonparametric}
diff --git a/man/getbdry.Rd b/man/getbdry.Rd
new file mode 100644
index 0000000..1fd48b2
--- /dev/null
+++ b/man/getbdry.Rd
@@ -0,0 +1,25 @@
+\name{getbdry}
+\alias{getbdry}
+\alias{default.DNAcopy.bdry}
+\title{Sequential stopping boundary}
+\description{
+ Function to compute the sequential boundary for early stopping.
+}
+\usage{
+ getbdry(eta, nperm, max.ones, tol= 1e-2)
+}
+\arguments{
+ \item{eta}{Type I error rate of the boundary.}
+ \item{nperm}{Number of permutations for the reference distribution.}
+ \item{max.ones}{maximum number of ones given by "floor(nperm*alpha)+1".}
+ \item{tol}{tolerance level for the iterations.}
+}
+\value{
+ A vector integer values of length max.ones*(max.ones+1)/2 corresponding
+ to the boundary for the number of ones from 1 to max.ones. The default
+ boundary for nperm=10000, eta=0.05, alpha=0.01 is stored in the data
+ object "default.DNAcopy.bdry". Use this function to get the boundary
+ for your favorite values for the parameters "nperm, eta, alpha" and use
+ it for the argument "sbdry" in the function "segment."
+}
+\keyword{nonparametric}
diff --git a/man/glFrequency.Rd b/man/glFrequency.Rd
new file mode 100644
index 0000000..075f7f7
--- /dev/null
+++ b/man/glFrequency.Rd
@@ -0,0 +1,25 @@
+\name{glFrequency}
+\alias{glFrequency}
+\title{Additional summary measured for the segments}
+\description{
+ This program computes the frequency of gains and losses for each probe
+ as a function of level of mad.
+}
+\usage{
+ glFrequency(xout, threshold=1)
+}
+\arguments{
+ \item{xout}{an object of class DNAcopy}
+ \item{threshold}{threshold value to call gain or loss}
+}
+\value{
+ A segment is called a gain or loss if the segment mean is at least the
+ threshold* mad distance away from the median copy number level. The
+ output is a data frame with five columns which give the chromosome
+ (chrom), genomic position (maploc), the number of samples with
+ available data (pfreq), and the gain (gain) and loss (loss).
+}
+
+\author{Venkatraman E. Seshan}
+
+\keyword{nonparametric}
diff --git a/man/plot.DNAcopy.Rd b/man/plot.DNAcopy.Rd
new file mode 100644
index 0000000..4b5d41b
--- /dev/null
+++ b/man/plot.DNAcopy.Rd
@@ -0,0 +1,110 @@
+\name{plot.DNAcopy}
+\alias{plot.DNAcopy}
+\title{Plot the data and results from segment of a CNA object}
+\description{
+ Plots the data from a copy number array experiment (aCGH, ROMA etc.)
+ along with the results of segmenting it into regions of equal copy
+ numbers.
+}
+\usage{
+ \method{plot}{DNAcopy}(x, plot.type=c("whole", "plateau", "samplebychrom",
+ "chrombysample"), xmaploc=FALSE, altcol=TRUE, sbyc.layout=
+ NULL, cbys.nchrom=1, cbys.layout=NULL, include.means=TRUE,
+ zeroline=TRUE, pt.pch=NULL, pt.cex=NULL, pt.cols=NULL,
+ segcol= NULL, zlcol=NULL, ylim=NULL, lwd=NULL, ...)
+}
+\arguments{
+ \item{x}{an object of class \code{DNAcopy} resulting from analyzing
+ data from copy number array experiments.}
+ \item{plot.type}{the type of plot.}
+ \item{xmaploc}{logical flag to indicate that the X axis is the maploc
+ position rather than the index. Since the segments are rearranged
+ the plateau plot does not use maploc position.}
+ \item{altcol}{logical flag to indicate if chromosomes should be
+ plotted in alternating colors in the whole genome plot.}
+ \item{sbyc.layout}{\code{layout} settings for the multifigure grid layout
+ for the `samplebychrom' type. It should be specified as a vector of
+ two integers which are the number of rows and columns. The default
+ values are chosen based on the number of chromosomes to produce a
+ near square graph. For normal genome it is 4x6 (24 chromosomes)
+ plotted by rows.}
+ \item{cbys.layout}{\code{layout} settings for the multifigure grid layout
+ for the `chrombysample' type. As above it should be specified as
+ number of rows and columns and the default chosen based on the
+ number of samples.}
+ \item{cbys.nchrom}{the number of chromosomes per page in the layout.
+ The default is 1.}
+ \item{include.means}{logical flag to indicate whether segment means
+ are to be drawn.}
+ \item{zeroline}{logical flag to indicate whether a horizontal line at
+ y=0 is to be drawn.}
+ \item{pt.pch}{the plotting character used for plotting the log-ratio
+ values (default is ".").}
+ \item{pt.cex}{the size of plotting character used for the log-ratio
+ values (default is 3).}
+ \item{pt.cols}{the color list for the points. The colors alternate
+ between chromosomes. If missing the point colors are black and green.}
+ \item{segcol}{the color of the lines indicating the segment means. If
+ missing the line color is set to be red.}
+ \item{zlcol}{the color of the zeroline. If missing it is set to be grey.}
+ \item{ylim}{this argument is present to override the default limits
+ which is the range of symmetrized log-ratios.}
+ \item{lwd}{line weight of lines for segment mean and zeroline. If
+ missing it is set to 3.}
+ \item{...}{other arguments which will be passed to \code{plot}
+ commands.}
+}
+\details{
+ There are four possible plot types. For the type `whole' the data
+ are plotted for the entire genome. For the `samplebychrom' type a
+ graph with each chromosome (of a given sample) is drawn in a separate
+ figure on a multi-figure grid. For the `plateau' type the graph
+ is drawn with the chromosome segments re-ordered by the segment means.
+ For the `chrombysample' type the samples for a given chromosome are
+ drawn in a 4x6 multi-figure grid in multiples of 24. By default the
+ segments means are drawn. For multisample data each sample or
+ chromosome is drawn on a separate sheet. When invoked interactively
+ the user is prompted before advancing to the next sample.
+}
+
+\examples{
+
+#Read in two examples from Snijders et al.
+
+data(coriell)
+
+#Combine into one CNA object to prepare for analysis on Chromosomes 1-23
+
+CNA.object <- CNA(cbind(coriell$Coriell.05296,coriell$Coriell.13330),
+ coriell$Chromosome,coriell$Position,
+ data.type="logratio",sampleid=c("c05296","c13330"))
+
+#We generally recommend smoothing single point outliers before analysis
+#Make sure to check that the smoothing is proper
+
+smoothed.CNA.object <- smooth.CNA(CNA.object)
+
+#Segmentation at default parameters
+
+segment.smoothed.CNA.object <- segment(smoothed.CNA.object, verbose=1)
+
+#Plot whole studies
+
+plot(segment.smoothed.CNA.object, plot.type="w")
+
+#Plot each study by chromosome
+
+plot(segment.smoothed.CNA.object, plot.type="s")
+
+#Plot each chromosome across studies (6 per page)
+
+plot(segment.smoothed.CNA.object, plot.type="c", cbys.layout=c(2,1), cbys.nchrom=6)
+
+#Plot by plateaus
+
+plot(segment.smoothed.CNA.object, plot.type="p")
+
+}
+
+\keyword{nonparametric}
+
diff --git a/man/plotSample.Rd b/man/plotSample.Rd
new file mode 100644
index 0000000..3789368
--- /dev/null
+++ b/man/plotSample.Rd
@@ -0,0 +1,81 @@
+\name{plotSample}
+\alias{plotSample}
+\title{Plot the data and results from segmentation for a single sample}
+\description{
+ Plots the data for a single sample from a copy number array experiment
+ (aCGH, ROMA etc.) along with the results of segmenting it into regions
+ of equal copy numbers.
+}
+\usage{
+ plotSample(x, sampleid=NULL, chromlist=NULL, xmaploc=FALSE,
+ col=c("black","green"), pch=".", cex=NULL, altcol=TRUE,
+ segcol="red", lwd=3, zeroline=TRUE, zlcol="grey",
+ xlab=NULL, ylab=NULL, main=NULL, ...)
+}
+\arguments{
+ \item{x}{an object of class \code{DNAcopy} resulting from analyzing
+ data from copy number array experiments.}
+ \item{sampleid}{the sample for which the plot is requested. Should be
+ a valid sample name or number. If missing the first sample is plotted.}
+ \item{chromlist}{a vector of chromosome numers or names to be plotted.
+ If missing the whole genome is plotted.}
+ \item{xmaploc}{a logical indicating if data are plotted against genomic
+ position or Index. Defaults to \code{FALSE}.}
+ \item{col}{a vector of two colors that can be used for alternating
+ colors for successive chromosomes.}
+ \item{pch}{the plotting character. Defaults to \code{.}.}
+ \item{cex}{the size of plotting character. If missing it is set to 3
+ if pch is `.' and 1 otherwise.}
+ \item{altcol}{a logical indicating if colors of successive chromosomes
+ should be alternated. Defaults to \code{TRUE}.}
+ \item{segcol}{color for segment means.}
+ \item{zeroline}{a logical indicating if the zeroline is drawn.
+ Defaults to \code{TRUE}.}
+ \item{zlcol}{color for zero line.}
+ \item{lwd}{thickness of the lines.}
+ \item{xlab}{the x-axis lavel. If missing Index or Genomic Position
+ will be used depending on xmaploc.}
+ \item{ylab}{the y-axis label. If missing log(CN) or LOH will be used
+ depending on data type.}
+ \item{main}{the main title. If missing sample name will be used.}
+ \item{...}{other arguments to the \code{plot} function can be passed here.}
+}
+\details{
+ This function plots the whole genome and segmentation results for a single
+ sample. This function overcomes the deficiency in the plot.DNAcopy function
+ which cycles through all the samples. If sampleid is not specified the
+ first sample is plotted.
+}
+
+\examples{
+
+#Read in two examples from Snijders et al.
+
+data(coriell)
+
+#Combine into one CNA object to prepare for analysis on Chromosomes 1-23
+
+CNA.object <- CNA(cbind(coriell$Coriell.05296,coriell$Coriell.13330),
+ coriell$Chromosome,coriell$Position,
+ data.type="logratio",sampleid=c("c05296","c13330"))
+
+#We generally recommend smoothing single point outliers before analysis
+#Make sure to check that the smoothing is proper
+
+smoothed.CNA.object <- smooth.CNA(CNA.object)
+
+#Segmentation at default parameters
+
+segment.smoothed.CNA.object <- segment(smoothed.CNA.object, verbose=1)
+
+# Plot whole sample c13330
+
+plotSample(segment.smoothed.CNA.object, sampleid="c13330")
+
+# Plot only chromosomes 1,3,5,7,9 from first sample
+plotSample(segment.smoothed.CNA.object, sampleid=1, chromlist=c(1,3,5,7,9))
+
+}
+
+\keyword{nonparametric}
+
diff --git a/man/segment.Rd b/man/segment.Rd
new file mode 100644
index 0000000..6b45796
--- /dev/null
+++ b/man/segment.Rd
@@ -0,0 +1,176 @@
+\name{segment}
+\alias{segment}
+\title{Genome Segmentation Program}
+\description{
+ This program segments DNA copy number data into regions of estimated
+ equal copy number using circular binary segmentation (CBS).
+}
+\usage{
+ segment(x, weights = NULL, alpha = 0.01, nperm = 10000, p.method =
+ c("hybrid", "perm"), min.width=2, kmax=25, nmin=200,
+ eta=0.05, sbdry=NULL, trim = 0.025, undo.splits =
+ c("none", "prune", "sdundo"), undo.prune=0.05,
+ undo.SD=3, verbose=1)
+}
+\arguments{
+ \item{x}{an object of class CNA}
+ \item{weights}{a vector of weights for the probes. The weights should be
+ inversely proportional to their variances. Currently all weights
+ should be positive i.e. remove probes with zero weight prior to
+ segmentation.}
+ \item{alpha}{significance levels for the test to accept change-points.}
+ \item{nperm}{number of permutations used for p-value computation.}
+ \item{p.method}{method used for p-value computation. For the "perm"
+ method the p-value is based on full permutation. For the "hybrid"
+ method the maximum over the entire region is split into maximum of
+ max over small segments and max over the rest. Approximation is
+ used for the larger segment max. Default is hybrid.}
+ \item{min.width}{the minimum number of markers for a changed segment.
+ The default is 2 but can be made larger. Maximum possible value is
+ set at 5 since arbitrary widths can have the undesirable effect of
+ incorrect change-points when a true signal of narrow widths exists.}
+ \item{kmax}{the maximum width of smaller segment for permutation
+ in the hybrid method.}
+ \item{nmin}{the minimum length of data for which the approximation of
+ maximum statistic is used under the hybrid method. should be larger
+ than 4*kmax}
+ \item{eta}{the probability to declare a change conditioned on the
+ permuted statistic exceeding the observed statistic exactly
+ j (= 1,...,nperm*alpha) times.}
+ \item{sbdry}{the sequential boundary used to stop and declare a
+ change. This boundary is a function of nperm, alpha and eta. It can
+ be obtained using the function "getbdry" and used instead of having
+ the "segment" function compute it every time it is called.}
+ \item{trim}{proportion of data to be trimmed for variance calculation
+ for smoothing outliers and undoing splits based on SD.}
+ \item{undo.splits}{A character string specifying how change-points are
+ to be undone, if at all. Default is "none". Other choices are
+ "prune", which uses a sum of squares criterion, and "sdundo", which
+ undoes splits that are not at least this many SDs apart.}
+ \item{undo.prune}{the proportional increase in sum of squares allowed
+ when eliminating splits if undo.splits="prune".}
+ \item{undo.SD}{the number of SDs between means to keep a split if
+ undo.splits="sdundo".}
+ \item{verbose}{level of verbosity for monitoring the program's
+ progress where 0 produces no printout, 1 prints the current sample,
+ 2 the current chromosome and 3 the current segment. The default
+ level is 1.}
+}
+
+\value{
+ An object of class \code{DNAcopy}. It has three elements:
+ \item{data}{The original CNA object which was the input for segment}
+ \item{out}{a data frame with six columns. Each row of the data frame
+ contains a segment for which there are six variables: the sample id,
+ the chromosome number, the map position of the start of the segment,
+ the map position of the end of the segment, the number of markers in
+ the segment, and the average value in the segment.}
+ \item{segRows}{a data frame with the start and end row of each segment
+ in the data matrix. print command shows it with the showSegRows=T}
+ \item{call}{the call that produced the output object.}
+}
+
+\details{
+ This function implements the cicular binary segmentation (CBS)
+ algorithm of Olshen and Venkatraman (2004). Given a set of genomic
+ data, either continuous or binary, the algorithm recursively splits
+ chromosomes into either two or three subsegments based on a maximum
+ t-statistic. A reference distribution, used to decided whether or not
+ to split, is estimated by permutation. Options are given to eliminate
+ splits when the means of adjacent segments are not sufficiently far
+ apart. Note that after the first split the \eqn{\alpha}-levels of the
+ tests for splitting are not unconditional.
+
+ We recommend using one of the undoing options to remove change-points
+ detected due to local trends (see the manuscript below for examples of
+ local trends).
+
+ Since the segmentation procedure uses a permutation reference
+ distribution, R commands for setting and saving seeds should be used
+ if the user wishes to reproduce the results.
+
+ Data that are NA, Inf, NaN will be removed on a per sample basis for
+ "genomdat" and all samples for "chrom" and "maploc".
+}
+
+\examples{
+
+# test code on an easy data set
+set.seed(25)
+genomdat <- rnorm(500, sd=0.1) +
+rep(c(-0.2,0.1,1,-0.5,0.2,-0.5,0.1,-0.2),c(137,87,17,49,29,52,87,42))
+plot(genomdat)
+chrom <- rep(1:2,c(290,210))
+maploc <- c(1:290,1:210)
+test1 <- segment(CNA(genomdat, chrom, maploc))
+
+# test code on a noisier and hence more difficult data set
+set.seed(51)
+genomdat <- rnorm(500, sd=0.2) +
+rep(c(-0.2,0.1,1,-0.5,0.2,-0.5,0.1,-0.2),c(137,87,17,49,29,52,87,42))
+plot(genomdat)
+chrom <- rep(1:2,c(290,210))
+maploc <- c(1:290,1:210)
+test2 <- segment(CNA(genomdat, chrom, maploc))
+
+# test code for weighted CBS
+set.seed(97)
+wts <- sample(1:3, 500, replace=TRUE)
+genomdat <- rnorm(500, sd=0.3)/sqrt(wts) +
+rep(c(-0.2,0.1,1,-0.5,0.2,-0.5,0.1,-0.2),c(137,87,17,49,29,52,87,42))
+plot(genomdat)
+chrom <- rep(1:2,c(290,210))
+maploc <- c(1:290,1:210)
+test3 <- segment(CNA(genomdat, chrom, maploc), weights=wts)
+
+#A real analyis
+
+data(coriell)
+
+#Combine into one CNA object to prepare for analysis on Chromosomes 1-23
+
+CNA.object <- CNA(cbind(coriell$Coriell.05296,coriell$Coriell.13330),
+ coriell$Chromosome,coriell$Position,
+ data.type="logratio",sampleid=c("c05296","c13330"))
+
+#We generally recommend smoothing single point outliers before analysis
+#Make sure to check that the smoothing is proper
+
+smoothed.CNA.object <- smooth.CNA(CNA.object)
+
+#Segmentation at default parameters
+
+segment.smoothed.CNA.object <- segment(smoothed.CNA.object, verbose=1)
+data(coriell)
+
+#Combine into one CNA object to prepare for analysis on Chromosomes 1-23
+
+CNA.object <- CNA(cbind(coriell$Coriell.05296,coriell$Coriell.13330),
+ coriell$Chromosome,coriell$Position,
+ data.type="logratio",sampleid=c("c05296","c13330"))
+
+#We generally recommend smoothing single point outliers before analysis
+#Make sure to check that the smoothing is proper
+
+smoothed.CNA.object <- smooth.CNA(CNA.object)
+
+#Segmentation at default parameters
+
+segment.smoothed.CNA.object <- segment(smoothed.CNA.object, verbose=1)
+
+}
+
+\author{Venkatraman E. Seshan \email{seshanv at mskcc.org} and Adam Olshen
+ \email{olshena at biostat.ucsf.edu} }
+
+\references{
+ Olshen, A. B., Venkatraman, E. S., Lucito, R., Wigler, M. (2004).
+ Circular binary segmentation for the analysis of array-based DNA copy
+ number data. \emph{Biostatistics} 5: 557-572.
+
+ Venkatraman, E. S., Olshen, A. B. (2007) A faster circular binary
+ segmentation algorithm for the analysis of array CGH data.
+ \emph{Bioinformatics} 23: 657-63.
+}
+
+\keyword{nonparametric}
diff --git a/man/segments.p.Rd b/man/segments.p.Rd
new file mode 100644
index 0000000..f0a3658
--- /dev/null
+++ b/man/segments.p.Rd
@@ -0,0 +1,60 @@
+\name{segments.p}
+\alias{segments.p}
+\title{p-values for the change-points}
+\description{
+ This program computes pseudo p-values and confidence intervals for the
+ change-points found by the circular binary segmentation (CBS) algorithm.
+}
+\usage{
+ segments.p(x, ngrid=100, tol=1e-6, alpha=0.05, search.range=100, nperm=1000)
+}
+\arguments{
+ \item{x}{an object of class DNAcopy}
+ \item{ngrid}{number grid points for the integral}
+ \item{tol}{tolerance level for calculating nu}
+ \item{alpha}{Confidence level is 1-alpha}
+ \item{search.range}{statistic is maximized over nu +/- search.range}
+ \item{nperm}{number of permutations for confidence interval}
+}
+\value{
+ a data frame with ten columns. The maximal statistic from binary
+ segmentation, the p-values and lower and upper alpha/2 confidence
+ limits (as genomic positions) are added to the six columns from the
+ segment command.
+
+ NOTE: THE p VALUES ARE APPROXIMATE TAIL PROBABILITIES. ANY VALUE
+ GREATER THAN 0.1 CAN HAVE LARGE ERROR. p > 1 ARE REPLACED WITH 1.
+}
+\details{
+ The p-values are obtained by applying Siegmund's approximation for the
+ maximal statistic from binary segmenting consecutive segments within a
+ chromosome. This p-value is only to give the relative importance of
+ the change-points as the CBS is different from the algorithm used here.
+
+ The confidence intervals are obtained by a permutation algorithm. The
+ data are permuted to the left and right of the identified change-point
+ and the location of the maximal binary segmentation statistic computed.
+ The confidence interval is given by the quantiles of the permutation
+ distribution of the locations.
+
+ The statistical properties of this confidence interval is unknown. It
+ is used to give an idea of the uncertainity on the location of the
+ change-points as the CBS is different from the algorithm used here.
+}
+\examples{
+
+# test code on an easy data set
+set.seed(25)
+genomdat <- rnorm(500, sd=0.1) +
+rep(c(-0.2,0.1,1,-0.5,0.2,-0.5,0.1,-0.2),c(137,87,17,49,29,52,87,42))
+plot(genomdat)
+chrom <- rep(1:2,c(290,210))
+maploc <- c(1:290,1:210)
+test1 <- segment(CNA(genomdat, chrom, maploc))
+segments.p(test1)
+
+}
+
+\author{Venkatraman E. Seshan}
+
+\keyword{nonparametric}
diff --git a/man/segments.summary.Rd b/man/segments.summary.Rd
new file mode 100644
index 0000000..d7def50
--- /dev/null
+++ b/man/segments.summary.Rd
@@ -0,0 +1,36 @@
+\name{segments.summary}
+\alias{segments.summary}
+\title{Additional summary measured for the segments}
+\description{
+ This program computes the standard deviation, median and the mad of
+ the data for each segment found by the CBS algorithm.
+}
+\usage{
+ segments.summary(x)
+}
+\arguments{
+ \item{x}{an object of class DNAcopy}
+}
+\value{
+ a data frame with nine columns. The sd, median and mad of each
+ segment is added to the six columns from the segment command.
+}
+\examples{
+
+# test code on an easy data set
+set.seed(25)
+genomdat1 <- rnorm(500, sd=0.1) +
+rep(c(-0.2,0.1,1,-0.5,0.2,-0.5,0.1,-0.2),c(137,87,17,49,29,52,87,42))
+genomdat2 <- rnorm(500, sd=0.1) +
+rep(c(-0.2,0.1,1,-0.5,0.2,-0.5,0.1,-0.2),c(137,87,17,49,29,52,87,42))
+genomdat1[sample(1:500,5)] <- NA
+chrom <- rep(1:2,c(290,210))
+maploc <- c(1:290,1:210)
+test1 <- segment(CNA(cbind(genomdat1,genomdat2), chrom, maploc))
+segments.summary(test1)
+
+}
+
+\author{Venkatraman E. Seshan}
+
+\keyword{nonparametric}
diff --git a/man/smooth.CNA.Rd b/man/smooth.CNA.Rd
new file mode 100644
index 0000000..46fdddd
--- /dev/null
+++ b/man/smooth.CNA.Rd
@@ -0,0 +1,45 @@
+\name{smooth.CNA}
+\alias{smooth.CNA}
+\title{Smooth a `Copy Number Array' data object}
+\description{
+ Detect outliers and smooth the data prior to analysis by programs such
+ as circular binary segmentation (CBS).
+}
+\usage{
+ smooth.CNA(x, smooth.region=10, outlier.SD.scale=4, smooth.SD.scale=2,
+ trim=0.025)
+}
+\arguments{
+ \item{x}{Copy number array data object}
+ \item{smooth.region}{number of points to consider on the left and the
+ right of a point to detect it as an outlier. (default=10)}
+ \item{outlier.SD.scale}{the number of SDs away from the nearest point
+ in the smoothing region to call a point an outlier.}
+ \item{smooth.SD.scale}{the number of SDs from the median in the
+ smoothing region where a smoothed point is positioned.}
+ \item{trim}{proportion of data to be trimmed for variance calculation
+ for smoothing outliers and undoing splits based on SD.}
+}
+\value{
+ An object of class \code{CNA} with outliers smoothed i.e the logratio
+ values of singleton outliers is shrunk towards the values of its
+ neighbors. The output is of the same dimension as the input.
+}
+\examples{
+
+data(coriell)
+
+#Combine into one CNA object to prepare for analysis on Chromosomes 1-23
+
+CNA.object <- CNA(cbind(coriell$Coriell.05296,coriell$Coriell.13330),
+ coriell$Chromosome,coriell$Position,
+ data.type="logratio",sampleid=c("c05296","c13330"))
+
+#We generally recommend smoothing single point outliers before analysis
+#Make sure to check that the smoothing is proper
+
+smoothed.CNA.object <- smooth.CNA(CNA.object)
+
+}
+
+\keyword{nonparametric}
diff --git a/man/subset.CNA.Rd b/man/subset.CNA.Rd
new file mode 100644
index 0000000..2b4b8a3
--- /dev/null
+++ b/man/subset.CNA.Rd
@@ -0,0 +1,42 @@
+\name{subset.CNA}
+\alias{subset.CNA}
+\title{Subset a `Copy Number Array' data object}
+\description{
+ Function to return a subset of a copy number array data object by a
+ list of chromosomes and sample.
+}
+\usage{
+ \method{subset}{CNA}(x, chromlist=NULL, samplelist=NULL, ...)
+}
+\arguments{
+ \item{x}{Copy number array data object}
+ \item{chromlist}{chromosomes of interest. Should be a subset of the
+ valid chromosome names in the original data.}
+ \item{samplelist}{samples of interest. Can be integers denoting the
+ samples of interest or a vector of valid sample names.}
+ \item{...}{other arguments which may be passed to \code{subset}.}
+}
+\value{
+ An object of class \code{CNA} with the data for the list of
+ chromosomes and samples of interest.
+}
+
+\examples{
+
+data(coriell)
+
+#Combine into one CNA object to prepare for analysis on Chromosomes 1-23
+
+CNA.object <- CNA(cbind(coriell$Coriell.05296,coriell$Coriell.13330),
+ coriell$Chromosome,coriell$Position,
+ data.type="logratio",sampleid=c("c05296","c13330"))
+
+#Take the first ten chromosomes of the first sample
+
+#subset.CNA.object <- subset.CNA(CNA.object,chromlist=1:10,samplelist="c05296")
+subset.CNA.object <- subset(CNA.object,chromlist=1:10,samplelist="c05296")
+}
+
+\keyword{nonparametric}
+
+
diff --git a/man/subset.DNAcopy.Rd b/man/subset.DNAcopy.Rd
new file mode 100644
index 0000000..2276ded
--- /dev/null
+++ b/man/subset.DNAcopy.Rd
@@ -0,0 +1,23 @@
+\name{subset.DNAcopy}
+\alias{subset.DNAcopy}
+\title{Subset a DNAcopy data object}
+\description{
+ Function to return a subset of a copy number array data object by a
+ list of chromosomes and sample.
+}
+\usage{
+ \method{subset}{DNAcopy}(x, chromlist=NULL, samplelist=NULL, ...)
+}
+\arguments{
+ \item{x}{DNAcopy object}
+ \item{chromlist}{chromosomes of interest. Should be a subset of the
+ valid chromosome names in the original data.}
+ \item{samplelist}{samples of interest. Can be integers denoting the
+ samples of interest or a vector of valid sample names.}
+ \item{...}{other arguments which may be passed to \code{subset}.}
+}
+\value{
+ An object of class \code{DNAcopy} with the input data and the results
+ of segmenting them only for the chromosomes and samples of interest.
+}
+\keyword{nonparametric}
diff --git a/man/zoomIntoRegion.Rd b/man/zoomIntoRegion.Rd
new file mode 100644
index 0000000..e763e34
--- /dev/null
+++ b/man/zoomIntoRegion.Rd
@@ -0,0 +1,66 @@
+\name{zoomIntoRegion}
+\alias{zoomIntoRegion}
+\title{Zoomed in view of genomic region}
+\description{
+ This program computes the frequency of gains and losses for each probe
+ as a function of level of mad.
+}
+\usage{
+ zoomIntoRegion(x, chrom, sampleid, maploc.start=NULL, maploc.end=NULL,
+ pt.pch=NULL, pt.cex=NULL, pt.col=NULL, segcol=NULL, seglwd=NULL,
+ main=NULL, xlab=NULL, ylab=NULL, ...)
+}
+\arguments{
+ \item{x}{an object of class DNAcopy.}
+ \item{chrom}{the chromosome in which the region lies.}
+ \item{sampleid}{the sample of interest.}
+ \item{maploc.start}{genomic start position of the region of interest.
+ Default is the beginning of the chromosome.}
+ \item{maploc.end}{genomic end position of the region of interest.
+ Default is the end of the chromosome.}
+ \item{pt.pch}{the plotting character used for plotting the log-ratio
+ values (default is ".").}
+ \item{pt.cex}{the size of plotting character used for the log-ratio
+ values (default is 3 if "." and 1 otherwise).}
+ \item{pt.col}{the color used for the points. Default is green3.}
+ \item{segcol}{the color of the lines indicating the segment means. If
+ missing the line color is set to be red.}
+ \item{seglwd}{line weight of lines for segment mean and zeroline. If
+ missing it is set to 3.}
+ \item{main}{figure title. If missing will be generated by pasting the
+ chromosome, range and sample name together.}
+ \item{xlab}{x-axis label. If missing "Genomic position" will be used}
+ \item{ylab}{y-axis label. If missing "log-ratio" will be used}
+ \item{...}{additional plotting options.}
+}
+\details{
+ This command plots the region of interest with the log-ratio and
+ segments. It works for a region from a single chromosome in a single
+ sample. So if more than one chromosome and/or one sample are given
+ only the first chromosome from the first sample will be used.
+}
+
+\examples{
+data(coriell)
+
+#Combine into one CNA object to prepare for analysis on Chromosomes 1-23
+
+CNA.object <- CNA(cbind(coriell$Coriell.05296,coriell$Coriell.13330),
+ coriell$Chromosome,coriell$Position,
+ data.type="logratio",sampleid=c("c05296","c13330"))
+
+#We generally recommend smoothing single point outliers before analysis
+#Make sure to check that the smoothing is proper
+
+smoothed.CNA.object <- smooth.CNA(CNA.object)
+
+#Segmentation at default parameters
+
+segment.smoothed.CNA.object <- segment(smoothed.CNA.object, verbose=1)
+
+zoomIntoRegion(segment.smoothed.CNA.object, chrom=10, sampleid="c05296")
+}
+
+\author{Venkatraman E. Seshan \email{seshanv at mskcc.org} }
+
+\keyword{nonparametric}
diff --git a/src/cbsWtstats.f b/src/cbsWtstats.f
new file mode 100644
index 0000000..eb16e49
--- /dev/null
+++ b/src/cbsWtstats.f
@@ -0,0 +1,741 @@
+c these are the subroutines to do the weigthed version of CBS
+c which is useful in order to merge data from multiple platforms
+c --------------------------------------------------------------
+c This is relevant only for log-ratio not binary data
+c --------------------------------------------------------------
+c function for calculating the full max weighted t-statistic
+c new approach to maximizing t-statistic
+
+ subroutine wtmaxo(n,x,wts,tss,sx,cwts,iseg,ostat,al0)
+ integer n,iseg(2),al0
+ double precision x(n),wts(n),tss,sx(n),cwts(n),ostat
+c
+c look at the partial sums in blocks of size sqrt(n)
+c
+ integer ipsmin, ipsmax, ipsmin0, ipsmax0, nb, i, j, k, l, nb1,
+ 1 nb2, bi, bj, ilo, ihi, jlo, jhi, ihi1, jlo1, jhi1,
+ 2 tmaxi, tmaxj, nal0
+ double precision psum, psmin, psmax, psmin0, psmax0, bssmax,
+ 1 bsslim, rn, sij1, sij2, sijmx0, bijbss, awtmax, psrnov2,
+ 2 psdiff, psrj, psrn, psrnj, awtlo, awthi, awt1
+c
+c use local arrays for working within blocks
+c block partial sum max and min
+ double precision, allocatable :: bpsmax(:), bpsmin(:)
+c location of the max and min
+ integer, allocatable :: bb(:), ibmin(:), ibmax(:)
+
+c t statistic corresponding to max for block i,j
+ double precision, allocatable :: bssbij(:), bssijmax(:), awt(:)
+c row, column and order vector for reordering bssbij
+ integer, allocatable :: bloci(:), blocj(:), loc(:)
+
+c calculate number of blocks (nb) and block boundaries (vector bb)
+ rn = dfloat(n)
+ if (n .ge. 50) then
+ nb = nint(sqrt(dfloat(n)))
+ else
+ nb = 1
+ endif
+
+c the number of paiwise block comparison
+ nb2 = nb*(nb+1)/2
+c allocate memory
+ allocate(bpsmax(nb), bpsmin(nb))
+ allocate(bb(nb), ibmin(nb), ibmax(nb))
+ allocate(bssbij(nb2), bssijmax(nb2), awt(nb2))
+ allocate(bloci(nb2), blocj(nb2), loc(nb2))
+
+c block boundaries
+ do 110 i = 1, nb
+ bb(i) = nint(rn*(dfloat(i)/dfloat(nb)))
+ 110 continue
+
+c find the max, min of partial sums and their locations within blocks
+ ilo = 1
+ psum = 0
+ psmin0 = 0
+ psmax0 = 0
+ ipsmin0 = n
+ ipsmax0 = n
+ do 20 j = 1, nb
+ sx(ilo) = psum + x(ilo)*wts(ilo)
+ psmin = sx(ilo)
+ ipsmin = ilo
+ psmax = sx(ilo)
+ ipsmax = ilo
+ do 10 i = ilo+1, bb(j)
+ sx(i) = sx(i-1) + x(i)*wts(i)
+ if (sx(i) .lt. psmin) then
+ psmin = sx(i)
+ ipsmin = i
+ endif
+ if (sx(i) .gt. psmax) then
+ psmax = sx(i)
+ ipsmax = i
+ endif
+ 10 continue
+c store the block min, max and locations
+ ibmin(j) = ipsmin
+ ibmax(j) = ipsmax
+ bpsmin(j) = psmin
+ bpsmax(j) = psmax
+c adjust global min, max and locations
+ if (psmin .lt. psmin0) then
+ psmin0 = psmin
+ ipsmin0 = ipsmin
+ endif
+ if (psmax .gt. psmax0) then
+ psmax0 = psmax
+ ipsmax0 = ipsmax
+ endif
+c reset ilo to be the block boundary + 1
+ psum = sx(bb(j))
+ ilo = bb(j) + 1
+ 20 continue
+
+c calculate bss for max s_i - min s_i
+ psdiff = psmax0 - psmin0
+c if the segment is all constant then psdiff = 0 and so bssmax = 0
+ if (psdiff .le. 0) then
+ bssmax = 0
+ go to 120
+ endif
+ psrn = cwts(n)
+ psrj = abs(cwts(ipsmax0) - cwts(ipsmin0))
+ psrnj = psrj*(psrn-psrj)
+ bssmax = (psdiff**2)/psrnj
+ tmaxi = min(ipsmax0, ipsmin0)
+ tmaxj = max(ipsmax0, ipsmin0)
+
+c for a pair of blocks (i,j) calculate the max absolute t-statistic
+c at the (min_i, max_j) and (max_i, min_j) locations
+c for other indices the t-statistic can be bounded using this
+c
+c if a block doesn't have the potential to exceed bssmax ignore it
+c calculate the bsslim for each block and include ones >= bssmax
+
+ psrnov2 = psrn/2
+ l = 0
+ nal0 = n - al0
+ do 40 i = 1, nb
+ do 30 j = i, nb
+c calculate bsslim
+ if (i .eq. 1) then
+ ilo = 1
+ else
+ ilo = bb(i-1) + 1
+ endif
+ ihi = bb(i)
+ if (j .eq. 1) then
+ jlo = 1
+ else
+ jlo = bb(j-1) + 1
+ endif
+ jhi = bb(j)
+c for wCBS calculated hi and lo arc weights instead of lengths
+ awthi = cwts(jhi) - cwts(ilo)
+ if (jhi - ilo .gt. nal0) then
+ awthi = 0
+ do 35 k = 1, al0
+ awthi = max(awthi, cwts(nal0+k) - cwts(k))
+ 35 continue
+ endif
+ if (i .eq. j) then
+ awtlo = cwts(ilo+al0) - cwts(ilo)
+ do 36 k = ilo + 1, ihi - al0
+ awtlo = min(awtlo, cwts(k+al0) - cwts(k))
+ 36 continue
+ else if (i+1 .eq. j) then
+ awtlo = cwts(jlo) - cwts(jlo-al0)
+ do 37 k = jlo - al0 + 1, ihi
+ awtlo = min(awtlo, cwts(k+al0) - cwts(k))
+ 37 continue
+ else
+ awtlo = cwts(jlo) - cwts(ihi)
+ endif
+c max S_k over block j - min S_k over block i
+ sij1 = abs(bpsmax(j) - bpsmin(i))
+c max S_k over block i - min S_k over block j
+ sij2 = abs(bpsmax(i) - bpsmin(j))
+c if i = j then sij1 and sij2 are the same
+ sijmx0 = max(sij1, sij2)
+ psrnj = min(awtlo*(psrn-awtlo), awthi*(psrn-awthi))
+ bsslim = (sijmx0**2)/psrnj
+c if its as large as bssmax add block
+ if (bssmax .le. bsslim) then
+ l = l+1
+ loc(l) = l
+ bloci(l) = i
+ blocj(l) = j
+ bssijmax(l) = bsslim
+c max sij in the (i,j) block, t-statistic etc
+ if (sij1 .gt. sij2) then
+ awt(l) = abs(cwts(ibmax(j)) - cwts(ibmin(i)))
+ bssbij(l) = (sij1**2)/(awt(l)*(psrn-awt(l)))
+ else
+ awt(l) = abs(cwts(ibmin(j)) - cwts(ibmax(i)))
+ bssbij(l) = (sij2**2)/(awt(l)*(psrn-awt(l)))
+ endif
+ endif
+ 30 continue
+ 40 continue
+
+ nb1 = l
+
+c Now sort the t-statistics by their magnitude
+ call qsort4(bssbij, loc, 1, nb1)
+
+c now go through the blocks in reverse order (largest down)
+ do 100 l = nb1, 1, -1
+ k = loc(l)
+c need to check a block only if it has potential to increase bss
+c rjlo is the smalllest (j-i) in the block and rjhi is the largest
+ bsslim = bssijmax(k)
+ if (bssmax .le. bsslim) then
+c bi, bj give the block location
+ bi = bloci(k)
+ bj = blocj(k)
+ awtmax = awt(k)
+ if (bi .eq. 1) then
+ ilo = 1
+ else
+ ilo = bb(bi-1) + 1
+ endif
+ ihi = bb(bi)
+ if (bj .eq. 1) then
+ jlo = 1
+ else
+ jlo = bb(bj-1) + 1
+ endif
+ jhi = bb(bj)
+ awthi = cwts(jhi) - cwts(ilo)
+ if (bi .eq. bj) then
+ awtlo = 0
+ else
+ awtlo = cwts(jlo) - cwts(ihi)
+ endif
+c
+c if arc wt is larger than half total wt (psrn/2) make is psrn - arc wt
+c
+ if (awtmax .gt. psrn - awtmax) awtmax = psrn - awtmax
+c
+c if awtlo <= psrn/2 start from (ihi, jlo) and go up
+c if awthi >= psrn/2 start from (ilo, jhi) and go down
+c
+ if (awtlo .le. psrnov2) then
+ if (bi .eq.bj) then
+ ihi1 = ihi - al0
+ else
+ ihi1 = ihi
+ endif
+ do 60 i = ihi1, ilo, -1
+ jlo1 = max(i + al0, jlo)
+ do 55 j = jlo1, jhi
+ awt1 = cwts(j) - cwts(i)
+ if (awt1 .le. awtmax) then
+ bijbss = (sx(j) - sx(i))**2/(awt1*(psrn-awt1))
+ if (bijbss .gt. bssmax) then
+ bssmax = bijbss
+ tmaxi = i
+ tmaxj = j
+ endif
+ endif
+ 55 continue
+ 60 continue
+ endif
+c
+c make arc wt psrn - arc wt
+c
+ awtmax = psrn - awtmax
+ if (awthi .ge. psrnov2) then
+ do 70 i = ilo, ihi
+ if ((bi .eq. 1) .and. (bj .eq. nb)) then
+ jhi1 = min(jhi, jhi - al0 + i)
+ else
+ jhi1 = jhi
+ endif
+ do 65 j = jhi1, jlo, -1
+ awt1 = cwts(j) - cwts(i)
+ if (awt1 .ge. awtmax) then
+ bijbss = (sx(j) - sx(i))**2/(awt1*(psrn-awt1))
+ if (bijbss .gt. bssmax) then
+ bssmax = bijbss
+ tmaxi = i
+ tmaxj = j
+ endif
+ endif
+ 65 continue
+ 70 continue
+ endif
+ endif
+ 100 continue
+
+ 120 if (tss.le.bssmax+0.0001) tss = bssmax + 1.0
+ bssmax = bssmax/((tss-bssmax)/(rn-2.0))
+
+c deallocate memory
+ deallocate(bpsmax, bpsmin, bb, ibmin, ibmax)
+ deallocate(bssbij, bssijmax, bloci, blocj, loc, awt)
+
+ ostat = bssmax
+ iseg(1) = tmaxi
+ iseg(2) = tmaxj
+
+ return
+ end
+
+c function for calculating the full max wtd t-statistic on permuted data
+c using a new approach to maximizing t-statistic
+ double precision function wtmaxp(n,px,wts,sx,cwts,al0)
+ integer n,al0
+ double precision px(n),wts(n),sx(n),cwts(n)
+c
+c look at the partial sums in blocks of size sqrt(n)
+c
+ integer ipsmin, ipsmax, ipsmin0, ipsmax0, nb, i, j, k, l, nb1,
+ 1 nb2, bi, bj, ilo, ihi, jlo, jhi, ihi1, jlo1, jhi1, nal0
+ double precision psum, psmin, psmax, psmin0, psmax0, bssmax,
+ 1 bsslim, rn, sij1, sij2, sijmx0, bijbss, awtmax, psrnov2,
+ 2 psdiff, psrj, psrn, psrnj, awtlo, awthi, awt1, ssq, tss
+c
+c use local arrays for working within blocks
+c block partial sum max and min
+ double precision, allocatable :: bpsmax(:), bpsmin(:)
+c location of the max and min
+ integer, allocatable :: bb(:), ibmin(:), ibmax(:)
+
+c t statistic corresponding to max for block i,j
+ double precision, allocatable :: bssbij(:), bssijmax(:), awt(:)
+c row, column and order vector for reordering bssbij
+ integer, allocatable :: bloci(:), blocj(:), loc(:)
+
+c calculate number of blocks (nb) and block boundaries (vector bb)
+ rn = dfloat(n)
+ if (n .ge. 50) then
+ nb = nint(sqrt(dfloat(n)))
+ else
+ nb = 1
+ endif
+
+c the number of paiwise block comparison
+ nb2 = nb*(nb+1)/2
+c allocate memory
+ allocate(bpsmax(nb), bpsmin(nb))
+ allocate(bb(nb), ibmin(nb), ibmax(nb))
+ allocate(bssbij(nb2), bssijmax(nb2), awt(nb2))
+ allocate(bloci(nb2), blocj(nb2), loc(nb2))
+
+c block boundaries
+ do 110 i = 1, nb
+ bb(i) = nint(rn*(dfloat(i)/dfloat(nb)))
+ 110 continue
+
+c find the max, min of partial sums and their locations within blocks
+ ilo = 1
+ psum = 0
+ psmin0 = 0
+ psmax0 = 0
+ ipsmin0 = n
+ ipsmax0 = n
+ ssq = 0
+ do 20 j = 1, nb
+ sx(ilo) = psum + px(ilo)*wts(ilo)
+ ssq = ssq + (px(ilo)**2)*wts(ilo)
+ psmin = sx(ilo)
+ ipsmin = ilo
+ psmax = sx(ilo)
+ ipsmax = ilo
+ do 10 i = ilo+1, bb(j)
+ sx(i) = sx(i-1) + px(i)*wts(i)
+ ssq = ssq + (px(i)**2)*wts(i)
+ if (sx(i) .lt. psmin) then
+ psmin = sx(i)
+ ipsmin = i
+ endif
+ if (sx(i) .gt. psmax) then
+ psmax = sx(i)
+ ipsmax = i
+ endif
+ 10 continue
+c store the block min, max and locations
+ ibmin(j) = ipsmin
+ ibmax(j) = ipsmax
+ bpsmin(j) = psmin
+ bpsmax(j) = psmax
+c adjust global min, max and locations
+ if (psmin .lt. psmin0) then
+ psmin0 = psmin
+ ipsmin0 = ipsmin
+ endif
+ if (psmax .gt. psmax0) then
+ psmax0 = psmax
+ ipsmax0 = ipsmax
+ endif
+c reset ilo to be the block boundary + 1
+ psum = sx(bb(j))
+ ilo = bb(j) + 1
+ 20 continue
+
+c calculate bss for max s_i - min s_i
+ psdiff = psmax0 - psmin0
+ psrn = cwts(n)
+ tss = ssq - (sx(n)/psrn)**2
+ psrj = abs(cwts(ipsmax0) - cwts(ipsmin0))
+ psrnj = psrj*(psrn-psrj)
+ bssmax = (psdiff**2)/psrnj
+
+c for a pair of blocks (i,j) calculate the max absolute t-statistic
+c at the (min_i, max_j) and (max_i, min_j) locations
+c for other indices the t-statistic can be bounded using this
+c
+c if a block doesn't have the potential to exceed bssmax ignore it
+c calculate the bsslim for each block and include ones >= bssmax
+
+ psrnov2 = psrn/2
+ l = 0
+ nal0 = n - al0
+ do 40 i = 1, nb
+ do 30 j = i, nb
+c calculate bsslim
+ if (i .eq. 1) then
+ ilo = 1
+ else
+ ilo = bb(i-1) + 1
+ endif
+ ihi = bb(i)
+ if (j .eq. 1) then
+ jlo = 1
+ else
+ jlo = bb(j-1) + 1
+ endif
+ jhi = bb(j)
+c for wCBS calculated hi and lo arc weights instead of lengths
+ awthi = cwts(jhi) - cwts(ilo)
+ if (jhi - ilo .gt. nal0) then
+ awthi = 0
+ do 35 k = 1, al0
+ awthi = max(awthi, cwts(nal0+k) - cwts(k))
+ 35 continue
+ endif
+ if (i .eq. j) then
+ awtlo = cwts(ilo+al0) - cwts(ilo)
+ do 36 k = ilo + 1, ihi - al0
+ awtlo = min(awtlo, cwts(k+al0) - cwts(k))
+ 36 continue
+ else if (i+1 .eq. j) then
+ awtlo = cwts(jlo) - cwts(jlo-al0)
+ do 37 k = jlo - al0 + 1, ihi
+ awtlo = min(awtlo, cwts(k+al0) - cwts(k))
+ 37 continue
+ else
+ awtlo = cwts(jlo) - cwts(ihi)
+ endif
+c max S_k over block j - min S_k over block i
+ sij1 = abs(bpsmax(j) - bpsmin(i))
+c max S_k over block i - min S_k over block j
+ sij2 = abs(bpsmax(i) - bpsmin(j))
+c if i = j then sij1 and sij2 are the same
+ sijmx0 = max(sij1, sij2)
+ psrnj = min(awtlo*(psrn-awtlo), awthi*(psrn-awthi))
+ bsslim = (sijmx0**2)/psrnj
+c if its as large as bssmax add block
+ if (bssmax .le. bsslim) then
+ l = l+1
+ loc(l) = l
+ bloci(l) = i
+ blocj(l) = j
+ bssijmax(l) = bsslim
+c max sij in the (i,j) block, t-statistic etc
+ if (sij1 .gt. sij2) then
+ awt(l) = abs(cwts(ibmax(j)) - cwts(ibmin(i)))
+ bssbij(l) = (sij1**2)/(awt(l)*(psrn-awt(l)))
+ else
+ awt(l) = abs(cwts(ibmin(j)) - cwts(ibmax(i)))
+ bssbij(l) = (sij2**2)/(awt(l)*(psrn-awt(l)))
+ endif
+ endif
+ 30 continue
+ 40 continue
+ nb1 = l
+
+c Now sort the t-statistics by their magnitude
+ call qsort4(bssbij, loc, 1, nb1)
+
+c now go through the blocks in reverse order (largest down)
+ do 100 l = nb1, 1, -1
+ k = loc(l)
+c need to check a block only if it has potential to increase bss
+c rjlo is the smalllest (j-i) in the block and rjhi is the largest
+ bsslim = bssijmax(k)
+ if (bssmax .le. bsslim) then
+c bi, bj give the block location
+ bi = bloci(k)
+ bj = blocj(k)
+ awtmax = awt(k)
+ if (bi .eq. 1) then
+ ilo = 1
+ else
+ ilo = bb(bi-1) + 1
+ endif
+ ihi = bb(bi)
+ if (bj .eq. 1) then
+ jlo = 1
+ else
+ jlo = bb(bj-1) + 1
+ endif
+ jhi = bb(bj)
+ awthi = cwts(jhi) - cwts(ilo)
+ if (bi .eq. bj) then
+ awtlo = 0
+ else
+ awtlo = cwts(jlo) - cwts(ihi)
+ endif
+c
+c if arc wt is larger than half total wt (psrn/2) make is psrn - arc wt
+c
+ if (awtmax .gt. psrn - awtmax) awtmax = psrn - awtmax
+c
+c if awtlo <= psrn/2 start from (ihi, jlo) and go up
+c if awthi >= psrn/2 start from (ilo, jhi) and go down
+c
+ if (awtlo .le. psrnov2) then
+ if (bi .eq.bj) then
+ ihi1 = ihi - al0
+ else
+ ihi1 = ihi
+ endif
+ do 60 i = ihi1, ilo, -1
+ jlo1 = max(i + al0, jlo)
+ do 55 j = jlo1, jhi
+ awt1 = cwts(j) - cwts(i)
+ if (awt1 .le. awtmax) then
+ bijbss = (sx(j) - sx(i))**2/(awt1*(psrn-awt1))
+ if (bijbss .gt. bssmax) bssmax = bijbss
+ endif
+ 55 continue
+ 60 continue
+ endif
+c
+c make arc wt psrn - arc wt
+c
+ awtmax = psrn - awtmax
+ if (awthi .ge. psrnov2) then
+ do 70 i = ilo, ihi
+ if ((bi .eq. 1) .and. (bj .eq. nb)) then
+ jhi1 = min(jhi, jhi - al0 + i)
+ else
+ jhi1 = jhi
+ endif
+ do 65 j = jhi1, jlo, -1
+ awt1 = cwts(j) - cwts(i)
+ if (awt1 .ge. awtmax) then
+ bijbss = (sx(j) - sx(i))**2/(awt1*(psrn-awt1))
+ if (bijbss .gt. bssmax) bssmax = bijbss
+ endif
+ 65 continue
+ 70 continue
+ endif
+ endif
+ 100 continue
+
+ if (tss.le.bssmax+0.0001) tss = bssmax + 1.0
+ wtmaxp = bssmax/((tss-bssmax)/(rn-2.0))
+
+c deallocate memory
+ deallocate(bpsmax, bpsmin, bb, ibmin, ibmax)
+ deallocate(bssbij, bssijmax, bloci, blocj, loc, awt)
+
+ return
+ end
+
+c function for the max (over small arcs) wtd t-statistic on permuted data
+c new code to speed up this part 4/1/2010
+ double precision function hwtmaxp(n,k,px,wts,sx,cwts,mncwt,al0)
+ integer n,k,al0
+ double precision px(n),wts(n),sx(n),cwts(n),mncwt(k)
+
+ integer i, j, nmj, ipj, ipnmj
+ double precision rn, rj, rnj, bssmax, bssij, psmin, psmax, psdiff,
+ 1 bsslim, ssq, tss
+
+c create blocks of size k (or k+1) to span 1 thru n
+c block partial sum max and min
+ double precision, allocatable :: bpsmax(:), bpsmin(:)
+c location of the max and min
+ integer, allocatable :: bb(:)
+c variables to work on block specific data
+ integer nb, ilo, ihi, l
+ double precision psum, psdiffsq
+
+ rn = dfloat(n)
+c number of blocks
+ nb = int(rn/dfloat(k))
+c allocate memory
+ allocate(bpsmax(nb), bpsmin(nb))
+ allocate(bb(nb))
+c block boundaries
+ do 110 i = 1, nb
+ bb(i) = nint(rn*(dfloat(i)/dfloat(nb)))
+ 110 continue
+
+c don't need global min and max
+c find the max, min of partial sums and their locations within blocks
+ ilo = 1
+ psum = 0
+ ssq = 0.0d0
+ bssmax = 0.0d0
+ rn = cwts(n)
+ do 20 j = 1, nb
+ sx(ilo) = psum + px(ilo)*wts(ilo)
+ ssq = ssq + wts(ilo)*px(ilo)**2
+ psmin = sx(ilo)
+ ipsmin = ilo
+ psmax = sx(ilo)
+ ipsmax = ilo
+ do 10 i = ilo+1, bb(j)
+ sx(i) = sx(i-1) + px(i)*wts(i)
+ ssq = ssq + wts(i)*px(i)**2
+ if (sx(i) .lt. psmin) then
+ psmin = sx(i)
+ ipsmin = i
+ endif
+ if (sx(i) .gt. psmax) then
+ psmax = sx(i)
+ ipsmax = i
+ endif
+ 10 continue
+c store the block min, max and locations
+ bpsmin(j) = psmin
+ bpsmax(j) = psmax
+c reset ilo to be the block boundary + 1
+ psum = sx(bb(j))
+ ilo = bb(j) + 1
+c calculate the bss at the block max & min pr
+ i = abs(ipsmin - ipsmax)
+ if ((i .le. k) .and. (i .ge. al0)) then
+ rj = abs(cwts(ipsmax) - cwts(ipsmin))
+ rnj = rj*(rn-rj)
+ bssij = (bpsmax(j) - bpsmin(j))**2/rnj
+ if (bssmax .lt. bssij) bssmax = bssij
+ endif
+ 20 continue
+ tss = ssq - (sx(n)/rn)**2
+
+c check the first block
+ ilo = 1
+ ihi = bb(1)
+ psdiff = bpsmax(1) - bpsmin(1)
+ psdiffsq = psdiff**2
+ do 40 j = al0,k
+ rj = mncwt(j)
+ bsslim = psdiffsq/(rj*(rn-rj))
+ if (bsslim .lt. bssmax) go to 50
+ sxmx = 0.0d0
+ do 30 i = ilo,ihi-j
+ ipj = i+j
+ rj = cwts(ipj) - cwts(i)
+ bssij = (sx(ipj) - sx(i))**2/(rj*(rn-rj))
+ if (bssij .gt. bssmax) bssmax = bssij
+ 30 continue
+ 40 continue
+
+c now the minor arcs spanning the end (n)
+ 50 psdiff = max(abs(bpsmax(1)-bpsmin(nb)), abs(bpsmax(nb)-bpsmin(1)))
+ psdiffsq = psdiff**2
+ do 70 j = al0,k
+ rj = mncwt(j)
+ bsslim = psdiffsq/(rj*(rn-rj))
+ if (bsslim .lt. bssmax) go to 100
+ nmj = n-j
+ do 60 i = 1,j
+ ipnmj = i + nmj
+ rj = cwts(ipnmj) - cwts(i)
+ bssij = (sx(ipnmj) - sx(i))**2/(rj*(rn-rj))
+ if (bssij .gt. bssmax) bssmax = bssij
+ 60 continue
+ 70 continue
+
+c now the other blocks
+ 100 do 200 l = 2,nb
+ ilo = bb(l-1)+1
+ ihi = bb(l)
+ psdiff = bpsmax(l) - bpsmin(l)
+ psdiffsq = psdiff**2
+ do 140 j = al0,k
+ rj = mncwt(j)
+ bsslim = psdiffsq/(rj*(rn-rj))
+ if (bsslim .lt. bssmax) go to 150
+ sxmx = 0.0d0
+ do 130 i = ilo,ihi-j
+ ipj = i+j
+ rj = cwts(ipj) - cwts(i)
+ bssij = (sx(ipj) - sx(i))**2/(rj*(rn-rj))
+ if (bssij .gt. bssmax) bssmax = bssij
+ 130 continue
+ 140 continue
+ 150 psdiff = max(abs(bpsmax(l)-bpsmin(l-1)),
+ 1 abs(bpsmax(l-1)-bpsmin(l)))
+ psdiffsq = psdiff**2
+ do 170 j = al0,k
+ rj = mncwt(j)
+ bsslim = psdiffsq/(rj*(rn-rj))
+ if (bsslim .lt. bssmax) go to 200
+ do 160 i = ilo-j,ilo-1
+ ipj = i+j
+ rj = cwts(ipj) - cwts(i)
+ bssij = (sx(ipj) - sx(i))**2/(rj*(rn-rj))
+ if (bssij .gt. bssmax) bssmax = bssij
+ 160 continue
+ 170 continue
+ 200 continue
+
+c call dblepr("bss max", 7, bssmax, 1)
+
+ if (tss .le. bssmax+0.0001d0) tss = bssmax + 1.0d0
+ hwtmaxp = bssmax/((tss-bssmax)/(dfloat(n)-2.0d0))
+
+c deallocate memory
+ deallocate(bpsmax, bpsmin, bb)
+
+ return
+ end
+
+c the new statistic routine doesn't compute mncwt
+ subroutine getmncwt(n, cwts, k, mncwt, delta)
+ integer n, k
+ double precision cwts(n), mncwt(k), delta
+
+ integer i, j, nmj
+ double precision rj, rn
+
+ rn = cwts(n)
+ do 30 j = 1,k
+ mncwt(j) = cwts(j)
+ nmj = n-j
+ do 10 i = 1,nmj
+ rj = cwts(i+j) - cwts(i)
+ mncwt(j) = min(mncwt(j), rj)
+ 10 continue
+ do 20 i = 1, j
+ rj = cwts(i+nmj) - cwts(i)
+ mncwt(j) = min(mncwt(j), rn-rj)
+ 20 continue
+ 30 continue
+
+ j = k+1
+ nmj = n-j
+ delta = cwts(j)
+ do 40 i = 1,nmj
+ rj = cwts(i+j) - cwts(i)
+ delta = min(delta, rj)
+ 40 continue
+ do 50 i = 1, j
+ rj = cwts(i+nmj) - cwts(i)
+ delta = min(delta, rn-rj)
+ 50 continue
+
+ delta = delta/cwts(n)
+
+ return
+ end
diff --git a/src/cbststats.f b/src/cbststats.f
new file mode 100644
index 0000000..5a3d806
--- /dev/null
+++ b/src/cbststats.f
@@ -0,0 +1,796 @@
+c new approach to maximizing t-statistic
+c dynamic memory allocation using allocatable arrays
+ subroutine tmaxo(n,x,tss,sx,iseg,ostat,al0,ibin)
+ integer n,iseg(2),al0
+ double precision x(n),tss,sx(n),ostat
+ logical ibin
+c
+c look at the partial sums in blocks of size sqrt(n)
+c
+ integer ipsmin, ipsmax, ipsmin0, ipsmax0, nb, i, j, k, l, nb1,
+ 1 nb2, bi, bj, ilo, ihi, jlo, jhi, alenmax, i2j, sxmxi,
+ 2 alenlo, alenhi, tmaxi, tmaxj, ixlo, ixhi, nal0
+ double precision psum, psmin, psmax, psmin0, psmax0, bssmax,
+ 1 bsslim, rn, rj, rjhi, rjlo, rnjov1, sij1, sij2, sijmx0,
+ 2 absx, sxmx, bijbss, rnov2, psdiff
+c
+c use local arrays for working within blocks
+c block partial sum max and min
+ double precision, allocatable :: bpsmax(:), bpsmin(:)
+c location of the max and min
+ integer, allocatable :: bb(:), ibmin(:), ibmax(:)
+
+c t statistic corresponding to max for block i,j (and max possible)
+ double precision, allocatable :: bssbij(:), bssijmax(:)
+c row, column and order vector for reordering bssbij
+ integer, allocatable :: bloci(:), blocj(:), loc(:), alen(:)
+
+c calculate number of blocks (nb) and block boundaries (vector bb)
+ rn = dfloat(n)
+ if (n .ge. 50) then
+ nb = nint(sqrt(dfloat(n)))
+ else
+ nb = 1
+ endif
+
+c the number of paiwise block comparison
+ nb2 = nb*(nb+1)/2
+c allocate memory
+ allocate(bpsmax(nb), bpsmin(nb))
+ allocate(bb(nb), ibmin(nb), ibmax(nb))
+ allocate(bssbij(nb2), bssijmax(nb2))
+ allocate(bloci(nb2), blocj(nb2), loc(nb2), alen(nb2))
+
+c block boundaries
+ do 110 i = 1, nb
+ bb(i) = nint(rn*(dfloat(i)/dfloat(nb)))
+ 110 continue
+
+c find the max, min of partial sums and their locations within blocks
+ ilo = 1
+ psum = 0
+ psmin0 = 0
+ psmax0 = 0
+ ipsmin0 = n
+ ipsmax0 = n
+ do 20 j = 1, nb
+ sx(ilo) = psum + x(ilo)
+ psmin = sx(ilo)
+ ipsmin = ilo
+ psmax = sx(ilo)
+ ipsmax = ilo
+ do 10 i = ilo+1, bb(j)
+ sx(i) = sx(i-1) + x(i)
+ if (sx(i) .lt. psmin) then
+ psmin = sx(i)
+ ipsmin = i
+ endif
+ if (sx(i) .gt. psmax) then
+ psmax = sx(i)
+ ipsmax = i
+ endif
+ 10 continue
+c store the block min, max and locations
+ ibmin(j) = ipsmin
+ ibmax(j) = ipsmax
+ bpsmin(j) = psmin
+ bpsmax(j) = psmax
+c adjust global min, max and locations
+ if (psmin .lt. psmin0) then
+ psmin0 = psmin
+ ipsmin0 = ipsmin
+ endif
+ if (psmax .gt. psmax0) then
+ psmax0 = psmax
+ ipsmax0 = ipsmax
+ endif
+c reset ilo to be the block boundary + 1
+ psum = sx(bb(j))
+ ilo = bb(j) + 1
+ 20 continue
+
+c calculate bss for max s_i - min s_i
+ psdiff = psmax0 - psmin0
+ rj = dfloat(abs(ipsmax0 - ipsmin0))
+ rnjov1 = rn/(rj*(rn-rj))
+ if (ibin) then
+ bssmax = rnjov1*(psdiff-0.5)**2
+ else
+ bssmax = rnjov1*psdiff**2
+ endif
+ tmaxi = min(ipsmax0, ipsmin0)
+ tmaxj = max(ipsmax0, ipsmin0)
+
+c if the segment is all constant then psdiff = 0 and so bssmax = 0
+ if (psdiff .le. 0) then
+ bssmax = 0
+ go to 120
+ endif
+
+c for a pair of blocks (i,j) calculate the max absolute t-statistic
+c at the (min_i, max_j) and (max_i, min_j) locations
+c for other indices the t-statistic can be bounded using this
+c
+c if a block doesn't have the potential to exceed bssmax ignore it
+c calculate the bsslim for each block and include ones >= bssmax
+
+ rnov2 = rn/2
+ l = 0
+ nal0 = n - al0
+ do 40 i = 1, nb
+ do 30 j = i, nb
+c calculate bsslim
+ if (i .eq. 1) then
+ ilo = 1
+ else
+ ilo = bb(i-1) + 1
+ endif
+ ihi = bb(i)
+ if (j .eq. 1) then
+ jlo = 1
+ else
+ jlo = bb(j-1) + 1
+ endif
+ jhi = bb(j)
+ alenhi = jhi - ilo
+ if (alenhi .gt. nal0) alenhi = nal0
+ rjhi = dfloat(alenhi)
+ if (i .eq. j) then
+ alenlo = 1
+ else
+ alenlo = jlo - ihi
+ endif
+ if (alenlo .lt. al0) alenlo = al0
+c max S_k over block j - min S_k over block i
+ sij1 = abs(bpsmax(j) - bpsmin(i))
+c max S_k over block i - min S_k over block j
+ sij2 = abs(bpsmax(i) - bpsmin(j))
+c if i = j then sij1 and sij2 are the same
+ sijmx0 = max(sij1, sij2)
+ rjlo = dfloat(alenlo)
+ rnjov1 = rn/min(rjlo*(rn-rjlo), rjhi*(rn-rjhi))
+ if (ibin) then
+ bsslim = rnjov1*(sijmx0-0.5)**2
+ else
+ bsslim = rnjov1*(sijmx0**2)
+ endif
+c if its as large as bssmax add block
+ if (bssmax .le. bsslim) then
+ l = l+1
+ loc(l) = l
+ bloci(l) = i
+ blocj(l) = j
+ bssijmax(l) = bsslim
+c max sij in the (i,j) block, t-statistic etc
+ if (sij1 .gt. sij2) then
+ alen(l) = abs(ibmax(j) - ibmin(i))
+ rj = dfloat(alen(l))
+ rnjov1 = rn/(rj*(rn-rj))
+ if (ibin) then
+ bssbij(l) = rnjov1*(sij1-0.5)**2
+ else
+ bssbij(l) = rnjov1*(sij1**2)
+ endif
+ else
+ alen(l) = abs(ibmin(j) - ibmax(i))
+ rj = dfloat(alen(l))
+ rnjov1 = rn/(rj*(rn-rj))
+ if (ibin) then
+ bssbij(l) = rnjov1*(sij2-0.5)**2
+ else
+ bssbij(l) = rnjov1*(sij2**2)
+ endif
+ endif
+ endif
+ 30 continue
+ 40 continue
+ nb1 = l
+
+c Now sort the t-statistics by their magnitude
+ call qsort4(bssbij, loc, 1, nb1)
+
+c now go through the blocks in reverse order (largest down)
+ do 100 l = nb1, 1, -1
+ k = loc(l)
+c need to check a block only if it has potential to increase bss
+c rjlo is the smalllest (j-i) in the block and rjhi is the largest
+ bsslim = bssijmax(k)
+ if (bssmax .le. bsslim) then
+c bi, bj give the block location
+ bi = bloci(k)
+ bj = blocj(k)
+c max arc length of interest in block
+ alenmax = alen(k)
+ if (bi .eq. 1) then
+ ilo = 1
+ else
+ ilo = bb(bi-1) + 1
+ endif
+ ihi = bb(bi)
+ if (bj .eq. 1) then
+ jlo = 1
+ else
+ jlo = bb(bj-1) + 1
+ endif
+ jhi = bb(bj)
+ alenhi = jhi - ilo
+ if (alenhi .gt. nal0) alenhi = nal0
+ rjhi = dfloat(alenhi)
+ if (bi .eq. bj) then
+ alenlo = 1
+ else
+ alenlo = jlo - ihi
+ endif
+ if (alenlo .lt. al0) alenlo = al0
+ rjlo = dfloat(alenlo)
+c
+c if arc length is larger than n/2 make is n - arc length
+c
+ if (alenmax .gt. n - alenmax) alenmax = n - alenmax
+c
+c if alenlo <= n/2 start from (ihi, jlo) and go up
+c if alenhi >= n/2 start from (ilo, jhi) and go down
+c
+ if ((rjlo .le. rnov2) .and. (alenlo .le. alenmax)) then
+ do 60 i2j = alenlo, alenmax
+c excess calcultaions to set range of i
+ ixlo = max(0, jlo - ilo - i2j)
+ ixhi = max(0, ihi + i2j - jhi)
+ sxmx = 0
+ do 55 i = ilo + ixlo, ihi - ixhi
+ j = i+i2j
+ absx = abs(sx(j) - sx(i))
+ if (sxmx .lt. absx) then
+ sxmx = absx
+ sxmxi = i
+ endif
+ 55 continue
+ rj = dfloat(i2j)
+ rnjov1 = rn/(rj*(rn-rj))
+ if (ibin) then
+ bijbss = rnjov1*(sxmx-0.5)**2
+ else
+ bijbss = rnjov1*(sxmx**2)
+ endif
+ if (bijbss .gt. bssmax) then
+ bssmax = bijbss
+ tmaxi = sxmxi
+ tmaxj = sxmxi + i2j
+ endif
+ 60 continue
+ endif
+c
+c make arclength n - arc length
+c
+ alenmax = n - alenmax
+ if ((rjhi .ge. rnov2) .and. (alenhi .ge. alenmax)) then
+ do 70 i2j = alenhi, alenmax, -1
+c excess calcultaions to set range of i
+ ixlo = max(0, jlo - ilo - i2j)
+ ixhi = max(0, ihi + i2j - jhi)
+ sxmx = 0
+ do 65 i = ilo + ixlo, ihi - ixhi
+ j = i + i2j
+ absx = abs(sx(j) - sx(i))
+ if (sxmx .lt. absx) then
+ sxmx = absx
+ sxmxi = i
+ endif
+ 65 continue
+ rj = dfloat(i2j)
+ rnjov1 = rn/(rj*(rn-rj))
+ if (ibin) then
+ bijbss = rnjov1*(sxmx-0.5)**2
+ else
+ bijbss = rnjov1*(sxmx**2)
+ endif
+ if (bijbss .gt. bssmax) then
+ bssmax = bijbss
+ tmaxi = sxmxi
+ tmaxj = sxmxi + i2j
+ endif
+ 70 continue
+ endif
+ endif
+ 100 continue
+
+ 120 if (ibin) then
+ if (tss.le.0.0001) tss = 1.0
+ bssmax = bssmax/(tss/rn)
+ else
+ if (tss.le.bssmax+0.0001) tss = bssmax + 1.0
+ bssmax = bssmax/((tss-bssmax)/(rn-2.0))
+ endif
+
+c deallocate memory
+ deallocate(bpsmax, bpsmin, bb, ibmin, ibmax)
+ deallocate(bssbij, bssijmax, bloci, blocj, loc, alen)
+
+ ostat = bssmax
+ iseg(1) = tmaxi
+ iseg(2) = tmaxj
+
+ return
+ end
+
+c function for calculating the full max t-statistic on permuted data
+c new approach to maximizing t-statistic using allocatable arrays
+ double precision function tmaxp(n,tss,px,sx,al0,ibin)
+ integer n,al0
+ double precision tss,px(n),sx(n)
+ logical ibin
+c
+c look at the partial sums in blocks of size sqrt(n)
+c
+ integer ipsmin, ipsmax, ipsmin0, ipsmax0, nb, i, j, k, l, nb1,
+ 1 nb2, bi, bj, ilo, ihi, jlo, jhi, alenmax, i2j, alenlo,
+ 2 alenhi, ixlo, ixhi, nal0
+ double precision psum, psmin, psmax, psmin0, psmax0, bssmax,
+ 1 bsslim, rn, rj, rjhi, rjlo, rnjov1, sij1, sij2, sijmx0,
+ 2 absx, sxmx, bijbss, rnov2, psdiff
+c
+c use local arrays for working within blocks
+c block partial sum max and min
+ double precision, allocatable :: bpsmax(:), bpsmin(:)
+c location of the max and min
+ integer, allocatable :: bb(:), ibmin(:), ibmax(:)
+
+c t statistic corresponding to max for block i,j (and max possible)
+ double precision, allocatable :: bssbij(:), bssijmax(:)
+c row, column and order vector for reordering bssbij
+ integer, allocatable :: bloci(:), blocj(:), loc(:), alen(:)
+
+c calculate number of blocks (nb) and block boundaries (vector bb)
+ rn = dfloat(n)
+ if (n .ge. 50) then
+ nb = nint(sqrt(dfloat(n)))
+ else
+ nb = 1
+ endif
+
+c the number of paiwise block comparison
+ nb2 = nb*(nb+1)/2
+c allocate memory
+ allocate(bpsmax(nb), bpsmin(nb))
+ allocate(bb(nb), ibmin(nb), ibmax(nb))
+ allocate(bssbij(nb2), bssijmax(nb2))
+ allocate(bloci(nb2), blocj(nb2), loc(nb2), alen(nb2))
+
+c block boundaries
+ do 110 i = 1, nb
+ bb(i) = nint(rn*(dfloat(i)/dfloat(nb)))
+ 110 continue
+
+c find the max, min of partial sums and their locations within blocks
+ ilo = 1
+ psum = 0
+ psmin0 = 0
+ psmax0 = 0
+ ipsmin0 = n
+ ipsmax0 = n
+ do 20 j = 1, nb
+ sx(ilo) = psum + px(ilo)
+ psmin = sx(ilo)
+ ipsmin = ilo
+ psmax = sx(ilo)
+ ipsmax = ilo
+ do 10 i = ilo+1, bb(j)
+ sx(i) = sx(i-1) + px(i)
+ if (sx(i) .lt. psmin) then
+ psmin = sx(i)
+ ipsmin = i
+ endif
+ if (sx(i) .gt. psmax) then
+ psmax = sx(i)
+ ipsmax = i
+ endif
+ 10 continue
+c store the block min, max and locations
+ ibmin(j) = ipsmin
+ ibmax(j) = ipsmax
+ bpsmin(j) = psmin
+ bpsmax(j) = psmax
+c adjust global min, max and locations
+ if (psmin .lt. psmin0) then
+ psmin0 = psmin
+ ipsmin0 = ipsmin
+ endif
+ if (psmax .gt. psmax0) then
+ psmax0 = psmax
+ ipsmax0 = ipsmax
+ endif
+c reset ilo to be the block boundary + 1
+ psum = sx(bb(j))
+ ilo = bb(j) + 1
+ 20 continue
+
+c calculate bss for max s_i - min s_i
+ psdiff = psmax0 - psmin0
+ rj = dfloat(abs(ipsmax0 - ipsmin0))
+ rnjov1 = rn/(rj*(rn-rj))
+ if (ibin) then
+ bssmax = rnjov1*(psdiff-0.5)**2
+ else
+ bssmax = rnjov1*psdiff**2
+ endif
+
+c for a pair of blocks (i,j) calculate the max absolute t-statistic
+c at the (min_i, max_j) and (max_i, min_j) locations
+c for other indices the t-statistic can be bounded using this
+c
+c if a block doesn't have the potential to exceed bssmax ignore it
+c calculate the bsslim for each block and include ones >= bssmax
+
+ rnov2 = rn/2
+ l = 0
+ nal0 = n - al0
+ do 40 i = 1, nb
+ do 30 j = i, nb
+c calculate bsslim
+ if (i .eq. 1) then
+ ilo = 1
+ else
+ ilo = bb(i-1) + 1
+ endif
+ ihi = bb(i)
+ if (j .eq. 1) then
+ jlo = 1
+ else
+ jlo = bb(j-1) + 1
+ endif
+ jhi = bb(j)
+ alenhi = jhi - ilo
+ if (alenhi .gt. nal0) alenhi = nal0
+ rjhi = dfloat(alenhi)
+ if (i .eq. j) then
+ alenlo = 1
+ else
+ alenlo = jlo - ihi
+ endif
+ if (alenlo .lt. al0) alenlo = al0
+c max S_k over block j - min S_k over block i
+ sij1 = abs(bpsmax(j) - bpsmin(i))
+c max S_k over block i - min S_k over block j
+ sij2 = abs(bpsmax(i) - bpsmin(j))
+c if i = j then sij1 and sij2 are the same
+ sijmx0 = max(sij1, sij2)
+ rjlo = dfloat(alenlo)
+ rnjov1 = rn/min(rjlo*(rn-rjlo), rjhi*(rn-rjhi))
+ if (ibin) then
+ bsslim = rnjov1*(sijmx0-0.5)**2
+ else
+ bsslim = rnjov1*(sijmx0**2)
+ endif
+c if its as large as bssmax add block
+ if (bssmax .le. bsslim) then
+ l = l+1
+ loc(l) = l
+ bloci(l) = i
+ blocj(l) = j
+ bssijmax(l) = bsslim
+c max sij in the (i,j) block, t-statistic etc
+ if (sij1 .gt. sij2) then
+ alen(l) = abs(ibmax(j) - ibmin(i))
+ rj = dfloat(alen(l))
+ rnjov1 = rn/(rj*(rn-rj))
+ if (ibin) then
+ bssbij(l) = rnjov1*(sij1-0.5)**2
+ else
+ bssbij(l) = rnjov1*(sij1**2)
+ endif
+ else
+ alen(l) = abs(ibmin(j) - ibmax(i))
+ rj = dfloat(alen(l))
+ rnjov1 = rn/(rj*(rn-rj))
+ if (ibin) then
+ bssbij(l) = rnjov1*(sij2-0.5)**2
+ else
+ bssbij(l) = rnjov1*(sij2**2)
+ endif
+ endif
+ endif
+ 30 continue
+ 40 continue
+ nb1 = l
+
+c Now sort the t-statistics by their magnitude
+ call qsort4(bssbij, loc, 1, nb1)
+
+c now go through the blocks in reverse order (largest down)
+ do 100 l = nb1, 1, -1
+ k = loc(l)
+c need to check a block only if it has potential to increase bss
+c rjlo is the smalllest (j-i) in the block and rjhi is the largest
+ bsslim = bssijmax(k)
+ if (bssmax .le. bsslim) then
+c bi, bj give the block location
+ bi = bloci(k)
+ bj = blocj(k)
+c max arc length of interest in block
+ alenmax = alen(k)
+ if (bi .eq. 1) then
+ ilo = 1
+ else
+ ilo = bb(bi-1) + 1
+ endif
+ ihi = bb(bi)
+ if (bj .eq. 1) then
+ jlo = 1
+ else
+ jlo = bb(bj-1) + 1
+ endif
+ jhi = bb(bj)
+ alenhi = jhi - ilo
+ if (alenhi .gt. nal0) alenhi = nal0
+ rjhi = dfloat(alenhi)
+ if (bi .eq. bj) then
+ alenlo = 1
+ else
+ alenlo = jlo - ihi
+ endif
+ if (alenlo .lt. al0) alenlo = al0
+ rjlo = dfloat(alenlo)
+c
+c if arc length is larger than n/2 make is n - arc length
+c
+ if (alenmax .gt. n - alenmax) alenmax = n - alenmax
+c
+c if alenlo <= n/2 start from (ihi, jlo) and go up
+c if alenhi >= n/2 start from (ilo, jhi) and go down
+c
+ if ((rjlo .le. rnov2) .and. (alenlo .le. alenmax)) then
+ do 60 i2j = alenlo, alenmax
+c excess calcultaions to set range of i
+ ixlo = max(0, jlo - ilo - i2j)
+ ixhi = max(0, ihi + i2j - jhi)
+ sxmx = 0
+ do 55 i = ilo + ixlo, ihi - ixhi
+ j = i+i2j
+ absx = abs(sx(j) - sx(i))
+ if (sxmx .lt. absx) sxmx = absx
+ 55 continue
+ rj = dfloat(i2j)
+ rnjov1 = rn/(rj*(rn-rj))
+ if (ibin) then
+ bijbss = rnjov1*(sxmx-0.5)**2
+ else
+ bijbss = rnjov1*(sxmx**2)
+ endif
+ if (bijbss .gt. bssmax) bssmax = bijbss
+ 60 continue
+ endif
+c
+c make arclength n - arc length
+c
+ alenmax = n - alenmax
+ if ((rjhi .ge. rnov2) .and. (alenhi .ge. alenmax)) then
+ do 70 i2j = alenhi, alenmax, -1
+c excess calcultaions to set range of i
+ ixlo = max(0, jlo - ilo - i2j)
+ ixhi = max(0, ihi + i2j - jhi)
+ sxmx = 0
+ do 65 i = ilo + ixlo, ihi - ixhi
+ j = i + i2j
+ absx = abs(sx(j) - sx(i))
+ if (sxmx .lt. absx) sxmx = absx
+ 65 continue
+ rj = dfloat(i2j)
+ rnjov1 = rn/(rj*(rn-rj))
+ if (ibin) then
+ bijbss = rnjov1*(sxmx-0.5)**2
+ else
+ bijbss = rnjov1*(sxmx**2)
+ endif
+ if (bijbss .gt. bssmax) bssmax = bijbss
+ 70 continue
+ endif
+ endif
+ 100 continue
+
+ if (ibin) then
+ if (tss.le.0.0001) tss = 1.0
+ tmaxp = bssmax/(tss/rn)
+ else
+ if (tss.le.bssmax+0.0001) tss = bssmax + 1.0
+ tmaxp = bssmax/((tss-bssmax)/(rn-2.0))
+ endif
+
+c deallocate memory
+ deallocate(bpsmax, bpsmin, bb, ibmin, ibmax)
+ deallocate(bssbij, bssijmax, bloci, blocj, loc, alen)
+
+ return
+ end
+
+c function for the max (over small arcs) t-statistic on permuted data
+c new code to speed up this part 3/31/2010
+ double precision function htmaxp(n,k,tss,px,sx,al0,ibin)
+ integer n,k,al0
+ double precision tss,px(n),sx(n)
+ logical ibin
+
+ integer i, j, nmj
+ double precision rn, rj, absx, sxmx, bssmx, psmin, psmax, psdiff,
+ 1 bsslim, rnjov1
+
+c create blocks of size k (or k+1) to span 1 thru n
+c block partial sum max and min
+ double precision, allocatable :: bpsmax(:), bpsmin(:)
+c location of the max and min
+ integer, allocatable :: bb(:)
+c variables to work on block specific data
+ integer nb, ilo, ihi, l
+ double precision psum, psdiffsq
+
+ rn = dfloat(n)
+c number of blocks of size k (plus fraction since n/k may not be integer)
+ nb = int(rn/dfloat(k))
+c allocate memory
+ allocate(bpsmax(nb), bpsmin(nb))
+ allocate(bb(nb))
+c block boundaries
+ do 110 i = 1, nb
+ bb(i) = nint(rn*(dfloat(i)/dfloat(nb)))
+ 110 continue
+
+c don't need global min and max
+c find the max, min of partial sums and their locations within blocks
+ ilo = 1
+ psum = 0
+ htmaxp = 0.0d0
+ do 20 j = 1, nb
+ sx(ilo) = psum + px(ilo)
+ psmin = sx(ilo)
+ ipsmin = ilo
+ psmax = sx(ilo)
+ ipsmax = ilo
+ do 10 i = ilo+1, bb(j)
+ sx(i) = sx(i-1) + px(i)
+ if (sx(i) .lt. psmin) then
+ psmin = sx(i)
+ ipsmin = i
+ endif
+ if (sx(i) .gt. psmax) then
+ psmax = sx(i)
+ ipsmax = i
+ endif
+ 10 continue
+c store the block min, max and locations
+ bpsmin(j) = psmin
+ bpsmax(j) = psmax
+c reset ilo to be the block boundary + 1
+ psum = sx(bb(j))
+ ilo = bb(j) + 1
+c calculate the bss at the block max & min pr
+ i = abs(ipsmin - ipsmax)
+ if ((i .le. k) .and. (i .ge. al0)) then
+ rj = dfloat(i)
+ rnjov1 = rn/(rj*(rn-rj))
+ if (ibin) then
+ bssmx = rnjov1*(bpsmax(j) - bpsmin(j) -0.5)**2
+ else
+ bssmx = rnjov1*(bpsmax(j) - bpsmin(j))**2
+ endif
+ if (htmaxp .lt. bssmx) htmaxp = bssmx
+ endif
+ 20 continue
+
+c check the first block
+ ilo = 1
+ ihi = bb(1)
+ psdiff = bpsmax(1) - bpsmin(1)
+ if (ibin) then
+ psdiffsq = (psdiff-0.5)**2
+ else
+ psdiffsq = psdiff**2
+ endif
+ do 40 j = al0,k
+ rj = dfloat(j)
+ rnjov1 = rn/(rj*(rn-rj))
+ bsslim = rnjov1*psdiffsq
+ if (bsslim .lt. htmaxp) go to 50
+ sxmx = 0.0d0
+ do 30 i = ilo,ihi-j
+ absx = abs(sx(i+j) - sx(i))
+ if (sxmx.lt.absx) sxmx = absx
+ 30 continue
+ if (ibin) then
+ bssmx = rnjov1*(abs(sxmx)-0.5)**2
+ else
+ bssmx = rnjov1*sxmx**2
+ endif
+ if (htmaxp.lt.bssmx) htmaxp = bssmx
+ 40 continue
+
+c now the minor arcs spanning the end (n)
+ 50 psdiff = max(abs(bpsmax(1)-bpsmin(nb)), abs(bpsmax(nb)-bpsmin(1)))
+ if (ibin) then
+ psdiffsq = (psdiff-0.5)**2
+ else
+ psdiffsq = psdiff**2
+ endif
+ do 70 j = al0,k
+ rj = dfloat(j)
+ rnjov1 = rn/(rj*(rn-rj))
+ bsslim = rnjov1*psdiffsq
+ if (bsslim .lt. htmaxp) go to 100
+ sxmx = 0.0d0
+ nmj = n-j
+ do 60 i = 1,j
+ absx = abs(sx(i+nmj) - sx(i))
+ if (sxmx.lt.absx) sxmx = absx
+ 60 continue
+ if (ibin) then
+ bssmx = rnjov1*(abs(sxmx)-0.5)**2
+ else
+ bssmx = rnjov1*sxmx**2
+ endif
+ if (htmaxp.lt.bssmx) htmaxp = bssmx
+ 70 continue
+
+c now the other blocks
+ 100 do 200 l = 2,nb
+ ilo = bb(l-1)+1
+ ihi = bb(l)
+ psdiff = bpsmax(l) - bpsmin(l)
+ if (ibin) then
+ psdiffsq = (psdiff-0.5)**2
+ else
+ psdiffsq = psdiff**2
+ endif
+ do 140 j = al0,k
+ rj = dfloat(j)
+ rnjov1 = rn/(rj*(rn-rj))
+ bsslim = rnjov1*psdiffsq
+ if (bsslim .lt. htmaxp) go to 150
+ sxmx = 0.0d0
+ do 130 i = ilo,ihi-j
+ absx = abs(sx(i+j) - sx(i))
+ if (sxmx.lt.absx) sxmx = absx
+ 130 continue
+ if (ibin) then
+ bssmx = rnjov1*(abs(sxmx)-0.5)**2
+ else
+ bssmx = rnjov1*sxmx**2
+ endif
+ if (htmaxp.lt.bssmx) htmaxp = bssmx
+ 140 continue
+ 150 psdiff = max(abs(bpsmax(l)-bpsmin(l-1)),
+ 1 abs(bpsmax(l-1)-bpsmin(l)))
+ if (ibin) then
+ psdiffsq = (psdiff-0.5)**2
+ else
+ psdiffsq = psdiff**2
+ endif
+ do 170 j = al0,k
+ rj = dfloat(j)
+ rnjov1 = rn/(rj*(rn-rj))
+ bsslim = rnjov1*psdiffsq
+ if (bsslim .lt. htmaxp) go to 200
+ sxmx = 0.0d0
+ nmj = n-j
+ do 160 i = ilo-j,ilo-1
+ absx = abs(sx(i+j) - sx(i))
+ if (sxmx.lt.absx) sxmx = absx
+ 160 continue
+ if (ibin) then
+ bssmx = rnjov1*(abs(sxmx)-0.5)**2
+ else
+ bssmx = rnjov1*sxmx**2
+ endif
+ if (htmaxp.lt.bssmx) htmaxp = bssmx
+ 170 continue
+ 200 continue
+ if (ibin) then
+ if (tss .le. 0.0001d0) tss = 1.0d0
+ htmaxp = htmaxp/(tss/rn)
+ else
+ if (tss .le. htmaxp+0.0001d0) tss = htmaxp + 1.0d0
+ htmaxp = htmaxp/((tss-htmaxp)/(rn-2.0d0))
+ endif
+
+c deallocate memory
+ deallocate(bpsmax, bpsmin, bb)
+
+ return
+ end
diff --git a/src/changepoints-wtd.f b/src/changepoints-wtd.f
new file mode 100644
index 0000000..42a1c1f
--- /dev/null
+++ b/src/changepoints-wtd.f
@@ -0,0 +1,203 @@
+c Ternary segmentation with permutation reference distribution
+c probes have weights due to differences in variances
+
+ subroutine wfindcpt(n,x,tss,wts,rwts,cwts,px,sx,nperm,cpval,ncpt,
+ 1 icpt,hybrid,al0,hk,mncwt,delta,ngrid,sbn,sbdry,tol)
+ integer n,nperm,ncpt,icpt(2),al0,hk,ngrid,sbn,sbdry(sbn)
+ logical hybrid
+ double precision x(n),tss,wts(n),rwts(n),cwts(n),px(n),sx(n),
+ 1 cpval,mncwt(hk),delta,tol
+
+ integer np,nrej,nrejc,iseg(2),n1,n2,n12,l,k
+ double precision ostat,ostat1,pstat,tpval,pval1,pval2
+
+c new functions to replace tmax and htmax (also tmaxo replaces tmax1)
+ double precision tailp, wtmaxp, hwtmaxp, wtpermp
+ external tailp, wtmaxp, hwtmaxp, wtpermp
+
+ call rndstart()
+
+ nrej = 0
+ ncpt = 0
+
+c call the observed statistic routine
+ call wtmaxo(n,x,wts,tss,sx,cwts,iseg,ostat,al0)
+ ostat1 = sqrt(ostat)
+ ostat = ostat * 0.99999
+
+c if maximal t-statistic is too small (for now use 0.1) don't split
+ if (ostat1 .le. 0.1) go to 500
+c if maximal t-statistic is too large (for now use 7.0) split
+c also make sure it's not affected by outliers i.e. small seglength
+ l = min(iseg(2) - iseg(1), n - iseg(2) + iseg(1))
+ if ((ostat1 .ge. 7.0) .and. (l .ge. 10)) go to 200
+c o.w calculate p-value and decide if & how data are segmented
+ if (hybrid) then
+ call getmncwt(n, cwts, hk, mncwt, delta)
+c delta is a function of arc lengths
+ pval1 = tailp(ostat1, delta, n, ngrid, tol)
+ if (pval1 .gt. cpval) go to 500
+ pval2 = cpval - pval1
+ nrejc = int(pval2*dfloat(nperm))
+ k=nrejc*(nrejc+1)/2 + 1
+ do 50 np = 1,nperm
+c call permutation code for data with weights
+ call wxperm(n,x,px,rwts)
+c call the small arc permutation statistic function
+ pstat = hwtmaxp(n,hk,px,wts,sx,cwts,mncwt,al0)
+ if (ostat.le.pstat) then
+ nrej = nrej + 1
+ k = k + 1
+ endif
+ if (nrej.gt.nrejc) go to 500
+ if (np .ge. sbdry(k)) go to 200
+ 50 continue
+ else
+ nrejc = int(cpval*dfloat(nperm))
+ k=nrejc*(nrejc+1)/2 + 1
+ do 100 np = 1,nperm
+c call permutation code for data with weights
+ call wxperm(n,x,px,rwts)
+c call full data permutation statistic function
+ pstat = wtmaxp(n,px,wts,sx,cwts,al0)
+ if (ostat.le.pstat) then
+ nrej = nrej + 1
+ k = k + 1
+ endif
+ if (nrej.gt.nrejc) go to 500
+ if (np .ge. sbdry(k)) go to 200
+ 100 continue
+ endif
+ 200 if (iseg(2).eq.n) then
+ ncpt = 1
+ icpt(1) = iseg(1)
+ else
+ if(iseg(1).eq.0) then
+ ncpt = 1
+ icpt(1) = iseg(2)
+ else
+ l = 1
+ n1 = iseg(1)
+ n12 = iseg(2)
+ n2 = n12 - n1
+ tpval = wtpermp(n1,n2,n12,x(l),px,wts(l),rwts(l),nperm)
+ if (tpval.le.cpval) then
+ ncpt = 1
+ icpt(1) = iseg(1)
+ endif
+ l = iseg(1) + 1
+ n12 = n - iseg(1)
+ n2 = n - iseg(2)
+ n1 = n12 - n2
+ tpval = wtpermp(n1,n2,n12,x(l),px,wts(l),rwts(l),nperm)
+ if (tpval.le.cpval) then
+ ncpt = ncpt + 1
+ icpt(ncpt) = iseg(2)
+ endif
+ endif
+ endif
+
+ 500 call rndend()
+
+ return
+ end
+
+c ******* code to permute the data vector with weights ********
+c since variance of probe i is inversely proportional to weights
+c multiply by square root, permute and then divide by square root
+ subroutine wxperm(n,x,px,rwts)
+ integer n
+ double precision x(n),px(n),rwts(n)
+
+ integer i,j
+ double precision cc,tmpx
+
+ double precision dunif
+ external dunif
+
+ do 10 i = 1,n
+ px(i) = x(i)*rwts(i)
+ 10 continue
+
+ do 20 i = n,1,-1
+ cc = dunif()
+ j = int(cc*dfloat(i))+1
+ tmpx = px(i)
+ px(i) = px(j)/rwts(i)
+ px(j) = tmpx
+ 20 continue
+
+ return
+ end
+
+c function for the p-value of t-statistics for removing edge effects
+ double precision function wtpermp(n1,n2,n,x,px,wts,rwts,nperm)
+ integer n1,n2,n,nperm
+ double precision x(n),px(n),wts(n),rwts(n)
+
+ integer np,i,m1,j,nrej
+ double precision xsum1,xsum2,xbar,ostat,pstat,rn1,rn2,rm1,
+ 1 tstat, tss, rn, cc, tmpx
+
+ double precision dunif
+ external dunif
+
+ if (n1.eq.1 .or. n2.eq.1) then
+ nrej = nperm
+ go to 110
+ endif
+ xsum1 = 0.0
+ tss = 0.0
+ rn1 = 0.0
+ do 10 i=1,n1
+ px(i) = x(i)*rwts(i)
+ xsum1 = xsum1 + wts(i)*x(i)
+ tss = tss + wts(i)*x(i)**2
+ rn1 = rn1 + wts(i)
+ 10 continue
+ xsum2 = 0.0
+ rn2 = 0.0
+ do 20 i=n1+1,n
+ px(i) = x(i)
+ xsum2 = xsum2 + wts(i)*x(i)
+ tss = tss + wts(i)*x(i)**2
+ rn2 = rn2 + wts(i)
+ 20 continue
+ rn = rn1 + rn2
+ xbar = (xsum1 + xsum2)/rn
+ tss = tss - rn*(xbar**2)
+ if (n1.le.n2) then
+ m1 = n1
+ rm1 = rn1
+ ostat = 0.99999*abs(xsum1/rn1 - xbar)
+ tstat = (ostat**2)*rn1*rn/rn2
+ else
+ m1 = n2
+ rm1 = rn2
+ ostat = 0.99999*abs(xsum2/rn2 - xbar)
+ tstat = (ostat**2)*rn2*rn/rn1
+ endif
+ nrej = 0
+ tstat = tstat/((tss-tstat)/(dfloat(n)-2.0))
+c if observed t is large (> 5) don't bother with permutation p-value
+c also make sure there are enough observations i.e. m1 >= 10
+ if ((tstat .gt. 25) .and. (m1 .ge. 10)) go to 110
+ do 100 np = 1,nperm
+ xsum1 = 0
+ do 30 i = n,n-m1+1,-1
+ cc = dunif()
+ j = int(cc*dfloat(i))+1
+ tmpx = px(i)
+ px(i) = px(j)
+ px(j) = tmpx
+c the observation should be divided by sqrt(wts(i)) to get the correct
+c probe variance. But should be multiplied by wts(i) for statistic
+ xsum1 = xsum1 + px(i)*rwts(i)
+ 30 continue
+ pstat = abs(xsum1/rm1 - xbar)
+ if (ostat.le.pstat) nrej = nrej + 1
+ 100 continue
+ 110 wtpermp = dfloat(nrej)/dfloat(nperm)
+
+ return
+ end
diff --git a/src/changepoints.f b/src/changepoints.f
new file mode 100644
index 0000000..b90af0f
--- /dev/null
+++ b/src/changepoints.f
@@ -0,0 +1,210 @@
+c Ternary segmentation with permutation reference distribution
+ subroutine fndcpt(n,x,tss,px,sx,nperm,cpval,ncpt,icpt,ibin,
+ 1 hybrid,al0,hk,delta,ngrid,sbn,sbdry,tol)
+ integer n,nperm,ncpt,icpt(2),al0,hk,ngrid,sbn,sbdry(sbn)
+ logical ibin,hybrid
+ double precision x(n),tss,px(n),sx(n),cpval,delta,tol
+
+ integer np,nrej,nrejc,iseg(2),n1,n2,n12,l,k
+ double precision ostat,ostat1,pstat,tpval,pval1,pval2
+
+c new functions to replace tmax and htmax (also tmaxo replaces tmax1)
+ double precision tailp, tmaxp, htmaxp, tpermp
+ external tailp, tmaxp, htmaxp, tpermp
+
+ call rndstart()
+
+ nrej = 0
+ ncpt = 0
+
+c call tmax1(n,twon,x,tss,sx,tx,iseg,ostat,ibin)
+ call tmaxo(n,x,tss,sx,iseg,ostat,al0,ibin)
+ ostat1 = sqrt(ostat)
+ ostat = ostat * 0.99999
+c call dblepr("Max Stat",8,ostat,1)
+c call intpr("Location",8,iseg,2)
+
+c if maximal t-statistic is too small (for now use 0.1) don't split
+ if (ostat1 .le. 0.1) go to 500
+c if maximal t-statistic is too large (for now use 7.0) split
+c also make sure it's not affected by outliers i.e. small seglength
+ l = min(iseg(2) - iseg(1), n - iseg(2) + iseg(1))
+ if ((ostat1 .ge. 7.0) .and. (l .ge. 10)) go to 200
+c o.w calculate p-value and decide if & how data are segmented
+ if (hybrid) then
+ pval1 = tailp(ostat1, delta, n, ngrid, tol)
+ if (pval1 .gt. cpval) go to 500
+ pval2 = cpval - pval1
+ nrejc = int(pval2*dfloat(nperm))
+ k=nrejc*(nrejc+1)/2 + 1
+ do 50 np = 1,nperm
+ call xperm(n,x,px)
+c pstat = htmax(n,twon,hk,tss,px,sx,tx,ibin)
+ pstat = htmaxp(n,hk,tss,px,sx,al0,ibin)
+ if (ostat.le.pstat) then
+ nrej = nrej + 1
+ k = k + 1
+ endif
+ if (nrej.gt.nrejc) go to 500
+ if (np .ge. sbdry(k)) go to 200
+ 50 continue
+ else
+ nrejc = int(cpval*dfloat(nperm))
+ k=nrejc*(nrejc+1)/2 + 1
+ do 100 np = 1,nperm
+ call xperm(n,x,px)
+c pstat = tmax(n,twon,tss,px,sx,tx,ibin)
+ pstat = tmaxp(n,tss,px,sx,al0,ibin)
+c call dblepr("Perm Max Stat",13,pstat,1)
+ if (ostat.le.pstat) then
+ nrej = nrej + 1
+ k = k + 1
+ endif
+c call intpr("num rej",7,nrej,1)
+ if (nrej.gt.nrejc) go to 500
+ if (np .ge. sbdry(k)) go to 200
+ 100 continue
+ endif
+ 200 if (iseg(2).eq.n) then
+ ncpt = 1
+ icpt(1) = iseg(1)
+ else
+ if(iseg(1).eq.0) then
+ ncpt = 1
+ icpt(1) = iseg(2)
+ else
+ l = 1
+ n1 = iseg(1)
+ n12 = iseg(2)
+ n2 = n12 - n1
+ tpval = tpermp(n1,n2,n12,x(l),px,nperm)
+c call dblepr("binseg p-value",14,tpval,1)
+ if (tpval.le.cpval) then
+ ncpt = 1
+ icpt(1) = iseg(1)
+ endif
+ l = iseg(1) + 1
+ n12 = n - iseg(1)
+ n2 = n - iseg(2)
+ n1 = n12 - n2
+ tpval = tpermp(n1,n2,n12,x(l),px,nperm)
+c call dblepr("binseg p-value",14,tpval,1)
+ if (tpval.le.cpval) then
+ ncpt = ncpt + 1
+ icpt(ncpt) = iseg(2)
+ endif
+ endif
+ endif
+
+ 500 call rndend()
+
+ return
+ end
+
+c code to permute the data vector
+ subroutine xperm(n,x,px)
+ integer n
+ double precision x(n),px(n)
+
+ integer i,j
+ double precision cc,tmpx
+
+ double precision dunif
+ external dunif
+
+ do 10 i = 1,n
+ px(i) = x(i)
+ 10 continue
+
+ do 20 i = n,1,-1
+ cc = dunif()
+ j = int(cc*dfloat(i))+1
+ tmpx = px(i)
+ px(i) = px(j)
+ px(j) = tmpx
+ 20 continue
+ return
+ end
+
+c function for the p-value of t-statistics for removing edge effects
+ double precision function tpermp(n1,n2,n,x,px,nperm)
+ integer n1,n2,n,nperm
+ double precision x(n),px(n)
+
+ integer np,i,m1,j,nrej
+ double precision xsum1,xsum2,xbar,ostat,pstat,rn1,rn2,rm1,
+ 1 tstat, tss, rn, cc, tmpx
+
+ double precision dunif
+ external dunif
+
+ rn1 = dfloat(n1)
+ rn2 = dfloat(n2)
+ rn = rn1 + rn2
+ if (n1.eq.1 .or. n2.eq.1) then
+ nrej = nperm
+ go to 110
+ endif
+ xsum1 = 0.0
+ tss = 0.0
+ do 10 i=1,n1
+ px(i) = x(i)
+ xsum1 = xsum1 + x(i)
+ tss = tss + x(i)**2
+ 10 continue
+ xsum2 = 0.0
+ do 20 i=n1+1,n
+ px(i) = x(i)
+ xsum2 = xsum2 + x(i)
+ tss = tss + x(i)**2
+ 20 continue
+ xbar = (xsum1 + xsum2)/rn
+ tss = tss - rn*(xbar**2)
+ if (n1.le.n2) then
+ m1 = n1
+ rm1 = rn1
+ ostat = 0.99999*abs(xsum1/rn1 - xbar)
+ tstat = (ostat**2)*rn1*rn/rn2
+ else
+ m1 = n2
+ rm1 = rn2
+ ostat = 0.99999*abs(xsum2/rn2 - xbar)
+ tstat = (ostat**2)*rn2*rn/rn1
+ endif
+c call dblepr("O-Stat",6,ostat,1)
+ nrej = 0
+ tstat = tstat/((tss-tstat)/(rn-2.0))
+c call dblepr("T-square",8,tstat,1)
+c if observed t is large (> 5) don't bother with permutation p-value
+c also make sure there are enough observations i.e. m1 >= 10
+ if ((tstat .gt. 25) .and. (m1 .ge. 10)) go to 110
+ do 100 np = 1,nperm
+c*******************************************
+c the following is very inefficient
+c*******************************************
+c call xperm(n,x,px)
+c xsum1 = 0.0
+c do 30 i=1,m1
+c xsum1 = xsum1 + px(i)
+c 30 continue
+c*******************************************
+c changed to the following: instead of
+c full permutation sample m1 w.o. repl
+c*******************************************
+ xsum1 = 0
+ do 30 i = n,n-m1+1,-1
+ cc = dunif()
+ j = int(cc*dfloat(i))+1
+ tmpx = px(i)
+ px(i) = px(j)
+ px(j) = tmpx
+ xsum1 = xsum1 + px(i)
+ 30 continue
+ pstat = abs(xsum1/rm1 - xbar)
+c call dblepr("P-Stat",6,pstat,1)
+ if (ostat.le.pstat) nrej = nrej + 1
+ 100 continue
+ 110 tpermp = dfloat(nrej)/dfloat(nperm)
+
+ return
+ end
diff --git a/src/esegment.f b/src/esegment.f
new file mode 100644
index 0000000..b8400b6
--- /dev/null
+++ b/src/esegment.f
@@ -0,0 +1,52 @@
+c for binary segmentation of exon data re-use the segment.p code
+c with one-sided p-value
+ subroutine esegp(n, exndat, ostat, eloc, pval, ng, tol)
+ integer n, eloc, ng
+ double precision exndat(n), ostat, pval, tol
+
+ double precision btailp
+ external btailp
+
+ integer i
+ double precision tss
+
+ tss = 0
+ do 10 i = 1, n
+ tss = tss + exndat(i)**2
+ 10 continue
+ call etmax(n, exndat, tss, ostat, eloc)
+c call dblepr("Max Stat",8,ostat,1)
+ pval = btailp(ostat, n, ng, tol)
+ pval = pval/2
+ if (pval .gt. 1) pval = 1.0d0
+
+ return
+ end
+
+c looking for increase in expression - use a one-sided t-statistic
+c t_i = S_i*sqrt(n/(i*(n-i)))/sqrt((tss - S_i^2*n/(i*(n-i)))/(n-2))
+ subroutine etmax(n, x, tss, ostat, eloc)
+ integer n, eloc
+ double precision x(n), tss, ostat
+
+ integer i
+ double precision sumxi, btmaxi, dn, di
+
+ sumxi = x(1)
+ ostat = 0.0
+ eloc = -1
+ dn = dfloat(n)
+ di = 1.0
+ do 20 i = 2,n-2
+ di = di + 1.0
+ sumxi = sumxi + x(i)
+ btmaxi = -sumxi/sqrt(di*(dn-di))
+ if (ostat .lt. btmaxi) then
+ ostat = btmaxi
+ eloc = i
+ endif
+ 20 continue
+ ostat = (ostat/sqrt(tss - dn*ostat**2))*sqrt(dn*(dn-2))
+
+ return
+ end
diff --git a/src/flchoose.c b/src/flchoose.c
new file mode 100644
index 0000000..be85793
--- /dev/null
+++ b/src/flchoose.c
@@ -0,0 +1,4 @@
+#include <R.h>
+#include <Rmath.h>
+/* Fortran function for log-combinations */
+double F77_SUB(flchoose)(double *n, double *k) { return lchoose(*n, *k); }
diff --git a/src/fphyper.c b/src/fphyper.c
new file mode 100644
index 0000000..93d661f
--- /dev/null
+++ b/src/fphyper.c
@@ -0,0 +1,4 @@
+#include <R.h>
+#include <Rmath.h>
+/* Fortran function for hypergeometric CDF */
+double F77_SUB(fphypr)(double *i, double *m, double *n, double *k) { return phyper(*i, *m, *n, *k, 1, 0); }
diff --git a/src/fpnorm.c b/src/fpnorm.c
new file mode 100644
index 0000000..6cfeab6
--- /dev/null
+++ b/src/fpnorm.c
@@ -0,0 +1,4 @@
+#include <R.h>
+#include <Rmath.h>
+/* Fortran function for standard normal CDF */
+double F77_SUB(fpnorm)(double *x) { return pnorm(*x, 0, 1, 1, 0); }
diff --git a/src/getbdry.f b/src/getbdry.f
new file mode 100644
index 0000000..5c0ee05
--- /dev/null
+++ b/src/getbdry.f
@@ -0,0 +1,123 @@
+ subroutine getbdry(eta, m, nperm, mb, ibdry, etastr, tol)
+ integer m, nperm, mb, ibdry(mb)
+ double precision eta, etastr(m), tol
+
+ double precision eta0, etalo, etahi, plo, phi, pexcd
+ integer j, l
+
+ l = 1
+ ibdry(1) = nperm-int(dfloat(nperm)*eta)
+ etastr(1) = eta
+ eta0 = eta
+ do 20 j = 2,m
+ etahi = eta0*1.1
+ call etabdry(nperm, etahi, j, ibdry(l+1))
+ call pexceed(nperm, j, ibdry(l+1), phi)
+ etalo = eta0*0.25
+ call etabdry(nperm, etalo, j, ibdry(l+1))
+ call pexceed(nperm, j, ibdry(l+1), plo)
+ do 10 while ((etahi-etalo)/etalo .gt. tol)
+ eta0 = etalo + (etahi-etalo)*(eta-plo)/(phi-plo)
+ call etabdry(nperm, eta0, j, ibdry(l+1))
+ call pexceed(nperm, j, ibdry(l+1), pexcd)
+ if (pexcd .gt. eta) then
+ etahi = eta0
+ phi = pexcd
+ else
+ etalo = eta0
+ plo = pexcd
+ endif
+ 10 continue
+ etastr(j) = eta0
+ l = l+j
+ 20 continue
+
+ return
+ end
+
+ subroutine etabdry(nperm, eta0, n1s, ibdry)
+ integer nperm, n1s, ibdry(n1s)
+ double precision eta0
+
+ double precision fphypr
+ external fphypr
+
+ integer i, k
+ double precision di, dn, dn1s, dk, tprob
+
+ dn1s = dfloat(n1s)
+ dn = dfloat(nperm-n1s)
+
+ k = 0
+ dk = 0.0d0
+ do 10 i = 1, nperm
+ di = dfloat(i)
+ tprob = fphypr(dk, dn1s, dn, di)
+ if (tprob .le. eta0) then
+ k = k+1
+ dk = dk + 1.0d0
+ ibdry(k) = i
+ endif
+ 10 continue
+
+ return
+ end
+
+ subroutine pexceed(nperm, n1s, ibdry, pexcd)
+ integer nperm, n1s, ibdry(n1s)
+ double precision pexcd
+
+ double precision dn, dk, dn1, dk1, dn2, dk2, dn3, dk3, dlcnk
+ integer i
+
+ double precision flchoose
+ external flchoose
+
+ dn = dfloat(nperm)
+ dk = dfloat(n1s)
+ dn1 = dfloat(nperm-ibdry(1))
+ dlcnk = flchoose(dn, dk)
+
+ pexcd = exp(flchoose(dn1, dk) - dlcnk)
+
+ if (n1s .ge. 2) then
+ dn1 = dfloat(ibdry(1))
+ dn = dfloat(nperm-ibdry(2))
+ dk = dfloat(n1s-1)
+ pexcd = pexcd + exp(log(dn1) + flchoose(dn, dk) - dlcnk)
+ endif
+
+ if (n1s .ge. 3) then
+ dn1 = dfloat(ibdry(1))
+ dn2 = dfloat(ibdry(2))
+ dn = dfloat(nperm-ibdry(3))
+ dk = dfloat(n1s-2)
+ pexcd = pexcd +
+ 1 exp(log(dn1) + log(dn1-1.0) - log(2.0) +
+ 2 flchoose(dn, dk) - dlcnk) +
+ 3 exp(log(dn1) + log(dn2-dn1) + flchoose(dn, dk) - dlcnk)
+ endif
+
+ if (n1s .gt. 3) then
+ do 10 i = 4, n1s
+ dn1 = dfloat(ibdry(i-3))
+ dk1 = dfloat(i-1)
+ dk2 = dfloat(i-2)
+ dk3 = dfloat(i-3)
+ dn2 = dfloat(ibdry(i-2))
+ dn3 = dfloat(ibdry(i-1))
+ dn = dfloat(nperm-ibdry(i))
+ dk = dfloat(n1s-i+1)
+ pexcd = pexcd +
+ 1 exp(flchoose(dn1, dk1) + flchoose(dn, dk) - dlcnk) +
+ 2 exp(flchoose(dn1, dk2) + log(dn3-dn1) +
+ 3 flchoose(dn, dk) - dlcnk) +
+ 4 exp(flchoose(dn1, dk3) + log(dn2-dn1) + log(dn3-dn2) +
+ 3 flchoose(dn, dk) - dlcnk) +
+ 5 exp(flchoose(dn1, dk3) + log(dn2-dn1) - log(2.0) +
+ 6 log(dn2-dn1-1.0) + flchoose(dn, dk) - dlcnk)
+ 10 continue
+ endif
+
+ return
+ end
diff --git a/src/prune.f b/src/prune.f
new file mode 100644
index 0000000..55382ef
--- /dev/null
+++ b/src/prune.f
@@ -0,0 +1,120 @@
+ subroutine prune(n,x,nseg,lseg,pcut,sx,ncpt,loc,loc1,pncpt)
+ integer n, nseg, lseg(nseg), ncpt, loc(ncpt), loc1(2,ncpt), pncpt
+ double precision x(n), pcut, sx(nseg)
+
+ integer i, j, k, kmj
+ double precision ssq, wssqk, wssq1, wssqj
+ logical jleft
+
+ double precision errssq
+ external errssq
+
+ ssq = 0.0
+ do 10 i = 1,n
+ ssq = ssq + x(i)**2
+ 10 continue
+ k = 0
+ do 15 i = 1,nseg
+ sx(i) = 0
+ do 14 j = 1,lseg(i)
+ k = k + 1
+ sx(i) = sx(i) + x(k)
+ 14 continue
+ 15 continue
+
+ k = nseg - 1
+ do 16 i = 1,k
+ loc(i) = i
+ loc1(2,i) = i
+ 16 continue
+ wssqk = ssq - errssq(nseg,lseg,sx,k,loc)
+ do 100 j = k-1, 1, -1
+ kmj = k - j
+ jleft = .TRUE.
+ do 20 i = 1,j
+ loc(i) = i
+ loc1(1,i) = i
+ 20 continue
+ wssqj = ssq - errssq(nseg,lseg,sx,j,loc)
+ do 30 while(jleft)
+ call combn(j, kmj, loc, jleft)
+ wssq1 = ssq - errssq(nseg,lseg,sx,j,loc)
+ if (wssq1 .le. wssqj) then
+ wssqj = wssq1
+ do 25 i = 1,j
+ loc1(1,i) = loc(i)
+ 25 continue
+ endif
+ 30 continue
+ if (wssqj/wssqk .gt. 1+pcut) then
+ pncpt = j+1
+ do 35 i = 1,pncpt
+ loc(i) = loc1(2,i)
+ 35 continue
+ return
+ else
+ do 40 i = 1,j
+ loc1(2,i) = loc1(1,i)
+ 40 continue
+ endif
+ 100 continue
+ pncpt = 0
+ return
+ end
+
+ double precision function errssq(nseg,lseg,sx,k,loc)
+ integer nseg, lseg(nseg),k,loc(k)
+ double precision sx(nseg)
+
+ double precision segsx
+ integer segnx, i, j
+
+ errssq = 0.0
+ segsx = 0.0
+ segnx = 0
+ do 10 i = 1,loc(1)
+ segsx = segsx + sx(i)
+ segnx = segnx + lseg(i)
+ 10 continue
+ errssq = errssq + segsx**2/dfloat(segnx)
+ do 20 j = 2,k
+ segsx = 0.0
+ segnx = 0
+ do 15 i = loc(j-1)+1,loc(j)
+ segsx = segsx + sx(i)
+ segnx = segnx + lseg(i)
+ 15 continue
+ errssq = errssq + segsx**2/dfloat(segnx)
+ 20 continue
+ segsx = 0.0
+ segnx = 0
+ do 25 i = loc(k)+1,nseg
+ segsx = segsx + sx(i)
+ segnx = segnx + lseg(i)
+ 25 continue
+ errssq = errssq + segsx**2/dfloat(segnx)
+
+ return
+ end
+c
+c This program generates Choose(n,r) combinations one at a time
+c Adapted from Algorithm AS 88 Appl. Statist. (1975) Vol.24, No. 3
+c
+ subroutine combn(r, nmr, loc, rleft)
+ integer r, nmr, loc(r)
+ logical rleft
+
+ integer i,j
+
+ i = r
+ do 10 while (loc(i) .eq. nmr+i)
+ i = i-1
+ 10 continue
+ loc(i) = loc(i) + 1
+ do 20 j = i+1,r
+ loc(j) = loc(j-1)+1
+ 20 continue
+ if (loc(1) .eq. nmr+1) rleft = .FALSE.
+
+ return
+ end
diff --git a/src/rshared.c b/src/rshared.c
new file mode 100644
index 0000000..7d6dc82
--- /dev/null
+++ b/src/rshared.c
@@ -0,0 +1,4 @@
+#include <R.h>
+void F77_SUB(rndstart)(void) { GetRNGstate(); }
+void F77_SUB(rndend)(void) { PutRNGstate(); }
+double F77_SUB(dunif)(void) { return unif_rand(); }
diff --git a/src/segmentp.f b/src/segmentp.f
new file mode 100644
index 0000000..2ee4be3
--- /dev/null
+++ b/src/segmentp.f
@@ -0,0 +1,93 @@
+ subroutine bsegp(n, gendat, ostat, pval, ng, tol)
+ integer n, ng
+ double precision gendat(n), ostat, pval, tol
+
+ double precision btmax, btailp
+ external btmax, btailp
+
+ ostat = btmax(n, gendat)
+c call dblepr("Max Stat",8,ostat,1)
+ pval = btailp(ostat, n, ng, tol)
+ if (pval .gt. 1) pval = 1.0d0
+
+ return
+ end
+
+ double precision function btmax(n, x)
+ integer n
+ double precision x(n)
+
+ integer i
+ double precision sumxi, btmaxi, dn, di, ostat
+
+ sumxi = x(1)
+ ostat = 0.0
+ dn = dfloat(n)
+ di = 1.0
+ do 20 i = 2,n-2
+ di = di + 1.0
+ sumxi = sumxi + x(i)
+ btmaxi = dn*(sumxi**2)/(di*(dn-di))
+ if (ostat .lt. btmaxi) then
+ ostat = btmaxi
+c ibseg = i
+ endif
+ 20 continue
+ btmax = sqrt(ostat)
+
+ return
+ end
+
+c pseudo confidence interval based on permutations
+ subroutine bsegci(n, k, sumxk, x, px, sr, vfact, nperm, bsloc)
+ integer n, k, sr(2), nperm, bsloc(nperm)
+ double precision sumxk, x(n), px(n), vfact(n)
+
+ integer k1, nk, np, ibseg
+
+ call rndstart()
+ k1 = k+1
+ nk = n-k
+ do 10 np = 1, nperm
+ call xperm(k,x,px)
+ call xperm(nk,x(k1),px(k1))
+ call btmxci(n,k,sr,px,vfact,ibseg,sumxk)
+ bsloc(np) = ibseg
+ 10 continue
+ call rndend()
+
+ return
+ end
+
+ subroutine btmxci(n,k,sr,x,vfact,ibseg,sumxk)
+ integer n,k,sr(2),ibseg
+ double precision x(n),vfact(n),sumxk
+
+ integer i
+ double precision sumxi, ostat, btmaxi
+
+ ostat = vfact(k)*(sumxk**2)
+ ibseg = k
+ sumxi = sumxk
+ do 10 i = k-1,sr(1),-1
+ sumxi = sumxi - x(i+1)
+ btmaxi = vfact(i)*(sumxi**2)
+ if (ostat .lt. btmaxi) then
+ ostat = btmaxi
+ ibseg = i
+ endif
+ 10 continue
+
+ sumxi = sumxk
+ do 20 i = k+1,sr(2)
+ sumxi = sumxi + x(i)
+ btmaxi = vfact(i)*(sumxi**2)
+ if (ostat .lt. btmaxi) then
+ ostat = btmaxi
+ ibseg = i
+ endif
+ 20 continue
+ ostat = sqrt(ostat)
+
+ return
+ end
diff --git a/src/smoothCNA.f b/src/smoothCNA.f
new file mode 100644
index 0000000..cdfeddf
--- /dev/null
+++ b/src/smoothCNA.f
@@ -0,0 +1,89 @@
+c n = length(gdat) i.e. number of markers and log-ratio
+c k is the neighborhood width
+c oSD = how far the outlier is to the nearest observation
+c sSD = how close should it be moved
+c nchr = number of chromosomes
+c cfrq = number of probes in respective chromosomes
+ subroutine smoothLR(n, gdat, nchr, cfrq, sgdat, k, oSD, sSD)
+ integer n, nchr, cfrq(nchr), k
+ double precision gdat(n), sgdat(n), oSD, sSD
+
+ integer i, j, ilo, ihi, k1, k2, j1, ic, cilo, cihi
+ double precision mnnbd, mxnbd, distij, xmed
+
+c temporary array for finding median
+ double precision, allocatable :: xnbhd(:)
+
+ k2 = 2*k + 1
+ allocate(xnbhd(k2))
+
+c initial values for start and end of chromosomes
+ cilo = 1
+ cihi = 0
+c loop over chromomsomes
+ do 100 ic = 1, nchr
+c end of the current chromosome
+ cihi = cihi + cfrq(ic)
+ do 50 i = cilo, cihi
+c range of the neighborhood
+ ilo = max(cilo, i-k)
+ ihi = min(cihi, i+k)
+c check if ith observation is an outlier
+c initialize the distances to be large
+ mxnbd = 100*oSD
+ mnnbd = 100*oSD
+ do 10 j = ilo, ihi
+ if (j .ne. i) then
+c calculate distance from between ith and jth obsn
+ distij = gdat(i) - gdat(j)
+c if distance is less than oSD no smoothing necessary
+ if (abs(distij) .le. oSD) then
+ sgdat(i) = gdat(i)
+ go to 50
+c otherwise calculate distances from above and below
+ else
+c mxnbd is distance from above
+ if (distij .lt. mxnbd) mxnbd = distij
+c mnnbd is distance from below
+ if (-distij .lt. mnnbd) mnnbd = -distij
+ endif
+ endif
+ 10 continue
+c if all the points in the nbhd are above than mxnbd will be negative
+c and mnnbd will be greater than oSD. Vice versa if all points below
+c
+c If both are negative then the ith point is singleton in the middle
+c but distance oSD away from all points in the nbhd. No smoothing done.
+ if ((mxnbd .le. 0) .and. (mnnbd .le. 0)) then
+ sgdat(i) = gdat(i)
+ go to 50
+ else
+c calculate the median of the nbhd
+c number of points in the nbhd
+ k1 = ihi - ilo + 1
+c get the data into temporary array
+ do 20 j = ilo, ihi
+ xnbhd(j-ilo+1) = gdat(j)
+ 20 continue
+c sort the data
+ call qsort3(xnbhd, 1, k1)
+c median is the midpoint if n is odd and average of the two if even
+ j1 = k1/2
+ if (k1 .eq. 2*j1) then
+ xmed = (xnbhd(j1) + xnbhd(j1+1))/2
+ else
+ xmed = xnbhd(j1+1)
+ endif
+c if point is above the nbhd bring it down
+ if (mxnbd .gt. 0) sgdat(i) = xmed + sSD
+c if point is below the nbhd bring it up
+ if (mnnbd .gt. 0) sgdat(i) = xmed - sSD
+ endif
+ 50 continue
+c beginning of next chromosome
+ cilo = cilo + cfrq(ic)
+ 100 continue
+ deallocate(xnbhd)
+
+ return
+ end
diff --git a/src/tailprobs.f b/src/tailprobs.f
new file mode 100644
index 0000000..dd236ad
--- /dev/null
+++ b/src/tailprobs.f
@@ -0,0 +1,120 @@
+c tail probability of circular binary segmentation statistic
+c from Siegmund (1988) or Yao (1989) paper
+ double precision function tailp(b, delta, m, ngrid, tol)
+ double precision b, delta, tol
+ integer m, ngrid
+c it1tsq is the integral of 1/(t*(1-t))**2
+ double precision nu, it1tsq
+ external nu, it1tsq
+
+ double precision t, tl, dincr, bsqrtm, x, nux
+ integer i
+
+ dincr = (0.5d0 - delta)/dfloat(ngrid)
+ bsqrtm = b/sqrt(dfloat(m))
+
+ tl = 0.5d0 - dincr
+ t = 0.5d0 - 0.5d0*dincr
+ tailp = 0.0d0
+ do 10 i = 1,ngrid
+ tl = tl + dincr
+ t = t + dincr
+ x = bsqrtm/sqrt(t*(1-t))
+ nux = nu(x, tol)
+ tailp = tailp + (nux**2)*it1tsq(tl, dincr)
+ 10 continue
+ tailp = 9.973557d-2*(b**3)*exp(-b**2/2)*tailp
+c since test is two-sided need to multiply tailp by 2
+ tailp = 2.0d0*tailp
+
+ return
+ end
+
+c integral of 1/(t*(1-t))**2 from x to x+a
+ double precision function it1tsq(x, a)
+ double precision x, a
+
+ double precision y
+
+ y = x + a - 0.5d0
+ it1tsq = (8.0d0*y)/(1.0d0 - 4.0d0*y**2) +
+ 1 2.0d0*log((1.0d0 + 2.0d0*y)/(1.0d0 - 2.0d0*y))
+ y = x - 0.5d0
+ it1tsq = it1tsq - (8.0d0*y)/(1.0d0 - 4.0d0*y**2) -
+ 1 2.0d0*log((1.0d0 + 2.0d0*y)/(1.0d0 - 2.0d0*y))
+
+ return
+ end
+
+ double precision function nu(x, tol)
+ double precision x, tol
+
+ double precision fpnorm
+ external fpnorm
+
+ double precision lnu0, lnu1, dk, xk
+ integer i, k
+
+ if (x .gt. 0.01d0) then
+ lnu1 = log(2.0d0) - 2*log(x)
+ lnu0 = lnu1
+ k = 2
+ dk = 0
+ do 10 i = 1, k
+ dk = dk + 1
+ xk = -x*sqrt(dk)/2.0d0
+ lnu1 = lnu1 - 2.0d0*fpnorm(xk)/dk
+ 10 continue
+
+ do 50 while (dabs((lnu1-lnu0)/lnu1) .gt. tol)
+ lnu0 = lnu1
+ do 20 i = 1,k
+ dk = dk + 1
+ xk = -x*sqrt(dk)/2.0d0
+ lnu1 = lnu1 - 2.0d0*fpnorm(xk)/dk
+ 20 continue
+ k = 2*k
+ 50 enddo
+ else
+ lnu1 = -0.583d0*x
+ endif
+ nu = exp(lnu1)
+
+ return
+ end
+
+c tail probability of binary segmentation statistic
+c from page 387 of Siegmund (1986) paper
+ double precision function btailp(b, m, ng, tol)
+ integer m, ng
+ double precision b, tol
+
+ double precision ll, ul, dincr, nulo, nuhi, x, x1, dm
+ integer i, k
+
+ double precision fpnorm, nu
+ external fpnorm, nu
+
+ dm = dfloat(m)
+ k = 2
+ ll = b*sqrt(1.0/dfloat(m-k) - 1.0/dfloat(m))
+ ul = b*sqrt(1.0/dfloat(k) - 1.0/dfloat(m))
+ dincr = (ul - ll)/dfloat(ng)
+
+ btailp = 0.0
+ x = ll
+ x1 = x + (b**2)/(dm*x)
+ nulo = nu(x1, tol)/x
+ do 10 i = 1, ng
+ x = x + dincr
+ x1 = x + (b**2)/(dm*x)
+ nuhi = nu(x1, tol)/x
+ btailp = btailp + (nuhi + nulo)*dincr
+ nulo = nuhi
+ 10 continue
+ btailp = b*exp(-b**2/2)*btailp/2.506628275
+
+ btailp = btailp + 2*(1.0-fpnorm(b))
+
+ return
+ end
diff --git a/tests/redundancy,20090610,segment.R b/tests/redundancy,20090610,segment.R
new file mode 100644
index 0000000..5627842
--- /dev/null
+++ b/tests/redundancy,20090610,segment.R
@@ -0,0 +1,117 @@
+######################################################################
+# Type: Redundancy test
+# Created by: Henrik Bengtsson <hb at stat.berkeley.edu>
+# Created on: 2009-06-10
+######################################################################
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Startup
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+library("DNAcopy")
+
+# Record current random seed
+sample(1) # Assert that a random seed exists
+oldSeed <- .Random.seed
+# Alway use the same random seed
+set.seed(0xbeef)
+
+# Tolerance (maybe decrease?)
+tol <- .Machine$double.eps^0.5
+
+print(sessionInfo())
+
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Simulating copy-number data
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Number of loci
+J <- 1000
+
+x <- sort(runif(J, min=0, max=1000))
+w <- runif(J)
+mu <- double(J)
+jj <- (200 <= x & x < 300)
+mu[jj] <- mu[jj] + 1
+jj <- (650 <= x & x < 800)
+mu[jj] <- mu[jj] - 1
+w[jj] <- 0.001
+eps <- rnorm(J, sd=1/2)
+y <- mu + eps
+
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Setting up a raw CNA object
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+cnR <- CNA(
+ genomdat = y,
+ chrom = rep(1, times=J),
+ maploc = x,
+ data.type = "logratio",
+ sampleid = "SampleA"
+)
+print(cnR)
+
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Test: Non-weighted segmentation
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+t <- system.time({
+fitR <- segment(cnR, verbose=1)
+})
+cat("Processing time:\n")
+print(t)
+print(fitR)
+
+# Expected results
+# These were obtained by dput(fitR$output) using DNAcopy v1.19.0
+truth <- structure(list(ID = c("SampleA", "SampleA", "SampleA", "SampleA",
+"SampleA"), chrom = c(1, 1, 1, 1, 1), loc.start = c(1.36857712641358,
+201.604291098192, 303.775111911818, 650.741211604327, 800.302447052673
+), loc.end = c(199.083976913244, 301.066882908344, 647.42697100155,
+798.971758922562, 999.329038895667), num.mark = c(209, 105, 337,
+138, 211), seg.mean = c(0.0256, 1.0099, -0.0084, -0.9792, -0.0289
+)), .Names = c("ID", "chrom", "loc.start", "loc.end", "num.mark",
+"seg.mean"), row.names = c(NA, -5L), class = "data.frame")
+
+stopifnot(all.equal(fitR$output, truth, tolerance=tol))
+
+
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Test: Weighted segmentation
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+t <- system.time({
+fitR <- segment(cnR, weights=w, verbose=1)
+})
+cat("Processing time:\n")
+print(t)
+print(fitR)
+
+# Expected results
+# These were obtained by dput(fitR$output) using DNAcopy v1.19.0
+truth <- structure(list(ID = c("SampleA", "SampleA", "SampleA"), chrom = c(1,
+1, 1), loc.start = c(1.36857712641358, 201.604291098192, 303.775111911818
+), loc.end = c(199.083976913244, 301.066882908344, 999.329038895667
+), num.mark = c(209, 105, 686), seg.mean = c(0.0259, 1.0004,
+-0.0233)), .Names = c("ID", "chrom", "loc.start", "loc.end",
+"num.mark", "seg.mean"), row.names = c(NA, -3L), class = "data.frame")
+
+stopifnot(all.equal(fitR$output, truth, tolerance=tol))
+
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Cleanup
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Reset to previous random seed
+.Random.seed <- oldSeed
+
+print(sessionInfo())
+
+
+######################################################################
+# HISTORY
+# 2009-06-10
+# o ROBUSTNESS: Added this test to assert that DNAcopy v1.19.2 and
+# newer will numerically give the same results as DNAcopy v1.19.0.
+# This test is ran each time with R CMD check.
+# o Created.
+######################################################################
diff --git a/vignettes/DNAcopy.Rnw b/vignettes/DNAcopy.Rnw
new file mode 100644
index 0000000..1cbbff7
--- /dev/null
+++ b/vignettes/DNAcopy.Rnw
@@ -0,0 +1,213 @@
+%\VignetteIndexEntry{DNAcopy}
+%\VignetteDepends{}
+%\VignetteKeywords{DNA Copy Number Analysis}
+%\VignettePackage{DNAcopy}
+
+\documentclass[11pt]{article}
+
+\usepackage{amsmath}
+\usepackage[authoryear,round]{natbib}
+\usepackage{hyperref}
+\SweaveOpts{echo=FALSE}
+
+\setlength{\textheight}{8.5in}
+\setlength{\textwidth}{6in}
+\setlength{\topmargin}{-0.25in}
+\setlength{\oddsidemargin}{0.25in}
+\setlength{\evensidemargin}{0.25in}
+
+\begin{document}
+\setkeys{Gin}{width=0.99\textwidth}
+
+
+\title{\bf DNAcopy: A Package for Analyzing DNA Copy Data}
+
+\author{Venkatraman E. Seshan$^1$ and Adam B. Olshen$^2$}
+
+\maketitle
+
+\begin{center}
+$^1$Department of Epidemiology and Biostatistics\\
+Memorial Sloan-Kettering Cancer Center\\
+{\tt seshanv at mskcc.org}\\
+\ \\
+$^2$Department of Epidemiology and Biostatistics\\
+University of California, San Francisco\\
+{\tt olshena at biostat.ucsf.edu}
+\end{center}
+
+\tableofcontents
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\section{Overview}
+
+This document presents an overview of the {\tt DNAcopy} package. This
+package is for analyzing array DNA copy number data, which is usually
+(but not always) called array Comparative Genomic Hybridization (array
+CGH) data \citep{pinkel98, snijders01, wigler03}. It implements our
+methodology for finding change-points in these data \citep{olshen04},
+which are points after which the (log) test over reference ratios have
+changed location. Our model is that the change-points
+correspond to positions where the underlying DNA copy number has
+changed. Therefore, change-points can be used to identify regions of
+gained and lost copy number. We also provide a function for making
+relevant plots of these data.
+
+\section{Data}
+
+We selected a subset of the data set presented in \cite{snijders01}.
+We are calling this data set {\tt coriell}. The data correspond to
+two array CGH studies of fibroblast cell strains. In particular, we
+chose the studies {\bf GM05296} and {\bf GM13330}. After selecting
+only the mapped data from chromosomes 1-22 and X, there are 2271 data
+points. There is accompanying spectral karyotype data (not included),
+which can serve
+as a gold standard. The data can be found at \\
+\url{http://www.nature.com/ng/journal/v29/n3/suppinfo/ng754_S1.html}
+
+\section{An Example}
+
+Here we perform an analysis on the {\bf GM05296} array CGH study
+described above.
+
+
+<<echo=TRUE,print=FALSE>>=
+library(DNAcopy)
+@
+
+<<echo=TRUE,print=FALSE>>=
+data(coriell)
+@
+
+\noindent
+Before segmentation the data needs to be made into a CNA object.
+
+<<echo=TRUE,print=FALSE>>=
+CNA.object <- CNA(cbind(coriell$Coriell.05296),
+ coriell$Chromosome,coriell$Position,
+ data.type="logratio",sampleid="c05296")
+@
+
+\noindent
+We generally recommend smoothing single point outliers before
+analysis. It is a good idea to check that the smoothing is proper for
+a particular data set.
+
+<<echo=TRUE,print=FALSE>>=
+smoothed.CNA.object <- smooth.CNA(CNA.object)
+@
+
+\noindent
+After smoothing, if necessary, the segmentation is run. Here the
+default parameters are used. A brief discussion of parameters that
+can be adjusted is in the Tips section.
+
+<<echo=TRUE,print=FALSE>>=
+segment.smoothed.CNA.object <- segment(smoothed.CNA.object, verbose=1)
+@
+
+%Plot whole studies
+
+\noindent
+There are a number of plots that can be made. The first is ordering
+the data by chromosome and map positons. The red lines correspond to
+mean values in segments. Note that the points are in alternate colors
+to indicate different chromosomes.
+
+\pagebreak
+
+\begin{center}
+<<fig=TRUE,echo=TRUE>>=
+plot(segment.smoothed.CNA.object, plot.type="w")
+@
+\end{center}
+
+\noindent
+Another possibility is to plot by chromosome within a study.
+
+\begin{center}
+<<fig=TRUE,echo=TRUE,width=11,height=9>>=
+plot(segment.smoothed.CNA.object, plot.type="s")
+@
+\end{center}
+
+%Plot each chromosome across studies (6 per page)
+
+%\begin{center}
+%<<fig=FALSE,echo=TRUE>>=
+%plot(segment.smoothed.CNA.object, plot.type="c",
+% cbys.layout=c(2,1),
+% cbys.nchrom=6)
+%@
+%\end{center}
+
+%Plot by plateaus
+
+\noindent
+If there are multiple studies, one could plot by chromosome across
+studies using the option {\tt plot.type='c'}. A final plot orders the
+segment by their chromosome means. One can take the plateaus in this
+plot to determine what the mean values should be for calling segments
+gains or losses. In this case, maybe $0.4$ for gains and $-0.6$ for
+losses. For most data, these plateaus are much closer to zero. The
+next generation of this software will have automatic methods for
+calling gains and losses.
+
+\begin{center}
+<<fig=TRUE,echo=TRUE>>=
+plot(segment.smoothed.CNA.object, plot.type="p")
+@
+\end{center}
+
+\noindent
+Change-points are often found due to local trends in the data. An
+undo method is needed to get rid of unnecessary change-points. Below all
+splits that are not at least three SDs apart are removed. The
+following plot shows that all splits not corresponding to the gold
+standard results have been removed.
+
+<<echo=TRUE,print=FALSE,width=6,height=5>>=
+sdundo.CNA.object <- segment(smoothed.CNA.object,
+ undo.splits="sdundo",
+ undo.SD=3,verbose=1)
+@
+
+
+\begin{center}
+<<fig=TRUE,echo=TRUE,width=11,height=9>>=
+plot(sdundo.CNA.object,plot.type="s")
+@
+\end{center}
+
+\section{Tips}
+
+\noindent
+A function that may be of interest that has not been mentioned is {\tt
+ subset.CNA}. It allows for subsetting of a CNA object by chromosome and
+sample so that segmentation does not have to be run on a whole data set.
+Similarly, {\tt subset.DNAcopy} allows subsetting of DNAcopy objects, which
+contain the output of segmentation.
+
+The original default segmentation algorithm, because it was based on
+permutation, took $O(N^2)$ computations, where $N$ is the number of markers on
+a chromosome. The new default algorithm is much faster. It includes a hybrid
+approach to compute the $p$-value for segmenting based partly on permutation
+and partly on a Gaussian approximation (available in all versions after 1.2.0)
+and a stopping rule (available in all versions after 1.5.0) to declare change
+when there is a strong evidence for its presence \citep{venkat07}. We no
+longer recommend using overlapping windows for larger data sets. It is still
+possible to run the full permutations analysis using the option {\tt
+ p.method='perm'}. If the new algorithm is still too slow, one can reduce the
+number of permutations in the hybrid method using the parameter {\tt nperm}
+(default is 10,000). However, the lower {\tt alpha} (the significance level
+for the test to accept change-points) is, the more permutations that are
+needed. The stopping boundary needs to be computed for any choice of {\tt
+ nperm} and {\tt alpha} which is not the default which is done automatically
+within the function {\tt segment} or can be done externally using the function
+{\tt getbdry} and passed on to {\tt segment}.
+
+%\newpage
+\bibliographystyle{apalike}
+\bibliography{DNAcopy}
+
+\end{document}
diff --git a/vignettes/DNAcopy.bib b/vignettes/DNAcopy.bib
new file mode 100644
index 0000000..66be8c3
--- /dev/null
+++ b/vignettes/DNAcopy.bib
@@ -0,0 +1,64 @@
+
+
+
+
+ at ARTICLE{pinkel98,
+ Author = {D. Pinkel and R. Segraves and D. Sudar and S. Clark and I. Poole and D. Kowbel and C. Collins and W. L. Kuo and C. Chen and Y. Zhai and S. H. Dairkee and B. M. Ljung and J. W. Gray and D. G. Albertson},
+ Journal = {Nat. Genet.},
+ Title = {High resolution analysis of DNA copy number variation using comparative genomic hybridization to microarrays},
+ Year = {1998},
+ volume = {20},
+ OPTnumber = {},
+ pages = {207-211}
+}
+
+
+
+
+ at ARTICLE{snijders01,
+ Author = {A. M. Snijders and N. Nowak and R. Segraves and S. Blackwood and N. Brown and J. Conroy and G. Hamilton and A. K. Hindle and B. Huey and K. Kimura and S. Law S and K. Myambo and J. Palmer and B. Ylstra and J. P. Yue and J. W. Gray and A. N. Jain and D. Pinkel and D. G. Albertson},
+ Journal = {Nat. Genet.},
+ Title = {Assembly of microarrays for genome-wide measurement of DNA copy number},
+ Year = {2001},
+ volume = {29},
+ OPTnumber = {3},
+ pages = {263-4}
+}
+
+
+
+
+ at ARTICLE{wigler03,
+ Author = {R. Lucito and J. Healey and J. Alexander and A. Reiner and D. Esposito and M. Chi and L. Rodgers and A. Brady and J. Sebat and J. Troge and JA West and S. Rostan and KC Nguyen and S. Powers and KQ Ye and A. Olshen and E. Venkatraman and L. Norton and M. Wigler},
+ Journal = {Nat. Genet.},
+ Title = {Representational oligonucleotide microarray analysis: a high resolution method to detect genome copy number variation},
+ Year = {2003},
+ volume = {13},
+ OPTnumber = {10},
+ pages = {2291-305}
+}
+
+
+
+ at ARTICLE{olshen04,
+ Author = {A. B. Olshen and E. S. Venkatraman and R. Lucito and M. Wigler},
+ Journal = {Biostatistics},
+ Title = {Circular binary segmentation for the analysis of array-based DNA copy number data},
+ Year = {2004},
+ volume = {5},
+ OPTnumber = {4},
+ pages = {557-72}
+}
+
+
+
+ at ARTICLE{venkat07,
+ Author = {E. S. Venkatraman and A. B. Olshen},
+ Journal = {Bioinformatics},
+ Title = {A faster circular binary segmentation algorithm for the analysis of array CGH data},
+ Year = {2007},
+ volume = {23},
+ OPTnumber = {6},
+ pages = {657-63}
+}
+
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/r-bioc-dnacopy.git
More information about the debian-med-commit
mailing list